• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

zbraniecki / icu4x / 11904027177

19 Nov 2024 12:33AM UTC coverage: 75.477% (+0.3%) from 75.174%
11904027177

push

github

web-flow
Move DateTimePattern into pattern module (#5834)

#1317

Also removes `NeoNeverMarker` and fixes #5689

258 of 319 new or added lines in 6 files covered. (80.88%)

6967 existing lines in 278 files now uncovered.

54522 of 72237 relevant lines covered (75.48%)

655305.49 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.86
/provider/blob/src/export/blob_exporter.rs
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4

5
// This is the "export" feature, and there are many internal invariants
6
#![allow(clippy::expect_used)]
7

8
use crate::blob_schema::*;
9
use icu_provider::export::*;
10
use icu_provider::{marker::DataMarkerPathHash, prelude::*};
11
use std::collections::{BTreeMap, BTreeSet, HashMap};
12
use std::sync::Mutex;
13
use zerotrie::ZeroTrieSimpleAscii;
14
use zerovec::vecs::Index32;
15
use zerovec::vecs::VarZeroVecOwned;
16
use zerovec::VarZeroVec;
17
use zerovec::ZeroVec;
18

19
use postcard::ser_flavors::{AllocVec, Flavor};
20

21
/// A data exporter that writes data to a single-file blob.
22
/// See the module-level docs for an example.
23
pub struct BlobExporter<'w> {
24
    /// Map of marker path hash -> locale byte string -> blob ID
25
    #[allow(clippy::type_complexity)]
26
    resources: Mutex<BTreeMap<DataMarkerPathHash, BTreeMap<Vec<u8>, usize>>>,
27
    // All seen markers
28
    all_markers: Mutex<BTreeSet<DataMarkerPathHash>>,
29
    /// Map from blob to blob ID
30
    unique_resources: Mutex<HashMap<Vec<u8>, usize>>,
31
    sink: Box<dyn std::io::Write + Sync + 'w>,
32
}
33

34
impl core::fmt::Debug for BlobExporter<'_> {
UNCOV
35
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
×
UNCOV
36
        f.debug_struct("BlobExporter")
×
UNCOV
37
            .field("resources", &self.resources)
×
UNCOV
38
            .field("unique_resources", &self.unique_resources)
×
UNCOV
39
            .field("all_markers", &self.all_markers)
×
40
            .field("sink", &"<sink>")
41
            .finish()
UNCOV
42
    }
×
43
}
44

45
impl<'w> BlobExporter<'w> {
46
    /// Creates a version 1 [`BlobExporter`] that writes to the given I/O stream.
47
    ///
48
    /// Version 1 is needed if the blob may be consumed by ICU4X versions 1.0 through 1.3. If
49
    /// targeting only ICU4X 1.4 and above, see [BlobExporter::new_with_sink()].
50
    pub fn new_with_sink(sink: Box<dyn std::io::Write + Sync + 'w>) -> Self {
6✔
51
        Self {
6✔
52
            resources: Default::default(),
6✔
53
            unique_resources: Default::default(),
6✔
54
            all_markers: Default::default(),
6✔
55
            sink,
6✔
UNCOV
56
        }
×
57
    }
6✔
58
}
59

60
impl DataExporter for BlobExporter<'_> {
61
    fn put_payload(
17,857✔
62
        &self,
63
        marker: DataMarkerInfo,
64
        id: DataIdentifierBorrowed,
65
        payload: &DataPayload<ExportMarker>,
66
    ) -> Result<(), DataError> {
67
        let mut serializer = postcard::Serializer {
35,714✔
68
            output: AllocVec::new(),
17,857✔
69
        };
70
        payload.serialize(&mut serializer)?;
35,714✔
71
        let output = serializer
17,847✔
72
            .output
73
            .finalize()
74
            .expect("Failed to finalize serializer output");
17,851✔
75
        let idx = {
76
            let mut unique_resources = self.unique_resources.lock().expect("poison");
17,851✔
77
            let len = unique_resources.len();
17,851✔
78
            *unique_resources.entry(output).or_insert(len)
17,851✔
79
        };
17,851✔
80
        #[allow(clippy::expect_used)]
81
        self.resources
53,553✔
82
            .lock()
83
            .expect("poison")
84
            .entry(marker.path.hashed())
17,851✔
85
            .or_default()
86
            .entry({
87
                let mut key = id.locale.to_string();
17,851✔
88
                if !id.marker_attributes.is_empty() {
17,851✔
89
                    key.push(crate::blob_schema::REQUEST_SEPARATOR);
4✔
90
                    key.push_str(id.marker_attributes);
4✔
91
                }
92
                key.into_bytes()
17,851✔
93
            })
17,851✔
94
            .or_insert(idx);
17,851✔
95
        Ok(())
17,851✔
96
    }
17,851✔
97

98
    fn flush(&self, marker: DataMarkerInfo, _metadata: FlushMetadata) -> Result<(), DataError> {
7✔
99
        self.all_markers
14✔
100
            .lock()
101
            .expect("poison")
102
            .insert(marker.path.hashed());
7✔
103
        Ok(())
7✔
104
    }
7✔
105

106
    fn close(&mut self) -> Result<ExporterCloseMetadata, DataError> {
6✔
107
        self.close_internal()
6✔
108
    }
6✔
109
}
110

111
struct FinalizedBuffers {
112
    /// Sorted list of blob to old ID; the index in the vec is the new ID
113
    vzv: VarZeroVec<'static, [u8], Index32>,
114
    /// Map from old ID to new ID
115
    remap: HashMap<usize, usize>,
116
}
117

118
impl BlobExporter<'_> {
119
    fn finalize_buffers(&mut self) -> FinalizedBuffers {
6✔
120
        // The blob IDs are unstable due to the parallel nature of datagen.
121
        // In order to make a canonical form, we sort them lexicographically now.
122

123
        // This is a sorted list of blob to old ID; the index in the vec is the new ID
124
        let sorted: Vec<(Vec<u8>, usize)> = {
6✔
125
            let mut unique_resources = self.unique_resources.lock().expect("poison");
6✔
126
            let mut sorted: Vec<(Vec<u8>, usize)> = unique_resources.drain().collect();
6✔
127
            sorted.sort();
6✔
128
            sorted
6✔
129
        };
6✔
130

131
        // This is a map from old ID to new ID
132
        let remap: HashMap<usize, usize> = sorted
6✔
133
            .iter()
134
            .enumerate()
135
            .map(|(new_id, (_, old_id))| (*old_id, new_id))
17,671✔
136
            .collect();
137

138
        // Convert the sorted list to a VarZeroVec
139
        let vzv: VarZeroVec<[u8], Index32> = {
140
            let buffers: Vec<Vec<u8>> = sorted.into_iter().map(|(blob, _)| blob).collect();
17,677✔
141
            buffers.as_slice().into()
6✔
142
        };
6✔
143

144
        FinalizedBuffers { vzv, remap }
6✔
145
    }
6✔
146

147
    fn close_internal(&mut self) -> Result<ExporterCloseMetadata, DataError> {
16✔
148
        let FinalizedBuffers { vzv, remap } = self.finalize_buffers();
16✔
149

150
        let all_markers = self.all_markers.lock().expect("poison");
16✔
151
        let resources = self.resources.lock().expect("poison");
16✔
152

153
        let markers: ZeroVec<DataMarkerPathHash> = all_markers.iter().copied().collect();
16✔
154

155
        let locales_vec: Vec<Vec<u8>> = all_markers
16✔
156
            .iter()
157
            .map(|marker_path_hash| resources.get(marker_path_hash))
7✔
158
            .map(|option_sub_map| {
7✔
159
                if let Some(sub_map) = option_sub_map {
7✔
160
                    let mut sub_map = sub_map.clone();
5✔
161
                    sub_map
5✔
162
                        .iter_mut()
163
                        .for_each(|(_, id)| *id = *remap.get(id).expect("in-bound index"));
17,856✔
164
                    let zerotrie = ZeroTrieSimpleAscii::try_from(&sub_map).expect("in-bounds");
5✔
165
                    zerotrie.into_store()
5✔
166
                } else {
5✔
167
                    // Key with no locales: insert an empty ZeroTrie
168
                    ZeroTrieSimpleAscii::default().into_store()
2✔
169
                }
170
            })
7✔
171
            .collect();
172

173
        if !markers.is_empty() {
12✔
174
            if let Ok(locales_vzv) =
6✔
175
                VarZeroVecOwned::<[u8]>::try_from_elements(locales_vec.as_slice())
6✔
176
            {
177
                let blob = BlobSchema::V003(BlobSchemaV3 {
5✔
178
                    markers: &markers,
5✔
179
                    locales: &locales_vzv,
5✔
180
                    buffers: &vzv,
5✔
181
                });
182
                log::info!("Serializing blob to output stream...");
5✔
183

184
                let output = postcard::to_allocvec(&blob)?;
5✔
185
                self.sink.write_all(&output)?;
5✔
186
            } else {
5✔
187
                log::info!("Upgrading to BlobSchemaV3 (bigger)...");
1✔
188
                let locales_vzv =
189
                    VarZeroVecOwned::<[u8], Index32>::try_from_elements(locales_vec.as_slice())
1✔
190
                        .expect("Locales vector does not fit in Index32 buffer!");
191
                let blob = BlobSchema::V003Bigger(BlobSchemaV3 {
1✔
192
                    markers: &markers,
1✔
193
                    locales: &locales_vzv,
1✔
194
                    buffers: &vzv,
1✔
195
                });
196
                log::info!("Serializing blob to output stream...");
1✔
197

198
                let output = postcard::to_allocvec(&blob)?;
1✔
199
                self.sink.write_all(&output)?;
17✔
200
            }
1✔
201
        }
16✔
202

203
        Ok(Default::default())
6✔
204
    }
16✔
205
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc