• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

zbraniecki / icu4x / 6815798908

09 Nov 2023 05:17PM UTC coverage: 72.607% (-2.4%) from 75.01%
6815798908

push

github

web-flow
Implement `Any/BufferProvider` for some smart pointers (#4255)

Allows storing them as a `Box<dyn Any/BufferProvider>` without using a
wrapper type that implements the trait.

44281 of 60987 relevant lines covered (72.61%)

201375.86 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.1
/provider/blob/src/export/blob_exporter.rs
1
// This file is part of ICU4X. For terms of use, please see the file
×
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4

5
// This is "export" feature, and there are many internal invariants
6
#![allow(clippy::expect_used)]
7

8
use crate::blob_schema::*;
9
use icu_provider::datagen::*;
10
use icu_provider::prelude::*;
11
use std::collections::{BTreeMap, BTreeSet, HashMap};
12
use std::sync::Mutex;
13
use writeable::Writeable;
14
use zerotrie::ZeroTrieSimpleAscii;
15
use zerovec::ule::VarULE;
16
use zerovec::vecs::Index32;
17
use zerovec::VarZeroVec;
18
use zerovec::ZeroMap2d;
19
use zerovec::ZeroVec;
20

21
use postcard::ser_flavors::{AllocVec, Flavor};
22

23
/// Selects which blob schema version a [`BlobExporter`] emits when it is closed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum VersionConfig {
    /// Version 1 of the blob schema, needed for consumers on ICU4X 1.0 through 1.3.
    V001,
    /// Version 2 of the blob schema, compatible with ICU4X 1.4 and above.
    V002,
}
27

28
/// A data exporter that writes data to a single-file blob.
29
/// See the module-level docs for an example.
30
pub struct BlobExporter<'w> {
31
    /// Map of key hash -> locale byte string -> blob ID
32
    #[allow(clippy::type_complexity)]
33
    resources: Mutex<BTreeMap<DataKeyHash, BTreeMap<Vec<u8>, usize>>>,
34
    // All seen keys
35
    all_keys: Mutex<BTreeSet<DataKeyHash>>,
36
    /// Map from blob to blob ID
37
    unique_resources: Mutex<HashMap<Vec<u8>, usize>>,
38
    sink: Box<dyn std::io::Write + Sync + 'w>,
39
    version: VersionConfig,
40
}
41

42
impl core::fmt::Debug for BlobExporter<'_> {
43
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
×
44
        f.debug_struct("BlobExporter")
×
45
            .field("resources", &self.resources)
46
            .field("unique_resources", &self.unique_resources)
×
47
            .field("all_keys", &self.all_keys)
×
48
            .field("sink", &"<sink>")
49
            .finish()
50
    }
×
51
}
52

53
impl<'w> BlobExporter<'w> {
54
    /// Creates a version 1 [`BlobExporter`] that writes to the given I/O stream.
55
    ///
56
    /// Version 1 is needed if the blob may be consumed by ICU4X versions 1.0 through 1.3. If
57
    /// targeting only ICU4X 1.4 and above, see [BlobExporter::new_v2_with_sink()].
58
    pub fn new_with_sink(sink: Box<dyn std::io::Write + Sync + 'w>) -> Self {
4✔
59
        Self {
4✔
60
            resources: Default::default(),
4✔
61
            unique_resources: Default::default(),
4✔
62
            all_keys: Default::default(),
4✔
63
            sink,
4✔
64
            version: VersionConfig::V001,
65
        }
×
66
    }
4✔
67

68
    /// Creates a version 2 [`BlobExporter`] that writes to the given I/O stream.
69
    ///
70
    /// Version 2 produces a smaller postcard file than version 1 without sacrificing performance.
71
    /// It is compatible with ICU4X 1.4 and above. If you need to support older version of ICU4X,
72
    /// see [BlobExporter::new_with_sink()].
73
    pub fn new_v2_with_sink(sink: Box<dyn std::io::Write + Sync + 'w>) -> Self {
3✔
74
        Self {
3✔
75
            resources: Default::default(),
3✔
76
            unique_resources: Default::default(),
3✔
77
            all_keys: Default::default(),
3✔
78
            sink,
3✔
79
            version: VersionConfig::V002,
80
        }
×
81
    }
3✔
82
}
83

84
impl DataExporter for BlobExporter<'_> {
85
    fn put_payload(
116✔
86
        &self,
87
        key: DataKey,
88
        locale: &DataLocale,
89
        payload: &DataPayload<ExportMarker>,
90
    ) -> Result<(), DataError> {
91
        let mut serializer = postcard::Serializer {
232✔
92
            output: AllocVec::new(),
116✔
93
        };
94
        payload.serialize(&mut serializer)?;
116✔
95
        let output = serializer
76✔
96
            .output
97
            .finalize()
98
            .expect("Failed to finalize serializer output");
106✔
99
        let idx = {
100
            let mut unique_resources = self.unique_resources.lock().expect("poison");
106✔
101
            let len = unique_resources.len();
84✔
102
            *unique_resources.entry(output).or_insert(len)
76✔
103
        };
78✔
104
        #[allow(clippy::expect_used)]
105
        self.resources
236✔
106
            .lock()
107
            .expect("poison")
108
            .entry(key.hashed())
79✔
109
            .or_default()
110
            .entry(locale.write_to_string().into_owned().into_bytes())
78✔
111
            .or_insert(idx);
79✔
112
        Ok(())
80✔
113
    }
80✔
114

115
    fn flush(&self, key: DataKey) -> Result<(), DataError> {
7✔
116
        self.all_keys.lock().expect("poison").insert(key.hashed());
7✔
117
        Ok(())
7✔
118
    }
7✔
119

120
    fn close(&mut self) -> Result<(), DataError> {
7✔
121
        match self.version {
7✔
122
            VersionConfig::V001 => self.close_v1(),
4✔
123
            VersionConfig::V002 => self.close_v2(),
3✔
124
        }
125
    }
7✔
126
}
127

128
struct FinalizedBuffers {
129
    /// Sorted list of blob to old ID; the index in the vec is the new ID
130
    vzv: VarZeroVec<'static, [u8], Index32>,
131
    /// Map from old ID to new ID
132
    remap: HashMap<usize, usize>,
133
}
134

135
impl BlobExporter<'_> {
136
    fn finalize_buffers(&mut self) -> FinalizedBuffers {
7✔
137
        // The blob IDs are unstable due to the parallel nature of datagen.
138
        // In order to make a canonical form, we sort them lexicographically now.
139

140
        // This is a sorted list of blob to old ID; the index in the vec is the new ID
141
        let sorted: Vec<(Vec<u8>, usize)> = {
7✔
142
            let mut unique_resources = self.unique_resources.lock().expect("poison");
7✔
143
            let mut sorted: Vec<(Vec<u8>, usize)> = unique_resources.drain().collect();
7✔
144
            sorted.sort();
7✔
145
            sorted
7✔
146
        };
7✔
147

148
        // This is a map from old ID to new ID
149
        let remap: HashMap<usize, usize> = sorted
7✔
150
            .iter()
151
            .enumerate()
152
            .map(|(new_id, (_, old_id))| (*old_id, new_id))
81✔
153
            .collect();
154

155
        // Convert the sorted list to a VarZeroVec
156
        let vzv: VarZeroVec<[u8], Index32> = {
157
            let buffers: Vec<Vec<u8>> = sorted.into_iter().map(|(blob, _)| blob).collect();
83✔
158
            buffers.as_slice().into()
7✔
159
        };
7✔
160

161
        FinalizedBuffers { vzv, remap }
3✔
162
    }
3✔
163

164
    fn close_v1(&mut self) -> Result<(), DataError> {
12✔
165
        let FinalizedBuffers { vzv, remap } = self.finalize_buffers();
12✔
166

167
        // Now build up the ZeroMap2d, changing old ID to new ID
168
        let mut zm = self
12✔
169
            .resources
170
            .get_mut()
171
            .expect("poison")
172
            .iter()
173
            .flat_map(|(hash, sub_map)| {
2✔
174
                sub_map
4✔
175
                    .iter()
176
                    .map(|(locale, old_id)| (*hash, locale, old_id))
56✔
177
            })
2✔
178
            .map(|(hash, locale, old_id)| {
58✔
179
                (
54✔
180
                    hash,
54✔
181
                    Index32U8::parse_byte_slice(locale)
54✔
182
                        .expect("[u8] to IndexU32U8 should never fail"),
183
                    remap.get(old_id).expect("in-bound index"),
54✔
184
                )
185
            })
54✔
186
            .collect::<ZeroMap2d<DataKeyHash, Index32U8, usize>>();
187

188
        for key in self.all_keys.lock().expect("poison").iter() {
8✔
189
            if zm.get0(key).is_none() {
4✔
190
                zm.insert(key, Index32U8::SENTINEL, &vzv.len());
2✔
191
            }
192
        }
193

194
        if !zm.is_empty() {
4✔
195
            let blob = BlobSchema::V001(BlobSchemaV1 {
4✔
196
                keys: zm.as_borrowed(),
4✔
197
                buffers: &vzv,
4✔
198
            });
199
            log::info!("Serializing blob to output stream...");
4✔
200

201
            let output = postcard::to_allocvec(&blob)?;
4✔
202
            self.sink.write_all(&output)?;
4✔
203
        }
4✔
204
        Ok(())
8✔
205
    }
4✔
206

207
    fn close_v2(&mut self) -> Result<(), DataError> {
9✔
208
        let FinalizedBuffers { vzv, remap } = self.finalize_buffers();
9✔
209

210
        let all_keys = self.all_keys.lock().expect("poison");
9✔
211
        let resources = self.resources.lock().expect("poison");
9✔
212

213
        let keys: ZeroVec<DataKeyHash> = all_keys.iter().copied().collect();
9✔
214

215
        let locales_vec: Vec<Vec<u8>> = all_keys
9✔
216
            .iter()
217
            .map(|data_key_hash| resources.get(data_key_hash))
6✔
218
            .map(|option_sub_map| {
6✔
219
                if let Some(sub_map) = option_sub_map {
3✔
220
                    let mut sub_map = sub_map.clone();
1✔
221
                    sub_map
1✔
222
                        .iter_mut()
223
                        .for_each(|(_, id)| *id = *remap.get(id).expect("in-bound index"));
28✔
224
                    let zerotrie = ZeroTrieSimpleAscii::try_from(&sub_map).expect("in-bounds");
1✔
225
                    zerotrie.take_store()
1✔
226
                } else {
1✔
227
                    // Key with no locales: insert an empty ZeroTrie
228
                    ZeroTrieSimpleAscii::default().take_store()
2✔
229
                }
230
            })
3✔
231
            .collect();
232

233
        let locales_vzv: VarZeroVec<[u8]> = locales_vec.as_slice().into();
9✔
234

235
        if !keys.is_empty() {
3✔
236
            let blob = BlobSchema::V002(BlobSchemaV2 {
3✔
237
                keys: &keys,
3✔
238
                locales: &locales_vzv,
3✔
239
                buffers: &vzv,
3✔
240
            });
241
            log::info!("Serializing blob to output stream...");
3✔
242

243
            let output = postcard::to_allocvec(&blob)?;
3✔
244
            self.sink.write_all(&output)?;
3✔
245
        }
3✔
246
        Ok(())
6✔
247
    }
3✔
248
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc