• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

zbraniecki / icu4x / 6815798908

09 Nov 2023 05:17PM CUT coverage: 72.607% (-2.4%) from 75.01%
6815798908

push

github

web-flow
Implement `Any/BufferProvider` for some smart pointers (#4255)

Allows storing them as a `Box<dyn Any/BufferProvider>` without using a
wrapper type that implements the trait.

44281 of 60987 relevant lines covered (72.61%)

201375.86 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

83.02
/provider/blob/src/blob_schema.rs
1
// This file is part of ICU4X. For terms of use, please see the file
1✔
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4

5
use alloc::boxed::Box;
6
use icu_provider::prelude::*;
7
use serde::Deserialize;
8
use writeable::Writeable;
9
use zerotrie::ZeroTrieSimpleAscii;
10
use zerovec::maps::{ZeroMap2dBorrowed, ZeroMapKV};
11
use zerovec::vecs::{Index32, VarZeroSlice, VarZeroVec, ZeroSlice};
12

13
/// A versioned Serde schema for ICU4X data blobs.
///
/// Each variant wraps the borrowed contents of one schema version.
/// `serde::Serialize` is only derived when the `export` feature is enabled.
14
#[derive(serde::Deserialize, yoke::Yokeable)]
143✔
15
#[yoke(prove_covariance_manually)]
16
#[cfg_attr(feature = "export", derive(serde::Serialize))]
7✔
17
#[derive(Debug, Clone)]
×
18
pub(crate) enum BlobSchema<'data> {
19
    /// A blob in the version 1 layout; see [`BlobSchemaV1`].
    #[serde(borrow)]
20
    V001(BlobSchemaV1<'data>),
7✔
21
    /// A blob in the version 2 layout; see [`BlobSchemaV2`].
    #[serde(borrow)]
22
    V002(BlobSchemaV2<'data>),
6✔
23
}
24

25
impl<'data> BlobSchema<'data> {
26
    /// Deserializes a blob schema from `de`.
    ///
    /// In builds with `debug_assertions` enabled, the weak invariants of the
    /// deserialized schema are additionally verified with debug assertions
    /// before it is returned.
    ///
    /// # Errors
    ///
    /// Returns the deserializer's error if deserialization fails.
    pub fn deserialize_and_check<D: serde::Deserializer<'data>>(
13✔
27
        de: D,
28
    ) -> Result<BlobSchema<'data>, D::Error> {
29
        let blob = Self::deserialize(de)?;
13✔
30
        // The invariant check only exists in debug-assertion builds.
        #[cfg(debug_assertions)]
31
        blob.check_invariants();
13✔
32
        Ok(blob)
13✔
33
    }
13✔
34

35
    /// Returns the buffer stored for `key` and `req`, delegating to the `load`
    /// method of whichever schema version `self` holds.
    ///
    /// # Errors
    ///
    /// Propagates the error of the version-specific `load`.
    pub fn load(&self, key: DataKey, req: DataRequest) -> Result<&'data [u8], DataError> {
84✔
36
        match self {
84✔
37
            BlobSchema::V001(s) => s.load(key, req),
51✔
38
            BlobSchema::V002(s) => s.load(key, req),
33✔
39
        }
40
    }
84✔
41

42
    /// Runs the version-specific invariant check of the wrapped schema.
    /// Only compiled when `debug_assertions` is enabled.
    #[cfg(debug_assertions)]
43
    fn check_invariants(&self) {
13✔
44
        match self {
13✔
45
            BlobSchema::V001(s) => s.check_invariants(),
7✔
46
            BlobSchema::V002(s) => s.check_invariants(),
6✔
47
        }
48
    }
13✔
49
}
50

51
/// Version 1 of the ICU4X data blob schema.
///
/// Both fields borrow from the `'data` input. The invariants listed on `keys`
/// are "weak": they are only verified with debug assertions, and `load`
/// returns an error rather than panicking when an index is out of range.
52
#[derive(Clone, Copy, Debug, serde::Deserialize, yoke::Yokeable)]
14✔
53
#[yoke(prove_covariance_manually)]
54
#[cfg_attr(feature = "export", derive(serde::Serialize))]
4✔
55
pub(crate) struct BlobSchemaV1<'data> {
56
    /// Map from key hash and locale to buffer index.
57
    /// Weak invariant: the `usize` values are valid indices into `self.buffers`
58
    /// Weak invariant: there is at least one value for every integer in 0..self.buffers.len()
59
    #[serde(borrow)]
60
    pub keys: ZeroMap2dBorrowed<'data, DataKeyHash, Index32U8, usize>,
×
61
    /// Vector of buffers, addressed by the indices stored in `keys`.
62
    #[serde(borrow)]
63
    pub buffers: &'data VarZeroSlice<[u8], Index32>,
×
64
}
65

66
/// The default schema is empty: no keys and no buffers.
impl Default for BlobSchemaV1<'_> {
67
    fn default() -> Self {
×
68
        Self {
×
69
            keys: ZeroMap2dBorrowed::new(),
×
70
            buffers: VarZeroSlice::new_empty(),
×
71
        }
72
    }
×
73
}
74

75
impl<'data> BlobSchemaV1<'data> {
76
    /// Looks up the buffer stored for `key` and the locale of `req`.
    ///
    /// # Errors
    ///
    /// All errors are annotated with `key` and `req`:
    /// - `DataErrorKind::MissingDataKey` if the hash of `key` is not in `self.keys`.
    /// - `DataErrorKind::ExtraneousLocale` if `key` is marked as a singleton but
    ///   the requested locale is not empty.
    /// - `DataErrorKind::MissingLocale` if the key has no entry for the requested locale.
    /// - A custom "Invalid blob bytes" error if the stored index is not a valid
    ///   index into `self.buffers`.
    pub fn load(&self, key: DataKey, req: DataRequest) -> Result<&'data [u8], DataError> {
51✔
77
        let idx = self
204✔
78
            .keys
79
            .get0(&key.hashed())
51✔
80
            .ok_or(DataErrorKind::MissingDataKey)
51✔
81
            .and_then(|cursor| {
102✔
82
                if key.metadata().singleton && !req.locale.is_empty() {
51✔
83
                    return Err(DataErrorKind::ExtraneousLocale);
1✔
84
                }
85
                cursor
100✔
86
                    // NOTE(review): the comparison is computed from the request's side and
                    // then reversed; presumably `get1_copied_by` expects the ordering of the
                    // stored entry relative to the probe — confirm against the zerovec docs.
                    .get1_copied_by(|bytes| req.locale.strict_cmp(&bytes.0).reverse())
282✔
87
                    .ok_or(DataErrorKind::MissingLocale)
50✔
88
            })
51✔
89
            // Attach the key and request to whichever error kind was produced above.
            .map_err(|kind| kind.with_req(key, req))?;
56✔
90
        self.buffers
92✔
91
            .get(idx)
92
            .ok_or_else(|| DataError::custom("Invalid blob bytes").with_req(key, req))
46✔
93
    }
51✔
94

95
    /// Verifies the weak invariants using debug assertions
    ///
    /// Checks that every stored index is in range of `self.buffers` (entries keyed
    /// by `Index32U8::SENTINEL` are exempt) and that both the smallest and the
    /// largest buffer index are referenced at least once. A schema with no keys
    /// and no buffers passes trivially.
96
    #[cfg(debug_assertions)]
97
    fn check_invariants(&self) {
7✔
98
        if self.keys.is_empty() && self.buffers.is_empty() {
7✔
99
            return;
100
        }
101
        // Note: We could check that every index occurs at least once, but that's a more expensive
102
        // operation, so we will just check for the min and max index.
103
        let mut seen_min = false;
7✔
104
        // With no buffers there is no maximum index to look for.
        let mut seen_max = self.buffers.is_empty();
7✔
105
        for cursor in self.keys.iter0() {
35✔
106
            for (locale, idx) in cursor.iter1_copied() {
2,310✔
107
                // NOTE(review): entries whose locale equals the empty sentinel may carry an
                // out-of-range index; presumably they are placeholders — confirm.
                debug_assert!(idx < self.buffers.len() || locale == Index32U8::SENTINEL);
2,282✔
108
                if idx == 0 {
2,282✔
109
                    seen_min = true;
7✔
110
                }
111
                // Comparing `idx + 1` with the length avoids computing `len() - 1`.
                if idx + 1 == self.buffers.len() {
2,282✔
112
                    seen_max = true;
5✔
113
                }
114
            }
115
        }
116
        debug_assert!(seen_min);
7✔
117
        debug_assert!(seen_max);
7✔
118
    }
7✔
119
}
120

121
/// Version 2 of the ICU4X data blob schema.
///
/// All fields borrow from the `'data` input. `keys` and `locales` are parallel:
/// `load` uses the position of a key hash in `keys` to pick the element of
/// `locales` to search.
122
#[derive(Clone, Copy, Debug, serde::Deserialize, yoke::Yokeable)]
12✔
123
#[yoke(prove_covariance_manually)]
124
#[cfg_attr(feature = "export", derive(serde::Serialize))]
3✔
125
pub(crate) struct BlobSchemaV2<'data> {
126
    /// Map from key hash to locale trie.
127
    /// Weak invariant: should be sorted.
    /// (`load` finds the key hash with a binary search.)
128
    #[serde(borrow)]
129
    pub keys: &'data ZeroSlice<DataKeyHash>,
×
130
    /// Map from locale to buffer index.
    /// Each element is the byte store of a `ZeroTrieSimpleAscii`.
131
    /// Weak invariant: the `usize` values are valid indices into `self.buffers`
132
    /// Weak invariant: there is at least one value for every integer in 0..self.buffers.len()
133
    /// Weak invariant: keys and locales are the same length
134
    // TODO: Make ZeroTrieSimpleAscii<[u8]> work when in this position.
135
    #[serde(borrow)]
136
    pub locales: &'data VarZeroSlice<[u8]>,
×
137
    /// Vector of buffers, addressed by the indices stored in the locale tries.
138
    #[serde(borrow)]
139
    pub buffers: &'data VarZeroSlice<[u8], Index32>,
×
140
}
141

142
/// The default schema is empty: no keys, no locale tries and no buffers.
impl Default for BlobSchemaV2<'_> {
143
    fn default() -> Self {
×
144
        Self {
×
145
            keys: ZeroSlice::new_empty(),
×
146
            locales: VarZeroSlice::new_empty(),
×
147
            buffers: VarZeroSlice::new_empty(),
×
148
        }
149
    }
×
150
}
151

152
impl<'data> BlobSchemaV2<'data> {
152
    /// Looks up the buffer stored for `key` and the locale of `req`.
    ///
    /// # Errors
    ///
    /// All errors are annotated with `key` and `req`:
    /// - `DataErrorKind::MissingDataKey` if the hash of `key` is not found in `self.keys`.
    /// - `DataErrorKind::ExtraneousLocale` if `key` is marked as a singleton but
    ///   the requested locale is not empty.
    /// - `DataErrorKind::MissingLocale` if the key's trie has no entry for the
    ///   requested locale.
    /// - A custom "Invalid blob bytes" error if `self.locales` has no element at
    ///   the key's position, or the index from the trie is not a valid index into
    ///   `self.buffers`.
    pub fn load(&self, key: DataKey, req: DataRequest) -> Result<&'data [u8], DataError> {
33✔
153
        // The binary search relies on the weak invariant that `keys` is sorted.
        let key_index = self
99✔
154
            .keys
155
            .binary_search(&key.hashed())
33✔
156
            .ok()
157
            .ok_or_else(|| DataErrorKind::MissingDataKey.with_req(key, req))?;
33✔
158
        if key.metadata().singleton && !req.locale.is_empty() {
33✔
159
            return Err(DataErrorKind::ExtraneousLocale.with_req(key, req));
1✔
160
        }
161
        // `locales` is parallel to `keys`: the same position holds this key's trie bytes.
        let zerotrie = self
64✔
162
            .locales
163
            .get(key_index)
164
            .ok_or_else(|| DataError::custom("Invalid blob bytes").with_req(key, req))?;
32✔
165
        // TODO(#4249): Add a lookup function to zerotrie so we don't need to stringify
166
        let locale_str = req.locale.write_to_string();
32✔
167
        let blob_index = ZeroTrieSimpleAscii::from_store(zerotrie)
64✔
168
            .get(locale_str.as_bytes())
32✔
169
            .ok_or_else(|| DataErrorKind::MissingLocale.with_req(key, req))?;
36✔
170
        let buffer = self
30✔
171
            .buffers
172
            .get(blob_index)
173
            .ok_or_else(|| DataError::custom("Invalid blob bytes").with_req(key, req))?;
30✔
174
        Ok(buffer)
30✔
175
    }
33✔
176

177
    /// Verifies the weak invariants using debug assertions
    ///
    /// Checks that `keys` and `locales` have the same length, that every index
    /// stored in a locale trie is in range of `self.buffers`, and that both the
    /// smallest and the largest buffer index are referenced at least once. A
    /// schema with no keys, locales or buffers passes trivially.
178
    #[cfg(debug_assertions)]
179
    fn check_invariants(&self) {
6✔
180
        if self.keys.is_empty() && self.locales.is_empty() && self.buffers.is_empty() {
6✔
181
            return;
182
        }
183
        debug_assert_eq!(self.keys.len(), self.locales.len());
18✔
184
        // Note: We could check that every index occurs at least once, but that's a more expensive
185
        // operation, so we will just check for the min and max index.
186
        // With no buffers there is no minimum or maximum index to look for.
        let mut seen_min = self.buffers.is_empty();
18✔
187
        let mut seen_max = self.buffers.is_empty();
18✔
188
        for zerotrie in self.locales.iter() {
18✔
189
            for (_locale, idx) in ZeroTrieSimpleAscii::from_store(zerotrie).iter() {
114✔
190
                debug_assert!(idx < self.buffers.len());
108✔
191
                if idx == 0 {
108✔
192
                    seen_min = true;
4✔
193
                }
194
                // Comparing `idx + 1` with the length avoids computing `len() - 1`.
                if idx + 1 == self.buffers.len() {
108✔
195
                    seen_max = true;
4✔
196
                }
197
            }
108✔
198
        }
199
        debug_assert!(seen_min);
6✔
200
        debug_assert!(seen_max);
6✔
201
    }
18✔
202
}
204

205
/// This type lets us use a u32-index-format VarZeroVec with the ZeroMap2dBorrowed.
206
///
207
/// Eventually we will have a FormatSelector type that lets us do `ZeroMap<FormatSelector<K, Index32>, V>`
208
/// (https://github.com/unicode-org/icu4x/issues/2312)
209
///
210
/// `Index32U8Borrowed` isn't actually important; it's just more convenient to use `make_varule` to get the
211
/// full suite of traits instead of `#[derive(VarULE)]`. (With `#[derive(VarULE)]` we would have to manually
212
/// define a Serialize implementation, and that would be gnarly)
213
/// https://github.com/unicode-org/icu4x/issues/2310 tracks being able to do this with derive(ULE)
///
/// The unsized type generated by `make_varule` is named `Index32U8`. Its `ZeroMapKV`
/// derive is skipped because that trait is implemented by hand in this file in order
/// to select the `Index32` format.
214
#[zerovec::make_varule(Index32U8)]
22,312✔
215
#[zerovec::derive(Debug)]
216
#[zerovec::skip_derive(ZeroMapKV)]
217
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, serde::Deserialize)]
6,116✔
218
#[zerovec::derive(Deserialize)]
219
#[cfg_attr(feature = "export", derive(serde::Serialize))]
×
220
#[cfg_attr(feature = "export", zerovec::derive(Serialize))]
221
pub(crate) struct Index32U8Borrowed<'a>(
222
    /// The wrapped bytes.
    #[cfg_attr(feature = "export", serde(borrow))] pub &'a [u8],
3,058✔
223
);
224

225
/// Hand-written `ZeroMapKV` implementation that stores `Index32U8` values in
/// `VarZeroVec`/`VarZeroSlice` containers using the `Index32` format.
impl<'a> ZeroMapKV<'a> for Index32U8 {
226
    type Container = VarZeroVec<'a, Index32U8, Index32>;
227
    type Slice = VarZeroSlice<Index32U8, Index32>;
228
    type GetType = Index32U8;
229
    type OwnedType = Box<Index32U8>;
230
}
231

232
impl Index32U8 {
233
    // The only use visible in this file is the `debug_assertions`-only invariant check of
    // `BlobSchemaV1`, so the constant is presumably unused in other build configurations.
    #[allow(dead_code)]
234
    /// An empty `Index32U8`, obtained by reinterpreting an empty byte slice.
    // SAFETY (NOTE(review)): this pointer cast relies on `Index32U8` having the same layout
    // as `[u8]`, which is presumably guaranteed by `make_varule` for a struct whose only
    // field is `&[u8]` — confirm against the zerovec documentation.
    pub(crate) const SENTINEL: &'static Self = unsafe { &*(&[] as *const [u8] as *const Self) };
235
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc