• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

zbraniecki / icu4x / 6815798908

09 Nov 2023 05:17PM CUT coverage: 72.607% (-2.4%) from 75.01%
6815798908

push

github

web-flow
Implement `Any/BufferProvider` for some smart pointers (#4255)

Allows storing them as a `Box<dyn Any/BufferProvider>` without using a
wrapper type that implements the trait.

44281 of 60987 relevant lines covered (72.61%)

201375.86 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.62
/components/collections/src/codepointinvliststringlist/mod.rs
1
// This file is part of ICU4X. For terms of use, please see the file
2✔
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4

5
//! This module provides functionality for querying of sets of Unicode code points and strings.
6
//!
7
//! It depends on [`CodePointInversionList`] to efficiently represent Unicode code points, while
8
//! it also maintains a list of strings in the set.
9
//!
10
//! It is an implementation of the existing [ICU4C UnicodeSet API](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/classicu_1_1UnicodeSet.html).
11

12
use crate::codepointinvlist::{
13
    CodePointInversionList, CodePointInversionListBuilder, CodePointInversionListError,
14
    CodePointInversionListULE,
15
};
16
use alloc::string::{String, ToString};
17
use alloc::vec::Vec;
18
use displaydoc::Display;
19
use yoke::Yokeable;
20
use zerofrom::ZeroFrom;
21
use zerovec::{VarZeroSlice, VarZeroVec};
22

23
/// A data structure providing a concrete implementation of a `UnicodeSet`
24
/// (which represents a set of code points and strings) using an inversion list for the code points and a simple
25
/// list-like structure to store and iterate over the strings.
26
#[zerovec::make_varule(CodePointInversionListAndStringListULE)]
961,066✔
27
#[zerovec::skip_derive(Ord)]
28
#[zerovec::derive(Debug)]
29
#[derive(Debug, Eq, PartialEq, Clone, Yokeable, ZeroFrom)]
86✔
30
// Valid to auto-derive Deserialize because the invariants are weakly held
31
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
×
32
#[cfg_attr(feature = "serde", zerovec::derive(Serialize, Deserialize, Debug))]
33
pub struct CodePointInversionListAndStringList<'data> {
34
    #[cfg_attr(feature = "serde", serde(borrow))]
35
    #[zerovec::varule(CodePointInversionListULE)]
36
    cp_inv_list: CodePointInversionList<'data>,
83✔
37
    // Invariants (weakly held):
38
    //   - no input string is length 1 (a length 1 string should be a single code point)
39
    //   - the string list is sorted
40
    //   - the elements in the string list are unique
41
    #[cfg_attr(feature = "serde", serde(borrow))]
42
    str_list: VarZeroVec<'data, str>,
83✔
43
}
44

45
#[cfg(feature = "databake")]
46
impl databake::Bake for CodePointInversionListAndStringList<'_> {
47
    fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
×
48
        env.insert("icu_collections");
×
49
        let cp_inv_list = self.cp_inv_list.bake(env);
×
50
        let str_list = self.str_list.bake(env);
×
51
        // Safe because our parts are safe.
52
        databake::quote! {
×
53
            icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList::from_parts_unchecked(#cp_inv_list, #str_list)
54
        }
55
    }
×
56
}
57

58
impl<'data> CodePointInversionListAndStringList<'data> {
59
    /// Returns a new [`CodePointInversionListAndStringList`] from both a [`CodePointInversionList`] for the
60
    /// code points and a [`VarZeroVec`]`<`[`str`]`>` of strings.
61
    pub fn try_from(
589✔
62
        cp_inv_list: CodePointInversionList<'data>,
63
        str_list: VarZeroVec<'data, str>,
64
    ) -> Result<Self, CodePointInversionListAndStringListError> {
65
        // Verify invariants:
66
        // Do so by using the equivalent of str_list.iter().windows(2) to get
67
        // overlapping windows of size 2. The above putative code is not possible
68
        // because `.windows()` exists on a slice, but VarZeroVec cannot return a slice
69
        // because the non-fixed size elements necessitate at least some type
70
        // of allocation.
71
        {
589✔
72
            let mut it = str_list.iter();
589✔
73
            if let Some(mut x) = it.next() {
464✔
74
                if x.len() == 1 {
65✔
75
                    return Err(
1✔
76
                        CodePointInversionListAndStringListError::InvalidStringLength(
1✔
77
                            x.to_string(),
1✔
78
                        ),
79
                    );
80
                }
81
                for y in it {
294✔
82
                    if x.len() == 1 {
230✔
83
                        return Err(
×
84
                            CodePointInversionListAndStringListError::InvalidStringLength(
×
85
                                x.to_string(),
×
86
                            ),
87
                        );
88
                    } else if x == y {
230✔
89
                        return Err(
1✔
90
                            CodePointInversionListAndStringListError::StringListNotUnique(
1✔
91
                                x.to_string(),
1✔
92
                            ),
93
                        );
94
                    } else if x > y {
229✔
95
                        return Err(
1✔
96
                            CodePointInversionListAndStringListError::StringListNotSorted(
1✔
97
                                x.to_string(),
1✔
98
                                y.to_string(),
1✔
99
                            ),
×
100
                        );
101
                    }
102

103
                    // Next window begins. Update `x` here, `y` will be updated in next loop iteration.
104
                    x = y;
228✔
105
                }
106
            }
107
        }
460✔
108

109
        Ok(CodePointInversionListAndStringList {
460✔
110
            cp_inv_list,
460✔
111
            str_list,
460✔
112
        })
113
    }
587✔
114

115
    #[doc(hidden)]
116
    pub const fn from_parts_unchecked(
×
117
        cp_inv_list: CodePointInversionList<'data>,
118
        str_list: VarZeroVec<'data, str>,
119
    ) -> Self {
120
        CodePointInversionListAndStringList {
×
121
            cp_inv_list,
×
122
            str_list,
×
123
        }
124
    }
×
125

126
    /// Returns the number of elements in this set (its cardinality).
127
    /// Note than the elements of a set may include both individual
128
    /// codepoints and strings.
129
    pub fn size(&self) -> usize {
147✔
130
        self.cp_inv_list.size() + self.str_list.len()
147✔
131
    }
147✔
132

133
    /// Return true if this set contains multi-code point strings or the empty string.
134
    pub fn has_strings(&self) -> bool {
70✔
135
        !self.str_list.is_empty()
70✔
136
    }
70✔
137

138
    ///
139
    /// # Examples
140
    /// ```
141
    /// use icu_collections::codepointinvlist::CodePointInversionList;
142
    /// use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList;
143
    /// use zerovec::VarZeroVec;
144
    ///
145
    /// let cp_slice = &[0, 0x1_0000, 0x10_FFFF, 0x11_0000];
146
    /// let cp_list =
147
    ///    CodePointInversionList::try_clone_from_inversion_list_slice(cp_slice).unwrap();
148
    /// let str_slice = &["", "bmp_max", "unicode_max", "zero"];
149
    /// let str_list = VarZeroVec::<str>::from(str_slice);
150
    ///
151
    /// let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list).unwrap();
152
    ///
153
    /// assert!(cpilsl.contains("bmp_max"));
154
    /// assert!(cpilsl.contains(""));
155
    /// assert!(cpilsl.contains("A"));
156
    /// assert!(cpilsl.contains("ቔ"));  // U+1254 ETHIOPIC SYLLABLE QHEE
157
    /// assert!(!cpilsl.contains("bazinga!"));
158
    /// ```
159
    pub fn contains(&self, s: &str) -> bool {
15,897✔
160
        let mut chars = s.chars();
15,897✔
161
        if let Some(first_char) = chars.next() {
15,897✔
162
            if chars.next().is_none() {
7,972✔
163
                return self.contains_char(first_char);
7,938✔
164
            }
165
        }
166
        self.str_list.binary_search(s).is_ok()
7,959✔
167
    }
15,897✔
168

169
    ///
170
    /// # Examples
171
    /// ```
172
    /// use icu_collections::codepointinvlist::CodePointInversionList;
173
    /// use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList;
174
    /// use zerovec::VarZeroVec;
175
    ///
176
    /// let cp_slice = &[0, 0x80, 0xFFFF, 0x1_0000, 0x10_FFFF, 0x11_0000];
177
    /// let cp_list =
178
    ///     CodePointInversionList::try_clone_from_inversion_list_slice(cp_slice).unwrap();
179
    /// let str_slice = &["", "ascii_max", "bmp_max", "unicode_max", "zero"];
180
    /// let str_list = VarZeroVec::<str>::from(str_slice);
181
    ///
182
    /// let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list).unwrap();
183
    ///
184
    /// assert!(cpilsl.contains32(0));
185
    /// assert!(cpilsl.contains32(0x0042));
186
    /// assert!(!cpilsl.contains32(0x0080));
187
    /// ```
188
    pub fn contains32(&self, cp: u32) -> bool {
240,279✔
189
        self.cp_inv_list.contains32(cp)
240,279✔
190
    }
240,279✔
191

192
    ///
193
    /// # Examples
194
    /// ```
195
    /// use icu_collections::codepointinvlist::CodePointInversionList;
196
    /// use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList;
197
    /// use zerovec::VarZeroVec;
198
    ///
199
    /// let cp_slice = &[0, 0x1_0000, 0x10_FFFF, 0x11_0000];
200
    /// let cp_list =
201
    ///    CodePointInversionList::try_clone_from_inversion_list_slice(cp_slice).unwrap();
202
    /// let str_slice = &["", "bmp_max", "unicode_max", "zero"];
203
    /// let str_list = VarZeroVec::<str>::from(str_slice);
204
    ///
205
    /// let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list).unwrap();
206
    ///
207
    /// assert!(cpilsl.contains_char('A'));
208
    /// assert!(cpilsl.contains_char('ቔ'));  // U+1254 ETHIOPIC SYLLABLE QHEE
209
    /// assert!(!cpilsl.contains_char('\u{1_0000}'));
210
    /// assert!(!cpilsl.contains_char('🨫'));  // U+1FA2B NEUTRAL CHESS TURNED QUEEN
211
    pub fn contains_char(&self, ch: char) -> bool {
240,274✔
212
        self.contains32(ch as u32)
240,274✔
213
    }
240,274✔
214

215
    /// Access the underlying [`CodePointInversionList`].
216
    pub fn code_points(&self) -> &CodePointInversionList<'data> {
515✔
217
        &self.cp_inv_list
218
    }
515✔
219

220
    /// Access the contained strings.
221
    pub fn strings(&self) -> &VarZeroSlice<str> {
232,472✔
222
        &self.str_list
232,472✔
223
    }
232,472✔
224
}
225

226
impl<'a> FromIterator<&'a str> for CodePointInversionListAndStringList<'_> {
227
    fn from_iter<I>(it: I) -> Self
4✔
228
    where
229
        I: IntoIterator<Item = &'a str>,
230
    {
231
        let mut builder = CodePointInversionListBuilder::new();
4✔
232
        let mut strings = Vec::<&str>::new();
4✔
233
        for s in it {
64✔
234
            let mut chars = s.chars();
60✔
235
            if let Some(first_char) = chars.next() {
60✔
236
                if chars.next().is_none() {
60✔
237
                    builder.add_char(first_char);
54✔
238
                    continue;
239
                }
240
            }
241
            strings.push(s);
6✔
242
        }
243

244
        // Ensure that the string list is sorted. If not, the binary search that
245
        // is used for `.contains(&str)` will return garbase otuput.
246
        strings.sort_unstable();
4✔
247
        strings.dedup();
4✔
248

249
        let cp_inv_list = builder.build();
4✔
250
        let str_list = VarZeroVec::<str>::from(&strings);
4✔
251

252
        CodePointInversionListAndStringList {
4✔
253
            cp_inv_list,
4✔
254
            str_list,
4✔
255
        }
256
    }
4✔
257
}
258

259
/// Custom Errors for [`CodePointInversionListAndStringList`].
260
///
261
/// Re-exported as [`Error`].
262
#[derive(Display, Debug)]
×
263
pub enum CodePointInversionListAndStringListError {
264
    /// An invalid CodePointInversionList was constructed
265
    #[displaydoc("Invalid code point inversion list: {0:?}")]
266
    InvalidCodePointInversionList(CodePointInversionListError),
×
267
    /// A string in the string list had an invalid length
268
    #[displaydoc("Invalid string length for string: {0}")]
×
269
    InvalidStringLength(String),
×
270
    /// A string in the string list appears more than once
271
    #[displaydoc("String list has duplicate: {0}")]
×
272
    StringListNotUnique(String),
×
273
    /// Two strings in the string list compare to each other opposite of sorted order
274
    #[displaydoc("Strings in string list not in sorted order: ({0}, {1})")]
×
275
    StringListNotSorted(String, String),
×
276
}
277

278
#[doc(no_inline)]
279
pub use CodePointInversionListAndStringListError as Error;
280

281
#[cfg(test)]
282
mod tests {
283
    use super::*;
284

285
    #[test]
286
    fn test_size_has_strings() {
2✔
287
        let cp_slice = &[0, 1, 0x7F, 0x80, 0xFFFF, 0x1_0000, 0x10_FFFF, 0x11_0000];
1✔
288
        let cp_list =
289
            CodePointInversionList::try_clone_from_inversion_list_slice(cp_slice).unwrap();
1✔
290
        let str_slice = &["ascii_max", "bmp_max", "unicode_max", "zero"];
1✔
291
        let str_list = VarZeroVec::<str>::from(str_slice);
1✔
292

293
        let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list).unwrap();
1✔
294

295
        assert!(cpilsl.has_strings());
1✔
296
        assert_eq!(8, cpilsl.size());
1✔
297
    }
2✔
298

299
    #[test]
300
    fn test_empty_string_allowed() {
2✔
301
        let cp_slice = &[0, 1, 0x7F, 0x80, 0xFFFF, 0x1_0000, 0x10_FFFF, 0x11_0000];
1✔
302
        let cp_list =
303
            CodePointInversionList::try_clone_from_inversion_list_slice(cp_slice).unwrap();
1✔
304
        let str_slice = &["", "ascii_max", "bmp_max", "unicode_max", "zero"];
1✔
305
        let str_list = VarZeroVec::<str>::from(str_slice);
1✔
306

307
        let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list).unwrap();
1✔
308

309
        assert!(cpilsl.has_strings());
1✔
310
        assert_eq!(9, cpilsl.size());
1✔
311
    }
2✔
312

313
    #[test]
314
    fn test_invalid_string() {
2✔
315
        let cp_slice = &[0, 1];
1✔
316
        let cp_list =
317
            CodePointInversionList::try_clone_from_inversion_list_slice(cp_slice).unwrap();
1✔
318
        let str_slice = &["a"];
1✔
319
        let str_list = VarZeroVec::<str>::from(str_slice);
1✔
320

321
        let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list);
1✔
322

323
        assert!(matches!(
1✔
324
            cpilsl,
1✔
325
            Err(CodePointInversionListAndStringListError::InvalidStringLength(_))
326
        ));
327
    }
2✔
328

329
    #[test]
330
    fn test_invalid_string_list_has_duplicate() {
2✔
331
        let cp_slice = &[0, 1];
1✔
332
        let cp_list =
333
            CodePointInversionList::try_clone_from_inversion_list_slice(cp_slice).unwrap();
1✔
334
        let str_slice = &["abc", "abc"];
1✔
335
        let str_list = VarZeroVec::<str>::from(str_slice);
1✔
336

337
        let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list);
1✔
338

339
        assert!(matches!(
1✔
340
            cpilsl,
1✔
341
            Err(CodePointInversionListAndStringListError::StringListNotUnique(_))
342
        ));
343
    }
2✔
344

345
    #[test]
346
    fn test_invalid_string_list_not_sorted() {
2✔
347
        let cp_slice = &[0, 1];
1✔
348
        let cp_list =
349
            CodePointInversionList::try_clone_from_inversion_list_slice(cp_slice).unwrap();
1✔
350
        let str_slice = &["xyz", "abc"];
1✔
351
        let str_list = VarZeroVec::<str>::from(str_slice);
1✔
352

353
        let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list);
1✔
354

355
        assert!(matches!(
1✔
356
            cpilsl,
1✔
357
            Err(CodePointInversionListAndStringListError::StringListNotSorted(_, _))
358
        ));
359
    }
2✔
360

361
    #[test]
362
    fn test_from_iter_invariants() {
2✔
363
        let in_strs_1 = ["a", "abc", "xyz", "abc"];
1✔
364
        let in_strs_2 = ["xyz", "abc", "a", "abc"];
1✔
365

366
        let cpilsl_1 = CodePointInversionListAndStringList::from_iter(in_strs_1);
1✔
367
        let cpilsl_2 = CodePointInversionListAndStringList::from_iter(in_strs_2);
1✔
368

369
        assert_eq!(cpilsl_1, cpilsl_2);
1✔
370

371
        assert!(cpilsl_1.has_strings());
1✔
372
        assert!(cpilsl_1.contains("abc"));
1✔
373
        assert!(cpilsl_1.contains("xyz"));
1✔
374
        assert!(!cpilsl_1.contains("def"));
1✔
375

376
        assert_eq!(1, cpilsl_1.cp_inv_list.size());
1✔
377
        assert!(cpilsl_1.contains_char('a'));
1✔
378
        assert!(!cpilsl_1.contains_char('0'));
1✔
379
        assert!(!cpilsl_1.contains_char('q'));
1✔
380

381
        assert_eq!(3, cpilsl_1.size());
1✔
382
    }
2✔
383
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc