• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

zbraniecki / icu4x / 13958601093

19 Mar 2025 04:17PM UTC coverage: 74.164% (-1.5%) from 75.71%
13958601093

push

github

web-flow
Clean up properties docs (#6315)

58056 of 78281 relevant lines covered (74.16%)

819371.32 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.77
/components/collections/src/codepointinvliststringlist/mod.rs
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4

5
//! This module provides functionality for querying of sets of Unicode code points and strings.
6
//!
7
//! It depends on [`CodePointInversionList`] to efficiently represent Unicode code points, while
8
//! it also maintains a list of strings in the set.
9
//!
10
//! It is an implementation of the existing [ICU4C UnicodeSet API](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/classicu_1_1UnicodeSet.html).
11

12
#[cfg(feature = "alloc")]
13
use crate::codepointinvlist::CodePointInversionListBuilder;
14
use crate::codepointinvlist::{CodePointInversionList, CodePointInversionListULE};
15
#[cfg(feature = "alloc")]
16
use alloc::string::{String, ToString};
17
#[cfg(feature = "alloc")]
18
use alloc::vec::Vec;
19
use displaydoc::Display;
20
use yoke::Yokeable;
21
use zerofrom::ZeroFrom;
22
use zerovec::{VarZeroSlice, VarZeroVec};
23

24
/// A data structure providing a concrete implementation of a set of code points and strings,
25
/// using an inversion list for the code points.
26
///
27
/// This is what ICU4C calls a `UnicodeSet`.
28
#[zerovec::make_varule(CodePointInversionListAndStringListULE)]
963,185✔
29
#[zerovec::skip_derive(Ord)]
30
#[zerovec::derive(Debug)]
31
#[derive(Debug, Eq, PartialEq, Clone, Yokeable, ZeroFrom)]
409✔
32
#[cfg_attr(not(feature = "alloc"), zerovec::skip_derive(ZeroMapKV, ToOwned))]
33
// Valid to auto-derive Deserialize because the invariants are weakly held
34
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
273✔
35
#[cfg_attr(feature = "serde", zerovec::derive(Serialize, Deserialize, Debug))]
36
pub struct CodePointInversionListAndStringList<'data> {
37
    #[cfg_attr(feature = "serde", serde(borrow))]
38
    #[zerovec::varule(CodePointInversionListULE)]
39
    cp_inv_list: CodePointInversionList<'data>,
316✔
40
    // Invariants (weakly held):
41
    //   - no input string is length 1 (a length 1 string should be a single code point)
42
    //   - the string list is sorted
43
    //   - the elements in the string list are unique
44
    #[cfg_attr(feature = "serde", serde(borrow))]
45
    str_list: VarZeroVec<'data, str>,
316✔
46
}
47

48
#[cfg(feature = "databake")]
49
impl databake::Bake for CodePointInversionListAndStringList<'_> {
50
    fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
×
51
        env.insert("icu_collections");
×
52
        let cp_inv_list = self.cp_inv_list.bake(env);
×
53
        let str_list = self.str_list.bake(env);
×
54
        // Safe because our parts are safe.
55
        databake::quote! {
×
56
            icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList::from_parts_unchecked(#cp_inv_list, #str_list)
57
        }
58
    }
×
59
}
60

61
#[cfg(feature = "databake")]
62
impl databake::BakeSize for CodePointInversionListAndStringList<'_> {
63
    fn borrows_size(&self) -> usize {
×
64
        self.cp_inv_list.borrows_size() + self.str_list.borrows_size()
×
65
    }
×
66
}
67

68
impl<'data> CodePointInversionListAndStringList<'data> {
69
    /// Returns a new [`CodePointInversionListAndStringList`] from both a [`CodePointInversionList`] for the
70
    /// code points and a [`VarZeroVec`]`<`[`str`]`>` of strings.
71
    pub fn try_from(
645✔
72
        cp_inv_list: CodePointInversionList<'data>,
73
        str_list: VarZeroVec<'data, str>,
74
    ) -> Result<Self, InvalidStringList> {
75
        // Verify invariants:
76
        // Do so by using the equivalent of str_list.iter().windows(2) to get
77
        // overlapping windows of size 2. The above putative code is not possible
78
        // because `.windows()` exists on a slice, but VarZeroVec cannot return a slice
79
        // because the non-fixed size elements necessitate at least some type
80
        // of allocation.
81
        {
82
            let mut it = str_list.iter();
645✔
83
            if let Some(mut x) = it.next() {
644✔
84
                if x.len() == 1 {
68✔
85
                    return Err(InvalidStringList::InvalidStringLength(
1✔
86
                        #[cfg(feature = "alloc")]
87
                        x.to_string(),
1✔
88
                    ));
89
                }
90
                for y in it {
501✔
91
                    if x.len() == 1 {
436✔
92
                        return Err(InvalidStringList::InvalidStringLength(
×
93
                            #[cfg(feature = "alloc")]
94
                            x.to_string(),
×
95
                        ));
96
                    } else if x == y {
436✔
97
                        return Err(InvalidStringList::StringListNotUnique(
1✔
98
                            #[cfg(feature = "alloc")]
99
                            x.to_string(),
1✔
100
                        ));
101
                    } else if x > y {
435✔
102
                        return Err(InvalidStringList::StringListNotSorted(
1✔
103
                            #[cfg(feature = "alloc")]
104
                            x.to_string(),
1✔
105
                            #[cfg(feature = "alloc")]
106
                            y.to_string(),
1✔
107
                        ));
×
108
                    }
109

110
                    // Next window begins. Update `x` here, `y` will be updated in next loop iteration.
111
                    x = y;
434✔
112
                }
113
            }
114
        }
115

116
        Ok(CodePointInversionListAndStringList {
640✔
117
            cp_inv_list,
640✔
118
            str_list,
640✔
119
        })
120
    }
643✔
121

122
    #[doc(hidden)] // databake internal
123
    pub const fn from_parts_unchecked(
×
124
        cp_inv_list: CodePointInversionList<'data>,
125
        str_list: VarZeroVec<'data, str>,
126
    ) -> Self {
127
        CodePointInversionListAndStringList {
×
128
            cp_inv_list,
129
            str_list,
130
        }
131
    }
×
132

133
    /// Returns the number of elements in this set (its cardinality).
134
    /// Note than the elements of a set may include both individual
135
    /// codepoints and strings.
136
    pub fn size(&self) -> usize {
147✔
137
        self.cp_inv_list.size() + self.str_list.len()
147✔
138
    }
147✔
139

140
    /// Return true if this set contains multi-code point strings or the empty string.
141
    pub fn has_strings(&self) -> bool {
80✔
142
        !self.str_list.is_empty()
80✔
143
    }
80✔
144

145
    ///
146
    /// # Examples
147
    /// ```
148
    /// use icu::collections::codepointinvlist::CodePointInversionList;
149
    /// use icu::collections::codepointinvliststringlist::CodePointInversionListAndStringList;
150
    /// use zerovec::VarZeroVec;
151
    ///
152
    /// let cp_slice = &[0, 0x1_0000, 0x10_FFFF, 0x11_0000];
153
    /// let cp_list =
154
    ///    CodePointInversionList::try_from_u32_inversion_list_slice(cp_slice).unwrap();
155
    /// let str_slice = &["", "bmp_max", "unicode_max", "zero"];
156
    /// let str_list = VarZeroVec::<str>::from(str_slice);
157
    ///
158
    /// let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list).unwrap();
159
    ///
160
    /// assert!(cpilsl.contains_str("bmp_max"));
161
    /// assert!(cpilsl.contains_str(""));
162
    /// assert!(cpilsl.contains_str("A"));
163
    /// assert!(cpilsl.contains_str("ቔ"));  // U+1254 ETHIOPIC SYLLABLE QHEE
164
    /// assert!(!cpilsl.contains_str("bazinga!"));
165
    /// ```
166
    pub fn contains_str(&self, s: &str) -> bool {
15,915✔
167
        let mut chars = s.chars();
15,915✔
168
        if let Some(first_char) = chars.next() {
15,915✔
169
            if chars.next().is_none() {
7,987✔
170
                return self.contains(first_char);
7,950✔
171
            }
172
        }
173
        self.str_list.binary_search(s).is_ok()
7,965✔
174
    }
15,915✔
175

176
    ///
177
    /// # Examples
178
    /// ```
179
    /// use icu::collections::codepointinvlist::CodePointInversionList;
180
    /// use icu::collections::codepointinvliststringlist::CodePointInversionListAndStringList;
181
    /// use zerovec::VarZeroVec;
182
    ///
183
    /// let cp_slice = &[0, 0x80, 0xFFFF, 0x1_0000, 0x10_FFFF, 0x11_0000];
184
    /// let cp_list =
185
    ///     CodePointInversionList::try_from_u32_inversion_list_slice(cp_slice).unwrap();
186
    /// let str_slice = &["", "ascii_max", "bmp_max", "unicode_max", "zero"];
187
    /// let str_list = VarZeroVec::<str>::from(str_slice);
188
    ///
189
    /// let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list).unwrap();
190
    ///
191
    /// assert!(cpilsl.contains32(0));
192
    /// assert!(cpilsl.contains32(0x0042));
193
    /// assert!(!cpilsl.contains32(0x0080));
194
    /// ```
195
    pub fn contains32(&self, cp: u32) -> bool {
240,664✔
196
        self.cp_inv_list.contains32(cp)
240,664✔
197
    }
240,664✔
198

199
    ///
200
    /// # Examples
201
    /// ```
202
    /// use icu::collections::codepointinvlist::CodePointInversionList;
203
    /// use icu::collections::codepointinvliststringlist::CodePointInversionListAndStringList;
204
    /// use zerovec::VarZeroVec;
205
    ///
206
    /// let cp_slice = &[0, 0x1_0000, 0x10_FFFF, 0x11_0000];
207
    /// let cp_list =
208
    ///    CodePointInversionList::try_from_u32_inversion_list_slice(cp_slice).unwrap();
209
    /// let str_slice = &["", "bmp_max", "unicode_max", "zero"];
210
    /// let str_list = VarZeroVec::<str>::from(str_slice);
211
    ///
212
    /// let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list).unwrap();
213
    ///
214
    /// assert!(cpilsl.contains('A'));
215
    /// assert!(cpilsl.contains('ቔ'));  // U+1254 ETHIOPIC SYLLABLE QHEE
216
    /// assert!(!cpilsl.contains('\u{1_0000}'));
217
    /// assert!(!cpilsl.contains('🨫'));  // U+1FA2B NEUTRAL CHESS TURNED QUEEN
218
    pub fn contains(&self, ch: char) -> bool {
240,660✔
219
        self.contains32(ch as u32)
240,660✔
220
    }
240,660✔
221

222
    /// Access the underlying [`CodePointInversionList`].
223
    pub fn code_points(&self) -> &CodePointInversionList<'data> {
568✔
224
        &self.cp_inv_list
568✔
225
    }
568✔
226

227
    /// Access the contained strings.
228
    pub fn strings(&self) -> &VarZeroSlice<str> {
232,870✔
229
        &self.str_list
232,870✔
230
    }
232,870✔
231
}
232

233
#[cfg(feature = "alloc")]
234
impl<'a> FromIterator<&'a str> for CodePointInversionListAndStringList<'_> {
235
    fn from_iter<I>(it: I) -> Self
94✔
236
    where
237
        I: IntoIterator<Item = &'a str>,
238
    {
239
        let mut builder = CodePointInversionListBuilder::new();
94✔
240
        let mut strings = Vec::<&str>::new();
94✔
241
        for s in it {
4,921✔
242
            let mut chars = s.chars();
4,827✔
243
            if let Some(first_char) = chars.next() {
4,827✔
244
                if chars.next().is_none() {
4,827✔
245
                    builder.add_char(first_char);
4,794✔
246
                    continue;
247
                }
248
            }
249
            strings.push(s);
33✔
250
        }
94✔
251

252
        // Ensure that the string list is sorted. If not, the binary search that
253
        // is used for `.contains(&str)` will return garbage output.
254
        strings.sort_unstable();
94✔
255
        strings.dedup();
94✔
256

257
        let cp_inv_list = builder.build();
94✔
258
        let str_list = VarZeroVec::<str>::from(&strings);
94✔
259

260
        CodePointInversionListAndStringList {
94✔
261
            cp_inv_list,
94✔
262
            str_list,
263
        }
264
    }
94✔
265
}
266

267
/// Custom Errors for [`CodePointInversionListAndStringList`].
268
#[derive(Display, Debug)]
×
269
pub enum InvalidStringList {
270
    /// A string in the string list had an invalid length
271
    #[cfg_attr(feature = "alloc", displaydoc("Invalid string length for string: {0}"))]
×
272
    InvalidStringLength(#[cfg(feature = "alloc")] String),
×
273
    /// A string in the string list appears more than once
274
    #[cfg_attr(feature = "alloc", displaydoc("String list has duplicate: {0}"))]
×
275
    StringListNotUnique(#[cfg(feature = "alloc")] String),
×
276
    /// Two strings in the string list compare to each other opposite of sorted order
277
    #[cfg_attr(
278
        feature = "alloc",
279
        displaydoc("Strings in string list not in sorted order: ({0}, {1})")
×
280
    )]
281
    StringListNotSorted(
282
        #[cfg(feature = "alloc")] String,
×
283
        #[cfg(feature = "alloc")] String,
×
284
    ),
285
}
286

287
#[cfg(test)]
288
mod tests {
289
    use super::*;
290

291
    #[test]
292
    fn test_size_has_strings() {
2✔
293
        let cp_slice = &[0, 1, 0x7F, 0x80, 0xFFFF, 0x1_0000, 0x10_FFFF, 0x11_0000];
1✔
294
        let cp_list = CodePointInversionList::try_from_u32_inversion_list_slice(cp_slice).unwrap();
1✔
295
        let str_slice = &["ascii_max", "bmp_max", "unicode_max", "zero"];
1✔
296
        let str_list = VarZeroVec::<str>::from(str_slice);
1✔
297

298
        let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list).unwrap();
1✔
299

300
        assert!(cpilsl.has_strings());
1✔
301
        assert_eq!(8, cpilsl.size());
1✔
302
    }
2✔
303

304
    #[test]
305
    fn test_empty_string_allowed() {
2✔
306
        let cp_slice = &[0, 1, 0x7F, 0x80, 0xFFFF, 0x1_0000, 0x10_FFFF, 0x11_0000];
1✔
307
        let cp_list = CodePointInversionList::try_from_u32_inversion_list_slice(cp_slice).unwrap();
1✔
308
        let str_slice = &["", "ascii_max", "bmp_max", "unicode_max", "zero"];
1✔
309
        let str_list = VarZeroVec::<str>::from(str_slice);
1✔
310

311
        let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list).unwrap();
1✔
312

313
        assert!(cpilsl.has_strings());
1✔
314
        assert_eq!(9, cpilsl.size());
1✔
315
    }
2✔
316

317
    #[test]
318
    fn test_invalid_string() {
2✔
319
        let cp_slice = &[0, 1];
1✔
320
        let cp_list = CodePointInversionList::try_from_u32_inversion_list_slice(cp_slice).unwrap();
1✔
321
        let str_slice = &["a"];
1✔
322
        let str_list = VarZeroVec::<str>::from(str_slice);
1✔
323

324
        let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list);
1✔
325

326
        assert!(matches!(
1✔
327
            cpilsl,
1✔
328
            Err(InvalidStringList::InvalidStringLength(_))
329
        ));
330
    }
2✔
331

332
    #[test]
333
    fn test_invalid_string_list_has_duplicate() {
2✔
334
        let cp_slice = &[0, 1];
1✔
335
        let cp_list = CodePointInversionList::try_from_u32_inversion_list_slice(cp_slice).unwrap();
1✔
336
        let str_slice = &["abc", "abc"];
1✔
337
        let str_list = VarZeroVec::<str>::from(str_slice);
1✔
338

339
        let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list);
1✔
340

341
        assert!(matches!(
1✔
342
            cpilsl,
1✔
343
            Err(InvalidStringList::StringListNotUnique(_))
344
        ));
345
    }
2✔
346

347
    #[test]
348
    fn test_invalid_string_list_not_sorted() {
2✔
349
        let cp_slice = &[0, 1];
1✔
350
        let cp_list = CodePointInversionList::try_from_u32_inversion_list_slice(cp_slice).unwrap();
1✔
351
        let str_slice = &["xyz", "abc"];
1✔
352
        let str_list = VarZeroVec::<str>::from(str_slice);
1✔
353

354
        let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list);
1✔
355

356
        assert!(matches!(
1✔
357
            cpilsl,
1✔
358
            Err(InvalidStringList::StringListNotSorted(_, _))
359
        ));
360
    }
2✔
361

362
    #[test]
363
    fn test_from_iter_invariants() {
2✔
364
        let in_strs_1 = ["a", "abc", "xyz", "abc"];
1✔
365
        let in_strs_2 = ["xyz", "abc", "a", "abc"];
1✔
366

367
        let cpilsl_1 = CodePointInversionListAndStringList::from_iter(in_strs_1);
1✔
368
        let cpilsl_2 = CodePointInversionListAndStringList::from_iter(in_strs_2);
1✔
369

370
        assert_eq!(cpilsl_1, cpilsl_2);
1✔
371

372
        assert!(cpilsl_1.has_strings());
1✔
373
        assert!(cpilsl_1.contains_str("abc"));
1✔
374
        assert!(cpilsl_1.contains_str("xyz"));
1✔
375
        assert!(!cpilsl_1.contains_str("def"));
1✔
376

377
        assert_eq!(1, cpilsl_1.cp_inv_list.size());
1✔
378
        assert!(cpilsl_1.contains('a'));
1✔
379
        assert!(!cpilsl_1.contains('0'));
1✔
380
        assert!(!cpilsl_1.contains('q'));
1✔
381

382
        assert_eq!(3, cpilsl_1.size());
1✔
383
    }
2✔
384
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc