• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

zbraniecki / icu4x / 11904027177

19 Nov 2024 12:33AM UTC coverage: 75.477% (+0.3%) from 75.174%
11904027177

push

github

web-flow
Move DateTimePattern into pattern module (#5834)

#1317

Also removes `NeoNeverMarker` and fixes #5689

258 of 319 new or added lines in 6 files covered. (80.88%)

6967 existing lines in 278 files now uncovered.

54522 of 72237 relevant lines covered (75.48%)

655305.49 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

85.71
/components/collections/src/codepointinvliststringlist/mod.rs
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4

5
//! This module provides functionality for querying of sets of Unicode code points and strings.
6
//!
7
//! It depends on [`CodePointInversionList`] to efficiently represent Unicode code points, while
8
//! it also maintains a list of strings in the set.
9
//!
10
//! It is an implementation of the existing [ICU4C UnicodeSet API](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/classicu_1_1UnicodeSet.html).
11

12
use crate::codepointinvlist::{
13
    CodePointInversionList, CodePointInversionListBuilder, CodePointInversionListULE,
14
};
15
use alloc::string::{String, ToString};
16
use alloc::vec::Vec;
17
use displaydoc::Display;
18
use yoke::Yokeable;
19
use zerofrom::ZeroFrom;
20
use zerovec::{VarZeroSlice, VarZeroVec};
21

22
/// A data structure providing a concrete implementation of a set of code points and strings,
23
/// using an inversion list for the code points.
24
///
25
/// This is what ICU4C calls a `UnicodeSet`.
26
#[zerovec::make_varule(CodePointInversionListAndStringListULE)]
963,185✔
27
#[zerovec::skip_derive(Ord)]
28
#[zerovec::derive(Debug)]
29
#[derive(Debug, Eq, PartialEq, Clone, Yokeable, ZeroFrom)]
409✔
30
// Valid to auto-derive Deserialize because the invariants are weakly held
31
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
273✔
32
#[cfg_attr(feature = "serde", zerovec::derive(Serialize, Deserialize, Debug))]
33
pub struct CodePointInversionListAndStringList<'data> {
34
    #[cfg_attr(feature = "serde", serde(borrow))]
35
    #[zerovec::varule(CodePointInversionListULE)]
36
    cp_inv_list: CodePointInversionList<'data>,
316✔
37
    // Invariants (weakly held):
38
    //   - no input string is length 1 (a length 1 string should be a single code point)
39
    //   - the string list is sorted
40
    //   - the elements in the string list are unique
41
    #[cfg_attr(feature = "serde", serde(borrow))]
42
    str_list: VarZeroVec<'data, str>,
316✔
43
}
44

45
#[cfg(feature = "databake")]
impl databake::Bake for CodePointInversionListAndStringList<'_> {
    /// Produces tokens that rebuild this set from its two baked parts through
    /// `from_parts_unchecked`.
    fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
        env.insert("icu_collections");
        let baked_code_points = self.cp_inv_list.bake(env);
        let baked_strings = self.str_list.bake(env);
        // Using the unchecked constructor is safe because our parts are safe.
        databake::quote! {
            icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList::from_parts_unchecked(#baked_code_points, #baked_strings)
        }
    }
}
57

58
#[cfg(feature = "databake")]
impl databake::BakeSize for CodePointInversionListAndStringList<'_> {
    /// Returns the sum of the `borrows_size` of the code point list and of the string list.
    fn borrows_size(&self) -> usize {
        let code_points_size = self.cp_inv_list.borrows_size();
        let strings_size = self.str_list.borrows_size();
        code_points_size + strings_size
    }
}
64

65
impl<'data> CodePointInversionListAndStringList<'data> {
66
    /// Returns a new [`CodePointInversionListAndStringList`] from both a [`CodePointInversionList`] for the
67
    /// code points and a [`VarZeroVec`]`<`[`str`]`>` of strings.
68
    pub fn try_from(
646✔
69
        cp_inv_list: CodePointInversionList<'data>,
70
        str_list: VarZeroVec<'data, str>,
71
    ) -> Result<Self, InvalidStringList> {
72
        // Verify invariants:
73
        // Do so by using the equivalent of str_list.iter().windows(2) to get
74
        // overlapping windows of size 2. The above putative code is not possible
75
        // because `.windows()` exists on a slice, but VarZeroVec cannot return a slice
76
        // because the non-fixed size elements necessitate at least some type
77
        // of allocation.
78
        {
79
            let mut it = str_list.iter();
646✔
80
            if let Some(mut x) = it.next() {
644✔
81
                if x.len() == 1 {
68✔
82
                    return Err(InvalidStringList::InvalidStringLength(x.to_string()));
1✔
83
                }
84
                for y in it {
502✔
85
                    if x.len() == 1 {
436✔
UNCOV
86
                        return Err(InvalidStringList::InvalidStringLength(x.to_string()));
×
87
                    } else if x == y {
436✔
88
                        return Err(InvalidStringList::StringListNotUnique(x.to_string()));
1✔
89
                    } else if x > y {
435✔
90
                        return Err(InvalidStringList::StringListNotSorted(
1✔
91
                            x.to_string(),
1✔
92
                            y.to_string(),
1✔
UNCOV
93
                        ));
×
94
                    }
95

96
                    // Next window begins. Update `x` here, `y` will be updated in next loop iteration.
97
                    x = y;
434✔
98
                }
99
            }
100
        }
101

102
        Ok(CodePointInversionListAndStringList {
641✔
103
            cp_inv_list,
641✔
104
            str_list,
641✔
105
        })
106
    }
644✔
107

108
    #[doc(hidden)] // databake internal
109
    pub const fn from_parts_unchecked(
×
110
        cp_inv_list: CodePointInversionList<'data>,
111
        str_list: VarZeroVec<'data, str>,
112
    ) -> Self {
UNCOV
113
        CodePointInversionListAndStringList {
×
114
            cp_inv_list,
115
            str_list,
116
        }
UNCOV
117
    }
×
118

119
    /// Returns the number of elements in this set (its cardinality).
120
    /// Note than the elements of a set may include both individual
121
    /// codepoints and strings.
122
    pub fn size(&self) -> usize {
147✔
123
        self.cp_inv_list.size() + self.str_list.len()
147✔
124
    }
147✔
125

126
    /// Return true if this set contains multi-code point strings or the empty string.
127
    pub fn has_strings(&self) -> bool {
80✔
128
        !self.str_list.is_empty()
80✔
129
    }
80✔
130

131
    ///
132
    /// # Examples
133
    /// ```
134
    /// use icu::collections::codepointinvlist::CodePointInversionList;
135
    /// use icu::collections::codepointinvliststringlist::CodePointInversionListAndStringList;
136
    /// use zerovec::VarZeroVec;
137
    ///
138
    /// let cp_slice = &[0, 0x1_0000, 0x10_FFFF, 0x11_0000];
139
    /// let cp_list =
140
    ///    CodePointInversionList::try_from_u32_inversion_list_slice(cp_slice).unwrap();
141
    /// let str_slice = &["", "bmp_max", "unicode_max", "zero"];
142
    /// let str_list = VarZeroVec::<str>::from(str_slice);
143
    ///
144
    /// let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list).unwrap();
145
    ///
146
    /// assert!(cpilsl.contains_str("bmp_max"));
147
    /// assert!(cpilsl.contains_str(""));
148
    /// assert!(cpilsl.contains_str("A"));
149
    /// assert!(cpilsl.contains_str("ቔ"));  // U+1254 ETHIOPIC SYLLABLE QHEE
150
    /// assert!(!cpilsl.contains_str("bazinga!"));
151
    /// ```
152
    pub fn contains_str(&self, s: &str) -> bool {
15,903✔
153
        let mut chars = s.chars();
15,903✔
154
        if let Some(first_char) = chars.next() {
15,903✔
155
            if chars.next().is_none() {
7,975✔
156
                return self.contains(first_char);
7,941✔
157
            }
158
        }
159
        self.str_list.binary_search(s).is_ok()
7,962✔
160
    }
15,903✔
161

162
    ///
163
    /// # Examples
164
    /// ```
165
    /// use icu::collections::codepointinvlist::CodePointInversionList;
166
    /// use icu::collections::codepointinvliststringlist::CodePointInversionListAndStringList;
167
    /// use zerovec::VarZeroVec;
168
    ///
169
    /// let cp_slice = &[0, 0x80, 0xFFFF, 0x1_0000, 0x10_FFFF, 0x11_0000];
170
    /// let cp_list =
171
    ///     CodePointInversionList::try_from_u32_inversion_list_slice(cp_slice).unwrap();
172
    /// let str_slice = &["", "ascii_max", "bmp_max", "unicode_max", "zero"];
173
    /// let str_list = VarZeroVec::<str>::from(str_slice);
174
    ///
175
    /// let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list).unwrap();
176
    ///
177
    /// assert!(cpilsl.contains32(0));
178
    /// assert!(cpilsl.contains32(0x0042));
179
    /// assert!(!cpilsl.contains32(0x0080));
180
    /// ```
181
    pub fn contains32(&self, cp: u32) -> bool {
240,634✔
182
        self.cp_inv_list.contains32(cp)
240,634✔
183
    }
240,634✔
184

185
    ///
186
    /// # Examples
187
    /// ```
188
    /// use icu::collections::codepointinvlist::CodePointInversionList;
189
    /// use icu::collections::codepointinvliststringlist::CodePointInversionListAndStringList;
190
    /// use zerovec::VarZeroVec;
191
    ///
192
    /// let cp_slice = &[0, 0x1_0000, 0x10_FFFF, 0x11_0000];
193
    /// let cp_list =
194
    ///    CodePointInversionList::try_from_u32_inversion_list_slice(cp_slice).unwrap();
195
    /// let str_slice = &["", "bmp_max", "unicode_max", "zero"];
196
    /// let str_list = VarZeroVec::<str>::from(str_slice);
197
    ///
198
    /// let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list).unwrap();
199
    ///
200
    /// assert!(cpilsl.contains('A'));
201
    /// assert!(cpilsl.contains('ቔ'));  // U+1254 ETHIOPIC SYLLABLE QHEE
202
    /// assert!(!cpilsl.contains('\u{1_0000}'));
203
    /// assert!(!cpilsl.contains('🨫'));  // U+1FA2B NEUTRAL CHESS TURNED QUEEN
204
    pub fn contains(&self, ch: char) -> bool {
240,630✔
205
        self.contains32(ch as u32)
240,630✔
206
    }
240,630✔
207

208
    /// Access the underlying [`CodePointInversionList`].
209
    pub fn code_points(&self) -> &CodePointInversionList<'data> {
568✔
210
        &self.cp_inv_list
568✔
211
    }
568✔
212

213
    /// Access the contained strings.
214
    pub fn strings(&self) -> &VarZeroSlice<str> {
232,870✔
215
        &self.str_list
232,870✔
216
    }
232,870✔
217
}
218

219
impl<'a> FromIterator<&'a str> for CodePointInversionListAndStringList<'_> {
220
    fn from_iter<I>(it: I) -> Self
94✔
221
    where
222
        I: IntoIterator<Item = &'a str>,
223
    {
224
        let mut builder = CodePointInversionListBuilder::new();
94✔
225
        let mut strings = Vec::<&str>::new();
94✔
226
        for s in it {
4,921✔
227
            let mut chars = s.chars();
4,827✔
228
            if let Some(first_char) = chars.next() {
4,827✔
229
                if chars.next().is_none() {
4,827✔
230
                    builder.add_char(first_char);
4,794✔
231
                    continue;
232
                }
233
            }
234
            strings.push(s);
33✔
235
        }
94✔
236

237
        // Ensure that the string list is sorted. If not, the binary search that
238
        // is used for `.contains(&str)` will return garbage output.
239
        strings.sort_unstable();
94✔
240
        strings.dedup();
94✔
241

242
        let cp_inv_list = builder.build();
94✔
243
        let str_list = VarZeroVec::<str>::from(&strings);
94✔
244

245
        CodePointInversionListAndStringList {
94✔
246
            cp_inv_list,
94✔
247
            str_list,
248
        }
249
    }
94✔
250
}
251

252
/// Custom Errors for [`CodePointInversionListAndStringList`].
UNCOV
253
#[derive(Display, Debug)]
×
254
pub enum InvalidStringList {
255
    /// A string in the string list had an invalid length
UNCOV
256
    #[displaydoc("Invalid string length for string: {0}")]
×
UNCOV
257
    InvalidStringLength(String),
×
258
    /// A string in the string list appears more than once
UNCOV
259
    #[displaydoc("String list has duplicate: {0}")]
×
UNCOV
260
    StringListNotUnique(String),
×
261
    /// Two strings in the string list compare to each other opposite of sorted order
UNCOV
262
    #[displaydoc("Strings in string list not in sorted order: ({0}, {1})")]
×
UNCOV
263
    StringListNotSorted(String, String),
×
264
}
265

266
#[cfg(test)]
mod tests {
    use super::*;

    /// Inversion list used by the size tests; it covers four single code points.
    const FOUR_CODE_POINTS: [u32; 8] = [0, 1, 0x7F, 0x80, 0xFFFF, 0x1_0000, 0x10_FFFF, 0x11_0000];

    /// Runs `try_from` on a code point inversion list and a string list given as slices.
    fn build(
        cp_slice: &[u32],
        str_slice: &[&str],
    ) -> Result<CodePointInversionListAndStringList<'static>, InvalidStringList> {
        let cp_list = CodePointInversionList::try_from_u32_inversion_list_slice(cp_slice).unwrap();
        let str_list = VarZeroVec::<str>::from(str_slice);
        CodePointInversionListAndStringList::try_from(cp_list, str_list)
    }

    #[test]
    fn test_size_has_strings() {
        let set = build(
            &FOUR_CODE_POINTS,
            &["ascii_max", "bmp_max", "unicode_max", "zero"],
        )
        .unwrap();

        assert!(set.has_strings());
        assert_eq!(8, set.size());
    }

    #[test]
    fn test_empty_string_allowed() {
        let set = build(
            &FOUR_CODE_POINTS,
            &["", "ascii_max", "bmp_max", "unicode_max", "zero"],
        )
        .unwrap();

        assert!(set.has_strings());
        assert_eq!(9, set.size());
    }

    #[test]
    fn test_invalid_string() {
        let result = build(&[0, 1], &["a"]);

        assert!(matches!(
            result,
            Err(InvalidStringList::InvalidStringLength(_))
        ));
    }

    #[test]
    fn test_invalid_string_list_has_duplicate() {
        let result = build(&[0, 1], &["abc", "abc"]);

        assert!(matches!(
            result,
            Err(InvalidStringList::StringListNotUnique(_))
        ));
    }

    #[test]
    fn test_invalid_string_list_not_sorted() {
        let result = build(&[0, 1], &["xyz", "abc"]);

        assert!(matches!(
            result,
            Err(InvalidStringList::StringListNotSorted(_, _))
        ));
    }

    #[test]
    fn test_from_iter_invariants() {
        // Same elements in a different order, with a duplicate in each input.
        let first = CodePointInversionListAndStringList::from_iter(["a", "abc", "xyz", "abc"]);
        let second = CodePointInversionListAndStringList::from_iter(["xyz", "abc", "a", "abc"]);

        assert_eq!(first, second);

        // String members.
        assert!(first.has_strings());
        assert!(first.contains_str("abc"));
        assert!(first.contains_str("xyz"));
        assert!(!first.contains_str("def"));

        // Code point members.
        assert_eq!(1, first.cp_inv_list.size());
        assert!(first.contains('a'));
        assert!(!first.contains('0'));
        assert!(!first.contains('q'));

        // One code point plus two distinct strings.
        assert_eq!(3, first.size());
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc