• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

zbraniecki / icu4x / 13958601093

19 Mar 2025 04:17PM UTC coverage: 74.164% (-1.5%) from 75.71%
13958601093

push

github

web-flow
Clean up properties docs (#6315)

58056 of 78281 relevant lines covered (74.16%)

819371.32 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

75.36
/components/properties/src/code_point_map.rs
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4

5
#[cfg(feature = "alloc")]
6
use crate::code_point_set::CodePointSetData;
7
use crate::props::GeneralCategory;
8
use crate::props::GeneralCategoryGroup;
9
use crate::provider::*;
10
use core::ops::RangeInclusive;
11
use icu_collections::codepointtrie::{CodePointMapRange, CodePointTrie, TrieValue};
12
use icu_provider::marker::ErasedMarker;
13
use icu_provider::prelude::*;
14

15
/// A wrapper around code point map data.
16
///
17
/// It is returned by APIs that return Unicode
18
/// property data in a map-like form, e.g., enumerated property value data keyed
19
/// by code point. Access its data via the borrowed version,
20
/// [`CodePointMapDataBorrowed`].
21
#[derive(Debug, Clone)]
×
22
pub struct CodePointMapData<T: TrieValue> {
23
    data: DataPayload<ErasedMarker<PropertyCodePointMap<'static, T>>>,
×
24
}
25

26
impl<T: TrieValue> CodePointMapData<T> {
27
    /// Creates a new [`CodePointMapData`] for an [`EnumeratedProperty`].
28
    ///
29
    /// See the documentation on [`EnumeratedProperty`] implementations for details.
30
    ///
31
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
32
    ///
33
    /// [📚 Help choosing a constructor](icu_provider::constructors)
34
    #[cfg(feature = "compiled_data")]
35
    #[allow(clippy::new_ret_no_self)]
36
    pub const fn new() -> CodePointMapDataBorrowed<'static, T>
1,716,696✔
37
    where
38
        T: EnumeratedProperty,
39
    {
40
        CodePointMapDataBorrowed::new()
1,716,696✔
41
    }
1,716,696✔
42

43
    #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)]
44
    pub fn try_new_unstable(
11,029✔
45
        provider: &(impl DataProvider<T::DataMarker> + ?Sized),
46
    ) -> Result<Self, DataError>
47
    where
48
        T: EnumeratedProperty,
49
    {
50
        Ok(Self {
11,029✔
51
            data: provider.load(Default::default())?.payload.cast(),
11,029✔
52
        })
53
    }
11,029✔
54

55
    /// Construct a borrowed version of this type that can be queried.
56
    ///
57
    /// This avoids a potential small underlying cost per API call (like `get()`) by consolidating it
58
    /// up front.
59
    ///
60
    /// This owned version is returned by functions that use a runtime data provider.
61
    #[inline]
62
    pub fn as_borrowed(&self) -> CodePointMapDataBorrowed<'_, T> {
11,027✔
63
        CodePointMapDataBorrowed {
64
            map: self.data.get(),
11,027✔
65
        }
66
    }
11,027✔
67

68
    /// Convert this map to a map around another type
69
    ///
70
    /// Typically useful for type-erasing maps into maps around integers.
71
    ///
72
    /// # Panics
73
    /// Will panic if `T` and `P` are different sizes.
74
    ///
75
    /// # Example
76
    ///
77
    /// ```
78
    /// use icu::properties::CodePointMapData;
79
    /// use icu::properties::props::GeneralCategory;
80
    ///
81
    /// let data = CodePointMapData::<GeneralCategory>::new().static_to_owned();
82
    ///
83
    /// let gc = data.try_into_converted::<u8>().unwrap();
84
    /// let gc = gc.as_borrowed();
85
    ///
86
    /// assert_eq!(gc.get('木'), GeneralCategory::OtherLetter as u8);  // U+6728
87
    /// assert_eq!(gc.get('🎃'), GeneralCategory::OtherSymbol as u8);  // U+1F383 JACK-O-LANTERN
88
    /// ```
89
    #[cfg(feature = "alloc")]
90
    pub fn try_into_converted<P>(self) -> Result<CodePointMapData<P>, zerovec::ule::UleError>
×
91
    where
92
        P: TrieValue,
93
    {
94
        self.data
×
95
            .try_map_project(|data, _| data.try_into_converted())
×
96
            .map(CodePointMapData::from_data::<ErasedMarker<PropertyCodePointMap<'static, P>>>)
97
    }
×
98

99
    /// Construct a new one from loaded data
100
    ///
101
    /// Typically it is preferable to use constructors like [`Self::try_new_unstable`] instead.
102
    pub(crate) fn from_data<M>(data: DataPayload<M>) -> Self
×
103
    where
104
        M: DynamicDataMarker<DataStruct = PropertyCodePointMap<'static, T>>,
105
    {
106
        Self { data: data.cast() }
×
107
    }
×
108

109
    /// Construct a new one from an owned [`CodePointTrie`]
110
    pub fn from_code_point_trie(trie: CodePointTrie<'static, T>) -> Self {
111
        let set = PropertyCodePointMap::from_code_point_trie(trie);
112
        CodePointMapData::from_data(
113
            DataPayload::<ErasedMarker<PropertyCodePointMap<'static, T>>>::from_owned(set),
114
        )
115
    }
116

117
    /// Convert this type to a [`CodePointTrie`] as a borrowed value.
118
    ///
119
    /// The data backing this is extensible and supports multiple implementations.
120
    /// Currently it is always [`CodePointTrie`]; however in the future more backends may be
121
    /// added, and users may select which at data generation time.
122
    ///
123
    /// This method returns an `Option` in order to return `None` when the backing data provider
124
    /// cannot return a [`CodePointTrie`], or cannot do so within the expected constant time
125
    /// constraint.
126
    pub fn as_code_point_trie(&self) -> Option<&CodePointTrie<'_, T>> {
2✔
127
        self.data.get().as_code_point_trie()
2✔
128
    }
2✔
129

130
    /// Convert this type to a [`CodePointTrie`], borrowing if possible,
131
    /// otherwise allocating a new [`CodePointTrie`].
132
    ///
133
    /// The data backing this is extensible and supports multiple implementations.
134
    /// Currently it is always [`CodePointTrie`]; however in the future more backends may be
135
    /// added, and users may select which at data generation time.
136
    ///
137
    /// The performance of the conversion to this specific return type will vary
138
    /// depending on the data structure that is backing `self`.
139
    pub fn to_code_point_trie(&self) -> CodePointTrie<'_, T> {
140
        self.data.get().to_code_point_trie()
141
    }
142
}
143

144
/// A borrowed wrapper around code point map data, returned by
145
/// [`CodePointMapData::as_borrowed()`]. More efficient to query.
146
#[derive(Clone, Copy, Debug)]
×
147
pub struct CodePointMapDataBorrowed<'a, T: TrieValue> {
148
    map: &'a PropertyCodePointMap<'a, T>,
×
149
}
150

151
impl<'a, T: TrieValue> CodePointMapDataBorrowed<'a, T> {
152
    /// Get the value this map has associated with code point `ch`
153
    ///
154
    /// # Example
155
    ///
156
    /// ```
157
    /// use icu::properties::CodePointMapData;
158
    /// use icu::properties::props::GeneralCategory;
159
    ///
160
    /// let gc = CodePointMapData::<GeneralCategory>::new();
161
    ///
162
    /// assert_eq!(gc.get('木'), GeneralCategory::OtherLetter);  // U+6728
163
    /// assert_eq!(gc.get('🎃'), GeneralCategory::OtherSymbol);  // U+1F383 JACK-O-LANTERN
164
    /// ```
165
    pub fn get(self, ch: char) -> T {
16,812,223✔
166
        self.map.get32(ch as u32)
16,812,223✔
167
    }
16,812,223✔
168

169
    /// See [`Self::get`].
170
    pub fn get32(self, ch: u32) -> T {
219,169,304✔
171
        self.map.get32(ch)
219,169,304✔
172
    }
219,169,304✔
173

174
    /// Get a [`CodePointSetData`] for all elements corresponding to a particular value
175
    ///
176
    /// # Example
177
    ///
178
    /// ```
179
    /// use icu::properties::props::GeneralCategory;
180
    /// use icu::properties::CodePointMapData;
181
    ///
182
    /// let gc = CodePointMapData::<GeneralCategory>::new();
183
    ///
184
    /// let other_letter_set_data =
185
    ///     gc.get_set_for_value(GeneralCategory::OtherLetter);
186
    /// let other_letter_set = other_letter_set_data.as_borrowed();
187
    ///
188
    /// assert!(other_letter_set.contains('木')); // U+6728
189
    /// assert!(!other_letter_set.contains('🎃')); // U+1F383 JACK-O-LANTERN
190
    /// ```
191
    #[cfg(feature = "alloc")]
192
    pub fn get_set_for_value(self, value: T) -> CodePointSetData {
50✔
193
        let set = self.map.get_set_for_value(value);
50✔
194
        CodePointSetData::from_code_point_inversion_list(set)
50✔
195
    }
50✔
196

197
    /// Yields an [`Iterator`] returning ranges of consecutive code points that
198
    /// share the same value in the [`CodePointMapData`].
199
    ///
200
    /// # Examples
201
    ///
202
    /// ```
203
    /// use icu::properties::props::GeneralCategory;
204
    /// use icu::properties::CodePointMapData;
205
    ///
206
    /// let gc = CodePointMapData::<GeneralCategory>::new();
207
    /// let mut ranges = gc.iter_ranges();
208
    /// let next = ranges.next().unwrap();
209
    /// assert_eq!(next.range, 0..=31);
210
    /// assert_eq!(next.value, GeneralCategory::Control);
211
    /// let next = ranges.next().unwrap();
212
    /// assert_eq!(next.range, 32..=32);
213
    /// assert_eq!(next.value, GeneralCategory::SpaceSeparator);
214
    /// ```
215
    pub fn iter_ranges(self) -> impl Iterator<Item = CodePointMapRange<T>> + 'a {
10,946✔
216
        self.map.iter_ranges()
10,946✔
217
    }
10,946✔
218

219
    /// Yields an [`Iterator`] returning ranges of consecutive code points that
220
    /// share the same value `val` in the [`CodePointMapData`].
221
    ///
222
    /// # Examples
223
    ///
224
    ///
225
    /// ```
226
    /// use icu::properties::props::GeneralCategory;
227
    /// use icu::properties::CodePointMapData;
228
    ///
229
    /// let gc = CodePointMapData::<GeneralCategory>::new();
230
    /// let mut ranges = gc.iter_ranges_for_value(GeneralCategory::UppercaseLetter);
231
    /// assert_eq!(ranges.next().unwrap(), 'A' as u32..='Z' as u32);
232
    /// assert_eq!(ranges.next().unwrap(), 'À' as u32..='Ö' as u32);
233
    /// assert_eq!(ranges.next().unwrap(), 'Ø' as u32..='Þ' as u32);
234
    /// ```
235
    pub fn iter_ranges_for_value(self, val: T) -> impl Iterator<Item = RangeInclusive<u32>> + 'a {
5✔
236
        self.map
5✔
237
            .iter_ranges()
238
            .filter(move |r| r.value == val)
13,322✔
239
            .map(|r| r.range)
1,062✔
240
    }
5✔
241

242
    /// Yields an [`Iterator`] returning ranges of consecutive code points that
243
    /// do *not* have the value `val` in the [`CodePointMapData`].
244
    pub fn iter_ranges_for_value_complemented(
5✔
245
        self,
246
        val: T,
247
    ) -> impl Iterator<Item = RangeInclusive<u32>> + 'a {
248
        self.map
5✔
249
            .iter_ranges_mapped(move |value| value != val)
13,322✔
250
            .filter(|v| v.value)
2,128✔
251
            .map(|v| v.range)
1,066✔
252
    }
5✔
253

254
    /// Exposed for FFI needs, could be exposed in general in the future but we should
255
    /// have a use case first.
256
    ///
257
    /// FFI needs this since it operates on erased maps and can't use `iter_ranges_for_group()`
258
    #[doc(hidden)] // used by FFI code
259
    pub fn iter_ranges_mapped<U: Eq + 'a>(
×
260
        self,
261
        predicate: impl FnMut(T) -> U + Copy + 'a,
262
    ) -> impl Iterator<Item = CodePointMapRange<U>> + 'a {
263
        self.map.iter_ranges_mapped(predicate)
×
264
    }
×
265
}
266

267
impl CodePointMapDataBorrowed<'_, GeneralCategory> {
268
    /// Get a [`CodePointSetData`] for all elements corresponding to a particular value group
269
    ///
270
    /// # Example
271
    ///
272
    /// ```
273
    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
274
    /// use icu::properties::CodePointMapData;
275
    ///
276
    /// let gc = CodePointMapData::<GeneralCategory>::new();
277
    ///
278
    /// let other_letter_set_data =
279
    ///     gc.get_set_for_value_group(GeneralCategoryGroup::OtherLetter);
280
    /// let other_letter_set = other_letter_set_data.as_borrowed();
281
    ///
282
    /// assert!(other_letter_set.contains('木')); // U+6728
283
    /// assert!(!other_letter_set.contains('🎃')); // U+1F383 JACK-O-LANTERN
284
    /// ```
285
    #[cfg(feature = "alloc")]
286
    pub fn get_set_for_value_group(self, value: GeneralCategoryGroup) -> crate::CodePointSetData {
10,945✔
287
        let matching_gc_ranges = self
10,945✔
288
            .iter_ranges()
289
            .filter(|cpm_range| (1 << cpm_range.value as u32) & value.0 != 0)
44,642,803✔
290
            .map(|cpm_range| cpm_range.range);
20,918,228✔
291
        CodePointSetData::from_code_point_inversion_list(matching_gc_ranges.collect())
10,945✔
292
    }
10,945✔
293
}
294

295
#[cfg(feature = "compiled_data")]
296
impl<T: EnumeratedProperty> Default for CodePointMapDataBorrowed<'static, T> {
297
    fn default() -> Self {
298
        Self::new()
299
    }
300
}
301

302
impl<T: TrieValue> CodePointMapDataBorrowed<'static, T> {
303
    /// Creates a new [`CodePointMapDataBorrowed`] for an [`EnumeratedProperty`].
304
    ///
305
    /// See the documentation on [`EnumeratedProperty`] implementations for details.
306
    ///
307
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
308
    ///
309
    /// [📚 Help choosing a constructor](icu_provider::constructors)
310
    #[cfg(feature = "compiled_data")]
311
    pub const fn new() -> Self
1,713,327✔
312
    where
313
        T: EnumeratedProperty,
314
    {
315
        CodePointMapDataBorrowed { map: T::SINGLETON }
316
    }
1,713,327✔
317

318
    /// Cheaply converts a [`CodePointMapDataBorrowed<'static>`] into a [`CodePointMapData`].
319
    ///
320
    /// Note: Due to branching and indirection, using [`CodePointMapData`] might inhibit some
321
    /// compile-time optimizations that are possible with [`CodePointMapDataBorrowed`].
322
    pub const fn static_to_owned(self) -> CodePointMapData<T> {
1✔
323
        CodePointMapData {
1✔
324
            data: DataPayload::from_static_ref(self.map),
1✔
325
        }
326
    }
1✔
327
}
328

329
impl<'a> CodePointMapDataBorrowed<'a, GeneralCategory> {
330
    /// Yields an [`Iterator`] returning ranges of consecutive code points that
331
    /// have a `General_Category` value belonging to the specified [`GeneralCategoryGroup`]
332
    ///
333
    /// # Examples
334
    ///
335
    /// ```
336
    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
337
    /// use icu::properties::CodePointMapData;
338
    ///
339
    /// let gc = CodePointMapData::<GeneralCategory>::new();
340
    /// let mut ranges = gc.iter_ranges_for_group(GeneralCategoryGroup::Letter);
341
    /// assert_eq!(ranges.next().unwrap(), 'A' as u32..='Z' as u32);
342
    /// assert_eq!(ranges.next().unwrap(), 'a' as u32..='z' as u32);
343
    /// assert_eq!(ranges.next().unwrap(), 'ª' as u32..='ª' as u32);
344
    /// assert_eq!(ranges.next().unwrap(), 'µ' as u32..='µ' as u32);
345
    /// assert_eq!(ranges.next().unwrap(), 'º' as u32..='º' as u32);
346
    /// assert_eq!(ranges.next().unwrap(), 'À' as u32..='Ö' as u32);
347
    /// assert_eq!(ranges.next().unwrap(), 'Ø' as u32..='ö' as u32);
348
    /// ```
349
    pub fn iter_ranges_for_group(
1✔
350
        self,
351
        group: GeneralCategoryGroup,
352
    ) -> impl Iterator<Item = RangeInclusive<u32>> + 'a {
353
        self.map
1✔
354
            .iter_ranges_mapped(move |value| group.contains(value))
355
            .filter(|v| v.value)
356
            .map(|v| v.range)
357
    }
1✔
358
}
359

360
/// A Unicode character property that assigns a value to each code point.
361
///
362
/// The descriptions of most properties are taken from [`TR44`], the documentation for the
363
/// Unicode Character Database.
364
///
365
/// <div class="stab unstable">
366
/// 🚫 This trait is sealed; it cannot be implemented by user code. If an API requests an item that implements this
367
/// trait, please consider using a type from the implementors listed below.
368
/// </div>
369
///
370
/// [`TR44`]: https://www.unicode.org/reports/tr44
371
pub trait EnumeratedProperty: crate::private::Sealed + TrieValue {
372
    #[doc(hidden)]
373
    type DataMarker: DataMarker<DataStruct = PropertyCodePointMap<'static, Self>>;
374
    #[doc(hidden)]
375
    #[cfg(feature = "compiled_data")]
376
    const SINGLETON: &'static PropertyCodePointMap<'static, Self>;
377
    /// The name of this property
378
    const NAME: &'static [u8];
379
    /// The abbreviated name of this property, if it exists, otherwise the name
380
    const SHORT_NAME: &'static [u8];
381

382
    /// Convenience method for `CodePointMapData::new().get(ch)`
383
    ///
384
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
385
    #[cfg(feature = "compiled_data")]
386
    fn for_char(ch: char) -> Self {
×
387
        CodePointMapData::new().get(ch)
×
388
    }
×
389
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc