• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

zbraniecki / icu4x / 12020603084

23 Nov 2024 08:43PM UTC coverage: 75.71% (+0.2%) from 75.477%
12020603084

push

github

sffc
Touch Cargo.lock

55589 of 73424 relevant lines covered (75.71%)

644270.14 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.29
/components/properties/src/script.rs
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4

5
//! Data and APIs for supporting Script_Extensions property
6
//! values in an efficient structure.
7

8
use crate::props::Script;
9
use crate::provider::*;
10

11
use core::iter::FromIterator;
12
use core::ops::RangeInclusive;
13
use icu_collections::codepointinvlist::CodePointInversionList;
14
use icu_provider::prelude::*;
15
use zerovec::{ule::AsULE, ZeroSlice};
16

17
/// The number of bits at the low-end of a `ScriptWithExt` value used for
18
/// storing the `Script` value (or `extensions` index).
19
const SCRIPT_VAL_LENGTH: u16 = 10;
20

21
/// The bit mask necessary to retrieve the `Script` value (or `extensions` index)
22
/// from a `ScriptWithExt` value.
23
const SCRIPT_X_SCRIPT_VAL: u16 = (1 << SCRIPT_VAL_LENGTH) - 1;
24

25
/// An internal-use only pseudo-property that represents the values stored in
26
/// the trie of the special data structure [`ScriptWithExtensionsPropertyV1`].
27
///
28
/// Note: This will assume a 12-bit layout. The 2 higher order bits in positions
29
/// 11..10 will indicate how to deduce the Script value and Script_Extensions,
30
/// and the lower 10 bits 9..0 indicate either the Script value or the index
31
/// into the `extensions` structure.
32
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
721,222✔
33
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
×
34
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
×
35
#[cfg_attr(feature = "datagen", databake(path = icu_properties::script))]
36
#[repr(transparent)]
37
#[doc(hidden)]
38
// `ScriptWithExt` not intended as public-facing but for `ScriptWithExtensionsPropertyV1` constructor
39
#[allow(clippy::exhaustive_structs)] // this type is stable
40
pub struct ScriptWithExt(pub u16);
477,766✔
41

42
#[allow(missing_docs)] // These constants don't need individual documentation.
43
#[allow(non_upper_case_globals)]
44
#[doc(hidden)] // `ScriptWithExt` not intended as public-facing but for `ScriptWithExtensionsPropertyV1` constructor
45
impl ScriptWithExt {
46
    pub const Unknown: ScriptWithExt = ScriptWithExt(0);
47
}
48

49
impl AsULE for ScriptWithExt {
50
    type ULE = <u16 as AsULE>::ULE;
51

52
    #[inline]
53
    fn to_unaligned(self) -> Self::ULE {
51,335✔
54
        Script(self.0).to_unaligned()
51,335✔
55
    }
51,335✔
56

57
    #[inline]
58
    fn from_unaligned(unaligned: Self::ULE) -> Self {
303,686✔
59
        ScriptWithExt(Script::from_unaligned(unaligned).0)
303,686✔
60
    }
303,686✔
61
}
62

63
#[doc(hidden)] // `ScriptWithExt` not intended as public-facing but for `ScriptWithExtensionsPropertyV1` constructor
64
impl ScriptWithExt {
65
    /// Returns whether the [`ScriptWithExt`] value has Script_Extensions and
66
    /// also indicates a Script value of [`Script::Common`].
67
    ///
68
    /// # Examples
69
    ///
70
    /// ```
71
    /// use icu::properties::script::ScriptWithExt;
72
    ///
73
    /// assert!(ScriptWithExt(0x04FF).is_common());
74
    /// assert!(ScriptWithExt(0x0400).is_common());
75
    ///
76
    /// assert!(!ScriptWithExt(0x08FF).is_common());
77
    /// assert!(!ScriptWithExt(0x0800).is_common());
78
    ///
79
    /// assert!(!ScriptWithExt(0x0CFF).is_common());
80
    /// assert!(!ScriptWithExt(0x0C00).is_common());
81
    ///
82
    /// assert!(!ScriptWithExt(0xFF).is_common());
83
    /// assert!(!ScriptWithExt(0x0).is_common());
84
    /// ```
85
    pub fn is_common(&self) -> bool {
1,645✔
86
        self.0 >> SCRIPT_VAL_LENGTH == 1
1,645✔
87
    }
1,645✔
88

89
    /// Returns whether the [`ScriptWithExt`] value has Script_Extensions and
90
    /// also indicates a Script value of [`Script::Inherited`].
91
    ///
92
    /// # Examples
93
    ///
94
    /// ```
95
    /// use icu::properties::script::ScriptWithExt;
96
    ///
97
    /// assert!(!ScriptWithExt(0x04FF).is_inherited());
98
    /// assert!(!ScriptWithExt(0x0400).is_inherited());
99
    ///
100
    /// assert!(ScriptWithExt(0x08FF).is_inherited());
101
    /// assert!(ScriptWithExt(0x0800).is_inherited());
102
    ///
103
    /// assert!(!ScriptWithExt(0x0CFF).is_inherited());
104
    /// assert!(!ScriptWithExt(0x0C00).is_inherited());
105
    ///
106
    /// assert!(!ScriptWithExt(0xFF).is_inherited());
107
    /// assert!(!ScriptWithExt(0x0).is_inherited());
108
    /// ```
109
    pub fn is_inherited(&self) -> bool {
545✔
110
        self.0 >> SCRIPT_VAL_LENGTH == 2
545✔
111
    }
545✔
112

113
    /// Returns whether the [`ScriptWithExt`] value has Script_Extensions and
114
    /// also indicates that the Script value is neither [`Script::Common`] nor
115
    /// [`Script::Inherited`].
116
    ///
117
    /// # Examples
118
    ///
119
    /// ```
120
    /// use icu::properties::script::ScriptWithExt;
121
    ///
122
    /// assert!(!ScriptWithExt(0x04FF).is_other());
123
    /// assert!(!ScriptWithExt(0x0400).is_other());
124
    ///
125
    /// assert!(!ScriptWithExt(0x08FF).is_other());
126
    /// assert!(!ScriptWithExt(0x0800).is_other());
127
    ///
128
    /// assert!(ScriptWithExt(0x0CFF).is_other());
129
    /// assert!(ScriptWithExt(0x0C00).is_other());
130
    ///
131
    /// assert!(!ScriptWithExt(0xFF).is_other());
132
    /// assert!(!ScriptWithExt(0x0).is_other());
133
    /// ```
134
    pub fn is_other(&self) -> bool {
2,049✔
135
        self.0 >> SCRIPT_VAL_LENGTH == 3
2,049✔
136
    }
2,049✔
137

138
    /// Returns whether the [`ScriptWithExt`] value has Script_Extensions.
139
    ///
140
    /// # Examples
141
    ///
142
    /// ```
143
    /// use icu::properties::script::ScriptWithExt;
144
    ///
145
    /// assert!(ScriptWithExt(0x04FF).has_extensions());
146
    /// assert!(ScriptWithExt(0x0400).has_extensions());
147
    ///
148
    /// assert!(ScriptWithExt(0x08FF).has_extensions());
149
    /// assert!(ScriptWithExt(0x0800).has_extensions());
150
    ///
151
    /// assert!(ScriptWithExt(0x0CFF).has_extensions());
152
    /// assert!(ScriptWithExt(0x0C00).has_extensions());
153
    ///
154
    /// assert!(!ScriptWithExt(0xFF).has_extensions());
155
    /// assert!(!ScriptWithExt(0x0).has_extensions());
156
    /// ```
157
    pub fn has_extensions(&self) -> bool {
27,277✔
158
        let high_order_bits = self.0 >> SCRIPT_VAL_LENGTH;
27,277✔
159
        high_order_bits > 0
27,277✔
160
    }
27,277✔
161
}
162

163
impl From<ScriptWithExt> for u32 {
164
    fn from(swe: ScriptWithExt) -> Self {
×
165
        swe.0 as u32
×
166
    }
×
167
}
168

169
impl From<ScriptWithExt> for Script {
170
    fn from(swe: ScriptWithExt) -> Self {
25,275✔
171
        Script(swe.0)
172
    }
25,275✔
173
}
174

175
/// A struct that wraps a [`Script`] array, such as in the return value for
176
/// [`get_script_extensions_val()`](ScriptWithExtensionsBorrowed::get_script_extensions_val).
177
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
×
178
pub struct ScriptExtensionsSet<'a> {
179
    // Borrowed slice of `Script` values; `contains()` binary-searches it and
    // `iter()` walks it in stored order.
    values: &'a ZeroSlice<Script>,
×
180
}
181

182
impl<'a> ScriptExtensionsSet<'a> {
183
    /// Returns whether this set contains the given script.
184
    ///
185
    /// # Example
186
    ///
187
    /// ```
188
    /// use icu::properties::props::Script;
189
    /// use icu::properties::script::ScriptWithExtensions;
190
    /// let swe = ScriptWithExtensions::new();
191
    ///
192
    /// assert!(swe
193
    ///     .get_script_extensions_val('\u{11303}') // GRANTHA SIGN VISARGA
194
    ///     .contains(&Script::Grantha));
195
    /// ```
196
    pub fn contains(&self, x: &Script) -> bool {
3✔
197
        ZeroSlice::binary_search(self.values, x).is_ok()
3✔
198
    }
3✔
199

200
    /// Gets an iterator over the elements.
201
    ///
202
    /// # Example
203
    ///
204
    /// ```
205
    /// use icu::properties::props::Script;
206
    /// use icu::properties::script::ScriptWithExtensions;
207
    /// let swe = ScriptWithExtensions::new();
208
    ///
209
    /// assert_eq!(
210
    ///     swe.get_script_extensions_val('௫') // U+0BEB TAMIL DIGIT FIVE
211
    ///         .iter()
212
    ///         .collect::<Vec<_>>(),
213
    ///     [Script::Tamil, Script::Grantha]
214
    /// );
215
    /// ```
216
    pub fn iter(&self) -> impl DoubleEndedIterator<Item = Script> + 'a {
17✔
217
        ZeroSlice::iter(self.values)
17✔
218
    }
17✔
219

220
    /// For accessing this set as an array instead of an iterator
221
    #[doc(hidden)] // used by FFI code
222
    pub fn array_len(&self) -> usize {
×
223
        self.values.len()
×
224
    }
×
225
    /// For accessing this set as an array instead of an iterator
226
    #[doc(hidden)] // used by FFI code
227
    pub fn array_get(&self, index: usize) -> Option<Script> {
×
228
        self.values.get(index)
×
229
    }
×
230
}
231

232
/// A struct that represents the data for the Script and Script_Extensions properties.
233
///
234
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
235
///
236
/// [📚 Help choosing a constructor](icu_provider::constructors)
237
///
238
/// Most useful methods are on [`ScriptWithExtensionsBorrowed`] obtained by calling [`ScriptWithExtensions::as_borrowed()`]
239
///
240
/// # Examples
241
///
242
/// ```
243
/// use icu::properties::script::ScriptWithExtensions;
244
/// use icu::properties::props::Script;
245
/// let swe = ScriptWithExtensions::new();
246
///
247
/// // get the `Script` property value
248
/// assert_eq!(swe.get_script_val('ـ'), Script::Common); // U+0640 ARABIC TATWEEL
249
/// assert_eq!(swe.get_script_val('\u{0650}'), Script::Inherited); // U+0650 ARABIC KASRA
250
/// assert_eq!(swe.get_script_val('٠'), Script::Arabic); // U+0660 ARABIC-INDIC DIGIT ZERO
251
/// assert_eq!(swe.get_script_val('ﷲ'), Script::Arabic); // U+FDF2 ARABIC LIGATURE ALLAH ISOLATED FORM
252
///
253
/// // get the `Script_Extensions` property value
254
/// assert_eq!(
255
///     swe.get_script_extensions_val('ـ') // U+0640 ARABIC TATWEEL
256
///         .iter().collect::<Vec<_>>(),
257
///     [Script::Arabic, Script::Syriac, Script::Mandaic, Script::Manichaean,
258
///          Script::PsalterPahlavi, Script::Adlam, Script::HanifiRohingya, Script::Sogdian,
259
///          Script::OldUyghur]
260
/// );
261
/// assert_eq!(
262
///     swe.get_script_extensions_val('🥳') // U+1F973 FACE WITH PARTY HORN AND PARTY HAT
263
///         .iter().collect::<Vec<_>>(),
264
///     [Script::Common]
265
/// );
266
/// assert_eq!(
267
///     swe.get_script_extensions_val('\u{200D}') // ZERO WIDTH JOINER
268
///         .iter().collect::<Vec<_>>(),
269
///     [Script::Inherited]
270
/// );
271
/// assert_eq!(
272
///     swe.get_script_extensions_val('௫') // U+0BEB TAMIL DIGIT FIVE
273
///         .iter().collect::<Vec<_>>(),
274
///     [Script::Tamil, Script::Grantha]
275
/// );
276
///
277
/// // check containment of a `Script` value in the `Script_Extensions` value
278
/// // U+0650 ARABIC KASRA
279
/// assert!(!swe.has_script('\u{0650}', Script::Inherited)); // main Script value
280
/// assert!(swe.has_script('\u{0650}', Script::Arabic));
281
/// assert!(swe.has_script('\u{0650}', Script::Syriac));
282
/// assert!(!swe.has_script('\u{0650}', Script::Thaana));
283
///
284
/// // get a `CodePointInversionList` for when `Script` value is contained in `Script_Extensions` value
285
/// let syriac = swe.get_script_extensions_set(Script::Syriac);
286
/// assert!(syriac.contains('\u{0650}')); // ARABIC KASRA
287
/// assert!(!syriac.contains('٠')); // ARABIC-INDIC DIGIT ZERO
288
/// assert!(!syriac.contains('ﷲ')); // ARABIC LIGATURE ALLAH ISOLATED FORM
289
/// assert!(syriac.contains('܀')); // SYRIAC END OF PARAGRAPH
290
/// assert!(syriac.contains('\u{074A}')); // SYRIAC BARREKH
291
/// ```
292
#[derive(Debug)]
×
293
pub struct ScriptWithExtensions {
294
    // Owned data payload; `as_borrowed()` hands out a reference to its contents.
    data: DataPayload<ScriptWithExtensionsPropertyV1Marker>,
×
295
}
296

297
/// A borrowed wrapper around script extension data, returned by
298
/// [`ScriptWithExtensions::as_borrowed()`]. More efficient to query.
299
#[derive(Clone, Copy, Debug)]
×
300
pub struct ScriptWithExtensionsBorrowed<'a> {
301
    // Borrowed property data; the query methods read its `trie` and
    // `extensions` fields.
    data: &'a ScriptWithExtensionsPropertyV1<'a>,
×
302
}
303

304
impl ScriptWithExtensions {
305
    /// Creates a new instance of `ScriptWithExtensionsBorrowed` using compiled data.
306
    ///
307
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
308
    ///
309
    /// [📚 Help choosing a constructor](icu_provider::constructors)
310
    #[cfg(feature = "compiled_data")]
311
    #[allow(clippy::new_ret_no_self)]
312
    pub fn new() -> ScriptWithExtensionsBorrowed<'static> {
        // Delegates to the borrowed type's constructor; the result is the
        // borrowed wrapper, not `Self`.
        <ScriptWithExtensionsBorrowed<'static>>::new()
    }
9✔
315

316
    icu_provider::gen_any_buffer_data_constructors!(
317
        () -> result: Result<ScriptWithExtensions, DataError>,
318
        functions: [
319
            new: skip,
320
            try_new_with_any_provider,
321
            try_new_with_buffer_provider,
322
            try_new_unstable,
323
            Self,
324
        ]
325
    );
326

327
    #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
328
    pub fn try_new_unstable(
8✔
329
        provider: &(impl DataProvider<ScriptWithExtensionsPropertyV1Marker> + ?Sized),
330
    ) -> Result<Self, DataError> {
331
        Ok(ScriptWithExtensions::from_data(
8✔
332
            provider.load(Default::default())?.payload,
8✔
333
        ))
334
    }
8✔
335

336
    /// Construct a borrowed version of this type that can be queried.
337
    ///
338
    /// This avoids a potential small underlying cost per API call (ex: `contains()`) by consolidating it
339
    /// up front.
340
    #[inline]
341
    pub fn as_borrowed(&self) -> ScriptWithExtensionsBorrowed<'_> {
8✔
342
        ScriptWithExtensionsBorrowed {
343
            data: self.data.get(),
8✔
344
        }
345
    }
8✔
346

347
    /// Construct a new one from loaded data
348
    ///
349
    /// Typically it is preferable to use getters like [`load_script_with_extensions_unstable()`] instead
350
    pub(crate) fn from_data(data: DataPayload<ScriptWithExtensionsPropertyV1Marker>) -> Self {
14✔
351
        Self { data }
14✔
352
    }
14✔
353
}
354

355
impl<'a> ScriptWithExtensionsBorrowed<'a> {
356
    /// Returns the `Script` property value for this code point.
357
    ///
358
    /// # Examples
359
    ///
360
    /// ```
361
    /// use icu::properties::script::ScriptWithExtensions;
362
    /// use icu::properties::props::Script;
363
    ///
364
    /// let swe = ScriptWithExtensions::new();
365
    ///
366
    /// // U+0640 ARABIC TATWEEL
367
    /// assert_eq!(swe.get_script_val('ـ'), Script::Common); // main Script value
368
    /// assert_ne!(swe.get_script_val('ـ'), Script::Arabic);
369
    /// assert_ne!(swe.get_script_val('ـ'), Script::Syriac);
370
    /// assert_ne!(swe.get_script_val('ـ'), Script::Thaana);
371
    ///
372
    /// // U+0650 ARABIC KASRA
373
    /// assert_eq!(swe.get_script_val('\u{0650}'), Script::Inherited); // main Script value
374
    /// assert_ne!(swe.get_script_val('\u{0650}'), Script::Arabic);
375
    /// assert_ne!(swe.get_script_val('\u{0650}'), Script::Syriac);
376
    /// assert_ne!(swe.get_script_val('\u{0650}'), Script::Thaana);
377
    ///
378
    /// // U+0660 ARABIC-INDIC DIGIT ZERO
379
    /// assert_ne!(swe.get_script_val('٠'), Script::Common);
380
    /// assert_eq!(swe.get_script_val('٠'), Script::Arabic); // main Script value
381
    /// assert_ne!(swe.get_script_val('٠'), Script::Syriac);
382
    /// assert_ne!(swe.get_script_val('٠'), Script::Thaana);
383
    ///
384
    /// // U+FDF2 ARABIC LIGATURE ALLAH ISOLATED FORM
385
    /// assert_ne!(swe.get_script_val('ﷲ'), Script::Common);
386
    /// assert_eq!(swe.get_script_val('ﷲ'), Script::Arabic); // main Script value
387
    /// assert_ne!(swe.get_script_val('ﷲ'), Script::Syriac);
388
    /// assert_ne!(swe.get_script_val('ﷲ'), Script::Thaana);
389
    /// ```
390
    pub fn get_script_val(self, ch: char) -> Script {
        // Forward to the `u32` variant using the character's scalar value.
        let code_point = u32::from(ch);
        self.get_script_val32(code_point)
    }
35✔
393

394
    /// See [`Self::get_script_val`].
395
    pub fn get_script_val32(self, code_point: u32) -> Script {
38✔
396
        let sc_with_ext = self.data.trie.get32(code_point);
38✔
397

398
        if sc_with_ext.is_other() {
38✔
399
            let ext_idx = sc_with_ext.0 & SCRIPT_X_SCRIPT_VAL;
12✔
400
            let scx_val = self.data.extensions.get(ext_idx as usize);
12✔
401
            let scx_first_sc = scx_val.and_then(|scx| scx.get(0));
24✔
402

403
            let default_sc_val = Script::Unknown;
38✔
404

405
            scx_first_sc.unwrap_or(default_sc_val)
12✔
406
        } else if sc_with_ext.is_common() {
26✔
407
            Script::Common
6✔
408
        } else if sc_with_ext.is_inherited() {
20✔
409
            Script::Inherited
5✔
410
        } else {
411
            let script_val = sc_with_ext.0;
15✔
412
            Script(script_val)
15✔
413
        }
414
    }
38✔
415
    // Returns the Script_Extensions value for a code_point when the trie value
416
    // is already known.
417
    // This private helper method exists to prevent code duplication in callers like
418
    // `get_script_extensions_val`, `get_script_extensions_set`, and `has_script`.
419
    fn get_scx_val_using_trie_val(
2,003✔
420
        self,
421
        sc_with_ext_ule: &'a <ScriptWithExt as AsULE>::ULE,
422
    ) -> &'a ZeroSlice<Script> {
423
        let sc_with_ext = ScriptWithExt::from_unaligned(*sc_with_ext_ule);
2,003✔
424
        if sc_with_ext.is_other() {
2,003✔
425
            let ext_idx = sc_with_ext.0 & SCRIPT_X_SCRIPT_VAL;
392✔
426
            let ext_subarray = self.data.extensions.get(ext_idx as usize);
392✔
427
            // In the OTHER case, where the 2 higher-order bits of the
428
            // `ScriptWithExt` value in the trie doesn't indicate the Script value,
429
            // the Script value is copied/inserted into the first position of the
430
            // `extensions` array. So we must remove it to return the actual scx array val.
431
            let scx_slice = ext_subarray
392✔
432
                .and_then(|zslice| zslice.as_ule_slice().get(1..))
392✔
433
                .unwrap_or_default();
434
            ZeroSlice::from_ule_slice(scx_slice)
392✔
435
        } else if sc_with_ext.is_common() || sc_with_ext.is_inherited() {
1,611✔
436
            let ext_idx = sc_with_ext.0 & SCRIPT_X_SCRIPT_VAL;
1,601✔
437
            let scx_val = self.data.extensions.get(ext_idx as usize);
1,601✔
438
            scx_val.unwrap_or_default()
1,601✔
439
        } else {
440
            // Note: `Script` and `ScriptWithExt` are both represented as the same
441
            // u16 value when the `ScriptWithExt` has no higher-order bits set.
442
            let script_ule_slice = core::slice::from_ref(sc_with_ext_ule);
10✔
443
            ZeroSlice::from_ule_slice(script_ule_slice)
10✔
444
        }
445
    }
2,003✔
446
    /// Return the `Script_Extensions` property value for this code point.
447
    ///
448
    /// If `code_point` has Script_Extensions, then return the Script codes in
449
    /// the Script_Extensions. In this case, the Script property value
450
    /// (normally Common or Inherited) is not included in the [`ScriptExtensionsSet`].
451
    ///
452
    /// If c does not have Script_Extensions, then the one Script code is put
453
    /// into the [`ScriptExtensionsSet`] and also returned.
454
    ///
455
    /// If c is not a valid code point, then return an empty [`ScriptExtensionsSet`].
456
    ///
457
    /// # Examples
458
    ///
459
    /// ```
460
    /// use icu::properties::script::ScriptWithExtensions;
461
    /// use icu::properties::props::Script;
462
    ///
463
    /// let swe = ScriptWithExtensions::new();
464
    ///
465
    /// assert_eq!(
466
    ///     swe.get_script_extensions_val('𐓐') // U+104D0 OSAGE CAPITAL LETTER KHA
467
    ///         .iter()
468
    ///         .collect::<Vec<_>>(),
469
    ///     [Script::Osage]
470
    /// );
471
    /// assert_eq!(
472
    ///     swe.get_script_extensions_val('🥳') // U+1F973 FACE WITH PARTY HORN AND PARTY HAT
473
    ///         .iter()
474
    ///         .collect::<Vec<_>>(),
475
    ///     [Script::Common]
476
    /// );
477
    /// assert_eq!(
478
    ///     swe.get_script_extensions_val('\u{200D}') // ZERO WIDTH JOINER
479
    ///         .iter()
480
    ///         .collect::<Vec<_>>(),
481
    ///     [Script::Inherited]
482
    /// );
483
    /// assert_eq!(
484
    ///     swe.get_script_extensions_val('௫') // U+0BEB TAMIL DIGIT FIVE
485
    ///         .iter()
486
    ///         .collect::<Vec<_>>(),
487
    ///     [Script::Tamil, Script::Grantha]
488
    /// );
489
    /// ```
490
    pub fn get_script_extensions_val(self, ch: char) -> ScriptExtensionsSet<'a> {
        // Forward to the `u32` variant using the character's scalar value.
        let code_point = u32::from(ch);
        self.get_script_extensions_val32(code_point)
    }
13✔
493

494
    /// See [`Self::get_script_extensions_val`].
495
    pub fn get_script_extensions_val32(self, code_point: u32) -> ScriptExtensionsSet<'a> {
20✔
496
        let sc_with_ext_ule = self.data.trie.get32_ule(code_point);
20✔
497

498
        ScriptExtensionsSet {
499
            values: match sc_with_ext_ule {
40✔
500
                Some(ule_ref) => self.get_scx_val_using_trie_val(ule_ref),
20✔
501
                None => ZeroSlice::from_ule_slice(&[]),
×
502
            },
503
        }
504
    }
20✔
505

506
    /// Returns whether `script` is contained in the Script_Extensions
507
    /// property value if the code_point has Script_Extensions, otherwise
508
    /// if the code point does not have Script_Extensions then returns
509
    /// whether the Script property value matches.
510
    ///
511
    /// Some characters are commonly used in multiple scripts. For more information,
512
    /// see UAX #24: <http://www.unicode.org/reports/tr24/>.
513
    ///
514
    /// # Examples
515
    ///
516
    /// ```
517
    /// use icu::properties::script::ScriptWithExtensions;
518
    /// use icu::properties::props::Script;
519
    ///
520
    /// let swe = ScriptWithExtensions::new();
521
    ///
522
    /// // U+0650 ARABIC KASRA
523
    /// assert!(!swe.has_script('\u{0650}', Script::Inherited)); // main Script value
524
    /// assert!(swe.has_script('\u{0650}', Script::Arabic));
525
    /// assert!(swe.has_script('\u{0650}', Script::Syriac));
526
    /// assert!(!swe.has_script('\u{0650}', Script::Thaana));
527
    ///
528
    /// // U+0660 ARABIC-INDIC DIGIT ZERO
529
    /// assert!(!swe.has_script('٠', Script::Common)); // main Script value
530
    /// assert!(swe.has_script('٠', Script::Arabic));
531
    /// assert!(!swe.has_script('٠', Script::Syriac));
532
    /// assert!(swe.has_script('٠', Script::Thaana));
533
    ///
534
    /// // U+FDF2 ARABIC LIGATURE ALLAH ISOLATED FORM
535
    /// assert!(!swe.has_script('ﷲ', Script::Common));
536
    /// assert!(swe.has_script('ﷲ', Script::Arabic)); // main Script value
537
    /// assert!(!swe.has_script('ﷲ', Script::Syriac));
538
    /// assert!(swe.has_script('ﷲ', Script::Thaana));
539
    /// ```
540
    pub fn has_script(self, ch: char, script: Script) -> bool {
        // Forward to the `u32` variant using the character's scalar value.
        let code_point = u32::from(ch);
        self.has_script32(code_point, script)
    }
25✔
543

544
    /// See [`Self::has_script`].
545
    pub fn has_script32(self, code_point: u32, script: Script) -> bool {
59✔
546
        let sc_with_ext_ule = if let Some(scwe_ule) = self.data.trie.get32_ule(code_point) {
59✔
547
            scwe_ule
548
        } else {
549
            return false;
×
550
        };
551
        let sc_with_ext = <ScriptWithExt as AsULE>::from_unaligned(*sc_with_ext_ule);
59✔
552

553
        if !sc_with_ext.has_extensions() {
59✔
554
            let script_val = sc_with_ext.0;
11✔
555
            script == Script(script_val)
11✔
556
        } else {
557
            let scx_val = self.get_scx_val_using_trie_val(sc_with_ext_ule);
48✔
558
            let script_find = scx_val.iter().find(|&sc| sc == script);
176✔
559
            script_find.is_some()
48✔
560
        }
561
    }
59✔
562

563
    /// Returns all of the code point ranges (as `RangeInclusive<u32>`) for the given [`Script`]
564
    /// in which `has_script` will return true for all of the contained code points.
565
    ///
566
    /// # Examples
567
    ///
568
    /// ```
569
    /// use icu::properties::props::Script;
570
    /// use icu::properties::script::ScriptWithExtensions;
571
    ///
572
    /// let swe = ScriptWithExtensions::new();
573
    ///
574
    /// let syriac_script_extensions_ranges =
575
    ///     swe.get_script_extensions_ranges(Script::Syriac);
576
    ///
577
    /// let exp_ranges = [
578
    ///     0x060C..=0x060C, // ARABIC COMMA
579
    ///     0x061B..=0x061C, // ARABIC SEMICOLON, ARABIC LETTER MARK
580
    ///     0x061F..=0x061F, // ARABIC QUESTION MARK
581
    ///     0x0640..=0x0640, // ARABIC TATWEEL
582
    ///     0x064B..=0x0655, // ARABIC FATHATAN..ARABIC HAMZA BELOW
583
    ///     0x0670..=0x0670, // ARABIC LETTER SUPERSCRIPT ALEF
584
    ///     0x0700..=0x070D, // Syriac block begins at U+0700
585
    ///     0x070F..=0x074A, // Syriac block
586
    ///     0x074D..=0x074F, // Syriac block ends at U+074F
587
    ///     0x0860..=0x086A, // Syriac Supplement block is U+0860..=U+086F
588
    ///     0x1DF8..=0x1DF8, // U+1DF8 COMBINING DOT ABOVE LEFT
589
    ///     0x1DFA..=0x1DFA, // U+1DFA COMBINING DOT BELOW LEFT
590
    /// ];
591
    ///
592
    /// assert_eq!(
593
    ///     syriac_script_extensions_ranges.collect::<Vec<_>>(),
594
    ///     exp_ranges
595
    /// );
596
    /// ```
597
    pub fn get_script_extensions_ranges(
15✔
598
        self,
599
        script: Script,
600
    ) -> impl Iterator<Item = RangeInclusive<u32>> + 'a {
601
        self.data
30✔
602
            .trie
603
            .iter_ranges_mapped(move |value| {
27,225✔
604
                let sc_with_ext = ScriptWithExt(value.0);
27,210✔
605
                if sc_with_ext.has_extensions() {
27,210✔
606
                    self.get_scx_val_using_trie_val(&sc_with_ext.to_unaligned())
3,870✔
607
                        .iter()
608
                        .any(|sc| sc == script)
8,374✔
609
                } else {
610
                    script == sc_with_ext.into()
25,275✔
611
                }
612
            })
27,210✔
613
            .filter(|v| v.value)
932✔
614
            .map(|v| v.range)
459✔
615
    }
15✔
616

617
    /// Returns a [`CodePointInversionList`] for the given [`Script`] which represents all
618
    /// code points for which `has_script` will return true.
619
    ///
620
    /// # Examples
621
    ///
622
    /// ```
623
    /// use icu::properties::script::ScriptWithExtensions;
624
    /// use icu::properties::props::Script;
625
    ///
626
    /// let swe = ScriptWithExtensions::new();
627
    ///
628
    /// let syriac = swe.get_script_extensions_set(Script::Syriac);
629
    ///
630
    /// assert!(!syriac.contains('؞')); // ARABIC TRIPLE DOT PUNCTUATION MARK
631
    /// assert!(syriac.contains('؟')); // ARABIC QUESTION MARK
632
    /// assert!(!syriac.contains('ؠ')); // ARABIC LETTER KASHMIRI YEH
633
    ///
634
    /// assert!(syriac.contains('܀')); // SYRIAC END OF PARAGRAPH
635
    /// assert!(syriac.contains('\u{074A}')); // SYRIAC BARREKH
636
    /// assert!(!syriac.contains('\u{074B}')); // unassigned
637
    /// assert!(syriac.contains('ݏ')); // SYRIAC LETTER SOGDIAN FE
638
    /// assert!(!syriac.contains('ݐ')); // ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW
639
    ///
640
    /// assert!(syriac.contains('\u{1DF8}')); // COMBINING DOT ABOVE LEFT
641
    /// assert!(!syriac.contains('\u{1DF9}')); // COMBINING WIDE INVERTED BRIDGE BELOW
642
    /// assert!(syriac.contains('\u{1DFA}')); // COMBINING DOT BELOW LEFT
643
    /// assert!(!syriac.contains('\u{1DFB}')); // COMBINING DELETION MARK
644
    /// ```
645
    pub fn get_script_extensions_set(self, script: Script) -> CodePointInversionList<'a> {
14✔
646
        CodePointInversionList::from_iter(self.get_script_extensions_ranges(script))
14✔
647
    }
14✔
648
}
649

650
#[cfg(feature = "compiled_data")]
651
impl Default for ScriptWithExtensionsBorrowed<'static> {
652
    fn default() -> Self {
×
653
        Self::new()
×
654
    }
×
655
}
656

657
impl ScriptWithExtensionsBorrowed<'static> {
658
    /// Creates a new instance of `ScriptWithExtensionsBorrowed` using compiled data.
659
    ///
660
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
661
    ///
662
    /// [📚 Help choosing a constructor](icu_provider::constructors)
663
    #[cfg(feature = "compiled_data")]
664
    pub fn new() -> Self {
9✔
665
        Self {
666
            data: crate::provider::Baked::SINGLETON_SCRIPT_WITH_EXTENSIONS_PROPERTY_V1_MARKER,
667
        }
668
    }
9✔
669

670
    /// Cheaply converts a [`ScriptWithExtensionsBorrowed<'static>`] into a [`ScriptWithExtensions`].
671
    ///
672
    /// Note: Due to branching and indirection, using [`ScriptWithExtensions`] might inhibit some
673
    /// compile-time optimizations that are possible with [`ScriptWithExtensionsBorrowed`].
674
    pub const fn static_to_owned(self) -> ScriptWithExtensions {
×
675
        ScriptWithExtensions {
×
676
            data: DataPayload::from_static_ref(self.data),
×
677
        }
678
    }
×
679
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc