• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

zbraniecki / icu4x / 13958601093

19 Mar 2025 04:17PM UTC coverage: 74.164% (-1.5%) from 75.71%
13958601093

push

github

web-flow
Clean up properties docs (#6315)

58056 of 78281 relevant lines covered (74.16%)

819371.32 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.11
/provider/source/src/properties/script.rs
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4

5
use crate::SourceDataProvider;
6
use icu::collections::codepointtrie::CodePointTrie;
7
use icu::properties::props::Script;
8
use icu::properties::provider::{PropertyScriptWithExtensionsV1, ScriptWithExtensionsProperty};
9
use icu::properties::script::ScriptWithExt;
10
use icu_provider::prelude::*;
11
use std::collections::HashSet;
12
use std::convert::TryFrom;
13
use zerovec::{VarZeroVec, ZeroSlice, ZeroVec};
14

15
// implement data provider
16
impl DataProvider<PropertyScriptWithExtensionsV1> for SourceDataProvider {
17
    fn load(
5✔
18
        &self,
19
        req: DataRequest,
20
    ) -> Result<DataResponse<PropertyScriptWithExtensionsV1>, DataError> {
21
        self.check_req::<PropertyScriptWithExtensionsV1>(req)?;
5✔
22
        let scx_data = self
5✔
23
            .icuexport()?
×
24
            .read_and_parse_toml::<super::uprops_serde::script_extensions::Main>(&format!(
5✔
25
                "uprops/{}/scx.toml",
26
                self.trie_type(),
5✔
27
            ))?
×
28
            .script_extensions
29
            .first()
30
            .ok_or_else(|| DataError::custom("Could not parse Script_Extensions data from TOML"))?;
5✔
31

32
        let cpt_data = &scx_data.code_point_trie;
5✔
33
        let scx_array_data = &scx_data.script_code_array;
5✔
34

35
        let trie = CodePointTrie::<ScriptWithExt>::try_from(cpt_data).map_err(|e| {
5✔
36
            DataError::custom("Could not parse CodePointTrie TOML").with_display_context(&e)
×
37
        })?;
×
38

39
        // Convert the input from Vec<Vec<u16>> to Vec<ZeroVec<Script>> so that
40
        // we can go through the VarZeroVec construction process for a desired result
41
        // type of VZV<ZeroSlice<Script>>
42
        let ule_scx_array_data: Vec<ZeroVec<Script>> = scx_array_data
5✔
43
            .iter()
44
            .map(|v| {
570✔
45
                v.iter()
570✔
46
                    .copied()
47
                    .map(Script::from_icu4c_value)
48
                    .collect::<ZeroVec<Script>>()
49
            })
570✔
50
            .collect::<Vec<ZeroVec<Script>>>();
51
        let scx_vzv: VarZeroVec<ZeroSlice<Script>> =
52
            VarZeroVec::from(ule_scx_array_data.as_slice());
5✔
53

54
        let data_struct = ScriptWithExtensionsProperty {
5✔
55
            trie,
5✔
56
            extensions: scx_vzv,
57
        };
58

59
        Ok(DataResponse {
5✔
60
            metadata: Default::default(),
5✔
61
            payload: DataPayload::from_owned(data_struct),
5✔
62
        })
63
    }
5✔
64
}
65

66
impl crate::IterableDataProviderCached<PropertyScriptWithExtensionsV1> for SourceDataProvider {
67
    fn iter_ids_cached(&self) -> Result<HashSet<DataIdentifierCow<'static>>, DataError> {
×
68
        Ok(HashSet::from_iter([Default::default()]))
×
69
    }
×
70
}
71

72
#[cfg(test)]
73
mod tests {
74
    use super::*;
75

76
    /// Checks the Script value reported for a handful of code points,
    /// through both the `char` and the `u32` lookup entry points.
    #[test]
    fn test_script_val_from_script_extensions() {
        let provider = SourceDataProvider::new_testing();
        let data =
            icu::properties::script::ScriptWithExtensions::try_new_unstable(&provider).unwrap();
        let swe = data.as_borrowed();

        // Lookups keyed by `char`.
        for (ch, expected) in [
            ('𐓐', Script::Osage),   // U+104D0 OSAGE CAPITAL LETTER KHA
            ('🥳', Script::Common), // U+1F973 FACE WITH PARTY HORN AND PARTY HAT
            ('௫', Script::Tamil),   // U+0BEB TAMIL DIGIT FIVE
        ] {
            assert_eq!(swe.get_script_val(ch), expected);
        }

        // Lookups keyed by the raw code point value.
        for (code_point, expected) in [
            (0x200D, Script::Inherited), // ZERO WIDTH JOINER
            (0x11303, Script::Grantha),  // GRANTHA SIGN VISARGA
            (0x30A0, Script::Common),    // U+30A0 KATAKANA-HIRAGANA DOUBLE HYPHEN
        ] {
            assert_eq!(swe.get_script_val32(code_point), expected);
        }
    }
2✔
91

92
    /// Checks the Script_Extensions list reported for individual code points,
    /// including a value just past the last valid code point.
    #[test]
    fn test_scx_array_from_script_extensions() {
        let provider = SourceDataProvider::new_testing();
        let data =
            icu::properties::script::ScriptWithExtensions::try_new_unstable(&provider).unwrap();
        let swe = data.as_borrowed();

        // Collect the extensions of a code point into a Vec for comparison.
        let scx_of = |ch: char| -> Vec<Script> {
            swe.get_script_extensions_val(ch).iter().collect()
        };
        let scx_of32 = |code_point: u32| -> Vec<Script> {
            swe.get_script_extensions_val32(code_point).iter().collect()
        };

        // U+104D0 OSAGE CAPITAL LETTER KHA
        assert_eq!(scx_of('𐓐'), [Script::Osage]);
        // U+1F973 FACE WITH PARTY HORN AND PARTY HAT
        assert_eq!(scx_of('🥳'), [Script::Common]);
        // ZERO WIDTH JOINER
        assert_eq!(scx_of32(0x200D), [Script::Inherited]);
        // U+0BEB TAMIL DIGIT FIVE
        assert_eq!(scx_of('௫'), [Script::Tamil, Script::Grantha]);
        // GRANTHA SIGN VISARGA
        assert_eq!(scx_of32(0x11303), [Script::Tamil, Script::Grantha]);
        // KATAKANA-HIRAGANA DOUBLE HYPHEN
        assert_eq!(scx_of32(0x30A0), [Script::Hiragana, Script::Katakana]);

        // ZERO WIDTH JOINER: the first yielded element.
        let first_for_zwj = swe.get_script_extensions_val32(0x200D).iter().next();
        assert_eq!(first_for_zwj, Some(Script::Inherited));

        // GRANTHA SIGN VISARGA: membership queries.
        let visarga = swe.get_script_extensions_val32(0x11303);
        assert!(visarga.contains(&Script::Grantha));
        assert!(!visarga.contains(&Script::Common));

        // Invalid code point: CODE_POINT_MAX + 1
        assert_eq!(scx_of32(0x11_0000), [Script::Unknown]);
    }
2✔
160

161
    /// Checks `has_script` / `has_script32` against a table of
    /// (code point, script, expected answer) triples.
    #[test]
    fn test_has_script() {
        let provider = SourceDataProvider::new_testing();
        let data =
            icu::properties::script::ScriptWithExtensions::try_new_unstable(&provider).unwrap();
        let swe = data.as_borrowed();

        // Queries keyed by `char`.
        for (ch, script, expected) in [
            ('𐓐', Script::Osage, true),
            ('𐓐', Script::Common, false),
            ('𐓐', Script::Inherited, false),
            ('🥳', Script::Common, true),
            ('🥳', Script::Inherited, false),
            ('௫', Script::Tamil, true),
            ('௫', Script::Grantha, true),
            ('௫', Script::Common, false),
            ('௫', Script::Inherited, false),
        ] {
            assert_eq!(swe.has_script(ch, script), expected, "{ch:?} / {script:?}");
        }

        // Queries keyed by the raw code point value.
        for (code_point, script, expected) in [
            (0x200D, Script::Common, false),
            (0x200D, Script::Inherited, true),
            (0x11303, Script::Tamil, true),
            (0x11303, Script::Grantha, true),
            (0x11303, Script::Common, false),
            (0x11303, Script::Inherited, false),
            (0x30A0, Script::Hiragana, true),
            (0x30A0, Script::Katakana, true),
            (0x30A0, Script::Common, false),
            (0x30A0, Script::Inherited, false),
            // U+0964 DEVANAGARI DANDA
            (0x0964, Script::Common, false),
            (0x0964, Script::Devanagari, true),
            (0x0964, Script::Bengali, true),
            // TestHasScript() test cases from ICU4J
            // U+063F ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
            (0x063F, Script::Common, false),
            (0x063F, Script::Arabic, true), // main Script value
            (0x063F, Script::Syriac, false),
            (0x063F, Script::Thaana, false),
            // U+0640 ARABIC TATWEEL
            (0x0640, Script::Common, false), // main Script value
            (0x0640, Script::Arabic, true),
            (0x0640, Script::Syriac, true),
            (0x0640, Script::Thaana, false),
            // U+0650 ARABIC KASRA
            (0x0650, Script::Inherited, false), // main Script value
            (0x0650, Script::Arabic, true),
            (0x0650, Script::Syriac, true),
            (0x0650, Script::Thaana, false),
            // U+0660 ARABIC-INDIC DIGIT ZERO
            (0x0660, Script::Common, false),
            (0x0660, Script::Arabic, true), // main Script value
            (0x0660, Script::Syriac, false),
            (0x0660, Script::Thaana, true),
            // U+FDF2 ARABIC LIGATURE ALLAH ISOLATED FORM
            (0xFDF2, Script::Common, false),
            (0xFDF2, Script::Arabic, true), // main Script value
            (0xFDF2, Script::Syriac, false),
            (0xFDF2, Script::Thaana, true),
            // The ICU4J comment for this test says:
            // An unguarded implementation might go into an infinite loop.
            (0x0640, Script::from_icu4c_value(0xAFFE), false),
        ] {
            assert_eq!(
                swe.has_script32(code_point, script),
                expected,
                "U+{code_point:04X} / {script:?}"
            );
        }
    }
2✔
235

236
    /// Checks membership of selected code points in the per-script sets
    /// returned by `get_script_extensions_set`.
    #[test]
    fn test_get_script_extensions_set() {
        let provider = SourceDataProvider::new_testing();
        let data =
            icu::properties::script::ScriptWithExtensions::try_new_unstable(&provider).unwrap();
        let swe = data.as_borrowed();

        let grantha = swe.get_script_extensions_set(Script::Grantha);
        for (code_point, expected) in [
            (0x0BE5, false), // unknown with unknown script in Tamil block
            (0x0BE6, true),  // TAMIL DIGIT ZERO
            (0x0BEB, true),  // TAMIL DIGIT FIVE
            (0x0BEF, true),  // TAMIL DIGIT NINE
            (0x0BF2, true),  // TAMIL NUMBER ONE THOUSAND
            (0x0BF3, true),  // TAMIL DAY SIGN
            (0x0BF4, false), // TAMIL MONTH SIGN
            (0x11300, true), // GRANTHA SIGN COMBINING ANUSVARA ABOVE
            (0x11301, true), // GRANTHA SIGN CANDRABINDU
            (0x11302, true), // GRANTHA SIGN ANUSVARA
            (0x11303, true), // GRANTHA SIGN VISARGA
            (0x11304, false), // unknown with unknown script in Grantha block
            (0x11305, true), // GRANTHA LETTER A
        ] {
            assert_eq!(
                grantha.contains32(code_point),
                expected,
                "Grantha, U+{code_point:04X}"
            );
        }

        let tamil = swe.get_script_extensions_set(Script::Tamil);
        for (code_point, expected) in [
            (0x0BE5, false),  // unknown with unknown script in Tamil block
            (0x0BE6, true),   // TAMIL DIGIT ZERO
            (0x0BEB, true),   // TAMIL DIGIT FIVE
            (0x0BEF, true),   // TAMIL DIGIT NINE
            (0x0BF2, true),   // TAMIL NUMBER ONE THOUSAND
            (0x0BF3, true),   // TAMIL DAY SIGN
            (0x0BF4, true),   // TAMIL MONTH SIGN
            (0x11300, false), // GRANTHA SIGN COMBINING ANUSVARA ABOVE
            (0x11301, true),  // GRANTHA SIGN CANDRABINDU
            (0x11302, false), // GRANTHA SIGN ANUSVARA
            (0x11303, true),  // GRANTHA SIGN VISARGA
            (0x11304, false), // unknown with unknown script in Grantha block
            (0x11305, false), // GRANTHA LETTER A
        ] {
            assert_eq!(
                tamil.contains32(code_point),
                expected,
                "Tamil, U+{code_point:04X}"
            );
        }

        let hiragana = swe.get_script_extensions_set(Script::Hiragana);
        for (code_point, expected) in [
            (0x3046, true),  // HIRAGANA LETTER U
            (0x309F, true),  // HIRAGANA DIGRAPH YORI
            (0x30A0, true),  // KATAKANA-HIRAGANA DOUBLE HYPHEN
            (0x30A1, false), // KATAKANA LETTER SMALL A
            (0x30FB, true),  // KATAKANA MIDDLE DOT
            (0x30FC, true),  // KATAKANA-HIRAGANA PROLONGED SOUND MARK
            (0x30FD, false), // KATAKANA ITERATION MARK
        ] {
            assert_eq!(
                hiragana.contains32(code_point),
                expected,
                "Hiragana, U+{code_point:04X}"
            );
        }

        let katakana = swe.get_script_extensions_set(Script::Katakana);
        for (code_point, expected) in [
            (0x3046, false), // HIRAGANA LETTER U
            (0x309F, false), // HIRAGANA DIGRAPH YORI
            (0x30A0, true),  // KATAKANA-HIRAGANA DOUBLE HYPHEN
            (0x30A1, true),  // KATAKANA LETTER SMALL A
            (0x30FB, true),  // KATAKANA MIDDLE DOT
            (0x30FC, true),  // KATAKANA-HIRAGANA PROLONGED SOUND MARK
            (0x30FD, true),  // KATAKANA ITERATION MARK
        ] {
            assert_eq!(
                katakana.contains32(code_point),
                expected,
                "Katakana, U+{code_point:04X}"
            );
        }

        let common = swe.get_script_extensions_set(Script::Common);
        assert!(common.contains('🥳'));
        for code_point in [0x200D, 0x30A0] {
            assert!(!common.contains32(code_point), "Common, U+{code_point:04X}");
        }

        let inherited = swe.get_script_extensions_set(Script::Inherited);
        assert!(!inherited.contains('🥳'));
        for (code_point, expected) in [(0x200D, true), (0x30A0, false)] {
            assert_eq!(
                inherited.contains32(code_point),
                expected,
                "Inherited, U+{code_point:04X}"
            );
        }

        // inspired by https://github.com/unicode-org/unicodetools/issues/192

        let bangla = swe.get_script_extensions_set(Script::Bengali);
        for (code_point, expected) in [
            (0x09E7, true),  // BENGALI DIGIT ONE
            (0x0963, false), // DEVANAGARI VOWEL SIGN VOCALIC LL
            (0x0964, true),  // DEVANAGARI DANDA
            (0x0965, true),  // DEVANAGARI DOUBLE DANDA
            (0x0966, false), // DEVANAGARI DIGIT ZERO
        ] {
            assert_eq!(
                bangla.contains32(code_point),
                expected,
                "Bengali, U+{code_point:04X}"
            );
        }

        let devanagari = swe.get_script_extensions_set(Script::Devanagari);
        for (code_point, expected) in [
            (0x09E7, false), // BENGALI DIGIT ONE
            (0x0963, true),  // DEVANAGARI VOWEL SIGN VOCALIC LL
            (0x0964, true),  // DEVANAGARI DANDA
            (0x0965, true),  // DEVANAGARI DOUBLE DANDA
            (0x0966, true),  // DEVANAGARI DIGIT ZERO
        ] {
            assert_eq!(
                devanagari.contains32(code_point),
                expected,
                "Devanagari, U+{code_point:04X}"
            );
        }

        // DEVANAGARI DANDA and DEVANAGARI DOUBLE DANDA are not in the Common set.
        for code_point in [0x0964, 0x0965] {
            assert!(!common.contains32(code_point), "Common, U+{code_point:04X}");
        }
    }
2✔
321
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc