• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

zbraniecki / icu4x / 9357137046

03 Jun 2024 08:51PM UTC coverage: 75.121% (-1.1%) from 76.254%
9357137046

push

github

web-flow
Switch locid Value to use Subtag (#4941)

This is part of #1833 switching Value API to use Subtag.

61 of 71 new or added lines in 11 files covered. (85.92%)

3224 existing lines in 178 files now uncovered.

52958 of 70497 relevant lines covered (75.12%)

572757.08 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.95
/utils/tinystr/src/int_ops.rs
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4

5
use crate::asciibyte::AsciiByte;
6

7
/// Internal helper struct that performs operations on aligned integers.
8
/// Supports strings up to 4 bytes long.
9
#[repr(transparent)]
10
pub struct Aligned4(u32);
11

12
impl Aligned4 {
13
    /// # Panics
14
    /// Panics if N is greater than 4
15
    #[inline]
16
    pub const fn from_bytes<const N: usize>(src: &[u8; N]) -> Self {
835,348✔
17
        let mut bytes = [0; 4];
835,348✔
18
        let mut i = 0;
835,348✔
19
        // The function documentation defines when panics may occur
20
        #[allow(clippy::indexing_slicing)]
21
        while i < N {
3,502,328✔
22
            bytes[i] = src[i];
2,666,980✔
23
            i += 1;
2,666,980✔
24
        }
25
        Self(u32::from_ne_bytes(bytes))
835,348✔
26
    }
835,348✔
27

28
    #[inline]
29
    pub const fn from_ascii_bytes<const N: usize>(src: &[AsciiByte; N]) -> Self {
835,494✔
30
        Self::from_bytes::<N>(unsafe { core::mem::transmute::<&[AsciiByte; N], &[u8; N]>(src) })
835,494✔
31
    }
835,494✔
32

33
    #[inline]
34
    pub const fn to_bytes(&self) -> [u8; 4] {
107,022✔
35
        self.0.to_ne_bytes()
107,022✔
36
    }
107,022✔
37

38
    #[inline]
39
    pub const fn to_ascii_bytes(&self) -> [AsciiByte; 4] {
107,023✔
40
        unsafe { core::mem::transmute(self.to_bytes()) }
107,023✔
41
    }
107,023✔
42

43
    pub const fn len(&self) -> usize {
582,061✔
44
        let word = self.0;
582,061✔
45
        #[cfg(target_endian = "little")]
46
        let len = (4 - word.leading_zeros() / 8) as usize;
582,061✔
47
        #[cfg(target_endian = "big")]
48
        let len = (4 - word.trailing_zeros() / 8) as usize;
49
        len
50
    }
582,061✔
51

52
    pub const fn is_ascii_alphabetic(&self) -> bool {
106,461✔
53
        let word = self.0;
106,461✔
54
        // Each of the following bitmasks set *the high bit* (0x8) to 0 for valid and 1 for invalid.
55
        // `mask` sets all NUL bytes to 0.
56
        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
106,461✔
57
        // `lower` converts the string to lowercase. It may also change the value of non-alpha
58
        // characters, but this does not matter for the alphabetic test that follows.
59
        let lower = word | 0x2020_2020;
106,461✔
60
        // `alpha` sets all alphabetic bytes to 0. We only need check for lowercase characters.
61
        let alpha = !(lower + 0x1f1f_1f1f) | (lower + 0x0505_0505);
106,461✔
62
        // The overall string is valid if every character passes at least one test.
63
        // We performed two tests here: non-NUL (`mask`) and alphabetic (`alpha`).
64
        (alpha & mask) == 0
106,461✔
65
    }
106,461✔
66

67
    pub const fn is_ascii_alphanumeric(&self) -> bool {
120✔
68
        let word = self.0;
120✔
69
        // See explanatory comments in is_ascii_alphabetic
70
        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
120✔
71
        let numeric = !(word + 0x5050_5050) | (word + 0x4646_4646);
120✔
72
        let lower = word | 0x2020_2020;
120✔
73
        let alpha = !(lower + 0x1f1f_1f1f) | (lower + 0x0505_0505);
120✔
74
        (alpha & numeric & mask) == 0
120✔
75
    }
120✔
76

77
    pub const fn is_ascii_numeric(&self) -> bool {
20,905✔
78
        let word = self.0;
20,905✔
79
        // See explanatory comments in is_ascii_alphabetic
80
        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
20,905✔
81
        let numeric = !(word + 0x5050_5050) | (word + 0x4646_4646);
20,905✔
82
        (numeric & mask) == 0
20,905✔
83
    }
20,905✔
84

85
    pub const fn is_ascii_lowercase(&self) -> bool {
120✔
86
        let word = self.0;
120✔
87
        // For efficiency, this function tests for an invalid string rather than a valid string.
88
        // A string is ASCII lowercase iff it contains no uppercase ASCII characters.
89
        // `invalid_case` sets all uppercase ASCII characters to 0 and all others to 1.
90
        let invalid_case = !(word + 0x3f3f_3f3f) | (word + 0x2525_2525);
120✔
91
        // The string is valid if it contains no invalid characters (if all high bits are 1).
92
        (invalid_case & 0x8080_8080) == 0x8080_8080
120✔
93
    }
120✔
94

95
    pub const fn is_ascii_titlecase(&self) -> bool {
120✔
96
        let word = self.0;
120✔
97
        // See explanatory comments in is_ascii_lowercase
98
        let invalid_case = if cfg!(target_endian = "little") {
120✔
99
            !(word + 0x3f3f_3f1f) | (word + 0x2525_2505)
120✔
100
        } else {
UNCOV
101
            !(word + 0x1f3f_3f3f) | (word + 0x0525_2525)
×
102
        };
103
        (invalid_case & 0x8080_8080) == 0x8080_8080
120✔
104
    }
120✔
105

106
    pub const fn is_ascii_uppercase(&self) -> bool {
120✔
107
        let word = self.0;
120✔
108
        // See explanatory comments in is_ascii_lowercase
109
        let invalid_case = !(word + 0x1f1f_1f1f) | (word + 0x0505_0505);
120✔
110
        (invalid_case & 0x8080_8080) == 0x8080_8080
120✔
111
    }
120✔
112

113
    pub const fn is_ascii_alphabetic_lowercase(&self) -> bool {
1,636✔
114
        let word = self.0;
1,636✔
115
        // `mask` sets all NUL bytes to 0.
116
        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
1,636✔
117
        // `lower_alpha` sets all lowercase ASCII characters to 0 and all others to 1.
118
        let lower_alpha = !(word + 0x1f1f_1f1f) | (word + 0x0505_0505);
1,636✔
119
        // The overall string is valid if every character passes at least one test.
120
        // We performed two tests here: non-NUL (`mask`) and lowercase ASCII character (`alpha`).
121
        (lower_alpha & mask) == 0
1,636✔
122
    }
1,636✔
123

124
    pub const fn is_ascii_alphabetic_titlecase(&self) -> bool {
8,218✔
125
        let word = self.0;
8,218✔
126
        // See explanatory comments in is_ascii_alphabetic_lowercase
127
        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
8,218✔
128
        let title_case = if cfg!(target_endian = "little") {
8,218✔
129
            !(word + 0x1f1f_1f3f) | (word + 0x0505_0525)
8,218✔
130
        } else {
UNCOV
131
            !(word + 0x3f1f_1f1f) | (word + 0x2505_0505)
×
132
        };
133
        (title_case & mask) == 0
8,218✔
134
    }
8,218✔
135

136
    pub const fn is_ascii_alphabetic_uppercase(&self) -> bool {
9,162✔
137
        let word = self.0;
9,162✔
138
        // See explanatory comments in is_ascii_alphabetic_lowercase
139
        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
9,162✔
140
        let upper_alpha = !(word + 0x3f3f_3f3f) | (word + 0x2525_2525);
9,162✔
141
        (upper_alpha & mask) == 0
9,162✔
142
    }
9,162✔
143

144
    pub const fn to_ascii_lowercase(&self) -> Self {
59,194✔
145
        let word = self.0;
59,194✔
146
        let result = word | (((word + 0x3f3f_3f3f) & !(word + 0x2525_2525) & 0x8080_8080) >> 2);
59,194✔
147
        Self(result)
59,194✔
148
    }
59,194✔
149

150
    pub const fn to_ascii_titlecase(&self) -> Self {
30,458✔
151
        let word = self.0.to_le();
30,458✔
152
        let mask = ((word + 0x3f3f_3f1f) & !(word + 0x2525_2505) & 0x8080_8080) >> 2;
30,458✔
153
        let result = (word | mask) & !(0x20 & mask);
30,458✔
154
        Self(u32::from_le(result))
30,458✔
155
    }
30,458✔
156

157
    pub const fn to_ascii_uppercase(&self) -> Self {
17,382✔
158
        let word = self.0;
17,382✔
159
        let result = word & !(((word + 0x1f1f_1f1f) & !(word + 0x0505_0505) & 0x8080_8080) >> 2);
17,382✔
160
        Self(result)
17,382✔
161
    }
17,382✔
162
}
163

164
/// Internal helper struct that performs operations on aligned integers.
165
/// Supports strings up to 8 bytes long.
166
#[repr(transparent)]
167
pub struct Aligned8(u64);
168

169
impl Aligned8 {
170
    /// # Panics
171
    /// Panics if N is greater than 8
172
    #[inline]
173
    pub const fn from_bytes<const N: usize>(src: &[u8; N]) -> Self {
40,443✔
174
        let mut bytes = [0; 8];
40,443✔
175
        let mut i = 0;
40,443✔
176
        // The function documentation defines when panics may occur
177
        #[allow(clippy::indexing_slicing)]
178
        while i < N {
359,856✔
179
            bytes[i] = src[i];
319,413✔
180
            i += 1;
319,413✔
181
        }
182
        Self(u64::from_ne_bytes(bytes))
40,443✔
183
    }
40,443✔
184

185
    #[inline]
186
    pub const fn from_ascii_bytes<const N: usize>(src: &[AsciiByte; N]) -> Self {
40,442✔
187
        Self::from_bytes::<N>(unsafe { core::mem::transmute::<&[AsciiByte; N], &[u8; N]>(src) })
40,442✔
188
    }
40,442✔
189

190
    #[inline]
191
    pub const fn to_bytes(&self) -> [u8; 8] {
4,709✔
192
        self.0.to_ne_bytes()
4,709✔
193
    }
4,709✔
194

195
    #[inline]
196
    pub const fn to_ascii_bytes(&self) -> [AsciiByte; 8] {
4,709✔
197
        unsafe { core::mem::transmute(self.to_bytes()) }
4,709✔
198
    }
4,709✔
199

200
    pub const fn len(&self) -> usize {
30,155✔
201
        let word = self.0;
30,155✔
202
        #[cfg(target_endian = "little")]
203
        let len = (8 - word.leading_zeros() / 8) as usize;
30,155✔
204
        #[cfg(target_endian = "big")]
205
        let len = (8 - word.trailing_zeros() / 8) as usize;
206
        len
207
    }
30,155✔
208

209
    pub const fn is_ascii_alphabetic(&self) -> bool {
197✔
210
        let word = self.0;
197✔
211
        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
197✔
212
        let lower = word | 0x2020_2020_2020_2020;
197✔
213
        let alpha = !(lower + 0x1f1f_1f1f_1f1f_1f1f) | (lower + 0x0505_0505_0505_0505);
197✔
214
        (alpha & mask) == 0
197✔
215
    }
197✔
216

217
    pub const fn is_ascii_alphanumeric(&self) -> bool {
4,018✔
218
        let word = self.0;
4,018✔
219
        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
4,018✔
220
        let numeric = !(word + 0x5050_5050_5050_5050) | (word + 0x4646_4646_4646_4646);
4,018✔
221
        let lower = word | 0x2020_2020_2020_2020;
4,018✔
222
        let alpha = !(lower + 0x1f1f_1f1f_1f1f_1f1f) | (lower + 0x0505_0505_0505_0505);
4,018✔
223
        (alpha & numeric & mask) == 0
4,018✔
224
    }
4,018✔
225

226
    pub const fn is_ascii_numeric(&self) -> bool {
197✔
227
        let word = self.0;
197✔
228
        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
197✔
229
        let numeric = !(word + 0x5050_5050_5050_5050) | (word + 0x4646_4646_4646_4646);
197✔
230
        (numeric & mask) == 0
197✔
231
    }
197✔
232

233
    pub const fn is_ascii_lowercase(&self) -> bool {
206✔
234
        let word = self.0;
206✔
235
        let invalid_case = !(word + 0x3f3f_3f3f_3f3f_3f3f) | (word + 0x2525_2525_2525_2525);
206✔
236
        (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080
206✔
237
    }
206✔
238

239
    pub const fn is_ascii_titlecase(&self) -> bool {
197✔
240
        let word = self.0;
197✔
241
        let invalid_case = if cfg!(target_endian = "little") {
197✔
242
            !(word + 0x3f3f_3f3f_3f3f_3f1f) | (word + 0x2525_2525_2525_2505)
197✔
243
        } else {
UNCOV
244
            !(word + 0x1f3f_3f3f_3f3f_3f3f) | (word + 0x0525_2525_2525_2525)
×
245
        };
246
        (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080
197✔
247
    }
197✔
248

249
    pub const fn is_ascii_uppercase(&self) -> bool {
197✔
250
        let word = self.0;
197✔
251
        let invalid_case = !(word + 0x1f1f_1f1f_1f1f_1f1f) | (word + 0x0505_0505_0505_0505);
197✔
252
        (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080
197✔
253
    }
197✔
254

255
    pub const fn is_ascii_alphabetic_lowercase(&self) -> bool {
197✔
256
        let word = self.0;
197✔
257
        // `mask` sets all NUL bytes to 0.
258
        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
197✔
259
        // `lower_alpha` sets all lowercase ASCII characters to 0 and all others to 1.
260
        let lower_alpha = !(word + 0x1f1f_1f1f_1f1f_1f1f) | (word + 0x0505_0505_0505_0505);
197✔
261
        // The overall string is valid if every character passes at least one test.
262
        // We performed two tests here: non-NUL (`mask`) and lowercase ASCII character (`alpha`).
263
        (lower_alpha & mask) == 0
197✔
264
    }
197✔
265

266
    pub const fn is_ascii_alphabetic_titlecase(&self) -> bool {
197✔
267
        let word = self.0;
197✔
268
        // See explanatory comments in is_ascii_alphabetic_lowercase
269
        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
197✔
270
        let title_case = if cfg!(target_endian = "little") {
197✔
271
            !(word + 0x1f1f_1f1f_1f1f_1f3f) | (word + 0x0505_0505_0505_0525)
197✔
272
        } else {
UNCOV
273
            !(word + 0x3f1f_1f1f_1f1f_1f1f) | (word + 0x2505_0505_0505_0505)
×
274
        };
275
        (title_case & mask) == 0
197✔
276
    }
197✔
277

278
    pub const fn is_ascii_alphabetic_uppercase(&self) -> bool {
197✔
279
        let word = self.0;
197✔
280
        // See explanatory comments in is_ascii_alphabetic_lowercase
281
        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
197✔
282
        let upper_alpha = !(word + 0x3f3f_3f3f_3f3f_3f3f) | (word + 0x2525_2525_2525_2525);
197✔
283
        (upper_alpha & mask) == 0
197✔
284
    }
197✔
285

286
    pub const fn to_ascii_lowercase(&self) -> Self {
4,315✔
287
        let word = self.0;
4,315✔
288
        let result = word
4,315✔
289
            | (((word + 0x3f3f_3f3f_3f3f_3f3f)
8,630✔
290
                & !(word + 0x2525_2525_2525_2525)
4,315✔
291
                & 0x8080_8080_8080_8080)
292
                >> 2);
293
        Self(result)
4,315✔
294
    }
4,315✔
295

296
    pub const fn to_ascii_titlecase(&self) -> Self {
198✔
297
        let word = self.0.to_le();
198✔
298
        let mask = ((word + 0x3f3f_3f3f_3f3f_3f1f)
396✔
299
            & !(word + 0x2525_2525_2525_2505)
198✔
300
            & 0x8080_8080_8080_8080)
301
            >> 2;
302
        let result = (word | mask) & !(0x20 & mask);
198✔
303
        Self(u64::from_le(result))
198✔
304
    }
198✔
305

306
    pub const fn to_ascii_uppercase(&self) -> Self {
198✔
307
        let word = self.0;
198✔
308
        let result = word
198✔
309
            & !(((word + 0x1f1f_1f1f_1f1f_1f1f)
396✔
310
                & !(word + 0x0505_0505_0505_0505)
198✔
311
                & 0x8080_8080_8080_8080)
312
                >> 2);
313
        Self(result)
198✔
314
    }
198✔
315
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc