• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

zbraniecki / icu4x / 9457158389

10 Jun 2024 11:45PM UTC coverage: 75.174% (+0.05%) from 75.121%
9457158389

push

github

web-flow
Add constructing TinyAsciiStr from utf16 (#5025)

Introduces TinyAsciiStr constructors from utf16 and converges on the
consensus from #4931.

---------

Co-authored-by: Robert Bastian <4706271+robertbastian@users.noreply.github.com>

65 of 82 new or added lines in 14 files covered. (79.27%)

3441 existing lines in 141 files now uncovered.

52850 of 70304 relevant lines covered (75.17%)

563298.06 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.9
/utils/tinystr/src/ascii.rs
1
// This file is part of ICU4X. For terms of use, please see the file
5,267✔
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4

5
use crate::asciibyte::AsciiByte;
6
use crate::int_ops::{Aligned4, Aligned8};
7
use crate::TinyStrError;
8
use core::fmt;
9
use core::ops::Deref;
10
use core::str::{self, FromStr};
11

12
#[repr(transparent)]
13
#[derive(PartialEq, Eq, Ord, PartialOrd, Copy, Clone, Hash)]
6,404,825✔
14
pub struct TinyAsciiStr<const N: usize> {
15
    bytes: [AsciiByte; N],
3,189,983✔
16
}
17

18
impl<const N: usize> TinyAsciiStr<N> {
19
    /// Creates a `TinyAsciiStr<N>` from the given UTF-8 slice.
20
    /// `bytes` may contain at most `N` non-null ASCII code points.
21
    pub const fn try_from_utf8(bytes: &[u8]) -> Result<Self, TinyStrError> {
38✔
22
        Self::try_from_utf8_inner(bytes, 0, bytes.len(), false)
38✔
23
    }
38✔
24

25
    /// Creates a `TinyAsciiStr<N>` from the given UTF-16 slice.
26
    /// `code_points` may contain at most `N` non-null ASCII code points.
27
    pub const fn try_from_utf16(code_points: &[u16]) -> Result<Self, TinyStrError> {
5,240✔
28
        Self::try_from_utf16_inner(code_points, 0, code_points.len(), false)
5,240✔
29
    }
5,240✔
30

31
    /// Creates a `TinyAsciiStr<N>` from a UTF-8 slice, replacing invalid code units.
32
    ///
33
    /// Invalid code units, as well as null or non-ASCII code points
34
    /// (i.e. those outside the range U+0001..=U+007F`)
35
    /// will be replaced with the replacement byte.
36
    ///
37
    /// The input slice will be truncated if its length exceeds `N`.
38
    pub const fn from_utf8_lossy(bytes: &[u8], replacement: u8) -> Self {
5✔
39
        let mut out = [0; N];
5✔
40
        let mut i = 0;
5✔
41
        // Ord is not available in const, so no `.min(N)`
42
        let len = if bytes.len() > N { N } else { bytes.len() };
5✔
43

44
        // Indexing is protected by the len check above
45
        #[allow(clippy::indexing_slicing)]
46
        while i < len {
18✔
47
            let b = bytes[i];
13✔
48
            if b > 0 && b < 0x80 {
26✔
49
                out[i] = b;
9✔
50
            } else {
51
                out[i] = replacement;
4✔
52
            }
53
            i += 1;
13✔
54
        }
55

56
        Self {
5✔
57
            // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
58
            bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
5✔
59
        }
60
    }
5✔
61

62
    /// Creates a `TinyAsciiStr<N>` from a UTF-16 slice, replacing invalid code units.
63
    ///
64
    /// Invalid code units, as well as null or non-ASCII code points
65
    /// (i.e. those outside the range U+0001..=U+007F`)
66
    /// will be replaced with the replacement byte.
67
    ///
68
    /// The input slice will be truncated if its length exceeds `N`.
69
    pub const fn from_utf16_lossy(code_points: &[u16], replacement: u8) -> Self {
70
        let mut out = [0; N];
71
        let mut i = 0;
72
        // Ord is not available in const, so no `.min(N)`
73
        let len = if code_points.len() > N {
74
            N
75
        } else {
76
            code_points.len()
77
        };
78

79
        // Indexing is protected by the len check above
80
        #[allow(clippy::indexing_slicing)]
81
        while i < len {
82
            let b = code_points[i];
83
            if b > 0 && b < 0x80 {
84
                out[i] = b as u8;
85
            } else {
86
                out[i] = replacement;
87
            }
88
            i += 1;
89
        }
90

91
        Self {
92
            // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
93
            bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
94
        }
95
    }
96

97
    /// Attempts to parse a fixed-length byte array to a `TinyAsciiStr`.
98
    ///
99
    /// The byte array may contain trailing NUL bytes.
100
    ///
101
    /// # Example
102
    ///
103
    /// ```
104
    /// use tinystr::tinystr;
105
    /// use tinystr::TinyAsciiStr;
106
    ///
107
    /// assert_eq!(
108
    ///     TinyAsciiStr::<3>::try_from_raw(*b"GB\0"),
109
    ///     Ok(tinystr!(3, "GB"))
110
    /// );
111
    /// assert_eq!(
112
    ///     TinyAsciiStr::<3>::try_from_raw(*b"USD"),
113
    ///     Ok(tinystr!(3, "USD"))
114
    /// );
115
    /// assert!(matches!(TinyAsciiStr::<3>::try_from_raw(*b"\0A\0"), Err(_)));
116
    /// ```
117
    pub const fn try_from_raw(raw: [u8; N]) -> Result<Self, TinyStrError> {
26,058✔
118
        Self::try_from_utf8_inner(&raw, 0, N, true)
26,058✔
119
    }
26,058✔
120

121
    /// Equivalent to [`try_from_utf8(bytes[start..end])`](Self::try_from_utf8),
122
    /// but callable in a `const` context (which range indexing is not).
123
    pub const fn try_from_utf8_manual_slice(
130,831✔
124
        bytes: &[u8],
125
        start: usize,
126
        end: usize,
127
    ) -> Result<Self, TinyStrError> {
128
        Self::try_from_utf8_inner(bytes, start, end, false)
130,831✔
129
    }
130,831✔
130

131
    /// Equivalent to [`try_from_utf16(bytes[start..end])`](Self::try_from_utf16),
132
    /// but callable in a `const` context (which range indexing is not).
133
    pub const fn try_from_utf16_manual_slice(
134
        code_points: &[u16],
135
        start: usize,
136
        end: usize,
137
    ) -> Result<Self, TinyStrError> {
138
        Self::try_from_utf16_inner(code_points, start, end, false)
139
    }
140

141
    #[inline]
142
    pub(crate) const fn try_from_utf8_inner(
207,502✔
143
        bytes: &[u8],
144
        start: usize,
145
        end: usize,
146
        allow_trailing_null: bool,
147
    ) -> Result<Self, TinyStrError> {
148
        let len = end - start;
207,502✔
149
        if len > N {
207,502✔
150
            return Err(TinyStrError::TooLarge { max: N, len });
4,019✔
151
        }
152

153
        let mut out = [0; N];
203,483✔
154
        let mut i = 0;
203,483✔
155
        let mut found_null = false;
203,483✔
156
        // Indexing is protected by TinyStrError::TooLarge
157
        #[allow(clippy::indexing_slicing)]
158
        while i < len {
894,489✔
159
            let b = bytes[start + i];
691,007✔
160

161
            if b == 0 {
691,007✔
162
                found_null = true;
40,313✔
163
            } else if b >= 0x80 {
650,694✔
164
                return Err(TinyStrError::NonAscii);
1✔
165
            } else if found_null {
650,693✔
166
                // Error if there are contentful bytes after null
167
                return Err(TinyStrError::ContainsNull);
×
168
            }
169
            out[i] = b;
691,006✔
170

171
            i += 1;
691,006✔
172
        }
173

174
        if !allow_trailing_null && found_null {
203,482✔
175
            // We found some trailing nulls, error
176
            return Err(TinyStrError::ContainsNull);
×
177
        }
178

179
        Ok(Self {
203,480✔
180
            // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
181
            bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
203,480✔
182
        })
183
    }
207,500✔
184

185
    #[inline]
186
    pub(crate) const fn try_from_utf16_inner(
5,267✔
187
        code_points: &[u16],
188
        start: usize,
189
        end: usize,
190
        allow_trailing_null: bool,
191
    ) -> Result<Self, TinyStrError> {
192
        let len = end - start;
5,267✔
193
        if len > N {
5,267✔
NEW
194
            return Err(TinyStrError::TooLarge { max: N, len });
×
195
        }
196

197
        let mut out = [0; N];
5,267✔
198
        let mut i = 0;
5,267✔
199
        let mut found_null = false;
5,267✔
200
        // Indexing is protected by TinyStrError::TooLarge
201
        #[allow(clippy::indexing_slicing)]
202
        while i < len {
27,446✔
203
            let b = code_points[start + i];
22,179✔
204

205
            if b == 0 {
22,179✔
NEW
206
                found_null = true;
×
207
            } else if b >= 0x80 {
22,179✔
NEW
208
                return Err(TinyStrError::NonAscii);
×
209
            } else if found_null {
22,179✔
210
                // Error if there are contentful bytes after null
NEW
211
                return Err(TinyStrError::ContainsNull);
×
212
            }
213
            out[i] = b as u8;
22,179✔
214

215
            i += 1;
22,179✔
216
        }
217

218
        if !allow_trailing_null && found_null {
5,267✔
219
            // We found some trailing nulls, error
NEW
220
            return Err(TinyStrError::ContainsNull);
×
221
        }
222

223
        Ok(Self {
5,267✔
224
            // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
225
            bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
5,267✔
226
        })
227
    }
5,267✔
228

229
    #[inline]
230
    pub const fn try_from_str(s: &str) -> Result<Self, TinyStrError> {
28,842✔
231
        Self::try_from_utf8_inner(s.as_bytes(), 0, s.len(), false)
28,842✔
232
    }
28,842✔
233

234
    #[inline]
235
    pub const fn as_str(&self) -> &str {
664,314✔
236
        // as_utf8 is valid utf8
237
        unsafe { str::from_utf8_unchecked(self.as_utf8()) }
664,314✔
238
    }
664,314✔
239

240
    #[inline]
241
    #[must_use]
242
    pub const fn len(&self) -> usize {
769,472✔
243
        if N <= 4 {
769,472✔
244
            Aligned4::from_ascii_bytes(&self.bytes).len()
561,250✔
245
        } else if N <= 8 {
386,428✔
246
            Aligned8::from_ascii_bytes(&self.bytes).len()
30,016✔
247
        } else {
248
            let mut i = 0;
178,206✔
249
            #[allow(clippy::indexing_slicing)] // < N is safe
250
            while i < N && self.bytes[i] as u8 != AsciiByte::B0 as u8 {
809,952✔
251
                i += 1
631,746✔
252
            }
253
            i
178,206✔
254
        }
255
    }
769,472✔
256

257
    #[inline]
258
    #[must_use]
259
    pub const fn is_empty(&self) -> bool {
4✔
260
        self.bytes[0] as u8 == AsciiByte::B0 as u8
4✔
261
    }
4✔
262

263
    #[inline]
264
    #[must_use]
265
    pub const fn as_utf8(&self) -> &[u8] {
664,286✔
266
        // Safe because `self.bytes.as_slice()` pointer-casts to `&[u8]`,
267
        // and changing the length of that slice to self.len() < N is safe.
268
        unsafe {
269
            core::slice::from_raw_parts(self.bytes.as_slice().as_ptr() as *const u8, self.len())
664,286✔
270
        }
271
    }
664,286✔
272

273
    #[inline]
274
    #[must_use]
275
    pub const fn all_bytes(&self) -> &[u8; N] {
66,047✔
276
        // SAFETY: `self.bytes` has same size as [u8; N]
277
        unsafe { &*(self.bytes.as_ptr() as *const [u8; N]) }
66,047✔
278
    }
66,047✔
279

280
    #[inline]
281
    #[must_use]
282
    /// Resizes a `TinyAsciiStr<N>` to a `TinyAsciiStr<M>`.
283
    ///
284
    /// If `M < len()` the string gets truncated, otherwise only the
285
    /// memory representation changes.
286
    pub const fn resize<const M: usize>(self) -> TinyAsciiStr<M> {
8,223✔
287
        let mut bytes = [0; M];
8,223✔
288
        let mut i = 0;
8,223✔
289
        // Indexing is protected by the loop guard
290
        #[allow(clippy::indexing_slicing)]
291
        while i < M && i < N {
72,751✔
292
            bytes[i] = self.bytes[i] as u8;
64,528✔
293
            i += 1;
64,528✔
294
        }
295
        // `self.bytes` only contains ASCII bytes, with no null bytes between
296
        // ASCII characters, so this also holds for `bytes`.
297
        unsafe { TinyAsciiStr::from_utf8_unchecked(bytes) }
8,223✔
298
    }
8,223✔
299

300
    /// # Safety
301
    /// Must be called with a bytes array made of valid ASCII bytes, with no null bytes
302
    /// between ASCII characters
303
    #[must_use]
304
    pub const unsafe fn from_utf8_unchecked(bytes: [u8; N]) -> Self {
8,224✔
305
        Self {
8,224✔
306
            bytes: AsciiByte::to_ascii_byte_array(&bytes),
8,224✔
307
        }
308
    }
8,224✔
309
}
310

311
// Expands to the body of an `is_ascii_*` predicate on `TinyAsciiStr<N>`.
//
// For `N <= 4` and `N <= 8` the whole string is tested at once through
// `Aligned4`/`Aligned8` using `$word_check`. Longer strings are walked byte by
// byte up to the first NUL, using the `u8` predicates passed after it.
macro_rules! check_is {
    // Every byte must satisfy `$byte_check`.
    ($self:ident, $word_check:ident, $byte_check:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$word_check()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$word_check()
        } else {
            let mut pos = 0;
            // Won't panic because `pos < N` is tested before each access
            #[allow(clippy::indexing_slicing)]
            while pos < N && $self.bytes[pos] as u8 != AsciiByte::B0 as u8 {
                if !($self.bytes[pos] as u8).$byte_check() {
                    return false;
                }
                pos += 1;
            }
            true
        }
    };
    // The first byte must NOT satisfy `$first_reject`, and no later byte may
    // satisfy `$rest_reject`.
    ($self:ident, $word_check:ident, !$first_reject:ident, !$rest_reject:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$word_check()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$word_check()
        } else {
            // Won't panic because N is > 8
            if ($self.bytes[0] as u8).$first_reject() {
                return false;
            }
            let mut pos = 1;
            // Won't panic because `pos < N` is tested before each access
            #[allow(clippy::indexing_slicing)]
            while pos < N && $self.bytes[pos] as u8 != AsciiByte::B0 as u8 {
                if ($self.bytes[pos] as u8).$rest_reject() {
                    return false;
                }
                pos += 1;
            }
            true
        }
    };
    // The first byte must satisfy `$first_require`, and every later byte must
    // satisfy `$rest_require`.
    ($self:ident, $word_check:ident, $first_require:ident, $rest_require:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$word_check()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$word_check()
        } else {
            // Won't panic because N is > 8
            if !($self.bytes[0] as u8).$first_require() {
                return false;
            }
            let mut pos = 1;
            // Won't panic because `pos < N` is tested before each access
            #[allow(clippy::indexing_slicing)]
            while pos < N && $self.bytes[pos] as u8 != AsciiByte::B0 as u8 {
                if !($self.bytes[pos] as u8).$rest_require() {
                    return false;
                }
                pos += 1;
            }
            true
        }
    };
}
375

376
impl<const N: usize> TinyAsciiStr<N> {
377
    /// Checks if the value is composed of ASCII alphabetic characters:
378
    ///
379
    ///  * U+0041 'A' ..= U+005A 'Z', or
380
    ///  * U+0061 'a' ..= U+007A 'z'.
381
    ///
382
    /// # Examples
383
    ///
384
    /// ```
385
    /// use tinystr::TinyAsciiStr;
386
    ///
387
    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
388
    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
389
    ///
390
    /// assert!(s1.is_ascii_alphabetic());
391
    /// assert!(!s2.is_ascii_alphabetic());
392
    /// ```
393
    #[inline]
394
    #[must_use]
395
    pub const fn is_ascii_alphabetic(&self) -> bool {
107,201✔
396
        check_is!(self, is_ascii_alphabetic, is_ascii_alphabetic)
107,487✔
397
    }
107,201✔
398

399
    /// Checks if the value is composed of ASCII alphanumeric characters:
400
    ///
401
    ///  * U+0041 'A' ..= U+005A 'Z', or
402
    ///  * U+0061 'a' ..= U+007A 'z', or
403
    ///  * U+0030 '0' ..= U+0039 '9'.
404
    ///
405
    /// # Examples
406
    ///
407
    /// ```
408
    /// use tinystr::TinyAsciiStr;
409
    ///
410
    /// let s1: TinyAsciiStr<4> = "A15b".parse().expect("Failed to parse.");
411
    /// let s2: TinyAsciiStr<4> = "[3@w".parse().expect("Failed to parse.");
412
    ///
413
    /// assert!(s1.is_ascii_alphanumeric());
414
    /// assert!(!s2.is_ascii_alphanumeric());
415
    /// ```
416
    #[inline]
417
    #[must_use]
418
    pub const fn is_ascii_alphanumeric(&self) -> bool {
4,698✔
419
        check_is!(self, is_ascii_alphanumeric, is_ascii_alphanumeric)
5,106✔
420
    }
4,698✔
421

422
    /// Checks if the value is composed of ASCII decimal digits:
423
    ///
424
    ///  * U+0030 '0' ..= U+0039 '9'.
425
    ///
426
    /// # Examples
427
    ///
428
    /// ```
429
    /// use tinystr::TinyAsciiStr;
430
    ///
431
    /// let s1: TinyAsciiStr<4> = "312".parse().expect("Failed to parse.");
432
    /// let s2: TinyAsciiStr<4> = "3d".parse().expect("Failed to parse.");
433
    ///
434
    /// assert!(s1.is_ascii_numeric());
435
    /// assert!(!s2.is_ascii_numeric());
436
    /// ```
437
    #[inline]
438
    #[must_use]
439
    pub const fn is_ascii_numeric(&self) -> bool {
21,666✔
440
        check_is!(self, is_ascii_numeric, is_ascii_digit)
21,708✔
441
    }
21,666✔
442

443
    /// Checks if the value is in ASCII lower case.
444
    ///
445
    /// All letter characters are checked for case. Non-letter characters are ignored.
446
    ///
447
    /// # Examples
448
    ///
449
    /// ```
450
    /// use tinystr::TinyAsciiStr;
451
    ///
452
    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
453
    /// let s2: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
454
    /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
455
    ///
456
    /// assert!(!s1.is_ascii_lowercase());
457
    /// assert!(s2.is_ascii_lowercase());
458
    /// assert!(s3.is_ascii_lowercase());
459
    /// ```
460
    #[inline]
461
    #[must_use]
462
    pub const fn is_ascii_lowercase(&self) -> bool {
889✔
463
        check_is!(
1,349✔
464
            self,
465
            is_ascii_lowercase,
466
            !is_ascii_uppercase,
467
            !is_ascii_uppercase
468
        )
469
    }
889✔
470

471
    /// Checks if the value is in ASCII title case.
472
    ///
473
    /// This verifies that the first character is ASCII uppercase and all others ASCII lowercase.
474
    /// Non-letter characters are ignored.
475
    ///
476
    /// # Examples
477
    ///
478
    /// ```
479
    /// use tinystr::TinyAsciiStr;
480
    ///
481
    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
482
    /// let s2: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
483
    /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
484
    ///
485
    /// assert!(!s1.is_ascii_titlecase());
486
    /// assert!(s2.is_ascii_titlecase());
487
    /// assert!(s3.is_ascii_titlecase());
488
    /// ```
489
    #[inline]
490
    #[must_use]
491
    pub const fn is_ascii_titlecase(&self) -> bool {
880✔
492
        check_is!(
1,308✔
493
            self,
494
            is_ascii_titlecase,
495
            !is_ascii_lowercase,
496
            !is_ascii_uppercase
497
        )
498
    }
880✔
499

500
    /// Checks if the value is in ASCII upper case.
501
    ///
502
    /// All letter characters are checked for case. Non-letter characters are ignored.
503
    ///
504
    /// # Examples
505
    ///
506
    /// ```
507
    /// use tinystr::TinyAsciiStr;
508
    ///
509
    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
510
    /// let s2: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse.");
511
    /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
512
    ///
513
    /// assert!(!s1.is_ascii_uppercase());
514
    /// assert!(s2.is_ascii_uppercase());
515
    /// assert!(!s3.is_ascii_uppercase());
516
    /// ```
517
    #[inline]
518
    #[must_use]
519
    pub const fn is_ascii_uppercase(&self) -> bool {
880✔
520
        check_is!(
1,336✔
521
            self,
522
            is_ascii_uppercase,
523
            !is_ascii_lowercase,
524
            !is_ascii_lowercase
525
        )
526
    }
880✔
527

528
    /// Checks if the value is composed of ASCII alphabetic lower case characters:
529
    ///
530
    ///  * U+0061 'a' ..= U+007A 'z',
531
    ///
532
    /// # Examples
533
    ///
534
    /// ```
535
    /// use tinystr::TinyAsciiStr;
536
    ///
537
    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
538
    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
539
    /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
540
    /// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
541
    /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
542
    ///
543
    /// assert!(!s1.is_ascii_alphabetic_lowercase());
544
    /// assert!(!s2.is_ascii_alphabetic_lowercase());
545
    /// assert!(!s3.is_ascii_alphabetic_lowercase());
546
    /// assert!(s4.is_ascii_alphabetic_lowercase());
547
    /// assert!(!s5.is_ascii_alphabetic_lowercase());
548
    /// ```
549
    #[inline]
550
    #[must_use]
551
    pub const fn is_ascii_alphabetic_lowercase(&self) -> bool {
2,394✔
552
        check_is!(
2,446✔
553
            self,
554
            is_ascii_alphabetic_lowercase,
555
            is_ascii_lowercase,
556
            is_ascii_lowercase
557
        )
558
    }
2,394✔
559

560
    /// Checks if the value is composed of ASCII alphabetic, with the first character being ASCII uppercase, and all others ASCII lowercase.
561
    ///
562
    /// # Examples
563
    ///
564
    /// ```
565
    /// use tinystr::TinyAsciiStr;
566
    ///
567
    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
568
    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
569
    /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
570
    /// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
571
    /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
572
    ///
573
    /// assert!(s1.is_ascii_alphabetic_titlecase());
574
    /// assert!(!s2.is_ascii_alphabetic_titlecase());
575
    /// assert!(!s3.is_ascii_alphabetic_titlecase());
576
    /// assert!(!s4.is_ascii_alphabetic_titlecase());
577
    /// assert!(!s5.is_ascii_alphabetic_titlecase());
578
    /// ```
579
    #[inline]
580
    #[must_use]
581
    pub const fn is_ascii_alphabetic_titlecase(&self) -> bool {
8,976✔
582
        check_is!(
9,020✔
583
            self,
584
            is_ascii_alphabetic_titlecase,
585
            is_ascii_uppercase,
586
            is_ascii_lowercase
587
        )
588
    }
8,976✔
589

590
    /// Checks if the value is composed of ASCII alphabetic upper case characters:
591
    ///
592
    ///  * U+0041 'A' ..= U+005A 'Z',
593
    ///
594
    /// # Examples
595
    ///
596
    /// ```
597
    /// use tinystr::TinyAsciiStr;
598
    ///
599
    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
600
    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
601
    /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
602
    /// let s4: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse.");
603
    /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
604
    ///
605
    /// assert!(!s1.is_ascii_alphabetic_uppercase());
606
    /// assert!(!s2.is_ascii_alphabetic_uppercase());
607
    /// assert!(!s3.is_ascii_alphabetic_uppercase());
608
    /// assert!(s4.is_ascii_alphabetic_uppercase());
609
    /// assert!(!s5.is_ascii_alphabetic_uppercase());
610
    /// ```
611
    #[inline]
612
    #[must_use]
613
    pub const fn is_ascii_alphabetic_uppercase(&self) -> bool {
9,920✔
614
        check_is!(
9,948✔
615
            self,
616
            is_ascii_alphabetic_uppercase,
617
            is_ascii_uppercase,
618
            is_ascii_uppercase
619
        )
620
    }
9,920✔
621
}
622

623
// Expands to the body of a `to_ascii_*` conversion on `TinyAsciiStr<N>`.
//
// For `N <= 4` and `N <= 8` the whole string is converted at once through
// `Aligned4`/`Aligned8` using `$word_to`. Longer strings are converted byte by
// byte with `$rest_to` up to the first NUL; when `$first_to` is given, the
// first byte is then overwritten with the result of applying `$first_to` to it.
macro_rules! to {
    ($self:ident, $word_to:ident, $rest_to:ident $(,$first_to:ident)?) => {{
        let mut pos = 0;
        if N <= 4 {
            let converted = Aligned4::from_ascii_bytes(&$self.bytes).$word_to().to_ascii_bytes();
            // Won't panic because self.bytes has length N and converted has length >= N
            #[allow(clippy::indexing_slicing)]
            while pos < N {
                $self.bytes[pos] = converted[pos];
                pos += 1;
            }
        } else if N <= 8 {
            let converted = Aligned8::from_ascii_bytes(&$self.bytes).$word_to().to_ascii_bytes();
            // Won't panic because self.bytes has length N and converted has length >= N
            #[allow(clippy::indexing_slicing)]
            while pos < N {
                $self.bytes[pos] = converted[pos];
                pos += 1;
            }
        } else {
            // Won't panic because `pos < N` is tested before each access
            #[allow(clippy::indexing_slicing)]
            while pos < N && $self.bytes[pos] as u8 != AsciiByte::B0 as u8 {
                let mapped = ($self.bytes[pos] as u8).$rest_to();
                // SAFETY: AsciiByte is repr(u8) and has same size as u8
                $self.bytes[pos] = unsafe { core::mem::transmute::<u8, AsciiByte>(mapped) };
                pos += 1;
            }
            $(
                let first = ($self.bytes[0] as u8).$first_to();
                // SAFETY: AsciiByte is repr(u8) and has same size as u8
                $self.bytes[0] = unsafe { core::mem::transmute::<u8, AsciiByte>(first) };
            )?
        }
        $self
    }};
}
664

665
impl<const N: usize> TinyAsciiStr<N> {
666
    /// Converts this type to its ASCII lower case equivalent in-place.
667
    ///
668
    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', other characters are unchanged.
669
    ///
670
    /// # Examples
671
    ///
672
    /// ```
673
    /// use tinystr::TinyAsciiStr;
674
    ///
675
    /// let s1: TinyAsciiStr<4> = "TeS3".parse().expect("Failed to parse.");
676
    ///
677
    /// assert_eq!(&*s1.to_ascii_lowercase(), "tes3");
678
    /// ```
679
    #[inline]
680
    #[must_use]
681
    pub const fn to_ascii_lowercase(mut self) -> Self {
65,082✔
682
        to!(self, to_ascii_lowercase, to_ascii_lowercase)
283,082✔
683
    }
65,084✔
684

685
    /// Converts this type to its ASCII title case equivalent in-place.
686
    ///
687
    /// The first character is converted to ASCII uppercase; the remaining characters
688
    /// are converted to ASCII lowercase.
689
    ///
690
    /// # Examples
691
    ///
692
    /// ```
693
    /// use tinystr::TinyAsciiStr;
694
    ///
695
    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
696
    ///
697
    /// assert_eq!(&*s1.to_ascii_titlecase(), "Test");
698
    /// ```
699
    #[inline]
700
    #[must_use]
701
    pub const fn to_ascii_titlecase(mut self) -> Self {
32,234✔
702
        to!(
162,570✔
703
            self,
704
            to_ascii_titlecase,
705
            to_ascii_lowercase,
706
            to_ascii_uppercase
707
        )
708
    }
32,234✔
709

710
    /// Converts this type to its ASCII upper case equivalent in-place.
711
    ///
712
    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', other characters are unchanged.
713
    ///
714
    /// # Examples
715
    ///
716
    /// ```
717
    /// use tinystr::TinyAsciiStr;
718
    ///
719
    /// let s1: TinyAsciiStr<4> = "Tes3".parse().expect("Failed to parse.");
720
    ///
721
    /// assert_eq!(&*s1.to_ascii_uppercase(), "TES3");
722
    /// ```
723
    #[inline]
724
    #[must_use]
725
    pub const fn to_ascii_uppercase(mut self) -> Self {
19,153✔
726
        to!(self, to_ascii_uppercase, to_ascii_uppercase)
79,917✔
727
    }
19,153✔
728
}
729

730
impl<const N: usize> fmt::Debug for TinyAsciiStr<N> {
731
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
×
732
        fmt::Debug::fmt(self.as_str(), f)
×
733
    }
×
734
}
735

736
impl<const N: usize> fmt::Display for TinyAsciiStr<N> {
737
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
5,436✔
738
        fmt::Display::fmt(self.as_str(), f)
5,436✔
739
    }
5,436✔
740
}
741

742
impl<const N: usize> Deref for TinyAsciiStr<N> {
743
    type Target = str;
744
    #[inline]
745
    fn deref(&self) -> &str {
47,387✔
746
        self.as_str()
47,387✔
747
    }
47,387✔
748
}
749

750
impl<const N: usize> FromStr for TinyAsciiStr<N> {
751
    type Err = TinyStrError;
752
    #[inline]
753
    fn from_str(s: &str) -> Result<Self, Self::Err> {
21,114✔
754
        Self::try_from_str(s)
21,114✔
755
    }
21,114✔
756
}
757

758
impl<const N: usize> PartialEq<str> for TinyAsciiStr<N> {
759
    fn eq(&self, other: &str) -> bool {
760
        self.deref() == other
761
    }
762
}
763

764
impl<const N: usize> PartialEq<&str> for TinyAsciiStr<N> {
765
    fn eq(&self, other: &&str) -> bool {
31,308✔
766
        self.deref() == *other
31,308✔
767
    }
31,308✔
768
}
769

770
/// Compares the string contents with an owned `String`.
#[cfg(feature = "alloc")]
impl<const N: usize> PartialEq<alloc::string::String> for TinyAsciiStr<N> {
    fn eq(&self, other: &alloc::string::String) -> bool {
        let own: &str = self.as_str();
        own == other.as_str()
    }
}
776

777
/// Compares an owned `String` with the contents of a `TinyAsciiStr`.
#[cfg(feature = "alloc")]
impl<const N: usize> PartialEq<TinyAsciiStr<N>> for alloc::string::String {
    fn eq(&self, other: &TinyAsciiStr<N>) -> bool {
        let theirs: &str = other.as_str();
        self.as_str() == theirs
    }
}
783

784
#[cfg(test)]
785
mod test {
786
    use super::*;
787
    use rand::distributions::Distribution;
788
    use rand::distributions::Standard;
789
    use rand::rngs::SmallRng;
790
    use rand::seq::SliceRandom;
791
    use rand::SeedableRng;
792

793
    const STRINGS: [&str; 26] = [
794
        "Latn",
795
        "laTn",
796
        "windows",
797
        "AR",
798
        "Hans",
799
        "macos",
800
        "AT",
801
        "infiniband",
802
        "FR",
803
        "en",
804
        "Cyrl",
805
        "FromIntegral",
806
        "NO",
807
        "419",
808
        "MacintoshOSX2019",
809
        "a3z",
810
        "A3z",
811
        "A3Z",
812
        "a3Z",
813
        "3A",
814
        "3Z",
815
        "3a",
816
        "3z",
817
        "@@[`{",
818
        "UK",
819
        "E12",
820
    ];
821

822
    fn gen_strings(num_strings: usize, allowed_lengths: &[usize]) -> Vec<String> {
70✔
823
        let mut rng = SmallRng::seed_from_u64(2022);
74✔
824
        // Need to do this in 2 steps since the RNG is needed twice
825
        let string_lengths = core::iter::repeat_with(|| *allowed_lengths.choose(&mut rng).unwrap())
7,152✔
826
            .take(num_strings)
827
            .collect::<Vec<usize>>();
828
        string_lengths
70✔
829
            .iter()
830
            .map(|len| {
7,194✔
831
                Standard
14,244✔
832
                    .sample_iter(&mut rng)
7,122✔
833
                    .filter(|b: &u8| *b > 0 && *b < 0x80)
75,371✔
834
                    .take(*len)
7,122✔
835
                    .collect::<Vec<u8>>()
836
            })
7,122✔
837
            .map(|byte_vec| String::from_utf8(byte_vec).expect("All ASCII"))
7,140✔
838
            .collect()
839
    }
68✔
840

841
    fn check_operation<T, F1, F2, const N: usize>(reference_f: F1, tinystr_f: F2)
18,216✔
842
    where
843
        F1: Fn(&str) -> T,
844
        F2: Fn(TinyAsciiStr<N>) -> T,
845
        T: core::fmt::Debug + core::cmp::PartialEq,
846
    {
847
        for s in STRINGS
18,288✔
848
            .into_iter()
849
            .map(str::to_owned)
72✔
850
            .chain(gen_strings(100, &[3, 4, 5, 8, 12]))
144✔
851
        {
852
            let t = match TinyAsciiStr::<N>::from_str(&s) {
9,072✔
853
                Ok(t) => t,
5,280✔
854
                Err(TinyStrError::TooLarge { .. }) => continue,
855
                Err(e) => panic!("{}", e),
×
856
            };
857
            let expected = reference_f(&s);
5,280✔
858
            let actual = tinystr_f(t);
5,280✔
859
            assert_eq!(expected, actual, "TinyAsciiStr<{N}>: {s:?}");
5,280✔
860

861
            let s_utf16: Vec<u16> = s.encode_utf16().collect();
5,280✔
862
            let t = match TinyAsciiStr::<N>::try_from_utf16(&s_utf16) {
5,280✔
863
                Ok(t) => t,
5,280✔
864
                Err(TinyStrError::TooLarge { .. }) => continue,
NEW
865
                Err(e) => panic!("{}", e),
×
866
            };
867
            let expected = reference_f(&s);
5,280✔
868
            let actual = tinystr_f(t);
5,280✔
869
            assert_eq!(expected, actual, "TinyAsciiStr<{N}>: {s:?}");
5,280✔
870
        }
9,072✔
871
    }
72✔
872

873
    /// Compares `TinyAsciiStr::is_ascii_alphabetic` with a per-byte reference
    /// check for capacities 2, 3, 4, 5, 8 and 16.
    #[test]
    fn test_is_ascii_alphabetic() {
        fn run_for_width<const N: usize>() {
            let reference = |s: &str| s.bytes().all(|b| b.is_ascii_alphabetic());
            let subject = |tiny: TinyAsciiStr<N>| tiny.is_ascii_alphabetic();
            check_operation(reference, subject);
        }
        run_for_width::<2>();
        run_for_width::<3>();
        run_for_width::<4>();
        run_for_width::<5>();
        run_for_width::<8>();
        run_for_width::<16>();
    }
2✔
888

889
    /// Compares `TinyAsciiStr::is_ascii_alphanumeric` with a per-byte reference
    /// check for capacities 2, 3, 4, 5, 8 and 16.
    #[test]
    fn test_is_ascii_alphanumeric() {
        fn run_for_width<const N: usize>() {
            let reference = |s: &str| s.bytes().all(|b| b.is_ascii_alphanumeric());
            let subject = |tiny: TinyAsciiStr<N>| tiny.is_ascii_alphanumeric();
            check_operation(reference, subject);
        }
        run_for_width::<2>();
        run_for_width::<3>();
        run_for_width::<4>();
        run_for_width::<5>();
        run_for_width::<8>();
        run_for_width::<16>();
    }
2✔
904

905
    /// Compares `TinyAsciiStr::is_ascii_numeric` with a per-byte ASCII-digit
    /// reference check for capacities 2, 3, 4, 5, 8 and 16.
    #[test]
    fn test_is_ascii_numeric() {
        fn run_for_width<const N: usize>() {
            let reference = |s: &str| s.bytes().all(|b| b.is_ascii_digit());
            let subject = |tiny: TinyAsciiStr<N>| tiny.is_ascii_numeric();
            check_operation(reference, subject);
        }
        run_for_width::<2>();
        run_for_width::<3>();
        run_for_width::<4>();
        run_for_width::<5>();
        run_for_width::<8>();
        run_for_width::<16>();
    }
2✔
920

921
    /// Checks `TinyAsciiStr::is_ascii_lowercase` for capacities 2, 3, 4, 5, 8
    /// and 16: a string counts as lowercase exactly when lowercasing it (via a
    /// 16-byte `TinyAsciiStr`) leaves it unchanged.
    #[test]
    fn test_is_ascii_lowercase() {
        fn run_for_width<const N: usize>() {
            let reference = |s: &str| {
                let lowered = TinyAsciiStr::<16>::from_str(s)
                    .unwrap()
                    .to_ascii_lowercase();
                lowered.as_str() == s
            };
            let subject = |tiny: TinyAsciiStr<N>| tiny.is_ascii_lowercase();
            check_operation(reference, subject);
        }
        run_for_width::<2>();
        run_for_width::<3>();
        run_for_width::<4>();
        run_for_width::<5>();
        run_for_width::<8>();
        run_for_width::<16>();
    }
2✔
941

942
    /// Checks `TinyAsciiStr::is_ascii_titlecase` for capacities 2, 3, 4, 5, 8
    /// and 16: a string counts as titlecase exactly when titlecasing it (via a
    /// 16-byte `TinyAsciiStr`) leaves it unchanged.
    #[test]
    fn test_is_ascii_titlecase() {
        fn run_for_width<const N: usize>() {
            let reference = |s: &str| {
                let titled = TinyAsciiStr::<16>::from_str(s)
                    .unwrap()
                    .to_ascii_titlecase();
                titled.as_str() == s
            };
            let subject = |tiny: TinyAsciiStr<N>| tiny.is_ascii_titlecase();
            check_operation(reference, subject);
        }
        run_for_width::<2>();
        run_for_width::<3>();
        run_for_width::<4>();
        run_for_width::<5>();
        run_for_width::<8>();
        run_for_width::<16>();
    }
2✔
962

963
    /// Checks `TinyAsciiStr::is_ascii_uppercase` for capacities 2, 3, 4, 5, 8
    /// and 16: a string counts as uppercase exactly when uppercasing it (via a
    /// 16-byte `TinyAsciiStr`) leaves it unchanged.
    #[test]
    fn test_is_ascii_uppercase() {
        fn run_for_width<const N: usize>() {
            let reference = |s: &str| {
                let raised = TinyAsciiStr::<16>::from_str(s)
                    .unwrap()
                    .to_ascii_uppercase();
                raised.as_str() == s
            };
            let subject = |tiny: TinyAsciiStr<N>| tiny.is_ascii_uppercase();
            check_operation(reference, subject);
        }
        run_for_width::<2>();
        run_for_width::<3>();
        run_for_width::<4>();
        run_for_width::<5>();
        run_for_width::<8>();
        run_for_width::<16>();
    }
2✔
983

984
    /// Checks `TinyAsciiStr::is_ascii_alphabetic_lowercase` for capacities 2,
    /// 3, 4, 5, 8 and 16 against a reference that requires the string to be
    /// all ASCII letters and unchanged by lowercasing.
    #[test]
    fn test_is_ascii_alphabetic_lowercase() {
        fn run_for_width<const N: usize>() {
            let reference = |s: &str| {
                // Alphabetic check first; the case check runs only if it passes.
                if !s.bytes().all(|b| b.is_ascii_alphabetic()) {
                    return false;
                }
                let lowered = TinyAsciiStr::<16>::from_str(s)
                    .unwrap()
                    .to_ascii_lowercase();
                lowered.as_str() == s
            };
            let subject = |tiny: TinyAsciiStr<N>| tiny.is_ascii_alphabetic_lowercase();
            check_operation(reference, subject);
        }
        run_for_width::<2>();
        run_for_width::<3>();
        run_for_width::<4>();
        run_for_width::<5>();
        run_for_width::<8>();
        run_for_width::<16>();
    }
2✔
1007

1008
    /// Checks `TinyAsciiStr::is_ascii_alphabetic_titlecase` for capacities 2,
    /// 3, 4, 5, 8 and 16 against a reference that requires the string to be
    /// all ASCII letters and unchanged by titlecasing.
    #[test]
    fn test_is_ascii_alphabetic_titlecase() {
        fn run_for_width<const N: usize>() {
            let reference = |s: &str| {
                // Alphabetic check first; the case check runs only if it passes.
                if !s.bytes().all(|b| b.is_ascii_alphabetic()) {
                    return false;
                }
                let titled = TinyAsciiStr::<16>::from_str(s)
                    .unwrap()
                    .to_ascii_titlecase();
                titled.as_str() == s
            };
            let subject = |tiny: TinyAsciiStr<N>| tiny.is_ascii_alphabetic_titlecase();
            check_operation(reference, subject);
        }
        run_for_width::<2>();
        run_for_width::<3>();
        run_for_width::<4>();
        run_for_width::<5>();
        run_for_width::<8>();
        run_for_width::<16>();
    }
2✔
1031

1032
    /// Checks `TinyAsciiStr::is_ascii_alphabetic_uppercase` for capacities 2,
    /// 3, 4, 5, 8 and 16 against a reference that requires the string to be
    /// all ASCII letters and unchanged by uppercasing.
    #[test]
    fn test_is_ascii_alphabetic_uppercase() {
        fn run_for_width<const N: usize>() {
            let reference = |s: &str| {
                // Alphabetic check first; the case check runs only if it passes.
                if !s.bytes().all(|b| b.is_ascii_alphabetic()) {
                    return false;
                }
                let raised = TinyAsciiStr::<16>::from_str(s)
                    .unwrap()
                    .to_ascii_uppercase();
                raised.as_str() == s
            };
            let subject = |tiny: TinyAsciiStr<N>| tiny.is_ascii_alphabetic_uppercase();
            check_operation(reference, subject);
        }
        run_for_width::<2>();
        run_for_width::<3>();
        run_for_width::<4>();
        run_for_width::<5>();
        run_for_width::<8>();
        run_for_width::<16>();
    }
2✔
1055

1056
    /// Compares `TinyAsciiStr::to_ascii_lowercase` with the standard library's
    /// ASCII lowercasing of the same text, for capacities 2, 3, 4, 5, 8 and 16.
    #[test]
    fn test_to_ascii_lowercase() {
        fn run_for_width<const N: usize>() {
            let reference = |s: &str| s.to_ascii_lowercase();
            let subject =
                |tiny: TinyAsciiStr<N>| String::from(tiny.to_ascii_lowercase().as_str());
            check_operation(reference, subject);
        }
        run_for_width::<2>();
        run_for_width::<3>();
        run_for_width::<4>();
        run_for_width::<5>();
        run_for_width::<8>();
        run_for_width::<16>();
    }
2✔
1075

1076
    /// Compares `TinyAsciiStr::to_ascii_titlecase` with a reference built from
    /// `str` primitives (lowercase everything, then uppercase the first
    /// character), for capacities 2, 3, 4, 5, 8 and 16.
    #[test]
    fn test_to_ascii_titlecase() {
        fn check<const N: usize>() {
            check_operation(
                |s: &str| {
                    let mut r = s.to_ascii_lowercase();
                    // `get_mut(..1)` returns the leading one-byte character, or
                    // `None` when the string is empty or starts with a multi-byte
                    // character, so no `unsafe` byte access is required and an
                    // empty input cannot panic.
                    if let Some(first) = r.get_mut(..1) {
                        first.make_ascii_uppercase();
                    }
                    r
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_titlecase(t).as_str().to_owned(),
            )
        }
        check::<2>();
        check::<3>();
        check::<4>();
        check::<5>();
        check::<8>();
        check::<16>();
    }
2✔
1099

1100
    /// Compares `TinyAsciiStr::to_ascii_uppercase` with the standard library's
    /// ASCII uppercasing of the same text, for capacities 2, 3, 4, 5, 8 and 16.
    #[test]
    fn test_to_ascii_uppercase() {
        fn run_for_width<const N: usize>() {
            let reference = |s: &str| s.to_ascii_uppercase();
            let subject =
                |tiny: TinyAsciiStr<N>| String::from(tiny.to_ascii_uppercase().as_str());
            check_operation(reference, subject);
        }
        run_for_width::<2>();
        run_for_width::<3>();
        run_for_width::<4>();
        run_for_width::<5>();
        run_for_width::<8>();
        run_for_width::<16>();
    }
2✔
1119

1120
    /// Exercises `TinyAsciiStr::<4>::from_utf8_lossy` with `?` as the
    /// replacement byte: empty input, embedded NUL, a lone NUL, over-long input
    /// and non-ASCII bytes.
    #[test]
    fn lossy_constructor() {
        // Every case uses the same capacity and replacement byte.
        let lossy = |bytes: &[u8]| TinyAsciiStr::<4>::from_utf8_lossy(bytes, b'?');

        // Empty input stays empty.
        assert_eq!(lossy(b"").as_str(), "");
        // NUL bytes are replaced, wherever they appear.
        assert_eq!(lossy(b"oh\0o").as_str(), "oh?o");
        assert_eq!(lossy(b"\0").as_str(), "?");
        // Input longer than the capacity is cut to the first four bytes.
        assert_eq!(lossy(b"toolong").as_str(), "tool");
        // Bytes outside the ASCII range are replaced.
        assert_eq!(lossy(&[b'a', 0x80, 0xFF, b'1']).as_str(), "a??1");
    }
2✔
1140
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc