• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

zbraniecki / icu4x / 8219362155

08 Mar 2024 01:21PM UTC coverage: 75.985% (+3.0%) from 73.009%
8219362155

push

github

web-flow
Bump diplomat (#4671)

And fix some Dart renames

49581 of 65251 relevant lines covered (75.99%)

519628.46 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.79
/utils/pattern/src/parser/mod.rs
1
// This file is part of ICU4X. For terms of use, please see the file
25✔
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4

5
pub mod error;
6
pub mod token;
7

8
use alloc::{borrow::Cow, vec, vec::Vec};
9
use core::{fmt::Debug, marker::PhantomData, str::FromStr};
10
pub use error::ParserError;
11
pub use token::ParsedPatternItem;
12

13
/// Internal state of the [`Parser`] state machine.
#[derive(PartialEq, Debug)]
enum ParserState {
    /// Scanning unquoted text outside of any placeholder.
    Default,
    /// Inside a `{...}` placeholder, waiting for the closing `}`.
    Placeholder,
    /// Inside a `'...'` quoted literal, waiting for the closing `'`.
    QuotedLiteral,
    /// Positioned on the second `'` of a doubled apostrophe (`''`).
    ///
    /// `quoted` records whether the pair was found inside a quoted literal,
    /// which decides the state the parser returns to afterwards.
    Apostrophe { quoted: bool },
}

impl Default for ParserState {
    /// A parser starts out in [`ParserState::Default`].
    fn default() -> Self {
        Self::Default
    }
}
26

27
/// Ends the literal currently being scanned and switches `$self` to `$next_state`.
///
/// The expansion contains both a `return` and a `continue`, so it must be used
/// inside a loop within a function that returns `Result<Option<ParsedPatternItem<..>>, _>`:
///
/// - if the range returned by `$self.advance_state($self.idx, $next_state)` is
///   non-empty, that slice of `$self.input` is returned from the enclosing
///   function as a borrowed `ParsedPatternItem::Literal` carrying the `$quoted` flag;
/// - otherwise the enclosing loop is `continue`d, so no empty literal is ever emitted.
macro_rules! handle_literal {
    ($self:ident, $quoted:expr, $next_state:expr) => {{
        let range = $self.advance_state($self.idx, $next_state);
        if !range.is_empty() {
            #[allow(clippy::indexing_slicing)]
            // TODO(#1668) Clippy exceptions need docs or fixing.
            return Ok(Some(ParsedPatternItem::Literal {
                content: Cow::Borrowed(&$self.input[range]),
                quoted: $quoted,
            }));
        } else {
            continue;
        }
    }};
}
42

43
/// Options passed to the constructor of [`Parser`].
///
/// ✨ *Enabled with the `alloc` Cargo feature.*
#[derive(Debug)]
#[non_exhaustive]
pub struct ParserOptions {
    /// Controls whether ASCII letters can appear in the raw
    /// pattern.
    ///
    /// If set to `true`, ASCII letters can be used directly in the pattern,
    /// like "{0} days".
    ///
    /// If set to `false`, ASCII letters can only appear in quoted literals,
    /// like "{0} 'days'".
    ///
    /// Default is `true`.
    pub allow_raw_letters: bool,
}

impl Default for ParserOptions {
    /// Returns the default options, in which raw ASCII letters are allowed.
    fn default() -> Self {
        Self {
            allow_raw_letters: true,
        }
    }
}
69

70
/// Placeholder pattern parser.
71
///
72
/// The parser allows for handling flexible range of generic patterns
73
/// with placeholders.
74
///
75
/// The [`Parser`] is generic over any placeholder which implements [`FromStr`]
76
/// allowing the consumer to parse placeholder patterns such as "{0}, {1}",
77
/// "{date}, {time}" or any other. A placeholder must be enclosed in `{` and `}`
78
/// characters in the input pattern string.
79
///
80
/// At the moment the parser is written as a custom fallible iterator.
81
///
82
/// ✨ *Enabled with the `alloc` Cargo feature.*
83
///
84
/// # Examples
85
///
86
/// ```
87
/// use icu_pattern::{ParsedPatternItem, Parser, ParserOptions};
88
///
89
/// let input = "{0}, {1}";
90
///
91
/// let mut parser = Parser::new(input, ParserOptions::default());
92
///
93
/// let mut result = vec![];
94
///
95
/// while let Some(element) =
96
///     parser.try_next().expect("Failed to advance iterator")
97
/// {
98
///     result.push(element);
99
/// }
100
///
101
/// assert_eq!(
102
///     result,
103
///     &[
104
///         ParsedPatternItem::Placeholder(0),
105
///         ParsedPatternItem::Literal {
106
///             content: ", ".into(),
107
///             quoted: false
108
///         },
109
///         ParsedPatternItem::Placeholder(1),
110
///     ]
111
/// );
112
/// ```
113
///
114
/// # Named placeholders
115
///
116
/// The parser is also capable of parsing different placeholder types such as strings.
117
///
118
/// ## Examples
119
/// ```
120
/// use icu_pattern::{ParsedPatternItem, Parser, ParserOptions};
121
///
122
/// let input = "{start}, {end}";
123
///
124
/// let mut parser = Parser::new(input, ParserOptions::default());
125
///
126
/// let mut result = vec![];
127
///
128
/// while let Some(element) =
129
///     parser.try_next().expect("Failed to advance iterator")
130
/// {
131
///     result.push(element);
132
/// }
133
///
134
/// assert_eq!(
135
///     result,
136
///     &[
137
///         ParsedPatternItem::Placeholder("start".to_owned()),
138
///         ParsedPatternItem::Literal {
139
///             content: ", ".into(),
140
///             quoted: false
141
///         },
142
///         ParsedPatternItem::Placeholder("end".to_owned()),
143
///     ]
144
/// );
145
/// ```
146
///
147
/// # Type parameters
148
///
149
/// - `P`: The type of the placeholder used as a key for the [`PlaceholderValueProvider`].
150
///
151
/// # Lifetimes
152
///
153
/// - `p`: The life time of an input string slice to be parsed.
154
///
155
/// # Design Decisions
156
///
157
/// The parser is written in an intentionally generic way to enable use against wide range
158
/// of potential placeholder pattern models and use cases.
159
///
160
/// Serveral design decisions have been made that the reader should be aware of when using the API.
161
///
162
/// ## Zero copy
163
///
164
/// The parser is intended for runtime use and is optimized for performance and low memory overhad.
165
///
166
/// Zero copy parsing is a model which allows the parser to produce tokens that are de-facto
167
/// slices of the input without ever having to modify the input or copy from it.
168
///
169
/// In case of ICU patterns that decision brings a trade-off around handling of quoted literals.
170
/// A parser that copies bytes from the input when generating the output can take a pattern literal
171
/// that contains a quoted portion and concatenace the parts, effectively generating a single
172
/// literal out of a series of syntactical literal quoted and unquoted nodes.
173
/// A zero copy parser sacrifices that convenience for marginal performance gains.
174
///
175
/// The rationale for the decision is that many placeholder patterns do not contain ASCII letters
176
/// and therefore can benefit from this design decision.
177
/// Secondly, even in scenarios where ASCII letters, or other quoted literals, are used, the
178
/// zero-copy design still maintains high performance, only increasing the number of tokens
179
/// returned by the parser, but without increase to allocations.
180
///
181
/// ### Examples
182
/// ```
183
/// use icu_pattern::{ParsedPatternItem, Parser, ParserOptions};
184
///
185
/// let input = "{0} 'and' {1}";
186
///
187
/// let mut parser = Parser::new(input, ParserOptions::default());
188
///
189
/// let mut result = vec![];
190
///
191
/// while let Some(element) =
192
///     parser.try_next().expect("Failed to advance iterator")
193
/// {
194
///     result.push(element);
195
/// }
196
///
197
/// assert_eq!(
198
///     result,
199
///     &[
200
///         ParsedPatternItem::Placeholder(0),
201
///         ParsedPatternItem::Literal {
202
///             content: " ".into(),
203
///             quoted: false
204
///         },
205
///         ParsedPatternItem::Literal {
206
///             content: "and".into(),
207
///             quoted: true
208
///         },
209
///         ParsedPatternItem::Literal {
210
///             content: " ".into(),
211
///             quoted: false
212
///         },
213
///         ParsedPatternItem::Placeholder(1),
214
///     ]
215
/// );
216
/// ```
217
///
218
/// ## Fallible Iterator
219
///
220
/// Rust providers a strong support for iterators and iterator combinators, which
221
/// fits very well into the design of this parser/interpolator model.
222
///
223
/// Unfortunately, Rust iterators at the moment are infallible, while parsers are inhereantely
224
/// fallible. As such, the decision has been made to design the API in line with what
225
/// we hope will become a trait signature of a fallible iterator in the future, rather
226
/// than implementing a reversed infallible iterator (where the [`Item`] would be
227
/// `Option<Result<Item>>`).
228
///
229
/// That decision impacts the ergonomics of operating on the parser, on one hand making
230
/// the fallible iteration more ergonomic, at a trade-off of losing access to the wide
231
/// range of Rust iterator traits.
232
///
233
/// ## Generic Placeholder
234
///
235
/// To handle generic placeholder design, the only constrain necessary in the parser
236
/// is that a placeholder must be parsed from a string slice.
237
/// At the moment of writing, Rust is [preparing to deprecate][`RFC 2924`] [`FromStr`] in favor of
238
/// [`TryFrom<&str>`][`TryFrom`].
239
/// Among many benfits of such transition would be the auto-trait behavior of [`From`] and
240
/// a [`TryFrom<&str>`][`TryFrom`] for [`&str`] allowing for placeholders to be [`&str`] themselves.
241
///
242
/// Unfortunately, at the moment [`TryFrom<&str>`][`TryFrom`] for [`usize`] is not implemented, which would
243
/// impact the core use case of placeholder patterns.
244
///
245
/// In result, the decision has been made to use [`FromStr`] for the time being, until
246
/// [`TryFrom<&str>`][`TryFrom`] gets implemented on all types that support [`FromStr`].
247
///
248
/// [`TR35 2.6.1]: https://unicode.org/reports/tr35/tr35-dates.html#dateTimeFormat
249
/// [`RFC 2924`]: https://github.com/rust-lang/rfcs/pull/2924
250
/// [`Item`]: core::iter::Iterator::Item
251
/// [`TryFrom`]: core::convert::TryFrom
252
/// [`PlaceholderValueProvider`]: crate::PlaceholderValueProvider
253
#[derive(Debug)]
254
pub struct Parser<'p, P> {
255
    input: &'p str,
256
    len: usize,
257

258
    allow_raw_letters: bool,
259

260
    start_idx: usize,
261
    idx: usize,
262

263
    state: ParserState,
264
    marker: PhantomData<P>,
265
}
266

267
impl<'p, P> Parser<'p, P> {
268
    /// Creates a new `Parser`.
269
    ///
270
    /// The `allow_raw_letters` controls whether the parser will support
271
    /// ASCII letters without quotes.
272
    ///
273
    /// # Examples
274
    /// ```
275
    /// use icu_pattern::{Parser, ParserOptions};
276
    /// let mut parser = Parser::<usize>::new("{0}, {1}", ParserOptions::default());
277
    /// ```
278
    pub fn new(input: &'p str, options: ParserOptions) -> Self {
19✔
279
        Self {
19✔
280
            input,
281
            len: input.len(),
19✔
282

283
            allow_raw_letters: options.allow_raw_letters,
284

285
            start_idx: 0,
286
            idx: 0,
287

288
            state: ParserState::default(),
19✔
289
            marker: PhantomData,
290
        }
291
    }
19✔
292

293
    /// An iterator method that advances the iterator and returns the result of an attempt to parse
294
    /// the next token.
295
    ///
296
    /// # Examples
297
    /// ```
298
    /// use icu_pattern::{ParsedPatternItem, Parser, ParserOptions};
299
    ///
300
    /// let mut parser = Parser::<usize>::new("{0}, {1}", ParserOptions::default());
301
    ///
302
    /// // A call to try_next() returns the next value…
303
    /// assert_eq!(
304
    ///     Ok(Some(ParsedPatternItem::Placeholder(0))),
305
    ///     parser.try_next()
306
    /// );
307
    /// assert_eq!(
308
    ///     Ok(Some(ParsedPatternItem::Literal {
309
    ///         content: ", ".into(),
310
    ///         quoted: false
311
    ///     })),
312
    ///     parser.try_next()
313
    /// );
314
    /// assert_eq!(
315
    ///     Ok(Some(ParsedPatternItem::Placeholder(1))),
316
    ///     parser.try_next()
317
    /// );
318
    ///
319
    /// // … and then `None` once it's over.
320
    /// assert_eq!(Ok(None), parser.try_next());
321
    /// ```
322
    pub fn try_next(
61✔
323
        &mut self,
324
    ) -> Result<Option<ParsedPatternItem<'p, P>>, ParserError<<P as FromStr>::Err>>
325
    where
326
        P: FromStr,
327
        P::Err: Debug,
328
    {
329
        while let Some(b) = self.input.as_bytes().get(self.idx) {
181✔
330
            match self.state {
163✔
331
                ParserState::Placeholder if *b == b'}' => {
43✔
332
                    let range = self.advance_state(self.idx, ParserState::Default);
17✔
333
                    #[allow(clippy::indexing_slicing)]
334
                    // TODO(#1668) Clippy exceptions need docs or fixing.
335
                    return self.input[range]
17✔
336
                        .parse()
337
                        .map(|ret| Some(ParsedPatternItem::Placeholder(ret)))
15✔
338
                        .map_err(ParserError::InvalidPlaceholder);
339
                }
340
                ParserState::QuotedLiteral if *b == b'\'' => {
42✔
341
                    if self.input.as_bytes().get(self.idx + 1) == Some(&b'\'') {
10✔
342
                        handle_literal!(self, true, ParserState::Apostrophe { quoted: true })
1✔
343
                    } else {
344
                        handle_literal!(self, true, ParserState::Default)
9✔
345
                    }
346
                }
347
                ParserState::Default if *b == b'{' => {
76✔
348
                    handle_literal!(self, false, ParserState::Placeholder)
20✔
349
                }
350
                ParserState::Default if *b == b'\'' => {
56✔
351
                    if self.input.as_bytes().get(self.idx + 1) == Some(&b'\'') {
11✔
352
                        handle_literal!(self, false, ParserState::Apostrophe { quoted: false })
1✔
353
                    } else {
354
                        handle_literal!(self, false, ParserState::QuotedLiteral)
10✔
355
                    }
356
                }
357
                ParserState::Default if !self.allow_raw_letters && b.is_ascii_alphabetic() => {
45✔
358
                    return Err(ParserError::IllegalCharacter(*b as char));
1✔
359
                }
360
                ParserState::Apostrophe { quoted } => {
2✔
361
                    self.start_idx -= 1;
2✔
362
                    if quoted {
2✔
363
                        handle_literal!(self, true, ParserState::QuotedLiteral)
1✔
364
                    } else {
365
                        handle_literal!(self, false, ParserState::Default)
1✔
366
                    }
367
                }
368
                _ => self.idx += 1,
102✔
369
            }
370
        }
371
        match self.state {
18✔
372
            ParserState::Placeholder => Err(ParserError::UnclosedPlaceholder),
3✔
373
            ParserState::QuotedLiteral => Err(ParserError::UnclosedQuotedLiteral),
1✔
374
            ParserState::Apostrophe { .. } => unreachable!(),
×
375
            ParserState::Default => {
376
                let range = self.start_idx..self.len;
14✔
377
                if !range.is_empty() {
14✔
378
                    self.start_idx = self.len;
3✔
379
                    #[allow(clippy::indexing_slicing)]
380
                    // TODO(#1668) Clippy exceptions need docs or fixing.
381
                    Ok(Some(ParsedPatternItem::Literal {
3✔
382
                        content: Cow::Borrowed(&self.input[range]),
3✔
383
                        quoted: false,
384
                    }))
385
                } else {
386
                    Ok(None)
11✔
387
                }
388
            }
389
        }
390
    }
61✔
391

392
    fn advance_state(&mut self, idx: usize, next_state: ParserState) -> core::ops::Range<usize> {
60✔
393
        let range = self.start_idx..idx;
60✔
394
        self.idx = idx + 1;
60✔
395
        self.start_idx = self.idx;
60✔
396
        self.state = next_state;
60✔
397
        range
398
    }
60✔
399

400
    /// Mutates this parser and collects all [`ParsedPatternItem`]s into a vector.
401
    pub fn try_collect_into_vec(
14✔
402
        mut self,
403
    ) -> Result<Vec<ParsedPatternItem<'p, P>>, ParserError<<P as FromStr>::Err>>
404
    where
405
        P: FromStr,
406
        P::Err: Debug,
407
    {
408
        let mut result = vec![];
14✔
409
        while let Some(token) = self.try_next()? {
46✔
410
            result.push(token);
32✔
411
        }
412
        Ok(result)
7✔
413
    }
14✔
414
}
415

416
#[cfg(test)]
mod tests {
    use super::*;
    use core::ops::Deref;

    /// Checks that well-formed patterns yield the expected token sequence and
    /// that malformed patterns are rejected.
    #[test]
    fn pattern_parse_placeholders() {
        // Well-formed patterns, parsed with raw ASCII letters allowed.
        let samples = vec![
            ("{0}", vec![ParsedPatternItem::Placeholder(0)]),
            (
                "{0}{1}",
                vec![
                    ParsedPatternItem::Placeholder(0),
                    ParsedPatternItem::Placeholder(1),
                ],
            ),
            (
                "{0} 'at' {1}",
                vec![
                    ParsedPatternItem::Placeholder(0),
                    ParsedPatternItem::Literal {
                        content: " ".into(),
                        quoted: false,
                    },
                    ParsedPatternItem::Literal {
                        content: "at".into(),
                        quoted: true,
                    },
                    ParsedPatternItem::Literal {
                        content: " ".into(),
                        quoted: false,
                    },
                    ParsedPatternItem::Placeholder(1),
                ],
            ),
            (
                "{0}'at'{1}",
                vec![
                    ParsedPatternItem::Placeholder(0),
                    ParsedPatternItem::Literal {
                        content: "at".into(),
                        quoted: true,
                    },
                    ParsedPatternItem::Placeholder(1),
                ],
            ),
            (
                "'{0}' 'at' '{1}'",
                vec![
                    ParsedPatternItem::Literal {
                        content: "{0}".into(),
                        quoted: true,
                    },
                    ParsedPatternItem::Literal {
                        content: " ".into(),
                        quoted: false,
                    },
                    ParsedPatternItem::Literal {
                        content: "at".into(),
                        quoted: true,
                    },
                    ParsedPatternItem::Literal {
                        content: " ".into(),
                        quoted: false,
                    },
                    ParsedPatternItem::Literal {
                        content: "{1}".into(),
                        quoted: true,
                    },
                ],
            ),
            (
                "'PRE' {0} 'and' {1} 'POST'",
                vec![
                    ParsedPatternItem::Literal {
                        content: "PRE".into(),
                        quoted: true,
                    },
                    ParsedPatternItem::Literal {
                        content: " ".into(),
                        quoted: false,
                    },
                    ParsedPatternItem::Placeholder(0),
                    ParsedPatternItem::Literal {
                        content: " ".into(),
                        quoted: false,
                    },
                    ParsedPatternItem::Literal {
                        content: "and".into(),
                        quoted: true,
                    },
                    ParsedPatternItem::Literal {
                        content: " ".into(),
                        quoted: false,
                    },
                    ParsedPatternItem::Placeholder(1),
                    ParsedPatternItem::Literal {
                        content: " ".into(),
                        quoted: false,
                    },
                    ParsedPatternItem::Literal {
                        content: "POST".into(),
                        quoted: true,
                    },
                ],
            ),
            (
                // Doubled apostrophes, both outside and inside a quoted literal.
                "{0} o''clock and 'o''clock'",
                vec![
                    ParsedPatternItem::Placeholder(0),
                    ParsedPatternItem::Literal {
                        content: " o".into(),
                        quoted: false,
                    },
                    ParsedPatternItem::Literal {
                        content: "'".into(),
                        quoted: false,
                    },
                    ParsedPatternItem::Literal {
                        content: "clock and ".into(),
                        quoted: false,
                    },
                    ParsedPatternItem::Literal {
                        content: "o".into(),
                        quoted: true,
                    },
                    ParsedPatternItem::Literal {
                        content: "'".into(),
                        quoted: true,
                    },
                    ParsedPatternItem::Literal {
                        content: "clock".into(),
                        quoted: true,
                    },
                ],
            ),
        ];

        for (input, expected) in samples {
            let parser = Parser::new(
                input,
                ParserOptions {
                    allow_raw_letters: true,
                },
            );
            let result = parser
                .try_collect_into_vec()
                .expect("Failed to parse a pattern");
            assert_eq!(result.deref(), expected,);
        }

        // Malformed patterns, parsed with raw ASCII letters disallowed.
        // `None` means "must fail, but the exact error cannot be constructed here".
        let broken: Vec<(_, Option<ParserError<core::num::ParseIntError>>)> = vec![
            ("{", Some(ParserError::UnclosedPlaceholder)),
            ("{0", Some(ParserError::UnclosedPlaceholder)),
            ("{01", Some(ParserError::UnclosedPlaceholder)),
            (
                "{date}",
                // This should be:
                // ```
                // ParserError::InvalidPlaceholder(
                //     ParseIntError {
                //         kind: core::num::IntErrorKind::InvalidDigit
                //     }
                // ),
                // ```
                // Pending: https://github.com/rust-lang/rust/issues/22639
                //
                // Once that is fixed, we can stop using an `Option` here.
                None,
            ),
            ("{date} 'days'", None),
            ("'{00}", Some(ParserError::UnclosedQuotedLiteral)),
            ("d", Some(ParserError::IllegalCharacter('d'))),
        ];

        for (input, error) in broken {
            let parser = Parser::<usize>::new(
                input,
                ParserOptions {
                    allow_raw_letters: false,
                },
            );
            let result = parser.try_collect_into_vec();
            if let Some(error) = error {
                assert_eq!(result.expect_err("Should have failed."), error,);
            } else {
                assert!(result.is_err());
            }
        }
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc