• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pomsky-lang / pomsky / 12166507628

04 Dec 2024 07:03PM UTC coverage: 80.471% (+0.009%) from 80.462%
12166507628

push

github

Aloso
refactor: improve parser

77 of 81 new or added lines in 3 files covered. (95.06%)

1 existing line in 1 file now uncovered.

4372 of 5433 relevant lines covered (80.47%)

395515.58 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.8
/pomsky-syntax/src/parse/parser_impl.rs
1
use std::collections::HashSet;
2

3
use intersection::Intersection;
4

5
use crate::{
6
    diagnose::{
7
        CharClassError, CharStringError, DeprecationWarning, NumberError, ParseWarningKind,
8
        RepetitionError,
9
    },
10
    error::{ParseError, ParseErrorKind as PEK},
11
    exprs::{negation::Negation, test::*, *},
12
    lexer::Token,
13
    Span,
14
};
15

16
use super::{helper, Parser};
17

18
/// Result type returned by the parser methods in this file; the error is
/// always a [`ParseError`].
type PResult<T> = Result<T, ParseError>;
19

20
/// Upper bound passed to `consume_number` when parsing a numeric group
/// reference (`::N`).
const MAX_GROUP_NUMBER: u32 = 65_535;
21

22
impl<'i> Parser<'i> {
23
    pub(super) fn parse_modified(&mut self) -> PResult<Rule> {
803✔
24
        let mut stmts = Vec::new();
803✔
25

803✔
26
        let was_lazy = self.is_lazy;
803✔
27
        let was_unicode_aware = self.is_unicode_aware;
803✔
28

29
        loop {
30
            let Some(stmt) = self
884✔
31
                .parse_mode_modifier()?
884✔
32
                .try_or_else(|| self.parse_let())?
884✔
33
                .try_or_else(|| self.parse_test())?
878✔
34
            else {
35
                break;
793✔
36
            };
37

38
            match &stmt.0 {
7✔
39
                Stmt::Enable(BooleanSetting::Lazy, _) => self.is_lazy = true,
5✔
40
                Stmt::Enable(BooleanSetting::Unicode, _) => self.is_unicode_aware = true,
2✔
41
                Stmt::Disable(BooleanSetting::Lazy, _) => self.is_lazy = false,
1✔
42
                Stmt::Disable(BooleanSetting::Unicode, _) => self.is_unicode_aware = false,
16✔
43
                _ => {}
57✔
44
            }
45

46
            stmts.push(stmt);
81✔
47
        }
48

49
        self.recursion_start()?;
793✔
50
        let mut rule = self.parse_or()?;
791✔
51
        self.recursion_end();
483✔
52

483✔
53
        self.is_lazy = was_lazy;
483✔
54
        self.is_unicode_aware = was_unicode_aware;
483✔
55

483✔
56
        // TODO: This should not be part of the parser
483✔
57
        if stmts.len() > 1 {
483✔
58
            let mut set = HashSet::new();
10✔
59
            for (stmt, _) in &stmts {
32✔
60
                if let Stmt::Let(l) = stmt {
23✔
61
                    if set.contains(l.name()) {
19✔
62
                        return Err(PEK::LetBindingExists.at(l.name_span));
1✔
63
                    }
18✔
64
                    set.insert(l.name());
18✔
65
                }
4✔
66
            }
67
        }
473✔
68

69
        let span_end = rule.span();
482✔
70
        for (stmt, span) in stmts.into_iter().rev() {
482✔
71
            rule = Rule::StmtExpr(Box::new(StmtExpr::new(stmt, rule, span.join(span_end))));
78✔
72
        }
78✔
73

74
        Ok(rule)
482✔
75
    }
803✔
76

77
    fn parse_mode_modifier(&mut self) -> PResult<Option<(Stmt, Span)>> {
884✔
78
        let mode = if self.consume_reserved("enable") {
884✔
79
            true
7✔
80
        } else if self.consume_reserved("disable") {
877✔
81
            false
17✔
82
        } else {
83
            return Ok(None);
860✔
84
        };
85

86
        let span_start = self.last_span();
24✔
87
        let setting = if self.consume_reserved("lazy") {
24✔
88
            BooleanSetting::Lazy
6✔
89
        } else if self.consume_contextual_keyword("unicode") {
18✔
90
            BooleanSetting::Unicode
18✔
91
        } else {
92
            return Err(PEK::Expected("`lazy` or `unicode`").at(self.span()));
×
93
        };
94
        self.expect(Token::Semicolon)?;
24✔
95
        let span_end = self.last_span();
24✔
96
        let span = span_start.join(span_end);
24✔
97

98
        let stmt = if mode { Stmt::Enable(setting, span) } else { Stmt::Disable(setting, span) };
24✔
99

100
        Ok(Some((stmt, span)))
24✔
101
    }
884✔
102

103
    fn parse_let(&mut self) -> PResult<Option<(Stmt, Span)>> {
860✔
104
        if self.consume_reserved("let") {
860✔
105
            let span_start = self.last_span();
49✔
106
            let name_span = self.span();
49✔
107
            let name = self.expect_as(Token::Identifier).map_err(|e| {
49✔
108
                if self.is(Token::ReservedName) {
2✔
109
                    PEK::KeywordAfterLet(self.source_at(self.span()).to_owned()).at(e.span)
1✔
110
                } else {
111
                    e
1✔
112
                }
113
            })?;
49✔
114

115
            self.expect(Token::Equals)?;
47✔
116

117
            self.recursion_start()?;
46✔
118
            let rule = self.parse_or()?;
46✔
119
            self.recursion_end();
44✔
120

44✔
121
            self.expect(Token::Semicolon)
44✔
122
                .map_err(|p| PEK::Expected("expression or `;`").at(p.span))?;
44✔
123
            let span_end = self.last_span();
43✔
124

43✔
125
            Ok(Some((Stmt::Let(Let::new(name, rule, name_span)), span_start.join(span_end))))
43✔
126
        } else {
127
            Ok(None)
811✔
128
        }
129
    }
860✔
130

131
    fn parse_test(&mut self) -> PResult<Option<(Stmt, Span)>> {
811✔
132
        if self.consume_reserved("test") {
811✔
133
            let span_start = self.last_span();
18✔
134
            self.expect(Token::OpenBrace)?;
18✔
135

136
            let mut cases = Vec::new();
18✔
137
            while let Some(case) = self.parse_test_cases()? {
48✔
138
                cases.push(case);
30✔
139
            }
30✔
140

141
            self.expect(Token::CloseBrace)?;
14✔
142
            let span_end = self.last_span();
14✔
143
            let span = span_start.join(span_end);
14✔
144

14✔
145
            Ok(Some((Stmt::Test(Test { cases, span }), span)))
14✔
146
        } else {
147
            Ok(None)
793✔
148
        }
149
    }
811✔
150

151
    fn parse_test_cases(&mut self) -> PResult<Option<TestCase>> {
48✔
152
        if self.consume_contextual_keyword("match") {
48✔
153
            let mut matches = Vec::new();
22✔
154
            let mut literal = None;
22✔
155

22✔
156
            if self.peek() != Some((Token::Identifier, "in")) {
22✔
157
                matches.push(self.parse_test_match()?);
20✔
158
                while self.consume(Token::Comma) {
23✔
159
                    matches.push(self.parse_test_match()?);
4✔
160
                }
161
            }
2✔
162

163
            if self.consume_contextual_keyword("in") {
21✔
164
                literal = self.parse_literal()?;
9✔
165
                if literal.is_none() {
9✔
166
                    return Err(PEK::ExpectedToken(Token::String).at(self.span()));
×
167
                };
9✔
168
            }
12✔
169
            self.expect(Token::Semicolon)?;
21✔
170

171
            if let Some(literal) = literal {
20✔
172
                Ok(Some(TestCase::MatchAll(TestCaseMatchAll { literal, matches })))
9✔
173
            } else if matches.len() > 1 {
11✔
174
                let span = matches[0].span.join(matches.last().unwrap().span);
1✔
175
                Err(PEK::MultipleStringsInTestCase.at(span))
1✔
176
            } else {
177
                let match_ = matches.pop().unwrap();
10✔
178
                Ok(Some(TestCase::Match(match_)))
10✔
179
            }
180
        } else if self.consume_contextual_keyword("reject") {
26✔
181
            let as_substring = self.consume_contextual_keyword("in");
12✔
182

183
            let Some(literal) = self.parse_literal()? else {
12✔
184
                return Err(PEK::ExpectedToken(Token::String).at(self.span()));
×
185
            };
186

187
            self.expect(Token::Semicolon)?;
12✔
188

189
            Ok(Some(TestCase::Reject(TestCaseReject { literal, as_substring })))
11✔
190
        } else {
191
            Ok(None)
14✔
192
        }
193
    }
48✔
194

195
    fn parse_test_match(&mut self) -> PResult<TestCaseMatch> {
24✔
196
        let Some(literal) = self.parse_literal()? else {
24✔
197
            return Err(PEK::ExpectedToken(Token::String).at(self.span()));
×
198
        };
199
        let span_start = self.last_span();
24✔
200

24✔
201
        let mut captures = Vec::new();
24✔
202

24✔
203
        if self.consume_contextual_keyword("as") {
24✔
204
            self.expect(Token::OpenBrace)?;
10✔
205

206
            let mut is_first = true;
10✔
207
            loop {
208
                if !is_first && !self.consume(Token::Comma) {
20✔
209
                    break;
7✔
210
                }
13✔
211
                let Some(capture) = self.parse_test_capture()? else {
13✔
212
                    break;
3✔
213
                };
214
                captures.push(capture);
10✔
215
                is_first = false;
10✔
216
            }
217

218
            self.expect(Token::CloseBrace)?;
10✔
219
        }
14✔
220

221
        let span_end = self.last_span();
23✔
222
        Ok(TestCaseMatch { literal, captures, span: span_start.join(span_end) })
23✔
223
    }
24✔
224

225
    fn parse_test_capture(&mut self) -> PResult<Option<TestCapture>> {
13✔
226
        let ident = if let Some(n) = self.consume_number(u16::MAX)? {
13✔
227
            CaptureIdent::Index(n)
6✔
228
        } else if let Some(name) = self.consume_as(Token::Identifier) {
7✔
229
            CaptureIdent::Name(name.to_string())
4✔
230
        } else {
231
            return Ok(None);
3✔
232
        };
233
        let ident_span = self.last_span();
10✔
234

10✔
235
        self.expect(Token::Colon)?;
10✔
236
        let Some(literal) = self.parse_literal()? else {
10✔
237
            return Err(PEK::ExpectedToken(Token::String).at(self.span()));
×
238
        };
239
        Ok(Some(TestCapture { ident, ident_span, literal }))
10✔
240
    }
13✔
241

242
    fn parse_or(&mut self) -> PResult<Rule> {
837✔
243
        let mut span = self.span();
837✔
244
        let leading_pipe = self.consume(Token::Pipe);
837✔
245

837✔
246
        let mut alts = Vec::new();
837✔
247
        let Some(first_alt) = self.parse_and()? else {
837✔
248
            if leading_pipe {
37✔
249
                return Err(PEK::LonePipe.at(span));
6✔
250
            } else {
251
                return Ok(Rule::Literal(Literal::new("".to_string(), Span::default())));
31✔
252
            }
253
        };
254
        alts.push(first_alt);
496✔
255

256
        while self.consume(Token::Pipe) {
534✔
257
            let Some(next_alt) = self.parse_and()? else {
38✔
NEW
258
                return Err(PEK::LonePipe.at(self.last_span()));
×
259
            };
260
            span = span.join(next_alt.span());
38✔
261
            alts.push(next_alt);
38✔
262
        }
263

264
        if alts.len() == 1 {
496✔
265
            Ok(alts.pop().unwrap())
470✔
266
        } else {
267
            Ok(Rule::Alternation(Alternation { rules: alts, span }))
26✔
268
        }
269
    }
837✔
270

271
    fn parse_and(&mut self) -> PResult<Option<Rule>> {
875✔
272
        let span_start = self.span();
875✔
273
        let has_leading_ampersand = self.consume(Token::Ampersand);
875✔
274

275
        let Some(first_sequence) = self.parse_sequence()? else {
875✔
276
            if has_leading_ampersand {
37✔
277
                return Err(PEK::Expected("expression").at(self.span()));
×
278
            }
37✔
279
            return Ok(None);
37✔
280
        };
281
        if !self.is(Token::Ampersand) {
534✔
282
            return Ok(Some(first_sequence));
527✔
283
        }
7✔
284

7✔
285
        let mut rules = Vec::with_capacity(2);
7✔
286
        rules.push(first_sequence);
7✔
287
        loop {
288
            if !self.consume(Token::Ampersand) {
14✔
289
                let span = span_start.join(self.last_span());
7✔
290
                return Ok(Some(Rule::Intersection(Intersection { rules, span })));
7✔
291
            }
7✔
292

293
            let Some(next_sequence) = self.parse_sequence()? else {
7✔
294
                return Err(PEK::Expected("expression").at(self.span()));
×
295
            };
296
            rules.push(next_sequence);
7✔
297
        }
298
    }
875✔
299

300
    fn parse_sequence(&mut self) -> PResult<Option<Rule>> {
882✔
301
        let mut fixes = Vec::new();
882✔
302
        while let Some(fix) = self.parse_fixes()? {
1,980✔
303
            fixes.push(fix);
1,098✔
304
        }
1,098✔
305

306
        Ok(if fixes.is_empty() {
578✔
307
            None
37✔
308
        } else if fixes.len() == 1 {
541✔
309
            Some(fixes.pop().unwrap())
405✔
310
        } else {
311
            let start = fixes.first().map(Rule::span).unwrap_or_default();
136✔
312
            let end = fixes.last().map(Rule::span).unwrap_or_default();
136✔
313
            let span = start.join(end);
136✔
314

136✔
315
            Some(Rule::Group(Group::new(fixes, GroupKind::Implicit, span)))
136✔
316
        })
317
    }
882✔
318

319
    fn parse_fixes(&mut self) -> PResult<Option<Rule>> {
1,980✔
320
        let mut nots_span = self.span();
1,980✔
321
        let mut nots = 0usize;
1,980✔
322
        while self.consume(Token::Not) {
2,067✔
323
            nots += 1;
87✔
324
            nots_span = nots_span.join(self.last_span());
87✔
325
        }
87✔
326

327
        let Some(mut rule) = self.parse_lookaround()?.try_or_else(|| self.parse_atom())? else {
1,980✔
328
            if nots == 0 {
579✔
329
                return Ok(None);
578✔
330
            } else {
331
                return Err(PEK::Expected("expression").at(self.span()));
1✔
332
            }
333
        };
334

335
        for _ in 0..nots {
1,109✔
336
            rule = Rule::Negation(Box::new(Negation { rule, not_span: nots_span }));
85✔
337
        }
85✔
338

339
        while let Some((kind, quantifier, span)) = self.parse_repetition()? {
1,233✔
340
            let span = rule.span().join(span);
124✔
341
            rule = Rule::Repetition(Box::new(Repetition::new(rule, kind, quantifier, span)));
124✔
342
        }
124✔
343

344
        Ok(Some(rule))
1,098✔
345
    }
1,980✔
346

347
    fn parse_lookaround(&mut self) -> PResult<Option<Rule>> {
1,980✔
348
        let kind = if self.consume(Token::LookAhead) {
1,980✔
349
            LookaroundKind::Ahead
19✔
350
        } else if self.consume(Token::LookBehind) {
1,961✔
351
            LookaroundKind::Behind
15✔
352
        } else {
353
            return Ok(None);
1,946✔
354
        };
355
        let start_span = self.last_span();
34✔
356

34✔
357
        self.recursion_start()?;
34✔
358
        let rule = self.parse_modified()?;
34✔
359
        self.recursion_end();
34✔
360

34✔
361
        let span = rule.span();
34✔
362
        Ok(Some(Rule::Lookaround(Box::new(Lookaround::new(rule, kind, start_span.join(span))))))
34✔
363
    }
1,980✔
364

365
    /// Parse a repetition that can follow an atom: `+`, `?`, `*`, `{x}`,
366
    /// `{x,}`, `{,x}` or `{x,y}` optionally followed by the `greedy` or
367
    /// `lazy` keyword. `x` and `y` are number literals.
368
    fn parse_repetition(&mut self) -> PResult<Option<(RepetitionKind, Quantifier, Span)>> {
1,233✔
369
        let start = self.span();
1,233✔
370

371
        let kind = if self.consume(Token::Plus) {
1,233✔
372
            RepetitionKind::one_inf()
45✔
373
        } else if self.consume(Token::Star) {
1,188✔
374
            RepetitionKind::zero_inf()
23✔
375
        } else if self.consume(Token::QuestionMark) {
1,165✔
376
            RepetitionKind::zero_one()
35✔
377
        } else if let Some(kind) = self.parse_repetition_braces()? {
1,130✔
378
            kind
27✔
379
        } else {
380
            return Ok(None);
1,098✔
381
        };
382

383
        let quantifier = if self.consume_reserved("greedy") {
130✔
384
            Quantifier::Greedy
1✔
385
        } else if self.consume_reserved("lazy") {
129✔
386
            Quantifier::Lazy
8✔
387
        } else if self.is_lazy {
121✔
388
            Quantifier::DefaultLazy
8✔
389
        } else {
390
            Quantifier::DefaultGreedy
113✔
391
        };
392

393
        let multi_span = self.span();
130✔
394
        if self.consume(Token::Plus) || self.consume(Token::Star) {
130✔
395
            return Err(PEK::Repetition(RepetitionError::Multi).at(multi_span));
2✔
396
        } else if self.consume(Token::QuestionMark) {
128✔
397
            return Err(PEK::Repetition(RepetitionError::QmSuffix).at(multi_span));
2✔
398
        } else if self.parse_repetition_braces()?.is_some() {
126✔
399
            return Err(
2✔
400
                PEK::Repetition(RepetitionError::Multi).at(multi_span.join(self.last_span()))
2✔
401
            );
2✔
402
        }
124✔
403

124✔
404
        let end = self.last_span();
124✔
405
        Ok(Some((kind, quantifier, start.join(end))))
124✔
406
    }
1,233✔
407

408
    /// Parse `{2}`, `{2,}`, `{,2}` or `{2,5}`.
409
    fn parse_repetition_braces(&mut self) -> PResult<Option<RepetitionKind>> {
1,256✔
410
        if self.consume(Token::OpenBrace) {
1,256✔
411
            let num_start = self.span();
34✔
412

413
            // Both numbers and the comma are parsed optionally, then we check that one
414
            // of the allowed syntaxes is used: There must be at least one number, and if
415
            // there are two numbers, the comma is required. It also checks that the
416
            // numbers are in increasing order.
417
            let lower = self.consume_number(65_535)?;
34✔
418
            let comma = self.consume(Token::Comma);
34✔
419
            let upper = self.consume_number(65_535)?;
34✔
420

421
            let num_end = self.last_span();
33✔
422
            let num_span = num_start.join(num_end);
33✔
423

424
            let kind = match (lower, comma, upper) {
33✔
425
                (lower, true, upper) => (lower.unwrap_or(0), upper)
21✔
426
                    .try_into()
21✔
427
                    .map_err(|e| PEK::Repetition(e).at(num_span))?,
21✔
428

429
                (Some(_), false, Some(_)) => return Err(PEK::Expected("`}` or `,`").at(num_end)),
×
430
                (Some(rep), false, None) | (None, false, Some(rep)) => RepetitionKind::fixed(rep),
11✔
431
                (None, false, None) => return Err(PEK::Expected("number").at(self.span())),
1✔
432
            };
433

434
            self.expect(Token::CloseBrace)?;
31✔
435

436
            Ok(Some(kind))
29✔
437
        } else {
438
            Ok(None)
1,222✔
439
        }
440
    }
1,256✔
441

442
    /// Tries each kind of atom in a fixed order and returns the first that
    /// matches: group, string, char set, boundary, reference, code point,
    /// range, regex, variable, dot, recursion.
    ///
    /// Returns `Ok(None)` if none of them matches.
    fn parse_atom(&mut self) -> PResult<Option<Rule>> {
        let atom = self.parse_group()?;
        let atom = atom.try_or_else(|| self.parse_string())?;
        let atom = atom.try_or_else(|| self.parse_char_set())?;
        let atom = atom.or_else(|| self.parse_boundary());
        let atom = atom.try_or_else(|| self.parse_reference())?;
        let atom = atom.try_or_else(|| self.parse_code_point_rule())?;
        let atom = atom.try_or_else(|| self.parse_range())?;
        let atom = atom.try_or_else(|| self.parse_regex())?;
        let atom = atom.try_or_else(|| self.parse_variable())?;
        let atom = atom.or_else(|| self.parse_dot());
        Ok(atom.or_else(|| self.parse_recursion()))
    }
1,946✔
456

457
    /// Parses a (possibly capturing) group, e.g. `(E E | E)` or `:name(E)`.
458
    fn parse_group(&mut self) -> PResult<Option<Rule>> {
1,946✔
459
        let (kind, start_span) = self.parse_group_kind()?;
1,946✔
460
        if !kind.is_normal() {
1,943✔
461
            self.expect(Token::OpenParen)?;
88✔
462
        } else if !self.consume(Token::OpenParen) {
1,855✔
463
            return Ok(None);
1,512✔
464
        }
343✔
465

466
        self.recursion_start()?;
430✔
467
        let rule = self.parse_modified()?;
430✔
468
        self.recursion_end();
172✔
469

172✔
470
        self.expect(Token::CloseParen)
172✔
471
            .map_err(|p| PEK::Expected("`)` or an expression").at(p.span))?;
172✔
472
        // start_span may be 0..0, so we need to use join_unchecked
473
        let span = start_span.join_unchecked(self.last_span());
170✔
474

170✔
475
        let rule = Rule::Group(Group::new(vec![rule], kind, span));
170✔
476
        Ok(Some(rule))
170✔
477
    }
1,946✔
478

479
    /// Parses `:name` or just `:`. Returns the span of the colon with the name.
480
    fn parse_group_kind(&mut self) -> PResult<(GroupKind, Span)> {
1,946✔
481
        if self.consume_reserved("atomic") {
1,946✔
482
            let span = self.last_span();
3✔
483
            Ok((GroupKind::Atomic, span))
3✔
484
        } else if self.consume(Token::Colon) {
1,943✔
485
            let span = self.last_span();
88✔
486

487
            if let Some(keyword) = self.consume_as(Token::ReservedName) {
88✔
488
                return Err(PEK::KeywordAfterColon(keyword.into()).at(self.last_span()));
1✔
489
            }
87✔
490

87✔
491
            let name = self.consume_as(Token::Identifier);
87✔
492
            if let Some(name) = name {
87✔
493
                if let Some(invalid_index) = name.find(|c: char| !c.is_ascii_alphanumeric()) {
156✔
494
                    let c = name[invalid_index..].chars().next().unwrap();
1✔
495
                    let start = self.last_span().range_unchecked().start + invalid_index;
1✔
496
                    let len = c.len_utf8();
1✔
497
                    return Err(PEK::NonAsciiIdentAfterColon(c).at(Span::new(start, start + len)));
1✔
498
                }
52✔
499

52✔
500
                if name.len() > 32 {
52✔
501
                    return Err(PEK::GroupNameTooLong(name.len()).at(self.last_span()));
1✔
502
                }
51✔
503
            }
34✔
504
            Ok((GroupKind::Capturing(Capture::new(name)), span))
85✔
505
        } else {
506
            Ok((GroupKind::Normal, self.span().start()))
1,855✔
507
        }
508
    }
1,946✔
509

510
    /// Parses a string literal.
511
    fn parse_string(&mut self) -> PResult<Option<Rule>> {
1,512✔
512
        Ok(self.parse_literal()?.map(Rule::Literal))
1,512✔
513
    }
1,512✔
514

515
    fn parse_literal(&mut self) -> PResult<Option<Literal>> {
1,567✔
516
        let Some(s) = self.consume_as(Token::String) else { return Ok(None) };
1,567✔
517
        let span = self.last_span();
300✔
518
        let content = helper::parse_quoted_text(s).map_err(|k| k.at(span))?;
300✔
519
        Ok(Some(Literal::new(content.to_string(), span)))
299✔
520
    }
1,567✔
521

522
    /// Parses a char set, surrounded by `[` `]`. This was previously called a
523
    /// "char class", but that name is ambiguous and is being phased out.
524
    ///
525
    /// This function does _not_ parse exclamation marks in front of a char
526
    /// class, because negation is handled separately.
527
    fn parse_char_set(&mut self) -> PResult<Option<Rule>> {
1,267✔
528
        if self.consume(Token::OpenBracket) {
1,267✔
529
            let start_span = self.last_span();
249✔
530

249✔
531
            if self.consume(Token::Caret) {
249✔
532
                return Err(PEK::CharClass(CharClassError::CaretInGroup).at(self.last_span()));
2✔
533
            }
247✔
534

535
            let inner = self.parse_char_set_inner()?;
247✔
536

537
            self.expect(Token::CloseBracket).map_err(|p| {
233✔
538
                PEK::Expected("character class, string, code point, Unicode property or `]`")
1✔
539
                    .at(p.span)
1✔
540
            })?;
233✔
541
            let span = start_span.join(self.last_span());
232✔
542

232✔
543
            if inner.is_empty() {
232✔
544
                return Err(PEK::CharClass(CharClassError::Empty).at(span));
1✔
545
            }
231✔
546

231✔
547
            Ok(Some(Rule::CharClass(CharClass::new(inner, span, self.is_unicode_aware))))
231✔
548
        } else {
549
            Ok(None)
1,018✔
550
        }
551
    }
1,267✔
552

553
    /// Parses a char group, i.e. the contents of a char set. This is a sequence
554
    /// of characters, character classes, character ranges or Unicode
555
    /// properties. Some of them can be negated.
556
    fn parse_char_set_inner(&mut self) -> PResult<Vec<GroupItem>> {
247✔
557
        let mut items = Vec::new();
247✔
558
        loop {
559
            let mut nots_span = self.span();
542✔
560
            let mut nots = 0usize;
542✔
561
            while self.consume(Token::Not) {
574✔
562
                nots += 1;
32✔
563
                nots_span = nots_span.join(self.last_span());
32✔
564
            }
32✔
565

566
            let group = if let Some(group) = self.parse_char_group_chars_or_range()? {
542✔
567
                if nots > 0 {
111✔
568
                    return Err(PEK::UnallowedNot.at(nots_span));
1✔
569
                }
110✔
570
                group
110✔
571
            } else if let Some(group) = self.parse_char_group_ident(nots % 2 != 0)? {
424✔
572
                if nots > 1 {
186✔
573
                    return Err(PEK::UnallowedMultiNot(nots).at(nots_span));
1✔
574
                }
185✔
575
                group
185✔
576
            } else if nots > 0 {
233✔
577
                return Err(PEK::ExpectedToken(Token::Identifier).at(self.span()));
×
578
            } else {
579
                break;
233✔
580
            };
581
            items.extend(group);
295✔
582
        }
583

584
        Ok(items)
233✔
585
    }
247✔
586

587
    /// Parses an identifier or dot in a char set
588
    fn parse_char_group_ident(&mut self, negative: bool) -> PResult<Option<Vec<GroupItem>>> {
424✔
589
        if !self.consume(Token::Identifier) {
424✔
590
            if let Some(name) = self.consume_as(Token::ReservedName) {
235✔
591
                return Err(PEK::UnexpectedKeyword(name.to_owned()).at(self.last_span()));
2✔
592
            }
233✔
593
            return Ok(None);
233✔
594
        }
189✔
595
        let span = self.last_span();
189✔
596

189✔
597
        let before_colon = self.source_at(span);
189✔
598
        let after_colon = if self.consume(Token::Colon) {
189✔
599
            Some(self.expect_as(Token::Identifier)?)
14✔
600
        } else {
601
            None
175✔
602
        };
603
        let (kind, name, span) = match after_colon {
189✔
604
            Some(name) => (Some(before_colon), name, span.join(self.last_span())),
14✔
605
            None => (None, before_colon, span),
175✔
606
        };
607

608
        let item =
186✔
609
            CharGroup::try_from_group_name(kind, name, negative, span).map_err(|e| e.at(span))?;
189✔
610

611
        Ok(Some(item))
186✔
612
    }
424✔
613

614
    /// Parses a string literal or a character range in a char set, e.g. `"axd"`
615
    /// or `'0'-'7'`.
616
    fn parse_char_group_chars_or_range(&mut self) -> PResult<Option<Vec<GroupItem>>> {
542✔
617
        let span1 = self.span();
542✔
618
        let Some(first) = self.parse_string_or_char()? else {
542✔
619
            return Ok(None);
424✔
620
        };
621

622
        if self.consume(Token::Dash) {
118✔
623
            let span2 = self.span();
17✔
624
            let Some(last) = self.parse_string_or_char()? else {
17✔
625
                return Err(PEK::Expected("code point or character").at(self.span()));
1✔
626
            };
627

628
            if let StringOrChar::Char { is_shorthand: true, c } = first {
16✔
629
                self.add_warning(
1✔
630
                    ParseWarningKind::Deprecation(DeprecationWarning::ShorthandInRange(c))
1✔
631
                        .at(span1),
1✔
632
                );
1✔
633
            }
15✔
634
            if let StringOrChar::Char { is_shorthand: true, c } = last {
16✔
635
                self.add_warning(
1✔
636
                    ParseWarningKind::Deprecation(DeprecationWarning::ShorthandInRange(c))
1✔
637
                        .at(span2),
1✔
638
                );
1✔
639
            }
15✔
640

641
            let first = first.to_char().map_err(|e| e.at(span1))?;
16✔
642
            let last = last.to_char().map_err(|e| e.at(span2))?;
12✔
643

644
            let group = CharGroup::try_from_range(first, last).ok_or_else(|| {
11✔
645
                PEK::CharClass(CharClassError::NonAscendingRange(first, last)).at(span1.join(span2))
1✔
646
            })?;
11✔
647
            Ok(Some(group))
10✔
648
        } else {
649
            let group = match first {
101✔
650
                StringOrChar::String(s) => {
50✔
651
                    let chars = helper::parse_quoted_text(s).map_err(|k| k.at(span1))?;
50✔
652
                    chars.chars().map(GroupItem::Char).collect()
50✔
653
                }
654
                StringOrChar::Char { c, .. } => vec![GroupItem::Char(c)],
51✔
655
            };
656
            Ok(Some(group))
101✔
657
        }
658
    }
542✔
659

660
    fn parse_string_or_char(&mut self) -> PResult<Option<StringOrChar<'i>>> {
559✔
661
        let res = if let Some(s) = self.consume_as(Token::String) {
559✔
662
            StringOrChar::String(s)
75✔
663
        } else if let Some((c, _)) = self.parse_code_point()? {
484✔
664
            StringOrChar::Char { c, is_shorthand: false }
12✔
665
        } else if let Some(c) = self.parse_special_char() {
472✔
666
            StringOrChar::Char { c, is_shorthand: true }
47✔
667
        } else {
668
            return Ok(None);
425✔
669
        };
670
        Ok(Some(res))
134✔
671
    }
559✔
672

673
    fn parse_code_point(&mut self) -> PResult<Option<(char, Span)>> {
1,383✔
674
        let Some(cp) = self.consume_as(Token::CodePoint) else { return Ok(None) };
1,383✔
675
        let span = self.last_span();
53✔
676
        let trimmed_u = cp[1..].trim_start();
53✔
677
        if !trimmed_u.starts_with('+') {
53✔
NEW
678
            let warning = DeprecationWarning::Unicode(cp.into());
×
NEW
679
            self.add_warning(ParseWarningKind::Deprecation(warning).at(span))
×
680
        }
53✔
681

682
        let hex = trimmed_u.trim_start_matches(|c: char| c == '+' || c.is_whitespace());
106✔
683

53✔
684
        u32::from_str_radix(hex, 16)
53✔
685
            .ok()
53✔
686
            .and_then(|n| char::try_from(n).ok())
53✔
687
            .map(|c| Some((c, span)))
53✔
688
            .ok_or_else(|| PEK::InvalidCodePoint.at(span))
53✔
689
    }
1,383✔
690

691
    fn parse_code_point_rule(&mut self) -> PResult<Option<Rule>> {
899✔
692
        let Some((c, span)) = self.parse_code_point()? else { return Ok(None) };
899✔
693
        let inner = vec![GroupItem::Char(c)];
38✔
694
        Ok(Some(Rule::CharClass(CharClass::new(inner, span, self.is_unicode_aware))))
38✔
695
    }
899✔
696

697
    fn parse_special_char(&mut self) -> Option<char> {
472✔
698
        let Some((Token::Identifier, string)) = self.peek() else { return None };
472✔
699
        let c = match string {
236✔
700
            "n" => '\n',
236✔
701
            "r" => '\r',
223✔
702
            "t" => '\t',
211✔
703
            "a" => '\u{07}',
208✔
704
            "e" => '\u{1B}',
204✔
705
            "f" => '\u{0C}',
201✔
706
            _ => return None,
189✔
707
        };
708
        self.advance();
47✔
709
        Some(c)
47✔
710
    }
472✔
711

712
    /// Parses a boundary. For start and end, there are two syntaxes: `^` and `$`.
713
    /// Word boundaries are `%`.
714
    ///
715
    /// The deprecated syntax issues a warning.
716
    ///
717
    /// This function does _not_ parse negated negated word boundaries (`!%`),
718
    /// since negation is handled elsewhere. It also does _not_ parse the
719
    /// `Start` and `End` global variables.
720
    fn parse_boundary(&mut self) -> Option<Rule> {
1,018✔
721
        let span = self.span();
1,018✔
722
        let kind = if self.consume(Token::Caret) {
1,018✔
723
            BoundaryKind::Start
12✔
724
        } else if self.consume(Token::Dollar) {
1,006✔
725
            BoundaryKind::End
9✔
726
        } else if self.consume(Token::Percent) {
997✔
727
            BoundaryKind::Word
27✔
728
        } else if self.consume(Token::AngleLeft) {
970✔
729
            BoundaryKind::WordStart
6✔
730
        } else if self.consume(Token::AngleRight) {
964✔
731
            BoundaryKind::WordEnd
4✔
732
        } else {
733
            return None;
960✔
734
        };
735
        Some(Rule::Boundary(Boundary::new(kind, self.is_unicode_aware, span)))
58✔
736
    }
1,018✔
737

738
    /// Parses a reference. Supported syntaxes are `::name`, `::3`, `::+3` and
739
    /// `::-3`.
740
    fn parse_reference(&mut self) -> PResult<Option<Rule>> {
960✔
741
        if self.consume(Token::DoubleColon) {
960✔
742
            let start_span = self.last_span();
61✔
743

744
            let target = if self.consume(Token::Plus) {
61✔
745
                let num = self.expect_number::<i32>()?;
1✔
746
                ReferenceTarget::Relative(num)
1✔
747
            } else if self.consume(Token::Dash) {
60✔
748
                let num = self.expect_number::<i32>()?;
2✔
749
                // negating from positive to negative can't overflow, luckily
750
                ReferenceTarget::Relative(-num)
2✔
751
            } else if let Some(num) = self.consume_number(MAX_GROUP_NUMBER)? {
58✔
752
                ReferenceTarget::Number(num)
36✔
753
            } else {
754
                // TODO: Better diagnostic for `::let`
755
                let name = self
22✔
756
                    .expect_as(Token::Identifier)
22✔
757
                    .map_err(|p| PEK::Expected("number or group name").at(p.span))?;
22✔
758
                ReferenceTarget::Named(name.to_string())
22✔
759
            };
760

761
            let span = start_span.join(self.last_span());
61✔
762
            Ok(Some(Rule::Reference(Reference::new(target, span))))
61✔
763
        } else {
764
            Ok(None)
899✔
765
        }
766
    }
960✔
767

768
    fn parse_range(&mut self) -> PResult<Option<Rule>> {
858✔
769
        if self.consume_reserved("range") {
858✔
770
            let span_start = self.last_span();
28✔
771

772
            let first = self.expect_as(Token::String)?;
28✔
773
            let span_1 = self.last_span();
27✔
774
            self.expect(Token::Dash)?;
27✔
775
            let second = self.expect_as(Token::String)?;
27✔
776
            let span_2 = self.last_span();
27✔
777

778
            let radix = if self.consume_reserved("base") {
27✔
779
                let n = self.expect_number()?;
3✔
780
                let span = self.last_span();
2✔
781
                if n > 36 {
2✔
782
                    return Err(PEK::Number(NumberError::TooLarge).at(span));
×
783
                } else if n < 2 {
2✔
784
                    return Err(PEK::Number(NumberError::TooSmall).at(span));
×
785
                }
2✔
786
                n
2✔
787
            } else {
788
                10u8
24✔
789
            };
790

791
            let span = span_start.join(self.last_span());
26✔
792

793
            let start = helper::parse_number(helper::strip_first_last(first), radix)
26✔
794
                .map_err(|k| PEK::from(k).at(span_1))?;
26✔
795
            let end = helper::parse_number(helper::strip_first_last(second), radix)
26✔
796
                .map_err(|k| PEK::from(k).at(span_2))?;
26✔
797

798
            if start.is_empty() || end.is_empty() {
26✔
799
                let span = if start.is_empty() { span_1 } else { span_2 };
1✔
800
                return Err(PEK::Number(NumberError::Empty).at(span));
1✔
801
            }
25✔
802

25✔
803
            if start.len() > end.len() || (start.len() == end.len() && start > end) {
25✔
804
                return Err(PEK::RangeIsNotIncreasing.at(span_1.join(span_2)));
1✔
805
            }
24✔
806

24✔
807
            if start.len() != end.len()
24✔
808
                && (helper::has_leading_zero(&start) || helper::has_leading_zero(&end))
17✔
809
            {
810
                return Err(PEK::RangeLeadingZeroesVariableLength.at(span_1.join(span_2)));
1✔
811
            }
23✔
812

23✔
813
            Ok(Some(Rule::Range(Range::new(
23✔
814
                start.into_boxed_slice(),
23✔
815
                end.into_boxed_slice(),
23✔
816
                radix,
23✔
817
                span,
23✔
818
            ))))
23✔
819
        } else {
820
            Ok(None)
830✔
821
        }
822
    }
858✔
823

824
    /// Parses an unescaped regex expression (`regex "[test]"`)
825
    fn parse_regex(&mut self) -> PResult<Option<Rule>> {
830✔
826
        if self.consume_reserved("regex") {
830✔
827
            let span_start = self.last_span();
10✔
828
            let lit = self.expect_as(Token::String)?;
10✔
829
            let span_end = self.last_span();
10✔
830

831
            let content = helper::parse_quoted_text(lit).map_err(|k| k.at(span_end))?;
10✔
832

833
            let span = span_start.join(span_end);
10✔
834
            Ok(Some(Rule::Regex(Regex::new(content.to_string(), span))))
10✔
835
        } else {
836
            Ok(None)
820✔
837
        }
838
    }
830✔
839

840
    /// Parses a variable (usage site).
841
    fn parse_variable(&mut self) -> PResult<Option<Rule>> {
820✔
842
        let Some(ident) = self.consume_as(Token::Identifier) else { return Ok(None) };
820✔
843
        let span1 = self.last_span();
212✔
844
        let rule = Rule::Variable(Variable::new(ident, span1));
212✔
845
        if let Some((Token::Equals, span2)) = self.peek_pair() {
212✔
846
            return Err(PEK::MissingLetKeyword.at(span1.join(span2)));
1✔
847
        }
211✔
848
        Ok(Some(rule))
211✔
849
    }
820✔
850

851
    /// Parses the dot
852
    fn parse_dot(&mut self) -> Option<Rule> {
608✔
853
        if self.consume(Token::Dot) {
608✔
854
            Some(Rule::Dot)
26✔
855
        } else {
856
            None
582✔
857
        }
858
    }
608✔
859

860
    /// Parses the `recursion` keyword
861
    fn parse_recursion(&mut self) -> Option<Rule> {
582✔
862
        if self.consume_reserved("recursion") {
582✔
863
            Some(Rule::Recursion(Recursion { span: self.last_span() }))
3✔
864
        } else {
865
            None
579✔
866
        }
867
    }
582✔
868
}
869

870
/// Either the source text of a string literal or an already resolved char.
///
/// Use `to_char` to reduce either variant to a single `char`.
#[derive(Copy, Clone)]
enum StringOrChar<'i> {
    /// A string literal as it appears in the input. `to_char` passes it to
    /// `helper::parse_quoted_text`, so it presumably still includes its
    /// quotes.
    String(&'i str),
    /// A single code point.
    Char {
        /// The code point itself.
        c: char,
        // NOTE(review): presumably set when the char came from a shorthand
        // such as `n` or `t` rather than a code point literal; confirm at the
        // construction sites.
        is_shorthand: bool,
    },
}
875

876
impl StringOrChar<'_> {
877
    fn to_char(self) -> Result<char, PEK> {
28✔
878
        Err(PEK::CharString(match self {
28✔
879
            StringOrChar::Char { c, .. } => return Ok(c),
8✔
880
            StringOrChar::String(s) => {
20✔
881
                let s = helper::parse_quoted_text(s)?;
20✔
882
                let mut iter = s.chars();
20✔
883
                match iter.next() {
20✔
884
                    Some(c) if iter.next().is_none() => return Ok(c),
19✔
885
                    Some(_) => CharStringError::TooManyCodePoints,
4✔
886
                    _ => CharStringError::Empty,
1✔
887
                }
888
            }
889
        }))
890
    }
28✔
891
}
892

893
/// Fallible counterpart of [`Option::or_else`], used to chain parser
/// alternatives that can each either match (`Ok(Some(_))`), not match
/// (`Ok(None)`) or fail (`Err(_)`).
trait TryOptionExt<T> {
    /// Returns `Ok(self)` if `self` is `Some`; otherwise calls `f` and
    /// returns its result.
    ///
    /// `f` is invoked at most once, and only when `self` is `None`, so it is
    /// bounded by `FnOnce` like the closure of [`Option::or_else`]. This also
    /// accepts closures that move captured values.
    fn try_or_else<E>(self, f: impl FnOnce() -> Result<Option<T>, E>) -> Result<Option<T>, E>;
}

impl<T> TryOptionExt<T> for Option<T> {
    #[inline(always)]
    fn try_or_else<E>(self, f: impl FnOnce() -> Result<Option<T>, E>) -> Result<Option<T>, E> {
        match self {
            // An earlier alternative already matched; skip the fallback entirely.
            Some(val) => Ok(Some(val)),
            None => f(),
        }
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc