• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pomsky-lang / pomsky / 12099739427

30 Nov 2024 09:51PM UTC coverage: 80.473% (-0.6%) from 81.072%
12099739427

push

github

Aloso
feat: character set intersections

200 of 274 new or added lines in 19 files covered. (72.99%)

3 existing lines in 3 files now uncovered.

4422 of 5495 relevant lines covered (80.47%)

391063.72 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.83
/pomsky-syntax/src/parse/parser_impl.rs
1
use std::collections::HashSet;
2

3
use intersection::Intersection;
4

5
use crate::{
6
    diagnose::{
7
        CharClassError, CharStringError, DeprecationWarning, NumberError, ParseWarningKind,
8
        RepetitionError,
9
    },
10
    error::{ParseError, ParseErrorKind as PEK},
11
    exprs::{negation::Negation, test::*, *},
12
    lexer::Token,
13
    Span,
14
};
15

16
use super::{helper, Parser};
17

18
type PResult<T> = Result<T, ParseError>;
19

20
const MAX_REPETITION: u32 = 65_535;
21

22
impl<'i> Parser<'i> {
23
    pub(super) fn parse_modified(&mut self) -> PResult<Rule> {
803✔
24
        let mut stmts = Vec::new();
803✔
25

803✔
26
        let was_lazy = self.is_lazy;
803✔
27
        let was_unicode_aware = self.is_unicode_aware;
803✔
28

29
        loop {
30
            let Some(stmt) = self
884✔
31
                .parse_mode_modifier()?
884✔
32
                .try_or_else(|| self.parse_let())?
884✔
33
                .try_or_else(|| self.parse_test())?
878✔
34
            else {
35
                break;
793✔
36
            };
37

38
            match &stmt.0 {
7✔
39
                Stmt::Enable(BooleanSetting::Lazy, _) => self.is_lazy = true,
5✔
40
                Stmt::Enable(BooleanSetting::Unicode, _) => self.is_unicode_aware = true,
2✔
41
                Stmt::Disable(BooleanSetting::Lazy, _) => self.is_lazy = false,
1✔
42
                Stmt::Disable(BooleanSetting::Unicode, _) => self.is_unicode_aware = false,
16✔
43
                _ => {}
57✔
44
            }
45

46
            stmts.push(stmt);
81✔
47
        }
48

49
        self.recursion_start()?;
793✔
50
        let mut rule = self.parse_or()?;
791✔
51
        self.recursion_end();
483✔
52

483✔
53
        self.is_lazy = was_lazy;
483✔
54
        self.is_unicode_aware = was_unicode_aware;
483✔
55

483✔
56
        // TODO: This should not be part of the parser
483✔
57
        if stmts.len() > 1 {
483✔
58
            let mut set = HashSet::new();
10✔
59
            for (stmt, _) in &stmts {
32✔
60
                if let Stmt::Let(l) = stmt {
23✔
61
                    if set.contains(l.name()) {
19✔
62
                        return Err(PEK::LetBindingExists.at(l.name_span));
1✔
63
                    }
18✔
64
                    set.insert(l.name());
18✔
65
                }
4✔
66
            }
67
        }
473✔
68

69
        let span_end = rule.span();
482✔
70
        for (stmt, span) in stmts.into_iter().rev() {
482✔
71
            rule = Rule::StmtExpr(Box::new(StmtExpr::new(stmt, rule, span.join(span_end))));
78✔
72
        }
78✔
73

74
        Ok(rule)
482✔
75
    }
803✔
76

77
    fn parse_mode_modifier(&mut self) -> PResult<Option<(Stmt, Span)>> {
884✔
78
        let mode = if self.consume_reserved("enable") {
884✔
79
            true
7✔
80
        } else if self.consume_reserved("disable") {
877✔
81
            false
17✔
82
        } else {
83
            return Ok(None);
860✔
84
        };
85

86
        let span_start = self.last_span();
24✔
87
        let setting = if self.consume_reserved("lazy") {
24✔
88
            BooleanSetting::Lazy
6✔
89
        } else if let Some((Token::Identifier, "unicode")) = self.peek() {
18✔
90
            self.advance();
18✔
91
            BooleanSetting::Unicode
18✔
92
        } else {
93
            return Err(PEK::Expected("`lazy` or `unicode`").at(self.span()));
×
94
        };
95
        self.expect(Token::Semicolon)?;
24✔
96
        let span_end = self.last_span();
24✔
97
        let span = span_start.join(span_end);
24✔
98

99
        let stmt = if mode { Stmt::Enable(setting, span) } else { Stmt::Disable(setting, span) };
24✔
100

101
        Ok(Some((stmt, span)))
24✔
102
    }
884✔
103

104
    fn parse_let(&mut self) -> PResult<Option<(Stmt, Span)>> {
860✔
105
        if self.consume_reserved("let") {
860✔
106
            let span_start = self.last_span();
49✔
107
            let name_span = self.span();
49✔
108
            let name = self.expect_as(Token::Identifier).map_err(|e| {
49✔
109
                if self.is(Token::ReservedName) {
2✔
110
                    PEK::KeywordAfterLet(self.source_at(self.span()).to_owned()).at(e.span)
1✔
111
                } else {
112
                    e
1✔
113
                }
114
            })?;
49✔
115

116
            self.expect(Token::Equals)?;
47✔
117

118
            self.recursion_start()?;
46✔
119
            let rule = self.parse_or()?;
46✔
120
            self.recursion_end();
44✔
121

44✔
122
            self.expect(Token::Semicolon)
44✔
123
                .map_err(|p| PEK::Expected("expression or `;`").at(p.span))?;
44✔
124
            let span_end = self.last_span();
43✔
125

43✔
126
            Ok(Some((Stmt::Let(Let::new(name, rule, name_span)), span_start.join(span_end))))
43✔
127
        } else {
128
            Ok(None)
811✔
129
        }
130
    }
860✔
131

132
    fn parse_test(&mut self) -> PResult<Option<(Stmt, Span)>> {
811✔
133
        if self.consume_reserved("test") {
811✔
134
            let span_start = self.last_span();
18✔
135
            self.expect(Token::OpenBrace)?;
18✔
136

137
            let mut cases = Vec::new();
18✔
138
            while let Some(case) = self.parse_test_cases()? {
48✔
139
                cases.push(case);
30✔
140
            }
30✔
141

142
            self.expect(Token::CloseBrace)?;
14✔
143
            let span_end = self.last_span();
14✔
144
            let span = span_start.join(span_end);
14✔
145

14✔
146
            Ok(Some((Stmt::Test(Test { cases, span }), span)))
14✔
147
        } else {
148
            Ok(None)
793✔
149
        }
150
    }
811✔
151

152
    fn parse_test_cases(&mut self) -> PResult<Option<TestCase>> {
48✔
153
        if self.consume_contextual_keyword("match") {
48✔
154
            let mut matches = Vec::new();
22✔
155
            let mut literal = None;
22✔
156

157
            if let Some((Token::Identifier, "in")) = self.peek() {
22✔
158
            } else {
2✔
159
                matches.push(self.parse_test_match()?);
20✔
160
                while self.consume(Token::Comma) {
23✔
161
                    matches.push(self.parse_test_match()?);
4✔
162
                }
163
            }
164

165
            if self.consume_contextual_keyword("in") {
21✔
166
                literal = self.parse_literal()?;
9✔
167
                if literal.is_none() {
9✔
168
                    return Err(PEK::ExpectedToken(Token::String).at(self.span()));
×
169
                };
9✔
170
            }
12✔
171
            self.expect(Token::Semicolon)?;
21✔
172

173
            if let Some(literal) = literal {
20✔
174
                Ok(Some(TestCase::MatchAll(TestCaseMatchAll { literal, matches })))
9✔
175
            } else if matches.len() > 1 {
11✔
176
                let span = matches[0].span.join(matches.last().unwrap().span);
1✔
177
                Err(PEK::MultipleStringsInTestCase.at(span))
1✔
178
            } else {
179
                let match_ = matches.pop().unwrap();
10✔
180
                Ok(Some(TestCase::Match(match_)))
10✔
181
            }
182
        } else if self.consume_contextual_keyword("reject") {
26✔
183
            let as_substring = self.consume_contextual_keyword("in");
12✔
184

185
            let Some(literal) = self.parse_literal()? else {
12✔
186
                return Err(PEK::ExpectedToken(Token::String).at(self.span()));
×
187
            };
188

189
            self.expect(Token::Semicolon)?;
12✔
190

191
            Ok(Some(TestCase::Reject(TestCaseReject { literal, as_substring })))
11✔
192
        } else {
193
            Ok(None)
14✔
194
        }
195
    }
48✔
196

197
    fn parse_test_match(&mut self) -> PResult<TestCaseMatch> {
24✔
198
        let Some(literal) = self.parse_literal()? else {
24✔
199
            return Err(PEK::ExpectedToken(Token::String).at(self.span()));
×
200
        };
201
        let span_start = self.last_span();
24✔
202

24✔
203
        let mut captures = Vec::new();
24✔
204

24✔
205
        if self.consume_contextual_keyword("as") {
24✔
206
            self.expect(Token::OpenBrace)?;
10✔
207

208
            let mut is_first = true;
10✔
209
            loop {
210
                if !is_first && !self.consume(Token::Comma) {
20✔
211
                    break;
7✔
212
                }
13✔
213
                let Some(capture) = self.parse_test_capture()? else {
13✔
214
                    break;
3✔
215
                };
216
                captures.push(capture);
10✔
217
                is_first = false;
10✔
218
            }
219

220
            self.expect(Token::CloseBrace)?;
10✔
221
        }
14✔
222

223
        let span_end = self.last_span();
23✔
224
        Ok(TestCaseMatch { literal, captures, span: span_start.join(span_end) })
23✔
225
    }
24✔
226

227
    fn parse_test_capture(&mut self) -> PResult<Option<TestCapture>> {
13✔
228
        let ident = if let Some(n) = self.consume_number(u16::MAX)? {
13✔
229
            CaptureIdent::Index(n)
6✔
230
        } else if let Some(name) = self.consume_as(Token::Identifier) {
7✔
231
            CaptureIdent::Name(name.to_string())
4✔
232
        } else {
233
            return Ok(None);
3✔
234
        };
235
        let ident_span = self.last_span();
10✔
236

10✔
237
        self.expect(Token::Colon)?;
10✔
238
        let Some(literal) = self.parse_literal()? else {
10✔
239
            return Err(PEK::ExpectedToken(Token::String).at(self.span()));
×
240
        };
241
        Ok(Some(TestCapture { ident, ident_span, literal }))
10✔
242
    }
13✔
243

244
    fn parse_or(&mut self) -> PResult<Rule> {
837✔
245
        let mut span = self.span();
837✔
246
        let leading_pipe = self.consume(Token::Pipe);
837✔
247

837✔
248
        let mut alts = Vec::new();
837✔
249
        if let Some(first_alt) = self.parse_and()? {
837✔
250
            alts.push(first_alt);
496✔
251

252
            while self.consume(Token::Pipe) {
534✔
253
                if let Some(next_alt) = self.parse_and()? {
38✔
254
                    span = span.join(next_alt.span());
38✔
255
                    alts.push(next_alt);
38✔
256
                } else {
38✔
257
                    return Err(PEK::LonePipe.at(self.last_span()));
×
258
                }
259
            }
260

261
            if alts.len() == 1 {
496✔
262
                Ok(alts.pop().unwrap())
470✔
263
            } else {
264
                Ok(Alternation::new_expr(alts))
26✔
265
            }
266
        } else if leading_pipe {
37✔
267
            Err(PEK::LonePipe.at(span))
6✔
268
        } else {
269
            Ok(Alternation::new_expr(alts))
31✔
270
        }
271
    }
837✔
272

273
    fn parse_and(&mut self) -> PResult<Option<Rule>> {
875✔
274
        let span_start = self.span();
875✔
275
        let has_leading_ampersand = self.consume(Token::Ampersand);
875✔
276

277
        let Some(first_sequence) = self.parse_sequence()? else {
875✔
278
            if has_leading_ampersand {
37✔
NEW
279
                return Err(PEK::Expected("expression").at(self.span()));
×
280
            }
37✔
281
            return Ok(None);
37✔
282
        };
283
        if !self.is(Token::Ampersand) {
534✔
284
            return Ok(Some(first_sequence));
527✔
285
        }
7✔
286

7✔
287
        let mut rules = Vec::with_capacity(2);
7✔
288
        rules.push(first_sequence);
7✔
289
        loop {
290
            if !self.consume(Token::Ampersand) {
14✔
291
                return Ok(Some(
7✔
292
                    Intersection::new_expr(rules, span_start).expect("intersection can't be empty"),
7✔
293
                ));
7✔
294
            }
7✔
295

296
            let Some(next_sequence) = self.parse_sequence()? else {
7✔
NEW
297
                return Err(PEK::Expected("expression").at(self.span()));
×
298
            };
299
            rules.push(next_sequence);
7✔
300
        }
301
    }
875✔
302

303
    fn parse_sequence(&mut self) -> PResult<Option<Rule>> {
882✔
304
        let mut fixes = Vec::new();
882✔
305
        while let Some(fix) = self.parse_fixes()? {
1,980✔
306
            fixes.push(fix);
1,098✔
307
        }
1,098✔
308

309
        Ok(if fixes.is_empty() {
578✔
310
            None
37✔
311
        } else if fixes.len() == 1 {
541✔
312
            Some(fixes.pop().unwrap())
405✔
313
        } else {
314
            let start = fixes.first().map(Rule::span).unwrap_or_default();
136✔
315
            let end = fixes.last().map(Rule::span).unwrap_or_default();
136✔
316
            let span = start.join(end);
136✔
317

136✔
318
            Some(Rule::Group(Group::new(fixes, GroupKind::Implicit, span)))
136✔
319
        })
320
    }
882✔
321

322
    fn parse_fixes(&mut self) -> PResult<Option<Rule>> {
1,980✔
323
        let mut nots_span = self.span();
1,980✔
324
        let mut nots = 0usize;
1,980✔
325
        while self.consume(Token::Not) {
2,067✔
326
            nots += 1;
87✔
327
            nots_span = nots_span.join(self.last_span());
87✔
328
        }
87✔
329

330
        let Some(mut rule) = self.parse_lookaround()?.try_or_else(|| self.parse_atom())? else {
1,980✔
331
            if nots == 0 {
579✔
332
                return Ok(None);
578✔
333
            } else {
334
                return Err(PEK::Expected("expression").at(self.span()));
1✔
335
            }
336
        };
337

338
        for _ in 0..nots {
1,109✔
339
            rule = Rule::Negation(Box::new(Negation { rule, not_span: nots_span }));
85✔
340
        }
85✔
341

342
        while let Some((kind, quantifier, span)) = self.parse_repetition()? {
1,233✔
343
            let span = rule.span().join(span);
124✔
344
            rule = Rule::Repetition(Box::new(Repetition::new(rule, kind, quantifier, span)));
124✔
345
        }
124✔
346

347
        Ok(Some(rule))
1,098✔
348
    }
1,980✔
349

350
    fn parse_lookaround(&mut self) -> PResult<Option<Rule>> {
1,980✔
351
        let kind = if self.consume(Token::LookAhead) {
1,980✔
352
            LookaroundKind::Ahead
19✔
353
        } else if self.consume(Token::LookBehind) {
1,961✔
354
            LookaroundKind::Behind
15✔
355
        } else {
356
            return Ok(None);
1,946✔
357
        };
358
        let start_span = self.last_span();
34✔
359

34✔
360
        self.recursion_start()?;
34✔
361
        let rule = self.parse_modified()?;
34✔
362
        self.recursion_end();
34✔
363

34✔
364
        let span = rule.span();
34✔
365
        Ok(Some(Rule::Lookaround(Box::new(Lookaround::new(rule, kind, start_span.join(span))))))
34✔
366
    }
1,980✔
367

368
    /// Parse a repetition that can follow an atom: `+`, `?`, `*`, `{x}`,
369
    /// `{x,}`, `{,x}` or `{x,y}` optionally followed by the `greedy` or
370
    /// `lazy` keyword. `x` and `y` are number literals.
371
    fn parse_repetition(&mut self) -> PResult<Option<(RepetitionKind, Quantifier, Span)>> {
1,233✔
372
        let start = self.span();
1,233✔
373

374
        let kind = if self.consume(Token::Plus) {
1,233✔
375
            RepetitionKind::one_inf()
45✔
376
        } else if self.consume(Token::Star) {
1,188✔
377
            RepetitionKind::zero_inf()
23✔
378
        } else if self.consume(Token::QuestionMark) {
1,165✔
379
            RepetitionKind::zero_one()
35✔
380
        } else if let Some(kind) = self.parse_repetition_braces()? {
1,130✔
381
            kind
27✔
382
        } else {
383
            return Ok(None);
1,098✔
384
        };
385

386
        let quantifier = if self.consume_reserved("greedy") {
130✔
387
            Quantifier::Greedy
1✔
388
        } else if self.consume_reserved("lazy") {
129✔
389
            Quantifier::Lazy
8✔
390
        } else if self.is_lazy {
121✔
391
            Quantifier::DefaultLazy
8✔
392
        } else {
393
            Quantifier::DefaultGreedy
113✔
394
        };
395

396
        let multi_span = self.span();
130✔
397
        if self.consume(Token::Plus) || self.consume(Token::Star) {
130✔
398
            return Err(PEK::Repetition(RepetitionError::Multi).at(multi_span));
2✔
399
        } else if self.consume(Token::QuestionMark) {
128✔
400
            return Err(PEK::Repetition(RepetitionError::QmSuffix).at(multi_span));
2✔
401
        } else if self.parse_repetition_braces()?.is_some() {
126✔
402
            return Err(
2✔
403
                PEK::Repetition(RepetitionError::Multi).at(multi_span.join(self.last_span()))
2✔
404
            );
2✔
405
        }
124✔
406

124✔
407
        let end = self.last_span();
124✔
408
        Ok(Some((kind, quantifier, start.join(end))))
124✔
409
    }
1,233✔
410

411
    /// Parse `{2}`, `{2,}`, `{,2}` or `{2,5}`.
412
    fn parse_repetition_braces(&mut self) -> PResult<Option<RepetitionKind>> {
1,256✔
413
        if self.consume(Token::OpenBrace) {
1,256✔
414
            let num_start = self.span();
34✔
415

416
            // Both numbers and the comma are parsed optionally, then we check that one
417
            // of the allowed syntaxes is used: There must be at least one number, and if
418
            // there are two numbers, the comma is required. It also checks that the
419
            // numbers are in increasing order.
420
            let lower = self.consume_number(65_535)?;
34✔
421
            let comma = self.consume(Token::Comma);
34✔
422
            let upper = self.consume_number(65_535)?;
34✔
423

424
            let num_end = self.last_span();
33✔
425
            let num_span = num_start.join(num_end);
33✔
426

427
            let kind = match (lower, comma, upper) {
33✔
428
                (lower, true, upper) => (lower.unwrap_or(0), upper)
21✔
429
                    .try_into()
21✔
430
                    .map_err(|e| PEK::Repetition(e).at(num_span))?,
21✔
431

432
                (Some(_), false, Some(_)) => return Err(PEK::Expected("`}` or `,`").at(num_end)),
×
433
                (Some(rep), false, None) | (None, false, Some(rep)) => RepetitionKind::fixed(rep),
11✔
434
                (None, false, None) => return Err(PEK::Expected("number").at(self.span())),
1✔
435
            };
436

437
            self.expect(Token::CloseBrace)?;
31✔
438

439
            Ok(Some(kind))
29✔
440
        } else {
441
            Ok(None)
1,222✔
442
        }
443
    }
1,256✔
444

445
    /// Parses an atom by trying each atom parser in a fixed order and
    /// returning the first result: group, string, char set, boundary,
    /// reference, code point, range, regex, variable, dot, recursion.
    ///
    /// Returns `Ok(None)` if none of them produces a rule; the first error
    /// from a fallible parser is propagated.
    fn parse_atom(&mut self) -> PResult<Option<Rule>> {
        if let Some(rule) = self.parse_group()? {
            return Ok(Some(rule));
        }
        if let Some(rule) = self.parse_string()? {
            return Ok(Some(rule));
        }
        if let Some(rule) = self.parse_char_set()? {
            return Ok(Some(rule));
        }
        if let Some(rule) = self.parse_boundary() {
            return Ok(Some(rule));
        }
        if let Some(rule) = self.parse_reference()? {
            return Ok(Some(rule));
        }
        if let Some(rule) = self.parse_code_point_rule()? {
            return Ok(Some(rule));
        }
        if let Some(rule) = self.parse_range()? {
            return Ok(Some(rule));
        }
        if let Some(rule) = self.parse_regex()? {
            return Ok(Some(rule));
        }
        if let Some(rule) = self.parse_variable()? {
            return Ok(Some(rule));
        }
        if let Some(rule) = self.parse_dot() {
            return Ok(Some(rule));
        }
        Ok(self.parse_recursion())
    }
1,946✔
459

460
    /// Parses a (possibly capturing) group, e.g. `(E E | E)` or `:name(E)`.
461
    fn parse_group(&mut self) -> PResult<Option<Rule>> {
1,946✔
462
        let (kind, start_span) = self.parse_group_kind()?;
1,946✔
463
        if !kind.is_normal() {
1,943✔
464
            self.expect(Token::OpenParen)?;
88✔
465
        } else if !self.consume(Token::OpenParen) {
1,855✔
466
            return Ok(None);
1,512✔
467
        }
343✔
468

469
        self.recursion_start()?;
430✔
470
        let rule = self.parse_modified()?;
430✔
471
        self.recursion_end();
172✔
472

172✔
473
        self.expect(Token::CloseParen)
172✔
474
            .map_err(|p| PEK::Expected("`)` or an expression").at(p.span))?;
172✔
475
        // start_span may be 0..0, so we need to use join_unchecked
476
        let span = start_span.join_unchecked(self.last_span());
170✔
477

170✔
478
        let rule = Rule::Group(Group::new(vec![rule], kind, span));
170✔
479
        Ok(Some(rule))
170✔
480
    }
1,946✔
481

482
    /// Parses `:name` or just `:`. Returns the span of the colon with the name.
483
    fn parse_group_kind(&mut self) -> PResult<(GroupKind, Span)> {
1,946✔
484
        if self.consume_reserved("atomic") {
1,946✔
485
            let span = self.last_span();
3✔
486
            Ok((GroupKind::Atomic, span))
3✔
487
        } else if self.consume(Token::Colon) {
1,943✔
488
            let span = self.last_span();
88✔
489

490
            if let Some(keyword) = self.consume_as(Token::ReservedName) {
88✔
491
                return Err(PEK::KeywordAfterColon(keyword.into()).at(self.last_span()));
1✔
492
            }
87✔
493

87✔
494
            let name = self.consume_as(Token::Identifier);
87✔
495
            if let Some(name) = name {
87✔
496
                if let Some(invalid_index) = name.find(|c: char| !c.is_ascii_alphanumeric()) {
156✔
497
                    let c = name[invalid_index..].chars().next().unwrap();
1✔
498
                    let start = self.last_span().range_unchecked().start + invalid_index;
1✔
499
                    let len = c.len_utf8();
1✔
500
                    return Err(PEK::NonAsciiIdentAfterColon(c).at(Span::new(start, start + len)));
1✔
501
                }
52✔
502

52✔
503
                if name.len() > 32 {
52✔
504
                    return Err(PEK::GroupNameTooLong(name.len()).at(self.last_span()));
1✔
505
                }
51✔
506
            }
34✔
507
            Ok((GroupKind::Capturing(Capture::new(name)), span))
85✔
508
        } else {
509
            Ok((GroupKind::Normal, self.span().start()))
1,855✔
510
        }
511
    }
1,946✔
512

513
    /// Parses a string literal.
514
    fn parse_string(&mut self) -> PResult<Option<Rule>> {
1,512✔
515
        Ok(self.parse_literal()?.map(Rule::Literal))
1,512✔
516
    }
1,512✔
517

518
    fn parse_literal(&mut self) -> PResult<Option<Literal>> {
1,567✔
519
        if let Some(s) = self.consume_as(Token::String) {
1,567✔
520
            let span = self.last_span();
300✔
521
            let content = helper::parse_quoted_text(s).map_err(|k| k.at(span))?;
300✔
522
            Ok(Some(Literal::new(content.to_string(), span)))
299✔
523
        } else {
524
            Ok(None)
1,267✔
525
        }
526
    }
1,567✔
527

528
    /// Parses a char set, surrounded by `[` `]`. This was previously called a
529
    /// "char class", but that name is ambiguous and is being phased out.
530
    ///
531
    /// This function does _not_ parse exclamation marks in front of a char
532
    /// class, because negation is handled separately.
533
    fn parse_char_set(&mut self) -> PResult<Option<Rule>> {
1,267✔
534
        if self.consume(Token::OpenBracket) {
1,267✔
535
            let start_span = self.last_span();
249✔
536

249✔
537
            if self.consume(Token::Caret) {
249✔
538
                return Err(PEK::CharClass(CharClassError::CaretInGroup).at(self.last_span()));
2✔
539
            }
247✔
540

541
            let inner = self.parse_char_set_inner()?;
247✔
542

543
            self.expect(Token::CloseBracket).map_err(|p| {
233✔
544
                PEK::Expected("character class, string, code point, Unicode property or `]`")
1✔
545
                    .at(p.span)
1✔
546
            })?;
233✔
547
            let span = start_span.join(self.last_span());
232✔
548

232✔
549
            if inner.is_empty() {
232✔
550
                return Err(PEK::CharClass(CharClassError::Empty).at(span));
1✔
551
            }
231✔
552

231✔
553
            Ok(Some(Rule::CharClass(CharClass::new(inner, span, self.is_unicode_aware))))
231✔
554
        } else {
555
            Ok(None)
1,018✔
556
        }
557
    }
1,267✔
558

559
    /// Parses a char group, i.e. the contents of a char set. This is a sequence
560
    /// of characters, character classes, character ranges or Unicode
561
    /// properties. Some of them can be negated.
562
    fn parse_char_set_inner(&mut self) -> PResult<Vec<GroupItem>> {
247✔
563
        let mut items = Vec::new();
247✔
564
        loop {
565
            let mut nots_span = self.span();
542✔
566
            let mut nots = 0usize;
542✔
567
            while self.consume(Token::Not) {
574✔
568
                nots += 1;
32✔
569
                nots_span = nots_span.join(self.last_span());
32✔
570
            }
32✔
571

572
            let group = if let Some(group) = self.parse_char_group_chars_or_range()? {
542✔
573
                if nots > 0 {
111✔
574
                    return Err(PEK::UnallowedNot.at(nots_span));
1✔
575
                }
110✔
576
                group
110✔
577
            } else if let Some(group) = self.parse_char_group_ident(nots % 2 != 0)? {
424✔
578
                if nots > 1 {
186✔
579
                    return Err(PEK::UnallowedMultiNot(nots).at(nots_span));
1✔
580
                }
185✔
581
                group
185✔
582
            } else if nots > 0 {
233✔
583
                return Err(PEK::ExpectedToken(Token::Identifier).at(self.span()));
×
584
            } else {
585
                break;
233✔
586
            };
587
            items.extend(group);
295✔
588
        }
589

590
        Ok(items)
233✔
591
    }
247✔
592

593
    /// Parses an identifier or dot in a char set
594
    fn parse_char_group_ident(&mut self, negative: bool) -> PResult<Option<Vec<GroupItem>>> {
424✔
595
        if self.consume(Token::Identifier) {
424✔
596
            let span = self.last_span();
189✔
597

189✔
598
            let before_colon = self.source_at(span);
189✔
599
            let after_colon = if self.consume(Token::Colon) {
189✔
600
                Some(self.expect_as(Token::Identifier)?)
14✔
601
            } else {
602
                None
175✔
603
            };
604
            let (kind, name, span) = match after_colon {
189✔
605
                Some(name) => (Some(before_colon), name, span.join(self.last_span())),
14✔
606
                None => (None, before_colon, span),
175✔
607
            };
608

609
            let item = CharGroup::try_from_group_name(kind, name, negative, span)
189✔
610
                .map_err(|e| e.at(span))?;
189✔
611

612
            Ok(Some(item))
186✔
613
        } else if let Some(name) = self.consume_as(Token::ReservedName) {
235✔
614
            Err(PEK::UnexpectedKeyword(name.to_owned()).at(self.last_span()))
2✔
615
        } else {
616
            Ok(None)
233✔
617
        }
618
    }
424✔
619

620
    /// Parses a string literal or a character range in a char set, e.g. `"axd"`
621
    /// or `'0'-'7'`.
622
    fn parse_char_group_chars_or_range(&mut self) -> PResult<Option<Vec<GroupItem>>> {
542✔
623
        let span1 = self.span();
542✔
624
        let Some(first) = self.parse_string_or_char()? else {
542✔
625
            return Ok(None);
424✔
626
        };
627

628
        if self.consume(Token::Dash) {
118✔
629
            let span2 = self.span();
17✔
630
            let Some(last) = self.parse_string_or_char()? else {
17✔
631
                return Err(PEK::Expected("code point or character").at(self.span()));
1✔
632
            };
633

634
            if let StringOrChar::Char { is_shorthand: true, c } = first {
16✔
635
                self.add_warning(
1✔
636
                    ParseWarningKind::Deprecation(DeprecationWarning::ShorthandInRange(c))
1✔
637
                        .at(span1),
1✔
638
                );
1✔
639
            }
15✔
640
            if let StringOrChar::Char { is_shorthand: true, c } = last {
16✔
641
                self.add_warning(
1✔
642
                    ParseWarningKind::Deprecation(DeprecationWarning::ShorthandInRange(c))
1✔
643
                        .at(span2),
1✔
644
                );
1✔
645
            }
15✔
646

647
            let first = first.to_char().map_err(|e| e.at(span1))?;
16✔
648
            let last = last.to_char().map_err(|e| e.at(span2))?;
12✔
649

650
            let group = CharGroup::try_from_range(first, last).ok_or_else(|| {
11✔
651
                PEK::CharClass(CharClassError::NonAscendingRange(first, last)).at(span1.join(span2))
1✔
652
            })?;
11✔
653
            Ok(Some(group))
10✔
654
        } else {
655
            let group = match first {
101✔
656
                StringOrChar::String(s) => {
50✔
657
                    let chars = helper::parse_quoted_text(s).map_err(|k| k.at(span1))?;
50✔
658
                    chars.chars().map(GroupItem::Char).collect()
50✔
659
                }
660
                StringOrChar::Char { c, .. } => vec![GroupItem::Char(c)],
51✔
661
            };
662
            Ok(Some(group))
101✔
663
        }
664
    }
542✔
665

666
    fn parse_string_or_char(&mut self) -> PResult<Option<StringOrChar<'i>>> {
559✔
667
        let res = if let Some(s) = self.consume_as(Token::String) {
559✔
668
            StringOrChar::String(s)
75✔
669
        } else if let Some((c, _)) = self.parse_code_point()? {
484✔
670
            StringOrChar::Char { c, is_shorthand: false }
12✔
671
        } else if let Some(c) = self.parse_special_char() {
472✔
672
            StringOrChar::Char { c, is_shorthand: true }
47✔
673
        } else {
674
            return Ok(None);
425✔
675
        };
676
        Ok(Some(res))
134✔
677
    }
559✔
678

679
    fn parse_code_point(&mut self) -> PResult<Option<(char, Span)>> {
1,383✔
680
        if let Some(cp) = self.consume_as(Token::CodePoint) {
1,383✔
681
            let span = self.last_span();
53✔
682
            let trimmed_u = cp[1..].trim_start();
53✔
683
            if !trimmed_u.starts_with('+') {
53✔
684
                let warning = DeprecationWarning::Unicode(cp.into());
×
685
                self.add_warning(ParseWarningKind::Deprecation(warning).at(span))
×
686
            }
53✔
687

688
            let hex = trimmed_u.trim_start_matches(|c: char| c == '+' || c.is_whitespace());
106✔
689

53✔
690
            u32::from_str_radix(hex, 16)
53✔
691
                .ok()
53✔
692
                .and_then(|n| char::try_from(n).ok())
53✔
693
                .map(|c| Some((c, span)))
53✔
694
                .ok_or_else(|| PEK::InvalidCodePoint.at(span))
53✔
695
        } else {
696
            Ok(None)
1,330✔
697
        }
698
    }
1,383✔
699

700
    fn parse_code_point_rule(&mut self) -> PResult<Option<Rule>> {
899✔
701
        if let Some((c, span)) = self.parse_code_point()? {
899✔
702
            Ok(Some(Rule::CharClass(CharClass::new(
38✔
703
                vec![GroupItem::Char(c)],
38✔
704
                span,
38✔
705
                self.is_unicode_aware,
38✔
706
            ))))
38✔
707
        } else {
708
            Ok(None)
858✔
709
        }
710
    }
899✔
711

712
    fn parse_special_char(&mut self) -> Option<char> {
472✔
713
        if let Some((Token::Identifier, string)) = self.peek() {
472✔
714
            let c = match string {
236✔
715
                "n" => '\n',
236✔
716
                "r" => '\r',
223✔
717
                "t" => '\t',
211✔
718
                "a" => '\u{07}',
208✔
719
                "e" => '\u{1B}',
204✔
720
                "f" => '\u{0C}',
201✔
721
                _ => return None,
189✔
722
            };
723
            self.advance();
47✔
724
            Some(c)
47✔
725
        } else {
726
            None
236✔
727
        }
728
    }
472✔
729

730
    /// Parses a boundary. For start and end, there are two syntaxes: `^` and `$`.
731
    /// Word boundaries are `%`.
732
    ///
733
    /// The deprecated syntax issues a warning.
734
    ///
735
    /// This function does _not_ parse negated negated word boundaries (`!%`),
736
    /// since negation is handled elsewhere. It also does _not_ parse the
737
    /// `Start` and `End` global variables.
738
    fn parse_boundary(&mut self) -> Option<Rule> {
1,018✔
739
        let span = self.span();
1,018✔
740
        let kind = if self.consume(Token::Caret) {
1,018✔
741
            BoundaryKind::Start
12✔
742
        } else if self.consume(Token::Dollar) {
1,006✔
743
            BoundaryKind::End
9✔
744
        } else if self.consume(Token::BWord) {
997✔
745
            BoundaryKind::Word
27✔
746
        } else if self.consume(Token::AngleLeft) {
970✔
747
            BoundaryKind::WordStart
6✔
748
        } else if self.consume(Token::AngleRight) {
964✔
749
            BoundaryKind::WordEnd
4✔
750
        } else {
751
            return None;
960✔
752
        };
753
        Some(Rule::Boundary(Boundary::new(kind, self.is_unicode_aware, span)))
58✔
754
    }
1,018✔
755

756
    /// Parses a reference. Supported syntaxes are `::name`, `::3`, `::+3` and
757
    /// `::-3`.
758
    fn parse_reference(&mut self) -> PResult<Option<Rule>> {
960✔
759
        if self.consume(Token::DoubleColon) {
960✔
760
            let start_span = self.last_span();
61✔
761

762
            let target = if self.consume(Token::Plus) {
61✔
763
                let num = self.expect_number::<i32>()?;
1✔
764
                ReferenceTarget::Relative(num)
1✔
765
            } else if self.consume(Token::Dash) {
60✔
766
                let num = self.expect_number::<i32>()?;
2✔
767
                // negating from positive to negative can't overflow, luckily
768
                ReferenceTarget::Relative(-num)
2✔
769
            } else if let Some(num) = self.consume_number(MAX_REPETITION)? {
58✔
770
                ReferenceTarget::Number(num)
36✔
771
            } else {
772
                // TODO: Better diagnostic for `::let`
773
                let name = self
22✔
774
                    .expect_as(Token::Identifier)
22✔
775
                    .map_err(|p| PEK::Expected("number or group name").at(p.span))?;
22✔
776
                ReferenceTarget::Named(name.to_string())
22✔
777
            };
778

779
            let span = start_span.join(self.last_span());
61✔
780
            Ok(Some(Rule::Reference(Reference::new(target, span))))
61✔
781
        } else {
782
            Ok(None)
899✔
783
        }
784
    }
960✔
785

786
    fn parse_range(&mut self) -> PResult<Option<Rule>> {
858✔
787
        if self.consume_reserved("range") {
858✔
788
            let span_start = self.last_span();
28✔
789

790
            let first = self.expect_as(Token::String)?;
28✔
791
            let span_1 = self.last_span();
27✔
792
            self.expect(Token::Dash)?;
27✔
793
            let second = self.expect_as(Token::String)?;
27✔
794
            let span_2 = self.last_span();
27✔
795

796
            let radix = if self.consume_reserved("base") {
27✔
797
                let n = self.expect_number()?;
3✔
798
                let span = self.last_span();
2✔
799
                if n > 36 {
2✔
800
                    return Err(PEK::Number(NumberError::TooLarge).at(span));
×
801
                } else if n < 2 {
2✔
802
                    return Err(PEK::Number(NumberError::TooSmall).at(span));
×
803
                }
2✔
804
                n
2✔
805
            } else {
806
                10u8
24✔
807
            };
808

809
            let span = span_start.join(self.last_span());
26✔
810

811
            let start = helper::parse_number(helper::strip_first_last(first), radix)
26✔
812
                .map_err(|k| PEK::from(k).at(span_1))?;
26✔
813
            let end = helper::parse_number(helper::strip_first_last(second), radix)
26✔
814
                .map_err(|k| PEK::from(k).at(span_2))?;
26✔
815

816
            if start.is_empty() || end.is_empty() {
26✔
817
                let span = if start.is_empty() { span_1 } else { span_2 };
1✔
818
                return Err(PEK::Number(NumberError::Empty).at(span));
1✔
819
            }
25✔
820

25✔
821
            if start.len() > end.len() || (start.len() == end.len() && start > end) {
25✔
822
                return Err(PEK::RangeIsNotIncreasing.at(span_1.join(span_2)));
1✔
823
            }
24✔
824

24✔
825
            if start.len() != end.len()
24✔
826
                && (helper::has_leading_zero(&start) || helper::has_leading_zero(&end))
17✔
827
            {
828
                return Err(PEK::RangeLeadingZeroesVariableLength.at(span_1.join(span_2)));
1✔
829
            }
23✔
830

23✔
831
            Ok(Some(Rule::Range(Range::new(
23✔
832
                start.into_boxed_slice(),
23✔
833
                end.into_boxed_slice(),
23✔
834
                radix,
23✔
835
                span,
23✔
836
            ))))
23✔
837
        } else {
838
            Ok(None)
830✔
839
        }
840
    }
858✔
841

842
    /// Parses an unescaped regex expression (`regex "[test]"`)
843
    fn parse_regex(&mut self) -> PResult<Option<Rule>> {
830✔
844
        if self.consume_reserved("regex") {
830✔
845
            let span_start = self.last_span();
10✔
846
            let lit = self.expect_as(Token::String)?;
10✔
847
            let span_end = self.last_span();
10✔
848

849
            let content = helper::parse_quoted_text(lit).map_err(|k| k.at(span_end))?;
10✔
850

851
            let span = span_start.join(span_end);
10✔
852
            Ok(Some(Rule::Regex(Regex::new(content.to_string(), span))))
10✔
853
        } else {
854
            Ok(None)
820✔
855
        }
856
    }
830✔
857

858
    /// Parses a variable (usage site).
859
    fn parse_variable(&mut self) -> PResult<Option<Rule>> {
820✔
860
        if let Some(ident) = self.consume_as(Token::Identifier) {
820✔
861
            let span1 = self.last_span();
212✔
862
            let rule = Rule::Variable(Variable::new(ident, span1));
212✔
863
            if let Some((Token::Equals, span2)) = self.peek_pair() {
212✔
864
                return Err(PEK::MissingLetKeyword.at(span1.join(span2)));
1✔
865
            }
211✔
866
            Ok(Some(rule))
211✔
867
        } else {
868
            Ok(None)
608✔
869
        }
870
    }
820✔
871

872
    /// Parses the dot
873
    fn parse_dot(&mut self) -> Option<Rule> {
608✔
874
        if self.consume(Token::Dot) {
608✔
875
            Some(Rule::Dot)
26✔
876
        } else {
877
            None
582✔
878
        }
879
    }
608✔
880

881
    /// Parses the `recursion` keyword
882
    fn parse_recursion(&mut self) -> Option<Rule> {
582✔
883
        if self.consume_reserved("recursion") {
582✔
884
            Some(Rule::Recursion(Recursion { span: self.last_span() }))
3✔
885
        } else {
886
            None
579✔
887
        }
888
    }
582✔
889
}
890

891
#[derive(Clone, Copy)]
892
enum StringOrChar<'i> {
893
    String(&'i str),
894
    Char { c: char, is_shorthand: bool },
895
}
896

897
impl StringOrChar<'_> {
898
    fn to_char(self) -> Result<char, PEK> {
28✔
899
        Err(PEK::CharString(match self {
28✔
900
            StringOrChar::Char { c, .. } => return Ok(c),
8✔
901
            StringOrChar::String(s) => {
20✔
902
                let s = helper::parse_quoted_text(s)?;
20✔
903
                let mut iter = s.chars();
20✔
904
                match iter.next() {
20✔
905
                    Some(c) if iter.next().is_none() => return Ok(c),
19✔
906
                    Some(_) => CharStringError::TooManyCodePoints,
4✔
907
                    _ => CharStringError::Empty,
1✔
908
                }
909
            }
910
        }))
911
    }
28✔
912
}
913

914
/// Extension trait for chaining optional, fallible alternatives.
///
/// It is the fallible counterpart of `Option::or_else`: the fallback may
/// itself fail with an error of type `E`.
trait TryOptionExt<T> {
    /// Returns `Ok(self)` unchanged if `self` is `Some`; otherwise calls `f`
    /// and returns its result.
    ///
    /// `f` is invoked at most once, so any `FnOnce` closure is accepted,
    /// including closures that move captured values out.
    fn try_or_else<E>(self, f: impl FnOnce() -> Result<Option<T>, E>) -> Result<Option<T>, E>;
}

impl<T> TryOptionExt<T> for Option<T> {
    #[inline(always)]
    fn try_or_else<E>(self, f: impl FnOnce() -> Result<Option<T>, E>) -> Result<Option<T>, E> {
        match self {
            // A value is already present: the fallback is never evaluated.
            Some(val) => Ok(Some(val)),
            None => f(),
        }
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc