• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pomsky-lang / pomsky / 12076846628

29 Nov 2024 12:11AM UTC coverage: 81.241% (-1.6%) from 82.811%
12076846628

push

github

Aloso
chore: try linking PCRE2 statically

4296 of 5288 relevant lines covered (81.24%)

406362.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.07
/pomsky-syntax/src/parse/parser_impl.rs
1
use std::collections::HashSet;
2

3
use crate::{
4
    diagnose::{
5
        CharClassError, CharStringError, DeprecationWarning, NumberError, ParseWarningKind,
6
        RepetitionError,
7
    },
8
    error::{ParseError, ParseErrorKind as PEK},
9
    exprs::{negation::Negation, test::*, *},
10
    lexer::Token,
11
    Span,
12
};
13

14
use super::{helper, Parser};
15

16
/// Result type returned by the parser methods in this file: the parsed value
/// or a [`ParseError`].
type PResult<T> = Result<T, ParseError>;
17

18
/// Limit handed to `consume_number` when a numeric reference (`::3`) is
/// parsed; presumably the largest number accepted there.
// NOTE(review): the name suggests this is also the repetition bound — confirm.
const MAX_REPETITION: u32 = 65_535;
19

20
impl<'i> Parser<'i> {
21
    pub(super) fn parse_modified(&mut self) -> PResult<Rule> {
794✔
22
        let mut stmts = Vec::new();
794✔
23

794✔
24
        let was_lazy = self.is_lazy;
794✔
25
        let was_unicode_aware = self.is_unicode_aware;
794✔
26

27
        loop {
28
            let Some(stmt) = self
874✔
29
                .parse_mode_modifier()?
874✔
30
                .try_or_else(|| self.parse_let())?
874✔
31
                .try_or_else(|| self.parse_test())?
868✔
32
            else {
33
                break;
784✔
34
            };
35

36
            match &stmt.0 {
7✔
37
                Stmt::Enable(BooleanSetting::Lazy, _) => self.is_lazy = true,
5✔
38
                Stmt::Enable(BooleanSetting::Unicode, _) => self.is_unicode_aware = true,
2✔
39
                Stmt::Disable(BooleanSetting::Lazy, _) => self.is_lazy = false,
1✔
40
                Stmt::Disable(BooleanSetting::Unicode, _) => self.is_unicode_aware = false,
16✔
41
                _ => {}
56✔
42
            }
43

44
            stmts.push(stmt);
80✔
45
        }
46

47
        self.recursion_start()?;
784✔
48
        let mut rule = self.parse_or()?;
782✔
49
        self.recursion_end();
474✔
50

474✔
51
        self.is_lazy = was_lazy;
474✔
52
        self.is_unicode_aware = was_unicode_aware;
474✔
53

474✔
54
        // TODO: This should not be part of the parser
474✔
55
        if stmts.len() > 1 {
474✔
56
            let mut set = HashSet::new();
10✔
57
            for (stmt, _) in &stmts {
32✔
58
                if let Stmt::Let(l) = stmt {
23✔
59
                    if set.contains(l.name()) {
19✔
60
                        return Err(PEK::LetBindingExists.at(l.name_span));
1✔
61
                    }
18✔
62
                    set.insert(l.name());
18✔
63
                }
4✔
64
            }
65
        }
464✔
66

67
        let span_end = rule.span();
473✔
68
        for (stmt, span) in stmts.into_iter().rev() {
473✔
69
            rule = Rule::StmtExpr(Box::new(StmtExpr::new(stmt, rule, span.join(span_end))));
77✔
70
        }
77✔
71

72
        Ok(rule)
473✔
73
    }
794✔
74

75
    fn parse_mode_modifier(&mut self) -> PResult<Option<(Stmt, Span)>> {
874✔
76
        let mode = if self.consume_reserved("enable") {
874✔
77
            true
7✔
78
        } else if self.consume_reserved("disable") {
867✔
79
            false
17✔
80
        } else {
81
            return Ok(None);
850✔
82
        };
83

84
        let span_start = self.last_span();
24✔
85
        let setting = if self.consume_reserved("lazy") {
24✔
86
            BooleanSetting::Lazy
6✔
87
        } else if let Some((Token::Identifier, "unicode")) = self.peek() {
18✔
88
            self.advance();
18✔
89
            BooleanSetting::Unicode
18✔
90
        } else {
91
            return Err(PEK::Expected("`lazy` or `unicode`").at(self.span()));
×
92
        };
93
        self.expect(Token::Semicolon)?;
24✔
94
        let span_end = self.last_span();
24✔
95
        let span = span_start.join(span_end);
24✔
96

97
        let stmt = if mode { Stmt::Enable(setting, span) } else { Stmt::Disable(setting, span) };
24✔
98

99
        Ok(Some((stmt, span)))
24✔
100
    }
874✔
101

102
    fn parse_let(&mut self) -> PResult<Option<(Stmt, Span)>> {
850✔
103
        if self.consume_reserved("let") {
850✔
104
            let span_start = self.last_span();
48✔
105
            let name_span = self.span();
48✔
106
            let name = self.expect_as(Token::Identifier).map_err(|e| {
48✔
107
                if self.is(Token::ReservedName) {
2✔
108
                    PEK::KeywordAfterLet(self.source_at(self.span()).to_owned()).at(e.span)
1✔
109
                } else {
110
                    e
1✔
111
                }
112
            })?;
48✔
113

114
            self.expect(Token::Equals)?;
46✔
115

116
            self.recursion_start()?;
45✔
117
            let rule = self.parse_or()?;
45✔
118
            self.recursion_end();
43✔
119

43✔
120
            self.expect(Token::Semicolon)
43✔
121
                .map_err(|p| PEK::Expected("expression or `;`").at(p.span))?;
43✔
122
            let span_end = self.last_span();
42✔
123

42✔
124
            Ok(Some((Stmt::Let(Let::new(name, rule, name_span)), span_start.join(span_end))))
42✔
125
        } else {
126
            Ok(None)
802✔
127
        }
128
    }
850✔
129

130
    fn parse_test(&mut self) -> PResult<Option<(Stmt, Span)>> {
802✔
131
        if self.consume_reserved("test") {
802✔
132
            let span_start = self.last_span();
18✔
133
            self.expect(Token::OpenBrace)?;
18✔
134

135
            let mut cases = Vec::new();
18✔
136
            while let Some(case) = self.parse_test_cases()? {
48✔
137
                cases.push(case);
30✔
138
            }
30✔
139

140
            self.expect(Token::CloseBrace)?;
14✔
141
            let span_end = self.last_span();
14✔
142
            let span = span_start.join(span_end);
14✔
143

14✔
144
            Ok(Some((Stmt::Test(Test { cases, span }), span)))
14✔
145
        } else {
146
            Ok(None)
784✔
147
        }
148
    }
802✔
149

150
    fn parse_test_cases(&mut self) -> PResult<Option<TestCase>> {
48✔
151
        if self.consume_contextual_keyword("match") {
48✔
152
            let mut matches = Vec::new();
22✔
153
            let mut literal = None;
22✔
154

155
            if let Some((Token::Identifier, "in")) = self.peek() {
22✔
156
            } else {
2✔
157
                matches.push(self.parse_test_match()?);
20✔
158
                while self.consume(Token::Comma) {
23✔
159
                    matches.push(self.parse_test_match()?);
4✔
160
                }
161
            }
162

163
            if self.consume_contextual_keyword("in") {
21✔
164
                literal = self.parse_literal()?;
9✔
165
                if literal.is_none() {
9✔
166
                    return Err(PEK::ExpectedToken(Token::String).at(self.span()));
×
167
                };
9✔
168
            }
12✔
169
            self.expect(Token::Semicolon)?;
21✔
170

171
            if let Some(literal) = literal {
20✔
172
                Ok(Some(TestCase::MatchAll(TestCaseMatchAll { literal, matches })))
9✔
173
            } else if matches.len() > 1 {
11✔
174
                let span = matches[0].span.join(matches.last().unwrap().span);
1✔
175
                Err(PEK::MultipleStringsInTestCase.at(span))
1✔
176
            } else {
177
                let match_ = matches.pop().unwrap();
10✔
178
                Ok(Some(TestCase::Match(match_)))
10✔
179
            }
180
        } else if self.consume_contextual_keyword("reject") {
26✔
181
            let as_substring = self.consume_contextual_keyword("in");
12✔
182

183
            let Some(literal) = self.parse_literal()? else {
12✔
184
                return Err(PEK::ExpectedToken(Token::String).at(self.span()));
×
185
            };
186

187
            self.expect(Token::Semicolon)?;
12✔
188

189
            Ok(Some(TestCase::Reject(TestCaseReject { literal, as_substring })))
11✔
190
        } else {
191
            Ok(None)
14✔
192
        }
193
    }
48✔
194

195
    fn parse_test_match(&mut self) -> PResult<TestCaseMatch> {
24✔
196
        let Some(literal) = self.parse_literal()? else {
24✔
197
            return Err(PEK::ExpectedToken(Token::String).at(self.span()));
×
198
        };
199
        let span_start = self.last_span();
24✔
200

24✔
201
        let mut captures = Vec::new();
24✔
202

24✔
203
        if self.consume_contextual_keyword("as") {
24✔
204
            self.expect(Token::OpenBrace)?;
10✔
205

206
            let mut is_first = true;
10✔
207
            loop {
208
                if !is_first && !self.consume(Token::Comma) {
20✔
209
                    break;
7✔
210
                }
13✔
211
                let Some(capture) = self.parse_test_capture()? else {
13✔
212
                    break;
3✔
213
                };
214
                captures.push(capture);
10✔
215
                is_first = false;
10✔
216
            }
217

218
            self.expect(Token::CloseBrace)?;
10✔
219
        }
14✔
220

221
        let span_end = self.last_span();
23✔
222
        Ok(TestCaseMatch { literal, captures, span: span_start.join(span_end) })
23✔
223
    }
24✔
224

225
    fn parse_test_capture(&mut self) -> PResult<Option<TestCapture>> {
13✔
226
        let ident = if let Some(n) = self.consume_number(u16::MAX)? {
13✔
227
            CaptureIdent::Index(n)
6✔
228
        } else if let Some(name) = self.consume_as(Token::Identifier) {
7✔
229
            CaptureIdent::Name(name.to_string())
4✔
230
        } else {
231
            return Ok(None);
3✔
232
        };
233
        let ident_span = self.last_span();
10✔
234

10✔
235
        self.expect(Token::Colon)?;
10✔
236
        let Some(literal) = self.parse_literal()? else {
10✔
237
            return Err(PEK::ExpectedToken(Token::String).at(self.span()));
×
238
        };
239
        Ok(Some(TestCapture { ident, ident_span, literal }))
10✔
240
    }
13✔
241

242
    fn parse_or(&mut self) -> PResult<Rule> {
827✔
243
        let mut span = self.span();
827✔
244
        let leading_pipe = self.consume(Token::Pipe);
827✔
245

827✔
246
        let mut alts = Vec::new();
827✔
247
        if let Some(first_alt) = self.parse_sequence()? {
827✔
248
            alts.push(first_alt);
486✔
249

250
            while self.consume(Token::Pipe) {
524✔
251
                if let Some(next_alt) = self.parse_sequence()? {
38✔
252
                    span = span.join(next_alt.span());
38✔
253
                    alts.push(next_alt);
38✔
254
                } else {
38✔
255
                    return Err(PEK::LonePipe.at(self.last_span()));
×
256
                }
257
            }
258

259
            if alts.len() == 1 {
486✔
260
                Ok(alts.pop().unwrap())
460✔
261
            } else {
262
                Ok(Alternation::new_expr(alts))
26✔
263
            }
264
        } else if leading_pipe {
37✔
265
            Err(PEK::LonePipe.at(span))
6✔
266
        } else {
267
            Ok(Alternation::new_expr(alts))
31✔
268
        }
269
    }
827✔
270

271
    fn parse_sequence(&mut self) -> PResult<Option<Rule>> {
865✔
272
        let mut fixes = Vec::new();
865✔
273
        while let Some(fix) = self.parse_fixes()? {
1,946✔
274
            fixes.push(fix);
1,081✔
275
        }
1,081✔
276

277
        Ok(if fixes.is_empty() {
561✔
278
            None
37✔
279
        } else if fixes.len() == 1 {
524✔
280
            Some(fixes.pop().unwrap())
388✔
281
        } else {
282
            let start = fixes.first().map(Rule::span).unwrap_or_default();
136✔
283
            let end = fixes.last().map(Rule::span).unwrap_or_default();
136✔
284
            let span = start.join(end);
136✔
285

136✔
286
            Some(Rule::Group(Group::new(fixes, GroupKind::Implicit, span)))
136✔
287
        })
288
    }
865✔
289

290
    fn parse_fixes(&mut self) -> PResult<Option<Rule>> {
1,946✔
291
        let mut nots_span = self.span();
1,946✔
292
        let mut nots = 0usize;
1,946✔
293
        while self.consume(Token::Not) {
2,031✔
294
            nots += 1;
85✔
295
            nots_span = nots_span.join(self.last_span());
85✔
296
        }
85✔
297

298
        let Some(mut rule) = self.parse_lookaround()?.try_or_else(|| self.parse_atom())? else {
1,946✔
299
            if nots == 0 {
562✔
300
                return Ok(None);
561✔
301
            } else {
302
                return Err(PEK::Expected("expression").at(self.span()));
1✔
303
            }
304
        };
305

306
        for _ in 0..nots {
1,092✔
307
            rule = Rule::Negation(Box::new(Negation { rule, not_span: nots_span }));
83✔
308
        }
83✔
309

310
        while let Some((kind, quantifier, span)) = self.parse_repetition()? {
1,216✔
311
            let span = rule.span().join(span);
124✔
312
            rule = Rule::Repetition(Box::new(Repetition::new(rule, kind, quantifier, span)));
124✔
313
        }
124✔
314

315
        Ok(Some(rule))
1,081✔
316
    }
1,946✔
317

318
    fn parse_lookaround(&mut self) -> PResult<Option<Rule>> {
1,946✔
319
        let kind = if self.consume(Token::LookAhead) {
1,946✔
320
            LookaroundKind::Ahead
19✔
321
        } else if self.consume(Token::LookBehind) {
1,927✔
322
            LookaroundKind::Behind
15✔
323
        } else {
324
            return Ok(None);
1,912✔
325
        };
326
        let start_span = self.last_span();
34✔
327

34✔
328
        self.recursion_start()?;
34✔
329
        let rule = self.parse_modified()?;
34✔
330
        self.recursion_end();
34✔
331

34✔
332
        let span = rule.span();
34✔
333
        Ok(Some(Rule::Lookaround(Box::new(Lookaround::new(rule, kind, start_span.join(span))))))
34✔
334
    }
1,946✔
335

336
    /// Parse a repetition that can follow an atom: `+`, `?`, `*`, `{x}`,
337
    /// `{x,}`, `{,x}` or `{x,y}` optionally followed by the `greedy` or
338
    /// `lazy` keyword. `x` and `y` are number literals.
339
    fn parse_repetition(&mut self) -> PResult<Option<(RepetitionKind, Quantifier, Span)>> {
1,216✔
340
        let start = self.span();
1,216✔
341

342
        let kind = if self.consume(Token::Plus) {
1,216✔
343
            RepetitionKind::one_inf()
45✔
344
        } else if self.consume(Token::Star) {
1,171✔
345
            RepetitionKind::zero_inf()
23✔
346
        } else if self.consume(Token::QuestionMark) {
1,148✔
347
            RepetitionKind::zero_one()
35✔
348
        } else if let Some(kind) = self.parse_repetition_braces()? {
1,113✔
349
            kind
27✔
350
        } else {
351
            return Ok(None);
1,081✔
352
        };
353

354
        let quantifier = if self.consume_reserved("greedy") {
130✔
355
            Quantifier::Greedy
1✔
356
        } else if self.consume_reserved("lazy") {
129✔
357
            Quantifier::Lazy
8✔
358
        } else if self.is_lazy {
121✔
359
            Quantifier::DefaultLazy
8✔
360
        } else {
361
            Quantifier::DefaultGreedy
113✔
362
        };
363

364
        let multi_span = self.span();
130✔
365
        if self.consume(Token::Plus) || self.consume(Token::Star) {
130✔
366
            return Err(PEK::Repetition(RepetitionError::Multi).at(multi_span));
2✔
367
        } else if self.consume(Token::QuestionMark) {
128✔
368
            return Err(PEK::Repetition(RepetitionError::QmSuffix).at(multi_span));
2✔
369
        } else if self.parse_repetition_braces()?.is_some() {
126✔
370
            return Err(
2✔
371
                PEK::Repetition(RepetitionError::Multi).at(multi_span.join(self.last_span()))
2✔
372
            );
2✔
373
        }
124✔
374

124✔
375
        let end = self.last_span();
124✔
376
        Ok(Some((kind, quantifier, start.join(end))))
124✔
377
    }
1,216✔
378

379
    /// Parse `{2}`, `{2,}`, `{,2}` or `{2,5}`.
380
    fn parse_repetition_braces(&mut self) -> PResult<Option<RepetitionKind>> {
1,239✔
381
        if self.consume(Token::OpenBrace) {
1,239✔
382
            let num_start = self.span();
34✔
383

384
            // Both numbers and the comma are parsed optionally, then we check that one
385
            // of the allowed syntaxes is used: There must be at least one number, and if
386
            // there are two numbers, the comma is required. It also checks that the
387
            // numbers are in increasing order.
388
            let lower = self.consume_number(65_535)?;
34✔
389
            let comma = self.consume(Token::Comma);
34✔
390
            let upper = self.consume_number(65_535)?;
34✔
391

392
            let num_end = self.last_span();
33✔
393
            let num_span = num_start.join(num_end);
33✔
394

395
            let kind = match (lower, comma, upper) {
33✔
396
                (lower, true, upper) => (lower.unwrap_or(0), upper)
21✔
397
                    .try_into()
21✔
398
                    .map_err(|e| PEK::Repetition(e).at(num_span))?,
21✔
399

400
                (Some(_), false, Some(_)) => return Err(PEK::Expected("`}` or `,`").at(num_end)),
×
401
                (Some(rep), false, None) | (None, false, Some(rep)) => RepetitionKind::fixed(rep),
11✔
402
                (None, false, None) => return Err(PEK::Expected("number").at(self.span())),
1✔
403
            };
404

405
            self.expect(Token::CloseBrace)?;
31✔
406

407
            Ok(Some(kind))
29✔
408
        } else {
409
            Ok(None)
1,205✔
410
        }
411
    }
1,239✔
412

413
    /// Parses an atom by trying the individual atom parsers one after the
    /// other: group, string, char set, boundary, reference, code point,
    /// range, regex, variable, dot and recursion.
    ///
    /// Returns `Ok(None)` if none of them produces a rule.
    fn parse_atom(&mut self) -> PResult<Option<Rule>> {
        let atom = self
            .parse_group()?
            .try_or_else(|| self.parse_string())?
            .try_or_else(|| self.parse_char_set())?
            .or_else(|| self.parse_boundary())
            .try_or_else(|| self.parse_reference())?
            .try_or_else(|| self.parse_code_point_rule())?
            .try_or_else(|| self.parse_range())?
            .try_or_else(|| self.parse_regex())?
            .try_or_else(|| self.parse_variable())?
            .or_else(|| self.parse_dot())
            .or_else(|| self.parse_recursion());

        Ok(atom)
    }
1,912✔
427

428
    /// Parses a (possibly capturing) group, e.g. `(E E | E)` or `:name(E)`.
429
    fn parse_group(&mut self) -> PResult<Option<Rule>> {
1,912✔
430
        let (kind, start_span) = self.parse_group_kind()?;
1,912✔
431
        if !kind.is_normal() {
1,909✔
432
            self.expect(Token::OpenParen)?;
88✔
433
        } else if !self.consume(Token::OpenParen) {
1,821✔
434
            return Ok(None);
1,480✔
435
        }
341✔
436

437
        self.recursion_start()?;
428✔
438
        let rule = self.parse_modified()?;
428✔
439
        self.recursion_end();
170✔
440

170✔
441
        self.expect(Token::CloseParen)
170✔
442
            .map_err(|p| PEK::Expected("`)` or an expression").at(p.span))?;
170✔
443
        // start_span may be 0..0, so we need to use join_unchecked
444
        let span = start_span.join_unchecked(self.last_span());
168✔
445

168✔
446
        let rule = Rule::Group(Group::new(vec![rule], kind, span));
168✔
447
        Ok(Some(rule))
168✔
448
    }
1,912✔
449

450
    /// Parses `:name` or just `:`. Returns the span of the colon with the name.
451
    fn parse_group_kind(&mut self) -> PResult<(GroupKind, Span)> {
1,912✔
452
        if self.consume_reserved("atomic") {
1,912✔
453
            let span = self.last_span();
3✔
454
            Ok((GroupKind::Atomic, span))
3✔
455
        } else if self.consume(Token::Colon) {
1,909✔
456
            let span = self.last_span();
88✔
457

458
            if let Some(keyword) = self.consume_as(Token::ReservedName) {
88✔
459
                return Err(PEK::KeywordAfterColon(keyword.into()).at(self.last_span()));
1✔
460
            }
87✔
461

87✔
462
            let name = self.consume_as(Token::Identifier);
87✔
463
            if let Some(name) = name {
87✔
464
                if let Some(invalid_index) = name.find(|c: char| !c.is_ascii_alphanumeric()) {
156✔
465
                    let c = name[invalid_index..].chars().next().unwrap();
1✔
466
                    let start = self.last_span().range_unchecked().start + invalid_index;
1✔
467
                    let len = c.len_utf8();
1✔
468
                    return Err(PEK::NonAsciiIdentAfterColon(c).at(Span::new(start, start + len)));
1✔
469
                }
52✔
470

52✔
471
                if name.len() > 32 {
52✔
472
                    return Err(PEK::GroupNameTooLong(name.len()).at(self.last_span()));
1✔
473
                }
51✔
474
            }
34✔
475
            Ok((GroupKind::Capturing(Capture::new(name)), span))
85✔
476
        } else {
477
            Ok((GroupKind::Normal, self.span().start()))
1,821✔
478
        }
479
    }
1,912✔
480

481
    /// Parses a string literal.
482
    fn parse_string(&mut self) -> PResult<Option<Rule>> {
1,480✔
483
        Ok(self.parse_literal()?.map(Rule::Literal))
1,480✔
484
    }
1,480✔
485

486
    fn parse_literal(&mut self) -> PResult<Option<Literal>> {
1,535✔
487
        if let Some(s) = self.consume_as(Token::String) {
1,535✔
488
            let span = self.last_span();
300✔
489
            let content = helper::parse_quoted_text(s).map_err(|k| k.at(span))?;
300✔
490
            Ok(Some(Literal::new(content.to_string(), span)))
299✔
491
        } else {
492
            Ok(None)
1,235✔
493
        }
494
    }
1,535✔
495

496
    /// Parses a char set, surrounded by `[` `]`. This was previously called a
497
    /// "char class", but that name is ambiguous and is being phased out.
498
    ///
499
    /// This function does _not_ parse exclamation marks in front of a char
500
    /// class, because negation is handled separately.
501
    fn parse_char_set(&mut self) -> PResult<Option<Rule>> {
1,235✔
502
        if self.consume(Token::OpenBracket) {
1,235✔
503
            let start_span = self.last_span();
235✔
504

235✔
505
            if self.consume(Token::Caret) {
235✔
506
                return Err(PEK::CharClass(CharClassError::CaretInGroup).at(self.last_span()));
2✔
507
            }
233✔
508

509
            let inner = self.parse_char_set_inner()?;
233✔
510

511
            self.expect(Token::CloseBracket).map_err(|p| {
219✔
512
                PEK::Expected("character class, string, code point, Unicode property or `]`")
1✔
513
                    .at(p.span)
1✔
514
            })?;
219✔
515
            let span = start_span.join(self.last_span());
218✔
516

218✔
517
            if inner.is_empty() {
218✔
518
                return Err(PEK::CharClass(CharClassError::Empty).at(span));
1✔
519
            }
217✔
520

217✔
521
            Ok(Some(Rule::CharClass(CharClass::new(inner, span, self.is_unicode_aware))))
217✔
522
        } else {
523
            Ok(None)
1,000✔
524
        }
525
    }
1,235✔
526

527
    /// Parses a char group, i.e. the contents of a char set. This is a sequence
528
    /// of characters, character classes, character ranges or Unicode
529
    /// properties. Some of them can be negated.
530
    fn parse_char_set_inner(&mut self) -> PResult<Vec<GroupItem>> {
233✔
531
        let mut items = Vec::new();
233✔
532
        loop {
533
            let mut nots_span = self.span();
513✔
534
            let mut nots = 0usize;
513✔
535
            while self.consume(Token::Not) {
545✔
536
                nots += 1;
32✔
537
                nots_span = nots_span.join(self.last_span());
32✔
538
            }
32✔
539

540
            let group = if let Some(group) = self.parse_char_group_chars_or_range()? {
513✔
541
                if nots > 0 {
106✔
542
                    return Err(PEK::UnallowedNot.at(nots_span));
1✔
543
                }
105✔
544
                group
105✔
545
            } else if let Some(group) = self.parse_char_group_ident(nots % 2 != 0)? {
400✔
546
                if nots > 1 {
176✔
547
                    return Err(PEK::UnallowedMultiNot(nots).at(nots_span));
1✔
548
                }
175✔
549
                group
175✔
550
            } else if nots > 0 {
219✔
551
                return Err(PEK::ExpectedToken(Token::Identifier).at(self.span()));
×
552
            } else {
553
                break;
219✔
554
            };
555
            items.extend(group);
280✔
556
        }
557

558
        Ok(items)
219✔
559
    }
233✔
560

561
    /// Parses an identifier or dot in a char set
562
    fn parse_char_group_ident(&mut self, negative: bool) -> PResult<Option<Vec<GroupItem>>> {
400✔
563
        if self.consume(Token::Identifier) {
400✔
564
            let span = self.last_span();
179✔
565

179✔
566
            let before_colon = self.source_at(span);
179✔
567
            let after_colon = if self.consume(Token::Colon) {
179✔
568
                Some(self.expect_as(Token::Identifier)?)
14✔
569
            } else {
570
                None
165✔
571
            };
572
            let (kind, name, span) = match after_colon {
179✔
573
                Some(name) => (Some(before_colon), name, span.join(self.last_span())),
14✔
574
                None => (None, before_colon, span),
165✔
575
            };
576

577
            let item = CharGroup::try_from_group_name(kind, name, negative, span)
179✔
578
                .map_err(|e| e.at(span))?;
179✔
579

580
            Ok(Some(item))
176✔
581
        } else if let Some(name) = self.consume_as(Token::ReservedName) {
221✔
582
            Err(PEK::UnexpectedKeyword(name.to_owned()).at(self.last_span()))
2✔
583
        } else {
584
            Ok(None)
219✔
585
        }
586
    }
400✔
587

588
    /// Parses a string literal or a character range in a char set, e.g. `"axd"`
589
    /// or `'0'-'7'`.
590
    fn parse_char_group_chars_or_range(&mut self) -> PResult<Option<Vec<GroupItem>>> {
513✔
591
        let span1 = self.span();
513✔
592
        let Some(first) = self.parse_string_or_char()? else {
513✔
593
            return Ok(None);
400✔
594
        };
595

596
        if self.consume(Token::Dash) {
113✔
597
            let span2 = self.span();
13✔
598
            let Some(last) = self.parse_string_or_char()? else {
13✔
599
                return Err(PEK::Expected("code point or character").at(self.span()));
1✔
600
            };
601

602
            if let StringOrChar::Char { is_shorthand: true, c } = first {
12✔
603
                self.add_warning(
1✔
604
                    ParseWarningKind::Deprecation(DeprecationWarning::ShorthandInRange(c))
1✔
605
                        .at(span1),
1✔
606
                );
1✔
607
            }
11✔
608
            if let StringOrChar::Char { is_shorthand: true, c } = last {
12✔
609
                self.add_warning(
1✔
610
                    ParseWarningKind::Deprecation(DeprecationWarning::ShorthandInRange(c))
1✔
611
                        .at(span2),
1✔
612
                );
1✔
613
            }
11✔
614

615
            let first = first.to_char().map_err(|e| e.at(span1))?;
12✔
616
            let last = last.to_char().map_err(|e| e.at(span2))?;
8✔
617

618
            let group = CharGroup::try_from_range(first, last).ok_or_else(|| {
7✔
619
                PEK::CharClass(CharClassError::NonAscendingRange(first, last)).at(span1.join(span2))
1✔
620
            })?;
7✔
621
            Ok(Some(group))
6✔
622
        } else {
623
            let group = match first {
100✔
624
                StringOrChar::String(s) => {
49✔
625
                    let chars = helper::parse_quoted_text(s).map_err(|k| k.at(span1))?;
49✔
626
                    chars.chars().map(GroupItem::Char).collect()
49✔
627
                }
628
                StringOrChar::Char { c, .. } => vec![GroupItem::Char(c)],
51✔
629
            };
630
            Ok(Some(group))
100✔
631
        }
632
    }
513✔
633

634
    fn parse_string_or_char(&mut self) -> PResult<Option<StringOrChar<'i>>> {
526✔
635
        let res = if let Some(s) = self.consume_as(Token::String) {
526✔
636
            StringOrChar::String(s)
66✔
637
        } else if let Some((c, _)) = self.parse_code_point()? {
460✔
638
            StringOrChar::Char { c, is_shorthand: false }
12✔
639
        } else if let Some(c) = self.parse_special_char() {
448✔
640
            StringOrChar::Char { c, is_shorthand: true }
47✔
641
        } else {
642
            return Ok(None);
401✔
643
        };
644
        Ok(Some(res))
125✔
645
    }
526✔
646

647
    fn parse_code_point(&mut self) -> PResult<Option<(char, Span)>> {
1,341✔
648
        if let Some(cp) = self.consume_as(Token::CodePoint) {
1,341✔
649
            let span = self.last_span();
53✔
650
            let trimmed_u = cp[1..].trim_start();
53✔
651
            if !trimmed_u.starts_with('+') {
53✔
652
                let warning = DeprecationWarning::Unicode(cp.into());
×
653
                self.add_warning(ParseWarningKind::Deprecation(warning).at(span))
×
654
            }
53✔
655

656
            let hex = trimmed_u.trim_start_matches(|c: char| c == '+' || c.is_whitespace());
106✔
657

53✔
658
            u32::from_str_radix(hex, 16)
53✔
659
                .ok()
53✔
660
                .and_then(|n| char::try_from(n).ok())
53✔
661
                .map(|c| Some((c, span)))
53✔
662
                .ok_or_else(|| PEK::InvalidCodePoint.at(span))
53✔
663
        } else {
664
            Ok(None)
1,288✔
665
        }
666
    }
1,341✔
667

668
    fn parse_code_point_rule(&mut self) -> PResult<Option<Rule>> {
881✔
669
        if let Some((c, span)) = self.parse_code_point()? {
881✔
670
            Ok(Some(Rule::CharClass(CharClass::new(
38✔
671
                vec![GroupItem::Char(c)],
38✔
672
                span,
38✔
673
                self.is_unicode_aware,
38✔
674
            ))))
38✔
675
        } else {
676
            Ok(None)
840✔
677
        }
678
    }
881✔
679

680
    fn parse_special_char(&mut self) -> Option<char> {
448✔
681
        if let Some((Token::Identifier, string)) = self.peek() {
448✔
682
            let c = match string {
226✔
683
                "n" => '\n',
226✔
684
                "r" => '\r',
213✔
685
                "t" => '\t',
201✔
686
                "a" => '\u{07}',
198✔
687
                "e" => '\u{1B}',
194✔
688
                "f" => '\u{0C}',
191✔
689
                _ => return None,
179✔
690
            };
691
            self.advance();
47✔
692
            Some(c)
47✔
693
        } else {
694
            None
222✔
695
        }
696
    }
448✔
697

698
    /// Parses a boundary. For start and end, there are two syntaxes: `^` and `$`.
699
    /// Word boundaries are `%`.
700
    ///
701
    /// The deprecated syntax issues a warning.
702
    ///
703
    /// This function does _not_ parse negated negated word boundaries (`!%`),
704
    /// since negation is handled elsewhere. It also does _not_ parse the
705
    /// `Start` and `End` global variables.
706
    fn parse_boundary(&mut self) -> Option<Rule> {
1,000✔
707
        let span = self.span();
1,000✔
708
        let kind = if self.consume(Token::Caret) {
1,000✔
709
            BoundaryKind::Start
12✔
710
        } else if self.consume(Token::Dollar) {
988✔
711
            BoundaryKind::End
9✔
712
        } else if self.consume(Token::BWord) {
979✔
713
            BoundaryKind::Word
27✔
714
        } else if self.consume(Token::AngleLeft) {
952✔
715
            BoundaryKind::WordStart
6✔
716
        } else if self.consume(Token::AngleRight) {
946✔
717
            BoundaryKind::WordEnd
4✔
718
        } else {
719
            return None;
942✔
720
        };
721
        Some(Rule::Boundary(Boundary::new(kind, self.is_unicode_aware, span)))
58✔
722
    }
1,000✔
723

724
    /// Parses a reference. Supported syntaxes are `::name`, `::3`, `::+3` and
725
    /// `::-3`.
726
    fn parse_reference(&mut self) -> PResult<Option<Rule>> {
942✔
727
        if self.consume(Token::DoubleColon) {
942✔
728
            let start_span = self.last_span();
61✔
729

730
            let target = if self.consume(Token::Plus) {
61✔
731
                let num = self.expect_number::<i32>()?;
1✔
732
                ReferenceTarget::Relative(num)
1✔
733
            } else if self.consume(Token::Dash) {
60✔
734
                let num = self.expect_number::<i32>()?;
2✔
735
                // negating from positive to negative can't overflow, luckily
736
                ReferenceTarget::Relative(-num)
2✔
737
            } else if let Some(num) = self.consume_number(MAX_REPETITION)? {
58✔
738
                ReferenceTarget::Number(num)
36✔
739
            } else {
740
                // TODO: Better diagnostic for `::let`
741
                let name = self
22✔
742
                    .expect_as(Token::Identifier)
22✔
743
                    .map_err(|p| PEK::Expected("number or group name").at(p.span))?;
22✔
744
                ReferenceTarget::Named(name.to_string())
22✔
745
            };
746

747
            let span = start_span.join(self.last_span());
61✔
748
            Ok(Some(Rule::Reference(Reference::new(target, span))))
61✔
749
        } else {
750
            Ok(None)
881✔
751
        }
752
    }
942✔
753

754
    fn parse_range(&mut self) -> PResult<Option<Rule>> {
840✔
755
        if self.consume_reserved("range") {
840✔
756
            let span_start = self.last_span();
28✔
757

758
            let first = self.expect_as(Token::String)?;
28✔
759
            let span_1 = self.last_span();
27✔
760
            self.expect(Token::Dash)?;
27✔
761
            let second = self.expect_as(Token::String)?;
27✔
762
            let span_2 = self.last_span();
27✔
763

764
            let radix = if self.consume_reserved("base") {
27✔
765
                let n = self.expect_number()?;
3✔
766
                let span = self.last_span();
2✔
767
                if n > 36 {
2✔
768
                    return Err(PEK::Number(NumberError::TooLarge).at(span));
×
769
                } else if n < 2 {
2✔
770
                    return Err(PEK::Number(NumberError::TooSmall).at(span));
×
771
                }
2✔
772
                n
2✔
773
            } else {
774
                10u8
24✔
775
            };
776

777
            let span = span_start.join(self.last_span());
26✔
778

779
            let start = helper::parse_number(helper::strip_first_last(first), radix)
26✔
780
                .map_err(|k| PEK::from(k).at(span_1))?;
26✔
781
            let end = helper::parse_number(helper::strip_first_last(second), radix)
26✔
782
                .map_err(|k| PEK::from(k).at(span_2))?;
26✔
783

784
            if start.is_empty() || end.is_empty() {
26✔
785
                let span = if start.is_empty() { span_1 } else { span_2 };
1✔
786
                return Err(PEK::Number(NumberError::Empty).at(span));
1✔
787
            }
25✔
788

25✔
789
            if start.len() > end.len() || (start.len() == end.len() && start > end) {
25✔
790
                return Err(PEK::RangeIsNotIncreasing.at(span_1.join(span_2)));
1✔
791
            }
24✔
792

24✔
793
            if start.len() != end.len()
24✔
794
                && (helper::has_leading_zero(&start) || helper::has_leading_zero(&end))
17✔
795
            {
796
                return Err(PEK::RangeLeadingZeroesVariableLength.at(span_1.join(span_2)));
1✔
797
            }
23✔
798

23✔
799
            Ok(Some(Rule::Range(Range::new(
23✔
800
                start.into_boxed_slice(),
23✔
801
                end.into_boxed_slice(),
23✔
802
                radix,
23✔
803
                span,
23✔
804
            ))))
23✔
805
        } else {
806
            Ok(None)
812✔
807
        }
808
    }
840✔
809

810
    /// Parses an unescaped regex expression (`regex "[test]"`)
811
    fn parse_regex(&mut self) -> PResult<Option<Rule>> {
812✔
812
        if self.consume_reserved("regex") {
812✔
813
            let span_start = self.last_span();
10✔
814
            let lit = self.expect_as(Token::String)?;
10✔
815
            let span_end = self.last_span();
10✔
816

817
            let content = helper::parse_quoted_text(lit).map_err(|k| k.at(span_end))?;
10✔
818

819
            let span = span_start.join(span_end);
10✔
820
            Ok(Some(Rule::Regex(Regex::new(content.to_string(), span))))
10✔
821
        } else {
822
            Ok(None)
802✔
823
        }
824
    }
812✔
825

826
    /// Parses a variable (usage site).
827
    fn parse_variable(&mut self) -> PResult<Option<Rule>> {
802✔
828
        if let Some(ident) = self.consume_as(Token::Identifier) {
802✔
829
            let span1 = self.last_span();
211✔
830
            let rule = Rule::Variable(Variable::new(ident, span1));
211✔
831
            if let Some((Token::Equals, span2)) = self.peek_pair() {
211✔
832
                return Err(PEK::MissingLetKeyword.at(span1.join(span2)));
1✔
833
            }
210✔
834
            Ok(Some(rule))
210✔
835
        } else {
836
            Ok(None)
591✔
837
        }
838
    }
802✔
839

840
    /// Parses the dot
841
    fn parse_dot(&mut self) -> Option<Rule> {
591✔
842
        if self.consume(Token::Dot) {
591✔
843
            Some(Rule::Dot)
26✔
844
        } else {
845
            None
565✔
846
        }
847
    }
591✔
848

849
    /// Parses the `recursion` keyword
850
    fn parse_recursion(&mut self) -> Option<Rule> {
565✔
851
        if self.consume_reserved("recursion") {
565✔
852
            Some(Rule::Recursion(Recursion { span: self.last_span() }))
3✔
853
        } else {
854
            None
562✔
855
        }
856
    }
565✔
857
}
858

859
/// Either a string literal or a single, already resolved character.
#[derive(Clone, Copy)]
enum StringOrChar<'i> {
    /// A string literal as it appears in the input; it is run through
    /// `helper::parse_quoted_text` before its content is inspected.
    String(&'i str),
    /// A single code point.
    Char {
        /// The code point itself.
        c: char,
        /// NOTE(review): presumably set when the character was written using a
        /// shorthand name rather than literally -- confirm against the call sites.
        is_shorthand: bool,
    },
}
864

865
impl StringOrChar<'_> {
866
    fn to_char(self) -> Result<char, PEK> {
20✔
867
        Err(PEK::CharString(match self {
20✔
868
            StringOrChar::Char { c, .. } => return Ok(c),
8✔
869
            StringOrChar::String(s) => {
12✔
870
                let s = helper::parse_quoted_text(s)?;
12✔
871
                let mut iter = s.chars();
12✔
872
                match iter.next() {
12✔
873
                    Some(c) if iter.next().is_none() => return Ok(c),
11✔
874
                    Some(_) => CharStringError::TooManyCodePoints,
4✔
875
                    _ => CharStringError::Empty,
1✔
876
                }
877
            }
878
        }))
879
    }
20✔
880
}
881

882
/// Extension trait for chaining fallible fallbacks onto an `Option`.
trait TryOptionExt<T> {
    /// Returns `Ok(Some(_))` with the contained value if there is one;
    /// otherwise calls `f` and returns its result, which may be a value,
    /// `Ok(None)`, or an error. `f` is not called when a value is present.
    fn try_or_else<E>(self, f: impl FnMut() -> Result<Option<T>, E>) -> Result<Option<T>, E>;
}

impl<T> TryOptionExt<T> for Option<T> {
    #[inline(always)]
    fn try_or_else<E>(self, mut f: impl FnMut() -> Result<Option<T>, E>) -> Result<Option<T>, E> {
        match self {
            // Nothing found yet: let the fallback decide.
            None => f(),
            // Already have a value: pass it through untouched.
            found @ Some(_) => Ok(found),
        }
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc