• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pomsky-lang / pomsky / 12021434535

26 Nov 2024 12:37AM UTC coverage: 83.127% (-0.2%) from 83.301%
12021434535

push

github

Aloso
fix: bless tests

4296 of 5168 relevant lines covered (83.13%)

415790.79 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

79.85
/pomsky-lib/src/regex/optimize.rs
1
use std::{mem, ops::Add};
2

3
use pomsky_syntax::exprs::RepetitionKind;
4

5
use crate::{exprs::group::RegexGroupKind, unicode_set::UnicodeSet};
6

7
use super::{Regex, RegexCharSet};
8

9
#[derive(Clone, Copy, PartialEq, Eq)]
10
pub(crate) enum Count {
11
    Zero,
12
    One,
13
    Many,
14
}
15

16
impl Add for Count {
17
    type Output = Count;
18

19
    fn add(self, rhs: Self) -> Self::Output {
910✔
20
        match (self, rhs) {
910✔
21
            (Count::Zero, Count::Zero) => Count::Zero,
13✔
22
            (Count::Zero, Count::One) | (Count::One, Count::Zero) => Count::One,
330✔
23
            _ => Count::Many,
567✔
24
        }
25
    }
910✔
26
}
27

28
impl Regex {
29
    pub(crate) fn optimize(&mut self) -> Count {
1,546✔
30
        match self {
1,546✔
31
            Regex::Literal(l) => {
295✔
32
                let l: &str = &*l;
295✔
33
                if l.is_empty() {
295✔
34
                    // indicates that the parent should remove it
35
                    Count::Zero
23✔
36
                } else if l.chars().nth(1).is_none() {
272✔
37
                    Count::One
158✔
38
                } else {
39
                    Count::Many
114✔
40
                }
41
            }
42
            Regex::Group(g) => {
411✔
43
                let mut count = Count::Zero;
411✔
44
                g.parts.retain_mut(|part| {
910✔
45
                    let add = part.optimize();
910✔
46
                    count = count + add;
910✔
47
                    add != Count::Zero
910✔
48
                });
910✔
49

411✔
50
                if g.parts.len() == 1
411✔
51
                    && g.kind == RegexGroupKind::Normal
147✔
52
                    && !matches!(&g.parts[0], Regex::Unescaped(_))
91✔
53
                {
54
                    // don't remove group if it is wrapping raw regex
55
                    *self = g.parts.pop().unwrap();
90✔
56
                    return count;
90✔
57
                }
321✔
58

59
                if matches!(g.kind, RegexGroupKind::Capture | RegexGroupKind::NamedCapture(_)) {
321✔
60
                    Count::One
63✔
61
                } else if g.parts.is_empty() {
258✔
62
                    // indicates that the parent should remove it
63
                    Count::Zero
5✔
64
                } else {
65
                    count
253✔
66
                }
67
            }
68
            Regex::Alternation(a) => {
83✔
69
                for part in &mut a.parts {
292✔
70
                    part.optimize();
209✔
71
                }
209✔
72

73
                let mut i = 0;
83✔
74
                while i < a.parts.len() - 1 {
209✔
75
                    let (p1, p2) = a.parts.split_at_mut(i + 1);
126✔
76
                    let lhs = &mut p1[i];
126✔
77
                    let rhs = &mut p2[0];
126✔
78

126✔
79
                    if lhs.is_single_char() && rhs.is_single_char() {
126✔
80
                        match (lhs, rhs) {
15✔
81
                            (Regex::Literal(lit1), Regex::Literal(lit2)) => {
11✔
82
                                let mut set = UnicodeSet::new();
11✔
83
                                set.add_char(lit1.chars().next().unwrap());
11✔
84
                                set.add_char(lit2.chars().next().unwrap());
11✔
85
                                a.parts[i] = Regex::CharSet(RegexCharSet::new(set));
11✔
86
                                a.parts.remove(i + 1);
11✔
87
                            }
11✔
88
                            (Regex::Literal(lit), Regex::CharSet(set))
×
89
                            | (Regex::CharSet(set), Regex::Literal(lit))
4✔
90
                                if !set.negative =>
×
91
                            {
4✔
92
                                let mut set = std::mem::take(set);
4✔
93
                                set.set.add_char(lit.chars().next().unwrap());
4✔
94
                                a.parts[i] = Regex::CharSet(set);
4✔
95
                                a.parts.remove(i + 1);
4✔
96
                            }
4✔
97
                            (Regex::CharSet(set1), Regex::CharSet(set2))
×
98
                                if !set1.negative && !set2.negative =>
×
99
                            {
100
                                for range in set2.set.ranges() {
×
101
                                    set1.set.add_set_range(range);
×
102
                                }
×
103
                                for prop in set2.set.props() {
×
104
                                    set1.set.add_prop(prop);
×
105
                                }
×
106
                                a.parts.remove(i + 1);
×
107
                            }
108
                            _ => {
×
109
                                i += 1;
×
110
                            }
×
111
                        }
112
                    } else {
111✔
113
                        i += 1;
111✔
114
                    }
111✔
115
                }
116

117
                if a.parts.len() == 1 {
83✔
118
                    *self = a.parts.pop().unwrap();
14✔
119
                    return self.optimize();
14✔
120
                }
69✔
121

69✔
122
                Count::One
69✔
123
            }
124
            Regex::Repetition(r) => {
188✔
125
                if r.kind.lower_bound == 1 && r.kind.upper_bound == Some(1) {
188✔
126
                    *self = mem::take(&mut r.content);
1✔
127
                    return self.optimize();
1✔
128
                }
187✔
129

187✔
130
                match r.content.optimize() {
187✔
131
                    Count::Zero => {
132
                        // indicates that the parent should remove it
133
                        return Count::Zero;
3✔
134
                    }
135
                    Count::One => match &mut r.content {
9✔
136
                        Regex::Repetition(inner) if inner.quantifier == r.quantifier => {
9✔
137
                            if let Some(kind) = reduce_repetitions(r.kind, inner.kind) {
9✔
138
                                inner.kind = kind;
9✔
139
                                *self = mem::take(&mut r.content);
9✔
140
                            }
9✔
141
                        }
142
                        _ => {}
148✔
143
                    },
144
                    Count::Many => {}
27✔
145
                }
146

147
                Count::One
184✔
148
            }
149
            Regex::Lookaround(l) => {
30✔
150
                l.content.optimize();
30✔
151
                Count::One
30✔
152
            }
153
            Regex::Unescaped(_) => Count::Many,
9✔
154
            Regex::CharSet(_)
155
            | Regex::Grapheme
156
            | Regex::Dot
157
            | Regex::Boundary(_)
158
            | Regex::Reference(_)
159
            | Regex::Recursion => Count::One,
530✔
160
        }
161
    }
1,546✔
162
}
163

164
fn reduce_repetitions(outer: RepetitionKind, inner: RepetitionKind) -> Option<RepetitionKind> {
9✔
165
    match (outer, inner) {
9✔
166
        (
167
            RepetitionKind { lower_bound: 0 | 1, upper_bound: _ },
168
            RepetitionKind { lower_bound: 0 | 1, upper_bound: None },
169
        )
170
        | (
171
            RepetitionKind { lower_bound: 0 | 1, upper_bound: None },
172
            RepetitionKind { lower_bound: 0 | 1, upper_bound: _ },
173
        ) => {
174
            let lower_bound = inner.lower_bound.min(outer.lower_bound);
4✔
175
            Some(RepetitionKind { lower_bound, upper_bound: None })
4✔
176
        }
177

178
        (
179
            RepetitionKind { lower_bound: 0 | 1, upper_bound: Some(n) },
1✔
180
            RepetitionKind { lower_bound: 0, upper_bound: Some(1) },
181
        )
182
        | (
183
            RepetitionKind { lower_bound: 0, upper_bound: Some(1) },
184
            RepetitionKind { lower_bound: 0 | 1, upper_bound: Some(n) },
×
185
        ) => Some(RepetitionKind { lower_bound: 0, upper_bound: Some(n) }),
1✔
186

187
        (
188
            RepetitionKind { lower_bound: l1, upper_bound: None },
2✔
189
            RepetitionKind { lower_bound: l2, upper_bound: None },
2✔
190
        ) => Some(RepetitionKind { lower_bound: mul_repetitions(l1, l2)?, upper_bound: None }),
2✔
191

192
        (
193
            RepetitionKind { lower_bound: l1, upper_bound: Some(l1_1) },
×
194
            RepetitionKind { lower_bound: l2, upper_bound: Some(l2_1) },
×
195
        ) if l1 == l1_1 && l2 == l2_1 => {
×
196
            let repetition = mul_repetitions(l1, l2)?;
×
197
            Some(RepetitionKind { lower_bound: repetition, upper_bound: Some(repetition) })
×
198
        }
199

200
        (
201
            RepetitionKind { lower_bound: 0 | 1, upper_bound: Some(u1) },
×
202
            RepetitionKind { lower_bound: 0 | 1, upper_bound: Some(u2) },
×
203
        ) => {
×
204
            let lower_bound = inner.lower_bound.min(outer.lower_bound);
×
205
            Some(RepetitionKind { lower_bound, upper_bound: Some(mul_repetitions(u1, u2)?) })
×
206
        }
207

208
        _ => None,
×
209
    }
210
}
7✔
211

212
fn mul_repetitions(a: u32, b: u32) -> Option<u32> {
2✔
213
    let res = a.saturating_mul(b);
2✔
214
    if res > u16::MAX as u32 {
2✔
215
        // some regex flavors don't support repetitions greater than 2^16
216
        None
×
217
    } else {
218
        Some(res)
2✔
219
    }
220
}
2✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc