• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

shnewto / bnf / 21337350957

25 Jan 2026 06:22PM UTC coverage: 96.809% (+0.06%) from 96.753%
21337350957

Pull #187

github

web-flow
Merge ef047dccd into fbcc780ec
Pull Request #187: add explicit parser

165 of 171 new or added lines in 3 files covered. (96.49%)

1 existing line in 1 file now uncovered.

2245 of 2319 relevant lines covered (96.81%)

26061.88 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.83
/src/parser.rs
1
use crate::ParseTree;
2
use crate::earley::grammar::ParseGrammar;
3
use crate::error::Error;
4
use crate::grammar::Grammar;
5
use crate::term::Term;
6
use std::collections::HashSet;
7
use std::rc::Rc;
8

9
/// A reusable parser built from a `Grammar` that validates all nonterminals are defined
10
/// at construction time.
11
///
12
/// # Example
13
///
14
/// ```rust
15
/// use bnf::{Grammar, GrammarParser};
16
///
17
/// let grammar: Grammar = "<dna> ::= <base> | <base> <dna>
18
/// <base> ::= 'A' | 'C' | 'G' | 'T'"
19
///     .parse()
20
///     .unwrap();
21
///
22
/// let parser = GrammarParser::new(&grammar)?;
23
/// let parse_trees: Vec<_> = parser.parse_input("GATTACA").collect();
24
/// # Ok::<(), bnf::Error>(())
25
/// ```
26
#[derive(Debug)]
27
pub struct GrammarParser<'gram> {
28
    grammar: &'gram Grammar,
29
    parse_grammar: Rc<ParseGrammar<'gram>>,
30
}
31

32
impl<'gram> GrammarParser<'gram> {
33
    /// Construct a new `GrammarParser` from a `Grammar`, validating that all
34
    /// nonterminals referenced in productions have definitions.
35
    ///
36
    /// # Errors
37
    ///
38
    /// Returns `Error::ValidationError` if any nonterminal used in the RHS of
39
    /// productions lacks a definition in the grammar.
40
    pub fn new(grammar: &'gram Grammar) -> Result<Self, Error> {
7✔
41
        validate_nonterminals(grammar)?;
7✔
42
        let parse_grammar = Rc::new(ParseGrammar::new(grammar));
6✔
43
        Ok(Self {
6✔
44
            grammar,
6✔
45
            parse_grammar,
6✔
46
        })
6✔
47
    }
7✔
48

49
    /// Parse an input string using the grammar's starting nonterminal.
50
    ///
51
    /// Returns an iterator over all possible parse trees for the input.
52
    pub fn parse_input(&self, input: &'gram str) -> impl Iterator<Item = ParseTree<'gram>> {
2✔
53
        let starting_term = self
2✔
54
            .grammar
2✔
55
            .starting_term()
2✔
56
            .expect("Grammar must have one production to parse");
2✔
57
        self.parse_input_starting_with(input, starting_term)
2✔
58
    }
2✔
59

60
    /// Parse an input string starting with the given term (nonterminal or terminal).
61
    ///
62
    /// Returns an iterator over all possible parse trees for the input.
63
    pub fn parse_input_starting_with(
6✔
64
        &self,
6✔
65
        input: &'gram str,
6✔
66
        start: &'gram Term,
6✔
67
    ) -> impl Iterator<Item = ParseTree<'gram>> {
6✔
68
        crate::earley::parse_starting_with_grammar(&self.parse_grammar, input, start)
6✔
69
    }
6✔
70
}
71

72
/// Validate that all nonterminals referenced in the grammar have definitions.
73
///
74
/// # Errors
75
///
76
/// Returns `Error::ValidationError` with a message listing all undefined nonterminals.
77
fn validate_nonterminals(grammar: &Grammar) -> Result<(), Error> {
7✔
78
    // Collect all nonterminals defined in LHS of productions
79
    let mut defined_nonterminals = HashSet::new();
7✔
80
    for production in grammar.productions_iter() {
14✔
81
        if let Term::Nonterminal(ref nt) = production.lhs {
14✔
82
            defined_nonterminals.insert(nt.clone());
14✔
83
        }
14✔
84
    }
85

86
    // Collect all nonterminals used in RHS of all productions
87
    let mut referenced_nonterminals = HashSet::new();
7✔
88
    for production in grammar.productions_iter() {
14✔
89
        for expression in production.rhs_iter() {
39✔
90
            for term in expression.terms_iter() {
46✔
91
                match term {
46✔
92
                    Term::Nonterminal(nt) => {
22✔
93
                        referenced_nonterminals.insert(nt.clone());
22✔
94
                    }
22✔
NEW
95
                    Term::AnonymousNonterminal(exprs) => {
×
96
                        // For anonymous nonterminals, check the expressions they contain
NEW
97
                        for expr in exprs {
×
NEW
98
                            for inner_term in expr.terms_iter() {
×
NEW
99
                                if let Term::Nonterminal(nt) = inner_term {
×
NEW
100
                                    referenced_nonterminals.insert(nt.clone());
×
NEW
101
                                }
×
102
                            }
103
                        }
104
                    }
105
                    Term::Terminal(_) => {
24✔
106
                        // Terminals don't need definitions
24✔
107
                    }
24✔
108
                }
109
            }
110
        }
111
    }
112

113
    // Find undefined nonterminals
114
    let undefined: Vec<String> = referenced_nonterminals
7✔
115
        .difference(&defined_nonterminals)
7✔
116
        .cloned()
7✔
117
        .collect();
7✔
118

119
    if !undefined.is_empty() {
7✔
120
        let message = format!(
1✔
121
            "Undefined nonterminals: {}",
122
            undefined
1✔
123
                .iter()
1✔
124
                .map(|nt| format!("<{nt}>"))
1✔
125
                .collect::<Vec<_>>()
1✔
126
                .join(", ")
1✔
127
        );
128
        return Err(Error::ValidationError(message));
1✔
129
    }
6✔
130

131
    Ok(())
6✔
132
}
7✔
133

134
#[cfg(test)]
135
mod tests {
136
    use super::*;
137
    use crate::Grammar;
138

139
    #[test]
140
    fn parser_construction_with_valid_grammar() {
1✔
141
        let grammar: Grammar = "<dna> ::= <base> | <base> <dna>
1✔
142
        <base> ::= 'A' | 'C' | 'G' | 'T'"
1✔
143
            .parse()
1✔
144
            .unwrap();
1✔
145

146
        let parser = GrammarParser::new(&grammar);
1✔
147
        assert!(
1✔
148
            parser.is_ok(),
1✔
149
            "Parser should be constructible from valid grammar"
150
        );
151
    }
1✔
152

153
    #[test]
154
    fn parser_construction_fails_with_undefined_nonterminal() {
1✔
155
        let grammar: Grammar = "<dna> ::= <base> | <base> <dna>
1✔
156
        <base> ::= <undefined>"
1✔
157
            .parse()
1✔
158
            .unwrap();
1✔
159

160
        let parser = GrammarParser::new(&grammar);
1✔
161
        assert!(
1✔
162
            parser.is_err(),
1✔
163
            "Parser construction should fail with undefined nonterminal"
164
        );
165
        assert!(
1✔
166
            matches!(parser.unwrap_err(), Error::ValidationError(_)),
1✔
167
            "Error should be ValidationError"
168
        );
169
    }
1✔
170

171
    #[test]
172
    fn parser_can_parse_multiple_inputs() {
1✔
173
        let grammar: Grammar = "<dna> ::= <base> | <base> <dna>
1✔
174
        <base> ::= 'A' | 'C' | 'G' | 'T'"
1✔
175
            .parse()
1✔
176
            .unwrap();
1✔
177

178
        let parser = GrammarParser::new(&grammar).unwrap();
1✔
179

180
        let input1 = "GATTACA";
1✔
181
        let input2 = "ATCG";
1✔
182

183
        let parse_trees1: Vec<_> = parser.parse_input(input1).collect();
1✔
184
        let parse_trees2: Vec<_> = parser.parse_input(input2).collect();
1✔
185

186
        assert!(!parse_trees1.is_empty(), "Should parse first input");
1✔
187
        assert!(!parse_trees2.is_empty(), "Should parse second input");
1✔
188
    }
1✔
189

190
    #[test]
191
    fn parser_accepts_explicit_starting_nonterminal() {
1✔
192
        let grammar: Grammar = "<base> ::= 'A' | 'C' | 'G' | 'T'
1✔
193
        <dna> ::= <base> | <base> <dna>"
1✔
194
            .parse()
1✔
195
            .unwrap();
1✔
196

197
        let parser = GrammarParser::new(&grammar).unwrap();
1✔
198
        let input = "GATTACA";
1✔
199
        let start_term = Term::Nonterminal("dna".to_string());
1✔
200

201
        let parse_trees: Vec<_> = parser
1✔
202
            .parse_input_starting_with(input, &start_term)
1✔
203
            .collect();
1✔
204

205
        assert!(
1✔
206
            !parse_trees.is_empty(),
1✔
207
            "Should parse with explicit starting nonterminal"
208
        );
209
    }
1✔
210

211
    #[test]
212
    fn parser_accepts_explicit_starting_terminal() {
1✔
213
        let grammar: Grammar = "<base> ::= 'A' | 'C' | 'G' | 'T'
1✔
214
        <dna> ::= <base> | <base> <dna>"
1✔
215
            .parse()
1✔
216
            .unwrap();
1✔
217

218
        let parser = GrammarParser::new(&grammar).unwrap();
1✔
219
        let input = "G";
1✔
220
        let start_term = Term::Terminal("G".to_string());
1✔
221

222
        // Note: Starting with a terminal directly doesn't work with Earley parser
223
        // since it expects a nonterminal to have productions. This test verifies
224
        // the API accepts terminals, but they won't produce parse trees unless
225
        // there's a production with that terminal as LHS (which is invalid).
226
        let parse_trees: Vec<_> = parser
1✔
227
            .parse_input_starting_with(input, &start_term)
1✔
228
            .collect();
1✔
229

230
        // This will be empty since there's no production with a terminal as LHS
231
        // The API accepts it, but it won't produce results
232
        assert_eq!(
1✔
233
            parse_trees.len(),
1✔
234
            0,
235
            "Terminal starting term produces no parse trees"
236
        );
237
    }
1✔
238

239
    #[test]
240
    fn parser_is_order_independent() {
1✔
241
        // Create grammar with productions in one order
242
        let grammar1: Grammar = "<dna> ::= <base> | <base> <dna>
1✔
243
        <base> ::= 'A' | 'C' | 'G' | 'T'"
1✔
244
            .parse()
1✔
245
            .unwrap();
1✔
246

247
        // Create same grammar with productions in different order
248
        let grammar2: Grammar = "<base> ::= 'A' | 'C' | 'G' | 'T'
1✔
249
        <dna> ::= <base> | <base> <dna>"
1✔
250
            .parse()
1✔
251
            .unwrap();
1✔
252

253
        let parser1 = GrammarParser::new(&grammar1).unwrap();
1✔
254
        let parser2 = GrammarParser::new(&grammar2).unwrap();
1✔
255

256
        let input = "GATTACA";
1✔
257
        // Use explicit starting term to ensure both use the same starting point
258
        let start_term = Term::Nonterminal("dna".to_string());
1✔
259

260
        let parse_trees1: Vec<_> = parser1
1✔
261
            .parse_input_starting_with(input, &start_term)
1✔
262
            .collect();
1✔
263
        let parse_trees2: Vec<_> = parser2
1✔
264
            .parse_input_starting_with(input, &start_term)
1✔
265
            .collect();
1✔
266

267
        // Results should be identical regardless of production order when using
268
        // the same explicit starting term
269
        assert_eq!(
1✔
270
            parse_trees1.len(),
1✔
271
            parse_trees2.len(),
1✔
272
            "Should produce same number of parse trees regardless of order"
273
        );
274
    }
1✔
275
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc