• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

shnewto / bnf / 21337247663

25 Jan 2026 06:14PM UTC coverage: 96.791% (+0.04%) from 96.753%
21337247663

Pull #187

github

web-flow
Merge ebd9b927f into fbcc780ec
Pull Request #187: add explicit parser

179 of 185 new or added lines in 4 files covered. (96.76%)

1 existing line in 1 file now uncovered.

2262 of 2337 relevant lines covered (96.79%)

26199.87 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.83
/src/parser.rs
1
use crate::ParseTree;
2
use crate::earley::grammar::ParseGrammar;
3
use crate::error::Error;
4
use crate::grammar::Grammar;
5
use crate::term::Term;
6
use std::collections::HashSet;
7

8
/// A reusable parser built from a `Grammar` that validates all nonterminals are defined
9
/// at construction time.
10
///
11
/// # Example
12
///
13
/// ```rust
14
/// use bnf::{Grammar, GrammarParser};
15
///
16
/// let grammar: Grammar = "<dna> ::= <base> | <base> <dna>
17
/// <base> ::= 'A' | 'C' | 'G' | 'T'"
18
///     .parse()
19
///     .unwrap();
20
///
21
/// let parser = GrammarParser::new(&grammar)?;
22
/// let parse_trees: Vec<_> = parser.parse_input("GATTACA").collect();
23
/// # Ok::<(), bnf::Error>(())
24
/// ```
25
#[derive(Debug)]
26
pub struct GrammarParser<'gram> {
27
    grammar: &'gram Grammar,
28
    parse_grammar: ParseGrammar<'gram>,
29
}
30

31
impl<'gram> GrammarParser<'gram> {
32
    /// Construct a new `GrammarParser` from a `Grammar`, validating that all
33
    /// nonterminals referenced in productions have definitions.
34
    ///
35
    /// # Errors
36
    ///
37
    /// Returns `Error::ValidationError` if any nonterminal used in the RHS of
38
    /// productions lacks a definition in the grammar.
39
    pub fn new(grammar: &'gram Grammar) -> Result<Self, Error> {
7✔
40
        validate_nonterminals(grammar)?;
7✔
41
        let parse_grammar = ParseGrammar::new(grammar);
6✔
42
        Ok(Self {
6✔
43
            grammar,
6✔
44
            parse_grammar,
6✔
45
        })
6✔
46
    }
7✔
47

48
    /// Parse an input string using the grammar's starting nonterminal.
49
    ///
50
    /// Returns an iterator over all possible parse trees for the input.
51
    pub fn parse_input(&self, input: &'gram str) -> impl Iterator<Item = ParseTree<'gram>> {
2✔
52
        let starting_term = self
2✔
53
            .grammar
2✔
54
            .starting_term()
2✔
55
            .expect("Grammar must have one production to parse");
2✔
56
        self.parse_input_starting_with(input, starting_term)
2✔
57
    }
2✔
58

59
    /// Parse an input string starting with the given term (nonterminal or terminal).
60
    ///
61
    /// Returns an iterator over all possible parse trees for the input.
62
    pub fn parse_input_starting_with(
6✔
63
        &self,
6✔
64
        input: &'gram str,
6✔
65
        start: &'gram Term,
6✔
66
    ) -> impl Iterator<Item = ParseTree<'gram>> {
6✔
67
        // Clone the parse_grammar for this parse call - it's cheap since it just
68
        // copies the data structures (the references stay the same)
69
        crate::earley::parse_starting_with_grammar(&self.parse_grammar, input, start)
6✔
70
    }
6✔
71
}
72

73
/// Validate that all nonterminals referenced in the grammar have definitions.
74
///
75
/// # Errors
76
///
77
/// Returns `Error::ValidationError` with a message listing all undefined nonterminals.
78
fn validate_nonterminals(grammar: &Grammar) -> Result<(), Error> {
7✔
79
    // Collect all nonterminals defined in LHS of productions
80
    let mut defined_nonterminals = HashSet::new();
7✔
81
    for production in grammar.productions_iter() {
14✔
82
        if let Term::Nonterminal(ref nt) = production.lhs {
14✔
83
            defined_nonterminals.insert(nt.clone());
14✔
84
        }
14✔
85
    }
86

87
    // Collect all nonterminals used in RHS of all productions
88
    let mut referenced_nonterminals = HashSet::new();
7✔
89
    for production in grammar.productions_iter() {
14✔
90
        for expression in production.rhs_iter() {
39✔
91
            for term in expression.terms_iter() {
46✔
92
                match term {
46✔
93
                    Term::Nonterminal(nt) => {
22✔
94
                        referenced_nonterminals.insert(nt.clone());
22✔
95
                    }
22✔
NEW
96
                    Term::AnonymousNonterminal(exprs) => {
×
97
                        // For anonymous nonterminals, check the expressions they contain
NEW
98
                        for expr in exprs {
×
NEW
99
                            for inner_term in expr.terms_iter() {
×
NEW
100
                                if let Term::Nonterminal(nt) = inner_term {
×
NEW
101
                                    referenced_nonterminals.insert(nt.clone());
×
NEW
102
                                }
×
103
                            }
104
                        }
105
                    }
106
                    Term::Terminal(_) => {
24✔
107
                        // Terminals don't need definitions
24✔
108
                    }
24✔
109
                }
110
            }
111
        }
112
    }
113

114
    // Find undefined nonterminals
115
    let undefined: Vec<String> = referenced_nonterminals
7✔
116
        .difference(&defined_nonterminals)
7✔
117
        .cloned()
7✔
118
        .collect();
7✔
119

120
    if !undefined.is_empty() {
7✔
121
        let message = format!(
1✔
122
            "Undefined nonterminals: {}",
123
            undefined
1✔
124
                .iter()
1✔
125
                .map(|nt| format!("<{nt}>"))
1✔
126
                .collect::<Vec<_>>()
1✔
127
                .join(", ")
1✔
128
        );
129
        return Err(Error::ValidationError(message));
1✔
130
    }
6✔
131

132
    Ok(())
6✔
133
}
7✔
134

135
#[cfg(test)]
136
mod tests {
137
    use super::*;
138
    use crate::Grammar;
139

140
    #[test]
141
    fn parser_construction_with_valid_grammar() {
1✔
142
        let grammar: Grammar = "<dna> ::= <base> | <base> <dna>
1✔
143
        <base> ::= 'A' | 'C' | 'G' | 'T'"
1✔
144
            .parse()
1✔
145
            .unwrap();
1✔
146

147
        let parser = GrammarParser::new(&grammar);
1✔
148
        assert!(
1✔
149
            parser.is_ok(),
1✔
150
            "Parser should be constructible from valid grammar"
151
        );
152
    }
1✔
153

154
    #[test]
155
    fn parser_construction_fails_with_undefined_nonterminal() {
1✔
156
        let grammar: Grammar = "<dna> ::= <base> | <base> <dna>
1✔
157
        <base> ::= <undefined>"
1✔
158
            .parse()
1✔
159
            .unwrap();
1✔
160

161
        let parser = GrammarParser::new(&grammar);
1✔
162
        assert!(
1✔
163
            parser.is_err(),
1✔
164
            "Parser construction should fail with undefined nonterminal"
165
        );
166
        assert!(
1✔
167
            matches!(parser.unwrap_err(), Error::ValidationError(_)),
1✔
168
            "Error should be ValidationError"
169
        );
170
    }
1✔
171

172
    #[test]
173
    fn parser_can_parse_multiple_inputs() {
1✔
174
        let grammar: Grammar = "<dna> ::= <base> | <base> <dna>
1✔
175
        <base> ::= 'A' | 'C' | 'G' | 'T'"
1✔
176
            .parse()
1✔
177
            .unwrap();
1✔
178

179
        let parser = GrammarParser::new(&grammar).unwrap();
1✔
180

181
        let input1 = "GATTACA";
1✔
182
        let input2 = "ATCG";
1✔
183

184
        let parse_trees1: Vec<_> = parser.parse_input(input1).collect();
1✔
185
        let parse_trees2: Vec<_> = parser.parse_input(input2).collect();
1✔
186

187
        assert!(!parse_trees1.is_empty(), "Should parse first input");
1✔
188
        assert!(!parse_trees2.is_empty(), "Should parse second input");
1✔
189
    }
1✔
190

191
    #[test]
192
    fn parser_accepts_explicit_starting_nonterminal() {
1✔
193
        let grammar: Grammar = "<base> ::= 'A' | 'C' | 'G' | 'T'
1✔
194
        <dna> ::= <base> | <base> <dna>"
1✔
195
            .parse()
1✔
196
            .unwrap();
1✔
197

198
        let parser = GrammarParser::new(&grammar).unwrap();
1✔
199
        let input = "GATTACA";
1✔
200
        let start_term = Term::Nonterminal("dna".to_string());
1✔
201

202
        let parse_trees: Vec<_> = parser
1✔
203
            .parse_input_starting_with(input, &start_term)
1✔
204
            .collect();
1✔
205

206
        assert!(
1✔
207
            !parse_trees.is_empty(),
1✔
208
            "Should parse with explicit starting nonterminal"
209
        );
210
    }
1✔
211

212
    #[test]
213
    fn parser_accepts_explicit_starting_terminal() {
1✔
214
        let grammar: Grammar = "<base> ::= 'A' | 'C' | 'G' | 'T'
1✔
215
        <dna> ::= <base> | <base> <dna>"
1✔
216
            .parse()
1✔
217
            .unwrap();
1✔
218

219
        let parser = GrammarParser::new(&grammar).unwrap();
1✔
220
        let input = "G";
1✔
221
        let start_term = Term::Terminal("G".to_string());
1✔
222

223
        // Note: Starting with a terminal directly doesn't work with Earley parser
224
        // since it expects a nonterminal to have productions. This test verifies
225
        // the API accepts terminals, but they won't produce parse trees unless
226
        // there's a production with that terminal as LHS (which is invalid).
227
        let parse_trees: Vec<_> = parser
1✔
228
            .parse_input_starting_with(input, &start_term)
1✔
229
            .collect();
1✔
230

231
        // This will be empty since there's no production with a terminal as LHS
232
        // The API accepts it, but it won't produce results
233
        assert_eq!(
1✔
234
            parse_trees.len(),
1✔
235
            0,
236
            "Terminal starting term produces no parse trees"
237
        );
238
    }
1✔
239

240
    #[test]
241
    fn parser_is_order_independent() {
1✔
242
        // Create grammar with productions in one order
243
        let grammar1: Grammar = "<dna> ::= <base> | <base> <dna>
1✔
244
        <base> ::= 'A' | 'C' | 'G' | 'T'"
1✔
245
            .parse()
1✔
246
            .unwrap();
1✔
247

248
        // Create same grammar with productions in different order
249
        let grammar2: Grammar = "<base> ::= 'A' | 'C' | 'G' | 'T'
1✔
250
        <dna> ::= <base> | <base> <dna>"
1✔
251
            .parse()
1✔
252
            .unwrap();
1✔
253

254
        let parser1 = GrammarParser::new(&grammar1).unwrap();
1✔
255
        let parser2 = GrammarParser::new(&grammar2).unwrap();
1✔
256

257
        let input = "GATTACA";
1✔
258
        // Use explicit starting term to ensure both use the same starting point
259
        let start_term = Term::Nonterminal("dna".to_string());
1✔
260

261
        let parse_trees1: Vec<_> = parser1
1✔
262
            .parse_input_starting_with(input, &start_term)
1✔
263
            .collect();
1✔
264
        let parse_trees2: Vec<_> = parser2
1✔
265
            .parse_input_starting_with(input, &start_term)
1✔
266
            .collect();
1✔
267

268
        // Results should be identical regardless of production order when using
269
        // the same explicit starting term
270
        assert_eq!(
1✔
271
            parse_trees1.len(),
1✔
272
            parse_trees2.len(),
1✔
273
            "Should produce same number of parse trees regardless of order"
274
        );
275
    }
1✔
276
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc