• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

dryruby / ebnf / 12110129472

01 Dec 2024 11:46PM UTC coverage: 93.531% (-0.7%) from 94.253%
12110129472

push

github

gkellogg
Finish 2.6.0

75 of 99 new or added lines in 9 files covered. (75.76%)

7 existing lines in 2 files now uncovered.

2140 of 2288 relevant lines covered (93.53%)

25866.68 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.68
/lib/ebnf/parser.rb
1
require_relative 'ebnf/meta'
2✔
2
require 'logger'
2✔
3

4
module EBNF
2✔
5
  class Parser
2✔
6
    include EBNF::PEG::Parser
2✔
7
    include EBNF::Terminals
2✔
8

9
    # Abstract syntax tree from parse
10
    #
11
    # @return [Array<EBNF::Rule>]
12
    attr_reader :ast
2✔
13

14
    # Set on first rule
15
    attr_reader :lhs_includes_identifier
2✔
16

17
    # Regular expression to match a [...] range, which may be distinguished from an LHS
18
    attr_reader :range
2✔
19

20
    # ## Terminals
21
    # Define rules for Terminals, placing results on the input stack, making them available to upstream non-Terminal rules.
22
    #
23
    # Terminals are defined with a symbol matching the associated rule name, and an optional (although strongly encouraged) regular expression used to match the head of the input stream.
24
    #
25
    # The result of the terminal block is the semantic value of that terminal, which is often a string, but may be any instance which reflects the semantic interpretation of that terminal.
26
    #
27
    # The `value` parameter is the value matched by the regexp, if defined, or by the sub-terminal rules otherwise.
28
    #
29
    # The `prod` parameter is the name of the parent rule for which this terminal is matched, which may have a bearing in some circumstances, although not used in this example.
30
    #
31
    # If no block is provided, then the value which would have been passed to the block is used as the result directly.
32

33
    # Match the Left hand side of a rule or terminal
34
    #
35
    #     [11] LHS        ::= ('[' SYMBOL+ ']' ' '+)? <? SYMBOL >? ' '* '::='
36
    terminal(:LHS, LHS) do |value, prod|
2✔
37
      md = value.to_s.scan(/(?:\[([^\]]+)\])?\s*<?(\w+)>?\s*::=/).first
5,202✔
38
      if @lhs_includes_identifier.nil?
5,202✔
39
        @lhs_includes_identifier = !md[0].nil?
352✔
40
        @range = md[0] ? RANGE_NOT_LHS : RANGE
352✔
41
      elsif @lhs_includes_identifier && !md[0]
4,850✔
NEW
42
        error("LHS",
×
43
          "Rule does not begin with a [xxx] identifier, which was established on the first rule",
44
          production: :LHS,
45
          rest: value)
46
      elsif !@lhs_includes_identifier && md[0]
4,850✔
NEW
47
        error("LHS",
×
48
          "Rule begins with a [xxx] identifier, which was not established on the first rule",
49
          production: :LHS,
50
          rest: value)
51
      end
52
      md
5,202✔
53
    end
54

55
    # Match `SYMBOL` terminal
56
    #
57
    #     [12] SYMBOL     ::= '<' O_SYMBOL '>' | O_SYMBOL
58
    #     [12a] O_SYMBOL  ::= ([a-z] | [A-Z] | [0-9] | '_' | '.')+
59
    terminal(:SYMBOL, SYMBOL) do |value|
2✔
60
      value = value[1..-2] if value.start_with?('<') && value.end_with?('>')
10,492✔
61
      value.to_sym
10,492✔
62
    end
63

64
    # Match `HEX` terminal
65
    #
66
    #     [13] HEX        ::= '#x' ([a-f] | [A-F] | [0-9])+
67
    terminal(:HEX, HEX) do |value|
2✔
68
      [:hex, value]
316✔
69
    end
70

71
    # Terminal for `RANGE` is matched as part of a `primary` rule.
72
    # Note that this won't match if rules include identifiers.
73
    #
74
    #     [14] RANGE      ::= '[' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX) | R_CHAR | HEX)+ '-'? ']'
75
    terminal(:RANGE, proc {@range}) do |value|
46,352✔
76
      [:range, value[1..-2]]
2,690✔
77
    end
78

79
    # Terminal for `O_RANGE` is matched as part of a `primary` rule.
80
    #
81
    #     [15] O_RANGE    ::= '[^' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX) | R_CHAR | HEX)+ '-'? ']'
82
    terminal(:O_RANGE, O_RANGE) do |value|
2✔
83
      [:range, value[1..-2]]
488✔
84
    end
85

86
    # Match double quote string
87
    #
88
    #     [16] STRING1    ::= '"' (CHAR - '"')* '"'
89
    terminal(:STRING1, STRING1) do |value|
2✔
90
      using ::EBNF
864✔
91
      value[1..-2].tap {|s| s.quote_style = :dquote}
1,728✔
92
    end
93

94
    # Match single quote string
95
    #
96
    #     [17] STRING2    ::= "'" (CHAR - "'")* "'"
97
    terminal(:STRING2, STRING2) do |value|
2✔
98
      using ::EBNF
7,778✔
99
      value[1..-2].tap {|s| s.quote_style = :squote}
15,556✔
100
    end
101

102
    # The `CHAR` and `R_CHAR` productions are not used explicitly
103

104
    # Match `POSTFIX` terminal
105
    #
106
    #     [20] POSTFIX    ::= [?*+]
107
    terminal(:POSTFIX, POSTFIX)
2✔
108

109
    # The `PASS` production is not used explicitly
110

111
    # ## Non-terminal productions
112
    # Define productions for non-Terminals. This can include `start_production` as well as `production` to hook into rule start and end. In some cases, we need to use sub-productions as generated when turning EBNF into PEG.
113
    #
114
    # Productions are defined with a symbol matching the associated rule name.
115
    #
116
    # The result of the productions is typically the abstract syntax tree matched by the rule, so far, but could be a specific semantic value, or could be ignored with the result being returned via the `callback`.
117
    #
118
    # The `value` parameter is the result returned from child productions
119
    #
120
    # The `data` parameter is other data which may be returned by child productions placing information onto their input (unused in this example).
121
    #
122
    # The `callback` parameter provides access to a callback defined in the call to `parse`.
123

124
    # Production for end of `declaration` non-terminal.
125
    #
126
    # Look for `@terminals` to change parser state to parsing terminals.
127
    #
128
    # Clears the packrat parser when called.
129
    #
130
    # `@pass` is ignored here.
131
    #
132
    #     [2] declaration ::= '@terminals' | pass
133
    production(:declaration, clear_packrat: true) do |value, data, callback|
2✔
134
      # value contains a declaration.
135
      # Invoke callback
136
      callback.call(:terminals) if value == '@terminals'
166✔
137
      nil
166✔
138
    end
139

140
    # Production for end of `rule` non-terminal.
141
    #
142
    # By setting `as_hash: true` in the `start_production`, the `value` parameter will be in the form `{LHS: "v", expression: "v"}`. Otherwise, it would be expressed using an array of hashes of the form `[{LHS: "v"}, {expression: "v"}]`.
143
    #
144
    # Clears the packrat parser when called.
145
    #
146
    # Create rule from expression value and pass to callback
147
    #
148
    #     [3] rule        ::= LHS expression
149
    start_production(:rule, as_hash: true)
2✔
150
    production(:rule, clear_packrat: true) do |value, data, callback|
2✔
151
      # value contains an expression.
152
      # Invoke callback
153
      id, sym = value[:LHS]
5,192✔
154
      expression = value[:expression]
5,192✔
155
      rule = EBNF::Rule.new(sym.to_sym, id, expression)
5,192✔
156
      progress(:rule, rule.to_sxp)
5,192✔
157
      callback.call(:rule, rule)
5,192✔
158
      nil
5,192✔
159
    end
160

161
    # Production for end of `expression` non-terminal.
162
    # Passes through the optimized value of the alt production as follows:
163
    #
164
    # The `value` parameter, is of the form `[{alt: "v"}]`.
165
    #
166
    #     [:alt foo] => foo
167
    #     [:alt foo bar] => [:alt foo bar]
168
    #
169
    #     [4] expression  ::= alt
170
    production(:expression) do |value|
2✔
171
      value.first[:alt]
8,638✔
172
    end
173

174
    # Production for end of `alt` non-terminal.
175
    # Passes through the optimized value of the seq production as follows:
176
    #
177
    # The `value` parameter, is of the form `{seq: "v", _alt_1: "v"}`.
178
    #
179
    #     [:seq foo] => foo
180
    #     [:seq foo bar] => [:seq foo bar]
181
    #
182
    # Note that this also may just pass through from `_alt_1`
183
    #
184
    #     [5] alt         ::= seq ('|' seq)*
185
    start_production(:alt, as_hash: true)
2✔
186
    production(:alt) do |value|
2✔
187
      if value[:_alt_1].length > 0
8,638✔
188
        [:alt, value[:seq]] + value[:_alt_1]
3,120✔
189
      else
190
        value[:seq]
5,518✔
191
      end
192
    end
193

194
    # Production for end of `_alt_1` non-terminal.
195
    # Used to collect the `('|' seq)*` portion of the `alt` non-terminal:
196
    #
197
    # The `value` parameter, is of the form `[{seq: ["v"]}]`.
198
    #
199
    #     [5] _alt_1         ::= ('|' seq)*
200
    production(:_alt_1) do |value|
2✔
201
      value.map {|a1| a1.last[:seq]}.compact # Get rid of '|'
16,532✔
202
    end
203

204
    # Production for end of `seq` non-terminal.
205
    # Passes through the optimized value of the `diff` production as follows:
206
    #
207
    # The `value` parameter, is an array of values, which cannot be empty.
208
    #
209
    #     [:diff foo] => foo
210
    #     [:diff foo bar] => [:diff foo bar]
211
    #
212
    # Note that this also may just pass through from `_seq_1`
213
    #
214
    #     [6] seq         ::= diff+
215
    production(:seq) do |value|
2✔
216
      value.length == 1 ? value.first : ([:seq] + value)
16,532✔
217
    end
218

219
    # `Diff` production returns concatenated postfix values
220
    #
221
    # The `value` parameter, is of the form `{postfix: "v", _diff_1: "v"}`.
222
    #
223
    #     [7] diff        ::= postfix ('-' postfix)?
224
    start_production(:diff, as_hash: true)
2✔
225
    production(:diff) do |value|
2✔
226
      if value[:_diff_1]
25,808✔
227
        [:diff, value[:postfix], value[:_diff_1]]
218✔
228
      else
229
        value[:postfix]
25,590✔
230
      end
231
    end
232

233
    production(:_diff_1) do |value|
2✔
234
      value.last[:postfix] if value
25,808✔
235
    end
236

237
    # Production for end of `postfix` non-terminal.
238
    # Either returns the `primary` production value, or as modified by the `postfix`.
239
    #
240
    # The `value` parameter, is of the form `{primary: "v", _postfix_1: "v"}`.
241
    #
242
    #     [:primary] => [:primary]
243
    #     [:primary, '*'] => [:star, :primary]
244
    #     [:primary, '+'] => [:plus, :primary]
245
    #     [:primary, '?'] => [:opt, :primary]
246
    #
247
    #     [8] postfix     ::= primary POSTFIX?
248
    start_production(:postfix, as_hash: true)
2✔
249
    production(:postfix) do |value|
2✔
250
      # Push result onto input stack, as the `diff` production can have some number of `postfix` values that are applied recursively
251
      case value[:_postfix_1]
26,026✔
252
      when "*" then [:star, value[:primary]]
1,596✔
253
      when "+" then [:plus, value[:primary]]
852✔
254
      when "?" then [:opt, value[:primary]]
1,472✔
255
      else value[:primary]
22,106✔
256
      end
257
    end
258

259
    # Production for end of `primary` non-terminal.
260
    # Places `:primary` on the stack
261
    #
262
    # The `value` parameter is either a string (for a terminal) or an array of the form `[{'(': '('}, {expression: "v"}, {')': ')'}]`.
263
    #
264
    # This may either be a terminal, or the result of an `expression`.
265
    #
266
    #     [9] primary     ::= HEX
267
    #                     |   SYMBOL
268
    #                     |   RANGE
269
    #                     |   O_RANGE
270
    #                     |   STRING1
271
    #                     |   STRING2
272
    #                     |   '(' expression ')'
273
    production(:primary) do |value|
2✔
274
      Array(value).length > 2 ? value[1][:expression] : value
26,026✔
275
    end
276

277
    # Production for end of pass non-terminal.
278
    #
279
    #     [10] pass       ::= '@pass' expression
280
    production(:pass) do |value, data, callback|
2✔
281
      # Invoke callback
282
      callback.call(:pass, value.last[:expression])
48✔
283
    end
284

285
    # ## Parser invocation.
286
    # On start, yield ourselves if a block is given, otherwise, return this parser instance
287
    #
288
    # @param  [#read, #to_s]          input
289
    # @param  [Hash{Symbol => Object}] options
290
    # @option options [Integer] :level
291
    #   Trace level. 0(debug), 1(info), 2(warn), 3(error).
292
    # @return [EBNF::Parser]
293
    def initialize(input, **options, &block)
2✔
294
      # If the `level` option is set, instantiate a logger for collecting trace information.
295
      if options.key?(:level)
354✔
NEW
296
        options[:logger] ||= Logger.new(STDERR).
×
NEW
297
          tap {|x| x.level = options[:level]}.
×
NEW
298
          tap {|x| x.formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}}
×
299
      end
300

301
      # This is established on the first rule.
302
      self.class.instance_variable_set(:@lhs_includes_identifier, nil)
354✔
303

304
      # Read input, if necessary, which will be used in a Scanner.
305
      @input = input.respond_to?(:read) ? input.read : input.to_s
354✔
306

307
      parsing_terminals = false
354✔
308
      @ast = []
354✔
309
      parse(@input, :ebnf, EBNFMeta::RULES,
354✔
310
                           # Use an optimized Regexp for whitespace
311
                           whitespace: EBNF::Terminals::PASS,
312
                           **options
313
      ) do |context, *data|
314
        rule = case context
5,358✔
315
        when :terminals
316
          # After parsing `@terminals`
317
          # This changes the state of the parser to treat subsequent rules as terminals.
318
          parsing_terminals = true
118✔
319
          rule = EBNF::Rule.new(nil, nil, data.first, kind: :terminals)
118✔
320
        when :pass
321
          # After parsing `@pass`
322
          # This defines a specific rule for whitespace.
323
          rule = EBNF::Rule.new(nil, nil, data.first, kind: :pass)
48✔
324
        when :rule
325
          # A rule which has already been turned into a `Rule` object.
326
          rule = data.first
5,192✔
327
          rule.kind = :terminal if parsing_terminals
5,192✔
328
          rule
5,192✔
329
        end
330
        @ast << rule if rule
5,358✔
331
      end
332
    rescue EBNF::PEG::Parser::Error => e
333
      raise SyntaxError, e.message
16✔
334
    end
335
  end
336
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc