• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

type-ruby / t-ruby / 20560974963

28 Dec 2025 11:21PM UTC coverage: 79.076% (+1.7%) from 77.331%
20560974963

push

github

web-flow
refactor: migrate parser from regex to token-based parser combinator (#29)

* refactor: migrate parser from regex to token-based parser combinator

- Replace monolithic parser_combinator.rb (2833 lines) with modular architecture
- Add Scanner for tokenization with regex literal support
- Create IR::InterpolatedString for string interpolation parsing
- Fix type inference for interpolated strings (returns String)
- Add TRuby::ParseError for unified error handling
- Organize parsers into primitives/, combinators/, and token/ directories
- Each file contains exactly one class (snake_case filename matches PascalCase class)

* fix: enhance parser to support ternary, splat args, and statement expressions

- Add ternary operator (? :) parsing in ExpressionParser
- Support double splat (**opts) and single splat (*args) in method calls
- Support keyword arguments (name: value) in method calls
- Allow case/if/unless/begin as assignment right-hand side values
- Improve generic type compatibility (Array[untyped] with Array[T])

Fixes type inference errors in keyword_args samples.

* style: fix RuboCop violations and adjust metrics limits

* fix: require set for Ruby 3.1 compatibility

1849 of 2098 new or added lines in 53 files covered. (88.13%)

6 existing lines in 2 files now uncovered.

6644 of 8402 relevant lines covered (79.08%)

908.09 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.96
/lib/t_ruby/parser_combinator/token/expression_parser.rb
1
# frozen_string_literal: true
2

3
require "set"
1✔
4

5
module TRuby
1✔
6
  module ParserCombinator
1✔
7
    # Expression Parser - Parse expressions into IR nodes
8
    # Uses Pratt parser (operator precedence parsing) for correct precedence
9
    class ExpressionParser
1✔
10
      include TokenDSL
1✔
11

12
      # Binary operator precedence levels (higher = binds tighter).
      # The relative ordering follows Ruby's own operator precedence table;
      # parse_precedence looks every candidate operator token up here, and a
      # token type that is absent from this table ends the binary expression.
      PRECEDENCE = {
        or_or: 1,      # ||
        and_and: 2,    # &&
        eq_eq: 3,      # ==
        bang_eq: 3,    # !=
        lt: 4,         # <
        gt: 4,         # >
        lt_eq: 4,      # <=
        gt_eq: 4,      # >=
        spaceship: 3,  # <=> (equality level in Ruby: binds looser than < > <= >=)
        pipe: 5,       # | (bitwise or)
        amp: 6,        # & (bitwise and)
        plus: 7,       # +
        minus: 7,      # -
        star: 8,       # *
        slash: 8,      # /
        percent: 8,    # %
        star_star: 9,  # ** (right-associative)
      }.freeze

      # Operators that group to the right (a ** b ** c == a ** (b ** c)).
      RIGHT_ASSOC = Set.new([:star_star]).freeze

      # Token type to operator symbol mapping, used as the `operator:` of the
      # IR::BinaryOp nodes built by parse_precedence.
      OPERATOR_SYMBOLS = {
        or_or: :"||",
        and_and: :"&&",
        eq_eq: :==,
        bang_eq: :!=,
        lt: :<,
        gt: :>,
        lt_eq: :<=,
        gt_eq: :>=,
        spaceship: :<=>,
        plus: :+,
        minus: :-,
        star: :*,
        slash: :/,
        percent: :%,
        star_star: :**,
        pipe: :|,
        amp: :&,
      }.freeze
56

57
      # Public entry point: parses one expression from +tokens+ beginning at
      # index +position+ and returns whatever parse_precedence returns.
      # Starts at precedence 0, i.e. below every entry in PRECEDENCE, so any
      # binary operator is accepted.
      def parse_expression(tokens, position = 0)
        lowest_precedence = 0
        parse_precedence(tokens, position, lowest_precedence)
      end
60

61
      private
1✔
62

63
      # Parses a chain of binary operators by precedence climbing, followed by
      # an optional ternary (`cond ? a : b`).
      #
      # tokens         - token list; each token responds to #type
      # position       - index of the first token of the expression
      # min_precedence - only operators whose PRECEDENCE is >= this value are
      #                  consumed; parse_expression passes 0
      #
      # Returns a TokenParseResult: success carrying the IR node and the index
      # just past the expression, or the first failure produced by an operand.
      def parse_precedence(tokens, position, min_precedence)
        result = parse_unary(tokens, position)
        return result if result.failure?

        left = result.value
        pos = result.position

        loop do
          break if pos >= tokens.length || tokens[pos].type == :eof

          operator_type = tokens[pos].type
          precedence = PRECEDENCE[operator_type]
          break unless precedence && precedence >= min_precedence

          pos += 1 # consume operator

          # A left-associative operator needs a strictly tighter right operand;
          # a right-associative one may recurse at its own level.
          next_min = RIGHT_ASSOC.include?(operator_type) ? precedence : precedence + 1
          right_result = parse_precedence(tokens, pos, next_min)
          return right_result if right_result.failure?

          pos = right_result.position
          left = IR::BinaryOp.new(
            operator: OPERATOR_SYMBOLS[operator_type],
            left: left,
            right: right_result.value
          )
        end

        # Ternary operator: condition ? then_branch : else_branch
        #
        # The ternary binds looser than every binary operator, so it is only
        # recognised at the lowest level. When this call is parsing the right
        # operand of a binary operator (min_precedence > 0) the '?' is left for
        # the outermost call, so `a + b ? c : d` becomes `(a + b) ? c : d`
        # rather than `a + (b ? c : d)`.
        if min_precedence <= 0 && pos < tokens.length && tokens[pos].type == :question
          pos += 1 # consume '?'

          then_result = parse_expression(tokens, pos)
          return then_result if then_result.failure?

          pos = then_result.position

          unless tokens[pos]&.type == :colon
            return TokenParseResult.failure("Expected ':' in ternary operator", tokens, pos)
          end

          pos += 1 # consume ':'

          # Parsing the else branch as a full expression makes nested
          # ternaries group to the right.
          else_result = parse_expression(tokens, pos)
          return else_result if else_result.failure?

          left = IR::Conditional.new(
            kind: :ternary,
            condition: left,
            then_branch: then_result.value,
            else_branch: else_result.value
          )
          pos = else_result.position
        end

        TokenParseResult.success(left, tokens, pos)
      end
123

124
      # Parses a prefix `!` or `-` applied to another unary expression; any
      # other token is handed to parse_postfix.
      #
      # A minus in front of an integer or float IR::Literal is folded into a
      # negative literal of the same literal_type; every other operand is
      # wrapped in an IR::UnaryOp. Fails with "End of input" when +position+
      # is past the end of +tokens+.
      def parse_unary(tokens, position)
        return TokenParseResult.failure("End of input", tokens, position) if position >= tokens.length

        prefix = tokens[position].type
        return parse_postfix(tokens, position) unless prefix == :bang || prefix == :minus

        operand_result = parse_unary(tokens, position + 1)
        return operand_result if operand_result.failure?

        operand = operand_result.value
        node =
          if prefix == :bang
            IR::UnaryOp.new(operator: :!, operand: operand)
          elsif operand.is_a?(IR::Literal) && operand.literal_type == :integer
            # Fold the sign into the numeric literal itself.
            IR::Literal.new(value: -operand.value, literal_type: :integer)
          elsif operand.is_a?(IR::Literal) && operand.literal_type == :float
            IR::Literal.new(value: -operand.value, literal_type: :float)
          else
            IR::UnaryOp.new(operator: :-, operand: operand)
          end

        TokenParseResult.success(node, tokens, operand_result.position)
      end
153

154
      # Parses a primary expression followed by any number of postfix forms:
      # `.method`, `.method(args)`, `[index]`, and `(args)` directly after a
      # local identifier. Stops at :eof, at the end of +tokens+, or at the
      # first token that is not one of those forms.
      #
      # Returns a TokenParseResult with the accumulated node, or the first
      # failure produced while parsing a suffix.
      def parse_postfix(tokens, position)
        primary = parse_primary(tokens, position)
        return primary if primary.failure?

        node = primary.value
        cursor = primary.position

        while cursor < tokens.length && tokens[cursor].type != :eof
          kind = tokens[cursor].type

          # A '(' only starts a call when it follows a bare local identifier.
          break if kind == :lparen && !(node.is_a?(IR::VariableRef) && node.scope == :local)

          step =
            case kind
            when :dot then postfix_method_call(tokens, cursor, node)
            when :lbracket then postfix_index(tokens, cursor, node)
            when :lparen then postfix_bare_call(tokens, cursor, node)
            end
          break unless step
          return step if step.failure?

          node = step.value
          cursor = step.position
        end

        TokenParseResult.success(node, tokens, cursor)
      end

      # Parses `.name` or `.name(args)` with +receiver+ as the call receiver;
      # +dot_position+ is the index of the '.' token.
      def postfix_method_call(tokens, dot_position, receiver)
        name_position = dot_position + 1
        if name_position >= tokens.length
          return TokenParseResult.failure("Expected method name after '.'", tokens, name_position)
        end

        name_token = tokens[name_position]
        # A keyword spelling is also accepted as a method name after '.'.
        unless name_token.type == :identifier || keywords.key?(name_token.value)
          return TokenParseResult.failure("Expected method name", tokens, name_position)
        end

        cursor = name_position + 1
        call_args = []
        if cursor < tokens.length && tokens[cursor].type == :lparen
          parsed_args = parse_arguments(tokens, cursor)
          return parsed_args if parsed_args.failure?

          call_args = parsed_args.value
          cursor = parsed_args.position
        end

        call = IR::MethodCall.new(
          receiver: receiver,
          method_name: name_token.value,
          arguments: call_args
        )
        TokenParseResult.success(call, tokens, cursor)
      end

      # Parses `[index]` and represents it as a "[]" method call on +receiver+;
      # +bracket_position+ is the index of the '[' token.
      def postfix_index(tokens, bracket_position, receiver)
        index = parse_expression(tokens, bracket_position + 1)
        return index if index.failure?

        close_position = index.position
        unless tokens[close_position]&.type == :rbracket
          return TokenParseResult.failure("Expected ']'", tokens, close_position)
        end

        call = IR::MethodCall.new(
          receiver: receiver,
          method_name: "[]",
          arguments: [index.value]
        )
        TokenParseResult.success(call, tokens, close_position + 1)
      end

      # Parses `(args)` following the local identifier +callee+ and builds a
      # receiver-less method call named after it.
      def postfix_bare_call(tokens, paren_position, callee)
        parsed_args = parse_arguments(tokens, paren_position)
        return parsed_args if parsed_args.failure?

        call = IR::MethodCall.new(
          method_name: callee.name,
          arguments: parsed_args.value
        )
        TokenParseResult.success(call, tokens, parsed_args.position)
      end
229

230
      # Parses a single primary expression: a literal, a variable reference,
      # a parenthesized expression, or an array/hash/interpolated-string
      # literal (delegated to the dedicated parsers).
      #
      # Returns a TokenParseResult; fails with "End of input" past the end of
      # +tokens+ and with "Unexpected token: <type>" for any other token type.
      def parse_primary(tokens, position)
        return TokenParseResult.failure("End of input", tokens, position) if position >= tokens.length

        token = tokens[position]

        case token.type
        when :integer
          primary_literal(tokens, position, token.value.to_i, :integer)
        when :float
          primary_literal(tokens, position, token.value.to_f, :float)
        when :string
          # Drop the first and last character (the surrounding quotes).
          primary_literal(tokens, position, token.value[1..-2], :string)
        when :string_start
          # Interpolated string: string_start, string_content*, string_end
          parse_interpolated_string(tokens, position)
        when :symbol
          # Drop the leading ':' before converting to a Symbol.
          primary_literal(tokens, position, token.value[1..].to_sym, :symbol)
        when true
          # The token type is compared with the literal `true`/`false`, not a
          # Symbol. NOTE(review): presumably the scanner emits boolean token
          # types for these keywords - confirm against the scanner.
          primary_literal(tokens, position, true, :boolean)
        when false
          primary_literal(tokens, position, false, :boolean)
        when :nil
          primary_literal(tokens, position, nil, :nil)
        when :identifier
          primary_variable(tokens, position, :local)
        when :constant
          primary_variable(tokens, position, :constant)
        when :ivar
          primary_variable(tokens, position, :instance)
        when :cvar
          primary_variable(tokens, position, :class)
        when :gvar
          primary_variable(tokens, position, :global)
        when :lparen
          primary_group(tokens, position)
        when :lbracket
          parse_array_literal(tokens, position)
        when :lbrace
          parse_hash_literal(tokens, position)
        else
          TokenParseResult.failure("Unexpected token: #{token.type}", tokens, position)
        end
      end

      # Wraps +value+ in an IR::Literal and consumes the single token at
      # +position+.
      def primary_literal(tokens, position, value, literal_type)
        node = IR::Literal.new(value: value, literal_type: literal_type)
        TokenParseResult.success(node, tokens, position + 1)
      end

      # Builds an IR::VariableRef named after the token at +position+ with the
      # given +scope+ and consumes that token.
      def primary_variable(tokens, position, scope)
        node = IR::VariableRef.new(name: tokens[position].value, scope: scope)
        TokenParseResult.success(node, tokens, position + 1)
      end

      # Parses `( expression )`; +position+ is the index of the '('. The inner
      # node is returned as-is, without a wrapper node.
      def primary_group(tokens, position)
        inner = parse_expression(tokens, position + 1)
        return inner if inner.failure?

        close_position = inner.position
        unless tokens[close_position]&.type == :rparen
          return TokenParseResult.failure("Expected ')'", tokens, close_position)
        end

        TokenParseResult.success(inner.value, tokens, close_position + 1)
      end
314

315
      # Parses a parenthesized, comma-separated argument list starting at the
      # '(' found at +position+.
      #
      # Returns a TokenParseResult whose value is an Array of argument nodes
      # (empty for `()`) and whose position is just past the ')'. Fails when
      # the '(' or ')' is missing, or with the failure of the first argument
      # that does not parse.
      def parse_arguments(tokens, position)
        return TokenParseResult.failure("Expected '('", tokens, position) unless tokens[position]&.type == :lparen

        cursor = position + 1
        collected = []

        # `()` - nothing to parse between the parentheses.
        return TokenParseResult.success(collected, tokens, cursor + 1) if tokens[cursor]&.type == :rparen

        loop do
          argument = parse_argument(tokens, cursor)
          return argument if argument.failure?

          collected << argument.value
          cursor = argument.position
          break unless tokens[cursor]&.type == :comma

          cursor += 1 # consume ','
        end

        return TokenParseResult.failure("Expected ')'", tokens, cursor) unless tokens[cursor]&.type == :rparen

        TokenParseResult.success(collected, tokens, cursor + 1)
      end
348

349
      # Parses one call argument. Recognised forms, checked in this order:
      #
      #   **expr      -> IR::MethodCall named "**" with expr as its only argument
      #   *expr       -> IR::MethodCall named "*" with expr as its only argument
      #   name: expr  -> IR::HashPair with a symbol-literal key
      #   expr        -> whatever parse_expression returns
      #
      # The splat forms reuse IR::MethodCall with a special name as a stand-in
      # for a dedicated splat node.
      def parse_argument(tokens, position)
        head = tokens[position]

        splat_name =
          case head&.type
          when :star_star then "**"
          when :star then "*"
          end

        if splat_name
          operand = parse_expression(tokens, position + 1)
          return operand if operand.failure?

          splat = IR::MethodCall.new(
            method_name: splat_name,
            arguments: [operand.value]
          )
          return TokenParseResult.success(splat, tokens, operand.position)
        end

        # Anything that is not `identifier :` is an ordinary expression.
        unless head&.type == :identifier && tokens[position + 1]&.type == :colon
          return parse_expression(tokens, position)
        end

        # Keyword argument: skip the identifier and the colon, then parse the value.
        value_result = parse_expression(tokens, position + 2)
        return value_result if value_result.failure?

        key = IR::Literal.new(value: head.value.to_sym, literal_type: :symbol)
        pair = IR::HashPair.new(key: key, value: value_result.value)
        TokenParseResult.success(pair, tokens, value_result.position)
      end
395

396
      # Parses an array literal `[a, b, ...]` whose '[' is at +position+.
      #
      # Returns a TokenParseResult carrying an IR::ArrayLiteral and the index
      # just past the ']'. Fails when the '[' or ']' is missing, or with the
      # failure of the first element that does not parse.
      def parse_array_literal(tokens, position)
        return TokenParseResult.failure("Expected '['", tokens, position) unless tokens[position]&.type == :lbracket

        cursor = position + 1
        items = []

        # An immediate ']' means an empty array; otherwise read elements
        # separated by commas.
        unless tokens[cursor]&.type == :rbracket
          loop do
            item = parse_expression(tokens, cursor)
            return item if item.failure?

            items << item.value
            cursor = item.position
            break unless tokens[cursor]&.type == :comma

            cursor += 1 # consume ','
          end

          return TokenParseResult.failure("Expected ']'", tokens, cursor) unless tokens[cursor]&.type == :rbracket
        end

        TokenParseResult.success(IR::ArrayLiteral.new(elements: items), tokens, cursor + 1)
      end
431

432
      # Parses a hash literal `{ k: v, ... }` whose '{' is at +position+.
      #
      # Returns a TokenParseResult carrying an IR::HashLiteral and the index
      # just past the '}'. Fails when the '{' or '}' is missing, or with the
      # failure of the first pair that parse_hash_pair rejects.
      def parse_hash_literal(tokens, position)
        return TokenParseResult.failure("Expected '{'", tokens, position) unless tokens[position]&.type == :lbrace

        cursor = position + 1
        entries = []

        # An immediate '}' means an empty hash; otherwise read pairs
        # separated by commas.
        unless tokens[cursor]&.type == :rbrace
          loop do
            entry = parse_hash_pair(tokens, cursor)
            return entry if entry.failure?

            entries << entry.value
            cursor = entry.position
            break unless tokens[cursor]&.type == :comma

            cursor += 1 # consume ','
          end

          return TokenParseResult.failure("Expected '}'", tokens, cursor) unless tokens[cursor]&.type == :rbrace
        end

        TokenParseResult.success(IR::HashLiteral.new(pairs: entries), tokens, cursor + 1)
      end
467

468
      # Parses one `key => value` / `key: value` entry of a hash literal.
      #
      # Two key forms are handled:
      #   * `name: value`    - identifier immediately followed by ':'; the key
      #                        becomes a symbol literal
      #   * `expr : value` or `expr => value` - any expression as the key
      #
      # Returns a TokenParseResult carrying an IR::HashPair, or a failure when
      # the key, the separator, or the value cannot be parsed.
      def parse_hash_pair(tokens, position)
        if tokens[position]&.type == :identifier && tokens[position + 1]&.type == :colon
          # Symbol key shorthand: key: value
          key = IR::Literal.new(value: tokens[position].value.to_sym, literal_type: :symbol)
          position += 2 # skip identifier and colon
        else
          key_result = parse_expression(tokens, position)
          return key_result if key_result.failure?

          key = key_result.value
          position = key_result.position

          # Accept both separators named in the error message. The hash rocket
          # is matched by its lexeme because the scanner's token type for it is
          # not visible from this file (assumes the scanner emits `=>` as one
          # token whose value is "=>" - TODO confirm).
          separator = tokens[position]
          unless separator&.type == :colon || separator&.value == "=>"
            return TokenParseResult.failure("Expected ':' or '=>' in hash pair", tokens, position)
          end

          position += 1 # consume the separator
        end

        value_result = parse_expression(tokens, position)
        return value_result if value_result.failure?

        pair = IR::HashPair.new(key: key, value: value_result.value)
        TokenParseResult.success(pair, tokens, value_result.position)
      end
495

496
      # Parses an interpolated string: string_start, then any mix of
      # string_content tokens and `#{ expression }` groups, then string_end.
      #
      # +position+ is the index of the string_start token. Literal segments
      # become string IR::Literal parts; interpolated expressions are stored
      # as the nodes returned by parse_expression.
      #
      # Returns a TokenParseResult carrying an IR::InterpolatedString and the
      # index just past string_end. Fails when an interpolation is not closed,
      # when an unexpected token appears inside the string, or when the tokens
      # run out before string_end is seen.
      def parse_interpolated_string(tokens, position)
        # Skip the string_start token (the opening quote).
        position += 1

        parts = []

        while position < tokens.length
          token = tokens[position]

          case token.type
          when :string_content
            parts << IR::Literal.new(value: token.value, literal_type: :string)
            position += 1
          when :interpolation_start
            # Skip #{ and parse the embedded expression.
            position += 1
            expr_result = parse_expression(tokens, position)
            return expr_result if expr_result.failure?

            parts << expr_result.value
            position = expr_result.position

            # Expect interpolation_end (})
            return TokenParseResult.failure("Expected '}'", tokens, position) unless tokens[position]&.type == :interpolation_end

            position += 1
          when :string_end
            node = IR::InterpolatedString.new(parts: parts)
            return TokenParseResult.success(node, tokens, position + 1)
          else
            return TokenParseResult.failure("Unexpected token in string: #{token.type}", tokens, position)
          end
        end

        # The loop only falls through when the tokens ended without a
        # string_end; report that instead of accepting a truncated string.
        TokenParseResult.failure("Unterminated string: expected closing quote", tokens, position)
      end
535

536
      # Returns TRuby::Scanner::KEYWORDS, cached in @keywords after the first
      # call. parse_postfix uses it to allow keyword spellings as method names
      # after a '.'.
      def keywords
        @keywords = TRuby::Scanner::KEYWORDS unless @keywords
        @keywords
      end
539
    end
540
  end
541
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc