KenKundert / psf_utils / 19979590183

06 Dec 2025 12:10AM UTC coverage: 55.0% (-1.3%) from 56.25%

Pull Request #25: [AI Agent Test] ply-compatible fast read, mainly implemented in parse.py
Merge 358fb8060 into 7208265ad (github / web-flow)

49 of 61 new or added lines in 2 files covered (80.33%).
1 existing line in 1 file is now uncovered.
363 of 660 relevant lines covered (55.0%).
1.1 hits per line.
Source File

/psf_utils/parse.py: 80.49% covered

"""
Parse ASCII PSF Files
"""

# License {{{1
# Copyright (C) 2016-2023 Kenneth S. Kundert
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see http://www.gnu.org/licenses/.


# Imports {{{1
import ply.lex
import ply.yacc
from inform import Info, is_str, is_mapping
import numpy as np


# Globals {{{1
Filename = None


# Utility classes {{{1
# Simple record types; Info (from inform) turns constructor keyword
# arguments into attributes.
class Type(Info):
    pass


class Struct(Info):
    pass


class Sweep(Info):
    pass


class Trace(Info):
    pass


class Traces(Info):
    pass


class Value(Info):
    pass


class Array(Info):
    pass


# Exceptions {{{1
# ParseError {{{2
class ParseError(Exception):
    def __init__(self, text, loc=None):
        self.text = text
        self.loc = loc

    def __str__(self):
        "Return a formatted error message."
        if self.loc:
            return self.loc.message(Filename, self.text)
        if Filename:
            return "%s: %s" % (Filename, self.text)
        return self.text


# Record the location of a token {{{2
class TokenLocation(object):
    def __init__(self, token, index=None):
        "Records information about the location of a token."
        lexdata = token.lexer.lexdata
        if index:
            lexpos = token.lexpos(index)
        else:
            lexpos = token.lexpos
        # rfind() returns -1 when no newline precedes the token,
        # so adding 1 already yields 0 in that case.
        bol = lexdata.rfind('\n', 0, lexpos) + 1
        eol = lexdata.find('\n', lexpos)
        if eol < 0:
            eol = len(lexdata)
        self.line = lexdata[bol:eol]
        self.col = (lexpos - bol)
        self.row = lexdata[0:lexpos].count('\n') + 1

    def annotateLine(self, prefix=None):
        """
        Produces a two or three line result: optionally a prefix, then the
        line that contains the token, and then a pointer to the token.  If a
        prefix is given, it is printed before the line and separated from it
        by ': '.  Generally the prefix contains the filename and line number.
        """
        if prefix:
            return "%s\n    %s\n    %s^" % (
                prefix,
                self.line,
                self.col*' '
            )
        return "%s\n%s^" % (self.line, self.col*' ')

    def message(self, filename, msg):
        """
        Produces a message about the token.  The line containing the token is
        shown, along with a pointer to the token, followed by the message.
        """
        if self.row:
            loc = "%s" % (self.row)
        else:
            loc = ""
        if filename:
            loc = "%s(%s)" % (filename, loc)
        if loc:
            return self.annotateLine("%s: %s" % (loc, msg))
        return self.annotateLine(msg)


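# Illustrative sketch (not part of the module; the filename and input are
# hypothetical): if t_ID below rejects the unknown keyword FOO at the start
# of the line `FOO "bar" 1`, the resulting ParseError prints as:
#
#     example.psf(3): unknown keyword 'FOO'.
#         FOO "bar" 1
#         ^
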
# Lexer {{{1
# Reserved name tokens {{{2
reserved = {rw: rw for rw in [
    'ARRAY',
    'BYTE',
    'COMPLEX',
    'DOUBLE',
    'END',
    'FLOAT',
    'GROUP',
    'HEADER',
    'INT',
    'LONG',
    'NAN',
    'PROP',
    'SINGLE',
    'STRING',
    'STRUCT',
    'SWEEP',
    'TRACE',
    'TYPE',
    # 'VALUE',  # omitted; handled manually by t_VALUE below
]}
tokens = [
    'INTEGER',
    'REAL',
    'QUOTED_STRING',
    'VALUE',
    'FAST_VALUES',
] + list(reserved.values())

# Literal tokens {{{2
literals = r'()*'

# Number tokens {{{2
t_INTEGER = r"-?[0-9]+"
real_w_fract = r"[+-]?[0-9]+\.[0-9]*([eE][+-][0-9]+)?"
real_w_exp = r"[+-]?[0-9]+(\.[0-9]*)?[eE][+-][0-9]+"
t_REAL = f'({real_w_fract})|({real_w_exp})'

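# Illustrative examples (not part of the module): per the regexes above,
# `-12` lexes as INTEGER while `3.14`, `1.`, and `2.5e+03` lex as REAL.
# Note that both REAL alternatives require an explicit sign in the
# exponent, so `1e9` does not match REAL.
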

# NaN must be given as a function rather than a simple string.
# Doing so causes Ply to give it priority over keyword recognition
# because it is defined first.
def t_NAN(t):
    r"nan|NaN|inf"
    t.value = float(t.value)
    return t


# String tokens {{{2
t_QUOTED_STRING = r'"([^\\\n"]|(\\.))*"'
    # The complexity is needed to handle cases such as
    # "He yelled \"You are a fool!\".".
    # The first alternative says the string cannot contain a backslash,
    # newline, or quote.  The second allows a backslash combined with any
    # other character, which permits \" and \\.

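# For example (a sketch, not part of the module):
#
#     >>> import re
#     >>> bool(re.fullmatch(t_QUOTED_STRING, r'"He yelled \"Fool!\""'))
#     True
#     >>> bool(re.fullmatch(t_QUOTED_STRING, '"unterminated'))
#     False
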
# Special handling for VALUE to enable fast reading
def t_VALUE(t):
    r'VALUE'
    # Try to fast read the entire section by looking ahead for END.
    lexdata = t.lexer.lexdata
    lexpos = t.lexer.lexpos

    # First, check whether there are GROUP traces in the TRACE section,
    # which comes before the VALUE section.
    trace_start = lexdata.rfind('TRACE', 0, lexpos)
    if trace_start != -1:
        trace_section = lexdata[trace_start:lexpos]
        if 'GROUP' in trace_section:
            # GROUP traces use a different VALUE format; cannot fast parse.
            t.type = 'VALUE'
            return t

    end_idx = lexdata.find('END', lexpos)

    if end_idx != -1:
        # Check whether the content between VALUE and END is "simple",
        # i.e. contains no composite values (parentheses).
        section_content = lexdata[lexpos:end_idx]

        # Heuristic: if '(' is present, fall back to slow parsing.
        # This handles complex and composite values.
        if '(' in section_content:
            t.type = 'VALUE'
            return t

        # Try fast parsing with numpy.
        try:
            tokens_list = section_content.split()
            if not tokens_list:
                t.type = 'VALUE'
                return t

            # Identify the signals.  The section is a repeating sequence of
            # "name" value "name" value ...; find the cycle length.
            if len(tokens_list) < 2:
                t.type = 'VALUE'
                return t

            first_name = tokens_list[0]
            cycle_len = 0
            for i in range(2, len(tokens_list), 2):
                if tokens_list[i] == first_name:
                    cycle_len = i // 2
                    break

            if cycle_len == 0:
                t.type = 'VALUE'
                return t

            names = [tok.strip('"') for tok in tokens_list[0:cycle_len*2:2]]

            total_tokens = len(tokens_list)
            num_rows = total_tokens // (2 * cycle_len)

            if num_rows == 0:
                t.type = 'VALUE'
                return t

            # Truncate to full cycles.
            tokens_list = tokens_list[:num_rows * 2 * cycle_len]

            # Convert to a numpy array; this is the critical speedup.
            values = np.array(tokens_list[1::2], dtype=float)
            data = values.reshape((num_rows, cycle_len))

            # Success!  Return a FAST_VALUES token.
            t.type = 'FAST_VALUES'
            t.value = (names, data)

            # Update the lexer position to skip the consumed content.
            # Everything up to, but not including, 'END' was consumed,
            # so lexpos becomes end_idx.
            t.lexer.lexpos = end_idx

            return t

        except Exception:
            # Fall back to slow parsing on any error.
            t.type = 'VALUE'
            return t

    t.type = 'VALUE'
    return t


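# Illustrative sketch (not part of the module): given a section such as
#
#     VALUE
#     "time" 0.0 "out" 1.0
#     "time" 1e-9 "out" 0.5
#     END
#
# the scan above finds cycle_len == 2 and names == ['time', 'out'], and
# data is a 2x2 numpy array whose columns hold the samples of each signal.
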
# Identifier tokens {{{2
def t_ID(t):
    r'[A-Z]+'
    t.type = reserved.get(t.value)
    if t.type is None:
        loc = TokenLocation(t)
        t.lexer.skip(1)
        raise ParseError(f"unknown keyword '{t.value}'.", loc)
    return t


# Whitespace {{{2
# ignore whitespace
t_ignore = ' \t\n'


# Error {{{2
def t_error(t):
    c = t.value[0]
    loc = TokenLocation(t)
    t.lexer.skip(1)
    raise ParseError("illegal character '%s'." % c, loc)


# Parser rules {{{1
def p_contents(p):
    "contents : header_section type_section sweep_section trace_section value_section end"
    p[0] = (p[1], p[2], p[3], p[4], p[5])


def p_contents_without_sweep(p):
    "contents : header_section type_section value_section end"
    p[0] = (p[1], p[2], None, None, p[3])


def p_contents_only_header(p):
    "contents : header_section end"
    p[0] = (p[1], {}, None, None, {})


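# Illustrative sketch (not part of the module; the names are hypothetical):
# the grammar above accepts ASCII PSF content of the form
#
#     HEADER
#     "PSFversion" "1.00"
#     TYPE
#     "sweep" FLOAT DOUBLE
#     SWEEP
#     "time" "sweep" PROP( "key" 0 )
#     TRACE
#     "out" "sweep"
#     VALUE
#     "time" 0.0 "out" 1.0
#     END
#
# where the SWEEP and TRACE sections may also be omitted together.
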
def p_header_section(p):
    "header_section : HEADER named_values"
    p[0] = dict(p[2])


def p_named_values(p):
    "named_values : named_values named_value"
    p[1].append(p[2])
    p[0] = p[1]


def p_named_values_last(p):
    "named_values : named_value"
    p[0] = [p[1]]


def p_named_value(p):
    "named_value : string value"
    p[0] = (p[1], p[2])


def p_string_value(p):
    "value : string"
    p[0] = p[1]


def p_integer_value(p):
    "value : INTEGER"
    p[0] = int(p[1])


def p_real_value(p):
    """
    value : REAL
          | NAN
    """
    p[0] = float(p[1])


def p_string(p):
    "string : QUOTED_STRING"
    p[0] = (p[1][1:-1]).replace('\\', '')


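# Illustrative sketch (not part of the module): p_string drops the enclosing
# quotes and strips every backslash, so the token '"He said \"hi\""' becomes
# the Python string 'He said "hi"'.
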
def p_star(p):
    "star : '*'"
    p[0] = p[1]


def p_type_section(p):
    "type_section : TYPE types"
    p[0] = dict(p[2])


def p_types(p):
    "types : types type"
    p[1].append(p[2])
    p[0] = p[1]


def p_types_last(p):
    "types : type"
    p[0] = [p[1]]


def p_type(p):
    "type : string kinds"
    n = p[1]
    meta = {}
    kind = ' '.join(s for s in p[2] if is_str(s)).lower()
    if kind:
        meta['kind'] = kind
    for each in p[2]:
        if is_mapping(each):
            meta.update(each)
        elif isinstance(each, Struct):
            meta['struct'] = each
    p[0] = (n, Type(name=n, **meta))


def p_kinds(p):
    "kinds : kinds kind"
    p[1].append(p[2])
    p[0] = p[1]


def p_kinds_last(p):
    "kinds : kind"
    p[0] = [p[1]]


def p_kind(p):
    """
        kind : FLOAT
             | DOUBLE
             | COMPLEX
             | INT
             | BYTE
             | LONG
             | SINGLE
             | STRING
             | array
             | struct
             | prop
             | star
    """
    p[0] = p[1]


def p_struct(p):
    "struct : STRUCT '(' types ')'"
    p[0] = Struct(types=dict(p[3]))


def p_array(p):
    "array : ARRAY '(' star ')'"
    p[0] = Array(members=p[3])


def p_prop(p):
    "prop : PROP '(' named_values ')'"
    p[0] = dict(p[3])


def p_sweep_section(p):
    "sweep_section : SWEEP sweeps"
    p[0] = p[2]


def p_sweeps(p):
    "sweeps : sweeps sweep"
    p[1].append(p[2])
    p[0] = p[1]


def p_sweeps_last(p):
    "sweeps : sweep"
    p[0] = [p[1]]


def p_sweep(p):
    "sweep : string string kinds"
    p[0] = Sweep(name=p[1], type=p[2], **p[3][0])


def p_trace_section(p):
    "trace_section : TRACE traces"
    # Partition the groups out of the traces.
    # A group has one entry in the list of traces and a corresponding entry
    # in the groups dictionary; that entry is itself a dictionary that maps
    # each member name to the member type.
    traces = []
    groups = {}
    index = None
    for trace in p[2]:
        name = trace.name
        try:
            count = int(trace.type)
            index = 0
            group_name = name
            groups[group_name] = {}
            trace.type = 'GROUP'
            traces.append(trace)
            continue
        except ValueError:
            pass
        if index is None:
            traces.append(trace)
        else:
            groups[group_name][trace.name] = trace.type
            index += 1
            if index == count:
                index = None
    p[0] = (traces, groups)


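# Illustrative sketch (not part of the module): given the trace list
#     Trace(name='grp', type='2'), Trace(name='a', type='V'),
#     Trace(name='b', type='V'), Trace(name='c', type='V')
# the loop above produces
#     traces == [Trace(name='grp', type='GROUP'), Trace(name='c', type='V')]
#     groups == {'grp': {'a': 'V', 'b': 'V'}}
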
def p_traces(p):
    "traces : traces trace"
    p[1].append(p[2])
    p[0] = p[1]


def p_traces_last(p):
    "traces : trace"
    p[0] = [p[1]]


def p_traces_empty(p):
    "traces : "
    p[0] = []


def p_trace(p):
    "trace : named_value"
    name, type = p[1]
    p[0] = Trace(name=name, type=type)


def p_group_trace(p):
    "trace : string GROUP INTEGER"
    name = p[1]
    count = p[3]
    p[0] = Trace(name=name, type=count)


def p_trace_with_props(p):
    "trace : named_value prop"
    # Some PSF files place a units property on terminal current traces,
    # but the information seems redundant and can be ignored.
    name, type = p[1]
    p[0] = Trace(name=name, type=type)


def p_value_section(p):
    "value_section : VALUE values"
    p[0] = p[2]


def p_value_section_fast(p):
    "value_section : FAST_VALUES"
    names, data = p[1]
    values = {}

    # Construct the values dict.  The names come from the fast reader and
    # may still contain escapes; the standard parser unescapes strings in
    # p_string, so unescape here as well to match its output.
    for i, name in enumerate(names):
        clean_name = name.replace('\\', '')
        col = data[:, i]
        # Wrap in a Value object.  The standard parser produces
        # Value(type=..., values=[list]); here we produce
        # Value(values=numpy_array, is_fast=True).  No type information is
        # available at this point, but __init__ handles that.
        values[clean_name] = Value(values=col, is_fast=True)

    p[0] = values


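# Illustrative sketch (not part of the module): with
#     p[1] == (['time', 'out'], np.array([[0.0, 1.0], [1e-9, 0.5]]))
# the rule above produces a dict mapping 'time' to a Value holding the
# column [0.0, 1e-9] and 'out' to a Value holding the column [1.0, 0.5],
# each tagged with is_fast=True.
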
def p_values(p):
    "values : values signal_value"
    if p[2][0] not in p[1]:
        p[1][p[2][0]] = Value(type=p[2][1], values=[p[2][2]])
    else:
        p[1][p[2][0]].values.append(p[2][2])
    p[0] = p[1]


def p_values_last(p):
    "values : signal_value"
    p[0] = {p[1][0]: Value(type=p[1][1], values=[p[1][2]])}


def p_named_signal_scalar(p):
    """
    signal_value : string numbers
    """
    p[0] = (p[1], None, p[2])


def p_named_signal_with_type(p):
    """
    signal_value : string string numbers
    """
    p[0] = (p[1], p[2], p[3])


def p_named_string_signal_with_type(p):
    """
    signal_value : string string string
    """
    p[0] = (p[1], p[2], p[3])


def p_numbers(p):
    "numbers : numbers number"
    p[1].append(p[2])
    p[0] = p[1]


def p_last_number(p):
    "numbers : number"
    p[0] = [p[1]]


def p_number(p):
    """
    number : simple_number
           | composite_number
    """
    p[0] = p[1]


def p_integer_number(p):
    "simple_number : INTEGER"
    p[0] = int(p[1])


def p_real_number(p):
    # props are redundant, so ignore them
    """
    simple_number : REAL
                  | REAL prop
                  | NAN
                  | NAN prop
    """
    p[0] = float(p[1])


def p_composite_number(p):
    """
    composite_number : '(' numbers ')'
                     | '(' numbers ')' prop
    """
    p[0] = tuple(p[2])


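# Illustrative sketch (not part of the module): a composite value such as
# (1.0 2.0), as used for complex data, reduces to the Python tuple
# (1.0, 2.0); any trailing PROP(...) is parsed but discarded.
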
def p_end(p):
    "end : END"


# Error rule for syntax errors
def p_error(p):
    if p:
        loc = TokenLocation(p)
        raise ParseError("syntax error at '%s'." % (p.value), loc)
    else:
        raise ParseError("premature end of content.")


# ParsePSF class {{{1
class ParsePSF:
    def __init__(self):
        self.lexer = ply.lex.lex()
        self.parser = ply.yacc.yacc(write_tables=False, debug=False)

    def parse(self, filename, content):
        global Filename
        Filename = filename

        result = self.parser.parse(content, tracking=False, lexer=self.lexer)
        return result
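
# Usage sketch (not part of the module; the filename is hypothetical):
#
#     from psf_utils.parse import ParsePSF, ParseError
#
#     parser = ParsePSF()
#     try:
#         with open("signals.psf") as f:
#             # traces is the (traces, groups) pair built by p_trace_section
#             header, types, sweeps, traces, values = parser.parse(
#                 "signals.psf", f.read()
#             )
#     except ParseError as e:
#         print(e)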