• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

spyoungtech / json-five / 5756472930

pending completion
5756472930

Pull #45

github

web-flow
Merge c21720058 into 700e89e7d
Pull Request #45: Model improvements

246 of 246 new or added lines in 6 files covered. (100.0%)

1209 of 1248 relevant lines covered (96.88%)

3.88 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.65
/json5/parser.py
1
from __future__ import annotations
4✔
2

3
import ast
4✔
4
import sys
4✔
5
import typing
4✔
6
from functools import lru_cache
4✔
7
from typing import Any
4✔
8
from typing import Literal
4✔
9
from typing import Protocol
4✔
10

11
import regex as re
4✔
12
from sly import Parser  # type: ignore
4✔
13
from sly.yacc import SlyLogger  # type: ignore
4✔
14

15
from .model import BlockComment
4✔
16
from .model import BooleanLiteral
4✔
17
from .model import Comment
4✔
18
from .model import DoubleQuotedString
4✔
19
from .model import Float
4✔
20
from .model import Identifier
4✔
21
from .model import Infinity
4✔
22
from .model import Integer
4✔
23
from .model import JSONArray
4✔
24
from .model import JSONObject
4✔
25
from .model import JSONText
4✔
26
from .model import KeyValuePair
4✔
27
from .model import LineComment
4✔
28
from .model import NaN
4✔
29
from .model import NullLiteral
4✔
30
from .model import SingleQuotedString
4✔
31
from .model import TrailingComma
4✔
32
from .model import UnaryOp
4✔
33
from .tokenizer import JSON5Token
4✔
34
from .tokenizer import JSONLexer
4✔
35
from .tokenizer import tokenize
4✔
36
from .utils import JSON5DecodeError
4✔
37

38

39
class QuietSlyLogger(SlyLogger):  # type: ignore[misc]
    """``SlyLogger`` subclass whose ``warning``, ``debug`` and ``info`` discard every message."""

    def warning(self, *args: Any, **kwargs: Any) -> None:
        """Accept any arguments and do nothing."""
        return None

    # ``debug`` and ``info`` are the very same no-op function as ``warning``.
    debug = warning
    info = warning
4✔
45

46

47
# Maps the character that follows a backslash in a string literal to the single
# character that the escape stands for. Characters not listed here are handled
# by the caller (see ``replace_escape_literals``).
ESCAPE_SEQUENCES = {
    'b': '\b',  # U+0008 backspace
    'f': '\f',  # U+000C form feed
    'n': '\n',  # U+000A line feed
    'r': '\r',  # U+000D carriage return
    't': '\t',  # U+0009 horizontal tab
    'v': '\v',  # U+000B vertical tab
    '0': '\0',  # U+0000 null
    '\\': '\\',  # U+005C backslash
    '"': '"',  # U+0022 double quote
    "'": "'",  # U+0027 single quote
}
59

60
# class TrailingComma:
61
#     pass
62

63

64
def replace_escape_literals(matchobj: re.Match[str]) -> str:
    """Return the replacement text for one backslash escape.

    Intended as the ``repl`` callable of ``re.sub``: group 0 is the whole
    escape (backslash included) and group 1 is what follows the backslash.

    :param matchobj: the match for a single escape sequence
    :return: the mapped character from ``ESCAPE_SEQUENCES``, or group 1 itself
        when the escape has no special meaning
    :raises JSON5DecodeError: when the escape is ``\\0`` immediately followed
        by one more character (three characters matched in total)
    """
    whole = matchobj.group(0)
    null_followed_by_char = len(whole) == 3 and whole.startswith('\\0')
    if null_followed_by_char:
        raise JSON5DecodeError("'\\0' MUST NOT be followed by a decimal digit", None)
    escaped = matchobj.group(1)
    if escaped in ESCAPE_SEQUENCES:
        return ESCAPE_SEQUENCES[escaped]
    # Unknown escapes simply drop the backslash.
    return escaped
4✔
70

71

72
@lru_cache(maxsize=1024)
4✔
73
def _latin_escape_replace(s: str) -> str:
4✔
74
    if s.startswith('\\x') and len(s) != 4:
4✔
75
        raise JSON5DecodeError("'\\x' MUST be followed by two hexadecimal digits", None)
4✔
76
    val: str = ast.literal_eval(f'"{s}"')
4✔
77
    if val == '\\':
4✔
78
        val = '\\\\'  # this is important; the subsequent regex will sub it back to \\
4✔
79
    return val
4✔
80

81

82
def latin_unicode_escape_replace(matchobj: re.Match[str]) -> str:
    """``re.sub`` callback: decode the whole matched escape via ``_latin_escape_replace``."""
    return _latin_escape_replace(matchobj.group(0))
4✔
85

86

87
def _unicode_escape_replace(s: str) -> str:
4✔
88
    ret: str = ast.literal_eval(f'"{s}"')
4✔
89
    return ret
4✔
90

91

92
def unicode_escape_replace(matchobj: re.Match[str]) -> str:
    """``re.sub`` callback: decode the whole matched text via ``_unicode_escape_replace``."""
    return _unicode_escape_replace(matchobj.group(0))
4✔
95

96

97
class T_TokenSlice(Protocol):
    """Structural type for a production's ``_slice``: indexing by position yields a ``JSON5Token``."""

    def __getitem__(self, item: int) -> JSON5Token:
        ...
×
100

101

102
class T_AnyProduction(Protocol):
    """Minimal production type for rules that only read tokens through ``p._slice``."""

    _slice: T_TokenSlice
4✔
104

105

106
class T_TextProduction(Protocol):
    """Production type accepted by ``JSONParser.text`` (rule ``{ wsc } value { wsc }``)."""

    # whitespace/comments collected before the value
    wsc0: list[Comment | str]
    # whitespace/comments collected after the value
    wsc1: list[Comment | str]
    value: Value

    def __getitem__(self, i: Literal[1]) -> Value:
        ...
×
113

114

115
class T_FirstKeyValuePairProduction(Protocol):
    """Production type accepted by ``JSONParser.first_key_value_pair``.

    The rule is ``key { wsc } seen_colon COLON { wsc } object_value_seen value { wsc }``;
    the three ``{ wsc }`` groups are exposed as ``wsc0``, ``wsc1`` and ``wsc2``.
    """

    wsc0: list[Comment | str]
    wsc1: list[Comment | str]
    wsc2: list[Comment | str]
    key: Key
    value: Value
    _slice: T_TokenSlice

    def __getitem__(self, item: int) -> Key | Value:
        ...
×
125

126

127
class T_WSCProduction(Protocol):
    """Production type accepted by ``JSONParser.wsc`` (rules ``WHITESPACE`` and ``comment``)."""

    _slice: T_TokenSlice

    def __getitem__(self, item: Literal[0]) -> str | Comment:
        ...
×
132

133

134
class T_CommentProduction(Protocol):
    """Production type accepted by the ``JSONParser.comment`` rules; item 0 is the comment text."""

    _slice: T_TokenSlice

    def __getitem__(self, item: Literal[0]) -> str:
        ...
×
139

140

141
class T_KeyValuePairsProduction(Protocol):
    """Production type accepted by ``JSONParser.key_value_pairs``."""

    _slice: T_TokenSlice
    first_key_value_pair: KeyValuePair
    # NOTE(review): ``JSONParser.subsequent_key_value_pair`` can also return a
    # ``TrailingComma`` and ``key_value_pairs`` checks for it with isinstance, so
    # this element type looks narrower than what actually arrives — confirm.
    subsequent_key_value_pair: list[KeyValuePair]
4✔
145

146

147
class T_JsonObjectProduction(Protocol):
    """Production type accepted by ``JSONParser.json_object``."""

    _slice: T_TokenSlice
    # result of the optional ``key_value_pairs`` rule: (pairs, trailing comma or None), or None when absent
    key_value_pairs: tuple[list[KeyValuePair], TrailingComma | None] | None
    wsc: list[Comment | str]
4✔
151

152

153
class SubsequentKeyValuePairProduction(Protocol):
    """Production type accepted by ``JSONParser.subsequent_key_value_pair``."""

    _slice: T_TokenSlice
    wsc: list[Comment | str]
    # falsy when the comma is not followed by another pair (treated as a trailing comma)
    first_key_value_pair: KeyValuePair | None
4✔
157

158

159
class T_FirstArrayValueProduction(Protocol):
    """Production type accepted by ``JSONParser.first_array_value``; item 1 is the value node."""

    _slice: T_TokenSlice

    def __getitem__(self, item: Literal[1]) -> Value:
        ...

    wsc: list[Comment | str]
4✔
166

167

168
class T_SubsequentArrayValueProduction(Protocol):
    """Production type accepted by ``JSONParser.subsequent_array_value``."""

    _slice: T_TokenSlice
    # falsy when the comma is not followed by another value (treated as a trailing comma)
    first_array_value: Value | None
    wsc: list[Comment | str]
4✔
172

173

174
class T_ArrayValuesProduction(Protocol):
    """Production type accepted by ``JSONParser.array_values``."""

    _slice: T_TokenSlice
    first_array_value: Value
    # NOTE(review): ``JSONParser.subsequent_array_value`` can also return a
    # ``TrailingComma`` and ``array_values`` checks for it with isinstance, so
    # this element type looks narrower than what actually arrives — confirm.
    subsequent_array_value: list[Value]
4✔
178

179

180
class T_JsonArrayProduction(Protocol):
    """Production type accepted by ``JSONParser.json_array``."""

    _slice: T_TokenSlice
    # result of the optional ``array_values`` rule: (values, trailing comma or None), or None when absent
    array_values: tuple[list[Value], TrailingComma | None] | None
    wsc: list[Comment | str]
4✔
184

185

186
class T_IdentifierProduction(Protocol):
    """Production type accepted by ``JSONParser.identifier``; item 0 is the raw ``NAME`` text."""

    _slice: T_TokenSlice

    def __getitem__(self, item: Literal[0]) -> str:
        ...
×
191

192

193
class T_KeyProduction(Protocol):
    """Production type accepted by ``JSONParser.key``; item 1 is the identifier or string node."""

    def __getitem__(self, item: Literal[1]) -> Identifier | DoubleQuotedString | SingleQuotedString:
        ...
×
196

197

198
class T_NumberProduction(Protocol):
    """Production type for the single-token ``JSONParser.number`` rules; item 0 is the token text."""

    _slice: T_TokenSlice

    def __getitem__(self, item: Literal[0]) -> str:
        ...
×
203

204

205
class T_ValueNumberProduction(Protocol):
    """Production type for the signed-number ``JSONParser.value`` rules (``MINUS number`` / ``PLUS number``)."""

    _slice: T_TokenSlice
    number: Infinity | NaN | Float | Integer
4✔
208

209

210
class T_ExponentNotationProduction(Protocol):
    """Production type for the ``INTEGER EXPONENT`` / ``FLOAT EXPONENT`` number rule; items are token texts."""

    _slice: T_TokenSlice

    def __getitem__(self, item: int) -> str:
        ...
×
215

216

217
class T_StringTokenProduction(Protocol):
    """Production type for rules matching one string token; item 0 is the raw text, quote(s) included."""

    _slice: T_TokenSlice

    def __getitem__(self, item: Literal[0]) -> str:
        ...
×
222

223

224
class T_StringProduction(Protocol):
    """Production type accepted by ``JSONParser.string``; item 0 is an already-built string node."""

    _slice: T_TokenSlice

    def __getitem__(self, item: Literal[0]) -> DoubleQuotedString | SingleQuotedString:
        ...
×
229

230

231
class T_ValueProduction(Protocol):
    """Production type for the pass-through ``JSONParser.value`` rule; item 0 is the node to return."""

    _slice: T_TokenSlice

    def __getitem__(
        self, item: Literal[0]
    ) -> (
        DoubleQuotedString
        | SingleQuotedString
        | JSONObject
        | JSONArray
        | BooleanLiteral
        | NullLiteral
        | Infinity
        | Integer
        | Float
        | NaN
    ):
        ...
×
249

250

251
# Type variable used below to state that the decorator returned by ``_`` gives
# back an object of the same type it was handed.
T_CallArg = typing.TypeVar('T_CallArg')
# Annotation-only declaration (no value is assigned here) for the ``_`` name
# that JSONParser uses to decorate its grammar-rule methods.
# NOTE(review): presumably the real ``_`` is supplied by sly while the parser
# class body executes — confirm against the sly documentation.
_: typing.Callable[..., typing.Callable[[T_CallArg], T_CallArg]]
4✔
253

254

255
class JSONParser(Parser):  # type: ignore[misc]
    """SLY grammar that turns ``JSON5Token`` objects into a ``JSONText`` model.

    Problems found while parsing are collected in ``self.errors`` instead of
    being raised immediately; :meth:`parse` raises them after the underlying
    SLY parse has returned.

    ``self.expecting`` is a stack with one entry per currently open object or
    array. Each entry lists what may come next and is only used to build
    error messages (see :meth:`error`).
    """

    # debugfile = 'parser.out'
    tokens = JSONLexer.tokens
    log = QuietSlyLogger(sys.stderr)

    def __init__(self, *args: Any, **kwargs: Any):
        """Forward all arguments to ``Parser`` and create empty per-parse state."""
        super().__init__(*args, **kwargs)
        self.errors: list[JSON5DecodeError] = []
        self.last_token: JSON5Token | None = None
        self.seen_tokens: list[JSON5Token] = []
        self.expecting: list[list[str]] = []

    @_('{ wsc } value { wsc }')
    def text(self, p: T_TextProduction) -> JSONText:
        """Build the root node and attach leading/trailing whitespace and comments."""
        node = JSONText(value=p[1], tok=p.value._tok)
        for wsc in p.wsc0:
            node.wsc_before.append(wsc)
        for wsc in p.wsc1:
            node.wsc_after.append(wsc)
        return node

    @_('key { wsc } seen_colon COLON { wsc } object_value_seen value { wsc }')
    def first_key_value_pair(self, p: T_FirstKeyValuePairProduction) -> KeyValuePair:
        """Build a ``KeyValuePair``, attaching whitespace/comments to the key and value nodes."""
        key = p[0]
        for wsc in p.wsc0:
            key.wsc_after.append(wsc)
        value = p[6]  # item 6 is ``value`` in the rule above
        for wsc in p.wsc1:
            value.wsc_before.append(wsc)
        for wsc in p.wsc2:
            value.wsc_after.append(wsc)
        return KeyValuePair(key=p.key, value=p.value)

    @_('object_delimiter_seen COMMA { wsc } [ first_key_value_pair ]')
    def subsequent_key_value_pair(self, p: SubsequentKeyValuePairProduction) -> KeyValuePair | TrailingComma:
        """Return the pair following a comma, or a ``TrailingComma`` when no pair follows."""
        node: KeyValuePair | TrailingComma
        if p.first_key_value_pair:
            node = p.first_key_value_pair
            for wsc in p.wsc:
                node.key.wsc_before.append(wsc)
        else:
            node = TrailingComma(tok=p._slice[1])  # item 1 is the COMMA token
            for wsc in p.wsc:
                node.wsc_after.append(wsc)
        return node

    @_('WHITESPACE', 'comment')
    def wsc(self, p: T_WSCProduction) -> str | Comment:
        """Pass through one whitespace string or comment node."""
        return p[0]

    @_('BLOCK_COMMENT')
    def comment(self, p: T_CommentProduction) -> BlockComment:
        """Wrap a ``BLOCK_COMMENT`` token in a ``BlockComment`` node."""
        return BlockComment(p[0], tok=p._slice[0])

    @_('LINE_COMMENT')  # type: ignore[no-redef]
    def comment(self, p: T_CommentProduction):
        """Wrap a ``LINE_COMMENT`` token in a ``LineComment`` node."""
        return LineComment(p[0], tok=p._slice[0])

    @_('first_key_value_pair { subsequent_key_value_pair }')
    def key_value_pairs(self, p: T_KeyValuePairsProduction) -> tuple[list[KeyValuePair], TrailingComma | None]:
        """Collect an object's pairs.

        :return: ``(pairs, trailing_comma)``. Collection stops at the first
            ``TrailingComma``; if that comma is not the last item, a
            "multiple trailing commas" error is recorded.
        """
        ret = [
            p.first_key_value_pair,
        ]
        num_sqvp = len(p.subsequent_key_value_pair)
        for index, value in enumerate(p.subsequent_key_value_pair):
            if isinstance(value, TrailingComma):
                if index + 1 != num_sqvp:
                    offending_token = value._tok
                    self.errors.append(JSON5DecodeError("Syntax Error: multiple trailing commas", offending_token))
                return ret, value
            ret.append(value)
        return ret, None

    @_('')
    def seen_LBRACE(self, p: Any) -> None:
        """Empty rule: push the expectations for a newly opened object."""
        self.expecting.append(['RBRACE', 'key'])

    @_('')
    def seen_key(self, p: Any) -> None:
        """Empty rule: replace the top expectation with ``COLON``."""
        self.expecting.pop()
        self.expecting.append(['COLON'])

    @_('')
    def seen_colon(self, p: Any) -> None:
        """Empty rule: replace the top expectation with ``value``."""
        self.expecting.pop()
        self.expecting.append(['value'])

    @_('')
    def object_value_seen(self, p: Any) -> None:
        """Empty rule: replace the top expectation with ``COMMA`` or ``RBRACE``."""
        self.expecting.pop()
        self.expecting.append(['COMMA', 'RBRACE'])

    @_('')
    def object_delimiter_seen(self, p: Any) -> None:
        """Empty rule: replace the top expectation with ``RBRACE`` or ``key``."""
        self.expecting.pop()
        self.expecting.append(['RBRACE', 'key'])

    @_('')
    def seen_RBRACE(self, p: Any) -> None:
        """Empty rule: drop the expectations of the object being closed."""
        self.expecting.pop()

    @_('seen_LBRACE LBRACE { wsc } [ key_value_pairs ] seen_RBRACE RBRACE')
    def json_object(self, p: T_JsonObjectProduction) -> JSONObject:
        """Build a ``JSONObject`` from its optional pairs; items 1 and 5 are the brace tokens."""
        if not p.key_value_pairs:
            node = JSONObject(leading_wsc=list(p.wsc or []), tok=p._slice[1], end_tok=p._slice[5])
        else:
            kvps, trailing_comma = p.key_value_pairs
            node = JSONObject(
                *kvps,
                trailing_comma=trailing_comma,
                leading_wsc=list(p.wsc or []),
                tok=p._slice[1],
                end_tok=p._slice[5],
            )

        return node

    @_('array_value_seen value { wsc }')
    def first_array_value(self, p: T_FirstArrayValueProduction) -> Value:
        """Return the value node with its trailing whitespace/comments attached."""
        node = p[1]
        for wsc in p.wsc:
            node.wsc_after.append(wsc)
        return node

    @_('array_delimiter_seen COMMA { wsc } [ first_array_value ]')
    def subsequent_array_value(self, p: T_SubsequentArrayValueProduction) -> Value | TrailingComma:
        """Return the value following a comma, or a ``TrailingComma`` when no value follows."""
        node: Value | TrailingComma
        if p.first_array_value:
            node = p.first_array_value
            for wsc in p.wsc:
                node.wsc_before.append(wsc)
        else:
            node = TrailingComma(tok=p._slice[1])  # item 1 is the COMMA token
            for wsc in p.wsc:
                node.wsc_after.append(wsc)
        return node

    @_('first_array_value { subsequent_array_value }')
    def array_values(self, p: T_ArrayValuesProduction) -> tuple[list[Value], TrailingComma | None]:
        """Collect an array's values.

        :return: ``(values, trailing_comma)``. Collection stops at the first
            ``TrailingComma``; if that comma is not the last item, a
            "multiple trailing commas" error is recorded.
        """
        ret = [
            p.first_array_value,
        ]
        num_values = len(p.subsequent_array_value)
        for index, value in enumerate(p.subsequent_array_value):
            if isinstance(value, TrailingComma):
                if index + 1 != num_values:
                    self.errors.append(JSON5DecodeError("Syntax Error: multiple trailing commas", value._tok))
                return ret, value
            ret.append(value)
        return ret, None

    @_('seen_LBRACKET LBRACKET { wsc } [ array_values ] seen_RBRACKET RBRACKET')
    def json_array(self, p: T_JsonArrayProduction) -> JSONArray:
        """Build a ``JSONArray`` from its optional values; items 1 and 5 are the bracket tokens."""
        if not p.array_values:
            node = JSONArray(tok=p._slice[1], end_tok=p._slice[5])
        else:
            values, trailing_comma = p.array_values
            node = JSONArray(*values, trailing_comma=trailing_comma, tok=p._slice[1], end_tok=p._slice[5])

        for wsc in p.wsc:
            node.leading_wsc.append(wsc)

        return node

    @_('')
    def seen_LBRACKET(self, p: Any) -> None:
        """Empty rule: push the expectations for a newly opened array."""
        self.expecting.append(['RBRACKET', 'value'])

    @_('')
    def seen_RBRACKET(self, p: Any) -> None:
        """Empty rule: drop the expectations of the array being closed."""
        self.expecting.pop()

    @_('')
    def array_delimiter_seen(self, p: Any) -> None:
        """Empty rule: after a comma, the second expectation of the array becomes ``value``."""
        assert len(self.expecting[-1]) == 2
        self.expecting[-1].pop()
        self.expecting[-1].append('value')

    @_('')
    def array_value_seen(self, p: Any) -> None:
        """Empty rule: before a value, the second expectation of the array becomes ``COMMA``."""
        assert len(self.expecting[-1]) == 2
        assert self.expecting[-1][-1] == 'value'
        self.expecting[-1].pop()
        self.expecting[-1].append('COMMA')

    @_('NAME')
    def identifier(self, p: T_IdentifierProduction) -> Identifier:
        """Build an ``Identifier``; ``\\uXXXX`` escapes are decoded and the result is validated."""
        raw_value = p[0]
        name = re.sub(r'\\u[0-9a-fA-F]{4}', unicode_escape_replace, raw_value)
        pattern = r'[\w_\$]([\w_\d\$\p{Pc}\p{Mn}\p{Mc}\u200C\u200D])*'
        if not re.fullmatch(pattern, name):
            self.errors.append(JSON5DecodeError("Invalid identifier name", p._slice[0]))
        return Identifier(name=name, raw_value=raw_value, tok=p._slice[0])

    @_('seen_key identifier', 'seen_key string')
    def key(self, p: T_KeyProduction) -> Identifier | DoubleQuotedString | SingleQuotedString:
        """Pass through the identifier or string node used as an object key."""
        node = p[1]
        return node

    @_('INTEGER')
    def number(self, p: T_NumberProduction):
        """Build an ``Integer`` from an ``INTEGER`` token."""
        return Integer(p[0], tok=p._slice[0])

    @_('FLOAT')  # type: ignore[no-redef]
    def number(self, p: T_NumberProduction):
        """Build a ``Float`` from a ``FLOAT`` token."""
        return Float(p[0], tok=p._slice[0])

    @_('OCTAL')  # type: ignore[no-redef]
    def number(self, p: T_NumberProduction):
        """Record an error for an ``OCTAL`` token and still return an ``Integer`` node.

        When the text contains an 8 or 9, a second error is recorded and the
        node is built from ``oct(0)`` instead of the raw text.
        """
        self.errors.append(JSON5DecodeError("Invalid integer literal. Octals are not allowed", p._slice[0]))
        raw_value = p[0]
        if re.search(r'[89]+', raw_value):
            self.errors.append(JSON5DecodeError("Invalid octal format. Octal digits must be in range 0-7", p._slice[0]))
            return Integer(raw_value=oct(0), is_octal=True, tok=p._slice[0])
        return Integer(raw_value, is_octal=True, tok=p._slice[0])

    @_('INFINITY')  # type: ignore[no-redef]
    def number(self, p: T_AnyProduction) -> Infinity:
        """Build an ``Infinity`` node."""
        return Infinity(tok=p._slice[0])

    @_('NAN')  # type: ignore[no-redef]
    def number(self, p: T_AnyProduction) -> NaN:
        """Build a ``NaN`` node."""
        return NaN(tok=p._slice[0])

    @_('MINUS number')
    def value(self, p: T_ValueNumberProduction) -> UnaryOp:
        """Wrap a number in a ``-`` ``UnaryOp``; an ``Infinity`` operand is also flagged negative."""
        if isinstance(p.number, Infinity):
            p.number.negative = True
        node = UnaryOp(op='-', value=p.number, tok=p._slice[0], end_tok=p.number._end_tok)
        return node

    @_('PLUS number')  # type: ignore[no-redef]
    def value(self, p: T_ValueNumberProduction):
        """Wrap a number in a ``+`` ``UnaryOp``."""
        node = UnaryOp(op='+', value=p.number, tok=p._slice[0], end_tok=p.number._end_tok)
        return node

    @_('INTEGER EXPONENT', 'FLOAT EXPONENT')  # type: ignore[no-redef]
    def number(self, p: T_ExponentNotationProduction) -> Float:
        """Build a ``Float`` from a mantissa token followed by an exponent token."""
        exp_notation = p[1][0]  # e or E
        return Float(p[0] + p[1], exp_notation=exp_notation, tok=p._slice[0], end_tok=p._slice[1])

    @_('HEXADECIMAL')  # type: ignore[no-redef]
    def number(self, p: T_NumberProduction) -> Integer:
        """Build a hexadecimal ``Integer``."""
        return Integer(p[0], is_hex=True, tok=p._slice[0])

    def _decode_string_contents(self, raw_value: str, tok: JSON5Token) -> str:
        """Decode the body of a quoted string token (shared by both quote styles).

        Steps, in order: record an error for a line terminator that is not
        preceded by a backslash, remove backslash + line-terminator
        continuations, decode ``\\x``/``\\u`` escapes, then decode the
        remaining backslash escapes. Errors are appended to ``self.errors``
        with ``tok`` attached; decoding continues with the text as it was
        before the failing step.

        :param raw_value: token text including the opening and closing quote
        :param tok: the string token, used for error reporting
        :return: the decoded contents
        """
        contents = raw_value[1:-1]
        terminator_in_string = re.search(r'(?<!\\)([\u000D\u2028\u2029]|(?<!\r)\n)', contents)
        if terminator_in_string:
            end = terminator_in_string.span()[0]
            before_terminator = terminator_in_string.string[:end]
            pos = tok.index + len(before_terminator)
            doc = tok.doc
            lineno = doc.count('\n', 0, pos) + 1
            colno = pos - doc.rfind('\n', 0, pos) + 1
            index = pos + 1
            # NOTE(review): the parenthesis opened before "line" is never closed
            # in this message; left untouched because callers/tests may match it.
            errmsg = f"Illegal line terminator (line {lineno} column {colno} (char {index}) without continuation"
            self.errors.append(JSON5DecodeError(errmsg, tok))
        contents = re.sub(r'\\(\r\n|[\u000A\u000D\u2028\u2029])', '', contents)
        # NOTE(review): this pass does not know about escaped backslashes, so the
        # input ``\\x41`` (backslash, backslash, x41) appears to have its ``\x41``
        # decoded here — confirm whether that is intended.
        try:
            contents = re.sub(r'(\\x[a-fA-F0-9]{0,2}|\\u[0-9a-fA-F]{4})', latin_unicode_escape_replace, contents)
        except JSON5DecodeError as exc:
            self.errors.append(JSON5DecodeError(exc.args[0], tok))
        try:
            contents = re.sub(r'\\(0\d|.)', replace_escape_literals, contents)
        except JSON5DecodeError as exc:
            self.errors.append(JSON5DecodeError(exc.args[0], tok))
        return contents

    @_('DOUBLE_QUOTE_STRING')
    def double_quoted_string(self, p: T_StringTokenProduction) -> DoubleQuotedString:
        """Build a ``DoubleQuotedString`` from the decoded token contents."""
        raw_value = p[0]
        contents = self._decode_string_contents(raw_value, p._slice[0])
        return DoubleQuotedString(contents, raw_value=raw_value, tok=p._slice[0])

    @_("SINGLE_QUOTE_STRING")
    def single_quoted_string(self, p: T_StringTokenProduction) -> SingleQuotedString:
        """Build a ``SingleQuotedString`` from the decoded token contents."""
        raw_value = p[0]
        contents = self._decode_string_contents(raw_value, p._slice[0])
        return SingleQuotedString(contents, raw_value=raw_value, tok=p._slice[0])

    @_('double_quoted_string', 'single_quoted_string')
    def string(self, p: T_StringProduction) -> SingleQuotedString | DoubleQuotedString:
        """Pass through an already-built string node."""
        return p[0]

    @_('TRUE')
    def boolean(self, p: T_AnyProduction) -> BooleanLiteral:
        """Build a ``BooleanLiteral`` for ``true``."""
        return BooleanLiteral(True, tok=p._slice[0])

    @_('FALSE')  # type: ignore[no-redef]
    def boolean(self, p: T_AnyProduction) -> BooleanLiteral:
        """Build a ``BooleanLiteral`` for ``false``."""
        return BooleanLiteral(False, tok=p._slice[0])

    @_('NULL')
    def null(self, p: T_AnyProduction) -> NullLiteral:
        """Build a ``NullLiteral``."""
        return NullLiteral(tok=p._slice[0])

    @_(  # type: ignore[no-redef]
        'string',
        'json_object',
        'json_array',
        'boolean',
        'null',
        'number',
    )
    def value(
        self, p: T_ValueProduction
    ) -> (
        DoubleQuotedString
        | SingleQuotedString
        | JSONObject
        | JSONArray
        | BooleanLiteral
        | NullLiteral
        | Infinity
        | Integer
        | Float
        | NaN
    ):
        """Pass through whichever node the matched alternative produced."""
        node = p[0]
        return node

    @_('UNTERMINATED_SINGLE_QUOTE_STRING', 'UNTERMINATED_DOUBLE_QUOTE_STRING')  # type: ignore[no-redef]
    def string(self, p: T_StringTokenProduction) -> SingleQuotedString | DoubleQuotedString:
        """Report an unterminated string and still return a node built from the text after the opening quote."""
        self.error(p._slice[0])
        raw = p[0]
        if raw.startswith('"'):
            return DoubleQuotedString(raw[1:], raw_value=raw, tok=p._slice[0])
        return SingleQuotedString(raw[1:], raw_value=raw, tok=p._slice[0])

    def error(self, token: JSON5Token | None) -> JSON5Token | None:
        """Record a syntax error.

        :param token: the offending token, or a falsy value at end of input
        :return: when ``token`` is given, the next token from ``self.tokens``
            (or a synthetic ``$end`` token once that iterator is exhausted);
            otherwise ``None``
        """
        if token:
            if self.expecting:
                expected = self.expecting[-1]

                message = f"Syntax Error. Was expecting {' or '.join(expected)}"
            else:
                message = 'Syntax Error'

            self.errors.append(JSON5DecodeError(message, token))
            try:
                return next(self.tokens)  # type: ignore
            except StopIteration:
                # EOF: hand back a stand-in end-of-input token.
                class tok:
                    type = '$end'
                    value = None
                    lineno = None
                    index = None
                    end = None

                return JSON5Token(tok(), None)  # type: ignore[arg-type]
        elif self.last_token:
            # Input ended early: report the position just past the end of the document.
            doc = self.last_token.doc
            pos = len(doc)
            lineno = doc.count('\n', 0, pos) + 1
            colno = pos - doc.rfind('\n', 0, pos)
            message = f'Expecting value. Unexpected EOF at: line {lineno} column {colno} (char {pos})'
            if self.expecting:
                expected = self.expecting[-1]
                message += f'. Was expecting {" or ".join(expected)}'
            self.errors.append(JSON5DecodeError(message, None))
        else:
            #  Empty file
            self.errors.append(JSON5DecodeError('Expecting value. Received unexpected EOF', None))
        return None

    def _token_gen(self, tokens: typing.Iterable[JSON5Token]) -> typing.Generator[JSON5Token, None, None]:
        """Yield ``tokens`` unchanged while recording each in ``last_token`` and ``seen_tokens``."""
        for tok in tokens:
            self.last_token = tok
            self.seen_tokens.append(tok)
            yield tok

    def parse(self, tokens: typing.Iterable[JSON5Token]) -> JSONText:
        """Parse ``tokens`` into a ``JSONText`` model.

        :param tokens: the tokens to parse
        :return: the model produced by the grammar
        :raises JSON5DecodeError: when any error was recorded. A single error
            is raised as is; several are merged into one message that lists
            the first error plus up to five more, carrying the first error's
            ``lineno``, ``token`` and ``index``.
        """
        # Start every run from a clean slate so that a reused parser instance
        # does not re-raise errors (or reuse expectations) left by an earlier run.
        self.errors = []
        self.last_token = None
        self.seen_tokens = []
        self.expecting = []

        tokens = self._token_gen(tokens)
        model: JSONText = super().parse(tokens)
        if not self.errors:
            return model
        if len(self.errors) == 1:
            raise self.errors[0]

        primary_error = self.errors[0]
        msg = (
            "There were multiple errors parsing the JSON5 document.\n"
            "The primary error was: \n\t{}\n"
            "Additionally, the following errors were also detected:\n\t{}"
        )

        num_additional_errors = len(self.errors) - 1
        additional_errors = '\n\t'.join(extra.args[0] for extra in self.errors[1:6])
        if num_additional_errors > 5:
            additional_errors += f'\n\t{num_additional_errors - 5} additional error(s) truncated'
        msg = msg.format(primary_error.args[0], additional_errors)
        err = JSON5DecodeError(msg, None)
        err.lineno = primary_error.lineno
        err.token = primary_error.token
        err.index = primary_error.index
        raise err
4✔
674

675

676
def parse_tokens(raw_tokens: typing.Iterable[JSON5Token]) -> JSONText:
    """Parse ``raw_tokens`` with a freshly created ``JSONParser`` and return the resulting model."""
    return JSONParser().parse(raw_tokens)
4✔
679

680

681
def parse_source(text: str) -> JSONText:
    """Tokenize ``text`` and parse the tokens into a ``JSONText`` model."""
    return parse_tokens(tokenize(text))
4✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc