• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

spyoungtech / json-five / 10567855230

26 Aug 2024 09:53PM UTC coverage: 97.125% (-1.0%) from 98.133%
10567855230

push

github

web-flow
[pre-commit.ci] pre-commit autoupdate

updates:
- [github.com/pre-commit/pre-commit-hooks: v4.5.0 → v4.6.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.5.0...v4.6.0)
- [github.com/asottile/reorder-python-imports: v3.12.0 → v3.13.0](https://github.com/asottile/reorder-python-imports/compare/v3.12.0...v3.13.0)
- [github.com/psf/black: 23.10.1 → 24.8.0](https://github.com/psf/black/compare/23.10.1...24.8.0)
- [github.com/asottile/pyupgrade: v3.15.0 → v3.17.0](https://github.com/asottile/pyupgrade/compare/v3.15.0...v3.17.0)
- [github.com/pre-commit/mirrors-mypy: v1.6.1 → v1.11.2](https://github.com/pre-commit/mirrors-mypy/compare/v1.6.1...v1.11.2)
- [github.com/pycqa/flake8: 6.1.0 → 7.1.1](https://github.com/pycqa/flake8/compare/6.1.0...7.1.1)

1216 of 1252 relevant lines covered (97.12%)

3.88 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.66
/json5/parser.py
1
from __future__ import annotations
4✔
2

3
import ast
4✔
4
import sys
4✔
5
import typing
4✔
6
from functools import lru_cache
4✔
7
from typing import Any
4✔
8
from typing import Literal
4✔
9
from typing import Protocol
4✔
10

11
import regex as re
4✔
12
from sly import Parser  # type: ignore
4✔
13
from sly.yacc import SlyLogger  # type: ignore
4✔
14

15
from .model import BlockComment
4✔
16
from .model import BooleanLiteral
4✔
17
from .model import Comment
4✔
18
from .model import DoubleQuotedString
4✔
19
from .model import Float
4✔
20
from .model import Identifier
4✔
21
from .model import Infinity
4✔
22
from .model import Integer
4✔
23
from .model import JSONArray
4✔
24
from .model import JSONObject
4✔
25
from .model import JSONText
4✔
26
from .model import Key
4✔
27
from .model import KeyValuePair
4✔
28
from .model import LineComment
4✔
29
from .model import NaN
4✔
30
from .model import NullLiteral
4✔
31
from .model import SingleQuotedString
4✔
32
from .model import TrailingComma
4✔
33
from .model import UnaryOp
4✔
34
from .model import Value
4✔
35
from .tokenizer import JSON5Token
4✔
36
from .tokenizer import JSONLexer
4✔
37
from .tokenizer import tokenize
4✔
38
from .utils import JSON5DecodeError
4✔
39

40

41
class QuietSlyLogger(SlyLogger):  # type: ignore[misc]
    """``SlyLogger`` subclass whose ``warning``, ``debug`` and ``info`` calls do nothing."""

    def warning(self, *args: Any, **kwargs: Any) -> None:
        """Accept any arguments and discard them."""
        return None

    # ``debug`` and ``info`` are the very same no-op function as ``warning``.
    debug = info = warning
47

48

49
# Maps the character that follows a backslash in a string literal to the text
# that escape stands for. Characters missing from this table are looked up with
# ``.get(seq, seq)`` by replace_escape_literals, i.e. they stand for themselves.
ESCAPE_SEQUENCES = {
    'b': '\u0008',  # backspace
    'f': '\u000C',  # form feed
    'n': '\u000A',  # line feed
    'r': '\u000D',  # carriage return
    't': '\u0009',  # horizontal tab
    'v': '\u000B',  # vertical tab
    '0': '\u0000',  # NUL
    '\\': '\u005c',  # backslash
    '"': '\u0022',  # double quote
    "'": '\u0027',  # single quote
}
61

62
# class TrailingComma:
63
#     pass
64

65

66
def replace_escape_literals(matchobj: re.Match[str]) -> str:
    """Substitution callback that resolves one backslash escape.

    ``group(0)`` is the whole escape (backslash included) and ``group(1)`` is
    what follows the backslash. Known escapes are translated through
    ``ESCAPE_SEQUENCES``; anything else is returned unchanged (minus the
    backslash).

    Raises:
        JSON5DecodeError: if the escape is ``\\0`` followed by one more
            character (three characters in total).
    """
    whole_escape = matchobj.group(0)
    if len(whole_escape) == 3 and whole_escape.startswith('\\0'):
        raise JSON5DecodeError("'\\0' MUST NOT be followed by a decimal digit", None)
    escaped = matchobj.group(1)
    return ESCAPE_SEQUENCES.get(escaped, escaped)
72

73

74
@lru_cache(maxsize=1024)
4✔
75
def _latin_escape_replace(s: str) -> str:
4✔
76
    if s.startswith('\\x') and len(s) != 4:
4✔
77
        raise JSON5DecodeError("'\\x' MUST be followed by two hexadecimal digits", None)
4✔
78
    val: str = ast.literal_eval(f'"{s}"')
4✔
79
    if val == '\\':
4✔
80
        val = '\\\\'  # this is important; the subsequent regex will sub it back to \\
4✔
81
    return val
4✔
82

83

84
def latin_unicode_escape_replace(matchobj: re.Match[str]) -> str:
    """Substitution callback: decode the entire matched text via ``_latin_escape_replace``."""
    return _latin_escape_replace(matchobj.group(0))
87

88

89
def _unicode_escape_replace(s: str) -> str:
4✔
90
    ret: str = ast.literal_eval(f'"{s}"')
4✔
91
    return ret
4✔
92

93

94
def unicode_escape_replace(matchobj: re.Match[str]) -> str:
    """Substitution callback: decode the entire matched text via ``_unicode_escape_replace``."""
    return _unicode_escape_replace(matchobj.group(0))
97

98

99
class T_TokenSlice(Protocol):
    """Structural type of a production's ``_slice``: integer indexing yields a ``JSON5Token``."""

    def __getitem__(self, item: int) -> JSON5Token: ...
102

103

104
class T_AnyProduction(Protocol):
    """Minimal production shape for rules that only need the matched tokens."""

    # Tokens matched by the rule, addressable by position.
    _slice: T_TokenSlice
106

107

108
class T_TextProduction(Protocol):
    """Production shape consumed by ``JSONParser.text`` (``{ wsc } value { wsc }``)."""

    value: Value
    # Whitespace/comments before (wsc0) and after (wsc1) the value.
    wsc0: list[Comment | str]
    wsc1: list[Comment | str]

    def __getitem__(self, i: Literal[1]) -> Value: ...
115

116

117
class T_FirstKeyValuePairProduction(Protocol):
    """Production shape consumed by ``JSONParser.first_key_value_pair``."""

    _slice: T_TokenSlice
    key: Key
    value: Value
    # The three ``{ wsc }`` groups of the rule, in order of appearance.
    wsc0: list[Comment | str]
    wsc1: list[Comment | str]
    wsc2: list[Comment | str]

    def __getitem__(self, item: int) -> Key | Value: ...
127

128

129
class T_WSCProduction(Protocol):
    """Production shape consumed by ``JSONParser.wsc``: item 0 is whitespace text or a comment node."""

    _slice: T_TokenSlice

    def __getitem__(self, item: Literal[0]) -> str | Comment: ...
134

135

136
class T_CommentProduction(Protocol):
    """Production shape consumed by the ``comment`` rules: item 0 is the comment text."""

    _slice: T_TokenSlice

    def __getitem__(self, item: Literal[0]) -> str: ...
141

142

143
class T_KeyValuePairsProduction(Protocol):
    """Production shape consumed by ``JSONParser.key_value_pairs``."""

    _slice: T_TokenSlice
    # The mandatory first pair, then the results of each repeated subsequent rule.
    first_key_value_pair: KeyValuePair
    subsequent_key_value_pair: list[KeyValuePair]
147

148

149
class T_JsonObjectProduction(Protocol):
    """Production shape consumed by ``JSONParser.json_object``."""

    _slice: T_TokenSlice
    # Leading whitespace/comments found right after the opening brace.
    wsc: list[Comment | str]
    # ``None`` when the optional ``[ key_value_pairs ]`` group did not match.
    key_value_pairs: tuple[list[KeyValuePair], TrailingComma | None] | None
153

154

155
class SubsequentKeyValuePairProduction(Protocol):
    """Production shape consumed by ``JSONParser.subsequent_key_value_pair``."""

    _slice: T_TokenSlice
    # Whitespace/comments following the comma.
    wsc: list[Comment | str]
    # ``None`` when the comma is not followed by another pair (a trailing comma).
    first_key_value_pair: KeyValuePair | None
159

160

161
class T_FirstArrayValueProduction(Protocol):
    """Production shape consumed by ``JSONParser.first_array_value``: item 1 is the value node."""

    _slice: T_TokenSlice
    # Whitespace/comments following the value.
    wsc: list[Comment | str]

    def __getitem__(self, item: Literal[1]) -> Value: ...
168

169

170
class T_SubsequentArrayValueProduction(Protocol):
    """Production shape consumed by ``JSONParser.subsequent_array_value``."""

    _slice: T_TokenSlice
    # Whitespace/comments following the comma.
    wsc: list[Comment | str]
    # ``None`` when the comma is not followed by another value (a trailing comma).
    first_array_value: Value | None
174

175

176
class T_ArrayValuesProduction(Protocol):
    """Production shape consumed by ``JSONParser.array_values``."""

    _slice: T_TokenSlice
    # The mandatory first value, then the results of each repeated subsequent rule.
    first_array_value: Value
    subsequent_array_value: list[Value]
180

181

182
class T_JsonArrayProduction(Protocol):
    """Production shape consumed by ``JSONParser.json_array``."""

    _slice: T_TokenSlice
    # Leading whitespace/comments found right after the opening bracket.
    wsc: list[Comment | str]
    # ``None`` when the optional ``[ array_values ]`` group did not match.
    array_values: tuple[list[Value], TrailingComma | None] | None
186

187

188
class T_IdentifierProduction(Protocol):
    """Production shape consumed by ``JSONParser.identifier``: item 0 is the raw ``NAME`` text."""

    _slice: T_TokenSlice

    def __getitem__(self, item: Literal[0]) -> str: ...
193

194

195
class T_KeyProduction(Protocol):
    """Production shape consumed by ``JSONParser.key``: item 1 is the identifier or string node."""

    def __getitem__(self, item: Literal[1]) -> Identifier | DoubleQuotedString | SingleQuotedString: ...
198

199

200
class T_NumberProduction(Protocol):
    """Production shape for single-token ``number`` rules: item 0 is the literal's text."""

    _slice: T_TokenSlice

    def __getitem__(self, item: Literal[0]) -> str: ...
205

206

207
class T_ValueNumberProduction(Protocol):
    """Production shape for the signed-number ``value`` rules (``MINUS number`` / ``PLUS number``)."""

    _slice: T_TokenSlice
    # The already-built number node that the sign applies to.
    number: Infinity | NaN | Float | Integer
210

211

212
class T_ExponentNotationProduction(Protocol):
    """Production shape for ``INTEGER EXPONENT`` / ``FLOAT EXPONENT``: items are the token texts."""

    _slice: T_TokenSlice

    def __getitem__(self, item: int) -> str: ...
217

218

219
class T_StringTokenProduction(Protocol):
    """Production shape for rules matching one string token: item 0 is the raw text, quotes included."""

    _slice: T_TokenSlice

    def __getitem__(self, item: Literal[0]) -> str: ...
224

225

226
class T_StringProduction(Protocol):
    """Production shape consumed by ``JSONParser.string``: item 0 is an already-built string node."""

    _slice: T_TokenSlice

    def __getitem__(self, item: Literal[0]) -> DoubleQuotedString | SingleQuotedString: ...
231

232

233
class T_ValueProduction(Protocol):
    """Production shape consumed by the catch-all ``value`` rule: item 0 is the finished node."""

    _slice: T_TokenSlice

    def __getitem__(
        self, item: Literal[0]
    ) -> (
        DoubleQuotedString
        | SingleQuotedString
        | JSONObject
        | JSONArray
        | BooleanLiteral
        | NullLiteral
        | Infinity
        | Integer
        | Float
        | NaN
    ): ...
251

252

253
# Type variable used to say "returns its argument's type unchanged".
T_CallArg = typing.TypeVar('T_CallArg')

# Declaration only (no value is assigned here): tells the type checker that
# ``_(...)`` yields a decorator preserving the decorated function's type.
# NOTE(review): the runtime ``_`` used inside JSONParser is presumably supplied
# by sly's Parser machinery -- confirm against the sly documentation.
_: typing.Callable[..., typing.Callable[[T_CallArg], T_CallArg]]
255

256

257
class JSONParser(Parser):  # type: ignore[misc]
    """Grammar that builds a ``JSONText`` model from a stream of JSON5 tokens.

    Grammar rules are the methods decorated with ``_('...')``; several rules
    deliberately reuse one method name (``number``, ``value``, ``string``,
    ``comment``, ``boolean``). Problems found while parsing are accumulated in
    ``self.errors`` and raised together by :meth:`parse`.

    NOTE(review): sly presumably derives the start symbol from the first rule
    defined (``text``), so the order of the rule methods should be kept.
    """

    # debugfile = 'parser.out'
    tokens = JSONLexer.tokens
    log = QuietSlyLogger(sys.stderr)

    def __init__(self, *args: Any, **kwargs: Any):
        super().__init__(*args, **kwargs)
        # Every problem found so far; parse() raises from this list at the end.
        self.errors: list[JSON5DecodeError] = []
        # Most recent token handed to the parser (see _token_gen); used for EOF messages.
        self.last_token: JSON5Token | None = None
        self.seen_tokens: list[JSON5Token] = []
        # Stack with one entry per currently open object/array; the top entry
        # names what may legally come next and feeds the error messages.
        self.expecting: list[list[str]] = []

    @_('{ wsc } value { wsc }')
    def text(self, p: T_TextProduction) -> JSONText:
        """Top-level document: one value with optional surrounding whitespace/comments."""
        node = JSONText(value=p[1], tok=p.value._tok)
        for wsc in p.wsc0:
            node.wsc_before.append(wsc)
        for wsc in p.wsc1:
            node.wsc_after.append(wsc)
        return node

    @_('key { wsc } seen_colon COLON { wsc } object_value_seen value { wsc }')
    def first_key_value_pair(self, p: T_FirstKeyValuePairProduction) -> KeyValuePair:
        """Build a key/value pair, attaching whitespace/comments to the key and value nodes."""
        key = p[0]
        for wsc in p.wsc0:
            key.wsc_after.append(wsc)
        value = p[6]  # index 6 is ``value`` in the rule above
        for wsc in p.wsc1:
            value.wsc_before.append(wsc)
        for wsc in p.wsc2:
            value.wsc_after.append(wsc)
        return KeyValuePair(key=p.key, value=p.value)

    @_('object_delimiter_seen COMMA { wsc } [ first_key_value_pair ]')
    def subsequent_key_value_pair(self, p: SubsequentKeyValuePairProduction) -> KeyValuePair | TrailingComma:
        """A comma followed by another pair, or a ``TrailingComma`` when no pair follows."""
        node: KeyValuePair | TrailingComma
        if p.first_key_value_pair:
            node = p.first_key_value_pair
            for wsc in p.wsc:
                node.key.wsc_before.append(wsc)
        else:
            node = TrailingComma(tok=p._slice[1])
            for wsc in p.wsc:
                node.wsc_after.append(wsc)
        return node

    @_('WHITESPACE', 'comment')
    def wsc(self, p: T_WSCProduction) -> str | Comment:
        """Whitespace text or a comment node, passed through unchanged."""
        return p[0]

    @_('BLOCK_COMMENT')
    def comment(self, p: T_CommentProduction) -> BlockComment:
        return BlockComment(p[0], tok=p._slice[0])

    @_('LINE_COMMENT')  # type: ignore[no-redef]
    def comment(self, p: T_CommentProduction):
        return LineComment(p[0], tok=p._slice[0])

    @_('first_key_value_pair { subsequent_key_value_pair }')
    def key_value_pairs(self, p: T_KeyValuePairsProduction) -> tuple[list[KeyValuePair], TrailingComma | None]:
        """Collect an object's pairs; stop at the first trailing comma.

        A trailing comma that is not the last element is recorded as a
        "multiple trailing commas" error.
        """
        ret = [
            p.first_key_value_pair,
        ]
        num_sqvp = len(p.subsequent_key_value_pair)
        for index, value in enumerate(p.subsequent_key_value_pair):
            if isinstance(value, TrailingComma):
                if index + 1 != num_sqvp:
                    offending_token = value._tok
                    self.errors.append(JSON5DecodeError("Syntax Error: multiple trailing commas", offending_token))
                return ret, value
            else:
                ret.append(value)
        return ret, None

    # The empty rules below match nothing; they exist only to keep
    # ``self.expecting`` in step with the parser's position inside an object.

    @_('')
    def seen_LBRACE(self, p: Any) -> None:
        self.expecting.append(['RBRACE', 'key'])

    @_('')
    def seen_key(self, p: Any) -> None:
        self.expecting.pop()
        self.expecting.append(['COLON'])

    @_('')
    def seen_colon(self, p: Any) -> None:
        self.expecting.pop()
        self.expecting.append(['value'])

    @_('')
    def object_value_seen(self, p: Any) -> None:
        self.expecting.pop()
        self.expecting.append(['COMMA', 'RBRACE'])

    @_('')
    def object_delimiter_seen(self, p: Any) -> None:
        self.expecting.pop()
        self.expecting.append(['RBRACE', 'key'])

    @_('')
    def seen_RBRACE(self, p: Any) -> None:
        self.expecting.pop()

    @_('seen_LBRACE LBRACE { wsc } [ key_value_pairs ] seen_RBRACE RBRACE')
    def json_object(self, p: T_JsonObjectProduction) -> JSONObject:
        """Build a ``JSONObject``; slice items 1 and 5 are the opening and closing brace tokens."""
        if not p.key_value_pairs:
            node = JSONObject(leading_wsc=list(p.wsc or []), tok=p._slice[1], end_tok=p._slice[5])
        else:
            kvps, trailing_comma = p.key_value_pairs
            node = JSONObject(
                *kvps,
                trailing_comma=trailing_comma,
                leading_wsc=list(p.wsc or []),
                tok=p._slice[1],
                end_tok=p._slice[5],
            )

        return node

    @_('array_value_seen value { wsc }')
    def first_array_value(self, p: T_FirstArrayValueProduction) -> Value:
        """An array element with its trailing whitespace/comments attached."""
        node = p[1]
        for wsc in p.wsc:
            node.wsc_after.append(wsc)
        return node

    @_('array_delimiter_seen COMMA { wsc } [ first_array_value ]')
    def subsequent_array_value(self, p: T_SubsequentArrayValueProduction) -> Value | TrailingComma:
        """A comma followed by another element, or a ``TrailingComma`` when none follows."""
        node: Value | TrailingComma
        if p.first_array_value:
            node = p.first_array_value
            for wsc in p.wsc:
                node.wsc_before.append(wsc)
        else:
            node = TrailingComma(tok=p._slice[1])
            for wsc in p.wsc:
                node.wsc_after.append(wsc)
        return node

    @_('first_array_value { subsequent_array_value }')
    def array_values(self, p: T_ArrayValuesProduction) -> tuple[list[Value], TrailingComma | None]:
        """Collect an array's elements; stop at the first trailing comma.

        A trailing comma that is not the last element is recorded as a
        "multiple trailing commas" error.
        """
        ret = [
            p.first_array_value,
        ]
        num_values = len(p.subsequent_array_value)
        for index, value in enumerate(p.subsequent_array_value):
            if isinstance(value, TrailingComma):
                if index + 1 != num_values:
                    self.errors.append(JSON5DecodeError("Syntax Error: multiple trailing commas", value._tok))
                return ret, value
            else:
                ret.append(value)
        return ret, None

    @_('seen_LBRACKET LBRACKET { wsc } [ array_values ] seen_RBRACKET RBRACKET')
    def json_array(self, p: T_JsonArrayProduction) -> JSONArray:
        """Build a ``JSONArray``; slice items 1 and 5 are the opening and closing bracket tokens."""
        if not p.array_values:
            node = JSONArray(tok=p._slice[1], end_tok=p._slice[5])
        else:
            values, trailing_comma = p.array_values
            node = JSONArray(*values, trailing_comma=trailing_comma, tok=p._slice[1], end_tok=p._slice[5])

        for wsc in p.wsc:
            node.leading_wsc.append(wsc)

        return node

    # Empty bookkeeping rules for arrays. The top ``expecting`` entry always has
    # two items; only its last item is swapped between 'value' and 'COMMA'.

    @_('')
    def seen_LBRACKET(self, p: Any) -> None:
        self.expecting.append(['RBRACKET', 'value'])

    @_('')
    def seen_RBRACKET(self, p: Any) -> None:
        self.expecting.pop()

    @_('')
    def array_delimiter_seen(self, p: Any) -> None:
        assert len(self.expecting[-1]) == 2
        self.expecting[-1].pop()
        self.expecting[-1].append('value')

    @_('')
    def array_value_seen(self, p: Any) -> None:
        assert len(self.expecting[-1]) == 2
        assert self.expecting[-1][-1] == 'value'
        self.expecting[-1].pop()
        self.expecting[-1].append('COMMA')

    @_('NAME')
    def identifier(self, p: T_IdentifierProduction) -> Identifier:
        """Build an ``Identifier``; ``\\uXXXX`` escapes are decoded before the name is validated."""
        raw_value = p[0]
        name = re.sub(r'\\u[0-9a-fA-F]{4}', unicode_escape_replace, raw_value)
        pattern = r'[\w_\$]([\w_\d\$\p{Pc}\p{Mn}\p{Mc}\u200C\u200D])*'
        if not re.fullmatch(pattern, name):
            self.errors.append(JSON5DecodeError("Invalid identifier name", p._slice[0]))
        return Identifier(name=name, raw_value=raw_value, tok=p._slice[0])

    @_('seen_key identifier', 'seen_key string')
    def key(self, p: T_KeyProduction) -> Identifier | DoubleQuotedString | SingleQuotedString:
        node = p[1]
        return node

    @_('INTEGER')
    def number(self, p: T_NumberProduction):
        return Integer(p[0], tok=p._slice[0])

    @_('FLOAT')  # type: ignore[no-redef]
    def number(self, p: T_NumberProduction):
        return Float(p[0], tok=p._slice[0])

    @_('OCTAL')  # type: ignore[no-redef]
    def number(self, p: T_NumberProduction):
        """Octal literals are always an error; a node is still returned so parsing can continue."""
        self.errors.append(JSON5DecodeError("Invalid integer literal. Octals are not allowed", p._slice[0]))
        raw_value = p[0]
        if re.search(r'[89]+', raw_value):
            self.errors.append(JSON5DecodeError("Invalid octal format. Octal digits must be in range 0-7", p._slice[0]))
            # The text cannot be read as octal at all, so substitute a zero literal.
            return Integer(raw_value=oct(0), is_octal=True, tok=p._slice[0])
        return Integer(raw_value, is_octal=True, tok=p._slice[0])

    @_('INFINITY')  # type: ignore[no-redef]
    def number(self, p: T_AnyProduction) -> Infinity:
        return Infinity(tok=p._slice[0])

    @_('NAN')  # type: ignore[no-redef]
    def number(self, p: T_AnyProduction) -> NaN:
        return NaN(tok=p._slice[0])

    @_('MINUS number')
    def value(self, p: T_ValueNumberProduction) -> UnaryOp:
        if isinstance(p.number, Infinity):
            p.number.negative = True
        node = UnaryOp(op='-', value=p.number, tok=p._slice[0], end_tok=p.number._end_tok)
        return node

    @_('PLUS number')  # type: ignore[no-redef]
    def value(self, p: T_ValueNumberProduction):
        node = UnaryOp(op='+', value=p.number, tok=p._slice[0], end_tok=p.number._end_tok)
        return node

    @_('INTEGER EXPONENT', 'FLOAT EXPONENT')  # type: ignore[no-redef]
    def number(self, p: T_ExponentNotationProduction) -> Float:
        exp_notation = p[1][0]  # e or E
        return Float(p[0] + p[1], exp_notation=exp_notation, tok=p._slice[0], end_tok=p._slice[1])

    @_('HEXADECIMAL')  # type: ignore[no-redef]
    def number(self, p: T_NumberProduction) -> Integer:
        return Integer(p[0], is_hex=True, tok=p._slice[0])

    def _decode_string_contents(self, raw_value: str, tok: JSON5Token) -> str:
        """Return the decoded characters of a quoted string token.

        Shared by the double- and single-quoted string rules. ``raw_value`` is
        the token text including both quote characters. Problems are appended
        to ``self.errors`` (never raised) so parsing can continue:

        * an unescaped line terminator inside the string;
        * an ``\\x`` escape without two hex digits;
        * ``\\0`` followed by another digit.
        """
        contents = raw_value[1:-1]
        terminator_in_string = re.search(r'(?<!\\)([\u000D\u2028\u2029]|(?<!\r)\n)', contents)
        if terminator_in_string:
            # Offset of the terminator itself in the whole document; the +1
            # skips the opening quote. Measuring from the terminator (not the
            # character before it) keeps line/column right even when that
            # preceding character is an escaped newline.
            pos = tok.index + 1 + terminator_in_string.start()
            doc = tok.doc
            lineno = doc.count('\n', 0, pos) + 1
            colno = pos - doc.rfind('\n', 0, pos)
            errmsg = f"Illegal line terminator (line {lineno} column {colno} (char {pos}) without continuation"
            self.errors.append(JSON5DecodeError(errmsg, tok))
        # Line continuations: a backslash followed by a line terminator vanishes.
        contents = re.sub(r'\\(\r\n|[\u000A\u000D\u2028\u2029])', '', contents)
        try:
            contents = re.sub(r'(\\x[a-fA-F0-9]{0,2}|\\u[0-9a-fA-F]{4})', latin_unicode_escape_replace, contents)
        except JSON5DecodeError as exc:
            # Re-create the error so that it carries this string's token.
            self.errors.append(JSON5DecodeError(exc.args[0], tok))
        try:
            contents = re.sub(r'\\(0\d|.)', replace_escape_literals, contents)
        except JSON5DecodeError as exc:
            self.errors.append(JSON5DecodeError(exc.args[0], tok))
        return contents

    @_('DOUBLE_QUOTE_STRING')
    def double_quoted_string(self, p: T_StringTokenProduction) -> DoubleQuotedString:
        raw_value = p[0]
        tok = p._slice[0]
        contents = self._decode_string_contents(raw_value, tok)
        return DoubleQuotedString(contents, raw_value=raw_value, tok=tok)

    @_("SINGLE_QUOTE_STRING")
    def single_quoted_string(self, p: T_StringTokenProduction) -> SingleQuotedString:
        raw_value = p[0]
        tok = p._slice[0]
        contents = self._decode_string_contents(raw_value, tok)
        return SingleQuotedString(contents, raw_value=raw_value, tok=tok)

    @_('double_quoted_string', 'single_quoted_string')
    def string(self, p: T_StringProduction) -> SingleQuotedString | DoubleQuotedString:
        return p[0]

    @_('TRUE')
    def boolean(self, p: T_AnyProduction) -> BooleanLiteral:
        return BooleanLiteral(True, tok=p._slice[0])

    @_('FALSE')  # type: ignore[no-redef]
    def boolean(self, p: T_AnyProduction) -> BooleanLiteral:
        return BooleanLiteral(False, tok=p._slice[0])

    @_('NULL')
    def null(self, p: T_AnyProduction) -> NullLiteral:
        return NullLiteral(tok=p._slice[0])

    @_(  # type: ignore[no-redef]
        'string',
        'json_object',
        'json_array',
        'boolean',
        'null',
        'number',
    )
    def value(
        self, p: T_ValueProduction
    ) -> (
        DoubleQuotedString
        | SingleQuotedString
        | JSONObject
        | JSONArray
        | BooleanLiteral
        | NullLiteral
        | Infinity
        | Integer
        | Float
        | NaN
    ):
        node = p[0]
        return node

    @_('UNTERMINATED_SINGLE_QUOTE_STRING', 'UNTERMINATED_DOUBLE_QUOTE_STRING')  # type: ignore[no-redef]
    def string(self, p: T_StringTokenProduction) -> SingleQuotedString | DoubleQuotedString:
        """Record a syntax error for an unterminated string but still return a node for it."""
        self.error(p._slice[0])
        raw = p[0]
        # There is no closing quote to strip, so only the opening one is dropped.
        if raw.startswith('"'):
            return DoubleQuotedString(raw[1:], raw_value=raw, tok=p._slice[0])
        return SingleQuotedString(raw[1:], raw_value=raw, tok=p._slice[0])

    def error(self, token: JSON5Token | None) -> JSON5Token | None:
        """Record a syntax error and choose the token to resume from.

        With a token: the error is recorded against it and the next token from
        ``self.tokens`` is returned, or a synthetic ``$end`` token when the
        stream is exhausted. Without a token (end of input): an EOF error is
        recorded and ``None`` is returned.
        """
        if token:
            if self.expecting:
                expected = self.expecting[-1]

                message = f"Syntax Error. Was expecting {' or '.join(expected)}"
            else:
                message = 'Syntax Error'

            self.errors.append(JSON5DecodeError(message, token))
            try:
                return next(self.tokens)  # type: ignore
            except StopIteration:
                # EOF
                class tok:
                    type = '$end'
                    value = None
                    lineno = None
                    index = None
                    end = None

                return JSON5Token(tok(), None)  # type: ignore[arg-type]
        elif self.last_token:
            # Input ended early: report the position of the document's end.
            doc = self.last_token.doc
            pos = len(doc)
            lineno = doc.count('\n', 0, pos) + 1
            colno = pos - doc.rfind('\n', 0, pos)
            message = f'Expecting value. Unexpected EOF at: line {lineno} column {colno} (char {pos})'
            if self.expecting:
                expected = self.expecting[-1]
                message += f'. Was expecting {" or ".join(expected)}'
            self.errors.append(JSON5DecodeError(message, None))
        else:
            #  Empty file
            self.errors.append(JSON5DecodeError('Expecting value. Received unexpected EOF', None))
        return None

    def _token_gen(self, tokens: typing.Iterable[JSON5Token]) -> typing.Generator[JSON5Token, None, None]:
        """Yield ``tokens`` unchanged while recording each in ``last_token`` and ``seen_tokens``."""
        for tok in tokens:
            self.last_token = tok
            self.seen_tokens.append(tok)
            yield tok

    def parse(self, tokens: typing.Iterable[JSON5Token]) -> JSONText:
        """Parse ``tokens`` into a ``JSONText`` model.

        Raises:
            JSON5DecodeError: if any error was recorded. A single error is
                raised as-is. Several errors are merged into one message that
                lists the first error plus up to five more, and that carries
                the first error's ``lineno``, ``token`` and ``index``.
        """
        tokens = self._token_gen(tokens)
        model: JSONText = super().parse(tokens)
        if self.errors:
            if len(self.errors) > 1:
                primary_error = self.errors[0]
                msg = (
                    "There were multiple errors parsing the JSON5 document.\n"
                    "The primary error was: \n\t{}\n"
                    "Additionally, the following errors were also detected:\n\t{}"
                )

                num_additional_errors = len(self.errors) - 1
                additional_errors = '\n\t'.join(err.args[0] for err in self.errors[1:6])
                if num_additional_errors > 5:
                    additional_errors += f'\n\t{num_additional_errors - 5} additional error(s) truncated'
                msg = msg.format(primary_error.args[0], additional_errors)
                err = JSON5DecodeError(msg, None)
                err.lineno = primary_error.lineno
                err.token = primary_error.token
                err.index = primary_error.index
                raise err
            else:
                raise self.errors[0]
        return model
676

677

678
def parse_tokens(raw_tokens: typing.Iterable[JSON5Token]) -> JSONText:
    """Parse an iterable of JSON5 tokens with a fresh ``JSONParser`` and return the model.

    Raises:
        JSON5DecodeError: propagated from ``JSONParser.parse`` when errors were recorded.
    """
    return JSONParser().parse(raw_tokens)
681

682

683
def parse_source(text: str) -> JSONText:
    """Tokenize ``text`` and parse the resulting tokens into a ``JSONText`` model."""
    return parse_tokens(tokenize(text))
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc