• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / #37474

pending completion
#37474

push

local

web-flow
irinterp: Allow setting all IR flags (#48993)

Currently, `IR_FLAG_NOTHROW` is the only flag that irinterp is allowed to
set on statements, under the assumption that in order for a call to
be irinterp-eligible, it must have been proven `:foldable`, thus `:effect_free`,
and thus `IR_FLAG_EFFECT_FREE` was assumed to have been set. That reasoning
was sound at the time this code was written, but we have since introduced
`EFFECT_FREE_IF_INACCESSIBLEMEMONLY`, which breaks the reasoning that
an `:effect_free` inference for the whole function implies the flag on
every statement. As a result, we were failing to DCE otherwise dead
statements if the IR came from irinterp.

3 of 3 new or added lines in 1 file covered. (100.0%)

70258 of 82316 relevant lines covered (85.35%)

32461773.51 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.03
/base/toml_parser.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
module TOML
4

5
using Base: IdSet
6

7
# In case we do not have the Dates stdlib available
8
# we parse DateTime into these internal structs,
9
# note that these do not do any argument checking
10
struct Date
11
    year::Int
×
12
    month::Int
13
    day::Int
14
end
15
struct Time
16
    hour::Int
×
17
    minute::Int
18
    second::Int
19
    ms::Int
20
end
21
struct DateTime
22
    date::Date
×
23
    time::Time
24
end
25
# Convenience constructor: build the fallback `DateTime` from flat date and
# time components instead of from pre-built `Date` and `Time` values.
function DateTime(y, m, d, h, mi, s, ms)
    date_part = Date(y, m, d)
    time_part = Time(h, mi, s, ms)
    return DateTime(date_part, time_part)
end
27

28
const EOF_CHAR = typemax(Char)
29

30
const TOMLDict  = Dict{String, Any}
31

32
##########
33
# Parser #
34
##########
35

36
mutable struct Parser
37
    str::String
627✔
38
    # 1 character look ahead
39
    current_char::Char
40
    pos::Int
41
    # prevpos equals the startbyte of the look ahead character
42
    # prevpos-1 is therefore the end byte of the character we last ate
43
    prevpos::Int
44

45
    # File info
46
    column::Int
47
    line::Int
48

49
    # The function `take_substring` takes the substring from `marker` up
50
    # to `prevpos-1`.
51
    marker::Int
52

53
    # The current table that `key = value` entries are inserted into
54
    active_table::TOMLDict
55

56
    # As we parse dotted keys we store each part of the key in this cache
57
    # A future improvement would be to also store the spans of the keys
58
    # so that in error messages we could also show the previous key
59
    # definition in case of duplicated keys
60
    dotted_keys::Vector{String}
61

62
    # Strings in TOML can have line continuations ('\' as the last character
63
    # on a line). We store the byte ranges for each of these "chunks" in here
64
    chunks::Vector{UnitRange{Int}}
65

66
    # We need to keep track of those tables / arrays that are defined
67
    # inline since we are not allowed to add keys to those
68
    inline_tables::IdSet{TOMLDict}
69
    static_arrays::IdSet{Any}
70

71
    # [a.b.c.d] doesn't "define" the table [a]
72
    # so keys can later be added to [a], therefore
73
    # we need to keep track of what tables are
74
    # actually defined
75
    defined_tables::IdSet{TOMLDict}
76

77
    # The table we will finally return to the user
78
    root::TOMLDict
79

80
    # Filled in in case we are parsing a file to improve error messages
81
    filepath::Union{String, Nothing}
82

83
    # Gets populated with the Dates stdlib if it exists
84
    Dates::Union{Module, Nothing}
85
end
86

87
const DATES_PKGID = Base.PkgId(Base.UUID("ade2ca70-3891-5945-98fb-dc099432e06a"), "Dates")
88

89
# Create a parser over `str`. The parser starts with an empty root table that
# is also the active table, and its one-character look-ahead is primed by
# `startup`. `filepath` is stored as given.
function Parser(str::String; filepath=nothing)
    root = TOMLDict()
    # Only ask Base for the Dates module if this Base has `maybe_root_module`
    dates = isdefined(Base, :maybe_root_module) ? Base.maybe_root_module(DATES_PKGID) : nothing
    parser = Parser(
        str,                # str
        EOF_CHAR,           # current_char
        firstindex(str),    # pos
        0,                  # prevpos
        0,                  # column
        1,                  # line
        0,                  # marker
        root,               # active_table
        String[],           # dotted_keys
        UnitRange{Int}[],   # chunks
        IdSet{TOMLDict}(),  # inline_tables
        IdSet{Any}(),       # static_arrays
        IdSet{TOMLDict}(),  # defined_tables
        root,               # root
        filepath,           # filepath
        dates,              # Dates
    )
    startup(parser)
    return parser
end
112
# Prime the parser: load the first character of the input into the look-ahead
# slot and skip a leading byte order mark (U+FEFF) if the input starts with one.
function startup(l::Parser)
    # `eat_char` returns the *previous* look-ahead, which is still `EOF_CHAR`
    # when `startup` runs, so its return value says nothing about the input.
    # The first real character has to be inspected with `peek` afterwards.
    eat_char(l)
    # Skip BOM
    if peek(l) === '\ufeff'
        # The BOM should not count as a column
        l.column -= 1
        eat_char(l)
    end
    return nothing
end
121

122
# Parser over the empty string.
Parser() = Parser("")
1✔
123
# Parser over everything that can be read from `io`, read eagerly into a `String`.
Parser(io::IO) = Parser(read(io, String))
×
124

125
# Reset `p` so that it can parse `str` from the beginning, reusing the
# parser's existing scratch collections. Returns `p`.
function reinit!(p::Parser, str::String; filepath::Union{Nothing, String}=nothing)
    # New input and where it came from
    p.str = str
    p.filepath = filepath

    # Cursor and position bookkeeping start over
    p.current_char = EOF_CHAR
    p.pos = firstindex(str)
    p.prevpos = 0
    p.marker = 0
    p.line = 1
    p.column = 0

    # A fresh result table, which is also the active table
    fresh_root = TOMLDict()
    p.root = fresh_root
    p.active_table = fresh_root

    # Scratch collections are emptied in place
    foreach(empty!, (p.dotted_keys, p.chunks, p.inline_tables, p.static_arrays, p.defined_tables))

    startup(p)
    return p
end
144

145
##########
146
# Errors #
147
##########
148

149
# Raise an `ErrorException` for a condition that indicates a bug in the parser itself.
throw_internal_error(msg) = error(string("internal TOML parser error: ", msg))
×
150

151
# Many functions return a ParserError. We want this to bubble up
152
# all the way and have this error be returned to the user
153
# if the parse is called with `raise=false`. This macro
154
# makes that easier
155
# `@try expr` evaluates `expr`; if the result is a `ParserError` the enclosing
# function returns it immediately, otherwise the result is the macro's value.
@eval macro $(:var"try")(expr)
    return quote
        result = $(esc(expr))
        result isa ParserError && return result
        result
    end
end
162

163
# TODO: Check all of these are used
164
@enum ErrorType begin
165

166
    # Toplevel #
167
    ############
168
    ErrRedefineTableArray
169
    ErrExpectedNewLineKeyValue
170
    ErrAddKeyToInlineTable
171
    ErrAddArrayToStaticArray
172
    ErrArrayTreatedAsDictionary
173
    ErrExpectedEndOfTable
174
    ErrExpectedEndArrayOfTable
175

176
    # Keys #
177
    ########
178
    ErrExpectedEqualAfterKey
179
    # Check, are these the same?
180
    ErrDuplicatedKey
181
    ErrKeyAlreadyHasValue
182
    ErrInvalidBareKeyCharacter
183
    ErrEmptyBareKey
184

185
    # Values #
186
    ##########
187
    ErrUnexpectedEofExpectedValue
188
    ErrUnexpectedStartOfValue
189
    ErrGenericValueError
190

191
    # Arrays
192
    ErrExpectedCommaBetweenItemsArray
193

194
    # Inline tables
195
    ErrExpectedCommaBetweenItemsInlineTable
196
    ErrTrailingCommaInlineTable
197
    ErrInlineTableRedefine
198

199
    # Numbers
200
    ErrUnderscoreNotSurroundedByDigits
201
    ErrLeadingZeroNotAllowedInteger
202
    ErrOverflowError
203
    ErrLeadingDot
204
    ErrNoTrailingDigitAfterDot
205
    ErrTrailingUnderscoreNumber
206
    ErrSignInNonBase10Number
207

208
    # DateTime
209
    ErrParsingDateTime
210
    ErrOffsetDateNotSupported
211

212
    # Strings
213
    ErrNewLineInString
214
    ErrUnexpectedEndString
215
    ErrInvalidEscapeCharacter
216
    ErrInvalidUnicodeScalar
217
end
218

219
const err_message = Dict(
220
    ErrTrailingCommaInlineTable             => "trailing comma not allowed in inline table",
221
    ErrExpectedCommaBetweenItemsArray       => "expected comma between items in array",
222
    ErrExpectedCommaBetweenItemsInlineTable => "expected comma between items in inline table",
223
    ErrExpectedEndArrayOfTable              => "expected array of table to end with ']]'",
224
    ErrInvalidBareKeyCharacter              => "invalid bare key character",
225
    ErrRedefineTableArray                   => "tried to redefine an existing table as an array",
226
    ErrDuplicatedKey                        => "key already defined",
227
    ErrKeyAlreadyHasValue                   => "key already has a value",
228
    ErrEmptyBareKey                         => "bare key cannot be empty",
229
    ErrExpectedNewLineKeyValue              => "expected newline after key value pair",
230
    ErrNewLineInString                      => "newline character in single quoted string",
231
    ErrUnexpectedEndString                  => "string literal ended unexpectedly",
232
    ErrExpectedEndOfTable                   => "expected end of table ']'",
233
    ErrAddKeyToInlineTable                  => "tried to add a new key to an inline table",
234
    ErrInlineTableRedefine                  => "inline table overwrote key from other table",
235
    ErrArrayTreatedAsDictionary             => "tried to add a key to an array",
236
    ErrAddArrayToStaticArray                => "tried to append to a statically defined array",
237
    ErrGenericValueError                    => "failed to parse value",
238
    ErrLeadingZeroNotAllowedInteger         => "leading zero in integer not allowed",
239
    ErrUnderscoreNotSurroundedByDigits      => "underscore is not surrounded by digits",
240
    ErrUnexpectedStartOfValue               => "unexpected start of value",
241
    ErrOffsetDateNotSupported               => "offset date-time is not supported",
242
    ErrParsingDateTime                      => "parsing date/time value failed",
243
    ErrTrailingUnderscoreNumber             => "trailing underscore in number",
244
    ErrLeadingDot                           => "floats require a leading zero",
245
    ErrExpectedEqualAfterKey                => "expected equal sign after key",
246
    ErrNoTrailingDigitAfterDot              => "expected digit after dot",
247
    ErrOverflowError                        => "overflowed when parsing integer",
248
    ErrInvalidUnicodeScalar                 => "invalid unicode scalar",
249
    ErrInvalidEscapeCharacter               => "invalid escape character",
250
    ErrUnexpectedEofExpectedValue           => "unexpected end of file, expected a value",
251
    ErrSignInNonBase10Number                => "number not in base 10 is not allowed to have a sign",
252
)
253

254
for err in instances(ErrorType)
255
    @assert haskey(err_message, err) "$err does not have an error message"
256
end
257

258
mutable struct ParserError <: Exception
259
    type::ErrorType
282✔
260

261
    # Arbitrary data to store at the
262
    # call site to be used when formatting
263
    # the error
264
    data
265

266
    # These are filled in before returning from parse function
267
    str       ::Union{String,   Nothing}
268
    filepath  ::Union{String,   Nothing}
269
    line      ::Union{Int,      Nothing}
270
    column    ::Union{Int,      Nothing}
271
    pos       ::Union{Int,      Nothing} # position of parser when
272
    table     ::Union{TOMLDict, Nothing} # result parsed until error
273
end
274
# Error of kind `type` carrying call-site `data`; the location fields start out as `nothing`.
ParserError(type, data) = ParserError(type, data, nothing, nothing, nothing, nothing, nothing, nothing)
282✔
275
# Error of kind `type` without any extra call-site data.
ParserError(type) = ParserError(type, nothing)
241✔
276
# Defining these below can be useful when debugging code that erroneously returns a
277
# ParserError because you get a stacktrace to where the ParserError was created
278
#ParserError(type) = error(type)
279
#ParserError(type, data) = error(type,data)
280

281
# Many functions return either a T or a ParserError
282
const Err{T} = Union{T, ParserError}
283

284
# Look up the message for the error's type in `err_message`. For an invalid
# bare key character the offending character (stored in `error.data`) is
# appended in escaped form.
function format_error_message_for_err_type(error::ParserError)
    base_msg = err_message[error.type]
    error.type == ErrInvalidBareKeyCharacter || return base_msg
    offending = escape_string(string(error.data)::String)
    return string(base_msg, ": '", offending, "'")
end
292

293
# This is used in error formatting, for example,
294
# point_to_line("aa\nfoobar\nbb", 4, 6) would return the strings:
295
# str1 = "foobar"
296
# str2 = "^^^"
297
# used to show the interval where an error happened
298
# Right now, it is only called with a == b
299
# Return two strings for error display: the text of the line of `str` that
# contains byte index `a`, and a marker string with a `^` under every character
# whose index lies in `a:b` and a space everywhere else. `context` supplies the
# IO properties (e.g. color) used when printing the markers.
function point_to_line(str::AbstractString, a::Int, b::Int, context)
    @assert b >= a
    first_idx = thisind(str, a)
    last_idx = thisind(str, b)
    # Start right after the closest newline before `first_idx`, or at index 1
    newline_before = findprev('\n', str, prevind(str, first_idx))
    cursor = (newline_before === nothing ? 0 : newline_before) + 1
    text_io = IOContext(IOBuffer(), context)
    mark_io = IOContext(IOBuffer(), context)
    while true
        # A marker is emitted for the current position before looking at the
        # character, so the position where the line ends gets one as well.
        if first_idx <= cursor <= last_idx
            printstyled(mark_io, "^"; color=:light_green)
        else
            print(mark_io, " ")
        end
        nxt = iterate(str, cursor)
        nxt === nothing && break
        ch, cursor = nxt
        ch == '\n' && break
        print(text_io, ch)
    end
    return String(take!(text_io.io)), String(take!(mark_io.io))
end
320

321
# Print a `ParserError` as a header, a `file:line:column error: message` line,
# the offending source line, and a marker line pointing at the error position.
function Base.showerror(io::IO, err::ParserError)
    printstyled(io, "TOML Parser error:\n"; color=Base.error_color())
    location = something(err.filepath, "none")
    printstyled(io, location, ':', err.line, ':', err.column; bold=true)
    printstyled(io, " error: "; color=Base.error_color())
    println(io, format_error_message_for_err_type(err))

    # For an unexpected end of file we want the arrow to point one position
    # further than the recorded one
    shift = err.type == ErrUnexpectedEofExpectedValue ? 1 : 0
    pos = err.pos::Int + shift
    src_line, marker_line = point_to_line(err.str::String, pos, pos, io)

    prefix = @static if VERSION <= v"1.6.0-DEV.121"
        # See https://github.com/JuliaLang/julia/issues/36015
        (get(io, :color, false)::Bool == true ? "\e[0m" : "") * "  "
    else
        "  "
    end
    println(io, prefix, src_line)
    print(io, prefix, marker_line)
end
341

342

343
################
344
# Parser utils #
345
################
346

347
# Decode the character at `l.pos` and advance the position bookkeeping:
# `prevpos` becomes the old `pos`, the column is incremented, and a newline
# bumps the line counter and resets the column. Returns `EOF_CHAR` (leaving
# `pos` unchanged) when the input is exhausted.
@inline function next_char(l::Parser)::Char
    here = l.pos
    nxt = iterate(l.str, here)
    l.prevpos = here
    l.column += 1
    if nxt === nothing
        return EOF_CHAR
    end
    ch, after = nxt
    l.pos = after
    if ch == '\n'
        l.line += 1
        l.column = 0
    end
    return ch
end
360

361
# Consume the look-ahead character: return it and refill the look-ahead slot
# with the next character of the input.
@inline function eat_char(l::Parser)::Char
    eaten = l.current_char
    l.current_char = next_char(l)
    return eaten
end
366

367
# Return the look-ahead character without consuming it.
@inline peek(l::Parser) = l.current_char
2,266,070✔
368

369
# Return true if the character was accepted. When a character
370
# is accepted it gets eaten and we move to the next character
371
# Test the look-ahead character against `f`: a `Char` must be identical to it,
# a function is called as a predicate on it. On a match the character is eaten.
# Nothing is ever accepted at end of input.
@inline function accept(l::Parser, f::Union{Function, Char})::Bool
    c = peek(l)
    if c == EOF_CHAR
        return false
    end
    matched = f isa Function ? f(c) : c === f
    if matched
        eat_char(l)
    end
    return matched
end
383

384
# Return true if any character was accepted
385
# Keep accepting `f` for as long as it matches.
# Returns true if at least one character was accepted.
function accept_batch(l::Parser, f::F)::Bool where {F}
    accept(l, f) || return false
    while accept(l, f)
    end
    return true
end
392

393
# Return true if `f` was accepted `n` times
394
# Try to accept `f` exactly `n` times in a row, stopping at the first failure.
# Characters accepted before a failure stay consumed.
@inline function accept_n(l::Parser, n, f::F)::Bool where {F}
    for _ in 1:n
        accept(l, f) || return false
    end
    return true
end
402

403
# Whitespace within a line: space or tab.
@inline iswhitespace(c::Char) = c in (' ', '\t')
725,772✔
404
# Line break characters: line feed or carriage return.
@inline isnewline(c::Char) = c in ('\n', '\r')
159,007✔
405

406
# Skip spaces and tabs. Returns true if at least one character was skipped.
skip_ws(l::Parser) = accept_batch(l, iswhitespace)
226,642✔
407

408
# Skip spaces, tabs and newline characters, but no comments.
# Returns true if at least one character was skipped.
skip_ws_nl_no_comment(l::Parser)::Bool = accept_batch(l, x -> iswhitespace(x) || isnewline(x))
304✔
409

410
# Skip any mix of whitespace, newlines and comments.
# Returns true if anything at all was skipped.
function skip_ws_nl(l::Parser)::Bool
    any_skipped = false
    while true
        # Both skips run on every round; we are done once neither made progress
        ate_blank = skip_ws_nl_no_comment(l)
        ate_comment = skip_comment(l)
        (ate_blank || ate_comment) || return any_skipped
        any_skipped = true
    end
end
422

423
# Returns true if a comment was skipped
424
# If the look-ahead is '#', skip it and everything up to (not including) the
# next newline character. Returns true if a comment was skipped.
function skip_comment(l::Parser)::Bool
    accept(l, '#') || return false
    accept_batch(l, !isnewline)
    return true
end
431

432
# Skip spaces/tabs and then, only if at least one of them was skipped, a comment.
# NOTE(review): because of the `&&`, a comment that directly follows without any
# whitespace in between is not consumed here.
skip_ws_comment(l::Parser) = skip_ws(l) && skip_comment(l)
45,828✔
433

434
# Remember the start byte of the look-ahead character as the start of a substring.
@inline set_marker!(l::Parser) = l.marker = l.prevpos
43,990✔
435
# Substring of the input from the marker up to the byte before the look-ahead character.
take_substring(l::Parser) = SubString(l.str, l.marker:(l.prevpos-1))
43,824✔
436

437
############
438
# Toplevel #
439
############
440

441
# Driver, keeps parsing toplevel until we either get
442
# a `ParserError` or eof.
443
# Parse the whole input and return the resulting table.
# A `ParserError` produced by `tryparse` is thrown instead of returned.
function parse(l::Parser)::TOMLDict
    result = tryparse(l)
    if result isa ParserError
        throw(result)
    end
    return result
end
448

449
# Parse toplevel items until end of input and return the root table. On
# failure the `ParserError` is returned (not thrown) after being annotated with
# the input, the file path, the parser position and the table parsed so far.
function tryparse(l::Parser)::Err{TOMLDict}
    skip_ws_nl(l)
    while peek(l) != EOF_CHAR
        outcome = parse_toplevel(l)
        if outcome isa ParserError
            outcome.str      = l.str
            outcome.filepath = l.filepath
            outcome.table    = l.root
            outcome.line     = l.line
            outcome.column   = l.column - 1
            # `prevpos - 1` is the end byte of the character eaten last
            outcome.pos      = l.prevpos - 1
            return outcome
        end
        skip_ws_nl(l)
    end
    return l.root
end
466

467
# Top level can be either a table key, an array of table statement
468
# or a key/value entry.
469
# Parse one toplevel item: a `[table]` / `[[array of tables]]` header or a
# `key = value` entry for the active table. A header resets the active table
# to the root before `parse_table` resolves the new one. After the item only
# whitespace, a comment, a newline or end of input may follow; anything else
# is consumed and reported as `ErrExpectedNewLineKeyValue`.
function parse_toplevel(l::Parser)::Err{Nothing}
    if accept(l, '[')
        l.active_table = l.root
        @try parse_table(l)
    else
        @try parse_entry(l, l.active_table)
    end
    skip_ws_comment(l)
    # SPEC: "There must be a newline (or EOF) after a key/value pair."
    c = peek(l)
    if !(c == '\n' || c == '\r' || c == '#' || c == EOF_CHAR)
        # Eat the offending character so the reported position points at it
        eat_char(l)
        return ParserError(ErrExpectedNewLineKeyValue)
    end
    return nothing
end
488

489
# Walk from `d` down the path given by `dotted_keys`, creating missing tables
# on the way, and return the table the path ends at. When a key holds an array
# the walk continues in the array's last element. With `check=true` every
# visited value is validated with `check_allowed_add_key`; only the last path
# element is checked against already defined tables.
#
# A `ParserError` is returned (never an exception thrown) when the path runs
# into something that is not a table, including an empty array.
function recurse_dict!(l::Parser, d::Dict, dotted_keys::AbstractVector{String}, check=true)::Err{TOMLDict}
    nkeys = length(dotted_keys)
    for i in 1:nkeys
        # With `check=false` a previous step may have produced a plain value
        d isa TOMLDict || return ParserError(ErrKeyAlreadyHasValue)
        key = dotted_keys[i]
        d = get!(TOMLDict, d, key)
        if d isa Vector
            # e.g. `a = []` followed by `[a.b]`: there is no last element to enter
            isempty(d) && return ParserError(ErrKeyAlreadyHasValue)
            d = d[end]
        end
        check && @try check_allowed_add_key(l, d, i == nkeys)
    end
    d isa TOMLDict || return ParserError(ErrKeyAlreadyHasValue)
    return d
end
501

502
# Decide whether keys may be added below the existing value `d`:
# - anything that is not a table already is a value,
# - inline tables are closed for additions,
# - with `check_defined`, an explicitly defined table must not be defined again.
# Returns the matching `ParserError`, or `nothing` if adding is allowed.
function check_allowed_add_key(l::Parser, d, check_defined=true)::Err{Nothing}
    d isa Dict || return ParserError(ErrKeyAlreadyHasValue)
    d in l.inline_tables && return ParserError(ErrAddKeyToInlineTable)
    if check_defined && d in l.defined_tables
        return ParserError(ErrDuplicatedKey)
    end
    return nothing
end
512

513
# Can only enter here from toplevel
514
# Parse a table header after its opening '[' has been eaten. A second '['
# hands over to `parse_array_table`. Otherwise the key is parsed, the closing
# ']' is required, and the addressed table becomes the active table and is
# recorded as defined.
function parse_table(l)
    accept(l, '[') && return parse_array_table(l)
    header_key = @try parse_key(l)
    skip_ws(l)
    accept(l, ']') || return ParserError(ErrExpectedEndOfTable)
    table = @try recurse_dict!(l, l.root, header_key)
    l.active_table = table
    push!(l.defined_tables, table)
    return
end
527

528
# Parse an array-of-tables header after its opening "[[" has been eaten.
# A new empty table is appended to the array stored under the key (the array
# is created if the key is new), recorded as defined and made the active table.
function parse_array_table(l)::Union{Nothing, ParserError}
    header_key = @try parse_key(l)
    skip_ws(l)
    closed = accept(l, ']') && accept(l, ']')
    closed || return ParserError(ErrExpectedEndArrayOfTable)
    # Resolve everything but the last key part without the add-key checks
    parent = @try recurse_dict!(l, l.root, @view(header_key[1:end-1]), false)
    tables = get!(() -> [], parent, header_key[end])
    tables isa Vector || return ParserError(ErrArrayTreatedAsDictionary)
    # Arrays written as `key = [...]` must not be appended to
    tables in l.static_arrays && return ParserError(ErrAddArrayToStaticArray)
    fresh = TOMLDict()
    push!(tables, fresh)
    push!(l.defined_tables, fresh)
    l.active_table = fresh
    return
end
551

552
# Parse one `key = value` entry and store it in `d`. For a dotted key the
# value goes into the table addressed by all but the last key part.
function parse_entry(l::Parser, d)::Union{Nothing, ParserError}
    key = @try parse_key(l)
    skip_ws(l)
    accept(l, '=') || return ParserError(ErrExpectedEqualAfterKey)
    if length(key) > 1
        d = @try recurse_dict!(l, d, @view(key[1:end-1]))
    end
    # Taken now because parsing the value (an inline table) goes through
    # `parse_key` again, which empties `l.dotted_keys`
    leaf = l.dotted_keys[end]

    existing = get(d, leaf, nothing)
    existing === nothing || @try check_allowed_add_key(l, existing)

    skip_ws(l)
    value = @try parse_value(l)
    # Not allowed to overwrite a value with an inline dict
    if value isa Dict && haskey(d, leaf)
        return ParserError(ErrInlineTableRedefine)
    end
    # TODO: Performance, hashing `leaf` again here
    d[leaf] = value
    return
end
578

579

580
########
581
# Keys #
582
########
583

584
# SPEC: "Bare keys may only contain ASCII letters, ASCII digits, underscores,
585
# and dashes (A-Za-z0-9_-).
586
# Note that bare keys are allowed to be composed of only ASCII digits, e.g. 1234,
587
# but are always interpreted as strings."
588
# True for the characters allowed in a bare key: A-Z, a-z, 0-9, '_' and '-'.
@inline function isvalid_barekey_char(c::Char)
    is_letter = 'a' <= c <= 'z' || 'A' <= c <= 'Z'
    return is_letter || isdigit(c) || c == '-' || c == '_'
end
593

594
# Current key...
595
# Parse a (possibly dotted) key. The parts are collected in `l.dotted_keys`,
# which is emptied first; the result is whatever `_parse_key` returns.
function parse_key(l::Parser)
    empty!(l.dotted_keys)
    return _parse_key(l)
end
599

600
# Recursively add dotted keys to `l.dotted_keys`
601
# Parse one part of a key (quoted or bare), push it onto `l.dotted_keys` and
# recurse if a '.' follows. Returns `l.dotted_keys`, or a `ParserError`.
function _parse_key(l::Parser)
    skip_ws(l)
    # SPEC: "A bare key must be non-empty,"
    if isempty(l.dotted_keys) && accept(l, '=')
        return ParserError(ErrEmptyBareKey)
    end
    keyval = if accept(l, '"')
        @try parse_string_start(l, false)
    elseif accept(l, '\'')
        @try parse_string_start(l, true)
    else
        set_marker!(l)
        if accept_batch(l, isvalid_barekey_char)
            # A bare key may only be followed by a dot, whitespace, ']' or '='.
            # SPEC: "Whitespace means tab (0x09) or space (0x20)", so a tab is
            # accepted here just like a space.
            c = peek(l)
            if !(c == '.' || iswhitespace(c) || c == ']' || c == '=')
                return ParserError(ErrInvalidBareKeyCharacter, eat_char(l))
            end
            String(take_substring(l))
        else
            # Not a single valid bare key character: report the offender
            return ParserError(ErrInvalidBareKeyCharacter, eat_char(l))
        end
    end
    push!(l.dotted_keys, keyval)
    # SPEC: "Whitespace around dot-separated parts is ignored."
    skip_ws(l)
    if accept(l, '.')
        skip_ws(l)
        @try _parse_key(l)
    end
    return l.dotted_keys
end
634

635

636
##########
637
# Values #
638
##########
639

640
# Parse a value, choosing the sub-parser from the first character (which is
# eaten in the process). A sub-parser result of `nothing` is turned into a
# generic value error.
function parse_value(l::Parser)
    if accept(l, '[')
        parsed = parse_array(l)
    elseif accept(l, '{')
        parsed = parse_inline_table(l)
    elseif accept(l, '"')
        parsed = parse_string_start(l, false)
    elseif accept(l, '\'')
        parsed = parse_string_start(l, true)
    elseif accept(l, 't')
        parsed = parse_bool(l, true)
    elseif accept(l, 'f')
        parsed = parse_bool(l, false)
    else
        parsed = parse_number_or_date_start(l)
    end
    return parsed === nothing ? ParserError(ErrGenericValueError) : parsed
end
661

662

663
#########
664
# Array #
665
#########
666

667
# Append `el` to `v`, widening the element type when needed. The vector that
# now holds the element is returned: `v` itself if `el` fits its element type,
# otherwise a new vector.
#
# Since these types are typically non-inferrable, they are a big invalidation
# risk, and since this is used by the package-loading infrastructure the cost
# of invalidation is high. Constructors are therefore spelled out explicitly
# (`Vector{t}(undef, 1)` rather than `t[el]`) so that there is no ambiguity
# about what types of objects will be created.
function push!!(v::Vector, el)
    T = eltype(v)
    t = typeof(el)
    if el isa T || t === T
        push!(v, el::T)
        return v
    end
    if T === Union{}
        # First element of a so far untyped array fixes the element type
        out = Vector{t}(undef, 1)
        out[1] = el
        return out
    end
    # Widen: a second type gives a two-member Union, anything beyond that is Any
    newT = T isa Union ? Any : Union{T, t}
    widened = Array{newT}(undef, length(v))
    copy!(widened, v)
    return push!(widened, el)
end
693

694
# Parse an array after its opening '[' has been eaten. Whitespace, newlines
# and comments are allowed around items; a comma is required between items
# and allowed after the last one. The array is recorded in `l.static_arrays`.
function parse_array(l::Parser)::Err{Vector}
    skip_ws_nl(l)
    items = Vector{Union{}}()
    if !accept(l, ']')
        while true
            item = @try parse_value(l)
            # TODO: Worth to function barrier this?
            items = push!!(items, item)
            # There can be an arbitrary number of newlines and comments before
            # a value and before the closing bracket.
            skip_ws_nl(l)
            saw_comma = accept(l, ',')
            skip_ws_nl(l)
            accept(l, ']') && break
            saw_comma || return ParserError(ErrExpectedCommaBetweenItemsArray)
        end
    end
    push!(l.static_arrays, items)
    return items
end
714

715

716
################
717
# Inline table #
718
################
719

720
# Parse an inline table after its opening '{' has been eaten. The new table
# is recorded in `l.inline_tables` right away. Entries must be separated by
# commas and a trailing comma is an error.
function parse_inline_table(l::Parser)::Err{TOMLDict}
    table = TOMLDict()
    push!(l.inline_tables, table)
    skip_ws(l)
    accept(l, '}') && return table
    while true
        @try parse_entry(l, table)
        # SPEC: No newlines are allowed between the curly braces unless they
        # are valid within a value.
        skip_ws(l)
        accept(l, '}') && return table
        accept(l, ',') || return ParserError(ErrExpectedCommaBetweenItemsInlineTable)
        skip_ws(l)
        accept(l, '}') && return ParserError(ErrTrailingCommaInlineTable)
    end
end
740

741

742
###########
743
# Numbers #
744
###########
745

746
# Finish parsing "inf" after the 'i' has been eaten: `sgn * Inf` on success, `nothing` otherwise.
parse_inf(l::Parser, sgn::Int) = accept(l, 'n') && accept(l, 'f') ? sgn * Inf : nothing
16✔
747
# Finish parsing "nan" after the 'n' has been eaten: `NaN` on success, `nothing` otherwise.
parse_nan(l::Parser) = accept(l, 'a') && accept(l, 'n') ? NaN : nothing
16✔
748

749
# Finish parsing a boolean literal whose first character is already eaten:
# a 't' if `v` is true, otherwise an 'f'. Returns `v` if the rest of the
# literal follows, `nothing` otherwise (characters matched so far stay eaten).
function parse_bool(l::Parser, v::Bool)::Union{Bool, Nothing}
    rest = v ? "rue" : "alse"
    for ch in rest
        accept(l, ch) || return nothing
    end
    return v
end
755

756
# Hexadecimal digit: 0-9, a-f or A-F.
isvalid_hex(c::Char) = isxdigit(c)
1,070✔
757
# Octal digit: 0-7.
isvalid_oct(c::Char) = c >= '0' && c <= '7'
486✔
758
# Binary digit: 0 or 1.
isvalid_binary(c::Char) = c >= '0' && c <= '1'
1,195✔
759

760
# Union of the types of the digit predicates that `accept_batch_underscore` takes as `f`.
const ValidSigs = Union{typeof.([isvalid_hex, isvalid_oct, isvalid_binary, isdigit])...}
761
# This function eats things accepted by `f` but also allows eating `_` in between
762
# digits. Returns whether it ate at least one character and whether it ate an underscore
763
# Eat a run of characters accepted by `f`, allowing single underscores between
# them. `fail_if_underscore` says whether an underscore at the current position
# is an error; it is true by default and again right after an underscore, so a
# leading or doubled underscore is rejected. An underscore at the end of the
# run is rejected as well.
# Returns `(ate_at_least_one, saw_underscore)` or a `ParserError`.
function accept_batch_underscore(l::Parser, f::ValidSigs, fail_if_underscore=true)::Err{Tuple{Bool, Bool}}
    saw_underscore = false
    ate_any = false
    prev_was_underscore = false
    while true
        c = peek(l)
        if c == '_'
            saw_underscore = true
            fail_if_underscore && return ParserError(ErrUnderscoreNotSurroundedByDigits)
            eat_char(l)
            fail_if_underscore = true
            prev_was_underscore = true
            continue
        end
        # SPEC:  "Each underscore must be surrounded by at least one digit on each side."
        fail_if_underscore = false
        if !f(c)
            # End of the run: it must not end in an underscore
            prev_was_underscore && return ParserError(ErrTrailingUnderscoreNumber)
            return ate_any, saw_underscore
        end
        ate_any = true
        eat_char(l)
        prev_was_underscore = false
    end
end
793

794
# Parse a value that starts like a number: an integer (decimal, hex, octal or
# binary), a float (including `inf` / `nan`), a date, a date-time or a local
# time. The marker is set at the start of the value so that the `parse_*`
# helpers can take the text that was eaten. Returns the parsed value or a
# `ParserError`.
function parse_number_or_date_start(l::Parser)
    set_marker!(l)
    sgn = 1
    parsed_sign = false
    if accept(l, '+')
        parsed_sign = true
    elseif accept(l, '-')
        parsed_sign = true
        sgn = -1
    end
    if accept(l, 'i')
        return parse_inf(l, sgn)
    elseif accept(l, 'n')
        return parse_nan(l)
    end

    if accept(l, '.')
        return ParserError(ErrLeadingDot)
    end

    # Zero is allowed to be followed by an end value char, a base x, o, b or a dot
    readed_zero = false
    if accept(l, '0')
        readed_zero = true # Intentional bad grammar to remove the ambiguity in "read"...
        if ok_end_value(peek(l))
            return Int64(0)
        elseif accept(l, 'x')
            parsed_sign && return ParserError(ErrSignInNonBase10Number)
            ate, contains_underscore = @try accept_batch_underscore(l, isvalid_hex)
            ate && return parse_hex(l, contains_underscore)
        elseif accept(l, 'o')
            parsed_sign && return ParserError(ErrSignInNonBase10Number)
            ate, contains_underscore = @try accept_batch_underscore(l, isvalid_oct)
            ate && return parse_oct(l, contains_underscore)
        elseif accept(l, 'b')
            parsed_sign && return ParserError(ErrSignInNonBase10Number)
            ate, contains_underscore = @try accept_batch_underscore(l, isvalid_binary)
            ate && return parse_bin(l, contains_underscore)
        elseif accept(l, isdigit)
            # A zero followed by another digit is handed to the local time parser
            return parse_local_time(l)
        end
    end

    # `read_underscore` accumulates whether ANY part of the number (integer
    # part, fraction or exponent) contained an underscore.
    read_underscore = false
    read_digit = accept(l, isdigit)
    if !readed_zero && !read_digit
        if peek(l) == EOF_CHAR
            return ParserError(ErrUnexpectedEofExpectedValue)
        else
            return ParserError(ErrUnexpectedStartOfValue)
        end
    end
    ate, contains_underscore = @try accept_batch_underscore(l, isdigit, readed_zero)
    read_underscore |= contains_underscore
    if (read_digit || ate) && ok_end_value(peek(l))
        return parse_int(l, contains_underscore)
    end
    # Done with integers here

    if !read_underscore
        # No underscores in date / times
        if peek(l) == '-'
            return parse_datetime(l)
        elseif peek(l) == ':'
            return parse_local_time(l)
        end
    end
    # Done with datetime / localtime here

    # can optionally read a . + digits and then exponent
    ate_dot = accept(l, '.')
    ate, contains_underscore = @try accept_batch_underscore(l, isdigit, true)
    if ate_dot && !ate
        return ParserError(ErrNoTrailingDigitAfterDot)
    end
    read_underscore |= contains_underscore
    if accept(l, x -> x == 'e' || x == 'E')
        accept(l, x-> x == '+' || x == '-')
        # SPEC: (which follows the same rules as decimal integer values but may include leading zeros)
        read_digit = accept_batch(l, isdigit)
        ate, contains_underscore = @try accept_batch_underscore(l, isdigit, !read_digit)
        # Accumulate rather than overwrite: an underscore seen before the
        # exponent must still be reported to `parse_float`, otherwise the
        # underscores are not stripped before the text is parsed.
        read_underscore |= contains_underscore
    end
    if !ok_end_value(peek(l))
        eat_char(l)
        return ParserError(ErrGenericValueError)
    end
    return parse_float(l, read_underscore)
end
886

887

888
# Take the text eaten since the marker. When it contains underscores, a copy
# with every `_` removed is returned instead, since the text is subsequently
# handed to `parse`.
function take_string_or_substring(l, contains_underscore)::SubString
    raw = take_substring(l)
    contains_underscore || return raw
    return SubString(filter(!=('_'), raw))
end
894

895
# Convert the text eaten since the marker into a `Float64`, or return a
# generic value error when it is not a valid float.
function parse_float(l::Parser, contains_underscore)::Err{Float64}
    text = take_string_or_substring(l, contains_underscore)
    parsed = Base.tryparse(Float64, text)
    if parsed === nothing
        return ParserError(ErrGenericValueError)
    end
    return parsed
end
901

902
# Generate `parse_int`, `parse_hex`, `parse_oct` and `parse_bin`. Each one
# picks the result type from the length of the eaten text: up to `short_max`
# characters are parsed as `Narrow`, up to `long_max` as `Wide`, and anything
# longer as `BigInt`.
for (suffix, Narrow, Wide, short_max, long_max) in (("int", Int64,  Int128,  17,  33),
                                                    ("hex", UInt64, UInt128, 18,  34),
                                                    ("oct", UInt64, UInt128, 24,  45),
                                                    ("bin", UInt64, UInt128, 66, 130),
                                                    )
    @eval function $(Symbol("parse_", suffix))(l::Parser, contains_underscore, base=nothing)::Err{Union{$(Narrow), $(Wide), BigInt}}
        s = take_string_or_substring(l, contains_underscore)
        nchars = length(s)
        parsed = try
            if nchars ≤ $(short_max)
                Base.parse($(Narrow), s; base)
            elseif nchars ≤ $(long_max)
                Base.parse($(Wide), s; base)
            else
                Base.parse(BigInt, s; base)
            end
        catch e
            if e isa Base.OverflowError
                return ParserError(ErrOverflowError)
            end
            # Any other failure means the caller let through text that is not an integer
            error("internal parser error: did not correctly discredit $(repr(s)) as an int")
        end
        return parsed
    end
end
925

926

927
##########################
928
# Date / Time / DateTime #
929
##########################
930

931
# Whether `c` is allowed to directly follow a value: whitespace, the start of
# a comment, end of input, a closing bracket / brace, a comma or a line break.
function ok_end_value(c::Char)
    iswhitespace(c) && return true
    return c == '#' || c == EOF_CHAR || c == ']' || c == '}' ||
           c == ',' || c == '\n'     || c == '\r'
end
933

934
#=
935
# https://tools.ietf.org/html/rfc3339
936

937
# Internet Protocols MUST generate four digit years in dates.
938

939
   date-fullyear   = 4DIGIT
940
   date-month      = 2DIGIT  ; 01-12
941
   date-mday       = 2DIGIT  ; 01-28, 01-29, 01-30, 01-31 based on
942
                             ; month/year
943
   time-hour       = 2DIGIT  ; 00-23
944
   time-minute     = 2DIGIT  ; 00-59
945
   time-second     = 2DIGIT  ; 00-58, 00-59, 00-60 based on leap second
946
                             ; rules
947
   time-secfrac    = "." 1*DIGIT
948
   time-numoffset  = ("+" / "-") time-hour ":" time-minute
949
   time-offset     = "Z" / time-numoffset
950

951
   partial-time    = time-hour ":" time-minute ":" time-second
952
                     [time-secfrac]
953
   full-date       = date-fullyear "-" date-month "-" date-mday
954
   full-time       = partial-time time-offset
955

956
   date-time       = full-date "T" full-time
957
=#
958

959
# Eat exactly two characters accepted by `f`; a date-time parse error is
# returned when that is not possible.
function accept_two(l, f::F) where {F}
    ok = accept_n(l, 2, f)
    ok || return ParserError(ErrParsingDateTime)
    return ok
end
242✔
960
# Parse the rest of a date or date-time. The year digits have already been
# eaten by the caller and the parser is positioned at the `-` that follows
# them. Returns a date when the value ends after the day, otherwise a
# date-time, or a `ParserError`.
function parse_datetime(l)
    # Year has already been eaten when we reach here
    year = @try parse_int(l, false)
    year in 0:9999 || return ParserError(ErrParsingDateTime)

    # Month
    accept(l, '-') || return ParserError(ErrParsingDateTime)
    set_marker!(l)
    @try accept_two(l, isdigit)
    month = @try parse_int(l, false)
    month in 1:12 || return ParserError(ErrParsingDateTime)
    accept(l, '-') || return ParserError(ErrParsingDateTime)

    # Day
    set_marker!(l)
    @try accept_two(l, isdigit)
    day = @try parse_int(l, false)
    # Verify the real range in the constructor below
    day in 1:31 || return ParserError(ErrParsingDateTime)

    # We might have a local date now
    read_space = false
    if ok_end_value(peek(l))
        if (read_space = accept(l, ' '))
            # A space followed by a digit separates the date from a time;
            # anything else means the value was only a date.
            if !isdigit(peek(l))
                return try_return_date(l, year, month, day)
            end
        else
            return try_return_date(l, year, month, day)
        end
    end
    if !read_space
        accept(l, 'T') || accept(l, 't') || return ParserError(ErrParsingDateTime)
    end

    h, m, s, ms = @try _parse_local_time(l)

    # Julia doesn't support offset times.
    # The UTC designator may be written in either case, just like the `T`
    # separator above.
    if !(accept(l, 'Z') || accept(l, 'z'))
        if accept(l, '+') || accept(l, '-')
            return ParserError(ErrOffsetDateNotSupported)
        end
    end

    if !ok_end_value(peek(l))
        return ParserError(ErrParsingDateTime)
    end

    # The DateTime parser verifies things like leap year for us
    return try_return_datetime(l, year, month, day, h, m, s, ms)
end
1011

1012
# Build the final date-time value. When the parser has no Dates module the
# internal `DateTime` struct is returned as is; otherwise the Dates
# constructor is used and any exception it throws becomes a `ParserError`.
function try_return_datetime(p, year, month, day, h, m, s, ms)
    dates_mod = p.Dates
    dates_mod === nothing && return DateTime(year, month, day, h, m, s, ms)
    try
        return dates_mod.DateTime(year, month, day, h, m, s, ms)
    catch
        return ParserError(ErrParsingDateTime)
    end
end
1024

1025
# Build the final date value. When the parser has no Dates module the internal
# `Date` struct is returned as is; otherwise the Dates constructor is used and
# any exception it throws becomes a `ParserError`.
function try_return_date(p, year, month, day)
    dates_mod = p.Dates
    dates_mod === nothing && return Date(year, month, day)
    try
        return dates_mod.Date(year, month, day)
    catch
        return ParserError(ErrParsingDateTime)
    end
end
1037

1038
# Parse a local time whose hour digits have already been eaten: the hour is
# taken from the marked text and the rest is read by `_parse_local_time`.
function parse_local_time(l::Parser)
    hour = @try parse_int(l, false)
    if !(hour in 0:23)
        return ParserError(ErrParsingDateTime)
    end
    # The hour slot of the tuple is a placeholder since the hour was skipped.
    (_, minute, second, millis) = @try _parse_local_time(l, true)
    # TODO: Could potentially parse greater accuracy for the
    # fractional seconds here.
    return try_return_time(l, hour, minute, second, millis)
end
1046

1047
# Build the final time value. When the parser has no Dates module the internal
# `Time` struct is returned as is; otherwise the Dates constructor is used and
# any exception it throws becomes a `ParserError`.
function try_return_time(p, h, m, s, ms)
    dates_mod = p.Dates
    dates_mod === nothing && return Time(h, m, s, ms)
    try
        return dates_mod.Time(h, m, s, ms)
    catch
        return ParserError(ErrParsingDateTime)
    end
end
1059

1060
# Parse `HH:MM:SS[.fraction]` and return `(hour, minute, second, millisecond)`.
# With `skip_hour=true` the hour is assumed to have been handled by the caller:
# parsing starts at the first `:` and the returned hour is a placeholder `0`.
# Only the first three fractional digits are kept; further digits are eaten
# and discarded.
function _parse_local_time(l::Parser, skip_hour=false)::Err{NTuple{4, Int64}}
    # Hour has potentially been already parsed in
    # `parse_number_or_date_start` already
    if skip_hour
        hour = Int64(0)
    else
        set_marker!(l)
        @try accept_two(l, isdigit)
        hour = parse_int(l, false)
        hour in 0:23 || return ParserError(ErrParsingDateTime)
    end

    accept(l, ':') || return ParserError(ErrParsingDateTime)

    # minute
    set_marker!(l)
    @try accept_two(l, isdigit)
    minute = parse_int(l, false)
    minute in 0:59 || return ParserError(ErrParsingDateTime)

    accept(l, ':') || return ParserError(ErrParsingDateTime)

    # second
    set_marker!(l)
    @try accept_two(l, isdigit)
    second = parse_int(l, false)
    second in 0:59 || return ParserError(ErrParsingDateTime)

    # optional fractional second
    fractional_second = Int64(0)
    if accept(l, '.')
        set_marker!(l)
        # DateTime in base only manages 3 significant digits in fractional
        # second
        n_fractional_digits = 0
        for _ in 1:3
            accept(l, isdigit) || break
            n_fractional_digits += 1
        end
        if n_fractional_digits == 0
            return ParserError(ErrParsingDateTime)
        end
        fraction = @try parse_int(l, false)
        # The digits are a decimal fraction of a second, so pad to three
        # places: ".5" is 500 ms and ".12" is 120 ms.
        fractional_second = fraction * 10^(3 - n_fractional_digits)
        # Truncate off the rest eventual digits
        accept_batch(l, isdigit)
    end
    return hour, minute, second, fractional_second
end
1107

1108

1109
##########
1110
# String #
1111
##########
1112

1113
# Entry point for string values. The opening delimiter has already been eaten:
# a `'` if `quoted` is true, otherwise a `"`. Decides between an empty string,
# a single-line string and a multiline string, then hands the body over to
# `parse_string_continue`.
function parse_string_start(l::Parser, quoted::Bool)::Err{String}
    delim = quoted ? '\'' : '"'
    # No second delimiter: an ordinary single-line string.
    accept(l, delim) || return parse_string_continue(l, false, quoted)
    # Two delimiters but no third: the empty string.
    accept(l, delim) || return ""
    # Three delimiters open a multiline string; a line break directly after
    # them is eaten here so it does not become part of the value.
    accept(l, '\r')
    accept(l, '\n')
    return parse_string_continue(l, true, quoted)
end
1127

1128
# Predicates telling `accept_batch` which characters belong to the plain body
# of a string. They return false on the characters the string parser has to
# inspect: the delimiter, a backslash and, for single-line strings, a newline.
@inline stop_candidates_multiline(x)         = !(x == '"'  || x == '\\')
@inline stop_candidates_singleline(x)        = !(x == '"'  || x == '\\' || x == '\n')
@inline stop_candidates_multiline_quoted(x)  = !(x == '\'' || x == '\\')
@inline stop_candidates_singleline_quoted(x) = !(x == '\'' || x == '\\' || x == '\n')
703✔
1132

1133
# Parse the body of a string up to and including its closing delimiter(s).
# The byte ranges of the pieces that make up the value are pushed onto
# `l.chunks` and joined by `take_chunks`. In non-quoted strings, escape
# sequences are validated here; whether any was seen is passed on to
# `take_chunks`, which then unescapes the result.
function parse_string_continue(l::Parser, multiline::Bool, quoted::Bool)::Err{String}
    start_chunk = l.prevpos
    q = quoted ? '\'' : '"'
    contains_backslash = false
    # Number of closing delimiter characters to leave out of the last chunk
    offset = multiline ? 3 : 1
    while true
        if peek(l) == EOF_CHAR
            return ParserError(ErrUnexpectedEndString)
        end
        # Eat everything up to the next character that needs a closer look
        if quoted
            accept_batch(l, multiline ? stop_candidates_multiline_quoted : stop_candidates_singleline_quoted)
        else
            accept_batch(l, multiline ? stop_candidates_multiline : stop_candidates_singleline)
        end
        if !multiline && peek(l) == '\n'
            return ParserError(ErrNewLineInString)
        end
        next_slash = peek(l) == '\\'
        if !next_slash
            # TODO: Doesn't handle values with e.g. format `""""str""""`
            if accept(l, q) && (!multiline || (accept(l, q) && accept(l, q)))
                push!(l.chunks, start_chunk:(l.prevpos-offset-1))
                return take_chunks(l, contains_backslash)
            end
        end
        c = eat_char(l) # eat the character we stopped at
        next_slash = c == '\\'
        if next_slash && !quoted
            if peek(l) == '\n' || peek(l) == '\r'
                # Backslash at the end of a line: close the current chunk
                # before the slash and resume after the skipped whitespace.
                push!(l.chunks, start_chunk:(l.prevpos-1-1)) # -1 due to eating the slash
                skip_ws_nl_no_comment(l)
                start_chunk = l.prevpos
            else
                c = eat_char(l) # eat the escaped character
                if c == 'u'  || c == 'U'
                    # SPEC: \uXXXX takes four hex digits, \UXXXXXXXX takes eight
                    n = c == 'u' ? 4 : 8
                    set_marker!(l)
                    if !accept_n(l, n, isvalid_hex)
                        return ParserError(ErrInvalidUnicodeScalar)
                    end
                    codepoint = parse_int(l, false, 16)::Int64
                    #=
                    Unicode Scalar Value
                    ---------------------
                    Any Unicode code point except high-surrogate and
                    low-surrogate code points.  In other words, the ranges of
                    integers 0 to D7FF16 and E00016 to 10FFFF16 inclusive.
                    =#
                    if !(codepoint <= 0xD7FF || 0xE000 <= codepoint <= 0x10FFFF)
                        return ParserError(ErrInvalidUnicodeScalar)
                    end
                elseif c != 'b' && c != 't' && c != 'n' && c != 'f' && c != 'r' && c != '"' && c!= '\\'
                    return ParserError(ErrInvalidEscapeCharacter)
                end
                contains_backslash = true
            end
        end
    end
end
1192

1193
# Concatenate the byte ranges stored in `l.chunks` (indices into `l.str`) into
# a new string, clear the chunk list, and unescape the result when `unescape`
# is true.
function take_chunks(l::Parser, unescape::Bool)::String
    total_bytes = sum(length, l.chunks; init=0)
    buf = Base._string_n(total_bytes)
    write_at = 1
    for byterange in l.chunks
        # Each chunk is a range of byte indices, so its length is a byte count.
        nb = length(byterange)
        GC.@preserve buf begin
            unsafe_copyto!(pointer(buf, write_at), pointer(l.str, first(byterange)), nb)
        end
        write_at += nb
    end
    empty!(l.chunks)
    unescape || return buf
    return unescape_string(buf)
end
1209

1210
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc