• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / #37429

pending completion
#37429

push

local

web-flow
automatically detect and debug deadlocks in package loading (#48504)

61 of 61 new or added lines in 1 file covered. (100.0%)

81430 of 88033 relevant lines covered (92.5%)

29508886.28 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.13
/stdlib/Profile/src/Profile.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
"""
4
Profiling support, main entry point is the [`@profile`](@ref) macro.
5
"""
6
module Profile
7

8
import Base.StackTraces: lookup, UNKNOWN, show_spec_linfo, StackFrame
9

10
const nmeta = 4 # number of metadata fields per block (threadid, taskid, cpu_cycle_clock, thread_sleeping)
11

12
# deprecated functions: use `getdict` instead
13
# Accept a raw instruction-pointer value by reinterpreting it as a `Ptr{Cvoid}`.
lookup(ip::UInt) = lookup(Ptr{Cvoid}(ip))
4,854✔
14

15
export @profile
16

17
"""
18
    @profile
19

20
`@profile <expression>` runs your expression while taking periodic backtraces. These are
21
appended to an internal buffer of backtraces.
22
"""
23
macro profile(ex)
    # Escape the user's expression so it is evaluated in the caller's scope.
    user_code = esc(ex)
    return quote
        try
            start_timer()
            $user_code
        finally
            # Always stop sampling, even when the profiled expression throws.
            stop_timer()
        end
    end
end
33

34
# triggers printing the report and (optionally) saving a heap snapshot after a SIGINFO/SIGUSR1 profile request
35
const PROFILE_PRINT_COND = Ref{Base.AsyncCondition}()
36
# Loop forever: wait on `PROFILE_PRINT_COND`, run the current `peek_report`
# callback and, when JULIA_PROFILE_PEEK_HEAP_SNAPSHOT is set to true, also
# write a heap snapshot. Any exception other than `InterruptException` is logged.
function profile_printing_listener()
    try
        while true
            wait(PROFILE_PRINT_COND[])
            peek_report[]()
            want_snapshot = Base.get_bool_env("JULIA_PROFILE_PEEK_HEAP_SNAPSHOT", false)
            if want_snapshot === true
                println(stderr, "Saving heap snapshot...")
                snapshot_path = take_heap_snapshot()
                println(stderr, "Heap snapshot saved to `$(snapshot_path)`")
            end
        end
    catch err
        if !(err isa InterruptException)
            @error "Profile printing listener crashed" exception=(err, catch_backtrace())
        end
    end
end
53

54
# An internal function called to show the report after an information request (SIGINFO or SIGUSR1).
55
function _peek_report()
    # Render into a buffer that inherits stderr's IO properties and display size,
    # then emit the whole report to stderr with a single print call.
    buffer = IOBuffer()
    styled = IOContext(IOContext(buffer, stderr), :displaysize => displaysize(stderr))
    print(styled; groupby = [:thread, :task])
    Base.print(stderr, String(take!(buffer)))
end
61
# This is a ref so that it can be overridden by other profile info consumers.
62
const peek_report = Ref{Function}(_peek_report)
63

64
"""
65
    get_peek_duration()
66

67
Get the duration in seconds of the profile "peek" that is triggered via `SIGINFO` or `SIGUSR1`, depending on platform.
68
"""
69
get_peek_duration() = @ccall jl_get_profile_peek_duration()::Float64
70
"""
71
    set_peek_duration(t::Float64)
72

73
Set the duration in seconds of the profile "peek" that is triggered via `SIGINFO` or `SIGUSR1`, depending on platform.
74
"""
75
set_peek_duration(t::Real) = ccall(:jl_set_profile_peek_duration, Cvoid, (Float64,), t) # any `Real` is accepted; `ccall` converts `t` to `Float64`
76

77
precompile_script = """
78
import Profile
79
Profile.@profile while Profile.len_data() < 1000; rand(10,10) * rand(10,10); end
80
Profile.peek_report[]()
81
Profile.clear()
82
"""
83

84
####
85
#### User-level functions
86
####
87

88
"""
89
    init(; n::Integer, delay::Real)
90

91
Configure the `delay` between backtraces (measured in seconds), and the number `n` of instruction pointers that may be
92
stored per thread. Each instruction pointer corresponds to a single line of code; backtraces generally consist of a long
93
list of instruction pointers. Note that 6 spaces for instruction pointers per backtrace are used to store metadata and two
94
NULL end markers. Current settings can be obtained by calling this function with no arguments, and each can be set independently
95
using keywords or in the order `(n, delay)`.
96
"""
97
function init(; n::Union{Nothing,Integer} = nothing, delay::Union{Nothing,Real} = nothing, limitwarn::Bool = true)
    query_only = isnothing(n) && isnothing(delay)
    current_n = ccall(:jl_profile_maxlen_data, Csize_t, ())
    if query_only && current_n == 0
        # The buffer has never been set up: apply the defaults so the query
        # reports real settings.
        default_init()
        current_n = ccall(:jl_profile_maxlen_data, Csize_t, ())
    end
    # The runtime stores the delay in nanoseconds; report it in seconds.
    current_delay = ccall(:jl_profile_delay_nsec, UInt64, ()) / 10^9
    if query_only
        return current_n, current_delay
    end
    # Any setting not supplied keeps its current value.
    return init(something(n, current_n), something(delay, current_delay); limitwarn)
end
112

113
# Positional form of `init`: ask the runtime for a buffer of `n` instruction
# pointers sampled every `delay` seconds. On 32-bit systems the buffer is capped
# at 512MB; `limitwarn` controls whether that cap is reported with a warning.
# Throws an `ErrorException` when the runtime reports status -1.
function init(n::Integer, delay::Real; limitwarn::Bool = true)
    sample_size_bytes = sizeof(Ptr) # == Sys.WORD_SIZE / 8
    buffer_samples = n
    if Sys.WORD_SIZE == 32
        # Compare sample counts rather than byte counts: `n * sample_size_bytes`
        # can overflow a 32-bit integer and wrongly bypass the cap.
        max_samples = 2^29 ÷ sample_size_bytes
        if buffer_samples > max_samples
            buffer_samples = max_samples
            limitwarn && @warn "Requested profile buffer limited to 512MB (n = $buffer_samples) given that this system is 32-bit"
        end
    end
    buffer_size_bytes = buffer_samples * sample_size_bytes
    # The runtime expects the delay in nanoseconds.
    status = ccall(:jl_profile_init, Cint, (Csize_t, UInt64), buffer_samples, round(UInt64, 10^9*delay))
    if status == -1
        # Report the size that was actually requested from the runtime.
        error("could not allocate space for ", buffer_samples, " instruction pointers ($(Base.format_bytes(buffer_size_bytes)))")
    end
    return nothing
end
127

128
function default_init()
    # Default buffer size and sampling delay (seconds).
    n, delay = @static if Sys.iswindows() && Sys.WORD_SIZE == 32
        # The Win32 unwinder is 1000x slower than elsewhere (around 1ms/frame),
        # so sample less often and keep a smaller buffer.
        (1_000_000, 0.01)
    else
        # 10M samples with the timer firing every 1ms.
        # Keep these values synchronized with trigger_profile_peek
        (10_000_000, 0.001)
    end
    return init(n, delay; limitwarn = false)
end
144

145
# Checks whether the profile buffer has been initialized. If not, initializes it with the default size.
146
function check_init()
    # Read the capacity as `Csize_t`, matching the other call sites of
    # `jl_profile_maxlen_data` in this file. A capacity of zero means the
    # buffer has not been set up yet.
    buffer_size = ccall(:jl_profile_maxlen_data, Csize_t, ())
    if buffer_size == 0
        default_init()
    end
end
152

153
function __init__()
    # Note: The profile buffer is no longer initialized during __init__ because Profile is in the sysimage,
    # thus __init__ is called every startup. The buffer is lazily initialized the first time `@profile` is
    # used, if not manually initialized before that.
    @static if !Sys.iswindows()
        # triggering a profile via signals is not implemented on windows
        peek_cond = Base.AsyncCondition()
        Base.uv_unref(peek_cond.handle)
        PROFILE_PRINT_COND[] = peek_cond
        # Register the condition's handle with the runtime, then start the
        # listener task that waits on it.
        ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), peek_cond.handle)
        errormonitor(Threads.@spawn(profile_printing_listener()))
    end
end
166

167
"""
168
    clear()
169

170
Clear any existing backtraces from the internal buffer.
171
"""
172
clear() = @ccall jl_profile_clear_data()::Cvoid
2✔
173

174
const LineInfoDict = Dict{UInt64, Vector{StackFrame}}
175
const LineInfoFlatDict = Dict{UInt64, StackFrame}
176

177
# Bundle of report-formatting options built from the keyword arguments of `print`.
struct ProfileFormat
    maxdepth::Int
    mincount::Int
    noisefloor::Float64
    sortedby::Symbol
    combine::Bool
    C::Bool
    recur::Symbol
    # Keyword-only constructor; `new` converts each value to its field type
    # (e.g. the integer default `noisefloor = 0` is stored as `0.0`).
    function ProfileFormat(;
        maxdepth::Int = typemax(Int),
        mincount::Int = 0,
        noisefloor = 0,
        sortedby::Symbol = :filefuncline,
        combine = true,
        C = false,
        recur::Symbol = :off)
        fmt = new(maxdepth, mincount, noisefloor, sortedby, combine, C, recur)
        return fmt
    end
end
196

197
# offsets of the metadata in the data stream
198
const META_OFFSET_SLEEPSTATE = 2
199
const META_OFFSET_CPUCYCLECLOCK = 3
200
const META_OFFSET_TASKID = 4
201
const META_OFFSET_THREADID = 5
202

203
"""
204
    print([io::IO = stdout,] [data::Vector = fetch()], [lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data)]; kwargs...)
205

206
Prints profiling results to `io` (by default, `stdout`). If you do not
207
supply a `data` vector, the internal buffer of accumulated backtraces
208
will be used.
209

210
The keyword arguments can be any combination of:
211

212
 - `format` -- Determines whether backtraces are printed with (default, `:tree`) or without (`:flat`)
213
   indentation indicating tree structure.
214

215
 - `C` -- If `true`, backtraces from C and Fortran code are shown (normally they are excluded).
216

217
 - `combine` -- If `true` (default), instruction pointers are merged that correspond to the same line of code.
218

219
 - `maxdepth` -- Limits the depth higher than `maxdepth` in the `:tree` format.
220

221
 - `sortedby` -- Controls the order in `:flat` format. `:filefuncline` (default) sorts by the source
222
    line, `:count` sorts in order of number of collected samples, and `:overhead` sorts by the number of samples
223
    incurred by each function by itself.
224

225
 - `groupby` -- Controls grouping over tasks and threads, or no grouping. Options are `:none` (default), `:thread`, `:task`,
226
    `[:thread, :task]`, or `[:task, :thread]` where the last two provide nested grouping.
227

228
 - `noisefloor` -- Limits frames that exceed the heuristic noise floor of the sample (only applies to format `:tree`).
229
    A suggested value to try for this is 2.0 (the default is 0). This parameter hides samples for which `n <= noisefloor * √N`,
230
    where `n` is the number of samples on this line, and `N` is the number of samples for the callee.
231

232
 - `mincount` -- Limits the printout to only those lines with at least `mincount` occurrences.
233

234
 - `recur` -- Controls the recursion handling in `:tree` format. `:off` (default) prints the tree as normal. `:flat` instead
235
    compresses any recursion (by ip), showing the approximate effect of converting any self-recursion into an iterator.
236
    `:flatc` does the same but also includes collapsing of C frames (may do odd things around `jl_apply`).
237

238
 - `threads::Union{Int,AbstractVector{Int}}` -- Specify which threads to include snapshots from in the report. Note that
239
    this does not control which threads samples are collected on (which may also have been collected on another machine).
240

241
 - `tasks::Union{UInt,AbstractVector{UInt}}` -- Specify which tasks to include snapshots from in the report. Note that this
242
    does not control which tasks samples are collected within.
243
"""
244
function print(io::IO,
        data::Vector{<:Unsigned} = fetch(),
        lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data)
        ;
        format = :tree,
        C = false,
        combine = true,
        maxdepth::Int = typemax(Int),
        mincount::Int = 0,
        noisefloor = 0,
        sortedby::Symbol = :filefuncline,
        groupby::Union{Symbol,AbstractVector{Symbol}} = :none,
        recur::Symbol = :off,
        threads::Union{Int,AbstractVector{Int}} = 1:typemax(Int),
        tasks::Union{UInt,AbstractVector{UInt}} = typemin(UInt):typemax(UInt))

    pf = ProfileFormat(;C, combine, maxdepth, mincount, noisefloor, sortedby, recur)
    if groupby === :none
        # Ungrouped report: a single section filtered by `threads` and `tasks`.
        print(io, data, lidict, pf, format, threads, tasks, false)
    else
        if !in(groupby, [:thread, :task, [:task, :thread], [:thread, :task]])
            # Throw the ArgumentError itself; wrapping it in `error(...)` would raise
            # an ErrorException whose message is the printed form of the ArgumentError.
            throw(ArgumentError("Unrecognized groupby option: $groupby. Options are :none (default), :task, :thread, [:task, :thread], or [:thread, :task]"))
        elseif Sys.iswindows() && in(groupby, [:thread, [:task, :thread], [:thread, :task]])
            @warn "Profiling on windows is limited to the main thread. Other threads have not been sampled and will not show in the report"
        end
        # Set when any printed subsection (or an empty filter intersection) had no samples.
        any_nosamples = false
        println(io, "Overhead ╎ [+additional indent] Count File:Line; Function")
        println(io, "=========================================================")
        if groupby == [:task, :thread]
            # Outer grouping by task, inner grouping by thread.
            for taskid in intersect(get_task_ids(data), tasks)
                threadids = intersect(get_thread_ids(data, taskid), threads)
                if length(threadids) == 0
                    any_nosamples = true
                else
                    nl = length(threadids) > 1 ? "\n" : ""
                    printstyled(io, "Task $(Base.repr(taskid))$nl"; bold=true, color=Base.debug_color())
                    for threadid in threadids
                        printstyled(io, " Thread $threadid "; bold=true, color=Base.info_color())
                        nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
                        nosamples && (any_nosamples = true)
                        println(io)
                    end
                end
            end
        elseif groupby == [:thread, :task]
            # Outer grouping by thread, inner grouping by task.
            for threadid in intersect(get_thread_ids(data), threads)
                taskids = intersect(get_task_ids(data, threadid), tasks)
                if length(taskids) == 0
                    any_nosamples = true
                else
                    nl = length(taskids) > 1 ? "\n" : ""
                    printstyled(io, "Thread $threadid$nl"; bold=true, color=Base.info_color())
                    for taskid in taskids
                        printstyled(io, " Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
                        nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
                        nosamples && (any_nosamples = true)
                        println(io)
                    end
                end
            end
        elseif groupby === :task
            # NOTE(review): this replaces any caller-supplied `threads` filter with
            # "all threads" when grouping by task only — confirm this is intended.
            threads = 1:typemax(Int)
            for taskid in intersect(get_task_ids(data), tasks)
                printstyled(io, "Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
                nosamples = print(io, data, lidict, pf, format, threads, taskid, true)
                nosamples && (any_nosamples = true)
                println(io)
            end
        elseif groupby === :thread
            # NOTE(review): this replaces any caller-supplied `tasks` filter when
            # grouping by thread only — confirm this is intended.
            tasks = 1:typemax(UInt)
            for threadid in intersect(get_thread_ids(data), threads)
                printstyled(io, "Thread $threadid "; bold=true, color=Base.info_color())
                nosamples = print(io, data, lidict, pf, format, threadid, tasks, true)
                nosamples && (any_nosamples = true)
                println(io)
            end
        end
        any_nosamples && warning_empty(summary = true)
    end
    return
end
325

326
"""
327
    print([io::IO = stdout,] data::Vector, lidict::LineInfoDict; kwargs...)
328

329
Prints profiling results to `io`. This variant is used to examine results exported by a
330
previous call to [`retrieve`](@ref). Supply the vector `data` of backtraces and
331
a dictionary `lidict` of line information.
332

333
See `Profile.print([io], data)` for an explanation of the valid keyword arguments.
334
"""
335
function print(data::Vector{<:Unsigned} = fetch(), lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data); kwargs...)
    # Same report as the `io`-taking method, written to `stdout`.
    return print(stdout, data, lidict; kwargs...)
end
337

338
# Render one report section in the requested `format` (`:tree` or `:flat`) and
# return whatever `tree`/`flat` returns (used by callers as a "no samples" flag).
function print(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat,
                format::Symbol, threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}},
                is_subsection::Bool = false)
    width::Int = Base.displaysize(io)[2]
    samples = convert(Vector{UInt64}, data)
    if !(fmt.recur in (:off, :flat, :flatc))
        throw(ArgumentError("recur value not recognized"))
    end
    if format === :tree
        return tree(io, samples, lidict, width, fmt, threads, tasks, is_subsection)
    end
    format === :flat || throw(ArgumentError("output format $(repr(format)) not recognized"))
    # The flat report has no recursion handling.
    fmt.recur === :off || throw(ArgumentError("format flat only implements recur=:off"))
    return flat(io, samples, lidict, width, fmt, threads, tasks, is_subsection)
end
355

356
# Collect the distinct task ids found in the block metadata of `data`, scanning
# from the end of the buffer; when `threadid` is given, only blocks recorded
# for that thread are considered. Ids are returned in order of first encounter.
function get_task_ids(data::Vector{<:Unsigned}, threadid = nothing)
    seen = UInt[]
    for idx in reverse(eachindex(data))
        is_block_end(data, idx) || continue
        if threadid !== nothing && data[idx - META_OFFSET_THREADID] != threadid
            continue
        end
        tid = data[idx - META_OFFSET_TASKID]
        in(tid, seen) || push!(seen, tid)
    end
    return seen
end
368

369
# Collect the distinct thread ids found in the block metadata of `data`; when
# `taskid` is given, only blocks recorded for that task are considered.
# The result is sorted in ascending order.
function get_thread_ids(data::Vector{<:Unsigned}, taskid = nothing)
    seen = Int[]
    for idx in reverse(eachindex(data))
        is_block_end(data, idx) || continue
        if taskid !== nothing && data[idx - META_OFFSET_TASKID] != taskid
            continue
        end
        thr = data[idx - META_OFFSET_THREADID]
        in(thr, seen) || push!(seen, thr)
    end
    return sort(seen)
end
381

382
# Return `true` when index `i` of `data` is the final NULL of a block-end marker.
function is_block_end(data, i)
    # A block end is preceded by a second NULL and `nmeta` metadata fields, so the
    # earliest possible index is `nmeta + 2` (the same lower bound `has_meta` uses).
    # Rejecting smaller indices keeps callers that read `data[i - META_OFFSET_THREADID]`
    # inside the array.
    i < nmeta + 2 && return false
    # 32-bit linux has been seen to have rogue NULL ips, so we use two to
    # indicate block end, where the 2nd is the actual end index.
    # and we could have (though very unlikely):
    # 1:<stack><metadata><null><null><NULL><metadata><null><null>:end
    # and we want to ignore the triple NULL (which is an ip).
    return data[i] == 0 && data[i - 1] == 0 && data[i - META_OFFSET_SLEEPSTATE] != 0
end
391

392
# Return `true` as soon as `data` contains one position that looks like a block
# end with complete metadata: two consecutive NULLs preceded by a sleep state of
# 1 or 2 and non-zero cycle clock, task id and thread id.
function has_meta(data)
    # Start at 6 so that the largest offset read below stays in bounds.
    for i in 6:length(data)
        if data[i] == 0 &&                                # first block end null
           data[i - 1] == 0 &&                            # second block end null
           data[i - META_OFFSET_SLEEPSTATE] in 1:2 &&
           data[i - META_OFFSET_CPUCYCLECLOCK] != 0 &&
           data[i - META_OFFSET_TASKID] != 0 &&
           data[i - META_OFFSET_THREADID] != 0
            return true
        end
    end
    return false
end
404

405
"""
406
    retrieve(; kwargs...) -> data, lidict
407

408
"Exports" profiling results in a portable format, returning the set of all backtraces
409
(`data`) and a dictionary that maps the (session-specific) instruction pointers in `data` to
410
`LineInfo` values that store the file name, function name, and line number. This function
411
allows you to save profiling results for future analysis.
412
"""
413
function retrieve(; kwargs...)
    backtraces = fetch(; kwargs...)
    lineinfo = getdict(backtraces)
    return (backtraces, lineinfo)
end
417

418
# Build a fresh line-info dictionary for the instruction pointers in `data`.
getdict(data::Vector{UInt}) = getdict!(LineInfoDict(), data)
422

423
# Fill `dict` with the stack frames for every distinct instruction pointer in
# `data`, looking the pointers up in parallel chunks, and return `dict`.
function getdict!(dict::LineInfoDict, data::Vector{UInt})
    # we don't want metadata here as we're just looking up ips
    ips = unique(has_meta(data) ? strip_meta(data) : data)
    isempty(ips) && return dict
    frames = similar(ips, Vector{StackFrame})
    # One chunk of indices per thread in the pool, rounding the chunk size up.
    chunk_len = cld(length(ips), Threads.threadpoolsize())
    @sync for chunk in Iterators.partition(eachindex(ips), chunk_len)
        Threads.@spawn for i in chunk
            frames[i] = _lookup_corrected(ips[i])
        end
    end
    # Only write to `dict` after all spawned lookups have finished.
    for (ip, frame) in zip(ips, frames)
        dict[ip] = frame
    end
    return dict
end
441

442
# Look up the stack frames for `ip` and pass them through
# `Base.update_stackframes_callback[]` so line numbers can be corrected.
function _lookup_corrected(ip::UInt)
    frames = lookup(Ptr{Cvoid}(ip))
    # To correct line numbers for moving code, put it in the form expected by
    # Base.update_stackframes_callback[]
    counted = [(frame, 1) for frame in frames]
    # Note: Base.update_stackframes_callback[] should be data-race free
    try
        Base.invokelatest(Base.update_stackframes_callback[], counted)
    catch
        # A failing callback is ignored; the frames are returned as they are.
    end
    return map(first, counted)
end
451

452
"""
453
    flatten(btdata::Vector, lidict::LineInfoDict) -> (newdata::Vector{UInt64}, newdict::LineInfoFlatDict)
454

455
Produces "flattened" backtrace data. Individual instruction pointers
456
sometimes correspond to a multi-frame backtrace due to inlining; in
457
such cases, this function inserts fake instruction pointers for the
458
inlined calls, and returns a dictionary that is a 1-to-1 mapping
459
between instruction pointers and a single StackFrame.
460
"""
461
function flatten(data::Vector, lidict::LineInfoDict)
    # Fake instruction pointers are handed out counting down from typemax(UInt64) - 1.
    next_fake = typemax(UInt64) - 1
    used = Set(keys(lidict))  # never hand out an ip that is already a real key
    flatdict = Dict{UInt64,StackFrame}()
    expansion = Dict{UInt64,Vector{UInt64}}()  # real ip => its replacement fake ips
    for (ip, frames) in lidict
        if length(frames) == 1
            flatdict[ip] = frames[1]
            continue
        end
        fake_ips = UInt64[]
        for frame in frames
            while next_fake > 0 && next_fake in used
                next_fake -= 1
            end
            next_fake == 0 && error("all possible instruction pointers used")
            push!(fake_ips, next_fake)
            flatdict[next_fake] = frame
            next_fake -= 1
        end
        expansion[ip] = fake_ips
    end
    # Rewrite the backtrace data, substituting the fake ips where an ip was expanded.
    flatdata = UInt64[]
    for raw in data
        ip = convert(UInt64, raw)
        replacement = get(expansion, ip, nothing)
        if replacement === nothing
            push!(flatdata, ip)
        else
            append!(flatdata, replacement)
        end
    end
    return (flatdata, flatdict)
end
494

495
# Take a file-system path and try to form a concise representation of it
496
# based on the package ecosystem
497
function short_path(spath::Symbol, filenamecache::Dict{Symbol, String})
    # The shortened form is computed once per path and memoized in `filenamecache`.
    return get!(filenamecache, spath) do
        path = string(spath)
        if !isabspath(path)
            base_file = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base", path)
            if isfile(base_file)
                # Base (or Core/Compiler) files are recorded relative to the base directory
                return joinpath("@Base", normpath(path))
            end
            # for non-existent relative paths (such as "REPL[1]"), just consider simplifying them
            return normpath(path) # drop leading "./"
        end
        ispath(path) || return path
        # try to replace the file-system prefix with a short "@Module" one,
        # assuming that profile came from the current machine
        # (or at least has the same file-system layout)
        dir = path
        while !isempty(dir)
            dir, leaf = splitdir(dir)
            isempty(leaf) && break
            @assert startswith(path, dir)
            for projname in Base.project_names
                candidate = joinpath(dir, projname)
                Base.isfile_casesensitive(candidate) || continue
                pkgid = Base.project_file_name_uuid(candidate, "")
                isempty(pkgid.name) && return path # bad Project file
                # return the joined module name prefix and path suffix
                suffix = path[nextind(path, sizeof(dir)):end]
                return string("@", pkgid.name, suffix)
            end
        end
        return path
    end
end
533

534
"""
535
    callers(funcname, [data, lidict], [filename=<filename>], [linerange=<start:stop>]) -> Vector{Tuple{count, lineinfo}}
536

537
Given a previous profiling run, determine who called a particular function. Supplying the
538
filename (and optionally, range of line numbers over which the function is defined) allows
539
you to disambiguate an overloaded method. The returned value is a vector containing a count
540
of the number of calls and line information about the caller. One can optionally supply
541
backtrace `data` obtained from [`retrieve`](@ref); otherwise, the current internal
542
profile buffer is used.
543
"""
544
function callers end
545

546
function callers(funcname::String, bt::Vector, lidict::LineInfoFlatDict; filename = nothing, linerange = nothing)
    if filename === nothing
        # A line range is meaningless without the file it refers to.
        linerange === nothing ||
            throw(ArgumentError("if supplying linerange, you must also supply the filename"))
        return callersf(li -> String(li.func) == funcname, bt, lidict)
    end
    fname = String(filename)
    # Choose the frame predicate: function + file, optionally narrowed to `linerange`.
    matches = if linerange === nothing
        li -> String(li.func) == funcname && String(li.file) == fname
    else
        li -> String(li.func) == funcname && String(li.file) == fname && in(li.line, linerange)
    end
    return callersf(matches, bt, lidict)
end
561

562
# Convenience methods that forward to the `LineInfoFlatDict` implementation.

# Un-flattened data (as returned by `retrieve`) is flattened first.
callers(funcname::String, bt::Vector, lidict::LineInfoDict; kwargs...) =
    callers(funcname, flatten(bt, lidict)...; kwargs...)
# No data supplied: use the current internal profile buffer.
callers(funcname::String; kwargs...) = callers(funcname, retrieve()...; kwargs...)
# A function is matched by its printed name. Both dictionary kinds are accepted,
# so `callers(f, retrieve()...)` works the same way as the `String` form.
callers(func::Function, bt::Vector, lidict::Union{LineInfoDict, LineInfoFlatDict}; kwargs...) =
    callers(string(func), bt, lidict; kwargs...)
callers(func::Function; kwargs...) = callers(string(func), retrieve()...; kwargs...)
4✔
568

569
##
570
## For --track-allocation
571
##
572
# Reset the malloc log. Used to avoid counting memory allocated during
573
# compilation.
574

575
"""
576
    clear_malloc_data()
577

578
Clears any stored memory allocation data when running julia with `--track-allocation`.
579
Execute the command(s) you want to test (to force JIT-compilation), then call
580
[`clear_malloc_data`](@ref). Then execute your command(s) again, quit
581
Julia, and examine the resulting `*.mem` files.
582
"""
583
clear_malloc_data() = @ccall jl_clear_malloc_data()::Cvoid
584

585
# C wrappers
586
# Start the sampling timer, initializing the profile buffer with the default
# size first if needed. A negative status from the runtime is raised as an
# error using the matching message from `error_codes`.
function start_timer()
    check_init() # if the profile buffer hasn't been initialized, initialize with default size
    status = ccall(:jl_profile_start_timer, Cint, ())
    status < 0 && error(error_codes[status])
    return nothing
end
593

594

595
# Thin wrappers around the runtime's profiling entry points.

stop_timer() = @ccall jl_profile_stop_timer()::Cvoid

is_running() = (@ccall jl_profile_is_running()::Cint) != 0

is_buffer_full() = (@ccall jl_profile_is_buffer_full()::Cint) != 0

# The runtime returns a byte pointer; view it as a pointer to `UInt` entries.
get_data_pointer() = convert(Ptr{UInt}, (@ccall jl_profile_get_data()::Ptr{UInt8}))

len_data() = convert(Int, (@ccall jl_profile_len_data()::Csize_t))

maxlen_data() = convert(Int, (@ccall jl_profile_maxlen_data()::Csize_t))
71✔
606

607
# Messages for the negative status codes that `start_timer` may receive from
# `jl_profile_start_timer`. Declared `const` so the lookup in `start_timer`
# does not go through an untyped global.
const error_codes = Dict(
    -1=>"cannot specify signal action for profiling",
    -2=>"cannot create the timer for profiling",
    -3=>"cannot start the timer for profiling",
    -4=>"cannot unblock SIGUSR1")
612

613

614
"""
615
    fetch(;include_meta = true) -> data
616

617
Return a copy of the buffer of profile backtraces. Note that the
618
values in `data` have meaning only on this machine in the current session, because it
619
depends on the exact memory addresses used in JIT-compiling. This function is primarily for
620
internal use; [`retrieve`](@ref) may be a better choice for most users.
621
By default metadata such as threadid and taskid is included. Set `include_meta` to `false` to strip metadata.
622
"""
623
function fetch(;include_meta = true, limitwarn = true)
    if maxlen_data() == 0
        error("The profiling data buffer is not initialized. A profile has not been requested this session.")
    end
    nsamples = len_data()
    if limitwarn && is_buffer_full()
        @warn """The profile data buffer is full; profiling probably terminated
                 before your program finished. To profile for longer runs, call
                 `Profile.init()` with a larger buffer and/or larger delay."""
    end
    # Copy the runtime's buffer into a Julia-owned vector.
    snapshot = Vector{UInt}(undef, nsamples)
    GC.@preserve snapshot unsafe_copyto!(pointer(snapshot), get_data_pointer(), nsamples)
    # An empty buffer has nothing to strip.
    return (include_meta || isempty(snapshot)) ? snapshot : strip_meta(snapshot)
end
641

642
# Return a copy of `data` with each block's metadata fields and extra NULL
# removed, leaving the instruction pointers and one NULL terminator per block.
function strip_meta(data)
    dropped_per_block = nmeta + 1 # metadata fields and the extra NULL IP
    nblocks = count(i -> is_block_end(data, i), eachindex(data))
    stripped = Vector{UInt}(undef, length(data) - nblocks * dropped_per_block)
    # Copy back to front so a block end is recognized before its metadata is reached.
    src = length(data)
    dst = length(stripped)
    while src > 0 && dst > 0
        stripped[dst] = data[src]
        src -= is_block_end(data, src) ? dropped_per_block + 1 : 1
        dst -= 1
    end
    @assert src == dst == 0 "metadata stripping failed"
    return stripped
end
658

659
"""
660
    Profile.add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0) -> data_with_meta
661

662
The converse of `Profile.fetch(;include_meta = false)`; this will add fake metadata, and can be used
663
for compatibility and by packages (e.g., FlameGraphs.jl) that would rather not depend on the internal
664
details of the metadata format.
665
"""
666
function add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0)
    threadid == 0 && error("Fake threadid cannot be 0")
    taskid == 0 && error("Fake taskid cannot be 0")
    !isempty(data) && has_meta(data) && error("input already has metadata")
    # Fake cycle counter; incremented for every block so the values stay non-zero.
    cycle = UInt64(99)
    out = similar(data, 0)
    for val in data
        if iszero(val)
            # A NULL ends a backtrace: emit
            # (threadid, taskid, cpu_cycle_clock, thread_sleeping) followed by two NULLs.
            # The sleep state is stored incremented by one, so "not sleeping" is 1.
            cycle += 1
            push!(out, threadid, taskid, cycle, 1, 0, 0)
        else
            push!(out, val)
        end
    end
    return out
end
683

684
## Print as a flat list
685
# Counts the number of times each line appears, at any nesting level and at the topmost level
686
# Merging multiple equivalent entries and recursive calls
687
# Tally the profile buffer `data` into per-line statistics.
#
# Returns `(lilist, n, m, totalshots, nsleeping)` where, for each entry of `lilist`:
#   - `n[i]` counts the backtraces in which the entry appears (at most once per backtrace), and
#   - `m[i]` counts the backtraces in which the entry was the leaf, i.e. the last visible
#     frame reached when walking a backtrace from its block end towards its start.
# `totalshots` is the number of backtraces that matched `threads`/`tasks` and `nsleeping` is how
# many of those were flagged as sleeping in their metadata.
#
# Entries are keyed by `ip` when `T === UInt64` and by the `StackFrame` itself otherwise.
# Frames with `from_c` set are ignored unless `C` is true.
function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, C::Bool,
                    threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}) where {T}
    !isempty(data) && !has_meta(data) && error("Profile data is missing required metadata")
    lilist = StackFrame[]
    n = Int[]
    m = Int[]
    lilist_idx = Dict{T, Int}()
    recursive = Set{T}()
    leaf = 0 # index (into `lilist`) of the deepest visible frame of the backtrace being walked, 0 if none
    totalshots = 0
    startframe = length(data)
    skip = false
    nsleeping = 0
    for i in startframe:-1:1
        (startframe - 1) >= i >= (startframe - (nmeta + 1)) && continue # skip metadata (it's read ahead below) and extra block end NULL IP
        ip = data[i]
        if is_block_end(data, i)
            # The backtrace walked so far is now complete, so its leaf is known: this is
            # the frame that gets the overhead, matching how `tree!` assigns `overhead`.
            if leaf != 0
                m[leaf] += 1
                leaf = 0
            end
            # read metadata
            thread_sleeping = data[i - META_OFFSET_SLEEPSTATE] - 1 # subtract 1 as state is incremented to avoid being equal to 0
            # cpu_cycle_clock = data[i - META_OFFSET_CPUCYCLECLOCK]
            taskid = data[i - META_OFFSET_TASKID]
            threadid = data[i - META_OFFSET_THREADID]
            if !in(threadid, threads) || !in(taskid, tasks)
                skip = true
                continue
            end
            if thread_sleeping == 1
                nsleeping += 1
            end
            skip = false
            totalshots += 1
            empty!(recursive)
            startframe = i
        elseif !skip
            frames = lidict[ip]
            nframes = (frames isa Vector ? length(frames) : 1)
            # `tree!` nests `frames[1]` deepest, so the first visible frame of this `ip` is its leaf candidate
            ipleaf = 0
            for j = 1:nframes
                frame = (frames isa Vector ? frames[j] : frames)
                !C && frame.from_c && continue
                key = (T === UInt64 ? ip : frame)
                idx = get!(lilist_idx, key, length(lilist) + 1)
                if idx > length(lilist)
                    # first time this key is seen at all
                    push!(recursive, key)
                    push!(lilist, frame)
                    push!(n, 1)
                    push!(m, 0)
                elseif !(key in recursive)
                    # first time this key is seen in the current backtrace
                    push!(recursive, key)
                    n[idx] += 1
                end
                ipleaf == 0 && (ipleaf = idx)
            end
            # an `ip` whose frames were all ignored leaves the previous leaf in place
            ipleaf != 0 && (leaf = ipleaf)
        end
    end
    # the backtrace stored first in the buffer is walked last and has no block end after it
    if leaf != 0
        m[leaf] += 1
    end
    @assert length(lilist) == length(n) == length(m) == length(lilist_idx)
    return (lilist, n, m, totalshots, nsleeping)
end
748

749
# Print the flat report for `data` to `io`, followed by a "Total snapshots" summary line.
#
# Returns `true` when there was nothing to list (only the summary or the empty-profile
# warning was emitted) and `false` when the table was printed.
function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, cols::Int, fmt::ProfileFormat,
                threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool)
    lilist, n, m, totalshots, nsleeping = parse_flat(fmt.combine ? StackFrame : UInt64, data, lidict, fmt.C, threads, tasks)
    # Without any snapshot the ratio would be 0/0 = NaN and `round(Int, NaN)` throws an
    # `InexactError`; report 0% utilization in that case instead.
    utilization = totalshots == 0 ? 0 : round(Int, (1 - (nsleeping / totalshots)) * 100)
    if isempty(lilist)
        if is_subsection
            Base.print(io, "Total snapshots: ")
            printstyled(io, "$(totalshots)", color=Base.warn_color())
            Base.println(io, " (", utilization, "% utilization)")
        else
            warning_empty()
        end
        return true
    end
    filenamemap = Dict{Symbol,String}()
    print_flat(io, lilist, n, m, cols, filenamemap, fmt)
    Base.print(io, "Total snapshots: ", totalshots, " (", utilization, "% utilization")
    if is_subsection
        println(io, ")")
    else
        println(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task)")
    end
    return false
end
779

780
# Print the flat table: one row per entry of `lilist` with its count `n[i]`, its overhead
# `m[i]`, file, line and function.
#
# Rows are ordered by `fmt.sortedby` (`:count`, `:overhead`, anything else sorts with
# `liperm`) and rows whose count is below `fmt.mincount` are omitted. `lilist` must not be
# empty, since the column widths are derived from `maximum(n)` and `maximum(m)`.
function print_flat(io::IO, lilist::Vector{StackFrame},
        n::Vector{Int}, m::Vector{Int},
        cols::Int, filenamemap::Dict{Symbol,String},
        fmt::ProfileFormat)
    if fmt.sortedby === :count
        p = sortperm(n)
    elseif fmt.sortedby === :overhead
        p = sortperm(m)
    else
        p = liperm(lilist)
    end
    lilist = lilist[p]
    n = n[p]
    m = m[p]
    filenames = String[short_path(li.file, filenamemap) for li in lilist]
    funcnames = String[string(li.func) for li in lilist]
    wcounts = max(6, ndigits(maximum(n)))
    wself = max(9, ndigits(maximum(m)))
    maxline = 1
    maxfile = 6
    maxfunc = 10
    for i in eachindex(lilist)
        li = lilist[i]
        maxline = max(maxline, li.line)
        maxfunc = max(maxfunc, length(funcnames[i]))
        maxfile = max(maxfile, length(filenames[i]))
    end
    wline = max(5, ndigits(maxline))
    ntext = max(20, cols - wcounts - wself - wline - 3)
    maxfunc += 25 # for type signatures
    if maxfile + maxfunc <= ntext
        wfile = maxfile
        # Give the function column everything the file column does not need (for type sig).
        # Subtracting `maxfunc` here instead would shrink the column as names get longer.
        wfunc = ntext - wfile
    else
        wfile = 2*ntext÷5
        wfunc = 3*ntext÷5
    end
    println(io, lpad("Count", wcounts, " "), " ", lpad("Overhead", wself, " "), " ",
            rpad("File", wfile, " "), " ", lpad("Line", wline, " "), " Function")
    println(io, lpad("=====", wcounts, " "), " ", lpad("========", wself, " "), " ",
            rpad("====", wfile, " "), " ", lpad("====", wline, " "), " ========")
    for i in eachindex(n)
        n[i] < fmt.mincount && continue
        li = lilist[i]
        Base.print(io, lpad(string(n[i]), wcounts, " "), " ")
        Base.print(io, lpad(string(m[i]), wself, " "), " ")
        if li == UNKNOWN
            if !fmt.combine && li.pointer != 0
                Base.print(io, "@0x", string(li.pointer, base=16))
            else
                Base.print(io, "[any unknown stackframes]")
            end
        else
            file = filenames[i]
            isempty(file) && (file = "[unknown file]")
            Base.print(io, rpad(rtruncto(file, wfile), wfile, " "), " ")
            Base.print(io, lpad(li.line > 0 ? string(li.line) : "?", wline, " "), " ")
            fname = funcnames[i]
            if !li.from_c && li.linfo !== nothing
                # prefer the specialized signature over the bare function name
                fname = sprint(show_spec_linfo, li)
            end
            isempty(fname) && (fname = "[unknown function]")
            Base.print(io, ltruncto(fname, wfunc))
        end
        println(io)
    end
    nothing
end
848

849
## A tree representation
850

851
# Node of a prefix trie of backtrace counts; children are stored in `down`, keyed by `T`
mutable struct StackFrameTree{T} # where T <: Union{UInt64, StackFrame}
    # content fields:
    frame::StackFrame
    count::Int          # number of frames this appeared in
    overhead::Int       # number of frames where this was the code being executed
    flat_count::Int     # number of times this frame was in the flattened representation (unlike count, this'll sum to 100% of parent)
    max_recur::Int      # maximum number of times this frame was the *top* of the recursion in the stack
    count_recur::Int    # sum of the number of times this frame was the *top* of the recursion in a stack (divide by count to get an average)
    down::Dict{T, StackFrameTree{T}}
    # construction workers:
    recur::Int
    builder_key::Vector{UInt64}
    builder_value::Vector{StackFrameTree{T}}
    up::StackFrameTree{T}
    # Create an empty node: `UNKNOWN` frame, all counters zero, no children.
    # `up` is deliberately left unassigned.
    function StackFrameTree{T}() where {T}
        return new(UNKNOWN, 0, 0, 0, 0, 0, Dict{T, StackFrameTree{T}}(), 0, UInt64[], StackFrameTree{T}[])
    end
end
868

869

870
const indent_s = "    ╎"^10
const indent_z = collect(eachindex(indent_s))
# Return the first `depth` characters of the indentation pattern `indent_s`,
# repeating the pattern when `depth` exceeds its length. Non-positive depths give "".
function indent(depth::Int)
    depth < 1 && return ""
    period = length(indent_z)
    if depth <= period
        return indent_s[1:indent_z[depth]]
    end
    whole, partial = divrem(depth, period)
    prefix = indent_s^whole
    if partial != 0
        prefix *= SubString(indent_s, 1, indent_z[partial])
    end
    return prefix
end
880

881
# Build the display line for each node in `frames` (the children shown at depth `level`).
#
# `maxes` supplies the largest `overhead` and `count` of the whole tree so that the numeric
# columns line up. Indentation is capped at half of `cols`; any depth beyond the cap is
# shown as a "+N" marker. When `showpointer` is true the instruction pointer is prepended
# to the function name.
function tree_format(frames::Vector{<:StackFrameTree}, level::Int, cols::Int, maxes, filenamemap::Dict{Symbol,String}, showpointer::Bool)
    nindent = min(cols>>1, level)
    ndigoverhead = ndigits(maxes.overhead)
    ndigcounts = ndigits(maxes.count)
    ndigline = ndigits(maximum(node.frame.line for node in frames)) + 6
    ntext = max(30, cols - ndigoverhead - nindent - ndigcounts - ndigline - 6)
    widthfile = 2*ntext÷5 # min 12
    widthfunc = 3*ntext÷5 # min 18
    nextra = level - nindent
    showextra = nextra > 0
    if showextra
        # make room for the "+N " marker
        nindent -= ndigits(nextra) + 2
    end
    # the indentation prefix is the same for every line of this level
    prefix = nindent == 0 ? "" : indent(nindent - 1) * " "
    if showextra
        prefix = string(prefix, "+", nextra, " ")
    end
    ptrdigits = 2*sizeof(Ptr{Cvoid})
    strs = Vector{String}(undef, length(frames))
    for (i, node) in enumerate(frames)
        li = node.frame
        stroverhead = lpad(node.overhead > 0 ? string(node.overhead) : "", ndigoverhead, " ")
        strcount = rpad(string(node.count), ndigcounts, " ")
        if li == UNKNOWN
            strs[i] = string(stroverhead, "╎", prefix, strcount, " [unknown stackframe]")
        elseif li.line == li.pointer
            strs[i] = string(stroverhead, "╎", prefix, strcount, " ",
                "[unknown function] (pointer: 0x",
                string(li.pointer, base = 16, pad = ptrdigits),
                ")")
        else
            fname = if !li.from_c && li.linfo !== nothing
                sprint(show_spec_linfo, li)
            else
                string(li.func)
            end
            filename = short_path(li.file, filenamemap)
            if showpointer
                fname = string(
                    "0x",
                    string(li.pointer, base = 16, pad = ptrdigits),
                    " ",
                    fname)
            end
            linestr = li.line == -1 ? "?" : string(li.line)
            strs[i] = string(stroverhead, "╎", prefix, strcount, " ",
                rtruncto(filename, widthfile),
                ":",
                linestr,
                "; ",
                ltruncto(fname, widthfunc))
        end
    end
    return strs
end
938

939
# Turn a list of backtraces into a tree (implicitly separated by NULL markers).
#
# `all` is walked from its end towards its start. Every block end starts a new backtrace,
# whose entries are then inserted one level deeper at each step starting from `root`.
# Frames with `from_c` set are left out unless `C` is true. `recur` selects the recursion
# handling (`:off`, `:flat` or `:flatc`). When `threads`/`tasks` are given, backtraces whose
# metadata does not match are skipped.
#
# Returns `(root, nsleeping)`, where `nsleeping` is the number of accepted backtraces that
# were flagged as sleeping in their metadata.
function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineInfoFlatDict, LineInfoDict}, C::Bool, recur::Symbol,
                threads::Union{Int,AbstractVector{Int},Nothing}=nothing, tasks::Union{UInt,AbstractVector{UInt},Nothing}=nothing) where {T}
    !isempty(all) && !has_meta(all) && error("Profile data is missing required metadata")
    parent = root
    tops = Vector{StackFrameTree{T}}()
    build = Vector{StackFrameTree{T}}()
    startframe = length(all)
    skip = false
    nsleeping = 0
    for i in startframe:-1:1
        (startframe - 1) >= i >= (startframe - (nmeta + 1)) && continue # skip metadata (it's read ahead below) and extra block end NULL IP
        ip = all[i]
        if is_block_end(all, i)
            # read metadata
            thread_sleeping = all[i - META_OFFSET_SLEEPSTATE] - 1 # subtract 1 as state is incremented to avoid being equal to 0
            # cpu_cycle_clock = all[i - META_OFFSET_CPUCYCLECLOCK]
            taskid = all[i - META_OFFSET_TASKID]
            threadid = all[i - META_OFFSET_THREADID]
            if (threads !== nothing && !in(threadid, threads)) ||
               (tasks !== nothing && !in(taskid, tasks))
                skip = true
                continue
            end
            if thread_sleeping == 1
                nsleeping += 1
            end
            skip = false
            # sentinel value indicates the start of a new backtrace
            empty!(build)
            root.recur = 0
            if recur !== :off
                # We mark all visited nodes so we'll only count those branches
                # once for each backtrace. Reset that now for the next backtrace.
                push!(tops, parent)
                for top in tops
                    while top.recur != 0
                        top.max_recur < top.recur && (top.max_recur = top.recur)
                        top.recur = 0
                        top = top.up
                    end
                end
                empty!(tops)
            end
            # `parent` is the deepest node of the backtrace that was just finished
            let this = parent
                while this !== root
                    this.flat_count += 1
                    this = this.up
                end
            end
            parent.overhead += 1
            parent = root
            root.count += 1
            startframe = i
        elseif !skip
            if recur === :flat || recur === :flatc
                pushfirst!(build, parent)
                # Rewind the `parent` tree back, if this exact ip was already present *higher* in the current tree
                found = false
                for j in 1:(startframe - i)
                    if ip == all[i + j]
                        if recur === :flat # if not flattening C frames, check that now
                            frames = lidict[ip]
                            frame = (frames isa Vector ? frames[1] : frames)
                            frame.from_c && break # not flattening this frame
                        end
                        push!(tops, parent)
                        parent = build[j]
                        parent.recur += 1
                        parent.count_recur += 1
                        found = true
                        break
                    end
                end
                found && continue
            end
            builder_key = parent.builder_key
            builder_value = parent.builder_value
            fastkey = searchsortedfirst(builder_key, ip)
            # `searchsortedfirst` returns an index in 1:length+1, so the last position is a
            # valid hit as well. With `<` the largest key never took the fast path and was
            # inserted a second time below.
            if fastkey <= length(builder_key) && builder_key[fastkey] === ip
                # jump forward to the end of the inlining chain
                # avoiding an extra (slow) lookup of `ip` in `lidict`
                # and an extra chain of them in `down`
                # note that we may even have this === parent (if we're ignoring this frame ip)
                this = builder_value[fastkey]
                let this = this
                    while this !== parent && (recur === :off || this.recur == 0)
                        this.count += 1
                        this.recur = 1
                        this = this.up
                    end
                end
                parent = this
                continue
            end

            frames = lidict[ip]
            nframes = (frames isa Vector ? length(frames) : 1)
            this = parent
            # add all the inlining frames
            for k = nframes:-1:1
                frame = (frames isa Vector ? frames[k] : frames)
                !C && frame.from_c && continue
                key = (T === UInt64 ? ip : frame)
                this = get!(StackFrameTree{T}, parent.down, key)
                if recur === :off || this.recur == 0
                    this.frame = frame
                    this.up = parent
                    this.count += 1
                    this.recur = 1
                end
                parent = this
            end
            # record where the end of this chain is for this ip
            insert!(builder_key, fastkey, ip)
            insert!(builder_value, fastkey, this)
        end
    end
    # Clear the construction-only state (`recur`, `builder_key`, `builder_value`) of every
    # node, using an explicit stack instead of recursion.
    function cleanup!(node::StackFrameTree)
        stack = [node]
        while !isempty(stack)
            node = pop!(stack)
            node.recur = 0
            empty!(node.builder_key)
            empty!(node.builder_value)
            append!(stack, values(node.down))
        end
        nothing
    end
    cleanup!(root)
    return root, nsleeping
end
1071

1072
# Collect the largest `count`, `flat_count`, `overhead` and `max_recur` found on any node
# of the tree below (and including) `root`. Uses an explicit stack instead of recursion.
function maxstats(root::StackFrameTree)
    maxcount = 0
    maxflatcount = 0
    maxoverhead = 0
    maxmaxrecur = 0
    pending = [root]
    while !isempty(pending)
        node = pop!(pending)
        maxcount = max(maxcount, node.count)
        maxoverhead = max(maxoverhead, node.overhead)
        maxflatcount = max(maxflatcount, node.flat_count)
        maxmaxrecur = max(maxmaxrecur, node.max_recur)
        append!(pending, values(node.down))
    end
    return (count=maxcount, count_flat=maxflatcount, overhead=maxoverhead, max_recur=maxmaxrecur)
end
1088

1089
# Print the stack frame tree starting at a particular root. Uses a worklist to
# avoid stack overflows.
#
# Children are ordered according to `fmt.sortedby`. Nodes deeper than `fmt.maxdepth` are not
# expanded, and children whose count is below `fmt.mincount` or below the noise floor
# derived from their parent are dropped. The column header is only printed when
# `is_subsection` is false.
function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat, is_subsection::Bool) where T
    maxes = maxstats(bt)
    filenamemap = Dict{Symbol,String}()
    # entries are (node, depth, noise floor inherited from the parent, line to print)
    worklist = [(bt, 0, 0, "")]
    if !is_subsection
        println(io, "Overhead ╎ [+additional indent] Count File:Line; Function")
        println(io, "=========================================================")
    end
    while !isempty(worklist)
        (node, level, noisefloor, line) = popfirst!(worklist)
        if !isempty(line)
            println(io, line)
        end
        (level > fmt.maxdepth || isempty(node.down)) && continue
        # Order the line information
        nexts = collect(values(node.down))
        # Generate the string for each line
        strs = tree_format(nexts, level, cols, maxes, filenamemap, T === UInt64)
        # Recurse to the next level
        sortedby = fmt.sortedby
        if sortedby === :count
            p = sortperm([child.count for child in nexts])
        elseif sortedby === :overhead
            p = sortperm([child.overhead for child in nexts])
        elseif sortedby === :flat_count
            p = sortperm([child.flat_count for child in nexts])
        else
            p = liperm([child.frame for child in nexts])
        end
        # children are pushed to the front in reverse, so they get printed in order `p`
        for i in reverse(p)
            child = nexts[i]
            count = child.count
            (count < fmt.mincount || count < noisefloor) && continue
            noisefloor_down = fmt.noisefloor > 0 ? floor(Int, fmt.noisefloor * sqrt(count)) : 0
            pushfirst!(worklist, (child, level + 1, noisefloor_down, strs[i]))
        end
    end
    return
end
1134

1135
# Build the backtrace tree for `data` and print it to `io` together with a
# "Total snapshots" summary line.
#
# Returns `true` when the tree has no entries (only the summary or the empty-profile
# warning was emitted) and `false` otherwise.
function tree(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoFlatDict, LineInfoDict}, cols::Int, fmt::ProfileFormat,
                threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool)
    if fmt.combine
        root, nsleeping = tree!(StackFrameTree{StackFrame}(), data, lidict, fmt.C, fmt.recur, threads, tasks)
    else
        root, nsleeping = tree!(StackFrameTree{UInt64}(), data, lidict, fmt.C, fmt.recur, threads, tasks)
    end
    # Without any snapshot the ratio would be 0/0 = NaN and `round(Int, NaN)` throws an
    # `InexactError`; report 0% utilization in that case instead.
    utilization = root.count == 0 ? 0 : round(Int, (1 - (nsleeping / root.count)) * 100)
    # outside of subsections the tree comes first and the summary last
    !is_subsection && print_tree(io, root, cols, fmt, is_subsection)
    if isempty(root.down)
        if is_subsection
            Base.print(io, "Total snapshots: ")
            printstyled(io, "$(root.count)", color=Base.warn_color())
            Base.println(io, ". Utilization: ", utilization, "%")
        else
            warning_empty()
        end
        return true
    else
        Base.print(io, "Total snapshots: ", root.count, ". Utilization: ", utilization, "%")
    end
    if is_subsection
        # inside a subsection the summary acts as a heading for the tree
        println(io)
        print_tree(io, root, cols, fmt, is_subsection)
    else
        println(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task")
    end
    return false
end
1164

1165
# Count, for every entry of `bt` that directly follows an entry accepted by `matchfunc`,
# how often its line info occurs. A zero entry ends the current run, so the entry after it
# is never counted. Returns `(count, frame)` tuples sorted by decreasing count.
function callersf(matchfunc::Function, bt::Vector, lidict::LineInfoFlatDict)
    counts = Dict{StackFrame, Int}()
    lastmatched = false
    for id in bt
        if id == 0
            lastmatched = false
        else
            li = lidict[id]
            if lastmatched
                counts[li] = haskey(counts, li) ? counts[li] + 1 : 1
            end
            lastmatched = matchfunc(li)
        end
    end
    k = collect(keys(counts))
    v = collect(values(counts))
    order = sortperm(v, rev=true)
    return [(v[i], k[i]) for i in order]
end
1188

1189
# Utilities
1190
# Shorten `str` to `w` characters by keeping its end and prefixing "...".
# Strings of at most `w` characters are returned unchanged.
function rtruncto(str::String, w::Int)
    length(str) <= w && return str
    tailstart = prevind(str, lastindex(str), w - 4)
    return string("...", str[tailstart:end])
end
1197
# Shorten `str` to `w` characters by keeping its start and appending "...".
# Strings of at most `w` characters are returned unchanged.
function ltruncto(str::String, w::Int)
    length(str) <= w && return str
    headstop = nextind(str, 1, w - 4)
    return string(str[1:headstop], "...")
end
1204

1205

1206
# Symbol convenience method: forwards to `truncto` with the symbol's string form.
# NOTE(review): no `truncto(::String, ::Int)` method is visible in this part of the file
# (only `rtruncto` and `ltruncto`) — confirm one exists, otherwise this call cannot dispatch.
function truncto(str::Symbol, w::Int)
    return truncto(string(str), w)
end
1207

1208
# Return the permutation that orders `lilist` by file, then function, then line number.
# Frames equal to `UNKNOWN` are placed last.
function liperm(lilist::Vector{StackFrame})
    function precedes(a::StackFrame, b::StackFrame)
        if a == UNKNOWN
            return false
        elseif b == UNKNOWN
            return true
        end
        # compare field by field, moving on to the next field only on a tie
        c = cmp(a.file, b.file)
        c == 0 && (c = cmp(a.func, b.func))
        c == 0 && (c = cmp(a.line, b.line))
        return c < 0
    end
    return sortperm(lilist, lt = precedes)
end
1225

1226
# Emit the warning shown when a profile contains no samples. With `summary = true` the
# message refers to one or more groups instead of the profile as a whole.
function warning_empty(;summary = false)
    if !summary
        @warn """
        There were no samples collected.
        Run your program longer (perhaps by running it multiple times),
        or adjust the delay between samples with `Profile.init()`."""
    else
        @warn """
        There were no samples collected in one or more groups.
        This may be due to idle threads, or you may need to run your
        program longer (perhaps by running it multiple times),
        or adjust the delay between samples with `Profile.init()`."""
    end
end
1240

1241

1242
"""
    Profile.take_heap_snapshot(io::IOStream, all_one::Bool=false)
    Profile.take_heap_snapshot(filepath::String, all_one::Bool=false)
    Profile.take_heap_snapshot(all_one::Bool=false)

Write a snapshot of the heap, in the JSON format expected by the Chrome
Devtools Heap Snapshot viewer (.heapsnapshot extension).

The snapshot goes to the given IO stream, to the given file path, or, when neither
is given, to a file (`\$pid_\$timestamp.heapsnapshot`) in the current directory.
The methods that write to a file return its path.

If `all_one` is true, then report the size of every object as one so they can be
easily counted. Otherwise, report the actual size.
"""
function take_heap_snapshot(io::IOStream, all_one::Bool=false)
    Base.@_lock_ios(
        io,
        ccall(:jl_gc_take_heap_snapshot, Cvoid, (Ptr{Cvoid}, Cchar), io.handle, Cchar(all_one))
    )
end
function take_heap_snapshot(filepath::String, all_one::Bool=false)
    open(filepath, "w") do stream
        take_heap_snapshot(stream, all_one)
    end
    return filepath
end
function take_heap_snapshot(all_one::Bool=false)
    # default file name: process id and a nanosecond timestamp
    return take_heap_snapshot(abspath(string(getpid(), "_", time_ns(), ".heapsnapshot")), all_one)
end
1267

1268

1269
include("Allocs.jl")
1270

1271
end # module
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc