• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / #37552

pending completion
#37552

push

local

web-flow
Abbreviate varinfo signature and re-order for consistency (#48860)

1 of 1 new or added line in 1 file covered. (100.0%)

72746 of 83846 relevant lines covered (86.76%)

34617131.18 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

3.31
/stdlib/Profile/src/Profile.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
"""
4
Profiling support, main entry point is the [`@profile`](@ref) macro.
5
"""
6
module Profile
4✔
7

8
import Base.StackTraces: lookup, UNKNOWN, show_spec_linfo, StackFrame
9

10
const nmeta = 4 # number of metadata fields per block (threadid, taskid, cpu_cycle_clock, thread_sleeping)
11

12
# deprecated functions: use `getdict` instead
13
# Deprecated convenience method: reinterpret a raw integer address as a pointer and
# defer to the pointer-based `lookup`.
lookup(ip::UInt) = lookup(Ptr{Cvoid}(ip))
14

15
export @profile
16

17
"""
18
    @profile
19

20
`@profile <expression>` runs your expression while taking periodic backtraces. These are
21
appended to an internal buffer of backtraces.
22
"""
23
macro profile(ex)
    # Escape the user's expression so it is evaluated in the caller's scope; the
    # timer calls are left hygienic and resolve to this module's functions.
    body = esc(ex)
    return quote
        try
            start_timer()
            $body
        finally
            # Stop sampling even when the profiled expression throws.
            stop_timer()
        end
    end
end
33

34
# An internal function called to show the report after an information request (SIGINFO or SIGUSR1).
35
function _peek_report()
    # Render the whole report into memory first, then hand it to stderr in one call.
    buf = IOBuffer()
    # Inherit stderr's IO properties and pin the display size to stderr's.
    inherited = IOContext(buf, stderr)
    ctx = IOContext(inherited, :displaysize=>displaysize(stderr))
    print(ctx, groupby = [:thread, :task])
    report = String(take!(buf))
    Base.print(stderr, report)
end
41
# This is a ref so that it can be overridden by other profile info consumers.
42
const peek_report = Ref{Function}(_peek_report)
43

44
"""
45
    get_peek_duration()
46

47
Get the duration in seconds of the profile "peek" that is triggered via `SIGINFO` or `SIGUSR1`, depending on platform.
48
"""
49
get_peek_duration() = @ccall jl_get_profile_peek_duration()::Float64
50
"""
51
    set_peek_duration(t::Float64)
52

53
Set the duration in seconds of the profile "peek" that is triggered via `SIGINFO` or `SIGUSR1`, depending on platform.
54
"""
55
set_peek_duration(t::Float64) = @ccall jl_set_profile_peek_duration(t::Float64)::Cvoid
56

57

58

59
####
60
#### User-level functions
61
####
62

63
"""
64
    init(; n::Integer, delay::Real)
65

66
Configure the `delay` between backtraces (measured in seconds), and the number `n` of instruction pointers that may be
67
stored per thread. Each instruction pointer corresponds to a single line of code; backtraces generally consist of a long
68
list of instruction pointers. Note that 6 spaces for instruction pointers per backtrace are used to store metadata and two
69
NULL end markers. Current settings can be obtained by calling this function with no arguments, and each can be set independently
70
using keywords or in the order `(n, delay)`.
71
"""
72
function init(; n::Union{Nothing,Integer} = nothing, delay::Union{Nothing,Real} = nothing, limitwarn::Bool = true)
    # With neither keyword supplied this call only queries the current settings.
    query_only = n === nothing && delay === nothing
    n_cur = ccall(:jl_profile_maxlen_data, Csize_t, ())
    if n_cur == 0 && query_only
        # A zero capacity means the buffer was never set up: install the defaults
        # so that the values reported below are meaningful.
        default_init()
        n_cur = ccall(:jl_profile_maxlen_data, Csize_t, ())
    end
    # The runtime reports the delay in nanoseconds; expose it in seconds.
    delay_cur = ccall(:jl_profile_delay_nsec, UInt64, ())/10^9
    query_only && return n_cur, delay_cur
    # Any setting that was not supplied keeps its current value.
    return init(something(n, n_cur), something(delay, delay_cur); limitwarn)
end
87

88
function init(n::Integer, delay::Real; limitwarn::Bool = true)
    word_bytes = sizeof(Ptr) # == Sys.WORD_SIZE / 8
    max_bytes_32bit = 2^29
    buffer_samples = n
    buffer_size_bytes = buffer_samples * word_bytes
    if Sys.WORD_SIZE == 32 && buffer_size_bytes > max_bytes_32bit
        # Clamp the request to 512MB on 32-bit systems.
        buffer_samples = fld(max_bytes_32bit, word_bytes)
        buffer_size_bytes = buffer_samples * word_bytes
        if limitwarn
            @warn "Requested profile buffer limited to 512MB (n = $buffer_samples) given that this system is 32-bit"
        end
    end
    # The runtime takes the sampling delay in whole nanoseconds.
    delay_nsec = round(UInt64, 10^9*delay)
    status = ccall(:jl_profile_init, Cint, (Csize_t, UInt64), buffer_samples, delay_nsec)
    status == -1 && error("could not allocate space for ", n, " instruction pointers ($(Base.format_bytes(buffer_size_bytes)))")
    return nothing
end
102

103
function default_init()
    # Default configuration: a max size of 10M profile samples with the timer firing
    # every 1ms (that should typically give around 100 seconds of record).
    n, delay = @static if Sys.iswindows() && Sys.WORD_SIZE == 32
        # The Win32 unwinder is 1000x slower than elsewhere (around 1ms/frame),
        # so we don't want to slow the program down by quite that much
        (1_000_000, 0.01)
    else
        # Keep these values synchronized with trigger_profile_peek
        (10_000_000, 0.001)
    end
    return init(n, delay, limitwarn = false)
end
119

120
# Checks whether the profile buffer has been initialized. If not, initializes it with the default size.
121
function check_init()
    capacity = @ccall jl_profile_maxlen_data()::Int
    # A non-zero capacity means the buffer already exists; nothing to do.
    capacity == 0 || return nothing
    return default_init()
end
127

128
"""
129
    clear()
130

131
Clear any existing backtraces from the internal buffer.
132
"""
133
clear() = @ccall jl_profile_clear_data()::Cvoid
134

135
const LineInfoDict = Dict{UInt64, Vector{StackFrame}}
136
const LineInfoFlatDict = Dict{UInt64, StackFrame}
137

138
# Bundle of the report-formatting options accepted by `print`.
struct ProfileFormat
    maxdepth::Int        # depth limit for the `:tree` format
    mincount::Int        # minimum number of occurrences for a line to be printed
    noisefloor::Float64  # heuristic noise-floor factor (`:tree` format only)
    sortedby::Symbol     # ordering used by the `:flat` format
    combine::Bool        # merge instruction pointers that map to the same line
    C::Bool              # include frames from C and Fortran code
    recur::Symbol        # recursion handling in the `:tree` format

    # Keyword-only constructor; the defaults mirror those of `print`.
    function ProfileFormat(;
            maxdepth::Int = typemax(Int),
            mincount::Int = 0,
            noisefloor = 0,
            sortedby::Symbol = :filefuncline,
            combine = true,
            C = false,
            recur::Symbol = :off)
        return new(maxdepth, mincount, noisefloor, sortedby, combine, C, recur)
    end
end
157

158
# offsets of the metadata in the data stream
159
const META_OFFSET_SLEEPSTATE = 2
160
const META_OFFSET_CPUCYCLECLOCK = 3
161
const META_OFFSET_TASKID = 4
162
const META_OFFSET_THREADID = 5
163

164
"""
165
    print([io::IO = stdout,] [data::Vector = fetch()], [lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data)]; kwargs...)
166

167
Prints profiling results to `io` (by default, `stdout`). If you do not
168
supply a `data` vector, the internal buffer of accumulated backtraces
169
will be used.
170

171
The keyword arguments can be any combination of:
172

173
 - `format` -- Determines whether backtraces are printed with (default, `:tree`) or without (`:flat`)
174
   indentation indicating tree structure.
175

176
 - `C` -- If `true`, backtraces from C and Fortran code are shown (normally they are excluded).
177

178
 - `combine` -- If `true` (default), instruction pointers are merged that correspond to the same line of code.
179

180
 - `maxdepth` -- Omits frames nested deeper than `maxdepth` in the `:tree` format.
181

182
 - `sortedby` -- Controls the order in `:flat` format. `:filefuncline` (default) sorts by the source
183
    line, `:count` sorts in order of number of collected samples, and `:overhead` sorts by the number of samples
184
    incurred by each function by itself.
185

186
 - `groupby` -- Controls grouping over tasks and threads, or no grouping. Options are `:none` (default), `:thread`, `:task`,
187
    `[:thread, :task]`, or `[:task, :thread]` where the last two provide nested grouping.
188

189
 - `noisefloor` -- Limits frames that exceed the heuristic noise floor of the sample (only applies to format `:tree`).
190
    A suggested value to try for this is 2.0 (the default is 0). This parameter hides samples for which `n <= noisefloor * √N`,
191
    where `n` is the number of samples on this line, and `N` is the number of samples for the callee.
192

193
 - `mincount` -- Limits the printout to only those lines with at least `mincount` occurrences.
194

195
 - `recur` -- Controls the recursion handling in `:tree` format. `:off` (default) prints the tree as normal. `:flat` instead
196
    compresses any recursion (by ip), showing the approximate effect of converting any self-recursion into an iterator.
197
    `:flatc` does the same but also includes collapsing of C frames (may do odd things around `jl_apply`).
198

199
 - `threads::Union{Int,AbstractVector{Int}}` -- Specify which threads to include snapshots from in the report. Note that
200
    this does not control which threads samples are collected on (which may also have been collected on another machine).
201

202
 - `tasks::Union{Int,AbstractVector{Int}}` -- Specify which tasks to include snapshots from in the report. Note that this
203
    does not control which tasks samples are collected within.
204
"""
205
function print(io::IO,
        data::Vector{<:Unsigned} = fetch(),
        lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data)
        ;
        format = :tree,
        C = false,
        combine = true,
        maxdepth::Int = typemax(Int),
        mincount::Int = 0,
        noisefloor = 0,
        sortedby::Symbol = :filefuncline,
        groupby::Union{Symbol,AbstractVector{Symbol}} = :none,
        recur::Symbol = :off,
        threads::Union{Int,AbstractVector{Int}} = 1:typemax(Int),
        tasks::Union{UInt,AbstractVector{UInt}} = typemin(UInt):typemax(UInt))

    pf = ProfileFormat(;C, combine, maxdepth, mincount, noisefloor, sortedby, recur)
    if groupby === :none
        # Ungrouped: a single report over every selected thread and task.
        print(io, data, lidict, pf, format, threads, tasks, false)
    else
        if !in(groupby, [:thread, :task, [:task, :thread], [:thread, :task]])
            error(ArgumentError("Unrecognized groupby option: $groupby. Options are :none (default), :task, :thread, [:task, :thread], or [:thread, :task]"))
        elseif Sys.iswindows() && in(groupby, [:thread, [:task, :thread], [:thread, :task]])
            @warn "Profiling on windows is limited to the main thread. Other threads have not been sampled and will not show in the report"
        end
        # Becomes true as soon as any group turns out to be empty. It must start out
        # false, otherwise the summary warning below is emitted unconditionally.
        any_nosamples = false
        if format === :tree
            Base.print(io, "Overhead ╎ [+additional indent] Count File:Line; Function\n")
            Base.print(io, "=========================================================\n")
        end
        if groupby == [:task, :thread]
            # Outer grouping by task, inner grouping by thread.
            taskids = intersect(get_task_ids(data), tasks)
            isempty(taskids) && (any_nosamples = true)
            for taskid in taskids
                threadids = intersect(get_thread_ids(data, taskid), threads)
                if length(threadids) == 0
                    any_nosamples = true
                else
                    nl = length(threadids) > 1 ? "\n" : ""
                    printstyled(io, "Task $(Base.repr(taskid))$nl"; bold=true, color=Base.debug_color())
                    for threadid in threadids
                        printstyled(io, " Thread $threadid "; bold=true, color=Base.info_color())
                        nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
                        nosamples && (any_nosamples = true)
                        println(io)
                    end
                end
            end
        elseif groupby == [:thread, :task]
            # Outer grouping by thread, inner grouping by task.
            threadids = intersect(get_thread_ids(data), threads)
            isempty(threadids) && (any_nosamples = true)
            for threadid in threadids
                taskids = intersect(get_task_ids(data, threadid), tasks)
                if length(taskids) == 0
                    any_nosamples = true
                else
                    nl = length(taskids) > 1 ? "\n" : ""
                    printstyled(io, "Thread $threadid$nl"; bold=true, color=Base.info_color())
                    for taskid in taskids
                        printstyled(io, " Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
                        nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
                        nosamples && (any_nosamples = true)
                        println(io)
                    end
                end
            end
        elseif groupby === :task
            # When grouping by task only, every thread is included.
            threads = 1:typemax(Int)
            taskids = intersect(get_task_ids(data), tasks)
            isempty(taskids) && (any_nosamples = true)
            for taskid in taskids
                printstyled(io, "Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
                nosamples = print(io, data, lidict, pf, format, threads, taskid, true)
                nosamples && (any_nosamples = true)
                println(io)
            end
        elseif groupby === :thread
            # When grouping by thread only, every (non-zero) task id is included.
            tasks = 1:typemax(UInt)
            threadids = intersect(get_thread_ids(data), threads)
            isempty(threadids) && (any_nosamples = true)
            for threadid in threadids
                printstyled(io, "Thread $threadid "; bold=true, color=Base.info_color())
                nosamples = print(io, data, lidict, pf, format, threadid, tasks, true)
                nosamples && (any_nosamples = true)
                println(io)
            end
        end
        any_nosamples && warning_empty(summary = true)
    end
    return
end
296

297
"""
298
    print([io::IO = stdout,] data::Vector, lidict::LineInfoDict; kwargs...)
299

300
Prints profiling results to `io`. This variant is used to examine results exported by a
301
previous call to [`retrieve`](@ref). Supply the vector `data` of backtraces and
302
a dictionary `lidict` of line information.
303

304
See `Profile.print([io], data)` for an explanation of the valid keyword arguments.
305
"""
306
function print(data::Vector{<:Unsigned} = fetch(),
        lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data);
        kwargs...)
    # Same as the `io`-taking method, with the report written to `stdout`.
    return print(stdout, data, lidict; kwargs...)
end
308

309
# Internal worker behind the keyword-based `print` methods: validates the format
# options and dispatches to the `:tree` or `:flat` printer. Returns that printer's
# result (the callers treat it as a "no samples" flag).
function print(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat,
                format::Symbol, threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}},
                is_subsection::Bool = false)
    width::Int = Base.displaysize(io)[2]
    samples = convert(Vector{UInt64}, data)
    fmt.recur ∈ (:off, :flat, :flatc) || throw(ArgumentError("recur value not recognized"))
    if format === :tree
        return tree(io, samples, lidict, width, fmt, threads, tasks, is_subsection)
    elseif format === :flat
        fmt.recur === :off || throw(ArgumentError("format flat only implements recur=:off"))
        return flat(io, samples, lidict, width, fmt, threads, tasks, is_subsection)
    end
    throw(ArgumentError("output format $(repr(format)) not recognized"))
end
326

327
# Collect the distinct task ids recorded in `data`, scanning from the end of the
# buffer. When `threadid` is given, only blocks recorded on that thread are considered.
function get_task_ids(data::Vector{<:Unsigned}, threadid = nothing)
    taskids = UInt[]
    for i in reverse(eachindex(data))
        is_block_end(data, i) || continue
        if threadid === nothing || data[i - META_OFFSET_THREADID] == threadid
            id = data[i - META_OFFSET_TASKID]
            id in taskids || push!(taskids, id)
        end
    end
    return taskids
end
339

340
# Collect the distinct thread ids recorded in `data`, returned in ascending order.
# When `taskid` is given, only blocks recorded for that task are considered.
function get_thread_ids(data::Vector{<:Unsigned}, taskid = nothing)
    threadids = Int[]
    for i in reverse(eachindex(data))
        is_block_end(data, i) || continue
        if taskid === nothing || data[i - META_OFFSET_TASKID] == taskid
            id = data[i - META_OFFSET_THREADID]
            id in threadids || push!(threadids, id)
        end
    end
    return sort(threadids)
end
352

353
# Return `true` when index `i` of `data` is the final NULL of a block's end marker.
function is_block_end(data, i)
    # A complete block is `nmeta` metadata fields followed by two NULLs, so the
    # earliest index that can close a block is `nmeta + 2`. Rejecting anything lower
    # also keeps callers' `data[i - META_OFFSET_THREADID]` reads in bounds.
    i < nmeta + 2 && return false
    # 32-bit linux has been seen to have rogue NULL ips, so we use two to
    # indicate block end, where the 2nd is the actual end index.
    # and we could have (though very unlikely):
    # 1:<stack><metadata><null><null><NULL><metadata><null><null>:end
    # and we want to ignore the triple NULL (which is an ip).
    return data[i] == 0 && data[i - 1] == 0 && data[i - META_OFFSET_SLEEPSTATE] != 0
end
362

363
# Heuristically decide whether `data` carries per-block metadata: look for any
# position that has two trailing NULLs preceded by plausible metadata fields.
function has_meta(data)
    # The deepest field read below is `META_OFFSET_THREADID` behind `i`.
    for i in (META_OFFSET_THREADID + 1):length(data)
        if data[i] == 0 &&                              # first block end null
           data[i - 1] == 0 &&                          # second block end null
           data[i - META_OFFSET_SLEEPSTATE] in 1:2 &&
           data[i - META_OFFSET_CPUCYCLECLOCK] != 0 &&
           data[i - META_OFFSET_TASKID] != 0 &&
           data[i - META_OFFSET_THREADID] != 0
            return true
        end
    end
    return false
end
375

376
"""
377
    retrieve(; kwargs...) -> data, lidict
378

379
"Exports" profiling results in a portable format, returning the set of all backtraces
380
(`data`) and a dictionary that maps the (session-specific) instruction pointers in `data` to
381
`LineInfo` values that store the file name, function name, and line number. This function
382
allows you to save profiling results for future analysis.
383
"""
384
function retrieve(; kwargs...)
    backtraces = fetch(; kwargs...)
    lineinfo = getdict(backtraces)
    return (backtraces, lineinfo)
end
388

389
# Build a fresh line-info dictionary for the instruction pointers found in `data`.
getdict(data::Vector{UInt}) = getdict!(LineInfoDict(), data)
393

394
# Look up every distinct instruction pointer in `data` and store the resulting
# stack frames in `dict`, which is also returned.
function getdict!(dict::LineInfoDict, data::Vector{UInt})
    # we don't want metadata here as we're just looking up ips
    ips = unique(has_meta(data) ? strip_meta(data) : data)
    nips = length(ips)
    nips == 0 && return dict
    frames = similar(ips, Vector{StackFrame})
    sort!(ips) # help each thread to get a disjoint set of libraries, as much as possible
    # Split the lookups into one contiguous chunk per thread in the pool.
    chunksize = div(nips, Threads.threadpoolsize(), RoundUp)
    @sync for chunk in Iterators.partition(eachindex(ips), chunksize)
        Threads.@spawn begin
            for i in chunk
                frames[i] = _lookup_corrected(ips[i])
            end
        end
    end
    for (ip, trace) in zip(ips, frames)
        dict[ip] = trace
    end
    return dict
end
413

414
# Look up the stack frames for `ip` and pass them through
# `Base.update_stackframes_callback[]`.
function _lookup_corrected(ip::UInt)
    frames = lookup(convert(Ptr{Cvoid}, ip))
    # To correct line numbers for moving code, put it in the form expected by
    # Base.update_stackframes_callback[]
    counted = map(frame -> (frame, 1), frames)
    # Note: Base.update_stackframes_callback[] should be data-race free
    try
        Base.invokelatest(Base.update_stackframes_callback[], counted)
    catch
        # best effort: keep the uncorrected frames if the callback fails
    end
    return map(first, counted)
end
423

424
"""
425
    flatten(btdata::Vector, lidict::LineInfoDict) -> (newdata::Vector{UInt64}, newdict::LineInfoFlatDict)
426

427
Produces "flattened" backtrace data. Individual instruction pointers
428
sometimes correspond to a multi-frame backtrace due to inlining; in
429
such cases, this function inserts fake instruction pointers for the
430
inlined calls, and returns a dictionary that is a 1-to-1 mapping
431
between instruction pointers and a single StackFrame.
432
"""
433
function flatten(data::Vector, lidict::LineInfoDict)
    # Fake instruction pointers are handed out counting down from typemax(UInt64) - 1
    next_fake = typemax(UInt64) - 1
    in_use = Set(keys(lidict))  # real ips that must never be reused as fake ones
    flatdict = Dict{UInt64,StackFrame}()
    expansions = Dict{UInt64,Vector{UInt64}}()  # real ip => its fake replacements
    for (ip, trace) in lidict
        if length(trace) == 1
            # already a 1-to-1 mapping: keep the real ip
            flatdict[ip] = trace[1]
            continue
        end
        fake_ips = UInt64[]
        for sf in trace
            # skip over values that collide with a real ip
            while next_fake ∈ in_use && next_fake > 0
                next_fake -= 1
            end
            next_fake == 0 && error("all possible instruction pointers used")
            push!(fake_ips, next_fake)
            flatdict[next_fake] = sf
            next_fake -= 1
        end
        expansions[ip] = fake_ips
    end
    flatdata = UInt64[]
    for ip::UInt64 in data
        replacement = get(expansions, ip, nothing)
        if replacement === nothing
            push!(flatdata, ip)
        else
            append!(flatdata, replacement)
        end
    end
    return (flatdata, flatdict)
end
466

467
# Take a file-system path and try to form a concise representation of it
468
# based on the package ecosystem
469
function short_path(spath::Symbol, filenamecache::Dict{Symbol, String})
    # The shortened form is computed once per path and memoised in `filenamecache`.
    return get!(filenamecache, spath) do
        fullpath = string(spath)
        if !isabspath(fullpath)
            if isfile(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base", fullpath))
                # do the same mechanic for Base (or Core/Compiler) files as below,
                # but they start from a relative path
                return joinpath("@Base", normpath(fullpath))
            end
            # for non-existent relative paths (such as "REPL[1]"), just consider simplifying them
            return normpath(fullpath) # drop leading "./"
        end
        ispath(fullpath) || return fullpath
        # try to replace the file-system prefix with a short "@Module" one,
        # assuming that profile came from the current machine
        # (or at least has the same file-system layout)
        dir = fullpath
        while !isempty(dir)
            dir, leaf = splitdir(dir)
            isempty(leaf) && break
            @assert startswith(fullpath, dir)
            for name in Base.project_names
                candidate = joinpath(dir, name)
                Base.isfile_casesensitive(candidate) || continue
                pkgid = Base.project_file_name_uuid(candidate, "")
                isempty(pkgid.name) && return fullpath # bad Project file
                # return the module name prefix joined with the path suffix
                suffix = fullpath[nextind(fullpath, sizeof(dir)):end]
                return string("@", pkgid.name, suffix)
            end
        end
        return fullpath
    end
end
505

506
"""
507
    callers(funcname, [data, lidict], [filename=<filename>], [linerange=<start:stop>]) -> Vector{Tuple{count, lineinfo}}
508

509
Given a previous profiling run, determine who called a particular function. Supplying the
510
filename (and optionally, range of line numbers over which the function is defined) allows
511
you to disambiguate an overloaded method. The returned value is a vector containing a count
512
of the number of calls and line information about the caller. One can optionally supply
513
backtrace `data` obtained from [`retrieve`](@ref); otherwise, the current internal
514
profile buffer is used.
515
"""
516
function callers end
517

518
function callers(funcname::String, bt::Vector, lidict::LineInfoFlatDict; filename = nothing, linerange = nothing)
    if filename === nothing
        # A line range is meaningless without the file it refers to.
        linerange === nothing || throw(ArgumentError("if supplying linerange, you must also supply the filename"))
        return callersf(li -> String(li.func) == funcname, bt, lidict)
    end
    fname = String(filename)
    # Pick the frame predicate according to how much disambiguation was supplied.
    matches = if linerange === nothing
        li -> String(li.func) == funcname && String(li.file) == fname
    else
        li -> String(li.func) == funcname && String(li.file) == fname && in(li.line, linerange)
    end
    return callersf(matches, bt, lidict)
end
533

534
# Convenience methods: each one normalises its arguments and forwards to the
# `(funcname::String, bt, lidict::LineInfoFlatDict)` method.

# Un-flattened line info: flatten the data first.
callers(funcname::String, bt::Vector, lidict::LineInfoDict; kwargs...) =
    callers(funcname, flatten(bt, lidict)...; kwargs...)
# No data supplied: use the current internal profile buffer.
callers(funcname::String; kwargs...) = callers(funcname, retrieve()...; kwargs...)
# A function object is matched by its printed name. Both dictionary flavours are
# accepted so that `callers(f, retrieve()...)` works like the `String` form does.
callers(func::Function, bt::Vector, lidict::Union{LineInfoDict, LineInfoFlatDict}; kwargs...) =
    callers(string(func), bt, lidict; kwargs...)
callers(func::Function; kwargs...) = callers(string(func), retrieve()...; kwargs...)
540

541
##
542
## For --track-allocation
543
##
544
# Reset the malloc log. Used to avoid counting memory allocated during
545
# compilation.
546

547
"""
548
    clear_malloc_data()
549

550
Clears any stored memory allocation data when running julia with `--track-allocation`.
551
Execute the command(s) you want to test (to force JIT-compilation), then call
552
[`clear_malloc_data`](@ref). Then execute your command(s) again, quit
553
Julia, and examine the resulting `*.mem` files.
554
"""
555
clear_malloc_data() = @ccall jl_clear_malloc_data()::Cvoid
556

557
# C wrappers
558
function start_timer()
    check_init() # if the profile buffer hasn't been initialized, initialize with default size
    status = ccall(:jl_profile_start_timer, Cint, ())
    # Negative status codes are translated through the `error_codes` table.
    status < 0 && error(error_codes[status])
    return nothing
end
565

566

567
stop_timer() = @ccall jl_profile_stop_timer()::Cvoid

is_running() = (@ccall jl_profile_is_running()::Cint) != 0

is_buffer_full() = (@ccall jl_profile_is_buffer_full()::Cint) != 0

# The runtime hands back a byte pointer; expose it as a pointer to machine words.
get_data_pointer() = convert(Ptr{UInt}, @ccall jl_profile_get_data()::Ptr{UInt8})

len_data() = convert(Int, @ccall jl_profile_len_data()::Csize_t)

maxlen_data() = convert(Int, @ccall jl_profile_maxlen_data()::Csize_t)
578

579
# Messages for the negative status codes returned by `jl_profile_start_timer`.
# Declared `const` (with a concrete type) so that lookups from `start_timer` do not
# go through an untyped global.
const error_codes = Dict{Int,String}(
    -1=>"cannot specify signal action for profiling",
    -2=>"cannot create the timer for profiling",
    -3=>"cannot start the timer for profiling",
    -4=>"cannot unblock SIGUSR1")
584

585

586
"""
587
    fetch(;include_meta = true) -> data
588

589
Return a copy of the buffer of profile backtraces. Note that the
590
values in `data` have meaning only on this machine in the current session, because it
591
depends on the exact memory addresses used in JIT-compiling. This function is primarily for
592
internal use; [`retrieve`](@ref) may be a better choice for most users.
593
By default metadata such as threadid and taskid is included. Set `include_meta` to `false` to strip metadata.
Set `limitwarn` to `false` to suppress the warning that is issued when the profile buffer is full.
594
"""
595
function fetch(;include_meta = true, limitwarn = true)
    # A zero capacity means the buffer was never initialized.
    maxlen_data() == 0 && error("The profiling data buffer is not initialized. A profile has not been requested this session.")
    len = len_data()
    if limitwarn && is_buffer_full()
        @warn """The profile data buffer is full; profiling probably terminated
                 before your program finished. To profile for longer runs, call
                 `Profile.init()` with a larger buffer and/or larger delay."""
    end
    # Copy the samples out of the runtime's buffer into a Julia-owned vector.
    snapshot = Vector{UInt}(undef, len)
    GC.@preserve snapshot unsafe_copyto!(pointer(snapshot), get_data_pointer(), len)
    return (include_meta || isempty(snapshot)) ? snapshot : strip_meta(snapshot)
end
613

614
# Return a copy of `data` with each block's metadata fields and one of its two
# trailing NULLs removed.
function strip_meta(data)
    nblocks = count(i -> is_block_end(data, i), eachindex(data))
    stripped = Vector{UInt}(undef, length(data) - nblocks * (nmeta + 1))
    # Copy back-to-front so that each block end is recognised before its metadata.
    src = length(data)
    dst = length(stripped)
    while src > 0 && dst > 0
        stripped[dst] = data[src]
        # After copying a block's final NULL, jump over the metadata fields and the
        # extra NULL IP in addition to the usual single step.
        src -= is_block_end(data, src) ? nmeta + 2 : 1
        dst -= 1
    end
    @assert src == dst == 0 "metadata stripping failed"
    return stripped
end
630

631
"""
632
    Profile.add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0) -> data_with_meta
633

634
The converse of `Profile.fetch(;include_meta = false)`; this will add fake metadata, and can be used
635
for compatibility and by packages (e.g., FlameGraphs.jl) that would rather not depend on the internal
636
details of the metadata format.
637
"""
638
function add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0)
    threadid == 0 && error("Fake threadid cannot be 0")
    taskid == 0 && error("Fake taskid cannot be 0")
    (isempty(data) || !has_meta(data)) || error("input already has metadata")
    clock = UInt64(99)
    out = similar(data, 0)
    for idx in 1:length(data)
        val = data[idx]
        if !iszero(val)
            push!(out, val)
            continue
        end
        # Each NULL terminator is expanded into a full block ending:
        # (threadid, taskid, cpu_cycle_clock, thread_sleeping) followed by two NULLs.
        clock += 1
        push!(out, threadid, taskid, clock, false+1, 0, 0)
    end
    return out
end
655

656
## Print as a flat list
657
# Counts the number of times each line appears, at any nesting level and at the topmost level
658
# Merging multiple equivalent entries and recursive calls
659
# Tally the profile for the flat report. Returns `(lilist, n, m, totalshots, nsleeping)`:
# the distinct entries, how many snapshots each appears in (`n`, counted once per
# snapshot), how many snapshots each was the leaf of (`m`), the number of snapshots that
# passed the thread/task filter, and how many of those were taken while sleeping.
function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, C::Bool,
                    threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}) where {T}
    !isempty(data) && !has_meta(data) && error("Profile data is missing required metadata")
    lilist = StackFrame[]
    n = Int[]
    m = Int[]
    lilist_idx = Dict{T, Int}()
    recursive = Set{T}()   # keys already counted in the current snapshot
    leaf = 0               # index of the most recently visited frame of the current snapshot
    totalshots = 0
    startframe = length(data)
    skip = false
    nsleeping = 0
    # The buffer is walked backwards so that each block's metadata is seen first.
    for i in startframe:-1:1
        (startframe - 1) >= i >= (startframe - (nmeta + 1)) && continue # skip metadata (it's read ahead below) and extra block end NULL IP
        ip = data[i]
        if is_block_end(data, i)
            # read metadata
            thread_sleeping = data[i - META_OFFSET_SLEEPSTATE] - 1 # subtract 1 as state is incremented to avoid being equal to 0
            # cpu_cycle_clock = data[i - META_OFFSET_CPUCYCLECLOCK]
            taskid = data[i - META_OFFSET_TASKID]
            threadid = data[i - META_OFFSET_THREADID]
            if !in(threadid, threads) || !in(taskid, tasks)
                skip = true
                continue
            end
            if thread_sleeping == 1
                nsleeping += 1
            end
            skip = false
            totalshots += 1
            empty!(recursive)
            # Credit the leaf of the snapshot that was just walked.
            if leaf != 0
                m[leaf] += 1
            end
            leaf = 0
            startframe = i
        elseif !skip
            frames = lidict[ip]
            nframes = (frames isa Vector ? length(frames) : 1)
            # the last lookup is the non-inlined root frame, the first is the inlined leaf frame
            for j = nframes:-1:1
                frame = (frames isa Vector ? frames[j] : frames)
                !C && frame.from_c && continue
                key = (T === UInt64 ? ip : frame)
                idx = get!(lilist_idx, key, length(lilist) + 1)
                if idx > length(lilist)
                    push!(recursive, key)
                    push!(lilist, frame)
                    push!(n, 1)
                    push!(m, 0)
                elseif !(key in recursive)
                    push!(recursive, key)
                    n[idx] += 1
                end
                leaf = idx
            end
        end
    end
    # The snapshot stored first in `data` is walked last and is not followed by another
    # block end, so its leaf is still pending here; credit it as well.
    if leaf != 0
        m[leaf] += 1
    end
    @assert length(lilist) == length(n) == length(m) == length(lilist_idx)
    return (lilist, n, m, totalshots, nsleeping)
end
721

722
# Print the flat (per-frame) profile report for `data` to `io`.
#
# `parse_flat` yields the frame list `lilist`, the per-frame counts `n` and overheads `m`,
# the number of snapshots and the number of snapshots whose thread was flagged as sleeping.
# When `is_subsection` is true the "Total snapshots" summary is printed before the table,
# otherwise the table comes first and the summary is followed by a hint about `groupby`.
#
# Returns `true` when there were no frames to print and `false` otherwise.
function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, cols::Int, fmt::ProfileFormat,
                threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool)
    lilist, n, m, totalshots, nsleeping = parse_flat(fmt.combine ? StackFrame : UInt64, data, lidict, fmt.C, threads, tasks)
    if false # optional: drop the "non-interpretable" ones
        keep = map(frame -> frame != UNKNOWN && frame.line != 0, lilist)
        lilist = lilist[keep]
        n = n[keep]
        m = m[keep]
    end
    # With zero snapshots the ratio would be 0/0 == NaN and `round(Int, NaN)` below would
    # throw an InexactError, so report 0% utilization in that case.
    util_perc = totalshots == 0 ? 0.0 : (1 - (nsleeping / totalshots)) * 100
    filenamemap = Dict{Symbol,String}()
    if isempty(lilist)
        if is_subsection
            Base.print(io, "Total snapshots: ")
            printstyled(io, "$(totalshots)", color=Base.warn_color())
            Base.print(io, ". Utilization: ", round(Int, util_perc), "%\n")
        else
            warning_empty()
        end
        return true
    end
    is_subsection || print_flat(io, lilist, n, m, cols, filenamemap, fmt)
    Base.print(io, "Total snapshots: ", totalshots, ". Utilization: ", round(Int, util_perc), "%")
    if is_subsection
        println(io)
        print_flat(io, lilist, n, m, cols, filenamemap, fmt)
    else
        Base.print(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task.\n")
    end
    return false
end
753

754
# Print the flat profile table (Count, Overhead, File, Line, Function) to `io`.
#
# `lilist`, `n` and `m` are parallel vectors: frame, number of snapshots containing the
# frame, and number of snapshots where the frame was the leaf. Rows are ordered by
# `fmt.sortedby` (`:count`, `:overhead`, otherwise by file/function/line via `liperm`) and
# rows with a count below `fmt.mincount` are omitted. `cols` is the available display
# width; `filenamemap` is passed through to `short_path`.
#
# `lilist` must be non-empty (`maximum` is taken over `n` and `m`). Returns `nothing`.
function print_flat(io::IO, lilist::Vector{StackFrame},
        n::Vector{Int}, m::Vector{Int},
        cols::Int, filenamemap::Dict{Symbol,String},
        fmt::ProfileFormat)
    if fmt.sortedby === :count
        p = sortperm(n)
    elseif fmt.sortedby === :overhead
        p = sortperm(m)
    else
        p = liperm(lilist)
    end
    lilist = lilist[p]
    n = n[p]
    m = m[p]
    filenames = String[short_path(li.file, filenamemap) for li in lilist]
    funcnames = String[string(li.func) for li in lilist]
    wcounts = max(6, ndigits(maximum(n)))
    wself = max(9, ndigits(maximum(m)))
    maxline = 1
    maxfile = 6
    maxfunc = 10
    for i in eachindex(lilist)
        li = lilist[i]
        maxline = max(maxline, li.line)
        maxfunc = max(maxfunc, length(funcnames[i]))
        maxfile = max(maxfile, length(filenames[i]))
    end
    wline = max(5, ndigits(maxline))
    ntext = max(20, cols - wcounts - wself - wline - 3)
    maxfunc += 25 # for type signatures
    if maxfile + maxfunc <= ntext
        wfile = maxfile
        # The file column only needs `maxfile`; everything else goes to the function
        # column so that type signatures get the full remaining width.
        wfunc = ntext - maxfile
    else
        wfile = 2*ntext÷5
        wfunc = 3*ntext÷5
    end
    println(io, lpad("Count", wcounts, " "), " ", lpad("Overhead", wself, " "), " ",
            rpad("File", wfile, " "), " ", lpad("Line", wline, " "), " Function")
    println(io, lpad("=====", wcounts, " "), " ", lpad("========", wself, " "), " ",
            rpad("====", wfile, " "), " ", lpad("====", wline, " "), " ========")
    for i in eachindex(n)
        n[i] < fmt.mincount && continue
        li = lilist[i]
        Base.print(io, lpad(string(n[i]), wcounts, " "), " ")
        Base.print(io, lpad(string(m[i]), wself, " "), " ")
        if li == UNKNOWN
            if !fmt.combine && li.pointer != 0
                Base.print(io, "@0x", string(li.pointer, base=16))
            else
                Base.print(io, "[any unknown stackframes]")
            end
        else
            file = filenames[i]
            isempty(file) && (file = "[unknown file]")
            Base.print(io, rpad(rtruncto(file, wfile), wfile, " "), " ")
            Base.print(io, lpad(li.line > 0 ? string(li.line) : "?", wline, " "), " ")
            fname = funcnames[i]
            if !li.from_c && li.linfo !== nothing
                # prefer the specialized signature when method-instance info is attached
                fname = sprint(show_spec_linfo, li)
            end
            isempty(fname) && (fname = "[unknown function]")
            Base.print(io, ltruncto(fname, wfunc))
        end
        println(io)
    end
    nothing
end
822

823
## A tree representation
824

825
# A node of the prefix trie that accumulates backtrace counts.
# `T` is the key type of the child table: `UInt64` or `StackFrame`.
mutable struct StackFrameTree{T} # where T <: Union{UInt64, StackFrame}
    # --- content ---
    frame::StackFrame   # frame represented by this node
    count::Int          # number of frames this appeared in
    overhead::Int       # number frames where this was the code being executed
    flat_count::Int     # number of times this frame was in the flattened representation (unlike count, this'll sum to 100% of parent)
    max_recur::Int      # maximum number of times this frame was the *top* of the recursion in the stack
    count_recur::Int    # sum of the number of times this frame was the *top* of the recursion in a stack (divide by count to get an average)
    down::Dict{T, StackFrameTree{T}} # children, keyed by `T`
    # --- scratch state used while the tree is being built ---
    recur::Int
    builder_key::Vector{UInt64}
    builder_value::Vector{StackFrameTree{T}}
    up::StackFrameTree{T}

    # Create an empty node: `UNKNOWN` frame, all counters zero, no children.
    # The `up` field is intentionally left unassigned.
    function StackFrameTree{T}() where {T}
        children = Dict{T, StackFrameTree{T}}()
        return new(UNKNOWN, 0, 0, 0, 0, 0, children, 0, UInt64[], StackFrameTree{T}[])
    end
end
842

843

844
# One full period of the indentation pattern, and the string index of each of its characters.
const indent_s = "    ╎"^10
const indent_z = collect(eachindex(indent_s))

# Return the first `depth` characters of the (infinitely repeated) indentation pattern.
# A `depth` below 1 gives the empty string.
function indent(depth::Int)
    if depth < 1
        return ""
    end
    period = length(indent_z)
    if depth <= period
        return indent_s[1:indent_z[depth]]
    end
    # more than one period: whole repetitions followed by a partial one
    nfull, npartial = divrem(depth, period)
    prefix = indent_s^nfull
    if npartial == 0
        return prefix
    end
    return prefix * SubString(indent_s, 1, indent_z[npartial])
end
854

855
# Build the display string for every node in `frames` (the children shown at tree depth
# `level`). `maxes` supplies the tree-wide maxima used to size the overhead and count
# columns, `cols` is the display width, `filenamemap` is passed to `short_path`, and
# `showpointer` prefixes each function name with the frame's pointer in hex.
# Returns a `Vector{String}` parallel to `frames`.
function tree_format(frames::Vector{<:StackFrameTree}, level::Int, cols::Int, maxes, filenamemap::Dict{Symbol,String}, showpointer::Bool)
    nindent = min(cols>>1, level)
    ndigoverhead = ndigits(maxes.overhead)
    ndigcounts = ndigits(maxes.count)
    ndigline = ndigits(maximum(node.frame.line for node in frames)) + 6
    ntext = max(30, cols - ndigoverhead - nindent - ndigcounts - ndigline - 6)
    widthfile = 2*ntext÷5 # min 12
    widthfunc = 3*ntext÷5 # min 18
    ptrpad = 2*sizeof(Ptr{Cvoid})

    # When the level is deeper than the indentation we can draw, the remainder is shown
    # as a "+N" marker and the drawn indentation shrinks to make room for it.
    showextra = level > nindent
    if showextra
        nextra = level - nindent
        nindent -= ndigits(nextra) + 2
    end

    # The indentation prefix is identical for every row at this level.
    base = nindent == 0 ? "" : indent(nindent - 1) * " "
    if showextra
        base = string(base, "+", nextra, " ")
    end

    strs = Vector{String}(undef, length(frames))
    for (i, node) in enumerate(frames)
        li = node.frame
        stroverhead = lpad(node.overhead > 0 ? string(node.overhead) : "", ndigoverhead, " ")
        strcount = rpad(string(node.count), ndigcounts, " ")
        if li == UNKNOWN
            strs[i] = string(stroverhead, "╎", base, strcount, " [unknown stackframe]")
        elseif li.line == li.pointer
            strs[i] = string(stroverhead, "╎", base, strcount, " ",
                "[unknown function] (pointer: 0x",
                string(li.pointer, base = 16, pad = ptrpad),
                ")")
        else
            fname = if !li.from_c && li.linfo !== nothing
                sprint(show_spec_linfo, li)
            else
                string(li.func)
            end
            filename = short_path(li.file, filenamemap)
            if showpointer
                fname = string("0x", string(li.pointer, base = 16, pad = ptrpad), " ", fname)
            end
            strs[i] = string(stroverhead, "╎", base, strcount, " ",
                rtruncto(filename, widthfile),
                ":",
                li.line == -1 ? "?" : string(li.line),
                "; ",
                ltruncto(fname, widthfunc))
        end
    end
    return strs
end
912

913
# Turn a list of backtraces into a tree (implicitly separated by NULL markers).
#
# `all` is the raw profile buffer; it is walked from the end towards the start. Each block
# ends with metadata (thread id, task id, cpu cycle clock, sleep state) which is read when
# the block-end marker is reached. Blocks whose thread or task is not in `threads`/`tasks`
# (when those are not `nothing`) are skipped. `lidict` maps an instruction pointer to its
# frame(s); `C` controls whether frames flagged `from_c` are kept; `recur` selects the
# recursion handling (`:off`, `:flat` or `:flatc`).
#
# Mutates `root` in place and returns `(root, nsleeping)`, where `nsleeping` is the number
# of accepted blocks whose sleep-state metadata marked the thread as sleeping.
function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineInfoFlatDict, LineInfoDict}, C::Bool, recur::Symbol,
                threads::Union{Int,AbstractVector{Int},Nothing}=nothing, tasks::Union{UInt,AbstractVector{UInt},Nothing}=nothing) where {T}
    !isempty(all) && !has_meta(all) && error("Profile data is missing required metadata")
    parent = root
    tops = Vector{StackFrameTree{T}}()
    build = Vector{StackFrameTree{T}}()
    startframe = length(all)
    skip = false
    nsleeping = 0
    for i in startframe:-1:1
        (startframe - 1) >= i >= (startframe - (nmeta + 1)) && continue # skip metadata (it's read ahead below) and extra block end NULL IP
        ip = all[i]
        if is_block_end(all, i)
            # read metadata
            thread_sleeping = all[i - META_OFFSET_SLEEPSTATE] - 1 # subtract 1 as state is incremented to avoid being equal to 0
            # cpu_cycle_clock = all[i - META_OFFSET_CPUCYCLECLOCK]
            taskid = all[i - META_OFFSET_TASKID]
            threadid = all[i - META_OFFSET_THREADID]
            if (threads !== nothing && !in(threadid, threads)) ||
               (tasks !== nothing && !in(taskid, tasks))
                skip = true
                continue
            end
            if thread_sleeping == 1
                nsleeping += 1
            end
            skip = false
            # sentinel value indicates the start of a new backtrace
            empty!(build)
            root.recur = 0
            if recur !== :off
                # We mark all visited nodes so we'll only count those branches
                # once for each backtrace. Reset that now for the next backtrace.
                push!(tops, parent)
                for top in tops
                    while top.recur != 0
                        top.max_recur < top.recur && (top.max_recur = top.recur)
                        top.recur = 0
                        top = top.up
                    end
                end
                empty!(tops)
            end
            # every node on the path of the finished backtrace gets one flat count
            let this = parent
                while this !== root
                    this.flat_count += 1
                    this = this.up
                end
            end
            # the node the previous backtrace ended on was the code being executed
            parent.overhead += 1
            parent = root
            root.count += 1
            startframe = i
        elseif !skip
            if recur === :flat || recur === :flatc
                pushfirst!(build, parent)
                # Rewind the `parent` tree back, if this exact ip was already present *higher* in the current tree
                found = false
                for j in 1:(startframe - i)
                    if ip == all[i + j]
                        if recur === :flat # if not flattening C frames, check that now
                            frames = lidict[ip]
                            frame = (frames isa Vector ? frames[1] : frames)
                            frame.from_c && break # not flattening this frame
                        end
                        push!(tops, parent)
                        parent = build[j]
                        parent.recur += 1
                        parent.count_recur += 1
                        found = true
                        break
                    end
                end
                found && continue
            end
            builder_key = parent.builder_key
            builder_value = parent.builder_value
            fastkey = searchsortedfirst(builder_key, ip)
            # `searchsortedfirst` returns an index in 1:length+1, so the last slot is a
            # valid hit too; using `<` here would miss it, repeat the slow lookup and
            # insert a duplicate key below.
            if fastkey <= length(builder_key) && builder_key[fastkey] === ip
                # jump forward to the end of the inlining chain
                # avoiding an extra (slow) lookup of `ip` in `lidict`
                # and an extra chain of them in `down`
                # note that we may even have this === parent (if we're ignoring this frame ip)
                this = builder_value[fastkey]
                let this = this
                    while this !== parent && (recur === :off || this.recur == 0)
                        this.count += 1
                        this.recur = 1
                        this = this.up
                    end
                end
                parent = this
                continue
            end

            frames = lidict[ip]
            nframes = (frames isa Vector ? length(frames) : 1)
            this = parent
            # add all the inlining frames
            for k = nframes:-1:1
                frame = (frames isa Vector ? frames[k] : frames)
                !C && frame.from_c && continue
                key = (T === UInt64 ? ip : frame)
                this = get!(StackFrameTree{T}, parent.down, key)
                if recur === :off || this.recur == 0
                    this.frame = frame
                    this.up = parent
                    this.count += 1
                    this.recur = 1
                end
                parent = this
            end
            # record where the end of this chain is for this ip
            insert!(builder_key, fastkey, ip)
            insert!(builder_value, fastkey, this)
        end
    end
    # Reset the construction-only fields of every node (iteratively, via a worklist).
    function cleanup!(node::StackFrameTree)
        stack = [node]
        while !isempty(stack)
            node = pop!(stack)
            node.recur = 0
            empty!(node.builder_key)
            empty!(node.builder_value)
            append!(stack, values(node.down))
        end
        nothing
    end
    cleanup!(root)
    return root, nsleeping
end
1045

1046
# Walk the whole tree under `root` (iteratively, with an explicit worklist) and return the
# largest `count`, `flat_count`, `overhead` and `max_recur` found on any node, as the
# named tuple `(count, count_flat, overhead, max_recur)`.
function maxstats(root::StackFrameTree)
    best_count = 0
    best_flat = 0
    best_overhead = 0
    best_recur = 0
    pending = [root]
    while !isempty(pending)
        node = pop!(pending)
        best_count = max(best_count, node.count)
        best_overhead = max(best_overhead, node.overhead)
        best_flat = max(best_flat, node.flat_count)
        best_recur = max(best_recur, node.max_recur)
        append!(pending, values(node.down))
    end
    return (count=best_count, count_flat=best_flat, overhead=best_overhead, max_recur=best_recur)
end
1062

1063
# Print the stack frame tree rooted at `bt` to `io`, depth first. An explicit worklist is
# used instead of recursion to avoid stack overflows. The column header is printed unless
# `is_subsection` is true. Children are ordered by `fmt.sortedby`, nodes deeper than
# `fmt.maxdepth` are not expanded, and children below `fmt.mincount` or below the
# noise floor derived from their parent's count are dropped.
function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat, is_subsection::Bool) where T
    maxes = maxstats(bt)
    filenamemap = Dict{Symbol,String}()
    worklist = [(bt, 0, 0, "")]
    if !is_subsection
        Base.print(io, "Overhead ╎ [+additional indent] Count File:Line; Function\n")
        Base.print(io, "=========================================================\n")
    end
    while !isempty(worklist)
        (bt, level, noisefloor, str) = popfirst!(worklist)
        # the root entry carries an empty string and prints nothing
        if !isempty(str)
            println(io, str)
        end
        if level > fmt.maxdepth || isempty(bt.down)
            continue
        end
        # Order the line information
        nexts = collect(values(bt.down))
        # Generate the string for each line
        strs = tree_format(nexts, level, cols, maxes, filenamemap, T === UInt64)
        # Recurse to the next level
        sortedby = fmt.sortedby
        p = if sortedby === :count
            sortperm([node.count for node in nexts])
        elseif sortedby === :overhead
            sortperm([node.overhead for node in nexts])
        elseif sortedby === :flat_count
            sortperm([node.flat_count for node in nexts])
        else
            liperm([node.frame for node in nexts])
        end
        # Push in reverse so that the first element of `p` ends up at the front of the
        # worklist and is therefore printed first.
        for i in reverse(p)
            down = nexts[i]
            count = down.count
            (count < fmt.mincount || count < noisefloor) && continue
            noisefloor_down = fmt.noisefloor > 0 ? floor(Int, fmt.noisefloor * sqrt(count)) : 0
            pushfirst!(worklist, (down, level + 1, noisefloor_down, strs[i]))
        end
    end
    return
end
1108

1109
# Build the backtrace tree for `data` (keyed by `StackFrame` when `fmt.combine` is set,
# by instruction pointer otherwise) and print it to `io`.
#
# When `is_subsection` is true the "Total snapshots" summary is printed before the tree,
# otherwise the tree comes first and the summary is followed by a hint about `groupby`.
#
# Returns `true` when the tree has no children (nothing to show) and `false` otherwise.
function tree(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoFlatDict, LineInfoDict}, cols::Int, fmt::ProfileFormat,
                threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool)
    if fmt.combine
        root, nsleeping = tree!(StackFrameTree{StackFrame}(), data, lidict, fmt.C, fmt.recur, threads, tasks)
    else
        root, nsleeping = tree!(StackFrameTree{UInt64}(), data, lidict, fmt.C, fmt.recur, threads, tasks)
    end
    # With zero snapshots the ratio would be 0/0 == NaN and `round(Int, NaN)` below would
    # throw an InexactError, so report 0% utilization in that case.
    util_perc = root.count == 0 ? 0.0 : (1 - (nsleeping / root.count)) * 100
    is_subsection || print_tree(io, root, cols, fmt, is_subsection)
    if isempty(root.down)
        if is_subsection
            Base.print(io, "Total snapshots: ")
            printstyled(io, "$(root.count)", color=Base.warn_color())
            Base.print(io, ". Utilization: ", round(Int, util_perc), "%\n")
        else
            warning_empty()
        end
        return true
    end
    Base.print(io, "Total snapshots: ", root.count, ". Utilization: ", round(Int, util_perc), "%")
    if is_subsection
        Base.println(io)
        print_tree(io, root, cols, fmt, is_subsection)
    else
        Base.print(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task.\n")
    end
    return false
end
1137

1138
# Tally, for every entry of `bt` that directly follows an entry accepted by `matchfunc`,
# how often its frame occurs. A zero entry ends the current backtrace and resets the
# "previous entry matched" state. Returns a vector of `(count, frame)` tuples sorted by
# decreasing count.
function callersf(matchfunc::Function, bt::Vector, lidict::LineInfoFlatDict)
    counts = Dict{StackFrame, Int}()
    prev_matched = false
    for id in bt
        if id == 0
            prev_matched = false
        else
            li = lidict[id]
            if prev_matched
                counts[li] = get(counts, li, 0) + 1
            end
            prev_matched = matchfunc(li)
        end
    end
    frames = collect(keys(counts))
    tallies = collect(values(counts))
    order = sortperm(tallies, rev=true)
    return [(tallies[i], frames[i]) for i in order]
end
1161

1162
# Utilities
1163
# Shorten `str` to at most `w` characters, keeping its *right* end.
#
# Strings that already fit are returned unchanged. Otherwise the result is `"..."`
# followed by the last `w - 3` characters. For `w < 4` there is no room for the ellipsis
# plus a character, so just the last `w` characters are returned (empty for `w <= 0`)
# instead of throwing from `prevind` with a negative count.
function rtruncto(str::String, w::Int)
    length(str) <= w && return str
    w < 4 && return last(str, max(w, 0))
    return string("...", str[prevind(str, end, w-4):end])
end
1170
# Shorten `str` to at most `w` characters, keeping its *left* end.
#
# Strings that already fit are returned unchanged. Otherwise the result is the first
# `w - 3` characters followed by `"..."`. For `w < 4` there is no room for the ellipsis
# plus a character, so just the first `w` characters are returned (empty for `w <= 0`)
# instead of throwing from `nextind` with a negative count.
function ltruncto(str::String, w::Int)
    length(str) <= w && return str
    w < 4 && return first(str, max(w, 0))
    return string(str[1:nextind(str, 1, w-4)], "...")
end
1177

1178

1179
# Symbol convenience method: convert to a string and forward to the `String` method.
# NOTE(review): no `truncto(::String, ::Int)` is visible in this part of the file —
# confirm that it exists, otherwise this method can only raise a `MethodError`.
function truncto(str::Symbol, w::Int)
    return truncto(string(str), w)
end
×
1180

1181
# Return the permutation that sorts `lilist` by file, then function, then line number.
# Frames equal to `UNKNOWN` are ordered after every other frame.
function liperm(lilist::Vector{StackFrame})
    function precedes(a::StackFrame, b::StackFrame)
        a == UNKNOWN && return false
        b == UNKNOWN && return true
        byfile = cmp(a.file, b.file)
        byfile == 0 || return byfile < 0
        byfunc = cmp(a.func, b.func)
        byfunc == 0 || return byfunc < 0
        return cmp(a.line, b.line) < 0
    end
    return sortperm(lilist; lt = precedes)
end
1198

1199
# Emit the "no samples" warning. With `summary = true` the wording covers the grouped
# report, where only some of the groups may have been empty.
function warning_empty(; summary = false)
    if !summary
        @warn """
        There were no samples collected.
        Run your program longer (perhaps by running it multiple times),
        or adjust the delay between samples with `Profile.init()`."""
    else
        @warn """
        There were no samples collected in one or more groups.
        This may be due to idle threads, or you may need to run your
        program longer (perhaps by running it multiple times),
        or adjust the delay between samples with `Profile.init()`."""
    end
end
1213

1214

1215
"""
    Profile.take_heap_snapshot(io::IOStream, all_one::Bool=false)
    Profile.take_heap_snapshot(filepath::String, all_one::Bool=false)
    Profile.take_heap_snapshot(all_one::Bool=false)

Write a snapshot of the heap, in the JSON format expected by the Chrome
Devtools Heap Snapshot viewer (.heapsnapshot extension). The snapshot goes
to the given IO stream, to the given file path, or — when neither is
given — to a file named `\$pid_\$timestamp.heapsnapshot` in the current
directory.

If `all_one` is true, the size of every object is reported as one so that
objects can be easily counted. Otherwise, the actual size is reported.
"""
function take_heap_snapshot(io::IOStream, all_one::Bool=false)
    flag = Cchar(all_one)
    # hold the stream's lock while the runtime writes through its handle
    Base.@_lock_ios(io, ccall(:jl_gc_take_heap_snapshot, Cvoid, (Ptr{Cvoid}, Cchar), io.handle, flag))
end
1230
# Write the heap snapshot to the file at `filepath` (opened for writing) and
# return `filepath`.
function take_heap_snapshot(filepath::String, all_one::Bool=false)
    open(io -> take_heap_snapshot(io, all_one), filepath, "w")
    return filepath
end
1236
# Write the heap snapshot to `<pid>_<time_ns>.heapsnapshot`, resolved against the
# current directory, and return that absolute path.
function take_heap_snapshot(all_one::Bool=false)
    filename = string(getpid(), "_", time_ns(), ".heapsnapshot")
    return take_heap_snapshot(abspath(filename), all_one)
end
1240

1241

1242
include("Allocs.jl")
1243
include("precompile.jl")
1244

1245
end # module
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc