• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / #37933

15 Oct 2024 07:30AM UTC coverage: 87.724% (+1.3%) from 86.441%
#37933

push

local

web-flow
Fix markdown list in installation.md (#56165)

Documenter.jl requires all trailing list content to follow the same
indentation as the header. So, in the current view
(https://docs.julialang.org/en/v1/manual/installation/#Command-line-arguments)
the list appears broken.

78955 of 90004 relevant lines covered (87.72%)

16956008.11 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

85.45
/stdlib/Profile/src/Profile.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
"""
4
    Profile
5

6
Profiling support.
7

8
## CPU profiling
9
- `@profile foo()` to profile a specific call.
10
- `Profile.print()` to print the report. Paths are clickable links in supported terminals and specialized for JULIA_EDITOR etc.
11
- `Profile.clear()` to clear the buffer.
12
- Send a $(Sys.isbsd() ? "SIGINFO (ctrl-t)" : "SIGUSR1") signal to the process to automatically trigger a profile and print.
13

14
## Memory profiling
15
- `Profile.Allocs.@profile [sample_rate=0.1] foo()` to sample allocations within a specific call. A sample rate of 1.0 will record everything; 0.0 will record nothing.
16
- `Profile.Allocs.print()` to print the report.
17
- `Profile.Allocs.clear()` to clear the buffer.
18

19
## Heap profiling
20
- `Profile.take_heap_snapshot()` to record a `.heapsnapshot` record of the heap.
21
- Set `JULIA_PROFILE_PEEK_HEAP_SNAPSHOT=true` to capture a heap snapshot when signal $(Sys.isbsd() ? "SIGINFO (ctrl-t)" : "SIGUSR1") is sent.
22
"""
23
module Profile
24

25
global print
26
export @profile
27
public clear,
28
    print,
29
    fetch,
30
    retrieve,
31
    add_fake_meta,
32
    flatten,
33
    callers,
34
    init,
35
    take_heap_snapshot,
36
    take_page_profile,
37
    clear_malloc_data,
38
    Allocs
39

40
import Base.StackTraces: lookup, UNKNOWN, show_spec_linfo, StackFrame
41
import Base: AnnotatedString
42
using StyledStrings: @styled_str
43

44
# Every sample block in the profile buffer carries `nmeta` metadata words, placed just
# before the two NULL words that terminate the block.
const nmeta = 4 # number of metadata fields per block (threadid, taskid, cpu_cycle_clock, thread_sleeping)
45

46
# deprecated functions: use `getdict` instead
47
function lookup(ip::UInt)
    # Turn the raw address into a pointer and defer to the pointer-based `lookup` method.
    return lookup(Ptr{Cvoid}(ip))
end
5,541✔
48

49
"""
50
    @profile
51

52
`@profile <expression>` runs your expression while taking periodic backtraces. These are
53
appended to an internal buffer of backtraces.
54
"""
55
macro profile(ex)
    # The user expression is escaped so that it is evaluated in the caller's scope.
    body = esc(ex)
    return quote
        try
            start_timer()
            $body
        finally
            # Always stop sampling, even when the profiled expression throws.
            stop_timer()
        end
    end
end
65

66
# An internal function called to show the report after an information request (SIGINFO or SIGUSR1).
function _peek_report()
    # Collect the whole report in an annotated in-memory buffer, then write it to
    # stderr with a single call.
    buffer = Base.AnnotatedIOBuffer()
    # Inherit stderr's IO properties and state its display size explicitly.
    report_io = IOContext(IOContext(buffer, stderr), :displaysize => displaysize(stderr))
    print(report_io; groupby = [:thread, :task])
    report = read(seekstart(buffer), AnnotatedString)
    Base.print(stderr, report)
end
# This is a ref so that it can be overridden by other profile info consumers.
const peek_report = Ref{Function}(_peek_report)
75

76
"""
77
    get_peek_duration()
78

79
Get the duration in seconds of the profile "peek" that is triggered via `SIGINFO` or `SIGUSR1`, depending on platform.
80
"""
81
function get_peek_duration()
    return @ccall jl_get_profile_peek_duration()::Float64
end
×
82
"""
83
    set_peek_duration(t::Float64)
84

85
Set the duration in seconds of the profile "peek" that is triggered via `SIGINFO` or `SIGUSR1`, depending on platform.
86
"""
87
function set_peek_duration(t::Float64)
    return @ccall jl_set_profile_peek_duration(t::Float64)::Cvoid
end
×
88

89

90

91
####
92
#### User-level functions
93
####
94

95
"""
96
    init(; n::Integer, delay::Real)
97

98
Configure the `delay` between backtraces (measured in seconds), and the number `n` of instruction pointers that may be
99
stored per thread. Each instruction pointer corresponds to a single line of code; backtraces generally consist of a long
100
list of instruction pointers. Note that 6 spaces for instruction pointers per backtrace are used to store metadata and two
101
NULL end markers. Current settings can be obtained by calling this function with no arguments, and each can be set independently
102
using keywords or in the order `(n, delay)`.
103
"""
104
function init(; n::Union{Nothing,Integer} = nothing, delay::Union{Nothing,Real} = nothing, limitwarn::Bool = true)
    # With neither keyword given this is a pure query of the current settings.
    query_only = n === nothing && delay === nothing
    n_cur = @ccall jl_profile_maxlen_data()::Csize_t
    if query_only && n_cur == 0
        # indicates that the buffer hasn't been initialized at all, so set the default
        default_init()
        n_cur = @ccall jl_profile_maxlen_data()::Csize_t
    end
    # The runtime reports the delay in nanoseconds; expose it in seconds.
    delay_cur = (@ccall jl_profile_delay_nsec()::UInt64) / 10^9
    if query_only
        return (n_cur, delay_cur)
    end
    # Any setting that was not supplied keeps its current value.
    return init(something(n, n_cur), something(delay, delay_cur); limitwarn)
end
119

120
# Positional form of `init`: (re)allocate the profile buffer for `n` instruction pointers
# sampled every `delay` seconds. On 32-bit systems the buffer is capped at 2^29 bytes;
# `limitwarn` controls whether hitting that cap emits a warning.
function init(n::Integer, delay::Real; limitwarn::Bool = true)
    word_bytes = sizeof(Ptr) # == Sys.WORD_SIZE / 8
    max_bytes = 2^29
    buffer_samples = n
    buffer_size_bytes = buffer_samples * word_bytes
    if Sys.WORD_SIZE == 32 && buffer_size_bytes > max_bytes
        buffer_samples = max_bytes ÷ word_bytes
        buffer_size_bytes = buffer_samples * word_bytes
        limitwarn && @warn "Requested profile buffer limited to 512MB (n = $buffer_samples) given that this system is 32-bit"
    end
    # The runtime takes the sampling delay in whole nanoseconds.
    delay_nsec = round(UInt64, 10^9*delay)
    status = @ccall jl_profile_init(buffer_samples::Csize_t, delay_nsec::UInt64)::Cint
    if status == -1
        error("could not allocate space for ", n, " instruction pointers ($(Base.format_bytes(buffer_size_bytes)))")
    end
end
134

135
function default_init()
    # init with default values
    # Use a max size of 10M profile samples, and fire timer every 1ms
    # (that should typically give around 100 seconds of record)
    n, delay = @static if Sys.iswindows() && Sys.WORD_SIZE == 32
        # The Win32 unwinder is 1000x slower than elsewhere (around 1ms/frame),
        # so we don't want to slow the program down by quite that much
        (1_000_000, 0.01)
    else
        # Keep these values synchronized with trigger_profile_peek
        (10_000_000, 0.001)
    end
    return init(n, delay; limitwarn = false)
end
151

152
# Checks whether the profile buffer has been initialized. If not, initializes it with the default size.
function check_init()
    # A maximum buffer length of zero means no buffer has been allocated yet.
    initialized = (@ccall jl_profile_maxlen_data()::Int) != 0
    return initialized ? nothing : default_init()
end
159

160
"""
161
    clear()
162

163
Clear any existing backtraces from the internal buffer.
164
"""
165
function clear()
    return @ccall jl_profile_clear_data()::Cvoid
end
2✔
166

167
# Maps an instruction pointer to the stack frames it resolves to; there can be several
# frames per pointer (see `flatten`, which reduces this to one frame per pointer).
const LineInfoDict = Dict{UInt64, Vector{StackFrame}}
168
# One-to-one mapping from a (possibly fake) instruction pointer to a single stack frame,
# as produced by `flatten`.
const LineInfoFlatDict = Dict{UInt64, StackFrame}
169

170
# Bundle of the formatting options that `print` collects from its keyword arguments and
# passes on to the report printers. See the `print` docstring for their meaning.
struct ProfileFormat
    maxdepth::Int
    mincount::Int
    noisefloor::Float64 # any supplied number is converted to Float64 by `new`
    sortedby::Symbol
    combine::Bool
    C::Bool
    recur::Symbol

    # Keyword-only constructor; every option has a default.
    ProfileFormat(;
        C = false,
        combine = true,
        maxdepth::Int = typemax(Int),
        mincount::Int = 0,
        noisefloor = 0,
        sortedby::Symbol = :filefuncline,
        recur::Symbol = :off,
    ) = new(maxdepth, mincount, noisefloor, sortedby, combine, C, recur)
end
189

190
# Offsets of the metadata fields in the data stream. Each offset is counted backwards
# from the index `i` of the last NULL of a block, i.e. the field lives at `data[i - OFFSET]`.
const META_OFFSET_SLEEPSTATE = 2    # thread sleep state
const META_OFFSET_CPUCYCLECLOCK = 3 # CPU cycle clock
const META_OFFSET_TASKID = 4        # task id
const META_OFFSET_THREADID = 5      # thread id
195

196
"""
197
    print([io::IO = stdout,] [data::Vector = fetch()], [lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data)]; kwargs...)
198

199
Prints profiling results to `io` (by default, `stdout`). If you do not
200
supply a `data` vector, the internal buffer of accumulated backtraces
201
will be used. Paths are clickable links in supported terminals and
202
specialized for [`JULIA_EDITOR`](@ref) with line numbers, or just file
203
links if no editor is set.
204

205
The keyword arguments can be any combination of:
206

207
 - `format` -- Determines whether backtraces are printed with (default, `:tree`) or without (`:flat`)
208
   indentation indicating tree structure.
209

210
 - `C` -- If `true`, backtraces from C and Fortran code are shown (normally they are excluded).
211

212
 - `combine` -- If `true` (default), instruction pointers are merged that correspond to the same line of code.
213

214
 - `maxdepth` -- Limits the output in the `:tree` format to frames no deeper than `maxdepth`.
215

216
 - `sortedby` -- Controls the order in `:flat` format. `:filefuncline` (default) sorts by the source
217
    line, `:count` sorts in order of number of collected samples, and `:overhead` sorts by the number of samples
218
    incurred by each function by itself.
219

220
 - `groupby` -- Controls grouping over tasks and threads, or no grouping. Options are `:none` (default), `:thread`, `:task`,
221
    `[:thread, :task]`, or `[:task, :thread]` where the last two provide nested grouping.
222

223
 - `noisefloor` -- Limits frames that exceed the heuristic noise floor of the sample (only applies to format `:tree`).
224
    A suggested value to try for this is 2.0 (the default is 0). This parameter hides samples for which `n <= noisefloor * √N`,
225
    where `n` is the number of samples on this line, and `N` is the number of samples for the callee.
226

227
 - `mincount` -- Limits the printout to only those lines with at least `mincount` occurrences.
228

229
 - `recur` -- Controls the recursion handling in `:tree` format. `:off` (default) prints the tree as normal. `:flat` instead
230
    compresses any recursion (by ip), showing the approximate effect of converting any self-recursion into an iterator.
231
    `:flatc` does the same but also includes collapsing of C frames (may do odd things around `jl_apply`).
232

233
 - `threads::Union{Int,AbstractVector{Int}}` -- Specify which threads to include snapshots from in the report. Note that
234
    this does not control which threads samples are collected on (which may also have been collected on another machine).
235

236
 - `tasks::Union{UInt,AbstractVector{UInt}}` -- Specify which tasks to include snapshots from in the report. Note that this
237
    does not control which tasks samples are collected within.
238

239
!!! compat "Julia 1.8"
240
    The `groupby`, `threads`, and `tasks` keyword arguments were introduced in Julia 1.8.
241

242
!!! note
243
    Profiling on windows is limited to the main thread. Other threads have not been sampled and will not show in the report.
244

245
"""
246
function print(io::IO,
        data::Vector{<:Unsigned} = fetch(),
        lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data)
        ;
        format = :tree,
        C = false,
        combine = true,
        maxdepth::Int = typemax(Int),
        mincount::Int = 0,
        noisefloor = 0,
        sortedby::Symbol = :filefuncline,
        groupby::Union{Symbol,AbstractVector{Symbol}} = :none,
        recur::Symbol = :off,
        threads::Union{Int,AbstractVector{Int}} = 1:typemax(Int),
        tasks::Union{UInt,AbstractVector{UInt}} = typemin(UInt):typemax(UInt))

    pf = ProfileFormat(;C, combine, maxdepth, mincount, noisefloor, sortedby, recur)

    # Ungrouped report: a single section covering the selected threads and tasks.
    if groupby === :none
        print_group(io, data, lidict, pf, format, threads, tasks, false)
        return
    end

    if !in(groupby, [:thread, :task, [:task, :thread], [:thread, :task]])
        error(ArgumentError("Unrecognized groupby option: $groupby. Options are :none (default), :task, :thread, [:task, :thread], or [:thread, :task]"))
    elseif Sys.iswindows() && in(groupby, [:thread, [:task, :thread], [:thread, :task]])
        @warn "Profiling on windows is limited to the main thread. Other threads have not been sampled and will not show in the report"
    end

    # Set as soon as the selection, or any individual group, turns out to hold no samples.
    # It must start out `false`: starting at `true` made the summary warning below fire
    # on every grouped report, even when every group had samples.
    any_nosamples = false
    if format === :tree
        # The tree header is printed once, ahead of all groups.
        Base.print(io, "Overhead ╎ [+additional indent] Count File:Line  Function\n")
        Base.print(io, "=========================================================\n")
    end

    if groupby == [:task, :thread]
        # Outer grouping by task, inner grouping by the threads that task ran on.
        taskids = intersect(get_task_ids(data), tasks)
        isempty(taskids) && (any_nosamples = true)
        for taskid in taskids
            threadids = intersect(get_thread_ids(data, taskid), threads)
            if length(threadids) == 0
                any_nosamples = true
            else
                # Put the outer heading on its own line only when several inner groups follow.
                nl = length(threadids) > 1 ? "\n" : ""
                printstyled(io, "Task $(Base.repr(taskid))$nl"; bold=true, color=Base.debug_color())
                for threadid in threadids
                    printstyled(io, " Thread $threadid ($(Threads.threadpooldescription(threadid))) "; bold=true, color=Base.info_color())
                    nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true)
                    nosamples && (any_nosamples = true)
                    println(io)
                end
            end
        end
    elseif groupby == [:thread, :task]
        # Outer grouping by thread, inner grouping by the tasks seen on that thread.
        threadids = intersect(get_thread_ids(data), threads)
        isempty(threadids) && (any_nosamples = true)
        for threadid in threadids
            taskids = intersect(get_task_ids(data, threadid), tasks)
            if length(taskids) == 0
                any_nosamples = true
            else
                nl = length(taskids) > 1 ? "\n" : ""
                printstyled(io, "Thread $threadid ($(Threads.threadpooldescription(threadid)))$nl"; bold=true, color=Base.info_color())
                for taskid in taskids
                    printstyled(io, " Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
                    nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true)
                    nosamples && (any_nosamples = true)
                    println(io)
                end
            end
        end
    elseif groupby === :task
        # NOTE(review): the caller's `threads` filter is replaced by "all threads" here,
        # as in the original code — confirm that this is intended.
        threads = 1:typemax(Int)
        taskids = intersect(get_task_ids(data), tasks)
        isempty(taskids) && (any_nosamples = true)
        for taskid in taskids
            printstyled(io, "Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
            nosamples = print_group(io, data, lidict, pf, format, threads, taskid, true)
            nosamples && (any_nosamples = true)
            println(io)
        end
    elseif groupby === :thread
        # NOTE(review): likewise, the caller's `tasks` filter is replaced here, as in the
        # original code — confirm that this is intended.
        tasks = 1:typemax(UInt)
        threadids = intersect(get_thread_ids(data), threads)
        isempty(threadids) && (any_nosamples = true)
        for threadid in threadids
            printstyled(io, "Thread $threadid ($(Threads.threadpooldescription(threadid))) "; bold=true, color=Base.info_color())
            nosamples = print_group(io, data, lidict, pf, format, threadid, tasks, true)
            nosamples && (any_nosamples = true)
            println(io)
        end
    end

    # Emit one summary warning if anything requested turned out to be empty.
    any_nosamples && warning_empty(summary = true)
    return
end
337

338
"""
339
    print([io::IO = stdout,] data::Vector, lidict::LineInfoDict; kwargs...)
340

341
Prints profiling results to `io`. This variant is used to examine results exported by a
342
previous call to [`retrieve`](@ref). Supply the vector `data` of backtraces and
343
a dictionary `lidict` of line information.
344

345
See `Profile.print([io], data)` for an explanation of the valid keyword arguments.
346
"""
347
function print(data::Vector{<:Unsigned} = fetch(), lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data); kwargs...)
    # Same report as the `io`-taking method, written to `stdout`.
    return print(stdout, data, lidict; kwargs...)
end
349

350
# Print one report section for the given `threads`/`tasks` selection in the requested
# `format` (`:tree` or `:flat`), returning whatever the underlying printer returns
# (`print` treats that value as a "no samples" flag).
function print_group(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat,
                format::Symbol, threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}},
                is_subsection::Bool = false)
    width::Int = Base.displaysize(io)[2]
    samples = convert(Vector{UInt64}, data)
    # The recursion mode is validated before the format, for both formats.
    if !(fmt.recur in (:off, :flat, :flatc))
        throw(ArgumentError("recur value not recognized"))
    end
    if format === :tree
        return tree(io, samples, lidict, width, fmt, threads, tasks, is_subsection)
    end
    format === :flat || throw(ArgumentError("output format $(repr(format)) not recognized"))
    fmt.recur === :off || throw(ArgumentError("format flat only implements recur=:off"))
    return flat(io, samples, lidict, width, fmt, threads, tasks, is_subsection)
end
367

368
# Collect the distinct task ids recorded in `data`, in order of first appearance when
# scanning from the end of the buffer. With a `threadid`, only blocks recorded on that
# thread are considered.
function get_task_ids(data::Vector{<:Unsigned}, threadid = nothing)
    taskids = UInt[]
    for i in reverse(eachindex(data))
        is_block_end(data, i) || continue
        if threadid === nothing || data[i - META_OFFSET_THREADID] == threadid
            taskid = data[i - META_OFFSET_TASKID]
            taskid in taskids || push!(taskids, taskid)
        end
    end
    return taskids
end
380

381
# Collect the distinct thread ids recorded in `data`, sorted ascending. With a `taskid`,
# only blocks recorded for that task are considered.
function get_thread_ids(data::Vector{<:Unsigned}, taskid = nothing)
    threadids = Int[]
    for i in reverse(eachindex(data))
        is_block_end(data, i) || continue
        if taskid === nothing || data[i - META_OFFSET_TASKID] == taskid
            threadid = data[i - META_OFFSET_THREADID]
            threadid in threadids || push!(threadids, threadid)
        end
    end
    return sort!(threadids)
end
393

394
# Whether index `i` of `data` is the final NULL that terminates a sample block.
function is_block_end(data, i)
    # Too close to the start of the buffer to have a full set of metadata before it.
    i > nmeta || return false
    # 32-bit linux has been seen to have rogue NULL ips, so we use two to
    # indicate block end, where the 2nd is the actual end index.
    # and we could have (though very unlikely):
    # 1:<stack><metadata><null><null><NULL><metadata><null><null>:end
    # and we want to ignore the triple NULL (which is an ip).
    return iszero(data[i]) && iszero(data[i - 1]) && !iszero(data[i - META_OFFSET_SLEEPSTATE])
end
403

404
# Heuristically decide whether `data` contains metadata: true as soon as one position
# looks like a block end preceded by a plausible set of metadata fields.
function has_meta(data)
    return any(6:length(data)) do i
        data[i] == 0 &&                                 # first block end null
            data[i - 1] == 0 &&                         # second block end null
            data[i - META_OFFSET_SLEEPSTATE] in 1:2 &&
            data[i - META_OFFSET_CPUCYCLECLOCK] != 0 &&
            data[i - META_OFFSET_TASKID] != 0 &&
            data[i - META_OFFSET_THREADID] != 0
    end
end
416

417
"""
418
    retrieve(; kwargs...) -> data, lidict
419

420
"Exports" profiling results in a portable format, returning the set of all backtraces
421
(`data`) and a dictionary that maps the (session-specific) instruction pointers in `data` to
422
`StackFrame` values that store the file name, function name, and line number. This function
423
allows you to save profiling results for future analysis.
424
"""
425
function retrieve(; kwargs...)
    # Keyword arguments are passed straight through to `fetch`.
    backtraces = fetch(; kwargs...)
    lidict = getdict(backtraces)
    return (backtraces, lidict)
end
429

430
# Build a fresh instruction-pointer => stack-frames dictionary for `data`.
getdict(data::Vector{UInt}) = getdict!(LineInfoDict(), data)
434

435
# Resolve every distinct instruction pointer in `data` to its stack frames and store the
# results in `dict`, which is also returned.
function getdict!(dict::LineInfoDict, data::Vector{UInt})
    # we don't want metadata here as we're just looking up ips
    ips = has_meta(data) ? strip_meta(data) : data
    unique_ips = unique(ips)
    isempty(unique_ips) && return dict
    iplookups = similar(unique_ips, Vector{StackFrame})
    sort!(unique_ips) # help each thread to get a disjoint set of libraries, as much as possible
    # Split the index range into contiguous chunks, one spawned task per chunk.
    chunk_len = cld(length(unique_ips), Threads.threadpoolsize())
    @sync for chunk in Iterators.partition(eachindex(unique_ips), chunk_len)
        Threads.@spawn for i in chunk
            iplookups[i] = _lookup_corrected(unique_ips[i])
        end
    end
    # The dictionary is only filled in after all lookups have finished.
    for (ip, frames) in zip(unique_ips, iplookups)
        dict[ip] = frames
    end
    return dict
end
454

455
# Look up the stack frames for `ip` and pass them through
# `Base.update_stackframes_callback[]` so that line numbers can be corrected.
function _lookup_corrected(ip::UInt)
    frames = lookup(convert(Ptr{Cvoid}, ip))
    # To correct line numbers for moving code, put it in the form expected by
    # Base.update_stackframes_callback[]
    paired = [(frame, 1) for frame in frames]
    # Note: Base.update_stackframes_callback[] should be data-race free
    try
        Base.invokelatest(Base.update_stackframes_callback[], paired)
    catch
        # Best effort: if the callback fails, the frames are returned as looked up.
    end
    return [first(entry) for entry in paired]
end
464

465
"""
466
    flatten(btdata::Vector, lidict::LineInfoDict) -> (newdata::Vector{UInt64}, newdict::LineInfoFlatDict)
467

468
Produces "flattened" backtrace data. Individual instruction pointers
469
sometimes correspond to a multi-frame backtrace due to inlining; in
470
such cases, this function inserts fake instruction pointers for the
471
inlined calls, and returns a dictionary that is a 1-to-1 mapping
472
between instruction pointers and a single StackFrame.
473
"""
474
function flatten(data::Vector, lidict::LineInfoDict)
    # Makes fake instruction pointers, counting down from typemax(UInt)
    next_fake = typemax(UInt64) - 1
    used = Set(keys(lidict))  # make sure we don't pick one that's already used
    flatdict = Dict{UInt64,StackFrame}()
    # real ip => the fake ips standing in for each frame of its multi-frame trace
    expansions = Dict{UInt64,Vector{UInt64}}()
    for (ip, trace) in lidict
        if length(trace) == 1
            # A single frame keeps its real instruction pointer.
            flatdict[ip] = trace[1]
            continue
        end
        fake_ips = UInt64[]
        for sf in trace
            # Skip over values that are real instruction pointers in `lidict`.
            while next_fake ∈ used && next_fake > 0
                next_fake -= 1
            end
            next_fake == 0 && error("all possible instruction pointers used")
            push!(fake_ips, next_fake)
            flatdict[next_fake] = sf
            next_fake -= 1
        end
        expansions[ip] = fake_ips
    end
    # Rewrite the data stream, replacing each expanded ip by its run of fake ips.
    flatdata = UInt64[]
    for ip::UInt64 in data
        expanded = get(expansions, ip, nothing)
        if expanded === nothing
            push!(flatdata, ip)
        else
            append!(flatdata, expanded)
        end
    end
    return (flatdata, flatdict)
end
507

508
const SRC_DIR = normpath(joinpath(Sys.BUILD_ROOT_PATH, "src"))
509

510
# Take a file-system path and try to form a concise representation of it
# based on the package ecosystem.
#
# Returns a 3-tuple of strings `(full_path, prefix, short)`, memoized per `spath` in
# `filenamecache`:
#   - `full_path`: a path to the file, or "" when no such file was found,
#   - `prefix`:    "@juliasrc", "@julialib", "@Base", "@<PackageName>", or "" if none applies,
#   - `short`:     the remaining part of the path to display after the prefix.
function short_path(spath::Symbol, filenamecache::Dict{Symbol, Tuple{String,String,String}})
    return get!(filenamecache, spath) do
        path = Base.fixup_stdlib_path(string(spath))
        path_norm = normpath(path)
        possible_base_path = normpath(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base", path))
        lib_dir = abspath(Sys.BINDIR, Base.LIBDIR)
        if startswith(path_norm, SRC_DIR)
            # Strip the prefix only. Unlike `only(split(...))` this cannot throw when the
            # prefix text occurs again later in the path or the path equals the prefix.
            remainder = chopprefix(path_norm, SRC_DIR)
            return (isfile(path_norm) ? path_norm : ""), "@juliasrc", remainder
        elseif startswith(path_norm, lib_dir)
            remainder = chopprefix(path_norm, lib_dir)
            return (isfile(path_norm) ? path_norm : ""), "@julialib", remainder
        elseif isabspath(path)
            if ispath(path)
                # try to replace the file-system prefix with a short "@Module" one,
                # assuming that profile came from the current machine
                # (or at least has the same file-system layout)
                root = path
                while !isempty(root)
                    root, base = splitdir(root)
                    isempty(base) && break
                    @assert startswith(path, root)
                    for proj in Base.project_names
                        project_file = joinpath(root, proj)
                        if Base.isfile_casesensitive(project_file)
                            pkgid = Base.project_file_name_uuid(project_file, "")
                            # bad Project file: fall back to the unshortened path. This must be
                            # a 3-tuple like every other branch, since the cache stores tuples.
                            isempty(pkgid.name) && return (path, "", path)
                            # return the joined module name prefix and path suffix
                            _short_path = path[nextind(path, sizeof(root)):end]
                            return path, string("@", pkgid.name), _short_path
                        end
                    end
                end
            end
            return path, "", path
        elseif isfile(possible_base_path)
            # do the same mechanic for Base (or Core/Compiler) files as above,
            # but they start from a relative path
            return possible_base_path, "@Base", normpath(path)
        else
            # for non-existent relative paths (such as "REPL[1]"), just consider simplifying them
            path = normpath(path)
            return "", "", path # drop leading "./"
        end
    end
end
558

559
"""
560
    callers(funcname, [data, lidict], [filename=<filename>], [linerange=<start:stop>]) -> Vector{Tuple{count, lineinfo}}
561

562
Given a previous profiling run, determine who called a particular function. Supplying the
563
filename (and optionally, range of line numbers over which the function is defined) allows
564
you to disambiguate an overloaded method. The returned value is a vector containing a count
565
of the number of calls and line information about the caller. One can optionally supply
566
backtrace `data` obtained from [`retrieve`](@ref); otherwise, the current internal
567
profile buffer is used.
568
"""
569
function callers end
570

571
# Core method: works on flattened data (one frame per instruction pointer). `filename`
# and `linerange` narrow down which frames count as the function of interest; a
# `linerange` without a `filename` is rejected with an `ArgumentError`.
function callers(funcname::String, bt::Vector, lidict::LineInfoFlatDict; filename = nothing, linerange = nothing)
    if filename === nothing && linerange === nothing
        return callersf(li -> String(li.func) == funcname, bt, lidict)
    end
    filename === nothing && throw(ArgumentError("if supplying linerange, you must also supply the filename"))
    # A fresh binding (rather than reassigning `filename`) keeps the closures below from
    # capturing a reassigned variable.
    fname = String(filename)
    if linerange === nothing
        return callersf(li -> String(li.func) == funcname && String(li.file) == fname,
            bt, lidict)
    else
        return callersf(li -> String(li.func) == funcname && String(li.file) == fname && in(li.line, linerange),
            bt, lidict)
    end
end

# Unflattened data (e.g. from `retrieve`) is flattened first.
callers(funcname::String, bt::Vector, lidict::LineInfoDict; kwargs...) =
    callers(funcname, flatten(bt, lidict)...; kwargs...)
# Without data, the current internal profile buffer is used.
callers(funcname::String; kwargs...) = callers(funcname, retrieve()...; kwargs...)
# A function is identified by its printed name. Both dictionary kinds are accepted so that
# `callers(f, retrieve()...)` works just like the `String` form does.
callers(func::Function, bt::Vector, lidict::Union{LineInfoDict, LineInfoFlatDict}; kwargs...) =
    callers(string(func), bt, lidict; kwargs...)
callers(func::Function; kwargs...) = callers(string(func), retrieve()...; kwargs...)
4✔
593

594
##
595
## For --track-allocation
596
##
597
# Reset the malloc log. Used to avoid counting memory allocated during
598
# compilation.
599

600
"""
601
    clear_malloc_data()
602

603
Clears any stored memory allocation data when running julia with `--track-allocation`.
604
Execute the command(s) you want to test (to force JIT-compilation), then call
605
[`clear_malloc_data`](@ref). Then execute your command(s) again, quit
606
Julia, and examine the resulting `*.mem` files.
607
"""
608
function clear_malloc_data()
    return @ccall jl_clear_malloc_data()::Cvoid
end
×
609

610
# C wrappers
611
function start_timer()
    check_init() # if the profile buffer hasn't been initialized, initialize with default size
    status = @ccall jl_profile_start_timer()::Cint
    # Negative status codes are turned into the matching message from `error_codes`.
    status >= 0 || error(error_codes[status])
    return nothing
end
618

619

620
# Stop the sampling timer.
stop_timer() = @ccall jl_profile_stop_timer()::Cvoid

# Whether the runtime reports the profiler as currently running.
is_running() = (@ccall jl_profile_is_running()::Cint) != 0

# Whether the runtime reports the profile buffer as full.
is_buffer_full() = (@ccall jl_profile_is_buffer_full()::Cint) != 0

# Pointer to the start of the profile data, viewed as machine words.
get_data_pointer() = Ptr{UInt}(@ccall jl_profile_get_data()::Ptr{UInt8})

# Number of entries currently stored in the profile buffer.
len_data() = Int(@ccall jl_profile_len_data()::Csize_t)

# Capacity of the profile buffer in entries (0 when it has not been initialized).
maxlen_data() = Int(@ccall jl_profile_maxlen_data()::Csize_t)
69✔
631

632
# Messages for the negative status codes returned by `jl_profile_start_timer`.
# Declared `const` so that `start_timer` does not read an untyped, reassignable global.
const error_codes = Dict(
    -1=>"cannot specify signal action for profiling",
    -2=>"cannot create the timer for profiling",
    -3=>"cannot start the timer for profiling",
    -4=>"cannot unblock SIGUSR1")
637

638

639
"""
640
    fetch(;include_meta = true) -> data
641

642
Return a copy of the buffer of profile backtraces. Note that the
643
values in `data` have meaning only on this machine in the current session, because it
644
depends on the exact memory addresses used in JIT-compiling. This function is primarily for
645
internal use; [`retrieve`](@ref) may be a better choice for most users.
646
By default metadata such as threadid and taskid is included. Set `include_meta` to `false` to strip metadata.
647
"""
648
function fetch(;include_meta = true, limitwarn = true)
    if iszero(maxlen_data())
        error("The profiling data buffer is not initialized. A profile has not been requested this session.")
    end
    len = len_data()
    # `limitwarn = false` suppresses the warning about a full buffer.
    if limitwarn && is_buffer_full()
        @warn """The profile data buffer is full; profiling probably terminated
                 before your program finished. To profile for longer runs, call
                 `Profile.init()` with a larger buffer and/or larger delay."""
    end
    # Copy the current buffer contents into a Julia-owned vector.
    data = Vector{UInt}(undef, len)
    GC.@preserve data unsafe_copyto!(pointer(data), get_data_pointer(), len)
    return (include_meta || isempty(data)) ? data : strip_meta(data)
end
666

667
# Return a copy of `data` in which, for every index that `is_block_end` reports
# as a block end, the `nmeta + 1` entries preceding it (metadata fields and the
# extra NULL IP) are dropped.
function strip_meta(data)
    nblocks = count(idx -> is_block_end(data, idx), eachindex(data))
    dropped_per_block = nmeta + 1 # metadata fields and the extra NULL IP
    stripped = Vector{UInt}(undef, length(data) - (nblocks * dropped_per_block))
    src = length(data)
    dst = length(stripped)
    # Copy back to front, jumping over the dropped entries at each block end.
    while src > 0 && dst > 0
        stripped[dst] = data[src]
        if is_block_end(data, src)
            src -= dropped_per_block
        end
        src -= 1
        dst -= 1
    end
    @assert src == dst == 0 "metadata stripping failed"
    return stripped
end
683

684
"""
    Profile.add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0) -> data_with_meta

The inverse of `Profile.fetch(;include_meta = false)`: insert fake metadata into `data`.
Useful for compatibility, and for packages (e.g., FlameGraphs.jl) that prefer not to depend
on the internal details of the metadata format.
"""
function add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0)
    if threadid == 0
        error("Fake threadid cannot be 0")
    elseif taskid == 0
        error("Fake taskid cannot be 0")
    elseif !isempty(data) && has_meta(data)
        error("input already has metadata")
    end
    fake_clock = UInt64(99)
    augmented = similar(data, 0)
    for val in data
        if iszero(val)
            # Each zero entry is replaced by a metadata record with a fresh,
            # increasing fake clock value.
            fake_clock += 1
            # META_OFFSET_THREADID, META_OFFSET_TASKID, META_OFFSET_CPUCYCLECLOCK, META_OFFSET_SLEEPSTATE
            push!(augmented, threadid, taskid, fake_clock, false+1, 0, 0)
        else
            push!(augmented, val)
        end
    end
    return augmented
end
708

709
## Print as a flat list
710
# Counts the number of times each line appears, at any nesting level and at the topmost level
711
# Merging multiple equivalent entries and recursive calls
712
# Scan `data` from back to front and tally, per key (`ip` when `T === UInt64`,
# otherwise the frame itself), how many backtraces it appears in (`n`) and how
# many times it was the last frame recorded for a backtrace (`m`).
# Returns `(lilist, n, m, totalshots, nsleeping)`.
function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, C::Bool,
                    threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}) where {T}
    if !isempty(data) && !has_meta(data)
        error("Profile data is missing required metadata")
    end
    lilist = StackFrame[]
    n = Int[]
    m = Int[]
    lilist_idx = Dict{T, Int}()
    recursive = Set{T}() # keys already counted for the backtrace being scanned
    leaf = 0
    totalshots = 0
    blockend = length(data)
    skip = false
    nsleeping = 0
    for i in length(data):-1:1
        # skip metadata (it's read ahead below) and extra block end NULL IP
        if (blockend - (nmeta + 1)) <= i <= (blockend - 1)
            continue
        end
        ip = data[i]
        if is_block_end(data, i)
            # read metadata
            thread_sleeping = data[i - META_OFFSET_SLEEPSTATE] - 1 # subtract 1 as state is incremented to avoid being equal to 0
            # cpu_cycle_clock = data[i - META_OFFSET_CPUCYCLECLOCK]
            taskid = data[i - META_OFFSET_TASKID]
            threadid = data[i - META_OFFSET_THREADID]
            # Frames of a backtrace outside the requested threads/tasks are ignored.
            skip = !(in(threadid, threads) && in(taskid, tasks))
            skip && continue
            thread_sleeping == 1 && (nsleeping += 1)
            totalshots += 1
            empty!(recursive)
            if leaf != 0
                m[leaf] += 1
            end
            leaf = 0
            blockend = i
        else
            skip && continue
            frames = lidict[ip]
            nframes = (frames isa Vector ? length(frames) : 1)
            # the last lookup is the non-inlined root frame, the first is the inlined leaf frame
            for j = nframes:-1:1
                frame = (frames isa Vector ? frames[j] : frames)
                (C || !frame.from_c) || continue
                key = (T === UInt64 ? ip : frame)
                idx = get!(lilist_idx, key, length(lilist) + 1)
                if idx > length(lilist)
                    # first time this key is seen at all
                    push!(recursive, key)
                    push!(lilist, frame)
                    push!(n, 1)
                    push!(m, 0)
                elseif !(key in recursive)
                    # first time this key is seen in the current backtrace
                    push!(recursive, key)
                    n[idx] += 1
                end
                leaf = idx
            end
        end
    end
    @assert length(lilist) == length(n) == length(m) == length(lilist_idx)
    return (lilist, n, m, totalshots, nsleeping)
end
774

775
# Dictionary type mapping a `Symbol` to a triple of strings.
const FileNameMap = Dict{Symbol,NTuple{3,String}}
776

777
# Print the flat report for `data` to `io`.
# Returns `true` when there were no frames to print (a warning or a short summary
# is emitted instead), and `false` after a report has been printed.
# When `is_subsection` is true the summary line is printed before the table;
# otherwise the table comes first and the summary line gets a hint about `groupby`.
function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, cols::Int, fmt::ProfileFormat,
                threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool)
    lilist, n, m, totalshots, nsleeping = parse_flat(fmt.combine ? StackFrame : UInt64, data, lidict, fmt.C, threads, tasks)
    # With no snapshots the ratio would be 0/0 == NaN, and `round(Int, NaN)` below
    # throws an InexactError; report 0% utilization in that case.
    util_perc = totalshots == 0 ? 0.0 : (1 - (nsleeping / totalshots)) * 100
    filenamemap = FileNameMap()
    if isempty(lilist)
        if is_subsection
            Base.print(io, "Total snapshots: ")
            printstyled(io, "$(totalshots)", color=Base.warn_color())
            Base.print(io, ". Utilization: ", round(Int, util_perc), "%\n")
        else
            warning_empty()
        end
        return true
    end
    is_subsection || print_flat(io, lilist, n, m, cols, filenamemap, fmt)
    Base.print(io, "Total snapshots: ", totalshots, ". Utilization: ", round(Int, util_perc), "%")
    if is_subsection
        println(io)
        print_flat(io, lilist, n, m, cols, filenamemap, fmt)
    else
        Base.print(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task.\n")
    end
    return false
end
808

809
# Build a terminal-clickable URI pointing at `path` (and `linenum` where the scheme allows it).
# Comparable to `define_default_editors` in `Base.Filesystem`, but produces URIs rather than commands.
function editor_link(path::String, linenum::Int)
    # Note: the editor path can include spaces (if escaped) and flags.
    # The first of these environment variables that is set wins.
    editor = nothing
    for var in ("JULIA_EDITOR", "VISUAL", "EDITOR")
        candidate = get(ENV, var, nothing)
        if candidate isa String
            editor = candidate
            break
        end
    end
    path_encoded = Base.Filesystem.encode_uri_component(path)
    if editor !== nothing
        if editor == "code"
            return "vscode://file/$path_encoded:$linenum"
        elseif editor in ("subl", "sublime_text")
            return "subl://open?url=file://$path_encoded&line=$linenum"
        elseif occursin("idea", editor)
            return "idea://open?file=$path_encoded&line=$linenum"
        elseif editor == "pycharm"
            return "pycharm://open?file=$path_encoded&line=$linenum"
        elseif editor == "atom"
            return "atom://core/open/file?filename=$path_encoded&line=$linenum"
        elseif editor in ("emacsclient", "emacs")
            return "emacs://open?file=$path_encoded&line=$linenum"
        elseif editor in ("vim", "nvim")
            # Note: Vim/Nvim may not support standard URI schemes without specific plugins
            return "vim://open?file=$path_encoded&line=$linenum"
        end
    end
    # fallback to generic URI, but line numbers are not supported by generic URI
    return Base.Filesystem.uripath(path)
end
842

843
# Print the flat table (count, overhead, file, line, function) for the frames in
# `lilist`, where `n[i]` and `m[i]` are the count and overhead of `lilist[i]`.
# Rows are ordered according to `fmt.sortedby` and rows with a count below
# `fmt.mincount` are omitted. Callers must pass non-empty vectors (`maximum` is
# taken over `n` and `m`).
function print_flat(io::IO, lilist::Vector{StackFrame},
        n::Vector{Int}, m::Vector{Int},
        cols::Int, filenamemap::FileNameMap,
        fmt::ProfileFormat)
    if fmt.sortedby === :count
        p = sortperm(n)
    elseif fmt.sortedby === :overhead
        p = sortperm(m)
    else
        p = liperm(lilist)
    end
    lilist = lilist[p]
    n = n[p]
    m = m[p]
    pkgnames_filenames = Tuple{String,String,String}[short_path(li.file, filenamemap) for li in lilist]
    funcnames = String[string(li.func) for li in lilist]
    wcounts = max(6, ndigits(maximum(n)))
    wself = max(9, ndigits(maximum(m)))
    maxline = 1
    maxfile = 6
    maxfunc = 10
    for i in eachindex(lilist)
        li = lilist[i]
        maxline = max(maxline, li.line)
        maxfunc = max(maxfunc, textwidth(funcnames[i]))
        maxfile = max(maxfile, sum(textwidth, pkgnames_filenames[i][2:3]) + 1)
    end
    wline = max(5, ndigits(maxline))
    # `ntext` is the width shared by the file column and the function column.
    ntext = max(20, cols - wcounts - wself - wline - 3)
    maxfunc += 25 # for type signatures
    if maxfile + maxfunc <= ntext
        wfile = maxfile
        # Take the full remaining width (for type sig). This used to be
        # `ntext - maxfunc`, which left space unused and could truncate function
        # names even though everything fits.
        wfunc = ntext - wfile
    else
        wfile = 2*ntext÷5
        wfunc = 3*ntext÷5
    end
    println(io, lpad("Count", wcounts, " "), " ", lpad("Overhead", wself, " "), " ",
            rpad("File", wfile, " "), " ", lpad("Line", wline, " "), " Function")
    println(io, lpad("=====", wcounts, " "), " ", lpad("========", wself, " "), " ",
            rpad("====", wfile, " "), " ", lpad("====", wline, " "), " ========")
    for i in eachindex(n)
        n[i] < fmt.mincount && continue
        li = lilist[i]
        Base.print(io, lpad(string(n[i]), wcounts, " "), " ")
        Base.print(io, lpad(string(m[i]), wself, " "), " ")
        if li == UNKNOWN
            if !fmt.combine && li.pointer != 0
                Base.print(io, "@0x", string(li.pointer, base=16))
            else
                Base.print(io, "[any unknown stackframes]")
            end
        else
            path, pkgname, file = pkgnames_filenames[i]
            isempty(file) && (file = "[unknown file]")
            pkgcolor = get!(() -> popfirst!(Base.STACKTRACE_MODULECOLORS), PACKAGE_FIXEDCOLORS, pkgname)
            Base.printstyled(io, pkgname, color=pkgcolor)
            file_trunc = ltruncate(file, max(1, wfile))
            # The package name has already been printed; pad only the remainder.
            wpad = wfile - textwidth(pkgname)
            if !isempty(pkgname) && !startswith(file_trunc, "/")
                Base.print(io, "/")
                wpad -= 1
            end
            if isempty(path)
                Base.print(io, rpad(file_trunc, wpad, " "))
            else
                link = editor_link(path, li.line)
                Base.print(io, rpad(styled"{link=$link:$file_trunc}", wpad, " "))
            end
            Base.print(io, lpad(li.line > 0 ? string(li.line) : "?", wline, " "), " ")
            fname = funcnames[i]
            if !li.from_c && li.linfo !== nothing
                fname = sprint(show_spec_linfo, li)
            end
            isempty(fname) && (fname = "[unknown function]")
            Base.print(io, rtruncate(fname, wfunc))
        end
        println(io)
    end
    nothing
end
924

925
## A tree representation
926

927
# Node of a prefix trie of backtrace counts.
mutable struct StackFrameTree{T} # where T <: Union{UInt64, StackFrame}
    # --- content ---
    frame::StackFrame
    count::Int          # how many frames this appeared in
    overhead::Int       # how many frames had this as the code being executed
    flat_count::Int     # how often this frame was in the flattened representation (unlike count, this'll sum to 100% of parent)
    max_recur::Int      # largest number of times this frame was the *top* of the recursion in the stack
    count_recur::Int    # total number of times this frame was the *top* of the recursion in a stack (divide by count to get an average)
    down::Dict{T, StackFrameTree{T}}
    # --- scratch state used while building the tree ---
    recur::Int
    builder_key::Vector{UInt64}
    builder_value::Vector{StackFrameTree{T}}
    up::StackFrameTree{T}
    # Create an empty node; `up` is deliberately left unassigned.
    function StackFrameTree{T}() where {T}
        return new(UNKNOWN, 0, 0, 0, 0, 0, Dict{T, StackFrameTree{T}}(), 0, UInt64[], StackFrameTree{T}[])
    end
end
944

945

946
# Ten repetitions of the five-character indentation unit.
const indent_s = "    ╎"^10
# String indices of each character of `indent_s` (needed because `╎` is multi-byte).
const indent_z = collect(eachindex(indent_s))

# Return the first `depth` characters of an endlessly repeated `indent_s`
# (the empty string for `depth < 1`).
function indent(depth::Int)
    nchars = length(indent_z)
    if depth < 1
        return ""
    elseif depth <= nchars
        return indent_s[1:indent_z[depth]]
    end
    # Deeper than one copy of `indent_s`: whole copies plus a partial one.
    copies, leftover = divrem(depth, nchars)
    prefix = indent_s^copies
    if leftover != 0
        prefix *= SubString(indent_s, 1, indent_z[leftover])
    end
    return prefix
end
956

957
# Colors assigned to package names; mimics Stacktraces. Starts with fixed entries
# for "@Base" and "@Core".
const PACKAGE_FIXEDCOLORS = Dict{String, Any}(
    "@Base" => :gray,
    "@Core" => :gray,
)
959

960
# Render one display line per node in `frames` (all at tree depth `level`) and
# return them as a vector of annotated strings, each truncated to `cols`.
# `maxes` supplies the largest overhead/count values, used to size the numeric columns.
# When `showpointer` is true the function name is prefixed with the frame's pointer.
function tree_format(frames::Vector{<:StackFrameTree}, level::Int, cols::Int, maxes, filenamemap::FileNameMap, showpointer::Bool)
    nindent = min(cols>>1, level)
    ndigoverhead = ndigits(maxes.overhead)
    ndigcounts = ndigits(maxes.count)
    largest_line = maximum(node.frame.line for node in frames)
    ndigline = ndigits(largest_line) + 6
    ntext = max(30, cols - ndigoverhead - nindent - ndigcounts - ndigline - 6)
    widthfile = 2*ntext÷5 # min 12
    ptrpad = 2*sizeof(Ptr{Cvoid})
    strs = Vector{AnnotatedString{String}}(undef, length(frames))
    # Levels beyond the indentation cap are shown as a "+N" marker instead.
    showextra = level > nindent
    if showextra
        nextra = level - nindent
        nindent -= ndigits(nextra) + 2
    end
    for (i, node) in enumerate(frames)
        li = node.frame
        stroverhead = lpad(node.overhead > 0 ? string(node.overhead) : "", ndigoverhead, " ")
        base = nindent == 0 ? "" : indent(nindent - 1) * " "
        showextra && (base = string(base, "+", nextra, " "))
        strcount = rpad(string(node.count), ndigcounts, " ")
        if li == UNKNOWN
            strs[i] = string(stroverhead, "╎", base, strcount, " [unknown stackframe]")
        elseif li.line == li.pointer
            strs[i] = string(stroverhead, "╎", base, strcount, " ",
                "[unknown function] (pointer: 0x",
                string(li.pointer, base = 16, pad = ptrpad),
                ")")
        else
            fname = (!li.from_c && li.linfo !== nothing) ? sprint(show_spec_linfo, li) : string(li.func)
            path, pkgname, filename = short_path(li.file, filenamemap)
            if showpointer
                fname = string("0x", string(li.pointer, base = 16, pad = ptrpad), " ", fname)
            end
            # Reuse the color already assigned to this package, or take the next free one.
            pkgcolor = get!(PACKAGE_FIXEDCOLORS, pkgname) do
                popfirst!(Base.STACKTRACE_MODULECOLORS)
            end
            remaining_path = ltruncate(filename, max(1, widthfile - textwidth(pkgname) - 1))
            linenum = li.line == -1 ? "?" : string(li.line)
            slash = (isempty(pkgname) || startswith(remaining_path, "/")) ? "" : "/"
            styled_path = styled"{$pkgcolor:$pkgname}$slash$remaining_path:$linenum"
            if isempty(path)
                rich_file = styled_path
            else
                link = editor_link(path, li.line)
                rich_file = styled"{link=$link:$styled_path}"
            end
            strs[i] = Base.annotatedstring(stroverhead, "╎", base, strcount, " ", rich_file, "  ", fname)
            if node.overhead > 0
                strs[i] = styled"{bold:$(strs[i])}"
            end
        end
        strs[i] = rtruncate(strs[i], cols)
    end
    return strs
end
1026

1027
# Turn a list of backtraces into a tree (implicitly separated by NULL markers).
#
# `all` is scanned from back to front. At every block end the metadata is read,
# the per-backtrace state is reset and `root.count` is incremented; every other
# entry is an instruction pointer that extends the tree below `parent`.
# Backtraces whose thread or task is not in `threads` / `tasks` are skipped
# (`nothing` disables the respective filter). Frames with `from_c` set are left
# out unless `C` is true. With `recur` set to `:flat` or `:flatc`, an ip that was
# already seen in the current backtrace rewinds `parent` to that node instead of
# adding a new level.
#
# Returns `(root, nsleeping)`, where `nsleeping` is the number of processed
# backtraces whose sleep-state metadata was 1.
function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineInfoFlatDict, LineInfoDict}, C::Bool, recur::Symbol,
                threads::Union{Int,AbstractVector{Int},Nothing}=nothing, tasks::Union{UInt,AbstractVector{UInt},Nothing}=nothing) where {T}
    !isempty(all) && !has_meta(all) && error("Profile data is missing required metadata")
    parent = root
    tops = Vector{StackFrameTree{T}}()
    build = Vector{StackFrameTree{T}}()
    startframe = length(all)
    skip = false
    nsleeping = 0
    for i in startframe:-1:1
        (startframe - 1) >= i >= (startframe - (nmeta + 1)) && continue # skip metadata (it's read ahead below) and extra block end NULL IP
        ip = all[i]
        if is_block_end(all, i)
            # read metadata
            thread_sleeping = all[i - META_OFFSET_SLEEPSTATE] - 1 # subtract 1 as state is incremented to avoid being equal to 0
            # cpu_cycle_clock = all[i - META_OFFSET_CPUCYCLECLOCK]
            taskid = all[i - META_OFFSET_TASKID]
            threadid = all[i - META_OFFSET_THREADID]
            if (threads !== nothing && !in(threadid, threads)) ||
               (tasks !== nothing && !in(taskid, tasks))
                skip = true
                continue
            end
            if thread_sleeping == 1
                nsleeping += 1
            end
            skip = false
            # sentinel value indicates the start of a new backtrace
            empty!(build)
            root.recur = 0
            if recur !== :off
                # We mark all visited nodes so that we only count those branches
                # once for each backtrace. Reset that now for the next backtrace.
                push!(tops, parent)
                for top in tops
                    while top.recur != 0
                        top.max_recur < top.recur && (top.max_recur = top.recur)
                        top.recur = 0
                        top = top.up
                    end
                end
                empty!(tops)
            end
            # Credit every node on the path of the backtrace that just ended.
            let this = parent
                while this !== root
                    this.flat_count += 1
                    this = this.up
                end
            end
            parent.overhead += 1
            parent = root
            root.count += 1
            startframe = i
        elseif !skip
            if recur === :flat || recur === :flatc
                pushfirst!(build, parent)
                # Rewind the `parent` tree back, if this exact ip was already present *higher* in the current tree
                found = false
                for j in 1:(startframe - i)
                    if ip == all[i + j]
                        if recur === :flat # if not flattening C frames, check that now
                            frames = lidict[ip]
                            frame = (frames isa Vector ? frames[1] : frames)
                            frame.from_c && break # not flattening this frame
                        end
                        push!(tops, parent)
                        parent = build[j]
                        parent.recur += 1
                        parent.count_recur += 1
                        found = true
                        break
                    end
                end
                found && continue
            end
            builder_key = parent.builder_key
            builder_value = parent.builder_value
            fastkey = searchsortedfirst(builder_key, ip)
            # `searchsortedfirst` returns an index in `1:length+1`, so `<=` is the
            # right bound. With `<` the largest cached key never took the fast
            # path and a duplicate key was inserted below.
            if fastkey <= length(builder_key) && builder_key[fastkey] === ip
                # jump forward to the end of the inlining chain
                # avoiding an extra (slow) lookup of `ip` in `lidict`
                # and an extra chain of them in `down`
                # note that we may even have this === parent (if we're ignoring this frame ip)
                this = builder_value[fastkey]
                let this = this
                    while this !== parent && (recur === :off || this.recur == 0)
                        this.count += 1
                        this.recur = 1
                        this = this.up
                    end
                end
                parent = this
                continue
            end

            frames = lidict[ip]
            nframes = (frames isa Vector ? length(frames) : 1)
            this = parent
            # add all the inlining frames
            for k = nframes:-1:1
                frame = (frames isa Vector ? frames[k] : frames)
                !C && frame.from_c && continue
                key = (T === UInt64 ? ip : frame)
                this = get!(StackFrameTree{T}, parent.down, key)
                if recur === :off || this.recur == 0
                    this.frame = frame
                    this.up = parent
                    this.count += 1
                    this.recur = 1
                end
                parent = this
            end
            # record where the end of this chain is for this ip
            insert!(builder_key, fastkey, ip)
            insert!(builder_value, fastkey, this)
        end
    end
    # Reset the construction-only fields of every node (iteratively, using an
    # explicit stack rather than recursion).
    function cleanup!(node::StackFrameTree)
        stack = [node]
        while !isempty(stack)
            node = pop!(stack)
            node.recur = 0
            empty!(node.builder_key)
            empty!(node.builder_value)
            append!(stack, values(node.down))
        end
        nothing
    end
    cleanup!(root)
    return root, nsleeping
end
1159

1160
# Walk the whole tree below (and including) `root` and return the largest
# `count`, `flat_count`, `overhead` and `max_recur` found, as a named tuple
# `(count, count_flat, overhead, max_recur)`. Uses an explicit stack rather
# than recursion.
function maxstats(root::StackFrameTree)
    best_count = 0
    best_flat = 0
    best_overhead = 0
    best_recur = 0
    pending = [root]
    while !isempty(pending)
        node = pop!(pending)
        best_count = max(best_count, node.count)
        best_overhead = max(best_overhead, node.overhead)
        best_flat = max(best_flat, node.flat_count)
        best_recur = max(best_recur, node.max_recur)
        append!(pending, values(node.down))
    end
    return (count=best_count, count_flat=best_flat, overhead=best_overhead, max_recur=best_recur)
end
1176

1177
# Print the stack frame tree rooted at `bt`. A worklist is used instead of
# recursion to avoid stack overflows.
# The column header is printed unless `is_subsection` is true. Children deeper
# than `fmt.maxdepth`, with a count below `fmt.mincount`, or below the noise
# floor derived from their parent's count are not printed.
function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat, is_subsection::Bool) where T
    maxes = maxstats(bt)
    filenamemap = FileNameMap()
    # Entries are (node, level, noisefloor, line to print before descending).
    worklist = [(bt, 0, 0, AnnotatedString(""))]
    if !is_subsection
        Base.print(io, "Overhead ╎ [+additional indent] Count File:Line  Function\n")
        Base.print(io, "=========================================================\n")
    end
    while !isempty(worklist)
        (node, level, noisefloor, str) = popfirst!(worklist)
        isempty(str) || println(io, str)
        (level > fmt.maxdepth || isempty(node.down)) && continue
        # Order the line information
        nexts = collect(values(node.down))
        # Generate the string for each line
        strs = tree_format(nexts, level, cols, maxes, filenamemap, T === UInt64)
        # Recurse to the next level
        sortedby = fmt.sortedby
        if sortedby === :count
            p = sortperm(collect(child.count for child in nexts))
        elseif sortedby === :overhead
            p = sortperm(collect(child.overhead for child in nexts))
        elseif sortedby === :flat_count
            p = sortperm(collect(child.flat_count for child in nexts))
        else
            p = liperm(collect(child.frame for child in nexts))
        end
        # Children are pushed to the front in reverse order, so they are popped
        # in the order given by `p` and before any remaining siblings of `node`.
        for i in reverse(p)
            down = nexts[i]
            count = down.count
            (count < fmt.mincount || count < noisefloor) && continue
            str = strs[i]::AnnotatedString
            noisefloor_down = fmt.noisefloor > 0 ? floor(Int, fmt.noisefloor * sqrt(count)) : 0
            pushfirst!(worklist, (down, level + 1, noisefloor_down, str))
        end
    end
    return
end
1222

1223
# Build the backtrace tree for `data` and print it to `io`.
# Returns `true` when the tree has no children (a warning or a short summary is
# emitted instead), and `false` after a report has been printed.
# When `is_subsection` is true the summary line is printed before the tree;
# otherwise the tree comes first and the summary line gets a hint about `groupby`.
function tree(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoFlatDict, LineInfoDict}, cols::Int, fmt::ProfileFormat,
                threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool)
    if fmt.combine
        root, nsleeping = tree!(StackFrameTree{StackFrame}(), data, lidict, fmt.C, fmt.recur, threads, tasks)
    else
        root, nsleeping = tree!(StackFrameTree{UInt64}(), data, lidict, fmt.C, fmt.recur, threads, tasks)
    end
    # With no snapshots the ratio would be 0/0 == NaN, and `round(Int, NaN)` below
    # throws an InexactError; report 0% utilization in that case.
    util_perc = root.count == 0 ? 0.0 : (1 - (nsleeping / root.count)) * 100
    is_subsection || print_tree(io, root, cols, fmt, is_subsection)
    if isempty(root.down)
        if is_subsection
            Base.print(io, "Total snapshots: ")
            printstyled(io, "$(root.count)", color=Base.warn_color())
            Base.print(io, ". Utilization: ", round(Int, util_perc), "%\n")
        else
            warning_empty()
        end
        return true
    end
    Base.print(io, "Total snapshots: ", root.count, ". Utilization: ", round(Int, util_perc), "%")
    if is_subsection
        Base.println(io)
        print_tree(io, root, cols, fmt, is_subsection)
    else
        Base.print(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task.\n")
    end
    return false
end
1251

1252
# Count, for every entry of `bt` whose predecessor satisfied `matchfunc`, how
# often its line info occurs. A zero entry resets the "predecessor matched"
# state. Returns a vector of `(count, frame)` tuples sorted by descending count.
function callersf(matchfunc::Function, bt::Vector, lidict::LineInfoFlatDict)
    counts = Dict{StackFrame, Int}()
    prev_matched = false
    for id in bt
        if id == 0
            prev_matched = false
            continue
        end
        li = lidict[id]
        if prev_matched
            counts[li] = get(counts, li, 0) + 1
        end
        prev_matched = matchfunc(li)
    end
    frames = collect(keys(counts))
    tallies = collect(values(counts))
    order = sortperm(tallies, rev=true)
    return [(tallies[i], frames[i]) for i in order]
end
1275

1276
## Utilities
1277

1278
# Return the permutation that sorts `lilist` by file, then function, then line
# number. `UNKNOWN` frames sort after everything else.
function liperm(lilist::Vector{StackFrame})
    function lt(a::StackFrame, b::StackFrame)
        a == UNKNOWN && return false
        b == UNKNOWN && return true
        by_file = cmp(a.file, b.file)
        by_file == 0 || return by_file < 0
        by_func = cmp(a.func, b.func)
        by_func == 0 || return by_func < 0
        return cmp(a.line, b.line) < 0
    end
    return sortperm(lilist, lt = lt)
end
1295

1296
# Emit a warning that no samples were collected. With `summary = true` the
# message refers to "one or more groups" instead of the whole run.
function warning_empty(;summary = false)
    if !summary
        @warn """
        There were no samples collected.
        Run your program longer (perhaps by running it multiple times),
        or adjust the delay between samples with `Profile.init()`."""
        return nothing
    end
    @warn """
    There were no samples collected in one or more groups.
    This may be due to idle threads, or you may need to run your
    program longer (perhaps by running it multiple times),
    or adjust the delay between samples with `Profile.init()`."""
    return nothing
end
1310

1311

1312
"""
1313
    Profile.take_heap_snapshot(filepath::String, all_one::Bool=false;
1314
                               redact_data::Bool=true, streaming::Bool=false)
1315
    Profile.take_heap_snapshot(all_one::Bool=false; redact_data:Bool=true,
1316
                               dir::String=nothing, streaming::Bool=false)
1317

1318
Write a snapshot of the heap, in the JSON format expected by the Chrome
1319
Devtools Heap Snapshot viewer (.heapsnapshot extension) to a file
1320
(`\$pid_\$timestamp.heapsnapshot`) in the current directory by default (or tempdir if
1321
the current directory is unwritable), or in `dir` if given, or the given
1322
full file path, or IO stream.
1323

1324
If `all_one` is true, then report the size of every object as one so they can be easily
1325
counted. Otherwise, report the actual size.
1326

1327
If `redact_data` is true (default), then do not emit the contents of any object.
1328

1329
If `streaming` is true, we will stream the snapshot data out into four files, using filepath
1330
as the prefix, to avoid having to hold the entire snapshot in memory. This option should be
1331
used for any setting where your memory is constrained. These files can then be reassembled
1332
by calling Profile.HeapSnapshot.assemble_snapshot(), which can
1333
be done offline.
1334

1335
NOTE: We strongly recommend setting streaming=true for performance reasons. Reconstructing
1336
the snapshot from the parts requires holding the entire snapshot in memory, so if the
1337
snapshot is large, you can run out of memory while processing it. Streaming allows you to
1338
reconstruct the snapshot offline, after your workload is done running.
1339
If you do attempt to collect a snapshot with streaming=false (the default, for
1340
backwards-compatibility) and your process is killed, note that this will always save the
1341
parts in the same directory as your provided filepath, so you can still reconstruct the
1342
snapshot after the fact, via `assemble_snapshot()`.
1343
"""
1344
function take_heap_snapshot(filepath::AbstractString, all_one::Bool=false; redact_data::Bool=true, streaming::Bool=false)
    # Both modes begin by streaming the snapshot parts to disk, using `filepath`
    # as the prefix of the part files.
    _stream_heap_snapshot(filepath, all_one, redact_data)
    if !streaming
        # Legacy, non-streaming mode: reassemble the streamed parts into a single
        # snapshot at `filepath`, then clean up the streamed part files.
        Profile.HeapSnapshot.assemble_snapshot(filepath, filepath)
        Profile.HeapSnapshot.cleanup_streamed_files(filepath)
    end
    # The path handed in is returned in either mode.
    return filepath
end
1357
function take_heap_snapshot(io::IO, all_one::Bool=false; redact_data::Bool=true)
    # Support the legacy, non-streaming mode for IO targets: stream the parts to
    # disk first, then reassemble them into `io`.
    #
    # The parts are written into a fresh directory created by `mktempdir` rather
    # than to a fixed "snapshot" prefix directly inside `tempdir()`. This way
    # concurrent snapshots (from other tasks or processes) cannot overwrite each
    # other's part files, and the parts are removed once the `do` block exits,
    # including when streaming or assembly throws.
    #
    # The value of the `assemble_snapshot` call is returned.
    return mktempdir() do dir
        prefix = joinpath(dir, "snapshot")
        _stream_heap_snapshot(prefix, all_one, redact_data)
        Profile.HeapSnapshot.assemble_snapshot(prefix, io)
    end
end
1365
function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool, redact_data::Bool)
    # The runtime call takes both flags as C chars.
    all_one_flag = Cchar(all_one)
    redact_flag = Cchar(redact_data)
    # Four part files are written, all named from `prefix`. Each is opened for
    # writing and closed again when its `do` block exits.
    open("$prefix.nodes", "w") do nodes_io              # binary file
        open("$prefix.edges", "w") do edges_io          # binary file
            open("$prefix.strings", "w") do strings_io
                open("$prefix.metadata.json", "w") do json_io  # json data
                    # Hold the lock of all four streams while the runtime writes
                    # through their raw handles.
                    Base.@_lock_ios(nodes_io, Base.@_lock_ios(edges_io,
                        Base.@_lock_ios(strings_io, Base.@_lock_ios(json_io,
                            ccall(:jl_gc_take_heap_snapshot, Cvoid,
                                  (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Cchar, Cchar),
                                  nodes_io.handle, edges_io.handle, strings_io.handle,
                                  json_io.handle, all_one_flag, redact_flag)))))
                end
            end
        end
    end
end
1390
function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing, kwargs...) where {S <: AbstractString}
    # File name of the form `<pid>_<time_ns>.heapsnapshot`.
    snapshot_name = "$(getpid())_$(time_ns()).heapsnapshot"

    # An explicit `dir` is used as given (after `~` expansion); no write check is done.
    if dir !== nothing
        return take_heap_snapshot(joinpath(expanduser(dir), snapshot_name), all_one; kwargs...)
    end

    # Otherwise prefer the current directory. Probe it by creating and removing
    # the target file; if that throws, fall back to the temp directory.
    wd = pwd()
    target = joinpath(wd, snapshot_name)
    try
        touch(target)
        rm(target; force=true)
    catch
        @warn "Cannot write to current directory `$(pwd())` so saving heap snapshot to `$(tempdir())`" maxlog=1 _id=Symbol(wd)
        target = joinpath(tempdir(), snapshot_name)
    end
    # Remaining keyword arguments are forwarded to the file-path method.
    return take_heap_snapshot(target, all_one; kwargs...)
end
1407

1408
"""
1409
    Profile.take_page_profile(io::IOStream)
1410
    Profile.take_page_profile(filepath::String)
1411

1412
Write a JSON snapshot of the pages from Julia's pool allocator, printing for every pool allocated object, whether it's garbage, or its type.
1413
"""
1414
# Write the page profile to an already-open stream. The stream's lock is held
# while the runtime writes through its raw handle.
function take_page_profile(io::IOStream)
    Base.@_lock_ios(io, ccall(:jl_gc_take_page_profile, Cvoid, (Ptr{Cvoid},), io.handle))
end

# Write the page profile to the file at `filepath`, opened in "w" mode, and
# return `filepath`. Any `AbstractString` path is accepted (not only `String`),
# matching `take_heap_snapshot(filepath::AbstractString, ...)`.
function take_page_profile(filepath::AbstractString)
    open(filepath, "w") do io
        take_page_profile(io)
    end
    return filepath
end
1423

1424
include("Allocs.jl")
1425
include("heapsnapshot_reassemble.jl")
1426
include("precompile.jl")
1427

1428
end # module
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc