• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / 1259

03 Sep 2025 10:52PM UTC coverage: 77.165% (-0.02%) from 77.182%
1259

push

buildkite

web-flow
Update nghttp2 to 1.67 (#59472)

61413 of 79587 relevant lines covered (77.16%)

23416318.32 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

74.48
/Compiler/src/optimize.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
#############
4
# constants #
5
#############
6

7
# The slot has uses that are not statically dominated by any assignment
8
# This is implied by `SLOT_USEDUNDEF`.
9
# If this is not set, all the uses are (statically) dominated by the defs.
10
# In particular, if a slot has `AssignedOnce && !StaticUndef`, it is an SSA.
11
const SLOT_STATICUNDEF  = 1 # slot might be used before it is defined (structurally)
12
const SLOT_ASSIGNEDONCE = 16 # slot is assigned to only once
13
const SLOT_USEDUNDEF    = 32 # slot has uses that might raise UndefVarError
14
# const SLOT_CALLED      = 64
15

16
# NOTE make sure to sync the flag definitions below with julia.h and `jl_code_info_set_ir` in method.c
17

18
const IR_FLAG_NULL        = zero(UInt32)
19
# This statement is marked as @inbounds by user.
20
# If replaced by inlining, any contained boundschecks may be removed.
21
const IR_FLAG_INBOUNDS    = one(UInt32) << 0
22
# This statement is marked as @inline by user
23
const IR_FLAG_INLINE      = one(UInt32) << 1
24
# This statement is marked as @noinline by user
25
const IR_FLAG_NOINLINE    = one(UInt32) << 2
26
# This statement is proven :consistent
27
const IR_FLAG_CONSISTENT  = one(UInt32) << 3
28
# This statement is proven :effect_free
29
const IR_FLAG_EFFECT_FREE = one(UInt32) << 4
30
# This statement is proven :nothrow
31
const IR_FLAG_NOTHROW     = one(UInt32) << 5
32
# This statement is proven :terminates_globally
33
const IR_FLAG_TERMINATES  = one(UInt32) << 6
34
#const IR_FLAG_TERMINATES_LOCALLY = one(UInt32) << 7
35
#const IR_FLAG_NOTASKSTATE = one(UInt32) << 8
36
#const IR_FLAG_INACCESSIBLEMEM = one(UInt32) << 9
37
const IR_FLAG_NOUB        = one(UInt32) << 10
38
#const IR_FLAG_NOUBINIB   = one(UInt32) << 11
39
#const IR_FLAG_CONSISTENTOVERLAY = one(UInt32) << 12
40
# This statement is :nortcall
41
const IR_FLAG_NORTCALL = one(UInt32) << 13
42
# An optimization pass has updated this statement in a way that may
43
# have exposed information that inference did not see. Re-running
44
# inference on this statement may be profitable.
45
const IR_FLAG_REFINED     = one(UInt32) << 16
46
# This statement has no users and may be deleted if flags get refined to IR_FLAGS_REMOVABLE
47
const IR_FLAG_UNUSED      = one(UInt32) << 17
48
# TODO: Both of these next two should eventually go away once
49
# This statement is :effect_free == EFFECT_FREE_IF_INACCESSIBLEMEMONLY
50
const IR_FLAG_EFIIMO      = one(UInt32) << 18
51
# This statement is :inaccessiblememonly == INACCESSIBLEMEM_OR_ARGMEMONLY
52
const IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM = one(UInt32) << 19
53

54
const NUM_IR_FLAGS = 3 # sync with julia.h
55

56
const IR_FLAGS_EFFECTS =
57
    IR_FLAG_CONSISTENT | IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW |
58
    IR_FLAG_TERMINATES | IR_FLAG_NOUB | IR_FLAG_NORTCALL
59

60
const IR_FLAGS_REMOVABLE = IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW | IR_FLAG_TERMINATES
61

62
const IR_FLAGS_NEEDS_EA = IR_FLAG_EFIIMO | IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM
63

64
has_flag(curr::UInt32, flag::UInt32) = (curr & flag) == flag
138,454,954✔
65

66
function iscallstmt(@nospecialize stmt)
67
    stmt isa Expr || return false
2,973,581✔
68
    head = stmt.head
1,798,763✔
69
    return head === :call || head === :invoke || head === :foreigncall
3,201,905✔
70
end
71

72
function flags_for_effects(effects::Effects)
73
    flags = zero(UInt32)
1,006,188✔
74
    if is_consistent(effects)
3,823,027✔
75
        flags |= IR_FLAG_CONSISTENT
915,725✔
76
    end
77
    if is_effect_free(effects)
3,823,027✔
78
        flags |= IR_FLAG_EFFECT_FREE
2,887,915✔
79
    elseif is_effect_free_if_inaccessiblememonly(effects)
935,112✔
80
        flags |= IR_FLAG_EFIIMO
14,485✔
81
    end
82
    if is_nothrow(effects)
3,823,027✔
83
        flags |= IR_FLAG_NOTHROW
3,522,779✔
84
    end
85
    if is_terminates(effects)
3,823,027✔
86
        flags |= IR_FLAG_TERMINATES
3,701,924✔
87
    end
88
    if is_inaccessiblemem_or_argmemonly(effects)
3,823,027✔
89
        flags |= IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM
228,480✔
90
    end
91
    if is_noub(effects)
3,823,027✔
92
        flags |= IR_FLAG_NOUB
3,617,140✔
93
    end
94
    if is_nortcall(effects)
3,823,027✔
95
        flags |= IR_FLAG_NORTCALL
3,685,371✔
96
    end
97
    return flags
3,823,027✔
98
end
99

100
const TOP_TUPLE = GlobalRef(Core, :tuple)
101

102
# This corresponds to the type of `CodeInfo`'s `inlining_cost` field
103
const InlineCostType = UInt16
104
const MAX_INLINE_COST = typemax(InlineCostType)
105
const MIN_INLINE_COST = InlineCostType(10)
106
const MaybeCompressed = Union{CodeInfo, String}
107

108
is_inlineable(@nospecialize src::MaybeCompressed) =
612,776✔
109
    ccall(:jl_ir_inlining_cost, InlineCostType, (Any,), src) != MAX_INLINE_COST
110
set_inlineable!(src::CodeInfo, val::Bool) =
233✔
111
    src.inlining_cost = (val ? MIN_INLINE_COST : MAX_INLINE_COST)
112

113
function inline_cost_clamp(x::Int)
114
    x > MAX_INLINE_COST && return MAX_INLINE_COST
108,105✔
115
    x < MIN_INLINE_COST && return MIN_INLINE_COST
108,105✔
116
    x = ccall(:jl_encode_inlining_cost, UInt8, (InlineCostType,), x)
37,716✔
117
    x = ccall(:jl_decode_inlining_cost, InlineCostType, (UInt8,), x)
37,716✔
118
    return x
37,716✔
119
end
120

121
const SRC_FLAG_DECLARED_INLINE = 0x1
122
const SRC_FLAG_DECLARED_NOINLINE = 0x2
123

124
is_declared_inline(@nospecialize src::MaybeCompressed) =
415,675✔
125
    ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == SRC_FLAG_DECLARED_INLINE
126

127
is_declared_noinline(@nospecialize src::MaybeCompressed) =
195✔
128
    ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == SRC_FLAG_DECLARED_NOINLINE
129

130
#####################
131
# OptimizationState #
132
#####################
133

134
# return whether this src should be inlined. If so, retrieve_ir_for_inlining must return an IRCode from it
135

136
function src_inlining_policy(interp::AbstractInterpreter, mi::MethodInstance,
137
    @nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt32)
138
    # If we have a generator, but we can't invoke it (because argument type information is lacking),
139
    # don't inline so we defer its invocation to runtime where we'll have precise type information.
140
    if isa(mi.def, Method) && hasgenerator(mi)
632,931✔
141
        may_invoke_generator(mi) || return false
434✔
142
    end
143
    return src_inlining_policy(interp, src, info, stmt_flag)
632,940✔
144
end
145

146
function src_inlining_policy(interp::AbstractInterpreter,
636,027✔
147
    @nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt32)
148
    isa(src, OptimizationState) && (src = src.src)
636,222✔
149
    if isa(src, MaybeCompressed)
636,222✔
150
        src_inlineable = is_stmt_inline(stmt_flag) || is_inlineable(src)
1,085,500✔
151
        return src_inlineable
542,784✔
152
    elseif isa(src, IRCode)
93,438✔
153
        return true
195✔
154
    end
155
    @assert !isa(src, CodeInstance) # handled by caller
93,243✔
156
    return false
93,243✔
157
end
158

159
struct InliningState{Interp<:AbstractInterpreter}
160
    edges::Vector{Any}
167,654✔
161
    world::UInt
162
    interp::Interp
163
    opt_cache::IdDict{MethodInstance,CodeInstance}
164
end
165
function InliningState(sv::InferenceState, interp::AbstractInterpreter,
166
                       opt_cache::IdDict{MethodInstance,CodeInstance}=IdDict{MethodInstance,CodeInstance}())
167
    return InliningState(sv.edges, frame_world(sv), interp, opt_cache)
167,636✔
168
end
169
function InliningState(interp::AbstractInterpreter,
9✔
170
                       opt_cache::IdDict{MethodInstance,CodeInstance}=IdDict{MethodInstance,CodeInstance}())
171
    return InliningState(Any[], get_inference_world(interp), interp, opt_cache)
27✔
172
end
173

174
struct OptimizerCache{CodeCache}
175
    wvc::WorldView{CodeCache}
176
    owner
177
    opt_cache::IdDict{MethodInstance,CodeInstance}
178
    function OptimizerCache(
179
        wvc::WorldView{CodeCache},
180
        @nospecialize(owner),
181
        opt_cache::IdDict{MethodInstance,CodeInstance}) where CodeCache
182
        new{CodeCache}(wvc, owner, opt_cache)
161,058✔
183
    end
184
end
185
function get((; wvc, owner, opt_cache)::OptimizerCache, mi::MethodInstance, default)
117,678✔
186
    if haskey(opt_cache, mi)
117,786✔
187
        codeinst = opt_cache[mi]
108✔
188
        @assert codeinst.min_world ≤ wvc.worlds.min_world &&
108✔
189
                wvc.worlds.max_world ≤ codeinst.max_world &&
190
                codeinst.owner === owner
191
        @assert isdefined(codeinst, :inferred) && codeinst.inferred === nothing
108✔
192
        return codeinst
108✔
193
    end
194
    return get(wvc, mi, default)
117,570✔
195
end
196

197
# get `code_cache(::AbstractInterpreter)` from `state::InliningState`
198
function code_cache(state::InliningState)
199
    cache = WorldView(code_cache(state.interp), state.world)
161,058✔
200
    owner = cache_owner(state.interp)
161,058✔
201
    return OptimizerCache(cache, owner, state.opt_cache)
161,058✔
202
end
203

204
mutable struct OptimizationResult
205
    ir::IRCode
169,208✔
206
    inline_flag::UInt8
207
    simplified::Bool # indicates whether the IR was processed with `cfg_simplify!`
208
end
209

210
function simplify_ir!(result::OptimizationResult)
211
    result.ir = cfg_simplify!(result.ir)
126,816✔
212
    result.simplified = true
126,816✔
213
end
214

215
mutable struct OptimizationState{Interp<:AbstractInterpreter}
216
    linfo::MethodInstance
167,645✔
217
    src::CodeInfo
218
    optresult::Union{Nothing, OptimizationResult}
219
    stmt_info::Vector{CallInfo}
220
    mod::Module
221
    sptypes::Vector{VarState}
222
    slottypes::Vector{Any}
223
    inlining::InliningState{Interp}
224
    cfg::CFG
225
    unreachable::BitSet
226
    bb_vartables::Vector{Union{Nothing,VarTable}}
227
    insert_coverage::Bool
228
end
229
function OptimizationState(sv::InferenceState, interp::AbstractInterpreter,
230
                           opt_cache::IdDict{MethodInstance,CodeInstance}=IdDict{MethodInstance,CodeInstance}())
231
    inlining = InliningState(sv, interp, opt_cache)
177,679✔
232
    return OptimizationState(sv.linfo, sv.src, nothing, sv.stmt_info, sv.mod,
167,636✔
233
                             sv.sptypes, sv.slottypes, inlining, sv.cfg,
234
                             sv.unreachable, sv.bb_vartables, sv.insert_coverage)
235
end
236
function OptimizationState(mi::MethodInstance, src::CodeInfo, interp::AbstractInterpreter,
9✔
237
                           opt_cache::IdDict{MethodInstance,CodeInstance}=IdDict{MethodInstance,CodeInstance}())
238
    # prepare src for running optimization passes if it isn't already
239
    nssavalues = src.ssavaluetypes
18✔
240
    if nssavalues isa Int
9✔
241
        src.ssavaluetypes = Any[ Any for i = 1:nssavalues ]
72✔
242
    else
243
        nssavalues = length(src.ssavaluetypes::Vector{Any})
×
244
    end
245
    sptypes = sptypes_from_meth_instance(mi)
9✔
246
    nslots = length(src.slotflags)
9✔
247
    slottypes = src.slottypes
9✔
248
    if slottypes === nothing
9✔
249
        slottypes = Any[ Any for i = 1:nslots ]
24✔
250
    end
251
    stmt_info = CallInfo[ NoCallInfo() for i = 1:nssavalues ]
72✔
252
    # cache some useful state computations
253
    def = mi.def
9✔
254
    mod = isa(def, Method) ? def.module : def
12✔
255
    # Allow using the global MI cache, but don't track edges.
256
    # This method is mostly used for unit testing the optimizer
257
    inlining = InliningState(interp, opt_cache)
9✔
258
    cfg = compute_basic_blocks(src.code)
9✔
259
    unreachable = BitSet()
9✔
260
    bb_vartables = Union{VarTable,Nothing}[]
9✔
261
    for block = 1:length(cfg.blocks)
9✔
262
        push!(bb_vartables, VarState[
129✔
263
            VarState(slottypes[slot], src.slotflags[slot] & SLOT_USEDUNDEF != 0)
264
            for slot = 1:nslots
265
        ])
266
    end
51✔
267
    return OptimizationState(mi, src, nothing, stmt_info, mod, sptypes, slottypes, inlining, cfg, unreachable, bb_vartables, false)
9✔
268
end
269
function OptimizationState(mi::MethodInstance, interp::AbstractInterpreter)
6✔
270
    world = get_inference_world(interp)
6✔
271
    src = retrieve_code_info(mi, world)
6✔
272
    src === nothing && return nothing
6✔
273
    return OptimizationState(mi, src, interp)
6✔
274
end
275

276
function argextype end # imported by EscapeAnalysis
277
function try_compute_field end # imported by EscapeAnalysis
278

279
include("ssair/heap.jl")
280
include("ssair/slot2ssa.jl")
281
include("ssair/inlining.jl")
282
include("ssair/verify.jl")
283
include("ssair/legacy.jl")
284
include("ssair/EscapeAnalysis.jl")
285
include("ssair/passes.jl")
286
include("ssair/irinterp.jl")
287

288
function ir_to_codeinf!(opt::OptimizationState, frame::InferenceState, edges::SimpleVector)
9,804✔
289
    ir_to_codeinf!(opt, edges, compute_inlining_cost(frame.interp, frame.result, opt.optresult))
9,804✔
290
end
291

292
function ir_to_codeinf!(opt::OptimizationState, edges::SimpleVector, inlining_cost::InlineCostType)
293
    src = ir_to_codeinf!(opt, edges)
19,608✔
294
    src.inlining_cost = inlining_cost
9,804✔
295
    src
9,804✔
296
end
297

298
function ir_to_codeinf!(opt::OptimizationState, edges::SimpleVector)
299
    src = ir_to_codeinf!(opt)
22,537✔
300
    src.edges = edges
12,733✔
301
    src
4,138✔
302
end
303

304
function ir_to_codeinf!(opt::OptimizationState)
305
    (; linfo, src, optresult) = opt
38,164✔
306
    if optresult === nothing
38,164✔
307
        return src
2,929✔
308
    end
309
    src = ir_to_codeinf!(src, optresult.ir)
35,235✔
310
    opt.optresult = nothing
35,235✔
311
    opt.src = src
35,235✔
312
    maybe_validate_code(linfo, src, "optimized")
35,235✔
313
    return src
35,235✔
314
end
315

316
function ir_to_codeinf!(src::CodeInfo, ir::IRCode)
317
    replace_code_newstyle!(src, ir)
35,300✔
318
    widen_all_consts!(src)
35,300✔
319
    return src
13,185✔
320
end
321

322
# widen all Const elements in type annotations
323
function widen_all_consts!(src::CodeInfo)
13,185✔
324
    ssavaluetypes = src.ssavaluetypes::Vector{Any}
13,185✔
325
    for i = 1:length(ssavaluetypes)
26,370✔
326
        ssavaluetypes[i] = widenconst(ssavaluetypes[i])
1,414,236✔
327
    end
2,815,287✔
328

329
    for i = 1:length(src.code)
26,370✔
330
        x = src.code[i]
1,414,236✔
331
        if isa(x, PiNode)
1,414,236✔
332
            src.code[i] = PiNode(x.val, widenconst(x.typ))
9,489✔
333
        end
334
    end
2,815,287✔
335

336
    return src
13,185✔
337
end
338

339
#########
340
# logic #
341
#########
342

343
_topmod(sv::OptimizationState) = _topmod(sv.mod)
×
344

345
is_stmt_inline(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_INLINE)
1,871,360✔
346
is_stmt_noinline(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_NOINLINE)
634,107✔
347

348
function new_expr_effect_flags(𝕃ₒ::AbstractLattice, args::Vector{Any}, src::Union{IRCode,IncrementalCompact}, pattern_match=nothing)
6,681✔
349
    Targ = args[1]
13,236✔
350
    atyp = argextype(Targ, src)
6,681✔
351
    # `Expr(:new)` of unknown type could raise arbitrary TypeError.
352
    typ, isexact = instanceof_tfunc(atyp, true)
6,681✔
353
    if !isexact
6,681✔
354
        atyp = unwrap_unionall(widenconst(atyp))
150✔
355
        if isType(atyp) && isTypeDataType(atyp.parameters[1])
300✔
356
            typ = atyp.parameters[1]
150✔
357
        else
358
            return (false, false, false)
×
359
        end
360
        isabstracttype(typ) && return (false, false, false)
150✔
361
    else
362
        isconcretedispatch(typ) || return (false, false, false)
6,531✔
363
    end
364
    typ = typ::DataType
6,681✔
365
    fcount = datatype_fieldcount(typ)
6,681✔
366
    fcount === nothing && return (false, false, false)
6,681✔
367
    fcount >= length(args) - 1 || return (false, false, false)
6,681✔
368
    for fidx in 1:(length(args) - 1)
13,362✔
369
        farg = args[fidx + 1]
11,721✔
370
        eT = argextype(farg, src)
11,721✔
371
        fT = fieldtype(typ, fidx)
11,721✔
372
        if !isexact && has_free_typevars(fT)
11,721✔
373
            if pattern_match !== nothing && pattern_match(src, typ, fidx, Targ, farg)
144✔
374
                continue
×
375
            end
376
            return (false, false, false)
144✔
377
        end
378
        ⊑(𝕃ₒ, eT, fT) || return (false, false, false)
11,577✔
379
    end
16,617✔
380
    return (false, true, true)
6,537✔
381
end
382

383
# Returns a tuple of `(:consistent, :removable, :nothrow)` flags for a given statement.
384
function stmt_effect_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt), @nospecialize(rt), src::Union{IRCode,IncrementalCompact})
1,240,698✔
385
    # TODO: We're duplicating analysis from inference here.
386
    isa(stmt, PiNode) && return (true, true, true)
1,240,698✔
387
    isa(stmt, PhiNode) && return (true, true, true)
1,190,931✔
388
    isa(stmt, ReturnNode) && return (true, false, true)
1,180,715✔
389
    isa(stmt, EnterNode) && return (true, false, true)
1,045,163✔
390
    isa(stmt, GotoNode) && return (true, false, true)
1,042,019✔
391
    isa(stmt, GotoIfNot) && return (true, false, ⊑(𝕃ₒ, argextype(stmt.cond, src), Bool))
1,033,747✔
392
    if isa(stmt, GlobalRef)
1,026,466✔
393
        # Modeled more precisely in abstract_eval_globalref. In general, if a
394
        # GlobalRef was moved to statement position, it is probably not `const`,
395
        # so we can't say much about it anyway.
396
        return (false, false, false)
×
397
    elseif isa(stmt, Expr)
1,026,466✔
398
        (; head, args) = stmt
761,112✔
399
        if head === :static_parameter
761,112✔
400
            # if we aren't certain enough about the type, it might be an UndefVarError at runtime
401
            sptypes = isa(src, IRCode) ? src.sptypes : src.ir.sptypes
50✔
402
            nothrow = !sptypes[args[1]::Int].undef
50✔
403
            return (true, nothrow, nothrow)
50✔
404
        end
405
        if head === :call
761,062✔
406
            f = argextype(args[1], src)
327,521✔
407
            f = singleton_type(f)
327,587✔
408
            f === nothing && return (false, false, false)
327,521✔
409
            if f === Intrinsics.cglobal || f === Intrinsics.llvmcall
654,910✔
410
                # TODO: these are not yet linearized
411
                return (false, false, false)
×
412
            end
413
            isa(f, Builtin) || return (false, false, false)
568,371✔
414
            # Needs to be handled in inlining to look at the callee effects
415
            f === Core._apply_iterate && return (false, false, false)
86,539✔
416
            argtypes = Any[argextype(args[arg], src) for arg in 2:length(args)]
166,196✔
417
            effects = builtin_effects(𝕃ₒ, f, argtypes, rt)
86,539✔
418
            consistent = is_consistent(effects)
86,539✔
419
            effect_free = is_effect_free(effects)
86,539✔
420
            nothrow = is_nothrow(effects)
86,539✔
421
            terminates = is_terminates(effects)
86,539✔
422
            removable = effect_free & nothrow & terminates
86,539✔
423
            return (consistent, removable, nothrow)
86,539✔
424
        elseif head === :new
433,541✔
425
            return new_expr_effect_flags(𝕃ₒ, args, src)
6,555✔
426
        elseif head === :foreigncall
426,986✔
427
            effects = foreigncall_effects(stmt) do @nospecialize x
620✔
428
                argextype(x, src)
429
            end
430
            consistent = is_consistent(effects)
620✔
431
            effect_free = is_effect_free(effects)
620✔
432
            nothrow = is_nothrow(effects)
620✔
433
            terminates = is_terminates(effects)
620✔
434
            removable = effect_free & nothrow & terminates
620✔
435
            return (consistent, removable, nothrow)
620✔
436
        elseif head === :new_opaque_closure
426,366✔
437
            length(args) < 4 && return (false, false, false)
×
438
            typ = argextype(args[1], src)
×
439
            typ, isexact = instanceof_tfunc(typ, true)
×
440
            isexact || return (false, false, false)
×
441
            ⊑(𝕃ₒ, typ, Tuple) || return (false, false, false)
×
442
            rt_lb = argextype(args[2], src)
×
443
            rt_ub = argextype(args[3], src)
×
444
            source = argextype(args[5], src)
×
445
            if !(⊑(𝕃ₒ, rt_lb, Type) && ⊑(𝕃ₒ, rt_ub, Type) && ⊑(𝕃ₒ, source, Method))
×
446
                return (false, false, false)
×
447
            end
448
            return (false, true, true)
×
449
        elseif head === :inbounds
426,366✔
450
            return (true, true, true)
×
451
        elseif head === :boundscheck || head === :isdefined || head === :the_exception || head === :copyast
852,615✔
452
            return (false, true, true)
345✔
453
        else
454
            # e.g. :loopinfo
455
            return (false, false, false)
426,021✔
456
        end
457
    end
458
    isa(stmt, SlotNumber) && error("unexpected IR elements")
265,354✔
459
    return (true, true, true)
265,354✔
460
end
461

462
function recompute_effects_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt), @nospecialize(rt),
463
                                 src::Union{IRCode,IncrementalCompact})
464
    flag = IR_FLAG_NULL
1,240,698✔
465
    (consistent, removable, nothrow) = stmt_effect_flags(𝕃ₒ, stmt, rt, src)
1,426,412✔
466
    if consistent
1,426,412✔
467
        flag |= IR_FLAG_CONSISTENT
560,392✔
468
    end
469
    if removable
1,426,412✔
470
        flag |= IR_FLAGS_REMOVABLE
461,074✔
471
    elseif nothrow
965,338✔
472
        flag |= IR_FLAG_NOTHROW
192,330✔
473
    end
474
    if !iscallstmt(stmt)
1,912,657✔
475
        # There is a bit of a subtle point here, which is that some non-call
476
        # statements (e.g. PiNode) can be UB:, however, we consider it
477
        # illegal to introduce such statements that actually cause UB (for any
478
        # input). Ideally that'd be handled at insertion time (TODO), but for
479
        # the time being just do that here.
480
        flag |= IR_FLAG_NOUB
1,032,913✔
481
    end
482
    return flag
1,426,412✔
483
end
484

485
"""
486
    argextype(x, src::Union{IRCode,IncrementalCompact}) -> t
487
    argextype(x, src::CodeInfo, sptypes::Vector{VarState}) -> t
488

489
Return the type of value `x` in the context of inferred source `src`.
490
Note that `t` might be an extended lattice element.
491
Use `widenconst(t)` to get the native Julia type of `x`.
492
"""
493
argextype(@nospecialize(x), ir::IRCode, sptypes::Vector{VarState} = ir.sptypes) =
5,201,435✔
494
    argextype(x, ir, sptypes, ir.argtypes)
495
function argextype(@nospecialize(x), compact::IncrementalCompact, sptypes::Vector{VarState} = compact.ir.sptypes)
496
    isa(x, AnySSAValue) && return types(compact)[x]
6,517,977✔
497
    return argextype(x, compact, sptypes, compact.ir.argtypes)
2,948,907✔
498
end
499
function argextype(@nospecialize(x), src::CodeInfo, sptypes::Vector{VarState})
500
    return argextype(x, src, sptypes, src.slottypes::Union{Vector{Any},Nothing})
4,408✔
501
end
502
function argextype(
4,546,805✔
503
    @nospecialize(x), src::Union{IRCode,IncrementalCompact,CodeInfo},
504
    sptypes::Vector{VarState}, slottypes::Union{Vector{Any},Nothing})
505
    if isa(x, Expr)
4,594,802✔
506
        if x.head === :static_parameter
×
507
            idx = x.args[1]::Int
×
508
            (1 ≤ idx ≤ length(sptypes)) || throw(InvalidIRError())
×
509
            return sptypes[idx].typ
×
510
        elseif x.head === :boundscheck
×
511
            return Bool
×
512
        elseif x.head === :copyast
×
513
            length(x.args) == 0 && throw(InvalidIRError())
×
514
            return argextype(x.args[1], src, sptypes, slottypes)
×
515
        end
516
        Core.println("argextype called on Expr with head ", x.head,
×
517
                     " which is not valid for IR in argument-position.")
518
        @assert false
×
519
    elseif isa(x, SlotNumber)
4,594,802✔
520
        slottypes === nothing && return Any
×
521
        (1 ≤ x.id ≤ length(slottypes)) || throw(InvalidIRError())
×
522
        return slottypes[x.id]
×
523
    elseif isa(x, SSAValue)
4,594,802✔
524
        return abstract_eval_ssavalue(x, src)
1,006,738✔
525
    elseif isa(x, Argument)
4,334,799✔
526
        slottypes === nothing && return Any
314,282✔
527
        (1 ≤ x.n ≤ length(slottypes)) || throw(InvalidIRError())
314,474✔
528
        return slottypes[x.n]
314,474✔
529
    elseif isa(x, QuoteNode)
4,020,517✔
530
        return Const(x.value)
40,639✔
531
    elseif isa(x, GlobalRef)
3,979,878✔
532
        return abstract_eval_globalref_type(x, src)
3,390,636✔
533
    elseif isa(x, PhiNode) || isa(x, PhiCNode) || isa(x, UpsilonNode)
1,178,469✔
534
        return Any
×
535
    elseif isa(x, PiNode)
589,242✔
536
        return x.typ
×
537
    else
538
        return Const(x)
589,242✔
539
    end
540
end
541
function abstract_eval_ssavalue(s::SSAValue, src::CodeInfo)
×
542
    ssavaluetypes = src.ssavaluetypes
×
543
    if ssavaluetypes isa Int
×
544
        (1 ≤ s.id ≤ ssavaluetypes) || throw(InvalidIRError())
×
545
        return Any
×
546
    else
547
        return abstract_eval_ssavalue(s, ssavaluetypes::Vector{Any})
×
548
    end
549
end
550
abstract_eval_ssavalue(s::SSAValue, src::Union{IRCode,IncrementalCompact}) = types(src)[s]
1,006,762✔
551

552
"""
553
    finishopt!(interp::AbstractInterpreter, opt::OptimizationState, ir::IRCode)
554

555
Called at the end of optimization to store the resulting IR back into the OptimizationState.
556
"""
557
function finishopt!(interp::AbstractInterpreter, opt::OptimizationState, ir::IRCode)
1,856✔
558
    opt.optresult = OptimizationResult(ir, ccall(:jl_ir_flag_inlining, UInt8, (Any,), opt.src), false)
169,208✔
559
    return nothing
160,598✔
560
end
561

562
function visit_bb_phis!(callback, ir::IRCode, bb::Int)
563
    stmts = ir.cfg.blocks[bb].stmts
×
564
    for idx in stmts
×
565
        stmt = ir[SSAValue(idx)][:stmt]
×
566
        if !isa(stmt, PhiNode)
×
567
            if !is_valid_phiblock_stmt(stmt)
×
568
                return
×
569
            end
570
        else
571
            callback(idx)
×
572
        end
573
    end
×
574
end
575

576
function any_stmt_may_throw(ir::IRCode, bb::Int)
577
    for idx in ir.cfg.blocks[bb].stmts
324✔
578
        if !has_flag(ir[SSAValue(idx)], IR_FLAG_NOTHROW)
222✔
579
            return true
162✔
580
        end
581
    end
120✔
582
    return false
×
583
end
584

585
visit_conditional_successors(callback, ir::IRCode, bb::Int) = # used for test
6✔
586
    visit_conditional_successors(callback, LazyPostDomtree(ir), ir, bb)
587
function visit_conditional_successors(callback, lazypostdomtree::LazyPostDomtree, ir::IRCode, bb::Int)
168✔
588
    visited = BitSet((bb,))
168✔
589
    worklist = Int[bb]
168✔
590
    while !isempty(worklist)
190✔
591
        thisbb = popfirst!(worklist)
184✔
592
        for succ in ir.cfg.blocks[thisbb].succs
184✔
593
            succ in visited && continue
190✔
594
            push!(visited, succ)
190✔
595
            if postdominates(get!(lazypostdomtree), succ, bb)
274✔
596
                # this successor is not conditional, so no need to visit it further
597
                continue
12✔
598
            elseif callback(succ)
238✔
599
                return true
162✔
600
            else
601
                push!(worklist, succ)
16✔
602
            end
603
        end
28✔
604
    end
22✔
605
    return false
6✔
606
end
607

608
struct AugmentedDomtree
609
    cfg::CFG
×
610
    domtree::DomTree
611
end
612

613
mutable struct LazyAugmentedDomtree
614
    const ir::IRCode
615
    agdomtree::AugmentedDomtree
616
    LazyAugmentedDomtree(ir::IRCode) = new(ir)
158,506✔
617
end
618

619
function get!(lazyagdomtree::LazyAugmentedDomtree)
×
620
    isdefined(lazyagdomtree, :agdomtree) && return lazyagdomtree.agdomtree
×
621
    ir = lazyagdomtree.ir
×
622
    cfg = copy(ir.cfg)
×
623
    # Add a virtual basic block to represent the exit
624
    push!(cfg.blocks, BasicBlock(StmtRange(0:-1)))
×
625
    for bb = 1:(length(cfg.blocks)-1)
×
626
        terminator = ir[SSAValue(last(cfg.blocks[bb].stmts))][:stmt]
×
627
        if isa(terminator, ReturnNode) && isdefined(terminator, :val)
×
628
            cfg_insert_edge!(cfg, bb, length(cfg.blocks))
×
629
        end
630
    end
×
631
    domtree = construct_domtree(cfg)
×
632
    return lazyagdomtree.agdomtree = AugmentedDomtree(cfg, domtree)
×
633
end
634

635
mutable struct PostOptAnalysisState
636
    const result::InferenceResult
637
    const ir::IRCode
638
    const inconsistent::BitSetBoundedMinPrioritySet
639
    const tpdum::TwoPhaseDefUseMap
640
    const lazypostdomtree::LazyPostDomtree
641
    const lazyagdomtree::LazyAugmentedDomtree
642
    const ea_analysis_pending::Vector{Int}
643
    all_retpaths_consistent::Bool
644
    all_effect_free::Bool
645
    effect_free_if_argmem_only::Union{Nothing,Bool}
646
    all_nothrow::Bool
647
    all_noub::Bool
648
    any_conditional_ub::Bool
649
    nortcall::Bool
650
    function PostOptAnalysisState(result::InferenceResult, ir::IRCode)
651
        inconsistent = BitSetBoundedMinPrioritySet(length(ir.stmts))
158,506✔
652
        tpdum = TwoPhaseDefUseMap(length(ir.stmts))
5,018,001✔
653
        lazypostdomtree = LazyPostDomtree(ir)
158,506✔
654
        lazyagdomtree = LazyAugmentedDomtree(ir)
158,506✔
655
        return new(result, ir, inconsistent, tpdum, lazypostdomtree, lazyagdomtree, Int[],
158,506✔
656
                   true, true, nothing, true, true, false, true)
657
    end
658
end
659

660
give_up_refinements!(sv::PostOptAnalysisState) =
×
661
    sv.all_retpaths_consistent = sv.all_effect_free = sv.effect_free_if_argmem_only =
662
    sv.all_nothrow = sv.all_noub = sv.nortcall = false
663

664
function any_refinable(sv::PostOptAnalysisState)
665
    effects = sv.result.ipo_effects
1,172,348✔
666
    return ((!is_consistent(effects) & sv.all_retpaths_consistent) |
1,172,348✔
667
            (!is_effect_free(effects) & sv.all_effect_free) |
668
            (!is_nothrow(effects) & sv.all_nothrow) |
669
            (!is_noub(effects) & sv.all_noub) |
670
            (!is_nortcall(effects) & sv.nortcall))
671
end
672

673
struct GetNativeEscapeCache{CodeCache}
674
    code_cache::CodeCache
675
    GetNativeEscapeCache(code_cache::CodeCache) where CodeCache = new{CodeCache}(code_cache)
×
676
end
677
GetNativeEscapeCache(interp::AbstractInterpreter) = GetNativeEscapeCache(code_cache(interp))
×
678
function ((; code_cache)::GetNativeEscapeCache)(codeinst::Union{CodeInstance,MethodInstance})
679
    if codeinst isa MethodInstance
×
680
        codeinst = get(code_cache, codeinst, nothing)
×
681
        codeinst isa CodeInstance || return false
×
682
    end
683
    argescapes = traverse_analysis_results(codeinst) do @nospecialize result
×
684
        return result isa EscapeAnalysis.ArgEscapeCache ? result : nothing
×
685
    end
686
    if argescapes !== nothing
×
687
        return argescapes
×
688
    end
689
    effects = decode_effects(codeinst.ipo_purity_bits)
×
690
    if is_effect_free(effects) && is_inaccessiblememonly(effects)
×
691
        # We might not have run EA on simple frames without any escapes (e.g. when optimization
692
        # is skipped when result is constant-folded by abstract interpretation). If those
693
        # frames aren't inlined, the accuracy of EA for caller context takes a big hit.
694
        # This is a HACK to avoid that, but obviously, a more comprehensive fix would be ideal.
695
        return true
×
696
    end
697
    return false
×
698
end
699

700
function refine_effects!(interp::AbstractInterpreter, opt::OptimizationState, sv::PostOptAnalysisState)
158,506✔
701
    if !is_effect_free(sv.result.ipo_effects) && sv.all_effect_free && !isempty(sv.ea_analysis_pending)
158,506✔
702
        ir = sv.ir
×
703
        nargs = Int(opt.src.nargs)
×
704
        estate = EscapeAnalysis.analyze_escapes(ir, nargs, optimizer_lattice(interp), get_escape_cache(interp))
×
705
        argescapes = EscapeAnalysis.ArgEscapeCache(estate)
×
706
        stack_analysis_result!(sv.result, argescapes)
×
707
        validate_mutable_arg_escapes!(estate, sv)
×
708
    end
709

710
    any_refinable(sv) || return false
316,406✔
711
    effects = sv.result.ipo_effects
606✔
712
    sv.result.ipo_effects = Effects(effects;
606✔
713
        consistent = sv.all_retpaths_consistent ? ALWAYS_TRUE : effects.consistent,
714
        effect_free = sv.all_effect_free ? ALWAYS_TRUE :
715
                      sv.effect_free_if_argmem_only === true ? EFFECT_FREE_IF_INACCESSIBLEMEMONLY : effects.effect_free,
716
        nothrow = sv.all_nothrow ? true : effects.nothrow,
717
        noub = sv.all_noub ? (sv.any_conditional_ub ? NOUB_IF_NOINBOUNDS : ALWAYS_TRUE) : effects.noub,
718
        nortcall = sv.nortcall ? true : effects.nortcall)
719
    return true
606✔
720
end
721

722
function is_ipo_dataflow_analysis_profitable(effects::Effects)
723
    return !(is_consistent(effects) && is_effect_free(effects) &&
158,781✔
724
             is_nothrow(effects) && is_noub(effects))
725
end
726

727
function iscall_with_boundscheck(@nospecialize(stmt), sv::PostOptAnalysisState)
1,043,714✔
728
    isexpr(stmt, :call) || return false
2,037,122✔
729
    ft = argextype(stmt.args[1], sv.ir)
50,306✔
730
    f = singleton_type(ft)
50,342✔
731
    f === nothing && return false
50,306✔
732
    if f === getfield
50,270✔
733
        nargs = 4
14,423✔
734
    elseif f === memoryrefnew
35,847✔
735
        nargs= 3
324✔
736
    elseif f === memoryrefget || f === memoryref_isassigned
71,046✔
737
        nargs = 4
×
738
    elseif f === memoryrefset!
35,523✔
739
        nargs = 5
36✔
740
    else
741
        return false
35,487✔
742
    end
743
    length(stmt.args) < nargs && return false
14,783✔
744
    boundscheck = stmt.args[end]
1,374✔
745
    argextype(boundscheck, sv.ir) === Bool || return false
1,389✔
746
    isa(boundscheck, SSAValue) || return false
1,359✔
747
    return true
1,359✔
748
end
749

750
function check_all_args_noescape!(sv::PostOptAnalysisState, ir::IRCode, @nospecialize(stmt),
×
751
                                  estate::EscapeAnalysis.EscapeState)
752
    stmt isa Expr || return false
×
753
    if isexpr(stmt, :invoke)
×
754
        startidx = 2
×
755
    elseif isexpr(stmt, :new)
×
756
        startidx = 1
×
757
    else
758
        return false
×
759
    end
760
    has_no_escape(x::EscapeAnalysis.EscapeInfo) =
×
761
        EscapeAnalysis.has_no_escape(EscapeAnalysis.ignore_argescape(x))
762
    for i = startidx:length(stmt.args)
×
763
        arg = stmt.args[i]
×
764
        argt = argextype(arg, ir)
×
765
        if is_mutation_free_argtype(argt)
×
766
            continue
×
767
        end
768
        # See if we can find the allocation
769
        if isa(arg, Argument)
×
770
            if has_no_escape(estate[arg])
×
771
                # Even if we prove everything else effect_free, the best we can
772
                # say is :effect_free_if_argmem_only
773
                if sv.effect_free_if_argmem_only === nothing
×
774
                    sv.effect_free_if_argmem_only = true
×
775
                end
776
            else
777
                sv.effect_free_if_argmem_only = false
×
778
            end
779
            return false
×
780
        elseif isa(arg, SSAValue)
×
781
            has_no_escape(estate[arg]) || return false
×
782
            check_all_args_noescape!(sv, ir, ir[arg][:stmt], estate) || return false
×
783
        else
784
            return false
×
785
        end
786
    end
×
787
    return true
×
788
end
789

790
function validate_mutable_arg_escapes!(estate::EscapeAnalysis.EscapeState, sv::PostOptAnalysisState)
791
    ir = sv.ir
×
792
    for idx in sv.ea_analysis_pending
×
793
        # See if any mutable memory was allocated in this function and determined
794
        # not to escape.
795
        inst = ir[SSAValue(idx)]
×
796
        stmt = inst[:stmt]
×
797
        if !check_all_args_noescape!(sv, ir, stmt, estate)
×
798
            return sv.all_effect_free = false
×
799
        end
800
    end
×
801
    return true
×
802
end
803

804
function is_conditional_noub(inst::Instruction, sv::PostOptAnalysisState)
805
    stmt = inst[:stmt]
29,872✔
806
    iscall_with_boundscheck(stmt, sv) || return false
59,597✔
807
    barg = stmt.args[end]::SSAValue
147✔
808
    bstmt = sv.ir[barg][:stmt]
147✔
809
    isexpr(bstmt, :boundscheck) || return false
147✔
810
    # If IR_FLAG_INBOUNDS is already set, no more conditional ub
811
    (!isempty(bstmt.args) && bstmt.args[1] === false) && return false
147✔
812
    return true
87✔
813
end
814

815
function scan_non_dataflow_flags!(inst::Instruction, sv::PostOptAnalysisState)
1,013,842✔
816
    flag = inst[:flag]
1,013,842✔
817
    # If we can prove that the argmem does not escape the current function, we can
818
    # refine this to :effect_free.
819
    needs_ea_validation = has_flag(flag, IR_FLAGS_NEEDS_EA)
1,013,842✔
820
    stmt = inst[:stmt]
1,013,842✔
821
    if !needs_ea_validation
1,013,842✔
822
        if !isterminator(stmt) && stmt !== nothing
2,011,544✔
823
            # ignore control flow node – they are not removable on their own and thus not
824
            # have `IR_FLAG_EFFECT_FREE` but still do not taint `:effect_free`-ness of
825
            # the whole method invocation
826
            sv.all_effect_free &= has_flag(flag, IR_FLAG_EFFECT_FREE)
983,239✔
827
        end
828
    elseif sv.all_effect_free
18✔
829
        if (isexpr(stmt, :invoke) || isexpr(stmt, :new) ||
×
830
            # HACK for performance: limit the scope of EA to code with object field access only,
831
            # since its abilities to reason about e.g. arrays are currently very limited anyways.
832
            is_known_call(stmt, setfield!, sv.ir))
833
            push!(sv.ea_analysis_pending, inst.idx)
×
834
        else
835
            sv.all_effect_free = false
×
836
        end
837
    end
838
    sv.all_nothrow &= has_flag(flag, IR_FLAG_NOTHROW)
1,013,842✔
839
    if !has_flag(flag, IR_FLAG_NOUB)
1,013,842✔
840
        # Special case: `:boundscheck` into `getfield` or memory operations is `:noub_if_noinbounds`
841
        if is_conditional_noub(inst, sv)
29,959✔
842
            sv.any_conditional_ub = true
87✔
843
        else
844
            sv.all_noub = false
29,785✔
845
        end
846
    end
847
    if !has_flag(flag, IR_FLAG_NORTCALL)
1,013,842✔
848
        # if a function call that might invoke `Core.Compiler.return_type` has been deleted,
849
        # there's no need to taint with `:nortcall`, allowing concrete evaluation
850
        if iscallstmt(stmt)
1,876,657✔
851
            sv.nortcall = false
26,563✔
852
        end
853
    end
854
    nothing
1,013,842✔
855
end
856

857
function scan_inconsistency!(inst::Instruction, sv::PostOptAnalysisState)
1,013,842✔
858
    flag = inst[:flag]
1,013,842✔
859
    stmt_inconsistent = !has_flag(flag, IR_FLAG_CONSISTENT)
1,013,842✔
860
    stmt = inst[:stmt]
1,013,842✔
861
    # Special case: For `getfield` and memory operations, we allow inconsistency of the :boundscheck argument
862
    (; inconsistent, tpdum) = sv
1,013,842✔
863
    if iscall_with_boundscheck(stmt, sv)
1,013,842✔
864
        for i = 1:(length(stmt.args)-1)
2,424✔
865
            val = stmt.args[i]
3,654✔
866
            if isa(val, SSAValue)
3,654✔
867
                stmt_inconsistent |= val.id in inconsistent
2,496✔
868
                count!(tpdum, val)
1,248✔
869
            end
870
        end
6,096✔
871
    else
872
        for ur in userefs(stmt)
1,109,100✔
873
            val = ur[]
296,532✔
874
            if isa(val, SSAValue)
296,532✔
875
                stmt_inconsistent |= val.id in inconsistent
135,924✔
876
                count!(tpdum, val)
67,962✔
877
            end
878
        end
296,532✔
879
    end
880
    stmt_inconsistent && push!(inconsistent, inst.idx)
1,013,842✔
881
    return stmt_inconsistent
1,013,842✔
882
end
883

884
struct ScanStmt
885
    sv::PostOptAnalysisState
158,506✔
886
end
887

888
function ((; sv)::ScanStmt)(inst::Instruction, lstmt::Int, bb::Int)
1,013,842✔
889
    stmt = inst[:stmt]
1,013,842✔
890

891
    if isa(stmt, EnterNode)
1,013,842✔
892
        # try/catch not yet modeled
893
        give_up_refinements!(sv)
×
894
        return nothing
×
895
    end
896

897
    scan_non_dataflow_flags!(inst, sv)
1,013,842✔
898

899
    stmt_inconsistent = scan_inconsistency!(inst, sv)
1,013,842✔
900

901
    if stmt_inconsistent
1,013,842✔
902
        if !has_flag(inst[:flag], IR_FLAG_NOTHROW)
971,043✔
903
            # Taint :consistent if this statement may raise since :consistent requires
904
            # consistent termination. TODO: Separate :consistent_return and :consistent_termination from :consistent.
905
            sv.all_retpaths_consistent = false
914,489✔
906
        end
907
        if inst.idx == lstmt
971,043✔
908
            if isa(stmt, ReturnNode) && isdefined(stmt, :val)
21,950✔
909
                sv.all_retpaths_consistent = false
×
910
            elseif isa(stmt, GotoIfNot)
21,950✔
911
                # Conditional Branch with inconsistent condition.
912
                # If we do not know this function terminates, taint consistency, now,
913
                # :consistent requires consistent termination. TODO: Just look at the
914
                # inconsistent region.
915
                if !sv.result.ipo_effects.terminates
6,309✔
916
                    sv.all_retpaths_consistent = false
6,147✔
917
                elseif visit_conditional_successors(sv.lazypostdomtree, sv.ir, bb) do succ::Int
162✔
918
                        return any_stmt_may_throw(sv.ir, succ)
222✔
919
                    end
920
                    # check if this `GotoIfNot` leads to conditional throws, which taints consistency
921
                    sv.all_retpaths_consistent = false
162✔
922
                else
923
                    (; cfg, domtree) = get!(sv.lazyagdomtree)
×
924
                    for succ in iterated_dominance_frontier(cfg, BlockLiveness(sv.ir.cfg.blocks[bb].succs, nothing), domtree)
×
925
                        if succ == length(cfg.blocks)
×
926
                            # Phi node in the virtual exit -> We have a conditional
927
                            # return. TODO: Check if all the retvals are egal.
928
                            sv.all_retpaths_consistent = false
×
929
                        else
930
                            visit_bb_phis!(sv.ir, succ) do phiidx::Int
×
931
                                push!(sv.inconsistent, phiidx)
×
932
                            end
933
                        end
934
                    end
×
935
                end
936
            end
937
        end
938
    end
939

940
    # bail out early if there are no possibilities to refine the effects
941
    if !any_refinable(sv)
1,013,842✔
942
        return nothing
130,326✔
943
    end
944

945
    return true
883,516✔
946
end
947

948
function check_inconsistentcy!(sv::PostOptAnalysisState, scanner::BBScanner)
×
949
    (; ir, inconsistent, tpdum) = sv
×
950

951
    scan!(ScanStmt(sv), scanner, false)
×
952
    complete!(tpdum); push!(scanner.bb_ip, 1)
×
953
    populate_def_use_map!(tpdum, scanner)
×
954

955
    stmt_ip = BitSetBoundedMinPrioritySet(length(ir.stmts))
×
956
    for def in inconsistent
×
957
        for use in tpdum[def]
×
958
            if !(use in inconsistent)
×
959
                push!(inconsistent, use)
×
960
                append!(stmt_ip, tpdum[use])
×
961
            end
962
        end
×
963
    end
×
964
    lazydomtree = LazyDomtree(ir)
×
965
    while !isempty(stmt_ip)
×
966
        idx = popfirst!(stmt_ip)
×
967
        inst = ir[SSAValue(idx)]
×
968
        stmt = inst[:stmt]
×
969
        if iscall_with_boundscheck(stmt, sv)
×
970
            any_non_boundscheck_inconsistent = false
×
971
            for i = 1:(length(stmt.args)-1)
×
972
                val = stmt.args[i]
×
973
                if isa(val, SSAValue)
×
974
                    any_non_boundscheck_inconsistent |= val.id in inconsistent
×
975
                    any_non_boundscheck_inconsistent && break
×
976
                end
977
            end
×
978
            any_non_boundscheck_inconsistent || continue
×
979
        elseif isa(stmt, ReturnNode)
×
980
            sv.all_retpaths_consistent = false
×
981
        elseif isa(stmt, GotoIfNot)
×
982
            bb = block_for_inst(ir, idx)
×
983
            cfg = ir.cfg
×
984
            blockliveness = BlockLiveness(cfg.blocks[bb].succs, nothing)
×
985
            for succ in iterated_dominance_frontier(cfg, blockliveness, get!(lazydomtree))
×
986
                visit_bb_phis!(ir, succ) do phiidx::Int
×
987
                    push!(inconsistent, phiidx)
×
988
                    push!(stmt_ip, phiidx)
×
989
                end
990
            end
×
991
        end
992
        sv.all_retpaths_consistent || break
×
993
        append!(inconsistent, tpdum[idx])
×
994
        append!(stmt_ip, tpdum[idx])
×
995
    end
×
996
end
997

998
function ipo_dataflow_analysis!(interp::AbstractInterpreter, opt::OptimizationState,
158,781✔
999
                                ir::IRCode, result::InferenceResult)
1000
    if !is_ipo_dataflow_analysis_profitable(result.ipo_effects)
158,781✔
1001
        return false
275✔
1002
    end
1003

1004
    @assert isempty(ir.new_nodes) "IRCode should be compacted before post-opt analysis"
158,506✔
1005

1006
    sv = PostOptAnalysisState(result, ir)
5,018,001✔
1007
    scanner = BBScanner(ir)
158,506✔
1008

1009
    completed_scan = scan!(ScanStmt(sv), scanner, true)
158,506✔
1010

1011
    if !completed_scan
158,506✔
1012
        if sv.all_retpaths_consistent
8✔
1013
            check_inconsistentcy!(sv, scanner)
×
1014
        else
1015
            # No longer any dataflow concerns, just scan the flags
1016
            scan!(scanner, false) do inst::Instruction, lstmt::Int, bb::Int
8✔
1017
                scan_non_dataflow_flags!(inst, sv)
×
1018
                # bail out early if there are no possibilities to refine the effects
1019
                if !any_refinable(sv)
×
1020
                    return nothing
×
1021
                end
1022
                return true
×
1023
            end
1024
        end
1025
    end
1026

1027
    return refine_effects!(interp, opt, sv)
158,506✔
1028
end
1029

1030
# run the optimization work
1031
function optimize(interp::AbstractInterpreter, opt::OptimizationState, caller::InferenceResult)
157,548✔
1032
    @zone "CC: OPTIMIZER" ir = run_passes_ipo_safe(opt.src, opt)
167,357✔
1033
    ipo_dataflow_analysis!(interp, opt, ir, caller)
167,352✔
1034
    finishopt!(interp, opt, ir)
167,352✔
1035
    return nothing
158,811✔
1036
end
1037

1038
const ALL_PASS_NAMES = String[]
1039
macro pass(name::String, expr)
1040
    optimize_until = esc(:optimize_until)
1041
    stage = esc(:__stage__)
1042
    macrocall = :(@zone $name $(esc(expr)))
1043
    macrocall.args[2] = __source__  # `@timeit` may want to use it
1044
    push!(ALL_PASS_NAMES, name)
1045
    quote
1046
        $macrocall
1,206,619✔
1047
        matchpass($optimize_until, ($stage += 1), $name) && $(esc(:(@goto __done__)))
1,112,470✔
1048
    end
1049
end
1050

1051
matchpass(optimize_until::Int, stage, _) = optimize_until == stage
117✔
1052
matchpass(optimize_until::String, _, name) = optimize_until == name
130✔
1053
matchpass(::Nothing, _, _) = false
×
1054

1055
function run_passes_ipo_safe(
158,937✔
1056
    ci::CodeInfo,
1057
    sv::OptimizationState,
1058
    optimize_until::Union{Nothing, Int, String} = nothing)  # run all passes by default
1059
    if optimize_until isa String && !contains_is(ALL_PASS_NAMES, optimize_until)
326,403✔
1060
        error("invalid `optimize_until` argument, no such optimization pass")
2✔
1061
    elseif optimize_until isa Int && (optimize_until < 1 || optimize_until > length(ALL_PASS_NAMES))
158,965✔
1062
        error("invalid `optimize_until` argument, no such optimization pass")
2✔
1063
    end
1064

1065
    __stage__ = 0  # used by @pass
158,933✔
1066
    # NOTE: The pass name MUST be unique for `optimize_until::String` to work
1067
    @pass "CC: CONVERT"   ir = convert_to_ircode(ci, sv)
1068
    @pass "CC: SLOT2REG"  ir = slot2reg(ir, ci, sv)
1069
    # TODO: Domsorting can produce an updated domtree - no need to recompute here
1070
    @pass "CC: COMPACT_1" ir = compact!(ir)
1071
    @pass "CC: INLINING"  ir = ssa_inlining_pass!(ir, sv.inlining, ci.propagate_inbounds)
1072
    # @zone "CC: VERIFY 2" verify_ir(ir)
1073
    @pass "CC: COMPACT_2" ir = compact!(ir)
1074
    @pass "CC: SROA"      ir = sroa_pass!(ir, sv.inlining)
1075
    @pass "CC: ADCE"      (ir, made_changes) = adce_pass!(ir, sv.inlining)
1076
    if made_changes
158,868✔
1077
        @pass "CC: COMPACT_3" ir = compact!(ir, true)
1078
    end
1079
    if is_asserts()
158,868✔
1080
        @zone "CC: VERIFY_3" begin
1081
            verify_ir(ir, true, false, optimizer_lattice(sv.inlining.interp), sv.linfo)
×
1082
            verify_linetable(ir.debuginfo, length(ir.stmts))
×
1083
        end
1084
    end
1085
    @label __done__  # used by @pass
1086
    return ir
158,930✔
1087
end
1088

1089
function strip_trailing_junk!(code::Vector{Any}, ssavaluetypes::Vector{Any}, ssaflags::Vector, debuginfo::DebugInfoStream, cfg::CFG, info::Vector{CallInfo})
130,413✔
1090
    # Remove `nothing`s at the end, we don't handle them well
1091
    # (we expect the last instruction to be a terminator)
1092
    codelocs = debuginfo.codelocs
130,413✔
1093
    for i = length(code):-1:1
260,826✔
1094
        if code[i] !== nothing
130,413✔
1095
            resize!(code, i)
130,413✔
1096
            resize!(ssavaluetypes, i)
130,413✔
1097
            resize!(codelocs, 3i)
130,413✔
1098
            resize!(info, i)
130,413✔
1099
            resize!(ssaflags, i)
130,413✔
1100
            break
130,413✔
1101
        end
1102
    end
×
1103
    # If the last instruction is not a terminator, add one. This can
1104
    # happen for implicit return on dead branches.
1105
    term = code[end]
130,413✔
1106
    if !isa(term, GotoIfNot) && !isa(term, GotoNode) && !isa(term, ReturnNode)
130,413✔
1107
        push!(code, ReturnNode())
9,123✔
1108
        push!(ssavaluetypes, Union{})
9,123✔
1109
        push!(codelocs, 0, 0, 0)
9,123✔
1110
        push!(info, NoCallInfo())
9,123✔
1111
        push!(ssaflags, IR_FLAG_NOTHROW)
9,123✔
1112

1113
        # Update CFG to include appended terminator
1114
        old_range = cfg.blocks[end].stmts
9,123✔
1115
        new_range = StmtRange(first(old_range), last(old_range) + 1)
9,123✔
1116
        cfg.blocks[end] = BasicBlock(cfg.blocks[end], new_range)
9,123✔
1117
        (length(cfg.index) == length(cfg.blocks)) && (cfg.index[end] += 1)
9,123✔
1118
    end
1119
    nothing
130,413✔
1120
end
1121

1122
function changed_lineinfo(di::DebugInfo, codeloc::Int, prevloc::Int)
1,350,493✔
1123
    while true
1,350,493✔
1124
        next = getdebugidx(di, codeloc)
1,351,876✔
1125
        line = next[1]
1,351,876✔
1126
        line < 0 && return false # invalid info
1,351,876✔
1127
        line == 0 && next[2] == 0 && return false # no new info
1,351,846✔
1128
        prevloc <= 0 && return true # no old info
1,196,849✔
1129
        prev = getdebugidx(di, prevloc)
1,066,451✔
1130
        next === prev && return false # exactly identical
1,066,451✔
1131
        prevline = prev[1]
76,050✔
1132
        prevline < 0 && return true # previous invalid info, now valid
76,050✔
1133
        edge = next[2]
76,050✔
1134
        edge === prev[2] || return true # change to this edge
76,146✔
1135
        linetable = di.linetable
75,954✔
1136
        # check for change to line number here
1137
        if linetable === nothing || line == 0
75,954✔
1138
            line == prevline || return true
150,525✔
1139
        else
1140
            changed_lineinfo(linetable::DebugInfo, Int(line), Int(prevline)) && return true
×
1141
        end
1142
        # check for change to edge here
1143
        edge == 0 && return false # no edge here
1,383✔
1144
        di = di.edges[Int(edge)]::DebugInfo
1,383✔
1145
        codeloc = Int(next[3])
1,383✔
1146
        prevloc = Int(prev[3])
1,383✔
1147
    end
1,383✔
1148
end
1149

1150
function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
158,799✔
1151
    # Update control-flow to reflect any unreachable branches.
1152
    ssavaluetypes = ci.ssavaluetypes::Vector{Any}
158,799✔
1153
    ci.code = code = copy_exprargs(ci.code)
1,635,751✔
1154
    di = DebugInfoStream(sv.linfo, ci.debuginfo, length(code))
158,799✔
1155
    codelocs = di.codelocs
158,799✔
1156
    ssaflags = ci.ssaflags
158,799✔
1157
    for i = 1:length(code)
289,212✔
1158
        expr = code[i]
1,635,751✔
1159
        if !(i in sv.unreachable)
1,635,751✔
1160
            if isa(expr, GotoIfNot)
1,442,864✔
1161
                # Replace this live GotoIfNot with:
1162
                # - no-op if :nothrow and the branch target is unreachable
1163
                # - cond if :nothrow and both targets are unreachable
1164
                # - typeassert if must-throw
1165
                block = block_for_inst(sv.cfg, i)
59,543✔
1166
                if ssavaluetypes[i] === Bottom
59,543✔
1167
                    destblock = block_for_inst(sv.cfg, expr.dest)
×
1168
                    cfg_delete_edge!(sv.cfg, block, block + 1)
×
1169
                    ((block + 1) != destblock) && cfg_delete_edge!(sv.cfg, block, destblock)
×
1170
                    expr = Expr(:call, Core.typeassert, expr.cond, Bool)
×
1171
                elseif i + 1 in sv.unreachable
59,543✔
1172
                    @assert has_flag(ssaflags[i], IR_FLAG_NOTHROW)
13,910✔
1173
                    cfg_delete_edge!(sv.cfg, block, block + 1)
13,910✔
1174
                    expr = GotoNode(expr.dest)
13,910✔
1175
                elseif expr.dest in sv.unreachable
45,633✔
1176
                    @assert has_flag(ssaflags[i], IR_FLAG_NOTHROW)
37,456✔
1177
                    cfg_delete_edge!(sv.cfg, block, block_for_inst(sv.cfg, expr.dest))
37,456✔
1178
                    expr = nothing
37,456✔
1179
                end
1180
                code[i] = expr
59,543✔
1181
            elseif isa(expr, EnterNode)
1,383,321✔
1182
                catchdest = expr.catch_dest
3,290✔
1183
                if catchdest in sv.unreachable
3,290✔
1184
                    cfg_delete_edge!(sv.cfg, block_for_inst(sv.cfg, i), block_for_inst(sv.cfg, catchdest))
15✔
1185
                    if isdefined(expr, :scope)
15✔
1186
                        # We've proven that nothing inside the enter region throws,
1187
                        # but we don't yet know whether something might read the scope,
1188
                        # so we need to retain this enter for the time being. However,
1189
                        # we use the special marker `0` to indicate that setting up
1190
                        # the try/catch frame is not required.
1191
                        code[i] = EnterNode(expr, 0)
12✔
1192
                    else
1193
                        code[i] = nothing
3✔
1194
                    end
1195
                end
1196
            elseif isa(expr, PhiNode)
1,380,031✔
1197
                new_edges = Int32[]
×
1198
                new_vals = Any[]
×
1199
                for j = 1:length(expr.edges)
×
1200
                    edge = expr.edges[j]
×
1201
                    (edge in sv.unreachable || (ssavaluetypes[edge] === Union{} && !isa(code[edge], PhiNode))) && continue
×
1202
                    push!(new_edges, edge)
×
1203
                    if isassigned(expr.values, j)
×
1204
                        push!(new_vals, expr.values[j])
×
1205
                    else
1206
                        resize!(new_vals, length(new_edges))
×
1207
                    end
1208
                end
×
1209
                code[i] = PhiNode(new_edges, new_vals)
×
1210
            end
1211
        end
1212
    end
3,112,703✔
1213

1214
    # Go through and add an unreachable node after every
1215
    # Union{} call. Then reindex labels.
1216
    stmtinfo = sv.stmt_info
158,799✔
1217
    meta = Expr[]
158,799✔
1218
    idx = 1
158,799✔
1219
    oldidx = 1
158,799✔
1220
    nstmts = length(code)
158,799✔
1221
    ssachangemap = labelchangemap = blockchangemap = nothing
158,799✔
1222
    prevloc = 0
158,799✔
1223
    while idx <= length(code)
1,787,360✔
1224
        if sv.insert_coverage && changed_lineinfo(ci.debuginfo, oldidx, prevloc)
1,628,561✔
1225
            # insert a side-effect instruction before the current instruction in the same basic block
1226
            insert!(code, idx, Expr(:code_coverage_effect))
258,002✔
1227
            splice!(codelocs, 3idx-2:3idx-3, (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0]))
258,002✔
1228
            insert!(ssavaluetypes, idx, Nothing)
258,002✔
1229
            insert!(stmtinfo, idx, NoCallInfo())
258,002✔
1230
            insert!(ssaflags, idx, IR_FLAG_NULL)
258,002✔
1231
            if ssachangemap === nothing
258,002✔
1232
                ssachangemap = fill(0, nstmts)
1,635,558✔
1233
            end
1234
            if labelchangemap === nothing
258,002✔
1235
                labelchangemap = fill(0, nstmts)
1,635,558✔
1236
            end
1237
            ssachangemap[oldidx] += 1
258,002✔
1238
            if oldidx < length(labelchangemap)
258,002✔
1239
                labelchangemap[oldidx + 1] += 1
257,738✔
1240
            end
1241
            if blockchangemap === nothing
258,002✔
1242
                blockchangemap = fill(0, length(sv.cfg.blocks))
322,175✔
1243
            end
1244
            blockchangemap[block_for_inst(sv.cfg, oldidx)] += 1
258,002✔
1245
            idx += 1
258,002✔
1246
            prevloc = oldidx
258,002✔
1247
        end
1248
        if ssavaluetypes[idx] === Union{} && !(oldidx in sv.unreachable) && !isa(code[idx], PhiNode)
1,628,561✔
1249
            # We should have converted any must-throw terminators to an equivalent w/o control-flow edges
1250
            @assert !isterminator(code[idx])
4,747✔
1251

1252
            block = block_for_inst(sv.cfg, oldidx)
4,747✔
1253
            block_end = last(sv.cfg.blocks[block].stmts) + (idx - oldidx)
4,747✔
1254

1255
            # Delete all successors to this basic block
1256
            for succ in sv.cfg.blocks[block].succs
4,747✔
1257
                preds = sv.cfg.blocks[succ].preds
1,240✔
1258
                deleteat!(preds, findfirst(x::Int->x==block, preds)::Int)
6,522✔
1259
            end
1,240✔
1260
            empty!(sv.cfg.blocks[block].succs)
4,747✔
1261

1262
            if !(idx < length(code) && isa(code[idx + 1], ReturnNode) && !isdefined((code[idx + 1]::ReturnNode), :val))
4,747✔
1263
                # Any statements from here to the end of the block have been wrapped in Core.Const(...)
1264
                # by type inference (effectively deleting them). Only task left is to replace the block
1265
                # terminator with an explicit `unreachable` marker.
1266

1267
                if block_end > idx
4,747✔
1268
                    if is_asserts()
4,152✔
1269
                        # Verify that type-inference did its job
1270
                        for i = (oldidx + 1):last(sv.cfg.blocks[block].stmts)
×
1271
                            @assert i in sv.unreachable
×
1272
                        end
×
1273
                    end
1274
                    code[block_end] = ReturnNode()
4,152✔
1275
                    codelocs[3block_end-2], codelocs[3block_end-1], codelocs[3block_end-0] = (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0])
4,152✔
1276
                    ssavaluetypes[block_end] = Union{}
4,152✔
1277
                    stmtinfo[block_end] = NoCallInfo()
4,152✔
1278
                    ssaflags[block_end] = IR_FLAG_NOTHROW
4,152✔
1279
                    idx += block_end - idx
4,152✔
1280
                else
1281
                    insert!(code, idx + 1, ReturnNode())
595✔
1282
                    splice!(codelocs, 3idx-2:3idx-3, (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0]))
595✔
1283
                    insert!(ssavaluetypes, idx + 1, Union{})
595✔
1284
                    insert!(stmtinfo, idx + 1, NoCallInfo())
595✔
1285
                    insert!(ssaflags, idx + 1, IR_FLAG_NOTHROW)
595✔
1286
                    if ssachangemap === nothing
595✔
1287
                        ssachangemap = fill(0, nstmts)
×
1288
                    end
1289
                    if labelchangemap === nothing
595✔
1290
                        labelchangemap = sv.insert_coverage ? fill(0, nstmts) : ssachangemap
×
1291
                    end
1292
                    if oldidx < length(ssachangemap)
595✔
1293
                        ssachangemap[oldidx + 1] += 1
595✔
1294
                        sv.insert_coverage && (labelchangemap[oldidx + 1] += 1)
595✔
1295
                    end
1296
                    if blockchangemap === nothing
595✔
1297
                        blockchangemap = fill(0, length(sv.cfg.blocks))
×
1298
                    end
1299
                    blockchangemap[block] += 1
595✔
1300
                    idx += 1
595✔
1301
                end
1302
                oldidx = last(sv.cfg.blocks[block].stmts)
4,747✔
1303
            end
1304
        end
1305
        idx += 1
1,628,561✔
1306
        oldidx += 1
1,628,561✔
1307
    end
1,628,561✔
1308
    empty!(sv.unreachable)
159,494✔
1309

1310
    if ssachangemap !== nothing && labelchangemap !== nothing
158,799✔
1311
        renumber_ir_elements!(code, ssachangemap, labelchangemap)
158,621✔
1312
    end
1313
    if blockchangemap !== nothing
158,799✔
1314
        renumber_cfg_stmts!(sv.cfg, blockchangemap)
158,621✔
1315
    end
1316

1317
    for i = 1:length(code)
289,212✔
1318
        code[i] = process_meta!(meta, code[i])
1,894,348✔
1319
    end
3,629,897✔
1320
    strip_trailing_junk!(code, ssavaluetypes, ssaflags, di, sv.cfg, stmtinfo)
158,799✔
1321
    types = Any[]
158,799✔
1322
    stmts = InstructionStream(code, types, stmtinfo, codelocs, ssaflags)
158,799✔
1323
    # NOTE this `argtypes` contains types of slots yet: it will be modified to contain the
1324
    # types of call arguments only once `slot2reg` converts this `IRCode` to the SSA form
1325
    # and eliminates slots (see below)
1326
    argtypes = sv.slottypes
158,799✔
1327
    return IRCode(stmts, sv.cfg, di, argtypes, meta, sv.sptypes, world_range(ci))
158,799✔
1328
end
1329

1330
function process_meta!(meta::Vector{Expr}, @nospecialize stmt)
1331
    if isexpr(stmt, :meta) && length(stmt.args) ≥ 1
1,894,348✔
1332
        push!(meta, stmt)
×
1333
        return nothing
×
1334
    end
1335
    return stmt
1,894,348✔
1336
end
1337

1338
function slot2reg(ir::IRCode, ci::CodeInfo, sv::OptimizationState)
130,413✔
1339
    # need `ci` for the slot metadata, IR for the code
1340
    svdef = sv.linfo.def
158,799✔
1341
    @zone "CC: DOMTREE_1" domtree = construct_domtree(ir)
158,929✔
1342
    defuse_insts = scan_slot_def_use(Int(ci.nargs), ci, ir.stmts.stmt)
158,929✔
1343
    𝕃ₒ = optimizer_lattice(sv.inlining.interp)
158,799✔
1344
    @zone "CC: CONSTRUCT_SSA" ir = construct_ssa!(ci, ir, sv, domtree, defuse_insts, 𝕃ₒ) # consumes `ir`
158,929✔
1345
    # NOTE now we have converted `ir` to the SSA form and eliminated slots
1346
    # let's resize `argtypes` now and remove unnecessary types for the eliminated slots
1347
    resize!(ir.argtypes, ci.nargs)
158,929✔
1348
    return ir
158,929✔
1349
end
1350

1351
## Computing the cost of a function body
1352

1353
# saturating sum (inputs are non-negative), prevents overflow with typemax(Int) below
1354
plus_saturate(x::Int, y::Int) = max(x, y, x+y)
3,218,618✔
1355

1356
# known return type
1357
isknowntype(@nospecialize T) = (T === Union{}) || isa(T, Const) || isconcretetype(widenconst(T))
4,549✔
1358

1359
function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState},
2,978,504✔
1360
                        params::OptimizationParams)
1361
    #=const=# UNKNOWN_CALL_COST = 20
2,978,504✔
1362
    head = ex.head
2,978,504✔
1363
    if is_meta_expr_head(head)
5,950,932✔
1364
        return 0
6,076✔
1365
    elseif head === :call
2,972,428✔
1366
        farg = ex.args[1]
173,832✔
1367
        ftyp = argextype(farg, src, sptypes)
173,832✔
1368
        if ftyp === IntrinsicFunction && farg isa SSAValue
173,832✔
1369
            # if this comes from code that was already inlined into another function,
1370
            # Consts have been widened. try to recover in simple cases.
1371
            farg = isa(src, CodeInfo) ? src.code[farg.id] : src[farg][:stmt]
×
1372
            if isa(farg, GlobalRef) || isa(farg, QuoteNode) || isa(farg, IntrinsicFunction) || isexpr(farg, :static_parameter)
×
1373
                ftyp = argextype(farg, src, sptypes)
×
1374
            end
1375
        end
1376
        f = singleton_type(ftyp)
173,850✔
1377
        if isa(f, IntrinsicFunction)
173,832✔
1378
            iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1
67,578✔
1379
            if isassigned(T_IFUNC, iidx)
67,578✔
1380
                minarg, maxarg, = T_IFUNC[iidx]
67,578✔
1381
                nargs = length(ex.args)
67,578✔
1382
                if minarg + 1 <= nargs <= maxarg + 1
67,578✔
1383
                    # With mostly constant arguments, all Intrinsics tend to become very cheap
1384
                    # and are likely to combine with the operations around them,
1385
                    # so reduce their cost by half.
1386
                    cost = T_IFUNC_COST[iidx]
67,578✔
1387
                    if cost == 0 || nargs < 3 ||
109,140✔
1388
                       (f === Intrinsics.cglobal || f === Intrinsics.llvmcall) # these hold malformed IR, so argextype will crash on them
1389
                        return cost
27,729✔
1390
                    end
1391
                    aty2 = widenconditional(argextype(ex.args[2], src, sptypes))
39,849✔
1392
                    nconst = Int(aty2 isa Const)
39,849✔
1393
                    for i = 3:nargs
79,602✔
1394
                        aty = widenconditional(argextype(ex.args[i], src, sptypes))
39,849✔
1395
                        if widenconst(aty) != widenconst(aty2)
39,849✔
1396
                            nconst = 0
9,558✔
1397
                            break
9,558✔
1398
                        end
1399
                        nconst += aty isa Const
30,291✔
1400
                    end
30,291✔
1401
                    if nconst + 2 >= nargs
39,849✔
1402
                        cost = (cost - 1) ÷ 2
20,364✔
1403
                    end
1404
                    return cost
39,849✔
1405
                end
1406
            end
1407
            # unknown/unhandled intrinsic: hopefully the caller gets a slightly better answer after the inlining
1408
            return UNKNOWN_CALL_COST
×
1409
        end
1410
        if isa(f, Builtin) && f !== invoke
106,254✔
1411
            # The efficiency of operations like a[i] and s.b
1412
            # depend strongly on whether the result can be
1413
            # inferred, so check the type of ex
1414
            if f === Core.getfield || f === Core.tuple || f === Core.getglobal
162,751✔
1415
                # we might like to penalize non-inferrability, but
1416
                # tuple iteration/destructuring makes that impossible
1417
                # return plus_saturate(argcost, isknowntype(extyp) ? 1 : params.inline_nonleaf_penalty)
1418
                return 0
56,142✔
1419
            elseif (f === Core.memoryrefget || f === Core.memoryref_isassigned) && length(ex.args) >= 3
100,109✔
1420
                atyp = argextype(ex.args[2], src, sptypes)
61✔
1421
                return isknowntype(atyp) ? 1 : params.inline_nonleaf_penalty
61✔
1422
            elseif f === Core.memoryrefset! && length(ex.args) >= 3
50,024✔
1423
                atyp = argextype(ex.args[2], src, sptypes)
4,494✔
1424
                return isknowntype(atyp) ? 5 : params.inline_nonleaf_penalty
4,494✔
1425
            elseif f === typeassert && isconstType(widenconst(argextype(ex.args[3], src, sptypes)))
50,949✔
1426
                return 1
1,237✔
1427
            end
1428
            fidx = find_tfunc(f)
1,019,952✔
1429
            if fidx === nothing
44,293✔
1430
                # unknown/unhandled builtin
1431
                # Use the generic cost of a direct function call
1432
                return UNKNOWN_CALL_COST
4,236✔
1433
            end
1434
            return T_FFUNC_COST[fidx]
40,057✔
1435
        end
1436
        extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes)
54✔
1437
        if extyp === Union{}
27✔
1438
            return 0
×
1439
        end
1440
        return params.inline_nonleaf_penalty
27✔
1441
    elseif head === :foreigncall
2,798,596✔
1442
        foreigncall = ex.args[1]
2,282✔
1443
        if foreigncall isa QuoteNode && foreigncall.value === :jl_string_ptr
2,282✔
1444
            return 1
6✔
1445
        end
1446
        return 20
2,276✔
1447
    elseif head === :invoke || head === :invoke_modify
5,544,638✔
1448
        # Calls whose "return type" is Union{} do not actually return:
1449
        # they are errors. Since these are not part of the typical
1450
        # run-time of the function, we omit them from
1451
        # consideration. This way, non-inlined error branches do not
1452
        # prevent inlining.
1453
        extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes)
95,980✔
1454
        return extyp === Union{} ? 0 : UNKNOWN_CALL_COST
47,990✔
1455
    elseif head === :(=)
2,748,324✔
1456
        return statement_cost(ex.args[2], -1, src, sptypes, params)
×
1457
    elseif head === :copyast
2,748,324✔
1458
        return 100
×
1459
    end
1460
    return 0
2,748,324✔
1461
end
1462

1463
function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState},
1464
                                  params::OptimizationParams)
1465
    thiscost = 0
3,218,618✔
1466
    dst(tgt) = isa(src, IRCode) ? first(src.cfg.blocks[tgt].stmts) : tgt
3,287,967✔
1467
    if stmt isa Expr
3,218,663✔
1468
        thiscost = statement_cost(stmt, line, src, sptypes, params)::Int
2,978,534✔
1469
    elseif stmt isa GotoNode
240,129✔
1470
        # loops are generally always expensive
1471
        # but assume that forward jumps are already counted for from
1472
        # summing the cost of the not-taken branch
1473
        thiscost = dst(stmt.label) < line ? 40 : 0
41,865✔
1474
    elseif stmt isa GotoIfNot
198,264✔
1475
        thiscost = dst(stmt.dest) < line ? 40 : 0
27,493✔
1476
    elseif stmt isa EnterNode
170,771✔
1477
        # try/catch is a couple function calls,
1478
        # but don't inline functions with try/catch
1479
        # since these aren't usually performance-sensitive functions,
1480
        # and llvm is more likely to miscompile them when these functions get large
1481
        thiscost = typemax(Int)
2,178✔
1482
    end
1483
    return thiscost
3,218,663✔
1484
end
1485

1486
function inline_cost_model(ir::IRCode, params::OptimizationParams, cost_threshold::Int)
112,347✔
1487
    bodycost = 0
112,347✔
1488
    for i = 1:length(ir.stmts)
224,633✔
1489
        stmt = ir[SSAValue(i)][:stmt]
3,218,618✔
1490
        thiscost = statement_or_branch_cost(stmt, i, ir, ir.sptypes, params)
3,458,732✔
1491
        bodycost = plus_saturate(bodycost, thiscost)
3,218,618✔
1492
        if bodycost > cost_threshold
3,218,618✔
1493
            return MAX_INLINE_COST
4,242✔
1494
        end
1495
    end
6,320,647✔
1496
    return inline_cost_clamp(bodycost)
108,105✔
1497
end
1498

1499
function statement_costs!(cost::Vector{Int}, body::Vector{Any}, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState}, params::OptimizationParams)
1500
    maxcost = 0
×
1501
    for line = 1:length(body)
3✔
1502
        stmt = body[line]
45✔
1503
        thiscost = statement_or_branch_cost(stmt, line, src, sptypes,
60✔
1504
                                            params)
1505
        cost[line] = thiscost
45✔
1506
        if thiscost > maxcost
45✔
1507
            maxcost = thiscost
×
1508
        end
1509
    end
87✔
1510
    return maxcost
3✔
1511
end
1512

1513
function renumber_ir_elements!(body::Vector{Any}, cfg::Union{CFG,Nothing}, ssachangemap::Vector{Int})
×
1514
    return renumber_ir_elements!(body, cfg, ssachangemap, ssachangemap)
×
1515
end
1516

1517
function cumsum_ssamap!(ssachangemap::Vector{Int})
1518
    any_change = false
391,194✔
1519
    rel_change = 0
391,194✔
1520
    for i = 1:length(ssachangemap)
782,388✔
1521
        val = ssachangemap[i]
2,983,858✔
1522
        any_change |= val ≠ 0
2,983,858✔
1523
        rel_change += val
2,983,858✔
1524
        if val == -1
2,983,858✔
1525
            # Keep a marker that this statement was deleted
1526
            ssachangemap[i] = typemin(Int)
×
1527
        else
1528
            ssachangemap[i] = rel_change
2,983,858✔
1529
        end
1530
    end
5,576,522✔
1531
    return any_change
391,194✔
1532
end
1533

1534
function renumber_ir_elements!(body::Vector{Any}, ssachangemap::Vector{Int}, labelchangemap::Vector{Int})
130,398✔
1535
    any_change = cumsum_ssamap!(labelchangemap)
1,356,919✔
1536
    if ssachangemap !== labelchangemap
130,398✔
1537
        any_change |= cumsum_ssamap!(ssachangemap)
1,356,919✔
1538
    end
1539
    any_change || return
130,398✔
1540
    for i = 1:length(body)
260,796✔
1541
        el = body[i]
1,562,266✔
1542
        if isa(el, GotoNode)
1,562,266✔
1543
            body[i] = GotoNode(el.label + labelchangemap[el.label])
37,913✔
1544
        elseif isa(el, GotoIfNot)
1,524,353✔
1545
            cond = el.cond
3,996✔
1546
            if isa(cond, SSAValue)
3,996✔
1547
                cond = SSAValue(cond.id + ssachangemap[cond.id])
3,934✔
1548
            end
1549
            was_deleted = labelchangemap[el.dest] == typemin(Int)
3,996✔
1550
            body[i] = was_deleted ? cond : GotoIfNot(cond, el.dest + labelchangemap[el.dest])
3,996✔
1551
        elseif isa(el, ReturnNode)
1,520,357✔
1552
            if isdefined(el, :val)
135,516✔
1553
                val = el.val
131,739✔
1554
                if isa(val, SSAValue)
131,739✔
1555
                    body[i] = ReturnNode(SSAValue(val.id + ssachangemap[val.id]))
131,508✔
1556
                end
1557
            end
1558
        elseif isa(el, SSAValue)
1,384,841✔
1559
            body[i] = SSAValue(el.id + ssachangemap[el.id])
×
1560
        elseif isa(el, PhiNode)
1,384,841✔
1561
            i = 1
×
1562
            edges = el.edges
×
1563
            values = el.values
×
1564
            while i <= length(edges)
×
1565
                was_deleted = ssachangemap[edges[i]] == typemin(Int)
×
1566
                if was_deleted
×
1567
                    deleteat!(edges, i)
×
1568
                    deleteat!(values, i)
×
1569
                else
1570
                    edges[i] += ssachangemap[edges[i]]
×
1571
                    val = values[i]
×
1572
                    if isa(val, SSAValue)
×
1573
                        values[i] = SSAValue(val.id + ssachangemap[val.id])
×
1574
                    end
1575
                    i += 1
×
1576
                end
1577
            end
×
1578
        elseif isa(el, EnterNode)
1,384,841✔
1579
            tgt = el.catch_dest
3,144✔
1580
            if tgt != 0
3,144✔
1581
                was_deleted = labelchangemap[tgt] == typemin(Int)
3,144✔
1582
                if was_deleted
3,144✔
1583
                    @assert !isdefined(el, :scope)
×
1584
                    body[i] = nothing
×
1585
                else
1586
                    if isdefined(el, :scope) && isa(el.scope, SSAValue)
3,144✔
1587
                        body[i] = EnterNode(tgt + labelchangemap[tgt], SSAValue(el.scope.id + ssachangemap[el.scope.id]))
2,916✔
1588
                    else
1589
                        body[i] = EnterNode(el, tgt + labelchangemap[tgt])
228✔
1590
                    end
1591
                end
1592
            end
1593
        elseif isa(el, Expr)
1,381,697✔
1594
            if el.head === :(=) && el.args[2] isa Expr
720,332✔
1595
                el = el.args[2]::Expr
40,332✔
1596
            end
1597
            if !is_meta_expr_head(el.head)
1,440,541✔
1598
                args = el.args
720,209✔
1599
                for i = 1:length(args)
1,235,125✔
1600
                    el = args[i]
1,383,255✔
1601
                    if isa(el, SSAValue)
1,383,255✔
1602
                        args[i] = SSAValue(el.id + ssachangemap[el.id])
692,597✔
1603
                    end
1604
                end
1,383,255✔
1605
            end
1606
        end
1607
    end
1,562,266✔
1608
end
1609

1610
function renumber_cfg_stmts!(cfg::CFG, blockchangemap::Vector{Int})
130,398✔
1611
    cumsum_ssamap!(blockchangemap) || return
130,398✔
1612
    for i = 1:length(cfg.blocks)
260,796✔
1613
        old_range = cfg.blocks[i].stmts
270,020✔
1614
        new_range = StmtRange(first(old_range) + ((i > 1) ? blockchangemap[i - 1] : 0),
270,020✔
1615
                              last(old_range) + blockchangemap[i])
1616
        cfg.blocks[i] = BasicBlock(cfg.blocks[i], new_range)
270,020✔
1617
        if i <= length(cfg.index)
270,020✔
1618
            cfg.index[i] = cfg.index[i] + blockchangemap[i]
270,020✔
1619
        end
1620
    end
270,020✔
1621
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc