• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / 1293

03 Oct 2025 01:05AM UTC coverage: 76.952% (-0.1%) from 77.068%
1293

push

buildkite

web-flow
Support superscript small q (#59544)

Co-authored-by: Steven G. Johnson <stevenj@alum.mit.edu>

61282 of 79637 relevant lines covered (76.95%)

21700458.33 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

74.45
/Compiler/src/optimize.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
#############
4
# constants #
5
#############
6

7
# The slot has uses that are not statically dominated by any assignment
8
# This is implied by `SLOT_USEDUNDEF`.
9
# If this is not set, all the uses are (statically) dominated by the defs.
10
# In particular, if a slot has `AssignedOnce && !StaticUndef`, it is an SSA.
11
const SLOT_STATICUNDEF  = 1 # slot might be used before it is defined (structurally)
12
const SLOT_ASSIGNEDONCE = 16 # slot is assigned to only once
13
const SLOT_USEDUNDEF    = 32 # slot has uses that might raise UndefVarError
14
# const SLOT_CALLED      = 64
15

16
# NOTE make sure to sync the flag definitions below with julia.h and `jl_code_info_set_ir` in method.c
17

18
const IR_FLAG_NULL        = zero(UInt32)
19
# This statement is marked as @inbounds by user.
20
# If replaced by inlining, any contained boundschecks may be removed.
21
const IR_FLAG_INBOUNDS    = one(UInt32) << 0
22
# This statement is marked as @inline by user
23
const IR_FLAG_INLINE      = one(UInt32) << 1
24
# This statement is marked as @noinline by user
25
const IR_FLAG_NOINLINE    = one(UInt32) << 2
26
# This statement is proven :consistent
27
const IR_FLAG_CONSISTENT  = one(UInt32) << 3
28
# This statement is proven :effect_free
29
const IR_FLAG_EFFECT_FREE = one(UInt32) << 4
30
# This statement is proven :nothrow
31
const IR_FLAG_NOTHROW     = one(UInt32) << 5
32
# This statement is proven :terminates_globally
33
const IR_FLAG_TERMINATES  = one(UInt32) << 6
34
#const IR_FLAG_TERMINATES_LOCALLY = one(UInt32) << 7
35
#const IR_FLAG_NOTASKSTATE = one(UInt32) << 8
36
#const IR_FLAG_INACCESSIBLEMEM = one(UInt32) << 9
37
const IR_FLAG_NOUB        = one(UInt32) << 10
38
#const IR_FLAG_NOUBINIB   = one(UInt32) << 11
39
#const IR_FLAG_CONSISTENTOVERLAY = one(UInt32) << 12
40
# This statement is :nortcall
41
const IR_FLAG_NORTCALL = one(UInt32) << 13
42
# An optimization pass has updated this statement in a way that may
43
# have exposed information that inference did not see. Re-running
44
# inference on this statement may be profitable.
45
const IR_FLAG_REFINED     = one(UInt32) << 16
46
# This statement has no users and may be deleted if flags get refined to IR_FLAGS_REMOVABLE
47
const IR_FLAG_UNUSED      = one(UInt32) << 17
48
# TODO: Both of these next two should eventually go away once
49
# This statement is :effect_free == EFFECT_FREE_IF_INACCESSIBLEMEMONLY
50
const IR_FLAG_EFIIMO      = one(UInt32) << 18
51
# This statement is :inaccessiblememonly == INACCESSIBLEMEM_OR_ARGMEMONLY
52
const IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM = one(UInt32) << 19
53

54
const NUM_IR_FLAGS = 3 # sync with julia.h
55

56
const IR_FLAGS_EFFECTS =
57
    IR_FLAG_CONSISTENT | IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW |
58
    IR_FLAG_TERMINATES | IR_FLAG_NOUB | IR_FLAG_NORTCALL
59

60
const IR_FLAGS_REMOVABLE = IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW | IR_FLAG_TERMINATES
61

62
const IR_FLAGS_NEEDS_EA = IR_FLAG_EFIIMO | IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM
63

64
has_flag(curr::UInt32, flag::UInt32) = (curr & flag) == flag
135,515,888✔
65

66
function iscallstmt(@nospecialize stmt)
67
    stmt isa Expr || return false
2,517,290✔
68
    head = stmt.head
1,521,952✔
69
    return head === :call || head === :invoke || head === :foreigncall
2,706,502✔
70
end
71

72
function flags_for_effects(effects::Effects)
73
    flags = zero(UInt32)
843,964✔
74
    if is_consistent(effects)
3,571,821✔
75
        flags |= IR_FLAG_CONSISTENT
767,759✔
76
    end
77
    if is_effect_free(effects)
3,571,821✔
78
        flags |= IR_FLAG_EFFECT_FREE
2,694,966✔
79
    elseif is_effect_free_if_inaccessiblememonly(effects)
876,855✔
80
        flags |= IR_FLAG_EFIIMO
13,306✔
81
    end
82
    if is_nothrow(effects)
3,571,821✔
83
        flags |= IR_FLAG_NOTHROW
3,287,535✔
84
    end
85
    if is_terminates(effects)
3,571,821✔
86
        flags |= IR_FLAG_TERMINATES
3,456,104✔
87
    end
88
    if is_inaccessiblemem_or_argmemonly(effects)
3,571,821✔
89
        flags |= IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM
217,263✔
90
    end
91
    if is_noub(effects)
3,571,821✔
92
        flags |= IR_FLAG_NOUB
3,378,502✔
93
    end
94
    if is_nortcall(effects)
3,571,821✔
95
        flags |= IR_FLAG_NORTCALL
3,442,813✔
96
    end
97
    return flags
3,571,821✔
98
end
99

100
const TOP_TUPLE = GlobalRef(Core, :tuple)
101

102
# This corresponds to the type of `CodeInfo`'s `inlining_cost` field
103
const InlineCostType = UInt16
104
const MAX_INLINE_COST = typemax(InlineCostType)
105
const MIN_INLINE_COST = InlineCostType(10)
106
const MaybeCompressed = Union{CodeInfo, String}
107

108
is_inlineable(@nospecialize src::MaybeCompressed) =
530,249✔
109
    ccall(:jl_ir_inlining_cost, InlineCostType, (Any,), src) != MAX_INLINE_COST
110
set_inlineable!(src::CodeInfo, val::Bool) =
101✔
111
    src.inlining_cost = (val ? MIN_INLINE_COST : MAX_INLINE_COST)
112

113
function inline_cost_clamp(x::Int)
114
    x > MAX_INLINE_COST && return MAX_INLINE_COST
90,606✔
115
    x < MIN_INLINE_COST && return MIN_INLINE_COST
90,606✔
116
    x = ccall(:jl_encode_inlining_cost, UInt8, (InlineCostType,), x)
31,599✔
117
    x = ccall(:jl_decode_inlining_cost, InlineCostType, (UInt8,), x)
31,599✔
118
    return x
31,599✔
119
end
120

121
const SRC_FLAG_DECLARED_INLINE = 0x1
122
const SRC_FLAG_DECLARED_NOINLINE = 0x2
123

124
is_declared_inline(@nospecialize src::MaybeCompressed) =
365,808✔
125
    ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == SRC_FLAG_DECLARED_INLINE
126

127
is_declared_noinline(@nospecialize src::MaybeCompressed) =
9✔
128
    ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == SRC_FLAG_DECLARED_NOINLINE
129

130
#####################
131
# OptimizationState #
132
#####################
133

134
# return whether this src should be inlined. If so, retrieve_ir_for_inlining must return an IRCode from it
135

136
function src_inlining_policy(interp::AbstractInterpreter, mi::MethodInstance,
137
    @nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt32)
138
    # If we have a generator, but we can't invoke it (because argument type information is lacking),
139
    # don't inline so we defer its invocation to runtime where we'll have precise type information.
140
    if isa(mi.def, Method) && hasgenerator(mi)
546,540✔
141
        may_invoke_generator(mi) || return false
420✔
142
    end
143
    return src_inlining_policy(interp, src, info, stmt_flag)
546,555✔
144
end
145

146
function src_inlining_policy(::AbstractInterpreter,
551,386✔
147
    @nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt32)
148
    isa(src, OptimizationState) && (src = src.src)
551,395✔
149
    if isa(src, MaybeCompressed)
551,395✔
150
        src_inlineable = is_stmt_inline(stmt_flag) || is_inlineable(src)
930,282✔
151
        return src_inlineable
465,175✔
152
    elseif isa(src, IRCode)
86,220✔
153
        return true
9✔
154
    end
155
    @assert !isa(src, CodeInstance) # handled by caller
86,211✔
156
    return false
86,211✔
157
end
158

159
struct InliningState{Interp<:AbstractInterpreter}
160
    edges::Vector{Any}
145,404✔
161
    world::UInt
162
    interp::Interp
163
    opt_cache::IdDict{MethodInstance,CodeInstance}
164
end
165
function InliningState(sv::InferenceState, interp::AbstractInterpreter,
166
                       opt_cache::IdDict{MethodInstance,CodeInstance}=IdDict{MethodInstance,CodeInstance}())
167
    return InliningState(sv.edges, frame_world(sv), interp, opt_cache)
145,386✔
168
end
169
function InliningState(interp::AbstractInterpreter,
9✔
170
                       opt_cache::IdDict{MethodInstance,CodeInstance}=IdDict{MethodInstance,CodeInstance}())
171
    return InliningState(Any[], get_inference_world(interp), interp, opt_cache)
27✔
172
end
173

174
struct OptimizerCache{CodeCache}
175
    wvc::WorldView{CodeCache}
176
    owner
177
    opt_cache::IdDict{MethodInstance,CodeInstance}
178
    function OptimizerCache(
179
        wvc::WorldView{CodeCache},
180
        @nospecialize(owner),
181
        opt_cache::IdDict{MethodInstance,CodeInstance}) where CodeCache
182
        new{CodeCache}(wvc, owner, opt_cache)
139,956✔
183
    end
184
end
185
function get((; wvc, owner, opt_cache)::OptimizerCache, mi::MethodInstance, default)
99,955✔
186
    if haskey(opt_cache, mi)
100,045✔
187
        codeinst = opt_cache[mi]
90✔
188
        @assert codeinst.min_world ≤ wvc.worlds.min_world &&
90✔
189
                wvc.worlds.max_world ≤ codeinst.max_world &&
190
                codeinst.owner === owner
191
        @assert isdefined(codeinst, :inferred) && codeinst.inferred === nothing
90✔
192
        return codeinst
90✔
193
    end
194
    return get(wvc, mi, default)
99,865✔
195
end
196

197
# get `code_cache(::AbstractInterpreter)` from `state::InliningState`
198
function code_cache(state::InliningState)
199
    cache = WorldView(code_cache(state.interp), state.world)
139,956✔
200
    owner = cache_owner(state.interp)
139,956✔
201
    return OptimizerCache(cache, owner, state.opt_cache)
139,956✔
202
end
203

204
mutable struct OptimizationResult
205
    ir::IRCode
147,216✔
206
    inline_flag::UInt8
207
    simplified::Bool # indicates whether the IR was processed with `cfg_simplify!`
208
end
209

210
function simplify_ir!(result::OptimizationResult)
211
    result.ir = cfg_simplify!(result.ir)
107,292✔
212
    result.simplified = true
107,292✔
213
end
214

215
mutable struct OptimizationState{Interp<:AbstractInterpreter}
216
    linfo::MethodInstance
145,395✔
217
    src::CodeInfo
218
    optresult::Union{Nothing, OptimizationResult}
219
    stmt_info::Vector{CallInfo}
220
    mod::Module
221
    sptypes::Vector{VarState}
222
    slottypes::Vector{Any}
223
    inlining::InliningState{Interp}
224
    cfg::CFG
225
    unreachable::BitSet
226
    bb_vartables::Vector{Union{Nothing,VarTable}}
227
    insert_coverage::Bool
228
end
229
function OptimizationState(sv::InferenceState, interp::AbstractInterpreter,
230
                           opt_cache::IdDict{MethodInstance,CodeInstance}=IdDict{MethodInstance,CodeInstance}())
231
    inlining = InliningState(sv, interp, opt_cache)
156,441✔
232
    return OptimizationState(sv.linfo, sv.src, nothing, sv.stmt_info, sv.mod,
145,386✔
233
                             sv.sptypes, sv.slottypes, inlining, sv.cfg,
234
                             sv.unreachable, sv.bb_vartables, sv.insert_coverage)
235
end
236
function OptimizationState(mi::MethodInstance, src::CodeInfo, interp::AbstractInterpreter,
9✔
237
                           opt_cache::IdDict{MethodInstance,CodeInstance}=IdDict{MethodInstance,CodeInstance}())
238
    # prepare src for running optimization passes if it isn't already
239
    nssavalues = src.ssavaluetypes
18✔
240
    if nssavalues isa Int
9✔
241
        src.ssavaluetypes = Any[ Any for _ = 1:nssavalues ]
72✔
242
    else
243
        nssavalues = length(src.ssavaluetypes::Vector{Any})
×
244
    end
245
    sptypes = sptypes_from_meth_instance(mi)
9✔
246
    nslots = length(src.slotflags)
9✔
247
    slottypes = src.slottypes
9✔
248
    if slottypes === nothing
9✔
249
        slottypes = Any[ Any for _ = 1:nslots ]
24✔
250
    end
251
    stmt_info = CallInfo[ NoCallInfo() for _ = 1:nssavalues ]
72✔
252
    # cache some useful state computations
253
    def = mi.def
9✔
254
    mod = isa(def, Method) ? def.module : def
12✔
255
    # Allow using the global MI cache, but don't track edges.
256
    # This method is mostly used for unit testing the optimizer
257
    inlining = InliningState(interp, opt_cache)
9✔
258
    cfg = compute_basic_blocks(src.code)
9✔
259
    unreachable = BitSet()
9✔
260
    bb_vartables = Union{VarTable,Nothing}[]
9✔
261
    for _ = 1:length(cfg.blocks)
9✔
262
        push!(bb_vartables, VarState[
129✔
263
            VarState(slottypes[slot], src.slotflags[slot] & SLOT_USEDUNDEF != 0)
264
            for slot = 1:nslots
265
        ])
266
    end
51✔
267
    return OptimizationState(mi, src, nothing, stmt_info, mod, sptypes, slottypes, inlining, cfg, unreachable, bb_vartables, false)
9✔
268
end
269
function OptimizationState(mi::MethodInstance, interp::AbstractInterpreter)
6✔
270
    world = get_inference_world(interp)
6✔
271
    src = retrieve_code_info(mi, world)
6✔
272
    src === nothing && return nothing
6✔
273
    return OptimizationState(mi, src, interp)
6✔
274
end
275

276
function argextype end # imported by EscapeAnalysis
277
function try_compute_field end # imported by EscapeAnalysis
278

279
include("ssair/heap.jl")
280
include("ssair/slot2ssa.jl")
281
include("ssair/inlining.jl")
282
include("ssair/verify.jl")
283
include("ssair/legacy.jl")
284
include("ssair/EscapeAnalysis.jl")
285
include("ssair/passes.jl")
286
include("ssair/irinterp.jl")
287

288
function ir_to_codeinf!(opt::OptimizationState, frame::InferenceState, edges::SimpleVector)
10,794✔
289
    ir_to_codeinf!(opt, edges, compute_inlining_cost(frame.interp, frame.result, opt.optresult))
10,794✔
290
end
291

292
function ir_to_codeinf!(opt::OptimizationState, edges::SimpleVector, inlining_cost::InlineCostType)
293
    src = ir_to_codeinf!(opt, edges)
21,588✔
294
    src.inlining_cost = inlining_cost
10,794✔
295
    src
10,794✔
296
end
297

298
function ir_to_codeinf!(opt::OptimizationState, edges::SimpleVector)
299
    src = ir_to_codeinf!(opt)
24,044✔
300
    src.edges = edges
13,250✔
301
    src
3,425✔
302
end
303

304
function ir_to_codeinf!(opt::OptimizationState)
305
    (; linfo, src, optresult) = opt
35,873✔
306
    if optresult === nothing
35,873✔
307
        return src
2,456✔
308
    end
309
    src = ir_to_codeinf!(src, optresult.ir)
33,417✔
310
    opt.optresult = nothing
33,417✔
311
    opt.src = src
33,417✔
312
    maybe_validate_code(linfo, src, "optimized")
33,417✔
313
    return src
33,417✔
314
end
315

316
function ir_to_codeinf!(src::CodeInfo, ir::IRCode)
317
    replace_code_newstyle!(src, ir)
33,486✔
318
    widen_all_consts!(src)
33,486✔
319
    return src
11,183✔
320
end
321

322
# widen all Const elements in type annotations
323
function widen_all_consts!(src::CodeInfo)
11,183✔
324
    ssavaluetypes = src.ssavaluetypes::Vector{Any}
11,183✔
325
    for i = 1:length(ssavaluetypes)
22,366✔
326
        ssavaluetypes[i] = widenconst(ssavaluetypes[i])
1,213,089✔
327
    end
2,414,995✔
328

329
    for i = 1:length(src.code)
22,366✔
330
        x = src.code[i]
1,213,089✔
331
        if isa(x, PiNode)
1,213,089✔
332
            src.code[i] = PiNode(x.val, widenconst(x.typ))
7,986✔
333
        end
334
    end
2,414,995✔
335

336
    return src
11,183✔
337
end
338

339
#########
340
# logic #
341
#########
342

343
_topmod(sv::OptimizationState) = _topmod(sv.mod)
×
344

345
is_stmt_inline(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_INLINE)
1,685,754✔
346
is_stmt_noinline(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_NOINLINE)
547,804✔
347

348
function new_expr_effect_flags(𝕃ₒ::AbstractLattice, args::Vector{Any}, src::Union{IRCode,IncrementalCompact}, pattern_match=nothing)
5,774✔
349
    Targ = args[1]
11,443✔
350
    atyp = argextype(Targ, src)
5,774✔
351
    # `Expr(:new)` of unknown type could raise arbitrary TypeError.
352
    typ, isexact = instanceof_tfunc(atyp, true)
5,774✔
353
    if !isexact
5,774✔
354
        atyp = unwrap_unionall(widenconst(atyp))
126✔
355
        if isType(atyp) && isTypeDataType(atyp.parameters[1])
252✔
356
            typ = atyp.parameters[1]
126✔
357
        else
358
            return (false, false, false)
×
359
        end
360
        isabstracttype(typ) && return (false, false, false)
126✔
361
    else
362
        isconcretedispatch(typ) || return (false, false, false)
5,648✔
363
    end
364
    typ = typ::DataType
5,774✔
365
    fcount = datatype_fieldcount(typ)
5,774✔
366
    fcount === nothing && return (false, false, false)
5,774✔
367
    fcount >= length(args) - 1 || return (false, false, false)
5,774✔
368
    for fidx in 1:(length(args) - 1)
11,548✔
369
        farg = args[fidx + 1]
10,375✔
370
        eT = argextype(farg, src)
10,375✔
371
        fT = fieldtype(typ, fidx)
10,375✔
372
        if !isexact && has_free_typevars(fT)
10,375✔
373
            if pattern_match !== nothing && pattern_match(src, typ, fidx, Targ, farg)
120✔
374
                continue
×
375
            end
376
            return (false, false, false)
120✔
377
        end
378
        ⊑(𝕃ₒ, eT, fT) || return (false, false, false)
10,255✔
379
    end
14,856✔
380
    return (false, true, true)
5,654✔
381
end
382

383
# Returns a tuple of `(:consistent, :removable, :nothrow)` flags for a given statement.
384
function stmt_effect_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt), @nospecialize(rt), src::Union{IRCode,IncrementalCompact})
1,041,204✔
385
    # TODO: We're duplicating analysis from inference here.
386
    isa(stmt, PiNode) && return (true, true, true)
1,041,204✔
387
    isa(stmt, PhiNode) && return (true, true, true)
999,549✔
388
    isa(stmt, ReturnNode) && return (true, false, true)
990,639✔
389
    isa(stmt, EnterNode) && return (true, false, true)
876,924✔
390
    isa(stmt, GotoNode) && return (true, false, true)
874,272✔
391
    isa(stmt, GotoIfNot) && return (true, false, ⊑(𝕃ₒ, argextype(stmt.cond, src), Bool))
867,251✔
392
    if isa(stmt, GlobalRef)
861,047✔
393
        # Modeled more precisely in abstract_eval_globalref. In general, if a
394
        # GlobalRef was moved to statement position, it is probably not `const`,
395
        # so we can't say much about it anyway.
396
        return (false, false, false)
×
397
    elseif isa(stmt, Expr)
861,047✔
398
        (; head, args) = stmt
638,844✔
399
        if head === :static_parameter
638,844✔
400
            # if we aren't certain enough about the type, it might be an UndefVarError at runtime
401
            sptypes = isa(src, IRCode) ? src.sptypes : src.ir.sptypes
47✔
402
            nothrow = !sptypes[args[1]::Int].undef
47✔
403
            return (true, nothrow, nothrow)
47✔
404
        end
405
        if head === :call
638,797✔
406
            f = argextype(args[1], src)
274,731✔
407
            f = singleton_type(f)
274,788✔
408
            f === nothing && return (false, false, false)
274,731✔
409
            if f === Intrinsics.cglobal || f === Intrinsics.llvmcall
549,348✔
410
                # TODO: these are not yet linearized
411
                return (false, false, false)
×
412
            end
413
            isa(f, Builtin) || return (false, false, false)
476,652✔
414
            # Needs to be handled in inlining to look at the callee effects
415
            f === Core._apply_iterate && return (false, false, false)
72,696✔
416
            argtypes = Any[argextype(args[arg], src) for arg in 2:length(args)]
139,909✔
417
            effects = builtin_effects(𝕃ₒ, f, argtypes, rt)
72,696✔
418
            consistent = is_consistent(effects)
72,696✔
419
            effect_free = is_effect_free(effects)
72,696✔
420
            nothrow = is_nothrow(effects)
72,696✔
421
            terminates = is_terminates(effects)
72,696✔
422
            removable = effect_free & nothrow & terminates
72,696✔
423
            return (consistent, removable, nothrow)
72,696✔
424
        elseif head === :new
364,066✔
425
            return new_expr_effect_flags(𝕃ₒ, args, src)
5,669✔
426
        elseif head === :foreigncall
358,397✔
427
            effects = foreigncall_effects(stmt) do @nospecialize x
551✔
428
                argextype(x, src)
429
            end
430
            consistent = is_consistent(effects)
551✔
431
            effect_free = is_effect_free(effects)
551✔
432
            nothrow = is_nothrow(effects)
551✔
433
            terminates = is_terminates(effects)
551✔
434
            removable = effect_free & nothrow & terminates
551✔
435
            return (consistent, removable, nothrow)
551✔
436
        elseif head === :new_opaque_closure
357,846✔
437
            length(args) < 4 && return (false, false, false)
×
438
            typ = argextype(args[1], src)
×
439
            typ, isexact = instanceof_tfunc(typ, true)
×
440
            isexact || return (false, false, false)
×
441
            ⊑(𝕃ₒ, typ, Tuple) || return (false, false, false)
×
442
            rt_lb = argextype(args[2], src)
×
443
            rt_ub = argextype(args[3], src)
×
444
            source = argextype(args[5], src)
×
445
            if !(⊑(𝕃ₒ, rt_lb, Type) && ⊑(𝕃ₒ, rt_ub, Type) && ⊑(𝕃ₒ, source, Method))
×
446
                return (false, false, false)
×
447
            end
448
            return (false, true, true)
×
449
        elseif head === :inbounds
357,846✔
450
            return (true, true, true)
×
451
        elseif head === :boundscheck || head === :isdefined || head === :the_exception || head === :copyast
715,584✔
452
            return (false, true, true)
330✔
453
        else
454
            # e.g. :loopinfo
455
            return (false, false, false)
357,516✔
456
        end
457
    end
458
    isa(stmt, SlotNumber) && error("unexpected IR elements")
222,203✔
459
    return (true, true, true)
222,203✔
460
end
461

462
function recompute_effects_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt), @nospecialize(rt),
463
                                 src::Union{IRCode,IncrementalCompact})
464
    flag = IR_FLAG_NULL
1,041,204✔
465
    (consistent, removable, nothrow) = stmt_effect_flags(𝕃ₒ, stmt, rt, src)
1,210,713✔
466
    if consistent
1,210,713✔
467
        flag |= IR_FLAG_CONSISTENT
470,184✔
468
    end
469
    if removable
1,210,713✔
470
        flag |= IR_FLAGS_REMOVABLE
390,501✔
471
    elseif nothrow
820,212✔
472
        flag |= IR_FLAG_NOTHROW
164,349✔
473
    end
474
    if !iscallstmt(stmt)
1,622,409✔
475
        # There is a bit of a subtle point here, which is that some non-call
476
        # statements (e.g. PiNode) can be UB:, however, we consider it
477
        # illegal to introduce such statements that actually cause UB (for any
478
        # input). Ideally that'd be handled at insertion time (TODO), but for
479
        # the time being just do that here.
480
        flag |= IR_FLAG_NOUB
875,069✔
481
    end
482
    return flag
1,210,713✔
483
end
484

485
"""
486
    argextype(x, src::Union{IRCode,IncrementalCompact}) -> t
487
    argextype(x, src::CodeInfo, sptypes::Vector{VarState}) -> t
488

489
Return the type of value `x` in the context of inferred source `src`.
490
Note that `t` might be an extended lattice element.
491
Use `widenconst(t)` to get the native Julia type of `x`.
492
"""
493
argextype(@nospecialize(x), ir::IRCode, sptypes::Vector{VarState} = ir.sptypes) =
4,015,667✔
494
    argextype(x, ir, sptypes, ir.argtypes)
495
function argextype(@nospecialize(x), compact::IncrementalCompact, sptypes::Vector{VarState} = compact.ir.sptypes)
496
    isa(x, AnySSAValue) && return types(compact)[x]
5,822,318✔
497
    return argextype(x, compact, sptypes, compact.ir.argtypes)
2,647,575✔
498
end
499
function argextype(@nospecialize(x), src::CodeInfo, sptypes::Vector{VarState})
500
    return argextype(x, src, sptypes, src.slottypes::Union{Vector{Any},Nothing})
4,440✔
501
end
502
function argextype(
3,992,811✔
503
    @nospecialize(x), src::Union{IRCode,IncrementalCompact,CodeInfo},
504
    sptypes::Vector{VarState}, slottypes::Union{Vector{Any},Nothing})
505
    if isa(x, Expr)
4,032,862✔
506
        if x.head === :static_parameter
×
507
            idx = x.args[1]::Int
×
508
            (1 ≤ idx ≤ length(sptypes)) || throw(InvalidIRError())
×
509
            return sptypes[idx].typ
×
510
        elseif x.head === :boundscheck
×
511
            return Bool
×
512
        elseif x.head === :copyast
×
513
            length(x.args) == 0 && throw(InvalidIRError())
×
514
            return argextype(x.args[1], src, sptypes, slottypes)
×
515
        end
516
        Core.println("argextype called on Expr with head ", x.head,
×
517
                     " which is not valid for IR in argument-position.")
518
        @assert false
×
519
    elseif isa(x, SlotNumber)
4,032,862✔
520
        slottypes === nothing && return Any
×
521
        (1 ≤ x.id ≤ length(slottypes)) || throw(InvalidIRError())
×
522
        return slottypes[x.id]
×
523
    elseif isa(x, SSAValue)
4,032,862✔
524
        return abstract_eval_ssavalue(x, src)
726,553✔
525
    elseif isa(x, Argument)
3,813,579✔
526
        slottypes === nothing && return Any
264,629✔
527
        (1 ≤ x.n ≤ length(slottypes)) || throw(InvalidIRError())
264,759✔
528
        return slottypes[x.n]
264,759✔
529
    elseif isa(x, QuoteNode)
3,548,950✔
530
        return Const(x.value)
34,958✔
531
    elseif isa(x, GlobalRef)
3,513,992✔
532
        return abstract_eval_globalref_type(x, src)
3,020,133✔
533
    elseif isa(x, PhiNode) || isa(x, PhiCNode) || isa(x, UpsilonNode)
987,703✔
534
        return Any
×
535
    elseif isa(x, PiNode)
493,859✔
536
        return x.typ
×
537
    else
538
        return Const(x)
493,859✔
539
    end
540
end
541
function abstract_eval_ssavalue(s::SSAValue, src::CodeInfo)
×
542
    ssavaluetypes = src.ssavaluetypes
×
543
    if ssavaluetypes isa Int
×
544
        (1 ≤ s.id ≤ ssavaluetypes) || throw(InvalidIRError())
×
545
        return Any
×
546
    else
547
        return abstract_eval_ssavalue(s, ssavaluetypes::Vector{Any})
×
548
    end
549
end
550
abstract_eval_ssavalue(s::SSAValue, src::Union{IRCode,IncrementalCompact}) = types(src)[s]
726,607✔
551

552
"""
553
    finishopt!(interp::AbstractInterpreter, opt::OptimizationState, ir::IRCode)
554

555
Called at the end of optimization to store the resulting IR back into the OptimizationState.
556
"""
557
function finishopt!(::AbstractInterpreter, opt::OptimizationState, ir::IRCode)
2,168✔
558
    opt.optresult = OptimizationResult(ir, ccall(:jl_ir_flag_inlining, UInt8, (Any,), opt.src), false)
147,216✔
559
    return nothing
137,397✔
560
end
561

562
function visit_bb_phis!(callback, ir::IRCode, bb::Int)
563
    stmts = ir.cfg.blocks[bb].stmts
×
564
    for idx in stmts
×
565
        stmt = ir[SSAValue(idx)][:stmt]
×
566
        if !isa(stmt, PhiNode)
×
567
            if !is_valid_phiblock_stmt(stmt)
×
568
                return
×
569
            end
570
        else
571
            callback(idx)
×
572
        end
573
    end
×
574
end
575

576
function any_stmt_may_throw(ir::IRCode, bb::Int)
577
    for idx in ir.cfg.blocks[bb].stmts
288✔
578
        if !has_flag(ir[SSAValue(idx)], IR_FLAG_NOTHROW)
195✔
579
            return true
144✔
580
        end
581
    end
102✔
582
    return false
×
583
end
584

585
visit_conditional_successors(callback, ir::IRCode, bb::Int) = # used for test
9✔
586
    visit_conditional_successors(callback, LazyPostDomtree(ir), ir, bb)
587
function visit_conditional_successors(callback, lazypostdomtree::LazyPostDomtree, ir::IRCode, bb::Int)
153✔
588
    visited = BitSet((bb,))
153✔
589
    worklist = Int[bb]
153✔
590
    while !isempty(worklist)
186✔
591
        thisbb = popfirst!(worklist)
177✔
592
        for succ in ir.cfg.blocks[thisbb].succs
177✔
593
            succ in visited && continue
180✔
594
            push!(visited, succ)
180✔
595
            if postdominates(get!(lazypostdomtree), succ, bb)
258✔
596
                # this successor is not conditional, so no need to visit it further
597
                continue
12✔
598
            elseif callback(succ)
219✔
599
                return true
144✔
600
            else
601
                push!(worklist, succ)
24✔
602
            end
603
        end
36✔
604
    end
33✔
605
    return false
9✔
606
end
607

608
struct AugmentedDomtree
609
    cfg::CFG
×
610
    domtree::DomTree
611
end
612

613
mutable struct LazyAugmentedDomtree
614
    const ir::IRCode
615
    agdomtree::AugmentedDomtree
616
    LazyAugmentedDomtree(ir::IRCode) = new(ir)
134,992✔
617
end
618

619
function get!(lazyagdomtree::LazyAugmentedDomtree)
×
620
    isdefined(lazyagdomtree, :agdomtree) && return lazyagdomtree.agdomtree
×
621
    ir = lazyagdomtree.ir
×
622
    cfg = copy(ir.cfg)
×
623
    # Add a virtual basic block to represent the exit
624
    push!(cfg.blocks, BasicBlock(StmtRange(0:-1)))
×
625
    for bb = 1:(length(cfg.blocks)-1)
×
626
        terminator = ir[SSAValue(last(cfg.blocks[bb].stmts))][:stmt]
×
627
        if isa(terminator, ReturnNode) && isdefined(terminator, :val)
×
628
            cfg_insert_edge!(cfg, bb, length(cfg.blocks))
×
629
        end
630
    end
×
631
    domtree = construct_domtree(cfg)
×
632
    return lazyagdomtree.agdomtree = AugmentedDomtree(cfg, domtree)
×
633
end
634

635
mutable struct PostOptAnalysisState
636
    const result::InferenceResult
637
    const ir::IRCode
638
    const inconsistent::BitSetBoundedMinPrioritySet
639
    const tpdum::TwoPhaseDefUseMap
640
    const lazypostdomtree::LazyPostDomtree
641
    const lazyagdomtree::LazyAugmentedDomtree
642
    const ea_analysis_pending::Vector{Int}
643
    all_retpaths_consistent::Bool
644
    all_effect_free::Bool
645
    effect_free_if_argmem_only::Union{Nothing,Bool}
646
    all_nothrow::Bool
647
    all_noub::Bool
648
    any_conditional_ub::Bool
649
    nortcall::Bool
650
    function PostOptAnalysisState(result::InferenceResult, ir::IRCode)
651
        inconsistent = BitSetBoundedMinPrioritySet(length(ir.stmts))
134,992✔
652
        tpdum = TwoPhaseDefUseMap(length(ir.stmts))
4,285,076✔
653
        lazypostdomtree = LazyPostDomtree(ir)
134,992✔
654
        lazyagdomtree = LazyAugmentedDomtree(ir)
134,992✔
655
        return new(result, ir, inconsistent, tpdum, lazypostdomtree, lazyagdomtree, Int[],
134,992✔
656
                   true, true, nothing, true, true, false, true)
657
    end
658
end
659

660
give_up_refinements!(sv::PostOptAnalysisState) =
×
661
    sv.all_retpaths_consistent = sv.all_effect_free = sv.effect_free_if_argmem_only =
662
    sv.all_nothrow = sv.all_noub = sv.nortcall = false
663

664
function any_refinable(sv::PostOptAnalysisState)
665
    effects = sv.result.ipo_effects
990,591✔
666
    return ((!is_consistent(effects) & sv.all_retpaths_consistent) |
990,591✔
667
            (!is_effect_free(effects) & sv.all_effect_free) |
668
            (!is_nothrow(effects) & sv.all_nothrow) |
669
            (!is_noub(effects) & sv.all_noub) |
670
            (!is_nortcall(effects) & sv.nortcall))
671
end
672

673
struct GetNativeEscapeCache{CodeCache}
674
    code_cache::CodeCache
675
    GetNativeEscapeCache(code_cache::CodeCache) where CodeCache = new{CodeCache}(code_cache)
×
676
end
677
GetNativeEscapeCache(interp::AbstractInterpreter) = GetNativeEscapeCache(code_cache(interp))
×
678
function ((; code_cache)::GetNativeEscapeCache)(codeinst::Union{CodeInstance,MethodInstance})
679
    if codeinst isa MethodInstance
×
680
        codeinst = get(code_cache, codeinst, nothing)
×
681
        codeinst isa CodeInstance || return false
×
682
    end
683
    argescapes = traverse_analysis_results(codeinst) do @nospecialize result
×
684
        return result isa EscapeAnalysis.ArgEscapeCache ? result : nothing
×
685
    end
686
    if argescapes !== nothing
×
687
        return argescapes
×
688
    end
689
    effects = decode_effects(codeinst.ipo_purity_bits)
×
690
    if is_effect_free(effects) && is_inaccessiblememonly(effects)
×
691
        # We might not have run EA on simple frames without any escapes (e.g. when optimization
692
        # is skipped when result is constant-folded by abstract interpretation). If those
693
        # frames aren't inlined, the accuracy of EA for caller context takes a big hit.
694
        # This is a HACK to avoid that, but obviously, a more comprehensive fix would be ideal.
695
        return true
×
696
    end
697
    return false
×
698
end
699

700
function refine_effects!(interp::AbstractInterpreter, opt::OptimizationState, sv::PostOptAnalysisState)
134,992✔
701
    if !is_effect_free(sv.result.ipo_effects) && sv.all_effect_free && !isempty(sv.ea_analysis_pending)
134,992✔
702
        ir = sv.ir
×
703
        nargs = Int(opt.src.nargs)
×
704
        estate = EscapeAnalysis.analyze_escapes(ir, nargs, optimizer_lattice(interp), get_escape_cache(interp))
×
705
        argescapes = EscapeAnalysis.ArgEscapeCache(estate)
×
706
        stack_analysis_result!(sv.result, argescapes)
×
707
        validate_mutable_arg_escapes!(estate, sv)
×
708
    end
709

710
    any_refinable(sv) || return false
269,449✔
711
    effects = sv.result.ipo_effects
535✔
712
    sv.result.ipo_effects = Effects(effects;
535✔
713
        consistent = sv.all_retpaths_consistent ? ALWAYS_TRUE : effects.consistent,
714
        effect_free = sv.all_effect_free ? ALWAYS_TRUE :
715
                      sv.effect_free_if_argmem_only === true ? EFFECT_FREE_IF_INACCESSIBLEMEMONLY : effects.effect_free,
716
        nothrow = sv.all_nothrow ? true : effects.nothrow,
717
        noub = sv.all_noub ? (sv.any_conditional_ub ? NOUB_IF_NOINBOUNDS : ALWAYS_TRUE) : effects.noub,
718
        nortcall = sv.nortcall ? true : effects.nortcall)
719
    return true
535✔
720
end
721

722
function is_ipo_dataflow_analysis_profitable(effects::Effects)
723
    return !(is_consistent(effects) && is_effect_free(effects) &&
135,268✔
724
             is_nothrow(effects) && is_noub(effects))
725
end
726

727
function iscall_with_boundscheck(@nospecialize(stmt), sv::PostOptAnalysisState)
880,619✔
728
    isexpr(stmt, :call) || return false
1,717,958✔
729
    ft = argextype(stmt.args[1], sv.ir)
43,280✔
730
    f = singleton_type(ft)
43,310✔
731
    f === nothing && return false
43,280✔
732
    if f === getfield
43,250✔
733
        nargs = 4
12,293✔
734
    elseif f === memoryrefnew
30,957✔
735
        nargs= 3
288✔
736
    elseif f === memoryrefget || f === memoryref_isassigned
61,338✔
737
        nargs = 4
×
738
    elseif f === memoryrefset!
30,669✔
739
        nargs = 5
30✔
740
    else
741
        return false
30,639✔
742
    end
743
    length(stmt.args) < nargs && return false
12,611✔
744
    boundscheck = stmt.args[end]
1,167✔
745
    argextype(boundscheck, sv.ir) === Bool || return false
1,182✔
746
    isa(boundscheck, SSAValue) || return false
1,152✔
747
    return true
1,152✔
748
end
749

750
function check_all_args_noescape!(sv::PostOptAnalysisState, ir::IRCode, @nospecialize(stmt),
×
751
                                  estate::EscapeAnalysis.EscapeState)
752
    stmt isa Expr || return false
×
753
    if isexpr(stmt, :invoke)
×
754
        startidx = 2
×
755
    elseif isexpr(stmt, :new)
×
756
        startidx = 1
×
757
    else
758
        return false
×
759
    end
760
    has_no_escape(x::EscapeAnalysis.EscapeInfo) =
×
761
        EscapeAnalysis.has_no_escape(EscapeAnalysis.ignore_argescape(x))
762
    for i = startidx:length(stmt.args)
×
763
        arg = stmt.args[i]
×
764
        argt = argextype(arg, ir)
×
765
        if is_mutation_free_argtype(argt)
×
766
            continue
×
767
        end
768
        # See if we can find the allocation
769
        if isa(arg, Argument)
×
770
            if has_no_escape(estate[arg])
×
771
                # Even if we prove everything else effect_free, the best we can
772
                # say is :effect_free_if_argmem_only
773
                if sv.effect_free_if_argmem_only === nothing
×
774
                    sv.effect_free_if_argmem_only = true
×
775
                end
776
            else
777
                sv.effect_free_if_argmem_only = false
×
778
            end
779
            return false
×
780
        elseif isa(arg, SSAValue)
×
781
            has_no_escape(estate[arg]) || return false
×
782
            check_all_args_noescape!(sv, ir, ir[arg][:stmt], estate) || return false
×
783
        else
784
            return false
×
785
        end
786
    end
×
787
    return true
×
788
end
789

790
function validate_mutable_arg_escapes!(estate::EscapeAnalysis.EscapeState, sv::PostOptAnalysisState)
791
    ir = sv.ir
×
792
    for idx in sv.ea_analysis_pending
×
793
        # See if any mutable memory was allocated in this function and determined
794
        # not to escape.
795
        inst = ir[SSAValue(idx)]
×
796
        stmt = inst[:stmt]
×
797
        if !check_all_args_noescape!(sv, ir, stmt, estate)
×
798
            return sv.all_effect_free = false
×
799
        end
800
    end
×
801
    return true
×
802
end
803

804
function is_conditional_noub(inst::Instruction, sv::PostOptAnalysisState)
805
    stmt = inst[:stmt]
25,020✔
806
    iscall_with_boundscheck(stmt, sv) || return false
49,908✔
807
    barg = stmt.args[end]::SSAValue
132✔
808
    bstmt = sv.ir[barg][:stmt]
132✔
809
    isexpr(bstmt, :boundscheck) || return false
132✔
810
    # If IR_FLAG_INBOUNDS is already set, no more conditional ub
811
    (!isempty(bstmt.args) && bstmt.args[1] === false) && return false
132✔
812
    return true
81✔
813
end
814

815
function scan_non_dataflow_flags!(inst::Instruction, sv::PostOptAnalysisState)
855,599✔
816
    flag = inst[:flag]
855,599✔
817
    # If we can prove that the argmem does not escape the current function, we can
818
    # refine this to :effect_free.
819
    needs_ea_validation = has_flag(flag, IR_FLAGS_NEEDS_EA)
855,599✔
820
    stmt = inst[:stmt]
855,599✔
821
    if !needs_ea_validation
855,599✔
822
        if !isterminator(stmt) && stmt !== nothing
1,697,555✔
823
            # ignore control flow node – they are not removable on their own and thus not
824
            # have `IR_FLAG_EFFECT_FREE` but still do not taint `:effect_free`-ness of
825
            # the whole method invocation
826
            sv.all_effect_free &= has_flag(flag, IR_FLAG_EFFECT_FREE)
829,681✔
827
        end
828
    elseif sv.all_effect_free
15✔
829
        if (isexpr(stmt, :invoke) || isexpr(stmt, :new) ||
×
830
            # HACK for performance: limit the scope of EA to code with object field access only,
831
            # since its abilities to reason about e.g. arrays are currently very limited anyways.
832
            is_known_call(stmt, setfield!, sv.ir))
833
            push!(sv.ea_analysis_pending, inst.idx)
×
834
        else
835
            sv.all_effect_free = false
×
836
        end
837
    end
838
    sv.all_nothrow &= has_flag(flag, IR_FLAG_NOTHROW)
855,599✔
839
    if !has_flag(flag, IR_FLAG_NOUB)
855,599✔
840
        # Special case: `:boundscheck` into `getfield` or memory operations is `:noub_if_noinbounds`
841
        if is_conditional_noub(inst, sv)
25,101✔
842
            sv.any_conditional_ub = true
81✔
843
        else
844
            sv.all_noub = false
24,939✔
845
        end
846
    end
847
    if !has_flag(flag, IR_FLAG_NORTCALL)
855,599✔
848
        # if a function call that might invoke `Core.Compiler.return_type` has been deleted,
849
        # there's no need to taint with `:nortcall`, allowing concrete evaluation
850
        if iscallstmt(stmt)
1,581,762✔
851
            sv.nortcall = false
22,251✔
852
        end
853
    end
854
    nothing
855,599✔
855
end
856

857
function scan_inconsistency!(inst::Instruction, sv::PostOptAnalysisState)
855,599✔
858
    flag = inst[:flag]
855,599✔
859
    stmt_inconsistent = !has_flag(flag, IR_FLAG_CONSISTENT)
855,599✔
860
    stmt = inst[:stmt]
855,599✔
861
    # Special case: For `getfield` and memory operations, we allow inconsistency of the :boundscheck argument
862
    (; inconsistent, tpdum) = sv
855,599✔
863
    if iscall_with_boundscheck(stmt, sv)
855,599✔
864
        for i = 1:(length(stmt.args)-1)
2,040✔
865
            val = stmt.args[i]
3,075✔
866
            if isa(val, SSAValue)
3,075✔
867
                stmt_inconsistent |= val.id in inconsistent
2,088✔
868
                count!(tpdum, val)
1,044✔
869
            end
870
        end
5,130✔
871
    else
872
        for ur in userefs(stmt)
936,710✔
873
            val = ur[]
252,255✔
874
            if isa(val, SSAValue)
252,255✔
875
                stmt_inconsistent |= val.id in inconsistent
117,034✔
876
                count!(tpdum, val)
58,517✔
877
            end
878
        end
252,255✔
879
    end
880
    stmt_inconsistent && push!(inconsistent, inst.idx)
855,599✔
881
    return stmt_inconsistent
855,599✔
882
end
883

884
struct ScanStmt
885
    sv::PostOptAnalysisState
134,992✔
886
end
887

888
function ((; sv)::ScanStmt)(inst::Instruction, lstmt::Int, bb::Int)
855,599✔
889
    stmt = inst[:stmt]
855,599✔
890

891
    if isa(stmt, EnterNode)
855,599✔
892
        # try/catch not yet modeled
893
        give_up_refinements!(sv)
×
894
        return nothing
×
895
    end
896

897
    scan_non_dataflow_flags!(inst, sv)
855,599✔
898

899
    stmt_inconsistent = scan_inconsistency!(inst, sv)
855,599✔
900

901
    if stmt_inconsistent
855,599✔
902
        if !has_flag(inst[:flag], IR_FLAG_NOTHROW)
819,470✔
903
            # Taint :consistent if this statement may raise since :consistent requires
904
            # consistent termination. TODO: Separate :consistent_return and :consistent_termination from :consistent.
905
            sv.all_retpaths_consistent = false
770,206✔
906
        end
907
        if inst.idx == lstmt
819,470✔
908
            if isa(stmt, ReturnNode) && isdefined(stmt, :val)
18,435✔
909
                sv.all_retpaths_consistent = false
×
910
            elseif isa(stmt, GotoIfNot)
18,435✔
911
                # Conditional Branch with inconsistent condition.
912
                # If we do not know this function terminates, taint consistency, now,
913
                # :consistent requires consistent termination. TODO: Just look at the
914
                # inconsistent region.
915
                if !sv.result.ipo_effects.terminates
5,358✔
916
                    sv.all_retpaths_consistent = false
5,214✔
917
                elseif visit_conditional_successors(sv.lazypostdomtree, sv.ir, bb) do succ::Int
144✔
918
                        return any_stmt_may_throw(sv.ir, succ)
195✔
919
                    end
920
                    # check if this `GotoIfNot` leads to conditional throws, which taints consistency
921
                    sv.all_retpaths_consistent = false
144✔
922
                else
923
                    (; cfg, domtree) = get!(sv.lazyagdomtree)
×
924
                    for succ in iterated_dominance_frontier(cfg, BlockLiveness(sv.ir.cfg.blocks[bb].succs, nothing), domtree)
×
925
                        if succ == length(cfg.blocks)
×
926
                            # Phi node in the virtual exit -> We have a conditional
927
                            # return. TODO: Check if all the retvals are egal.
928
                            sv.all_retpaths_consistent = false
×
929
                        else
930
                            visit_bb_phis!(sv.ir, succ) do phiidx::Int
×
931
                                push!(sv.inconsistent, phiidx)
×
932
                            end
933
                        end
934
                    end
×
935
                end
936
            end
937
        end
938
    end
939

940
    # bail out early if there are no possibilities to refine the effects
941
    if !any_refinable(sv)
855,599✔
942
        return nothing
109,290✔
943
    end
944

945
    return true
746,309✔
946
end
947

948
function check_inconsistentcy!(sv::PostOptAnalysisState, scanner::BBScanner)
×
949
    (; ir, inconsistent, tpdum) = sv
×
950

951
    scan!(ScanStmt(sv), scanner, false)
×
952
    complete!(tpdum); push!(scanner.bb_ip, 1)
×
953
    populate_def_use_map!(tpdum, scanner)
×
954

955
    stmt_ip = BitSetBoundedMinPrioritySet(length(ir.stmts))
×
956
    for def in inconsistent
×
957
        for use in tpdum[def]
×
958
            if !(use in inconsistent)
×
959
                push!(inconsistent, use)
×
960
                append!(stmt_ip, tpdum[use])
×
961
            end
962
        end
×
963
    end
×
964
    lazydomtree = LazyDomtree(ir)
×
965
    while !isempty(stmt_ip)
×
966
        idx = popfirst!(stmt_ip)
×
967
        inst = ir[SSAValue(idx)]
×
968
        stmt = inst[:stmt]
×
969
        if iscall_with_boundscheck(stmt, sv)
×
970
            any_non_boundscheck_inconsistent = false
×
971
            for i = 1:(length(stmt.args)-1)
×
972
                val = stmt.args[i]
×
973
                if isa(val, SSAValue)
×
974
                    any_non_boundscheck_inconsistent |= val.id in inconsistent
×
975
                    any_non_boundscheck_inconsistent && break
×
976
                end
977
            end
×
978
            any_non_boundscheck_inconsistent || continue
×
979
        elseif isa(stmt, ReturnNode)
×
980
            sv.all_retpaths_consistent = false
×
981
        elseif isa(stmt, GotoIfNot)
×
982
            bb = block_for_inst(ir, idx)
×
983
            cfg = ir.cfg
×
984
            blockliveness = BlockLiveness(cfg.blocks[bb].succs, nothing)
×
985
            for succ in iterated_dominance_frontier(cfg, blockliveness, get!(lazydomtree))
×
986
                visit_bb_phis!(ir, succ) do phiidx::Int
×
987
                    push!(inconsistent, phiidx)
×
988
                    push!(stmt_ip, phiidx)
×
989
                end
990
            end
×
991
        end
992
        sv.all_retpaths_consistent || break
×
993
        append!(inconsistent, tpdum[idx])
×
994
        append!(stmt_ip, tpdum[idx])
×
995
    end
×
996
end
997

998
function ipo_dataflow_analysis!(interp::AbstractInterpreter, opt::OptimizationState,
135,268✔
999
                                ir::IRCode, result::InferenceResult)
1000
    if !is_ipo_dataflow_analysis_profitable(result.ipo_effects)
135,268✔
1001
        return false
276✔
1002
    end
1003

1004
    @assert isempty(ir.new_nodes) "IRCode should be compacted before post-opt analysis"
134,992✔
1005

1006
    sv = PostOptAnalysisState(result, ir)
4,285,076✔
1007
    scanner = BBScanner(ir)
134,992✔
1008

1009
    completed_scan = scan!(ScanStmt(sv), scanner, true)
134,992✔
1010

1011
    if !completed_scan
134,992✔
1012
        if sv.all_retpaths_consistent
8✔
1013
            check_inconsistentcy!(sv, scanner)
×
1014
        else
1015
            # No longer any dataflow concerns, just scan the flags
1016
            scan!(scanner, false) do inst::Instruction, ::Int, ::Int
8✔
1017
                scan_non_dataflow_flags!(inst, sv)
×
1018
                # bail out early if there are no possibilities to refine the effects
1019
                if !any_refinable(sv)
×
1020
                    return nothing
×
1021
                end
1022
                return true
×
1023
            end
1024
        end
1025
    end
1026

1027
    return refine_effects!(interp, opt, sv)
134,992✔
1028
end
1029

1030
# run the optimization work
1031
function optimize(interp::AbstractInterpreter, opt::OptimizationState, caller::InferenceResult)
134,254✔
1032
    @zone "CC: OPTIMIZER" ir = run_passes_ipo_safe(opt.src, opt)
145,054✔
1033
    ipo_dataflow_analysis!(interp, opt, ir, caller)
145,048✔
1034
    finishopt!(interp, opt, ir)
145,048✔
1035
    return nothing
135,266✔
1036
end
1037

1038
const ALL_PASS_NAMES = String[]
1039
macro pass(name::String, expr)
1040
    optimize_until = esc(:optimize_until)
1041
    stage = esc(:__stage__)
1042
    macrocall = :(@zone $name $(esc(expr)))
1043
    macrocall.args[2] = __source__  # `@timeit` may want to use it
1044
    push!(ALL_PASS_NAMES, name)
1045
    quote
1046
        $macrocall
1,027,583✔
1047
        matchpass($optimize_until, ($stage += 1), $name) && $(esc(:(@goto __done__)))
947,665✔
1048
    end
1049
end
1050

1051
matchpass(optimize_until::Int, stage, _) = optimize_until == stage
123✔
1052
matchpass(optimize_until::String, _, name) = optimize_until == name
159✔
1053
matchpass(::Nothing, _, _) = false
×
1054

1055
function run_passes_ipo_safe(
135,404✔
1056
    ci::CodeInfo,
1057
    sv::OptimizationState,
1058
    optimize_until::Union{Nothing, Int, String} = nothing)  # run all passes by default
1059
    if optimize_until isa String && !contains_is(ALL_PASS_NAMES, optimize_until)
280,596✔
1060
        error("invalid `optimize_until` argument, no such optimization pass")
3✔
1061
    elseif optimize_until isa Int && (optimize_until < 1 || optimize_until > length(ALL_PASS_NAMES))
135,434✔
1062
        error("invalid `optimize_until` argument, no such optimization pass")
3✔
1063
    end
1064

1065
    __stage__ = 0  # used by @pass
135,398✔
1066
    # NOTE: The pass name MUST be unique for `optimize_until::String` to work
1067
    @pass "CC: CONVERT"   ir = convert_to_ircode(ci, sv)
1068
    @pass "CC: SLOT2REG"  ir = slot2reg(ir, ci, sv)
1069
    # TODO: Domsorting can produce an updated domtree - no need to recompute here
1070
    @pass "CC: COMPACT_1" ir = compact!(ir)
1071
    @pass "CC: INLINING"  ir = ssa_inlining_pass!(ir, sv.inlining, ci.propagate_inbounds)
1072
    # @zone "CC: VERIFY 2" verify_ir(ir)
1073
    @pass "CC: COMPACT_2" ir = compact!(ir)
1074
    @pass "CC: SROA"      ir = sroa_pass!(ir, sv.inlining)
1075
    @pass "CC: ADCE"      (ir, made_changes) = adce_pass!(ir, sv.inlining)
1076
    if made_changes
135,323✔
1077
        @pass "CC: COMPACT_3" ir = compact!(ir, true)
1078
    end
1079
    if is_asserts()
135,323✔
1080
        @zone "CC: VERIFY_3" begin
1081
            verify_ir(ir, true, false, optimizer_lattice(sv.inlining.interp), sv.linfo)
×
1082
            verify_linetable(ir.debuginfo, length(ir.stmts))
×
1083
        end
1084
    end
1085
    @label __done__  # used by @pass
1086
    return ir
135,395✔
1087
end
1088

1089
function strip_trailing_junk!(code::Vector{Any}, ssavaluetypes::Vector{Any}, ssaflags::Vector, debuginfo::DebugInfoStream, cfg::CFG, info::Vector{CallInfo})
109,371✔
1090
    # Remove `nothing`s at the end, we don't handle them well
1091
    # (we expect the last instruction to be a terminator)
1092
    codelocs = debuginfo.codelocs
109,371✔
1093
    for i = length(code):-1:1
218,742✔
1094
        if code[i] !== nothing
109,371✔
1095
            resize!(code, i)
109,371✔
1096
            resize!(ssavaluetypes, i)
109,371✔
1097
            resize!(codelocs, 3i)
109,371✔
1098
            resize!(info, i)
109,371✔
1099
            resize!(ssaflags, i)
109,371✔
1100
            break
109,371✔
1101
        end
1102
    end
×
1103
    # If the last instruction is not a terminator, add one. This can
1104
    # happen for implicit return on dead branches.
1105
    term = code[end]
109,371✔
1106
    if !isa(term, GotoIfNot) && !isa(term, GotoNode) && !isa(term, ReturnNode)
109,371✔
1107
        push!(code, ReturnNode())
7,626✔
1108
        push!(ssavaluetypes, Union{})
7,626✔
1109
        push!(codelocs, 0, 0, 0)
7,626✔
1110
        push!(info, NoCallInfo())
7,626✔
1111
        push!(ssaflags, IR_FLAG_NOTHROW)
7,626✔
1112

1113
        # Update CFG to include appended terminator
1114
        old_range = cfg.blocks[end].stmts
7,626✔
1115
        new_range = StmtRange(first(old_range), last(old_range) + 1)
7,626✔
1116
        cfg.blocks[end] = BasicBlock(cfg.blocks[end], new_range)
7,626✔
1117
        (length(cfg.index) == length(cfg.blocks)) && (cfg.index[end] += 1)
7,626✔
1118
    end
1119
    nothing
109,371✔
1120
end
1121

1122
function changed_lineinfo(di::DebugInfo, codeloc::Int, prevloc::Int)
1,132,820✔
1123
    while true
1,132,820✔
1124
        next = getdebugidx(di, codeloc)
1,134,203✔
1125
        line = next[1]
1,134,203✔
1126
        line < 0 && return false # invalid info
1,134,203✔
1127
        line == 0 && next[2] == 0 && return false # no new info
1,134,173✔
1128
        prevloc <= 0 && return true # no old info
1,004,285✔
1129
        prev = getdebugidx(di, prevloc)
894,929✔
1130
        next === prev && return false # exactly identical
894,929✔
1131
        prevline = prev[1]
64,350✔
1132
        prevline < 0 && return true # previous invalid info, now valid
64,350✔
1133
        edge = next[2]
64,350✔
1134
        edge === prev[2] || return true # change to this edge
64,446✔
1135
        linetable = di.linetable
64,254✔
1136
        # check for change to line number here
1137
        if linetable === nothing || line == 0
64,254✔
1138
            line == prevline || return true
127,125✔
1139
        else
1140
            changed_lineinfo(linetable::DebugInfo, Int(line), Int(prevline)) && return true
×
1141
        end
1142
        # check for change to edge here
1143
        edge == 0 && return false # no edge here
1,383✔
1144
        di = di.edges[Int(edge)]::DebugInfo
1,383✔
1145
        codeloc = Int(next[3])
1,383✔
1146
        prevloc = Int(prev[3])
1,383✔
1147
    end
1,383✔
1148
end
1149

1150
function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
135,286✔
1151
    # Update control-flow to reflect any unreachable branches.
1152
    ssavaluetypes = ci.ssavaluetypes::Vector{Any}
135,286✔
1153
    ci.code = code = copy_exprargs(ci.code)
1,392,839✔
1154
    di = DebugInfoStream(sv.linfo, ci.debuginfo, length(code))
135,286✔
1155
    codelocs = di.codelocs
135,286✔
1156
    ssaflags = ci.ssaflags
135,286✔
1157
    for i = 1:length(code)
244,657✔
1158
        expr = code[i]
1,392,839✔
1159
        if !(i in sv.unreachable)
1,392,839✔
1160
            if isa(expr, GotoIfNot)
1,229,815✔
1161
                # Replace this live GotoIfNot with:
1162
                # - no-op if :nothrow and the branch target is unreachable
1163
                # - cond if :nothrow and both targets are unreachable
1164
                # - typeassert if must-throw
1165
                block = block_for_inst(sv.cfg, i)
50,615✔
1166
                if ssavaluetypes[i] === Bottom
50,615✔
1167
                    destblock = block_for_inst(sv.cfg, expr.dest)
×
1168
                    cfg_delete_edge!(sv.cfg, block, block + 1)
×
1169
                    ((block + 1) != destblock) && cfg_delete_edge!(sv.cfg, block, destblock)
×
1170
                    expr = Expr(:call, Core.typeassert, expr.cond, Bool)
×
1171
                elseif i + 1 in sv.unreachable
50,615✔
1172
                    @assert has_flag(ssaflags[i], IR_FLAG_NOTHROW)
11,728✔
1173
                    cfg_delete_edge!(sv.cfg, block, block + 1)
11,728✔
1174
                    expr = GotoNode(expr.dest)
11,728✔
1175
                elseif expr.dest in sv.unreachable
38,887✔
1176
                    @assert has_flag(ssaflags[i], IR_FLAG_NOTHROW)
31,566✔
1177
                    cfg_delete_edge!(sv.cfg, block, block_for_inst(sv.cfg, expr.dest))
31,566✔
1178
                    expr = nothing
31,566✔
1179
                end
1180
                code[i] = expr
50,615✔
1181
            elseif isa(expr, EnterNode)
1,179,200✔
1182
                catchdest = expr.catch_dest
2,747✔
1183
                if catchdest in sv.unreachable
2,747✔
1184
                    cfg_delete_edge!(sv.cfg, block_for_inst(sv.cfg, i), block_for_inst(sv.cfg, catchdest))
10✔
1185
                    if isdefined(expr, :scope)
10✔
1186
                        # We've proven that nothing inside the enter region throws,
1187
                        # but we don't yet know whether something might read the scope,
1188
                        # so we need to retain this enter for the time being. However,
1189
                        # we use the special marker `0` to indicate that setting up
1190
                        # the try/catch frame is not required.
1191
                        code[i] = EnterNode(expr, 0)
8✔
1192
                    else
1193
                        code[i] = nothing
2✔
1194
                    end
1195
                end
1196
            elseif isa(expr, PhiNode)
1,176,453✔
1197
                new_edges = Int32[]
×
1198
                new_vals = Any[]
×
1199
                for j = 1:length(expr.edges)
×
1200
                    edge = expr.edges[j]
×
1201
                    (edge in sv.unreachable || (ssavaluetypes[edge] === Union{} && !isa(code[edge], PhiNode))) && continue
×
1202
                    push!(new_edges, edge)
×
1203
                    if isassigned(expr.values, j)
×
1204
                        push!(new_vals, expr.values[j])
×
1205
                    else
1206
                        resize!(new_vals, length(new_edges))
×
1207
                    end
1208
                end
×
1209
                code[i] = PhiNode(new_edges, new_vals)
×
1210
            end
1211
        end
1212
    end
2,650,392✔
1213

1214
    # Go through and add an unreachable node after every
1215
    # Union{} call. Then reindex labels.
1216
    stmtinfo = sv.stmt_info
135,286✔
1217
    meta = Expr[]
135,286✔
1218
    idx = 1
135,286✔
1219
    oldidx = 1
135,286✔
1220
    nstmts = length(code)
135,286✔
1221
    ssachangemap = labelchangemap = blockchangemap = nothing
135,286✔
1222
    prevloc = 0
135,286✔
1223
    while idx <= length(code)
1,522,075✔
1224
        if sv.insert_coverage && changed_lineinfo(ci.debuginfo, oldidx, prevloc)
1,386,789✔
1225
            # insert a side-effect instruction before the current instruction in the same basic block
1226
            insert!(code, idx, Expr(:code_coverage_effect))
220,403✔
1227
            splice!(codelocs, 3idx-2:3idx-3, (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0]))
220,403✔
1228
            insert!(ssavaluetypes, idx, Nothing)
220,403✔
1229
            insert!(stmtinfo, idx, NoCallInfo())
220,403✔
1230
            insert!(ssaflags, idx, IR_FLAG_NULL)
220,403✔
1231
            if ssachangemap === nothing
220,403✔
1232
                ssachangemap = fill(0, nstmts)
1,392,659✔
1233
            end
1234
            if labelchangemap === nothing
220,403✔
1235
                labelchangemap = fill(0, nstmts)
1,392,659✔
1236
            end
1237
            ssachangemap[oldidx] += 1
220,403✔
1238
            if oldidx < length(labelchangemap)
220,403✔
1239
                labelchangemap[oldidx + 1] += 1
220,160✔
1240
            end
1241
            if blockchangemap === nothing
220,403✔
1242
                blockchangemap = fill(0, length(sv.cfg.blocks))
273,824✔
1243
            end
1244
            blockchangemap[block_for_inst(sv.cfg, oldidx)] += 1
220,403✔
1245
            idx += 1
220,403✔
1246
            prevloc = oldidx
220,403✔
1247
        end
1248
        if ssavaluetypes[idx] === Union{} && !(oldidx in sv.unreachable) && !isa(code[idx], PhiNode)
1,386,789✔
1249
            # We should have converted any must-throw terminators to an equivalent w/o control-flow edges
1250
            @assert !isterminator(code[idx])
4,031✔
1251

1252
            block = block_for_inst(sv.cfg, oldidx)
4,031✔
1253
            block_end = last(sv.cfg.blocks[block].stmts) + (idx - oldidx)
4,031✔
1254

1255
            # Delete all successors to this basic block
1256
            for succ in sv.cfg.blocks[block].succs
4,031✔
1257
                preds = sv.cfg.blocks[succ].preds
1,084✔
1258
                deleteat!(preds, findfirst(x::Int->x==block, preds)::Int)
5,682✔
1259
            end
1,084✔
1260
            empty!(sv.cfg.blocks[block].succs)
4,031✔
1261

1262
            if !(idx < length(code) && isa(code[idx + 1], ReturnNode) && !isdefined((code[idx + 1]::ReturnNode), :val))
4,031✔
1263
                # Any statements from here to the end of the block have been wrapped in Core.Const(...)
1264
                # by type inference (effectively deleting them). Only task left is to replace the block
1265
                # terminator with an explicit `unreachable` marker.
1266

1267
                if block_end > idx
4,031✔
1268
                    if is_asserts()
3,525✔
1269
                        # Verify that type-inference did its job
1270
                        for i = (oldidx + 1):last(sv.cfg.blocks[block].stmts)
×
1271
                            @assert i in sv.unreachable
×
1272
                        end
×
1273
                    end
1274
                    code[block_end] = ReturnNode()
3,525✔
1275
                    codelocs[3block_end-2], codelocs[3block_end-1], codelocs[3block_end-0] = (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0])
3,525✔
1276
                    ssavaluetypes[block_end] = Union{}
3,525✔
1277
                    stmtinfo[block_end] = NoCallInfo()
3,525✔
1278
                    ssaflags[block_end] = IR_FLAG_NOTHROW
3,525✔
1279
                    idx += block_end - idx
3,525✔
1280
                else
1281
                    insert!(code, idx + 1, ReturnNode())
506✔
1282
                    splice!(codelocs, 3idx-2:3idx-3, (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0]))
506✔
1283
                    insert!(ssavaluetypes, idx + 1, Union{})
506✔
1284
                    insert!(stmtinfo, idx + 1, NoCallInfo())
506✔
1285
                    insert!(ssaflags, idx + 1, IR_FLAG_NOTHROW)
506✔
1286
                    if ssachangemap === nothing
506✔
1287
                        ssachangemap = fill(0, nstmts)
×
1288
                    end
1289
                    if labelchangemap === nothing
506✔
1290
                        labelchangemap = sv.insert_coverage ? fill(0, nstmts) : ssachangemap
×
1291
                    end
1292
                    if oldidx < length(ssachangemap)
506✔
1293
                        ssachangemap[oldidx + 1] += 1
506✔
1294
                        sv.insert_coverage && (labelchangemap[oldidx + 1] += 1)
506✔
1295
                    end
1296
                    if blockchangemap === nothing
506✔
1297
                        blockchangemap = fill(0, length(sv.cfg.blocks))
×
1298
                    end
1299
                    blockchangemap[block] += 1
506✔
1300
                    idx += 1
506✔
1301
                end
1302
                oldidx = last(sv.cfg.blocks[block].stmts)
4,031✔
1303
            end
1304
        end
1305
        idx += 1
1,386,789✔
1306
        oldidx += 1
1,386,789✔
1307
    end
1,386,789✔
1308
    empty!(sv.unreachable)
135,927✔
1309

1310
    if ssachangemap !== nothing && labelchangemap !== nothing
135,286✔
1311
        renumber_ir_elements!(code, ssachangemap, labelchangemap)
135,121✔
1312
    end
1313
    if blockchangemap !== nothing
135,286✔
1314
        renumber_cfg_stmts!(sv.cfg, blockchangemap)
135,121✔
1315
    end
1316

1317
    for i = 1:length(code)
244,657✔
1318
        code[i] = process_meta!(meta, code[i])
1,613,748✔
1319
    end
3,092,210✔
1320
    strip_trailing_junk!(code, ssavaluetypes, ssaflags, di, sv.cfg, stmtinfo)
135,286✔
1321
    types = Any[]
135,286✔
1322
    stmts = InstructionStream(code, types, stmtinfo, codelocs, ssaflags)
135,286✔
1323
    # NOTE this `argtypes` contains types of slots yet: it will be modified to contain the
1324
    # types of call arguments only once `slot2reg` converts this `IRCode` to the SSA form
1325
    # and eliminates slots (see below)
1326
    argtypes = sv.slottypes
135,286✔
1327
    return IRCode(stmts, sv.cfg, di, argtypes, meta, sv.sptypes, world_range(ci))
135,286✔
1328
end
1329

1330
function process_meta!(meta::Vector{Expr}, @nospecialize stmt)
1331
    if isexpr(stmt, :meta) && length(stmt.args) ≥ 1
1,613,748✔
1332
        push!(meta, stmt)
×
1333
        return nothing
×
1334
    end
1335
    return stmt
1,613,748✔
1336
end
1337

1338
function slot2reg(ir::IRCode, ci::CodeInfo, sv::OptimizationState)
109,371✔
1339
    # need `ci` for the slot metadata, IR for the code
1340
    @zone "CC: DOMTREE_1" domtree = construct_domtree(ir)
135,392✔
1341
    defuse_insts = scan_slot_def_use(Int(ci.nargs), ci, ir.stmts.stmt)
135,392✔
1342
    𝕃ₒ = optimizer_lattice(sv.inlining.interp)
135,286✔
1343
    @zone "CC: CONSTRUCT_SSA" ir = construct_ssa!(ci, ir, sv, domtree, defuse_insts, 𝕃ₒ) # consumes `ir`
135,392✔
1344
    # NOTE now we have converted `ir` to the SSA form and eliminated slots
1345
    # let's resize `argtypes` now and remove unnecessary types for the eliminated slots
1346
    resize!(ir.argtypes, ci.nargs)
135,392✔
1347
    return ir
135,392✔
1348
end
1349

1350
## Computing the cost of a function body
1351

1352
# saturating sum (inputs are non-negative), prevents overflow with typemax(Int) below
1353
plus_saturate(x::Int, y::Int) = max(x, y, x+y)
2,709,565✔
1354

1355
# known return type
1356
isknowntype(@nospecialize T) = (T === Union{}) || isa(T, Const) || isconcretetype(widenconst(T))
3,804✔
1357

1358
function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState},
2,508,031✔
1359
                        params::OptimizationParams)
1360
    #=const=# UNKNOWN_CALL_COST = 20
2,508,031✔
1361
    head = ex.head
2,508,031✔
1362
    if is_meta_expr_head(head)
5,010,971✔
1363
        return 0
5,091✔
1364
    elseif head === :call
2,502,940✔
1365
        farg = ex.args[1]
147,689✔
1366
        ftyp = argextype(farg, src, sptypes)
147,689✔
1367
        if ftyp === IntrinsicFunction && farg isa SSAValue
147,689✔
1368
            # if this comes from code that was already inlined into another function,
1369
            # Consts have been widened. try to recover in simple cases.
1370
            farg = isa(src, CodeInfo) ? src.code[farg.id] : src[farg][:stmt]
×
1371
            if isa(farg, GlobalRef) || isa(farg, QuoteNode) || isa(farg, IntrinsicFunction) || isexpr(farg, :static_parameter)
×
1372
                ftyp = argextype(farg, src, sptypes)
×
1373
            end
1374
        end
1375
        f = singleton_type(ftyp)
147,704✔
1376
        if isa(f, IntrinsicFunction)
147,689✔
1377
            iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1
58,095✔
1378
            if isassigned(T_IFUNC, iidx)
58,095✔
1379
                minarg, maxarg, = T_IFUNC[iidx]
58,095✔
1380
                nargs = length(ex.args)
58,095✔
1381
                if minarg + 1 <= nargs <= maxarg + 1
58,095✔
1382
                    # With mostly constant arguments, all Intrinsics tend to become very cheap
1383
                    # and are likely to combine with the operations around them,
1384
                    # so reduce their cost by half.
1385
                    cost = T_IFUNC_COST[iidx]
58,095✔
1386
                    if cost == 0 || nargs < 3 ||
93,756✔
1387
                       (f === Intrinsics.cglobal || f === Intrinsics.llvmcall) # these hold malformed IR, so argextype will crash on them
1388
                        return cost
23,862✔
1389
                    end
1390
                    aty2 = widenconditional(argextype(ex.args[2], src, sptypes))
34,233✔
1391
                    nconst = Int(aty2 isa Const)
34,233✔
1392
                    for i = 3:nargs
68,384✔
1393
                        aty = widenconditional(argextype(ex.args[i], src, sptypes))
34,233✔
1394
                        if widenconst(aty) != widenconst(aty2)
34,233✔
1395
                            nconst = 0
8,344✔
1396
                            break
8,344✔
1397
                        end
1398
                        nconst += aty isa Const
25,889✔
1399
                    end
25,889✔
1400
                    if nconst + 2 >= nargs
34,233✔
1401
                        cost = (cost - 1) ÷ 2
17,313✔
1402
                    end
1403
                    return cost
34,233✔
1404
                end
1405
            end
1406
            # unknown/unhandled intrinsic: hopefully the caller gets a slightly better answer after the inlining
1407
            return UNKNOWN_CALL_COST
×
1408
        end
1409
        if isa(f, Builtin) && f !== invoke
89,594✔
1410
            # The efficiency of operations like a[i] and s.b
1411
            # depend strongly on whether the result can be
1412
            # inferred, so check the type of ex
1413
            if f === Core.getfield || f === Core.tuple || f === Core.getglobal
137,152✔
1414
                # we might like to penalize non-inferrability, but
1415
                # tuple iteration/destructuring makes that impossible
1416
                # return plus_saturate(argcost, isknowntype(extyp) ? 1 : params.inline_nonleaf_penalty)
1417
                return 0
47,388✔
1418
            elseif (f === Core.memoryrefget || f === Core.memoryref_isassigned) && length(ex.args) >= 3
84,303✔
1419
                atyp = argextype(ex.args[2], src, sptypes)
61✔
1420
                return isknowntype(atyp) ? 1 : params.inline_nonleaf_penalty
61✔
1421
            elseif f === Core.memoryrefset! && length(ex.args) >= 3
42,121✔
1422
                atyp = argextype(ex.args[2], src, sptypes)
3,750✔
1423
                return isknowntype(atyp) ? 5 : params.inline_nonleaf_penalty
3,750✔
1424
            elseif f === typeassert && isconstType(widenconst(argextype(ex.args[3], src, sptypes)))
42,887✔
1425
                return 1
1,030✔
1426
            end
1427
            fidx = find_tfunc(f)
884,822✔
1428
            if fidx === nothing
37,341✔
1429
                # unknown/unhandled builtin
1430
                # Use the generic cost of a direct function call
1431
                return UNKNOWN_CALL_COST
3,530✔
1432
            end
1433
            return T_FFUNC_COST[fidx]
33,811✔
1434
        end
1435
        extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes)
48✔
1436
        if extyp === Union{}
24✔
1437
            return 0
×
1438
        end
1439
        return params.inline_nonleaf_penalty
24✔
1440
    elseif head === :foreigncall
2,355,251✔
1441
        foreigncall = ex.args[1]
2,009✔
1442
        if foreigncall isa QuoteNode && foreigncall.value === :jl_string_ptr
2,009✔
1443
            return 1
6✔
1444
        end
1445
        return 20
2,003✔
1446
    elseif head === :invoke || head === :invoke_modify
4,666,446✔
1447
        # Calls whose "return type" is Union{} do not actually return:
1448
        # they are errors. Since these are not part of the typical
1449
        # run-time of the function, we omit them from
1450
        # consideration. This way, non-inlined error branches do not
1451
        # prevent inlining.
1452
        extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes)
80,076✔
1453
        return extyp === Union{} ? 0 : UNKNOWN_CALL_COST
40,038✔
1454
    elseif head === :(=)
2,313,204✔
1455
        return statement_cost(ex.args[2], -1, src, sptypes, params)
×
1456
    elseif head === :copyast
2,313,204✔
1457
        return 100
×
1458
    end
1459
    return 0
2,313,204✔
1460
end
1461

1462
function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState},
1463
                                  params::OptimizationParams)
1464
    thiscost = 0
2,709,565✔
1465
    dst(tgt) = isa(src, IRCode) ? first(src.cfg.blocks[tgt].stmts) : tgt
2,767,981✔
1466
    if stmt isa Expr
2,709,610✔
1467
        thiscost = statement_cost(stmt, line, src, sptypes, params)::Int
2,508,061✔
1468
    elseif stmt isa GotoNode
201,549✔
1469
        # loops are generally always expensive
1470
        # but assume that forward jumps are already counted for from
1471
        # summing the cost of the not-taken branch
1472
        thiscost = dst(stmt.label) < line ? 40 : 0
35,275✔
1473
    elseif stmt isa GotoIfNot
166,274✔
1474
        thiscost = dst(stmt.dest) < line ? 40 : 0
23,150✔
1475
    elseif stmt isa EnterNode
143,124✔
1476
        # try/catch is a couple function calls,
1477
        # but don't inline functions with try/catch
1478
        # since these aren't usually performance-sensitive functions,
1479
        # and llvm is more likely to miscompile them when these functions get large
1480
        thiscost = typemax(Int)
1,815✔
1481
    end
1482
    return thiscost
2,709,610✔
1483
end
1484

1485
function inline_cost_model(ir::IRCode, params::OptimizationParams, cost_threshold::Int)
94,148✔
1486
    bodycost = 0
94,148✔
1487
    for i = 1:length(ir.stmts)
188,255✔
1488
        stmt = ir[SSAValue(i)][:stmt]
2,709,565✔
1489
        thiscost = statement_or_branch_cost(stmt, i, ir, ir.sptypes, params)
2,911,099✔
1490
        bodycost = plus_saturate(bodycost, thiscost)
2,709,565✔
1491
        if bodycost > cost_threshold
2,709,565✔
1492
            return MAX_INLINE_COST
3,542✔
1493
        end
1494
    end
5,321,440✔
1495
    return inline_cost_clamp(bodycost)
90,606✔
1496
end
1497

1498
function statement_costs!(cost::Vector{Int}, body::Vector{Any}, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState}, params::OptimizationParams)
1499
    maxcost = 0
×
1500
    for line = 1:length(body)
3✔
1501
        stmt = body[line]
45✔
1502
        thiscost = statement_or_branch_cost(stmt, line, src, sptypes,
60✔
1503
                                            params)
1504
        cost[line] = thiscost
45✔
1505
        if thiscost > maxcost
45✔
1506
            maxcost = thiscost
×
1507
        end
1508
    end
87✔
1509
    return maxcost
3✔
1510
end
1511

1512
function renumber_ir_elements!(body::Vector{Any}, cfg::Union{CFG,Nothing}, ssachangemap::Vector{Int})
×
1513
    return renumber_ir_elements!(body, cfg, ssachangemap, ssachangemap)
×
1514
end
1515

1516
function cumsum_ssamap!(ssachangemap::Vector{Int})
1517
    any_change = false
328,068✔
1518
    rel_change = 0
328,068✔
1519
    for i = 1:length(ssachangemap)
656,136✔
1520
        val = ssachangemap[i]
2,503,044✔
1521
        any_change |= val ≠ 0
2,503,044✔
1522
        rel_change += val
2,503,044✔
1523
        if val == -1
2,503,044✔
1524
            # Keep a marker that this statement was deleted
1525
            ssachangemap[i] = typemin(Int)
×
1526
        else
1527
            ssachangemap[i] = rel_change
2,503,044✔
1528
        end
1529
    end
4,678,020✔
1530
    return any_change
328,068✔
1531
end
1532

1533
function renumber_ir_elements!(body::Vector{Any}, ssachangemap::Vector{Int}, labelchangemap::Vector{Int})
109,356✔
1534
    any_change = cumsum_ssamap!(labelchangemap)
1,138,213✔
1535
    if ssachangemap !== labelchangemap
109,356✔
1536
        any_change |= cumsum_ssamap!(ssachangemap)
1,138,213✔
1537
    end
1538
    any_change || return
109,356✔
1539
    for i = 1:length(body)
218,712✔
1540
        el = body[i]
1,310,782✔
1541
        if isa(el, GotoNode)
1,310,782✔
1542
            body[i] = GotoNode(el.label + labelchangemap[el.label])
31,813✔
1543
        elseif isa(el, GotoIfNot)
1,278,969✔
1544
            cond = el.cond
3,428✔
1545
            if isa(cond, SSAValue)
3,428✔
1546
                cond = SSAValue(cond.id + ssachangemap[cond.id])
3,375✔
1547
            end
1548
            was_deleted = labelchangemap[el.dest] == typemin(Int)
3,428✔
1549
            body[i] = was_deleted ? cond : GotoIfNot(cond, el.dest + labelchangemap[el.dest])
3,428✔
1550
        elseif isa(el, ReturnNode)
1,275,541✔
1551
            if isdefined(el, :val)
113,682✔
1552
                val = el.val
110,482✔
1553
                if isa(val, SSAValue)
110,482✔
1554
                    body[i] = ReturnNode(SSAValue(val.id + ssachangemap[val.id]))
110,280✔
1555
                end
1556
            end
1557
        elseif isa(el, SSAValue)
1,161,859✔
1558
            body[i] = SSAValue(el.id + ssachangemap[el.id])
×
1559
        elseif isa(el, PhiNode)
1,161,859✔
1560
            i = 1
×
1561
            edges = el.edges
×
1562
            values = el.values
×
1563
            while i <= length(edges)
×
1564
                was_deleted = ssachangemap[edges[i]] == typemin(Int)
×
1565
                if was_deleted
×
1566
                    deleteat!(edges, i)
×
1567
                    deleteat!(values, i)
×
1568
                else
1569
                    edges[i] += ssachangemap[edges[i]]
×
1570
                    val = values[i]
×
1571
                    if isa(val, SSAValue)
×
1572
                        values[i] = SSAValue(val.id + ssachangemap[val.id])
×
1573
                    end
1574
                    i += 1
×
1575
                end
1576
            end
×
1577
        elseif isa(el, EnterNode)
1,161,859✔
1578
            tgt = el.catch_dest
2,652✔
1579
            if tgt != 0
2,652✔
1580
                was_deleted = labelchangemap[tgt] == typemin(Int)
2,652✔
1581
                if was_deleted
2,652✔
1582
                    @assert !isdefined(el, :scope)
×
1583
                    body[i] = nothing
×
1584
                else
1585
                    if isdefined(el, :scope) && isa(el.scope, SSAValue)
2,652✔
1586
                        body[i] = EnterNode(tgt + labelchangemap[tgt], SSAValue(el.scope.id + ssachangemap[el.scope.id]))
2,430✔
1587
                    else
1588
                        body[i] = EnterNode(el, tgt + labelchangemap[tgt])
222✔
1589
                    end
1590
                end
1591
            end
1592
        elseif isa(el, Expr)
1,159,207✔
1593
            if el.head === :(=) && el.args[2] isa Expr
604,337✔
1594
                el = el.args[2]::Expr
33,911✔
1595
            end
1596
            if !is_meta_expr_head(el.head)
1,208,560✔
1597
                args = el.args
604,223✔
1598
                for i = 1:length(args)
1,035,901✔
1599
                    el = args[i]
1,160,253✔
1600
                    if isa(el, SSAValue)
1,160,253✔
1601
                        args[i] = SSAValue(el.id + ssachangemap[el.id])
580,775✔
1602
                    end
1603
                end
1,160,253✔
1604
            end
1605
        end
1606
    end
1,310,782✔
1607
end
1608

1609
function renumber_cfg_stmts!(cfg::CFG, blockchangemap::Vector{Int})
109,356✔
1610
    cumsum_ssamap!(blockchangemap) || return
109,356✔
1611
    for i = 1:length(cfg.blocks)
218,712✔
1612
        old_range = cfg.blocks[i].stmts
226,618✔
1613
        new_range = StmtRange(first(old_range) + ((i > 1) ? blockchangemap[i - 1] : 0),
226,618✔
1614
                              last(old_range) + blockchangemap[i])
1615
        cfg.blocks[i] = BasicBlock(cfg.blocks[i], new_range)
226,618✔
1616
        if i <= length(cfg.index)
226,618✔
1617
            cfg.index[i] = cfg.index[i] + blockchangemap[i]
226,618✔
1618
        end
1619
    end
226,618✔
1620
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc