• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / 1549

28 May 2026 12:15AM UTC coverage: 77.793% (+0.06%) from 77.73%
1549

push

buildkite

web-flow
[JuliaLowering] Special-case macrocall in `do` (#61912)

Equivalent to #27538, fix
https://github.com/aviatesk/JETLS.jl/issues/717

65680 of 84429 relevant lines covered (77.79%)

21736414.26 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

76.91
/Compiler/src/optimize.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
#############
4
# constants #
5
#############
6

7
# The slot has uses that are not statically dominated by any assignment
8
# This is implied by `SLOT_USEDUNDEF`.
9
# If this is not set, all the uses are (statically) dominated by the defs.
10
# In particular, if a slot has `AssignedOnce && !StaticUndef`, it is an SSA.
11
const SLOT_STATICUNDEF  = 1 # slot might be used before it is defined (structurally)
12
const SLOT_ASSIGNEDONCE = 16 # slot is assigned to only once
13
const SLOT_USEDUNDEF    = 32 # slot has uses that might raise UndefVarError
14
# const SLOT_CALLED      = 64
15

16
# NOTE make sure to sync the flag definitions below with julia.h and `jl_code_info_set_ir` in method.c
17

18
const IR_FLAG_NULL        = zero(UInt32)
19
# This statement is marked as @inbounds by user.
20
# If replaced by inlining, any contained boundschecks may be removed.
21
const IR_FLAG_INBOUNDS    = one(UInt32) << 0
22
# This statement is marked as @inline by user
23
const IR_FLAG_INLINE      = one(UInt32) << 1
24
# This statement is marked as @noinline by user
25
const IR_FLAG_NOINLINE    = one(UInt32) << 2
26
# This statement is proven :consistent
27
const IR_FLAG_CONSISTENT  = one(UInt32) << 3
28
# This statement is proven :effect_free
29
const IR_FLAG_EFFECT_FREE = one(UInt32) << 4
30
# This statement is proven :nothrow
31
const IR_FLAG_NOTHROW     = one(UInt32) << 5
32
# This statement is proven :terminates_globally
33
const IR_FLAG_TERMINATES  = one(UInt32) << 6
34
#const IR_FLAG_TERMINATES_LOCALLY = one(UInt32) << 7
35
#const IR_FLAG_NOTASKSTATE = one(UInt32) << 8
36
#const IR_FLAG_INACCESSIBLEMEM = one(UInt32) << 9
37
const IR_FLAG_NOUB        = one(UInt32) << 10
38
#const IR_FLAG_NOUBINIB   = one(UInt32) << 11
39
#const IR_FLAG_CONSISTENTOVERLAY = one(UInt32) << 12
40
# This statement is :nortcall
41
const IR_FLAG_NORTCALL = one(UInt32) << 13
42
# An optimization pass has updated this statement in a way that may
43
# have exposed information that inference did not see. Re-running
44
# inference on this statement may be profitable.
45
const IR_FLAG_REFINED     = one(UInt32) << 16
46
# This statement has no users and may be deleted if flags get refined to IR_FLAGS_REMOVABLE
47
const IR_FLAG_UNUSED      = one(UInt32) << 17
48
# TODO: Both of these next two should eventually go away once
49
# This statement is :effect_free == EFFECT_FREE_IF_INACCESSIBLEMEMONLY
50
const IR_FLAG_EFIIMO      = one(UInt32) << 18
51
# This statement is :inaccessiblememonly == INACCESSIBLEMEM_OR_ARGMEMONLY
52
const IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM = one(UInt32) << 19
53

54
const NUM_IR_FLAGS = 3 # sync with julia.h
55

56
const IR_FLAGS_EFFECTS =
57
    IR_FLAG_CONSISTENT | IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW |
58
    IR_FLAG_TERMINATES | IR_FLAG_NOUB | IR_FLAG_NORTCALL
59

60
const IR_FLAGS_REMOVABLE = IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW | IR_FLAG_TERMINATES
61

62
const IR_FLAGS_NEEDS_EA = IR_FLAG_EFIIMO | IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM
63

64
has_flag(curr::UInt32, flag::UInt32) = (curr & flag) == flag
206,949,979✔
65

66
function iscallstmt(@nospecialize stmt)
67
    stmt isa Expr || return false
6,492,178✔
68
    head = stmt.head
4,377,678✔
69
    return head === :call || head === :invoke || head === :foreigncall
8,092,803✔
70
end
71

72
function flags_for_effects(effects::Effects)
73
    flags = zero(UInt32)
885,397✔
74
    if is_consistent(effects)
4,287,164✔
75
        flags |= IR_FLAG_CONSISTENT
791,788✔
76
    end
77
    if is_effect_free(effects)
4,287,164✔
78
        flags |= IR_FLAG_EFFECT_FREE
3,127,254✔
79
    elseif is_effect_free_if_inaccessiblememonly(effects)
1,159,910✔
80
        flags |= IR_FLAG_EFIIMO
16,137✔
81
    end
82
    if is_nothrow(effects)
4,287,164✔
83
        flags |= IR_FLAG_NOTHROW
3,905,511✔
84
    end
85
    if is_terminates(effects)
4,287,164✔
86
        flags |= IR_FLAG_TERMINATES
4,122,293✔
87
    end
88
    if is_inaccessiblemem_or_argmemonly(effects)
4,287,164✔
89
        flags |= IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM
278,541✔
90
    end
91
    if is_noub(effects)
4,287,164✔
92
        flags |= IR_FLAG_NOUB
4,045,765✔
93
    end
94
    if is_nortcall(effects)
4,287,164✔
95
        flags |= IR_FLAG_NORTCALL
4,108,103✔
96
    end
97
    return flags
4,287,164✔
98
end
99

100
const TOP_TUPLE = GlobalRef(Core, :tuple)
101

102
inlining_cost(@nospecialize src) =
1,624,272✔
103
    src isa Union{MaybeCompressed,UInt8} ? ccall(:jl_ir_inlining_cost, InlineCostType, (Any,), src) : MAX_INLINE_COST
104
is_inlineable(@nospecialize src) = inlining_cost(src) != MAX_INLINE_COST
1,340,428✔
105
set_inlineable!(src::CodeInfo, val::Bool) =
8,128✔
106
    src.inlining_cost = (val ? MIN_INLINE_COST : MAX_INLINE_COST)
107

108
function inline_cost_clamp(x::Int)
109
    x > MAX_INLINE_COST && return MAX_INLINE_COST
85,991✔
110
    x < MIN_INLINE_COST && return MIN_INLINE_COST
85,991✔
111
    x = ccall(:jl_encode_inlining_cost, UInt8, (InlineCostType,), x)
29,568✔
112
    x = ccall(:jl_decode_inlining_cost, InlineCostType, (UInt8,), x)
29,568✔
113
    return x
29,568✔
114
end
115

116
const SRC_FLAG_DECLARED_INLINE = 0x1
117
const SRC_FLAG_DECLARED_NOINLINE = 0x2
118

119
is_declared_inline(@nospecialize src::MaybeCompressed) =
739,227✔
120
    ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == SRC_FLAG_DECLARED_INLINE
121

122
is_declared_noinline(@nospecialize src::MaybeCompressed) =
144✔
123
    ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == SRC_FLAG_DECLARED_NOINLINE
124

125
#####################
126
# OptimizationState #
127
#####################
128

129
# return whether this src should be inlined. If so, retrieve_ir_for_inlining must return an IRCode from it
130

131
function src_inlining_policy(interp::AbstractInterpreter, mi::MethodInstance,
132
    @nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt32)
133
    # If we have a generator, but we can't invoke it (because argument type information is lacking),
134
    # don't inline so we defer its invocation to runtime where we'll have precise type information.
135
    if isa(mi.def, Method) && hasgenerator(mi)
733,422✔
136
        may_invoke_generator(mi) || return false
1,249✔
137
    end
138
    return src_inlining_policy(interp, src, info, stmt_flag)
771,075✔
139
end
140

141
function src_inlining_policy(::AbstractInterpreter,
657,245✔
142
    @nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt32)
143
    isa(src, OptimizationState) && (src = src.src)
737,205✔
144
    if isa(src, MaybeCompressed)
737,205✔
145
        src_inlineable = is_stmt_inline(stmt_flag) || is_inlineable(src)
1,430,436✔
146
        return src_inlineable
736,364✔
147
    elseif isa(src, IRCode)
841✔
148
        return true
144✔
149
    end
150
    @assert !isa(src, CodeInstance) # handled by caller
697✔
151
    return false
697✔
152
end
153

154
struct InliningState{Interp<:AbstractInterpreter}
155
    edges::Vector{Any}
221,914✔
156
    interp::Interp
157
    opt_cache::IdDict{MethodInstance,CodeInstance}
158
end
159
function InliningState(sv::InferenceState, interp::AbstractInterpreter,
160
                       opt_cache::IdDict{MethodInstance,CodeInstance}=IdDict{MethodInstance,CodeInstance}())
161
    return InliningState(sv.edges, interp, opt_cache)
221,896✔
162
end
163
function InliningState(interp::AbstractInterpreter,
9✔
164
                       opt_cache::IdDict{MethodInstance,CodeInstance}=IdDict{MethodInstance,CodeInstance}())
165
    return InliningState(Any[], interp, opt_cache)
27✔
166
end
167

168
struct OptimizerCache{CodeCache}
169
    cache::CodeCache
170
    opt_cache::IdDict{MethodInstance,CodeInstance}
171
    function OptimizerCache(
172
        cache::CodeCache,
173
        opt_cache::IdDict{MethodInstance,CodeInstance}) where CodeCache
174
        return new{CodeCache}(cache, opt_cache)
26,168✔
175
    end
176
end
177
function get((; cache, opt_cache)::OptimizerCache, mi::MethodInstance, default)
178
    if haskey(opt_cache, mi)
29,058✔
179
        return opt_cache[mi] # this is incomplete right now, but will be finished (by finish_cycle) before caching anything
1,347✔
180
    end
181
    return get(cache, mi, default)
26,364✔
182
end
183

184
# get `code_cache(::AbstractInterpreter)` from `state::InliningState`
185
function code_cache(state::InliningState)
186
    cache = code_cache(state.interp)
27,711✔
187
    return OptimizerCache(cache, state.opt_cache)
27,711✔
188
end
189

190
mutable struct OptimizationResult
191
    ir::IRCode
223,735✔
192
    inline_flag::UInt8
193
    simplified::Bool # indicates whether the IR was processed with `cfg_simplify!`
194
end
195

196
function simplify_ir!(result::OptimizationResult)
197
    result.ir = cfg_simplify!(result.ir)
141,759✔
198
    result.simplified = true
141,759✔
199
end
200

201
mutable struct OptimizationState{Interp<:AbstractInterpreter}
202
    linfo::MethodInstance
221,905✔
203
    src::CodeInfo
204
    optresult::Union{Nothing, OptimizationResult}
205
    stmt_info::Vector{CallInfo}
206
    mod::Module
207
    sptypes::Vector{VarState}
208
    slottypes::Vector{Any}
209
    inlining::InliningState{Interp}
210
    cfg::CFG
211
    unreachable::BitSet
212
    bb_states::Vector{Union{Nothing,BBEntryState}}
213
    insert_coverage::Bool
214
end
215
function OptimizationState(sv::InferenceState, interp::AbstractInterpreter,
216
                           opt_cache::IdDict{MethodInstance,CodeInstance}=IdDict{MethodInstance,CodeInstance}())
217
    inlining = InliningState(sv, interp, opt_cache)
233,430✔
218
    return OptimizationState(sv.linfo, sv.src, nothing, sv.stmt_info, sv.mod,
221,896✔
219
                             sv.sptypes, sv.slottypes, inlining, sv.cfg,
220
                             sv.unreachable, sv.bb_states, sv.insert_coverage)
221
end
222
function OptimizationState(mi::MethodInstance, src::CodeInfo, interp::AbstractInterpreter,
9✔
223
                           opt_cache::IdDict{MethodInstance,CodeInstance}=IdDict{MethodInstance,CodeInstance}())
224
    # prepare src for running optimization passes if it isn't already
225
    nssavalues = src.ssavaluetypes
18✔
226
    if nssavalues isa Int
9✔
227
        src.ssavaluetypes = Any[ Any for _ = 1:nssavalues ]
69✔
228
    else
229
        nssavalues = length(src.ssavaluetypes::Vector{Any})
×
230
    end
231
    sptypes = sptypes_from_meth_instance(mi)
9✔
232
    nslots = length(src.slotflags)
9✔
233
    slottypes = src.slottypes
9✔
234
    if slottypes === nothing
9✔
235
        slottypes = Any[ Any for _ = 1:nslots ]
24✔
236
    end
237
    stmt_info = CallInfo[ NoCallInfo() for _ = 1:nssavalues ]
69✔
238
    # cache some useful state computations
239
    def = mi.def
9✔
240
    mod = isa(def, Method) ? def.module : def
12✔
241
    # Allow using the global MI cache, but don't track edges.
242
    # This method is mostly used for unit testing the optimizer
243
    inlining = InliningState(interp, opt_cache)
9✔
244
    cfg = compute_basic_blocks(src.code)
9✔
245
    unreachable = BitSet()
9✔
246
    nbbstate = zeros(Int, nslots)
36✔
247
    bb_states = Union{BBEntryState,Nothing}[
30✔
248
        BBEntryState(VarState[
249
            VarState(slottypes[slot], typemin(Int), src.slotflags[slot] & SLOT_USEDUNDEF != 0)
250
            for slot = 1:nslots
251
        ], nbbstate)
252
        for _ = 1:length(cfg.blocks)]
253
    return OptimizationState(mi, src, nothing, stmt_info, mod, sptypes, slottypes, inlining, cfg, unreachable, bb_states, false)
9✔
254
end
255
function OptimizationState(mi::MethodInstance, interp::AbstractInterpreter)
6✔
256
    world = get_inference_world(interp)
6✔
257
    src = retrieve_code_info(mi, world)
6✔
258
    src === nothing && return nothing
6✔
259
    return OptimizationState(mi, src, interp)
6✔
260
end
261

262
function argextype end # imported by EscapeAnalysis
263
function try_compute_field end # imported by EscapeAnalysis
264

265
include("ssair/heap.jl")
266
include("ssair/slot2ssa.jl")
267
include("ssair/inlining.jl")
268
include("ssair/verify.jl")
269
include("ssair/legacy.jl")
270
include("ssair/EscapeAnalysis.jl")
271
include("ssair/passes.jl")
272
include("ssair/irinterp.jl")
273

274
function ir_to_codeinf!(opt::OptimizationState, frame::InferenceState, edges::SimpleVector)
11,264✔
275
    ir_to_codeinf!(opt, edges, compute_inlining_cost(frame.interp, frame.result, opt.optresult))
11,264✔
276
end
277

278
function ir_to_codeinf!(opt::OptimizationState, edges::SimpleVector, inlining_cost::InlineCostType)
×
279
    src = ir_to_codeinf!(opt, edges)
22,528✔
280
    src.inlining_cost = inlining_cost
11,264✔
281
    src
11,264✔
282
end
283

284
function ir_to_codeinf!(opt::OptimizationState, edges::SimpleVector)
285
    src = ir_to_codeinf!(opt)
25,801✔
286
    src.edges = edges
14,537✔
287
    src
4,297✔
288
end
289

290
function ir_to_codeinf!(opt::OptimizationState)
291
    (; linfo, src, optresult) = opt
77,247✔
292
    if optresult === nothing
77,247✔
293
        return src
3,273✔
294
    end
295
    src = ir_to_codeinf!(src, optresult.ir)
73,974✔
296
    opt.optresult = nothing
73,974✔
297
    opt.src = src
73,974✔
298
    maybe_validate_code(linfo, src, "optimized")
73,974✔
299
    return src
73,974✔
300
end
301

302
function ir_to_codeinf!(src::CodeInfo, ir::IRCode)
303
    replace_code_newstyle!(src, ir)
74,049✔
304
    widen_all_consts!(src)
74,049✔
305
    return src
14,346✔
306
end
307

308
# widen all Const elements in type annotations
309
function widen_all_consts!(src::CodeInfo)
14,346✔
310
    ssavaluetypes = src.ssavaluetypes::Vector{Any}
14,346✔
311
    for i = 1:length(ssavaluetypes)
28,692✔
312
        ssavaluetypes[i] = widenconst(ssavaluetypes[i])
2,555,797✔
313
    end
5,097,248✔
314

315
    for i = 1:length(src.code)
28,692✔
316
        x = src.code[i]
2,555,797✔
317
        if isa(x, PiNode)
2,555,797✔
318
            src.code[i] = PiNode(x.val, widenconst(x.typ))
13,324✔
319
        end
320
    end
5,097,248✔
321

322
    return src
14,346✔
323
end
324

325
#########
326
# logic #
327
#########
328

329
_topmod(sv::OptimizationState) = _topmod(sv.mod)
×
330

331
is_stmt_inline(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_INLINE)
1,619,005✔
332
is_stmt_noinline(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_NOINLINE)
780,504✔
333

334
function new_expr_effect_flags(𝕃ₒ::AbstractLattice, args::Vector{Any}, src::Union{IRCode,IncrementalCompact}, pattern_match=nothing)
5,733✔
335
    Targ = args[1]
11,334✔
336
    atyp = argextype(Targ, src)
5,733✔
337
    # `Expr(:new)` of unknown type could raise arbitrary TypeError.
338
    typ, isexact = instanceof_tfunc(atyp, true)
5,733✔
339
    if !isexact
5,733✔
340
        atyp = unwrap_unionall(widenconst(atyp))
185✔
341
        if isType(atyp) && isTypeDataType(atyp.parameters[1])
364✔
342
            typ = atyp.parameters[1]
179✔
343
        else
344
            return (false, false, false)
6✔
345
        end
346
        isabstracttype(typ) && return (false, false, false)
179✔
347
    else
348
        isconcretedispatch(typ) || return (false, false, false)
5,548✔
349
    end
350
    typ = typ::DataType
5,727✔
351
    fcount = datatype_fieldcount(typ)
5,727✔
352
    fcount === nothing && return (false, false, false)
5,727✔
353
    fcount >= length(args) - 1 || return (false, false, false)
5,727✔
354
    for fidx in 1:(length(args) - 1)
11,448✔
355
        farg = args[fidx + 1]
10,643✔
356
        eT = argextype(farg, src)
10,643✔
357
        fT = fieldtype(typ, fidx)
10,643✔
358
        if !isexact && has_free_typevars(fT)
10,643✔
359
            if pattern_match !== nothing && pattern_match(src, typ, fidx, Targ, farg)
167✔
360
                continue
6✔
361
            end
362
            return (false, false, false)
161✔
363
        end
364
        ⊑(𝕃ₒ, eT, fT) || return (false, false, false)
10,476✔
365
    end
15,404✔
366
    return (false, true, true)
5,566✔
367
end
368

369
# Returns a tuple of `(:consistent, :removable, :nothrow)` flags for a given statement.
370
function stmt_effect_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt), @nospecialize(rt), src::Union{IRCode,IncrementalCompact})
1,124,331✔
371
    # TODO: We're duplicating analysis from inference here.
372
    isa(stmt, PiNode) && return (true, true, true)
1,124,331✔
373
    isa(stmt, PhiNode) && return (true, true, true)
1,071,768✔
374
    isa(stmt, ReturnNode) && return (true, false, true)
1,049,597✔
375
    isa(stmt, EnterNode) && return (true, false, true)
936,028✔
376
    isa(stmt, GotoNode) && return (true, false, true)
933,543✔
377
    isa(stmt, GotoIfNot) && return (true, false, ⊑(𝕃ₒ, argextype(stmt.cond, src), Bool))
921,807✔
378
    if isa(stmt, GlobalRef)
905,573✔
379
        # Modeled more precisely in abstract_eval_globalref. In general, if a
380
        # GlobalRef was moved to statement position, it is probably not `const`,
381
        # so we can't say much about it anyway.
382
        return (false, false, false)
15✔
383
    elseif isa(stmt, Expr)
905,558✔
384
        (; head, args) = stmt
684,365✔
385
        if head === :static_parameter
684,365✔
386
            # if we aren't certain enough about the type, it might be an UndefVarError at runtime
387
            sptypes = isa(src, IRCode) ? src.sptypes : src.ir.sptypes
58✔
388
            nothrow = !sptypes[args[1]::Int].undef
58✔
389
            return (true, nothrow, nothrow)
58✔
390
        end
391
        if head === :call
684,307✔
392
            f = argextype(args[1], src)
298,231✔
393
            f = singleton_type(f)
298,231✔
394
            f === nothing && return (false, false, false)
298,231✔
395
            if f === Intrinsics.cglobal || f === Intrinsics.llvmcall
595,018✔
396
                # TODO: these are not yet linearized
397
                return (false, false, false)
×
398
            end
399
            isa(f, Builtin) || return (false, false, false)
511,371✔
400
            # Needs to be handled in inlining to look at the callee effects
401
            f === Core._apply_iterate && return (false, false, false)
83,647✔
402
            argtypes = Any[argextype(args[arg], src) for arg in 2:length(args)]
163,532✔
403
            effects = builtin_effects(𝕃ₒ, f, argtypes, rt)
83,647✔
404
            consistent = is_consistent(effects)
83,647✔
405
            effect_free = is_effect_free(effects)
83,647✔
406
            nothrow = is_nothrow(effects)
83,647✔
407
            terminates = is_terminates(effects)
83,647✔
408
            removable = effect_free & nothrow & terminates
83,647✔
409
            return (consistent, removable, nothrow)
83,647✔
410
        elseif head === :new
386,076✔
411
            return new_expr_effect_flags(𝕃ₒ, args, src)
5,601✔
412
        elseif head === :foreigncall
380,475✔
413
            effects = foreigncall_effects(stmt) do @nospecialize x
800✔
414
                argextype(x, src)
415
            end
416
            consistent = is_consistent(effects)
800✔
417
            effect_free = is_effect_free(effects)
800✔
418
            nothrow = is_nothrow(effects)
800✔
419
            terminates = is_terminates(effects)
800✔
420
            removable = effect_free & nothrow & terminates
800✔
421
            return (consistent, removable, nothrow)
800✔
422
        elseif head === :new_opaque_closure
379,675✔
423
            length(args) < 4 && return (false, false, false)
×
424
            typ = argextype(args[1], src)
×
425
            typ, isexact = instanceof_tfunc(typ, true)
×
426
            isexact || return (false, false, false)
×
427
            ⊑(𝕃ₒ, typ, Tuple) || return (false, false, false)
×
428
            rt_lb = argextype(args[2], src)
×
429
            rt_ub = argextype(args[3], src)
×
430
            source = argextype(args[5], src)
×
431
            if !(⊑(𝕃ₒ, rt_lb, Type) && ⊑(𝕃ₒ, rt_ub, Type) && ⊑(𝕃ₒ, source, Method))
×
432
                return (false, false, false)
×
433
            end
434
            return (false, true, true)
×
435
        elseif head === :inbounds
379,675✔
436
            return (true, true, true)
×
437
        elseif head === :boundscheck || head === :isdefined || head === :the_exception || head === :copyast
758,434✔
438
            return (false, true, true)
1,152✔
439
        else
440
            # e.g. :loopinfo
441
            return (false, false, false)
378,523✔
442
        end
443
    end
444
    isa(stmt, SlotNumber) && error("unexpected IR elements")
221,193✔
445
    return (true, true, true)
221,193✔
446
end
447

448
function recompute_effects_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt), @nospecialize(rt),
449
                                 src::Union{IRCode,IncrementalCompact})
450
    flag = IR_FLAG_NULL
1,124,331✔
451
    (consistent, removable, nothrow) = stmt_effect_flags(𝕃ₒ, stmt, rt, src)
2,080,471✔
452
    if consistent
2,080,471✔
453
        flag |= IR_FLAG_CONSISTENT
518,440✔
454
    end
455
    if removable
2,080,471✔
456
        flag |= IR_FLAGS_REMOVABLE
629,363✔
457
    elseif nothrow
1,451,108✔
458
        flag |= IR_FLAG_NOTHROW
326,865✔
459
    end
460
    if !iscallstmt(stmt)
2,715,024✔
461
        # There is a bit of a subtle point here, which is that some non-call
462
        # statements (e.g. PiNode) can be UB:, however, we consider it
463
        # illegal to introduce such statements that actually cause UB (for any
464
        # input). Ideally that'd be handled at insertion time (TODO), but for
465
        # the time being just do that here.
466
        flag |= IR_FLAG_NOUB
1,422,140✔
467
    end
468
    return flag
2,080,471✔
469
end
470

471
"""
472
    argextype(x, src::Union{IRCode,IncrementalCompact}) -> t
473
    argextype(x, src::CodeInfo, sptypes::Vector{VarState}) -> t
474

475
Return the type of value `x` in the context of inferred source `src`.
476
Note that `t` might be an extended lattice element.
477
Use `widenconst(t)` to get the native Julia type of `x`.
478
"""
479
argextype(@nospecialize(x), ir::IRCode, sptypes::Vector{VarState} = ir.sptypes) =
4,822,441✔
480
    argextype(x, ir, sptypes, ir.argtypes)
481
function argextype(@nospecialize(x), compact::IncrementalCompact, sptypes::Vector{VarState} = compact.ir.sptypes)
5,911✔
482
    isa(x, AnySSAValue) && return types(compact)[x]
11,848,136✔
483
    return argextype(x, compact, sptypes, compact.ir.argtypes)
5,027,861✔
484
end
485
function argextype(@nospecialize(x), src::CodeInfo, sptypes::Vector{VarState})
×
486
    return argextype(x, src, sptypes, src.slottypes::Union{Vector{Any},Nothing})
4,554✔
487
end
488
function argextype(
6,712,514✔
489
    @nospecialize(x), src::Union{IRCode,IncrementalCompact,CodeInfo},
490
    sptypes::Vector{VarState}, slottypes::Union{Vector{Any},Nothing})
491
    if isa(x, Expr)
6,752,166✔
492
        if x.head === :static_parameter
×
493
            idx = x.args[1]::Int
×
494
            (1 ≤ idx ≤ length(sptypes)) || throw(InvalidIRError())
×
495
            return sptypes[idx].typ
×
496
        elseif x.head === :boundscheck
×
497
            return Bool
×
498
        elseif x.head === :copyast
×
499
            length(x.args) == 0 && throw(InvalidIRError())
×
500
            return argextype(x.args[1], src, sptypes, slottypes)
×
501
        end
502
        Core.println("argextype called on Expr with head ", x.head,
×
503
                     " which is not valid for IR in argument-position.")
504
        @assert false
×
505
    elseif isa(x, SlotNumber)
6,752,166✔
506
        slottypes === nothing && return Any
×
507
        (1 ≤ x.id ≤ length(slottypes)) || throw(InvalidIRError())
×
508
        return slottypes[x.id]
×
509
    elseif isa(x, SSAValue)
6,752,166✔
510
        return abstract_eval_ssavalue(x, src)
759,065✔
511
    elseif isa(x, Argument)
6,469,219✔
512
        slottypes === nothing && return Any
304,319✔
513
        (1 ≤ x.n ≤ length(slottypes)) || throw(InvalidIRError())
305,018✔
514
        return slottypes[x.n]
305,018✔
515
    elseif isa(x, QuoteNode)
6,164,900✔
516
        return Const(x.value)
52,962✔
517
    elseif isa(x, GlobalRef)
6,111,938✔
518
        return abstract_eval_globalref_type(x, src)
5,488,861✔
519
    elseif isa(x, PhiNode) || isa(x, PhiCNode) || isa(x, UpsilonNode)
1,246,145✔
520
        return Any
×
521
    elseif isa(x, PiNode)
623,077✔
522
        return x.typ
×
523
    else
524
        return Const(x)
623,077✔
525
    end
526
end
527
function abstract_eval_ssavalue(s::SSAValue, src::CodeInfo)
×
528
    ssavaluetypes = src.ssavaluetypes
6✔
529
    if ssavaluetypes isa Int
6✔
530
        (1 ≤ s.id ≤ ssavaluetypes) || throw(InvalidIRError())
×
531
        return Any
×
532
    else
533
        return abstract_eval_ssavalue(s, ssavaluetypes::Vector{Any})
6✔
534
    end
535
end
536
abstract_eval_ssavalue(s::SSAValue, src::Union{IRCode,IncrementalCompact}) = types(src)[s]
760,407✔
537

538
"""
539
    finishopt!(interp::AbstractInterpreter, opt::OptimizationState, ir::IRCode)
540

541
Called at the end of optimization to store the resulting IR back into the OptimizationState.
542
"""
543
function finishopt!(::AbstractInterpreter, opt::OptimizationState, ir::IRCode)
2,206✔
544
    opt.optresult = OptimizationResult(ir, ccall(:jl_ir_flag_inlining, UInt8, (Any,), opt.src), false)
223,735✔
545
    return nothing
213,514✔
546
end
547

548
function visit_bb_phis!(callback, ir::IRCode, bb::Int)
549
    stmts = ir.cfg.blocks[bb].stmts
×
550
    for idx in stmts
×
551
        stmt = ir[SSAValue(idx)][:stmt]
×
552
        if !isa(stmt, PhiNode)
×
553
            if !is_valid_phiblock_stmt(stmt)
×
554
                return
×
555
            end
556
        else
557
            callback(idx)
×
558
        end
559
    end
×
560
end
561

562
function any_stmt_may_throw(ir::IRCode, bb::Int)
563
    for idx in ir.cfg.blocks[bb].stmts
29,798✔
564
        if !has_flag(ir[SSAValue(idx)], IR_FLAG_NOTHROW)
15,287✔
565
            return true
14,571✔
566
        end
567
    end
1,104✔
568
    return false
328✔
569
end
570

571
visit_conditional_successors(callback, ir::IRCode, bb::Int) = # used for test
9✔
572
    visit_conditional_successors(callback, LazyPostDomtree(ir), ir, bb)
573
function visit_conditional_successors(callback, lazypostdomtree::LazyPostDomtree, ir::IRCode, bb::Int)
14,580✔
574
    visited = BitSet((bb,))
14,580✔
575
    worklist = Int[bb]
14,580✔
576
    while !isempty(worklist)
14,730✔
577
        thisbb = popfirst!(worklist)
14,721✔
578
        for succ in ir.cfg.blocks[thisbb].succs
14,721✔
579
            succ in visited && continue
15,322✔
580
            push!(visited, succ)
15,322✔
581
            if postdominates(get!(lazypostdomtree), succ, bb)
20,880✔
582
                # this successor is not conditional, so no need to visit it further
583
                continue
399✔
584
            elseif callback(succ)
15,639✔
585
                return true
14,571✔
586
            else
587
                push!(worklist, succ)
352✔
588
            end
589
        end
751✔
590
    end
150✔
591
    return false
9✔
592
end
593

594
struct AugmentedDomtree
595
    cfg::CFG
×
596
    domtree::DomTree
597
end
598

599
mutable struct LazyAugmentedDomtree
600
    const ir::IRCode
601
    agdomtree::AugmentedDomtree
602
    LazyAugmentedDomtree(ir::IRCode) = new(ir)
210,312✔
603
end
604

605
function get!(lazyagdomtree::LazyAugmentedDomtree)
×
606
    isdefined(lazyagdomtree, :agdomtree) && return lazyagdomtree.agdomtree
×
607
    ir = lazyagdomtree.ir
×
608
    cfg = copy(ir.cfg)
×
609
    # Add a virtual basic block to represent the exit
610
    push!(cfg.blocks, BasicBlock(StmtRange(0:-1)))
×
611
    for bb = 1:(length(cfg.blocks)-1)
×
612
        terminator = ir[SSAValue(last(cfg.blocks[bb].stmts))][:stmt]
×
613
        if isa(terminator, ReturnNode) && isdefined(terminator, :val)
×
614
            cfg_insert_edge!(cfg, bb, length(cfg.blocks))
×
615
        end
616
    end
×
617
    domtree = construct_domtree(cfg)
×
618
    return lazyagdomtree.agdomtree = AugmentedDomtree(cfg, domtree)
×
619
end
620

621
mutable struct PostOptAnalysisState
622
    const result::InferenceResult
623
    const ir::IRCode
624
    const inconsistent::BitSetBoundedMinPrioritySet
625
    const tpdum::TwoPhaseDefUseMap
626
    const lazypostdomtree::LazyPostDomtree
627
    const lazyagdomtree::LazyAugmentedDomtree
628
    const ea_analysis_pending::Vector{Int}
629
    all_retpaths_consistent::Bool
630
    all_effect_free::Bool
631
    effect_free_if_argmem_only::Union{Nothing,Bool}
632
    all_nothrow::Bool
633
    all_noub::Bool
634
    any_conditional_ub::Bool
635
    nortcall::Bool
636
    function PostOptAnalysisState(result::InferenceResult, ir::IRCode)
637
        inconsistent = BitSetBoundedMinPrioritySet(length(ir.stmts))
210,312✔
638
        tpdum = TwoPhaseDefUseMap(length(ir.stmts))
14,805,351✔
639
        lazypostdomtree = LazyPostDomtree(ir)
210,312✔
640
        lazyagdomtree = LazyAugmentedDomtree(ir)
210,312✔
641
        return new(result, ir, inconsistent, tpdum, lazypostdomtree, lazyagdomtree, Int[],
210,312✔
642
                   true, true, nothing, true, true, false, true)
643
    end
644
end
645

646
give_up_refinements!(sv::PostOptAnalysisState) =
2,406✔
647
    sv.all_retpaths_consistent = sv.all_effect_free = sv.effect_free_if_argmem_only =
648
    sv.all_nothrow = sv.all_noub = sv.nortcall = false
649

650
function any_refinable(sv::PostOptAnalysisState)
651
    effects = sv.result.ipo_effects
224,893✔
652
    return ((!is_consistent(effects) & sv.all_retpaths_consistent) |
224,893✔
653
            (!is_effect_free(effects) & sv.all_effect_free) |
654
            (!is_nothrow(effects) & sv.all_nothrow) |
655
            (!is_noub(effects) & sv.all_noub) |
656
            (!is_nortcall(effects) & sv.nortcall))
657
end
658

659
struct GetNativeEscapeCache{CodeCache}
660
    code_cache::CodeCache
661
    GetNativeEscapeCache(code_cache::CodeCache) where CodeCache = new{CodeCache}(code_cache)
×
662
end
663
GetNativeEscapeCache(interp::AbstractInterpreter) = GetNativeEscapeCache(code_cache(interp))
×
664
function ((; code_cache)::GetNativeEscapeCache)(codeinst::Union{CodeInstance,MethodInstance})
×
665
    if codeinst isa MethodInstance
×
666
        codeinst = get(code_cache, codeinst, nothing)
×
667
        codeinst === nothing && return false
×
668
    end
669
    argescapes = traverse_analysis_results(codeinst) do @nospecialize result
×
670
        return result isa EscapeAnalysis.ArgEscapeCache ? result : nothing
×
671
    end
672
    if argescapes !== nothing
×
673
        return argescapes
×
674
    end
675
    effects = codeinst isa CodeInstance ? decode_effects(codeinst.ipo_purity_bits) : codeinst.ipo_effects
×
676
    if is_effect_free(effects) && is_inaccessiblememonly(effects)
×
677
        # We might not have run EA on simple frames without any escapes (e.g. when optimization
678
        # is skipped when result is constant-folded by abstract interpretation). If those
679
        # frames aren't inlined, the accuracy of EA for caller context takes a big hit.
680
        # This is a HACK to avoid that, but obviously, a more comprehensive fix would be ideal.
681
        return true
×
682
    end
683
    return false
×
684
end
685

686
function refine_effects!(interp::AbstractInterpreter, opt::OptimizationState, sv::PostOptAnalysisState)
210,312✔
687
    if !is_effect_free(sv.result.ipo_effects) && sv.all_effect_free && !isempty(sv.ea_analysis_pending)
210,312✔
688
        ir = sv.ir
×
689
        nargs = Int(opt.src.nargs)
×
690
        estate = EscapeAnalysis.analyze_escapes(ir, nargs, optimizer_lattice(interp), get_escape_cache(interp))
×
691
        argescapes = EscapeAnalysis.ArgEscapeCache(estate)
×
692
        stack_analysis_result!(sv.result, argescapes)
×
693
        validate_mutable_arg_escapes!(estate, sv)
×
694
    end
695

696
    any_refinable(sv) || return false
418,063✔
697
    effects = sv.result.ipo_effects
2,561✔
698
    sv.result.ipo_effects = Effects(effects;
2,561✔
699
        consistent = sv.all_retpaths_consistent ? ALWAYS_TRUE : effects.consistent,
700
        effect_free = sv.all_effect_free ? ALWAYS_TRUE :
701
                      sv.effect_free_if_argmem_only === true ? EFFECT_FREE_IF_INACCESSIBLEMEMONLY : effects.effect_free,
702
        nothrow = sv.all_nothrow ? true : effects.nothrow,
703
        noub = sv.all_noub ? (sv.any_conditional_ub ? NOUB_IF_NOINBOUNDS : ALWAYS_TRUE) : effects.noub,
704
        nortcall = sv.nortcall ? true : effects.nortcall)
705
    return true
2,561✔
706
end
707

708
function is_ipo_dataflow_analysis_profitable(effects::Effects)
709
    return !(is_consistent(effects) && is_effect_free(effects) &&
211,347✔
710
             is_nothrow(effects) && is_noub(effects))
711
end
712

713
function iscall_with_boundscheck(@nospecialize(stmt), sv::PostOptAnalysisState)
714
    isexpr(stmt, :call) || return false
7,287,952✔
715
    ft = argextype(stmt.args[1], sv.ir)
297,334✔
716
    f = singleton_type(ft)
297,334✔
717
    f === nothing && return false
297,334✔
718
    if f === getfield
297,328✔
719
        nargs = 4
83,539✔
720
    elseif f === memoryrefnew
213,789✔
721
        nargs= 3
22,390✔
722
    elseif f === memoryrefget || f === memoryref_isassigned
380,855✔
723
        nargs = 4
1,985✔
724
    elseif f === memoryrefset!
189,414✔
725
        nargs = 5
12,220✔
726
    elseif f === memoryrefunset!
177,194✔
727
        nargs = 4
174✔
728
    else
729
        return false
177,020✔
730
    end
731
    length(stmt.args) < nargs && return false
120,308✔
732
    boundscheck = stmt.args[end]
34,236✔
733
    argextype(boundscheck, sv.ir) === Bool || return false
62,350✔
734
    isa(boundscheck, SSAValue) || return false
6,122✔
735
    return true
6,122✔
736
end
737

738
function check_all_args_noescape!(sv::PostOptAnalysisState, ir::IRCode, @nospecialize(stmt),
×
739
                                  estate::EscapeAnalysis.EscapeState)
740
    stmt isa Expr || return false
×
741
    if isexpr(stmt, :invoke)
×
742
        startidx = 2
×
743
    elseif isexpr(stmt, :new)
×
744
        startidx = 1
×
745
    else
746
        return false
×
747
    end
748
    has_no_escape(x::EscapeAnalysis.EscapeInfo) =
×
749
        EscapeAnalysis.has_no_escape(EscapeAnalysis.ignore_argescape(x))
750
    for i = startidx:length(stmt.args)
×
751
        arg = stmt.args[i]
×
752
        argt = argextype(arg, ir)
×
753
        if is_mutation_free_argtype(argt)
×
754
            continue
×
755
        end
756
        # See if we can find the allocation
757
        if isa(arg, Argument)
×
758
            if has_no_escape(estate[arg])
×
759
                # Even if we prove everything else effect_free, the best we can
760
                # say is :effect_free_if_argmem_only
761
                if sv.effect_free_if_argmem_only === nothing
×
762
                    sv.effect_free_if_argmem_only = true
×
763
                end
764
            else
765
                sv.effect_free_if_argmem_only = false
×
766
            end
767
            return false
×
768
        elseif isa(arg, SSAValue)
×
769
            has_no_escape(estate[arg]) || return false
×
770
            check_all_args_noescape!(sv, ir, ir[arg][:stmt], estate) || return false
×
771
        else
772
            return false
×
773
        end
774
    end
×
775
    return true
×
776
end
777

778
function validate_mutable_arg_escapes!(estate::EscapeAnalysis.EscapeState, sv::PostOptAnalysisState)
779
    ir = sv.ir
×
780
    for idx in sv.ea_analysis_pending
×
781
        # See if any mutable memory was allocated in this function and determined
782
        # not to escape.
783
        inst = ir[SSAValue(idx)]
×
784
        stmt = inst[:stmt]
×
785
        if !check_all_args_noescape!(sv, ir, stmt, estate)
×
786
            return sv.all_effect_free = false
×
787
        end
788
    end
×
789
    return true
×
790
end
791

792
function is_conditional_noub(inst::Instruction, sv::PostOptAnalysisState)
793
    stmt = inst[:stmt]
65,007✔
794
    iscall_with_boundscheck(stmt, sv) || return false
127,877✔
795
    barg = stmt.args[end]::SSAValue
2,137✔
796
    bstmt = sv.ir[barg][:stmt]
2,137✔
797
    isexpr(bstmt, :boundscheck) || return false
2,137✔
798
    # If IR_FLAG_INBOUNDS is already set, no more conditional ub
799
    (!isempty(bstmt.args) && bstmt.args[1] === false) && return false
2,137✔
800
    return true
1,607✔
801
end
802

803
function scan_non_dataflow_flags!(inst::Instruction, sv::PostOptAnalysisState)
3,742,217✔
804
    flag = inst[:flag]
3,742,217✔
805
    # If we can prove that the argmem does not escape the current function, we can
806
    # refine this to :effect_free.
807
    needs_ea_validation = has_flag(flag, IR_FLAGS_NEEDS_EA)
3,742,217✔
808
    stmt = inst[:stmt]
3,742,217✔
809
    if !needs_ea_validation
3,742,217✔
810
        if !isterminator(stmt) && stmt !== nothing
7,396,978✔
811
            # ignore control flow node – they are not removable on their own and thus not
812
            # have `IR_FLAG_EFFECT_FREE` but still do not taint `:effect_free`-ness of
813
            # the whole method invocation
814
            sv.all_effect_free &= has_flag(flag, IR_FLAG_EFFECT_FREE)
3,460,669✔
815
        end
816
    elseif sv.all_effect_free
13,191✔
817
        if (isexpr(stmt, :invoke) || isexpr(stmt, :new) ||
×
818
            # HACK for performance: limit the scope of EA to code with object field access only,
819
            # since its abilities to reason about e.g. arrays are currently very limited anyways.
820
            is_known_call(stmt, setfield!, sv.ir))
821
            push!(sv.ea_analysis_pending, inst.idx)
×
822
        else
823
            sv.all_effect_free = false
×
824
        end
825
    end
826
    sv.all_nothrow &= has_flag(flag, IR_FLAG_NOTHROW)
3,742,217✔
827
    if !has_flag(flag, IR_FLAG_NOUB)
3,742,217✔
828
        # Special case: `:boundscheck` into `getfield` or memory operations is `:noub_if_noinbounds`
829
        if is_conditional_noub(inst, sv)
66,614✔
830
            sv.any_conditional_ub = true
1,607✔
831
        else
832
            sv.all_noub = false
63,400✔
833
        end
834
    end
835
    if !has_flag(flag, IR_FLAG_NORTCALL)
3,742,217✔
836
        # if a function call that might invoke `Core.Compiler.return_type` has been deleted,
837
        # there's no need to taint with `:nortcall`, allowing concrete evaluation
838
        if iscallstmt(stmt)
6,435,029✔
839
            sv.nortcall = false
40,786✔
840
        end
841
    end
842
    nothing
3,742,217✔
843
end
844

845
function scan_inconsistency!(inst::Instruction, sv::PostOptAnalysisState)
3,727,636✔
846
    flag = inst[:flag]
3,727,636✔
847
    stmt_inconsistent = !has_flag(flag, IR_FLAG_CONSISTENT)
3,727,636✔
848
    stmt = inst[:stmt]
3,727,636✔
849
    # Special case: For `getfield` and memory operations, we allow inconsistency of the :boundscheck argument
850
    (; inconsistent, tpdum) = sv
3,727,636✔
851
    if iscall_with_boundscheck(stmt, sv)
4,002,278✔
852
        for i = 1:length(stmt.args)
7,970✔
853
            val = stmt.args[i]
16,056✔
854
            # SSAValue should be the only permitted argument type which can be inconsistent found here.
855
            # Others (e.g. GlobalRef) should have been moved to statement position. See stmt_effect_flags.
856
            if isa(val, SSAValue)
16,056✔
857
                if i < length(stmt.args)  # not the boundscheck argument (which is last)
7,162✔
858
                    stmt_inconsistent |= val.id in inconsistent
6,354✔
859
                end
860
                count!(tpdum, val)
7,162✔
861
            end
862
        end
28,127✔
863
    else
864
        for ur in userefs(stmt)
4,286,500✔
865
            val = ur[]
2,295,403✔
866
            if isa(val, SSAValue)
1,429,126✔
867
                stmt_inconsistent |= val.id in inconsistent
959,687✔
868
                count!(tpdum, val)
479,845✔
869
            end
870
        end
1,429,126✔
871
    end
872
    stmt_inconsistent && push!(inconsistent, inst.idx)
3,727,636✔
873
    return stmt_inconsistent
3,727,636✔
874
end
875

876
struct ScanStmt
877
    sv::PostOptAnalysisState
210,312✔
878
end
879

880
function ((; sv)::ScanStmt)(inst::Instruction, lstmt::Int, bb::Int)
3,730,042✔
881
    stmt = inst[:stmt]
3,730,042✔
882

883
    if isa(stmt, EnterNode)
3,730,042✔
884
        # try/catch not yet modeled
885
        give_up_refinements!(sv)
2,406✔
886
        return true # don't bail out early -- can cause tpdum counts to be off
2,406✔
887
    end
888

889
    scan_non_dataflow_flags!(inst, sv)
3,727,636✔
890

891
    stmt_inconsistent = scan_inconsistency!(inst, sv)
3,727,636✔
892

893
    if stmt_inconsistent
3,727,636✔
894
        if !has_flag(inst[:flag], IR_FLAG_NOTHROW)
3,439,295✔
895
            # Taint :consistent if this statement may raise since :consistent requires
896
            # consistent termination. TODO: Separate :consistent_return and :consistent_termination from :consistent.
897
            sv.all_retpaths_consistent = false
3,083,935✔
898
        end
899
        if inst.idx == lstmt
3,439,295✔
900
            if isa(stmt, ReturnNode) && isdefined(stmt, :val)
115,524✔
901
                sv.all_retpaths_consistent = false
36,409✔
902
            elseif isa(stmt, GotoIfNot)
79,115✔
903
                # Conditional Branch with inconsistent condition.
904
                # If we do not know this function terminates, taint consistency, now,
905
                # :consistent requires consistent termination. TODO: Just look at the
906
                # inconsistent region.
907
                if !sv.result.ipo_effects.terminates
36,957✔
908
                    sv.all_retpaths_consistent = false
22,386✔
909
                elseif visit_conditional_successors(sv.lazypostdomtree, sv.ir, bb) do succ::Int
14,571✔
910
                        return any_stmt_may_throw(sv.ir, succ)
15,615✔
911
                    end
912
                    # check if this `GotoIfNot` leads to conditional throws, which taints consistency
913
                    sv.all_retpaths_consistent = false
14,571✔
914
                else
915
                    (; cfg, domtree) = get!(sv.lazyagdomtree)
×
916
                    for succ in iterated_dominance_frontier(cfg, BlockLiveness(sv.ir.cfg.blocks[bb].succs, nothing), domtree)
×
917
                        if succ == length(cfg.blocks)
×
918
                            # Phi node in the virtual exit -> We have a conditional
919
                            # return. TODO: Check if all the retvals are egal.
920
                            sv.all_retpaths_consistent = false
×
921
                        else
922
                            visit_bb_phis!(sv.ir, succ) do phiidx::Int
×
923
                                push!(sv.inconsistent, phiidx)
×
924
                            end
925
                        end
926
                    end
×
927
                end
928
            end
929
        end
930
    end
931

932
    # Do not bail out early, as this can cause tpdum counts to be off.
933
    # # bail out early if there are no possibilities to refine the effects
934
    # if !any_refinable(sv)
935
    #     return nothing
936
    # end
937

938
    return true
3,727,636✔
939
end
940

941
function check_inconsistentcy!(sv::PostOptAnalysisState, scanner::BBScanner)
3,362✔
942
    (; ir, inconsistent, tpdum) = sv
3,362✔
943

944
    sv.all_retpaths_consistent || return
6,724✔
945
    scan!(ScanStmt(sv), scanner, false)
×
946
    sv.all_retpaths_consistent || return
×
947
    complete!(tpdum); push!(scanner.bb_ip, 1)
×
948
    populate_def_use_map!(tpdum, scanner)
×
949

950
    stmt_ip = BitSetBoundedMinPrioritySet(length(ir.stmts))
×
951
    for def in inconsistent
×
952
        append!(stmt_ip, tpdum[def])
×
953
   end
×
954
    lazydomtree = LazyDomtree(ir)
×
955
    while !isempty(stmt_ip)
×
956
        idx = popfirst!(stmt_ip)
×
957
        idx in inconsistent && continue # already processed
×
958
        inst = ir[SSAValue(idx)]
×
959
        stmt = inst[:stmt]
×
960
        if iscall_with_boundscheck(stmt, sv)
×
961
            # recompute inconsistent flags for call while skipping boundscheck (last) argument
962
            any_non_boundscheck_inconsistent = false
×
963
            for i = 1:(length(stmt.args)-1)
×
964
                val = stmt.args[i]
×
965
                if isa(val, SSAValue)
×
966
                    any_non_boundscheck_inconsistent |= val.id in inconsistent
×
967
                    any_non_boundscheck_inconsistent && break
×
968
                end
969
            end
×
970
            any_non_boundscheck_inconsistent || continue
×
971
        elseif isa(stmt, ReturnNode)
×
972
            sv.all_retpaths_consistent = false
×
973
            return
×
974
        elseif isa(stmt, GotoIfNot)
×
975
            bb = block_for_inst(ir, idx)
×
976
            cfg = ir.cfg
×
977
            blockliveness = BlockLiveness(cfg.blocks[bb].succs, nothing)
×
978
            for succ in iterated_dominance_frontier(cfg, blockliveness, get!(lazydomtree))
×
979
                visit_bb_phis!(ir, succ) do phiidx::Int
×
980
                    phiidx in inconsistent || push!(stmt_ip, phiidx)
×
981
                end
982
            end
×
983
        end
984
        push!(inconsistent, idx)
×
985
        append!(stmt_ip, tpdum[idx])
×
986
    end
×
987
end
988

989
function ipo_dataflow_analysis!(interp::AbstractInterpreter, opt::OptimizationState,
211,347✔
990
                                ir::IRCode, result::InferenceResult)
991
    if !is_ipo_dataflow_analysis_profitable(result.ipo_effects)
211,347✔
992
        return false
1,035✔
993
    end
994

995
    @assert isempty(ir.new_nodes) "IRCode should be compacted before post-opt analysis"
210,312✔
996

997
    sv = PostOptAnalysisState(result, ir)
14,805,351✔
998
    scanner = BBScanner(ir)
210,312✔
999

1000
    completed_scan = scan!(ScanStmt(sv), scanner, true)
210,312✔
1001

1002
    if !completed_scan
210,312✔
1003
        # finish scanning for all_retpaths_consistent computation
1004
        check_inconsistentcy!(sv, scanner)
7,603✔
1005
        if !sv.all_retpaths_consistent
7,603✔
1006
            # No longer any dataflow concerns, just scan the flags
1007
            scan!(scanner, false) do inst::Instruction, ::Int, ::Int
7,603✔
1008
                scan_non_dataflow_flags!(inst, sv)
14,581✔
1009
                # bail out early if there are no possibilities to refine the effects
1010
                if !any_refinable(sv)
14,581✔
1011
                    return nothing
2,836✔
1012
                end
1013
                return true
11,745✔
1014
            end
1015
        end
1016
    end
1017

1018
    return refine_effects!(interp, opt, sv)
210,312✔
1019
end
1020

1021
# run the optimization work
1022
function optimize(interp::AbstractInterpreter, opt::OptimizationState, caller::InferenceResult)
210,265✔
1023
    @zone "CC: OPTIMIZER" ir = run_passes_ipo_safe(opt.src, opt)
221,535✔
1024
    ipo_dataflow_analysis!(interp, opt, ir, caller)
221,529✔
1025
    finishopt!(interp, opt, ir)
221,529✔
1026
    return nothing
211,315✔
1027
end
1028

1029
const ALL_PASS_NAMES = String[]
1030
macro pass(name::String, expr)
1031
    optimize_until = esc(:optimize_until)
1032
    stage = esc(:__stage__)
1033
    macrocall = :(@zone $name $(esc(expr)))
1034
    macrocall.args[2] = __source__  # `@timeit` may want to use it
1035
    push!(ALL_PASS_NAMES, name)
1036
    quote
1037
        $macrocall
1,609,458✔
1038
        matchpass($optimize_until, ($stage += 1), $name) && $(esc(:(@goto __done__)))
1,480,498✔
1039
    end
1040
end
1041

1042
matchpass(optimize_until::Int, stage, _) = optimize_until == stage
123✔
1043
matchpass(optimize_until::String, _, name) = optimize_until == name
177✔
1044
matchpass(::Nothing, _, _) = false
×
1045

1046
function run_passes_ipo_safe(
211,456✔
1047
    ci::CodeInfo,
1048
    sv::OptimizationState,
1049
    optimize_until::Union{Nothing, Int, String} = nothing)  # run all passes by default
1050
    if optimize_until isa String && !contains_is(ALL_PASS_NAMES, optimize_until)
433,144✔
1051
        error("invalid `optimize_until` argument, no such optimization pass")
3✔
1052
    elseif optimize_until isa Int && (optimize_until < 1 || optimize_until > length(ALL_PASS_NAMES))
211,486✔
1053
        error("invalid `optimize_until` argument, no such optimization pass")
3✔
1054
    end
1055

1056
    __stage__ = 0  # used by @pass
211,450✔
1057
    # NOTE: The pass name MUST be unique for `optimize_until::String` to work
1058
    @pass "CC: CONVERT"   ir = convert_to_ircode(ci, sv)
1059
    @pass "CC: SLOT2REG"  ir = slot2reg(ir, ci, sv)
1060
    # TODO: Domsorting can produce an updated domtree - no need to recompute here
1061
    @pass "CC: COMPACT_1" ir = compact!(ir)
1062
    @pass "CC: INLINING"  ir = ssa_inlining_pass!(ir, sv.inlining, ci.propagate_inbounds)
1063
    # @zone "CC: VERIFY 2" verify_ir(ir)
1064
    @pass "CC: COMPACT_2" ir = compact!(ir)
1065
    @pass "CC: SROA"      ir = sroa_pass!(ir, sv.inlining)
1066
    @pass "CC: ADCE"      (ir, made_changes) = adce_pass!(ir, sv.inlining)
1067
    if made_changes
211,372✔
1068
        @pass "CC: COMPACT_3" ir = compact!(ir, true)
1069
    end
1070
    if is_asserts()
211,372✔
1071
        @zone "CC: VERIFY_3" begin
1072
            verify_ir(ir, true, false, optimizer_lattice(sv.inlining.interp), sv.linfo)
×
1073
            verify_linetable(ir.debuginfo, length(ir.stmts))
×
1074
        end
1075
    end
1076
    @label __done__  # used by @pass
1077
    return ir
211,447✔
1078
end
1079

1080
function strip_trailing_junk!(code::Vector{Any}, ssavaluetypes::Vector{Any}, ssaflags::Vector, debuginfo::DebugInfoStream, cfg::CFG, info::Vector{CallInfo})
106,920✔
1081
    # Remove `nothing`s at the end, we don't handle them well
1082
    # (we expect the last instruction to be a terminator)
1083
    codelocs = debuginfo.codelocs
106,920✔
1084
    for i = length(code):-1:1
213,840✔
1085
        if code[i] !== nothing
106,920✔
1086
            resize!(code, i)
106,920✔
1087
            resize!(ssavaluetypes, i)
106,920✔
1088
            resize!(codelocs, 3i)
106,920✔
1089
            resize!(info, i)
106,920✔
1090
            resize!(ssaflags, i)
106,920✔
1091
            break
106,920✔
1092
        end
1093
    end
×
1094
    # If the last instruction is not a terminator, add one. This can
1095
    # happen for implicit return on dead branches.
1096
    term = code[end]
106,920✔
1097
    if !isa(term, GotoIfNot) && !isa(term, GotoNode) && !isa(term, ReturnNode)
106,920✔
1098
        push!(code, ReturnNode())
6,955✔
1099
        push!(ssavaluetypes, Union{})
6,955✔
1100
        push!(codelocs, 0, 0, 0)
6,955✔
1101
        push!(info, NoCallInfo())
6,955✔
1102
        push!(ssaflags, IR_FLAG_NOTHROW)
6,955✔
1103

1104
        # Update CFG to include appended terminator
1105
        old_range = cfg.blocks[end].stmts
6,955✔
1106
        new_range = StmtRange(first(old_range), last(old_range) + 1)
6,955✔
1107
        cfg.blocks[end] = BasicBlock(cfg.blocks[end], new_range)
6,955✔
1108
        (length(cfg.index) == length(cfg.blocks)) && (cfg.index[end] += 1)
6,955✔
1109
    end
1110
    nothing
106,920✔
1111
end
1112

1113
function changed_lineinfo(di::DebugInfo, codeloc::Int, prevloc::Int)
1,209,083✔
1114
    while true
1,209,083✔
1115
        next = getdebugidx(di, codeloc)
1,210,649✔
1116
        line = next[1]
1,210,649✔
1117
        line < 0 && return false # invalid info
1,210,649✔
1118
        line == 0 && next[2] == 0 && return false # no new info
1,210,586✔
1119
        prevloc <= 0 && return true # no old info
1,071,695✔
1120
        prev = getdebugidx(di, prevloc)
964,817✔
1121
        next === prev && return false # exactly identical
964,817✔
1122
        prevline = prev[1]
81,247✔
1123
        prevline < 0 && return true # previous invalid info, now valid
81,247✔
1124
        edge = next[2]
81,247✔
1125
        edge === prev[2] || return true # change to this edge
81,439✔
1126
        linetable = di.linetable
81,055✔
1127
        # check for change to line number here
1128
        if linetable === nothing || line == 0
81,055✔
1129
            line == prevline || return true
160,544✔
1130
        else
1131
            changed_lineinfo(linetable::DebugInfo, Int(line), Int(prevline)) && return true
×
1132
        end
1133
        # check for change to edge here
1134
        edge == 0 && return false # no edge here
1,566✔
1135
        di = di.edges[Int(edge)]::DebugInfo
1,566✔
1136
        codeloc = Int(next[3])
1,566✔
1137
        prevloc = Int(prev[3])
1,566✔
1138
    end
1,566✔
1139
end
1140

1141
function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
211,365✔
1142
    # Update control-flow to reflect any unreachable branches.
1143
    ssavaluetypes = ci.ssavaluetypes::Vector{Any}
211,365✔
1144
    ci.code = code = copy_exprargs(ci.code)
2,627,752✔
1145
    di = DebugInfoStream(sv.linfo, ci.debuginfo, length(code))
211,365✔
1146
    codelocs = di.codelocs
211,365✔
1147
    ssaflags = ci.ssaflags
211,365✔
1148
    for i = 1:length(code)
318,285✔
1149
        expr = code[i]
2,627,752✔
1150
        if !(i in sv.unreachable)
2,627,752✔
1151
            if isa(expr, GotoIfNot)
2,275,091✔
1152
                # Replace this live GotoIfNot with:
1153
                # - no-op if :nothrow and the branch target is unreachable
1154
                # - cond if :nothrow and both targets are unreachable
1155
                # - typeassert if must-throw
1156
                block = block_for_inst(sv.cfg, i)
127,074✔
1157
                if ssavaluetypes[i] === Bottom
127,074✔
1158
                    destblock = block_for_inst(sv.cfg, expr.dest)
6✔
1159
                    cfg_delete_edge!(sv.cfg, block, block + 1)
6✔
1160
                    ((block + 1) != destblock) && cfg_delete_edge!(sv.cfg, block, destblock)
6✔
1161
                    expr = Expr(:call, Core.typeassert, expr.cond, Bool)
6✔
1162
                elseif i + 1 in sv.unreachable
127,068✔
1163
                    @assert has_flag(ssaflags[i], IR_FLAG_NOTHROW)
22,567✔
1164
                    cfg_delete_edge!(sv.cfg, block, block + 1)
22,567✔
1165
                    expr = GotoNode(expr.dest)
22,567✔
1166
                elseif expr.dest in sv.unreachable
104,501✔
1167
                    @assert has_flag(ssaflags[i], IR_FLAG_NOTHROW)
49,918✔
1168
                    cfg_delete_edge!(sv.cfg, block, block_for_inst(sv.cfg, expr.dest))
49,918✔
1169
                    expr = nothing
49,918✔
1170
                end
1171
                code[i] = expr
127,074✔
1172
            elseif isa(expr, EnterNode)
2,148,017✔
1173
                catchdest = expr.catch_dest
2,913✔
1174
                if catchdest in sv.unreachable
2,913✔
1175
                    cfg_delete_edge!(sv.cfg, block_for_inst(sv.cfg, i), block_for_inst(sv.cfg, catchdest))
18✔
1176
                    if isdefined(expr, :scope)
18✔
1177
                        # We've proven that nothing inside the enter region throws,
1178
                        # but we don't yet know whether something might read the scope,
1179
                        # so we need to retain this enter for the time being. However,
1180
                        # we use the special marker `0` to indicate that setting up
1181
                        # the try/catch frame is not required.
1182
                        code[i] = EnterNode(expr, 0)
12✔
1183
                    else
1184
                        code[i] = nothing
6✔
1185
                    end
1186
                end
1187
            elseif isa(expr, PhiNode)
2,145,104✔
1188
                new_edges = Int32[]
×
1189
                new_vals = Any[]
×
1190
                for j = 1:length(expr.edges)
×
1191
                    edge = expr.edges[j]
×
1192
                    (edge in sv.unreachable || (ssavaluetypes[edge] === Union{} && !isa(code[edge], PhiNode))) && continue
×
1193
                    push!(new_edges, edge)
×
1194
                    if isassigned(expr.values, j)
×
1195
                        push!(new_vals, expr.values[j])
×
1196
                    else
1197
                        resize!(new_vals, length(new_edges))
×
1198
                    end
1199
                end
×
1200
                code[i] = PhiNode(new_edges, new_vals)
×
1201
            end
1202
        end
1203
    end
5,044,139✔
1204

1205
    # Go through and add an unreachable node after every
1206
    # Union{} call. Then reindex labels.
1207
    stmtinfo = sv.stmt_info
211,365✔
1208
    meta = Expr[]
211,365✔
1209
    idx = 1
211,365✔
1210
    oldidx = 1
211,365✔
1211
    nstmts = length(code)
211,365✔
1212
    ssachangemap = labelchangemap = blockchangemap = nothing
211,365✔
1213
    prevloc = 0
211,365✔
1214
    while idx <= length(code)
2,830,072✔
1215
        if sv.insert_coverage && changed_lineinfo(ci.debuginfo, oldidx, prevloc)
2,618,707✔
1216
            # insert a side-effect instruction before the current instruction in the same basic block
1217
            insert!(code, idx, Expr(:code_coverage_effect))
434,054✔
1218
            splice!(codelocs, 3idx-2:3idx-3, (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0]))
434,054✔
1219
            insert!(ssavaluetypes, idx, Nothing)
434,054✔
1220
            insert!(stmtinfo, idx, NoCallInfo())
434,054✔
1221
            insert!(ssaflags, idx, IR_FLAG_NULL)
434,054✔
1222
            if ssachangemap === nothing
434,054✔
1223
                ssachangemap = fill(0, nstmts)
2,626,783✔
1224
            end
1225
            if labelchangemap === nothing
434,054✔
1226
                labelchangemap = fill(0, nstmts)
2,626,783✔
1227
            end
1228
            ssachangemap[oldidx] += 1
434,054✔
1229
            if oldidx < length(labelchangemap)
434,054✔
1230
                labelchangemap[oldidx + 1] += 1
433,080✔
1231
            end
1232
            if blockchangemap === nothing
434,054✔
1233
                blockchangemap = fill(0, length(sv.cfg.blocks))
562,985✔
1234
            end
1235
            blockchangemap[block_for_inst(sv.cfg, oldidx)] += 1
434,054✔
1236
            idx += 1
434,054✔
1237
            prevloc = oldidx
434,054✔
1238
        end
1239
        if ssavaluetypes[idx] === Union{} && !(oldidx in sv.unreachable) && !isa(code[idx], PhiNode)
2,618,707✔
1240
            # We should have converted any must-throw terminators to an equivalent w/o control-flow edges
1241
            @assert !isterminator(code[idx])
8,511✔
1242

1243
            block = block_for_inst(sv.cfg, oldidx)
8,511✔
1244
            block_end = last(sv.cfg.blocks[block].stmts) + (idx - oldidx)
8,511✔
1245

1246
            # Delete all successors to this basic block
1247
            for succ in sv.cfg.blocks[block].succs
8,511✔
1248
                preds = sv.cfg.blocks[succ].preds
4,395✔
1249
                deleteat!(preds, findfirst(x::Int->x==block, preds)::Int)
22,571✔
1250
            end
4,395✔
1251
            empty!(sv.cfg.blocks[block].succs)
8,532✔
1252

1253
            if !(idx < length(code) && isa(code[idx + 1], ReturnNode) && !isdefined((code[idx + 1]::ReturnNode), :val))
8,511✔
1254
                # Any statements from here to the end of the block have been wrapped in Core.Const(...)
1255
                # by type inference (effectively deleting them). Only task left is to replace the block
1256
                # terminator with an explicit `unreachable` marker.
1257

1258
                if block_end > idx
8,511✔
1259
                    if is_asserts()
5,680✔
1260
                        # Verify that type-inference did its job
1261
                        for i = (oldidx + 1):last(sv.cfg.blocks[block].stmts)
×
1262
                            @assert i in sv.unreachable
×
1263
                        end
×
1264
                    end
1265
                    code[block_end] = ReturnNode()
5,680✔
1266
                    codelocs[3block_end-2], codelocs[3block_end-1], codelocs[3block_end-0] = (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0])
5,680✔
1267
                    ssavaluetypes[block_end] = Union{}
5,680✔
1268
                    stmtinfo[block_end] = NoCallInfo()
5,680✔
1269
                    ssaflags[block_end] = IR_FLAG_NOTHROW
5,680✔
1270
                    idx += block_end - idx
5,680✔
1271
                else
1272
                    insert!(code, idx + 1, ReturnNode())
2,831✔
1273
                    splice!(codelocs, 3idx-2:3idx-3, (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0]))
2,831✔
1274
                    insert!(ssavaluetypes, idx + 1, Union{})
2,831✔
1275
                    insert!(stmtinfo, idx + 1, NoCallInfo())
2,831✔
1276
                    insert!(ssaflags, idx + 1, IR_FLAG_NOTHROW)
2,831✔
1277
                    if ssachangemap === nothing
2,831✔
1278
                        ssachangemap = fill(0, nstmts)
×
1279
                    end
1280
                    if labelchangemap === nothing
2,831✔
1281
                        labelchangemap = sv.insert_coverage ? fill(0, nstmts) : ssachangemap
×
1282
                    end
1283
                    if oldidx < length(ssachangemap)
2,831✔
1284
                        ssachangemap[oldidx + 1] += 1
2,831✔
1285
                        sv.insert_coverage && (labelchangemap[oldidx + 1] += 1)
2,831✔
1286
                    end
1287
                    if blockchangemap === nothing
2,831✔
1288
                        blockchangemap = fill(0, length(sv.cfg.blocks))
×
1289
                    end
1290
                    blockchangemap[block] += 1
2,831✔
1291
                    idx += 1
2,831✔
1292
                end
1293
                oldidx = last(sv.cfg.blocks[block].stmts)
8,511✔
1294
            end
1295
        end
1296
        idx += 1
2,618,707✔
1297
        oldidx += 1
2,618,707✔
1298
    end
2,618,707✔
1299
    empty!(sv.unreachable)
216,027✔
1300

1301
    if ssachangemap !== nothing && labelchangemap !== nothing
211,365✔
1302
        renumber_ir_elements!(code, ssachangemap, labelchangemap)
210,891✔
1303
    end
1304
    if blockchangemap !== nothing
211,365✔
1305
        renumber_cfg_stmts!(sv.cfg, blockchangemap)
210,891✔
1306
    end
1307

1308
    for i = 1:length(code)
318,285✔
1309
        code[i] = process_meta!(meta, code[i])
3,064,637✔
1310
    end
5,917,909✔
1311
    strip_trailing_junk!(code, ssavaluetypes, ssaflags, di, sv.cfg, stmtinfo)
211,365✔
1312
    types = Any[]
211,365✔
1313
    stmts = InstructionStream(code, types, stmtinfo, codelocs, ssaflags)
211,365✔
1314
    # NOTE this `argtypes` contains types of slots yet: it will be modified to contain the
1315
    # types of call arguments only once `slot2reg` converts this `IRCode` to the SSA form
1316
    # and eliminates slots (see below)
1317
    argtypes = sv.slottypes
211,365✔
1318
    return IRCode(stmts, sv.cfg, di, argtypes, meta, sv.sptypes, world_range(ci))
211,365✔
1319
end
1320

1321
function process_meta!(meta::Vector{Expr}, @nospecialize stmt)
1322
    if isexpr(stmt, :meta) && length(stmt.args) ≥ 1
3,064,637✔
1323
        push!(meta, stmt)
×
1324
        return nothing
×
1325
    end
1326
    return stmt
3,064,637✔
1327
end
1328

1329
function slot2reg(ir::IRCode, ci::CodeInfo, sv::OptimizationState)
106,920✔
1330
    # need `ci` for the slot metadata, IR for the code
1331
    @zone "CC: DOMTREE_1" domtree = construct_domtree(ir)
211,444✔
1332
    defuse_insts = scan_slot_def_use(Int(ci.nargs), ci, ir.stmts.stmt)
211,444✔
1333
    𝕃ₒ = optimizer_lattice(sv.inlining.interp)
211,365✔
1334
    @zone "CC: CONSTRUCT_SSA" ir = construct_ssa!(ci, ir, sv, domtree, defuse_insts, 𝕃ₒ) # consumes `ir`
211,444✔
1335
    # NOTE now we have converted `ir` to the SSA form and eliminated slots
1336
    # let's resize `argtypes` now and remove unnecessary types for the eliminated slots
1337
    resize!(ir.argtypes, ci.nargs)
211,444✔
1338
    return ir
211,444✔
1339
end
1340

1341
## Computing the cost of a function body
1342

1343
# saturating sum (inputs are non-negative), prevents overflow with typemax(Int) below
1344
plus_saturate(x::Int, y::Int) = max(x, y, x+y)
2,785,269✔
1345

1346
# known return type
1347
isknowntype(@nospecialize T) = (T === Union{}) || isa(T, Const) || isconcretetype(widenconst(T))
4,992✔
1348

1349
function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState},
2,559,364✔
1350
                        params::OptimizationParams)
1351
    #=const=# UNKNOWN_CALL_COST = 20
2,559,364✔
1352
    head = ex.head
2,559,364✔
1353
    if is_meta_expr_head(head)
5,111,142✔
1354
        return 0
7,586✔
1355
    elseif head === :call
2,551,778✔
1356
        farg = ex.args[1]
181,421✔
1357
        ftyp = argextype(farg, src, sptypes)
181,421✔
1358
        if ftyp === IntrinsicFunction && farg isa SSAValue
181,421✔
1359
            # if this comes from code that was already inlined into another function,
1360
            # Consts have been widened. try to recover in simple cases.
1361
            farg = isa(src, CodeInfo) ? src.code[farg.id] : src[farg][:stmt]
×
1362
            if isa(farg, GlobalRef) || isa(farg, QuoteNode) || isa(farg, IntrinsicFunction) || isexpr(farg, :static_parameter)
×
1363
                ftyp = argextype(farg, src, sptypes)
×
1364
            end
1365
        end
1366
        f = singleton_type(ftyp)
181,421✔
1367
        if isa(f, IntrinsicFunction)
181,421✔
1368
            iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1
72,528✔
1369
            if isassigned(T_IFUNC, iidx)
72,528✔
1370
                minarg, maxarg, = T_IFUNC[iidx]
72,528✔
1371
                nargs = length(ex.args)
72,528✔
1372
                if minarg + 1 <= nargs <= maxarg + 1
72,528✔
1373
                    # With mostly constant arguments, all Intrinsics tend to become very cheap
1374
                    # and are likely to combine with the operations around them,
1375
                    # so reduce their cost by half.
1376
                    cost = T_IFUNC_COST[iidx]
72,528✔
1377
                    if cost == 0 || nargs < 3 ||
118,510✔
1378
                       (f === Intrinsics.cglobal || f === Intrinsics.llvmcall) # these hold malformed IR, so argextype will crash on them
1379
                        return cost
27,868✔
1380
                    end
1381
                    aty2 = widenconditional(argextype(ex.args[2], src, sptypes))
44,660✔
1382
                    nconst = Int(aty2 isa Const)
44,660✔
1383
                    for i = 3:nargs
89,248✔
1384
                        aty = widenconditional(argextype(ex.args[i], src, sptypes))
44,660✔
1385
                        if widenconst(aty) != widenconst(aty2)
44,660✔
1386
                            nconst = 0
9,042✔
1387
                            break
9,042✔
1388
                        end
1389
                        nconst += aty isa Const
35,618✔
1390
                    end
35,618✔
1391
                    if nconst + 2 >= nargs
44,660✔
1392
                        cost = (cost - 1) ÷ 2
23,494✔
1393
                    end
1394
                    return cost
44,660✔
1395
                end
1396
            end
1397
            # unknown/unhandled intrinsic: hopefully the caller gets a slightly better answer after the inlining
1398
            return UNKNOWN_CALL_COST
×
1399
        end
1400
        if isa(f, Builtin) && f !== invoke
108,893✔
1401
            # The efficiency of operations like a[i] and s.b
1402
            # depend strongly on whether the result can be
1403
            # inferred, so check the type of ex
1404
            if f === Core.getfield || f === Core.tuple || f === Core.getglobal
161,830✔
1405
                # we might like to penalize non-inferrability, but
1406
                # tuple iteration/destructuring makes that impossible
1407
                # return plus_saturate(argcost, isknowntype(extyp) ? 1 : params.inline_nonleaf_penalty)
1408
                return 0
57,166✔
1409
            elseif (f === Core.memoryrefget || f === Core.memoryref_isassigned) && length(ex.args) >= 3
101,509✔
1410
                atyp = argextype(ex.args[2], src, sptypes)
780✔
1411
                return isknowntype(atyp) ? 1 : params.inline_nonleaf_penalty
780✔
1412
            elseif f === Core.memoryrefset! && length(ex.args) >= 3
50,357✔
1413
                atyp = argextype(ex.args[2], src, sptypes)
4,166✔
1414
                return isknowntype(atyp) ? 5 : params.inline_nonleaf_penalty
4,166✔
1415
            elseif f === Core.memoryrefunset! && length(ex.args) >= 3
46,191✔
1416
                atyp = argextype(ex.args[2], src, sptypes)
54✔
1417
                return isknowntype(atyp) ? 5 : params.inline_nonleaf_penalty
54✔
1418
            elseif f === typeassert && isconstType(widenconst(argextype(ex.args[3], src, sptypes)))
50,411✔
1419
                return 1
1,200✔
1420
            end
1421
            fidx = find_tfunc(f)
982,996✔
1422
            if fidx === nothing
44,937✔
1423
                # unknown/unhandled builtin
1424
                # Use the generic cost of a direct function call
1425
                return UNKNOWN_CALL_COST
3,130✔
1426
            end
1427
            return T_FFUNC_COST[fidx]
41,807✔
1428
        end
1429
        extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes)
1,180✔
1430
        if extyp === Union{}
590✔
1431
            return 0
×
1432
        end
1433
        return params.inline_nonleaf_penalty
590✔
1434
    elseif head === :foreigncall
2,370,357✔
1435
        foreigncall = ex.args[1]
3,163✔
1436
        if isexpr(foreigncall, :tuple, 1)
3,163✔
1437
            foreigncall = foreigncall.args[1]
2,242✔
1438
            if foreigncall isa QuoteNode && foreigncall.value === :jl_string_ptr
2,242✔
1439
                return 1
362✔
1440
            end
1441
        end
1442
        return 20
2,801✔
1443
    elseif head === :invoke || head === :invoke_modify
4,695,385✔
1444
        # Calls whose "return type" is Union{} do not actually return:
1445
        # they are errors. Since these are not part of the typical
1446
        # run-time of the function, we omit them from
1447
        # consideration. This way, non-inlined error branches do not
1448
        # prevent inlining.
1449
        extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes)
78,006✔
1450
        return extyp === Union{} ? 0 : UNKNOWN_CALL_COST
39,003✔
1451
    elseif head === :(=)
2,328,191✔
1452
        return statement_cost(ex.args[2], -1, src, sptypes, params)
×
1453
    elseif head === :copyast
2,328,191✔
1454
        return 100
×
1455
    end
1456
    return 0
2,328,191✔
1457
end
1458

1459
function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState},
1460
                                  params::OptimizationParams)
1461
    thiscost = 0
2,785,269✔
1462
    dst(tgt) = isa(src, IRCode) ? first(src.cfg.blocks[tgt].stmts) : tgt
2,858,835✔
1463
    if stmt isa Expr
2,785,314✔
1464
        thiscost = statement_cost(stmt, line, src, sptypes, params)::Int
2,559,394✔
1465
    elseif stmt isa GotoNode
225,920✔
1466
        # loops are generally always expensive
1467
        # but assume that forward jumps are already counted for from
1468
        # summing the cost of the not-taken branch
1469
        thiscost = dst(stmt.label) < line ? 40 : 0
40,822✔
1470
    elseif stmt isa GotoIfNot
185,098✔
1471
        thiscost = dst(stmt.dest) < line ? 40 : 0
32,753✔
1472
    elseif stmt isa EnterNode
152,345✔
1473
        # try/catch is a couple function calls,
1474
        # but don't inline functions with try/catch
1475
        # since these aren't usually performance-sensitive functions,
1476
        # and llvm is more likely to miscompile them when these functions get large
1477
        thiscost = typemax(Int)
1,701✔
1478
    end
1479
    return thiscost
2,785,314✔
1480
end
1481

1482
function inline_cost_model(ir::IRCode, params::OptimizationParams, cost_threshold::Int)
90,150✔
1483
    bodycost = 0
90,150✔
1484
    for i = 1:length(ir.stmts)
180,258✔
1485
        stmt = ir[SSAValue(i)][:stmt]
2,785,269✔
1486
        thiscost = statement_or_branch_cost(stmt, i, ir, ir.sptypes, params)
3,011,174✔
1487
        bodycost = plus_saturate(bodycost, thiscost)
2,785,269✔
1488
        if bodycost > cost_threshold
2,785,269✔
1489
            return MAX_INLINE_COST
4,159✔
1490
        end
1491
    end
5,476,229✔
1492
    return inline_cost_clamp(bodycost)
85,991✔
1493
end
1494

1495
function statement_costs!(cost::Vector{Int}, body::Vector{Any}, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState}, params::OptimizationParams)
×
1496
    maxcost = 0
×
1497
    for line = 1:length(body)
3✔
1498
        stmt = body[line]
45✔
1499
        thiscost = statement_or_branch_cost(stmt, line, src, sptypes,
60✔
1500
                                            params)
1501
        cost[line] = thiscost
45✔
1502
        if thiscost > maxcost
45✔
1503
            maxcost = thiscost
×
1504
        end
1505
    end
87✔
1506
    return maxcost
3✔
1507
end
1508

1509
function renumber_ir_elements!(body::Vector{Any}, cfg::Union{CFG,Nothing}, ssachangemap::Vector{Int})
×
1510
    return renumber_ir_elements!(body, cfg, ssachangemap, ssachangemap)
×
1511
end
1512

1513
function cumsum_ssamap!(ssachangemap::Vector{Int})
1514
    any_change = false
320,634✔
1515
    rel_change = 0
320,634✔
1516
    for i = 1:length(ssachangemap)
641,268✔
1517
        val = ssachangemap[i]
2,688,195✔
1518
        any_change |= val ≠ 0
2,688,195✔
1519
        rel_change += val
2,688,195✔
1520
        if val == -1
2,688,195✔
1521
            # Keep a marker that this statement was deleted
1522
            ssachangemap[i] = typemin(Int)
×
1523
        else
1524
            ssachangemap[i] = rel_change
2,688,195✔
1525
        end
1526
    end
5,055,756✔
1527
    return any_change
320,634✔
1528
end
1529

1530
function renumber_ir_elements!(body::Vector{Any}, ssachangemap::Vector{Int}, labelchangemap::Vector{Int})
106,878✔
1531
    any_change = cumsum_ssamap!(labelchangemap)
1,214,202✔
1532
    if ssachangemap !== labelchangemap
106,878✔
1533
        any_change |= cumsum_ssamap!(ssachangemap)
1,214,202✔
1534
    end
1535
    any_change || return
106,878✔
1536
    for i = 1:length(body)
213,756✔
1537
        el = body[i]
1,401,326✔
1538
        if isa(el, GotoNode)
1,401,326✔
1539
            body[i] = GotoNode(el.label + labelchangemap[el.label])
38,251✔
1540
        elseif isa(el, GotoIfNot)
1,363,075✔
1541
            cond = el.cond
13,023✔
1542
            if isa(cond, SSAValue)
13,023✔
1543
                cond = SSAValue(cond.id + ssachangemap[cond.id])
10,586✔
1544
            end
1545
            was_deleted = labelchangemap[el.dest] == typemin(Int)
13,023✔
1546
            body[i] = was_deleted ? cond : GotoIfNot(cond, el.dest + labelchangemap[el.dest])
13,023✔
1547
        elseif isa(el, ReturnNode)
1,350,052✔
1548
            if isdefined(el, :val)
113,428✔
1549
                val = el.val
109,962✔
1550
                if isa(val, SSAValue)
109,962✔
1551
                    body[i] = ReturnNode(SSAValue(val.id + ssachangemap[val.id]))
109,033✔
1552
                end
1553
            end
1554
        elseif isa(el, SSAValue)
1,236,624✔
1555
            body[i] = SSAValue(el.id + ssachangemap[el.id])
×
1556
        elseif isa(el, PhiNode)
1,236,624✔
1557
            i = 1
×
1558
            edges = el.edges
×
1559
            values = el.values
×
1560
            while i <= length(edges)
×
1561
                was_deleted = ssachangemap[edges[i]] == typemin(Int)
×
1562
                if was_deleted
×
1563
                    deleteat!(edges, i)
×
1564
                    deleteat!(values, i)
×
1565
                else
1566
                    edges[i] += ssachangemap[edges[i]]
×
1567
                    val = values[i]
×
1568
                    if isa(val, SSAValue)
×
1569
                        values[i] = SSAValue(val.id + ssachangemap[val.id])
×
1570
                    end
1571
                    i += 1
×
1572
                end
1573
            end
×
1574
        elseif isa(el, EnterNode)
1,236,624✔
1575
            tgt = el.catch_dest
2,485✔
1576
            if tgt != 0
2,485✔
1577
                was_deleted = labelchangemap[tgt] == typemin(Int)
2,485✔
1578
                if was_deleted
2,485✔
1579
                    @assert !isdefined(el, :scope)
×
1580
                    body[i] = nothing
×
1581
                else
1582
                    if isdefined(el, :scope) && isa(el.scope, SSAValue)
2,485✔
1583
                        body[i] = EnterNode(tgt + labelchangemap[tgt], SSAValue(el.scope.id + ssachangemap[el.scope.id]))
2,106✔
1584
                    else
1585
                        body[i] = EnterNode(el, tgt + labelchangemap[tgt])
379✔
1586
                    end
1587
                end
1588
            end
1589
        elseif isa(el, Expr)
1,234,139✔
1590
            if el.head === :(=) && el.args[2] isa Expr
640,269✔
1591
                el = el.args[2]::Expr
40,866✔
1592
            end
1593
            if !is_meta_expr_head(el.head)
1,279,528✔
1594
                args = el.args
639,250✔
1595
                for i = 1:length(args)
1,091,705✔
1596
                    el = args[i]
1,226,484✔
1597
                    if isa(el, SSAValue)
1,226,484✔
1598
                        args[i] = SSAValue(el.id + ssachangemap[el.id])
572,248✔
1599
                    end
1600
                end
1,226,484✔
1601
            end
1602
        end
1603
    end
1,401,326✔
1604
end
1605

1606
function renumber_cfg_stmts!(cfg::CFG, blockchangemap::Vector{Int})
106,878✔
1607
    cumsum_ssamap!(blockchangemap) || return
106,878✔
1608
    for i = 1:length(cfg.blocks)
213,756✔
1609
        old_range = cfg.blocks[i].stmts
259,791✔
1610
        new_range = StmtRange(first(old_range) + ((i > 1) ? blockchangemap[i - 1] : 0),
259,791✔
1611
                              last(old_range) + blockchangemap[i])
1612
        cfg.blocks[i] = BasicBlock(cfg.blocks[i], new_range)
259,791✔
1613
        if i <= length(cfg.index)
259,791✔
1614
            cfg.index[i] = cfg.index[i] + blockchangemap[i]
259,791✔
1615
        end
1616
    end
259,791✔
1617
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc