• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / #37908

19 Sep 2024 03:08AM UTC coverage: 87.675% (-0.01%) from 87.689%
#37908

push

local

web-flow
a minor improvement for EA-based `:effect_free`-ness refinement (#55796)

2 of 4 new or added lines in 2 files covered. (50.0%)

17 existing lines in 3 files now uncovered.

78264 of 89266 relevant lines covered (87.68%)

16545908.85 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.75
/base/compiler/optimize.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
#############
4
# constants #
5
#############
6

7
# The slot has uses that are not statically dominated by any assignment
8
# This is implied by `SLOT_USEDUNDEF`.
9
# If this is not set, all the uses are (statically) dominated by the defs.
10
# In particular, if a slot has `AssignedOnce && !StaticUndef`, it is an SSA.
11
const SLOT_STATICUNDEF  = 1 # slot might be used before it is defined (structurally)
12
const SLOT_ASSIGNEDONCE = 16 # slot is assigned to only once
13
const SLOT_USEDUNDEF    = 32 # slot has uses that might raise UndefVarError
14
# const SLOT_CALLED      = 64
15

16
# NOTE make sure to sync the flag definitions below with julia.h and `jl_code_info_set_ir` in method.c
17

18
const IR_FLAG_NULL        = zero(UInt32)
19
# This statement is marked as @inbounds by user.
20
# Ff replaced by inlining, any contained boundschecks may be removed.
21
const IR_FLAG_INBOUNDS    = one(UInt32) << 0
22
# This statement is marked as @inline by user
23
const IR_FLAG_INLINE      = one(UInt32) << 1
24
# This statement is marked as @noinline by user
25
const IR_FLAG_NOINLINE    = one(UInt32) << 2
26
# An optimization pass has updated this statement in a way that may
27
# have exposed information that inference did not see. Re-running
28
# inference on this statement may be profitable.
29
const IR_FLAG_REFINED     = one(UInt32) << 3
30
# This statement is proven :consistent
31
const IR_FLAG_CONSISTENT  = one(UInt32) << 4
32
# This statement is proven :effect_free
33
const IR_FLAG_EFFECT_FREE = one(UInt32) << 5
34
# This statement is proven :nothrow
35
const IR_FLAG_NOTHROW     = one(UInt32) << 6
36
# This statement is proven :terminates
37
const IR_FLAG_TERMINATES  = one(UInt32) << 7
38
# This statement is proven :noub
39
const IR_FLAG_NOUB        = one(UInt32) << 8
40
# TODO: Both of these should eventually go away once
41
# This statement is :effect_free == EFFECT_FREE_IF_INACCESSIBLEMEMONLY
42
const IR_FLAG_EFIIMO      = one(UInt32) << 9
43
# This statement is :inaccessiblememonly == INACCESSIBLEMEM_OR_ARGMEMONLY
44
const IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM = one(UInt32) << 10
45
# This statement is :nortcall
46
const IR_FLAG_NORTCALL    = one(UInt32) << 11
47
# This statement has no users and may be deleted if flags get refined to IR_FLAGS_REMOVABLE
48
const IR_FLAG_UNUSED      = one(UInt32) << 12
49

50
const NUM_IR_FLAGS = 13 # sync with julia.h
51

52
const IR_FLAGS_EFFECTS =
53
    IR_FLAG_CONSISTENT | IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW |
54
    IR_FLAG_TERMINATES | IR_FLAG_NOUB | IR_FLAG_NORTCALL
55

56
const IR_FLAGS_REMOVABLE = IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW | IR_FLAG_TERMINATES
57

58
const IR_FLAGS_NEEDS_EA = IR_FLAG_EFIIMO | IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM
59

60
has_flag(curr::UInt32, flag::UInt32) = (curr & flag) == flag
2,147,483,647✔
61

62
function iscallstmt(@nospecialize stmt)
63
    stmt isa Expr || return false
257,810,681✔
64
    head = stmt.head
144,643,741✔
65
    return head === :call || head === :invoke || head === :foreigncall
249,931,988✔
66
end
67

68
function flags_for_effects(effects::Effects)
69
    flags = zero(UInt32)
×
70
    if is_consistent(effects)
121,865,955✔
71
        flags |= IR_FLAG_CONSISTENT
×
72
    end
73
    if is_effect_free(effects)
121,865,955✔
74
        flags |= IR_FLAG_EFFECT_FREE
98,738,802✔
75
    elseif is_effect_free_if_inaccessiblememonly(effects)
23,127,153✔
76
        flags |= IR_FLAG_EFIIMO
352,707✔
77
    end
78
    if is_nothrow(effects)
121,865,955✔
79
        flags |= IR_FLAG_NOTHROW
108,085,670✔
80
    end
81
    if is_terminates(effects)
121,865,955✔
82
        flags |= IR_FLAG_TERMINATES
114,913,988✔
83
    end
84
    if is_inaccessiblemem_or_argmemonly(effects)
121,865,955✔
85
        flags |= IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM
11,142,847✔
86
    end
87
    if is_noub(effects)
121,865,955✔
88
        flags |= IR_FLAG_NOUB
112,273,638✔
89
    end
90
    if is_nortcall(effects)
121,865,955✔
91
        flags |= IR_FLAG_NORTCALL
115,520,281✔
92
    end
93
    return flags
121,865,955✔
94
end
95

96
const TOP_TUPLE = GlobalRef(Core, :tuple)
97

98
# This corresponds to the type of `CodeInfo`'s `inlining_cost` field
99
const InlineCostType = UInt16
100
const MAX_INLINE_COST = typemax(InlineCostType)
101
const MIN_INLINE_COST = InlineCostType(10)
102
const MaybeCompressed = Union{CodeInfo, String}
103

104
is_inlineable(@nospecialize src::MaybeCompressed) =
39,536,381✔
105
    ccall(:jl_ir_inlining_cost, InlineCostType, (Any,), src) != MAX_INLINE_COST
106
set_inlineable!(src::CodeInfo, val::Bool) =
3,506,309✔
107
    src.inlining_cost = (val ? MIN_INLINE_COST : MAX_INLINE_COST)
108

109
function inline_cost_clamp(x::Int)
110
    x > MAX_INLINE_COST && return MAX_INLINE_COST
4,265,486✔
111
    x < MIN_INLINE_COST && return MIN_INLINE_COST
4,265,486✔
112
    return convert(InlineCostType, x)
1,340,165✔
113
end
114

115
is_declared_inline(@nospecialize src::MaybeCompressed) =
46,079,824✔
116
    ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == 1
117

118
is_declared_noinline(@nospecialize src::MaybeCompressed) =
10,423,112✔
119
    ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == 2
120

121
#####################
122
# OptimizationState #
123
#####################
124

125
# return whether this src should be inlined. If so, retrieve_ir_for_inlining must return an IRCode from it
126
function src_inlining_policy(interp::AbstractInterpreter,
38,051,759✔
127
    @nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt32)
128
    if isa(src, MaybeCompressed)
38,051,760✔
129
        src_inlineable = is_stmt_inline(stmt_flag) || is_inlineable(src)
72,133,141✔
130
        return src_inlineable
36,066,829✔
131
    elseif isa(src, IRCode)
1,984,931✔
132
        return true
199✔
133
    end
134
    @assert !isa(src, CodeInstance) # handled by caller
1,984,732✔
135
    return false
1,984,732✔
136
end
137

138
struct InliningState{Interp<:AbstractInterpreter}
139
    edges::Vector{Any}
8,071,093✔
140
    world::UInt
141
    interp::Interp
142
end
143
function InliningState(sv::InferenceState, interp::AbstractInterpreter)
144
    edges = sv.stmt_edges[1]
8,071,087✔
145
    return InliningState(edges, sv.world, interp)
8,071,087✔
146
end
147
function InliningState(interp::AbstractInterpreter)
3✔
148
    return InliningState(Any[], get_inference_world(interp), interp)
6✔
149
end
150

151
# get `code_cache(::AbstractInterpreter)` from `state::InliningState`
152
code_cache(state::InliningState) = WorldView(code_cache(state.interp), state.world)
28,250,365✔
153

154
mutable struct OptimizationState{Interp<:AbstractInterpreter}
155
    linfo::MethodInstance
8,071,090✔
156
    src::CodeInfo
157
    ir::Union{Nothing, IRCode}
158
    stmt_info::Vector{CallInfo}
159
    mod::Module
160
    sptypes::Vector{VarState}
161
    slottypes::Vector{Any}
162
    inlining::InliningState{Interp}
163
    cfg::CFG
164
    unreachable::BitSet
165
    bb_vartables::Vector{Union{Nothing,VarTable}}
166
    insert_coverage::Bool
167
end
168
function OptimizationState(sv::InferenceState, interp::AbstractInterpreter)
169
    inlining = InliningState(sv, interp)
8,071,087✔
170
    return OptimizationState(sv.linfo, sv.src, nothing, sv.stmt_info, sv.mod,
8,071,087✔
171
                             sv.sptypes, sv.slottypes, inlining, sv.cfg,
172
                             sv.unreachable, sv.bb_vartables, sv.insert_coverage)
173
end
174
function OptimizationState(mi::MethodInstance, src::CodeInfo, interp::AbstractInterpreter)
3✔
175
    # prepare src for running optimization passes if it isn't already
176
    nssavalues = src.ssavaluetypes
3✔
177
    if nssavalues isa Int
3✔
178
        src.ssavaluetypes = Any[ Any for i = 1:nssavalues ]
24✔
179
    else
180
        nssavalues = length(src.ssavaluetypes::Vector{Any})
×
181
    end
182
    sptypes = sptypes_from_meth_instance(mi)
3✔
183
    nslots = length(src.slotflags)
3✔
184
    slottypes = src.slottypes
3✔
185
    if slottypes === nothing
3✔
186
        slottypes = Any[ Any for i = 1:nslots ]
8✔
187
    end
188
    stmt_info = CallInfo[ NoCallInfo() for i = 1:nssavalues ]
24✔
189
    # cache some useful state computations
190
    def = mi.def
3✔
191
    mod = isa(def, Method) ? def.module : def
4✔
192
    # Allow using the global MI cache, but don't track edges.
193
    # This method is mostly used for unit testing the optimizer
194
    inlining = InliningState(interp)
3✔
195
    cfg = compute_basic_blocks(src.code)
3✔
196
    unreachable = BitSet()
3✔
197
    bb_vartables = Union{VarTable,Nothing}[]
3✔
198
    for block = 1:length(cfg.blocks)
3✔
199
        push!(bb_vartables, VarState[
43✔
200
            VarState(slottypes[slot], src.slotflags[slot] & SLOT_USEDUNDEF != 0)
201
            for slot = 1:nslots
202
        ])
203
    end
17✔
204
    return OptimizationState(mi, src, nothing, stmt_info, mod, sptypes, slottypes, inlining, cfg, unreachable, bb_vartables, false)
3✔
205
end
206
function OptimizationState(mi::MethodInstance, interp::AbstractInterpreter)
2✔
207
    world = get_inference_world(interp)
2✔
208
    src = retrieve_code_info(mi, world)
4✔
209
    src === nothing && return nothing
2✔
210
    return OptimizationState(mi, src, interp)
2✔
211
end
212

213
function argextype end # imported by EscapeAnalysis
214
function try_compute_field end # imported by EscapeAnalysis
215

216
include("compiler/ssair/heap.jl")
217
include("compiler/ssair/slot2ssa.jl")
218
include("compiler/ssair/inlining.jl")
219
include("compiler/ssair/verify.jl")
220
include("compiler/ssair/legacy.jl")
221
include("compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl")
222
include("compiler/ssair/passes.jl")
223
include("compiler/ssair/irinterp.jl")
224

225
function ir_to_codeinf!(opt::OptimizationState)
226
    (; linfo, src) = opt
8,071,034✔
227
    src = ir_to_codeinf!(src, opt.ir::IRCode)
8,071,034✔
228
    opt.ir = nothing
8,071,034✔
229
    maybe_validate_code(linfo, src, "optimized")
8,071,034✔
230
    return src
157✔
231
end
232

233
function ir_to_codeinf!(src::CodeInfo, ir::IRCode)
234
    replace_code_newstyle!(src, ir)
8,071,056✔
235
    widen_all_consts!(src)
8,071,056✔
236
    return src
×
237
end
238

239
# widen all Const elements in type annotations
240
function widen_all_consts!(src::CodeInfo)
8,071,056✔
241
    ssavaluetypes = src.ssavaluetypes::Vector{Any}
8,071,056✔
242
    for i = 1:length(ssavaluetypes)
8,071,056✔
243
        ssavaluetypes[i] = widenconst(ssavaluetypes[i])
818,804,069✔
244
    end
1,629,537,082✔
245

246
    for i = 1:length(src.code)
8,071,056✔
247
        x = src.code[i]
818,804,069✔
248
        if isa(x, PiNode)
818,804,069✔
249
            src.code[i] = PiNode(x.val, widenconst(x.typ))
2,248,688✔
250
        end
251
    end
1,629,537,082✔
252

253
    return src
8,071,056✔
254
end
255

256
#########
257
# logic #
258
#########
259

260
_topmod(sv::OptimizationState) = _topmod(sv.mod)
×
261

262
is_stmt_inline(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_INLINE)
87,144,807✔
263
is_stmt_noinline(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_NOINLINE)
40,472,253✔
264

265
function new_expr_effect_flags(𝕃ₒ::AbstractLattice, args::Vector{Any}, src::Union{IRCode,IncrementalCompact}, pattern_match=nothing)
585,207✔
266
    Targ = args[1]
1,130,280✔
267
    atyp = argextype(Targ, src)
585,207✔
268
    # `Expr(:new)` of unknown type could raise arbitrary TypeError.
269
    typ, isexact = instanceof_tfunc(atyp, true)
585,207✔
270
    if !isexact
585,207✔
271
        atyp = unwrap_unionall(widenconst(atyp))
71,817✔
272
        if isType(atyp) && isTypeDataType(atyp.parameters[1])
143,618✔
273
            typ = atyp.parameters[1]
71,801✔
274
        else
275
            return (false, false, false)
16✔
276
        end
277
        isabstracttype(typ) && return (false, false, false)
71,801✔
278
    else
279
        isconcretedispatch(typ) || return (false, false, false)
513,390✔
280
    end
281
    typ = typ::DataType
585,191✔
282
    fcount = datatype_fieldcount(typ)
585,191✔
283
    fcount === nothing && return (false, false, false)
585,191✔
284
    fcount >= length(args) - 1 || return (false, false, false)
585,191✔
285
    for fidx in 1:(length(args) - 1)
585,191✔
286
        farg = args[fidx + 1]
1,420,545✔
287
        eT = argextype(farg, src)
1,420,545✔
288
        fT = fieldtype(typ, fidx)
1,420,545✔
289
        if !isexact && has_free_typevars(fT)
1,420,545✔
290
            if pattern_match !== nothing && pattern_match(src, typ, fidx, Targ, farg)
41,105✔
291
                continue
12,031✔
292
            end
293
            return (false, false, false)
58,831✔
294
        end
295
        ⊑(𝕃ₒ, eT, fT) || return (false, false, false)
1,350,376✔
296
    end
2,196,550✔
297
    return (false, true, true)
524,911✔
298
end
299

300
# Returns a tuple of `(:consistent, :removable, :nothrow)` flags for a given statement.
301
function stmt_effect_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt), @nospecialize(rt), src::Union{IRCode,IncrementalCompact})
130,817,560✔
302
    # TODO: We're duplicating analysis from inference here.
303
    isa(stmt, PiNode) && return (true, true, true)
130,817,560✔
304
    isa(stmt, PhiNode) && return (true, true, true)
123,299,278✔
305
    isa(stmt, ReturnNode) && return (true, false, true)
117,253,871✔
306
    isa(stmt, EnterNode) && return (true, false, true)
107,771,415✔
307
    isa(stmt, GotoNode) && return (true, false, true)
107,708,334✔
308
    isa(stmt, GotoIfNot) && return (true, false, ⊑(𝕃ₒ, argextype(stmt.cond, src), Bool))
105,059,073✔
309
    if isa(stmt, GlobalRef)
100,419,749✔
310
        nothrow = consistent = isdefinedconst_globalref(stmt)
10,756✔
311
        return (consistent, nothrow, nothrow)
10,756✔
312
    elseif isa(stmt, Expr)
100,408,993✔
313
        (; head, args) = stmt
82,406,122✔
314
        if head === :static_parameter
82,406,122✔
315
            # if we aren't certain enough about the type, it might be an UndefVarError at runtime
316
            sptypes = isa(src, IRCode) ? src.sptypes : src.ir.sptypes
94,844✔
317
            nothrow = !sptypes[args[1]::Int].undef
94,844✔
318
            return (true, nothrow, nothrow)
94,844✔
319
        end
320
        if head === :call
82,311,278✔
321
            f = argextype(args[1], src)
38,492,809✔
322
            f = singleton_type(f)
38,492,809✔
323
            f === nothing && return (false, false, false)
38,492,809✔
324
            if f === Intrinsics.cglobal || f === Intrinsics.llvmcall
76,253,234✔
325
                # TODO: these are not yet linearized
326
                return (false, false, false)
×
327
            end
328
            isa(f, Builtin) || return (false, false, false)
62,758,997✔
329
            # Needs to be handled in inlining to look at the callee effects
330
            f === Core._apply_iterate && return (false, false, false)
13,494,237✔
331
            argtypes = Any[argextype(args[arg], src) for arg in 2:length(args)]
26,216,388✔
332
            effects = builtin_effects(𝕃ₒ, f, argtypes, rt)
13,494,237✔
333
            consistent = is_consistent(effects)
13,494,237✔
334
            effect_free = is_effect_free(effects)
13,494,237✔
335
            nothrow = is_nothrow(effects)
13,494,237✔
336
            terminates = is_terminates(effects)
13,494,237✔
337
            removable = effect_free & nothrow & terminates
13,494,237✔
338
            return (consistent, removable, nothrow)
13,494,237✔
339
        elseif head === :new
43,818,469✔
340
            return new_expr_effect_flags(𝕃ₒ, args, src)
545,073✔
341
        elseif head === :foreigncall
43,273,396✔
342
            effects = foreigncall_effects(stmt) do @nospecialize x
130,818✔
343
                argextype(x, src)
23,220✔
344
            end
345
            consistent = is_consistent(effects)
122,689✔
346
            effect_free = is_effect_free(effects)
122,689✔
347
            nothrow = is_nothrow(effects)
×
348
            terminates = is_terminates(effects)
×
349
            removable = effect_free & nothrow & terminates
122,689✔
350
            return (consistent, removable, nothrow)
122,689✔
351
        elseif head === :new_opaque_closure
43,150,707✔
352
            length(args) < 4 && return (false, false, false)
35✔
353
            typ = argextype(args[1], src)
35✔
354
            typ, isexact = instanceof_tfunc(typ, true)
35✔
355
            isexact || return (false, false, false)
36✔
356
            ⊑(𝕃ₒ, typ, Tuple) || return (false, false, false)
34✔
357
            rt_lb = argextype(args[2], src)
34✔
358
            rt_ub = argextype(args[3], src)
34✔
359
            source = argextype(args[5], src)
34✔
360
            if !(⊑(𝕃ₒ, rt_lb, Type) && ⊑(𝕃ₒ, rt_ub, Type) && ⊑(𝕃ₒ, source, Method))
34✔
361
                return (false, false, false)
×
362
            end
363
            return (false, true, true)
34✔
364
        elseif head === :inbounds
43,150,672✔
365
            return (true, true, true)
×
366
        elseif head === :boundscheck || head === :isdefined || head === :the_exception || head === :copyast
85,826,107✔
367
            return (false, true, true)
530,500✔
368
        else
369
            # e.g. :loopinfo
370
            return (false, false, false)
42,620,172✔
371
        end
372
    end
373
    isa(stmt, SlotNumber) && error("unexpected IR elements")
18,002,871✔
374
    return (true, true, true)
18,002,871✔
375
end
376

377
function recompute_effects_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt), @nospecialize(rt),
378
                                 src::Union{IRCode,IncrementalCompact})
379
    flag = IR_FLAG_NULL
×
380
    (consistent, removable, nothrow) = stmt_effect_flags(𝕃ₒ, stmt, rt, src)
130,817,560✔
381
    if consistent
130,817,560✔
382
        flag |= IR_FLAG_CONSISTENT
×
383
    end
384
    if removable
130,817,560✔
385
        flag |= IR_FLAGS_REMOVABLE
45,022,706✔
386
    elseif nothrow
85,794,854✔
387
        flag |= IR_FLAG_NOTHROW
16,873,177✔
388
    end
389
    if !iscallstmt(stmt)
174,730,873✔
390
        # There is a bit of a subtle point here, which is that some non-call
391
        # statements (e.g. PiNode) can be UB:, however, we consider it
392
        # illegal to introduce such statements that actually cause UB (for any
393
        # input). Ideally that'd be handled at insertion time (TODO), but for
394
        # the time being just do that here.
395
        flag |= IR_FLAG_NOUB
92,202,061✔
396
    end
397
    return flag
130,817,560✔
398
end
399

400
"""
401
    argextype(x, src::Union{IRCode,IncrementalCompact}) -> t
402
    argextype(x, src::CodeInfo, sptypes::Vector{VarState}) -> t
403

404
Return the type of value `x` in the context of inferred source `src`.
405
Note that `t` might be an extended lattice element.
406
Use `widenconst(t)` to get the native Julia type of `x`.
407
"""
408
argextype(@nospecialize(x), ir::IRCode, sptypes::Vector{VarState} = ir.sptypes) =
443,668,933✔
409
    argextype(x, ir, sptypes, ir.argtypes)
410
function argextype(@nospecialize(x), compact::IncrementalCompact, sptypes::Vector{VarState} = compact.ir.sptypes)
22,429,439✔
411
    isa(x, AnySSAValue) && return types(compact)[x]
2,147,483,647✔
412
    return argextype(x, compact, sptypes, compact.ir.argtypes)
1,850,985,952✔
413
end
414
argextype(@nospecialize(x), src::CodeInfo, sptypes::Vector{VarState}) = argextype(x, src, sptypes, src.slottypes::Vector{Any})
402✔
415
function argextype(
2,106,429,849✔
416
    @nospecialize(x), src::Union{IRCode,IncrementalCompact,CodeInfo},
417
    sptypes::Vector{VarState}, slottypes::Vector{Any})
418
    if isa(x, Expr)
2,106,429,863✔
419
        if x.head === :static_parameter
×
420
            return sptypes[x.args[1]::Int].typ
×
421
        elseif x.head === :boundscheck
×
422
            return Bool
×
423
        elseif x.head === :copyast
×
424
            return argextype(x.args[1], src, sptypes, slottypes)
×
425
        end
426
        Core.println("argextype called on Expr with head ", x.head,
×
427
                     " which is not valid for IR in argument-position.")
428
        @assert false
×
429
    elseif isa(x, SlotNumber)
2,106,429,863✔
430
        return slottypes[x.id]
×
431
    elseif isa(x, SSAValue)
2,106,429,863✔
432
        return abstract_eval_ssavalue(x, src)
65,099,692✔
433
    elseif isa(x, Argument)
2,044,878,075✔
434
        return slottypes[x.n]
50,320,617✔
435
    elseif isa(x, QuoteNode)
1,995,072,543✔
436
        return Const(x.value)
28,033,995✔
437
    elseif isa(x, GlobalRef)
1,967,038,548✔
438
        return abstract_eval_globalref_type(x)
1,887,673,079✔
439
    elseif isa(x, PhiNode) || isa(x, PhiCNode) || isa(x, UpsilonNode)
158,872,808✔
440
        return Any
×
441
    elseif isa(x, PiNode)
79,436,404✔
442
        return x.typ
×
443
    else
444
        return Const(x)
79,436,404✔
445
    end
446
end
447
abstract_eval_ssavalue(s::SSAValue, src::CodeInfo) = abstract_eval_ssavalue(s, src.ssavaluetypes::Vector{Any})
16✔
448
abstract_eval_ssavalue(s::SSAValue, src::Union{IRCode,IncrementalCompact}) = types(src)[s]
74,823,983✔
449

450
"""
451
    finish(interp::AbstractInterpreter, opt::OptimizationState,
452
           ir::IRCode, caller::InferenceResult)
453

454
Post-process information derived by Julia-level optimizations for later use.
455
In particular, this function determines the inlineability of the optimized code.
456
"""
457
function finish(interp::AbstractInterpreter, opt::OptimizationState,
8,070,253✔
458
                ir::IRCode, caller::InferenceResult)
459
    (; src, linfo) = opt
8,070,253✔
460
    (; def, specTypes) = linfo
8,070,253✔
461

462
    force_noinline = is_declared_noinline(src)
8,070,253✔
463

464
    # compute inlining and other related optimizations
465
    result = caller.result
8,070,253✔
466
    @assert !(result isa LimitedAccuracy)
8,070,253✔
467
    result = widenslotwrapper(result)
8,070,253✔
468

469
    opt.ir = ir
8,070,253✔
470

471
    # determine and cache inlineability
472
    if !force_noinline
8,070,253✔
473
        sig = unwrap_unionall(specTypes)
7,948,532✔
474
        if !(isa(sig, DataType) && sig.name === Tuple.name)
7,928,691✔
475
            force_noinline = true
×
476
        end
477
        if !is_declared_inline(src) && result === Bottom
7,928,691✔
478
            force_noinline = true
24✔
479
        end
480
    end
481
    if force_noinline
8,070,253✔
482
        set_inlineable!(src, false)
178,703✔
483
    elseif isa(def, Method)
7,891,550✔
484
        if is_declared_inline(src) && isdispatchtuple(specTypes)
7,890,464✔
485
            # obey @inline declaration if a dispatch barrier would not help
486
            set_inlineable!(src, true)
3,129,311✔
487
        else
488
            # compute the cost (size) of inlining this code
489
            params = OptimizationParams(interp)
4,761,153✔
490
            cost_threshold = default = params.inline_cost_threshold
4,761,153✔
491
            if ⊑(optimizer_lattice(interp), result, Tuple) && !isconcretetype(widenconst(result))
4,761,153✔
492
                cost_threshold += params.inline_tupleret_bonus
81,586✔
493
            end
494
            # if the method is declared as `@inline`, increase the cost threshold 20x
495
            if is_declared_inline(src)
4,761,153✔
496
                cost_threshold += 19*default
248,214✔
497
            end
498
            # a few functions get special treatment
499
            if def.module === _topmod(def.module)
4,761,153✔
500
                name = def.name
3,199,534✔
501
                if name === :iterate || name === :unsafe_convert || name === :cconvert
6,352,535✔
502
                    cost_threshold += 4*default
88,662✔
503
                end
504
            end
505
            src.inlining_cost = inline_cost(ir, params, cost_threshold)
4,761,153✔
506
        end
507
    end
508
    return nothing
8,070,253✔
509
end
510

511
function visit_bb_phis!(callback, ir::IRCode, bb::Int)
512
    stmts = ir.cfg.blocks[bb].stmts
16,643✔
513
    for idx in stmts
33,286✔
514
        stmt = ir[SSAValue(idx)][:stmt]
25,685✔
515
        if !isa(stmt, PhiNode)
25,685✔
516
            if !is_valid_phiblock_stmt(stmt)
33,286✔
517
                return
11,061✔
518
            end
519
        else
520
            callback(idx)
9,042✔
521
        end
522
    end
14,624✔
523
end
524

525
function any_stmt_may_throw(ir::IRCode, bb::Int)
526
    for idx in ir.cfg.blocks[bb].stmts
1,054,094✔
527
        if !has_flag(ir[SSAValue(idx)], IR_FLAG_NOTHROW)
1,093,304✔
528
            return true
364,472✔
529
        end
530
    end
1,295,089✔
531
    return false
162,575✔
532
end
533

534
visit_conditional_successors(callback, ir::IRCode, bb::Int) = # used for test
3✔
535
    visit_conditional_successors(callback, LazyPostDomtree(ir), ir, bb)
536
function visit_conditional_successors(callback, lazypostdomtree::LazyPostDomtree, ir::IRCode, bb::Int)
375,260✔
537
    visited = BitSet((bb,))
375,260✔
538
    worklist = Int[bb]
375,260✔
539
    while !isempty(worklist)
483,456✔
540
        thisbb = popfirst!(worklist)
472,668✔
541
        for succ in ir.cfg.blocks[thisbb].succs
472,668✔
542
            succ in visited && continue
568,321✔
543
            push!(visited, succ)
555,671✔
544
            if postdominates(get!(lazypostdomtree), succ, bb)
783,699✔
545
                # this successor is not conditional, so no need to visit it further
546
                continue
28,616✔
547
            elseif callback(succ)
1,255,887✔
548
                return true
364,472✔
549
            else
550
                push!(worklist, succ)
162,583✔
551
            end
552
        end
203,849✔
553
    end
108,196✔
554
    return false
10,788✔
555
end
556

557
struct AugmentedDomtree
558
    cfg::CFG
9,798✔
559
    domtree::DomTree
560
end
561

562
mutable struct LazyAugmentedDomtree
563
    const ir::IRCode
564
    agdomtree::AugmentedDomtree
565
    LazyAugmentedDomtree(ir::IRCode) = new(ir)
8,049,723✔
566
end
567

568
function get!(lazyagdomtree::LazyAugmentedDomtree)
10,785✔
569
    isdefined(lazyagdomtree, :agdomtree) && return lazyagdomtree.agdomtree
10,785✔
570
    ir = lazyagdomtree.ir
9,798✔
571
    cfg = copy(ir.cfg)
9,798✔
572
    # Add a virtual basic block to represent the exit
573
    push!(cfg.blocks, BasicBlock(StmtRange(0:-1)))
9,798✔
574
    for bb = 1:(length(cfg.blocks)-1)
9,798✔
575
        terminator = ir[SSAValue(last(cfg.blocks[bb].stmts))][:stmt]
354,003✔
576
        if isa(terminator, ReturnNode) && isdefined(terminator, :val)
354,003✔
577
            cfg_insert_edge!(cfg, bb, length(cfg.blocks))
10,389✔
578
        end
579
    end
698,208✔
580
    domtree = construct_domtree(cfg)
9,798✔
581
    return lazyagdomtree.agdomtree = AugmentedDomtree(cfg, domtree)
9,798✔
582
end
583

584
mutable struct PostOptAnalysisState
585
    const result::InferenceResult
586
    const ir::IRCode
587
    const inconsistent::BitSetBoundedMinPrioritySet
588
    const tpdum::TwoPhaseDefUseMap
589
    const lazypostdomtree::LazyPostDomtree
590
    const lazyagdomtree::LazyAugmentedDomtree
591
    const ea_analysis_pending::Vector{Int}
592
    all_retpaths_consistent::Bool
593
    all_effect_free::Bool
594
    effect_free_if_argmem_only::Union{Nothing,Bool}
595
    all_nothrow::Bool
596
    all_noub::Bool
597
    any_conditional_ub::Bool
598
    nortcall::Bool
599
    function PostOptAnalysisState(result::InferenceResult, ir::IRCode)
8,049,723✔
600
        inconsistent = BitSetBoundedMinPrioritySet(length(ir.stmts))
8,049,723✔
601
        tpdum = TwoPhaseDefUseMap(length(ir.stmts))
818,428,978✔
602
        lazypostdomtree = LazyPostDomtree(ir)
8,049,723✔
603
        lazyagdomtree = LazyAugmentedDomtree(ir)
8,049,723✔
604
        return new(result, ir, inconsistent, tpdum, lazypostdomtree, lazyagdomtree, Int[],
8,049,723✔
605
                   true, true, nothing, true, true, false, true)
606
    end
607
end
608

609
give_up_refinements!(sv::PostOptAnalysisState) =
9,895✔
610
    sv.all_retpaths_consistent = sv.all_effect_free = sv.effect_free_if_argmem_only =
611
    sv.all_nothrow = sv.all_noub = sv.nortcall = false
612

613
function any_refinable(sv::PostOptAnalysisState)
614
    effects = sv.result.ipo_effects
96,617,832✔
615
    return ((!is_consistent(effects) & sv.all_retpaths_consistent) |
96,617,832✔
616
            (!is_effect_free(effects) & sv.all_effect_free) |
617
            (!is_nothrow(effects) & sv.all_nothrow) |
618
            (!is_noub(effects) & sv.all_noub) |
619
            (!is_nortcall(effects) & sv.nortcall))
620
end
621

622
struct GetNativeEscapeCache{CodeCache}
623
    code_cache::CodeCache
624
    GetNativeEscapeCache(code_cache::CodeCache) where CodeCache = new{CodeCache}(code_cache)
22✔
625
end
626
GetNativeEscapeCache(interp::AbstractInterpreter) = GetNativeEscapeCache(code_cache(interp))
22✔
627
function ((; code_cache)::GetNativeEscapeCache)(mi::MethodInstance)
628
    codeinst = get(code_cache, mi, nothing)
88✔
629
    codeinst isa CodeInstance || return false
44✔
630
    argescapes = traverse_analysis_results(codeinst) do @nospecialize result
44✔
631
        return result isa EscapeAnalysis.ArgEscapeCache ? result : nothing
×
632
    end
633
    if argescapes !== nothing
44✔
634
        return argescapes
×
635
    end
636
    effects = decode_effects(codeinst.ipo_purity_bits)
44✔
637
    if is_effect_free(effects) && is_inaccessiblememonly(effects)
44✔
638
        # We might not have run EA on simple frames without any escapes (e.g. when optimization
639
        # is skipped when result is constant-folded by abstract interpretation). If those
640
        # frames aren't inlined, the accuracy of EA for caller context takes a big hit.
641
        # This is a HACK to avoid that, but obviously, a more comprehensive fix would be ideal.
642
        return true
×
643
    end
644
    return false
44✔
645
end
646

647
function refine_effects!(interp::AbstractInterpreter, sv::PostOptAnalysisState)
8,049,723✔
648
    if !is_effect_free(sv.result.ipo_effects) && sv.all_effect_free && !isempty(sv.ea_analysis_pending)
8,049,723✔
649
        ir = sv.ir
22✔
650
        nargs = length(ir.argtypes)
22✔
651
        estate = EscapeAnalysis.analyze_escapes(ir, nargs, optimizer_lattice(interp), GetNativeEscapeCache(interp))
22✔
652
        argescapes = EscapeAnalysis.ArgEscapeCache(estate)
22✔
653
        stack_analysis_result!(sv.result, argescapes)
22✔
654
        validate_mutable_arg_escapes!(estate, sv)
22✔
655
    end
656

657
    any_refinable(sv) || return false
15,924,484✔
658
    effects = sv.result.ipo_effects
174,962✔
659
    sv.result.ipo_effects = Effects(effects;
174,966✔
660
        consistent = sv.all_retpaths_consistent ? ALWAYS_TRUE : effects.consistent,
661
        effect_free = sv.all_effect_free ? ALWAYS_TRUE :
662
                      sv.effect_free_if_argmem_only === true ? EFFECT_FREE_IF_INACCESSIBLEMEMONLY : effects.effect_free,
663
        nothrow = sv.all_nothrow ? true : effects.nothrow,
664
        noub = sv.all_noub ? (sv.any_conditional_ub ? NOUB_IF_NOINBOUNDS : ALWAYS_TRUE) : effects.noub,
665
        nortcall = sv.nortcall ? true : effects.nortcall)
666
    return true
174,962✔
667
end
668

669
function is_ipo_dataflow_analysis_profitable(effects::Effects)
670
    return !(is_consistent(effects) && is_effect_free(effects) &&
8,070,253✔
671
             is_nothrow(effects) && is_noub(effects))
672
end
673

674
function iscall_with_boundscheck(@nospecialize(stmt), sv::PostOptAnalysisState)
675
    isexpr(stmt, :call) || return false
163,631,482✔
676
    ft = argextype(stmt.args[1], sv.ir)
15,587,620✔
677
    f = singleton_type(ft)
15,587,620✔
678
    f === nothing && return false
15,587,620✔
679
    if f === getfield
15,560,746✔
680
        nargs = 4
×
681
    elseif f === memoryrefnew || f === memoryrefget || f === memoryref_isassigned
7,559,321✔
682
        nargs = 4
×
683
    elseif f === memoryrefset!
6,698,761✔
684
        nargs = 5
×
685
    else
686
        return false
6,630,196✔
687
    end
688
    length(stmt.args) < nargs && return false
8,930,550✔
689
    boundscheck = stmt.args[end]
2,945,894✔
690
    argextype(boundscheck, sv.ir) === Bool || return false
3,644,340✔
691
    isa(boundscheck, SSAValue) || return false
2,247,452✔
692
    return true
2,247,444✔
693
end
694

695
function check_all_args_noescape!(sv::PostOptAnalysisState, ir::IRCode, @nospecialize(stmt),
22✔
696
                                  estate::EscapeAnalysis.EscapeState)
697
    stmt isa Expr || return false
22✔
698
    if isexpr(stmt, :invoke)
22✔
699
        startidx = 2
×
700
    elseif isexpr(stmt, :new)
×
701
        startidx = 1
×
702
    else
703
        return false
×
704
    end
705
    has_no_escape(x::EscapeAnalysis.EscapeInfo) =
44✔
706
        EscapeAnalysis.has_no_escape(EscapeAnalysis.ignore_argescape(x))
707
    for i = startidx:length(stmt.args)
22✔
708
        arg = stmt.args[i]
22✔
709
        argt = argextype(arg, ir)
22✔
710
        if is_mutation_free_argtype(argt)
22✔
711
            continue
×
712
        end
713
        # See if we can find the allocation
714
        if isa(arg, Argument)
22✔
NEW
715
            if has_no_escape(estate[arg])
×
716
                # Even if we prove everything else effect_free, the best we can
717
                # say is :effect_free_if_argmem_only
718
                if sv.effect_free_if_argmem_only === nothing
×
719
                    sv.effect_free_if_argmem_only = true
×
720
                end
721
            else
722
                sv.effect_free_if_argmem_only = false
×
723
            end
724
            return false
×
725
        elseif isa(arg, SSAValue)
22✔
726
            has_no_escape(estate[arg]) || return false
44✔
727
            check_all_args_noescape!(sv, ir, ir[arg][:stmt], estate) || return false
×
728
        else
729
            return false
×
730
        end
731
    end
×
732
    return true
×
733
end
734

735
function validate_mutable_arg_escapes!(estate::EscapeAnalysis.EscapeState, sv::PostOptAnalysisState)
736
    ir = sv.ir
22✔
737
    for idx in sv.ea_analysis_pending
22✔
738
        # See if any mutable memory was allocated in this function and determined
739
        # not to escape.
740
        inst = ir[SSAValue(idx)]
22✔
741
        stmt = inst[:stmt]
22✔
742
        if !check_all_args_noescape!(sv, ir, stmt, estate)
22✔
743
            return sv.all_effect_free = false
22✔
744
        end
745
    end
×
746
    return true
×
747
end
748

749
function is_conditional_noub(inst::Instruction, sv::PostOptAnalysisState)
750
    stmt = inst[:stmt]
3,264,256✔
751
    iscall_with_boundscheck(stmt, sv) || return false
6,208,027✔
752
    barg = stmt.args[end]::SSAValue
320,485✔
753
    bstmt = sv.ir[barg][:stmt]
320,485✔
754
    isexpr(bstmt, :boundscheck) || return false
320,485✔
755
    # If IR_FLAG_INBOUNDS is already set, no more conditional ub
756
    (!isempty(bstmt.args) && bstmt.args[1] === false) && return false
320,485✔
757
    return true
297,641✔
758
end
759

760
function scan_non_dataflow_flags!(inst::Instruction, sv::PostOptAnalysisState)
88,568,109✔
761
    flag = inst[:flag]
88,568,109✔
762
    # If we can prove that the argmem does not escape the current function, we can
763
    # refine this to :effect_free.
764
    needs_ea_validation = has_flag(flag, IR_FLAGS_NEEDS_EA)
88,568,109✔
765
    stmt = inst[:stmt]
88,568,109✔
766
    if !needs_ea_validation
88,568,109✔
767
        if !isterminator(stmt) && stmt !== nothing
173,559,553✔
768
            # ignore control flow node – they are not removable on their own and thus not
769
            # have `IR_FLAG_EFFECT_FREE` but still do not taint `:effect_free`-ness of
770
            # the whole method invocation
771
            sv.all_effect_free &= has_flag(flag, IR_FLAG_EFFECT_FREE)
81,238,027✔
772
        end
773
    elseif sv.all_effect_free
71,446✔
774
        if (isexpr(stmt, :invoke) || isexpr(stmt, :new) ||
22✔
775
            # HACK for performance: limit the scope of EA to code with object field access only,
776
            # since its abilities to reason about e.g. arrays are currently very limited anyways.
777
            is_known_call(stmt, setfield!, sv.ir))
778
            push!(sv.ea_analysis_pending, inst.idx)
22✔
779
        else
780
            sv.all_effect_free = false
×
781
        end
782
    end
783
    sv.all_nothrow &= has_flag(flag, IR_FLAG_NOTHROW)
88,568,109✔
784
    if !has_flag(flag, IR_FLAG_NOUB)
88,568,109✔
785
        # Special case: `:boundscheck` into `getfield` or memory operations is `:noub_if_noinbounds`
786
        if is_conditional_noub(inst, sv)
3,561,897✔
787
            sv.any_conditional_ub = true
297,641✔
788
        else
789
            sv.all_noub = false
2,966,615✔
790
        end
791
    end
792
    if !has_flag(flag, IR_FLAG_NORTCALL)
88,568,109✔
793
        # if a function call that might invoke `Core.Compiler.return_type` has been deleted,
794
        # there's no need to taint with `:nortcall`, allowing concrete evaluation
795
        if iscallstmt(stmt)
131,784,585✔
796
            sv.nortcall = false
1,741,099✔
797
        end
798
    end
799
end
800

801
function scan_inconsistency!(inst::Instruction, sv::PostOptAnalysisState)
86,345,295✔
802
    flag = inst[:flag]
86,345,295✔
803
    stmt_inconsistent = !has_flag(flag, IR_FLAG_CONSISTENT)
86,345,295✔
804
    stmt = inst[:stmt]
86,345,295✔
805
    # Special case: For `getfield` and memory operations, we allow inconsistency of the :boundscheck argument
806
    (; inconsistent, tpdum) = sv
86,345,295✔
807
    if iscall_with_boundscheck(stmt, sv)
100,398,553✔
808
        for i = 1:(length(stmt.args)-1)
1,926,959✔
809
            val = stmt.args[i]
5,790,149✔
810
            if isa(val, SSAValue)
5,790,149✔
811
                stmt_inconsistent |= val.id in inconsistent
3,332,052✔
812
                count!(tpdum, val)
1,666,026✔
813
            end
814
        end
9,653,339✔
815
    else
816
        for ur in userefs(stmt)
102,772,264✔
817
            val = ur[]
51,708,249✔
818
            if isa(val, SSAValue)
51,708,249✔
819
                stmt_inconsistent |= val.id in inconsistent
32,515,950✔
820
                count!(tpdum, val)
16,258,039✔
821
            end
822
        end
51,708,249✔
823
    end
824
    stmt_inconsistent && push!(inconsistent, inst.idx)
86,345,294✔
825
    return stmt_inconsistent
86,345,295✔
826
end
827

828
struct ScanStmt
829
    sv::PostOptAnalysisState
8,049,723✔
830
end
831

832
function ((; sv)::ScanStmt)(inst::Instruction, lstmt::Int, bb::Int)
86,355,190✔
833
    stmt = inst[:stmt]
86,355,190✔
834

835
    if isa(stmt, EnterNode)
86,355,190✔
836
        # try/catch not yet modeled
837
        give_up_refinements!(sv)
9,895✔
838
        return nothing
9,895✔
839
    end
840

841
    scan_non_dataflow_flags!(inst, sv)
86,345,295✔
842

843
    stmt_inconsistent = scan_inconsistency!(inst, sv)
86,345,295✔
844

845
    if stmt_inconsistent
86,345,295✔
846
        if !has_flag(inst[:flag], IR_FLAG_NOTHROW)
73,535,961✔
847
            # Taint :consistent if this statement may raise since :consistent requires
848
            # consistent termination. TODO: Separate :consistent_return and :consistent_termination from :consistent.
849
            sv.all_retpaths_consistent = false
60,987,293✔
850
        end
851
        if inst.idx == lstmt
73,535,961✔
852
            if isa(stmt, ReturnNode) && isdefined(stmt, :val)
2,879,322✔
853
                sv.all_retpaths_consistent = false
10,492✔
854
            elseif isa(stmt, GotoIfNot)
2,868,830✔
855
                # Conditional Branch with inconsistent condition.
856
                # If we do not know this function terminates, taint consistency, now,
857
                # :consistent requires consistent termination. TODO: Just look at the
858
                # inconsistent region.
859
                if !sv.result.ipo_effects.terminates
1,693,178✔
860
                    sv.all_retpaths_consistent = false
1,317,921✔
861
                elseif visit_conditional_successors(sv.lazypostdomtree, sv.ir, bb) do succ::Int
375,257✔
862
                        return any_stmt_may_throw(sv.ir, succ)
1,255,879✔
863
                    end
864
                    # check if this `GotoIfNot` leads to conditional throws, which taints consistency
865
                    sv.all_retpaths_consistent = false
364,472✔
866
                else
867
                    (; cfg, domtree) = get!(sv.lazyagdomtree)
10,785✔
868
                    for succ in iterated_dominance_frontier(cfg, BlockLiveness(sv.ir.cfg.blocks[bb].succs, nothing), domtree)
10,785✔
869
                        if succ == length(cfg.blocks)
16,649✔
870
                            # Phi node in the virtual exit -> We have a conditional
871
                            # return. TODO: Check if all the retvals are egal.
872
                            sv.all_retpaths_consistent = false
6✔
873
                        else
874
                            visit_bb_phis!(sv.ir, succ) do phiidx::Int
16,643✔
875
                                push!(sv.inconsistent, phiidx)
9,042✔
876
                            end
877
                        end
878
                    end
16,649✔
879
                end
880
            end
881
        end
882
    end
883

884
    # bail out early if there are no possibilities to refine the effects
885
    if !any_refinable(sv)
86,345,295✔
886
        return nothing
7,841,144✔
887
    end
888

889
    return true
78,504,151✔
890
end
891

892
function check_inconsistentcy!(sv::PostOptAnalysisState, scanner::BBScanner)
×
893
    (; ir, inconsistent, tpdum) = sv
×
894

895
    scan!(ScanStmt(sv), scanner, false)
×
896
    complete!(tpdum); push!(scanner.bb_ip, 1)
×
897
    populate_def_use_map!(tpdum, scanner)
×
898

899
    stmt_ip = BitSetBoundedMinPrioritySet(length(ir.stmts))
×
900
    for def in inconsistent
×
901
        for use in tpdum[def]
×
902
            if !(use in inconsistent)
×
903
                push!(inconsistent, use)
×
904
                append!(stmt_ip, tpdum[use])
×
905
            end
906
        end
×
907
    end
×
908
    lazydomtree = LazyDomtree(ir)
×
909
    while !isempty(stmt_ip)
×
910
        idx = popfirst!(stmt_ip)
×
911
        inst = ir[SSAValue(idx)]
×
912
        stmt = inst[:stmt]
×
913
        if iscall_with_boundscheck(stmt, sv)
×
914
            any_non_boundscheck_inconsistent = false
×
915
            for i = 1:(length(stmt.args)-1)
×
916
                val = stmt.args[i]
×
917
                if isa(val, SSAValue)
×
918
                    any_non_boundscheck_inconsistent |= val.id in inconsistent
×
919
                    any_non_boundscheck_inconsistent && break
×
920
                end
921
            end
×
922
            any_non_boundscheck_inconsistent || continue
×
923
        elseif isa(stmt, ReturnNode)
×
924
            sv.all_retpaths_consistent = false
×
925
        elseif isa(stmt, GotoIfNot)
×
926
            bb = block_for_inst(ir, idx)
×
927
            cfg = ir.cfg
×
928
            blockliveness = BlockLiveness(cfg.blocks[bb].succs, nothing)
×
929
            for succ in iterated_dominance_frontier(cfg, blockliveness, get!(lazydomtree))
×
930
                visit_bb_phis!(ir, succ) do phiidx::Int
×
931
                    push!(inconsistent, phiidx)
×
932
                    push!(stmt_ip, phiidx)
×
933
                end
934
            end
×
935
        end
936
        sv.all_retpaths_consistent || break
×
937
        append!(inconsistent, tpdum[idx])
×
938
        append!(stmt_ip, tpdum[idx])
×
939
    end
×
940
end
941

942
function ipo_dataflow_analysis!(interp::AbstractInterpreter, ir::IRCode, result::InferenceResult)
8,070,253✔
943
    if !is_ipo_dataflow_analysis_profitable(result.ipo_effects)
8,070,253✔
944
        return false
20,530✔
945
    end
946

947
    @assert isempty(ir.new_nodes) "IRCode should be compacted before post-opt analysis"
8,049,723✔
948

949
    sv = PostOptAnalysisState(result, ir)
8,049,723✔
950
    scanner = BBScanner(ir)
8,049,723✔
951

952
    completed_scan = scan!(ScanStmt(sv), scanner, true)
8,049,723✔
953

954
    if !completed_scan
8,049,723✔
955
        if sv.all_retpaths_consistent
28,126✔
956
            check_inconsistentcy!(sv, scanner)
×
957
        else
958
            # No longer any dataflow concerns, just scan the flags
959
            scan!(scanner, false) do inst::Instruction, lstmt::Int, bb::Int
28,126✔
960
                scan_non_dataflow_flags!(inst, sv)
2,222,814✔
961
                # bail out early if there are no possibilities to refine the effects
962
                if !any_refinable(sv)
2,222,814✔
963
                    return nothing
23,700✔
964
                end
965
                return true
2,199,114✔
966
            end
967
        end
968
    end
969

970
    return refine_effects!(interp, sv)
8,049,723✔
971
end
972

973
# run the optimization work
974
function optimize(interp::AbstractInterpreter, opt::OptimizationState, caller::InferenceResult)
8,068,500✔
975
    @timeit "optimizer" ir = run_passes_ipo_safe(opt.src, opt, caller)
8,070,253✔
976
    ipo_dataflow_analysis!(interp, ir, caller)
8,070,253✔
977
    return finish(interp, opt, ir, caller)
8,070,253✔
978
end
979

980
macro pass(name, expr)
981
    optimize_until = esc(:optimize_until)
982
    stage = esc(:__stage__)
983
    macrocall = :(@timeit $(esc(name)) $(esc(expr)))
984
    macrocall.args[2] = __source__  # `@timeit` may want to use it
985
    quote
986
        $macrocall
13,314,877✔
987
        matchpass($optimize_until, ($stage += 1), $(esc(name))) && $(esc(:(@goto __done__)))
51,763✔
988
    end
989
end
990

991
matchpass(optimize_until::Int, stage, _) = optimize_until == stage
55✔
992
matchpass(optimize_until::String, _, name) = optimize_until == name
66✔
993
matchpass(::Nothing, _, _) = false
×
994

995
function run_passes_ipo_safe(
8,070,304✔
996
    ci::CodeInfo,
997
    sv::OptimizationState,
998
    caller::InferenceResult,
999
    optimize_until = nothing,  # run all passes by default
1000
)
1001
    __stage__ = 0  # used by @pass
8,077,653✔
1002
    # NOTE: The pass name MUST be unique for `optimize_until::AbstractString` to work
1003
    @pass "convert"   ir = convert_to_ircode(ci, sv)
1004
    @pass "slot2reg"  ir = slot2reg(ir, ci, sv)
1005
    # TODO: Domsorting can produce an updated domtree - no need to recompute here
1006
    @pass "compact 1" ir = compact!(ir)
1007
    @pass "Inlining"  ir = ssa_inlining_pass!(ir, sv.inlining, ci.propagate_inbounds)
1008
    # @timeit "verify 2" verify_ir(ir)
1009
    @pass "compact 2" ir = compact!(ir)
1010
    @pass "SROA"      ir = sroa_pass!(ir, sv.inlining)
1011
    @pass "ADCE"      (ir, made_changes) = adce_pass!(ir, sv.inlining)
1012
    if made_changes
8,070,288✔
1013
        @pass "compact 3" ir = compact!(ir, true)
1014
    end
1015
    if is_asserts()
8,070,288✔
1016
        @timeit "verify 3" begin
1017
            verify_ir(ir, true, false, optimizer_lattice(sv.inlining.interp))
×
1018
            verify_linetable(ir.debuginfo, length(ir.stmts))
×
1019
        end
1020
    end
1021
    @label __done__  # used by @pass
1022
    return ir
8,070,304✔
1023
end
1024

1025
function strip_trailing_junk!(code::Vector{Any}, ssavaluetypes::Vector{Any}, ssaflags::Vector, debuginfo::DebugInfoStream, cfg::CFG, info::Vector{CallInfo})
8,070,306✔
1026
    # Remove `nothing`s at the end, we don't handle them well
1027
    # (we expect the last instruction to be a terminator)
1028
    codelocs = debuginfo.codelocs
8,070,306✔
1029
    for i = length(code):-1:1
16,082,234✔
1030
        if code[i] !== nothing
8,070,306✔
1031
            resize!(code, i)
8,070,306✔
1032
            resize!(ssavaluetypes, i)
8,070,306✔
1033
            resize!(codelocs, 3i)
8,070,306✔
1034
            resize!(info, i)
8,070,306✔
1035
            resize!(ssaflags, i)
8,070,306✔
1036
            break
8,070,306✔
1037
        end
1038
    end
×
1039
    # If the last instruction is not a terminator, add one. This can
1040
    # happen for implicit return on dead branches.
1041
    term = code[end]
8,070,306✔
1042
    if !isa(term, GotoIfNot) && !isa(term, GotoNode) && !isa(term, ReturnNode)
8,070,306✔
1043
        push!(code, ReturnNode())
562,549✔
1044
        push!(ssavaluetypes, Union{})
562,549✔
1045
        push!(codelocs, 0, 0, 0)
562,549✔
1046
        push!(info, NoCallInfo())
562,549✔
1047
        push!(ssaflags, IR_FLAG_NOTHROW)
562,549✔
1048

1049
        # Update CFG to include appended terminator
1050
        old_range = cfg.blocks[end].stmts
562,549✔
1051
        new_range = StmtRange(first(old_range), last(old_range) + 1)
562,549✔
1052
        cfg.blocks[end] = BasicBlock(cfg.blocks[end], new_range)
562,549✔
1053
        (length(cfg.index) == length(cfg.blocks)) && (cfg.index[end] += 1)
562,549✔
1054
    end
1055
    nothing
1056
end
1057

1058
function changed_lineinfo(di::DebugInfo, codeloc::Int, prevloc::Int)
140,881,481✔
1059
    while true
142,925,269✔
1060
        next = getdebugidx(di, codeloc)
142,925,269✔
1061
        line = next[1]
142,925,269✔
1062
        line < 0 && return false # invalid info
142,925,269✔
1063
        line == 0 && next[2] == 0 && return false # no new info
142,834,390✔
1064
        prevloc <= 0 && return true # no old info
128,812,147✔
1065
        prev = getdebugidx(di, prevloc)
120,808,405✔
1066
        next === prev && return false # exactly identical
120,808,405✔
1067
        prevline = prev[1]
16,398,391✔
1068
        prevline < 0 && return true # previous invalid info, now valid
16,398,391✔
1069
        edge = next[2]
16,398,391✔
1070
        edge === prev[2] || return true # change to this edge
16,865,654✔
1071
        linetable = di.linetable
15,931,128✔
1072
        # check for change to line number here
1073
        if linetable === nothing || line == 0
15,932,354✔
1074
            line == prevline || return true
29,816,016✔
1075
        else
1076
            changed_lineinfo(linetable::DebugInfo, Int(line), Int(prevline)) && return true
1,226✔
1077
        end
1078
        # check for change to edge here
1079
        edge == 0 && return false # no edge here
2,044,815✔
1080
        di = di.edges[Int(edge)]::DebugInfo
2,043,788✔
1081
        codeloc = Int(next[3])
2,043,788✔
1082
        prevloc = Int(prev[3])
2,043,788✔
1083
    end
2,043,788✔
1084
end
1085

1086
function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
8,070,306✔
1087
    # Update control-flow to reflect any unreachable branches.
1088
    ssavaluetypes = ci.ssavaluetypes::Vector{Any}
8,070,306✔
1089
    ci.code = code = copy_exprargs(ci.code)
141,349,960✔
1090
    di = DebugInfoStream(sv.linfo, ci.debuginfo, length(code))
8,070,306✔
1091
    codelocs = di.codelocs
8,070,306✔
1092
    ssaflags = ci.ssaflags
8,070,306✔
1093
    for i = 1:length(code)
8,070,306✔
1094
        expr = code[i]
141,349,960✔
1095
        if !(i in sv.unreachable)
141,349,960✔
1096
            if isa(expr, GotoIfNot)
114,188,410✔
1097
                # Replace this live GotoIfNot with:
1098
                # - no-op if :nothrow and the branch target is unreachable
1099
                # - cond if :nothrow and both targets are unreachable
1100
                # - typeassert if must-throw
1101
                block = block_for_inst(sv.cfg, i)
7,132,948✔
1102
                if ssavaluetypes[i] === Bottom
7,132,948✔
1103
                    destblock = block_for_inst(sv.cfg, expr.dest)
546✔
1104
                    cfg_delete_edge!(sv.cfg, block, block + 1)
546✔
1105
                    ((block + 1) != destblock) && cfg_delete_edge!(sv.cfg, block, destblock)
546✔
1106
                    expr = Expr(:call, Core.typeassert, expr.cond, Bool)
546✔
1107
                elseif i + 1 in sv.unreachable
7,132,402✔
1108
                    @assert has_flag(ssaflags[i], IR_FLAG_NOTHROW)
1,134,591✔
1109
                    cfg_delete_edge!(sv.cfg, block, block + 1)
1,134,591✔
1110
                    expr = GotoNode(expr.dest)
1,134,591✔
1111
                elseif expr.dest in sv.unreachable
5,997,811✔
1112
                    @assert has_flag(ssaflags[i], IR_FLAG_NOTHROW)
2,131,047✔
1113
                    cfg_delete_edge!(sv.cfg, block, block_for_inst(sv.cfg, expr.dest))
2,131,047✔
1114
                    expr = nothing
1,067✔
1115
                end
1116
                code[i] = expr
7,132,948✔
1117
            elseif isa(expr, EnterNode)
107,055,462✔
1118
                catchdest = expr.catch_dest
63,924✔
1119
                if catchdest in sv.unreachable
63,924✔
1120
                    cfg_delete_edge!(sv.cfg, block_for_inst(sv.cfg, i), block_for_inst(sv.cfg, catchdest))
855✔
1121
                    if isdefined(expr, :scope)
855✔
1122
                        # We've proven that nothing inside the enter region throws,
1123
                        # but we don't yet know whether something might read the scope,
1124
                        # so we need to retain this enter for the time being. However,
1125
                        # we use the special marker `0` to indicate that setting up
1126
                        # the try/catch frame is not required.
1127
                        code[i] = EnterNode(expr, 0)
12✔
1128
                    else
1129
                        code[i] = nothing
843✔
1130
                    end
1131
                end
1132
            elseif isa(expr, PhiNode)
106,991,538✔
1133
                new_edges = Int32[]
31✔
1134
                new_vals = Any[]
31✔
1135
                for j = 1:length(expr.edges)
31✔
1136
                    edge = expr.edges[j]
44✔
1137
                    (edge in sv.unreachable || (ssavaluetypes[edge] === Union{} && !isa(code[edge], PhiNode))) && continue
86✔
1138
                    push!(new_edges, edge)
40✔
1139
                    if isassigned(expr.values, j)
40✔
1140
                        push!(new_vals, expr.values[j])
36✔
1141
                    else
1142
                        resize!(new_vals, length(new_edges))
4✔
1143
                    end
1144
                end
63✔
1145
                code[i] = PhiNode(new_edges, new_vals)
31✔
1146
            end
1147
        end
1148
    end
274,629,614✔
1149

1150
    # Go through and add an unreachable node after every
1151
    # Union{} call. Then reindex labels.
1152
    stmtinfo = sv.stmt_info
8,070,306✔
1153
    meta = Expr[]
8,070,306✔
1154
    idx = 1
7,375✔
1155
    oldidx = 1
7,375✔
1156
    nstmts = length(code)
8,070,306✔
1157
    ssachangemap = labelchangemap = blockchangemap = nothing
7,375✔
1158
    prevloc = 0
7,375✔
1159
    while idx <= length(code)
148,950,580✔
1160
        if sv.insert_coverage && changed_lineinfo(ci.debuginfo, oldidx, prevloc)
140,880,274✔
1161
            # insert a side-effect instruction before the current instruction in the same basic block
1162
            insert!(code, idx, Expr(:code_coverage_effect))
22,357,119✔
1163
            splice!(codelocs, 3idx-2:3idx-3, (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0]))
22,357,119✔
1164
            insert!(ssavaluetypes, idx, Nothing)
22,357,119✔
1165
            insert!(stmtinfo, idx, NoCallInfo())
22,357,119✔
1166
            insert!(ssaflags, idx, IR_FLAG_NULL)
22,357,119✔
1167
            if ssachangemap === nothing
22,357,119✔
1168
                ssachangemap = fill(0, nstmts)
141,259,012✔
1169
            end
1170
            if labelchangemap === nothing
22,357,119✔
1171
                labelchangemap = fill(0, nstmts)
141,259,012✔
1172
            end
1173
            ssachangemap[oldidx] += 1
22,357,119✔
1174
            if oldidx < length(labelchangemap)
22,357,119✔
1175
                labelchangemap[oldidx + 1] += 1
22,294,074✔
1176
            end
1177
            if blockchangemap === nothing
22,357,119✔
1178
                blockchangemap = fill(0, length(sv.cfg.blocks))
28,275,008✔
1179
            end
1180
            blockchangemap[block_for_inst(sv.cfg, oldidx)] += 1
22,357,119✔
1181
            idx += 1
22,357,119✔
1182
            prevloc = oldidx
13,321✔
1183
        end
1184
        if ssavaluetypes[idx] === Union{} && !(oldidx in sv.unreachable) && !isa(code[idx], PhiNode)
140,880,274✔
1185
            # We should have converted any must-throw terminators to an equivalent w/o control-flow edges
1186
            @assert !isterminator(code[idx])
684,262✔
1187

1188
            block = block_for_inst(sv.cfg, oldidx)
684,262✔
1189
            block_end = last(sv.cfg.blocks[block].stmts) + (idx - oldidx)
684,262✔
1190

1191
            # Delete all successors to this basic block
1192
            for succ in sv.cfg.blocks[block].succs
684,262✔
1193
                preds = sv.cfg.blocks[succ].preds
530,078✔
1194
                deleteat!(preds, findfirst(x::Int->x==block, preds)::Int)
2,654,670✔
1195
            end
530,078✔
1196
            empty!(sv.cfg.blocks[block].succs)
690,627✔
1197

1198
            if !(idx < length(code) && isa(code[idx + 1], ReturnNode) && !isdefined((code[idx + 1]::ReturnNode), :val))
684,262✔
1199
                # Any statements from here to the end of the block have been wrapped in Core.Const(...)
1200
                # by type inference (effectively deleting them). Only task left is to replace the block
1201
                # terminator with an explicit `unreachable` marker.
1202

1203
                if block_end > idx
684,262✔
1204
                    if is_asserts()
275,058✔
1205
                        # Verify that type-inference did its job
1206
                        for i = (oldidx + 1):last(sv.cfg.blocks[block].stmts)
×
1207
                            @assert i in sv.unreachable
×
1208
                        end
×
1209
                    end
1210
                    code[block_end] = ReturnNode()
275,058✔
1211
                    codelocs[3block_end-2], codelocs[3block_end-1], codelocs[3block_end-0] = (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0])
275,058✔
1212
                    ssavaluetypes[block_end] = Union{}
275,058✔
1213
                    stmtinfo[block_end] = NoCallInfo()
275,058✔
1214
                    ssaflags[block_end] = IR_FLAG_NOTHROW
275,058✔
1215
                    idx += block_end - idx
275,058✔
1216
                else
1217
                    insert!(code, idx + 1, ReturnNode())
409,204✔
1218
                    splice!(codelocs, 3idx-2:3idx-3, (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0]))
409,204✔
1219
                    insert!(ssavaluetypes, idx + 1, Union{})
409,204✔
1220
                    insert!(stmtinfo, idx + 1, NoCallInfo())
409,204✔
1221
                    insert!(ssaflags, idx + 1, IR_FLAG_NOTHROW)
409,204✔
1222
                    if ssachangemap === nothing
409,204✔
1223
                        ssachangemap = fill(0, nstmts)
414✔
1224
                    end
1225
                    if labelchangemap === nothing
409,204✔
1226
                        labelchangemap = sv.insert_coverage ? fill(0, nstmts) : ssachangemap
414✔
1227
                    end
1228
                    if oldidx < length(ssachangemap)
409,204✔
1229
                        ssachangemap[oldidx + 1] += 1
409,204✔
1230
                        sv.insert_coverage && (labelchangemap[oldidx + 1] += 1)
409,204✔
1231
                    end
1232
                    if blockchangemap === nothing
409,204✔
1233
                        blockchangemap = fill(0, length(sv.cfg.blocks))
135✔
1234
                    end
1235
                    blockchangemap[block] += 1
409,204✔
1236
                    idx += 1
409,204✔
1237
                end
1238
                oldidx = last(sv.cfg.blocks[block].stmts)
684,262✔
1239
            end
1240
        end
1241
        idx += 1
140,880,274✔
1242
        oldidx += 1
140,880,274✔
1243
    end
140,880,274✔
1244
    empty!(sv.unreachable)
8,565,476✔
1245

1246
    if ssachangemap !== nothing && labelchangemap !== nothing
8,070,306✔
1247
        renumber_ir_elements!(code, ssachangemap, labelchangemap)
8,003,770✔
1248
    end
1249
    if blockchangemap !== nothing
8,070,306✔
1250
        renumber_cfg_stmts!(sv.cfg, blockchangemap)
8,003,770✔
1251
    end
1252

1253
    for i = 1:length(code)
8,070,306✔
1254
        code[i] = process_meta!(meta, code[i])
164,116,283✔
1255
    end
320,162,260✔
1256
    strip_trailing_junk!(code, ssavaluetypes, ssaflags, di, sv.cfg, stmtinfo)
8,070,306✔
1257
    types = Any[]
8,070,306✔
1258
    stmts = InstructionStream(code, types, stmtinfo, codelocs, ssaflags)
8,070,306✔
1259
    # NOTE this `argtypes` contains types of slots yet: it will be modified to contain the
1260
    # types of call arguments only once `slot2reg` converts this `IRCode` to the SSA form
1261
    # and eliminates slots (see below)
1262
    argtypes = sv.slottypes
8,070,306✔
1263
    return IRCode(stmts, sv.cfg, di, argtypes, meta, sv.sptypes)
8,070,306✔
1264
end
1265

1266
function process_meta!(meta::Vector{Expr}, @nospecialize stmt)
1267
    if isexpr(stmt, :meta) && length(stmt.args) ≥ 1
164,116,283✔
1268
        push!(meta, stmt)
320✔
1269
        return nothing
320✔
1270
    end
1271
    return stmt
164,115,963✔
1272
end
1273

1274
function slot2reg(ir::IRCode, ci::CodeInfo, sv::OptimizationState)
1✔
1275
    # need `ci` for the slot metadata, IR for the code
1276
    svdef = sv.linfo.def
7,375✔
1277
    @timeit "domtree 1" domtree = construct_domtree(ir)
8,070,303✔
1278
    defuse_insts = scan_slot_def_use(Int(ci.nargs), ci, ir.stmts.stmt)
8,070,303✔
1279
    𝕃ₒ = optimizer_lattice(sv.inlining.interp)
7,375✔
1280
    @timeit "construct_ssa" ir = construct_ssa!(ci, ir, sv, domtree, defuse_insts, 𝕃ₒ) # consumes `ir`
8,070,303✔
1281
    # NOTE now we have converted `ir` to the SSA form and eliminated slots
1282
    # let's resize `argtypes` now and remove unnecessary types for the eliminated slots
1283
    resize!(ir.argtypes, ci.nargs)
8,070,303✔
1284
    return ir
8,070,303✔
1285
end
1286

1287
## Computing the cost of a function body
1288

1289
# saturating sum (inputs are non-negative), prevents overflow with typemax(Int) below
1290
plus_saturate(x::Int, y::Int) = max(x, y, x+y)
233,811,715✔
1291

1292
# known return type
1293
isknowntype(@nospecialize T) = (T === Union{}) || isa(T, Const) || isconcretetype(widenconst(T))
777,906✔
1294

1295
function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState},
193,096,751✔
1296
                        params::OptimizationParams)
1297
    #=const=# UNKNOWN_CALL_COST = 20
18✔
1298
    head = ex.head
193,096,751✔
1299
    if is_meta_expr_head(head)
381,443,675✔
1300
        return 0
4,760,737✔
1301
    elseif head === :call
188,336,014✔
1302
        farg = ex.args[1]
45,546,406✔
1303
        ftyp = argextype(farg, src, sptypes)
45,546,406✔
1304
        if ftyp === IntrinsicFunction && farg isa SSAValue
45,546,406✔
1305
            # if this comes from code that was already inlined into another function,
1306
            # Consts have been widened. try to recover in simple cases.
1307
            farg = isa(src, CodeInfo) ? src.code[farg.id] : src[farg][:stmt]
48✔
1308
            if isa(farg, GlobalRef) || isa(farg, QuoteNode) || isa(farg, IntrinsicFunction) || isexpr(farg, :static_parameter)
48✔
1309
                ftyp = argextype(farg, src, sptypes)
96✔
1310
            end
1311
        end
1312
        f = singleton_type(ftyp)
45,546,406✔
1313
        if isa(f, IntrinsicFunction)
45,546,406✔
1314
            iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1
19,209,362✔
1315
            if isassigned(T_IFUNC, iidx)
19,209,362✔
1316
                minarg, maxarg, = T_IFUNC[iidx]
19,209,362✔
1317
                nargs = length(ex.args)
19,209,362✔
1318
                if minarg + 1 <= nargs <= maxarg + 1
19,209,362✔
1319
                    # With mostly constant arguments, all Intrinsics tend to become very cheap
1320
                    # and are likely to combine with the operations around them,
1321
                    # so reduce their cost by half.
1322
                    cost = T_IFUNC_COST[iidx]
19,206,988✔
1323
                    if cost == 0 || nargs < 3 ||
32,034,836✔
1324
                       (f === Intrinsics.cglobal || f === Intrinsics.llvmcall) # these hold malformed IR, so argextype will crash on them
1325
                        return cost
6,492,932✔
1326
                    end
1327
                    aty2 = widenconditional(argextype(ex.args[2], src, sptypes))
12,714,056✔
1328
                    nconst = Int(aty2 isa Const)
12,714,056✔
1329
                    for i = 3:nargs
12,714,056✔
1330
                        aty = widenconditional(argextype(ex.args[i], src, sptypes))
12,726,704✔
1331
                        if widenconst(aty) != widenconst(aty2)
12,726,704✔
1332
                            nconst = 0
1✔
1333
                            break
439,307✔
1334
                        end
1335
                        nconst += aty isa Const
12,287,397✔
1336
                    end
12,287,397✔
1337
                    if nconst + 2 >= nargs
12,714,056✔
1338
                        cost = (cost - 1) ÷ 2
7,349,919✔
1339
                    end
1340
                    return cost
12,714,056✔
1341
                end
1342
            end
1343
            # unknown/unhandled intrinsic: hopefully the caller gets a slightly better answer after the inlining
1344
            return UNKNOWN_CALL_COST
2,374✔
1345
        end
1346
        if isa(f, Builtin) && f !== invoke
26,337,044✔
1347
            # The efficiency of operations like a[i] and s.b
1348
            # depend strongly on whether the result can be
1349
            # inferred, so check the type of ex
1350
            if f === Core.getfield || f === Core.tuple || f === Core.getglobal
35,433,902✔
1351
                # we might like to penalize non-inferrability, but
1352
                # tuple iteration/destructuring makes that impossible
1353
                # return plus_saturate(argcost, isknowntype(extyp) ? 1 : params.inline_nonleaf_penalty)
1354
                return 0
18,875,846✔
1355
            elseif (f === Core.memoryrefget || f === Core.memoryref_isassigned) && length(ex.args) >= 3
13,708,006✔
1356
                atyp = argextype(ex.args[2], src, sptypes)
542,286✔
1357
                return isknowntype(atyp) ? 1 : params.inline_nonleaf_penalty
542,286✔
1358
            elseif f === Core.memoryrefset! && length(ex.args) >= 3
6,581,525✔
1359
                atyp = argextype(ex.args[2], src, sptypes)
235,620✔
1360
                return isknowntype(atyp) ? 5 : params.inline_nonleaf_penalty
235,620✔
1361
            elseif f === typeassert && isconstType(widenconst(argextype(ex.args[3], src, sptypes)))
6,404,014✔
1362
                return 1
44,343✔
1363
            end
1364
            fidx = find_tfunc(f)
83,617,915✔
1365
            if fidx === nothing
6,301,562✔
1366
                # unknown/unhandled builtin
1367
                # Use the generic cost of a direct function call
1368
                return UNKNOWN_CALL_COST
137,196✔
1369
            end
1370
            return T_FFUNC_COST[fidx]
6,164,366✔
1371
        end
1372
        extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes)
674,774✔
1373
        if extyp === Union{}
337,387✔
1374
            return 0
12,265✔
1375
        end
1376
        return params.inline_nonleaf_penalty
325,122✔
1377
    elseif head === :foreigncall
142,789,608✔
1378
        foreigncall = ex.args[1]
914,868✔
1379
        if foreigncall isa QuoteNode && foreigncall.value === :jl_string_ptr
914,868✔
1380
            return 1
46,169✔
1381
        end
1382
        return 20
868,699✔
1383
    elseif head === :invoke || head === :invoke_modify
280,540,979✔
1384
        # Calls whose "return type" is Union{} do not actually return:
1385
        # they are errors. Since these are not part of the typical
1386
        # run-time of the function, we omit them from
1387
        # consideration. This way, non-inlined error branches do not
1388
        # prevent inlining.
1389
        extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes)
6,417,084✔
1390
        return extyp === Union{} ? 0 : UNKNOWN_CALL_COST
3,208,542✔
1391
    elseif head === :(=)
138,666,198✔
1392
        if ex.args[1] isa GlobalRef
1,835✔
1393
            cost = UNKNOWN_CALL_COST
×
1394
        else
1395
            cost = 0
×
1396
        end
1397
        a = ex.args[2]
1,835✔
1398
        if a isa Expr
1,835✔
1399
            cost = plus_saturate(cost, statement_cost(a, -1, src, sptypes, params))
×
1400
        end
1401
        return cost
1,835✔
1402
    elseif head === :copyast
138,664,363✔
1403
        return 100
1,341✔
1404
    end
1405
    return 0
138,663,022✔
1406
end
1407

1408
function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState},
1409
                                  params::OptimizationParams)
1410
    thiscost = 0
504✔
1411
    dst(tgt) = isa(src, IRCode) ? first(src.cfg.blocks[tgt].stmts) : tgt
24,029,987✔
1412
    if stmt isa Expr
233,811,738✔
1413
        thiscost = statement_cost(stmt, line, src, sptypes, params)::Int
193,096,751✔
1414
    elseif stmt isa GotoNode
40,714,987✔
1415
        # loops are generally always expensive
1416
        # but assume that forward jumps are already counted for from
1417
        # summing the cost of the not-taken branch
1418
        thiscost = dst(stmt.label) < line ? 40 : 0
15,582,516✔
1419
    elseif stmt isa GotoIfNot
25,132,471✔
1420
        thiscost = dst(stmt.dest) < line ? 40 : 0
8,446,967✔
1421
    elseif stmt isa EnterNode
16,685,504✔
1422
        # try/catch is a couple function calls,
1423
        # but don't inline functions with try/catch
1424
        # since these aren't usually performance-sensitive functions,
1425
        # and llvm is more likely to miscompile them when these functions get large
1426
        thiscost = typemax(Int)
×
1427
    end
1428
    return thiscost
233,811,738✔
1429
end
1430

1431
function inline_cost(ir::IRCode, params::OptimizationParams, cost_threshold::Int)
1432
    bodycost = 0
29✔
1433
    for i = 1:length(ir.stmts)
4,761,153✔
1434
        stmt = ir[SSAValue(i)][:stmt]
233,811,715✔
1435
        thiscost = statement_or_branch_cost(stmt, i, ir, ir.sptypes, params)
274,526,697✔
1436
        bodycost = plus_saturate(bodycost, thiscost)
233,811,715✔
1437
        if bodycost > cost_threshold
233,811,715✔
1438
            return MAX_INLINE_COST
495,667✔
1439
        end
1440
    end
462,366,610✔
1441
    return inline_cost_clamp(bodycost)
4,265,486✔
1442
end
1443

1444
function statement_costs!(cost::Vector{Int}, body::Vector{Any}, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState}, params::OptimizationParams)
1445
    maxcost = 0
1✔
1446
    for line = 1:length(body)
1✔
1447
        stmt = body[line]
23✔
1448
        thiscost = statement_or_branch_cost(stmt, line, src, sptypes,
28✔
1449
                                            params)
1450
        cost[line] = thiscost
23✔
1451
        if thiscost > maxcost
23✔
1452
            maxcost = thiscost
2✔
1453
        end
1454
    end
45✔
1455
    return maxcost
1✔
1456
end
1457

1458
function renumber_ir_elements!(body::Vector{Any}, cfg::Union{CFG,Nothing}, ssachangemap::Vector{Int})
×
1459
    return renumber_ir_elements!(body, cfg, ssachangemap, ssachangemap)
×
1460
end
1461

1462
function cumsum_ssamap!(ssachangemap::Vector{Int})
1463
    any_change = false
×
1464
    rel_change = 0
×
1465
    for i = 1:length(ssachangemap)
24,011,312✔
1466
        val = ssachangemap[i]
310,794,013✔
1467
        any_change |= val ≠ 0
310,794,013✔
1468
        rel_change += val
310,794,013✔
1469
        if val == -1
310,794,013✔
1470
            # Keep a marker that this statement was deleted
1471
            ssachangemap[i] = typemin(Int)
×
1472
        else
1473
            ssachangemap[i] = rel_change
310,794,013✔
1474
        end
1475
    end
597,576,713✔
1476
    return any_change
24,011,312✔
1477
end
1478

1479
function renumber_ir_elements!(body::Vector{Any}, ssachangemap::Vector{Int}, labelchangemap::Vector{Int})
8,003,771✔
1480
    any_change = cumsum_ssamap!(labelchangemap)
141,259,435✔
1481
    if ssachangemap !== labelchangemap
8,003,771✔
1482
        any_change |= cumsum_ssamap!(ssachangemap)
141,259,435✔
1483
    end
1484
    any_change || return
8,003,771✔
1485
    for i = 1:length(body)
8,003,771✔
1486
        el = body[i]
164,025,758✔
1487
        if isa(el, GotoNode)
164,025,758✔
1488
            body[i] = GotoNode(el.label + labelchangemap[el.label])
4,319,140✔
1489
        elseif isa(el, GotoIfNot)
159,706,618✔
1490
            cond = el.cond
3,866,730✔
1491
            if isa(cond, SSAValue)
3,866,730✔
1492
                cond = SSAValue(cond.id + ssachangemap[cond.id])
3,786,757✔
1493
            end
1494
            was_deleted = labelchangemap[el.dest] == typemin(Int)
3,866,730✔
1495
            body[i] = was_deleted ? cond : GotoIfNot(cond, el.dest + labelchangemap[el.dest])
3,866,730✔
1496
        elseif isa(el, ReturnNode)
155,839,888✔
1497
            if isdefined(el, :val)
9,361,749✔
1498
                val = el.val
8,677,507✔
1499
                if isa(val, SSAValue)
8,677,507✔
1500
                    body[i] = ReturnNode(SSAValue(val.id + ssachangemap[val.id]))
8,305,573✔
1501
                end
1502
            end
1503
        elseif isa(el, SSAValue)
146,478,139✔
1504
            body[i] = SSAValue(el.id + ssachangemap[el.id])
5✔
1505
        elseif isa(el, PhiNode)
146,478,134✔
1506
            i = 1
6✔
1507
            edges = el.edges
6✔
1508
            values = el.values
6✔
1509
            while i <= length(edges)
15✔
1510
                was_deleted = ssachangemap[edges[i]] == typemin(Int)
9✔
1511
                if was_deleted
9✔
1512
                    deleteat!(edges, i)
×
1513
                    deleteat!(values, i)
×
1514
                else
1515
                    edges[i] += ssachangemap[edges[i]]
9✔
1516
                    val = values[i]
9✔
1517
                    if isa(val, SSAValue)
9✔
1518
                        values[i] = SSAValue(val.id + ssachangemap[val.id])
7✔
1519
                    end
1520
                    i += 1
9✔
1521
                end
1522
            end
9✔
1523
        elseif isa(el, EnterNode)
146,478,128✔
1524
            tgt = el.catch_dest
63,081✔
1525
            if tgt != 0
63,081✔
1526
                was_deleted = labelchangemap[tgt] == typemin(Int)
63,069✔
1527
                if was_deleted
63,069✔
1528
                    @assert !isdefined(el, :scope)
×
1529
                    body[i] = nothing
×
1530
                else
1531
                    if isdefined(el, :scope) && isa(el.scope, SSAValue)
63,069✔
1532
                        body[i] = EnterNode(tgt + labelchangemap[tgt], SSAValue(el.scope.id + ssachangemap[el.scope.id]))
289✔
1533
                    else
1534
                        body[i] = EnterNode(el, tgt + labelchangemap[tgt])
62,780✔
1535
                    end
1536
                end
1537
            end
1538
        elseif isa(el, Expr)
146,415,047✔
1539
            if el.head === :(=) && el.args[2] isa Expr
74,514,898✔
1540
                el = el.args[2]::Expr
6,541,917✔
1541
            end
1542
            if !is_meta_expr_head(el.head)
148,519,887✔
1543
                args = el.args
73,918,655✔
1544
                for i = 1:length(args)
73,918,655✔
1545
                    el = args[i]
145,479,416✔
1546
                    if isa(el, SSAValue)
145,479,416✔
1547
                        args[i] = SSAValue(el.id + ssachangemap[el.id])
61,411,369✔
1548
                    end
1549
                end
145,479,416✔
1550
            end
1551
        end
1552
    end
164,025,758✔
1553
end
1554

1555
function renumber_cfg_stmts!(cfg::CFG, blockchangemap::Vector{Int})
8,003,770✔
1556
    cumsum_ssamap!(blockchangemap) || return
8,003,770✔
1557
    for i = 1:length(cfg.blocks)
8,003,770✔
1558
        old_range = cfg.blocks[i].stmts
28,275,143✔
1559
        new_range = StmtRange(first(old_range) + ((i > 1) ? blockchangemap[i - 1] : 0),
28,275,143✔
1560
                              last(old_range) + blockchangemap[i])
1561
        cfg.blocks[i] = BasicBlock(cfg.blocks[i], new_range)
28,275,143✔
1562
        if i <= length(cfg.index)
28,275,143✔
1563
            cfg.index[i] = cfg.index[i] + blockchangemap[i]
28,275,143✔
1564
        end
1565
    end
28,275,143✔
1566
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc