• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / #37591

pending completion
#37591

push

local

web-flow
Allocation Profiler: Types for all allocations (#50337)

Pass the types to the allocator functions.

-------

Before this PR, we were missing the types for allocations in two cases:

1. allocations from codegen
2. allocations in `gc_managed_realloc_`

The second one is easy: those are always used for buffers, right?

For the first one: we extend the allocation functions called from
codegen, to take the type as a parameter, and set the tag there.

I kept the old interfaces around, since I think that they cannot be
removed due to supporting legacy code?

------

An example of the generated code:
```llvm
  %ptls_field6 = getelementptr inbounds {}**, {}*** %4, i64 2
  %13 = bitcast {}*** %ptls_field6 to i8**
  %ptls_load78 = load i8*, i8** %13, align 8
  %box = call noalias nonnull dereferenceable(32) {}* @ijl_gc_pool_alloc_typed(i8* %ptls_load78, i32 1184, i32 32, i64 4366152144) #7
```

Fixes #43688.
Fixes #45268.

Co-authored-by: Valentin Churavy <vchuravy@users.noreply.github.com>

72755 of 84117 relevant lines covered (86.49%)

22738368.36 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.3
/base/char.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
"""
4
The `AbstractChar` type is the supertype of all character implementations
5
in Julia. A character represents a Unicode code point, and can be converted
6
to an integer via the [`codepoint`](@ref) function in order to obtain the
7
numerical value of the code point, or constructed from the same integer.
8
These numerical values determine how characters are compared with `<` and `==`,
9
for example.  New `T <: AbstractChar` types should define a `codepoint(::T)`
10
method and a `T(::UInt32)` constructor, at minimum.
11

12
A given `AbstractChar` subtype may be capable of representing only a subset
13
of Unicode, in which case conversion from an unsupported `UInt32` value
14
may throw an error. Conversely, the built-in [`Char`](@ref) type represents
15
a *superset* of Unicode (in order to losslessly encode invalid byte streams),
16
in which case conversion of a non-Unicode value *to* `UInt32` throws an error.
17
The [`isvalid`](@ref) function can be used to check which codepoints are
18
representable in a given `AbstractChar` type.
19

20
Internally, an `AbstractChar` type may use a variety of encodings.  Conversion
21
via `codepoint(char)` will not reveal this encoding because it always returns the
22
Unicode value of the character. `print(io, c)` of any `c::AbstractChar`
23
produces an encoding determined by `io` (UTF-8 for all built-in `IO`
24
types), via conversion to `Char` if necessary.
25

26
`write(io, c)`, in contrast, may emit an encoding depending on
27
`typeof(c)`, and `read(io, typeof(c))` should read the same encoding as `write`.
28
New `AbstractChar` types must provide their own implementations of
29
`write` and `read`.
30
"""
31
AbstractChar
32

33
"""
34
    Char(c::Union{Number,AbstractChar})
35

36
`Char` is a 32-bit [`AbstractChar`](@ref) type that is the default representation
37
of characters in Julia. `Char` is the type used for character literals like `'x'`
38
and it is also the element type of [`String`](@ref).
39

40
In order to losslessly represent arbitrary byte streams stored in a `String`,
41
a `Char` value may store information that cannot be converted to a Unicode
42
codepoint — converting such a `Char` to `UInt32` will throw an error.
43
The [`isvalid(c::Char)`](@ref) function can be used to query whether `c`
44
represents a valid Unicode character.
45
"""
46
Char
47

48
# Generic constructor: build any `AbstractChar` subtype `T` from a number by
# converting the number to `UInt32` first and then calling `T` on that value.
@constprop :aggressive (::Type{T})(x::Number) where {T<:AbstractChar} = T(UInt32(x))
11,250✔
49
# Calling the abstract type itself on a number produces a `Char`.
@constprop :aggressive AbstractChar(x::Number) = Char(x)
1✔
50
# Convert a character to a number type or to another character type `T` by
# passing its codepoint to `T`.
@constprop :aggressive (::Type{T})(x::AbstractChar) where {T<:Union{Number,AbstractChar}} = T(codepoint(x))
178✔
51
# Specialization for `Int32`/`Int64` targets: takes the codepoint modulo `T`
# (`%`) instead of going through the `T(codepoint(x))` constructor call.
@constprop :aggressive (::Type{T})(x::AbstractChar) where {T<:Union{Int32,Int64}} = codepoint(x) % T
6,274,486✔
52
# Constructing a character type from a value that already has that type is the identity.
(::Type{T})(x::T) where {T<:AbstractChar} = x
13✔
53

54
"""
55
    ncodeunits(c::Char) -> Int
56

57
Return the number of code units required to encode a character as UTF-8.
58
This is the number of bytes which will be printed if the character is written
59
to an output stream, or `ncodeunits(string(c))` but computed efficiently.
60

61
!!! compat "Julia 1.1"
62
    This method requires at least Julia 1.1. In Julia 1.0 consider
63
    using `ncodeunits(string(c))`.
64
"""
65
# The code-unit count is taken from the value returned by writing `c` to `devnull`.
ncodeunits(c::Char) = write(devnull, c) # this is surprisingly efficient
3,669,234✔
66

67
"""
68
    codepoint(c::AbstractChar) -> Integer
69

70
Return the Unicode codepoint (an unsigned integer) corresponding
71
to the character `c` (or throw an exception if `c` does not represent
72
a valid character). For `Char`, this is a `UInt32` value, but
73
`AbstractChar` types that represent only a subset of Unicode may
74
return a different-sized integer (e.g. `UInt8`).
75
"""
76
function codepoint end
77

78
# For `Char`, the codepoint is the result of the `UInt32(c)` conversion.
@constprop :aggressive codepoint(c::Char) = UInt32(c)
7,997,309✔
79

80
# Exception carrying the offending character; constructed and thrown by
# `throw_invalid_char`.
struct InvalidCharError{T<:AbstractChar} <: Exception
    char::T
end
83
# Exception carrying the offending integer code; constructed and thrown by
# `throw_code_point_err`.
struct CodePointError{T<:Integer} <: Exception
    code::T
end
86
# Out-of-line (`@noinline`) helper that throws `InvalidCharError(c)`.
@noinline throw_invalid_char(c::AbstractChar) = throw(InvalidCharError(c))
7✔
87
# Out-of-line (`@noinline`) helper that throws `CodePointError(u)`.
@noinline throw_code_point_err(u::Integer) = throw(CodePointError(u))
1✔
88

89
# `Char` method: decide from the raw 32-bit representation whether `c` is
# structurally malformed. Returns `true` when any of these holds:
#   * there is exactly one leading one bit (`l1 == 8`),
#   * the leading-ones count (in bits × 8) plus the whole trailing zero bytes
#     exceed 32 bits,
#   * after shifting away the trailing zero bytes, any of the three lower bytes
#     has top two bits other than `10`.
function ismalformed(c::Char)
    u = bitcast(UInt32, c)
    l1 = leading_ones(u) << 3       # 8 × number of leading one bits
    t0 = trailing_zeros(u) & 56     # trailing zero bits, rounded down to whole bytes
    (l1 == 8) | (l1 + t0 > 32) |
    (((u & 0x00c0c0c0) ⊻ 0x00808080) >> t0 != 0)
end
96

97
# Test the leading bits of a raw 32-bit character representation against the
# four bit patterns this file treats as overlong encodings.
@inline function is_overlong_enc(u::UInt32)
    lead = u >> 24
    two_byte = (lead == 0xc0) | (lead == 0xc1)
    three_byte = (u >> 21) == 0x0704
    four_byte = (u >> 20) == 0x0f08
    return two_byte | three_byte | four_byte
end
12,697,434✔
98

99
# `Char` method: reinterpret `c` as its raw 32 bits and check them with
# `is_overlong_enc`.
function isoverlong(c::Char)
    u = bitcast(UInt32, c)
    is_overlong_enc(u)
end
103

104
# fallback: other AbstractChar types, by default, are assumed
105
#           not to support malformed or overlong encodings.
106

107
"""
108
    ismalformed(c::AbstractChar) -> Bool
109

110
Return `true` if `c` represents malformed (non-Unicode) data according to the
111
encoding used by `c`. Defaults to `false` for non-`Char` types.
112

113
See also [`show_invalid`](@ref).
114
"""
115
ismalformed(::AbstractChar) = false  # argument unused: the generic fallback always answers `false`
2✔
116

117
"""
118
    isoverlong(c::AbstractChar) -> Bool
119

120
Return `true` if `c` represents an overlong UTF-8 sequence. Defaults
121
to `false` for non-`Char` types.
122

123
See also [`decode_overlong`](@ref) and [`show_invalid`](@ref).
124
"""
125
isoverlong(::AbstractChar) = false  # argument unused: the generic fallback always answers `false`
1✔
126

127
# Decode a `Char` to its codepoint as a `UInt32`.
# Calls `throw_invalid_char(c)` when the raw bits fail the structural checks
# below or `is_overlong_enc` reports an overlong encoding.
@constprop :aggressive function UInt32(c::Char)
    # TODO: use optimized inline LLVM
    u = bitcast(UInt32, c)
    # Top bit clear: the value is held entirely in the high byte.
    u < 0x80000000 && return u >> 24
    l1 = leading_ones(u)
    t0 = trailing_zeros(u) & 56     # trailing zero bits, rounded down to whole bytes
    (l1 == 1) | (8l1 + t0 > 32) |
    ((((u & 0x00c0c0c0) ⊻ 0x00808080) >> t0 != 0) | is_overlong_enc(u)) &&
        throw_invalid_char(c)
    u &= 0xffffffff >> l1           # clear the leading one bits
    u >>= t0                        # drop the trailing zero bytes
    # Keep the low bits of each byte and pack them together, 6 bits per lower byte.
    ((u & 0x0000007f) >> 0) | ((u & 0x00007f00) >> 2) |
    ((u & 0x007f0000) >> 4) | ((u & 0x7f000000) >> 6)
end
141

142
"""
143
    decode_overlong(c::AbstractChar) -> Integer
144

145
When [`isoverlong(c)`](@ref) is `true`, `decode_overlong(c)` returns
146
the Unicode codepoint value of `c`. `AbstractChar` implementations
147
that support overlong encodings should implement `Base.decode_overlong`.
148
"""
149
function decode_overlong end
150

151
# `Char` method: extract the payload bits from the raw representation without
# any validity checks (unlike `UInt32(c)`, nothing is thrown here).
@constprop :aggressive function decode_overlong(c::Char)
    u = bitcast(UInt32, c)
    l1 = leading_ones(u)
    t0 = trailing_zeros(u) & 56     # trailing zero bits, rounded down to whole bytes
    u &= 0xffffffff >> l1           # clear the leading one bits
    u >>= t0                        # drop the trailing zero bytes
    # Keep the low bits of each byte and pack them together, 6 bits per lower byte.
    ((u & 0x0000007f) >> 0) | ((u & 0x00007f00) >> 2) |
    ((u & 0x007f0000) >> 4) | ((u & 0x7f000000) >> 6)
end
160

161
# Encode the integer `u` into the raw 32-bit `Char` representation.
# Values below 0x80 are stored directly in the high byte; values of 0x00200000
# and above are rejected via `throw_code_point_err(u)`.
@constprop :aggressive function Char(u::UInt32)
    u < 0x80 && return bitcast(Char, u << 24)
    u < 0x00200000 || throw_code_point_err(u)
    # Spread the value into 6-bit groups, one group per byte.
    c = ((u << 0) & 0x0000003f) | ((u << 2) & 0x00003f00) |
        ((u << 4) & 0x003f0000) | ((u << 6) & 0x3f000000)
    # Left-align the used bytes and OR in the lead/continuation marker bits
    # for the 2-, 3- or 4-byte form, chosen by the magnitude of `u`.
    c = u < 0x00000800 ? (c << 16) | 0xc0800000 :
        u < 0x00010000 ? (c << 08) | 0xe0808000 :
                         (c << 00) | 0xf0808080
    bitcast(Char, c)
end
171

172
# Non-inlined wrapper around `UInt32(c)`; used by the `Int8`/`UInt8`
# conversion below for its non-fast-path branch.
@constprop :aggressive @noinline UInt32_cold(c::Char) = UInt32(c)
×
173
# Convert a `Char` to `Int8` or `UInt8`.
# When the raw bits, viewed as `Int32`, are non-negative (top bit clear), the
# high byte is taken directly; otherwise the value goes through the
# out-of-line `UInt32_cold` path and is then converted with `T`.
@constprop :aggressive function (T::Union{Type{Int8},Type{UInt8}})(c::Char)
    i = bitcast(Int32, c)
    i ≥ 0 ? ((i >>> 24) % T) : T(UInt32_cold(c))
end
177

178
# Non-inlined wrapper around `Char(b)`; used by the `Int8`/`UInt8`
# constructor below for its non-fast-path branch.
@constprop :aggressive @noinline Char_cold(b::UInt32) = Char(b)
×
179
# Build a `Char` from an 8-bit integer.
# Values in 0:0x7f are placed directly into the high byte of the raw
# representation; everything else is widened to `UInt32` and handed to the
# out-of-line `Char_cold` path.
@constprop :aggressive function Char(b::Union{Int8,UInt8})
    0 ≤ b ≤ 0x7f ? bitcast(Char, (b % UInt32) << 24) : Char_cold(UInt32(b))
end
182

183
# Converting a number to the abstract type `AbstractChar` yields a `Char`.
convert(::Type{AbstractChar}, x::Number) = Char(x) # default to Char
1✔
184
# Number -> character type `T`: call the constructor and assert the result is a `T`.
convert(::Type{T}, x::Number) where {T<:AbstractChar} = T(x)::T
41✔
185
# Character -> number type `T`: call the constructor and assert the result is a `T`.
convert(::Type{T}, x::AbstractChar) where {T<:Number} = T(x)::T
7,484,777✔
186
# Character -> another character type `T`: call the constructor and assert the result is a `T`.
convert(::Type{T}, c::AbstractChar) where {T<:AbstractChar} = T(c)::T
1✔
187
# Converting a character to its own type returns it unchanged.
convert(::Type{T}, c::T) where {T<:AbstractChar} = c
100✔
188

189
# `x % T` for a character: apply `rem` to the character's codepoint.
rem(x::AbstractChar, ::Type{T}) where {T<:Number} = rem(codepoint(x), T)
1,722,610✔
190

191
# The largest `Char` is the one whose raw bits equal `typemax(UInt32)`.
typemax(::Type{Char}) = bitcast(Char, typemax(UInt32))
×
192
# The smallest `Char` is the one whose raw bits equal `typemin(UInt32)`.
typemin(::Type{Char}) = bitcast(Char, typemin(UInt32))
1✔
193

194
# A character reports an empty size tuple.
size(c::AbstractChar) = ()
106✔
195
# Extent along dimension `d`: 1 for any `d >= 1`; `d < 1` throws `BoundsError`.
size(c::AbstractChar, d::Integer) = d < 1 ? throw(BoundsError()) : 1
186✔
196
# A character value has zero dimensions.
ndims(c::AbstractChar) = 0
93✔
197
# Same answer when queried on the type instead of a value.
ndims(::Type{<:AbstractChar}) = 0
6✔
198
# A character has length 1.
length(c::AbstractChar) = 1
2✔
199
# Iterator-size trait, defined here for `Char` only (not for every `AbstractChar`).
IteratorSize(::Type{Char}) = HasShape{0}()
×
200
# The only valid index of a character is 1, so it is the first index.
firstindex(c::AbstractChar) = 1
1✔
201
# The only valid index of a character is 1, so it is also the last index.
lastindex(c::AbstractChar) = 1
93✔
202
# Indexing with no indices (`c[]`) returns the character itself.
getindex(c::AbstractChar) = c
95✔
203
# Indexing with a single integer: index 1 returns `c`; any other index throws `BoundsError`.
getindex(c::AbstractChar, i::Integer) = i == 1 ? c : throw(BoundsError())
13✔
204
# Indexing with several integers: every index must equal 1, otherwise `BoundsError` is thrown.
getindex(c::AbstractChar, I::Integer...) = all(x -> x == 1, I) ? c : throw(BoundsError())
8✔
205
# A character is its own first element.
first(c::AbstractChar) = c
94✔
206
# A character is its own last element.
last(c::AbstractChar) = c
94✔
207
# The element type of a character type is the type itself.
eltype(::Type{T}) where {T<:AbstractChar} = T
1✔
208

209
# Iteration yields `c` exactly once: the first call returns `(c, true)`, and a
# call with state `true` returns `nothing`.
iterate(c::AbstractChar, done=false) = done ? nothing : (c, true)
7,611✔
210
# A character is never empty.
isempty(c::AbstractChar) = false
2✔
211
# Membership of one character in another reduces to equality.
in(x::AbstractChar, y::AbstractChar) = x == y
2,838,493✔
212

213
# Two `Char`s are equal exactly when their raw 32-bit representations are equal.
==(x::Char, y::Char) = bitcast(UInt32, x) == bitcast(UInt32, y)
233,095,220✔
214
# `Char` ordering compares the raw 32-bit representations as unsigned integers.
isless(x::Char, y::Char) = bitcast(UInt32, x) < bitcast(UInt32, y)
67,215,189✔
215
# Hash a `Char`: add a fixed 64-bit constant to the raw bits, shift the sum into
# the upper half of a 64-bit word, xor in the seed `h`, and run the result
# through `hash_uint64`.
hash(x::Char, h::UInt) =
    hash_uint64(((bitcast(UInt32, x) + UInt64(0xd4d64234)) << 32) ⊻ UInt64(h))
217

218
# Return the highest byte of the raw 32-bit representation as a `UInt8`.
first_utf8_byte(c::Char) = (bitcast(UInt32, c) >> 24) % UInt8
118,047✔
219

220
# fallbacks:
221
# Generic ordering: convert both operands to `Char` and compare those.
isless(x::AbstractChar, y::AbstractChar) = isless(Char(x), Char(y))
2✔
222
# Generic equality: convert both operands to `Char` and compare those.
==(x::AbstractChar, y::AbstractChar) = Char(x) == Char(y)
3✔
223
# Generic hash: hash the `Char` conversion, matching the `Char`-based `==` fallback.
hash(x::AbstractChar, h::UInt) = hash(Char(x), h)
1✔
224
# Widening a character type returns the same type.
widen(::Type{T}) where {T<:AbstractChar} = T
1✔
225

226
# The difference of two characters is the difference of their `Int` conversions.
@inline -(x::AbstractChar, y::AbstractChar) = Int(x) - Int(y)
3,131,497✔
227
# Subtract an integer from a character, returning a character of the same type.
# For `Char` whose first byte is non-negative as an `Int8` (top bit clear), the
# arithmetic is done inline on that byte; a result in 0:0x7f is written
# straight back into the high byte, anything else goes through
# `Char(UInt32(z))`. All other cases use `T(Int32(x) - Int32(y))`.
@inline function -(x::T, y::Integer) where {T<:AbstractChar}
    if x isa Char
        u = Int32((bitcast(UInt32, x) >> 24) % Int8)
        if u >= 0 # inline the runtime fast path
            z = u - y
            return 0 <= z < 0x80 ? bitcast(Char, (z % UInt32) << 24) : Char(UInt32(z))
        end
    end
    return T(Int32(x) - Int32(y))
end
237
# Add an integer to a character, returning a character of the same type.
# For `Char` whose first byte is non-negative as an `Int8` (top bit clear), the
# arithmetic is done inline on that byte; a result in 0:0x7f is written
# straight back into the high byte, anything else goes through
# `Char(UInt32(z))`. All other cases use `T(Int32(x) + Int32(y))`.
@inline function +(x::T, y::Integer) where {T<:AbstractChar}
    if x isa Char
        u = Int32((bitcast(UInt32, x) >> 24) % Int8)
        if u >= 0 # inline the runtime fast path
            z = u + y
            return 0 <= z < 0x80 ? bitcast(Char, (z % UInt32) << 24) : Char(UInt32(z))
        end
    end
    return T(Int32(x) + Int32(y))
end
247
# Integer-first addition forwards to the character-first method.
@inline +(x::Integer, y::AbstractChar) = y + x
11,416✔
248

249
# `print` should output UTF-8 by default for all AbstractChar types.
250
# (Packages may implement other IO subtypes to specify different encodings.)
251
# In contrast, `write(io, c)` outputs a `c` in an encoding determined by typeof(c).
252
# Printing a `Char` writes it to `io` and returns `nothing` (the `write` result is discarded).
print(io::IO, c::Char) = (write(io, c); nothing)
7,232,073✔
253
# Any other character type is converted to `Char` first and printed through the method above.
print(io::IO, c::AbstractChar) = print(io, Char(c)) # fallback: convert to output UTF-8
2✔
254

255
# Digit table as bytes: '0'-'9' followed by 'a'-'z' (36 entries). The `show`
# methods in this file index it with `nibble + 1` to emit hexadecimal digits.
const hex_chars = UInt8['0':'9'; 'a':'z']
259

260
# `Char` method: write `c` as a single-quoted sequence of `\xHH` escapes, one
# per byte of the raw representation, starting with the highest byte and
# stopping once the remaining lower bytes are all zero. At least one byte is
# always written. Returns the result of the final `write`.
function show_invalid(io::IO, c::Char)
    write(io, 0x27)                             # opening '
    u = bitcast(UInt32, c)
    while true
        a = hex_chars[((u >> 28) & 0xf) + 1]    # high nibble of the top byte
        b = hex_chars[((u >> 24) & 0xf) + 1]    # low nibble of the top byte
        write(io, 0x5c, UInt8('x'), a, b)       # \xab
        (u <<= 8) == 0 && break                 # shift in the next byte; stop when nothing is left
    end
    write(io, 0x27)                             # closing '
end
271

272
"""
273
    show_invalid(io::IO, c::AbstractChar)
274

275
Called by `show(io, c)` when [`isoverlong(c)`](@ref) or
276
[`ismalformed(c)`](@ref) return `true`.  Subtypes
277
of `AbstractChar` should define `Base.show_invalid` methods
278
if they support storing invalid character data.
279
"""
280
show_invalid
281

282
# show c to io, assuming UTF-8 encoded output
283
# Write `c` to `io` as a single-quoted character literal. Checks run in order:
#   1. characters with a short backslash escape (`\0`, `\a`, `\b`, `\t`, `\n`,
#      `\v`, `\f`, `\r`, `\e`, `\'`, `\\`) are written as that escape;
#   2. overlong or malformed characters are delegated to `show_invalid`;
#   3. printable characters are emitted as-is between quotes;
#   4. everything else is written as a `\x`, `\u` or `\U` hex escape of the
#      codepoint.
# Always returns `nothing`.
function show(io::IO, c::AbstractChar)
    if c <= '\\' # all characters with a short escape compare at or below '\\'
        b = c == '\0' ? 0x30 :
            c == '\a' ? 0x61 :
            c == '\b' ? 0x62 :
            c == '\t' ? 0x74 :
            c == '\n' ? 0x6e :
            c == '\v' ? 0x76 :
            c == '\f' ? 0x66 :
            c == '\r' ? 0x72 :
            c == '\e' ? 0x65 :
            c == '\'' ? 0x27 :
            c == '\\' ? 0x5c : 0xff # 0xff marks "no short escape"
        if b != 0xff
            write(io, 0x27, 0x5c, b, 0x27) # ' \ <escape letter> '
            return
        end
    end
    if isoverlong(c) || ismalformed(c)
        show_invalid(io, c)
    elseif isprint(c)
        write(io, 0x27)
        print(io, c) # use print, not write, to use UTF-8 for any AbstractChar
        write(io, 0x27)
    else # unprintable, well-formed, non-overlong Unicode
        u = codepoint(c)
        # Escape letter by magnitude: 'x' up to 0x7f, 'u' up to 0xffff, else 'U'.
        write(io, 0x27, 0x5c, u <= 0x7f ? 0x78 : u <= 0xffff ? 0x75 : 0x55)
        # Number of hex digits to print, never fewer than 2.
        # NOTE(review): the `8 -` presumably assumes a 32-bit codepoint — confirm
        # for AbstractChar types whose `codepoint` returns a narrower integer.
        d = max(2, 8 - (leading_zeros(u) >> 2))
        while 0 < d
            # Emit digits from the most significant nibble down to the least.
            write(io, hex_chars[((u >> ((d -= 1) << 2)) & 0xf) + 1])
        end
        write(io, 0x27)
    end
    return
end
318

319
# Verbose `text/plain` display of a character.
# Writes the literal form via `show(io, c)`; unless `io` has `:compact => true`,
# it then appends either the codepoint as `U+XXXX` (prefixed with "[overlong] "
# for overlong encodings and "ASCII/" for ASCII characters) or
# ": Malformed UTF-8", followed by the Unicode category abbreviation and name.
function show(io::IO, ::MIME"text/plain", c::T) where {T<:AbstractChar}
    show(io, c)
    get(io, :compact, false)::Bool && return
    if !ismalformed(c)
        print(io, ": ")
        if isoverlong(c)
            print(io, "[overlong] ")
            u = decode_overlong(c)
            # Re-encode from the decoded value, so the ASCII and category
            # queries below see the re-encoded character.
            c = T(u)
        else
            u = codepoint(c)
        end
        # Hexadecimal codepoint, upper case, padded to at least 4 digits.
        h = uppercase(string(u, base = 16, pad = 4))
        print(io, (isascii(c) ? "ASCII/" : ""), "Unicode U+", h)
    else
        print(io, ": Malformed UTF-8")
    end
    abr = Unicode.category_abbrev(c)
    str = Unicode.category_string(c)
    print(io, " (category ", abr, ": ", str, ")")
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc