• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / #38162

06 Aug 2025 08:25PM UTC coverage: 25.688% (-43.6%) from 69.336%
#38162

push

local

web-flow
fix runtime cglobal builtin function implementation (#59210)

This had failed to be updated for the LazyLibrary changes to codegen.

12976 of 50513 relevant lines covered (25.69%)

676965.51 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

28.47
/base/char.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
import Core: AbstractChar, Char
4

5
"""
6
The `AbstractChar` type is the supertype of all character implementations
7
in Julia. A character represents a Unicode code point, and can be converted
8
to an integer via the [`codepoint`](@ref) function in order to obtain the
9
numerical value of the code point, or constructed from the same integer.
10
These numerical values determine how characters are compared with `<` and `==`,
11
for example.  New `T <: AbstractChar` types should define a `codepoint(::T)`
12
method and a `T(::UInt32)` constructor, at minimum.
13

14
A given `AbstractChar` subtype may be capable of representing only a subset
15
of Unicode, in which case conversion from an unsupported `UInt32` value
16
may throw an error. Conversely, the built-in [`Char`](@ref) type represents
17
a *superset* of Unicode (in order to losslessly encode invalid byte streams),
18
in which case conversion of a non-Unicode value *to* `UInt32` throws an error.
19
The [`isvalid`](@ref) function can be used to check which codepoints are
20
representable in a given `AbstractChar` type.
21

22
Internally, an `AbstractChar` type may use a variety of encodings.  Conversion
23
via `codepoint(char)` will not reveal this encoding because it always returns the
24
Unicode value of the character. `print(io, c)` of any `c::AbstractChar`
25
produces an encoding determined by `io` (UTF-8 for all built-in `IO`
26
types), via conversion to `Char` if necessary.
27

28
`write(io, c)`, in contrast, may emit an encoding depending on
29
`typeof(c)`, and `read(io, typeof(c))` should read the same encoding as `write`.
30
New `AbstractChar` types must provide their own implementations of
31
`write` and `read`.
32
"""
33
AbstractChar
34

35
"""
36
    Char(c::Union{Number,AbstractChar})
37

38
`Char` is a 32-bit [`AbstractChar`](@ref) type that is the default representation
39
of characters in Julia. `Char` is the type used for character literals like `'x'`
40
and it is also the element type of [`String`](@ref).
41

42
In order to losslessly represent arbitrary byte streams stored in a `String`,
43
a `Char` value may store information that cannot be converted to a Unicode
44
codepoint — converting such a `Char` to `UInt32` will throw an error.
45
The [`isvalid(c::Char)`](@ref) function can be used to query whether `c`
46
represents a valid Unicode character.
47
"""
48
Char
49

50
# Generic constructors relating `AbstractChar` subtypes, numbers and code points.

# Number -> character type: route every number through `UInt32`.
@constprop :aggressive function (::Type{C})(n::Number) where {C<:AbstractChar}
    return C(UInt32(n))
end

# Calling the abstract type itself with a number builds a `Char`.
@constprop :aggressive function AbstractChar(n::Number)
    return Char(n)
end

# Character -> number or other character type: convert the code point.
@constprop :aggressive function (::Type{R})(ch::AbstractChar) where {R<:Union{Number,AbstractChar}}
    return R(codepoint(ch))
end

# `Int32`/`Int64` targets use the `%` conversion of the code point instead of
# calling the target type's constructor.
@constprop :aggressive function (::Type{I})(ch::AbstractChar) where {I<:Union{Int32,Int64}}
    return codepoint(ch) % I
end

# Constructing a character type from a value already of that type is the identity.
function (::Type{C})(ch::C) where {C<:AbstractChar}
    return ch
end
28✔
55

56
"""
57
    ncodeunits(c::Char)::Int
58

59
Return the number of code units required to encode a character as UTF-8.
60
This is the number of bytes which will be printed if the character is written
61
to an output stream, or `ncodeunits(string(c))` but computed efficiently.
62

63
!!! compat "Julia 1.1"
64
    This method requires at least Julia 1.1. In Julia 1.0 consider
65
    using `ncodeunits(string(c))`.
66
"""
67
function ncodeunits(c::Char)
    bits = reinterpret(UInt32, c)
    # Whole trailing zero bytes of the 4-byte representation are not counted:
    # subtract them from the total size.
    zero_tail_bytes = div(trailing_zeros(bits), 0x8)
    used_bytes = sizeof(UInt32) - zero_tail_bytes
    # '\0' has an all-zero bit pattern (giving 0 above); `iszero` adds one for it.
    return used_bytes + iszero(bits)
end
75

76
"""
77
    codepoint(c::AbstractChar)::Integer
78

79
Return the Unicode codepoint (an unsigned integer) corresponding
80
to the character `c` (or throw an exception if `c` does not represent
81
a valid character). For `Char`, this is a `UInt32` value, but
82
`AbstractChar` types that represent only a subset of Unicode may
83
return a different-sized integer (e.g. `UInt8`).
84
"""
85
function codepoint end
86

87
# For `Char`, the code point is the result of the `UInt32(c)` conversion.
@constprop :aggressive function codepoint(c::Char)
    return UInt32(c)
end
47,724✔
88

89
# Exception carrying the offending character `char`; raised through
# `throw_invalid_char`.
struct InvalidCharError{T<:AbstractChar} <: Exception
    char::T
end

# Exception carrying the offending integer `code`; raised through
# `throw_code_point_err`.
struct CodePointError{T<:Integer} <: Exception
    code::T
end

# The throwing helpers are marked `@noinline` and kept as separate functions.
@noinline function throw_invalid_char(c::AbstractChar)
    throw(InvalidCharError(c))
end

@noinline function throw_code_point_err(u::Integer)
    throw(CodePointError(u))
end
×
97

98
function ismalformed(c::Char)
    bits = bitcast(UInt32, c)
    lead_bits = leading_ones(bits) << 3   # 8 × (number of leading one bits)
    pad_bits = trailing_zeros(bits) & 56  # trailing zero bits, rounded down to whole bytes
    # Exactly one leading one bit: the first byte has the continuation-byte form.
    starts_with_continuation = lead_bits == 8
    # The length implied by the leading ones plus the zero padding exceeds 32 bits.
    length_mismatch = lead_bits + pad_bits > 32
    # Some non-padding byte after the first lacks the `10` top-bit pattern.
    bad_continuation = ((bits & 0x00c0c0c0) ⊻ 0x00808080) >> pad_bits != 0
    return starts_with_continuation | length_mismatch | bad_continuation
end
105

106
# Test the raw 32-bit `Char` representation `u` against the four overlong
# lead patterns.
@inline function is_overlong_enc(u::UInt32)
    first_byte = u >> 24
    two_byte = (first_byte == 0xc0) | (first_byte == 0xc1)
    three_byte = u >> 21 == 0x0704   # first byte 0xe0, next three bits 100
    four_byte = u >> 20 == 0x0f08    # first byte 0xf0, next four bits 1000
    return two_byte | three_byte | four_byte
end
44,067✔
107

108
# `Char` method: apply the overlong-pattern test to the raw bits of `c`.
isoverlong(c::Char) = is_overlong_enc(bitcast(UInt32, c))
112

113
# fallback: other AbstractChar types, by default, are assumed
114
#           not to support malformed or overlong encodings.
115

116
"""
117
    ismalformed(c::AbstractChar)::Bool
118

119
Return `true` if `c` represents malformed (non-Unicode) data according to the
120
encoding used by `c`. Defaults to `false` for non-`Char` types.
121

122
See also [`show_invalid`](@ref).
123
"""
124
ismalformed(::AbstractChar) = false
×
125

126
"""
127
    isoverlong(c::AbstractChar)::Bool
128

129
Return `true` if `c` represents an overlong UTF-8 sequence. Defaults
130
to `false` for non-`Char` types.
131

132
See also [`decode_overlong`](@ref) and [`show_invalid`](@ref).
133
"""
134
isoverlong(::AbstractChar) = false
×
135

136
@constprop :aggressive function UInt32(c::Char)
    # TODO: use optimized inline LLVM
    bits = bitcast(UInt32, c)
    # Top bit clear: the value is simply the first byte.
    if bits < 0x80000000
        return bits >> 24
    end
    nlead = leading_ones(bits)
    pad = trailing_zeros(bits) & 56   # trailing zero bits, rounded down to whole bytes
    # Same structural checks as `ismalformed(::Char)`, plus the overlong test;
    # any failure is reported through `throw_invalid_char`.
    invalid = (nlead == 1) | (8nlead + pad > 32) |
              (((bits & 0x00c0c0c0) ⊻ 0x00808080) >> pad != 0) |
              is_overlong_enc(bits)
    invalid && throw_invalid_char(c)
    # Clear the leading marker ones and shift out the zero padding.
    payload = (bits & (0xffffffff >> nlead)) >> pad
    # Drop the top bit of each byte's mask and pack the remaining groups together.
    return ((payload & 0x0000007f) >> 0) | ((payload & 0x00007f00) >> 2) |
           ((payload & 0x007f0000) >> 4) | ((payload & 0x7f000000) >> 6)
end
150

151
"""
152
    decode_overlong(c::AbstractChar)::Integer
153

154
When [`isoverlong(c)`](@ref) is `true`, `decode_overlong(c)` returns
155
the Unicode codepoint value of `c`. `AbstractChar` implementations
156
that support overlong encodings should implement `Base.decode_overlong`.
157
"""
158
function decode_overlong end
159

160
@constprop :aggressive function decode_overlong(c::Char)
    bits = bitcast(UInt32, c)
    nlead = leading_ones(bits)
    pad = trailing_zeros(bits) & 56   # trailing zero bits, rounded down to whole bytes
    # Clear the leading marker ones, then shift out the zero padding.
    payload = (bits & (0xffffffff >> nlead)) >> pad
    # Mask the low 7 bits of each byte and pack the groups next to each other.
    byte0 = (payload & 0x0000007f) >> 0
    byte1 = (payload & 0x00007f00) >> 2
    byte2 = (payload & 0x007f0000) >> 4
    byte3 = (payload & 0x7f000000) >> 6
    return byte0 | byte1 | byte2 | byte3
end
169

170
@constprop :aggressive function Char(u::UInt32)
    # Values below 0x80 are stored directly in the top byte.
    if u < 0x80
        return bitcast(Char, u << 24)
    end
    # Values of 0x00200000 and above are rejected via `throw_code_point_err`.
    u < 0x00200000 || throw_code_point_err(u)
    # Spread the value into 6-bit groups, one group per byte.
    spread = ((u << 0) & 0x0000003f) | ((u << 2) & 0x00003f00) |
             ((u << 4) & 0x003f0000) | ((u << 6) & 0x3f000000)
    # Left-align the groups for the 2-, 3- or 4-byte form and OR in the
    # lead/continuation marker bits.
    encoded = if u < 0x00000800
        (spread << 16) | 0xc0800000
    elseif u < 0x00010000
        (spread << 8) | 0xe0808000
    else
        spread | 0xf0808080
    end
    return bitcast(Char, encoded)
end
180

181
# Non-inlined wrapper around `UInt32(c)`; used only on the slow branch below.
@constprop :aggressive @noinline function UInt32_cold(c::Char)
    return UInt32(c)
end

# `Int8(c)` / `UInt8(c)` for `Char`.
@constprop :aggressive function (T::Union{Type{Int8},Type{UInt8}})(c::Char)
    raw = bitcast(Int32, c)
    # Non-negative as a signed 32-bit value means the top bit is clear, so the
    # top byte is the result.
    if raw ≥ 0
        return (raw >>> 24) % T
    end
    # Otherwise decode fully and let `T(...)` convert the resulting `UInt32`.
    return T(UInt32_cold(c))
end
186

187
# Non-inlined wrapper around `Char(::UInt32)`; used only on the slow branch below.
@constprop :aggressive @noinline function Char_cold(b::UInt32)
    return Char(b)
end

# `Char` from a single (signed or unsigned) byte.
@constprop :aggressive function Char(b::Union{Int8,UInt8})
    # Values 0..0x7f are placed directly in the top byte.
    if 0 ≤ b ≤ 0x7f
        return bitcast(Char, (b % UInt32) << 24)
    end
    # Everything else goes through the checked `UInt32(b)` conversion first.
    return Char_cold(UInt32(b))
end
191

192
# `convert` between characters and numbers delegates to the constructors.

# A number converted to the abstract type becomes a `Char` (the default).
function convert(::Type{AbstractChar}, n::Number)
    return Char(n)
end

# Number -> concrete character type.
function convert(::Type{T}, n::Number) where {T<:AbstractChar}
    return T(n)::T
end

# Character -> number.
function convert(::Type{T}, ch::AbstractChar) where {T<:Number}
    return T(ch)::T
end

# Character -> (possibly different) character type.
function convert(::Type{T}, ch::AbstractChar) where {T<:AbstractChar}
    return T(ch)::T
end

# Already the requested type: return the value unchanged.
function convert(::Type{T}, ch::T) where {T<:AbstractChar}
    return ch
end
×
197

198
# `c % T`: take the remainder of the character's code point.
function rem(x::AbstractChar, ::Type{T}) where {T<:Number}
    return rem(codepoint(x), T)
end
15,889✔
199

200
# The extreme `Char` values are the extreme `UInt32` bit patterns, reinterpreted.
function typemax(::Type{Char})
    return bitcast(Char, typemax(UInt32))
end

function typemin(::Type{Char})
    return bitcast(Char, typemin(UInt32))
end
×
202

203
# A character reports itself as a zero-dimensional container of length one.
size(::AbstractChar) = ()

# Dimensions below 1 are rejected; every other dimension has extent 1.
function size(::AbstractChar, d::Integer)
    d < 1 && throw(BoundsError())
    return 1
end

ndims(::AbstractChar) = 0
ndims(::Type{<:AbstractChar}) = 0
length(::AbstractChar) = 1
IteratorSize(::Type{<:AbstractChar}) = HasShape{0}()
firstindex(::AbstractChar) = 1
lastindex(::AbstractChar) = 1
×
211
# Indexing a character: only index 1 (in every position) is valid, and it
# returns the character itself. Any other index throws a `BoundsError` that
# records the character and the offending index so the message is informative.
getindex(c::AbstractChar) = c

function getindex(c::AbstractChar, i::Integer)
    i == 1 || throw(BoundsError(c, i))
    return c
end

function getindex(c::AbstractChar, I::Integer...)
    all(==(1), I) || throw(BoundsError(c, I))
    return c
end
×
214
# The only element of a character is the character itself.
function first(c::AbstractChar)
    return c
end

function last(c::AbstractChar)
    return c
end

# The element type of a character type is that same type.
function eltype(::Type{C}) where {C<:AbstractChar}
    return C
end
×
217

218
# Iteration yields the character once; the state is a flag recording that it
# has already been produced.
function iterate(c::AbstractChar, done=false)
    done && return nothing
    return (c, true)
end

isempty(::AbstractChar) = false

# Membership in a character is equality with it.
function in(x::AbstractChar, y::AbstractChar)
    return x == y
end
20,420✔
221

222
# `Char` equality and ordering compare the raw 32-bit representations.
function ==(x::Char, y::Char)
    return bitcast(UInt32, x) == bitcast(UInt32, y)
end

function isless(x::Char, y::Char)
    return bitcast(UInt32, x) < bitcast(UInt32, y)
end
2,508,955✔
224
function hash(x::Char, h::UInt)
    # Add a fixed constant to the raw character bits and move the sum into the
    # upper 32 bits of a 64-bit word.
    salted = (bitcast(UInt32, x) + UInt64(0xd4d64234)) << 32
    # Combine with the incoming hash state, finalize, and truncate to `UInt`.
    mixed = salted ⊻ UInt64(h)
    return hash_finalizer(mixed) % UInt
end
226

227
# fallbacks:
228
# Generic `AbstractChar` methods convert to `Char` and reuse its methods.
function isless(x::AbstractChar, y::AbstractChar)
    return isless(Char(x), Char(y))
end

function ==(x::AbstractChar, y::AbstractChar)
    return Char(x) == Char(y)
end

function hash(x::AbstractChar, h::UInt)
    return hash(Char(x), h)
end

# Character types are their own widened type.
function widen(::Type{C}) where {C<:AbstractChar}
    return C
end
×
232

233
# Character minus character: difference of the two values as `Int`s.
@inline function -(x::AbstractChar, y::AbstractChar)
    return Int(x) - Int(y)
end

# Character minus integer: a character of the same type `T`.
@inline function -(x::T, y::Integer) where {T<:AbstractChar}
    if x isa Char
        # Top byte of the raw bits, sign-extended; non-negative means it is
        # below 0x80.
        lead = Int32((bitcast(UInt32, x) >> 24) % Int8)
        if lead >= 0 # inline the runtime fast path
            diff = lead - y
            # Results in 0..0x7f are written straight into the top byte.
            if 0 <= diff < 0x80
                return bitcast(Char, (diff % UInt32) << 24)
            end
            return Char(UInt32(diff))
        end
    end
    return T(Int32(x) - Int32(y))
end
244
# Character plus integer: a character of the same type `T`.
@inline function +(x::T, y::Integer) where {T<:AbstractChar}
    if x isa Char
        # Top byte of the raw bits, sign-extended; non-negative means it is
        # below 0x80.
        lead = Int32((bitcast(UInt32, x) >> 24) % Int8)
        if lead >= 0 # inline the runtime fast path
            total = lead + y
            # Results in 0..0x7f are written straight into the top byte.
            if 0 <= total < 0x80
                return bitcast(Char, (total % UInt32) << 24)
            end
            return Char(UInt32(total))
        end
    end
    return T(Int32(x) + Int32(y))
end

# Integer plus character: same as character plus integer.
@inline function +(x::Integer, y::AbstractChar)
    return y + x
end
×
255

256
# `print` should output UTF-8 by default for all AbstractChar types.
257
# (Packages may implement other IO subtypes to specify different encodings.)
258
# In contrast, `write(io, c)` outputs a `c` in an encoding determined by typeof(c).
259
function print(io::IO, c::Char)
    write(io, c)
    return nothing
end

# fallback: convert to `Char` first so the output is UTF-8
function print(io::IO, c::AbstractChar)
    return print(io, Char(c))
end
×
261

262
# Byte values of the digit characters '0'-'9' followed by 'a'-'z'; entry
# `v + 1` is the digit for value `v`. The `show` code in this file indexes it
# with 4-bit values, i.e. only the first 16 entries.
const hex_chars = UInt8[
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    'a', 'b', 'c', 'd', 'e', 'f',
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
    'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
]
266

267
function show_invalid(io::IO, c::Char)
    write(io, 0x27)   # opening single quote
    remaining = bitcast(UInt32, c)
    # Emit the top byte as `\xHH`, then shift it out; stop once only zero
    # bits remain. The loop body always runs at least once.
    while true
        hi = hex_chars[((remaining >> 28) & 0xf) + 1]
        lo = hex_chars[((remaining >> 24) & 0xf) + 1]
        write(io, 0x5c, UInt8('x'), hi, lo)
        remaining <<= 8
        remaining == 0 && break
    end
    return write(io, 0x27)   # closing single quote
end
278

279
"""
280
    show_invalid(io::IO, c::AbstractChar)
281

282
Called by `show(io, c)` when [`isoverlong(c)`](@ref) or
283
[`ismalformed(c)`](@ref) return `true`. Subtypes
284
of `AbstractChar` should define `Base.show_invalid` methods
285
if they support storing invalid character data.
286
"""
287
show_invalid
288

289
# show c to io, assuming UTF-8 encoded output
290
function show(io::IO, c::AbstractChar)
    # Characters with a dedicated single-letter escape all compare `<= '\\'`;
    # `b` is the byte written after the backslash, or 0xff for "no such escape".
    if c <= '\\'
        b = c == '\0' ? 0x30 :
            c == '\a' ? 0x61 :
            c == '\b' ? 0x62 :
            c == '\t' ? 0x74 :
            c == '\n' ? 0x6e :
            c == '\v' ? 0x76 :
            c == '\f' ? 0x66 :
            c == '\r' ? 0x72 :
            c == '\e' ? 0x65 :
            c == '\'' ? 0x27 :
            c == '\\' ? 0x5c : 0xff
        if b != 0xff
            write(io, 0x27, 0x5c, b, 0x27)
            return
        end
    end
    if isoverlong(c) || ismalformed(c)
        show_invalid(io, c)
    elseif isprint(c)
        write(io, 0x27)
        print(io, c) # use print, not write, to use UTF-8 for any AbstractChar
        write(io, 0x27)
    else # unprintable, well-formed, non-overlong Unicode
        u = codepoint(c)
        # Escape letter by magnitude: \x up to 0x7f, \u up to 0xffff, else \U.
        write(io, 0x27, 0x5c, u <= 0x7f ? 0x78 : u <= 0xffff ? 0x75 : 0x55)
        # Number of hex digits to print: at least 2, otherwise the digits of
        # `u` without leading zero nibbles. The total nibble count comes from
        # the integer type `codepoint` returned (8 for `UInt32`); a fixed 8
        # would print spurious leading zeros for narrower types such as `UInt8`.
        nibbles = 2 * sizeof(u)
        d = max(2, nibbles - (leading_zeros(u) >> 2))
        # Write the digits from most to least significant.
        while 0 < d
            write(io, hex_chars[((u >> ((d -= 1) << 2)) & 0xf) + 1])
        end
        write(io, 0x27)
    end
    return
end
325

326
function show(io::IO, ::MIME"text/plain", c::T) where {T<:AbstractChar}
    show(io, c)
    # With `:compact => true` in the IO context only the quoted literal is shown.
    if get(io, :compact, false)::Bool
        return
    end
    # `described` is the character whose ASCII status and category are reported.
    # For an overlong encoding it is replaced by the decoded character.
    described = c
    if ismalformed(c)
        print(io, ": Malformed UTF-8")
    else
        print(io, ": ")
        if isoverlong(c)
            print(io, "[overlong] ")
            u = decode_overlong(c)
            described = T(u)
        else
            u = codepoint(c)
        end
        # Code point in upper-case hexadecimal, padded to at least four digits.
        hexdigits = uppercase(string(u, base = 16, pad = 4))
        print(io, (isascii(described) ? "ASCII/" : ""), "Unicode U+", hexdigits)
    end
    abbrev = Unicode.category_abbrev(described)
    label = Unicode.category_string(described)
    return print(io, " (category ", abbrev, ": ", label, ")")
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc