• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / #37999

02 Feb 2025 07:22AM UTC coverage: 17.218% (-8.3%) from 25.515%
#37999

push

local

web-flow
bpart: Start tracking backedges for bindings (#57213)

This PR adds limited backedge support for Bindings. There are two
classes of bindings that get backedges:

1. Cross-module `GlobalRef` bindings (new in this PR)
2. Any global accesses through intrinsics (i.e. those with forward
edges from #57009)

This is a time/space trade-off for invalidation. As a result of the
first category, invalidating a binding now only needs to scan all the
methods defined in the same module as the binding. At the same time, it
is anticipated that most binding references are to bindings in the same
module, keeping the list of bindings that need explicit (back)edges
small.

7 of 30 new or added lines in 3 files covered. (23.33%)

4235 existing lines in 124 files now uncovered.

7882 of 45779 relevant lines covered (17.22%)

98289.89 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

9.21
/base/strings/cstring.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
import Core.Intrinsics: bitcast
4

5
"""
6
    Cwstring
7

8
A C-style string composed of the native wide character type
9
[`Cwchar_t`](@ref)s. `Cwstring`s are NUL-terminated. For
10
C-style strings composed of the native character
11
type, see [`Cstring`](@ref). For more information
12
about string interoperability with C, see the
13
[manual](@ref man-bits-types).
14

15
"""
16
Cwstring
17

18
"""
19
    Cstring
20

21
A C-style string composed of the native character type
22
[`Cchar`](@ref)s. `Cstring`s are NUL-terminated. For
23
C-style strings composed of the native wide character
24
type, see [`Cwstring`](@ref). For more information
25
about string interoperability with C, see the
26
[manual](@ref man-bits-types).
27
"""
28
Cstring
29

30
# construction from pointers
31
# Relabel a raw pointer as a C string handle; only the bits are reinterpreted.
Cstring(p::Union{Ptr{Int8},Ptr{UInt8},Ptr{Cvoid}}) = bitcast(Cstring, p)
Cwstring(p::Union{Ptr{Cwchar_t},Ptr{Cvoid}})       = bitcast(Cwstring, p)
# Relabel a C string handle as a raw pointer of the requested type `Ptr{T}`.
Ptr{T}(p::Cstring) where {T<:Union{Int8,UInt8,Cvoid}} = bitcast(Ptr{T}, p)
# Honour the requested `T`, so that `Ptr{Cvoid}(cw)` yields a `Ptr{Cvoid}` (as the
# `Cstring` method above does) rather than always producing a `Ptr{Cwchar_t}`.
Ptr{T}(p::Cwstring) where {T<:Union{Cwchar_t,Cvoid}}  = bitcast(Ptr{T}, p)
×
35

36
# Conversions between raw pointers and C string handles; each method simply
# forwards to the matching constructor.
function convert(::Type{Cstring}, p::Union{Ptr{Int8},Ptr{UInt8},Ptr{Cvoid}})
    return Cstring(p)
end
function convert(::Type{Cwstring}, p::Union{Ptr{Cwchar_t},Ptr{Cvoid}})
    return Cwstring(p)
end
function convert(::Type{Ptr{T}}, p::Cstring) where {T<:Union{Int8,UInt8,Cvoid}}
    return Ptr{T}(p)
end
function convert(::Type{Ptr{T}}, p::Cwstring) where {T<:Union{Cwchar_t,Cvoid}}
    return Ptr{T}(p)
end
×
40

41
"""
42
    pointer(array [, index])
43

44
Get the native address of an array or string, optionally at a given location `index`.
45

46
This function is "unsafe". Be careful to ensure that a Julia reference to
47
`array` exists as long as this pointer will be used. The [`GC.@preserve`](@ref)
48
macro should be used to protect the `array` argument from garbage collection
49
within a given block of code.
50

51
Calling [`Ref(array[, index])`](@ref Ref) is generally preferable to this function as it guarantees validity.
52
"""
53
function pointer end
54

55
# Raw pointer view of a C string handle, typed by its code unit.
function pointer(p::Cstring)
    return convert(Ptr{Cchar}, p)
end
function pointer(p::Cwstring)
    return convert(Ptr{Cwchar_t}, p)
end

# comparisons against pointers (mainly to support `cstr==C_NULL`)
function ==(x::Union{Cstring,Cwstring}, y::Ptr)
    return pointer(x) == y
end
function ==(x::Ptr, y::Union{Cstring,Cwstring})
    return x == pointer(y)
end

# Build a `String` from the data behind `s` by delegating to the `Ptr{UInt8}` method.
function unsafe_string(s::Cstring)
    return unsafe_string(convert(Ptr{UInt8}, s))
end
17✔
63

64
# convert strings to String etc. to pass as pointers
UNCOV
65
# A `String` is passed through unchanged.
function cconvert(::Type{Cstring}, s::String)
    return s
end
# Any other string type is first materialised as a `String`.
function cconvert(::Type{Cstring}, s::AbstractString)
    native = String(s)::String
    return cconvert(Cstring, native)
end

# Transcode to `Cwchar_t` code units, append the terminating 0, and hand the
# vector to the `Vector{Cwchar_t}` method.
function cconvert(::Type{Cwstring}, s::AbstractString)
    units = push!(transcode(Cwchar_t, String(s)), 0)
    return cconvert(Cwstring, units)
end
74

75
# Code unit type associated with each C string handle type.
function eltype(::Type{Cstring})
    return Cchar
end
function eltype(::Type{Cwstring})
    return Cwchar_t
end
×
77

78
# Report whether any of the `len` bytes starting at `p` is zero, using C `memchr`.
function containsnul(p::Ptr, len)
    hit = ccall(:memchr, Ptr{Cchar}, (Ptr{Cchar}, Cint, Csize_t), p, 0, len)
    return C_NULL != hit
end
# Scan the bytes of `s`. `s` is kept rooted with `GC.@preserve` for as long as the
# raw pointer derived from it is in use.
function containsnul(s::String)
    return GC.@preserve s containsnul(unsafe_convert(Ptr{Cchar}, s), sizeof(s))
end
# Generic strings are searched character by character.
containsnul(s::AbstractString) = '\0' in s
×
82

83
# Produce a `Cstring` pointing at the bytes of `s`.
# Throws `ArgumentError` if `s` contains a NUL byte.
function unsafe_convert(::Type{Cstring}, s::String)
    data = unsafe_convert(Ptr{Cchar}, s)
    if containsnul(data, sizeof(s))
        throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))
    end
    return Cstring(data)
end

# Byte memory is wrapped as-is; no NUL scan is performed on this path.
function unsafe_convert(::Type{Cstring}, s::Union{Memory{UInt8},Memory{Int8}})
    return Cstring(unsafe_convert(Ptr{Cvoid}, s))
end
×
91

92
# Validate that `v` is usable as a wide C string, then defer to the
# `Ptr{Cwchar_t}` conversion.
#
# Throws `ArgumentError` if a 0 appears before the last element, or if `v` does not
# end in 0. An empty vector has no terminator and is reported the same way, instead
# of failing with a `BoundsError` on `v[end]`.
function cconvert(::Type{Cwstring}, v::Vector{Cwchar_t})
    for i = 1:length(v)-1
        v[i] == 0 &&
            throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(v))"))
    end
    (!isempty(v) && v[end] == 0) ||
        throw(ArgumentError("C string data must be NUL terminated: $(repr(v))"))
    return cconvert(Ptr{Cwchar_t}, v)
end
101
# Wrap whatever `Ptr{Cwchar_t}` the argument converts to.
function unsafe_convert(::Type{Cwstring}, s)
    return Cwstring(unsafe_convert(Ptr{Cwchar_t}, s))
end
# An existing `Cwstring` is returned unchanged.
function unsafe_convert(::Type{Cwstring}, s::Cwstring)
    return s
end
×
103

104
# symbols are guaranteed not to contain embedded NUL
UNCOV
105
function cconvert(::Type{Cstring}, s::Symbol)
    return s
end
# No NUL scan here: see the guarantee about symbols stated above.
function unsafe_convert(::Type{Cstring}, s::Symbol)
    return Cstring(unsafe_convert(Ptr{Cchar}, s))
end
179✔
107

108
if ccall(:jl_get_UNAME, Any, ()) === :NT
"""
    Base.cwstring(s)

Convert the string `s` to a NUL-terminated vector of UTF-16 code units, suitable for
passing to C functions expecting a `Ptr{Cwchar_t}`. Compared with the implicit
conversion provided by [`Cwstring`](@ref), this is mainly useful when the same
argument is passed to such a function many times.

Throws an `ArgumentError` if `s` contains an embedded NUL.

This is only available on Windows.
"""
function cwstring(s::AbstractString)
    units = codeunits(String(s))
    if 0 in units
        throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))
    end
    wide = transcode(UInt16, units)
    push!(wide, 0)
    return wide
end
end
125

126
# transcoding between data in UTF-8 and UTF-16 for Windows APIs,
127
# and also UTF-32 for APIs using Cwchar_t on other platforms.
128

129
"""
130
    transcode(T, src)
131

132
Convert string data between Unicode encodings. `src` is either a
133
`String` or a `Vector{UIntXX}` of UTF-XX code units, where
134
`XX` is 8, 16, or 32. `T` indicates the encoding of the return value:
135
`String` to return a (UTF-8 encoded) `String` or `UIntXX`
136
to return a `Vector{UIntXX}` of UTF-`XX` data. (The alias [`Cwchar_t`](@ref)
137
can also be used as the integer type, for converting `wchar_t*` strings
138
used by external C libraries.)
139

140
The `transcode` function succeeds as long as the input data can be
141
reasonably represented in the target encoding; it always succeeds for
142
conversions between UTF-XX encodings, even for invalid Unicode data.
143

144
Only conversion to/from UTF-8 is currently supported.
145

146
# Examples
147
```jldoctest
148
julia> str = "αβγ"
149
"αβγ"
150

151
julia> transcode(UInt16, str)
152
3-element Vector{UInt16}:
153
 0x03b1
154
 0x03b2
155
 0x03b3
156

157
julia> transcode(String, transcode(UInt16, str))
158
"αβγ"
159
```
160
"""
161
function transcode end
162

163
# Source already uses the requested code unit type: return it as-is (no copy).
function transcode(::Type{T}, src::AbstractVector{T}) where {T<:Union{UInt8,UInt16,UInt32,Int32}}
    return src
end
# 32-bit targets: one code unit per character of the string.
function transcode(::Type{T}, src::String) where {T<:Union{Int32,UInt32}}
    return T[T(ch) for ch in src]
end
# Byte input for a 32-bit target goes through a `String` first.
function transcode(::Type{T}, src::AbstractVector{UInt8}) where {T<:Union{Int32,UInt32}}
    text = String(Vector(src))
    return transcode(T, text)
end
function transcode(::Type{T}, src::CodeUnits{UInt8,String}) where {T<:Union{Int32,UInt32}}
    text = String(src)
    return transcode(T, text)
end
169

170
# 32-bit code units to bytes: print each unit as a `Char` into a buffer and
# return the accumulated bytes.
function transcode(::Type{UInt8}, src::Vector{<:Union{Int32,UInt32}})
    io = IOBuffer()
    foreach(u -> print(io, Char(u)), src)
    return take!(io)
end
177
# `String` to `String` needs no work.
function transcode(::Type{String}, src::String)
    return src
end
# Any other target from a `String`: operate on its code units.
function transcode(T, src::String)
    return transcode(T, codeunits(src))
end
# Any source to `String`: transcode to bytes, then wrap them.
function transcode(::Type{String}, src)
    return String(transcode(UInt8, src))
end
×
180

181
# Convert bytes to UTF-16 code units.
#
# Well-formed 2-, 3- and 4-byte sequences are decoded (4-byte ones become a
# surrogate pair). Bytes that do not form such a sequence are copied to the output
# one code unit per byte, so the conversion never throws on malformed input.
function transcode(::Type{UInt16}, src::AbstractVector{UInt8})
    require_one_based_indexing(src)
    out = UInt16[]
    pos, len = 1, length(src)
    len <= 0 && return out
    sizehint!(out, 2len)
    lead = src[1]
    while true
        if pos < len && 0xc0 <= lead <= 0xf4 # lead byte of a multi-byte character
            b2 = src[pos += 1]
            if (b2 & 0xc0) != 0x80 || (lead == 0xf4 && b2 > 0x8f)
                # invalid UTF-8 (non-continuation or too-high code point):
                # emit the lead byte and reprocess `b2` as a fresh lead
                push!(out, lead)
                lead = b2
                continue
            elseif lead < 0xe0 # 2-byte UTF-8
                push!(out, xor(0x3080, UInt16(lead) << 6, b2))
            elseif pos >= len # too short
                push!(out, lead, b2)
                break
            else # 3/4-byte character
                b3 = src[pos += 1]
                if (b3 & 0xc0) != 0x80 # invalid UTF-8 (non-continuation)
                    push!(out, lead, b2)
                    lead = b3
                    continue
                elseif lead < 0xf0 # 3-byte UTF-8
                    push!(out, xor(0x2080, UInt16(lead) << 12, UInt16(b2) << 6, b3))
                elseif pos >= len # too short
                    push!(out, lead, b2, b3)
                    break
                else
                    b4 = src[pos += 1]
                    if (b4 & 0xc0) != 0x80 # invalid UTF-8 (non-continuation)
                        push!(out, lead, b2, b3)
                        lead = b4
                        continue
                    elseif lead == 0xf0 && b2 < 0x90 # overlong encoding
                        push!(out, xor(0x2080, UInt16(b2) << 12, UInt16(b3) << 6, b4))
                    else # 4-byte UTF-8 => surrogate pair
                        push!(out,
                              0xe5b8 + (UInt16(lead) << 8) + (UInt16(b2) << 2) + (b3 >> 4),
                              xor(0xdc80, UInt16(b3 & 0xf) << 6, b4))
                    end
                end
            end
        else # ASCII or invalid UTF-8 (continuation byte or too-high code point)
            push!(out, lead)
        end
        pos < len || break
        lead = src[pos += 1]
    end
    return out
end
231

232
# Convert UTF-16 code units to bytes.
#
# A high surrogate followed by a low surrogate becomes one 4-byte sequence; any
# other code unit (including an unpaired surrogate) is encoded on its own as a
# 1-, 2- or 3-byte sequence.
function transcode(::Type{UInt8}, src::AbstractVector{UInt16})
    require_one_based_indexing(src)
    len = length(src)
    iszero(len) && return UInt8[]

    # Pass 1: compute the exact output size up front. The duplicated walk over
    # `src` is not just an optimization: growing the output dynamically is
    # problematic for security reasons, because Base.winprompt uses this function
    # to convert passwords to UTF-8 and unintentional copies of the password data
    # must be avoided.
    unit = src[1]
    pos, nbytes = 1, 0
    while true
        if unit < 0x80
            nbytes += 1
        elseif unit < 0x800 # 2-byte UTF-8
            nbytes += 2
        elseif 0xd800 <= unit <= 0xdbff && pos < len
            nxt = src[pos += 1]
            if !(0xdc00 <= nxt <= 0xdfff)
                # unpaired high surrogate: 3 bytes, then reprocess `nxt`
                nbytes += 3
                unit = nxt
                continue
            end
            nbytes += 4 # 2-unit UTF-16 sequence => 4-byte UTF-8
        else
            # 1-unit high UTF-16 or unpaired high surrogate
            # either way, encode as 3-byte UTF-8 code point
            nbytes += 3
        end
        pos < len || break
        unit = src[pos += 1]
    end

    # Pass 2: same walk, this time writing the bytes.
    out = StringVector(nbytes)
    unit = src[1]
    pos, w = 1, 0
    while true
        if unit < 0x80 # ASCII
            out[w += 1] = unit % UInt8
        elseif unit < 0x800 # 2-byte UTF-8
            out[w += 1] = 0xc0 | ((unit >> 6) % UInt8)
            out[w += 1] = 0x80 | ((unit % UInt8) & 0x3f)
        elseif 0xd800 <= unit <= 0xdbff && pos < len
            nxt = src[pos += 1]
            if !(0xdc00 <= nxt <= 0xdfff)
                # unpaired high surrogate: 3 bytes, then reprocess `nxt`
                out[w += 1] = 0xe0 | ((unit >> 12) % UInt8)
                out[w += 1] = 0x80 | (((unit >> 6) % UInt8) & 0x3f)
                out[w += 1] = 0x80 | ((unit % UInt8) & 0x3f)
                unit = nxt
                continue
            end
            # 2-unit UTF-16 sequence => 4-byte UTF-8
            unit += 0x2840
            out[w += 1] = 0xf0 | ((unit >> 8) % UInt8)
            out[w += 1] = 0x80 | ((unit % UInt8) >> 2)
            out[w += 1] = xor(0xf0, ((unit % UInt8) << 4) & 0x3f, (nxt >> 6) % UInt8)
            out[w += 1] = 0x80 | ((nxt % UInt8) & 0x3f)
        else
            # 1-unit high UTF-16 or unpaired high surrogate
            # either way, encode as 3-byte UTF-8 code point
            out[w += 1] = 0xe0 | ((unit >> 12) % UInt8)
            out[w += 1] = 0x80 | (((unit >> 6) % UInt8) & 0x3f)
            out[w += 1] = 0x80 | ((unit % UInt8) & 0x3f)
        end
        pos < len || break
        unit = src[pos += 1]
    end
    return out
end
303

304
# View `length` code units starting at `p` as an array (without taking ownership
# of the memory) and transcode them to a `String`.
function unsafe_string(p::Ptr{T}, length::Integer) where {T<:Union{UInt16,UInt32,Cwchar_t}}
    units = unsafe_wrap(Array, p, length; own=false)
    return transcode(String, units)
end
307
# Build a `String` from a NUL-terminated wide C string.
#
# Counts code units up to (not including) the first 0, then delegates to the
# pointer-and-length method. Throws `ArgumentError` for a NULL `cw` rather than
# dereferencing the null pointer.
function unsafe_string(cw::Cwstring)
    p = convert(Ptr{Cwchar_t}, cw)
    p == C_NULL && throw(ArgumentError("cannot convert NULL to string"))
    n = 0
    while unsafe_load(p, n + 1) != 0
        n += 1
    end
    return unsafe_string(p, n)
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc