• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / #37997

29 Jan 2025 02:08AM UTC coverage: 17.283% (-68.7%) from 85.981%
#37997

push

local

web-flow
bpart: Start enforcing min_world for global variable definitions (#57150)

This is the analog of #57102 for global variables. Unlike for constants,
there is no automatic global backdate mechanism. The reasoning for this
is that global variables can be declared at any time, unlike constants
which can only be declared once their value is available. As a result
code patterns using `Core.eval` to declare globals are rarer and likely
incorrect.

1 of 22 new or added lines in 3 files covered. (4.55%)

31430 existing lines in 188 files now uncovered.

7903 of 45728 relevant lines covered (17.28%)

98663.7 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

9.21
/base/strings/cstring.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
import Core.Intrinsics: bitcast
4

5
"""
6
    Cwstring
7

8
A C-style string composed of the native wide character type
9
[`Cwchar_t`](@ref)s. `Cwstring`s are NUL-terminated. For
10
C-style strings composed of the native character
11
type, see [`Cstring`](@ref). For more information
12
about string interoperability with C, see the
13
[manual](@ref man-bits-types).
14

15
"""
16
Cwstring
17

18
"""
19
    Cstring
20

21
A C-style string composed of the native character type
22
[`Cchar`](@ref)s. `Cstring`s are NUL-terminated. For
23
C-style strings composed of the native wide character
24
type, see [`Cwstring`](@ref). For more information
25
about string interoperability with C, see the
26
[manual](@ref man-bits-types).
27
"""
28
Cstring
29

30
# construction from pointers
31
Cstring(p::Union{Ptr{Int8},Ptr{UInt8},Ptr{Cvoid}}) = bitcast(Cstring, p)
375✔
UNCOV
32
Cwstring(p::Union{Ptr{Cwchar_t},Ptr{Cvoid}})       = bitcast(Cwstring, p)
×
33
Ptr{T}(p::Cstring) where {T<:Union{Int8,UInt8,Cvoid}} = bitcast(Ptr{T}, p)
32✔
34
# Reinterpret a `Cwstring` as a raw pointer with the requested element type `T`
# (`Cwchar_t` or `Cvoid`). The result type must be `Ptr{T}` so that
# `Ptr{Cvoid}(cw)` and `convert(Ptr{Cvoid}, cw)` yield a `Ptr{Cvoid}`.
Ptr{T}(p::Cwstring) where {T<:Union{Cwchar_t,Cvoid}}  = bitcast(Ptr{T}, p)
×
35

36
convert(::Type{Cstring}, p::Union{Ptr{Int8},Ptr{UInt8},Ptr{Cvoid}}) = Cstring(p)
71✔
37
convert(::Type{Cwstring}, p::Union{Ptr{Cwchar_t},Ptr{Cvoid}}) = Cwstring(p)
×
38
convert(::Type{Ptr{T}}, p::Cstring) where {T<:Union{Int8,UInt8,Cvoid}} = Ptr{T}(p)
32✔
39
convert(::Type{Ptr{T}}, p::Cwstring) where {T<:Union{Cwchar_t,Cvoid}} = Ptr{T}(p)
×
40

41
"""
42
    pointer(array [, index])
43

44
Get the native address of an array or string, optionally at a given location `index`.
45

46
This function is "unsafe". Be careful to ensure that a Julia reference to
47
`array` exists as long as this pointer will be used. The [`GC.@preserve`](@ref)
48
macro should be used to protect the `array` argument from garbage collection
49
within a given block of code.
50

51
Calling [`Ref(array[, index])`](@ref Ref) is generally preferable to this function as it guarantees validity.
52
"""
53
function pointer end
54

55
pointer(p::Cstring) = convert(Ptr{Cchar}, p)
15✔
56
pointer(p::Cwstring) = convert(Ptr{Cwchar_t}, p)
×
57

58
# comparisons against pointers (mainly to support `cstr==C_NULL`)
59
==(x::Union{Cstring,Cwstring}, y::Ptr) = pointer(x) == y
15✔
UNCOV
60
==(x::Ptr, y::Union{Cstring,Cwstring}) = x == pointer(y)
×
61

62
unsafe_string(s::Cstring) = unsafe_string(convert(Ptr{UInt8}, s))
17✔
63

64
# convert strings to String etc. to pass as pointers
UNCOV
65
cconvert(::Type{Cstring}, s::String) = s
×
UNCOV
66
cconvert(::Type{Cstring}, s::AbstractString) =
×
67
    cconvert(Cstring, String(s)::String)
68

UNCOV
69
# Transcode `s` to `Cwchar_t` code units, append a terminating 0, and hand the
# resulting vector to the `cconvert(Cwstring, ...)` method for vectors.
function cconvert(::Type{Cwstring}, s::AbstractString)
    units = push!(transcode(Cwchar_t, String(s)), 0)
    return cconvert(Cwstring, units)
end
74

75
eltype(::Type{Cstring}) = Cchar
×
76
eltype(::Type{Cwstring}) = Cwchar_t
×
77

78
# Return `true` when `memchr` finds a 0 byte within the first `len` bytes at `p`.
function containsnul(p::Ptr, len)
    hit = ccall(:memchr, Ptr{Cchar}, (Ptr{Cchar}, Cint, Csize_t), p, 0, len)
    return hit != C_NULL
end
80
containsnul(s::String) = containsnul(unsafe_convert(Ptr{Cchar}, s), sizeof(s))
44✔
UNCOV
81
containsnul(s::AbstractString) = '\0' in s
×
82

83
# Produce a `Cstring` pointing at the bytes of `s`.
# Throws `ArgumentError` if any of the `sizeof(s)` bytes is NUL.
function unsafe_convert(::Type{Cstring}, s::String)
    ptr = unsafe_convert(Ptr{Cchar}, s)
    if containsnul(ptr, sizeof(s))
        throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))
    end
    return Cstring(ptr)
end
89

UNCOV
90
unsafe_convert(::Type{Cstring}, s::Union{Memory{UInt8},Memory{Int8}}) = Cstring(unsafe_convert(Ptr{Cvoid}, s))
×
91

UNCOV
92
# Validate that `v` can be passed as a C wide string, then defer to
# `cconvert(Ptr{Cwchar_t}, v)`.
#
# Throws `ArgumentError` when a unit before the last is 0 (embedded NUL) or
# when the last unit is not 0 (missing terminator). An empty vector has no
# terminator, so it is reported as "must be NUL terminated" rather than
# failing with a `BoundsError` on `v[end]`.
function cconvert(::Type{Cwstring}, v::Vector{Cwchar_t})
    n = length(v)
    # Every unit except the final one must be non-NUL.
    for i in 1:n-1
        v[i] == 0 &&
            throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(v))"))
    end
    (n == 0 || v[n] != 0) &&
        throw(ArgumentError("C string data must be NUL terminated: $(repr(v))"))
    return cconvert(Ptr{Cwchar_t}, v)
end
UNCOV
101
unsafe_convert(::Type{Cwstring}, s) = Cwstring(unsafe_convert(Ptr{Cwchar_t}, s))
×
102
unsafe_convert(::Type{Cwstring}, s::Cwstring) = s
×
103

104
# symbols are guaranteed not to contain embedded NUL
UNCOV
105
cconvert(::Type{Cstring}, s::Symbol) = s
×
106
unsafe_convert(::Type{Cstring}, s::Symbol) = Cstring(unsafe_convert(Ptr{Cchar}, s))
181✔
107

108
if ccall(:jl_get_UNAME, Any, ()) === :NT
109
"""
110
    Base.cwstring(s)
111

112
Converts a string `s` to a NUL-terminated `Vector{Cwchar_t}`, suitable for passing to C
113
functions expecting a `Ptr{Cwchar_t}`. The main advantage of using this over the implicit
114
conversion provided by [`Cwstring`](@ref) is if the function is called multiple times with the
115
same argument.
116

117
This is only available on Windows.
118
"""
119
function cwstring(s::AbstractString)
    units = codeunits(String(s))
    # Reject input containing a 0 byte before transcoding.
    if 0 in units
        throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))
    end
    wide = transcode(UInt16, units)
    return push!(wide, 0)
end
124
end
125

126
# transcoding between data in UTF-8 and UTF-16 for Windows APIs,
127
# and also UTF-32 for APIs using Cwchar_t on other platforms.
128

129
"""
130
    transcode(T, src)
131

132
Convert string data between Unicode encodings. `src` is either a
133
`String` or a `Vector{UIntXX}` of UTF-XX code units, where
134
`XX` is 8, 16, or 32. `T` indicates the encoding of the return value:
135
`String` to return a (UTF-8 encoded) `String` or `UIntXX`
136
to return a `Vector{UIntXX}` of UTF-`XX` data. (The alias [`Cwchar_t`](@ref)
137
can also be used as the integer type, for converting `wchar_t*` strings
138
used by external C libraries.)
139

140
The `transcode` function succeeds as long as the input data can be
141
reasonably represented in the target encoding; it always succeeds for
142
conversions between UTF-XX encodings, even for invalid Unicode data.
143

144
Only conversion to/from UTF-8 is currently supported.
145

146
# Examples
147
```jldoctest
148
julia> str = "αβγ"
149
"αβγ"
150

151
julia> transcode(UInt16, str)
152
3-element Vector{UInt16}:
153
 0x03b1
154
 0x03b2
155
 0x03b3
156

157
julia> transcode(String, transcode(UInt16, str))
158
"αβγ"
159
```
160
"""
161
function transcode end
162

UNCOV
163
transcode(::Type{T}, src::AbstractVector{T}) where {T<:Union{UInt8,UInt16,UInt32,Int32}} = src
×
UNCOV
164
transcode(::Type{T}, src::String) where {T<:Union{Int32,UInt32}} = T[T(c) for c in src]
×
165
transcode(::Type{T}, src::AbstractVector{UInt8}) where {T<:Union{Int32,UInt32}} =
×
166
    transcode(T, String(Vector(src)))
UNCOV
167
transcode(::Type{T}, src::CodeUnits{UInt8,String}) where {T<:Union{Int32,UInt32}} =
×
168
    transcode(T, String(src))
169

UNCOV
170
# Print each element of `src` as a `Char` into an in-memory buffer and return
# the buffer's bytes.
function transcode(::Type{UInt8}, src::Vector{<:Union{Int32,UInt32}})
    io = IOBuffer()
    foreach(cp -> print(io, Char(cp)), src)
    return take!(io)
end
UNCOV
177
transcode(::Type{String}, src::String) = src
×
178
transcode(T, src::String) = transcode(T, codeunits(src))
60,961✔
UNCOV
179
transcode(::Type{String}, src) = String(transcode(UInt8, src))
×
180

UNCOV
181
function transcode(::Type{UInt16}, src::AbstractVector{UInt8})
×
UNCOV
182
    require_one_based_indexing(src)
×
UNCOV
183
    dst = UInt16[]
×
UNCOV
184
    i, n = 1, length(src)
×
UNCOV
185
    n > 0 || return dst
×
UNCOV
186
    sizehint!(dst, 2n)
×
UNCOV
187
    a = src[1]
×
UNCOV
188
    while true
×
UNCOV
189
        if i < n && -64 <= a % Int8 <= -12 # multi-byte character
×
UNCOV
190
            b = src[i += 1]
×
UNCOV
191
            if -64 <= (b % Int8) || a == 0xf4 && 0x8f < b
×
192
                # invalid UTF-8 (non-continuation or too-high code point)
193
                push!(dst, a)
×
194
                a = b; continue
×
UNCOV
195
            elseif a < 0xe0 # 2-byte UTF-8
×
UNCOV
196
                push!(dst, xor(0x3080, UInt16(a) << 6, b))
×
UNCOV
197
            elseif i < n # 3/4-byte character
×
UNCOV
198
                c = src[i += 1]
×
UNCOV
199
                if -64 <= (c % Int8) # invalid UTF-8 (non-continuation)
×
200
                    push!(dst, a, b)
×
201
                    a = c; continue
×
UNCOV
202
                elseif a < 0xf0 # 3-byte UTF-8
×
UNCOV
203
                    push!(dst, xor(0x2080, UInt16(a) << 12, UInt16(b) << 6, c))
×
UNCOV
204
                elseif i < n
×
UNCOV
205
                    d = src[i += 1]
×
UNCOV
206
                    if -64 <= (d % Int8) # invalid UTF-8 (non-continuation)
×
207
                        push!(dst, a, b, c)
×
208
                        a = d; continue
×
UNCOV
209
                    elseif a == 0xf0 && b < 0x90 # overlong encoding
×
210
                        push!(dst, xor(0x2080, UInt16(b) << 12, UInt16(c) << 6, d))
×
211
                    else # 4-byte UTF-8
UNCOV
212
                        push!(dst, 0xe5b8 + (UInt16(a) << 8) + (UInt16(b) << 2) + (c >> 4),
×
213
                                   xor(0xdc80, UInt16(c & 0xf) << 6, d))
214
                    end
215
                else # too short
216
                    push!(dst, a, b, c)
×
217
                    break
×
218
                end
219
            else # too short
220
                push!(dst, a, b)
×
221
                break
×
222
            end
223
        else # ASCII or invalid UTF-8 (continuation byte or too-high code point)
UNCOV
224
            push!(dst, a)
×
225
        end
UNCOV
226
        i < n || break
×
UNCOV
227
        a = src[i += 1]
×
UNCOV
228
    end
×
UNCOV
229
    return dst
×
230
end
231

UNCOV
232
function transcode(::Type{UInt8}, src::AbstractVector{UInt16})
×
UNCOV
233
    require_one_based_indexing(src)
×
UNCOV
234
    n = length(src)
×
UNCOV
235
    n == 0 && return UInt8[]
×
236

237
    # Precompute m = sizeof(dst).   This involves annoying duplication
238
    # of the loop over the src array.   However, this is not just an
239
    # optimization: it is problematic for security reasons to grow
240
    # dst dynamically, because Base.winprompt uses this function to
241
    # convert passwords to UTF-8 and we don't want to make unintentional
242
    # copies of the password data.
UNCOV
243
    a = src[1]
×
UNCOV
244
    i, m = 1, 0
×
UNCOV
245
    while true
×
UNCOV
246
        if a < 0x80
×
UNCOV
247
            m += 1
×
UNCOV
248
        elseif a < 0x800 # 2-byte UTF-8
×
UNCOV
249
            m += 2
×
UNCOV
250
        elseif a & 0xfc00 == 0xd800 && i < length(src)
×
UNCOV
251
            b = src[i += 1]
×
UNCOV
252
            if (b & 0xfc00) == 0xdc00 # 2-unit UTF-16 sequence => 4-byte UTF-8
×
UNCOV
253
                m += 4
×
254
            else
255
                m += 3
×
256
                a = b; continue
×
257
            end
258
        else
259
            # 1-unit high UTF-16 or unpaired high surrogate
260
            # either way, encode as 3-byte UTF-8 code point
UNCOV
261
            m += 3
×
262
        end
UNCOV
263
        i < n || break
×
UNCOV
264
        a = src[i += 1]
×
UNCOV
265
    end
×
266

UNCOV
267
    dst = StringVector(m)
×
UNCOV
268
    a = src[1]
×
UNCOV
269
    i, j = 1, 0
×
UNCOV
270
    while true
×
UNCOV
271
        if a < 0x80 # ASCII
×
UNCOV
272
            dst[j += 1] = a % UInt8
×
UNCOV
273
        elseif a < 0x800 # 2-byte UTF-8
×
UNCOV
274
            dst[j += 1] = 0xc0 | ((a >> 6) % UInt8)
×
UNCOV
275
            dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f)
×
UNCOV
276
        elseif a & 0xfc00 == 0xd800 && i < n
×
UNCOV
277
            b = src[i += 1]
×
UNCOV
278
            if (b & 0xfc00) == 0xdc00
×
279
                # 2-unit UTF-16 sequence => 4-byte UTF-8
UNCOV
280
                a += 0x2840
×
UNCOV
281
                dst[j += 1] = 0xf0 | ((a >> 8) % UInt8)
×
UNCOV
282
                dst[j += 1] = 0x80 | ((a % UInt8) >> 2)
×
UNCOV
283
                dst[j += 1] = xor(0xf0, ((a % UInt8) << 4) & 0x3f, (b >> 6) % UInt8)
×
UNCOV
284
                dst[j += 1] = 0x80 | ((b % UInt8) & 0x3f)
×
285
            else
286
                dst[j += 1] = 0xe0 | ((a >> 12) % UInt8)
×
287
                dst[j += 1] = 0x80 | (((a >> 6) % UInt8) & 0x3f)
×
288
                dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f)
×
289
                a = b; continue
×
290
            end
291
        else
292
            # 1-unit high UTF-16 or unpaired high surrogate
293
            # either way, encode as 3-byte UTF-8 code point
UNCOV
294
            dst[j += 1] = 0xe0 | ((a >> 12) % UInt8)
×
UNCOV
295
            dst[j += 1] = 0x80 | (((a >> 6) % UInt8) & 0x3f)
×
UNCOV
296
            dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f)
×
297
        end
UNCOV
298
        i < n || break
×
UNCOV
299
        a = src[i += 1]
×
UNCOV
300
    end
×
UNCOV
301
    return dst
×
302
end
303

304
# Wrap `length` code units starting at `p` as a non-owning array and transcode
# that array to a `String`.
function unsafe_string(p::Ptr{T}, length::Integer) where {T<:Union{UInt16,UInt32,Cwchar_t}}
    units = unsafe_wrap(Array, p, length; own=false)
    return transcode(String, units)
end
307
# Scan forward from the start of `cw` until a 0 unit is loaded, then convert
# the units before it with the pointer/length method of `unsafe_string`.
function unsafe_string(cw::Cwstring)
    base = convert(Ptr{Cwchar_t}, cw)
    len = 0
    # `len` counts the non-zero units seen so far; index `len + 1` is the next one.
    while unsafe_load(base, len + 1) != 0
        len += 1
    end
    return unsafe_string(base, len)
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc