• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / #38163

07 Aug 2025 12:47PM UTC coverage: 69.322% (+43.6%) from 25.688%
#38163

push

local

web-flow
Fix precompiling when there's no manifest (#59212)

1 of 29 new or added lines in 2 files covered. (3.45%)

2699 existing lines in 92 files now uncovered.

42030 of 60630 relevant lines covered (69.32%)

6207804.78 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

72.19
/base/strings/cstring.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
import Core.Intrinsics: bitcast
4

5
"""
6
    Cwstring
7

8
A C-style string composed of the native wide character type
9
[`Cwchar_t`](@ref)s. `Cwstring`s are NUL-terminated. For
10
C-style strings composed of the native character
11
type, see [`Cstring`](@ref). For more information
12
about string interoperability with C, see the
13
[manual](@ref man-bits-types).
14

15
"""
16
Cwstring
17

18
"""
19
    Cstring
20

21
A C-style string composed of the native character type
22
[`Cchar`](@ref)s. `Cstring`s are NUL-terminated. For
23
C-style strings composed of the native wide character
24
type, see [`Cwstring`](@ref). For more information
25
about string interoperability with C, see the
26
[manual](@ref man-bits-types).
27
"""
28
Cstring
29

30
# construction from pointers
31
# Reinterpret a raw byte (or untyped) pointer as a `Cstring` handle.
# The bits are reused as-is; nothing is copied or validated.
function Cstring(p::Union{Ptr{Int8},Ptr{UInt8},Ptr{Cvoid}})
    return bitcast(Cstring, p)
end

# Wide-character counterpart: reinterpret a `Cwchar_t` (or untyped) pointer as a `Cwstring`.
function Cwstring(p::Union{Ptr{Cwchar_t},Ptr{Cvoid}})
    return bitcast(Cwstring, p)
end

# Recover a raw pointer of the requested element type `T` from a `Cstring`.
function Ptr{T}(p::Cstring) where {T<:Union{Int8,UInt8,Cvoid}}
    return bitcast(Ptr{T}, p)
end
61,387✔
34
# Recover a raw pointer of the requested element type `T` (`Cwchar_t` or `Cvoid`)
# from a `Cwstring`. The result type follows `T`, so `Ptr{Cvoid}(cw)` really is a
# `Ptr{Cvoid}`, matching the `Cstring` method.
Ptr{T}(p::Cwstring) where {T<:Union{Cwchar_t,Cvoid}} = bitcast(Ptr{T}, p)
×
35

36
# `convert` between the C string handle types and raw pointers simply forwards to
# the corresponding constructor, so `ccall` argument conversion can use either form.
function convert(::Type{Cstring}, p::Union{Ptr{Int8},Ptr{UInt8},Ptr{Cvoid}})
    return Cstring(p)
end

function convert(::Type{Cwstring}, p::Union{Ptr{Cwchar_t},Ptr{Cvoid}})
    return Cwstring(p)
end

function convert(::Type{Ptr{T}}, p::Cstring) where {T<:Union{Int8,UInt8,Cvoid}}
    return Ptr{T}(p)
end

function convert(::Type{Ptr{T}}, p::Cwstring) where {T<:Union{Cwchar_t,Cvoid}}
    return Ptr{T}(p)
end
×
40

41
"""
42
    pointer(array [, index])
43

44
Get the native address of an array or string, optionally at a given location `index`.
45

46
This function is "unsafe". Be careful to ensure that a Julia reference to
47
`array` exists as long as this pointer will be used. The [`GC.@preserve`](@ref)
48
macro should be used to protect the `array` argument from garbage collection
49
within a given block of code.
50

51
Calling [`Ref(array[, index])`](@ref Ref) is generally preferable to this function as it guarantees validity.
52
"""
53
function pointer end
54

55
# The native address held by a `Cstring`, typed as a pointer to `Cchar`.
function pointer(p::Cstring)
    return convert(Ptr{Cchar}, p)
end

# The native address held by a `Cwstring`, typed as a pointer to `Cwchar_t`.
function pointer(p::Cwstring)
    return convert(Ptr{Cwchar_t}, p)
end
×
57

58
# comparisons against pointers (mainly to support `cstr==C_NULL`)
59
# A C string handle equals a pointer when its underlying address does; both
# argument orders are provided so `cstr == C_NULL` and `C_NULL == cstr` work.
function ==(x::Union{Cstring,Cwstring}, y::Ptr)
    addr = pointer(x)
    return addr == y
end

function ==(x::Ptr, y::Union{Cstring,Cwstring})
    addr = pointer(y)
    return x == addr
end
4✔
61

62
# Copy the NUL-terminated bytes referenced by `s` into a `String` by delegating to
# the `Ptr{UInt8}` method of `unsafe_string`.
function unsafe_string(s::Cstring)
    bytes = convert(Ptr{UInt8}, s)
    return unsafe_string(bytes)
end
581✔
63

64
# convert strings to String etc. to pass as pointers
UNCOV
65
# A `String` is already in the form `unsafe_convert(Cstring, ...)` accepts, so it
# is passed through untouched.
function cconvert(::Type{Cstring}, s::String)
    return s
end

# Any other string type is first materialised as a `String`, then handled by the
# `String` method.
function cconvert(::Type{Cstring}, s::AbstractString)
    owned = String(s)::String
    return cconvert(Cstring, owned)
end
68

69
# Prepare an arbitrary string for passing as a `Cwstring`: transcode it to
# `Cwchar_t` code units, append the terminating NUL, and let the
# `Vector{Cwchar_t}` method validate the buffer.
function cconvert(::Type{Cwstring}, s::AbstractString)
    units = push!(transcode(Cwchar_t, String(s)), 0)
    return cconvert(Cwstring, units)
end
74

75
# Element types of the C string handles: native characters for `Cstring`,
# native wide characters for `Cwstring`.
function eltype(::Type{Cstring})
    return Cchar
end

function eltype(::Type{Cwstring})
    return Cwchar_t
end
×
77

78
# Report whether any of the `len` bytes starting at `p` is zero, using C `memchr`.
function containsnul(p::Ptr, len)
    hit = ccall(:memchr, Ptr{Cchar}, (Ptr{Cchar}, Cint, Csize_t), p, 0, len)
    return hit != C_NULL
end

# `String` fast path: scan the string's own bytes through the pointer method.
function containsnul(s::String)
    return containsnul(unsafe_convert(Ptr{Cchar}, s), sizeof(s))
end

# Generic fallback: look for a NUL character by iteration.
function containsnul(s::AbstractString)
    return in('\0', s)
end
×
82

83
# Obtain a `Cstring` pointing at the bytes of `s`.
# Throws an `ArgumentError` if `s` contains an embedded NUL byte, because C would
# treat that byte as the end of the string.
function unsafe_convert(::Type{Cstring}, s::String)
    ptr = unsafe_convert(Ptr{Cchar}, s)
    if containsnul(ptr, sizeof(s))
        throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))
    end
    return Cstring(ptr)
end
89

90
# Obtain a `Cstring` pointing at the start of a byte `Memory` buffer.
# This method performs no NUL validation of the buffer contents.
function unsafe_convert(::Type{Cstring}, s::Union{Memory{UInt8},Memory{Int8}})
    raw = unsafe_convert(Ptr{Cvoid}, s)
    return Cstring(raw)
end
51✔
91

92
# Validate a caller-supplied wide-character buffer before it is passed to C as a
# `Cwstring`, then defer to the pointer `cconvert` for the buffer itself.
#
# Throws an `ArgumentError` if any element before the last is NUL (embedded NUL),
# or if the buffer does not end in NUL. An empty buffer counts as "not NUL
# terminated".
function cconvert(::Type{Cwstring}, v::Vector{Cwchar_t})
    n = length(v)
    for i in 1:n-1
        v[i] == 0 &&
            throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(v))"))
    end
    # Guard on `n > 0` so an empty buffer reports the missing terminator instead
    # of failing with a `BoundsError` from indexing.
    (n > 0 && v[n] == 0) ||
        throw(ArgumentError("C string data must be NUL terminated: $(repr(v))"))
    return cconvert(Ptr{Cwchar_t}, v)
end
101
# Generic case: take the `Ptr{Cwchar_t}` of `s` and wrap it as a `Cwstring`.
function unsafe_convert(::Type{Cwstring}, s)
    raw = unsafe_convert(Ptr{Cwchar_t}, s)
    return Cwstring(raw)
end

# A value that is already a `Cwstring` is returned unchanged.
function unsafe_convert(::Type{Cwstring}, s::Cwstring)
    return s
end
×
103

104
# symbols are guaranteed not to contain embedded NUL
UNCOV
105
# A `Symbol` is passed through as-is; its name bytes are used directly.
function cconvert(::Type{Cstring}, s::Symbol)
    return s
end

# Wrap the symbol's name pointer as a `Cstring`. No NUL scan is done in this method.
function unsafe_convert(::Type{Cstring}, s::Symbol)
    name_ptr = unsafe_convert(Ptr{Cchar}, s)
    return Cstring(name_ptr)
end
96,327✔
107

108
if ccall(:jl_get_UNAME, Any, ()) === :NT
"""
    Base.cwstring(s)

Return `s` transcoded to UTF-16 as a NUL-terminated `Vector{Cwchar_t}` that can be passed to
C functions taking a `Ptr{Cwchar_t}`. Compared with the implicit conversion performed for
[`Cwstring`](@ref) arguments, building the buffer once is mainly useful when the same string
is passed to such functions repeatedly.

Throws an `ArgumentError` if `s` contains an embedded NUL.

This is only available on Windows.
"""
function cwstring(s::AbstractString)
    units = codeunits(String(s))
    if 0 in units
        throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))
    end
    wide = transcode(UInt16, units)
    return push!(wide, 0)
end
end
125

126
# transcoding between data in UTF-8 and UTF-16 for Windows APIs,
127
# and also UTF-32 for APIs using Cwchar_t on other platforms.
128

129
"""
130
    transcode(T, src)
131

132
Convert string data between Unicode encodings. `src` is either a
133
`String` or a `Vector{UIntXX}` of UTF-XX code units, where
134
`XX` is 8, 16, or 32. `T` indicates the encoding of the return value:
135
`String` to return a (UTF-8 encoded) `String` or `UIntXX`
136
to return a `Vector{UIntXX}` of UTF-`XX` data. (The alias [`Cwchar_t`](@ref)
137
can also be used as the integer type, for converting `wchar_t*` strings
138
used by external C libraries.)
139

140
The `transcode` function succeeds as long as the input data can be
141
reasonably represented in the target encoding; it always succeeds for
142
conversions between UTF-XX encodings, even for invalid Unicode data.
143

144
Only conversion to/from UTF-8 is currently supported.
145

146
# Examples
147
```jldoctest
148
julia> str = "αβγ"
149
"αβγ"
150

151
julia> transcode(UInt16, str)
152
3-element Vector{UInt16}:
153
 0x03b1
154
 0x03b2
155
 0x03b3
156

157
julia> transcode(String, transcode(UInt16, str))
158
"αβγ"
159
```
160
"""
161
function transcode end
162

163
# Source already holds code units of the requested type: return it unchanged (no copy).
function transcode(::Type{T}, src::AbstractVector{T}) where {T<:Union{UInt8,UInt16,UInt32,Int32}}
    return src
end

# UTF-8 `String` => UTF-32: one element per character, converted with `T(c)`.
function transcode(::Type{T}, src::String) where {T<:Union{Int32,UInt32}}
    return T[T(ch) for ch in src]
end

# UTF-8 bytes => UTF-32: copy the bytes into a `String` and reuse the `String` method.
function transcode(::Type{T}, src::AbstractVector{UInt8}) where {T<:Union{Int32,UInt32}}
    text = String(Vector(src))
    return transcode(T, text)
end

# Code units of a `String` => UTF-32: rebuild the `String` and reuse the `String` method.
function transcode(::Type{T}, src::CodeUnits{UInt8,String}) where {T<:Union{Int32,UInt32}}
    text = String(src)
    return transcode(T, text)
end
169

170
# UTF-32 code points => UTF-8 bytes: print each element as a `Char` into an
# in-memory buffer and return the accumulated bytes.
function transcode(::Type{UInt8}, src::Vector{<:Union{Int32,UInt32}})
    io = IOBuffer()
    foreach(c -> print(io, Char(c)), src)
    return take!(io)
end
177
# `String` => `String` needs no work.
function transcode(::Type{String}, src::String)
    return src
end

# Any other target from a `String`: operate on its UTF-8 code units.
function transcode(T, src::String)
    return transcode(T, codeunits(src))
end

# Any source => `String`: transcode to UTF-8 bytes, then build the `String` from them.
function transcode(::Type{String}, src)
    utf8 = transcode(UInt8, src)
    return String(utf8)
end
85✔
180

181
# UTF-8 bytes => UTF-16 code units.
#
# Well-formed sequences are decoded, with 4-byte sequences becoming a surrogate
# pair. Bytes that do not form a valid sequence are passed through one-to-one as
# individual code units, so the function does not throw on invalid input.
# `src` must use one-based indexing.
function transcode(::Type{UInt16}, src::AbstractVector{UInt8})
    require_one_based_indexing(src)
    dst = UInt16[]
    i, n = 1, length(src)
    n > 0 || return dst
    # Every branch below emits at most one code unit per input byte consumed
    # (a 4-byte sequence yields 2 units), so `n` is an upper bound on the output.
    sizehint!(dst, n)
    a = src[1]
    while true
        if i < n && -64 <= a % Int8 <= -12 # multi-byte character
            b = src[i += 1]
            if -64 <= (b % Int8) || a == 0xf4 && 0x8f < b
                # invalid UTF-8 (non-continuation or too-high code point)
                push!(dst, a)
                a = b; continue
            elseif a < 0xe0 # 2-byte UTF-8
                push!(dst, xor(0x3080, UInt16(a) << 6, b))
            elseif i < n # 3/4-byte character
                c = src[i += 1]
                if -64 <= (c % Int8) # invalid UTF-8 (non-continuation)
                    push!(dst, a, b)
                    a = c; continue
                elseif a < 0xf0 # 3-byte UTF-8
                    push!(dst, xor(0x2080, UInt16(a) << 12, UInt16(b) << 6, c))
                elseif i < n
                    d = src[i += 1]
                    if -64 <= (d % Int8) # invalid UTF-8 (non-continuation)
                        push!(dst, a, b, c)
                        a = d; continue
                    elseif a == 0xf0 && b < 0x90 # overlong encoding
                        push!(dst, xor(0x2080, UInt16(b) << 12, UInt16(c) << 6, d))
                    else # 4-byte UTF-8
                        push!(dst, 0xe5b8 + (UInt16(a) << 8) + (UInt16(b) << 2) + (c >> 4),
                                   xor(0xdc80, UInt16(c & 0xf) << 6, d))
                    end
                else # too short
                    push!(dst, a, b, c)
                    break
                end
            else # too short
                push!(dst, a, b)
                break
            end
        else # ASCII or invalid UTF-8 (continuation byte or too-high code point)
            push!(dst, a)
        end
        i < n || break
        a = src[i += 1]
    end
    return dst
end
231

232
# UTF-16 code units => UTF-8 bytes.
#
# Works in two passes over `src` that must make identical decisions: the first
# computes the exact output size `m`, the second fills a buffer of that size.
# A high surrogate followed by a low surrogate becomes one 4-byte sequence; any
# other unit >= 0x800 (including unpaired surrogates) becomes a 3-byte sequence.
# `src` must use one-based indexing.
function transcode(::Type{UInt8}, src::AbstractVector{UInt16})
    require_one_based_indexing(src)
    n = length(src)
    n == 0 && return UInt8[]

    # Precompute m = sizeof(dst).   This involves annoying duplication
    # of the loop over the src array.   However, this is not just an
    # optimization: it is problematic for security reasons to grow
    # dst dynamically, because Base.winprompt uses this function to
    # convert passwords to UTF-8 and we don't want to make unintentional
    # copies of the password data.
    a = src[1]
    i, m = 1, 0
    while true
        if a < 0x80
            m += 1
        elseif a < 0x800 # 2-byte UTF-8
            m += 2
        elseif a & 0xfc00 == 0xd800 && i < n # same bound as the encoding pass below
            b = src[i += 1]
            if (b & 0xfc00) == 0xdc00 # 2-unit UTF-16 sequence => 4-byte UTF-8
                m += 4
            else
                m += 3
                a = b; continue
            end
        else
            # 1-unit high UTF-16 or unpaired high surrogate
            # either way, encode as 3-byte UTF-8 code point
            m += 3
        end
        i < n || break
        a = src[i += 1]
    end

    dst = StringVector(m)
    a = src[1]
    i, j = 1, 0
    while true
        if a < 0x80 # ASCII
            dst[j += 1] = a % UInt8
        elseif a < 0x800 # 2-byte UTF-8
            dst[j += 1] = 0xc0 | ((a >> 6) % UInt8)
            dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f)
        elseif a & 0xfc00 == 0xd800 && i < n
            b = src[i += 1]
            if (b & 0xfc00) == 0xdc00
                # 2-unit UTF-16 sequence => 4-byte UTF-8
                a += 0x2840
                dst[j += 1] = 0xf0 | ((a >> 8) % UInt8)
                dst[j += 1] = 0x80 | ((a % UInt8) >> 2)
                dst[j += 1] = xor(0xf0, ((a % UInt8) << 4) & 0x3f, (b >> 6) % UInt8)
                dst[j += 1] = 0x80 | ((b % UInt8) & 0x3f)
            else
                # unpaired high surrogate: emit it as 3 bytes, then reprocess `b`
                dst[j += 1] = 0xe0 | ((a >> 12) % UInt8)
                dst[j += 1] = 0x80 | (((a >> 6) % UInt8) & 0x3f)
                dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f)
                a = b; continue
            end
        else
            # 1-unit high UTF-16 or unpaired high surrogate
            # either way, encode as 3-byte UTF-8 code point
            dst[j += 1] = 0xe0 | ((a >> 12) % UInt8)
            dst[j += 1] = 0x80 | (((a >> 6) % UInt8) & 0x3f)
            dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f)
        end
        i < n || break
        a = src[i += 1]
    end
    return dst
end
303

304
# Build a `String` from `length` wide code units starting at `p`. The memory is
# viewed in place (not owned by Julia) and transcoded to UTF-8.
function unsafe_string(p::Ptr{T}, length::Integer) where {T<:Union{UInt16,UInt32,Cwchar_t}}
    units = unsafe_wrap(Array, p, length; own=false)
    return transcode(String, units)
end
307
# Copy the NUL-terminated wide string referenced by `cw` into a `String`.
#
# The length is found by scanning for the first zero code unit; the units before
# it are then converted by the pointer-and-length method.
# Throws an `ArgumentError` if `cw` is a NULL pointer, instead of dereferencing it.
function unsafe_string(cw::Cwstring)
    p = convert(Ptr{Cwchar_t}, cw)
    p == C_NULL && throw(ArgumentError("cannot convert NULL to string"))
    n = 1
    while unsafe_load(p, n) != 0
        n += 1
    end
    # `n` is the 1-based index of the terminator, so the payload has `n - 1` units.
    return unsafe_string(p, n - 1)
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc