• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / #37846

22 Jul 2024 01:06AM UTC coverage: 86.908% (+0.8%) from 86.098%
#37846

push

local

web-flow
Preserve Git objects from being garbage collected (#55142)

This issue has been discussed
[here](https://discourse.julialang.org/t/preserve-against-garbage-collection-in-libgit2/117095).

In most cases, thanks to the specialization of `Base.unsafe_convert`, it
is sufficient to replace `obj.ptr` by `obj` in `ccalls` to fix the
issue.

In other cases, for example when a pointer to an internal string is
returned, the code has to be wrapped in a `GC.@preserve
obj begin ... end` block.

All `LibGit2` tests run successfully. I have left a few `FIXME` comments
where I have doubts about the code, notably with `Ptr{Ptr{Cvoid}}`
arguments.

71 of 73 new or added lines in 15 files covered. (97.26%)

560 existing lines in 28 files now uncovered.

76034 of 87488 relevant lines covered (86.91%)

15443040.18 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

25.17
/base/strings/cstring.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
import Core.Intrinsics: bitcast
4

5
"""
6
    Cwstring
7

8
A C-style string composed of the native wide character type
9
[`Cwchar_t`](@ref)s. `Cwstring`s are NUL-terminated. For
10
C-style strings composed of the native character
11
type, see [`Cstring`](@ref). For more information
12
about string interoperability with C, see the
13
[manual](@ref man-bits-types).
14

15
"""
16
Cwstring
17

18
"""
19
    Cstring
20

21
A C-style string composed of the native character type
22
[`Cchar`](@ref)s. `Cstring`s are NUL-terminated. For
23
C-style strings composed of the native wide character
24
type, see [`Cwstring`](@ref). For more information
25
about string interoperability with C, see the
26
[manual](@ref man-bits-types).
27
"""
28
Cstring
29

30
# construction from pointers
31
Cstring(p::Union{Ptr{Int8},Ptr{UInt8},Ptr{Cvoid}}) = bitcast(Cstring, p)
79,484,880✔
32
Cwstring(p::Union{Ptr{Cwchar_t},Ptr{Cvoid}})       = bitcast(Cwstring, p)
32✔
33
Ptr{T}(p::Cstring) where {T<:Union{Int8,UInt8,Cvoid}} = bitcast(Ptr{T}, p)
2,682,888✔
34
# Reinterpret a `Cwstring` as a raw pointer whose element type is the requested `T`
# (`Cwchar_t` or `Cvoid`). The result type must follow `T`: `Ptr{Cvoid}(cw)` has to
# yield a `Ptr{Cvoid}`, not a `Ptr{Cwchar_t}`, so that `convert(Ptr{Cvoid}, cw)` is honest.
Ptr{T}(p::Cwstring) where {T<:Union{Cwchar_t,Cvoid}}  = bitcast(Ptr{T}, p)
×
35

36
convert(::Type{Cstring}, p::Union{Ptr{Int8},Ptr{UInt8},Ptr{Cvoid}}) = Cstring(p)
768,874✔
37
convert(::Type{Cwstring}, p::Union{Ptr{Cwchar_t},Ptr{Cvoid}}) = Cwstring(p)
×
38
convert(::Type{Ptr{T}}, p::Cstring) where {T<:Union{Int8,UInt8,Cvoid}} = Ptr{T}(p)
2,682,888✔
39
convert(::Type{Ptr{T}}, p::Cwstring) where {T<:Union{Cwchar_t,Cvoid}} = Ptr{T}(p)
×
40

41
"""
42
    pointer(array [, index])
43

44
Get the native address of an array or string, optionally at a given location `index`.
45

46
This function is "unsafe". Be careful to ensure that a Julia reference to
47
`array` exists as long as this pointer will be used. The [`GC.@preserve`](@ref)
48
macro should be used to protect the `array` argument from garbage collection
49
within a given block of code.
50

51
Calling [`Ref(array[, index])`](@ref Ref) is generally preferable to this function as it guarantees validity.
52
"""
53
function pointer end
54

55
pointer(p::Cstring) = convert(Ptr{Cchar}, p)
1,379,539✔
56
pointer(p::Cwstring) = convert(Ptr{Cwchar_t}, p)
×
57

58
# comparisons against pointers (mainly to support `cstr==C_NULL`)
59
==(x::Union{Cstring,Cwstring}, y::Ptr) = pointer(x) == y
1,379,534✔
60
==(x::Ptr, y::Union{Cstring,Cwstring}) = x == pointer(y)
4✔
61

62
unsafe_string(s::Cstring) = unsafe_string(convert(Ptr{UInt8}, s))
1,303,333✔
63

64
# convert strings to String etc. to pass as pointers
65
cconvert(::Type{Cstring}, s::String) = s
146✔
66
cconvert(::Type{Cstring}, s::AbstractString) =
5,356✔
67
    cconvert(Cstring, String(s)::String)
68

69
# Prepare an arbitrary string for passing as a `Cwstring`: transcode it to wide code
# units, append the terminating NUL, and hand the buffer to the `Vector{Cwchar_t}`
# method of `cconvert`.
function cconvert(::Type{Cwstring}, s::AbstractString)
    wide = push!(transcode(Cwchar_t, String(s)), 0)
    return cconvert(Cwstring, wide)
end
74

75
eltype(::Type{Cstring}) = Cchar
×
76
eltype(::Type{Cwstring}) = Cwchar_t
×
77

78
# Report whether any of the `len` bytes starting at `p` is a NUL byte.
# The scan is delegated to C `memchr`, which returns NULL when no match is found.
function containsnul(p::Ptr, len)
    hit = ccall(:memchr, Ptr{Cchar}, (Ptr{Cchar}, Cint, Csize_t), p, 0, len)
    return C_NULL != hit
end
80
containsnul(s::String) = containsnul(unsafe_convert(Ptr{Cchar}, s), sizeof(s))
39,454✔
81
containsnul(s::AbstractString) = '\0' in s
3,575✔
82

83
# Produce a `Cstring` pointing at the bytes of `s`.
# Throws `ArgumentError` if `s` contains a NUL byte within its `sizeof(s)` bytes.
function unsafe_convert(::Type{Cstring}, s::String)
    data = unsafe_convert(Ptr{Cchar}, s)
    if containsnul(data, sizeof(s))
        throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))
    end
    return Cstring(data)
end
89

UNCOV
90
unsafe_convert(::Type{Cstring}, s::Union{Memory{UInt8},Memory{Int8}}) = Cstring(unsafe_convert(Ptr{Cvoid}, s))
×
91

92
# Validate that `v` is a well-formed wide C string buffer and forward it to the
# `Ptr{Cwchar_t}` conversion.
#
# Throws `ArgumentError` when a NUL appears before the last element, or when the last
# element is not NUL. An empty vector has no terminator at all, so it is reported as
# "must be NUL terminated" instead of failing with a `BoundsError` on `v[end]`.
function cconvert(::Type{Cwstring}, v::Vector{Cwchar_t})
    n = length(v)
    # Every element except the last must be non-NUL.
    for i in 1:n-1
        v[i] == 0 &&
            throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(v))"))
    end
    (n > 0 && v[n] == 0) ||
        throw(ArgumentError("C string data must be NUL terminated: $(repr(v))"))
    return cconvert(Ptr{Cwchar_t}, v)
end
101
unsafe_convert(::Type{Cwstring}, s) = Cwstring(unsafe_convert(Ptr{Cwchar_t}, s))
32✔
102
unsafe_convert(::Type{Cwstring}, s::Cwstring) = s
×
103

104
# symbols are guaranteed not to contain embedded NUL
105
cconvert(::Type{Cstring}, s::Symbol) = s
73✔
106
unsafe_convert(::Type{Cstring}, s::Symbol) = Cstring(unsafe_convert(Ptr{Cchar}, s))
37,655,913✔
107

108
if ccall(:jl_get_UNAME, Any, ()) === :NT
109
"""
110
    Base.cwstring(s)
111

112
Converts a string `s` to a NUL-terminated `Vector{Cwchar_t}`, suitable for passing to C
113
functions expecting a `Ptr{Cwchar_t}`. The main advantage of using this over the implicit
114
conversion provided by [`Cwstring`](@ref) is if the function is called multiple times with the
115
same argument.
116

117
This is only available on Windows.
118
"""
119
function cwstring(s::AbstractString)
    # Work on the UTF-8 code units of a concrete `String` copy of `s`.
    utf8 = codeunits(String(s))
    # A NUL byte in the input would silently truncate the C string, so reject it.
    if 0 in utf8
        throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))
    end
    utf16 = transcode(UInt16, utf8)
    # Append the terminating NUL expected by C callers.
    return push!(utf16, 0)
end
124
end
125

126
# transcoding between data in UTF-8 and UTF-16 for Windows APIs,
127
# and also UTF-32 for APIs using Cwchar_t on other platforms.
128

129
"""
130
    transcode(T, src)
131

132
Convert string data between Unicode encodings. `src` is either a
133
`String` or a `Vector{UIntXX}` of UTF-XX code units, where
134
`XX` is 8, 16, or 32. `T` indicates the encoding of the return value:
135
`String` to return a (UTF-8 encoded) `String` or `UIntXX`
136
to return a `Vector{UIntXX}` of UTF-`XX` data. (The alias [`Cwchar_t`](@ref)
137
can also be used as the integer type, for converting `wchar_t*` strings
138
used by external C libraries.)
139

140
The `transcode` function succeeds as long as the input data can be
141
reasonably represented in the target encoding; it always succeeds for
142
conversions between UTF-XX encodings, even for invalid Unicode data.
143

144
Only conversion to/from UTF-8 is currently supported.
145

146
# Examples
147
```jldoctest
148
julia> str = "αβγ"
149
"αβγ"
150

151
julia> transcode(UInt16, str)
152
3-element Vector{UInt16}:
153
 0x03b1
154
 0x03b2
155
 0x03b3
156

157
julia> transcode(String, transcode(UInt16, str))
158
"αβγ"
159
```
160
"""
161
function transcode end
162

163
transcode(::Type{T}, src::AbstractVector{T}) where {T<:Union{UInt8,UInt16,UInt32,Int32}} = src
×
164
transcode(::Type{T}, src::String) where {T<:Union{Int32,UInt32}} = T[T(c) for c in src]
32✔
165
transcode(::Type{T}, src::AbstractVector{UInt8}) where {T<:Union{Int32,UInt32}} =
×
166
    transcode(T, String(Vector(src)))
167
transcode(::Type{T}, src::CodeUnits{UInt8,String}) where {T<:Union{Int32,UInt32}} =
×
168
    transcode(T, String(src))
169

170
# Encode a vector of 32-bit code units as UTF-8 bytes by printing each unit as a
# `Char` into an in-memory buffer and returning the buffer's contents.
function transcode(::Type{UInt8}, src::Vector{<:Union{Int32,UInt32}})
    io = IOBuffer()
    foreach(unit -> print(io, Char(unit)), src)
    return take!(io)
end
177
transcode(::Type{String}, src::String) = src
×
178
transcode(T, src::String) = transcode(T, codeunits(src))
82,408✔
179
transcode(::Type{String}, src) = String(transcode(UInt8, src))
64✔
180

181
function transcode(::Type{UInt16}, src::AbstractVector{UInt8})
×
182
    require_one_based_indexing(src)
×
183
    dst = UInt16[]
×
184
    i, n = 1, length(src)
×
185
    n > 0 || return dst
×
186
    sizehint!(dst, 2n)
×
187
    a = src[1]
×
188
    while true
×
189
        if i < n && -64 <= a % Int8 <= -12 # multi-byte character
×
190
            b = src[i += 1]
×
191
            if -64 <= (b % Int8) || a == 0xf4 && 0x8f < b
×
192
                # invalid UTF-8 (non-continuation or too-high code point)
193
                push!(dst, a)
×
194
                a = b; continue
×
195
            elseif a < 0xe0 # 2-byte UTF-8
×
196
                push!(dst, xor(0x3080, UInt16(a) << 6, b))
×
197
            elseif i < n # 3/4-byte character
×
198
                c = src[i += 1]
×
199
                if -64 <= (c % Int8) # invalid UTF-8 (non-continuation)
×
200
                    push!(dst, a, b)
×
201
                    a = c; continue
×
202
                elseif a < 0xf0 # 3-byte UTF-8
×
203
                    push!(dst, xor(0x2080, UInt16(a) << 12, UInt16(b) << 6, c))
×
204
                elseif i < n
×
205
                    d = src[i += 1]
×
206
                    if -64 <= (d % Int8) # invalid UTF-8 (non-continuation)
×
207
                        push!(dst, a, b, c)
×
208
                        a = d; continue
×
209
                    elseif a == 0xf0 && b < 0x90 # overlong encoding
×
210
                        push!(dst, xor(0x2080, UInt16(b) << 12, UInt16(c) << 6, d))
×
211
                    else # 4-byte UTF-8
212
                        push!(dst, 0xe5b8 + (UInt16(a) << 8) + (UInt16(b) << 2) + (c >> 4),
×
213
                                   xor(0xdc80, UInt16(c & 0xf) << 6, d))
214
                    end
215
                else # too short
216
                    push!(dst, a, b, c)
×
217
                    break
×
218
                end
219
            else # too short
220
                push!(dst, a, b)
×
221
                break
×
222
            end
223
        else # ASCII or invalid UTF-8 (continuation byte or too-high code point)
224
            push!(dst, a)
×
225
        end
226
        i < n || break
×
227
        a = src[i += 1]
×
228
    end
×
229
    return dst
×
230
end
231

232
function transcode(::Type{UInt8}, src::AbstractVector{UInt16})
×
233
    require_one_based_indexing(src)
×
234
    n = length(src)
×
235
    n == 0 && return UInt8[]
×
236

237
    # Precompute m = sizeof(dst).   This involves annoying duplication
238
    # of the loop over the src array.   However, this is not just an
239
    # optimization: it is problematic for security reasons to grow
240
    # dst dynamically, because Base.winprompt uses this function to
241
    # convert passwords to UTF-8 and we don't want to make unintentional
242
    # copies of the password data.
243
    a = src[1]
×
244
    i, m = 1, 0
×
245
    while true
×
246
        if a < 0x80
×
247
            m += 1
×
248
        elseif a < 0x800 # 2-byte UTF-8
×
249
            m += 2
×
250
        elseif a & 0xfc00 == 0xd800 && i < length(src)
×
251
            b = src[i += 1]
×
252
            if (b & 0xfc00) == 0xdc00 # 2-unit UTF-16 sequence => 4-byte UTF-8
×
253
                m += 4
×
254
            else
255
                m += 3
×
256
                a = b; continue
×
257
            end
258
        else
259
            # 1-unit high UTF-16 or unpaired high surrogate
260
            # either way, encode as 3-byte UTF-8 code point
261
            m += 3
×
262
        end
263
        i < n || break
×
264
        a = src[i += 1]
×
265
    end
×
266

267
    dst = StringVector(m)
×
268
    a = src[1]
×
269
    i, j = 1, 0
×
270
    while true
×
271
        if a < 0x80 # ASCII
×
272
            dst[j += 1] = a % UInt8
×
273
        elseif a < 0x800 # 2-byte UTF-8
×
274
            dst[j += 1] = 0xc0 | ((a >> 6) % UInt8)
×
275
            dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f)
×
276
        elseif a & 0xfc00 == 0xd800 && i < n
×
277
            b = src[i += 1]
×
278
            if (b & 0xfc00) == 0xdc00
×
279
                # 2-unit UTF-16 sequence => 4-byte UTF-8
280
                a += 0x2840
×
281
                dst[j += 1] = 0xf0 | ((a >> 8) % UInt8)
×
282
                dst[j += 1] = 0x80 | ((a % UInt8) >> 2)
×
283
                dst[j += 1] = xor(0xf0, ((a % UInt8) << 4) & 0x3f, (b >> 6) % UInt8)
×
284
                dst[j += 1] = 0x80 | ((b % UInt8) & 0x3f)
×
285
            else
286
                dst[j += 1] = 0xe0 | ((a >> 12) % UInt8)
×
287
                dst[j += 1] = 0x80 | (((a >> 6) % UInt8) & 0x3f)
×
288
                dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f)
×
289
                a = b; continue
×
290
            end
291
        else
292
            # 1-unit high UTF-16 or unpaired high surrogate
293
            # either way, encode as 3-byte UTF-8 code point
294
            dst[j += 1] = 0xe0 | ((a >> 12) % UInt8)
×
295
            dst[j += 1] = 0x80 | (((a >> 6) % UInt8) & 0x3f)
×
296
            dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f)
×
297
        end
298
        i < n || break
×
299
        a = src[i += 1]
×
300
    end
×
301
    return dst
×
302
end
303

304
# Build a `String` from `length` wide code units stored at `p`.
# The memory is wrapped as an array without taking ownership (`own=false`) and then
# transcoded to a `String`.
function unsafe_string(p::Ptr{T}, length::Integer) where {T<:Union{UInt16,UInt32,Cwchar_t}}
    units = unsafe_wrap(Array, p, length; own=false)
    return transcode(String, units)
end
307
# Copy a NUL-terminated wide C string into a Julia `String`.
#
# The wide code units are scanned until the first zero unit; the units before it are
# then converted with the pointer-and-length `unsafe_string` method.
# Throws `ArgumentError` for a NULL `Cwstring` instead of dereferencing it.
function unsafe_string(cw::Cwstring)
    p = convert(Ptr{Cwchar_t}, cw)
    # Loading through a NULL pointer would crash the process; fail with an exception.
    p == C_NULL && throw(ArgumentError("cannot convert NULL to string"))
    len = 0
    # `unsafe_load` is 1-based, so the unit after `len` counted units is at `len + 1`.
    while unsafe_load(p, len + 1) != 0
        len += 1
    end
    return unsafe_string(p, len)
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc