• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / #37934

16 Oct 2024 06:06AM UTC coverage: 86.449% (-1.3%) from 87.724%
#37934

push

local

web-flow
Initial support for RISC-V (#56105)

Rebase and extension of @alexfanqi's initial work on porting Julia to
RISC-V. Requires LLVM 19.

Tested on a VisionFive2, built with:

```make
MARCH := rv64gc_zba_zbb
MCPU := sifive-u74

USE_BINARYBUILDER:=0

DEPS_GIT = llvm
override LLVM_VER=19.1.1
override LLVM_BRANCH=julia-release/19.x
override LLVM_SHA1=julia-release/19.x
```

```julia-repl
❯ ./julia
               _
   _       _ _(_)_     |  Documentation: https://docs.julialang.org
  (_)     | (_) (_)    |
   _ _   _| |_  __ _   |  Type "?" for help, "]?" for Pkg help.
  | | | | | | |/ _` |  |
  | | |_| | | | (_| |  |  Version 1.12.0-DEV.1374 (2024-10-14)
 _/ |\__'_|_|_|\__'_|  |  riscv/25092a3982* (fork: 1 commits, 0 days)
|__/                   |

julia> versioninfo(; verbose=true)
Julia Version 1.12.0-DEV.1374
Commit 25092a3982* (2024-10-14 09:57 UTC)
Platform Info:
  OS: Linux (riscv64-unknown-linux-gnu)
  uname: Linux 6.11.3-1-riscv64 #1 SMP Debian 6.11.3-1 (2024-10-10) riscv64 unknown
  CPU: unknown:
              speed         user         nice          sys         idle          irq
       #1  1500 MHz        922 s          0 s        265 s     160953 s          0 s
       #2  1500 MHz        457 s          0 s        280 s     161521 s          0 s
       #3  1500 MHz        452 s          0 s        270 s     160911 s          0 s
       #4  1500 MHz        638 s         15 s        301 s     161340 s          0 s
  Memory: 7.760246276855469 GB (7474.08203125 MB free)
  Uptime: 16260.13 sec
  Load Avg:  0.25  0.23  0.1
  WORD_SIZE: 64
  LLVM: libLLVM-19.1.1 (ORCJIT, sifive-u74)
Threads: 1 default, 0 interactive, 1 GC (on 4 virtual cores)
Environment:
  HOME = /home/tim
  PATH = /home/tim/.local/bin:/usr/local/bin:/usr/bin:/bin:/usr/games
  TERM = xterm-256color


julia> ccall(:jl_dump_host_cpu, Nothing, ())
CPU: sifive-u74
Features: +zbb,+d,+i,+f,+c,+a,+zba,+m,-zvbc,-zksed,-zvfhmin,-zbkc,-zkne,-zk... (continued)

77025 of 89099 relevant lines covered (86.45%)

15882373.46 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

74.17
/base/strings/cstring.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
import Core.Intrinsics: bitcast
4

5
"""
6
    Cwstring
7

8
A C-style string composed of the native wide character type
9
[`Cwchar_t`](@ref)s. `Cwstring`s are NUL-terminated. For
10
C-style strings composed of the native character
11
type, see [`Cstring`](@ref). For more information
12
about string interoperability with C, see the
13
[manual](@ref man-bits-types).
14

15
"""
16
Cwstring
17

18
"""
19
    Cstring
20

21
A C-style string composed of the native character type
22
[`Cchar`](@ref)s. `Cstring`s are NUL-terminated. For
23
C-style strings composed of the native wide character
24
type, see [`Cwstring`](@ref). For more information
25
about string interoperability with C, see the
26
[manual](@ref man-bits-types).
27
"""
28
Cstring
29

30
# construction from pointers
31
# Reinterpret a raw byte or void pointer as a `Cstring`; the pointee is not inspected.
function Cstring(p::Union{Ptr{Int8},Ptr{UInt8},Ptr{Cvoid}})
    return bitcast(Cstring, p)
end
78,778,180✔
32
# Reinterpret a wide-character or void pointer as a `Cwstring`; the pointee is not inspected.
function Cwstring(p::Union{Ptr{Cwchar_t},Ptr{Cvoid}})
    return bitcast(Cwstring, p)
end
32✔
33
# Recover the raw pointer stored in a `Cstring`, typed as the requested `Ptr{T}`.
function Ptr{T}(p::Cstring) where {T<:Union{Int8,UInt8,Cvoid}}
    return bitcast(Ptr{T}, p)
end
2,748,863✔
34
# Recover the raw pointer stored in a `Cwstring`, typed as the requested `Ptr{T}`.
# The result type follows `T` (either `Cwchar_t` or `Cvoid`), mirroring the
# `Cstring` method, so `Ptr{Cvoid}(cw)` really yields a `Ptr{Cvoid}`.
function Ptr{T}(p::Cwstring) where {T<:Union{Cwchar_t,Cvoid}}
    return bitcast(Ptr{T}, p)
end
×
35

36
# `convert` to `Cstring` from a compatible pointer simply delegates to the constructor.
function convert(::Type{Cstring}, p::Union{Ptr{Int8},Ptr{UInt8},Ptr{Cvoid}})
    return Cstring(p)
end
754,209✔
37
# `convert` to `Cwstring` from a compatible pointer simply delegates to the constructor.
function convert(::Type{Cwstring}, p::Union{Ptr{Cwchar_t},Ptr{Cvoid}})
    return Cwstring(p)
end
×
38
# `convert` from `Cstring` back to a raw pointer delegates to the `Ptr{T}` constructor.
function convert(::Type{Ptr{T}}, p::Cstring) where {T<:Union{Int8,UInt8,Cvoid}}
    return Ptr{T}(p)
end
2,748,863✔
39
# `convert` from `Cwstring` back to a raw pointer delegates to the `Ptr{T}` constructor.
function convert(::Type{Ptr{T}}, p::Cwstring) where {T<:Union{Cwchar_t,Cvoid}}
    return Ptr{T}(p)
end
×
40

41
"""
42
    pointer(array [, index])
43

44
Get the native address of an array or string, optionally at a given location `index`.
45

46
This function is "unsafe". Be careful to ensure that a Julia reference to
47
`array` exists as long as this pointer will be used. The [`GC.@preserve`](@ref)
48
macro should be used to protect the `array` argument from garbage collection
49
within a given block of code.
50

51
Calling [`Ref(array[, index])`](@ref Ref) is generally preferable to this function as it guarantees validity.
52
"""
53
function pointer end
54

55
# The address wrapped by a `Cstring`, as a `Ptr{Cchar}`.
function pointer(p::Cstring)
    return convert(Ptr{Cchar}, p)
end
1,412,936✔
56
# The address wrapped by a `Cwstring`, as a `Ptr{Cwchar_t}`.
function pointer(p::Cwstring)
    return convert(Ptr{Cwchar_t}, p)
end
×
57

58
# comparisons against pointers (mainly to support `cstr==C_NULL`)
59
# Compare a C string handle with a raw pointer by address (e.g. `cstr == C_NULL`).
function ==(x::Union{Cstring,Cwstring}, y::Ptr)
    return pointer(x) == y
end
1,412,931✔
60
# Compare a raw pointer with a C string handle by address (e.g. `C_NULL == cstr`).
function ==(x::Ptr, y::Union{Cstring,Cwstring})
    return x == pointer(y)
end
4✔
61

62
# Copy the NUL-terminated data behind a `Cstring` into a Julia `String`.
function unsafe_string(s::Cstring)
    raw = convert(Ptr{UInt8}, s)
    return unsafe_string(raw)
end
1,335,919✔
63

64
# convert strings to String etc. to pass as pointers
65
# A `String` is already in the right form to be passed as a `Cstring`; hand it back as-is.
function cconvert(::Type{Cstring}, s::String)
    return s
end
149✔
66
# Any other string type is first materialized as a `String`, then handled by the
# `String` method.
function cconvert(::Type{Cstring}, s::AbstractString)
    str = String(s)::String
    return cconvert(Cstring, str)
end
68

69
# Transcode `s` to wide characters, append the NUL terminator, and pass the buffer
# to the `Vector{Cwchar_t}` method for validation.
function cconvert(::Type{Cwstring}, s::AbstractString)
    wide = push!(transcode(Cwchar_t, String(s)), 0)
    return cconvert(Cwstring, wide)
end
74

75
# Elements of a `Cstring` are native C `char`s.
function eltype(::Type{Cstring})
    return Cchar
end
×
76
# Elements of a `Cwstring` are native C `wchar_t`s.
function eltype(::Type{Cwstring})
    return Cwchar_t
end
×
77

78
# Report whether any of the `len` bytes starting at `p` is zero, using C `memchr`.
function containsnul(p::Ptr, len)
    hit = ccall(:memchr, Ptr{Cchar}, (Ptr{Cchar}, Cint, Csize_t), p, 0, len)
    return C_NULL != hit
end
80
# Scan the bytes of a `String` for a NUL via the pointer-based method.
function containsnul(s::String)
    p = unsafe_convert(Ptr{Cchar}, s)
    return containsnul(p, sizeof(s))
end
42,286✔
81
# Generic strings are searched character by character for `'\0'`.
function containsnul(s::AbstractString)
    return in('\0', s)
end
4,071✔
82

83
# Produce a `Cstring` that points at the bytes of `s`.
# Throws `ArgumentError` when `s` contains a NUL byte, since C would see a
# truncated string.
function unsafe_convert(::Type{Cstring}, s::String)
    data = unsafe_convert(Ptr{Cchar}, s)
    if containsnul(data, sizeof(s))
        throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))
    end
    return Cstring(data)
end
89

90
# Byte `Memory` buffers are passed through by address; no NUL check is performed here.
function unsafe_convert(::Type{Cstring}, s::Union{Memory{UInt8},Memory{Int8}})
    addr = unsafe_convert(Ptr{Cvoid}, s)
    return Cstring(addr)
end
×
91

92
# Validate a wide-character buffer before it is handed to C as a `Cwstring`.
#
# Throws `ArgumentError` when a NUL appears anywhere before the final element, or
# when `v` is empty or its final element is not NUL; otherwise defers to the
# `Ptr{Cwchar_t}` conversion of `v`.
function cconvert(::Type{Cwstring}, v::Vector{Cwchar_t})
    for i in 1:length(v)-1
        v[i] == 0 &&
            throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(v))"))
    end
    # An empty vector has no terminator; checking `isempty` first reports that as an
    # ArgumentError instead of letting `v[end]` raise an unrelated BoundsError.
    (!isempty(v) && v[end] == 0) ||
        throw(ArgumentError("C string data must be NUL terminated: $(repr(v))"))
    return cconvert(Ptr{Cwchar_t}, v)
end
101
# Anything convertible to a `Ptr{Cwchar_t}` can be wrapped as a `Cwstring`.
function unsafe_convert(::Type{Cwstring}, s)
    addr = unsafe_convert(Ptr{Cwchar_t}, s)
    return Cwstring(addr)
end
32✔
102
# A value that is already a `Cwstring` needs no conversion.
function unsafe_convert(::Type{Cwstring}, s::Cwstring)
    return s
end
×
103

104
# symbols are guaranteed not to contain embedded NUL
105
# Symbols are passed through unchanged; their conversion to a pointer happens in
# `unsafe_convert`.
function cconvert(::Type{Cstring}, s::Symbol)
    return s
end
146✔
106
# Wrap the name of a `Symbol` as a `Cstring`; no NUL scan is performed for symbols.
function unsafe_convert(::Type{Cstring}, s::Symbol)
    name_ptr = unsafe_convert(Ptr{Cchar}, s)
    return Cstring(name_ptr)
end
37,336,321✔
107

108
if ccall(:jl_get_UNAME, Any, ()) === :NT
"""
    Base.cwstring(s)

Convert the string `s` to a NUL-terminated `Vector{Cwchar_t}` that can be passed to C
functions expecting a `Ptr{Cwchar_t}`. Compared with the implicit conversion provided by
[`Cwstring`](@ref), the main advantage is that the result can be reused when the function
is called multiple times with the same argument.

This is only available on Windows.
"""
function cwstring(s::AbstractString)
    units = codeunits(String(s))
    if 0 in units
        throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))
    end
    wide = transcode(UInt16, units)
    return push!(wide, 0)
end
end
125

126
# transcoding between data in UTF-8 and UTF-16 for Windows APIs,
127
# and also UTF-32 for APIs using Cwchar_t on other platforms.
128

129
"""
130
    transcode(T, src)
131

132
Convert string data between Unicode encodings. `src` is either a
133
`String` or a `Vector{UIntXX}` of UTF-XX code units, where
134
`XX` is 8, 16, or 32. `T` indicates the encoding of the return value:
135
`String` to return a (UTF-8 encoded) `String` or `UIntXX`
136
to return a `Vector{UIntXX}` of UTF-`XX` data. (The alias [`Cwchar_t`](@ref)
137
can also be used as the integer type, for converting `wchar_t*` strings
138
used by external C libraries.)
139

140
The `transcode` function succeeds as long as the input data can be
141
reasonably represented in the target encoding; it always succeeds for
142
conversions between UTF-XX encodings, even for invalid Unicode data.
143

144
Only conversion to/from UTF-8 is currently supported.
145

146
# Examples
147
```jldoctest
148
julia> str = "αβγ"
149
"αβγ"
150

151
julia> transcode(UInt16, str)
152
3-element Vector{UInt16}:
153
 0x03b1
154
 0x03b2
155
 0x03b3
156

157
julia> transcode(String, transcode(UInt16, str))
158
"αβγ"
159
```
160
"""
161
function transcode end
162

163
# Source already holds code units of the requested type: return it unchanged (no copy).
function transcode(::Type{T}, src::AbstractVector{T}) where {T<:Union{UInt8,UInt16,UInt32,Int32}}
    return src
end
7✔
164
# UTF-8 `String` -> 32-bit code units: one element per character of `src`.
function transcode(::Type{T}, src::String) where {T<:Union{Int32,UInt32}}
    out = T[]
    for ch in src
        push!(out, T(ch))
    end
    return out
end
40✔
165
# UTF-8 bytes -> 32-bit code units, going through a `String` built from a copy of `src`.
function transcode(::Type{T}, src::AbstractVector{UInt8}) where {T<:Union{Int32,UInt32}}
    text = String(Vector(src))
    return transcode(T, text)
end
167
# Code units of a `String` -> 32-bit code units, via the `String` method.
function transcode(::Type{T}, src::CodeUnits{UInt8,String}) where {T<:Union{Int32,UInt32}}
    text = String(src)
    return transcode(T, text)
end
169

170
# 32-bit code units -> UTF-8 bytes: print each element as a `Char` into a buffer
# and return the accumulated bytes.
function transcode(::Type{UInt8}, src::Vector{<:Union{Int32,UInt32}})
    io = IOBuffer()
    foreach(unit -> print(io, Char(unit)), src)
    return take!(io)
end
177
# `String` -> `String` is the identity.
function transcode(::Type{String}, src::String)
    return src
end
4✔
178
# For any other target, transcode the UTF-8 code units of the `String`.
function transcode(T, src::String)
    units = codeunits(src)
    return transcode(T, units)
end
84,571✔
179
# Anything else -> `String`: transcode to UTF-8 bytes, then wrap those bytes as a `String`.
function transcode(::Type{String}, src)
    bytes = transcode(UInt8, src)
    return String(bytes)
end
104✔
180

181
# UTF-8 bytes -> UTF-16 code units.
#
# Well-formed sequences are decoded (4-byte sequences become a surrogate pair).
# Bytes that do not form a valid sequence are copied through one code unit per
# byte, so the conversion never throws on invalid data.
function transcode(::Type{UInt16}, src::AbstractVector{UInt8})
    require_one_based_indexing(src)
    units = UInt16[]
    pos, len = 1, length(src)
    if len == 0
        return units
    end
    sizehint!(units, 2len)
    lead = src[1]
    while true
        # lead bytes 0xc0:0xf4 seen as Int8 fall in -64:-12
        if pos < len && -64 <= (lead % Int8) <= -12 # multi-byte character
            b2 = src[pos += 1]
            if (-64 <= (b2 % Int8)) || (lead == 0xf4 && 0x8f < b2)
                # invalid UTF-8 (non-continuation or too-high code point):
                # emit the lead byte and re-examine b2 as a new lead
                push!(units, lead)
                lead = b2
                continue
            elseif lead < 0xe0 # 2-byte UTF-8
                push!(units, xor(0x3080, UInt16(lead) << 6, b2))
            elseif pos < len # 3/4-byte character
                b3 = src[pos += 1]
                if -64 <= (b3 % Int8) # invalid UTF-8 (non-continuation)
                    push!(units, lead, b2)
                    lead = b3
                    continue
                elseif lead < 0xf0 # 3-byte UTF-8
                    push!(units, xor(0x2080, UInt16(lead) << 12, UInt16(b2) << 6, b3))
                elseif pos < len
                    b4 = src[pos += 1]
                    if -64 <= (b4 % Int8) # invalid UTF-8 (non-continuation)
                        push!(units, lead, b2, b3)
                        lead = b4
                        continue
                    elseif lead == 0xf0 && b2 < 0x90 # overlong encoding
                        push!(units, xor(0x2080, UInt16(b2) << 12, UInt16(b3) << 6, b4))
                    else # 4-byte UTF-8 => surrogate pair
                        high = 0xe5b8 + (UInt16(lead) << 8) + (UInt16(b2) << 2) + (b3 >> 4)
                        low = xor(0xdc80, UInt16(b3 & 0xf) << 6, b4)
                        push!(units, high, low)
                    end
                else # too short
                    push!(units, lead, b2, b3)
                    break
                end
            else # too short
                push!(units, lead, b2)
                break
            end
        else # ASCII or invalid UTF-8 (continuation byte or too-high code point)
            push!(units, lead)
        end
        if pos >= len
            break
        end
        lead = src[pos += 1]
    end
    return units
end
231

232
# UTF-16 code units -> UTF-8 bytes.
#
# Valid surrogate pairs become 4-byte sequences; an unpaired high surrogate (and
# every other unit >= 0x800) is written as a 3-byte sequence, so the conversion
# never throws on invalid data.
function transcode(::Type{UInt8}, src::AbstractVector{UInt16})
    require_one_based_indexing(src)
    len = length(src)
    if len == 0
        return UInt8[]
    end

    # Pass 1: compute the exact output size. Duplicating the scan is deliberate and
    # not just an optimization: growing the output dynamically is problematic for
    # security reasons, because Base.winprompt uses this function to convert
    # passwords to UTF-8 and we don't want to make unintentional copies of the
    # password data.
    unit = src[1]
    pos, nbytes = 1, 0
    while true
        if unit < 0x80
            nbytes += 1
        elseif unit < 0x800 # 2-byte UTF-8
            nbytes += 2
        elseif (unit & 0xfc00) == 0xd800 && pos < length(src)
            trail = src[pos += 1]
            if (trail & 0xfc00) == 0xdc00 # 2-unit UTF-16 sequence => 4-byte UTF-8
                nbytes += 4
            else
                # unpaired high surrogate: 3 bytes, then re-examine `trail`
                nbytes += 3
                unit = trail
                continue
            end
        else
            # 1-unit high UTF-16 or unpaired high surrogate
            # either way, encode as 3-byte UTF-8 code point
            nbytes += 3
        end
        if pos >= len
            break
        end
        unit = src[pos += 1]
    end

    # Pass 2: same scan, this time writing the bytes.
    out = StringVector(nbytes)
    unit = src[1]
    pos, w = 1, 0
    while true
        if unit < 0x80 # ASCII
            out[w += 1] = unit % UInt8
        elseif unit < 0x800 # 2-byte UTF-8
            out[w += 1] = 0xc0 | ((unit >> 6) % UInt8)
            out[w += 1] = 0x80 | ((unit % UInt8) & 0x3f)
        elseif (unit & 0xfc00) == 0xd800 && pos < len
            trail = src[pos += 1]
            if (trail & 0xfc00) == 0xdc00
                # 2-unit UTF-16 sequence => 4-byte UTF-8
                unit += 0x2840
                out[w += 1] = 0xf0 | ((unit >> 8) % UInt8)
                out[w += 1] = 0x80 | ((unit % UInt8) >> 2)
                out[w += 1] = xor(0xf0, ((unit % UInt8) << 4) & 0x3f, (trail >> 6) % UInt8)
                out[w += 1] = 0x80 | ((trail % UInt8) & 0x3f)
            else
                # unpaired high surrogate: 3 bytes, then re-examine `trail`
                out[w += 1] = 0xe0 | ((unit >> 12) % UInt8)
                out[w += 1] = 0x80 | (((unit >> 6) % UInt8) & 0x3f)
                out[w += 1] = 0x80 | ((unit % UInt8) & 0x3f)
                unit = trail
                continue
            end
        else
            # 1-unit high UTF-16 or unpaired high surrogate
            # either way, encode as 3-byte UTF-8 code point
            out[w += 1] = 0xe0 | ((unit >> 12) % UInt8)
            out[w += 1] = 0x80 | (((unit >> 6) % UInt8) & 0x3f)
            out[w += 1] = 0x80 | ((unit % UInt8) & 0x3f)
        end
        if pos >= len
            break
        end
        unit = src[pos += 1]
    end
    return out
end
303

304
# Build a `String` from `length` wide code units starting at `p`. The memory is
# wrapped without taking ownership and transcoded into a new UTF-8 `String`.
function unsafe_string(p::Ptr{T}, length::Integer) where {T<:Union{UInt16,UInt32,Cwchar_t}}
    wrapped = unsafe_wrap(Array, p, length; own=false)
    return transcode(String, wrapped)
end
307
# Copy the NUL-terminated wide string behind `cw` into a Julia `String`.
#
# Throws `ArgumentError` for a NULL `Cwstring` rather than dereferencing it, in
# line with `unsafe_string` on a NULL `Ptr{UInt8}`. Otherwise the data is scanned
# for the terminating NUL and the preceding units are transcoded to UTF-8.
function unsafe_string(cw::Cwstring)
    p = convert(Ptr{Cwchar_t}, cw)
    p == C_NULL && throw(ArgumentError("cannot convert NULL to string"))
    # `n` ends up as the 1-based index of the terminating NUL
    n = 1
    while unsafe_load(p, n) != 0
        n += 1
    end
    return unsafe_string(p, n - 1)
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc