• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / #37512

pending completion
#37512

push

local

web-flow
Allow conversion of `AbstractQ` to `AbstractArray` (#49424)

1 of 1 new or added line in 1 file covered. (100.0%)

71783 of 83011 relevant lines covered (86.47%)

33710661.95 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.35
/base/strings/string.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
"""
4
    StringIndexError(str, i)
5

6
An error occurred when trying to access `str` at index `i` that is not valid.
7
"""
8
struct StringIndexError <: Exception
9
    string::AbstractString
661✔
10
    index::Integer
11
end
12
@noinline string_index_err(s::AbstractString, i::Integer) =
657✔
13
    throw(StringIndexError(s, Int(i)))
14
function Base.showerror(io::IO, exc::StringIndexError)
4✔
15
    s = exc.string
4✔
16
    print(io, "StringIndexError: ", "invalid index [$(exc.index)]")
4✔
17
    if firstindex(s) <= exc.index <= ncodeunits(s)
4✔
18
        iprev = thisind(s, exc.index)
4✔
19
        inext = nextind(s, iprev)
4✔
20
        escprev = escape_string(s[iprev:iprev])
4✔
21
        if inext <= ncodeunits(s)
4✔
22
            escnext = escape_string(s[inext:inext])
3✔
23
            print(io, ", valid nearby indices [$iprev]=>'$escprev', [$inext]=>'$escnext'")
3✔
24
        else
25
            print(io, ", valid nearby index [$iprev]=>'$escprev'")
1✔
26
        end
27
    end
28
end
29

30
const ByteArray = Union{CodeUnits{UInt8,String}, Vector{UInt8},Vector{Int8}, FastContiguousSubArray{UInt8,1,CodeUnits{UInt8,String}}, FastContiguousSubArray{UInt8,1,Vector{UInt8}}, FastContiguousSubArray{Int8,1,Vector{Int8}}}
31

32
# Report whether `b` lies in the closed interval [lo, hi]. The two comparisons are
# combined with the non-short-circuiting `&`, so both are always evaluated.
@inline function between(b::T, lo::T, hi::T) where {T<:Integer}
    at_least_lo = lo ≤ b
    at_most_hi = b ≤ hi
    return at_least_lo & at_most_hi
end
158,866,059✔
33

34
"""
35
    String <: AbstractString
36

37
The default string type in Julia, used by e.g. string literals.
38

39
`String`s are immutable sequences of `Char`s. A `String` is stored internally as
40
a contiguous byte array, and while they are interpreted as being UTF-8 encoded,
41
they can be composed of any byte sequence. Use [`isvalid`](@ref) to validate
42
that the underlying byte sequence is valid as UTF-8.
43
"""
44
String
45

46
## constructors and conversions ##
47

48
# String constructor docstring from boot.jl, workaround for #16730
49
# and the unavailability of @doc in boot.jl context.
50
"""
51
    String(v::AbstractVector{UInt8})
52

53
Create a new `String` object using the data buffer from byte vector `v`.
54
If `v` is a `Vector{UInt8}` it will be truncated to zero length and future
55
modification of `v` cannot affect the contents of the resulting string.
56
To avoid truncation of `Vector{UInt8}` data, use `String(copy(v))`; for other
57
`AbstractVector` types, `String(v)` already makes a copy.
58

59
When possible, the memory of `v` will be used without copying when the `String`
60
object is created. This is guaranteed to be the case for byte vectors returned
61
by [`take!`](@ref) on a writable [`IOBuffer`](@ref) and by calls to
62
[`read(io, nb)`](@ref). This allows zero-copy conversion of I/O data to strings.
63
In other cases, `Vector{UInt8}` data may be copied, but `v` is truncated anyway
64
to guarantee consistent behavior.
65
"""
66
String(v::AbstractVector{UInt8}) = String(copyto!(StringVector(length(v)), v))
2,612,856✔
67
String(v::Vector{UInt8}) = ccall(:jl_array_to_string, Ref{String}, (Any,), v)
7,027,403✔
68

69
"""
70
    unsafe_string(p::Ptr{UInt8}, [length::Integer])
71

72
Copy a string from the address of a C-style (NUL-terminated) string encoded as UTF-8.
73
(The pointer can be safely freed afterwards.) If `length` is specified
74
(the length of the data in bytes), the string does not have to be NUL-terminated.
75

76
This function is labeled "unsafe" because it will crash if `p` is not
77
a valid memory address to data of the requested length.
78
"""
79
function unsafe_string(p::Union{Ptr{UInt8},Ptr{Int8}}, len::Integer)
1,146✔
80
    p == C_NULL && throw(ArgumentError("cannot convert NULL to string"))
740,552✔
81
    ccall(:jl_pchar_to_string, Ref{String}, (Ptr{UInt8}, Int), p, len)
740,551✔
82
end
83
function unsafe_string(p::Union{Ptr{UInt8},Ptr{Int8}})
10,082✔
84
    p == C_NULL && throw(ArgumentError("cannot convert NULL to string"))
3,113,032✔
85
    ccall(:jl_cstr_to_string, Ref{String}, (Ptr{UInt8},), p)
3,113,031✔
86
end
87

88
# This is @assume_effects :effect_free :nothrow :terminates_globally @ccall jl_alloc_string(n::Csize_t)::Ref{String},
89
# but the macro is not available at this time in bootstrap, so we write it manually.
90
@eval _string_n(n::Integer) = $(Expr(:foreigncall, QuoteNode(:jl_alloc_string), Ref{String}, Expr(:call, Expr(:core, :svec), :Csize_t), 1, QuoteNode((:ccall,0xe)), :(convert(Csize_t, n))))
109,652,941✔
91

92
"""
93
    String(s::AbstractString)
94

95
Create a new `String` from an existing `AbstractString`.
96
"""
97
String(s::AbstractString) = print_to_string(s)
397✔
98
@assume_effects :total String(s::Symbol) = unsafe_string(unsafe_convert(Ptr{UInt8}, s))
715,486✔
99

100
unsafe_wrap(::Type{Vector{UInt8}}, s::String) = ccall(:jl_string_to_array, Ref{Vector{UInt8}}, (Any,), s)
6,628,493✔
101
unsafe_wrap(::Type{Vector{UInt8}}, s::FastContiguousSubArray{UInt8,1,Vector{UInt8}}) = unsafe_wrap(Vector{UInt8}, pointer(s), size(s))
×
102

103
Vector{UInt8}(s::CodeUnits{UInt8,String}) = copyto!(Vector{UInt8}(undef, length(s)), s)
20,937✔
104
Vector{UInt8}(s::String) = Vector{UInt8}(codeunits(s))
20,860✔
105
Array{UInt8}(s::String)  = Vector{UInt8}(codeunits(s))
×
106

107
String(s::CodeUnits{UInt8,String}) = s.s
3✔
108

109
## low-level functions ##
110

111
pointer(s::String) = unsafe_convert(Ptr{UInt8}, s)
433,610,917✔
112
pointer(s::String, i::Integer) = pointer(s) + Int(i)::Int - 1
307,805,427✔
113

114
ncodeunits(s::String) = Core.sizeof(s)
370,251,675✔
115
codeunit(s::String) = UInt8
×
116

117
codeunit(s::String, i::Integer) = codeunit(s, Int(i))
×
118
@assume_effects :foldable @inline function codeunit(s::String, i::Int)
678,642✔
119
    @boundscheck checkbounds(s, i)
287,143,512✔
120
    b = GC.@preserve s unsafe_load(pointer(s, i))
287,143,512✔
121
    return b
287,143,512✔
122
end
123

124
## comparison ##
125

126
@assume_effects :total _memcmp(a::String, b::String) = @invoke _memcmp(a::Union{Ptr{UInt8},AbstractString},b::Union{Ptr{UInt8},AbstractString})
489,961✔
127

128
_memcmp(a::Union{Ptr{UInt8},AbstractString}, b::Union{Ptr{UInt8},AbstractString}) = _memcmp(a, b, min(sizeof(a), sizeof(b)))
999,680✔
129
function _memcmp(a::Union{Ptr{UInt8},AbstractString}, b::Union{Ptr{UInt8},AbstractString}, len::Int)
317✔
130
    ccall(:memcmp, Cint, (Ptr{UInt8}, Ptr{UInt8}, Csize_t), a, b, len % Csize_t) % Int
1,767,072✔
131
end
132

133
function cmp(a::String, b::String)
1✔
134
    al, bl = sizeof(a), sizeof(b)
489,961✔
135
    c = _memcmp(a, b)
489,961✔
136
    return c < 0 ? -1 : c > 0 ? +1 : cmp(al,bl)
744,574✔
137
end
138

139
==(a::String, b::String) = a===b
10,588,788✔
140

141
typemin(::Type{String}) = ""
2✔
142
typemin(::String) = typemin(String)
1✔
143

144
## thisind, nextind ##
145

146
@propagate_inbounds thisind(s::String, i::Int) = _thisind_str(s, i)
184,096,258✔
147

148
# s should be String or SubString{String}
149
@inline function _thisind_str(s, i::Int)
×
150
    i == 0 && return 0
92,659,638✔
151
    n = ncodeunits(s)
92,631,437✔
152
    i == n + 1 && return i
92,631,437✔
153
    @boundscheck between(i, 1, n) || throw(BoundsError(s, i))
92,608,995✔
154
    @inbounds b = codeunit(s, i)
92,608,971✔
155
    (b & 0xc0 == 0x80) & (i-1 > 0) || return i
165,149,867✔
156
    @inbounds b = codeunit(s, i-1)
20,068,075✔
157
    between(b, 0b11000000, 0b11110111) && return i-1
20,068,075✔
158
    (b & 0xc0 == 0x80) & (i-2 > 0) || return i
15,660,210✔
159
    @inbounds b = codeunit(s, i-2)
7,171,014✔
160
    between(b, 0b11100000, 0b11110111) && return i-2
7,171,014✔
161
    (b & 0xc0 == 0x80) & (i-3 > 0) || return i
5,906,601✔
162
    @inbounds b = codeunit(s, i-3)
1,027,051✔
163
    between(b, 0b11110000, 0b11110111) && return i-3
1,027,051✔
164
    return i
241,004✔
165
end
166

167
@propagate_inbounds nextind(s::String, i::Int) = _nextind_str(s, i)
14,764,284✔
168

169
# s should be String or SubString{String}
170
@inline function _nextind_str(s, i::Int)
15,271,146✔
171
    i == 0 && return 1
15,271,146✔
172
    n = ncodeunits(s)
15,250,287✔
173
    @boundscheck between(i, 1, n) || throw(BoundsError(s, i))
15,250,299✔
174
    @inbounds l = codeunit(s, i)
15,250,275✔
175
    (l < 0x80) | (0xf8 ≤ l) && return i+1
15,250,275✔
176
    if l < 0xc0
132,165✔
177
        i′ = @inbounds thisind(s, i)
51,357✔
178
        return i′ < i ? @inbounds(nextind(s, i′)) : i+1
30,403✔
179
    end
180
    # first continuation byte
181
    (i += 1) > n && return i
101,762✔
182
    @inbounds b = codeunit(s, i)
101,124✔
183
    b & 0xc0 ≠ 0x80 && return i
101,124✔
184
    ((i += 1) > n) | (l < 0xe0) && return i
93,880✔
185
    # second continuation byte
186
    @inbounds b = codeunit(s, i)
76,326✔
187
    b & 0xc0 ≠ 0x80 && return i
76,326✔
188
    ((i += 1) > n) | (l < 0xf0) && return i
72,693✔
189
    # third continuation byte
190
    @inbounds b = codeunit(s, i)
20,796✔
191
    ifelse(b & 0xc0 ≠ 0x80, i, i+1)
20,796✔
192
end
193

194
## checking UTF-8 & ASCII validity ##
195
#=
196
    The UTF-8 Validation is performed by a shift based DFA.
197
    ┌───────────────────────────────────────────────────────────────────┐
198
    │    UTF-8 DFA State Diagram    ┌──────────────2──────────────┐     │
199
    │                               ├────────3────────┐           │     │
200
    │                 ┌──────────┐  │     ┌─┐        ┌▼┐          │     │
201
    │      ASCII      │  UTF-8   │  ├─5──►│9├───1────► │          │     │
202
    │                 │          │  │     ├─┤        │ │         ┌▼┐    │
203
    │                 │  ┌─0─┐   │  ├─6──►│8├─1,7,9──►4├──1,7,9──► │    │
204
    │      ┌─0─┐      │  │   │   │  │     ├─┤        │ │         │ │    │
205
    │      │   │      │ ┌▼───┴┐  │  ├─11─►│7├──7,9───► │ ┌───────►3├─┐  │
206
    │     ┌▼───┴┐     │ │     │  ▼  │     └─┘        └─┘ │       │ │ │  │
207
    │     │  0  ├─────┘ │  1  ├─► ──┤                    │  ┌────► │ │  │
208
    │     └─────┘       │     │     │     ┌─┐            │  │    └─┘ │  │
209
    │                   └──▲──┘     ├─10─►│5├─────7──────┘  │        │  │
210
    │                      │        │     ├─┤               │        │  │
211
    │                      │        └─4──►│6├─────1,9───────┘        │  │
212
    │          INVALID     │              └─┘                        │  │
213
    │           ┌─*─┐      └──────────────────1,7,9──────────────────┘  │
214
    │          ┌▼───┴┐                                                  │
215
    │          │  2  ◄─── All undefined transitions result in state 2   │
216
    │          └─────┘                                                  │
217
    └───────────────────────────────────────────────────────────────────┘
218

219
        Validation States
220
            0 -> _UTF8_DFA_ASCII is the start state and will only stay in this state if the string is only ASCII characters
221
                        If the DFA ends in this state the string is ASCII only
222
            1 -> _UTF8_DFA_ACCEPT is the valid complete character state of the DFA once it has encountered a UTF-8 Unicode character
223
            2 -> _UTF8_DFA_INVALID is only reached by invalid bytes and once in this state it will not change
224
                    as seen by all 2s in that column of table below
225
            3 -> One valid continuation byte needed to return to state 1
225
        4,5,6 -> Two valid continuation bytes needed to return to state 1
226
        7,8,9 -> Three valid continuation bytes needed to return to state 1
228

229
                        Current State
230
                    0̲  1̲  2̲  3̲  4̲  5̲  6̲  7̲  8̲  9̲
231
                0 | 0  1  2  2  2  2  2  2  2  2
232
                1 | 2  2  2  1  3  2  3  2  4  4
233
                2 | 3  3  2  2  2  2  2  2  2  2
234
                3 | 4  4  2  2  2  2  2  2  2  2
235
                4 | 6  6  2  2  2  2  2  2  2  2
236
    Character   5 | 9  9  2  2  2  2  2  2  2  2     <- Next State
237
    Class       6 | 8  8  2  2  2  2  2  2  2  2
238
                7 | 2  2  2  1  3  3  2  4  4  2
239
                8 | 2  2  2  2  2  2  2  2  2  2
240
                9 | 2  2  2  1  3  2  3  4  4  2
241
               10 | 5  5  2  2  2  2  2  2  2  2
242
               11 | 7  7  2  2  2  2  2  2  2  2
243

244
           Shifts | 0  4 10 14 18 24  8 20 12 26
245

246
    The shifts that represent each state were derived using the SMT solver Z3, to ensure when encoded into
247
    the rows the correct shift was a result.
248

249
    Each character class row is encoding 10 states with shifts as defined above. By shifting the bits of a row by
250
    the current state then masking the result with 0b11110 gives the shift for the new state
251

252

253
=#
254

255
#State type used by UTF-8 DFA
256
const _UTF8DFAState = UInt32
257
# Fill the table with 256 `_UTF8DFAState` (UInt32) values representing the DFA transitions for all bytes
258
const _UTF8_DFA_TABLE = let # let block rather than function doesn't pollute base
259
    num_classes=12
260
    num_states=10
261
    bit_per_state = 6
262

263
    # These shifts were derived using a SMT solver
264
    state_shifts = [0, 4, 10, 14, 18, 24, 8, 20, 12, 26]
265

266
    character_classes = [   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
267
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
268
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
269
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
270
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
271
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
272
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
273
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
274
                            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
275
                            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
276
                            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
277
                            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
278
                            8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
279
                            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
280
                            10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3,
281
                            11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 ]
282

283
    # These are the rows discussed in comments above
284
    state_arrays = [ 0  1  2  2  2  2  2  2  2  2;
285
                     2  2  2  1  3  2  3  2  4  4;
286
                     3  3  2  2  2  2  2  2  2  2;
287
                     4  4  2  2  2  2  2  2  2  2;
288
                     6  6  2  2  2  2  2  2  2  2;
289
                     9  9  2  2  2  2  2  2  2  2;
290
                     8  8  2  2  2  2  2  2  2  2;
291
                     2  2  2  1  3  3  2  4  4  2;
292
                     2  2  2  2  2  2  2  2  2  2;
293
                     2  2  2  1  3  2  3  4  4  2;
294
                     5  5  2  2  2  2  2  2  2  2;
295
                     7  7  2  2  2  2  2  2  2  2]
296

297
    #This converts the state_arrays into the shift encoded _UTF8DFAState
298
    class_row = zeros(_UTF8DFAState, num_classes)
299

300
    for i = 1:num_classes
301
        row = _UTF8DFAState(0)
302
        for j in 1:num_states
303
            #Calculate the shift required for the next state
304
            to_shift = UInt8((state_shifts[state_arrays[i,j]+1]) )
305
            #Shift the next state into the position of the current state
306
            row = row | (_UTF8DFAState(to_shift) << state_shifts[j])
307
        end
308
        class_row[i]=row
309
    end
310

311
    map(c->class_row[c+1],character_classes)
×
312
end
313

314

315
const _UTF8_DFA_ASCII = _UTF8DFAState(0) # Start state; the DFA stays in it only while every byte seen so far is ASCII
316
const _UTF8_DFA_ACCEPT = _UTF8DFAState(4) #This state represents the start and end of any valid string
317
const _UTF8_DFA_INVALID = _UTF8DFAState(10) # If the state machine is ever in this state just stop
318

319
# The dfa step is broken out so that it may be used in other functions. The mask was calculated to work with state shifts above
320
@inline _utf_dfa_step(state::_UTF8DFAState, byte::UInt8) = @inbounds (_UTF8_DFA_TABLE[byte+1] >> state) & _UTF8DFAState(0x0000001E)
54,209✔
321

322
@inline function _isvalid_utf8_dfa(state::_UTF8DFAState, bytes::AbstractVector{UInt8}, first::Int = firstindex(bytes), last::Int = lastindex(bytes))
25,752✔
323
    for i = first:last
51,258✔
324
       @inbounds state = _utf_dfa_step(state, bytes[i])
54,209✔
325
    end
82,597✔
326
    return (state)
25,821✔
327
end
328

329
@inline function  _find_nonascii_chunk(chunk_size,cu::AbstractVector{CU}, first,last) where {CU}
10✔
330
    n=first
10✔
331
    while n <= last - chunk_size
40✔
332
        _isascii(cu,n,n+chunk_size-1) || return n
30✔
333
        n += chunk_size
30✔
334
    end
30✔
335
    n= last-chunk_size+1
10✔
336
    _isascii(cu,n,last) || return n
10✔
337
    return nothing
10✔
338
end
339

340
##
341

342
# Classifications of string
343
    # 0: neither valid ASCII nor UTF-8
344
    # 1: valid ASCII
345
    # 2: valid UTF-8
346
 byte_string_classify(s::AbstractString) = byte_string_classify(codeunits(s))
24✔
347

348

349
function byte_string_classify(bytes::AbstractVector{UInt8})
20,283✔
350
    chunk_size = 1024
20,259✔
351
    chunk_threshold =  chunk_size + (chunk_size ÷ 2)
20,259✔
352
    n = length(bytes)
20,428✔
353
    if n > chunk_threshold
20,428✔
354
        start = _find_nonascii_chunk(chunk_size,bytes,1,n)
10✔
355
        isnothing(start) && return 1
10✔
356
    else
357
        _isascii(bytes,1,n) && return 1
20,418✔
358
        start = 1
20,120✔
359
    end
360
    return _byte_string_classify_nonascii(bytes,start,n)
20,189✔
361
end
362

363
function _byte_string_classify_nonascii(bytes::AbstractVector{UInt8}, first::Int, last::Int)
20,189✔
364
    chunk_size = 256
20,120✔
365

366
    start = first
20,120✔
367
    stop = min(last,first + chunk_size - 1)
20,189✔
368
    state = _UTF8_DFA_ACCEPT
20,120✔
369
    while start <= last
24,926✔
370
        # try to process ascii chunks
371
        while state == _UTF8_DFA_ACCEPT
20,189✔
372
            _isascii(bytes,start,stop) || break
20,189✔
373
            (start = start + chunk_size) <= last || break
×
374
            stop = min(last,stop + chunk_size)
×
375
        end
×
376
        # Process non ascii chunk
377
        state = _isvalid_utf8_dfa(state,bytes,start,stop)
48,577✔
378
        state == _UTF8_DFA_INVALID && return 0
20,189✔
379

380
        start = start + chunk_size
4,737✔
381
        stop = min(last,stop + chunk_size)
4,737✔
382
    end
4,737✔
383
    return ifelse(state == _UTF8_DFA_ACCEPT,2,0)
4,737✔
384
end
385

386
isvalid(::Type{String}, bytes::AbstractVector{UInt8}) = (@inline byte_string_classify(bytes)) ≠ 0
20,404✔
387
isvalid(::Type{String}, s::AbstractString) =  (@inline byte_string_classify(s)) ≠ 0
24✔
388

389
@inline isvalid(s::AbstractString) = @inline isvalid(String, codeunits(s))
145✔
390

391
# A byte counts as a continuation byte when its two highest bits are `10`,
# i.e. masking with 0xc0 leaves exactly 0x80.
function is_valid_continuation(c)
    top_bits = c & 0xc0
    return top_bits == 0x80
end
6✔
392

393
## required core functionality ##
394

395
@inline function iterate(s::String, i::Int=firstindex(s))
64,832✔
396
    (i % UInt) - 1 < ncodeunits(s) || return nothing
29,834,561✔
397
    b = @inbounds codeunit(s, i)
20,232,155✔
398
    u = UInt32(b) << 24
20,232,155✔
399
    between(b, 0x80, 0xf7) || return reinterpret(Char, u), i+1
39,544,469✔
400
    return iterate_continued(s, i, u)
919,841✔
401
end
402

403
function iterate_continued(s::String, i::Int, u::UInt32)
919,841✔
404
    u < 0xc0000000 && (i += 1; @goto ret)
919,942✔
405
    n = ncodeunits(s)
919,740✔
406
    # first continuation byte
407
    (i += 1) > n && @goto ret
919,740✔
408
    @inbounds b = codeunit(s, i)
919,720✔
409
    b & 0xc0 == 0x80 || @goto ret
919,720✔
410
    u |= UInt32(b) << 16
919,659✔
411
    # second continuation byte
412
    ((i += 1) > n) | (u < 0xe0000000) && @goto ret
919,659✔
413
    @inbounds b = codeunit(s, i)
854,626✔
414
    b & 0xc0 == 0x80 || @goto ret
854,628✔
415
    u |= UInt32(b) << 8
854,624✔
416
    # third continuation byte
417
    ((i += 1) > n) | (u < 0xf0000000) && @goto ret
854,624✔
418
    @inbounds b = codeunit(s, i)
273,718✔
419
    b & 0xc0 == 0x80 || @goto ret
273,719✔
420
    u |= UInt32(b); i += 1
547,434✔
421
@label ret
×
422
    return reinterpret(Char, u), i
919,841✔
423
end
424

425
@propagate_inbounds function getindex(s::String, i::Int)
153,800✔
426
    b = codeunit(s, i)
2,702,684✔
427
    u = UInt32(b) << 24
2,702,684✔
428
    between(b, 0x80, 0xf7) || return reinterpret(Char, u)
5,401,609✔
429
    return getindex_continued(s, i, u)
3,759✔
430
end
431

432
function getindex_continued(s::String, i::Int, u::UInt32)
3,759✔
433
    if u < 0xc0000000
3,759✔
434
        # called from `getindex` which checks bounds
435
        @inbounds isvalid(s, i) && @goto ret
16✔
436
        string_index_err(s, i)
1✔
437
    end
438
    n = ncodeunits(s)
3,743✔
439

440
    (i += 1) > n && @goto ret
3,743✔
441
    @inbounds b = codeunit(s, i) # cont byte 1
3,742✔
442
    b & 0xc0 == 0x80 || @goto ret
3,742✔
443
    u |= UInt32(b) << 16
3,741✔
444

445
    ((i += 1) > n) | (u < 0xe0000000) && @goto ret
3,741✔
446
    @inbounds b = codeunit(s, i) # cont byte 2
2,482✔
447
    b & 0xc0 == 0x80 || @goto ret
2,482✔
448
    u |= UInt32(b) << 8
2,482✔
449

450
    ((i += 1) > n) | (u < 0xf0000000) && @goto ret
2,482✔
451
    @inbounds b = codeunit(s, i) # cont byte 3
478✔
452
    b & 0xc0 == 0x80 || @goto ret
478✔
453
    u |= UInt32(b)
478✔
454
@label ret
×
455
    return reinterpret(Char, u)
3,758✔
456
end
457

458
# Slice a `String` with any integer unit range: both endpoints are converted to `Int`
# and the slice is taken with the resulting `Int` range.
function getindex(s::String, r::AbstractUnitRange{<:Integer})
    lo = Int(first(r))
    hi = Int(last(r))
    return s[lo:hi]
end
×
459

460
@inline function getindex(s::String, r::UnitRange{Int})
1,502,427✔
461
    isempty(r) && return ""
992,845✔
462
    i, j = first(r), last(r)
69,336✔
463
    @boundscheck begin
967,209✔
464
        checkbounds(s, r)
967,191✔
465
        @inbounds isvalid(s, i) || string_index_err(s, i)
967,179✔
466
        @inbounds isvalid(s, j) || string_index_err(s, j)
967,180✔
467
    end
468
    j = nextind(s, j) - 1
967,202✔
469
    n = j - i + 1
967,202✔
470
    ss = _string_n(n)
967,202✔
471
    GC.@preserve s ss unsafe_copyto!(pointer(ss), pointer(s, i), n)
967,202✔
472
    return ss
967,202✔
473
end
474

475
# nothrow because we know the start and end indices are valid
476
@assume_effects :nothrow length(s::String) = length_continued(s, 1, ncodeunits(s), ncodeunits(s))
892,279✔
477

478
# effects needed because @inbounds
479
@assume_effects :consistent :effect_free @inline function length(s::String, i::Int, j::Int)
68,251✔
480
    @boundscheck begin
228,523✔
481
        0 < i ≤ ncodeunits(s)+1 || throw(BoundsError(s, i))
228,523✔
482
        0 ≤ j < ncodeunits(s)+1 || throw(BoundsError(s, j))
228,527✔
483
    end
484
    j < i && return 0
228,519✔
485
    @inbounds i, k = thisind(s, i), i
350,866✔
486
    c = j - i + (i == k)
175,433✔
487
    @inbounds length_continued(s, i, j, c)
175,433✔
488
end
489

490
@assume_effects :terminates_locally @inline @propagate_inbounds function length_continued(s::String, i::Int, n::Int, c::Int)
896,987✔
491
    i < n || return c
1,291,553✔
492
    b = codeunit(s, i)
843,871✔
493
    while true
4,267,855✔
494
        while true
53,836,626✔
495
            (i += 1) ≤ n || return c
54,679,726✔
496
            0xc0 ≤ b ≤ 0xf7 && break
52,993,526✔
497
            b = codeunit(s, i)
49,568,771✔
498
        end
49,568,771✔
499
        l = b
3,424,755✔
500
        b = codeunit(s, i) # cont byte 1
3,424,755✔
501
        c -= (x = b & 0xc0 == 0x80)
3,424,755✔
502
        x & (l ≥ 0xe0) || continue
3,424,755✔
503

504
        (i += 1) ≤ n || return c
3,025,417✔
505
        b = codeunit(s, i) # cont byte 2
3,023,955✔
506
        c -= (x = b & 0xc0 == 0x80)
3,023,955✔
507
        x & (l ≥ 0xf0) || continue
5,150,883✔
508

509
        (i += 1) ≤ n || return c
897,067✔
510
        b = codeunit(s, i) # cont byte 3
896,987✔
511
        c -= (b & 0xc0 == 0x80)
896,987✔
512
    end
3,423,984✔
513
end
514

515
## overload methods for efficiency ##
516

517
# `i` is a valid index into `s` when it is in bounds and `thisind` maps it to itself.
# Out-of-bounds indices return `false` without calling `thisind`.
function isvalid(s::String, i::Int)
    checkbounds(Bool, s, i) || return false
    return thisind(s, i) == i
end
84,841,979✔
518

519
isascii(s::String) = isascii(codeunits(s))
25,782✔
520

521
# don't assume effects for general integers since we cannot know their implementation
522
@assume_effects :foldable repeat(c::Char, r::BitInteger) = @invoke repeat(c::Char, r::Integer)
14,207✔
523

524
"""
525
    repeat(c::AbstractChar, r::Integer) -> String
526

527
Repeat a character `r` times. This can equivalently be accomplished by calling
528
[`c^r`](@ref :^(::Union{AbstractString, AbstractChar}, ::Integer)).
529

530
# Examples
531
```jldoctest
532
julia> repeat('A', 3)
533
"AAA"
534
```
535
"""
536
function repeat(c::AbstractChar, r::Integer)
14,208✔
537
    c = Char(c)::Char
14,208✔
538
    r == 0 && return ""
14,208✔
539
    r < 0 && throw(ArgumentError("can't repeat a character $r times"))
10,142✔
540
    u = bswap(reinterpret(UInt32, c))
10,138✔
541
    n = 4 - (leading_zeros(u | 0xff) >> 3)
10,138✔
542
    s = _string_n(n*r)
10,138✔
543
    p = pointer(s)
10,136✔
544
    GC.@preserve s if n == 1
10,136✔
545
        ccall(:memset, Ptr{Cvoid}, (Ptr{UInt8}, Cint, Csize_t), p, u % UInt8, r)
10,110✔
546
    elseif n == 2
26✔
547
        p16 = reinterpret(Ptr{UInt16}, p)
6✔
548
        for i = 1:r
12✔
549
            unsafe_store!(p16, u % UInt16, i)
20✔
550
        end
20✔
551
    elseif n == 3
20✔
552
        b1 = (u >> 0) % UInt8
16✔
553
        b2 = (u >> 8) % UInt8
16✔
554
        b3 = (u >> 16) % UInt8
16✔
555
        for i = 0:r-1
32✔
556
            unsafe_store!(p, b1, 3i + 1)
45✔
557
            unsafe_store!(p, b2, 3i + 2)
45✔
558
            unsafe_store!(p, b3, 3i + 3)
45✔
559
        end
45✔
560
    elseif n == 4
4✔
561
        p32 = reinterpret(Ptr{UInt32}, p)
4✔
562
        for i = 1:r
8✔
563
            unsafe_store!(p32, u, i)
8✔
564
        end
10,144✔
565
    end
566
    return s
10,136✔
567
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc