• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / #37998

01 Feb 2025 04:36AM UTC coverage: 25.515% (+8.2%) from 17.283%
#37998

push

local

web-flow
🤖 [master] Bump the LinearAlgebra stdlib from da6d052 to 57e9a0d (#57177)

Stdlib: LinearAlgebra
URL: https://github.com/JuliaLang/LinearAlgebra.jl.git
Stdlib branch: master
Julia branch: master
Old commit: da6d052
New commit: 57e9a0d
Julia version: 1.12.0-DEV
LinearAlgebra version: 1.12.0
Bump invoked by: @ViralBShah
Powered by:
[BumpStdlibs.jl](https://github.com/JuliaLang/BumpStdlibs.jl)

Diff:
https://github.com/JuliaLang/LinearAlgebra.jl/compare/da6d05213...57e9a0d19

```
$ git log --oneline da6d052..57e9a0d
57e9a0d Reduce allocations and improve performance in `syevr!` (#1176)
8bb9f6b fix error messages (#1171)
97a712f Update .ci/Manifest.toml (#1179)
```

Co-authored-by: ViralBShah <744411+ViralBShah@users.noreply.github.com>
Co-authored-by: Viral B. Shah <viral@juliacomputing.com>

12529 of 49105 relevant lines covered (25.51%)

393354.34 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

52.29
/base/strings/string.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
"""
4
    StringIndexError(str, i)
5

6
An error occurred when trying to access `str` at index `i` that is not valid.
7
"""
8
struct StringIndexError <: Exception
9
    string::AbstractString
×
10
    index::Integer
11
end
12
@noinline string_index_err(s::AbstractString, i::Integer) =
×
13
    throw(StringIndexError(s, Int(i)))
14
function Base.showerror(io::IO, exc::StringIndexError)
×
15
    s = exc.string
×
16
    print(io, "StringIndexError: ", "invalid index [$(exc.index)]")
×
17
    if firstindex(s) <= exc.index <= ncodeunits(s)
×
18
        iprev = thisind(s, exc.index)
×
19
        inext = nextind(s, iprev)
×
20
        escprev = escape_string(s[iprev:iprev])
×
21
        if inext <= ncodeunits(s)
×
22
            escnext = escape_string(s[inext:inext])
×
23
            print(io, ", valid nearby indices [$iprev]=>'$escprev', [$inext]=>'$escnext'")
×
24
        else
25
            print(io, ", valid nearby index [$iprev]=>'$escprev'")
×
26
        end
27
    end
28
end
29

30
# Inclusive range test `lo ≤ b ≤ hi`; the two comparisons are combined with the
# non-short-circuiting `&`, so both are always evaluated.
@inline function between(b::T, lo::T, hi::T) where {T<:Integer}
    not_below = lo ≤ b
    not_above = b ≤ hi
    return not_below & not_above
end
2,040,523✔
31

32
"""
33
    String <: AbstractString
34

35
The default string type in Julia, used by e.g. string literals.
36

37
`String`s are immutable sequences of `Char`s. A `String` is stored internally as
38
a contiguous byte array, and while they are interpreted as being UTF-8 encoded,
39
they can be composed of any byte sequence. Use [`isvalid`](@ref) to validate
40
that the underlying byte sequence is valid as UTF-8.
41
"""
42
String
43

44
## constructors and conversions ##
45

46
# String constructor docstring from boot.jl, workaround for #16730
47
# and the unavailability of @doc in boot.jl context.
48
"""
49
    String(v::AbstractVector{UInt8})
50

51
Create a new `String` object using the data buffer from byte vector `v`.
52
If `v` is a `Vector{UInt8}` it will be truncated to zero length and future
53
modification of `v` cannot affect the contents of the resulting string.
54
To avoid truncation of `Vector{UInt8}` data, use `String(copy(v))`; for other
55
`AbstractVector` types, `String(v)` already makes a copy.
56

57
When possible, the memory of `v` will be used without copying when the `String`
58
object is created. This is guaranteed to be the case for byte vectors returned
59
by [`take!`](@ref) on a writable [`IOBuffer`](@ref) and by calls to
60
[`read(io, nb)`](@ref). This allows zero-copy conversion of I/O data to strings.
61
In other cases, `Vector{UInt8}` data may be copied, but `v` is truncated anyway
62
to guarantee consistent behavior.
63
"""
64
function String(v::AbstractVector{UInt8})
    # Copy the bytes of `v` into a new `StringMemory` buffer of the same length,
    # then hand that buffer over to the resulting `String`.
    mem = copyto!(StringMemory(length(v)), v)
    return unsafe_takestring(mem)
end
21✔
65
function String(v::Vector{UInt8})
66
    #return ccall(:jl_array_to_string, Ref{String}, (Any,), v)
67
    len = length(v)
575,873✔
68
    len == 0 && return ""
575,873✔
69
    ref = v.ref
316,098✔
70
    if ref.ptr_or_offset == ref.mem.ptr
323,403✔
71
        str = ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), ref.mem, len)
323,403✔
72
    else
73
        str = ccall(:jl_pchar_to_string, Ref{String}, (Ptr{UInt8}, Int), ref, len)
×
74
    end
75
    # optimized empty!(v); sizehint!(v, 0) calls
76
    setfield!(v, :size, (0,))
323,403✔
77
    setfield!(v, :ref, memoryref(Memory{UInt8}()))
316,098✔
78
    return str
323,403✔
79
end
80

81
"Create a string re-using the memory, if possible.
82
Mutating or reading the memory after calling this function is undefined behaviour."
83
function unsafe_takestring(m::Memory{UInt8})
84
    isempty(m) ? "" : ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), m, length(m))
122,810✔
85
end
86

87
"""
88
    unsafe_string(p::Ptr{UInt8}, [length::Integer])
89

90
Copy a string from the address of a C-style (NUL-terminated) string encoded as UTF-8.
91
(The pointer can be safely freed afterwards.) If `length` is specified
92
(the length of the data in bytes), the string does not have to be NUL-terminated.
93

94
This function is labeled "unsafe" because it will crash if `p` is not
95
a valid memory address to data of the requested length.
96
"""
97
function unsafe_string(p::Union{Ptr{UInt8},Ptr{Int8}}, len::Integer)
98
    p == C_NULL && throw(ArgumentError("cannot convert NULL to string"))
231,467✔
99
    ccall(:jl_pchar_to_string, Ref{String}, (Ptr{UInt8}, Int), p, len)
231,467✔
100
end
101
function unsafe_string(p::Union{Ptr{UInt8},Ptr{Int8}})
102
    p == C_NULL && throw(ArgumentError("cannot convert NULL to string"))
69,304✔
103
    ccall(:jl_cstr_to_string, Ref{String}, (Ptr{UInt8},), p)
69,304✔
104
end
105

106
# This is `@assume_effects :total !:consistent @ccall jl_alloc_string(n::Csize_t)::Ref{String}`,
107
# but the macro is not available at this time in bootstrap, so we write it manually.
108
const _string_n_override = 0x04ee
109
@eval _string_n(n::Integer) = $(Expr(:foreigncall, QuoteNode(:jl_alloc_string), Ref{String},
151,197✔
110
    :(Core.svec(Csize_t)), 1, QuoteNode((:ccall, _string_n_override)), :(convert(Csize_t, n))))
111

112
"""
113
    String(s::AbstractString)
114

115
Create a new `String` from an existing `AbstractString`.
116
"""
117
String(s::AbstractString) = print_to_string(s)
×
118
@assume_effects :total String(s::Symbol) = unsafe_string(unsafe_convert(Ptr{UInt8}, s))
1,863✔
119

120
unsafe_wrap(::Type{Memory{UInt8}}, s::String) = ccall(:jl_string_to_genericmemory, Ref{Memory{UInt8}}, (Any,), s)
90,437✔
121
unsafe_wrap(::Type{Vector{UInt8}}, s::String) = wrap(Array, unsafe_wrap(Memory{UInt8}, s))
220✔
122

123
Vector{UInt8}(s::CodeUnits{UInt8,String}) = copyto!(Vector{UInt8}(undef, length(s)), s)
16,792✔
124
Vector{UInt8}(s::String) = Vector{UInt8}(codeunits(s))
16,792✔
125
Array{UInt8}(s::String)  = Vector{UInt8}(codeunits(s))
×
126

127
String(s::CodeUnits{UInt8,String}) = s.s
×
128

129
## low-level functions ##
130

131
pointer(s::String) = unsafe_convert(Ptr{UInt8}, s)
6,197,663✔
132
# Address of code unit `i` of `s`: the base pointer of `s` advanced by `i - 1`.
# No bounds check is performed here.
pointer(s::String, i::Integer) = pointer(s) + Int(i)::Int - 1
5,492,919✔
133

134
ncodeunits(s::String) = Core.sizeof(s)
6,774,305✔
135
codeunit(s::String) = UInt8
×
136

137
codeunit(s::String, i::Integer) = codeunit(s, Int(i))
×
138
@assume_effects :foldable @inline function codeunit(s::String, i::Int)
27✔
139
    @boundscheck checkbounds(s, i)
5,140,559✔
140
    b = GC.@preserve s unsafe_load(pointer(s, i))
5,140,559✔
141
    return b
5,140,559✔
142
end
143

144
## comparison ##
145

146
@assume_effects :total _memcmp(a::String, b::String) = @invoke _memcmp(a::Union{Ptr{UInt8},AbstractString},b::Union{Ptr{UInt8},AbstractString})
178,401✔
147

148
_memcmp(a::Union{Ptr{UInt8},AbstractString}, b::Union{Ptr{UInt8},AbstractString}) = _memcmp(a, b, min(sizeof(a), sizeof(b)))
481,677✔
149
function _memcmp(a::Union{Ptr{UInt8},AbstractString}, b::Union{Ptr{UInt8},AbstractString}, len::Int)
150
    GC.@preserve a b begin
483,242✔
151
        pa = unsafe_convert(Ptr{UInt8}, a)
483,033✔
152
        pb = unsafe_convert(Ptr{UInt8}, b)
483,242✔
153
        memcmp(pa, pb, len % Csize_t) % Int
483,242✔
154
    end
155
end
156

157
# Three-way comparison of two `String`s by their raw bytes.
# Returns -1 or +1 when the compared prefix differs; otherwise returns the
# result of comparing the two byte lengths.
function cmp(a::String, b::String)
158
    # Byte lengths, used only to break ties when the compared prefix is equal.
    al, bl = sizeof(a), sizeof(b)
178,401✔
159
    # The two-argument `_memcmp` compares the first `min(sizeof(a), sizeof(b))` bytes.
    c = _memcmp(a, b)
178,401✔
160
    # Normalize the sign of the memcmp result; on a tie fall back to the lengths.
    return c < 0 ? -1 : c > 0 ? +1 : cmp(al,bl)
243,838✔
161
end
162

163
# Equality of two `String`s is delegated to the egality operator `===`.
==(a::String, b::String) = a===b
276,396✔
164

165
typemin(::Type{String}) = ""
×
166
typemin(::String) = typemin(String)
×
167

168
## thisind, nextind ##
169

170
@propagate_inbounds thisind(s::String, i::Int) = _thisind_str(s, i)
405,614✔
171

172
# s should be String or SubString{String}
173
@inline function _thisind_str(s, i::Int)
174
    i == 0 && return 0
204,001✔
175
    n = ncodeunits(s)
208,609✔
176
    i == n + 1 && return i
208,609✔
177
    @boundscheck between(i, 1, n) || throw(BoundsError(s, i))
208,609✔
178
    @inbounds b = codeunit(s, i)
208,609✔
179
    (b & 0xc0 == 0x80) & (i-1 > 0) || return i
412,035✔
180
    (@noinline function _thisind_continued(s, i, n) # mark the rest of the function as a slow-path
×
181
        local b
×
182
        @inbounds b = codeunit(s, i-1)
×
183
        between(b, 0b11000000, 0b11110111) && return i-1
×
184
        (b & 0xc0 == 0x80) & (i-2 > 0) || return i
×
185
        @inbounds b = codeunit(s, i-2)
×
186
        between(b, 0b11100000, 0b11110111) && return i-2
×
187
        (b & 0xc0 == 0x80) & (i-3 > 0) || return i
×
188
        @inbounds b = codeunit(s, i-3)
×
189
        between(b, 0b11110000, 0b11110111) && return i-3
×
190
        return i
×
191
    end)(s, i, n)
192
end
193

194
@propagate_inbounds nextind(s::String, i::Int) = _nextind_str(s, i)
210,783✔
195

196
# s should be String or SubString{String}
197
@inline function _nextind_str(s, i::Int)
198
    i == 0 && return 1
144,790✔
199
    n = ncodeunits(s)
144,804✔
200
    @boundscheck between(i, 1, n) || throw(BoundsError(s, i))
144,804✔
201
    @inbounds l = codeunit(s, i)
144,804✔
202
    between(l, 0x80, 0xf7) || return i+1
289,607✔
203
    (@noinline function _nextind_continued(s, i, n, l) # mark the rest of the function as a slow-path
2✔
204
        if l < 0xc0
1✔
205
            # handle invalid codeunit index by scanning back to the start of this index
206
            # (which may be the same as this index)
207
            i′ = @inbounds thisind(s, i)
×
208
            i′ >= i && return i+1
×
209
            i = i′
×
210
            @inbounds l = codeunit(s, i)
×
211
            (l < 0x80) | (0xf8 ≤ l) && return i+1
×
212
            @assert l >= 0xc0 "invalid codeunit"
×
213
        end
214
        # first continuation byte
215
        (i += 1) > n && return i
1✔
216
        @inbounds b = codeunit(s, i)
1✔
217
        b & 0xc0 ≠ 0x80 && return i
1✔
218
        ((i += 1) > n) | (l < 0xe0) && return i
1✔
219
        # second continuation byte
220
        @inbounds b = codeunit(s, i)
1✔
221
        b & 0xc0 ≠ 0x80 && return i
1✔
222
        ((i += 1) > n) | (l < 0xf0) && return i
1✔
223
        # third continuation byte
224
        @inbounds b = codeunit(s, i)
×
225
        return ifelse(b & 0xc0 ≠ 0x80, i, i+1)
×
226
    end)(s, i, n, l)
227
end
228

229
## checking UTF-8 & ASCII validity ##
230
#=
231
    The UTF-8 Validation is performed by a shift based DFA.
232
    ┌───────────────────────────────────────────────────────────────────┐
233
    │    UTF-8 DFA State Diagram    ┌──────────────2──────────────┐     │
234
    │                               ├────────3────────┐           │     │
235
    │                 ┌──────────┐  │     ┌─┐        ┌▼┐          │     │
236
    │      ASCII      │  UTF-8   │  ├─5──►│9├───1────► │          │     │
237
    │                 │          │  │     ├─┤        │ │         ┌▼┐    │
238
    │                 │  ┌─0─┐   │  ├─6──►│8├─1,7,9──►4├──1,7,9──► │    │
239
    │      ┌─0─┐      │  │   │   │  │     ├─┤        │ │         │ │    │
240
    │      │   │      │ ┌▼───┴┐  │  ├─11─►│7├──7,9───► │ ┌───────►3├─┐  │
241
    │     ┌▼───┴┐     │ │     │  ▼  │     └─┘        └─┘ │       │ │ │  │
242
    │     │  0  ├─────┘ │  1  ├─► ──┤                    │  ┌────► │ │  │
243
    │     └─────┘       │     │     │     ┌─┐            │  │    └─┘ │  │
244
    │                   └──▲──┘     ├─10─►│5├─────7──────┘  │        │  │
245
    │                      │        │     ├─┤               │        │  │
246
    │                      │        └─4──►│6├─────1,9───────┘        │  │
247
    │          INVALID     │              └─┘                        │  │
248
    │           ┌─*─┐      └──────────────────1,7,9──────────────────┘  │
249
    │          ┌▼───┴┐                                                  │
250
    │          │  2  ◄─── All undefined transitions result in state 2   │
251
    │          └─────┘                                                  │
252
    └───────────────────────────────────────────────────────────────────┘
253

254
        Validation States
255
            0 -> _UTF8_DFA_ASCII is the start state and will only stay in this state if the string is only ASCII characters
256
                        If the DFA ends in this state the string is ASCII only
257
            1 -> _UTF8_DFA_ACCEPT is the valid complete character state of the DFA once it has encountered a UTF-8 Unicode character
258
            2 -> _UTF8_DFA_INVALID is only reached by invalid bytes and once in this state it will not change
259
                    as seen by all 2s in that column of table below
260
            3 -> One valid continuation byte needed to return to state 1
261
        4,5,6 -> Two valid continuation bytes needed to return to state 1
262
        7,8,9 -> Three valid continuation bytes needed to return to state 1
263

264
                        Current State
265
                    0̲  1̲  2̲  3̲  4̲  5̲  6̲  7̲  8̲  9̲
266
                0 | 0  1  2  2  2  2  2  2  2  2
267
                1 | 2  2  2  1  3  2  3  2  4  4
268
                2 | 3  3  2  2  2  2  2  2  2  2
269
                3 | 4  4  2  2  2  2  2  2  2  2
270
                4 | 6  6  2  2  2  2  2  2  2  2
271
    Character   5 | 9  9  2  2  2  2  2  2  2  2     <- Next State
272
    Class       6 | 8  8  2  2  2  2  2  2  2  2
273
                7 | 2  2  2  1  3  3  2  4  4  2
274
                8 | 2  2  2  2  2  2  2  2  2  2
275
                9 | 2  2  2  1  3  2  3  4  4  2
276
               10 | 5  5  2  2  2  2  2  2  2  2
277
               11 | 7  7  2  2  2  2  2  2  2  2
278

279
           Shifts | 0  4 10 14 18 24  8 20 12 26
280

281
    The shifts that represent each state were derived using the SMT solver Z3, to ensure when encoded into
282
    the rows the correct shift was a result.
283

284
    Each character class row is encoding 10 states with shifts as defined above. By shifting the bits of a row by
285
    the current state then masking the result with 0b11110 (0x1E) gives the shift for the new state.
286

287

288
=#
289

290
#State type used by UTF-8 DFA
291
const _UTF8DFAState = UInt32
292
# Fill the table with 256 _UTF8DFAState (UInt32) values representing the DFA transitions for all bytes
293
const _UTF8_DFA_TABLE = let # let block rather than function doesn't pollute base
294
    num_classes=12
295
    num_states=10
296
    bit_per_state = 6
297

298
    # These shifts were derived using a SMT solver
299
    state_shifts = [0, 4, 10, 14, 18, 24, 8, 20, 12, 26]
300

301
    character_classes = [   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
302
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
303
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
304
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
305
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
306
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
307
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
308
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
309
                            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
310
                            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
311
                            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
312
                            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
313
                            8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
314
                            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
315
                            10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3,
316
                            11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 ]
317

318
    # These are the rows discussed in comments above
319
    state_arrays = [ 0  1  2  2  2  2  2  2  2  2;
320
                     2  2  2  1  3  2  3  2  4  4;
321
                     3  3  2  2  2  2  2  2  2  2;
322
                     4  4  2  2  2  2  2  2  2  2;
323
                     6  6  2  2  2  2  2  2  2  2;
324
                     9  9  2  2  2  2  2  2  2  2;
325
                     8  8  2  2  2  2  2  2  2  2;
326
                     2  2  2  1  3  3  2  4  4  2;
327
                     2  2  2  2  2  2  2  2  2  2;
328
                     2  2  2  1  3  2  3  4  4  2;
329
                     5  5  2  2  2  2  2  2  2  2;
330
                     7  7  2  2  2  2  2  2  2  2]
331

332
    #This converts the state_arrays into the shift encoded _UTF8DFAState
333
    class_row = zeros(_UTF8DFAState, num_classes)
334

335
    for i = 1:num_classes
336
        row = _UTF8DFAState(0)
337
        for j in 1:num_states
338
            #Calculate the shift required for the next state
339
            to_shift = UInt8((state_shifts[state_arrays[i,j]+1]) )
340
            #Shift the next state into the position of the current state
341
            row = row | (_UTF8DFAState(to_shift) << state_shifts[j])
342
        end
343
        class_row[i]=row
344
    end
345

346
    map(c->class_row[c+1],character_classes)
×
347
end
348

349

350
const _UTF8_DFA_ASCII = _UTF8DFAState(0) # Start state (shift 0); the DFA stays here only while all input so far is ASCII
351
const _UTF8_DFA_ACCEPT = _UTF8DFAState(4) # Complete-character state (shift 4) once non-ASCII input has been seen; a valid non-ASCII string ends here
352
const _UTF8_DFA_INVALID = _UTF8DFAState(10) # If the state machine is ever in this state just stop
353

354
# The dfa step is broken out so that it may be used in other functions. The mask was calculated to work with state shifts above
355
@inline function _utf_dfa_step(state::_UTF8DFAState, byte::UInt8)
    # Fetch the packed transition row for this byte, shift the entry belonging
    # to the current state down to the low bits, and mask out the next state.
    transitions = @inbounds _UTF8_DFA_TABLE[byte+1]
    return (transitions >> state) & _UTF8DFAState(0x0000001E)
end
×
356

357
@inline function _isvalid_utf8_dfa(state::_UTF8DFAState, bytes::AbstractVector{UInt8}, first::Int = firstindex(bytes), last::Int = lastindex(bytes))
358
    for i = first:last
×
359
       @inbounds state = _utf_dfa_step(state, bytes[i])
×
360
    end
×
361
    return (state)
×
362
end
363

364
# Scan `cu[first:last]` in windows of `chunk_size` elements and return the start
# index of the first window that `_isascii` rejects, or `nothing` if every
# window passes.
# NOTE(review): the final window starts at `last-chunk_size+1`, so this appears to
# assume the range holds at least `chunk_size` elements — confirm against callers.
@inline function  _find_nonascii_chunk(chunk_size,cu::AbstractVector{CU}, first,last) where {CU}
365
    n=first
×
366
    # Full windows that leave at least one more element after them.
    while n <= last - chunk_size
×
367
        _isascii(cu,n,n+chunk_size-1) || return n
×
368
        n += chunk_size
×
369
    end
×
370
    # Final window, aligned to end at `last`; it may overlap the previous window.
    n= last-chunk_size+1
×
371
    _isascii(cu,n,last) || return n
×
372
    return nothing
×
373
end
374

375
##
376

377
# Classifications of string
378
    # 0: neither valid ASCII nor UTF-8
379
    # 1: valid ASCII
380
    # 2: valid UTF-8
381
 byte_string_classify(s::AbstractString) = byte_string_classify(codeunits(s))
×
382

383

384
# Classify `bytes`: returns 1 as soon as the data is found to be all ASCII,
# otherwise returns whatever `_byte_string_classify_nonascii` reports for the
# range from `start` to the end.
# NOTE(review): indexes `bytes` from 1 to `length(bytes)`, so presumably assumes
# 1-based indexing — confirm.
function byte_string_classify(bytes::AbstractVector{UInt8})
385
    chunk_size = 1024
×
386
    # Inputs longer than 1.5 chunks take the chunked ASCII scan.
    chunk_threshold =  chunk_size + (chunk_size ÷ 2)
×
387
    n = length(bytes)
×
388
    if n > chunk_threshold
×
389
        # Index of the first chunk that failed the ASCII check, or `nothing`.
        start = _find_nonascii_chunk(chunk_size,bytes,1,n)
×
390
        isnothing(start) && return 1
×
391
    else
392
        # Short input: check the whole range at once.
        _isascii(bytes,1,n) && return 1
×
393
        start = 1
×
394
    end
395
    return _byte_string_classify_nonascii(bytes,start,n)
×
396
end
397

398
function _byte_string_classify_nonascii(bytes::AbstractVector{UInt8}, first::Int, last::Int)
×
399
    chunk_size = 256
×
400

401
    start = first
×
402
    stop = min(last,first + chunk_size - 1)
×
403
    state = _UTF8_DFA_ACCEPT
×
404
    while start <= last
×
405
        # try to process ascii chunks
406
        while state == _UTF8_DFA_ACCEPT
×
407
            _isascii(bytes,start,stop) || break
×
408
            (start = start + chunk_size) <= last || break
×
409
            stop = min(last,stop + chunk_size)
×
410
        end
×
411
        # Process non ascii chunk
412
        state = _isvalid_utf8_dfa(state,bytes,start,stop)
×
413
        state == _UTF8_DFA_INVALID && return 0
×
414

415
        start = start + chunk_size
×
416
        stop = min(last,stop + chunk_size)
×
417
    end
×
418
    return ifelse(state == _UTF8_DFA_ACCEPT,2,0)
×
419
end
420

421
isvalid(::Type{String}, bytes::AbstractVector{UInt8}) = (@inline byte_string_classify(bytes)) ≠ 0
×
422
isvalid(::Type{String}, s::AbstractString) =  (@inline byte_string_classify(s)) ≠ 0
×
423

424
@inline isvalid(s::AbstractString) = @inline isvalid(String, codeunits(s))
×
425

426
# True when the top two bits of `c` are `10`, i.e. `c & 0xc0` equals `0x80`.
function is_valid_continuation(c)
    top_two_bits = c & 0xc0
    return top_two_bits == 0x80
end
×
427

428
## required core functionality ##
429

430
@inline function iterate(s::String, i::Int=firstindex(s))
431
    (i % UInt) - 1 < ncodeunits(s) || return nothing
1,344,397✔
432
    b = @inbounds codeunit(s, i)
1,338,018✔
433
    u = UInt32(b) << 24
1,338,018✔
434
    between(b, 0x80, 0xf7) || return reinterpret(Char, u), i+1
2,676,034✔
435
    return @noinline iterate_continued(s, i, u)
2✔
436
end
437

438
# duck-type s so that external UTF-8 string packages like StringViews can hook in
439
function iterate_continued(s, i::Int, u::UInt32)
1✔
440
    u < 0xc0000000 && (i += 1; @goto ret)
1✔
441
    n = ncodeunits(s)
1✔
442
    # first continuation byte
443
    (i += 1) > n && @goto ret
1✔
444
    @inbounds b = codeunit(s, i)
1✔
445
    b & 0xc0 == 0x80 || @goto ret
1✔
446
    u |= UInt32(b) << 16
1✔
447
    # second continuation byte
448
    ((i += 1) > n) | (u < 0xe0000000) && @goto ret
1✔
449
    @inbounds b = codeunit(s, i)
1✔
450
    b & 0xc0 == 0x80 || @goto ret
1✔
451
    u |= UInt32(b) << 8
1✔
452
    # third continuation byte
453
    ((i += 1) > n) | (u < 0xf0000000) && @goto ret
1✔
454
    @inbounds b = codeunit(s, i)
×
455
    b & 0xc0 == 0x80 || @goto ret
×
456
    u |= UInt32(b); i += 1
×
457
@label ret
458
    return reinterpret(Char, u), i
1✔
459
end
460

461
@propagate_inbounds function getindex(s::String, i::Int)
462
    b = codeunit(s, i)
204,303✔
463
    u = UInt32(b) << 24
204,303✔
464
    between(b, 0x80, 0xf7) || return reinterpret(Char, u)
408,605✔
465
    return getindex_continued(s, i, u)
1✔
466
end
467

468
# duck-type s so that external UTF-8 string packages like StringViews can hook in
469
function getindex_continued(s, i::Int, u::UInt32)
1✔
470
    if u < 0xc0000000
1✔
471
        # called from `getindex` which checks bounds
472
        @inbounds isvalid(s, i) && @goto ret
×
473
        string_index_err(s, i)
×
474
    end
475
    n = ncodeunits(s)
1✔
476

477
    (i += 1) > n && @goto ret
1✔
478
    @inbounds b = codeunit(s, i) # cont byte 1
1✔
479
    b & 0xc0 == 0x80 || @goto ret
1✔
480
    u |= UInt32(b) << 16
1✔
481

482
    ((i += 1) > n) | (u < 0xe0000000) && @goto ret
1✔
483
    @inbounds b = codeunit(s, i) # cont byte 2
1✔
484
    b & 0xc0 == 0x80 || @goto ret
1✔
485
    u |= UInt32(b) << 8
1✔
486

487
    ((i += 1) > n) | (u < 0xf0000000) && @goto ret
1✔
488
    @inbounds b = codeunit(s, i) # cont byte 3
×
489
    b & 0xc0 == 0x80 || @goto ret
×
490
    u |= UInt32(b)
×
491
@label ret
492
    return reinterpret(Char, u)
1✔
493
end
494

495
# Generic integer unit ranges are converted to an `Int` range and then indexed
# through `s[...]` again.
function getindex(s::String, r::AbstractUnitRange{<:Integer})
    lo = Int(first(r))
    hi = Int(last(r))
    return s[lo:hi]
end
×
496

497
@inline function getindex(s::String, r::UnitRange{Int})
498
    isempty(r) && return ""
9,862✔
499
    i, j = first(r), last(r)
534✔
500
    @boundscheck begin
9,337✔
501
        checkbounds(s, r)
9,337✔
502
        @inbounds isvalid(s, i) || string_index_err(s, i)
9,337✔
503
        @inbounds isvalid(s, j) || string_index_err(s, j)
9,337✔
504
    end
505
    j = nextind(s, j) - 1
18,674✔
506
    n = j - i + 1
9,337✔
507
    ss = _string_n(n)
9,337✔
508
    GC.@preserve s ss unsafe_copyto!(pointer(ss), pointer(s, i), n)
9,337✔
509
    return ss
9,337✔
510
end
511

512
# nothrow because we know the start and end indices are valid
513
@assume_effects :nothrow length(s::String) = length_continued(s, 1, ncodeunits(s), ncodeunits(s))
25✔
514

515
# effects needed because @inbounds
516
@assume_effects :consistent :effect_free @inline function length(s::String, i::Int, j::Int)
517
    @boundscheck begin
108✔
518
        0 < i ≤ ncodeunits(s)+1 || throw(BoundsError(s, i))
108✔
519
        0 ≤ j < ncodeunits(s)+1 || throw(BoundsError(s, j))
108✔
520
    end
521
    j < i && return 0
108✔
522
    @inbounds i, k = thisind(s, i), i
216✔
523
    c = j - i + (i == k)
108✔
524
    @inbounds length_continued(s, i, j, c)
108✔
525
end
526

527
@assume_effects :terminates_locally @inline @propagate_inbounds function length_continued(s::String, i::Int, n::Int, c::Int)
528
    i < n || return c
214✔
529
    b = codeunit(s, i)
52✔
530
    while true
52✔
531
        while true
418✔
532
            (i += 1) ≤ n || return c
470✔
533
            0xc0 ≤ b ≤ 0xf7 && break
366✔
534
            b = codeunit(s, i)
366✔
535
        end
366✔
536
        l = b
×
537
        b = codeunit(s, i) # cont byte 1
×
538
        c -= (x = b & 0xc0 == 0x80)
×
539
        x & (l ≥ 0xe0) || continue
×
540

541
        (i += 1) ≤ n || return c
×
542
        b = codeunit(s, i) # cont byte 2
×
543
        c -= (x = b & 0xc0 == 0x80)
×
544
        x & (l ≥ 0xf0) || continue
×
545

546
        (i += 1) ≤ n || return c
×
547
        b = codeunit(s, i) # cont byte 3
×
548
        c -= (b & 0xc0 == 0x80)
×
549
    end
×
550
end
551

552
## overload methods for efficiency ##
553

554
# `i` is a valid index of `s` iff it is in bounds and `thisind` maps it to itself.
isvalid(s::String, i::Int) = checkbounds(Bool, s, i) && thisind(s, i) == i
293,554✔
555

556
isascii(s::String) = isascii(codeunits(s))
1,870✔
557

558
# don't assume effects for general integers since we cannot know their implementation
559
@assume_effects :foldable repeat(c::Char, r::BitInteger) = @invoke repeat(c::Char, r::Integer)
433✔
560

561
"""
562
    repeat(c::AbstractChar, r::Integer) -> String
563

564
Repeat a character `r` times. This can equivalently be accomplished by calling
565
[`c^r`](@ref :^(::Union{AbstractString, AbstractChar}, ::Integer)).
566

567
# Examples
568
```jldoctest
569
julia> repeat('A', 3)
570
"AAA"
571
```
572
"""
573
function repeat(c::AbstractChar, r::Integer)
27✔
574
    r < 0 && throw(ArgumentError("can't repeat a character $r times"))
27✔
575
    r = UInt(r)::UInt
27✔
576
    c = Char(c)::Char
×
577
    r == 0 && return ""
27✔
578
    u = bswap(reinterpret(UInt32, c))
27✔
579
    n = 4 - (leading_zeros(u | 0xff) >> 3)
27✔
580
    s = _string_n(n*r)
27✔
581
    p = pointer(s)
27✔
582
    GC.@preserve s if n == 1
27✔
583
        memset(p, u % UInt8, r)
27✔
584
    elseif n == 2
×
585
        p16 = reinterpret(Ptr{UInt16}, p)
×
586
        for i = 1:r
×
587
            unsafe_store!(p16, u % UInt16, i)
×
588
        end
×
589
    elseif n == 3
×
590
        b1 = (u >> 0) % UInt8
×
591
        b2 = (u >> 8) % UInt8
×
592
        b3 = (u >> 16) % UInt8
×
593
        for i = 0:r-1
×
594
            unsafe_store!(p, b1, 3i + 1)
×
595
            unsafe_store!(p, b2, 3i + 2)
×
596
            unsafe_store!(p, b3, 3i + 3)
×
597
        end
×
598
    elseif n == 4
×
599
        p32 = reinterpret(Ptr{UInt32}, p)
×
600
        for i = 1:r
×
601
            unsafe_store!(p32, u, i)
×
602
        end
27✔
603
    end
604
    return s
27✔
605
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc