• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / #38067

11 May 2025 12:27AM UTC coverage: 25.746% (+0.07%) from 25.68%
#38067

push

local

web-flow
fix Big hashing (#58377)

0 of 2 new or added lines in 1 file covered. (0.0%)

78 existing lines in 4 files now uncovered.

12823 of 49806 relevant lines covered (25.75%)

712440.11 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

59.18
/base/strings/substring.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
"""
    SubString(s::AbstractString, i::Integer, j::Integer=lastindex(s))
    SubString(s::AbstractString, r::AbstractUnitRange{<:Integer})

Create a view into the parent string `s` covering the index range `i:j` (or `r`).
This works like [`getindex`](@ref), except that the characters are not copied.

Within a block of code, the [`@views`](@ref) macro turns every string slice `s[i:j]`
into the corresponding substring `SubString(s, i, j)`.

# Examples
```jldoctest
julia> SubString("abc", 1, 2)
"ab"

julia> SubString("abc", 1:2)
"ab"

julia> SubString("abc", 2)
"bc"
```
"""
struct SubString{T<:AbstractString} <: AbstractString
    string::T        # parent string being viewed
    offset::Int      # code units of the parent that precede the view
    ncodeunits::Int  # size of the view in code units

    # View of `s` from index `i` through index `j` (both inclusive).
    function SubString{T}(s::T, i::Int, j::Int) where T<:AbstractString
        if !(i ≤ j)
            # every empty range is stored as the same zero-offset, zero-length view
            return new(s, 0, 0)
        end
        @boundscheck begin
            checkbounds(s, i:j)
            @inbounds isvalid(s, i) || string_index_err(s, i)
            @inbounds isvalid(s, j) || string_index_err(s, j)
        end
        nunits = nextind(s, j) - i
        return new(s, i - 1, nunits)
    end

    # `Val(:noshift)` form: `i` and `j` are stored directly as the `offset` and
    # `ncodeunits` fields instead of being interpreted as a first/last index.
    function SubString{T}(s::T, i::Int, j::Int, ::Val{:noshift}) where T<:AbstractString
        @boundscheck if i != 0 || j != 0
            # recover the first and last parent indices the fields describe
            si = i + 1
            sj = prevind(s, j + i + 1)
            @inbounds isvalid(s, si) || string_index_err(s, si)
            @inbounds isvalid(s, sj) || string_index_err(s, sj)
        end
        return new(s, i, j)
    end
end
48

49
# Outer constructors: normalize the arguments, then forward to the inner constructors.
@propagate_inbounds function SubString(s::T, i::Int, j::Int) where {T<:AbstractString}
    return SubString{T}(s, i, j)
end
@propagate_inbounds function SubString(s::T, i::Int, j::Int, v::Val{:noshift}) where {T<:AbstractString}
    return SubString{T}(s, i, j, v)
end
# Generic integer indices are converted to `Int`; `j` defaults to the last index of `s`.
@propagate_inbounds function SubString(s::AbstractString, i::Integer, j::Integer=lastindex(s))
    return SubString(s, Int(i), Int(j))
end
# A unit range is split into its end points.
@propagate_inbounds function SubString(s::AbstractString, r::AbstractUnitRange{<:Integer})
    return SubString(s, first(r), last(r))
end
41,364✔
53

54
# Taking a substring of a substring yields a view of the original parent, with the
# indices shifted by the offset of `s`, rather than a nested view.
@propagate_inbounds function SubString(s::SubString, i::Int, j::Int)
    @boundscheck if i ≤ j
        checkbounds(s, i:j)
    end
    shift = s.offset
    return SubString(s.string, shift + i, shift + j)
end
58

59
# View spanning the whole of `s`.
function SubString(s::AbstractString)
    stop = lastindex(s)::Int
    return SubString(s, 1, stop)
end
# Same, with the parent type given explicitly.
function SubString{T}(s::T) where {T<:AbstractString}
    stop = lastindex(s)::Int
    return SubString{T}(s, 1, stop)
end
×
61

62
# A range-indexed view of a string is a `SubString`.
@propagate_inbounds function view(s::AbstractString, r::AbstractUnitRange{<:Integer})
    return SubString(s, r)
end
# `maybeview` takes a view for unit ranges ...
@propagate_inbounds function maybeview(s::AbstractString, r::AbstractUnitRange{<:Integer})
    return view(s, r)
end
# ... and falls back to ordinary indexing for any other arguments.
@propagate_inbounds function maybeview(s::AbstractString, args...)
    return getindex(s, args...)
end
×
65

66
# Convert to the parent type `S` first, then wrap the result in a full-length view.
function convert(::Type{SubString{S}}, s::AbstractString) where {S<:AbstractString}
    return SubString(convert(S, s))::SubString{S}
end
# A value that already has the requested substring type is returned unchanged.
function convert(::Type{T}, s::T) where {T<:SubString}
    return s
end

# Regex match allows only Union{String, SubString{String}} so define conversion to this type
function convert(::Type{Union{String, SubString{String}}}, s::String)
    return s
end
function convert(::Type{Union{String, SubString{String}}}, s::SubString{String})
    return s
end
function convert(::Type{Union{String, SubString{String}}}, s::AbstractString)
    return convert(String, s)::String
end
×
74

75
# Copy the bytes covered by the view into a newly created `String`.
function String(s::SubString{String})
    src = s.string
    # `src` must stay rooted while a raw pointer into it is in use
    result = GC.@preserve src begin
        first_byte = pointer(src, s.offset + 1)
        unsafe_string(first_byte, s.ncodeunits)
    end
    return result
end
80

81
# Size of the view in code units (stored at construction time).
function ncodeunits(s::SubString)
    return s.ncodeunits
end
# The code unit type is that of the parent string.
function codeunit(s::SubString)
    return codeunit(s.string)::CodeunitType
end
# Character count, computed by the parent over the index span the view covers.
function length(s::SubString)
    lo = s.offset + 1
    hi = s.offset + s.ncodeunits
    return length(s.string, lo, hi)
end
×
84

85
# Code unit `i` of the view, read from the parent at the shifted position.
function codeunit(s::SubString, i::Integer)
    @boundscheck checkbounds(s, i)
    parent_index = s.offset + i
    return @inbounds codeunit(s.string, parent_index)
end
89

90
# Iterate over the characters of the view by iterating the parent at the shifted
# index and translating the returned state back into view coordinates.
function iterate(s::SubString, i::Integer=firstindex(s))
    # the position one past the last code unit signals the end of iteration
    if i == ncodeunits(s) + 1
        return nothing
    end
    @boundscheck checkbounds(s, i)
    res = iterate(s.string, s.offset + i)
    if res === nothing
        return nothing
    end
    ch, parent_next = res::Tuple{AbstractChar,Int}
    return ch, parent_next - s.offset
end
98

99
# Character at index `i` of the view, fetched from the parent at the shifted index.
function getindex(s::SubString, i::Integer)
    @boundscheck checkbounds(s, i)
    parent_index = s.offset + i
    return @inbounds getindex(s.string, parent_index)
end
103

104
# ASCII check performed on the code units of the view.
function isascii(ss::SubString{String})
    return isascii(codeunits(ss))
end
×
105

106
# `i` is a valid index of the view when it lies within the view's bounds and the
# shifted index is valid in the parent.
function isvalid(s::SubString, i::Integer)
    inrange = true
    @boundscheck inrange = checkbounds(Bool, s, i)
    inrange || return false
    return @inbounds isvalid(s.string, s.offset + i)::Bool
end
111

112
# Index arithmetic for `SubString{String}` is delegated to the `_thisind_str` and
# `_nextind_str` helpers.
function thisind(s::SubString{String}, i::Int)
    return _thisind_str(s, i)
end
function nextind(s::SubString{String}, i::Int)
    return _nextind_str(s, i)
end
43,289✔
114

115
# The string this view refers to.
function parent(s::SubString)
    return s.string
end
# One-element tuple holding the range of parent indices covered by the view.
function parentindices(s::SubString)
    start = s.offset + 1
    stop = thisind(s.string, s.offset + s.ncodeunits)
    return (start:stop,)
end
×
117

118
# Equality requires equal byte sizes and a zero `_memcmp` result.
function ==(a::Union{String, SubString{String}}, b::Union{String, SubString{String}})
    sizeof(a) == sizeof(b) || return false
    return _memcmp(a, b) == 0
end
121

122
# Three-way comparison: the sign of `_memcmp` decides; when it reports no
# difference, the byte sizes of the two operands are compared instead.
function cmp(a::SubString{String}, b::SubString{String})
    order = _memcmp(a, b)
    if order < 0
        return -1
    elseif order > 0
        return +1
    end
    return cmp(sizeof(a), sizeof(b))
end
126

127
# don't make unnecessary copies when passing substrings to C functions:
# the substring itself is returned as the converted object
function cconvert(::Type{Ptr{UInt8}}, s::SubString{String})
    return s
end
function cconvert(::Type{Ptr{Int8}}, s::SubString{String})
    return s
end
×
130

131
# Raw byte pointer to the start of the view: the parent's data pointer advanced
# by the view's offset.
function unsafe_convert(::Type{Ptr{R}}, s::SubString{String}) where R<:Union{Int8, UInt8}
    base = convert(Ptr{R}, pointer(s.string))
    return base + s.offset
end
134

135
# Pointer to the first byte of the view.
function pointer(x::SubString{String})
    return pointer(x.string) + x.offset
end
# Pointer to byte `i` of the view (1-based).
function pointer(x::SubString{String}, i::Integer)
    return pointer(x.string) + x.offset + (i-1)
end
×
137

138
# Hash the bytes covered by the view with `hash_bytes`, seeded by `h`.
function hash(data::SubString{String}, h::UInt)
    # keep `data` rooted while its raw pointer is handed to `hash_bytes`
    digest = GC.@preserve data hash_bytes(pointer(data), sizeof(data), UInt64(h), HASH_SECRET)
    return digest % UInt
end
140

UNCOV
141
# A substring defers to `_isannotated` applied to its parent type.
function _isannotated(::SubString{T}) where {T}
    return _isannotated(T)
end
×
142

143
"""
    reverse(s::AbstractString)::AbstractString

Reverses a string. Technically, this function reverses the codepoints in a string, and
its main utility is for reversed-order string processing, especially for reversed
regular-expression searches. See also [`reverseind`](@ref) to convert indices in `s` to
indices in `reverse(s)` and vice-versa, and `graphemes` from module `Unicode` to
operate on user-visible "characters" (graphemes) rather than codepoints.
See also [`Iterators.reverse`](@ref) for
reverse-order iteration without making a copy. Custom string types must implement the
`reverse` function themselves and should typically return a string with the same type
and encoding. If they return a string with a different encoding, they must also override
`reverseind` for that string type to satisfy `s[reverseind(s,i)] == reverse(s)[i]`.

# Examples
```jldoctest
julia> reverse("JuliaLang")
"gnaLailuJ"
```

!!! note
    The examples below may be rendered differently on different systems.
    The comments indicate how they're supposed to be rendered.

Combining characters can lead to surprising results:

```jldoctest
julia> reverse("ax̂e") # hat is above x in the input, above e in the output
"êxa"

julia> using Unicode

julia> join(reverse(collect(graphemes("ax̂e")))) # reverses graphemes; hat is above x in both in- and output
"ex̂a"
```
"""
function reverse(s::Union{String,SubString{String}})::String
    nbytes = sizeof(s)
    result = _string_n(nbytes)
    # Walk `s` front to back while the write position moves from the end of
    # `result` toward its start, one whole character at a time.
    cursor = nbytes + 1
    for ch in s
        cursor -= ncodeunits(ch)
        __unsafe_string!(result, ch, cursor)
    end
    return result
end
189

190
# Single-argument `string` for the native string types goes through `String`.
function string(a::String)
    return String(a)
end
function string(a::SubString{String})
    return String(a)
end
3✔
192

193
# Build a `Symbol` from the bytes of the view via the runtime's `jl_symbol_n`,
# which takes a byte pointer together with an explicit length.
function Symbol(s::SubString{String})
    nbytes = sizeof(s)
    return ccall(:jl_symbol_n, Ref{Symbol}, (Ptr{UInt8}, Int), s, nbytes)
end
196

197
# Write the code units of `c` into `out` starting at byte position `offs` and
# return how many were written. No bounds checking is performed.
@inline function __unsafe_string!(out, c::Char, offs::Integer) # out is a (new) String (or StringVector)
    # after the byte swap the first code unit of `c` sits in the low byte
    bits = bswap(reinterpret(UInt32, c))
    n = ncodeunits(c)
    GC.@preserve out begin
        unsafe_store!(pointer(out, offs), bits % UInt8)
        if n > 1
            bits >>= 8
            unsafe_store!(pointer(out, offs+1), bits % UInt8)
            if n > 2
                bits >>= 8
                unsafe_store!(pointer(out, offs+2), bits % UInt8)
                if n > 3
                    bits >>= 8
                    unsafe_store!(pointer(out, offs+3), bits % UInt8)
                end
            end
        end
    end
    return n
end
214

215
# Copy all bytes of `s` into `out` starting at byte position `offs`; returns the
# number of bytes copied. No bounds checking is performed.
@assume_effects :nothrow @inline function __unsafe_string!(out, s::String, offs::Integer)
    nbytes = sizeof(s)
    GC.@preserve s out begin
        dest = pointer(out, offs)
        unsafe_copyto!(dest, pointer(s), nbytes)
    end
    return nbytes
end
220

221
# Copy the bytes covered by the view `s` into `out` starting at byte position
# `offs`; returns the number of bytes copied. No bounds checking is performed.
@inline function __unsafe_string!(out, s::SubString{String}, offs::Integer)
    nbytes = sizeof(s)
    GC.@preserve s out begin
        dest = pointer(out, offs)
        unsafe_copyto!(dest, pointer(s), nbytes)
    end
    return nbytes
end
226

227
# Copy the bytes of the symbol's name into `out` starting at byte position
# `offs`; returns the number of bytes copied. No bounds checking is performed.
@assume_effects :nothrow @inline function __unsafe_string!(out, s::Symbol, offs::Integer)
    nbytes = sizeof(s)
    GC.@preserve s out begin
        dest = pointer(out, offs)
        unsafe_copyto!(dest, unsafe_convert(Ptr{UInt8},s), nbytes)
    end
    return nbytes
end
232

233
# nothrow needed here because for v in a can't prove the indexing is inbounds.
@assume_effects :foldable :nothrow function string(a::Union{Char, String, Symbol}...)
    return _string(a...)
end

# Argument lists that may include `SubString{String}` use the same implementation,
# but without any assumed effects.
function string(a::Union{Char, String, SubString{String}, Symbol}...)
    return _string(a...)
end
265,922✔
237

238
# Concatenate the arguments into a new `String` in two passes: first add up the
# byte sizes, then copy each piece into an exactly sized buffer.
function _string(a::Union{Char, String, SubString{String}, Symbol}...)
    total = 0
    for piece in a
        # 4 types is too many for automatic Union-splitting, so we split manually
        # and allow one specializable call site per concrete type
        if piece isa Char
            total += ncodeunits(piece)
        elseif piece isa String
            total += sizeof(piece)
        elseif piece isa SubString{String}
            total += sizeof(piece)
        else
            total += sizeof(piece::Symbol)
        end
    end
    out = _string_n(total)
    pos = 1  # next byte position to write in `out`
    for piece in a
        if piece isa Char
            written = __unsafe_string!(out, piece, pos)
            pos += written
        elseif piece isa String || piece isa SubString{String}
            written = __unsafe_string!(out, piece, pos)
            pos += written
        else
            written = __unsafe_string!(out, piece::Symbol, pos)
            pos += written
        end
    end
    return out
end
266

267
# don't assume effects for general integers since we cannot know their implementation
# not nothrow because r<0 throws
@assume_effects :foldable function repeat(s::String, r::BitInteger)
    return @invoke repeat(s::String, r::Integer)
end
103✔
270

271
# Return a new `String` made of `r` back-to-back copies of `s`.
#
# Throws `ArgumentError` when `r` is negative, and `OverflowError` when the byte
# size of the result cannot be represented. Without that check the size product
# would wrap around silently, the output buffer would be allocated too small, and
# the copy loop below would write past its end.
function repeat(s::Union{String, SubString{String}}, r::Integer)
    r < 0 && throw(ArgumentError("can't repeat a string $r times"))
    r = UInt(r)::UInt
    r == 0 && return ""
    r == 1 && return String(s)
    n = sizeof(s)
    # nothing to copy for an empty string, so skip the `r` zero-byte copies
    n == 0 && return ""
    # guard the `n*r` size computation against unsigned wrap-around
    r > typemax(UInt) ÷ UInt(n) &&
        throw(OverflowError("repeating a $n-byte string $r times overflows the string size"))
    out = _string_n(n*r)
    if n == 1 # common case: repeating a single-byte string
        @inbounds b = codeunit(s, 1)
        memset(unsafe_convert(Ptr{UInt8}, out), b, r)
    else
        for i = 0:r-1
            GC.@preserve s out unsafe_copyto!(pointer(out, i*n+1), pointer(s), n)
        end
    end
    return out
end
288

UNCOV
289
# Return a new `String` holding, in order, the characters of `s` for which `f`
# returns `true`.
function filter(f, s::Union{String, SubString{String}})
    # the result can never need more bytes than the input has
    buf = StringVector(sizeof(s))
    nwritten = 0
    for ch in s
        f(ch) || continue
        nwritten += __unsafe_string!(buf, ch, nwritten + 1)
    end
    # shrink the buffer to the bytes actually written
    resize!(buf, nwritten)
    sizehint!(buf, nwritten)
    return String(buf)
end
301

302
# Fallback for indexing an abstract string with a unit range: the result is a
# `SubString` of `s`.
function getindex(s::AbstractString, r::AbstractUnitRange{<:Integer})
    return SubString(s, r)
end
12✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc