• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / #37477

pending completion
#37477

push

local

web-flow
Allow external lattice elements to properly union split (#49030)

Currently `MustAlias` is the only lattice element that is allowed
to widen to union types. However, there are others in external
packages. Expand the support we have for this in order to allow
union splitting of lattice elements.

Co-authored-by: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com>

26 of 26 new or added lines in 5 files covered. (100.0%)

71476 of 82705 relevant lines covered (86.42%)

34756248.54 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.0
/base/strings/substring.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
"""
4
    SubString(s::AbstractString, i::Integer, j::Integer=lastindex(s))
5
    SubString(s::AbstractString, r::UnitRange{<:Integer})
6

7
Like [`getindex`](@ref), but returns a view into the parent string `s`
8
within range `i:j` or `r` respectively instead of making a copy.
9

10
The [`@views`](@ref) macro converts any string slices `s[i:j]` into
11
substrings `SubString(s, i, j)` in a block of code.
12

13
# Examples
14
```jldoctest
15
julia> SubString("abc", 1, 2)
16
"ab"
17

18
julia> SubString("abc", 1:2)
19
"ab"
20

21
julia> SubString("abc", 2)
22
"bc"
23
```
24
"""
25
struct SubString{T<:AbstractString} <: AbstractString
    string::T        # parent string that the view refers to
    offset::Int      # number of parent code units that precede the view
    ncodeunits::Int  # size of the view, counted in code units of the parent

    # Build a view of `s` spanning the characters at indices `i` through `j`.
    # An empty range (`i > j`) produces a view with zero offset and zero length
    # without validating either index.
    function SubString{T}(s::T, i::Int, j::Int) where T<:AbstractString
        if !(i ≤ j)
            return new(s, 0, 0)
        end
        @boundscheck begin
            checkbounds(s, i:j)
            # Both endpoints have to be valid character indices of `s`.
            @inbounds isvalid(s, i) || string_index_err(s, i)
            @inbounds isvalid(s, j) || string_index_err(s, j)
        end
        # The view ends right before the index that follows the character at `j`.
        stop = nextind(s, j)
        return new(s, i - 1, stop - i)
    end
end
40

41
# Outer constructors. Each method normalizes its arguments and forwards to a more
# specific method, ending at the inner constructor `SubString{T}(s, i, j)`.

# `Int` indices: take the type parameter from the parent string's type.
@propagate_inbounds function SubString(s::T, i::Int, j::Int) where {T<:AbstractString}
    return SubString{T}(s, i, j)
end

# Any integer indices are converted to `Int`; `j` defaults to `lastindex(s)`.
@propagate_inbounds function SubString(s::AbstractString, i::Integer, j::Integer=lastindex(s))
    return SubString(s, Int(i), Int(j))
end

# A unit range supplies both endpoints.
@propagate_inbounds function SubString(s::AbstractString, r::AbstractUnitRange{<:Integer})
    return SubString(s, first(r), last(r))
end
73,850✔
44

45
# Taking a substring of a substring does not nest views: the indices are shifted by
# the existing offset and the new view is built directly on the original parent.
@propagate_inbounds function SubString(s::SubString, i::Int, j::Int)
    # Only a non-empty range is checked against the bounds of `s`.
    @boundscheck if i ≤ j
        checkbounds(s, i:j)
    end
    shift = s.offset
    return SubString(s.string, shift + i, shift + j)
end
49

50
# View covering an entire string: from index 1 through `lastindex(s)`.
function SubString(s::AbstractString)
    last_idx = lastindex(s)::Int
    return SubString(s, 1, last_idx)
end

# Same as above, with the parent type given explicitly as the type parameter.
function SubString{T}(s::T) where {T<:AbstractString}
    last_idx = lastindex(s)::Int
    return SubString{T}(s, 1, last_idx)
end
×
52

53
# A view of a string over a unit range is a `SubString` of that string.
@propagate_inbounds function view(s::AbstractString, r::AbstractUnitRange{<:Integer})
    return SubString(s, r)
end

# `maybeview` turns unit-range indexing of a string into a view ...
@propagate_inbounds function maybeview(s::AbstractString, r::AbstractUnitRange{<:Integer})
    return view(s, r)
end

# ... and forwards every other kind of indexing to `getindex`.
@propagate_inbounds function maybeview(s::AbstractString, args...)
    return getindex(s, args...)
end
6✔
56

57
# Convert any string to a `SubString{S}`: first convert it to the parent type `S`,
# then wrap the whole converted string in a view.
function convert(::Type{SubString{S}}, s::AbstractString) where {S<:AbstractString}
    parent = convert(S, s)
    return SubString(parent)::SubString{S}
end

# A substring that already has the requested type is returned as is.
function convert(::Type{T}, s::T) where {T<:SubString}
    return s
end
37✔
60

61
# Regex match allows only Union{String, SubString{String}} so define conversion to this type

# Both members of the union are returned unchanged.
function convert(::Type{Union{String, SubString{String}}}, s::String)
    return s
end

function convert(::Type{Union{String, SubString{String}}}, s::SubString{String})
    return s
end

# Every other string type is converted to a `String`.
function convert(::Type{Union{String, SubString{String}}}, s::AbstractString)
    return convert(String, s)::String
end
×
65

66
# Materialize a substring of a `String` as a new `String` holding the `s.ncodeunits`
# bytes that start right after the view's offset in the parent.
function String(s::SubString{String})
    src = s.string
    # Keep the parent rooted while its raw pointer is being read.
    return GC.@preserve src unsafe_string(pointer(src, s.offset + 1), s.ncodeunits)
end
71

72
# Number of code units in the view (stored at construction time).
function ncodeunits(s::SubString)
    return s.ncodeunits
end

# The code unit type is whatever the parent string uses.
function codeunit(s::SubString)
    return codeunit(s.string)::CodeunitType
end

# Character count of the view: count characters of the parent between the first and
# the last code unit covered by the view.
function length(s::SubString)
    first_unit = s.offset + 1
    last_unit = s.offset + s.ncodeunits
    return length(s.string, first_unit, last_unit)
end
163,160✔
75

76
# Return the `i`-th code unit of the view by reading the parent at the shifted index.
function codeunit(s::SubString, i::Integer)
    @boundscheck checkbounds(s, i)
    parent_index = s.offset + i
    @inbounds return codeunit(s.string, parent_index)
end
80

81
# Iterate the characters of the view. The state `i` is an index into the view; it is
# shifted by the offset to iterate the parent, and the parent's next state is shifted
# back before being returned.
function iterate(s::SubString, i::Integer=firstindex(s))
    # One past the last code unit means the iteration is finished.
    if i == ncodeunits(s) + 1
        return nothing
    end
    @boundscheck checkbounds(s, i)
    step = iterate(s.string, s.offset + i)
    step === nothing && return nothing
    ch, parent_next = step::Tuple{AbstractChar,Int}
    return ch, parent_next - s.offset
end
89

90
# Return the character at index `i` of the view by indexing the parent at the
# shifted position.
function getindex(s::SubString, i::Integer)
    @boundscheck checkbounds(s, i)
    parent_index = s.offset + i
    @inbounds return getindex(s.string, parent_index)
end
94

95
# ASCII test for a substring of a `String`, performed on its code units.
function isascii(ss::SubString{String})
    return isascii(codeunits(ss))
end
×
96

97
# Report whether `i` is a valid character index of the view: it must be in bounds of
# the view and the shifted index must be valid in the parent.
function isvalid(s::SubString, i::Integer)
    # Remains `true` whenever the `@boundscheck` statement below is elided.
    inrange = true
    @boundscheck inrange = checkbounds(Bool, s, i)
    @inbounds return inrange && isvalid(s.string, s.offset + i)::Bool
end
102

103
# Classify the bytes of the view with the C routine `u8_isvalid`; the callers below
# treat a zero result as "not valid".
function byte_string_classify(s::SubString{String})
    return ccall(:u8_isvalid, Int32, (Ptr{UInt8}, Int), s, sizeof(s))
end

# Validity of the view's bytes as a `String`.
function isvalid(::Type{String}, s::SubString{String})
    return byte_string_classify(s) ≠ 0
end

function isvalid(s::SubString{String})
    return isvalid(String, s)
end
106✔
108

109
# Index arithmetic for substrings of `String` is delegated to the shared
# `_thisind_str` / `_nextind_str` helpers.
function thisind(s::SubString{String}, i::Int)
    return _thisind_str(s, i)
end

function nextind(s::SubString{String}, i::Int)
    return _nextind_str(s, i)
end
512,943✔
111

112
# Equality for `String`s and substrings of `String`s: the operands are equal when
# they have the same size in bytes and `_memcmp` over that many bytes returns zero.
function ==(a::Union{String, SubString{String}}, b::Union{String, SubString{String}})
    nbytes = sizeof(a)
    nbytes == sizeof(b) || return false
    return _memcmp(a, b, nbytes) == 0
end
116

117
# Three-way comparison of two substrings of `String`s: compare the common-length
# prefix with `_memcmp`; when the prefixes are equal, the sizes decide.
function cmp(a::SubString{String}, b::SubString{String})
    len_a, len_b = sizeof(a), sizeof(b)
    order = _memcmp(a, b, min(len_a, len_b))
    if order < 0
        return -1
    elseif order > 0
        return +1
    else
        return cmp(len_a, len_b)
    end
end
123

124
# don't make unnecessary copies when passing substrings to C functions
function cconvert(::Type{Ptr{UInt8}}, s::SubString{String})
    return s
end

function cconvert(::Type{Ptr{Int8}}, s::SubString{String})
    return s
end
2✔
127

128
# Raw byte pointer to the start of the view: the parent's pointer, converted to the
# requested element type and advanced by the view's offset.
function unsafe_convert(::Type{Ptr{R}}, s::SubString{String}) where R<:Union{Int8, UInt8}
    parent_ptr = convert(Ptr{R}, pointer(s.string))
    return parent_ptr + s.offset
end
131

132
# Pointer to the first byte of the view inside the parent string.
function pointer(x::SubString{String})
    return pointer(x.string) + x.offset
end

# Pointer to the `i`-th byte of the view (1-based).
function pointer(x::SubString{String}, i::Integer)
    return pointer(x.string) + x.offset + (i - 1)
end
64✔
134

135
# Hash the bytes of the view with `memhash`. The incoming hash is combined with
# `memhash_seed`, its low 32 bits are passed as the seed, and the combined value is
# added to the result.
function hash(s::SubString{String}, h::UInt)
    seeded = h + memhash_seed
    digest = ccall(memhash, UInt, (Ptr{UInt8}, Csize_t, UInt32),
                   s, sizeof(s), seeded % UInt32)
    return digest + seeded
end
139

140
"""
141
    reverse(s::AbstractString) -> AbstractString
142

143
Reverses a string. Technically, this function reverses the codepoints in a string and its
144
main utility is for reversed-order string processing, especially for reversed
145
regular-expression searches. See also [`reverseind`](@ref) to convert indices in `s` to
146
indices in `reverse(s)` and vice-versa, and `graphemes` from module `Unicode` to
147
operate on user-visible "characters" (graphemes) rather than codepoints.
148
See also [`Iterators.reverse`](@ref) for
149
reverse-order iteration without making a copy. Custom string types must implement the
150
`reverse` function themselves and should typically return a string with the same type
151
and encoding. If they return a string with a different encoding, they must also override
152
`reverseind` for that string type to satisfy `s[reverseind(s,i)] == reverse(s)[i]`.
153

154
# Examples
155
```jldoctest
156
julia> reverse("JuliaLang")
157
"gnaLailuJ"
158
```
159

160
!!! note
161
    The examples below may be rendered differently on different systems.
162
    The comments indicate how they're supposed to be rendered.
163

164
Combining characters can lead to surprising results:
165

166
```jldoctest
167
julia> reverse("ax̂e") # hat is above x in the input, above e in the output
168
"êxa"
169

170
julia> using Unicode
171

172
julia> join(reverse(collect(graphemes("ax̂e")))) # reverses graphemes; hat is above x in both in- and output
173
"ex̂a"
174
```
175
"""
176
function reverse(s::Union{String,SubString{String}})::String
    # Walk `s` front to back while filling `out` from its end toward its start:
    # each character is written just before the previously written one.
    nbytes = sizeof(s)
    out = _string_n(nbytes)
    pos = nbytes + 1
    for ch in s
        pos -= ncodeunits(ch)
        __unsafe_string!(out, ch, pos)
    end
    return out
end
186

187
# Single-argument `string` for `String` and substrings of `String` goes straight
# through the `String` constructor.
function string(a::String)
    return String(a)
end

function string(a::SubString{String})
    return String(a)
end
95✔
189

190
# Create a `Symbol` from the bytes of the view by handing the pointer and the byte
# count to `jl_symbol_n`.
function Symbol(s::SubString{String})
    nbytes = sizeof(s)
    return ccall(:jl_symbol_n, Ref{Symbol}, (Ptr{UInt8}, Int), s, nbytes)
end
193

194
# Write the code units of `c` into `out` starting at byte position `offs` and return
# how many bytes `ncodeunits(c)` reports. No bounds checking is performed.
@inline function __unsafe_string!(out, c::Char, offs::Integer) # out is a (new) String (or StringVector)
    # After the byte swap, the low byte of `bits` is the first byte to write.
    bits = bswap(reinterpret(UInt32, c))
    n = ncodeunits(c)
    GC.@preserve out begin
        unsafe_store!(pointer(out, offs), bits % UInt8)
        if n != 1
            bits >>= 8
            unsafe_store!(pointer(out, offs+1), bits % UInt8)
            if n != 2
                bits >>= 8
                unsafe_store!(pointer(out, offs+2), bits % UInt8)
                if n != 3
                    bits >>= 8
                    unsafe_store!(pointer(out, offs+3), bits % UInt8)
                end
            end
        end
    end
    return n
end
211

212
# Copy all bytes of `s` into `out` starting at byte position `offs` and return the
# number of bytes copied. No bounds checking is performed.
@inline function __unsafe_string!(out, s::Union{String, SubString{String}}, offs::Integer)
    nbytes = sizeof(s)
    # Keep both objects rooted while their raw pointers are in use.
    GC.@preserve s out begin
        unsafe_copyto!(pointer(out, offs), pointer(s), nbytes)
    end
    return nbytes
end
217

218
# Copy the `sizeof(s)` bytes of the symbol `s` into `out` starting at byte position
# `offs` and return the number of bytes copied. No bounds checking is performed.
@inline function __unsafe_string!(out, s::Symbol, offs::Integer)
    nbytes = sizeof(s)
    GC.@preserve s out begin
        dest = pointer(out, offs)
        src = unsafe_convert(Ptr{UInt8}, s)
        unsafe_copyto!(dest, src, nbytes)
    end
    return nbytes
end
223

224
# Concatenate characters, strings, substrings and symbols into one new `String`.
# Works in two passes: the first adds up the byte size of every argument, the second
# writes each argument into the preallocated result.
function string(a::Union{Char, String, SubString{String}, Symbol}...)
    nbytes = 0
    for v in a
        # 4 types is too many for automatic Union-splitting, so we split manually
        # and allow one specializable call site per concrete type
        if v isa Char
            nbytes += ncodeunits(v)
        elseif v isa String
            nbytes += sizeof(v)
        elseif v isa SubString{String}
            nbytes += sizeof(v)
        else
            nbytes += sizeof(v::Symbol)
        end
    end
    out = _string_n(nbytes)
    # `pos` is the 1-based byte position at which the next argument is written.
    pos = 1
    for v in a
        if v isa Char
            pos += __unsafe_string!(out, v, pos)
        elseif v isa String || v isa SubString{String}
            pos += __unsafe_string!(out, v, pos)
        else
            pos += __unsafe_string!(out, v::Symbol, pos)
        end
    end
    return out
end
252

253
# Specialized `repeat` for `String` and substrings of `String`: the result is built
# by writing bytes directly into a freshly allocated `String`.
#
# Throws `ArgumentError` when `r` is negative and `OverflowError` when the byte size
# of the result, `sizeof(s) * r`, overflows.
function repeat(s::Union{String, SubString{String}}, r::Integer)
    r < 0 && throw(ArgumentError("can't repeat a string $r times"))
    r == 0 && return ""
    r == 1 && return String(s)
    n = sizeof(s)
    # Repeating an empty string gives an empty string; returning here avoids running
    # `r` iterations of zero-byte copies below.
    n == 0 && return ""
    # Checked multiply: a silently wrapped product would allocate a buffer that is too
    # small, and the writes below would then run past its end.
    total = Base.checked_mul(n, r)
    out = _string_n(total)
    if n == 1 # common case: repeating a single-byte string
        @inbounds b = codeunit(s, 1)
        ccall(:memset, Ptr{Cvoid}, (Ptr{UInt8}, Cint, Csize_t), out, b, r)
    else
        # Copy the bytes of `s` into each consecutive `n`-byte slot of `out`.
        for i = 0:r-1
            GC.@preserve s out unsafe_copyto!(pointer(out, i*n+1), pointer(s), n)
        end
    end
    return out
end
269

270
# Keep the characters of `s` for which `f` returns `true` and return them as a new
# `String`. The buffer is allocated with the full byte size of `s`, filled from the
# front, and then shrunk to the number of bytes actually written.
function filter(f, s::Union{String, SubString{String}})
    buf = StringVector(sizeof(s))
    # 1-based byte position at which the next kept character is written.
    next = 1
    for ch in s
        f(ch) || continue
        next += __unsafe_string!(buf, ch, next)
    end
    kept = next - 1
    resize!(buf, kept)
    sizehint!(buf, kept)
    return String(buf)
end
282

283
# Generic unit-range indexing of a string is implemented as a `SubString` of `s`.
function getindex(s::AbstractString, r::AbstractUnitRange{<:Integer})
    return SubString(s, r)
end
2,414✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc