• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / #37728

26 Mar 2024 03:46AM UTC coverage: 80.612% (-0.8%) from 81.423%
#37728

push

local

web-flow
Update zlib to 1.3.1 (#53841)

Released January 22, 2024

69920 of 86737 relevant lines covered (80.61%)

14456248.65 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

61.76
/base/pcre.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
## low-level pcre2 interface ##
4

5
module PCRE
6

7
import ..RefValue
8

9
# include($BUILDROOT/base/pcre_h.jl)
10
include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "pcre_h.jl"))
11

12
# Library name used as the second element of every `ccall` target in this module.
const PCRE_LIB = "libpcre2-8"
13

14
# Build a PCRE2 match context that owns a dedicated JIT stack and return the
# context pointer. The stack starts at 32 KiB and is allowed to grow to 1 MiB.
# Neither pointer returned by PCRE2 is checked against C_NULL here.
function create_match_context()
    initial_bytes = 32768    # starting size handed to pcre2_jit_stack_create
    limit_bytes = 1048576    # maximum size handed to pcre2_jit_stack_create
    stack = ccall((:pcre2_jit_stack_create_8, PCRE_LIB), Ptr{Cvoid},
                  (Csize_t, Csize_t, Ptr{Cvoid}),
                  initial_bytes, limit_bytes, C_NULL)
    context = ccall((:pcre2_match_context_create_8, PCRE_LIB), Ptr{Cvoid},
                    (Ptr{Cvoid},), C_NULL)
    # Attach the stack to the context; the callback argument is left as C_NULL.
    ccall((:pcre2_jit_stack_assign_8, PCRE_LIB), Cvoid,
          (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}),
          context, C_NULL, stack)
    return context
end
26

27
# Table of match-context pointers indexed by `_tid()`. It starts with a single
# C_NULL slot; `get_local_match_context` fills slots on first use and rebinds
# this global to a larger copy when a thread index exceeds its length.
THREAD_MATCH_CONTEXTS::Vector{Ptr{Cvoid}} = [C_NULL]
28

29
# Starts out as `nothing`. `get_local_match_context` asserts this is a
# `Threads.SpinLock` before locking it, so it is presumably assigned during
# initialization outside this view -- TODO confirm where.
PCRE_COMPILE_LOCK = nothing
30

31
# Value of the runtime's `jl_threadid` for the calling thread, widened to Int
# and shifted by one so it can be used as a 1-based array index.
function _tid()
    raw_id = ccall(:jl_threadid, Int16, ())
    return Int(raw_id) + 1
end
10,834,663✔
32
# Current value of the runtime global `jl_n_threads`, loaded atomically with
# acquire ordering and widened to Int.
function _mth()
    counter = cglobal(:jl_n_threads, Cint)
    return Int(Core.Intrinsics.atomic_pointerref(counter, :acquire))
end
×
33

34
# Return the match context stored in the calling thread's slot of
# THREAD_MATCH_CONTEXTS, creating the context (and growing the table) on first use.
function get_local_match_context()
    slot = _tid()
    table = THREAD_MATCH_CONTEXTS
    if slot > length(table)
        # Slow path: the table has no slot for this thread yet, so grow it
        # while holding the lock.
        guard = PCRE_COMPILE_LOCK::Threads.SpinLock
        lock(guard)
        try
            # Re-read the global under the lock: it may already have been replaced.
            table = THREAD_MATCH_CONTEXTS
            if slot > length(table)
                # New table = old length plus the current thread count, filled
                # with C_NULL and seeded with the existing entries.
                grown = fill(C_NULL, length(table) + _mth())
                global THREAD_MATCH_CONTEXTS = table = copyto!(grown, table)
            end
        finally
            unlock(guard)
        end
    end
    ctx = @inbounds table[slot]
    if ctx == C_NULL
        # Slow path: first match on this slot, allocate its context.
        ctx = create_match_context()
        THREAD_MATCH_CONTEXTS[slot] = ctx
    end
    return ctx
end
58

59
# supported options for different use cases
60

61
# Option bits accepted as arguments to pcre2_compile.
# AUTO_CALLOUT and DUPNAMES are not part of the mask.
const COMPILE_MASK =
    ALT_BSUX | ALT_CIRCUMFLEX | ALT_VERBNAMES | ANCHORED |
    CASELESS | DOLLAR_ENDONLY | DOTALL |
    ENDANCHORED | EXTENDED | EXTENDED_MORE | FIRSTLINE | LITERAL |
    MATCH_INVALID_UTF | MATCH_UNSET_BACKREF | MULTILINE |
    NEVER_BACKSLASH_C | NEVER_UCP | NEVER_UTF |
    NO_AUTO_CAPTURE | NO_AUTO_POSSESS | NO_DOTSTAR_ANCHOR |
    NO_START_OPTIMIZE | NO_UTF_CHECK |
    UCP | UNGREEDY | USE_OFFSET_LIMIT | UTF
92

93
# Values accepted as arguments to pcre2_set_newline (a tuple of the individual
# constants, not a bitwise OR).
const COMPILE_NEWLINE_MASK = (
    NEWLINE_CR, NEWLINE_LF, NEWLINE_CRLF,
    NEWLINE_ANY, NEWLINE_ANYCRLF, NEWLINE_NUL,
)
101

102
# Option bits accepted as arguments to pcre2_set_compile_extra_options.
const COMPILE_EXTRA_MASK =
    EXTRA_ALLOW_SURROGATE_ESCAPES | EXTRA_ALT_BSUX |
    EXTRA_BAD_ESCAPE_IS_LITERAL | EXTRA_ESCAPED_CR_IS_LF |
    EXTRA_MATCH_LINE | EXTRA_MATCH_WORD
110

111
# Option bits accepted as arguments to match.
# ANCHORED, COPY_MATCHED_SUBJECT, ENDANCHORED and NO_JIT are not part of the mask.
const EXECUTE_MASK =
    NOTBOL | NOTEMPTY | NOTEMPTY_ATSTART | NOTEOL |
    NO_START_OPTIMIZE | NO_UTF_CHECK |
    PARTIAL_HARD | PARTIAL_SOFT
125

126

127
# Indicates that an output vector element is unset (every bit of a Csize_t set).
const UNSET = typemax(Csize_t)
128

129
# Query one property of a compiled pattern through pcre2_pattern_info.
# `what` selects the property and `T` is the type of the slot PCRE2 writes into.
# Returns the value read back; raises an error when the call returns nonzero.
function info(regex::Ptr{Cvoid}, what::Integer, ::Type{T}) where T
    slot = RefValue{T}()
    status = ccall((:pcre2_pattern_info_8, PCRE_LIB), Cint,
                   (Ptr{Cvoid}, UInt32, Ptr{Cvoid}),
                   regex, what, slot)
    status == 0 && return slot[]
    # Translate the known failure codes; anything else is reported numerically.
    if status == ERROR_NULL
        error("PCRE error: NULL regex object")
    elseif status == ERROR_BADMAGIC
        error("PCRE error: invalid regex object")
    elseif status == ERROR_BADOPTION
        error("PCRE error: invalid option flags")
    else
        error("PCRE error: unknown error ($status)")
    end
end
142

143
# Twice the count reported by pcre2_get_ovector_count for `match_data`, as an Int.
function ovec_length(match_data)
    npairs = ccall((:pcre2_get_ovector_count_8, PCRE_LIB), UInt32,
                   (Ptr{Cvoid},), match_data)
    return 2 * Int(npairs)
end
148

149
# Pointer to the output vector of `match_data`, typed as Ptr{Csize_t}.
function ovec_ptr(match_data)
    return ccall((:pcre2_get_ovector_pointer_8, PCRE_LIB), Ptr{Csize_t},
                 (Ptr{Cvoid},), match_data)
end
154

155
# Compile `pattern` with pcre2_compile using the given option bits and return
# the resulting code pointer. Patterns that are not String/SubString{String}
# are converted to String first. Raises an error, including PCRE's message and
# the error offset, when compilation yields C_NULL.
function compile(pattern::AbstractString, options::Integer)
    src = pattern isa Union{String,SubString{String}} ? pattern : String(pattern)
    errcode = RefValue{Cint}(0)
    errpos = RefValue{Csize_t}(0)
    code = ccall((:pcre2_compile_8, PCRE_LIB), Ptr{Cvoid},
                 (Ptr{UInt8}, Csize_t, UInt32, Ref{Cint}, Ref{Csize_t}, Ptr{Cvoid}),
                 src, ncodeunits(src), options, errcode, errpos, C_NULL)
    code != C_NULL && return code
    error("PCRE compilation error: $(err_message(errcode[])) at offset $(errpos[])")
end
169

170
# Request JIT compilation of `regex` with JIT_COMPLETE.
# Returns true on success, false when PCRE answers ERROR_JIT_BADOPTION, and
# raises an error for any other status.
function jit_compile(regex::Ptr{Cvoid})
    status = ccall((:pcre2_jit_compile_8, PCRE_LIB), Cint,
                   (Ptr{Cvoid}, UInt32), regex, JIT_COMPLETE)
    if status == 0
        return true
    elseif status == ERROR_JIT_BADOPTION
        return false
    end
    error("PCRE JIT error: $(err_message(status))")
end
177

178
# Release a match data block through pcre2_match_data_free.
function free_match_data(match_data)
    ccall((:pcre2_match_data_free_8, PCRE_LIB), Cvoid, (Ptr{Cvoid},), match_data)
    return nothing
end
180

181
# Release a compiled pattern through pcre2_code_free.
function free_re(re)
    ccall((:pcre2_code_free_8, PCRE_LIB), Cvoid, (Ptr{Cvoid},), re)
    return nothing
end
183

184
# Release a JIT stack through pcre2_jit_stack_free.
function free_jit_stack(stack)
    ccall((:pcre2_jit_stack_free_8, PCRE_LIB), Cvoid, (Ptr{Cvoid},), stack)
    return nothing
end
186

187
# Release a match context through pcre2_match_context_free.
function free_match_context(context)
    ccall((:pcre2_match_context_free_8, PCRE_LIB), Cvoid, (Ptr{Cvoid},), context)
    return nothing
end
189

190
# Look up the text for a PCRE error code via pcre2_get_error_message.
# Raises an error when PCRE answers ERROR_BADDATA for `errno`; otherwise returns
# the NUL-terminated contents of a 1024-byte scratch buffer as a String.
function err_message(errno::Integer)
    scratch = Vector{UInt8}(undef, 1024)
    status = ccall((:pcre2_get_error_message_8, PCRE_LIB), Cint,
                   (Cint, Ptr{UInt8}, Csize_t), errno, scratch, length(scratch))
    if status == ERROR_BADDATA
        error("PCRE error: invalid errno ($errno)")
    end
    # TODO: seems like there should be a better way to get this string
    # Keep the buffer rooted while its raw pointer is being read.
    return GC.@preserve scratch unsafe_string(pointer(scratch))
end
198

199
# Run a match of `re` against `subject`, filling `match_data`.
# String and SubString{String} subjects are forwarded to `_exec` unchanged.
function exec(re, subject::Union{String,SubString{String}}, offset, options, match_data)
    return _exec(re, subject, offset, options, match_data)
end

# Any other subject is converted with `String(subject)` before matching.
function exec(re, subject, offset, options, match_data)
    return _exec(re, String(subject), offset, options, match_data)
end
203

204
# Call pcre2_match on `subject` using the calling thread's match context.
# Returns true when the return code is non-negative, false for codes -1 and -2,
# and raises an error for anything lower.
function _exec(re, subject, offset, options, match_data)
    rc = ccall((:pcre2_match_8, PCRE_LIB), Cint,
               (Ptr{Cvoid}, Ptr{UInt8}, Csize_t, Csize_t, UInt32, Ptr{Cvoid}, Ptr{Cvoid}),
               re, subject, ncodeunits(subject), offset, options, match_data,
               get_local_match_context())
    # rc == -1 means no match, -2 means partial match.
    if rc < -2
        error("PCRE.exec error: $(err_message(rc))")
    end
    return rc >= 0
end
212

213
# Match `re` against `subject` using a temporary match data block and return
# whether `exec` reported a match.
#
# The block from `create_match_data` is released in a `finally` clause so it is
# freed even when `exec` throws (it raises on PCRE return codes below -2 and may
# also fail while converting `subject`); without that the block would leak.
function exec_r(re, subject, offset, options)
    match_data = create_match_data(re)
    try
        return exec(re, subject, offset, options, match_data)
    finally
        free_match_data(match_data)
    end
end
219

220
# Match `re` against `subject` and return `(matched, match_data)`.
#
# On success the match data block is handed back to the caller, which is
# presumably responsible for passing it to `free_match_data` -- verify against
# callers. If `exec` throws, the caller never receives the pointer, so the block
# is freed here before the exception is rethrown.
function exec_r_data(re, subject, offset, options)
    match_data = create_match_data(re)
    matched = try
        exec(re, subject, offset, options, match_data)
    catch
        free_match_data(match_data)
        rethrow()
    end
    return matched, match_data
end
225

226
# Allocate a match data block for `re` via pcre2_match_data_create_from_pattern.
# Raises an error when PCRE returns C_NULL.
function create_match_data(re)
    data = ccall((:pcre2_match_data_create_from_pattern_8, PCRE_LIB),
                 Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}), re, C_NULL)
    if data == C_NULL
        error("PCRE error: could not allocate memory")
    end
    return data
end
232

233
# Result of pcre2_substring_number_from_name for `name` in `re`, widened to Int.
# The C return value is passed through unchanged, with no error check here.
function substring_number_from_name(re, name)
    return Int(ccall((:pcre2_substring_number_from_name_8, PCRE_LIB), Cint,
                     (Ptr{Cvoid}, Cstring), re, name))
end
238

239
# Length of captured substring `number` in `match_data`, as an Int.
# Returns 0 when PCRE answers ERROR_UNSET and raises an error for any other
# negative return code.
function substring_length_bynumber(match_data, number)
    len = RefValue{Csize_t}()
    rc = ccall((:pcre2_substring_length_bynumber_8, PCRE_LIB), Cint,
               (Ptr{Cvoid}, Cint, Ref{Csize_t}), match_data, number, len)
    rc >= 0 && return Int(len[])
    rc == ERROR_UNSET && return 0
    error("PCRE error: $(err_message(rc))")
end
249

250
# Copy captured substring `number` from `match_data` into `buf`.
# `buf_size` seeds the in/out length slot passed to PCRE; the value PCRE leaves
# in that slot is returned as an Int. Raises an error on a negative return code.
function substring_copy_bynumber(match_data, number, buf, buf_size)
    len = RefValue{Csize_t}(buf_size)
    rc = ccall((:pcre2_substring_copy_bynumber_8, PCRE_LIB), Cint,
               (Ptr{Cvoid}, UInt32, Ptr{UInt8}, Ref{Csize_t}),
               match_data, number, buf, len)
    if rc < 0
        error("PCRE error: $(err_message(rc))")
    end
    return Int(len[])
end
258

259
# Build a Dict mapping capture group index => group name by walking the name
# table of the compiled pattern `re`.
function capture_names(re)
    nnames = info(re, INFO_NAMECOUNT, UInt32)
    entry_size = info(re, INFO_NAMEENTRYSIZE, UInt32)
    table = info(re, INFO_NAMETABLE, Ptr{UInt8})
    names = Dict{Int,String}()
    for i = 1:nnames
        # Start of the i-th fixed-size entry in the table.
        entry = table + (i-1)*entry_size
        # The first two bytes hold the group index as a big-endian 16-bit value.
        hi = UInt16(unsafe_load(entry, 1))
        lo = UInt16(unsafe_load(entry, 2))
        # The null-terminated group name follows directly after the index.
        names[(hi << 8) | lo] = unsafe_string(entry + 2)
    end
    return names
end
277

278
end # module
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc