lunarmodules / Penlight / build 20577780249

29 Dec 2025 04:36PM UTC coverage: 88.871% (remained the same)

Pull Request #503: chore(ci): Update Lua and LuaRocks versions for Lua 5.5
Commit: chore(ci): update AppVeyor config for Lua 5.5

5454 of 6137 relevant lines covered (88.87%)
257.13 hits per line

Source file: /lua/pl/lexer.lua (74.39% of lines covered)

--- Lexical scanner for creating a sequence of tokens from text.
-- `lexer.scan(s)` returns an iterator over all tokens found in the
-- string `s`. This iterator returns two values: a token type string
-- (such as 'string' for a quoted string, or 'iden' for an identifier)
-- and the value of the token.
--
-- Versions specialized for Lua and C are available; these also handle block comments
-- and classify keywords as 'keyword' tokens. For example:
--
--    > s = 'for i=1,n do'
--    > for t,v in lexer.lua(s) do print(t,v) end
--    keyword for
--    iden    i
--    =       =
--    number  1
--    ,       ,
--    iden    n
--    keyword do
--
-- See the Guide for further @{06-data.md.Lexical_Scanning|discussion}.
-- @module pl.lexer

local strfind = string.find
local strsub = string.sub
local append = table.insert


local function assert_arg(idx,val,tp)
    if type(val) ~= tp then
        error("argument "..idx.." must be "..tp, 2)
    end
end

local lexer = {}

-- Token patterns. NUMBER1..NUMBER2a match signed numbers and are used by
-- the plain scanner; NUMBER4..NUMBER5a are the unsigned variants used by
-- the Lua and C scanners, where a leading +/- is lexed as a separate
-- operator token. NUMBER3 matches hexadecimal literals.
local NUMBER1  = '^[%+%-]?%d+%.?%d*[eE][%+%-]?%d+' -- signed, with exponent
local NUMBER1a = '^[%+%-]?%d*%.%d+[eE][%+%-]?%d+'  -- signed .fraction, with exponent
local NUMBER2  = '^[%+%-]?%d+%.?%d*'               -- signed decimal
local NUMBER2a = '^[%+%-]?%d*%.%d+'                -- signed .fraction
local NUMBER3  = '^0x[%da-fA-F]+'                  -- hexadecimal
local NUMBER4  = '^%d+%.?%d*[eE][%+%-]?%d+'        -- unsigned, with exponent
local NUMBER4a = '^%d*%.%d+[eE][%+%-]?%d+'         -- unsigned .fraction, with exponent
local NUMBER5  = '^%d+%.?%d*'                      -- unsigned decimal
local NUMBER5a = '^%d*%.%d+'                       -- unsigned .fraction
local IDEN = '^[%a_][%w_]*'
local WSPACE = '^%s+'
local STRING1 = "^(['\"])%1" -- empty string
local STRING2 = [[^(['"])(\*)%2%1]] -- string containing only backslash escapes
local STRING3 = [[^(['"]).-[^\](\*)%2%1]] -- general string; closing quote not escaped
local CHAR1 = "^''" -- empty character literal
local CHAR2 = [[^'(\*)%1']] -- character literal containing only backslash escapes
local CHAR3 = [[^'.-[^\](\*)%1']] -- general character literal
local PREPRO = '^#.-[^\\]\n' -- C preprocessor directive, up to an unescaped newline

local plain_matches,lua_matches,cpp_matches,lua_keyword,cpp_keyword

local function tdump(tok)
    return tok,tok
end

local function ndump(tok,options)
    if options and options.number then
        tok = tonumber(tok)
    end
    return "number",tok
end

-- regular strings, single or double quotes; usually we want them
-- without the quotes
local function sdump(tok,options)
    if options and options.string then
        tok = tok:sub(2,-2)
    end
    return "string",tok
end

-- long Lua strings need extra work to get rid of the quotes
local function sdump_l(tok,options,findres)
    if options and options.string then
        local quotelen = 3
        if findres[3] then
            quotelen = quotelen + findres[3]:len()
        end
        tok = tok:sub(quotelen, -quotelen)
        if tok:sub(1, 1) == "\n" then
            tok = tok:sub(2)
        end
    end
    return "string",tok
end

local function chdump(tok,options)
    if options and options.string then
        tok = tok:sub(2,-2)
    end
    return "char",tok
end

local function cdump(tok)
    return "comment",tok
end

local function wsdump (tok)
    return "space",tok
end

local function pdump (tok)
    return "prepro",tok
end

local function plain_vdump(tok)
    return "iden",tok
end

local function lua_vdump(tok)
    if lua_keyword[tok] then
        return "keyword",tok
    else
        return "iden",tok
    end
end

local function cpp_vdump(tok)
    if cpp_keyword[tok] then
        return "keyword",tok
    else
        return "iden",tok
    end
end

--- create a plain token iterator from a string or file-like object.
-- @tparam string|file s a string, or a file-like object with a `:read()` method returning lines.
-- @tab matches an optional match table - an array of token descriptions.
-- A token is described by a `{pattern, action}` pair, where `pattern` should match
-- the token body and `action` is a function called when a token of the described type is found.
-- @tab[opt] filter a table of token types to exclude, by default `{space=true}`
-- @tab[opt] options a table of options; by default `{number=true,string=true}`,
-- which means convert numbers and strip string quotes.
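-- @usage
-- -- a minimal sketch (not part of the original source; input is illustrative):
-- -- scan a plain string; with the default options, number tokens arrive
-- -- already converted via tonumber
-- local lexer = require 'pl.lexer'
-- for t,v in lexer.scan('alpha = 2*beta') do
--     print(t,v)  --> iden alpha / = = / number 2 / * * / iden beta
-- end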
function lexer.scan(s,matches,filter,options)
    local file = type(s) ~= 'string' and s
    filter = filter or {space=true}
    options = options or {number=true,string=true}
    if filter then
        if filter.space then filter[wsdump] = true end
        if filter.comments then
            filter[cdump] = true
        end
    end
    if not matches then
        if not plain_matches then
            plain_matches = {
                {WSPACE,wsdump},
                {NUMBER3,ndump},
                {IDEN,plain_vdump},
                {NUMBER1,ndump},
                {NUMBER1a,ndump},
                {NUMBER2,ndump},
                {NUMBER2a,ndump},
                {STRING1,sdump},
                {STRING2,sdump},
                {STRING3,sdump},
                {'^.',tdump}
            }
        end
        matches = plain_matches
    end

    local line_nr = 0
    local next_line = file and file:read()
    local sz = file and 0 or #s
    local idx = 1

    local tlist_i
    local tlist

    local first_hit = true

    local function iter(res)
        local tp = type(res)

        if tlist then -- returning the inserted token list
            local cur = tlist[tlist_i]
            if cur then
                tlist_i = tlist_i + 1
                return cur[1], cur[2]
            else
                tlist = nil
            end
        end

        if tp == 'string' then -- search up to some special pattern
            local i1,i2 = strfind(s,res,idx)
            if i1 then
                local tok = strsub(s,i1,i2)
                idx = i2 + 1
                return '', tok
            else
                idx = sz + 1
                return '', ''
            end

        elseif tp == 'table' then -- insert a token list
            tlist_i = 1
            tlist = res
            return '', ''

        elseif tp ~= 'nil' then -- return position
            return line_nr, idx

        else -- look for next token
            if first_hit then
                if not file then line_nr = 1 end
                first_hit = false
            end

            if idx > sz then
                if file then
                    if not next_line then
                        return -- past the end of file, done
                    end
                    s = next_line
                    line_nr = line_nr + 1
                    next_line = file:read()
                    if next_line then
                        s = s .. '\n'
                    end
                    idx, sz = 1, #s
                else
                    return -- past the end of input, done
                end
            end

            for _,m in ipairs(matches) do
                local pat = m[1]
                local fun = m[2]
                local findres = {strfind(s,pat,idx)}
                local i1, i2 = findres[1], findres[2]
                if i1 then
                    local tok = strsub(s,i1,i2)
                    idx = i2 + 1
                    local ret1, ret2
                    if not (filter and filter[fun]) then
                        lexer.finished = idx > sz
                        ret1, ret2 = fun(tok, options, findres)
                    end
                    if not file and tok:find("\n") then
                        -- Update line number.
                        local _, newlines = tok:gsub("\n", {})
                        line_nr = line_nr + newlines
                    end
                    if ret1 then
                        return ret1, ret2 -- found a match
                    else
                        return iter() -- tail-call to try again
                    end
                end
            end
        end
    end

    return iter
end

local function isstring (s)
    return type(s) == 'string'
end

--- insert tokens into a stream.
-- @param tok a token stream
-- @param a1 a string (the token type), a table (a token list), or a
-- function (assumed to be a token-like iterator returning type and value)
-- @string a2 the token value, when `a1` is a type string
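-- @usage
-- -- a small sketch (input is illustrative): push a single token back into
-- -- a stream; the next call to the iterator yields it before the input
-- local tok = lexer.lua('x = 1')
-- lexer.insert(tok,'keyword','local')
-- print(tok())  --> keyword  local
-- print(tok())  --> iden     x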
function lexer.insert (tok,a1,a2)
    if not a1 then return end
    local ts
    if isstring(a1) and isstring(a2) then
        ts = {{a1,a2}}
    elseif type(a1) == 'function' then
        ts = {}
        for t,v in a1() do
            append(ts,{t,v})
        end
    else
        ts = a1
    end
    tok(ts)
end

--- get everything in a stream up to a newline.
-- @param tok a token stream
-- @return a string
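-- @usage
-- -- a brief sketch (input is illustrative): calling the iterator with a
-- -- pattern grabs raw text, so the rest of the current line comes back
-- -- as-is, including anything the token filter would normally drop
-- local tok = lexer.lua('local x -- note\nlocal y')
-- tok() ; tok()              -- consume 'local' and 'x'
-- print(lexer.getline(tok))  --> ' -- note' (up to and including the newline)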
function lexer.getline (tok)
    local _,v = tok('.-\n')
    return v
end

--- get the current line number.
-- @param tok a token stream
-- @return the line number; if the input source is a file-like object,
-- the column is also returned.
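-- @usage
-- -- a short sketch (input is illustrative): calling the iterator with a
-- -- number argument reports the current position instead of a token
-- local tok = lexer.lua('x = 1\ny = 2')
-- tok() ; tok() ; tok()     -- consume 'x', '=', '1'
-- print(lexer.lineno(tok))  --> 1  6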
function lexer.lineno (tok)
    return tok(0)
end

--- get the rest of the stream.
-- @param tok a token stream
-- @return a string
function lexer.getrest (tok)
    local _,v = tok('.+')
    return v
end

--- get the Lua keywords as a set-like table.
-- So `res["and"]` etc. would be `true`.
-- @return a table
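-- @usage
-- -- a tiny sketch: membership test against the keyword set
-- local kw = lexer.get_keywords()
-- print(kw['and'], kw['foo'])  --> true  nil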
function lexer.get_keywords ()
    if not lua_keyword then
        lua_keyword = {
            ["and"] = true, ["break"] = true, ["do"] = true,
            ["else"] = true, ["elseif"] = true, ["end"] = true,
            ["false"] = true, ["for"] = true, ["function"] = true,
            ["if"] = true, ["in"] = true, ["local"] = true, ["nil"] = true,
            ["not"] = true, ["or"] = true, ["repeat"] = true,
            ["return"] = true, ["then"] = true, ["true"] = true,
            ["until"] = true, ["while"] = true
        }
    end
    return lua_keyword
end

--- create a Lua token iterator from a string or file-like object.
-- Will return the token type and value.
-- @tparam string|file s the string or file-like object
-- @tab[opt] filter a table of token types to exclude, by default `{space=true,comments=true}`
-- @tab[opt] options a table of options; by default `{number=true,string=true}`,
-- which means convert numbers and strip string quotes.
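-- @usage
-- -- a hedged sketch (input is illustrative): keep comments in the stream
-- -- and leave string quotes in place by overriding filter and options
-- for t,v in lexer.lua('x = "hi" -- note\n', {space=true}, {number=true,string=false}) do
--     print(t,v)  --> iden x / = = / string "hi" / comment -- note
-- end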
function lexer.lua(s,filter,options)
    filter = filter or {space=true,comments=true}
    lexer.get_keywords()
    if not lua_matches then
        lua_matches = {
            {WSPACE,wsdump},
            {NUMBER3,ndump},
            {IDEN,lua_vdump},
            {NUMBER4,ndump},
            {NUMBER4a,ndump},
            {NUMBER5,ndump},
            {NUMBER5a,ndump},
            {STRING1,sdump},
            {STRING2,sdump},
            {STRING3,sdump},
            {'^%-%-%[(=*)%[.-%]%1%]',cdump},
            {'^%-%-.-\n',cdump},
            {'^%[(=*)%[.-%]%1%]',sdump_l},
            {'^==',tdump},
            {'^~=',tdump},
            {'^<=',tdump},
            {'^>=',tdump},
            {'^%.%.%.',tdump},
            {'^%.%.',tdump},
            {'^.',tdump}
        }
    end
    return lexer.scan(s,lua_matches,filter,options)
end

--- create a C/C++ token iterator from a string or file-like object.
-- Will return the token type and value.
-- @tparam string|file s the string or file-like object
-- @tab[opt] filter a table of token types to exclude, by default `{space=true,comments=true}`
-- @tab[opt] options a table of options; by default `{number=true,string=true}`,
-- which means convert numbers and strip string quotes.
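-- @usage
-- -- a hedged sketch (input is illustrative): C keywords and
-- -- multi-character operators come through as single tokens
-- for t,v in lexer.cpp('if (a != b) return;') do
--     print(t,v)  --> keyword if / ( ( / iden a / != != / iden b / ) ) / keyword return / ; ;
-- end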
function lexer.cpp(s,filter,options)
    filter = filter or {space=true,comments=true}
    if not cpp_keyword then
        cpp_keyword = {
            ["class"] = true, ["break"] = true, ["do"] = true, ["sizeof"] = true,
            ["else"] = true, ["continue"] = true, ["struct"] = true,
            ["false"] = true, ["for"] = true, ["public"] = true, ["void"] = true,
            ["private"] = true, ["protected"] = true, ["goto"] = true,
            ["if"] = true, ["static"] = true, ["const"] = true, ["typedef"] = true,
            ["enum"] = true, ["char"] = true, ["int"] = true, ["bool"] = true,
            ["long"] = true, ["float"] = true, ["true"] = true, ["delete"] = true,
            ["double"] = true, ["while"] = true, ["new"] = true,
            ["namespace"] = true, ["try"] = true, ["catch"] = true,
            ["switch"] = true, ["case"] = true, ["extern"] = true,
            ["return"] = true, ["default"] = true, ["unsigned"] = true, ["signed"] = true,
            ["union"] = true, ["volatile"] = true, ["register"] = true, ["short"] = true,
        }
    end
    if not cpp_matches then
        cpp_matches = {
            {WSPACE,wsdump},
            {PREPRO,pdump},
            {NUMBER3,ndump},
            {IDEN,cpp_vdump},
            {NUMBER4,ndump},
            {NUMBER4a,ndump},
            {NUMBER5,ndump},
            {NUMBER5a,ndump},
            {CHAR1,chdump},
            {CHAR2,chdump},
            {CHAR3,chdump},
            {STRING1,sdump},
            {STRING2,sdump},
            {STRING3,sdump},
            {'^//.-\n',cdump},
            {'^/%*.-%*/',cdump},
            {'^==',tdump},
            {'^!=',tdump},
            {'^<=',tdump},
            {'^>=',tdump},
            {'^->',tdump},
            {'^&&',tdump},
            {'^||',tdump},
            {'^%+%+',tdump},
            {'^%-%-',tdump},
            {'^%+=',tdump},
            {'^%-=',tdump},
            {'^%*=',tdump},
            {'^/=',tdump},
            {'^|=',tdump},
            {'^%^=',tdump},
            {'^::',tdump},
            {'^.',tdump}
        }
    end
    return lexer.scan(s,cpp_matches,filter,options)
end

--- get a list of parameters separated by a delimiter from a stream.
-- @param tok the token stream
-- @string[opt=')'] endtoken end of the list; can be '\n'
-- @string[opt=','] delim the separator
-- @return a list of token lists.
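-- @usage
-- -- a hedged sketch (input is illustrative): split an argument list into
-- -- per-argument token lists; the opening '(' must already be consumed
-- local tok = lexer.lua('(x+1,y)')
-- tok()  -- consume '('
-- local args = lexer.get_separated_list(tok)
-- print(#args)          --> 2
-- print(args[1][1][2])  --> x   (first token of the first argument)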
function lexer.get_separated_list(tok,endtoken,delim)
    endtoken = endtoken or ')'
    delim = delim or ','
    local parm_values = {}
    local level = 1 -- used to count ( and )
    local tl = {}
    local function tappend (tl,t,val)
        val = val or t
        append(tl,{t,val})
    end
    local is_end
    if endtoken == '\n' then
        is_end = function(t,val)
            return t == 'space' and val:find '\n'
        end
    else
        is_end = function (t)
            return t == endtoken
        end
    end
    local token,value
    while true do
        token,value=tok()
        if not token then return nil,'EOS' end -- end of stream is an error!
        if is_end(token,value) and level == 1 then
            append(parm_values,tl)
            break
        elseif token == '(' then
            level = level + 1
            tappend(tl,'(')
        elseif token == ')' then
            level = level - 1
            if level == 0 then -- finished with parm list
                append(parm_values,tl)
                break
            else
                tappend(tl,')')
            end
        elseif token == delim and level == 1 then
            append(parm_values,tl) -- a new parm
            tl = {}
        else
            tappend(tl,token,value)
        end
    end
    return parm_values,{token,value}
end

--- get the next non-space token from the stream.
-- @param tok the token stream.
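-- @usage
-- -- a tiny sketch (input is illustrative): useful when space tokens were
-- -- not filtered out of the stream (an empty filter keeps them)
-- local tok = lexer.lua('  x', {})
-- print(lexer.skipws(tok))  --> iden  x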
function lexer.skipws (tok)
    local t,v = tok()
    while t == 'space' do
        t,v = tok()
    end
    return t,v
end

local skipws = lexer.skipws

--- get the next token, which must be of the expected type.
-- Throws an error if the type does not match!
-- @param tok the token stream
-- @string expected_type the expected token type
-- @bool no_skip_ws if true, do not skip leading whitespace
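-- @usage
-- -- a short sketch (input is illustrative): assert the shape of the
-- -- input while consuming it
-- local tok = lexer.lua('function foo')
-- lexer.expecting(tok,'keyword')      --> 'function'
-- print(lexer.expecting(tok,'iden'))  --> foo
-- lexer.expecting(tok,'number')       -- raises "expecting number"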
function lexer.expecting (tok,expected_type,no_skip_ws)
    assert_arg(1,tok,'function')
    assert_arg(2,expected_type,'string')
    local t,v
    if no_skip_ws then
        t,v = tok()
    else
        t,v = skipws(tok)
    end
    if t ~= expected_type then error ("expecting "..expected_type,2) end
    return v
end

return lexer