• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

sile-typesetter / sile / 11534409649

26 Oct 2024 07:27PM UTC coverage: 33.196% (-28.7%) from 61.897%
11534409649

push

github

alerque
chore(tooling): Update editor-config key for stylua as accepted upstream

Our setting addition is still not in a tagged release, but the PR was
accepted into the default branch of stylua. This means you no longer
need to run my fork of Stylua to get this project's style, you just need
any build from the main development branch. However the config key was
renamed as part of the acceptance, so this is the relevant adjustment.

5810 of 17502 relevant lines covered (33.2%)

1300.57 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

56.94
/core/utilities/init.lua
1
--- SILE.utilities (aliased as SU)
2
-- @module SU
3
-- @alias utilities
4

5
local bitshim = require("bitshim")
18✔
6
local luautf8 = require("lua-utf8")
18✔
7
local semver = require("semver")
18✔
8

9
local utilities = {}
18✔
10

11
local epsilon = 1E-12
18✔
12

13
--- Generic
14
-- @section generic
15

16
--- Concatenate values from a table using a given separator.
17
-- Differs from `table.concat` in that all values are explicitly cast to strings, allowing debugging of tables that
18
-- include functions, other tables, data types, etc.
19
-- @tparam table array Input.
20
-- @tparam[opt=" "] string separator Separator.
21
function utilities.concat (array, separator)
18✔
22
   return table.concat(utilities.map(tostring, array), separator)
×
23
end
24

25
--- Execute a callback function on each value in a table.
26
-- @tparam function func Function to run on each value.
27
-- @tparam table array Input list-like table.
28
function utilities.map (func, array)
18✔
29
   local new_array = {}
4,873✔
30
   local last = #array
4,873✔
31
   for i = 1, last do
16,244✔
32
      new_array[i] = func(array[i])
21,154✔
33
   end
34
   return new_array
4,873✔
35
end
36

37
--- Require that an option table contains a specific value, otherwise raise an error.
38
-- @param options Input table of options.
39
-- @param name Name of the required option.
40
-- @param context User friendly name of the function or calling context.
41
-- @param required_type The name of a data type that the option must successfully cast to.
42
function utilities.required (options, name, context, required_type)
18✔
43
   if not options[name] then
806✔
44
      utilities.error(context .. " needs a " .. name .. " parameter")
×
45
   end
46
   if required_type then
806✔
47
      return utilities.cast(required_type, options[name])
157✔
48
   end
49
   return options[name]
649✔
50
end
51

52
--- Iterate over key/value pairs in sequence of the sorted keys.
53
-- Table iteration order with `pairs` is non-deterministic. This function returns an iterator that can be used in place
54
-- of `pairs` that will iterate through the values in the order of their *sorted* keys.
55
-- @tparam table input Input table.
56
-- @usage for key, val in SU.sortedpairs({ b = "runs second", a = "runs first" }) do print(key, val) end
57
function utilities.sortedpairs (input)
18✔
58
   local keys = {}
×
59
   for k, _ in pairs(input) do
×
60
      keys[#keys + 1] = k
×
61
   end
62
   table.sort(keys, function (a, b)
×
63
      if type(a) == type(b) then
×
64
         return a < b
×
65
      elseif type(a) == "number" then
×
66
         return true
×
67
      else
68
         return false
×
69
      end
70
   end)
71
   return coroutine.wrap(function ()
×
72
      for i = 1, #keys do
×
73
         coroutine.yield(keys[i], input[keys[i]])
×
74
      end
75
   end)
76
end
77

78
--- Substitute a range of value(s) in one table with values from another.
79
-- @tparam table array Table to modify.
80
-- @tparam integer start First key to replace.
81
-- @tparam integer stop Last key to replace.
82
-- @tparam table replacement Table from which to pull key/value pairs to inject in array.
83
-- @treturn table array First input array modified with values from replacement.
84
function utilities.splice (array, start, stop, replacement)
18✔
85
   local ptr = start
×
86
   local room = stop - start + 1
×
87
   local last = replacement and #replacement or 0
×
88
   for i = 1, last do
×
89
      if room > 0 then
×
90
         room = room - 1
×
91
         array[ptr] = replacement[i]
×
92
      else
93
         table.insert(array, ptr, replacement[i])
×
94
      end
95
      ptr = ptr + 1
×
96
   end
97

98
   for _ = 1, room do
×
99
      table.remove(array, ptr)
×
100
   end
101
   return array
×
102
end
103

104
-- TODO: Unused, now deprecated?
105
function utilities.inherit (orig, spec)
18✔
106
   local new = pl.tablex.deepcopy(orig)
×
107
   if spec then
×
108
      for k, v in pairs(spec) do
×
109
         new[k] = v
×
110
      end
111
   end
112
   if new.init then
×
113
      new:init()
×
114
   end
115
   return new
×
116
end
117

118
--- Type handling
119
-- @section types
120

121
local function preferbool ()
122
   utilities.warn("Please use boolean values or strings such as 'true' and 'false' instead of 'yes' and 'no'.")
×
123
end
124

125
--- Cast user input into a boolean type.
-- User input content such as options typed into documents will return string values such as "true" or "false" rather
-- than true or false types. This evaluates those strings or other inputs and returns a consistent boolean type in
-- return.
-- @tparam nil|bool|string value Input value such as a string to evaluate for truthiness.
-- @tparam[opt=false] boolean default Whether to assume inputs that don't specifically evaluate to something should be true or false.
-- @treturn boolean
function utilities.boolean (value, default)
   if value == false then
      return false
   end
   if value == true then
      return true
   end
   if value == "false" then
      return false
   end
   if value == "true" then
      return true
   end
   -- "yes"/"no" are still honored, but nag the user towards real booleans.
   if value == "no" then
      preferbool()
      return false
   end
   if value == "yes" then
      preferbool()
      return true
   end
   -- Unset or empty input falls back on the caller's default (anything but `true` counts as false).
   if value == nil then
      return default == true
   end
   if value == "" then
      return default == true
   end
   -- tostring() keeps this diagnostic usable for tables, functions, etc.; concatenating such values directly
   -- would raise an unrelated "attempt to concatenate" error instead of the intended message.
   SU.error("Expecting a boolean value but got '" .. tostring(value) .. "'")
   -- Defensive fallback only: utilities.error finishes by calling error().
   return default == true
end
162

163
--- Cast user input to an expected type.
-- If possible, converts input from one type to another. Not all types can be cast. For example "four" can't be cast to
-- a number, but "4" or 4 can. Likewise "6pt" or 6 can be cast to a SILE.types.measurement, SILE.types.length, or even
-- a SILE.types.node.glue, but not a SILE.types.color.
-- @tparam string wantedType Expected type. Lowercased and then tested with substring matches, so the first branch
-- below that matches wins.
-- @param value Input value to convert.
-- @return A value of the type wantedType.
function utilities.cast (wantedType, value)
   local actualType = SU.type(value)
   wantedType = string.lower(wantedType)
   if wantedType:match(actualType) then
      -- Already of (one of) the wanted type(s), nothing to do.
      return value
   elseif actualType == "nil" and wantedType:match("nil") then
      return nil
   elseif wantedType:match("length") then
      return SILE.types.length(value)
   elseif wantedType:match("measurement") then
      return SILE.types.measurement(value)
   elseif wantedType:match("vglue") then
      -- Must be tested before "glue", which is a substring of "vglue".
      return SILE.types.node.vglue(value)
   elseif wantedType:match("glue") then
      return SILE.types.node.glue(value)
   elseif wantedType:match("kern") then
      return SILE.types.node.kern(value)
   elseif actualType == "nil" then
      SU.error("Cannot cast nil to " .. wantedType)
   elseif wantedType:match("boolean") then
      return SU.boolean(value)
   elseif wantedType:match("string") then
      return tostring(value)
   elseif wantedType:match("number") then
      if type(value) == "table" and type(value.tonumber) == "function" then
         return value:tonumber()
      end
      local num = tonumber(value)
      if not num then
         -- tostring() so that tables/booleans produce this message rather than a concatenation error.
         SU.error("Cannot cast '" .. tostring(value) .. "' to " .. wantedType)
      end
      return num
   elseif wantedType:match("integer") then
      local num
      if type(value) == "table" and type(value.tonumber) == "function" then
         num = value:tonumber()
      else
         num = tonumber(value)
      end
      if not num then
         SU.error("Cannot cast '" .. tostring(value) .. "' to " .. wantedType)
      end
      -- Any wantedType containing "number" was handled by the previous branch, so only the fractional part
      -- needs checking here.
      if num % 1 ~= 0 then
         -- Could be an error but since it wasn't checked before, let's just warn:
         -- Some packages might have wrongly typed settings, for instance.
         SU.warn("Casting an integer but got a float number " .. num)
      end
      return num
   else
      SU.error("Cannot cast to unrecognized type " .. wantedType)
   end
end
221

222
--- Return the type of an object
223
-- Like Lua's `type`, but also handles various SILE user data types.
224
-- @tparam any value Any input value. If a table is one of SILE's classes or types, report on its internal type.
225
-- Otherwise use the output of `type`.
226
-- @treturn string
227
function utilities.type (value)
18✔
228
   if type(value) == "number" then
187,312✔
229
      return math.floor(value) == value and "integer" or "number"
40,260✔
230
   elseif type(value) == "table" and value.prototype then
147,052✔
231
      return value:prototype()
×
232
   elseif type(value) == "table" and value.is_a then
147,052✔
233
      return value.type
64,249✔
234
   else
235
      return type(value)
82,803✔
236
   end
237
end
238

239
--- Errors and debugging
240
-- @section errors
241

242
--- Output a debug message only if debugging for a specific category is enabled.
-- Importantly passing a series of strings, functions, or tables is more efficient than trying to formulate a full
-- message using concatenation and tostring() methods in the original code because it doesn't have to even run if the
-- relevant debug flag is not enabled.
-- @tparam text category Category flag for which this message should be output.
-- @tparam string|function|table ... Each argument will be output separated by spaces, strings directly, functions by
-- evaluating them and stringifying the return value, and everything else via tostring().
-- @usage
--    > glue = SILE.types.node.glue("6em")
--    > SU.debug("foo", "A glue node", glue)
--    [foo] A glue node G<6em>
function utilities.debug (category, ...)
   if SILE.quiet then
      return
   end
   if not utilities.debugging(category) then
      return
   end
   -- select("#") gives the true argument count, so nil arguments neither end the loop early nor leave
   -- holes in the list that gets joined below.
   local count = select("#", ...)
   local inputs = { ... }
   local parts = {}
   for i = 1, count do
      local input = inputs[i]
      if type(input) == "function" then
         local status, output = pcall(input)
         if status then
            -- A callback legitimately returning nil/false is output as such, not reported as a failure.
            parts[#parts + 1] = tostring(output)
         else
            SU.warn(("Output of %s debug function was an error: %s"):format(category, tostring(output)))
         end
      else
         parts[#parts + 1] = tostring(input)
      end
   end
   io.stderr:write(("\n[%s] %s"):format(category, table.concat(parts, " ")))
end
274

275
--- Determine if a specific debug flag is set.
276
-- @tparam string category Name of the flag status to check, e.g. "frames".
277
-- @treturn boolean
278
function utilities.debugging (category)
18✔
279
   return SILE.debugFlags.all and category ~= "profile" or SILE.debugFlags[category]
9,341✔
280
end
281

282
--- Warn about use of a deprecated feature.
-- Checks the current version and decides whether to warn or error, then outputs a message with as much useful
-- information as possible to make it easy for end users to update their usage.
-- @tparam string old The name of the deprecated interface.
-- @tparam string new A name of a suggested replacement interface.
-- @tparam string warnat The first release where the interface is considered deprecated, at which point there might be
-- a shim.
-- @tparam string errorat The first release where the interface is no longer functional even with a shim.
-- @tparam string extra Longer-form help to include in output separate from the expected one-liner of warning messages.
function utilities.deprecated (old, new, warnat, errorat, extra)
   -- NOTE(review): a missing errorat becomes semver(0); presumably that compares <= any running version and
   -- would make the call raise unconditionally. Confirm callers always pass errorat.
   warnat, errorat = semver(warnat or 0), semver(errorat or 0)
   -- SILE.version is defined *after* most of SILE loads. It’s available at
   -- runtime but not useful if we encounter deprecated code in core code. Users
   -- will never encounter this failure, but as a developer it’s hard to test a
   -- deprecation when core code refactoring is an all-or-nothing proposition.
   -- Hence we fake it ‘till we make it, all deprecations internally are warnings.
   --
   -- The dots are escaped so they only match literal separators, and a version string that doesn't contain
   -- a vX.Y.Z triplet falls back to warnat instead of handing nil to semver().
   local running = SILE.version and SILE.version:match("v(%d+%.%d+%.%d+)")
   local current = running and semver(running) or warnat
   -- Names starting with a backslash are document commands; anything else is shown as a function call.
   local brackets = old:sub(1, 1) == "\\" and "" or "()"
   local _new = new and "Please use " .. (new .. brackets) .. " instead." or "Please don't use it."
   local msg = (old .. brackets)
      .. " was deprecated in SILE v"
      .. tostring(warnat)
      .. "\n\n  "
      .. _new
      .. "\n\n"
      .. (extra and (pl.stringx.indent(pl.stringx.dedent(extra), 2)) or "")
   if errorat and current >= errorat then
      SU.error(msg)
   elseif warnat and current >= warnat then
      SU.warn(msg)
   end
end
314

315
--- Dump the contents of any Lua type.
316
-- For quick debugging, can be used on any number of any type of Lua value. Pretty-prints tables.
317
-- @tparam any ... Any number of values
318
function utilities.dump (...)
18✔
319
   local arg = { ... } -- Avoid things that Lua stuffs in arg like args to self()
×
320
   pl.pretty.dump(#arg == 1 and arg[1] or arg, "/dev/stderr")
×
321
end
322

323
local _skip_traceback_levels = 2
18✔
324

325
--- Raise an error and exit.
326
-- Outputs a warning message via `warn`, then finishes up anything it can without processing more content, then exits.
327
-- @tparam string message The error message to give.
328
-- @tparam boolean isbug Whether or not hitting this error is expected to be a code bug (as opposed to mistakes in user input).
329
function utilities.error (message, isbug)
18✔
330
   SILE.quiet = false
×
331
   _skip_traceback_levels = 3
×
332
   utilities.warn(message, isbug)
×
333
   _skip_traceback_levels = 2
×
334
   io.stderr:flush()
×
335
   SILE.outputter:finish() -- Only really useful from the REPL but no harm in trying
×
336
   SILE.scratch.caughterror = true
×
337
   error("", 2)
×
338
end
339

340
--- Output an information message.
341
-- Basically like `warn`, except no source tracing information is added.
342
-- @tparam string message
343
function utilities.msg (message)
18✔
344
   if SILE.quiet then
5✔
345
      return
×
346
   end
347
   message = pl.stringx.rstrip(message)
10✔
348
   message = "                        " .. message
5✔
349
   message = pl.stringx.dedent(message)
10✔
350
   message = pl.stringx.lstrip(message)
10✔
351
   message = pl.stringx.indent(message, 2)
10✔
352
   message = message:gsub("^.", "!")
5✔
353
   io.stderr:write("\n" .. message .. "\n")
5✔
354
end
355

356
--- Output a warning.
357
-- Outputs a warning message including identifying where in the processing SILE is at when the warning is given.
358
-- @tparam string message The error message to give.
359
-- @tparam boolean isbug Whether or not hitting this warning is expected to be a code bug (as opposed to mistakes in user input).
360
function utilities.warn (message, isbug)
18✔
361
   if SILE.quiet then
5✔
362
      return
×
363
   end
364
   utilities.msg(message)
5✔
365
   if SILE.traceback or isbug then
5✔
366
      io.stderr:write("at:\n" .. SILE.traceStack:locationTrace())
×
367
      if _skip_traceback_levels == 2 then
×
368
         io.stderr:write(
×
369
            debug.traceback("", _skip_traceback_levels) or "\t! debug.traceback() did not identify code location"
×
370
         )
371
      end
372
   else
373
      io.stderr:write("  at " .. SILE.traceStack:locationHead())
10✔
374
   end
375
   io.stderr:write("\n")
5✔
376
end
377

378
--- Math
379
-- @section math
380

381
--- Check equality of floating point values.
382
-- Comparing floating point numbers using math functions in Lua may give different and unexpected answers depending on
383
-- the Lua VM and other environmental factors. This normalizes them using our standard internal epsilon value and
384
-- compares the absolute difference to avoid floating point number weirdness.
385
-- @tparam float lhs
386
-- @tparam float rhs
387
-- @treturn boolean
388
function utilities.feq (lhs, rhs)
18✔
389
   lhs = SU.cast("number", lhs)
56✔
390
   rhs = SU.cast("number", rhs)
56✔
391
   local abs = math.abs
28✔
392
   return abs(lhs - rhs) <= epsilon * (abs(lhs) + abs(rhs))
28✔
393
end
394

395
--- Add up all the values in a table.
396
-- @tparam table array Input list-like table.
397
-- @treturn number Sum of all values.
398
function utilities.sum (array)
18✔
399
   local total = 0
1,394✔
400
   local last = #array
1,394✔
401
   for i = 1, last do
2,699✔
402
      total = total + array[i]
1,305✔
403
   end
404
   return total
1,394✔
405
end
406

407
--- Return maximum value of inputs.
408
-- `math.max`, but works on SILE types such as SILE.types.measurement.
409
-- Lua <= 5.2 can't handle math operators on objects.
410
function utilities.max (...)
18✔
411
   local input = pl.utils.pack(...)
3,049✔
412
   local max = table.remove(input, 1)
3,049✔
413
   for _, val in ipairs(input) do
9,939✔
414
      if val > max then
6,890✔
415
         max = val
2,457✔
416
      end
417
   end
418
   return max
3,049✔
419
end
420

421
--- Return minimum value of inputs.
422
-- `math.min`, but works on SILE types such as SILE.types.measurement.
423
-- Lua <= 5.2 can't handle math operators on objects.
424
function utilities.min (...)
18✔
425
   local input = pl.utils.pack(...)
1✔
426
   local min = input[1]
1✔
427
   for _, val in ipairs(input) do
3✔
428
      if val < min then
2✔
429
         min = val
×
430
      end
431
   end
432
   return min
1✔
433
end
434

435
--- Round and normalize a number for debugging.
436
-- LuaJIT 2.1 betas (and inheritors such as OpenResty and Moonjit) are biased
437
-- towards rounding 0.5 up to 1, all other Lua interpreters are biased
438
-- towards rounding such floating point numbers down.  This hack shaves off
439
-- just enough to fix the bias so our test suite works across interpreters.
440
-- Note that even a true rounding function here will fail because the bias is
441
-- inherent to the floating point type. Also note we are erring in favor of
442
-- the *less* common option because the LuaJIT VMs are hopelessly broken
443
-- whereas normal Lua VMs can be coerced.
444
-- @tparam number input Input value.
445
-- @treturn string Four-digit precision floating point.
446
function utilities.debug_round (input)
18✔
447
   if input > 0 then
×
448
      input = input + 0.00000000000001
×
449
   end
450
   if input < 0 then
×
451
      input = input - 0.00000000000001
×
452
   end
453
   return string.format("%.4f", input)
×
454
end
455

456
--- Remove empty spaces from list-like tables
-- Iterating list-like tables is hard if some values have been removed. This converts { [1] = "a", [3] = "b" } into
-- { [1] = "a", [2] = "b" } which can be iterated using `ipairs` without stopping after 1.
-- @tparam table items List-like table potentially with holes.
-- @treturn table List like table without holes. Empty if items has no positive numeric keys.
function utilities.compress (items)
   local rv = {}
   -- Find the highest numeric key by iteration rather than math.max(unpack(keys)): that form raises on an
   -- empty table and is limited by how many arguments unpack can push.
   local max = 0
   for key in pairs(items) do
      if type(key) == "number" and key > max then
         max = key
      end
   end
   for i = 1, max do
      -- Truthiness test kept from the original: both holes and `false` entries are dropped.
      if items[i] then
         rv[#rv + 1] = items[i]
      end
   end
   return rv
end
471

472
--- Reverse the order of a list-like table.
473
-- @tparam table tbl Input list-like table.
474
function utilities.flip_in_place (tbl)
18✔
475
   local tmp, j
476
   for i = 1, math.floor(#tbl / 2) do
518✔
477
      tmp = tbl[i]
374✔
478
      j = #tbl - i + 1
374✔
479
      tbl[i] = tbl[j]
374✔
480
      tbl[j] = tmp
374✔
481
   end
482
end
483

484
-- TODO: Before documenting, consider whether this should be private to the one existing usage.
485
function utilities.allCombinations (options)
18✔
486
   local count = 1
×
487
   for i = 1, #options do
×
488
      count = count * options[i]
×
489
   end
490
   return coroutine.wrap(function ()
×
491
      for i = 0, count - 1 do
×
492
         local this = i
×
493
         local rv = {}
×
494
         for j = 1, #options do
×
495
            local base = options[j]
×
496
            rv[#rv + 1] = this % base + 1
×
497
            this = (this - this % base) / base
×
498
         end
499
         coroutine.yield(rv)
×
500
      end
501
   end)
502
end
503

504
function utilities.rateBadness (inf_bad, shortfall, spring)
18✔
505
   if spring == 0 then
1,739✔
506
      return inf_bad
75✔
507
   end
508
   local bad = math.floor(100 * math.abs(shortfall / spring) ^ 3)
1,664✔
509
   return math.min(inf_bad, bad)
1,664✔
510
end
511

512
function utilities.rationWidth (target, width, ratio)
18✔
513
   if ratio < 0 and width.shrink:tonumber() > 0 then
1,880✔
514
      target:___add(width.shrink:tonumber() * ratio)
327✔
515
   elseif ratio > 0 and width.stretch:tonumber() > 0 then
2,658✔
516
      target:___add(width.stretch:tonumber() * ratio)
690✔
517
   end
518
   return target
1,549✔
519
end
520

521
--- Text handling
522
-- @section text
523

524
--- Iterate over a string split into tokens via a pattern.
525
-- @tparam string string Input string.
526
-- @tparam string pattern Pattern on which to split the input.
527
-- @treturn function An iterator function
528
-- @usage for str in SU.gtoke("foo-bar-baz", "-") do print(str) end
529
function utilities.gtoke (string, pattern)
18✔
530
   string = string and tostring(string) or ""
138✔
531
   pattern = pattern and tostring(pattern) or "%s+"
138✔
532
   local length = #string
138✔
533
   return coroutine.wrap(function ()
138✔
534
      local index = 1
138✔
535
      repeat
536
         local first, last = string:find(pattern, index)
174✔
537
         if last then
174✔
538
            if index < first then
46✔
539
               coroutine.yield({ string = string:sub(index, first - 1) })
84✔
540
            end
541
            coroutine.yield({ separator = string:sub(first, last) })
92✔
542
            index = last + 1
46✔
543
         else
544
            if index <= length then
128✔
545
               coroutine.yield({ string = string:sub(index) })
256✔
546
            end
547
            break
128✔
548
         end
549
      until index > length
46✔
550
   end)
551
end
552

553
--- Convert a Unicode character to its corresponding codepoint.
554
-- @tparam string uchar A single Unicode character.
555
-- @return number The Unicode code point where uchar is encoded.
556
function utilities.codepoint (uchar)
18✔
557
   local seq = 0
19,972✔
558
   local val = -1
19,972✔
559
   for i = 1, #uchar do
40,354✔
560
      local c = string.byte(uchar, i)
21,283✔
561
      if seq == 0 then
21,283✔
562
         if val > -1 then
20,009✔
563
            return val
901✔
564
         end
565
         seq = c < 0x80 and 1
19,108✔
566
            or c < 0xE0 and 2
19,108✔
567
            or c < 0xF0 and 3
1,255✔
568
            or c < 0xF8 and 4 --c < 0xFC and 5 or c < 0xFE and 6 or
19✔
569
            or error("invalid UTF-8 character sequence")
×
570
         val = bitshim.band(c, 2 ^ (8 - seq) - 1)
19,108✔
571
      else
572
         val = bitshim.bor(bitshim.lshift(val, 6), bitshim.band(c, 0x3F))
1,274✔
573
      end
574
      seq = seq - 1
20,382✔
575
   end
576
   return val
19,071✔
577
end
578

579
--- Convert a code point to a Unicode character.
-- @tparam number|string codepoint Input code point value, either as a number, a string holding a number, or a string
-- holding a hexadecimal value written as "U+NNNN" or "0xFFFF".
-- @treturn string The character represented by the codepoint description. Input that cannot be interpreted as a code
-- point is returned unchanged.
function utilities.utf8charfromcodepoint (codepoint)
   local cp = codepoint
   -- Only strings have the :match method; numbers go straight to the conversion below.
   if type(cp) == "string" then
      local hex = cp:match("[Uu]%+(%x+)") or cp:match("0[xX](%x+)")
      if hex then
         cp = tonumber(hex, 16)
      else
         cp = tonumber(cp) or cp
      end
   end
   if type(cp) == "number" then
      return luautf8.char(cp)
   end
   return codepoint
end
597

598
--- Convert a UTF-16 encoded string to a series of code points.
-- Like `luautf8.codes`, but for UTF-16 strings.
-- @tparam string ustr Input string.
-- @tparam string endian "be" for big-endian input; any other value is read as little-endian.
-- @treturn function Iterator that returns the next code point as a number on each call, then nil at the end of input.
function utilities.utf16codes (ustr, endian)
   local pos = 1

   -- Read the 16-bit code unit at pos in the requested byte order and advance past it.
   local function nextunit ()
      local b1, b2 = string.byte(ustr, pos, pos + 1)
      if not b2 then
         -- Explicit diagnostic instead of an "arithmetic on a nil value" error further down.
         error("Truncated UTF-16 input: incomplete code unit at byte " .. pos)
      end
      pos = pos + 2
      if endian == "be" then
         return b1 * 256 + b2
      end
      return b2 * 256 + b1
   end

   return function ()
      if pos > #ustr then
         return nil
      end
      local wchar = nextunit()
      -- Anything outside the high surrogate range is a complete code point on its own.
      if wchar < 0xD800 or wchar > 0xDBFF then
         return wchar
      end
      -- High surrogate: combine its low 10 bits with the low 10 bits of the following unit.
      local lowchar = nextunit()
      return 0x10000 + bitshim.lshift(bitshim.band(wchar, 0x03FF), 10) + bitshim.band(lowchar, 0x03FF)
   end
end
635

636
--- Split a UTF-8 string into characters.
637
-- Lua's `string.split` will only explode a string by bytes. For text processing purposes it is usually more desirable
638
-- to split it into 1, 2, 3, or 4 byte groups matching the UTF-8 encoding.
639
-- @tparam string str Input UTF-8 encoded string.
640
-- @treturn table A list-like table of UTF8 strings each representing a Unicode char from the input string.
641
function utilities.splitUtf8 (str)
18✔
642
   local rv = {}
14,674✔
643
   for _, cp in luautf8.next, str do
95,117✔
644
      table.insert(rv, luautf8.char(cp))
80,443✔
645
   end
646
   return rv
14,674✔
647
end
648

649
--- The last Unicode character in a UTF-8 encoded string.
650
-- Uses `SU.splitUtf8` to break a string into segments representing encoded characters, returns the last one. May be
651
-- more than one byte.
652
-- @tparam string str Input string.
653
-- @treturn string A single Unicode character.
654
function utilities.lastChar (str)
18✔
655
   local chars = utilities.splitUtf8(str)
×
656
   return chars[#chars]
×
657
end
658

659
--- The first Unicode character in a UTF-8 encoded string.
660
-- Uses `SU.splitUtf8` to break a string into segments representing encoded characters, returns the first one. May be
661
-- more than one byte.
662
-- @tparam string str Input string.
663
-- @treturn string A single Unicode character.
664
function utilities.firstChar (str)
18✔
665
   local chars = utilities.splitUtf8(str)
×
666
   return chars[1]
×
667
end
668

669
local byte, floor, reverse = string.byte, math.floor, string.reverse
18✔
670

671
--- The Unicode character in a UTF-8 encoded string at a specific position
672
-- Takes the part of str starting at byte offset index and returns the first UTF-8 encoded character found in it. May be
673
-- more than one byte.
674
-- @tparam string str Input string.
675
-- @tparam number index Byte offset in str at which to start looking for a character.
676
-- @treturn string A single Unicode character.
677
function utilities.utf8charat (str, index)
18✔
678
   return str:sub(index):match("([%z\1-\127\194-\244][\128-\191]*)")
×
679
end
680

681
local utf16bom = function (endianness)
682
   return endianness == "be" and "\254\255" or endianness == "le" and "\255\254" or SU.error("Unrecognized endianness")
3,264✔
683
end
684

685
--- Encode a string to a hexadecimal representation.
-- @tparam string str Input string; every byte is encoded, so multi-byte UTF-8 characters yield several pairs.
-- @treturn string Hexadecimal representation of str, two lowercase digits per byte.
function utilities.hexencoded (str)
   -- Collect the pairs and join once; appending to a string per byte is quadratic on long input.
   local pairs_ = {}
   for i = 1, #str do
      -- Fetch exactly one byte: a two-byte range would hand string.format a surplus argument.
      pairs_[i] = string.format("%02x", byte(str, i))
   end
   return table.concat(pairs_)
end
695

696
--- Decode a hexadecimal representation into a string.
-- @tparam string str Input hexadecimal encoded string, two digits per byte.
-- @treturn string Decoded byte string.
function utilities.hexdecoded (str)
   if #str % 2 == 1 then
      SU.error("Cannot decode hex string with odd len")
   end
   local chars = {}
   for i = 1, #str, 2 do
      local pair = string.sub(str, i, i + 1)
      -- Validate up front so bad input gets a meaningful message rather than a string.char argument error.
      if not pair:match("^%x%x$") then
         SU.error("Cannot decode invalid hex digits '" .. pair .. "'")
      end
      chars[#chars + 1] = string.char(tonumber(pair, 16))
   end
   return table.concat(chars)
end
709

710
local uchr_to_surrogate_pair = function (uchr, endianness)
711
   local hi, lo = floor((uchr - 0x10000) / 0x400) + 0xd800, (uchr - 0x10000) % 0x400 + 0xdc00
×
712
   local s_hi, s_lo =
713
      string.char(floor(hi / 256)) .. string.char(hi % 256), string.char(floor(lo / 256)) .. string.char(lo % 256)
×
714
   return endianness == "le" and (reverse(s_hi) .. reverse(s_lo)) or s_hi .. s_lo
×
715
end
716

717
local uchr_to_utf16_double_byte = function (uchr, endianness)
718
   local ustr = string.char(floor(uchr / 256)) .. string.char(uchr % 256)
×
719
   return endianness == "le" and reverse(ustr) or ustr
×
720
end
721

722
local utf8_to_utf16 = function (str, endianness)
723
   local ustr = utf16bom(endianness)
×
724
   for _, uchr in luautf8.codes(str) do
×
725
      ustr = ustr
×
726
         .. (uchr < 0x10000 and uchr_to_utf16_double_byte(uchr, endianness) or uchr_to_surrogate_pair(uchr, endianness))
×
727
   end
728
   return ustr
×
729
end
730

731
--- Convert a UTF-8 string to big-endian UTF-16.
732
-- @tparam string str UTF-8 encoded string.
733
-- @treturn string Big-endian UTF-16 encoded string.
734
function utilities.utf8_to_utf16be (str)
18✔
735
   return utf8_to_utf16(str, "be")
×
736
end
737

738
--- Convert a UTF-8 string to little-endian UTF-16.
739
-- @tparam string str UTF-8 encoded string.
740
-- @treturn string Little-endian UTF-16 encoded string.
741
function utilities.utf8_to_utf16le (str)
18✔
742
   return utf8_to_utf16(str, "le")
×
743
end
744

745
--- Convert a UTF-8 string to big-endian UTF-16, then encode in hex.
746
-- @tparam string str UTF-8 encoded string.
747
-- @treturn string Hexadecimal representation of a big-endian UTF-16 encoded string.
748
function utilities.utf8_to_utf16be_hexencoded (str)
18✔
749
   return utilities.hexencoded(utilities.utf8_to_utf16be(str))
×
750
end
751

752
--- Convert a UTF-8 string to little-endian UTF-16, then encode in hex.
753
-- @tparam string str UTF-8 encoded string.
754
-- @treturn string Hexadecimal representation of a little-endian UTF-16 encoded string.
755
function utilities.utf8_to_utf16le_hexencoded (str)
18✔
756
   return utilities.hexencoded(utilities.utf8_to_utf16le(str))
×
757
end
758

759
local utf16_to_utf8 = function (str, endianness)
760
   local bom = utf16bom(endianness)
3,264✔
761

762
   if str:find(bom) == 1 then
3,264✔
763
      str = string.sub(str, 3, #str)
×
764
   end
765
   local ustr = ""
3,264✔
766
   for uchr in utilities.utf16codes(str, endianness) do
207,992✔
767
      ustr = ustr .. luautf8.char(uchr)
99,100✔
768
   end
769
   return ustr
3,264✔
770
end
771

772
--- Convert a big-endian UTF-16 string to UTF-8.
773
-- @tparam string str Big-endian UTF-16 encoded string.
774
-- @treturn string UTF-8 encoded string.
775
function utilities.utf16be_to_utf8 (str)
18✔
776
   return utf16_to_utf8(str, "be")
3,264✔
777
end
778

779
--- Convert a little-endian UTF-16 string to UTF-8.
780
-- @tparam string str Little-endian UTF-16 encoded string.
781
-- @treturn string UTF-8 encoded string.
782
function utilities.utf16le_to_utf8 (str)
18✔
783
   return utf16_to_utf8(str, "le")
×
784
end
785

786
--- Build a proxy table exposing the command names of the frames currently on SILE.traceStack.
-- Indexing the proxy with a stack position returns the `command` field of that frame, or nil if there is no such
-- frame or it has no command.
-- @treturn table Proxy with `dump`, `parent`, and `contains` methods.
function utilities.breadcrumbs ()
   local breadcrumbs = {}

   setmetatable(breadcrumbs, {
      __index = function (_, key)
         local frame = SILE.traceStack[key]
         return frame and frame.command or nil
      end,
      __len = function (_)
         return #SILE.traceStack
      end,
      __tostring = function (self)
         -- The proxy holds no integer keys of its own, so gather the entries through __index explicitly
         -- rather than relying on table.concat honoring the __index/__len metamethods.
         local commands = {}
         for i = 1, #SILE.traceStack do
            commands[i] = tostring(self[i])
         end
         return "B»" .. table.concat(commands, "»")
      end,
   })

   --- Pretty-print the proxy via SU.dump.
   function breadcrumbs:dump ()
      SU.dump(self)
   end

   --- Command name `count` levels (default 1) above the current frame.
   function breadcrumbs:parent (count)
      -- Note LuaJIT does not support __len, so this has to work even when that metamethod doesn't fire...
      return self[#SILE.traceStack - (count or 1)]
   end

   --- Search the stack from the innermost frame outwards for a command name.
   -- @tparam string needle Command name to look for.
   -- @tparam[opt=0] integer startdepth Number of innermost frames to skip.
   -- @treturn boolean Whether the command was found.
   -- @treturn integer Stack position of the match, or -1.
   function breadcrumbs:contains (needle, startdepth)
      startdepth = startdepth or 0
      -- Measure the stack directly, like parent() does: #self depends on __len, which LuaJIT ignores.
      local depth = #SILE.traceStack
      for i = startdepth, depth - 1 do
         local frame = SILE.traceStack[depth - i]
         if frame and frame.command == needle then
            return true, depth - i
         end
      end
      return false, -1
   end

   return breadcrumbs
end
824

825
utilities.formatNumber = require("core.utilities.numbers")
18✔
826

827
utilities.collatedSort = require("core.utilities.sorting")
18✔
828

829
utilities.ast = require("core.utilities.ast")
18✔
830
utilities.debugAST = utilities.ast.debug
18✔
831

832
function utilities.subContent (content)
18✔
833
   SU.deprecated(
×
834
      "SU.subContent",
835
      "SU.ast.subContent",
836
      "0.15.0",
837
      "0.17.0",
838
      [[Note that the new implementation no longer introduces an id="stuff" key.]]
839
   )
840
   return utilities.ast.subContent(content)
×
841
end
842

843
function utilities.hasContent (content)
18✔
844
   SU.deprecated("SU.hasContent", "SU.ast.hasContent", "0.15.0", "0.17.0")
×
845
   return SU.ast.hasContent(content)
×
846
end
847

848
function utilities.contentToString (content)
18✔
849
   SU.deprecated("SU.contentToString", "SU.ast.contentToString", "0.15.0", "0.17.0")
×
850
   return SU.ast.contentToString(content)
×
851
end
852

853
function utilities.walkContent (content, action)
18✔
854
   SU.deprecated("SU.walkContent", "SU.ast.walkContent", "0.15.0", "0.17.0")
×
855
   SU.ast.walkContent(content, action)
×
856
end
857

858
function utilities.stripContentPos (content)
18✔
859
   SU.deprecated("SU.stripContentPos", "SU.ast.stripContentPos", "0.15.0", "0.17.0")
×
860
   return SU.ast.stripContentPos(content)
×
861
end
862

863
return utilities
18✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc