• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

sile-typesetter / sile / 12313034533

13 Dec 2024 09:28AM UTC coverage: 60.234% (-0.7%) from 60.941%
12313034533

push

github

web-flow
Merge 5a7694dff into d737b2656

9 of 25 new or added lines in 5 files covered. (36.0%)

145 existing lines in 16 files now uncovered.

12801 of 21252 relevant lines covered (60.23%)

2545.46 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

64.35
/core/utilities/init.lua
1
--- SILE.utilities (aliased as SU)
2
-- @module SU
3
-- @alias utilities
4

5
local bitshim = require("bitshim")
97✔
6
local luautf8 = require("lua-utf8")
97✔
7
local semver = require("rusile").semver
97✔
8

9
local utilities = {}
97✔
10

11
local epsilon = 1E-12
97✔
12

13
--- Generic
14
-- @section generic
15

16
--- Join the values of a list-like table into a single string.
-- Unlike `table.concat`, every value is first passed through `tostring`, so tables that hold functions, other
-- tables, or custom data types can still be joined (handy for debugging).
-- @tparam table array Input list-like table.
-- @tparam[opt] string separator Separator placed between values. It is handed to `table.concat` unchanged, which
-- falls back to the empty string when it is omitted.
-- @treturn string The joined string.
function utilities.concat (array, separator)
   local strings = utilities.map(tostring, array)
   return table.concat(strings, separator)
end
24

25
--- Apply a callback to every value of a list-like table.
-- Only positions `1` through `#array` are visited and the input table is left untouched.
-- @tparam function func Function called with each value in turn.
-- @tparam table array Input list-like table.
-- @treturn table A new table holding the callback's result for each position.
function utilities.map (func, array)
   local results = {}
   for index = 1, #array do
      results[index] = func(array[index])
   end
   return results
end
36

37
--- Fetch a mandatory option from an option table, raising an error when it is missing.
-- An option whose value is `nil` or `false` counts as missing.
-- @param options Input table of options.
-- @param name Name of the required option.
-- @param context User friendly name of the function or calling context (used in the error message).
-- @param required_type Optional name of a data type; when given, the option is passed through `utilities.cast`.
-- @return The option value, cast to required_type if one was requested.
function utilities.required (options, name, context, required_type)
   local value = options[name]
   if not value then
      utilities.error(context .. " needs a " .. name .. " parameter")
   end
   if not required_type then
      return value
   end
   return utilities.cast(required_type, value)
end
51

52
--- Iterate over key/value pairs in the order of the sorted keys.
-- Table iteration order with `pairs` is non-deterministic. This function returns an iterator that can be used in
-- place of `pairs` and walks the table in the order of its *sorted* keys. Keys of the same type are compared with
-- `<`; when the types differ, numeric keys sort ahead of all others.
-- @tparam table input Input table.
-- @treturn function An iterator yielding `key, value` pairs.
-- @usage for key, val in SU.sortedpairs({ b = "runs second", a = "runs first" }) do print(key, val) end
function utilities.sortedpairs (input)
   local sortedkeys = {}
   for key in pairs(input) do
      table.insert(sortedkeys, key)
   end
   local function precedes (a, b)
      local atype, btype = type(a), type(b)
      if atype == btype then
         return a < b
      end
      -- Mixed key types: only a number is ever ranked ahead of the other key.
      return atype == "number"
   end
   table.sort(sortedkeys, precedes)
   return coroutine.wrap(function ()
      for _, key in ipairs(sortedkeys) do
         coroutine.yield(key, input[key])
      end
   end)
end
77

78
--- Replace a range of values in one table with the values from another.
-- The range `start`..`stop` is overwritten in place. Surplus replacement values are inserted after the range and
-- left-over slots of the range are removed, so the table grows or shrinks as needed.
-- @tparam table array Table to modify.
-- @tparam integer start First key to replace.
-- @tparam integer stop Last key to replace.
-- @tparam table replacement List-like table whose values are injected into array (may be nil to just delete).
-- @treturn table array First input array modified with values from replacement.
function utilities.splice (array, start, stop, replacement)
   local cursor = start
   local slots = stop - start + 1
   local count = 0
   if replacement then
      count = #replacement
   end
   for i = 1, count do
      local item = replacement[i]
      if slots > 0 then
         -- Still inside the original range: overwrite.
         array[cursor] = item
         slots = slots - 1
      else
         -- Range used up: make room for the extra value.
         table.insert(array, cursor, item)
      end
      cursor = cursor + 1
   end
   -- Drop whatever part of the range the replacement did not fill.
   for _ = 1, slots do
      table.remove(array, cursor)
   end
   return array
end
103

104
-- TODO: Unused, now deprecated?
-- Deep-copies orig, overlays every key/value pair of spec (when given) onto the copy, then calls the copy's
-- init method if it has one, and returns the copy.
function utilities.inherit (orig, spec)
   local copy = pl.tablex.deepcopy(orig)
   for key, value in pairs(spec or {}) do
      copy[key] = value
   end
   if copy.init then
      copy:init()
   end
   return copy
end
117

118
--- Type handling
119
-- @section types
120

121
-- Emit the warning shown whenever the legacy 'yes'/'no' spellings are used.
local function preferbool ()
   utilities.warn("Please use boolean values or strings such as 'true' and 'false' instead of 'yes' and 'no'.")
end

--- Cast user input into a boolean type.
-- User input content such as options typed into documents will return string values such as "true" or "false"
-- rather than true or false types. This evaluates those strings or other inputs and returns a consistent boolean
-- type in return. The legacy strings "yes" and "no" are still accepted but trigger a warning.
-- @tparam nil|bool|string value Input value such as a string to evaluate for truthiness.
-- @tparam[opt=false] boolean default Result to use when the input is nil or an empty string.
-- @treturn boolean
function utilities.boolean (value, default)
   if value == false or value == "false" then
      return false
   elseif value == true or value == "true" then
      return true
   elseif value == "no" then
      preferbool()
      return false
   elseif value == "yes" then
      preferbool()
      return true
   elseif value == nil or value == "" then
      return default == true
   end
   -- tostring() keeps the intended message intact for tables, functions and other values that cannot be
   -- concatenated directly.
   SU.error("Expecting a boolean value but got '" .. tostring(value) .. "'")
   -- Only reached if SU.error returns instead of raising.
   return default == true
end
162

163
--- Cast user input to an expected type.
-- If possible, converts input from one type to another. Not all types can be cast. For example "four" can't be cast
-- to a number, but "4" or 4 can. Likewise "6pt" or 6 can be cast to a SILE.types.measurement, SILE.types.length, or
-- even a SILE.types.node.glue, but not a SILE.types.color.
-- @tparam string wantedType Expected type (case-insensitive). The value is returned untouched when the name of its
-- current type, as reported by `SU.type`, occurs anywhere in this string.
-- @param value Input value to convert.
-- @return A value of the type wantedType.
function utilities.cast (wantedType, value)
   local actualType = SU.type(value)
   wantedType = string.lower(wantedType)
   if wantedType:match(actualType) then
      return value
   elseif actualType == "nil" and wantedType:match("nil") then
      return nil
   elseif wantedType:match("length") then
      return SILE.types.length(value)
   elseif wantedType:match("measurement") then
      return SILE.types.measurement(value)
   elseif wantedType:match("vglue") then
      -- Must be tested before "glue", which is a substring of "vglue".
      return SILE.types.node.vglue(value)
   elseif wantedType:match("glue") then
      return SILE.types.node.glue(value)
   elseif wantedType:match("kern") then
      return SILE.types.node.kern(value)
   elseif actualType == "nil" then
      SU.error("Cannot cast nil to " .. wantedType)
   elseif wantedType:match("boolean") then
      return SU.boolean(value)
   elseif wantedType:match("string") then
      return tostring(value)
   elseif wantedType:match("number") then
      if type(value) == "table" and type(value.tonumber) == "function" then
         return value:tonumber()
      end
      local num = tonumber(value)
      if not num then
         -- tostring() so that tables and other non-concatenable values still produce this message.
         SU.error("Cannot cast '" .. tostring(value) .. "' to " .. wantedType)
      end
      return num
   elseif wantedType:match("integer") then
      local num
      if type(value) == "table" and type(value.tonumber) == "function" then
         num = value:tonumber()
      else
         num = tonumber(value)
      end
      if not num then
         SU.error("Cannot cast '" .. tostring(value) .. "' to " .. wantedType)
      end
      -- wantedType cannot contain "number" here: that case is taken by the branch above.
      if num % 1 ~= 0 then
         -- Could be an error but since it wasn't checked before, let's just warn:
         -- Some packages might have wrongly typed settings, for instance.
         SU.warn("Casting an integer but got a float number " .. num)
      end
      return num
   else
      SU.error("Cannot cast to unrecognized type " .. wantedType)
   end
end
221

222
--- Return the type of an object.
-- Like Lua's `type`, but also handles various SILE user data types and tells integers apart from other numbers.
-- @tparam any value Any input value. If a table is one of SILE's classes or types, report on its internal type.
-- Otherwise use the output of `type`.
-- @treturn string
function utilities.type (value)
   local luatype = type(value)
   if luatype == "number" then
      if math.floor(value) == value then
         return "integer"
      end
      return "number"
   end
   if luatype == "table" then
      if value.prototype then
         return value:prototype()
      elseif value.is_a then
         return value.type
      end
   end
   return luatype
end
238

239
--- Errors and debugging
240
-- @section errors
241

242
--- Output a debug message only if debugging for a specific category is enabled.
-- Importantly passing a series of strings, functions, or tables is more efficient than trying to formulate a full
-- message using concatenation and tostring() methods in the original code because it doesn't have to even run if
-- the relevant debug flag is not enabled.
-- @tparam text category Category flag for which this message should be output.
-- @tparam string|function|table ... Each argument will be output separated by spaces, strings directly, functions
-- by evaluating them and using their return value, and everything else by passing it through `tostring`.
-- @usage
--    > glue = SILE.types.node.glue("6em")
--    > SU.debug("foo", "A glue node", glue)
--    [foo] A glue node G<6em>
function utilities.debug (category, ...)
   if SILE.quiet then
      return
   end
   if not utilities.debugging(category) then
      return
   end
   -- select("#") gives the true argument count, so nil arguments neither cut the message short nor leave holes.
   local count = select("#", ...)
   local inputs = { ... }
   local parts = {}
   for i = 1, count do
      local input = inputs[i]
      if type(input) == "function" then
         local status, output = pcall(input)
         if status then
            -- A callback may legitimately return nil or false; that is not an error.
            input = output
         else
            SU.warn(("Output of %s debug function was an error: %s"):format(category, tostring(output)))
            input = nil
         end
      end
      parts[i] = tostring(input)
   end
   io.stderr:write(("\n[%s] %s"):format(category, table.concat(parts, " ")))
end
274

275
--- Determine if a specific debug flag is set.
-- The catch-all flag `all` enables every category except "profile", which must always be requested by name.
-- @tparam string category Name of the flag status to check, e.g. "frames".
-- @return A truthy value when debugging is enabled for category (the stored flag value, or true via `all`).
function utilities.debugging (category)
   local flags = SILE.debugFlags
   if flags.all and category ~= "profile" then
      return true
   end
   return flags[category]
end
281

282
--- Warn about use of a deprecated feature.
-- Checks the current version and decides whether to warn or error, then outputs a message with as much useful
-- information as possible to make it easy for end users to update their usage.
-- @tparam string old The name of the deprecated interface.
-- @tparam string new A name of a suggested replacement interface.
-- @tparam string warnat The first release where the interface is considered deprecated, at which point there might
-- be a shim.
-- @tparam string errorat The first release where the interface is no longer functional even with a shim.
-- @tparam string extra Longer-form help to include in output separate from the expected one-liner of warning messages.
function utilities.deprecated (old, new, warnat, errorat, extra)
   warnat, errorat = semver(warnat or 0), semver(errorat or 0)
   -- SILE.version is defined *after* most of SILE loads. It's available at
   -- runtime but not useful if we encounter deprecated code in core code. Users
   -- will never encounter this failure, but as a developer it's hard to test a
   -- deprecation when core code refactoring is an all-or-nothing proposition.
   -- Hence we fake it 'till we make it, all deprecations internally are warnings.
   -- The dots are escaped so that they only match literal version separators, and a version string that does not
   -- match falls back to warnat instead of handing nil to semver().
   local version = SILE.version and SILE.version:match("v([0-9]*%.[0-9]*%.[0-9]*)")
   local current = version and semver(version) or warnat
   -- Names starting with a backslash are document commands and get no call brackets.
   local brackets = old:sub(1, 1) == "\\" and "" or "()"
   local _new = new and "Please use " .. (new .. brackets) .. " instead." or "Please don't use it."
   local help = extra and pl.stringx.indent(pl.stringx.dedent(extra), 2) or ""
   local msg = (old .. brackets)
      .. " was deprecated in SILE v"
      .. tostring(warnat)
      .. "\n\n  "
      .. _new
      .. "\n\n"
      .. help
   if errorat and current >= errorat then
      SU.error(msg)
   elseif warnat and current >= warnat then
      SU.warn(msg)
   end
end
314

315
--- Dump the contents of any Lua type.
-- For quick debugging, can be used on any number of any type of Lua value. Pretty-prints tables. A single argument
-- is dumped as-is; several arguments are dumped together as one list.
-- @tparam any ... Any number of values
function utilities.dump (...)
   -- Count the arguments explicitly: a lone `false` or `nil` must be dumped as itself rather than wrapped in (or
   -- mistaken for) a table.
   local target
   if select("#", ...) == 1 then
      target = (...)
   else
      target = { ... }
   end
   pl.pretty.dump(target, "/dev/stderr")
end
322

323
local _skip_traceback_levels = 2
97✔
324

325
--- Raise an error and exit.
-- Reports the message through `warn`, aborts the outputter, flags the error in `SILE.scratch`, then raises a Lua
-- error so that no more content is processed.
-- @tparam string message The error message to give.
-- @tparam boolean isbug Whether or not hitting this error is expected to be a code bug (as opposed to mistakes in user input).
function utilities.error (message, isbug)
   -- Errors are always reported, even if output had been silenced.
   SILE.quiet = false
   -- While this is not 2, warn() leaves out its own Lua traceback.
   _skip_traceback_levels = 3
   utilities.warn(message, isbug)
   _skip_traceback_levels = 2
   io.stderr:flush()
   SILE.outputter:abort()
   SILE.scratch.caughterror = true
   -- The message has already been printed, so the Lua error itself carries an empty one.
   error("", 2)
end
339

340
--- Output an information message.
-- Basically like `warn`, except no source tracing information is added. The message is normalized (trailing and
-- leading whitespace stripped, dedented, re-indented by two columns), its first character is replaced by "!" and
-- the result is written to stderr. Nothing is output in quiet mode.
-- @tparam string message
function utilities.msg (message)
   if SILE.quiet then
      return
   end
   local stringx = pl.stringx
   local text = stringx.rstrip(message)
   -- NOTE(review): the padding presumably lets dedent() treat the first line like the indented lines that follow.
   text = stringx.dedent("                        " .. text)
   text = stringx.indent(stringx.lstrip(text), 2)
   -- Only the first gsub result (the string) is kept; this swaps the leading indent character for the marker.
   text = text:gsub("^.", "!")
   io.stderr:write("\n" .. text .. "\n")
end
355

356
--- Output a warning.
-- Outputs a warning message including identifying where in the processing SILE is at when the warning is given.
-- With `SILE.traceback` set, or for bugs, the full location trace is shown; otherwise only the head location.
-- @tparam string message The warning message to give.
-- @tparam boolean isbug Whether or not hitting this warning is expected to be a code bug (as opposed to mistakes in user input).
function utilities.warn (message, isbug)
   if SILE.quiet then
      return
   end
   utilities.msg(message)
   local stderr = io.stderr
   if not (SILE.traceback or isbug) then
      stderr:write("  at " .. SILE.traceStack:locationHead())
   else
      stderr:write("at:\n" .. SILE.traceStack:locationTrace())
      -- The Lua traceback is only added when the skip level is at its default of 2.
      if _skip_traceback_levels == 2 then
         local trace = debug.traceback("", _skip_traceback_levels)
         stderr:write(trace or "\t! debug.traceback() did not identify code location")
      end
   end
   stderr:write("\n")
end
377

378
--- Math
379
-- @section math
380

381
--- Check equality of floating point values.
-- Comparing floating point numbers using math functions in Lua may give different and unexpected answers depending
-- on the Lua VM and other environmental factors. Both inputs are cast to numbers and considered equal when their
-- absolute difference does not exceed our standard internal epsilon scaled by the sum of their magnitudes.
-- @tparam float lhs
-- @tparam float rhs
-- @treturn boolean
function utilities.feq (lhs, rhs)
   local a = SU.cast("number", lhs)
   local b = SU.cast("number", rhs)
   local difference = math.abs(a - b)
   local tolerance = epsilon * (math.abs(a) + math.abs(b))
   return difference <= tolerance
end
394

395
--- Add up all the values in a table.
-- Positions `1` through `#array` are summed, starting from 0.
-- @tparam table array Input list-like table.
-- @treturn number Sum of all values.
function utilities.sum (array)
   local accumulated = 0
   for index = 1, #array do
      accumulated = accumulated + array[index]
   end
   return accumulated
end
406

407
--- Return maximum value of inputs.
-- `math.max`, but works on SILE types such as SILE.types.measurement, since values are compared with `>`.
-- Lua <= 5.2 can't handle math operators on objects.
-- @param ... Values to compare.
-- @return The largest input, or nil when called without arguments.
function utilities.max (...)
   local candidates = pl.utils.pack(...)
   -- The first value is the initial winner; every remaining one challenges it.
   local largest = table.remove(candidates, 1)
   for _, candidate in ipairs(candidates) do
      if candidate > largest then
         largest = candidate
      end
   end
   return largest
end
420

421
--- Return minimum value of inputs.
-- `math.min`, but works on SILE types such as SILE.types.measurement, since values are compared with `<`.
-- Lua <= 5.2 can't handle math operators on objects.
-- @param ... Values to compare.
-- @return The smallest input, or nil when called without arguments.
function utilities.min (...)
   local candidates = pl.utils.pack(...)
   local smallest = candidates[1]
   for _, candidate in ipairs(candidates) do
      if candidate < smallest then
         smallest = candidate
      end
   end
   return smallest
end
434

435
--- Round and normalize a number for debugging.
-- LuaJIT 2.1 betas (and inheritors such as OpenResty and Moonjit) are biased
-- towards rounding 0.5 up to 1, all other Lua interpreters are biased
-- towards rounding such floating point numbers down.  This hack nudges the
-- value away from zero just enough to fix the bias so our test suite works
-- across interpreters. Note that even a true rounding function here will fail
-- because the bias is inherent to the floating point type. Also note we are
-- erring in favor of the *less* common option because the LuaJIT VMs are
-- hopelessly broken whereas normal Lua VMs can be coerced.
-- @tparam number input Input value.
-- @treturn string Four-digit precision floating point.
function utilities.debug_round (input)
   local nudge = 0.00000000000001
   if input > 0 then
      input = input + nudge
   elseif input < 0 then
      input = input - nudge
   end
   return string.format("%.4f", input)
end
455

456
--- Remove empty spaces from list-like tables.
-- Iterating list-like tables is hard if some values have been removed. This converts `{ [1] = "a", [3] = "b" }`
-- into `{ [1] = "a", [2] = "b" }` which can be iterated using `ipairs` without stopping after 1. Values stored
-- under non-numeric keys are ignored, and `false` values are dropped along with the holes.
-- @tparam table items List-like table potentially with holes.
-- @treturn table List like table without holes (empty when the input has no positive numeric keys).
function utilities.compress (items)
   -- Find the highest numeric key by walking the table. Unpacking every key into math.max() fails on empty tables
   -- and can exceed the argument limit on very large ones.
   local highest = 0
   for key in pairs(items) do
      if type(key) == "number" and key > highest then
         highest = key
      end
   end
   local rv = {}
   for i = 1, highest do
      local item = items[i]
      if item then
         rv[#rv + 1] = item
      end
   end
   return rv
end
471

472
--- Reverse the order of a list-like table.
-- The table is modified in place by swapping values pairwise from both ends; nothing is returned.
-- @tparam table tbl Input list-like table.
function utilities.flip_in_place (tbl)
   local left, right = 1, #tbl
   while left < right do
      tbl[left], tbl[right] = tbl[right], tbl[left]
      left = left + 1
      right = right - 1
   end
end
483

484
-- TODO: Before documenting, consider whether this should be private to the one existing usage.
-- Given a list of option counts, returns an iterator over every combination of choices. Each combination is a
-- list holding, per position, a 1-based choice index no larger than the count at that position. The first position
-- varies fastest, like the lowest digit of a mixed-radix counter.
function utilities.allCombinations (options)
   local total = 1
   for i = 1, #options do
      total = total * options[i]
   end
   return coroutine.wrap(function ()
      for n = 0, total - 1 do
         local remainder = n
         local combination = {}
         for position = 1, #options do
            local base = options[position]
            local digit = remainder % base
            combination[position] = digit + 1
            remainder = (remainder - digit) / base
         end
         coroutine.yield(combination)
      end
   end)
end
503

504
-- Rate how badly a shortfall strains the available spring: 100 times the cube of |shortfall / spring|, rounded
-- down and capped at inf_bad. A spring of zero is rated inf_bad outright.
function utilities.rateBadness (inf_bad, shortfall, spring)
   if spring == 0 then
      return inf_bad
   end
   local ratio = math.abs(shortfall / spring)
   local bad = math.floor(100 * ratio ^ 3)
   return math.min(inf_bad, bad)
end
511

512
-- Adjust target in place by a share of width's elasticity: a negative ratio applies that ratio of the shrink
-- (when positive), a positive ratio applies that ratio of the stretch (when positive). Returns target.
function utilities.rationWidth (target, width, ratio)
   if ratio < 0 then
      local shrink = width.shrink:tonumber()
      if shrink > 0 then
         target:___add(shrink * ratio)
      end
   elseif ratio > 0 then
      local stretch = width.stretch:tonumber()
      if stretch > 0 then
         target:___add(stretch * ratio)
      end
   end
   return target
end
520

521
--- Text handling
522
-- @section text
523

524
--- Iterate over a string split into tokens via a pattern.
-- Every step yields a table with either a `string` field (text between separators) or a `separator` field (text
-- that matched the pattern), in the order they occur in the input.
-- @tparam string string Input string.
-- @tparam[opt="%s+"] string pattern Pattern on which to split the input.
-- @treturn function An iterator function
-- @usage for token in SU.gtoke("foo-bar-baz", "-") do print(token.string or token.separator) end
function utilities.gtoke (string, pattern)
   if string then
      string = tostring(string)
   else
      string = ""
   end
   if pattern then
      pattern = tostring(pattern)
   else
      pattern = "%s+"
   end
   local length = #string
   return coroutine.wrap(function ()
      local index = 1
      while true do
         local first, last = string:find(pattern, index)
         if not last then
            -- No further separator: whatever is left is the final token.
            if index <= length then
               coroutine.yield({ string = string:sub(index) })
            end
            return
         end
         if index < first then
            coroutine.yield({ string = string:sub(index, first - 1) })
         end
         coroutine.yield({ separator = string:sub(first, last) })
         index = last + 1
         if index > length then
            return
         end
      end
   end)
end
552

553
--- Convert a Unicode character to its corresponding codepoint.
-- Only the first UTF-8 sequence of the input is decoded; anything after it is ignored.
-- @tparam string uchar A single Unicode character.
-- @return number The Unicode code point where uchar is encoded, or -1 for an empty string.
function utilities.codepoint (uchar)
   local pending = 0 -- bytes still expected in the sequence being decoded
   local value = -1
   for i = 1, #uchar do
      local c = string.byte(uchar, i)
      if pending == 0 then
         -- A complete sequence has already been decoded: stop at the start of the next one.
         if value > -1 then
            return value
         end
         -- The lead byte tells how many bytes the sequence has.
         if c < 0x80 then
            pending = 1
         elseif c < 0xE0 then
            pending = 2
         elseif c < 0xF0 then
            pending = 3
         elseif c < 0xF8 then
            pending = 4
         else
            error("invalid UTF-8 character sequence")
         end
         -- Keep only the payload bits of the lead byte.
         value = bitshim.band(c, 2 ^ (8 - pending) - 1)
      else
         -- Continuation byte: append its low six bits.
         value = bitshim.bor(bitshim.lshift(value, 6), bitshim.band(c, 0x3F))
      end
      pending = pending - 1
   end
   return value
end
578

579
--- Convert a code point to a Unicode character.
-- @tparam number|string codepoint Input code point value, either as a number, a string holding the decimal value,
-- or a string holding the hexadecimal value in the form "U+NNNN" or "0xFFFF".
-- @treturn string The character represented by the code point description. A string that does not describe a code
-- point is returned unchanged.
function utilities.utf8charfromcodepoint (codepoint)
   -- Numbers have no string methods, so they are handled before any pattern matching.
   if type(codepoint) == "number" then
      return luautf8.char(codepoint)
   end
   local hex = codepoint:match("[Uu]%+(%x+)") or codepoint:match("0[xX](%x+)")
   local cp
   if hex then
      cp = tonumber(hex, 16)
   else
      cp = tonumber(codepoint)
   end
   if cp then
      return luautf8.char(cp)
   end
   return codepoint
end
597

598
--- Iterate over the code points of a UTF-16 encoded string.
-- Like `luautf8.codes`, but for UTF-16 strings. A code unit in the high surrogate range is combined with the unit
-- that follows it into a single code point.
-- @tparam string ustr Input string.
-- @tparam string endian "be" for big-endian input; any other value is read as little-endian.
-- @treturn function An iterator returning one numeric code point per call and nil at the end of the string.
function utilities.utf16codes (ustr, endian)
   local pos = 1
   -- Read the two bytes at pos as one 16-bit code unit and step past them.
   local function nextunit ()
      local first, second = string.byte(ustr, pos, pos + 1)
      pos = pos + 2
      if endian == "be" then
         return first * 256 + second
      end
      return second * 256 + first
   end
   return function ()
      if pos > #ustr then
         return nil
      end
      local wchar = nextunit()
      if wchar < 0xD800 or wchar > 0xDBFF then
         return wchar
      end
      -- High surrogate: the next unit supplies the low ten bits.
      local lowchar = nextunit()
      return 0x10000 + bitshim.lshift(bitshim.band(wchar, 0x03FF), 10) + bitshim.band(lowchar, 0x03FF)
   end
end
635

636
--- Split a UTF-8 string into characters.
-- Splitting a string with Lua's own string functions only works byte by byte. For text processing purposes it is
-- usually more desirable to split it into the 1, 2, 3, or 4 byte groups matching the UTF-8 encoding.
-- @tparam string str Input UTF-8 encoded string.
-- @treturn table A list-like table of UTF-8 strings each representing a Unicode char from the input string.
function utilities.splitUtf8 (str)
   local chars = {}
   for _, cp in luautf8.next, str do
      chars[#chars + 1] = luautf8.char(cp)
   end
   return chars
end
648

649
--- The last Unicode character in a UTF-8 encoded string.
-- Uses `SU.splitUtf8` to break a string into segments representing encoded characters and returns the last one,
-- which may be more than one byte long.
-- @tparam string str Input string.
-- @treturn string A single Unicode character (nil for an empty string).
function utilities.lastChar (str)
   local chars = utilities.splitUtf8(str)
   local count = #chars
   return chars[count]
end
658

659
--- The first Unicode character in a UTF-8 encoded string.
-- Uses `SU.splitUtf8` to break a string into segments representing encoded characters and returns the first one,
-- which may be more than one byte long.
-- @tparam string str Input string.
-- @treturn string A single Unicode character (nil for an empty string).
function utilities.firstChar (str)
   return utilities.splitUtf8(str)[1]
end
668

669
local byte, floor, reverse = string.byte, math.floor, string.reverse
97✔
670

671
--- The Unicode character in a UTF-8 encoded string at a specific position.
-- Returns the first UTF-8 sequence (a lead byte plus its continuation bytes) found at or after the given offset.
-- May be more than one byte.
-- @tparam string str Input string.
-- @tparam number index Offset in *bytes* (as used by `string.sub`) at which to start looking.
-- @treturn string A single Unicode character, or nil when none is found.
function utilities.utf8charat (str, index)
   local tail = str:sub(index)
   return tail:match("([%z\1-\127\194-\244][\128-\191]*)")
end
680

681
-- Return the two-byte UTF-16 byte order mark for "be" or "le"; any other value is reported through SU.error.
local function utf16bom (endianness)
   if endianness == "be" then
      return "\254\255"
   elseif endianness == "le" then
      return "\255\254"
   end
   return SU.error("Unrecognized endianness")
end
684

685
--- Encode a string to a hexadecimal representation.
-- Every byte of the input becomes two lowercase hexadecimal digits.
-- @tparam string str Input UTF-8 string
-- @treturn string Hexadecimal representation of str.
function utilities.hexencoded (str)
   -- Collect the pieces and join once; appending to a string inside the loop copies it on every pass.
   local hex = {}
   for i = 1, #str do
      hex[i] = string.format("%02x", byte(str, i))
   end
   return table.concat(hex)
end
695

696
--- Decode a hexadecimal representation into a string.
-- Each pair of hexadecimal digits becomes one byte of output. An input with an odd number of characters, or with a
-- pair that is not a hexadecimal number, is reported through `SU.error`.
-- @tparam string str Input hexadecimal encoded string.
-- @treturn string UTF-8 string.
function utilities.hexdecoded (str)
   if #str % 2 == 1 then
      SU.error("Cannot decode hex string with odd len")
   end
   -- Collect the bytes and join once instead of growing a string inside the loop.
   local chars = {}
   for i = 1, #str, 2 do
      local pair = string.sub(str, i, i + 1)
      local value = tonumber(pair, 16)
      if not value then
         SU.error("Cannot decode invalid hex digits '" .. pair .. "'")
      end
      chars[#chars + 1] = string.char(value)
   end
   return table.concat(chars)
end
709

710
-- Encode a code point of 0x10000 or above as a UTF-16 surrogate pair (four bytes). Each unit is written
-- big-endian and then byte-swapped when endianness is "le".
local function uchr_to_surrogate_pair (uchr, endianness)
   local offset = uchr - 0x10000
   local hi = floor(offset / 0x400) + 0xd800
   local lo = offset % 0x400 + 0xdc00
   local s_hi = string.char(floor(hi / 256), hi % 256)
   local s_lo = string.char(floor(lo / 256), lo % 256)
   if endianness == "le" then
      return reverse(s_hi) .. reverse(s_lo)
   end
   return s_hi .. s_lo
end
716

717
-- Encode a code point below 0x10000 as a single two-byte UTF-16 unit, byte-swapped when endianness is "le".
local function uchr_to_utf16_double_byte (uchr, endianness)
   local high, low = floor(uchr / 256), uchr % 256
   local ustr = string.char(high, low)
   if endianness == "le" then
      return reverse(ustr)
   end
   return ustr
end
721

722
-- Convert a UTF-8 string to UTF-16 in the given endianness ("be" or "le"). The result starts with the matching
-- byte order mark.
local utf8_to_utf16 = function (str, endianness)
   -- Collect the encoded units and join once; appending to a string inside the loop copies it on every pass.
   local units = { utf16bom(endianness) }
   for _, uchr in luautf8.codes(str) do
      if uchr < 0x10000 then
         units[#units + 1] = uchr_to_utf16_double_byte(uchr, endianness)
      else
         units[#units + 1] = uchr_to_surrogate_pair(uchr, endianness)
      end
   end
   return table.concat(units)
end
730

731
--- Convert a UTF-8 string to big-endian UTF-16.
-- The result is prefixed with the big-endian byte order mark.
-- @tparam string str UTF-8 encoded string.
-- @treturn string Big-endian UTF-16 encoded string.
function utilities.utf8_to_utf16be (str)
   local encoded = utf8_to_utf16(str, "be")
   return encoded
end
737

738
--- Convert a UTF-8 string to little-endian UTF-16.
-- The result is prefixed with the little-endian byte order mark.
-- @tparam string str UTF-8 encoded string.
-- @treturn string Little-endian UTF-16 encoded string.
function utilities.utf8_to_utf16le (str)
   local encoded = utf8_to_utf16(str, "le")
   return encoded
end
744

745
--- Convert a UTF-8 string to big-endian UTF-16, then encode in hex.
-- Combines `utilities.utf8_to_utf16be` and `utilities.hexencoded`.
-- @tparam string str UTF-8 encoded string.
-- @treturn string Hexadecimal representation of a big-endian UTF-16 encoded string.
function utilities.utf8_to_utf16be_hexencoded (str)
   local utf16 = utilities.utf8_to_utf16be(str)
   return utilities.hexencoded(utf16)
end
751

752
--- Convert a UTF-8 string to little-endian UTF-16, then encode in hex.
-- Combines `utilities.utf8_to_utf16le` and `utilities.hexencoded`.
-- @tparam string str UTF-8 encoded string.
-- @treturn string Hexadecimal representation of a little-endian UTF-16 encoded string.
function utilities.utf8_to_utf16le_hexencoded (str)
   local utf16 = utilities.utf8_to_utf16le(str)
   return utilities.hexencoded(utf16)
end
758

759
-- Convert a UTF-16 string in the given byte order to UTF-8.
-- A leading byte order mark equal to `utf16bom(endianness)` is dropped before decoding.
-- str: UTF-16 encoded input string.
-- endianness: byte order selector, forwarded to `utf16bom` and `utilities.utf16codes`.
-- Returns the UTF-8 encoded string.
local utf16_to_utf8 = function (str, endianness)
   local bom = utf16bom(endianness)

   -- Compare only the leading bytes: a plain prefix check neither interprets the BOM as a
   -- Lua pattern nor scans the rest of the string when no BOM is present.
   if str:sub(1, #bom) == bom then
      str = str:sub(#bom + 1)
   end

   -- Gather the encoded characters and join once instead of growing a string with `..`,
   -- which re-copies the accumulated result on every iteration.
   local chars = {}
   for uchr in utilities.utf16codes(str, endianness) do
      chars[#chars + 1] = luautf8.char(uchr)
   end
   return table.concat(chars)
end
771

772
--- Decode a big-endian UTF-16 string into UTF-8.
-- Delegates to the shared converter with the "be" byte order; a leading byte order mark matching
-- that order is dropped by the converter.
-- @tparam string str Big-endian UTF-16 encoded string.
-- @treturn string UTF-8 encoded string.
function utilities.utf16be_to_utf8 (str)
   local decoded = utf16_to_utf8(str, "be")
   return decoded
end
778

779
--- Decode a little-endian UTF-16 string into UTF-8.
-- Delegates to the shared converter with the "le" byte order; a leading byte order mark matching
-- that order is dropped by the converter.
-- @tparam string str Little-endian UTF-16 encoded string.
-- @treturn string UTF-8 encoded string.
function utilities.utf16le_to_utf8 (str)
   local decoded = utf16_to_utf8(str, "le")
   return decoded
end
785

786
--- Create a proxy table exposing the command names of the frames on `SILE.traceStack`.
-- The proxy stores no frames itself: integer lookups are resolved through the `__index` metamethod
-- against the live `SILE.traceStack`, so it always reflects the stack at the time of access.
-- @treturn table Proxy with `dump`, `parent` and `contains` methods.
function utilities.breadcrumbs ()
   local breadcrumbs = {}

   setmetatable(breadcrumbs, {
      -- breadcrumbs[n] is the `command` field of the n-th trace stack frame, or nil.
      __index = function (_, key)
         local frame = SILE.traceStack[key]
         return frame and frame.command or nil
      end,
      __len = function (_)
         return #SILE.traceStack
      end,
      -- NOTE(review): table.concat only consults __index/__len on Lua 5.3+; on older interpreters
      -- it reads the (empty) proxy directly, so confirm the output there if it matters.
      __tostring = function (self)
         return "B»" .. table.concat(self, "»")
      end,
   })

   --- Dump the proxy through `SU.dump`.
   function breadcrumbs:dump ()
      SU.dump(self)
   end

   --- Look up the command name a number of frames below the top of the stack.
   -- @tparam[opt=1] number count How many frames to step back from the top.
   -- @return The `command` of that frame, or nil.
   function breadcrumbs:parent (count)
      -- Note LuaJIT does not support __len, so this has to work even when that metamethod doesn't fire...
      return self[#SILE.traceStack - (count or 1)]
   end

   --- Search the stack from the top down for a frame whose command equals `needle`.
   -- @param needle Command name to look for.
   -- @tparam[opt=0] number startdepth Number of frames to skip at the top before searching.
   -- @treturn boolean Whether a matching frame was found.
   -- @treturn number Index of the matching frame in the trace stack, or -1.
   function breadcrumbs:contains (needle, startdepth)
      startdepth = startdepth or 0
      local depth = #SILE.traceStack
      for i = startdepth, depth - 1 do
         local frame = SILE.traceStack[depth - i]
         if frame.command == needle then
            -- Measure the stack directly rather than via `#self`: where the __len metamethod
            -- does not fire, `#self` is the raw length of the empty proxy (0).
            return true, depth - i
         end
      end
      return false, -1
   end

   return breadcrumbs
end
824

825
-- Re-export helper submodules on the utilities table (load order kept as-is).
utilities.formatNumber = require("core.utilities.numbers")

utilities.collatedSort = require("core.utilities.sorting")

local ast = require("core.utilities.ast")
utilities.ast = ast
-- Shortcut alias for the AST module's `debug` member.
utilities.debugAST = ast.debug
97✔
831

832
--- Deprecated alias for `SU.ast.subContent`.
-- Reports the call through `SU.deprecated` (with versions "0.15.0" and "0.17.0" and an extra note),
-- then forwards to the replacement.
-- @param content Passed through unchanged to `utilities.ast.subContent`.
-- @return Whatever `utilities.ast.subContent` returns.
function utilities.subContent (content)
   local note = [[Note that the new implementation no longer introduces an id="stuff" key.]]
   SU.deprecated("SU.subContent", "SU.ast.subContent", "0.15.0", "0.17.0", note)
   return utilities.ast.subContent(content)
end
842

843
--- Deprecated alias for `SU.ast.hasContent`.
-- Reports the call through `SU.deprecated` (with versions "0.15.0" and "0.17.0"), then forwards
-- to the replacement.
-- @param content Passed through unchanged to `SU.ast.hasContent`.
-- @return Whatever `SU.ast.hasContent` returns.
function utilities.hasContent (content)
   local legacy, replacement = "SU.hasContent", "SU.ast.hasContent"
   SU.deprecated(legacy, replacement, "0.15.0", "0.17.0")
   return SU.ast.hasContent(content)
end
847

848
--- Deprecated alias for `SU.ast.contentToString`.
-- Reports the call through `SU.deprecated` (with versions "0.15.0" and "0.17.0"), then forwards
-- to the replacement.
-- @param content Passed through unchanged to `SU.ast.contentToString`.
-- @return Whatever `SU.ast.contentToString` returns.
function utilities.contentToString (content)
   local legacy, replacement = "SU.contentToString", "SU.ast.contentToString"
   SU.deprecated(legacy, replacement, "0.15.0", "0.17.0")
   return SU.ast.contentToString(content)
end
852

853
--- Deprecated alias for `SU.ast.walkContent`.
-- Reports the call through `SU.deprecated` (with versions "0.15.0" and "0.17.0"), then forwards
-- to the replacement. Nothing is returned from this wrapper.
-- @param content Passed through unchanged to `SU.ast.walkContent`.
-- @param action Passed through unchanged to `SU.ast.walkContent`.
function utilities.walkContent (content, action)
   local legacy, replacement = "SU.walkContent", "SU.ast.walkContent"
   SU.deprecated(legacy, replacement, "0.15.0", "0.17.0")
   SU.ast.walkContent(content, action)
end
857

858
--- Deprecated alias for `SU.ast.stripContentPos`.
-- Reports the call through `SU.deprecated` (with versions "0.15.0" and "0.17.0"), then forwards
-- to the replacement.
-- @param content Passed through unchanged to `SU.ast.stripContentPos`.
-- @return Whatever `SU.ast.stripContentPos` returns.
function utilities.stripContentPos (content)
   local legacy, replacement = "SU.stripContentPos", "SU.ast.stripContentPos"
   SU.deprecated(legacy, replacement, "0.15.0", "0.17.0")
   return SU.ast.stripContentPos(content)
end
862

863
-- Hand the populated utilities table back to whoever loads this file.
return utilities
97✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc