• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

sile-typesetter / sile / 14502192980

16 Apr 2025 08:26PM UTC coverage: 57.267% (-5.4%) from 62.627%
14502192980

push

github

alerque
chore(packages): Remove unused package interdependency, url doesn't need verbatim

Reported-by: Omikhleia <didier.willis@gmail.com>

12352 of 21569 relevant lines covered (57.27%)

871.56 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

71.59
/packages/math/texlike.lua
1
local atoms = require("packages.math.atoms")
5✔
2
local syms = require("packages.math.unicode-symbols")
5✔
3
local bits = require("core.parserbits")
5✔
4

5
local epnf = require("epnf")
5✔
6
local lpeg = require("lpeg")
5✔
7

8
local operatorDict = syms.operatorDict
5✔
9
local symbols = syms.symbols
5✔
10

11
-- Grammar to parse TeX-like math
12
-- luacheck: push ignore
13
-- stylua: ignore start
14
---@diagnostic disable: undefined-global, unused-local, lowercase-global
15
local mathGrammar = function (_ENV)
16
   local _ = WS^0
5✔
17
   local eol = S"\r\n"
5✔
18
   local digit = R("09")
5✔
19
   local natural = (
20
      -- TeX doesn't really know what a number in a formula is.
21
      -- It handles any sequence of "ordinary" characters, including period(s):
22
      -- See for instance The TeXbook, p. 132.
23
      -- When later converting to MathML, we'll ideally want <mn>0.0123</mn>
24
      -- instead of, say, <mn>0</mn><mo>.</mo><mn>0123</mn> (not only wrong
25
      -- in essence, but also taking the risk of using a <mo> operator, then
26
      -- considered as a punctuation, thus inserting a space)
27
      -- We cannot be general, but checking MathJax and TeMML's behavior, they
28
      -- are not general either in this regard.
29
         digit^0 * P(".")^-1 * digit^1 + -- Decimal number (ex: 1.23, 0.23, .23)
5✔
30
         digit^1 -- Integer (digits only, ex: 123)
5✔
31
      ) / tostring
5✔
32
   local pos_natural = R("19") * digit^0 / tonumber
5✔
33

34
   -- \left and \right delimiters = The TeXbook p. 148.
35
   -- Characters with a delcode in TeX: The TeXbook p. 341
36
   -- These are for use in \left...\right pairs.
37
   -- We add the period (null delimiter) from p. 149-150.
38
   -- We don't include the backslash here and handle it just after.
39
   local delcode = S"([</|)]>."
5✔
40
   -- Left/right is followed by a delimiter with delcode, or a command.
41
   -- We use the delcode or backslash as terminator: commands such as
42
   -- \rightarrow must still be allowed.
43
   local leftright = function (s) return P(s) * (delcode + P"\\") end
15✔
44

45
   local ctrl_word = R("AZ", "az")^1
5✔
46
   local ctrl_symbol = P(1) - S"{}\\"
5✔
47
   local ctrl_sequence_name = C(ctrl_word + ctrl_symbol) - leftright("left") - leftright("right") / 1
15✔
48
   local comment = (
49
         P"%" *
5✔
50
         P(1-eol)^0 *
5✔
51
         eol^-1
5✔
52
      )
53
   local utf8cont = R("\128\191")
5✔
54
   local utf8code = lpeg.R("\0\127")
5✔
55
      + lpeg.R("\194\223") * utf8cont
5✔
56
      + lpeg.R("\224\239") * utf8cont * utf8cont
5✔
57
      + lpeg.R("\240\244") * utf8cont * utf8cont * utf8cont
5✔
58
   -- Identifiers inside \mo and \mi tags
59
   local sileID = C(bits.identifier + P(1)) / 1
5✔
60

61
   local group = P"{" * V"mathlist" * (P"}" + E("`}` expected"))
10✔
62
   -- Simple amsmath-like \text command (no embedded math)
63
   local textgroup = P"{" * C((1-P"}")^1) * (P"}" + E("`}` expected"))
10✔
64
   -- TeX \left...\right group
65
   local delim =
66
      -- Delimiter with delcode
67
      C(delcode) / function (d)
5✔
68
         if d ~= "." then
×
69
            return {
×
70
               id = "atom",
71
               d
72
            }
73
         end
74
         return nil
×
75
      end
76
      -- Delimiter as escaped \{ or \}
77
      + P"\\" * C(S"{}") / function (d)
5✔
78
         return {
×
79
            id = "atom",
80
            d
81
         }
82
      end
83
      -- Delimiter as command ex. \langle
84
      + P"\\" * C(ctrl_sequence_name) / 1 / function (cmd)
5✔
85
         return {
×
86
            id = "command",
87
            command = cmd
×
88
         }
89
      end
90

91
      local leftrightgroup = P"\\left" * delim * V"mathlist" * P"\\right" * delim
5✔
92
         / function (left, subformula, right)
×
93
            if not left and not right then
×
94
               -- No delimiters, return the subformula as-is
95
               return subformula
×
96
            end
97
            -- Rewrap the subformula in a flagged mathlist
98
            local mrow = {
×
99
               id = "mathlist",
100
               options = {},
101
               is_paired_explicit = true, -- Internal flag
102
               subformula
103
            }
104
            if left then
×
105
               table.insert(mrow, 1, left)
×
106
            end
107
            if right then
×
108
               table.insert(mrow, right)
×
109
            end
110
            return mrow
×
111
         end
112

113
   local element_no_infix =
114
      leftrightgroup + -- Important: before command
×
115
      V"def" +
5✔
116
      V"text" + -- Important: before command
5✔
117
      V"command" +
5✔
118
      group +
5✔
119
      V"argument" +
5✔
120
      V"atom"
5✔
121
   local element =
122
      V"supsub" +
5✔
123
      V"subsup" +
5✔
124
      V"sup" +
5✔
125
      V"sub" +
5✔
126
      element_no_infix
5✔
127
   local sep = S",;" * _
5✔
128
   local quotedString = (P'"' * C((1-P'"')^1) * P'"')
5✔
129
   local value = ( quotedString + (1-S",;]")^1 )
5✔
130
   local pair = Cg(sileID * _ * "=" * _ * C(value)) * sep^-1 / function (...)
5✔
131
      local t = {...}; return t[1], t[#t]
417✔
132
   end
133
   local list = Cf(Ct"" * pair^0, rawset)
5✔
134
   local parameters = (
135
         P"[" *
5✔
136
         list *
5✔
137
         P"]"
5✔
138
      )^-1 / function (a)
5✔
139
            return type(a)=="table" and a or {}
629✔
140
         end
141
   local dim2_arg_inner = Ct(V"mathlist" * (P"&" * V"mathlist")^0) /
5✔
142
      function (t)
143
         t.id = "mathlist"
×
144
         return t
×
145
      end
146
   local dim2_arg =
147
      Cg(P"{" *
10✔
148
         dim2_arg_inner *
5✔
149
         (P"\\\\" * dim2_arg_inner)^1 *
5✔
150
         (P"}" + E("`}` expected"))
10✔
151
         ) / function (...)
×
152
            local t = {...}
×
153
            -- Remove the last mathlist if empty. This way,
154
            -- `inner1 \\ inner2 \\` is the same as `inner1 \\ inner2`.
155
            if not t[#t][1] or not t[#t][1][1] then table.remove(t) end
×
156
            return pl.utils.unpack(t)
×
157
         end
158

159
   local dim2_arg_inner = Ct(V"mathlist" * (P"&" * V"mathlist")^0) /
5✔
160
      function (t)
161
         t.id = "mathlist"
16✔
162
         return t
16✔
163
      end
164
   local dim2_arg =
165
      Cg(P"{" *
10✔
166
         dim2_arg_inner *
5✔
167
         (P"\\\\" * dim2_arg_inner)^1 *
5✔
168
         (P"}" + E("`}` expected"))
10✔
169
         ) / function (...)
×
170
         local t = {...}
4✔
171
         -- Remove the last mathlist if empty. This way,
172
         -- `inner1 \\ inner2 \\` is the same as `inner1 \\ inner2`.
173
         if not t[#t][1] or not t[#t][1][1] then table.remove(t) end
4✔
174
         return pl.utils.unpack(t)
4✔
175
         end
176

177
   -- TeX uses the regular asterisk (* = U+002A) in superscripts or subscript:
178
   -- The TeXbook exercise 18.32 (p. 179, 330) for instance.
179
   -- Fonts usually have the asterisk raised too high, so using the Unicode
180
   -- asterisk operator U+2217 looks better (= \ast in TeX).
181
   local astop = P"*" / luautf8.char(0x2217)
5✔
182
   -- TeX interprets apostrophes as primes in math mode:
183
   -- The TeXbook p. 130 expands ' to ^\prime commands and repeats the \prime
184
   -- for multiple apostrophes.
185
   -- The TeXbook p. 134: "Then there is the character ', which we know is used
186
   -- as an abbreviation for \prime superscripts."
187
   -- (So we are really sure superscript primes are really the intended meaning.)
188
   -- Here we use the Unicode characters for primes, but the intent is the same.
189
   local primes = (
190
         P"''''" / luautf8.char(0x2057) + -- quadruple prime
5✔
191
         P"'''" / luautf8.char(0x2034) + -- triple prime
5✔
192
         P"''" / luautf8.char(0x2033) + -- double prime
5✔
193
         P"'" / luautf8.char(0x2032) -- prime
5✔
194
      ) / function (s)
×
195
            return { id="atom", s }
×
196
         end
197
   local primes_sup = (
198
         primes * _ * P"^" * _ * element_no_infix / function (p, e)
5✔
199
            -- Combine the prime with the superscript in the same mathlist
200
            if e.id == "mathlist" then
×
201
               table.insert(e, 1, p)
×
202
               return e
×
203
            end
204
            return { id="mathlist", p, e }
×
205
         end
206
         + primes -- or standalone primes
5✔
207
      )
208

209
   START "math"
5✔
210
   math = V"mathlist" * EOF"Unexpected character at end of math code"
10✔
211
   mathlist = (comment + (WS * _) + element)^0
5✔
212
   supsub = element_no_infix * _ * primes_sup                  * _ *  P"_" * _ * element_no_infix +
5✔
213
            element_no_infix * _ * P"^" * _ * element_no_infix * _ *  P"_" * _ * element_no_infix
5✔
214
   subsup = element_no_infix * _ * P"_" * _ * element_no_infix * primes_sup +
5✔
215
            element_no_infix * _ * P"_" * _ * element_no_infix * _ * P"^" * _ * element_no_infix
5✔
216
   sup =  element_no_infix * _ * primes_sup +
5✔
217
          element_no_infix * _ * P"^" * _ * element_no_infix
5✔
218
   sub = element_no_infix * _ * P"_" * _ * element_no_infix
5✔
219
   atom = natural + astop + C(utf8code - S"\\{}%^_&'") +
5✔
220
      (P"\\{" + P"\\}") / function (s) return string.sub(s, -1) end
5✔
221
   text = (
×
222
         P"\\text" *
5✔
223
         Cg(parameters, "options") *
5✔
224
         textgroup
5✔
225
      )
5✔
226
   command = (
×
227
         P"\\" *
5✔
228
         Cg(ctrl_sequence_name, "command") *
5✔
229
         Cg(parameters, "options") *
5✔
230
         (dim2_arg + group^0)
5✔
231
      )
5✔
232
   def = P"\\def" * _ * P"{" *
5✔
233
      Cg(ctrl_sequence_name, "command-name") * P"}" * _ *
5✔
234
      --P"[" * Cg(digit^1, "arity") * P"]" * _ *
235
      P"{" * V"mathlist" * P"}"
5✔
236
   argument = P"#" * Cg(pos_natural, "index")
5✔
237
end
238
-- luacheck: pop
239
-- stylua: ignore end
240
---@diagnostic enable: undefined-global, unused-local, lowercase-global
241

242
local mathParser = epnf.define(mathGrammar)
5✔
243

244
local commands = {}
5✔
245

246
-- A command type is a type for each argument it takes: either string or MathML
247
-- tree. If a command has no type, it is assumed to take only trees.
248
-- Tags like <mi>, <mo>, <mn> take a string, and this needs to be propagated in
249
-- commands that use them.
250

251
local objType = {
5✔
252
   tree = 1,
253
   str = 2,
254
   unknown = 3,
255
}
256

257
local function inferArgTypes_aux (accumulator, typeRequired, body)
258
   if type(body) == "table" then
2,140✔
259
      if body.id == "argument" then
2,140✔
260
         local ret = accumulator
135✔
261
         while #ret < body.index do
270✔
262
            -- Don't leave holes in the argument list.
263
            -- This may happen if the arguments are not used in order, and the
264
            -- entry might later be filled with the appropriate type... unless
265
            -- the argument is not used at all.
266
            -- CODE SMELL, but this recursive inference is hard to assess.
267
            table.insert(ret, objType.unknown)
135✔
268
         end
269
         ret[body.index] = typeRequired
135✔
270
         return ret
135✔
271
      elseif body.id == "command" then
2,005✔
272
         if commands[body.command] then
445✔
273
            local cmdArgTypes = commands[body.command][1]
345✔
274
            if #cmdArgTypes ~= #body then
345✔
275
               SU.error(
×
276
                  "Wrong number of arguments ("
277
                     .. #body
×
278
                     .. ") for command "
×
279
                     .. body.command
×
280
                     .. " (should be "
×
281
                     .. #cmdArgTypes
×
282
                     .. ")"
×
283
               )
284
            else
285
               for i = 1, #cmdArgTypes do
640✔
286
                  accumulator = inferArgTypes_aux(accumulator, cmdArgTypes[i], body[i])
590✔
287
               end
288
            end
289
            return accumulator
345✔
290
         elseif body.command == "mi" or body.command == "mo" or body.command == "mn" then
100✔
291
            if #body ~= 1 then
×
292
               SU.error("Wrong number of arguments (" .. #body .. ") for command " .. body.command .. " (should be 1)")
×
293
            end
294
            accumulator = inferArgTypes_aux(accumulator, objType.str, body[1])
×
295
            return accumulator
×
296
         else
297
            -- Not a macro, recurse on children assuming tree type for all
298
            -- arguments
299
            for _, child in ipairs(body) do
180✔
300
               accumulator = inferArgTypes_aux(accumulator, objType.tree, child)
160✔
301
            end
302
            return accumulator
100✔
303
         end
304
      elseif body.id == "atom" then
1,560✔
305
         return accumulator
770✔
306
      else
307
         -- Simply recurse on children
308
         for _, child in ipairs(body) do
2,140✔
309
            accumulator = inferArgTypes_aux(accumulator, typeRequired, child)
2,700✔
310
         end
311
         return accumulator
790✔
312
      end
313
   else
314
      SU.error("invalid argument to inferArgTypes_aux")
×
315
   end
316
end
317

318
local inferArgTypes = function (body)
319
   return inferArgTypes_aux({}, objType.tree, body)
415✔
320
end
321

322
local function registerCommand (name, argTypes, func)
323
   commands[name] = { argTypes, func }
480✔
324
end
325

326
-- Computes func(v_n, k_n, ... func(v2, k2, func(v1, k1, {}))...), i.e. applies
327
-- func on every key-value pair in the table. Keys with numeric indices are
328
-- processed in order. This is an important property for MathML compilation below.
329
local function fold_pairs (func, table)
330
   local accumulator = {}
1,231✔
331
   for k, v in pl.utils.kpairs(table) do
10,483✔
332
      accumulator = func(v, k, accumulator)
6,790✔
333
   end
334
   for i, v in ipairs(table) do
3,041✔
335
      accumulator = func(v, i, accumulator)
3,620✔
336
   end
337
   return accumulator
1,231✔
338
end
339

340
local function forall (pred, list)
341
   for _, x in ipairs(list) do
33✔
342
      if not pred(x) then
56✔
343
         return false
28✔
344
      end
345
   end
346
   return true
5✔
347
end
348

349
local compileToStr = function (argEnv, mathlist)
350
   if #mathlist == 1 and mathlist.id == "atom" then
82✔
351
      -- List is a single atom
352
      return mathlist[1]
×
353
   elseif #mathlist == 1 and mathlist[1].id == "argument" then
82✔
354
      return argEnv[mathlist[1].index]
2✔
355
   elseif mathlist.id == "argument" then
80✔
356
      return argEnv[mathlist.index]
×
357
   else
358
      local ret = ""
80✔
359
      for _, elt in ipairs(mathlist) do
357✔
360
         if elt.id == "atom" then
277✔
361
            ret = ret .. elt[1]
277✔
362
         elseif elt.id == "command" and symbols[elt.command] then
×
363
            ret = ret .. symbols[elt.command]
×
364
         else
365
            SU.error("Encountered non-character token in command that takes a string")
×
366
         end
367
      end
368
      return ret
80✔
369
   end
370
end
371

372
local function isOperatorKind (tree, typeOfAtom)
373
   if not tree then
674✔
374
      return false -- safeguard
×
375
   end
376
   if tree.command ~= "mo" then
674✔
377
      return false
546✔
378
   end
379
   -- Case \mo[atom=xxx]{ops}
380
   -- E.g. \mo[atom=op]{lim}
381
   if tree.options and tree.options.atom then
128✔
382
      return atoms.types[tree.options.atom] == typeOfAtom
×
383
   end
384
   -- Case \mo{ops} where ops is registered with the requested type
385
   -- E.g. \mo{∑} or \sum
386
   if tree[1] and operatorDict[tree[1]] and operatorDict[tree[1]].atom then
128✔
387
      return operatorDict[tree[1]].atom == typeOfAtom
124✔
388
   end
389
   return false
4✔
390
end
391

392
local function isMoveableLimitsOrAlwaysStacked (tree)
393
   if not tree then
80✔
394
      return false -- safeguard
24✔
395
   end
396
   if tree.is_always_stacked then
56✔
397
      -- We use an internal flag to mark commands that are always stacking
398
      -- their sup/sub arguments, such as brace-like commands.
399
      return true
×
400
   end
401
   if tree.command ~= "mo" then
56✔
402
      -- On the recursion:
403
      -- MathML allows movablelimits on <mo> elements, but "embellished operators"
404
      -- can be other elements inheriting the property from their "core operator",
405
      -- see MathML Core §3.2.4.1, which is full of intricacies so we are probably
406
      -- not even doing the right thing here.
407
      -- On the hack:
408
      -- See variant commands for limits further down.
409
      return SU.boolean(tree.is_hacked_movablelimits, false) or isMoveableLimitsOrAlwaysStacked(tree[1])
156✔
410
   end
411
   if tree.options and SU.boolean(tree.options.movablelimits, false) then
8✔
412
      return true
×
413
   end
414
   if tree[1] and operatorDict[tree[1]] and operatorDict[tree[1]].forms then
4✔
415
      -- Leap of faith: We have no idea yet which form the operator will take
416
      -- in the final MathML.
417
      -- In the MathML operator dictionary, some operators have a movablelimits
418
      -- in some forms and not in others.
419
      -- Ex. \Join (U+2A1D) and \bigtriangleleft (U+2A1E) have it prefix but not
420
      -- infix, for some unspecified reason (?).
421
      -- Assume that if at least one form has movablelimits, the operator is
422
      -- considered to have movablelimits "in general".
423
      for _, form in pairs(operatorDict[tree[1]].forms) do
6✔
424
         if SU.boolean(form.movablelimits, false) then
8✔
425
            return true
2✔
426
         end
427
      end
428
   end
429
   return false
2✔
430
end
431
local function isCloseOperator (tree)
432
   return isOperatorKind(tree, atoms.types.close)
340✔
433
end
434
local function isOpeningOperator (tree)
435
   return isOperatorKind(tree, atoms.types.open)
334✔
436
end
437

438
local function isAccentSymbol (symbol)
439
   return operatorDict[symbol] and operatorDict[symbol].atom == atoms.types.accent
26✔
440
end
441
local function isBottomAccentSymbol (symbol)
442
   return operatorDict[symbol] and operatorDict[symbol].atom == atoms.types.botaccent
26✔
443
end
444

445
local function compileToMathML_aux (_, arg_env, tree)
446
   if type(tree) == "string" then
1,451✔
447
      return tree
220✔
448
   end
449
   local function compile_and_insert (child, key, accumulator)
450
      if type(key) ~= "number" then
5,205✔
451
         accumulator[key] = child
3,395✔
452
         return accumulator
3,395✔
453
      -- Compile all children, except if this node is a macro definition (no
454
      -- evaluation "under lambda") or the application of a registered macro
455
      -- (since evaluating the nodes depends on the macro's signature, it is more
456
      -- complex and done below).
457
      elseif tree.id == "def" or (tree.id == "command" and commands[tree.command]) then
1,810✔
458
         -- Conserve unevaluated child
459
         table.insert(accumulator, child)
529✔
460
      else
461
         -- Compile next child
462
         local comp = compileToMathML_aux(nil, arg_env, child)
1,281✔
463
         if comp then
1,281✔
464
            if comp.id == "wrapper" then
866✔
465
               -- Insert all children of the wrapper node
466
               for _, inner_child in ipairs(comp) do
158✔
467
                  table.insert(accumulator, inner_child)
79✔
468
               end
469
            else
470
               table.insert(accumulator, comp)
787✔
471
            end
472
         end
473
      end
474
      return accumulator
1,810✔
475
   end
476
   tree = fold_pairs(compile_and_insert, tree)
2,462✔
477
   if tree.id == "math" then
1,231✔
478
      tree.command = "math"
33✔
479
      -- If the outermost `mrow` contains only other `mrow`s, remove it
480
      -- (allowing vertical stacking).
481
      if forall(function (c)
66✔
482
         return c.command == "mrow"
28✔
483
      end, tree[1]) then
66✔
484
         tree[1].command = "math"
5✔
485
         return tree[1]
5✔
486
      end
487
   elseif tree.id == "mathlist" then
1,198✔
488
      -- Turn mathlist into `mrow` except if it has exactly one `mtr` or `mtd`
489
      -- child.
490
      -- Note that `def`s have already been compiled away at this point.
491
      if #tree == 1 then
242✔
492
         if tree[1].command == "mtr" or tree[1].command == "mtd" then
177✔
493
            return tree[1]
×
494
         else
495
            tree.command = "mrow"
177✔
496
         end
497
      elseif tree.is_paired_explicit then
65✔
498
         -- We already did the re-wrapping of open/close delimiters in the parser
499
         -- via \left...\right, doing it would not harm but would add an extra mrow,
500
         -- which we can avoid directly to keep the tree minimal.
501
         -- N.B. We could have used the same flag, but it's easier to debug this way.
502
         tree.is_paired = true
×
503
         tree.is_paired_explicit = nil
×
504
         tree.command = "mrow"
×
505
      else
506
         -- Re-wrap content from opening to closing operator in an implicit mrow,
507
         -- so stretchy operators apply to the correct span of content.
508
         local children = {}
65✔
509
         local stack = {}
65✔
510
         for _, child in ipairs(tree) do
399✔
511
            if isOpeningOperator(child) then
668✔
512
               table.insert(stack, children)
8✔
513
               local mrow = {
8✔
514
                  command = "mrow",
515
                  is_paired = true, -- Internal flag to mark this re-wrapped mrow
516
                  options = {},
8✔
517
                  child,
8✔
518
               }
519
               table.insert(children, mrow)
8✔
520
               children = mrow
8✔
521
            elseif isCloseOperator(child) then
652✔
522
               table.insert(children, child)
8✔
523
               if #stack > 0 then
8✔
524
                  children = table.remove(stack)
16✔
525
               end
526
            elseif
×
527
               (child.command == "msubsup" or child.command == "msub" or child.command == "msup")
318✔
528
               and isCloseOperator(child[1]) -- child[1] is the base
28✔
529
            then
530
               if #stack > 0 then
×
531
                  -- Special case for closing operator with sub/superscript:
532
                  -- (....)^i must be interpreted as {(....)}^i, not as (...{)}^i
533
                  -- Push the closing operator into the mrow
534
                  table.insert(children, child[1])
×
535
                  -- Move the mrow into the msubsup, replacing the closing operator
536
                  child[1] = children
×
537
                  -- And insert the msubsup into the parent
538
                  children = table.remove(stack)
×
539
                  children[#children] = child
×
540
               else
541
                  table.insert(children, child)
×
542
               end
543
            else
544
               table.insert(children, child)
318✔
545
            end
546
         end
547
         tree = #stack > 0 and stack[1] or children
65✔
548
         tree.command = "mrow"
65✔
549
      end
550
   elseif tree.id == "atom" then
956✔
551
      local codepoints = {}
218✔
552
      for _, cp in luautf8.codes(tree[1]) do
438✔
553
         table.insert(codepoints, cp)
220✔
554
      end
555
      local cp = codepoints[1]
218✔
556
      if
557
         #codepoints == 1
218✔
558
         and ( -- If length of UTF-8 string is 1
×
559
            cp >= SU.codepoint("A") and cp <= SU.codepoint("Z")
530✔
560
            or cp >= SU.codepoint("a") and cp <= SU.codepoint("z")
470✔
561
            or cp >= SU.codepoint("Α") and cp <= SU.codepoint("Ω")
332✔
562
            or cp >= SU.codepoint("α") and cp <= SU.codepoint("ω")
332✔
563
            or cp == SU.codepoint("ϑ")
292✔
564
            or cp == SU.codepoint("ϕ")
292✔
565
            or cp == SU.codepoint("ϰ")
292✔
566
            or cp == SU.codepoint("ϱ")
292✔
567
            or cp == SU.codepoint("ϖ")
292✔
568
            or cp == SU.codepoint("ϵ")
292✔
569
         )
570
      then
571
         tree.command = "mi"
70✔
572
      elseif lpeg.match(lpeg.R("09") ^ 1, tree[1]) then
148✔
573
         tree.command = "mn"
66✔
574
      else
575
         tree.command = "mo"
82✔
576
      end
577
      tree.options = {}
218✔
578
   -- Translate TeX-like sub/superscripts to `munderover` or `msubsup`,
579
   -- depending on whether the base is an operator with moveable limits,
580
   -- or a brace-like command.
581
   elseif tree.id == "sup" and isMoveableLimitsOrAlwaysStacked(tree[1]) then
746✔
582
      tree.command = "mover"
×
583
   elseif tree.id == "sub" and isMoveableLimitsOrAlwaysStacked(tree[1]) then
756✔
584
      tree.command = "munder"
×
585
   elseif tree.id == "subsup" and isMoveableLimitsOrAlwaysStacked(tree[1]) then
740✔
586
      tree.command = "munderover"
2✔
587
   elseif tree.id == "supsub" and isMoveableLimitsOrAlwaysStacked(tree[1]) then
736✔
588
      tree.command = "munderover"
×
589
      local tmp = tree[2]
×
590
      tree[2] = tree[3]
×
591
      tree[3] = tmp
×
592
   elseif tree.id == "sup" then
736✔
593
      tree.command = "msup"
8✔
594
   elseif tree.id == "sub" then
728✔
595
      tree.command = "msub"
18✔
596
   elseif tree.id == "subsup" then
710✔
597
      tree.command = "msubsup"
×
598
   elseif tree.id == "supsub" then
710✔
599
      tree.command = "msubsup"
×
600
      local tmp = tree[2]
×
601
      tree[2] = tree[3]
×
602
      tree[3] = tmp
×
603
   elseif tree.id == "def" then
710✔
604
      local commandName = tree["command-name"]
415✔
605
      local argTypes = inferArgTypes(tree[1])
415✔
606
      registerCommand(commandName, argTypes, function (compiledArgs)
830✔
607
         return compileToMathML_aux(nil, compiledArgs, tree[1])
79✔
608
      end)
609
      return nil
415✔
610
   elseif tree.id == "text" then
295✔
611
      tree.command = "mtext"
2✔
612
   elseif tree.id == "command" and commands[tree.command] then
293✔
613
      local argTypes = commands[tree.command][1]
159✔
614
      local cmdFun = commands[tree.command][2]
159✔
615
      local applicationTree = tree
159✔
616
      local cmdName = tree.command
159✔
617
      if #applicationTree ~= #argTypes then
159✔
618
         SU.error(
×
619
            "Wrong number of arguments ("
620
               .. #applicationTree
×
621
               .. ") for command "
×
622
               .. cmdName
×
623
               .. " (should be "
×
624
               .. #argTypes
×
625
               .. ")"
×
626
         )
627
      end
628
      -- Compile every argument
629
      local compiledArgs = {}
159✔
630
      for i, arg in pairs(applicationTree) do
909✔
631
         if type(i) == "number" then
750✔
632
            if argTypes[i] == objType.tree then
114✔
633
               table.insert(compiledArgs, compileToMathML_aux(nil, arg_env, arg))
64✔
634
            else
635
               local x = compileToStr(arg_env, arg)
82✔
636
               table.insert(compiledArgs, x)
82✔
637
            end
638
         else
639
            -- Not an argument but an attribute. Add it to the compiled
640
            -- argument tree as-is
641
            compiledArgs[i] = applicationTree[i]
636✔
642
         end
643
      end
644
      local res = cmdFun(compiledArgs)
159✔
645
      if res.command == "mrow" then
159✔
646
         -- Mark the outer mrow to be unwrapped in the parent
647
         res.id = "wrapper"
79✔
648
      end
649
      return res
159✔
650
   elseif tree.id == "command" and symbols[tree.command] then
134✔
651
      local atom = { id = "atom", [1] = symbols[tree.command] }
26✔
652
      if isAccentSymbol(symbols[tree.command]) and #tree > 0 then
52✔
653
         -- LaTeX-style accents \overrightarrow{v} = <mover accent="true"><mi>v</mi><mo>&#x20D7;</mo></mover>
654
         local accent = {
×
655
            id = "command",
656
            command = "mover",
657
            options = {
×
658
               accent = "true",
659
            },
660
         }
661
         accent[1] = compileToMathML_aux(nil, arg_env, tree[1])
×
662
         accent[2] = compileToMathML_aux(nil, arg_env, atom)
×
663
         tree = accent
×
664
      elseif isBottomAccentSymbol(symbols[tree.command]) and #tree > 0 then
52✔
665
         -- LaTeX-style bottom accents \underleftarrow{v} = <munder accent="true"><mi>v</mi><mo>&#x20EE;</mo></munder>
666
         local accent = {
×
667
            id = "command",
668
            command = "munder",
669
            options = {
×
670
               accentunder = "true",
671
            },
672
         }
673
         accent[1] = compileToMathML_aux(nil, arg_env, tree[1])
×
674
         accent[2] = compileToMathML_aux(nil, arg_env, atom)
×
675
         tree = accent
×
676
      elseif #tree > 0 then
26✔
677
         -- Play cool with LaTeX-style commands that don't take arguments:
678
         -- Edge case for non-accent symbols so we don't lose bracketed groups
679
         -- that might have been seen as command arguments.
680
         -- Ex. \langle{x}\rangle (without space after \langle)
681
         local sym = compileToMathML_aux(nil, arg_env, atom)
×
682
         -- Compile all children in-place
683
         for i, child in ipairs(tree) do
×
684
            tree[i] = compileToMathML_aux(nil, arg_env, child)
×
685
         end
686
         -- Insert symbol at the beginning,
687
         -- And add a wrapper mrow to be unwrapped in the parent.
688
         table.insert(tree, 1, sym)
×
689
         tree.command = "mrow"
×
690
         tree.id = "wrapper"
×
691
      else
692
         tree = compileToMathML_aux(nil, arg_env, atom)
52✔
693
      end
694
   elseif tree.id == "argument" then
108✔
695
      if arg_env[tree.index] then
32✔
696
         return arg_env[tree.index]
32✔
697
      else
698
         SU.error("Argument #" .. tree.index .. " has escaped its scope (probably not fully applied command).")
×
699
      end
700
   end
701
   tree.id = nil
620✔
702
   return tree
620✔
703
end
704

705
local function printMathML (tree)
706
   if type(tree) == "string" then
×
707
      return tree
×
708
   end
709
   local result = "\\" .. tree.command
×
710
   if tree.options then
×
711
      local options = {}
×
712
      for k, v in pairs(tree.options) do
×
713
         table.insert(options, k .. "=" .. tostring(v))
×
714
      end
715
      if #options > 0 then
×
716
         result = result .. "[" .. table.concat(options, ", ") .. "]"
×
717
      end
718
   end
719
   if #tree > 0 then
×
720
      result = result .. "{"
×
721
      for _, child in ipairs(tree) do
×
722
         result = result .. printMathML(child)
×
723
      end
724
      result = result .. "}"
×
725
   end
726
   return result
×
727
end
728

729
local function compileToMathML (_, arg_env, tree)
730
   local result = compileToMathML_aux(_, arg_env, tree)
33✔
731
   SU.debug("texmath", function ()
66✔
732
      return "Resulting MathML: " .. printMathML(result)
×
733
   end)
734
   return result
33✔
735
end
736

737
--- Parse TeX-like math source into a syntax tree.
-- Runs the first element of `content` through the math grammar with
-- epnf.parsestring, and hands a lazy pretty-printer of the result to SU.debug
-- under the "texmath" flag.
-- @param _ Unused
-- @tparam table content Table whose first element is the source to parse
-- @return The value returned by epnf.parsestring
local function convertTexlike (_, content)
   local source = content[1]
   local ast = epnf.parsestring(mathParser, source)
   SU.debug("texmath", function ()
      return "Parsed TeX math: " .. pl.pretty.write(ast)
   end)
   return ast
end
744

745
-- The mi, mo and mn commands each take one string argument and return it
-- unchanged. They are registered in that order, each with its own argument
-- type table and its own handler function.
for _, token in ipairs({ "mi", "mo", "mn" }) do
   registerCommand(token, { [1] = objType.str }, function (x)
      return x
   end)
end
754

755
-- Register a limit-like variant command
756
-- Variants of superior, inferior, projective and injective limits are special:
757
-- They accept a sub/sup behaving as a movablelimits, but also have a symbol
758
-- on top of the limit symbol, which is not a movablelimits.
759
-- I can't see in the MathML specification how to do this properly: MathML Core
760
-- seems to only allow movablelimits on <mo> elements, and <mover>/<munder> may
761
-- inherit that property from their "core operator", but in this case we do not
762
-- want the accent to be movable, only the limit sup/sub.
763
-- So we use a hack, and also avoid "\def" here to prevent unwanted mrows.
764
-- @tparam string name    TeX command name
765
-- @tparam string command MathML command (mover or munder)
766
-- @tparam number symbol  Unicode codepoint for the accent symbol
767
-- @tparam string text    Text representation
768
local function registerVarLimits (name, command, symbol, text)
   -- Handler for the argument-less command: builds a fresh tree on each call.
   local function build ()
      local wrapperOptions
      if command == "mover" then
         wrapperOptions = { accent = "true" }
      else
         wrapperOptions = { accentunder = "true" }
      end
      -- The limit operator itself, explicitly not movable.
      local limit = {
         command = "mo",
         options = { atom = "op", movablelimits = false },
         text,
      }
      -- The symbol stacked on the limit operator.
      local accent = {
         command = "mo",
         options = { accentunder = "true" },
         luautf8.char(symbol),
      }
      return {
         command = command,
         is_hacked_movablelimits = true, -- Internal flag to mark this as a hack
         options = wrapperOptions,
         limit,
         accent,
      }
   end
   registerCommand(name, {}, build)
end
788
-- Limit variants, registered in order; all of them use "lim" as their text.
for _, spec in ipairs({
   { "varlimsup", "mover", 0x203E }, -- U+203E OVERLINE
   { "varliminf", "munder", 0x203E }, -- U+203E OVERLINE
   { "varprojlim", "munder", 0x2190 }, -- U+2190 LEFTWARDS ARROW
   { "varinjlim", "munder", 0x2192 }, -- U+2192 RIGHTWARDS ARROW
}) do
   registerVarLimits(spec[1], spec[2], spec[3], "lim")
end
5✔
792

793
-- Register brace-like commands.
794
-- Those symbols are accents per-se in MathML, and are non-combining in Unicode.
795
-- But TeX treats them as "pseudo-accent" stretchy symbols.
796
-- Moreover, they accept a sub/sup which is always stacked, and not movable.
797
-- So we use an internal flag.
798
-- We also avoid "\def" here to prevent unwanted mrows resulting from the
799
-- compilation of the argument.
800
-- @tparam string name    TeX command name
801
-- @tparam string command MathML command (mover or munder)
802
-- @tparam number symbol  Unicode codepoint for the brace symbol
803
local function registerBraceLikeCommands (name, command, symbol)
   -- Handler receiving the single tree argument: builds a fresh node per call.
   local function build (tree)
      local wrapperOptions
      if command == "mover" then
         wrapperOptions = { accent = "true" }
      else
         wrapperOptions = { accentunder = "true" }
      end
      -- The stretchy brace symbol placed over or under the content.
      local brace = {
         command = "mo",
         options = { stretchy = "true" },
         luautf8.char(symbol),
      }
      return {
         command = command,
         is_always_stacked = true, -- Internal flag to mark this as a brace-like command
         options = wrapperOptions,
         tree[1], -- Only the first child of the argument tree is used
         brace,
      }
   end
   registerCommand(name, { [1] = objType.tree }, build)
end
821
-- Note: the following overrides the default commands from xml-entities / unicode-math.
822
-- Brace-like commands, registered in order.
for _, spec in ipairs({
   { "overbrace", "mover", 0x23DE }, -- U+23DE TOP CURLY BRACKET
   { "underbrace", "munder", 0x23DF }, -- U+23DF BOTTOM CURLY BRACKET
   { "overparen", "mover", 0x23DC }, -- U+23DC TOP PARENTHESIS
   { "underparen", "munder", 0x23DD }, -- U+23DD BOTTOM PARENTHESIS
   { "overbracket", "mover", 0x23B4 }, -- U+23B4 TOP SQUARE BRACKET
   { "underbracket", "munder", 0x23B5 }, -- U+23B5 BOTTOM SQUARE BRACKET
}) do
   registerBraceLikeCommands(spec[1], spec[2], spec[3])
end
5✔
828

829
compileToMathML(
10✔
830
   nil,
5✔
831
   {},
832
   convertTexlike(nil, {
5✔
833
      [==[
×
834
  \def{frac}{\mfrac{#1}{#2}}
835
  \def{sqrt}{\msqrt{#1}}
836
  \def{bi}{\mi[mathvariant=bold-italic]{#1}}
837
  \def{dsi}{\mi[mathvariant=double-struck]{#1}}
838

839
  % From amsmath:
840
  \def{to}{\mo[atom=bin]{→}}
841
  \def{lim}{\mo[atom=op, movablelimits=true]{lim}}
842
  \def{gcd}{\mo[atom=op, movablelimits=true]{gcd}}
843
  \def{sup}{\mo[atom=op, movablelimits=true]{sup}}
844
  \def{inf}{\mo[atom=op, movablelimits=true]{inf}}
845
  \def{max}{\mo[atom=op, movablelimits=true]{max}}
846
  \def{min}{\mo[atom=op, movablelimits=true]{min}}
847
  % Those use U+202F NARROW NO-BREAK SPACE in their names
848
  \def{limsup}{\mo[atom=op, movablelimits=true]{lim sup}}
849
  \def{liminf}{\mo[atom=op, movablelimits=true]{lim inf}}
850
  \def{projlim}{\mo[atom=op, movablelimits=true]{proj lim}}
851
  \def{injlim}{\mo[atom=op, movablelimits=true]{inj lim}}
852

853
  % Other pre-defined operators from the TeXbook, p. 162:
854
  % TeX of course defines them with \mathop, so we use atom=op here.
855
  % MathML would use a <mi> here.
856
  % But we use a <mo> so the atom type is handled
857
  \def{arccos}{\mo[atom=op]{arccos}}
858
  \def{arcsin}{\mo[atom=op]{arcsin}}
859
  \def{arctan}{\mo[atom=op]{arctan}}
860
  \def{arg}{\mo[atom=op]{arg}}
861
  \def{cos}{\mo[atom=op]{cos}}
862
  \def{cosh}{\mo[atom=op]{cosh}}
863
  \def{cot}{\mo[atom=op]{cot}}
864
  \def{coth}{\mo[atom=op]{coth}}
865
  \def{csc}{\mo[atom=op]{csc}}
866
  \def{deg}{\mo[atom=op]{deg}}
867
  \def{det}{\mo[atom=op]{det}}
868
  \def{dim}{\mo[atom=op]{dim}}
869
  \def{exp}{\mo[atom=op]{exp}}
870
  \def{hom}{\mo[atom=op]{hom}}
871
  \def{ker}{\mo[atom=op]{ker}}
872
  \def{lg}{\mo[atom=op]{lg}}
873
  \def{ln}{\mo[atom=op]{ln}}
874
  \def{log}{\mo[atom=op]{log}}
875
  \def{Pr}{\mo[atom=op]{Pr}}
876
  \def{sec}{\mo[atom=op]{sec}}
877
  \def{sin}{\mo[atom=op]{sin}}
878
  \def{sinh}{\mo[atom=op]{sinh}}
879
  \def{tan}{\mo[atom=op]{tan}}
880
  \def{tanh}{\mo[atom=op]{tanh}}
881

882
  % Standard spaces gleaned from plain TeX
883
  \def{thinspace}{\mspace[width=thin]}
884
  \def{negthinspace}{\mspace[width=-thin]}
885
  \def{,}{\thinspace}
886
  \def{!}{\negthinspace}
887
  \def{medspace}{\mspace[width=med]}
888
  \def{negmedspace}{\mspace[width=-med]}
889
  \def{>}{\medspace}
890
  \def{thickspace}{\mspace[width=thick]}
891
  \def{negthickspace}{\mspace[width=-thick]}
892
  \def{;}{\thickspace}
893
  \def{enspace}{\mspace[width=1en]}
894
  \def{enskip}{\enspace}
895
  \def{quad}{\mspace[width=1em]}
896
  \def{qquad}{\mspace[width=2em]}
897

898
  % MathML says a single-character identifier must be in italic by default.
899
  % TeX however has the following Greek capital macros rendered in upright shape.
900
  % It so common that you've probably never seen Γ(x) written with an italic gamma.
901
  \def{Gamma}{\mi[mathvariant=normal]{Γ}}
902
  \def{Delta}{\mi[mathvariant=normal]{Δ}}
903
  \def{Theta}{\mi[mathvariant=normal]{Θ}}
904
  \def{Lambda}{\mi[mathvariant=normal]{Λ}}
905
  \def{Xi}{\mi[mathvariant=normal]{Ξ}}
906
  \def{Pi}{\mi[mathvariant=normal]{Π}}
907
  \def{Sigma}{\mi[mathvariant=normal]{Σ}}
908
  \def{Upsilon}{\mi[mathvariant=normal]{Υ}}
909
  \def{Phi}{\mi[mathvariant=normal]{Φ}}
910
  \def{Psi}{\mi[mathvariant=normal]{Ψ}}
911
  \def{Omega}{\mi[mathvariant=normal]{Ω}}
912
  % Some calligraphic (script), fraktur, double-struck styles:
913
  % Convenience for compatibility with LaTeX.
914
  \def{mathcal}{\mi[mathvariant=script]{#1}}
915
  \def{mathfrak}{\mi[mathvariant=fraktur]{#1}}
916
  \def{mathbb}{\mi[mathvariant=double-struck]{#1}}
917
  % Some style-switching commands for compatibility with LaTeX math.
918
  % Caveat emptor: LaTeX would allow these to apply to a whole formula.
919
  % We can't do that in MathML, as mathvariant applies to token elements only.
920
  % Also note that LaTeX and related packages may have many more such commands.
921
  % We only provide a few common ('historical') ones here.
922
  \def{mathrm}{\mi[mathvariant=normal]{#1}}
923
  \def{mathbf}{\mi[mathvariant=bold]{#1}}
924
  \def{mathit}{\mi[mathvariant=italic]{#1}}
925
  \def{mathsf}{\mi[mathvariant=sans-serif]{#1}}
926
  \def{mathtt}{\mi[mathvariant=monospace]{#1}}
927

928
  % Modulus operator forms
929
  % See Michael Downes & Barbara Beeton, "Short Math Guide for LaTeX"
930
  % American Mathematical Society (v2.0, 2017), §7.1 p. 18
931
  \def{bmod}{\mo[atom=bin]{mod}}
932
  \def{pmod}{\quad(\mo[atom=ord]{mod}\>#1)}
933
  \def{mod}{\quad \mo[atom=ord]{mod}\>#1}
934
  \def{pod}{\quad(#1)}
935

936
  % Phantom commands from TeX/LaTeX
937
  \def{phantom}{\mphantom{#1}}
938
  \def{hphantom}{\mpadded[height=0, depth=0]{\mphantom{#1}}}
939
  \def{vphantom}{\mpadded[width=0]{\mphantom{#1}}}
940

941
  % Stacking commands
942
  % Plain LaTeX \stackrel is only supposed to be used on binary relations.
943
  % It's a poor naming choice, and a poor design choice as well.
944
  % Package "stackrel" on CTAN redefine its for relational operators, and
945
  % provides a \stackbin for binary operators.
946
  % Users would, without respect for semantics, use them interchangeably.
947
  % We use the same definition for both, and expect the MathML layer to handle
948
  % the content as appropriate based on the actual operators...
949
  \def{stackrel}{\mover{#2}{#1}}
950
  \def{stackbin}{\mover{#2}{#1}}
951
  % Package "amsmath" went with its own generic \overset and \underset.
952
  \def{overset}{\mover{#2}{#1}}
953
  \def{underset}{\munder{#2}{#1}}
954
]==],
955
   })
956
)
957

958
return { convertTexlike, compileToMathML }
5✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc