• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

sile-typesetter / sile / 12272864087

11 Dec 2024 08:55AM UTC coverage: 29.607% (-41.0%) from 70.614%
12272864087

push

github

web-flow
Merge 95cccf286 into f394f608c

5834 of 19705 relevant lines covered (29.61%)

429.05 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/packages/math/texlike.lua
1
local atoms = require("packages.math.atoms")
×
2
local syms = require("packages.math.unicode-symbols")
×
3
local bits = require("core.parserbits")
×
4

5
local epnf = require("epnf")
×
6
local lpeg = require("lpeg")
×
7

8
local operatorDict = syms.operatorDict
×
9
local symbols = syms.symbols
×
10

11
-- Grammar to parse TeX-like math
12
-- luacheck: push ignore
13
-- stylua: ignore start
14
---@diagnostic disable: undefined-global, unused-local, lowercase-global
15
-- Grammar definition passed to `epnf.define`. The function runs with the epnf
-- grammar environment as `_ENV`, so bare names such as `P`, `S`, `R`, `C`, `V`,
-- `Cg`, `Ct`, `Cf`, `WS`, `E`, `EOF` and `START` resolve to entries of that
-- environment, and the bare assignments at the end (`math = ...`,
-- `mathlist = ...`, etc.) define the grammar rules.
local mathGrammar = function (_ENV)
   local _ = WS^0
   local eol = S"\r\n"
   local digit = R("09")
   local natural = (
      -- TeX doesn't really know what a number in a formula is.
      -- It handles any sequence of "ordinary" characters, including period(s):
      -- See for instance The TeXbook, p. 132.
      -- When later converting to MathML, we'll ideally want <mn>0.0123</mn>
      -- instead of, say, <mn>0</mn><mo>.</mo><mn>0123</mn> (not only wrong
      -- in essence, but also taking the risk of using a <mo> operator, then
      -- considered as a punctuation, thus inserting a space)
      -- We cannot be general, but checking MathJax and TeMML's behavior, they
      -- are not general either in this regard.
         digit^0 * P(".")^-1 * digit^1 + -- Decimal number (ex: 1.23, 0.23, .23)
         digit^1 -- Integer (digits only, ex: 123)
      ) / tostring
   local pos_natural = R("19") * digit^0 / tonumber

   -- \left and \right delimiters = The TeXbook p. 148.
   -- Characters with a delcode in TeX: The TeXbook p. 341
   -- These are for use in \left...\right pairs.
   -- We add the period (null delimiter) from p. 149-150.
   -- We don't include the backslash here and handle it just after.
   local delcode = S"([</|)]>."
   -- Left/right is followed by a delimiter with delcode, or a command.
   -- We use the delcode or backslash as terminator: commands such as
   -- \rightarrow must still be allowed.
   local leftright = function (s) return P(s) * (delcode + P"\\") end

   local ctrl_word = R("AZ", "az")^1
   local ctrl_symbol = P(1) - S"{}\\"
   local ctrl_sequence_name = C(ctrl_word + ctrl_symbol) - leftright("left") - leftright("right") / 1
   local comment = (
         P"%" *
         P(1-eol)^0 *
         eol^-1
      )
   local utf8cont = R("\128\191")
   local utf8code = lpeg.R("\0\127")
      + lpeg.R("\194\223") * utf8cont
      + lpeg.R("\224\239") * utf8cont * utf8cont
      + lpeg.R("\240\244") * utf8cont * utf8cont * utf8cont
   -- Identifiers inside \mo and \mi tags
   local sileID = C(bits.identifier + P(1)) / 1
   local mathMLID = (utf8code - S"\\{}%")^1 / function (...)
         local ret = ""
         local t = {...}
         for _,b in ipairs(t) do
         ret = ret .. b
         end
         return ret
      end
   local group = P"{" * V"mathlist" * (P"}" + E("`}` expected"))
   -- Simple amsmath-like \text command (no embedded math)
   local textgroup = P"{" * C((1-P"}")^1) * (P"}" + E("`}` expected"))
   -- TeX \left...\right group
   local delim =
      -- Delimiter with delcode
      C(delcode) / function (d)
         if d ~= "." then
            return {
               id = "atom",
               d
            }
         end
         -- The period is the null delimiter: produce no atom.
         return nil
      end
      -- Delimiter as escaped \{ or \}
      + P"\\" * C(S"{}") / function (d)
         return {
            id = "atom",
            d
         }
      end
      -- Delimiter as command ex. \langle
      + P"\\" * C(ctrl_sequence_name) / 1 / function (cmd)
         return {
            id = "command",
            command = cmd
         }
      end

   local leftrightgroup = P"\\left" * delim * V"mathlist" * P"\\right" * delim
      / function (left, subformula, right)
         if not left and not right then
            -- No delimiters, return the subformula as-is
            return subformula
         end
         -- Rewrap the subformula in a flagged mathlist
         local mrow = {
            id = "mathlist",
            options = {},
            is_paired_explicit = true, -- Internal flag
            subformula
         }
         if left then
            table.insert(mrow, 1, left)
         end
         if right then
            table.insert(mrow, right)
         end
         return mrow
      end

   local element_no_infix =
      leftrightgroup + -- Important: before command
      V"def" +
      V"text" + -- Important: before command
      V"command" +
      group +
      V"argument" +
      V"atom"
   local element =
      V"supsub" +
      V"subsup" +
      V"sup" +
      V"sub" +
      element_no_infix
   local sep = S",;" * _
   local quotedString = (P'"' * C((1-P'"')^1) * P'"')
   local value = ( quotedString + (1-S",;]")^1 )
   local pair = Cg(sileID * _ * "=" * _ * C(value)) * sep^-1 / function (...)
      local t = {...}; return t[1], t[#t]
   end
   local list = Cf(Ct"" * pair^0, rawset)
   local parameters = (
         P"[" *
         list *
         P"]"
      )^-1 / function (a)
            return type(a)=="table" and a or {}
         end
   -- One row of a two-dimensional argument: mathlists separated by `&`.
   local dim2_arg_inner = Ct(V"mathlist" * (P"&" * V"mathlist")^0) /
      function (t)
         t.id = "mathlist"
         return t
      end
   -- Two-dimensional argument: at least two rows separated by `\\`.
   local dim2_arg =
      Cg(P"{" *
         dim2_arg_inner *
         (P"\\\\" * dim2_arg_inner)^1 *
         (P"}" + E("`}` expected"))
         ) / function (...)
            local t = {...}
            -- Remove the last mathlist if empty. This way,
            -- `inner1 \\ inner2 \\` is the same as `inner1 \\ inner2`.
            if not t[#t][1] or not t[#t][1][1] then table.remove(t) end
            return pl.utils.unpack(t)
         end

   -- TeX uses the regular asterisk (* = U+002A) in superscripts or subscript:
   -- The TeXbook exercise 18.32 (p. 179, 330) for instance.
   -- Fonts usually have the asterisk raised too high, so using the Unicode
   -- asterisk operator U+2217 looks better (= \ast in TeX).
   local astop = P"*" / luautf8.char(0x2217)
   -- TeX interprets apostrophes as primes in math mode:
   -- The TeXbook p. 130 expands ' to ^\prime commands and repeats the \prime
   -- for multiple apostrophes.
   -- The TeXbook p. 134: "Then there is the character ', which we know is used
   -- as an abbreviation for \prime superscripts."
   -- (So we are really sure superscript primes are really the intended meaning.)
   -- Here we use the Unicode characters for primes, but the intent is the same.
   local primes = (
         P"''''" / luautf8.char(0x2057) + -- quadruple prime
         P"'''" / luautf8.char(0x2034) + -- triple prime
         P"''" / luautf8.char(0x2033) + -- double prime
         P"'" / luautf8.char(0x2032) -- prime
      ) / function (s)
            return { id="atom", s }
         end
   local primes_sup = (
         primes * _ * P"^" * _ * element_no_infix / function (p, e)
            -- Combine the prime with the superscript in the same mathlist
            if e.id == "mathlist" then
               table.insert(e, 1, p)
               return e
            end
            return { id="mathlist", p, e }
         end
         + primes -- or standalone primes
      )

   START "math"
   math = V"mathlist" * EOF"Unexpected character at end of math code"
   mathlist = (comment + (WS * _) + element)^0
   supsub = element_no_infix * _ * primes_sup                  * _ *  P"_" * _ * element_no_infix +
            element_no_infix * _ * P"^" * _ * element_no_infix * _ *  P"_" * _ * element_no_infix
   subsup = element_no_infix * _ * P"_" * _ * element_no_infix * primes_sup +
            element_no_infix * _ * P"_" * _ * element_no_infix * _ * P"^" * _ * element_no_infix
   sup =  element_no_infix * _ * primes_sup +
          element_no_infix * _ * P"^" * _ * element_no_infix
   sub = element_no_infix * _ * P"_" * _ * element_no_infix
   atom = natural + astop + C(utf8code - S"\\{}%^_&'") +
      (P"\\{" + P"\\}") / function (s) return string.sub(s, -1) end
   text = (
         P"\\text" *
         Cg(parameters, "options") *
         textgroup
      )
   command = (
         P"\\" *
         Cg(ctrl_sequence_name, "command") *
         Cg(parameters, "options") *
         (dim2_arg + group^0)
      )
   def = P"\\def" * _ * P"{" *
      Cg(ctrl_sequence_name, "command-name") * P"}" * _ *
      --P"[" * Cg(digit^1, "arity") * P"]" * _ *
      P"{" * V"mathlist" * P"}"
   argument = P"#" * Cg(pos_natural, "index")
end
245
-- luacheck: pop
246
-- stylua: ignore end
247
---@diagnostic enable: undefined-global, unused-local, lowercase-global
248

249
-- Build the parser from the grammar once, when this file is loaded.
local mathParser = epnf.define(mathGrammar)

-- Registry of macros, keyed by command name (filled by registerCommand).
local commands = {}

-- Each command carries one type per argument it takes: either a string or a
-- MathML tree. A command without type information is assumed to take trees
-- only. Tags such as <mi>, <mo> and <mn> take a string, and that requirement
-- has to be propagated to the commands built on top of them.
local objType = { tree = 1, str = 2, unknown = 3 }
263

264
--- Walk a macro body and record the type each `#n` argument must have.
-- @tparam table accumulator Argument types found so far, indexed by argument number
-- @param typeRequired The objType value imposed by the enclosing context
-- @tparam table body Parse-tree node to inspect
-- @treturn table The updated accumulator
local function inferArgTypes_aux (accumulator, typeRequired, body)
   if type(body) ~= "table" then
      SU.error("invalid argument to inferArgTypes_aux")
      return
   end

   local kind = body.id

   if kind == "atom" then
      -- Atoms carry no argument reference.
      return accumulator
   end

   if kind == "argument" then
      local slots = accumulator
      -- Don't leave holes in the argument list.
      -- Arguments may be referenced out of order; a placeholder may be
      -- overwritten later with the real type... unless that argument is
      -- never used at all.
      -- CODE SMELL, but this recursive inference is hard to assess.
      while #slots < body.index do
         slots[#slots + 1] = objType.unknown
      end
      slots[body.index] = typeRequired
      return slots
   end

   if kind ~= "command" then
      -- Any other node: its children inherit the required type.
      for _, child in ipairs(body) do
         accumulator = inferArgTypes_aux(accumulator, typeRequired, child)
      end
      return accumulator
   end

   local registered = commands[body.command]
   if registered then
      -- Known macro: each child gets the type from the macro's signature.
      local expected = registered[1]
      if #expected == #body then
         for i = 1, #expected do
            accumulator = inferArgTypes_aux(accumulator, expected[i], body[i])
         end
      else
         SU.error(
            "Wrong number of arguments ("
               .. #body
               .. ") for command "
               .. body.command
               .. " (should be "
               .. #expected
               .. ")"
         )
      end
      return accumulator
   end

   local name = body.command
   if name == "mi" or name == "mo" or name == "mn" then
      -- Token elements take exactly one string argument.
      if #body ~= 1 then
         SU.error("Wrong number of arguments (" .. #body .. ") for command " .. body.command .. " (should be 1)")
      end
      return inferArgTypes_aux(accumulator, objType.str, body[1])
   end

   -- Not a macro: recurse on children, assuming tree type for all arguments.
   for _, child in ipairs(body) do
      accumulator = inferArgTypes_aux(accumulator, objType.tree, child)
   end
   return accumulator
end
324

325
--- Infer the argument types of a macro body, starting from an empty list and
-- assuming the body as a whole is required to be a tree.
-- @tparam table body Parse tree of the macro body
-- @treturn table Argument types indexed by argument number
local function inferArgTypes (body)
   local noTypesYet = {}
   return inferArgTypes_aux(noTypesYet, objType.tree, body)
end
328

329
--- Store a macro in the `commands` registry, replacing any previous entry.
-- @tparam string name Command name
-- @tparam table argTypes Argument types, stored at index 1 of the entry
-- @tparam function func Implementation, stored at index 2 of the entry
local function registerCommand (name, argTypes, func)
   local entry = { [1] = argTypes, [2] = func }
   commands[name] = entry
end
332

333
--- Fold a callback over every entry of a table, starting from an empty table.
-- The callback is invoked as `func(value, key, accumulator)` and its return
-- value becomes the accumulator for the next call.
-- Keys yielded by `pl.utils.kpairs` are visited first (Penlight documents these
-- as the non-integer keys), then the sequence part is visited in increasing
-- index order via `ipairs`. Processing numeric indices in order is an
-- important property for the MathML compilation below.
-- @tparam function func Callback `(value, key, accumulator) -> accumulator`
-- @tparam table tbl Table to traverse
-- @return The final accumulator
local function fold_pairs (func, tbl)
   -- N.B. The parameter is deliberately not called `table`, which would shadow
   -- the standard library inside this function.
   local accumulator = {}
   for k, v in pl.utils.kpairs(tbl) do
      accumulator = func(v, k, accumulator)
   end
   for i, v in ipairs(tbl) do
      accumulator = func(v, i, accumulator)
   end
   return accumulator
end
346

347
--- Tell whether a predicate holds for every element of a sequence.
-- Stops at the first element that fails; an empty sequence yields true.
-- @tparam function pred Predicate applied to each element
-- @tparam table list Sequence to check
-- @treturn boolean
local function forall (pred, list)
   local holds = true
   for _, item in ipairs(list) do
      if not pred(item) then
         holds = false
         break
      end
   end
   return holds
end
355

356
--- Flatten a node into the plain string expected by string-typed commands.
-- @tparam table argEnv Values of the enclosing macro's arguments, by index
-- @tparam table mathlist Node to flatten (an atom, an argument, or a list)
-- @return The flattened string, or the argument value taken from argEnv
local compileToStr = function (argEnv, mathlist)
   local count = #mathlist
   if count == 1 and mathlist.id == "atom" then
      -- The node is itself a single atom
      return mathlist[1]
   end
   if count == 1 and mathlist[1].id == "argument" then
      -- A list holding just one argument reference
      return argEnv[mathlist[1].index]
   end
   if mathlist.id == "argument" then
      return argEnv[mathlist.index]
   end

   -- General case: concatenate atoms and known symbol commands.
   local text = ""
   for _, elt in ipairs(mathlist) do
      if elt.id == "atom" then
         text = text .. elt[1]
      elseif elt.id == "command" and symbols[elt.command] then
         text = text .. symbols[elt.command]
      else
         SU.error("Encountered non-character token in command that takes a string")
      end
   end
   return text
end
378

379
--- Tell whether a node is an `mo` operator of the given atom type.
-- @param tree Node to test (may be nil)
-- @param typeOfAtom Atom type to compare against
-- @treturn boolean
local function isOperatorKind (tree, typeOfAtom)
   -- A missing node (safeguard) or anything but an `mo` never qualifies.
   if not tree or tree.command ~= "mo" then
      return false
   end
   -- Case \mo[atom=xxx]{ops}
   -- E.g. \mo[atom=op]{lim}
   local opts = tree.options
   if opts and opts.atom then
      return atoms.types[opts.atom] == typeOfAtom
   end
   -- Case \mo{ops} where ops is registered with the requested type
   -- E.g. \mo{∑} or \sum
   local entry = tree[1] and operatorDict[tree[1]]
   if entry and entry.atom then
      return entry.atom == typeOfAtom
   end
   return false
end
398

399
--- Tell whether sub/superscripts attached to a node should be stacked
-- (rendered as under/over scripts) rather than set to its side.
-- @param tree Node used as the base of the scripts (may be nil)
-- @return A truthy value when the scripts should be stacked
local function isMoveableLimitsOrAlwaysStacked (tree)
   if not tree then
      return false -- safeguard
   end
   -- Internal flag marking commands that always stack their sup/sub
   -- arguments, such as brace-like commands.
   if tree.is_always_stacked then
      return true
   end
   if tree.command ~= "mo" then
      -- On the recursion:
      -- MathML allows movablelimits on <mo> elements, but "embellished operators"
      -- can be other elements inheriting the property from their "core operator",
      -- see MathML Core §3.2.4.1, which is full of intricacies so we are probably
      -- not even doing the right thing here.
      -- On the hack:
      -- See variant commands for limits further down.
      return SU.boolean(tree.is_hacked_movablelimits, false) or isMoveableLimitsOrAlwaysStacked(tree[1])
   end
   -- Explicit option on the operator itself
   if tree.options and SU.boolean(tree.options.movablelimits, false) then
      return true
   end
   local entry = tree[1] and operatorDict[tree[1]]
   local forms = entry and entry.forms
   if forms then
      -- Leap of faith: we have no idea yet which form the operator will take
      -- in the final MathML.
      -- In the MathML operator dictionary, some operators have movablelimits
      -- in some forms and not in others.
      -- Ex. \Join (U+2A1D) and \bigtriangleleft (U+2A1E) have it prefix but not
      -- infix, for some unspecified reason (?).
      -- Assume that if at least one form has movablelimits, the operator is
      -- considered to have movablelimits "in general".
      for _, form in pairs(forms) do
         if SU.boolean(form.movablelimits, false) then
            return true
         end
      end
   end
   return false
end
438
--- Tell whether a node is an `mo` operator of the "close" atom type.
local function isCloseOperator (tree)
   local isClose = isOperatorKind(tree, atoms.types.close)
   return isClose
end
441
--- Tell whether a node is an `mo` operator of the "open" atom type.
local function isOpeningOperator (tree)
   local isOpen = isOperatorKind(tree, atoms.types.open)
   return isOpen
end
444

445
--- Tell whether a symbol is listed in the operator dictionary as an accent.
-- @return nil when the symbol is not in the dictionary, otherwise a boolean
local function isAccentSymbol (symbol)
   local entry = operatorDict[symbol]
   return entry and entry.atom == atoms.types.accent
end
448
--- Tell whether a symbol is listed in the operator dictionary as a bottom accent.
-- @return nil when the symbol is not in the dictionary, otherwise a boolean
local function isBottomAccentSymbol (symbol)
   local entry = operatorDict[symbol]
   return entry and entry.atom == atoms.types.botaccent
end
451

452
--- Compile a parsed TeX-like tree into a MathML-like command tree.
-- Children are compiled first (except under a `def` or under the application
-- of a registered macro), then the node itself is rewritten according to
-- its `id`.
-- @param _ Unused
-- @tparam table arg_env Values of the enclosing macro's arguments, by index
-- @param tree Parse-tree node, or a string (returned unchanged)
-- @return The compiled node, a string, or nil for a macro definition
local function compileToMathML_aux (_, arg_env, tree)
   if type(tree) == "string" then
      return tree
   end
   local function compile_and_insert (child, key, accumulator)
      if type(key) ~= "number" then
         -- Attributes (non-positional keys) are copied as-is
         accumulator[key] = child
         return accumulator
      -- Compile all children, except if this node is a macro definition (no
      -- evaluation "under lambda") or the application of a registered macro
      -- (since evaluating the nodes depends on the macro's signature, it is more
      -- complex and done below)..
      elseif tree.id == "def" or (tree.id == "command" and commands[tree.command]) then
         -- Conserve unevaluated child
         table.insert(accumulator, child)
      else
         -- Compile next child
         local comp = compileToMathML_aux(nil, arg_env, child)
         if comp then
            if comp.id == "wrapper" then
               -- Insert all children of the wrapper node
               for _, inner_child in ipairs(comp) do
                  table.insert(accumulator, inner_child)
               end
            else
               table.insert(accumulator, comp)
            end
         end
      end
      return accumulator
   end
   tree = fold_pairs(compile_and_insert, tree)
   if tree.id == "math" then
      tree.command = "math"
      -- If the outermost `mrow` contains only other `mrow`s, remove it
      -- (allowing vertical stacking).
      if forall(function (c)
         return c.command == "mrow"
      end, tree[1]) then
         tree[1].command = "math"
         return tree[1]
      end
   elseif tree.id == "mathlist" then
      -- Turn mathlist into `mrow` except if it has exactly one `mtr` or `mtd`
      -- child.
      -- Note that `def`s have already been compiled away at this point.
      if #tree == 1 then
         if tree[1].command == "mtr" or tree[1].command == "mtd" then
            return tree[1]
         else
            tree.command = "mrow"
         end
      elseif tree.is_paired_explicit then
         -- We already did the re-wrapping of open/close delimiters in the parser
         -- via \left...\right, doing it would not harm but would add an extra mrow,
         -- which we can avoid directly to keep the tree minimal.
         -- N.B. We could have used the same flag, but it's easier to debug this way.
         tree.is_paired = true
         tree.is_paired_explicit = nil
         tree.command = "mrow"
      else
         -- Re-wrap content from opening to closing operator in an implicit mrow,
         -- so stretchy operators apply to the correct span of content.
         local children = {}
         local stack = {}
         for _, child in ipairs(tree) do
            if isOpeningOperator(child) then
               table.insert(stack, children)
               local mrow = {
                  command = "mrow",
                  is_paired = true, -- Internal flag to mark this re-wrapped mrow
                  options = {},
                  child,
               }
               table.insert(children, mrow)
               children = mrow
            elseif isCloseOperator(child) then
               table.insert(children, child)
               if #stack > 0 then
                  children = table.remove(stack)
               end
            elseif
               (child.command == "msubsup" or child.command == "msub" or child.command == "msup")
               and isCloseOperator(child[1]) -- child[1] is the base
            then
               if #stack > 0 then
                  -- Special case for closing operator with sub/superscript:
                  -- (....)^i must be interpreted as {(....)}^i, not as (...{)}^i
                  -- Push the closing operator into the mrow
                  table.insert(children, child[1])
                  -- Move the mrow into the msubsup, replacing the closing operator
                  child[1] = children
                  -- And insert the msubsup into the parent
                  children = table.remove(stack)
                  children[#children] = child
               else
                  table.insert(children, child)
               end
            else
               table.insert(children, child)
            end
         end
         -- With unbalanced opening operators, the outermost list is at the
         -- bottom of the stack.
         tree = #stack > 0 and stack[1] or children
         tree.command = "mrow"
      end
   elseif tree.id == "atom" then
      local codepoints = {}
      for _, cp in luautf8.codes(tree[1]) do
         table.insert(codepoints, cp)
      end
      local cp = codepoints[1]
      if
         #codepoints == 1
         and ( -- If length of UTF-8 string is 1
            cp >= SU.codepoint("A") and cp <= SU.codepoint("Z")
            or cp >= SU.codepoint("a") and cp <= SU.codepoint("z")
            or cp >= SU.codepoint("Α") and cp <= SU.codepoint("Ω")
            or cp >= SU.codepoint("α") and cp <= SU.codepoint("ω")
            or cp == SU.codepoint("ϑ")
            or cp == SU.codepoint("ϕ")
            or cp == SU.codepoint("ϰ")
            or cp == SU.codepoint("ϱ")
            or cp == SU.codepoint("ϖ")
            or cp == SU.codepoint("ϵ")
         )
      then
         tree.command = "mi"
      elseif lpeg.match(lpeg.R("09") ^ 1, tree[1]) then
         tree.command = "mn"
      else
         tree.command = "mo"
      end
      tree.options = {}
   -- Translate TeX-like sub/superscripts to `munderover` or `msubsup`,
   -- depending on whether the base is an operator with moveable limits,
   -- or a brace-like command.
   elseif tree.id == "sup" and isMoveableLimitsOrAlwaysStacked(tree[1]) then
      tree.command = "mover"
   elseif tree.id == "sub" and isMoveableLimitsOrAlwaysStacked(tree[1]) then
      tree.command = "munder"
   elseif tree.id == "subsup" and isMoveableLimitsOrAlwaysStacked(tree[1]) then
      tree.command = "munderover"
   elseif tree.id == "supsub" and isMoveableLimitsOrAlwaysStacked(tree[1]) then
      tree.command = "munderover"
      -- Swap so that the subscript comes before the superscript
      local tmp = tree[2]
      tree[2] = tree[3]
      tree[3] = tmp
   elseif tree.id == "sup" then
      tree.command = "msup"
   elseif tree.id == "sub" then
      tree.command = "msub"
   elseif tree.id == "subsup" then
      tree.command = "msubsup"
   elseif tree.id == "supsub" then
      tree.command = "msubsup"
      -- Swap so that the subscript comes before the superscript
      local tmp = tree[2]
      tree[2] = tree[3]
      tree[3] = tmp
   elseif tree.id == "def" then
      local commandName = tree["command-name"]
      local argTypes = inferArgTypes(tree[1])
      registerCommand(commandName, argTypes, function (compiledArgs)
         return compileToMathML_aux(nil, compiledArgs, tree[1])
      end)
      -- A definition produces no output
      return nil
   elseif tree.id == "text" then
      tree.command = "mtext"
   elseif tree.id == "command" and commands[tree.command] then
      local argTypes = commands[tree.command][1]
      local cmdFun = commands[tree.command][2]
      local applicationTree = tree
      local cmdName = tree.command
      if #applicationTree ~= #argTypes then
         SU.error(
            "Wrong number of arguments ("
               .. #applicationTree
               .. ") for command "
               .. cmdName
               .. " (should be "
               .. #argTypes
               .. ")"
         )
      end
      local compiledArgs = {}
      -- Attributes (non-positional keys) are added to the compiled argument
      -- tree as-is.
      for key, value in pairs(applicationTree) do
         if type(key) ~= "number" then
            compiledArgs[key] = value
         end
      end
      -- Compile every positional argument, in order. This must not rely on
      -- `pairs`, whose traversal order is unspecified: the arguments are
      -- appended, so their order determines which `#n` they are bound to.
      for i, arg in ipairs(applicationTree) do
         if argTypes[i] == objType.tree then
            table.insert(compiledArgs, compileToMathML_aux(nil, arg_env, arg))
         else
            local x = compileToStr(arg_env, arg)
            table.insert(compiledArgs, x)
         end
      end
      local res = cmdFun(compiledArgs)
      if res.command == "mrow" then
         -- Mark the outer mrow to be unwrapped in the parent
         res.id = "wrapper"
      end
      return res
   elseif tree.id == "command" and symbols[tree.command] then
      local atom = { id = "atom", [1] = symbols[tree.command] }
      if isAccentSymbol(symbols[tree.command]) and #tree > 0 then
         -- LaTeX-style accents \overrightarrow{v} = <mover accent="true"><mi>v</mi><mo>&#x20D7;</mo></mover>
         local accent = {
            id = "command",
            command = "mover",
            options = {
               accent = "true",
            },
         }
         accent[1] = compileToMathML_aux(nil, arg_env, tree[1])
         accent[2] = compileToMathML_aux(nil, arg_env, atom)
         tree = accent
      elseif isBottomAccentSymbol(symbols[tree.command]) and #tree > 0 then
         -- LaTeX-style bottom accents \underleftarrow{v} = <munder accent="true"><mi>v</mi><mo>&#x20EE;</mo></munder>
         local accent = {
            id = "command",
            command = "munder",
            options = {
               accentunder = "true",
            },
         }
         accent[1] = compileToMathML_aux(nil, arg_env, tree[1])
         accent[2] = compileToMathML_aux(nil, arg_env, atom)
         tree = accent
      elseif #tree > 0 then
         -- Play cool with LaTeX-style commands that don't take arguments:
         -- Edge case for non-accent symbols so we don't lose bracketed groups
         -- that might have been seen as command arguments.
         -- Ex. \langle{x}\rangle (without space after \langle)
         local sym = compileToMathML_aux(nil, arg_env, atom)
         -- Compile all children in-place
         for i, child in ipairs(tree) do
            tree[i] = compileToMathML_aux(nil, arg_env, child)
         end
         -- Insert symbol at the beginning,
         -- And add a wrapper mrow to be unwrapped in the parent.
         table.insert(tree, 1, sym)
         tree.command = "mrow"
         tree.id = "wrapper"
      else
         tree = compileToMathML_aux(nil, arg_env, atom)
      end
   elseif tree.id == "argument" then
      if arg_env[tree.index] then
         return arg_env[tree.index]
      else
         SU.error("Argument #" .. tree.index .. " has escaped its scope (probably not fully applied command).")
      end
   end
   -- The parser-level id is no longer needed on a compiled node
   tree.id = nil
   return tree
end
711

712
--- Render a compiled tree as a compact `\command[options]{children}` string.
-- Option order follows `pairs` and is therefore not guaranteed.
-- @param tree Compiled node, or a string (returned unchanged)
-- @treturn string
local function printMathML (tree)
   if type(tree) == "string" then
      return tree
   end
   local out = { "\\" .. tree.command }
   local opts = tree.options
   if opts then
      local rendered = {}
      for k, v in pairs(opts) do
         rendered[#rendered + 1] = k .. "=" .. tostring(v)
      end
      -- Omit the brackets altogether when there is no option
      if #rendered > 0 then
         out[#out + 1] = "[" .. table.concat(rendered, ", ") .. "]"
      end
   end
   if #tree > 0 then
      out[#out + 1] = "{"
      for _, child in ipairs(tree) do
         out[#out + 1] = printMathML(child)
      end
      out[#out + 1] = "}"
   end
   return table.concat(out)
end
735

736
--- Compile a parsed tree and report the result on the "texmath" debug channel.
-- @param _ Passed through to compileToMathML_aux
-- @tparam table arg_env Values of the enclosing macro's arguments, by index
-- @param tree Parse tree to compile
-- @return The compiled tree
local function compileToMathML (_, arg_env, tree)
   local mathml = compileToMathML_aux(_, arg_env, tree)
   -- The message is built by a callback handed to SU.debug
   local function describe ()
      return "Resulting MathML: " .. printMathML(mathml)
   end
   SU.debug("texmath", describe)
   return mathml
end
743

744
--- Parse TeX-like math source into a tree and report it on the "texmath"
-- debug channel.
-- @param _ Unused
-- @tparam table content Content whose first element is the source string
-- @return The parse result of epnf.parsestring
local function convertTexlike (_, content)
   local source = content[1]
   local parsed = epnf.parsestring(mathParser, source)
   -- The message is built by a callback handed to SU.debug
   local function describe ()
      return "Parsed TeX math: " .. pl.pretty.write(parsed)
   end
   SU.debug("texmath", describe)
   return parsed
end
751

752
-- The MathML token elements take a single string argument and return the
-- compiled argument tree unchanged.
for _, tag in ipairs({ "mi", "mo", "mn" }) do
   registerCommand(tag, { [1] = objType.str }, function (x)
      return x
   end)
end
761

762
-- Register a limit-like variant command
-- Variants of superior, inferior, projective and injective limits are special:
-- They accept a sub/sup behaving as a movablelimits, but also have a symbol
-- on top of the limit symbol, which is not a movablelimits.
-- I can't see in the MathML specification how to do this properly: MathML Core
-- seems to only allow movablelimits on <mo> elements, and <mover>/<munder> may
-- inherit that property from their "core operator", but in this case we do not
-- want the accent to be movable, only the limit sup/sub.
-- So we use a hack, and also avoid "\def" here to prevent unwanted mrows.
-- @tparam string name    TeX command name
-- @tparam string command MathML command (mover or munder)
-- @tparam number symbol  Unicode codepoint for the accent symbol
-- @tparam string text    Text representation
local function registerVarLimits (name, command, symbol, text)
   local function build ()
      local options
      if command == "mover" then
         options = { accent = "true" }
      else
         options = { accentunder = "true" }
      end
      -- The limit operator itself; its own limits are explicitly not movable
      local base = {
         command = "mo",
         options = { atom = "op", movablelimits = false },
         text,
      }
      -- The accent symbol.
      -- NOTE(review): this uses accentunder even when command is "mover" -- confirm intended.
      local mark = {
         command = "mo",
         options = { accentunder = "true" },
         luautf8.char(symbol),
      }
      return {
         command = command,
         is_hacked_movablelimits = true, -- Internal flag to mark this as a hack
         options = options,
         base,
         mark,
      }
   end
   -- No arguments: the command expands to a fresh tree on every use.
   registerCommand(name, {}, build)
end
795
do
   -- Each entry: TeX name, MathML command, accent codepoint, text.
   local variants = {
      { "varlimsup", "mover", 0x203E, "lim" }, -- U+203E OVERLINE
      { "varliminf", "munder", 0x203E, "lim" }, -- U+203E OVERLINE
      { "varprojlim", "munder", 0x2190, "lim" }, -- U+2190 LEFTWARDS ARROW
      { "varinjlim", "munder", 0x2192, "lim" }, -- U+2192 RIGHTWARDS ARROW
   }
   for _, spec in ipairs(variants) do
      registerVarLimits(spec[1], spec[2], spec[3], spec[4])
   end
end
×
799

800
-- Register a brace-like command.
-- These symbols are accents per se in MathML, and are non-combining in
-- Unicode, but TeX treats them as "pseudo-accent" stretchy symbols.
-- Moreover, they accept a sub/sup which is always stacked, and not movable,
-- hence the internal flag set on the resulting node.
-- "\def" is also avoided here, to prevent unwanted mrows resulting from the
-- compilation of the argument.
-- @tparam string name    TeX command name
-- @tparam string command MathML command (mover or munder)
-- @tparam number symbol  Unicode codepoint for the brace symbol
local function registerBraceLikeCommands (name, command, symbol)
   local argTypes = { [1] = objType.tree }
   -- A fresh node is built on every invocation of the command.
   local function build (tree)
      local options
      if command == "mover" then
         options = { accent = "true" }
      else
         options = { accentunder = "true" }
      end
      -- The stretchy brace-like symbol placed over/under the argument.
      local brace = {
         command = "mo",
         options = { stretchy = "true" },
         luautf8.char(symbol),
      }
      return {
         command = command,
         is_always_stacked = true, -- Internal flag to mark this as a brace-like command
         options = options,
         tree[1],
         brace,
      }
   end
   registerCommand(name, argTypes, build)
end
828
-- Note: the following overrides the default commands from xml-entities / unicode-math.
829
do
   -- Each entry: TeX name, MathML command, symbol codepoint.
   local braces = {
      { "overbrace", "mover", 0x23DE }, -- U+23DE TOP CURLY BRACKET
      { "underbrace", "munder", 0x23DF }, -- U+23DF BOTTOM CURLY BRACKET
      { "overparen", "mover", 0x23DC }, -- U+23DC TOP PARENTHESIS
      { "underparen", "munder", 0x23DD }, -- U+23DD BOTTOM PARENTHESIS
      { "overbracket", "mover", 0x23B4 }, -- U+23B4 TOP SQUARE BRACKET
      { "underbracket", "munder", 0x23B5 }, -- U+23B5 BOTTOM SQUARE BRACKET
   }
   for _, spec in ipairs(braces) do
      registerBraceLikeCommands(spec[1], spec[2], spec[3])
   end
end
×
835

836
-- Built-in definitions, processed once when this file is loaded: the TeX-like
-- source below is parsed with convertTexlike and the resulting tree is compiled
-- with compileToMathML, using an empty argument environment.
-- The return value is discarded, so this call is made for its side effects
-- only (presumably each \def registers a command -- confirm against
-- compileToMathML_aux).
-- NOTE(review): the text after "%" inside the string is part of the parsed
-- input, not Lua comments; the string must be kept byte-for-byte, including
-- the U+202F separators in the "lim sup"-like operator names.
compileToMathML(
   nil,
   {},
   convertTexlike(nil, {
      [==[
  \def{frac}{\mfrac{#1}{#2}}
  \def{sqrt}{\msqrt{#1}}
  \def{bi}{\mi[mathvariant=bold-italic]{#1}}
  \def{dsi}{\mi[mathvariant=double-struck]{#1}}

  % From amsmath:
  \def{to}{\mo[atom=bin]{→}}
  \def{lim}{\mo[atom=op, movablelimits=true]{lim}}
  \def{gcd}{\mo[atom=op, movablelimits=true]{gcd}}
  \def{sup}{\mo[atom=op, movablelimits=true]{sup}}
  \def{inf}{\mo[atom=op, movablelimits=true]{inf}}
  \def{max}{\mo[atom=op, movablelimits=true]{max}}
  \def{min}{\mo[atom=op, movablelimits=true]{min}}
  % Those use U+202F NARROW NO-BREAK SPACE in their names
  \def{limsup}{\mo[atom=op, movablelimits=true]{lim sup}}
  \def{liminf}{\mo[atom=op, movablelimits=true]{lim inf}}
  \def{projlim}{\mo[atom=op, movablelimits=true]{proj lim}}
  \def{injlim}{\mo[atom=op, movablelimits=true]{inj lim}}

  % Other pre-defined operators from the TeXbook, p. 162:
  % TeX of course defines them with \mathop, so we use atom=op here.
  % MathML would use a <mi> here.
  % But we use a <mo> so the atom type is handled
  \def{arccos}{\mo[atom=op]{arccos}}
  \def{arcsin}{\mo[atom=op]{arcsin}}
  \def{arctan}{\mo[atom=op]{arctan}}
  \def{arg}{\mo[atom=op]{arg}}
  \def{cos}{\mo[atom=op]{cos}}
  \def{cosh}{\mo[atom=op]{cosh}}
  \def{cot}{\mo[atom=op]{cot}}
  \def{coth}{\mo[atom=op]{coth}}
  \def{csc}{\mo[atom=op]{csc}}
  \def{deg}{\mo[atom=op]{deg}}
  \def{det}{\mo[atom=op]{det}}
  \def{dim}{\mo[atom=op]{dim}}
  \def{exp}{\mo[atom=op]{exp}}
  \def{hom}{\mo[atom=op]{hom}}
  \def{ker}{\mo[atom=op]{ker}}
  \def{lg}{\mo[atom=op]{lg}}
  \def{ln}{\mo[atom=op]{ln}}
  \def{log}{\mo[atom=op]{log}}
  \def{Pr}{\mo[atom=op]{Pr}}
  \def{sec}{\mo[atom=op]{sec}}
  \def{sin}{\mo[atom=op]{sin}}
  \def{sinh}{\mo[atom=op]{sinh}}
  \def{tan}{\mo[atom=op]{tan}}
  \def{tanh}{\mo[atom=op]{tanh}}

  % Standard spaces gleaned from plain TeX
  \def{thinspace}{\mspace[width=thin]}
  \def{negthinspace}{\mspace[width=-thin]}
  \def{,}{\thinspace}
  \def{!}{\negthinspace}
  \def{medspace}{\mspace[width=med]}
  \def{negmedspace}{\mspace[width=-med]}
  \def{>}{\medspace}
  \def{thickspace}{\mspace[width=thick]}
  \def{negthickspace}{\mspace[width=-thick]}
  \def{;}{\thickspace}
  \def{enspace}{\mspace[width=1en]}
  \def{enskip}{\enspace}
  \def{quad}{\mspace[width=1em]}
  \def{qquad}{\mspace[width=2em]}

  % MathML says a single-character identifier must be in italic by default.
  % TeX however has the following Greek capital macros rendered in upright shape.
  % It so common that you've probably never seen Γ(x) written with an italic gamma.
  \def{Gamma}{\mi[mathvariant=normal]{Γ}}
  \def{Delta}{\mi[mathvariant=normal]{Δ}}
  \def{Theta}{\mi[mathvariant=normal]{Θ}}
  \def{Lambda}{\mi[mathvariant=normal]{Λ}}
  \def{Xi}{\mi[mathvariant=normal]{Ξ}}
  \def{Pi}{\mi[mathvariant=normal]{Π}}
  \def{Sigma}{\mi[mathvariant=normal]{Σ}}
  \def{Upsilon}{\mi[mathvariant=normal]{Υ}}
  \def{Phi}{\mi[mathvariant=normal]{Φ}}
  \def{Psi}{\mi[mathvariant=normal]{Ψ}}
  \def{Omega}{\mi[mathvariant=normal]{Ω}}
  % Some calligraphic (script), fraktur, double-struck styles:
  % Convenience for compatibility with LaTeX.
  \def{mathcal}{\mi[mathvariant=script]{#1}}
  \def{mathfrak}{\mi[mathvariant=fraktur]{#1}}
  \def{mathbb}{\mi[mathvariant=double-struck]{#1}}
  % Some style-switching commands for compatibility with LaTeX math.
  % Caveat emptor: LaTeX would allow these to apply to a whole formula.
  % We can't do that in MathML, as mathvariant applies to token elements only.
  % Also note that LaTeX and related packages may have many more such commands.
  % We only provide a few common ('historical') ones here.
  \def{mathrm}{\mi[mathvariant=normal]{#1}}
  \def{mathbf}{\mi[mathvariant=bold]{#1}}
  \def{mathit}{\mi[mathvariant=italic]{#1}}
  \def{mathsf}{\mi[mathvariant=sans-serif]{#1}}
  \def{mathtt}{\mi[mathvariant=monospace]{#1}}

  % Modulus operator forms
  % See Michael Downes & Barbara Beeton, "Short Math Guide for LaTeX"
  % American Mathematical Society (v2.0, 2017), §7.1 p. 18
  \def{bmod}{\mo[atom=bin]{mod}}
  \def{pmod}{\quad(\mo[atom=ord]{mod}\>#1)}
  \def{mod}{\quad \mo[atom=ord]{mod}\>#1}
  \def{pod}{\quad(#1)}

  % Phantom commands from TeX/LaTeX
  \def{phantom}{\mphantom{#1}}
  \def{hphantom}{\mpadded[height=0, depth=0]{\mphantom{#1}}}
  \def{vphantom}{\mpadded[width=0]{\mphantom{#1}}}

  % Stacking commands
  % Plain LaTeX \stackrel is only supposed to be used on binary relations.
  % It's a poor naming choice, and a poor design choice as well.
  % Package "stackrel" on CTAN redefine its for relational operators, and
  % provides a \stackbin for binary operators.
  % Users would, without respect for semantics, use them interchangeably.
  % We use the same definition for both, and expect the MathML layer to handle
  % the content as appropriate based on the actual operators...
  \def{stackrel}{\mover{#2}{#1}}
  \def{stackbin}{\mover{#2}{#1}}
  % Package "amsmath" went with its own generic \overset and \underset.
  \def{overset}{\mover{#2}{#1}}
  \def{underset}{\munder{#2}{#1}}
]==],
   })
)
964

965
-- Module value: a positional pair, the parser function first and the compiler
-- function second.
return { convertTexlike, compileToMathML }
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc