• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

sile-typesetter / sile / 12005157744

25 Nov 2024 07:21AM UTC coverage: 64.353% (-4.2%) from 68.563%
12005157744

push

github

web-flow
Support TeX-like left..right construct (#2179)

30 of 32 new or added lines in 1 file covered. (93.75%)

1121 existing lines in 36 files now uncovered.

12776 of 19853 relevant lines covered (64.35%)

4779.53 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.86
/packages/math/texlike.lua
1
local atoms = require("packages.math.atoms")
14✔
2
local syms = require("packages.math.unicode-symbols")
14✔
3
local bits = require("core.parserbits")
14✔
4

5
local epnf = require("epnf")
14✔
6
local lpeg = require("lpeg")
14✔
7

8
local operatorDict = syms.operatorDict
14✔
9
local symbols = syms.symbols
14✔
10

11
-- Grammar to parse TeX-like math
12
-- luacheck: push ignore
13
-- stylua: ignore start
14
---@diagnostic disable: undefined-global, unused-local, lowercase-global
15
-- Grammar definition for TeX-like math, handed to `epnf.define`.
-- The single `_ENV` parameter is the grammar environment: the bare names used
-- below (P, S, R, C, V, Cg, Cf, Ct, E, WS, START, EOF, ...) and the rule
-- assignments at the bottom (math, mathlist, supsub, ...) are all resolved
-- through it. Locals are helper patterns; the assignments after START are the
-- named grammar rules, each producing a node tagged with the rule name as `id`
-- (presumably by epnf -- the later compilation code dispatches on `tree.id`).
local mathGrammar = function (_ENV)
   local _ = WS^0
   local eol = S"\r\n"
   local digit = R("09")
   local natural = (
      -- TeX doesn't really know what a number in a formula is.
      -- It handles any sequence of "ordinary" characters, including period(s):
      -- See for instance The TeXbook, p. 132.
      -- When later converting to MathML, we'll ideally want <mn>0.0123</mn>
      -- instead of, say, <mn>0</mn><mo>.</mo><mn>0123</mn> (not only wrong
      -- in essence, but also taking the risk of using a <mo> operator, then
      -- considered as a punctuation, thus inserting a space)
      -- We cannot be general, but checking MathJax and TeMML's behavior, they
      -- are not general either in this regard.
         digit^0 * P(".")^-1 * digit^1 + -- Decimal number (ex: 1.23, 0.23, .23)
         digit^1 -- Integer (digits only, ex: 123)
      ) / tostring
   local pos_natural = R("19") * digit^0 / tonumber

   -- \left and \right delimiters = The TeXbook p. 148.
   -- Characters with a delcode in TeX: The TeXbook p. 341
   -- These are for use in \left...\right pairs.
   -- We add the period (null delimiter) from p. 149-150.
   -- We don't include the backslash here and handle it just after.
   local delcode = S"([</|)]>."
   -- Left/right is followed by a delimiter with delcode, or a command.
   -- We use the delcode or backslash as terminator: commands such as
   -- \rightarrow must still be allowed.
   local leftright = function (s) return P(s) * (delcode + P"\\") end

   local ctrl_word = R("AZ", "az")^1
   local ctrl_symbol = P(1) - S"{}\\"
   local ctrl_sequence_name = C(ctrl_word + ctrl_symbol) - leftright("left") - leftright("right") / 1
   local comment = (
         P"%" *
         P(1-eol)^0 *
         eol^-1
      )
   local utf8cont = R("\128\191")
   local utf8code = lpeg.R("\0\127")
      + lpeg.R("\194\223") * utf8cont
      + lpeg.R("\224\239") * utf8cont * utf8cont
      + lpeg.R("\240\244") * utf8cont * utf8cont * utf8cont
   -- Identifiers inside \mo and \mi tags
   local sileID = C(bits.identifier + P(1)) / 1
   local mathMLID = (utf8code - S"\\{}%")^1 / function (...)
         local ret = ""
         local t = {...}
         for _,b in ipairs(t) do
            ret = ret .. b
         end
         return ret
      end
   local group = P"{" * V"mathlist" * (P"}" + E("`}` expected"))
   -- Simple amsmath-like \text command (no embedded math)
   local textgroup = P"{" * C((1-P"}")^1) * (P"}" + E("`}` expected"))
   -- TeX \left...\right group
   local delim =
      -- Delimiter with delcode
      C(delcode) / function (d)
         if d ~= "." then
            return {
               id = "atom",
               d
            }
         end
         -- The period is the null delimiter: no atom is produced for it
         return nil
      end
      -- Delimiter as escaped \{ or \}
      + P"\\" * C(S"{}") / function (d)
         return {
            id = "atom",
            d
         }
      end
      -- Delimiter as command ex. \langle
      + P"\\" * C(ctrl_sequence_name) / 1 / function (cmd)
         return {
            id = "command",
            command = cmd
         }
      end

   local leftrightgroup = P"\\left" * delim * V"mathlist" * P"\\right" * delim
      / function (left, subformula, right)
         if not left and not right then
            -- No delimiters, return the subformula as-is
            return subformula
         end
         -- Rewrap the subformula in a flagged mathlist
         local mrow = {
            id = "mathlist",
            options = {},
            is_paired_explicit = true, -- Internal flag
            subformula
         }
         if left then
            table.insert(mrow, 1, left)
         end
         if right then
            table.insert(mrow, right)
         end
         return mrow
      end

   local element_no_infix =
      leftrightgroup + -- Important: before command
      V"def" +
      V"text" + -- Important: before command
      V"command" +
      group +
      V"argument" +
      V"atom"
   local element =
      V"supsub" +
      V"subsup" +
      V"sup" +
      V"sub" +
      element_no_infix
   local sep = S",;" * _
   local quotedString = (P'"' * C((1-P'"')^1) * P'"')
   local value = ( quotedString + (1-S",;]")^1 )
   local pair = Cg(sileID * _ * "=" * _ * C(value)) * sep^-1 / function (...)
      local t = {...}; return t[1], t[#t]
   end
   local list = Cf(Ct"" * pair^0, rawset)
   local parameters = (
         P"[" *
         list *
         P"]"
      )^-1 / function (a)
            return type(a)=="table" and a or {}
         end

   -- Two-dimensional argument: rows separated by `\\`, cells separated by `&`.
   -- (This pair of definitions used to appear twice in a row; the first copy
   -- was shadowed by the second and never used, so only one is kept.)
   local dim2_arg_inner = Ct(V"mathlist" * (P"&" * V"mathlist")^0) /
      function (t)
         t.id = "mathlist"
         return t
      end
   local dim2_arg =
      Cg(P"{" *
         dim2_arg_inner *
         (P"\\\\" * dim2_arg_inner)^1 *
         (P"}" + E("`}` expected"))
         ) / function (...)
            local t = {...}
            -- Remove the last mathlist if empty. This way,
            -- `inner1 \\ inner2 \\` is the same as `inner1 \\ inner2`.
            if not t[#t][1] or not t[#t][1][1] then table.remove(t) end
            return pl.utils.unpack(t)
         end

   -- TeX uses the regular asterisk (* = U+002A) in superscripts or subscript:
   -- The TeXbook exercise 18.32 (p. 179, 330) for instance.
   -- Fonts usually have the asterisk raised too high, so using the Unicode
   -- asterisk operator U+2217 looks better (= \ast in TeX).
   local astop = P"*" / luautf8.char(0x2217)
   -- TeX interprets apostrophes as primes in math mode:
   -- The TeXbook p. 130 expands ' to ^\prime commands and repeats the \prime
   -- for multiple apostrophes.
   -- The TeXbook p. 134: "Then there is the character ', which we know is used
   -- as an abbreviation for \prime superscripts."
   -- (So we are really sure superscript primes are really the intended meaning.)
   -- Here we use the Unicode characters for primes, but the intent is the same.
   local primes = (
         P"''''" / luautf8.char(0x2057) + -- quadruple prime
         P"'''" / luautf8.char(0x2034) + -- triple prime
         P"''" / luautf8.char(0x2033) + -- double prime
         P"'" / luautf8.char(0x2032) -- prime
      ) / function (s)
            return { id="atom", s }
         end
   local primes_sup = (
         primes * _ * P"^" * _ * element_no_infix / function (p, e)
            -- Combine the prime with the superscript in the same mathlist
            if e.id == "mathlist" then
               table.insert(e, 1, p)
               return e
            end
            return { id="mathlist", p, e }
         end
         + primes -- or standalone primes
      )

   START "math"
   math = V"mathlist" * EOF"Unexpected character at end of math code"
   mathlist = (comment + (WS * _) + element)^0
   supsub = element_no_infix * _ * primes_sup                  * _ *  P"_" * _ * element_no_infix +
            element_no_infix * _ * P"^" * _ * element_no_infix * _ *  P"_" * _ * element_no_infix
   subsup = element_no_infix * _ * P"_" * _ * element_no_infix * primes_sup +
            element_no_infix * _ * P"_" * _ * element_no_infix * _ * P"^" * _ * element_no_infix
   sup =  element_no_infix * _ * primes_sup +
          element_no_infix * _ * P"^" * _ * element_no_infix
   sub = element_no_infix * _ * P"_" * _ * element_no_infix
   atom = natural + astop + C(utf8code - S"\\{}%^_&'") +
      (P"\\{" + P"\\}") / function (s) return string.sub(s, -1) end
   text = (
         P"\\text" *
         Cg(parameters, "options") *
         textgroup
      )
   command = (
         P"\\" *
         Cg(ctrl_sequence_name, "command") *
         Cg(parameters, "options") *
         (dim2_arg + group^0)
      )
   def = P"\\def" * _ * P"{" *
      Cg(ctrl_sequence_name, "command-name") * P"}" * _ *
      --P"[" * Cg(digit^1, "arity") * P"]" * _ *
      P"{" * V"mathlist" * P"}"
   argument = P"#" * Cg(pos_natural, "index")
end
245
-- luacheck: pop
246
-- stylua: ignore end
247
---@diagnostic enable: undefined-global, unused-local, lowercase-global
248

249
local mathParser = epnf.define(mathGrammar)
14✔
250

251
local commands = {}
14✔
252

253
-- A command type is a type for each argument it takes: either string or MathML
254
-- tree. If a command has no type, it is assumed to take only trees.
255
-- Tags like <mi>, <mo>, <mn> take a string, and this needs to be propagated in
256
-- commands that use them.
257

258
local objType = {
14✔
259
   tree = 1,
260
   str = 2,
261
}
262

263
--- Infer the type required for each argument of a macro body.
-- Walks `body` recursively and records in `accumulator`, at position N, the
-- type (`objType.tree` or `objType.str`) in which argument `#N` is used.
-- @param accumulator Sequence of argument types collected so far (mutated and returned)
-- @param typeRequired Type demanded by the enclosing context for `body`
-- @param body Parsed node (table with an `id` field and positional children)
-- @return The updated accumulator
-- Raises through SU.error on a non-table body or on an arity mismatch with a
-- registered command.
local function inferArgTypes_aux (accumulator, typeRequired, body)
   if type(body) == "table" then
      if body.id == "argument" then
         -- Record the type at the argument's own index. A positional
         -- table.insert would shift earlier entries (inflating the arity when
         -- an argument is used twice) and raises "position out of bounds" on
         -- Lua 5.3+ when arguments are first seen out of order.
         -- Fill any gap below this index so the list stays a proper sequence;
         -- such entries default to tree until (if ever) the argument is seen.
         for i = #accumulator + 1, body.index - 1 do
            accumulator[i] = objType.tree
         end
         accumulator[body.index] = typeRequired
         return accumulator
      elseif body.id == "command" then
         if commands[body.command] then
            -- Registered command: its signature dictates the type of each child
            local cmdArgTypes = commands[body.command][1]
            if #cmdArgTypes ~= #body then
               SU.error(
                  "Wrong number of arguments ("
                     .. #body
                     .. ") for command "
                     .. body.command
                     .. " (should be "
                     .. #cmdArgTypes
                     .. ")"
               )
            else
               for i = 1, #cmdArgTypes do
                  accumulator = inferArgTypes_aux(accumulator, cmdArgTypes[i], body[i])
               end
            end
            return accumulator
         elseif body.command == "mi" or body.command == "mo" or body.command == "mn" then
            -- Token elements take exactly one string argument
            if #body ~= 1 then
               SU.error("Wrong number of arguments (" .. #body .. ") for command " .. body.command .. " (should be 1)")
            end
            accumulator = inferArgTypes_aux(accumulator, objType.str, body[1])
            return accumulator
         else
            -- Not a macro, recurse on children assuming tree type for all
            -- arguments
            for _, child in ipairs(body) do
               accumulator = inferArgTypes_aux(accumulator, objType.tree, child)
            end
            return accumulator
         end
      elseif body.id == "atom" then
         -- Atoms contain no arguments
         return accumulator
      else
         -- Simply recurse on children, keeping the type required by the context
         for _, child in ipairs(body) do
            accumulator = inferArgTypes_aux(accumulator, typeRequired, child)
         end
         return accumulator
      end
   else
      SU.error("invalid argument to inferArgTypes_aux")
   end
end
315

316
--- Infer the argument types of a macro from its body.
-- Starts the recursive inference with an empty type list and a tree context.
-- @param body Parsed body of the macro definition
-- @return Sequence of argument types, as built by inferArgTypes_aux
local function inferArgTypes (body)
   local noTypesYet = {}
   return inferArgTypes_aux(noTypesYet, objType.tree, body)
end
319

320
--- Register (or overwrite) a command in the `commands` table.
-- @param name Command name, used as the key
-- @param argTypes Type of each argument; stored at index 1 of the entry
-- @param func Function implementing the command; stored at index 2 of the entry
local function registerCommand (name, argTypes, func)
   local entry = { argTypes, func }
   commands[name] = entry
end
323

324
--- Fold a callback over every entry of a table.
-- Starting from a fresh empty accumulator, calls
-- `accumulator = func(value, key, accumulator)` for each entry: first for the
-- keys yielded by `pl.utils.kpairs` (presumably the non-integer keys -- see
-- Penlight), then for the entries 1..n in order via `ipairs`. The in-order
-- processing of numeric indices is an important property for the MathML
-- compilation below.
-- @param func Callback `(value, key, accumulator) -> accumulator`
-- @param tbl Table to traverse (named `tbl` so the `table` library is not shadowed)
-- @return The final accumulator
local function fold_pairs (func, tbl)
   local accumulator = {}
   for k, v in pl.utils.kpairs(tbl) do
      accumulator = func(v, k, accumulator)
   end
   for i, v in ipairs(tbl) do
      accumulator = func(v, i, accumulator)
   end
   return accumulator
end
337

338
--- Check that a predicate holds for every element of a sequence.
-- Stops at the first element for which `pred` returns a falsy value.
-- @param pred Function called with each element
-- @param list Sequence to test (iterated with ipairs)
-- @return true if no element failed (including for an empty list), false otherwise
local function forall (pred, list)
   local allHold = true
   for _, item in ipairs(list) do
      if not pred(item) then
         allHold = false
         break
      end
   end
   return allHold
end
346

347
--- Compile a node into a plain string, for commands taking a string argument.
-- @param argEnv Values of the enclosing macro's arguments, indexed by number
-- @param mathlist Node to flatten: a single atom, an argument, or a list of
--   atoms and symbol commands
-- @return The atom's content, the argument's value, or the concatenation of
--   the list's atoms and symbols
-- Raises through SU.error when the list contains anything else.
local compileToStr = function (argEnv, mathlist)
   local isSingleton = #mathlist == 1
   if isSingleton and mathlist.id == "atom" then
      -- List is a single atom
      return mathlist[1]
   end
   if isSingleton and mathlist[1].id == "argument" then
      return argEnv[mathlist[1].index]
   end
   if mathlist.id == "argument" then
      return argEnv[mathlist.index]
   end
   -- General case: glue together the text of every element
   local text = ""
   for _, item in ipairs(mathlist) do
      local piece
      if item.id == "atom" then
         piece = item[1]
      elseif item.id == "command" and symbols[item.command] then
         piece = symbols[item.command]
      end
      if piece == nil then
         SU.error("Encountered non-character token in command that takes a string")
      else
         text = text .. piece
      end
   end
   return text
end
369

370
--- Tell whether a compiled node is an `mo` operator of the given atom type.
-- @param tree Node to inspect (may be nil)
-- @param typeOfAtom Atom type to compare against (a value from atoms.types)
-- @return true or false
local function isOperatorKind (tree, typeOfAtom)
   -- Safeguard against a missing node; anything but an <mo> never qualifies
   if not tree or tree.command ~= "mo" then
      return false
   end
   -- Case \mo[atom=xxx]{ops}
   -- E.g. \mo[atom=op]{lim}
   local opts = tree.options
   if opts and opts.atom then
      return atoms.types[opts.atom] == typeOfAtom
   end
   -- Case \mo{ops} where ops is registered with the requested type
   -- E.g. \mo{∑} or \sum
   local entry = tree[1] and operatorDict[tree[1]]
   if entry and entry.atom then
      return entry.atom == typeOfAtom
   end
   return false
end
389

390
--- Tell whether a compiled node is an operator with movable limits.
-- @param tree Node to inspect
-- @return true when the node is an `mo` that either carries a truthy
--   `movablelimits` option or has at least one operator-dictionary form with
--   movablelimits set; false otherwise
local function isMoveableLimits (tree)
   local isOperator = tree.command == "mo"
   if not isOperator then
      return false
   end
   local opts = tree.options
   if opts and SU.boolean(opts.movablelimits, false) then
      return true
   end
   local entry = tree[1] and operatorDict[tree[1]]
   local forms = entry and entry.forms
   if forms then
      -- Leap of faith: we have no idea yet which form the operator will take
      -- in the final MathML.
      -- In the MathML operator dictionary, some operators have a movablelimits
      -- in some forms and not in others.
      -- Ex. \Join (U+2A1D) and \bigtriangleleft (U+2A1E) have it prefix but not
      -- infix, for some unspecified reason (?).
      -- Assume that if at least one form has movablelimits, the operator is
      -- considered to have movablelimits "in general".
      for _, form in pairs(forms) do
         if SU.boolean(form.movablelimits, false) then
            return true
         end
      end
   end
   return false
end
414
--- Tell whether a node is an operator of the "close" atom type.
-- @param tree Node to inspect (may be nil)
local function isCloseOperator (tree)
   local closeType = atoms.types.close
   return isOperatorKind(tree, closeType)
end
417
--- Tell whether a node is an operator of the "open" atom type.
-- @param tree Node to inspect (may be nil)
local function isOpeningOperator (tree)
   local openType = atoms.types.open
   return isOperatorKind(tree, openType)
end
420

421
--- Tell whether a symbol is registered as an accent in the operator dictionary.
-- @param symbol Symbol string used as key in operatorDict
-- @return A falsy value when the symbol is unknown, otherwise whether its
--   atom type is the accent type
local function isAccentSymbol (symbol)
   local entry = operatorDict[symbol]
   return entry and entry.atom == atoms.types.accent
end
424

425
-- Pattern recognizing a numeric atom: digits, or a decimal with no integer
-- part (ex: .23), which the grammar's number rule produces as a single atom.
-- LPeg repetitions do not backtrack, hence the ordered choice.
local numericAtom = lpeg.R("09") ^ 1 + lpeg.P(".") * lpeg.R("09") ^ 1

--- Compile a parsed TeX-like tree into a MathML-like tree.
-- Children are compiled first (bottom-up), then the node itself is given a
-- `command` according to its `id`, and its `id` is removed.
-- @param _ Unused (recursive calls pass nil)
-- @param arg_env Values of the enclosing macro's arguments, indexed by number
-- @param tree Parsed node, or a string (returned unchanged)
-- @return The compiled node; nil for a macro definition (`def`), which only
--   registers the command; a node with id "wrapper" when the result is an
--   mrow that the parent must splice in place
local function compileToMathML_aux (_, arg_env, tree)
   if type(tree) == "string" then
      return tree
   end
   local function compile_and_insert (child, key, accumulator)
      if type(key) ~= "number" then
         -- Attribute (id, command, options, ...): copy as-is
         accumulator[key] = child
         return accumulator
      -- Compile all children, except if this node is a macro definition (no
      -- evaluation "under lambda") or the application of a registered macro
      -- (since evaluating the nodes depends on the macro's signature, it is more
      -- complex and done below)..
      elseif tree.id == "def" or (tree.id == "command" and commands[tree.command]) then
         -- Conserve unevaluated child
         table.insert(accumulator, child)
      else
         -- Compile next child
         local comp = compileToMathML_aux(nil, arg_env, child)
         if comp then
            if comp.id == "wrapper" then
               -- Insert all children of the wrapper node
               for _, inner_child in ipairs(comp) do
                  table.insert(accumulator, inner_child)
               end
            else
               table.insert(accumulator, comp)
            end
         end
      end
      return accumulator
   end
   tree = fold_pairs(compile_and_insert, tree)
   if tree.id == "math" then
      tree.command = "math"
      -- If the outermost `mrow` contains only other `mrow`s, remove it
      -- (allowing vertical stacking).
      if forall(function (c)
         return c.command == "mrow"
      end, tree[1]) then
         tree[1].command = "math"
         return tree[1]
      end
   elseif tree.id == "mathlist" then
      -- Turn mathlist into `mrow` except if it has exactly one `mtr` or `mtd`
      -- child.
      -- Note that `def`s have already been compiled away at this point.
      if #tree == 1 then
         if tree[1].command == "mtr" or tree[1].command == "mtd" then
            return tree[1]
         else
            tree.command = "mrow"
         end
      elseif tree.is_paired_explicit then
         -- We already did the re-wrapping of open/close delimiters in the parser
         -- via \left...\right, doing it would not harm but would add an extra mrow,
         -- which we can avoid directly to keep the tree minimal.
         -- N.B. We could have used the same flag, but it's easier to debug this way.
         tree.is_paired = true
         tree.is_paired_explicit = nil
         tree.command = "mrow"
      else
         -- Re-wrap content from opening to closing operator in an implicit mrow,
         -- so stretchy operators apply to the correct span of content.
         local children = {}
         local stack = {}
         for _, child in ipairs(tree) do
            if isOpeningOperator(child) then
               table.insert(stack, children)
               local mrow = {
                  command = "mrow",
                  is_paired = true, -- Internal flag to mark this re-wrapped mrow
                  options = {},
                  child,
               }
               table.insert(children, mrow)
               children = mrow
            elseif isCloseOperator(child) then
               table.insert(children, child)
               if #stack > 0 then
                  children = table.remove(stack)
               end
            elseif
               (child.command == "msubsup" or child.command == "msub" or child.command == "msup")
               and isCloseOperator(child[1]) -- child[1] is the base
            then
               if #stack > 0 then
                  -- Special case for closing operator with sub/superscript:
                  -- (....)^i must be interpreted as {(....)}^i, not as (...{)}^i
                  -- Push the closing operator into the mrow
                  table.insert(children, child[1])
                  -- Move the mrow into the msubsup, replacing the closing operator
                  child[1] = children
                  -- And insert the msubsup into the parent
                  children = table.remove(stack)
                  children[#children] = child
               else
                  table.insert(children, child)
               end
            else
               table.insert(children, child)
            end
         end
         -- Unbalanced opening operators leave entries on the stack: the
         -- bottom one is the outermost list
         tree = #stack > 0 and stack[1] or children
         tree.command = "mrow"
      end
   elseif tree.id == "atom" then
      local codepoints = {}
      for _, cp in luautf8.codes(tree[1]) do
         table.insert(codepoints, cp)
      end
      local cp = codepoints[1]
      if
         #codepoints == 1
         and ( -- If length of UTF-8 string is 1
            cp >= SU.codepoint("A") and cp <= SU.codepoint("Z")
            or cp >= SU.codepoint("a") and cp <= SU.codepoint("z")
            or cp >= SU.codepoint("Α") and cp <= SU.codepoint("Ω")
            or cp >= SU.codepoint("α") and cp <= SU.codepoint("ω")
            or cp == SU.codepoint("ϑ")
            or cp == SU.codepoint("ϕ")
            or cp == SU.codepoint("ϰ")
            or cp == SU.codepoint("ϱ")
            or cp == SU.codepoint("ϖ")
            or cp == SU.codepoint("ϵ")
         )
      then
         tree.command = "mi"
      elseif lpeg.match(numericAtom, tree[1]) then
         -- Includes decimals without an integer part, which would otherwise
         -- fall through and be typeset as an operator
         tree.command = "mn"
      else
         tree.command = "mo"
      end
      tree.options = {}
   -- Translate TeX-like sub/superscripts to `munderover` or `msubsup`,
   -- depending on whether the base is an operator with moveable limits.
   elseif tree.id == "sup" and isMoveableLimits(tree[1]) then
      tree.command = "mover"
   elseif tree.id == "sub" and isMoveableLimits(tree[1]) then
      tree.command = "munder"
   elseif tree.id == "subsup" and isMoveableLimits(tree[1]) then
      tree.command = "munderover"
   elseif tree.id == "supsub" and isMoveableLimits(tree[1]) then
      tree.command = "munderover"
      -- Swap so the subscript comes before the superscript
      local tmp = tree[2]
      tree[2] = tree[3]
      tree[3] = tmp
   elseif tree.id == "sup" then
      tree.command = "msup"
   elseif tree.id == "sub" then
      tree.command = "msub"
   elseif tree.id == "subsup" then
      tree.command = "msubsup"
   elseif tree.id == "supsub" then
      tree.command = "msubsup"
      -- Swap so the subscript comes before the superscript
      local tmp = tree[2]
      tree[2] = tree[3]
      tree[3] = tmp
   elseif tree.id == "def" then
      local commandName = tree["command-name"]
      local argTypes = inferArgTypes(tree[1])
      registerCommand(commandName, argTypes, function (compiledArgs)
         return compileToMathML_aux(nil, compiledArgs, tree[1])
      end)
      return nil
   elseif tree.id == "text" then
      tree.command = "mtext"
   elseif tree.id == "command" and commands[tree.command] then
      local argTypes = commands[tree.command][1]
      local cmdFun = commands[tree.command][2]
      local applicationTree = tree
      local cmdName = tree.command
      if #applicationTree ~= #argTypes then
         SU.error(
            "Wrong number of arguments ("
               .. #applicationTree
               .. ") for command "
               .. cmdName
               .. " (should be "
               .. #argTypes
               .. ")"
         )
      end
      -- Compile every argument
      local compiledArgs = {}
      for i, arg in pairs(applicationTree) do
         if type(i) == "number" then
            -- Store at the argument's own index: `pairs` gives no ordering
            -- guarantee, so appending could misplace arguments.
            if argTypes[i] == objType.tree then
               compiledArgs[i] = compileToMathML_aux(nil, arg_env, arg)
            else
               compiledArgs[i] = compileToStr(arg_env, arg)
            end
         else
            -- Not an argument but an attribute. Add it to the compiled
            -- argument tree as-is
            compiledArgs[i] = applicationTree[i]
         end
      end
      local res = cmdFun(compiledArgs)
      if res.command == "mrow" then
         -- Mark the outer mrow to be unwrapped in the parent
         res.id = "wrapper"
      end
      return res
   elseif tree.id == "command" and symbols[tree.command] then
      local atom = { id = "atom", [1] = symbols[tree.command] }
      if isAccentSymbol(symbols[tree.command]) and #tree > 0 then
         -- LaTeX-style accents \vec{v} = <mover accent="true"><mi>v</mi><mo>→</mo></mover>
         local accent = {
            id = "command",
            command = "mover",
            options = {
               accent = "true",
            },
         }
         accent[1] = compileToMathML_aux(nil, arg_env, tree[1])
         accent[2] = compileToMathML_aux(nil, arg_env, atom)
         tree = accent
      elseif #tree > 0 then
         -- Play cool with LaTeX-style commands that don't take arguments:
         -- Edge case for non-accent symbols so we don't lose bracketed groups
         -- that might have been seen as command arguments.
         -- Ex. \langle{x}\rangle (without space after \langle)
         local sym = compileToMathML_aux(nil, arg_env, atom)
         -- Compile all children in-place
         for i, child in ipairs(tree) do
            tree[i] = compileToMathML_aux(nil, arg_env, child)
         end
         -- Insert symbol at the beginning,
         -- And add a wrapper mrow to be unwrapped in the parent.
         table.insert(tree, 1, sym)
         tree.command = "mrow"
         tree.id = "wrapper"
      else
         tree = compileToMathML_aux(nil, arg_env, atom)
      end
   elseif tree.id == "argument" then
      if arg_env[tree.index] then
         return arg_env[tree.index]
      else
         SU.error("Argument #" .. tree.index .. " has escaped its scope (probably not fully applied command).")
      end
   end
   tree.id = nil
   return tree
end
671

672
--- Render a compiled tree as a compact TeX-like string, for debugging.
-- Produces `\command[key=value, ...]{children}`; the option list and the
-- braces are omitted when empty. Options are sorted so that the output is
-- deterministic (`pairs` gives no ordering guarantee).
-- @param tree Compiled node (table with a `command` field) or a string
-- @return The textual representation; strings are returned unchanged
local function printMathML (tree)
   if type(tree) == "string" then
      return tree
   end
   -- Collect the pieces and join once, rather than re-concatenating a growing
   -- string at every step of the recursion
   local parts = { "\\" .. tree.command }
   if tree.options then
      local options = {}
      for k, v in pairs(tree.options) do
         options[#options + 1] = k .. "=" .. tostring(v)
      end
      if #options > 0 then
         table.sort(options)
         parts[#parts + 1] = "[" .. table.concat(options, ", ") .. "]"
      end
   end
   if #tree > 0 then
      parts[#parts + 1] = "{"
      for _, child in ipairs(tree) do
         parts[#parts + 1] = printMathML(child)
      end
      parts[#parts + 1] = "}"
   end
   return table.concat(parts)
end
695

696
--- Compile a parsed tree to MathML and log the result on the "texmath" debug flag.
-- @param _ Forwarded untouched to compileToMathML_aux
-- @param arg_env Values of macro arguments, indexed by number
-- @param tree Parsed tree to compile
-- @return The compiled tree
local function compileToMathML (_, arg_env, tree)
   local result = compileToMathML_aux(_, arg_env, tree)
   -- Passed as a function so the message is only built if SU.debug calls it
   local function describeResult ()
      return "Resulting MathML: " .. printMathML(result)
   end
   SU.debug("texmath", describeResult)
   return result
end
703

704
--- Parse TeX-like math source into a tree and log it on the "texmath" debug flag.
-- @param _ Unused
-- @param content Table whose first element is the source string
-- @return The value returned by epnf.parsestring for the math grammar
local function convertTexlike (_, content)
   local source = content[1]
   local ret = epnf.parsestring(mathParser, source)
   -- Passed as a function so the dump is only built if SU.debug calls it
   local function describeParse ()
      return "Parsed TeX math: " .. pl.pretty.write(ret)
   end
   SU.debug("texmath", describeParse)
   return ret
end
711

712
-- The token elements mi, mo and mn each take a single string argument and
-- compile to their own (already compiled) argument table.
for _, tokenName in ipairs({ "mi", "mo", "mn" }) do
   registerCommand(tokenName, { [1] = objType.str }, function (x)
      return x
   end)
end
721

722
compileToMathML(
28✔
723
   nil,
14✔
724
   {},
725
   convertTexlike(nil, {
14✔
726
      [==[
×
727
  \def{frac}{\mfrac{#1}{#2}}
728
  \def{sqrt}{\msqrt{#1}}
729
  \def{bi}{\mi[mathvariant=bold-italic]{#1}}
730
  \def{dsi}{\mi[mathvariant=double-struck]{#1}}
731
  \def{vec}{\mover[accent=true]{#1}{\rightarrow}}
732

733
  % From amsmath:
734
  \def{to}{\mo[atom=bin]{→}}
735
  \def{lim}{\mo[atom=op, movablelimits=true]{lim}}
736
  \def{gcd}{\mo[atom=op, movablelimits=true]{gcd}}
737
  \def{sup}{\mo[atom=op, movablelimits=true]{sup}}
738
  \def{inf}{\mo[atom=op, movablelimits=true]{inf}}
739
  \def{max}{\mo[atom=op, movablelimits=true]{max}}
740
  \def{min}{\mo[atom=op, movablelimits=true]{min}}
741
  % Those use U+202F NARROW NO-BREAK SPACE in their names
742
  \def{limsup}{\mo[atom=op, movablelimits=true]{lim sup}}
743
  \def{liminf}{\mo[atom=op, movablelimits=true]{lim inf}}
744
  \def{projlim}{\mo[atom=op, movablelimits=true]{proj lim}}
745
  \def{injlim}{\mo[atom=op, movablelimits=true]{inj lim}}
746

747
  % Other pre-defined operators from the TeXbook, p. 162:
748
  % TeX of course defines them with \mathop, so we use atom=op here.
749
  % MathML would use a <mi> here.
750
  % But we use a <mo> so the atom type is handled
751
  \def{arccos}{\mo[atom=op]{arccos}}
752
  \def{arcsin}{\mo[atom=op]{arcsin}}
753
  \def{arctan}{\mo[atom=op]{arctan}}
754
  \def{arg}{\mo[atom=op]{arg}}
755
  \def{cos}{\mo[atom=op]{cos}}
756
  \def{cosh}{\mo[atom=op]{cosh}}
757
  \def{cot}{\mo[atom=op]{cot}}
758
  \def{coth}{\mo[atom=op]{coth}}
759
  \def{csc}{\mo[atom=op]{csc}}
760
  \def{deg}{\mo[atom=op]{deg}}
761
  \def{det}{\mo[atom=op]{det}}
762
  \def{dim}{\mo[atom=op]{dim}}
763
  \def{exp}{\mo[atom=op]{exp}}
764
  \def{hom}{\mo[atom=op]{hom}}
765
  \def{ker}{\mo[atom=op]{ker}}
766
  \def{lg}{\mo[atom=op]{lg}}
767
  \def{ln}{\mo[atom=op]{ln}}
768
  \def{log}{\mo[atom=op]{log}}
769
  \def{Pr}{\mo[atom=op]{Pr}}
770
  \def{sec}{\mo[atom=op]{sec}}
771
  \def{sin}{\mo[atom=op]{sin}}
772
  \def{sinh}{\mo[atom=op]{sinh}}
773
  \def{tan}{\mo[atom=op]{tan}}
774
  \def{tanh}{\mo[atom=op]{tanh}}
775

776
  % Standard spaces gleaned from plain TeX
777
  \def{thinspace}{\mspace[width=thin]}
778
  \def{negthinspace}{\mspace[width=-thin]}
779
  \def{,}{\thinspace}
780
  \def{!}{\negthinspace}
781
  \def{medspace}{\mspace[width=med]}
782
  \def{negmedspace}{\mspace[width=-med]}
783
  \def{>}{\medspace}
784
  \def{thickspace}{\mspace[width=thick]}
785
  \def{negthickspace}{\mspace[width=-thick]}
786
  \def{;}{\thickspace}
787
  \def{enspace}{\mspace[width=1en]}
788
  \def{enskip}{\enspace}
789
  \def{quad}{\mspace[width=1em]}
790
  \def{qquad}{\mspace[width=2em]}
791

792
  % MathML says a single-character identifier must be in italic by default.
793
  % TeX however has the following Greek capital macros rendered in upright shape.
794
  % It so common that you've probably never seen Γ(x) written with an italic gamma.
795
  \def{Gamma}{\mi[mathvariant=normal]{Γ}}
796
  \def{Delta}{\mi[mathvariant=normal]{Δ}}
797
  \def{Theta}{\mi[mathvariant=normal]{Θ}}
798
  \def{Lambda}{\mi[mathvariant=normal]{Λ}}
799
  \def{Xi}{\mi[mathvariant=normal]{Ξ}}
800
  \def{Pi}{\mi[mathvariant=normal]{Π}}
801
  \def{Sigma}{\mi[mathvariant=normal]{Σ}}
802
  \def{Upsilon}{\mi[mathvariant=normal]{Υ}}
803
  \def{Phi}{\mi[mathvariant=normal]{Φ}}
804
  \def{Psi}{\mi[mathvariant=normal]{Ψ}}
805
  \def{Omega}{\mi[mathvariant=normal]{Ω}}
806
  % Some calligraphic (script), fraktur, double-struck styles:
807
  % Convenience for compatibility with LaTeX.
808
  \def{mathcal}{\mi[mathvariant=script]{#1}}
809
  \def{mathfrak}{\mi[mathvariant=fraktur]{#1}}
810
  \def{mathbb}{\mi[mathvariant=double-struck]{#1}}
811
  % Some style-switching commands for compatibility with LaTeX math.
812
  % Caveat emptor: LaTeX would allow these to apply to a whole formula.
813
  % We can't do that in MathML, as mathvariant applies to token elements only.
814
  % Also note that LaTeX and related packages may have many more such commands.
815
  % We only provide a few common ('historical') ones here.
816
  \def{mathrm}{\mi[mathvariant=normal]{#1}}
817
  \def{mathbf}{\mi[mathvariant=bold]{#1}}
818
  \def{mathit}{\mi[mathvariant=italic]{#1}}
819
  \def{mathsf}{\mi[mathvariant=sans-serif]{#1}}
820
  \def{mathtt}{\mi[mathvariant=monospace]{#1}}
821

822
  % Modulus operator forms
823
  % See Michael Downes & Barbara Beeton, "Short Math Guide for LaTeX"
824
  % American Mathematical Society (v2.0, 2017), §7.1 p. 18
825
  \def{bmod}{\mo[atom=bin]{mod}}
826
  \def{pmod}{\quad(\mo[atom=ord]{mod}\>#1)}
827
  \def{mod}{\quad \mo[atom=ord]{mod}\>#1}
828
  \def{pod}{\quad(#1)}
829

830
  % Phantom commands from TeX/LaTeX
831
  \def{phantom}{\mphantom{#1}}
832
  \def{hphantom}{\mpadded[height=0, depth=0]{\mphantom{#1}}}
833
  \def{vphantom}{\mpadded[width=0]{\mphantom{#1}}}
834
]==],
835
   })
836
)
837

838
return { convertTexlike, compileToMathML }
14✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc