• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

sile-typesetter / sile / 12005578175

25 Nov 2024 07:53AM UTC coverage: 57.011% (-7.3%) from 64.353%
12005578175

push

github

alerque
chore(tooling): Extend and annotate spell check exceptions

11267 of 19763 relevant lines covered (57.01%)

755.25 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

65.62
/packages/math/texlike.lua
1
local atoms = require("packages.math.atoms")
1✔
2
local syms = require("packages.math.unicode-symbols")
1✔
3
local bits = require("core.parserbits")
1✔
4

5
local epnf = require("epnf")
1✔
6
local lpeg = require("lpeg")
1✔
7

8
local operatorDict = syms.operatorDict
1✔
9
local symbols = syms.symbols
1✔
10

11
-- Grammar to parse TeX-like math
12
-- luacheck: push ignore
13
-- stylua: ignore start
14
---@diagnostic disable: undefined-global, unused-local, lowercase-global
15
local mathGrammar = function (_ENV)
   -- Grammar definition function handed to `epnf.define`.
   -- The free names used below (WS, S, R, P, V, C, Cg, Ct, Cf, E, EOF, START)
   -- and the rule assignments at the bottom (math, mathlist, supsub, ...) are
   -- resolved through the `_ENV` argument -- presumably the LPeg/epnf helper
   -- environment; confirm against the epnf documentation.
   local _ = WS^0
   local eol = S"\r\n"
   local digit = R("09")
   local natural = (
      -- TeX doesn't really know what a number in a formula is.
      -- It handles any sequence of "ordinary" characters, including period(s):
      -- See for instance The TeXbook, p. 132.
      -- When later converting to MathML, we'll ideally want <mn>0.0123</mn>
      -- instead of, say, <mn>0</mn><mo>.</mo><mn>0123</mn> (not only wrong
      -- in essence, but also taking the risk of using a <mo> operator, then
      -- considered as a punctuation, thus inserting a space)
      -- We cannot be general, but checking MathJax and TeMML's behavior, they
      -- are not general either in this regard.
         digit^0 * P(".")^-1 * digit^1 + -- Decimal number (ex: 1.23, 0.23, .23)
         digit^1 -- Integer (digits only, ex: 123)
      ) / tostring
   local pos_natural = R("19") * digit^0 / tonumber

   -- \left and \right delimiters = The TeXbook p. 148.
   -- Characters with a delcode in TeX: The TeXbook p. 341
   -- These are for use in \left...\right pairs.
   -- We add the period (null delimiter) from p. 149-150.
   -- We don't include the backslash here and handle it just after.
   local delcode = S"([</|)]>."
   -- Left/right is followed by a delimiter with delcode, or a command.
   -- We use the delcode or backslash as terminator: commands such as
   -- \rightarrow must still be allowed.
   local leftright = function (s) return P(s) * (delcode + P"\\") end

   local ctrl_word = R("AZ", "az")^1
   local ctrl_symbol = P(1) - S"{}\\"
   local ctrl_sequence_name = C(ctrl_word + ctrl_symbol) - leftright("left") - leftright("right") / 1
   local comment = (
         P"%" *
         P(1-eol)^0 *
         eol^-1
      )
   local utf8cont = R("\128\191")
   local utf8code = lpeg.R("\0\127")
      + lpeg.R("\194\223") * utf8cont
      + lpeg.R("\224\239") * utf8cont * utf8cont
      + lpeg.R("\240\244") * utf8cont * utf8cont * utf8cont
   -- Identifiers inside \mo and \mi tags
   local sileID = C(bits.identifier + P(1)) / 1
   local mathMLID = (utf8code - S"\\{}%")^1 / function (...)
         local ret = ""
         local t = {...}
         for _,b in ipairs(t) do
         ret = ret .. b
         end
         return ret
      end
   local group = P"{" * V"mathlist" * (P"}" + E("`}` expected"))
   -- Simple amsmath-like \text command (no embedded math)
   local textgroup = P"{" * C((1-P"}")^1) * (P"}" + E("`}` expected"))
   -- TeX \left...\right group
   local delim =
      -- Delimiter with delcode
      C(delcode) / function (d)
         if d ~= "." then
            return {
               id = "atom",
               d
            }
         end
         return nil
      end
      -- Delimiter as escaped \{ or \}
      + P"\\" * C(S"{}") / function (d)
         return {
            id = "atom",
            d
         }
      end
      -- Delimiter as command ex. \langle
      + P"\\" * C(ctrl_sequence_name) / 1 / function (cmd)
         return {
            id = "command",
            command = cmd
         }
      end

      local leftrightgroup = P"\\left" * delim * V"mathlist" * P"\\right" * delim
         / function (left, subformula, right)
            if not left and not right then
               -- No delimiters, return the subformula as-is
               return subformula
            end
            -- Rewrap the subformula in a flagged mathlist
            local mrow = {
               id = "mathlist",
               options = {},
               is_paired_explicit = true, -- Internal flag
               subformula
            }
            if left then
               table.insert(mrow, 1, left)
            end
            if right then
               table.insert(mrow, right)
            end
            return mrow
         end

   local element_no_infix =
      leftrightgroup + -- Important: before command
      V"def" +
      V"text" + -- Important: before command
      V"command" +
      group +
      V"argument" +
      V"atom"
   local element =
      V"supsub" +
      V"subsup" +
      V"sup" +
      V"sub" +
      element_no_infix
   local sep = S",;" * _
   local quotedString = (P'"' * C((1-P'"')^1) * P'"')
   local value = ( quotedString + (1-S",;]")^1 )
   local pair = Cg(sileID * _ * "=" * _ * C(value)) * sep^-1 / function (...)
      local t = {...}; return t[1], t[#t]
   end
   local list = Cf(Ct"" * pair^0, rawset)
   local parameters = (
         P"[" *
         list *
         P"]"
      )^-1 / function (a)
            return type(a)=="table" and a or {}
         end
   -- Two-dimensional argument (rows separated by `\\`, cells by `&`).
   -- NOTE: this pair of definitions used to appear twice, verbatim; the
   -- redundant second copy (which merely shadowed this one) was removed.
   local dim2_arg_inner = Ct(V"mathlist" * (P"&" * V"mathlist")^0) /
      function (t)
         t.id = "mathlist"
         return t
      end
   local dim2_arg =
      Cg(P"{" *
         dim2_arg_inner *
         (P"\\\\" * dim2_arg_inner)^1 *
         (P"}" + E("`}` expected"))
         ) / function (...)
            local t = {...}
            -- Remove the last mathlist if empty. This way,
            -- `inner1 \\ inner2 \\` is the same as `inner1 \\ inner2`.
            if not t[#t][1] or not t[#t][1][1] then table.remove(t) end
            return pl.utils.unpack(t)
         end

   -- TeX uses the regular asterisk (* = U+002A) in superscripts or subscript:
   -- The TeXbook exercise 18.32 (p. 179, 330) for instance.
   -- Fonts usually have the asterisk raised too high, so using the Unicode
   -- asterisk operator U+2217 looks better (= \ast in TeX).
   local astop = P"*" / luautf8.char(0x2217)
   -- TeX interprets apostrophes as primes in math mode:
   -- The TeXbook p. 130 expands ' to ^\prime commands and repeats the \prime
   -- for multiple apostrophes.
   -- The TeXbook p. 134: "Then there is the character ', which we know is used
   -- as an abbreviation for \prime superscripts."
   -- (So we are really sure superscript primes are really the intended meaning.)
   -- Here we use the Unicode characters for primes, but the intent is the same.
   local primes = (
         P"''''" / luautf8.char(0x2057) + -- quadruple prime
         P"'''" / luautf8.char(0x2034) + -- triple prime
         P"''" / luautf8.char(0x2033) + -- double prime
         P"'" / luautf8.char(0x2032) -- prime
      ) / function (s)
            return { id="atom", s }
         end
   local primes_sup = (
         primes * _ * P"^" * _ * element_no_infix / function (p, e)
            -- Combine the prime with the superscript in the same mathlist
            if e.id == "mathlist" then
               table.insert(e, 1, p)
               return e
            end
            return { id="mathlist", p, e }
         end
         + primes -- or standalone primes
      )

   START "math"
   math = V"mathlist" * EOF"Unexpected character at end of math code"
   mathlist = (comment + (WS * _) + element)^0
   supsub = element_no_infix * _ * primes_sup                  * _ *  P"_" * _ * element_no_infix +
            element_no_infix * _ * P"^" * _ * element_no_infix * _ *  P"_" * _ * element_no_infix
   subsup = element_no_infix * _ * P"_" * _ * element_no_infix * primes_sup +
            element_no_infix * _ * P"_" * _ * element_no_infix * _ * P"^" * _ * element_no_infix
   sup =  element_no_infix * _ * primes_sup +
          element_no_infix * _ * P"^" * _ * element_no_infix
   sub = element_no_infix * _ * P"_" * _ * element_no_infix
   atom = natural + astop + C(utf8code - S"\\{}%^_&'") +
      (P"\\{" + P"\\}") / function (s) return string.sub(s, -1) end
   text = (
         P"\\text" *
         Cg(parameters, "options") *
         textgroup
      )
   command = (
         P"\\" *
         Cg(ctrl_sequence_name, "command") *
         Cg(parameters, "options") *
         (dim2_arg + group^0)
      )
   def = P"\\def" * _ * P"{" *
      Cg(ctrl_sequence_name, "command-name") * P"}" * _ *
      --P"[" * Cg(digit^1, "arity") * P"]" * _ *
      P"{" * V"mathlist" * P"}"
   argument = P"#" * Cg(pos_natural, "index")
end
245
-- luacheck: pop
246
-- stylua: ignore end
247
---@diagnostic enable: undefined-global, unused-local, lowercase-global
248

249
local mathParser = epnf.define(mathGrammar)
1✔
250

251
local commands = {}
1✔
252

253
-- A command type is a type for each argument it takes: either string or MathML
254
-- tree. If a command has no type, it is assumed to take only trees.
255
-- Tags like <mi>, <mo>, <mn> take a string, and this needs to be propagated in
256
-- commands that use them.
257

258
-- Kinds of argument a command may require (see the note above on command
-- types). Arguments typed `tree` are compiled as MathML sub-trees, arguments
-- typed `str` are flattened to a plain string before being handed over.
local objType = {
   str = 2, -- string argument, as taken by <mi>, <mo>, <mn>
   tree = 1, -- MathML tree argument (the default assumption)
}
262

263
--- Walk a macro body and record the type required for each `#n` argument.
-- @param accumulator Sequence of argument types collected so far; entry `n`
--   is the type inferred for argument `#n`. It is updated in place.
-- @param typeRequired Type (a value of `objType`) demanded by the current
--   context for whatever `body` is.
-- @param body Parsed node (table) to inspect.
-- @return The accumulator.
-- Raises (through `SU.error`) when `body` is not a table or when a known
-- command is applied to the wrong number of arguments.
local function inferArgTypes_aux (accumulator, typeRequired, body)
   if type(body) ~= "table" then
      SU.error("invalid argument to inferArgTypes_aux")
      return
   end
   if body.id == "argument" then
      -- Record the type AT the argument's own index. (Using table.insert
      -- with a position would shift previously recorded entries, so a body
      -- using `#1` twice would be seen as taking two arguments.)
      -- Arguments may be met out of order (`#2` before `#1`): pad the gap so
      -- the list has no holes. Padding uses the default `tree` type; it is
      -- overwritten if the argument shows up later.
      for i = #accumulator + 1, body.index - 1 do
         accumulator[i] = objType.tree
      end
      accumulator[body.index] = typeRequired
      return accumulator
   elseif body.id == "command" then
      if commands[body.command] then
         -- Registered command: its signature dictates the type of each child.
         local cmdArgTypes = commands[body.command][1]
         if #cmdArgTypes ~= #body then
            SU.error(
               "Wrong number of arguments ("
                  .. #body
                  .. ") for command "
                  .. body.command
                  .. " (should be "
                  .. #cmdArgTypes
                  .. ")"
            )
         else
            for i = 1, #cmdArgTypes do
               accumulator = inferArgTypes_aux(accumulator, cmdArgTypes[i], body[i])
            end
         end
         return accumulator
      elseif body.command == "mi" or body.command == "mo" or body.command == "mn" then
         -- Token elements take exactly one string argument.
         if #body ~= 1 then
            SU.error("Wrong number of arguments (" .. #body .. ") for command " .. body.command .. " (should be 1)")
         end
         accumulator = inferArgTypes_aux(accumulator, objType.str, body[1])
         return accumulator
      else
         -- Not a macro, recurse on children assuming tree type for all
         -- arguments
         for _, child in ipairs(body) do
            accumulator = inferArgTypes_aux(accumulator, objType.tree, child)
         end
         return accumulator
      end
   elseif body.id == "atom" then
      -- Atoms contain no argument reference.
      return accumulator
   else
      -- Simply recurse on children
      for _, child in ipairs(body) do
         accumulator = inferArgTypes_aux(accumulator, typeRequired, child)
      end
      return accumulator
   end
end
315

316
--- Infer the argument types of a macro from its parsed body.
-- Starts from an empty type list and a `tree` requirement at the top level.
-- @param body Parsed macro body.
-- @return Whatever `inferArgTypes_aux` returns (the list of argument types).
local function inferArgTypes (body)
   local noTypesYet = {}
   return inferArgTypes_aux(noTypesYet, objType.tree, body)
end
319

320
--- Register (or replace) a command in the `commands` table.
-- @param name Command name, used as key.
-- @param argTypes Argument types of the command, stored at index 1.
-- @param func Function implementing the command, stored at index 2.
local function registerCommand (name, argTypes, func)
   local entry = { argTypes, func }
   commands[name] = entry
end
323

324
-- Folds `func` over the entries of a table, starting from an empty table as
-- the accumulator: each step computes `acc = func(value, key, acc)`.
-- Entries yielded by `pl.utils.kpairs` are processed first (presumably the
-- non-integer keys -- confirm against Penlight), then the entries with numeric
-- indices, in order, via `ipairs`. Processing numeric indices in order is an
-- important property for MathML compilation below.
local function fold_pairs (func, tbl)
   local acc = {}
   for key, value in pl.utils.kpairs(tbl) do
      acc = func(value, key, acc)
   end
   for index, value in ipairs(tbl) do
      acc = func(value, index, acc)
   end
   return acc
end
337

338
--- Check that a predicate holds for every element of a list.
-- @param pred Function called with each element.
-- @param list Sequence traversed with `ipairs`.
-- @return `false` as soon as `pred` yields a falsy value, `true` otherwise
--   (including for an empty list).
local function forall (pred, list)
   local holds = true
   for _, item in ipairs(list) do
      if not pred(item) then
         holds = false
         break
      end
   end
   return holds
end
346

347
--- Flatten a parsed node into a plain string, for commands taking a string.
-- @param argEnv Table mapping argument indices to their values.
-- @param mathlist Parsed node: an atom, an argument, or a list of elements.
-- @return The atom content, the referenced argument value, or the
--   concatenation of the atoms/symbols found in the list.
-- Calls `SU.error` on any element that is neither an atom nor a command
-- known in `symbols`.
local function compileToStr (argEnv, mathlist)
   if #mathlist == 1 then
      if mathlist.id == "atom" then
         -- List is a single atom
         return mathlist[1]
      end
      local only = mathlist[1]
      if only.id == "argument" then
         return argEnv[only.index]
      end
   end
   if mathlist.id == "argument" then
      return argEnv[mathlist.index]
   end
   local str = ""
   for _, item in ipairs(mathlist) do
      local isSymbol = item.id ~= "atom" and item.id == "command" and symbols[item.command]
      if item.id == "atom" then
         str = str .. item[1]
      elseif isSymbol then
         str = str .. symbols[item.command]
      else
         SU.error("Encountered non-character token in command that takes a string")
      end
   end
   return str
end
369

370
--- Tell whether a node is an `mo` operator of the given atom type.
-- @param tree Node to test (may be nil).
-- @param typeOfAtom Atom type to compare with.
-- @return boolean
local function isOperatorKind (tree, typeOfAtom)
   -- Missing node (safeguard) or not an operator at all
   if not tree or tree.command ~= "mo" then
      return false
   end
   -- Case \mo[atom=xxx]{ops}
   -- E.g. \mo[atom=op]{lim}
   local options = tree.options
   if options and options.atom then
      return atoms.types[options.atom] == typeOfAtom
   end
   -- Case \mo{ops} where ops is registered with the requested type
   -- E.g. \mo{∑} or \sum
   local entry = tree[1] and operatorDict[tree[1]]
   if entry and entry.atom then
      return entry.atom == typeOfAtom
   end
   return false
end
389

390
--- Tell whether a node is an `mo` operator with movable limits.
-- True when the node's own `movablelimits` option is set, or when at least
-- one form of the operator in `operatorDict` has `movablelimits`.
-- @param tree Node to test.
-- @return boolean
local function isMoveableLimits (tree)
   if tree.command ~= "mo" then
      return false
   end
   local options = tree.options
   if options and SU.boolean(options.movablelimits, false) then
      return true
   end
   local entry = tree[1] and operatorDict[tree[1]]
   local forms = entry and entry.forms
   if forms then
      -- Leap of faith: We have no idea yet which form the operator will take
      -- in the final MathML.
      -- In the MathML operator dictionary, some operators have a movablelimits
      -- in some forms and not in others.
      -- Ex. \Join (U+2A1D) and \bigtriangleleft (U+2A1E) have it prefix but not
      -- infix, for some unspecified reason (?).
      -- Assume that if at least one form has movablelimits, the operator is
      -- considered to have movablelimits "in general".
      for _, form in pairs(forms) do
         if SU.boolean(form.movablelimits, false) then
            return true
         end
      end
   end
   return false
end
414
--- Tell whether a node is an `mo` operator of the "close" atom type.
local function isCloseOperator (tree)
   local closeType = atoms.types.close
   return isOperatorKind(tree, closeType)
end
417
--- Tell whether a node is an `mo` operator of the "open" atom type.
local function isOpeningOperator (tree)
   local openType = atoms.types.open
   return isOperatorKind(tree, openType)
end
420

421
--- Tell whether a symbol is listed in `operatorDict` with the accent atom type.
-- @return A falsy value (nil when the symbol has no entry) or a boolean.
local function isAccentSymbol (symbol)
   local entry = operatorDict[symbol]
   return entry and entry.atom == atoms.types.accent
end
424

425
--- Compile a parsed TeX-like tree into a MathML-like tree.
-- Strings are returned unchanged. For tables, the children are compiled first
-- (except under a `def` or the application of a registered command), then the
-- node is translated according to its `id`: a `command` field naming the
-- MathML element is set and `id` is cleared.
-- @param _ Unused.
-- @param arg_env Table mapping argument indices to their compiled values.
-- @param tree Parsed node (table) or string.
-- @return The compiled node; `nil` for a `def` (which only registers the
--   command); for a registered command, the result of the command function,
--   flagged with `id = "wrapper"` when it is an `mrow` to be unwrapped by the
--   parent.
local function compileToMathML_aux (_, arg_env, tree)
   if type(tree) == "string" then
      return tree
   end
   local function compile_and_insert (child, key, accumulator)
      if type(key) ~= "number" then
         -- Attribute (non-positional entry): copied as-is
         accumulator[key] = child
         return accumulator
      -- Compile all children, except if this node is a macro definition (no
      -- evaluation "under lambda") or the application of a registered macro
      -- (since evaluating the nodes depends on the macro's signature, it is more
      -- complex and done below)..
      elseif tree.id == "def" or (tree.id == "command" and commands[tree.command]) then
         -- Conserve unevaluated child
         table.insert(accumulator, child)
      else
         -- Compile next child
         local comp = compileToMathML_aux(nil, arg_env, child)
         if comp then
            if comp.id == "wrapper" then
               -- Insert all children of the wrapper node
               for _, inner_child in ipairs(comp) do
                  table.insert(accumulator, inner_child)
               end
            else
               table.insert(accumulator, comp)
            end
         end
      end
      return accumulator
   end
   tree = fold_pairs(compile_and_insert, tree)
   if tree.id == "math" then
      tree.command = "math"
      -- If the outermost `mrow` contains only other `mrow`s, remove it
      -- (allowing vertical stacking).
      if forall(function (c)
         return c.command == "mrow"
      end, tree[1]) then
         tree[1].command = "math"
         return tree[1]
      end
   elseif tree.id == "mathlist" then
      -- Turn mathlist into `mrow` except if it has exactly one `mtr` or `mtd`
      -- child.
      -- Note that `def`s have already been compiled away at this point.
      if #tree == 1 then
         if tree[1].command == "mtr" or tree[1].command == "mtd" then
            return tree[1]
         else
            tree.command = "mrow"
         end
      elseif tree.is_paired_explicit then
         -- We already did the re-wrapping of open/close delimiters in the parser
         -- via \left...\right, doing it would not harm but would add an extra mrow,
         -- which we can avoid directly to keep the tree minimal.
         -- N.B. We could have used the same flag, but it's easier to debug this way.
         tree.is_paired = true
         tree.is_paired_explicit = nil
         tree.command = "mrow"
      else
         -- Re-wrap content from opening to closing operator in an implicit mrow,
         -- so stretchy operators apply to the correct span of content.
         local children = {}
         local stack = {}
         for _, child in ipairs(tree) do
            if isOpeningOperator(child) then
               table.insert(stack, children)
               local mrow = {
                  command = "mrow",
                  is_paired = true, -- Internal flag to mark this re-wrapped mrow
                  options = {},
                  child,
               }
               table.insert(children, mrow)
               children = mrow
            elseif isCloseOperator(child) then
               table.insert(children, child)
               if #stack > 0 then
                  children = table.remove(stack)
               end
            elseif
               (child.command == "msubsup" or child.command == "msub" or child.command == "msup")
               and isCloseOperator(child[1]) -- child[1] is the base
            then
               if #stack > 0 then
                  -- Special case for closing operator with sub/superscript:
                  -- (....)^i must be interpreted as {(....)}^i, not as (...{)}^i
                  -- Push the closing operator into the mrow
                  table.insert(children, child[1])
                  -- Move the mrow into the msubsup, replacing the closing operator
                  child[1] = children
                  -- And insert the msubsup into the parent
                  children = table.remove(stack)
                  children[#children] = child
               else
                  table.insert(children, child)
               end
            else
               table.insert(children, child)
            end
         end
         -- Unbalanced opening operators leave entries on the stack: the
         -- outermost list is then the first one pushed.
         tree = #stack > 0 and stack[1] or children
         tree.command = "mrow"
      end
   elseif tree.id == "atom" then
      local codepoints = {}
      for _, cp in luautf8.codes(tree[1]) do
         table.insert(codepoints, cp)
      end
      local cp = codepoints[1]
      if
         #codepoints == 1
         and ( -- If length of UTF-8 string is 1
            cp >= SU.codepoint("A") and cp <= SU.codepoint("Z")
            or cp >= SU.codepoint("a") and cp <= SU.codepoint("z")
            or cp >= SU.codepoint("Α") and cp <= SU.codepoint("Ω")
            or cp >= SU.codepoint("α") and cp <= SU.codepoint("ω")
            or cp == SU.codepoint("ϑ")
            or cp == SU.codepoint("ϕ")
            or cp == SU.codepoint("ϰ")
            or cp == SU.codepoint("ϱ")
            or cp == SU.codepoint("ϖ")
            or cp == SU.codepoint("ϵ")
         )
      then
         tree.command = "mi"
      elseif lpeg.match(lpeg.R("09") ^ 1, tree[1]) then
         tree.command = "mn"
      else
         tree.command = "mo"
      end
      tree.options = {}
   -- Translate TeX-like sub/superscripts to `munderover` or `msubsup`,
   -- depending on whether the base is an operator with moveable limits.
   elseif tree.id == "sup" and isMoveableLimits(tree[1]) then
      tree.command = "mover"
   elseif tree.id == "sub" and isMoveableLimits(tree[1]) then
      tree.command = "munder"
   elseif tree.id == "subsup" and isMoveableLimits(tree[1]) then
      tree.command = "munderover"
   elseif tree.id == "supsub" and isMoveableLimits(tree[1]) then
      tree.command = "munderover"
      -- Swap so the subscript comes before the superscript
      local tmp = tree[2]
      tree[2] = tree[3]
      tree[3] = tmp
   elseif tree.id == "sup" then
      tree.command = "msup"
   elseif tree.id == "sub" then
      tree.command = "msub"
   elseif tree.id == "subsup" then
      tree.command = "msubsup"
   elseif tree.id == "supsub" then
      tree.command = "msubsup"
      -- Swap so the subscript comes before the superscript
      local tmp = tree[2]
      tree[2] = tree[3]
      tree[3] = tmp
   elseif tree.id == "def" then
      local commandName = tree["command-name"]
      local argTypes = inferArgTypes(tree[1])
      registerCommand(commandName, argTypes, function (compiledArgs)
         return compileToMathML_aux(nil, compiledArgs, tree[1])
      end)
      return nil
   elseif tree.id == "text" then
      tree.command = "mtext"
   elseif tree.id == "command" and commands[tree.command] then
      local argTypes = commands[tree.command][1]
      local cmdFun = commands[tree.command][2]
      local applicationTree = tree
      local cmdName = tree.command
      if #applicationTree ~= #argTypes then
         SU.error(
            "Wrong number of arguments ("
               .. #applicationTree
               .. ") for command "
               .. cmdName
               .. " (should be "
               .. #argTypes
               .. ")"
         )
      end
      -- Compile every argument
      local compiledArgs = {}
      for i, arg in pairs(applicationTree) do
         if type(i) == "number" then
            -- Store each compiled argument at its own index: `pairs` gives no
            -- ordering guarantee, so appending in traversal order could hand
            -- the arguments to the macro in the wrong positions.
            if argTypes[i] == objType.tree then
               compiledArgs[i] = compileToMathML_aux(nil, arg_env, arg)
            else
               compiledArgs[i] = compileToStr(arg_env, arg)
            end
         else
            -- Not an argument but an attribute. Add it to the compiled
            -- argument tree as-is
            compiledArgs[i] = applicationTree[i]
         end
      end
      local res = cmdFun(compiledArgs)
      if res.command == "mrow" then
         -- Mark the outer mrow to be unwrapped in the parent
         res.id = "wrapper"
      end
      return res
   elseif tree.id == "command" and symbols[tree.command] then
      local atom = { id = "atom", [1] = symbols[tree.command] }
      if isAccentSymbol(symbols[tree.command]) and #tree > 0 then
         -- LaTeX-style accents \vec{v} = <mover accent="true"><mi>v</mi><mo>→</mo></mover>
         local accent = {
            id = "command",
            command = "mover",
            options = {
               accent = "true",
            },
         }
         accent[1] = compileToMathML_aux(nil, arg_env, tree[1])
         accent[2] = compileToMathML_aux(nil, arg_env, atom)
         tree = accent
      elseif #tree > 0 then
         -- Play cool with LaTeX-style commands that don't take arguments:
         -- Edge case for non-accent symbols so we don't lose bracketed groups
         -- that might have been seen as command arguments.
         -- Ex. \langle{x}\rangle (without space after \langle)
         local sym = compileToMathML_aux(nil, arg_env, atom)
         -- Compile all children in-place
         for i, child in ipairs(tree) do
            tree[i] = compileToMathML_aux(nil, arg_env, child)
         end
         -- Insert symbol at the beginning,
         -- And add a wrapper mrow to be unwrapped in the parent.
         table.insert(tree, 1, sym)
         tree.command = "mrow"
         tree.id = "wrapper"
      else
         tree = compileToMathML_aux(nil, arg_env, atom)
      end
   elseif tree.id == "argument" then
      if arg_env[tree.index] then
         return arg_env[tree.index]
      else
         SU.error("Argument #" .. tree.index .. " has escaped its scope (probably not fully applied command).")
      end
   end
   tree.id = nil
   return tree
end
671

672
--- Render a compiled tree back to a TeX-like string (used for debug output).
-- The output has the form `\command[k=v, ...]{children}`; the option list and
-- the braces are omitted when there are no options or no children.
-- Options are sorted so that the output is deterministic (`pairs` order is
-- unspecified).
-- @param tree Compiled node (table with a `command` field) or string.
-- @return string
local function printMathML (tree)
   if type(tree) == "string" then
      return tree
   end
   -- Collect the pieces and join once, rather than re-concatenating the
   -- growing result for every child.
   local parts = { "\\" .. tree.command }
   if tree.options then
      local options = {}
      for k, v in pairs(tree.options) do
         options[#options + 1] = k .. "=" .. tostring(v)
      end
      if #options > 0 then
         table.sort(options)
         parts[#parts + 1] = "[" .. table.concat(options, ", ") .. "]"
      end
   end
   if #tree > 0 then
      parts[#parts + 1] = "{"
      for _, child in ipairs(tree) do
         parts[#parts + 1] = printMathML(child)
      end
      parts[#parts + 1] = "}"
   end
   return table.concat(parts)
end
695

696
--- Compile a parsed tree to MathML and log the result on the "texmath" debug flag.
-- @param _ Passed through to `compileToMathML_aux`.
-- @param arg_env Argument environment.
-- @param tree Parsed tree.
-- @return The compiled tree.
local function compileToMathML (_, arg_env, tree)
   local mathml = compileToMathML_aux(_, arg_env, tree)
   -- The message is built lazily, inside the callback given to SU.debug.
   local function describe ()
      return "Resulting MathML: " .. printMathML(mathml)
   end
   SU.debug("texmath", describe)
   return mathml
end
703

704
--- Parse TeX-like math source with `mathParser`.
-- @param _ Unused.
-- @param content Table whose first element is the source string.
-- @return The result of `epnf.parsestring`, also logged on the "texmath"
--   debug flag.
local function convertTexlike (_, content)
   local source = content[1]
   local parsed = epnf.parsestring(mathParser, source)
   -- The message is built lazily, inside the callback given to SU.debug.
   local function describe ()
      return "Parsed TeX math: " .. pl.pretty.write(parsed)
   end
   SU.debug("texmath", describe)
   return parsed
end
711

712
-- Token elements: each takes a single string argument, and its function
-- returns the compiled argument table unchanged.
for _, tag in ipairs({ "mi", "mo", "mn" }) do
   registerCommand(tag, { [1] = objType.str }, function (x)
      return x
   end)
end
721

722
compileToMathML(
2✔
723
   nil,
1✔
724
   {},
725
   convertTexlike(nil, {
1✔
726
      [==[
×
727
  \def{frac}{\mfrac{#1}{#2}}
728
  \def{sqrt}{\msqrt{#1}}
729
  \def{bi}{\mi[mathvariant=bold-italic]{#1}}
730
  \def{dsi}{\mi[mathvariant=double-struck]{#1}}
731
  \def{vec}{\mover[accent=true]{#1}{\rightarrow}}
732

733
  % From amsmath:
734
  \def{to}{\mo[atom=bin]{→}}
735
  \def{lim}{\mo[atom=op, movablelimits=true]{lim}}
736
  \def{gcd}{\mo[atom=op, movablelimits=true]{gcd}}
737
  \def{sup}{\mo[atom=op, movablelimits=true]{sup}}
738
  \def{inf}{\mo[atom=op, movablelimits=true]{inf}}
739
  \def{max}{\mo[atom=op, movablelimits=true]{max}}
740
  \def{min}{\mo[atom=op, movablelimits=true]{min}}
741
  % Those use U+202F NARROW NO-BREAK SPACE in their names
742
  \def{limsup}{\mo[atom=op, movablelimits=true]{lim sup}}
743
  \def{liminf}{\mo[atom=op, movablelimits=true]{lim inf}}
744
  \def{projlim}{\mo[atom=op, movablelimits=true]{proj lim}}
745
  \def{injlim}{\mo[atom=op, movablelimits=true]{inj lim}}
746

747
  % Other pre-defined operators from the TeXbook, p. 162:
748
  % TeX of course defines them with \mathop, so we use atom=op here.
749
  % MathML would use a <mi> here.
750
  % But we use a <mo> so the atom type is handled
751
  \def{arccos}{\mo[atom=op]{arccos}}
752
  \def{arcsin}{\mo[atom=op]{arcsin}}
753
  \def{arctan}{\mo[atom=op]{arctan}}
754
  \def{arg}{\mo[atom=op]{arg}}
755
  \def{cos}{\mo[atom=op]{cos}}
756
  \def{cosh}{\mo[atom=op]{cosh}}
757
  \def{cot}{\mo[atom=op]{cot}}
758
  \def{coth}{\mo[atom=op]{coth}}
759
  \def{csc}{\mo[atom=op]{csc}}
760
  \def{deg}{\mo[atom=op]{deg}}
761
  \def{det}{\mo[atom=op]{det}}
762
  \def{dim}{\mo[atom=op]{dim}}
763
  \def{exp}{\mo[atom=op]{exp}}
764
  \def{hom}{\mo[atom=op]{hom}}
765
  \def{ker}{\mo[atom=op]{ker}}
766
  \def{lg}{\mo[atom=op]{lg}}
767
  \def{ln}{\mo[atom=op]{ln}}
768
  \def{log}{\mo[atom=op]{log}}
769
  \def{Pr}{\mo[atom=op]{Pr}}
770
  \def{sec}{\mo[atom=op]{sec}}
771
  \def{sin}{\mo[atom=op]{sin}}
772
  \def{sinh}{\mo[atom=op]{sinh}}
773
  \def{tan}{\mo[atom=op]{tan}}
774
  \def{tanh}{\mo[atom=op]{tanh}}
775

776
  % Standard spaces gleaned from plain TeX
777
  \def{thinspace}{\mspace[width=thin]}
778
  \def{negthinspace}{\mspace[width=-thin]}
779
  \def{,}{\thinspace}
780
  \def{!}{\negthinspace}
781
  \def{medspace}{\mspace[width=med]}
782
  \def{negmedspace}{\mspace[width=-med]}
783
  \def{>}{\medspace}
784
  \def{thickspace}{\mspace[width=thick]}
785
  \def{negthickspace}{\mspace[width=-thick]}
786
  \def{;}{\thickspace}
787
  \def{enspace}{\mspace[width=1en]}
788
  \def{enskip}{\enspace}
789
  \def{quad}{\mspace[width=1em]}
790
  \def{qquad}{\mspace[width=2em]}
791

792
  % MathML says a single-character identifier must be in italic by default.
793
  % TeX however has the following Greek capital macros rendered in upright shape.
794
  % It so common that you've probably never seen Γ(x) written with an italic gamma.
795
  \def{Gamma}{\mi[mathvariant=normal]{Γ}}
796
  \def{Delta}{\mi[mathvariant=normal]{Δ}}
797
  \def{Theta}{\mi[mathvariant=normal]{Θ}}
798
  \def{Lambda}{\mi[mathvariant=normal]{Λ}}
799
  \def{Xi}{\mi[mathvariant=normal]{Ξ}}
800
  \def{Pi}{\mi[mathvariant=normal]{Π}}
801
  \def{Sigma}{\mi[mathvariant=normal]{Σ}}
802
  \def{Upsilon}{\mi[mathvariant=normal]{Υ}}
803
  \def{Phi}{\mi[mathvariant=normal]{Φ}}
804
  \def{Psi}{\mi[mathvariant=normal]{Ψ}}
805
  \def{Omega}{\mi[mathvariant=normal]{Ω}}
806
  % Some calligraphic (script), fraktur, double-struck styles:
807
  % Convenience for compatibility with LaTeX.
808
  \def{mathcal}{\mi[mathvariant=script]{#1}}
809
  \def{mathfrak}{\mi[mathvariant=fraktur]{#1}}
810
  \def{mathbb}{\mi[mathvariant=double-struck]{#1}}
811
  % Some style-switching commands for compatibility with LaTeX math.
812
  % Caveat emptor: LaTeX would allow these to apply to a whole formula.
813
  % We can't do that in MathML, as mathvariant applies to token elements only.
814
  % Also note that LaTeX and related packages may have many more such commands.
815
  % We only provide a few common ('historical') ones here.
816
  \def{mathrm}{\mi[mathvariant=normal]{#1}}
817
  \def{mathbf}{\mi[mathvariant=bold]{#1}}
818
  \def{mathit}{\mi[mathvariant=italic]{#1}}
819
  \def{mathsf}{\mi[mathvariant=sans-serif]{#1}}
820
  \def{mathtt}{\mi[mathvariant=monospace]{#1}}
821

822
  % Modulus operator forms
823
  % See Michael Downes & Barbara Beeton, "Short Math Guide for LaTeX"
824
  % American Mathematical Society (v2.0, 2017), §7.1 p. 18
825
  \def{bmod}{\mo[atom=bin]{mod}}
826
  \def{pmod}{\quad(\mo[atom=ord]{mod}\>#1)}
827
  \def{mod}{\quad \mo[atom=ord]{mod}\>#1}
828
  \def{pod}{\quad(#1)}
829

830
  % Phantom commands from TeX/LaTeX
831
  \def{phantom}{\mphantom{#1}}
832
  \def{hphantom}{\mpadded[height=0, depth=0]{\mphantom{#1}}}
833
  \def{vphantom}{\mpadded[width=0]{\mphantom{#1}}}
834
]==],
835
   })
836
)
837

838
return { convertTexlike, compileToMathML }
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc