• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

sile-typesetter / sile / 11573992455

29 Oct 2024 12:37PM UTC coverage: 68.22% (+10.2%) from 58.059%
11573992455

push

github

web-flow
Merge pull request #2140 from Omikhleia/feath-mathml-mtext

feat(math): Support MathML mtext and ms elements

8 of 22 new or added lines in 3 files covered. (36.36%)

1 existing line in 1 file now uncovered.

12257 of 17967 relevant lines covered (68.22%)

6069.6 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

77.71
/packages/math/texlike.lua
1
local syms = require("packages.math.unicode-symbols")
13✔
2
local bits = require("core.parserbits")
13✔
3

4
local epnf = require("epnf")
13✔
5
local lpeg = require("lpeg")
13✔
6

7
local atomType = syms.atomType
13✔
8
local symbolDefaults = syms.symbolDefaults
13✔
9
local symbols = syms.symbols
13✔
10

11
-- Grammar to parse TeX-like math
12
-- luacheck: push ignore
13
-- stylua: ignore start
14
---@diagnostic disable: undefined-global, unused-local, lowercase-global
15
-- Grammar definition for TeX-like math, written for epnf: the function is
-- handed to `epnf.define` (see `mathParser`), which supplies the environment
-- (`_ENV`) in which the LPeg shortcuts (P, S, R, C, V, Cg, Cf, Ct), the epnf
-- helpers (START, E, EOF, WS) and the rule assignments below are resolved.
-- Assignments to undeclared names (math, mathlist, atom, ...) define grammar
-- rules; `local` names are plain sub-patterns shared between rules.
local mathGrammar = function (_ENV)
   local _ = WS^0
   local eol = S"\r\n"
   local digit = R("09")
   local natural = digit^1 / tostring
   local pos_natural = R("19") * digit^0 / tonumber
   local ctrl_word = R("AZ", "az")^1
   local ctrl_symbol = P(1) - S"{}\\"
   local ctrl_sequence_name = C(ctrl_word + ctrl_symbol) / 1
   -- A comment runs from `%` to the end of the line (end of line optional, so
   -- that a comment may terminate the input).
   local comment = (
         P"%" *
         P(1-eol)^0 *
         eol^-1
      )
   -- One UTF-8 encoded character: a lead byte followed by the matching number
   -- of continuation bytes.
   local utf8cont = R("\128\191")
   local utf8code = lpeg.R("\0\127")
      + lpeg.R("\194\223") * utf8cont
      + lpeg.R("\224\239") * utf8cont * utf8cont
      + lpeg.R("\240\244") * utf8cont * utf8cont * utf8cont
   -- Identifiers inside \mo and \mi tags
   local sileID = C(bits.identifier + P(1)) / 1
   -- NOTE(review): mathMLID is not referenced by any rule below; kept so the
   -- set of declared sub-patterns is unchanged.
   local mathMLID = (utf8code - S"\\{}%")^1 / function (...)
         return table.concat({...})
      end
   local group = P"{" * V"mathlist" * (P"}" + E("`}` expected"))
   -- Simple amsmath-like \text command (no embedded math)
   local textgroup = P"{" * C((1-P"}")^1) * (P"}" + E("`}` expected"))
   local element_no_infix =
      V"def" +
      V"text" + -- Important: before command
      V"command" +
      group +
      V"argument" +
      V"atom"
   -- Infix forms are tried longest-first so that `a^b_c` is not consumed as
   -- a plain `sup` followed by a dangling `_c`.
   local element =
      V"supsub" +
      V"subsup" +
      V"sup" +
      V"sub" +
      element_no_infix
   -- Optional `[key=value, ...]` parameter list, captured as a table.
   local sep = S",;" * _
   local quotedString = (P'"' * C((1-P'"')^1) * P'"')
   local value = ( quotedString + (1-S",;]")^1 )
   local pair = Cg(sileID * _ * "=" * _ * C(value)) * sep^-1 / function (...)
      -- Keep the key (first capture) and the innermost value capture (last).
      local t = {...}; return t[1], t[#t]
   end
   local list = Cf(Ct"" * pair^0, rawset)
   local parameters = (
         P"[" *
         list *
         P"]"
      )^-1 / function (a)
            -- When no `[...]` is present the capture is not a table:
            -- normalize to an empty options table.
            return type(a)=="table" and a or {}
         end
   -- Two-dimensional argument: rows separated by `\\`, cells by `&`.
   -- (This pair of definitions used to appear twice, verbatim; the second
   -- copy shadowed the first, so a single copy is equivalent.)
   local dim2_arg_inner = Ct(V"mathlist" * (P"&" * V"mathlist")^0) /
      function (t)
         t.id = "mathlist"
         return t
      end
   local dim2_arg =
      Cg(P"{" *
         dim2_arg_inner *
         (P"\\\\" * dim2_arg_inner)^1 *
         (P"}" + E("`}` expected"))
         ) / function (...)
         local t = {...}
         -- Remove the last mathlist if empty. This way,
         -- `inner1 \\ inner2 \\` is the same as `inner1 \\ inner2`.
         if not t[#t][1] or not t[#t][1][1] then table.remove(t) end
         return pl.utils.unpack(t)
         end

   START "math"
   math = V"mathlist" * EOF"Unexpected character at end of math code"
   mathlist = (comment + (WS * _) + element)^0
   supsub = element_no_infix * _ * P"^" * _ * element_no_infix * _ *
      P"_" * _ * element_no_infix
   subsup = element_no_infix * _ * P"_" * _ * element_no_infix * _ *
      P"^" * _ * element_no_infix
   sup = element_no_infix * _ * P"^" * _ * element_no_infix
   sub = element_no_infix * _ * P"_" * _ * element_no_infix
   -- An atom is a run of digits, a single UTF-8 character that has no special
   -- meaning, or an escaped brace (captured without its backslash).
   atom = natural + C(utf8code - S"\\{}%^_&") +
      (P"\\{" + P"\\}") / function (s) return string.sub(s, -1) end
   text = (
         P"\\text" *
         Cg(parameters, "options") *
         textgroup
      )
   command = (
         P"\\" *
         Cg(ctrl_sequence_name, "command") *
         Cg(parameters, "options") *
         (dim2_arg + group^0)
      )
   def = P"\\def" * _ * P"{" *
      Cg(ctrl_sequence_name, "command-name") * P"}" * _ *
      --P"[" * Cg(digit^1, "arity") * P"]" * _ *
      P"{" * V"mathlist" * P"}"
   argument = P"#" * Cg(pos_natural, "index")
end
138
-- luacheck: pop
139
-- stylua: ignore end
140
---@diagnostic enable: undefined-global, unused-local, lowercase-global
141

142
-- Parser built from the grammar definition.
local mathParser = epnf.define(mathGrammar)

-- Registry of known commands, keyed by command name. Each entry is a pair
-- { argTypes, func } as stored by registerCommand.
local commands = {}

-- A command type is a type for each argument it takes: either string or MathML
-- tree. If a command has no type, it is assumed to take only trees.
-- Tags like <mi>, <mo>, <mn> take a string, and this needs to be propagated in
-- commands that use them.

local objType = {
   tree = 1,
   str = 2,
}
155

156
-- Walks the body of a macro definition and records, for every `#n` argument
-- it finds, the type (objType.tree or objType.str) required at that position.
--
-- accumulator: list of types inferred so far, indexed by argument number
--              (mutated in place and returned).
-- typeRequired: type expected by the enclosing context of `body`.
-- body: parsed tree node; anything that is not a table is rejected.
--
-- Returns the accumulator.
local function inferArgTypes_aux (accumulator, typeRequired, body)
   if type(body) == "table" then
      if body.id == "argument" then
         -- Store the type at the argument's own index. Inserting at that
         -- position instead would shift existing entries whenever an argument
         -- is used more than once, and fails on recent Lua versions when
         -- arguments first appear out of order.
         -- Arguments not seen (yet) default to the tree type so that the list
         -- never has holes and `#accumulator` stays meaningful.
         for i = #accumulator + 1, body.index - 1 do
            accumulator[i] = objType.tree
         end
         accumulator[body.index] = typeRequired
         return accumulator
      elseif body.id == "command" then
         if commands[body.command] then
            -- Registered command: its signature dictates the type of each of
            -- its arguments.
            local cmdArgTypes = commands[body.command][1]
            if #cmdArgTypes ~= #body then
               SU.error(
                  "Wrong number of arguments ("
                     .. #body
                     .. ") for command "
                     .. body.command
                     .. " (should be "
                     .. #cmdArgTypes
                     .. ")"
               )
            else
               for i = 1, #cmdArgTypes do
                  accumulator = inferArgTypes_aux(accumulator, cmdArgTypes[i], body[i])
               end
            end
            return accumulator
         elseif body.command == "mi" or body.command == "mo" or body.command == "mn" then
            -- Token elements take exactly one string argument.
            if #body ~= 1 then
               SU.error("Wrong number of arguments (" .. #body .. ") for command " .. body.command .. " (should be 1)")
            end
            accumulator = inferArgTypes_aux(accumulator, objType.str, body[1])
            return accumulator
         else
            -- Not a macro, recurse on children assuming tree type for all
            -- arguments
            for _, child in ipairs(body) do
               accumulator = inferArgTypes_aux(accumulator, objType.tree, child)
            end
            return accumulator
         end
      elseif body.id == "atom" then
         -- Atoms contain no arguments.
         return accumulator
      else
         -- Simply recurse on children
         for _, child in ipairs(body) do
            accumulator = inferArgTypes_aux(accumulator, typeRequired, child)
         end
         return accumulator
      end
   else
      SU.error("invalid argument to inferArgTypes_aux")
   end
end
208

209
-- Infers the argument types of a macro from its body, starting from an empty
-- signature and assuming the body as a whole is used as a tree.
local inferArgTypes = function (body)
   return inferArgTypes_aux({}, objType.tree, body)
end
212

213
-- Registers (or replaces) a command in the `commands` registry.
-- name: command name, used as the registry key.
-- argTypes: list of argument types, stored at index 1 of the entry.
-- func: function implementing the command, stored at index 2 of the entry.
local function registerCommand (name, argTypes, func)
   commands[name] = { argTypes, func }
end
216

217
-- Folds `func` over every entry of `tbl`, starting from a fresh empty table as
-- accumulator. `func` is called as func(value, key, accumulator) and must
-- return the (possibly new) accumulator.
-- Entries yielded by pl.utils.kpairs are processed first, then the entries with
-- numeric indices 1..n are processed in order. This ordering is an important
-- property for MathML compilation below.
local function fold_pairs (func, tbl)
   local accumulator = {}
   for k, v in pl.utils.kpairs(tbl) do
      accumulator = func(v, k, accumulator)
   end
   for i, v in ipairs(tbl) do
      accumulator = func(v, i, accumulator)
   end
   return accumulator
end
230

231
-- Returns true when `pred` holds for every element of the sequence `list`
-- (vacuously true for an empty list), false as soon as one element fails.
local function forall (pred, list)
   for _, x in ipairs(list) do
      if not pred(x) then
         return false
      end
   end
   return true
end
239

240
-- Flattens a parsed math list into a plain string, for commands that take a
-- string argument rather than a MathML tree.
-- argEnv: values of the enclosing macro's arguments, indexed by number.
-- mathlist: parsed node (a math list, a single atom or an argument).
-- Returns the resulting string; for an argument node, whatever is stored in
-- argEnv at that index. Raises (via SU.error) on any element that is neither
-- an atom nor a command naming a known symbol.
local compileToStr = function (argEnv, mathlist)
   if #mathlist == 1 and mathlist.id == "atom" then
      -- List is a single atom
      return mathlist[1]
   elseif #mathlist == 1 and mathlist[1].id == "argument" then
      return argEnv[mathlist[1].index]
   elseif mathlist.id == "argument" then
      return argEnv[mathlist.index]
   else
      -- Collect the pieces and join once, instead of growing a string.
      local pieces = {}
      for _, elt in ipairs(mathlist) do
         if elt.id == "atom" then
            pieces[#pieces + 1] = elt[1]
         elseif elt.id == "command" and symbols[elt.command] then
            pieces[#pieces + 1] = symbols[elt.command]
         else
            SU.error("Encountered non-character token in command that takes a string")
         end
      end
      return table.concat(pieces)
   end
end
262

263
-- Tells whether a compiled node is a "big" operator, i.e. an `mo` element
-- either flagged explicitly with the option atom=big or whose content is
-- registered as a big operator in the symbol defaults.
local function isBigOperator (tree)
   if tree.command ~= "mo" then
      return false
   end
   -- Case \mo[atom=big]{ops}
   -- E.g. \mo[atom=big]{lim}
   if tree.options and tree.options.atom == "big" then
      return true
   end
   -- Case \mo{ops} where ops is registered as big operator (unicode-symbols)
   -- E.g. \mo{∑} or \sum
   if tree[1] and symbolDefaults[tree[1]] and symbolDefaults[tree[1]].atom == atomType.bigOperator then
      return true
   end
   return false
end
279

280
-- Compiles a parsed TeX-like node into a MathML-like tree: each node gets a
-- `command` field (mrow, mi, mo, msub, ...), macro definitions are registered
-- and removed from the output, and applications of registered commands are
-- expanded.
-- _: unused.
-- arg_env: values of the arguments of the macro being expanded, indexed by
--          argument number.
-- tree: parsed node, or a string (returned unchanged).
-- Returns the compiled node, a string, or nil for a macro definition.
local function compileToMathML_aux (_, arg_env, tree)
   if type(tree) == "string" then
      return tree
   end
   -- Folding step: copies attributes (non-numeric keys) as-is and compiles or
   -- conserves children (numeric keys). `tree` here is still the node being
   -- compiled, since it is only replaced once the fold has completed.
   local function compile_and_insert (child, key, accumulator)
      if type(key) ~= "number" then
         accumulator[key] = child
         return accumulator
      -- Compile all children, except if this node is a macro definition (no
      -- evaluation "under lambda") or the application of a registered macro
      -- (since evaluating the nodes depends on the macro's signature, it is more
      -- complex and done below)..
      elseif tree.id == "def" or (tree.id == "command" and commands[tree.command]) then
         -- Conserve unevaluated child
         table.insert(accumulator, child)
      else
         -- Compile next child
         local comp = compileToMathML_aux(nil, arg_env, child)
         if comp then
            if comp.id == "wrapper" then
               -- Insert all children of the wrapper node
               for _, inner_child in ipairs(comp) do
                  table.insert(accumulator, inner_child)
               end
            else
               table.insert(accumulator, comp)
            end
         end
      end
      return accumulator
   end
   tree = fold_pairs(compile_and_insert, tree)
   if tree.id == "math" then
      tree.command = "math"
      -- If the outermost `mrow` contains only other `mrow`s, remove it
      -- (allowing vertical stacking).
      if forall(function (c)
         return c.command == "mrow"
      end, tree[1]) then
         tree[1].command = "math"
         return tree[1]
      end
   elseif tree.id == "mathlist" then
      -- Turn mathlist into `mrow` except if it has exactly one `mtr` or `mtd`
      -- child.
      -- Note that `def`s have already been compiled away at this point.
      if #tree == 1 and (tree[1].command == "mtr" or tree[1].command == "mtd") then
         return tree[1]
      end
      tree.command = "mrow"
   elseif tree.id == "atom" then
      local codepoints = {}
      for _, cp in luautf8.codes(tree[1]) do
         table.insert(codepoints, cp)
      end
      local cp = codepoints[1]
      if
         #codepoints == 1
         and ( -- If length of UTF-8 string is 1
            cp >= SU.codepoint("A") and cp <= SU.codepoint("Z")
            or cp >= SU.codepoint("a") and cp <= SU.codepoint("z")
            or cp >= SU.codepoint("Α") and cp <= SU.codepoint("Ω")
            or cp >= SU.codepoint("α") and cp <= SU.codepoint("ω")
         )
      then
         -- A single Latin or Greek letter is an identifier
         tree.command = "mi"
      elseif lpeg.match(lpeg.R("09") ^ 1, tree[1]) then
         tree.command = "mn"
      else
         tree.command = "mo"
      end
      tree.options = {}
   -- Translate TeX-like sub/superscripts to `munderover` or `msubsup`,
   -- depending on whether the base is a big operator
   elseif tree.id == "sup" and isBigOperator(tree[1]) then
      tree.command = "mover"
   elseif tree.id == "sub" and isBigOperator(tree[1]) then
      tree.command = "munder"
   elseif tree.id == "subsup" and isBigOperator(tree[1]) then
      tree.command = "munderover"
   elseif tree.id == "supsub" and isBigOperator(tree[1]) then
      tree.command = "munderover"
      -- Source order is base^sup_sub: swap so the subscript comes first
      tree[2], tree[3] = tree[3], tree[2]
   elseif tree.id == "sup" then
      tree.command = "msup"
   elseif tree.id == "sub" then
      tree.command = "msub"
   elseif tree.id == "subsup" then
      tree.command = "msubsup"
   elseif tree.id == "supsub" then
      tree.command = "msubsup"
      -- Source order is base^sup_sub: swap so the subscript comes first
      tree[2], tree[3] = tree[3], tree[2]
   elseif tree.id == "def" then
      -- Register the macro; its body is compiled at each application, with
      -- the compiled arguments as argument environment.
      local commandName = tree["command-name"]
      local argTypes = inferArgTypes(tree[1])
      registerCommand(commandName, argTypes, function (compiledArgs)
         return compileToMathML_aux(nil, compiledArgs, tree[1])
      end)
      return nil
   elseif tree.id == "text" then
      tree.command = "mtext"
   elseif tree.id == "command" and commands[tree.command] then
      local argTypes = commands[tree.command][1]
      local cmdFun = commands[tree.command][2]
      local applicationTree = tree
      local cmdName = tree.command
      if #applicationTree ~= #argTypes then
         SU.error(
            "Wrong number of arguments ("
               .. #applicationTree
               .. ") for command "
               .. cmdName
               .. " (should be "
               .. #argTypes
               .. ")"
         )
      end
      -- Compile every argument.
      -- Each compiled argument is stored at its own index: the traversal
      -- order of pairs() is unspecified, so appending in traversal order
      -- could shuffle positional arguments.
      local compiledArgs = {}
      for i, arg in pairs(applicationTree) do
         if type(i) == "number" then
            if argTypes[i] == objType.tree then
               compiledArgs[i] = compileToMathML_aux(nil, arg_env, arg)
            else
               compiledArgs[i] = compileToStr(arg_env, arg)
            end
         else
            -- Not an argument but an attribute. Add it to the compiled
            -- argument tree as-is
            compiledArgs[i] = arg
         end
      end
      local res = cmdFun(compiledArgs)
      if res.command == "mrow" then
         -- Mark the outer mrow to be unwrapped in the parent
         res.id = "wrapper"
      end
      return res
   elseif tree.id == "command" and symbols[tree.command] then
      -- A command naming a known symbol is compiled as the atom it denotes
      local atom = { id = "atom", [1] = symbols[tree.command] }
      tree = compileToMathML_aux(nil, arg_env, atom)
   elseif tree.id == "argument" then
      if arg_env[tree.index] then
         return arg_env[tree.index]
      else
         SU.error("Argument #" .. tree.index .. " has escaped its scope (probably not fully applied command).")
      end
   end
   tree.id = nil
   return tree
end
438

439
-- Renders a compiled tree back to a compact TeX-like string, for debugging:
-- \command[key=value, ...]{children}. Strings are returned unchanged.
-- Options are listed in sorted order so that the output is deterministic, and
-- are passed through tostring so that non-string values cannot raise.
local function printMathML (tree)
   if type(tree) == "string" then
      return tree
   end
   local parts = { "\\" .. tree.command }
   if tree.options then
      local options = {}
      for k, v in pairs(tree.options) do
         options[#options + 1] = tostring(k) .. "=" .. tostring(v)
      end
      if #options > 0 then
         table.sort(options)
         parts[#parts + 1] = "[" .. table.concat(options, ", ") .. "]"
      end
   end
   if #tree > 0 then
      parts[#parts + 1] = "{"
      for _, child in ipairs(tree) do
         parts[#parts + 1] = printMathML(child)
      end
      parts[#parts + 1] = "}"
   end
   return table.concat(parts)
end
462

463
-- Entry point of the compilation: compiles a parsed tree and, when the
-- "texmath" debug flag is active, logs the result in TeX-like notation.
-- The debug message is built lazily (passed as a function).
local function compileToMathML (_, arg_env, tree)
   local result = compileToMathML_aux(_, arg_env, tree)
   SU.debug("texmath", function ()
      return "Resulting MathML: " .. printMathML(result)
   end)
   return result
end
470

471
-- Parses TeX-like math source into a syntax tree.
-- _: unused.
-- content: table whose first element is the source string.
-- Returns the parse result; it is also logged under the "texmath" debug flag
-- (the message is built lazily).
local function convertTexlike (_, content)
   local ret = epnf.parsestring(mathParser, content[1])
   SU.debug("texmath", function ()
      return "Parsed TeX math: " .. pl.pretty.write(ret)
   end)
   return ret
end
478

479
-- Built-in commands.
-- `\%` takes no argument and yields a literal percent sign operator.
registerCommand("%", {}, function ()
   return { "%", command = "mo", options = {} }
end)
-- The token elements take one string argument and return the compiled
-- argument table they are given unchanged.
registerCommand("mi", { [1] = objType.str }, function (x)
   return x
end)
registerCommand("mo", { [1] = objType.str }, function (x)
   return x
end)
registerCommand("mn", { [1] = objType.str }, function (x)
   return x
end)
491

492
-- Standard macros, defined by compiling a chunk of TeX-like `\def`s at load
-- time (each `\def` registers a command as a side effect; the compiled result
-- itself is discarded).
-- NOTE(review): per the comment inside the chunk, the names `lim sup`,
-- `lim inf`, `proj lim` and `inj lim` are written with U+202F NARROW NO-BREAK
-- SPACE, not an ordinary space - confirm the character survived any copy.
compileToMathML(
   nil,
   {},
   convertTexlike(nil, {
      [==[
  \def{frac}{\mfrac{#1}{#2}}
  \def{sqrt}{\msqrt{#1}}
  \def{bi}{\mi[mathvariant=bold-italic]{#1}}
  \def{dsi}{\mi[mathvariant=double-struck]{#1}}

  \def{lim}{\mo[atom=big]{lim}}

  % From amsmath:
  \def{to}{\mo[atom=bin]{→}}
  \def{gcd}{\mo[atom=big]{gcd}}
  \def{sup}{\mo[atom=big]{sup}}
  \def{inf}{\mo[atom=big]{inf}}
  \def{max}{\mo[atom=big]{max}}
  \def{min}{\mo[atom=big]{min}}
  % Those use U+202F NARROW NO-BREAK SPACE in their names
  \def{limsup}{\mo[atom=big]{lim sup}}
  \def{liminf}{\mo[atom=big]{lim inf}}
  \def{projlim}{\mo[atom=big]{proj lim}}
  \def{injlim}{\mo[atom=big]{inj lim}}

  % Standard spaces gleaned from plain TeX
  \def{thinspace}{\mspace[width=thin]}
  \def{negthinspace}{\mspace[width=-thin]}
  \def{,}{\thinspace}
  \def{!}{\negthinspace}
  \def{medspace}{\mspace[width=med]}
  \def{negmedspace}{\mspace[width=-med]}
  \def{>}{\medspace}
  \def{thickspace}{\mspace[width=thick]}
  \def{negthickspace}{\mspace[width=-thick]}
  \def{;}{\thickspace}
  \def{enspace}{\mspace[width=1en]}
  \def{enskip}{\enspace}
  \def{quad}{\mspace[width=1em]}
  \def{qquad}{\mspace[width=2em]}

  % Modulus operator forms
  \def{bmod}{\mo{mod}}
  \def{pmod}{\quad(\mo{mod} #1)}
]==],
   })
)
539

540
-- Module value: the parser entry point followed by the compiler entry point.
return { convertTexlike, compileToMathML }
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc