• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

lunarmodules / Penlight / 21483820508

29 Jan 2026 03:19PM UTC coverage: 89.173% (+0.02%) from 89.157%
21483820508

push

github

web-flow
Merge 5dbb79181 into cccca1c9f

6 of 6 new or added lines in 1 file covered. (100.0%)

9 existing lines in 1 file now uncovered.

5477 of 6142 relevant lines covered (89.17%)

378.96 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.65
/lua/pl/xml.lua
1
--- XML LOM Utilities.
2
--
3
-- This implements some useful things on [LOM](http://matthewwild.co.uk/projects/luaexpat/lom.html) documents, such as returned by `lxp.lom.parse`.
4
-- In particular, it can convert LOM back into XML text, with optional pretty-printing control.
5
-- It is based on stanza.lua from [Prosody](http://hg.prosody.im/trunk/file/4621c92d2368/util/stanza.lua)
6
--
7
--     > d = xml.parse "<nodes><node id='1'>alice</node></nodes>"
8
--     > = d
9
--     <nodes><node id='1'>alice</node></nodes>
10
--     > = xml.tostring(d,'','  ')
11
--     <nodes>
12
--        <node id='1'>alice</node>
13
--     </nodes>
14
--
15
-- Can be used as a lightweight one-stop-shop for simple XML processing; a simple XML parser is included
16
-- but the default is to use `lxp.lom` if it can be found.
17
-- <pre>
18
-- Prosody IM
19
-- Copyright (C) 2008-2010 Matthew Wild
20
-- Copyright (C) 2008-2010 Waqas Hussain--
21
-- classic Lua XML parser by Roberto Ierusalimschy.
22
-- modified to output LOM format.
23
-- http://lua-users.org/wiki/LuaXml
24
-- </pre>
25
-- See @{06-data.md.XML|the Guide}
26
--
27
-- Dependencies: `pl.utils`
28
--
29
-- Soft Dependencies: `lxp.lom` (fallback is to use basic Lua parser)
30
-- @module pl.xml
31

32
local utils = require 'pl.utils'
18✔
33
local split         =   utils.split
18✔
34
local t_insert      =  table.insert
18✔
35
local t_concat      =  table.concat
18✔
36
local t_remove      =  table.remove
18✔
37
local s_match       =  string.match
18✔
38
local tostring      =      tostring
18✔
39
local setmetatable  =  setmetatable
18✔
40
local getmetatable  =  getmetatable
18✔
41
local pairs         =         pairs
18✔
42
local ipairs        =        ipairs
18✔
43
local type          =          type
18✔
44
local next          =          next
18✔
45
local print         =         print
18✔
46
local unpack        =  utils.unpack
18✔
47
local s_gsub        =   string.gsub
18✔
48
local s_sub         =    string.sub
18✔
49
local s_find        =   string.find
18✔
50
local pcall         =         pcall
18✔
51
local require       =       require
18✔
52

53

54
utils.raise_deprecation {
36✔
55
  source = "Penlight " .. utils._VERSION,
18✔
56
  message = "the contents of module 'pl.xml' has been deprecated, please use a more specialized library instead",
18✔
57
  version_removed = "2.0.0",
18✔
58
  deprecated_after = "1.11.0",
18✔
59
  no_trace = true,
18✔
60
}
61

62

63

64
local _M = {}
18✔
65
local Doc = { __type = "doc" };
18✔
66
Doc.__index = Doc;
18✔
67

68

69
local function is_text(s) return type(s) == 'string' end
9,342✔
70
local function is_tag(d) return type(d) == 'table' and is_text(d.tag) end
3,105✔
71

72

73

74
--- create a new document node.
75
-- @tparam string tag the tag name
76
-- @tparam[opt={}] table attr attributes (table of name-value pairs)
77
-- @return the Node object
78
-- @see xml.elem
79
-- @usage
80
-- local doc = xml.new("main", { hello = "world", answer = "42" })
81
-- print(doc)  -->  <main hello='world' answer='42'/>
82
function _M.new(tag, attr)
18✔
83
  if type(tag) ~= "string" then
1,836✔
84
    error("expected 'tag' to be a string value, got: " .. type(tag), 2)
9✔
85
  end
86
  attr = attr or {}
1,827✔
87
  if type(attr) ~= "table" then
1,827✔
88
    error("expected 'attr' to be a table value, got: " .. type(attr), 2)
9✔
89
  end
90

91
  local doc = { tag = tag, attr = attr, last_add = {}};
1,818✔
92
  return setmetatable(doc, Doc);
1,818✔
93
end
94

95

96
--- parse an XML document. By default, this uses lxp.lom.parse, but
97
-- falls back to basic_parse if `lxp.lom` cannot be loaded or if `use_basic` is truthy.
98
-- @param text_or_filename  file or string representation
99
-- @param is_file whether text_or_filename is a file name or not
100
-- @param use_basic do a basic parse
101
-- @return a parsed LOM document with the document metatables set
102
-- @return nil, error the error can either be a file error or a parse error
103
function _M.parse(text_or_filename, is_file, use_basic)
18✔
104
  local parser,status,lom
105
  if use_basic then
189✔
106
    parser = _M.basic_parse
180✔
107
  else
108
    status,lom = pcall(require,'lxp.lom')
9✔
109
    if not status then
9✔
110
      parser = _M.basic_parse
9✔
111
    else
112
      parser = lom.parse
×
113
    end
114
  end
115

116
  if is_file then
189✔
117
    local text, err = utils.readfile(text_or_filename)
9✔
118
    if not text then
9✔
119
      return nil, err
×
120
    end
121
    text_or_filename = text
9✔
122
  end
123

124
  local doc, err = parser(text_or_filename)
189✔
125
  if not doc then
189✔
126
    return nil, err
×
127
  end
128

129
  if lom then
189✔
130
    _M.walk(doc, false, function(_, d)
18✔
131
      setmetatable(d, Doc)
135✔
132
    end)
133
  end
134
  return doc
189✔
135
end
136

137

138
--- Create a Node with a set of children (text or Nodes) and attributes.
139
-- @tparam string tag a tag name
140
-- @tparam table|string items either a single child (text or Node), or a table where the hash
141
-- part is the attributes and the list part is the children (text or Nodes).
142
-- @return the new Node
143
-- @see xml.new
144
-- @see xml.tags
145
-- @usage
146
-- local doc = xml.elem("top", "hello world")                -- <top>hello world</top>
147
-- local doc = xml.elem("main", xml.new("child"))            -- <main><child/></main>
148
-- local doc = xml.elem("main", { "this ", "is ", "nice" })  -- <main>this is nice</main>
149
-- local doc = xml.elem("main", { xml.new "this",
150
--                                xml.new "is",
151
--                                xml.new "nice" })          -- <main><this/><is/><nice/></main>
152
-- local doc = xml.elem("main", { hello = "world" })         -- <main hello='world'/>
153
-- local doc = xml.elem("main", {
154
--   "prefix",
155
--   xml.elem("child", { "this ", "is ", "nice"}),
156
--   "postfix",
157
--   attrib = "value"
158
-- })   -- <main attrib='value'>prefix<child>this is nice</child>postfix</main>
159
function _M.elem(tag, items)
18✔
160
  local s = _M.new(tag)
1,206✔
161
  if is_text(items) then items = {items} end
1,206✔
162
  if is_tag(items) then
1,206✔
163
    t_insert(s,items)
9✔
164
  elseif type(items) == 'table' then
1,197✔
165
    for k,v in pairs(items) do
2,214✔
166
      if is_text(k) then
1,521✔
167
        s.attr[k] = v
216✔
168
        t_insert(s.attr,k)
216✔
169
      else
170
        s[k] = v
1,305✔
171
      end
172
    end
173
  end
174
  return s
1,206✔
175
end
176

177

178
--- given a list of names, return a number of element constructors.
179
-- If passing a comma-separated string, then whitespace surrounding the values
180
-- will be stripped.
181
--
182
-- The returned constructor functions are a shortcut to `xml.elem` where you
183
-- no longer provide the tag-name, but only the `items` table.
184
-- @tparam string|table list a list of names, or a comma-separated string.
185
-- @return (multiple) constructor functions; `function(items)`. For the `items`
186
-- parameter see `xml.elem`.
187
-- @see xml.elem
188
-- @usage
189
-- local new_parent, new_child = xml.tags 'mom, kid'
190
-- doc = new_parent {new_child 'Bob', new_child 'Annie'}
191
-- -- <mom><kid>Bob</kid><kid>Annie</kid></mom>
192
function _M.tags(list)
18✔
193
  local ctors = {}
45✔
194
  if is_text(list) then
45✔
195
    list = split(list:match("^%s*(.-)%s*$"),'%s*,%s*')
36✔
196
  end
197
  for i,tag in ipairs(list) do
198✔
198
    local function ctor(items)
199
      return _M.elem(tag,items)
270✔
200
    end
201
    ctors[i] = ctor
153✔
202
  end
203
  return unpack(ctors)
45✔
204
end
205

206

207
--- Adds a document Node, at current position.
208
-- This updates the last inserted position to the new Node.
209
-- @tparam string tag the tag name
210
-- @tparam[opt={}] table attrs attributes (table of name-value pairs)
211
-- @return the current node (`self`)
212
-- @usage
213
-- local doc = xml.new("main")
214
-- doc:addtag("penlight", { hello = "world"})
215
-- doc:addtag("expat")  -- added to 'penlight' since position moved
216
-- print(doc)  -->  <main><penlight hello='world'><expat/></penlight></main>
217
function Doc:addtag(tag, attrs)
18✔
218
  local s = _M.new(tag, attrs)
216✔
219
  self:add_child(s)
216✔
220
  t_insert(self.last_add, s)
216✔
221
  return self
216✔
222
end
223

224

225
--- Adds a text node, at current position.
226
-- @tparam string text a string
227
-- @return the current node (`self`)
228
-- @usage
229
-- local doc = xml.new("main")
230
-- doc:text("penlight")
231
-- doc:text("expat")
232
-- print(doc)  -->  <main>penlightexpat</main>
233
function Doc:text(text)
18✔
234
  self:add_child(text)
189✔
235
  return self
189✔
236
end
237

238

239
--- Moves current position up one level.
240
-- @return the current node (`self`)
241
function Doc:up()
18✔
242
  t_remove(self.last_add)
162✔
243
  return self
162✔
244
end
245

246

247
--- Resets current position to top level.
248
-- Resets to the `self` node.
249
-- @return the current node (`self`)
250
function Doc:reset()
18✔
251
  local last_add = self.last_add
9✔
252
  for i = 1,#last_add do
36✔
253
    last_add[i] = nil
27✔
254
  end
255
  return self
9✔
256
end
257

258

259
--- Append a child to the current Node (ignoring current position).
260
-- @param child a child node (either text or a document)
261
-- @return the current node (`self`)
262
-- @usage
263
-- local doc = xml.new("main")
264
-- doc:add_direct_child("dog")
265
-- doc:add_direct_child(xml.new("child"))
266
-- doc:add_direct_child("cat")
267
-- print(doc)  -->  <main>dog<child/>cat</main>
268
function Doc:add_direct_child(child)
18✔
269
  t_insert(self, child)
594✔
270
  return self
594✔
271
end
272

273

274
--- Append a child at the current position (without changing position).
275
-- @param child a child node (either text or a document)
276
-- @return the current node (`self`)
277
-- @usage
278
-- local doc = xml.new("main")
279
-- doc:addtag("one")
280
-- doc:add_child(xml.new("item1"))
281
-- doc:add_child(xml.new("item2"))
282
-- doc:add_child(xml.new("item3"))
283
-- print(doc)  -->  <main><one><item1/><item2/><item3/></one></main>
284
function Doc:add_child(child)
18✔
285
  (self.last_add[#self.last_add] or self):add_direct_child(child)
558✔
286
  return self
558✔
287
end
288

289

290
--accessing attributes: useful not to have to expose implementation (attr)
291
--but also can allow attr to be nil in any future optimizations
292

293

294
--- Set attributes of a document node.
295
-- Will add/overwrite values, but will not remove existing ones.
296
-- Operates on the Node itself, will not take position into account.
297
-- @tparam table t a table containing attribute/value pairs
298
-- @return the current node (`self`)
299
function Doc:set_attribs(t)
18✔
300
  -- TODO: keep array part in sync
301
  for k,v in pairs(t) do
90✔
302
    self.attr[k] = v
54✔
303
  end
304
  return self
36✔
305
end
306

307

308
--- Set a single attribute of a document node.
309
-- Operates on the Node itself, will not take position into account.
310
-- @param a attribute
311
-- @param v its value, pass in `nil` to delete the attribute
312
-- @return the current node (`self`)
313
function Doc:set_attrib(a,v)
18✔
314
  -- TODO: keep array part in sync
315
  self.attr[a] = v
18✔
316
  return self
18✔
317
end
318

319

320
--- Gets the attributes of a document node.
321
-- Operates on the Node itself, will not take position into account.
322
-- @return table with attributes (attribute/value pairs)
323
function Doc:get_attribs()
18✔
324
  return self.attr
9✔
325
end
326

327

328

329
local template_cache do
18✔
330
  local templ_cache = {}
18✔
331

332
  -- @param templ a template, a string being valid xml to be parsed, or a Node object
333
  function template_cache(templ)
18✔
334
    if is_text(templ) then
108✔
335
      if templ_cache[templ] then
99✔
336
        -- cache hit
337
        return templ_cache[templ]
×
338

339
      else
340
        -- parse and cache
341
        local ptempl, err = _M.parse(templ,false,true)
99✔
342
        if not ptempl then
99✔
343
          return nil, err
×
344
        end
345
        templ_cache[templ] = ptempl
99✔
346
        return ptempl
99✔
347
      end
348
    end
349

350
    if is_tag(templ) then
9✔
351
      return templ
9✔
352
    end
353

354
    return nil, "template is not a document"
×
355
  end
356
end
357

358

359
do
360
  local function is_data(data)
361
    return #data == 0 or type(data[1]) ~= 'table'
18✔
362
  end
363

364

365
  local function prepare_data(data)
366
    -- a hack for ensuring that $1 maps to first element of data, etc.
367
    -- Either this or could change the gsub call just below.
368
    for i,v in ipairs(data) do
18✔
369
      data[tostring(i)] = v
×
370
    end
371
  end
372

373
  --- create a substituted copy of a document.
374
  -- @param template may be a document or a string representation which will be parsed and cached
375
  -- @param data a table of name-value pairs or a list of such tables
376
  -- @return an XML document
377
  function Doc.subst(template, data)
18✔
378
    if type(data) ~= 'table' or not next(data) then
9✔
379
      return nil, "data must be a non-empty table"
×
380
    end
381

382
    if is_data(data) then
9✔
383
      prepare_data(data)
×
384
    end
385

386
    local templ, err = template_cache(template)
9✔
387
    if err then
9✔
388
      return nil, err
×
389
    end
390

391
    local function _subst(item)
392
      return _M.clone(templ, function(s)
36✔
393
        return s:gsub('%$(%w+)', item)
126✔
394
      end)
395
    end
396

397
    if is_data(data) then
9✔
398
      return _subst(data)
×
399
    end
400

401
    local list = {}
9✔
402
    for _, item in ipairs(data) do
27✔
403
      prepare_data(item)
18✔
404
      t_insert(list, _subst(item))
18✔
405
    end
406

407
    if data.tag then
9✔
408
      list = _M.elem(data.tag,list)
×
409
    end
410
    return list
9✔
411
  end
412
end
413

414

415
--- Return the first child with a given tag name (non-recursive).
416
-- @param tag the tag name
417
-- @return the child Node found or `nil` if not found
418
function Doc:child_with_name(tag)
18✔
419
  for _, child in ipairs(self) do
27✔
420
    if child.tag == tag then
27✔
421
      return child
9✔
422
    end
423
  end
424
end
425

426

427
do
428
  -- @param self document node to traverse
429
  -- @param tag tag-name to look for
430
  -- @param list array table to add the matching ones to
431
  -- @param recurse if truthy, recursively search the node
432
  local function _children_with_name(self, tag, list, recurse)
433
    -- TODO: protect against recursion
434
    for _, child in ipairs(self) do
729✔
435
      if type(child) == 'table' then
414✔
436
        if child.tag == tag then
288✔
437
          t_insert(list, child)
63✔
438
        end
439
        if recurse then
288✔
440
          _children_with_name(child, tag, list, recurse)
288✔
441
        end
442
      end
443
    end
444
  end
445

446
  --- Returns all elements in a document that have a given tag.
447
  -- @tparam string tag a tag name
448
  -- @tparam[opt=false] boolean dont_recurse optionally only return the immediate children with this tag name
449
  -- @return a list of elements found, list will be empty if none was found.
450
  function Doc:get_elements_with_name(tag, dont_recurse)
18✔
451
    local res = {}
27✔
452
    _children_with_name(self, tag, res, not dont_recurse)
27✔
453
    return res
27✔
454
  end
455
end
456

457

458

459
--- Iterator over all children of a document node, including text nodes.
460
-- This function is not recursive, so returns only direct child nodes.
461
-- @return iterator that returns a single Node per iteration.
462
function Doc:children()
18✔
463
  local i = 0;
18✔
464
  return function (a)
465
    i = i + 1
45✔
466
    return a[i];
45✔
467
  end, self, i;
18✔
468
end
469

470

471
--- Return the first child element of a node, if it exists.
472
-- This will skip text nodes.
473
-- @return first child Node or `nil` if there is none.
474
function Doc:first_childtag()
18✔
475
  if #self == 0 then
18✔
476
    return
×
477
  end
478
  for _, t in ipairs(self) do
45✔
479
    if is_tag(t) then
36✔
480
      return t
9✔
481
    end
482
  end
483
end
484

485

486
--- Iterator that matches tag names, and a namespace (non-recursive).
487
-- @tparam[opt=nil] string tag tag names to return. Returns all tags if not provided.
488
-- @tparam[opt=nil] string xmlns the namespace value ('xmlns' attribute) to return. If not
489
-- provided, defaults to this node's own 'xmlns' attribute (and matches all namespaces if that is unset too).
490
-- @return iterator that returns a single Node per iteration.
491
function Doc:matching_tags(tag, xmlns)
18✔
492
  -- TODO: this doesn't make sense??? namespaces are not "xmlns", as matched below
493
  -- but "xmlns:name"... so should be a string-prefix match if anything...
494
  xmlns = xmlns or self.attr.xmlns;
495
  local tags = self
×
496
  local next_i = 1
×
497
  local max_i = #tags
×
498
  local node
499
  return function ()
500
      for i = next_i, max_i do
×
501
        node = tags[i];
502
        if (not tag or node.tag == tag) and
×
503
           (not xmlns or xmlns == node.attr.xmlns) then
×
504
          next_i = i + 1
×
505
          return node
×
506
        end
507
      end
508
    end, tags, next_i
×
509
end
510

511

512
--- Iterator over all child tags of a document node. This will skip over
513
-- text nodes.
514
-- @return iterator that returns a single Node per iteration.
515
function Doc:childtags()
18✔
516
  local i = 0;
27✔
517
  return function (a)
518
    local v
519
      repeat
520
        i = i + 1
72✔
521
        v = self[i]
72✔
522
        if v and type(v) == 'table' then
72✔
523
          return v
45✔
524
        end
525
      until not v
27✔
526
    end, self[1], i;
36✔
527
end
528

529

530
--- Visit child Nodes of a node and call a function, possibly modifying the document.
531
-- Text elements will be skipped.
532
-- This is not recursive, so only direct children will be passed.
533
-- @tparam function callback a function with signature `function(node)`, passed the node.
534
-- The element will be updated with the returned value, or deleted if it returns `nil`.
535
function Doc:maptags(callback)
18✔
536
  local i = 1;
18✔
537

538
  while i <= #self do
108✔
539
    if is_tag(self[i]) then
90✔
540
      local ret = callback(self[i]);
54✔
541
      if ret == nil then
54✔
542
        -- remove it
543
        t_remove(self, i);
27✔
544

545
      else
546
        -- update it
547
        self[i] = ret;
27✔
548
        i = i + 1;
27✔
549
      end
550
    else
551
      i = i + 1
36✔
552
    end
553
  end
554

555
  return self;
18✔
556
end
557

558

559
do
560
  local xml_escape_table = {
18✔
561
    ["'"] = "&apos;",
18✔
562
    ['"'] = "&quot;",
18✔
563
    ["<"] = "&lt;",
18✔
564
    [">"] = "&gt;",
18✔
565
    ["&"] = "&amp;",
18✔
566
  }
567

568
  --- Escapes a string for safe use in xml.
569
  -- Handles quotes(single+double), less-than, greater-than, and ampersand.
570
  -- Non-printable control characters (ASCII 0-31 except tab/LF/CR, and DEL 127) are escaped as \xHH.
571
  -- High bytes (128-255) are preserved to support UTF-8 encoding.
572
  -- @tparam string str string value to escape
573
  -- @return escaped string
574
  -- @usage
575
  -- local esc = xml.xml_escape([["'<>&]])  --> "&quot;&apos;&lt;&gt;&amp;"
576
  -- local esc = xml.xml_escape("hello\x00world")  --> "hello\\x00world"
577
  function _M.xml_escape(str)
18✔
578
    -- First, escape non-printable control characters to \xHH format
579
    -- Pattern: [\x00-\x08\x0B\x0C\x0E-\x1F\x7F]
580
    -- Excludes: tab(0x09), newline(0x0A), carriage return(0x0D)
581
    -- Preserves: high bytes (128-255) for UTF-8 support
582
    str = s_gsub(str, "[%z\1-\8\11\12\14-\31\127]", function(c)
1,926✔
583
      return ("\\x%02X"):format(c:byte())
855✔
584
    end)
585

586
    -- Then, escape XML special characters
587
    return (s_gsub(str, "['&<>\"]", xml_escape_table))
963✔
588
  end
589
end
590
local xml_escape = _M.xml_escape
18✔
591

592
do
593
  local escape_table = {
18✔
594
    quot = '"',
18✔
595
    apos = "'",
18✔
596
    lt = "<",
18✔
597
    gt = ">",
18✔
598
    amp = "&",
18✔
599
  }
600

601
  --- Unescapes a string from xml.
602
  -- Handles quotes(single+double), less-than, greater-than, and ampersand.
603
  -- Also handles \xHH escape sequences for control characters.
604
  -- @tparam string str string value to unescape
605
  -- @return unescaped string
606
  -- @usage
607
  -- local unesc = xml.xml_unescape("&quot;&apos;&lt;&gt;&amp;")  --> [["'<>&]]
608
  -- local unesc = xml.xml_unescape("hello\\x00world")  --> "hello\x00world"
609
  function _M.xml_unescape(str)
18✔
610
    -- First, unescape \xHH sequences
611
    str = str:gsub("\\x(%x%x)", function(hex)
3,006✔
612
      return string.char(tonumber(hex, 16))
387✔
613
    end)
614

615
    -- Then, unescape XML entities
616
    return (str:gsub( "&(%a+);", escape_table))
1,503✔
617
  end
618
end
619
local xml_unescape = _M.xml_unescape
18✔
620

621
-- pretty printing
622
-- if indent, then put each new tag on its own line
623
-- if attr_indent, put each new attribute on its own line
624
local function _dostring(t, buf, parentns, block_indent, tag_indent, attr_indent)
625
  local nsid = 0
1,368✔
626
  local tag = t.tag
1,368✔
627

628
  local lf = ""
1,368✔
629
  if tag_indent then
1,368✔
630
    lf = '\n'..block_indent
162✔
631
  end
632

633
  local alf = " "
1,368✔
634
  if attr_indent then
1,368✔
UNCOV
635
    alf = '\n'..block_indent..attr_indent
×
636
  end
637

638
  t_insert(buf, lf.."<"..tag)
1,368✔
639

640
  local function write_attr(k,v)
641
    if s_find(k, "\1", 1, true) then
306✔
UNCOV
642
      nsid = nsid + 1
×
UNCOV
643
      local ns, attrk = s_match(k, "^([^\1]*)\1?(.*)$")
×
UNCOV
644
      t_insert(buf, " xmlns:ns"..nsid.."='"..xml_escape(ns).."' ".."ns"..nsid..":"..attrk.."='"..xml_escape(v).."'")
×
645

646
    elseif not (k == "xmlns" and v == parentns) then
306✔
647
      t_insert(buf, alf..k.."='"..xml_escape(v).."'");
306✔
648
    end
649
  end
650

651
  -- it's useful for testing to have predictable attribute ordering, if available
652
  if #t.attr > 0 then
1,368✔
653
    -- TODO: the key-value list is leading, what if they are not in-sync
654
    for _,k in ipairs(t.attr) do
162✔
655
      write_attr(k,t.attr[k])
90✔
656
    end
657
  else
658
    for k, v in pairs(t.attr) do
1,512✔
659
      write_attr(k,v)
216✔
660
    end
661
  end
662

663
  local len = #t
1,368✔
664
  local has_children
665

666
  if len == 0 then
1,368✔
667
    t_insert(buf, attr_indent and '\n'..block_indent.."/>" or "/>")
585✔
668

669
  else
670
    t_insert(buf, ">");
783✔
671

672
    for n = 1, len do
2,061✔
673
      local child = t[n]
1,278✔
674

675
      if child.tag then
1,278✔
676
        has_children = true
801✔
677
        _dostring(child, buf, t.attr.xmlns, block_indent and block_indent..tag_indent, tag_indent, attr_indent)
801✔
678

679
      else
680
        -- text element
681
        t_insert(buf, xml_escape(child))
477✔
682
      end
683
    end
684

685
    t_insert(buf, (has_children and lf or '').."</"..tag..">");
783✔
686
  end
687
end
688

689
--- Function to pretty-print an XML document.
690
-- @param doc an XML document
691
-- @tparam[opt] string|int b_ind an initial block-indent (required when `t_ind` is set)
692
-- @tparam[opt] string|int t_ind a tag-indent for each level (required when `a_ind` is set)
693
-- @tparam[opt] string|int a_ind if given, indent each attribute pair and put on a separate line
694
-- @tparam[opt] string|bool xml_preface force prefacing with an <?xml...> declaration: a string is used verbatim as the preface, any other truthy value selects the default `&lt;?xml version='1.0'?&gt;`.
695
-- @return a string representation
696
-- @see Doc:tostring
697
function _M.tostring(doc, b_ind, t_ind, a_ind, xml_preface)
18✔
698
  local buf = {}
567✔
699

700
  if type(b_ind) == "number" then b_ind = (" "):rep(b_ind) end
567✔
701
  if type(t_ind) == "number" then t_ind = (" "):rep(t_ind) end
567✔
702
  if type(a_ind) == "number" then a_ind = (" "):rep(a_ind) end
567✔
703

704
  if xml_preface then
567✔
705
    if type(xml_preface) == "string" then
9✔
UNCOV
706
      buf[1] = xml_preface
×
707
    else
708
      buf[1] = "<?xml version='1.0'?>"
9✔
709
    end
710
  end
711

712
  _dostring(doc, buf, nil, b_ind, t_ind, a_ind, xml_preface)
567✔
713

714
  return t_concat(buf)
567✔
715
end
716

717

718
Doc.__tostring = _M.tostring
18✔
719

720

721
--- Method to pretty-print an XML document.
722
-- Invokes `xml.tostring`.
723
-- @tparam[opt] string|int b_ind an initial indent (required when `t_ind` is set)
724
-- @tparam[opt] string|int t_ind an indent for each level (required when `a_ind` is set)
725
-- @tparam[opt] string|int a_ind if given, indent each attribute pair and put on a separate line
726
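-- @tparam[opt] string|bool xml_preface force prefacing with default or custom <?xml...>; a string is used verbatim, any other truthy value selects `&lt;?xml version='1.0'?&gt;`, and no preface is written when omitted.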
727
-- @return a string representation
728
-- @see xml.tostring
729
function Doc:tostring(b_ind, t_ind, a_ind, xml_preface)
18✔
730
  return _M.tostring(self, b_ind, t_ind, a_ind, xml_preface)
378✔
731
end
732

733

734
--- get the full text value of an element.
735
-- @return a single string with all text elements concatenated
736
-- @usage
737
-- local doc = xml.new("main")
738
-- doc:text("one")
739
-- doc:add_child(xml.elem "two")
740
-- doc:text("three")
741
--
742
-- local t = doc:get_text()    -->  "onethree"
743
function Doc:get_text()
18✔
744
  local res = {}
81✔
745
  for i,el in ipairs(self) do
180✔
746
    if is_text(el) then t_insert(res,el) end
99✔
747
  end
748
  return t_concat(res);
81✔
749
end
750

751

752
do
753
  local function _copy(object, kind, parent, strsubst, lookup_table)
754
    if type(object) ~= "table" then
576✔
755
      if strsubst and is_text(object) then
351✔
756
        return strsubst(object, kind, parent)
198✔
757
      else
758
        return object
153✔
759
      end
760
    end
761

762
    if lookup_table[object] then
225✔
763
      error("recursion detected")
9✔
764
    end
765
    lookup_table[object] = true
216✔
766

767
    local new_table = {}
216✔
768
    lookup_table[object] = new_table
216✔
769

770
    local tag = object.tag
216✔
771
    new_table.tag = _copy(tag, '*TAG', parent, strsubst, lookup_table)
216✔
772

773
    if object.attr then
216✔
774
      local res = {}
216✔
775
      for attr, value in pairs(object.attr) do
306✔
776
        if type(attr) == "string" then
90✔
777
          res[attr] = _copy(value, attr, object, strsubst, lookup_table)
45✔
778
        end
779
      end
780
      new_table.attr = res
216✔
781
    end
782

783
    for index = 1, #object do
468✔
784
      local v = _copy(object[index], '*TEXT', object, strsubst, lookup_table)
261✔
785
      t_insert(new_table,v)
252✔
786
    end
787

788
    return setmetatable(new_table, getmetatable(object))
207✔
789
  end
790

791
  --- Returns a copy of a document.
792
  -- The `strsubst` parameter is a callback with signature `function(object, kind, parent)`.
793
  --
794
  -- Param `kind` has the following values, and parameters:
795
  --
796
  -- - `"*TAG"`: `object` is the tag-name, `parent` is the Node object. Returns the new tag name.
797
  --
798
  -- - `"*TEXT"`: `object` is the text-element, `parent` is the Node object. Returns the new text value.
799
  --
800
  -- - other strings not prefixed with `*`: `kind` is the attribute name, `object` is the
801
  --   attribute value, `parent` is the Node object. Returns the new attribute value.
802
  --
803
  -- @tparam Node|string doc a Node object or string (text node)
804
  -- @tparam[opt] function strsubst an optional function for handling string copying
805
  -- which could do substitution, etc.
806
  -- @return copy of the document
807
  -- @see Doc:filter
808
  function _M.clone(doc, strsubst)
18✔
809
    return _copy(doc, nil, nil, strsubst, {})
54✔
810
  end
811
end
812

813

814
--- Returns a copy of a document.
815
-- This is the method version of `xml.clone`.
816
-- @see xml.clone
817
-- @name Doc:filter
818
-- @tparam[opt] function strsubst an optional function for handling string copying
819
Doc.filter = _M.clone -- also available as method
18✔
820

821
do
822
  local function _compare(t1, t2, recurse_check)
823

824
    local ty1 = type(t1)
522✔
825
    local ty2 = type(t2)
522✔
826

827
    if ty1 ~= ty2 then
522✔
828
      return false, 'type mismatch'
18✔
829
    end
830

831
    if ty1 == 'string' then
504✔
832
      if t1 == t2 then
117✔
833
        return true
108✔
834
      else
835
        return false, 'text '..t1..' ~= text '..t2
9✔
836
      end
837
    end
838

839
    if ty1 ~= 'table' or ty2 ~= 'table' then
387✔
840
      return false, 'not a document'
9✔
841
    end
842

843
    if recurse_check[t1] then
378✔
844
      return false, "recursive document"
9✔
845
    end
846
    recurse_check[t1] = true
369✔
847

848
    if t1.tag ~= t2.tag then
369✔
849
      return false, 'tag  '..t1.tag..' ~= tag '..t2.tag
9✔
850
    end
851

852
    if #t1 ~= #t2 then
360✔
853
      return false, 'size '..#t1..' ~= size '..#t2..' for tag '..t1.tag
9✔
854
    end
855

856
    -- compare attributes
857
    for k,v in pairs(t1.attr) do
543✔
858
      local t2_value = t2.attr[k]
210✔
859
      if type(k) == "string" then
210✔
860
        if t2_value ~= v then return false, 'mismatch attrib' end
138✔
861
      else
862
        if t2_value ~= nil and t2_value ~= v then return false, "mismatch attrib order" end
72✔
863
      end
864
    end
865
    for k,v in pairs(t2.attr) do
481✔
866
      local t1_value = t1.attr[k]
157✔
867
      if type(k) == "string" then
157✔
868
        if t1_value ~= v then return false, 'mismatch attrib' end
112✔
869
      else
870
        if t1_value ~= nil and t1_value ~= v then return false, "mismatch attrib order" end
45✔
871
      end
872
    end
873

874
    -- compare children
875
    for i = 1, #t1 do
675✔
876
      local ok, err = _compare(t1[i], t2[i], recurse_check)
378✔
877
      if not ok then
378✔
878
        return ok, err
27✔
879
      end
880
    end
881
    return true
297✔
882
  end
883

884
  --- Compare two documents or elements.
885
  -- Equality is based on tag, child nodes (text and tags), attributes and order
886
  -- of those (order only fails if both are given, and not equal).
887
  -- @tparam Node|string t1 a Node object or string (text node)
888
  -- @tparam Node|string t2 a Node object or string (text node)
889
  -- @treturn boolean `true` when the Nodes are equal.
890
  function _M.compare(t1,t2)
18✔
891
    return _compare(t1, t2, {})
144✔
892
  end
893
end
894

895

896
--- is this value a document element?
897
-- @param d any value
898
-- @treturn boolean `true` if it is a `table` with property `tag` being a string value.
899
-- @name is_tag
900
_M.is_tag = is_tag
18✔
901

902

903
do
904
  local function _walk(doc, depth_first, operation, recurse_check)
905
    if not depth_first then operation(doc.tag, doc) end
585✔
906
    for _,d in ipairs(doc) do
1,251✔
907
      if is_tag(d) then
684✔
908
        assert(not recurse_check[d], "recursion detected")
459✔
909
        recurse_check[d] = true
450✔
910
        _walk(d, depth_first, operation, recurse_check)
450✔
911
      end
912
    end
913
    if depth_first then operation(doc.tag, doc) end
567✔
914
  end
915

916
  --- Calls a function recursively over Nodes in the document.
917
  -- Will only call on tags, it will skip text nodes.
918
  -- The function signature for `operation` is `function(tag_name, Node)`.
919
  -- @tparam Node|string doc a Node object or string (text node)
920
  -- @tparam boolean depth_first visit child nodes first, then the current node
921
  -- @tparam function operation a function which will receive the current tag name and current node.
922
  function _M.walk(doc, depth_first, operation)
18✔
923
    return _walk(doc, depth_first, operation, {})
135✔
924
  end
925
end
926

927

928
local html_empty_elements = { --lists all HTML empty (void) elements
18✔
929
    br      = true,
18✔
930
    img     = true,
18✔
931
    meta    = true,
18✔
932
    frame   = true,
18✔
933
    area    = true,
18✔
934
    hr      = true,
18✔
935
    base    = true,
18✔
936
    col     = true,
18✔
937
    link    = true,
18✔
938
    input   = true,
18✔
939
    option  = true,
18✔
940
    param   = true,
18✔
941
    isindex = true,
18✔
942
    embed = true,
18✔
943
}
944

945
--- Parse a well-formed HTML document held in a string.
-- Tag and attribute names are lower-cased (so they are case-insensitive),
-- a leading DOCTYPE is ignored, and empty (void) elements such as `<br>`
-- do not need a closing tag.
-- This is `basic_parse` with `all_text` switched off and `html` switched on.
-- @param s the HTML
-- @return the result of `basic_parse` for `s`
function _M.parsehtml(s)
    local all_text, html = false, true
    return _M.basic_parse(s, all_text, html)
end
951

952
--- Parse a simple XML document using a pure Lua parser based on Roberto Ierusalimschy's original version.
-- A leading `<?xml ...?>` declaration and/or `<!DOCTYPE ...>` is skipped.
-- Comments are skipped as well.
-- @param s the XML document to be parsed.
-- @param all_text  if true, preserves all whitespace. Otherwise only text containing non-whitespace is included.
-- @param html if true, uses relaxed HTML rules for parsing
-- @return the first top-level item of the parsed document; if that item is text
-- and a second item exists, the second item is returned instead.
-- @raise an error on a closing tag without a matching open tag, on an unclosed
-- tag, and on a comment that is never terminated with `-->`.
function _M.basic_parse(s, all_text, html)
    local stack = {}
    local top = {}

    -- Builds the attribute table from the raw text found between the tag name
    -- and the closing `>`.
    local function parseargs(attrs)
      local arg = {}
      -- Quoted values (name="v" or name='v') are handled first. Each parsed
      -- attribute is replaced by a blank so that the unquoted pass below never
      -- looks inside a quoted value (e.g. href="x?a=b" must not yield a=b).
      local rest = attrs:gsub("([%w:%-_]+)%s*=%s*([\"'])(.-)%2", function (w, _, a)
        if html then w = w:lower() end
        arg[w] = xml_unescape(a)
        return " "
      end)
      if html then
        -- HTML also allows unquoted values; such a value ends at the first
        -- whitespace, so `a=1 b=2` yields two attributes.
        rest:gsub("([%w:%-_]+)%s*=%s*([^%s\"']+)", function (w, a)
          arg[w:lower()] = xml_unescape(a)
        end)
      end
      return arg
    end

    t_insert(stack, top)
    local ni, c, label, xarg, empty
    local i = 1
    local j
    -- we're not interested in <?xml version="1.0"?>
    local _, istart = s_find(s,'^%s*<%?[^%?]+%?>%s*')
    if istart then i = istart+1 end
    -- ... nor in <!DOCTYPE ...>, whether or not it follows an XML declaration
    -- (the `^` anchor applies at the start position `i`)
    _, istart = s_find(s,'^%s*<!DOCTYPE.->%s*', i)
    if istart then i = istart+1 end
    while true do
        ni,j,c,label,xarg, empty = s_find(s, "<([%/!]?)([%w:%-_]+)(.-)(%/?)>", i)
        if not ni then break end
        if c == "!" then -- comment
            -- NOTE(review): text between the previous tag and this comment is
            -- not added to the current node.
            -- case where there's no space inside comment
            if not (label:match '%-%-$' and xarg == '') then
                if xarg:match '%-%-$' then -- we've grabbed it all
                    j = j - 2
                end
                -- match end of comment
                local _, comment_end = s_find(s, "-->", j, true)
                if not comment_end then
                    error("unterminated comment: no closing --> after position "..ni)
                end
                j = comment_end
            end
        else
            local text = s_sub(s, i, ni-1)
            if html then
                label = label:lower()
                if html_empty_elements[label] then empty = "/" end
            end
            if all_text or not s_find(text, "^%s*$") then
                t_insert(top, xml_unescape(text))
            end
            if empty == "/" then  -- empty element tag
                t_insert(top, setmetatable({tag=label, attr=parseargs(xarg), empty=1},Doc))
            elseif c == "" then   -- start tag
                top = setmetatable({tag=label, attr=parseargs(xarg)},Doc)
                t_insert(stack, top)   -- new level
            else  -- end tag
                local toclose = t_remove(stack)  -- remove top
                top = stack[#stack]
                if #stack < 1 then
                    error("nothing to close with "..label..':'..text)
                end
                if toclose.tag ~= label then
                    error("trying to close "..toclose.tag.." with "..label.." "..text)
                end
                t_insert(top, toclose)
            end
        end
        i = j+1
    end
    -- whatever follows the last tag
    local text = s_sub(s, i)
    if all_text or  not s_find(text, "^%s*$") then
        t_insert(stack[#stack], xml_unescape(text))
    end
    if #stack > 1 then
        error("unclosed "..stack[#stack].tag)
    end
    local res = stack[1]
    -- skip a leading text item in front of the root element
    return is_text(res[1]) and res[2] or res[1]
end
1035

1036
do
1037
  local match do
18✔
1038

1039
    -- True when `attr` is nil/false, or when `next` finds no entry in it.
    local function empty(attr)
        if not attr then
            return true
        end
        return not next(attr)
    end
1,701✔
1040

1041
    local append_capture do
      -- If `t` holds exactly one entry, returns that entry's key and value;
      -- returns false when a second entry exists.
      local function sole_entry(t)
          local k, v = next(t)
          if next(t, k) == nil then
              return k, v
          end
          return false
      end

      -- Stores the captured table `tbl` into the result table `res`.
      -- * An empty capture is ignored.
      -- * If `tbl._` is set (from a `$_` capture) it names the key under which
      --   the capture is stored in `res`; it is removed from `tbl` first, and
      --   nothing is stored if that leaves `tbl` empty.
      -- * A capture of the form `{[0]=value}` is reduced to `value`.
      -- * Without a key the capture is appended to `res`.
      function append_capture(res,tbl)
          if empty(tbl) then return end

          local key = tbl._
          if key then
              tbl._ = nil
              if empty(tbl) then return end
          end

          local only_key, only_val = sole_entry(tbl)
          if only_key == 0 then
              tbl = only_val
          end

          if key then
              res[key] = tbl
          else
              t_insert(res,tbl)
          end
      end
    end
1068

1069
    -- Converts an all-digit capture name (as in `$1`) to a number so it can be
    -- used as an array index; any other name is returned unchanged.
    local function make_number(pat)
        local all_digits = pat:find '^%d+$' ~= nil
        return all_digits and tonumber(pat) or pat
    end
1075

1076
    -- Records `value` in `res` under the capture name taken from `pat`.
    -- The first character of `pat` (the `$`) is dropped and an all-digit name
    -- becomes a numeric key. Always returns true.
    local function capture_attrib(res,pat,value)
        local name = pat:sub(2)
        local key = make_number(name)
        res[key] = value
        return true
    end
1081

1082
    -- Matches a data node against a pattern node, storing captures in `res`.
    -- `d` is the data: an element table, a string, or nil (treated as '').
    -- `pat` is the pattern: an element table or a string.
    -- `res` receives the captures.
    -- `keep_going`: if true and `d` itself does not match, the child tags of
    -- `d` are tried recursively with the same pattern.
    -- Returns true on a match and false otherwise.
    function match(d,pat,res,keep_going)
        local ret = true
        if d == nil then d = '' end --return false end
        -- attribute string matching is straight equality, except if the pattern is a $ capture,
        -- which always succeeds.
        if is_text(d) then
            if not is_text(pat) then return false end
            if _M.debug then print(d,pat) end
            if pat:find '^%$' then
                return capture_attrib(res,pat,d)
            else
                return d == pat
            end
        else
            -- A text pattern cannot match an element. Without this guard the
            -- `pat.tag:match` below would fail with an attempt to index nil.
            if is_text(pat) then return false end
            if _M.debug then print(d.tag,pat.tag) end
            -- this is an element node. For a match to succeed, the attributes must
            -- match as well.
            -- a tagname in the pattern ending with '-' is a wildcard and matches like an attribute
            local tagpat = pat.tag:match '^(.-)%-$'
            if tagpat then
                tagpat = make_number(tagpat)
                res[tagpat] = d.tag
            end
            if d.tag == pat.tag or tagpat then

                if not empty(pat.attr) then
                    if empty(d.attr) then ret =  false
                    else
                        -- every attribute of the pattern must match the data
                        for prop,pval in pairs(pat.attr) do
                            local dval = d.attr[prop]
                            if not match(dval,pval,res) then ret = false;  break end
                        end
                    end
                end
                -- the pattern may have child nodes. We match partially, so that {P1,P2} shall match {X,P1,X,X,P2,..}
                if ret and #pat > 0 then
                    -- i indexes the pattern children, j the data children
                    local i,j = 1,1
                    local function next_elem()
                        j = j + 1  -- next child element of data
                        -- steps over a single text child
                        if is_text(d[j]) then j = j + 1 end
                        return j <= #d
                    end
                    repeat
                        local p = pat[i]
                        -- repeated {{<...>}} patterns  shall match one or more elements
                        -- so e.g. {P+} will match {X,X,P,P,X,P,X,X,X}
                        if is_tag(p) and p.repeated then
                            -- NOTE: `found` is never set to true, so the
                            -- `(found and not ret)` exit below never fires and
                            -- this loop runs until the data children run out.
                            local found
                            repeat
                                local tbl = {}
                                ret = match(d[j],p,tbl,false)
                                if ret then
                                    found = false --true
                                    append_capture(res,tbl)
                                end
                            until not next_elem() or (found and not ret)
                            i = i + 1
                        else
                            ret = match(d[j],p,res,false)
                            if ret then i = i + 1 end
                        end
                    until not next_elem() or i > #pat -- run out of elements or patterns to match
                    -- if every element in our pattern matched ok, then it's been a successful match
                    if i > #pat then return true end
                end
                if ret then return true end
            else
                ret = false
            end
            -- keep going anyway - look at the children!
            if keep_going then
                for child in d:childtags() do
                    ret = match(child,pat,res,keep_going)
                    if ret then break end
                end
            end
        end
        return ret
    end
1161
  end
1162

1163
  --- Match this document against a pattern and collect the captures.
  -- The pattern is obtained by passing `pat` to `template_cache`. Inside the
  -- pattern, an element wrapped in `{{` ... `}}` text is marked as repeated
  -- and the two surrounding text nodes are removed; this edits the pattern
  -- document in place. If this document itself does not match, its child
  -- tags are searched as well.
  -- @param pat the pattern, in whatever form `template_cache` accepts
  -- @return[1] table holding the captured values
  -- @return[1] boolean, `true` if the pattern matched
  -- @return[2] nil if no pattern could be obtained from `pat`
  -- @return[2] the error returned by `template_cache`
  function Doc:match(pat)
      local template, err = template_cache(pat)
      if not template then return nil, err end

      -- turns the children {"..{{", <elem>, "}}.."} into {<elem>} with
      -- `elem.repeated` set
      local function mark_repeated(_, node)
          local before, elem, after = node[1], node[2], node[3]
          if not (is_text(before) and is_tag(elem) and is_text(after)) then
              return
          end
          if before:find '%s*{{' and after:find '}}%s*' then
              t_remove(node,1)   -- drops the "{{" text; the element is now node[1]
              t_remove(node,2)   -- drops the "}}" text
              node[1].repeated = true
          end
      end
      _M.walk(template, false, mark_repeated)

      local captures = {}
      local matched = match(self, template, captures, true)
      return captures, matched
  end
1181
end
1182

1183

1184
return _M
18✔
1185

STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc