• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

earwig / mwparserfromhell / 6080278913

05 Sep 2023 04:44AM CUT coverage: 99.136% (+0.04%) from 99.098%
6080278913

push

github

earwig
Fix pickling SmartLists (fixes #289)

23 of 23 new or added lines in 4 files covered. (100.0%)

2982 of 3008 relevant lines covered (99.14%)

6.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.01
/src/mwparserfromhell/wikicode.py
1
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
2
#
3
# Permission is hereby granted, free of charge, to any person obtaining a copy
4
# of this software and associated documentation files (the "Software"), to deal
5
# in the Software without restriction, including without limitation the rights
6
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
# copies of the Software, and to permit persons to whom the Software is
8
# furnished to do so, subject to the following conditions:
9
#
10
# The above copyright notice and this permission notice shall be included in
11
# all copies or substantial portions of the Software.
12
#
13
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
# SOFTWARE.
20

21
import re
7✔
22
from itertools import chain
7✔
23

24
from .nodes import (
7✔
25
    Argument,
26
    Comment,
27
    ExternalLink,
28
    Heading,
29
    HTMLEntity,
30
    Node,
31
    Tag,
32
    Template,
33
    Text,
34
    Wikilink,
35
)
36
from .smart_list.list_proxy import ListProxy
7✔
37
from .string_mixin import StringMixIn
7✔
38
from .utils import parse_anything
7✔
39

40
__all__ = ["Wikicode"]
7✔
41

42
FLAGS = re.IGNORECASE | re.DOTALL | re.UNICODE
7✔
43

44

45
class Wikicode(StringMixIn):
7✔
46
    """A ``Wikicode`` is a container for nodes that operates like a string.
47

48
    Additionally, it contains methods that can be used to extract data from or
49
    modify the nodes, implemented in an interface similar to a list. For
50
    example, :meth:`index` can get the index of a node in the list, and
51
    :meth:`insert` can add a new node at that index. The :meth:`filter()
52
    <ifilter>` series of functions is very useful for extracting and iterating
53
    over, for example, all of the templates in the object.
54
    """
55

56
    RECURSE_OTHERS = 2
7✔
57

58
    def __init__(self, nodes):
        """Create a ``Wikicode`` around *nodes*.

        The given list is stored as-is (no copy is made), so later changes to
        it are visible through this object.
        """
        super().__init__()
        self._nodes = nodes
61

62
    def __str__(self):
7✔
63
        return "".join([str(node) for node in self.nodes])
7✔
64

65
    @staticmethod
7✔
66
    def _get_children(node, contexts=False, restrict=None, parent=None):
7✔
67
        """Iterate over all child :class:`.Node`\\ s of a given *node*."""
68
        yield (parent, node) if contexts else node
7✔
69
        if restrict and isinstance(node, restrict):
7✔
70
            return
7✔
71
        for code in node.__children__():
7✔
72
            for child in code.nodes:
7✔
73
                sub = Wikicode._get_children(child, contexts, restrict, code)
7✔
74
                yield from sub
7✔
75

76
    @staticmethod
    def _slice_replace(code, index, old, new):
        """Replace the string *old* with *new* across *index* in *code*.

        The nodes selected by *index* are joined into one string, every
        occurrence of *old* in it is replaced by *new*, and the re-parsed
        result is assigned back over the same *index* of ``code.nodes``.
        """
        joined = "".join(str(node) for node in code.get(index))
        code.nodes[index] = parse_anything(joined.replace(old, new)).nodes
82

83
    @staticmethod
7✔
84
    def _build_matcher(matches, flags):
6✔
85
        """Helper for :meth:`_indexed_ifilter` and others.
86

87
        If *matches* is a function, return it. If it's a regex, return a
88
        wrapper around it that can be called with a node to do a search. If
89
        it's ``None``, return a function that always returns ``True``.
90
        """
91
        if matches:
7✔
92
            if callable(matches):
7✔
93
                return matches
7✔
94
            return lambda obj: re.search(matches, str(obj), flags)
7✔
95
        return lambda obj: True
7✔
96

97
    def _indexed_ifilter(
        self, recursive=True, matches=None, flags=FLAGS, forcetype=None
    ):
        """Iterate over nodes and their corresponding indices in the node list.

        The arguments are interpreted as for :meth:`ifilter`. Every yielded
        item is a tuple ``(i, node)`` where *i* is the position, in our own
        node list, of the top-level node that is or contains *node*. When
        *recursive* is true, ``self.nodes[i]`` may therefore be an ancestor of
        *node* rather than *node* itself.
        """
        accept = self._build_matcher(matches, flags)

        if not recursive:
            candidates = enumerate(self.nodes)
        else:
            # With RECURSE_OTHERS, do not descend into nodes of the requested
            # type; any other truthy value descends everywhere.
            restrict = forcetype if recursive == self.RECURSE_OTHERS else None

            def descend(position, top):
                for descendant in self._get_children(top, restrict=restrict):
                    yield (position, descendant)

            # Building the list walks the top-level nodes up front, exactly as
            # argument unpacking of a generator would.
            per_node = [descend(i, n) for i, n in enumerate(self.nodes)]
            candidates = chain(*per_node)

        for position, node in candidates:
            if forcetype and not isinstance(node, forcetype):
                continue
            if accept(node):
                yield (position, node)
121

122
    def _is_child_wikicode(self, obj, recursive=True):
        """Return whether the given :class:`.Wikicode` is a descendant.

        *obj* counts as a descendant when its node list is the very same
        object as ours or, if *recursive* is true, as the node list of any
        :class:`.Wikicode` reachable through our nodes' children.
        """

        def underlying(nodes):
            # For a ListProxy, compare against the list stored as its _parent.
            if isinstance(nodes, ListProxy):
                return nodes._parent  # pylint: disable=protected-access
            return nodes

        wanted = underlying(obj.nodes)
        if wanted is underlying(self.nodes):
            return True
        if not recursive:
            return False

        pending = [self]
        while pending:
            current = pending.pop()
            if wanted is underlying(current.nodes):
                return True
            for node in current.nodes:
                pending.extend(node.__children__())
        return False
142

143
    def _do_strong_search(self, obj, recursive=True):
        """Search for the specific element *obj* within the node list.

        *obj* can be either a :class:`.Node` or a :class:`.Wikicode` object.
        The result is a tuple (*context*, *index*): *context* is the
        :class:`.Wikicode` that holds *obj* and *index* is a :class:`slice`
        giving its position there. For a :class:`.Wikicode` *obj*, the context
        is *obj* itself and the slice covers all of its nodes. For a
        :class:`.Node` with *recursive* false, the context is always ``self``;
        with *recursive* true it may be any :class:`.Wikicode` nested inside
        one of our nodes.

        Raises :exc:`ValueError` if *obj* is not found and :exc:`TypeError` if
        it is neither a :class:`.Node` nor a :class:`.Wikicode`.
        """
        if isinstance(obj, Wikicode):
            if self._is_child_wikicode(obj, recursive):
                return obj, slice(0, len(obj.nodes))
            raise ValueError(obj)

        if not isinstance(obj, Node):
            raise TypeError(obj)

        if not recursive:
            position = self.index(obj)
            return self, slice(position, position + 1)

        for top in self.nodes:
            for holder, candidate in self._get_children(top, contexts=True):
                if candidate is not obj:
                    continue
                # Top-level nodes are yielded with no holder; they live in us.
                owner = holder if holder else self
                position = owner.index(candidate)
                return owner, slice(position, position + 1)
        raise ValueError(obj)
173

174
    def _do_weak_search(self, obj, recursive):
        """Search for an element that looks like *obj* within the node list.

        This follows the same rules as :meth:`_do_strong_search` with some
        differences. *obj* is treated as a string that might represent any
        :class:`.Node`, :class:`.Wikicode`, or combination of the two present
        in the node list. Thus, matching is weak (using string comparisons)
        rather than strong (using ``is``). Because multiple nodes can match
        *obj*, the result is a list of tuples instead of just one (however,
        :exc:`ValueError` is still raised if nothing is found). Individual
        matches will never overlap.

        The tuples contain a new first element, *exact*, which is ``True`` if
        we were able to match *obj* exactly to one or more adjacent nodes, or
        ``False`` if we found *obj* inside a node or incompletely spanning
        multiple nodes. In the latter case the single result covers all of our
        own nodes.
        """
        obj = parse_anything(obj)
        if not obj or obj not in self:
            raise ValueError(obj)
        size = len(obj.nodes)
        results = []
        contexts = [self]
        while contexts:
            context = contexts.pop()
            # Scan right to left so that earlier results have larger indices.
            i = len(context.nodes) - 1
            while i >= 0:
                node = context.get(i)
                if obj.get(-1) == node:
                    # Only try a full match when all of obj's nodes fit to the
                    # left of position i. Otherwise ``i + j + 1`` goes negative
                    # and wraps around to the end of the list, which could
                    # produce a bogus match with a negative slice start.
                    if i + 1 >= size:
                        for j in range(-size, -1):
                            if obj.get(j) != context.get(i + j + 1):
                                break
                        else:
                            i -= size - 1
                            index = slice(i, i + size)
                            results.append((True, context, index))
                elif recursive and obj in node:
                    contexts.extend(node.__children__())
                i -= 1
        if not results:
            if not recursive:
                raise ValueError(obj)
            results.append((False, self, slice(0, len(self.nodes))))
        return results
217

218
    def _get_tree(self, code, lines, marker, indent):
7✔
219
        """Build a tree to illustrate the way the Wikicode object was parsed.
220

221
        The method that builds the actual tree is ``__showtree__`` of ``Node``
222
        objects. *code* is the ``Wikicode`` object to build a tree for. *lines*
223
        is the list to append the tree to, which is returned at the end of the
224
        method. *marker* is some object to be used to indicate that the builder
225
        should continue on from the last line instead of starting a new one; it
226
        should be any object that can be tested for with ``is``. *indent* is
227
        the starting indentation.
228
        """
229

230
        def write(*args):
7✔
231
            """Write a new line following the proper indentation rules."""
232
            if lines and lines[-1] is marker:  # Continue from the last line
7✔
233
                lines.pop()  # Remove the marker
7✔
234
                last = lines.pop()
7✔
235
                lines.append(last + " ".join(args))
7✔
236
            else:
237
                lines.append(" " * 6 * indent + " ".join(args))
7✔
238

239
        get = lambda code: self._get_tree(code, lines, marker, indent + 1)
7✔
240
        mark = lambda: lines.append(marker)
7✔
241
        for node in code.nodes:
7✔
242
            node.__showtree__(write, get, mark)
7✔
243
        return lines
7✔
244

245
    @classmethod
7✔
246
    def _build_filter_methods(cls, **meths):
6✔
247
        """Given Node types, build the corresponding i?filter shortcuts.
248

249
        The should be given as keys storing the method's base name paired with
250
        values storing the corresponding :class:`.Node` type. For example, the
251
        dict may contain the pair ``("templates", Template)``, which will
252
        produce the methods :meth:`ifilter_templates` and
253
        :meth:`filter_templates`, which are shortcuts for
254
        :meth:`ifilter(forcetype=Template) <ifilter>` and
255
        :meth:`filter(forcetype=Template) <filter>`, respectively. These
256
        shortcuts are added to the class itself, with an appropriate docstring.
257
        """
258
        doc = """Iterate over {0}.
7✔
259

260
        This is equivalent to :meth:`{1}` with *forcetype* set to
261
        :class:`~{2.__module__}.{2.__name__}`.
262
        """
263
        make_ifilter = lambda ftype: (
7✔
264
            lambda self, *a, **kw: self.ifilter(forcetype=ftype, *a, **kw)
265
        )
266
        make_filter = lambda ftype: (
7✔
267
            lambda self, *a, **kw: self.filter(forcetype=ftype, *a, **kw)
268
        )
269
        for name, ftype in meths.items():
7✔
270
            ifilt = make_ifilter(ftype)
7✔
271
            filt = make_filter(ftype)
7✔
272
            ifilt.__doc__ = doc.format(name, "ifilter", ftype)
7✔
273
            filt.__doc__ = doc.format(name, "filter", ftype)
7✔
274
            setattr(cls, "ifilter_" + name, ifilt)
7✔
275
            setattr(cls, "filter_" + name, filt)
7✔
276

277
    @property
7✔
278
    def nodes(self):
6✔
279
        """A list of :class:`.Node` objects.
280

281
        This is the internal data actually stored within a :class:`.Wikicode`
282
        object.
283
        """
284
        return self._nodes
7✔
285

286
    @nodes.setter
7✔
287
    def nodes(self, value):
6✔
288
        if not isinstance(value, list):
7✔
289
            value = parse_anything(value).nodes
7✔
290
        self._nodes = value
7✔
291

292
    def get(self, index):
7✔
293
        """Return the *index*\\ th node within the list of nodes."""
294
        return self.nodes[index]
7✔
295

296
    def set(self, index, value):
        """Set the ``Node`` at *index* to *value*.

        *value* is parsed first. If it yields exactly one node, that node
        replaces the one at *index*; if it yields none, the node at *index* is
        removed instead.

        Raises :exc:`ValueError` if *value* parses to more than one node, or
        :exc:`IndexError` if *index* is out of range. To insert multiple nodes
        at an index, use :meth:`get` with either :meth:`remove` and
        :meth:`insert` or :meth:`replace`.
        """
        parsed = parse_anything(value).nodes
        if len(parsed) > 1:
            raise ValueError("Cannot coerce multiple nodes into one index")
        count = len(self.nodes)
        if not -count <= index < count:
            raise IndexError("List assignment index out of range")
        if not parsed:
            self.nodes.pop(index)
            return
        self.nodes[index] = parsed[0]
313

314
    def contains(self, obj):
        """Return whether this Wikicode object contains *obj*.

        A :class:`.Node` or :class:`.Wikicode` *obj* is looked up by identity
        among all of our children, recursively. Anything else is tested with
        the ``in`` operator against this object.
        """
        if isinstance(obj, (Node, Wikicode)):
            try:
                self._do_strong_search(obj, recursive=True)
            except ValueError:
                return False
            return True
        return obj in self
328

329
    def index(self, obj, recursive=False):
        """Return the index of *obj* in the list of nodes.

        A :class:`.Node` *obj* is matched by identity; anything else is
        matched with ``==``. If *recursive* is true, each of our nodes is
        searched together with all of its descendants, and the index returned
        is that of our own top-level node containing the match. Otherwise only
        our direct nodes are compared.

        Raises :exc:`ValueError` if *obj* is not found.
        """
        by_identity = isinstance(obj, Node)

        def same(candidate):
            return obj is candidate if by_identity else obj == candidate

        for position, node in enumerate(self.nodes):
            candidates = self._get_children(node) if recursive else (node,)
            if any(same(candidate) for candidate in candidates):
                return position
        raise ValueError(obj)
347

348
    def get_ancestors(self, obj):
7✔
349
        """Return a list of all ancestor nodes of the :class:`.Node` *obj*.
350

351
        The list is ordered from the most shallow ancestor (greatest great-
352
        grandparent) to the direct parent. The node itself is not included in
353
        the list. For example::
354

355
            >>> text = "{{a|{{b|{{c|{{d}}}}}}}}"
356
            >>> code = mwparserfromhell.parse(text)
357
            >>> node = code.filter_templates(matches=lambda n: n == "{{d}}")[0]
358
            >>> code.get_ancestors(node)
359
            ['{{a|{{b|{{c|{{d}}}}}}}}', '{{b|{{c|{{d}}}}}}', '{{c|{{d}}}}']
360

361
        Will return an empty list if *obj* is at the top level of this Wikicode
362
        object. Will raise :exc:`ValueError` if it wasn't found.
363
        """
364

365
        def _get_ancestors(code, needle):
7✔
366
            for node in code.nodes:
7✔
367
                if node is needle:
7✔
368
                    return []
7✔
369
                for code in node.__children__():
7✔
370
                    ancestors = _get_ancestors(code, needle)
7✔
371
                    if ancestors is not None:
7✔
372
                        return [node] + ancestors
7✔
373
            return None
7✔
374

375
        if isinstance(obj, Wikicode):
7✔
376
            obj = obj.get(0)
×
377
        elif not isinstance(obj, Node):
7✔
378
            raise ValueError(obj)
×
379

380
        ancestors = _get_ancestors(self, obj)
7✔
381
        if ancestors is None:
7✔
382
            raise ValueError(obj)
7✔
383
        return ancestors
7✔
384

385
    def get_parent(self, obj):
        """Return the direct parent node of the :class:`.Node` *obj*.

        This is the last element of the list returned by
        :meth:`.get_ancestors`, or ``None`` when that list is empty, i.e. the
        node exists but sits at the top level of this Wikicode object.
        """
        lineage = self.get_ancestors(obj)
        if not lineage:
            return None
        return lineage[-1]
395

396
    def insert(self, index, value):
        """Insert *value* at *index* in the list of nodes.

        *value* can be anything parsable by :func:`.parse_anything`, which
        includes strings or other :class:`.Wikicode` or :class:`.Node` objects.
        All nodes of *value* are inserted contiguously and in their original
        order. As with :meth:`list.insert`, a negative *index* counts from the
        end, and an out-of-range *index* is clamped to the nearest end.
        """
        # Snapshot the new nodes, in case the parsed list is the one we are
        # about to mutate.
        new_nodes = list(parse_anything(value).nodes)
        length = len(self.nodes)
        # Resolve the position once. Re-using a negative or too-large index
        # for every insertion would place the nodes in reversed order.
        if index < 0:
            index = max(index + length, 0)
        else:
            index = min(index, length)
        for offset, node in enumerate(new_nodes):
            self.nodes.insert(index + offset, node)
405

406
    def insert_before(self, obj, value, recursive=True):
        """Insert *value* immediately before *obj*.

        *obj* can be either a string, a :class:`.Node`, or another
        :class:`.Wikicode` object (as created by :meth:`get_sections`, for
        example). A :class:`.Node` or :class:`.Wikicode` is located by
        identity and only that instance is affected; anything else is matched
        as a string and every match is affected. *value* can be anything
        parsable by :func:`.parse_anything`. If *recursive* is ``True``, we
        will try to find *obj* within our child nodes even if it is not a
        direct descendant of this :class:`.Wikicode` object. If *obj* is not
        found, :exc:`ValueError` is raised.
        """
        if isinstance(obj, (Node, Wikicode)):
            context, index = self._do_strong_search(obj, recursive)
            context.insert(index.start, value)
            return

        found = self._do_weak_search(obj, recursive)
        needle = str(obj)
        for exact, context, index in found:
            if exact:
                context.insert(index.start, value)
            else:
                # No node-aligned match: splice on the string level instead.
                self._slice_replace(context, index, needle, str(value) + needle)
428

429
    def insert_after(self, obj, value, recursive=True):
        """Insert *value* immediately after *obj*.

        *obj* can be either a string, a :class:`.Node`, or another
        :class:`.Wikicode` object (as created by :meth:`get_sections`, for
        example). A :class:`.Node` or :class:`.Wikicode` is located by
        identity and only that instance is affected; anything else is matched
        as a string and every match is affected. *value* can be anything
        parsable by :func:`.parse_anything`. If *recursive* is ``True``, we
        will try to find *obj* within our child nodes even if it is not a
        direct descendant of this :class:`.Wikicode` object. If *obj* is not
        found, :exc:`ValueError` is raised.
        """
        if isinstance(obj, (Node, Wikicode)):
            context, index = self._do_strong_search(obj, recursive)
            context.insert(index.stop, value)
            return

        found = self._do_weak_search(obj, recursive)
        needle = str(obj)
        for exact, context, index in found:
            if exact:
                context.insert(index.stop, value)
            else:
                # No node-aligned match: splice on the string level instead.
                self._slice_replace(context, index, needle, needle + str(value))
451

452
    def replace(self, obj, value, recursive=True):
        """Replace *obj* with *value*.

        *obj* can be either a string, a :class:`.Node`, or another
        :class:`.Wikicode` object (as created by :meth:`get_sections`, for
        example). A :class:`.Node` or :class:`.Wikicode` is located by
        identity and only that instance is affected; anything else is matched
        as a string and every match is affected. *value* can be anything
        parsable by :func:`.parse_anything`. If *recursive* is ``True``, we
        will try to find *obj* within our child nodes even if it is not a
        direct descendant of this :class:`.Wikicode` object. If *obj* is not
        found, :exc:`ValueError` is raised.
        """
        if isinstance(obj, (Node, Wikicode)):
            context, index = self._do_strong_search(obj, recursive)
            for _ in range(index.stop - index.start):
                context.nodes.pop(index.start)
            context.insert(index.start, value)
            return

        for exact, context, index in self._do_weak_search(obj, recursive):
            if not exact:
                # No node-aligned match: substitute on the string level.
                self._slice_replace(context, index, str(obj), str(value))
                continue
            for _ in range(index.stop - index.start):
                context.nodes.pop(index.start)
            context.insert(index.start, value)
477

478
    def append(self, value):
        """Insert *value* at the end of the list of nodes.

        *value* can be anything parsable by :func:`.parse_anything`. Its nodes
        are appended one by one, in order.
        """
        # Snapshot before appending: if the parsed node list happens to be our
        # own (e.g. when appending an object to itself), iterating it while it
        # grows would never terminate.
        for node in list(parse_anything(value).nodes):
            self.nodes.append(node)
486

487
    def remove(self, obj, recursive=True):
        """Remove *obj* from the list of nodes.

        *obj* can be either a string, a :class:`.Node`, or another
        :class:`.Wikicode` object (as created by :meth:`get_sections`, for
        example). A :class:`.Node` or :class:`.Wikicode` is located by
        identity and only that instance is removed; anything else is matched
        as a string and every match is removed. If *recursive* is ``True``, we
        will try to find *obj* within our child nodes even if it is not a
        direct descendant of this :class:`.Wikicode` object. If *obj* is not
        found, :exc:`ValueError` is raised.
        """
        if isinstance(obj, (Node, Wikicode)):
            context, index = self._do_strong_search(obj, recursive)
            for _ in range(index.stop - index.start):
                context.nodes.pop(index.start)
            return

        for exact, context, index in self._do_weak_search(obj, recursive):
            if not exact:
                # No node-aligned match: delete on the string level.
                self._slice_replace(context, index, str(obj), "")
                continue
            for _ in range(index.stop - index.start):
                context.nodes.pop(index.start)
510

511
    def matches(self, other):
        """Do a loose equivalency test suitable for comparing page names.

        *other* can be any string-like object, including :class:`.Wikicode`, or
        an iterable of these, in which case the result is ``True`` if any item
        matches. This operation is symmetric; both sides are adjusted the same
        way: markup and surrounding whitespace are stripped, the first letter
        is upper-cased, and underscores are turned into spaces. Typical usage
        is ``if template.name.matches("stub"): ...``.
        """

        def canonical(text):
            if not text:
                return text
            return (text[0].upper() + text[1:]).replace("_", " ")

        def prepare(candidate):
            return canonical(parse_anything(candidate).strip_code().strip())

        mine = canonical(self.strip_code().strip())
        if isinstance(other, (str, bytes, Wikicode, Node)):
            return mine == prepare(other)
        return any(mine == prepare(item) for item in other)
532

533
    def ifilter(self, recursive=True, matches=None, flags=FLAGS, forcetype=None):
        """Iterate over nodes in our list matching certain conditions.

        If *forcetype* is given, only nodes that are instances of this type (or
        tuple of types) are yielded. Setting *recursive* to ``True`` will
        iterate over all children and their descendants. ``RECURSE_OTHERS``
        will do the same but will not descend into nodes that are instances of
        *forcetype*. ``False`` will only iterate over immediate children.

        ``RECURSE_OTHERS`` can be used to iterate over all un-nested templates,
        even if they are inside of HTML tags, like so:

            >>> code = mwparserfromhell.parse("{{foo}}<b>{{foo|{{bar}}}}</b>")
            >>> code.filter_templates(code.RECURSE_OTHERS)
            ["{{foo}}", "{{foo|{{bar}}}}"]

        *matches* can be used to further restrict the nodes, either as a
        function (taking a single :class:`.Node` and returning a boolean) or a
        regular expression (matched against the node's string representation
        with :func:`re.search`). If *matches* is a regex, the flags passed to
        :func:`re.search` are :const:`re.IGNORECASE`, :const:`re.DOTALL`, and
        :const:`re.UNICODE`, but custom flags can be specified by passing
        *flags*.
        """
        indexed = self._indexed_ifilter(recursive, matches, flags, forcetype)
        return (pair[1] for pair in indexed)
559

560
    def filter(self, *args, **kwargs):
        """Return a list of nodes within our list matching certain conditions.

        All arguments are forwarded to :meth:`ifilter`; the result is that
        iterator collected into a :class:`list`.
        """
        found = self.ifilter(*args, **kwargs)
        return list(found)
566

567
    def get_sections(
        self,
        levels=None,
        matches=None,
        flags=FLAGS,
        flat=False,
        include_lead=None,
        include_headings=True,
    ):
        """Return a list of sections within the page.

        Sections are returned as :class:`.Wikicode` objects with a shared node
        list (implemented using :class:`.SmartList`) so that changes to
        sections are reflected in the parent Wikicode object.

        Each section contains all of its subsections, unless *flat* is
        ``True``. If *levels* is given, it should be an iterable of integers;
        only sections whose heading levels are within it will be returned. If
        *matches* is given, it should be either a function or a regex; only
        sections whose headings match it (without the surrounding equal signs)
        will be included. *flags* can be used to override the default regex
        flags (see :meth:`ifilter`) if a regex *matches* is used.

        If *include_lead* is ``True``, the first, lead section (without a
        heading) will be included in the list; ``False`` will not include it;
        the default will include it only if neither *levels* nor *matches* was
        given. If *include_headings* is ``True``, the section's beginning
        :class:`.Heading` object will be included; otherwise, this is skipped.
        """
        title_matcher = self._build_matcher(matches, flags)

        def wanted(heading):
            return title_matcher(heading.title) and (
                not levels or heading.level in levels
            )

        iheadings = self._indexed_ifilter(recursive=False, forcetype=Heading)
        sections = []  # Tuples of (index_of_first_node, section)
        # Tuples of (index, heading), where index and heading.level are both
        # monotonically increasing
        open_headings = []

        # Add the lead section if appropriate:
        if include_lead or (include_lead is None and not matches and not levels):
            first_headings = self._indexed_ifilter(recursive=False, forcetype=Heading)
            # Index of the first heading, or None (slice to the end) if the
            # page has no headings at all.
            lead_end = next((i for i, _ in first_headings), None)
            sections.append((0, Wikicode(self.nodes[:lead_end])))

        # Iterate over headings, adding sections to the list as they end:
        for i, heading in iheadings:
            if flat:  # With flat, all sections close at the next heading
                newly_closed, open_headings = open_headings, []
            else:  # Otherwise, figure out which sections have closed, if any
                cut = next(
                    (
                        j
                        for j, (_, earlier) in enumerate(open_headings)
                        if heading.level <= earlier.level
                    ),
                    len(open_headings),
                )
                newly_closed = open_headings[cut:]
                del open_headings[cut:]
            for start, closed_heading in newly_closed:
                if wanted(closed_heading):
                    sections.append((start, Wikicode(self.nodes[start:i])))
            open_headings.append((i if include_headings else i + 1, heading))

        # Add any remaining open headings to the list of sections:
        sections.extend(
            (start, Wikicode(self.nodes[start:]))
            for start, heading in open_headings
            if wanted(heading)
        )

        # Ensure that earlier sections are earlier in the returned list:
        return [section for _, section in sorted(sections)]
640

641
    def strip_code(self, normalize=True, collapse=True, keep_template_params=False):
7✔
642
        """Return a rendered string without unprintable code such as templates.
643

644
        The way a node is stripped is handled by the
645
        :meth:`~.Node.__strip__` method of :class:`.Node` objects, which
646
        generally return a subset of their nodes or ``None``. For example,
647
        templates and tags are removed completely, links are stripped to just
648
        their display part, headings are stripped to just their title.
649

650
        If *normalize* is ``True``, various things may be done to strip code
651
        further, such as converting HTML entities like ``&Sigma;``, ``&#931;``,
652
        and ``&#x3a3;`` to ``Σ``. If *collapse* is ``True``, we will try to
653
        remove excess whitespace as well (three or more newlines are converted
654
        to two, for example). If *keep_template_params* is ``True``, then
655
        template parameters will be preserved in the output (normally, they are
656
        removed completely).
657
        """
658
        kwargs = {
7✔
659
            "normalize": normalize,
660
            "collapse": collapse,
661
            "keep_template_params": keep_template_params,
662
        }
663

664
        nodes = []
7✔
665
        for node in self.nodes:
7✔
666
            stripped = node.__strip__(**kwargs)
7✔
667
            if stripped:
7✔
668
                nodes.append(str(stripped))
7✔
669

670
        if collapse:
7✔
671
            stripped = "".join(nodes).strip("\n")
7✔
672
            while "\n\n\n" in stripped:
7✔
673
                stripped = stripped.replace("\n\n\n", "\n\n")
7✔
674
            return stripped
7✔
675
        return "".join(nodes)
7✔
676

677
    def get_tree(self):
7✔
678
        """Return a hierarchical tree representation of the object.
679

680
        The representation is a string makes the most sense printed. It is
681
        built by calling :meth:`_get_tree` on the :class:`.Wikicode` object and
682
        its children recursively. The end result may look something like the
683
        following::
684

685
            >>> text = "Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}"
686
            >>> print(mwparserfromhell.parse(text).get_tree())
687
            Lorem ipsum
688
            {{
689
                  foo
690
                | 1
691
                = bar
692
                | 2
693
                = {{
694
                        baz
695
                  }}
696
                | spam
697
                = eggs
698
            }}
699
        """
700
        marker = object()  # Random object we can find with certainty in a list
7✔
701
        return "\n".join(self._get_tree(self, [], marker, 0))
7✔
702

703

704
Wikicode._build_filter_methods(
7✔
705
    arguments=Argument,
706
    comments=Comment,
707
    external_links=ExternalLink,
708
    headings=Heading,
709
    html_entities=HTMLEntity,
710
    tags=Tag,
711
    templates=Template,
712
    text=Text,
713
    wikilinks=Wikilink,
714
)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc