• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

earwig / mwparserfromhell / 15949141982

28 Jun 2025 11:18PM UTC coverage: 98.886% (-0.3%) from 99.204%
15949141982

push

github

earwig
Fix a failing test

3106 of 3141 relevant lines covered (98.89%)

9.85 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.06
/src/mwparserfromhell/wikicode.py
1
# Copyright (C) 2012-2025 Ben Kurtovic <ben.kurtovic@gmail.com>
2
#
3
# Permission is hereby granted, free of charge, to any person obtaining a copy
4
# of this software and associated documentation files (the "Software"), to deal
5
# in the Software without restriction, including without limitation the rights
6
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
# copies of the Software, and to permit persons to whom the Software is
8
# furnished to do so, subject to the following conditions:
9
#
10
# The above copyright notice and this permission notice shall be included in
11
# all copies or substantial portions of the Software.
12
#
13
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
# SOFTWARE.
20

21
from __future__ import annotations
10✔
22

23
import re
10✔
24
from collections.abc import Generator, Iterable
10✔
25
from itertools import chain
10✔
26
from typing import Any, Callable, TypeVar, cast, overload
10✔
27

28
from .nodes import (
10✔
29
    Argument,
30
    Comment,
31
    ExternalLink,
32
    Heading,
33
    HTMLEntity,
34
    Node,
35
    Tag,
36
    Template,
37
    Text,
38
    Wikilink,
39
)
40
from .smart_list.list_proxy import ListProxy
10✔
41
from .string_mixin import StringMixIn
10✔
42
from .utils import parse_anything
10✔
43

44
__all__ = ["Wikicode"]
10✔
45

46
FLAGS = re.IGNORECASE | re.DOTALL
10✔
47

48
N = TypeVar("N", bound=Node)
10✔
49

50

51
class Wikicode(StringMixIn):
10✔
52
    """A ``Wikicode`` is a container for nodes that operates like a string.
53

54
    Additionally, it contains methods that can be used to extract data from or
55
    modify the nodes, implemented in an interface similar to a list. For
56
    example, :meth:`index` can get the index of a node in the list, and
57
    :meth:`insert` can add a new node at that index. The :meth:`filter()
58
    <ifilter>` series of functions is very useful for extracting and iterating
59
    over, for example, all of the templates in the object.
60
    """
61

62
    RECURSE_OTHERS = 2
10✔
63

64
    def __init__(self, nodes: list[Node]):
10✔
65
        super().__init__()
10✔
66
        self._nodes = nodes
10✔
67

68
    def __str__(self) -> str:
10✔
69
        return "".join([str(node) for node in self.nodes])
10✔
70

71
    @staticmethod
10✔
72
    def _get_children(
10✔
73
        node: Node,
74
        contexts: bool = False,
75
        restrict: type | None = None,
76
        parent: Wikicode | None = None,
77
    ) -> Generator[tuple[Wikicode | None, Node] | Node]:
78
        """Iterate over all child :class:`.Node`\\ s of a given *node*."""
79
        yield (parent, node) if contexts else node
10✔
80
        if restrict and isinstance(node, restrict):
10✔
81
            return
10✔
82
        for code in node.__children__():
10✔
83
            for child in code.nodes:
10✔
84
                sub = Wikicode._get_children(child, contexts, restrict, code)
10✔
85
                yield from sub
10✔
86

87
    @staticmethod
    def _slice_replace(code: Wikicode, index: slice, old: str, new: str) -> None:
        """Textually substitute *new* for *old* within ``code.nodes[index]``.

        The nodes covered by *index* are rendered to a single string, every
        occurrence of *old* is replaced with *new*, and the result is parsed
        again and written back over the same slice of *code*'s node list.
        """
        original = "".join(str(node) for node in code.get(index))
        updated = original.replace(old, new)
        code.nodes[index] = parse_anything(updated).nodes
93

94
    @staticmethod
10✔
95
    def _build_matcher(
10✔
96
        matches: Callable[[N], bool | re.Match[str] | None] | re.Pattern | str | None,
97
        flags: int,
98
    ) -> Callable[[N], bool | re.Match[str] | None]:
99
        """Helper for :meth:`_indexed_ifilter` and others.
100

101
        If *matches* is a function, return it. If it's a regex, return a
102
        wrapper around it that can be called with a node to do a search. If
103
        it's ``None``, return a function that always returns ``True``.
104
        """
105
        if matches:
10✔
106
            if callable(matches):
10✔
107
                return matches
10✔
108
            else:
109
                return lambda obj: re.search(matches, str(obj), flags)
10✔
110
        else:
111
            return lambda obj: True
10✔
112

113
    def _indexed_ifilter(
        self,
        recursive: bool = True,
        matches: Callable[[N], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
        forcetype: type[N] | None = None,
    ) -> Generator[tuple[int, N]]:
        """Yield ``(index, node)`` pairs for nodes that pass the given filters.

        The arguments have the same meaning as in :meth:`ifilter`. The index
        in each pair is the position, within our own node list, of the
        top-level node the yielded node was reached from. With a truthy
        *recursive*, the yielded node may therefore be a descendant of
        ``self.nodes[index]`` rather than that node itself.
        """
        match = self._build_matcher(matches, flags)
        inodes: Iterable[tuple[int, Node]]
        if not recursive:
            inodes = enumerate(self.nodes)
        else:
            # Only RECURSE_OTHERS stops the descent at nodes of *forcetype*.
            restrict = forcetype if recursive == self.RECURSE_OTHERS else None

            def expand(position: int, top: Node) -> Generator[tuple[int, Node]]:
                for descendant in self._get_children(top, restrict=restrict):
                    yield (position, cast(Node, descendant))

            per_node = (expand(i, n) for i, n in enumerate(self.nodes))
            inodes = chain(*per_node)
        for i, node in inodes:
            if forcetype is not None and not isinstance(node, forcetype):
                continue
            if match(cast(N, node)):
                yield (i, cast(N, node))
144

145
    def _is_child_wikicode(self, obj: Wikicode, recursive: bool = True) -> bool:
        """Return whether the :class:`.Wikicode` *obj* is us or a descendant.

        Two ``Wikicode`` objects are considered the same when they use the
        same underlying node list; a :class:`.ListProxy` is compared through
        its ``_parent`` attribute. If *recursive* is ``False``, only ``self``
        is compared against *obj*.
        """

        def backing_list(nodes):
            if not isinstance(nodes, ListProxy):
                return nodes
            return nodes._parent  # pylint: disable=protected-access

        wanted = backing_list(obj.nodes)
        if backing_list(self.nodes) is wanted:
            return True
        if not recursive:
            return False

        # Walk every nested Wikicode reachable from our nodes.
        pending = [self]
        while pending:
            current = pending.pop()
            if backing_list(current.nodes) is wanted:
                return True
            for node in current.nodes:
                pending.extend(node.__children__())
        return False
165

166
    def _do_strong_search(
        self, obj: Node | Wikicode, recursive: bool = True
    ) -> tuple[Wikicode, slice]:
        """Locate the exact object *obj* (compared by identity) within us.

        *obj* may be a :class:`.Node` or a :class:`.Wikicode`. The result is a
        tuple (*context*, *index*): *context* is the :class:`.Wikicode` that
        holds *obj* and *index* is a :class:`slice` covering it there. For a
        ``Wikicode`` *obj*, the result is *obj* itself with a slice spanning
        all of its nodes. When *recursive* is ``False``, a node is only looked
        up among our own nodes, so *context* is ``self``.

        Raises :exc:`ValueError` if *obj* is not found, or :exc:`TypeError` if
        it is neither a ``Node`` nor a ``Wikicode``.
        """
        if isinstance(obj, Wikicode):
            if self._is_child_wikicode(obj, recursive):
                return obj, slice(0, len(obj.nodes))
            raise ValueError(obj)

        if not isinstance(obj, Node):
            raise TypeError(obj)

        if not recursive:
            position = self.index(obj)
            return self, slice(position, position + 1)

        for node in self.nodes:
            for context, child in self._get_children(node, contexts=True):
                if obj is not child:
                    continue
                # Top-level nodes are yielded without a context; that means us.
                owner = context if context else self
                position = owner.index(child)
                return owner, slice(position, position + 1)
        raise ValueError(obj)
201

202
    def _do_weak_search(
        self, obj: Any, recursive: bool
    ) -> list[tuple[bool, Wikicode, slice]]:
        """Search for an element that looks like *obj* within the node list.

        This follows the same rules as :meth:`_do_strong_search` with some
        differences. *obj* is treated as a string that might represent any
        :class:`.Node`, :class:`.Wikicode`, or combination of the two present
        in the node list. Thus, matching is weak (using string comparisons)
        rather than strong (using ``is``). Because multiple nodes can match
        *obj*, the result is a list of tuples instead of just one (however,
        :exc:`ValueError` is still raised if nothing is found). Individual
        matches will never overlap.

        The tuples contain a new first element, *exact*, which is ``True`` if
        we were able to match *obj* exactly to one or more adjacent nodes, or
        ``False`` if we found *obj* inside a node or incompletely spanning
        multiple nodes.

        Each node list is scanned from its end towards its start, so exact
        matches within one context are reported with descending indices.
        """
        obj = parse_anything(obj)
        if not obj or obj not in self:
            raise ValueError(obj)
        count = len(obj.nodes)
        results = []
        contexts: list[Wikicode] = [self]
        while contexts:
            context = contexts.pop()
            i = len(context.nodes) - 1
            while i >= 0:
                node = context.get(i)
                if obj.get(-1) == node:
                    # The candidate run would occupy context[start:i + 1].
                    start = i - count + 1
                    # A negative start means the run cannot fit before *i*.
                    # Without this check, context.get() would be handed a
                    # negative index, wrap around to the end of the list and
                    # could report a bogus match with a negative slice start.
                    if start >= 0 and not any(
                        obj.get(k) != context.get(start + k)
                        for k in range(count - 1)
                    ):
                        results.append((True, context, slice(start, start + count)))
                        i = start  # Skip past the matched run (no overlaps)
                elif recursive and obj in node:
                    contexts.extend(node.__children__())
                i -= 1
        if not results:
            if not recursive:
                raise ValueError(obj)
            # *obj* is in our text but not as a clean run of nodes, so report
            # one inexact match covering our whole node list.
            results.append((False, self, slice(0, len(self.nodes))))
        return results
247

248
    def _get_tree(
10✔
249
        self, code: Wikicode, lines: list[str], marker: Any, indent: int
250
    ) -> list[str]:
251
        """Build a tree to illustrate the way the Wikicode object was parsed.
252

253
        The method that builds the actual tree is ``__showtree__`` of ``Node``
254
        objects. *code* is the ``Wikicode`` object to build a tree for. *lines*
255
        is the list to append the tree to, which is returned at the end of the
256
        method. *marker* is some object to be used to indicate that the builder
257
        should continue on from the last line instead of starting a new one; it
258
        should be any object that can be tested for with ``is``. *indent* is
259
        the starting indentation.
260
        """
261

262
        def write(*args: str) -> None:
10✔
263
            """Write a new line following the proper indentation rules."""
264
            if lines and lines[-1] is marker:  # Continue from the last line
10✔
265
                lines.pop()  # Remove the marker
10✔
266
                last = lines.pop()
10✔
267
                lines.append(last + " ".join(args))
10✔
268
            else:
269
                lines.append(" " * 6 * indent + " ".join(args))
10✔
270

271
        def get(code: Wikicode):
10✔
272
            self._get_tree(code, lines, marker, indent + 1)
10✔
273

274
        def mark():
10✔
275
            return lines.append(marker)
10✔
276

277
        for node in code.nodes:
10✔
278
            node.__showtree__(write, get, mark)
10✔
279
        return lines
10✔
280

281
    @classmethod
10✔
282
    def _build_filter_methods(cls, **meths):
10✔
283
        """Given Node types, build the corresponding i?filter shortcuts.
284

285
        The should be given as keys storing the method's base name paired with
286
        values storing the corresponding :class:`.Node` type. For example, the
287
        dict may contain the pair ``("templates", Template)``, which will
288
        produce the methods :meth:`ifilter_templates` and
289
        :meth:`filter_templates`, which are shortcuts for
290
        :meth:`ifilter(forcetype=Template) <ifilter>` and
291
        :meth:`filter(forcetype=Template) <filter>`, respectively. These
292
        shortcuts are added to the class itself, with an appropriate docstring.
293
        """
294
        doc = """Iterate over {0}.
10✔
295

296
        This is equivalent to :meth:`{1}` with *forcetype* set to
297
        :class:`~{2.__module__}.{2.__name__}`.
298
        """
299

300
        def make_ifilter(ftype):
10✔
301
            return lambda self, *a, **kw: self.ifilter(forcetype=ftype, *a, **kw)
10✔
302

303
        def make_filter(ftype):
10✔
304
            return lambda self, *a, **kw: self.filter(forcetype=ftype, *a, **kw)
10✔
305

306
        for name, ftype in meths.items():
10✔
307
            ifilt = make_ifilter(ftype)
10✔
308
            filt = make_filter(ftype)
10✔
309
            ifilt.__doc__ = doc.format(name, "ifilter", ftype)
10✔
310
            filt.__doc__ = doc.format(name, "filter", ftype)
10✔
311
            setattr(cls, "ifilter_" + name, ifilt)
10✔
312
            setattr(cls, "filter_" + name, filt)
10✔
313

314
    @property
10✔
315
    def nodes(self) -> list[Node]:
10✔
316
        """A list of :class:`.Node` objects.
317

318
        This is the internal data actually stored within a :class:`.Wikicode`
319
        object.
320
        """
321
        return self._nodes
10✔
322

323
    @nodes.setter
10✔
324
    def nodes(self, value: list[Node] | Any) -> None:
10✔
325
        if not isinstance(value, list):
10✔
326
            value = parse_anything(value).nodes
10✔
327
        self._nodes = value
10✔
328

329
    @overload
10✔
330
    def get(self, index: int) -> Node: ...
10✔
331

332
    @overload
10✔
333
    def get(self, index: slice) -> list[Node]: ...
10✔
334

335
    def get(self, index):
10✔
336
        """Return the *index*\\ th node within the list of nodes."""
337
        return self.nodes[index]
10✔
338

339
    def set(self, index: int, value: Any) -> None:
        """Replace the ``Node`` at *index* with *value*.

        *value* is parsed first. If it yields exactly one node, that node is
        stored at *index*; if it yields no nodes, the node at *index* is
        removed instead.

        Raises :exc:`ValueError` if *value* parses to more than one
        :class:`.Node`, or :exc:`IndexError` if *index* is out of range. To
        put several nodes at an index, combine :meth:`get` with
        :meth:`remove` and :meth:`insert`, or use :meth:`replace`.
        """
        parsed = parse_anything(value).nodes
        if len(parsed) > 1:
            raise ValueError("Cannot coerce multiple nodes into one index")
        size = len(self.nodes)
        if not -size <= index < size:
            raise IndexError("List assignment index out of range")
        if not parsed:
            # Nothing to store: setting to "empty" deletes the slot.
            self.nodes.pop(index)
            return
        self.nodes[index] = parsed[0]
356

357
    def contains(self, obj: Node | Wikicode | str) -> bool:
        """Return whether *obj* is found inside this Wikicode object.

        A :class:`.Node` or :class:`.Wikicode` is searched for by identity
        among all of our children, recursively. Any other value is tested
        with the ``in`` operator against ``self``.
        """
        if isinstance(obj, (Node, Wikicode)):
            try:
                self._do_strong_search(obj, recursive=True)
            except ValueError:
                return False
            else:
                return True
        return obj in self
371

372
    def index(self, obj: Node | Wikicode | str, recursive: bool = False) -> int:
        """Return the position of *obj* within our list of nodes.

        A :class:`.Node` is matched by identity; anything else is matched
        with ``==``. If *recursive* is ``True``, descendants of each of our
        nodes are examined too, and the value returned is the index of the
        top-level node under which *obj* was found. Raises :exc:`ValueError`
        if there is no match.
        """
        if isinstance(obj, Node):

            def same(candidate) -> bool:
                return obj is candidate

        else:

            def same(candidate) -> bool:
                return obj == candidate

        for i, node in enumerate(self.nodes):
            # _get_children() yields the node itself before its descendants.
            candidates = self._get_children(node) if recursive else (node,)
            for candidate in candidates:
                if same(candidate):
                    return i
        raise ValueError(obj)
390

391
    def get_ancestors(self, obj: Node | Wikicode) -> list[Node]:
        """Return a list of all ancestor nodes of the :class:`.Node` *obj*.

        The list is ordered from the most shallow ancestor (greatest great-
        grandparent) to the direct parent. The node itself is not included in
        the list. For example::

            >>> text = "{{a|{{b|{{c|{{d}}}}}}}}"
            >>> code = mwparserfromhell.parse(text)
            >>> node = code.filter_templates(matches=lambda n: n == "{{d}}")[0]
            >>> code.get_ancestors(node)
            ['{{a|{{b|{{c|{{d}}}}}}}}', '{{b|{{c|{{d}}}}}}', '{{c|{{d}}}}']

        If *obj* is a :class:`.Wikicode`, its first node is used as the node
        to look up.

        Will return an empty list if *obj* is at the top level of this Wikicode
        object. Will raise :exc:`ValueError` if it wasn't found, if *obj* is an
        empty :class:`.Wikicode`, or if *obj* is neither a ``Node`` nor a
        ``Wikicode``.
        """

        def find_path(container: Wikicode, needle: Node) -> list[Node] | None:
            """Return the ancestors of *needle* below *container*, or ``None``."""
            for node in container.nodes:
                if node is needle:
                    return []
                for nested in node.__children__():
                    path = find_path(nested, needle)
                    if path is not None:
                        return [node] + path
            return None

        if isinstance(obj, Wikicode):
            if not obj.nodes:
                # An empty Wikicode has no node to look up; report it the
                # documented way instead of leaking IndexError from get(0).
                raise ValueError(obj)
            obj = obj.get(0)
        elif not isinstance(obj, Node):
            raise ValueError(obj)

        ancestors = find_path(self, obj)
        if ancestors is None:
            raise ValueError(obj)
        return ancestors
427

428
    def get_parent(self, obj: Node | Wikicode) -> Node | None:
10✔
429
        """Return the direct parent node of the :class:`.Node` *obj*.
430

431
        This function is equivalent to calling :meth:`.get_ancestors` and
432
        taking the last element of the resulting list. Will return None if
433
        the node exists but does not have a parent; i.e., it is at the top
434
        level of the Wikicode object.
435
        """
436
        ancestors = self.get_ancestors(obj)
10✔
437
        return ancestors[-1] if ancestors else None
10✔
438

439
    def insert(self, index: int, value: Any) -> None:
        """Insert *value* at *index* in the list of nodes.

        *value* can be anything parsable by :func:`.parse_anything`, which
        includes strings or other :class:`.Wikicode` or :class:`.Node` objects.
        *index* follows the conventions of :meth:`list.insert`: negative values
        count from the end, and out-of-range values are clamped to the start or
        end of the list. When *value* parses to several nodes, they are
        inserted as a contiguous run in their original order.
        """
        # Copy the parsed nodes so that inserting cannot disturb the sequence
        # being iterated if it happens to share storage with our node list.
        new_nodes = list(parse_anything(value).nodes)
        length = len(self.nodes)
        # Resolve the index once. Re-using a negative or too-large index for
        # every node would make each one land relative to the moving end of
        # the list and leave the inserted nodes in reverse order.
        if index < 0:
            index = max(index + length, 0)
        else:
            index = min(index, length)
        for offset, node in enumerate(new_nodes):
            self.nodes.insert(index + offset, node)
448

449
    def insert_before(
        self, obj: Node | Wikicode | str, value: Any, recursive: bool = True
    ) -> None:
        """Place *value* directly in front of *obj*.

        *obj* may be a :class:`.Node`, another :class:`.Wikicode` object (as
        created by :meth:`get_sections`, for example), or a string. A node or
        ``Wikicode`` identifies one specific instance; a string is applied to
        every place it occurs in the code. *value* can be anything parsable by
        :func:`.parse_anything`. With *recursive* set to ``True``, *obj* is
        also looked for inside child nodes rather than only among our direct
        descendants. :exc:`ValueError` is raised if *obj* is not found.
        """
        if isinstance(obj, (Node, Wikicode)):
            context, index = self._do_strong_search(obj, recursive)
            context.insert(index.start, value)
            return

        for exact, context, index in self._do_weak_search(obj, recursive):
            if exact:
                context.insert(index.start, value)
                continue
            # Inexact match: fall back to editing the text of the slice.
            needle = str(obj)
            self._slice_replace(context, index, needle, str(value) + needle)
473

474
    def insert_after(
        self, obj: Node | Wikicode | str, value: Any, recursive: bool = True
    ) -> None:
        """Place *value* directly behind *obj*.

        *obj* may be a :class:`.Node`, another :class:`.Wikicode` object (as
        created by :meth:`get_sections`, for example), or a string. A node or
        ``Wikicode`` identifies one specific instance; a string is applied to
        every place it occurs in the code. *value* can be anything parsable by
        :func:`.parse_anything`. With *recursive* set to ``True``, *obj* is
        also looked for inside child nodes rather than only among our direct
        descendants. :exc:`ValueError` is raised if *obj* is not found.
        """
        if isinstance(obj, (Node, Wikicode)):
            context, index = self._do_strong_search(obj, recursive)
            context.insert(index.stop, value)
            return

        for exact, context, index in self._do_weak_search(obj, recursive):
            if exact:
                context.insert(index.stop, value)
                continue
            # Inexact match: fall back to editing the text of the slice.
            needle = str(obj)
            self._slice_replace(context, index, needle, needle + str(value))
498

499
    def replace(
        self, obj: Node | Wikicode | str, value: Any, recursive: bool = True
    ) -> None:
        """Substitute *value* for *obj*.

        *obj* may be a :class:`.Node`, another :class:`.Wikicode` object (as
        created by :meth:`get_sections`, for example), or a string. A node or
        ``Wikicode`` identifies one specific instance; a string is applied to
        every place it occurs in the code. *value* can be anything parsable by
        :func:`.parse_anything`. With *recursive* set to ``True``, *obj* is
        also looked for inside child nodes rather than only among our direct
        descendants. :exc:`ValueError` is raised if *obj* is not found.
        """

        def swap(context: Wikicode, index: slice) -> None:
            """Drop the nodes covered by *index*, then insert *value* there."""
            first = index.start
            for _ in range(index.stop - first):
                context.nodes.pop(first)
            context.insert(first, value)

        if isinstance(obj, (Node, Wikicode)):
            context, index = self._do_strong_search(obj, recursive)
            swap(context, index)
            return

        for exact, context, index in self._do_weak_search(obj, recursive):
            if exact:
                swap(context, index)
            else:
                # Inexact match: fall back to editing the text of the slice.
                self._slice_replace(context, index, str(obj), str(value))
526

527
    def append(self, value: Any) -> None:
        """Insert *value* at the end of the list of nodes.

        *value* can be anything parsable by :func:`.parse_anything`. If it
        parses to several nodes, they are appended in order.
        """
        # Snapshot the parsed nodes before appending. If the parsed result
        # shares its node list with us (e.g. when appending an object to
        # itself), iterating the live list while growing it would never end.
        for node in list(parse_anything(value).nodes):
            self.nodes.append(node)
535

536
    def remove(self, obj: Node | Wikicode | str, recursive: bool = True) -> None:
        """Delete *obj* from the list of nodes.

        *obj* may be a :class:`.Node`, another :class:`.Wikicode` object (as
        created by :meth:`get_sections`, for example), or a string. A node or
        ``Wikicode`` identifies one specific instance; a string is applied to
        every place it occurs in the code. With *recursive* set to ``True``,
        *obj* is also looked for inside child nodes rather than only among our
        direct descendants. :exc:`ValueError` is raised if *obj* is not found.
        """

        def drop(context: Wikicode, index: slice) -> None:
            """Pop every node covered by *index* out of *context*."""
            first = index.start
            for _ in range(index.stop - first):
                context.nodes.pop(first)

        if isinstance(obj, (Node, Wikicode)):
            context, index = self._do_strong_search(obj, recursive)
            drop(context, index)
            return

        for exact, context, index in self._do_weak_search(obj, recursive):
            if exact:
                drop(context, index)
            else:
                # Inexact match: strip the text out of the slice instead.
                self._slice_replace(context, index, str(obj), "")
559

560
    def matches(
        self,
        other: Node | Wikicode | str | bytes | Iterable[Node | Wikicode | str | bytes],
    ) -> bool:
        """Loosely compare ourselves with *other*, as when comparing page names.

        *other* can be any string-like object, including :class:`.Wikicode`, or
        an iterable of such objects, in which case a match against any one of
        them is enough. Both sides get the same treatment: the text is passed
        through ``strip_code()``, surrounding whitespace is removed, the first
        character is upper-cased and underscores become spaces. Typical usage
        is ``if template.name.matches("stub"): ...``.
        """

        def normalize(s: str) -> str:
            if not s:
                return s
            capitalized = s[0].upper() + s[1:]
            return capitalized.replace("_", " ")

        this = normalize(self.strip_code().strip())

        # Treat a single string-like object as a one-element collection.
        if isinstance(other, (str, bytes, Wikicode, Node)):
            candidates = (other,)
        else:
            candidates = other

        for obj in candidates:
            that = parse_anything(obj).strip_code().strip()
            if this == normalize(that):
                return True
        return False
587

588
    def ifilter(
        self,
        recursive: bool = True,
        matches: Callable[[Node], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
        forcetype: type | None = None,
    ) -> Generator[Node]:
        """Lazily yield the nodes in our list that satisfy the given conditions.

        If *forcetype* is given, only nodes that are instances of this type (or
        tuple of types) are yielded. *recursive* controls how deep we look:
        ``True`` visits all children and their descendants, ``RECURSE_OTHERS``
        does not descend into children that are instances of *forcetype*, and
        ``False`` visits immediate children only.

        ``RECURSE_OTHERS`` can be used to iterate over all un-nested templates,
        even if they are inside of HTML tags, like so:

            >>> code = mwparserfromhell.parse("{{foo}}<b>{{foo|{{bar}}}}</b>")
            >>> code.filter_templates(code.RECURSE_OTHERS)
            ["{{foo}}", "{{foo|{{bar}}}}"]

        *matches* narrows the selection further. It is either a function
        (taking a single :class:`.Node` and returning a boolean) or a regular
        expression (searched for in the node's string representation with
        :func:`re.search`). For a regex, the default flags are
        :const:`re.IGNORECASE` and :const:`re.DOTALL`; pass *flags* to use
        different ones.
        """
        pairs = self._indexed_ifilter(recursive, matches, flags, forcetype)
        # Callers only want the nodes, not their top-level indices.
        return (node for _, node in pairs)
620

621
    def filter(self, *args: Any, **kwargs: Any) -> list[Node]:
10✔
622
        """Return a list of nodes within our list matching certain conditions.
623

624
        This is equivalent to calling :func:`list` on :meth:`ifilter`.
625
        """
626
        return list(self.ifilter(*args, **kwargs))
10✔
627

628
    def get_sections(
        self,
        levels: Iterable[int] | None = None,
        matches: Callable[[Node], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
        flat: bool = False,
        include_lead: bool | None = None,
        include_headings: bool = True,
    ) -> list[Wikicode]:
        """Return a list of sections within the page.

        Sections are returned as :class:`.Wikicode` objects with a shared node
        list (implemented using :class:`.SmartList`) so that changes to
        sections are reflected in the parent Wikicode object.

        Each section contains all of its subsections, unless *flat* is
        ``True``. If *levels* is given, it should be an iterable of integers;
        only sections whose heading levels are within it will be returned.
        The iterable is read exactly once, so one-shot iterators such as
        generators are fine. If *matches* is given, it should be either a
        function or a regex; only sections whose headings match it (without
        the surrounding equal signs) will be included. *flags* can be used to
        override the default regex flags (see :meth:`ifilter`) if a regex
        *matches* is used.

        If *include_lead* is ``True``, the first, lead section (without a
        heading) will be included in the list; ``False`` will not include it;
        the default will include it only if no specific *levels* or *matches*
        were given. If *include_headings* is ``True``, the section's beginning
        :class:`.Heading` object will be included; otherwise, this is skipped.

        The returned list is ordered by the position of each section's first
        node within the page.
        """
        title_matcher = self._build_matcher(matches, flags)

        # Snapshot *levels* a single time. Testing ``in levels`` for every
        # heading would exhaust a one-shot iterator on the first check and
        # silently reject every later heading. A falsy value (``None`` or an
        # empty container) means "no level filtering", as before.
        allowed_levels = frozenset(levels) if levels else None

        def matcher(heading: Heading) -> bool:
            if not title_matcher(heading.title):
                return False
            return allowed_levels is None or heading.level in allowed_levels

        # Resolve the default: the lead section is included only when the
        # caller did not narrow the result by *matches* or *levels*.
        if include_lead is None:
            include_lead = not (matches or allowed_levels is not None)

        sections = []  # Tuples of (index_of_first_node, section)

        # Add the lead section if appropriate. It spans everything before the
        # first top-level heading, or the whole page if there are no headings
        # (a ``None`` stop makes the slice equivalent to ``[:]``).
        if include_lead:
            first_heading = next(
                self._indexed_ifilter(recursive=False, forcetype=Heading), None
            )
            lead_end = None if first_heading is None else first_heading[0]
            sections.append((0, Wikicode(self.nodes[:lead_end])))

        # Tuples of (index, heading), where index and heading.level are both
        # monotonically increasing
        open_headings: list[tuple[int, Heading]] = []

        # Iterate over headings, adding sections to the list as they end:
        for i, heading in self._indexed_ifilter(recursive=False, forcetype=Heading):
            if flat:  # With flat, all sections close at the next heading
                newly_closed, open_headings = open_headings, []
            else:
                # Otherwise, the new heading closes the first open section
                # whose level is at least as deep, plus everything nested
                # inside it (i.e. everything after it in open_headings).
                cut = next(
                    (
                        j
                        for j, (_, opened) in enumerate(open_headings)
                        if heading.level <= opened.level
                    ),
                    len(open_headings),
                )
                newly_closed = open_headings[cut:]
                del open_headings[cut:]

            for start, closed_heading in newly_closed:
                if matcher(closed_heading):
                    sections.append((start, Wikicode(self.nodes[start:i])))

            # A section starts at its heading, or just after it when headings
            # are excluded from the output.
            start = i if include_headings else (i + 1)
            open_headings.append((start, heading))

        # Add any remaining open headings to the list of sections; they run to
        # the end of the page:
        for start, heading in open_headings:
            if matcher(heading):
                sections.append((start, Wikicode(self.nodes[start:])))

        # Ensure that earlier sections are earlier in the returned list. Sort
        # on the start index alone: the sort is stable, so an index tie (an
        # empty lead section and a heading at index 0) keeps insertion order
        # without ever comparing the Wikicode objects themselves.
        sections.sort(key=lambda entry: entry[0])
        return [section for _, section in sections]
704

705
    def strip_code(
        self,
        normalize: bool = True,
        collapse: bool = True,
        keep_template_params: bool = False,
    ) -> str:
        """Return a rendered string without unprintable code such as templates.

        Each top-level node decides how it is rendered through its
        :meth:`~.Node.__strip__` method, which generally returns a subset of
        the node's contents or ``None``. For example, templates and tags are
        removed completely, links are reduced to their display part, and
        headings are reduced to their title. Nodes whose stripped form is
        empty or ``None`` contribute nothing to the output.

        If *normalize* is ``True``, various things may be done to strip code
        further, such as converting HTML entities like ``&Sigma;``, ``&#931;``,
        and ``&#x3a3;`` to ``Σ``. If *collapse* is ``True``, leading and
        trailing newlines are removed and any run of three or more newlines is
        reduced to two. If *keep_template_params* is ``True``, then template
        parameters will be preserved in the output (normally, they are removed
        completely).
        """
        options = dict(
            normalize=normalize,
            collapse=collapse,
            keep_template_params=keep_template_params,
        )

        # Render every node, dropping those that strip to nothing.
        rendered = (node.__strip__(**options) for node in self.nodes)
        text = "".join(str(piece) for piece in rendered if piece)

        if not collapse:
            return text
        # Trim outer newlines first, then squeeze long blank runs to one
        # blank line.
        return re.sub(r"\n{3,}", "\n\n", text.strip("\n"))
745

746
    def get_tree(self) -> str:
        """Return a hierarchical tree representation of the object.

        The representation is a string that makes the most sense when
        printed. It is built by calling :meth:`_get_tree` on the
        :class:`.Wikicode` object and its children recursively, and joining
        the resulting lines with newlines. The end result may look something
        like the following::

            >>> text = "Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}"
            >>> print(mwparserfromhell.parse(text).get_tree())
            Lorem ipsum
            {{
                  foo
                | 1
                = bar
                | 2
                = {{
                        baz
                  }}
                | spam
                = eggs
            }}
        """
        # A fresh, unique object that can be located with certainty in a list
        sentinel = object()
        tree_lines = self._get_tree(self, [], sentinel, 0)
        return "\n".join(tree_lines)
771

772

773
# Install the node-type-specific filter helpers on Wikicode at import time,
# one keyword per node class. NOTE(review): the keyword names presumably
# become the suffixes of the generated methods (e.g. ``filter_templates``) --
# confirm against ``Wikicode._build_filter_methods``.
Wikicode._build_filter_methods(
    arguments=Argument,
    comments=Comment,
    external_links=ExternalLink,
    headings=Heading,
    html_entities=HTMLEntity,
    tags=Tag,
    templates=Template,
    text=Text,
    wikilinks=Wikilink,
)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc