• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

earwig / mwparserfromhell / 15990296633

01 Jul 2025 05:01AM UTC coverage: 98.438% (-0.2%) from 98.662%
15990296633

push

github

earwig
Improve Wikicode/Node typing

239 of 252 new or added lines in 6 files covered. (94.84%)

2 existing lines in 2 files now uncovered.

3276 of 3328 relevant lines covered (98.44%)

9.84 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.16
/src/mwparserfromhell/wikicode.py
1
# Copyright (C) 2012-2025 Ben Kurtovic <ben.kurtovic@gmail.com>
2
#
3
# Permission is hereby granted, free of charge, to any person obtaining a copy
4
# of this software and associated documentation files (the "Software"), to deal
5
# in the Software without restriction, including without limitation the rights
6
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
# copies of the Software, and to permit persons to whom the Software is
8
# furnished to do so, subject to the following conditions:
9
#
10
# The above copyright notice and this permission notice shall be included in
11
# all copies or substantial portions of the Software.
12
#
13
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
# SOFTWARE.
20

21
from __future__ import annotations
10✔
22

23
import re
10✔
24
from collections.abc import Generator, Iterable
10✔
25
from enum import Enum
10✔
26
from itertools import chain
10✔
27
from typing import Any, Callable, Literal, TypeVar, cast, overload
10✔
28

29
from .nodes import (
10✔
30
    Argument,
31
    Comment,
32
    ExternalLink,
33
    Heading,
34
    HTMLEntity,
35
    Node,
36
    Tag,
37
    Template,
38
    Text,
39
    Wikilink,
40
)
41
from .smart_list.list_proxy import ListProxy
10✔
42
from .string_mixin import StringMixIn
10✔
43
from .utils import parse_anything
10✔
44

45
__all__ = ["Wikicode"]
10✔
46

47
FLAGS = re.IGNORECASE | re.DOTALL
10✔
48

49
N = TypeVar("N", bound=Node)
10✔
50

51

52
class Recurse(Enum):
    """Special (non-boolean) values accepted by the *recursive* filter argument."""

    # Re-exported on the class as ``Wikicode.RECURSE_OTHERS``. When passed as
    # *recursive*, filtering does not descend into nodes that are themselves
    # instances of the requested *forcetype*.
    RECURSE_OTHERS = 2
10✔
54

55

56
class Wikicode(StringMixIn):
10✔
57
    """A ``Wikicode`` is a container for nodes that operates like a string.
58

59
    Additionally, it contains methods that can be used to extract data from or
60
    modify the nodes, implemented in an interface similar to a list. For
61
    example, :meth:`index` can get the index of a node in the list, and
62
    :meth:`insert` can add a new node at that index. The :meth:`filter()
63
    <ifilter>` series of functions is very useful for extracting and iterating
64
    over, for example, all of the templates in the object.
65
    """
66

67
    RECURSE_OTHERS = Recurse.RECURSE_OTHERS
10✔
68

69
    def __init__(self, nodes: list[Node]):
        """Create a ``Wikicode`` wrapping the given list of *nodes*.

        The list object is stored as-is (it is not copied), so later changes
        to it are visible through :attr:`nodes`.
        """
        super().__init__()
        self._nodes = nodes
10✔
72

73
    def __str__(self) -> str:
10✔
74
        return "".join([str(node) for node in self.nodes])
10✔
75

76
    @overload
10✔
77
    @staticmethod
10✔
78
    def _get_children(
10✔
79
        node: Node,
80
        contexts: Literal[False] = False,
81
        restrict: type | None = None,
82
        parent: Wikicode | None = None,
83
    ) -> Generator[Node]: ...
84

85
    @overload
10✔
86
    @staticmethod
10✔
87
    def _get_children(
10✔
88
        node: Node,
89
        contexts: Literal[True],
90
        restrict: type | None = None,
91
        parent: Wikicode | None = None,
92
    ) -> Generator[tuple[Wikicode | None, Node]]: ...
93

94
    @staticmethod
10✔
95
    def _get_children(
10✔
96
        node: Node,
97
        contexts: bool = False,
98
        restrict: type | None = None,
99
        parent: Wikicode | None = None,
100
    ) -> Generator[tuple[Wikicode | None, Node] | Node]:
101
        """Iterate over all child :class:`.Node`\\ s of a given *node*."""
102
        yield (parent, node) if contexts else node
10✔
103
        if restrict and isinstance(node, restrict):
10✔
104
            return
10✔
105
        for code in node.__children__():
10✔
106
            for child in code.nodes:
10✔
107
                sub = Wikicode._get_children(child, contexts, restrict, code)
10✔
108
                yield from sub
10✔
109

110
    @staticmethod
    def _slice_replace(code: Wikicode, index: slice, old: str, new: str) -> None:
        """Substitute *new* for *old* in the text of ``code.nodes[index]``.

        The nodes covered by *index* are joined into a single string, every
        occurrence of *old* is replaced by *new*, and the result is re-parsed
        to overwrite that slice of *code*'s node list.
        """
        joined = "".join(str(node) for node in code.get(index))
        replaced = joined.replace(old, new)
        code.nodes[index] = parse_anything(replaced).nodes
10✔
116

117
    @staticmethod
10✔
118
    def _build_matcher(
10✔
119
        matches: Callable[[N], bool | re.Match[str] | None] | re.Pattern | str | None,
120
        flags: int,
121
    ) -> Callable[[N], bool | re.Match[str] | None]:
122
        """Helper for :meth:`_indexed_ifilter` and others.
123

124
        If *matches* is a function, return it. If it's a regex, return a
125
        wrapper around it that can be called with a node to do a search. If
126
        it's ``None``, return a function that always returns ``True``.
127
        """
128
        if matches:
10✔
129
            if callable(matches):
10✔
130
                return matches
10✔
131
            else:
132
                return lambda obj: re.search(matches, str(obj), flags)
10✔
133
        else:
134
            return lambda obj: True
10✔
135

136
    def _indexed_ifilter(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[N], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
        forcetype: type[N] | None = None,
    ) -> Generator[tuple[int, N]]:
        """Yield ``(i, node)`` pairs for every node passing the filter.

        Arguments have the same meaning as for :meth:`ifilter`. *i* is always
        the position, in our own node list, of the top-level node that is or
        contains the yielded node; with *recursive* set, ``self.nodes[i]`` may
        therefore be an ancestor of the node rather than the node itself.
        """
        accept = self._build_matcher(matches, flags)
        pairs: Iterable[tuple[int, Node]]
        if not recursive:
            pairs = enumerate(self.nodes)
        else:
            # Only RECURSE_OTHERS stops descent at nodes of the wanted type.
            restrict = forcetype if recursive == self.RECURSE_OTHERS else None

            def descend(pos: int, top: Node) -> Generator[tuple[int, Node]]:
                for descendant in self._get_children(top, restrict=restrict):
                    yield (pos, cast(Node, descendant))

            # Unpacking with * walks enumerate() up front, so the set of
            # top-level nodes visited is fixed before any pair is produced.
            pairs = chain(*(descend(pos, top) for pos, top in enumerate(self.nodes)))

        for pos, candidate in pairs:
            if forcetype is not None and not isinstance(candidate, forcetype):
                continue
            if accept(cast(N, candidate)):
                yield (pos, cast(N, candidate))
10✔
167

168
    def _is_child_wikicode(self, obj: Wikicode, recursive: bool = True) -> bool:
        """Return whether the :class:`.Wikicode` *obj* is us or one of our descendants.

        Identity is decided by comparing the underlying node lists with
        ``is``. With *recursive* false, only ``self`` is checked.
        """

        def deref(nodes):
            # A ListProxy is compared via the list it proxies.
            # pylint: disable=protected-access
            return nodes._parent if isinstance(nodes, ListProxy) else nodes

        target = deref(obj.nodes)
        if target is deref(self.nodes):
            return True
        if not recursive:
            return False

        # Walk every Wikicode reachable through our nodes' children.
        pending = [self]
        while pending:
            code = pending.pop()
            if target is deref(code.nodes):
                return True
            for node in code.nodes:
                pending.extend(node.__children__())
        return False
10✔
188

189
    def _do_strong_search(
        self, obj: Node | Wikicode, recursive: bool = True
    ) -> tuple[Wikicode, slice]:
        """Locate the exact object *obj* (compared with ``is``) in the node tree.

        *obj* may be a :class:`.Node` or a :class:`.Wikicode`. On success a
        tuple (*context*, *index*) is returned, where *context* is the
        :class:`.Wikicode` holding *obj* and *index* is a :class:`slice`
        covering it there. When *recursive* is ``False`` only our immediate
        nodes are examined, so for a node *context* is always ``self``; when
        it is ``True``, *context* may be any :class:`.Wikicode` nested inside
        one of our nodes.

        Raises :exc:`ValueError` if *obj* is not found and :exc:`TypeError` if
        it is neither a node nor a ``Wikicode``.
        """
        if isinstance(obj, Wikicode):
            if not self._is_child_wikicode(obj, recursive):
                raise ValueError(obj)
            # A whole Wikicode is addressed as the full slice of itself.
            return obj, slice(0, len(obj.nodes))

        if not isinstance(obj, Node):
            raise TypeError(obj)

        if not recursive:
            pos = self.index(obj)
            return self, slice(pos, pos + 1)

        for node in self.nodes:
            for context, child in self._get_children(node, contexts=True):
                if child is not obj:
                    continue
                # Falsy context (None for our own top-level nodes) means us.
                owner = context if context else self
                pos = owner.index(child)
                return owner, slice(pos, pos + 1)
        raise ValueError(obj)
×
225

226
    def _do_weak_search(
        self, obj: Any, recursive: bool
    ) -> list[tuple[bool, Wikicode, slice]]:
        """Search for an element that looks like *obj* within the node list.

        This follows the same rules as :meth:`_do_strong_search` with some
        differences. *obj* is treated as a string that might represent any
        :class:`.Node`, :class:`.Wikicode`, or combination of the two present
        in the node list. Thus, matching is weak (using string comparisons)
        rather than strong (using ``is``). Because multiple nodes can match
        *obj*, the result is a list of tuples instead of just one (however,
        :exc:`ValueError` is still raised if nothing is found). Individual
        matches will never overlap.

        The tuples contain a new first element, *exact*, which is ``True`` if
        we were able to match *obj* exactly to one or more adjacent nodes, or
        ``False`` if we found *obj* inside a node or incompletely spanning
        multiple nodes.
        """
        obj = parse_anything(obj)
        if not obj or obj not in self:
            raise ValueError(obj)
        results = []
        contexts: list[Wikicode] = [self]
        while contexts:
            context = contexts.pop()
            # Scan each context from its last node towards its first.
            i = len(context.nodes) - 1
            while i >= 0:
                node = context.get(i)
                if obj.get(-1) == node:
                    # The last node of *obj* matches here; compare the
                    # remaining nodes of *obj* against the nodes preceding i.
                    for j in range(-len(obj.nodes), -1):
                        # i + j + 1 goes negative when *obj* has more nodes
                        # than exist before position i. A negative index would
                        # wrap around to the end of the list (or raise
                        # IndexError), so treat it as "no match" instead.
                        if i + j + 1 < 0 or obj.get(j) != context.get(i + j + 1):
                            break
                    else:
                        # Full run matched: record it and jump past it so that
                        # matches never overlap.
                        i -= len(obj.nodes) - 1
                        index = slice(i, i + len(obj.nodes))
                        results.append((True, context, index))
                elif recursive and obj in node:
                    contexts.extend(node.__children__())
                i -= 1
        if not results:
            if not recursive:
                raise ValueError(obj)
            # *obj* is in our text but does not line up with node boundaries;
            # report an inexact match spanning our whole node list.
            results.append((False, self, slice(0, len(self.nodes))))
        return results
10✔
271

272
    def _get_tree(
10✔
273
        self, code: Wikicode, lines: list[str], marker: Any, indent: int
274
    ) -> list[str]:
275
        """Build a tree to illustrate the way the Wikicode object was parsed.
276

277
        The method that builds the actual tree is ``__showtree__`` of ``Node``
278
        objects. *code* is the ``Wikicode`` object to build a tree for. *lines*
279
        is the list to append the tree to, which is returned at the end of the
280
        method. *marker* is some object to be used to indicate that the builder
281
        should continue on from the last line instead of starting a new one; it
282
        should be any object that can be tested for with ``is``. *indent* is
283
        the starting indentation.
284
        """
285

286
        def write(*args: str) -> None:
10✔
287
            """Write a new line following the proper indentation rules."""
288
            if lines and lines[-1] is marker:  # Continue from the last line
10✔
289
                lines.pop()  # Remove the marker
10✔
290
                last = lines.pop()
10✔
291
                lines.append(last + " ".join(args))
10✔
292
            else:
293
                lines.append(" " * 6 * indent + " ".join(args))
10✔
294

295
        def get(code: Wikicode):
10✔
296
            self._get_tree(code, lines, marker, indent + 1)
10✔
297

298
        def mark():
10✔
299
            return lines.append(marker)
10✔
300

301
        for node in code.nodes:
10✔
302
            node.__showtree__(write, get, mark)
10✔
303
        return lines
10✔
304

305
    @property
10✔
306
    def nodes(self) -> list[Node]:
10✔
307
        """A list of :class:`.Node` objects.
308

309
        This is the internal data actually stored within a :class:`.Wikicode`
310
        object.
311
        """
312
        return self._nodes
10✔
313

314
    @nodes.setter
10✔
315
    def nodes(self, value: list[Node] | Any) -> None:
10✔
316
        if not isinstance(value, list):
10✔
317
            value = parse_anything(value).nodes
10✔
318
        self._nodes = value
10✔
319

320
    @overload
10✔
321
    def get(self, index: int) -> Node: ...
10✔
322

323
    @overload
10✔
324
    def get(self, index: slice) -> list[Node]: ...
10✔
325

326
    def get(self, index):
10✔
327
        """Return the *index*\\ th node within the list of nodes."""
328
        return self.nodes[index]
10✔
329

330
    def set(self, index: int, value: Any) -> None:
        """Replace the ``Node`` at *index* with *value*.

        *value* is parsed with :func:`.parse_anything`. If it yields exactly
        one node, that node is stored at *index*; if it yields none, the node
        at *index* is removed instead.

        Raises :exc:`ValueError` if *value* parses to more than one
        :class:`.Node`, or :exc:`IndexError` if *index* is out of range. To
        put several nodes at an index, use :meth:`get` with either
        :meth:`remove` and :meth:`insert` or :meth:`replace`.
        """
        parsed = parse_anything(value).nodes
        if len(parsed) > 1:
            raise ValueError("Cannot coerce multiple nodes into one index")

        size = len(self.nodes)
        if not -size <= index < size:
            raise IndexError("List assignment index out of range")

        if not parsed:
            self.nodes.pop(index)
            return
        self.nodes[index] = parsed[0]
10✔
347

348
    def contains(self, obj: Node | Wikicode | str) -> bool:
        """Return whether *obj* is contained in this Wikicode object.

        A :class:`.Node` or :class:`.Wikicode` is looked up by identity among
        all of our descendants, recursively. Any other value falls back to
        the string-style ``in`` test (:meth:`.__contains__`).
        """
        if isinstance(obj, (Node, Wikicode)):
            try:
                self._do_strong_search(obj, recursive=True)
            except ValueError:
                return False
            return True
        return obj in self
10✔
362

363
    def index(  # pyright: ignore[reportIncompatibleMethodOverride]
        self, obj: Node | Wikicode | str, recursive: bool = False
    ) -> int:
        """Return the position of *obj* in our list of nodes.

        A :class:`.Node` is matched by identity; anything else is matched
        with ``==``. If *recursive* is ``True``, descendants of each node are
        searched too, and the value returned is the index of the top-level
        node of ours that is or contains *obj*. Otherwise only our direct
        nodes are considered. Raises :exc:`ValueError` if *obj* is not found.
        """
        if isinstance(obj, Node):

            def same(candidate) -> bool:
                return obj is candidate

        else:

            def same(candidate) -> bool:
                return obj == candidate

        for pos, node in enumerate(self.nodes):
            candidates = self._get_children(node) if recursive else (node,)
            if any(same(candidate) for candidate in candidates):
                return pos
        raise ValueError(obj)
10✔
383

384
    def get_ancestors(self, obj: Node | Wikicode) -> list[Node]:
        """Return every ancestor node of the :class:`.Node` *obj*, outermost first.

        The result runs from the shallowest ancestor down to the direct
        parent and does not include the node itself. For example::

            >>> text = "{{a|{{b|{{c|{{d}}}}}}}}"
            >>> code = mwparserfromhell.parse(text)
            >>> node = code.filter_templates(matches=lambda n: n == "{{d}}")[0]
            >>> code.get_ancestors(node)
            ['{{a|{{b|{{c|{{d}}}}}}}}', '{{b|{{c|{{d}}}}}}', '{{c|{{d}}}}']

        If *obj* is a :class:`.Wikicode`, its first node is used. An empty
        list is returned when *obj* sits at the top level of this object, and
        :exc:`ValueError` is raised when it is not found at all.
        """

        def _get_ancestors(code: Wikicode, needle: Node) -> list[Node] | None:
            # Depth-first search; None means "needle is not below *code*".
            for node in code.nodes:
                if node is needle:
                    return []
                for nested in node.__children__():
                    trail = _get_ancestors(nested, needle)
                    if trail is not None:
                        return [node, *trail]
            return None

        if isinstance(obj, Wikicode):
            obj = obj.get(0)
        elif not isinstance(obj, Node):
            raise ValueError(obj)

        found = _get_ancestors(self, obj)
        if found is None:
            raise ValueError(obj)
        return found
10✔
420

421
    def get_parent(self, obj: Node | Wikicode) -> Node | None:
        """Return the node directly containing the :class:`.Node` *obj*.

        This is the last element of :meth:`.get_ancestors` for *obj*. If the
        node exists but has no parent (i.e. it is at the top level of this
        Wikicode object), ``None`` is returned.
        """
        lineage = self.get_ancestors(obj)
        if not lineage:
            return None
        return lineage[-1]
10✔
431

432
    def insert(self, index: int, value: Any) -> None:
        """Insert *value* into the list of nodes at position *index*.

        *value* may be anything :func:`.parse_anything` accepts, including
        strings and other :class:`.Wikicode` or :class:`.Node` objects.
        """
        parsed = parse_anything(value).nodes
        # Insert back-to-front, one node at a time, always at *index*.
        for item in reversed(parsed):
            self.nodes.insert(index, item)
10✔
441

442
    def insert_before(
        self, obj: Node | Wikicode | str, value: Any, recursive: bool = True
    ) -> None:
        """Insert *value* directly in front of *obj*.

        *obj* may be a string, a :class:`.Node`, or another
        :class:`.Wikicode` object (as created by :meth:`get_sections`, for
        example). A string affects every occurrence of that string within the
        code; a node or ``Wikicode`` affects only that specific instance.
        *value* can be anything parsable by :func:`.parse_anything`. With
        *recursive* set to ``True``, *obj* is also searched for inside child
        nodes, not just among our direct descendants. :exc:`ValueError` is
        raised if *obj* is not found.
        """
        if isinstance(obj, (Node, Wikicode)):
            context, index = self._do_strong_search(obj, recursive)
            context.insert(index.start, value)
            return

        for exact, context, index in self._do_weak_search(obj, recursive):
            if exact:
                context.insert(index.start, value)
                continue
            # Inexact match: splice textually and re-parse the slice.
            obj = str(obj)
            self._slice_replace(context, index, obj, str(value) + obj)
10✔
466

467
    def insert_after(
        self, obj: Node | Wikicode | str, value: Any, recursive: bool = True
    ) -> None:
        """Insert *value* directly behind *obj*.

        *obj* may be a string, a :class:`.Node`, or another
        :class:`.Wikicode` object (as created by :meth:`get_sections`, for
        example). A string affects every occurrence of that string within the
        code; a node or ``Wikicode`` affects only that specific instance.
        *value* can be anything parsable by :func:`.parse_anything`. With
        *recursive* set to ``True``, *obj* is also searched for inside child
        nodes, not just among our direct descendants. :exc:`ValueError` is
        raised if *obj* is not found.
        """
        if isinstance(obj, (Node, Wikicode)):
            context, index = self._do_strong_search(obj, recursive)
            context.insert(index.stop, value)
            return

        for exact, context, index in self._do_weak_search(obj, recursive):
            if exact:
                context.insert(index.stop, value)
                continue
            # Inexact match: splice textually and re-parse the slice.
            obj = str(obj)
            self._slice_replace(context, index, obj, obj + str(value))
10✔
491

492
    def replace(  # pyright: ignore[reportIncompatibleMethodOverride]
        self, obj: Node | Wikicode | str, value: Any, recursive: bool = True
    ) -> None:
        """Substitute *value* for *obj*.

        *obj* may be a string, a :class:`.Node`, or another
        :class:`.Wikicode` object (as created by :meth:`get_sections`, for
        example). A string affects every occurrence of that string within the
        code; a node or ``Wikicode`` affects only that specific instance.
        *value* can be anything parsable by :func:`.parse_anything`. With
        *recursive* set to ``True``, *obj* is also searched for inside child
        nodes, not just among our direct descendants. :exc:`ValueError` is
        raised if *obj* is not found.
        """
        if isinstance(obj, (Node, Wikicode)):
            # A strong hit is handled exactly like a single exact weak hit.
            hits = [(True, *self._do_strong_search(obj, recursive))]
        else:
            hits = self._do_weak_search(obj, recursive)

        for exact, context, index in hits:
            if not exact:
                self._slice_replace(context, index, str(obj), str(value))
                continue
            start = index.start
            # Drop the matched nodes one by one, then put *value* in their place.
            for _ in range(index.stop - start):
                context.nodes.pop(start)
            context.insert(start, value)
10✔
519

520
    def append(self, value: Any) -> None:
        """Add *value* to the end of the list of nodes.

        *value* may be anything :func:`.parse_anything` accepts; each node it
        parses to is appended in order.
        """
        parsed = parse_anything(value).nodes
        for item in parsed:
            self.nodes.append(item)
10✔
528

529
    def remove(self, obj: Node | Wikicode | str, recursive: bool = True) -> None:
        """Delete *obj* from the list of nodes.

        *obj* may be a string, a :class:`.Node`, or another
        :class:`.Wikicode` object (as created by :meth:`get_sections`, for
        example). A string affects every occurrence of that string within the
        code; a node or ``Wikicode`` affects only that specific instance.
        With *recursive* set to ``True``, *obj* is also searched for inside
        child nodes, not just among our direct descendants.
        :exc:`ValueError` is raised if *obj* is not found.
        """
        if isinstance(obj, (Node, Wikicode)):
            # A strong hit is handled exactly like a single exact weak hit.
            hits = [(True, *self._do_strong_search(obj, recursive))]
        else:
            hits = self._do_weak_search(obj, recursive)

        for exact, context, index in hits:
            if not exact:
                self._slice_replace(context, index, str(obj), "")
                continue
            start = index.start
            for _ in range(index.stop - start):
                context.nodes.pop(start)
10✔
552

553
    def matches(
        self,
        other: Node | Wikicode | str | bytes | Iterable[Node | Wikicode | str | bytes],
    ) -> bool:
        """Loosely compare this object with *other*, as when comparing page names.

        *other* may be a single string-like object (including
        :class:`.Wikicode`) or an iterable of them, in which case the result
        is true if any element matches. Both sides are normalized the same
        way: markup and surrounding whitespace are stripped, the first
        character is upper-cased, and underscores become spaces. Typical
        usage is ``if template.name.matches("stub"): ...``.
        """

        def normalize(s: str) -> str:
            if not s:
                return s
            capitalized = s[0].upper() + s[1:]
            return capitalized.replace("_", " ")

        this = normalize(self.strip_code().strip())

        # Treat a lone value as a one-element collection of candidates.
        if isinstance(other, (str, bytes, Wikicode, Node)):
            candidates = (other,)
        else:
            candidates = other

        return any(
            this == normalize(parse_anything(item).strip_code().strip())
            for item in candidates
        )
10✔
580

581
    @overload
    def ifilter(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[Node], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
        forcetype: None = None,
    ) -> Generator[Node]: ...

    @overload
    def ifilter(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[N], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
        *,
        forcetype: type[N],
    ) -> Generator[N]: ...

    def ifilter(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[N], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
        forcetype: type[N] | None = None,
    ) -> Generator[N]:
        """Iterate over the nodes in our list that satisfy the given conditions.

        If *forcetype* is given, only nodes that are instances of that type
        (or tuple of types) are yielded. *recursive* controls how deep the
        search goes: ``True`` visits all children and their descendants,
        ``False`` visits immediate children only, and ``RECURSE_OTHERS``
        descends only into children that are not instances of *forcetype*.

        ``RECURSE_OTHERS`` can be used to iterate over all un-nested templates,
        even if they are inside of HTML tags, like so:

            >>> code = mwparserfromhell.parse("{{foo}}<b>{{foo|{{bar}}}}</b>")
            >>> code.filter_templates(code.RECURSE_OTHERS)
            ["{{foo}}", "{{foo|{{bar}}}}"]

        *matches* narrows the result further. It is either a function (taking
        a single :class:`.Node` and returning a boolean) or a regular
        expression, which is applied to the node's string representation with
        :func:`re.search`. For a regex, *flags* is what gets passed to
        :func:`re.search`; it defaults to :const:`re.IGNORECASE` combined
        with :const:`re.DOTALL`.
        """
        indexed = self._indexed_ifilter(recursive, matches, flags, forcetype)
        # Drop the index half of each (index, node) pair, staying lazy.
        return (pair[1] for pair in indexed)
10✔
632

633
    def ifilter_arguments(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[Argument], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
    ) -> Generator[Argument]:
        """Iterate over :class:`.Argument` nodes.

        Shorthand for :meth:`ifilter` called with ``forcetype=Argument``; the
        other arguments are passed through unchanged.
        """
        return self.ifilter(recursive, matches, flags, forcetype=Argument)
647

648
    def ifilter_comments(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[Comment], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
    ) -> Generator[Comment]:
        """Iterate over :class:`.Comment` nodes.

        Shorthand for :meth:`ifilter` called with ``forcetype=Comment``; the
        other arguments are passed through unchanged.
        """
        return self.ifilter(recursive, matches, flags, forcetype=Comment)
662

663
    def ifilter_external_links(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[ExternalLink], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
    ) -> Generator[ExternalLink]:
        """Iterate over :class:`.ExternalLink` nodes.

        Shorthand for :meth:`ifilter` called with ``forcetype=ExternalLink``;
        the other arguments are passed through unchanged.
        """
        return self.ifilter(recursive, matches, flags, forcetype=ExternalLink)
677

678
    def ifilter_headings(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[Heading], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
    ) -> Generator[Heading]:
        """Iterate over :class:`.Heading` nodes.

        Shorthand for :meth:`ifilter` called with ``forcetype=Heading``; the
        other arguments are passed through unchanged.
        """
        return self.ifilter(recursive, matches, flags, forcetype=Heading)
692

693
    def ifilter_html_entities(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[HTMLEntity], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
    ) -> Generator[HTMLEntity]:
        """Iterate over the HTML entities in this object.

        Shorthand for :meth:`ifilter` with *forcetype* fixed to
        :class:`~html_entity.HTMLEntity`; *recursive*, *matches*, and *flags*
        are forwarded unchanged.
        """
        entities = self.ifilter(
            forcetype=HTMLEntity,
            recursive=recursive,
            matches=matches,
            flags=flags,
        )
        return entities
707

708
    def ifilter_tags(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[Tag], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
    ) -> Generator[Tag]:
        """Iterate over the tags in this object.

        Shorthand for :meth:`ifilter` with *forcetype* fixed to
        :class:`~tag.Tag`; *recursive*, *matches*, and *flags* are forwarded
        unchanged.
        """
        tags = self.ifilter(
            forcetype=Tag,
            recursive=recursive,
            matches=matches,
            flags=flags,
        )
        return tags
722

723
    def ifilter_templates(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[Template], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
    ) -> Generator[Template]:
        """Iterate over the templates in this object.

        Shorthand for :meth:`ifilter` with *forcetype* fixed to
        :class:`~template.Template`; *recursive*, *matches*, and *flags* are
        forwarded unchanged.
        """
        templates = self.ifilter(
            forcetype=Template,
            recursive=recursive,
            matches=matches,
            flags=flags,
        )
        return templates
737

738
    def ifilter_text(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[Text], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
    ) -> Generator[Text]:
        """Iterate over the text nodes in this object.

        Shorthand for :meth:`ifilter` with *forcetype* fixed to
        :class:`~text.Text`; *recursive*, *matches*, and *flags* are forwarded
        unchanged.
        """
        text_nodes = self.ifilter(
            forcetype=Text,
            recursive=recursive,
            matches=matches,
            flags=flags,
        )
        return text_nodes
752

753
    def ifilter_wikilinks(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[Wikilink], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
    ) -> Generator[Wikilink]:
        """Iterate over the wikilinks in this object.

        Shorthand for :meth:`ifilter` with *forcetype* fixed to
        :class:`~wikilink.Wikilink`; *recursive*, *matches*, and *flags* are
        forwarded unchanged.
        """
        wikilinks = self.ifilter(
            forcetype=Wikilink,
            recursive=recursive,
            matches=matches,
            flags=flags,
        )
        return wikilinks
767

768
    # Typing overload: without *forcetype*, any kind of node may be returned.
    @overload
    def filter(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[Node], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
        forcetype: None = None,
    ) -> list[Node]: ...

    # Typing overload: a keyword *forcetype* narrows the element type to it.
    @overload
    def filter(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[N], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
        *,
        forcetype: type[N],
    ) -> list[N]: ...

    def filter(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[N], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
        forcetype: type[N] | None = None,
    ) -> list[N]:
        """Return a list of the nodes in our list that satisfy the filter.

        All arguments are handed to :meth:`ifilter` unchanged, and the
        iterator it returns is collected into a :class:`list`.
        """
        # The implementation signature is wider than either ifilter overload,
        # hence the type-checker suppressions on the call below.
        return list(
            self.ifilter(  # pyright: ignore[reportCallIssue]
                recursive=recursive,
                matches=matches,
                flags=flags,
                forcetype=forcetype,  # pyright: ignore[reportArgumentType]
            )
        )
10✔
805

806
    def filter_arguments(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[Argument], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
    ) -> list[Argument]:
        """Return a list of the arguments in this object.

        Shorthand for :meth:`filter` with *forcetype* fixed to
        :class:`~argument.Argument`; *recursive*, *matches*, and *flags* are
        forwarded unchanged.
        """
        arguments = self.filter(
            forcetype=Argument,
            recursive=recursive,
            matches=matches,
            flags=flags,
        )
        return arguments
820

821
    def filter_comments(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[Comment], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
    ) -> list[Comment]:
        """Return a list of the comments in this object.

        Shorthand for :meth:`filter` with *forcetype* fixed to
        :class:`~comment.Comment`; *recursive*, *matches*, and *flags* are
        forwarded unchanged.
        """
        comments = self.filter(
            forcetype=Comment,
            recursive=recursive,
            matches=matches,
            flags=flags,
        )
        return comments
835

836
    def filter_external_links(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[ExternalLink], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
    ) -> list[ExternalLink]:
        """Return a list of the external links in this object.

        Shorthand for :meth:`filter` with *forcetype* fixed to
        :class:`~external_link.ExternalLink`; *recursive*, *matches*, and
        *flags* are forwarded unchanged.
        """
        external_links = self.filter(
            forcetype=ExternalLink,
            recursive=recursive,
            matches=matches,
            flags=flags,
        )
        return external_links
850

851
    def filter_headings(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[Heading], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
    ) -> list[Heading]:
        """Return a list of the headings in this object.

        Shorthand for :meth:`filter` with *forcetype* fixed to
        :class:`~heading.Heading`; *recursive*, *matches*, and *flags* are
        forwarded unchanged.
        """
        headings = self.filter(
            forcetype=Heading,
            recursive=recursive,
            matches=matches,
            flags=flags,
        )
        return headings
865

866
    def filter_html_entities(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[HTMLEntity], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
    ) -> list[HTMLEntity]:
        """Return a list of the HTML entities in this object.

        Shorthand for :meth:`filter` with *forcetype* fixed to
        :class:`~html_entity.HTMLEntity`; *recursive*, *matches*, and *flags*
        are forwarded unchanged.
        """
        entities = self.filter(
            forcetype=HTMLEntity,
            recursive=recursive,
            matches=matches,
            flags=flags,
        )
        return entities
880

881
    def filter_tags(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[Tag], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
    ) -> list[Tag]:
        """Return a list of the tags in this object.

        Shorthand for :meth:`filter` with *forcetype* fixed to
        :class:`~tag.Tag`; *recursive*, *matches*, and *flags* are forwarded
        unchanged.
        """
        tags = self.filter(
            forcetype=Tag,
            recursive=recursive,
            matches=matches,
            flags=flags,
        )
        return tags
895

896
    def filter_templates(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[Template], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
    ) -> list[Template]:
        """Return a list of the templates in this object.

        Shorthand for :meth:`filter` with *forcetype* fixed to
        :class:`~template.Template`; *recursive*, *matches*, and *flags* are
        forwarded unchanged.
        """
        templates = self.filter(
            forcetype=Template,
            recursive=recursive,
            matches=matches,
            flags=flags,
        )
        return templates
910

911
    def filter_text(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[Text], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
    ) -> list[Text]:
        """Return a list of the text nodes in this object.

        Shorthand for :meth:`filter` with *forcetype* fixed to
        :class:`~text.Text`; *recursive*, *matches*, and *flags* are forwarded
        unchanged.
        """
        text_nodes = self.filter(
            forcetype=Text,
            recursive=recursive,
            matches=matches,
            flags=flags,
        )
        return text_nodes
925

926
    def filter_wikilinks(
        self,
        recursive: bool | Literal[Recurse.RECURSE_OTHERS] = True,
        matches: Callable[[Wikilink], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
    ) -> list[Wikilink]:
        """Return a list of the wikilinks in this object.

        Shorthand for :meth:`filter` with *forcetype* fixed to
        :class:`~wikilink.Wikilink`; *recursive*, *matches*, and *flags* are
        forwarded unchanged.
        """
        wikilinks = self.filter(
            forcetype=Wikilink,
            recursive=recursive,
            matches=matches,
            flags=flags,
        )
        return wikilinks
940

941
    def get_sections(
        self,
        levels: Iterable[int] | None = None,
        matches: Callable[[Node], bool] | re.Pattern | str | None = None,
        flags: int = FLAGS,
        flat: bool = False,
        include_lead: bool | None = None,
        include_headings: bool = True,
    ) -> list[Wikicode]:
        """Return a list of sections within the page.

        Sections are returned as :class:`.Wikicode` objects with a shared node
        list (implemented using :class:`.SmartList`) so that changes to
        sections are reflected in the parent Wikicode object.

        Each section contains all of its subsections, unless *flat* is
        ``True``. If *levels* is given, it should be an iterable of integers;
        only sections whose heading levels are within it will be returned. It
        is consumed exactly once, so one-shot iterators and generators are
        accepted as well as containers. If *matches* is given, it should be
        either a function or a regex; only sections whose headings match it
        (without the surrounding equal signs) will be included. *flags* can be
        used to override the default regex flags (see :meth:`ifilter`) if a
        regex *matches* is used.

        If *include_lead* is ``True``, the first, lead section (without a
        heading) will be included in the list; ``False`` will not include it;
        the default will include it only if no specific *levels* or *matches*
        were given. If *include_headings* is ``True``, the section's beginning
        :class:`.Heading` object will be included; otherwise, this is skipped.

        The returned sections are ordered by the index of their first node.
        """
        # Materialize *levels* once. It is only promised to be iterable, and a
        # one-shot iterator would always be truthy and would be exhausted by
        # the first ``in`` test, silently dropping every later section.
        wanted_levels = tuple(levels) if levels is not None else ()
        title_matcher = self._build_matcher(matches, flags)

        def matcher(heading):
            return title_matcher(heading.title) and (
                not wanted_levels or heading.level in wanted_levels
            )

        iheadings = self._indexed_ifilter(recursive=False, forcetype=Heading)
        sections = []  # Tuples of (index_of_first_node, section)
        # Tuples of (index, heading), where index and heading.level are both
        # monotonically increasing
        open_headings: list[tuple[int, Heading]] = []

        # Add the lead section if appropriate: either explicitly requested, or
        # left at the default while no level/title restriction was given.
        if include_lead or not (
            include_lead is not None or matches or wanted_levels
        ):
            first_heading = next(
                self._indexed_ifilter(recursive=False, forcetype=Heading), None
            )
            if first_heading is None:  # No headings in page
                sections.append((0, Wikicode(self.nodes[:])))
            else:
                sections.append((0, Wikicode(self.nodes[: first_heading[0]])))

        # Iterate over headings, adding sections to the list as they end:
        for i, heading in iheadings:
            if flat:  # With flat, all sections close at the next heading
                newly_closed, open_headings = open_headings, []
            else:  # Otherwise, figure out which sections have closed, if any
                closed_start_index = len(open_headings)
                for j, (_, last_heading) in enumerate(open_headings):
                    if heading.level <= last_heading.level:
                        closed_start_index = j
                        break
                newly_closed = open_headings[closed_start_index:]
                del open_headings[closed_start_index:]
            for start, closed_heading in newly_closed:
                if matcher(closed_heading):
                    sections.append((start, Wikicode(self.nodes[start:i])))
            # A section starts at its heading, or just after it when headings
            # are to be left out.
            start = i if include_headings else (i + 1)
            open_headings.append((start, heading))

        # Add any remaining open headings to the list of sections:
        for start, heading in open_headings:
            if matcher(heading):
                sections.append((start, Wikicode(self.nodes[start:])))

        # Ensure that earlier sections are earlier in the returned list. Sort
        # on the start index alone so section objects are never compared; the
        # sort is stable, so equal indices keep their insertion order.
        sections.sort(key=lambda entry: entry[0])
        return [section for _, section in sections]
10✔
1017

1018
    def strip_code(
        self,
        normalize: bool = True,
        collapse: bool = True,
        keep_template_params: bool = False,
    ) -> str:
        """Return a rendered string without unprintable code such as templates.

        Each node in :attr:`nodes` is asked to strip itself through its
        :meth:`~.Node.__strip__` method, which receives *normalize*,
        *collapse*, and *keep_template_params* as keyword arguments. Falsy
        results (such as ``None``) are dropped; the remaining results are
        converted to strings and concatenated. For example, templates and tags
        are removed completely, links are stripped to just their display part,
        and headings are stripped to just their title.

        If *normalize* is ``True``, various things may be done to strip code
        further, such as converting HTML entities like ``&Sigma;``, ``&#931;``,
        and ``&#x3a3;`` to ``Σ``. If *collapse* is ``True``, leading and
        trailing newlines are removed and every run of three or more newlines
        is reduced to two. If *keep_template_params* is ``True``, then
        template parameters will be preserved in the output (normally, they are
        removed completely).
        """
        options = {
            "normalize": normalize,
            "collapse": collapse,
            "keep_template_params": keep_template_params,
        }

        # Keep only nodes whose stripped form is truthy.
        pieces = [
            str(piece)
            for node in self.nodes
            if (piece := node.__strip__(**options))
        ]
        text = "".join(pieces)

        if collapse:
            text = text.strip("\n")
            # Repeat until no triple newline is left anywhere in the text.
            while "\n\n\n" in text:
                text = text.replace("\n\n\n", "\n\n")
        return text
10✔
1058

1059
    def get_tree(self) -> str:
        """Return a hierarchical tree representation of the object.

        The representation is a string that makes the most sense when
        printed. It is built by calling :meth:`_get_tree` on this object and
        joining the lines it returns with newlines. The end result may look
        something like the following::

            >>> text = "Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}"
            >>> print(mwparserfromhell.parse(text).get_tree())
            Lorem ipsum
            {{
                  foo
                | 1
                = bar
                | 2
                = {{
                        baz
                  }}
                | spam
                = eggs
            }}
        """
        # A fresh object is used as a marker: it cannot be equal to anything
        # already in the list, so it can be found with certainty.
        sentinel = object()
        tree_lines = self._get_tree(self, [], sentinel, 0)
        return "\n".join(tree_lines)
10✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc