• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

earwig / mwparserfromhell / 10014163542

19 Jul 2024 08:49PM CUT coverage: 99.201% (-0.002%) from 99.203%
10014163542

Pull #326

github

web-flow
Merge 8c23031f1 into 4e73af2fa
Pull Request #326: Make fallthrough explicit in tok_parse.c

2979 of 3003 relevant lines covered (99.2%)

9.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.01
/src/mwparserfromhell/wikicode.py
1
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
2
#
3
# Permission is hereby granted, free of charge, to any person obtaining a copy
4
# of this software and associated documentation files (the "Software"), to deal
5
# in the Software without restriction, including without limitation the rights
6
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
# copies of the Software, and to permit persons to whom the Software is
8
# furnished to do so, subject to the following conditions:
9
#
10
# The above copyright notice and this permission notice shall be included in
11
# all copies or substantial portions of the Software.
12
#
13
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
# SOFTWARE.
20

21
import re
10✔
22
from itertools import chain
10✔
23

24
from .nodes import (
10✔
25
    Argument,
26
    Comment,
27
    ExternalLink,
28
    Heading,
29
    HTMLEntity,
30
    Node,
31
    Tag,
32
    Template,
33
    Text,
34
    Wikilink,
35
)
36
from .smart_list.list_proxy import ListProxy
10✔
37
from .string_mixin import StringMixIn
10✔
38
from .utils import parse_anything
10✔
39

40
__all__ = ["Wikicode"]
10✔
41

42
FLAGS = re.IGNORECASE | re.DOTALL | re.UNICODE
10✔
43

44

45
class Wikicode(StringMixIn):
10✔
46
    """A ``Wikicode`` is a container for nodes that operates like a string.
47

48
    Additionally, it contains methods that can be used to extract data from or
49
    modify the nodes, implemented in an interface similar to a list. For
50
    example, :meth:`index` can get the index of a node in the list, and
51
    :meth:`insert` can add a new node at that index. The :meth:`filter()
52
    <ifilter>` series of functions is very useful for extracting and iterating
53
    over, for example, all of the templates in the object.
54
    """
55

56
    RECURSE_OTHERS = 2
10✔
57

58
    def __init__(self, nodes):
10✔
59
        super().__init__()
10✔
60
        self._nodes = nodes
10✔
61

62
    def __str__(self):
10✔
63
        return "".join([str(node) for node in self.nodes])
10✔
64

65
    @staticmethod
10✔
66
    def _get_children(node, contexts=False, restrict=None, parent=None):
10✔
67
        """Iterate over all child :class:`.Node`\\ s of a given *node*."""
68
        yield (parent, node) if contexts else node
10✔
69
        if restrict and isinstance(node, restrict):
10✔
70
            return
10✔
71
        for code in node.__children__():
10✔
72
            for child in code.nodes:
10✔
73
                sub = Wikicode._get_children(child, contexts, restrict, code)
10✔
74
                yield from sub
10✔
75

76
    @staticmethod
    def _slice_replace(code, index, old, new):
        """Swap *old* for *new* in the text of ``code``'s nodes at *index*.

        The nodes selected by *index* are joined into a single string, every
        occurrence of *old* in it is replaced with *new*, and the result is
        parsed again and assigned back over the same *index* of
        ``code.nodes``.
        """
        text = "".join(str(node) for node in code.get(index))
        replaced = text.replace(old, new)
        code.nodes[index] = parse_anything(replaced).nodes
10✔
82

83
    @staticmethod
10✔
84
    def _build_matcher(matches, flags):
10✔
85
        """Helper for :meth:`_indexed_ifilter` and others.
86

87
        If *matches* is a function, return it. If it's a regex, return a
88
        wrapper around it that can be called with a node to do a search. If
89
        it's ``None``, return a function that always returns ``True``.
90
        """
91
        if matches:
10✔
92
            if callable(matches):
10✔
93
                return matches
10✔
94
            return lambda obj: re.search(matches, str(obj), flags)
10✔
95
        return lambda obj: True
10✔
96

97
    def _indexed_ifilter(
10✔
98
        self, recursive=True, matches=None, flags=FLAGS, forcetype=None
99
    ):
100
        """Iterate over nodes and their corresponding indices in the node list.
101

102
        The arguments are interpreted as for :meth:`ifilter`. For each tuple
103
        ``(i, node)`` yielded by this method, ``self.index(node) == i``. Note
104
        that if *recursive* is ``True``, ``self.nodes[i]`` might not be the
105
        node itself, but will still contain it.
106
        """
107
        match = self._build_matcher(matches, flags)
10✔
108
        if recursive:
10✔
109
            restrict = forcetype if recursive == self.RECURSE_OTHERS else None
10✔
110

111
            def getter(i, node):
10✔
112
                for ch in self._get_children(node, restrict=restrict):
10✔
113
                    yield (i, ch)
10✔
114

115
            inodes = chain(*(getter(i, n) for i, n in enumerate(self.nodes)))
10✔
116
        else:
117
            inodes = enumerate(self.nodes)
10✔
118
        for i, node in inodes:
10✔
119
            if (not forcetype or isinstance(node, forcetype)) and match(node):
10✔
120
                yield (i, node)
10✔
121

122
    def _is_child_wikicode(self, obj, recursive=True):
        """Return whether the :class:`.Wikicode` *obj* is us or a descendant.

        Two :class:`.Wikicode` objects count as the same when they share one
        underlying node list; a :class:`.ListProxy` is resolved to its parent
        list before the identity comparison. With *recursive* false only
        ``self`` is checked; otherwise every Wikicode reachable through the
        ``__children__()`` of our nodes is checked as well.
        """

        def backing(nodes):
            # pylint: disable=protected-access
            return nodes._parent if isinstance(nodes, ListProxy) else nodes

        wanted = backing(obj.nodes)
        if wanted is backing(self.nodes):
            return True
        if not recursive:
            return False
        pending = [self]
        while pending:
            current = pending.pop()
            if wanted is backing(current.nodes):
                return True
            for node in current.nodes:
                pending.extend(node.__children__())
        return False
10✔
142

143
    def _do_strong_search(self, obj, recursive=True):
        """Locate the exact object *obj* (by identity) within the node list.

        *obj* may be a :class:`.Node` or a :class:`.Wikicode`. The result is
        a tuple (*context*, *index*): *context* is the :class:`.Wikicode`
        holding *obj* and *index* is a :class:`slice` covering it there. For
        a :class:`.Wikicode` *obj*, the context is *obj* itself and the slice
        spans all of its nodes. For a :class:`.Node`, the slice has length
        one; without *recursive* the context is always ``self``, while with
        *recursive* it may be any :class:`.Wikicode` nested inside ``self``.

        Raises :exc:`ValueError` if *obj* is not found, and :exc:`TypeError`
        if *obj* is neither a :class:`.Node` nor a :class:`.Wikicode`.
        """
        if isinstance(obj, Wikicode):
            if self._is_child_wikicode(obj, recursive):
                return obj, slice(0, len(obj.nodes))
            raise ValueError(obj)

        if not isinstance(obj, Node):
            raise TypeError(obj)

        def single(position):
            return slice(position, position + 1)

        if not recursive:
            return self, single(self.index(obj))
        for top in self.nodes:
            for holder, candidate in self._get_children(top, contexts=True):
                if candidate is not obj:
                    continue
                # Top-level nodes are yielded without a parent context.
                holder = holder or self
                return holder, single(holder.index(candidate))
        raise ValueError(obj)
×
173

174
    def _do_weak_search(self, obj, recursive):
        """Search for an element that looks like *obj* within the node list.

        This follows the same rules as :meth:`_do_strong_search` with some
        differences. *obj* is parsed and then treated as a string that might
        represent any :class:`.Node`, :class:`.Wikicode`, or combination of
        the two present in the node list. Thus, matching is weak (using
        equality comparisons) rather than strong (using ``is``). Because
        multiple nodes can match *obj*, the result is a list of tuples
        instead of just one (however, :exc:`ValueError` is still raised if
        nothing is found). Individual matches will never overlap.

        Each tuple is (*exact*, *context*, *index*). *exact* is ``True`` if
        *obj* was matched exactly to one or more adjacent nodes, or ``False``
        if *obj* was only found inside a node or incompletely spanning
        multiple nodes; in the latter case the single result covers all of
        ``self``.
        """
        obj = parse_anything(obj)
        if not obj or obj not in self:
            raise ValueError(obj)
        width = len(obj.nodes)
        results = []
        contexts = [self]
        while contexts:
            context = contexts.pop()
            # Walk backwards so that matches are recorded from the end of the
            # context towards its start.
            i = len(context.nodes) - 1
            while i >= 0:
                node = context.get(i)
                if obj.get(-1) == node:
                    start = i - width + 1
                    # A candidate that would begin before index 0 cannot be a
                    # match. Without this guard the negative positions would
                    # wrap around and be compared against nodes at the *end*
                    # of the context, producing bogus exact matches.
                    if start >= 0 and not any(
                        obj.get(k) != context.get(start + k)
                        for k in range(width - 1)
                    ):
                        results.append((True, context, slice(start, i + 1)))
                        i = start  # Skip the matched run: no overlaps
                elif recursive and obj in node:
                    contexts.extend(node.__children__())
                i -= 1
        if not results:
            if not recursive:
                raise ValueError(obj)
            results.append((False, self, slice(0, len(self.nodes))))
        return results
10✔
217

218
    def _get_tree(self, code, lines, marker, indent):
10✔
219
        """Build a tree to illustrate the way the Wikicode object was parsed.
220

221
        The method that builds the actual tree is ``__showtree__`` of ``Node``
222
        objects. *code* is the ``Wikicode`` object to build a tree for. *lines*
223
        is the list to append the tree to, which is returned at the end of the
224
        method. *marker* is some object to be used to indicate that the builder
225
        should continue on from the last line instead of starting a new one; it
226
        should be any object that can be tested for with ``is``. *indent* is
227
        the starting indentation.
228
        """
229

230
        def write(*args):
10✔
231
            """Write a new line following the proper indentation rules."""
232
            if lines and lines[-1] is marker:  # Continue from the last line
10✔
233
                lines.pop()  # Remove the marker
10✔
234
                last = lines.pop()
10✔
235
                lines.append(last + " ".join(args))
10✔
236
            else:
237
                lines.append(" " * 6 * indent + " ".join(args))
10✔
238

239
        get = lambda code: self._get_tree(code, lines, marker, indent + 1)
10✔
240
        mark = lambda: lines.append(marker)
10✔
241
        for node in code.nodes:
10✔
242
            node.__showtree__(write, get, mark)
10✔
243
        return lines
10✔
244

245
    @classmethod
10✔
246
    def _build_filter_methods(cls, **meths):
10✔
247
        """Given Node types, build the corresponding i?filter shortcuts.
248

249
        The should be given as keys storing the method's base name paired with
250
        values storing the corresponding :class:`.Node` type. For example, the
251
        dict may contain the pair ``("templates", Template)``, which will
252
        produce the methods :meth:`ifilter_templates` and
253
        :meth:`filter_templates`, which are shortcuts for
254
        :meth:`ifilter(forcetype=Template) <ifilter>` and
255
        :meth:`filter(forcetype=Template) <filter>`, respectively. These
256
        shortcuts are added to the class itself, with an appropriate docstring.
257
        """
258
        doc = """Iterate over {0}.
10✔
259

260
        This is equivalent to :meth:`{1}` with *forcetype* set to
261
        :class:`~{2.__module__}.{2.__name__}`.
262
        """
263
        make_ifilter = lambda ftype: (
10✔
264
            lambda self, *a, **kw: self.ifilter(forcetype=ftype, *a, **kw)
265
        )
266
        make_filter = lambda ftype: (
10✔
267
            lambda self, *a, **kw: self.filter(forcetype=ftype, *a, **kw)
268
        )
269
        for name, ftype in meths.items():
10✔
270
            ifilt = make_ifilter(ftype)
10✔
271
            filt = make_filter(ftype)
10✔
272
            ifilt.__doc__ = doc.format(name, "ifilter", ftype)
10✔
273
            filt.__doc__ = doc.format(name, "filter", ftype)
10✔
274
            setattr(cls, "ifilter_" + name, ifilt)
10✔
275
            setattr(cls, "filter_" + name, filt)
10✔
276

277
    @property
10✔
278
    def nodes(self):
10✔
279
        """A list of :class:`.Node` objects.
280

281
        This is the internal data actually stored within a :class:`.Wikicode`
282
        object.
283
        """
284
        return self._nodes
10✔
285

286
    @nodes.setter
10✔
287
    def nodes(self, value):
10✔
288
        if not isinstance(value, list):
10✔
289
            value = parse_anything(value).nodes
10✔
290
        self._nodes = value
10✔
291

292
    def get(self, index):
10✔
293
        """Return the *index*\\ th node within the list of nodes."""
294
        return self.nodes[index]
10✔
295

296
    def set(self, index, value):
        """Set the ``Node`` at *index* to *value*.

        *value* is parsed with :func:`.parse_anything`. If it parses to
        exactly one node, that node replaces the one at *index*; if it parses
        to no nodes at all, the node at *index* is removed instead.

        Raises :exc:`ValueError` if *value* parses to more than one node, or
        :exc:`IndexError` if *index* is out of range. To put several nodes at
        an index, use :meth:`get` with either :meth:`remove` and
        :meth:`insert` or :meth:`replace`.
        """
        parsed = parse_anything(value).nodes
        if len(parsed) > 1:
            raise ValueError("Cannot coerce multiple nodes into one index")
        size = len(self.nodes)
        if index >= size or -1 * index > size:
            raise IndexError("List assignment index out of range")
        if not parsed:
            self.nodes.pop(index)
            return
        self.nodes[index] = parsed[0]
10✔
313

314
    def contains(self, obj):
        """Return whether this Wikicode object contains *obj*.

        A :class:`.Node` or :class:`.Wikicode` *obj* is looked up exactly (by
        identity) among all of our children, recursively. Any other *obj* is
        simply tested with the ``in`` operator against this object.
        """
        if isinstance(obj, (Node, Wikicode)):
            try:
                self._do_strong_search(obj, recursive=True)
            except ValueError:
                return False
            return True
        return obj in self
10✔
328

329
    def index(self, obj, recursive=False):
        """Return the index of *obj* in the list of nodes.

        A :class:`.Node` *obj* is matched by identity; anything else is
        matched by equality. If *recursive* is ``True``, the descendants of
        each of our nodes are searched too, and the index returned is that of
        the direct child of ours that is or contains the match. Otherwise
        only direct children are considered.

        Raises :exc:`ValueError` if *obj* is not found.
        """
        if isinstance(obj, Node):

            def same(candidate):
                return obj is candidate

        else:

            def same(candidate):
                return obj == candidate

        for position, node in enumerate(self.nodes):
            candidates = self._get_children(node) if recursive else (node,)
            if any(same(candidate) for candidate in candidates):
                return position
        raise ValueError(obj)
10✔
347

348
    def get_ancestors(self, obj):
        """Return a list of all ancestor nodes of the :class:`.Node` *obj*.

        The list runs from the outermost ancestor down to the direct parent;
        *obj* itself is not part of it. For example::

            >>> text = "{{a|{{b|{{c|{{d}}}}}}}}"
            >>> code = mwparserfromhell.parse(text)
            >>> node = code.filter_templates(matches=lambda n: n == "{{d}}")[0]
            >>> code.get_ancestors(node)
            ['{{a|{{b|{{c|{{d}}}}}}}}', '{{b|{{c|{{d}}}}}}', '{{c|{{d}}}}']

        If *obj* is a :class:`.Wikicode`, its first node is looked up
        instead. An empty list is returned when *obj* sits at the top level
        of this Wikicode object. :exc:`ValueError` is raised if *obj* is not
        found, or if it is neither a :class:`.Node` nor a
        :class:`.Wikicode`.
        """

        def search(code, needle):
            # Chain of nodes leading down to *needle* (outermost first), or
            # None when *needle* is nowhere below *code*.
            for node in code.nodes:
                if node is needle:
                    return []
                for child_code in node.__children__():
                    trail = search(child_code, needle)
                    if trail is not None:
                        return [node, *trail]
            return None

        if isinstance(obj, Wikicode):
            obj = obj.get(0)
        elif not isinstance(obj, Node):
            raise ValueError(obj)

        found = search(self, obj)
        if found is None:
            raise ValueError(obj)
        return found
10✔
384

385
    def get_parent(self, obj):
        """Return the direct parent node of the :class:`.Node` *obj*.

        This is the last element of the list given by :meth:`.get_ancestors`.
        ``None`` is returned when that list is empty, i.e. the node exists
        but sits at the top level of the Wikicode object.
        """
        lineage = self.get_ancestors(obj)
        if not lineage:
            return None
        return lineage[-1]
10✔
395

396
    def insert(self, index, value):
        """Insert *value* at *index* in the list of nodes.

        *value* can be anything parsable by :func:`.parse_anything`, which
        includes strings or other :class:`.Wikicode` or :class:`.Node` objects.
        All nodes of the parsed value are inserted, keeping their order.
        """
        # Work on a snapshot: if the parsed value shares its node list with
        # this object (for instance when *value* is this very object),
        # inserting while iterating over the live list would shift the nodes
        # under the iterator and scramble the result.
        nodes = list(parse_anything(value).nodes)
        # Inserting in reverse at a fixed index leaves the nodes in order.
        for node in reversed(nodes):
            self.nodes.insert(index, node)
10✔
405

406
    def insert_before(self, obj, value, recursive=True):
        """Insert *value* immediately before *obj*.

        *obj* can be a :class:`.Node` or a :class:`.Wikicode` object (as
        created by :meth:`get_sections`, for example), in which case only
        that specific instance is affected, or anything else (typically a
        string), in which case every match found by a weak, text-based
        search is affected. *value* can be anything parsable by
        :func:`.parse_anything`. If *recursive* is ``True``, *obj* is also
        looked for inside our child nodes, not just among our direct
        descendants. If *obj* is not found, :exc:`ValueError` is raised.
        """
        if isinstance(obj, (Node, Wikicode)):
            context, index = self._do_strong_search(obj, recursive)
            context.insert(index.start, value)
            return
        for exact, context, index in self._do_weak_search(obj, recursive):
            if exact:
                context.insert(index.start, value)
                continue
            # Inexact match: rewrite the text so *value* precedes each hit.
            needle = str(obj)
            self._slice_replace(context, index, needle, str(value) + needle)
10✔
428

429
    def insert_after(self, obj, value, recursive=True):
        """Insert *value* immediately after *obj*.

        *obj* can be a :class:`.Node` or a :class:`.Wikicode` object (as
        created by :meth:`get_sections`, for example), in which case only
        that specific instance is affected, or anything else (typically a
        string), in which case every match found by a weak, text-based
        search is affected. *value* can be anything parsable by
        :func:`.parse_anything`. If *recursive* is ``True``, *obj* is also
        looked for inside our child nodes, not just among our direct
        descendants. If *obj* is not found, :exc:`ValueError` is raised.
        """
        if isinstance(obj, (Node, Wikicode)):
            context, index = self._do_strong_search(obj, recursive)
            context.insert(index.stop, value)
            return
        for exact, context, index in self._do_weak_search(obj, recursive):
            if exact:
                context.insert(index.stop, value)
                continue
            # Inexact match: rewrite the text so *value* follows each hit.
            needle = str(obj)
            self._slice_replace(context, index, needle, needle + str(value))
10✔
451

452
    def replace(self, obj, value, recursive=True):
        """Replace *obj* with *value*.

        *obj* can be a :class:`.Node` or a :class:`.Wikicode` object (as
        created by :meth:`get_sections`, for example), in which case only
        that specific instance is affected, or anything else (typically a
        string), in which case every match found by a weak, text-based
        search is affected. *value* can be anything parsable by
        :func:`.parse_anything`. If *recursive* is ``True``, *obj* is also
        looked for inside our child nodes, not just among our direct
        descendants. If *obj* is not found, :exc:`ValueError` is raised.
        """

        def swap(context, index):
            # Remove the matched run one node at a time, then put *value*
            # where it used to start.
            for _ in range(index.start, index.stop):
                context.nodes.pop(index.start)
            context.insert(index.start, value)

        if isinstance(obj, (Node, Wikicode)):
            swap(*self._do_strong_search(obj, recursive))
            return
        for exact, context, index in self._do_weak_search(obj, recursive):
            if exact:
                swap(context, index)
            else:
                self._slice_replace(context, index, str(obj), str(value))
10✔
477

478
    def append(self, value):
        """Insert *value* at the end of the list of nodes.

        *value* can be anything parsable by :func:`.parse_anything`. All
        nodes of the parsed value are appended, keeping their order.
        """
        # Work on a snapshot: if the parsed value shares its node list with
        # this object (for instance when *value* is this very object),
        # appending while iterating over the live list would keep feeding the
        # loop new items and never terminate.
        nodes = list(parse_anything(value).nodes)
        for node in nodes:
            self.nodes.append(node)
10✔
486

487
    def remove(self, obj, recursive=True):
        """Remove *obj* from the list of nodes.

        *obj* can be a :class:`.Node` or a :class:`.Wikicode` object (as
        created by :meth:`get_sections`, for example), in which case only
        that specific instance is affected, or anything else (typically a
        string), in which case every match found by a weak, text-based
        search is affected. If *recursive* is ``True``, *obj* is also looked
        for inside our child nodes, not just among our direct descendants.
        If *obj* is not found, :exc:`ValueError` is raised.
        """

        def drop(context, index):
            # Pop at the same position repeatedly; later nodes shift down.
            for _ in range(index.start, index.stop):
                context.nodes.pop(index.start)

        if isinstance(obj, (Node, Wikicode)):
            drop(*self._do_strong_search(obj, recursive))
            return
        for exact, context, index in self._do_weak_search(obj, recursive):
            if exact:
                drop(context, index)
            else:
                self._slice_replace(context, index, str(obj), "")
10✔
510

511
    def matches(self, other):
        """Do a loose equivalency test suitable for comparing page names.

        *other* can be a string, bytes, :class:`.Wikicode` or :class:`.Node`
        object, or an iterable of such objects (in which case a match with
        any one of them is enough). Both sides get the same treatment: markup
        is stripped with :meth:`strip_code`, surrounding whitespace is
        removed, the first letter is upper-cased and underscores are turned
        into spaces. Typical usage is
        ``if template.name.matches("stub"): ...``.
        """

        def normalize(text):
            if not text:
                return text
            return (text[0].upper() + text[1:]).replace("_", " ")

        def cleaned(thing):
            return parse_anything(thing).strip_code().strip()

        this = normalize(self.strip_code().strip())

        if isinstance(other, (str, bytes, Wikicode, Node)):
            return this == normalize(cleaned(other))

        return any(this == normalize(cleaned(obj)) for obj in other)
10✔
532

533
    def ifilter(self, recursive=True, matches=None, flags=FLAGS, forcetype=None):
10✔
534
        """Iterate over nodes in our list matching certain conditions.
535

536
        If *forcetype* is given, only nodes that are instances of this type (or
537
        tuple of types) are yielded. Setting *recursive* to ``True`` will
538
        iterate over all children and their descendants. ``RECURSE_OTHERS``
539
        will only iterate over children that are not the instances of
540
        *forcetype*. ``False`` will only iterate over immediate children.
541

542
        ``RECURSE_OTHERS`` can be used to iterate over all un-nested templates,
543
        even if they are inside of HTML tags, like so:
544

545
            >>> code = mwparserfromhell.parse("{{foo}}<b>{{foo|{{bar}}}}</b>")
546
            >>> code.filter_templates(code.RECURSE_OTHERS)
547
            ["{{foo}}", "{{foo|{{bar}}}}"]
548

549
        *matches* can be used to further restrict the nodes, either as a
550
        function (taking a single :class:`.Node` and returning a boolean) or a
551
        regular expression (matched against the node's string representation
552
        with :func:`re.search`). If *matches* is a regex, the flags passed to
553
        :func:`re.search` are :const:`re.IGNORECASE`, :const:`re.DOTALL`, and
554
        :const:`re.UNICODE`, but custom flags can be specified by passing
555
        *flags*.
556
        """
557
        gen = self._indexed_ifilter(recursive, matches, flags, forcetype)
10✔
558
        return (node for i, node in gen)
10✔
559

560
    def filter(self, *args, **kwargs):
10✔
561
        """Return a list of nodes within our list matching certain conditions.
562

563
        This is equivalent to calling :func:`list` on :meth:`ifilter`.
564
        """
565
        return list(self.ifilter(*args, **kwargs))
10✔
566

567
    def get_sections(
        self,
        levels=None,
        matches=None,
        flags=FLAGS,
        flat=False,
        include_lead=None,
        include_headings=True,
    ):
        """Return a list of sections within the page.

        Each section is a :class:`.Wikicode` built from a slice of our node
        list; the class documentation of :class:`.SmartList` describes how
        such slices stay linked to the parent list.

        A section contains all of its subsections, unless *flat* is ``True``,
        in which case every section ends at the next heading. If *levels* is
        given, it should be an iterable of integers; only sections whose
        heading level is in it are returned. If *matches* is given, it should
        be either a function or a regex; only sections whose heading title
        (without the surrounding equal signs) matches it are returned.
        *flags* overrides the default regex flags (see :meth:`ifilter`) when
        a regex *matches* is used.

        If *include_lead* is ``True``, the lead section (everything before
        the first heading) is included; ``False`` leaves it out; by default
        it is included only when neither *levels* nor *matches* is given. If
        *include_headings* is ``True``, each section starts with its
        :class:`.Heading`; otherwise it starts right after it.
        """
        title_matcher = self._build_matcher(matches, flags)

        def wanted(heading):
            return title_matcher(heading.title) and (
                not levels or heading.level in levels
            )

        iheadings = self._indexed_ifilter(recursive=False, forcetype=Heading)
        sections = []  # Tuples of (index_of_first_node, section)
        # Tuples of (start_index, heading) for sections not yet closed; both
        # the index and heading.level increase along the list.
        open_headings = []

        # Add the lead section if appropriate:
        if include_lead or (
            include_lead is None and not matches and not levels
        ):
            probe = self._indexed_ifilter(recursive=False, forcetype=Heading)
            first_heading = next(probe, None)
            if first_heading is None:  # No headings in page
                sections.append((0, Wikicode(self.nodes[:])))
            else:
                sections.append((0, Wikicode(self.nodes[: first_heading[0]])))

        # Iterate over headings, adding sections to the list as they end:
        for i, heading in iheadings:
            if flat:  # With flat, all sections close at the next heading
                newly_closed, open_headings = open_headings, []
            else:
                # The first open heading at the same or a deeper level closes
                # here, together with everything opened after it.
                cut = next(
                    (
                        j
                        for j, (_, opened) in enumerate(open_headings)
                        if heading.level <= opened.level
                    ),
                    len(open_headings),
                )
                newly_closed = open_headings[cut:]
                del open_headings[cut:]
            for start, closed in newly_closed:
                if wanted(closed):
                    sections.append((start, Wikicode(self.nodes[start:i])))
            open_headings.append((i if include_headings else i + 1, heading))

        # Add any remaining open headings to the list of sections:
        for start, heading in open_headings:
            if wanted(heading):
                sections.append((start, Wikicode(self.nodes[start:])))

        # Ensure that earlier sections are earlier in the returned list:
        return [section for _, section in sorted(sections)]
10✔
640

641
    def strip_code(self, normalize=True, collapse=True, keep_template_params=False):
10✔
642
        """Return a rendered string without unprintable code such as templates.
643

644
        The way a node is stripped is handled by the
645
        :meth:`~.Node.__strip__` method of :class:`.Node` objects, which
646
        generally return a subset of their nodes or ``None``. For example,
647
        templates and tags are removed completely, links are stripped to just
648
        their display part, headings are stripped to just their title.
649

650
        If *normalize* is ``True``, various things may be done to strip code
651
        further, such as converting HTML entities like ``&Sigma;``, ``&#931;``,
652
        and ``&#x3a3;`` to ``Σ``. If *collapse* is ``True``, we will try to
653
        remove excess whitespace as well (three or more newlines are converted
654
        to two, for example). If *keep_template_params* is ``True``, then
655
        template parameters will be preserved in the output (normally, they are
656
        removed completely).
657
        """
658
        kwargs = {
10✔
659
            "normalize": normalize,
660
            "collapse": collapse,
661
            "keep_template_params": keep_template_params,
662
        }
663

664
        nodes = []
10✔
665
        for node in self.nodes:
10✔
666
            stripped = node.__strip__(**kwargs)
10✔
667
            if stripped:
10✔
668
                nodes.append(str(stripped))
10✔
669

670
        if collapse:
10✔
671
            stripped = "".join(nodes).strip("\n")
10✔
672
            while "\n\n\n" in stripped:
10✔
673
                stripped = stripped.replace("\n\n\n", "\n\n")
10✔
674
            return stripped
10✔
675
        return "".join(nodes)
10✔
676

677
    def get_tree(self):
10✔
678
        """Return a hierarchical tree representation of the object.
679

680
        The representation is a string makes the most sense printed. It is
681
        built by calling :meth:`_get_tree` on the :class:`.Wikicode` object and
682
        its children recursively. The end result may look something like the
683
        following::
684

685
            >>> text = "Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}"
686
            >>> print(mwparserfromhell.parse(text).get_tree())
687
            Lorem ipsum
688
            {{
689
                  foo
690
                | 1
691
                = bar
692
                | 2
693
                = {{
694
                        baz
695
                  }}
696
                | spam
697
                = eggs
698
            }}
699
        """
700
        marker = object()  # Random object we can find with certainty in a list
10✔
701
        return "\n".join(self._get_tree(self, [], marker, 0))
10✔
702

703

704
# Attach the ``filter_<name>`` / ``ifilter_<name>`` shortcut methods to
# Wikicode, one pair per node type listed here.
Wikicode._build_filter_methods(
    **{
        "arguments": Argument,
        "comments": Comment,
        "external_links": ExternalLink,
        "headings": Heading,
        "html_entities": HTMLEntity,
        "tags": Tag,
        "templates": Template,
        "text": Text,
        "wikilinks": Wikilink,
    }
)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc