• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

rindPHI / isla / 7846461830

09 Feb 2024 03:58PM UTC coverage: 93.504% (-0.2%) from 93.737%
7846461830

Pull #90

github

web-flow
Merge 4e3e41de3 into 3de029959
Pull Request #90: RepairSolver

796 of 884 new or added lines in 7 files covered. (90.05%)

2 existing lines in 2 files now uncovered.

6852 of 7328 relevant lines covered (93.5%)

0.94 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.47
/src/isla/tree_insertion.py
1
from typing import Iterator, Tuple, Iterable
1✔
2

3
import more_itertools
1✔
4
from frozendict import frozendict
1✔
5
from grammar_graph.gg import GrammarGraph, Node, ChoiceNode, NonterminalNode
1✔
6
from returns.maybe import Nothing, Maybe, Some
1✔
7

8
from isla.derivation_tree import DerivationTree
1✔
9
from isla.helpers import (
1✔
10
    is_nonterminal,
11
    canonical,
12
    frozen_canonical,
13
    assertions_activated,
14
)
15
from isla.helpers import deep_str  # noqa: F401
1✔
16
from isla.type_defs import Path, FrozenCanonicalGrammar
1✔
17

18

19
def validate_insertion_result(
1✔
20
    graph: GrammarGraph,
21
    original_tree: DerivationTree,
22
    inserted_tree: DerivationTree,
23
    result_tree: DerivationTree,
24
    paths_to_ignore_in_inserted_tree: Iterable[Path] = (),
25
) -> None:
26
    """
27
    This function validates a tree insertion result. In particular, it asserts that
28

29
    - all identifiers in :code:`original_tree` are retained;
30
    - all identifiers in :code:`inserted tree`, except those in :code:`paths_to_ignore_in_inserted_tree`;
31
    - the constructed tree is valid.
32

33
    :param graph: The grammar graph.
34
    :param original_tree: The original tree.
35
    :param inserted_tree: The tree to insert.
36
    :param result_tree: The resulting tree.
37
    :param paths_to_ignore_in_inserted_tree: Paths in the inserted tree that should
38
        be ignored when checking whether all identifiers are retained.
39
    :return: Nothing (but yields an AssertionError if assertions are activated and
40
        an assumption is not met).
41
    """
42

43
    if not assertions_activated():
1✔
NEW
44
        return
×
45

46
    result_ids = {node.id for _, node in result_tree.paths()}
1✔
47
    orig_ids = {node.id for _, node in original_tree.paths()}
1✔
48
    inserted_tree_ids = {
1✔
49
        node.id
50
        for path, node in inserted_tree.paths()
51
        if path not in paths_to_ignore_in_inserted_tree
52
    }
53

54
    node_diff = tuple(
1✔
55
        (
56
            original_tree.get_subtree(original_tree.find_node(tid)).value,
57
            tid,
58
        )
59
        for tid in orig_ids.difference(result_ids)
60
    )
61
    assert orig_ids.issubset(result_ids), (
1✔
62
        f"The node(s) {node_diff} from the "
63
        f"original tree are not contained in the identifier set from "
64
        f"the resulting tree."
65
    )
66

67
    node_diff = tuple(
1✔
68
        (
69
            inserted_tree.get_subtree(inserted_tree.find_node(tid)).value,
70
            tid,
71
        )
72
        for tid in inserted_tree_ids.difference(result_ids)
73
    )
74
    assert inserted_tree_ids.issubset(result_ids), (
1✔
75
        f"The node(s) {node_diff} from the "
76
        f"inserted tree are not contained in the identifier set from "
77
        f"the resulting tree."
78
    )
79

80
    assert graph.tree_is_valid(result_tree)
1✔
81

82

83
def graph_path_to_tree(
1✔
84
    graph_path: Tuple[Node, ...],
85
    graph: GrammarGraph,
86
    maybe_canonical_grammar: Maybe[FrozenCanonicalGrammar] = Nothing,
87
) -> Tuple[DerivationTree, Path]:
88
    """
89
    Returns a tuple of a derivation tree and a path, where the path corresponds to
90
    the given grammar graph path.
91

92
    Example
93
    -------
94

95
    >>> import string
96
    >>> grammar = frozendict({
97
    ...     "<start>": ("<stmt>",),
98
    ...     "<stmt>": ("<assgn> ; <stmt>", "<assgn>"),
99
    ...     "<assgn>": ("<var> := <rhs>",),
100
    ...     "<rhs>": ("<var>", "<digit>"),
101
    ...     "<var>": tuple(string.ascii_lowercase),
102
    ...     "<digit>": tuple(string.digits),
103
    ... })
104
    >>> graph = GrammarGraph.from_grammar(grammar)
105

106
    >>> resulting_tree, path_to_end = connect_nonterminals("<stmt>", "<var>", graph)
107

108
    >>> graph_path: Tuple[Node, ...] = tuple(
109
    ...     graph.shortest_path(
110
    ...         graph.get_node("<stmt>"),
111
    ...         graph.get_node("<var>"),
112
    ...         nodes_filter=None,
113
    ...     )
114
    ... )
115

116
    >>> resulting_tree, path_to_end = graph_path_to_tree(graph_path, graph)
117

118
    >>> graph.tree_is_valid(resulting_tree)
119
    True
120

121
    >>> print(resulting_tree)
122
    <var> := <rhs> ; <stmt>
123

124
    >>> print(resulting_tree.value)
125
    <stmt>
126

127
    >>> print(resulting_tree.get_subtree(path_to_end))
128
    <var>
129

130
    Let us have a look at the path connecting a :code:`<assgn>` node with a
131
    :code:`<var>` node on the right-hand side:
132

133
    >>> graph_path: Tuple[Node, ...] = tuple(
134
    ...     tuple(
135
    ...         graph.all_paths(
136
    ...             graph.get_node("<assgn>"),
137
    ...             {graph.get_node("<var>")}
138
    ...         )
139
    ...     )[1]
140
    ... )
141

142
    >>> resulting_tree, path_to_end = graph_path_to_tree(graph_path, graph)
143

144
    >>> graph.tree_is_valid(resulting_tree)
145
    True
146

147
    >>> print(resulting_tree)
148
    <var> := <var>
149

150
    >>> print(resulting_tree.value)
151
    <assgn>
152

153
    >>> print(path_to_end)
154
    (2, 0)
155

156
    >>> print(resulting_tree.get_subtree(path_to_end))
157
    <var>
158

159
    :param graph_path: The path in the grammar graph to turn into a tree.
160
    :param graph: The grammar graph.
161
    :param maybe_canonical_grammar: The canonical version of the grammar represented
162
        by :code:`graph`. If :code:`Nothing`, the canonical grammar is computed
163
        from :code:`graph` (with the according potential overhead).
164
    :return: A pair of a derivation tree and a path in that tree, such that the path
165
        corresponds to the given grammar graph path.
166
    """
167

168
    canonical_grammar = maybe_canonical_grammar.value_or(canonical(graph.grammar))
1✔
169
    start = graph_path[0].symbol
1✔
170
    end = graph_path[-1].symbol
1✔
171

172
    resulting_tree: DerivationTree = DerivationTree(start)
1✔
173
    path_to_end: Path = ()
1✔
174
    current_node: DerivationTree = resulting_tree
1✔
175

176
    for idx_of_symbolic_node_in_graph_path, (symbolic_node, choice_node) in enumerate(
1✔
177
        more_itertools.grouper(graph_path, n=2, incomplete="fill", fillvalue=None)
178
    ):
179
        symbolic_node: NonterminalNode | NonterminalNode
180
        choice_node: ChoiceNode
181

182
        # The index is incremented once for each *pair,* so we must multiply by two
183
        # to obtain the actual index of the symbolic node.
184
        idx_of_symbolic_node_in_graph_path *= 2
1✔
185

186
        assert graph_path[idx_of_symbolic_node_in_graph_path] == symbolic_node
1✔
187
        assert resulting_tree.get_subtree(path_to_end).value == symbolic_node.symbol
1✔
188
        assert current_node.value == symbolic_node.symbol
1✔
189
        assert (
1✔
190
            choice_node is not None
191
            or resulting_tree.get_subtree(path_to_end).value == end
192
        )
193

194
        if not choice_node:
1✔
195
            break
1✔
196

197
        # NOTE: We build upon the convention that choice node symbols are of the form
198
        #       <symbol>-<expansion_idx>, where <expansion_idx> is the index of the
199
        #       expansion in the grammar. This structure should be made explicit by
200
        #       a numeric index in ChoiceNode objects.
201
        expansion_idx = int(choice_node.symbol[choice_node.symbol.rfind("-") + 1 :]) - 1
1✔
202
        expansion = canonical_grammar[symbolic_node.symbol][expansion_idx]
1✔
203

204
        new_children = tuple(
1✔
205
            DerivationTree(symbol, None if is_nonterminal(symbol) else ())
206
            for symbol in expansion
207
        )
208

209
        resulting_tree = resulting_tree.replace_path(
1✔
210
            path_to_end, DerivationTree(current_node.value, new_children)
211
        )
212
        assert graph.tree_is_valid(resulting_tree)
1✔
213

214
        # To determine the next path element, we must choose the index of the next
215
        # node in the graph path in the children of the current choice node.
216
        # TODO: This can fail since the grammar graph library currently does not
217
        #       distinguish several occurrences of the same nonterminal node, as
218
        #       does the original algorithm by Havrikov. Thus, if a nonterminal
219
        #       occurs more than once in an expansion, we might pick the wrong
220
        #       index unless we implement some backtracking or lookahead.
221
        #       We should fix the grammar graph library eventually.
222
        assert len(graph_path) > idx_of_symbolic_node_in_graph_path + 2
1✔
223
        index_in_expansion = choice_node.children.index(
1✔
224
            graph_path[idx_of_symbolic_node_in_graph_path + 2]
225
        )
226
        current_node = new_children[index_in_expansion]
1✔
227

228
        path_to_end += (index_in_expansion,)
1✔
229

230
    assert resulting_tree.value == start
1✔
231
    assert resulting_tree.get_subtree(path_to_end).value == end
1✔
232
    assert (
1✔
233
        not is_nonterminal(end)
234
        or resulting_tree.get_subtree(path_to_end).children is None
235
    )
236

237
    return resulting_tree, path_to_end
1✔
238

239

240
def connect_nonterminals(
1✔
241
    start: str,
242
    end: str,
243
    graph: GrammarGraph,
244
    maybe_canonical_grammar: Maybe[FrozenCanonicalGrammar] = Nothing,
245
) -> Tuple[DerivationTree, Path]:
246
    """
247
    Returns a tuple of a derivation tree and a path, such that the root of the tree
248
    has the value of the symbol :code:`start` and the leaf of the tree at the path
249
    has the value of the symbol :code:`end`. The tree path between those two nodes
250
    is a shortest path between the symbols. If :code:`start` and :code:`end` are
251
    the same symbol, the tree is a single node.
252

253
    Example
254
    -------
255

256
    >>> import string
257
    >>> grammar = frozendict({
258
    ...     "<start>": ("<stmt>",),
259
    ...     "<stmt>": ("<assgn> ; <stmt>", "<assgn>"),
260
    ...     "<assgn>": ("<var> := <rhs>",),
261
    ...     "<rhs>": ("<var>", "<digit>"),
262
    ...     "<var>": tuple(string.ascii_lowercase),
263
    ...     "<digit>": tuple(string.digits),
264
    ... })
265
    >>> graph = GrammarGraph.from_grammar(grammar)
266

267
    >>> resulting_tree, path_to_end = connect_nonterminals("<stmt>", "<var>", graph)
268

269
    >>> print(resulting_tree)
270
    <var> := <rhs> ; <stmt>
271

272
    >>> print(resulting_tree.value)
273
    <stmt>
274

275
    >>> print(resulting_tree.get_subtree(path_to_end))
276
    <var>
277

278
    We obtain a single node if the start and end symbols are the same:
279

280
    >>> resulting_tree, path_to_end = connect_nonterminals("<stmt>", "<stmt>", graph)
281
    >>> print(resulting_tree)
282
    <stmt>
283

284
    :param start: The start symbol.
285
    :param end: The end symbol.
286
    :param graph: The grammar graph.
287
    :param maybe_canonical_grammar: The canonical version of the grammar represented
288
        by :code:`graph`. If :code:`Nothing`, the canonical grammar is computed
289
        from :code:`graph` (with the according potential overhead).
290
    :return: A pair of a derivation tree starting at :code:`start` and a path to a
291
        node in that tree labeled with :code:`end`.
292
    """
293

294
    assert start == end or graph.reachable(
1✔
295
        start, end
296
    ), f"{end} is not reachable from {start}"
297

298
    start_node = graph.get_node(start)
1✔
299
    end_node = graph.get_node(end)
1✔
300

301
    shortest_graph_path: Tuple[Node, ...] = (
1✔
302
        tuple(graph.shortest_path(start_node, end_node, nodes_filter=None))
303
        if start != end
304
        else ()
305
    )
306

307
    if not shortest_graph_path:
1✔
308
        # Start and end symbols are the same, so we return a trivial connecting tree.
309
        return DerivationTree(start, None), ()
1✔
310

311
    return graph_path_to_tree(shortest_graph_path, graph, maybe_canonical_grammar)
1✔
312

313

314
def insert_tree_into_hole(
1✔
315
    original_tree: DerivationTree,
316
    tree_to_insert: DerivationTree,
317
    graph: GrammarGraph,
318
    maybe_canonical_grammar: Maybe[FrozenCanonicalGrammar] = Nothing,
319
) -> Iterator[Tuple[DerivationTree, Path]]:
320
    """
321
    This tree insertion method inserts :code:`tree_to_insert` into :code:`original_tree`
322
    by using an existing "hole" in :code:`original_tree`, i.e., an open leaf node.
323
    If possible, we append the children of :code:`tree_to_insert` to :code:`original_tree`;
324
    otherwise, we generate a suitable connecting tree.
325

326
    Example
327
    -------
328

329
    >>> import string
330
    >>> grammar = frozendict({
331
    ...     "<start>": ("<stmt>",),
332
    ...     "<stmt>": ("<assgn> ; <stmt>", "<assgn>"),
333
    ...     "<assgn>": ("<var> := <rhs>",),
334
    ...     "<rhs>": ("<var>", "<digit>"),
335
    ...     "<var>": tuple(string.ascii_lowercase),
336
    ...     "<digit>": tuple(string.digits),
337
    ... })
338
    >>> graph = GrammarGraph.from_grammar(grammar)
339

340
    >>> pre_original_inp = 'a := 1 ; b := x ; c := 3'
341

342
    >>> from isla.parser import EarleyParser
343
    >>> pre_original_tree = DerivationTree.from_parse_tree(
344
    ...     next(EarleyParser(grammar).parse(pre_original_inp))
345
    ... )
346
    >>> original_tree = pre_original_tree.replace_path((0, 0), DerivationTree("<assgn>", None))
347
    >>> print(original_tree)
348
    <assgn> ; b := x ; c := 3
349

350
    >>> declaration = DerivationTree.from_parse_tree(
351
    ...     ("<assgn>", [
352
    ...         ("<var>", [("x", [])]),
353
    ...         (" := ", []),
354
    ...         ("<rhs>", None),
355
    ...     ])
356
    ... )
357

358
    >>> print(deep_str(list(insert_tree_into_hole(
359
    ...     original_tree, declaration, graph
360
    ... ))))
361
    [(x := <rhs> ; b := x ; c := 3, (0, 0))]
362

363
    In the following case, we need a connecting tree:
364

365
    >>> original_tree = pre_original_tree.replace_path((0, 2, 2), DerivationTree("<stmt>", None))
366
    >>> print(original_tree)
367
    a := 1 ; b := x ; <stmt>
368

369
    >>> declaration = DerivationTree.from_parse_tree(
370
    ...     ("<assgn>", [
371
    ...         ("<var>", [("x", [])]),
372
    ...         (" := ", []),
373
    ...         ("<rhs>", None),
374
    ...     ])
375
    ... )
376

377
    If we considered the shortest path only, we would exclusively obtain
378
    the first one of the results displayed below, which is not minimal.
379

380
    >>> for result in insert_tree_into_hole(
381
    ...     original_tree, declaration, graph
382
    ... ):
383
    ...     print(deep_str(result))
384
    (a := 1 ; b := x ; x := <rhs> ; <stmt>, (0, 2, 2, 0))
385
    (a := 1 ; b := x ; x := <rhs>, (0, 2, 2, 0))
386

387
    :param original_tree: The original tree into which to insert :code:`tree_to_insert`.
388
    :param tree_to_insert: The tree to insert.
389
    :param graph: The graph of the underlying reference grammar.
390
    :param maybe_canonical_grammar: The canonical version of the grammar represented
391
        by :code:`graph`. If :code:`Nothing`, the canonical grammar is computed
392
        from :code:`graph` (with the according potential overhead).
393
    :return: Pairs of derivation trees resulting from the insertion and paths pointing
394
        to the position of the inserted tree in those resulting trees.
395
    """
396

397
    for path_to_hole, hole in original_tree.open_leaves():
1✔
398
        if hole.value == tree_to_insert.value:
1✔
399
            result = original_tree.replace_path(
1✔
400
                path_to_hole,
401
                DerivationTree(tree_to_insert.value, tree_to_insert.children, hole.id),
402
            )
403

404
            # The root of the inserted tree is "lost" in the result, so we set
405
            # `paths_to_ignore_in_inserted_tree` correspondingly.
406
            validate_insertion_result(
1✔
407
                graph, original_tree, tree_to_insert, result, ((),)
408
            )
409
            yield result, path_to_hole
1✔
410

411
        if graph.reachable(hole.value, tree_to_insert.value):
1✔
412
            # We consider all paths and not just the shortest one, since we want to
413
            # find all possible insertions. This is necessary since the shortest path
414
            # might not be the one that we want to use for the insertion (see the
415
            # example in the docstring).
416
            for graph_path in graph.all_paths(
1✔
417
                graph.get_node(hole.value), {graph.get_node(tree_to_insert.value)}
418
            ):
419
                (
1✔
420
                    connecting_tree,
421
                    path_in_connecting_tree_to_tree_to_insert,
422
                ) = graph_path_to_tree(
423
                    tuple(graph_path), graph, maybe_canonical_grammar
424
                )
425

426
                connecting_tree = DerivationTree(
1✔
427
                    hole.value,
428
                    connecting_tree.children,
429
                    hole.id,
430
                )
431

432
                connecting_tree = connecting_tree.replace_path(
1✔
433
                    path_in_connecting_tree_to_tree_to_insert, tree_to_insert
434
                )
435

436
                result = original_tree.replace_path(path_to_hole, connecting_tree)
1✔
437
                path_to_tree_to_insert = (
1✔
438
                    path_to_hole + path_in_connecting_tree_to_tree_to_insert
439
                )
440

441
                validate_insertion_result(graph, original_tree, tree_to_insert, result)
1✔
442
                yield result, path_to_tree_to_insert
1✔
443

444

445
def insert_tree_by_self_embedding(
1✔
446
    original_tree: DerivationTree,
447
    tree_to_insert: DerivationTree,
448
    graph: GrammarGraph,
449
    maybe_canonical_grammar: Maybe[FrozenCanonicalGrammar] = Nothing,
450
) -> Iterator[Tuple[DerivationTree, Path]]:
451
    """
452
    The self embedding method embeds :code:`tree_to_insert` into :code:`original_tree`
453
    by finding a node in :code:`original_tree` whose label can reach itself in the
454
    grammar graph in such a way that :code:`tree_to_insert` can be embedded in the
455
    such expanded tree.
456

457
    More precisely, we aim to find a node :code:`node` in :code:`original_tree` such
458
    that there is a connection `|*` enabling the following outcome::
459

460
                <start>
461
               /   |   \
462
                 node'
463
             /    |*    \
464
                node  tree_to_insert
465

466
    where :code:`node'` is a new node with the same label as :code:`node`.
467

468
    All identifiers from the passed trees are preserved in the results.
469

470
    Example
471
    -------
472

473
    >>> import string
474
    >>> grammar = frozendict({
475
    ...     "<start>": ("<stmt>",),
476
    ...     "<stmt>": ("<assgn> ; <stmt>", "<assgn>"),
477
    ...     "<assgn>": ("<var> := <rhs>",),
478
    ...     "<rhs>": ("<var>", "<digit>"),
479
    ...     "<var>": tuple(string.ascii_lowercase),
480
    ...     "<digit>": tuple(string.digits),
481
    ... })
482
    >>> graph = GrammarGraph.from_grammar(grammar)
483

484
    >>> original_inp = 'a := 1 ; b := x ; c := 3'
485

486
    >>> from isla.parser import EarleyParser
487
    >>> original_tree = DerivationTree.from_parse_tree(
488
    ...     next(EarleyParser(grammar).parse(original_inp))
489
    ... )
490

491
    >>> declaration = DerivationTree.from_parse_tree(
492
    ...     ("<assgn>", [
493
    ...         ("<var>", [("x", [])]),
494
    ...         (" := ", []),
495
    ...         ("<rhs>", None),
496
    ...     ])
497
    ... )
498

499
    >>> for result in insert_tree_by_self_embedding(
500
    ...     original_tree, declaration, graph
501
    ... ):
502
    ...    print(deep_str(result))
503
    (x := <rhs> ; a := 1 ; b := x ; c := 3, (0, 0))
504
    (a := 1 ; x := <rhs> ; b := x ; c := 3, (0, 2, 0))
505
    (a := 1 ; b := x ; x := <rhs> ; c := 3, (0, 2, 2, 0))
506

507
    >>> new_variable_node = DerivationTree("<var>")
508
    >>> for result in insert_tree_by_self_embedding(
509
    ...     original_tree, new_variable_node, graph
510
    ... ):
511
    ...    print(deep_str(result))
512
    (<var> := <rhs> ; a := 1 ; b := x ; c := 3, (0, 0, 0))
513
    (<var> := <var> ; a := 1 ; b := x ; c := 3, (0, 0, 2, 0))
514
    (a := 1 ; <var> := <rhs> ; b := x ; c := 3, (0, 2, 0, 0))
515
    (a := 1 ; <var> := <var> ; b := x ; c := 3, (0, 2, 0, 2, 0))
516
    (a := 1 ; b := x ; <var> := <rhs> ; c := 3, (0, 2, 2, 0, 0))
517
    (a := 1 ; b := x ; <var> := <var> ; c := 3, (0, 2, 2, 0, 2, 0))
518

519
    :param original_tree: The original tree into which to insert :code:`tree_to_insert`
520
        using the self embedding method.
521
    :param tree_to_insert: The tree to insert.
522
    :param graph: The graph of the underlying reference grammar.
523
    :param maybe_canonical_grammar: The canonical version of the grammar represented
524
        by :code:`graph`. If :code:`Nothing`, the canonical grammar is computed
525
        from :code:`graph` (with the according potential overhead).
526
    :return: Pairs of derivation trees resulting from the insertion and paths pointing
527
        to the position of the inserted tree in those resulting trees.
528
    """
529

530
    for node_path, node in original_tree.paths():
1✔
531
        if not is_nonterminal(node.value):
1✔
532
            continue
1✔
533

534
        graph_node = graph.get_node(node.value)
1✔
535
        for graph_path in graph.all_paths(graph_node, {graph_node}):
1✔
536
            tree_from_path, path_to_final_node = graph_path_to_tree(
1✔
537
                tuple(graph_path), graph, maybe_canonical_grammar
538
            )
539

540
            for (
1✔
541
                path_in_tree_from_path,
542
                tree_from_path_leaf,
543
            ) in tree_from_path.open_leaves():
544
                if path_in_tree_from_path == path_to_final_node:
1✔
545
                    continue
1✔
546

547
                if tree_from_path_leaf.value == tree_to_insert.value:
1✔
548
                    # We can use this tree to insert `tree_to_insert`.
549
                    new_subtree = tree_from_path.replace_path(
1✔
550
                        path_in_tree_from_path, tree_to_insert
551
                    ).replace_path(path_to_final_node, node)
552

553
                    result_tree = original_tree.replace_path(node_path, new_subtree)
1✔
554
                    path_to_inserted_tree_in_result = node_path + path_in_tree_from_path
1✔
555

556
                    validate_insertion_result(
1✔
557
                        graph, original_tree, tree_to_insert, result_tree
558
                    )
559
                    yield result_tree, path_to_inserted_tree_in_result
1✔
560

561
                elif graph.reachable(tree_from_path_leaf.value, tree_to_insert.value):
1✔
562
                    # We must use a connecting tree to insert `tree_to_insert`.
563
                    for connecting_graph_path in graph.all_paths(
1✔
564
                        graph.get_node(tree_from_path_leaf.value),
565
                        {graph.get_node(tree_to_insert.value)},
566
                    ):
567
                        (
1✔
568
                            connecting_tree_from_path,
569
                            path_to_final_node_in_connecting_tree,
570
                        ) = graph_path_to_tree(
571
                            tuple(connecting_graph_path), graph, maybe_canonical_grammar
572
                        )
573

574
                        instantiated_connecting_tree = (
1✔
575
                            connecting_tree_from_path.replace_path(
576
                                path_to_final_node_in_connecting_tree, tree_to_insert
577
                            )
578
                        )
579

580
                        new_subtree = tree_from_path.replace_path(
1✔
581
                            path_in_tree_from_path, instantiated_connecting_tree
582
                        ).replace_path(path_to_final_node, node)
583

584
                        result_tree = original_tree.replace_path(node_path, new_subtree)
1✔
585
                        path_to_inserted_tree_in_result = (
1✔
586
                            node_path
587
                            + path_in_tree_from_path
588
                            + path_to_final_node_in_connecting_tree
589
                        )
590

591
                        validate_insertion_result(
1✔
592
                            graph, original_tree, tree_to_insert, result_tree
593
                        )
594

595
                        yield result_tree, path_to_inserted_tree_in_result
1✔
596

597

598
def insert_tree_by_reverse_embedding(
1✔
599
    original_tree: DerivationTree,
600
    tree_to_insert: DerivationTree,
601
    graph: GrammarGraph,
602
    maybe_canonical_grammar: Maybe[FrozenCanonicalGrammar] = Nothing,
603
) -> Iterator[Tuple[DerivationTree, Path]]:
604
    """
605
    The reverse embedding method attempts to
606

607
    1. Find the first node in :code:`original_tree` that fits into a hole in
608
       :code:`tree_to_insert`. The tree above that node must be linear, i.e.,
609
       each node most have exactly one child.
610
    2. Adds the subtree of that node in :code:`original_tree` as a subtree of
611
       the hole in :code:`tree_to_insert`.
612
    3. Adds the resulting tree to the part of :code:`original_tree` that is above
613
       the node that was found in step 1.
614

615
    If there are any collisions in step 3, e.g., because both trees start with the
616
    :code:`<start>` nonterminal, the nodes from :code:`original_tree` are
617
    preferred.
618

619
    More precisely, we aim to find nodes matching the following setup::
620

621
           original_tree:        tree_to_insert:
622

623
              <start>               tti_root
624
                 |                 /   |   \
625
              parent                 hole
626
         orig_tree_subtree
627

628
    such that the following tree is grammatically valid (for a suitable
629
    instantiation of the new connecting parts `|*` between (1) parent and tti_root and
630
    (2) hole and orig_tree_subtree)::
631

632
             <start>
633
                |
634
             parent
635
                |*
636
            tti_root
637
            /   |  \
638
              hole
639
                |*
640
         orig_tree_subtree
641

642
    All nodes of the original tree must be preserved. If necessary, the
643
    root node of tree_to_insert is removed and replace by an existing
644
    node with the same label from the original tree.
645

646
    Example
647
    -------
648

649
    >>> from frozendict import frozendict
650
    >>> grammar = frozendict(
651
    ...     {
652
    ...         "<start>": ("<xml-tree>",),
653
    ...         "<xml-tree>": ("", "<xml-open-tag><xml-tree><xml-close-tag>"),
654
    ...         "<xml-open-tag>": ("<langle><id><attrs>>",),
655
    ...         "<xml-close-tag>": ("<langle>/<id>>",),
656
    ...         "<attrs>": ("", " <attr><attrs>"),
657
    ...         "<attr>": ('<id>="XXX"',),
658
    ...         "<id>": ("<letter>:<letter>",),
659
    ...         "<letter>": ("a", "b", "c", "x"),
660
    ...         "<langle>": ("<",),
661
    ...     }
662
    ... )
663

664

665
    This is our original input:
666

667
    >>> original_inp = '<b:c b:c="XXX" x:b="XXX"></b:x>'
668

669
    >>> from isla.parser import EarleyParser
670
    >>> original_tree = DerivationTree.from_parse_tree(
671
    ...     next(EarleyParser(grammar).parse(original_inp))
672
    ... )
673

674
    The context node to insert is :code:`<<id> <attr>><xml-tree></<id>>`:
675

676
    >>> tree_to_insert = DerivationTree.from_parse_tree(
677
    ...     (
678
    ...         "<xml-tree>",
679
    ...         [
680
    ...             (
681
    ...                 "<xml-open-tag>",
682
    ...                 [
683
    ...                     ("<langle>", [("<", [])]),
684
    ...                     ("<id>", None),
685
    ...                     ("<attrs>", [(" ", []), ("<attr>", None), ("<attrs>", [])]),
686
    ...                     (">", []),
687
    ...                 ],
688
    ...             ),
689
    ...             ("<xml-tree>", None),
690
    ...             (
691
    ...                 "<xml-close-tag>",
692
    ...                 [
693
    ...                     ("<langle>", [("<", [])]),
694
    ...                     ("/", []),
695
    ...                     ("<id>", None),
696
    ...                     (">", []),
697
    ...                 ],
698
    ...             ),
699
    ...         ],
700
    ...     )
701
    ... )
702

703
    There is one possible result of the insertion:
704

705
    >>> graph = GrammarGraph.from_grammar(grammar)
706
    >>> deep_str(list(insert_tree_by_reverse_embedding(
707
    ...     original_tree, tree_to_insert, graph))
708
    ... )
709
    '[(<<id> <attr>><b:c b:c="XXX" x:b="XXX"></b:x></<id>>, (0,))]'
710

711
    :param original_tree: The original tree into which to insert :code:`tree_to_insert`
712
        using the reverse embedding method.
713
    :param tree_to_insert: The tree to insert.
714
    :param graph: The graph of the underlying reference grammar.
715
    :param maybe_canonical_grammar: The canonical version of the grammar represented
716
        by :code:`graph`. If :code:`Nothing`, the canonical grammar is computed
717
        from :code:`graph` (with the according potential overhead).
718
    :return: Pairs of derivation trees resulting from the insertion and paths pointing
719
        to the position of the inserted tree in those resulting trees.
720
    """
721

722
    assert original_tree.value == tree_to_insert.value or graph.reachable(
1✔
723
        original_tree.value, tree_to_insert.value
724
    )
725

726
    tree_to_insert_holes: frozendict[Path, DerivationTree] = frozendict(
1✔
727
        tree_to_insert.open_leaves()
728
    )
729

730
    for orig_tree_path, orig_tree_subtree in original_tree.paths():
1✔
731
        parent_path: Path = () if not orig_tree_path else orig_tree_path[:-1]
1✔
732
        parent_node: DerivationTree = original_tree.get_subtree(parent_path)
1✔
733

734
        if (
1✔
735
            not is_nonterminal(orig_tree_subtree.value)
736
            or parent_node.value != tree_to_insert.value
737
            and not graph.reachable(parent_node.value, tree_to_insert.value)
738
        ):
NEW
739
            continue
×
740

741
        matching_holes = (
1✔
742
            (path, hole)
743
            for path, hole in tree_to_insert_holes.items()
744
            if (
745
                hole.value == orig_tree_subtree.value
746
                or graph.reachable(hole.value, orig_tree_subtree.value)
747
            )
748
        )
749

750
        for path_to_hole, hole in matching_holes:
1✔
751
            # Tree from `parent` to `tti_root`.
752
            (
1✔
753
                connecting_tree_1,
754
                path_in_connecting_tree_1_to_tti_root,
755
            ) = connect_nonterminals(
756
                parent_node.value, tree_to_insert.value, graph, maybe_canonical_grammar
757
            )
758

759
            # Retain the original identifier of `parent`.
760
            connecting_tree_1 = DerivationTree(
1✔
761
                parent_node.value,
762
                connecting_tree_1.children,
763
                parent_node.id,
764
            )
765

766
            # Tree from `hole` to `orig_tree_subtree`.
767
            (
1✔
768
                connecting_tree_2,
769
                path_in_connecting_tree_2_to_orig_tree_subtree,
770
            ) = connect_nonterminals(
771
                hole.value, orig_tree_subtree.value, graph, maybe_canonical_grammar
772
            )
773

774
            # Retain the original identifier of `hole`.
775
            connecting_tree_2 = DerivationTree(
1✔
776
                connecting_tree_2.value,
777
                connecting_tree_2.children,
778
                hole.id,
779
            )
780

781
            #       <start>
782
            #          |
783
            #       parent
784
            #  orig_tree_subtree
785
            result = original_tree
1✔
786

787
            #          <start>
788
            #             |
789
            #          parent
790
            #             |*
791
            #         tti_root*
792
            result = result.replace_path(parent_path, connecting_tree_1)
1✔
793

794
            #          <start>
795
            #             |
796
            #          parent
797
            #             |*
798
            #         tti_root
799
            #         /   |  \
800
            #           hole
801
            result = result.replace_path(
1✔
802
                parent_path + path_in_connecting_tree_1_to_tti_root, tree_to_insert
803
            )
804

805
            #          <start>
806
            #             |
807
            #          parent
808
            #             |*
809
            #         tti_root
810
            #         /   |  \
811
            #           hole
812
            #             |*
813
            #      orig_tree_subtree*
814
            result = result.replace_path(
1✔
815
                parent_path + path_in_connecting_tree_1_to_tti_root + path_to_hole,
816
                connecting_tree_2,
817
            )
818

819
            #          <start>
820
            #             |
821
            #          parent
822
            #             |*
823
            #         tti_root
824
            #         /   |  \
825
            #           hole
826
            #             |*
827
            #      orig_tree_subtree
828
            result = result.replace_path(
1✔
829
                parent_path
830
                + path_in_connecting_tree_1_to_tti_root
831
                + path_to_hole
832
                + path_in_connecting_tree_2_to_orig_tree_subtree,
833
                orig_tree_subtree,
834
            )
835

836
            path_to_inserted_tree = parent_path + path_in_connecting_tree_1_to_tti_root
1✔
837

838
            # In the validation, we permit the path to the hole in the inserted
839
            # tree to not appear in the result if the second connecting tree is
840
            # trivial. In that case, the hole is replaced by `orig_tree_subtree`.
841
            validate_insertion_result(
1✔
842
                graph,
843
                original_tree,
844
                tree_to_insert,
845
                result,
846
                (path_to_hole,) if len(connecting_tree_2) == 1 else (),
847
            )
848
            yield result, path_to_inserted_tree
1✔
849

850
        if len(orig_tree_subtree.children or []) > 1:
1✔
851
            break
1✔
852

853

854
def insert_tree(
1✔
855
    original_tree: DerivationTree,
856
    tree_to_insert: DerivationTree,
857
    graph: GrammarGraph,
858
    maybe_canonical_grammar: Maybe[FrozenCanonicalGrammar] = Nothing,
859
) -> Iterator[Tuple[DerivationTree, Path]]:
860
    """
861
    This function inserts :code:`tree_to_insert` into :code:`original_tree` in three
862
    possible ways using the following functions:
863

864
    1. :func:`~isla.tree_insertion.insert_tree_into_hole`
865
    2. :func:`~isla.tree_insertion.insert_tree_by_self_embedding`
866
    3. :func:`~isla.tree_insertion.insert_tree_by_reverse_embedding`
867

868
    Example
869
    -------
870

871
    Consider the following, simplified XML grammar:
872

873
    >>> from frozendict import frozendict
874
    >>> grammar = frozendict(
875
    ...     {
876
    ...         "<start>": ("<xml-tree>",),
877
    ...         "<xml-tree>": ("", "<xml-open-tag><xml-tree><xml-close-tag>"),
878
    ...         "<xml-open-tag>": ("<langle><id><attrs>>",),
879
    ...         "<xml-close-tag>": ("<langle>/<id>>",),
880
    ...         "<attrs>": ("", " <attr><attrs>"),
881
    ...         "<attr>": ('<id>="XXX"',),
882
    ...         "<id>": ("<letter>:<letter>",),
883
    ...         "<letter>": ("a", "b", "c", "x"),
884
    ...         "<langle>": ("<",),
885
    ...     }
886
    ... )
887

888
    We want to embed the following input into an additional context:
889

890
    >>> original_inp = '<b:c b:c="XXX" x:b="XXX"></b:x>'
891

892
    >>> from isla.parser import EarleyParser
893
    >>> original_tree = DerivationTree.from_parse_tree(
894
    ...     next(EarleyParser(grammar).parse(original_inp))
895
    ... )
896

897
    The context to insert is the abstract input :code:`<<id> <attr>><xml-tree></<id>>`:
898

899
    >>> tree_to_insert = DerivationTree.from_parse_tree(
900
    ...     (
901
    ...         "<xml-tree>",
902
    ...         [
903
    ...             (
904
    ...                 "<xml-open-tag>",
905
    ...                 [
906
    ...                     ("<langle>", [("<", [])]),
907
    ...                     ("<id>", None),
908
    ...                     ("<attrs>", [(" ", []), ("<attr>", None), ("<attrs>", [])]),
909
    ...                     (">", []),
910
    ...                 ],
911
    ...             ),
912
    ...             ("<xml-tree>", None),
913
    ...             (
914
    ...                 "<xml-close-tag>",
915
    ...                 [
916
    ...                     ("<langle>", [("<", [])]),
917
    ...                     ("/", []),
918
    ...                     ("<id>", None),
919
    ...                     (">", []),
920
    ...                 ],
921
    ...             ),
922
    ...         ],
923
    ...     )
924
    ... )
925

926
    In this case, the function :func:`~isla.tree_insertion.insert_tree_by_reverse_embedding`
927
    is the only one that can be used to insert the tree:
928

929
    >>> graph = GrammarGraph.from_grammar(grammar)
930
    >>> deep_str(list(insert_tree(original_tree, tree_to_insert, graph)))
931
    '[(<<id> <attr>><b:c b:c="XXX" x:b="XXX"></b:x></<id>>, (0,))]'
932

933
    :param original_tree: The original tree into which to insert :code:`tree_to_insert`.
934
    :param tree_to_insert: The tree to insert.
935
    :param graph: The graph of the underlying reference grammar.
936
    :param maybe_canonical_grammar: The canonical version of the grammar represented
937
        by :code:`graph`. If :code:`Nothing`, the canonical grammar is computed
938
        from :code:`graph` (with the according potential overhead).
939
    :return: Pairs of derivation trees resulting from the insertion and paths pointing
940
        to the position of the inserted tree in those resulting trees.
941
    """
942

943
    maybe_canonical_grammar = maybe_canonical_grammar.lash(
1✔
944
        lambda _: Some(frozen_canonical(graph.grammar))
945
    )
946

947
    iterators = (
1✔
948
        insert_tree_into_hole(
949
            original_tree, tree_to_insert, graph, maybe_canonical_grammar
950
        ),
951
        insert_tree_by_self_embedding(
952
            original_tree, tree_to_insert, graph, maybe_canonical_grammar
953
        ),
954
        insert_tree_by_reverse_embedding(
955
            original_tree, tree_to_insert, graph, maybe_canonical_grammar
956
        ),
957
    )
958

959
    idx = 0
1✔
960
    while iterators:
1✔
961
        try:
1✔
962
            yield next(iterators[idx])
1✔
963
        except StopIteration:
1✔
964
            iterators = iterators[:idx] + iterators[idx + 1 :]
1✔
965

966
        if iterators:
1✔
967
            idx = (idx + 1) % len(iterators)
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc