• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

hibtc / cpymad / 3676917429

pending completion
3676917429

push

github-actions

Thomas Gläßle
Add documentation for building on linux+conda

1045 of 1117 relevant lines covered (93.55%)

9.28 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.44
/src/cpymad/parsing.py
1
"""
2
Implementation of a simple LL(1) parser.
3
"""
4
from copy import deepcopy
10✔
5

6

7
def fix_point(func, x, **kwargs):
10✔
8
    """
9
    Repeatedly apply ``x := f(x, **kwargs)`` until encountering a fix
10
    point, i.e. ``x == f(x)`` and return the resulting fix point ``x``.
11
    """
12
    while True:
6✔
13
        x_new = func(x, **kwargs)
10✔
14
        if x_new == x:
10✔
15
            return x
10✔
16
        else:
17
            x = x_new
10✔
18

19

20
def extend_empty_sets(empty: dict, grammar: dict) -> dict:
10✔
21
    """
22
    Determine which nonterminals of an LL(1) grammar can be empty.
23
    Must be applied repeatedly until converging.
24

25
    :param empty: nonterminal -> bool
26
    :param grammar: nonterminal -> [productions...]
27
    :returns: Extended copy of ``empty``
28
    """
29
    return {
10✔
30
        symbol: empty.get(symbol) or any(
31
            all(empty.get(p) for p in production)
32
            for production in productions
33
        )
34
        for symbol, productions in grammar.items()
35
    }
36

37

38
def extend_firsts_sets(firsts, *, empty, grammar):
10✔
39
    """
40
    Determine the FIRST sets of an LL(1) grammar, i.e. the set of terminals
41
    that a given symbol can start with.
42

43
    Must be applied repeatedly until converging.
44

45
    :param firsts: terminal|nonterminal -> set[terminal]
46
    :param empty: nonterminal -> bool
47
    :param grammar: nonterminal -> [productions...]
48
    :returns: Extended copy of ``firsts``
49
    """
50
    firsts = deepcopy(firsts)
10✔
51
    for symbol, productions in grammar.items():
10✔
52
        for production in productions:
10✔
53
            for i, n in enumerate(production):
10✔
54
                extend_parse_table(symbol, firsts[symbol], {
10✔
55
                    t: production[i + 1:][::-1] + p
56
                    for t, p in firsts[n].items()
57
                })
58
                if not empty[n]:
10✔
59
                    break
10✔
60
    return firsts
10✔
61

62

63
def extend_follow_sets(follow, *, firsts, empty, grammar):
10✔
64
    """
65
    Determine the FOLLOW sets of an LL(1) grammar, i.e. the set of terminals
66
    that a given symbol can be followed by if it may be empty.
67

68
    Must be applied repeatedly until converging.
69

70
    :param follow: terminal|nonterminal -> set[terminal]
71
    :param firsts: terminal|nonterminal -> set[terminal]
72
    :param empty: nonterminal -> bool
73
    :param grammar: nonterminal -> [productions...]
74
    :returns: Extended copy of ``follow``
75
    """
76
    follow = deepcopy(follow)
10✔
77
    for symbol, productions in grammar.items():
10✔
78
        for production in productions:
10✔
79
            for i, p1 in enumerate(production):
10✔
80
                for p2 in production[i + 1:]:
10✔
81
                    follow[p1] |= set(firsts[p2])
10✔
82
                    if not empty[p2]:
10✔
83
                        break
10✔
84
                else:
85
                    follow[p1] |= follow[symbol]
10✔
86
    return follow
10✔
87

88

89
def create_parse_table(terminals, grammar, start):
10✔
90
    """
91
    Create an LL(1) parsing table.
92

93
    :param terminals: list of terminal symbols
94
    :param grammar: nonterminal -> [productions...]
95
    :param start: nonterminal start symbol
96
    :returns: parse table nonterminal -> {terminal -> production}
97
    """
98
    empty = fix_point(extend_empty_sets, {}, grammar=grammar)
10✔
99
    empty.update({t: False for t in terminals})
10✔
100
    table = {**{t: {t: []} for t in terminals}, **{n: {} for n in grammar}}
10✔
101
    table = fix_point(
10✔
102
        extend_firsts_sets,
103
        table,
104
        empty=empty,
105
        grammar=grammar)
106
    follow = {
10✔
107
        **{t: set() for t in terminals},
108
        **{n: set() for n in grammar},
109
    }
110
    follow = fix_point(
10✔
111
        extend_follow_sets, follow,
112
        firsts=table, empty=empty, grammar=grammar)
113

114
    for symbol, terminals in follow.items():
10✔
115
        if empty[symbol]:
10✔
116
            extend_parse_table(symbol, table[symbol], {
10✔
117
                t: None
118
                for t in terminals
119
            })
120

121
    return table
10✔
122

123

124
def extend_parse_table(symbol, old, new):
10✔
125
    """
126
    Merge parse tables ``old`` and ``new`` without silently overriding
127
    duplicate entries. Raises a ``ValueError`` if ``new`` contains entries
128
    that were defined differently in ``old``.
129
    """
130
    for key in old.keys() & new.keys():
10✔
131
        if old[key] != new[key]:
10✔
132
            raise ValueError((
×
133
                "Grammar is ambiguous. Duplicate entry "
134
                "in parse table: <{}, {}> -> {} or {}"
135
            ).format(symbol, key, old[key], new[key]))
136
    old.update(new)
10✔
137

138

139
class Parser:
10✔
140

141
    """
142
    LL(1) syntax checker.
143

144
    :param terminals: list of terminal symbols
145
    :param grammar: nonterminal -> [productions...]
146
    :param start: nonterminal start symbol
147
    """
148

149
    def __init__(self, terminals, grammar, start):
10✔
150
        self.table = table = create_parse_table(
10✔
151
            terminals, grammar, start)
152
        # precompule rules lookup:
153
        self.rules = {symbol: {} for symbol in table}
10✔
154
        for symbol, rules in table.items():
10✔
155
            self.rules[symbol].update({
10✔
156
                t: p and [self.rules[n] for n in p]
157
                for t, p in rules.items()
158
            })
159
        self.start = self.rules[start]
10✔
160

161
    def parse(self, tokens):
10✔
162
        """
163
        Verifies the grammar.
164

165
        :param list tokens: list of tokens (terminals) in input order
166
        :returns: nothing if successful
167
        :raises: ValueError
168
        """
169
        tokens = list(reversed(tokens))
10✔
170
        stack = [self.start]
10✔
171
        try:
10✔
172
            while stack:
10✔
173
                token = tokens[-1]
10✔
174
                rules = stack.pop()
10✔
175
                more = rules[token.type]
10✔
176
                if more is not None:
10✔
177
                    stack.extend(more)
10✔
178
                    tokens.pop()
10✔
179
        except KeyError:
10✔
180
            raise ValueError(
10✔
181
                ("Unexpected {} in:\n"
182
                 "    {!r}\n"
183
                 "     ").format(token.type, token.expr)
184
                + ' ' * token.start
185
                + '^' * max(token.length, 1)
186
            ) from None
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc