• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

psf / black / 6457704576

09 Oct 2023 02:02PM UTC coverage: 96.516% (-0.09%) from 96.607%
6457704576

push

github

web-flow
Drop support for parsing Python 2 (#3933)

3 of 3 new or added lines in 1 file covered. (100.0%)

6 existing lines in 1 file now uncovered.

6677 of 6918 relevant lines covered (96.52%)

3.86 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.09
/src/black/parsing.py
1
"""
2
Parse Python code and perform AST validation.
3
"""
4
import ast
4✔
5
import sys
4✔
6
from typing import Iterable, Iterator, List, Set, Tuple
4✔
7

8
from black.mode import VERSION_TO_FEATURES, Feature, TargetVersion, supports_feature
4✔
9
from black.nodes import syms
4✔
10
from blib2to3 import pygram
4✔
11
from blib2to3.pgen2 import driver
4✔
12
from blib2to3.pgen2.grammar import Grammar
4✔
13
from blib2to3.pgen2.parse import ParseError
4✔
14
from blib2to3.pgen2.tokenize import TokenError
4✔
15
from blib2to3.pytree import Leaf, Node
4✔
16

17

18
class InvalidInput(ValueError):
    """Raised when input source code fails all parse attempts.

    The message has the form ``Cannot parse: <lineno>:<column>: <detail>``
    when raised by `lib2to3_parse`.
    """
20

21

22
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to try, in order, when parsing for `target_versions`.

    With an empty `target_versions` every known grammar is returned. Otherwise
    each grammar is included only when the feature checks below allow it, so
    the result is a subset of the three grammars in a fixed relative order:
    async-keywords, plain, soft-keywords.
    """
    if not target_versions:
        # No target_version specified, so try all grammars.
        return [
            # Python 3.7-3.9
            pygram.python_grammar_async_keywords,
            # Python 3.0-3.6
            pygram.python_grammar,
            # Python 3.10+
            pygram.python_grammar_soft_keywords,
        ]

    grammars: List[Grammar] = []
    # If we have to parse both, try to parse async as a keyword first
    if not supports_feature(
        target_versions, Feature.ASYNC_IDENTIFIERS
    ) and not supports_feature(target_versions, Feature.PATTERN_MATCHING):
        # Python 3.7-3.9
        grammars.append(pygram.python_grammar_async_keywords)
    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        # Python 3.0-3.6
        grammars.append(pygram.python_grammar)
    # Unlike the checks above, this one fires as soon as *any* target version
    # lists PATTERN_MATCHING among its features.
    if any(Feature.PATTERN_MATCHING in VERSION_TO_FEATURES[v] for v in target_versions):
        # Python 3.10+
        grammars.append(pygram.python_grammar_soft_keywords)

    # At least one of the above branches must have been taken, because every Python
    # version has exactly one of the two 'ASYNC_*' flags
    return grammars
4✔
51

52

53
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Given a string with source, return the lib2to3 Node.

    A trailing newline is appended to `src_txt` when it is missing. Every
    grammar returned by `get_grammars` for `target_versions` is tried in order
    and the first one that parses successfully wins. A bare `Leaf` result is
    wrapped in a `file_input` `Node` so callers always receive a `Node`.

    Raises:
        InvalidInput: when no grammar can parse the source. One error is
            recorded per grammar, keyed by `grammar.version`; the one with the
            greatest key is raised.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    grammars = get_grammars(set(target_versions))
    # Maps grammar.version -> the InvalidInput built for that grammar's failure.
    errors = {}
    for grammar in grammars:
        drv = driver.Driver(grammar)
        try:
            result = drv.parse_string(src_txt, True)
            break

        except ParseError as pe:
            lineno, column = pe.context[1]
            lines = src_txt.splitlines()
            try:
                faulty_line = lines[lineno - 1]
            except IndexError:
                # The reported line lies outside the source text.
                faulty_line = "<line number missing in source>"
            errors[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {faulty_line}"
            )

        except TokenError as te:
            # In edge cases these are raised; and typically don't have a "faulty_line".
            lineno, column = te.args[1]
            errors[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {te.args[0]}"
            )

    else:
        # Only reached when the loop never hit `break`, i.e. no grammar parsed.
        # Choose the latest version when raising the actual parsing error.
        assert len(errors) >= 1
        exc = errors[max(errors)]
        raise exc from None

    if isinstance(result, Leaf):
        result = Node(syms.file_input, [result])
    return result
4✔
93

94

95
def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
    """Return True if `src_txt` can be parsed with `grammar`, False otherwise.

    A `ParseError`, `TokenError` or `IndentationError` raised while parsing is
    treated as "does not match"; any other exception propagates.
    """
    drv = driver.Driver(grammar)
    try:
        drv.parse_string(src_txt, True)
    except (ParseError, TokenError, IndentationError):
        return False
    else:
        return True
×
103

104

105
def lib2to3_unparse(node: Node) -> str:
    """Given a lib2to3 node, return its string representation."""
    return str(node)
×
109

110

111
def parse_single_version(
    src: str, version: Tuple[int, int], *, type_comments: bool
) -> ast.AST:
    """Parse `src` with `ast.parse` for one specific Python feature version.

    Args:
        src: the source code to parse.
        version: ``(major, minor)`` passed to `ast.parse` as `feature_version`.
        type_comments: passed through to `ast.parse` unchanged.

    Raises:
        SyntaxError: propagated from `ast.parse` when `src` does not parse.
    """
    filename = "<unknown>"
    return ast.parse(
        src, filename, feature_version=version, type_comments=type_comments
    )
118

119

120
def parse_ast(src: str) -> ast.AST:
    """Parse `src` into an `ast` tree, trying feature versions newest first.

    Every minor version from the running interpreter's down to 3.3 is tried
    with type comments enabled; if all of those fail, the same versions are
    tried again with type comments disabled. The first successful parse is
    returned.

    Raises:
        SyntaxError: when every attempt fails. Its message is the text of the
            first error seen in the type-comments pass.
    """
    # TODO: support Python 4+ ;)
    # Built once, already in newest-first order, and shared by both passes.
    versions = [(3, minor) for minor in range(sys.version_info[1], 2, -1)]

    first_error = ""
    for version in versions:
        try:
            return parse_single_version(src, version, type_comments=True)
        except SyntaxError as e:
            if not first_error:
                first_error = str(e)

    # Try to parse without type comments
    for version in versions:
        try:
            return parse_single_version(src, version, type_comments=False)
        except SyntaxError:
            pass

    raise SyntaxError(first_error)
4✔
140

141

142
def _normalize(lineend: str, value: str) -> str:
    """Return `value` with per-line and outer whitespace removed.

    Each line of `value` is stripped of leading and trailing whitespace, the
    lines are re-joined with `lineend`, and the joined string is stripped once
    more. Blank lines in the middle are kept; only those at the start and end
    disappear.
    """
    # To normalize, we strip any leading and trailing space from
    # each line...
    stripped: List[str] = [i.strip() for i in value.splitlines()]
    normalized = lineend.join(stripped)
    # ...and remove any blank lines at the beginning and end of
    # the whole string
    return normalized.strip()
4✔
150

151

152
def stringify_ast(node: ast.AST, depth: int = 0) -> Iterator[str]:
    """Simple visitor generating strings to compare ASTs by content.

    Yields one line per opened node, field name, leaf value and closed node,
    indented two spaces per `depth` level. Fields are visited in sorted name
    order. Several details are normalized so they do not affect the output:

    - a ``u`` string prefix (`Constant.kind == "u"`) is cleared; note that this
      mutates `node` in place;
    - `TypeIgnore` nodes are emitted without any fields;
    - a tuple directly inside ``del`` targets is flattened to its elements;
    - string constants go through `_normalize`, and string type comments have
      trailing whitespace removed.
    """

    if (
        isinstance(node, ast.Constant)
        and isinstance(node.value, str)
        and node.kind == "u"
    ):
        # It's a quirk of history that we strip the u prefix over here. We used to
        # rewrite the AST nodes for Python version compatibility and we never copied
        # over the kind
        node.kind = None

    # The three indentation levels used below never change for this node.
    node_indent = "  " * depth
    field_indent = "  " * (depth + 1)
    value_indent = "  " * (depth + 2)

    yield f"{node_indent}{node.__class__.__name__}("

    # TypeIgnore has only one field 'lineno' which breaks this comparison,
    # so none of its fields are emitted.
    fields = () if isinstance(node, ast.TypeIgnore) else sorted(node._fields)
    for field in fields:  # noqa: F402
        try:
            value: object = getattr(node, field)
        except AttributeError:
            # A declared field that is not set on this node is skipped.
            continue

        yield f"{field_indent}{field}="

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                if (
                    field == "targets"
                    and isinstance(node, ast.Delete)
                    and isinstance(item, ast.Tuple)
                ):
                    for elt in item.elts:
                        yield from stringify_ast(elt, depth + 2)

                elif isinstance(item, ast.AST):
                    yield from stringify_ast(item, depth + 2)

        elif isinstance(value, ast.AST):
            yield from stringify_ast(value, depth + 2)

        else:
            normalized: object
            if (
                isinstance(node, ast.Constant)
                and field == "value"
                and isinstance(value, str)
            ):
                # Constant strings may be indented across newlines, if they are
                # docstrings; fold spaces after newlines when comparing. Similarly,
                # trailing and leading space may be removed.
                normalized = _normalize("\n", value)
            elif field == "type_comment" and isinstance(value, str):
                # Trailing whitespace in type comments is removed.
                normalized = value.rstrip()
            else:
                normalized = value
            yield f"{value_indent}{normalized!r},  # {value.__class__.__name__}"

    yield f"{node_indent})  # /{node.__class__.__name__}"
4✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc