• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

psf / black / 22209455021

20 Feb 2026 02:57AM UTC coverage: 95.685%. First build
22209455021

Pull #4998

github

web-flow
Merge 9846ad2c5 into 55180793f
Pull Request #4998: Aggregate escape counts across all f-string segments in normalize_fstring_quotes

5196 of 5483 branches covered (94.77%)

0 of 4 new or added lines in 1 file covered. (0.0%)

7960 of 8319 relevant lines covered (95.68%)

4.78 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.28
/src/black/strings.py
1
"""
2
Simple formatting on strings. Further string formatting code is in trans.py.
3
"""
4

5
import re
5✔
6
import sys
5✔
7
from functools import lru_cache
5✔
8
from re import Match, Pattern
5✔
9
from typing import Final
5✔
10

11
from black._width_table import WIDTH_TABLE
5✔
12
from blib2to3.pytree import Leaf
5✔
13

14
# Every character that may appear in a string literal prefix, in both cases.
STRING_PREFIX_CHARS: Final = "fturbFTURB"

# Group 1 captures the (possibly empty) run of prefix characters, group 2 the
# remainder of the literal. DOTALL lets group 2 span embedded newlines.
STRING_PREFIX_RE: Final = re.compile(rf"^([{STRING_PREFIX_CHARS}]*)(.*)$", re.DOTALL)

# Matches a run of backslashes followed by the body of one escape sequence.
# The named group that is set tells which kind of escape was found.
UNICODE_ESCAPE_RE: Final = re.compile(
    r"(?P<backslashes>\\+)(?P<body>"
    + "|".join(
        [
            r"(u(?P<u>[a-fA-F0-9]{4}))",  # 16-bit hex value: uXXXX
            r"(U(?P<U>[a-fA-F0-9]{8}))",  # 32-bit hex value: UXXXXXXXX
            r"(x(?P<x>[a-fA-F0-9]{2}))",  # 8-bit hex value: xHH
            r"(N\{(?P<N>[a-zA-Z0-9 \-]{2,})\})",  # Named character: N{name}
        ]
    )
    + r")",
    re.VERBOSE,
)
27

28

29
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Apply the `regex` -> `replacement` substitution to `original` two times.

    A single pass cannot rewrite matches that overlap each other; string
    normalization relies on the second pass to catch what the first one
    skipped.
    """
    first_pass = regex.sub(replacement, original)
    return regex.sub(replacement, first_pass)
36

37

38
def has_triple_quotes(string: str) -> bool:
    """
    Returns:
        True iff @string, once its leading prefix characters are stripped,
        starts with three quotation characters of the same kind.
    """
    unprefixed = string.lstrip(STRING_PREFIX_CHARS)
    return unprefixed.startswith(('"""', "'''"))
45

46

47
def lines_with_leading_tabs_expanded(s: str) -> list[str]:
    """
    Break `s` into lines, expanding tabs only inside each line's leading
    whitespace (following the normal Python rules).

    Lines that are blank or have no leading whitespace are kept untouched.
    A trailing newline in `s` yields a final empty line.
    """

    def expand_indent(line: str) -> str:
        content = line.lstrip()
        if not content or content == line:
            # Whitespace-only line, or nothing to expand.
            return line
        indent_width = len(line) - len(content)
        return line[:indent_width].expandtabs() + content

    expanded = [expand_indent(line) for line in s.splitlines()]
    if s.endswith("\n"):
        expanded.append("")
    return expanded
64

65

66
def fix_multiline_docstring(docstring: str, prefix: str) -> str:
    """Re-indent a multiline docstring so continuation lines start with `prefix`.

    The common indentation of all non-blank lines after the first is removed,
    trailing whitespace is stripped, and `prefix` is put in front of every
    non-blank continuation line as well as the very last line.
    """
    # https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation
    assert docstring, "INTERNAL ERROR: Multiline docstrings cannot be empty"
    first_line, *rest = lines_with_leading_tabs_expanded(docstring)

    # Smallest indentation among the non-blank continuation lines; stays at
    # sys.maxsize when there are none.
    indent = min(
        (len(line) - len(line.lstrip()) for line in rest if line.lstrip()),
        default=sys.maxsize,
    )

    # The first line is special: it is simply stripped on both sides.
    trimmed = [first_line.strip()]
    if indent < sys.maxsize:
        final_idx = len(rest) - 1
        for idx, line in enumerate(rest):
            dedented = line[indent:].rstrip()
            # Blank lines stay empty, except the last one which keeps the prefix.
            keep_prefix = bool(dedented) or idx == final_idx
            trimmed.append(prefix + dedented if keep_prefix else "")
    return "\n".join(trimmed)
87

88

89
def get_string_prefix(string: str) -> str:
    """
    Pre-conditions:
        * assert_is_leaf_string(@string)

    Returns:
        The leading run of prefix characters of @string
        (e.g. '', 'r', 'f', or 'rf').
    """
    assert_is_leaf_string(string)

    # Whatever lstrip() removes from the front is exactly the prefix.
    prefix_length = len(string) - len(string.lstrip(STRING_PREFIX_CHARS))
    return string[:prefix_length]
106

107

108
def assert_is_leaf_string(string: str) -> None:
    """
    Verify that @string looks like the `leaf.value` of a Leaf whose type is
    `token.STRING`. The exact conditions checked are listed below.

    Pre-conditions:
        * @string starts with either ', ", <prefix>', or <prefix>" where
        `set(<prefix>)` is some subset of `set(STRING_PREFIX_CHARS)`.
        * @string ends with a quote character (' or ").

    Raises:
        AssertionError(...) if the pre-conditions listed above are not
        satisfied.
    """
    # Position of the earliest quote character of either kind, or -1 if none.
    found = [idx for idx in (string.find('"'), string.find("'")) if idx != -1]
    quote_idx = min(found, default=-1)

    # The opening quote must exist and must not be the final character.
    assert (
        0 <= quote_idx < len(string) - 1
    ), f"{string!r} is missing a starting quote character (' or \")."
    assert string.endswith(
        ("'", '"')
    ), f"{string!r} is missing an ending quote character (' or \")."
    assert set(string[:quote_idx]) <= set(
        STRING_PREFIX_CHARS
    ), f"{set(string[:quote_idx])} is NOT a subset of {set(STRING_PREFIX_CHARS)}."
141

142

143
def normalize_string_prefix(s: str) -> str:
    """Normalize the prefix of string literal `s`.

    "F" and "B" are lowercased, "u"/"U" are dropped, and in a two-character
    prefix the "r"/"R" is moved to the front. The case of "r"/"R" is kept.
    """
    match = STRING_PREFIX_RE.match(s)
    assert match is not None, f"failed to match string {s!r}"
    orig_prefix, rest = match.groups()
    new_prefix = orig_prefix.translate(str.maketrans("FB", "fb", "Uu"))

    # Python syntax guarantees max 2 prefixes and that one of them is "r"
    needs_swap = len(new_prefix) == 2 and new_prefix[0].lower() != "r"
    if needs_swap:
        new_prefix = new_prefix[::-1]
    return new_prefix + rest
159

160

161
# Re(gex) does actually cache patterns internally but this still improves
162
# performance on a long list literal of strings by 5-9% since lru_cache's
163
# caching overhead is much lower.
164
@lru_cache(maxsize=64)
def _cached_compile(pattern: str) -> Pattern[str]:
    """Compile `pattern`, memoizing up to 64 distinct pattern strings."""
    compiled = re.compile(pattern)
    return compiled
167

168

169
def normalize_string_quotes(s: str) -> str:
    """Prefer double quotes but only if it doesn't cause more escaping.

    Adds or removes backslashes as appropriate. The input is returned
    (possibly with needless escapes removed) whenever switching quotes would
    be impossible, would add backslashes, or would put a backslash inside an
    f-string replacement field.
    """
    unprefixed = s.lstrip(STRING_PREFIX_CHARS)
    if unprefixed.startswith('"""'):
        return s

    if unprefixed.startswith("'''"):
        orig_quote, new_quote = "'''", '"""'
    elif unprefixed[0] == '"':
        orig_quote, new_quote = '"', "'"
    else:
        orig_quote, new_quote = "'", '"'

    first_quote_pos = s.find(orig_quote)
    assert first_quote_pos != -1, f"INTERNAL ERROR: Malformed string {s!r}"

    prefix = s[:first_quote_pos]
    lowered_prefix = prefix.casefold()
    unescaped_new_quote = _cached_compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
    escaped_new_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
    escaped_orig_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
    body = s[first_quote_pos + len(orig_quote) : -len(orig_quote)]

    if "r" in lowered_prefix:
        if unescaped_new_quote.search(body):
            # There's at least one unescaped new_quote in this raw string
            # so converting is impossible
            return s

        # Do not introduce or remove backslashes in raw strings
        new_body = body
    else:
        # remove unnecessary escapes
        unescaped = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
        if unescaped != body:
            # Consider the string without unnecessary escapes as the original
            body = unescaped
            s = f"{prefix}{orig_quote}{body}{orig_quote}"
        new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", unescaped)
        new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)

    if "f" in lowered_prefix:
        matches = re.findall(
            r"""
            (?:(?<!\{)|^)\{  # start of the string or a non-{ followed by a single {
                ([^{].*?)  # contents of the brackets except if begins with {{
            \}(?:(?!\})|$)  # A } followed by end of the string or a non-}
            """,
            new_body,
            re.VERBOSE,
        )
        if any("\\" in str(m) for m in matches):
            # Do not introduce backslashes in interpolated expressions
            return s

    if new_quote == '"""' and new_body[-1:] == '"':
        # edge case: a trailing double quote would merge with the closing
        # triple quote, so it has to be escaped
        new_body = new_body[:-1] + '\\"'

    escapes_before = body.count("\\")
    escapes_after = new_body.count("\\")
    if escapes_after > escapes_before:
        return s  # Do not introduce more escaping

    if escapes_after == escapes_before and orig_quote == '"':
        return s  # Prefer double quotes

    return f"{prefix}{new_quote}{new_body}{new_quote}"
240

241

242
def normalize_fstring_quotes(
    quote: str,
    middles: list[Leaf],
    is_raw_fstring: bool,
) -> tuple[list[Leaf], str]:
    """Prefer double quotes but only if it doesn't cause more escaping.

    Adds or removes backslashes as appropriate.

    Args:
        quote: the quote sequence currently delimiting the f-string, either a
            single quote character or a run of three identical ones.
        middles: leaves whose `.value` holds the literal text segments of the
            f-string. Their values may be rewritten in place.
        is_raw_fstring: whether the f-string is raw, in which case backslashes
            are never added or removed.

    Returns:
        A tuple of `middles` (the same list object) and the quote sequence
        that should delimit the f-string.
    """
    if quote == '"""':
        return middles, quote

    elif quote == "'''":
        new_quote = '"""'
    elif quote == '"':
        new_quote = "'"
    else:
        new_quote = '"'

    unescaped_new_quote = _cached_compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
    escaped_new_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
    escaped_orig_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){quote}")
    if is_raw_fstring:
        if quote == '"':
            # Already using the preferred quote; nothing to convert.
            return middles, quote

        for middle in middles:
            if unescaped_new_quote.search(middle.value):
                # There's at least one unescaped new_quote in this raw string
                # so converting is impossible
                return middles, quote

        if new_quote == '"""' and middles and middles[-1].value.endswith('"'):
            # A trailing double quote would merge with the closing triple
            # quote and cannot be escaped in a raw string, so keep the original.
            return middles, quote

        # Do not introduce or remove backslashes in raw strings, just switch to
        # the double-quote counterpart of the original delimiter (a
        # triple-quoted string must stay triple-quoted).
        return middles, new_quote

    new_segments = []
    for middle in middles:
        segment = middle.value
        # remove unnecessary escapes
        new_segment = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", segment)
        if segment != new_segment:
            # Consider the string without unnecessary escapes as the original
            middle.value = new_segment

        new_segment = sub_twice(escaped_orig_quote, rf"\1\2{quote}", new_segment)
        new_segment = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_segment)
        new_segments.append(new_segment)

    # `new_segments` is empty when the f-string has no literal text at all.
    if new_quote == '"""' and new_segments and new_segments[-1].endswith('"'):
        # edge case: a trailing double quote would merge with the closing
        # triple quote, so it has to be escaped
        new_segments[-1] = new_segments[-1][:-1] + '\\"'

    # Escapes are totalled over the whole f-string so that the decision to
    # switch quotes is made once, for all segments together.
    orig_escape_count = 0
    new_escape_count = 0
    for middle, new_segment in zip(middles, new_segments, strict=True):
        orig_escape_count += middle.value.count("\\")
        new_escape_count += new_segment.count("\\")

    if new_escape_count > orig_escape_count:
        return middles, quote  # Do not introduce more escaping

    if new_escape_count == orig_escape_count and quote == '"':
        return middles, quote  # Prefer double quotes

    for middle, new_segment in zip(middles, new_segments, strict=True):
        middle.value = new_segment

    return middles, new_quote
307

308

309
def normalize_unicode_escape_sequences(leaf: Leaf) -> None:
    """Normalize the case of Unicode escape sequences in `leaf.value`, in place.

    Hex digits of \\x, \\u and \\U escapes are lowercased and the name inside
    \\N{...} is uppercased. Strings with an "r" in their prefix are left alone.
    """
    text = leaf.value
    if "r" in get_string_prefix(text).lower():
        return

    def replace(m: Match[str]) -> str:
        groups = m.groupdict()
        slashes = groups["backslashes"]

        if len(slashes) % 2 == 0:
            # An even run of backslashes escapes itself, so what follows is
            # plain text rather than an escape sequence.
            return slashes + groups["body"]

        # \u, \U and \x: keep the marker, lowercase the hex digits.
        for marker in ("u", "U", "x"):
            if groups[marker]:
                return slashes + marker + groups[marker].lower()

        assert groups["N"], f"Unexpected match: {m}"
        # \N{}
        return slashes + "N{" + groups["N"].upper() + "}"

    leaf.value = UNICODE_ESCAPE_RE.sub(replace, text)
338

339

340
@lru_cache(maxsize=4096)
def char_width(char: str) -> int:
    """Return the display width of a single character in a terminal or editor
    (which respects Unicode East Asian Width).

    Full width characters count as 2 and half width characters as 1.
    Control characters count as 0.
    """
    table = WIDTH_TABLE
    codepoint = ord(char)
    lo, hi = 0, len(table) - 1
    mid = hi // 2
    # Binary search over (start_codepoint, end_codepoint, width) entries.
    while True:
        first, last, width = table[mid]
        if first <= codepoint <= last:
            # Negative widths in the table are reported as 0.
            return max(width, 0)
        if codepoint < first:
            hi = mid - 1
        else:
            lo = mid + 1
        if hi < lo:
            # Not covered by any table entry: default width.
            return 1
        mid = (hi + lo) // 2
365

366

367
def str_width(line_str: str) -> int:
    """Return how wide `line_str` is when displayed in a terminal or editor
    (which respects Unicode East Asian Width).

    Useful, for example, to decide whether a string is too wide to display
    in a terminal or editor.
    """
    if not line_str.isascii():
        return sum(char_width(char) for char in line_str)
    # Fast path for a line consisting of only ASCII characters
    return len(line_str)
378

379

380
def count_chars_in_width(line_str: str, max_width: int) -> int:
    """Return how many leading characters of `line_str` fit in a terminal or
    editor that is `max_width` wide (which respects Unicode East Asian
    Width).
    """
    used_width = 0
    for index, char in enumerate(line_str):
        used_width += char_width(char)
        if used_width > max_width:
            # This character is the first one that no longer fits.
            return index
    return len(line_str)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc