22209455021

Committed 20 Feb 2026 02:57AM UTC coverage: 95.685%. First build

Build # 22209455021

Build Type

Pull #4998

github

Committed by

web-flow

Commit Message

Merge 9846ad2c5 into 55180793f

Pull Request #4998: Aggregate escape counts across all f-string segments in normalize_fstring_quotes

Coverage Stats

5196 of 5483 branches covered (94.77%)

0 of 4 new or added lines in 1 file covered. (0.0%)

7960 of 8319 relevant lines covered (95.68%)

4.78 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.28

/src/black/strings.py

"""
Simple formatting on strings. Further string formatting code is in trans.py.
"""

import re
import sys
from functools import lru_cache
from re import Match, Pattern
from typing import Final

from black._width_table import WIDTH_TABLE
from blib2to3.pytree import Leaf

# Characters accepted in a string literal prefix, in both lower and upper case.
STRING_PREFIX_CHARS: Final = "fturbFTURB"  # All possible string prefix characters.
# Splits a string literal into two groups: (1) the leading run of prefix
# characters, possibly empty, and (2) everything after it.  DOTALL lets the
# second group span newlines, so multi-line literals match as a whole.
STRING_PREFIX_RE: Final = re.compile(
    r"^([" + STRING_PREFIX_CHARS + r"]*)(.*)$", re.DOTALL
)
# Matches one or more backslashes followed by the body of a \u, \U, \x or
# \N{...} escape.  The backslash run is captured separately so that callers
# can tell from its length whether the escape is itself escaped.
UNICODE_ESCAPE_RE: Final = re.compile(
    r"(?P<backslashes>\\+)(?P<body>"
    r"(u(?P<u>[a-fA-F0-9]{4}))"  # Character with 16-bit hex value xxxx
    r"|(U(?P<U>[a-fA-F0-9]{8}))"  # Character with 32-bit hex value xxxxxxxx
    r"|(x(?P<x>[a-fA-F0-9]{2}))"  # Character with hex value hh
    r"|(N\{(?P<N>[a-zA-Z0-9 \-]{2,})\})"  # Character named name in the Unicode database
    r")",
    re.VERBOSE,
)


def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Apply the `regex` -> `replacement` substitution to `original` two times.

    A single `sub` pass never rewrites overlapping matches, because the text
    consumed by one match cannot start the next.  Running the substitution
    again on the result of the first pass picks those up, which is what
    string normalization relies on.
    """
    first_pass = regex.sub(replacement, original)
    second_pass = regex.sub(replacement, first_pass)
    return second_pass


def has_triple_quotes(string: str) -> bool:
    """
    Returns:
        True iff @string, once its prefix characters are removed, begins
        with a triple-quote delimiter (three double or three single quotes).
    """
    without_prefix = string.lstrip(STRING_PREFIX_CHARS)
    opening = without_prefix[:3]
    return opening == '"""' or opening == "'''"


def lines_with_leading_tabs_expanded(s: str) -> list[str]:
    """
    Break @s into lines, expanding tabs only inside each line's leading
    whitespace (following the normal Python rules).

    Lines that are blank or have no leading whitespace are kept verbatim.
    When @s ends with a newline, an extra empty string is appended so the
    trailing line break is still represented in the result.
    """
    result: list[str] = []
    for raw_line in s.splitlines():
        content = raw_line.lstrip()
        indent_width = len(raw_line) - len(content)
        if content and indent_width:
            # Only the indentation is expanded; tabs inside the content stay.
            result.append(raw_line[:indent_width].expandtabs() + content)
        else:
            result.append(raw_line)
    if s.endswith("\n"):
        result.append("")
    return result


def fix_multiline_docstring(docstring: str, prefix: str) -> str:
    """Re-indent the continuation lines of @docstring with @prefix.

    The common indentation of all non-blank lines after the first is removed
    and replaced by @prefix; the first line is simply stripped.  Blank
    continuation lines become empty strings, except the very last line,
    which always receives @prefix.
    """
    # https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation
    assert docstring, "INTERNAL ERROR: Multiline docstrings cannot be empty"
    all_lines = lines_with_leading_tabs_expanded(docstring)
    head, tail = all_lines[0], all_lines[1:]

    # Smallest indentation among the non-blank continuation lines; the
    # sentinel survives only when there are no such lines.
    indent = min(
        (len(line) - len(line.lstrip()) for line in tail if line.lstrip()),
        default=sys.maxsize,
    )

    result = [head.strip()]
    if indent < sys.maxsize:
        final_pos = len(tail) - 1
        for pos, line in enumerate(tail):
            text = line[indent:].rstrip()
            keeps_prefix = bool(text) or pos == final_pos
            result.append(prefix + text if keeps_prefix else "")
    return "\n".join(result)


def get_string_prefix(string: str) -> str:
    """
    Pre-conditions:
        * assert_is_leaf_string(@string)

    Returns:
        The leading run of prefix characters of @string
        (e.g. '', 'r', 'f', or 'rf').
    """
    assert_is_leaf_string(string)

    # Advance past every leading character that is a valid prefix character.
    end = 0
    while end < len(string) and string[end] in STRING_PREFIX_CHARS:
        end += 1
    return string[:end]


def assert_is_leaf_string(string: str) -> None:
    """
    Verify that @string looks like the `leaf.value` of a Leaf whose type is
    `token.STRING`.

    Pre-conditions:
        * @string starts with either ', ", <prefix>', or <prefix>" where
        `set(<prefix>)` is some subset of `set(STRING_PREFIX_CHARS)`.
        * @string ends with a quote character (' or ").

    Raises:
        AssertionError(...) if the pre-conditions listed above are not
        satisfied.
    """
    # Position of the earliest quote character of either kind, or -1 when
    # the string contains no quote at all.
    quote_positions = [
        pos for pos in (string.find('"'), string.find("'")) if pos != -1
    ]
    quote_idx = min(quote_positions) if quote_positions else -1

    assert (
        0 <= quote_idx < len(string) - 1
    ), f"{string!r} is missing a starting quote character (' or \")."
    assert string[-1] in (
        "'",
        '"',
    ), f"{string!r} is missing an ending quote character (' or \")."

    # Everything before the first quote must consist of prefix characters.
    leading_chars = set(string[:quote_idx])
    assert leading_chars.issubset(
        set(STRING_PREFIX_CHARS)
    ), f"{leading_chars} is NOT a subset of {set(STRING_PREFIX_CHARS)}."


def normalize_string_prefix(s: str) -> str:
    """Normalize the prefix of the string literal @s.

    "F" and "B" are lowercased and "u"/"U" are dropped; every other prefix
    character is left as written.  In a two-character prefix the "r"/"R" is
    moved to the front.
    """
    match = STRING_PREFIX_RE.match(s)
    assert match is not None, f"failed to match string {s!r}"
    orig_prefix, remainder = match.group(1), match.group(2)

    # One pass over the prefix: lowercase F/B, delete u/U.
    new_prefix = orig_prefix.translate(
        {ord("F"): "f", ord("B"): "b", ord("U"): None, ord("u"): None}
    )

    # Python syntax guarantees max 2 prefixes and that one of them is "r"
    if len(new_prefix) == 2 and new_prefix[0].lower() != "r":
        new_prefix = new_prefix[::-1]
    return new_prefix + remainder


# The `re` module keeps its own cache of compiled patterns, but going through
# lru_cache is cheaper per lookup: on a long list literal of strings this
# still improves performance by 5-9%.
@lru_cache(maxsize=64)
def _cached_compile(pattern: str) -> Pattern[str]:
    """Return the compiled form of `pattern`, memoized per pattern string."""
    compiled = re.compile(pattern)
    return compiled


def normalize_string_quotes(s: str) -> str:
    """Prefer double quotes but only if it doesn't cause more escaping.

    Adds or removes backslashes as appropriate.

    @s is a complete string literal (optional prefix, opening quote, body,
    closing quote).  The return value is either @s itself, @s with
    unnecessary escapes of the other quote character removed, or the literal
    rewritten with the other quote style.  Literals that already use triple
    double quotes are returned unchanged.
    """
    value = s.lstrip(STRING_PREFIX_CHARS)
    if value[:3] == '"""':
        return s

    elif value[:3] == "'''":
        orig_quote = "'''"
        new_quote = '"""'
    elif value[0] == '"':
        orig_quote = '"'
        new_quote = "'"
    else:
        orig_quote = "'"
        new_quote = '"'
    first_quote_pos = s.find(orig_quote)
    assert first_quote_pos != -1, f"INTERNAL ERROR: Malformed string {s!r}"

    prefix = s[:first_quote_pos]
    # new_quote preceded by an even number (possibly zero) of backslashes.
    unescaped_new_quote = _cached_compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
    # new_quote / orig_quote preceded by an odd number of backslashes.
    escaped_new_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
    escaped_orig_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
    body = s[first_quote_pos + len(orig_quote) : -len(orig_quote)]
    if "r" in prefix.casefold():
        if unescaped_new_quote.search(body):
            # There's at least one unescaped new_quote in this raw string
            # so converting is impossible
            return s

        # Do not introduce or remove backslashes in raw strings
        new_body = body
    else:
        # remove unnecessary escapes
        new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
        if body != new_body:
            # Consider the string without unnecessary escapes as the original
            body = new_body
            s = f"{prefix}{orig_quote}{body}{orig_quote}"
        # Build the candidate body for the new quote style: drop the escapes
        # in front of orig_quote, then escape every bare new_quote.
        new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
        new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)

    if "f" in prefix.casefold():
        # Collect the text inside single-brace replacement fields of the
        # candidate body ({{ and }} are skipped by the pattern).
        matches = re.findall(
            r"""
            (?:(?<!\{)|^)\{  # start of the string or a non-{ followed by a single {
                ([^{].*?)  # contents of the brackets except if begins with {{
            \}(?:(?!\})|$)  # A } followed by end of the string or a non-}
            """,
            new_body,
            re.VERBOSE,
        )
        for m in matches:
            if "\\" in str(m):
                # Do not introduce backslashes in interpolated expressions
                return s

    if new_quote == '"""' and new_body[-1:] == '"':
        # edge case:
        # a body ending in " would run into the closing triple quote, so the
        # final character gets a backslash in front of it.
        new_body = new_body[:-1] + '\\"'
    # Compare the number of backslashes in both variants to decide which
    # quote style to keep.
    orig_escape_count = body.count("\\")
    new_escape_count = new_body.count("\\")
    if new_escape_count > orig_escape_count:
        return s  # Do not introduce more escaping

    if new_escape_count == orig_escape_count and orig_quote == '"':
        return s  # Prefer double quotes

    return f"{prefix}{new_quote}{new_body}{new_quote}"


def normalize_fstring_quotes(
    quote: str,
    middles: list[Leaf],
    is_raw_fstring: bool,
) -> tuple[list[Leaf], str]:
    """Prefer double quotes but only if it doesn't cause more escaping.

    Adds or removes backslashes as appropriate.

    Args:
        quote: the quote delimiter currently used by the f-string (a single
            or double quote, either alone or tripled).
        middles: the leaves holding the literal text segments of the
            f-string; only their `value` attribute is read and written.
        is_raw_fstring: whether the f-string carries a raw prefix, in which
            case backslashes are never added or removed.

    Returns:
        `(middles, chosen_quote)`.  The leaves in @middles are updated in
        place: unnecessary escapes are always removed (non-raw only), and
        when the quote style changes the values are rewritten for the new
        delimiter.
    """
    if quote == '"""':
        return middles, quote

    elif quote == "'''":
        new_quote = '"""'
    elif quote == '"':
        new_quote = "'"
    else:
        new_quote = '"'

    unescaped_new_quote = _cached_compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
    escaped_new_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
    escaped_orig_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){quote}")
    if is_raw_fstring:
        if quote == '"':
            # Already double-quoted and nothing may be rewritten: keep as is.
            return middles, quote

        for middle in middles:
            if unescaped_new_quote.search(middle.value):
                # There's at least one unescaped new_quote in this raw string
                # so converting is impossible
                return middles, quote

        if new_quote == '"""' and middles and middles[-1].value.endswith('"'):
            # A trailing " would run into the closing triple quote, and a
            # raw string cannot take the extra backslash needed to avoid it.
            return middles, quote

        # Do not introduce or remove backslashes in raw strings; switch to
        # the double-quote delimiter of the same width as the original
        # (a triple-quoted string stays triple-quoted).
        return middles, new_quote

    new_segments = []
    for middle in middles:
        segment = middle.value
        # remove unnecessary escapes
        new_segment = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", segment)
        if segment != new_segment:
            # Consider the string without unnecessary escapes as the original
            middle.value = new_segment

        new_segment = sub_twice(escaped_orig_quote, rf"\1\2{quote}", new_segment)
        new_segment = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_segment)
        new_segments.append(new_segment)

    # `new_segments` may be empty when the f-string has no literal text.
    if new_quote == '"""' and new_segments and new_segments[-1].endswith('"'):
        # edge case: a final " would run into the closing triple quote
        new_segments[-1] = new_segments[-1][:-1] + '\\"'

    # Escapes are counted over the whole f-string, not per segment, so the
    # decision is the same for every segment.
    orig_escape_count = sum(middle.value.count("\\") for middle in middles)
    new_escape_count = sum(segment.count("\\") for segment in new_segments)

    if new_escape_count > orig_escape_count:
        return middles, quote  # Do not introduce more escaping

    if new_escape_count == orig_escape_count and quote == '"':
        return middles, quote  # Prefer double quotes

    for middle, new_segment in zip(middles, new_segments, strict=True):
        middle.value = new_segment

    return middles, new_quote


def normalize_unicode_escape_sequences(leaf: Leaf) -> None:
    """Normalize the case of Unicode escape sequences in `leaf.value`, in place.

    Hex digits of \\u, \\U and \\x escapes are lowercased and the character
    name of a \\N{...} escape is uppercased.  Raw strings are left untouched,
    as are escapes preceded by an even number of backslashes.
    """
    text = leaf.value
    if "r" in get_string_prefix(text).lower():
        return

    def normalize_case(m: Match[str]) -> str:
        groups = m.groupdict()
        slashes = groups["backslashes"]

        # An even-length run of backslashes consists of escaped backslashes
        # only, so what follows is plain text and is returned unchanged.
        if len(slashes) % 2 == 0:
            return slashes + groups["body"]

        # \u, \U and \x: lowercase the hex digits, keep the marker letter.
        for marker in ("u", "U", "x"):
            digits = groups[marker]
            if digits:
                return slashes + marker + digits.lower()

        # \N{}
        assert groups["N"], f"Unexpected match: {m}"
        return slashes + "N{" + groups["N"].upper() + "}"

    leaf.value = UNICODE_ESCAPE_RE.sub(normalize_case, text)


@lru_cache(maxsize=4096)
def char_width(char: str) -> int:
    """Return the display width of a single character in a terminal or
    editor (which respects Unicode East Asian Width).

    The width comes from a binary search over the (start, end, width)
    entries of WIDTH_TABLE.  A negative table width is reported as 0, and a
    character not covered by any entry has width 1.
    """
    codepoint = ord(char)
    lo, hi = 0, len(WIDTH_TABLE) - 1
    mid = hi // 2
    while True:
        first, last, width = WIDTH_TABLE[mid]
        if first <= codepoint <= last:
            return width if width >= 0 else 0
        if codepoint < first:
            hi = mid - 1
        else:
            lo = mid + 1
        if lo > hi:
            # Search interval exhausted: no entry covers this codepoint.
            return 1
        mid = (lo + hi) // 2


def str_width(line_str: str) -> int:
    """Return the display width of `line_str` in a terminal or editor
    (which respects Unicode East Asian Width).

    Useful, for example, to decide whether a string is too wide to be
    displayed on one line.
    """
    if not line_str.isascii():
        return sum(char_width(ch) for ch in line_str)
    # Fast path: the length is returned directly for ASCII-only input.
    return len(line_str)


def count_chars_in_width(line_str: str, max_width: int) -> int:
    """Return how many leading characters of `line_str` fit within
    `max_width` columns of a terminal or editor (which respects Unicode East
    Asian Width).
    """
    used_width = 0
    for position, ch in enumerate(line_str):
        used_width += char_width(ch)
        if used_width > max_width:
            # This character would overflow; only the ones before it fit.
            return position
    return len(line_str)

1	"""
2	Simple formatting on strings. Further string formatting code is in trans.py.
3	"""
4
5	import re	5✔
6	import sys	5✔
7	from functools import lru_cache	5✔
8	from re import Match, Pattern	5✔
9	from typing import Final	5✔
10
11	from black._width_table import WIDTH_TABLE	5✔
12	from blib2to3.pytree import Leaf	5✔
13
14	STRING_PREFIX_CHARS: Final = "fturbFTURB" # All possible string prefix characters.	5✔
15	STRING_PREFIX_RE: Final = re.compile(	5✔
16	r"^([" + STRING_PREFIX_CHARS + r"])(.)$", re.DOTALL
17	)
18	UNICODE_ESCAPE_RE: Final = re.compile(	5✔
19	r"(?P<backslashes>\\+)(?P<body>"
20	r"(u(?P<u>[a-fA-F0-9]{4}))" # Character with 16-bit hex value xxxx
21	r"\|(U(?P<U>[a-fA-F0-9]{8}))" # Character with 32-bit hex value xxxxxxxx
22	r"\|(x(?P<x>[a-fA-F0-9]{2}))" # Character with hex value hh
23	r"\|(N\{(?P<N>[a-zA-Z0-9 \-]{2,})\})" # Character named name in the Unicode database
24	r")",
25	re.VERBOSE,
26	)
27
28
29	def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:	5✔
30	"""Replace `regex` with `replacement` twice on `original`.
31
32	This is used by string normalization to perform replaces on
33	overlapping matches.
34	"""
35	return regex.sub(replacement, regex.sub(replacement, original))	5✔
36
37
38	def has_triple_quotes(string: str) -> bool:	5✔
39	"""
40	Returns:
41	True iff @string starts with three quotation characters.
42	"""
43	raw_string = string.lstrip(STRING_PREFIX_CHARS)	5✔
44	return raw_string[:3] in {'"""', "'''"}	5✔
45
46
47	def lines_with_leading_tabs_expanded(s: str) -> list[str]:	5✔
48	"""
49	Splits string into lines and expands only leading tabs (following the normal
50	Python rules)
51	"""
52	lines = []	5✔
53	for line in s.splitlines():	5✔
54	stripped_line = line.lstrip()	5✔
55	if not stripped_line or stripped_line == line:	5✔
56	lines.append(line)	5✔
57	else:
58	prefix_length = len(line) - len(stripped_line)	5✔
59	prefix = line[:prefix_length].expandtabs()	5✔
60	lines.append(prefix + stripped_line)	5✔
61	if s.endswith("\n"):	5✔
62	lines.append("")	5✔
63	return lines	5✔
64
65
66	def fix_multiline_docstring(docstring: str, prefix: str) -> str:	5✔
67	# https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation
68	assert docstring, "INTERNAL ERROR: Multiline docstrings cannot be empty"	5✔
69	lines = lines_with_leading_tabs_expanded(docstring)	5✔
70	# Determine minimum indentation (first line doesn't count):
71	indent = sys.maxsize	5✔
72	for line in lines[1:]:	5✔
73	stripped = line.lstrip()	5✔
74	if stripped:	5✔
75	indent = min(indent, len(line) - len(stripped))	5✔
76	# Remove indentation (first line is special):
77	trimmed = [lines[0].strip()]	5✔
78	if indent < sys.maxsize:	5✔
79	last_line_idx = len(lines) - 2	5✔
80	for i, line in enumerate(lines[1:]):	5✔
81	stripped_line = line[indent:].rstrip()	5✔
82	if stripped_line or i == last_line_idx:	5✔
83	trimmed.append(prefix + stripped_line)	5✔
84	else:
85	trimmed.append("")	5✔
86	return "\n".join(trimmed)	5✔
87
88
89	def get_string_prefix(string: str) -> str:	5✔
90	"""
91	Pre-conditions:
92	* assert_is_leaf_string(@string)
93
94	Returns:
95	@string's prefix (e.g. '', 'r', 'f', or 'rf').
96	"""
97	assert_is_leaf_string(string)	5✔
98
99	prefix = []	5✔
100	for char in string:	5!
101	if char in STRING_PREFIX_CHARS:	5✔
102	prefix.append(char)	5✔
103	else:
104	break	5✔
105	return "".join(prefix)	5✔
106
107
108	def assert_is_leaf_string(string: str) -> None:	5✔
109	"""
110	Checks the pre-condition that @string has the format that you would expect
111	of `leaf.value` where `leaf` is some Leaf such that `leaf.type ==
112	token.STRING`. A more precise description of the pre-conditions that are
113	checked are listed below.
114
115	Pre-conditions:
116	* @string starts with either ', ", <prefix>', or <prefix>" where
117	`set(<prefix>)` is some subset of `set(STRING_PREFIX_CHARS)`.
118	* @string ends with a quote character (' or ").
119
120	Raises:
121	AssertionError(...) if the pre-conditions listed above are not
122	satisfied.
123	"""
124	dquote_idx = string.find('"')	5✔
125	squote_idx = string.find("'")	5✔
126	if -1 in [dquote_idx, squote_idx]:	5✔
127	quote_idx = max(dquote_idx, squote_idx)	5✔
128	else:
129	quote_idx = min(squote_idx, dquote_idx)	5✔
130
131	assert (	5✔
132	0 <= quote_idx < len(string) - 1
133	), f"{string!r} is missing a starting quote character (' or \")."
134	assert string[-1] in (	5✔
135	"'",
136	'"',
137	), f"{string!r} is missing an ending quote character (' or \")."
138	assert set(string[:quote_idx]).issubset(	5✔
139	set(STRING_PREFIX_CHARS)
140	), f"{set(string[:quote_idx])} is NOT a subset of {set(STRING_PREFIX_CHARS)}."
141
142
143	def normalize_string_prefix(s: str) -> str:	5✔
144	"""Make all string prefixes lowercase."""
145	match = STRING_PREFIX_RE.match(s)	5✔
146	assert match is not None, f"failed to match string {s!r}"	5✔
147	orig_prefix = match.group(1)	5✔
148	new_prefix = (	5✔
149	orig_prefix.replace("F", "f")
150	.replace("B", "b")
151	.replace("U", "")
152	.replace("u", "")
153	)
154
155	# Python syntax guarantees max 2 prefixes and that one of them is "r"
156	if len(new_prefix) == 2 and new_prefix[0].lower() != "r":	5✔
157	new_prefix = new_prefix[::-1]	5✔
158	return f"{new_prefix}{match.group(2)}"	5✔
159
160
161	# Re(gex) does actually cache patterns internally but this still improves
162	# performance on a long list literal of strings by 5-9% since lru_cache's
163	# caching overhead is much lower.
164	@lru_cache(maxsize=64)	5✔
165	def _cached_compile(pattern: str) -> Pattern[str]:	5✔
166	return re.compile(pattern)	5✔
167
168
169	def normalize_string_quotes(s: str) -> str:	5✔
170	"""Prefer double quotes but only if it doesn't cause more escaping.
171
172	Adds or removes backslashes as appropriate.
173	"""
174	value = s.lstrip(STRING_PREFIX_CHARS)	5✔
175	if value[:3] == '"""':	5✔
176	return s	5✔
177
178	elif value[:3] == "'''":	5✔
179	orig_quote = "'''"	5✔
180	new_quote = '"""'	5✔
181	elif value[0] == '"':	5✔
182	orig_quote = '"'	5✔
183	new_quote = "'"	5✔
184	else:
185	orig_quote = "'"	5✔
186	new_quote = '"'	5✔
187	first_quote_pos = s.find(orig_quote)	5✔
188	assert first_quote_pos != -1, f"INTERNAL ERROR: Malformed string {s!r}"	5✔
189
190	prefix = s[:first_quote_pos]	5✔
191	unescaped_new_quote = _cached_compile(rf"(([^\\]\|^)(\\\\)*){new_quote}")	5✔
192	escaped_new_quote = _cached_compile(rf"([^\\]\|^)\\((?:\\\\)*){new_quote}")	5✔
193	escaped_orig_quote = _cached_compile(rf"([^\\]\|^)\\((?:\\\\)*){orig_quote}")	5✔
194	body = s[first_quote_pos + len(orig_quote) : -len(orig_quote)]	5✔
195	if "r" in prefix.casefold():	5✔
196	if unescaped_new_quote.search(body):	5✔
197	# There's at least one unescaped new_quote in this raw string
198	# so converting is impossible
199	return s	5✔
200
201	# Do not introduce or remove backslashes in raw strings
202	new_body = body	5✔
203	else:
204	# remove unnecessary escapes
205	new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)	5✔
206	if body != new_body:	5✔
207	# Consider the string without unnecessary escapes as the original
208	body = new_body	5✔
209	s = f"{prefix}{orig_quote}{body}{orig_quote}"	5✔
210	new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)	5✔
211	new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)	5✔
212
213	if "f" in prefix.casefold():	5✔
214	matches = re.findall(	5✔
215	r"""
216	(?:(?<!\{)\|^)\{ # start of the string or a non-{ followed by a single {
217	([^{].*?) # contents of the brackets except if begins with {{
218	\}(?:(?!\})\|$) # A } followed by end of the string or a non-}
219	""",
220	new_body,
221	re.VERBOSE,
222	)
223	for m in matches:	5✔
224	if "\\" in str(m):	5✔
225	# Do not introduce backslashes in interpolated expressions
226	return s	5✔
227
228	if new_quote == '"""' and new_body[-1:] == '"':	5✔
229	# edge case:
230	new_body = new_body[:-1] + '\\"'	5✔
231	orig_escape_count = body.count("\\")	5✔
232	new_escape_count = new_body.count("\\")	5✔
233	if new_escape_count > orig_escape_count:	5✔
234	return s # Do not introduce more escaping	5✔
235
236	if new_escape_count == orig_escape_count and orig_quote == '"':	5✔
237	return s # Prefer double quotes	5✔
238
239	return f"{prefix}{new_quote}{new_body}{new_quote}"	5✔
240
241
242	def normalize_fstring_quotes(	5✔
243	quote: str,
244	middles: list[Leaf],
245	is_raw_fstring: bool,
246	) -> tuple[list[Leaf], str]:
247	"""Prefer double quotes but only if it doesn't cause more escaping.
248
249	Adds or removes backslashes as appropriate.
250	"""
251	if quote == '"""':	×
252	return middles, quote	×
253
254	elif quote == "'''":	×
255	new_quote = '"""'	×
256	elif quote == '"':	×
257	new_quote = "'"	×
258	else:
259	new_quote = '"'	×
260
261	unescaped_new_quote = _cached_compile(rf"(([^\\]\|^)(\\\\)*){new_quote}")	×
262	escaped_new_quote = _cached_compile(rf"([^\\]\|^)\\((?:\\\\)*){new_quote}")	×
263	escaped_orig_quote = _cached_compile(rf"([^\\]\|^)\\((?:\\\\)*){quote}")	×
264	if is_raw_fstring:	×
265	for middle in middles:	×
266	if unescaped_new_quote.search(middle.value):	×
267	# There's at least one unescaped new_quote in this raw string
268	# so converting is impossible
269	return middles, quote	×
270
271	# Do not introduce or remove backslashes in raw strings, just use double quote
272	return middles, '"'	×
273
274	new_segments = []	×
275	for middle in middles:	×
276	segment = middle.value	×
277	# remove unnecessary escapes
278	new_segment = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", segment)	×
279	if segment != new_segment:	×
280	# Consider the string without unnecessary escapes as the original
281	middle.value = new_segment	×
282
283	new_segment = sub_twice(escaped_orig_quote, rf"\1\2{quote}", new_segment)	×
284	new_segment = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_segment)	×
285	new_segments.append(new_segment)	×
286
287	if new_quote == '"""' and new_segments[-1].endswith('"'):	×
288	# edge case:
289	new_segments[-1] = new_segments[-1][:-1] + '\\"'	×
290
NEW 291	orig_escape_count = 0	×
NEW 292	new_escape_count = 0	×
293	for middle, new_segment in zip(middles, new_segments, strict=True):	×
NEW 294	orig_escape_count += middle.value.count("\\")	×
NEW 295	new_escape_count += new_segment.count("\\")	×
296
297	if new_escape_count > orig_escape_count:	×
298	return middles, quote # Do not introduce more escaping	×
299
300	if new_escape_count == orig_escape_count and quote == '"':	×
301	return middles, quote # Prefer double quotes	×
302
303	for middle, new_segment in zip(middles, new_segments, strict=True):	×
304	middle.value = new_segment	×
305
306	return middles, new_quote	×
307
308
309	def normalize_unicode_escape_sequences(leaf: Leaf) -> None:	5✔
310	"""Replace hex codes in Unicode escape sequences with lowercase representation."""
311	text = leaf.value	5✔
312	prefix = get_string_prefix(text)	5✔
313	if "r" in prefix.lower():	5✔
314	return	5✔
315
316	def replace(m: Match[str]) -> str:	5✔
317	groups = m.groupdict()	5✔
318	back_slashes = groups["backslashes"]	5✔
319
320	if len(back_slashes) % 2 == 0:	5✔
321	return back_slashes + groups["body"]	5✔
322
323	if groups["u"]:	5✔
324	# \u
325	return back_slashes + "u" + groups["u"].lower()	5✔
326	elif groups["U"]:	5✔
327	# \U
328	return back_slashes + "U" + groups["U"].lower()	5✔
329	elif groups["x"]:	5✔
330	# \x
331	return back_slashes + "x" + groups["x"].lower()	5✔
332	else:
333	assert groups["N"], f"Unexpected match: {m}"	5✔
334	# \N{}
335	return back_slashes + "N{" + groups["N"].upper() + "}"	5✔
336
337	leaf.value = re.sub(UNICODE_ESCAPE_RE, replace, text)	5✔
338
339
340	@lru_cache(maxsize=4096)	5✔
341	def char_width(char: str) -> int:	5✔
342	"""Return the width of a single character as it would be displayed in a
343	terminal or editor (which respects Unicode East Asian Width).
344
345	Full width characters are counted as 2, while half width characters are
346	counted as 1. Also control characters are counted as 0.
347	"""
348	table = WIDTH_TABLE	5✔
349	codepoint = ord(char)	5✔
350	highest = len(table) - 1	5✔
351	lowest = 0	5✔
352	idx = highest // 2	5✔
353	while True:	5✔
354	start_codepoint, end_codepoint, width = table[idx]	5✔
355	if codepoint < start_codepoint:	5✔
356	highest = idx - 1	5✔
357	elif codepoint > end_codepoint:	5✔
358	lowest = idx + 1	5✔
359	else:
360	return 0 if width < 0 else width	5✔
361	if highest < lowest:	5✔
362	break	5✔
363	idx = (highest + lowest) // 2	5✔
364	return 1	5✔
365
366
367	def str_width(line_str: str) -> int:	5✔
368	"""Return the width of `line_str` as it would be displayed in a terminal
369	or editor (which respects Unicode East Asian Width).
370
371	You could utilize this function to determine, for example, if a string
372	is too wide to display in a terminal or editor.
373	"""
374	if line_str.isascii():	5✔
375	# Fast path for a line consisting of only ASCII characters
376	return len(line_str)	5✔
377	return sum(map(char_width, line_str))	5✔
378
379
380	def count_chars_in_width(line_str: str, max_width: int) -> int:	5✔
381	"""Count the number of characters in `line_str` that would fit in a
382	terminal or editor of `max_width` (which respects Unicode East Asian
383	Width).
384	"""
385	total_width = 0	5✔
386	for i, char in enumerate(line_str):	5✔
387	width = char_width(char)	5✔
388	if width + total_width > max_width:	5✔
389	return i	5✔
390	total_width += width	5✔
391	return len(line_str)	5✔

psf / black / 22209455021

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous