• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pantsbuild / pants / 19015773527

02 Nov 2025 05:33PM UTC coverage: 17.872% (-62.4%) from 80.3%
19015773527

Pull #22816

github

web-flow
Merge a12d75757 into 6c024e162
Pull Request #22816: Update Pants internal Python to 3.14

4 of 5 new or added lines in 3 files covered. (80.0%)

28452 existing lines in 683 files now uncovered.

9831 of 55007 relevant lines covered (17.87%)

0.18 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

45.88
/src/python/pants/util/strutil.py
1
# Copyright 2014 Pants project contributors (see CONTRIBUTORS.md).
2
# Licensed under the Apache License, Version 2.0 (see LICENSE).
3

4
from __future__ import annotations
1✔
5

6
import dataclasses
1✔
7
import hashlib
1✔
8
import json
1✔
9
import re
1✔
10
import shlex
1✔
11
import textwrap
1✔
12
from collections import abc
1✔
13
from collections.abc import Callable, Iterable, Mapping
1✔
14
from logging import Logger
1✔
15
from typing import Any, TypeVar
1✔
16

17
import colors
1✔
18
from typing_extensions import ParamSpec
1✔
19

20
from pants.engine.internals.native_engine import Digest
1✔
21
from pants.util.ordered_set import FrozenOrderedSet, OrderedSet
1✔
22

23

24
def ensure_binary(text_or_binary: bytes | str) -> bytes:
1✔
UNCOV
25
    if isinstance(text_or_binary, bytes):
×
26
        return text_or_binary
×
UNCOV
27
    elif isinstance(text_or_binary, str):
×
UNCOV
28
        return text_or_binary.encode("utf8")
×
29
    else:
UNCOV
30
        raise TypeError(f"Argument is neither text nor binary type.({type(text_or_binary)})")
×
31

32

33
def ensure_text(text_or_binary: bytes | str) -> str:
1✔
UNCOV
34
    if isinstance(text_or_binary, bytes):
×
UNCOV
35
        return text_or_binary.decode()
×
UNCOV
36
    elif isinstance(text_or_binary, str):
×
UNCOV
37
        return text_or_binary
×
38
    else:
UNCOV
39
        raise TypeError(f"Argument is neither text nor binary type ({type(text_or_binary)})")
×
40

41

42
def safe_shlex_split(text_or_binary: bytes | str) -> list[str]:
    """Split a string using shell-like syntax.

    Safe even on python versions whose shlex.split() method doesn't accept unicode.
    """
    return shlex.split(ensure_text(text_or_binary))
49

50

51
# `_shell_unsafe_chars_pattern` and `shell_quote` are modified from the CPython 3.6 source:
# https://github.com/python/cpython/blob/142e3c08a40c75b5788474b0defe7d5c0671f675/Lib/shlex.py#L308
# NOTE(review): unlike stdlib `shlex.quote`, this pattern is compiled without `re.ASCII`, so
# non-ASCII word characters count as shell-safe — presumably intentional; confirm.
_shell_unsafe_chars_pattern = re.compile(r"[^\w@%+=:,./-]").search
54

55

56
def shell_quote(s: str) -> str:
    """Return a shell-escaped version of the string *s*."""
    if not s:
        return "''"
    if _shell_unsafe_chars_pattern(s) is None:
        # Every character is shell-safe; no quoting needed.
        return s

    # use single quotes, and put single quotes into double quotes
    # the string $'b is then quoted as '$'"'"'b'
    escaped = s.replace("'", "'\"'\"'")
    return f"'{escaped}'"
66

67

68
def safe_shlex_join(arg_list: Iterable[str]) -> str:
    """Join a list of strings into a shlex-able string.

    Shell-quotes each argument with `shell_quote()`.
    """
    quoted = [shell_quote(arg) for arg in arg_list]
    return " ".join(quoted)
74

75

76
def pluralize(count: int, item_type: str, include_count: bool = True) -> str:
    """Pluralizes the item_type if the count does not equal one.

    For example `pluralize(1, 'apple')` returns '1 apple',
    while `pluralize(0, 'apple') returns '0 apples'.

    When `include_count=False` does not add the count in front of the pluralized `item_type`.

    :return The count and inflected item_type together as a string
    """
    if count == 1:
        inflected = item_type
    elif item_type.endswith("s"):
        inflected = item_type + "es"
    elif item_type.endswith("y"):
        inflected = item_type[:-1] + "ies"
    else:
        inflected = item_type + "s"

    return f"{count} {inflected}" if include_count else inflected
101

102

103
def comma_separated_list(items: Iterable[str]) -> str:
1✔
UNCOV
104
    items = list(items)
×
UNCOV
105
    if len(items) == 0:
×
UNCOV
106
        return ""
×
UNCOV
107
    if len(items) == 1:
×
UNCOV
108
        return items[0]
×
UNCOV
109
    if len(items) == 2:
×
UNCOV
110
        return f"{items[0]} and {items[1]}"
×
111
    # For 3+ items, employ the oxford comma.
UNCOV
112
    return f"{', '.join(items[0:-1])}, and {items[-1]}"
×
113

114

115
def strip_prefix(string: str, prefix: str) -> str:
    """Returns a copy of the string from which the multi-character prefix has been stripped.

    Use strip_prefix() instead of lstrip() to remove a substring (instead of individual characters)
    from the beginning of a string, if the substring is present.  lstrip() does not match substrings
    but rather treats a substring argument as a set of characters.

    :param string: The string from which to strip the specified prefix.
    :param prefix: The substring to strip from the left of string, if present.
    :return: The string with prefix stripped from the left, if present.
    """
    # str.removeprefix (Python 3.9+) implements exactly this contract.
    return string.removeprefix(prefix)
130

131

132
# NB: We allow bytes because `ProcessResult.std{err,out}` uses bytes.
133
def strip_v2_chroot_path(v: bytes | str) -> str:
1✔
134
    """Remove all instances of the chroot tmpdir path from the str so that it only uses relative
135
    paths.
136

137
    This is useful when a tool that is run with the V2 engine outputs absolute paths. It is
138
    confusing for the user to see the absolute path in the final output because it is an
139
    implementation detail that Pants copies their source code into a chroot.
140
    """
UNCOV
141
    if isinstance(v, bytes):
×
UNCOV
142
        v = v.decode()
×
UNCOV
143
    return re.sub(r"/[a-zA-Z0-9-_\/]*/pants-sandbox-[a-zA-Z0-9]+/", "", v)
×
144

145

146
@dataclasses.dataclass(frozen=True)
1✔
147
class Simplifier:
1✔
148
    """Helper for options for conditionally simplifying a string."""
149

150
    # it's only rarely useful to show a chroot path to a user, hence they're stripped by default
151
    strip_chroot_path: bool = True
1✔
152
    """remove all instances of the chroot tmpdir path"""
1✔
153
    strip_formatting: bool = False
1✔
154
    """remove ANSI formatting commands (colors, bold, etc)"""
1✔
155

156
    def simplify(self, v: bytes | str) -> str:
1✔
UNCOV
157
        chroot = (
×
158
            strip_v2_chroot_path(v)
159
            if self.strip_chroot_path
160
            else v.decode()
161
            if isinstance(v, bytes)
162
            else v
163
        )
UNCOV
164
        formatting = colors.strip_color(chroot) if self.strip_formatting else chroot
×
UNCOV
165
        assert isinstance(formatting, str)
×
166

UNCOV
167
        return formatting
×
168

169

170
def hard_wrap(s: str, *, indent: int = 0, width: int = 96) -> list[str]:
    """Hard wrap a string while still preserving any prior hard wrapping (new lines).

    This works well when the input uses soft wrapping, e.g. via Python's implicit string
    concatenation.

    Usually, you will want to join the lines together with "\n".join().
    """
    pad = " " * indent
    result: list[str] = []
    for line in s.splitlines():
        # wrap() returns [] for an empty line, but we want to emit those, hence the `or [line]`.
        wrapped = textwrap.wrap(line, width=width - indent) or [line]
        result.extend(pad + piece for piece in wrapped)
    return result
184

185

186
def bullet_list(elements: Iterable[str], max_elements: int = -1) -> str:
    """Format a bullet list with padding.

    Callers should normally use `\n\n` before and (if relevant) after this so that the bullets
    appear as a distinct section.

    The `max_elements` may be used to limit the number of bullet rows to output, and instead leave a
    last bullet item with "* ... and N more".
    """
    if not elements:
        return ""

    rows = tuple(elements)
    if 0 < max_elements < len(rows):
        # Replace the tail with a single "... and N more" marker row.
        omitted = len(rows) - max_elements + 1
        rows = rows[: max_elements - 1] + (f"... and {omitted} more",)

    return "  * " + "\n  * ".join(rows)
207

208

209
def first_paragraph(s: str) -> str:
    """Get the first paragraph, where paragraphs are separated by blank lines."""
    paragraph: list[str] = []
    for line in s.splitlines():
        if not line.strip():
            break
        paragraph.append(line)
    return " ".join(paragraph)
216

217

218
# This is more conservative than it necessarily need be. In practice POSIX filesystems
# support any printable character except the path separator (forward slash), but it's
# better to be over-cautious.

# TODO: <> may not be safe in Windows paths. When we support Windows we will probably
#  want to detect that here and be more restrictive on that platform. But we do want
#  to support <> where possible, because they appear in target partition descriptions
#  (e.g., "CPython>=2.7,<3") and those are sometimes converted to paths.
# Matches any single character that is NOT in the allowed set below (used by `path_safe`).
_non_path_safe_re = re.compile(r"[^a-zA-Z0-9_\-.()<>,= ]")
227

228

229
def path_safe(s: str) -> str:
    """Return *s* with every character outside `_non_path_safe_re`'s allowed set replaced by "_"."""
    return _non_path_safe_re.sub("_", s)
231

232

233
# TODO: This may be a bit too eager. Some strings might want to preserve multiple spaces in them
# (e.g. a Python code block which has a comment in it would have 2 spaces before the "#", which
# would be squashed by this eager regex). The challenge is that there's some overlap between prose
# (which shouldn't need multiple spaces) and code (which might) for non-alphanumeric characters.
# We can tighten as necessary.
# Two-or-more spaces sandwiched between non-whitespace characters; softwrap() squashes them to one.
_super_space_re = re.compile(r"(\S)  +(\S)")
# Three or more consecutive newlines (2+ blank lines); softwrap() collapses them to exactly two.
_more_than_2_newlines = re.compile(r"\n{2}\n+")
# Captures the leading-space run of a line that has content; softwrap() uses search(), so it
# effectively measures the margin of the first content line.
_leading_whitespace_re = re.compile(r"(^[ ]*)(?:[^ \n])", re.MULTILINE)
241

242

243
def softwrap(text: str) -> str:
    """Turns a multiline-ish string into a softwrapped string.

    This is primarily used to turn strings in source code, which often have a single paragraph
    span multiple source lines, into consistently formatted blocks for hardwrapping later.

    Applies the following rules:
        - Dedents the text (you also don't need to start your string with a backslash)
            (The algorithm used for dedention simply looks at the first indented line and
            unambiguously tries to strip that much indentation from every indented line thereafter.)
        - Replaces all occurrences of multiple spaces in a sentence with a single space
        - Replaces all occurrences of multiple newlines with exactly 2 newlines
        - Replaces singular newlines with a space (to turn a paragraph into one long line)
            - Unless the following line is indented, or begins with a `* ` (to indicate an item in a list),
              in which case the newline and indentation are preserved.
        - Double-newlines are preserved
        - Extra indentation is preserved, and also preserves the indented line's ending
            (If your indented line needs to be continued due to it being longer than the suggested
            width, use trailing backlashes to line-continue the line. Because we squash multiple
            spaces, this will "just work".)

    To keep the numbered or bullet lists indented without converting to a code block,
    make sure to use 2 spaces (and not 4).
    """
    if not text:
        return text
    # If callers didn't use a leading "\" that's OK.
    if text[0] == "\n":
        text = text[1:]

    # Collapse runs of 3+ newlines down to exactly one blank line.
    text = _more_than_2_newlines.sub("\n\n", text)
    # Dedent: strip the first content line's leading-space margin from the start of every line
    # (a no-op when that margin is empty).
    margin = _leading_whitespace_re.search(text)
    if margin:
        text = re.sub(r"(?m)^" + margin[1], "", text)

    lines = text.splitlines(keepends=True)
    # NB: collecting a list of strs and `"".join` is more performant than calling `+=` repeatedly.
    result_strs: list[str] = []
    for i, line in enumerate(lines):
        # Squash interior runs of multiple spaces to a single space.
        line = _super_space_re.sub(r"\1 \2", line)
        next_line = lines[i + 1] if i + 1 < len(lines) else ""
        if (
            # Keep the hard break when this or the next line is blank (with keepends a blank
            # line is exactly "\n"), when either line is indented, or when this line is a
            # "* " list item.
            "\n" in (line, next_line)
            or line.startswith(" ")
            or next_line.startswith(" ")
            or line.lstrip().startswith("* ")
        ):
            result_strs.append(line)
        else:
            # Otherwise soft-wrap: join this line onto the next with a single space.
            result_strs.append(line.rstrip())
            result_strs.append(" ")

    return "".join(result_strs).rstrip()
296

297

298
_MEMORY_UNITS = ["B", "KiB", "MiB", "GiB"]


def fmt_memory_size(value: int, *, units: Iterable[str] = _MEMORY_UNITS) -> str:
    """Formats a numeric value as amount of bytes alongside the biggest byte-based unit from the
    list that represents the same amount without using decimals.

    :param value: the number of bytes.
    :param units: unit suffixes, each representing 1024x the previous; defaults to B..GiB.
    :return: e.g. ``fmt_memory_size(2048)`` -> ``"2KiB"``; if `units` is empty, just `str(value)`.
    """
    if not units:
        return str(value)

    units = tuple(units)
    amount = value
    unit_idx = 0
    # Climb to larger units only while the amount stays an exact multiple of 1024.
    while amount >= 1024 and amount % 1024 == 0 and unit_idx < len(units) - 1:
        # Floor division keeps this exact: `int(amount / 1024)` goes through a float and
        # can lose precision for values beyond 2**53.
        amount //= 1024
        unit_idx += 1

    return f"{amount}{units[unit_idx]}"
317

318

319
def strval(val: str | Callable[[], str]) -> str:
1✔
320
    return val if isinstance(val, str) else val()
1✔
321

322

323
def help_text(val: str | Callable[[], str]) -> str | Callable[[], str]:
1✔
324
    """Convenience method for defining an optionally lazy-evaluated softwrapped help string.
325

326
    This exists because `mypy` does not respect the type hints defined on base `Field` and `Target`
327
    classes.
328
    """
329
    # This can go away when https://github.com/python/mypy/issues/14702 is fixed
330
    if isinstance(val, str):
1✔
331
        return softwrap(val)
1✔
332
    else:
333
        return lambda: softwrap(val())
1✔
334

335

336
# Generic typing helpers for signature-preserving decorators (see `docstring` in this file):
# P captures the decorated callable's parameters, R its return type.
P = ParamSpec("P")
R = TypeVar("R")
338

339

340
def docstring(doc: str | Callable[[], str]) -> Callable[[Callable[P, R]], Callable[P, R]]:
    """Use this decorator to provide a dynamic doc-string to a function."""

    def _decorate(func: Callable[P, R]) -> Callable[P, R]:
        # Accepts either the string itself or a thunk that lazily produces it.
        func.__doc__ = strval(doc)
        return func

    return _decorate
348

349

350
class _JsonEncoder(json.JSONEncoder):
    """JSON encoder that additionally serializes mappings, sequences/ordered sets, dataclasses,
    and `Digest`s.

    Anything else falls through to `json.JSONEncoder.default`, which raises TypeError.
    """

    def default(self, o):
        """Return a serializable object for o."""
        # NB: branch order matters — a Mapping must be handled before the Sequence check.
        if isinstance(o, abc.Mapping):
            return dict(o)
        if isinstance(o, (abc.Sequence, OrderedSet, FrozenOrderedSet)):
            return list(o)

        # NB: A quick way to embed the type in the hash so that two objects with the same data but
        # different types produce different hashes.
        classname = o.__class__.__name__
        if dataclasses.is_dataclass(o):
            return {"__class__.__name__": classname, **dataclasses.asdict(o)}
        if isinstance(o, (Digest,)):
            return {"__class__.__name__": classname, "fingerprint": o.fingerprint}
        return super().default(o)
369

370

371
def stable_hash(value: Any, *, name: str = "sha256") -> str:
    """Attempts to return a stable hash of the value stable across processes.

    "Stable" here means that if `value` is equivalent in multiple invocations (across multiple
    processes), it should produce the same hash. To that end, what values are accepted are limited
    in scope.
    """
    # Canonical form: no whitespace, sorted keys, so equivalent values serialize identically.
    serialized = json.dumps(
        value, indent=None, separators=(",", ":"), sort_keys=True, cls=_JsonEncoder
    )
    return hashlib.new(name, serialized.encode("utf-8")).hexdigest()
384

385

386
# NB: If an OS string is not valid UTF-8, Python encodes the non-decodable bytes
#  as lone surrogates (see https://peps.python.org/pep-0383/).
#  However when we pass these to Rust, we will fail to decode as strict UTF-8.
#  So we perform a lossy re-encoding to prevent this.
def strict_utf8(s: str) -> str:
    """Lossily re-encode *s* so the result round-trips as strict UTF-8."""
    as_bytes = s.encode("utf-8", errors="replace")
    return as_bytes.decode("utf-8")
392

393

394
def get_strict_env(env: Mapping[str, str], logger: Logger) -> Mapping[str, str]:
    """Return a copy of `env` with any entry whose name or value is not valid UTF-8 dropped.

    Dropped entries are reported via `logger.warning`.
    """
    strict_env: dict[str, str] = {}
    for key, val in sorted(env.items()):
        strict_key = strict_utf8(key)
        if strict_key != key:
            # We can only log strict_key, because logging will choke on non-UTF-8.
            # But the reader will know what we mean.
            logger.warning(f"Environment variable with non-UTF-8 name ignored: {strict_key}")
            continue
        if strict_utf8(val) != val:
            logger.warning(f"Environment variable with non-UTF-8 value ignored: {key}")
            continue
        strict_env[key] = val
    return strict_env
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc