
pantsbuild / pants / 18252174847

05 Oct 2025 01:36AM UTC coverage: 43.382% (-36.9 percentage points from 80.261%)

push · github · web-flow
run tests on mac arm (#22717)

Just doing the minimal to pull forward the x86_64 pattern.

ref #20993

25776 of 59416 relevant lines covered (43.38%)

1.3 hits per line

Source File
44.83% covered: /src/python/pants/backend/python/typecheck/mypy/rules.py
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import annotations

import dataclasses
from collections.abc import Iterable
from dataclasses import dataclass
from hashlib import sha256
from textwrap import dedent  # noqa: PNT20

import packaging

from pants.backend.python.subsystems.setup import PythonSetup
from pants.backend.python.typecheck.mypy.subsystem import (
    MyPy,
    MyPyConfigFile,
    MyPyFieldSet,
    MyPyFirstPartyPlugins,
)
from pants.backend.python.util_rules import pex_from_targets
from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
from pants.backend.python.util_rules.partition import (
    _partition_by_interpreter_constraints_and_resolve,
)
from pants.backend.python.util_rules.pex import (
    PexRequest,
    VenvPex,
    VenvPexProcess,
    create_pex,
    create_venv_pex,
    determine_venv_pex_resolve_info,
    setup_venv_pex_process,
)
from pants.backend.python.util_rules.pex_from_targets import RequirementsPexRequest
from pants.backend.python.util_rules.python_sources import (
    PythonSourceFilesRequest,
    prepare_python_sources,
)
from pants.base.build_root import BuildRoot
from pants.core.goals.check import REPORT_DIR, CheckRequest, CheckResult, CheckResults
from pants.core.util_rules.source_files import SourceFilesRequest, determine_source_files
from pants.core.util_rules.system_binaries import (
    CpBinary,
    LnBinary,
    MkdirBinary,
    MktempBinary,
    MvBinary,
)
from pants.engine.collection import Collection
from pants.engine.fs import CreateDigest, FileContent, MergeDigests, RemovePrefix
from pants.engine.internals.graph import resolve_coarsened_targets as coarsened_targets_get
from pants.engine.intrinsics import create_digest, execute_process, merge_digests, remove_prefix
from pants.engine.rules import collect_rules, concurrently, implicitly, rule
from pants.engine.target import CoarsenedTargets, CoarsenedTargetsRequest
from pants.engine.unions import UnionRule
from pants.option.global_options import GlobalOptions
from pants.util.logging import LogLevel
from pants.util.ordered_set import FrozenOrderedSet, OrderedSet
from pants.util.strutil import pluralize, shell_quote


@dataclass(frozen=True)
class MyPyPartition:
    field_sets: FrozenOrderedSet[MyPyFieldSet]
    root_targets: CoarsenedTargets
    resolve_description: str | None
    interpreter_constraints: InterpreterConstraints

    def description(self) -> str:
        ics = str(sorted(str(c) for c in self.interpreter_constraints))
        return f"{self.resolve_description}, {ics}" if self.resolve_description else ics


class MyPyPartitions(Collection[MyPyPartition]):
    pass


class MyPyRequest(CheckRequest):
    field_set_type = MyPyFieldSet
    tool_name = MyPy.options_scope


async def _generate_argv(
    mypy: MyPy,
    *,
    pex: VenvPex,
    cache_dir: str,
    venv_python: str,
    file_list_path: str,
    python_version: str | None,
) -> tuple[str, ...]:
    args = [pex.pex.argv0, f"--python-executable={venv_python}", *mypy.args]
    if mypy.config:
        args.append(f"--config-file={mypy.config}")
    if python_version:
        args.append(f"--python-version={python_version}")

    mypy_pex_info = await determine_venv_pex_resolve_info(pex)
    mypy_info = mypy_pex_info.find("mypy")
    assert mypy_info is not None
    if mypy_info.version > packaging.version.Version("0.700") and python_version is not None:
        # Skip mtime checks because we don't propagate mtime when materializing the sandbox, so the
        # mtime checks will always fail otherwise.
        args.append("--skip-cache-mtime-check")
        # See "__mypy_runner.sh" below for explanation
        args.append("--sqlite-cache")  # Added in v 0.660
        args.extend(("--cache-dir", cache_dir))
    else:
        # Don't bother caching
        args.append("--cache-dir=/dev/null")
    args.append(f"@{file_list_path}")
    return tuple(args)
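
# Illustrative note (not part of the original file): assuming the resolved mypy is
# newer than 0.700 and a python_version was auto-set, the argv assembled above looks
# roughly like:
#   (<mypy PEX entry point>, "--python-executable=<requirements venv python>",
#    <user-supplied [mypy].args>, "--config-file=<config>", "--python-version=<X.Y>",
#    "--skip-cache-mtime-check", "--sqlite-cache", "--cache-dir", "<run cache dir>",
#    "@__files.txt")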


def determine_python_files(files: Iterable[str]) -> tuple[str, ...]:
    """We run over all .py and .pyi files, but .pyi files take precedence.

    MyPy will error if we say to run over the same module with both its .py and .pyi files, so we
    must be careful to only use the .pyi stub.
    """
    result: OrderedSet[str] = OrderedSet()
    for f in files:
        if f.endswith(".pyi"):
            py_file = f[:-1]  # That is, strip the `.pyi` suffix to be `.py`.
            result.discard(py_file)
            result.add(f)
        elif f.endswith(".py"):
            pyi_file = f + "i"
            if pyi_file not in result:
                result.add(f)
        else:
            result.add(f)

    return tuple(result)

137

138
@rule
3✔
139
async def mypy_typecheck_partition(
3✔
140
    partition: MyPyPartition,
141
    config_file: MyPyConfigFile,
142
    first_party_plugins: MyPyFirstPartyPlugins,
143
    build_root: BuildRoot,
144
    mypy: MyPy,
145
    python_setup: PythonSetup,
146
    mkdir: MkdirBinary,
147
    mktemp: MktempBinary,
148
    cp: CpBinary,
149
    mv: MvBinary,
150
    ln: LnBinary,
151
    global_options: GlobalOptions,
152
) -> CheckResult:
153
    # MyPy requires 3.5+ to run, but uses the typed-ast library to work with 2.7, 3.4, 3.5, 3.6,
154
    # and 3.7. However, typed-ast does not understand 3.8+, so instead we must run MyPy with
155
    # Python 3.8+ when relevant. We only do this if <3.8 can't be used, as we don't want a
156
    # loose requirement like `>=3.6` to result in requiring Python 3.8+, which would error if
157
    # 3.8+ is not installed on the machine.
158
    tool_interpreter_constraints = (
×
159
        partition.interpreter_constraints
160
        if (
161
            mypy.options.is_default("interpreter_constraints")
162
            and partition.interpreter_constraints.requires_python38_or_newer(
163
                python_setup.interpreter_versions_universe
164
            )
165
        )
166
        else mypy.interpreter_constraints
167
    )
168

169
    roots_sources_get = determine_source_files(
×
170
        SourceFilesRequest(fs.sources for fs in partition.field_sets)
171
    )
172

173
    # See `requirements_venv_pex` for how this will get wrapped in a `VenvPex`.
174
    requirements_pex_get = create_pex(
×
175
        **implicitly(
176
            RequirementsPexRequest(
177
                (fs.address for fs in partition.field_sets),
178
                hardcoded_interpreter_constraints=partition.interpreter_constraints,
179
            )
180
        )
181
    )
182

183
    mypy_pex_get = create_venv_pex(
×
184
        **implicitly(
185
            mypy.to_pex_request(
186
                interpreter_constraints=tool_interpreter_constraints,
187
                extra_requirements=first_party_plugins.requirement_strings,
188
            )
189
        )
190
    )
191

192
    (
×
193
        roots_sources,
194
        mypy_pex,
195
        requirements_pex,
196
    ) = await concurrently(
197
        roots_sources_get,
198
        mypy_pex_get,
199
        requirements_pex_get,
200
    )
201

202
    python_files = determine_python_files(roots_sources.snapshot.files)
×
203
    file_list_path = "__files.txt"
×
204
    file_list_digest_request = create_digest(
×
205
        CreateDigest([FileContent(file_list_path, "\n".join(python_files).encode())])
206
    )
207

208
    # This creates a venv with all the 3rd-party requirements used by the code. We tell MyPy to
209
    # use this venv by setting `--python-executable`. Note that this Python interpreter is
210
    # different than what we run MyPy with.
211
    #
212
    # We could have directly asked the `PexFromTargetsRequest` to return a `VenvPex`, rather than
213
    # `Pex`, but that would mean missing out on sharing a cache with other goals like `test` and
214
    # `run`.
215
    requirements_venv_pex_request = create_venv_pex(
×
216
        **implicitly(
217
            PexRequest(
218
                output_filename="requirements_venv.pex",
219
                internal_only=True,
220
                pex_path=[requirements_pex],
221
                interpreter_constraints=partition.interpreter_constraints,
222
            )
223
        )
224
    )
225
    closure_sources_get = prepare_python_sources(
×
226
        PythonSourceFilesRequest(partition.root_targets.closure()), **implicitly()
227
    )
228

229
    closure_sources, requirements_venv_pex, file_list_digest = await concurrently(
×
230
        closure_sources_get, requirements_venv_pex_request, file_list_digest_request
231
    )
232

233
    py_version = config_file.python_version_to_autoset(
×
234
        partition.interpreter_constraints, python_setup.interpreter_versions_universe
235
    )
236
    named_cache_dir = ".cache/mypy_cache"
×
237
    mypy_cache_dir = f"{named_cache_dir}/{sha256(build_root.path.encode()).hexdigest()}"
×
238
    if partition.resolve_description:
×
239
        mypy_cache_dir += f"/{partition.resolve_description}"
×
240
    run_cache_dir = ".tmp_cache/mypy_cache"
×
241
    argv = await _generate_argv(
×
242
        mypy,
243
        pex=mypy_pex,
244
        venv_python=requirements_venv_pex.python.argv0,
245
        cache_dir=run_cache_dir,
246
        file_list_path=file_list_path,
247
        python_version=py_version,
248
    )
249

    script_runner_digest = await create_digest(
        CreateDigest(
            [
                FileContent(
                    "__mypy_runner.sh",
                    dedent(
                        f"""\
                            # We want to leverage the MyPy cache for fast incremental runs of MyPy.
                            # Pants exposes "append_only_caches" we can leverage, but with the caveat
                            # that it requires either only appending files, or multiprocess-safe access.
                            #
                            # MyPy guarantees neither, but there are workarounds!
                            #
                            # By default, MyPy uses 2 cache files per source file, which introduces a
                            # whole slew of race conditions. We can minimize the race conditions by
                            # using MyPy's SQLite cache. MyPy still has race conditions when using the
                            # db, as it issues at least 2 single-row queries per source file at different
                            # points in time (therefore SQLite's own safety guarantees don't apply).
                            #
                            # Our workaround depends on whether we can hardlink between the sandbox
                            # and cache or not.
                            #
                            # If we can hardlink (this means the two sides of the link are on the
                            # same filesystem), then after mypy runs, we hardlink from the sandbox
                            # back to the named cache.
                            #
                            # If we can't hardlink, we resort to copying the result next to the
                            # cache under a temporary name, and finally doing an atomic mv from the
                            # tempfile to the real one.
                            #
                            # In either case, the result is an atomic replacement of the "old" named
                            # cache db, such that old references (via opened file descriptors) are
                            # still valid, but new references use the new contents.
                            #
                            # There is a chance of multiple processes thrashing on the cache, leaving
                            # it in a state that doesn't reflect reality at the current point in time,
                            # and forcing other processes to redo work that may already be done. This
                            # strategy still provides a net benefit because the cache is generally
                            # _mostly_ valid (it includes entries for the standard library, 3rdparty
                            # deps, and 1stparty sources), and even in the worst case
                            # (every single file has changed) the overhead of missing the cache on each
                            # query should be small compared to the typechecking work itself.
                            #
                            # Lastly, we expect that since this is run through Pants, which attempts
                            # to partition MyPy runs by python version (the db is independent for each
                            # version) and uses a one-process-at-a-time daemon by default,
                            # multiple MyPy processes operating on a single db cache should be rare.

                            NAMED_CACHE_DIR="{mypy_cache_dir}/{py_version}"
                            NAMED_CACHE_DB="$NAMED_CACHE_DIR/cache.db"
                            SANDBOX_CACHE_DIR="{run_cache_dir}/{py_version}"
                            SANDBOX_CACHE_DB="$SANDBOX_CACHE_DIR/cache.db"

                            {mkdir.path} -p "$NAMED_CACHE_DIR" > /dev/null 2>&1
                            {mkdir.path} -p "$SANDBOX_CACHE_DIR" > /dev/null 2>&1
                            {cp.path} "$NAMED_CACHE_DB" "$SANDBOX_CACHE_DB" > /dev/null 2>&1

                            {" ".join((shell_quote(arg) for arg in argv))}
                            EXIT_CODE=$?

                            if ! {ln.path} "$SANDBOX_CACHE_DB" "$NAMED_CACHE_DB" > /dev/null 2>&1; then
                                TMP_CACHE=$({mktemp.path} "$SANDBOX_CACHE_DB.tmp.XXXXXX")
                                {cp.path} "$SANDBOX_CACHE_DB" "$TMP_CACHE" > /dev/null 2>&1
                                {mv.path} "$TMP_CACHE" "$NAMED_CACHE_DB" > /dev/null 2>&1
                            fi

                            exit $EXIT_CODE
                        """
                    ).encode(),
                    is_executable=True,
                )
            ]
        )
    )

    merged_input_files = await merge_digests(
        MergeDigests(
            [
                file_list_digest,
                first_party_plugins.sources_digest,
                closure_sources.source_files.snapshot.digest,
                requirements_venv_pex.digest,
                config_file.digest,
                script_runner_digest,
            ]
        )
    )

    env = {
        "PEX_EXTRA_SYS_PATH": ":".join(first_party_plugins.source_roots),
        "MYPYPATH": ":".join(closure_sources.source_roots),
        # Always emit colors to improve cache hit rates; the results are post-processed to match
        # the global setting.
        "MYPY_FORCE_COLOR": "1",
        # Mypy needs to know the terminal so it can use appropriate escape sequences. ansi is a
        # reasonable lowest common denominator for the sort of escapes mypy uses (NB. TERM=xterm
        # uses some additional codes that colors.strip_color doesn't remove).
        "TERM": "ansi",
        # Force a fixed terminal width. This is effectively infinite, disabling mypy's
        # builtin truncation and line wrapping. Terminals do an acceptable job of soft-wrapping
        # diagnostic text and source code is typically already hard-wrapped to a limited width.
        # (Unique random number to make it easier to search for the source of this setting.)
        "MYPY_FORCE_TERMINAL_WIDTH": "642092230765939",
    }

    process = await setup_venv_pex_process(
        VenvPexProcess(
            mypy_pex,
            input_digest=merged_input_files,
            extra_env=env,
            output_directories=(REPORT_DIR,),
            description=f"Run MyPy on {pluralize(len(python_files), 'file')}.",
            level=LogLevel.DEBUG,
            append_only_caches={"mypy_cache": named_cache_dir},
        ),
        **implicitly(),
    )
    process = dataclasses.replace(process, argv=("./__mypy_runner.sh",))
    result = await execute_process(process, **implicitly())
    report = await remove_prefix(RemovePrefix(result.output_digest, REPORT_DIR))
    return CheckResult.from_fallible_process_result(
        result,
        partition_description=partition.description(),
        report=report,
        output_simplifier=global_options.output_simplifier(),
    )


@rule(desc="Determine if necessary to partition MyPy input", level=LogLevel.DEBUG)
async def mypy_determine_partitions(
    request: MyPyRequest, mypy: MyPy, python_setup: PythonSetup
) -> MyPyPartitions:
    resolve_and_interpreter_constraints_to_field_sets = (
        _partition_by_interpreter_constraints_and_resolve(request.field_sets, python_setup)
    )
    coarsened_targets = await coarsened_targets_get(
        CoarsenedTargetsRequest(field_set.address for field_set in request.field_sets),
        **implicitly(),
    )
    coarsened_targets_by_address = coarsened_targets.by_address()

    return MyPyPartitions(
        MyPyPartition(
            FrozenOrderedSet(field_sets),
            CoarsenedTargets(
                OrderedSet(
                    coarsened_targets_by_address[field_set.address] for field_set in field_sets
                )
            ),
            resolve if len(python_setup.resolves) > 1 else None,
            interpreter_constraints or mypy.interpreter_constraints,
        )
        for (resolve, interpreter_constraints), field_sets in sorted(
            resolve_and_interpreter_constraints_to_field_sets.items()
        )
    )


# TODO(#10864): Improve performance, e.g. by leveraging the MyPy cache.
@rule(desc="Typecheck using MyPy", level=LogLevel.DEBUG)
async def mypy_typecheck(request: MyPyRequest, mypy: MyPy) -> CheckResults:
    if mypy.skip:
        return CheckResults([], checker_name=request.tool_name)

    partitions = await mypy_determine_partitions(request, **implicitly())
    partitioned_results = await concurrently(
        mypy_typecheck_partition(partition, **implicitly()) for partition in partitions
    )
    return CheckResults(partitioned_results, checker_name=request.tool_name)


def rules():
    return [
        *collect_rules(),
        UnionRule(CheckRequest, MyPyRequest),
        *pex_from_targets.rules(),
    ]
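
For reference (not part of rules.py): the rules() function above is the standard hook a Pants backend uses to expose its rules. A backend's register.py typically re-exports them, roughly like this sketch (simplified; the real register module typically aggregates additional rule sets as well):

    # register.py (illustrative sketch, not the actual file)
    from pants.backend.python.typecheck.mypy import rules as mypy_rules

    def rules():
        return [*mypy_rules.rules()]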