18562858873

Committed 16 Oct 2025 01:26PM UTC coverage: 80.266% (-0.001%) from 80.267%

Build # 18562858873

Build Type

Pull #22753

github

Committed by

web-flow

Commit Message

Merge 87039b5c3 into bd4607b86

Pull Request #22753: revert PEX_EXTRA_SYS_PATH for mypy changes

Run Details

1 of 2 new or added lines in 1 file covered. (50.0%)

2 existing lines in 2 files now uncovered.

77230 of 96218 relevant lines covered (80.27%)

3.63 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

56.78

/src/python/pants/backend/python/typecheck/mypy/rules.py

# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import annotations

import dataclasses
import itertools
from collections.abc import Iterable
from dataclasses import dataclass
from hashlib import sha256
from textwrap import dedent  # noqa: PNT20

import packaging

from pants.backend.python.subsystems.setup import PythonSetup
from pants.backend.python.typecheck.mypy.subsystem import (
    MyPy,
    MyPyConfigFile,
    MyPyFieldSet,
    MyPyFirstPartyPlugins,
)
from pants.backend.python.util_rules import pex_from_targets
from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
from pants.backend.python.util_rules.partition import (
    _partition_by_interpreter_constraints_and_resolve,
)
from pants.backend.python.util_rules.pex import (
    PexRequest,
    VenvPex,
    VenvPexProcess,
    create_pex,
    create_venv_pex,
    determine_venv_pex_resolve_info,
    setup_venv_pex_process,
)
from pants.backend.python.util_rules.pex_from_targets import RequirementsPexRequest
from pants.backend.python.util_rules.python_sources import (
    PythonSourceFilesRequest,
    prepare_python_sources,
)
from pants.base.build_root import BuildRoot
from pants.core.goals.check import REPORT_DIR, CheckRequest, CheckResult, CheckResults
from pants.core.util_rules.source_files import SourceFilesRequest, determine_source_files
from pants.core.util_rules.system_binaries import (
    CpBinary,
    LnBinary,
    MkdirBinary,
    MktempBinary,
    MvBinary,
)
from pants.engine.collection import Collection
from pants.engine.fs import CreateDigest, FileContent, MergeDigests, RemovePrefix
from pants.engine.internals.graph import resolve_coarsened_targets as coarsened_targets_get
from pants.engine.intrinsics import create_digest, execute_process, merge_digests, remove_prefix
from pants.engine.rules import collect_rules, concurrently, implicitly, rule
from pants.engine.target import CoarsenedTargets, CoarsenedTargetsRequest
from pants.engine.unions import UnionRule
from pants.option.global_options import GlobalOptions
from pants.util.logging import LogLevel
from pants.util.ordered_set import FrozenOrderedSet, OrderedSet
from pants.util.strutil import pluralize, shell_quote


@dataclass(frozen=True)
class MyPyPartition:
    """One group of field sets that is type-checked together in a single MyPy run."""

    # The field sets whose sources are passed to MyPy for this partition.
    field_sets: FrozenOrderedSet[MyPyFieldSet]
    # The coarsened targets corresponding to `field_sets`.
    root_targets: CoarsenedTargets
    # Optional label; when set it is prepended to `description()`.
    resolve_description: str | None
    # The interpreter constraints shared by this partition.
    interpreter_constraints: InterpreterConstraints

    def description(self) -> str:
        """Return a human-readable label for this partition.

        The label is the sorted list of stringified interpreter constraints, prefixed with
        `resolve_description` and a comma when that field is truthy.
        """
        constraints_label = str(sorted(map(str, self.interpreter_constraints)))
        if not self.resolve_description:
            return constraints_label
        return f"{self.resolve_description}, {constraints_label}"


class MyPyPartitions(Collection[MyPyPartition]):
    """A `Collection` of `MyPyPartition`s; the return type of `mypy_determine_partitions`."""

    pass


class MyPyRequest(CheckRequest):
    """The `CheckRequest` subclass for MyPy; registered against `CheckRequest` in `rules()`."""

    # The field set type this request operates on.
    field_set_type = MyPyFieldSet
    # The tool name is taken from the MyPy subsystem's options scope.
    tool_name = MyPy.options_scope


async def _generate_argv(
    mypy: MyPy,
    *,
    pex: VenvPex,
    cache_dir: str,
    venv_python: str,
    file_list_path: str,
    python_version: str | None,
) -> tuple[str, ...]:
    """Assemble the command line used to invoke MyPy.

    The result is, in order: the MyPy PEX entry point, `--python-executable`, the user-supplied
    `mypy.args`, the optional `--config-file` and `--python-version` flags, the cache flags, and
    finally an `@<file_list_path>` argument naming the file that lists the sources to check.

    Caching is only switched on when the resolved MyPy distribution is newer than 0.700 and
    `python_version` is not None; otherwise the cache directory is pointed at `/dev/null`.
    """
    leading_args: list[str] = [pex.pex.argv0, f"--python-executable={venv_python}"]
    leading_args.extend(mypy.args)
    if mypy.config:
        leading_args.append(f"--config-file={mypy.config}")
    if python_version:
        leading_args.append(f"--python-version={python_version}")

    resolve_info = await determine_venv_pex_resolve_info(pex)
    mypy_dist = resolve_info.find("mypy")
    assert mypy_dist is not None

    cache_args: tuple[str, ...]
    if mypy_dist.version > packaging.version.Version("0.700") and python_version is not None:
        cache_args = (
            # The sandbox is materialized without propagating mtimes, so MyPy's mtime checks
            # would otherwise always fail.
            "--skip-cache-mtime-check",
            # Added in v 0.660. See the comments inside the generated "__mypy_runner.sh" script
            # for why the SQLite cache is used.
            "--sqlite-cache",
            "--cache-dir",
            cache_dir,
        )
    else:
        # Don't bother caching.
        cache_args = ("--cache-dir=/dev/null",)

    return (*leading_args, *cache_args, f"@{file_list_path}")


def determine_python_files(files: Iterable[str]) -> tuple[str, ...]:
    """Select the files to hand to MyPy, preferring `.pyi` stubs over their `.py` twins.

    MyPy errors when the same module is passed as both a `.py` and a `.pyi` file, so whenever
    both are present only the `.pyi` stub is kept. Files with any other suffix are passed
    through. The result keeps first-insertion order and contains no duplicates.
    """
    selected: OrderedSet[str] = OrderedSet()
    for path in files:
        if path.endswith(".pyi"):
            # Dropping the trailing "i" gives the sibling `.py` path, which the stub supersedes.
            selected.discard(path[:-1])
        elif path.endswith(".py") and f"{path}i" in selected:
            # The stub for this module was already selected; skip the implementation file.
            continue
        selected.add(path)

    return tuple(selected)


@rule
async def mypy_typecheck_partition(
    partition: MyPyPartition,
    config_file: MyPyConfigFile,
    first_party_plugins: MyPyFirstPartyPlugins,
    build_root: BuildRoot,
    mypy: MyPy,
    python_setup: PythonSetup,
    mkdir: MkdirBinary,
    mktemp: MktempBinary,
    cp: CpBinary,
    mv: MvBinary,
    ln: LnBinary,
    global_options: GlobalOptions,
) -> CheckResult:
    """Run MyPy over a single partition and return its `CheckResult`.

    The rule proceeds in these steps:

    1. Pick the interpreter constraints used to run the MyPy tool itself.
    2. Concurrently resolve the partition's root sources, the MyPy venv PEX and the requirements
       PEX, then the transitive closure sources, a venv PEX wrapping the requirements, and the
       `__files.txt` file listing the sources to check.
    3. Generate the MyPy argv and embed it in the `__mypy_runner.sh` wrapper script, which also
       copies the SQLite cache db between the named cache and the sandbox.
    4. Merge all input digests, run the wrapper script as the process argv, and strip the
       `REPORT_DIR` prefix from the output digest to produce the report.
    """
    # MyPy requires 3.5+ to run, but uses the typed-ast library to work with 2.7, 3.4, 3.5, 3.6,
    # and 3.7. However, typed-ast does not understand 3.8+, so instead we must run MyPy with
    # Python 3.8+ when relevant. We only do this if <3.8 can't be used, as we don't want a
    # loose requirement like `>=3.6` to result in requiring Python 3.8+, which would error if
    # 3.8+ is not installed on the machine.
    tool_interpreter_constraints = (
        partition.interpreter_constraints
        if (
            mypy.options.is_default("interpreter_constraints")
            and partition.interpreter_constraints.requires_python38_or_newer(
                python_setup.interpreter_versions_universe
            )
        )
        else mypy.interpreter_constraints
    )

    roots_sources_get = determine_source_files(
        SourceFilesRequest(fs.sources for fs in partition.field_sets)
    )

    # See `requirements_venv_pex` for how this will get wrapped in a `VenvPex`.
    requirements_pex_get = create_pex(
        **implicitly(
            RequirementsPexRequest(
                (fs.address for fs in partition.field_sets),
                hardcoded_interpreter_constraints=partition.interpreter_constraints,
            )
        )
    )

    mypy_pex_get = create_venv_pex(
        **implicitly(
            mypy.to_pex_request(
                interpreter_constraints=tool_interpreter_constraints,
                extra_requirements=first_party_plugins.requirement_strings,
            )
        )
    )

    (
        roots_sources,
        mypy_pex,
        requirements_pex,
    ) = await concurrently(
        roots_sources_get,
        mypy_pex_get,
        requirements_pex_get,
    )

    # Only the partition's root sources are listed for checking (one path per line); the
    # transitive closure sources are merged into the sandbox further below.
    python_files = determine_python_files(roots_sources.snapshot.files)
    file_list_path = "__files.txt"
    file_list_digest_request = create_digest(
        CreateDigest([FileContent(file_list_path, "\n".join(python_files).encode())])
    )

    # This creates a venv with all the 3rd-party requirements used by the code. We tell MyPy to
    # use this venv by setting `--python-executable`. Note that this Python interpreter is
    # different than what we run MyPy with.
    #
    # We could have directly asked the `PexFromTargetsRequest` to return a `VenvPex`, rather than
    # `Pex`, but that would mean missing out on sharing a cache with other goals like `test` and
    # `run`.
    requirements_venv_pex_request = create_venv_pex(
        **implicitly(
            PexRequest(
                output_filename="requirements_venv.pex",
                internal_only=True,
                pex_path=[requirements_pex],
                interpreter_constraints=partition.interpreter_constraints,
            )
        )
    )
    closure_sources_get = prepare_python_sources(
        PythonSourceFilesRequest(partition.root_targets.closure()), **implicitly()
    )

    closure_sources, requirements_venv_pex, file_list_digest = await concurrently(
        closure_sources_get, requirements_venv_pex_request, file_list_digest_request
    )

    py_version = config_file.python_version_to_autoset(
        partition.interpreter_constraints, python_setup.interpreter_versions_universe
    )
    # The persistent cache location is keyed by a hash of the build root path, plus the
    # partition's resolve description when there is one. MyPy itself is pointed at
    # `run_cache_dir`; the wrapper script copies the db between the two locations.
    # NOTE(review): `_generate_argv` accepts `python_version=None`, so `py_version` may be None
    # here, in which case the script below interpolates ".../None" as the cache directory
    # while MyPy is told not to cache -- confirm this is intended.
    named_cache_dir = ".cache/mypy_cache"
    mypy_cache_dir = f"{named_cache_dir}/{sha256(build_root.path.encode()).hexdigest()}"
    if partition.resolve_description:
        mypy_cache_dir += f"/{partition.resolve_description}"
    run_cache_dir = ".tmp_cache/mypy_cache"
    argv = await _generate_argv(
        mypy,
        pex=mypy_pex,
        venv_python=requirements_venv_pex.python.argv0,
        cache_dir=run_cache_dir,
        file_list_path=file_list_path,
        python_version=py_version,
    )

    # The wrapper script's text (including its shell comments) is part of the process input
    # digest, so everything inside the f-string below is runtime content, not documentation.
    script_runner_digest = await create_digest(
        CreateDigest(
            [
                FileContent(
                    "__mypy_runner.sh",
                    dedent(
                        f"""\
                            # We want to leverage the MyPy cache for fast incremental runs of MyPy.
                            # Pants exposes "append_only_caches" we can leverage, but with the caveat
                            # that it requires either only appending files, or multiprocess-safe access.
                            #
                            # MyPy guarantees neither, but there's workarounds!
                            #
                            # By default, MyPy uses 2 cache files per source file, which introduces a
                            # whole slew of race conditions. We can minimize the race conditions by
                            # using MyPy's SQLite cache. MyPy still has race conditions when using the
                            # db, as it issues at least 2 single-row queries per source file at different
                            # points in time (therefore SQLite's own safety guarantees don't apply).
                            #
                            # Our workaround depends on whether we can hardlink between the sandbox
                            # and cache or not.
                            #
                            # If we can hardlink (this means the two sides of the link are on the
                            # same filesystem), then after mypy runs, we hardlink from the sandbox
                            # back to the named cache.
                            #
                            # If we can't hardlink, we resort to copying the result next to the
                            # cache under a temporary name, and finally doing an atomic mv from the
                            # tempfile to the real one.
                            #
                            # In either case, the result is an atomic replacement of the "old" named
                            # cache db, such that old references (via opened file descriptors) are
                            # still valid, but new references use the new contents.
                            #
                            # There is a chance of multiple processes thrashing on the cache, leaving
                            # it in a state that doesn't reflect reality at the current point in time,
                            # and forcing other processes to do potentially done work. This strategy
                            # still provides a net benefit because the cache is generally _mostly_
                            # valid (it includes entries for the standard library, and 3rdparty deps,
                            # among 1stparty sources), and even in the worst case
                            # (every single file has changed) the overhead of missing the cache each
                            # query should be small when compared to the work being done of typechecking.
                            #
                            # Lastly, we expect that since this is run through Pants which attempts
                            # to partition MyPy runs by python version (which the DB is independent
                            # for different versions) and uses a one-process-at-a-time daemon by default,
                            # multiple MyPy processes operating on a single db cache should be rare.

                            NAMED_CACHE_DIR="{mypy_cache_dir}/{py_version}"
                            NAMED_CACHE_DB="$NAMED_CACHE_DIR/cache.db"
                            SANDBOX_CACHE_DIR="{run_cache_dir}/{py_version}"
                            SANDBOX_CACHE_DB="$SANDBOX_CACHE_DIR/cache.db"

                            {mkdir.path} -p "$NAMED_CACHE_DIR" > /dev/null 2>&1
                            {mkdir.path} -p "$SANDBOX_CACHE_DIR" > /dev/null 2>&1
                            {cp.path} "$NAMED_CACHE_DB" "$SANDBOX_CACHE_DB" > /dev/null 2>&1

                            {" ".join((shell_quote(arg) for arg in argv))}
                            EXIT_CODE=$?

                            if ! {ln.path} "$SANDBOX_CACHE_DB" "$NAMED_CACHE_DB" > /dev/null 2>&1; then
                                TMP_CACHE=$({mktemp.path} "$SANDBOX_CACHE_DB.tmp.XXXXXX")
                                {cp.path} "$SANDBOX_CACHE_DB" "$TMP_CACHE" > /dev/null 2>&1
                                {mv.path} "$TMP_CACHE" "$NAMED_CACHE_DB" > /dev/null 2>&1
                            fi

                            exit $EXIT_CODE
                        """
                    ).encode(),
                    is_executable=True,
                )
            ]
        )
    )

    merged_input_files = await merge_digests(
        MergeDigests(
            [
                file_list_digest,
                first_party_plugins.sources_digest,
                closure_sources.source_files.snapshot.digest,
                requirements_venv_pex.digest,
                config_file.digest,
                script_runner_digest,
            ]
        )
    )

    # De-duplicate and sort the source roots of the plugins and the closure sources so that the
    # environment values built from them are in a stable order.
    all_used_source_roots = sorted(
        set(itertools.chain(first_party_plugins.source_roots, closure_sources.source_roots))
    )

    env = {
        "PEX_EXTRA_SYS_PATH": ":".join(all_used_source_roots),
        "MYPYPATH": ":".join(all_used_source_roots),
        # Always emit colors to improve cache hit rates, the results are post-processed to match the
        # global setting
        "MYPY_FORCE_COLOR": "1",
        # Mypy needs to know the terminal so it can use appropriate escape sequences. ansi is a
        # reasonable lowest common denominator for the sort of escapes mypy uses (NB. TERM=xterm
        # uses some additional codes that colors.strip_color doesn't remove).
        "TERM": "ansi",
        # Force a fixed terminal width. This is effectively infinite, disabling mypy's
        # builtin truncation and line wrapping. Terminals do an acceptable job of soft-wrapping
        # diagnostic text and source code is typically already hard-wrapped to a limited width.
        # (Unique random number to make it easier to search for the source of this setting.)
        "MYPY_FORCE_TERMINAL_WIDTH": "642092230765939",
    }

    process = await setup_venv_pex_process(
        VenvPexProcess(
            mypy_pex,
            input_digest=merged_input_files,
            extra_env=env,
            output_directories=(REPORT_DIR,),
            description=f"Run MyPy on {pluralize(len(python_files), 'file')}.",
            level=LogLevel.DEBUG,
            append_only_caches={"mypy_cache": named_cache_dir},
        ),
        **implicitly(),
    )
    # Replace the argv produced for the venv PEX process with the wrapper script; the script
    # itself contains the shell-quoted MyPy `argv` generated above.
    process = dataclasses.replace(process, argv=("./__mypy_runner.sh",))
    result = await execute_process(process, **implicitly())
    report = await remove_prefix(RemovePrefix(result.output_digest, REPORT_DIR))
    return CheckResult.from_fallible_process_result(
        result,
        partition_description=partition.description(),
        report=report,
        output_simplifier=global_options.output_simplifier(),
    )


@rule(desc="Determine if necessary to partition MyPy input", level=LogLevel.DEBUG)
async def mypy_determine_partitions(
    request: MyPyRequest, mypy: MyPy, python_setup: PythonSetup
) -> MyPyPartitions:
    """Split the request's field sets into `MyPyPartition`s.

    Field sets are grouped by (resolve, interpreter constraints) and the groups are emitted in
    sorted key order. A partition's resolve is recorded only when `python_setup.resolves` has
    more than one entry, and MyPy's own interpreter constraints are substituted when a group's
    constraints are falsy.
    """
    grouped_field_sets = _partition_by_interpreter_constraints_and_resolve(
        request.field_sets, python_setup
    )
    all_coarsened = await coarsened_targets_get(
        CoarsenedTargetsRequest(fs.address for fs in request.field_sets),
        **implicitly(),
    )
    address_to_coarsened = all_coarsened.by_address()
    has_multiple_resolves = len(python_setup.resolves) > 1

    partitions: list[MyPyPartition] = []
    for (resolve, interpreter_constraints), field_sets in sorted(grouped_field_sets.items()):
        root_targets = CoarsenedTargets(
            OrderedSet(address_to_coarsened[fs.address] for fs in field_sets)
        )
        partitions.append(
            MyPyPartition(
                FrozenOrderedSet(field_sets),
                root_targets,
                resolve if has_multiple_resolves else None,
                interpreter_constraints or mypy.interpreter_constraints,
            )
        )
    return MyPyPartitions(partitions)


# TODO(#10864): Improve performance, e.g. by leveraging the MyPy cache.
@rule(desc="Typecheck using MyPy", level=LogLevel.DEBUG)
async def mypy_typecheck(request: MyPyRequest, mypy: MyPy) -> CheckResults:
    """Type-check every partition of the request and collect the results.

    Returns an empty `CheckResults` when `mypy.skip` is set. Otherwise the partitions are
    determined first and then each one is checked concurrently.
    """
    checker_name = request.tool_name
    if mypy.skip:
        return CheckResults([], checker_name=checker_name)

    mypy_partitions = await mypy_determine_partitions(request, **implicitly())
    results_per_partition = await concurrently(
        mypy_typecheck_partition(part, **implicitly()) for part in mypy_partitions
    )
    return CheckResults(results_per_partition, checker_name=checker_name)


def rules():
    """Return this module's rules, the MyPy `CheckRequest` union rule, and the PEX-from-targets rules."""
    registered = list(collect_rules())
    registered.append(UnionRule(CheckRequest, MyPyRequest))
    registered.extend(pex_from_targets.rules())
    return registered

1	# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
2	# Licensed under the Apache License, Version 2.0 (see LICENSE).
3
4	from __future__ import annotations	4✔
5
6	import dataclasses	4✔
7	import itertools	4✔
8	from collections.abc import Iterable	4✔
9	from dataclasses import dataclass	4✔
10	from hashlib import sha256	4✔
11	from textwrap import dedent # noqa: PNT20	4✔
12
13	import packaging	4✔
14
15	from pants.backend.python.subsystems.setup import PythonSetup	4✔
16	from pants.backend.python.typecheck.mypy.subsystem import (	4✔
17	MyPy,
18	MyPyConfigFile,
19	MyPyFieldSet,
20	MyPyFirstPartyPlugins,
21	)
22	from pants.backend.python.util_rules import pex_from_targets	4✔
23	from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints	4✔
24	from pants.backend.python.util_rules.partition import (	4✔
25	_partition_by_interpreter_constraints_and_resolve,
26	)
27	from pants.backend.python.util_rules.pex import (	4✔
28	PexRequest,
29	VenvPex,
30	VenvPexProcess,
31	create_pex,
32	create_venv_pex,
33	determine_venv_pex_resolve_info,
34	setup_venv_pex_process,
35	)
36	from pants.backend.python.util_rules.pex_from_targets import RequirementsPexRequest	4✔
37	from pants.backend.python.util_rules.python_sources import (	4✔
38	PythonSourceFilesRequest,
39	prepare_python_sources,
40	)
41	from pants.base.build_root import BuildRoot	4✔
42	from pants.core.goals.check import REPORT_DIR, CheckRequest, CheckResult, CheckResults	4✔
43	from pants.core.util_rules.source_files import SourceFilesRequest, determine_source_files	4✔
44	from pants.core.util_rules.system_binaries import (	4✔
45	CpBinary,
46	LnBinary,
47	MkdirBinary,
48	MktempBinary,
49	MvBinary,
50	)
51	from pants.engine.collection import Collection	4✔
52	from pants.engine.fs import CreateDigest, FileContent, MergeDigests, RemovePrefix	4✔
53	from pants.engine.internals.graph import resolve_coarsened_targets as coarsened_targets_get	4✔
54	from pants.engine.intrinsics import create_digest, execute_process, merge_digests, remove_prefix	4✔
55	from pants.engine.rules import collect_rules, concurrently, implicitly, rule	4✔
56	from pants.engine.target import CoarsenedTargets, CoarsenedTargetsRequest	4✔
57	from pants.engine.unions import UnionRule	4✔
58	from pants.option.global_options import GlobalOptions	4✔
59	from pants.util.logging import LogLevel	4✔
60	from pants.util.ordered_set import FrozenOrderedSet, OrderedSet	4✔
61	from pants.util.strutil import pluralize, shell_quote	4✔
62
63
64	@dataclass(frozen=True)	4✔
65	class MyPyPartition:	4✔
66	field_sets: FrozenOrderedSet[MyPyFieldSet]	4✔
67	root_targets: CoarsenedTargets	4✔
68	resolve_description: str \| None	4✔
69	interpreter_constraints: InterpreterConstraints	4✔
70
71	def description(self) -> str:	4✔
72	ics = str(sorted(str(c) for c in self.interpreter_constraints))	1✔
73	return f"{self.resolve_description}, {ics}" if self.resolve_description else ics	1✔
74
75
76	class MyPyPartitions(Collection[MyPyPartition]):	4✔
77	pass	4✔
78
79
80	class MyPyRequest(CheckRequest):	4✔
81	field_set_type = MyPyFieldSet	4✔
82	tool_name = MyPy.options_scope	4✔
83
84
85	async def _generate_argv(	4✔
86	mypy: MyPy,
87	*,
88	pex: VenvPex,
89	cache_dir: str,
90	venv_python: str,
91	file_list_path: str,
92	python_version: str \| None,
93	) -> tuple[str, ...]:
94	args = [pex.pex.argv0, f"--python-executable={venv_python}", *mypy.args]	×
95	if mypy.config:	×
96	args.append(f"--config-file={mypy.config}")	×
97	if python_version:	×
98	args.append(f"--python-version={python_version}")	×
99
100	mypy_pex_info = await determine_venv_pex_resolve_info(pex)	×
101	mypy_info = mypy_pex_info.find("mypy")	×
102	assert mypy_info is not None	×
103	if mypy_info.version > packaging.version.Version("0.700") and python_version is not None:	×
104	# Skip mtime checks because we don't propagate mtime when materializing the sandbox, so the
105	# mtime checks will always fail otherwise.
106	args.append("--skip-cache-mtime-check")	×
107	# See "__run_wrapper.sh" below for explanation
108	args.append("--sqlite-cache") # Added in v 0.660	×
109	args.extend(("--cache-dir", cache_dir))	×
110	else:
111	# Don't bother caching
112	args.append("--cache-dir=/dev/null")	×
113	args.append(f"@{file_list_path}")	×
114	return tuple(args)	×
115
116
117	def determine_python_files(files: Iterable[str]) -> tuple[str, ...]:	4✔
118	"""We run over all .py and .pyi files, but .pyi files take precedence.
119
120	MyPy will error if we say to run over the same module with both its .py and .pyi files, so we
121	must be careful to only use the .pyi stub.
122	"""
123	result: OrderedSet[str] = OrderedSet()	1✔
124	for f in files:	1✔
125	if f.endswith(".pyi"):	1✔
126	py_file = f[:-1] # That is, strip the `.pyi` suffix to be `.py`.	1✔
127	result.discard(py_file)	1✔
128	result.add(f)	1✔
129	elif f.endswith(".py"):	1✔
130	pyi_file = f + "i"	1✔
131	if pyi_file not in result:	1✔
132	result.add(f)	1✔
133	else:
134	result.add(f)	1✔
135
136	return tuple(result)	1✔
137
138
139	@rule	4✔
140	async def mypy_typecheck_partition(	4✔
141	partition: MyPyPartition,
142	config_file: MyPyConfigFile,
143	first_party_plugins: MyPyFirstPartyPlugins,
144	build_root: BuildRoot,
145	mypy: MyPy,
146	python_setup: PythonSetup,
147	mkdir: MkdirBinary,
148	mktemp: MktempBinary,
149	cp: CpBinary,
150	mv: MvBinary,
151	ln: LnBinary,
152	global_options: GlobalOptions,
153	) -> CheckResult:
154	# MyPy requires 3.5+ to run, but uses the typed-ast library to work with 2.7, 3.4, 3.5, 3.6,
155	# and 3.7. However, typed-ast does not understand 3.8+, so instead we must run MyPy with
156	# Python 3.8+ when relevant. We only do this if <3.8 can't be used, as we don't want a
157	# loose requirement like `>=3.6` to result in requiring Python 3.8+, which would error if
158	# 3.8+ is not installed on the machine.
159	tool_interpreter_constraints = (	×
160	partition.interpreter_constraints
161	if (
162	mypy.options.is_default("interpreter_constraints")
163	and partition.interpreter_constraints.requires_python38_or_newer(
164	python_setup.interpreter_versions_universe
165	)
166	)
167	else mypy.interpreter_constraints
168	)
169
170	roots_sources_get = determine_source_files(	×
171	SourceFilesRequest(fs.sources for fs in partition.field_sets)
172	)
173
174	# See `requirements_venv_pex` for how this will get wrapped in a `VenvPex`.
175	requirements_pex_get = create_pex(	×
176	**implicitly(
177	RequirementsPexRequest(
178	(fs.address for fs in partition.field_sets),
179	hardcoded_interpreter_constraints=partition.interpreter_constraints,
180	)
181	)
182	)
183
184	mypy_pex_get = create_venv_pex(	×
185	**implicitly(
186	mypy.to_pex_request(
187	interpreter_constraints=tool_interpreter_constraints,
188	extra_requirements=first_party_plugins.requirement_strings,
189	)
190	)
191	)
192
193	(	×
194	roots_sources,
195	mypy_pex,
196	requirements_pex,
197	) = await concurrently(
198	roots_sources_get,
199	mypy_pex_get,
200	requirements_pex_get,
201	)
202
203	python_files = determine_python_files(roots_sources.snapshot.files)	×
204	file_list_path = "__files.txt"	×
205	file_list_digest_request = create_digest(	×
206	CreateDigest([FileContent(file_list_path, "\n".join(python_files).encode())])
207	)
208
209	# This creates a venv with all the 3rd-party requirements used by the code. We tell MyPy to
210	# use this venv by setting `--python-executable`. Note that this Python interpreter is
211	# different than what we run MyPy with.
212	#
213	# We could have directly asked the `PexFromTargetsRequest` to return a `VenvPex`, rather than
214	# `Pex`, but that would mean missing out on sharing a cache with other goals like `test` and
215	# `run`.
216	requirements_venv_pex_request = create_venv_pex(	×
217	**implicitly(
218	PexRequest(
219	output_filename="requirements_venv.pex",
220	internal_only=True,
221	pex_path=[requirements_pex],
222	interpreter_constraints=partition.interpreter_constraints,
223	)
224	)
225	)
226	closure_sources_get = prepare_python_sources(	×
227	PythonSourceFilesRequest(partition.root_targets.closure()), **implicitly()
228	)
229
230	closure_sources, requirements_venv_pex, file_list_digest = await concurrently(	×
231	closure_sources_get, requirements_venv_pex_request, file_list_digest_request
232	)
233
234	py_version = config_file.python_version_to_autoset(	×
235	partition.interpreter_constraints, python_setup.interpreter_versions_universe
236	)
237	named_cache_dir = ".cache/mypy_cache"	×
238	mypy_cache_dir = f"{named_cache_dir}/{sha256(build_root.path.encode()).hexdigest()}"	×
239	if partition.resolve_description:	×
240	mypy_cache_dir += f"/{partition.resolve_description}"	×
241	run_cache_dir = ".tmp_cache/mypy_cache"	×
242	argv = await _generate_argv(	×
243	mypy,
244	pex=mypy_pex,
245	venv_python=requirements_venv_pex.python.argv0,
246	cache_dir=run_cache_dir,
247	file_list_path=file_list_path,
248	python_version=py_version,
249	)
250
251	script_runner_digest = await create_digest(	×
252	CreateDigest(
253	[
254	FileContent(
255	"__mypy_runner.sh",
256	dedent(
257	f"""\
258	# We want to leverage the MyPy cache for fast incremental runs of MyPy.
259	# Pants exposes "append_only_caches" we can leverage, but with the caveat
260	# that it requires either only appending files, or multiprocess-safe access.
261	#
262	# MyPy guarantees neither, but there's workarounds!
263	#
264	# By default, MyPy uses 2 cache files per source file, which introduces a
265	# whole slew of race conditions. We can minimize the race conditions by
266	# using MyPy's SQLite cache. MyPy still has race conditions when using the
267	# db, as it issues at least 2 single-row queries per source file at different
268	# points in time (therefore SQLite's own safety guarantees don't apply).
269	#
270	# Our workaround depends on whether we can hardlink between the sandbox
271	# and cache or not.
272	#
273	# If we can hardlink (this means the two sides of the link are on the
274	# same filesystem), then after mypy runs, we hardlink from the sandbox
275	# back to the named cache.
276	#
277	# If we can't hardlink, we resort to copying the result next to the
278	# cache under a temporary name, and finally doing an atomic mv from the
279	# tempfile to the real one.
280	#
281	# In either case, the result is an atomic replacement of the "old" named
282	# cache db, such that old references (via opened file descriptors) are
283	# still valid, but new references use the new contents.
284	#
285	# There is a chance of multiple processes thrashing on the cache, leaving
286	# it in a state that doesn't reflect reality at the current point in time,
287	# and forcing other processes to do potentially done work. This strategy
288	# still provides a net benefit because the cache is generally _mostly_
289	# valid (it includes entries for the standard library, and 3rdparty deps,
290	# among 1stparty sources), and even in the worst case
291	# (every single file has changed) the overhead of missing the cache each
292	# query should be small when compared to the work being done of typechecking.
293	#
294	# Lastly, we expect that since this is run through Pants which attempts
295	# to partition MyPy runs by python version (which the DB is independent
296	# for different versions) and uses a one-process-at-a-time daemon by default,
297	# multiple MyPy processes operating on a single db cache should be rare.
298
299	NAMED_CACHE_DIR="{mypy_cache_dir}/{py_version}"
300	NAMED_CACHE_DB="$NAMED_CACHE_DIR/cache.db"
301	SANDBOX_CACHE_DIR="{run_cache_dir}/{py_version}"
302	SANDBOX_CACHE_DB="$SANDBOX_CACHE_DIR/cache.db"
303
304	{mkdir.path} -p "$NAMED_CACHE_DIR" > /dev/null 2>&1
305	{mkdir.path} -p "$SANDBOX_CACHE_DIR" > /dev/null 2>&1
306	{cp.path} "$NAMED_CACHE_DB" "$SANDBOX_CACHE_DB" > /dev/null 2>&1
307
308	{" ".join((shell_quote(arg) for arg in argv))}
309	EXIT_CODE=$?
310
311	if ! {ln.path} "$SANDBOX_CACHE_DB" "$NAMED_CACHE_DB" > /dev/null 2>&1; then
312	TMP_CACHE=$({mktemp.path} "$SANDBOX_CACHE_DB.tmp.XXXXXX")
313	{cp.path} "$SANDBOX_CACHE_DB" "$TMP_CACHE" > /dev/null 2>&1
314	{mv.path} "$TMP_CACHE" "$NAMED_CACHE_DB" > /dev/null 2>&1
315	fi
316
317	exit $EXIT_CODE
318	"""
319	).encode(),
320	is_executable=True,
321	)
322	]
323	)
324	)
325
326	merged_input_files = await merge_digests(	×
327	MergeDigests(
328	[
329	file_list_digest,
330	first_party_plugins.sources_digest,
331	closure_sources.source_files.snapshot.digest,
332	requirements_venv_pex.digest,
333	config_file.digest,
334	script_runner_digest,
335	]
336	)
337	)
338
NEW 339	all_used_source_roots = sorted(	×
340	set(itertools.chain(first_party_plugins.source_roots, closure_sources.source_roots))
341	)
342
UNCOV 343	env = {	×
344	"PEX_EXTRA_SYS_PATH": ":".join(all_used_source_roots),
345	"MYPYPATH": ":".join(all_used_source_roots),
346	# Always emit colors to improve cache hit rates, the results are post-processed to match the
347	# global setting
348	"MYPY_FORCE_COLOR": "1",
349	# Mypy needs to know the terminal so it can use appropriate escape sequences. ansi is a
350	# reasonable lowest common denominator for the sort of escapes mypy uses (NB. TERM=xterm
351	# uses some additional codes that colors.strip_color doesn't remove).
352	"TERM": "ansi",
353	# Force a fixed terminal width. This is effectively infinite, disabling mypy's
354	# builtin truncation and line wrapping. Terminals do an acceptable job of soft-wrapping
355	# diagnostic text and source code is typically already hard-wrapped to a limited width.
356	# (Unique random number to make it easier to search for the source of this setting.)
357	"MYPY_FORCE_TERMINAL_WIDTH": "642092230765939",
358	}
359
360	process = await setup_venv_pex_process(	×
361	VenvPexProcess(
362	mypy_pex,
363	input_digest=merged_input_files,
364	extra_env=env,
365	output_directories=(REPORT_DIR,),
366	description=f"Run MyPy on {pluralize(len(python_files), 'file')}.",
367	level=LogLevel.DEBUG,
368	append_only_caches={"mypy_cache": named_cache_dir},
369	),
370	**implicitly(),
371	)
372	process = dataclasses.replace(process, argv=("./__mypy_runner.sh",))	×
373	result = await execute_process(process, **implicitly())	×
374	report = await remove_prefix(RemovePrefix(result.output_digest, REPORT_DIR))	×
375	return CheckResult.from_fallible_process_result(	×
376	result,
377	partition_description=partition.description(),
378	report=report,
379	output_simplifier=global_options.output_simplifier(),
380	)
381
382
383	@rule(desc="Determine if necessary to partition MyPy input", level=LogLevel.DEBUG)	4✔
384	async def mypy_determine_partitions(	4✔
385	request: MyPyRequest, mypy: MyPy, python_setup: PythonSetup
386	) -> MyPyPartitions:
387	resolve_and_interpreter_constraints_to_field_sets = (	×
388	_partition_by_interpreter_constraints_and_resolve(request.field_sets, python_setup)
389	)
390	coarsened_targets = await coarsened_targets_get(	×
391	CoarsenedTargetsRequest(field_set.address for field_set in request.field_sets),
392	**implicitly(),
393	)
394	coarsened_targets_by_address = coarsened_targets.by_address()	×
395
396	return MyPyPartitions(	×
397	MyPyPartition(
398	FrozenOrderedSet(field_sets),
399	CoarsenedTargets(
400	OrderedSet(
401	coarsened_targets_by_address[field_set.address] for field_set in field_sets
402	)
403	),
404	resolve if len(python_setup.resolves) > 1 else None,
405	interpreter_constraints or mypy.interpreter_constraints,
406	)
407	for (resolve, interpreter_constraints), field_sets in sorted(
408	resolve_and_interpreter_constraints_to_field_sets.items()
409	)
410	)
411
412
413	# TODO(#10864): Improve performance, e.g. by leveraging the MyPy cache.
414	@rule(desc="Typecheck using MyPy", level=LogLevel.DEBUG)	4✔
415	async def mypy_typecheck(request: MyPyRequest, mypy: MyPy) -> CheckResults:	4✔
416	if mypy.skip:	×
417	return CheckResults([], checker_name=request.tool_name)	×
418
419	partitions = await mypy_determine_partitions(request, **implicitly())	×
420	partitioned_results = await concurrently(	×
421	mypy_typecheck_partition(partition, **implicitly()) for partition in partitions
422	)
423	return CheckResults(partitioned_results, checker_name=request.tool_name)	×
424
425
426	def rules():	4✔
427	return [	4✔
428	*collect_rules(),
429	UnionRule(CheckRequest, MyPyRequest),
430	*pex_from_targets.rules(),
431	]

pantsbuild / pants / 18562858873

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous