• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pantsbuild / pants / 22507600930

27 Feb 2026 11:18PM UTC coverage: 92.932%. First build
22507600930

Pull #23146

github

web-flow
Merge c3dc555a3 into 27363b1a1
Pull Request #23146: option to disable mypy caching

41 of 43 new or added lines in 3 files covered. (95.35%)

90944 of 97861 relevant lines covered (92.93%)

4.06 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.39
/src/python/pants/backend/python/typecheck/mypy/rules.py
1
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
2
# Licensed under the Apache License, Version 2.0 (see LICENSE).
3

4
from __future__ import annotations
4✔
5

6
import dataclasses
4✔
7
from collections.abc import Iterable
4✔
8
from dataclasses import dataclass
4✔
9
from hashlib import sha256
4✔
10
from textwrap import dedent  # noqa: PNT20
4✔
11

12
import packaging
import packaging.version
13

14
from pants.backend.python.subsystems.setup import PythonSetup
4✔
15
from pants.backend.python.typecheck.mypy.subsystem import (
4✔
16
    MyPy,
17
    MyPyCacheMode,
18
    MyPyConfigFile,
19
    MyPyFieldSet,
20
    MyPyFirstPartyPlugins,
21
)
22
from pants.backend.python.util_rules import pex_from_targets
4✔
23
from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
4✔
24
from pants.backend.python.util_rules.partition import (
4✔
25
    _partition_by_interpreter_constraints_and_resolve,
26
)
27
from pants.backend.python.util_rules.pex import (
4✔
28
    PexRequest,
29
    VenvPex,
30
    VenvPexProcess,
31
    create_pex,
32
    create_venv_pex,
33
    determine_venv_pex_resolve_info,
34
    setup_venv_pex_process,
35
)
36
from pants.backend.python.util_rules.pex_from_targets import RequirementsPexRequest
4✔
37
from pants.backend.python.util_rules.python_sources import (
4✔
38
    PythonSourceFilesRequest,
39
    prepare_python_sources,
40
)
41
from pants.base.build_root import BuildRoot
4✔
42
from pants.core.goals.check import (
4✔
43
    REPORT_DIR,
44
    CheckRequest,
45
    CheckResult,
46
    CheckResults,
47
    CheckSubsystem,
48
)
49
from pants.core.util_rules.source_files import SourceFilesRequest, determine_source_files
4✔
50
from pants.core.util_rules.system_binaries import (
4✔
51
    CpBinary,
52
    LnBinary,
53
    MkdirBinary,
54
    MktempBinary,
55
    MvBinary,
56
)
57
from pants.engine.collection import Collection
4✔
58
from pants.engine.fs import CreateDigest, FileContent, MergeDigests, RemovePrefix
4✔
59
from pants.engine.internals.graph import resolve_coarsened_targets as coarsened_targets_get
4✔
60
from pants.engine.intrinsics import create_digest, execute_process, merge_digests, remove_prefix
4✔
61
from pants.engine.rules import collect_rules, concurrently, implicitly, rule
4✔
62
from pants.engine.target import CoarsenedTargets, CoarsenedTargetsRequest
4✔
63
from pants.engine.unions import UnionRule
4✔
64
from pants.option.global_options import GlobalOptions
4✔
65
from pants.util.logging import LogLevel
4✔
66
from pants.util.ordered_set import FrozenOrderedSet, OrderedSet
4✔
67
from pants.util.strutil import pluralize, shell_quote
4✔
68

69

70
@dataclass(frozen=True)
class MyPyPartition:
    """A batch of field sets that share one resolve and one set of interpreter constraints."""

    field_sets: FrozenOrderedSet[MyPyFieldSet]
    root_targets: CoarsenedTargets
    resolve_description: str | None
    interpreter_constraints: InterpreterConstraints

    def description(self) -> str:
        """Render a human-readable partition label: `<resolve>, <ics>`, or just the ICs."""
        rendered_ics = str(sorted(str(constraint) for constraint in self.interpreter_constraints))
        if self.resolve_description:
            return f"{self.resolve_description}, {rendered_ics}"
        return rendered_ics
80

81

82
class MyPyPartitions(Collection[MyPyPartition]):
    """All `MyPyPartition`s produced for a single `MyPyRequest`."""

    pass
84

85

86
class MyPyRequest(CheckRequest):
    """`check` goal request that runs MyPy over matching field sets."""

    # Targets are selected for this check by matching this field set type.
    field_set_type = MyPyFieldSet
    # Shown to users as the checker's name (the `mypy` options scope).
    tool_name = MyPy.options_scope
89

90

91
def _get_cache_args(
    mypy_version: packaging.version.Version,
    python_version: str | None,
    cache_mode: MyPyCacheMode,
    cache_dir: str,
) -> tuple[str, ...]:
    """Compute the mypy flags that control its on-disk cache.

    The persistent SQLite cache is only enabled when the installed mypy is new
    enough, a python version was determined for the partition, and sqlite-mode
    caching was requested; otherwise the cache is pointed at /dev/null.
    """
    use_sqlite_cache = (
        mypy_version > packaging.version.Version("0.700")
        and python_version is not None
        and cache_mode == MyPyCacheMode.sqlite
    )
    if not use_sqlite_cache:
        return ("--cache-dir=/dev/null",)
    return (
        # The sandbox is materialized without preserving mtimes, so mypy's mtime
        # validation would always invalidate the cache; skip that check.
        "--skip-cache-mtime-check",
        # A single SQLite db (added in mypy 0.660) suits the named-cache update
        # dance performed by "__mypy_runner.sh" below.
        "--sqlite-cache",
        "--cache-dir",
        cache_dir,
    )
113

114

115
async def _generate_argv(
    mypy: MyPy,
    *,
    pex: VenvPex,
    cache_dir: str,
    venv_python: str,
    file_list_path: str,
    python_version: str | None,
) -> tuple[str, ...]:
    """Assemble mypy's argv: entry point, user args, config/python-version flags,
    cache flags, and the `@file-list` reference."""
    argv = [pex.pex.argv0, f"--python-executable={venv_python}"]
    argv.extend(mypy.args)
    if mypy.config:
        argv.append(f"--config-file={mypy.config}")
    if python_version:
        argv.append(f"--python-version={python_version}")

    resolve_info = await determine_venv_pex_resolve_info(pex)
    mypy_dist = resolve_info.find("mypy")
    # The pex was built from the mypy requirement, so the distribution must be present.
    assert mypy_dist is not None
    argv += _get_cache_args(mypy_dist.version, python_version, mypy.cache_mode, cache_dir)
    argv.append(f"@{file_list_path}")
    return tuple(argv)
136

137

138
def determine_python_files(files: Iterable[str]) -> tuple[str, ...]:
    """We run over all .py and .pyi files, but .pyi files take precedence.

    MyPy will error if we say to run over the same module with both its .py and .pyi files, so we
    must be careful to only use the .pyi stub.

    Order of first appearance is preserved. Implemented with a plain insertion-ordered
    dict rather than `OrderedSet` so this pure helper has no project dependencies;
    the semantics are identical (re-adding an existing key keeps its original position).
    """
    chosen: dict[str, None] = {}
    for path in files:
        if path.endswith(".pyi"):
            # The stub wins: drop the .py module if we already selected it.
            chosen.pop(path[:-1], None)  # strip the trailing "i" to get the .py name
            chosen[path] = None
        elif path.endswith(".py"):
            # Only take the .py if its stub hasn't already been selected.
            if f"{path}i" not in chosen:
                chosen[path] = None
        else:
            # Non-Python files (e.g. resources) pass through unchanged.
            chosen[path] = None

    return tuple(chosen)
158

159

160
@rule
async def mypy_typecheck_partition(
    partition: MyPyPartition,
    config_file: MyPyConfigFile,
    first_party_plugins: MyPyFirstPartyPlugins,
    build_root: BuildRoot,
    mypy: MyPy,
    check_subsystem: CheckSubsystem,
    python_setup: PythonSetup,
    mkdir: MkdirBinary,
    mktemp: MktempBinary,
    cp: CpBinary,
    mv: MvBinary,
    ln: LnBinary,
    global_options: GlobalOptions,
) -> CheckResult:
    """Type check one partition's files with MyPy.

    High-level flow:
      1. Concurrently resolve the partition's sources and build two PEXes: the MyPy
         tool itself (plus first-party plugins) and the partition's 3rd-party
         requirements (exposed to mypy via ``--python-executable``).
      2. Write the list of files to check (referenced as ``@__files.txt``) and merge
         all inputs into a single digest.
      3. Wrap the mypy invocation in ``__mypy_runner.sh``; unless caching is disabled,
         the script stages the named-cache SQLite db into the sandbox and atomically
         publishes it back after a successful run.
      4. Execute the script and convert the outcome into a ``CheckResult``.
    """
    # MyPy requires 3.5+ to run, but uses the typed-ast library to work with 2.7, 3.4, 3.5, 3.6,
    # and 3.7. However, typed-ast does not understand 3.8+, so instead we must run MyPy with
    # Python 3.8+ when relevant. We only do this if <3.8 can't be used, as we don't want a
    # loose requirement like `>=3.6` to result in requiring Python 3.8+, which would error if
    # 3.8+ is not installed on the machine.
    tool_interpreter_constraints = (
        partition.interpreter_constraints
        if (
            mypy.options.is_default("interpreter_constraints")
            and partition.interpreter_constraints.requires_python38_or_newer(
                python_setup.interpreter_versions_universe
            )
        )
        else mypy.interpreter_constraints
    )

    # Sources of the targets being checked directly (the partition's roots).
    roots_sources_get = determine_source_files(
        SourceFilesRequest(fs.sources for fs in partition.field_sets)
    )

    # See `requirements_venv_pex` for how this will get wrapped in a `VenvPex`.
    requirements_pex_get = create_pex(
        **implicitly(
            RequirementsPexRequest(
                (fs.address for fs in partition.field_sets),
                hardcoded_interpreter_constraints=partition.interpreter_constraints,
            )
        )
    )

    mypy_pex_get = create_venv_pex(
        **implicitly(
            mypy.to_pex_request(
                interpreter_constraints=tool_interpreter_constraints,
                extra_requirements=first_party_plugins.requirement_strings,
            )
        )
    )

    (
        roots_sources,
        mypy_pex,
        requirements_pex,
    ) = await concurrently(
        roots_sources_get,
        mypy_pex_get,
        requirements_pex_get,
    )

    # .pyi stubs shadow their .py counterparts (see `determine_python_files`).
    python_files = determine_python_files(roots_sources.snapshot.files)
    file_list_path = "__files.txt"
    file_list_digest_request = create_digest(
        CreateDigest([FileContent(file_list_path, "\n".join(python_files).encode())])
    )

    # This creates a venv with all the 3rd-party requirements used by the code. We tell MyPy to
    # use this venv by setting `--python-executable`. Note that this Python interpreter is
    # different than what we run MyPy with.
    #
    # We could have directly asked the `PexFromTargetsRequest` to return a `VenvPex`, rather than
    # `Pex`, but that would mean missing out on sharing a cache with other goals like `test` and
    # `run`.
    requirements_venv_pex_request = create_venv_pex(
        **implicitly(
            PexRequest(
                output_filename="requirements_venv.pex",
                internal_only=True,
                pex_path=[requirements_pex],
                interpreter_constraints=partition.interpreter_constraints,
            )
        )
    )
    # Transitive closure of sources, needed so mypy can follow imports (via MYPYPATH).
    closure_sources_get = prepare_python_sources(
        PythonSourceFilesRequest(partition.root_targets.closure()), **implicitly()
    )

    closure_sources, requirements_venv_pex, file_list_digest = await concurrently(
        closure_sources_get, requirements_venv_pex_request, file_list_digest_request
    )

    # Value for `--python-version`, if the config allows auto-setting it from the
    # partition's interpreter constraints (may be None).
    py_version = config_file.python_version_to_autoset(
        partition.interpreter_constraints, python_setup.interpreter_versions_universe
    )
    # Namespace the persistent cache by build root (and resolve, if any) so unrelated
    # workspaces/resolves don't share mypy cache state.
    named_cache_dir = ".cache/mypy_cache"
    mypy_cache_dir = f"{named_cache_dir}/{sha256(build_root.path.encode()).hexdigest()}"
    if partition.resolve_description:
        mypy_cache_dir += f"/{partition.resolve_description}"
    # Sandbox-local location where the runner script stages the cache for this run.
    run_cache_dir = ".tmp_cache/mypy_cache"
    argv = await _generate_argv(
        mypy,
        pex=mypy_pex,
        venv_python=requirements_venv_pex.python.argv0,
        cache_dir=run_cache_dir,
        file_list_path=file_list_path,
        python_version=py_version,
    )

    mypy_command = " ".join(shell_quote(arg) for arg in argv)

    # With caching disabled, the runner script is just the raw mypy command.
    if mypy.cache_mode == MyPyCacheMode.none:
        script_content = dedent(f"""\
            {mypy_command}
        """)
    else:
        sandbox_cache_dir = f"{run_cache_dir}/{py_version}"

        script_content = dedent(f"""\
            # We want to leverage the MyPy cache for fast incremental runs of MyPy.
            # Pants exposes "append_only_caches" we can leverage, but with the caveat
            # that it requires either only appending files, or multiprocess-safe access.
            #
            # MyPy guarantees neither, but there's workarounds!
            #
            # By default, MyPy uses 2 cache files per source file, which introduces a
            # whole slew of race conditions. We can minimize the race conditions by
            # using MyPy's SQLite cache. MyPy still has race conditions when using the
            # db, as it issues at least 2 single-row queries per source file at different
            # points in time (therefore SQLite's own safety guarantees don't apply).
            #
            # Our workaround depends on whether we can hardlink between the sandbox
            # and cache or not.
            #
            # If we can hardlink (this means the two sides of the link are on the
            # same filesystem), then after mypy runs, we hardlink from the sandbox
            # to a temp file in the named cache, then atomically rename it into place.
            #
            # If we can't hardlink, we resort to copying the result to a temp file
            # in the named cache, and finally doing an atomic mv from the tempfile
            # to the real one.
            #
            # In either case, the result is an atomic replacement of the "old" named
            # cache db, such that old references (via opened file descriptors) are
            # still valid, but new references use the new contents.
            #
            # There is a chance of multiple processes thrashing on the cache, leaving
            # it in a state that doesn't reflect reality at the current point in time,
            # and forcing other processes to do potentially done work. This strategy
            # still provides a net benefit because the cache is generally _mostly_
            # valid (it includes entries for the standard library, and 3rdparty deps,
            # among 1stparty sources), and even in the worst case
            # (every single file has changed) the overhead of missing the cache each
            # query should be small when compared to the work being done of typechecking.
            #
            # Lastly, we expect that since this is run through Pants which attempts
            # to partition MyPy runs by python version (which the DB is independent
            # for different versions) and uses a one-process-at-a-time daemon by default,
            # multiple MyPy processes operating on a single db cache should be rare.

            NAMED_CACHE_DIR="{mypy_cache_dir}/{py_version}"
            NAMED_CACHE_DB="$NAMED_CACHE_DIR/cache.db"
            SANDBOX_CACHE_DIR="{sandbox_cache_dir}"
            SANDBOX_CACHE_DB="$SANDBOX_CACHE_DIR/cache.db"

            {mkdir.path} -p "$NAMED_CACHE_DIR" > /dev/null 2>&1
            {mkdir.path} -p "$SANDBOX_CACHE_DIR" > /dev/null 2>&1
            {cp.path} "$NAMED_CACHE_DB" "$SANDBOX_CACHE_DB" > /dev/null 2>&1

            {mypy_command}
            EXIT_CODE=$?

            # Only update the cache on successful runs (exit code 0 or 1).
            # Exit code 2 indicates a crash or internal error, which may have
            # left the cache in an inconsistent state.
            # See https://github.com/python/mypy/issues/6003 for exit codes
            if [ $EXIT_CODE -le 1 ]; then
                if LN_TMP=$({mktemp.path} -u "$NAMED_CACHE_DB.tmp.XXXXXX") &&
                   {ln.path} "$SANDBOX_CACHE_DB" "$LN_TMP" > /dev/null 2>&1; then
                    {mv.path} "$LN_TMP" "$NAMED_CACHE_DB" > /dev/null 2>&1
                else
                    CP_TMP=$({mktemp.path} "$NAMED_CACHE_DB.tmp.XXXXXX") &&
                        {cp.path} "$SANDBOX_CACHE_DB" "$CP_TMP" > /dev/null 2>&1 &&
                        {mv.path} "$CP_TMP" "$NAMED_CACHE_DB" > /dev/null 2>&1
                fi
            fi

            exit $EXIT_CODE
        """)

    script_runner_digest = await create_digest(
        CreateDigest(
            [
                FileContent(
                    "__mypy_runner.sh",
                    script_content.encode(),
                    is_executable=True,
                )
            ]
        )
    )

    merged_input_files = await merge_digests(
        MergeDigests(
            [
                file_list_digest,
                first_party_plugins.sources_digest,
                closure_sources.source_files.snapshot.digest,
                requirements_venv_pex.digest,
                config_file.digest,
                script_runner_digest,
            ]
        )
    )

    env = {
        "PEX_EXTRA_SYS_PATH": ":".join(first_party_plugins.source_roots),
        "MYPYPATH": ":".join(closure_sources.source_roots),
        # Always emit colors to improve cache hit rates, the results are post-processed to match the
        # global setting
        "MYPY_FORCE_COLOR": "1",
        # Mypy needs to know the terminal so it can use appropriate escape sequences. ansi is a
        # reasonable lowest common denominator for the sort of escapes mypy uses (NB. TERM=xterm
        # uses some additional codes that colors.strip_color doesn't remove).
        "TERM": "ansi",
        # Force a fixed terminal width. This is effectively infinite, disabling mypy's
        # builtin truncation and line wrapping. Terminals do an acceptable job of soft-wrapping
        # diagnostic text and source code is typically already hard-wrapped to a limited width.
        # (Unique random number to make it easier to search for the source of this setting.)
        "MYPY_FORCE_TERMINAL_WIDTH": "642092230765939",
    }

    # Only use append_only_caches when caching is enabled
    if mypy.cache_mode == MyPyCacheMode.none:
        append_only_caches = {}
    else:
        append_only_caches = {"mypy_cache": named_cache_dir}

    process = await setup_venv_pex_process(
        VenvPexProcess(
            mypy_pex,
            input_digest=merged_input_files,
            extra_env=env,
            output_directories=(REPORT_DIR,),
            description=f"Run MyPy on {pluralize(len(python_files), 'file')}.",
            level=LogLevel.DEBUG,
            cache_scope=check_subsystem.default_process_cache_scope,
            append_only_caches=append_only_caches,
        ),
        **implicitly(),
    )
    # Swap the argv so the sandbox executes the wrapper script instead of invoking the
    # mypy PEX directly; the VenvPexProcess still supplies env, digests, and caches.
    process = dataclasses.replace(process, argv=("./__mypy_runner.sh",))
    result = await execute_process(process, **implicitly())
    # Anything the run wrote under REPORT_DIR becomes the check report.
    report = await remove_prefix(RemovePrefix(result.output_digest, REPORT_DIR))
    return CheckResult.from_fallible_process_result(
        result,
        partition_description=partition.description(),
        report=report,
        output_simplifier=global_options.output_simplifier(),
    )
424

425

426
@rule(desc="Determine if necessary to partition MyPy input", level=LogLevel.DEBUG)
async def mypy_determine_partitions(
    request: MyPyRequest, mypy: MyPy, python_setup: PythonSetup
) -> MyPyPartitions:
    """Group the requested field sets into one partition per (resolve, interpreter constraints)."""
    field_sets_by_key = _partition_by_interpreter_constraints_and_resolve(
        request.field_sets, python_setup
    )
    all_coarsened = await coarsened_targets_get(
        CoarsenedTargetsRequest(fs.address for fs in request.field_sets),
        **implicitly(),
    )
    by_address = all_coarsened.by_address()

    # Only label partitions with a resolve name when multiple resolves are configured.
    show_resolve = len(python_setup.resolves) > 1

    partitions = []
    for (resolve, interpreter_constraints), field_sets in sorted(field_sets_by_key.items()):
        roots = CoarsenedTargets(
            OrderedSet(by_address[fs.address] for fs in field_sets)
        )
        partitions.append(
            MyPyPartition(
                FrozenOrderedSet(field_sets),
                roots,
                resolve if show_resolve else None,
                interpreter_constraints or mypy.interpreter_constraints,
            )
        )
    return MyPyPartitions(partitions)
454

455

456
@rule(desc="Typecheck using MyPy", level=LogLevel.DEBUG)
async def mypy_typecheck(request: MyPyRequest, mypy: MyPy) -> CheckResults:
    """Entry point for `check` with MyPy: partition the inputs, then check each partition."""
    if mypy.skip:
        # Honor skipping without doing any partitioning work.
        return CheckResults([], checker_name=request.tool_name)

    partitions = await mypy_determine_partitions(request, **implicitly())
    results = await concurrently(
        mypy_typecheck_partition(partition, **implicitly()) for partition in partitions
    )
    return CheckResults(results, checker_name=request.tool_name)
466

467

468
def rules():
    """Register this backend's rules, the `check` union entry, and supporting pex rules."""
    registrations = list(collect_rules())
    registrations.append(UnionRule(CheckRequest, MyPyRequest))
    registrations.extend(pex_from_targets.rules())
    return registrations
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc