• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pantsbuild / pants / 22507600930

27 Feb 2026 11:18PM UTC coverage: 92.932%. First build
22507600930

Pull #23146

github

web-flow
Merge c3dc555a3 into 27363b1a1
Pull Request #23146: option to disable mypy caching

41 of 43 new or added lines in 3 files covered. (95.35%)

90944 of 97861 relevant lines covered (92.93%)

4.06 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.39
/src/python/pants/backend/python/typecheck/mypy/rules.py
1
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
2
# Licensed under the Apache License, Version 2.0 (see LICENSE).
3

4
from __future__ import annotations
4✔
5

6
import dataclasses
4✔
7
from collections.abc import Iterable
4✔
8
from dataclasses import dataclass
4✔
9
from hashlib import sha256
4✔
10
from textwrap import dedent  # noqa: PNT20
4✔
11

12
import packaging
import packaging.version
13

14
from pants.backend.python.subsystems.setup import PythonSetup
4✔
15
from pants.backend.python.typecheck.mypy.subsystem import (
4✔
16
    MyPy,
17
    MyPyCacheMode,
18
    MyPyConfigFile,
19
    MyPyFieldSet,
20
    MyPyFirstPartyPlugins,
21
)
22
from pants.backend.python.util_rules import pex_from_targets
4✔
23
from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
4✔
24
from pants.backend.python.util_rules.partition import (
4✔
25
    _partition_by_interpreter_constraints_and_resolve,
26
)
27
from pants.backend.python.util_rules.pex import (
4✔
28
    PexRequest,
29
    VenvPex,
30
    VenvPexProcess,
31
    create_pex,
32
    create_venv_pex,
33
    determine_venv_pex_resolve_info,
34
    setup_venv_pex_process,
35
)
36
from pants.backend.python.util_rules.pex_from_targets import RequirementsPexRequest
4✔
37
from pants.backend.python.util_rules.python_sources import (
4✔
38
    PythonSourceFilesRequest,
39
    prepare_python_sources,
40
)
41
from pants.base.build_root import BuildRoot
4✔
42
from pants.core.goals.check import (
4✔
43
    REPORT_DIR,
44
    CheckRequest,
45
    CheckResult,
46
    CheckResults,
47
    CheckSubsystem,
48
)
49
from pants.core.util_rules.source_files import SourceFilesRequest, determine_source_files
4✔
50
from pants.core.util_rules.system_binaries import (
4✔
51
    CpBinary,
52
    LnBinary,
53
    MkdirBinary,
54
    MktempBinary,
55
    MvBinary,
56
)
57
from pants.engine.collection import Collection
4✔
58
from pants.engine.fs import CreateDigest, FileContent, MergeDigests, RemovePrefix
4✔
59
from pants.engine.internals.graph import resolve_coarsened_targets as coarsened_targets_get
4✔
60
from pants.engine.intrinsics import create_digest, execute_process, merge_digests, remove_prefix
4✔
61
from pants.engine.rules import collect_rules, concurrently, implicitly, rule
4✔
62
from pants.engine.target import CoarsenedTargets, CoarsenedTargetsRequest
4✔
63
from pants.engine.unions import UnionRule
4✔
64
from pants.option.global_options import GlobalOptions
4✔
65
from pants.util.logging import LogLevel
4✔
66
from pants.util.ordered_set import FrozenOrderedSet, OrderedSet
4✔
67
from pants.util.strutil import pluralize, shell_quote
4✔
68

69

70
@dataclass(frozen=True)
class MyPyPartition:
    """A batch of field sets that share one resolve and one set of interpreter constraints."""

    field_sets: FrozenOrderedSet[MyPyFieldSet]
    root_targets: CoarsenedTargets
    resolve_description: str | None
    interpreter_constraints: InterpreterConstraints

    def description(self) -> str:
        """Render a human-readable partition label: `<resolve>, <ics>`, or just the ICs."""
        rendered_ics = str(sorted(str(constraint) for constraint in self.interpreter_constraints))
        if self.resolve_description:
            return f"{self.resolve_description}, {rendered_ics}"
        return rendered_ics
80

81

82
class MyPyPartitions(Collection[MyPyPartition]):
    """All `MyPyPartition`s produced for a single `MyPyRequest`."""

    pass
84

85

86
class MyPyRequest(CheckRequest):
    """`check` goal request that runs MyPy over matching field sets."""

    # Targets are selected for this check by matching this field set type.
    field_set_type = MyPyFieldSet
    # Shown to users as the checker's name (the `mypy` options scope).
    tool_name = MyPy.options_scope
89

90

91
def _get_cache_args(
    mypy_version: packaging.version.Version,
    python_version: str | None,
    cache_mode: MyPyCacheMode,
    cache_dir: str,
) -> tuple[str, ...]:
    """Compute the mypy flags that control its on-disk cache.

    The persistent SQLite cache is only enabled when the installed mypy is new
    enough, a python version was determined for the partition, and sqlite-mode
    caching was requested; otherwise the cache is pointed at /dev/null.
    """
    use_sqlite_cache = (
        mypy_version > packaging.version.Version("0.700")
        and python_version is not None
        and cache_mode == MyPyCacheMode.sqlite
    )
    if not use_sqlite_cache:
        return ("--cache-dir=/dev/null",)
    return (
        # The sandbox is materialized without preserving mtimes, so mypy's mtime
        # validation would always invalidate the cache; skip that check.
        "--skip-cache-mtime-check",
        # A single SQLite db (added in mypy 0.660) suits the named-cache update
        # dance performed by "__mypy_runner.sh" below.
        "--sqlite-cache",
        "--cache-dir",
        cache_dir,
    )
113

114

115
async def _generate_argv(
    mypy: MyPy,
    *,
    pex: VenvPex,
    cache_dir: str,
    venv_python: str,
    file_list_path: str,
    python_version: str | None,
) -> tuple[str, ...]:
    """Assemble mypy's argv: entry point, user args, config/python-version flags,
    cache flags, and the `@file-list` reference."""
    argv = [pex.pex.argv0, f"--python-executable={venv_python}"]
    argv.extend(mypy.args)
    if mypy.config:
        argv.append(f"--config-file={mypy.config}")
    if python_version:
        argv.append(f"--python-version={python_version}")

    resolve_info = await determine_venv_pex_resolve_info(pex)
    mypy_dist = resolve_info.find("mypy")
    # The pex was built from the mypy requirement, so the distribution must be present.
    assert mypy_dist is not None
    argv += _get_cache_args(mypy_dist.version, python_version, mypy.cache_mode, cache_dir)
    argv.append(f"@{file_list_path}")
    return tuple(argv)
136

137

138
def determine_python_files(files: Iterable[str]) -> tuple[str, ...]:
    """We run over all .py and .pyi files, but .pyi files take precedence.

    MyPy will error if we say to run over the same module with both its .py and .pyi files, so we
    must be careful to only use the .pyi stub.

    Order of first appearance is preserved. Implemented with a plain insertion-ordered
    dict rather than `OrderedSet` so this pure helper has no project dependencies;
    the semantics are identical (re-adding an existing key keeps its original position).
    """
    chosen: dict[str, None] = {}
    for path in files:
        if path.endswith(".pyi"):
            # The stub wins: drop the .py module if we already selected it.
            chosen.pop(path[:-1], None)  # strip the trailing "i" to get the .py name
            chosen[path] = None
        elif path.endswith(".py"):
            # Only take the .py if its stub hasn't already been selected.
            if f"{path}i" not in chosen:
                chosen[path] = None
        else:
            # Non-Python files (e.g. resources) pass through unchanged.
            chosen[path] = None

    return tuple(chosen)
158

159

160
@rule
async def mypy_typecheck_partition(
    partition: MyPyPartition,
    config_file: MyPyConfigFile,
    first_party_plugins: MyPyFirstPartyPlugins,
    build_root: BuildRoot,
    mypy: MyPy,
    check_subsystem: CheckSubsystem,
    python_setup: PythonSetup,
    mkdir: MkdirBinary,
    mktemp: MktempBinary,
    cp: CpBinary,
    mv: MvBinary,
    ln: LnBinary,
    global_options: GlobalOptions,
) -> CheckResult:
    """Type check one partition's files with MyPy.

    High-level flow:
      1. Concurrently resolve the partition's sources and build two PEXes: the MyPy
         tool itself (plus first-party plugins) and the partition's 3rd-party
         requirements (exposed to mypy via ``--python-executable``).
      2. Write the list of files to check (referenced as ``@__files.txt``) and merge
         all inputs into a single digest.
      3. Wrap the mypy invocation in ``__mypy_runner.sh``; unless caching is disabled,
         the script stages the named-cache SQLite db into the sandbox and atomically
         publishes it back after a successful run.
      4. Execute the script and convert the outcome into a ``CheckResult``.
    """
    # MyPy requires 3.5+ to run, but uses the typed-ast library to work with 2.7, 3.4, 3.5, 3.6,
    # and 3.7. However, typed-ast does not understand 3.8+, so instead we must run MyPy with
    # Python 3.8+ when relevant. We only do this if <3.8 can't be used, as we don't want a
    # loose requirement like `>=3.6` to result in requiring Python 3.8+, which would error if
    # 3.8+ is not installed on the machine.
    tool_interpreter_constraints = (
        partition.interpreter_constraints
        if (
            mypy.options.is_default("interpreter_constraints")
            and partition.interpreter_constraints.requires_python38_or_newer(
                python_setup.interpreter_versions_universe
            )
        )
        else mypy.interpreter_constraints
    )

    # Sources of the targets being checked directly (the partition's roots).
    roots_sources_get = determine_source_files(
        SourceFilesRequest(fs.sources for fs in partition.field_sets)
    )

    # See `requirements_venv_pex` for how this will get wrapped in a `VenvPex`.
    requirements_pex_get = create_pex(
        **implicitly(
            RequirementsPexRequest(
                (fs.address for fs in partition.field_sets),
                hardcoded_interpreter_constraints=partition.interpreter_constraints,
            )
        )
    )

    mypy_pex_get = create_venv_pex(
        **implicitly(
            mypy.to_pex_request(
                interpreter_constraints=tool_interpreter_constraints,
                extra_requirements=first_party_plugins.requirement_strings,
            )
        )
    )

    (
        roots_sources,
        mypy_pex,
        requirements_pex,
    ) = await concurrently(
        roots_sources_get,
        mypy_pex_get,
        requirements_pex_get,
    )

    # .pyi stubs shadow their .py counterparts (see `determine_python_files`).
    python_files = determine_python_files(roots_sources.snapshot.files)
    file_list_path = "__files.txt"
    file_list_digest_request = create_digest(
        CreateDigest([FileContent(file_list_path, "\n".join(python_files).encode())])
    )

    # This creates a venv with all the 3rd-party requirements used by the code. We tell MyPy to
    # use this venv by setting `--python-executable`. Note that this Python interpreter is
    # different than what we run MyPy with.
    #
    # We could have directly asked the `PexFromTargetsRequest` to return a `VenvPex`, rather than
    # `Pex`, but that would mean missing out on sharing a cache with other goals like `test` and
    # `run`.
    requirements_venv_pex_request = create_venv_pex(
        **implicitly(
            PexRequest(
                output_filename="requirements_venv.pex",
                internal_only=True,
                pex_path=[requirements_pex],
                interpreter_constraints=partition.interpreter_constraints,
            )
        )
    )
    # Transitive closure of sources, needed so mypy can follow imports (via MYPYPATH).
    closure_sources_get = prepare_python_sources(
        PythonSourceFilesRequest(partition.root_targets.closure()), **implicitly()
    )

    closure_sources, requirements_venv_pex, file_list_digest = await concurrently(
        closure_sources_get, requirements_venv_pex_request, file_list_digest_request
    )

    # Value for `--python-version`, if the config allows auto-setting it from the
    # partition's interpreter constraints (may be None).
    py_version = config_file.python_version_to_autoset(
        partition.interpreter_constraints, python_setup.interpreter_versions_universe
    )
    # Namespace the persistent cache by build root (and resolve, if any) so unrelated
    # workspaces/resolves don't share mypy cache state.
    named_cache_dir = ".cache/mypy_cache"
    mypy_cache_dir = f"{named_cache_dir}/{sha256(build_root.path.encode()).hexdigest()}"
    if partition.resolve_description:
        mypy_cache_dir += f"/{partition.resolve_description}"
    # Sandbox-local location where the runner script stages the cache for this run.
    run_cache_dir = ".tmp_cache/mypy_cache"
    argv = await _generate_argv(
        mypy,
        pex=mypy_pex,
        venv_python=requirements_venv_pex.python.argv0,
        cache_dir=run_cache_dir,
        file_list_path=file_list_path,
        python_version=py_version,
    )

    mypy_command = " ".join(shell_quote(arg) for arg in argv)

    # With caching disabled, the runner script is just the raw mypy command.
    if mypy.cache_mode == MyPyCacheMode.none:
        script_content = dedent(f"""\
            {mypy_command}
        """)
    else:
        sandbox_cache_dir = f"{run_cache_dir}/{py_version}"

        script_content = dedent(f"""\
            # We want to leverage the MyPy cache for fast incremental runs of MyPy.
            # Pants exposes "append_only_caches" we can leverage, but with the caveat
            # that it requires either only appending files, or multiprocess-safe access.
            #
            # MyPy guarantees neither, but there's workarounds!
            #
            # By default, MyPy uses 2 cache files per source file, which introduces a
            # whole slew of race conditions. We can minimize the race conditions by
            # using MyPy's SQLite cache. MyPy still has race conditions when using the
            # db, as it issues at least 2 single-row queries per source file at different
            # points in time (therefore SQLite's own safety guarantees don't apply).
            #
            # Our workaround depends on whether we can hardlink between the sandbox
            # and cache or not.
            #
            # If we can hardlink (this means the two sides of the link are on the
            # same filesystem), then after mypy runs, we hardlink from the sandbox
            # to a temp file in the named cache, then atomically rename it into place.
            #
            # If we can't hardlink, we resort to copying the result to a temp file
            # in the named cache, and finally doing an atomic mv from the tempfile
            # to the real one.
            #
            # In either case, the result is an atomic replacement of the "old" named
            # cache db, such that old references (via opened file descriptors) are
            # still valid, but new references use the new contents.
            #
            # There is a chance of multiple processes thrashing on the cache, leaving
            # it in a state that doesn't reflect reality at the current point in time,
            # and forcing other processes to do potentially done work. This strategy
            # still provides a net benefit because the cache is generally _mostly_
            # valid (it includes entries for the standard library, and 3rdparty deps,
            # among 1stparty sources), and even in the worst case
            # (every single file has changed) the overhead of missing the cache each
            # query should be small when compared to the work being done of typechecking.
            #
            # Lastly, we expect that since this is run through Pants which attempts
            # to partition MyPy runs by python version (which the DB is independent
            # for different versions) and uses a one-process-at-a-time daemon by default,
            # multiple MyPy processes operating on a single db cache should be rare.

            NAMED_CACHE_DIR="{mypy_cache_dir}/{py_version}"
            NAMED_CACHE_DB="$NAMED_CACHE_DIR/cache.db"
            SANDBOX_CACHE_DIR="{sandbox_cache_dir}"
            SANDBOX_CACHE_DB="$SANDBOX_CACHE_DIR/cache.db"

            {mkdir.path} -p "$NAMED_CACHE_DIR" > /dev/null 2>&1
            {mkdir.path} -p "$SANDBOX_CACHE_DIR" > /dev/null 2>&1
            {cp.path} "$NAMED_CACHE_DB" "$SANDBOX_CACHE_DB" > /dev/null 2>&1

            {mypy_command}
            EXIT_CODE=$?

            # Only update the cache on successful runs (exit code 0 or 1).
            # Exit code 2 indicates a crash or internal error, which may have
            # left the cache in an inconsistent state.
            # See https://github.com/python/mypy/issues/6003 for exit codes
            if [ $EXIT_CODE -le 1 ]; then
                if LN_TMP=$({mktemp.path} -u "$NAMED_CACHE_DB.tmp.XXXXXX") &&
                   {ln.path} "$SANDBOX_CACHE_DB" "$LN_TMP" > /dev/null 2>&1; then
                    {mv.path} "$LN_TMP" "$NAMED_CACHE_DB" > /dev/null 2>&1
                else
                    CP_TMP=$({mktemp.path} "$NAMED_CACHE_DB.tmp.XXXXXX") &&
                        {cp.path} "$SANDBOX_CACHE_DB" "$CP_TMP" > /dev/null 2>&1 &&
                        {mv.path} "$CP_TMP" "$NAMED_CACHE_DB" > /dev/null 2>&1
                fi
            fi

            exit $EXIT_CODE
        """)

    script_runner_digest = await create_digest(
        CreateDigest(
            [
                FileContent(
                    "__mypy_runner.sh",
                    script_content.encode(),
                    is_executable=True,
                )
            ]
        )
    )

    merged_input_files = await merge_digests(
        MergeDigests(
            [
                file_list_digest,
                first_party_plugins.sources_digest,
                closure_sources.source_files.snapshot.digest,
                requirements_venv_pex.digest,
                config_file.digest,
                script_runner_digest,
            ]
        )
    )

    env = {
        "PEX_EXTRA_SYS_PATH": ":".join(first_party_plugins.source_roots),
        "MYPYPATH": ":".join(closure_sources.source_roots),
        # Always emit colors to improve cache hit rates, the results are post-processed to match the
        # global setting
        "MYPY_FORCE_COLOR": "1",
        # Mypy needs to know the terminal so it can use appropriate escape sequences. ansi is a
        # reasonable lowest common denominator for the sort of escapes mypy uses (NB. TERM=xterm
        # uses some additional codes that colors.strip_color doesn't remove).
        "TERM": "ansi",
        # Force a fixed terminal width. This is effectively infinite, disabling mypy's
        # builtin truncation and line wrapping. Terminals do an acceptable job of soft-wrapping
        # diagnostic text and source code is typically already hard-wrapped to a limited width.
        # (Unique random number to make it easier to search for the source of this setting.)
        "MYPY_FORCE_TERMINAL_WIDTH": "642092230765939",
    }

    # Only use append_only_caches when caching is enabled
    if mypy.cache_mode == MyPyCacheMode.none:
        append_only_caches = {}
    else:
        append_only_caches = {"mypy_cache": named_cache_dir}

    process = await setup_venv_pex_process(
        VenvPexProcess(
            mypy_pex,
            input_digest=merged_input_files,
            extra_env=env,
            output_directories=(REPORT_DIR,),
            description=f"Run MyPy on {pluralize(len(python_files), 'file')}.",
            level=LogLevel.DEBUG,
            cache_scope=check_subsystem.default_process_cache_scope,
            append_only_caches=append_only_caches,
        ),
        **implicitly(),
    )
    # Swap the argv so the sandbox executes the wrapper script instead of invoking the
    # mypy PEX directly; the VenvPexProcess still supplies env, digests, and caches.
    process = dataclasses.replace(process, argv=("./__mypy_runner.sh",))
    result = await execute_process(process, **implicitly())
    # Anything the run wrote under REPORT_DIR becomes the check report.
    report = await remove_prefix(RemovePrefix(result.output_digest, REPORT_DIR))
    return CheckResult.from_fallible_process_result(
        result,
        partition_description=partition.description(),
        report=report,
        output_simplifier=global_options.output_simplifier(),
    )
424

425

426
@rule(desc="Determine if necessary to partition MyPy input", level=LogLevel.DEBUG)
async def mypy_determine_partitions(
    request: MyPyRequest, mypy: MyPy, python_setup: PythonSetup
) -> MyPyPartitions:
    """Group the requested field sets into one partition per (resolve, interpreter constraints)."""
    field_sets_by_key = _partition_by_interpreter_constraints_and_resolve(
        request.field_sets, python_setup
    )
    all_coarsened = await coarsened_targets_get(
        CoarsenedTargetsRequest(fs.address for fs in request.field_sets),
        **implicitly(),
    )
    by_address = all_coarsened.by_address()

    # Only label partitions with a resolve name when multiple resolves are configured.
    show_resolve = len(python_setup.resolves) > 1

    partitions = []
    for (resolve, interpreter_constraints), field_sets in sorted(field_sets_by_key.items()):
        roots = CoarsenedTargets(
            OrderedSet(by_address[fs.address] for fs in field_sets)
        )
        partitions.append(
            MyPyPartition(
                FrozenOrderedSet(field_sets),
                roots,
                resolve if show_resolve else None,
                interpreter_constraints or mypy.interpreter_constraints,
            )
        )
    return MyPyPartitions(partitions)
454

455

456
@rule(desc="Typecheck using MyPy", level=LogLevel.DEBUG)
async def mypy_typecheck(request: MyPyRequest, mypy: MyPy) -> CheckResults:
    """Entry point for `check` with MyPy: partition the inputs, then check each partition."""
    if mypy.skip:
        # Honor skipping without doing any partitioning work.
        return CheckResults([], checker_name=request.tool_name)

    partitions = await mypy_determine_partitions(request, **implicitly())
    results = await concurrently(
        mypy_typecheck_partition(partition, **implicitly()) for partition in partitions
    )
    return CheckResults(results, checker_name=request.tool_name)
466

467

468
def rules():
    """Register this backend's rules, the `check` union entry, and supporting pex rules."""
    registrations = list(collect_rules())
    registrations.append(UnionRule(CheckRequest, MyPyRequest))
    registrations.extend(pex_from_targets.rules())
    return registrations
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc