• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pantsbuild / pants / 18562858873

16 Oct 2025 01:26PM UTC coverage: 80.266% (-0.001%) from 80.267%
18562858873

Pull #22753

github

web-flow
Merge 87039b5c3 into bd4607b86
Pull Request #22753: revert PEX_EXTRA_SYS_PATH for mypy changes

1 of 2 new or added lines in 1 file covered. (50.0%)

2 existing lines in 2 files now uncovered.

77230 of 96218 relevant lines covered (80.27%)

3.63 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

56.78
/src/python/pants/backend/python/typecheck/mypy/rules.py
1
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
2
# Licensed under the Apache License, Version 2.0 (see LICENSE).
3

4
from __future__ import annotations
4✔
5

6
import dataclasses
4✔
7
import itertools
4✔
8
from collections.abc import Iterable
4✔
9
from dataclasses import dataclass
4✔
10
from hashlib import sha256
4✔
11
from textwrap import dedent  # noqa: PNT20
4✔
12

13
import packaging
4✔
14

15
from pants.backend.python.subsystems.setup import PythonSetup
4✔
16
from pants.backend.python.typecheck.mypy.subsystem import (
4✔
17
    MyPy,
18
    MyPyConfigFile,
19
    MyPyFieldSet,
20
    MyPyFirstPartyPlugins,
21
)
22
from pants.backend.python.util_rules import pex_from_targets
4✔
23
from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
4✔
24
from pants.backend.python.util_rules.partition import (
4✔
25
    _partition_by_interpreter_constraints_and_resolve,
26
)
27
from pants.backend.python.util_rules.pex import (
4✔
28
    PexRequest,
29
    VenvPex,
30
    VenvPexProcess,
31
    create_pex,
32
    create_venv_pex,
33
    determine_venv_pex_resolve_info,
34
    setup_venv_pex_process,
35
)
36
from pants.backend.python.util_rules.pex_from_targets import RequirementsPexRequest
4✔
37
from pants.backend.python.util_rules.python_sources import (
4✔
38
    PythonSourceFilesRequest,
39
    prepare_python_sources,
40
)
41
from pants.base.build_root import BuildRoot
4✔
42
from pants.core.goals.check import REPORT_DIR, CheckRequest, CheckResult, CheckResults
4✔
43
from pants.core.util_rules.source_files import SourceFilesRequest, determine_source_files
4✔
44
from pants.core.util_rules.system_binaries import (
4✔
45
    CpBinary,
46
    LnBinary,
47
    MkdirBinary,
48
    MktempBinary,
49
    MvBinary,
50
)
51
from pants.engine.collection import Collection
4✔
52
from pants.engine.fs import CreateDigest, FileContent, MergeDigests, RemovePrefix
4✔
53
from pants.engine.internals.graph import resolve_coarsened_targets as coarsened_targets_get
4✔
54
from pants.engine.intrinsics import create_digest, execute_process, merge_digests, remove_prefix
4✔
55
from pants.engine.rules import collect_rules, concurrently, implicitly, rule
4✔
56
from pants.engine.target import CoarsenedTargets, CoarsenedTargetsRequest
4✔
57
from pants.engine.unions import UnionRule
4✔
58
from pants.option.global_options import GlobalOptions
4✔
59
from pants.util.logging import LogLevel
4✔
60
from pants.util.ordered_set import FrozenOrderedSet, OrderedSet
4✔
61
from pants.util.strutil import pluralize, shell_quote
4✔
62

63

64
@dataclass(frozen=True)
class MyPyPartition:
    """One batch of field sets that is type-checked by a single MyPy invocation."""

    # The field sets whose sources are checked in this partition.
    field_sets: FrozenOrderedSet[MyPyFieldSet]
    # The coarsened targets that own the field sets above.
    root_targets: CoarsenedTargets
    # Label of the resolve for this partition, or None when no label is needed.
    resolve_description: str | None
    # Interpreter constraints that apply to the code in this partition.
    interpreter_constraints: InterpreterConstraints

    def description(self) -> str:
        """Return a display label for this partition.

        The label is the sorted list of interpreter constraints rendered as a string, prefixed
        with `"<resolve_description>, "` when a non-empty resolve description is set.
        """
        constraints_text = str(sorted(map(str, self.interpreter_constraints)))
        if not self.resolve_description:
            return constraints_text
        return f"{self.resolve_description}, {constraints_text}"
1✔
74

75

76
class MyPyPartitions(Collection[MyPyPartition]):
    """The collection of `MyPyPartition`s computed for one MyPy check request."""
4✔
78

79

80
class MyPyRequest(CheckRequest):
    """The `CheckRequest` implementation for MyPy, operating on `MyPyFieldSet`s."""

    # The tool is named after the MyPy subsystem's options scope.
    tool_name = MyPy.options_scope
    field_set_type = MyPyFieldSet
4✔
83

84

85
async def _generate_argv(
    mypy: MyPy,
    *,
    pex: VenvPex,
    cache_dir: str,
    venv_python: str,
    file_list_path: str,
    python_version: str | None,
) -> tuple[str, ...]:
    """Build the argv used to invoke MyPy.

    :param mypy: The MyPy subsystem; supplies the user's extra args and the config file path.
    :param pex: The venv PEX containing MyPy; its `argv0` is the executable.
    :param cache_dir: Directory passed as `--cache-dir` when caching is enabled.
    :param venv_python: Interpreter passed to MyPy as `--python-executable`.
    :param file_list_path: Path of the file listing the sources; passed as `@<path>`.
    :param python_version: Value for `--python-version`, or None to not set it.
    :return: The complete argument vector.

    Caching is only enabled when the resolved MyPy version is newer than 0.700 and a Python
    version is known; otherwise the cache is pointed at `/dev/null`.
    """
    # A bare `import packaging` does not load the `version` submodule, so `packaging.version`
    # would only resolve if some other module happened to import it first. Import it explicitly.
    import packaging.version

    args = [pex.pex.argv0, f"--python-executable={venv_python}", *mypy.args]
    if mypy.config:
        args.append(f"--config-file={mypy.config}")
    if python_version:
        args.append(f"--python-version={python_version}")

    mypy_pex_info = await determine_venv_pex_resolve_info(pex)
    mypy_info = mypy_pex_info.find("mypy")
    # The PEX was built for MyPy, so the distribution is expected to be present.
    assert mypy_info is not None
    if mypy_info.version > packaging.version.Version("0.700") and python_version is not None:
        # Skip mtime checks because we don't propagate mtime when materializing the sandbox, so the
        # mtime checks will always fail otherwise.
        args.append("--skip-cache-mtime-check")
        # See "__mypy_runner.sh" below for explanation
        args.append("--sqlite-cache")  # Added in v 0.660
        args.extend(("--cache-dir", cache_dir))
    else:
        # Don't bother caching
        args.append("--cache-dir=/dev/null")
    # The `@file` argument is appended last, after all options.
    args.append(f"@{file_list_path}")
    return tuple(args)
×
115

116

117
def determine_python_files(files: Iterable[str]) -> tuple[str, ...]:
    """Pick the files to pass to MyPy, letting a `.pyi` stub shadow its `.py` sibling.

    MyPy errors if it is told to check the same module through both its `.py` and its `.pyi`
    file, so when both are present only the `.pyi` stub is kept. Files with any other suffix
    are passed through unchanged.
    """
    selected: OrderedSet[str] = OrderedSet()
    for path in files:
        if path.endswith(".pyi"):
            # The stub wins: drop the `.py` counterpart (path minus the trailing "i") if it
            # was selected earlier.
            selected.discard(path[:-1])
            selected.add(path)
            continue
        if path.endswith(".py") and f"{path}i" in selected:
            # The stub for this module was already selected, so skip the source file.
            continue
        selected.add(path)
    return tuple(selected)
1✔
137

138

139
@rule
async def mypy_typecheck_partition(
    partition: MyPyPartition,
    config_file: MyPyConfigFile,
    first_party_plugins: MyPyFirstPartyPlugins,
    build_root: BuildRoot,
    mypy: MyPy,
    python_setup: PythonSetup,
    mkdir: MkdirBinary,
    mktemp: MktempBinary,
    cp: CpBinary,
    mv: MvBinary,
    ln: LnBinary,
    global_options: GlobalOptions,
) -> CheckResult:
    """Run MyPy over one partition and return its `CheckResult`.

    The rule:
      1. picks the interpreter constraints used to run MyPy itself,
      2. builds the MyPy PEX and a venv PEX holding the partition's requirements,
      3. writes the list of files to check to `__files.txt`,
      4. generates `__mypy_runner.sh`, which wraps MyPy with the cache copy-in/copy-out logic,
      5. executes that script in place of the PEX process's own argv and converts the result,
         with the contents of `REPORT_DIR` attached as the report.
    """
    # MyPy requires 3.5+ to run, but uses the typed-ast library to work with 2.7, 3.4, 3.5, 3.6,
    # and 3.7. However, typed-ast does not understand 3.8+, so instead we must run MyPy with
    # Python 3.8+ when relevant. We only do this if <3.8 can't be used, as we don't want a
    # loose requirement like `>=3.6` to result in requiring Python 3.8+, which would error if
    # 3.8+ is not installed on the machine.
    tool_interpreter_constraints = (
        partition.interpreter_constraints
        if (
            mypy.options.is_default("interpreter_constraints")
            and partition.interpreter_constraints.requires_python38_or_newer(
                python_setup.interpreter_versions_universe
            )
        )
        else mypy.interpreter_constraints
    )

    roots_sources_get = determine_source_files(
        SourceFilesRequest(fs.sources for fs in partition.field_sets)
    )

    # See `requirements_venv_pex` for how this will get wrapped in a `VenvPex`.
    requirements_pex_get = create_pex(
        **implicitly(
            RequirementsPexRequest(
                (fs.address for fs in partition.field_sets),
                hardcoded_interpreter_constraints=partition.interpreter_constraints,
            )
        )
    )

    mypy_pex_get = create_venv_pex(
        **implicitly(
            mypy.to_pex_request(
                interpreter_constraints=tool_interpreter_constraints,
                extra_requirements=first_party_plugins.requirement_strings,
            )
        )
    )

    (
        roots_sources,
        mypy_pex,
        requirements_pex,
    ) = await concurrently(
        roots_sources_get,
        mypy_pex_get,
        requirements_pex_get,
    )

    python_files = determine_python_files(roots_sources.snapshot.files)
    file_list_path = "__files.txt"
    file_list_digest_request = create_digest(
        CreateDigest([FileContent(file_list_path, "\n".join(python_files).encode())])
    )

    # This creates a venv with all the 3rd-party requirements used by the code. We tell MyPy to
    # use this venv by setting `--python-executable`. Note that this Python interpreter is
    # different than what we run MyPy with.
    #
    # We could have directly asked the `PexFromTargetsRequest` to return a `VenvPex`, rather than
    # `Pex`, but that would mean missing out on sharing a cache with other goals like `test` and
    # `run`.
    requirements_venv_pex_request = create_venv_pex(
        **implicitly(
            PexRequest(
                output_filename="requirements_venv.pex",
                internal_only=True,
                pex_path=[requirements_pex],
                interpreter_constraints=partition.interpreter_constraints,
            )
        )
    )
    closure_sources_get = prepare_python_sources(
        PythonSourceFilesRequest(partition.root_targets.closure()), **implicitly()
    )

    closure_sources, requirements_venv_pex, file_list_digest = await concurrently(
        closure_sources_get, requirements_venv_pex_request, file_list_digest_request
    )

    py_version = config_file.python_version_to_autoset(
        partition.interpreter_constraints, python_setup.interpreter_versions_universe
    )
    # The named cache is keyed by a hash of the build root path and, when set, by the resolve.
    named_cache_dir = ".cache/mypy_cache"
    mypy_cache_dir = f"{named_cache_dir}/{sha256(build_root.path.encode()).hexdigest()}"
    if partition.resolve_description:
        mypy_cache_dir += f"/{partition.resolve_description}"
    # MyPy itself only ever reads and writes this sandbox-local copy of the cache.
    run_cache_dir = ".tmp_cache/mypy_cache"
    argv = await _generate_argv(
        mypy,
        pex=mypy_pex,
        venv_python=requirements_venv_pex.python.argv0,
        cache_dir=run_cache_dir,
        file_list_path=file_list_path,
        python_version=py_version,
    )

    # NB: In the fallback branch of the script, the temporary file is created next to the named
    # cache db (not next to the sandbox db), so the final `mv` is a rename within a single
    # directory and therefore atomic even when the sandbox lives on a different filesystem.
    script_runner_digest = await create_digest(
        CreateDigest(
            [
                FileContent(
                    "__mypy_runner.sh",
                    dedent(
                        f"""\
                            # We want to leverage the MyPy cache for fast incremental runs of MyPy.
                            # Pants exposes "append_only_caches" we can leverage, but with the caveat
                            # that it requires either only appending files, or multiprocess-safe access.
                            #
                            # MyPy guarantees neither, but there's workarounds!
                            #
                            # By default, MyPy uses 2 cache files per source file, which introduces a
                            # whole slew of race conditions. We can minimize the race conditions by
                            # using MyPy's SQLite cache. MyPy still has race conditions when using the
                            # db, as it issues at least 2 single-row queries per source file at different
                            # points in time (therefore SQLite's own safety guarantees don't apply).
                            #
                            # Our workaround depends on whether we can hardlink between the sandbox
                            # and cache or not.
                            #
                            # If we can hardlink (this means the two sides of the link are on the
                            # same filesystem), then after mypy runs, we hardlink from the sandbox
                            # back to the named cache.
                            #
                            # If we can't hardlink, we resort to copying the result next to the
                            # cache under a temporary name, and finally doing an atomic mv from the
                            # tempfile to the real one.
                            #
                            # In either case, the result is an atomic replacement of the "old" named
                            # cache db, such that old references (via opened file descriptors) are
                            # still valid, but new references use the new contents.
                            #
                            # There is a chance of multiple processes thrashing on the cache, leaving
                            # it in a state that doesn't reflect reality at the current point in time,
                            # and forcing other processes to do potentially done work. This strategy
                            # still provides a net benefit because the cache is generally _mostly_
                            # valid (it includes entries for the standard library, and 3rdparty deps,
                            # among 1stparty sources), and even in the worst case
                            # (every single file has changed) the overhead of missing the cache each
                            # query should be small when compared to the work being done of typechecking.
                            #
                            # Lastly, we expect that since this is run through Pants which attempts
                            # to partition MyPy runs by python version (which the DB is independent
                            # for different versions) and uses a one-process-at-a-time daemon by default,
                            # multiple MyPy processes operating on a single db cache should be rare.

                            NAMED_CACHE_DIR="{mypy_cache_dir}/{py_version}"
                            NAMED_CACHE_DB="$NAMED_CACHE_DIR/cache.db"
                            SANDBOX_CACHE_DIR="{run_cache_dir}/{py_version}"
                            SANDBOX_CACHE_DB="$SANDBOX_CACHE_DIR/cache.db"

                            {mkdir.path} -p "$NAMED_CACHE_DIR" > /dev/null 2>&1
                            {mkdir.path} -p "$SANDBOX_CACHE_DIR" > /dev/null 2>&1
                            {cp.path} "$NAMED_CACHE_DB" "$SANDBOX_CACHE_DB" > /dev/null 2>&1

                            {" ".join((shell_quote(arg) for arg in argv))}
                            EXIT_CODE=$?

                            if ! {ln.path} "$SANDBOX_CACHE_DB" "$NAMED_CACHE_DB" > /dev/null 2>&1; then
                                TMP_CACHE=$({mktemp.path} "$NAMED_CACHE_DB.tmp.XXXXXX")
                                {cp.path} "$SANDBOX_CACHE_DB" "$TMP_CACHE" > /dev/null 2>&1
                                {mv.path} "$TMP_CACHE" "$NAMED_CACHE_DB" > /dev/null 2>&1
                            fi

                            exit $EXIT_CODE
                        """
                    ).encode(),
                    is_executable=True,
                )
            ]
        )
    )

    merged_input_files = await merge_digests(
        MergeDigests(
            [
                file_list_digest,
                first_party_plugins.sources_digest,
                closure_sources.source_files.snapshot.digest,
                requirements_venv_pex.digest,
                config_file.digest,
                script_runner_digest,
            ]
        )
    )

    # De-duplicate and sort the source roots of the plugins and of the checked code so that the
    # resulting environment values are deterministic.
    all_used_source_roots = sorted(
        set(itertools.chain(first_party_plugins.source_roots, closure_sources.source_roots))
    )

    env = {
        "PEX_EXTRA_SYS_PATH": ":".join(all_used_source_roots),
        "MYPYPATH": ":".join(all_used_source_roots),
        # Always emit colors to improve cache hit rates, the results are post-processed to match the
        # global setting
        "MYPY_FORCE_COLOR": "1",
        # Mypy needs to know the terminal so it can use appropriate escape sequences. ansi is a
        # reasonable lowest common denominator for the sort of escapes mypy uses (NB. TERM=xterm
        # uses some additional codes that colors.strip_color doesn't remove).
        "TERM": "ansi",
        # Force a fixed terminal width. This is effectively infinite, disabling mypy's
        # builtin truncation and line wrapping. Terminals do an acceptable job of soft-wrapping
        # diagnostic text and source code is typically already hard-wrapped to a limited width.
        # (Unique random number to make it easier to search for the source of this setting.)
        "MYPY_FORCE_TERMINAL_WIDTH": "642092230765939",
    }

    process = await setup_venv_pex_process(
        VenvPexProcess(
            mypy_pex,
            input_digest=merged_input_files,
            extra_env=env,
            output_directories=(REPORT_DIR,),
            description=f"Run MyPy on {pluralize(len(python_files), 'file')}.",
            level=LogLevel.DEBUG,
            append_only_caches={"mypy_cache": named_cache_dir},
        ),
        **implicitly(),
    )
    # Keep everything the PEX process set up, but execute the wrapper script, which embeds the
    # MyPy argv, instead of the process's own argv.
    process = dataclasses.replace(process, argv=("./__mypy_runner.sh",))
    result = await execute_process(process, **implicitly())
    report = await remove_prefix(RemovePrefix(result.output_digest, REPORT_DIR))
    return CheckResult.from_fallible_process_result(
        result,
        partition_description=partition.description(),
        report=report,
        output_simplifier=global_options.output_simplifier(),
    )
    )
381

382

383
@rule(desc="Determine if necessary to partition MyPy input", level=LogLevel.DEBUG)
async def mypy_determine_partitions(
    request: MyPyRequest, mypy: MyPy, python_setup: PythonSetup
) -> MyPyPartitions:
    """Split the request's field sets into one partition per (resolve, interpreter constraints).

    Each partition carries its field sets, the coarsened targets owning them, the resolve name
    (only when more than one resolve is configured), and the interpreter constraints, falling
    back to MyPy's own constraints when the partition's are empty.
    """
    grouped_field_sets = _partition_by_interpreter_constraints_and_resolve(
        request.field_sets, python_setup
    )
    coarsened = await coarsened_targets_get(
        CoarsenedTargetsRequest(fs.address for fs in request.field_sets),
        **implicitly(),
    )
    targets_by_address = coarsened.by_address()
    # The resolve is only recorded on partitions when there are several resolves.
    has_multiple_resolves = len(python_setup.resolves) > 1

    partitions = [
        MyPyPartition(
            FrozenOrderedSet(grouped),
            CoarsenedTargets(OrderedSet(targets_by_address[fs.address] for fs in grouped)),
            resolve if has_multiple_resolves else None,
            constraints or mypy.interpreter_constraints,
        )
        for (resolve, constraints), grouped in sorted(grouped_field_sets.items())
    ]
    return MyPyPartitions(partitions)
411

412

413
# TODO(#10864): Improve performance, e.g. by leveraging the MyPy cache.
414
@rule(desc="Typecheck using MyPy", level=LogLevel.DEBUG)
async def mypy_typecheck(request: MyPyRequest, mypy: MyPy) -> CheckResults:
    """Type-check every partition of the request concurrently and gather the results.

    Returns an empty result set without running anything when MyPy is skipped.
    """
    if not mypy.skip:
        partitions = await mypy_determine_partitions(request, **implicitly())
        results = await concurrently(
            mypy_typecheck_partition(part, **implicitly()) for part in partitions
        )
    else:
        results = []
    return CheckResults(results, checker_name=request.tool_name)
×
424

425

426
def rules():
    """Return this module's rules, the MyPy `CheckRequest` union registration, and the
    `pex_from_targets` rules."""
    own_rules = collect_rules()
    check_registration = UnionRule(CheckRequest, MyPyRequest)
    return [*own_rules, check_registration, *pex_from_targets.rules()]
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc