
pantsbuild / pants / 18252174847

05 Oct 2025 01:36AM UTC coverage: 43.382% (-36.9 percentage points from 80.261%)

push · github · web-flow
run tests on mac arm (#22717)

Just doing the minimal to pull forward the x86_64 pattern.

ref #20993

25776 of 59416 relevant lines covered (43.38%)

1.3 hits per line

Source File
44.83% covered: /src/python/pants/backend/python/typecheck/mypy/rules.py
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import annotations

import dataclasses
from collections.abc import Iterable
from dataclasses import dataclass
from hashlib import sha256
from textwrap import dedent  # noqa: PNT20

import packaging

from pants.backend.python.subsystems.setup import PythonSetup
from pants.backend.python.typecheck.mypy.subsystem import (
    MyPy,
    MyPyConfigFile,
    MyPyFieldSet,
    MyPyFirstPartyPlugins,
)
from pants.backend.python.util_rules import pex_from_targets
from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
from pants.backend.python.util_rules.partition import (
    _partition_by_interpreter_constraints_and_resolve,
)
from pants.backend.python.util_rules.pex import (
    PexRequest,
    VenvPex,
    VenvPexProcess,
    create_pex,
    create_venv_pex,
    determine_venv_pex_resolve_info,
    setup_venv_pex_process,
)
from pants.backend.python.util_rules.pex_from_targets import RequirementsPexRequest
from pants.backend.python.util_rules.python_sources import (
    PythonSourceFilesRequest,
    prepare_python_sources,
)
from pants.base.build_root import BuildRoot
from pants.core.goals.check import REPORT_DIR, CheckRequest, CheckResult, CheckResults
from pants.core.util_rules.source_files import SourceFilesRequest, determine_source_files
from pants.core.util_rules.system_binaries import (
    CpBinary,
    LnBinary,
    MkdirBinary,
    MktempBinary,
    MvBinary,
)
from pants.engine.collection import Collection
from pants.engine.fs import CreateDigest, FileContent, MergeDigests, RemovePrefix
from pants.engine.internals.graph import resolve_coarsened_targets as coarsened_targets_get
from pants.engine.intrinsics import create_digest, execute_process, merge_digests, remove_prefix
from pants.engine.rules import collect_rules, concurrently, implicitly, rule
from pants.engine.target import CoarsenedTargets, CoarsenedTargetsRequest
from pants.engine.unions import UnionRule
from pants.option.global_options import GlobalOptions
from pants.util.logging import LogLevel
from pants.util.ordered_set import FrozenOrderedSet, OrderedSet
from pants.util.strutil import pluralize, shell_quote


@dataclass(frozen=True)
class MyPyPartition:
    field_sets: FrozenOrderedSet[MyPyFieldSet]
    root_targets: CoarsenedTargets
    resolve_description: str | None
    interpreter_constraints: InterpreterConstraints

    def description(self) -> str:
        ics = str(sorted(str(c) for c in self.interpreter_constraints))
        return f"{self.resolve_description}, {ics}" if self.resolve_description else ics


class MyPyPartitions(Collection[MyPyPartition]):
    pass


class MyPyRequest(CheckRequest):
    field_set_type = MyPyFieldSet
    tool_name = MyPy.options_scope


async def _generate_argv(
    mypy: MyPy,
    *,
    pex: VenvPex,
    cache_dir: str,
    venv_python: str,
    file_list_path: str,
    python_version: str | None,
) -> tuple[str, ...]:
    args = [pex.pex.argv0, f"--python-executable={venv_python}", *mypy.args]
    if mypy.config:
        args.append(f"--config-file={mypy.config}")
    if python_version:
        args.append(f"--python-version={python_version}")

    mypy_pex_info = await determine_venv_pex_resolve_info(pex)
    mypy_info = mypy_pex_info.find("mypy")
    assert mypy_info is not None
    if mypy_info.version > packaging.version.Version("0.700") and python_version is not None:
        # Skip mtime checks because we don't propagate mtime when materializing the sandbox, so the
        # mtime checks will always fail otherwise.
        args.append("--skip-cache-mtime-check")
        # See "__mypy_runner.sh" below for explanation
        args.append("--sqlite-cache")  # Added in v 0.660
        args.extend(("--cache-dir", cache_dir))
    else:
        # Don't bother caching
        args.append("--cache-dir=/dev/null")
    args.append(f"@{file_list_path}")
    return tuple(args)
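
# Illustrative note (not part of the original file): assuming the resolved mypy is
# newer than 0.700 and a python_version was auto-set, the argv assembled above looks
# roughly like:
#   (<mypy PEX entry point>, "--python-executable=<requirements venv python>",
#    <user-supplied [mypy].args>, "--config-file=<config>", "--python-version=<X.Y>",
#    "--skip-cache-mtime-check", "--sqlite-cache", "--cache-dir", "<run cache dir>",
#    "@__files.txt")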


def determine_python_files(files: Iterable[str]) -> tuple[str, ...]:
    """We run over all .py and .pyi files, but .pyi files take precedence.

    MyPy will error if we say to run over the same module with both its .py and .pyi files, so we
    must be careful to only use the .pyi stub.
    """
    result: OrderedSet[str] = OrderedSet()
    for f in files:
        if f.endswith(".pyi"):
            py_file = f[:-1]  # That is, strip the `.pyi` suffix to be `.py`.
            result.discard(py_file)
            result.add(f)
        elif f.endswith(".py"):
            pyi_file = f + "i"
            if pyi_file not in result:
                result.add(f)
        else:
            result.add(f)

    return tuple(result)

137

138
@rule
3✔
139
async def mypy_typecheck_partition(
3✔
140
    partition: MyPyPartition,
141
    config_file: MyPyConfigFile,
142
    first_party_plugins: MyPyFirstPartyPlugins,
143
    build_root: BuildRoot,
144
    mypy: MyPy,
145
    python_setup: PythonSetup,
146
    mkdir: MkdirBinary,
147
    mktemp: MktempBinary,
148
    cp: CpBinary,
149
    mv: MvBinary,
150
    ln: LnBinary,
151
    global_options: GlobalOptions,
152
) -> CheckResult:
153
    # MyPy requires 3.5+ to run, but uses the typed-ast library to work with 2.7, 3.4, 3.5, 3.6,
154
    # and 3.7. However, typed-ast does not understand 3.8+, so instead we must run MyPy with
155
    # Python 3.8+ when relevant. We only do this if <3.8 can't be used, as we don't want a
156
    # loose requirement like `>=3.6` to result in requiring Python 3.8+, which would error if
157
    # 3.8+ is not installed on the machine.
158
    tool_interpreter_constraints = (
×
159
        partition.interpreter_constraints
160
        if (
161
            mypy.options.is_default("interpreter_constraints")
162
            and partition.interpreter_constraints.requires_python38_or_newer(
163
                python_setup.interpreter_versions_universe
164
            )
165
        )
166
        else mypy.interpreter_constraints
167
    )
168

169
    roots_sources_get = determine_source_files(
×
170
        SourceFilesRequest(fs.sources for fs in partition.field_sets)
171
    )
172

173
    # See `requirements_venv_pex` for how this will get wrapped in a `VenvPex`.
174
    requirements_pex_get = create_pex(
×
175
        **implicitly(
176
            RequirementsPexRequest(
177
                (fs.address for fs in partition.field_sets),
178
                hardcoded_interpreter_constraints=partition.interpreter_constraints,
179
            )
180
        )
181
    )
182

183
    mypy_pex_get = create_venv_pex(
×
184
        **implicitly(
185
            mypy.to_pex_request(
186
                interpreter_constraints=tool_interpreter_constraints,
187
                extra_requirements=first_party_plugins.requirement_strings,
188
            )
189
        )
190
    )
191

192
    (
×
193
        roots_sources,
194
        mypy_pex,
195
        requirements_pex,
196
    ) = await concurrently(
197
        roots_sources_get,
198
        mypy_pex_get,
199
        requirements_pex_get,
200
    )
201

202
    python_files = determine_python_files(roots_sources.snapshot.files)
×
203
    file_list_path = "__files.txt"
×
204
    file_list_digest_request = create_digest(
×
205
        CreateDigest([FileContent(file_list_path, "\n".join(python_files).encode())])
206
    )
207

208
    # This creates a venv with all the 3rd-party requirements used by the code. We tell MyPy to
209
    # use this venv by setting `--python-executable`. Note that this Python interpreter is
210
    # different than what we run MyPy with.
211
    #
212
    # We could have directly asked the `PexFromTargetsRequest` to return a `VenvPex`, rather than
213
    # `Pex`, but that would mean missing out on sharing a cache with other goals like `test` and
214
    # `run`.
215
    requirements_venv_pex_request = create_venv_pex(
×
216
        **implicitly(
217
            PexRequest(
218
                output_filename="requirements_venv.pex",
219
                internal_only=True,
220
                pex_path=[requirements_pex],
221
                interpreter_constraints=partition.interpreter_constraints,
222
            )
223
        )
224
    )
225
    closure_sources_get = prepare_python_sources(
×
226
        PythonSourceFilesRequest(partition.root_targets.closure()), **implicitly()
227
    )
228

229
    closure_sources, requirements_venv_pex, file_list_digest = await concurrently(
×
230
        closure_sources_get, requirements_venv_pex_request, file_list_digest_request
231
    )
232

233
    py_version = config_file.python_version_to_autoset(
×
234
        partition.interpreter_constraints, python_setup.interpreter_versions_universe
235
    )
236
    named_cache_dir = ".cache/mypy_cache"
×
237
    mypy_cache_dir = f"{named_cache_dir}/{sha256(build_root.path.encode()).hexdigest()}"
×
238
    if partition.resolve_description:
×
239
        mypy_cache_dir += f"/{partition.resolve_description}"
×
240
    run_cache_dir = ".tmp_cache/mypy_cache"
×
241
    argv = await _generate_argv(
×
242
        mypy,
243
        pex=mypy_pex,
244
        venv_python=requirements_venv_pex.python.argv0,
245
        cache_dir=run_cache_dir,
246
        file_list_path=file_list_path,
247
        python_version=py_version,
248
    )
249

    script_runner_digest = await create_digest(
        CreateDigest(
            [
                FileContent(
                    "__mypy_runner.sh",
                    dedent(
                        f"""\
                            # We want to leverage the MyPy cache for fast incremental runs of MyPy.
                            # Pants exposes "append_only_caches" we can leverage, but with the caveat
                            # that it requires either only appending files, or multiprocess-safe access.
                            #
                            # MyPy guarantees neither, but there are workarounds!
                            #
                            # By default, MyPy uses 2 cache files per source file, which introduces a
                            # whole slew of race conditions. We can minimize the race conditions by
                            # using MyPy's SQLite cache. MyPy still has race conditions when using the
                            # db, as it issues at least 2 single-row queries per source file at different
                            # points in time (therefore SQLite's own safety guarantees don't apply).
                            #
                            # Our workaround depends on whether we can hardlink between the sandbox
                            # and cache or not.
                            #
                            # If we can hardlink (this means the two sides of the link are on the
                            # same filesystem), then after mypy runs, we hardlink from the sandbox
                            # back to the named cache.
                            #
                            # If we can't hardlink, we resort to copying the result next to the
                            # cache under a temporary name, and finally doing an atomic mv from the
                            # tempfile to the real one.
                            #
                            # In either case, the result is an atomic replacement of the "old" named
                            # cache db, such that old references (via opened file descriptors) are
                            # still valid, but new references use the new contents.
                            #
                            # There is a chance of multiple processes thrashing on the cache, leaving
                            # it in a state that doesn't reflect reality at the current point in time,
                            # and forcing other processes to redo work that may already be done. This
                            # strategy still provides a net benefit because the cache is generally
                            # _mostly_ valid (it includes entries for the standard library, 3rdparty
                            # deps, and 1stparty sources), and even in the worst case
                            # (every single file has changed) the overhead of missing the cache on each
                            # query should be small compared to the typechecking work itself.
                            #
                            # Lastly, we expect that since this is run through Pants, which attempts
                            # to partition MyPy runs by python version (the db is independent for each
                            # version) and uses a one-process-at-a-time daemon by default,
                            # multiple MyPy processes operating on a single db cache should be rare.

                            NAMED_CACHE_DIR="{mypy_cache_dir}/{py_version}"
                            NAMED_CACHE_DB="$NAMED_CACHE_DIR/cache.db"
                            SANDBOX_CACHE_DIR="{run_cache_dir}/{py_version}"
                            SANDBOX_CACHE_DB="$SANDBOX_CACHE_DIR/cache.db"

                            {mkdir.path} -p "$NAMED_CACHE_DIR" > /dev/null 2>&1
                            {mkdir.path} -p "$SANDBOX_CACHE_DIR" > /dev/null 2>&1
                            {cp.path} "$NAMED_CACHE_DB" "$SANDBOX_CACHE_DB" > /dev/null 2>&1

                            {" ".join((shell_quote(arg) for arg in argv))}
                            EXIT_CODE=$?

                            if ! {ln.path} "$SANDBOX_CACHE_DB" "$NAMED_CACHE_DB" > /dev/null 2>&1; then
                                TMP_CACHE=$({mktemp.path} "$SANDBOX_CACHE_DB.tmp.XXXXXX")
                                {cp.path} "$SANDBOX_CACHE_DB" "$TMP_CACHE" > /dev/null 2>&1
                                {mv.path} "$TMP_CACHE" "$NAMED_CACHE_DB" > /dev/null 2>&1
                            fi

                            exit $EXIT_CODE
                        """
                    ).encode(),
                    is_executable=True,
                )
            ]
        )
    )

    merged_input_files = await merge_digests(
        MergeDigests(
            [
                file_list_digest,
                first_party_plugins.sources_digest,
                closure_sources.source_files.snapshot.digest,
                requirements_venv_pex.digest,
                config_file.digest,
                script_runner_digest,
            ]
        )
    )

    env = {
        "PEX_EXTRA_SYS_PATH": ":".join(first_party_plugins.source_roots),
        "MYPYPATH": ":".join(closure_sources.source_roots),
        # Always emit colors to improve cache hit rates; the results are post-processed to match
        # the global setting.
        "MYPY_FORCE_COLOR": "1",
        # Mypy needs to know the terminal so it can use appropriate escape sequences. ansi is a
        # reasonable lowest common denominator for the sort of escapes mypy uses (NB. TERM=xterm
        # uses some additional codes that colors.strip_color doesn't remove).
        "TERM": "ansi",
        # Force a fixed terminal width. This is effectively infinite, disabling mypy's
        # builtin truncation and line wrapping. Terminals do an acceptable job of soft-wrapping
        # diagnostic text and source code is typically already hard-wrapped to a limited width.
        # (Unique random number to make it easier to search for the source of this setting.)
        "MYPY_FORCE_TERMINAL_WIDTH": "642092230765939",
    }

    process = await setup_venv_pex_process(
        VenvPexProcess(
            mypy_pex,
            input_digest=merged_input_files,
            extra_env=env,
            output_directories=(REPORT_DIR,),
            description=f"Run MyPy on {pluralize(len(python_files), 'file')}.",
            level=LogLevel.DEBUG,
            append_only_caches={"mypy_cache": named_cache_dir},
        ),
        **implicitly(),
    )
    process = dataclasses.replace(process, argv=("./__mypy_runner.sh",))
    result = await execute_process(process, **implicitly())
    report = await remove_prefix(RemovePrefix(result.output_digest, REPORT_DIR))
    return CheckResult.from_fallible_process_result(
        result,
        partition_description=partition.description(),
        report=report,
        output_simplifier=global_options.output_simplifier(),
    )


@rule(desc="Determine if necessary to partition MyPy input", level=LogLevel.DEBUG)
async def mypy_determine_partitions(
    request: MyPyRequest, mypy: MyPy, python_setup: PythonSetup
) -> MyPyPartitions:
    resolve_and_interpreter_constraints_to_field_sets = (
        _partition_by_interpreter_constraints_and_resolve(request.field_sets, python_setup)
    )
    coarsened_targets = await coarsened_targets_get(
        CoarsenedTargetsRequest(field_set.address for field_set in request.field_sets),
        **implicitly(),
    )
    coarsened_targets_by_address = coarsened_targets.by_address()

    return MyPyPartitions(
        MyPyPartition(
            FrozenOrderedSet(field_sets),
            CoarsenedTargets(
                OrderedSet(
                    coarsened_targets_by_address[field_set.address] for field_set in field_sets
                )
            ),
            resolve if len(python_setup.resolves) > 1 else None,
            interpreter_constraints or mypy.interpreter_constraints,
        )
        for (resolve, interpreter_constraints), field_sets in sorted(
            resolve_and_interpreter_constraints_to_field_sets.items()
        )
    )


# TODO(#10864): Improve performance, e.g. by leveraging the MyPy cache.
@rule(desc="Typecheck using MyPy", level=LogLevel.DEBUG)
async def mypy_typecheck(request: MyPyRequest, mypy: MyPy) -> CheckResults:
    if mypy.skip:
        return CheckResults([], checker_name=request.tool_name)

    partitions = await mypy_determine_partitions(request, **implicitly())
    partitioned_results = await concurrently(
        mypy_typecheck_partition(partition, **implicitly()) for partition in partitions
    )
    return CheckResults(partitioned_results, checker_name=request.tool_name)


def rules():
    return [
        *collect_rules(),
        UnionRule(CheckRequest, MyPyRequest),
        *pex_from_targets.rules(),
    ]
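
For reference (not part of rules.py): the rules() function above is the standard hook a Pants backend uses to expose its rules. A backend's register.py typically re-exports them, roughly like this sketch (simplified; the real register module typically aggregates additional rule sets as well):

    # register.py (illustrative sketch, not the actual file)
    from pants.backend.python.typecheck.mypy import rules as mypy_rules

    def rules():
        return [*mypy_rules.rules()]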