• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pantsbuild / pants / 19551203802

20 Nov 2025 09:01PM UTC coverage: 80.295% (-0.009%) from 80.304%
19551203802

push

github

web-flow
Support large numbers of input files to Pyright. (#22901)

Pyright can read input file paths from stdin. This helps
avoid exceeding the max command line length.

This change uses this feature when possible, to support
large numbers of input files. 

Fixes https://github.com/pantsbuild/pants/issues/22779

11 of 24 new or added lines in 2 files covered. (45.83%)

3 existing lines in 1 file now uncovered.

78385 of 97621 relevant lines covered (80.3%)

3.36 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

54.89
/src/python/pants/backend/python/typecheck/pyright/rules.py
1
# Copyright 2022 Pants project contributors (see CONTRIBUTORS.md).
2
# Licensed under the Apache License, Version 2.0 (see LICENSE).
3

4
from __future__ import annotations
1✔
5

6
import dataclasses
1✔
7
import json
1✔
8
import logging
1✔
9
import os
1✔
10
import shlex
1✔
11
from collections.abc import Iterable
1✔
12
from dataclasses import dataclass, replace
1✔
13

14
import toml
1✔
15

16
from pants.backend.javascript.subsystems import nodejs_tool
1✔
17
from pants.backend.javascript.subsystems.nodejs import NodeJS
1✔
18
from pants.backend.javascript.subsystems.nodejs_tool import prepare_tool_process
1✔
19
from pants.backend.python.subsystems.setup import PythonSetup
1✔
20
from pants.backend.python.target_types import (
1✔
21
    InterpreterConstraintsField,
22
    PythonResolveField,
23
    PythonSourceField,
24
)
25
from pants.backend.python.typecheck.pyright.skip_field import SkipPyrightField
1✔
26
from pants.backend.python.typecheck.pyright.subsystem import Pyright
1✔
27
from pants.backend.python.util_rules import pex_from_targets
1✔
28
from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
1✔
29
from pants.backend.python.util_rules.partition import (
1✔
30
    _partition_by_interpreter_constraints_and_resolve,
31
)
32
from pants.backend.python.util_rules.pex import (
1✔
33
    PexRequest,
34
    VenvPexProcess,
35
    VenvPexRequest,
36
    create_pex,
37
    create_venv_pex,
38
)
39
from pants.backend.python.util_rules.pex_environment import PexEnvironment
1✔
40
from pants.backend.python.util_rules.pex_from_targets import RequirementsPexRequest
1✔
41
from pants.backend.python.util_rules.python_sources import (
1✔
42
    PythonSourceFilesRequest,
43
    prepare_python_sources,
44
)
45
from pants.core.goals.check import CheckRequest, CheckResult, CheckResults
1✔
46
from pants.core.util_rules import config_files
1✔
47
from pants.core.util_rules.config_files import ConfigFiles, find_config_file
1✔
48
from pants.core.util_rules.source_files import SourceFilesRequest, determine_source_files
1✔
49
from pants.core.util_rules.system_binaries import CatBinary, ShBinary
1✔
50
from pants.engine.collection import Collection
1✔
51
from pants.engine.fs import CreateDigest, FileContent
1✔
52
from pants.engine.internals.graph import resolve_coarsened_targets as coarsened_targets_get
1✔
53
from pants.engine.internals.native_engine import Digest, MergeDigests
1✔
54
from pants.engine.internals.selectors import concurrently
1✔
55
from pants.engine.intrinsics import (
1✔
56
    create_digest,
57
    execute_process,
58
    get_digest_contents,
59
    merge_digests,
60
)
61
from pants.engine.process import ProcessCacheScope, execute_process_or_raise
1✔
62
from pants.engine.rules import Rule, collect_rules, implicitly, rule
1✔
63
from pants.engine.target import CoarsenedTargets, CoarsenedTargetsRequest, FieldSet, Target
1✔
64
from pants.engine.unions import UnionRule
1✔
65
from pants.util.logging import LogLevel
1✔
66
from pants.util.ordered_set import FrozenOrderedSet, OrderedSet
1✔
67
from pants.util.strutil import pluralize
1✔
68

69
logger = logging.getLogger(__name__)
1✔
70

71

72
@dataclass(frozen=True)
class PyrightFieldSet(FieldSet):
    """The target fields consumed by the Pyright check.

    Only `PythonSourceField` is listed in `required_fields`.
    """

    required_fields = (PythonSourceField,)

    sources: PythonSourceField
    resolve: PythonResolveField
    interpreter_constraints: InterpreterConstraintsField

    @classmethod
    def opt_out(cls, tgt: Target) -> bool:
        """Return the value of the target's `SkipPyrightField`."""
        skip_field = tgt.get(SkipPyrightField)
        return skip_field.value
×
83

84

85
class PyrightRequest(CheckRequest):
    """`CheckRequest` for Pyright, operating on `PyrightFieldSet`s."""

    # The tool is named after the Pyright subsystem's options scope.
    tool_name = Pyright.options_scope
    field_set_type = PyrightFieldSet
1✔
88

89

90
@dataclass(frozen=True)
class PyrightPartition:
    """A set of field sets, with their coarsened root targets, sharing one resolve description
    and one set of interpreter constraints."""

    field_sets: FrozenOrderedSet[PyrightFieldSet]
    root_targets: CoarsenedTargets
    resolve_description: str | None
    interpreter_constraints: InterpreterConstraints

    def description(self) -> str:
        """Return the sorted interpreter constraints rendered as a list, prefixed with
        `resolve_description` and a comma when that description is truthy."""
        constraints = str(sorted(map(str, self.interpreter_constraints)))
        if not self.resolve_description:
            return constraints
        return f"{self.resolve_description}, {constraints}"
1✔
100

101

102
class PyrightPartitions(Collection[PyrightPartition]):
    """A `Collection` of `PyrightPartition`s."""
1✔
104

105

106
async def _patch_config_file(
    config_files: ConfigFiles, venv_dir: str, source_roots: Iterable[str]
) -> Digest:
    """Patch the Pyright config file to use the incoming venv directory (from
    requirements_venv_pex). If there is no config file, create a dummy pyrightconfig.json with the
    `venv` key populated.

    The incoming venv directory works alongside the `--venvpath` CLI argument.

    Additionally, add source roots to the `extraPaths` key in the config file. Paths already
    listed in the config are kept first and duplicates are removed.

    :param config_files: The discovered Pyright config files; only a root-level
        `pyrightconfig.json` and/or `pyproject.toml` are patched, and any other file in the
        snapshot is left out of the returned digest.
    :param venv_dir: Value to write to the config's `venv` key.
    :param source_roots: Paths to append to the config's `extraPaths` key.
    :return: A digest containing only the patched (or generated) config file(s).
    """

    source_roots_list = list(source_roots)
    if not config_files.snapshot.files:
        # venv workaround as per: https://github.com/microsoft/pyright/issues/4051
        generated_config: dict[str, str | list[str]] = {
            "venv": venv_dir,
            "extraPaths": source_roots_list,
        }
        return await create_digest(
            CreateDigest(
                [
                    FileContent(
                        "pyrightconfig.json",
                        json.dumps(generated_config).encode(),
                    )
                ]
            )
        )

    config_contents = await get_digest_contents(config_files.snapshot.digest)
    new_files: list[FileContent] = []
    for file in config_contents:
        # This only supports a single json config file in the root of the project
        # https://github.com/pantsbuild/pants/issues/17816 tracks supporting multiple config files and workspaces
        if file.path == "pyrightconfig.json":
            json_config = json.loads(file.content)
            json_config["venv"] = venv_dir
            json_extra_paths: list[str] = json_config.get("extraPaths", [])
            json_config["extraPaths"] = list(OrderedSet(json_extra_paths + source_roots_list))
            new_content = json.dumps(json_config).encode()
            new_files.append(replace(file, content=new_content))

        # This only supports a single pyproject.toml file in the root of the project
        # https://github.com/pantsbuild/pants/issues/17816 tracks supporting multiple config files and workspaces
        elif file.path == "pyproject.toml":
            toml_config = toml.loads(file.content.decode())
            # Create the `[tool.pyright]` table on demand rather than failing with a bare
            # `KeyError` when the file does not define it.
            pyright_config = toml_config.setdefault("tool", {}).setdefault("pyright", {})
            pyright_config["venv"] = venv_dir
            toml_extra_paths: list[str] = pyright_config.get("extraPaths", [])
            pyright_config["extraPaths"] = list(OrderedSet(toml_extra_paths + source_roots_list))
            new_content = toml.dumps(toml_config).encode()
            new_files.append(replace(file, content=new_content))

    return await create_digest(CreateDigest(new_files))
×
161

162

163
@rule(
    desc="Pyright typecheck each partition based on its interpreter_constraints",
    level=LogLevel.DEBUG,
)
async def pyright_typecheck_partition(
    partition: PyrightPartition,
    pyright: Pyright,
    pex_environment: PexEnvironment,
    nodejs: NodeJS,
    sh_binary: ShBinary,
    cat_binary: CatBinary,
) -> CheckResult:
    """Run Pyright on the root source files of a single partition.

    Fetches the partition's root sources, transitive sources, requirements PEX and Pyright config
    files concurrently, builds and materializes a requirements venv, patches the config to point
    at that venv, and then runs Pyright. Input file paths are fed to Pyright on stdin via a small
    shell pipeline, unless any path contains a space, in which case they are passed as arguments.

    :return: A `CheckResult` built from the process result and the partition's description.
    """
    root_sources_get = determine_source_files(
        SourceFilesRequest(fs.sources for fs in partition.field_sets)
    )

    # Grab the closure of the root source files to be typechecked
    transitive_sources_get = prepare_python_sources(
        PythonSourceFilesRequest(partition.root_targets.closure()), **implicitly()
    )

    # See `requirements_venv_pex` for how this will get wrapped in a `VenvPex`.
    requirements_pex_get = create_pex(
        **implicitly(
            RequirementsPexRequest(
                (fs.address for fs in partition.field_sets),
                hardcoded_interpreter_constraints=partition.interpreter_constraints,
            )
        )
    )

    # Look for any/all of the Pyright configuration files (the config is modified below
    # for the `venv` workaround)
    config_files_get = find_config_file(pyright.config_request())

    root_sources, transitive_sources, requirements_pex, config_files = await concurrently(
        root_sources_get,
        transitive_sources_get,
        requirements_pex_get,
        config_files_get,
    )

    # This is a workaround for https://github.com/pantsbuild/pants/issues/19946.
    # complete_pex_env needs to be created here so that the `test_passing_cache_clear`
    # test can pass using the appropriate caching directory.
    # See https://github.com/pantsbuild/pants/pull/19430#discussion_r1337851780
    # for more discussion.
    complete_pex_env = pex_environment.in_workspace()
    requirements_pex_request = PexRequest(
        output_filename="requirements_venv.pex",
        internal_only=True,
        pex_path=[requirements_pex],
        interpreter_constraints=partition.interpreter_constraints,
    )
    requirements_venv_pex = await create_venv_pex(
        VenvPexRequest(requirements_pex_request, complete_pex_env), **implicitly()
    )

    # Force the requirements venv to materialize always by running a no-op.
    # This operation must be called with `ProcessCacheScope.PER_SESSION`
    # so that it runs every time.
    _ = await execute_process_or_raise(
        **implicitly(
            VenvPexProcess(
                requirements_venv_pex,
                description="Force venv to materialize",
                argv=["-c", "''"],
                cache_scope=ProcessCacheScope.PER_SESSION,
            )
        )
    )

    # Patch the config file to use the venv directory from the requirements pex,
    # and add source roots to the `extraPaths` key in the config file.
    patched_config_digest = await _patch_config_file(
        config_files, requirements_venv_pex.venv_rel_dir, transitive_sources.source_roots
    )

    # Prepare the process with as much information as we currently have. This will give us the
    # process's cwd, which we need in order to calculate the relative paths to the input files.
    # We will then manually tweak the argv before actually running.
    input_digest = await merge_digests(
        MergeDigests(
            [
                transitive_sources.source_files.snapshot.digest,
                requirements_venv_pex.digest,
                patched_config_digest,
            ]
        )
    )
    process = await prepare_tool_process(
        pyright.request(
            args=(
                f"--venvpath={complete_pex_env.pex_root}",  # Used with `venv` in config
                *pyright.args,  # User-added arguments
                "-",  # Read input file paths from stdin
            ),
            input_digest=input_digest,
            description=f"Run Pyright on {pluralize(len(root_sources.snapshot.files), 'file')}.",
            level=LogLevel.DEBUG,
        ),
        **implicitly(),
    )

    # We must use relative paths, because we don't know the abspath of the sandbox the process
    # will run in, and `{chroot}` interpolation only works on argv, not on the contents of
    # __files.txt (see below). Pyright interprets relpaths as relative to its cwd, so we
    # prepend the appropriate prefix to each file path.
    input_path_prefix = os.path.relpath(".", process.working_directory)
    input_files = [os.path.join(input_path_prefix, file) for file in root_sources.snapshot.files]

    # We prefer to pass the list of input files via stdin, as large numbers of files can cause us
    # to exceed the max command line length.  See https://github.com/pantsbuild/pants/issues/22779.
    # However Pyright, weirdly, splits stdin on spaces as well as newlines. So we can't pass input
    # file paths via stdin if any of them contain spaces.
    file_with_spaces = next((file for file in root_sources.snapshot.files if " " in file), None)
    if file_with_spaces:
        # Fall back to passing paths as args and hope we don't exceed the max command line length.
        # The trailing "-" (read from stdin) argument is dropped in favor of the explicit paths.
        process = dataclasses.replace(process, argv=(*process.argv[0:-1], *input_files))
    else:
        # Write the input files out to a text file.
        file_list_path = "__files.txt"
        file_list_content = "\n".join(input_files).encode()
        file_list_digest = await create_digest(
            CreateDigest([FileContent(file_list_path, file_list_content)])
        )
        input_digest = await merge_digests(
            MergeDigests(
                [
                    process.input_digest,
                    file_list_digest,
                ]
            )
        )
        # Run the underlying process inside a shell script that cats the file list to stdin.
        # Every component is shell-quoted so that a binary path or working-directory prefix
        # containing whitespace or shell metacharacters cannot break or alter the pipeline.
        file_list_relpath = os.path.join(input_path_prefix, file_list_path)
        shell_script = (
            f"{shlex.quote(cat_binary.path)} {shlex.quote(file_list_relpath)} "
            f"| {shlex.join(process.argv)}"
        )
        process = dataclasses.replace(
            process, argv=(sh_binary.path, "-c", shell_script), input_digest=input_digest
        )

    result = await execute_process(process, **implicitly())
    if result.exit_code == 249 and file_with_spaces:
        logger.error(
            f"Found input files with spaces in their names, including: {file_with_spaces}. "
            "Due to a bug in Pyright this means that the number of input files Pants can pass to "
            "Pyright is limited, and exceeding that limit causes it to crash with exit code 249. "
            "Please reach out to the Pants team if this happens: "
            "https://www.pantsbuild.org/community/getting-help."
        )
    return CheckResult.from_fallible_process_result(
        result,
        partition_description=partition.description(),
    )
316

317

318
@rule(
    desc="Determine if it is necessary to partition Pyright's input (interpreter_constraints and resolves)",
    level=LogLevel.DEBUG,
)
async def pyright_determine_partitions(
    request: PyrightRequest,
    pyright: Pyright,
    python_setup: PythonSetup,
) -> PyrightPartitions:
    """Group the request's field sets into one `PyrightPartition` per
    (resolve, interpreter constraints) key, in sorted key order."""
    field_sets_by_key = _partition_by_interpreter_constraints_and_resolve(
        request.field_sets, python_setup
    )

    all_coarsened = await coarsened_targets_get(
        CoarsenedTargetsRequest(fs.address for fs in request.field_sets),
        **implicitly(),
    )
    coarsened_by_address = all_coarsened.by_address()

    partitions = []
    for (resolve, interpreter_constraints), field_sets in sorted(field_sets_by_key.items()):
        members = FrozenOrderedSet(field_sets)
        roots = CoarsenedTargets(
            OrderedSet(coarsened_by_address[fs.address] for fs in field_sets)
        )
        # The resolve is only recorded when more than one resolve is configured.
        resolve_description = resolve if len(python_setup.resolves) > 1 else None
        # Fall back to the Pyright subsystem's constraints when the key's are falsy.
        effective_constraints = interpreter_constraints or pyright.interpreter_constraints
        partitions.append(
            PyrightPartition(members, roots, resolve_description, effective_constraints)
        )
    return PyrightPartitions(partitions)
352

353

354
@rule(desc="Typecheck using Pyright", level=LogLevel.DEBUG)
async def pyright_typecheck(
    request: PyrightRequest,
    pyright: Pyright,
) -> CheckResults:
    """Typecheck every partition of the request concurrently and collect the results.

    Returns an empty `CheckResults` when `pyright.skip` is set.
    """
    checker = request.tool_name
    if pyright.skip:
        return CheckResults([], checker_name=checker)

    # Explicitly excluding `pyright` as a function argument to `pyright_determine_partitions` and `pyright_typecheck_partition`
    # as it throws "TypeError: unhashable type: 'Pyright'"
    all_partitions = await pyright_determine_partitions(request, **implicitly())
    per_partition_results = await concurrently(
        pyright_typecheck_partition(each, **implicitly()) for each in all_partitions
    )
    return CheckResults(per_partition_results, checker_name=checker)
372

373

374
def rules() -> Iterable[Rule | UnionRule]:
    """Return this module's rules, the rules of the config-file, pex-from-targets and
    nodejs-tool modules, and the union registration of `PyrightRequest` as a `CheckRequest`."""
    registered: list[Rule | UnionRule] = [*collect_rules()]
    registered.extend(config_files.rules())
    registered.extend(pex_from_targets.rules())
    registered.extend(nodejs_tool.rules())
    registered.append(UnionRule(CheckRequest, PyrightRequest))
    return tuple(registered)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc