• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pantsbuild / pants / 19383441979

15 Nov 2025 03:03AM UTC coverage: 80.284% (-0.006%) from 80.29%
19383441979

push

github

web-flow
plumb through (most) of Pex's --scie flags (#22866)

After playing with some back and forth I decided to add these to the
existing `pex_binary` target: that is most similar to what Pex does; and
it ended up being the easiest way to re-use existing machinery. I tried
to stay out of Pex's way with the various flags and pass things through.
The help text is lightly translated (ex: `-flags` to fields). The most
fiddly part is the expected outputs where the existing Pants code really
wants to know the expected outputs of a Process. I effectively
duplicated some of the Pex output logic, this is annoying, but I think
mostly getting in our own way instead of limiting what anyone can do,
and it does provide nicer output.

Flags not currently covered as fields:
* `--scie-only, --no-scie-only, --pex-and-scie`: The existing Pants
classes really expect to get a `.pex` out of all of this.
* `--scie-pypy-release`: I don't have any PyPy experience. I think it
would be straightforward to add.
* `--scie-pbs-free-threaded` / `--scie-pbs-debug`: These are new since I
started; thanks! Happy to do as a followup PR.
* `--scie-science-binary`: Would maybe be interesting if someone wrote a
general `science` backend.
 * `--scie-assets-base-url` / `--scie-base`

ref #22654

207 of 263 new or added lines in 5 files covered. (78.71%)

6 existing lines in 2 files now uncovered.

78075 of 97248 relevant lines covered (80.28%)

3.1 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

68.97
/src/python/pants/backend/python/util_rules/pex.py
1
# Copyright 2019 Pants project contributors (see CONTRIBUTORS.md).
2
# Licensed under the Apache License, Version 2.0 (see LICENSE).
3

4
from __future__ import annotations
11✔
5

6
import dataclasses
11✔
7
import json
11✔
8
import logging
11✔
9
import os
11✔
10
import shlex
11✔
11
from collections.abc import Iterable, Iterator, Mapping, Sequence
11✔
12
from dataclasses import dataclass
11✔
13
from pathlib import PurePath
11✔
14
from textwrap import dedent  # noqa: PNT20
11✔
15
from typing import TypeVar
11✔
16

17
import packaging.specifiers
11✔
18
import packaging.version
11✔
19
from packaging.requirements import Requirement
11✔
20

21
from pants.backend.python.subsystems.setup import PythonSetup
11✔
22
from pants.backend.python.target_types import (
11✔
23
    Executable,
24
    MainSpecification,
25
    PexCompletePlatformsField,
26
    PexLayout,
27
    PythonRequirementFindLinksField,
28
    PythonRequirementsField,
29
)
30
from pants.backend.python.util_rules import pex_cli, pex_requirements
11✔
31
from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
11✔
32
from pants.backend.python.util_rules.pex_cli import PexCliProcess, PexPEX, maybe_log_pex_stderr
11✔
33
from pants.backend.python.util_rules.pex_environment import (
11✔
34
    CompletePexEnvironment,
35
    PexEnvironment,
36
    PexSubsystem,
37
    PythonExecutable,
38
)
39
from pants.backend.python.util_rules.pex_requirements import (
11✔
40
    EntireLockfile,
41
    LoadedLockfile,
42
    LoadedLockfileRequest,
43
    Lockfile,
44
)
45
from pants.backend.python.util_rules.pex_requirements import (
11✔
46
    PexRequirements as PexRequirements,  # Explicit re-export.
47
)
48
from pants.backend.python.util_rules.pex_requirements import (
11✔
49
    Resolve,
50
    ResolvePexConfigRequest,
51
    determine_resolve_pex_config,
52
    get_lockfile_for_resolve,
53
    load_lockfile,
54
    validate_metadata,
55
)
56
from pants.build_graph.address import Address
11✔
57
from pants.core.environments.target_types import EnvironmentTarget
11✔
58
from pants.core.target_types import FileSourceField, ResourceSourceField
11✔
59
from pants.core.util_rules.stripped_source_files import StrippedFileNameRequest
11✔
60
from pants.core.util_rules.stripped_source_files import rules as stripped_source_rules
11✔
61
from pants.core.util_rules.stripped_source_files import strip_file_name
11✔
62
from pants.core.util_rules.system_binaries import BashBinary
11✔
63
from pants.engine.addresses import UnparsedAddressInputs
11✔
64
from pants.engine.collection import Collection, DeduplicatedCollection
11✔
65
from pants.engine.engine_aware import EngineAwareParameter
11✔
66
from pants.engine.environment import EnvironmentName
11✔
67
from pants.engine.fs import EMPTY_DIGEST, AddPrefix, CreateDigest, Digest, FileContent, MergeDigests
11✔
68
from pants.engine.internals.graph import (
11✔
69
    hydrate_sources,
70
    resolve_targets,
71
    resolve_unparsed_address_inputs,
72
)
73
from pants.engine.internals.graph import transitive_targets as transitive_targets_get
11✔
74
from pants.engine.internals.native_engine import Snapshot
11✔
75
from pants.engine.intrinsics import add_prefix, create_digest, digest_to_snapshot, merge_digests
11✔
76
from pants.engine.process import (
11✔
77
    Process,
78
    ProcessCacheScope,
79
    ProcessResult,
80
    fallible_to_exec_result_or_raise,
81
)
82
from pants.engine.rules import collect_rules, concurrently, implicitly, rule
11✔
83
from pants.engine.target import HydrateSourcesRequest, SourcesField, TransitiveTargetsRequest
11✔
84
from pants.engine.unions import UnionMembership, union
11✔
85
from pants.util.frozendict import FrozenDict
11✔
86
from pants.util.logging import LogLevel
11✔
87
from pants.util.strutil import bullet_list, pluralize, softwrap
11✔
88

89
logger = logging.getLogger(__name__)
11✔
90

91

92
@union(in_scope_types=[EnvironmentName])
@dataclass(frozen=True)
class PythonProvider:
    """Union which should have 0 or 1 implementations registered which provide Python.

    Subclasses should provide a rule from their subclass type to `PythonExecutable`.
    """

    # The interpreter constraints handed to the provider; `find_interpreter` constructs the
    # registered member with the constraints it was asked to satisfy.
    interpreter_constraints: InterpreterConstraints
11✔
101

102

103
@rule(polymorphic=True)
async def get_python_executable(
    provider: PythonProvider, env_name: EnvironmentName
) -> PythonExecutable:
    # Base body of the polymorphic rule: it only raises. Per the `PythonProvider` docstring,
    # each registered union member is expected to supply its own rule to `PythonExecutable`.
    raise NotImplementedError()
×
108

109

110
class PexPlatforms(DeduplicatedCollection[str]):
    """A deduplicated collection of platform strings (with `sort_input` enabled)."""

    sort_input = True

    def generate_pex_arg_list(self) -> list[str]:
        """Return a flat `["--platform", <platform>, ...]` list covering every entry."""
        return [token for platform in self for token in ("--platform", platform)]
2✔
118

119

120
class CompletePlatforms(DeduplicatedCollection[str]):
    """A deduplicated collection of complete-platform paths plus the digest that goes with them."""

    sort_input = True

    def __init__(self, iterable: Iterable[str] = (), *, digest: Digest = EMPTY_DIGEST):
        super().__init__(iterable)
        # Kept privately and exposed read-only through the `digest` property.
        self._digest = digest

    @classmethod
    def from_snapshot(cls, snapshot: Snapshot) -> CompletePlatforms:
        """Alternate constructor: use the snapshot's file paths as entries and keep its digest."""
        return cls(snapshot.files, digest=snapshot.digest)

    @property
    def digest(self) -> Digest:
        """The digest supplied at construction time (`EMPTY_DIGEST` by default)."""
        return self._digest

    def generate_pex_arg_list(self) -> Iterator[str]:
        """Lazily yield a `--complete-platform <path>` pair for every entry."""
        for path in self:
            yield from ("--complete-platform", path)
1✔
139

140

141
@rule
async def digest_complete_platform_addresses(
    addresses: UnparsedAddressInputs,
) -> CompletePlatforms:
    # Resolve the addresses to targets, hydrate the file/resource sources of each target
    # concurrently, then merge everything into a single snapshot.
    targets = await resolve_targets(**implicitly(addresses))
    hydrated_sources = await concurrently(
        hydrate_sources(
            HydrateSourcesRequest(
                target.get(SourcesField),
                for_sources_types=(FileSourceField, ResourceSourceField),
                enable_codegen=True,
            ),
            **implicitly(),
        )
        for target in targets
    )
    merged = MergeDigests(hydrated.snapshot.digest for hydrated in hydrated_sources)
    snapshot = await digest_to_snapshot(**implicitly(merged))
    return CompletePlatforms.from_snapshot(snapshot)
×
164

165

166
@rule
async def digest_complete_platforms(
    complete_platforms: PexCompletePlatformsField,
) -> CompletePlatforms:
    # Thin adapter: convert the field to address inputs and delegate to the address-based rule.
    address_inputs = complete_platforms.to_unparsed_address_inputs()
    return await digest_complete_platform_addresses(address_inputs)
×
171

172

173
@dataclass(frozen=True)
class PexRequest(EngineAwareParameter):
    output_filename: str
    internal_only: bool
    layout: PexLayout
    python: PythonExecutable | None
    requirements: PexRequirements | EntireLockfile
    interpreter_constraints: InterpreterConstraints
    platforms: PexPlatforms
    complete_platforms: CompletePlatforms
    sources: Digest | None
    additional_inputs: Digest
    main: MainSpecification | None
    inject_args: tuple[str, ...]
    inject_env: FrozenDict[str, str]
    additional_args: tuple[str, ...]
    pex_path: tuple[Pex, ...]
    description: str | None = dataclasses.field(compare=False)
    cache_scope: ProcessCacheScope
    # Stored as tuples (or None) so that the frozen dataclass stays hashable and the values can
    # be iterated more than once, exactly like the other iterable-valued fields above.
    scie_output_files: tuple[str, ...] | None = None
    scie_output_directories: tuple[str, ...] | None = None

    def __init__(
        self,
        *,
        output_filename: str,
        scie_output_files: Iterable[str] | None = None,
        scie_output_directories: Iterable[str] | None = None,
        internal_only: bool,
        layout: PexLayout | None = None,
        python: PythonExecutable | None = None,
        requirements: PexRequirements | EntireLockfile = PexRequirements(),
        interpreter_constraints=InterpreterConstraints(),
        platforms=PexPlatforms(),
        complete_platforms=CompletePlatforms(),
        sources: Digest | None = None,
        additional_inputs: Digest | None = None,
        main: MainSpecification | None = None,
        inject_args: Iterable[str] = (),
        inject_env: Mapping[str, str] = FrozenDict(),
        additional_args: Iterable[str] = (),
        pex_path: Iterable[Pex] = (),
        description: str | None = None,
        cache_scope: ProcessCacheScope = ProcessCacheScope.SUCCESSFUL,
    ) -> None:
        """A request to create a PEX from its inputs.

        :param output_filename: The name of the built Pex file, which typically should end in
            `.pex`.
        :param scie_output_files: If we are also building native executable scies for the PEX,
            their filenames. Any iterable is accepted and stored as a tuple; `None` is kept
            as `None`.
        :param scie_output_directories: If we are also building native executable scies for the
            PEX using a directory layout, their directories. Any iterable is accepted and stored
            as a tuple; `None` is kept as `None`.
        :param internal_only: Whether we ever materialize the Pex and distribute it directly
            to end users, such as with the `binary` goal. Typically, instead, the user never
            directly uses the Pex, e.g. with `lint` and `test`. If True, we will use a Pex setting
            that results in faster build time but compatibility with fewer interpreters at runtime.
        :param layout: The filesystem layout to create the PEX with.
        :param python: A particular PythonExecutable to use, which must match any relevant
            interpreter_constraints.
        :param requirements: The requirements that the PEX should contain.
        :param interpreter_constraints: Any constraints on which Python versions may be used.
        :param platforms: Which abbreviated platforms should be supported. Setting this value will
            cause interpreter constraints to not be used at PEX build time because platforms already
            constrain the valid Python versions, e.g. by including `cp36m` in the platform string.
            Unfortunately this also causes interpreter constraints to not be embedded in the built
            PEX for use at runtime which can lead to problems.
            See: https://github.com/pantsbuild/pants/issues/13904.
        :param complete_platforms: Which complete platforms should be supported. Setting this value
            will cause interpreter constraints to not be used at PEX build time because complete
            platforms completely constrain the valid Python versions. Unfortunately this also causes
            interpreter constraints to not be embedded in the built PEX for use at runtime which can
            lead to problems. See: https://github.com/pantsbuild/pants/issues/13904.
        :param sources: Any source files that should be included in the Pex.
        :param additional_inputs: Any inputs that are not source files and should not be included
            directly in the Pex, but should be present in the environment when building the Pex.
        :param main: The main for the built Pex, equivalent to Pex's `-e` or '-c' flag. If
            left off, the Pex will open up as a REPL.
        :param inject_args: Command line arguments to freeze in to the PEX.
        :param inject_env: Environment variables to freeze in to the PEX.
        :param additional_args: Any additional Pex flags.
        :param pex_path: Pex files to add to the PEX_PATH.
        :param description: A human-readable description to render in the dynamic UI when building
            the Pex.
        :param cache_scope: The cache scope for the underlying pex cli invocation process.
        :raises ValueError: If mutually exclusive options are combined (see `__post_init__`).
        """
        # The dataclass is frozen, so every field has to be assigned via `object.__setattr__`.
        object.__setattr__(self, "output_filename", output_filename)
        object.__setattr__(
            self,
            "scie_output_files",
            None if scie_output_files is None else tuple(scie_output_files),
        )
        object.__setattr__(
            self,
            "scie_output_directories",
            None if scie_output_directories is None else tuple(scie_output_directories),
        )
        object.__setattr__(self, "internal_only", internal_only)
        # Use any explicitly requested layout, or Packed for internal PEXes (which is a much
        # friendlier layout for the CAS than Zipapp.)
        object.__setattr__(
            self, "layout", layout or (PexLayout.PACKED if internal_only else PexLayout.ZIPAPP)
        )
        object.__setattr__(self, "python", python)
        object.__setattr__(self, "requirements", requirements)
        object.__setattr__(self, "interpreter_constraints", interpreter_constraints)
        object.__setattr__(self, "platforms", platforms)
        object.__setattr__(self, "complete_platforms", complete_platforms)
        object.__setattr__(self, "sources", sources)
        object.__setattr__(self, "additional_inputs", additional_inputs or EMPTY_DIGEST)
        object.__setattr__(self, "main", main)
        object.__setattr__(self, "inject_args", tuple(inject_args))
        object.__setattr__(self, "inject_env", FrozenDict(inject_env))
        object.__setattr__(self, "additional_args", tuple(additional_args))
        object.__setattr__(self, "pex_path", tuple(pex_path))
        object.__setattr__(self, "description", description)
        object.__setattr__(self, "cache_scope", cache_scope)

        # A hand-written `__init__` is not followed by an automatic `__post_init__` call, so
        # run the validation explicitly.
        self.__post_init__()

    def __post_init__(self):
        """Reject combinations of interpreter-selection options that cannot be used together.

        :raises ValueError: If an internal-only request sets `platforms` or
            `complete_platforms`, or if `python` is combined with `platforms`,
            `complete_platforms` or `interpreter_constraints`.
        """
        if self.internal_only and self.platforms:
            raise ValueError(
                softwrap(
                    f"""
                    Internal only PEXes can only constrain interpreters with interpreter_constraints.
                    Given platform constraints {self.platforms} for internal only pex request:
                    {self}.
                    """
                )
            )
        if self.internal_only and self.complete_platforms:
            raise ValueError(
                softwrap(
                    f"""
                    Internal only PEXes can only constrain interpreters with interpreter_constraints.
                    Given complete_platform constraints {self.complete_platforms} for internal only
                    pex request: {self}.
                    """
                )
            )
        if self.python and self.platforms:
            raise ValueError(
                softwrap(
                    f"""
                    Only one of platforms or a specific interpreter may be set. Got
                    both {self.platforms} and {self.python}.
                    """
                )
            )
        if self.python and self.complete_platforms:
            raise ValueError(
                softwrap(
                    f"""
                    Only one of complete_platforms or a specific interpreter may be set. Got
                    both {self.complete_platforms} and {self.python}.
                    """
                )
            )
        if self.python and self.interpreter_constraints:
            raise ValueError(
                softwrap(
                    f"""
                    Only one of interpreter_constraints or a specific interpreter may be set. Got
                    both {self.interpreter_constraints} and {self.python}.
                    """
                )
            )

    def debug_hint(self) -> str:
        """Return the output filename as the debug hint for this parameter."""
        return self.output_filename
×
334

335

336
@dataclass(frozen=True)
class OptionalPexRequest:
    # Either a concrete request to build a PEX, or None when there is nothing to request.
    maybe_pex_request: PexRequest | None
11✔
339

340

341
@dataclass(frozen=True)
class Pex:
    """Wrapper for a digest containing a pex file created with some filename."""

    # Digest that contains the pex file.
    digest: Digest
    # Filename of the pex inside `digest`.
    name: str
    # The interpreter associated with this pex, if any (None when no specific interpreter
    # was selected, e.g. for platform-constrained builds).
    python: PythonExecutable | None
11✔
348

349

350
@dataclass(frozen=True)
class OptionalPex:
    # Either a built `Pex`, or None when no pex is available.
    maybe_pex: Pex | None
11✔
353

354

355
@rule(desc="Find Python interpreter for constraints", level=LogLevel.DEBUG)
async def find_interpreter(
    interpreter_constraints: InterpreterConstraints,
    pex_subsystem: PexSubsystem,
    env_target: EnvironmentTarget,
    union_membership: UnionMembership,
) -> PythonExecutable:
    """Select a Python interpreter that satisfies the given constraints.

    If exactly one `PythonProvider` union member is registered, the lookup is delegated to it.
    Otherwise the Pex CLI is run with only the interpreter constraints, and the selected
    interpreter reports its own real path and the SHA-256 of its binary.

    :raises ValueError: If more than one `PythonProvider` implementation is registered.
    """
    python_providers = union_membership.get(PythonProvider)
    if len(python_providers) > 1:
        # NB: The union members are the provider classes themselves (one of them is
        # instantiated below), so render the class directly rather than its metaclass.
        raise ValueError(
            softwrap(
                f"""
                Too many Python provider plugins were registered. We expected 0 or 1, but found
                {len(python_providers)}. Providers were:

                {bullet_list(repr(provider) for provider in python_providers)}
                """
            )
        )
    if python_providers:
        python_provider = next(iter(python_providers))
        python = await get_python_executable(
            **implicitly({python_provider(interpreter_constraints): PythonProvider})
        )
        return python

    formatted_constraints = " OR ".join(str(constraint) for constraint in interpreter_constraints)
    result = await fallible_to_exec_result_or_raise(
        **implicitly(
            PexCliProcess(
                description=f"Find interpreter for constraints: {formatted_constraints}",
                subcommand=(),
                # Here, we run the Pex CLI with no requirements, which just selects an interpreter.
                # Normally, this would start an isolated repl. By passing `--`, we force the repl to
                # instead act as an interpreter (the selected one) and tell us about itself. The upshot
                # is we run the Pex interpreter selection logic unperturbed but without resolving any
                # distributions.
                extra_args=(
                    *interpreter_constraints.generate_pex_arg_list(),
                    "--",
                    "-c",
                    # N.B.: The following code snippet must be compatible with Python 2.7 and
                    # Python 3.5+.
                    #
                    # When hashing, we pick 8192 for efficiency of reads and fingerprint updates
                    # (writes) since it's a common OS buffer size and an even multiple of the
                    # hash block size.
                    dedent(
                        """\
                    import hashlib, os, sys

                    python = os.path.realpath(sys.executable)
                    print(python)

                    hasher = hashlib.sha256()
                    with open(python, "rb") as fp:
                      for chunk in iter(lambda: fp.read(8192), b""):
                          hasher.update(chunk)
                    print(hasher.hexdigest())
                    """
                    ),
                ),
                level=LogLevel.DEBUG,
                cache_scope=env_target.executable_search_path_cache_scope(),
            )
        )
    )
    # The snippet prints exactly two lines: the interpreter's real path, then its fingerprint.
    path, fingerprint = result.stdout.decode().strip().splitlines()

    maybe_log_pex_stderr(result.stderr, pex_subsystem.verbosity)

    return PythonExecutable(path=path, fingerprint=fingerprint)
×
427

428

429
@dataclass(frozen=True)
class BuildPexResult:
    # Result of the process that built the pex.
    result: ProcessResult
    # Filename of the built pex.
    pex_filename: str
    # Digest handed to the `Pex` wrapper by `create_pex`.
    digest: Digest
    # Interpreter associated with the build, if one was selected.
    python: PythonExecutable | None

    def create_pex(self) -> Pex:
        # Repackage this result's digest, filename and interpreter as a `Pex`.
        return Pex(digest=self.digest, name=self.pex_filename, python=self.python)
×
438

439

440
@dataclass
class _BuildPexPythonSetup:
    # The interpreter chosen for the build, or None when platforms/complete platforms are used.
    python: PythonExecutable | None
    # Pex CLI arguments for interpreter/platform selection (e.g. `--platform`, `--python`,
    # `--python-path`), as produced by `_determine_pex_python_and_platforms`.
    argv: list[str]
11✔
444

445

446
@rule
async def _determine_pex_python_and_platforms(request: PexRequest) -> _BuildPexPythonSetup:
    """Work out which interpreter (if any) and which Pex CLI flags select the target Python.

    Three outcomes are possible:
    - platforms / complete platforms are set: no interpreter, only platform flags;
    - a specific interpreter was requested, or the PEX is internal-only: pin it via `--python`;
    - otherwise: pass the interpreter constraints and advertise one compatible interpreter
      via `--python-path`.
    """
    # NB: If `--platform` is specified, this signals that the PEX should not be built locally.
    # `--interpreter-constraint` only makes sense in the context of building locally. These two
    # flags are mutually exclusive. See https://github.com/pex-tool/pex/issues/957.
    if request.platforms or request.complete_platforms:
        # Note that this means that this is not an internal-only pex.
        # TODO(#9560): consider validating that these platforms are valid with the interpreter
        #  constraints.
        platform_argv = [
            *request.platforms.generate_pex_arg_list(),
            *request.complete_platforms.generate_pex_arg_list(),
        ]
        return _BuildPexPythonSetup(None, platform_argv)

    python = request.python
    if not python:
        python = await find_interpreter(request.interpreter_constraints, **implicitly())

    # Sometimes we want to build and run with a specific interpreter (either because request
    # demanded it, or because it's an internal-only PEX). We will have already validated that
    # there were no platforms.
    if request.python or request.internal_only:
        return _BuildPexPythonSetup(python, ["--python", python.path])

    # Otherwise, we don't want to force compatibility with a particular interpreter (as in, the
    # resulting PEX should follow the ICs), but we _do_ want to tell PEX about at least one
    # interpreter that is compatible, to ensure that an interpreter installed/managed by
    # provider backends are visible (in the extreme case, a machine may have no Python
    # interpreters installed at all, and just rely on Pants' provider backends to install them,
    # and thus pex searching $PATH will find nothing).
    constraint_argv = [
        *request.interpreter_constraints.generate_pex_arg_list(),
        "--python-path",
        python.path,
    ]
    return _BuildPexPythonSetup(python, constraint_argv)
489

490

491
@dataclass
class _BuildPexRequirementsSetup:
    # Digests needed for the resolve (e.g. a lockfile, a repository pex, a constraints file).
    digests: list[Digest]
    # Pex CLI arguments describing the requirements and how to resolve them.
    argv: list[str]
    # Heuristic for how much concurrency the resolve can use; see `_setup_pex_requirements`.
    concurrency_available: int
11✔
496

497

498
@dataclass(frozen=True)
class PexRequirementsInfo:
    # Requirement strings; `get_req_strings` returns them sorted.
    req_strings: tuple[str, ...]
    # Find-links values collected from the targets; `get_req_strings` returns them sorted.
    find_links: tuple[str, ...]
11✔
502

503

504
@rule
async def get_req_strings(pex_reqs: PexRequirements) -> PexRequirementsInfo:
    """Expand a `PexRequirements` into concrete requirement strings and find-links values.

    Entries may be plain requirement strings, `Address`es, or `//`-prefixed address specs.
    Addresses (given directly or parsed from specs) are expanded through their transitive
    closure, collecting the requirement and find-links fields of every target that has them.

    :return: The sorted requirement strings and the sorted, de-duplicated find-links values.
    """
    addrs: list[Address] = []
    specs: list[str] = []
    req_strings: list[str] = []
    find_links: set[str] = set()
    for req_str_or_addr in pex_reqs.req_strings_or_addrs:
        if isinstance(req_str_or_addr, Address):
            addrs.append(req_str_or_addr)
        else:
            assert isinstance(req_str_or_addr, str)
            # Require a `//` prefix, to distinguish address specs from
            # local or VCS requirements. The prefix is address syntax, not a filesystem
            # path, so it is matched literally rather than via `os.path.sep`.
            if req_str_or_addr.startswith("//"):
                specs.append(req_str_or_addr)
            else:
                req_strings.append(req_str_or_addr)
    if specs:
        addrs_from_specs = await resolve_unparsed_address_inputs(
            UnparsedAddressInputs(
                specs,
                owning_address=None,
                description_of_origin=pex_reqs.description_of_origin,
            ),
            **implicitly(),
        )
        addrs.extend(addrs_from_specs)
    if addrs:
        transitive_targets = await transitive_targets_get(
            TransitiveTargetsRequest(addrs), **implicitly()
        )
        req_strings.extend(
            PexRequirements.req_strings_from_requirement_fields(
                tgt[PythonRequirementsField]
                for tgt in transitive_targets.closure
                if tgt.has_field(PythonRequirementsField)
            )
        )
        find_links.update(
            link
            for tgt in transitive_targets.closure
            if tgt.has_field(PythonRequirementFindLinksField)
            for link in tgt[PythonRequirementFindLinksField].value or ()
        )
    return PexRequirementsInfo(tuple(sorted(req_strings)), tuple(sorted(find_links)))
×
549

550

551
async def _get_entire_lockfile_and_requirements(
    requirements: EntireLockfile | PexRequirements,
) -> tuple[LoadedLockfile | None, tuple[str, ...]]:
    """Load the lockfile that should be consumed in its entirety, if there is one.

    :return: A pair of the loaded lockfile (None when the requirements do not call for using
        an entire lockfile) and the complete requirement strings, which are only ever
        non-empty for an `EntireLockfile` input.
    """
    whole_lockfile: Lockfile | None = None
    req_strings: tuple[str, ...] = ()
    # TODO: This is clunky, but can be simplified once we get rid of old-style tool
    #  lockfiles, because we can unify EntireLockfile and Resolve.
    if isinstance(requirements, EntireLockfile):
        whole_lockfile = requirements.lockfile
        req_strings = requirements.complete_req_strings or ()
    else:
        superset = requirements.from_superset
        if isinstance(superset, Resolve) and superset.use_entire_lockfile:
            whole_lockfile = await get_lockfile_for_resolve(superset, **implicitly())

    if not whole_lockfile:
        return None, req_strings

    loaded = await load_lockfile(LoadedLockfileRequest(whole_lockfile), **implicitly())
    return loaded, req_strings
1✔
570

571

572
@rule
async def _setup_pex_requirements(
    request: PexRequest, python_setup: PythonSetup
) -> _BuildPexRequirementsSetup:
    """Translate the request's requirements into Pex CLI arguments plus the digests they need.

    The branches are tried in this order:
    1. An entire lockfile is to be consumed (`--lock`, or `--requirement ... --no-transitive`
       when the lockfile is not Pex-native).
    2. Requirements are a subset of a repository PEX (`--pex-repository`).
    3. Requirements are a subset of a resolve's lockfile (`--lock` plus requirement strings).
    4. Otherwise a plain pip resolve, optionally with a generated constraints file.
    """
    resolve_name: str | None
    if isinstance(request.requirements, EntireLockfile):
        resolve_name = request.requirements.lockfile.resolve_name
    elif isinstance(request.requirements.from_superset, Resolve):
        resolve_name = request.requirements.from_superset.name
    else:
        # This implies that, currently, per-resolve options are only configurable for resolves.
        # However, if no resolve is specified, we will still load options that apply to every
        # resolve, like `[python-repos].indexes`.
        resolve_name = None
    resolve_config = await determine_resolve_pex_config(
        ResolvePexConfigRequest(resolve_name), **implicitly()
    )

    # Two flavours of resolver arguments: one for Pex-native lockfiles, and one for pip
    # resolves, which additionally pins the pip resolver version.
    pex_lock_resolver_args = list(resolve_config.pex_args())
    pip_resolver_args = [*resolve_config.pex_args(), "--resolver-version", "pip-2020-resolver"]

    loaded_lockfile, complete_req_strings = await _get_entire_lockfile_and_requirements(
        request.requirements
    )
    if loaded_lockfile:
        # Branch 1: consume the entire lockfile.
        argv = (
            ["--lock", loaded_lockfile.lockfile_path, *pex_lock_resolver_args]
            if loaded_lockfile.is_pex_native
            # We use pip to resolve a requirements.txt pseudo-lockfile, possibly with hashes.
            else [
                "--requirement",
                loaded_lockfile.lockfile_path,
                "--no-transitive",
                *pip_resolver_args,
            ]
        )
        if loaded_lockfile.metadata and complete_req_strings:
            validate_metadata(
                loaded_lockfile.metadata,
                request.interpreter_constraints,
                loaded_lockfile.original_lockfile,
                complete_req_strings,
                # We're using the entire lockfile, so there is no Pex subsetting operation we
                # can delegate requirement validation to.  So we do our naive string-matching
                # validation.
                validate_consumed_req_strings=True,
                python_setup=python_setup,
                resolve_config=resolve_config,
            )

        return _BuildPexRequirementsSetup(
            [loaded_lockfile.lockfile_digest], argv, loaded_lockfile.requirement_estimate
        )

    # Past this point the requirements are never an `EntireLockfile`: that case always yields
    # a loaded lockfile above.
    assert isinstance(request.requirements, PexRequirements)
    reqs_info = await get_req_strings(request.requirements)

    # TODO: This is not the best heuristic for available concurrency, since the
    # requirements almost certainly have transitive deps which also need building, but it
    # is better than using something hardcoded.
    concurrency_available = len(reqs_info.req_strings)

    if isinstance(request.requirements.from_superset, Pex):
        # Branch 2: subset an already-built repository PEX.
        repository_pex = request.requirements.from_superset
        return _BuildPexRequirementsSetup(
            [repository_pex.digest],
            [*reqs_info.req_strings, "--pex-repository", repository_pex.name],
            concurrency_available,
        )

    elif isinstance(request.requirements.from_superset, Resolve):
        # Branch 3: subset the lockfile of a resolve.
        lockfile = await get_lockfile_for_resolve(
            request.requirements.from_superset, **implicitly()
        )
        loaded_lockfile = await load_lockfile(LoadedLockfileRequest(lockfile), **implicitly())

        # NB: This is also validated in the constructor.
        assert loaded_lockfile.is_pex_native
        if not reqs_info.req_strings:
            # Nothing to resolve, so neither digests nor arguments are needed.
            return _BuildPexRequirementsSetup([], [], concurrency_available)

        if loaded_lockfile.metadata:
            validate_metadata(
                loaded_lockfile.metadata,
                request.interpreter_constraints,
                loaded_lockfile.original_lockfile,
                consumed_req_strings=reqs_info.req_strings,
                # Don't validate user requirements when subsetting a resolve, as Pex's
                # validation during the subsetting is far more precise than our naive string
                # comparison. For example, if a lockfile was generated with `foo==1.2.3`
                # and we want to resolve `foo>=1.0.0` or just `foo` out of it, Pex will do
                # so successfully, while our naive validation would fail.
                validate_consumed_req_strings=False,
                python_setup=python_setup,
                resolve_config=resolve_config,
            )

        return _BuildPexRequirementsSetup(
            [loaded_lockfile.lockfile_digest],
            [
                *reqs_info.req_strings,
                "--lock",
                loaded_lockfile.lockfile_path,
                *pex_lock_resolver_args,
            ],
            concurrency_available,
        )

    # We use pip to perform a normal resolve.
    digests = []
    argv = [
        *reqs_info.req_strings,
        *pip_resolver_args,
        *(f"--find-links={find_links}" for find_links in reqs_info.find_links),
    ]
    if request.requirements.constraints_strings:
        # Write the constraints to a file in the sandbox and point Pex at it.
        constraints_file = "__constraints.txt"
        constraints_content = "\n".join(request.requirements.constraints_strings)
        digests.append(
            await create_digest(
                CreateDigest([FileContent(constraints_file, constraints_content.encode())])
            )
        )
        argv.extend(["--constraints", constraints_file])
    return _BuildPexRequirementsSetup(digests, argv, concurrency_available=concurrency_available)
1✔
697

698

699
@rule(level=LogLevel.DEBUG)
async def build_pex(
    request: PexRequest, python_setup: PythonSetup, pex_subsystem: PexSubsystem
) -> BuildPexResult:
    """Build the PEX described by `request` by invoking the Pex CLI.

    :param request: The settings of the PEX to build.
    :param python_setup: Supplies the per-resolve and global interpreter constraints, and the
        resolve names used in the process description.
    :param pex_subsystem: Supplies the verbosity used when logging Pex's stderr.
    :return: The process result together with the PEX file name, the output digest (merged
        with the digests of any `pex_path` PEXes) and the Python chosen to build it.
    """

    if not (request.python or request.interpreter_constraints):
        # Neither an interpreter nor ICs were given: the caller wants the ICs configured for
        # the resolve (falling back to the global ICs).
        requirements = request.requirements
        resolve_name = ""
        if isinstance(requirements, PexRequirements) and isinstance(
            requirements.from_superset, Resolve
        ):
            resolve_name = requirements.from_superset.name
        elif isinstance(requirements, EntireLockfile):
            resolve_name = requirements.lockfile.resolve_name

        if resolve_name:
            resolve_ics = python_setup.resolves_to_interpreter_constraints.get(
                resolve_name,
                python_setup.interpreter_constraints,
            )
            request = dataclasses.replace(
                request, interpreter_constraints=InterpreterConstraints(resolve_ics)
            )

    source_dir_name = "source_files"

    # Create the sub-requests up front so that they can be awaited concurrently below.
    pex_python_setup_req = _determine_pex_python_and_platforms(request)
    requirements_setup_req = _setup_pex_requirements(**implicitly({request: PexRequest}))
    sources_digest_as_subdir_req = add_prefix(
        AddPrefix(request.sources or EMPTY_DIGEST, source_dir_name)
    )
    if not isinstance(request.requirements, PexRequirements):
        pex_python_setup, requirements_setup, sources_digest_as_subdir = await concurrently(
            pex_python_setup_req,
            requirements_setup_req,
            sources_digest_as_subdir_req,
        )
        req_strings = ()
    else:
        # The requirement strings are only needed for the process description.
        (
            pex_python_setup,
            requirements_setup,
            sources_digest_as_subdir,
            req_info,
        ) = await concurrently(
            pex_python_setup_req,
            requirements_setup_req,
            sources_digest_as_subdir_req,
            get_req_strings(request.requirements),
        )
        req_strings = req_info.req_strings

    argv = [
        "--output-file",
        request.output_filename,
        *request.additional_args,
        *pex_python_setup.argv,
    ]

    main = request.main
    if main is not None:
        argv.extend(main.iter_pex_args())
        if isinstance(main, Executable):
            # Unlike other MainSpecification types (that can pass spec as-is to pex),
            # Executable must be an actual path relative to the sandbox.
            # main.spec is a python source file including its spec_path.
            # To make it relative to the sandbox, we strip the source root
            # and add the source_dir_name (sources get prefixed with that below).
            stripped = await strip_file_name(StrippedFileNameRequest(main.spec))
            argv.append(os.path.join(source_dir_name, stripped.value))

    argv += [f"--inject-args={shlex.quote(arg)}" for arg in request.inject_args]
    argv += [f"--inject-env={key}={value}" for key, value in sorted(request.inject_env.items())]

    # TODO(John Sirois): Right now any request requirements will shadow corresponding pex path
    #  requirements, which could lead to problems. Support shading python binaries.
    #  See: https://github.com/pantsbuild/pants/issues/9206
    if request.pex_path:
        argv += ["--pex-path", ":".join(pex.name for pex in request.pex_path)]

    if request.internal_only:
        # An internal-only PEX runs on a single machine, and pre-installing wheels is wasted
        # work in that case (see
        # https://github.com/pex-tool/pex/issues/2292#issuecomment-1854582647 for analysis).
        argv.append("--no-pre-install-wheels")

    argv.append(f"--sources-directory={source_dir_name}")

    # Include any additional arguments and input digests required by the requirements.
    argv.extend(requirements_setup.argv)

    merged_digest = await merge_digests(
        MergeDigests(
            (
                request.complete_platforms.digest,
                sources_digest_as_subdir,
                request.additional_inputs,
                *requirements_setup.digests,
                *(pex.digest for pex in request.pex_path),
            )
        )
    )

    argv += ["--layout", request.layout.value]

    # A zipapp PEX is a single file; every other layout is captured as a directory.
    if request.layout == PexLayout.ZIPAPP:
        pex_files, pex_dirs = (request.output_filename,), ()
    else:
        pex_files, pex_dirs = (), (request.output_filename,)

    output_files = (*pex_files, *(request.scie_output_files or ()))
    output_directories = (*pex_dirs, *(request.scie_output_directories or ()))

    result = await fallible_to_exec_result_or_raise(
        **implicitly(
            PexCliProcess(
                subcommand=(),
                extra_args=argv,
                additional_input_digest=merged_digest,
                description=_build_pex_description(request, req_strings, python_setup.resolves),
                output_files=output_files or None,
                output_directories=output_directories or None,
                concurrency_available=requirements_setup.concurrency_available,
                cache_scope=request.cache_scope,
            )
        )
    )

    maybe_log_pex_stderr(result.stderr, pex_subsystem.verbosity)

    # The PEXes on the pex path are carried along in the resulting digest.
    digest = result.output_digest
    if request.pex_path:
        digest = await merge_digests(
            MergeDigests((result.output_digest, *(pex.digest for pex in request.pex_path)))
        )

    return BuildPexResult(
        result=result,
        pex_filename=request.output_filename,
        digest=digest,
        python=pex_python_setup.python,
    )
857

858

859
def _build_pex_description(
    request: PexRequest, req_strings: Sequence[str], resolve_to_lockfile: Mapping[str, str]
) -> str:
    """Return the human-readable description for the process that builds `request`.

    An explicit `request.description` always wins. Otherwise the text depends on where the
    requirements come from: an entire lockfile, a repository PEX, a named resolve, or a plain
    list of requirement strings.
    """
    if request.description:
        return request.description

    output_filename = request.output_filename
    requirements = request.requirements
    if not isinstance(requirements, EntireLockfile):
        if not req_strings:
            return f"Building {output_filename}"

        superset = requirements.from_superset
        req_count = pluralize(len(req_strings), "requirement")
        req_list = ", ".join(req_strings)
        if isinstance(superset, Pex):
            return softwrap(
                f"""
                Extracting {req_count}
                to build {output_filename} from {superset.name}:
                {req_list}
                """
            )
        elif isinstance(superset, Resolve):
            # At this point we know this is a valid user resolve, so we can assume
            # it's available in the dict. Nonetheless we use get() so that any weird error
            # here gives a bad message rather than an outright crash.
            lockfile_path = resolve_to_lockfile.get(superset.name, "")
            return softwrap(
                f"""
                Building {req_count}
                for {output_filename} from the {lockfile_path} resolve:
                {req_list}
                """
            )
        else:
            desc_suffix = softwrap(
                f"""
                with {req_count}:
                {req_list}
                """
            )
    else:
        desc_suffix = f"from {requirements.lockfile.url}"
    return f"Building {output_filename} {desc_suffix}"
1✔
900

901

902
@rule
async def create_pex(request: PexRequest) -> Pex:
    """Build the PEX for `request` and return it as a `Pex`."""
    return (await build_pex(request, **implicitly())).create_pex()
×
906

907

908
@rule
async def create_optional_pex(request: OptionalPexRequest) -> OptionalPex:
    """Build the wrapped PEX request if there is one; otherwise return an empty `OptionalPex`."""
    if request.maybe_pex_request is not None:
        return OptionalPex(await create_pex(request.maybe_pex_request))
    return OptionalPex(None)
×
914

915

916
@dataclass(frozen=True)
class Script:
    """A script identified by its path."""

    path: PurePath

    @property
    def argv0(self) -> str:
        """The string to use as `argv[0]` when invoking this script.

        A path with no directory component is prefixed with `./`; any other path is returned
        unchanged.
        """
        if self.path.parent == PurePath():
            return f"./{self.path}"
        return str(self.path)
×
923

924

925
@dataclass(frozen=True)
class VenvScript:
    """A generated venv shim: the `Script` used to invoke it plus the file content to write."""

    # How to refer to the shim when executing it.
    script: Script
    # The shim file itself (path, bytes and executable bit).
    content: FileContent
11✔
929

930

931
@dataclass(frozen=True)
class VenvScriptWriter:
    """Generates bash shim scripts that launch executables living in a PEX's venv.

    Each generated shim exports the PEX environment, re-seeds the venv from the original PEX
    file when the venv directory is missing, and then execs the requested venv executable.
    """

    complete_pex_env: CompletePexEnvironment
    pex: Pex
    venv_dir: PurePath

    @classmethod
    def create(
        cls, complete_pex_env: CompletePexEnvironment, pex: Pex, venv_rel_dir: PurePath
    ) -> VenvScriptWriter:
        """Create a writer whose venv lives at `venv_rel_dir` beneath the env's `pex_root`."""
        # N.B.: The working directory used by any given invocation of the venv scripts is not
        # known here; so working_directory is dealt with once, in an `adjust_relative_paths`
        # function inside the script, to save rule authors from having to do CWD offset math in
        # every rule for all the relative paths their process depends on.
        return cls(
            complete_pex_env=complete_pex_env,
            pex=pex,
            venv_dir=complete_pex_env.pex_root / venv_rel_dir,
        )

    def _create_venv_script(
        self,
        bash: BashBinary,
        *,
        script_path: PurePath,
        venv_executable: PurePath,
    ) -> VenvScript:
        """Render the bash shim that execs `venv_executable`, to be written at `script_path`.

        :param bash: The bash binary whose path is used for the shebang line.
        :param script_path: Where the shim is to be written.
        :param venv_executable: The executable inside the venv that the shim execs.
        """
        pex_env = self.complete_pex_env.environment_dict(python=self.pex.python)
        exported_env = " ".join(f"{name}={shlex.quote(value)}" for name, value in pex_env.items())

        target_venv_executable = shlex.quote(str(venv_executable))
        venv_dir = shlex.quote(str(self.venv_dir))
        # Every argument of the PEX invocation is routed through `adjust_relative_paths`.
        execute_pex_args = " ".join(
            f"$(adjust_relative_paths {shlex.quote(arg)})"
            for arg in self.complete_pex_env.create_argv(self.pex.name)
        )

        script = dedent(
            f"""\
            #!{bash.path}
            set -euo pipefail

            # N.B.: This relies on BASH_SOURCE which has been available since bash-3.0, released in
            # 2004. It will either contain the absolute path of the venv script or it will contain
            # the relative path from the CWD to the venv script. Either way, we know the venv script
            # parent directory is the sandbox root directory.
            SANDBOX_ROOT="${{BASH_SOURCE%/*}}"

            function adjust_relative_paths() {{
                local value0="$1"
                shift
                if [ "${{value0:0:1}}" == "/" ]; then
                    # Don't relativize absolute paths.
                    echo "${{value0}}" "$@"
                else
                    # N.B.: We convert all relative paths to paths relative to the sandbox root so
                    # this script works when run with a PWD set somewhere else than the sandbox
                    # root.
                    #
                    # There are two cases to consider. For the purposes of example, assume PWD is
                    # `/tmp/sandboxes/abc123/foo/bar`; i.e.: the rule API sets working_directory to
                    # `foo/bar`. Also assume `config/tool.yml` is the relative path in question.
                    #
                    # 1. If our BASH_SOURCE is  `/tmp/sandboxes/abc123/pex_shim.sh`; so our
                    #    SANDBOX_ROOT is `/tmp/sandboxes/abc123`, we calculate
                    #    `/tmp/sandboxes/abc123/config/tool.yml`.
                    # 2. If our BASH_SOURCE is instead `../../pex_shim.sh`; so our SANDBOX_ROOT is
                    #    `../..`, we calculate `../../config/tool.yml`.
                    echo "${{SANDBOX_ROOT}}/${{value0}}" "$@"
                fi
            }}

            export {exported_env}
            export PEX_ROOT="$(adjust_relative_paths ${{PEX_ROOT}})"

            execute_pex_args="{execute_pex_args}"
            target_venv_executable="$(adjust_relative_paths {target_venv_executable})"
            venv_dir="$(adjust_relative_paths {venv_dir})"

            # Let PEX_TOOLS invocations pass through to the original PEX file since venvs don't come
            # with tools support.
            if [ -n "${{PEX_TOOLS:-}}" ]; then
              exec ${{execute_pex_args}} "$@"
            fi

            # If the seeded venv has been removed from the PEX_ROOT, we re-seed from the original
            # `--venv` mode PEX file.
            if [ ! -e "${{venv_dir}}" ]; then
                PEX_INTERPRETER=1 ${{execute_pex_args}} -c ''
            fi

            exec "${{target_venv_executable}}" "$@"
            """
        )
        shim_file = FileContent(path=str(script_path), content=script.encode(), is_executable=True)
        return VenvScript(script=Script(script_path), content=shim_file)

    def exe(self, bash: BashBinary) -> VenvScript:
        """Writes a safe shim for the venv's executable `pex` script."""
        return self._create_venv_script(
            bash,
            script_path=PurePath(f"{self.pex.name}_pex_shim.sh"),
            venv_executable=self.venv_dir / "pex",
        )

    def bin(self, bash: BashBinary, name: str) -> VenvScript:
        """Writes a safe shim for an executable or script in the venv's `bin` directory."""
        return self._create_venv_script(
            bash,
            script_path=PurePath(f"{self.pex.name}_bin_{name}_shim.sh"),
            venv_executable=self.venv_dir / "bin" / name,
        )

    def python(self, bash: BashBinary) -> VenvScript:
        """Writes a safe shim for the venv's python binary."""
        return self.bin(bash, "python")
×
1050

1051

1052
@dataclass(frozen=True)
class VenvPex:
    """A PEX built in `--venv` mode together with the shim scripts used to run it."""

    # Digest holding the PEX and the generated shim scripts.
    digest: Digest
    # Named caches needed by the Python the PEX was built with, if any.
    append_only_caches: FrozenDict[str, str] | None
    pex_filename: str
    # Shim for the venv's `pex` script.
    pex: Script
    # Shim for the venv's python binary.
    python: Script
    # Shims for the requested venv `bin` scripts, keyed by script name.
    bin: FrozenDict[str, Script]
    # Location of the venv relative to the PEX_ROOT.
    venv_rel_dir: str
11✔
1061

1062

1063
@dataclass(frozen=True)
class VenvPexRequest:
    pex_request: PexRequest
    complete_pex_env: CompletePexEnvironment
    bin_names: tuple[str, ...] = ()
    site_packages_copies: bool = False

    def __init__(
        self,
        pex_request: PexRequest,
        complete_pex_env: CompletePexEnvironment,
        bin_names: Iterable[str] = (),
        site_packages_copies: bool = False,
    ) -> None:
        """A request for a PEX that runs in a venv and optionally exposes select venv `bin` scripts.

        :param pex_request: The details of the desired PEX.
        :param complete_pex_env: The complete PEX environment the pex will be run in.
        :param bin_names: The names of venv `bin` scripts to expose for execution.
        :param site_packages_copies: `True` to use copies (hardlinks when possible) of PEX
            dependencies when installing them in the venv site-packages directory. By default this
            is `False` and symlinks are used instead which is a win in the time and space dimensions
            but results in a non-standard venv structure that does trip up some libraries.
        """
        field_values = {
            "pex_request": pex_request,
            "complete_pex_env": complete_pex_env,
            # Any iterable of names is accepted; it is stored as a tuple.
            "bin_names": tuple(bin_names),
            "site_packages_copies": site_packages_copies,
        }
        # The dataclass is frozen, so the fields have to be set via `object.__setattr__`.
        for field_name, value in field_values.items():
            object.__setattr__(self, field_name, value)
×
1091

1092

1093
@rule
async def wrap_venv_prex_request(
    pex_request: PexRequest, pex_environment: PexEnvironment
) -> VenvPexRequest:
    """Wrap a plain `PexRequest` in a `VenvPexRequest` that exposes no extra bin scripts."""
    # This lets a VenvPex be created from a plain PexRequest when no extra bin scripts are needed.
    sandbox_env = pex_environment.in_sandbox(working_directory=None)
    return VenvPexRequest(pex_request, sandbox_env)
×
1099

1100

1101
@rule
async def create_venv_pex(
    request: VenvPexRequest, bash: BashBinary, pex_environment: PexEnvironment
) -> VenvPex:
    """Build a `--venv` mode PEX and the bash shims that run directly out of its seeded venv."""
    # VenvPex is motivated by improving performance of Python tools by eliminating traditional PEX
    # file startup overhead.
    #
    # To achieve the minimal overhead (on the order of 1ms) we discard:
    # 1. Using Pex default mode:
    #    Although this does reduce initial tool execution overhead, it still leaves a minimum
    #    O(100ms) of overhead per subsequent tool invocation. Fundamentally, Pex still needs to
    #    execute its `sys.path` isolation bootstrap code in this case.
    # 2. Using the Pex `venv` tool:
    #    The idea here would be to create a tool venv as a Process output and then use the tool
    #    venv as an input digest for all tool invocations. This was tried and netted ~500ms of
    #    overhead over raw venv use.
    #
    # Instead we use Pex's `--venv` mode. In this mode you can run the Pex file and it will create a
    # venv on the fly in the PEX_ROOT as needed. Since the PEX_ROOT is a named_cache, we avoid the
    # digest materialization overhead present in 2 above. Since the venv is naturally isolated we
    # avoid the `sys.path` isolation overhead of Pex itself present in 1 above.
    #
    # This does leave O(50ms) of overhead though for the PEX bootstrap code to detect an already
    # created venv in the PEX_ROOT and re-exec into it. To eliminate this overhead we execute the
    # `pex` venv script in the PEX_ROOT directly. This is not robust on its own though, since the
    # named caches store might be pruned at any time. To guard against that case we introduce a shim
    # bash script that checks to see if the `pex` venv script exists in the PEX_ROOT and re-creates
    # the PEX_ROOT venv if not. Using the shim script to run Python tools gets us down to the ~1ms
    # of overhead we currently enjoy.

    pex_request = request.pex_request
    seed_args = (
        "--venv",
        "prepend",
        "--seed",
        "verbose",
        pex_environment.venv_site_packages_copies_option(use_copies=request.site_packages_copies),
    )
    seeded_venv_request = dataclasses.replace(
        pex_request, additional_args=pex_request.additional_args + seed_args
    )
    venv_pex_result = await build_pex(seeded_venv_request, **implicitly())

    # Pex verbose --seed mode outputs the absolute path of the PEX executable as well as the
    # absolute path of the PEX_ROOT.  In the --venv case this is the `pex` script in the venv root
    # directory.
    seed_info = json.loads(venv_pex_result.result.stdout.decode())
    abs_pex_root = PurePath(seed_info["pex_root"])
    abs_pex_path = PurePath(seed_info["pex"])
    venv_rel_dir = abs_pex_path.relative_to(abs_pex_root).parent

    venv_script_writer = VenvScriptWriter.create(
        complete_pex_env=request.complete_pex_env,
        pex=venv_pex_result.create_pex(),
        venv_rel_dir=venv_rel_dir,
    )
    pex_shim = venv_script_writer.exe(bash)
    python_shim = venv_script_writer.python(bash)
    bin_shims = {
        bin_name: venv_script_writer.bin(bash, bin_name) for bin_name in request.bin_names
    }
    scripts_digest = await create_digest(
        CreateDigest(
            (
                pex_shim.content,
                python_shim.content,
                *(bin_shim.content for bin_shim in bin_shims.values()),
            )
        )
    )
    input_digest = await merge_digests(
        MergeDigests((venv_script_writer.pex.digest, scripts_digest))
    )

    built_with_python = venv_pex_result.python
    if built_with_python:
        append_only_caches = built_with_python.append_only_caches
    else:
        append_only_caches = None

    return VenvPex(
        digest=input_digest,
        append_only_caches=append_only_caches,
        pex_filename=venv_pex_result.pex_filename,
        pex=pex_shim.script,
        python=python_shim.script,
        bin=FrozenDict((bin_name, bin_shim.script) for bin_name, bin_shim in bin_shims.items()),
        venv_rel_dir=venv_rel_dir.as_posix(),
    )
1187

1188

1189
@dataclass(frozen=True)
class PexProcess:
    """A request to run a `Pex` as a process; see `setup_pex_process` for the translation."""

    pex: Pex
    argv: tuple[str, ...]
    # Excluded from comparisons (`compare=False`).
    description: str = dataclasses.field(compare=False)
    level: LogLevel
    input_digest: Digest | None
    working_directory: str | None
    extra_env: FrozenDict[str, str]
    output_files: tuple[str, ...] | None
    output_directories: tuple[str, ...] | None
    timeout_seconds: int | None
    execution_slot_variable: str | None
    concurrency_available: int
    cache_scope: ProcessCacheScope

    def __init__(
        self,
        pex: Pex,
        *,
        description: str,
        argv: Iterable[str] = (),
        level: LogLevel = LogLevel.INFO,
        input_digest: Digest | None = None,
        working_directory: str | None = None,
        extra_env: Mapping[str, str] | None = None,
        output_files: Iterable[str] | None = None,
        output_directories: Iterable[str] | None = None,
        timeout_seconds: int | None = None,
        execution_slot_variable: str | None = None,
        concurrency_available: int = 0,
        cache_scope: ProcessCacheScope = ProcessCacheScope.SUCCESSFUL,
    ) -> None:
        """Normalize the loosely typed arguments into the immutable field types.

        `argv` becomes a tuple, `extra_env` a `FrozenDict` (empty when omitted), and the
        output collections become tuples, or `None` when falsy.
        """
        field_values = {
            "pex": pex,
            "argv": tuple(argv),
            "description": description,
            "level": level,
            "input_digest": input_digest,
            "working_directory": working_directory,
            "extra_env": FrozenDict(extra_env or {}),
            "output_files": tuple(output_files) if output_files else None,
            "output_directories": tuple(output_directories) if output_directories else None,
            "timeout_seconds": timeout_seconds,
            "execution_slot_variable": execution_slot_variable,
            "concurrency_available": concurrency_available,
            "cache_scope": cache_scope,
        }
        # The dataclass is frozen, so the fields have to be set via `object.__setattr__`.
        for field_name, value in field_values.items():
            object.__setattr__(self, field_name, value)
2✔
1237

1238

1239
@rule
async def setup_pex_process(request: PexProcess, pex_environment: PexEnvironment) -> Process:
    """Translate a `PexProcess` into the `Process` that runs its PEX in the sandbox."""
    pex = request.pex
    complete_pex_env = pex_environment.in_sandbox(working_directory=request.working_directory)

    # The PEX itself always has to be present; caller inputs are merged in when given.
    input_digest = pex.digest
    if request.input_digest:
        input_digest = await merge_digests(MergeDigests((pex.digest, request.input_digest)))

    python_caches = pex.python.append_only_caches if pex.python else FrozenDict({})

    return Process(
        complete_pex_env.create_argv(pex.name, *request.argv),
        description=request.description,
        level=request.level,
        input_digest=input_digest,
        working_directory=request.working_directory,
        # Entries of the request's extra env override the PEX environment.
        env={
            **complete_pex_env.environment_dict(python=pex.python),
            **request.extra_env,
        },
        output_files=request.output_files,
        output_directories=request.output_directories,
        append_only_caches={
            **complete_pex_env.append_only_caches,
            **python_caches,
        },
        timeout_seconds=request.timeout_seconds,
        execution_slot_variable=request.execution_slot_variable,
        concurrency_available=request.concurrency_available,
        cache_scope=request.cache_scope,
    )
1274

1275

1276
@dataclass(unsafe_hash=True)
class VenvPexProcess:
    """A request to run a `VenvPex` as a process; see `setup_venv_pex_process`."""

    venv_pex: VenvPex
    argv: tuple[str, ...]
    # Excluded from comparisons (`compare=False`).
    description: str = dataclasses.field(compare=False)
    level: LogLevel
    input_digest: Digest | None
    working_directory: str | None
    extra_env: FrozenDict[str, str]
    output_files: tuple[str, ...] | None
    output_directories: tuple[str, ...] | None
    timeout_seconds: int | None
    execution_slot_variable: str | None
    concurrency_available: int
    cache_scope: ProcessCacheScope
    append_only_caches: FrozenDict[str, str]

    def __init__(
        self,
        venv_pex: VenvPex,
        *,
        description: str,
        argv: Iterable[str] = (),
        level: LogLevel = LogLevel.INFO,
        input_digest: Digest | None = None,
        working_directory: str | None = None,
        extra_env: Mapping[str, str] | None = None,
        output_files: Iterable[str] | None = None,
        output_directories: Iterable[str] | None = None,
        timeout_seconds: int | None = None,
        execution_slot_variable: str | None = None,
        concurrency_available: int = 0,
        cache_scope: ProcessCacheScope = ProcessCacheScope.SUCCESSFUL,
        append_only_caches: Mapping[str, str] | None = None,
    ) -> None:
        """Normalize the loosely typed arguments into hashable field types.

        `argv` becomes a tuple, `extra_env` and `append_only_caches` become `FrozenDict`s
        (empty when omitted), and the output collections become tuples, or `None` when falsy.
        """
        field_values = {
            "venv_pex": venv_pex,
            "argv": tuple(argv),
            "description": description,
            "level": level,
            "input_digest": input_digest,
            "working_directory": working_directory,
            "extra_env": FrozenDict(extra_env or {}),
            "output_files": tuple(output_files) if output_files else None,
            "output_directories": tuple(output_directories) if output_directories else None,
            "timeout_seconds": timeout_seconds,
            "execution_slot_variable": execution_slot_variable,
            "concurrency_available": concurrency_available,
            "cache_scope": cache_scope,
            "append_only_caches": FrozenDict(append_only_caches or {}),
        }
        # Fields are assigned through `object.__setattr__`, the same way `PexProcess` does it.
        for field_name, value in field_values.items():
            object.__setattr__(self, field_name, value)
1✔
1327

1328

1329
@rule
async def setup_venv_pex_process(
    request: VenvPexProcess, pex_environment: PexEnvironment
) -> Process:
    """Translate a `VenvPexProcess` into the `Process` that runs the venv PEX's `pex` shim.

    The shim path is made relative to `request.working_directory` when one is set. The venv
    PEX digest is always part of the input digest, merged with the request's own input digest
    when given.

    The named caches are the union of the sandboxed PEX environment's caches, the request's
    caches and the venv PEX's caches. If a cache name appears in more than one of them, the
    later source in that order wins.
    """
    venv_pex = request.venv_pex
    pex_bin = (
        os.path.relpath(venv_pex.pex.argv0, request.working_directory)
        if request.working_directory
        else venv_pex.pex.argv0
    )
    argv = (pex_bin, *request.argv)
    input_digest = (
        await merge_digests(MergeDigests((venv_pex.digest, request.input_digest)))
        if request.input_digest
        else venv_pex.digest
    )
    # Merge through a dict display rather than `FrozenDict(**a, **b, **c)`: keyword unpacking
    # in a call raises a TypeError as soon as two of the mappings share a key, whereas a dict
    # display simply lets the later entry win (as `setup_pex_process` does for its caches).
    append_only_caches: FrozenDict[str, str] = FrozenDict(
        {
            **pex_environment.in_sandbox(
                working_directory=request.working_directory
            ).append_only_caches,
            **request.append_only_caches,
            **(venv_pex.append_only_caches or {}),
        }
    )
    return Process(
        argv=argv,
        description=request.description,
        level=request.level,
        input_digest=input_digest,
        working_directory=request.working_directory,
        env=request.extra_env,
        output_files=request.output_files,
        output_directories=request.output_directories,
        append_only_caches=append_only_caches,
        timeout_seconds=request.timeout_seconds,
        execution_slot_variable=request.execution_slot_variable,
        concurrency_available=request.concurrency_available,
        cache_scope=request.cache_scope,
    )
1367

1368

1369
@dataclass(frozen=True)
class PexDistributionInfo:
    """Metadata for a single distribution contained in a PEX file.

    This mirrors one entry of the output of `PEX_TOOLS=1 repository info -v`.
    """

    project_name: str
    version: packaging.version.Version
    # `None` when the distribution reports no `requires_python`.
    requires_python: packaging.specifiers.SpecifierSet | None
    # Note: These are parsed from metadata written by the pex tool, and are always
    #   a valid packaging.requirements.Requirement.
    requires_dists: tuple[Requirement, ...]
11✔
1380

1381

1382
# Type of the fallback value that `PexResolveInfo.find` returns when nothing matches.
DefaultT = TypeVar("DefaultT")
11✔
1383

1384

1385
class PexResolveInfo(Collection[PexDistributionInfo]):
    """The distributions resolved into a PEX file.

    This corresponds to the complete output of `PEX_TOOLS=1 repository info -v`.
    """

    def find(
        self, name: str, default: DefaultT | None = None
    ) -> PexDistributionInfo | DefaultT | None:
        """Return the first distribution whose project name is `name`, else `default`."""
        matches = (info for info in self if info.project_name == name)
        return next(matches, default)
×
1397

1398

1399
def parse_repository_info(repository_info: str) -> PexResolveInfo:
    """Parse the textual output of `repository info -v` into a `PexResolveInfo`.

    Each line of `repository_info` is decoded as one standalone JSON object that must
    carry the keys `project_name`, `version`, `requires_python` and `requires_dists`.
    The resulting distributions are ordered by project name, and each distribution's
    requirements are ordered by their string form.
    """

    def to_dist_info(raw_line: str) -> PexDistributionInfo:
        # Convert a single JSON line into its typed representation.
        entry = json.loads(raw_line)
        python_constraint = entry["requires_python"]
        project_name = entry["project_name"]
        version = packaging.version.Version(entry["version"])
        # A JSON null means there is no constraint to parse.
        requires_python = (
            None
            if python_constraint is None
            else packaging.specifiers.SpecifierSet(python_constraint)
        )
        requires_dists = tuple(map(Requirement, sorted(entry["requires_dists"])))
        return PexDistributionInfo(
            project_name=project_name,
            version=version,
            requires_python=requires_python,
            requires_dists=requires_dists,
        )

    dist_infos = [to_dist_info(raw_line) for raw_line in repository_info.splitlines()]
    dist_infos.sort(key=lambda dist: dist.project_name)
    return PexResolveInfo(dist_infos)
×
1416

1417

1418
@rule
async def determine_venv_pex_resolve_info(venv_pex: VenvPex) -> PexResolveInfo:
    """Determine the distributions found in `venv_pex`.

    Runs `repository info -v` through a `VenvPexProcess` with `PEX_TOOLS=1` set in the
    environment, then parses the process's stdout with `parse_repository_info`.
    """
    process_result = await fallible_to_exec_result_or_raise(
        **implicitly(
            VenvPexProcess(
                venv_pex,
                argv=["repository", "info", "-v"],
                extra_env={"PEX_TOOLS": "1"},
                input_digest=venv_pex.digest,
                description=f"Determine distributions found in {venv_pex.pex_filename}",
                level=LogLevel.DEBUG,
            )
        )
    )
    # The tool's stdout is handed to the parser as decoded text.
    return parse_repository_info(process_result.stdout.decode())
×
1433

1434

1435
@rule
async def determine_pex_resolve_info(pex_pex: PexPEX, pex: Pex) -> PexResolveInfo:
    """Determine the distributions found in `pex`.

    Runs a `PexProcess` whose executable PEX is built from `pex_pex` (its digest and exe,
    reusing `pex.python`), with `PEX_MODULE=pex.tools` set and `pex.name` passed as the
    first argument ahead of `repository info -v`. The process's stdout is parsed with
    `parse_repository_info`.
    """
    process_result = await fallible_to_exec_result_or_raise(
        **implicitly(
            PexProcess(
                pex=Pex(digest=pex_pex.digest, name=pex_pex.exe, python=pex.python),
                argv=[pex.name, "repository", "info", "-v"],
                # The PEX being inspected is supplied as the process input.
                input_digest=pex.digest,
                extra_env={"PEX_MODULE": "pex.tools"},
                description=f"Determine distributions found in {pex.name}",
                level=LogLevel.DEBUG,
            )
        )
    )
    # The tool's stdout is handed to the parser as decoded text.
    return parse_repository_info(process_result.stdout.decode())
×
1450

1451

1452
def rules():
    """Return one flat list holding the result of `collect_rules()` followed by the rules
    of `pex_cli`, `pex_requirements` and `stripped_source_rules()`, in that order."""
    combined = list(collect_rules())
    combined.extend(pex_cli.rules())
    combined.extend(pex_requirements.rules())
    combined.extend(stripped_source_rules())
    return combined
11✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc