• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pantsbuild / pants / 20952406873

13 Jan 2026 09:57AM UTC coverage: 73.952% (-6.3%) from 80.277%
20952406873

Pull #22588

github

web-flow
Merge 28b91a768 into b12ccd337
Pull Request #22588: Fix #22575: Use stable image IDs instead of full metadata for Docker build context hashes

2 of 21 new or added lines in 1 file covered. (9.52%)

3 existing lines in 2 files now uncovered.

12529 of 16942 relevant lines covered (73.95%)

17.91 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

54.55
/src/python/pants/backend/docker/util_rules/docker_build_context.py
1
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
2
# Licensed under the Apache License, Version 2.0 (see LICENSE).
3

4
from __future__ import annotations
8✔
5

6
import logging
8✔
7
import re
8✔
8
import shlex
8✔
9
from abc import ABC
8✔
10
from collections.abc import Iterable, Mapping
8✔
11
from dataclasses import dataclass
8✔
12

13
from pants.backend.docker.package_types import BuiltDockerImage
8✔
14
from pants.backend.docker.subsystems.docker_options import DockerOptions
8✔
15
from pants.backend.docker.subsystems.dockerfile_parser import (
8✔
16
    DockerfileInfo,
17
    DockerfileInfoRequest,
18
    parse_dockerfile,
19
)
20
from pants.backend.docker.target_types import DockerImageSourceField
8✔
21
from pants.backend.docker.util_rules.docker_build_args import (
8✔
22
    DockerBuildArgs,
23
    DockerBuildArgsRequest,
24
    docker_build_args,
25
)
26
from pants.backend.docker.util_rules.docker_build_env import (
8✔
27
    DockerBuildEnvironment,
28
    DockerBuildEnvironmentError,
29
    DockerBuildEnvironmentRequest,
30
    docker_build_environment_vars,
31
)
32
from pants.backend.docker.utils import image_ref_regexp, suggest_renames
8✔
33
from pants.backend.docker.value_interpolation import DockerBuildArgsInterpolationValue
8✔
34
from pants.backend.shell.target_types import ShellSourceField
8✔
35
from pants.core.goals.package import (
8✔
36
    BuiltPackage,
37
    EnvironmentAwarePackageRequest,
38
    PackageFieldSet,
39
    environment_aware_package,
40
)
41
from pants.core.target_types import FileSourceField
8✔
42
from pants.core.util_rules.source_files import SourceFilesRequest, determine_source_files
8✔
43
from pants.engine.addresses import Address, UnparsedAddressInputs
8✔
44
from pants.engine.fs import (
8✔
45
    CreateDigest,
46
    Digest,
47
    FileContent,
48
    MergeDigests,
49
    Snapshot,
50
)
51
from pants.engine.internals.graph import (
8✔
52
    find_valid_field_sets,
53
    resolve_targets,
54
    resolve_unparsed_address_inputs,
55
)
56
from pants.engine.internals.graph import transitive_targets as transitive_targets_get
8✔
57
from pants.engine.intrinsics import create_digest, digest_to_snapshot, get_digest_contents
8✔
58
from pants.engine.rules import collect_rules, concurrently, implicitly, rule
8✔
59
from pants.engine.target import (
8✔
60
    Dependencies,
61
    DependenciesRequest,
62
    FieldSetsPerTargetRequest,
63
    GeneratedSources,
64
    GenerateSourcesRequest,
65
    SourcesField,
66
    TransitiveTargetsRequest,
67
)
68
from pants.engine.unions import UnionRule
8✔
69
from pants.util.strutil import softwrap, stable_hash
8✔
70
from pants.util.value_interpolation import InterpolationContext, InterpolationValue
8✔
71

72
logger = logging.getLogger(__name__)
8✔
73

74

75
class DockerBuildContextError(Exception):
    """Raised when a Docker build context cannot be assembled.

    Used in this module for base image tags that cannot be resolved and for build args that have
    no defined value.
    """
8✔
77

78

79
class DockerContextFilesAcceptableInputsField(ABC, SourcesField):
    """Meta field telling the codegen machinery which source fields may be used as-is.

    It serves as the `input` type of the context files generator. To have another source field
    accepted, register it as a virtual subclass:

        DockerContextFilesAcceptableInputsField.register(<SourceField>)

    Registration relies on `ABC.register` from the Python standard library:
    https://docs.python.org/3/library/abc.html#abc.ABCMeta.register
    """


# Shell sources are copied into the build context unchanged.
DockerContextFilesAcceptableInputsField.register(ShellSourceField)
8✔
93

94

95
class DockerContextFilesSourcesField(SourcesField):
    """Type marker only: the output field type the codegen machinery generates for the context."""
97

98

99
class GenerateDockerContextFiles(GenerateSourcesRequest):
    """Codegen request that turns files from acceptable source fields into Docker context files."""

    # Any field registered with `DockerContextFilesAcceptableInputsField` is a valid input.
    input = DockerContextFilesAcceptableInputsField
    # Marker type that the build context rule asks for.
    output = DockerContextFilesSourcesField
    exportable = False
8✔
106

107

108
@rule
async def hydrate_input_sources(request: GenerateDockerContextFiles) -> GeneratedSources:
    """Hand the request's protocol sources back unchanged as the generated sources."""
    passthrough_sources = request.protocol_sources
    return GeneratedSources(passthrough_sources)
×
112

113

114
@dataclass(frozen=True)
class DockerBuildContextRequest:
    """Request for the build context of the Docker image target at `address`."""

    # Address of the target whose build context should be created.
    address: Address
    # When true, Docker image dependencies are packaged as well and their tags are fed back as
    # FROM build args; when false, Docker image dependencies are not packaged.
    build_upstream_images: bool = False
8✔
118

119

120
@dataclass(frozen=True)
class DockerBuildContext:
    """Everything needed to run a Docker build for one image target.

    Instances are normally built with `DockerBuildContext.create()`, which also derives the
    interpolation context (`build_args`, `pants` and `tags` values) from the inputs.
    """

    build_args: DockerBuildArgs
    digest: Digest
    build_env: DockerBuildEnvironment
    upstream_image_ids: tuple[str, ...]
    dockerfile: str
    interpolation_context: InterpolationContext
    copy_source_vs_context_source: tuple[tuple[str, str], ...]
    stages: tuple[str, ...]

    @classmethod
    def create(
        cls,
        build_args: DockerBuildArgs,
        snapshot: Snapshot,
        build_env: DockerBuildEnvironment,
        upstream_image_ids: Iterable[str],
        dockerfile_info: DockerfileInfo,
        should_suggest_renames: bool = True,
    ) -> DockerBuildContext:
        """Build a `DockerBuildContext` and its interpolation context from the given inputs.

        `upstream_image_ids` and the stage names are stored sorted. Rename suggestions for COPY
        sources are only computed when `should_suggest_renames` is true.
        """
        if build_args:
            merged_args = cls._merge_build_args(dockerfile_info, build_args, build_env)
        else:
            merged_args = {}

        # Use a dedicated value type for the `build_args` context to get helpful error messages.
        build_args_value = DockerBuildArgsInterpolationValue(merged_args)

        # Data from Pants: a hash over all inputs that can be used for image tagging.
        pants_values = {"hash": stable_hash((build_args, build_env, snapshot.digest))}

        # Base image tag values for all stages (as parsed from the Dockerfile instructions).
        stage_names, tags_values = cls._get_stages_and_tags(dockerfile_info, build_args_value)

        context_values: dict[str, dict[str, str] | InterpolationValue] = {
            "build_args": build_args_value,
            "pants": pants_values,
            "tags": tags_values,
        }

        if should_suggest_renames:
            renames = tuple(
                suggest_renames(
                    tentative_paths=(
                        # We don't want to include the Dockerfile as a suggested rename
                        dockerfile_info.source,
                        *dockerfile_info.copy_source_paths,
                    ),
                    actual_files=snapshot.files,
                    actual_dirs=snapshot.dirs,
                )
            )
        else:
            renames = ()

        return cls(
            build_args=build_args,
            digest=snapshot.digest,
            dockerfile=dockerfile_info.source,
            build_env=build_env,
            upstream_image_ids=tuple(sorted(upstream_image_ids)),
            interpolation_context=InterpolationContext.from_dict(context_values),
            copy_source_vs_context_source=renames,
            stages=tuple(sorted(stage_names)),
        )

    @classmethod
    def _get_stages_and_tags(
        cls, dockerfile_info: DockerfileInfo, build_args: Mapping[str, str]
    ) -> tuple[set[str], dict[str, str]]:
        """Collect the named stages and the per-stage base image tags of the Dockerfile.

        Each entry of `dockerfile_info.version_tags` is a "<stage> <tag>" pair. A tag of the form
        `build-arg:<NAME>` is resolved through the build arg's image ref.

        Raises:
            DockerBuildContextError: if a referenced build arg has no value, or no tag can be
                parsed from its image ref.
        """
        build_arg_prefix = "build-arg:"
        named_stages: set[str] = set()
        tags_by_stage: dict[str, str] = {}

        for idx, version_tag in enumerate(dockerfile_info.version_tags):
            # The tag is empty if the image is referenced by digest instead.
            stage, tag = [*version_tag.split(maxsplit=1), ""][:2]

            if tag.startswith(build_arg_prefix):
                arg_name = tag[len(build_arg_prefix) :]
                image_ref = build_args.get(arg_name)
                if not image_ref:
                    image_ref = dockerfile_info.build_args.to_dict().get(arg_name)
                if not image_ref:
                    raise DockerBuildContextError(
                        f"Failed to parse Dockerfile baseimage tag for stage {stage} in "
                        f"{dockerfile_info.address} target, unknown build ARG: {arg_name!r}."
                    )
                parsed = re.match(image_ref_regexp, image_ref.strip("\"'"))
                if parsed:
                    # A ref pinned by digest only is exposed as "latest".
                    tag = parsed.group("tag") or (parsed.group("digest") and "latest")
                else:
                    tag = ""
                if not tag:
                    raise DockerBuildContextError(
                        f"Failed to parse Dockerfile baseimage tag for stage {stage} in "
                        f"{dockerfile_info.address} target, from image ref: {image_ref}."
                    )

            # Stages called `stage<idx>` are treated as unnamed.
            if stage != f"stage{idx}":
                named_stages.add(stage)

            if not tag:
                continue
            if idx == 0:
                # Expose the first (stage0) FROM directive as the "baseimage".
                tags_by_stage["baseimage"] = tag
            tags_by_stage[stage] = tag

        return named_stages, tags_by_stage

    @staticmethod
    def _merge_build_args(
        dockerfile_info: DockerfileInfo,
        build_args: DockerBuildArgs,
        build_env: DockerBuildEnvironment,
    ) -> dict[str, str]:
        """Resolve a value for every supplied build arg.

        An arg given as `NAME=value` keeps its value. A bare `NAME` is looked up in `build_env`,
        falling back to the default declared on the Dockerfile's `ARG` instruction, if any.

        Raises:
            DockerBuildContextError: if looking up a value in `build_env` fails.
        """
        # Extract default arg values from the parsed Dockerfile.
        dockerfile_defaults: dict[str, str] = {}
        for declared_arg in dockerfile_info.build_args:
            name, has_default, default_value = declared_arg.partition("=")
            if has_default:
                dockerfile_defaults[name] = default_value

        # Only the explicitly supplied build args (`build_args` / `extra_build_args`) become
        # placeholders; ARG names from the Dockerfile are not picked up automatically, to keep it
        # explicit which args Pants uses. Default ARG values from the Dockerfile are still
        # honored, so they do not have to be duplicated in BUILD files.
        resolved = {}
        try:
            for supplied_arg in build_args:
                name, has_value, value = supplied_arg.partition("=")
                if has_value:
                    resolved[name] = value
                else:
                    resolved[name] = build_env.get(name, dockerfile_defaults.get(name))
        except DockerBuildEnvironmentError as e:
            raise DockerBuildContextError(
                f"Undefined value for build arg on the {dockerfile_info.address} target: {e}"
                "\n\nIf you did not intend to inherit the value for this build arg from the "
                "environment, provide a default value with the option `[docker].build_args` "
                "or in the `extra_build_args` field on the target definition. Alternatively, "
                "you may also provide a default value on the `ARG` instruction directly in "
                "the `Dockerfile`."
            ) from e
        return resolved
270

271

272
def _stable_image_id_filename(relpath: str | None) -> str:
    """Return the build context file name that records an upstream image's ID.

    The name is derived from the artifact's own `relpath` so that several upstream images never
    write to the same path; identical paths with different content cannot be merged into one
    digest. `stable` is added to the name to emphasize that the file holds only data that is
    stable for given inputs, unlike fields such as tags, which may contain timestamps or
    otherwise change on each rebuild.
    """
    if not relpath:
        return "docker-info.stable.json"
    json_suffix = ".json"
    stem = relpath[: -len(json_suffix)] if relpath.endswith(json_suffix) else relpath
    return f"{stem}.stable.json"


@rule
async def create_docker_build_context(
    request: DockerBuildContextRequest,
    options: DockerOptions,
) -> DockerBuildContext:
    """Assemble the Docker build context for the image target at `request.address`.

    The context digest merges the Dockerfile, the file sources of the root dependencies and the
    packaged dependencies. Upstream Docker images contribute only a file holding their image ID,
    so the context hash does not change when only their tags change. Build args are extended with
    values for FROM and COPY args that refer to targets.
    """
    # Get all targets to include in context.
    transitive_targets = await transitive_targets_get(
        TransitiveTargetsRequest([request.address]), **implicitly()
    )
    docker_image = transitive_targets.roots[0]

    # Get all dependencies for the root target.
    root_dependencies = await resolve_targets(
        **implicitly(DependenciesRequest(docker_image.get(Dependencies)))
    )

    # Get all file sources from the root dependencies. That includes any non-file sources that can
    # be "codegen"ed into a file source.
    sources_request = determine_source_files(
        SourceFilesRequest(
            sources_fields=[tgt.get(SourcesField) for tgt in root_dependencies],
            for_sources_types=(
                DockerContextFilesSourcesField,
                FileSourceField,
            ),
            enable_codegen=True,
        )
    )

    embedded_pkgs_per_target_request = find_valid_field_sets(
        FieldSetsPerTargetRequest(PackageFieldSet, transitive_targets.dependencies), **implicitly()
    )

    sources, embedded_pkgs_per_target, dockerfile_info = await concurrently(
        sources_request,
        embedded_pkgs_per_target_request,
        parse_dockerfile(DockerfileInfoRequest(docker_image.address), **implicitly()),
    )

    # Package binary dependencies for build context.
    pkgs_wanting_embedding = [
        field_set
        for field_set in embedded_pkgs_per_target.field_sets
        # Exclude docker images, unless build_upstream_images is true.
        if (
            request.build_upstream_images
            or not isinstance(getattr(field_set, "source", None), DockerImageSourceField)
        )
    ]
    # NB: `embedded_pkgs` is index-aligned with `pkgs_wanting_embedding` (the filtered list), not
    # with `embedded_pkgs_per_target.field_sets`. Always zip against `pkgs_wanting_embedding`.
    embedded_pkgs = await concurrently(
        environment_aware_package(EnvironmentAwarePackageRequest(field_set))
        for field_set in pkgs_wanting_embedding
    )

    if request.build_upstream_images:
        images_str = ", ".join(
            a.tags[0] for p in embedded_pkgs for a in p.artifacts if isinstance(a, BuiltDockerImage)
        )
        if images_str:
            logger.debug(f"Built upstream Docker images: {images_str}")
        else:
            logger.debug("Did not build any upstream Docker images")

    packages_str = ", ".join(a.relpath for p in embedded_pkgs for a in p.artifacts if a.relpath)
    if packages_str:
        logger.debug(f"Built packages for Docker image: {packages_str}")
    else:
        logger.debug("Did not build any packages for Docker image")

    # Create digests for embedded packages. For upstream Docker images, we use only the image ID
    # to ensure hash stability. This prevents changes in image tags (which may include timestamps
    # or other dynamic values) from affecting the build context hash of dependent images.
    embedded_pkgs_digest = []
    docker_packages = []
    for field_set, built_package in zip(pkgs_wanting_embedding, embedded_pkgs):
        if request.build_upstream_images and isinstance(
            getattr(field_set, "source", None), DockerImageSourceField
        ):
            docker_packages.append(built_package)
        else:
            embedded_pkgs_digest.append(built_package.digest)

    if docker_packages:
        docker_metadata_contents = await concurrently(
            get_digest_contents(built_package.digest) for built_package in docker_packages
        )

        stable_files = []
        for metadata_contents, built_package in zip(docker_metadata_contents, docker_packages):
            image = next(
                (a for a in built_package.artifacts if isinstance(a, BuiltDockerImage)), None
            )
            if not metadata_contents or image is None:
                # Fallback: the digest has no contents, or there is no BuiltDockerImage artifact
                # (shouldn't happen). Embed the package output as-is.
                embedded_pkgs_digest.append(built_package.digest)
                continue
            stable_files.append(
                FileContent(_stable_image_id_filename(image.relpath), image.image_id.encode())
            )

        # One digest per image, all created concurrently rather than one await per iteration.
        embedded_pkgs_digest.extend(
            await concurrently(
                create_digest(CreateDigest([stable_file])) for stable_file in stable_files
            )
        )

    all_digests = (dockerfile_info.digest, sources.snapshot.digest, *embedded_pkgs_digest)

    # Merge all digests to get the final docker build context digest.
    context_request = digest_to_snapshot(**implicitly(MergeDigests(d for d in all_digests if d)))

    # Requests for build args and env
    build_args_request = docker_build_args(DockerBuildArgsRequest(docker_image), **implicitly())
    build_env_request = docker_build_environment_vars(
        DockerBuildEnvironmentRequest(docker_image), **implicitly()
    )
    context, supplied_build_args, build_env = await concurrently(
        context_request, build_args_request, build_env_request
    )

    build_args = supplied_build_args

    upstream_image_ids = []
    if request.build_upstream_images:
        # Update build arg values for FROM image build args.

        # Get the FROM image build args with defined values in the Dockerfile & build args.
        dockerfile_build_args = dockerfile_info.from_image_build_args.with_overrides(
            supplied_build_args
        ).nonempty()
        # Parse the build args values into Address instances.
        from_image_addresses = await resolve_unparsed_address_inputs(
            UnparsedAddressInputs(
                dockerfile_build_args.values(),
                owning_address=dockerfile_info.address,
                description_of_origin=softwrap(
                    f"""
                    the FROM arguments from the file {dockerfile_info.source}
                    from the target {dockerfile_info.address}
                    """
                ),
                skip_invalid_addresses=True,
            ),
            **implicitly(),
        )
        # Map those addresses to the corresponding built image ref (tag).
        address_to_built_image_tag = {
            field_set.address: image.tags[0]
            for field_set, built in zip(pkgs_wanting_embedding, embedded_pkgs)
            for image in built.artifacts
            if isinstance(image, BuiltDockerImage)
        }
        upstream_image_ids = [
            image.image_id
            for built in embedded_pkgs
            for image in built.artifacts
            if isinstance(image, BuiltDockerImage)
        ]
        # Create the FROM image build args.
        from_image_build_args = [
            f"{arg_name}={address_to_built_image_tag[addr]}"
            for arg_name, addr in zip(dockerfile_build_args.keys(), from_image_addresses)
        ]
        build_args = build_args.extended(from_image_build_args)

    # Render build args for turning COPY values in ARGS which are targets into their output
    dockerfile_copy_args = dockerfile_info.copy_build_args.with_overrides(
        supplied_build_args
    ).nonempty()

    def get_artifact_paths(built_package: BuiltPackage) -> list[str]:
        return [e.relpath for e in built_package.artifacts if e.relpath]

    # Pair each package with the field set it was built from. When Docker images were filtered
    # out, the unfiltered field set list is longer and would attribute artifacts to the wrong
    # addresses.
    addrs_to_paths = {
        field_set.address: get_artifact_paths(pkg)
        for field_set, pkg in zip(pkgs_wanting_embedding, embedded_pkgs)
    }

    copy_arg_as_build_args = await fill_args_from_copy(
        dockerfile_copy_args, dockerfile_info, addrs_to_paths
    )

    build_args = build_args.extended(copy_arg_as_build_args)

    return DockerBuildContext.create(
        build_args=build_args,
        snapshot=context,
        upstream_image_ids=upstream_image_ids,
        dockerfile_info=dockerfile_info,
        build_env=build_env,
        should_suggest_renames=options.suggest_renames,
    )
469

470

471
async def fill_args_from_copy(
    dockerfile_copy_args: dict[str, str], dockerfile_info, addrs_to_paths
):
    """Render the COPY build args whose values refer to targets or files.

    Args:
        dockerfile_copy_args: COPY build arg names mapped to their effective, non-empty values.
        dockerfile_info: parsed Dockerfile info; supplies the owning address and source path
            used when resolving addresses and describing their origin.
        addrs_to_paths: mapping from a packaged target's address to its artifact paths.

    Returns:
        A list of `NAME=value` strings. The value is the shell-joined artifact paths when the
        arg's address is in `addrs_to_paths`, and the resolved address itself otherwise.
    """
    # Resolve the same values whose names are zipped below. Resolving the Dockerfile's raw
    # defaults instead (without overrides, including empty values) can pair an arg name with
    # another arg's address.
    copy_arg_addresses = await resolve_unparsed_address_inputs(
        UnparsedAddressInputs(
            dockerfile_copy_args.values(),
            owning_address=dockerfile_info.address,
            description_of_origin=softwrap(
                f"""
                the COPY arguments from the file {dockerfile_info.source}
                from the target {dockerfile_info.address}
                """
            ),
            skip_invalid_addresses=True,
        ),
        **implicitly(),
    )

    def resolve_arg(arg_name, maybe_addr) -> str:
        if maybe_addr in addrs_to_paths:
            return f"{arg_name}={shlex.join(addrs_to_paths[maybe_addr])}"
        # When the ARG value is a reference to a normal file
        return f"{arg_name}={maybe_addr}"

    return [
        resolve_arg(arg_name, arg_addr)
        for arg_name, arg_addr in zip(dockerfile_copy_args.keys(), copy_arg_addresses)
    ]
×
501

502

503
def rules():
    """Return the rules collected from this module plus the codegen union registration."""
    collected = tuple(collect_rules())
    return collected + (UnionRule(GenerateSourcesRequest, GenerateDockerContextFiles),)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc