• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pantsbuild / pants / 25404098001

05 May 2026 09:46PM UTC coverage: 92.915%. Remained the same
25404098001

push

github

web-flow
backend/go: deduplicate third-party module downloads across go.mods (#23261)

Third-party Go module analysis is now deduplicated across
`go.mod` files.

Previously, a module required by N `go.mod` files was downloaded
and analyzed N times, causing O(N*M) downloads and significant
memory overhead in monorepos with many overlapping `go.mod` files.

Partially addresses #20274.

68 of 70 new or added lines in 2 files covered. (97.14%)

1 existing line in 1 file now uncovered.

92206 of 99237 relevant lines covered (92.91%)

4.04 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.07
/src/python/pants/backend/go/util_rules/third_party_pkg.py
1
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
2
# Licensed under the Apache License, Version 2.0 (see LICENSE).
3

4
from __future__ import annotations
11✔
5

6
import dataclasses
11✔
7
import json
11✔
8
import logging
11✔
9
import os
11✔
10
from dataclasses import dataclass
11✔
11
from typing import Any
11✔
12

13
import ijson.backends.python as ijson
11✔
14

15
from pants.backend.go.go_sources.load_go_binary import LoadedGoBinaryRequest, setup_go_binary
11✔
16
from pants.backend.go.util_rules import pkg_analyzer
11✔
17
from pants.backend.go.util_rules.build_opts import GoBuildOptions
11✔
18
from pants.backend.go.util_rules.cgo import CGoCompilerFlags
11✔
19
from pants.backend.go.util_rules.embedcfg import EmbedConfig
11✔
20
from pants.backend.go.util_rules.pkg_analyzer import PackageAnalyzerSetup
11✔
21
from pants.backend.go.util_rules.sdk import GoSdkProcess
11✔
22
from pants.build_graph.address import Address
11✔
23
from pants.engine.engine_aware import EngineAwareParameter
11✔
24
from pants.engine.fs import (
11✔
25
    EMPTY_DIGEST,
26
    CreateDigest,
27
    Digest,
28
    DigestSubset,
29
    FileContent,
30
    GlobExpansionConjunction,
31
    GlobMatchErrorBehavior,
32
    MergeDigests,
33
    PathGlobs,
34
)
35
from pants.engine.intrinsics import (
11✔
36
    create_digest,
37
    digest_to_snapshot,
38
    execute_process,
39
    get_digest_contents,
40
    merge_digests,
41
)
42
from pants.engine.process import Process, fallible_to_exec_result_or_raise
11✔
43
from pants.engine.rules import collect_rules, concurrently, implicitly, rule
11✔
44
from pants.util.dirutil import group_by_dir
11✔
45
from pants.util.frozendict import FrozenDict
11✔
46
from pants.util.logging import LogLevel
11✔
47
from pants.util.ordered_set import FrozenOrderedSet
11✔
48

49
logger = logging.getLogger(__name__)
11✔
50

51

52
class GoThirdPartyPkgError(Exception):
    """Error describing a third-party Go package that could not be analyzed or is unsupported.

    Instances are either raised directly or stored on `ThirdPartyPkgAnalysis.error`.
    """
11✔
54

55

56
@dataclass(frozen=True)
class ThirdPartyPkgAnalysis:
    """All the info and files needed to build a third-party package.

    The digest only contains the files for the package, with all prefixes stripped.

    NOTE(review): `analyze_go_third_party_package` in this file fills `digest` with the
    whole module's sources digest and `dir_path` with the package's path inside it --
    confirm whether the "prefixes stripped" statement above is still accurate.
    """

    # Identity of the package.
    import_path: str
    name: str

    # Where the package's files live: the digest and the directory inside it.
    digest: Digest
    dir_path: str

    # Note that we don't care about test-related metadata like `TestImports`, as we'll never run
    # tests directly on a third-party package.
    imports: tuple[str, ...]
    go_files: tuple[str, ...]
    cgo_files: tuple[str, ...]
    cgo_flags: CGoCompilerFlags

    # Non-Go source files, grouped by the analyzer's `CFiles`, `CXXFiles`, `MFiles`, `HFiles`,
    # `FFiles` and `SFiles` keys.
    c_files: tuple[str, ...]
    cxx_files: tuple[str, ...]
    m_files: tuple[str, ...]
    h_files: tuple[str, ...]
    f_files: tuple[str, ...]
    s_files: tuple[str, ...]

    syso_files: tuple[str, ...]

    # Go version declared by the owning module, when known.
    minimum_go_version: str | None

    # Embed patterns reported for the package, its tests, and its external tests.
    embed_patterns: tuple[str, ...]
    test_embed_patterns: tuple[str, ...]
    xtest_embed_patterns: tuple[str, ...]

    # Embed configs; only populated when at least one embed pattern is present.
    embed_config: EmbedConfig | None = None
    test_embed_config: EmbedConfig | None = None
    xtest_embed_config: EmbedConfig | None = None

    # A recorded (not raised) error, so it can be surfaced later by whoever uses the package.
    error: GoThirdPartyPkgError | None = None
11✔
96

97

98
@dataclass(frozen=True)
class ThirdPartyPkgAnalysisRequest(EngineAwareParameter):
    """Ask for the `ThirdPartyPkgAnalysis` of a single third-party package.

    The package's module must be included in the input `go.mod`/`go.sum`.
    """

    # Import path of the package being requested.
    import_path: str
    # The `go.mod` that pulls the package in: its address, a digest containing it, and its path.
    go_mod_address: Address
    go_mod_digest: Digest
    go_mod_path: str
    build_opts: GoBuildOptions

    def debug_hint(self) -> str:
        """Return a short `<import path> from <go.mod path>` label for this request."""
        return f"{self.import_path} from {self.go_mod_path}"
×
113

114

115
@dataclass(frozen=True)
class AllThirdPartyPackages:
    """Every package downloaded for a go.mod, plus a digest of the downloaded files.

    The digest has files in the format `gopath/pkg/mod`, which is what `GoSdkProcess` sets `GOPATH`
    to. This means that you can include the digest in a process and Go will properly consume it as
    the `GOPATH`.

    NOTE(review): the only construction site visible in this file
    (`download_and_analyze_third_party_packages`) passes `EMPTY_DIGEST` for `digest` --
    confirm whether the description above still applies.
    """

    digest: Digest
    # Keyed by package import path.
    import_paths_to_pkg_info: FrozenDict[str, ThirdPartyPkgAnalysis]
11✔
126

127

128
@dataclass(frozen=True)
class AllThirdPartyPackagesRequest:
    """Ask for the analysis of every third-party package required by one `go.mod`."""

    go_mod_address: Address
    # Digest holding the `go.mod`; a sibling `go.sum` is read from it when present.
    go_mod_digest: Digest
    # Path of the `go.mod` inside `go_mod_digest`; its directory is used as the working dir.
    go_mod_path: str
    build_opts: GoBuildOptions
11✔
134

135

136
@dataclass(frozen=True)
class ModuleDescriptorsRequest:
    """Ask for the list of modules a Go module depends on."""

    # Input digest for the `go list -m` invocation.
    digest: Digest
    # Working directory for `go list`; an empty string means no working directory is set.
    path: str
11✔
140

141

142
@dataclass(frozen=True)
class ModuleDescriptor:
    """One module entry taken from the `go list -m -json all` output."""

    # The entry's `Path`.
    import_path: str
    # `Replace.Path` / `Replace.Version` when the entry is replaced, else `Path` / `Version`.
    name: str
    version: str
    # The entry's `Indirect` flag (False when absent).
    indirect: bool
    # The entry's `GoVersion`, if any.
    minimum_go_version: str | None
11✔
149

150

151
@dataclass(frozen=True)
class ModuleDescriptors:
    """The third-party modules found by `analyze_module_dependencies`."""

    modules: FrozenOrderedSet[ModuleDescriptor]
    # Output digest of the `go list` process (`EMPTY_DIGEST` when it printed nothing).
    go_mods_digest: Digest
11✔
155

156

157
@dataclass(frozen=True)
class ModuleDownloadRequest:
    """Request to download and analyze one Go module.

    The request is keyed by all of its fields: (name, version, minimum_go_version,
    build_opts, go_sum_entries). That is what makes cross-go.mod deduplication work:
    when mod-a and mod-b both depend on grpc@v1.60.0 with the same go.sum entries,
    the Pants engine memoizes on the full request key and the download and analysis
    run only once.

    ``go_sum_entries`` holds the go.sum lines for ``<name> <version>`` and
    ``<name> <version>/go.mod`` taken from the consuming go.mod's real go.sum.
    Those lines are content-addressable by design: two well-formed go.sums MUST
    agree on them for the same module@version. Making them part of the key has
    two consequences:

    1. Happy path: every consumer of module@version shares a single download, and
       the synthetic go.sum written into the sandbox lets Go run its normal
       checksum verification (no GONOSUMCHECK override).
    2. Tampered path: a go.sum that disagrees yields a distinct request, so each
       consumer is verified independently against its own entries and the
       tampered one fails with Go's usual SECURITY ERROR.
    """

    name: str
    version: str
    minimum_go_version: str | None
    build_opts: GoBuildOptions
    go_sum_entries: tuple[str, ...]
11✔
185

186

187
@dataclass(frozen=True)
class AnalyzedThirdPartyModule:
    """The successfully analyzed packages of one downloaded module."""

    packages: FrozenOrderedSet[ThirdPartyPkgAnalysis]
11✔
190

191

192
@dataclass(frozen=True)
class AnalyzeThirdPartyPackageRequest:
    """Input for turning one package's analyzer JSON into a `ThirdPartyPkgAnalysis`."""

    # The analyzer's JSON object for the package, frozen so the request is hashable.
    pkg_json: FrozenDict[str, Any]
    # Digest of the module's sources and the module's root directory inside it.
    module_sources_digest: Digest
    module_sources_path: str
    # Import path of the module; the package's import path is derived from it.
    module_import_path: str
    # Directory of the package; must start with `module_sources_path`.
    package_path: str
    minimum_go_version: str | None
11✔
200

201

202
@dataclass(frozen=True)
class FallibleThirdPartyPkgAnalysis:
    """Result of analyzing a third-party Go package, which may have failed."""

    # None when the analysis failed.
    analysis: ThirdPartyPkgAnalysis | None
    import_path: str
    # Zero on success; the rule in this file uses 1 for failures.
    exit_code: int = 0
    # Error text accompanying a failure.
    stderr: str | None = None
11✔
210

211

212
@rule
async def analyze_module_dependencies(request: ModuleDescriptorsRequest) -> ModuleDescriptors:
    """List the third-party modules used directly and indirectly by a Go module.

    Runs `go list -m -json all` and converts each reported module into a
    `ModuleDescriptor`, skipping the main module and replacements that carry no version.
    """
    # This rule can't modify `go.mod` and `go.sum` as it would require mutating the workspace.
    # Instead, we expect them to be well-formed already.
    #
    # Options used:
    # - `-mod=readonly`: It would be convenient to set `-mod=mod` to allow edits, and then compare
    #   the resulting files to the input so that we could print a diff for the user to know how to
    #   update. But `-mod=mod` results in more packages being downloaded and added to `go.mod`
    #   than is actually necessary.
    # TODO: nice error when `go.mod` and `go.sum` would need to change. Right now, it's a
    #  message from Go and won't be intuitive for Pants users what to do.
    # - `-e` is used to not fail if one of the modules is problematic. There may be some packages
    #   in the transitive closure that cannot be built, but we should not blow up Pants. For
    #   example, a package that sets the special value `package documentation` and has no source
    #   files would naively error due to `build constraints exclude all Go files`, even though we
    #   should not error on that package.
    mod_list_result = await fallible_to_exec_result_or_raise(
        **implicitly(
            GoSdkProcess(
                command=["list", "-mod=readonly", "-e", "-m", "-json", "all"],
                input_digest=request.digest,
                output_directories=("gopath",),
                working_dir=request.path if request.path else None,
                # Allow downloads of the module metadata (i.e., go.mod files).
                allow_downloads=True,
                description="Analyze Go module dependencies.",
            )
        )
    )

    if not mod_list_result.stdout:
        return ModuleDescriptors(FrozenOrderedSet(), EMPTY_DIGEST)

    # Keyed by (name, version) so a module reported more than once collapses to one entry.
    modules_by_key: dict[tuple[str, str], ModuleDescriptor] = {}

    for module_json in ijson.items(mod_list_result.stdout, "", multiple_values=True):
        # Skip the first-party module being analyzed.
        if "Main" in module_json and module_json["Main"]:
            continue

        if "Replace" in module_json:
            replacement = module_json["Replace"]
            # Skip first-party modules referenced from other first-party modules.
            # TODO Issue #22097: These cross-module references could be used for dependency
            #  inference
            if "Version" not in replacement:
                continue
            # TODO: Reject local file path replacements? Gazelle does.
            name = replacement["Path"]
            version = replacement["Version"]
        else:
            name = module_json["Path"]
            version = module_json["Version"]

        modules_by_key[(name, version)] = ModuleDescriptor(
            import_path=module_json["Path"],
            name=name,
            version=version,
            indirect=module_json.get("Indirect", False),
            minimum_go_version=module_json.get("GoVersion"),
        )

    # TODO: Augment the modules with go.sum entries?
    # Gazelle does this, mainly to store the sum on the go_repository rule. We could store it (or
    # its absence) to be able to download sums automatically.

    return ModuleDescriptors(
        FrozenOrderedSet(modules_by_key.values()), mod_list_result.output_digest
    )
10✔
280

281

282
def strip_sandbox_prefix(path: str, marker: str) -> str:
    """Strip a path prefix from a path using a marker string to find the start of the portion to not
    strip. This is used to strip absolute paths used in the execution sandbox by `go`.

    Note: The marker string is required because we cannot assume how the prefix will be formed since it
    will differ depending on which execution environment is used (e.g, local or remote).

    The marker is only honoured where it begins a path component (at the start of `path`,
    right after a separator, or when the marker itself starts with a separator). A plain
    substring search would otherwise match inside a longer directory name, e.g. the
    `gopath/` in `/tmp/mygopath/sandbox/gopath/pkg`, and return the wrong suffix.

    :param path: The (typically absolute) path reported by `go`.
    :param marker: The first path segment(s) of the portion to keep, e.g. `gopath/`.
    :return: `path` from the first component-aligned occurrence of `marker` onwards, or
        `path` unchanged when there is no such occurrence.
    """
    if not marker:
        # An empty marker matches at position 0, i.e. nothing is stripped.
        return path

    separators = {"/", os.sep}
    marker_is_anchored = marker[0] in separators

    search_from = 0
    while True:
        marker_pos = path.find(marker, search_from)
        if marker_pos == -1:
            return path
        if marker_is_anchored or marker_pos == 0 or path[marker_pos - 1] in separators:
            return path[marker_pos:]
        # Matched in the middle of a component; keep looking further right.
        search_from = marker_pos + 1
×
294

295

296
def _parse_go_sum(go_sum_content: bytes) -> dict[tuple[str, str], tuple[str, ...]]:
11✔
297
    """Parse a go.sum file into a dict keyed by (module name, version).
298

299
    A well-formed go.sum has up to two lines per (module, version):
300

301
        <name> <version> h1:<content hash>=
302
        <name> <version>/go.mod h1:<go.mod hash>=
303

304
    Returns a dict mapping (name, version) to a tuple of the matching lines,
305
    enabling O(1) lookup per module instead of re-scanning the file.
306
    """
307
    entries: dict[tuple[str, str], list[str]] = {}
10✔
308
    for line in go_sum_content.decode("utf-8").splitlines():
10✔
309
        if not line:
6✔
310
            continue
1✔
311
        parts = line.split(" ", 2)
6✔
312
        if len(parts) < 3:
6✔
NEW
313
            continue
×
314
        name = parts[0]
6✔
315
        version_field = parts[1]
6✔
316
        # Strip the "/go.mod" suffix to get the base version for grouping.
317
        version = version_field.removesuffix("/go.mod")
6✔
318
        key = (name, version)
6✔
319
        entries.setdefault(key, []).append(line)
6✔
320
    return {k: tuple(v) for k, v in entries.items()}
10✔
321

322

323
def _extract_go_sum_entries_for_module(
    go_sum_content: bytes, name: str, version: str
) -> tuple[str, ...]:
    """Look up the go.sum lines recorded for a single module@version.

    This parses the whole file via `_parse_go_sum` on every call, so callers that need
    several modules from the same go.sum should call `_parse_go_sum` once instead.
    An empty tuple is returned when the module@version has no entries.
    """
    return _parse_go_sum(go_sum_content).get((name, version), ())
1✔
333

334

335
def _freeze_json_dict(d: dict[Any, Any]) -> FrozenDict[str, Any]:
    """Convert a decoded JSON object into an immutable `FrozenDict`.

    Lists become tuples and nested dicts become `FrozenDict`s. Containers nested inside
    a list are frozen as well; a plain `tuple(v)` would leave any dict or list element
    mutable (and unhashable) inside the result.

    :param d: A dict produced by JSON decoding; every key must be a `str`.
    :return: A `FrozenDict` with the same keys and frozen values.
    :raises AssertionError: If a key is not a `str`, or a value is not a list, dict,
        `str` or `int`.
    """

    def freeze_list_item(item: Any) -> Any:
        # Only containers need converting; other elements are passed through unchanged.
        if isinstance(item, dict):
            return _freeze_json_dict(item)
        if isinstance(item, list):
            return tuple(freeze_list_item(nested) for nested in item)
        return item

    result: dict[str, Any] = {}
    for k, v in d.items():
        if not isinstance(k, str):
            raise AssertionError("Got non-`str` key for _freeze_json_dict.")

        if isinstance(v, list):
            result[k] = tuple(freeze_list_item(item) for item in v)
        elif isinstance(v, dict):
            result[k] = _freeze_json_dict(v)
        elif isinstance(v, (str, int)):
            result[k] = v
        else:
            raise AssertionError(f"Unsupported value type for _freeze_json_dict: {type(v)}")
    return FrozenDict(result)
6✔
352

353

354
@rule
async def analyze_go_third_party_package(
    request: AnalyzeThirdPartyPackageRequest,
) -> FallibleThirdPartyPkgAnalysis:
    """Build the analysis of one third-party package from the analyzer's JSON.

    Returns a failed result (`analysis=None`, `exit_code=1`) when the JSON reports an
    `Error` or `InvalidGoFiles`, or when the embed-config helper process fails. Features
    Pants does not support are recorded on `ThirdPartyPkgAnalysis.error` instead.

    Raises `AssertionError` if the package path is not under the module's sources path.
    """
    pkg_json = request.pkg_json

    if not request.package_path.startswith(request.module_sources_path):
        raise AssertionError(
            "The path within GOPATH for a package in a module must always be prefixed by the path "
            "to the applicable module's root directory. "
            f"This was not the case however for module {request.module_import_path}.\n\n"
            "This may be a bug in Pants. Please report this issue at "
            "https://github.com/pantsbuild/pants/issues/new/choose and include the following data: "
            f"package_path: {request.package_path}; module_sources_path: {request.module_sources_path}; "
            f"module_import_path: {request.module_import_path}"
        )

    # The package's import path is the module's import path plus the package directory
    # relative to the module root (nothing is appended for the root package).
    relative_pkg_dir = request.package_path[len(request.module_sources_path) :].strip(os.sep)
    if relative_pkg_dir == "":
        import_path = request.module_import_path
    else:
        import_path = "/".join([request.module_import_path, *relative_pkg_dir.split(os.sep)])

    if "Error" in pkg_json or "InvalidGoFiles" in pkg_json:
        stderr = pkg_json.get("Error", "")
        if stderr:
            stderr += "\n"
        if "InvalidGoFiles" in pkg_json:
            stderr += "\n".join(
                f"{filename}: {file_error}"
                for filename, file_error in pkg_json.get("InvalidGoFiles", {}).items()
            )
            stderr += "\n"
        return FallibleThirdPartyPkgAnalysis(
            analysis=None, import_path=import_path, exit_code=1, stderr=stderr
        )

    # Unsupported features do not fail the analysis; the error is attached to the result.
    # When several keys are present, the message names the last one in this order.
    maybe_error: GoThirdPartyPkgError | None = None
    unsupported_keys = [
        candidate
        for candidate in ("CompiledGoFiles", "SwigFiles", "SwigCXXFiles")
        if candidate in pkg_json
    ]
    if unsupported_keys:
        key = unsupported_keys[-1]
        maybe_error = GoThirdPartyPkgError(
            f"The third-party package {import_path} includes `{key}`, which Pants does "
            "not yet support. Please open a feature request at "
            "https://github.com/pantsbuild/pants/issues/new/choose so that we know to "
            "prioritize adding support. Please include this error message and the version of "
            "the third-party module."
        )

    cgo_flags = CGoCompilerFlags(
        cflags=tuple(pkg_json.get("CgoCFLAGS", [])),
        cppflags=tuple(pkg_json.get("CgoCPPFLAGS", [])),
        cxxflags=tuple(pkg_json.get("CgoCXXFLAGS", [])),
        fflags=tuple(pkg_json.get("CgoFFLAGS", [])),
        ldflags=tuple(pkg_json.get("CgoLDFLAGS", [])),
        pkg_config=tuple(pkg_json.get("CgoPkgConfig", [])),
    )
    analysis = ThirdPartyPkgAnalysis(
        import_path=import_path,
        name=pkg_json["Name"],
        digest=request.module_sources_digest,
        dir_path=request.package_path,
        imports=tuple(pkg_json.get("Imports", ())),
        go_files=tuple(pkg_json.get("GoFiles", ())),
        cgo_files=tuple(pkg_json.get("CgoFiles", ())),
        cgo_flags=cgo_flags,
        c_files=tuple(pkg_json.get("CFiles", ())),
        cxx_files=tuple(pkg_json.get("CXXFiles", ())),
        m_files=tuple(pkg_json.get("MFiles", ())),
        h_files=tuple(pkg_json.get("HFiles", ())),
        f_files=tuple(pkg_json.get("FFiles", ())),
        s_files=tuple(pkg_json.get("SFiles", ())),
        syso_files=tuple(pkg_json.get("SysoFiles", ())),
        minimum_go_version=request.minimum_go_version,
        embed_patterns=tuple(pkg_json.get("EmbedPatterns", [])),
        test_embed_patterns=tuple(pkg_json.get("TestEmbedPatterns", [])),
        xtest_embed_patterns=tuple(pkg_json.get("XTestEmbedPatterns", [])),
        error=maybe_error,
    )

    uses_embed = (
        analysis.embed_patterns or analysis.test_embed_patterns or analysis.xtest_embed_patterns
    )
    if not uses_embed:
        return FallibleThirdPartyPkgAnalysis(
            analysis=analysis,
            import_path=import_path,
            exit_code=0,
            stderr=None,
        )

    # Resolve the embed patterns into embed configs by running the `embedder` helper
    # against the module sources.
    patterns_json = json.dumps(
        {
            "EmbedPatterns": analysis.embed_patterns,
            "TestEmbedPatterns": analysis.test_embed_patterns,
            "XTestEmbedPatterns": analysis.xtest_embed_patterns,
        }
    ).encode("utf-8")
    embedder, patterns_json_digest = await concurrently(
        setup_go_binary(
            LoadedGoBinaryRequest("embedcfg", ("main.go",), "./embedder"), **implicitly()
        ),
        create_digest(CreateDigest([FileContent("patterns.json", patterns_json)])),
    )
    input_digest = await merge_digests(
        MergeDigests((request.module_sources_digest, patterns_json_digest, embedder.digest))
    )
    embed_result = await execute_process(
        Process(
            ("./embedder", "patterns.json", request.package_path),
            input_digest=input_digest,
            description=f"Create embed mapping for {import_path}",
            level=LogLevel.DEBUG,
        ),
        **implicitly(),
    )
    if embed_result.exit_code != 0:
        return FallibleThirdPartyPkgAnalysis(
            analysis=None,
            import_path=import_path,
            exit_code=1,
            stderr=embed_result.stderr.decode(),
        )

    metadata = json.loads(embed_result.stdout)
    analysis = dataclasses.replace(
        analysis,
        embed_config=EmbedConfig.from_json_dict(metadata.get("EmbedConfig", {})),
        test_embed_config=EmbedConfig.from_json_dict(metadata.get("TestEmbedConfig", {})),
        xtest_embed_config=EmbedConfig.from_json_dict(metadata.get("XTestEmbedConfig", {})),
    )

    return FallibleThirdPartyPkgAnalysis(
        analysis=analysis,
        import_path=import_path,
        exit_code=0,
        stderr=None,
    )
485

486

487
@rule
async def download_and_analyze_module(
    request: ModuleDownloadRequest,
    analyzer: PackageAnalyzerSetup,
) -> AnalyzedThirdPartyModule:
    """Download one Go module through a synthetic go.mod + go.sum and analyze its packages.

    The request is keyed by (name, version, minimum_go_version, build_opts,
    go_sum_entries), which lets the Pants engine deduplicate identical module
    downloads across go.mods.

    The sandbox gets a synthetic go.mod + go.sum pair so that Go's normal checksum
    verification still runs -- the go.sum entries come straight from the consuming
    go.mod's real go.sum (see ModuleDownloadRequest for the full argument for why
    this is safe).
    """
    # The synthetic go.mod requires only this one module. When the consuming go.sum
    # contains the entries for this module@version, they are emitted verbatim so that
    # `go mod download` performs its usual local checksum verification. When they are
    # absent (e.g., a transitive discovered during MVS that the consumer's go.sum hasn't
    # recorded yet, or a go.sum that is entirely missing), the synthetic go.sum is
    # omitted and Go falls back to GOSUMDB (sum.golang.org by default) for verification.
    # This is a softer signal than the pre-dedup rule, which raised an error pointing the
    # user at `go mod download all`; the warning below preserves that guidance.
    go_version = request.minimum_go_version or "1.21"
    synthetic_go_mod = (
        f"module synthetic.invalid\n\ngo {go_version}\n\nrequire {request.name} {request.version}\n"
    )
    synthetic_files = [FileContent("go.mod", synthetic_go_mod.encode())]
    if request.go_sum_entries:
        synthetic_go_sum = "\n".join(request.go_sum_entries) + "\n"
        synthetic_files.append(FileContent("go.sum", synthetic_go_sum.encode()))
    else:
        logger.warning(
            "No go.sum entries found for %s@%s; falling back to GOSUMDB for "
            "checksum verification. This usually means the consuming go.mod's "
            "go.sum is incomplete -- run `go mod download all` (or `go mod "
            "tidy`) in that module's directory to record the checksum locally.",
            request.name,
            request.version,
        )
    synthetic_digest = await create_digest(CreateDigest(synthetic_files))

    download_result = await fallible_to_exec_result_or_raise(
        **implicitly(
            GoSdkProcess(
                ("mod", "download", "-json", f"{request.name}@{request.version}"),
                input_digest=synthetic_digest,
                allow_downloads=True,
                output_directories=("gopath",),
                description=f"Download Go module {request.name}@{request.version}.",
            )
        )
    )

    if not download_result.stdout:
        raise AssertionError(
            f"Expected output from `go mod download` for {request.name}@{request.version}."
        )

    # `go` reports absolute sandbox paths; make them relative to the captured `gopath/` dir.
    module_metadata = json.loads(download_result.stdout)
    module_sources_relpath = strip_sandbox_prefix(module_metadata["Dir"], "gopath/")
    go_mod_relpath = strip_sandbox_prefix(module_metadata["GoMod"], "gopath/")

    module_sources_snapshot = await digest_to_snapshot(
        **implicitly(
            DigestSubset(
                download_result.output_digest,
                PathGlobs(
                    [f"{module_sources_relpath}/**", go_mod_relpath],
                    glob_match_error_behavior=GlobMatchErrorBehavior.error,
                    conjunction=GlobExpansionConjunction.all_match,
                    description_of_origin=f"the download of Go module {request.name}@{request.version}",
                ),
            )
        )
    )

    files_by_dir = group_by_dir(
        p for p in module_sources_snapshot.files if p.startswith(module_sources_relpath)
    )
    # A directory is a candidate package when it holds at least one `.go` file.
    # Directories where "testdata" would end up in the import path are skipped; see
    # https://github.com/golang/go/blob/f005df8b582658d54e63d59953201299d6fee880/src/go/build/build.go#L580-L585
    candidate_package_dirs = sorted(
        pkg_dir
        for pkg_dir, filenames in files_by_dir.items()
        if "testdata" not in pkg_dir.split("/")
        and any(filename.endswith(".go") for filename in filenames)
    )

    analyzer_relpath = "__analyzer"
    analysis_result = await fallible_to_exec_result_or_raise(
        **implicitly(
            Process(
                [os.path.join(analyzer_relpath, analyzer.path), *candidate_package_dirs],
                input_digest=module_sources_snapshot.digest,
                immutable_input_digests={
                    analyzer_relpath: analyzer.digest,
                },
                description=f"Analyze metadata for Go third-party module: {request.name}@{request.version}",
                level=LogLevel.DEBUG,
                env={"CGO_ENABLED": "1" if request.build_opts.cgo_enabled else "0"},
            )
        )
    )

    if not analysis_result.stdout:
        return AnalyzedThirdPartyModule(FrozenOrderedSet())

    # The analyzer's JSON objects are paired with the candidate directories in order.
    package_analysis_gets = [
        analyze_go_third_party_package(
            AnalyzeThirdPartyPackageRequest(
                pkg_json=_freeze_json_dict(pkg_json),
                module_sources_digest=module_sources_snapshot.digest,
                module_sources_path=module_sources_relpath,
                module_import_path=request.name,
                package_path=pkg_path,
                minimum_go_version=request.minimum_go_version,
            )
        )
        for pkg_path, pkg_json in zip(
            candidate_package_dirs, ijson.items(analysis_result.stdout, "", multiple_values=True)
        )
    ]
    analyzed_packages_fallible = await concurrently(package_analysis_gets)

    # Packages whose analysis failed are dropped.
    return AnalyzedThirdPartyModule(
        FrozenOrderedSet(
            fallible.analysis
            for fallible in analyzed_packages_fallible
            if fallible.analysis and fallible.exit_code == 0
        )
    )
6✔
620

621

622
@rule(desc="Download and analyze all third-party Go packages", level=LogLevel.DEBUG)
async def download_and_analyze_third_party_packages(
    request: AllThirdPartyPackagesRequest,
) -> AllThirdPartyPackages:
    """Analyze every third-party package required by one go.mod.

    Each required module is downloaded and analyzed via `download_and_analyze_module`;
    the resulting packages are merged into one mapping keyed by import path. The
    returned `digest` is always `EMPTY_DIGEST`.
    """
    go_mod_dir = os.path.dirname(request.go_mod_path)
    module_analysis = await analyze_module_dependencies(
        ModuleDescriptorsRequest(
            digest=request.go_mod_digest,
            path=go_mod_dir,
        )
    )

    # Read the real go.sum once so per-module entries can be handed to the download
    # sandbox. This keeps Go's checksum verification intact while allowing the engine
    # to memoize identical module@version downloads across different go.mods.
    go_sum_path = os.path.join(go_mod_dir, "go.sum")
    digest_contents = await get_digest_contents(request.go_mod_digest)
    go_sum_content = next(
        (entry.content for entry in digest_contents if entry.path == go_sum_path), b""
    )

    # Parse the go.sum once into a dict for O(1) lookup per module.
    go_sum_index = _parse_go_sum(go_sum_content)

    # The engine memoizes by (name, version, minimum_go_version, build_opts,
    # go_sum_entries), so identical modules across go.mods are downloaded
    # once -- reducing downloads from O(N*M) to O(M).
    analyzed_modules = await concurrently(
        download_and_analyze_module(
            ModuleDownloadRequest(
                name=mod.name,
                version=mod.version,
                minimum_go_version=mod.minimum_go_version,
                build_opts=request.build_opts,
                go_sum_entries=go_sum_index.get((mod.name, mod.version), ()),
            ),
            **implicitly(),
        )
        for mod in module_analysis.modules
    )

    # If two modules report the same import path, the later module's package wins.
    import_path_to_info = {}
    for analyzed_module in analyzed_modules:
        for pkg in analyzed_module.packages:
            import_path_to_info[pkg.import_path] = pkg

    return AllThirdPartyPackages(EMPTY_DIGEST, FrozenDict(import_path_to_info))
10✔
672

673

674
@rule
async def extract_package_info(request: ThirdPartyPkgAnalysisRequest) -> ThirdPartyPkgAnalysis:
    """Return the analysis of one package out of everything its go.mod pulls in.

    Raises `AssertionError` when the requested import path is not among the analyzed
    packages.
    """
    all_packages = await download_and_analyze_third_party_packages(
        AllThirdPartyPackagesRequest(
            go_mod_address=request.go_mod_address,
            go_mod_digest=request.go_mod_digest,
            go_mod_path=request.go_mod_path,
            build_opts=request.build_opts,
        )
    )
    pkg_info = all_packages.import_paths_to_pkg_info.get(request.import_path)
    if not pkg_info:
        raise AssertionError(
            f"The package `{request.import_path}` was not downloaded, but Pants tried using it. "
            "This should not happen. Please open an issue at "
            "https://github.com/pantsbuild/pants/issues/new/choose with this error message."
        )
    return pkg_info
692

693

694
def maybe_raise_or_create_error_or_create_failed_pkg_info(
    go_list_json: dict, import_path: str
) -> tuple[GoThirdPartyPkgError | None, ThirdPartyPkgAnalysis | None]:
    """Raise for unrecoverable errors, otherwise lazily create an error or a
    `ThirdPartyPkgAnalysis` for recoverable errors.

    Lazy errors should only be raised when the package is compiled, but not during target generation
    and project introspection. This is important so that we don't overzealously error on packages
    that the user doesn't actually ever use, given how a Go module includes all of its packages,
    even test packages that are never used by first-party code.

    Outcomes, checked in this order:

    - import path `...`: raises `GoThirdPartyPkgError` with Go's message, or
      `AssertionError` if no `Error` is attached.
    - `Dir` key missing: returns `(None, analysis)` where `analysis` is an empty
      `ThirdPartyPkgAnalysis` carrying the error, since `Dir` is necessary for our normal
      analysis of the package.
    - `Error` key present: returns `(error, None)`.
    - otherwise: returns `(None, None)`.
    """
    if import_path == "...":
        if "Error" in go_list_json:
            # TODO: Improve this error message, such as better instructions if `go.sum` is stale.
            raise GoThirdPartyPkgError(go_list_json["Error"]["Err"])
        raise AssertionError(
            "`go list` included the import path `...`, but there was no `Error` attached. "
            "Please open an issue at https://github.com/pantsbuild/pants/issues/new/choose "
            f"with this error message:\n\n{go_list_json}"
        )

    if "Dir" not in go_list_json:
        missing_dir_error = GoThirdPartyPkgError(
            f"`go list` failed for the import path `{import_path}` because `Dir` was not defined. "
            f"Please open an issue at https://github.com/pantsbuild/pants/issues/new/choose so "
            f"that we can figure out how to support this:"
            f"\n\n{go_list_json}"
        )
        no_cgo_flags = CGoCompilerFlags(
            cflags=(),
            cppflags=(),
            cxxflags=(),
            fflags=(),
            ldflags=(),
            pkg_config=(),
        )
        placeholder = ThirdPartyPkgAnalysis(
            import_path=import_path,
            name="",
            digest=EMPTY_DIGEST,
            dir_path="",
            imports=(),
            go_files=(),
            cgo_files=(),
            cgo_flags=no_cgo_flags,
            c_files=(),
            cxx_files=(),
            m_files=(),
            h_files=(),
            f_files=(),
            s_files=(),
            syso_files=(),
            minimum_go_version=None,
            embed_patterns=(),
            test_embed_patterns=(),
            xtest_embed_patterns=(),
            error=missing_dir_error,
        )
        return None, placeholder

    if "Error" not in go_list_json:
        return None, None

    err_msg = go_list_json["Error"]["Err"]
    go_list_error = GoThirdPartyPkgError(
        f"`go list` failed for the import path `{import_path}`. Please open an issue at "
        "https://github.com/pantsbuild/pants/issues/new/choose so that we can figure out "
        "how to support this:"
        f"\n\n{err_msg}\n\n{go_list_json}"
    )
    return go_list_error, None
×
768

769

770
def rules():
    """Return this module's rules together with the package analyzer's rules."""
    own_rules = collect_rules()
    analyzer_rules = pkg_analyzer.rules()
    return (*own_rules, *analyzer_rules)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc