• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pantsbuild / pants / 24142259555

08 Apr 2026 02:58PM UTC coverage: 91.259% (-1.7%) from 92.909%
24142259555

Pull #23228

github

web-flow
Merge 1e468fa49 into 9036734c9
Pull Request #23228: Add persistent dependency inference cache for incremental --changed-dependents

266 of 317 new or added lines in 3 files covered. (83.91%)

1448 existing lines in 70 files now uncovered.

86176 of 94430 relevant lines covered (91.26%)

3.55 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.9
/src/python/pants/backend/project_info/incremental_dependents.py
1
# Copyright 2024 Pants project contributors (see CONTRIBUTORS.md).
2
# Licensed under the Apache License, Version 2.0 (see LICENSE).
3

4
"""Incremental dependency graph updates for faster `--changed-dependents` runs.
5

6
Instead of resolving dependencies for ALL targets every time, this module persists
7
the forward dependency graph to disk and only re-resolves dependencies for targets
8
whose source files have changed since the last run.
9
"""
10

11
from __future__ import annotations
10✔
12

13
import hashlib
10✔
14
import json
10✔
15
import logging
10✔
16
import os
10✔
17
from dataclasses import dataclass
10✔
18

19
from pants.base.build_environment import get_pants_cachedir
10✔
20
from pants.engine.addresses import Address
10✔
21
from pants.option.option_types import BoolOption
10✔
22
from pants.option.subsystem import Subsystem
10✔
23
from pants.util.strutil import help_text
10✔
24

25
# Module-level logger; the cache load/save helpers below report their outcomes through it.
logger = logging.getLogger(__name__)
10✔
26

27

28
class IncrementalDependents(Subsystem):
    # Scope name under which this subsystem's options are registered.
    options_scope = "incremental-dependents"
    help = help_text(
        """
        Persist the forward dependency graph to disk and incrementally update it,
        so that `--changed-dependents=transitive` does not need to resolve
        dependencies for every target on every run.
        """
    )

    # Opt-in switch: the feature is off unless explicitly enabled.
    enabled = BoolOption(
        default=False,
        help=(
            "Enable incremental dependency graph caching. "
            "When enabled, the forward dependency graph is persisted to disk "
            "and only targets with changed source files have their "
            "dependencies re-resolved."
        ),
    )
44

45

46
# ---------------------------------------------------------------------------
47
# Persisted graph helpers
48
# ---------------------------------------------------------------------------
49

50
# Schema version written into the cache file by `save_persisted_graph`;
# `load_persisted_graph` discards any cache whose recorded version differs.
_CACHE_VERSION = 2  # v2: stores structured address components
10✔
51

52

53
@dataclass(frozen=True)
class CachedEntry:
    # Immutable record for one target in the persisted forward dependency graph.

    # Fingerprint string stored with the entry (presumably the value produced by
    # `compute_source_fingerprint` -- confirm at the call site).
    fingerprint: str

    # Dependencies stored as address spec strings (e.g. "src/python/foo/bar.py:lib")
    deps: tuple[str, ...]
10✔
58

59

60
def get_cache_path() -> str:
    """Return the location of the incremental dep graph cache file.

    The file sits directly inside the directory returned by
    `get_pants_cachedir()`.
    """
    cache_dir = get_pants_cachedir()
    return os.path.join(cache_dir, "incremental_dep_graph_v2.json")
×
63

64

65
def load_persisted_graph(path: str, buildroot: str) -> dict[str, CachedEntry]:
    """Load the persisted forward dependency graph from disk.

    Args:
        path: Location of the JSON cache file.
        buildroot: Build root of the current run. A cache recorded for a
            different build root is ignored.

    Returns:
        A mapping of address spec string to `CachedEntry`. An empty dict is
        returned when the file does not exist, cannot be read, was written
        with a different cache version or build root, or is malformed in any
        way. A bad cache file never raises; the caller simply rebuilds.
    """
    try:
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # OSError covers a missing/unreadable file or a directory at `path`;
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        logger.debug("Could not load incremental dep graph cache: %s", e)
        return {}

    if not isinstance(data, dict):
        # Valid JSON, but not the object this module writes (e.g. a list).
        logger.debug(
            "Could not load incremental dep graph cache: %s",
            "top-level JSON value is not an object",
        )
        return {}
    if data.get("version") != _CACHE_VERSION:
        logger.debug("Incremental dep graph cache version mismatch, rebuilding.")
        return {}
    if data.get("buildroot") != buildroot:
        logger.debug("Incremental dep graph cache buildroot mismatch, rebuilding.")
        return {}

    raw_entries = data.get("entries", {})
    try:
        if not isinstance(raw_entries, dict):
            raise TypeError("'entries' is not an object")
        entries: dict[str, CachedEntry] = {}
        for addr_spec, entry in raw_entries.items():
            fingerprint = entry["fingerprint"]
            deps = entry["deps"]
            # Reject wrong shapes explicitly: tuple("abc") would otherwise
            # silently turn a string into a tuple of single characters.
            if (
                not isinstance(fingerprint, str)
                or not isinstance(deps, list)
                or not all(isinstance(dep, str) for dep in deps)
            ):
                raise TypeError(f"malformed entry for {addr_spec!r}")
            entries[addr_spec] = CachedEntry(fingerprint=fingerprint, deps=tuple(deps))
        return entries
    except (KeyError, TypeError) as e:
        # Any malformed entry invalidates the whole cache rather than part of it.
        logger.debug("Could not load incremental dep graph cache: %s", e)
        return {}
1✔
89

90

91
def save_persisted_graph(
    path: str,
    buildroot: str,
    entries: dict[str, CachedEntry],
) -> None:
    """Save the forward dependency graph to disk.

    Saving is best-effort: any `OSError` (including failure to create the
    parent directory) is logged as a warning and swallowed, so a cache that
    cannot be written never fails the run.

    Args:
        path: Destination of the JSON cache file.
        buildroot: Build root recorded in the file; it is compared on load.
        entries: Mapping of address spec string to `CachedEntry` to persist.
    """
    data = {
        "version": _CACHE_VERSION,
        "buildroot": buildroot,
        "entries": {
            addr_spec: {
                "fingerprint": entry.fingerprint,
                "deps": list(entry.deps),
            }
            for addr_spec, entry in entries.items()
        },
    }

    # Atomic write: write to a temp file next to the target, then rename over it.
    tmp_path = path + ".tmp"
    try:
        # Directory creation is inside the try so that an unwritable cache dir
        # is reported like any other save failure instead of raising.
        parent_dir = os.path.dirname(path)
        if parent_dir:  # a bare filename has no parent; os.makedirs("") would raise
            os.makedirs(parent_dir, exist_ok=True)
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, separators=(",", ":"))
        os.replace(tmp_path, path)
    except OSError as e:
        logger.warning("Failed to save incremental dep graph cache: %s", e)
        try:
            os.unlink(tmp_path)
        except OSError:
            # Deliberately ignored: the temp file may never have been created.
            pass
        return

    logger.debug(
        "Saved incremental dep graph cache with %d entries to %s",
        len(entries),
        path,
    )
×
127

128

129
def _sha256_file(path: str) -> str | None:
10✔
130
    """Return the SHA-256 hex digest of a file's contents, or None if unreadable."""
131
    try:
1✔
132
        h = hashlib.sha256()
1✔
133
        with open(path, "rb") as f:
1✔
134
            for chunk in iter(lambda: f.read(65536), b""):
1✔
135
                h.update(chunk)
1✔
136
        return h.hexdigest()
1✔
137
    except OSError:
1✔
138
        return None
1✔
139

140

141
def compute_source_fingerprint(target_address: Address, buildroot: str) -> str:
    """Compute a content-based fingerprint for a target.

    Uses SHA-256 of file contents (not mtime), so the result does not depend
    on timestamps (e.g. a fresh git clone sets all mtimes to the same value).
    Note that the hashed labels embed paths joined onto `buildroot`, so
    fingerprints are only comparable between runs that use the same buildroot.

    The fingerprint includes:
    - The BUILD file(s) (`BUILD`, `BUILD.pants`) in the target's directory
    - The specific source file, for generated / file-addressed targets

    Files that are missing or unreadable are skipped rather than raising.

    Args:
        target_address: Address of the target to fingerprint.
        buildroot: Absolute path of the build root the address is relative to.

    Returns:
        The hex SHA-256 digest combining the inputs listed above.
    """
    hasher = hashlib.sha256()

    # Always include the BUILD file(s) in the fingerprint
    spec_path = target_address.spec_path
    build_dir = os.path.join(buildroot, spec_path) if spec_path else buildroot

    for build_name in ("BUILD", "BUILD.pants"):
        build_file = os.path.join(build_dir, build_name)
        digest = _sha256_file(build_file)
        if digest:
            hasher.update(f"BUILD:{build_file}:{digest}".encode())

    if not target_address.is_generated_target:
        return hasher.hexdigest()

    gen_name = target_address.generated_name
    if gen_name:
        # Generated target whose name may double as a file path: look next to
        # the BUILD file first, then fall back to a path from the buildroot.
        candidate = (
            os.path.join(buildroot, spec_path, gen_name)
            if spec_path
            else os.path.join(buildroot, gen_name)
        )
        from_root = os.path.join(buildroot, gen_name)
        digest = _sha256_file(candidate)
        if digest:
            hasher.update(f"SRC:{candidate}:{digest}".encode())
        elif candidate != from_root:
            digest = _sha256_file(from_root)
            if digest:
                hasher.update(f"SRC:{gen_name}:{digest}".encode())
        return hasher.hexdigest()

    # NOTE(review): Pants `Address` exposes file-addressed targets (e.g. a
    # python_source generated from python_sources) through `relative_file_path`
    # with `generated_name` unset -- confirm against the Address API. Without
    # this branch such a target's own file contents never reach the fingerprint.
    rel_file = getattr(target_address, "relative_file_path", None)
    if rel_file:
        source_file = os.path.join(buildroot, spec_path, rel_file)
        digest = _sha256_file(source_file)
        if digest:
            hasher.update(f"SRC:{source_file}:{digest}".encode())

    return hasher.hexdigest()
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc