• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pantsbuild / pants / 20533309613

27 Dec 2025 02:49AM UTC coverage: 80.283%. First build
20533309613

push

github

web-flow
partition protobuf targets for dependency inference using any "resolve-like" fields (#21918)

## Background

As reported in https://github.com/pantsbuild/pants/issues/21409,
protobuf dependency inference cannot handle resolve-like fields which
are attached to `protobuf_source` target types by plugins. Specifically,
multiple targets can own the same source file in different resolves, but
the existing code does not know about resolves and thus has no way to
partition the targets into distinct groups and apply dependency
inference within each group.

## Solution

Partition the protobuf targets by any "resolve-like" field registered
on the `protobuf_source` target type. The new `ResolveLikeField`
mix-in is used to detect fields which are "resolve-like". The dependency
inference logic then uses the new `ResolveLikeFieldToValueRequest` union
to query the applicable language backend for the actual resolve
name so it can be used for partitioning.

The Python and JVM backends support `ResolveLikeField`.

225 of 297 new or added lines in 11 files covered. (75.76%)

78750 of 98090 relevant lines covered (80.28%)

3.36 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

35.97
/src/python/pants/backend/codegen/protobuf/protobuf_dependency_inference.py
1
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
2
# Licensed under the Apache License, Version 2.0 (see LICENSE).
3

4
from __future__ import annotations
5✔
5

6
import re
5✔
7
import typing
5✔
8
from collections import defaultdict
5✔
9
from dataclasses import dataclass
5✔
10
from typing import DefaultDict
5✔
11

12
from pants.backend.codegen.protobuf.protoc import Protoc
5✔
13
from pants.backend.codegen.protobuf.target_types import (
5✔
14
    AllProtobufTargets,
15
    ProtobufDependenciesField,
16
    ProtobufSourceField,
17
    ProtobufSourceTarget,
18
)
19
from pants.core.target_types import (
5✔
20
    ResolveLikeField,
21
    ResolveLikeFieldToValueRequest,
22
    get_resolve_from_resolve_like_field_request,
23
)
24
from pants.core.util_rules.stripped_source_files import (
5✔
25
    StrippedFileName,
26
    StrippedFileNameRequest,
27
    strip_file_name,
28
)
29
from pants.engine.addresses import Address
5✔
30
from pants.engine.internals.graph import (
5✔
31
    determine_explicitly_provided_dependencies,
32
    hydrate_sources,
33
    resolve_target,
34
)
35
from pants.engine.intrinsics import get_digest_contents
5✔
36
from pants.engine.rules import collect_rules, concurrently, implicitly, rule
5✔
37
from pants.engine.target import (
5✔
38
    DependenciesRequest,
39
    Field,
40
    FieldSet,
41
    HydrateSourcesRequest,
42
    InferDependenciesRequest,
43
    InferredDependencies,
44
    Target,
45
    WrappedTargetRequest,
46
)
47
from pants.engine.unions import UnionMembership, UnionRule
5✔
48
from pants.util.frozendict import FrozenDict
5✔
49
from pants.util.logging import LogLevel
5✔
50
from pants.util.ordered_set import FrozenOrderedSet, OrderedSet
5✔
51
from pants.util.strutil import softwrap
5✔
52

53

54
@dataclass(frozen=True)
class ProtobufMappingResolveKey:
    """Key naming one partition of Protobuf targets.

    A partition is identified by the field type a resolve name was obtained from, together
    with that resolve name.
    """

    # The field class that supplied the resolve name.
    field_type: type[Field]
    # The resolve name reported for that field.
    resolve: str
5✔
58

59

60
# Sentinel partition key used when either:
#   1. no resolve-like fields are registered on `protobuf_source` targets, or
#   2. resolve-like fields exist but resolves are disabled for a language backend.
_NO_RESOLVE_LIKE_FIELDS_DEFINED = ProtobufMappingResolveKey(
    field_type=ProtobufSourceField,
    resolve="<no-resolve>",
)
66

67

68
@dataclass(frozen=True)
class ProtobufMapping:
    """Owners of stripped `.proto` file names, grouped by resolve partition.

    Both attributes are keyed first by `ProtobufMappingResolveKey` and then by stripped file
    name.
    """

    # Partition key -> stripped file name -> the one address that owns the file.
    mapping: FrozenDict[ProtobufMappingResolveKey, FrozenDict[str, Address]]
    # Partition key -> stripped file name -> all addresses that claim the file.
    ambiguous_modules: FrozenDict[ProtobufMappingResolveKey, FrozenDict[str, tuple[Address, ...]]]
5✔
75

76

77
async def _map_single_pseudo_resolve(protobuf_targets: AllProtobufTargets) -> ProtobufMapping:
    """Build a `ProtobufMapping` that places every target in the single sentinel partition.

    A stripped file name seen on more than one target is reported under `ambiguous_modules`
    with all of its addresses and is left out of `mapping`.
    """
    stripped_names = await concurrently(
        strip_file_name(StrippedFileNameRequest(target[ProtobufSourceField].file_path))
        for target in protobuf_targets
    )

    first_owner: dict[str, Address] = {}
    contested: DefaultDict[str, set[Address]] = defaultdict(set)
    for target, stripped_name in zip(protobuf_targets, stripped_names):
        name = stripped_name.value
        if name not in first_owner:
            first_owner[name] = target.address
            continue
        # The first owner stays recorded during the scan so that a third or later claimant is
        # still detected as a conflict.
        contested[name].update({first_owner[name], target.address})

    # Keep only the file names that ended up with exactly one owner.
    unambiguous = {name: owner for name, owner in first_owner.items() if name not in contested}

    return ProtobufMapping(
        mapping=FrozenDict(
            {_NO_RESOLVE_LIKE_FIELDS_DEFINED: FrozenDict(sorted(unambiguous.items()))}
        ),
        ambiguous_modules=FrozenDict(
            {
                _NO_RESOLVE_LIKE_FIELDS_DEFINED: FrozenDict(
                    (name, tuple(sorted(owners))) for name, owners in sorted(contested.items())
                )
            }
        ),
    )
114

115

116
@rule(desc="Creating map of Protobuf file names to Protobuf targets", level=LogLevel.DEBUG)
async def map_protobuf_files(
    protobuf_targets: AllProtobufTargets, union_membership: UnionMembership
) -> ProtobufMapping:
    """Map stripped `.proto` file names to their owning addresses, partitioned by resolve.

    When no resolve-like field is registered on `protobuf_source`, every target goes into the
    single `_NO_RESOLVE_LIKE_FIELDS_DEFINED` partition. Otherwise each target is placed in one
    partition per resolve-like field it has, keyed by that field's type and resolve name, or by
    the sentinel key when the field reports no resolve.

    Within a partition, a file name claimed by more than one distinct address is removed from
    `mapping` and reported under `ambiguous_modules` instead.

    Raises:
        ValueError: if a target has none of the registered resolve-like fields.
    """
    # Determine the resolve-like fields installed on the `protobuf_source` target type.
    resolve_like_field_types: set[type[Field]] = set()
    for field_type in ProtobufSourceTarget.class_field_types(union_membership):
        if issubclass(field_type, ResolveLikeField):
            resolve_like_field_types.add(field_type)
    if not resolve_like_field_types:
        return await _map_single_pseudo_resolve(protobuf_targets)

    # Discover which resolves are present in the protobuf_source targets. The two lists below
    # are built in lockstep so that results can be zipped back to their (target, field) pair.
    resolve_requests: list[ResolveLikeFieldToValueRequest] = []
    target_and_field_type_for_resolve_requests: list[tuple[Target, type[Field]]] = []
    for tgt in protobuf_targets:
        saw_at_least_one_field = False
        for field_type in resolve_like_field_types:
            if tgt.has_field(field_type):
                resolve_request_type = typing.cast(
                    ResolveLikeField, tgt[field_type]
                ).get_resolve_like_field_to_value_request()
                resolve_request = resolve_request_type(target=tgt)
                resolve_requests.append(resolve_request)
                target_and_field_type_for_resolve_requests.append((tgt, field_type))
                saw_at_least_one_field = True

        if not saw_at_least_one_field:
            raise ValueError(f"Did not find a resolve field on target at address `{tgt.address}`.")

    # Obtain the resolves for each target and then partition.
    resolve_results = await concurrently(
        get_resolve_from_resolve_like_field_request(
            **implicitly({resolve_request: ResolveLikeFieldToValueRequest})
        )
        for resolve_request in resolve_requests
    )
    targets_partitioned_by_resolve: dict[ProtobufMappingResolveKey, list[Target]] = defaultdict(
        list
    )
    for resolve_result, (target, field_type) in zip(
        resolve_results, target_and_field_type_for_resolve_requests
    ):
        # When a resolve field returns None (resolves disabled), canonicalize to
        # _NO_RESOLVE_LIKE_FIELDS_DEFINED to ensure all "resolves disabled" targets share
        # the same partition regardless of which resolve-like field they have.
        if resolve_result.value is None:
            resolve_key = _NO_RESOLVE_LIKE_FIELDS_DEFINED
        else:
            resolve_key = ProtobufMappingResolveKey(
                field_type=field_type, resolve=resolve_result.value
            )
        targets_partitioned_by_resolve[resolve_key].append(target)

    stripped_file_per_target = await concurrently(
        strip_file_name(StrippedFileNameRequest(tgt[ProtobufSourceField].file_path))
        for tgt in protobuf_targets
    )

    target_to_stripped_file: dict[Target, StrippedFileName] = dict(
        zip(protobuf_targets, stripped_file_per_target)
    )

    stripped_files_to_addresses: dict[ProtobufMappingResolveKey, dict[str, Address]] = defaultdict(
        dict
    )
    stripped_files_with_multiple_owners: dict[
        ProtobufMappingResolveKey, dict[str, set[Address]]
    ] = defaultdict(lambda: defaultdict(set))

    for resolve_key, targets_in_resolve in targets_partitioned_by_resolve.items():
        owners_in_resolve = stripped_files_to_addresses[resolve_key]
        for tgt in targets_in_resolve:
            stripped_file = target_to_stripped_file[tgt]
            existing_owner = owners_in_resolve.get(stripped_file.value)
            if existing_owner is None:
                owners_in_resolve[stripped_file.value] = tgt.address
            elif existing_owner != tgt.address:
                stripped_files_with_multiple_owners[resolve_key][stripped_file.value].update(
                    {existing_owner, tgt.address}
                )
            # Otherwise the same target was listed twice in this partition, which happens when
            # several of its resolve-like fields canonicalize to the sentinel key. A target
            # cannot be ambiguous with itself, so nothing is recorded.

    # Remove files with ambiguous owners in each resolve.
    for (
        resolve_key,
        stripped_files_with_multiple_owners_in_resolve,
    ) in stripped_files_with_multiple_owners.items():
        for ambiguous_stripped_f in stripped_files_with_multiple_owners_in_resolve:
            stripped_files_to_addresses[resolve_key].pop(ambiguous_stripped_f)

    return ProtobufMapping(
        mapping=FrozenDict(
            {
                resolve_key: FrozenDict(sorted(stripped_files_to_addresses_in_resolve.items()))
                for resolve_key, stripped_files_to_addresses_in_resolve in stripped_files_to_addresses.items()
            }
        ),
        ambiguous_modules=FrozenDict(
            {
                resolve_key: FrozenDict(
                    (k, tuple(sorted(v)))
                    for k, v in sorted(stripped_files_with_multiple_owners_in_resolve.items())
                )
                for resolve_key, stripped_files_with_multiple_owners_in_resolve in stripped_files_with_multiple_owners.items()
            }
        ),
    )
221

222

223
# Building blocks for matching `import` statements in `.proto` files. The Proto language spec is
# at https://developers.google.com/protocol-buffers/docs/reference/proto3-spec.
QUOTE_CHAR = r"(?:'|\")"
IMPORT_MODIFIERS = r"(?:\spublic|\sweak)?"
# NB: We don't specify what a valid file name looks like to avoid accidentally breaking unicode.
FILE_NAME = r"(.+?\.proto)"
IMPORT_REGEX = re.compile(
    rf"import\s*{IMPORT_MODIFIERS}\s*{QUOTE_CHAR}{FILE_NAME}{QUOTE_CHAR}\s*;"
)
5✔
230

231

232
def parse_proto_imports(file_content: str) -> FrozenOrderedSet[str]:
    """Collect the file paths captured by `IMPORT_REGEX` in the given `.proto` file text."""
    imported_paths = IMPORT_REGEX.findall(file_content)
    return FrozenOrderedSet(imported_paths)
1✔
234

235

236
@dataclass(frozen=True)
class ProtobufDependencyInferenceFieldSet(FieldSet):
    """Field set for dependency inference; requires the Protobuf source and dependencies fields."""

    required_fields = (ProtobufSourceField, ProtobufDependenciesField)

    # The target's `.proto` source field.
    source: ProtobufSourceField
    # The target's dependencies field.
    dependencies: ProtobufDependenciesField
5✔
242

243

244
class InferProtobufDependencies(InferDependenciesRequest):
    """Dependency inference request that infers from `ProtobufDependencyInferenceFieldSet`."""

    infer_from = ProtobufDependencyInferenceFieldSet
5✔
246

247

248
async def get_resolve_key_from_target(address: Address) -> ProtobufMappingResolveKey:
    """Compute the partition key for the target at `address`.

    The target is resolved, its single resolve-like field is located, and that field's
    request type is used to look up the resolve name.

    Returns:
        `_NO_RESOLVE_LIKE_FIELDS_DEFINED` when the lookup reports no resolve, otherwise a key
        made from the resolve-like field type and the resolve name.

    Raises:
        NotImplementedError: if the target has more than one resolve-like field.
        ValueError: if the target has no resolve-like field.
    """
    wrapped_target = await resolve_target(
        WrappedTargetRequest(address=address, description_of_origin="protobuf"), **implicitly()
    )
    target = wrapped_target.target

    resolve_field_type: type[Field] | None = None
    for field_type in target.field_types:
        if not issubclass(field_type, ResolveLikeField):
            continue
        if resolve_field_type is not None:
            raise NotImplementedError(
                f"TODO: Multiple resolve-like fields on target at address `{address}`."
            )
        resolve_field_type = field_type
    if resolve_field_type is None:
        # The closing backtick after the address was missing from this message.
        raise ValueError(f"Failed to find resolve-like field on target at address `{address}`.")

    resolve_request_type = typing.cast(
        ResolveLikeField, target[resolve_field_type]
    ).get_resolve_like_field_to_value_request()
    resolve_request = resolve_request_type(target=target)
    resolve_result = await get_resolve_from_resolve_like_field_request(
        **implicitly({resolve_request: ResolveLikeFieldToValueRequest})
    )

    # When resolves are disabled, return the sentinel key
    if resolve_result.value is None:
        return _NO_RESOLVE_LIKE_FIELDS_DEFINED

    return ProtobufMappingResolveKey(
        field_type=resolve_field_type,
        resolve=resolve_result.value,
    )
279

280

281
@rule(desc="Inferring Protobuf dependencies by analyzing imports")
async def infer_protobuf_dependencies(
    request: InferProtobufDependencies, protobuf_mapping: ProtobufMapping, protoc: Protoc
) -> InferredDependencies:
    """Infer a Protobuf target's dependencies from the `import` statements in its source file.

    Each imported path is looked up in the target's resolve partition of `protobuf_mapping`.
    An unambiguous owner is added directly. For an ambiguous path, a warning may be issued and
    the path is added only if the explicitly provided dependencies disambiguate it.

    Returns:
        The sorted inferred addresses, or an empty result when `protoc.dependency_inference`
        is off.
    """
    if not protoc.dependency_inference:
        return InferredDependencies([])

    address = request.field_set.address

    # NOTE(review): once the sentinel key is present in the mapping it is used for every target,
    # including targets that also belong to a named resolve. Confirm this is intended when only
    # some backends have resolves disabled.
    resolve_key: ProtobufMappingResolveKey
    if _NO_RESOLVE_LIKE_FIELDS_DEFINED in protobuf_mapping.mapping:
        resolve_key = _NO_RESOLVE_LIKE_FIELDS_DEFINED
    else:
        resolve_key = await get_resolve_key_from_target(address)

    explicitly_provided_deps, hydrated_sources = await concurrently(
        determine_explicitly_provided_dependencies(
            **implicitly(DependenciesRequest(request.field_set.dependencies))
        ),
        hydrate_sources(HydrateSourcesRequest(request.field_set.source), **implicitly()),
    )
    digest_contents = await get_digest_contents(hydrated_sources.snapshot.digest)
    assert len(digest_contents) == 1
    file_content = digest_contents[0]

    # The partition lookups depend only on `resolve_key`, so do them once rather than once per
    # import.
    mapping_in_resolve = protobuf_mapping.mapping.get(resolve_key)
    ambiguous_modules_in_resolve = protobuf_mapping.ambiguous_modules.get(resolve_key)

    result: OrderedSet[Address] = OrderedSet()
    for import_path in parse_proto_imports(file_content.content.decode()):
        unambiguous = mapping_in_resolve.get(import_path) if mapping_in_resolve else None
        ambiguous = (
            ambiguous_modules_in_resolve.get(import_path) if ambiguous_modules_in_resolve else None
        )

        if unambiguous:
            result.add(unambiguous)
        elif ambiguous:
            explicitly_provided_deps.maybe_warn_of_ambiguous_dependency_inference(
                ambiguous,
                address,
                import_reference="file",
                context=softwrap(
                    f"""
                    The target {address} imports `{import_path}` in the file
                    {file_content.path}
                    """
                ),
            )
            maybe_disambiguated = explicitly_provided_deps.disambiguated(ambiguous)
            if maybe_disambiguated:
                result.add(maybe_disambiguated)
    return InferredDependencies(sorted(result))
×
334

335

336
def rules():
    """Return the rules collected for this module plus the dependency inference union rule."""
    module_rules = collect_rules()
    return (*module_rules, UnionRule(InferDependenciesRequest, InferProtobufDependencies))
5✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc