• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pantsbuild / pants / 20520347691

26 Dec 2025 09:50AM UTC coverage: 80.283% (-0.01%) from 80.296%
20520347691

Pull #21918

github

web-flow
Merge 8d95a9530 into 165706096
Pull Request #21918: [WIP] partition protobuf dependency inference by any "resolve-like" fields from plugins

225 of 297 new or added lines in 11 files covered. (75.76%)

42 existing lines in 3 files now uncovered.

78750 of 98090 relevant lines covered (80.28%)

3.36 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

35.97
/src/python/pants/backend/codegen/protobuf/protobuf_dependency_inference.py
1
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
2
# Licensed under the Apache License, Version 2.0 (see LICENSE).
3

4
from __future__ import annotations
5✔
5

6
import re
5✔
7
import typing
5✔
8
from collections import defaultdict
5✔
9
from dataclasses import dataclass
5✔
10
from typing import DefaultDict
5✔
11

12
from pants.backend.codegen.protobuf.protoc import Protoc
5✔
13
from pants.backend.codegen.protobuf.target_types import (
5✔
14
    AllProtobufTargets,
15
    ProtobufDependenciesField,
16
    ProtobufSourceField,
17
    ProtobufSourceTarget,
18
)
19
from pants.core.target_types import (
5✔
20
    ResolveLikeField,
21
    ResolveLikeFieldToValueRequest,
22
    get_resolve_from_resolve_like_field_request,
23
)
24
from pants.core.util_rules.stripped_source_files import (
5✔
25
    StrippedFileName,
26
    StrippedFileNameRequest,
27
    strip_file_name,
28
)
29
from pants.engine.addresses import Address
5✔
30
from pants.engine.internals.graph import (
5✔
31
    determine_explicitly_provided_dependencies,
32
    hydrate_sources,
33
    resolve_target,
34
)
35
from pants.engine.intrinsics import get_digest_contents
5✔
36
from pants.engine.rules import collect_rules, concurrently, implicitly, rule
5✔
37
from pants.engine.target import (
5✔
38
    DependenciesRequest,
39
    Field,
40
    FieldSet,
41
    HydrateSourcesRequest,
42
    InferDependenciesRequest,
43
    InferredDependencies,
44
    Target,
45
    WrappedTargetRequest,
46
)
47
from pants.engine.unions import UnionMembership, UnionRule
5✔
48
from pants.util.frozendict import FrozenDict
5✔
49
from pants.util.logging import LogLevel
5✔
50
from pants.util.ordered_set import FrozenOrderedSet, OrderedSet
5✔
51
from pants.util.strutil import softwrap
5✔
52

53

54
@dataclass(frozen=True)
class ProtobufMappingResolveKey:
    """Key identifying one partition of a `ProtobufMapping`.

    Pairs a resolve-like field type with the resolve name obtained from that field. Frozen so
    that instances are hashable and usable as dictionary keys.
    """

    # The field type the resolve name was read from.
    field_type: type[Field]
    # The name of the resolve.
    resolve: str
5✔
58

59

60
# Sentinel value for when:
61
#   1. No resolve-like fields are registered on protobuf_source targets.
62
#   2. Resolve-like fields exist but resolves are disabled for a language backend.
63
# Shared partition key used whenever no real resolve applies to a target.
_NO_RESOLVE_LIKE_FIELDS_DEFINED = ProtobufMappingResolveKey(
    field_type=ProtobufSourceField, resolve="<no-resolve>"
)
66

67

68
@dataclass(frozen=True)
class ProtobufMapping:
    """A mapping of stripped .proto file names to their owning file address indirectly mapped by
    resolve-like fields."""

    # Per resolve key: stripped file name -> the single address that owns it.
    mapping: FrozenDict[ProtobufMappingResolveKey, FrozenDict[str, Address]]
    # Per resolve key: stripped file name -> every address that claims it (more than one owner).
    ambiguous_modules: FrozenDict[ProtobufMappingResolveKey, FrozenDict[str, tuple[Address, ...]]]
5✔
75

76

77
async def _map_single_pseudo_resolve(protobuf_targets: AllProtobufTargets) -> ProtobufMapping:
    """Build a `ProtobufMapping` with every target under the single sentinel resolve key.

    Each target's source path is stripped of its source root. A stripped file name claimed by
    exactly one target goes into `mapping`; a name claimed by several targets is removed from
    `mapping` and recorded, with all of its owners, in `ambiguous_modules`.
    """
    stripped_file_per_target = await concurrently(
        strip_file_name(StrippedFileNameRequest(tgt[ProtobufSourceField].file_path))
        for tgt in protobuf_targets
    )

    stripped_files_to_addresses: dict[str, Address] = {}
    stripped_files_with_multiple_owners: DefaultDict[str, set[Address]] = defaultdict(set)
    for tgt, stripped_file in zip(protobuf_targets, stripped_file_per_target):
        if stripped_file.value in stripped_files_to_addresses:
            # Record both the first owner seen and the newly found one.
            stripped_files_with_multiple_owners[stripped_file.value].update(
                {stripped_files_to_addresses[stripped_file.value], tgt.address}
            )
        else:
            stripped_files_to_addresses[stripped_file.value] = tgt.address

    # Remove files with ambiguous owners.
    for ambiguous_stripped_f in stripped_files_with_multiple_owners:
        stripped_files_to_addresses.pop(ambiguous_stripped_f)

    # Sort entries so the resulting mapping does not depend on target iteration order.
    return ProtobufMapping(
        mapping=FrozenDict(
            {
                _NO_RESOLVE_LIKE_FIELDS_DEFINED: FrozenDict(
                    sorted(stripped_files_to_addresses.items())
                )
            }
        ),
        ambiguous_modules=FrozenDict(
            {
                _NO_RESOLVE_LIKE_FIELDS_DEFINED: FrozenDict(
                    (k, tuple(sorted(v)))
                    for k, v in sorted(stripped_files_with_multiple_owners.items())
                )
            }
        ),
    )
114

115

116
@rule(desc="Creating map of Protobuf file names to Protobuf targets", level=LogLevel.DEBUG)
async def map_protobuf_files(
    protobuf_targets: AllProtobufTargets, union_membership: UnionMembership
) -> ProtobufMapping:
    """Map stripped `.proto` file names to owning addresses, partitioned by resolve.

    If no resolve-like field is installed on `protobuf_source`, all targets are mapped under the
    single sentinel key. Otherwise each target is placed in one partition per resolve-like field
    it has, keyed by (field type, resolve name); a field whose resolve value is `None` places
    the target in the sentinel partition instead.

    Within a partition, a stripped file name owned by exactly one address goes into `mapping`;
    a name claimed by several distinct addresses goes into `ambiguous_modules` only.

    Raises:
        ValueError: if a protobuf target has none of the installed resolve-like fields.
    """
    # Determine the resolve-like fields installed on the `protobuf_source` target type.
    resolve_like_field_types: set[type[Field]] = {
        field_type
        for field_type in ProtobufSourceTarget.class_field_types(union_membership)
        if issubclass(field_type, ResolveLikeField)
    }
    if not resolve_like_field_types:
        return await _map_single_pseudo_resolve(protobuf_targets)

    # Discover which resolves are present in the protobuf_source targets. The two lists are kept
    # index-aligned so each result can be matched back to its target and field type.
    resolve_requests: list[ResolveLikeFieldToValueRequest] = []
    target_and_field_type_for_resolve_requests: list[tuple[Target, type[Field]]] = []
    for tgt in protobuf_targets:
        saw_at_least_one_field = False
        for field_type in resolve_like_field_types:
            if tgt.has_field(field_type):
                resolve_request_type = typing.cast(
                    ResolveLikeField, tgt[field_type]
                ).get_resolve_like_field_to_value_request()
                resolve_request = resolve_request_type(target=tgt)
                resolve_requests.append(resolve_request)
                target_and_field_type_for_resolve_requests.append((tgt, field_type))
                saw_at_least_one_field = True

        if not saw_at_least_one_field:
            raise ValueError(f"Did not find a resolve field on target at address `{tgt.address}`.")

    # Obtain the resolves for each target and then partition.
    resolve_results = await concurrently(
        get_resolve_from_resolve_like_field_request(
            **implicitly({resolve_request: ResolveLikeFieldToValueRequest})
        )
        for resolve_request in resolve_requests
    )
    targets_partitioned_by_resolve: dict[ProtobufMappingResolveKey, list[Target]] = defaultdict(
        list
    )
    for resolve_result, (target, field_type) in zip(
        resolve_results, target_and_field_type_for_resolve_requests
    ):
        # When a resolve field returns None (resolves disabled), canonicalize to
        # _NO_RESOLVE_LIKE_FIELDS_DEFINED to ensure all "resolves disabled" targets share
        # the same partition regardless of which resolve-like field they have.
        if resolve_result.value is None:
            resolve_key = _NO_RESOLVE_LIKE_FIELDS_DEFINED
        else:
            resolve_key = ProtobufMappingResolveKey(
                field_type=field_type, resolve=resolve_result.value
            )
        targets_partitioned_by_resolve[resolve_key].append(target)

    stripped_file_per_target = await concurrently(
        strip_file_name(StrippedFileNameRequest(tgt[ProtobufSourceField].file_path))
        for tgt in protobuf_targets
    )

    target_to_stripped_file: dict[Target, StrippedFileName] = dict(
        zip(protobuf_targets, stripped_file_per_target)
    )

    stripped_files_to_addresses: dict[ProtobufMappingResolveKey, dict[str, Address]] = defaultdict(
        dict
    )
    stripped_files_with_multiple_owners: dict[
        ProtobufMappingResolveKey, dict[str, set[Address]]
    ] = defaultdict(lambda: defaultdict(set))

    for resolve_key, targets_in_resolve in targets_partitioned_by_resolve.items():
        # Indexing the defaultdict registers the resolve key even if every file in it later
        # turns out to be ambiguous.
        owners_in_resolve = stripped_files_to_addresses[resolve_key]
        for tgt in targets_in_resolve:
            stripped_name = target_to_stripped_file[tgt].value
            known_owner = owners_in_resolve.get(stripped_name)
            if known_owner is None:
                owners_in_resolve[stripped_name] = tgt.address
            elif known_owner != tgt.address:
                # Only a *different* address makes the file ambiguous. The same target can be
                # listed twice in the sentinel partition when it has several resolve-like
                # fields that all returned None; that must not count as a conflict.
                stripped_files_with_multiple_owners[resolve_key][stripped_name].update(
                    {known_owner, tgt.address}
                )

    # Remove files with ambiguous owners in each resolve.
    for (
        resolve_key,
        stripped_files_with_multiple_owners_in_resolve,
    ) in stripped_files_with_multiple_owners.items():
        for ambiguous_stripped_f in stripped_files_with_multiple_owners_in_resolve:
            stripped_files_to_addresses[resolve_key].pop(ambiguous_stripped_f)

    # Sort the per-resolve entries so the result does not depend on iteration order.
    return ProtobufMapping(
        mapping=FrozenDict(
            {
                resolve_key: FrozenDict(sorted(stripped_files_to_addresses_in_resolve.items()))
                for (
                    resolve_key,
                    stripped_files_to_addresses_in_resolve,
                ) in stripped_files_to_addresses.items()
            }
        ),
        ambiguous_modules=FrozenDict(
            {
                resolve_key: FrozenDict(
                    (k, tuple(sorted(v)))
                    for k, v in sorted(stripped_files_with_multiple_owners_in_resolve.items())
                )
                for (
                    resolve_key,
                    stripped_files_with_multiple_owners_in_resolve,
                ) in stripped_files_with_multiple_owners.items()
            }
        ),
    )
221

222

223
# See https://developers.google.com/protocol-buffers/docs/reference/proto3-spec for the Proto
# language spec.
QUOTE_CHAR = r"(?:'|\")"
IMPORT_MODIFIERS = r"(?:\spublic|\sweak)?"
FILE_NAME = r"(.+?\.proto)"
# NB: We don't specify what a valid file name looks like to avoid accidentally breaking unicode.
IMPORT_REGEX = re.compile(rf"import\s*{IMPORT_MODIFIERS}\s*{QUOTE_CHAR}{FILE_NAME}{QUOTE_CHAR}\s*;")
5✔
230

231

232
def parse_proto_imports(file_content: str) -> FrozenOrderedSet[str]:
    """Return the file names named by `import` statements in `file_content`.

    Names are found with `IMPORT_REGEX` and returned de-duplicated, in order of first appearance.
    """
    return FrozenOrderedSet(IMPORT_REGEX.findall(file_content))
1✔
234

235

236
@dataclass(frozen=True)
class ProtobufDependencyInferenceFieldSet(FieldSet):
    """The fields dependency inference reads from a target: its source and its dependencies."""

    required_fields = (ProtobufSourceField, ProtobufDependenciesField)

    source: ProtobufSourceField
    dependencies: ProtobufDependenciesField
5✔
242

243

244
class InferProtobufDependencies(InferDependenciesRequest):
    """Dependency inference request built from `ProtobufDependencyInferenceFieldSet`."""

    infer_from = ProtobufDependencyInferenceFieldSet
5✔
246

247

248
async def get_resolve_key_from_target(address: Address) -> ProtobufMappingResolveKey:
    """Compute the `ProtobufMapping` partition key for the target at `address`.

    Finds the single resolve-like field on the target and asks for its resolve value. A value of
    `None` yields the sentinel key; otherwise the key pairs the field type with the resolve name.

    Raises:
        NotImplementedError: if the target has more than one resolve-like field.
        ValueError: if the target has no resolve-like field.
    """
    wrapped_target = await resolve_target(
        WrappedTargetRequest(address=address, description_of_origin="protobuf"), **implicitly()
    )
    resolve_field_type: type[Field] | None = None
    for field_type in wrapped_target.target.field_types:
        if issubclass(field_type, ResolveLikeField):
            if resolve_field_type is not None:
                raise NotImplementedError(
                    f"TODO: Multiple resolve-like fields on target at address `{address}`."
                )
            resolve_field_type = field_type
    if resolve_field_type is None:
        raise ValueError(f"Failed to find resolve-like field on target at address `{address}`.")

    resolve_request_type = typing.cast(
        ResolveLikeField, wrapped_target.target[resolve_field_type]
    ).get_resolve_like_field_to_value_request()
    resolve_request = resolve_request_type(target=wrapped_target.target)
    resolve_result = await get_resolve_from_resolve_like_field_request(
        **implicitly({resolve_request: ResolveLikeFieldToValueRequest})
    )

    # When resolves are disabled, return the sentinel key
    if resolve_result.value is None:
        return _NO_RESOLVE_LIKE_FIELDS_DEFINED

    return ProtobufMappingResolveKey(
        field_type=resolve_field_type,
        resolve=resolve_result.value,
    )
279

280

281
@rule(desc="Inferring Protobuf dependencies by analyzing imports")
async def infer_protobuf_dependencies(
    request: InferProtobufDependencies, protobuf_mapping: ProtobufMapping, protoc: Protoc
) -> InferredDependencies:
    """Infer dependencies of a protobuf target from the `import` statements in its source file.

    Each imported file name is looked up in the target's resolve partition of `protobuf_mapping`.
    An unambiguous owner is added directly. An ambiguous import triggers a possible warning and
    is added only if the explicitly provided dependencies disambiguate it. Returns no
    dependencies when `protoc.dependency_inference` is disabled.
    """
    if not protoc.dependency_inference:
        return InferredDependencies([])

    address = request.field_set.address

    resolve_key: ProtobufMappingResolveKey
    # NOTE(review): whenever the sentinel partition exists, every target is looked up under it,
    # including targets whose own resolve-like field would yield a real resolve. Confirm this is
    # intended for mappings that contain both the sentinel and real resolve keys.
    if _NO_RESOLVE_LIKE_FIELDS_DEFINED in protobuf_mapping.mapping:
        resolve_key = _NO_RESOLVE_LIKE_FIELDS_DEFINED
    else:
        resolve_key = await get_resolve_key_from_target(address)

    explicitly_provided_deps, hydrated_sources = await concurrently(
        determine_explicitly_provided_dependencies(
            **implicitly(DependenciesRequest(request.field_set.dependencies))
        ),
        hydrate_sources(HydrateSourcesRequest(request.field_set.source), **implicitly()),
    )
    digest_contents = await get_digest_contents(hydrated_sources.snapshot.digest)
    assert len(digest_contents) == 1
    file_content = digest_contents[0]

    # The partition for this target is the same for every import, so look it up once.
    mapping_in_resolve = protobuf_mapping.mapping.get(resolve_key)
    ambiguous_modules_in_resolve = protobuf_mapping.ambiguous_modules.get(resolve_key)

    result: OrderedSet[Address] = OrderedSet()
    for import_path in parse_proto_imports(file_content.content.decode()):
        unambiguous = mapping_in_resolve.get(import_path) if mapping_in_resolve else None
        ambiguous = (
            ambiguous_modules_in_resolve.get(import_path) if ambiguous_modules_in_resolve else None
        )

        if unambiguous:
            result.add(unambiguous)
        elif ambiguous:
            explicitly_provided_deps.maybe_warn_of_ambiguous_dependency_inference(
                ambiguous,
                address,
                import_reference="file",
                context=softwrap(
                    f"""
                    The target {address} imports `{import_path}` in the file
                    {file_content.path}
                    """
                ),
            )
            maybe_disambiguated = explicitly_provided_deps.disambiguated(ambiguous)
            if maybe_disambiguated:
                result.add(maybe_disambiguated)
    return InferredDependencies(sorted(result))
×
334

335

336
def rules():
    """Return this module's rules plus the union registration for `InferProtobufDependencies`."""
    return (*collect_rules(), UnionRule(InferDependenciesRequest, InferProtobufDependencies))
5✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc