• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pantsbuild / pants / 20428083889

22 Dec 2025 09:43AM UTC coverage: 80.285% (-0.01%) from 80.296%
20428083889

Pull #21918

github

web-flow
Merge c895684e5 into 06f105be8
Pull Request #21918: [WIP] partition protobuf dependency inference by any "resolve-like" fields from plugins

191 of 263 new or added lines in 9 files covered. (72.62%)

44 existing lines in 2 files now uncovered.

78686 of 98008 relevant lines covered (80.29%)

3.65 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

35.97
/src/python/pants/backend/codegen/protobuf/protobuf_dependency_inference.py
1
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
2
# Licensed under the Apache License, Version 2.0 (see LICENSE).
3

4
from __future__ import annotations
4✔
5

6
import re
4✔
7
import typing
4✔
8
from collections import defaultdict
4✔
9
from dataclasses import dataclass
4✔
10
from typing import DefaultDict
4✔
11

12
from pants.backend.codegen.protobuf.protoc import Protoc
4✔
13
from pants.backend.codegen.protobuf.target_types import (
4✔
14
    AllProtobufTargets,
15
    ProtobufDependenciesField,
16
    ProtobufSourceField,
17
    ProtobufSourceTarget,
18
)
19
from pants.core.target_types import (
4✔
20
    ResolveLikeField,
21
    ResolveLikeFieldToValueRequest,
22
    get_resolve_from_resolve_like_field_request,
23
)
24
from pants.core.util_rules.stripped_source_files import (
4✔
25
    StrippedFileName,
26
    StrippedFileNameRequest,
27
    strip_file_name,
28
)
29
from pants.engine.addresses import Address
4✔
30
from pants.engine.internals.graph import (
4✔
31
    determine_explicitly_provided_dependencies,
32
    hydrate_sources,
33
    resolve_target,
34
)
35
from pants.engine.intrinsics import get_digest_contents
4✔
36
from pants.engine.rules import collect_rules, concurrently, implicitly, rule
4✔
37
from pants.engine.target import (
4✔
38
    DependenciesRequest,
39
    Field,
40
    FieldSet,
41
    HydrateSourcesRequest,
42
    InferDependenciesRequest,
43
    InferredDependencies,
44
    Target,
45
    WrappedTargetRequest,
46
)
47
from pants.engine.unions import UnionMembership, UnionRule
4✔
48
from pants.util.frozendict import FrozenDict
4✔
49
from pants.util.logging import LogLevel
4✔
50
from pants.util.ordered_set import FrozenOrderedSet, OrderedSet
4✔
51
from pants.util.strutil import softwrap
4✔
52

53

54
@dataclass(frozen=True)
class ProtobufMappingResolveKey:
    """Key identifying one partition of the Protobuf file mapping.

    A partition is named by the resolve-like field type a resolve value was read from, paired
    with that resolve value. Instances are frozen so they can be used as dictionary keys.
    """

    # The resolve-like field type that the resolve value was obtained from.
    field_type: type[Field]
    # The resolve value reported for that field.
    resolve: str
4✔
58

59

60
# Sentinel key for the single implicit resolve. This key is used when:
# 1. No resolve-like fields are registered on protobuf_source targets
# 2. Resolve-like fields exist but resolves are disabled (the resolve lookup result has a
#    `value` of None)
# In both cases, the affected targets are treated as belonging to a single implicit resolve.
_NO_RESOLVE_LIKE_FIELDS_DEFINED = ProtobufMappingResolveKey(
    field_type=ProtobufSourceField, resolve="<no-resolve>"
)
67

68

69
@dataclass(frozen=True)
class ProtobufMapping:
    """Per-resolve lookup tables from stripped `.proto` file names to the targets owning them.

    Both tables are keyed first by a `ProtobufMappingResolveKey` and then by the stripped file
    name.
    """

    # For each resolve key: stripped file name -> the address of its single owning target.
    # File names with more than one owner in a resolve are excluded from this table.
    mapping: FrozenDict[ProtobufMappingResolveKey, FrozenDict[str, Address]]
    # For each resolve key: stripped file name -> sorted tuple of all addresses that claim it,
    # recorded only for file names that have more than one owner in that resolve.
    ambiguous_modules: FrozenDict[ProtobufMappingResolveKey, FrozenDict[str, tuple[Address, ...]]]
4✔
76

77

78
async def _map_single_pseudo_resolve(protobuf_targets: AllProtobufTargets) -> ProtobufMapping:
    """Build a `ProtobufMapping` that places every target in one implicit resolve.

    Each target's source path is stripped, then every stripped file name is assigned to its
    owning address. A file name claimed more than once is moved out of the unambiguous table
    and recorded, with all of its claimants, in the ambiguous table. Both tables are stored
    under the `_NO_RESOLVE_LIKE_FIELDS_DEFINED` key.
    """
    stripped_names = await concurrently(
        strip_file_name(StrippedFileNameRequest(target[ProtobufSourceField].file_path))
        for target in protobuf_targets
    )

    first_owner_by_file: dict[str, Address] = {}
    contested_owners_by_file: DefaultDict[str, set[Address]] = defaultdict(set)
    for target, stripped_name in zip(protobuf_targets, stripped_names):
        file_name = stripped_name.value
        if file_name not in first_owner_by_file:
            first_owner_by_file[file_name] = target.address
            continue
        # A second claim on the same file name: remember both the first owner and this one.
        contested_owners_by_file[file_name].update(
            {first_owner_by_file[file_name], target.address}
        )

    # Only files with exactly one owner stay in the unambiguous table.
    sole_owner_by_file = {
        file_name: owner
        for file_name, owner in first_owner_by_file.items()
        if file_name not in contested_owners_by_file
    }

    unambiguous = FrozenDict(sorted(sole_owner_by_file.items()))
    ambiguous = FrozenDict(
        (file_name, tuple(sorted(owners)))
        for file_name, owners in sorted(contested_owners_by_file.items())
    )
    return ProtobufMapping(
        mapping=FrozenDict({_NO_RESOLVE_LIKE_FIELDS_DEFINED: unambiguous}),
        ambiguous_modules=FrozenDict({_NO_RESOLVE_LIKE_FIELDS_DEFINED: ambiguous}),
    )
115

116

117
@rule(desc="Creating map of Protobuf file names to Protobuf targets", level=LogLevel.DEBUG)
async def map_protobuf_files(
    protobuf_targets: AllProtobufTargets, union_membership: UnionMembership
) -> ProtobufMapping:
    """Map stripped `.proto` file names to their owning targets, partitioned by resolve.

    If no resolve-like field is registered on `protobuf_source`, every target is placed in a
    single implicit resolve. Otherwise, each target is placed in one partition per resolve-like
    field it has: the partition is keyed by the field type and the resolve value obtained for
    it, or by `_NO_RESOLVE_LIKE_FIELDS_DEFINED` when that value is None. Within each partition,
    a file name owned by exactly one target goes into `mapping`; a file name owned by several
    distinct targets goes into `ambiguous_modules` instead.

    Raises:
        ValueError: if resolve-like fields are registered but a target has none of them.
    """
    # Determine the resolve-like fields installed on the `protobuf_source` target type.
    resolve_like_field_types: set[type[Field]] = {
        field_type
        for field_type in ProtobufSourceTarget.class_field_types(union_membership)
        if issubclass(field_type, ResolveLikeField)
    }
    if not resolve_like_field_types:
        return await _map_single_pseudo_resolve(protobuf_targets)

    # Discover which resolves are present in the protobuf_source targets. The two lists are kept
    # in lockstep so that each resolve result can be paired with its target and field type.
    resolve_requests: list[ResolveLikeFieldToValueRequest] = []
    target_and_field_type_for_resolve_requests: list[tuple[Target, type[Field]]] = []
    for tgt in protobuf_targets:
        field_types_on_target = [
            field_type for field_type in resolve_like_field_types if tgt.has_field(field_type)
        ]
        if not field_types_on_target:
            raise ValueError(f"Did not find a resolve field on target at address `{tgt.address}`.")
        for field_type in field_types_on_target:
            resolve_request_type = typing.cast(
                ResolveLikeField, tgt[field_type]
            ).get_resolve_like_field_to_value_request()
            resolve_requests.append(resolve_request_type(target=tgt))
            target_and_field_type_for_resolve_requests.append((tgt, field_type))

    # Obtain the resolves for each target and then partition.
    resolve_results = await concurrently(
        get_resolve_from_resolve_like_field_request(
            **implicitly({resolve_request: ResolveLikeFieldToValueRequest})
        )
        for resolve_request in resolve_requests
    )
    targets_partitioned_by_resolve: dict[ProtobufMappingResolveKey, list[Target]] = defaultdict(
        list
    )
    for resolve_result, (target, field_type) in zip(
        resolve_results, target_and_field_type_for_resolve_requests
    ):
        # When a resolve field returns None (resolves disabled), canonicalize to
        # _NO_RESOLVE_LIKE_FIELDS_DEFINED to ensure all "resolves disabled" targets share
        # the same partition regardless of which resolve-like field they have.
        if resolve_result.value is None:
            resolve_key = _NO_RESOLVE_LIKE_FIELDS_DEFINED
        else:
            resolve_key = ProtobufMappingResolveKey(
                field_type=field_type, resolve=resolve_result.value
            )
        targets_partitioned_by_resolve[resolve_key].append(target)

    stripped_file_per_target = await concurrently(
        strip_file_name(StrippedFileNameRequest(tgt[ProtobufSourceField].file_path))
        for tgt in protobuf_targets
    )

    target_to_stripped_file: dict[Target, StrippedFileName] = dict(
        zip(protobuf_targets, stripped_file_per_target)
    )

    stripped_files_to_addresses: dict[ProtobufMappingResolveKey, dict[str, Address]] = defaultdict(
        dict
    )
    stripped_files_with_multiple_owners: dict[
        ProtobufMappingResolveKey, dict[str, set[Address]]
    ] = defaultdict(lambda: defaultdict(set))

    for resolve_key, targets_in_resolve in targets_partitioned_by_resolve.items():
        # Indexing the defaultdict registers the partition even if all its files turn out to
        # be ambiguous, so every partition key is present in the final `mapping`.
        owners_in_resolve = stripped_files_to_addresses[resolve_key]
        for tgt in targets_in_resolve:
            stripped_file = target_to_stripped_file[tgt].value
            if stripped_file not in owners_in_resolve:
                owners_in_resolve[stripped_file] = tgt.address
                continue
            first_owner = owners_in_resolve[stripped_file]
            if first_owner == tgt.address:
                # The same target can appear more than once in a partition (e.g. it has
                # several resolve-like fields that were all canonicalized to the sentinel
                # key). A target must not be reported as ambiguous with itself.
                continue
            stripped_files_with_multiple_owners[resolve_key][stripped_file].update(
                {first_owner, tgt.address}
            )

    # Remove files with ambiguous owners in each resolve.
    for resolve_key, ambiguous_in_resolve in stripped_files_with_multiple_owners.items():
        for ambiguous_stripped_f in ambiguous_in_resolve:
            stripped_files_to_addresses[resolve_key].pop(ambiguous_stripped_f)

    return ProtobufMapping(
        mapping=FrozenDict(
            {
                resolve_key: FrozenDict(sorted(owners_in_resolve.items()))
                for resolve_key, owners_in_resolve in stripped_files_to_addresses.items()
            }
        ),
        ambiguous_modules=FrozenDict(
            {
                resolve_key: FrozenDict(
                    (k, tuple(sorted(v))) for k, v in sorted(ambiguous_in_resolve.items())
                )
                for resolve_key, ambiguous_in_resolve in stripped_files_with_multiple_owners.items()
            }
        ),
    )
222

223

224
# See https://developers.google.com/protocol-buffers/docs/reference/proto3-spec for the Proto
# language spec.
# Either a single or a double quote (non-capturing).
QUOTE_CHAR = r"(?:'|\")"
# Optional `public` or `weak` modifier, preceded by one whitespace character (non-capturing).
IMPORT_MODIFIERS = r"(?:\spublic|\sweak)?"
# The only capturing group: the shortest run of characters that ends in `.proto`.
FILE_NAME = r"(.+?\.proto)"
# NB: We don't specify what a valid file name looks like to avoid accidentally breaking unicode.
# Matches a whole `import ... "<file>.proto";` statement, up to and including the semicolon.
IMPORT_REGEX = re.compile(rf"import\s*{IMPORT_MODIFIERS}\s*{QUOTE_CHAR}{FILE_NAME}{QUOTE_CHAR}\s*;")
4✔
231

232

233
def parse_proto_imports(file_content: str) -> FrozenOrderedSet[str]:
    """Return the file names imported by a `.proto` file, in order of first appearance.

    Every match of `IMPORT_REGEX` in `file_content` contributes its captured file name.
    """
    imported_files = (match.group(1) for match in IMPORT_REGEX.finditer(file_content))
    return FrozenOrderedSet(imported_files)
1✔
235

236

237
@dataclass(frozen=True)
class ProtobufDependencyInferenceFieldSet(FieldSet):
    """The fields read when inferring dependencies for a Protobuf source target."""

    # Both the source field and the dependencies field are declared as required.
    required_fields = (ProtobufSourceField, ProtobufDependenciesField)

    # The `.proto` source whose import statements are parsed.
    source: ProtobufSourceField
    # Used to look up the explicitly provided dependencies when disambiguating imports.
    dependencies: ProtobufDependenciesField
4✔
243

244

245
class InferProtobufDependencies(InferDependenciesRequest):
    """Dependency inference request for Protobuf sources.

    Registered in `rules()` as a member of the `InferDependenciesRequest` union.
    """

    # The field set this request infers from.
    infer_from = ProtobufDependencyInferenceFieldSet
4✔
247

248

249
async def get_resolve_key_from_target(address: Address) -> ProtobufMappingResolveKey:
    """Compute the mapping partition key for the target at `address`.

    The target is resolved from its address and its single resolve-like field is located. The
    resolve value for that field is then requested. If the value is None (resolves disabled),
    the sentinel `_NO_RESOLVE_LIKE_FIELDS_DEFINED` is returned; otherwise the key is built from
    the field type and the resolve value.

    Raises:
        NotImplementedError: if the target has more than one resolve-like field.
        ValueError: if the target has no resolve-like field.
    """
    wrapped_target = await resolve_target(
        WrappedTargetRequest(address=address, description_of_origin="protobuf"), **implicitly()
    )
    target = wrapped_target.target

    resolve_field_type: type[Field] | None = None
    for field_type in target.field_types:
        if not issubclass(field_type, ResolveLikeField):
            continue
        if resolve_field_type is not None:
            raise NotImplementedError(
                f"TODO: Multiple resolve-like fields on target at address `{address}`."
            )
        resolve_field_type = field_type
    if resolve_field_type is None:
        raise ValueError(f"Failed to find resolve-like field on target at address `{address}`.")

    resolve_request_type = typing.cast(
        ResolveLikeField, target[resolve_field_type]
    ).get_resolve_like_field_to_value_request()
    resolve_request = resolve_request_type(target=target)
    resolve_result = await get_resolve_from_resolve_like_field_request(
        **implicitly({resolve_request: ResolveLikeFieldToValueRequest})
    )

    # When resolves are disabled, return the sentinel key
    if resolve_result.value is None:
        return _NO_RESOLVE_LIKE_FIELDS_DEFINED

    return ProtobufMappingResolveKey(
        field_type=resolve_field_type,
        resolve=resolve_result.value,
    )
280

281

282
@rule(desc="Inferring Protobuf dependencies by analyzing imports")
async def infer_protobuf_dependencies(
    request: InferProtobufDependencies, protobuf_mapping: ProtobufMapping, protoc: Protoc
) -> InferredDependencies:
    """Infer a Protobuf target's dependencies from the `import` statements in its source.

    Returns no dependencies when `protoc.dependency_inference` is off. Otherwise each imported
    file name is looked up in the target's resolve partition of `protobuf_mapping`. A file with
    a single owner is added directly. A file with several owners triggers an ambiguity warning
    and is added only if the explicitly provided dependencies disambiguate it. The result is
    returned sorted.
    """
    if not protoc.dependency_inference:
        return InferredDependencies([])

    address = request.field_set.address

    # NOTE(review): whenever the sentinel partition exists, it is used for every target, even
    # if the mapping also contains partitions for real resolves — confirm this is intended.
    resolve_key: ProtobufMappingResolveKey
    if _NO_RESOLVE_LIKE_FIELDS_DEFINED in protobuf_mapping.mapping:
        resolve_key = _NO_RESOLVE_LIKE_FIELDS_DEFINED
    else:
        resolve_key = await get_resolve_key_from_target(address)

    explicitly_provided_deps, hydrated_sources = await concurrently(
        determine_explicitly_provided_dependencies(
            **implicitly(DependenciesRequest(request.field_set.dependencies))
        ),
        hydrate_sources(HydrateSourcesRequest(request.field_set.source), **implicitly()),
    )
    digest_contents = await get_digest_contents(hydrated_sources.snapshot.digest)
    assert len(digest_contents) == 1
    file_content = digest_contents[0]

    # The partition tables depend only on the resolve key, so look them up once rather than
    # once per import.
    mapping_in_resolve = protobuf_mapping.mapping.get(resolve_key)
    ambiguous_modules_in_resolve = protobuf_mapping.ambiguous_modules.get(resolve_key)

    result: OrderedSet[Address] = OrderedSet()
    for import_path in parse_proto_imports(file_content.content.decode()):
        unambiguous = mapping_in_resolve.get(import_path) if mapping_in_resolve else None
        ambiguous = (
            ambiguous_modules_in_resolve.get(import_path) if ambiguous_modules_in_resolve else None
        )

        if unambiguous:
            result.add(unambiguous)
        elif ambiguous:
            explicitly_provided_deps.maybe_warn_of_ambiguous_dependency_inference(
                ambiguous,
                address,
                import_reference="file",
                context=softwrap(
                    f"""
                    The target {address} imports `{import_path}` in the file
                    {file_content.path}
                    """
                ),
            )
            maybe_disambiguated = explicitly_provided_deps.disambiguated(ambiguous)
            if maybe_disambiguated:
                result.add(maybe_disambiguated)
    return InferredDependencies(sorted(result))
×
335

336

337
def rules():
    """Return this module's collected rules plus the Protobuf inference union registration."""
    inference_union_rule = UnionRule(InferDependenciesRequest, InferProtobufDependencies)
    return (*collect_rules(), inference_union_rule)
4✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc