• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pantsbuild / pants / 19250292619

11 Nov 2025 12:09AM UTC coverage: 77.865% (-2.4%) from 80.298%
19250292619

push

github

web-flow
flag non-runnable targets used with `code_quality_tool` (#22875)

2 of 5 new or added lines in 2 files covered. (40.0%)

1487 existing lines in 72 files now uncovered.

71448 of 91759 relevant lines covered (77.86%)

3.22 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

57.41
/src/python/pants/backend/python/dependency_inference/module_mapper.py
1
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
2
# Licensed under the Apache License, Version 2.0 (see LICENSE).
3

4
from __future__ import annotations
11✔
5

6
import enum
11✔
7
import functools
11✔
8
import itertools
11✔
9
import logging
11✔
10
import os
11✔
11
from collections import defaultdict
11✔
12
from collections.abc import Iterable, Mapping
11✔
13
from dataclasses import dataclass
11✔
14
from functools import total_ordering
11✔
15
from pathlib import PurePath
11✔
16
from typing import DefaultDict
11✔
17

18
from packaging.utils import canonicalize_name as canonicalize_project_name
11✔
19

20
from pants.backend.python.dependency_inference.default_module_mapping import (
11✔
21
    DEFAULT_MODULE_MAPPING,
22
    DEFAULT_MODULE_PATTERN_MAPPING,
23
    DEFAULT_TYPE_STUB_MODULE_MAPPING,
24
    DEFAULT_TYPE_STUB_MODULE_PATTERN_MAPPING,
25
)
26
from pants.backend.python.subsystems.setup import PythonSetup
11✔
27
from pants.backend.python.target_types import (
11✔
28
    PythonRequirementModulesField,
29
    PythonRequirementResolveField,
30
    PythonRequirementsField,
31
    PythonRequirementTypeStubModulesField,
32
    PythonResolveField,
33
    PythonSourceField,
34
)
35
from pants.core.util_rules.stripped_source_files import StrippedFileNameRequest, strip_file_name
11✔
36
from pants.engine.addresses import Address
11✔
37
from pants.engine.environment import EnvironmentName
11✔
38
from pants.engine.rules import collect_rules, concurrently, implicitly, rule
11✔
39
from pants.engine.target import AllTargets, Target
11✔
40
from pants.engine.unions import UnionMembership, UnionRule, union
11✔
41
from pants.util.frozendict import FrozenDict
11✔
42
from pants.util.logging import LogLevel
11✔
43
from pants.util.strutil import softwrap
11✔
44

45
logger = logging.getLogger(__name__)
11✔
46

47

48
ResolveName = str
11✔
49

50

51
@total_ordering
class ModuleProviderType(enum.Enum):
    """The kind of provider for a module: a type stub or an implementation.

    Members are ordered by comparing their names; `total_ordering` derives the remaining
    comparison operators from `__lt__`.
    """

    TYPE_STUB = enum.auto()
    IMPL = enum.auto()

    def __lt__(self, other) -> bool:
        # Only members of this enum are comparable; let Python try the reflected operation
        # for anything else.
        if isinstance(other, ModuleProviderType):
            return self.name < other.name
        return NotImplemented
×
60

61

62
@dataclass(frozen=True, order=True)
class ModuleProvider:
    """An address that provides a module, together with the kind of provider it is.

    Instances are immutable and sort by `addr` first, then by `typ`.
    """

    # The address of the providing target.
    addr: Address
    # Whether the provider is a type stub or an implementation.
    typ: ModuleProviderType
11✔
66

67

68
@dataclass(frozen=True, order=True)
class PossibleModuleProvider:
    """A candidate provider for a requested module, plus how far up the module's ancestry it
    was found.

    Instances are immutable and sort by `provider` first, then by `ancestry`.
    """

    provider: ModuleProvider
    # Distance between the requested module and the module the provider is mapped to:
    # 0 means the module itself, 1 means its parent, and so on.
    ancestry: int
11✔
73

74

75
def module_from_stripped_path(path: PurePath) -> str:
    """Convert a source-root-stripped file path into a dotted module name.

    `__init__.py` and `__init__.pyi` files map to their containing package; any other file maps
    to its path with the final suffix removed.
    """
    if path.name in ("__init__.py", "__init__.pyi"):
        module_path = path.parent
    else:
        module_path = path.with_suffix("")
    return ".".join(module_path.as_posix().split("/"))
1✔
80

81

82
@dataclass(frozen=True)
class AllPythonTargets:
    """The project's Python targets, split into first-party and third-party groups."""

    # Targets considered first-party code.
    first_party: tuple[Target, ...]
    # Targets considered third-party requirements.
    third_party: tuple[Target, ...]
11✔
86

87

88
@rule(desc="Find all Python targets in project", level=LogLevel.DEBUG)
async def find_all_python_projects(all_targets: AllTargets) -> AllPythonTargets:
    """Partition all targets into those with Python sources and those with Python requirements.

    A target carrying both `PythonSourceField` and `PythonRequirementsField` is placed in both
    groups. Each group is returned sorted.
    """
    with_sources = []
    with_requirements = []
    for candidate in all_targets:
        # The two checks are independent on purpose: neither excludes the other.
        if candidate.has_field(PythonSourceField):
            with_sources.append(candidate)
        if candidate.has_field(PythonRequirementsField):
            with_requirements.append(candidate)

    return AllPythonTargets(
        first_party=tuple(sorted(with_sources)),
        third_party=tuple(sorted(with_requirements)),
    )
×
99

100

101
# -----------------------------------------------------------------------------------------------
102
# First-party module mapping
103
# -----------------------------------------------------------------------------------------------
104

105

106
class FirstPartyPythonMappingImpl(
    FrozenDict[ResolveName, FrozenDict[str, tuple[ModuleProvider, ...]]]
):
    """Per-resolve first-party module names and the addresses that own them, as produced by one
    specific implementation (e.g. a codegen backend).

    The mappings from all implementations are merged afterwards.
    """

    @classmethod
    def create(
        cls,
        resolves_to_modules_to_providers: Mapping[
            ResolveName, Mapping[str, Iterable[ModuleProvider]]
        ],
    ) -> FirstPartyPythonMappingImpl:
        """Build the mapping with resolves, modules, and providers each in sorted order."""

        def freeze_modules(
            modules_to_providers: Mapping[str, Iterable[ModuleProvider]],
        ) -> FrozenDict[str, tuple[ModuleProvider, ...]]:
            # Sort both the module names and each module's providers.
            return FrozenDict(
                (module_name, tuple(sorted(owners)))
                for module_name, owners in sorted(modules_to_providers.items())
            )

        return FirstPartyPythonMappingImpl(
            (resolve_name, freeze_modules(modules_to_providers))
            for resolve_name, modules_to_providers in sorted(
                resolves_to_modules_to_providers.items()
            )
        )
132

133

134
@union(in_scope_types=[EnvironmentName])
class FirstPartyPythonMappingImplMarker:
    """Union base for implementations that map module names to their owning targets, used by
    Python import dependency inference.

    The results of every implementation are merged. A module reported by more than one
    implementation will be marked ambiguous.
    """
142

143

144
@rule(polymorphic=True)
async def get_first_party_python_mapping_impl(
    marker: FirstPartyPythonMappingImplMarker, env_name: EnvironmentName
) -> FirstPartyPythonMappingImpl:
    """Polymorphic entry point for producing a `FirstPartyPythonMappingImpl` from a marker.

    This base body provides no implementation of its own and always raises.
    """
    raise NotImplementedError()
×
149

150

151
@dataclass(frozen=True)
class FirstPartyPythonModuleMapping:
    """A merged mapping of each resolve name to the first-party module names contained and their
    owning addresses.

    This mapping may have been constructed from multiple distinct implementations, e.g.
    implementations for each codegen backend.
    """

    resolves_to_modules_to_providers: FrozenDict[
        ResolveName, FrozenDict[str, tuple[ModuleProvider, ...]]
    ]

    def _providers_for_resolve(
        self, module: str, resolve: str
    ) -> tuple[PossibleModuleProvider, ...]:
        """Find the providers of `module` within a single resolve.

        Returns providers mapped to the module itself (ancestry 0) if there are any, otherwise
        providers mapped to its direct parent (ancestry 1). Returns an empty tuple when the
        resolve is unknown or has no modules, or when neither lookup finds anything.
        """
        mapping = self.resolves_to_modules_to_providers.get(resolve)
        if not mapping:
            return ()

        result = mapping.get(module, ())
        if result:
            return tuple(PossibleModuleProvider(provider, 0) for provider in result)

        # If the module is not found, try the parent, if any. This is to handle `from` imports
        # where the "module" we were handed was actually a symbol inside the module.
        # E.g., with `from my_project.app import App`, we would be passed "my_project.app.App".
        #
        # We do not look past the direct parent, as this could cause multiple ambiguous owners to
        # be resolved. This contrasts with the third-party module mapping, which will try every
        # ancestor.
        # TODO: Now that we capture the ancestry, we could look past the direct parent.
        #  One reason to do so would be to unify more of the FirstParty and ThirdParty impls.
        if "." not in module:
            return ()
        parent_module = module.rsplit(".", maxsplit=1)[0]
        parent_providers = mapping.get(parent_module, ())
        return tuple(PossibleModuleProvider(mp, 1) for mp in parent_providers)

    def providers_for_module(
        self, module: str, resolve: str | None
    ) -> tuple[PossibleModuleProvider, ...]:
        """Find all providers for the module.

        If `resolve` is None, will not consider resolves, i.e. any `python_source` et al can be
        used. Otherwise, providers can only come from first-party targets with the resolve.
        """
        if resolve:
            return self._providers_for_resolve(module, resolve)
        # No resolve requested: gather the providers from every known resolve, in mapping order.
        return tuple(
            itertools.chain.from_iterable(
                self._providers_for_resolve(module, resolve_name)
                for resolve_name in self.resolves_to_modules_to_providers
            )
        )
206

207

208
@rule(level=LogLevel.DEBUG)
async def merge_first_party_module_mappings(
    union_membership: UnionMembership,
) -> FirstPartyPythonModuleMapping:
    """Run every registered first-party mapping implementation and merge the results.

    Providers reported for the same resolve and module by different implementations are
    combined. The merged result has resolves, modules, and providers each in sorted order.
    """
    all_mappings = await concurrently(
        get_first_party_python_mapping_impl(
            **implicitly({marker_cls(): FirstPartyPythonMappingImplMarker})
        )
        for marker_cls in union_membership.get(FirstPartyPythonMappingImplMarker)
    )

    merged: DefaultDict[ResolveName, DefaultDict[str, list[ModuleProvider]]] = defaultdict(
        lambda: defaultdict(list)
    )
    for impl_mapping in all_mappings:
        for resolve_name, providers_by_module in impl_mapping.items():
            for module_name, owners in providers_by_module.items():
                # Index in the innermost loop so a resolve with no modules creates no entry.
                merged[resolve_name][module_name].extend(owners)

    frozen = FrozenDict(
        (
            resolve_name,
            FrozenDict(
                (module_name, tuple(sorted(owners)))
                for module_name, owners in sorted(providers_by_module.items())
            ),
        )
        for resolve_name, providers_by_module in sorted(merged.items())
    )
    return FirstPartyPythonModuleMapping(frozen)
236

237

238
# This exists solely to register our own implementation with the plugin hook via unions. It is
# implemented like any other plugin implementation so that all of them can run in parallel.
class FirstPartyPythonTargetsMappingMarker(FirstPartyPythonMappingImplMarker):
    """Marker for the first-party mapping implementation defined in this module."""
11✔
242

243

244
@rule(
    desc="Creating map of first party Python targets to Python modules",
    level=LogLevel.DEBUG,
)
async def map_first_party_python_targets_to_modules(
    _: FirstPartyPythonTargetsMappingMarker,
    all_python_targets: AllPythonTargets,
    python_setup: PythonSetup,
) -> FirstPartyPythonMappingImpl:
    """Map each first-party Python target to the module its source file provides.

    Each target's file path is stripped via `strip_file_name` and converted to a module name.
    Files ending in `.pyi` are recorded as type stubs, everything else as implementations.
    Providers are grouped by the target's resolve.
    """
    stripped_names = await concurrently(
        strip_file_name(StrippedFileNameRequest(tgt[PythonSourceField].file_path))
        for tgt in all_python_targets.first_party
    )

    providers_by_resolve: DefaultDict[
        ResolveName, DefaultDict[str, list[ModuleProvider]]
    ] = defaultdict(lambda: defaultdict(list))
    # The stripped names line up positionally with the first-party targets.
    for tgt, stripped_name in zip(all_python_targets.first_party, stripped_names):
        resolve_name = tgt[PythonResolveField].normalized_value(python_setup)
        source_path = PurePath(stripped_name.value)
        if source_path.suffix == ".pyi":
            kind = ModuleProviderType.TYPE_STUB
        else:
            kind = ModuleProviderType.IMPL
        module_name = module_from_stripped_path(source_path)
        providers_by_resolve[resolve_name][module_name].append(ModuleProvider(tgt.address, kind))

    return FirstPartyPythonMappingImpl.create(providers_by_resolve)
×
273

274

275
# -----------------------------------------------------------------------------------------------
276
# Third party module mapping
277
# -----------------------------------------------------------------------------------------------
278

279

280
@dataclass(frozen=True)
class ThirdPartyPythonModuleMapping:
    """For each resolve, the modules it contains and the addresses that provide those
    modules."""

    resolves_to_modules_to_providers: FrozenDict[
        ResolveName, FrozenDict[str, tuple[ModuleProvider, ...]]
    ]

    def _providers_for_resolve(
        self, module: str, resolve: str, ancestry: int = 0
    ) -> tuple[PossibleModuleProvider, ...]:
        """Find the providers of `module`, or of its nearest mapped ancestor, in one resolve.

        `ancestry` is the ancestry value assigned to a match on `module` itself; each step up
        to a parent module adds one. Returns an empty tuple when the resolve is unknown or has
        no modules, or when no ancestor has providers.
        """
        modules_to_providers = self.resolves_to_modules_to_providers.get(resolve)
        if not modules_to_providers:
            return ()

        # Walk up the dotted name until something matches, for example
        # pants.task.task.Task -> pants.task.task -> pants.task -> pants
        candidate = module
        depth = ancestry
        while True:
            found = modules_to_providers.get(candidate, ())
            if found:
                return tuple(PossibleModuleProvider(mp, depth) for mp in found)
            parent, dot, _ = candidate.rpartition(".")
            if not dot:
                # No more ancestors to try.
                return ()
            candidate = parent
            depth += 1

    def providers_for_module(
        self, module: str, resolve: str | None
    ) -> tuple[PossibleModuleProvider, ...]:
        """Find all providers for the module.

        When `resolve` is None, resolves are not considered, i.e. any `python_requirement` can
        be consumed. Otherwise, providers can only come from `python_requirements` with the
        resolve.
        """
        if resolve:
            return self._providers_for_resolve(module, resolve)
        collected: list[PossibleModuleProvider] = []
        for resolve_name in self.resolves_to_modules_to_providers:
            collected.extend(self._providers_for_resolve(module, resolve_name))
        return tuple(collected)
323

324

325
@functools.cache
def generate_mappings_from_pattern(proj_name: str, is_type_stub: bool) -> tuple[str, ...]:
    """Derive candidate module names for a project name from the default regex pattern tables.

    e.g. google-cloud-foo -> [google.cloud.foo, google.cloud.foo_v1, google.cloud.foo_v2]
    This should remove the need to "manually" add a mapping for every service.

    proj_name: The project name to generate mappings for, e.g. google-cloud-datastream.
    is_type_stub: Use the type-stub pattern table instead of the implementation one.

    Returns an empty tuple when no pattern matches.
    """
    if is_type_stub:
        pattern_table = DEFAULT_TYPE_STUB_MODULE_PATTERN_MAPPING
    else:
        pattern_table = DEFAULT_MODULE_PATTERN_MAPPING

    for matcher, replacements in pattern_table.items():
        if matcher.match(proj_name) is None:
            continue
        # Only the first matching pattern is used, in the rare chance that several match.
        return tuple([matcher.sub(replacement, proj_name) for replacement in replacements])
    return ()
1✔
345

346

347
@rule(desc="Creating map of third party targets to Python modules", level=LogLevel.DEBUG)
async def map_third_party_modules_to_addresses(
    all_python_targets: AllPythonTargets,
    python_setup: PythonSetup,
) -> ThirdPartyPythonModuleMapping:
    """Map each third-party target to the modules it provides, grouped by resolve.

    For each target, the first of these that applies decides its modules:
      1. the target's explicit modules field (recorded as implementations);
      2. the target's explicit type stub modules field (recorded as type stubs);
      3. per requirement: the default module mapping, the default type stub mapping, the
         type stub pattern mapping, the implementation pattern mapping, and finally a fallback
         name derived from the requirement name.
    """
    providers_by_resolve: DefaultDict[
        ResolveName, DefaultDict[str, list[ModuleProvider]]
    ] = defaultdict(lambda: defaultdict(list))

    def record(
        address: Address, resolve_name: ResolveName, modules: Iterable[str], *, is_type_stub: bool
    ) -> None:
        # Register `address` as a provider of every module in `modules` for the resolve.
        for module_name in modules:
            providers_by_resolve[resolve_name][module_name].append(
                ModuleProvider(
                    address,
                    ModuleProviderType.TYPE_STUB if is_type_stub else ModuleProviderType.IMPL,
                )
            )

    for tgt in all_python_targets.third_party:
        resolve_name = tgt[PythonRequirementResolveField].normalized_value(python_setup)

        explicit_modules = tgt.get(PythonRequirementModulesField).value
        if explicit_modules:
            record(tgt.address, resolve_name, explicit_modules, is_type_stub=False)
            continue

        explicit_stub_modules = tgt.get(PythonRequirementTypeStubModulesField).value
        if explicit_stub_modules:
            record(tgt.address, resolve_name, explicit_stub_modules, is_type_stub=True)
            continue

        # Nothing explicit was given, so fall back to the defaults for each requirement.
        for req in tgt[PythonRequirementsField].value:
            # NB: The fallback value deliberately does not go through
            # `canonicalize_project_name()`, because we want to preserve `.` in the module
            # name. See https://www.python.org/dev/peps/pep-0503/#normalized-names.
            proj_name = canonicalize_project_name(req.name)
            fallback_value = req.name.strip().lower().replace("-", "_")

            modules_to_add: tuple[str, ...]
            provides_stubs: bool
            if proj_name in DEFAULT_MODULE_MAPPING:
                modules_to_add, provides_stubs = DEFAULT_MODULE_MAPPING[proj_name], False
            elif proj_name in DEFAULT_TYPE_STUB_MODULE_MAPPING:
                modules_to_add, provides_stubs = DEFAULT_TYPE_STUB_MODULE_MAPPING[proj_name], True
            # Stub patterns are tried first, since stub packages may also match impl package
            # patterns.
            elif modules_to_add := generate_mappings_from_pattern(proj_name, is_type_stub=True):
                provides_stubs = True
            elif modules_to_add := generate_mappings_from_pattern(proj_name, is_type_stub=False):
                provides_stubs = False
            else:
                modules_to_add, provides_stubs = (fallback_value,), False

            record(tgt.address, resolve_name, modules_to_add, is_type_stub=provides_stubs)

    frozen = FrozenDict(
        (
            resolve_name,
            FrozenDict(
                (module_name, tuple(sorted(owners)))
                for module_name, owners in sorted(providers_by_module.items())
            ),
        )
        for resolve_name, providers_by_module in sorted(providers_by_resolve.items())
    )
    return ThirdPartyPythonModuleMapping(frozen)
416

417

418
# -----------------------------------------------------------------------------------------------
419
# module -> owners
420
# -----------------------------------------------------------------------------------------------
421

422

423
@dataclass(frozen=True)
class PythonModuleOwners:
    """The target(s) owning a Python module.

    At most 2 targets can own the same module without ambiguity: one implementation and one
    .pyi type stub. More than one implementation target owning the same module is ambiguous,
    and such targets go into `ambiguous` rather than `unambiguous`. Consequently `unambiguous`
    should never hold more than 2 addresses, and only one of `unambiguous` and `ambiguous`
    should be non-empty.
    """

    unambiguous: tuple[Address, ...]
    ambiguous: tuple[Address, ...] = ()

    def __post_init__(self) -> None:
        # Populating both fields at once contradicts the contract above, so fail loudly.
        if self.ambiguous and self.unambiguous:
            raise AssertionError(
                softwrap(
                    """
                    A module has both unambiguous and ambiguous owners, which is a bug in the
                    dependency inference code. Please file a bug report at
                    https://github.com/pantsbuild/pants/issues/new.
                    """
                )
            )
447

448

449
@dataclass(frozen=True)
class PythonModuleOwnersRequest:
    """A request for the owners of a module, optionally restricted to one resolve."""

    # The dotted module name to find owners for.
    module: str
    # The resolve to look in, or None to not consider resolves.
    resolve: str | None
    # When set, ambiguity is resolved by picking the symbol provider that has the closest
    # common ancestor to this path. It must be a path relative to the build root.
    locality: str | None = None
11✔
456

457

458
@rule
async def map_module_to_address(
    request: PythonModuleOwnersRequest,
    first_party_mapping: FirstPartyPythonModuleMapping,
    third_party_mapping: ThirdPartyPythonModuleMapping,
) -> PythonModuleOwners:
    """Determine which addresses own the requested module.

    Third-party and first-party candidates are gathered, then narrowed per provider type:
    first to the candidates with the smallest ancestry, then (if `request.locality` is set) to
    those sharing the longest common path with the locality. If any provider type still has
    more than one candidate, all remaining addresses are returned as ambiguous.
    """
    candidates: tuple[PossibleModuleProvider, ...] = (
        *third_party_mapping.providers_for_module(request.module, resolve=request.resolve),
        *first_party_mapping.providers_for_module(request.module, resolve=request.resolve),
    )

    # First disambiguation step: for each provider type, keep only the providers of the closest
    # ancestors to the requested module. E.g., given providers for foo.bar and foo.bar.baz,
    # the latter wins. This avoids problems with namespace packages split between first-party
    # and third-party (e.g., https://github.com/pantsbuild/pants/discussions/17286).

    # Each value is a mutable pair:
    # [closest ancestry seen, providers of that type at that ancestry level].
    closest_by_type: dict[ModuleProviderType, list] = defaultdict(lambda: [999, []])
    for candidate in candidates:
        slot = closest_by_type[candidate.provider.typ]
        if candidate.ancestry > slot[0]:
            # Farther away than what is already recorded.
            continue
        if candidate.ancestry < slot[0]:
            # Strictly closer: discard what was collected at the previous level.
            slot[0] = candidate.ancestry
            slot[1] = []
        slot[1].append(candidate.provider)

    if request.locality:
        # Second step: for each provider type with several providers left, prefer those with
        # the closest common ancestor to the requester.
        for slot in closest_by_type.values():
            contenders = slot[1]
            if len(contenders) < 2:
                continue
            shared_lengths = [
                len(os.path.commonpath([request.locality, contender.addr.spec_path]))
                for contender in contenders
            ]
            longest = max(shared_lengths)
            # Replace the list contents in place so the pair keeps referring to the same list.
            contenders[:] = [
                contender
                for contender, shared_length in zip(contenders, shared_lengths)
                if shared_length == longest
            ]

    survivors: list[ModuleProvider] = [
        provider for slot in closest_by_type.values() for provider in slot[1]
    ]
    addresses = tuple(provider.addr for provider in survivors)
    # At most one provider may remain for each provider type; anything more is ambiguous.
    for slot in closest_by_type.values():
        if len(slot[1]) > 1:
            return PythonModuleOwners((), ambiguous=addresses)

    return PythonModuleOwners(addresses)
×
518

519

520
def rules():
    """Return the rules collected from this module plus the union registration of
    `FirstPartyPythonTargetsMappingMarker` under `FirstPartyPythonMappingImplMarker`."""
    collected = tuple(collect_rules())
    registration = UnionRule(FirstPartyPythonMappingImplMarker, FirstPartyPythonTargetsMappingMarker)
    return collected + (registration,)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc