• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pantsbuild / pants / 20332790708

18 Dec 2025 09:48AM UTC coverage: 64.992% (-15.3%) from 80.295%
20332790708

Pull #22949

github

web-flow
Merge f730a56cd into 407284c67
Pull Request #22949: Add experimental uv resolver for Python lockfiles

54 of 97 new or added lines in 5 files covered. (55.67%)

8270 existing lines in 295 files now uncovered.

48990 of 75379 relevant lines covered (64.99%)

1.81 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

45.18
/src/python/pants/backend/project_info/regex_lint.py
1
# Copyright 2019 Pants project contributors (see CONTRIBUTORS.md).
2
# Licensed under the Apache License, Version 2.0 (see LICENSE).
3

4
from __future__ import annotations
1✔
5

6
import logging
1✔
7
import re
1✔
8
from collections.abc import Iterable
1✔
9
from dataclasses import dataclass
1✔
10
from enum import Enum
1✔
11
from typing import Any
1✔
12

13
from pants.base.exiter import PANTS_FAILED_EXIT_CODE, PANTS_SUCCEEDED_EXIT_CODE
1✔
14
from pants.core.goals.lint import LintFilesRequest, LintResult, Partitions
1✔
15
from pants.engine.fs import PathGlobs
1✔
16
from pants.engine.intrinsics import get_digest_contents
1✔
17
from pants.engine.rules import collect_rules, implicitly, rule
1✔
18
from pants.option.option_types import DictOption, EnumOption, SkipOption
1✔
19
from pants.option.subsystem import Subsystem
1✔
20
from pants.util.frozendict import FrozenDict
1✔
21
from pants.util.logging import LogLevel
1✔
22
from pants.util.memo import memoized_method
1✔
23
from pants.util.strutil import help_text, softwrap
1✔
24

25
# Module-level logger, named after this module via `__name__`.
logger = logging.getLogger(__name__)
1✔
26

27

28
class DetailLevel(Enum):
    """How much detail about validation to emit to the console.

    none: Emit nothing.
    summary: Emit a summary only.
    nonmatching: Emit details for files that failed to match at least one pattern.
    names: Emit just the paths of files that failed to match at least one pattern.
    all: Emit details for all files.
    """

    none = "none"
    summary = "summary"
    nonmatching = "nonmatching"
    names = "names"
    all = "all"
1✔
43

44

45
@dataclass(frozen=True)
class PathPattern:
    """A named regex that is applied to file paths."""

    # Identifier used to refer to this pattern from `required_matches`.
    name: str
    # The regex source text.
    pattern: str
    # When true, the pattern counts as matching if the regex does NOT match.
    inverted: bool = False
    # The expected encoding of the content of files whose paths match this pattern.
    content_encoding: str = "utf8"
1✔
51

52

53
@dataclass(frozen=True)
class ContentPattern:
    """A named regex that is applied to file content."""

    # Identifier used to refer to this pattern from `required_matches`.
    name: str
    # The regex source text.
    pattern: str
    # When true, the pattern counts as matching if the regex does NOT match.
    inverted: bool = False
1✔
58

59

60
@dataclass(frozen=True)
class ValidationConfig:
    """The parsed regex-lint configuration: the known patterns and which ones go together."""

    path_patterns: tuple[PathPattern, ...]
    content_patterns: tuple[ContentPattern, ...]
    # path pattern name -> content pattern names. Each value may hold any number of names,
    # hence `tuple[str, ...]` rather than the 1-tuple type `tuple[str]`.
    required_matches: FrozenDict[str, tuple[str, ...]]

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> ValidationConfig:
        """Build a config from a plain dict.

        :param d: A dict with the keys `path_patterns` and `content_patterns` (each an iterable
            of keyword-argument dicts for `PathPattern` / `ContentPattern`) and
            `required_matches` (a mapping from path pattern name to an iterable of content
            pattern names).
        :raises KeyError: If any of the three keys is missing from `d`.
        """
        return cls(
            path_patterns=tuple(PathPattern(**kwargs) for kwargs in d["path_patterns"]),
            content_patterns=tuple(ContentPattern(**kwargs) for kwargs in d["content_patterns"]),
            required_matches=FrozenDict({k: tuple(v) for k, v in d["required_matches"].items()}),
        )
73

74

75
class RegexLintSubsystem(Subsystem):
    # Options for the regex linter, registered under the `regex-lint` scope.
    options_scope = "regex-lint"
    name = "regex-lint"
    help = help_text(
        """
        Lint your code using regex patterns, e.g. to check for copyright headers.

        To activate this with the `lint` goal, you must set `[regex-lint].config`.

        Unlike other linters, this can run on files not owned by targets, such as BUILD files.
        """
    )

    skip = SkipOption("lint")
    # The raw config dict; it is turned into a `ValidationConfig` in `get_multi_matcher`.
    _config = DictOption[Any](
        help=softwrap(
            """
            Config schema is as follows:

                ```
                {
                'required_matches': {
                    'path_pattern1': ['content_pattern1', 'content_pattern2'],
                    'path_pattern2': ['content_pattern1', 'content_pattern3'],
                    ...
                },
                'path_patterns': [
                    {
                    'name': 'path_pattern1',
                    'pattern': <path regex pattern>,
                    'inverted': True|False (defaults to False),
                    'content_encoding': <encoding> (defaults to utf8)
                    },
                    ...
                ],
                'content_patterns': [
                    {
                    'name': 'content_pattern1',
                    'pattern': <content regex pattern>,
                    'inverted': True|False (defaults to False)
                    },
                    ...
                ]
                }
                ```

            Meaning: if a file matches some path pattern, its content must match all the
            corresponding content patterns.

            It's often helpful to load this config from a JSON or YAML file. To do that, set
            `[regex-lint].config = '@path/to/config.yaml'`, for example.
            """
        ),
        fromfile=True,
    )
    detail_level = EnumOption(
        default=DetailLevel.nonmatching,
        help="How much detail to include in the result.",
    )

    @memoized_method
    def get_multi_matcher(self) -> MultiMatcher | None:
        """Return a `MultiMatcher` built from the configured dict, or None if no config is set.

        NOTE(review): `memoized_method` presumably caches the result per instance; confirm
        against `pants.util.memo`.
        """
        return MultiMatcher(ValidationConfig.from_dict(self._config)) if self._config else None
×
138

139

140
@dataclass(frozen=True)
class RegexMatchResult:
    """Outcome of checking one source file against its content patterns."""

    # Path of the file that was checked.
    path: str
    # Names of the content patterns the file matched.
    matching: tuple
    # Names of the content patterns the file failed to match.
    nonmatching: tuple
1✔
147

148

149
class Matcher:
    """Matches a single regex, optionally with the result inverted.

    The regex may match anywhere in the string (i.e. this is a "search" in Python regex terms).
    Use the ^ anchor to pin a match to the start of the string, or ^ together with the MULTILINE
    directive (?m) to pin it to the start of any line.
    """

    def __init__(self, pattern, inverted=False):
        self.inverted = inverted
        self.compiled_regex = re.compile(pattern)

    def matches(self, s):
        """Whether the (possibly inverted) pattern matches anywhere in the string s."""
        found = self.compiled_regex.search(s) is not None
        if self.inverted:
            return not found
        return found
×
165

166

167
class PathMatcher(Matcher):
    """A matcher for file paths that also carries the content encoding for matching files."""

    def __init__(self, path_pattern: PathPattern):
        # The expected encoding of the content of files whose paths match this pattern.
        self.content_encoding = path_pattern.content_encoding
        super().__init__(pattern=path_pattern.pattern, inverted=path_pattern.inverted)
×
174

175

176
class ContentMatcher(Matcher):
    """A matcher that is applied to the content of a file."""

    def __init__(self, content_pattern: ContentPattern):
        super().__init__(pattern=content_pattern.pattern, inverted=content_pattern.inverted)
×
181

182

183
class MultiMatcher:
    """Checks files against the content patterns required for the path patterns they match."""

    def __init__(self, config: ValidationConfig) -> None:
        """Validate `config` and build one matcher per configured pattern.

        :param config: The regex matching config.
        :raises ValueError: If a value in `required_matches` is not a tuple or list, or if
            `required_matches` mentions a path or content pattern name that is not defined.
        """
        # Validate the pattern names mentioned in required_matches.
        path_patterns_used: set[str] = set()
        content_patterns_used: set[str] = set()
        for k, v in config.required_matches.items():
            path_patterns_used.add(k)
            if not isinstance(v, (tuple, list)):
                raise ValueError(
                    f"Value for path pattern {k} in required_matches must be tuple of "
                    f"content pattern names, but was {v}"
                )
            content_patterns_used.update(v)

        unknown_path_patterns = path_patterns_used.difference(
            pp.name for pp in config.path_patterns
        )
        if unknown_path_patterns:
            raise ValueError(
                "required_matches uses unknown path pattern names: {}".format(
                    ", ".join(sorted(unknown_path_patterns))
                )
            )

        unknown_content_patterns = content_patterns_used.difference(
            cp.name for cp in config.content_patterns
        )
        if unknown_content_patterns:
            raise ValueError(
                "required_matches uses unknown content pattern names: {}".format(
                    ", ".join(sorted(unknown_content_patterns))
                )
            )

        self._path_matchers = {pp.name: PathMatcher(pp) for pp in config.path_patterns}
        self._content_matchers = {cp.name: ContentMatcher(cp) for cp in config.content_patterns}
        self._required_matches = config.required_matches

    def check_content(
        self, path: str, content: bytes, content_pattern_names: Iterable[str], encoding: str
    ) -> RegexMatchResult:
        """Check `content` against each named content pattern.

        :param path: The file's path; only recorded in the result.
        :param content: The raw bytes of the file.
        :param content_pattern_names: Names of the content patterns to check, in the order the
            results should be reported.
        :param encoding: The encoding used to decode `content`.
        :return: A result listing which pattern names matched and which did not.
        :raises KeyError: If a name is not a known content pattern.
        :raises UnicodeDecodeError: If `content` cannot be decoded with `encoding`.
        """
        names = tuple(content_pattern_names)
        if not names:
            # Nothing to check, so do not decode at all (decoding could fail needlessly).
            return RegexMatchResult(path, (), ())

        # Decode once rather than once per pattern: the text is the same for every matcher.
        text = content.decode(encoding)
        matching = []
        nonmatching = []
        for content_pattern_name in names:
            if self._content_matchers[content_pattern_name].matches(text):
                matching.append(content_pattern_name)
            else:
                nonmatching.append(content_pattern_name)
        return RegexMatchResult(path, tuple(matching), tuple(nonmatching))

    def get_applicable_content_pattern_names(self, path: str) -> tuple[set[str], str | None]:
        """Return the content patterns applicable to a given path.

        Returns a tuple (applicable_content_pattern_names, content_encoding).

        If path matches no path patterns, the returned content_encoding will be None (and
        applicable_content_pattern_names will be empty).

        :raises ValueError: If the path matches path patterns that declare different content
            encodings.
        """
        encodings = set()
        applicable_content_pattern_names: set[str] = set()
        for path_pattern_name, content_pattern_names in self._required_matches.items():
            m = self._path_matchers[path_pattern_name]
            if m.matches(path):
                encodings.add(m.content_encoding)
                applicable_content_pattern_names.update(content_pattern_names)
        if len(encodings) > 1:
            raise ValueError(
                "Path matched patterns with multiple content encodings ({}): {}".format(
                    ", ".join(sorted(encodings)), path
                )
            )
        content_encoding = next(iter(encodings)) if encodings else None
        return applicable_content_pattern_names, content_encoding
×
260

261

262
class RegexLintRequest(LintFilesRequest):
    # Ties this lint request type to the regex-lint subsystem.
    tool_subsystem = RegexLintSubsystem  # type: ignore[assignment]
1✔
264

265

266
@rule
async def partition_inputs(
    request: RegexLintRequest.PartitionRequest, regex_lint_subsystem: RegexLintSubsystem
) -> Partitions[str, Any]:
    """Put every file that has applicable content patterns into a single partition.

    Returns empty `Partitions` when the subsystem provides no matcher.
    """
    matcher = regex_lint_subsystem.get_multi_matcher()
    if matcher is None:
        return Partitions()

    def has_applicable_patterns(file_path: str) -> bool:
        # A file is linted only if some pattern names apply to it and an encoding is known.
        names, content_encoding = matcher.get_applicable_content_pattern_names(file_path)
        return bool(names and content_encoding)

    return Partitions.single_partition(
        [file_path for file_path in request.files if has_applicable_patterns(file_path)]
    )
×
281

282

283
@rule(desc="Lint with regex patterns", level=LogLevel.DEBUG)
async def lint_with_regex_patterns(
    request: RegexLintRequest.Batch[str, Any], regex_lint_subsystem: RegexLintSubsystem
) -> LintResult:
    """Check each file in the batch against its required content patterns.

    The lint fails if any file fails to match at least one of its required patterns. The
    configured detail level only controls what is written to stdout, never the exit code.
    """
    multi_matcher = regex_lint_subsystem.get_multi_matcher()
    assert multi_matcher is not None
    file_to_content_pattern_names_and_encoding = {}
    for fp in request.elements:
        content_pattern_names, encoding = multi_matcher.get_applicable_content_pattern_names(fp)
        assert content_pattern_names and encoding
        file_to_content_pattern_names_and_encoding[fp] = (content_pattern_names, encoding)

    digest_contents = await get_digest_contents(
        **implicitly(PathGlobs(globs=file_to_content_pattern_names_and_encoding.keys()))
    )

    result = []
    for file_content in digest_contents:
        content_patterns, encoding = file_to_content_pattern_names_and_encoding[file_content.path]
        result.append(
            multi_matcher.check_content(
                file_content.path, file_content.content, content_patterns, encoding
            )
        )

    detail_level = regex_lint_subsystem.detail_level
    # Collect the per-file output pieces and join once at the end.
    output_parts: list[str] = []
    num_matched_all = 0
    num_nonmatched_some = 0
    for rmr in sorted(result, key=lambda rmr: rmr.path):
        if not rmr.matching and not rmr.nonmatching:
            continue

        # Tally before any detail-level shortcut below: the exit code is derived from these
        # counts, so it must not depend on how much output was requested.
        if rmr.nonmatching:
            icon = "X"
            num_nonmatched_some += 1
        else:
            icon = "V"
            num_matched_all += 1

        if detail_level == DetailLevel.names:
            if rmr.nonmatching:
                output_parts.append(f"{rmr.path}\n")
            continue

        matched_msg = " Matched: {}".format(",".join(rmr.matching)) if rmr.matching else ""
        nonmatched_msg = (
            " Didn't match: {}".format(",".join(rmr.nonmatching)) if rmr.nonmatching else ""
        )
        if detail_level == DetailLevel.all or (
            detail_level == DetailLevel.nonmatching and nonmatched_msg
        ):
            output_parts.append(f"{icon} {rmr.path}:{matched_msg}{nonmatched_msg}\n")

    stdout = "".join(output_parts)
    if detail_level not in (DetailLevel.none, DetailLevel.names):
        if stdout:
            stdout += "\n"
        stdout += f"{num_matched_all} files matched all required patterns.\n"
        stdout += f"{num_nonmatched_some} files failed to match at least one required pattern."

    exit_code = PANTS_FAILED_EXIT_CODE if num_nonmatched_some else PANTS_SUCCEEDED_EXIT_CODE
    return LintResult(exit_code, stdout, "", RegexLintSubsystem.options_scope)
×
343

344

345
def rules():
    """Return the rules collected for this module followed by those of `RegexLintRequest`."""
    module_rules = collect_rules()
    request_rules = RegexLintRequest.rules()
    return (*module_rules, *request_rules)
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc