• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

bramp / build-along / 20361865516

19 Dec 2025 06:25AM UTC coverage: 89.13% (-0.002%) from 89.132%
20361865516

push

github

bramp
Fix lint errors: line length, unused imports, and YAML issues

- Add ruff isort configuration with known-first-party for build_a_long
- Add per-file E501 ignore for legocom_test.py (JSON test data)
- Create .yamllint config to relax strict YAML rules
- Fix E501 line length errors by wrapping long comments and strings
- Fix F841 unused variable errors
- Fix PLC0415 import-at-non-top-level errors
- Fix SIM108 ternary simplification errors

12 of 14 new or added lines in 8 files covered. (85.71%)

78 existing lines in 6 files now uncovered.

12915 of 14490 relevant lines covered (89.13%)

0.89 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.22
/src/build_a_long/pdf_extract/classifier/rule_based_classifier.py
1
"""
2
Rule-based classifier implementation.
3
"""
4

5
from __future__ import annotations
1✔
6

7
import logging
1✔
8
from abc import abstractmethod
1✔
9
from typing import TYPE_CHECKING
1✔
10

11
from build_a_long.pdf_extract.classifier.block_filter import (
1✔
12
    find_text_outline_effects,
13
)
14
from build_a_long.pdf_extract.classifier.candidate import Candidate
1✔
15
from build_a_long.pdf_extract.classifier.classification_result import (
1✔
16
    ClassificationResult,
17
)
18
from build_a_long.pdf_extract.classifier.label_classifier import (
1✔
19
    LabelClassifier,
20
)
21
from build_a_long.pdf_extract.classifier.rules import Rule, RuleContext
1✔
22
from build_a_long.pdf_extract.classifier.score import Score, Weight
1✔
23
from build_a_long.pdf_extract.extractor.bbox import BBox
1✔
24
from build_a_long.pdf_extract.extractor.page_blocks import Block, Blocks, Text
1✔
25

26
if TYPE_CHECKING:
27
    pass
28

29
# Module-level logger; _score emits per-block accept/reject debug messages on it.
log = logging.getLogger(__name__)
1✔
30

31

32
class RuleScore(Score):
    """Score produced by evaluating a set of named rules.

    Holds the individual per-rule scores alongside the combined total.
    """

    components: dict[str, float]
    total_score: float

    def score(self) -> Weight:
        """Return the combined score stored in ``total_score``."""
        return self.total_score

    def get(self, rule_name: str, default: float = 0.0) -> float:
        """Look up the score recorded for ``rule_name``.

        Args:
            rule_name: Name of the rule whose score is wanted.
            default: Value returned when no score was recorded for the rule.

        Returns:
            The recorded score, or ``default`` if the rule is absent.
        """
        recorded = self.components
        if rule_name in recorded:
            return recorded[rule_name]
        return default
1✔
44

45

46
class StepNumberScore(RuleScore):
    """Rule score for a step-number candidate, carrying the parsed value.

    Keeping the parsed number on the score means later consumers (for
    example code that builds StepNumber elements or sorts by step) do not
    have to parse it again from the source blocks.
    """

    step_value: int
    """The parsed step number value (e.g., 1, 2, 3, 42)."""
1✔
55

56

57
class RuleBasedClassifier(LabelClassifier):
    """Base class for classifiers that score candidates with a list of rules.

    Subclasses provide ``rules``. ``_score`` evaluates every rule against
    every block of the page, combines the applicable rule scores into a
    weight-normalised average, and registers a ``Candidate`` on the result
    for each block that ``_should_accept`` approves.
    """

    @property
    @abstractmethod
    def rules(self) -> list[Rule]:
        """Return the rules this classifier applies to each block."""

    @property
    def min_score(self) -> float:
        """Lowest score that is still accepted; 0.0 unless overridden."""
        return 0.0

    def _create_score(
        self,
        block: Block,
        components: dict[str, float],
        total_score: float,
    ) -> RuleScore:
        """Build the score object attached to a candidate.

        Override in subclasses to return a richer score type carrying
        extra information (e.g., parsed values).

        Args:
            block: The block that was scored.
            components: Mapping of rule name to that rule's score.
            total_score: The weighted total score.

        Returns:
            A RuleScore (or subclass) instance.
        """
        return RuleScore(components=components, total_score=total_score)

    def _score(self, result: ClassificationResult) -> None:
        """Score every page block with the rules and record the candidates.

        Args:
            result: Classification result providing the page data; accepted
                candidates are added to it via ``add_candidate``.
        """
        rule_context = RuleContext(result.page_data, self.config, result)
        active_rules = self.rules

        for block in result.page_data.blocks:
            components = {}
            weighted_sum = 0.0
            total_weight = 0.0
            required_rule_failed = False

            for rule in active_rules:
                rule_score = rule.calculate(block, rule_context)

                # None means the rule does not apply to this block.
                if rule_score is None:
                    continue

                # A required rule scoring exactly zero disqualifies the block.
                if rule.required and rule_score == 0.0:
                    required_rule_failed = True
                    break

                weight = rule.weight
                weighted_sum += rule_score * weight
                total_weight += weight
                components[rule.name] = rule_score

            if required_rule_failed:
                continue

            # Weight-normalised average; zero when no rule contributed.
            if total_weight > 0:
                final_score = weighted_sum / total_weight
            else:
                final_score = 0.0

            # Classifier-specific acceptance check.
            if not self._should_accept(final_score):
                log.debug(
                    "[%s] block_id=%s rejected: "
                    "score=%.3f < min_score=%.3f components=%s",
                    self.output,
                    block.id,
                    final_score,
                    self.min_score,
                    components,
                )
                continue

            log.debug(
                "[%s] block_id=%s accepted: score=%.3f components=%s",
                self.output,
                block.id,
                final_score,
                components,
            )

            # Source blocks start with the block itself; Text blocks also
            # pull in their text outline effects.
            source_blocks: list = [block]
            if isinstance(block, Text):
                source_blocks += find_text_outline_effects(
                    block, result.page_data.blocks
                )

            # Then whatever extra blocks the concrete classifier contributes.
            source_blocks += self._get_additional_source_blocks(block, result)

            # Subclasses may supply a richer score via _create_score.
            score_details = self._create_score(block, components, final_score)

            # The candidate bbox is the union of all source block bboxes so
            # that it matches the source_blocks union, as required by
            # validation (assert_element_bbox_matches_source_and_children).
            member_boxes = [member.bbox for member in source_blocks]
            candidate_bbox = BBox.union_all(member_boxes)

            result.add_candidate(
                Candidate(
                    bbox=candidate_bbox,
                    label=self.output,
                    score=final_score,
                    score_details=score_details,
                    source_blocks=source_blocks,
                )
            )

    def _get_additional_source_blocks(
        self, block: Block, result: ClassificationResult
    ) -> list[Blocks]:
        """Return extra source blocks to attach to the candidate.

        The base implementation contributes none. Subclasses can override
        this to include related blocks (e.g., overlapping drawings, drop
        shadows) in the candidate's source_blocks. These blocks will be
        marked as removed if the candidate wins.
        """
        return []

    def _should_accept(self, score: float) -> bool:
        """Report whether ``score`` is high enough to become a candidate.

        The default compares against ``min_score``; subclasses can override.
        """
        threshold = self.min_score
        return score >= threshold
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc