• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

bramp / build-along / 20398712053

20 Dec 2025 07:00PM UTC coverage: 89.361% (+0.2%) from 89.185%
20398712053

push

github

bramp
Improve circular dependency error to show dependency chain

- Add _find_dependency_cycle() to trace and format the actual circular dependency path
- Update error message to include both affected classifiers and the dependency chain
- Add test case to verify circular dependency detection and error message format

48 of 56 new or added lines in 2 files covered. (85.71%)

145 existing lines in 28 files now uncovered.

13700 of 15331 relevant lines covered (89.36%)

0.89 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.7
/src/build_a_long/pdf_extract/classifier/rule_based_classifier.py
1
"""
2
Rule-based classifier implementation.
3
"""
4

5
from __future__ import annotations
1✔
6

7
import logging
1✔
8
from abc import abstractmethod
1✔
9
from collections.abc import Sequence
1✔
10
from typing import TYPE_CHECKING
1✔
11

12
from build_a_long.pdf_extract.classifier.block_filter import (
1✔
13
    find_contained_effects,
14
)
15
from build_a_long.pdf_extract.classifier.candidate import Candidate
1✔
16
from build_a_long.pdf_extract.classifier.classification_result import (
1✔
17
    ClassificationResult,
18
)
19
from build_a_long.pdf_extract.classifier.label_classifier import (
1✔
20
    LabelClassifier,
21
)
22
from build_a_long.pdf_extract.classifier.rules import Rule, RuleContext
1✔
23
from build_a_long.pdf_extract.classifier.score import Score, Weight
1✔
24
from build_a_long.pdf_extract.extractor.bbox import BBox
1✔
25
from build_a_long.pdf_extract.extractor.page_blocks import Block, Blocks, Text
1✔
26

27
if TYPE_CHECKING:
28
    pass
29

30
# Module-level logger, named after this module via ``__name__``.
log = logging.getLogger(__name__)
1✔
31

32

33
class RuleScore(Score):
    """Score produced by evaluating a set of named rules.

    Holds the individual per-rule results together with the combined total.
    """

    # Per-rule results, keyed by rule name.
    components: dict[str, float]
    # Combined score reported by ``score()``.
    total_score: float

    def score(self) -> Weight:
        """Return the combined total score."""
        return self.total_score

    def get(self, rule_name: str, default: float = 0.0) -> float:
        """Look up the result recorded for ``rule_name``.

        Args:
            rule_name: Name of the rule whose result is wanted.
            default: Value returned when no result is recorded for that name.

        Returns:
            The recorded result, or ``default`` if the rule is absent.
        """
        recorded = self.components
        return recorded.get(rule_name, default)
45

46

47
class StepNumberScore(RuleScore):
    """Rule score for step-number candidates that carries the parsed value.

    Keeping the value on the score means later consumers (e.g., code that
    builds StepNumber elements or sorts steps) do not have to re-parse the
    step number from the source blocks.
    """

    step_value: int
    """The parsed step number value (e.g., 1, 2, 3, 42)."""
56

57

58
class RuleBasedClassifier(LabelClassifier):
    """Base classifier that scores every page block against a list of rules.

    Subclasses supply ``rules``. ``_score`` combines the per-rule results into
    a weighted average and registers a candidate for each accepted block.
    """

    @property
    @abstractmethod
    def rules(self) -> Sequence[Rule]:
        """Return the rules this classifier evaluates for each block."""
        ...

    @property
    def min_score(self) -> float:
        """Lowest score that ``_should_accept`` lets through (0.0 by default)."""
        return 0.0

    @property
    def effects_margin(self) -> float | None:
        """Margin used when looking for visual effects around ``Text`` blocks.

        The value is forwarded to ``find_contained_effects`` to pick up
        outlines and shadows near the block. Returning ``None`` disables the
        automatic effect lookup. The default is 2.0.
        """
        return 2.0

    @property
    def effects_max_area_ratio(self) -> float | None:
        """Upper bound on effect-block area relative to the primary block.

        Forwarded as ``max_area_ratio`` when looking for effects, so that
        unrelated large blocks are not consumed as effects. The default of
        ``None`` means no ratio check.
        """
        return None

    def _create_score(
        self,
        block: Block,
        components: dict[str, float],
        total_score: float,
    ) -> RuleScore:
        """Build the score object attached to a candidate.

        This is an extension point: a subclass may return a richer
        ``RuleScore`` subtype (for instance one carrying a parsed value).
        The base implementation does not use ``block``.

        Args:
            block: The block that was scored.
            components: Mapping of rule name to that rule's score.
            total_score: The weighted total score.

        Returns:
            A ``RuleScore`` (or subclass) instance.
        """
        return RuleScore(components=components, total_score=total_score)

    def _score(self, result: ClassificationResult) -> None:
        """Evaluate every block on the page and register accepted candidates.

        For each block the applicable rules are combined into a weighted
        average. Rules returning ``None`` are ignored, and a required rule
        scoring exactly 0.0 discards the block. Blocks that pass
        ``_should_accept`` are added to ``result`` as candidates.

        Args:
            result: Classification state providing the page blocks and
                receiving the new candidates.
        """
        ctx = RuleContext(result.page_data, self.config, result)
        active_rules = self.rules

        for block in result.page_data.blocks:
            per_rule: dict[str, float] = {}
            numerator = 0.0
            denominator = 0.0
            required_rule_failed = False

            for rule in active_rules:
                value = rule.calculate(block, ctx)

                if value is None:
                    # The rule does not apply to this block; ignore it.
                    continue

                if rule.required and value == 0.0:
                    # A required rule scoring zero vetoes the block outright.
                    # This happens for most blocks, so it is not logged.
                    required_rule_failed = True
                    break

                weight = rule.weight
                numerator += value * weight
                denominator += weight
                per_rule[rule.name] = value

            if required_rule_failed:
                continue

            # Weighted average; 0.0 when no rule contributed any weight.
            if denominator > 0:
                final_score = numerator / denominator
            else:
                final_score = 0.0

            # Classifier-specific acceptance check.
            if not self._should_accept(final_score):
                log.debug(
                    "[%s] block_id=%s "
                    "rejected: score=%.3f < min_score=%.3f components=%s",
                    self.output,
                    block.id,
                    final_score,
                    self.min_score,
                    per_rule,
                )
                continue

            log.debug(
                "[%s] block_id=%s accepted: score=%.3f components=%s",
                self.output,
                block.id,
                final_score,
                per_rule,
            )

            # Gather the extra blocks first: visual effects (outlines,
            # shadows) for Text blocks, then classifier-specific additions.
            extras: list[Blocks] = []
            margin = self.effects_margin
            if isinstance(block, Text) and margin is not None:
                extras.extend(
                    find_contained_effects(
                        block,
                        result.page_data.blocks,
                        margin=margin,
                        max_area_ratio=self.effects_max_area_ratio,
                    )
                )
            extras.extend(self._get_additional_source_blocks(block, result))

            # The primary block comes first; extras follow in discovery
            # order, de-duplicated by block id.
            source_blocks: list[Blocks] = [block]
            seen_ids: set[int] = {block.id}
            for extra in extras:
                if extra.id in seen_ids:
                    continue
                seen_ids.add(extra.id)
                source_blocks.append(extra)

            # Subclasses may customise the score type via _create_score.
            score_details = self._create_score(block, per_rule, final_score)

            # The candidate bbox is the union of all source blocks so that it
            # matches the source_blocks union, as required by validation
            # (assert_element_bbox_matches_source_and_children).
            union_bbox = BBox.union_all([b.bbox for b in source_blocks])

            result.add_candidate(
                Candidate(
                    bbox=union_bbox,
                    label=self.output,
                    score=final_score,
                    score_details=score_details,
                    source_blocks=source_blocks,
                )
            )

    def _get_additional_source_blocks(
        self, block: Block, result: ClassificationResult
    ) -> Sequence[Blocks]:
        """Return extra blocks to attach to the candidate built for ``block``.

        This is an extension point: a subclass may return related blocks
        (e.g., overlapping drawings, drop shadows) to include in the
        candidate's ``source_blocks``. The base implementation returns none.

        NOTE(review): downstream code is expected to mark these blocks as
        removed when the candidate wins; that handling is not visible in
        this class.
        """
        return []

    def _should_accept(self, score: float) -> bool:
        """Return whether ``score`` is high enough to produce a candidate.

        The default compares against ``min_score``; subclasses can override.
        """
        threshold = self.min_score
        return score >= threshold
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc