• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

bramp / build-along / 20120615911

10 Dec 2025 10:24PM UTC coverage: 89.767% (+0.08%) from 89.689%
20120615911

push

github

bramp
Fix typo in fixtures README: .sh -> .py for regenerate script

11790 of 13134 relevant lines covered (89.77%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.16
/src/build_a_long/pdf_extract/classifier/rule_based_classifier.py
1
"""
2
Rule-based classifier implementation.
3
"""
4

5
from __future__ import annotations
1✔
6

7
import logging
1✔
8
from abc import abstractmethod
1✔
9

10
from build_a_long.pdf_extract.classifier.block_filter import (
1✔
11
    find_text_outline_effects,
12
)
13
from build_a_long.pdf_extract.classifier.candidate import Candidate
1✔
14
from build_a_long.pdf_extract.classifier.classification_result import (
1✔
15
    ClassificationResult,
16
)
17
from build_a_long.pdf_extract.classifier.label_classifier import (
1✔
18
    LabelClassifier,
19
)
20
from build_a_long.pdf_extract.classifier.rules import Rule, RuleContext
1✔
21
from build_a_long.pdf_extract.classifier.score import Score, Weight
1✔
22
from build_a_long.pdf_extract.extractor.page_blocks import Text
1✔
23

24
log = logging.getLogger(__name__)
1✔
25

26

27
class RuleScore(Score):
    """Score made up of individual per-rule results plus an overall total."""

    # Individual rule scores, keyed by rule name.
    components: dict[str, float]
    # Overall value reported by score().
    total_score: float

    def score(self) -> Weight:
        """Return the stored total score."""
        return self.total_score

    def get(self, rule_name: str, default: float = 0.0) -> float:
        """Return the score stored under ``rule_name``.

        ``default`` is returned when ``components`` has no entry for that name.
        """
        per_rule = self.components
        return per_rule.get(rule_name, default)
1✔
39

40

41
class RuleBasedClassifier(LabelClassifier):
    """Base classifier that scores each page block against a list of rules.

    Subclasses supply ``rules`` and may override ``min_score`` or
    ``_should_accept`` to tune which blocks become candidates.
    """

    @property
    @abstractmethod
    def rules(self) -> list[Rule]:
        """Return the rules this classifier evaluates for every block."""
        ...

    @property
    def min_score(self) -> float:
        """Lowest score ``_should_accept`` lets through; 0.0 unless overridden."""
        return 0.0

    def _score(self, result: ClassificationResult) -> None:
        """Score every block on the page and register accepted candidates.

        For each block, every rule is evaluated in order:

        * a rule returning ``None`` is ignored for that block;
        * a required rule returning 0 discards the block outright;
        * otherwise the rule's score is recorded and contributes to a
          weighted average using the rule's ``weight``.

        Blocks whose averaged score passes ``_should_accept`` are added to
        ``result`` as candidates labelled ``self.output``.
        """
        context = RuleContext(result.page_data, self.config)
        rules = self.rules

        for block in result.page_data.blocks:
            per_rule: dict[str, float] = {}
            numerator = 0.0
            denominator = 0.0

            for rule in rules:
                value = rule.calculate(block, context)

                # None means the rule has no opinion about this block.
                if value is None:
                    continue

                # A required rule scoring zero vetoes the block; leaving the
                # loop via break skips the else clause below.
                if rule.required and value == 0.0:
                    break

                weight = rule.weight
                numerator += value * weight
                denominator += weight
                per_rule[rule.name] = value
            else:
                # Weighted mean of the applicable rules; zero when no rule
                # contributed any weight.
                final_score = numerator / denominator if denominator > 0 else 0.0

                if self._should_accept(final_score):
                    log.debug(
                        "[%s] block_id=%s accepted: score=%.3f components=%s",
                        self.output,
                        block.id,
                        final_score,
                        per_rule,
                    )

                    # Text blocks also carry their outline-effect blocks as
                    # sources.
                    if isinstance(block, Text):
                        source_blocks: list = [
                            block,
                            *find_text_outline_effects(block, result.page_data.blocks),
                        ]
                    else:
                        source_blocks = [block]

                    result.add_candidate(
                        Candidate(
                            bbox=block.bbox,
                            label=self.output,
                            score=final_score,
                            score_details=RuleScore(
                                components=per_rule, total_score=final_score
                            ),
                            source_blocks=source_blocks,
                        )
                    )
                else:
                    log.debug(
                        "[%s] block_id=%s rejected: score=%.3f < min_score=%.3f components=%s",
                        self.output,
                        block.id,
                        final_score,
                        self.min_score,
                        per_rule,
                    )

    def _should_accept(self, score: float) -> bool:
        """Return True when ``score`` reaches ``min_score``.

        Subclasses may override this to apply different acceptance logic.
        """
        threshold = self.min_score
        return score >= threshold
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc