• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

bramp / build-along / 20015271182

08 Dec 2025 03:02AM UTC coverage: 90.402% (+0.1%) from 90.299%
20015271182

push

github

bramp
Minor rename of property in subassembly config.

3 of 3 new or added lines in 1 file covered. (100.0%)

125 existing lines in 14 files now uncovered.

11039 of 12211 relevant lines covered (90.4%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.45
/src/build_a_long/pdf_extract/classifier/steps/step_count_classifier.py
1
"""
2
Step count classifier.
3

4
Purpose
5
-------
6
Detect step-count text like "2x" that appears in substep callout boxes.
7
These are similar to part counts but use a larger font size (typically 16pt),
8
between part count size and step number size.
9

10
Debugging
11
---------
12
Enable DEBUG logs with LOG_LEVEL=DEBUG.
13
"""
14

15
import logging
1✔
16

17
from build_a_long.pdf_extract.classifier.candidate import Candidate
1✔
18
from build_a_long.pdf_extract.classifier.classification_result import (
1✔
19
    ClassificationResult,
20
)
21
from build_a_long.pdf_extract.classifier.config import StepCountConfig
1✔
22
from build_a_long.pdf_extract.classifier.label_classifier import (
1✔
23
    LabelClassifier,
24
)
25
from build_a_long.pdf_extract.classifier.score import Score, Weight
1✔
26
from build_a_long.pdf_extract.classifier.text import (
1✔
27
    extract_part_count_value,
28
)
29
from build_a_long.pdf_extract.extractor.lego_page_elements import (
1✔
30
    StepCount,
31
)
32
from build_a_long.pdf_extract.extractor.page_blocks import Text
1✔
33

34
log = logging.getLogger(__name__)
1✔
35

36

37
class _StepCountScore(Score):
    """Score breakdown for a single step-count candidate.

    Holds the two component scores together with the configuration that
    supplies their weights, so the final value can be recomputed on demand.
    """

    text_score: float
    """Score based on how well the text matches count patterns (0.0-1.0)."""

    font_size_score: float
    """Score based on font size being between part count and step number (0.0-1.0)."""

    config: StepCountConfig
    """Step count configuration for dynamic score calculations."""

    def score(self) -> Weight:
        """Return the weighted sum of the text and font-size components.

        Each component is multiplied by its weight from ``config``
        (``text_weight`` and ``font_size_weight``) before being added.
        """
        weights = self.config
        text_term = weights.text_weight * self.text_score
        font_size_term = weights.font_size_weight * self.font_size_score
        return text_term + font_size_term
58

59

60
class StepCountClassifier(LabelClassifier):
    """Classifier for step counts (substep counts like "2x").

    These are count labels that appear inside substep callout boxes,
    indicating how many times to build the sub-assembly.
    They use a font size between part counts and step numbers.
    """

    output = "step_count"
    requires = frozenset()

    def _score(self, result: ClassificationResult) -> None:
        """Score the page's text blocks and register step-count candidates.

        Every ``Text`` block whose text parses as a count is scored on text
        match and font size. Blocks whose combined score falls below the
        configured ``min_score`` are skipped; the rest are added to
        ``result`` as ``"step_count"`` candidates.

        Args:
            result: Classification state for the page. Its ``page_data.blocks``
                are read and candidates are appended via ``add_candidate``.
        """
        page_data = result.page_data
        if not page_data.blocks:
            return

        step_count_config = self.config.step_count

        for block in page_data.blocks:
            if not isinstance(block, Text):
                continue

            # Check if text matches count pattern (e.g., "2x", "4x")
            text_score = self._score_count_text(block.text)
            if text_score == 0.0:
                continue

            # Score font size: should be >= part_count_size and <= step_number_size
            font_size_score = 0.5  # Default neutral score when size is unknown
            if block.font_size is not None:
                font_size_score = self._score_step_count_font_size(block.font_size)

            detail_score = _StepCountScore(
                text_score=text_score,
                font_size_score=font_size_score,
                config=step_count_config,
            )

            combined = detail_score.score()

            # Skip candidates below minimum score threshold
            if combined < step_count_config.min_score:
                # font_size is logged with %s because it may be None; a
                # float-only format such as %.1f cannot render None.
                log.debug(
                    "[step_count] Skipping low-score candidate: text='%s' "
                    "font_size=%s score=%.3f (below threshold %.3f)",
                    block.text,
                    block.font_size,
                    combined,
                    step_count_config.min_score,
                )
                continue

            result.add_candidate(
                Candidate(
                    bbox=block.bbox,
                    label="step_count",
                    score=combined,
                    score_details=detail_score,
                    source_blocks=[block],
                ),
            )
            # Same None-safe %s formatting for font_size as above.
            log.debug(
                "[step_count] Candidate: text='%s' font_size=%s score=%.3f",
                block.text,
                block.font_size,
                combined,
            )

    def build(self, candidate: Candidate, result: ClassificationResult) -> StepCount:
        """Construct a StepCount element from a candidate.

        The candidate may include additional source blocks (e.g., text outline
        effects) beyond the primary Text block.

        Args:
            candidate: Candidate whose first source block is the ``Text``
                block carrying the count.
            result: Classification state for the page (not used here).

        Returns:
            A ``StepCount`` with the parsed count and the text block's bbox.

        Raises:
            ValueError: If the text block's text cannot be parsed as a count.
        """
        # Get the primary text block (first in source_blocks)
        assert len(candidate.source_blocks) >= 1
        block = candidate.source_blocks[0]
        assert isinstance(block, Text)

        # Parse the count value
        value = extract_part_count_value(block.text)
        if value is None:
            raise ValueError(f"Could not parse step count from text: '{block.text}'")

        return StepCount(count=value, bbox=block.bbox)

    def _score_count_text(self, text: str) -> float:
        """Score text based on how well it matches count patterns.

        Args:
            text: Raw text of the block being considered.

        Returns:
            1.0 if ``extract_part_count_value`` parses a value from the text
            (e.g., "2x"), 0.0 otherwise.
        """
        return 1.0 if extract_part_count_value(text) is not None else 0.0

    def _score_step_count_font_size(self, font_size: float) -> float:
        """Score font size for step counts.

        Step counts should have a font size that is:
        - Greater than or equal to part_count_size
        - Less than or equal to step_number_size

        A tolerance of 1pt is applied at both ends of that range.

        Args:
            font_size: Font size of the text block being scored.

        Returns:
            0.5 if either size hint is missing, so there is nothing to
            compare against.
            0.0 if font size is more than the tolerance below
            part_count_size or above step_number_size.
            1.0 if font size is in range and more than the tolerance above
            part_count_size.
            0.7 if font size is in range but within the tolerance of
            part_count_size.
        """
        hints = self.config.font_size_hints
        part_count_size = hints.part_count_size
        step_number_size = hints.step_number_size

        # If we don't have both hints, give a neutral score
        if part_count_size is None or step_number_size is None:
            return 0.5

        # Check if font size is in the expected range
        # Allow some tolerance (within 1pt)
        tolerance = 1.0

        if font_size < part_count_size - tolerance:
            # Too small - likely a part count or something smaller
            return 0.0

        if font_size > step_number_size + tolerance:
            # Too large - likely a step number or larger element
            return 0.0

        # Font size is in the expected range
        # Give higher score if it's strictly between the two sizes
        if font_size > part_count_size + tolerance:
            # Clearly larger than part count - good indicator
            return 1.0

        # Font size is close to part count size - less confident
        return 0.7
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc