• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

bramp / build-along / 19995046189

06 Dec 2025 10:18PM UTC coverage: 90.506% (+0.09%) from 90.421%
19995046189

push

github

bramp
test: regenerate golden files for step classifier refactoring

10525 of 11629 relevant lines covered (90.51%)

0.91 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.33
/src/build_a_long/pdf_extract/classifier/steps/rotation_symbol_classifier.py
1
"""
2
Rotation symbol classifier.
3

4
Purpose
5
-------
6
Identify rotation symbols on LEGO instruction pages. These symbols indicate
7
that the builder should rotate the assembled model. They appear as small,
8
isolated, square clusters of Drawing elements (~46px).
9

10
Heuristic
11
---------
12
1. Collect all Drawing blocks on the page
13
2. Build connected components (clusters) using bbox overlap
14
3. For each cluster, compute the union bbox
15
4. Score clusters that are:
16
   - Square-ish (aspect ratio ~0.95-1.05)
17
   - Small (~41-51 pixels per side, ±10% of ideal 46px)
18
   - Near a Diagram element
19

20
The key insight is that rotation symbols are vector drawings that are ISOLATED -
21
they don't overlap with nearby diagram elements. Images are excluded because
22
their bounding boxes may overlap with diagrams even when the visible content
23
(ignoring transparent areas) appears disconnected.
24

25
Debugging
26
---------
27
Enable with `LOG_LEVEL=DEBUG` for structured logs.
28
"""
29

30
from __future__ import annotations
1✔
31

32
import logging
1✔
33

34
from build_a_long.pdf_extract.classifier.candidate import Candidate
1✔
35
from build_a_long.pdf_extract.classifier.classification_result import (
1✔
36
    ClassificationResult,
37
)
38
from build_a_long.pdf_extract.classifier.label_classifier import (
1✔
39
    LabelClassifier,
40
)
41
from build_a_long.pdf_extract.classifier.score import Score, Weight
1✔
42
from build_a_long.pdf_extract.extractor.bbox import (
1✔
43
    BBox,
44
    build_all_connected_clusters,
45
)
46
from build_a_long.pdf_extract.extractor.lego_page_elements import (
1✔
47
    RotationSymbol,
48
)
49
from build_a_long.pdf_extract.extractor.page_blocks import (
1✔
50
    Drawing,
51
    Image,
52
)
53

54
# Module-level logger; this classifier's debug messages are prefixed with
# "[rotation_symbol]".
log = logging.getLogger(__name__)
1✔
55

56

57
class _RotationSymbolScore(Score):
    """Internal score representation for rotation symbol classification."""

    size_score: float
    """Score based on size being in expected range (0.0-1.0)."""

    aspect_score: float
    """Score based on aspect ratio being square-ish (0.0-1.0)."""

    proximity_to_diagram: float
    """Score based on proximity to a diagram (0.0-1.0)."""

    # Per-component weights applied by score(); the defaults add up to 1.0.
    size_weight: float = 0.5
    aspect_weight: float = 0.3
    proximity_weight: float = 0.2

    def score(self) -> Weight:
        """Combine the three component scores into one weighted total."""
        weighted_size = self.size_score * self.size_weight
        weighted_aspect = self.aspect_score * self.aspect_weight
        weighted_proximity = self.proximity_to_diagram * self.proximity_weight
        return weighted_size + weighted_aspect + weighted_proximity
81

82

83
class RotationSymbolClassifier(LabelClassifier):
1✔
84
    """Classifier for rotation symbol elements."""
85

86
    output = "rotation_symbol"
1✔
87
    requires = frozenset({"diagram"})
1✔
88

89
    def _score(self, result: ClassificationResult) -> None:
        """Propose rotation-symbol candidates from clusters of Drawing blocks.

        Drawing blocks that do not span more than 90% of the page in either
        dimension are grouped with ``build_all_connected_clusters``. The union
        bbox of each cluster is scored by ``_score_bbox``; clusters that are
        not rejected there and whose total score is above
        ``config.rotation_symbol.min_score`` are added to ``result`` as
        ``"rotation_symbol"`` candidates with no source blocks claimed.

        Args:
            result: Classification result providing the page data and the
                already-scored ``"diagram"`` candidates; new candidates are
                added to it.
        """
        page = result.page_data
        cfg = self.config
        page_bounds = page.bbox
        assert page_bounds is not None

        # Diagram candidates feed the proximity component of the score.
        diagrams = result.get_scored_candidates(
            "diagram", valid_only=False, exclude_failed=True
        )

        # Drawings spanning more than 90% of the page width or height are
        # typically backgrounds/borders; keeping them would glue unrelated
        # symbols into a single cluster.
        width_limit = page_bounds.width * 0.9
        height_limit = page_bounds.height * 0.9
        drawings: list[Drawing] = []
        for block in page.blocks:
            if not isinstance(block, Drawing):
                continue
            fits_page_limits = (
                block.bbox.width <= width_limit
                and block.bbox.height <= height_limit
            )
            if fits_page_limits:
                drawings.append(block)

        if not drawings:
            return

        # Connected components based on bbox overlap.
        clusters = build_all_connected_clusters(drawings)

        log.debug(
            "[rotation_symbol] Found %d clusters from %d drawings",
            len(clusters),
            len(drawings),
        )

        for members in clusters:
            union_bbox = BBox.union_all([member.bbox for member in members])

            details = self._score_bbox(union_bbox, diagrams)
            if details is None:
                log.debug(
                    "[rotation_symbol] Rejected cluster at %s "
                    "(%d blocks) size=%.1fx%.1f - outside size/aspect constraints",
                    union_bbox,
                    len(members),
                    union_bbox.width,
                    union_bbox.height,
                )
                continue

            # The weighted total is reused for the threshold test, the
            # candidate itself and the log output.
            total = details.score()
            threshold = cfg.rotation_symbol.min_score
            if total <= threshold:
                log.debug(
                    "[rotation_symbol] Rejected cluster at %s "
                    "(%d blocks) score=%.2f < min_score=%.2f",
                    union_bbox,
                    len(members),
                    total,
                    threshold,
                )
                continue

            new_candidate = Candidate(
                bbox=union_bbox,
                label="rotation_symbol",
                score=total,
                score_details=details,
                # No source blocks are claimed here: rotation symbols can
                # coexist with diagrams and part images.
                source_blocks=[],
            )
            result.add_candidate(new_candidate)
            log.debug(
                "[rotation_symbol] Cluster candidate at %s "
                "(%d blocks) score=%.2f "
                "(size=%.2f aspect=%.2f proximity=%.2f)",
                union_bbox,
                len(members),
                total,
                details.size_score,
                details.aspect_score,
                details.proximity_to_diagram,
            )
175

176
    def build(
        self, candidate: Candidate, result: ClassificationResult
    ) -> RotationSymbol:
        """Materialise the RotationSymbol element for a candidate.

        Small images located by ``_find_rotation_symbol_images`` (e.g.
        dropshadows or reference diagrams that visually belong to the symbol)
        are folded into the candidate: they are appended to its
        ``source_blocks`` so they count as consumed, and the candidate's bbox
        is replaced by the union of its own bbox and theirs.

        Args:
            candidate: The rotation symbol candidate; mutated in place when
                images are claimed.
            result: Classification result used to look up claimable images.

        Returns:
            A RotationSymbol covering the candidate's (possibly grown) bbox.
        """
        extra_images = self._find_rotation_symbol_images(candidate, result)
        if not extra_images:
            # Nothing to absorb: the candidate is used unchanged.
            return RotationSymbol(bbox=candidate.bbox)

        grown_bbox = BBox.union_all(
            [candidate.bbox, *(image.bbox for image in extra_images)]
        )

        # Recording the images as source blocks marks them as consumed.
        candidate.source_blocks.extend(extra_images)

        # Logged before the bbox is reassigned so that both the old and the
        # new extent show up in the message.
        log.debug(
            "[rotation_symbol] Claimed %d additional images for rotation symbol "
            "at %s, expanded bbox to %s, source_blocks=%s",
            len(extra_images),
            candidate.bbox,
            grown_bbox,
            [b.id for b in candidate.source_blocks],
        )

        candidate.bbox = grown_bbox
        return RotationSymbol(bbox=candidate.bbox)
1✔
208

209
    def _find_rotation_symbol_images(
        self, candidate: Candidate, result: ClassificationResult
    ) -> list[Image]:
        """Collect Image blocks that visually belong to the rotation symbol.

        Typical matches are dropshadows or small reference diagrams stored as
        separate Image blocks. An Image block is returned when all of the
        following hold:

        * its id is not in the result's consumed blocks,
        * its bbox overlaps the candidate bbox grown by 10 units on each side,
        * neither its width nor its height exceeds twice the candidate's
          width (larger images are likely main diagrams).

        Args:
            candidate: The rotation symbol candidate.
            result: Classification result holding the page data and the
                consumed-block bookkeeping.

        Returns:
            The matching Image blocks, in page block order.
        """
        page = result.page_data
        symbol_bbox = candidate.bbox

        # Upper bound for either side of a claimable image.
        size_limit = symbol_bbox.width * 2.0

        # Gap tolerated between the symbol and an image that belongs to it.
        margin = 10.0
        search_area = BBox(
            x0=symbol_bbox.x0 - margin,
            y0=symbol_bbox.y0 - margin,
            x1=symbol_bbox.x1 + margin,
            y1=symbol_bbox.y1 + margin,
        )

        matches: list[Image] = []
        for block in page.blocks:
            # Only images that nobody has consumed yet are eligible.
            if not isinstance(block, Image) or block.id in result._consumed_blocks:
                continue

            image_bbox = block.bbox
            if not image_bbox.overlaps(search_area):
                continue

            oversized = (
                image_bbox.width > size_limit or image_bbox.height > size_limit
            )
            if oversized:
                log.debug(
                    "[rotation_symbol] Skipping large image at %s "
                    "(size %.1fx%.1f > max %.1f)",
                    image_bbox,
                    image_bbox.width,
                    image_bbox.height,
                    size_limit,
                )
                continue

            matches.append(block)
            log.debug(
                "[rotation_symbol] Found rotation symbol image at %s (size %.1fx%.1f)",
                image_bbox,
                image_bbox.width,
                image_bbox.height,
            )

        return matches
1✔
284

285
    def _score_bbox(
        self,
        bbox: BBox,
        diagram_candidates: list[Candidate],
    ) -> _RotationSymbolScore | None:
        """Score a bounding box as a potential rotation symbol.

        Hard constraints are applied first: both sides must lie within
        ``[min_size, max_size]`` and the width/height ratio within
        ``[min_aspect, max_aspect]`` (all read from
        ``self.config.rotation_symbol``). Boxes that pass receive size,
        aspect and proximity component scores.

        Args:
            bbox: Bounding box to score
            diagram_candidates: List of diagram candidates for proximity scoring

        Returns:
            Score details if this could be a rotation symbol, None when the
            box violates the size or aspect constraints
        """
        rs_config = self.config.rotation_symbol
        width = bbox.width
        height = bbox.height

        # Check basic size constraints
        if (
            width < rs_config.min_size
            or width > rs_config.max_size
            or height < rs_config.min_size
            or height > rs_config.max_size
        ):
            return None

        # Reject on aspect ratio before computing any component score, so no
        # work is done for boxes that are discarded anyway.
        aspect = width / height if height > 0 else 0
        if aspect < rs_config.min_aspect or aspect > rs_config.max_aspect:
            return None

        # Score size (prefer boxes close to the ideal size). A non-positive
        # ideal size would divide by zero or push the score above 1.0, so it
        # degrades to an exact-match test instead.
        ideal_size = rs_config.ideal_size
        size_diff = abs(width - ideal_size) + abs(height - ideal_size)
        if ideal_size > 0:
            size_score = max(0.0, 1.0 - (size_diff / (ideal_size * 2)))
        else:
            size_score = 1.0 if size_diff == 0 else 0.0

        # Score aspect ratio: a perfect square scores 1.0 and the score
        # falls linearly, reaching 0 once the deviation from 1.0 equals
        # (max_aspect - 1.0). When max_aspect <= 1.0 there is no positive
        # tolerance to divide by, so only an exact square earns a score.
        aspect_diff = abs(aspect - 1.0)
        aspect_tolerance = rs_config.max_aspect - 1.0
        if aspect_tolerance > 0:
            aspect_score = max(0.0, 1.0 - (aspect_diff / aspect_tolerance))
        else:
            aspect_score = 1.0 if aspect_diff == 0 else 0.0

        # Score proximity to diagrams
        proximity_score = self._calculate_proximity_to_diagrams(
            bbox, diagram_candidates
        )

        return _RotationSymbolScore(
            size_score=size_score,
            aspect_score=aspect_score,
            proximity_to_diagram=proximity_score,
            size_weight=rs_config.size_weight,
            aspect_weight=rs_config.aspect_weight,
            proximity_weight=rs_config.proximity_weight,
        )
340

341
    def _calculate_proximity_to_diagrams(
1✔
342
        self, bbox: BBox, diagram_candidates: list[Candidate]
343
    ) -> float:
344
        """Calculate proximity score based on distance to nearest diagram.
345

346
        Rotation symbols are typically positioned near diagrams.
347

348
        Args:
349
            bbox: Bounding box of the potential rotation symbol
350
            diagram_candidates: List of diagram candidates
351

352
        Returns:
353
            Score from 0.0 (far from diagrams) to 1.0 (very close to diagram)
354
        """
355
        rs_config = self.config.rotation_symbol
1✔
356
        close_distance = rs_config.proximity_close_distance
1✔
357
        far_distance = rs_config.proximity_far_distance
1✔
358

359
        if not diagram_candidates:
1✔
360
            # No diagrams on page, give neutral score
361
            return 0.5
×
362

363
        # Find minimum edge-to-edge distance to any diagram
364
        min_distance = min(
1✔
365
            bbox.min_distance(diagram_cand.bbox) for diagram_cand in diagram_candidates
366
        )
367

368
        # Score based on distance (closer = better)
369
        # min_distance returns 0.0 for overlapping bboxes
370
        if min_distance < close_distance:
1✔
371
            return 1.0
1✔
372
        elif min_distance > far_distance:
1✔
373
            return 0.0
×
374
        else:
375
            return 1.0 - (
1✔
376
                (min_distance - close_distance) / (far_distance - close_distance)
377
            )
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc