• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

bramp / build-along / 20398712053

20 Dec 2025 07:00PM UTC coverage: 89.361% (+0.2%) from 89.185%
20398712053

push

github

bramp
Improve circular dependency error to show dependency chain

- Add _find_dependency_cycle() to trace and format the actual circular dependency path
- Update error message to include both affected classifiers and the dependency chain
- Add test case to verify circular dependency detection and error message format

48 of 56 new or added lines in 2 files covered. (85.71%)

145 existing lines in 28 files now uncovered.

13700 of 15331 relevant lines covered (89.36%)

0.89 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.01
/src/build_a_long/pdf_extract/classifier/steps/substep_classifier.py
1
"""
2
SubStep classifier.
3

4
Purpose
5
-------
6
Identify SubStep elements by finding substep numbers paired with diagrams.
7

8
SubSteps are mini-steps that appear either:
9
1. Inside SubAssembly callout boxes (numbered 1, 2, 3 within the box)
10
2. As "naked" substeps on the page (small numbers 1, 2, 3, 4 alongside a main step)
11

12
This classifier pairs substep_number with diagram candidates based on position:
13
- Step number should be to the left or above the diagram
14
- Step number and diagram should be relatively close together
15

16
Architecture
17
------------
18
This classifier independently finds SubStep candidates during scoring:
19
- Gets substep_number candidates (from SubStepNumberClassifier - smaller font)
20
- Gets diagram candidates
21
- Creates SubStep candidates by pairing substep numbers with nearby diagrams
22

23
These candidates are then used by:
24
- SubAssemblyClassifier: claims SubSteps that are inside callout boxes
25
- StepClassifier: uses remaining SubSteps as "naked" substeps
26

27
The key insight is that SubStep step numbers have a SMALLER FONT SIZE than
28
main step numbers. SubStepNumberClassifier detects these smaller font step numbers
29
and this classifier pairs them with diagrams.
30
"""
31

32
import logging
1✔
33
from collections.abc import Sequence
1✔
34
from typing import ClassVar
1✔
35

36
from build_a_long.pdf_extract.classifier.candidate import Candidate
1✔
37
from build_a_long.pdf_extract.classifier.classification_result import (
1✔
38
    ClassificationResult,
39
)
40
from build_a_long.pdf_extract.classifier.label_classifier import LabelClassifier
1✔
41
from build_a_long.pdf_extract.classifier.rule_based_classifier import StepNumberScore
1✔
42
from build_a_long.pdf_extract.classifier.score import Score
1✔
43
from build_a_long.pdf_extract.classifier.steps.pairing import (
1✔
44
    DEFAULT_MAX_PAIRING_DISTANCE,
45
    PairingConfig,
46
    find_optimal_pairings,
47
)
48
from build_a_long.pdf_extract.extractor.bbox import BBox
1✔
49
from build_a_long.pdf_extract.extractor.lego_page_elements import (
1✔
50
    Diagram,
51
    StepNumber,
52
    SubStep,
53
)
54

55
log = logging.getLogger(__name__)
1✔
56

57

58
class _SubStepScore(Score):
    """Scoring record for one substep-number / diagram pairing.

    Two components contribute equally to the final score:
    - Position: the step number should sit to the left/top of the diagram
    - Distance: the step number should be close to the diagram
    """

    step_value: int
    """The parsed step number value (e.g., 1, 2, 3)."""

    substep_number_candidate: Candidate
    """The substep_number candidate for this substep."""

    diagram_candidate: Candidate
    """The diagram candidate paired with this step number."""

    position_score: float
    """Score based on step number being to left/top of diagram (0.0-1.0)."""

    distance_score: float
    """Score based on distance between step number and diagram (0.0-1.0)."""

    def score(self) -> float:
        """Combine the position and distance components with equal weight."""
        position_part = self.position_score * 0.5
        distance_part = self.distance_score * 0.5
        return position_part + distance_part
84

85

86
class SubStepClassifier(LabelClassifier):
    """Classifier for SubStep elements.

    This classifier pairs substep_number candidates with nearby diagram
    candidates based on position. The pairing creates SubStep candidates which
    are then:
    - Claimed by SubAssemblyClassifier if inside a callout box
    - Used by StepClassifier as naked substeps if not inside a box

    Scoring phase:
    - Gets all scored substep_number candidates
    - Gets all scored diagram candidates
    - Creates one "substep" candidate for each pairing returned by
      find_optimal_pairings (step number to the left/above the diagram)

    Build phase:
    - Builds the step_number from its candidate (substep_number -> StepNumber)
    - Builds the diagram from its candidate
    - Creates the SubStep with both elements
    """

    output: ClassVar[str] = "substep"
    requires: ClassVar[frozenset[str]] = frozenset({"substep_number", "diagram"})

    def _score(self, result: ClassificationResult) -> None:
        """Score substep number + diagram pairings to create SubStep candidates.

        Returns early, adding no candidates, when either the substep_number
        or the diagram candidate list is empty.

        Args:
            result: Classification result that is read for scored candidates
                and receives the new "substep" candidates.
        """
        # Substep numbers are the small-font step numbers. During scoring the
        # candidates are not yet constructed, so only scored ones are read.
        substep_number_candidates = result.get_scored_candidates("substep_number")
        if not substep_number_candidates:
            log.debug("[substep] No substep_number candidates found")
            return

        # Diagram candidates (not yet built)
        diagram_candidates = result.get_scored_candidates("diagram")
        if not diagram_candidates:
            log.debug("[substep] No diagram candidates found")
            return

        log.debug(
            "[substep] Found %d substep numbers, %d diagrams",
            len(substep_number_candidates),
            len(diagram_candidates),
        )

        # Use Hungarian algorithm to find optimal pairing
        self._create_candidates_with_hungarian(
            substep_number_candidates,
            diagram_candidates,
            result,
        )

    def _get_step_value(self, candidate: Candidate) -> int:
        """Get the step number value from a candidate's score.

        Args:
            candidate: A substep_number candidate.

        Returns:
            The ``step_value`` stored on the candidate's StepNumberScore, or 0
            when the candidate's score details are of any other type.
        """
        score_details = candidate.score_details
        if isinstance(score_details, StepNumberScore):
            return score_details.step_value

        # Keep the best-effort fallback, but make it visible: a value of 0
        # would otherwise be indistinguishable from a real parsed number.
        log.warning(
            "[substep] Candidate score_details is %s, not StepNumberScore; "
            "using step value 0",
            type(score_details).__name__,
        )
        return 0

    def _create_candidates_with_hungarian(
        self,
        step_candidates: Sequence[Candidate],
        diagram_candidates: Sequence[Candidate],
        result: ClassificationResult,
    ) -> None:
        """Use Hungarian algorithm to optimally pair step numbers with diagrams.

        Uses the shared pairing module to find optimal step number to diagram
        pairings based on position and distance, then adds one "substep"
        candidate to ``result`` per pairing.

        Args:
            step_candidates: Step number candidates
            diagram_candidates: Diagram candidates
            result: Classification result to add candidates to
        """
        # Defensive guard: _score() already filters empty inputs.
        if not step_candidates or not diagram_candidates:
            return

        # Get dividers for obstruction checking.
        # NOTE(review): "divider" is not listed in `requires`; confirm that
        # dividers are already built by the time this classifier scores.
        divider_candidates = result.get_built_candidates("divider")
        divider_bboxes = [
            c.constructed.bbox for c in divider_candidates if c.constructed is not None
        ]

        # Extract bboxes for pairing; list positions line up with the
        # step_index / diagram_index values on each returned pairing.
        step_bboxes = [c.bbox for c in step_candidates]
        diagram_bboxes = [c.bbox for c in diagram_candidates]

        # Configure pairing with the shared default max distance and equal
        # position/distance weights (the same 0.5/0.5 split that
        # _SubStepScore.score() applies).
        config = PairingConfig(
            max_distance=DEFAULT_MAX_PAIRING_DISTANCE,
            position_weight=0.5,
            distance_weight=0.5,
            check_dividers=True,
            top_left_tolerance=100.0,
        )

        # Find optimal pairings using shared logic
        pairings = find_optimal_pairings(
            step_bboxes, diagram_bboxes, config, divider_bboxes
        )

        # Create candidates from pairings
        for pairing in pairings:
            substep_cand = step_candidates[pairing.step_index]
            diag_cand = diagram_candidates[pairing.diagram_index]

            step_value = self._get_step_value(substep_cand)
            score_details = _SubStepScore(
                step_value=step_value,
                substep_number_candidate=substep_cand,
                diagram_candidate=diag_cand,
                position_score=pairing.position_score,
                distance_score=pairing.distance_score,
            )
            # Compute once; the value is used for both the candidate and the log.
            score_value = score_details.score()

            # Combined bbox
            combined_bbox = substep_cand.bbox.union(diag_cand.bbox)

            candidate = Candidate(
                bbox=combined_bbox,
                label="substep",
                score=score_value,
                score_details=score_details,
                source_blocks=[],  # Blocks claimed via nested candidates
            )

            result.add_candidate(candidate)
            log.debug(
                "[substep] Created candidate: step=%s, score=%.2f",
                step_value,
                score_value,
            )

    def build(
        self,
        candidate: Candidate,
        result: ClassificationResult,
        constraint_bbox: BBox | None = None,
    ) -> SubStep:
        """Construct a SubStep from a candidate.

        Args:
            candidate: The candidate to construct, with score_details containing
                substep_number_candidate and diagram_candidate.
            result: The classification result context.
            constraint_bbox: Optional bounding box to constrain diagram clustering.
                When building inside a SubAssembly, this should be the
                SubAssembly's bbox to prevent diagrams from clustering beyond
                the SubAssembly boundaries.

        Returns:
            The constructed SubStep element.

        Raises:
            AssertionError: If the candidate's score_details is not a
                _SubStepScore, or a nested build returns an unexpected
                element type.
        """
        score = candidate.score_details
        assert isinstance(score, _SubStepScore)

        # Build the step number from the substep_number candidate
        # (SubStepNumberClassifier.build() returns a StepNumber element)
        step_num_elem = result.build(score.substep_number_candidate)
        assert isinstance(step_num_elem, StepNumber)

        # Build the diagram from its candidate, passing constraint_bbox if provided
        # to prevent the diagram from clustering beyond the SubAssembly boundaries
        diagram_elem = result.build(
            score.diagram_candidate, constraint_bbox=constraint_bbox
        )
        assert isinstance(diagram_elem, Diagram)

        # Compute bbox from the built elements rather than reusing
        # candidate.bbox, which was derived from the unbuilt candidates.
        substep_bbox = step_num_elem.bbox.union(diagram_elem.bbox)

        log.debug(
            "[substep] Built SubStep %d",
            step_num_elem.value,
        )

        return SubStep(
            bbox=substep_bbox,
            step_number=step_num_elem,
            diagram=diagram_elem,
        )
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc