• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

bramp / build-along / 20361865516

19 Dec 2025 06:25AM UTC coverage: 89.13% (-0.002%) from 89.132%
20361865516

push

github

bramp
Fix lint errors: line length, unused imports, and YAML issues

- Add ruff isort configuration with known-first-party for build_a_long
- Add per-file E501 ignore for legocom_test.py (JSON test data)
- Create .yamllint config to relax strict YAML rules
- Fix E501 line length errors by wrapping long comments and strings
- Fix F841 unused variable errors
- Fix PLC0415 import-at-non-top-level errors
- Fix SIM108 ternary simplification errors

12 of 14 new or added lines in 8 files covered. (85.71%)

78 existing lines in 6 files now uncovered.

12915 of 14490 relevant lines covered (89.13%)

0.89 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.75
/src/build_a_long/pdf_extract/classifier/pages/progress_bar_classifier.py
1
"""
2
Progress bar classifier.
3

4
Purpose
5
-------
6
Identify progress bars at the bottom of instruction pages. Progress bars are
7
typically horizontal elements spanning most of the page width, located near
8
the page number at the bottom of the page.
9

10
Heuristic
11
---------
12
- Look for Drawing/Image elements near the bottom of the page
13
- Must span a significant portion of the page width (e.g., >50%)
14
- Should be relatively thin vertically (height << width)
15
- Located near the page number or bottom margin
16
- May consist of multiple adjacent elements forming a single visual bar
17

18
Debugging
19
---------
20
Enable with `LOG_LEVEL=DEBUG` for structured logs.
21
"""
22

23
from __future__ import annotations
1✔
24

25
import logging
1✔
26

27
from build_a_long.pdf_extract.classifier.candidate import Candidate
1✔
28
from build_a_long.pdf_extract.classifier.classification_result import (
1✔
29
    ClassificationResult,
30
)
31
from build_a_long.pdf_extract.classifier.config import ProgressBarConfig
1✔
32
from build_a_long.pdf_extract.classifier.rule_based_classifier import (
1✔
33
    RuleBasedClassifier,
34
    RuleScore,
35
)
36
from build_a_long.pdf_extract.classifier.rules import (
1✔
37
    BottomPositionScore,
38
    ContinuousAspectRatioScore,
39
    IsInstanceFilter,
40
    PageNumberProximityScore,
41
    Rule,
42
    WidthCoverageScore,
43
)
44
from build_a_long.pdf_extract.extractor.bbox import BBox
1✔
45
from build_a_long.pdf_extract.extractor.lego_page_elements import (
1✔
46
    ProgressBar,
47
    ProgressBarIndicator,
48
)
49
from build_a_long.pdf_extract.extractor.page_blocks import (
1✔
50
    Block,
51
    Blocks,
52
    Drawing,
53
    Image,
54
)
55

56
log = logging.getLogger(__name__)
1✔
57

58

59
class ProgressBarClassifier(RuleBasedClassifier):
    """Classifier for progress bars on instruction pages.

    Drawing/Image blocks are scored by the rules returned from :attr:`rules`.
    When a candidate is built, a matching ``progress_bar_indicator`` candidate
    (if any) is built as well and used to derive the progress value.
    """

    output = "progress_bar"
    requires = frozenset({"page_number", "progress_bar_indicator"})

    @property
    def min_score(self) -> float:
        """Minimum score threshold, read from the progress bar config."""
        return self.config.progress_bar.min_score

    @property
    def rules(self) -> list[Rule]:
        """Build the filter and scoring rules from the progress bar config."""
        config: ProgressBarConfig = self.config.progress_bar
        return [
            IsInstanceFilter((Drawing, Image)),
            BottomPositionScore(
                max_bottom_margin_ratio=config.max_bottom_margin_ratio,
                weight=1.0,
                name="position_score",
            ),
            # TODO Do we need this rule? Being in the bottom band may be sufficient
            PageNumberProximityScore(
                proximity_ratio=config.max_page_number_proximity_ratio,
                weight=0.2,
                name="page_number_proximity_score",
            ),
            WidthCoverageScore(
                min_width_ratio=config.min_width_ratio,
                max_score_width_ratio=config.max_score_width_ratio,
                weight=1.0,
                name="width_score",
            ),
            ContinuousAspectRatioScore(
                min_ratio=config.min_aspect_ratio,
                ideal_ratio=config.ideal_aspect_ratio,
                weight=1.0,
                name="aspect_ratio_score",
            ),
        ]

    def _get_additional_source_blocks(
        self, block: Block, result: ClassificationResult
    ) -> list[Blocks]:
        """Find overlapping blocks to include in source_blocks.

        Args:
            block: The block being considered as the progress bar.
            result: Classification result providing the page data.

        Returns:
            The Drawing/Image blocks found by ``_find_overlapping_blocks``, or
            an empty list when ``block`` is not a Drawing or Image.
        """
        if not isinstance(block, Drawing | Image):
            return []

        page_bbox = result.page_data.bbox
        assert page_bbox is not None
        # Search relative to the part of the bar that lies on the page.
        clipped_bbox = block.bbox.clip_to(page_bbox)
        config: ProgressBarConfig = self.config.progress_bar

        return self._find_overlapping_blocks(block, clipped_bbox, result, config)

    def build(self, candidate: Candidate, result: ClassificationResult) -> ProgressBar:
        """Construct a ProgressBar element from a single candidate.

        Args:
            candidate: The winning progress bar candidate. Its first source
                block is treated as the bar itself.
            result: Classification result providing page data and the
                indicator candidates.

        Returns:
            A ProgressBar whose bbox is the union of all source blocks and the
            indicator (when one was found and built).
        """
        assert isinstance(candidate.score_details, RuleScore)

        # Get the config for ProgressBarClassifier
        config: ProgressBarConfig = self.config.progress_bar

        # Get the primary block's bbox (first source_block) for progress
        # calculation. The candidate.bbox may be a union of multiple blocks
        # (including overlapping elements), but we need the original bar's
        # dimensions for progress.
        assert len(candidate.source_blocks) >= 1
        primary_block = candidate.source_blocks[0]
        primary_bbox = primary_block.bbox

        # Calculate properties from primary block bbox
        page_bbox = result.page_data.bbox
        assert page_bbox is not None
        clipped_bbox = primary_bbox.clip_to(page_bbox)
        original_width = primary_bbox.width
        bar_start_x = primary_bbox.x0

        # Find and build the indicator at build time
        indicator, progress = self._find_and_build_indicator(
            clipped_bbox,
            bar_start_x,
            original_width,
            result,
            config,
        )

        # Compute final bbox as union of source blocks + indicator (if present)
        # This ensures the bbox matches source_blocks + children as required
        bbox = BBox.union_all([b.bbox for b in candidate.source_blocks])
        if indicator:
            bbox = bbox.union(indicator.bbox)

        # Construct the ProgressBar element
        return ProgressBar(
            bbox=bbox,
            progress=progress,
            full_width=original_width,
            indicator=indicator,
        )

    def _find_and_build_indicator(
        self,
        bar_bbox: BBox,
        bar_start_x: float,
        bar_full_width: float,
        result: ClassificationResult,
        config: ProgressBarConfig,
    ) -> tuple[ProgressBarIndicator | None, float | None]:
        """Find and build a progress bar indicator for this progress bar.

        Looks for unbuilt progress_bar_indicator candidates that are at least
        as tall as the bar, vertically aligned with it, and horizontally within
        the bar's extent (plus ``config.indicator_search_margin``). Among those
        the candidate with the highest score is selected.

        Args:
            bar_bbox: The clipped bounding box of the progress bar
            bar_start_x: The starting X position of the progress bar
            bar_full_width: The original unclipped width of the progress bar
            result: Classification result containing indicator candidates
            config: ProgressBarConfig instance

        Returns:
            A tuple of (indicator, progress) where:
            - indicator: The built ProgressBarIndicator, or None
            - progress: The calculated progress (0.0-1.0), or None if no
              indicator was found or it could not be built
        """
        # Get available indicator candidates
        indicator_candidates = result.get_scored_candidates(
            "progress_bar_indicator",
            valid_only=False,
            exclude_failed=True,
        )

        bar_height = bar_bbox.height
        bar_center_y = (bar_bbox.y0 + bar_bbox.y1) / 2

        # Horizontal search window; invariant across candidates.
        bar_end_x = bar_start_x + bar_full_width
        min_indicator_x = bar_start_x - config.indicator_search_margin
        max_indicator_x = bar_end_x + config.indicator_search_margin

        best_candidate: Candidate | None = None
        best_score: float = -1.0

        for cand in indicator_candidates:
            # Skip if already built (consumed by another progress bar)
            if cand.constructed is not None:
                continue

            cand_bbox = cand.bbox

            # Indicator must be at least as tall as the bar to avoid false positives
            if cand_bbox.height < bar_height:
                continue

            # Check if the candidate's center Y is aligned with the bar's center Y
            cand_center_y = (cand_bbox.y0 + cand_bbox.y1) / 2
            if abs(cand_center_y - bar_center_y) > bar_height:
                continue

            # Must be horizontally within or near the progress bar
            indicator_x = (cand_bbox.x0 + cand_bbox.x1) / 2
            if indicator_x < min_indicator_x or indicator_x > max_indicator_x:
                continue

            # Keep the indicator with the highest score (most circular shape)
            if cand.score > best_score:
                best_candidate = cand
                best_score = cand.score

        if best_candidate is None:
            return None, None

        # Calculate progress based on indicator position, clamped to [0, 1]
        best_indicator_x = (best_candidate.bbox.x0 + best_candidate.bbox.x1) / 2
        progress = (best_indicator_x - bar_start_x) / bar_full_width
        progress = max(0.0, min(1.0, progress))

        # NOTE: the percentage needs an explicit ``f`` conversion; ``%.1%%`` is
        # not a valid printf-style spec and breaks formatting of this record.
        log.debug(
            "Found progress indicator candidate at x=%.1f, bar_start=%.1f, "
            "full_width=%.1f, progress=%.1f%%",
            best_indicator_x,
            bar_start_x,
            bar_full_width,
            progress * 100,
        )

        # Build the indicator. Best effort: a failed build (including a result
        # of the wrong type) is logged and the bar is built without one.
        try:
            indicator_elem = result.build(best_candidate)
            assert isinstance(indicator_elem, ProgressBarIndicator)
            return indicator_elem, progress
        except Exception as e:
            log.debug(
                "[progress_bar] Failed to build indicator at %s: %s",
                best_candidate.bbox,
                e,
            )
            return None, None

    def _find_overlapping_blocks(
        self,
        bar_block: Drawing | Image,
        bar_bbox: BBox,
        result: ClassificationResult,
        config: ProgressBarConfig,
    ) -> list[Blocks]:
        """Find all Drawing/Image blocks that are contained within the progress bar.

        This captures all visual elements that are part of the progress bar
        visualization, including:
        - The colored progress section on the left
        - Inner/outer borders
        - Progress indicator elements
        - Any decorative elements within the bar area

        Only includes blocks whose vertical extent lies within the progress
        bar's (margin-expanded) vertical extent to avoid capturing unrelated
        elements like page backgrounds or vertical dividers.

        Args:
            bar_block: The main progress bar drawing/image block
            bar_bbox: The clipped bounding box of the progress bar
            result: The classification result containing all page blocks
            config: ProgressBarConfig instance

        Returns:
            List of blocks that are contained within the progress bar area
        """
        overlapping: list[Blocks] = []

        # Expand the bbox by the configured margin to catch elements that
        # extend a bit beyond the bar (like the indicator).
        # NOTE(review): presumably expand() grows the box on every side; only
        # the vertical extent of the expanded box is used below.
        expanded_bbox = bar_bbox.expand(config.overlap_expansion_margin)

        for block in result.page_data.blocks:
            # Only consider Drawing and Image elements
            if not isinstance(block, Drawing | Image):
                continue

            # Skip the bar itself
            if block is bar_block:
                continue

            block_bbox = block.bbox

            # Block must lie within the expanded vertical extent of the bar.
            # This filters out full-page backgrounds and vertical dividers
            if block_bbox.y0 < expanded_bbox.y0 or block_bbox.y1 > expanded_bbox.y1:
                continue

            # Block must overlap horizontally with the (unexpanded) progress bar
            if block_bbox.x1 < bar_bbox.x0 or block_bbox.x0 > bar_bbox.x1:
                continue

            overlapping.append(block)

        return overlapping
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc