• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

bramp / build-along / 20086551557

10 Dec 2025 03:43AM UTC coverage: 90.303% (+0.3%) from 90.041%
20086551557

push

github

bramp
Refactor arrow shaft detection: unified method, stroked line support, multi-head grouping

- Merge _find_simple_shaft, _find_stroked_line_shaft, and _find_cornered_shaft
  into a single unified _find_shaft method that handles all shaft types by
  extracting points and finding closest/furthest from the arrowhead tip
- Add support for stroked line shafts (stroke_color instead of fill_color)
- Add tail_grouping_tolerance config for grouping arrowheads with nearby tails
- Group arrowheads that share the same shaft_block (L-shaped arrows with
  multiple heads at different ends)
- Use union-find algorithm to group arrowheads by shared shaft or tail proximity
- Extract colors_match to shared utils module
- Add comprehensive tests for stroked line shafts, tail correctness, and
  multi-head arrow grouping
- Update golden files for pages 011, 013, 015, 017 with corrected arrow detection

204 of 206 new or added lines in 5 files covered. (99.03%)

252 existing lines in 14 files now uncovered.

11855 of 13128 relevant lines covered (90.3%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.53
/src/build_a_long/pdf_extract/validation/validation_test.py
1
"""Tests for validation module."""
2

3
from typing import Any
1✔
4

5
from build_a_long.pdf_extract.classifier import (
1✔
6
    BatchClassificationResult,
7
    Candidate,
8
    ClassificationResult,
9
    TextHistogram,
10
)
11
from build_a_long.pdf_extract.classifier.test_utils import TestScore
1✔
12
from build_a_long.pdf_extract.extractor import PageData
1✔
13
from build_a_long.pdf_extract.extractor.bbox import BBox
1✔
14
from build_a_long.pdf_extract.extractor.lego_page_elements import (
1✔
15
    Manual,
16
    Page,
17
    PageNumber,
18
    Part,
19
    PartCount,
20
    PartsList,
21
    Step,
22
    StepNumber,
23
)
24

25
from .printer import print_validation
1✔
26
from .rules import (
1✔
27
    format_ranges,
28
    validate_catalog_coverage,
29
    validate_elements_within_page,
30
    validate_first_page_number,
31
    validate_missing_page_numbers,
32
    validate_no_divider_intersection,
33
    validate_page_number_sequence,
34
    validate_parts_list_has_parts,
35
    validate_parts_lists_no_overlap,
36
    validate_progress_bar_sequence,
37
    validate_step_sequence,
38
    validate_steps_have_parts,
39
    validate_steps_no_significant_overlap,
40
)
41
from .runner import validate_results
1✔
42
from .types import ValidationIssue, ValidationResult, ValidationSeverity
1✔
43

44

45
class TestValidationResult:
    """Tests for ValidationResult bookkeeping and ValidationIssue immutability."""

    def test_empty_result(self) -> None:
        """A new ValidationResult has every counter at zero and no issues."""
        result = ValidationResult()
        assert result.error_count == 0
        assert result.warning_count == 0
        assert result.info_count == 0
        assert not result.has_issues()

    def test_add_issue(self) -> None:
        """Adding one issue of each severity is reflected in each counter."""
        result = ValidationResult()
        result.add(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                rule="test",
                message="test error",
            )
        )
        result.add(
            ValidationIssue(
                severity=ValidationSeverity.WARNING,
                rule="test",
                message="test warning",
            )
        )
        result.add(
            ValidationIssue(
                severity=ValidationSeverity.INFO,
                rule="test",
                message="test info",
            )
        )

        assert result.error_count == 1
        assert result.warning_count == 1
        assert result.info_count == 1
        assert result.has_issues()

    def test_frozen_issue(self) -> None:
        """Assigning to a field of an existing ValidationIssue must fail.

        The test expects ``pydantic.ValidationError`` from the assignment.
        """
        import pydantic
        import pytest

        issue = ValidationIssue(
            severity=ValidationSeverity.ERROR,
            rule="test",
            message="test",
        )
        # pytest.raises fails the test on its own when nothing is raised, so
        # no manual "raise AssertionError" fallback (a line that never runs
        # on the passing path) is needed.
        with pytest.raises(pydantic.ValidationError):
            issue.message = "new message"  # type: ignore[misc]
101

102

103
class TestFormatRanges:
    """Checks for the format_ranges helper."""

    def test_empty_list(self) -> None:
        """An empty input formats to an empty string."""
        formatted = format_ranges([])
        assert formatted == ""

    def test_single_number(self) -> None:
        """A lone number is rendered as itself."""
        formatted = format_ranges([5])
        assert formatted == "5"

    def test_consecutive_range(self) -> None:
        """A run of consecutive numbers collapses into one range."""
        formatted = format_ranges([1, 2, 3, 4, 5])
        assert formatted == "1-5"

    def test_separate_numbers(self) -> None:
        """Numbers with no neighbours are listed individually."""
        formatted = format_ranges([1, 3, 5])
        assert formatted == "1, 3, 5"

    def test_mixed_ranges(self) -> None:
        """Runs and isolated numbers can appear in the same output."""
        formatted = format_ranges([1, 2, 3, 5, 7, 8, 9])
        assert formatted == "1-3, 5, 7-9"

    def test_long_list_truncation(self) -> None:
        """Output for a long, range-free input is capped and ends in an ellipsis."""
        # Odd numbers 1..199: no two are consecutive, so nothing collapses.
        odd_numbers = [n for n in range(1, 200) if n % 2 == 1]
        formatted = format_ranges(odd_numbers)
        assert len(formatted) <= 100
        assert formatted.endswith("...")
133

134

135
class TestValidateMissingPageNumbers:
    """Checks for the validate_missing_page_numbers rule.

    The second argument is presumably the list of pages lacking a page number
    and the third the total page count (inferred from the coverage
    percentages the cases are built around) -- confirm against the rule.
    """

    def test_no_missing_pages(self) -> None:
        """Nothing is reported when the missing list is empty."""
        outcome = ValidationResult()
        validate_missing_page_numbers(outcome, [], 10)
        assert not outcome.has_issues()

    def test_high_coverage(self) -> None:
        """1 of 20 missing (95% coverage) is expected to yield a single INFO."""
        outcome = ValidationResult()
        missing = [1]
        validate_missing_page_numbers(outcome, missing, 20)
        assert outcome.info_count == 1
        first_issue = outcome.issues[0]
        assert first_issue.severity == ValidationSeverity.INFO

    def test_medium_coverage(self) -> None:
        """3 of 10 missing (70% coverage) is expected to yield a single WARNING."""
        outcome = ValidationResult()
        missing = [1, 2, 3]
        validate_missing_page_numbers(outcome, missing, 10)
        assert outcome.warning_count == 1

    def test_low_coverage(self) -> None:
        """7 of 10 missing (30% coverage) is expected to yield a single ERROR."""
        outcome = ValidationResult()
        missing = [1, 2, 3, 4, 5, 6, 7]
        validate_missing_page_numbers(outcome, missing, 10)
        assert outcome.error_count == 1
162

163

164
class TestValidateStepSequence:
    """Checks for the validate_step_sequence rule.

    Inputs are 2-tuples; judging from the per-case notes the second element is
    the step number (the first is presumably the page -- confirm).
    """

    def test_empty_steps(self) -> None:
        """An empty step list produces no issues."""
        outcome = ValidationResult()
        validate_step_sequence(outcome, [])
        assert not outcome.has_issues()

    def test_valid_sequence(self) -> None:
        """Steps 1, 2, 3 in order produce no issues."""
        outcome = ValidationResult()
        steps = [(1, 1), (2, 2), (3, 3)]
        validate_step_sequence(outcome, steps)
        assert not outcome.has_issues()

    def test_duplicate_steps(self) -> None:
        """A repeated step number is reported under "duplicate_steps"."""
        outcome = ValidationResult()
        steps = [(1, 1), (2, 1), (3, 2)]  # step 1 appears twice
        validate_step_sequence(outcome, steps)
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "duplicate_steps" in reported_rules

    def test_step_gaps(self) -> None:
        """A skipped step number is reported under "step_gaps"."""
        outcome = ValidationResult()
        steps = [(1, 1), (2, 3)]  # step 2 is absent
        validate_step_sequence(outcome, steps)
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "step_gaps" in reported_rules

    def test_step_not_starting_at_one(self) -> None:
        """A sequence whose first step is not 1 is reported under "step_start"."""
        outcome = ValidationResult()
        steps = [(1, 5), (2, 6), (3, 7)]  # first step is 5
        validate_step_sequence(outcome, steps)
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "step_start" in reported_rules
197

198

199
class TestValidateFirstPageNumber:
    """Checks for the validate_first_page_number rule."""

    def test_no_page_numbers(self) -> None:
        """An empty page-number list yields one ERROR with rule "no_page_numbers"."""
        outcome = ValidationResult()
        validate_first_page_number(outcome, [])
        assert outcome.error_count == 1
        first_issue = outcome.issues[0]
        assert first_issue.rule == "no_page_numbers"

    def test_reasonable_first_page(self) -> None:
        """Page numbers beginning at 1 produce no issues."""
        outcome = ValidationResult()
        page_numbers = [1, 2, 3]
        validate_first_page_number(outcome, page_numbers)
        assert not outcome.has_issues()

    def test_high_first_page(self) -> None:
        """Page numbers beginning at 15 are reported under "high_first_page"."""
        outcome = ValidationResult()
        page_numbers = [15, 16, 17]
        validate_first_page_number(outcome, page_numbers)
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "high_first_page" in reported_rules
220

221

222
class TestValidatePageNumberSequence:
    """Checks for the validate_page_number_sequence rule."""

    def test_single_page(self) -> None:
        """A single page number produces no issues."""
        outcome = ValidationResult()
        validate_page_number_sequence(outcome, [1])
        assert not outcome.has_issues()

    def test_valid_sequence(self) -> None:
        """Consecutive numbers 1..5 produce no issues."""
        outcome = ValidationResult()
        page_numbers = [1, 2, 3, 4, 5]
        validate_page_number_sequence(outcome, page_numbers)
        assert not outcome.has_issues()

    def test_valid_sequence_starting_later(self) -> None:
        """A consecutive run that begins after 1 produces no issues.

        Leading pages without numbers (e.g. covers) are acceptable.
        """
        outcome = ValidationResult()
        page_numbers = [5, 6, 7, 8, 9]
        validate_page_number_sequence(outcome, page_numbers)
        assert not outcome.has_issues()

    def test_valid_sequence_ending_early(self) -> None:
        """A consecutive run that may stop before the last page produces no issues.

        Trailing pages without numbers (e.g. a back cover) are acceptable.
        Only consecutiveness is checked; the total page count is not known
        to this rule call.
        """
        outcome = ValidationResult()
        # 10..14 has no internal gap, whether or not more pages follow.
        page_numbers = [10, 11, 12, 13, 14]
        validate_page_number_sequence(outcome, page_numbers)
        assert not outcome.has_issues()

    def test_valid_sequence_starting_later_and_ending_early(self) -> None:
        """A consecutive run missing pages at both ends produces no issues.

        The first N and last M pages may both be absent as long as the middle
        has no gaps.
        """
        outcome = ValidationResult()
        page_numbers = [5, 6, 7, 8, 9, 10]
        validate_page_number_sequence(outcome, page_numbers)
        assert not outcome.has_issues()

    def test_decreasing_sequence(self) -> None:
        """A page number lower than its predecessor is reported as "page_sequence"."""
        outcome = ValidationResult()
        page_numbers = [1, 2, 5, 3, 4]  # drops from 5 to 3
        validate_page_number_sequence(outcome, page_numbers)
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "page_sequence" in reported_rules

    def test_gap_in_middle(self) -> None:
        """A jump inside the sequence is reported as a "page_gaps" WARNING."""
        outcome = ValidationResult()
        page_numbers = [1, 2, 5, 6]  # jumps from 2 to 5
        validate_page_number_sequence(outcome, page_numbers)
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "page_gaps" in reported_rules
        # The gap is expected at WARNING severity.
        gap_issue = next(
            issue for issue in outcome.issues if issue.rule == "page_gaps"
        )
        assert gap_issue.severity == ValidationSeverity.WARNING

    def test_small_gap_not_allowed(self) -> None:
        """Skipping even a single page number is reported as "page_gaps"."""
        outcome = ValidationResult()
        page_numbers = [1, 2, 4, 5]  # jumps from 2 to 4
        validate_page_number_sequence(outcome, page_numbers)
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "page_gaps" in reported_rules
287

288

289
class TestValidateProgressBarSequence:
    """Checks for the validate_progress_bar_sequence rule.

    Inputs are (page, value) tuples.
    """

    def test_empty_progress_bars(self) -> None:
        """An empty list produces no issues."""
        outcome = ValidationResult()
        validate_progress_bar_sequence(outcome, [])
        assert not outcome.has_issues()

    def test_valid_sequence(self) -> None:
        """Strictly increasing values produce no issues."""
        outcome = ValidationResult()
        readings = [(1, 0.1), (2, 0.2), (3, 0.3), (4, 0.4)]
        validate_progress_bar_sequence(outcome, readings)
        assert not outcome.has_issues()

    def test_decreasing_sequence(self) -> None:
        """A value lower than the previous one yields one decrease WARNING."""
        outcome = ValidationResult()
        readings = [(1, 0.5), (2, 0.4), (3, 0.6)]  # dips on p.2
        validate_progress_bar_sequence(outcome, readings)
        assert outcome.warning_count == 1
        first_issue = outcome.issues[0]
        assert first_issue.rule == "progress_bar_decrease"

    def test_consistent_increments(self) -> None:
        """A steady 0.1 increase per page produces no issues."""
        outcome = ValidationResult()
        readings = [(1, 0.1), (2, 0.2), (3, 0.3), (4, 0.4), (5, 0.5), (6, 0.6)]
        validate_progress_bar_sequence(outcome, readings)
        assert not outcome.has_issues()

    def test_inconsistent_increments(self) -> None:
        """Widely varying increments are reported as an INFO-level issue."""
        outcome = ValidationResult()
        # Step sizes: 0.01, 0.4, 0.01, 0.01, 0.01
        readings = [(1, 0.1), (2, 0.11), (3, 0.51), (4, 0.52), (5, 0.53), (6, 0.54)]
        validate_progress_bar_sequence(outcome, readings)
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "progress_bar_inconsistent" in reported_rules
        inconsistency = next(
            issue
            for issue in outcome.issues
            if issue.rule == "progress_bar_inconsistent"
        )
        assert inconsistency.severity == ValidationSeverity.INFO

    def test_not_enough_samples(self) -> None:
        """With only five samples the consistency check is expected to be skipped."""
        outcome = ValidationResult()
        # Same erratic pattern as above, but 5 samples (the check needs >5).
        readings = [(1, 0.1), (2, 0.11), (3, 0.51), (4, 0.52), (5, 0.53)]
        validate_progress_bar_sequence(outcome, readings)
        # No issue is expected because the sample count is below the threshold.
        assert not outcome.has_issues()
351

352

353
class TestValidateCatalogCoverage:
    """Checks for the validate_catalog_coverage rule."""

    def _make_part_with_image(
        self,
        image_id: str | None = None,
        xref: int | None = None,
        digest: bytes | None = None,
    ) -> Part:
        """Build a Part whose diagram carries the given image identifiers.

        Args:
            image_id: Passed to PartImage as ``image_id``.
            xref: Passed to PartImage as ``xref``.
            digest: Passed to PartImage as ``digest``.

        Returns:
            A Part with fixed bounding boxes and a count of 1.
        """
        from build_a_long.pdf_extract.extractor.lego_page_elements import PartImage

        diagram = PartImage(
            bbox=BBox(0, 0, 10, 10),
            image_id=image_id,
            xref=xref,
            digest=digest,
        )
        part_count = PartCount(bbox=BBox(0, 0, 5, 5), count=1)
        return Part(bbox=BBox(0, 0, 10, 10), count=part_count, diagram=diagram)

    def _make_manual(
        self,
        instruction_parts_config: list[dict[str, Any]],
        catalog_parts_config: list[dict[str, Any]],
    ) -> Manual:
        """Build a Manual with up to one instruction page and one catalog page.

        Each config dict is expanded as keyword arguments to
        ``_make_part_with_image`` (keys 'image_id', 'xref', 'digest').

        Args:
            instruction_parts_config: Parts for the single step on the
                instruction page; the page is omitted when this is empty.
            catalog_parts_config: Parts for the catalog page; the page is
                omitted when this is empty.

        Returns:
            A Manual holding whichever of the two pages were built.
        """
        pages = []

        if instruction_parts_config:
            # pdf page 1: an INSTRUCTION page with one step owning all parts.
            step_parts = [
                self._make_part_with_image(**part_cfg)
                for part_cfg in instruction_parts_config
            ]
            only_step = Step(
                bbox=BBox(0, 0, 100, 100),
                step_number=StepNumber(bbox=BBox(0, 0, 10, 10), value=1),
                parts_list=PartsList(bbox=BBox(0, 0, 50, 50), parts=step_parts),
            )
            instruction_page = Page(
                bbox=BBox(0, 0, 100, 100),
                pdf_page_number=1,
                page_number=PageNumber(bbox=BBox(90, 90, 100, 100), value=1),
                categories={Page.PageType.INSTRUCTION},
                steps=[only_step],
            )
            pages.append(instruction_page)

        if catalog_parts_config:
            # pdf page 2: a CATALOG page listing the parts directly.
            catalog_parts = [
                self._make_part_with_image(**part_cfg)
                for part_cfg in catalog_parts_config
            ]
            catalog_page = Page(
                bbox=BBox(0, 0, 100, 100),
                pdf_page_number=2,
                page_number=PageNumber(bbox=BBox(90, 90, 100, 100), value=2),
                categories={Page.PageType.CATALOG},
                catalog=catalog_parts,
            )
            pages.append(catalog_page)

        return Manual(pages=pages)

    def test_no_catalog_pages(self) -> None:
        """A manual without a catalog page produces no issues."""
        manual = self._make_manual([{"xref": 1}], [])
        outcome = ValidationResult()
        validate_catalog_coverage(outcome, manual)
        assert not outcome.has_issues()

    def test_no_instruction_parts(self) -> None:
        """A manual without instruction parts produces no issues."""
        manual = self._make_manual([], [{"xref": 1}])
        outcome = ValidationResult()
        validate_catalog_coverage(outcome, manual)
        assert not outcome.has_issues()

    def test_perfect_coverage_xref(self) -> None:
        """Every instruction part has an xref match in the catalog."""
        instruction_cfg = [{"xref": 1}, {"xref": 2}]
        catalog_cfg = [{"xref": 1}, {"xref": 2}, {"xref": 3}]
        manual = self._make_manual(instruction_cfg, catalog_cfg)
        outcome = ValidationResult()
        validate_catalog_coverage(outcome, manual)
        assert outcome.info_count == 1
        assert "100.0%" in outcome.issues[0].message

    def test_perfect_coverage_digest(self) -> None:
        """Every instruction part has a digest match in the catalog."""
        instruction_cfg = [{"digest": b"a"}, {"digest": b"b"}]
        catalog_cfg = [{"digest": b"a"}, {"digest": b"b"}, {"digest": b"c"}]
        manual = self._make_manual(instruction_cfg, catalog_cfg)
        outcome = ValidationResult()
        validate_catalog_coverage(outcome, manual)
        assert outcome.info_count == 1
        assert "100.0%" in outcome.issues[0].message

    def test_mixed_matching(self) -> None:
        """Parts may match by xref or by digest within the same manual."""
        instruction_cfg = [
            {"xref": 1},  # xref match
            {"digest": b"b"},  # digest match
            {"xref": 3, "digest": b"c"},  # xref match (preferred over digest)
        ]
        catalog_cfg = [
            {"xref": 1, "digest": b"x"},
            {"xref": 9, "digest": b"b"},
            {"xref": 3, "digest": b"z"},
        ]
        manual = self._make_manual(instruction_cfg, catalog_cfg)
        outcome = ValidationResult()
        validate_catalog_coverage(outcome, manual)
        assert outcome.info_count == 1
        assert "100.0%" in outcome.issues[0].message

    def test_partial_coverage_experimental(self) -> None:
        """With experimental=True, missing parts are reported at INFO level."""
        # One xref matches, the other has no catalog counterpart.
        manual = self._make_manual([{"xref": 1}, {"xref": 2}], [{"xref": 1}])
        outcome = ValidationResult()
        validate_catalog_coverage(outcome, manual, experimental=True)

        # Expected: one INFO with the coverage figure plus one INFO for the
        # missing part.
        assert outcome.info_count == 2
        assert outcome.warning_count == 0
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "missing_from_catalog" in reported_rules
        missing_issue = next(
            issue for issue in outcome.issues if issue.rule == "missing_from_catalog"
        )
        assert missing_issue.severity == ValidationSeverity.INFO
        assert "[EXPERIMENTAL]" in missing_issue.message
        assert missing_issue.details is not None
        assert "xref:2" in missing_issue.details

    def test_partial_coverage_strict(self) -> None:
        """With experimental=False, missing parts are reported as a WARNING."""
        # One digest matches, the other has no catalog counterpart.
        manual = self._make_manual(
            [{"digest": b"a"}, {"digest": b"b"}], [{"digest": b"a"}]
        )
        outcome = ValidationResult()
        validate_catalog_coverage(outcome, manual, experimental=False)

        # Expected: one INFO with the coverage figure plus one WARNING for the
        # missing part.
        assert outcome.info_count == 1
        assert outcome.warning_count == 1
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "missing_from_catalog" in reported_rules
        missing_issue = next(
            issue for issue in outcome.issues if issue.rule == "missing_from_catalog"
        )
        assert missing_issue.severity == ValidationSeverity.WARNING
        assert "[EXPERIMENTAL]" not in missing_issue.message
        assert missing_issue.details is not None
        # The details carry a "digest:" entry for the unmatched b"b" part.
        assert "digest:" in missing_issue.details

    def test_zero_coverage(self) -> None:
        """Zero coverage yields only the statistics INFO, with no warning."""
        manual = self._make_manual([{"xref": 1}], [{"xref": 2}])
        outcome = ValidationResult()
        validate_catalog_coverage(outcome, manual)

        # At 0% the rule is expected to assume images are not reused and to
        # stay quiet beyond the coverage figure.
        assert outcome.info_count == 1
        assert outcome.warning_count == 0
        assert "0.0%" in outcome.issues[0].message
533

534

535
class TestValidateStepsHaveParts:
    """Checks for the validate_steps_have_parts rule.

    The rule receives (page, step_number) tuples for steps lacking parts.
    """

    def test_all_steps_have_parts(self) -> None:
        """An empty list of part-less steps produces no issues."""
        outcome = ValidationResult()
        validate_steps_have_parts(outcome, [])
        assert not outcome.has_issues()

    def test_some_steps_missing_parts(self) -> None:
        """Part-less steps are summarised in one INFO listing pages and steps."""
        outcome = ValidationResult()
        steps_without_parts = [(1, 1), (3, 5), (5, 10)]
        validate_steps_have_parts(outcome, steps_without_parts)
        assert outcome.info_count == 1
        reported = outcome.issues[0]
        assert reported.rule == "steps_without_parts"
        assert reported.pages == [1, 3, 5]
        assert reported.details is not None
        for fragment in ("step 1 (p.1)", "step 5 (p.3)", "step 10 (p.5)"):
            assert fragment in reported.details
557

558

559
def _make_page_data(page_num: int) -> PageData:
    """Return a block-less PageData numbered ``page_num`` with a 100x100 bbox."""
    page_bbox = BBox(0, 0, 100, 100)
    return PageData(page_number=page_num, bbox=page_bbox, blocks=[])
566

567

568
def _make_classification_result(
    page_data: PageData,
    page_number_val: int | None = None,
    step_numbers: list[int] | None = None,
    include_parts: bool = True,
) -> ClassificationResult:
    """Build a ClassificationResult holding one "page" candidate.

    Args:
        page_data: PageData the result is created for; its ``page_number``
            becomes the Page's ``pdf_page_number``.
        page_number_val: Value for the Page's PageNumber element, or None to
            build the Page without one.
        step_numbers: Step numbers to create Steps for; None or empty means
            the Page has no steps.
        include_parts: When true, every Step gets a PartsList with one Part.

    Returns:
        The ClassificationResult with the page candidate added.
    """
    if page_number_val is None:
        page_number = None
    else:
        page_number = PageNumber(bbox=BBox(0, 90, 10, 100), value=page_number_val)

    steps: list[Step] = []
    for number in step_numbers or []:
        step_parts: PartsList | None = None
        if include_parts:
            # A fresh single-part list for each step.
            lone_part = Part(
                bbox=BBox(0, 0, 10, 10),
                count=PartCount(bbox=BBox(0, 0, 5, 5), count=1),
            )
            step_parts = PartsList(bbox=BBox(0, 0, 20, 10), parts=[lone_part])
        step = Step(
            bbox=BBox(0, 0, 80, 80),
            step_number=StepNumber(bbox=BBox(0, 10, 10, 20), value=number),
            parts_list=step_parts,
        )
        steps.append(step)

    page = Page(
        bbox=BBox(0, 0, 100, 100),
        pdf_page_number=page_data.page_number,
        page_number=page_number,
        steps=steps,
    )

    # Register the constructed Page as the sole candidate of the result.
    result = ClassificationResult(page_data=page_data)
    page_candidate = Candidate(
        label="page",
        source_blocks=[],
        bbox=page.bbox,
        score=1.0,
        score_details=TestScore(),
        constructed=page,
    )
    result.add_candidate(page_candidate)
    return result
632

633

634
class TestValidateResults:
    """End-to-end checks for validate_results on small three-page batches."""

    def test_perfect_document(self) -> None:
        """Pages 1-3 with steps 1-3 yield no errors and no warnings."""
        pages = [_make_page_data(1), _make_page_data(2), _make_page_data(3)]
        results = [
            _make_classification_result(page, page_number_val=n, step_numbers=[n])
            for n, page in enumerate(pages, start=1)
        ]
        batch = BatchClassificationResult(
            results=results, histogram=TextHistogram.empty()
        )

        outcome = validate_results(batch)
        # Only errors and warnings are checked here.
        assert outcome.error_count == 0
        assert outcome.warning_count == 0

    def test_missing_page_numbers(self) -> None:
        """Pages built without a page number are reported as missing."""
        pages = [_make_page_data(1), _make_page_data(2), _make_page_data(3)]
        # Only the middle page gets a page number.
        page_number_vals = [None, 2, None]
        results = [
            _make_classification_result(
                page, page_number_val=number_val, step_numbers=[step]
            )
            for step, (page, number_val) in enumerate(
                zip(pages, page_number_vals), start=1
            )
        ]
        batch = BatchClassificationResult(
            results=results, histogram=TextHistogram.empty()
        )

        outcome = validate_results(batch)
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "missing_page_numbers" in reported_rules

    def test_step_sequence_issues(self) -> None:
        """A skipped step number across pages is reported as "step_gaps"."""
        pages = [_make_page_data(1), _make_page_data(2), _make_page_data(3)]
        # Step 2 never appears: the pages carry steps 1, 3 and 4.
        step_per_page = [1, 3, 4]
        results = [
            _make_classification_result(
                page, page_number_val=number_val, step_numbers=[step]
            )
            for number_val, (page, step) in enumerate(
                zip(pages, step_per_page), start=1
            )
        ]
        batch = BatchClassificationResult(
            results=results, histogram=TextHistogram.empty()
        )

        outcome = validate_results(batch)
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "step_gaps" in reported_rules
689

690

691
class TestPrintValidation:
    """Checks for the print_validation output.

    NOTE(review): ``capsys`` is annotated as ``object``, which is why each
    ``readouterr`` call carries a type-ignore; a pytest capture-fixture
    annotation would likely make those unnecessary -- confirm before changing.
    """

    def test_print_no_issues(self, capsys: object) -> None:
        """An issue-free result prints a message containing "passed"."""
        outcome = ValidationResult()
        print_validation(outcome)
        # Inspect what was written to stdout.
        printed = capsys.readouterr()  # type: ignore[union-attr]
        assert "passed" in printed.out

    def test_print_with_issues(self, capsys: object) -> None:
        """Rule names, messages and details of added issues appear in the output."""
        error_issue = ValidationIssue(
            severity=ValidationSeverity.ERROR,
            rule="test_error",
            message="Test error message",
            pages=[1, 2, 3],
        )
        warning_issue = ValidationIssue(
            severity=ValidationSeverity.WARNING,
            rule="test_warning",
            message="Test warning message",
            details="Some details",
        )
        outcome = ValidationResult()
        outcome.add(error_issue)
        outcome.add(warning_issue)

        print_validation(outcome, use_color=False)
        printed = capsys.readouterr()  # type: ignore[union-attr]

        expected_fragments = (
            "test_error",
            "Test error message",
            "test_warning",
            "Some details",
        )
        for fragment in expected_fragments:
            assert fragment in printed.out
729

730

731
# =============================================================================
732
# Domain Invariant Validation Rules Tests
733
# =============================================================================
734

735

736
def _make_page_with_steps(
    step_data: list[tuple[int, BBox, BBox | None]],  # (step_num, step_bbox, pl_bbox)
    page_number_val: int = 1,
    page_bbox: BBox | None = None,
) -> tuple[Page, PageData]:
    """Build a Page, plus a block-less PageData, holding the requested steps.

    Args:
        step_data: One (step_number, step_bbox, parts_list_bbox) tuple per
            step. A parts_list_bbox of None gives that step no parts list.
        page_number_val: Value stored in the page's PageNumber element.
        page_bbox: Bounding box used for both the Page and the PageData;
            BBox(0, 0, 100, 100) when omitted.

    Returns:
        Tuple of (Page, PageData)
    """
    bounds = BBox(0, 0, 100, 100) if page_bbox is None else page_bbox

    def parts_list_for(pl_bbox: BBox | None) -> PartsList | None:
        """Return a one-part PartsList covering pl_bbox, or None."""
        if pl_bbox is None:
            return None
        # The count occupies the strip from y1 - 5 to y1; the part itself
        # occupies the remainder of the parts-list box.
        split_y = pl_bbox.y1 - 5
        only_part = Part(
            bbox=BBox(pl_bbox.x0, pl_bbox.y0, pl_bbox.x1, split_y),
            count=PartCount(
                bbox=BBox(pl_bbox.x0, split_y, pl_bbox.x1, pl_bbox.y1),
                count=1,
            ),
        )
        return PartsList(bbox=pl_bbox, parts=[only_part])

    def step_for(number: int, bbox: BBox, pl_bbox: BBox | None) -> Step:
        """Return a Step whose number sits in a 10x10 box at (x0, y0)."""
        parts_list = parts_list_for(pl_bbox)
        number_bbox = BBox(bbox.x0, bbox.y0, bbox.x0 + 10, bbox.y0 + 10)
        return Step(
            bbox=bbox,
            step_number=StepNumber(bbox=number_bbox, value=number),
            parts_list=parts_list,
        )

    page_data = PageData(page_number=1, bbox=bounds, blocks=[])
    steps = [
        step_for(step_num, step_bbox, pl_bbox)
        for step_num, step_bbox, pl_bbox in step_data
    ]
    page = Page(
        bbox=bounds,
        pdf_page_number=1,
        page_number=PageNumber(bbox=BBox(90, 90, 100, 100), value=page_number_val),
        steps=steps,
    )
    return page, page_data
1✔
795

796

797
class TestValidatePartsListHasParts:
    """Checks for the validate_parts_list_has_parts rule."""

    def test_no_empty_parts_lists(self) -> None:
        """A step whose parts list holds a part produces no issues."""
        step_bbox = BBox(0, 0, 50, 50)
        parts_list_bbox = BBox(40, 0, 50, 20)
        page, page_data = _make_page_with_steps([(1, step_bbox, parts_list_bbox)])

        result = ValidationResult()
        validate_parts_list_has_parts(result, page, page_data)

        assert not result.has_issues()

    def test_empty_parts_list(self) -> None:
        """A parts list with its parts removed yields one empty_parts_list warning."""
        step_bbox = BBox(0, 0, 50, 50)
        parts_list_bbox = BBox(40, 0, 50, 20)
        page, page_data = _make_page_with_steps([(1, step_bbox, parts_list_bbox)])

        # The helper always adds one part, so strip it out by hand.
        first_step = page.steps[0]
        first_step.parts_list.parts = []  # type: ignore[union-attr]

        result = ValidationResult()
        validate_parts_list_has_parts(result, page, page_data)

        assert result.warning_count == 1
        first_issue = result.issues[0]
        assert first_issue.rule == "empty_parts_list"
1✔
825

826

827
class TestValidatePartsListsNoOverlap:
    """Checks for the validate_parts_lists_no_overlap rule."""

    def test_non_overlapping_parts_lists(self) -> None:
        """Parts lists at x 35-45 and x 90-100 raise no issues."""
        first = (1, BBox(0, 0, 45, 50), BBox(35, 0, 45, 20))
        second = (2, BBox(55, 0, 100, 50), BBox(90, 0, 100, 20))
        page, page_data = _make_page_with_steps([first, second])

        result = ValidationResult()
        validate_parts_lists_no_overlap(result, page, page_data)

        assert not result.has_issues()

    def test_overlapping_parts_lists(self) -> None:
        """Two parts lists with identical boxes yield one overlapping_parts_lists error."""
        first = (1, BBox(0, 0, 60, 50), BBox(40, 0, 60, 20))
        # The second parts list occupies exactly the same box as the first.
        second = (2, BBox(40, 0, 100, 50), BBox(40, 0, 60, 20))
        page, page_data = _make_page_with_steps([first, second])

        result = ValidationResult()
        validate_parts_lists_no_overlap(result, page, page_data)

        assert result.error_count == 1
        first_issue = result.issues[0]
        assert first_issue.rule == "overlapping_parts_lists"
1✔
854

855

856
class TestValidateStepsNoSignificantOverlap:
    """Checks for the validate_steps_no_significant_overlap rule."""

    def test_non_overlapping_steps(self) -> None:
        """Steps spanning x 0-45 and x 55-100 raise no issues."""
        left_step = (1, BBox(0, 0, 45, 50), None)
        right_step = (2, BBox(55, 0, 100, 50), None)
        page, page_data = _make_page_with_steps([left_step, right_step])

        result = ValidationResult()
        validate_steps_no_significant_overlap(result, page, page_data)

        assert not result.has_issues()

    def test_significantly_overlapping_steps(self) -> None:
        """Heavily overlapping steps yield one overlapping_steps warning."""
        left_step = (1, BBox(0, 0, 80, 50), None)
        # x-ranges 0-80 and 20-100 share 60 units at full height.
        right_step = (2, BBox(20, 0, 100, 50), None)
        page, page_data = _make_page_with_steps([left_step, right_step])

        result = ValidationResult()
        validate_steps_no_significant_overlap(
            result,
            page,
            page_data,
            overlap_threshold=0.05,
        )

        assert result.warning_count == 1
        first_issue = result.issues[0]
        assert first_issue.rule == "overlapping_steps"

    def test_minor_overlap_allowed(self) -> None:
        """An overlap one unit wide stays under the 0.05 threshold."""
        left_step = (1, BBox(0, 0, 51, 50), None)
        # x-ranges 0-51 and 50-100 share only the strip from 50 to 51.
        right_step = (2, BBox(50, 0, 100, 50), None)
        page, page_data = _make_page_with_steps([left_step, right_step])

        result = ValidationResult()
        validate_steps_no_significant_overlap(
            result,
            page,
            page_data,
            overlap_threshold=0.05,
        )

        assert not result.has_issues()
1✔
899

900

901
class TestValidateElementsWithinPage:
    """Checks for the validate_elements_within_page rule."""

    def test_elements_within_bounds(self) -> None:
        """A step and parts list inside the default page box raise no issues."""
        step_bbox = BBox(10, 10, 90, 90)
        parts_list_bbox = BBox(70, 10, 90, 30)
        page, page_data = _make_page_with_steps([(1, step_bbox, parts_list_bbox)])

        result = ValidationResult()
        validate_elements_within_page(result, page, page_data)

        assert not result.has_issues()

    def test_element_outside_bounds(self) -> None:
        """A step reaching x=110 on the default 100-wide page is reported."""
        # x1 of 110 lies beyond the default page box's x1 of 100.
        oversized_step = (1, BBox(10, 10, 110, 90), None)
        page, page_data = _make_page_with_steps([oversized_step])

        result = ValidationResult()
        validate_elements_within_page(result, page, page_data)

        assert result.error_count >= 1
        reported_rules = [issue.rule for issue in result.issues]
        assert "element_outside_page" in reported_rules
1✔
926

927

928
class TestValidateNoDividerIntersection:
    """Tests for validate_no_divider_intersection rule."""

    def _make_page_with_divider(
        self,
        divider_bbox: BBox,
        element_bbox: BBox,
        element_type: str = "Step",
    ) -> tuple[Page, PageData]:
        """Create a page with a divider and one other element.

        Args:
            divider_bbox: Bounding box of the divider, which is always created
                with VERTICAL orientation.
            element_bbox: Bounding box of the accompanying element. For a
                "Step" it is also used as the step number's bounding box.
            element_type: Which element to add: "Step", "Background" or
                "ProgressBar".

        Returns:
            Tuple of (Page, PageData); the page box is BBox(0, 0, 100, 100).

        Raises:
            ValueError: If element_type is not one of the supported names.
        """
        from build_a_long.pdf_extract.extractor.lego_page_elements import (
            Background,
            Divider,
            Page,
            ProgressBar,
            Step,
            StepNumber,
        )

        page_bbox = BBox(0, 0, 100, 100)
        page_data = PageData(page_number=1, bbox=page_bbox, blocks=[])

        divider = Divider(bbox=divider_bbox, orientation=Divider.Orientation.VERTICAL)

        # Start from an "empty" page and fill in only the slot that matches
        # the requested element type.
        steps: list[Step] = []
        background = None
        progress_bar = None
        if element_type == "Step":
            steps = [
                Step(
                    bbox=element_bbox,
                    step_number=StepNumber(bbox=element_bbox, value=1),
                )
            ]
        elif element_type == "Background":
            background = Background(bbox=element_bbox)
        elif element_type == "ProgressBar":
            progress_bar = ProgressBar(bbox=element_bbox, full_width=100)
        else:
            raise ValueError(f"Unknown element type: {element_type}")

        page = Page(
            bbox=page_bbox,
            pdf_page_number=1,
            dividers=[divider],
            steps=steps,
            background=background,
            progress_bar=progress_bar,
        )

        return page, page_data

    def test_no_dividers(self) -> None:
        """Test checking a page with no dividers."""
        page, page_data = _make_page_with_steps([(1, BBox(0, 0, 10, 10), None)])
        validation = ValidationResult()
        validate_no_divider_intersection(validation, page, page_data)
        assert not validation.has_issues()

    def test_no_intersection(self) -> None:
        """Test element not intersecting divider."""
        page, page_data = self._make_page_with_divider(
            divider_bbox=BBox(50, 0, 51, 100),  # Vertical line at x=50
            element_bbox=BBox(0, 0, 40, 40),  # Left side
        )
        validation = ValidationResult()
        validate_no_divider_intersection(validation, page, page_data)
        assert not validation.has_issues()

    def test_intersection(self) -> None:
        """Test element intersecting divider."""
        page, page_data = self._make_page_with_divider(
            divider_bbox=BBox(50, 0, 51, 100),  # Vertical line at x=50
            element_bbox=BBox(40, 0, 60, 40),  # Crosses x=50
        )
        validation = ValidationResult()
        validate_no_divider_intersection(validation, page, page_data)
        assert validation.warning_count >= 1
        assert any(i.rule == "divider_intersection" for i in validation.issues)

    def test_excluded_elements_ignored(self) -> None:
        """Test that excluded elements (Background, ProgressBar) are ignored."""
        scenarios = [
            ("Background", BBox(0, 0, 100, 100)),  # Full page background
            ("ProgressBar", BBox(0, 90, 100, 100)),  # Bottom bar crossing divider
        ]
        for element_type, element_bbox in scenarios:
            page, page_data = self._make_page_with_divider(
                divider_bbox=BBox(50, 0, 51, 100),
                element_bbox=element_bbox,
                element_type=element_type,
            )
            # Use a fresh result per scenario so the two element types are
            # checked independently of each other.
            validation = ValidationResult()
            validate_no_divider_intersection(validation, page, page_data)
            assert not validation.has_issues(), (
                f"{element_type} should be excluded from divider checks"
            )
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc