• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

bramp / build-along / 20017191425

08 Dec 2025 04:39AM UTC coverage: 90.47% (+0.07%) from 90.402%
20017191425

push

github

bramp
Add TriviaTextClassifier for detecting flavor/story text on instruction pages

- Add TriviaTextClassifier using union-find spatial clustering to detect
  dense clusters of text blocks (trivia/flavor content)
- Filter numeric text (part numbers, element IDs, counts) from consideration
- Include related images and drawings in the trivia text bounding box
- Exclude large background elements (>50% of page) from expansion
- Clamp final bbox to page bounds to avoid boundary violations
- Add TriviaTextConfig with min_text_blocks, min_total_characters, proximity_margin
- Register classifier in classifier.py and PageClassifier
- Add TriviaText element to Page and iter_elements()
- Update golden test file for page 17

130 of 139 new or added lines in 7 files covered. (93.53%)

41 existing lines in 4 files now uncovered.

11230 of 12413 relevant lines covered (90.47%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.51
/src/build_a_long/pdf_extract/validation/validation_test.py
1
"""Tests for validation module."""
2

3
from typing import Any
1✔
4

5
from build_a_long.pdf_extract.classifier import (
1✔
6
    BatchClassificationResult,
7
    Candidate,
8
    ClassificationResult,
9
    TextHistogram,
10
)
11
from build_a_long.pdf_extract.classifier.test_utils import TestScore
1✔
12
from build_a_long.pdf_extract.extractor import PageData
1✔
13
from build_a_long.pdf_extract.extractor.bbox import BBox
1✔
14
from build_a_long.pdf_extract.extractor.lego_page_elements import (
1✔
15
    Manual,
16
    Page,
17
    PageNumber,
18
    Part,
19
    PartCount,
20
    PartsList,
21
    Step,
22
    StepNumber,
23
)
24

25
from .printer import print_validation
1✔
26
from .rules import (
1✔
27
    format_ranges,
28
    validate_catalog_coverage,
29
    validate_elements_within_page,
30
    validate_first_page_number,
31
    validate_missing_page_numbers,
32
    validate_no_divider_intersection,
33
    validate_page_number_sequence,
34
    validate_parts_list_has_parts,
35
    validate_parts_lists_no_overlap,
36
    validate_progress_bar_sequence,
37
    validate_step_sequence,
38
    validate_steps_have_parts,
39
    validate_steps_no_significant_overlap,
40
)
41
from .runner import validate_results
1✔
42
from .types import ValidationIssue, ValidationResult, ValidationSeverity
1✔
43

44

45
class TestValidationResult:
    """Checks for ValidationResult counters and ValidationIssue immutability."""

    def test_empty_result(self) -> None:
        """A result with nothing added reports zero counts and no issues."""
        fresh = ValidationResult()
        assert fresh.error_count == 0
        assert fresh.warning_count == 0
        assert fresh.info_count == 0
        assert not fresh.has_issues()

    def test_add_issue(self) -> None:
        """Adding one issue per severity bumps each counter exactly once."""
        collected = ValidationResult()
        # One issue of each severity, added in ERROR, WARNING, INFO order.
        for severity, message in (
            (ValidationSeverity.ERROR, "test error"),
            (ValidationSeverity.WARNING, "test warning"),
            (ValidationSeverity.INFO, "test info"),
        ):
            collected.add(
                ValidationIssue(severity=severity, rule="test", message=message)
            )

        assert collected.error_count == 1
        assert collected.warning_count == 1
        assert collected.info_count == 1
        assert collected.has_issues()

    def test_frozen_issue(self) -> None:
        """Assigning to a field of a ValidationIssue must raise."""
        import pydantic

        frozen = ValidationIssue(
            severity=ValidationSeverity.ERROR,
            rule="test",
            message="test",
        )
        # The assignment is expected to fail with pydantic.ValidationError;
        # reaching the line after the try block means the model was mutable.
        try:
            frozen.message = "new message"  # type: ignore[misc]
        except pydantic.ValidationError:
            return  # Expected
        raise AssertionError("Expected frozen model to raise")
101

102

103
class TestFormatRanges:
    """Checks for the format_ranges helper's string output."""

    def test_empty_list(self) -> None:
        """No numbers render as the empty string."""
        rendered = format_ranges([])
        assert rendered == ""

    def test_single_number(self) -> None:
        """A lone number renders as itself."""
        rendered = format_ranges([5])
        assert rendered == "5"

    def test_consecutive_range(self) -> None:
        """A consecutive run collapses into a single 'start-end' range."""
        rendered = format_ranges([1, 2, 3, 4, 5])
        assert rendered == "1-5"

    def test_separate_numbers(self) -> None:
        """Non-adjacent numbers are listed individually, comma separated."""
        rendered = format_ranges([1, 3, 5])
        assert rendered == "1, 3, 5"

    def test_mixed_ranges(self) -> None:
        """Runs and isolated numbers can be mixed in one output."""
        rendered = format_ranges([1, 2, 3, 5, 7, 8, 9])
        assert rendered == "1-3, 5, 7-9"

    def test_long_list_truncation(self) -> None:
        """Output that would be very long is cut off and ends with '...'."""
        # Odd numbers 1..199: no two are adjacent, so nothing collapses.
        odds = list(range(1, 200, 2))
        rendered = format_ranges(odds)
        assert len(rendered) <= 100
        assert rendered.endswith("...")
133

134

135
class TestValidateMissingPageNumbers:
    """Checks for the validate_missing_page_numbers rule."""

    def test_no_missing_pages(self) -> None:
        """With no missing pages reported, no issue is recorded."""
        outcome = ValidationResult()
        validate_missing_page_numbers(outcome, [], 10)
        assert not outcome.has_issues()

    def test_high_coverage(self) -> None:
        """1 of 20 pages missing (95% covered) yields a single INFO issue."""
        outcome = ValidationResult()
        validate_missing_page_numbers(outcome, [1], 20)
        assert outcome.info_count == 1
        first_issue = outcome.issues[0]
        assert first_issue.severity == ValidationSeverity.INFO

    def test_medium_coverage(self) -> None:
        """3 of 10 pages missing (70% covered) yields a single WARNING."""
        outcome = ValidationResult()
        validate_missing_page_numbers(outcome, [1, 2, 3], 10)
        assert outcome.warning_count == 1

    def test_low_coverage(self) -> None:
        """7 of 10 pages missing (30% covered) yields a single ERROR."""
        outcome = ValidationResult()
        missing_pages = [1, 2, 3, 4, 5, 6, 7]
        validate_missing_page_numbers(outcome, missing_pages, 10)
        assert outcome.error_count == 1
162

163

164
class TestValidateStepSequence:
    """Checks for the validate_step_sequence rule.

    Inputs are lists of 2-tuples; judging by the inline notes in these tests
    the second element is the step number (first presumably the page).
    """

    def test_empty_steps(self) -> None:
        """An empty step list produces no issues."""
        outcome = ValidationResult()
        validate_step_sequence(outcome, [])
        assert not outcome.has_issues()

    def test_valid_sequence(self) -> None:
        """Steps 1, 2, 3 in order produce no issues."""
        outcome = ValidationResult()
        validate_step_sequence(outcome, [(1, 1), (2, 2), (3, 3)])
        assert not outcome.has_issues()

    def test_duplicate_steps(self) -> None:
        """Step 1 appearing twice is reported under 'duplicate_steps'."""
        outcome = ValidationResult()
        validate_step_sequence(outcome, [(1, 1), (2, 1), (3, 2)])
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "duplicate_steps" in reported_rules

    def test_step_gaps(self) -> None:
        """Jumping from step 1 to step 3 is reported under 'step_gaps'."""
        outcome = ValidationResult()
        validate_step_sequence(outcome, [(1, 1), (2, 3)])
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "step_gaps" in reported_rules

    def test_step_not_starting_at_one(self) -> None:
        """A sequence beginning at step 5 is reported under 'step_start'."""
        outcome = ValidationResult()
        validate_step_sequence(outcome, [(1, 5), (2, 6), (3, 7)])
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "step_start" in reported_rules
197

198

199
class TestValidateFirstPageNumber:
    """Checks for the validate_first_page_number rule."""

    def test_no_page_numbers(self) -> None:
        """An empty page-number list yields one 'no_page_numbers' error."""
        outcome = ValidationResult()
        validate_first_page_number(outcome, [])
        assert outcome.error_count == 1
        first_issue = outcome.issues[0]
        assert first_issue.rule == "no_page_numbers"

    def test_reasonable_first_page(self) -> None:
        """Page numbers starting at 1 produce no issues."""
        outcome = ValidationResult()
        validate_first_page_number(outcome, [1, 2, 3])
        assert not outcome.has_issues()

    def test_high_first_page(self) -> None:
        """Page numbers starting at 15 are reported as 'high_first_page'."""
        outcome = ValidationResult()
        validate_first_page_number(outcome, [15, 16, 17])
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "high_first_page" in reported_rules
220

221

222
class TestValidatePageNumberSequence:
    """Checks for the validate_page_number_sequence rule."""

    def test_single_page(self) -> None:
        """A single page number cannot be out of sequence."""
        outcome = ValidationResult()
        validate_page_number_sequence(outcome, [1])
        assert not outcome.has_issues()

    def test_valid_sequence(self) -> None:
        """Consecutive numbers 1..5 produce no issues."""
        outcome = ValidationResult()
        validate_page_number_sequence(outcome, [1, 2, 3, 4, 5])
        assert not outcome.has_issues()

    def test_valid_sequence_starting_later(self) -> None:
        """A consecutive run that begins after 1 is still accepted.

        Leading pages without numbers (e.g. cover pages) are tolerated.
        """
        outcome = ValidationResult()
        validate_page_number_sequence(outcome, [5, 6, 7, 8, 9])
        assert not outcome.has_issues()

    def test_valid_sequence_ending_early(self) -> None:
        """A consecutive run is accepted even if later pages may be unnumbered.

        Trailing pages without numbers (e.g. a back cover) are tolerated.
        The rule is not given a total page count here, so only consecutiveness
        is exercised.
        """
        outcome = ValidationResult()
        # 10..14 has no internal gaps, regardless of what might follow.
        validate_page_number_sequence(outcome, [10, 11, 12, 13, 14])
        assert not outcome.has_issues()

    def test_valid_sequence_starting_later_and_ending_early(self) -> None:
        """A gap-free run is accepted with both ends of the document missing.

        The first N and last M pages may lack numbers as long as the numbers
        that are present have no gaps between them.
        """
        outcome = ValidationResult()
        validate_page_number_sequence(outcome, [5, 6, 7, 8, 9, 10])
        assert not outcome.has_issues()

    def test_decreasing_sequence(self) -> None:
        """A drop from 5 back to 3 is reported under 'page_sequence'."""
        outcome = ValidationResult()
        validate_page_number_sequence(outcome, [1, 2, 5, 3, 4])
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "page_sequence" in reported_rules

    def test_gap_in_middle(self) -> None:
        """A jump from 2 to 5 is reported as a 'page_gaps' WARNING."""
        outcome = ValidationResult()
        validate_page_number_sequence(outcome, [1, 2, 5, 6])
        gap_issues = [
            issue for issue in outcome.issues if issue.rule == "page_gaps"
        ]
        assert gap_issues
        # The first matching issue carries WARNING severity.
        assert gap_issues[0].severity == ValidationSeverity.WARNING

    def test_small_gap_not_allowed(self) -> None:
        """Even a single skipped number (2 to 4) is reported as 'page_gaps'."""
        outcome = ValidationResult()
        validate_page_number_sequence(outcome, [1, 2, 4, 5])
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "page_gaps" in reported_rules
287

288

289
class TestValidateProgressBarSequence:
    """Checks for the validate_progress_bar_sequence rule.

    Samples are (page, value) tuples.
    """

    def test_empty_progress_bars(self) -> None:
        """No samples produce no issues."""
        outcome = ValidationResult()
        validate_progress_bar_sequence(outcome, [])
        assert not outcome.has_issues()

    def test_valid_sequence(self) -> None:
        """Steadily increasing values produce no issues."""
        outcome = ValidationResult()
        samples = [(1, 0.1), (2, 0.2), (3, 0.3), (4, 0.4)]
        validate_progress_bar_sequence(outcome, samples)
        assert not outcome.has_issues()

    def test_decreasing_sequence(self) -> None:
        """A value that drops (0.5 to 0.4 on page 2) yields one warning."""
        outcome = ValidationResult()
        samples = [(1, 0.5), (2, 0.4), (3, 0.6)]
        validate_progress_bar_sequence(outcome, samples)
        assert outcome.warning_count == 1
        first_issue = outcome.issues[0]
        assert first_issue.rule == "progress_bar_decrease"

    def test_consistent_increments(self) -> None:
        """Six samples with a constant 0.1 increment produce no issues."""
        outcome = ValidationResult()
        samples = [(1, 0.1), (2, 0.2), (3, 0.3), (4, 0.4), (5, 0.5), (6, 0.6)]
        validate_progress_bar_sequence(outcome, samples)
        assert not outcome.has_issues()

    def test_inconsistent_increments(self) -> None:
        """Wildly varying increments are reported as an INFO issue."""
        outcome = ValidationResult()
        # Increments: 0.01, 0.4, 0.01, 0.01, 0.01
        samples = [(1, 0.1), (2, 0.11), (3, 0.51), (4, 0.52), (5, 0.53), (6, 0.54)]
        validate_progress_bar_sequence(outcome, samples)
        inconsistent = [
            issue
            for issue in outcome.issues
            if issue.rule == "progress_bar_inconsistent"
        ]
        assert inconsistent
        assert inconsistent[0].severity == ValidationSeverity.INFO

    def test_not_enough_samples(self) -> None:
        """With only five samples the consistency check is expected to be skipped."""
        outcome = ValidationResult()
        # Same erratic increments as the inconsistent case, minus the last sample.
        samples = [(1, 0.1), (2, 0.11), (3, 0.51), (4, 0.52), (5, 0.53)]
        validate_progress_bar_sequence(outcome, samples)
        # Nothing is reported at this sample size.
        assert not outcome.has_issues()
351

352

353
class TestValidateCatalogCoverage:
    """Checks for the validate_catalog_coverage rule."""

    def _make_part_with_image(self, image_id: str) -> Part:
        """Build a Part whose diagram is a PartImage carrying ``image_id``."""
        from build_a_long.pdf_extract.extractor.lego_page_elements import PartImage

        part_box = BBox(0, 0, 10, 10)
        count = PartCount(bbox=BBox(0, 0, 5, 5), count=1)
        diagram = PartImage(bbox=BBox(0, 0, 10, 10), image_id=image_id)
        return Part(bbox=part_box, count=count, diagram=diagram)

    def _make_manual(
        self, instruction_image_ids: list[str], catalog_image_ids: list[str]
    ) -> Manual:
        """Build a Manual with up to two pages of parts.

        Args:
            instruction_image_ids: Image IDs for parts placed in the parts list
                of a single step on an INSTRUCTION page (pdf page 1). The page
                is omitted when this list is empty.
            catalog_image_ids: Image IDs for parts placed in the catalog of a
                CATALOG page (pdf page 2). The page is omitted when this list
                is empty.
        """
        pages = []

        if instruction_image_ids:
            instruction_parts = [
                self._make_part_with_image(image_id)
                for image_id in instruction_image_ids
            ]
            only_step = Step(
                bbox=BBox(0, 0, 100, 100),
                step_number=StepNumber(bbox=BBox(0, 0, 10, 10), value=1),
                parts_list=PartsList(bbox=BBox(0, 0, 50, 50), parts=instruction_parts),
            )
            instruction_page = Page(
                bbox=BBox(0, 0, 100, 100),
                pdf_page_number=1,
                page_number=PageNumber(bbox=BBox(90, 90, 100, 100), value=1),
                categories={Page.PageType.INSTRUCTION},
                steps=[only_step],
            )
            pages.append(instruction_page)

        if catalog_image_ids:
            catalog_parts = [
                self._make_part_with_image(image_id) for image_id in catalog_image_ids
            ]
            catalog_page = Page(
                bbox=BBox(0, 0, 100, 100),
                pdf_page_number=2,
                page_number=PageNumber(bbox=BBox(90, 90, 100, 100), value=2),
                categories={Page.PageType.CATALOG},
                catalog=catalog_parts,
            )
            pages.append(catalog_page)

        return Manual(pages=pages)

    def test_no_catalog_pages(self) -> None:
        """A manual without a catalog page produces no issues."""
        manual = self._make_manual(["img1"], [])
        outcome = ValidationResult()
        validate_catalog_coverage(outcome, manual)
        assert not outcome.has_issues()

    def test_no_instruction_parts(self) -> None:
        """A manual without instruction parts produces no issues."""
        manual = self._make_manual([], ["img1"])
        outcome = ValidationResult()
        validate_catalog_coverage(outcome, manual)
        assert not outcome.has_issues()

    def test_perfect_coverage(self) -> None:
        """Every instruction image present in the catalog reports 100.0%."""
        manual = self._make_manual(["img1", "img2"], ["img1", "img2", "img3"])
        outcome = ValidationResult()
        validate_catalog_coverage(outcome, manual)
        # A single INFO issue carries the coverage figure.
        assert outcome.info_count == 1
        coverage_issue = outcome.issues[0]
        assert "100.0%" in coverage_issue.message

    def test_partial_coverage_experimental(self) -> None:
        """With experimental=True, missing parts are reported as INFO."""
        # img1 is in the catalog, img2 is not: half the parts are covered.
        manual = self._make_manual(["img1", "img2"], ["img1"])
        outcome = ValidationResult()
        validate_catalog_coverage(outcome, manual, experimental=True)

        # Two INFO issues: the coverage stat and the missing-parts report.
        assert outcome.info_count == 2
        assert outcome.warning_count == 0
        missing = [
            issue for issue in outcome.issues if issue.rule == "missing_from_catalog"
        ]
        assert missing
        assert missing[0].severity == ValidationSeverity.INFO
        assert "[EXPERIMENTAL]" in missing[0].message

    def test_partial_coverage_strict(self) -> None:
        """With experimental=False, missing parts are reported as WARNING."""
        # img1 is in the catalog, img2 is not: half the parts are covered.
        manual = self._make_manual(["img1", "img2"], ["img1"])
        outcome = ValidationResult()
        validate_catalog_coverage(outcome, manual, experimental=False)

        # One INFO for the coverage stat, one WARNING for the missing parts.
        assert outcome.info_count == 1
        assert outcome.warning_count == 1
        missing = [
            issue for issue in outcome.issues if issue.rule == "missing_from_catalog"
        ]
        assert missing
        assert missing[0].severity == ValidationSeverity.WARNING
        assert "[EXPERIMENTAL]" not in missing[0].message

    def test_zero_coverage(self) -> None:
        """No shared images at all yields only the stats INFO, no warning."""
        manual = self._make_manual(["img1"], ["img2"])
        outcome = ValidationResult()
        validate_catalog_coverage(outcome, manual)

        # 0% coverage is taken to mean the catalog uses different images,
        # so only the coverage stat is emitted.
        assert outcome.info_count == 1
        assert outcome.warning_count == 0
        coverage_issue = outcome.issues[0]
        assert "0.0%" in coverage_issue.message
472

473

474
class TestValidateStepsHaveParts:
    """Checks for the validate_steps_have_parts rule."""

    def test_all_steps_have_parts(self) -> None:
        """An empty list of part-less steps produces no issues."""
        outcome = ValidationResult()
        validate_steps_have_parts(outcome, [])
        assert not outcome.has_issues()

    def test_some_steps_missing_parts(self) -> None:
        """Part-less steps are summarized in one INFO issue with page details."""
        outcome = ValidationResult()
        # Each entry is a (page, step_number) tuple.
        steps_without_parts = [(1, 1), (3, 5), (5, 10)]
        validate_steps_have_parts(outcome, steps_without_parts)
        assert outcome.info_count == 1

        reported = outcome.issues[0]
        assert reported.rule == "steps_without_parts"
        assert reported.pages == [1, 3, 5]
        assert reported.details is not None
        for expected_fragment in ("step 1 (p.1)", "step 5 (p.3)", "step 10 (p.5)"):
            assert expected_fragment in reported.details
496

497

498
def _make_page_data(page_num: int) -> PageData:
    """Return a PageData numbered ``page_num`` with a 100x100 bbox and no blocks."""
    page_box = BBox(0, 0, 100, 100)
    return PageData(page_number=page_num, bbox=page_box, blocks=[])
505

506

507
def _make_classification_result(
    page_data: PageData,
    page_number_val: int | None = None,
    step_numbers: list[int] | None = None,
    include_parts: bool = True,
) -> ClassificationResult:
    """Build a ClassificationResult holding one constructed Page candidate.

    Args:
        page_data: The PageData the result is created for; its page_number is
            used as the Page's pdf_page_number.
        page_number_val: Value for the Page's PageNumber element, or None to
            build the Page without one.
        step_numbers: Step numbers to create Step elements for; None or an
            empty list produces a Page with no steps.
        include_parts: When True, every Step gets its own one-part PartsList;
            when False, steps have no parts list.

    Returns:
        The ClassificationResult with a single "page" candidate added.
    """

    def _fresh_parts_list() -> PartsList | None:
        # A new PartsList per step so steps never share element instances.
        if not include_parts:
            return None
        single_part = Part(
            bbox=BBox(0, 0, 10, 10),
            count=PartCount(bbox=BBox(0, 0, 5, 5), count=1),
        )
        return PartsList(bbox=BBox(0, 0, 20, 10), parts=[single_part])

    result = ClassificationResult(page_data=page_data)

    # Optional page-number element.
    page_num_elem = None
    if page_number_val is not None:
        page_num_elem = PageNumber(bbox=BBox(0, 90, 10, 100), value=page_number_val)

    # One Step per requested number (none when step_numbers is None/empty).
    step_elems: list[Step] = [
        Step(
            bbox=BBox(0, 0, 80, 80),
            step_number=StepNumber(bbox=BBox(0, 10, 10, 20), value=number),
            parts_list=_fresh_parts_list(),
        )
        for number in (step_numbers or [])
    ]

    page = Page(
        bbox=BBox(0, 0, 100, 100),
        pdf_page_number=page_data.page_number,
        page_number=page_num_elem,
        steps=step_elems,
    )

    # Register the Page as the constructed element of a "page" candidate.
    result.add_candidate(
        Candidate(
            label="page",
            source_blocks=[],
            bbox=page.bbox,
            score=1.0,
            score_details=TestScore(),
            constructed=page,
        )
    )
    return result
571

572

573
class TestValidateResults:
    """Checks for the top-level validate_results entry point."""

    def test_perfect_document(self) -> None:
        """Three pages numbered 1-3 with steps 1-3 raise no errors or warnings."""
        results = [
            _make_classification_result(
                _make_page_data(n), page_number_val=n, step_numbers=[n]
            )
            for n in (1, 2, 3)
        ]
        batch_result = BatchClassificationResult(
            results=results, histogram=TextHistogram.empty()
        )

        outcome = validate_results(batch_result)
        assert outcome.error_count == 0
        assert outcome.warning_count == 0

    def test_missing_page_numbers(self) -> None:
        """Pages lacking a page number are reported as 'missing_page_numbers'."""
        # (pdf page, page number value, step number); pages 1 and 3 are unnumbered.
        layout = ((1, None, 1), (2, 2, 2), (3, None, 3))
        results = [
            _make_classification_result(
                _make_page_data(pdf_page),
                page_number_val=page_value,
                step_numbers=[step],
            )
            for pdf_page, page_value, step in layout
        ]
        batch_result = BatchClassificationResult(
            results=results, histogram=TextHistogram.empty()
        )

        outcome = validate_results(batch_result)
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "missing_page_numbers" in reported_rules

    def test_step_sequence_issues(self) -> None:
        """A skipped step number is reported as 'step_gaps'."""
        # (pdf page / page number, step number); step 2 is skipped.
        layout = ((1, 1), (2, 3), (3, 4))
        results = [
            _make_classification_result(
                _make_page_data(page), page_number_val=page, step_numbers=[step]
            )
            for page, step in layout
        ]
        batch_result = BatchClassificationResult(
            results=results, histogram=TextHistogram.empty()
        )

        outcome = validate_results(batch_result)
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "step_gaps" in reported_rules
628

629

630
class TestPrintValidation:
    """Checks for what print_validation writes to stdout."""

    def test_print_no_issues(self, capsys: object) -> None:
        """An empty result prints output containing 'passed'."""
        print_validation(ValidationResult())
        # capsys is pytest's capture fixture; read what was written to stdout.
        printed = capsys.readouterr().out  # type: ignore[union-attr]
        assert "passed" in printed

    def test_print_with_issues(self, capsys: object) -> None:
        """Rule names, messages and details of added issues appear in the output."""
        outcome = ValidationResult()
        error_issue = ValidationIssue(
            severity=ValidationSeverity.ERROR,
            rule="test_error",
            message="Test error message",
            pages=[1, 2, 3],
        )
        warning_issue = ValidationIssue(
            severity=ValidationSeverity.WARNING,
            rule="test_warning",
            message="Test warning message",
            details="Some details",
        )
        outcome.add(error_issue)
        outcome.add(warning_issue)

        print_validation(outcome, use_color=False)
        printed = capsys.readouterr().out  # type: ignore[union-attr]

        for expected in (
            "test_error",
            "Test error message",
            "test_warning",
            "Some details",
        ):
            assert expected in printed
668

669

670
# =============================================================================
671
# Domain Invariant Validation Rules Tests
672
# =============================================================================
673

674

675
def _make_page_with_steps(
    step_data: list[tuple[int, BBox, BBox | None]],  # (step_num, step_bbox, pl_bbox)
    page_number_val: int = 1,
    page_bbox: BBox | None = None,
) -> tuple[Page, PageData]:
    """Build a Page (and matching PageData) containing the described steps.

    Args:
        step_data: One (step_number, step_bbox, parts_list_bbox) tuple per
            step. A parts_list_bbox of None leaves that step without a parts
            list; otherwise the step gets a PartsList with a single part.
        page_number_val: Value for the Page's PageNumber element.
        page_bbox: Bounding box used for both the Page and the PageData;
            defaults to BBox(0, 0, 100, 100).

    Returns:
        Tuple of (Page, PageData). Both use page number 1 as their PDF page.
    """
    page_bbox = BBox(0, 0, 100, 100) if page_bbox is None else page_bbox

    page_data = PageData(page_number=1, bbox=page_bbox, blocks=[])

    steps = []
    for number, box, list_box in step_data:
        parts_list = None
        if list_box is not None:
            # Split the parts-list box horizontally: the part occupies
            # everything above the bottom 5 units, its count the bottom strip.
            split_y = list_box.y1 - 5
            count = PartCount(
                bbox=BBox(list_box.x0, split_y, list_box.x1, list_box.y1),
                count=1,
            )
            only_part = Part(
                bbox=BBox(list_box.x0, list_box.y0, list_box.x1, split_y),
                count=count,
            )
            parts_list = PartsList(bbox=list_box, parts=[only_part])

        # The step number sits in a 10x10 box at the step's top-left corner.
        number_box = BBox(box.x0, box.y0, box.x0 + 10, box.y0 + 10)
        steps.append(
            Step(
                bbox=box,
                step_number=StepNumber(bbox=number_box, value=number),
                parts_list=parts_list,
            )
        )

    page = Page(
        bbox=page_bbox,
        pdf_page_number=1,
        page_number=PageNumber(bbox=BBox(90, 90, 100, 100), value=page_number_val),
        steps=steps,
    )
    return page, page_data
734

735

736
class TestValidatePartsListHasParts:
    """Exercise the validate_parts_list_has_parts rule."""

    def test_no_empty_parts_lists(self) -> None:
        """A step whose parts list holds a part raises no issues."""
        # The helper populates every requested parts list with a single part.
        page, page_data = _make_page_with_steps(
            [(1, BBox(0, 0, 50, 50), BBox(40, 0, 50, 20))]
        )

        result = ValidationResult()
        validate_parts_list_has_parts(result, page, page_data)

        assert not result.has_issues()

    def test_empty_parts_list(self) -> None:
        """A parts list stripped of its parts is reported as one warning."""
        page, page_data = _make_page_with_steps(
            [(1, BBox(0, 0, 50, 50), BBox(40, 0, 50, 20))]
        )
        # Remove the part the helper added so the parts list ends up empty.
        only_step = page.steps[0]
        only_step.parts_list.parts = []  # type: ignore[union-attr]

        result = ValidationResult()
        validate_parts_list_has_parts(result, page, page_data)

        assert result.warning_count == 1
        first_issue = result.issues[0]
        assert first_issue.rule == "empty_parts_list"
1✔
764

765

766
class TestValidatePartsListsNoOverlap:
    """Exercise the validate_parts_lists_no_overlap rule."""

    def test_non_overlapping_parts_lists(self) -> None:
        """Parts lists placed in separate regions raise no issues."""
        left_step = (1, BBox(0, 0, 45, 50), BBox(35, 0, 45, 20))
        right_step = (2, BBox(55, 0, 100, 50), BBox(90, 0, 100, 20))
        page, page_data = _make_page_with_steps([left_step, right_step])

        result = ValidationResult()
        validate_parts_lists_no_overlap(result, page, page_data)

        assert not result.has_issues()

    def test_overlapping_parts_lists(self) -> None:
        """Two parts lists sharing the same bbox are reported as one error."""
        first_step = (1, BBox(0, 0, 60, 50), BBox(40, 0, 60, 20))
        # The second step's parts list reuses the exact bbox of the first one.
        second_step = (2, BBox(40, 0, 100, 50), BBox(40, 0, 60, 20))
        page, page_data = _make_page_with_steps([first_step, second_step])

        result = ValidationResult()
        validate_parts_lists_no_overlap(result, page, page_data)

        assert result.error_count == 1
        first_issue = result.issues[0]
        assert first_issue.rule == "overlapping_parts_lists"
1✔
793

794

795
class TestValidateStepsNoSignificantOverlap:
    """Exercise the validate_steps_no_significant_overlap rule."""

    def test_non_overlapping_steps(self) -> None:
        """Steps with disjoint bboxes raise no issues."""
        left_step = (1, BBox(0, 0, 45, 50), None)
        right_step = (2, BBox(55, 0, 100, 50), None)
        page, page_data = _make_page_with_steps([left_step, right_step])

        result = ValidationResult()
        validate_steps_no_significant_overlap(result, page, page_data)

        assert not result.has_issues()

    def test_significantly_overlapping_steps(self) -> None:
        """Steps sharing a large region are reported as one warning."""
        # x-ranges 0-80 and 20-100 share a 60-unit-wide strip at full height.
        first_step = (1, BBox(0, 0, 80, 50), None)
        second_step = (2, BBox(20, 0, 100, 50), None)
        page, page_data = _make_page_with_steps([first_step, second_step])

        result = ValidationResult()
        validate_steps_no_significant_overlap(
            result, page, page_data, overlap_threshold=0.05
        )

        assert result.warning_count == 1
        first_issue = result.issues[0]
        assert first_issue.rule == "overlapping_steps"

    def test_minor_overlap_allowed(self) -> None:
        """A one-unit-wide overlap with a 0.05 threshold raises no issues."""
        # x-ranges 0-51 and 50-100 share only a 1-unit-wide strip.
        first_step = (1, BBox(0, 0, 51, 50), None)
        second_step = (2, BBox(50, 0, 100, 50), None)
        page, page_data = _make_page_with_steps([first_step, second_step])

        result = ValidationResult()
        validate_steps_no_significant_overlap(
            result, page, page_data, overlap_threshold=0.05
        )

        assert not result.has_issues()
1✔
838

839

840
class TestValidateElementsWithinPage:
    """Exercise the validate_elements_within_page rule."""

    def test_elements_within_bounds(self) -> None:
        """Elements lying inside the default 100x100 page raise no issues."""
        inside_step = (1, BBox(10, 10, 90, 90), BBox(70, 10, 90, 30))
        page, page_data = _make_page_with_steps([inside_step])

        result = ValidationResult()
        validate_elements_within_page(result, page, page_data)

        assert not result.has_issues()

    def test_element_outside_bounds(self) -> None:
        """A step reaching past the page edge yields an out-of-page error."""
        # x1=110 pokes past the right edge of the default 100-wide page.
        overflowing_step = (1, BBox(10, 10, 110, 90), None)
        page, page_data = _make_page_with_steps([overflowing_step])

        result = ValidationResult()
        validate_elements_within_page(result, page, page_data)

        assert result.error_count >= 1
        reported_rules = [issue.rule for issue in result.issues]
        assert "element_outside_page" in reported_rules
1✔
865

866

867
class TestValidateNoDividerIntersection:
    """Exercise the validate_no_divider_intersection rule."""

    def _make_page_with_divider(
        self,
        divider_bbox: BBox,
        element_bbox: BBox,
        element_type: str = "Step",
    ) -> tuple[Page, PageData]:
        """Build a 100x100 page holding one vertical divider and one element.

        Args:
            divider_bbox: Bounding box assigned to the divider.
            element_bbox: Bounding box assigned to the accompanying element.
            element_type: Which element to create: "Step", "Background" or
                "ProgressBar".

        Returns:
            Tuple of (Page, PageData).

        Raises:
            ValueError: If element_type is not one of the supported names.
        """
        from build_a_long.pdf_extract.extractor.lego_page_elements import (
            Background,
            Divider,
            Page,
            ProgressBar,
            Step,
            StepNumber,
        )

        bounds = BBox(0, 0, 100, 100)
        raw_page = PageData(page_number=1, bbox=bounds, blocks=[])
        separator = Divider(
            bbox=divider_bbox,
            orientation=Divider.Orientation.VERTICAL,
        )

        # Page slots the extra element may occupy; exactly one gets filled.
        step_items: list[Any] = []
        backdrop: Any = None
        bar: Any = None
        if element_type == "Step":
            # The step number simply reuses the step's own bbox.
            step_items = [
                Step(
                    bbox=element_bbox,
                    step_number=StepNumber(bbox=element_bbox, value=1),
                )
            ]
        elif element_type == "Background":
            backdrop = Background(bbox=element_bbox)
        elif element_type == "ProgressBar":
            bar = ProgressBar(bbox=element_bbox, full_width=100)
        else:
            raise ValueError(f"Unknown element type: {element_type}")

        built_page = Page(
            bbox=bounds,
            pdf_page_number=1,
            dividers=[separator],
            steps=step_items,
            background=backdrop,
            progress_bar=bar,
        )
        return built_page, raw_page

    def test_no_dividers(self) -> None:
        """A page without any divider raises no issues."""
        lone_step = (1, BBox(0, 0, 10, 10), None)
        page, page_data = _make_page_with_steps([lone_step])

        result = ValidationResult()
        validate_no_divider_intersection(result, page, page_data)

        assert not result.has_issues()

    def test_no_intersection(self) -> None:
        """An element entirely left of the divider raises no issues."""
        vertical_line = BBox(50, 0, 51, 100)  # Vertical line at x=50
        left_side = BBox(0, 0, 40, 40)
        page, page_data = self._make_page_with_divider(
            divider_bbox=vertical_line,
            element_bbox=left_side,
        )

        result = ValidationResult()
        validate_no_divider_intersection(result, page, page_data)

        assert not result.has_issues()

    def test_intersection(self) -> None:
        """An element straddling the divider yields an intersection warning."""
        vertical_line = BBox(50, 0, 51, 100)  # Vertical line at x=50
        straddling = BBox(40, 0, 60, 40)  # Crosses x=50
        page, page_data = self._make_page_with_divider(
            divider_bbox=vertical_line,
            element_bbox=straddling,
        )

        result = ValidationResult()
        validate_no_divider_intersection(result, page, page_data)

        assert result.warning_count >= 1
        reported_rules = [issue.rule for issue in result.issues]
        assert "divider_intersection" in reported_rules

    def test_excluded_elements_ignored(self) -> None:
        """Background and ProgressBar elements crossing a divider are ignored."""
        excluded_cases = (
            (BBox(0, 0, 100, 100), "Background"),  # Full page background
            (BBox(0, 90, 100, 100), "ProgressBar"),  # Bottom bar crossing divider
        )

        # One result object is shared across both scenarios and checked after each.
        result = ValidationResult()
        for crossing_bbox, kind in excluded_cases:
            page, page_data = self._make_page_with_divider(
                divider_bbox=BBox(50, 0, 51, 100),
                element_bbox=crossing_bbox,
                element_type=kind,
            )
            validate_no_divider_intersection(result, page, page_data)
            assert not result.has_issues()
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc