• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

bramp / build-along / 20389851973

20 Dec 2025 05:31AM UTC coverage: 89.185% (+0.04%) from 89.145%
20389851973

push

github

bramp
Add support for `ty` to the pyproject.toml.

13384 of 15007 relevant lines covered (89.19%)

0.89 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.53
/src/build_a_long/pdf_extract/validation/validation_test.py
1
"""Tests for validation module."""
2

3
from typing import Any
1✔
4

5
import pydantic
1✔
6

7
from build_a_long.pdf_extract.classifier import (
1✔
8
    BatchClassificationResult,
9
    Candidate,
10
    ClassificationResult,
11
    TextHistogram,
12
)
13
from build_a_long.pdf_extract.classifier.test_utils import TestScore
1✔
14
from build_a_long.pdf_extract.extractor import PageData
1✔
15
from build_a_long.pdf_extract.extractor.bbox import BBox
1✔
16
from build_a_long.pdf_extract.extractor.lego_page_elements import (
1✔
17
    Background,
18
    CatalogContent,
19
    Divider,
20
    InstructionContent,
21
    Manual,
22
    Page,
23
    PageNumber,
24
    Part,
25
    PartCount,
26
    PartImage,
27
    PartsList,
28
    ProgressBar,
29
    ProgressBarBar,
30
    Step,
31
    StepNumber,
32
)
33

34
from .printer import print_validation
1✔
35
from .rules import (
1✔
36
    format_ranges,
37
    validate_catalog_coverage,
38
    validate_elements_within_page,
39
    validate_first_page_number,
40
    validate_missing_page_numbers,
41
    validate_no_divider_intersection,
42
    validate_page_number_sequence,
43
    validate_parts_list_has_parts,
44
    validate_parts_lists_no_overlap,
45
    validate_progress_bar_sequence,
46
    validate_step_sequence,
47
    validate_steps_have_parts,
48
    validate_steps_no_significant_overlap,
49
)
50
from .runner import validate_results
1✔
51
from .types import ValidationIssue, ValidationResult, ValidationSeverity
1✔
52

53

54
class TestValidationResult:
    """Tests for the ValidationResult container and ValidationIssue entries."""

    def test_empty_result(self) -> None:
        """A freshly constructed result has zero counts and reports no issues."""
        result = ValidationResult()
        assert result.error_count == 0
        assert result.warning_count == 0
        assert result.info_count == 0
        assert not result.has_issues()

    def test_add_issue(self) -> None:
        """Adding one issue per severity sets each severity counter to 1."""
        result = ValidationResult()
        result.add(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                rule="test",
                message="test error",
            )
        )
        result.add(
            ValidationIssue(
                severity=ValidationSeverity.WARNING,
                rule="test",
                message="test warning",
            )
        )
        result.add(
            ValidationIssue(
                severity=ValidationSeverity.INFO,
                rule="test",
                message="test info",
            )
        )

        assert result.error_count == 1
        assert result.warning_count == 1
        assert result.info_count == 1
        assert result.has_issues()

    def test_frozen_issue(self) -> None:
        """Assigning to a field of a ValidationIssue must be rejected."""
        issue = ValidationIssue(
            severity=ValidationSeverity.ERROR,
            rule="test",
            message="test",
        )
        # Should not be able to modify (Pydantic frozen model raises
        # ValidationError). Only the assignment is guarded by the ``try``; the
        # failure path lives in ``else`` so it cannot be mistaken for part of
        # the statement under test.
        try:
            issue.message = "new message"  # type: ignore[misc]
        except pydantic.ValidationError:
            pass  # Expected
        else:
            raise AssertionError("Expected frozen model to raise")
109

110

111
class TestFormatRanges:
    """Exercise the format_ranges helper function."""

    def test_empty_list(self) -> None:
        """An empty input formats to the empty string."""
        formatted = format_ranges([])
        assert formatted == ""

    def test_single_number(self) -> None:
        """A lone number is rendered as itself."""
        formatted = format_ranges([5])
        assert formatted == "5"

    def test_consecutive_range(self) -> None:
        """A run of consecutive numbers collapses to "first-last"."""
        formatted = format_ranges([1, 2, 3, 4, 5])
        assert formatted == "1-5"

    def test_separate_numbers(self) -> None:
        """Non-adjacent numbers are listed individually, comma separated."""
        formatted = format_ranges([1, 3, 5])
        assert formatted == "1, 3, 5"

    def test_mixed_ranges(self) -> None:
        """Runs and isolated numbers can be mixed in one output."""
        formatted = format_ranges([1, 2, 3, 5, 7, 8, 9])
        assert formatted == "1-3, 5, 7-9"

    def test_long_list_truncation(self) -> None:
        """Output that would be very long is cut short and ends with "..."."""
        # The odd numbers 1..199 never form a run, so the untruncated
        # rendering would be a very long comma-separated string.
        odd_numbers = [n for n in range(1, 200) if n % 2 == 1]
        formatted = format_ranges(odd_numbers)
        assert len(formatted) <= 100
        assert formatted.endswith("...")
141

142

143
class TestValidateMissingPageNumbers:
    """Exercise the validate_missing_page_numbers rule."""

    def test_no_missing_pages(self) -> None:
        """With no missing pages the rule records nothing."""
        outcome = ValidationResult()
        missing: list[int] = []
        validate_missing_page_numbers(outcome, missing, 10)
        assert not outcome.has_issues()

    def test_high_coverage(self) -> None:
        """Coverage above 90% is reported as a single INFO issue."""
        outcome = ValidationResult()
        # 1 missing page out of 20 -> 95% coverage
        validate_missing_page_numbers(outcome, [1], 20)
        assert outcome.info_count == 1
        first_issue = outcome.issues[0]
        assert first_issue.severity == ValidationSeverity.INFO

    def test_medium_coverage(self) -> None:
        """Coverage between 50% and 90% is reported as a WARNING."""
        outcome = ValidationResult()
        # 3 missing pages out of 10 -> 70% coverage
        validate_missing_page_numbers(outcome, [1, 2, 3], 10)
        assert outcome.warning_count == 1

    def test_low_coverage(self) -> None:
        """Coverage below 50% is reported as an ERROR."""
        outcome = ValidationResult()
        # 7 missing pages out of 10 -> 30% coverage
        seven_missing = list(range(1, 8))
        validate_missing_page_numbers(outcome, seven_missing, 10)
        assert outcome.error_count == 1
170

171

172
class TestValidateStepSequence:
    """Exercise the validate_step_sequence rule."""

    def test_empty_steps(self) -> None:
        """An empty step list produces no issues."""
        outcome = ValidationResult()
        validate_step_sequence(outcome, [])
        assert not outcome.has_issues()

    def test_valid_sequence(self) -> None:
        """Steps 1, 2, 3 in order produce no issues."""
        outcome = ValidationResult()
        steps = [(1, 1), (2, 2), (3, 3)]
        validate_step_sequence(outcome, steps)
        assert not outcome.has_issues()

    def test_duplicate_steps(self) -> None:
        """A repeated step number raises a "duplicate_steps" issue."""
        outcome = ValidationResult()
        # Step 1 appears twice
        steps = [(1, 1), (2, 1), (3, 2)]
        validate_step_sequence(outcome, steps)
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "duplicate_steps" in reported_rules

    def test_step_gaps(self) -> None:
        """A skipped step number raises a "step_gaps" issue."""
        outcome = ValidationResult()
        # Step 2 is missing
        steps = [(1, 1), (2, 3)]
        validate_step_sequence(outcome, steps)
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "step_gaps" in reported_rules

    def test_step_not_starting_at_one(self) -> None:
        """A sequence that begins above 1 raises a "step_start" issue."""
        outcome = ValidationResult()
        # Sequence starts at 5
        steps = [(1, 5), (2, 6), (3, 7)]
        validate_step_sequence(outcome, steps)
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "step_start" in reported_rules
205

206

207
class TestValidateFirstPageNumber:
    """Exercise the validate_first_page_number rule."""

    def test_no_page_numbers(self) -> None:
        """No detected page numbers yields one "no_page_numbers" error."""
        outcome = ValidationResult()
        validate_first_page_number(outcome, [])
        assert outcome.error_count == 1
        first_issue = outcome.issues[0]
        assert first_issue.rule == "no_page_numbers"

    def test_reasonable_first_page(self) -> None:
        """A sequence starting at page 1 produces no issues."""
        outcome = ValidationResult()
        validate_first_page_number(outcome, [1, 2, 3])
        assert not outcome.has_issues()

    def test_high_first_page(self) -> None:
        """A sequence starting at page 15 raises "high_first_page"."""
        outcome = ValidationResult()
        validate_first_page_number(outcome, [15, 16, 17])
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "high_first_page" in reported_rules
228

229

230
class TestValidatePageNumberSequence:
    """Exercise the validate_page_number_sequence rule."""

    def test_single_page(self) -> None:
        """A single page number produces no issues."""
        outcome = ValidationResult()
        validate_page_number_sequence(outcome, [1])
        assert not outcome.has_issues()

    def test_valid_sequence(self) -> None:
        """Consecutive page numbers 1..5 produce no issues."""
        outcome = ValidationResult()
        page_numbers = [1, 2, 3, 4, 5]
        validate_page_number_sequence(outcome, page_numbers)
        assert not outcome.has_issues()

    def test_valid_sequence_starting_later(self) -> None:
        """A consecutive sequence need not start at 1.

        Leading pages without numbers (e.g. cover pages) are acceptable.
        """
        outcome = ValidationResult()
        page_numbers = [5, 6, 7, 8, 9]
        validate_page_number_sequence(outcome, page_numbers)
        assert not outcome.has_issues()

    def test_valid_sequence_ending_early(self) -> None:
        """A consecutive sequence may stop before the document's last page.

        Trailing pages without numbers (e.g. a back cover) are acceptable.
        Only consecutiveness is checked here; the total page count is not
        known to this rule call.
        """
        outcome = ValidationResult()
        # 10-14 is consecutive, even if more pages could follow
        page_numbers = [10, 11, 12, 13, 14]
        validate_page_number_sequence(outcome, page_numbers)
        assert not outcome.has_issues()

    def test_valid_sequence_starting_later_and_ending_early(self) -> None:
        """Missing pages at both ends are fine if the middle has no gaps."""
        outcome = ValidationResult()
        page_numbers = [5, 6, 7, 8, 9, 10]
        validate_page_number_sequence(outcome, page_numbers)
        assert not outcome.has_issues()

    def test_decreasing_sequence(self) -> None:
        """A page number lower than its predecessor raises "page_sequence"."""
        outcome = ValidationResult()
        # The value drops from 5 to 3
        page_numbers = [1, 2, 5, 3, 4]
        validate_page_number_sequence(outcome, page_numbers)
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "page_sequence" in reported_rules

    def test_gap_in_middle(self) -> None:
        """A gap inside the sequence raises a "page_gaps" WARNING."""
        outcome = ValidationResult()
        # Gap: 2 -> 5
        page_numbers = [1, 2, 5, 6]
        validate_page_number_sequence(outcome, page_numbers)
        gap_issues = [issue for issue in outcome.issues if issue.rule == "page_gaps"]
        assert gap_issues
        # The gap is reported at WARNING severity
        assert gap_issues[0].severity == ValidationSeverity.WARNING

    def test_small_gap_not_allowed(self) -> None:
        """Even a gap of a single missing page is flagged."""
        outcome = ValidationResult()
        # Gap: 2 -> 4
        page_numbers = [1, 2, 4, 5]
        validate_page_number_sequence(outcome, page_numbers)
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "page_gaps" in reported_rules
295

296

297
class TestValidateProgressBarSequence:
    """Exercise the validate_progress_bar_sequence rule.

    Every input is a list of (page, value) tuples.
    """

    def test_empty_progress_bars(self) -> None:
        """An empty list produces no issues."""
        outcome = ValidationResult()
        validate_progress_bar_sequence(outcome, [])
        assert not outcome.has_issues()

    def test_valid_sequence(self) -> None:
        """Monotonically increasing values produce no issues."""
        outcome = ValidationResult()
        samples = [(1, 0.1), (2, 0.2), (3, 0.3), (4, 0.4)]
        validate_progress_bar_sequence(outcome, samples)
        assert not outcome.has_issues()

    def test_decreasing_sequence(self) -> None:
        """A drop in value yields one "progress_bar_decrease" warning."""
        outcome = ValidationResult()
        # The value decreases at p.2
        samples = [(1, 0.5), (2, 0.4), (3, 0.6)]
        validate_progress_bar_sequence(outcome, samples)
        assert outcome.warning_count == 1
        first_issue = outcome.issues[0]
        assert first_issue.rule == "progress_bar_decrease"

    def test_consistent_increments(self) -> None:
        """A steady rate of progress produces no issues."""
        outcome = ValidationResult()
        # Constant 0.1 increment
        samples = [(1, 0.1), (2, 0.2), (3, 0.3), (4, 0.4), (5, 0.5), (6, 0.6)]
        validate_progress_bar_sequence(outcome, samples)
        assert not outcome.has_issues()

    def test_inconsistent_increments(self) -> None:
        """Highly variable increments yield an INFO-level issue."""
        outcome = ValidationResult()
        # Increments vary wildly: 0.01, 0.4, 0.01, 0.01, 0.01
        samples = [(1, 0.1), (2, 0.11), (3, 0.51), (4, 0.52), (5, 0.53), (6, 0.54)]
        validate_progress_bar_sequence(outcome, samples)
        inconsistent = [
            issue
            for issue in outcome.issues
            if issue.rule == "progress_bar_inconsistent"
        ]
        assert inconsistent
        assert inconsistent[0].severity == ValidationSeverity.INFO

    def test_not_enough_samples(self) -> None:
        """The consistency check is skipped when there are too few samples."""
        outcome = ValidationResult()
        # Highly inconsistent, but only 5 samples (needs >5)
        samples = [(1, 0.1), (2, 0.11), (3, 0.51), (4, 0.52), (5, 0.53)]
        validate_progress_bar_sequence(outcome, samples)
        # Should be ignored because there are no more than 5 samples
        assert not outcome.has_issues()
359

360

361
class TestValidateCatalogCoverage:
    """Exercise the validate_catalog_coverage rule."""

    def _make_part_with_image(
        self,
        image_id: str | None = None,
        xref: int | None = None,
        digest: bytes | None = None,
    ) -> Part:
        """Build a Part whose diagram image carries the given identifiers.

        Args:
            image_id: Value for the diagram's ``image_id`` field.
            xref: Value for the diagram's ``xref`` field.
            digest: Value for the diagram's ``digest`` field.
        """
        part_bbox = BBox(0, 0, 10, 10)
        part_count = PartCount(bbox=BBox(0, 0, 5, 5), count=1)
        part_image = PartImage(
            bbox=BBox(0, 0, 10, 10),
            image_id=image_id,
            xref=xref,
            digest=digest,
        )
        return Part(bbox=part_bbox, count=part_count, diagram=part_image)

    def _make_manual(
        self,
        instruction_parts_config: list[dict[str, Any]],
        catalog_parts_config: list[dict[str, Any]],
    ) -> Manual:
        """Build a Manual with up to one instruction page and one catalog page.

        A page is only added when its config list is non-empty.

        Args:
            instruction_parts_config: One dict per instruction part; each is
                passed as keyword arguments ('image_id', 'xref', 'digest') to
                ``_make_part_with_image``.
            catalog_parts_config: Same shape, for the catalog page's parts.
        """
        pages = []

        if instruction_parts_config:
            # PDF page 1: an instruction page holding a single step whose
            # parts list contains all the configured parts.
            instruction_parts = [
                self._make_part_with_image(**cfg) for cfg in instruction_parts_config
            ]
            only_step = Step(
                bbox=BBox(0, 0, 100, 100),
                step_number=StepNumber(bbox=BBox(0, 0, 10, 10), value=1),
                parts_list=PartsList(bbox=BBox(0, 0, 50, 50), parts=instruction_parts),
            )
            instruction_page = Page(
                bbox=BBox(0, 0, 100, 100),
                pdf_page_number=1,
                page_number=PageNumber(bbox=BBox(90, 90, 100, 100), value=1),
                categories={Page.PageType.INSTRUCTION},
                instruction=InstructionContent(steps=[only_step]),
            )
            pages.append(instruction_page)

        if catalog_parts_config:
            # PDF page 2: a catalog page listing the configured parts.
            catalog_parts = [
                self._make_part_with_image(**cfg) for cfg in catalog_parts_config
            ]
            catalog_page = Page(
                bbox=BBox(0, 0, 100, 100),
                pdf_page_number=2,
                page_number=PageNumber(bbox=BBox(90, 90, 100, 100), value=2),
                categories={Page.PageType.CATALOG},
                catalog=CatalogContent(parts=catalog_parts),
            )
            pages.append(catalog_page)

        return Manual(pages=pages)

    def test_no_catalog_pages(self) -> None:
        """Without a catalog page the rule records nothing."""
        manual = self._make_manual([{"xref": 1}], [])
        outcome = ValidationResult()
        validate_catalog_coverage(outcome, manual)
        assert not outcome.has_issues()

    def test_no_instruction_parts(self) -> None:
        """Without instruction parts the rule records nothing."""
        manual = self._make_manual([], [{"xref": 1}])
        outcome = ValidationResult()
        validate_catalog_coverage(outcome, manual)
        assert not outcome.has_issues()

    def test_perfect_coverage_xref(self) -> None:
        """All instruction parts found in the catalog by xref -> 100.0%."""
        instruction_cfg = [{"xref": 1}, {"xref": 2}]
        catalog_cfg = [{"xref": 1}, {"xref": 2}, {"xref": 3}]
        manual = self._make_manual(instruction_cfg, catalog_cfg)
        outcome = ValidationResult()
        validate_catalog_coverage(outcome, manual)
        assert outcome.info_count == 1
        assert "100.0%" in outcome.issues[0].message

    def test_perfect_coverage_digest(self) -> None:
        """All instruction parts found in the catalog by digest -> 100.0%."""
        instruction_cfg = [{"digest": b"a"}, {"digest": b"b"}]
        catalog_cfg = [{"digest": b"a"}, {"digest": b"b"}, {"digest": b"c"}]
        manual = self._make_manual(instruction_cfg, catalog_cfg)
        outcome = ValidationResult()
        validate_catalog_coverage(outcome, manual)
        assert outcome.info_count == 1
        assert "100.0%" in outcome.issues[0].message

    def test_mixed_matching(self) -> None:
        """Parts may match by xref or by digest within the same manual."""
        instruction_cfg = [
            {"xref": 1},  # Matches by xref
            {"digest": b"b"},  # Matches by digest
            {"xref": 3, "digest": b"c"},  # Matches by xref (preferred)
        ]
        catalog_cfg = [
            {"xref": 1, "digest": b"x"},
            {"xref": 9, "digest": b"b"},
            {"xref": 3, "digest": b"z"},
        ]
        manual = self._make_manual(instruction_cfg, catalog_cfg)
        outcome = ValidationResult()
        validate_catalog_coverage(outcome, manual)
        assert outcome.info_count == 1
        assert "100.0%" in outcome.issues[0].message

    def test_partial_coverage_experimental(self) -> None:
        """With experimental=True, missing parts are reported as INFO."""
        # One part matches by xref, one is missing from the catalog
        manual = self._make_manual([{"xref": 1}, {"xref": 2}], [{"xref": 1}])
        outcome = ValidationResult()
        validate_catalog_coverage(outcome, manual, experimental=True)

        # One INFO for the coverage stat plus one INFO for the missing parts
        assert outcome.info_count == 2
        assert outcome.warning_count == 0
        missing = [
            issue for issue in outcome.issues if issue.rule == "missing_from_catalog"
        ]
        assert missing
        missing_issue = missing[0]
        assert missing_issue.severity == ValidationSeverity.INFO
        assert "[EXPERIMENTAL]" in missing_issue.message
        assert missing_issue.details is not None
        assert "xref:2" in missing_issue.details

    def test_partial_coverage_strict(self) -> None:
        """With experimental=False, missing parts are reported as WARNING."""
        # One part matches by digest, one is missing from the catalog
        manual = self._make_manual(
            [{"digest": b"a"}, {"digest": b"b"}],
            [{"digest": b"a"}],
        )
        outcome = ValidationResult()
        validate_catalog_coverage(outcome, manual, experimental=False)

        # One INFO for the coverage stat plus one WARNING for the missing parts
        assert outcome.info_count == 1
        assert outcome.warning_count == 1
        missing = [
            issue for issue in outcome.issues if issue.rule == "missing_from_catalog"
        ]
        assert missing
        missing_issue = missing[0]
        assert missing_issue.severity == ValidationSeverity.WARNING
        assert "[EXPERIMENTAL]" not in missing_issue.message
        assert missing_issue.details is not None
        # Only the "digest:" prefix is checked, not the rendering of b"b"
        assert "digest:" in missing_issue.details

    def test_zero_coverage(self) -> None:
        """Zero coverage gives only the stats INFO (assumes no image reuse)."""
        manual = self._make_manual([{"xref": 1}], [{"xref": 2}])
        outcome = ValidationResult()
        validate_catalog_coverage(outcome, manual)

        # Stats info only; no warning is expected when coverage is 0%
        assert outcome.info_count == 1
        assert outcome.warning_count == 0
        assert "0.0%" in outcome.issues[0].message
542

543

544
class TestValidateStepsHaveParts:
    """Exercise the validate_steps_have_parts rule."""

    def test_all_steps_have_parts(self) -> None:
        """An empty list of part-less steps produces no issues."""
        outcome = ValidationResult()
        validate_steps_have_parts(outcome, [])
        assert not outcome.has_issues()

    def test_some_steps_missing_parts(self) -> None:
        """Part-less steps are summarised in one "steps_without_parts" INFO."""
        outcome = ValidationResult()
        # Each entry is a (page, step_number) tuple
        steps_without_parts = [(1, 1), (3, 5), (5, 10)]
        validate_steps_have_parts(outcome, steps_without_parts)

        assert outcome.info_count == 1
        reported = outcome.issues[0]
        assert reported.rule == "steps_without_parts"
        assert reported.pages == [1, 3, 5]
        assert reported.details is not None
        assert "step 1 (p.1)" in reported.details
        assert "step 5 (p.3)" in reported.details
        assert "step 10 (p.5)" in reported.details
566

567

568
def _make_page_data(page_num: int) -> PageData:
    """Build a bare PageData with the given page number and no blocks.

    The page bounding box is fixed at BBox(0, 0, 100, 100).
    """
    full_page = BBox(0, 0, 100, 100)
    return PageData(page_number=page_num, bbox=full_page, blocks=[])
575

576

577
def _make_classification_result(
    page_data: PageData,
    page_number_val: int | None = None,
    step_numbers: list[int] | None = None,
    include_parts: bool = True,
) -> ClassificationResult:
    """Build a ClassificationResult holding one "page" candidate.

    Args:
        page_data: The PageData the result is associated with; its
            ``page_number`` becomes the Page's ``pdf_page_number``.
        page_number_val: The LEGO page number value, or None to build the
            Page without a PageNumber element.
        step_numbers: Step numbers to create Step elements for. None or an
            empty list yields a Page without instruction content.
        include_parts: When True, each step gets a PartsList with one Part.

    Returns:
        The ClassificationResult with the page candidate added.
    """
    result = ClassificationResult(page_data=page_data)

    # Optional page-number element
    if page_number_val is None:
        page_num_elem = None
    else:
        page_num_elem = PageNumber(bbox=BBox(0, 90, 10, 100), value=page_number_val)

    # One Step per requested step number (none when step_numbers is falsy)
    step_elems: list[Step] = []
    for step_num in step_numbers or []:
        parts_list: PartsList | None = None
        if include_parts:
            single_part = Part(
                bbox=BBox(0, 0, 10, 10),
                count=PartCount(bbox=BBox(0, 0, 5, 5), count=1),
            )
            parts_list = PartsList(bbox=BBox(0, 0, 20, 10), parts=[single_part])
        step_elems.append(
            Step(
                bbox=BBox(0, 0, 80, 80),
                step_number=StepNumber(bbox=BBox(0, 10, 10, 20), value=step_num),
                parts_list=parts_list,
            )
        )

    # Instruction content is only attached when at least one step exists
    instruction = InstructionContent(steps=step_elems) if step_elems else None
    page = Page(
        bbox=BBox(0, 0, 100, 100),
        pdf_page_number=page_data.page_number,
        page_number=page_num_elem,
        instruction=instruction,
    )

    # Register the constructed page as a candidate on the result
    result.add_candidate(
        Candidate(
            label="page",
            source_blocks=[],
            bbox=page.bbox,
            score=1.0,
            score_details=TestScore(),
            constructed=page,
        )
    )
    return result
641

642

643
class TestValidateResults:
    """Exercise the top-level validate_results function."""

    def test_perfect_document(self) -> None:
        """Three pages numbered 1-3 with steps 1-3 give no errors or warnings."""
        pages = [_make_page_data(i) for i in range(1, 4)]
        results = [
            _make_classification_result(page, page_number_val=n, step_numbers=[n])
            for n, page in enumerate(pages, start=1)
        ]
        batch = BatchClassificationResult(
            results=results, histogram=TextHistogram.empty()
        )

        outcome = validate_results(batch)
        # INFO issues are tolerated; only errors and warnings must be absent
        assert outcome.error_count == 0
        assert outcome.warning_count == 0

    def test_missing_page_numbers(self) -> None:
        """Pages lacking a page number trigger "missing_page_numbers"."""
        pages = [_make_page_data(i) for i in range(1, 4)]
        # (page_number_val, step number) per page; pages 1 and 3 are unnumbered
        specs = [(None, 1), (2, 2), (None, 3)]
        results = [
            _make_classification_result(
                page, page_number_val=number, step_numbers=[step]
            )
            for page, (number, step) in zip(pages, specs)
        ]
        batch = BatchClassificationResult(
            results=results, histogram=TextHistogram.empty()
        )

        outcome = validate_results(batch)
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "missing_page_numbers" in reported_rules

    def test_step_sequence_issues(self) -> None:
        """A skipped step number triggers "step_gaps"."""
        pages = [_make_page_data(i) for i in range(1, 4)]
        # (page_number_val, step number) per page; step 2 is skipped
        specs = [(1, 1), (2, 3), (3, 4)]
        results = [
            _make_classification_result(
                page, page_number_val=number, step_numbers=[step]
            )
            for page, (number, step) in zip(pages, specs)
        ]
        batch = BatchClassificationResult(
            results=results, histogram=TextHistogram.empty()
        )

        outcome = validate_results(batch)
        reported_rules = [issue.rule for issue in outcome.issues]
        assert "step_gaps" in reported_rules
698

699

700
class TestPrintValidation:
    """Exercise the print_validation function via captured stdout."""

    def test_print_no_issues(self, capsys: object) -> None:
        """An empty result prints output containing "passed"."""
        empty = ValidationResult()
        print_validation(empty)
        # The success message is expected on stdout
        stdout = capsys.readouterr().out  # type: ignore[union-attr]
        assert "passed" in stdout

    def test_print_with_issues(self, capsys: object) -> None:
        """Rules, messages and details of added issues appear in the output."""
        outcome = ValidationResult()
        error_issue = ValidationIssue(
            severity=ValidationSeverity.ERROR,
            rule="test_error",
            message="Test error message",
            pages=[1, 2, 3],
        )
        warning_issue = ValidationIssue(
            severity=ValidationSeverity.WARNING,
            rule="test_warning",
            message="Test warning message",
            details="Some details",
        )
        outcome.add(error_issue)
        outcome.add(warning_issue)

        print_validation(outcome, use_color=False)
        stdout = capsys.readouterr().out  # type: ignore[union-attr]

        for expected in (
            "test_error",
            "Test error message",
            "test_warning",
            "Some details",
        ):
            assert expected in stdout
738

739

740
# =============================================================================
741
# Domain Invariant Validation Rules Tests
742
# =============================================================================
743

744

745
def _make_page_with_steps(
    step_data: list[tuple[int, BBox, BBox | None]],  # (step_num, step_bbox, pl_bbox)
    page_number_val: int = 1,
    page_bbox: BBox | None = None,
) -> tuple[Page, PageData]:
    """Build a Page populated with steps, plus a matching PageData.

    Args:
        step_data: One (step_number, step_bbox, parts_list_bbox) tuple per
            step. A parts_list_bbox of None yields a step with no parts list.
        page_number_val: Value given to the page's PageNumber element.
        page_bbox: Bounding box of the page; BBox(0, 0, 100, 100) when omitted.

    Returns:
        Tuple of (Page, PageData)
    """
    bounds = page_bbox if page_bbox is not None else BBox(0, 0, 100, 100)
    page_data = PageData(page_number=1, bbox=bounds, blocks=[])

    def build_step(number: int, step_bbox: BBox, pl_bbox: BBox | None) -> Step:
        """Create one Step, with a single-part parts list when requested."""
        parts_list = None
        if pl_bbox is not None:
            # The part covers the box up to y1 - 5; its count takes the
            # remaining strip between y1 - 5 and y1.
            split_y = pl_bbox.y1 - 5
            only_part = Part(
                bbox=BBox(pl_bbox.x0, pl_bbox.y0, pl_bbox.x1, split_y),
                count=PartCount(
                    bbox=BBox(pl_bbox.x0, split_y, pl_bbox.x1, pl_bbox.y1),
                    count=1,
                ),
            )
            parts_list = PartsList(bbox=pl_bbox, parts=[only_part])

        # The step number is a 10x10 box anchored at the step's (x0, y0).
        number_bbox = BBox(
            step_bbox.x0, step_bbox.y0, step_bbox.x0 + 10, step_bbox.y0 + 10
        )
        return Step(
            bbox=step_bbox,
            step_number=StepNumber(bbox=number_bbox, value=number),
            parts_list=parts_list,
        )

    steps = [build_step(*entry) for entry in step_data]

    # With no steps the page carries no instruction content at all.
    instruction = InstructionContent(steps=steps) if steps else None
    page = Page(
        bbox=bounds,
        pdf_page_number=1,
        page_number=PageNumber(bbox=BBox(90, 90, 100, 100), value=page_number_val),
        instruction=instruction,
    )
    return page, page_data
1✔
804

805

806
class TestValidatePartsListHasParts:
    """Tests for the validate_parts_list_has_parts rule."""

    def test_no_empty_parts_lists(self) -> None:
        """A parts list that contains a part raises no issues."""
        step_bbox, parts_bbox = BBox(0, 0, 50, 50), BBox(40, 0, 50, 20)
        page, page_data = _make_page_with_steps([(1, step_bbox, parts_bbox)])

        result = ValidationResult()
        validate_parts_list_has_parts(result, page, page_data)

        assert not result.has_issues()

    def test_empty_parts_list(self) -> None:
        """A parts list with no parts is reported as one warning."""
        step_bbox, parts_bbox = BBox(0, 0, 50, 50), BBox(40, 0, 50, 20)
        page, page_data = _make_page_with_steps([(1, step_bbox, parts_bbox)])

        # _make_page_with_steps puts one part in the list; clear it by hand.
        assert page.instruction is not None
        first_step = page.instruction.steps[0]
        first_step.parts_list.parts = []  # type: ignore[union-attr]

        result = ValidationResult()
        validate_parts_list_has_parts(result, page, page_data)

        assert result.warning_count == 1
        assert result.issues[0].rule == "empty_parts_list"
1✔
835

836

837
class TestValidatePartsListsNoOverlap:
    """Tests for the validate_parts_lists_no_overlap rule."""

    def test_non_overlapping_parts_lists(self) -> None:
        """Two parts lists in disjoint x-ranges raise no issues."""
        left_step = (1, BBox(0, 0, 45, 50), BBox(35, 0, 45, 20))
        right_step = (2, BBox(55, 0, 100, 50), BBox(90, 0, 100, 20))
        page, page_data = _make_page_with_steps([left_step, right_step])

        result = ValidationResult()
        validate_parts_lists_no_overlap(result, page, page_data)

        assert not result.has_issues()

    def test_overlapping_parts_lists(self) -> None:
        """Two parts lists sharing one bbox are reported as one error."""
        # Both steps are given the very same parts-list bbox.
        shared_parts_bbox = BBox(40, 0, 60, 20)
        first_step = (1, BBox(0, 0, 60, 50), shared_parts_bbox)
        second_step = (2, BBox(40, 0, 100, 50), BBox(40, 0, 60, 20))
        page, page_data = _make_page_with_steps([first_step, second_step])

        result = ValidationResult()
        validate_parts_lists_no_overlap(result, page, page_data)

        assert result.error_count == 1
        assert result.issues[0].rule == "overlapping_parts_lists"
1✔
864

865

866
class TestValidateStepsNoSignificantOverlap:
    """Tests for the validate_steps_no_significant_overlap rule."""

    def test_non_overlapping_steps(self) -> None:
        """Steps in disjoint x-ranges raise no issues."""
        layout = [
            (1, BBox(0, 0, 45, 50), None),
            (2, BBox(55, 0, 100, 50), None),
        ]
        page, page_data = _make_page_with_steps(layout)

        result = ValidationResult()
        validate_steps_no_significant_overlap(result, page, page_data)

        assert not result.has_issues()

    def test_significantly_overlapping_steps(self) -> None:
        """Steps sharing a wide region are reported as one warning."""
        layout = [
            (1, BBox(0, 0, 80, 50), None),
            # Shares x in [20, 80] with step 1 over the full height.
            (2, BBox(20, 0, 100, 50), None),
        ]
        page, page_data = _make_page_with_steps(layout)

        result = ValidationResult()
        validate_steps_no_significant_overlap(
            result, page, page_data, overlap_threshold=0.05
        )

        assert result.warning_count == 1
        assert result.issues[0].rule == "overlapping_steps"

    def test_minor_overlap_allowed(self) -> None:
        """A one-unit-wide overlap with a 0.05 threshold raises no issues."""
        layout = [
            (1, BBox(0, 0, 51, 50), None),
            # Shares only x in [50, 51] with step 1.
            (2, BBox(50, 0, 100, 50), None),
        ]
        page, page_data = _make_page_with_steps(layout)

        result = ValidationResult()
        validate_steps_no_significant_overlap(
            result, page, page_data, overlap_threshold=0.05
        )

        assert not result.has_issues()
1✔
909

910

911
class TestValidateElementsWithinPage:
    """Tests for the validate_elements_within_page rule."""

    def test_elements_within_bounds(self) -> None:
        """Elements lying inside the default page bbox raise no issues."""
        inside_step = (1, BBox(10, 10, 90, 90), BBox(70, 10, 90, 30))
        page, page_data = _make_page_with_steps([inside_step])

        result = ValidationResult()
        validate_elements_within_page(result, page, page_data)

        assert not result.has_issues()

    def test_element_outside_bounds(self) -> None:
        """A step reaching past the page's right edge is reported as an error."""
        # x1 = 110 exceeds the default page bbox, whose x1 is 100.
        overflowing_step = (1, BBox(10, 10, 110, 90), None)
        page, page_data = _make_page_with_steps([overflowing_step])

        result = ValidationResult()
        validate_elements_within_page(result, page, page_data)

        assert result.error_count >= 1
        reported_rules = [issue.rule for issue in result.issues]
        assert "element_outside_page" in reported_rules
1✔
936

937

938
class TestValidateNoDividerIntersection:
    """Tests for the validate_no_divider_intersection rule."""

    def _make_page_with_divider(
        self,
        divider_bbox: BBox,
        element_bbox: BBox,
        element_type: str = "Step",
    ) -> tuple[Page, PageData]:
        """Create a page holding one vertical divider and one other element.

        Args:
            divider_bbox: Bounding box of the divider.
            element_bbox: Bounding box of the extra element.
            element_type: Which element to add: "Step", "Background" or
                "ProgressBar".

        Returns:
            Tuple of (Page, PageData); the page bbox is BBox(0, 0, 100, 100).

        Raises:
            ValueError: If element_type is not one of the supported names.
        """
        page_bbox = BBox(0, 0, 100, 100)
        page_data = PageData(page_number=1, bbox=page_bbox, blocks=[])
        divider = Divider(bbox=divider_bbox, orientation=Divider.Orientation.VERTICAL)

        # Exactly one of these three slots is filled, chosen by element_type.
        steps: list[Step] = []
        background: Background | None = None
        progress_bar: ProgressBar | None = None

        if element_type == "Step":
            steps.append(
                Step(
                    bbox=element_bbox,
                    step_number=StepNumber(bbox=element_bbox, value=1),
                )
            )
        elif element_type == "Background":
            background = Background(bbox=element_bbox)
        elif element_type == "ProgressBar":
            progress_bar = ProgressBar(
                bbox=element_bbox,
                full_width=100,
                bar=ProgressBarBar(bbox=element_bbox),
            )
        else:
            raise ValueError(f"Unknown element type: {element_type}")

        page = Page(
            bbox=page_bbox,
            pdf_page_number=1,
            dividers=[divider],
            instruction=InstructionContent(steps=steps) if steps else None,
            background=background,
            progress_bar=progress_bar,
        )
        return page, page_data

    def test_no_dividers(self) -> None:
        """A page without any dividers raises no issues."""
        page, page_data = _make_page_with_steps([(1, BBox(0, 0, 10, 10), None)])

        result = ValidationResult()
        validate_no_divider_intersection(result, page, page_data)

        assert not result.has_issues()

    def test_no_intersection(self) -> None:
        """A step entirely to one side of the divider raises no issues."""
        page, page_data = self._make_page_with_divider(
            divider_bbox=BBox(50, 0, 51, 100),  # Divider spans x in [50, 51]
            element_bbox=BBox(0, 0, 40, 40),  # Ends at x=40, short of it
        )

        result = ValidationResult()
        validate_no_divider_intersection(result, page, page_data)

        assert not result.has_issues()

    def test_intersection(self) -> None:
        """A step straddling the divider is reported as a warning."""
        page, page_data = self._make_page_with_divider(
            divider_bbox=BBox(50, 0, 51, 100),  # Divider spans x in [50, 51]
            element_bbox=BBox(40, 0, 60, 40),  # Spans x in [40, 60]
        )

        result = ValidationResult()
        validate_no_divider_intersection(result, page, page_data)

        assert result.warning_count >= 1
        assert any(issue.rule == "divider_intersection" for issue in result.issues)

    def test_excluded_elements_ignored(self) -> None:
        """Background and ProgressBar elements crossing a divider are ignored."""
        scenarios = (
            # Background covering the whole page.
            ("Background", BBox(0, 0, 100, 100)),
            # Full-width bar occupying y in [90, 100].
            ("ProgressBar", BBox(0, 90, 100, 100)),
        )
        for element_type, element_bbox in scenarios:
            page, page_data = self._make_page_with_divider(
                divider_bbox=BBox(50, 0, 51, 100),
                element_bbox=element_bbox,
                element_type=element_type,
            )

            # A fresh result per scenario keeps the two checks independent;
            # sharing one would let earlier state leak into the later assert.
            result = ValidationResult()
            validate_no_divider_intersection(result, page, page_data)

            assert not result.has_issues(), element_type
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc