• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

bramp / build-along / 19521244091

15 Nov 2025 02:10AM UTC coverage: 90.833% (-0.4%) from 91.217%
19521244091

push

github

bramp
refactor: remove unused code and simplify domain invariant tests

Removed approximately 220 lines of unused/redundant code from classifier tests:

classifier_rules_test.py:
- Removed ClassifiedPage wrapper class (~110 lines) - never instantiated
- Removed helper functions (_parts_lists, _part_images, _part_counts,
  _print_label_counts) - never called
- Cleaned up unused imports (defaultdict, Block, ClassificationResult)
- Updated docstring to reflect remaining test coverage

domain_invariants_test.py:
- Simplified all 4 tests to use result.page property directly
- Replaced verbose 6-line get_candidates() pattern with simple property access
- Removed redundant isinstance(page, Page) assertions (~48 lines total)
- Tests now more clearly express intent: validate Page/PartsList/Part objects

All tests continue to pass. No functionality was lost.

4 of 4 new or added lines in 2 files covered. (100.0%)

151 existing lines in 7 files now uncovered.

4994 of 5498 relevant lines covered (90.83%)

0.91 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.44
/src/build_a_long/pdf_extract/classifier/classifier_rules_test.py
1
"""Rule-based tests over real fixtures for the PDF element classifier.
2

3
This suite validates high-level invariants that must hold after classification.
4

5
Rules covered:
6
- No labeled element should be marked as deleted.
7
- Each element has at most one winner candidate.
8

9
Real fixture(s) live under this package's fixtures/ directory.
10
"""
11

12
import logging
1✔
13

14
import pytest
1✔
15

16
from build_a_long.pdf_extract.classifier import classify_elements
1✔
17
from build_a_long.pdf_extract.extractor import ExtractionResult, PageData
1✔
18
from build_a_long.pdf_extract.fixtures import FIXTURES_DIR, RAW_FIXTURE_FILES
1✔
19

20
log = logging.getLogger(__name__)
1✔
21

22

23
def _load_pages_from_fixture(fixture_file: str) -> list[PageData]:
    """Load every page stored in a raw extraction fixture.

    Args:
        fixture_file: Name of the fixture file, resolved relative to
            ``FIXTURES_DIR`` (e.g., '6509377_page_010_raw.json').

    Returns:
        The ``pages`` list of the ``ExtractionResult`` parsed from the file.

    Raises:
        ValueError: If the fixture contains no pages.
    """
    fixture_path = FIXTURES_DIR / fixture_file

    # Decode explicitly as UTF-8 (the standard JSON encoding). A bare
    # read_text() falls back to the platform's locale encoding, which would
    # make fixture parsing depend on the machine running the tests.
    extraction: ExtractionResult = ExtractionResult.model_validate_json(
        fixture_path.read_text(encoding="utf-8")
    )  # type: ignore[assignment]

    # An empty fixture would make the parametrized tests pass vacuously,
    # so treat it as an error instead of returning an empty list.
    if not extraction.pages:
        raise ValueError(f"No pages found in {fixture_file}")

    return extraction.pages
44

45

46
class TestClassifierRules:
    """Invariants checked against real fixture pages after classification."""

    @pytest.mark.parametrize("fixture_file", RAW_FIXTURE_FILES)
    def test_no_labeled_element_is_deleted(self, fixture_file: str) -> None:
        """A block that received a label must not also be marked as removed.

        Labeling and removal are expected to be mutually exclusive; a block
        that is both indicates the two parts of the pipeline disagree.
        """
        for page_idx, page in enumerate(_load_pages_from_fixture(fixture_file)):
            # Classify the page with the complete pipeline.
            result = classify_elements(page)

            # Collect every block that is labeled and removed at the same time.
            offenders = [
                block
                for block in page.blocks
                if result.get_label(block) is not None and result.is_removed(block)
            ]

            # Log the details first so they are visible when the assert fires.
            if offenders:
                log.error(
                    f"Found {len(offenders)} labeled elements that are deleted:"
                )
                for block in offenders:
                    log.error(
                        f"  - {result.get_label(block)} id:{block.id} "
                        f"bbox:{block.bbox} [DELETED]"
                    )

            assert len(offenders) == 0, (
                f"Found {len(offenders)} labeled elements that are "
                f"deleted in {fixture_file} page {page_idx}. "
                f"Labeled elements should not be deleted."
            )

    @pytest.mark.parametrize("fixture_file", RAW_FIXTURE_FILES)
    def test_each_element_has_at_most_one_winner(self, fixture_file: str) -> None:
        """No source block may own more than one winning candidate.

        A block may be a candidate for several labels, but at most one of
        those candidates may be flagged as the winner, so that the
        classification outcome for the block is unambiguous.
        """
        for page_idx, page in enumerate(_load_pages_from_fixture(fixture_file)):
            # Classify the page with the complete pipeline.
            result = classify_elements(page)

            # Maps a block id to the label of the winner already seen for it.
            winners: dict[int, str] = {}

            for label, candidates in result.get_all_candidates().items():
                for candidate in candidates:
                    # Only winning candidates are relevant to this rule.
                    if not candidate.is_winner:
                        continue

                    # Synthetic candidates have no source block to collide on.
                    source = candidate.source_block
                    if source is None:
                        continue

                    block_id = source.id

                    # A second winner for the same block violates the rule.
                    if block_id in winners:
                        existing_label = winners[block_id]
                        pytest.fail(
                            f"Block {block_id} in {fixture_file} page {page_idx} "
                            f"has multiple winner candidates: '{existing_label}' "
                            f"and '{label}'. Each block should have at most one winner."
                        )

                    winners[block_id] = label
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc