• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

bramp / build-along / 20400711546

20 Dec 2025 10:09PM UTC coverage: 89.367% (+0.006%) from 89.361%
20400711546

push

github

bramp
docs: Add comprehensive Classifier best practices documentation

- Add detailed docstrings to Classifier and RuleBasedClassifier classes
  covering all aspects of writing robust classifiers
- Document scoring phase: API access rules, Score object design,
  intrinsic vs relationship-based scoring
- Document build phase: source block rules, exception handling,
  construction patterns
- Document build_all(): when to use for global coordination
- Add complete code examples for atomic and composite patterns
- Fix DESIGN.md contradiction about Score objects storing candidates
- Update README.md and DESIGN.md to reference class docstrings as
  single source of truth
- Add recommendations to use RuleBasedClassifier for atomic classifiers

This consolidates documentation to reduce duplication and provides
clear guidelines for both humans and AI agents writing new classifiers.

13708 of 15339 relevant lines covered (89.37%)

0.89 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.14
/src/build_a_long/pdf_extract/classifier/steps/step_count_classifier.py
1
"""
2
Step count classifier.
3

4
Purpose
5
-------
6
Detect step-count text like "2x" that appears in substep callout boxes.
7
These are similar to part counts but use a larger font size (typically 16pt),
8
between part count size and step number size.
9

10
Debugging
11
---------
12
Enable DEBUG logs with LOG_LEVEL=DEBUG.
13
"""
14

15
import logging
1✔
16
from collections.abc import Sequence
1✔
17
from typing import ClassVar
1✔
18

19
from build_a_long.pdf_extract.classifier.candidate import Candidate
1✔
20
from build_a_long.pdf_extract.classifier.classification_result import (
1✔
21
    ClassificationResult,
22
)
23
from build_a_long.pdf_extract.classifier.rule_based_classifier import (
1✔
24
    RuleBasedClassifier,
25
)
26
from build_a_long.pdf_extract.classifier.rules import (
1✔
27
    FontSizeRangeRule,
28
    IsInstanceFilter,
29
    PartCountTextRule,
30
    Rule,
31
)
32
from build_a_long.pdf_extract.classifier.rules.scale import LinearScale
1✔
33
from build_a_long.pdf_extract.classifier.text import (
1✔
34
    extract_part_count_value,
35
)
36
from build_a_long.pdf_extract.extractor.lego_page_elements import (
1✔
37
    StepCount,
38
)
39
from build_a_long.pdf_extract.extractor.page_blocks import Text
1✔
40

41
# Module-level logger, named after this module via ``__name__``.
log = logging.getLogger(__name__)
1✔
42

43

44
class StepCountClassifier(RuleBasedClassifier):
    """Rule-based classifier that recognises step counts such as "2x".

    A step count is the multiplier label shown inside a substep callout
    box; it tells the builder how many copies of the sub-assembly to make.
    Its font size sits between the part-count size and the step-number size.
    """

    # Label produced by this classifier.
    output: ClassVar[str] = "step_count"
    # Labels that must be available beforehand; none are declared here.
    requires: ClassVar[frozenset[str]] = frozenset()

    @property
    def effects_margin(self) -> float | None:
        """Return the fixed effects margin of 2.0.

        NOTE(review): presumably the padding used when collecting
        text-effect blocks around the primary text -- confirm against
        RuleBasedClassifier.
        """
        return 2.0

    @property
    def min_score(self) -> float:
        """Return the minimum score configured under ``config.step_count``."""
        return self.config.step_count.min_score

    @property
    def rules(self) -> Sequence[Rule]:
        """Return the filter and scoring rules for step-count candidates.

        The list contains, in order: a Text-only filter, a required text
        pattern rule, and a font-size rule driven by the configured font
        size hints.
        """
        settings = self.config.step_count
        size_hints = self.config.font_size_hints

        # Fall back to 10.0 / 20.0 when a hint is unset (or otherwise falsy).
        lower = size_hints.part_count_size or 10.0
        upper = size_hints.step_number_size or 20.0

        # Font-size score anchors: 0.0 one point below the part-count size,
        # 0.7 at the part-count size, 1.0 from one point above it through
        # one point above the step-number size, then back to 0.0 one point
        # further up.
        size_scale = LinearScale(
            {
                lower - 1.0: 0.0,
                lower: 0.7,
                lower + 1.0: 1.0,
                upper + 1.0: 1.0,
                upper + 2.0: 0.0,
            }
        )

        # Only Text blocks are considered at all.
        text_only = IsInstanceFilter(Text)
        # The text must look like a count (e.g., "2x", "4x"); required.
        count_text = PartCountTextRule(
            weight=settings.text_weight,
            name="text_score",
            required=True,
        )
        font_size = FontSizeRangeRule(
            scale=size_scale,
            weight=settings.font_size_weight,
            name="font_size_score",
        )
        return [text_only, count_text, font_size]

    def build(self, candidate: Candidate, result: ClassificationResult) -> StepCount:
        """Create the StepCount element described by ``candidate``.

        The first source block must be the primary Text block; further
        source blocks (e.g., text outline effects) may follow it.

        Raises:
            ValueError: If no count can be parsed from the primary text.
        """
        sources = candidate.source_blocks
        assert len(sources) >= 1
        primary = sources[0]
        assert isinstance(primary, Text)

        count = extract_part_count_value(primary.text)
        if count is not None:
            # candidate.bbox is the union of all source blocks, so any
            # extra effect blocks are included in the element's bounds.
            return StepCount(count=count, bbox=candidate.bbox)

        raise ValueError(f"Could not parse step count from text: '{primary.text}'")
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc