• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

bramp / build-along / 19995046189

06 Dec 2025 10:18PM UTC coverage: 90.506% (+0.09%) from 90.421%
19995046189

push

github

bramp
test: regenerate golden files for step classifier refactoring

10525 of 11629 relevant lines covered (90.51%)

0.91 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.85
/src/build_a_long/pdf_extract/classifier/classification_result.py
1
"""ClassificationResult class for single page classification."""
2

3
from __future__ import annotations
1✔
4

5
import logging
1✔
6
from typing import TYPE_CHECKING, cast
1✔
7

8
from pydantic import BaseModel, Field, PrivateAttr, model_validator
1✔
9

10
from build_a_long.pdf_extract.classifier.candidate import Candidate
1✔
11
from build_a_long.pdf_extract.classifier.removal_reason import RemovalReason
1✔
12
from build_a_long.pdf_extract.extractor.extractor import PageData
1✔
13
from build_a_long.pdf_extract.extractor.lego_page_elements import (
1✔
14
    LegoPageElements,
15
    Page,
16
)
17
from build_a_long.pdf_extract.extractor.page_blocks import Blocks
1✔
18

19
if TYPE_CHECKING:
20
    from build_a_long.pdf_extract.classifier.label_classifier import LabelClassifier
21

22
log = logging.getLogger(__name__)
1✔
23

24
# Score key can be either a single Block or a tuple of Blocks (for pairings)
25
ScoreKey = Blocks | tuple[Blocks, ...]
1✔
26

27

28
class CandidateFailedError(Exception):
    """Signal that a candidate could not be built.

    The failing candidate travels with the exception, so a caller that
    catches it can inspect the candidate and, where possible, create a
    replacement candidate and retry.

    Attributes:
        candidate: The candidate whose build failed.
    """

    def __init__(self, candidate: Candidate, message: str):
        # Keep a handle on the offending candidate; the human-readable
        # message is stored by Exception itself.
        self.candidate = candidate
        super().__init__(message)
1✔
38

39

40
class _BuildSnapshot(BaseModel):
    """Frozen record of candidate and consumed-block state, kept for rollback.

    build() captures one of these before running a classifier. If the
    classifier fails, the recorded state is written back so the build
    appears never to have started.
    """

    model_config = {"frozen": True}

    # For each candidate: the (constructed element, failure_reason) pair it
    # had when the snapshot was taken.
    candidate_states: dict[int, tuple[LegoPageElements | None, str | None]]
    # Block IDs that were already consumed when the snapshot was taken.
    consumed_blocks: set[int]
1✔
54

55

56
class ClassificationResult(BaseModel):
    """Result of classifying a single page.

    Stores both the results and the intermediate artifacts of a page
    classification. It provides structured access to:
    - Labels assigned to blocks
    - LegoPageElements constructed from blocks
    - Removal reasons for filtered blocks
    - All candidates considered (including rejected ones)

    Dictionaries are keyed by block ID (``block.id``, an int) rather than by
    Block object, which keeps the model JSON serializable and gives
    consistent equality semantics.

    The public fields exist for serialization. External code should use the
    accessor methods rather than the fields directly, to maintain
    encapsulation and allow future refactoring.
    """

    # TODO: Consider refactoring to separate the DAO (Data Access Object)
    # representation from the business logic.

    page_data: PageData
    """The original page data being classified."""

    skipped_reason: str | None = None
    """If set, classification was skipped for this page.

    This is used for pages that cannot be reasonably classified, such as:
    - Pages with too many blocks (e.g., >1000 vector drawings)
    - Info/inventory pages where each character is a separate vector

    When set, most classification results will be empty.
    """

    # TODO Do we need this field? Can we remove it?
    warnings: list[str] = Field(default_factory=list)
    """Warning messages generated during classification.

    Public for serialization. Prefer the add_warning() and get_warnings()
    methods.
    """

    removal_reasons: dict[int, RemovalReason] = Field(default_factory=dict)
    """Maps block IDs (block.id, not id(block)) to the reason they were removed.

    Keys are block IDs (int) instead of Block objects to ensure JSON
    serializability and consistency with constructed_elements.

    Public for serialization. Prefer the accessor methods.
    """

    candidates: dict[str, list[Candidate]] = Field(default_factory=dict)
    """Maps label names to lists of all candidates considered for that label.

    Each candidate includes:
    - The source element
    - Its score and score details
    - The constructed LegoPageElement (if successful)
    - Failure reason (if construction failed)

    This enables:
    - Re-evaluation with hints (exclude specific candidates)
    - Debugging (see why each candidate won/lost)
    - UI support (show users alternatives)

    Public for serialization. Prefer the get_* accessor methods.
    """

    # Classifier registered for each label; build() looks them up by
    # candidate.label.
    _classifiers: dict[str, LabelClassifier] = PrivateAttr(default_factory=dict)
    # IDs (block.id) of blocks already used by a successfully built candidate.
    _consumed_blocks: set[int] = PrivateAttr(default_factory=set)
1✔
126

127
    @model_validator(mode="after")
    def validate_unique_block_ids(self) -> ClassificationResult:
        """Ensure every block in ``page_data`` has a unique ID.

        Block IDs are used as dictionary keys (for example in
        ``removal_reasons``), so two blocks sharing an ID could not be told
        apart there.

        Returns:
            This instance, unchanged.

        Raises:
            ValueError: If two or more blocks share the same ID.
        """
        seen: set[int] = set()
        duplicates: set[int] = set()
        # Single pass: an ID that is already in ``seen`` is a duplicate.
        for block in self.page_data.blocks:
            if block.id in seen:
                duplicates.add(block.id)
            else:
                seen.add(block.id)

        if duplicates:
            raise ValueError(
                f"PageData blocks must have unique IDs. "
                f"Found duplicates: {duplicates}"
            )
        return self
1✔
144

145
    def _register_classifier(self, label: str, classifier: LabelClassifier) -> None:
        """Associate ``classifier`` with ``label`` so build() can find it.

        LabelClassifier.score() calls this automatically; external code
        should not call it directly. Registering a label again replaces the
        classifier previously stored for it.

        Args:
            label: The label the classifier is responsible for.
            classifier: The classifier to use when building that label.
        """
        registry = self._classifiers
        registry[label] = classifier
1✔
152

153
    def build(self, candidate: Candidate) -> LegoPageElements:
        """Construct a candidate using the classifier registered for its label.

        This is the entry point for top-down construction. The build is
        transactional: candidate states and consumed blocks are snapshotted
        before the classifier runs and restored if it raises, so a failed
        build leaves that state as it was.

        This method does not retry by itself. When a nested candidate fails,
        the state is rolled back and the CandidateFailedError is re-raised so
        that the caller can decide whether to retry.

        Args:
            candidate: The candidate to construct.

        Returns:
            The constructed element. A candidate that is already constructed
            returns its existing element without being rebuilt.

        Raises:
            CandidateFailedError: If the candidate already has a failure
                reason, if one of its source blocks is already consumed, or
                if a nested candidate fails during the build.
            ValueError: If no classifier is registered for the candidate's
                label.
        """
        if candidate.constructed:
            return candidate.constructed

        if candidate.failure_reason:
            raise CandidateFailedError(
                candidate, f"Candidate failed: {candidate.failure_reason}"
            )

        # Check if any source block is already consumed
        # TODO Do we need the following? As _fail_conflicting_candidates should
        # be setting failure reasons already.
        for block in candidate.source_blocks:
            if block.id in self._consumed_blocks:
                # Find who consumed it (for a better error message). This is
                # expensive but only happens on failure. The search stops at
                # the first constructed candidate that owns the block.
                winner_label = next(
                    (
                        label
                        for label, label_candidates in self.candidates.items()
                        for c in label_candidates
                        if c.constructed
                        and any(b.id == block.id for b in c.source_blocks)
                    ),
                    "unknown",
                )

                failure_msg = f"Block {block.id} already consumed by '{winner_label}'"
                candidate.failure_reason = failure_msg
                raise CandidateFailedError(candidate, failure_msg)

        classifier = self._classifiers.get(candidate.label)
        if not classifier:
            raise ValueError(f"No classifier registered for label '{candidate.label}'")

        # Take snapshot before building for automatic rollback on failure
        snapshot = self._take_snapshot()

        try:
            element = classifier.build(candidate, self)
            candidate.constructed = element

            # Mark blocks as consumed
            log.debug(
                "[build] Marking %d blocks as consumed for '%s' at %s: %s",
                len(candidate.source_blocks),
                candidate.label,
                candidate.bbox,
                [b.id for b in candidate.source_blocks],
            )
            for block in candidate.source_blocks:
                self._consumed_blocks.add(block.id)

            # Fail other candidates that use these blocks
            self._fail_conflicting_candidates(candidate)

            return element
        except CandidateFailedError as e:
            failed_candidate = e.candidate
            # Read the reason BEFORE rolling back: the rollback resets the
            # failure_reason of every snapshotted candidate, which would hide
            # a replacement that happened during this build.
            reason = failed_candidate.failure_reason

            # A nested candidate failed - rollback, then let the caller decide
            # whether to retry.
            self._restore_snapshot(snapshot)

            if reason and "Replaced by reduced candidate" in reason:
                # The failed candidate was replaced - caller should retry with
                # new candidates available
                log.debug(
                    "[build] Nested candidate %s (%s) was replaced, "
                    "propagating for retry",
                    failed_candidate.label,
                    failed_candidate.bbox,
                )
            raise
        except Exception:
            # Rollback all changes made during this build
            self._restore_snapshot(snapshot)
            raise
1✔
241

242
    def _take_snapshot(self) -> _BuildSnapshot:
        """Capture the current candidate states and consumed block IDs.

        Returns:
            A snapshot holding, for every registered candidate, its
            (constructed, failure_reason) pair keyed by id(candidate), plus a
            copy of the consumed block ID set.
        """
        states = {
            id(cand): (cand.constructed, cand.failure_reason)
            for group in self.candidates.values()
            for cand in group
        }
        return _BuildSnapshot(
            candidate_states=states,
            consumed_blocks=set(self._consumed_blocks),
        )
253

254
    def _restore_snapshot(self, snapshot: _BuildSnapshot) -> None:
        """Write the state recorded in ``snapshot`` back onto this result.

        Candidates that have no entry in the snapshot are left untouched.

        Args:
            snapshot: State previously captured by _take_snapshot().
        """
        saved_states = snapshot.candidate_states
        for group in self.candidates.values():
            for cand in group:
                state = saved_states.get(id(cand))
                if state is None:
                    # Not part of the snapshot; nothing to restore.
                    continue
                cand.constructed, cand.failure_reason = state

        # Replace the consumed set with a fresh copy of the recorded one.
        self._consumed_blocks = set(snapshot.consumed_blocks)
1✔
265

266
    def _fail_conflicting_candidates(self, winner: Candidate) -> None:
        """Mark other candidates sharing blocks with ``winner`` as failed.

        For each conflicting candidate the label's classifier is first asked
        (via rescore_without_blocks) for a reduced version without the
        winner's blocks. If one is returned it is added as a replacement and
        the original is marked as replaced; otherwise the original is marked
        as having lost the conflict.

        Candidates that already have a failure reason, and the winner itself,
        are skipped. A winner without source blocks conflicts with nothing.

        Args:
            winner: The candidate that has just been built successfully.
        """
        winner_block_ids = {b.id for b in winner.source_blocks}

        if not winner_block_ids:
            return

        for label, candidates in self.candidates.items():
            classifier = self._classifiers.get(label)

            # Iterate over a copy: reduced candidates are appended to the live
            # list below, and a list must not grow while it is being iterated.
            for candidate in list(candidates):
                if candidate is winner or candidate.failure_reason:
                    continue

                # Check for overlap
                conflicting_block_ids = {
                    b.id for b in candidate.source_blocks if b.id in winner_block_ids
                }

                if not conflicting_block_ids:
                    continue

                # Try to create a reduced candidate via rescore_without_blocks
                if classifier:
                    reduced = classifier.rescore_without_blocks(
                        candidate, winner_block_ids, self
                    )
                    if reduced is not None:
                        # Add the reduced candidate as a replacement
                        candidates.append(reduced)
                        candidate.failure_reason = (
                            f"Replaced by reduced candidate "
                            f"(excluded blocks: {conflicting_block_ids})"
                        )
                        continue

                # Fall back to failing the candidate
                candidate.failure_reason = (
                    f"Lost conflict to '{winner.label}' (score={winner.score:.3f})"
                )
311

312
    def _validate_block_in_page_data(
        self, block: Blocks | None, param_name: str = "block"
    ) -> None:
        """Check that ``block`` belongs to this result's page data.

        Args:
            block: The block to check. None is accepted and skips the check.
            param_name: Name of the parameter being checked, used in the
                error message.

        Raises:
            ValueError: If block is not None and not in PageData.blocks
        """
        if block is None:
            return
        if block in self.page_data.blocks:
            return
        raise ValueError(f"{param_name} must be in PageData.blocks. Block: {block}")
1✔
326

327
    @property
    def blocks(self) -> list[Blocks]:
        """The blocks of the page being classified.

        Returns:
            The ``blocks`` list of ``page_data`` (the list itself, not a copy).
        """
        page_data = self.page_data
        return page_data.blocks
1✔
335

336
    @property
    def page(self) -> Page | None:
        """The Page element built from this classification result, if any.

        Returns:
            The constructed element of the top-ranked valid "page" candidate,
            or None when there is no valid "page" candidate.
        """
        ranked = self.get_scored_candidates("page", valid_only=True)
        if not ranked:
            return None

        built = ranked[0].constructed
        assert isinstance(built, Page)
        return built
1✔
345

346
    def add_warning(self, warning: str) -> None:
        """Record a warning message on this classification result.

        Args:
            warning: The warning message to record
        """
        messages = self.warnings
        messages.append(warning)
1✔
353

354
    def get_warnings(self) -> list[str]:
        """Return the warnings recorded during classification.

        Returns:
            A new list of the warning messages; changing it does not affect
            this result.
        """
        return list(self.warnings)
1✔
361

362
    # TODO Reconsider the methods below - some may be redundant.
363

364
    def get_candidates(self, label: str) -> list[Candidate]:
        """Return every candidate recorded for ``label``.

        Args:
            label: The label to look up

        Returns:
            A new list of that label's candidates (empty if the label is
            unknown). The list is a copy, so callers cannot modify the
            stored list through it.
        """
        stored = self.candidates.get(label)
        if stored is None:
            return []
        return list(stored)
1✔
375

376
    def get_scored_candidates(
        self,
        label: str,
        min_score: float = 0.0,
        valid_only: bool = True,
        exclude_failed: bool = False,
    ) -> list[Candidate]:
        """Return the scored candidates for a label, best score first.

        **Use this method in score() when working with dependency classifiers.**

        Working with candidates (rather than constructed elements or raw
        blocks) is the intended pattern when one classifier depends on
        another. During score() you should:
        1. Get parent candidates using this method
        2. Store references to parent candidates in your score_details
        3. In construct(), validate parent candidates before using their elements

        Example:
            # In PartsClassifier.score()
            part_count_candidates = result.get_scored_candidates("part_count")
            for pc_cand in part_count_candidates:
                # Store the CANDIDATE reference in score details
                score_details = _PartPairScore(
                    part_count_candidate=pc_cand,  # Not pc_cand.constructed!
                    image=img,
                )

            # Later in _construct_single()
            def _construct_single(self, candidate, result):
                pc_cand = candidate.score_details.part_count_candidate

                # Validate parent candidate is still valid
                if not pc_cand.is_valid:
                    raise ValueError(
                        f"Parent invalid: {pc_cand.failure_reason or 'not constructed'}"
                    )

                # Now safe to use the constructed element
                assert isinstance(pc_cand.constructed, PartCount)
                return Part(count=pc_cand.constructed, ...)

        Args:
            label: The label to get candidates for
            min_score: Minimum score a candidate must reach. Only applied
                when greater than 0 (default: 0.0)
            valid_only: If True (default), keep only candidates whose
                is_valid is true. Set to False to get scored candidates
                regardless of construction status.
            exclude_failed: Only consulted when valid_only is False. If True,
                drop candidates that have a failure_reason. (default: False)

        Returns:
            List of candidates that have score_details and pass the filters
            above, sorted by score (highest first). Candidates with equal
            scores keep their stored order.
        """

        def _keep(cand: Candidate) -> bool:
            # Unscored candidates are never returned.
            if cand.score_details is None:
                return False
            if min_score > 0 and not cand.score >= min_score:
                return False
            if valid_only:
                return cand.is_valid
            if exclude_failed:
                return cand.failure_reason is None
            return True

        ranked = [cand for cand in self.get_candidates(label) if _keep(cand)]

        # Sort by score descending
        # TODO add a tie breaker for determinism.
        ranked.sort(key=lambda cand: -cand.score)
        return ranked
1✔
453

454
    def get_winners_by_score[T: LegoPageElements](
1✔
455
        self, label: str, element_type: type[T], max_count: int | None = None
456
    ) -> list[T]:
457
        """Get the best candidates for a specific label by score.
458

459
        **DEPRECATED for use in score() methods.**
460

461
        This method returns constructed LegoPageElements, which encourages the
462
        anti-pattern of looking at constructed elements during the score() phase.
463

464
        - **In score()**: Use get_scored_candidates() instead to work with candidates
465
        - **In construct()**: It's OK to use this method when you need fully
466
          constructed dependency elements
467

468
        Prefer get_scored_candidates() in score() to maintain proper separation
469
        between the scoring and construction phases.
470

471
        Selects candidates by:
472
        - Successfully constructed (constructed is not None)
473
        - Match the specified element type
474
        - Sorted by score (highest first)
475

476
        Invariant: Each source block should have at most one successfully
477
        constructed candidate per label. This method validates that invariant.
478

479
        Args:
480
            label: The label to get winners for (e.g., "page_number", "step")
481
            element_type: The type of element to filter for (e.g., PageNumber)
482
            max_count: Maximum number of winners to return (None = all valid)
483

484
        Returns:
485
            List of constructed elements of the specified type, sorted by score
486
            (highest first)
487

488
        Raises:
489
            AssertionError: If element_type doesn't match the actual constructed type,
490
                or if multiple candidates exist for the same source block
491
        """
492
        # Get all candidates and filter for successful construction
493
        valid_candidates = [
1✔
494
            c for c in self.get_candidates(label) if c.constructed is not None
495
        ]
496

497
        # Validate that each source block has at most one candidate for this label
498
        # (candidates without source blocks are synthetic and can have duplicates)
499
        seen_blocks: set[int] = set()
1✔
500
        for candidate in valid_candidates:
1✔
501
            assert isinstance(candidate.constructed, element_type), (
1✔
502
                f"Type mismatch for label '{label}': requested "
503
                f"{element_type.__name__} but got "
504
                f"{type(candidate.constructed).__name__}. "
505
                f"This indicates a programming error in the caller."
506
            )
507

508
            for source_block in candidate.source_blocks:
1✔
509
                block_id = id(source_block)
1✔
510
                assert block_id not in seen_blocks, (
1✔
511
                    f"Multiple successfully constructed candidates found for "
512
                    f"label '{label}' with the same source block id:{block_id}. "
513
                    f"This indicates a programming error in the classifier. "
514
                    f"Source block: {source_block}"
515
                )
516
                seen_blocks.add(block_id)
1✔
517

518
        # Sort by score (highest first), then by source block ID for determinism
519
        # when scores are equal
520
        valid_candidates.sort(
1✔
521
            key=lambda c: (
522
                -c.score,  # Negative for descending order
523
                # TODO Fix this, so it's deterministic.
524
                c.source_blocks[0].id if c.source_blocks else 0,  # Tie-breaker
525
            )
526
        )
527

528
        # Apply max_count if specified
529
        if max_count is not None:
1✔
530
            valid_candidates = valid_candidates[:max_count]
×
531

532
        # Extract constructed elements
533
        return [cast(T, c.constructed) for c in valid_candidates]
1✔
534

535
    def get_all_candidates(self) -> dict[str, list[Candidate]]:
        """Get all candidates across all labels.

        Returns:
            A new dictionary mapping each label to a new list of its
            candidates. Both the dictionary and the lists are copies, so
            adding or removing entries does not affect this result. The
            Candidate objects themselves are shared, not copied.
        """
        # Copy each list too: sharing the stored lists would let callers
        # modify internal state, contrary to the documented contract.
        return {label: list(cands) for label, cands in self.candidates.items()}
1✔
543

544
    def count_successful_candidates(self, label: str) -> int:
        """Count the candidates of a label whose construction succeeded.

        Test helper: a candidate counts as successful when its ``constructed``
        value is not None.

        Args:
            label: The label to count successful candidates for

        Returns:
            Number of successfully constructed candidates for that label
        """
        built = [c for c in self.get_candidates(label) if c.constructed is not None]
        return len(built)
1✔
556

557
    def get_all_candidates_for_block(self, block: Blocks) -> list[Candidate]:
        """Find every candidate, under any label, that uses ``block``.

        A candidate matches when the block appears in its source_blocks. To
        look for a candidate under one specific label, use
        get_candidate_for_block() instead.

        Args:
            block: The block to find candidates for

        Returns:
            List of all candidates across all labels with this block in
            source_blocks, in label order and then stored order
        """
        return [
            candidate
            for group in self.candidates.values()
            for candidate in group
            if block in candidate.source_blocks
        ]
1✔
576

577
    def get_candidate_for_block(self, block: Blocks, label: str) -> Candidate | None:
        """Return the one candidate that uses ``block`` under ``label``.

        Helper method for testing. A candidate matches when the block appears
        in its source_blocks.

        Args:
            block: The block to find the candidate for
            label: The label to search within

        Returns:
            The matching candidate, or None if there is none

        Raises:
            ValueError: If multiple candidates exist for this block/label pair
        """
        matches = [c for c in self.get_candidates(label) if block in c.source_blocks]

        if len(matches) > 1:
            raise ValueError(
                f"Multiple candidates found for block {block.id} "
                f"with label '{label}'. Expected at most one."
            )

        return matches[0] if matches else None
605

606
    def get_best_candidate(self, block: Blocks) -> Candidate | None:
        """Return the top-scoring constructed candidate that uses ``block``.

        A block may have candidates under several labels. Among those that
        were successfully constructed, the one with the highest score is the
        "winning" candidate for reporting and output purposes.

        Args:
            block: The block to get the best candidate for

        Returns:
            The highest-scoring successfully constructed candidate, or None
            if no successfully constructed candidate exists
        """
        built = [
            c
            for c in self.get_all_candidates_for_block(block)
            if c.constructed is not None
        ]
        if built:
            # On equal scores the earliest candidate wins.
            return max(built, key=lambda c: c.score)
        return None
×
628

629
    # TODO I think this API is broken - there can be multiple labels per block,
630
    # but we only return one here.
631
    def get_label(self, block: Blocks) -> str | None:
        """Return the label of the block's best constructed candidate.

        Convenience wrapper equivalent to:
            candidate = result.get_best_candidate(block)
            return candidate.label if candidate else None

        Args:
            block: The block to get the label for

        Returns:
            The label string of the highest-scoring constructed candidate,
            or None if the block has no successfully constructed candidate
        """
        winner = self.get_best_candidate(block)
        if winner:
            return winner.label
        return None
1✔
651

652
    def add_candidate(self, candidate: Candidate) -> None:
        """Record a candidate under its own label.

        The label is taken from candidate.label; the label's list is created
        on first use. Nothing is stored if validation fails.

        Args:
            candidate: The candidate to add

        Raises:
            ValueError: If candidate has source_blocks that are not in PageData
        """
        # Validate every source block before touching the stored state.
        for source_block in candidate.source_blocks:
            self._validate_block_in_page_data(source_block, "candidate.source_blocks")

        self.candidates.setdefault(candidate.label, []).append(candidate)
1✔
670

671
    def mark_removed(self, block: Blocks, reason: RemovalReason) -> None:
        """Record that ``block`` was removed, and why.

        A reason already stored for the same block ID is overwritten.

        Args:
            block: The block to mark as removed
            reason: The reason for removal

        Raises:
            ValueError: If block is not in PageData
        """
        self._validate_block_in_page_data(block, "block")
        reasons = self.removal_reasons
        reasons[block.id] = reason
1✔
683

684
    def is_removed(self, block: Blocks) -> bool:
        """Tell whether ``block`` has been marked for removal.

        Args:
            block: The block to check

        Returns:
            True if a removal reason is stored for the block's ID, False
            otherwise
        """
        removed_ids = self.removal_reasons
        return block.id in removed_ids
1✔
694

695
    def get_removal_reason(self, block: Blocks) -> RemovalReason | None:
        """Look up why ``block`` was removed.

        Args:
            block: The block to get the removal reason for

        Returns:
            The RemovalReason stored for the block's ID, or None if the block
            was not removed
        """
        try:
            return self.removal_reasons[block.id]
        except KeyError:
            return None
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc