• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

bramp / build-along / 19995046189

06 Dec 2025 10:18PM UTC coverage: 90.506% (+0.09%) from 90.421%
19995046189

push

github

bramp
test: regenerate golden files for step classifier refactoring

10525 of 11629 relevant lines covered (90.51%)

0.91 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.45
/src/build_a_long/pdf_extract/extractor/bbox.py
1
from __future__ import annotations
1✔
2

3
from typing import Annotated, Protocol, TypeVar
1✔
4

5
from annotated_types import Ge
1✔
6
from pydantic import BaseModel, ConfigDict
1✔
7

8
# Type alias for non-negative floats
9
# Used below as the return annotation of BBox.width / BBox.height / BBox.area.
# The Ge(0) constraint is metadata on the annotation; the non-negativity itself
# follows from BBox.model_post_init rejecting x0 > x1 and y0 > y1.
NonNegativeFloat = Annotated[float, Ge(0)]
1✔
10

11

12
class BBox(BaseModel):
    """Immutable axis-aligned bounding box ``(x0, y0, x1, y1)``.

    Instances are frozen (see ``model_config``) and are validated on
    construction so that ``x0 <= x1`` and ``y0 <= y1``. Zero-width and
    zero-height boxes are allowed.
    """

    model_config = ConfigDict(frozen=True)

    x0: float
    y0: float
    x1: float
    y1: float

    def __init__(
        self,
        x0: float | None = None,
        y0: float | None = None,
        x1: float | None = None,
        y1: float | None = None,
        /,
        **kwargs,
    ):
        """Initialize BBox with positional and/or keyword arguments.

        Supports:
        - BBox(0, 0, 10, 10)  # positional
        - BBox(x0=0, y0=0, x1=10, y1=10)  # keyword
        - BBox(0, 0, x1=10, y1=10)  # leading positionals, rest by keyword

        Raises:
            TypeError: If a coordinate is given both positionally and by
                keyword.
        """
        # Fold every positional coordinate that was actually supplied into the
        # keyword set. Previously positionals were silently discarded unless
        # all four were present, which produced misleading "missing field"
        # errors for mixed calls.
        positional = {"x0": x0, "y0": y0, "x1": x1, "y1": y1}
        for name, value in positional.items():
            if value is None:
                continue
            if name in kwargs:
                raise TypeError(
                    f"BBox() got multiple values for argument '{name}'"
                )
            kwargs[name] = value
        super().__init__(**kwargs)

    def model_post_init(self, __context) -> None:
        """Validate x0 <= x1 and y0 <= y1.

        Raises:
            ValueError: If the box is inverted along either axis.
        """
        if self.x0 > self.x1:
            raise ValueError(f"x0 ({self.x0}) must not be greater than x1 ({self.x1})")
        if self.y0 > self.y1:
            raise ValueError(f"y0 ({self.y0}) must not be greater than y1 ({self.y1})")

    def __str__(self) -> str:
        """Return a compact string representation, one decimal per coordinate."""
        return f"({self.x0:.1f},{self.y0:.1f},{self.x1:.1f},{self.y1:.1f})"

    @classmethod
    def from_tuple(cls, bbox_tuple: tuple[float, float, float, float]) -> BBox:
        """Create a BBox from a tuple of four floats (x0, y0, x1, y1).

        Only the first four elements are read, so longer sequences whose
        leading entries are the coordinates are accepted as well.
        """
        return cls(
            x0=bbox_tuple[0],
            y0=bbox_tuple[1],
            x1=bbox_tuple[2],
            y1=bbox_tuple[3],
        )

    def equals(self, other: BBox) -> bool:
        """Return True if all four coordinates are exactly equal to other's."""
        return (
            self.x0 == other.x0
            and self.y0 == other.y0
            and self.x1 == other.x1
            and self.y1 == other.y1
        )

    def similar(self, other: BBox, tolerance: float = 1.0) -> bool:
        """Check if this bounding box is nearly identical to another.

        Args:
            other: The other bounding box to compare.
            tolerance: Maximum difference allowed for each coordinate.

        Returns:
            True if all coordinates differ by at most the tolerance.
        """
        return (
            abs(self.x0 - other.x0) <= tolerance
            and abs(self.y0 - other.y0) <= tolerance
            and abs(self.x1 - other.x1) <= tolerance
            and abs(self.y1 - other.y1) <= tolerance
        )

    def overlaps(self, other: BBox) -> bool:
        """Check if this bounding box overlaps with another bounding box.

        The comparison is strict: boxes that merely share an edge or a
        corner do not overlap, and neither does a zero-area box with itself.
        """
        # Separated (or only touching) along the x axis.
        if self.x0 >= other.x1 or other.x0 >= self.x1:
            return False
        # Separated (or only touching) along the y axis.
        return not (self.y0 >= other.y1 or other.y0 >= self.y1)

    def contains(self, other: BBox) -> bool:
        """Check if this bounding box fully contains another bounding box.

        Containment is inclusive: a box contains itself, and ``other`` may
        touch the edges of ``self``.
        """
        return (
            other.x0 >= self.x0
            and other.y0 >= self.y0
            and other.x1 <= self.x1
            and other.y1 <= self.y1
        )

    def adjacent(self, other: BBox, tolerance: float = 1e-6) -> bool:
        """Check if this bounding box is adjacent to (touching) another.

        Two boxes are adjacent when an edge of one lies within ``tolerance``
        of the facing edge of the other and their projections onto the
        perpendicular axis overlap.

        Args:
            other: The other bounding box.
            tolerance: Slack used for the floating point edge comparison.
        """
        # Left/right edges meet and the boxes share some vertical extent.
        horizontal_adjacent = (
            abs(self.x1 - other.x0) < tolerance and self.overlaps_vertical(other)
        ) or (abs(other.x1 - self.x0) < tolerance and self.overlaps_vertical(other))
        # Top/bottom edges meet and the boxes share some horizontal extent.
        vertical_adjacent = (
            abs(self.y1 - other.y0) < tolerance and self.overlaps_horizontal(other)
        ) or (abs(other.y1 - self.y0) < tolerance and self.overlaps_horizontal(other))

        return horizontal_adjacent or vertical_adjacent

    def overlaps_horizontal(self, other: BBox) -> bool:
        """Helper to check if the x-axis projections strictly overlap."""
        return max(self.x0, other.x0) < min(self.x1, other.x1)

    def overlaps_vertical(self, other: BBox) -> bool:
        """Helper to check if the y-axis projections strictly overlap."""
        return max(self.y0, other.y0) < min(self.y1, other.y1)

    @property
    def width(self) -> NonNegativeFloat:
        """Return the width of this bounding box (non-negative)."""
        return self.x1 - self.x0

    @property
    def height(self) -> NonNegativeFloat:
        """Return the height of this bounding box (non-negative)."""
        return self.y1 - self.y0

    @property
    def area(self) -> NonNegativeFloat:
        """Return the area of this bounding box (non-negative)."""
        return self.width * self.height

    def intersection_area(self, other: BBox) -> float:
        """Return the area of intersection between this bbox and another.

        Returns 0.0 when the boxes do not overlap.
        """
        ix0 = max(self.x0, other.x0)
        iy0 = max(self.y0, other.y0)
        ix1 = min(self.x1, other.x1)
        iy1 = min(self.y1, other.y1)
        # Clamp each extent at zero so disjoint boxes never yield a positive
        # product of two negative extents.
        w = max(0.0, ix1 - ix0)
        h = max(0.0, iy1 - iy0)
        return w * h

    def intersect(self, other: BBox) -> BBox:
        """Return the intersection bbox between this bbox and another.

        If the boxes do not overlap along an axis, the result is collapsed
        to zero extent on that axis, positioned at the midpoint of the gap
        between the two boxes.
        """
        ix0 = max(self.x0, other.x0)
        iy0 = max(self.y0, other.y0)
        ix1 = min(self.x1, other.x1)
        iy1 = min(self.y1, other.y1)

        # Ensure valid bbox (x0 <= x1, y0 <= y1)
        if ix0 > ix1:
            ix0 = ix1 = (ix0 + ix1) / 2
        if iy0 > iy1:
            iy0 = iy1 = (iy0 + iy1) / 2

        return BBox(x0=ix0, y0=iy0, x1=ix1, y1=iy1)

    def iou(self, other: BBox) -> float:
        """Intersection over Union with another bbox.

        Returns 0.0 when there is no overlap or union is zero.
        """
        inter = self.intersection_area(other)
        if inter == 0.0:
            return 0.0
        ua = self.area + other.area - inter
        # Defensive guard against division by zero.
        if ua <= 0.0:
            return 0.0
        return inter / ua

    def min_distance(self, other: BBox) -> float:
        """Calculate minimum distance between this bbox and another.

        Returns 0.0 if the bboxes overlap or touch.
        Otherwise returns the minimum Euclidean distance between any two points
        on the bbox edges.

        Args:
            other: The other BBox to measure distance to.

        Returns:
            Minimum distance between the bboxes (0.0 if overlapping).
        """
        # If they overlap, distance is 0
        if self.overlaps(other):
            return 0.0

        # Horizontal gap (0 when the x projections touch or overlap)
        if self.x1 < other.x0:
            dx = other.x0 - self.x1
        elif other.x1 < self.x0:
            dx = self.x0 - other.x1
        else:
            dx = 0.0

        # Vertical gap (0 when the y projections touch or overlap)
        if self.y1 < other.y0:
            dy = other.y0 - self.y1
        elif other.y1 < self.y0:
            dy = self.y0 - other.y1
        else:
            dy = 0.0

        # Return Euclidean distance
        return (dx**2 + dy**2) ** 0.5

    @property
    def center(self) -> tuple[float, float]:
        """Return the (x, y) center point of the bbox."""
        return ((self.x0 + self.x1) / 2.0, (self.y0 + self.y1) / 2.0)

    def to_tuple(self) -> tuple[float, float, float, float]:
        """Convert BBox to tuple (x0, y0, x1, y1).

        Useful for interfacing with PIL and other libraries that expect tuples.
        """
        return (self.x0, self.y0, self.x1, self.y1)

    def union(self, other: BBox) -> BBox:
        """Return the bounding box that encompasses both this bbox and another.

        Args:
            other: The other BBox to union with.

        Returns:
            A new BBox that contains both bounding boxes.
        """
        return BBox(
            x0=min(self.x0, other.x0),
            y0=min(self.y0, other.y0),
            x1=max(self.x1, other.x1),
            y1=max(self.y1, other.y1),
        )

    @classmethod
    def union_all(cls, bboxes: list[BBox]) -> BBox:
        """Return the bounding box that encompasses all provided bboxes.

        Args:
            bboxes: List of BBox objects to union. Must be non-empty.

        Returns:
            A BBox that contains all bounding boxes. For a single-element
            list the element itself is returned (instances are frozen, so
            sharing it is safe).

        Raises:
            ValueError: If bboxes list is empty.
        """
        if not bboxes:
            raise ValueError("Cannot compute union of empty list of bboxes")

        if len(bboxes) == 1:
            return bboxes[0]

        return BBox(
            x0=min(b.x0 for b in bboxes),
            y0=min(b.y0 for b in bboxes),
            x1=max(b.x1 for b in bboxes),
            y1=max(b.y1 for b in bboxes),
        )

    def clip_to(self, bounds: BBox) -> BBox:
        """Clip this bounding box to stay within the given bounds.

        Args:
            bounds: The bounding box to clip to.

        Returns:
            A new BBox clipped to the bounds. If this bbox doesn't overlap
            with bounds at all, returns a degenerate bbox (x0 == x1 or y0 == y1)
            at the nearest edge.
        """
        x0 = max(self.x0, bounds.x0)
        y0 = max(self.y0, bounds.y0)
        x1 = min(self.x1, bounds.x1)
        y1 = min(self.y1, bounds.y1)

        # If no overlap, clamp to create a degenerate (zero-area) bbox.
        # Clamping self's low coordinate into [bounds.lo, bounds.hi] lands on
        # whichever edge of bounds is nearest to this box.
        if x0 > x1:
            x0 = x1 = max(bounds.x0, min(self.x0, bounds.x1))
        if y0 > y1:
            y0 = y1 = max(bounds.y0, min(self.y0, bounds.y1))

        return BBox(x0=x0, y0=y0, x1=x1, y1=y1)

    def expand(self, margin: float) -> BBox:
        """Return a new BBox expanded by the given margin on all sides.

        Args:
            margin: The amount to expand the bbox by. Can be negative to shrink.

        Returns:
            A new BBox expanded by the margin.

        Raises:
            ValueError: If a negative margin would result in an invalid bbox
                (width or height < 0).
        """
        x0 = self.x0 - margin
        y0 = self.y0 - margin
        x1 = self.x1 + margin
        y1 = self.y1 + margin

        # Check up front so the error names the margin rather than surfacing
        # the generic x0 > x1 message from model_post_init.
        if x0 > x1 or y0 > y1:
            raise ValueError(
                f"Cannot expand bbox by {margin}: result would be invalid "
                f"(width={x1 - x0}, height={y1 - y0})"
            )

        return BBox(x0=x0, y0=y0, x1=x1, y1=y1)
1✔
335

336

337
class HasBBox(Protocol):
    """Structural type for any object that exposes a readable ``bbox``.

    Anything with a ``bbox`` attribute or property yielding a ``BBox``
    satisfies this protocol; no inheritance is required.
    """

    @property
    def bbox(self) -> BBox:
        """The bounding box associated with this object."""
        ...
342

343

344
# Generic item type for the helpers below (build_connected_cluster,
# filter_contained, filter_overlapping): any object exposing a `bbox`.
T = TypeVar("T", bound=HasBBox)
1✔
345

346

347
def build_connected_cluster(
    seed_items: list[T],
    candidate_items: list[T],
) -> list[T]:
    """Build a connected cluster of items based on bbox overlap.

    Starts with seed items and repeatedly adds candidates that overlap
    with any item already in the cluster.

    Each seed is resolved to an entry of ``candidate_items``: the entry that
    *is* the seed object when present, otherwise the first entry whose bbox
    equals the seed's bbox. Seeds that resolve to no candidate contribute
    nothing to the result.

    Args:
        seed_items: Initial items to start the cluster
        candidate_items: Items to consider adding to the cluster

    Returns:
        The members of the cluster, all taken from ``candidate_items`` and
        returned in their original ``candidate_items`` order. Empty when
        ``seed_items`` is empty or no seed resolves to a candidate.

    Example:
        >>> # Find images that form a connected cluster with a bag number
        >>> bag_images = [img for img in images if img.bbox.overlaps(bag_bbox)]
        >>> cluster = build_connected_cluster(bag_images, images)
    """
    if not seed_items:
        return []

    # Indices of candidates not yet assigned to the cluster.
    unvisited = set(range(len(candidate_items)))
    cluster_indices: set[int] = set()
    to_process: list[int] = []

    # Resolve each seed to a candidate index.
    for seed in seed_items:
        # Prefer the seed object itself. Matching on bbox equality first
        # would let an earlier candidate with an identical bbox stand in for
        # the seed, which could then be left out of the cluster entirely.
        match_idx = next(
            (idx for idx, cand in enumerate(candidate_items) if cand is seed),
            None,
        )
        if match_idx is None:
            match_idx = next(
                (
                    idx
                    for idx, cand in enumerate(candidate_items)
                    if cand.bbox.equals(seed.bbox)
                ),
                None,
            )
        if match_idx is None or match_idx not in unvisited:
            continue
        unvisited.discard(match_idx)
        cluster_indices.add(match_idx)
        to_process.append(match_idx)

    # Flood-fill: every index is removed from `unvisited` before it is queued,
    # so each candidate is expanded at most once.
    while to_process:
        current_bbox = candidate_items[to_process.pop()].bbox
        neighbours = [
            idx
            for idx in unvisited
            if candidate_items[idx].bbox.overlaps(current_bbox)
        ]
        unvisited.difference_update(neighbours)
        cluster_indices.update(neighbours)
        to_process.extend(neighbours)

    # Return clustered items in original order
    return [candidate_items[idx] for idx in sorted(cluster_indices)]
1✔
406

407

408
def build_all_connected_clusters[T: HasBBox](items: list[T]) -> list[list[T]]:
1✔
409
    """Build all connected clusters from a list of items based on bbox overlap.
410

411
    Groups all items into clusters where items in each cluster are
412
    transitively connected through overlapping bounding boxes.
413

414
    Args:
415
        items: List of items with bbox property
416

417
    Returns:
418
        List of clusters, where each cluster is a list of connected items
419

420
    Example:
421
        >>> # Find all groups of overlapping images on a page
422
        >>> clusters = build_all_connected_clusters(images)
423
        >>> for cluster in clusters:
424
        ...     print(f"Cluster of {len(cluster)} images")
425
    """
426
    if not items:
1✔
427
        return []
1✔
428

429
    # Track which items have been assigned to clusters
430
    remaining = set(range(len(items)))
1✔
431
    clusters: list[list[T]] = []
1✔
432

433
    while remaining:
1✔
434
        # Pick an arbitrary seed from remaining items
435
        seed_idx = min(remaining)
1✔
436
        seed_item = items[seed_idx]
1✔
437
        remaining.remove(seed_idx)
1✔
438

439
        # Build a cluster starting from this seed
440
        cluster = build_connected_cluster([seed_item], items)
1✔
441

442
        # Remove clustered items from remaining set
443
        for item in cluster:
1✔
444
            try:
1✔
445
                idx = items.index(item)
1✔
446
                remaining.discard(idx)
1✔
447
            except ValueError:
×
448
                pass
×
449

450
        clusters.append(cluster)
1✔
451

452
    return clusters
1✔
453

454

455
def filter_contained(items: list[T], container: BBox) -> list[T]:
    """Select the items whose bbox lies entirely inside ``container``.

    Args:
        items: Objects exposing a ``bbox`` property.
        container: Bounding box that each item's bbox is tested against
            via ``container.contains``.

    Returns:
        A new list holding the contained items, in their input order.
    """
    kept: list[T] = []
    for entry in items:
        if container.contains(entry.bbox):
            kept.append(entry)
    return kept
1✔
466

467

468
def filter_overlapping(items: list[T], target: BBox) -> list[T]:
    """Select the items whose bbox overlaps ``target``.

    Args:
        items: Objects exposing a ``bbox`` property.
        target: Bounding box that each item's bbox is tested against
            via ``target.overlaps``.

    Returns:
        A new list holding the overlapping items, in their input order.
    """

    def touches_target(entry: T) -> bool:
        return target.overlaps(entry.bbox)

    return list(filter(touches_target, items))
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc