• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

freeeve / roaringsearch / 21190975210

20 Jan 2026 11:19PM UTC coverage: 86.655% (+0.2%) from 86.423%
21190975210

push

github

freeeve
refactor(tests): use constant for docs/op metric in bench_add_test.go

1448 of 1671 relevant lines covered (86.65%)

234683.52 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.77
/fields.go
1
package roaringsearch
2

3
import (
4
        "cmp"
5
        "container/heap"
6
        "io"
7
        "os"
8
        "slices"
9
        "sync"
10
        "sync/atomic"
11
        "unsafe"
12

13
        "github.com/RoaringBitmap/roaring"
14
        "github.com/freeeve/msgpck"
15
)
16

17
// BitmapFilter provides fast filtering by multiple category fields using bitmap indexes.
18
// Each field (e.g., "media_type", "language") can have multiple category values
19
// (e.g., "book", "movie"), and each category maps to a bitmap of document IDs.
20
//
21
// Example:
22
//
23
//        filter := NewBitmapFilter()
24
//        filter.Set(1, "media_type", "book")
25
//        filter.Set(1, "language", "english")
26
//        filter.Set(2, "media_type", "movie")
27
//
28
//        books := filter.Get("media_type", "book")           // bitmap of books
29
//        english := filter.Get("language", "english")        // bitmap of english
30
//        englishBooks := roaring.And(books, english)         // AND filter
31
type BitmapFilter struct {
32
        mu     sync.RWMutex
33
        fields map[string]map[string]*roaring.Bitmap
34
        dirty  atomic.Bool
35
}
36

37
// NewBitmapFilter creates a new bitmap filter.
38
func NewBitmapFilter() *BitmapFilter {
12✔
39
        return &BitmapFilter{
12✔
40
                fields: make(map[string]map[string]*roaring.Bitmap),
12✔
41
        }
12✔
42
}
12✔
43

44
// Set assigns a document to a category within a field.
45
func (c *BitmapFilter) Set(docID uint32, field, category string) {
47✔
46
        c.mu.Lock()
47✔
47
        defer c.mu.Unlock()
47✔
48
        c.setLocked(docID, field, category)
47✔
49
}
47✔
50

51
func (c *BitmapFilter) setLocked(docID uint32, field, category string) {
47✔
52
        fieldMap, ok := c.fields[field]
47✔
53
        if !ok {
61✔
54
                fieldMap = make(map[string]*roaring.Bitmap)
14✔
55
                c.fields[field] = fieldMap
14✔
56
        }
14✔
57

58
        bm, ok := fieldMap[category]
47✔
59
        if !ok {
78✔
60
                bm = roaring.New()
31✔
61
                fieldMap[category] = bm
31✔
62
        }
31✔
63
        bm.Add(docID)
47✔
64
        c.dirty.Store(true)
47✔
65
}
66

67
// FilterBatch accumulates entries for efficient batch insertion.
68
type FilterBatch struct {
69
        filter     *BitmapFilter
70
        field      string
71
        docIDs     []uint32
72
        categories []string
73
}
74

75
// Batch creates a new batch builder for the given field.
76
// Use BatchSize for better performance when you know the approximate count.
77
func (c *BitmapFilter) Batch(field string) *FilterBatch {
1✔
78
        return c.BatchSize(field, 1024)
1✔
79
}
1✔
80

81
// BatchSize creates a batch builder with pre-allocated capacity.
82
func (c *BitmapFilter) BatchSize(field string, size int) *FilterBatch {
1✔
83
        return &FilterBatch{
1✔
84
                filter:     c,
1✔
85
                field:      field,
1✔
86
                docIDs:     make([]uint32, 0, size),
1✔
87
                categories: make([]string, 0, size),
1✔
88
        }
1✔
89
}
1✔
90

91
// Add adds a document with a category to the batch.
92
func (b *FilterBatch) Add(docID uint32, category string) {
6✔
93
        b.docIDs = append(b.docIDs, docID)
6✔
94
        b.categories = append(b.categories, category)
6✔
95
}
6✔
96

97
// findCategoryIndex returns the index of category in the list, or -1 if not found.
98
func findCategoryIndex(categoryList []string, cat string) int {
6✔
99
        for j, existing := range categoryList {
11✔
100
                if existing == cat {
8✔
101
                        return j
3✔
102
                }
3✔
103
        }
104
        return -1
3✔
105
}
106

107
// buildCategoryGroups builds category list, indices, and grouped doc IDs.
108
func (b *FilterBatch) buildCategoryGroups() ([]string, [][]uint32) {
2✔
109
        n := len(b.docIDs)
2✔
110
        categoryList := make([]string, 0, 16)
2✔
111
        indices := make([]int, n)
2✔
112

2✔
113
        for i, cat := range b.categories {
8✔
114
                idx := findCategoryIndex(categoryList, cat)
6✔
115
                if idx == -1 {
9✔
116
                        idx = len(categoryList)
3✔
117
                        categoryList = append(categoryList, cat)
3✔
118
                }
3✔
119
                indices[i] = idx
6✔
120
        }
121

122
        numCats := len(categoryList)
2✔
123
        counts := make([]int, numCats)
2✔
124
        for _, idx := range indices {
8✔
125
                counts[idx]++
6✔
126
        }
6✔
127

128
        groups := make([][]uint32, numCats)
2✔
129
        for i, count := range counts {
5✔
130
                groups[i] = make([]uint32, 0, count)
3✔
131
        }
3✔
132

133
        for i, idx := range indices {
8✔
134
                groups[idx] = append(groups[idx], b.docIDs[i])
6✔
135
        }
6✔
136

137
        return categoryList, groups
2✔
138
}
139

140
// addToBitmapsParallel adds doc IDs to bitmaps in parallel.
141
func addToBitmapsParallel(bitmaps []*roaring.Bitmap, groups [][]uint32) {
×
142
        var wg sync.WaitGroup
×
143
        for idx, ids := range groups {
×
144
                wg.Add(1)
×
145
                go func(bm *roaring.Bitmap, docIDs []uint32) {
×
146
                        defer wg.Done()
×
147
                        bm.AddMany(docIDs)
×
148
                }(bitmaps[idx], ids)
×
149
        }
150
        wg.Wait()
×
151
}
152

153
// Flush commits all accumulated entries to the filter.
154
func (b *FilterBatch) Flush() {
3✔
155
        if len(b.docIDs) == 0 {
4✔
156
                return
1✔
157
        }
1✔
158

159
        categoryList, groups := b.buildCategoryGroups()
2✔
160
        numCats := len(categoryList)
2✔
161

2✔
162
        b.filter.mu.Lock()
2✔
163
        defer b.filter.mu.Unlock()
2✔
164

2✔
165
        fieldMap, ok := b.filter.fields[b.field]
2✔
166
        if !ok {
3✔
167
                fieldMap = make(map[string]*roaring.Bitmap)
1✔
168
                b.filter.fields[b.field] = fieldMap
1✔
169
        }
1✔
170

171
        bitmaps := make([]*roaring.Bitmap, numCats)
2✔
172
        for idx := range groups {
5✔
173
                cat := categoryList[idx]
3✔
174
                bm, ok := fieldMap[cat]
3✔
175
                if !ok {
6✔
176
                        bm = roaring.New()
3✔
177
                        fieldMap[cat] = bm
3✔
178
                }
3✔
179
                bitmaps[idx] = bm
3✔
180
        }
181

182
        if numCats >= 4 {
2✔
183
                addToBitmapsParallel(bitmaps, groups)
×
184
        } else {
2✔
185
                for idx, ids := range groups {
5✔
186
                        bitmaps[idx].AddMany(ids)
3✔
187
                }
3✔
188
        }
189

190
        b.docIDs = b.docIDs[:0]
2✔
191
        b.categories = b.categories[:0]
2✔
192
}
193

194
// Remove removes a document from all categories across all fields.
195
func (c *BitmapFilter) Remove(docID uint32) {
1✔
196
        c.mu.Lock()
1✔
197
        defer c.mu.Unlock()
1✔
198

1✔
199
        for _, fieldMap := range c.fields {
2✔
200
                for _, bm := range fieldMap {
2✔
201
                        bm.Remove(docID)
1✔
202
                }
1✔
203
        }
204
        c.dirty.Store(true)
1✔
205
}
206

207
// Get returns a bitmap of documents in the given category for a field.
208
// Returns nil if field or category doesn't exist.
209
func (c *BitmapFilter) Get(field, category string) *roaring.Bitmap {
19✔
210
        c.mu.RLock()
19✔
211
        defer c.mu.RUnlock()
19✔
212

19✔
213
        fieldMap, ok := c.fields[field]
19✔
214
        if !ok {
20✔
215
                return nil
1✔
216
        }
1✔
217
        return fieldMap[category]
18✔
218
}
219

220
// GetAny returns a bitmap of documents in ANY of the given categories (OR).
221
func (c *BitmapFilter) GetAny(field string, categories []string) *roaring.Bitmap {
1✔
222
        c.mu.RLock()
1✔
223
        defer c.mu.RUnlock()
1✔
224

1✔
225
        fieldMap, ok := c.fields[field]
1✔
226
        if !ok {
1✔
227
                return roaring.New()
×
228
        }
×
229

230
        result := roaring.New()
1✔
231
        for _, cat := range categories {
3✔
232
                if bm, ok := fieldMap[cat]; ok {
4✔
233
                        result.Or(bm)
2✔
234
                }
2✔
235
        }
236
        return result
1✔
237
}
238

239
// Categories returns all category values for a given field.
240
func (c *BitmapFilter) Categories(field string) []string {
2✔
241
        c.mu.RLock()
2✔
242
        defer c.mu.RUnlock()
2✔
243

2✔
244
        fieldMap, ok := c.fields[field]
2✔
245
        if !ok {
3✔
246
                return nil
1✔
247
        }
1✔
248

249
        cats := make([]string, 0, len(fieldMap))
1✔
250
        for cat := range fieldMap {
4✔
251
                cats = append(cats, cat)
3✔
252
        }
3✔
253
        return cats
1✔
254
}
255

256
// Counts returns the number of documents in each category for a field.
257
func (c *BitmapFilter) Counts(field string) map[string]uint64 {
1✔
258
        c.mu.RLock()
1✔
259
        defer c.mu.RUnlock()
1✔
260

1✔
261
        fieldMap, ok := c.fields[field]
1✔
262
        if !ok {
1✔
263
                return nil
×
264
        }
×
265

266
        counts := make(map[string]uint64, len(fieldMap))
1✔
267
        for cat, bm := range fieldMap {
3✔
268
                counts[cat] = bm.GetCardinality()
2✔
269
        }
2✔
270
        return counts
1✔
271
}
272

273
// AllCounts returns counts for all fields and categories.
274
func (c *BitmapFilter) AllCounts() map[string]map[string]uint64 {
1✔
275
        c.mu.RLock()
1✔
276
        defer c.mu.RUnlock()
1✔
277

1✔
278
        result := make(map[string]map[string]uint64, len(c.fields))
1✔
279
        for field, fieldMap := range c.fields {
3✔
280
                counts := make(map[string]uint64, len(fieldMap))
2✔
281
                for cat, bm := range fieldMap {
6✔
282
                        counts[cat] = bm.GetCardinality()
4✔
283
                }
4✔
284
                result[field] = counts
2✔
285
        }
286
        return result
1✔
287
}
288

289
// MemoryUsage returns the total memory used by all bitmaps in bytes.
290
func (c *BitmapFilter) MemoryUsage() uint64 {
×
291
        c.mu.RLock()
×
292
        defer c.mu.RUnlock()
×
293

×
294
        var total uint64
×
295
        for _, fieldMap := range c.fields {
×
296
                for _, bm := range fieldMap {
×
297
                        total += bm.GetSizeInBytes()
×
298
                }
×
299
        }
300
        return total
×
301
}
302

303
// bitmapFilterData is the serializable representation.
304
type bitmapFilterData struct {
305
        Fields map[string]map[string][]byte `msgpack:"fields"`
306
}
307

308
// SaveToFile saves the bitmap filter to a file atomically.
309
// Writes to a temp file first, then renames to prevent corruption on crash.
310
func (c *BitmapFilter) SaveToFile(path string) error {
2✔
311
        if !c.dirty.Load() {
3✔
312
                if _, err := os.Stat(path); err == nil {
1✔
313
                        return nil // File exists and no changes - safe to skip
×
314
                }
×
315
                // File doesn't exist, must create it even if not dirty
316
        }
317

318
        tmpPath := path + ".tmp"
2✔
319
        file, err := os.Create(tmpPath)
2✔
320
        if err != nil {
2✔
321
                return err
×
322
        }
×
323

324
        if err := c.Encode(file); err != nil {
2✔
325
                file.Close()
×
326
                os.Remove(tmpPath)
×
327
                return err
×
328
        }
×
329

330
        if err := file.Sync(); err != nil {
2✔
331
                file.Close()
×
332
                os.Remove(tmpPath)
×
333
                return err
×
334
        }
×
335

336
        if err := file.Close(); err != nil {
2✔
337
                os.Remove(tmpPath)
×
338
                return err
×
339
        }
×
340

341
        if err := os.Rename(tmpPath, path); err != nil {
2✔
342
                os.Remove(tmpPath)
×
343
                return err
×
344
        }
×
345

346
        c.dirty.Store(false)
2✔
347
        return nil
2✔
348
}
349

350
// Encode writes the bitmap filter to a writer.
351
// Takes a snapshot of the data first to avoid holding the lock during I/O.
352
func (c *BitmapFilter) Encode(w io.Writer) error {
2✔
353
        // Snapshot data while holding lock briefly
2✔
354
        c.mu.RLock()
2✔
355
        data := bitmapFilterData{
2✔
356
                Fields: make(map[string]map[string][]byte, len(c.fields)),
2✔
357
        }
2✔
358

2✔
359
        for field, fieldMap := range c.fields {
4✔
360
                data.Fields[field] = make(map[string][]byte, len(fieldMap))
2✔
361
                for cat, bm := range fieldMap {
7✔
362
                        bmBytes, err := bm.ToBytes()
5✔
363
                        if err != nil {
5✔
364
                                c.mu.RUnlock()
×
365
                                return err
×
366
                        }
×
367
                        data.Fields[field][cat] = bmBytes
5✔
368
                }
369
        }
370
        c.mu.RUnlock()
2✔
371

2✔
372
        // Write without holding lock - safe for concurrent reads/writes
2✔
373
        encoded, err := msgpck.Marshal(data)
2✔
374
        if err != nil {
2✔
375
                return err
×
376
        }
×
377
        _, err = w.Write(encoded)
2✔
378
        return err
2✔
379
}
380

381
// LoadBitmapFilter loads a bitmap filter from a file.
382
func LoadBitmapFilter(path string) (*BitmapFilter, error) {
2✔
383
        file, err := os.Open(path)
2✔
384
        if err != nil {
2✔
385
                return nil, err
×
386
        }
×
387
        defer file.Close()
2✔
388
        return ReadBitmapFilter(file)
2✔
389
}
390

391
// ReadBitmapFilter reads a bitmap filter from a reader.
392
func ReadBitmapFilter(r io.Reader) (*BitmapFilter, error) {
3✔
393
        data, err := io.ReadAll(r)
3✔
394
        if err != nil {
3✔
395
                return nil, err
×
396
        }
×
397

398
        var decoded bitmapFilterData
3✔
399
        if err := msgpck.UnmarshalStruct(data, &decoded); err != nil {
4✔
400
                return nil, err
1✔
401
        }
1✔
402

403
        c := &BitmapFilter{
2✔
404
                fields: make(map[string]map[string]*roaring.Bitmap, len(decoded.Fields)),
2✔
405
        }
2✔
406

2✔
407
        for field, fieldMap := range decoded.Fields {
4✔
408
                c.fields[field] = make(map[string]*roaring.Bitmap, len(fieldMap))
2✔
409
                for cat, bmBytes := range fieldMap {
7✔
410
                        bm := roaring.New()
5✔
411
                        if err := bm.UnmarshalBinary(bmBytes); err != nil {
5✔
412
                                return nil, err
×
413
                        }
×
414
                        c.fields[field][cat] = bm
5✔
415
                }
416
        }
417

418
        return c, nil
2✔
419
}
420

421
// SortColumn provides a typed columnar array for sorting documents by a value.
422
// Uses heap-based partial sort for efficient top-K queries.
423
//
424
// Example:
425
//
426
//        ratings := NewSortColumn[uint16]()
427
//        ratings.Set(1, 85)
428
//        ratings.Set(2, 92)
429
//
430
//        // Sort all docs
431
//        results := ratings.Sort([]uint32{1, 2}, false, 10)
432
//
433
//        // Sort filtered docs from a bitmap
434
//        results := ratings.SortBitmapDesc(filteredBitmap, 100)
435
type SortColumn[T cmp.Ordered] struct {
436
        mu       sync.RWMutex
437
        values   []T
438
        maxDocID uint32
439
        dirty    atomic.Bool
440
}
441

442
// SortedResult holds a document ID and its sort value.
443
type SortedResult[T cmp.Ordered] struct {
444
        DocID uint32
445
        Value T
446
}
447

448
// NewSortColumn creates a new typed sort column.
449
func NewSortColumn[T cmp.Ordered]() *SortColumn[T] {
14✔
450
        return &SortColumn[T]{
14✔
451
                values: make([]T, 0),
14✔
452
        }
14✔
453
}
14✔
454

455
// Set sets the value for a document.
456
func (col *SortColumn[T]) Set(docID uint32, value T) {
153✔
457
        col.mu.Lock()
153✔
458
        defer col.mu.Unlock()
153✔
459
        col.setLocked(docID, value)
153✔
460
}
153✔
461

462
func (col *SortColumn[T]) setLocked(docID uint32, value T) {
153✔
463
        // Grow array if needed
153✔
464
        if docID >= uint32(len(col.values)) {
165✔
465
                newSize := docID + 1
12✔
466
                if newSize < uint32(len(col.values)*5/4) {
12✔
467
                        newSize = uint32(len(col.values) * 5 / 4)
×
468
                }
×
469
                if newSize < 1024 {
24✔
470
                        newSize = 1024
12✔
471
                }
12✔
472
                newValues := make([]T, newSize)
12✔
473
                copy(newValues, col.values)
12✔
474
                col.values = newValues
12✔
475
        }
476

477
        col.values[docID] = value
153✔
478

153✔
479
        if docID > col.maxDocID {
306✔
480
                col.maxDocID = docID
153✔
481
        }
153✔
482
        col.dirty.Store(true)
153✔
483
}
484

485
// SortColumnBatch accumulates entries for efficient batch insertion.
486
type SortColumnBatch[T cmp.Ordered] struct {
487
        col    *SortColumn[T]
488
        docIDs []uint32
489
        values []T
490
}
491

492
// Batch creates a new batch builder for this column.
493
// Use BatchSize for better performance when you know the approximate count.
494
func (col *SortColumn[T]) Batch() *SortColumnBatch[T] {
1✔
495
        return col.BatchSize(1024)
1✔
496
}
1✔
497

498
// BatchSize creates a batch builder with pre-allocated capacity.
499
func (col *SortColumn[T]) BatchSize(size int) *SortColumnBatch[T] {
1✔
500
        return &SortColumnBatch[T]{
1✔
501
                col:    col,
1✔
502
                docIDs: make([]uint32, 0, size),
1✔
503
                values: make([]T, 0, size),
1✔
504
        }
1✔
505
}
1✔
506

507
// Add adds a document with a value to the batch.
508
func (b *SortColumnBatch[T]) Add(docID uint32, value T) {
5✔
509
        b.docIDs = append(b.docIDs, docID)
5✔
510
        b.values = append(b.values, value)
5✔
511
}
5✔
512

513
// Flush commits all accumulated entries to the column.
514
func (b *SortColumnBatch[T]) Flush() {
3✔
515
        if len(b.docIDs) == 0 {
4✔
516
                return
1✔
517
        }
1✔
518

519
        // Find max docID to pre-allocate
520
        var maxID uint32
2✔
521
        for _, id := range b.docIDs {
7✔
522
                if id > maxID {
10✔
523
                        maxID = id
5✔
524
                }
5✔
525
        }
526

527
        b.col.mu.Lock()
2✔
528
        defer b.col.mu.Unlock()
2✔
529

2✔
530
        // Pre-allocate if needed
2✔
531
        if maxID >= uint32(len(b.col.values)) {
4✔
532
                newValues := make([]T, maxID+1)
2✔
533
                copy(newValues, b.col.values)
2✔
534
                b.col.values = newValues
2✔
535
        }
2✔
536

537
        // Set all values
538
        for i, id := range b.docIDs {
7✔
539
                b.col.values[id] = b.values[i]
5✔
540
                if id > b.col.maxDocID {
10✔
541
                        b.col.maxDocID = id
5✔
542
                }
5✔
543
        }
544

545
        // Clear for reuse
546
        b.docIDs = b.docIDs[:0]
2✔
547
        b.values = b.values[:0]
2✔
548
}
549

550
// Get returns the value for a document.
551
func (col *SortColumn[T]) Get(docID uint32) T {
9✔
552
        col.mu.RLock()
9✔
553
        defer col.mu.RUnlock()
9✔
554

9✔
555
        var zero T
9✔
556
        if docID >= uint32(len(col.values)) {
9✔
557
                return zero
×
558
        }
×
559
        return col.values[docID]
9✔
560
}
561

562
// MemoryUsage returns the memory used by the values array in bytes.
563
func (col *SortColumn[T]) MemoryUsage() uint64 {
×
564
        col.mu.RLock()
×
565
        defer col.mu.RUnlock()
×
566

×
567
        var zero T
×
568
        return uint64(len(col.values)) * uint64(unsafe.Sizeof(zero))
×
569
}
×
570

571
// Sort sorts document IDs by their value.
572
// Uses heap-based partial sort when limit is small relative to input.
573
func (col *SortColumn[T]) Sort(docIDs []uint32, asc bool, limit int) []SortedResult[T] {
12✔
574
        col.mu.RLock()
12✔
575
        defer col.mu.RUnlock()
12✔
576

12✔
577
        return col.sortLocked(docIDs, asc, limit)
12✔
578
}
12✔
579

580
// SortDesc is a convenience method for descending sort.
581
func (col *SortColumn[T]) SortDesc(docIDs []uint32, limit int) []SortedResult[T] {
4✔
582
        return col.Sort(docIDs, false, limit)
4✔
583
}
4✔
584

585
// SortBitmap sorts documents from a bitmap by their value.
586
func (col *SortColumn[T]) SortBitmap(bm *roaring.Bitmap, asc bool, limit int) []SortedResult[T] {
4✔
587
        if bm == nil || bm.IsEmpty() {
6✔
588
                return nil
2✔
589
        }
2✔
590

591
        col.mu.RLock()
2✔
592
        defer col.mu.RUnlock()
2✔
593

2✔
594
        return col.sortLocked(bm.ToArray(), asc, limit)
2✔
595
}
596

597
// SortBitmapDesc is a convenience method for descending bitmap sort.
598
func (col *SortColumn[T]) SortBitmapDesc(bm *roaring.Bitmap, limit int) []SortedResult[T] {
2✔
599
        return col.SortBitmap(bm, false, limit)
2✔
600
}
2✔
601

602
func (col *SortColumn[T]) sortLocked(docIDs []uint32, asc bool, limit int) []SortedResult[T] {
14✔
603
        if len(docIDs) == 0 {
14✔
604
                return nil
×
605
        }
×
606

607
        values := col.values
14✔
608

14✔
609
        // Use heap for partial sort when limit is small relative to input
14✔
610
        if limit > 0 && limit < len(docIDs)/4 {
17✔
611
                return col.heapSort(docIDs, values, asc, limit)
3✔
612
        }
3✔
613

614
        // Full sort
615
        results := make([]SortedResult[T], len(docIDs))
11✔
616
        for i, docID := range docIDs {
47✔
617
                var value T
36✔
618
                if docID < uint32(len(values)) {
72✔
619
                        value = values[docID]
36✔
620
                }
36✔
621
                results[i] = SortedResult[T]{DocID: docID, Value: value}
36✔
622
        }
623

624
        if asc {
14✔
625
                slices.SortFunc(results, func(a, b SortedResult[T]) int {
12✔
626
                        return cmp.Compare(a.Value, b.Value)
9✔
627
                })
9✔
628
        } else {
8✔
629
                slices.SortFunc(results, func(a, b SortedResult[T]) int {
40✔
630
                        return cmp.Compare(b.Value, a.Value)
32✔
631
                })
32✔
632
        }
633

634
        if limit > 0 && limit < len(results) {
13✔
635
                results = results[:limit]
2✔
636
        }
2✔
637

638
        return results
11✔
639
}
640

641
// isBetterValue returns true if newVal should replace topVal in the heap.
642
func isBetterValue[T cmp.Ordered](newVal, topVal T, asc bool) bool {
196✔
643
        if asc {
286✔
644
                return newVal < topVal
90✔
645
        }
90✔
646
        return newVal > topVal
106✔
647
}
648

649
// heapToSortedResults extracts heap items into a sorted slice.
650
func heapToSortedResults[T cmp.Ordered](h *resultHeap[T]) []SortedResult[T] {
3✔
651
        results := make([]SortedResult[T], h.Len())
3✔
652
        for i := len(results) - 1; i >= 0; i-- {
27✔
653
                results[i] = heap.Pop(h).(SortedResult[T])
24✔
654
        }
24✔
655
        return results
3✔
656
}
657

658
func (col *SortColumn[T]) heapSort(docIDs []uint32, values []T, asc bool, limit int) []SortedResult[T] {
3✔
659
        h := &resultHeap[T]{
3✔
660
                items: make([]SortedResult[T], 0, limit),
3✔
661
                asc:   asc,
3✔
662
        }
3✔
663

3✔
664
        for _, docID := range docIDs {
223✔
665
                var value T
220✔
666
                if docID < uint32(len(values)) {
440✔
667
                        value = values[docID]
220✔
668
                }
220✔
669
                col.heapInsert(h, docID, value, asc, limit)
220✔
670
        }
671

672
        if h.Len() < limit && h.Len() > 0 {
3✔
673
                heap.Init(h)
×
674
        }
×
675

676
        return heapToSortedResults(h)
3✔
677
}
678

679
// heapInsert adds a value to the heap, maintaining the top-k invariant.
680
func (col *SortColumn[T]) heapInsert(h *resultHeap[T], docID uint32, value T, asc bool, limit int) {
220✔
681
        if h.Len() < limit {
244✔
682
                h.items = append(h.items, SortedResult[T]{DocID: docID, Value: value})
24✔
683
                if h.Len() == limit {
27✔
684
                        heap.Init(h)
3✔
685
                }
3✔
686
                return
24✔
687
        }
688

689
        if isBetterValue(value, h.items[0].Value, asc) {
302✔
690
                h.items[0] = SortedResult[T]{DocID: docID, Value: value}
106✔
691
                heap.Fix(h, 0)
106✔
692
        }
106✔
693
}
694

695
// resultHeap implements heap.Interface for SortedResult.
696
type resultHeap[T cmp.Ordered] struct {
697
        items []SortedResult[T]
698
        asc   bool
699
}
700

701
func (h *resultHeap[T]) Len() int { return len(h.items) }
388✔
702

703
func (h *resultHeap[T]) Less(i, j int) bool {
584✔
704
        if h.asc {
625✔
705
                return h.items[i].Value > h.items[j].Value // max-heap for ascending
41✔
706
        }
41✔
707
        return h.items[i].Value < h.items[j].Value // min-heap for descending
543✔
708
}
709

710
func (h *resultHeap[T]) Swap(i, j int) { h.items[i], h.items[j] = h.items[j], h.items[i] }
328✔
711

712
func (h *resultHeap[T]) Push(x any) {
3✔
713
        h.items = append(h.items, x.(SortedResult[T]))
3✔
714
}
3✔
715

716
func (h *resultHeap[T]) Pop() any {
25✔
717
        n := len(h.items)
25✔
718
        x := h.items[n-1]
25✔
719
        h.items = h.items[:n-1]
25✔
720
        return x
25✔
721
}
25✔
722

723
// sortColumnData is the serializable representation.
724
type sortColumnData[T cmp.Ordered] struct {
725
        Values   []T    `msgpack:"values"`
726
        MaxDocID uint32 `msgpack:"max_doc_id"`
727
}
728

729
// SaveToFile saves the sort column to a file atomically.
730
// Writes to a temp file first, then renames to prevent corruption on crash.
731
func (col *SortColumn[T]) SaveToFile(path string) error {
2✔
732
        if !col.dirty.Load() {
3✔
733
                if _, err := os.Stat(path); err == nil {
1✔
734
                        return nil // File exists and no changes - safe to skip
×
735
                }
×
736
                // File doesn't exist, must create it even if not dirty
737
        }
738

739
        tmpPath := path + ".tmp"
2✔
740
        file, err := os.Create(tmpPath)
2✔
741
        if err != nil {
2✔
742
                return err
×
743
        }
×
744

745
        if err := col.Encode(file); err != nil {
2✔
746
                file.Close()
×
747
                os.Remove(tmpPath)
×
748
                return err
×
749
        }
×
750

751
        if err := file.Sync(); err != nil {
2✔
752
                file.Close()
×
753
                os.Remove(tmpPath)
×
754
                return err
×
755
        }
×
756

757
        if err := file.Close(); err != nil {
2✔
758
                os.Remove(tmpPath)
×
759
                return err
×
760
        }
×
761

762
        if err := os.Rename(tmpPath, path); err != nil {
2✔
763
                os.Remove(tmpPath)
×
764
                return err
×
765
        }
×
766

767
        col.dirty.Store(false)
2✔
768
        return nil
2✔
769
}
770

771
// Encode writes the sort column to a writer.
772
// Takes a snapshot of the data first to avoid holding the lock during I/O.
773
func (col *SortColumn[T]) Encode(w io.Writer) error {
2✔
774
        // Snapshot data while holding lock briefly
2✔
775
        col.mu.RLock()
2✔
776
        var valuesCopy []T
2✔
777
        if len(col.values) > 0 {
3✔
778
                valuesCopy = make([]T, col.maxDocID+1)
1✔
779
                copy(valuesCopy, col.values[:col.maxDocID+1])
1✔
780
        }
1✔
781
        maxDocID := col.maxDocID
2✔
782
        col.mu.RUnlock()
2✔
783

2✔
784
        // Write without holding lock - safe for concurrent reads/writes
2✔
785
        data := sortColumnData[T]{
2✔
786
                Values:   valuesCopy,
2✔
787
                MaxDocID: maxDocID,
2✔
788
        }
2✔
789

2✔
790
        encoded, err := msgpck.Marshal(data)
2✔
791
        if err != nil {
2✔
792
                return err
×
793
        }
×
794
        _, err = w.Write(encoded)
2✔
795
        return err
2✔
796
}
797

798
// LoadSortColumn loads a sort column from a file.
799
func LoadSortColumn[T cmp.Ordered](path string) (*SortColumn[T], error) {
2✔
800
        file, err := os.Open(path)
2✔
801
        if err != nil {
2✔
802
                return nil, err
×
803
        }
×
804
        defer file.Close()
2✔
805
        return ReadSortColumn[T](file)
2✔
806
}
807

808
// ReadSortColumn reads a sort column from a reader.
809
func ReadSortColumn[T cmp.Ordered](r io.Reader) (*SortColumn[T], error) {
3✔
810
        bytes, err := io.ReadAll(r)
3✔
811
        if err != nil {
3✔
812
                return nil, err
×
813
        }
×
814

815
        var data sortColumnData[T]
3✔
816
        if err := msgpck.UnmarshalStruct(bytes, &data); err != nil {
4✔
817
                return nil, err
1✔
818
        }
1✔
819

820
        return &SortColumn[T]{
2✔
821
                values:   data.Values,
2✔
822
                maxDocID: data.MaxDocID,
2✔
823
        }, nil
2✔
824
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc