• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16992684502

15 Aug 2025 02:56PM UTC coverage: 87.875% (+0.2%) from 87.72%
16992684502

Pull #2456

github

web-flow
Merge 2d540e578 into 4a23f65b3
Pull Request #2456: feat: basic BoolBuffer / BoolBufferMut

1275 of 1428 new or added lines in 110 files covered. (89.29%)

334 existing lines in 31 files now uncovered.

57169 of 65057 relevant lines covered (87.88%)

658056.52 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.74
/vortex-buffer/src/bit/buf.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::ops::{BitAnd, BitOr, BitXor, Not};
5

6
use crate::bit::aligned::BitChunks;
7
use crate::bit::ops::{bitwise_and, bitwise_not, bitwise_or, bitwise_unary_op, bitwise_xor};
8
use crate::bit::unaligned::{BitIndexIterator, BitIterator, BitSliceIterator, UnalignedBitChunks};
9
use crate::{
10
    Alignment, BitBufferMut, Buffer, BufferMut, ByteBuffer, buffer, get_bit, get_bit_unchecked,
11
};
12

13
/// An immutable bitset stored as a packed byte buffer.
14
#[derive(Clone, Debug, Eq)]
15
pub struct BitBuffer {
16
    buffer: ByteBuffer,
17
    len: usize,
18
    offset: usize,
19
}
20

21
impl PartialEq for BitBuffer {
22
    fn eq(&self, other: &Self) -> bool {
4,515✔
23
        if self.len != other.len {
4,515✔
NEW
24
            return false;
×
25
        }
4,515✔
26

27
        self.chunks()
4,515✔
28
            .iter()
4,515✔
29
            .zip(other.chunks())
4,515✔
30
            .all(|(a, b)| a == b)
18,060✔
31
    }
4,515✔
32
}
33

34
impl BitBuffer {
35
    /// Create a new `BoolBuffer` backed by a [`ByteBuffer`] with `len` bits in view.
36
    ///
37
    /// Panics if the buffer is not large enough to hold `len` bits.
38
    pub fn new(buffer: ByteBuffer, len: usize) -> Self {
269,130✔
39
        assert!(
269,130✔
40
            buffer.len() * 8 >= len,
269,130✔
NEW
41
            "provided ByteBuffer not large enough to back BoolBuffer with len {len}"
×
42
        );
43

44
        Self {
269,130✔
45
            buffer,
269,130✔
46
            len,
269,130✔
47
            offset: 0,
269,130✔
48
        }
269,130✔
49
    }
269,130✔
50

51
    /// Create a new `BoolBuffer` backed by a [`ByteBuffer`] with `len` bits in view, starting at the
52
    /// given `offset` (in bits).
53
    ///
54
    /// Panics if the buffer is not large enough to hold `len` bits or if the offset is greater than
55
    pub fn new_with_offset(buffer: ByteBuffer, len: usize, offset: usize) -> Self {
758,424✔
56
        assert!(
758,424✔
57
            len.saturating_add(offset) <= buffer.len().saturating_mul(8),
758,424✔
NEW
58
            "provided ByteBuffer (len={}) not large enough to back BoolBuffer with offset {offset} len {len}",
×
NEW
59
            buffer.len()
×
60
        );
61

62
        Self {
758,424✔
63
            buffer,
758,424✔
64
            len,
758,424✔
65
            offset,
758,424✔
66
        }
758,424✔
67
    }
758,424✔
68

69
    /// Create a new `BoolBuffer` of length `len` where all bits are set (true).
70
    pub fn new_set(len: usize) -> Self {
70,272✔
71
        let words = len.div_ceil(8);
70,272✔
72
        let buffer = buffer![0xFF; words];
70,272✔
73

74
        Self {
70,272✔
75
            buffer,
70,272✔
76
            len,
70,272✔
77
            offset: 0,
70,272✔
78
        }
70,272✔
79
    }
70,272✔
80

81
    /// Create a new `BoolBuffer` of length `len` where all bits are unset (false).
82
    pub fn new_unset(len: usize) -> Self {
12,634✔
83
        let words = len.div_ceil(8);
12,634✔
84
        let buffer = Buffer::zeroed(words);
12,634✔
85

86
        Self {
12,634✔
87
            buffer,
12,634✔
88
            len,
12,634✔
89
            offset: 0,
12,634✔
90
        }
12,634✔
91
    }
12,634✔
92

93
    /// Invokes `f` with indexes `0..len` collecting the boolean results into a new `BitBuffer`
94
    pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, mut f: F) -> Self {
24,360✔
95
        let mut buffer = BufferMut::with_capacity(len.div_ceil(64) * 8);
24,360✔
96

97
        let chunks = len / 64;
24,360✔
98
        let remainder = len % 64;
24,360✔
99
        for chunk in 0..chunks {
262,966✔
100
            let mut packed = 0;
261,634✔
101
            for bit_idx in 0..64 {
17,006,210✔
102
                let i = bit_idx + chunk * 64;
16,744,576✔
103
                packed |= (f(i) as u64) << bit_idx;
16,744,576✔
104
            }
16,744,576✔
105

106
            // SAFETY: Already allocated sufficient capacity
107
            unsafe { buffer.push_unchecked(packed) }
261,634✔
108
        }
109

110
        if remainder != 0 {
24,360✔
111
            let mut packed = 0;
23,926✔
112
            for bit_idx in 0..remainder {
125,264✔
113
                let i = bit_idx + chunks * 64;
125,264✔
114
                packed |= (f(i) as u64) << bit_idx;
125,264✔
115
            }
125,264✔
116

117
            // SAFETY: Already allocated sufficient capacity
118
            unsafe { buffer.push_unchecked(packed) }
23,926✔
119
        }
434✔
120

121
        buffer.truncate(len.div_ceil(8));
24,360✔
122
        Self::new(
24,360✔
123
            Buffer::from_byte_buffer_aligned(
24,360✔
124
                buffer.freeze().into_byte_buffer(),
24,360✔
125
                Alignment::of::<u8>(),
24,360✔
126
            ),
127
            len,
24,360✔
128
        )
129
    }
24,360✔
130

131
    /// Get the logical length of this `BoolBuffer`.
132
    ///
133
    /// This may differ from the physical length of the backing buffer, for example if it was
134
    /// created using the `new_with_offset` constructor, or if it was sliced.
135
    #[inline]
136
    pub fn len(&self) -> usize {
46,865,584✔
137
        self.len
46,865,584✔
138
    }
46,865,584✔
139

140
    /// Returns `true` if the `BoolBuffer` is empty.
141
    #[inline]
142
    pub fn is_empty(&self) -> bool {
9✔
143
        self.len() == 0
9✔
144
    }
9✔
145

146
    /// Offset of the start of the buffer in bits.
147
    #[inline]
148
    pub fn offset(&self) -> usize {
22,630,740✔
149
        self.offset
22,630,740✔
150
    }
22,630,740✔
151

152
    /// Get a reference to the underlying buffer.
153
    #[inline]
154
    pub fn inner(&self) -> &ByteBuffer {
56,378✔
155
        &self.buffer
56,378✔
156
    }
56,378✔
157

158
    /// Retrieve the value at the given index.
159
    ///
160
    /// Panics if the index is out of bounds.
161
    pub fn value(&self, index: usize) -> bool {
38,882,126✔
162
        get_bit(&self.buffer, index + self.offset)
38,882,126✔
163
    }
38,882,126✔
164

165
    /// Retrieve the value at the given index without bounds checking
166
    ///
167
    /// # SAFETY
168
    /// Caller must ensure that index is within the range of the buffer
169
    pub unsafe fn value_unchecked(&self, index: usize) -> bool {
129,731✔
170
        unsafe { get_bit_unchecked(&self.buffer, index + self.offset) }
129,731✔
171
    }
129,731✔
172

173
    /// Create a new zero-copy slice of this BoolBuffer that begins at the `start` index and extends
174
    /// for `len` bits.
175
    ///
176
    /// Panics if the slice would extend beyond the end of the buffer.
177
    pub fn slice(&self, start: usize, len: usize) -> Self {
35,170✔
178
        assert!(
35,170✔
179
            start + len <= self.len,
35,170✔
NEW
180
            "slice of len {len} starting at {start} exceeds len {}",
×
181
            self.len
182
        );
183

184
        Self::new_with_offset(self.buffer.clone(), len, self.offset + start)
35,170✔
185
    }
35,170✔
186

187
    /// Slice any full bytes from the buffer, leaving the offset < 8.
188
    pub fn shrink_offset(self) -> Self {
499,565✔
189
        let bit_offset = self.offset % 8;
499,565✔
190
        let len = self.len;
499,565✔
191
        let buffer = self.into_inner();
499,565✔
192
        BitBuffer::new_with_offset(buffer, len, bit_offset)
499,565✔
193
    }
499,565✔
194

195
    /// Access chunks of the buffer aligned to 8 byte boundary as [prefix, \<full chunks\>, suffix]
196
    pub fn unaligned_chunks(&self) -> UnalignedBitChunks {
556,564✔
197
        UnalignedBitChunks::new(self.buffer.clone(), self.offset, self.len)
556,564✔
198
    }
556,564✔
199

200
    /// Access chunks of the underlying buffer as 8 byte chunks with a final trailer
201
    ///
202
    /// If you're performing operations on a single buffer, prefer [BitBuffer::unaligned_chunks]
203
    pub fn chunks(&self) -> BitChunks {
9,030✔
204
        BitChunks::new(self.buffer.clone(), self.offset, self.len)
9,030✔
205
    }
9,030✔
206

207
    /// Get the number of set bits in the buffer.
208
    pub fn true_count(&self) -> usize {
556,564✔
209
        self.unaligned_chunks().count_ones()
556,564✔
210
    }
556,564✔
211

212
    /// Get the number of unset bits in the buffer.
213
    pub fn false_count(&self) -> usize {
2✔
214
        self.len - self.true_count()
2✔
215
    }
2✔
216

217
    /// Iterator over bits in the buffer
218
    pub fn iter(&self) -> BitIterator {
59,632✔
219
        BitIterator::new(self.buffer.clone(), self.offset, self.len)
59,632✔
220
    }
59,632✔
221

222
    /// Iterator over set indices of the underlying buffer
223
    pub fn set_indices(&self) -> BitIndexIterator {
69,648✔
224
        BitIndexIterator::new(self.buffer.clone(), self.offset, self.len)
69,648✔
225
    }
69,648✔
226

227
    /// Iterator over set slices of the underlying buffer
228
    pub fn set_slices(&self) -> BitSliceIterator {
8,770✔
229
        BitSliceIterator::new(self.buffer.clone(), self.offset, self.len)
8,770✔
230
    }
8,770✔
231

232
    /// Created a new BitBuffer with offset reset to 0
NEW
233
    pub fn sliced(&self) -> Self {
×
NEW
234
        if self.offset % 8 == 0 {
×
NEW
235
            return Self::new(
×
NEW
236
                self.buffer.slice(self.offset / 8..self.len.div_ceil(8)),
×
NEW
237
                self.len,
×
238
            );
NEW
239
        }
×
240

NEW
241
        Self::new(
×
NEW
242
            bitwise_unary_op(self.buffer.clone(), self.offset, self.len, |a| a),
×
NEW
243
            self.len,
×
244
        )
NEW
245
    }
×
246
}
247

248
// Conversions
249

250
impl BitBuffer {
251
    /// Consumes this `BoolBuffer` and returns the backing `Buffer<u8>` with any offset
252
    /// and length information applied.
253
    pub fn into_inner(self) -> ByteBuffer {
573,092✔
254
        let word_start = self.offset / 8;
573,092✔
255
        let word_end = (self.offset + self.len).div_ceil(8);
573,092✔
256

257
        self.buffer.slice(word_start..word_end)
573,092✔
258
    }
573,092✔
259

260
    /// Get a mutable version of this `BitBuffer` along with bit offset in the first byte.
261
    ///
262
    /// If the caller doesn't hold only reference to the underlying buffer, a copy is created.
263
    /// The second value of the tuple is a bit_offset of the first value in the first byte
264
    pub fn into_mut(self) -> (BitBufferMut, usize) {
4,171✔
265
        let bit_offset = self.offset % 8;
4,171✔
266
        let len = self.len;
4,171✔
267
        // TODO(robert): if we are copying here we can strip offset bits
268
        let shrunk = self.into_inner().into_mut();
4,171✔
269
        (BitBufferMut::from_buffer(shrunk, len), bit_offset)
4,171✔
270
    }
4,171✔
271
}
272

273
impl From<&[bool]> for BitBuffer {
274
    fn from(value: &[bool]) -> Self {
7,396✔
275
        let mut buf = BitBufferMut::new_unset(value.len());
7,396✔
276
        for (i, &v) in value.iter().enumerate() {
28,939✔
277
            if v {
28,939✔
278
                // SAFETY: i is in bounds
279
                unsafe { buf.set_unchecked(i) }
14,534✔
280
            }
14,405✔
281
        }
282
        buf.freeze()
7,396✔
283
    }
7,396✔
284
}
285

286
impl From<Vec<bool>> for BitBuffer {
287
    fn from(value: Vec<bool>) -> Self {
817✔
288
        value.as_slice().into()
817✔
289
    }
817✔
290
}
291

292
impl FromIterator<bool> for BitBuffer {
293
    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
111,790✔
294
        let iter = iter.into_iter();
111,790✔
295
        let (low, high) = iter.size_hint();
111,790✔
296
        if let Some(len) = high {
111,790✔
297
            let mut buf = BitBufferMut::new_unset(len);
111,789✔
298
            for (i, v) in iter.enumerate() {
22,848,157✔
299
                if v {
22,848,157✔
300
                    // SAFETY: i is in bounds
301
                    unsafe { buf.set_unchecked(i) }
8,431,973✔
302
                }
14,416,184✔
303
            }
304
            buf.freeze()
111,789✔
305
        } else {
306
            let mut buf = BitBufferMut::new(low);
1✔
307
            for v in iter {
129✔
308
                buf.append(v);
128✔
309
            }
128✔
310
            buf.freeze()
1✔
311
        }
312
    }
111,790✔
313
}
314

315
impl BitOr for &BitBuffer {
316
    type Output = BitBuffer;
317

318
    fn bitor(self, rhs: Self) -> Self::Output {
129✔
319
        self.clone() | rhs.clone()
129✔
320
    }
129✔
321
}
322

323
impl BitOr for BitBuffer {
324
    type Output = BitBuffer;
325

326
    fn bitor(self, rhs: Self) -> Self::Output {
129✔
327
        assert_eq!(self.len, rhs.len);
129✔
328
        BitBuffer::new_with_offset(
129✔
329
            bitwise_or(self.buffer, self.offset, rhs.buffer, rhs.offset, self.len),
129✔
330
            self.len,
129✔
331
            0,
332
        )
333
    }
129✔
334
}
335

336
impl BitAnd for &BitBuffer {
337
    type Output = BitBuffer;
338

339
    fn bitand(self, rhs: Self) -> Self::Output {
26,614✔
340
        self.clone() & rhs.clone()
26,614✔
341
    }
26,614✔
342
}
343

344
impl BitAnd for BitBuffer {
345
    type Output = BitBuffer;
346

347
    fn bitand(self, rhs: Self) -> Self::Output {
26,614✔
348
        assert_eq!(self.len, rhs.len);
26,614✔
349
        BitBuffer::new_with_offset(
26,614✔
350
            bitwise_and(self.buffer, self.offset, rhs.buffer, rhs.offset, self.len),
26,614✔
351
            self.len,
26,614✔
352
            0,
353
        )
354
    }
26,614✔
355
}
356

357
impl Not for &BitBuffer {
358
    type Output = BitBuffer;
359

360
    fn not(self) -> Self::Output {
63,339✔
361
        !self.clone()
63,339✔
362
    }
63,339✔
363
}
364

365
impl Not for BitBuffer {
366
    type Output = BitBuffer;
367

368
    fn not(self) -> Self::Output {
63,339✔
369
        BitBuffer::new_with_offset(bitwise_not(self.buffer, self.offset, self.len), self.len, 0)
63,339✔
370
    }
63,339✔
371
}
372

373
impl BitXor for &BitBuffer {
374
    type Output = BitBuffer;
375

NEW
376
    fn bitxor(self, rhs: Self) -> Self::Output {
×
NEW
377
        self.clone() ^ rhs.clone()
×
NEW
378
    }
×
379
}
380

381
impl BitXor for BitBuffer {
382
    type Output = BitBuffer;
383

NEW
384
    fn bitxor(self, rhs: Self) -> Self::Output {
×
NEW
385
        assert_eq!(self.len, rhs.len);
×
NEW
386
        BitBuffer::new_with_offset(
×
NEW
387
            bitwise_xor(self.buffer, self.offset, rhs.buffer, rhs.offset, self.len),
×
NEW
388
            self.len,
×
389
            0,
390
        )
NEW
391
    }
×
392
}
393

394
impl IntoIterator for &BitBuffer {
395
    type Item = bool;
396
    type IntoIter = BitIterator;
397

398
    fn into_iter(self) -> Self::IntoIter {
4,945✔
399
        self.iter()
4,945✔
400
    }
4,945✔
401
}
402

403
impl IntoIterator for BitBuffer {
404
    type Item = bool;
405
    type IntoIter = BitIterator;
406

407
    fn into_iter(self) -> Self::IntoIter {
129✔
408
        self.iter()
129✔
409
    }
129✔
410
}
411

412
#[cfg(test)]
413
mod tests {
414
    use crate::bit::BitBuffer;
415
    use crate::{ByteBuffer, buffer};
416

417
    #[test]
418
    fn test_bool() {
1✔
419
        // Create a new Buffer<u64> of length 1024 where the 8th bit is set.
420
        let buffer: ByteBuffer = buffer![1 << 7; 1024];
1✔
421
        let bools = BitBuffer::new(buffer, 1024 * 8);
1✔
422

423
        // sanity checks
424
        assert_eq!(bools.len(), 1024 * 8);
1✔
425
        assert!(!bools.is_empty());
1✔
426
        assert_eq!(bools.true_count(), 1024);
1✔
427
        assert_eq!(bools.false_count(), 1024 * 7);
1✔
428

429
        // Check all the values
430
        for word in 0..1024 {
1,025✔
431
            for bit in 0..8 {
9,216✔
432
                if bit == 7 {
8,192✔
433
                    assert!(bools.value(word * 8 + bit));
1,024✔
434
                } else {
435
                    assert!(!bools.value(word * 8 + bit));
7,168✔
436
                }
437
            }
438
        }
439

440
        // Slice the buffer to create a new subset view.
441
        let sliced = bools.slice(64, 8);
1✔
442

443
        // sanity checks
444
        assert_eq!(sliced.len(), 8);
1✔
445
        assert!(!sliced.is_empty());
1✔
446
        assert_eq!(sliced.true_count(), 1);
1✔
447
        assert_eq!(sliced.false_count(), 7);
1✔
448

449
        // Check all of the values like before
450
        for bit in 0..8 {
9✔
451
            if bit == 7 {
8✔
452
                assert!(sliced.value(bit));
1✔
453
            } else {
454
                assert!(!sliced.value(bit));
7✔
455
            }
456
        }
457
    }
1✔
458
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc