• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 17042426005

18 Aug 2025 01:44PM UTC coverage: 87.995%. First build
17042426005

Pull #4216

github

web-flow
Merge 52d72e70b into c0b668f7f
Pull Request #4216: feat: better and more consistent validation in SerdeVTable::build

525 of 671 new or added lines in 80 files covered. (78.24%)

56705 of 64441 relevant lines covered (88.0%)

627615.79 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.36
/vortex-array/src/arrays/bool/array.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use arrow_array::BooleanArray;
5
use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
6
use vortex_buffer::ByteBuffer;
7
use vortex_dtype::DType;
8
use vortex_error::{VortexResult, vortex_ensure};
9

10
use crate::Canonical;
11
use crate::arrays::{BoolVTable, bool};
12
use crate::builders::ArrayBuilder;
13
use crate::stats::{ArrayStats, StatsSetRef};
14
use crate::validity::Validity;
15
use crate::vtable::{ArrayVTable, CanonicalVTable, ValidityHelper};
16

17
/// A boolean array that stores true/false values in a compact bit-packed format.
18
///
19
/// This mirrors the Apache Arrow Boolean array encoding, where each boolean value
20
/// is stored as a single bit rather than a full byte.
21
///
22
/// The data layout uses:
23
/// - A bit-packed buffer where each bit represents one boolean value (0 = false, 1 = true)
24
/// - An optional validity child array, which must be of type `Bool(NonNullable)`, where true values
25
///   indicate valid and false indicates null. if the i-th value is null in the validity child,
26
///   the i-th packed bit in the buffer may be 0 or 1, i.e. it is undefined.
27
/// - Bit-level slicing is supported with minimal overhead
28
///
29
/// # Examples
30
///
31
/// ```
32
/// use vortex_array::arrays::BoolArray;
33
/// use vortex_array::IntoArray;
34
///
35
/// // Create from iterator using FromIterator impl
36
/// let array: BoolArray = [true, false, true, false].into_iter().collect();
37
///
38
/// // Slice the array
39
/// let sliced = array.slice(1, 3);
40
/// assert_eq!(sliced.len(), 2);
41
///
42
/// // Access individual values
43
/// let value = array.scalar_at(0);
44
/// assert_eq!(value, true.into());
45
/// ```
46
#[derive(Clone, Debug)]
47
pub struct BoolArray {
48
    dtype: DType,
49
    buffer: BooleanBuffer,
50
    pub(crate) validity: Validity,
51
    pub(crate) stats_set: ArrayStats,
52
}
53

54
impl BoolArray {
55
    fn validate(
2,406✔
56
        buffer: &ByteBuffer,
2,406✔
57
        offset: usize,
2,406✔
58
        len: usize,
2,406✔
59
        validity: &Validity,
2,406✔
60
    ) -> VortexResult<()> {
2,406✔
61
        vortex_ensure!(
2,406✔
62
            offset < 8,
2,406✔
NEW
63
            "offset must be less than whole byte, was {offset} bits"
×
64
        );
65

66
        // Validate the buffer is large enough to hold all the bits
67
        let required_bytes = offset.saturating_add(len).div_ceil(8);
2,406✔
68
        vortex_ensure!(
2,406✔
69
            buffer.len() >= required_bytes,
2,406✔
NEW
70
            "BoolArray with offset={offset} len={len} cannot be built from buffer of size {}",
×
NEW
71
            buffer.len()
×
72
        );
73

74
        // Validate validity
75
        if let Some(validity_len) = validity.maybe_len() {
2,406✔
NEW
76
            vortex_ensure!(
×
NEW
77
                validity_len == len,
×
NEW
78
                "BoolArray of size {len} cannot be built with validity of size {validity_len}"
×
79
            );
80
        }
2,406✔
81

82
        Ok(())
2,406✔
83
    }
2,406✔
84
}
85

86
impl BoolArray {
87
    /// Construct a new `BoolArray` from its components:
88
    ///
89
    /// * `buffer` is a raw ByteBuffer holding the packed bits
90
    /// * `offset` is the number of bits in the start of the buffer that should be skipped when
91
    ///   looking up the i-th value.
92
    /// * `len` is the length of the array, which should correspond to the number of bits
93
    /// * `validity` holds the null values.
94
    ///
95
    /// # Validation
96
    ///
97
    /// Buffer must be at least large enough to hold `len` bits starting at `offset`.
98
    ///
99
    /// A provided validity array must be of size `len`.
100
    ///
101
    /// The offset must be less than a whole byte.
102
    pub fn try_new(
2,406✔
103
        buffer: ByteBuffer,
2,406✔
104
        offset: usize,
2,406✔
105
        len: usize,
2,406✔
106
        validity: Validity,
2,406✔
107
    ) -> VortexResult<Self> {
2,406✔
108
        Self::validate(&buffer, offset, len, &validity)?;
2,406✔
109

110
        Ok(Self::new(
2,406✔
111
            BooleanBuffer::new(buffer.into_arrow_buffer(), offset, len),
2,406✔
112
            validity,
2,406✔
113
        ))
2,406✔
114
    }
2,406✔
115

116
    /// Creates a new [`BoolArray`] from a [`BooleanBuffer`] and [`Validity`] directly.
117
    ///
118
    /// Panics if the validity length differs from the buffer length.
119
    pub fn new(buffer: BooleanBuffer, validity: Validity) -> Self {
273,010✔
120
        if let Some(validity_len) = validity.maybe_len() {
273,010✔
121
            assert_eq!(buffer.len(), validity_len);
18,225✔
122
        }
254,785✔
123

124
        // Shrink the buffer to remove any whole bytes.
125
        let buffer = buffer.shrink_offset();
273,010✔
126
        Self {
273,010✔
127
            dtype: DType::Bool(validity.nullability()),
273,010✔
128
            buffer,
273,010✔
129
            validity,
273,010✔
130
            stats_set: ArrayStats::default(),
273,010✔
131
        }
273,010✔
132
    }
273,010✔
133

134
    /// Create a new BoolArray from a set of indices and a length.
135
    ///
136
    /// All indices must be less than the length.
137
    pub fn from_indices<I: IntoIterator<Item = usize>>(
50✔
138
        length: usize,
50✔
139
        indices: I,
50✔
140
        validity: Validity,
50✔
141
    ) -> Self {
50✔
142
        let mut buffer = MutableBuffer::new_null(length);
50✔
143
        let buffer_slice = buffer.as_slice_mut();
50✔
144
        indices
50✔
145
            .into_iter()
50✔
146
            .for_each(|idx| arrow_buffer::bit_util::set_bit(buffer_slice, idx));
102✔
147
        Self::new(
50✔
148
            BooleanBufferBuilder::new_from_buffer(buffer, length).finish(),
50✔
149
            validity,
50✔
150
        )
151
    }
50✔
152

153
    /// Returns the underlying [`BooleanBuffer`] of the array.
154
    pub fn boolean_buffer(&self) -> &BooleanBuffer {
22,438,581✔
155
        assert!(
22,438,581✔
156
            self.buffer.offset() < 8,
22,438,581✔
157
            "Offset must be <8, did we forget to call shrink_offset? Found {}",
×
158
            self.buffer.offset()
×
159
        );
160
        &self.buffer
22,438,581✔
161
    }
22,438,581✔
162

163
    /// Get a mutable version of this array.
164
    ///
165
    /// If the caller holds the only reference to the underlying buffer the underlying buffer is returned
166
    /// otherwise a copy is created.
167
    ///
168
    /// The second value of the tuple is a bit_offset of first value in first byte of the returned builder
169
    pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) {
2,908✔
170
        let offset = self.buffer.offset();
2,908✔
171
        let len = self.buffer.len();
2,908✔
172
        let arrow_buffer = self.buffer.into_inner();
2,908✔
173
        let mutable_buf = if arrow_buffer.ptr_offset() == 0 {
2,908✔
174
            arrow_buffer.into_mutable().unwrap_or_else(|b| {
2,908✔
175
                let mut buf = MutableBuffer::with_capacity(b.len());
85✔
176
                buf.extend_from_slice(b.as_slice());
85✔
177
                buf
85✔
178
            })
85✔
179
        } else {
180
            let mut buf = MutableBuffer::with_capacity(arrow_buffer.len());
×
181
            buf.extend_from_slice(arrow_buffer.as_slice());
×
182
            buf
×
183
        };
184

185
        (
2,908✔
186
            BooleanBufferBuilder::new_from_buffer(mutable_buf, offset + len),
2,908✔
187
            offset,
2,908✔
188
        )
2,908✔
189
    }
2,908✔
190
}
191

192
impl From<BooleanBuffer> for BoolArray {
193
    fn from(value: BooleanBuffer) -> Self {
75,469✔
194
        Self::new(value, Validity::NonNullable)
75,469✔
195
    }
75,469✔
196
}
197

198
impl FromIterator<bool> for BoolArray {
199
    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
57,946✔
200
        Self::new(BooleanBuffer::from_iter(iter), Validity::NonNullable)
57,946✔
201
    }
57,946✔
202
}
203

204
impl FromIterator<Option<bool>> for BoolArray {
205
    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
43✔
206
        let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
43✔
207

208
        Self::new(
43✔
209
            buffer,
43✔
210
            nulls.map(Validity::from).unwrap_or(Validity::AllValid),
43✔
211
        )
212
    }
43✔
213
}
214

215
impl ValidityHelper for BoolArray {
216
    fn validity(&self) -> &Validity {
22,772,378✔
217
        &self.validity
22,772,378✔
218
    }
22,772,378✔
219
}
220

221
impl ArrayVTable<BoolVTable> for BoolVTable {
222
    fn len(array: &BoolArray) -> usize {
45,838,181✔
223
        array.buffer.len()
45,838,181✔
224
    }
45,838,181✔
225

226
    fn dtype(array: &BoolArray) -> &DType {
45,521,389✔
227
        &array.dtype
45,521,389✔
228
    }
45,521,389✔
229

230
    fn stats(array: &BoolArray) -> StatsSetRef<'_> {
837,518✔
231
        array.stats_set.to_ref(array.as_ref())
837,518✔
232
    }
837,518✔
233
}
234

235
impl CanonicalVTable<BoolVTable> for BoolVTable {
236
    fn canonicalize(array: &BoolArray) -> VortexResult<Canonical> {
120,222✔
237
        Ok(Canonical::Bool(array.clone()))
120,222✔
238
    }
120,222✔
239

240
    fn append_to_builder(array: &BoolArray, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
22✔
241
        builder.extend_from_array(array.as_ref())
22✔
242
    }
22✔
243
}
244

245
pub trait BooleanBufferExt {
246
    /// Slice any full bytes from the buffer, leaving the offset < 8.
247
    fn shrink_offset(self) -> Self;
248
}
249

250
impl BooleanBufferExt for BooleanBuffer {
251
    fn shrink_offset(self) -> Self {
273,010✔
252
        let byte_offset = self.offset() / 8;
273,010✔
253
        let bit_offset = self.offset() % 8;
273,010✔
254
        let len = self.len();
273,010✔
255
        let buffer = self
273,010✔
256
            .into_inner()
273,010✔
257
            .slice_with_length(byte_offset, (len + bit_offset).div_ceil(8));
273,010✔
258
        BooleanBuffer::new(buffer, bit_offset, len)
273,010✔
259
    }
273,010✔
260
}
261

262
#[cfg(test)]
263
mod tests {
264
    use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};
265
    use vortex_buffer::buffer;
266

267
    use crate::arrays::{BoolArray, PrimitiveArray};
268
    use crate::patches::Patches;
269
    use crate::validity::Validity;
270
    use crate::vtable::ValidityHelper;
271
    use crate::{Array, IntoArray, ToCanonical};
272

273
    #[test]
    fn bool_array() {
        // The first element of a non-nullable array reads back as `true`.
        let arr = BoolArray::from_iter([true, false, true]);
        let first = bool::try_from(&arr.scalar_at(0)).unwrap();
        assert!(first);
    }
279

280
    #[test]
    fn test_all_some_iter() {
        // An iterator with no `None` items yields an all-valid array.
        let arr = BoolArray::from_iter([Some(true), Some(false)]);

        assert!(matches!(arr.validity(), Validity::AllValid));

        let first = bool::try_from(&arr.scalar_at(0)).unwrap();
        assert!(first);
        let second = bool::try_from(&arr.scalar_at(1)).unwrap();
        assert!(!second);
    }
291

292
    #[test]
    fn test_bool_from_iter() {
        // `None` items become nulls; `Some` items keep their value.
        let arr = BoolArray::from_iter([Some(true), Some(true), None, Some(false), None]);

        let at_0 = bool::try_from(&arr.scalar_at(0)).unwrap();
        assert!(at_0);

        let at_1 = bool::try_from(&arr.scalar_at(1)).unwrap();
        assert!(at_1);

        let at_2 = arr.scalar_at(2);
        assert!(at_2.is_null());

        let at_3 = bool::try_from(&arr.scalar_at(3)).unwrap();
        assert!(!at_3);

        let at_4 = arr.scalar_at(4);
        assert!(at_4.is_null());
    }
311

312
    #[test]
    fn patch_sliced_bools() {
        // One `false` followed by eleven `true`s.
        let arr = {
            let mut bits = BooleanBufferBuilder::new(12);
            bits.append(false);
            bits.append_n(11, true);
            BoolArray::from(bits.finish())
        };

        let sliced = arr.slice(4, 12);
        let sliced_len = sliced.len();
        let (slice_values, slice_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(slice_offset, 4);
        assert_eq!(slice_values.as_slice(), &[254, 15]);

        // Patch the underlying array: clear the bit at index 4.
        let patches = Patches::new(
            arr.len(),
            0,
            buffer![4u32].into_array(), // This creates a non-nullable array
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let arr = arr.patch(&patches).unwrap();
        let arr_len = arr.len();
        let (patched_values, patched_offset) = arr.to_bool().unwrap().into_boolean_builder();
        assert_eq!(patched_offset, 0);
        assert_eq!(patched_values.len(), arr_len + patched_offset);
        assert_eq!(patched_values.as_slice(), &[238, 15]);

        // The slice taken before patching must be unchanged.
        let (slice_values, slice_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(slice_offset, 4);
        assert_eq!(slice_values.len(), sliced_len + slice_offset);
        assert_eq!(slice_values.as_slice(), &[254, 15]); // unchanged
    }
346

347
    #[test]
    fn slice_array_in_middle() {
        // Slicing at a non-byte boundary keeps the bit offset and the covering bytes.
        let all_set = BoolArray::from(BooleanBuffer::new_set(16));
        let sliced = all_set.slice(4, 12);
        let sliced_len = sliced.len();

        let (values, offset) = sliced.to_bool().unwrap().into_boolean_builder();

        assert_eq!(offset, 4);
        assert_eq!(values.len(), sliced_len + offset);
        assert_eq!(values.as_slice(), &[255, 15]);
    }
357

358
    #[test]
    #[should_panic]
    fn patch_bools_owned() {
        // Fifteen set bits backed by two 0xFF bytes.
        let bytes = buffer![255u8; 2];
        let bits = BooleanBuffer::new(bytes.into_arrow_buffer(), 0, 15);
        let arr = BoolArray::new(bits, Validity::NonNullable);
        let original_ptr = arr.boolean_buffer().sliced().as_ptr();

        // Clear the bit at index 0.
        let patches = Patches::new(
            arr.len(),
            0,
            PrimitiveArray::new(buffer![0u32], Validity::AllValid).into_array(),
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let arr = arr.patch(&patches).unwrap();
        assert_eq!(arr.boolean_buffer().sliced().as_ptr(), original_ptr);

        let (values, _byte_bit_offset) = arr.to_bool().unwrap().into_boolean_builder();
        assert_eq!(values.as_slice(), &[254, 127]);
    }
378
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc