• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16938087171

13 Aug 2025 01:04PM UTC coverage: 86.347%. First build
16938087171

Pull #4216

github

web-flow
Merge 53478156c into be9bafd2c
Pull Request #4216: feat: better and more consistent validation in SerdeVTable::build

376 of 492 new or added lines in 56 files covered. (76.42%)

53633 of 62113 relevant lines covered (86.35%)

560762.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.33
/vortex-array/src/arrays/bool/array.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use arrow_array::BooleanArray;
5
use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
6
use vortex_buffer::ByteBuffer;
7
use vortex_dtype::DType;
8
use vortex_error::{VortexResult, vortex_ensure};
9

10
use crate::Canonical;
11
use crate::arrays::{BoolVTable, bool};
12
use crate::builders::ArrayBuilder;
13
use crate::stats::{ArrayStats, StatsSetRef};
14
use crate::validity::Validity;
15
use crate::vtable::{ArrayVTable, CanonicalVTable, ValidityHelper};
16

17
/// A boolean array that stores true/false values in a compact bit-packed format.
18
///
19
/// This mirrors the Apache Arrow Boolean array encoding, where each boolean value
20
/// is stored as a single bit rather than a full byte.
21
///
22
/// The data layout uses:
23
/// - A bit-packed buffer where each bit represents one boolean value (0 = false, 1 = true)
24
/// - An optional validity child array, which must be of type `Bool(NonNullable)`, where true values
25
///   indicate valid and false indicates null. if the i-th value is null in the validity child,
26
///   the i-th packed bit in the buffer may be 0 or 1, i.e. it is undefined.
27
/// - Bit-level slicing is supported with minimal overhead
28
///
29
/// # Examples
30
///
31
/// ```
32
/// use vortex_array::arrays::BoolArray;
33
/// use vortex_array::IntoArray;
34
///
35
/// // Create from iterator using FromIterator impl
36
/// let array: BoolArray = [true, false, true, false].into_iter().collect();
37
///
38
/// // Slice the array
39
/// let sliced = array.slice(1, 3);
40
/// assert_eq!(sliced.len(), 2);
41
///
42
/// // Access individual values
43
/// let value = array.scalar_at(0);
44
/// assert_eq!(value, true.into());
45
/// ```
46
#[derive(Clone, Debug)]
47
pub struct BoolArray {
48
    dtype: DType,
49
    buffer: BooleanBuffer,
50
    pub(crate) validity: Validity,
51
    pub(crate) stats_set: ArrayStats,
52
}
53

54
impl BoolArray {
55
    fn validate(
2,232✔
56
        buffer: &ByteBuffer,
2,232✔
57
        offset: usize,
2,232✔
58
        len: usize,
2,232✔
59
        validity: &Validity,
2,232✔
60
    ) -> VortexResult<()> {
2,232✔
61
        vortex_ensure!(
2,232✔
62
            offset < 8,
2,232✔
NEW
63
            "offset must be less than whole byte, was {offset} bits"
×
64
        );
65

66
        // Validate the buffer is large enough to hold all the bits
67
        let required_bytes = offset.saturating_add(len).div_ceil(8);
2,232✔
68
        vortex_ensure!(
2,232✔
69
            buffer.len() >= required_bytes,
2,232✔
NEW
70
            "BoolArray with offset={offset} len={len} cannot be built from buffer of size {}",
×
NEW
71
            buffer.len()
×
72
        );
73

74
        // Validate validity
75
        if let Some(validity_len) = validity.maybe_len() {
2,232✔
NEW
76
            vortex_ensure!(
×
NEW
77
                validity_len == len,
×
NEW
78
                "BoolArray of size {len} cannot be built with validity of size {validity_len}"
×
79
            );
80
        }
2,232✔
81

82
        Ok(())
2,232✔
83
    }
2,232✔
84
}
85

86
impl BoolArray {
87
    /// Construct a new `BoolArray` from its components:
88
    ///
89
    /// * `buffer` is a raw ByteBuffer holding the packed bits
90
    /// * `offset` is the number of bits in the start of the buffer that should be skipped when
91
    ///   looking up the i-th value.
92
    /// * `len` is the length of the array, which should correspond to the number of bits
93
    /// * `validity` holds the null values.
94
    ///
95
    /// # Validation
96
    ///
97
    /// Buffer must be at least large enough to hold `len` bits starting at `offset`.
98
    ///
99
    /// A provided validity array must be of size `len`.
100
    ///
101
    /// The offset must be less than a whole byte.
102
    pub fn try_new(
2,232✔
103
        buffer: ByteBuffer,
2,232✔
104
        offset: usize,
2,232✔
105
        len: usize,
2,232✔
106
        validity: Validity,
2,232✔
107
    ) -> VortexResult<Self> {
2,232✔
108
        Self::validate(&buffer, offset, len, &validity)?;
2,232✔
109

110
        Ok(Self::new(
2,232✔
111
            BooleanBuffer::new(buffer.into_arrow_buffer(), offset, len),
2,232✔
112
            validity,
2,232✔
113
        ))
2,232✔
114
    }
2,232✔
115

116
    /// Creates a new [`BoolArray`] from a [`BooleanBuffer`] and [`Validity`] directly.
117
    ///
118
    /// Panics if the validity length differs from the buffer length.
119
    pub fn new(buffer: BooleanBuffer, validity: Validity) -> Self {
269,431✔
120
        if let Some(validity_len) = validity.maybe_len() {
269,431✔
121
            assert_eq!(buffer.len(), validity_len);
17,874✔
122
        }
251,557✔
123

124
        // Shrink the buffer to remove any whole bytes.
125
        let buffer = buffer.shrink_offset();
269,431✔
126
        Self {
269,431✔
127
            dtype: DType::Bool(validity.nullability()),
269,431✔
128
            buffer,
269,431✔
129
            validity,
269,431✔
130
            stats_set: ArrayStats::default(),
269,431✔
131
        }
269,431✔
132
    }
269,431✔
133

134
    /// Create a new BoolArray from a set of indices and a length.
135
    /// All indices must be less than the length.
136
    pub fn from_indices<I: IntoIterator<Item = usize>>(
50✔
137
        length: usize,
50✔
138
        indices: I,
50✔
139
        validity: Validity,
50✔
140
    ) -> Self {
50✔
141
        let mut buffer = MutableBuffer::new_null(length);
50✔
142
        indices
50✔
143
            .into_iter()
50✔
144
            .for_each(|idx| arrow_buffer::bit_util::set_bit(&mut buffer, idx));
102✔
145
        Self::new(
50✔
146
            BooleanBufferBuilder::new_from_buffer(buffer, length).finish(),
50✔
147
            validity,
50✔
148
        )
149
    }
50✔
150

151
    /// Returns the underlying [`BooleanBuffer`] of the array.
152
    pub fn boolean_buffer(&self) -> &BooleanBuffer {
22,779,030✔
153
        assert!(
22,779,030✔
154
            self.buffer.offset() < 8,
22,779,030✔
155
            "Offset must be <8, did we forget to call shrink_offset? Found {}",
×
156
            self.buffer.offset()
×
157
        );
158
        &self.buffer
22,779,030✔
159
    }
22,779,030✔
160

161
    /// Get a mutable version of this array.
162
    ///
163
    /// If the caller holds the only reference to the underlying buffer the underlying buffer is returned
164
    /// otherwise a copy is created.
165
    ///
166
    /// The second value of the tuple is a bit_offset of first value in first byte of the returned builder
167
    pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) {
2,908✔
168
        let offset = self.buffer.offset();
2,908✔
169
        let len = self.buffer.len();
2,908✔
170
        let arrow_buffer = self.buffer.into_inner();
2,908✔
171
        let mutable_buf = if arrow_buffer.ptr_offset() == 0 {
2,908✔
172
            arrow_buffer.into_mutable().unwrap_or_else(|b| {
2,908✔
173
                let mut buf = MutableBuffer::with_capacity(b.len());
85✔
174
                buf.extend_from_slice(b.as_slice());
85✔
175
                buf
85✔
176
            })
85✔
177
        } else {
178
            let mut buf = MutableBuffer::with_capacity(arrow_buffer.len());
×
179
            buf.extend_from_slice(arrow_buffer.as_slice());
×
180
            buf
×
181
        };
182

183
        (
2,908✔
184
            BooleanBufferBuilder::new_from_buffer(mutable_buf, offset + len),
2,908✔
185
            offset,
2,908✔
186
        )
2,908✔
187
    }
2,908✔
188
}
189

190
impl From<BooleanBuffer> for BoolArray {
191
    fn from(value: BooleanBuffer) -> Self {
75,117✔
192
        Self::new(value, Validity::NonNullable)
75,117✔
193
    }
75,117✔
194
}
195

196
impl FromIterator<bool> for BoolArray {
197
    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
57,946✔
198
        Self::new(BooleanBuffer::from_iter(iter), Validity::NonNullable)
57,946✔
199
    }
57,946✔
200
}
201

202
impl FromIterator<Option<bool>> for BoolArray {
203
    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
43✔
204
        let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
43✔
205

206
        Self::new(
43✔
207
            buffer,
43✔
208
            nulls.map(Validity::from).unwrap_or(Validity::AllValid),
43✔
209
        )
210
    }
43✔
211
}
212

213
impl ValidityHelper for BoolArray {
214
    fn validity(&self) -> &Validity {
23,048,828✔
215
        &self.validity
23,048,828✔
216
    }
23,048,828✔
217
}
218

219
impl ArrayVTable<BoolVTable> for BoolVTable {
220
    fn len(array: &BoolArray) -> usize {
46,486,012✔
221
        array.buffer.len()
46,486,012✔
222
    }
46,486,012✔
223

224
    fn dtype(array: &BoolArray) -> &DType {
46,102,590✔
225
        &array.dtype
46,102,590✔
226
    }
46,102,590✔
227

228
    fn stats(array: &BoolArray) -> StatsSetRef<'_> {
852,643✔
229
        array.stats_set.to_ref(array.as_ref())
852,643✔
230
    }
852,643✔
231
}
232

233
impl CanonicalVTable<BoolVTable> for BoolVTable {
234
    fn canonicalize(array: &BoolArray) -> VortexResult<Canonical> {
163,364✔
235
        Ok(Canonical::Bool(array.clone()))
163,364✔
236
    }
163,364✔
237

238
    fn append_to_builder(array: &BoolArray, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
22✔
239
        builder.extend_from_array(array.as_ref())
22✔
240
    }
22✔
241
}
242

243
/// Extension methods for bit buffers.
pub trait BooleanBufferExt {
    /// Drops every whole byte covered by the offset, so that the remaining offset is < 8.
    fn shrink_offset(self) -> Self;
}
247

248
impl BooleanBufferExt for BooleanBuffer {
249
    fn shrink_offset(self) -> Self {
269,431✔
250
        let byte_offset = self.offset() / 8;
269,431✔
251
        let bit_offset = self.offset() % 8;
269,431✔
252
        let len = self.len();
269,431✔
253
        let buffer = self
269,431✔
254
            .into_inner()
269,431✔
255
            .slice_with_length(byte_offset, (len + bit_offset).div_ceil(8));
269,431✔
256
        BooleanBuffer::new(buffer, bit_offset, len)
269,431✔
257
    }
269,431✔
258
}
259

260
#[cfg(test)]
mod tests {
    use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};
    use vortex_buffer::buffer;

    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::patches::Patches;
    use crate::validity::Validity;
    use crate::vtable::ValidityHelper;
    use crate::{Array, IntoArray, ToCanonical};

    /// Reads the value at `idx` as a `bool`, panicking if it cannot be converted.
    fn bool_at(arr: &BoolArray, idx: usize) -> bool {
        bool::try_from(&arr.scalar_at(idx)).unwrap()
    }

    #[test]
    fn bool_array() {
        let arr = BoolArray::from_iter([true, false, true]);
        assert!(bool_at(&arr, 0));
    }

    #[test]
    fn test_all_some_iter() {
        let arr = BoolArray::from_iter([Some(true), Some(false)]);

        // No `None` was supplied, so no explicit validity is materialised.
        assert!(matches!(arr.validity(), Validity::AllValid));

        assert!(bool_at(&arr, 0));
        assert!(!bool_at(&arr, 1));
    }

    #[test]
    fn test_bool_from_iter() {
        let arr = BoolArray::from_iter([Some(true), Some(true), None, Some(false), None]);

        assert!(bool_at(&arr, 0));
        assert!(bool_at(&arr, 1));
        assert!(arr.scalar_at(2).is_null());
        assert!(!bool_at(&arr, 3));
        assert!(arr.scalar_at(4).is_null());
    }

    #[test]
    fn patch_sliced_bools() {
        // One `false` followed by eleven `true` values.
        let arr = {
            let mut bits = BooleanBufferBuilder::new(12);
            bits.append(false);
            bits.append_n(11, true);
            BoolArray::from(bits.finish())
        };
        let sliced = arr.slice(4, 12);
        let sliced_len = sliced.len();
        let (builder, bit_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 4);
        assert_eq!(builder.as_slice(), &[254, 15]);

        // patch the underlying array
        let patches = Patches::new(
            arr.len(),
            0,
            buffer![4u32].into_array(), // This creates a non-nullable array
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let arr = arr.patch(&patches).unwrap();
        let arr_len = arr.len();
        let (builder, bit_offset) = arr.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 0);
        assert_eq!(builder.len(), arr_len + bit_offset);
        assert_eq!(builder.as_slice(), &[238, 15]);

        // the slice should be unchanged
        let (builder, bit_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 4);
        assert_eq!(builder.len(), sliced_len + bit_offset);
        assert_eq!(builder.as_slice(), &[254, 15]); // unchanged
    }

    #[test]
    fn slice_array_in_middle() {
        let arr = BoolArray::from(BooleanBuffer::new_set(16));
        let sliced = arr.slice(4, 12);
        let sliced_len = sliced.len();
        let (builder, bit_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 4);
        assert_eq!(builder.len(), sliced_len + bit_offset);
        assert_eq!(builder.as_slice(), &[255, 15]);
    }

    #[test]
    #[should_panic]
    fn patch_bools_owned() {
        let bytes = buffer![255u8; 2];
        let bits = BooleanBuffer::new(bytes.into_arrow_buffer(), 0, 15);
        let arr = BoolArray::new(bits, Validity::NonNullable);
        let original_ptr = arr.boolean_buffer().sliced().as_ptr();

        let patches = Patches::new(
            arr.len(),
            0,
            PrimitiveArray::new(buffer![0u32], Validity::AllValid).into_array(),
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let arr = arr.patch(&patches).unwrap();
        assert_eq!(arr.boolean_buffer().sliced().as_ptr(), original_ptr);

        let (builder, _byte_bit_offset) = arr.to_bool().unwrap().into_boolean_builder();
        assert_eq!(builder.as_slice(), &[254, 127]);
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc