• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16593958537

29 Jul 2025 10:48AM UTC coverage: 82.285% (+0.5%) from 81.796%
16593958537

Pull #4036

github

web-flow
Merge 04147cb0f into 348079fc3
Pull Request #4036: varbinview builder buffer deduplication

146 of 154 new or added lines in 2 files covered. (94.81%)

348 existing lines in 26 files now uncovered.

44470 of 54044 relevant lines covered (82.28%)

169522.95 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.15
/vortex-array/src/arrays/bool/array.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use arrow_array::BooleanArray;
5
use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
6
use vortex_dtype::DType;
7
use vortex_error::{VortexResult, vortex_panic};
8

9
use crate::Canonical;
10
use crate::arrays::{BoolVTable, bool};
11
use crate::builders::ArrayBuilder;
12
use crate::stats::{ArrayStats, StatsSetRef};
13
use crate::validity::Validity;
14
use crate::vtable::{ArrayVTable, CanonicalVTable, ValidityHelper};
15

16
/// A boolean array that stores true/false values in a compact bit-packed format.
17
///
18
/// This mirrors the Apache Arrow Boolean array encoding, where each boolean value
19
/// is stored as a single bit rather than a full byte.
20
///
21
/// The data layout uses:
22
/// - A bit-packed buffer where each bit represents one boolean value (0 = false, 1 = true)
23
/// - An optional validity child array, which must be of type `Bool(NonNullable)`, where true values
24
///   indicate valid and false indicates null. if the i-th value is null in the validity child,
25
///   the i-th packed bit in the buffer may be 0 or 1, i.e. it is undefined.
26
/// - Bit-level slicing is supported with minimal overhead
27
///
28
/// # Examples
29
///
30
/// ```
31
/// use vortex_array::arrays::BoolArray;
32
/// use vortex_array::IntoArray;
33
///
34
/// // Create from iterator using FromIterator impl
35
/// let array: BoolArray = [true, false, true, false].into_iter().collect();
36
///
37
/// // Slice the array
38
/// let sliced = array.slice(1, 3).unwrap();
39
/// assert_eq!(sliced.len(), 2);
40
///
41
/// // Access individual values
42
/// let value = array.scalar_at(0).unwrap();
43
/// assert_eq!(value, true.into());
44
/// ```
45
#[derive(Clone, Debug)]
46
pub struct BoolArray {
47
    dtype: DType,
48
    buffer: BooleanBuffer,
49
    pub(crate) validity: Validity,
50
    pub(crate) stats_set: ArrayStats,
51
}
52

53
impl BoolArray {
54
    /// Create a new BoolArray from a set of indices and a length.
55
    /// All indices must be less than the length.
56
    pub fn from_indices<I: IntoIterator<Item = usize>>(
48✔
57
        length: usize,
48✔
58
        indices: I,
48✔
59
        validity: Validity,
48✔
60
    ) -> Self {
48✔
61
        let mut buffer = MutableBuffer::new_null(length);
48✔
62
        indices
48✔
63
            .into_iter()
48✔
64
            .for_each(|idx| arrow_buffer::bit_util::set_bit(&mut buffer, idx));
98✔
65
        Self::new(
48✔
66
            BooleanBufferBuilder::new_from_buffer(buffer, length).finish(),
48✔
67
            validity,
48✔
68
        )
69
    }
48✔
70

71
    /// Creates a new [`BoolArray`] from a [`BooleanBuffer`] and [`Validity`], without checking
72
    /// any invariants.
73
    pub fn new(buffer: BooleanBuffer, validity: Validity) -> Self {
55,776✔
74
        if let Some(len) = validity.maybe_len() {
55,776✔
75
            if buffer.len() != len {
2,713✔
UNCOV
76
                vortex_panic!(
×
UNCOV
77
                    "Buffer and validity length mismatch: buffer={}, validity={}",
×
UNCOV
78
                    buffer.len(),
×
79
                    len
80
                );
81
            }
2,713✔
82
        }
53,063✔
83

84
        // Shrink the buffer to remove any whole bytes.
85
        let buffer = buffer.shrink_offset();
55,776✔
86
        Self {
55,776✔
87
            dtype: DType::Bool(validity.nullability()),
55,776✔
88
            buffer,
55,776✔
89
            validity,
55,776✔
90
            stats_set: ArrayStats::default(),
55,776✔
91
        }
55,776✔
92
    }
55,776✔
93

94
    /// Returns the underlying [`BooleanBuffer`] of the array.
95
    pub fn boolean_buffer(&self) -> &BooleanBuffer {
181,077✔
96
        assert!(
181,077✔
97
            self.buffer.offset() < 8,
181,077✔
UNCOV
98
            "Offset must be <8, did we forget to call shrink_offset? Found {}",
×
UNCOV
99
            self.buffer.offset()
×
100
        );
101
        &self.buffer
181,077✔
102
    }
181,077✔
103

104
    /// Get a mutable version of this array.
105
    ///
106
    /// If the caller holds the only reference to the underlying buffer the underlying buffer is returned
107
    /// otherwise a copy is created.
108
    ///
109
    /// The second value of the tuple is a bit_offset of first value in first byte of the returned builder
110
    pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) {
897✔
111
        let offset = self.buffer.offset();
897✔
112
        let len = self.buffer.len();
897✔
113
        let arrow_buffer = self.buffer.into_inner();
897✔
114
        let mutable_buf = if arrow_buffer.ptr_offset() == 0 {
897✔
115
            arrow_buffer.into_mutable().unwrap_or_else(|b| {
897✔
116
                let mut buf = MutableBuffer::with_capacity(b.len());
46✔
117
                buf.extend_from_slice(b.as_slice());
46✔
118
                buf
46✔
119
            })
46✔
120
        } else {
UNCOV
121
            let mut buf = MutableBuffer::with_capacity(arrow_buffer.len());
×
UNCOV
122
            buf.extend_from_slice(arrow_buffer.as_slice());
×
UNCOV
123
            buf
×
124
        };
125

126
        (
897✔
127
            BooleanBufferBuilder::new_from_buffer(mutable_buf, offset + len),
897✔
128
            offset,
897✔
129
        )
897✔
130
    }
897✔
131
}
132

133
impl From<BooleanBuffer> for BoolArray {
134
    fn from(value: BooleanBuffer) -> Self {
16,111✔
135
        Self::new(value, Validity::NonNullable)
16,111✔
136
    }
16,111✔
137
}
138

139
impl FromIterator<bool> for BoolArray {
140
    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
9,392✔
141
        Self::new(BooleanBuffer::from_iter(iter), Validity::NonNullable)
9,392✔
142
    }
9,392✔
143
}
144

145
impl FromIterator<Option<bool>> for BoolArray {
146
    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
38✔
147
        let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
38✔
148

149
        Self::new(
38✔
150
            buffer,
38✔
151
            nulls.map(Validity::from).unwrap_or(Validity::AllValid),
38✔
152
        )
153
    }
38✔
154
}
155

156
impl ValidityHelper for BoolArray {
157
    fn validity(&self) -> &Validity {
153,583✔
158
        &self.validity
153,583✔
159
    }
153,583✔
160
}
161

162
impl ArrayVTable<BoolVTable> for BoolVTable {
163
    fn len(array: &BoolArray) -> usize {
518,286✔
164
        array.buffer.len()
518,286✔
165
    }
518,286✔
166

167
    fn dtype(array: &BoolArray) -> &DType {
368,013✔
168
        &array.dtype
368,013✔
169
    }
368,013✔
170

171
    fn stats(array: &BoolArray) -> StatsSetRef<'_> {
262,624✔
172
        array.stats_set.to_ref(array.as_ref())
262,624✔
173
    }
262,624✔
174
}
175

176
impl CanonicalVTable<BoolVTable> for BoolVTable {
177
    fn canonicalize(array: &BoolArray) -> VortexResult<Canonical> {
56,099✔
178
        Ok(Canonical::Bool(array.clone()))
56,099✔
179
    }
56,099✔
180

181
    fn append_to_builder(array: &BoolArray, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
22✔
182
        builder.extend_from_array(array.as_ref())
22✔
183
    }
22✔
184
}
185

186
/// Extension methods for [`BooleanBuffer`].
pub trait BooleanBufferExt {
    /// Slices away every whole byte covered by the buffer's offset, so that the remaining
    /// offset is less than 8 bits.
    fn shrink_offset(self) -> Self;
}
190

191
impl BooleanBufferExt for BooleanBuffer {
192
    fn shrink_offset(self) -> Self {
55,776✔
193
        let byte_offset = self.offset() / 8;
55,776✔
194
        let bit_offset = self.offset() % 8;
55,776✔
195
        let len = self.len();
55,776✔
196
        let buffer = self
55,776✔
197
            .into_inner()
55,776✔
198
            .slice_with_length(byte_offset, (len + bit_offset).div_ceil(8));
55,776✔
199
        BooleanBuffer::new(buffer, bit_offset, len)
55,776✔
200
    }
55,776✔
201
}
202

203
#[cfg(test)]
mod tests {
    use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};
    use vortex_buffer::buffer;

    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::patches::Patches;
    use crate::validity::Validity;
    use crate::vtable::ValidityHelper;
    use crate::{Array, IntoArray, ToCanonical};

    /// Reads the value at `idx` as a plain `bool`, panicking if it cannot be converted.
    fn bool_at(arr: &BoolArray, idx: usize) -> bool {
        bool::try_from(&arr.scalar_at(idx).unwrap()).unwrap()
    }

    #[test]
    fn bool_array() {
        let arr = BoolArray::from_iter([true, false, true]);
        assert!(bool_at(&arr, 0));
    }

    #[test]
    fn test_all_some_iter() {
        let arr = BoolArray::from_iter([Some(true), Some(false)]);

        assert!(matches!(arr.validity(), Validity::AllValid));

        assert!(bool_at(&arr, 0));
        assert!(!bool_at(&arr, 1));
    }

    #[test]
    fn test_bool_from_iter() {
        let arr = BoolArray::from_iter([Some(true), Some(true), None, Some(false), None]);

        assert!(bool_at(&arr, 0));
        assert!(bool_at(&arr, 1));
        assert!(arr.scalar_at(2).unwrap().is_null());
        assert!(!bool_at(&arr, 3));
        assert!(arr.scalar_at(4).unwrap().is_null());
    }

    #[test]
    fn patch_sliced_bools() {
        // One `false` followed by eleven `true` values.
        let mut builder = BooleanBufferBuilder::new(12);
        builder.append(false);
        builder.append_n(11, true);
        let arr = BoolArray::from(builder.finish());

        let sliced = arr.slice(4, 12).unwrap();
        let sliced_len = sliced.len();
        let (values, offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(offset, 4);
        assert_eq!(values.as_slice(), &[254, 15]);

        // patch the underlying array
        let patch_indices = PrimitiveArray::new(buffer![4u32], Validity::AllValid).into_array();
        let patch_values = BoolArray::from(BooleanBuffer::new_unset(1)).into_array();
        let patches = Patches::new(arr.len(), 0, patch_indices, patch_values);
        let arr = arr.patch(&patches).unwrap();
        let arr_len = arr.len();
        let (values, offset) = arr.to_bool().unwrap().into_boolean_builder();
        assert_eq!(offset, 0);
        assert_eq!(values.len(), arr_len + offset);
        assert_eq!(values.as_slice(), &[238, 15]);

        // the slice should be unchanged
        let (values, offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(offset, 4);
        assert_eq!(values.len(), sliced_len + offset);
        assert_eq!(values.as_slice(), &[254, 15]); // unchanged
    }

    #[test]
    fn slice_array_in_middle() {
        let arr = BoolArray::from(BooleanBuffer::new_set(16));
        let sliced = arr.slice(4, 12).unwrap();
        let sliced_len = sliced.len();

        let (values, offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(offset, 4);
        assert_eq!(values.len(), sliced_len + offset);
        assert_eq!(values.as_slice(), &[255, 15]);
    }

    // NOTE(review): `should_panic` carries no `expected` message, so any panic satisfies this
    // test — confirm which statement is meant to panic and pin its message.
    #[test]
    #[should_panic]
    fn patch_bools_owned() {
        let bytes = buffer![255u8; 2];
        let bits = BooleanBuffer::new(bytes.into_arrow_buffer(), 0, 15);
        let arr = BoolArray::new(bits, Validity::NonNullable);
        let buf_ptr = arr.boolean_buffer().sliced().as_ptr();

        let patch_indices = PrimitiveArray::new(buffer![0u32], Validity::AllValid).into_array();
        let patch_values = BoolArray::from(BooleanBuffer::new_unset(1)).into_array();
        let patches = Patches::new(arr.len(), 0, patch_indices, patch_values);
        let arr = arr.patch(&patches).unwrap();
        assert_eq!(arr.boolean_buffer().sliced().as_ptr(), buf_ptr);

        let (values, _byte_bit_offset) = arr.to_bool().unwrap().into_boolean_builder();
        assert_eq!(values.as_slice(), &[254, 127]);
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc