• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16891637322

11 Aug 2025 08:42PM UTC coverage: 86.68% (+0.5%) from 86.216%
16891637322

Pull #4175

github

web-flow
Merge babcb78ad into 74fb70691
Pull Request #4175: feat: statistical and population genetics benchmark queries and dataset

54342 of 62693 relevant lines covered (86.68%)

540475.09 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.15
/vortex-array/src/arrays/bool/array.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use arrow_array::BooleanArray;
5
use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
6
use vortex_dtype::DType;
7
use vortex_error::{VortexResult, vortex_panic};
8

9
use crate::Canonical;
10
use crate::arrays::{BoolVTable, bool};
11
use crate::builders::ArrayBuilder;
12
use crate::stats::{ArrayStats, StatsSetRef};
13
use crate::validity::Validity;
14
use crate::vtable::{ArrayVTable, CanonicalVTable, ValidityHelper};
15

16
/// A boolean array that stores true/false values in a compact bit-packed format.
17
///
18
/// This mirrors the Apache Arrow Boolean array encoding, where each boolean value
19
/// is stored as a single bit rather than a full byte.
20
///
21
/// The data layout uses:
22
/// - A bit-packed buffer where each bit represents one boolean value (0 = false, 1 = true)
23
/// - An optional validity child array, which must be of type `Bool(NonNullable)`, where true values
24
///   indicate valid and false values indicate null. If the i-th value is null in the validity child,
25
///   the i-th packed bit in the buffer may be 0 or 1, i.e. it is undefined.
26
/// - Bit-level slicing is supported with minimal overhead
27
///
28
/// # Examples
29
///
30
/// ```
31
/// use vortex_array::arrays::BoolArray;
32
/// use vortex_array::IntoArray;
33
///
34
/// // Create from iterator using FromIterator impl
35
/// let array: BoolArray = [true, false, true, false].into_iter().collect();
36
///
37
/// // Slice the array
38
/// let sliced = array.slice(1, 3).unwrap();
39
/// assert_eq!(sliced.len(), 2);
40
///
41
/// // Access individual values
42
/// let value = array.scalar_at(0).unwrap();
43
/// assert_eq!(value, true.into());
44
/// ```
45
#[derive(Clone, Debug)]
46
pub struct BoolArray {
47
    /// Logical type of the array; `BoolArray::new` sets it to `DType::Bool` with the
    /// nullability reported by `validity`.
    dtype: DType,
48
    /// Bit-packed values; `BoolArray::new` stores it after calling `shrink_offset`.
    buffer: BooleanBuffer,
49
    /// Validity (null-ness) information for the values in `buffer`.
    pub(crate) validity: Validity,
50
    /// Statistics holder for this array; `BoolArray::new` initialises it with
    /// `ArrayStats::default()`.
    pub(crate) stats_set: ArrayStats,
51
}
52

53
impl BoolArray {
54
    /// Create a new [`BoolArray`] from a set of indices, a length and a [`Validity`].
55
    /// All indices must be less than the length.
56
    pub fn from_indices<I: IntoIterator<Item = usize>>(
50✔
57
        length: usize,
50✔
58
        indices: I,
50✔
59
        validity: Validity,
50✔
60
    ) -> Self {
50✔
61
        let mut buffer = MutableBuffer::new_null(length);
50✔
62
        let buffer_slice = buffer.as_slice_mut();
50✔
63
        indices
50✔
64
            .into_iter()
50✔
65
            .for_each(|idx| arrow_buffer::bit_util::set_bit(buffer_slice, idx));
102✔
66
        Self::new(
50✔
67
            BooleanBufferBuilder::new_from_buffer(buffer, length).finish(),
50✔
68
            validity,
50✔
69
        )
70
    }
50✔
71

72
    /// Creates a new [`BoolArray`] from a [`BooleanBuffer`] and [`Validity`], without checking
73
    /// any invariants.
74
    pub fn new(buffer: BooleanBuffer, validity: Validity) -> Self {
271,100✔
75
        if let Some(len) = validity.maybe_len()
271,100✔
76
            && buffer.len() != len
17,835✔
77
        {
78
            vortex_panic!(
×
79
                "Buffer and validity length mismatch: buffer={}, validity={}",
×
80
                buffer.len(),
×
81
                len
82
            );
83
        }
271,100✔
84

85
        // Shrink the buffer to remove any whole bytes.
86
        let buffer = buffer.shrink_offset();
271,100✔
87
        Self {
271,100✔
88
            dtype: DType::Bool(validity.nullability()),
271,100✔
89
            buffer,
271,100✔
90
            validity,
271,100✔
91
            stats_set: ArrayStats::default(),
271,100✔
92
        }
271,100✔
93
    }
271,100✔
94

95
    /// Returns the underlying [`BooleanBuffer`] of the array.
96
    pub fn boolean_buffer(&self) -> &BooleanBuffer {
21,800,059✔
97
        assert!(
21,800,059✔
98
            self.buffer.offset() < 8,
21,800,059✔
99
            "Offset must be <8, did we forget to call shrink_offset? Found {}",
×
100
            self.buffer.offset()
×
101
        );
102
        &self.buffer
21,800,059✔
103
    }
21,800,059✔
104

105
    /// Get a mutable version of this array.
106
    ///
107
    /// If the caller holds the only reference to the underlying buffer the underlying buffer is returned
108
    /// otherwise a copy is created.
109
    ///
110
    /// The second value of the tuple is a bit_offset of first value in first byte of the returned builder
111
    pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) {
2,908✔
112
        let offset = self.buffer.offset();
2,908✔
113
        let len = self.buffer.len();
2,908✔
114
        let arrow_buffer = self.buffer.into_inner();
2,908✔
115
        let mutable_buf = if arrow_buffer.ptr_offset() == 0 {
2,908✔
116
            arrow_buffer.into_mutable().unwrap_or_else(|b| {
2,908✔
117
                let mut buf = MutableBuffer::with_capacity(b.len());
85✔
118
                buf.extend_from_slice(b.as_slice());
85✔
119
                buf
85✔
120
            })
85✔
121
        } else {
122
            let mut buf = MutableBuffer::with_capacity(arrow_buffer.len());
×
123
            buf.extend_from_slice(arrow_buffer.as_slice());
×
124
            buf
×
125
        };
126

127
        (
2,908✔
128
            BooleanBufferBuilder::new_from_buffer(mutable_buf, offset + len),
2,908✔
129
            offset,
2,908✔
130
        )
2,908✔
131
    }
2,908✔
132
}
133

134
impl From<BooleanBuffer> for BoolArray {
135
    fn from(value: BooleanBuffer) -> Self {
75,078✔
136
        Self::new(value, Validity::NonNullable)
75,078✔
137
    }
75,078✔
138
}
139

140
impl FromIterator<bool> for BoolArray {
141
    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
57,946✔
142
        Self::new(BooleanBuffer::from_iter(iter), Validity::NonNullable)
57,946✔
143
    }
57,946✔
144
}
145

146
impl FromIterator<Option<bool>> for BoolArray {
147
    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
43✔
148
        let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
43✔
149

150
        Self::new(
43✔
151
            buffer,
43✔
152
            nulls.map(Validity::from).unwrap_or(Validity::AllValid),
43✔
153
        )
154
    }
43✔
155
}
156

157
impl ValidityHelper for BoolArray {
158
    fn validity(&self) -> &Validity {
22,070,688✔
159
        &self.validity
22,070,688✔
160
    }
22,070,688✔
161
}
162

163
impl ArrayVTable<BoolVTable> for BoolVTable {
164
    fn len(array: &BoolArray) -> usize {
44,531,505✔
165
        array.buffer.len()
44,531,505✔
166
    }
44,531,505✔
167

168
    fn dtype(array: &BoolArray) -> &DType {
44,146,502✔
169
        &array.dtype
44,146,502✔
170
    }
44,146,502✔
171

172
    fn stats(array: &BoolArray) -> StatsSetRef<'_> {
859,757✔
173
        array.stats_set.to_ref(array.as_ref())
859,757✔
174
    }
859,757✔
175
}
176

177
impl CanonicalVTable<BoolVTable> for BoolVTable {
178
    fn canonicalize(array: &BoolArray) -> VortexResult<Canonical> {
164,659✔
179
        Ok(Canonical::Bool(array.clone()))
164,659✔
180
    }
164,659✔
181

182
    fn append_to_builder(array: &BoolArray, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
22✔
183
        builder.extend_from_array(array.as_ref())
22✔
184
    }
22✔
185
}
186

187
/// Extension trait providing offset normalisation for bit-packed boolean buffers.
pub trait BooleanBufferExt {
188
    /// Slice any full bytes from the buffer, leaving the offset < 8.
    ///
    /// Consumes `self` and returns the re-sliced buffer.
189
    fn shrink_offset(self) -> Self;
190
}
191

192
impl BooleanBufferExt for BooleanBuffer {
193
    fn shrink_offset(self) -> Self {
271,100✔
194
        let byte_offset = self.offset() / 8;
271,100✔
195
        let bit_offset = self.offset() % 8;
271,100✔
196
        let len = self.len();
271,100✔
197
        let buffer = self
271,100✔
198
            .into_inner()
271,100✔
199
            .slice_with_length(byte_offset, (len + bit_offset).div_ceil(8));
271,100✔
200
        BooleanBuffer::new(buffer, bit_offset, len)
271,100✔
201
    }
271,100✔
202
}
203

204
#[cfg(test)]
205
mod tests {
206
    use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};
207
    use vortex_buffer::buffer;
208

209
    use crate::arrays::{BoolArray, PrimitiveArray};
210
    use crate::patches::Patches;
211
    use crate::validity::Validity;
212
    use crate::vtable::ValidityHelper;
213
    use crate::{Array, IntoArray, ToCanonical};
214

215
    /// The first element of an array collected from plain `bool`s reads back as `true`.
    #[test]
    fn bool_array() {
        let array: BoolArray = [true, false, true].into_iter().collect();
        let first = array.scalar_at(0).unwrap();
        assert!(bool::try_from(&first).unwrap());
    }
1✔
221

222
    /// Collecting only `Some` values yields `Validity::AllValid` and preserves the values.
    #[test]
    fn test_all_some_iter() {
        let array: BoolArray = [Some(true), Some(false)].into_iter().collect();

        assert!(matches!(array.validity(), Validity::AllValid));

        let value_at = |index: usize| bool::try_from(&array.scalar_at(index).unwrap()).unwrap();
        assert!(value_at(0));
        assert!(!value_at(1));
    }
1✔
233

234
    /// Collecting a mix of `Some` and `None` round-trips both the values and the nulls.
    #[test]
    fn test_bool_from_iter() {
        let expected = [Some(true), Some(true), None, Some(false), None];
        let array: BoolArray = expected.into_iter().collect();

        for (index, want) in expected.into_iter().enumerate() {
            let scalar = array.scalar_at(index).unwrap();
            match want {
                Some(value) => assert_eq!(bool::try_from(&scalar).unwrap(), value),
                None => assert!(scalar.is_null()),
            }
        }
    }
1✔
253

254
    /// Patching an array must not disturb a slice that was taken from it beforehand.
    #[test]
    fn patch_sliced_bools() {
        // One `false` followed by eleven `true`s, i.e. bytes [0b1111_1110, 0b0000_1111].
        let source = {
            let mut bits = BooleanBufferBuilder::new(12);
            bits.append(false);
            bits.append_n(11, true);
            BoolArray::from(bits.finish())
        };

        let window = source.slice(4, 12).unwrap();
        let window_len = window.len();
        let (window_bits, window_offset) = window.to_bool().unwrap().into_boolean_builder();
        assert_eq!(window_offset, 4);
        assert_eq!(window_bits.as_slice(), &[254, 15]);

        // Patch the underlying array: write `false` at index 4.
        let clear_index_four = Patches::new(
            source.len(),
            0,
            buffer![4u32].into_array(), // This creates a non-nullable array
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let patched = source.patch(&clear_index_four).unwrap();
        let patched_len = patched.len();
        let (patched_bits, patched_offset) = patched.to_bool().unwrap().into_boolean_builder();
        assert_eq!(patched_offset, 0);
        assert_eq!(patched_bits.len(), patched_len + patched_offset);
        // 254 with bit 4 cleared is 238.
        assert_eq!(patched_bits.as_slice(), &[238, 15]);

        // The slice taken before patching must still see the original bits.
        let (window_bits, window_offset) = window.to_bool().unwrap().into_boolean_builder();
        assert_eq!(window_offset, 4);
        assert_eq!(window_bits.len(), window_len + window_offset);
        assert_eq!(window_bits.as_slice(), &[254, 15]); // unchanged
    }
1✔
288

289
    /// Slicing bits 4..12 of a 16-bit all-set array keeps a bit offset of 4 in the builder.
    #[test]
    fn slice_array_in_middle() {
        let all_set = BoolArray::from(BooleanBuffer::new_set(16));
        let window = all_set.slice(4, 12).unwrap();
        let window_len = window.len();

        let (bits, bit_offset) = window.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 4);
        assert_eq!(bits.len(), window_len + bit_offset);
        assert_eq!(bits.as_slice(), &[255, 15]);
    }
1✔
299

300
    /// Patches a uniquely owned 15-bit array and compares the buffer pointer before and
    /// after the patch.
    ///
    /// NOTE(review): `#[should_panic]` carries no `expected` message, so a panic anywhere in
    /// the body makes this test pass — confirm which statement is meant to fail.
    #[test]
    #[should_panic]
    fn patch_bools_owned() {
        let bytes = buffer![255u8; 2];
        let bits = BooleanBuffer::new(bytes.into_arrow_buffer(), 0, 15);
        let array = BoolArray::new(bits, Validity::NonNullable);
        let ptr_before = array.boolean_buffer().sliced().as_ptr();

        // Write `false` at index 0.
        let clear_first = Patches::new(
            array.len(),
            0,
            PrimitiveArray::new(buffer![0u32], Validity::AllValid).into_array(),
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let array = array.patch(&clear_first).unwrap();
        assert_eq!(array.boolean_buffer().sliced().as_ptr(), ptr_before);

        let (values, _byte_bit_offset) = array.to_bool().unwrap().into_boolean_builder();
        assert_eq!(values.as_slice(), &[254, 127]);
    }
320
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc