• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16992591828

15 Aug 2025 02:51PM UTC coverage: 87.203% (-0.5%) from 87.72%
16992591828

Pull #2456

github

web-flow
Merge fe7e226a7 into 4a23f65b3
Pull Request #2456: feat: basic BoolBuffer / BoolBufferMut

476 of 1230 new or added lines in 107 files covered. (38.7%)

74 existing lines in 19 files now uncovered.

56525 of 64820 relevant lines covered (87.2%)

623751.88 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.25
/vortex-array/src/arrays/primitive/mod.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::fmt::Debug;
5
use std::iter;
6

7
mod accessor;
8

9
use arrow_buffer::BooleanBufferBuilder;
10
use vortex_buffer::{Alignment, Buffer, BufferMut, ByteBuffer, ByteBufferMut};
11
use vortex_dtype::{DType, NativePType, Nullability, PType, match_each_native_ptype};
12
use vortex_error::{VortexResult, vortex_panic};
13

14
use crate::builders::ArrayBuilder;
15
use crate::stats::{ArrayStats, StatsSetRef};
16
use crate::validity::Validity;
17
use crate::{Array, ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, vtable};
18

19
mod compute;
20
mod native_value;
21
mod ops;
22
mod patch;
23
mod serde;
24
mod top_value;
25

26
pub use compute::{IS_CONST_LANE_WIDTH, compute_is_constant};
27
pub use native_value::NativeValue;
28

29
use crate::vtable::{
30
    ArrayVTable, CanonicalVTable, NotSupported, VTable, ValidityHelper,
31
    ValidityVTableFromValidityHelper,
32
};
33

34
vtable!(Primitive);
35

36
/// Wires `PrimitiveArray` and `PrimitiveEncoding` into the `VTable` machinery.
///
/// Most sub-vtables are implemented on `PrimitiveVTable` itself (`Self`); validity is delegated
/// to `ValidityVTableFromValidityHelper`, and the compute and encode vtables are `NotSupported`.
impl VTable for PrimitiveVTable {
37
    type Array = PrimitiveArray;
38
    type Encoding = PrimitiveEncoding;
39

40
    type ArrayVTable = Self;
41
    type CanonicalVTable = Self;
42
    type OperationsVTable = Self;
43
    type ValidityVTable = ValidityVTableFromValidityHelper;
44
    type VisitorVTable = Self;
45
    type ComputeVTable = NotSupported;
46
    type EncodeVTable = NotSupported;
47
    type SerdeVTable = Self;
48

148✔
49
    /// Returns the encoding id `"vortex.primitive"`; the encoding argument is unused.
    fn id(_encoding: &Self::Encoding) -> EncodingId {
1,751,424✔
50
        EncodingId::new_ref("vortex.primitive")
1,751,424✔
51
    }
1,751,276✔
52

30✔
53
    /// Returns an `EncodingRef` built from the unit `PrimitiveEncoding` value; the array
    /// argument is unused.
    fn encoding(_array: &Self::Array) -> EncodingRef {
865,359✔
54
        EncodingRef::new_ref(PrimitiveEncoding.as_ref())
865,359✔
55
    }
865,329✔
56
}
57

58
/// A primitive array that stores [native types][vortex_dtype::NativePType] in a contiguous buffer
59
/// of memory, along with an optional validity child.
60
///
61
/// This mirrors the Apache Arrow Primitive layout and can be converted into and out of one
62
/// without allocations or copies.
63
///
64
/// The underlying buffer must be natively aligned to the primitive type it represents.
65
///
66
/// Values are stored in their native representation with proper alignment.
67
/// Null values still occupy space in the buffer but are marked invalid in the validity mask.
68
///
69
/// # Examples
70
///
71
/// ```
72
/// use vortex_array::arrays::PrimitiveArray;
73
/// use vortex_array::compute::sum;
74
///
75
/// // Create from iterator using FromIterator impl
76
/// let array: PrimitiveArray = [1i32, 2, 3, 4, 5].into_iter().collect();
77
///
78
/// // Slice the array
79
/// let sliced = array.slice(1, 3).unwrap();
80
///
81
/// // Access individual values
82
/// let value = sliced.scalar_at(0).unwrap();
83
/// assert_eq!(value, 2i32.into());
84
///
85
/// // Convert into a type-erased array that can be passed to compute functions.
86
/// let summed = sum(sliced.as_ref()).unwrap().as_primitive().typed_value::<i64>().unwrap();
87
/// assert_eq!(summed, 5i64);
88
/// ```
89
#[derive(Clone, Debug)]
90
pub struct PrimitiveArray {
91
    /// Set by `new` to `DType::Primitive(T::PTYPE, validity.nullability())`.
    dtype: DType,
92
    /// The element values, stored type-erased as bytes.
    buffer: ByteBuffer,
93
    /// Validity of the elements; `new` rejects a validity whose known length differs from the
    /// number of elements in the buffer.
    validity: Validity,
94
    /// Stats set exposed through `ArrayVTable::stats`; initialised with `Default::default()`.
    stats_set: ArrayStats,
95
}
96

97
/// Unit type naming the primitive encoding; it is the `Encoding` associated type of
/// `PrimitiveVTable` and the value behind the `EncodingRef` returned by `VTable::encoding`.
#[derive(Clone, Debug)]
98
pub struct PrimitiveEncoding;
99

100
impl PrimitiveArray {
314✔
101
    /// Creates a `PrimitiveArray` from a typed buffer and its validity.
    ///
    /// The dtype is `DType::Primitive(T::PTYPE, validity.nullability())` and the stats set
    /// starts out as `Default::default()`.
    ///
    /// # Panics
    ///
    /// Panics if `validity.maybe_len()` reports a length that differs from `buffer.len()`.
    pub fn new<T: NativePType>(buffer: impl Into<Buffer<T>>, validity: Validity) -> Self {
4,703,320✔
102
        let buffer = buffer.into();
4,703,320✔
103
        // Only validities that report a length (`maybe_len()` is `Some`) can be checked.
        if let Some(len) = validity.maybe_len()
4,703,006✔
104
            && buffer.len() != len
95,445✔
105
        {
106
            vortex_panic!(
×
107
                "Buffer and validity length mismatch: buffer={}, validity={}",
×
108
                buffer.len(),
109
                len
110
            );
314✔
111
        }
4,703,006✔
112

314✔
113
        Self {
4,703,320✔
114
            dtype: DType::Primitive(T::PTYPE, validity.nullability()),
4,703,320✔
115
            buffer: buffer.into_byte_buffer(),
4,703,320✔
116
            validity,
4,703,320✔
117
            stats_set: Default::default(),
4,703,320✔
118
        }
4,703,320✔
119
    }
4,703,006✔
120

121
    /// Creates an array of type `T` backed by `Buffer::<T>::empty()`, with the validity obtained
    /// by converting `nullability` via `Into`.
    pub fn empty<T: NativePType>(nullability: Nullability) -> Self {
7,198✔
122
        Self::new(Buffer::<T>::empty(), nullability.into())
7,198✔
123
    }
7,198✔
124

32✔
125
    /// Creates a `PrimitiveArray` from an untyped byte buffer, interpreting the bytes as the
    /// native type selected by `ptype`.
    ///
    /// Dispatches on `ptype` and delegates to [`Self::new`], so the same buffer/validity length
    /// mismatch panic applies.
    pub fn from_byte_buffer(buffer: ByteBuffer, ptype: PType, validity: Validity) -> Self {
536,772✔
126
        match_each_native_ptype!(ptype, |T| {
536,740✔
127
            Self::new::<T>(Buffer::from_byte_buffer(buffer), validity)
2,857✔
128
        })
32✔
129
    }
536,740✔
130

131
    /// Create a PrimitiveArray from an iterator of `Option<T>`, where `None` marks a null.
132
    ///
    /// A `None` item is stored as `T::default()` in the values buffer and recorded as invalid;
    /// a `Some(v)` item stores `v` and is recorded as valid.
    ///
    /// NOTE: we cannot impl the `FromIterator<Option<T>>` trait since it conflicts with
    /// `FromIterator<T>`.
133
    pub fn from_option_iter<T: NativePType, I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
6,665✔
134
        let iter = iter.into_iter();
6,665✔
135
        // Pre-size both builders from the iterator's lower size bound.
        let mut values = BufferMut::with_capacity(iter.size_hint().0);
6,665✔
136
        let mut validity = BooleanBufferBuilder::new(values.capacity());
6,665✔
137

138
        for i in iter {
35,133✔
139
            match i {
28,468✔
140
                None => {
9,284✔
141
                    validity.append(false);
9,284✔
142
                    // Nulls still occupy a slot; fill it with the type's default value.
                    values.push(T::default());
9,284✔
143
                }
9,284✔
144
                Some(e) => {
19,184✔
145
                    validity.append(true);
19,184✔
146
                    values.push(e);
19,184✔
147
                }
19,184✔
148
            }
149
        }
150
        Self::new(values.freeze(), Validity::from(validity.finish()))
6,665✔
151
    }
6,665✔
152

153
    /// Create a PrimitiveArray from a byte buffer containing only the valid elements.
    ///
    /// `valid_elems_buffer` is read as packed values of `ptype.byte_width()` bytes each. How it
    /// is used depends on `validity`:
    ///
    /// - `AllValid` / `NonNullable`: the buffer is passed through `aligned(..)` with the type's
    ///   byte width as alignment and used directly; `n_rows` is not consulted.
    /// - `AllInvalid`: the input values are ignored and a zeroed buffer of
    ///   `n_rows * byte_width` bytes is used.
    /// - `Array`: the validity is canonicalized to a bool array, and the `i`-th packed value is
    ///   copied into the slot of the `i`-th set validity bit of a zeroed
    ///   `n_rows * byte_width` byte buffer; the remaining slots stay zero.
    ///
    /// # Errors
    ///
    /// Returns an error if the validity array cannot be canonicalized into a bool array.
    ///
    /// NOTE(review): in the `Array` case the slice indexing presumably panics if
    /// `valid_elems_buffer` holds fewer values than there are set validity bits, or if a set bit
    /// lies at or beyond `n_rows` — confirm against callers.
154
    pub fn from_values_byte_buffer(
475,722✔
155
        valid_elems_buffer: ByteBuffer,
475,722✔
156
        ptype: PType,
475,722✔
157
        validity: Validity,
475,722✔
158
        n_rows: usize,
475,722✔
159
    ) -> VortexResult<Self> {
475,722✔
160
        let byte_width = ptype.byte_width();
475,722✔
161
        let alignment = Alignment::new(byte_width);
475,722✔
162
        let buffer = match &validity {
475,722✔
163
            Validity::AllValid | Validity::NonNullable => valid_elems_buffer.aligned(alignment),
473,070✔
UNCOV
164
            Validity::AllInvalid => ByteBuffer::zeroed_aligned(n_rows * byte_width, alignment),
×
165
            Validity::Array(is_valid) => {
2,652✔
166
                let bool_array = is_valid.to_canonical()?.into_bool()?;
2,652✔
167
                let bool_buffer = bool_array.boolean_buffer();
2,652✔
168
                let mut bytes = ByteBufferMut::zeroed_aligned(n_rows * byte_width, alignment);
2,652✔
169
                // `i` indexes the packed input values; `valid_i` is the destination row.
                for (i, valid_i) in bool_buffer.set_indices().enumerate() {
4,758✔
170
                    bytes[valid_i * byte_width..(valid_i + 1) * byte_width]
4,758✔
171
                        .copy_from_slice(&valid_elems_buffer[i * byte_width..(i + 1) * byte_width])
4,758✔
172
                }
173
                bytes.freeze()
2,652✔
174
            }
175
        };
176

177
        Ok(Self::from_byte_buffer(buffer, ptype, validity))
475,722✔
178
    }
475,722✔
179

5,770✔
180
    /// Returns the primitive type of the elements, read from the array's dtype.
    pub fn ptype(&self) -> PType {
220,839,787✔
181
        self.dtype().as_ptype()
220,839,787✔
182
    }
220,834,017✔
183

5,304✔
184
    /// Borrows the underlying untyped byte buffer.
    pub fn byte_buffer(&self) -> &ByteBuffer {
218,308,998✔
185
        &self.buffer
218,308,998✔
186
    }
218,303,694✔
187

188
    /// Consumes the array and returns the underlying untyped byte buffer; the dtype, validity
    /// and stats are dropped.
    pub fn into_byte_buffer(self) -> ByteBuffer {
267,361✔
189
        self.buffer
267,361✔
190
    }
267,361✔
191

248✔
192
    /// Returns the values as a typed `Buffer<T>`, built from a clone of the underlying byte
    /// buffer.
    ///
    /// # Panics
    ///
    /// Panics if `T::PTYPE` differs from the array's [`Self::ptype`].
    pub fn buffer<T: NativePType>(&self) -> Buffer<T> {
2,595,840✔
193
        if T::PTYPE != self.ptype() {
2,595,592✔
194
            vortex_panic!(
×
195
                "Attempted to get buffer of type {} from array of type {}",
196
                T::PTYPE,
197
                self.ptype()
198
            )
248✔
199
        }
2,595,840✔
200
        Buffer::from_byte_buffer(self.byte_buffer().clone())
2,595,840✔
201
    }
2,595,592✔
202

203
    /// Consumes the array and returns its values as a typed `Buffer<T>`, reusing the underlying
    /// byte buffer rather than cloning it.
    ///
    /// # Panics
    ///
    /// Panics if `T::PTYPE` differs from the array's [`Self::ptype`].
    pub fn into_buffer<T: NativePType>(self) -> Buffer<T> {
100,753✔
204
        if T::PTYPE != self.ptype() {
100,753✔
205
            vortex_panic!(
×
206
                "Attempted to get buffer of type {} from array of type {}",
207
                T::PTYPE,
208
                self.ptype()
209
            )
210
        }
100,753✔
211
        Buffer::from_byte_buffer(self.buffer)
100,753✔
212
    }
100,753✔
213

214
    /// Extract a mutable buffer from the PrimitiveArray. Attempts to do this with zero-copy
215
    /// if the buffer is uniquely owned, otherwise will make a copy.
    ///
    /// The array's validity is discarded.
    ///
    /// # Panics
    ///
    /// Panics if `T::PTYPE` differs from the array's [`Self::ptype`].
216
    pub fn into_buffer_mut<T: NativePType>(self) -> BufferMut<T> {
82,048✔
217
        // `into_buffer` below repeats this check; doing it here first makes the panic message
        // name `buffer_mut`.
        if T::PTYPE != self.ptype() {
82,048✔
218
            vortex_panic!(
×
219
                "Attempted to get buffer_mut of type {} from array of type {}",
220
                T::PTYPE,
221
                self.ptype()
222
            )
223
        }
82,048✔
224
        self.into_buffer()
82,048✔
225
            .try_into_mut()
82,048✔
226
            // Fall back to copying when `try_into_mut` hands the buffer back.
            .unwrap_or_else(|buffer| BufferMut::<T>::copy_from(&buffer))
82,048✔
227
    }
82,048✔
228

229
    /// Try to extract a mutable buffer from the PrimitiveArray with zero copy.
    ///
    /// On failure the `Err` variant carries a `PrimitiveArray` rebuilt from the returned buffer
    /// and a clone of the original validity. It is rebuilt through [`Self::new`], so its stats
    /// set starts from `Default::default()`.
    ///
    /// # Panics
    ///
    /// Panics if `T::PTYPE` differs from the array's [`Self::ptype`]; a type mismatch is not
    /// reported through the `Err` variant.
230
    #[allow(clippy::panic_in_result_fn)]
231
    pub fn try_into_buffer_mut<T: NativePType>(self) -> Result<BufferMut<T>, PrimitiveArray> {
×
232
        if T::PTYPE != self.ptype() {
×
233
            vortex_panic!(
×
234
                "Attempted to get buffer_mut of type {} from array of type {}",
235
                T::PTYPE,
236
                self.ptype()
237
            )
238
        }
×
239
        // Clone the validity up front: `into_byte_buffer` consumes `self`.
        let validity = self.validity().clone();
×
240
        Buffer::<T>::from_byte_buffer(self.into_byte_buffer())
×
241
            .try_into_mut()
×
242
            .map_err(|buffer| PrimitiveArray::new(buffer, validity))
×
243
    }
244

245
    /// Map each element in the array to a new value.
246
    ///
247
    /// This ignores validity and maps over all maybe-null elements.
    /// The result carries a clone of the original validity.
    ///
    /// If a mutable buffer can be obtained via [`Self::try_into_buffer_mut`] it is mapped with
    /// `BufferMut::map_each`; otherwise the values are copied out and mapped into a new buffer.
    ///
    /// # Panics
    ///
    /// Panics if `T::PTYPE` differs from the array's [`Self::ptype`].
248
    ///
249
    /// TODO(ngates): we could be smarter here if validity is sparse and only run the function
250
    ///   over the valid elements.
251
    pub fn map_each<T, R, F>(self, f: F) -> PrimitiveArray
×
252
    where
×
253
        T: NativePType,
×
254
        R: NativePType,
×
255
        F: FnMut(T) -> R,
256
    {
257
        let validity = self.validity().clone();
×
258
        let buffer = match self.try_into_buffer_mut() {
×
259
            Ok(buffer_mut) => buffer_mut.map_each(f),
×
260
            Err(parray) => BufferMut::<R>::from_iter(parray.buffer::<T>().iter().copied().map(f)),
261
        };
262
        PrimitiveArray::new(buffer.freeze(), validity)
×
263
    }
264

265
    /// Map each element in the array to a new value.
266
    ///
267
    /// Unlike [`Self::map_each`], the closure also sees validity: `f` is called once per element
268
    /// (including null ones) with `(value, is_valid)`, where `is_valid` is true if the element
    /// is valid and false otherwise. The result carries a clone of the original validity.
    ///
    /// # Errors
    ///
    /// Returns an error if an array-backed validity cannot be canonicalized into a bool array.
    ///
    /// # Panics
    ///
    /// Panics if `T::PTYPE` differs from the array's [`Self::ptype`].
4✔
269
    pub fn map_each_with_validity<T, R, F>(self, f: F) -> VortexResult<PrimitiveArray>
4,500✔
270
    where
4,500✔
271
        T: NativePType,
4,500✔
272
        R: NativePType,
4,500✔
273
        F: FnMut((T, bool)) -> R,
4,496✔
274
    {
4✔
275
        let validity = self.validity();
4,496✔
276

4✔
277
        let buf_iter = self.buffer::<T>().into_iter();
4,496✔
278

4✔
279
        let buffer = match &validity {
4,496✔
280
            // Every element is valid: pair each value with `true`.
            Validity::NonNullable | Validity::AllValid => {
4✔
281
                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(true)).map(f))
4,246✔
282
            }
283
            // Every element is null: pair each value with `false`.
            Validity::AllInvalid => {
284
                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(false)).map(f))
285
            }
286
            // Per-element validity: pair each value with its bit from the bool buffer.
            Validity::Array(val) => {
250✔
287
                let val = val.to_canonical()?.into_bool()?;
250✔
288
                BufferMut::<R>::from_iter(buf_iter.zip(val.boolean_buffer()).map(f))
250✔
289
            }
290
        };
4✔
291
        Ok(PrimitiveArray::new(buffer.freeze(), validity.clone()))
4,500✔
292
    }
4,496✔
293

294
    /// Return a slice of the array's buffer.
295
    ///
296
    /// NOTE: these values may be nonsense if the validity buffer indicates that the value is null.
    ///
    /// The slice length is the byte buffer's length divided by `size_of::<T>()`.
    ///
    /// # Panics
    ///
    /// Panics if `T::PTYPE` differs from the array's [`Self::ptype`].
476✔
297
    pub fn as_slice<T: NativePType>(&self) -> &[T] {
39,038,902✔
298
        if T::PTYPE != self.ptype() {
39,038,426✔
299
            vortex_panic!(
×
300
                "Attempted to get slice of type {} from array of type {}",
301
                T::PTYPE,
302
                self.ptype()
303
            )
476✔
304
        }
39,038,902✔
305
        let raw_slice = self.byte_buffer().as_ptr();
39,038,426✔
306
        // SAFETY: alignment of Buffer is checked on construction
307
        unsafe {
476✔
308
            std::slice::from_raw_parts(raw_slice.cast(), self.byte_buffer().len() / size_of::<T>())
39,038,426✔
309
        }
476✔
310
    }
39,038,426✔
311

58✔
312
    /// Reinterprets the array's bytes as another primitive type of the same byte width.
    ///
    /// The values are not converted: the result is built from a clone of the same byte buffer
    /// and a clone of the validity. If `ptype` already matches, a clone of `self` is returned.
    ///
    /// # Panics
    ///
    /// Panics if `ptype` has a different byte width from the array's current ptype.
    pub fn reinterpret_cast(&self, ptype: PType) -> Self {
78,742✔
313
        if self.ptype() == ptype {
78,710✔
314
            return self.clone();
41,117✔
315
        }
37,599✔
316

32✔
317
        assert_eq!(
37,631✔
318
            self.ptype().byte_width(),
37,631✔
319
            ptype.byte_width(),
37,599✔
320
            "can't reinterpret cast between integers of two different widths"
321
        );
322

32✔
323
        PrimitiveArray::from_byte_buffer(self.byte_buffer().clone(), ptype, self.validity().clone())
37,657✔
324
    }
78,684✔
325
}
326

327
/// Basic array accessors (length, dtype, stats) for `PrimitiveArray`.
impl ArrayVTable<PrimitiveVTable> for PrimitiveVTable {
3,836✔
328
    /// Number of elements: the byte buffer's length divided by the ptype's byte width.
    fn len(array: &PrimitiveArray) -> usize {
137,403,450✔
329
        array.byte_buffer().len() / array.ptype().byte_width()
137,403,450✔
330
    }
137,399,614✔
331

8,562✔
332
    /// Borrows the array's stored dtype.
    fn dtype(array: &PrimitiveArray) -> &DType {
320,454,112✔
333
        &array.dtype
320,454,112✔
334
    }
320,445,550✔
335

1,834✔
336
    /// Returns a `StatsSetRef` over the array's stats set, bound to this array.
    fn stats(array: &PrimitiveArray) -> StatsSetRef<'_> {
13,844,330✔
337
        array.stats_set.to_ref(array.as_ref())
13,844,330✔
338
    }
13,842,496✔
339
}
340

341
/// Exposes the stored validity; `PrimitiveVTable` uses `ValidityVTableFromValidityHelper` as its
/// validity vtable.
impl ValidityHelper for PrimitiveArray {
1,260✔
342
    /// Borrows the array's validity.
    fn validity(&self) -> &Validity {
69,876,224✔
343
        &self.validity
69,876,224✔
344
    }
69,874,964✔
345
}
346

347
/// Collects an iterator of plain (non-optional) values into a `PrimitiveArray` with
/// `Validity::NonNullable`. For `Option<T>` items use `PrimitiveArray::from_option_iter`.
impl<T: NativePType> FromIterator<T> for PrimitiveArray {
348
    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
39,959✔
349
        let values = BufferMut::from_iter(iter);
39,959✔
350
        PrimitiveArray::new(values.freeze(), Validity::NonNullable)
39,959✔
351
    }
39,959✔
352
}
353

354
/// Converts a typed buffer into an `ArrayRef` by wrapping it in a `PrimitiveArray` with
/// `Validity::NonNullable`.
impl<T: NativePType> IntoArray for Buffer<T> {
16✔
355
    fn into_array(self) -> ArrayRef {
37,752✔
356
        PrimitiveArray::new(self, Validity::NonNullable).into_array()
37,752✔
357
    }
37,736✔
358
}
359

360
/// Converts a mutable typed buffer into an `ArrayRef` by freezing it and delegating to the
/// conversion of the frozen buffer.
impl<T: NativePType> IntoArray for BufferMut<T> {
361
    fn into_array(self) -> ArrayRef {
14,554✔
362
        self.freeze().into_array()
14,554✔
363
    }
14,554✔
364
}
365

366
/// Canonicalization hooks for `PrimitiveArray`.
impl CanonicalVTable<PrimitiveVTable> for PrimitiveVTable {
302✔
367
    /// Returns a clone of the array wrapped in `Canonical::Primitive`; no conversion is done.
    fn canonicalize(array: &PrimitiveArray) -> VortexResult<Canonical> {
3,260,483✔
368
        Ok(Canonical::Primitive(array.clone()))
3,260,483✔
369
    }
3,260,181✔
370

172✔
371
    /// Appends the array to `builder` by forwarding to `ArrayBuilder::extend_from_array`.
    fn append_to_builder(
39,630✔
372
        array: &PrimitiveArray,
39,630✔
373
        builder: &mut dyn ArrayBuilder,
39,630✔
374
    ) -> VortexResult<()> {
39,630✔
375
        builder.extend_from_array(array.as_ref())
39,630✔
376
    }
39,458✔
377
}
378

379
#[cfg(test)]
380
mod tests {
381
    use vortex_buffer::buffer;
382
    use vortex_scalar::PValue;
383

384
    use crate::arrays::{BoolArray, PrimitiveArray};
385
    use crate::compute::conformance::filter::test_filter_conformance;
386
    use crate::compute::conformance::mask::test_mask_conformance;
387
    use crate::compute::conformance::search_sorted::rstest_reuse::apply;
388
    use crate::compute::conformance::search_sorted::{search_sorted_conformance, *};
389
    use crate::search_sorted::{SearchResult, SearchSorted, SearchSortedSide};
390
    use crate::validity::Validity;
391
    use crate::{ArrayRef, IntoArray};
392

393
    // Runs every case of the shared `search_sorted_conformance` template against the array
    // viewed through `as_primitive_typed()`, and checks the result against the expected value.
    #[apply(search_sorted_conformance)]
394
    fn test_search_sorted_primitive(
395
        #[case] array: ArrayRef,
396
        #[case] value: i32,
397
        #[case] side: SearchSortedSide,
398
        #[case] expected: SearchResult,
399
    ) {
400
        let res = array
401
            .as_primitive_typed()
402
            .search_sorted(&Some(PValue::from(value)), side);
403
        assert_eq!(res, expected);
404
    }
405

406
    // Runs the mask conformance checks over the same five-element buffer under each validity
    // variant: NonNullable, AllValid, AllInvalid, and an explicit bool-array validity.
    #[test]
407
    fn test_mask_primitive_array() {
1✔
408
        test_mask_conformance(
1✔
409
            PrimitiveArray::new(buffer![0, 1, 2, 3, 4], Validity::NonNullable).as_ref(),
1✔
410
        );
411
        test_mask_conformance(
1✔
412
            PrimitiveArray::new(buffer![0, 1, 2, 3, 4], Validity::AllValid).as_ref(),
1✔
413
        );
414
        test_mask_conformance(
1✔
415
            PrimitiveArray::new(buffer![0, 1, 2, 3, 4], Validity::AllInvalid).as_ref(),
1✔
416
        );
417
        test_mask_conformance(
1✔
418
            PrimitiveArray::new(
1✔
419
                buffer![0, 1, 2, 3, 4],
1✔
420
                Validity::Array(
1✔
421
                    BoolArray::from_iter([true, false, true, false, true]).into_array(),
1✔
422
                ),
1✔
423
            )
1✔
424
            .as_ref(),
1✔
425
        );
426
    }
1✔
427

428
    // Runs the filter conformance checks over non-nullable arrays of 1, 2, 5 and 8 elements,
    // then over an AllValid array and an array with an explicit bool-array validity.
    #[test]
429
    fn test_filter_primitive_array() {
1✔
430
        // Test various sizes
431
        test_filter_conformance(
1✔
432
            PrimitiveArray::new(buffer![42i32], Validity::NonNullable).as_ref(),
1✔
433
        );
434
        test_filter_conformance(PrimitiveArray::new(buffer![0, 1], Validity::NonNullable).as_ref());
1✔
435
        test_filter_conformance(
1✔
436
            PrimitiveArray::new(buffer![0, 1, 2, 3, 4], Validity::NonNullable).as_ref(),
1✔
437
        );
438
        test_filter_conformance(
1✔
439
            PrimitiveArray::new(buffer![0, 1, 2, 3, 4, 5, 6, 7], Validity::NonNullable).as_ref(),
1✔
440
        );
441

442
        // Test with validity
443
        test_filter_conformance(
1✔
444
            PrimitiveArray::new(buffer![0, 1, 2, 3, 4], Validity::AllValid).as_ref(),
1✔
445
        );
446
        test_filter_conformance(
1✔
447
            PrimitiveArray::new(
1✔
448
                buffer![0, 1, 2, 3, 4, 5],
1✔
449
                Validity::Array(
1✔
450
                    BoolArray::from_iter([true, false, true, false, true, true]).into_array(),
1✔
451
                ),
1✔
452
            )
1✔
453
            .as_ref(),
1✔
454
        );
455
    }
1✔
456
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc