• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16593958537

29 Jul 2025 10:48AM UTC coverage: 82.285% (+0.5%) from 81.796%
16593958537

Pull #4036

github

web-flow
Merge 04147cb0f into 348079fc3
Pull Request #4036: varbinview builder buffer deduplication

146 of 154 new or added lines in 2 files covered. (94.81%)

348 existing lines in 26 files now uncovered.

44470 of 54044 relevant lines covered (82.28%)

169522.95 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.06
/vortex-array/src/arrays/primitive/mod.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::fmt::Debug;
5
use std::iter;
6

7
mod accessor;
8

9
use arrow_buffer::BooleanBufferBuilder;
10
use vortex_buffer::{Alignment, Buffer, BufferMut, ByteBuffer, ByteBufferMut};
11
use vortex_dtype::{DType, NativePType, Nullability, PType, match_each_native_ptype};
12
use vortex_error::{VortexResult, vortex_panic};
13

14
use crate::builders::ArrayBuilder;
15
use crate::stats::{ArrayStats, StatsSetRef};
16
use crate::validity::Validity;
17
use crate::{Array, ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, vtable};
18

19
mod compute;
20
mod native_value;
21
mod ops;
22
mod patch;
23
mod serde;
24
mod top_value;
25

26
pub use compute::{IS_CONST_LANE_WIDTH, compute_is_constant};
27
pub use native_value::NativeValue;
28

29
use crate::vtable::{
30
    ArrayVTable, CanonicalVTable, NotSupported, VTable, ValidityHelper,
31
    ValidityVTableFromValidityHelper,
32
};
33

34
vtable!(Primitive);
35

36
// Registers the primitive encoding with the vtable machinery: `PrimitiveVTable` itself provides
// the array, canonical, operations, visitor and serde vtables, validity is derived from the
// `ValidityHelper` impl, and the compute/encode vtables are declared `NotSupported`.
impl VTable for PrimitiveVTable {
37
    type Array = PrimitiveArray;
38
    type Encoding = PrimitiveEncoding;
39

40
    type ArrayVTable = Self;
41
    type CanonicalVTable = Self;
42
    type OperationsVTable = Self;
43
    type ValidityVTable = ValidityVTableFromValidityHelper;
44
    type VisitorVTable = Self;
45
    type ComputeVTable = NotSupported;
46
    type EncodeVTable = NotSupported;
47
    type SerdeVTable = Self;
48

49
    /// Returns the encoding identifier `"vortex.primitive"`; the encoding argument is unused.
    fn id(_encoding: &Self::Encoding) -> EncodingId {
215,757✔
50
        EncodingId::new_ref("vortex.primitive")
215,757✔
51
    }
215,757✔
52

53
    /// Returns a reference to the `PrimitiveEncoding` unit value; the array argument is unused.
    fn encoding(_array: &Self::Array) -> EncodingRef {
91,711✔
54
        EncodingRef::new_ref(PrimitiveEncoding.as_ref())
91,711✔
55
    }
91,711✔
56
}
57

58
/// A primitive array that stores [native types][vortex_dtype::NativePType] in a contiguous buffer
59
/// of memory, along with an optional validity child.
60
///
61
/// This mirrors the Apache Arrow Primitive layout and can be converted into and out of one
62
/// without allocations or copies.
63
///
64
/// The underlying buffer must be natively aligned to the primitive type it represents.
65
///
66
/// Values are stored in their native representation with proper alignment.
67
/// Null values still occupy space in the buffer but are marked invalid in the validity mask.
68
///
69
/// # Examples
70
///
71
/// ```
72
/// use vortex_array::arrays::PrimitiveArray;
73
/// use vortex_array::compute::sum;
74
///
75
/// // Create from iterator using FromIterator impl
76
/// let array: PrimitiveArray = [1i32, 2, 3, 4, 5].into_iter().collect();
77
///
78
/// // Slice the array
79
/// let sliced = array.slice(1, 3).unwrap();
80
///
81
/// // Access individual values
82
/// let value = sliced.scalar_at(0).unwrap();
83
/// assert_eq!(value, 2i32.into());
84
///
85
/// // Convert into a type-erased array that can be passed to compute functions.
86
/// let summed = sum(sliced.as_ref()).unwrap().as_primitive().typed_value::<i64>().unwrap();
87
/// assert_eq!(summed, 5i64);
88
/// ```
89
#[derive(Clone, Debug)]
90
pub struct PrimitiveArray {
91
    // Logical type; `new` sets this to `DType::Primitive(T::PTYPE, validity.nullability())`.
    dtype: DType,
92
    // Element storage as untyped bytes; typed access goes through `buffer::<T>()` / `as_slice::<T>()`.
    buffer: ByteBuffer,
93
    // Records which elements are valid (non-null).
    validity: Validity,
94
    // Statistics holder; `new` initialises it with `Default::default()`.
    stats_set: ArrayStats,
95
}
96

97
/// Unit type standing for the primitive encoding; it is the `Encoding` associated type of
/// `PrimitiveVTable` and the value handed out by `PrimitiveVTable::encoding`.
#[derive(Clone, Debug)]
98
pub struct PrimitiveEncoding;
99

100
impl PrimitiveArray {
101
    /// Creates a `PrimitiveArray` from a typed buffer and its validity.
    ///
    /// The dtype becomes `DType::Primitive(T::PTYPE, validity.nullability())`, the buffer is
    /// stored as bytes, and the statistics start out as `Default::default()`.
    ///
    /// # Panics
    ///
    /// Panics if `validity.maybe_len()` reports a length and it differs from `buffer.len()`.
    pub fn new<T: NativePType>(buffer: impl Into<Buffer<T>>, validity: Validity) -> Self {
233,104✔
102
        let buffer = buffer.into();
233,104✔
103
        // The length can only be cross-checked when the validity knows its own length.
        if let Some(len) = validity.maybe_len() {
233,104✔
104
            if buffer.len() != len {
16,329✔
UNCOV
105
                vortex_panic!(
×
UNCOV
106
                    "Buffer and validity length mismatch: buffer={}, validity={}",
×
UNCOV
107
                    buffer.len(),
×
108
                    len
109
                );
110
            }
16,329✔
111
        }
216,775✔
112
        Self {
233,104✔
113
            dtype: DType::Primitive(T::PTYPE, validity.nullability()),
233,104✔
114
            buffer: buffer.into_byte_buffer(),
233,104✔
115
            validity,
233,104✔
116
            stats_set: Default::default(),
233,104✔
117
        }
233,104✔
118
    }
233,104✔
119

120
    /// Creates an array of primitive type `T` from `Buffer::<T>::empty()`, with the validity
    /// obtained by converting `nullability` via `Into`.
    pub fn empty<T: NativePType>(nullability: Nullability) -> Self {
6✔
121
        Self::new(Buffer::<T>::empty(), nullability.into())
6✔
122
    }
6✔
123

124
    /// Creates a `PrimitiveArray` from an untyped byte buffer, interpreting the bytes as the
    /// runtime-selected `ptype`.
    ///
    /// Dispatches on `ptype` to the matching native type `T` and forwards to [`Self::new`], so
    /// the same buffer/validity length check (and panic) applies.
    // NOTE(review): `Buffer::from_byte_buffer` presumably validates alignment/length for `T` —
    // confirm against vortex-buffer.
    pub fn from_byte_buffer(buffer: ByteBuffer, ptype: PType, validity: Validity) -> Self {
34,375✔
125
        match_each_native_ptype!(ptype, |T| {
34,375✔
126
            Self::new::<T>(Buffer::from_byte_buffer(buffer), validity)
562✔
127
        })
128
    }
34,375✔
129

130
    /// Create a PrimitiveArray from an iterator of `Option<T>`.
    ///
    /// `Some(v)` stores `v` and marks the slot valid; `None` stores `T::default()` as a
    /// placeholder and marks the slot invalid.
    ///
131
    /// NOTE: we cannot impl `FromIterator<Option<T>>` since it conflicts with `FromIterator<T>`.
132
    pub fn from_option_iter<T: NativePType, I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
101✔
133
        let iter = iter.into_iter();
101✔
134
        // Pre-size both builders from the iterator's lower size bound.
        let mut values = BufferMut::with_capacity(iter.size_hint().0);
101✔
135
        let mut validity = BooleanBufferBuilder::new(values.capacity());
101✔
136

137
        for i in iter {
2,642✔
138
            match i {
2,541✔
139
                None => {
179✔
140
                    validity.append(false);
179✔
141
                    values.push(T::default());
179✔
142
                }
179✔
143
                Some(e) => {
2,362✔
144
                    validity.append(true);
2,362✔
145
                    values.push(e);
2,362✔
146
                }
2,362✔
147
            }
148
        }
149
        Self::new(values.freeze(), Validity::from(validity.finish()))
101✔
150
    }
101✔
151

152
    /// Create a PrimitiveArray from a byte buffer containing only the valid elements.
    ///
    /// `valid_elems_buffer` holds the valid values packed back to back, each
    /// `ptype.byte_width()` bytes wide; `n_rows` is the number of rows of the resulting array.
    ///
    /// * `AllValid` / `NonNullable`: the packed buffer is used directly (after `aligned`).
    /// * `AllInvalid`: a zeroed buffer of `n_rows * byte_width` bytes is produced and
    ///   `valid_elems_buffer` is not read.
    /// * `Array`: a zeroed buffer of `n_rows * byte_width` bytes is allocated and the i-th packed
    ///   value is copied to the position of the i-th set validity bit; null slots stay zero.
    ///
    /// # Errors
    ///
    /// Returns an error if the validity array cannot be canonicalized into a bool array.
    // NOTE(review): nothing here checks that `valid_elems_buffer` holds one element per valid
    // row; in the `Array` case a too-short buffer presumably panics in the slice indexing below.
153
    pub fn from_values_byte_buffer(
1,330✔
154
        valid_elems_buffer: ByteBuffer,
1,330✔
155
        ptype: PType,
1,330✔
156
        validity: Validity,
1,330✔
157
        n_rows: usize,
1,330✔
158
    ) -> VortexResult<Self> {
1,330✔
159
        let byte_width = ptype.byte_width();
1,330✔
160
        // The element width is used as the buffer alignment.
        let alignment = Alignment::new(byte_width);
1,330✔
161
        let buffer = match &validity {
1,330✔
162
            Validity::AllValid | Validity::NonNullable => valid_elems_buffer.aligned(alignment),
912✔
163
            Validity::AllInvalid => ByteBuffer::zeroed_aligned(n_rows * byte_width, alignment),
×
164
            Validity::Array(is_valid) => {
418✔
165
                let bool_array = is_valid.to_canonical()?.into_bool()?;
418✔
166
                let bool_buffer = bool_array.boolean_buffer();
418✔
167
                let mut bytes = ByteBufferMut::zeroed_aligned(n_rows * byte_width, alignment);
418✔
168
                // `i` counts packed (valid) elements, `valid_i` is the destination row.
                for (i, valid_i) in bool_buffer.set_indices().enumerate() {
494✔
169
                    bytes[valid_i * byte_width..(valid_i + 1) * byte_width]
494✔
170
                        .copy_from_slice(&valid_elems_buffer[i * byte_width..(i + 1) * byte_width])
494✔
171
                }
172
                bytes.freeze()
418✔
173
            }
174
        };
175

176
        Ok(Self::from_byte_buffer(buffer, ptype, validity))
1,330✔
177
    }
1,330✔
178

179
    /// Returns the primitive type of the elements, read from the array's dtype.
    pub fn ptype(&self) -> PType {
18,986,482✔
180
        self.dtype().as_ptype()
18,986,482✔
181
    }
18,986,482✔
182

183
    /// Borrows the underlying untyped byte buffer.
    pub fn byte_buffer(&self) -> &ByteBuffer {
18,656,090✔
184
        &self.buffer
18,656,090✔
185
    }
18,656,090✔
186

187
    /// Consumes the array and returns its untyped byte buffer; the dtype, validity and
    /// statistics are dropped.
    pub fn into_byte_buffer(self) -> ByteBuffer {
20,282✔
188
        self.buffer
20,282✔
189
    }
20,282✔
190

191
    /// Returns the array's values as a typed `Buffer<T>`, built from a clone of the byte buffer.
    ///
    /// # Panics
    ///
    /// Panics if `T::PTYPE` differs from the array's [`ptype`](Self::ptype).
    pub fn buffer<T: NativePType>(&self) -> Buffer<T> {
95,954✔
192
        if T::PTYPE != self.ptype() {
95,954✔
UNCOV
193
            vortex_panic!(
×
UNCOV
194
                "Attempted to get buffer of type {} from array of type {}",
×
195
                T::PTYPE,
UNCOV
196
                self.ptype()
×
197
            )
198
        }
95,954✔
199
        Buffer::from_byte_buffer(self.byte_buffer().clone())
95,954✔
200
    }
95,954✔
201

202
    /// Consumes the array and returns its values as a typed `Buffer<T>`, moving the byte buffer
    /// out instead of cloning it. Validity and statistics are dropped.
    ///
    /// # Panics
    ///
    /// Panics if `T::PTYPE` differs from the array's [`ptype`](Self::ptype).
    pub fn into_buffer<T: NativePType>(self) -> Buffer<T> {
9,610✔
203
        if T::PTYPE != self.ptype() {
9,610✔
204
            vortex_panic!(
×
UNCOV
205
                "Attempted to get buffer of type {} from array of type {}",
×
206
                T::PTYPE,
207
                self.ptype()
×
208
            )
209
        }
9,610✔
210
        Buffer::from_byte_buffer(self.buffer)
9,610✔
211
    }
9,610✔
212

213
    /// Extract a mutable buffer from the PrimitiveArray. Attempts to do this with zero-copy
214
    /// if the buffer is uniquely owned, otherwise will make a copy.
    ///
    /// Validity and statistics are dropped.
    ///
    /// # Panics
    ///
    /// Panics if `T::PTYPE` differs from the array's [`ptype`](Self::ptype).
215
    pub fn into_buffer_mut<T: NativePType>(self) -> BufferMut<T> {
9,034✔
216
        // Checked here (as well as inside `into_buffer`) so the panic message says `buffer_mut`.
        if T::PTYPE != self.ptype() {
9,034✔
UNCOV
217
            vortex_panic!(
×
UNCOV
218
                "Attempted to get buffer_mut of type {} from array of type {}",
×
219
                T::PTYPE,
220
                self.ptype()
×
221
            )
222
        }
9,034✔
223
        self.into_buffer()
9,034✔
224
            .try_into_mut()
9,034✔
225
            // Fall back to copying when the conversion hands the immutable buffer back.
            .unwrap_or_else(|buffer| BufferMut::<T>::copy_from(&buffer))
9,034✔
226
    }
9,034✔
227

228
    /// Try to extract a mutable buffer from the PrimitiveArray with zero copy.
    ///
    /// On success the validity is dropped and only the values are returned. On failure the
    /// `Err` carries an array rebuilt via [`Self::new`] from the immutable buffer and the
    /// original validity, so its statistics are reset to the default.
    ///
    /// # Panics
    ///
    /// Panics if `T::PTYPE` differs from the array's [`ptype`](Self::ptype). The `Err` variant
    /// is reserved for the "could not convert to a mutable buffer" case, which is why the
    /// `panic_in_result_fn` lint is allowed here.
229
    #[allow(clippy::panic_in_result_fn)]
230
    pub fn try_into_buffer_mut<T: NativePType>(self) -> Result<BufferMut<T>, PrimitiveArray> {
×
231
        if T::PTYPE != self.ptype() {
×
UNCOV
232
            vortex_panic!(
×
UNCOV
233
                "Attempted to get buffer_mut of type {} from array of type {}",
×
234
                T::PTYPE,
UNCOV
235
                self.ptype()
×
236
            )
UNCOV
237
        }
×
UNCOV
238
        // Cloned up front because `self` is consumed by `into_byte_buffer` below.
        let validity = self.validity().clone();
×
UNCOV
239
        Buffer::<T>::from_byte_buffer(self.into_byte_buffer())
×
UNCOV
240
            .try_into_mut()
×
UNCOV
241
            .map_err(|buffer| PrimitiveArray::new(buffer, validity))
×
UNCOV
242
    }
×
243

244
    /// Map each element in the array to a new value.
245
    ///
246
    /// This ignores validity and maps over all maybe-null elements.
    ///
    /// The original validity is carried over unchanged to the returned array. If the values can
    /// be taken as a mutable buffer they are mapped through `BufferMut::map_each`; otherwise a
    /// new buffer is collected from the mapped elements.
    ///
    /// # Panics
    ///
    /// Panics if `T::PTYPE` differs from the array's ptype (raised by `try_into_buffer_mut`).
247
    ///
248
    /// TODO(ngates): we could be smarter here if validity is sparse and only run the function
249
    ///   over the valid elements.
UNCOV
250
    pub fn map_each<T, R, F>(self, f: F) -> PrimitiveArray
×
UNCOV
251
    where
×
252
        T: NativePType,
×
UNCOV
253
        R: NativePType,
×
UNCOV
254
        F: FnMut(T) -> R,
×
255
    {
UNCOV
256
        let validity = self.validity().clone();
×
UNCOV
257
        let buffer = match self.try_into_buffer_mut() {
×
UNCOV
258
            Ok(buffer_mut) => buffer_mut.map_each(f),
×
UNCOV
259
            Err(parray) => BufferMut::<R>::from_iter(parray.buffer::<T>().iter().copied().map(f)),
×
260
        };
UNCOV
261
        PrimitiveArray::new(buffer.freeze(), validity)
×
UNCOV
262
    }
×
263

264
    /// Map each element in the array to a new value, telling the closure whether it is valid.
265
    ///
266
    /// Unlike `map_each`, the closure is given a `(value, is_valid)` pair for every element,
267
    /// including null slots: `is_valid` is `true` for valid elements and `false` otherwise.
    /// The returned array keeps a clone of the original validity.
    ///
    /// # Errors
    ///
    /// Returns an error if an array-backed validity cannot be canonicalized into a bool array.
    ///
    /// # Panics
    ///
    /// Panics if `T::PTYPE` differs from the array's ptype (raised by `buffer::<T>()`).
268
    pub fn map_each_with_validity<T, R, F>(self, f: F) -> VortexResult<PrimitiveArray>
3,727✔
269
    where
3,727✔
270
        T: NativePType,
3,727✔
271
        R: NativePType,
3,727✔
272
        F: FnMut((T, bool)) -> R,
3,727✔
273
    {
274
        let validity = self.validity();
3,727✔
275

276
        let buf_iter = self.buffer::<T>().into_iter();
3,727✔
277

278
        // Pair each value with its validity flag; constant validities use a repeated flag.
        let buffer = match &validity {
3,727✔
279
            Validity::NonNullable | Validity::AllValid => {
280
                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(true)).map(f))
3,487✔
281
            }
282
            Validity::AllInvalid => {
UNCOV
283
                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(false)).map(f))
×
284
            }
285
            Validity::Array(val) => {
240✔
286
                let val = val.to_canonical()?.into_bool()?;
240✔
287
                BufferMut::<R>::from_iter(buf_iter.zip(val.boolean_buffer()).map(f))
240✔
288
            }
289
        };
290
        Ok(PrimitiveArray::new(buffer.freeze(), validity.clone()))
3,727✔
291
    }
3,727✔
292

293
    /// Return a slice of the array's buffer.
294
    ///
295
    /// NOTE: these values may be nonsense if the validity buffer indicates that the value is null.
    ///
    /// # Panics
    ///
    /// Panics if `T::PTYPE` differs from the array's [`ptype`](Self::ptype).
296
    pub fn as_slice<T: NativePType>(&self) -> &[T] {
4,004,739✔
297
        if T::PTYPE != self.ptype() {
4,004,739✔
UNCOV
298
            vortex_panic!(
×
UNCOV
299
                "Attempted to get slice of type {} from array of type {}",
×
300
                T::PTYPE,
UNCOV
301
                self.ptype()
×
302
            )
303
        }
4,004,739✔
304
        let raw_slice = self.byte_buffer().as_ptr();
4,004,739✔
305
        // SAFETY: alignment of Buffer is checked on construction
        // The element count is the byte length divided (rounding down) by `size_of::<T>()`, so
        // the typed slice never extends past the end of the byte buffer.
306
        unsafe {
307
            std::slice::from_raw_parts(raw_slice.cast(), self.byte_buffer().len() / size_of::<T>())
4,004,739✔
308
        }
309
    }
4,004,739✔
310

311
    /// Returns an array with the same bytes and validity but typed as `ptype`.
    ///
    /// No values are converted: the new array is built from a clone of this array's byte buffer.
    /// If `ptype` already equals the current ptype, a clone of `self` is returned.
    ///
    /// # Panics
    ///
    /// Panics if `ptype` has a different byte width than the array's current ptype.
    pub fn reinterpret_cast(&self, ptype: PType) -> Self {
51,244✔
312
        if self.ptype() == ptype {
51,244✔
313
            return self.clone();
20,434✔
314
        }
30,810✔
315

316
        assert_eq!(
30,810✔
317
            self.ptype().byte_width(),
30,810✔
318
            ptype.byte_width(),
30,810✔
UNCOV
319
            "can't reinterpret cast between integers of two different widths"
×
320
        );
321

322
        PrimitiveArray::from_byte_buffer(self.byte_buffer().clone(), ptype, self.validity().clone())
30,810✔
323
    }
51,244✔
324
}
325

326
// Basic array accessors (length, dtype, statistics) for `PrimitiveArray`.
impl ArrayVTable<PrimitiveVTable> for PrimitiveVTable {
327
    /// Number of elements: the byte length of the buffer divided by the ptype's byte width.
    fn len(array: &PrimitiveArray) -> usize {
10,369,789✔
328
        array.byte_buffer().len() / array.ptype().byte_width()
10,369,789✔
329
    }
10,369,789✔
330

331
    /// Borrows the array's stored dtype.
    fn dtype(array: &PrimitiveArray) -> &DType {
28,454,507✔
332
        &array.dtype
28,454,507✔
333
    }
28,454,507✔
334

335
    /// Returns a reference view of the array's statistics, bound to the array itself.
    fn stats(array: &PrimitiveArray) -> StatsSetRef<'_> {
1,171,114✔
336
        array.stats_set.to_ref(array.as_ref())
1,171,114✔
337
    }
1,171,114✔
338
}
339

340
// Exposes the stored validity; `PrimitiveVTable` names `ValidityVTableFromValidityHelper` as its
// validity vtable, which presumably builds on this accessor.
impl ValidityHelper for PrimitiveArray {
341
    /// Borrows the array's validity.
    fn validity(&self) -> &Validity {
7,744,910✔
342
        &self.validity
7,744,910✔
343
    }
7,744,910✔
344
}
345

346
// Collecting plain values yields a non-nullable array; for `Option<T>` items use
// `PrimitiveArray::from_option_iter` instead.
impl<T: NativePType> FromIterator<T> for PrimitiveArray {
347
    /// Collects the items into a buffer and wraps it with `Validity::NonNullable`.
    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
253✔
348
        let values = BufferMut::from_iter(iter);
253✔
349
        PrimitiveArray::new(values.freeze(), Validity::NonNullable)
253✔
350
    }
253✔
351
}
352

353
// A typed buffer converts into a non-nullable primitive array.
impl<T: NativePType> IntoArray for Buffer<T> {
354
    /// Wraps the buffer in a `PrimitiveArray` with `Validity::NonNullable` and erases its type.
    fn into_array(self) -> ArrayRef {
24,874✔
355
        PrimitiveArray::new(self, Validity::NonNullable).into_array()
24,874✔
356
    }
24,874✔
357
}
358

359
// A mutable typed buffer is frozen first and then converted like `Buffer<T>`.
impl<T: NativePType> IntoArray for BufferMut<T> {
360
    /// Freezes the buffer and delegates to `Buffer<T>::into_array`.
    fn into_array(self) -> ArrayRef {
6,998✔
361
        self.freeze().into_array()
6,998✔
362
    }
6,998✔
363
}
364

365
// Canonicalization hooks for `PrimitiveArray`.
impl CanonicalVTable<PrimitiveVTable> for PrimitiveVTable {
366
    /// Wraps a clone of the array in `Canonical::Primitive`; this never returns an error.
    fn canonicalize(array: &PrimitiveArray) -> VortexResult<Canonical> {
168,757✔
367
        Ok(Canonical::Primitive(array.clone()))
168,757✔
368
    }
168,757✔
369

370
    /// Appends the array to `builder` by forwarding to `ArrayBuilder::extend_from_array`,
    /// returning whatever result the builder reports.
    fn append_to_builder(
32,458✔
371
        array: &PrimitiveArray,
32,458✔
372
        builder: &mut dyn ArrayBuilder,
32,458✔
373
    ) -> VortexResult<()> {
32,458✔
374
        builder.extend_from_array(array.as_ref())
32,458✔
375
    }
32,458✔
376
}
377

378
#[cfg(test)]
379
mod tests {
380
    use vortex_buffer::buffer;
381
    use vortex_scalar::PValue;
382

383
    use crate::arrays::{BoolArray, PrimitiveArray};
384
    use crate::compute::conformance::filter::test_filter_conformance;
385
    use crate::compute::conformance::mask::test_mask_conformance;
386
    use crate::compute::conformance::search_sorted::rstest_reuse::apply;
387
    use crate::compute::conformance::search_sorted::{search_sorted_conformance, *};
388
    use crate::search_sorted::{SearchResult, SearchSorted, SearchSortedSide};
389
    use crate::validity::Validity;
390
    use crate::{ArrayRef, IntoArray};
391

392
    // Runs the shared `search_sorted_conformance` cases (attached through `apply`): each case
    // searches `array` for `value` on the given `side` and compares with `expected`.
    #[apply(search_sorted_conformance)]
393
    fn test_search_sorted_primitive(
394
        #[case] array: ArrayRef,
395
        #[case] value: i32,
396
        #[case] side: SearchSortedSide,
397
        #[case] expected: SearchResult,
398
    ) {
399
        let res = array
400
            .as_primitive_typed()
401
            .search_sorted(&Some(PValue::from(value)), side);
402
        assert_eq!(res, expected);
403
    }
404

405
    // Runs the mask conformance suite against the same five values under each validity variant:
    // `NonNullable`, `AllValid`, `AllInvalid`, and an explicit bool-array validity.
    #[test]
406
    fn test_mask_primitive_array() {
1✔
407
        test_mask_conformance(
1✔
408
            PrimitiveArray::new(buffer![0, 1, 2, 3, 4], Validity::NonNullable).as_ref(),
1✔
409
        );
410
        test_mask_conformance(
1✔
411
            PrimitiveArray::new(buffer![0, 1, 2, 3, 4], Validity::AllValid).as_ref(),
1✔
412
        );
413
        test_mask_conformance(
1✔
414
            PrimitiveArray::new(buffer![0, 1, 2, 3, 4], Validity::AllInvalid).as_ref(),
1✔
415
        );
416
        test_mask_conformance(
1✔
417
            PrimitiveArray::new(
1✔
418
                buffer![0, 1, 2, 3, 4],
1✔
419
                Validity::Array(
1✔
420
                    BoolArray::from_iter([true, false, true, false, true]).into_array(),
1✔
421
                ),
1✔
422
            )
1✔
423
            .as_ref(),
1✔
424
        );
425
    }
1✔
426

427
    // Runs the filter conformance suite against non-nullable arrays of 1, 2, 5 and 8 elements,
    // then against an `AllValid` array and one with an explicit bool-array validity.
    #[test]
428
    fn test_filter_primitive_array() {
1✔
429
        // Test various sizes
430
        test_filter_conformance(
1✔
431
            PrimitiveArray::new(buffer![42i32], Validity::NonNullable).as_ref(),
1✔
432
        );
433
        test_filter_conformance(PrimitiveArray::new(buffer![0, 1], Validity::NonNullable).as_ref());
1✔
434
        test_filter_conformance(
1✔
435
            PrimitiveArray::new(buffer![0, 1, 2, 3, 4], Validity::NonNullable).as_ref(),
1✔
436
        );
437
        test_filter_conformance(
1✔
438
            PrimitiveArray::new(buffer![0, 1, 2, 3, 4, 5, 6, 7], Validity::NonNullable).as_ref(),
1✔
439
        );
440

441
        // Test with validity
442
        test_filter_conformance(
1✔
443
            PrimitiveArray::new(buffer![0, 1, 2, 3, 4], Validity::AllValid).as_ref(),
1✔
444
        );
445
        test_filter_conformance(
1✔
446
            PrimitiveArray::new(
1✔
447
                buffer![0, 1, 2, 3, 4, 5],
1✔
448
                Validity::Array(
1✔
449
                    BoolArray::from_iter([true, false, true, false, true, true]).into_array(),
1✔
450
                ),
1✔
451
            )
1✔
452
            .as_ref(),
1✔
453
        );
454
    }
1✔
455
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc