• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16831011058

08 Aug 2025 12:58PM UTC coverage: 84.902% (+0.9%) from 83.993%
16831011058

Pull #4155

github

web-flow
Merge b27de7bd8 into 1d60e8d6c
Pull Request #4155: chore[bench-website]: add back tpc-ds to query_bench

50710 of 59728 relevant lines covered (84.9%)

567607.41 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.98
/vortex-array/src/arrays/primitive/mod.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::fmt::Debug;
5
use std::iter;
6

7
mod accessor;
8

9
use arrow_buffer::BooleanBufferBuilder;
10
use vortex_buffer::{Alignment, Buffer, BufferMut, ByteBuffer, ByteBufferMut};
11
use vortex_dtype::{DType, NativePType, Nullability, PType, match_each_native_ptype};
12
use vortex_error::{VortexResult, vortex_panic};
13

14
use crate::builders::ArrayBuilder;
15
use crate::stats::{ArrayStats, StatsSetRef};
16
use crate::validity::Validity;
17
use crate::{Array, ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, vtable};
18

19
mod compute;
20
mod native_value;
21
mod ops;
22
mod patch;
23
mod serde;
24
mod top_value;
25

26
pub use compute::{IS_CONST_LANE_WIDTH, compute_is_constant};
27
pub use native_value::NativeValue;
28

29
use crate::vtable::{
30
    ArrayVTable, CanonicalVTable, NotSupported, VTable, ValidityHelper,
31
    ValidityVTableFromValidityHelper,
32
};
33

34
vtable!(Primitive);
35

36
impl VTable for PrimitiveVTable {
37
    type Array = PrimitiveArray;
38
    type Encoding = PrimitiveEncoding;
39

40
    type ArrayVTable = Self;
41
    type CanonicalVTable = Self;
42
    type OperationsVTable = Self;
43
    type ValidityVTable = ValidityVTableFromValidityHelper;
44
    type VisitorVTable = Self;
45
    type ComputeVTable = NotSupported;
46
    type EncodeVTable = NotSupported;
47
    type SerdeVTable = Self;
48

49
    fn id(_encoding: &Self::Encoding) -> EncodingId {
1,757,528✔
50
        EncodingId::new_ref("vortex.primitive")
1,757,528✔
51
    }
1,757,528✔
52

53
    fn encoding(_array: &Self::Array) -> EncodingRef {
868,728✔
54
        EncodingRef::new_ref(PrimitiveEncoding.as_ref())
868,728✔
55
    }
868,728✔
56
}
57

58
/// A primitive array that stores [native types][vortex_dtype::NativePType] in a contiguous buffer
59
/// of memory, along with an optional validity child.
60
///
61
/// This mirrors the Apache Arrow Primitive layout and can be converted into and out of one
62
/// without allocations or copies.
63
///
64
/// The underlying buffer must be natively aligned to the primitive type they are representing.
65
///
66
/// Values are stored in their native representation with proper alignment.
67
/// Null values still occupy space in the buffer but are marked invalid in the validity mask.
68
///
69
/// # Examples
70
///
71
/// ```
72
/// use vortex_array::arrays::PrimitiveArray;
73
/// use vortex_array::compute::sum;
74
/// ///
75
/// // Create from iterator using FromIterator impl
76
/// let array: PrimitiveArray = [1i32, 2, 3, 4, 5].into_iter().collect();
77
///
78
/// // Slice the array
79
/// let sliced = array.slice(1, 3).unwrap();
80
///
81
/// // Access individual values
82
/// let value = sliced.scalar_at(0).unwrap();
83
/// assert_eq!(value, 2i32.into());
84
///
85
/// // Convert into a type-erased array that can be passed to compute functions.
86
/// let summed = sum(sliced.as_ref()).unwrap().as_primitive().typed_value::<i64>().unwrap();
87
/// assert_eq!(summed, 5i64);
88
/// ```
89
#[derive(Clone, Debug)]
90
pub struct PrimitiveArray {
91
    dtype: DType,
92
    buffer: ByteBuffer,
93
    validity: Validity,
94
    stats_set: ArrayStats,
95
}
96

97
/// Stateless marker type for the primitive encoding; `PrimitiveVTable::encoding` hands out a
/// reference to it.
#[derive(Debug, Clone)]
pub struct PrimitiveEncoding;
99

100
impl PrimitiveArray {
101
    pub fn new<T: NativePType>(buffer: impl Into<Buffer<T>>, validity: Validity) -> Self {
4,701,810✔
102
        let buffer = buffer.into();
4,701,810✔
103
        if let Some(len) = validity.maybe_len()
4,701,810✔
104
            && buffer.len() != len
95,405✔
105
        {
106
            vortex_panic!(
×
107
                "Buffer and validity length mismatch: buffer={}, validity={}",
×
108
                buffer.len(),
×
109
                len
110
            );
111
        }
4,701,810✔
112

113
        Self {
4,701,810✔
114
            dtype: DType::Primitive(T::PTYPE, validity.nullability()),
4,701,810✔
115
            buffer: buffer.into_byte_buffer(),
4,701,810✔
116
            validity,
4,701,810✔
117
            stats_set: Default::default(),
4,701,810✔
118
        }
4,701,810✔
119
    }
4,701,810✔
120

121
    pub fn empty<T: NativePType>(nullability: Nullability) -> Self {
7,198✔
122
        Self::new(Buffer::<T>::empty(), nullability.into())
7,198✔
123
    }
7,198✔
124

125
    pub fn from_byte_buffer(buffer: ByteBuffer, ptype: PType, validity: Validity) -> Self {
536,758✔
126
        match_each_native_ptype!(ptype, |T| {
536,758✔
127
            Self::new::<T>(Buffer::from_byte_buffer(buffer), validity)
2,857✔
128
        })
129
    }
536,758✔
130

131
    /// Create a PrimitiveArray from an iterator of `T`.
132
    /// NOTE: we cannot impl FromIterator trait since it conflicts with `FromIterator<T>`.
133
    pub fn from_option_iter<T: NativePType, I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
6,665✔
134
        let iter = iter.into_iter();
6,665✔
135
        let mut values = BufferMut::with_capacity(iter.size_hint().0);
6,665✔
136
        let mut validity = BooleanBufferBuilder::new(values.capacity());
6,665✔
137

138
        for i in iter {
35,133✔
139
            match i {
28,468✔
140
                None => {
9,284✔
141
                    validity.append(false);
9,284✔
142
                    values.push(T::default());
9,284✔
143
                }
9,284✔
144
                Some(e) => {
19,184✔
145
                    validity.append(true);
19,184✔
146
                    values.push(e);
19,184✔
147
                }
19,184✔
148
            }
149
        }
150
        Self::new(values.freeze(), Validity::from(validity.finish()))
6,665✔
151
    }
6,665✔
152

153
    /// Create a PrimitiveArray from a byte buffer containing only the valid elements.
154
    pub fn from_values_byte_buffer(
475,722✔
155
        valid_elems_buffer: ByteBuffer,
475,722✔
156
        ptype: PType,
475,722✔
157
        validity: Validity,
475,722✔
158
        n_rows: usize,
475,722✔
159
    ) -> VortexResult<Self> {
475,722✔
160
        let byte_width = ptype.byte_width();
475,722✔
161
        let alignment = Alignment::new(byte_width);
475,722✔
162
        let buffer = match &validity {
475,722✔
163
            Validity::AllValid | Validity::NonNullable => valid_elems_buffer.aligned(alignment),
473,070✔
164
            Validity::AllInvalid => ByteBuffer::zeroed_aligned(n_rows * byte_width, alignment),
×
165
            Validity::Array(is_valid) => {
2,652✔
166
                let bool_array = is_valid.to_canonical()?.into_bool()?;
2,652✔
167
                let bool_buffer = bool_array.boolean_buffer();
2,652✔
168
                let mut bytes = ByteBufferMut::zeroed_aligned(n_rows * byte_width, alignment);
2,652✔
169
                for (i, valid_i) in bool_buffer.set_indices().enumerate() {
4,758✔
170
                    bytes[valid_i * byte_width..(valid_i + 1) * byte_width]
4,758✔
171
                        .copy_from_slice(&valid_elems_buffer[i * byte_width..(i + 1) * byte_width])
4,758✔
172
                }
173
                bytes.freeze()
2,652✔
174
            }
175
        };
176

177
        Ok(Self::from_byte_buffer(buffer, ptype, validity))
475,722✔
178
    }
475,722✔
179

180
    pub fn ptype(&self) -> PType {
220,843,747✔
181
        self.dtype().as_ptype()
220,843,747✔
182
    }
220,843,747✔
183

184
    pub fn byte_buffer(&self) -> &ByteBuffer {
218,312,464✔
185
        &self.buffer
218,312,464✔
186
    }
218,312,464✔
187

188
    pub fn into_byte_buffer(self) -> ByteBuffer {
268,066✔
189
        self.buffer
268,066✔
190
    }
268,066✔
191

192
    pub fn buffer<T: NativePType>(&self) -> Buffer<T> {
2,595,514✔
193
        if T::PTYPE != self.ptype() {
2,595,514✔
194
            vortex_panic!(
×
195
                "Attempted to get buffer of type {} from array of type {}",
×
196
                T::PTYPE,
197
                self.ptype()
×
198
            )
199
        }
2,595,514✔
200
        Buffer::from_byte_buffer(self.byte_buffer().clone())
2,595,514✔
201
    }
2,595,514✔
202

203
    pub fn into_buffer<T: NativePType>(self) -> Buffer<T> {
100,780✔
204
        if T::PTYPE != self.ptype() {
100,780✔
205
            vortex_panic!(
×
206
                "Attempted to get buffer of type {} from array of type {}",
×
207
                T::PTYPE,
208
                self.ptype()
×
209
            )
210
        }
100,780✔
211
        Buffer::from_byte_buffer(self.buffer)
100,780✔
212
    }
100,780✔
213

214
    /// Extract a mutable buffer from the PrimitiveArray. Attempts to do this with zero-copy
215
    /// if the buffer is uniquely owned, otherwise will make a copy.
216
    pub fn into_buffer_mut<T: NativePType>(self) -> BufferMut<T> {
82,075✔
217
        if T::PTYPE != self.ptype() {
82,075✔
218
            vortex_panic!(
×
219
                "Attempted to get buffer_mut of type {} from array of type {}",
×
220
                T::PTYPE,
221
                self.ptype()
×
222
            )
223
        }
82,075✔
224
        self.into_buffer()
82,075✔
225
            .try_into_mut()
82,075✔
226
            .unwrap_or_else(|buffer| BufferMut::<T>::copy_from(&buffer))
82,075✔
227
    }
82,075✔
228

229
    /// Try to extract a mutable buffer from the PrimitiveArray with zero copy.
230
    #[allow(clippy::panic_in_result_fn)]
231
    pub fn try_into_buffer_mut<T: NativePType>(self) -> Result<BufferMut<T>, PrimitiveArray> {
×
232
        if T::PTYPE != self.ptype() {
×
233
            vortex_panic!(
×
234
                "Attempted to get buffer_mut of type {} from array of type {}",
×
235
                T::PTYPE,
236
                self.ptype()
×
237
            )
238
        }
×
239
        let validity = self.validity().clone();
×
240
        Buffer::<T>::from_byte_buffer(self.into_byte_buffer())
×
241
            .try_into_mut()
×
242
            .map_err(|buffer| PrimitiveArray::new(buffer, validity))
×
243
    }
×
244

245
    /// Map each element in the array to a new value.
246
    ///
247
    /// This ignores validity and maps over all maybe-null elements.
248
    ///
249
    /// TODO(ngates): we could be smarter here if validity is sparse and only run the function
250
    ///   over the valid elements.
251
    pub fn map_each<T, R, F>(self, f: F) -> PrimitiveArray
×
252
    where
×
253
        T: NativePType,
×
254
        R: NativePType,
×
255
        F: FnMut(T) -> R,
×
256
    {
257
        let validity = self.validity().clone();
×
258
        let buffer = match self.try_into_buffer_mut() {
×
259
            Ok(buffer_mut) => buffer_mut.map_each(f),
×
260
            Err(parray) => BufferMut::<R>::from_iter(parray.buffer::<T>().iter().copied().map(f)),
×
261
        };
262
        PrimitiveArray::new(buffer.freeze(), validity)
×
263
    }
×
264

265
    /// Map each element in the array to a new value.
266
    ///
267
    /// This doesn't ignore validity and maps over all maybe-null elements, with a bool true if
268
    /// valid and false otherwise.
269
    pub fn map_each_with_validity<T, R, F>(self, f: F) -> VortexResult<PrimitiveArray>
4,496✔
270
    where
4,496✔
271
        T: NativePType,
4,496✔
272
        R: NativePType,
4,496✔
273
        F: FnMut((T, bool)) -> R,
4,496✔
274
    {
275
        let validity = self.validity();
4,496✔
276

277
        let buf_iter = self.buffer::<T>().into_iter();
4,496✔
278

279
        let buffer = match &validity {
4,496✔
280
            Validity::NonNullable | Validity::AllValid => {
281
                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(true)).map(f))
4,246✔
282
            }
283
            Validity::AllInvalid => {
284
                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(false)).map(f))
×
285
            }
286
            Validity::Array(val) => {
250✔
287
                let val = val.to_canonical()?.into_bool()?;
250✔
288
                BufferMut::<R>::from_iter(buf_iter.zip(val.boolean_buffer()).map(f))
250✔
289
            }
290
        };
291
        Ok(PrimitiveArray::new(buffer.freeze(), validity.clone()))
4,496✔
292
    }
4,496✔
293

294
    /// Return a slice of the array's buffer.
295
    ///
296
    /// NOTE: these values may be nonsense if the validity buffer indicates that the value is null.
297
    pub fn as_slice<T: NativePType>(&self) -> &[T] {
39,038,906✔
298
        if T::PTYPE != self.ptype() {
39,038,906✔
299
            vortex_panic!(
×
300
                "Attempted to get slice of type {} from array of type {}",
×
301
                T::PTYPE,
302
                self.ptype()
×
303
            )
304
        }
39,038,906✔
305
        let raw_slice = self.byte_buffer().as_ptr();
39,038,906✔
306
        // SAFETY: alignment of Buffer is checked on construction
307
        unsafe {
308
            std::slice::from_raw_parts(raw_slice.cast(), self.byte_buffer().len() / size_of::<T>())
39,038,906✔
309
        }
310
    }
39,038,906✔
311

312
    pub fn reinterpret_cast(&self, ptype: PType) -> Self {
78,930✔
313
        if self.ptype() == ptype {
78,930✔
314
            return self.clone();
41,313✔
315
        }
37,617✔
316

317
        assert_eq!(
37,617✔
318
            self.ptype().byte_width(),
37,617✔
319
            ptype.byte_width(),
37,617✔
320
            "can't reinterpret cast between integers of two different widths"
×
321
        );
322

323
        PrimitiveArray::from_byte_buffer(self.byte_buffer().clone(), ptype, self.validity().clone())
37,617✔
324
    }
78,930✔
325
}
326

327
impl ArrayVTable<PrimitiveVTable> for PrimitiveVTable {
328
    fn len(array: &PrimitiveArray) -> usize {
137,407,484✔
329
        array.byte_buffer().len() / array.ptype().byte_width()
137,407,484✔
330
    }
137,407,484✔
331

332
    fn dtype(array: &PrimitiveArray) -> &DType {
320,466,095✔
333
        &array.dtype
320,466,095✔
334
    }
320,466,095✔
335

336
    fn stats(array: &PrimitiveArray) -> StatsSetRef<'_> {
13,847,758✔
337
        array.stats_set.to_ref(array.as_ref())
13,847,758✔
338
    }
13,847,758✔
339
}
340

341
impl ValidityHelper for PrimitiveArray {
342
    fn validity(&self) -> &Validity {
69,877,550✔
343
        &self.validity
69,877,550✔
344
    }
69,877,550✔
345
}
346

347
impl<T: NativePType> FromIterator<T> for PrimitiveArray {
348
    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
39,920✔
349
        let values = BufferMut::from_iter(iter);
39,920✔
350
        PrimitiveArray::new(values.freeze(), Validity::NonNullable)
39,920✔
351
    }
39,920✔
352
}
353

354
impl<T: NativePType> IntoArray for Buffer<T> {
355
    fn into_array(self) -> ArrayRef {
37,550✔
356
        PrimitiveArray::new(self, Validity::NonNullable).into_array()
37,550✔
357
    }
37,550✔
358
}
359

360
impl<T: NativePType> IntoArray for BufferMut<T> {
361
    fn into_array(self) -> ArrayRef {
14,563✔
362
        self.freeze().into_array()
14,563✔
363
    }
14,563✔
364
}
365

366
impl CanonicalVTable<PrimitiveVTable> for PrimitiveVTable {
367
    fn canonicalize(array: &PrimitiveArray) -> VortexResult<Canonical> {
3,260,838✔
368
        Ok(Canonical::Primitive(array.clone()))
3,260,838✔
369
    }
3,260,838✔
370

371
    fn append_to_builder(
39,458✔
372
        array: &PrimitiveArray,
39,458✔
373
        builder: &mut dyn ArrayBuilder,
39,458✔
374
    ) -> VortexResult<()> {
39,458✔
375
        builder.extend_from_array(array.as_ref())
39,458✔
376
    }
39,458✔
377
}
378

379
#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;
    use vortex_scalar::PValue;

    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::compute::conformance::filter::test_filter_conformance;
    use crate::compute::conformance::mask::test_mask_conformance;
    use crate::compute::conformance::search_sorted::rstest_reuse::apply;
    use crate::compute::conformance::search_sorted::{search_sorted_conformance, *};
    use crate::search_sorted::{SearchResult, SearchSorted, SearchSortedSide};
    use crate::validity::Validity;
    use crate::{ArrayRef, IntoArray};

    /// Runs the shared search-sorted conformance cases against the primitive typed view.
    #[apply(search_sorted_conformance)]
    fn test_search_sorted_primitive(
        #[case] array: ArrayRef,
        #[case] value: i32,
        #[case] side: SearchSortedSide,
        #[case] expected: SearchResult,
    ) {
        let needle = Some(PValue::from(value));
        let actual = array.as_primitive_typed().search_sorted(&needle, side);
        assert_eq!(actual, expected);
    }

    /// Mask conformance over the same five values with each kind of validity.
    #[test]
    fn test_mask_primitive_array() {
        let validities = [
            Validity::NonNullable,
            Validity::AllValid,
            Validity::AllInvalid,
            Validity::Array(BoolArray::from_iter([true, false, true, false, true]).into_array()),
        ];
        for validity in validities {
            let array = PrimitiveArray::new(buffer![0, 1, 2, 3, 4], validity);
            test_mask_conformance(array.as_ref());
        }
    }

    /// Filter conformance over several lengths, then with non-trivial validity.
    #[test]
    fn test_filter_primitive_array() {
        // Test various sizes
        let sizes = [
            buffer![42i32],
            buffer![0, 1],
            buffer![0, 1, 2, 3, 4],
            buffer![0, 1, 2, 3, 4, 5, 6, 7],
        ];
        for values in sizes {
            let array = PrimitiveArray::new(values, Validity::NonNullable);
            test_filter_conformance(array.as_ref());
        }

        // Test with validity
        let all_valid = PrimitiveArray::new(buffer![0, 1, 2, 3, 4], Validity::AllValid);
        test_filter_conformance(all_valid.as_ref());

        let bool_validity =
            BoolArray::from_iter([true, false, true, false, true, true]).into_array();
        let with_nulls =
            PrimitiveArray::new(buffer![0, 1, 2, 3, 4, 5], Validity::Array(bool_validity));
        test_filter_conformance(with_nulls.as_ref());
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc