• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16595455362

29 Jul 2025 12:00PM UTC coverage: 82.182% (+0.4%) from 81.796%
16595455362

Pull #4036

github

web-flow
Merge 28b120788 into 261aabd6a
Pull Request #4036: varbinview builder buffer deduplication

147 of 155 new or added lines in 2 files covered. (94.84%)

404 existing lines in 34 files now uncovered.

44415 of 54045 relevant lines covered (82.18%)

167869.47 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

73.46
/vortex-array/src/compute/mod.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
//! Compute kernels on top of Vortex Arrays.
5
//!
6
//! We aim to provide a basic set of compute kernels that can be used to efficiently index, slice,
7
//! and filter Vortex Arrays in their encoded forms.
8
//!
9
//! Every array encoding has the ability to implement their own efficient implementations of these
10
//! operators, else we will decode, and perform the equivalent operator from Arrow.
11

12
use std::any::{Any, type_name};
13
use std::fmt::{Debug, Formatter};
14

15
use arcref::ArcRef;
16
pub use between::*;
17
pub use boolean::*;
18
pub use cast::*;
19
pub use compare::*;
20
pub use fill_null::*;
21
pub use filter::*;
22
pub use invert::*;
23
pub use is_constant::*;
24
pub use is_sorted::*;
25
use itertools::Itertools;
26
pub use like::*;
27
pub use list_contains::*;
28
pub use mask::*;
29
pub use min_max::*;
30
pub use nan_count::*;
31
pub use numeric::*;
32
use parking_lot::RwLock;
33
pub use sum::*;
34
pub use take::*;
35
use vortex_dtype::DType;
36
use vortex_error::{VortexError, VortexResult, vortex_bail, vortex_err};
37
use vortex_mask::Mask;
38
use vortex_scalar::Scalar;
39
pub use zip::*;
40

41
use crate::builders::ArrayBuilder;
42
use crate::{Array, ArrayRef};
43

44
#[cfg(feature = "arbitrary")]
45
mod arbitrary;
46
mod between;
47
mod boolean;
48
mod cast;
49
mod compare;
50
#[cfg(feature = "test-harness")]
51
pub mod conformance;
52
mod fill_null;
53
mod filter;
54
mod invert;
55
mod is_constant;
56
mod is_sorted;
57
mod like;
58
mod list_contains;
59
mod mask;
60
mod min_max;
61
mod nan_count;
62
mod numeric;
63
mod sum;
64
mod take;
65
mod zip;
66

67
/// An instance of a compute function holding the implementation vtable and a set of registered
68
/// compute kernels.
69
pub struct ComputeFn {
70
    id: ArcRef<str>,
71
    vtable: ArcRef<dyn ComputeFnVTable>,
72
    kernels: RwLock<Vec<ArcRef<dyn Kernel>>>,
73
}
74

75
impl ComputeFn {
76
    /// Create a new compute function from the given [`ComputeFnVTable`].
77
    pub fn new(id: ArcRef<str>, vtable: ArcRef<dyn ComputeFnVTable>) -> Self {
41,084✔
78
        Self {
41,084✔
79
            id,
41,084✔
80
            vtable,
41,084✔
81
            kernels: Default::default(),
41,084✔
82
        }
41,084✔
83
    }
41,084✔
84

85
    /// Returns the string identifier of the compute function.
86
    pub fn id(&self) -> &ArcRef<str> {
×
UNCOV
87
        &self.id
×
UNCOV
88
    }
×
89

90
    /// Register a kernel for the compute function.
91
    pub fn register_kernel(&self, kernel: ArcRef<dyn Kernel>) {
363,659✔
92
        self.kernels.write().push(kernel);
363,659✔
93
    }
363,659✔
94

95
    /// Invokes the compute function with the given arguments.
96
    pub fn invoke(&self, args: &InvocationArgs) -> VortexResult<Output> {
507,030✔
97
        // Perform some pre-condition checks against the arguments and the function properties.
98
        if self.is_elementwise() {
507,030✔
99
            // For element-wise functions, all input arrays must be the same length.
100
            if !args
74,634✔
101
                .inputs
74,634✔
102
                .iter()
74,634✔
103
                .filter_map(|input| input.array())
138,585✔
104
                .map(|array| array.len())
104,168✔
105
                .all_equal()
74,634✔
106
            {
107
                vortex_bail!(
38✔
108
                    "Compute function {} is elementwise but input arrays have different lengths",
38✔
109
                    self.id
110
                );
111
            }
74,596✔
112
        }
432,396✔
113

114
        let expected_dtype = self.vtable.return_dtype(args)?;
506,992✔
115
        let expected_len = self.vtable.return_len(args)?;
506,991✔
116

117
        let output = self.vtable.invoke(args, &self.kernels.read())?;
504,651✔
118

119
        if output.dtype() != &expected_dtype {
503,078✔
UNCOV
120
            vortex_bail!(
×
121
                "Internal error: compute function {} returned a result of type {} but expected {}\n{}",
×
122
                self.id,
123
                output.dtype(),
×
124
                &expected_dtype,
×
125
                args.inputs
×
126
                    .iter()
×
UNCOV
127
                    .filter_map(|input| input.array())
×
UNCOV
128
                    .format_with(",", |array, f| f(&array.display_tree()))
×
129
            );
130
        }
503,078✔
131
        if output.len() != expected_len {
503,078✔
UNCOV
132
            vortex_bail!(
×
133
                "Internal error: compute function {} returned a result of length {} but expected {}",
×
134
                self.id,
UNCOV
135
                output.len(),
×
136
                expected_len
137
            );
138
        }
503,078✔
139

140
        Ok(output)
503,078✔
141
    }
507,030✔
142

143
    /// Compute the return type of the function given the input arguments.
144
    pub fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
15✔
145
        self.vtable.return_dtype(args)
15✔
146
    }
15✔
147

148
    /// Compute the return length of the function given the input arguments.
149
    pub fn return_len(&self, args: &InvocationArgs) -> VortexResult<usize> {
×
UNCOV
150
        self.vtable.return_len(args)
×
UNCOV
151
    }
×
152

153
    /// Returns whether the compute function is elementwise, i.e. the output is the same shape as
    /// the input and no information is shared between elements.
154
    pub fn is_elementwise(&self) -> bool {
507,060✔
155
        // TODO(ngates): should this just be a constant passed in the constructor?
156
        self.vtable.is_elementwise()
507,060✔
157
    }
507,060✔
158

159
    /// Returns the compute function's kernels.
160
    pub fn kernels(&self) -> Vec<ArcRef<dyn Kernel>> {
4,554✔
161
        self.kernels.read().to_vec()
4,554✔
162
    }
4,554✔
163
}
164

165
/// VTable for the implementation of a compute function.
166
pub trait ComputeFnVTable: 'static + Send + Sync {
167
    /// Invokes the compute function entry-point with the given input arguments and options.
168
    ///
169
    /// The entry-point logic can short-circuit compute using statistics, update result array
170
    /// statistics, search for relevant compute kernels, and canonicalize the inputs in order
171
    /// to successfully compute a result.
172
    fn invoke(&self, args: &InvocationArgs, kernels: &[ArcRef<dyn Kernel>])
173
    -> VortexResult<Output>;
174

175
    /// Computes the return type of the function given the input arguments.
176
    ///
177
    /// All kernel implementations will be validated to return the [`DType`] as computed here.
178
    fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType>;
179

180
    /// Computes the return length of the function given the input arguments.
181
    ///
182
    /// All kernel implementations will be validated to return the len as computed here.
183
    /// Scalars are considered to have length 1.
184
    fn return_len(&self, args: &InvocationArgs) -> VortexResult<usize>;
185

186
    /// Returns whether the function operates elementwise, i.e. the output is the same shape as the
187
    /// input and no information is shared between elements.
188
    ///
189
    /// Examples include `add`, `subtract`, `and`, `cast`, `fill_null` etc.
190
    /// Examples that are not elementwise include `sum`, `count`, `min`, `fill_forward` etc.
191
    ///
192
    /// All input arrays to an elementwise function *must* have the same length.
193
    fn is_elementwise(&self) -> bool;
194
}
195

196
/// Arguments to a compute function invocation.
197
#[derive(Clone)]
198
pub struct InvocationArgs<'a> {
199
    pub inputs: &'a [Input<'a>],
200
    pub options: &'a dyn Options,
201
}
202

203
/// For unary compute functions, it's useful to just have this short-cut.
204
pub struct UnaryArgs<'a, O: Options> {
205
    pub array: &'a dyn Array,
206
    pub options: &'a O,
207
}
208

209
impl<'a, O: Options> TryFrom<&InvocationArgs<'a>> for UnaryArgs<'a, O> {
210
    type Error = VortexError;
211

212
    fn try_from(value: &InvocationArgs<'a>) -> Result<Self, Self::Error> {
375,319✔
213
        if value.inputs.len() != 1 {
375,319✔
UNCOV
214
            vortex_bail!("Expected 1 input, found {}", value.inputs.len());
×
215
        }
375,319✔
216
        let array = value.inputs[0]
375,319✔
217
            .array()
375,319✔
218
            .ok_or_else(|| vortex_err!("Expected input 0 to be an array"))?;
375,319✔
219
        let options =
375,319✔
220
            value.options.as_any().downcast_ref::<O>().ok_or_else(|| {
375,319✔
UNCOV
221
                vortex_err!("Expected options to be of type {}", type_name::<O>())
×
UNCOV
222
            })?;
×
223
        Ok(UnaryArgs { array, options })
375,319✔
224
    }
375,319✔
225
}
226

227
/// For binary compute functions, it's useful to just have this short-cut.
228
pub struct BinaryArgs<'a, O: Options> {
229
    pub lhs: &'a dyn Array,
230
    pub rhs: &'a dyn Array,
231
    pub options: &'a O,
232
}
233

234
impl<'a, O: Options> TryFrom<&InvocationArgs<'a>> for BinaryArgs<'a, O> {
235
    type Error = VortexError;
236

237
    fn try_from(value: &InvocationArgs<'a>) -> Result<Self, Self::Error> {
1,367✔
238
        if value.inputs.len() != 2 {
1,367✔
UNCOV
239
            vortex_bail!("Expected 2 input, found {}", value.inputs.len());
×
240
        }
1,367✔
241
        let lhs = value.inputs[0]
1,367✔
242
            .array()
1,367✔
243
            .ok_or_else(|| vortex_err!("Expected input 0 to be an array"))?;
1,367✔
244
        let rhs = value.inputs[1]
1,367✔
245
            .array()
1,367✔
246
            .ok_or_else(|| vortex_err!("Expected input 1 to be an array"))?;
1,367✔
247
        let options =
1,367✔
248
            value.options.as_any().downcast_ref::<O>().ok_or_else(|| {
1,367✔
UNCOV
249
                vortex_err!("Expected options to be of type {}", type_name::<O>())
×
UNCOV
250
            })?;
×
251
        Ok(BinaryArgs { lhs, rhs, options })
1,367✔
252
    }
1,367✔
253
}
254

255
/// Input to a compute function.
256
pub enum Input<'a> {
257
    Scalar(&'a Scalar),
258
    Array(&'a dyn Array),
259
    Mask(&'a Mask),
260
    Builder(&'a mut dyn ArrayBuilder),
261
    DType(&'a DType),
262
}
263

264
impl Debug for Input<'_> {
265
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
×
266
        let mut f = f.debug_struct("Input");
×
267
        match self {
×
268
            Input::Scalar(scalar) => f.field("Scalar", scalar),
×
269
            Input::Array(array) => f.field("Array", array),
×
270
            Input::Mask(mask) => f.field("Mask", mask),
×
UNCOV
271
            Input::Builder(builder) => f.field("Builder", &builder.len()),
×
272
            Input::DType(dtype) => f.field("DType", dtype),
×
273
        };
UNCOV
274
        f.finish()
×
UNCOV
275
    }
×
276
}
277

278
impl<'a> From<&'a dyn Array> for Input<'a> {
279
    fn from(value: &'a dyn Array) -> Self {
741,922✔
280
        Input::Array(value)
741,922✔
281
    }
741,922✔
282
}
283

284
impl<'a> From<&'a Scalar> for Input<'a> {
285
    fn from(value: &'a Scalar) -> Self {
5,963✔
286
        Input::Scalar(value)
5,963✔
287
    }
5,963✔
288
}
289

290
impl<'a> From<&'a Mask> for Input<'a> {
291
    fn from(value: &'a Mask) -> Self {
41,733✔
292
        Input::Mask(value)
41,733✔
293
    }
41,733✔
294
}
295

296
impl<'a> From<&'a DType> for Input<'a> {
297
    fn from(value: &'a DType) -> Self {
24,467✔
298
        Input::DType(value)
24,467✔
299
    }
24,467✔
300
}
301

302
impl<'a> Input<'a> {
303
    pub fn scalar(&self) -> Option<&'a Scalar> {
19,321✔
304
        match self {
19,321✔
305
            Input::Scalar(scalar) => Some(*scalar),
19,321✔
UNCOV
306
            _ => None,
×
307
        }
308
    }
19,321✔
309

310
    pub fn array(&self) -> Option<&'a dyn Array> {
2,932,077✔
311
        match self {
2,932,077✔
312
            Input::Array(array) => Some(*array),
2,897,660✔
313
            _ => None,
34,417✔
314
        }
315
    }
2,932,077✔
316

317
    pub fn mask(&self) -> Option<&'a Mask> {
456,059✔
318
        match self {
456,059✔
319
            Input::Mask(mask) => Some(*mask),
456,059✔
UNCOV
320
            _ => None,
×
321
        }
322
    }
456,059✔
323

324
    pub fn builder(&'a mut self) -> Option<&'a mut dyn ArrayBuilder> {
×
325
        match self {
×
UNCOV
326
            Input::Builder(builder) => Some(*builder),
×
327
            _ => None,
×
328
        }
UNCOV
329
    }
×
330

331
    pub fn dtype(&self) -> Option<&'a DType> {
134,983✔
332
        match self {
134,983✔
333
            Input::DType(dtype) => Some(*dtype),
134,983✔
UNCOV
334
            _ => None,
×
335
        }
336
    }
134,983✔
337
}
338

339
/// Output from a compute function.
340
#[derive(Debug)]
341
pub enum Output {
342
    Scalar(Scalar),
343
    Array(ArrayRef),
344
}
345

346
#[allow(clippy::len_without_is_empty)]
347
impl Output {
348
    pub fn dtype(&self) -> &DType {
503,078✔
349
        match self {
503,078✔
350
            Output::Scalar(scalar) => scalar.dtype(),
352,969✔
351
            Output::Array(array) => array.dtype(),
150,109✔
352
        }
353
    }
503,078✔
354

355
    pub fn len(&self) -> usize {
503,078✔
356
        match self {
503,078✔
357
            Output::Scalar(_) => 1,
352,969✔
358
            Output::Array(array) => array.len(),
150,109✔
359
        }
360
    }
503,078✔
361

362
    /// Consumes the output and returns the contained [`Scalar`].
    ///
    /// # Errors
    ///
    /// Returns an error if the output is an [`Output::Array`] rather than an [`Output::Scalar`].
    pub fn unwrap_scalar(self) -> VortexResult<Scalar> {
513,054✔
363
        match self {
513,054✔
UNCOV
364
            // This accessor expects a scalar; the message names what was expected vs. what was found.
            Output::Array(_) => vortex_bail!("Expected scalar output, got Array"),
×
365
            Output::Scalar(scalar) => Ok(scalar),
513,054✔
366
        }
367
    }
513,054✔
368

369
    pub fn unwrap_array(self) -> VortexResult<ArrayRef> {
154,625✔
370
        match self {
154,625✔
371
            Output::Array(array) => Ok(array),
154,625✔
UNCOV
372
            Output::Scalar(_) => vortex_bail!("Expected array output, got Scalar"),
×
373
        }
374
    }
154,625✔
375
}
376

377
impl From<ArrayRef> for Output {
378
    fn from(value: ArrayRef) -> Self {
154,252✔
379
        Output::Array(value)
154,252✔
380
    }
154,252✔
381
}
382

383
impl From<Scalar> for Output {
384
    fn from(value: Scalar) -> Self {
513,054✔
385
        Output::Scalar(value)
513,054✔
386
    }
513,054✔
387
}
388

389
/// Options for a compute function invocation.
390
pub trait Options: 'static {
391
    fn as_any(&self) -> &dyn Any;
392
}
393

394
impl Options for () {
395
    fn as_any(&self) -> &dyn Any {
376,686✔
396
        self
376,686✔
397
    }
376,686✔
398
}
399

400
/// Compute functions can ask arrays for compute kernels for a given invocation.
401
///
402
/// The kernel is invoked with the input arguments and options, and can return `None` if it is
403
/// unable to compute the result for the given inputs due to missing implementation logic.
404
/// For example, if the kernel doesn't support the `LTE` operator. By returning `None`, the kernel
405
/// is indicating that it cannot compute the result for the given inputs, and another kernel should
406
/// be tried. It does *not* mean that the given inputs are invalid for the compute function.
407
///
408
/// If the kernel fails to compute a result, it should return an `Err` with the error.
409
pub trait Kernel: 'static + Send + Sync + Debug {
410
    /// Invokes the kernel with the given input arguments and options.
411
    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>>;
412
}
413

414
/// Register a kernel for a compute function.
415
/// See each compute function for the correct type of kernel to register.
416
#[macro_export]
417
macro_rules! register_kernel {
418
    ($T:expr) => {
419
        $crate::aliases::inventory::submit!($T);
420
    };
421
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc