• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16595455362

29 Jul 2025 12:00PM UTC coverage: 82.182% (+0.4%) from 81.796%
16595455362

Pull #4036

github

web-flow
Merge 28b120788 into 261aabd6a
Pull Request #4036: varbinview builder buffer deduplication

147 of 155 new or added lines in 2 files covered. (94.84%)

404 existing lines in 34 files now uncovered.

44415 of 54045 relevant lines covered (82.18%)

167869.47 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.85
/vortex-array/src/compute/compare.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use core::fmt;
5
use std::any::Any;
6
use std::fmt::{Display, Formatter};
7
use std::sync::LazyLock;
8

9
use arcref::ArcRef;
10
use arrow_buffer::BooleanBuffer;
11
use arrow_ord::cmp;
12
use vortex_dtype::{DType, NativePType, Nullability};
13
use vortex_error::{VortexError, VortexExpect, VortexResult, vortex_bail, vortex_err};
14
use vortex_scalar::Scalar;
15

16
use crate::arrays::ConstantArray;
17
use crate::arrow::{Datum, from_arrow_array_with_len};
18
use crate::compute::{ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Options, Output};
19
use crate::vtable::VTable;
20
use crate::{Array, ArrayRef, Canonical, IntoArray};
21

22
static COMPARE_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
2,875✔
23
    let compute = ComputeFn::new("compare".into(), ArcRef::new_ref(&Compare));
2,875✔
24
    for kernel in inventory::iter::<CompareKernelRef> {
23,433✔
25
        compute.register_kernel(kernel.0.clone());
20,558✔
26
    }
20,558✔
27
    compute
2,875✔
28
});
2,875✔
29

30
/// Compares two arrays and returns a new boolean array with the result of the comparison.
31
/// Or, returns None if comparison is not supported for these arrays.
32
pub fn compare(left: &dyn Array, right: &dyn Array, operator: Operator) -> VortexResult<ArrayRef> {
17,835✔
33
    COMPARE_FN
17,835✔
34
        .invoke(&InvocationArgs {
17,835✔
35
            inputs: &[left.into(), right.into()],
17,835✔
36
            options: &operator,
17,835✔
37
        })?
17,835✔
38
        .unwrap_array()
17,835✔
39
}
17,835✔
40

41
#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Hash)]
42
pub enum Operator {
43
    /// Equality (`=`)
44
    Eq,
45
    /// Inequality (`!=`)
46
    NotEq,
47
    /// Greater than (`>`)
48
    Gt,
49
    /// Greater than or equal (`>=`)
50
    Gte,
51
    /// Less than (`<`)
52
    Lt,
53
    /// Less than or equal (`<=`)
54
    Lte,
55
}
56

57
impl Display for Operator {
58
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
×
59
        let display = match &self {
×
60
            Operator::Eq => "=",
×
61
            Operator::NotEq => "!=",
×
62
            Operator::Gt => ">",
×
63
            Operator::Gte => ">=",
×
64
            Operator::Lt => "<",
×
65
            Operator::Lte => "<=",
×
66
        };
67
        Display::fmt(display, f)
×
UNCOV
68
    }
×
69
}
70

71
impl Operator {
UNCOV
72
    pub fn inverse(self) -> Self {
×
UNCOV
73
        match self {
×
UNCOV
74
            Operator::Eq => Operator::NotEq,
×
UNCOV
75
            Operator::NotEq => Operator::Eq,
×
UNCOV
76
            Operator::Gt => Operator::Lte,
×
UNCOV
77
            Operator::Gte => Operator::Lt,
×
UNCOV
78
            Operator::Lt => Operator::Gte,
×
UNCOV
79
            Operator::Lte => Operator::Gt,
×
80
        }
UNCOV
81
    }
×
82

83
    /// Change the sides of the operator, where changing lhs and rhs won't change the result of the operation
84
    pub fn swap(self) -> Self {
10,715✔
85
        match self {
10,715✔
86
            Operator::Eq => Operator::Eq,
4,590✔
87
            Operator::NotEq => Operator::NotEq,
993✔
88
            Operator::Gt => Operator::Lt,
1,409✔
89
            Operator::Gte => Operator::Lte,
1,259✔
90
            Operator::Lt => Operator::Gt,
1,071✔
91
            Operator::Lte => Operator::Gte,
1,393✔
92
        }
93
    }
10,715✔
94
}
95

96
pub struct CompareKernelRef(ArcRef<dyn Kernel>);
97
inventory::collect!(CompareKernelRef);
98

99
pub trait CompareKernel: VTable {
100
    fn compare(
101
        &self,
102
        lhs: &Self::Array,
103
        rhs: &dyn Array,
104
        operator: Operator,
105
    ) -> VortexResult<Option<ArrayRef>>;
106
}
107

108
#[derive(Debug)]
109
pub struct CompareKernelAdapter<V: VTable>(pub V);
110

111
impl<V: VTable + CompareKernel> CompareKernelAdapter<V> {
UNCOV
112
    pub const fn lift(&'static self) -> CompareKernelRef {
×
UNCOV
113
        CompareKernelRef(ArcRef::new_ref(self))
×
UNCOV
114
    }
×
115
}
116

117
impl<V: VTable + CompareKernel> Kernel for CompareKernelAdapter<V> {
118
    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
163,826✔
119
        let inputs = CompareArgs::try_from(args)?;
163,826✔
120
        let Some(array) = inputs.lhs.as_opt::<V>() else {
163,826✔
121
            return Ok(None);
147,596✔
122
        };
123
        Ok(V::compare(&self.0, array, inputs.rhs, inputs.operator)?.map(|array| array.into()))
16,230✔
124
    }
163,826✔
125
}
126

127
struct Compare;
128

129
impl ComputeFnVTable for Compare {
130
    fn invoke(
17,835✔
131
        &self,
17,835✔
132
        args: &InvocationArgs,
17,835✔
133
        kernels: &[ArcRef<dyn Kernel>],
17,835✔
134
    ) -> VortexResult<Output> {
17,835✔
135
        let CompareArgs { lhs, rhs, operator } = CompareArgs::try_from(args)?;
17,835✔
136

137
        let return_dtype = self.return_dtype(args)?;
17,835✔
138

139
        if lhs.is_empty() {
17,835✔
140
            return Ok(Canonical::empty(&return_dtype).into_array().into());
1✔
141
        }
17,834✔
142

143
        let left_constant_null = lhs.as_constant().map(|l| l.is_null()).unwrap_or(false);
17,834✔
144
        let right_constant_null = rhs.as_constant().map(|r| r.is_null()).unwrap_or(false);
17,834✔
145
        if left_constant_null || right_constant_null {
17,834✔
146
            return Ok(ConstantArray::new(Scalar::null(return_dtype), lhs.len())
115✔
147
                .into_array()
115✔
148
                .into());
115✔
149
        }
17,719✔
150

151
        let right_is_constant = rhs.is_constant();
17,719✔
152

153
        // Always try to put constants on the right-hand side so encodings can optimise themselves.
154
        if lhs.is_constant() && !right_is_constant {
17,719✔
155
            return Ok(compare(rhs, lhs, operator.swap())?.into());
1,323✔
156
        }
16,396✔
157

158
        // First try lhs op rhs, then invert and try again.
159
        for kernel in kernels {
120,209✔
160
            if let Some(output) = kernel.invoke(args)? {
110,817✔
161
                return Ok(output);
7,004✔
162
            }
103,813✔
163
        }
164
        if let Some(output) = lhs.invoke(&COMPARE_FN, args)? {
9,392✔
UNCOV
165
            return Ok(output);
×
166
        }
9,392✔
167

168
        // Try inverting the operator and swapping the arguments
169
        let inverted_args = InvocationArgs {
9,392✔
170
            inputs: &[rhs.into(), lhs.into()],
9,392✔
171
            options: &operator.swap(),
9,392✔
172
        };
9,392✔
173
        for kernel in kernels {
75,396✔
174
            if let Some(output) = kernel.invoke(&inverted_args)? {
69,116✔
175
                return Ok(output);
3,112✔
176
            }
66,004✔
177
        }
178
        if let Some(output) = rhs.invoke(&COMPARE_FN, &inverted_args)? {
6,280✔
UNCOV
179
            return Ok(output);
×
180
        }
6,280✔
181

182
        // Only log missing compare implementation if there's possibly better one than arrow,
183
        // i.e. lhs isn't arrow or rhs isn't arrow or constant
184
        if !(lhs.is_arrow() && (rhs.is_arrow() || right_is_constant)) {
6,280✔
185
            log::debug!(
3,710✔
UNCOV
186
                "No compare implementation found for LHS {}, RHS {}, and operator {} (or inverse)",
×
UNCOV
187
                lhs.encoding_id(),
×
UNCOV
188
                rhs.encoding_id(),
×
189
                operator,
190
            );
191
        }
2,570✔
192

193
        // Fallback to arrow on canonical types
194
        Ok(arrow_compare(lhs, rhs, operator)?.into())
6,280✔
195
    }
17,835✔
196

197
    fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
35,670✔
198
        let CompareArgs { lhs, rhs, .. } = CompareArgs::try_from(args)?;
35,670✔
199

200
        if !lhs.dtype().eq_ignore_nullability(rhs.dtype()) {
35,670✔
UNCOV
201
            vortex_bail!(
×
UNCOV
202
                "Cannot compare different DTypes {} and {}",
×
UNCOV
203
                lhs.dtype(),
×
204
                rhs.dtype()
×
205
            );
206
        }
35,670✔
207

208
        // TODO(ngates): no reason why not
209
        if lhs.dtype().is_struct() {
35,670✔
UNCOV
210
            vortex_bail!(
×
UNCOV
211
                "Compare does not support arrays with Struct DType, got: {} and {}",
×
UNCOV
212
                lhs.dtype(),
×
UNCOV
213
                rhs.dtype()
×
214
            )
215
        }
35,670✔
216

217
        Ok(DType::Bool(
35,670✔
218
            lhs.dtype().nullability() | rhs.dtype().nullability(),
35,670✔
219
        ))
35,670✔
220
    }
35,670✔
221

222
    fn return_len(&self, args: &InvocationArgs) -> VortexResult<usize> {
17,835✔
223
        let CompareArgs { lhs, rhs, .. } = CompareArgs::try_from(args)?;
17,835✔
224
        if lhs.len() != rhs.len() {
17,835✔
UNCOV
225
            vortex_bail!(
×
UNCOV
226
                "Compare operations only support arrays of the same length, got {} and {}",
×
UNCOV
227
                lhs.len(),
×
UNCOV
228
                rhs.len()
×
229
            );
230
        }
17,835✔
231
        Ok(lhs.len())
17,835✔
232
    }
17,835✔
233

234
    fn is_elementwise(&self) -> bool {
17,835✔
235
        true
17,835✔
236
    }
17,835✔
237
}
238

239
struct CompareArgs<'a> {
240
    lhs: &'a dyn Array,
241
    rhs: &'a dyn Array,
242
    operator: Operator,
243
}
244

245
impl Options for Operator {
246
    fn as_any(&self) -> &dyn Any {
251,273✔
247
        self
251,273✔
248
    }
251,273✔
249
}
250

251
impl<'a> TryFrom<&InvocationArgs<'a>> for CompareArgs<'a> {
252
    type Error = VortexError;
253

254
    fn try_from(value: &InvocationArgs<'a>) -> Result<Self, Self::Error> {
251,273✔
255
        if value.inputs.len() != 2 {
251,273✔
UNCOV
256
            vortex_bail!("Expected 2 inputs, found {}", value.inputs.len());
×
257
        }
251,273✔
258
        let lhs = value.inputs[0]
251,273✔
259
            .array()
251,273✔
260
            .ok_or_else(|| vortex_err!("Expected first input to be an array"))?;
251,273✔
261
        let rhs = value.inputs[1]
251,273✔
262
            .array()
251,273✔
263
            .ok_or_else(|| vortex_err!("Expected second input to be an array"))?;
251,273✔
264
        let operator = *value
251,273✔
265
            .options
251,273✔
266
            .as_any()
251,273✔
267
            .downcast_ref::<Operator>()
251,273✔
268
            .vortex_expect("Expected options to be an operator");
251,273✔
269

270
        Ok(CompareArgs { lhs, rhs, operator })
251,273✔
271
    }
251,273✔
272
}
273

274
/// Helper function to compare empty values with arrays that have external value length information
275
/// like `VarBin`.
276
pub fn compare_lengths_to_empty<P, I>(lengths: I, op: Operator) -> BooleanBuffer
45✔
277
where
45✔
278
    P: NativePType,
45✔
279
    I: Iterator<Item = P>,
45✔
280
{
281
    // All comparison can be expressed in terms of equality. "" is the absolute min of possible value.
282
    let cmp_fn = match op {
45✔
283
        Operator::Eq | Operator::Lte => |v| v == P::zero(),
123✔
284
        Operator::NotEq | Operator::Gt => |v| v != P::zero(),
8✔
285
        Operator::Gte => |_| true,
286
        Operator::Lt => |_| false,
287
    };
288

289
    lengths.map(cmp_fn).collect::<BooleanBuffer>()
45✔
290
}
45✔
291

292
/// Implementation of `CompareFn` using the Arrow crate.
293
fn arrow_compare(
6,281✔
294
    left: &dyn Array,
6,281✔
295
    right: &dyn Array,
6,281✔
296
    operator: Operator,
6,281✔
297
) -> VortexResult<ArrayRef> {
6,281✔
298
    let nullable = left.dtype().is_nullable() || right.dtype().is_nullable();
6,281✔
299
    let lhs = Datum::try_new(left)?;
6,281✔
300
    let rhs = Datum::try_new(right)?;
6,281✔
301

302
    let array = match operator {
6,281✔
303
        Operator::Eq => cmp::eq(&lhs, &rhs)?,
2,641✔
304
        Operator::NotEq => cmp::neq(&lhs, &rhs)?,
233✔
305
        Operator::Gt => cmp::gt(&lhs, &rhs)?,
804✔
306
        Operator::Gte => cmp::gt_eq(&lhs, &rhs)?,
1,025✔
307
        Operator::Lt => cmp::lt(&lhs, &rhs)?,
717✔
308
        Operator::Lte => cmp::lt_eq(&lhs, &rhs)?,
861✔
309
    };
310
    from_arrow_array_with_len(&array, left.len(), nullable)
6,281✔
311
}
6,281✔
312

313
pub fn scalar_cmp(lhs: &Scalar, rhs: &Scalar, operator: Operator) -> Scalar {
5,611✔
314
    if lhs.is_null() | rhs.is_null() {
5,611✔
UNCOV
315
        Scalar::null(DType::Bool(Nullability::Nullable))
×
316
    } else {
317
        let b = match operator {
5,611✔
318
            Operator::Eq => lhs == rhs,
888✔
319
            Operator::NotEq => lhs != rhs,
760✔
320
            Operator::Gt => lhs > rhs,
1,753✔
321
            Operator::Gte => lhs >= rhs,
402✔
322
            Operator::Lt => lhs < rhs,
1,528✔
323
            Operator::Lte => lhs <= rhs,
280✔
324
        };
325

326
        Scalar::bool(b, lhs.dtype().nullability() | rhs.dtype().nullability())
5,611✔
327
    }
328
}
5,611✔
329

330
#[cfg(test)]
331
mod tests {
332
    use arrow_buffer::BooleanBuffer;
333
    use rstest::rstest;
334

335
    use super::*;
336
    use crate::ToCanonical;
337
    use crate::arrays::{BoolArray, ConstantArray, VarBinArray, VarBinViewArray};
338
    use crate::test_harness::to_int_indices;
339
    use crate::validity::Validity;
340

341
    #[test]
342
    fn test_bool_basic_comparisons() {
1✔
343
        let arr = BoolArray::new(
1✔
344
            BooleanBuffer::from_iter([true, true, false, true, false]),
1✔
345
            Validity::from_iter([false, true, true, true, true]),
1✔
346
        );
347

348
        let matches = compare(arr.as_ref(), arr.as_ref(), Operator::Eq)
1✔
349
            .unwrap()
1✔
350
            .to_bool()
1✔
351
            .unwrap();
1✔
352

353
        assert_eq!(to_int_indices(matches).unwrap(), [1u64, 2, 3, 4]);
1✔
354

355
        let matches = compare(arr.as_ref(), arr.as_ref(), Operator::NotEq)
1✔
356
            .unwrap()
1✔
357
            .to_bool()
1✔
358
            .unwrap();
1✔
359
        let empty: [u64; 0] = [];
1✔
360
        assert_eq!(to_int_indices(matches).unwrap(), empty);
1✔
361

362
        let other = BoolArray::new(
1✔
363
            BooleanBuffer::from_iter([false, false, false, true, true]),
1✔
364
            Validity::from_iter([false, true, true, true, true]),
1✔
365
        );
366

367
        let matches = compare(arr.as_ref(), other.as_ref(), Operator::Lte)
1✔
368
            .unwrap()
1✔
369
            .to_bool()
1✔
370
            .unwrap();
1✔
371
        assert_eq!(to_int_indices(matches).unwrap(), [2u64, 3, 4]);
1✔
372

373
        let matches = compare(arr.as_ref(), other.as_ref(), Operator::Lt)
1✔
374
            .unwrap()
1✔
375
            .to_bool()
1✔
376
            .unwrap();
1✔
377
        assert_eq!(to_int_indices(matches).unwrap(), [4u64]);
1✔
378

379
        let matches = compare(other.as_ref(), arr.as_ref(), Operator::Gte)
1✔
380
            .unwrap()
1✔
381
            .to_bool()
1✔
382
            .unwrap();
1✔
383
        assert_eq!(to_int_indices(matches).unwrap(), [2u64, 3, 4]);
1✔
384

385
        let matches = compare(other.as_ref(), arr.as_ref(), Operator::Gt)
1✔
386
            .unwrap()
1✔
387
            .to_bool()
1✔
388
            .unwrap();
1✔
389
        assert_eq!(to_int_indices(matches).unwrap(), [4u64]);
1✔
390
    }
1✔
391

392
    #[test]
393
    fn constant_compare() {
1✔
394
        let left = ConstantArray::new(Scalar::from(2u32), 10);
1✔
395
        let right = ConstantArray::new(Scalar::from(10u32), 10);
1✔
396

397
        let compare = compare(left.as_ref(), right.as_ref(), Operator::Gt).unwrap();
1✔
398
        let res = compare.as_constant().unwrap();
1✔
399
        assert_eq!(res.as_bool().value(), Some(false));
1✔
400
        assert_eq!(compare.len(), 10);
1✔
401

402
        let compare = arrow_compare(&left.into_array(), &right.into_array(), Operator::Gt).unwrap();
1✔
403
        let res = compare.as_constant().unwrap();
1✔
404
        assert_eq!(res.as_bool().value(), Some(false));
1✔
405
        assert_eq!(compare.len(), 10);
1✔
406
    }
1✔
407

408
    #[rstest]
409
    #[case(Operator::Eq, vec![false, false, false, true])]
410
    #[case(Operator::NotEq, vec![true, true, true, false])]
411
    #[case(Operator::Gt, vec![true, true, true, false])]
412
    #[case(Operator::Gte, vec![true, true, true, true])]
413
    #[case(Operator::Lt, vec![false, false, false, false])]
414
    #[case(Operator::Lte, vec![false, false, false, true])]
415
    fn test_cmp_to_empty(#[case] op: Operator, #[case] expected: Vec<bool>) {
416
        let lengths: Vec<i32> = vec![1, 5, 7, 0];
417

418
        let output = compare_lengths_to_empty(lengths.iter().copied(), op);
419
        assert_eq!(Vec::from_iter(output.iter()), expected);
420
    }
421

422
    #[rstest]
423
    #[case(VarBinArray::from(vec!["a", "b"]).into_array(), VarBinViewArray::from_iter_str(["a", "b"]).into_array())]
424
    #[case(VarBinViewArray::from_iter_str(["a", "b"]).into_array(), VarBinArray::from(vec!["a", "b"]).into_array())]
425
    #[case(VarBinArray::from(vec!["a".as_bytes(), "b".as_bytes()]).into_array(), VarBinViewArray::from_iter_bin(["a".as_bytes(), "b".as_bytes()]).into_array())]
426
    #[case(VarBinViewArray::from_iter_bin(["a".as_bytes(), "b".as_bytes()]).into_array(), VarBinArray::from(vec!["a".as_bytes(), "b".as_bytes()]).into_array())]
427
    fn arrow_compare_different_encodings(#[case] left: ArrayRef, #[case] right: ArrayRef) {
428
        let res = compare(&left, &right, Operator::Eq).unwrap();
429
        assert_eq!(
430
            res.to_bool().unwrap().boolean_buffer().count_set_bits(),
431
            left.len()
432
        );
433
    }
434
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc