• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16935267080

13 Aug 2025 11:00AM UTC coverage: 24.312% (-63.3%) from 87.658%
16935267080

Pull #4226

github

web-flow
Merge 81b48c7fb into baa6ea202
Pull Request #4226: Support converting TimestampTZ to and from duckdb

0 of 2 new or added lines in 1 file covered. (0.0%)

20666 existing lines in 469 files now uncovered.

8726 of 35892 relevant lines covered (24.31%)

147.74 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/vortex-array/src/compute/filter.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::ops::BitAnd;
5
use std::sync::LazyLock;
6

7
use arcref::ArcRef;
8
use arrow_array::BooleanArray;
9
use vortex_dtype::DType;
10
use vortex_error::{VortexError, VortexExpect, VortexResult, vortex_bail, vortex_err};
11
use vortex_mask::Mask;
12
use vortex_scalar::Scalar;
13

14
use crate::arrays::{BoolArray, ConstantArray};
15
use crate::arrow::{FromArrowArray, IntoArrowArray};
16
use crate::compute::{ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Output, fill_null};
17
use crate::vtable::VTable;
18
use crate::{Array, ArrayRef, Canonical, IntoArray, ToCanonical};
19

20
/// The filter [`ComputeFn`].
UNCOV
21
static FILTER_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
×
UNCOV
22
    let compute = ComputeFn::new("filter".into(), ArcRef::new_ref(&Filter));
×
UNCOV
23
    for kernel in inventory::iter::<FilterKernelRef> {
×
UNCOV
24
        compute.register_kernel(kernel.0.clone());
×
UNCOV
25
    }
×
UNCOV
26
    compute
×
UNCOV
27
});
×
28

29
/// Keep only the elements for which the corresponding mask value is true.
30
///
31
/// # Examples
32
///
33
/// ```
34
/// use vortex_array::{Array, IntoArray};
35
/// use vortex_array::arrays::{BoolArray, PrimitiveArray};
36
/// use vortex_array::compute::{ filter, mask};
37
/// use vortex_mask::Mask;
38
/// use vortex_scalar::Scalar;
39
///
40
/// let array =
41
///     PrimitiveArray::from_option_iter([Some(0i32), None, Some(1i32), None, Some(2i32)]);
42
/// let mask = Mask::try_from(
43
///     &BoolArray::from_iter([true, false, false, false, true]),
44
/// )
45
/// .unwrap();
46
///
47
/// let filtered = filter(array.as_ref(), &mask).unwrap();
48
/// assert_eq!(filtered.len(), 2);
49
/// assert_eq!(filtered.scalar_at(0).unwrap(), Scalar::from(Some(0_i32)));
50
/// assert_eq!(filtered.scalar_at(1).unwrap(), Scalar::from(Some(2_i32)));
51
/// ```
52
///
53
/// # Panics
54
///
55
/// The `predicate` must receive an Array with type non-nullable bool, and will panic if this is
56
/// not the case.
UNCOV
57
pub fn filter(array: &dyn Array, mask: &Mask) -> VortexResult<ArrayRef> {
×
UNCOV
58
    FILTER_FN
×
UNCOV
59
        .invoke(&InvocationArgs {
×
UNCOV
60
            inputs: &[array.into(), mask.into()],
×
UNCOV
61
            options: &(),
×
UNCOV
62
        })?
×
UNCOV
63
        .unwrap_array()
×
UNCOV
64
}
×
65

66
struct Filter;
67

68
impl ComputeFnVTable for Filter {
UNCOV
69
    fn invoke(
×
UNCOV
70
        &self,
×
UNCOV
71
        args: &InvocationArgs,
×
UNCOV
72
        kernels: &[ArcRef<dyn Kernel>],
×
UNCOV
73
    ) -> VortexResult<Output> {
×
UNCOV
74
        let FilterArgs { array, mask } = FilterArgs::try_from(args)?;
×
75

UNCOV
76
        let true_count = mask.true_count();
×
77

78
        // Fast-path for empty mask.
UNCOV
79
        if true_count == 0 {
×
UNCOV
80
            return Ok(Canonical::empty(array.dtype()).into_array().into());
×
UNCOV
81
        }
×
82

83
        // Fast-path for full mask
UNCOV
84
        if true_count == mask.len() {
×
UNCOV
85
            return Ok(array.to_array().into());
×
UNCOV
86
        }
×
87

UNCOV
88
        for kernel in kernels {
×
UNCOV
89
            if let Some(output) = kernel.invoke(args)? {
×
UNCOV
90
                return Ok(output);
×
UNCOV
91
            }
×
92
        }
UNCOV
93
        if let Some(output) = array.invoke(&FILTER_FN, args)? {
×
94
            return Ok(output);
×
UNCOV
95
        }
×
96

97
        // Otherwise, we can use scalar_at if the mask has length 1.
UNCOV
98
        if mask.true_count() == 1 {
×
UNCOV
99
            let idx = mask.first().vortex_expect("true_count == 1");
×
UNCOV
100
            return Ok(ConstantArray::new(array.scalar_at(idx)?, 1)
×
UNCOV
101
                .into_array()
×
UNCOV
102
                .into());
×
UNCOV
103
        }
×
104

105
        // Fallback: implement using Arrow kernels.
UNCOV
106
        log::debug!("No filter implementation found for {}", array.encoding_id(),);
×
107

UNCOV
108
        if !array.is_canonical() {
×
UNCOV
109
            let canonical = array.to_canonical()?.into_array();
×
UNCOV
110
            return filter(&canonical, mask).map(Into::into);
×
111
        };
×
112

113
        vortex_bail!(
×
114
            "No filter implementation found for array {}",
×
115
            array.encoding()
×
116
        )
UNCOV
117
    }
×
118

UNCOV
119
    fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
×
UNCOV
120
        Ok(FilterArgs::try_from(args)?.array.dtype().clone())
×
UNCOV
121
    }
×
122

UNCOV
123
    fn return_len(&self, args: &InvocationArgs) -> VortexResult<usize> {
×
UNCOV
124
        let FilterArgs { array, mask } = FilterArgs::try_from(args)?;
×
UNCOV
125
        if mask.len() != array.len() {
×
UNCOV
126
            vortex_bail!(
×
UNCOV
127
                "mask.len() is {}, does not equal array.len() of {}",
×
UNCOV
128
                mask.len(),
×
UNCOV
129
                array.len()
×
130
            );
UNCOV
131
        }
×
UNCOV
132
        Ok(mask.true_count())
×
UNCOV
133
    }
×
134

UNCOV
135
    fn is_elementwise(&self) -> bool {
×
UNCOV
136
        false
×
UNCOV
137
    }
×
138
}
139

140
struct FilterArgs<'a> {
141
    array: &'a dyn Array,
142
    mask: &'a Mask,
143
}
144

145
impl<'a> TryFrom<&InvocationArgs<'a>> for FilterArgs<'a> {
146
    type Error = VortexError;
147

UNCOV
148
    fn try_from(value: &InvocationArgs<'a>) -> Result<Self, Self::Error> {
×
UNCOV
149
        if value.inputs.len() != 2 {
×
150
            vortex_bail!("Expected 2 inputs, found {}", value.inputs.len());
×
UNCOV
151
        }
×
UNCOV
152
        let array = value.inputs[0]
×
UNCOV
153
            .array()
×
UNCOV
154
            .ok_or_else(|| vortex_err!("Expected first input to be an array"))?;
×
UNCOV
155
        let mask = value.inputs[1]
×
UNCOV
156
            .mask()
×
UNCOV
157
            .ok_or_else(|| vortex_err!("Expected second input to be a mask"))?;
×
UNCOV
158
        Ok(Self { array, mask })
×
UNCOV
159
    }
×
160
}
161

162
/// A kernel that implements the filter function.
163
pub struct FilterKernelRef(pub ArcRef<dyn Kernel>);
164
inventory::collect!(FilterKernelRef);
165

166
pub trait FilterKernel: VTable {
167
    /// Filter an array by the provided predicate.
168
    ///
169
    /// Note that the entry-point filter functions handles `Mask::AllTrue` and `Mask::AllFalse`,
170
    /// leaving only `Mask::Values` to be handled by this function.
171
    fn filter(&self, array: &Self::Array, mask: &Mask) -> VortexResult<ArrayRef>;
172
}
173

174
/// Adapter to convert a [`FilterKernel`] into a [`Kernel`].
175
#[derive(Debug)]
176
pub struct FilterKernelAdapter<V: VTable>(pub V);
177

178
impl<V: VTable + FilterKernel> FilterKernelAdapter<V> {
179
    pub const fn lift(&'static self) -> FilterKernelRef {
×
180
        FilterKernelRef(ArcRef::new_ref(self))
×
181
    }
×
182
}
183

184
impl<V: VTable + FilterKernel> Kernel for FilterKernelAdapter<V> {
UNCOV
185
    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
×
UNCOV
186
        let inputs = FilterArgs::try_from(args)?;
×
UNCOV
187
        let Some(array) = inputs.array.as_opt::<V>() else {
×
UNCOV
188
            return Ok(None);
×
189
        };
UNCOV
190
        let filtered = V::filter(&self.0, array, inputs.mask)?;
×
UNCOV
191
        Ok(Some(filtered.into()))
×
UNCOV
192
    }
×
193
}
194

195
impl TryFrom<&BoolArray> for Mask {
196
    type Error = VortexError;
197

UNCOV
198
    fn try_from(array: &BoolArray) -> Result<Self, Self::Error> {
×
UNCOV
199
        if let Some(constant) = array.as_constant() {
×
UNCOV
200
            let bool_constant = constant.as_bool();
×
UNCOV
201
            if bool_constant.value().unwrap_or(false) {
×
UNCOV
202
                return Ok(Self::new_true(array.len()));
×
203
            } else {
UNCOV
204
                return Ok(Self::new_false(array.len()));
×
205
            }
UNCOV
206
        }
×
207

208
        // Extract a boolean buffer, treating null values to false
UNCOV
209
        let buffer = match array.validity_mask()? {
×
UNCOV
210
            Mask::AllTrue(_) => array.boolean_buffer().clone(),
×
211
            Mask::AllFalse(_) => return Ok(Self::new_false(array.len())),
×
UNCOV
212
            Mask::Values(validity) => validity.boolean_buffer().bitand(array.boolean_buffer()),
×
213
        };
214

UNCOV
215
        Ok(Self::from_buffer(buffer))
×
UNCOV
216
    }
×
217
}
218

219
impl TryFrom<&dyn Array> for Mask {
220
    type Error = VortexError;
221

222
    /// Converts from a possible nullable boolean array. Null values are treated as false.
UNCOV
223
    fn try_from(array: &dyn Array) -> Result<Self, Self::Error> {
×
UNCOV
224
        if !matches!(array.dtype(), DType::Bool(_)) {
×
225
            vortex_bail!("mask must be bool array, has dtype {}", array.dtype());
×
UNCOV
226
        }
×
227

228
        // Convert nulls to false first in case this can be done cheaply by the encoding.
UNCOV
229
        let array = fill_null(array, &Scalar::bool(false, array.dtype().nullability()))?;
×
230

UNCOV
231
        Self::try_from(&array.to_bool()?)
×
UNCOV
232
    }
×
233
}
234

UNCOV
235
pub fn arrow_filter_fn(array: &dyn Array, mask: &Mask) -> VortexResult<ArrayRef> {
×
UNCOV
236
    let values = match &mask {
×
UNCOV
237
        Mask::Values(values) => values,
×
238
        _ => unreachable!("check in filter invoke"),
×
239
    };
240

UNCOV
241
    let array_ref = array.to_array().into_arrow_preferred()?;
×
UNCOV
242
    let mask_array = BooleanArray::new(values.boolean_buffer().clone(), None);
×
UNCOV
243
    let filtered = arrow_select::filter::filter(array_ref.as_ref(), &mask_array)?;
×
244

UNCOV
245
    Ok(ArrayRef::from_arrow(
×
UNCOV
246
        filtered.as_ref(),
×
UNCOV
247
        array.dtype().is_nullable(),
×
UNCOV
248
    ))
×
UNCOV
249
}
×
250

251
#[cfg(test)]
mod test {
    use super::*;
    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::compute::filter::filter;

    /// Filtering a nullable primitive array with a mask that selects only its non-null
    /// positions yields exactly those values, in order.
    #[test]
    fn test_filter() {
        let mask = Mask::try_from(&BoolArray::from_iter([true, false, true, false, true])).unwrap();
        let items =
            PrimitiveArray::from_option_iter([Some(0i32), None, Some(1i32), None, Some(2i32)])
                .into_array();

        let filtered = filter(&items, &mask).unwrap();
        let kept = filtered.to_primitive().unwrap();

        assert_eq!(kept.as_slice::<i32>(), &[0i32, 1i32, 2i32]);
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc