• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16980200873

15 Aug 2025 12:53AM UTC coverage: 49.805%. First build
16980200873

Pull #2456

github

web-flow
Merge aff477380 into aaf3e36ad
Pull Request #2456: feat: basic BoolBuffer / BoolBufferMut

574 of 1074 new or added lines in 84 files covered. (53.45%)

20158 of 40474 relevant lines covered (49.8%)

238516.31 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.17
/vortex-array/src/compute/filter.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::ops::BitAnd;
5
use std::sync::LazyLock;
6

7
use arcref::ArcRef;
8
use arrow_array::BooleanArray;
9
use vortex_dtype::DType;
10
use vortex_error::{VortexError, VortexExpect, VortexResult, vortex_bail, vortex_err};
11
use vortex_mask::Mask;
12
use vortex_scalar::Scalar;
13

14
use crate::arrays::{BoolArray, ConstantArray};
15
use crate::arrow::{FromArrowArray, IntoArrowArray};
16
use crate::compute::{ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Output, fill_null};
17
use crate::vtable::VTable;
18
use crate::{Array, ArrayRef, Canonical, IntoArray, ToCanonical};
19

20
/// The filter [`ComputeFn`].
21
static FILTER_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
3✔
22
    let compute = ComputeFn::new("filter".into(), ArcRef::new_ref(&Filter));
3✔
23
    for kernel in inventory::iter::<FilterKernelRef> {
69✔
24
        compute.register_kernel(kernel.0.clone());
66✔
25
    }
66✔
26
    compute
3✔
27
});
3✔
28

29
/// Keep only the elements for which the corresponding mask value is true.
30
///
31
/// # Examples
32
///
33
/// ```
34
/// use vortex_array::{Array, IntoArray};
35
/// use vortex_array::arrays::{BoolArray, PrimitiveArray};
36
/// use vortex_array::compute::{ filter, mask};
37
/// use vortex_mask::Mask;
38
/// use vortex_scalar::Scalar;
39
///
40
/// let array =
41
///     PrimitiveArray::from_option_iter([Some(0i32), None, Some(1i32), None, Some(2i32)]);
42
/// let mask = Mask::try_from(
43
///     &BoolArray::from_iter([true, false, false, false, true]),
44
/// )
45
/// .unwrap();
46
///
47
/// let filtered = filter(array.as_ref(), &mask).unwrap();
48
/// assert_eq!(filtered.len(), 2);
49
/// assert_eq!(filtered.scalar_at(0).unwrap(), Scalar::from(Some(0_i32)));
50
/// assert_eq!(filtered.scalar_at(1).unwrap(), Scalar::from(Some(2_i32)));
51
/// ```
52
///
53
/// # Panics
54
///
55
/// The `predicate` must receive an Array with type non-nullable bool, and will panic if this is
56
/// not the case.
57
pub fn filter(array: &dyn Array, mask: &Mask) -> VortexResult<ArrayRef> {
13,929✔
58
    FILTER_FN
13,929✔
59
        .invoke(&InvocationArgs {
13,929✔
60
            inputs: &[array.into(), mask.into()],
13,929✔
61
            options: &(),
13,929✔
62
        })?
13,929✔
63
        .unwrap_array()
13,929✔
64
}
13,929✔
65

66
/// Unit type carrying the [`ComputeFnVTable`] implementation used by the filter [`ComputeFn`].
struct Filter;
67

68
impl ComputeFnVTable for Filter {
69
    fn invoke(
13,929✔
70
        &self,
13,929✔
71
        args: &InvocationArgs,
13,929✔
72
        kernels: &[ArcRef<dyn Kernel>],
13,929✔
73
    ) -> VortexResult<Output> {
13,929✔
74
        let FilterArgs { array, mask } = FilterArgs::try_from(args)?;
13,929✔
75

76
        let true_count = mask.true_count();
13,929✔
77

78
        // Fast-path for empty mask.
79
        if true_count == 0 {
13,929✔
80
            return Ok(Canonical::empty(array.dtype()).into_array().into());
×
81
        }
13,929✔
82

83
        // Fast-path for full mask
84
        if true_count == mask.len() {
13,929✔
85
            return Ok(array.to_array().into());
×
86
        }
13,929✔
87

88
        for kernel in kernels {
164,568✔
89
            if let Some(output) = kernel.invoke(args)? {
164,409✔
90
                return Ok(output);
13,770✔
91
            }
150,639✔
92
        }
93
        if let Some(output) = array.invoke(&FILTER_FN, args)? {
159✔
94
            return Ok(output);
×
95
        }
159✔
96

97
        // Otherwise, we can use scalar_at if the mask has length 1.
98
        if mask.true_count() == 1 {
159✔
99
            let idx = mask.first().vortex_expect("true_count == 1");
54✔
100
            return Ok(ConstantArray::new(array.scalar_at(idx)?, 1)
54✔
101
                .into_array()
54✔
102
                .into());
54✔
103
        }
105✔
104

105
        // Fallback: implement using Arrow kernels.
106
        log::debug!("No filter implementation found for {}", array.encoding_id(),);
105✔
107

108
        if !array.is_canonical() {
105✔
109
            let canonical = array.to_canonical()?.into_array();
105✔
110
            return filter(&canonical, mask).map(Into::into);
105✔
111
        };
×
112

113
        vortex_bail!(
×
114
            "No filter implementation found for array {}",
×
115
            array.encoding()
×
116
        )
117
    }
13,929✔
118

119
    fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
13,929✔
120
        Ok(FilterArgs::try_from(args)?.array.dtype().clone())
13,929✔
121
    }
13,929✔
122

123
    fn return_len(&self, args: &InvocationArgs) -> VortexResult<usize> {
13,929✔
124
        let FilterArgs { array, mask } = FilterArgs::try_from(args)?;
13,929✔
125
        if mask.len() != array.len() {
13,929✔
126
            vortex_bail!(
×
127
                "mask.len() is {}, does not equal array.len() of {}",
×
128
                mask.len(),
×
129
                array.len()
×
130
            );
131
        }
13,929✔
132
        Ok(mask.true_count())
13,929✔
133
    }
13,929✔
134

135
    fn is_elementwise(&self) -> bool {
13,929✔
136
        false
13,929✔
137
    }
13,929✔
138
}
139

140
struct FilterArgs<'a> {
141
    array: &'a dyn Array,
142
    mask: &'a Mask,
143
}
144

145
impl<'a> TryFrom<&InvocationArgs<'a>> for FilterArgs<'a> {
146
    type Error = VortexError;
147

148
    fn try_from(value: &InvocationArgs<'a>) -> Result<Self, Self::Error> {
206,196✔
149
        if value.inputs.len() != 2 {
206,196✔
150
            vortex_bail!("Expected 2 inputs, found {}", value.inputs.len());
×
151
        }
206,196✔
152
        let array = value.inputs[0]
206,196✔
153
            .array()
206,196✔
154
            .ok_or_else(|| vortex_err!("Expected first input to be an array"))?;
206,196✔
155
        let mask = value.inputs[1]
206,196✔
156
            .mask()
206,196✔
157
            .ok_or_else(|| vortex_err!("Expected second input to be a mask"))?;
206,196✔
158
        Ok(Self { array, mask })
206,196✔
159
    }
206,196✔
160
}
161

162
/// A type-erased [`Kernel`] that implements the filter function.
///
/// Values of this type are gathered through `inventory` and registered on the filter
/// [`ComputeFn`] when it is lazily initialised.
163
pub struct FilterKernelRef(pub ArcRef<dyn Kernel>);
164
// Makes `FilterKernelRef` a collectable `inventory` type so kernels can be submitted elsewhere.
inventory::collect!(FilterKernelRef);
165

166
/// Encoding-specific filter implementation, adapted into a [`Kernel`] by
/// [`FilterKernelAdapter`].
pub trait FilterKernel: VTable {
167
    /// Filter an array by the provided mask.
168
    ///
169
    /// Note that the entry-point filter function handles `Mask::AllTrue` and `Mask::AllFalse`,
170
    /// leaving only `Mask::Values` to be handled by this function.
171
    fn filter(&self, array: &Self::Array, mask: &Mask) -> VortexResult<ArrayRef>;
172
}
173

174
/// Adapter to convert a [`FilterKernel`] into a [`Kernel`].
///
/// Wraps the vtable value `V` whose [`FilterKernel::filter`] is called for arrays of that
/// encoding.
175
#[derive(Debug)]
176
pub struct FilterKernelAdapter<V: VTable>(pub V);
177

178
impl<V: VTable + FilterKernel> FilterKernelAdapter<V> {
179
    /// Wraps a `'static` adapter in a [`FilterKernelRef`] without allocating a new value,
    /// producing the type that is collected through `inventory`.
    pub const fn lift(&'static self) -> FilterKernelRef {
×
180
        FilterKernelRef(ArcRef::new_ref(self))
×
181
    }
×
182
}
183

184
impl<V: VTable + FilterKernel> Kernel for FilterKernelAdapter<V> {
185
    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
164,409✔
186
        let inputs = FilterArgs::try_from(args)?;
164,409✔
187
        let Some(array) = inputs.array.as_opt::<V>() else {
164,409✔
188
            return Ok(None);
150,639✔
189
        };
190
        let filtered = V::filter(&self.0, array, inputs.mask)?;
13,770✔
191
        Ok(Some(filtered.into()))
13,770✔
192
    }
164,409✔
193
}
194

195
impl TryFrom<&BoolArray> for Mask {
196
    type Error = VortexError;
197

198
    fn try_from(array: &BoolArray) -> Result<Self, Self::Error> {
2,754✔
199
        if let Some(constant) = array.as_constant() {
2,754✔
200
            let bool_constant = constant.as_bool();
804✔
201
            return if bool_constant.value().unwrap_or(false) {
804✔
202
                Ok(Self::new_true(array.len()))
588✔
203
            } else {
204
                Ok(Self::new_false(array.len()))
216✔
205
            };
206
        }
1,950✔
207

208
        // Extract a boolean buffer, treating null values to false
209
        let buffer = match array.validity_mask()? {
1,950✔
210
            Mask::AllTrue(_) => array.bit_buffer().clone(),
1,950✔
211
            Mask::AllFalse(_) => return Ok(Self::new_false(array.len())),
×
NEW
212
            Mask::Values(validity) => validity.bit_buffer().bitand(array.bit_buffer()),
×
213
        };
214

215
        Ok(Self::from_buffer(buffer))
1,950✔
216
    }
2,754✔
217
}
218

219
impl TryFrom<&dyn Array> for Mask {
220
    type Error = VortexError;
221

222
    /// Converts from a possible nullable boolean array. Null values are treated as false.
223
    fn try_from(array: &dyn Array) -> Result<Self, Self::Error> {
2,754✔
224
        if !matches!(array.dtype(), DType::Bool(_)) {
2,754✔
225
            vortex_bail!("mask must be bool array, has dtype {}", array.dtype());
×
226
        }
2,754✔
227

228
        // Convert nulls to false first in case this can be done cheaply by the encoding.
229
        let array = fill_null(array, &Scalar::bool(false, array.dtype().nullability()))?;
2,754✔
230

231
        Self::try_from(&array.to_bool()?)
2,754✔
232
    }
2,754✔
233
}
234

235
pub fn arrow_filter_fn(array: &dyn Array, mask: &Mask) -> VortexResult<ArrayRef> {
18✔
236
    let values = match &mask {
18✔
237
        Mask::Values(values) => values,
18✔
238
        _ => unreachable!("check in filter invoke"),
×
239
    };
240

241
    let array_ref = array.to_array().into_arrow_preferred()?;
18✔
242
    let mask_array = BooleanArray::new(values.bit_buffer().clone().into(), None);
18✔
243
    let filtered = arrow_select::filter::filter(array_ref.as_ref(), &mask_array)?;
18✔
244

245
    Ok(ArrayRef::from_arrow(
18✔
246
        filtered.as_ref(),
18✔
247
        array.dtype().is_nullable(),
18✔
248
    ))
18✔
249
}
18✔
250

251
#[cfg(test)]
mod test {
    use super::*;
    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::compute::filter::filter;

    /// Selecting positions 0, 2 and 4 of a nullable array keeps exactly the non-null values.
    #[test]
    fn test_filter() {
        let values = [Some(0i32), None, Some(1i32), None, Some(2i32)];
        let input = PrimitiveArray::from_option_iter(values).into_array();

        let selection = BoolArray::from_iter([true, false, true, false, true]);
        let mask = Mask::try_from(&selection).unwrap();

        let kept = filter(&input, &mask).unwrap().to_primitive().unwrap();
        assert_eq!(kept.as_slice::<i32>(), &[0i32, 1i32, 2i32]);
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc