• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16620437478

30 Jul 2025 10:46AM UTC coverage: 82.754% (+0.06%) from 82.696%
16620437478

push

github

web-flow
varbinview zip kernel (#4054)

Uses the buffer deduplicating builder to construct the zipped array.
This guards against the pathological case where we are zipping two
varbinview arrays with a mask that has lots of contiguous slices. Each
`builder.extend_from_array(input.slice(..))` would duplicate the entire
buffers of `input`, and each slice in the mask would add the same
buffers to the result array over and over again.

---------

Signed-off-by: Onur Satici <onur@spiraldb.com>

79 of 82 new or added lines in 2 files covered. (96.34%)

2 existing lines in 1 file now uncovered.

45322 of 54767 relevant lines covered (82.75%)

184684.33 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

91.89
/vortex-array/src/compute/zip.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::sync::LazyLock;
5

6
use arcref::ArcRef;
7
use vortex_dtype::DType;
8
use vortex_error::{VortexError, VortexResult, vortex_bail, vortex_err};
9
use vortex_mask::{AllOr, Mask};
10

11
use super::{ComputeFnVTable, InvocationArgs, Output, cast};
12
use crate::builders::{ArrayBuilder, builder_with_capacity};
13
use crate::compute::{ComputeFn, Kernel};
14
use crate::vtable::VTable;
15
use crate::{Array, ArrayRef};
16

17
/// Performs element-wise conditional selection between two arrays based on a mask.
18
///
19
/// Returns a new array where `result[i] = if_true[i]` when `mask[i]` is true,
20
/// otherwise `result[i] = if_false[i]`.
21
pub fn zip(if_true: &dyn Array, if_false: &dyn Array, mask: &Mask) -> VortexResult<ArrayRef> {
115✔
22
    ZIP_FN
115✔
23
        .invoke(&InvocationArgs {
115✔
24
            inputs: &[if_true.into(), if_false.into(), mask.into()],
115✔
25
            options: &(),
115✔
26
        })?
115✔
27
        .unwrap_array()
77✔
28
}
115✔
29

30
pub static ZIP_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
115✔
31
    let compute = ComputeFn::new("zip".into(), ArcRef::new_ref(&Zip));
115✔
32
    for kernel in inventory::iter::<ZipKernelRef> {
230✔
33
        compute.register_kernel(kernel.0.clone());
115✔
34
    }
115✔
35
    compute
115✔
36
});
115✔
37

38
/// Unit type that carries the [`ComputeFnVTable`] implementation backing [`ZIP_FN`].
struct Zip;
39

40
impl ComputeFnVTable for Zip {
41
    fn invoke(
77✔
42
        &self,
77✔
43
        args: &InvocationArgs,
77✔
44
        kernels: &[ArcRef<dyn Kernel>],
77✔
45
    ) -> VortexResult<Output> {
77✔
46
        let ZipArgs {
47
            if_true,
77✔
48
            if_false,
77✔
49
            mask,
77✔
50
        } = ZipArgs::try_from(args)?;
77✔
51

52
        if mask.all_true() {
77✔
53
            return Ok(cast(if_true, &zip_return_dtype(if_true, if_false))?.into());
38✔
54
        }
39✔
55

56
        if mask.all_false() {
39✔
57
            return Ok(cast(if_false, &zip_return_dtype(if_true, if_false))?.into());
×
58
        }
39✔
59

60
        // check if if_true supports zip directly
61
        for kernel in kernels {
77✔
62
            if let Some(output) = kernel.invoke(args)? {
39✔
63
                return Ok(output);
1✔
64
            }
38✔
65
        }
66

67
        if let Some(output) = if_true.invoke(&ZIP_FN, args)? {
38✔
68
            return Ok(output);
×
69
        }
38✔
70

71
        // TODO(os): add invert_mask opt and check if if_false has a kernel like:
72
        //           kernel.invoke(Args(if_false, if_true, mask, invert_mask = true))
73

74
        Ok(zip_impl(
38✔
75
            if_true.to_canonical()?.as_ref(),
38✔
76
            if_false.to_canonical()?.as_ref(),
38✔
77
            mask,
38✔
78
        )?
×
79
        .into())
38✔
80
    }
77✔
81

82
    fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
77✔
83
        let ZipArgs {
84
            if_true, if_false, ..
77✔
85
        } = ZipArgs::try_from(args)?;
77✔
86

87
        if !if_true.dtype().eq_ignore_nullability(if_false.dtype()) {
77✔
88
            vortex_bail!("input arrays to zip must have the same dtype");
×
89
        }
77✔
90
        Ok(zip_return_dtype(if_true, if_false))
77✔
91
    }
77✔
92

93
    fn return_len(&self, args: &InvocationArgs) -> VortexResult<usize> {
77✔
94
        let ZipArgs { if_true, mask, .. } = ZipArgs::try_from(args)?;
77✔
95
        // ComputeFn::invoke asserts if_true.len() == if_false.len(), because zip is elementwise
96
        if if_true.len() != mask.len() {
77✔
NEW
97
            vortex_bail!("input arrays must have the same length as the mask");
×
98
        }
77✔
99
        Ok(if_true.len())
77✔
100
    }
77✔
101

102
    fn is_elementwise(&self) -> bool {
115✔
103
        true
115✔
104
    }
115✔
105
}
106

107
/// Borrowed, typed view of the three `zip` inputs pulled out of an [`InvocationArgs`]:
/// the array used where the mask is true, the array used where it is false, and the
/// mask itself.
struct ZipArgs<'a> {
108
    if_true: &'a dyn Array,
109
    if_false: &'a dyn Array,
110
    mask: &'a Mask,
111
}
112

113
impl<'a> TryFrom<&InvocationArgs<'a>> for ZipArgs<'a> {
114
    type Error = VortexError;
115

116
    fn try_from(value: &InvocationArgs<'a>) -> Result<Self, Self::Error> {
270✔
117
        if value.inputs.len() != 3 {
270✔
118
            vortex_bail!("Expected 3 inputs for zip, found {}", value.inputs.len());
×
119
        }
270✔
120
        let if_true = value.inputs[0]
270✔
121
            .array()
270✔
122
            .ok_or_else(|| vortex_err!("Expected input 0 to be an array"))?;
270✔
123

124
        let if_false = value.inputs[1]
270✔
125
            .array()
270✔
126
            .ok_or_else(|| vortex_err!("Expected input 1 to be an array"))?;
270✔
127

128
        let mask = value.inputs[2]
270✔
129
            .mask()
270✔
130
            .ok_or_else(|| vortex_err!("Expected input 2 to be a mask"))?;
270✔
131

132
        Ok(Self {
270✔
133
            if_true,
270✔
134
            if_false,
270✔
135
            mask,
270✔
136
        })
270✔
137
    }
270✔
138
}
139

140
/// Hook that lets an encoding supply its own `zip` implementation for arrays of
/// its `Self::Array` type.
pub trait ZipKernel: VTable {
141
    /// Zips `if_true` (already downcast to this encoding's array type) with
    /// `if_false` under `mask`.
    ///
    /// Returning `Ok(None)` means this kernel produced no result; the `Zip`
    /// dispatcher then moves on to its remaining strategies.
    fn zip(
142
        &self,
143
        if_true: &Self::Array,
144
        if_false: &dyn Array,
145
        mask: &Mask,
146
    ) -> VortexResult<Option<ArrayRef>>;
147
}
148

149
/// Wrapper around a type-erased zip [`Kernel`]. Instances are gathered through
/// `inventory` and registered on [`ZIP_FN`] when it is first initialized.
pub struct ZipKernelRef(pub ArcRef<dyn Kernel>);
150
// Declares `ZipKernelRef` as an `inventory` collection so that `inventory::iter` can walk it.
inventory::collect!(ZipKernelRef);
151

152
/// Adapter that exposes a [`ZipKernel`] vtable `V` through the dynamic [`Kernel`] interface.
#[derive(Debug)]
153
pub struct ZipKernelAdapter<V: VTable>(pub V);
154

155
impl<V: VTable + ZipKernel> ZipKernelAdapter<V> {
156
    /// Wraps a `'static` adapter in a [`ZipKernelRef`] holding a borrowed `ArcRef` to it.
    /// Being `const`, it can be evaluated in constant contexts.
    pub const fn lift(&'static self) -> ZipKernelRef {
×
157
        ZipKernelRef(ArcRef::new_ref(self))
×
158
    }
×
159
}
160

161
impl<V: VTable + ZipKernel> Kernel for ZipKernelAdapter<V> {
162
    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
39✔
163
        let ZipArgs {
164
            if_true,
39✔
165
            if_false,
39✔
166
            mask,
39✔
167
        } = ZipArgs::try_from(args)?;
39✔
168
        let Some(if_true) = if_true.as_opt::<V>() else {
39✔
169
            return Ok(None);
38✔
170
        };
171
        Ok(V::zip(&self.0, if_true, if_false, mask)?.map(Into::into))
1✔
172
    }
39✔
173
}
174

175
/// Result dtype of `zip`: the dtype of `if_true`, with its nullability unioned with
/// that of `if_false`.
pub(crate) fn zip_return_dtype(if_true: &dyn Array, if_false: &dyn Array) -> DType {
    let true_dtype = if_true.dtype();
    let false_nullability = if_false.dtype().nullability();
    true_dtype.union_nullability(false_nullability)
}
154✔
180

181
fn zip_impl(if_true: &dyn Array, if_false: &dyn Array, mask: &Mask) -> VortexResult<ArrayRef> {
38✔
182
    // if_true.len() == if_false.len() from ComputeFn::invoke
183
    let builder = builder_with_capacity(&zip_return_dtype(if_true, if_false), if_true.len());
38✔
184
    zip_impl_with_builder(if_true, if_false, mask, builder)
38✔
185
}
38✔
186

187
pub(crate) fn zip_impl_with_builder(
39✔
188
    if_true: &dyn Array,
39✔
189
    if_false: &dyn Array,
39✔
190
    mask: &Mask,
39✔
191
    mut builder: Box<dyn ArrayBuilder>,
39✔
192
) -> VortexResult<ArrayRef> {
39✔
193
    match mask.slices() {
39✔
194
        AllOr::All => Ok(if_true.to_array()),
×
195
        AllOr::None => Ok(if_false.to_array()),
×
196
        AllOr::Some(slices) => {
39✔
197
            for (start, end) in slices {
148✔
198
                builder.extend_from_array(&if_false.slice(builder.len(), *start)?)?;
109✔
199
                builder.extend_from_array(&if_true.slice(*start, *end)?)?;
109✔
200
            }
201
            if builder.len() < if_false.len() {
39✔
202
                builder.extend_from_array(&if_false.slice(builder.len(), if_false.len())?)?;
39✔
203
            }
×
204
            Ok(builder.finish())
39✔
205
        }
206
    }
207
}
39✔
208

209
#[cfg(test)]
mod tests {
    use vortex_array::arrays::{BoolArray, PrimitiveArray};
    use vortex_array::compute::zip;
    use vortex_array::{IntoArray, ToCanonical};
    use vortex_mask::Mask;

    /// A mixed mask picks values from both inputs.
    #[test]
    fn test_zip_basic() {
        let mask =
            Mask::try_from(&BoolArray::from_iter([true, false, false, true, false])).unwrap();
        let if_true = PrimitiveArray::from_iter([10, 20, 30, 40, 50]).into_array();
        let if_false = PrimitiveArray::from_iter([1, 2, 3, 4, 5]).into_array();

        let result = zip(&if_true, &if_false, &mask).unwrap();
        let expected = PrimitiveArray::from_iter([10, 2, 3, 40, 5]);

        assert_eq!(
            result.to_primitive().unwrap().as_slice::<i32>(),
            expected.as_slice::<i32>()
        );
    }

    /// An all-true mask returns `if_true`'s values with the unioned nullability.
    #[test]
    fn test_zip_all_true() {
        let mask = Mask::new_true(4);
        let if_true = PrimitiveArray::from_iter([10, 20, 30, 40]).into_array();
        let if_false =
            PrimitiveArray::from_option_iter([Some(1), Some(2), Some(3), None]).into_array();

        let result = zip(&if_true, &if_false, &mask).unwrap();

        assert_eq!(
            result.to_primitive().unwrap().as_slice::<i32>(),
            if_true.to_primitive().unwrap().as_slice::<i32>()
        );

        // result must be nullable even if_true was not
        assert_eq!(result.dtype(), if_false.dtype())
    }

    /// An all-false mask returns `if_false`'s values with the unioned nullability.
    /// Mirrors `test_zip_all_true` so the all-false fast path is exercised as well.
    #[test]
    fn test_zip_all_false() {
        let mask = Mask::new_false(4);
        let if_true =
            PrimitiveArray::from_option_iter([Some(10), Some(20), Some(30), None]).into_array();
        let if_false = PrimitiveArray::from_iter([1, 2, 3, 4]).into_array();

        let result = zip(&if_true, &if_false, &mask).unwrap();

        assert_eq!(
            result.to_primitive().unwrap().as_slice::<i32>(),
            if_false.to_primitive().unwrap().as_slice::<i32>()
        );

        // result must be nullable even if_false was not
        assert_eq!(result.dtype(), if_true.dtype())
    }

    /// Inputs of mismatched length are rejected.
    #[test]
    #[should_panic]
    fn test_invalid_lengths() {
        let mask = Mask::new_false(4);
        let if_true = PrimitiveArray::from_iter([10, 20, 30]).into_array();
        let if_false = PrimitiveArray::from_iter([1, 2, 3, 4]).into_array();

        zip(&if_true, &if_false, &mask).unwrap();
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc