• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16204612549

10 Jul 2025 07:50PM UTC coverage: 81.152% (+2.9%) from 78.263%
16204612549

Pull #3825

github

web-flow
Merge d0d2717da into be9c2fd3e
Pull Request #3825: feat: Add optimize ArrayOp with VBView implementation

178 of 211 new or added lines in 4 files covered. (84.36%)

330 existing lines in 34 files now uncovered.

45433 of 55985 relevant lines covered (81.15%)

145951.87 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.66
/vortex-expr/src/exprs/merge.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::fmt::Display;
5
use std::hash::Hash;
6

7
use itertools::Itertools as _;
8
use vortex_array::arrays::StructArray;
9
use vortex_array::validity::Validity;
10
use vortex_array::{Array, ArrayRef, DeserializeMetadata, EmptyMetadata, IntoArray, ToCanonical};
11
use vortex_dtype::{DType, FieldNames, Nullability, StructFields};
12
use vortex_error::{VortexExpect as _, VortexResult, vortex_bail};
13

14
use crate::{AnalysisExpr, ExprEncodingRef, ExprId, ExprRef, IntoExpr, Scope, VTable, vtable};
15

16
vtable!(Merge);
17

18
/// Merge zero or more expressions that ALL return structs.
19
///
20
/// If any field names are duplicated, the field from later expressions wins.
21
///
22
/// NOTE: Fields are not recursively merged, i.e. the later field REPLACES the earlier field.
23
/// This makes struct fields behaviour consistent with other dtypes.
24
#[allow(clippy::derived_hash_with_manual_eq)]
25
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
26
pub struct MergeExpr {
27
    values: Vec<ExprRef>,
28
    nullability: Nullability,
29
}
30

31
/// Unit type used as the `VTable::Encoding` of the merge expression.
pub struct MergeExprEncoding;
32

33
impl VTable for MergeVTable {
34
    type Expr = MergeExpr;
35
    type Encoding = MergeExprEncoding;
36
    type Metadata = EmptyMetadata;
37

38
    fn id(_encoding: &Self::Encoding) -> ExprId {
123✔
39
        ExprId::new_ref("merge")
123✔
40
    }
123✔
41

UNCOV
42
    fn encoding(_expr: &Self::Expr) -> ExprEncodingRef {
×
UNCOV
43
        ExprEncodingRef::new_ref(MergeExprEncoding.as_ref())
×
44
    }
×
45

46
    fn metadata(_expr: &Self::Expr) -> Option<Self::Metadata> {
×
UNCOV
47
        Some(EmptyMetadata)
×
48
    }
×
49

50
    fn children(expr: &Self::Expr) -> Vec<&ExprRef> {
26✔
51
        expr.values.iter().collect()
26✔
52
    }
26✔
53

54
    fn with_children(expr: &Self::Expr, children: Vec<ExprRef>) -> VortexResult<Self::Expr> {
1✔
55
        Ok(MergeExpr {
1✔
56
            values: children,
1✔
57
            nullability: expr.nullability,
1✔
58
        })
1✔
59
    }
1✔
60

UNCOV
61
    fn build(
×
UNCOV
62
        _encoding: &Self::Encoding,
×
63
        _metadata: &<Self::Metadata as DeserializeMetadata>::Output,
×
64
        children: Vec<ExprRef>,
×
65
    ) -> VortexResult<Self::Expr> {
×
66
        if children.is_empty() {
×
67
            vortex_bail!(
×
68
                "Merge expression must have at least one child, got: {:?}",
×
69
                children
×
70
            );
×
71
        }
×
72
        Ok(MergeExpr {
×
73
            values: children,
×
74
            nullability: Nullability::NonNullable, // Default to non-nullable
×
75
        })
×
76
    }
×
77

78
    fn evaluate(expr: &Self::Expr, scope: &Scope) -> VortexResult<ArrayRef> {
5✔
79
        let len = scope.len();
5✔
80
        let value_arrays = expr
5✔
81
            .values
5✔
82
            .iter()
5✔
83
            .map(|value_expr| value_expr.unchecked_evaluate(scope))
8✔
84
            .process_results(|it| it.collect::<Vec<_>>())?;
5✔
85

86
        // Collect fields in order of appearance. Later fields overwrite earlier fields.
87
        let mut field_names = Vec::new();
5✔
88
        let mut arrays = Vec::new();
5✔
89

90
        for value_array in value_arrays.iter() {
8✔
91
            // TODO(marko): When nullable, we need to merge struct validity into field validity.
92
            if value_array.dtype().is_nullable() {
8✔
UNCOV
93
                todo!("merge nullable structs");
×
94
            }
8✔
95
            if !value_array.dtype().is_struct() {
8✔
UNCOV
96
                vortex_bail!("merge expects non-nullable struct input");
×
97
            }
8✔
98

99
            let struct_array = value_array.to_struct()?;
8✔
100

101
            for (i, field_name) in struct_array.names().iter().enumerate() {
14✔
102
                let array = struct_array.fields()[i].clone();
14✔
103

104
                // Update or insert field.
105
                if let Some(idx) = field_names.iter().position(|name| name == field_name) {
20✔
106
                    arrays[idx] = array;
2✔
107
                } else {
12✔
108
                    field_names.push(field_name.clone());
12✔
109
                    arrays.push(array);
12✔
110
                }
12✔
111
            }
112
        }
113

114
        let validity = match expr.nullability {
5✔
115
            Nullability::NonNullable => Validity::NonNullable,
4✔
116
            Nullability::Nullable => Validity::AllValid,
1✔
117
        };
118
        Ok(
119
            StructArray::try_new(FieldNames::from(field_names), arrays, len, validity)?
5✔
120
                .into_array(),
5✔
121
        )
122
    }
5✔
123

124
    fn return_dtype(expr: &Self::Expr, scope: &DType) -> VortexResult<DType> {
5✔
125
        let mut field_names = Vec::new();
5✔
126
        let mut arrays = Vec::new();
5✔
127

128
        for value in expr.values.iter() {
8✔
129
            let dtype = value.return_dtype(scope)?;
8✔
130
            if !dtype.is_struct() {
8✔
UNCOV
131
                vortex_bail!("merge expects non-nullable struct input");
×
132
            }
8✔
133

8✔
134
            let struct_dtype = dtype
8✔
135
                .as_struct()
8✔
136
                .vortex_expect("merge expects struct input");
8✔
137

138
            for i in 0..struct_dtype.nfields() {
14✔
139
                let field_name = struct_dtype.field_name(i).vortex_expect("never OOB");
14✔
140
                let field_dtype = struct_dtype.field_by_index(i).vortex_expect("never OOB");
14✔
141
                if let Some(idx) = field_names.iter().position(|name| name == field_name) {
20✔
142
                    arrays[idx] = field_dtype;
2✔
143
                } else {
12✔
144
                    field_names.push(field_name.clone());
12✔
145
                    arrays.push(field_dtype);
12✔
146
                }
12✔
147
            }
148
        }
149

150
        Ok(DType::Struct(
5✔
151
            StructFields::new(FieldNames::from(field_names), arrays),
5✔
152
            expr.nullability,
5✔
153
        ))
5✔
154
    }
5✔
155
}
156

157
impl MergeExpr {
158
    pub fn new(values: Vec<ExprRef>, nullability: Nullability) -> Self {
8✔
159
        MergeExpr {
8✔
160
            values,
8✔
161
            nullability,
8✔
162
        }
8✔
163
    }
8✔
164

165
    pub fn nullability(&self) -> Nullability {
3✔
166
        self.nullability
3✔
167
    }
3✔
168
}
169

170
pub fn merge(
3✔
171
    elements: impl IntoIterator<Item = impl Into<ExprRef>>,
3✔
172
    nullability: Nullability,
3✔
173
) -> ExprRef {
3✔
174
    let values = elements.into_iter().map(|value| value.into()).collect_vec();
5✔
175
    MergeExpr::new(values, nullability).into_expr()
3✔
176
}
3✔
177

178
impl Display for MergeExpr {
UNCOV
179
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
×
UNCOV
180
        write!(
×
181
            f,
×
182
            "merge({}){}",
×
183
            self.values.iter().format(", "),
×
184
            self.nullability
×
185
        )
×
186
    }
×
187
}
188

189
// The impl body is empty: `MergeExpr` overrides nothing from `AnalysisExpr`.
impl AnalysisExpr for MergeExpr {}
190

191
#[cfg(test)]
mod tests {
    use vortex_array::arrays::{PrimitiveArray, StructArray};
    use vortex_array::{Array, ArrayRef, IntoArray, ToCanonical};
    use vortex_buffer::buffer;
    use vortex_dtype::Nullability;
    use vortex_error::{VortexResult, vortex_bail};

    use crate::{MergeExpr, Scope, get_item, root};

    /// Builds a struct array from `(name, column)` pairs, panicking if construction fails.
    fn struct_of(fields: &[(&str, ArrayRef)]) -> ArrayRef {
        StructArray::from_fields(fields).unwrap().into_array()
    }

    /// A three-row `i32` column in which every row holds `value`.
    fn constant_column(value: i32) -> ArrayRef {
        buffer![value, value, value].into_array()
    }

    /// Walks `field_path` through nested structs and returns the final field as a primitive.
    fn primitive_field(array: &dyn Array, field_path: &[&str]) -> VortexResult<PrimitiveArray> {
        let Some((first, rest)) = field_path.split_first() else {
            vortex_bail!("empty field path");
        };

        let mut current = array.to_struct()?.field_by_name(first)?.clone();
        for name in rest {
            current = current.to_struct()?.field_by_name(name)?.clone();
        }
        current.to_primitive()
    }

    #[test]
    pub fn test_merge() {
        let expr = MergeExpr::new(
            vec![
                get_item("0", root()),
                get_item("1", root()),
                get_item("2", root()),
            ],
            Nullability::NonNullable,
        );

        let input = struct_of(&[
            (
                "0",
                struct_of(&[("a", constant_column(0)), ("b", constant_column(1))]),
            ),
            (
                "1",
                struct_of(&[("b", constant_column(2)), ("c", constant_column(3))]),
            ),
            (
                "2",
                struct_of(&[("d", constant_column(4)), ("e", constant_column(5))]),
            ),
        ]);
        let merged = expr.evaluate(&Scope::new(input)).unwrap();

        assert_eq!(
            merged.as_struct_typed().names(),
            &["a", "b", "c", "d", "e"].into()
        );

        // "b" appears in both "0" and "1"; the later value (2) must win.
        for (name, value) in [("a", 0), ("b", 2), ("c", 3), ("d", 4), ("e", 5)] {
            assert_eq!(
                primitive_field(&merged, &[name])
                    .unwrap()
                    .as_slice::<i32>(),
                [value; 3]
            );
        }
    }

    #[test]
    pub fn test_empty_merge() {
        let expr = MergeExpr::new(Vec::new(), Nullability::NonNullable);

        let input = struct_of(&[("a", buffer![0, 1, 2].into_array())]);
        let merged = expr.evaluate(&Scope::new(input.clone())).unwrap();

        assert_eq!(merged.len(), input.len());
        assert_eq!(merged.as_struct_typed().nfields(), 0);
    }

    #[test]
    pub fn test_nested_merge() {
        // Nested structs are not merged!

        let expr = MergeExpr::new(
            vec![get_item("0", root()), get_item("1", root())],
            Nullability::NonNullable,
        );

        let input = struct_of(&[
            (
                "0",
                struct_of(&[(
                    "a",
                    struct_of(&[("x", constant_column(0)), ("y", constant_column(1))]),
                )]),
            ),
            (
                "1",
                struct_of(&[("a", struct_of(&[("x", constant_column(0))]))]),
            ),
        ]);
        let merged = expr
            .evaluate(&Scope::new(input))
            .unwrap()
            .to_struct()
            .unwrap();

        let inner = merged.field_by_name("a").unwrap().to_struct().unwrap();
        let inner_names = inner
            .names()
            .iter()
            .map(|name| name.as_ref())
            .collect::<Vec<_>>();
        assert_eq!(inner_names, vec!["x"]);
    }

    #[test]
    pub fn test_merge_order() {
        let expr = MergeExpr::new(
            vec![get_item("0", root()), get_item("1", root())],
            Nullability::NonNullable,
        );

        let input = struct_of(&[
            (
                "0",
                struct_of(&[("a", constant_column(0)), ("c", constant_column(1))]),
            ),
            (
                "1",
                struct_of(&[("b", constant_column(2)), ("d", constant_column(3))]),
            ),
        ]);
        let merged = expr
            .evaluate(&Scope::new(input))
            .unwrap()
            .to_struct()
            .unwrap();

        assert_eq!(merged.names(), &["a", "c", "b", "d"].into());
    }

    #[test]
    pub fn test_merge_nullable() {
        let expr = MergeExpr::new(vec![get_item("0", root())], Nullability::Nullable);

        let input = struct_of(&[(
            "0",
            struct_of(&[("a", constant_column(0)), ("b", constant_column(1))]),
        )]);
        let merged = expr.evaluate(&Scope::new(input)).unwrap();

        assert!(merged.dtype().is_nullable());
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc