• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16139349253

08 Jul 2025 09:26AM UTC coverage: 78.057% (-0.2%) from 78.253%
16139349253

push

github

web-flow
VortexExpr VTables (#3713)

Adds the same vtable machinery as arrays and layouts already use. It
uses the "Encoding" naming scheme from arrays and layouts. I don't
particularly like it, but it's consistent. Open to renames later.

Further, adds an expression registry to the Vortex session that will be
used for deserialization.

Each expression only decides how its own "options" are serialized. So, in
theory, we can support many container formats, not just proto, provided each
expression can deserialize its own options format.

---------

Signed-off-by: Nicholas Gates <nick@nickgates.com>

800 of 1190 new or added lines in 38 files covered. (67.23%)

40 existing lines in 13 files now uncovered.

44100 of 56497 relevant lines covered (78.06%)

54989.55 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

77.91
/vortex-expr/src/exprs/pack.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::fmt::Display;
5
use std::hash::Hash;
6

7
use itertools::Itertools as _;
8
use vortex_array::arrays::StructArray;
9
use vortex_array::validity::Validity;
10
use vortex_array::{ArrayRef, DeserializeMetadata, IntoArray, ProstMetadata};
11
use vortex_dtype::{DType, FieldName, FieldNames, Nullability, StructFields};
12
use vortex_error::{VortexExpect as _, VortexResult, vortex_bail, vortex_err};
13
use vortex_proto::expr as pb;
14

15
use crate::{
16
    AnalysisExpr, ExprEncodingRef, ExprId, ExprRef, IntoExpr, Scope, ScopeDType, VTable, vtable,
17
};
18

19
// NOTE(review): presumably declares the `PackVTable` type implemented below and the
// glue connecting it to `PackExpr` / `PackExprEncoding` — confirm against the `vtable!` macro.
vtable!(Pack);
20

21
/// Pack zero or more expressions into a structure with named fields.
22
///
23
/// # Examples
24
///
25
/// ```
26
/// use vortex_array::{IntoArray, ToCanonical};
27
/// use vortex_buffer::buffer;
28
/// use vortex_expr::{root, PackExpr, Scope, VortexExpr};
29
/// use vortex_scalar::Scalar;
30
/// use vortex_dtype::Nullability;
31
///
32
/// let example = PackExpr::try_new(
33
///     ["x", "x copy", "second x copy"].into(),
34
///     vec![root(), root(), root()],
35
///     Nullability::NonNullable,
36
/// ).unwrap();
37
/// let packed = example.evaluate(&Scope::new(buffer![100, 110, 200].into_array())).unwrap();
38
/// let x_copy = packed
39
///     .to_struct()
40
///     .unwrap()
41
///     .field_by_name("x copy")
42
///     .unwrap()
43
///     .clone();
44
/// assert_eq!(x_copy.scalar_at(0).unwrap(), Scalar::from(100));
45
/// assert_eq!(x_copy.scalar_at(1).unwrap(), Scalar::from(110));
46
/// assert_eq!(x_copy.scalar_at(2).unwrap(), Scalar::from(200));
47
/// ```
48
///
49
#[allow(clippy::derived_hash_with_manual_eq)]
50
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
51
pub struct PackExpr {
52
    names: FieldNames,
53
    values: Vec<ExprRef>,
54
    nullability: Nullability,
55
}
56

57
/// Unit marker type used as the `Encoding` associated type of the pack vtable.
pub struct PackExprEncoding;
58

59
impl VTable for PackVTable {
60
    type Expr = PackExpr;
61
    type Encoding = PackExprEncoding;
62
    type Metadata = ProstMetadata<pb::PackOpts>;
63

64
    fn id(_encoding: &Self::Encoding) -> ExprId {
106✔
65
        ExprId::new_ref("pack")
106✔
66
    }
106✔
67

NEW
68
    fn encoding(_expr: &Self::Expr) -> ExprEncodingRef {
×
NEW
69
        ExprEncodingRef::new_ref(PackExprEncoding.as_ref())
×
NEW
70
    }
×
71

NEW
72
    fn metadata(expr: &Self::Expr) -> Option<Self::Metadata> {
×
NEW
73
        Some(ProstMetadata(pb::PackOpts {
×
NEW
74
            paths: expr.names.iter().map(|n| n.to_string()).collect(),
×
NEW
75
            nullable: expr.nullability.into(),
×
NEW
76
        }))
×
NEW
77
    }
×
78

79
    fn children(expr: &Self::Expr) -> Vec<&ExprRef> {
15,640✔
80
        expr.values.iter().collect()
15,640✔
81
    }
15,640✔
82

83
    fn with_children(expr: &Self::Expr, children: Vec<ExprRef>) -> VortexResult<Self::Expr> {
958✔
84
        if children.len() != expr.values.len() {
958✔
NEW
85
            vortex_bail!(
×
NEW
86
                "Pack expression expects {} children, got {}",
×
NEW
87
                expr.values.len(),
×
NEW
88
                children.len()
×
NEW
89
            );
×
90
        }
958✔
91
        PackExpr::try_new(expr.names.clone(), children, expr.nullability)
958✔
92
    }
958✔
93

NEW
94
    fn build(
×
NEW
95
        _encoding: &Self::Encoding,
×
NEW
96
        metadata: &<Self::Metadata as DeserializeMetadata>::Output,
×
NEW
97
        children: Vec<ExprRef>,
×
NEW
98
    ) -> VortexResult<Self::Expr> {
×
NEW
99
        if children.len() != metadata.paths.len() {
×
NEW
100
            vortex_bail!(
×
NEW
101
                "Pack expression expects {} children, got {}",
×
NEW
102
                metadata.paths.len(),
×
NEW
103
                children.len()
×
NEW
104
            );
×
NEW
105
        }
×
NEW
106
        let names: FieldNames = metadata
×
NEW
107
            .paths
×
NEW
108
            .iter()
×
NEW
109
            .map(|name| FieldName::from(name.as_str()))
×
NEW
110
            .collect();
×
NEW
111
        PackExpr::try_new(names, children, metadata.nullable.into())
×
NEW
112
    }
×
113

114
    fn evaluate(expr: &Self::Expr, scope: &Scope) -> VortexResult<ArrayRef> {
782✔
115
        let len = scope.len();
782✔
116
        let value_arrays = expr
782✔
117
            .values
782✔
118
            .iter()
782✔
119
            .map(|value_expr| value_expr.unchecked_evaluate(scope))
1,208✔
120
            .process_results(|it| it.collect::<Vec<_>>())?;
782✔
121
        let validity = match expr.nullability {
782✔
122
            Nullability::NonNullable => Validity::NonNullable,
781✔
123
            Nullability::Nullable => Validity::AllValid,
1✔
124
        };
125
        Ok(StructArray::try_new(expr.names.clone(), value_arrays, len, validity)?.into_array())
782✔
126
    }
782✔
127

128
    fn return_dtype(expr: &Self::Expr, scope: &ScopeDType) -> VortexResult<DType> {
1,187✔
129
        let value_dtypes = expr
1,187✔
130
            .values
1,187✔
131
            .iter()
1,187✔
132
            .map(|value_expr| value_expr.return_dtype(scope))
1,661✔
133
            .process_results(|it| it.collect())?;
1,187✔
134
        Ok(DType::Struct(
1,187✔
135
            StructFields::new(expr.names.clone(), value_dtypes),
1,187✔
136
            expr.nullability,
1,187✔
137
        ))
1,187✔
138
    }
1,187✔
139
}
140

141
impl PackExpr {
142
    pub fn try_new(
2,737✔
143
        names: FieldNames,
2,737✔
144
        values: Vec<ExprRef>,
2,737✔
145
        nullability: Nullability,
2,737✔
146
    ) -> VortexResult<Self> {
2,737✔
147
        if names.len() != values.len() {
2,737✔
148
            vortex_bail!("length mismatch {} {}", names.len(), values.len());
×
149
        }
2,737✔
150
        Ok(PackExpr {
2,737✔
151
            names,
2,737✔
152
            values,
2,737✔
153
            nullability,
2,737✔
154
        })
2,737✔
155
    }
2,737✔
156

157
    pub fn names(&self) -> &FieldNames {
×
158
        &self.names
×
159
    }
×
160

161
    pub fn field(&self, field_name: &FieldName) -> VortexResult<ExprRef> {
979✔
162
        let idx = self
979✔
163
            .names
979✔
164
            .iter()
979✔
165
            .position(|name| name == field_name)
980✔
166
            .ok_or_else(|| {
979✔
167
                vortex_err!(
×
168
                    "Cannot find field {} in pack fields {:?}",
×
169
                    field_name,
×
170
                    self.names
×
171
                )
×
172
            })?;
979✔
173

174
        self.values
979✔
175
            .get(idx)
979✔
176
            .cloned()
979✔
177
            .ok_or_else(|| vortex_err!("field index out of bounds: {}", idx))
979✔
178
    }
979✔
179

180
    pub fn nullability(&self) -> Nullability {
×
181
        self.nullability
×
182
    }
×
183
}
184

185
pub fn pack(
1,694✔
186
    elements: impl IntoIterator<Item = (impl Into<FieldName>, ExprRef)>,
1,694✔
187
    nullability: Nullability,
1,694✔
188
) -> ExprRef {
1,694✔
189
    let (names, values): (Vec<_>, Vec<_>) = elements
1,694✔
190
        .into_iter()
1,694✔
191
        .map(|(name, value)| (name.into(), value))
2,075✔
192
        .unzip();
1,694✔
193
    PackExpr::try_new(names.into(), values, nullability)
1,694✔
194
        .vortex_expect("pack names and values have the same length")
1,694✔
195
        .into_expr()
1,694✔
196
}
1,694✔
197

198
impl Display for PackExpr {
199
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
×
200
        write!(
×
201
            f,
×
202
            "pack({{{}}}){}",
×
203
            self.names
×
204
                .iter()
×
205
                .zip(&self.values)
×
206
                .format_with(", ", |(name, expr), f| f(&format_args!("{name}: {expr}"))),
×
207
            self.nullability
×
208
        )
×
209
    }
×
210
}
211

212
// Empty impl: `PackExpr` overrides nothing and uses whatever defaults `AnalysisExpr` provides.
impl AnalysisExpr for PackExpr {}
213

214
#[cfg(test)]
mod tests {

    use vortex_array::arrays::{PrimitiveArray, StructArray};
    use vortex_array::validity::Validity;
    use vortex_array::vtable::ValidityHelper;
    use vortex_array::{Array, ArrayRef, IntoArray, ToCanonical};
    use vortex_buffer::buffer;
    use vortex_dtype::{FieldNames, Nullability};
    use vortex_error::{VortexResult, vortex_bail};

    use crate::{IntoExpr, PackExpr, Scope, col};

    /// Struct array with two three-row fields: `a = [0, 1, 2]` and `b = [4, 5, 6]`.
    fn test_array() -> ArrayRef {
        StructArray::from_fields(&[
            ("a", buffer![0, 1, 2].into_array()),
            ("b", buffer![4, 5, 6].into_array()),
        ])
        .unwrap()
        .into_array()
    }

    /// Walks `field_path` through nested struct arrays and returns the leaf as a primitive array.
    ///
    /// Fails on an empty path, a missing field, or a value that cannot be converted.
    fn primitive_field(array: &dyn Array, field_path: &[&str]) -> VortexResult<PrimitiveArray> {
        let Some((first, rest)) = field_path.split_first() else {
            vortex_bail!("empty field path");
        };

        let mut array = array.to_struct()?.field_by_name(first)?.clone();
        for field in rest {
            array = array.to_struct()?.field_by_name(field)?.clone();
        }
        // Propagate conversion failures instead of panicking inside a fallible helper.
        Ok(array.to_primitive()?)
    }

    /// Packing nothing yields a struct with no fields but the scope's row count.
    #[test]
    fn test_empty_pack() {
        let expr =
            PackExpr::try_new(FieldNames::default(), Vec::new(), Nullability::NonNullable).unwrap();

        let test_array = test_array();
        let actual_array = expr.evaluate(&Scope::new(test_array.clone())).unwrap();
        assert_eq!(actual_array.len(), test_array.len());
        assert_eq!(
            actual_array.to_struct().unwrap().struct_fields().nfields(),
            0
        );
    }

    /// Columns can be renamed and repeated; a non-nullable pack has non-nullable validity.
    #[test]
    fn test_simple_pack() {
        let expr = PackExpr::try_new(
            ["one", "two", "three"].into(),
            vec![col("a"), col("b"), col("a")],
            Nullability::NonNullable,
        )
        .unwrap();

        let actual_array = expr
            .evaluate(&Scope::new(test_array()))
            .unwrap()
            .to_struct()
            .unwrap();
        let expected_names: FieldNames = ["one", "two", "three"].into();
        assert_eq!(actual_array.names(), &expected_names);
        assert_eq!(actual_array.validity(), &Validity::NonNullable);

        assert_eq!(
            primitive_field(actual_array.as_ref(), &["one"])
                .unwrap()
                .as_slice::<i32>(),
            [0, 1, 2]
        );
        assert_eq!(
            primitive_field(actual_array.as_ref(), &["two"])
                .unwrap()
                .as_slice::<i32>(),
            [4, 5, 6]
        );
        assert_eq!(
            primitive_field(actual_array.as_ref(), &["three"])
                .unwrap()
                .as_slice::<i32>(),
            [0, 1, 2]
        );
    }

    /// A pack expression may itself be a field of another pack expression.
    #[test]
    fn test_nested_pack() {
        let expr = PackExpr::try_new(
            ["one", "two", "three"].into(),
            vec![
                col("a"),
                PackExpr::try_new(
                    ["two_one", "two_two"].into(),
                    vec![col("b"), col("b")],
                    Nullability::NonNullable,
                )
                .unwrap()
                .into_expr(),
                col("a"),
            ],
            Nullability::NonNullable,
        )
        .unwrap();

        let actual_array = expr
            .evaluate(&Scope::new(test_array()))
            .unwrap()
            .to_struct()
            .unwrap();
        let expected_names = FieldNames::from(["one", "two", "three"]);
        assert_eq!(actual_array.names(), &expected_names);

        assert_eq!(
            primitive_field(actual_array.as_ref(), &["one"])
                .unwrap()
                .as_slice::<i32>(),
            [0, 1, 2]
        );
        assert_eq!(
            primitive_field(actual_array.as_ref(), &["two", "two_one"])
                .unwrap()
                .as_slice::<i32>(),
            [4, 5, 6]
        );
        assert_eq!(
            primitive_field(actual_array.as_ref(), &["two", "two_two"])
                .unwrap()
                .as_slice::<i32>(),
            [4, 5, 6]
        );
        assert_eq!(
            primitive_field(actual_array.as_ref(), &["three"])
                .unwrap()
                .as_slice::<i32>(),
            [0, 1, 2]
        );
    }

    /// A nullable pack produces a struct whose validity is `AllValid`.
    #[test]
    fn test_pack_nullable() {
        let expr = PackExpr::try_new(
            ["one", "two", "three"].into(),
            vec![col("a"), col("b"), col("a")],
            Nullability::Nullable,
        )
        .unwrap();

        let actual_array = expr
            .evaluate(&Scope::new(test_array()))
            .unwrap()
            .to_struct()
            .unwrap();
        let expected_names: FieldNames = ["one", "two", "three"].into();
        assert_eq!(actual_array.names(), &expected_names);
        assert_eq!(actual_array.validity(), &Validity::AllValid);
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc