• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16139349253

08 Jul 2025 09:26AM UTC coverage: 78.057% (-0.2%) from 78.253%
16139349253

push

github

web-flow
VortexExpr VTables (#3713)

Adds the same vtable machinery as arrays and layouts already use. It
uses the "Encoding" naming scheme from arrays and layouts. I don't
particularly like it, but it's consistent. Open to renames later.

Further, adds an expression registry to the Vortex session that will be
used for deserialization.

Expressions only decide how their "options" are serialized. So in theory, we can
support many container formats, not just proto, provided each expression
can deserialize its own options format.

---------

Signed-off-by: Nicholas Gates <nick@nickgates.com>

800 of 1190 new or added lines in 38 files covered. (67.23%)

40 existing lines in 13 files now uncovered.

44100 of 56497 relevant lines covered (78.06%)

54989.55 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

71.3
/vortex-expr/src/exprs/select.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::fmt::Display;
5

6
use itertools::Itertools;
7
use vortex_array::{ArrayRef, DeserializeMetadata, EmptyMetadata, IntoArray, ToCanonical};
8
use vortex_dtype::{DType, FieldNames};
9
use vortex_error::{VortexResult, vortex_bail, vortex_err};
10

11
use crate::field::DisplayFieldNames;
12
use crate::{
13
    AnalysisExpr, ExprEncodingRef, ExprId, ExprRef, IntoExpr, Scope, ScopeDType, VTable, vtable,
14
};
15

16
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
17
/// The set of struct fields a select expression refers to, expressed either as
/// an explicit include list or as an exclude list.
pub enum SelectField {
18
    /// Keep only the named fields.
    Include(FieldNames),
19
    /// Keep every field except the named ones.
    Exclude(FieldNames),
20
}
21

22
// NOTE(review): presumably this macro declares the `SelectVTable` type that the
// `impl VTable for SelectVTable` block in this file relies on — confirm against
// the `vtable!` macro definition.
vtable!(Select);
23

24
#[derive(Debug, Clone, Hash)]
25
// `Hash` is derived while `PartialEq` is implemented by hand for this type,
// which is exactly the combination this lint flags.
#[allow(clippy::derived_hash_with_manual_eq)]
26
/// An expression that projects a subset of the fields produced by a child
/// expression.
pub struct SelectExpr {
27
    /// Which fields to keep (include list) or drop (exclude list).
    fields: SelectField,
28
    /// The expression whose struct-typed result is projected.
    child: ExprRef,
29
}
30

31
impl PartialEq for SelectExpr {
NEW
32
    fn eq(&self, other: &Self) -> bool {
×
NEW
33
        self.fields == other.fields && self.child.eq(&other.child)
×
NEW
34
    }
×
35
}
36

37
/// Field-less marker type used as the `Encoding` associated type of the select
/// expression's vtable.
pub struct SelectExprEncoding;
38

39
impl VTable for SelectVTable {
40
    type Expr = SelectExpr;
41
    type Encoding = SelectExprEncoding;
42
    type Metadata = EmptyMetadata;
43

44
    fn id(_encoding: &Self::Encoding) -> ExprId {
106✔
45
        ExprId::new_ref("select")
106✔
46
    }
106✔
47

NEW
48
    fn encoding(_expr: &Self::Expr) -> ExprEncodingRef {
×
NEW
49
        ExprEncodingRef::new_ref(SelectExprEncoding.as_ref())
×
NEW
50
    }
×
51

NEW
52
    fn metadata(_expr: &Self::Expr) -> Option<Self::Metadata> {
×
NEW
53
        // Select does not support serialization
×
NEW
54
        None
×
NEW
55
    }
×
56

57
    fn children(expr: &Self::Expr) -> Vec<&ExprRef> {
2,150✔
58
        vec![&expr.child]
2,150✔
59
    }
2,150✔
60

NEW
61
    fn with_children(expr: &Self::Expr, children: Vec<ExprRef>) -> VortexResult<Self::Expr> {
×
NEW
62
        if children.len() != 1 {
×
NEW
63
            vortex_bail!(
×
NEW
64
                "Select expression must have exactly 1 child, got {}",
×
NEW
65
                children.len()
×
NEW
66
            );
×
NEW
67
        }
×
NEW
68
        Ok(SelectExpr {
×
NEW
69
            fields: expr.fields.clone(),
×
NEW
70
            child: children[0].clone(),
×
NEW
71
        })
×
NEW
72
    }
×
73

NEW
74
    fn build(
×
NEW
75
        _encoding: &Self::Encoding,
×
NEW
76
        _metadata: &<Self::Metadata as DeserializeMetadata>::Output,
×
NEW
77
        _children: Vec<ExprRef>,
×
NEW
78
    ) -> VortexResult<Self::Expr> {
×
NEW
79
        vortex_bail!("Select does not support deserialization")
×
NEW
80
    }
×
81

82
    fn evaluate(expr: &Self::Expr, scope: &Scope) -> VortexResult<ArrayRef> {
2✔
83
        let batch = expr.child.unchecked_evaluate(scope)?.to_struct()?;
2✔
84
        Ok(match &expr.fields {
2✔
85
            SelectField::Include(f) => batch.project(f.as_ref()),
1✔
86
            SelectField::Exclude(names) => {
1✔
87
                let included_names = batch
1✔
88
                    .names()
1✔
89
                    .iter()
1✔
90
                    .filter(|&f| !names.as_ref().contains(f))
2✔
91
                    .cloned()
1✔
92
                    .collect::<Vec<_>>();
1✔
93
                batch.project(included_names.as_slice())
1✔
94
            }
NEW
95
        }?
×
96
        .into_array())
2✔
97
    }
2✔
98

99
    fn return_dtype(expr: &Self::Expr, scope: &ScopeDType) -> VortexResult<DType> {
362✔
100
        let child_dtype = expr.child.return_dtype(scope)?;
362✔
101
        let child_struct_dtype = child_dtype
362✔
102
            .as_struct()
362✔
103
            .ok_or_else(|| vortex_err!("Select child not a struct dtype"))?;
362✔
104

105
        let projected = match &expr.fields {
362✔
106
            SelectField::Include(fields) => child_struct_dtype.project(fields.as_ref())?,
359✔
107
            SelectField::Exclude(fields) => child_struct_dtype
3✔
108
                .names()
3✔
109
                .iter()
3✔
110
                .cloned()
3✔
111
                .zip_eq(child_struct_dtype.fields())
3✔
112
                .filter(|(name, _)| !fields.as_ref().contains(name))
12✔
113
                .collect(),
3✔
114
        };
115

116
        Ok(DType::Struct(projected, child_dtype.nullability()))
362✔
117
    }
362✔
118
}
119

120
pub fn select(fields: impl Into<FieldNames>, child: ExprRef) -> ExprRef {
22✔
121
    SelectExpr::include_expr(fields.into(), child)
22✔
122
}
22✔
123

124
pub fn select_exclude(fields: impl Into<FieldNames>, child: ExprRef) -> ExprRef {
4✔
125
    SelectExpr::exclude_expr(fields.into(), child)
4✔
126
}
4✔
127

128
impl SelectExpr {
129
    pub fn new(fields: SelectField, child: ExprRef) -> Self {
346✔
130
        Self { fields, child }
346✔
131
    }
346✔
132

133
    pub fn include_expr(columns: FieldNames, child: ExprRef) -> ExprRef {
342✔
134
        Self::new(SelectField::Include(columns), child).into_expr()
342✔
135
    }
342✔
136

137
    pub fn exclude_expr(columns: FieldNames, child: ExprRef) -> ExprRef {
4✔
138
        Self::new(SelectField::Exclude(columns), child).into_expr()
4✔
139
    }
4✔
140

141
    pub fn fields(&self) -> &SelectField {
359✔
142
        &self.fields
359✔
143
    }
359✔
144

145
    pub fn child(&self) -> &ExprRef {
359✔
146
        &self.child
359✔
147
    }
359✔
148

149
    pub fn as_include(&self, field_names: &FieldNames) -> VortexResult<ExprRef> {
×
NEW
150
        Ok(Self::new(
×
151
            SelectField::Include(self.fields.as_include_names(field_names)?),
×
152
            self.child.clone(),
×
NEW
153
        )
×
NEW
154
        .into_expr())
×
UNCOV
155
    }
×
156
}
157

158
impl SelectField {
159
    pub fn include(columns: FieldNames) -> Self {
×
160
        assert_eq!(columns.iter().unique().collect_vec().len(), columns.len());
×
161
        Self::Include(columns)
×
162
    }
×
163

164
    pub fn exclude(columns: FieldNames) -> Self {
×
165
        assert_eq!(columns.iter().unique().collect_vec().len(), columns.len());
×
166
        Self::Exclude(columns)
×
167
    }
×
168

169
    pub fn is_include(&self) -> bool {
×
170
        matches!(self, Self::Include(_))
×
171
    }
×
172

173
    pub fn is_exclude(&self) -> bool {
×
174
        matches!(self, Self::Exclude(_))
×
175
    }
×
176

177
    pub fn fields(&self) -> &FieldNames {
359✔
178
        match self {
359✔
179
            SelectField::Include(fields) => fields,
359✔
180
            SelectField::Exclude(fields) => fields,
×
181
        }
182
    }
359✔
183

184
    pub fn as_include_names(&self, field_names: &FieldNames) -> VortexResult<FieldNames> {
359✔
185
        if self
359✔
186
            .fields()
359✔
187
            .iter()
359✔
188
            .any(|f| !field_names.iter().contains(f))
382✔
189
        {
190
            vortex_bail!(
×
191
                "Field {:?} in select not in field names {:?}",
×
192
                self,
×
193
                field_names
×
194
            );
×
195
        }
359✔
196
        match self {
359✔
197
            SelectField::Include(fields) => Ok(fields.clone()),
359✔
198
            SelectField::Exclude(exc_fields) => Ok(field_names
×
199
                .iter()
×
200
                .filter(|f| exc_fields.iter().contains(f))
×
201
                .cloned()
×
202
                .collect()),
×
203
        }
204
    }
359✔
205
}
206

207
impl Display for SelectField {
208
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
3✔
209
        match self {
3✔
210
            SelectField::Include(fields) => write!(f, "{{{}}}", DisplayFieldNames(fields)),
2✔
211
            SelectField::Exclude(fields) => write!(f, "~{{{}}}", DisplayFieldNames(fields)),
1✔
212
        }
213
    }
3✔
214
}
215

216
impl Display for SelectExpr {
217
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
3✔
218
        write!(f, "{}{}", self.child, self.fields)
3✔
219
    }
3✔
220
}
221

222
// Empty impl: `SelectExpr` overrides nothing from `AnalysisExpr`.
impl AnalysisExpr for SelectExpr {}
223

224
#[cfg(test)]
mod tests {

    use vortex_array::arrays::StructArray;
    use vortex_array::{IntoArray, ToCanonical};
    use vortex_buffer::buffer;
    use vortex_dtype::{DType, FieldName, Nullability};

    use crate::{Scope, ScopeDType, root, select, select_exclude, test_harness};

    /// A two-column struct array with fields `a` and `b`.
    fn test_array() -> StructArray {
        let columns = [
            ("a", buffer![0, 1, 2].into_array()),
            ("b", buffer![4, 5, 6].into_array()),
        ];
        StructArray::from_fields(&columns).unwrap()
    }

    /// Including `a` leaves only `a` in the evaluated struct.
    #[test]
    pub fn include_columns() {
        let input = test_array();
        let expr = select(vec![FieldName::from("a")], root());

        let scope = Scope::new(input.to_array());
        let projected = expr.evaluate(&scope).unwrap().to_struct().unwrap();

        let projected_names = projected.names().clone();
        assert_eq!(projected_names.as_ref(), &["a".into()]);
    }

    /// Excluding `a` leaves only `b` in the evaluated struct.
    #[test]
    pub fn exclude_columns() {
        let input = test_array();
        let expr = select_exclude(vec![FieldName::from("a")], root());

        let scope = Scope::new(input.to_array());
        let projected = expr.evaluate(&scope).unwrap().to_struct().unwrap();

        let projected_names = projected.names().clone();
        assert_eq!(projected_names.as_ref(), &["b".into()]);
    }

    /// `return_dtype` agrees with projecting the input struct dtype directly,
    /// for both include and exclude selections.
    #[test]
    fn dtype() {
        let dtype = test_harness::struct_dtype();

        // Selecting `a` explicitly.
        let only_a = DType::Struct(
            dtype.as_struct().unwrap().project(&["a".into()]).unwrap(),
            Nullability::NonNullable,
        );
        let include_a = select(vec![FieldName::from("a")], root());
        let include_a_dtype = include_a
            .return_dtype(&ScopeDType::new(dtype.clone()))
            .unwrap();
        assert_eq!(include_a_dtype, only_a);

        // Excluding every other field leaves the same dtype.
        let all_but_a = select_exclude(
            vec![
                FieldName::from("col1"),
                FieldName::from("col2"),
                FieldName::from("bool1"),
                FieldName::from("bool2"),
            ],
            root(),
        );
        let all_but_a_dtype = all_but_a
            .return_dtype(&ScopeDType::new(dtype.clone()))
            .unwrap();
        assert_eq!(all_but_a_dtype, only_a);

        // Excluding only `col1` and `col2` keeps `a`, `bool1` and `bool2`.
        let without_cols = select_exclude(
            vec![FieldName::from("col1"), FieldName::from("col2")],
            root(),
        );
        let without_cols_dtype = without_cols
            .return_dtype(&ScopeDType::new(dtype.clone()))
            .unwrap();
        let a_and_bools = DType::Struct(
            dtype
                .as_struct()
                .unwrap()
                .project(&["a".into(), "bool1".into(), "bool2".into()])
                .unwrap(),
            Nullability::NonNullable,
        );
        assert_eq!(without_cols_dtype, a_and_bools);
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc