• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16201397870

10 Jul 2025 05:05PM UTC coverage: 81.145% (+0.06%) from 81.084%
16201397870

push

github

web-flow
Remove var expression (#3829)

Fixes #3671 

* Removes the Var expression, leaving instead a `root()` expression for
resolving the scope root.
* The expression scope can hold context variables, useful for passing in
auth tokens for example, but not variables that would impact the
return_dtype of the expression.
* ScopeDType has therefore been removed, because the dtype of the scope
_is_ just the dtype of the root array.
* Simplifies some transformation / partitioning logic where vars no
longer need to be considered.

Signed-off-by: Nicholas Gates <nick@nickgates.com>
Co-authored-by: Will Manning <will@spiraldb.com>

164 of 175 new or added lines in 32 files covered. (93.71%)

18 existing lines in 6 files now uncovered.

45273 of 55793 relevant lines covered (81.14%)

146273.04 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.78
/vortex-expr/src/transform/partition.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::fmt::{Display, Formatter};
5

6
use itertools::Itertools;
7
use vortex_dtype::{DType, FieldName, FieldNames, Nullability, StructFields};
8
use vortex_error::{VortexExpect, VortexResult};
9
use vortex_utils::aliases::hash_map::HashMap;
10

11
use crate::transform::annotations::{
12
    Annotation, AnnotationFn, Annotations, descendent_annotations,
13
};
14
use crate::transform::simplify_typed::simplify_typed;
15
use crate::traversal::{FoldDown, FoldUp, FolderMut, MutNodeVisitor, Node, TransformResult};
16
use crate::{ExprRef, GetItemVTable, get_item, pack, root};
17

18
/// Partition an expression into sub-expressions that are uniquely associated with an annotation.
19
/// A root expression is also returned that can be used to recombine the results of the partitions
20
/// into the result of the original expression.
21
///
22
/// ## Note
23
///
24
/// This function currently respects the validity of each field in the scope, but not the validity
25
/// of the scope itself. The fix would be for the returned `PartitionedExpr` to include a partition
26
/// expression for computing the validity, or to include that expression as part of the root.
27
///
28
/// See <https://github.com/vortex-data/vortex/issues/1907>.
29
pub fn partition<A: AnnotationFn>(
4,237✔
30
    expr: ExprRef,
4,237✔
31
    scope: &DType,
4,237✔
32
    annotate_fn: A,
4,237✔
33
) -> VortexResult<PartitionedExpr<A::Annotation>>
4,237✔
34
where
4,237✔
35
    A::Annotation: Display,
4,237✔
36
{
4,237✔
37
    // Annotate each expression with the annotations that any of its descendent expressions have.
4,237✔
38
    let annotations = descendent_annotations(&expr, annotate_fn);
4,237✔
39

4,237✔
40
    // Now we split the original expression into sub-expressions based on the annotations, and
4,237✔
41
    // generate a root expression to re-assemble the results.
4,237✔
42

4,237✔
43
    let mut splitter = StructFieldExpressionSplitter::<A::Annotation>::new(&annotations);
4,237✔
44
    let root = expr
4,237✔
45
        .clone()
4,237✔
46
        .transform_with_context(&mut splitter, ())?
4,237✔
47
        .result();
4,237✔
48

4,237✔
49
    let mut partitions = Vec::with_capacity(splitter.sub_expressions.len());
4,237✔
50
    let mut partition_annotations = Vec::with_capacity(splitter.sub_expressions.len());
4,237✔
51
    let mut partition_dtypes = Vec::with_capacity(splitter.sub_expressions.len());
4,237✔
52

53
    for (annotation, exprs) in splitter.sub_expressions.into_iter() {
5,805✔
54
        // We pack all sub-expressions for the same annotation into a single expression.
55
        let expr = pack(
5,805✔
56
            exprs.into_iter().enumerate().map(|(idx, expr)| {
5,809✔
57
                (
5,809✔
58
                    StructFieldExpressionSplitter::field_name(&annotation, idx),
5,809✔
59
                    expr,
5,809✔
60
                )
5,809✔
61
            }),
5,809✔
62
            Nullability::NonNullable,
5,805✔
63
        );
5,805✔
64

65
        let expr = simplify_typed(expr.clone(), scope)?;
5,805✔
66
        let expr_dtype = expr.return_dtype(scope)?;
5,805✔
67

68
        partitions.push(expr);
5,805✔
69
        partition_annotations.push(annotation);
5,805✔
70
        partition_dtypes.push(expr_dtype);
5,805✔
71
    }
72

73
    let partition_names =
4,237✔
74
        FieldNames::from_iter(partition_annotations.iter().map(|id| id.to_string()));
5,805✔
75
    let root_scope = DType::Struct(
4,237✔
76
        StructFields::new(partition_names.clone(), partition_dtypes.clone()),
4,237✔
77
        Nullability::NonNullable,
4,237✔
78
    );
4,237✔
79

4,237✔
80
    Ok(PartitionedExpr {
4,237✔
81
        root: simplify_typed(root, &root_scope)?,
4,237✔
82
        partitions: partitions.into_boxed_slice(),
4,237✔
83
        partition_names,
4,237✔
84
        partition_dtypes: partition_dtypes.into_boxed_slice(),
4,237✔
85
        partition_annotations: partition_annotations.into_boxed_slice(),
4,237✔
86
    })
87
}
4,237✔
88

89
/// The result of partitioning an expression.
90
#[derive(Debug)]
91
pub struct PartitionedExpr<A> {
92
    /// The root expression used to re-assemble the results.
93
    pub root: ExprRef,
94
    /// The partition expressions themselves.
95
    pub partitions: Box<[ExprRef]>,
96
    /// The field name of each partition as referenced in the root expression.
97
    pub partition_names: FieldNames,
98
    /// The return dtype of each partition expression.
99
    pub partition_dtypes: Box<[DType]>,
100
    /// The annotation associated with each partition.
101
    pub partition_annotations: Box<[A]>,
102
}
103

104
impl<A: Display> Display for PartitionedExpr<A> {
105
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
×
106
        write!(
×
107
            f,
×
108
            "root: {} {{{}}}",
×
109
            self.root,
×
110
            self.partition_names
×
111
                .iter()
×
112
                .zip(self.partitions.iter())
×
113
                .map(|(name, partition)| format!("{name}: {partition}"))
×
114
                .join(", ")
×
115
        )
×
116
    }
×
117
}
118

119
impl<A: Annotation + Display> PartitionedExpr<A> {
120
    /// Return the partition for a given field, if it exists.
121
    // FIXME(ngates): this should return an iterator since an annotation may have multiple partitions.
122
    pub fn find_partition(&self, id: &A) -> Option<&ExprRef> {
3✔
123
        let id = FieldName::from(id.to_string());
3✔
124
        self.partition_names
3✔
125
            .iter()
3✔
126
            .position(|field| field == &id)
5✔
127
            .map(|idx| &self.partitions[idx])
3✔
128
    }
3✔
129
}
130

131
#[derive(Debug)]
132
struct StructFieldExpressionSplitter<'a, A: Annotation> {
133
    annotations: &'a Annotations<'a, A>,
134
    sub_expressions: HashMap<A, Vec<ExprRef>>,
135
}
136

137
impl<'a, A: Annotation + Display> StructFieldExpressionSplitter<'a, A> {
138
    fn new(annotations: &'a Annotations<'a, A>) -> Self {
4,237✔
139
        Self {
4,237✔
140
            sub_expressions: HashMap::new(),
4,237✔
141
            annotations,
4,237✔
142
        }
4,237✔
143
    }
4,237✔
144

145
    /// Each annotation may be associated with multiple sub-expressions, so we need to
146
    /// generate a unique name for each sub-expression.
147
    fn field_name(annotation: &A, idx: usize) -> FieldName {
11,619✔
148
        format!("{}_{}", annotation, idx).into()
11,619✔
149
    }
11,619✔
150
}
151

152
// FIXME(ngates): rewrite as MutNodeVisitor that skips down when annotations.len() == 1
153
impl<A: Annotation + Display> FolderMut for StructFieldExpressionSplitter<'_, A> {
154
    type NodeTy = ExprRef;
155
    type Out = ExprRef;
156
    type Context = ();
157

158
    fn visit_down(
6,721✔
159
        &mut self,
6,721✔
160
        node: &Self::NodeTy,
6,721✔
161
        _context: Self::Context,
6,721✔
162
    ) -> VortexResult<FoldDown<ExprRef, Self::Context>> {
6,721✔
163
        // If this expression only accesses a single field, then we can skip the children
6,721✔
164
        let annotations = self.annotations.get(node);
6,721✔
165
        if annotations.as_ref().is_some_and(|a| a.len() == 1) {
6,721✔
166
            let annotation = annotations
5,809✔
167
                .vortex_expect("access is non-empty")
5,809✔
168
                .iter()
5,809✔
169
                .next()
5,809✔
170
                .vortex_expect("expected one field");
5,809✔
171

5,809✔
172
            let sub_exprs = self.sub_expressions.entry(annotation.clone()).or_default();
5,809✔
173
            let idx = sub_exprs.len();
5,809✔
174
            sub_exprs.push(node.clone());
5,809✔
175

5,809✔
176
            // In the root, we replace the annotated sub-expression with a `&.<A>.<A_idx>` since
5,809✔
177
            // we assemble all sub-expressions for the same annotation into a single child.
5,809✔
178
            let replacement = get_item(
5,809✔
179
                StructFieldExpressionSplitter::field_name(annotation, idx),
5,809✔
180
                get_item(FieldName::from(annotation.to_string()), root()),
5,809✔
181
            );
5,809✔
182

5,809✔
183
            return Ok(FoldDown::SkipChildren(replacement));
5,809✔
184
        };
912✔
185

912✔
186
        // Otherwise, continue traversing.
912✔
187
        Ok(FoldDown::Continue(()))
912✔
188
    }
6,721✔
189

190
    fn visit_up(
912✔
191
        &mut self,
912✔
192
        node: Self::NodeTy,
912✔
193
        _context: Self::Context,
912✔
194
        children: Vec<Self::Out>,
912✔
195
    ) -> VortexResult<FoldUp<Self::Out>> {
912✔
196
        Ok(FoldUp::Continue(node.with_children(children)?))
912✔
197
    }
912✔
198
}
199

200
/// Visitor that replaces every `get_item` expression whose field name appears in the wrapped
/// list with that expression's child, leaving all other nodes unchanged.
pub(crate) struct ReplaceAccessesWithChild(Vec<FieldName>);
201

202
impl MutNodeVisitor for ReplaceAccessesWithChild {
203
    type NodeTy = ExprRef;
204

UNCOV
205
    fn visit_up(&mut self, node: Self::NodeTy) -> VortexResult<TransformResult<ExprRef>> {
×
UNCOV
206
        if let Some(item) = node.as_opt::<GetItemVTable>() {
×
UNCOV
207
            if self.0.contains(item.field()) {
×
UNCOV
208
                return Ok(TransformResult::yes(item.child().clone()));
×
UNCOV
209
            }
×
UNCOV
210
        }
×
UNCOV
211
        Ok(TransformResult::no(node))
×
UNCOV
212
    }
×
213
}
214

215
#[cfg(test)]
216
mod tests {
217
    use vortex_dtype::Nullability::NonNullable;
218
    use vortex_dtype::PType::I32;
219
    use vortex_dtype::{DType, StructFields};
220

221
    use super::*;
222
    use crate::transform::immediate_access::annotate_scope_access;
223
    use crate::transform::replace::replace_root_fields;
224
    use crate::transform::simplify::simplify;
225
    use crate::transform::simplify_typed::simplify_typed;
226
    use crate::{and, col, get_item, lit, merge, pack, root, select};
227

228
    fn dtype() -> DType {
7✔
229
        DType::Struct(
7✔
230
            StructFields::from_iter([
7✔
231
                (
7✔
232
                    "a",
7✔
233
                    DType::Struct(
7✔
234
                        StructFields::from_iter([("x", I32.into()), ("y", DType::from(I32))]),
7✔
235
                        NonNullable,
7✔
236
                    ),
7✔
237
                ),
7✔
238
                ("b", I32.into()),
7✔
239
                ("c", I32.into()),
7✔
240
            ]),
7✔
241
            NonNullable,
7✔
242
        )
7✔
243
    }
7✔
244

245
    #[test]
246
    fn test_expr_top_level_ref() {
1✔
247
        let dtype = dtype();
1✔
248
        let fields = dtype.as_struct().unwrap();
1✔
249

1✔
250
        let expr = root();
1✔
251
        let partitioned = partition(expr.clone(), &dtype, annotate_scope_access(fields)).unwrap();
1✔
252

1✔
253
        // An un-expanded root expression is annotated by all fields, but since it is a single node
        // it cannot be split: no partitions are produced and the root is returned unchanged.
1✔
254
        assert_eq!(partitioned.partitions.len(), 0);
1✔
255
        assert_eq!(&partitioned.root, &root());
1✔
256

257
        // Instead, callers must expand the root expression themselves.
258
        let expr = replace_root_fields(expr.clone(), fields);
1✔
259
        let partitioned = partition(expr.clone(), &dtype, annotate_scope_access(fields)).unwrap();
1✔
260

1✔
261
        assert_eq!(partitioned.partitions.len(), fields.names().len());
1✔
262
    }
1✔
263

264
    #[test]
265
    fn test_expr_top_level_ref_get_item_and_split() {
1✔
266
        let dtype = dtype();
1✔
267
        let fields = dtype.as_struct().unwrap();
1✔
268

1✔
269
        let expr = get_item("y", get_item("a", root()));
1✔
270

1✔
271
        let partitioned = partition(expr, &dtype, annotate_scope_access(fields)).unwrap();
1✔
272
        assert_eq!(&partitioned.root, &get_item("a_0", get_item("a", root())));
1✔
273
    }
1✔
274

275
    #[test]
276
    fn test_expr_top_level_ref_get_item_and_split_pack() {
1✔
277
        let dtype = dtype();
1✔
278
        let fields = dtype.as_struct().unwrap();
1✔
279

1✔
280
        let expr = pack(
1✔
281
            [
1✔
282
                ("x", get_item("x", get_item("a", root()))),
1✔
283
                ("y", get_item("y", get_item("a", root()))),
1✔
284
                ("c", get_item("c", root())),
1✔
285
            ],
1✔
286
            NonNullable,
1✔
287
        );
1✔
288
        let partitioned = partition(expr, &dtype, annotate_scope_access(fields)).unwrap();
1✔
289

1✔
290
        let split_a = partitioned.find_partition(&"a".into()).unwrap();
1✔
291
        assert_eq!(
1✔
292
            &simplify(split_a.clone()).unwrap(),
1✔
293
            &pack(
1✔
294
                [
1✔
295
                    ("a_0", get_item("x", get_item("a", root()))),
1✔
296
                    ("a_1", get_item("y", get_item("a", root())))
1✔
297
                ],
1✔
298
                NonNullable
1✔
299
            )
1✔
300
        );
1✔
301
    }
1✔
302

303
    #[test]
304
    fn test_expr_top_level_ref_get_item_add() {
1✔
305
        let dtype = dtype();
1✔
306
        let fields = dtype.as_struct().unwrap();
1✔
307

1✔
308
        let expr = and(get_item("y", get_item("a", root())), lit(1));
1✔
309
        let partitioned = partition(expr, &dtype, annotate_scope_access(fields)).unwrap();
1✔
310

1✔
311
        // Whole expr is a single split
1✔
312
        assert_eq!(partitioned.partitions.len(), 1);
1✔
313
    }
1✔
314

315
    #[test]
316
    fn test_expr_top_level_ref_get_item_add_cannot_split() {
1✔
317
        let dtype = dtype();
1✔
318
        let fields = dtype.as_struct().unwrap();
1✔
319

1✔
320
        let expr = and(get_item("y", get_item("a", root())), get_item("b", root()));
1✔
321
        let partitioned = partition(expr, &dtype, annotate_scope_access(fields)).unwrap();
1✔
322

1✔
323
        // One for id.a and id.b
1✔
324
        assert_eq!(partitioned.partitions.len(), 2);
1✔
325
    }
1✔
326

327
    // Test that `simplify_typed` removes the select so that partitioning is precise.
328
    #[test]
329
    fn test_expr_partition_many_occurrences_of_field() {
1✔
330
        let dtype = dtype();
1✔
331
        let fields = dtype.as_struct().unwrap();
1✔
332

1✔
333
        let expr = and(
1✔
334
            get_item("y", get_item("a", root())),
1✔
335
            select(vec!["a".into(), "b".into()], root()),
1✔
336
        );
1✔
337
        let expr = simplify_typed(expr, &dtype).unwrap();
1✔
338
        let partitioned = partition(expr, &dtype, annotate_scope_access(fields)).unwrap();
1✔
339

1✔
340
        // One for id.a and id.b
1✔
341
        assert_eq!(partitioned.partitions.len(), 2);
1✔
342

343
        // This fetches [].$c which is unused, however a previous optimisation should replace select
344
        // with get_item and pack removing this field.
345
        assert_eq!(
1✔
346
            &partitioned.root,
1✔
347
            &and(
1✔
348
                get_item("a_0", get_item("a", root())),
1✔
349
                pack(
1✔
350
                    [
1✔
351
                        (
1✔
352
                            "a",
1✔
353
                            get_item(
1✔
354
                                StructFieldExpressionSplitter::<FieldName>::field_name(
1✔
355
                                    &"a".into(),
1✔
356
                                    1
1✔
357
                                ),
1✔
358
                                get_item("a", root())
1✔
359
                            )
1✔
360
                        ),
1✔
361
                        ("b", get_item("b_0", get_item("b", root())))
1✔
362
                    ],
1✔
363
                    NonNullable
1✔
364
                )
1✔
365
            )
1✔
366
        )
1✔
367
    }
1✔
368

369
    #[test]
370
    fn test_expr_merge() {
1✔
371
        let dtype = dtype();
1✔
372
        let fields = dtype.as_struct().unwrap();
1✔
373

1✔
374
        let expr = merge(
1✔
375
            [col("a"), pack([("b", col("b"))], NonNullable)],
1✔
376
            NonNullable,
1✔
377
        );
1✔
378

1✔
379
        let partitioned = partition(expr, &dtype, annotate_scope_access(fields)).unwrap();
1✔
380
        let expected = pack(
1✔
381
            [
1✔
382
                ("x", get_item("x", get_item("a_0", col("a")))),
1✔
383
                ("y", get_item("y", get_item("a_0", col("a")))),
1✔
384
                ("b", get_item("b", get_item("b_0", col("b")))),
1✔
385
            ],
1✔
386
            NonNullable,
1✔
387
        );
1✔
388
        assert_eq!(
1✔
389
            &partitioned.root, &expected,
1✔
390
            "{} {}",
391
            partitioned.root, expected
392
        );
393

394
        assert_eq!(partitioned.partitions.len(), 2);
1✔
395

396
        let part_a = partitioned.find_partition(&"a".into()).unwrap();
1✔
397
        let expected_a = pack([("a_0", col("a"))], NonNullable);
1✔
398
        assert_eq!(part_a, &expected_a, "{} {}", part_a, expected_a);
1✔
399

400
        let part_b = partitioned.find_partition(&"b".into()).unwrap();
1✔
401
        let expected_b = pack([("b_0", pack([("b", col("b"))], NonNullable))], NonNullable);
1✔
402
        assert_eq!(part_b, &expected_b, "{} {}", part_b, expected_b);
1✔
403
    }
1✔
404
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc