• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16176491672

09 Jul 2025 05:48PM UTC coverage: 78.1% (+0.01%) from 78.09%
16176491672

push

github

web-flow
ci: update target dir for coverage runs (#3805)

The `-C instrument-coverage` flag doesn't play well with ASAN, because
both instruments require hooking into shutdown so one ends up overriding
the other.

Signed-off-by: Andrew Duffy <andrew@a10y.dev>

44146 of 56525 relevant lines covered (78.1%)

53416.23 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.41
/vortex-expr/src/transform/partition.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::fmt::{Display, Formatter};
5

6
use itertools::Itertools;
7
use vortex_dtype::{DType, FieldName, FieldNames, Nullability, StructFields};
8
use vortex_error::{VortexExpect, VortexResult};
9
use vortex_utils::aliases::hash_map::HashMap;
10

11
use crate::transform::annotations::{
12
    Annotation, AnnotationFn, Annotations, descendent_annotations,
13
};
14
use crate::transform::simplify_typed::simplify_typed;
15
use crate::traversal::{FoldDown, FoldUp, FolderMut, MutNodeVisitor, Node, TransformResult};
16
use crate::{ExprRef, GetItemVTable, ScopeDType, get_item, pack, root};
17

18
/// Partition an expression into sub-expressions that are uniquely associated with an annotation.
19
/// A root expression is also returned that can be used to recombine the results of the partitions
20
/// into the result of the original expression.
21
///
22
/// ## Note
23
///
24
/// This function currently respects the validity of each field in the scope, but the not validity
25
/// of the scope itself. The fix would be for the returned `PartitionedExpr` to include a partition
26
/// expression for computing the validity, or to include that expression as part of the root.
27
///
28
/// See <https://github.com/vortex-data/vortex/issues/1907>.
29
pub fn partition<A: AnnotationFn>(
876✔
30
    expr: ExprRef,
876✔
31
    scope: &DType,
876✔
32
    annotate_fn: A,
876✔
33
) -> VortexResult<PartitionedExpr<A::Annotation>>
876✔
34
where
876✔
35
    A::Annotation: Display,
876✔
36
{
876✔
37
    // Annotate each expression with the annotations that any of its descendent expressions have.
876✔
38
    let annotations = descendent_annotations(&expr, annotate_fn);
876✔
39

876✔
40
    // Now we split the original expression into sub-expressions based on the annotations, and
876✔
41
    // generate a root expression to re-assemble the results.
876✔
42

876✔
43
    let mut splitter = StructFieldExpressionSplitter::<A::Annotation>::new(&annotations);
876✔
44
    let root = expr
876✔
45
        .clone()
876✔
46
        .transform_with_context(&mut splitter, ())?
876✔
47
        .result();
876✔
48

876✔
49
    let mut partitions = Vec::with_capacity(splitter.sub_expressions.len());
876✔
50
    let mut partition_annotations = Vec::with_capacity(splitter.sub_expressions.len());
876✔
51
    let mut partition_dtypes = Vec::with_capacity(splitter.sub_expressions.len());
876✔
52

876✔
53
    let scope_dtype = ScopeDType::new(scope.clone());
876✔
54

55
    for (annotation, exprs) in splitter.sub_expressions.into_iter() {
1,209✔
56
        // We pack all sub-expressions for the same annotation into a single expression.
57
        let expr = pack(
1,209✔
58
            exprs.into_iter().enumerate().map(|(idx, expr)| {
1,212✔
59
                (
1,212✔
60
                    StructFieldExpressionSplitter::field_name(&annotation, idx),
1,212✔
61
                    expr,
1,212✔
62
                )
1,212✔
63
            }),
1,212✔
64
            Nullability::NonNullable,
1,209✔
65
        );
1,209✔
66

67
        let expr = simplify_typed(expr.clone(), &scope_dtype)?;
1,209✔
68
        let expr_dtype = expr.return_dtype(&scope_dtype)?;
1,209✔
69

70
        partitions.push(expr);
1,209✔
71
        partition_annotations.push(annotation);
1,209✔
72
        partition_dtypes.push(expr_dtype);
1,209✔
73
    }
74

75
    let partition_names =
876✔
76
        FieldNames::from_iter(partition_annotations.iter().map(|id| id.to_string()));
1,209✔
77
    let ctx = ScopeDType::new(DType::Struct(
876✔
78
        StructFields::new(partition_names.clone(), partition_dtypes.clone()),
876✔
79
        Nullability::NonNullable,
876✔
80
    ));
876✔
81

876✔
82
    Ok(PartitionedExpr {
876✔
83
        root: simplify_typed(root, &ctx)?,
876✔
84
        partitions: partitions.into_boxed_slice(),
876✔
85
        partition_names,
876✔
86
        partition_dtypes: partition_dtypes.into_boxed_slice(),
876✔
87
        partition_annotations: partition_annotations.into_boxed_slice(),
876✔
88
    })
89
}
876✔
90

91
/// The result of partitioning an expression.
92
#[derive(Debug)]
93
pub struct PartitionedExpr<A> {
94
    /// The root expression used to re-assemble the results.
95
    pub root: ExprRef,
96
    /// The partition expressions themselves.
97
    pub partitions: Box<[ExprRef]>,
98
    /// The field name of each partition as referenced in the root expression.
99
    pub partition_names: FieldNames,
100
    /// The return dtype of each partition expression.
101
    pub partition_dtypes: Box<[DType]>,
102
    /// The annotation associated with each partition.
103
    pub partition_annotations: Box<[A]>,
104
}
105

106
impl<A: Display> Display for PartitionedExpr<A> {
107
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
×
108
        write!(
×
109
            f,
×
110
            "root: {} {{{}}}",
×
111
            self.root,
×
112
            self.partition_names
×
113
                .iter()
×
114
                .zip(self.partitions.iter())
×
115
                .map(|(name, partition)| format!("{name}: {partition}"))
×
116
                .join(", ")
×
117
        )
×
118
    }
×
119
}
120

121
impl<A: Annotation + Display> PartitionedExpr<A> {
122
    /// Return the partition for a given field, if it exists.
123
    // FIXME(ngates): this should return an iterator since an annotation may have multiple partitions.
124
    pub fn find_partition(&self, id: &A) -> Option<&ExprRef> {
3✔
125
        let id = FieldName::from(id.to_string());
3✔
126
        self.partition_names
3✔
127
            .iter()
3✔
128
            .position(|field| field == &id)
4✔
129
            .map(|idx| &self.partitions[idx])
3✔
130
    }
3✔
131
}
132

133
#[derive(Debug)]
134
struct StructFieldExpressionSplitter<'a, A: Annotation> {
135
    annotations: &'a Annotations<'a, A>,
136
    sub_expressions: HashMap<A, Vec<ExprRef>>,
137
}
138

139
impl<'a, A: Annotation + Display> StructFieldExpressionSplitter<'a, A> {
140
    fn new(annotations: &'a Annotations<'a, A>) -> Self {
876✔
141
        Self {
876✔
142
            sub_expressions: HashMap::new(),
876✔
143
            annotations,
876✔
144
        }
876✔
145
    }
876✔
146

147
    /// Each annotation may be associated with multiple sub-expressions, so we need to
148
    /// a unique name for each sub-expression.
149
    fn field_name(annotation: &A, idx: usize) -> FieldName {
2,425✔
150
        format!("{}_{}", annotation, idx).into()
2,425✔
151
    }
2,425✔
152
}
153

154
// FIXME(ngates): rewrite as MutNodeVisitor that skips down when annotations.len() == 1
155
impl<A: Annotation + Display> FolderMut for StructFieldExpressionSplitter<'_, A> {
156
    type NodeTy = ExprRef;
157
    type Out = ExprRef;
158
    type Context = ();
159

160
    fn visit_down(
1,530✔
161
        &mut self,
1,530✔
162
        node: &Self::NodeTy,
1,530✔
163
        _context: Self::Context,
1,530✔
164
    ) -> VortexResult<FoldDown<ExprRef, Self::Context>> {
1,530✔
165
        // If this expression only accesses a single field, then we can skip the children
1,530✔
166
        let annotations = self.annotations.get(node);
1,530✔
167
        if annotations.as_ref().is_some_and(|a| a.len() == 1) {
1,530✔
168
            let annotation = annotations
1,212✔
169
                .vortex_expect("access is non-empty")
1,212✔
170
                .iter()
1,212✔
171
                .next()
1,212✔
172
                .vortex_expect("expected one field");
1,212✔
173

1,212✔
174
            let sub_exprs = self.sub_expressions.entry(annotation.clone()).or_default();
1,212✔
175
            let idx = sub_exprs.len();
1,212✔
176
            sub_exprs.push(node.clone());
1,212✔
177

1,212✔
178
            // In the root, we replace the annotated sub-expression with a `&.<A>.<A_idx>` since
1,212✔
179
            // we assemble all sub-expressions for the same annotation into a single child.
1,212✔
180
            let replacement = get_item(
1,212✔
181
                StructFieldExpressionSplitter::field_name(annotation, idx),
1,212✔
182
                get_item(FieldName::from(annotation.to_string()), root()),
1,212✔
183
            );
1,212✔
184

1,212✔
185
            return Ok(FoldDown::SkipChildren(replacement));
1,212✔
186
        };
318✔
187

318✔
188
        // Otherwise, continue traversing.
318✔
189
        Ok(FoldDown::Continue(()))
318✔
190
    }
1,530✔
191

192
    fn visit_up(
318✔
193
        &mut self,
318✔
194
        node: Self::NodeTy,
318✔
195
        _context: Self::Context,
318✔
196
        children: Vec<Self::Out>,
318✔
197
    ) -> VortexResult<FoldUp<Self::Out>> {
318✔
198
        Ok(FoldUp::Continue(node.with_children(children)?))
318✔
199
    }
318✔
200
}
201

202
pub(crate) struct ReplaceAccessesWithChild(Vec<FieldName>);
203

204
impl ReplaceAccessesWithChild {
205
    pub(crate) fn new(field_names: Vec<FieldName>) -> Self {
64✔
206
        Self(field_names)
64✔
207
    }
64✔
208
}
209

210
impl MutNodeVisitor for ReplaceAccessesWithChild {
211
    type NodeTy = ExprRef;
212

213
    fn visit_up(&mut self, node: Self::NodeTy) -> VortexResult<TransformResult<ExprRef>> {
262✔
214
        if let Some(item) = node.as_opt::<GetItemVTable>() {
262✔
215
            if self.0.contains(item.field()) {
109✔
216
                return Ok(TransformResult::yes(item.child().clone()));
65✔
217
            }
44✔
218
        }
153✔
219
        Ok(TransformResult::no(node))
197✔
220
    }
262✔
221
}
222

223
#[cfg(test)]
mod tests {
    use vortex_dtype::Nullability::NonNullable;
    use vortex_dtype::PType::I32;
    use vortex_dtype::{DType, StructFields};

    use super::*;
    use crate::transform::immediate_access::annotate_scope_access;
    use crate::transform::replace::replace_root_fields;
    use crate::transform::simplify::simplify;
    use crate::transform::simplify_typed::simplify_typed;
    use crate::{and, col, get_item, lit, merge, pack, root, select};

    /// Scope shared by the tests: a non-nullable struct with a nested non-nullable struct `a`
    /// (fields `x` and `y`) and two `I32` fields `b` and `c`.
    fn dtype() -> DType {
        let nested = DType::Struct(
            StructFields::from_iter([("x", I32.into()), ("y", DType::from(I32))]),
            NonNullable,
        );

        DType::Struct(
            StructFields::from_iter([("a", nested), ("b", I32.into()), ("c", I32.into())]),
            NonNullable,
        )
    }

    #[test]
    fn test_expr_top_level_ref() {
        let scope = dtype();
        let scope_fields = scope.as_struct().unwrap();

        let bare_root = root();
        let unexpanded = partition(
            bare_root.clone(),
            &scope,
            annotate_scope_access(scope_fields),
        )
        .unwrap();

        // An un-expanded root expression is annotated by all fields, but since it is a single
        // node it is not split: no partitions are produced and the root is left untouched.
        assert_eq!(unexpanded.partitions.len(), 0);
        assert_eq!(&unexpanded.root, &root());

        // Instead, callers must expand the root expression themselves.
        let expanded_root = replace_root_fields(bare_root, scope_fields);
        let expanded =
            partition(expanded_root, &scope, annotate_scope_access(scope_fields)).unwrap();

        assert_eq!(expanded.partitions.len(), scope_fields.names().len());
    }

    #[test]
    fn test_expr_top_level_ref_get_item_and_split() {
        let scope = dtype();
        let scope_fields = scope.as_struct().unwrap();

        let nested_access = get_item("y", get_item("a", root()));
        let result =
            partition(nested_access, &scope, annotate_scope_access(scope_fields)).unwrap();

        let expected_root = get_item("a_0", get_item("a", root()));
        assert_eq!(&result.root, &expected_root);
    }

    #[test]
    fn test_expr_top_level_ref_get_item_and_split_pack() {
        let scope = dtype();
        let scope_fields = scope.as_struct().unwrap();

        let packed = pack(
            [
                ("x", get_item("x", get_item("a", root()))),
                ("y", get_item("y", get_item("a", root()))),
                ("c", get_item("c", root())),
            ],
            NonNullable,
        );
        let result = partition(packed, &scope, annotate_scope_access(scope_fields)).unwrap();

        // Both accesses under `a` land in the same partition, numbered in visit order.
        let split_a = result.find_partition(&"a".into()).unwrap();
        let expected_a = pack(
            [
                ("a_0", get_item("x", get_item("a", root()))),
                ("a_1", get_item("y", get_item("a", root()))),
            ],
            NonNullable,
        );
        assert_eq!(&simplify(split_a.clone()).unwrap(), &expected_a);
    }

    #[test]
    fn test_expr_top_level_ref_get_item_add() {
        let scope = dtype();
        let scope_fields = scope.as_struct().unwrap();

        let single_field = and(get_item("y", get_item("a", root())), lit(1));
        let result =
            partition(single_field, &scope, annotate_scope_access(scope_fields)).unwrap();

        // Whole expr is a single split
        assert_eq!(result.partitions.len(), 1);
    }

    #[test]
    fn test_expr_top_level_ref_get_item_add_cannot_split() {
        let scope = dtype();
        let scope_fields = scope.as_struct().unwrap();

        let two_fields = and(get_item("y", get_item("a", root())), get_item("b", root()));
        let result = partition(two_fields, &scope, annotate_scope_access(scope_fields)).unwrap();

        // One for id.a and id.b
        assert_eq!(result.partitions.len(), 2);
    }

    // Test that typed_simplify removes select and partition precise
    #[test]
    fn test_expr_partition_many_occurrences_of_field() {
        let scope = dtype();
        let scope_fields = scope.as_struct().unwrap();

        let with_select = and(
            get_item("y", get_item("a", root())),
            select(vec!["a".into(), "b".into()], root()),
        );
        let simplified = simplify_typed(with_select, &ScopeDType::new(scope.clone())).unwrap();
        let result = partition(simplified, &scope, annotate_scope_access(scope_fields)).unwrap();

        // One for id.a and id.b
        assert_eq!(result.partitions.len(), 2);

        // A plain select on the root would also fetch the unused `c`; the typed simplification
        // above is expected to have replaced the select with get_item + pack, dropping it.
        let second_a_access = get_item(
            StructFieldExpressionSplitter::<FieldName>::field_name(&"a".into(), 1),
            get_item("a", root()),
        );
        let expected_root = and(
            get_item("a_0", get_item("a", root())),
            pack(
                [
                    ("a", second_a_access),
                    ("b", get_item("b_0", get_item("b", root()))),
                ],
                NonNullable,
            ),
        );
        assert_eq!(&result.root, &expected_root)
    }

    #[test]
    fn test_expr_merge() {
        let scope = dtype();
        let scope_fields = scope.as_struct().unwrap();

        let merged = merge(
            [col("a"), pack([("b", col("b"))], NonNullable)],
            NonNullable,
        );

        let result = partition(merged, &scope, annotate_scope_access(scope_fields)).unwrap();
        let expected = pack(
            [
                ("x", get_item("x", get_item("a_0", col("a")))),
                ("y", get_item("y", get_item("a_0", col("a")))),
                ("b", get_item("b", get_item("b_0", col("b")))),
            ],
            NonNullable,
        );
        assert_eq!(&result.root, &expected, "{} {}", result.root, expected);

        assert_eq!(result.partitions.len(), 2);

        let part_a = result.find_partition(&"a".into()).unwrap();
        let expected_a = pack([("a_0", col("a"))], NonNullable);
        assert_eq!(part_a, &expected_a, "{} {}", part_a, expected_a);

        let part_b = result.find_partition(&"b".into()).unwrap();
        let expected_b = pack([("b_0", pack([("b", col("b"))], NonNullable))], NonNullable);
        assert_eq!(part_b, &expected_b, "{} {}", part_b, expected_b);
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc