• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16199478115

10 Jul 2025 03:34PM UTC coverage: 81.036% (+2.8%) from 78.188%
16199478115

Pull #3822

github

web-flow
Merge a2614dacf into 3ed9f3090
Pull Request #3822: chore: TPC-H CI/coverage improvements

45600 of 56271 relevant lines covered (81.04%)

64526.78 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.12
/vortex-expr/src/transform/var_partition.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::fmt::{Display, Formatter};
5
use std::hash::{BuildHasher, Hash, Hasher};
6
use std::sync::LazyLock;
7

8
use itertools::Itertools;
9
use vortex_dtype::{FieldName, Nullability};
10
use vortex_error::{VortexExpect, VortexResult};
11
use vortex_utils::aliases::hash_map::{DefaultHashBuilder, HashMap};
12

13
use crate::transform::annotations::{Annotations, variable_scope_annotations};
14
use crate::transform::partition::ReplaceAccessesWithChild;
15
use crate::traversal::{FoldDown, FoldUp, FolderMut, Node};
16
use crate::{ExprRef, Identifier, get_item, pack, var};
17

18
static SPLITTER_RANDOM_STATE: LazyLock<DefaultHashBuilder> =
19
    LazyLock::new(DefaultHashBuilder::default);
20

21
/// Partition an expression by the variable identifiers.
22
pub fn var_partitions(expr: &ExprRef) -> VortexResult<VarPartitionedExpr> {
×
23
    VariableExpressionSplitter::split_all(expr)
×
24
}
×
25

26
/// Partition an expression using the partition function `f`
27
/// e.g. var(x) + var(y) + var(z), where f(x) = {x} and f(y | z) = {y}
28
/// the partitioned expr will be
29
/// root: var(x) + var(y).0 + var(y).1, { x: var(x), y: pack(0: var(y), 1: var(z) }
30
pub fn var_partitions_with_map(
×
31
    expr: &ExprRef,
×
32
    f: impl Fn(&Identifier) -> Identifier,
×
33
) -> VortexResult<VarPartitionedExpr> {
×
34
    VariableExpressionSplitter::split(expr, f)
×
35
}
×
36

37
// TODO(joe): replace with let expressions.
38
/// The result of partitioning an expression.
39
#[derive(Debug)]
40
pub struct VarPartitionedExpr {
41
    /// The root expression used to re-assemble the results.
42
    pub root: ExprRef,
43
    /// The partitions of the expression.
44
    pub partitions: Box<[ExprRef]>,
45
    /// The field names for the partitions
46
    pub partition_names: Box<[Identifier]>,
47
}
48

49
impl Display for VarPartitionedExpr {
50
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
×
51
        write!(
×
52
            f,
×
53
            "root: {} {{{}}}",
×
54
            self.root,
×
55
            self.partition_names
×
56
                .iter()
×
57
                .zip(self.partitions.iter())
×
58
                .map(|(name, partition)| format!("{name}: {partition}"))
×
59
                .join(", ")
×
60
        )
×
61
    }
×
62
}
63

64
impl VarPartitionedExpr {
65
    /// Return the partition for a given field, if it exists.
66
    pub fn find_partition(&self, field: &Identifier) -> Option<&ExprRef> {
4✔
67
        self.partition_names
4✔
68
            .iter()
4✔
69
            .position(|name| name == field)
6✔
70
            .map(|idx| &self.partitions[idx])
4✔
71
    }
4✔
72
}
73

74
#[derive(Debug)]
75
struct VariableExpressionSplitter<'a> {
76
    sub_expressions: HashMap<Identifier, Vec<ExprRef>>,
77
    accesses: &'a Annotations<'a, Identifier>,
78
}
79

80
impl<'a> VariableExpressionSplitter<'a> {
81
    fn new(accesses: &'a Annotations<'a, Identifier>) -> Self {
4✔
82
        Self {
4✔
83
            sub_expressions: HashMap::new(),
4✔
84
            accesses,
4✔
85
        }
4✔
86
    }
4✔
87

88
    pub(crate) fn field_idx_name(field: &Identifier, idx: usize) -> FieldName {
18✔
89
        let mut hasher = SPLITTER_RANDOM_STATE.build_hasher();
18✔
90
        field.hash(&mut hasher);
18✔
91
        idx.hash(&mut hasher);
18✔
92
        hasher.finish().to_string().into()
18✔
93
    }
18✔
94

95
    fn split_all(expr: &ExprRef) -> VortexResult<VarPartitionedExpr> {
3✔
96
        Self::split(expr, Clone::clone)
3✔
97
    }
3✔
98

99
    fn split(
4✔
100
        expr: &ExprRef,
4✔
101
        f: impl Fn(&Identifier) -> Identifier,
4✔
102
    ) -> VortexResult<VarPartitionedExpr> {
4✔
103
        let field_accesses = variable_scope_annotations(expr, f);
4✔
104

4✔
105
        let mut splitter = VariableExpressionSplitter::new(&field_accesses);
4✔
106
        let split = expr.clone().transform_with_context(&mut splitter, ())?;
4✔
107
        let mut remove_accesses: Vec<FieldName> = Vec::new();
4✔
108

4✔
109
        let mut partitions = Vec::with_capacity(splitter.sub_expressions.len());
4✔
110
        let mut partition_names = Vec::with_capacity(splitter.sub_expressions.len());
4✔
111
        for (name, exprs) in splitter.sub_expressions.into_iter() {
7✔
112
            // If there is a single expr then we don't need to `pack` this, and we must update
113
            // the root expr removing this access.
114
            let expr = if exprs.len() == 1 {
7✔
115
                remove_accesses.push(Self::field_idx_name(&name, 0));
5✔
116
                exprs.first().vortex_expect("exprs is non-empty").clone()
5✔
117
            } else {
118
                pack(
2✔
119
                    exprs
2✔
120
                        .into_iter()
2✔
121
                        .enumerate()
2✔
122
                        .map(|(idx, expr)| (Self::field_idx_name(&name, idx), expr)),
4✔
123
                    Nullability::NonNullable,
2✔
124
                )
2✔
125
            };
126

127
            partitions.push(expr);
7✔
128
            partition_names.push(name);
7✔
129
        }
130

131
        let expression_access_counts = field_accesses.get(&expr).map(|ac| ac.len());
4✔
132
        // Ensure that there are not more accesses than partitions, we missed something
4✔
133
        assert!(expression_access_counts.unwrap_or(0) <= partitions.len());
4✔
134
        // Ensure that there are as many partitions as there are accesses/fields in the scope,
135
        // this will affect performance, not correctness.
136
        debug_assert_eq!(expression_access_counts.unwrap_or(0), partitions.len());
4✔
137

138
        let split = split
4✔
139
            .result()
4✔
140
            .transform(&mut ReplaceAccessesWithChild::new(remove_accesses))?;
4✔
141

142
        Ok(VarPartitionedExpr {
4✔
143
            root: split.into_inner(),
4✔
144
            partitions: partitions.into_boxed_slice(),
4✔
145
            partition_names: partition_names.into(),
4✔
146
        })
4✔
147
    }
4✔
148
}
149

150
impl FolderMut for VariableExpressionSplitter<'_> {
151
    type NodeTy = ExprRef;
152
    type Out = ExprRef;
153
    type Context = ();
154

155
    fn visit_down(
13✔
156
        &mut self,
13✔
157
        node: &Self::NodeTy,
13✔
158
        _context: Self::Context,
13✔
159
    ) -> VortexResult<FoldDown<ExprRef, Self::Context>> {
13✔
160
        // If this expression only accesses a single field, then we can skip the children
13✔
161
        let access = self.accesses.get(node);
13✔
162
        if access.as_ref().is_some_and(|a| a.len() == 1) {
13✔
163
            let field_name = access
9✔
164
                .vortex_expect("access is non-empty")
9✔
165
                .iter()
9✔
166
                .next()
9✔
167
                .vortex_expect("expected one field");
9✔
168

9✔
169
            let sub_exprs = self.sub_expressions.entry(field_name.clone()).or_default();
9✔
170
            let idx = sub_exprs.len();
9✔
171

9✔
172
            sub_exprs.push(node.clone());
9✔
173

9✔
174
            let access = get_item(
9✔
175
                Self::field_idx_name(field_name, idx),
9✔
176
                var(field_name.clone()),
9✔
177
            );
9✔
178

9✔
179
            return Ok(FoldDown::SkipChildren(access));
9✔
180
        };
4✔
181

4✔
182
        // Otherwise, continue traversing.
4✔
183
        Ok(FoldDown::Continue(()))
4✔
184
    }
13✔
185

186
    fn visit_up(
4✔
187
        &mut self,
4✔
188
        node: Self::NodeTy,
4✔
189
        _context: Self::Context,
4✔
190
        children: Vec<Self::Out>,
4✔
191
    ) -> VortexResult<FoldUp<Self::Out>> {
4✔
192
        Ok(FoldUp::Continue(node.with_children(children)?))
4✔
193
    }
4✔
194
}
195

196
#[cfg(test)]
mod tests {
    use vortex_dtype::Nullability::NonNullable;

    use super::*;
    use crate::{PackVTable, VarVTable, and, root, var};

    /// A bare root reference splits successfully into one partition, and the
    /// resulting root is still a plain variable.
    #[test]
    fn test_expr_top_level_ref() {
        let expr = root();

        let split = VariableExpressionSplitter::split_all(&expr);
        assert!(split.is_ok());
        let partitioned = split.unwrap();

        assert!(partitioned.root.is::<VarVTable>());
        // Only one identifier is referenced, so there is exactly one partition.
        assert_eq!(partitioned.partitions.len(), 1)
    }

    /// Two distinct identifiers, each referenced once, yield two un-packed partitions.
    #[test]
    fn test_expr_top_level_ref_get_item_and_split() {
        let expr = pack([("root", root()), ("x", var("x"))], NonNullable);

        let partitioned = VariableExpressionSplitter::split_all(&expr).unwrap();

        assert_eq!(partitioned.partitions.len(), 2);
        for (name, expected) in [("", root()), ("x", var("x"))] {
            assert_eq!(partitioned.find_partition(&name.into()), Some(&expected));
        }
    }

    /// A custom mapping that folds every identifier except `x` into the root
    /// identifier yields a packed root partition and a bare `x` partition.
    #[test]
    fn test_partition_var_split_with() {
        let expr = pack(
            [("root", root()), ("x", var("x")), ("y", var("y"))],
            NonNullable,
        );

        let partitioned = VariableExpressionSplitter::split(&expr, |id| {
            if id == "x" { id.clone() } else { "".into() }
        })
        .unwrap();

        assert_eq!(partitioned.partitions.len(), 2);

        let merged = partitioned.find_partition(&"".into()).unwrap();
        assert!(merged.is::<PackVTable>());

        assert_eq!(partitioned.find_partition(&"x".into()), Some(&var("x")));
    }

    /// Referencing the same identifier twice still produces one partition for it.
    #[test]
    fn test_expr_top_level_ref_get_item_and_split_pack() {
        let expr = and(and(var("x"), root()), var("x"));

        let partitioned = VariableExpressionSplitter::split_all(&expr).unwrap();

        assert_eq!(partitioned.partitions.len(), 2);
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc