• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16204612549

10 Jul 2025 07:50PM UTC coverage: 81.152% (+2.9%) from 78.263%
16204612549

Pull #3825

github

web-flow
Merge d0d2717da into be9c2fd3e
Pull Request #3825: feat: Add optimize ArrayOp with VBView implementation

178 of 211 new or added lines in 4 files covered. (84.36%)

330 existing lines in 34 files now uncovered.

45433 of 55985 relevant lines covered (81.15%)

145951.87 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.4
/vortex-expr/src/pruning/pruning_expr.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::iter;
5

6
use itertools::Itertools;
7
use vortex_array::stats::Stat;
8
use vortex_dtype::{Field, FieldName, FieldPath, FieldPathSet};
9
use vortex_utils::aliases::hash_map::HashMap;
10

11
use super::relation::Relation;
12
use crate::{ExprRef, StatsCatalog, get_item, root};
13

14
pub type RequiredStats = Relation<FieldPath, Stat>;
15

16
// A catalog that return a stat column whenever it is required
17
#[derive(Default)]
18
struct AnyStatsCatalog {
19
    usage: HashMap<(FieldPath, Stat), ExprRef>,
20
}
21

22
// A catalog that return a stat column if it exists in the given scope.
23
struct ScopeStatsCatalog<'a> {
24
    any_catalog: AnyStatsCatalog,
25
    field_paths: &'a FieldPathSet,
26
}
27

28
impl StatsCatalog for ScopeStatsCatalog<'_> {
29
    fn stats_ref(&mut self, field_path: &FieldPath, stat: Stat) -> Option<ExprRef> {
810✔
30
        let stat_path = field_path.clone().push(Field::Name(stat.name().into()));
810✔
31

810✔
32
        if self.field_paths.contains(&stat_path) {
810✔
33
            self.any_catalog.stats_ref(field_path, stat)
810✔
34
        } else {
UNCOV
35
            None
×
36
        }
37
    }
810✔
38
}
39

40
impl StatsCatalog for AnyStatsCatalog {
41
    fn stats_ref(&mut self, field_path: &FieldPath, stat: Stat) -> Option<ExprRef> {
831✔
42
        let mut expr = root();
831✔
43
        let name = field_path_stat_field_name(field_path, stat);
831✔
44
        expr = get_item(name, expr);
831✔
45
        self.usage.insert((field_path.clone(), stat), expr.clone());
831✔
46
        Some(expr)
831✔
47
    }
831✔
48
}
49

50
pub fn field_path_stat_field_name(field_path: &FieldPath, stat: Stat) -> FieldName {
847✔
51
    field_path
847✔
52
        .path()
847✔
53
        .iter()
847✔
54
        .map(|f| match f {
847✔
55
            Field::Name(n) => n.as_ref(),
37✔
UNCOV
56
            Field::ElementType => todo!("element type not currently handled"),
×
57
        })
847✔
58
        .chain(iter::once(stat.name()))
847✔
59
        .join("_")
847✔
60
        .into()
847✔
61
}
847✔
62

63
/// Create a stat based falsification expr assuming all stats for all column in the expression
64
/// exist
65
pub fn pruning_expr(expr: &ExprRef) -> Option<(ExprRef, RequiredStats)> {
10✔
66
    let mut catalog = AnyStatsCatalog {
10✔
67
        ..Default::default()
10✔
68
    };
10✔
69
    let expr = expr.stat_falsification(&mut catalog)?;
10✔
70

71
    // TODO(joe): filter access by used exprs
72
    let mut relation: Relation<FieldPath, Stat> = Relation::new();
9✔
73
    for ((field_path, stat), _) in catalog.usage.into_iter() {
20✔
74
        relation.insert(field_path, stat)
20✔
75
    }
76

77
    Some((expr, relation))
9✔
78
}
10✔
79

80
/// Build a pruning expr mask an existing bundle of stats
81
/// Creates a stat based falsification expr using the stats in the `scope_field_paths`.
82
/// These are of the form
83
/// [["col_0", ..., "col_n", "stat_name"], ...] for each stat.
84
pub fn checked_pruning_expr(
661✔
85
    expr: &ExprRef,
661✔
86
    field_paths: &FieldPathSet,
661✔
87
) -> Option<(ExprRef, RequiredStats)> {
661✔
88
    let mut catalog = ScopeStatsCatalog {
661✔
89
        any_catalog: Default::default(),
661✔
90
        field_paths,
661✔
91
    };
661✔
92

93
    let expr = expr.stat_falsification(&mut catalog)?;
661✔
94

95
    // TODO(joe): filter access by used exprs
96
    let mut relation: Relation<FieldPath, Stat> = Relation::new();
585✔
97
    for ((field_path, stat), _) in catalog.any_catalog.usage.into_iter() {
786✔
98
        relation.insert(field_path, stat)
786✔
99
    }
100

101
    Some((expr, relation))
585✔
102
}
661✔
103

104
#[cfg(test)]
105
mod tests {
106
    use vortex_array::stats::Stat;
107
    use vortex_dtype::{FieldName, FieldPath};
108

109
    use crate::pruning::field_path_stat_field_name;
110
    use crate::pruning::pruning_expr::{HashMap, pruning_expr};
111
    use crate::{
112
        HashSet, and, col, eq, get_item, get_item_scope, gt, gt_eq, lit, lt, lt_eq, not_eq, or,
113
        root,
114
    };
115

116
    #[test]
117
    pub fn pruning_equals() {
1✔
118
        let name = FieldName::from("a");
1✔
119
        let literal_eq = lit(42);
1✔
120
        let eq_expr = eq(get_item("a", root()), literal_eq.clone());
1✔
121
        let (converted, _refs) = pruning_expr(&eq_expr).unwrap();
1✔
122
        let expected_expr = or(
1✔
123
            gt(
1✔
124
                get_item(
1✔
125
                    field_path_stat_field_name(&FieldPath::from_name(name.clone()), Stat::Min),
1✔
126
                    root(),
1✔
127
                ),
1✔
128
                literal_eq.clone(),
1✔
129
            ),
1✔
130
            gt(
1✔
131
                literal_eq,
1✔
132
                get_item_scope(field_path_stat_field_name(
1✔
133
                    &FieldPath::from_name(name),
1✔
134
                    Stat::Max,
1✔
135
                )),
1✔
136
            ),
1✔
137
        );
1✔
138
        assert_eq!(&converted, &expected_expr);
1✔
139
    }
1✔
140

141
    #[test]
142
    pub fn pruning_equals_column() {
1✔
143
        let column = FieldName::from("a");
1✔
144
        let other_col = FieldName::from("b");
1✔
145
        let eq_expr = eq(
1✔
146
            get_item_scope(column.clone()),
1✔
147
            get_item_scope(other_col.clone()),
1✔
148
        );
1✔
149

1✔
150
        let (converted, refs) = pruning_expr(&eq_expr).unwrap();
1✔
151
        assert_eq!(
1✔
152
            refs.map(),
1✔
153
            &HashMap::from_iter([
1✔
154
                (
1✔
155
                    FieldPath::from_name(column.clone()),
1✔
156
                    HashSet::from_iter([Stat::Min, Stat::Max])
1✔
157
                ),
1✔
158
                (
1✔
159
                    FieldPath::from_name(other_col.clone()),
1✔
160
                    HashSet::from_iter([Stat::Max, Stat::Min])
1✔
161
                )
1✔
162
            ])
1✔
163
        );
1✔
164
        let expected_expr = or(
1✔
165
            gt(
1✔
166
                get_item_scope(field_path_stat_field_name(
1✔
167
                    &FieldPath::from_name(column.clone()),
1✔
168
                    Stat::Min,
1✔
169
                )),
1✔
170
                get_item_scope(field_path_stat_field_name(
1✔
171
                    &FieldPath::from_name(other_col.clone()),
1✔
172
                    Stat::Max,
1✔
173
                )),
1✔
174
            ),
1✔
175
            gt(
1✔
176
                get_item_scope(field_path_stat_field_name(
1✔
177
                    &FieldPath::from_name(other_col),
1✔
178
                    Stat::Min,
1✔
179
                )),
1✔
180
                get_item_scope(field_path_stat_field_name(
1✔
181
                    &FieldPath::from_name(column),
1✔
182
                    Stat::Max,
1✔
183
                )),
1✔
184
            ),
1✔
185
        );
1✔
186
        assert_eq!(&converted, &expected_expr);
1✔
187
    }
1✔
188

189
    #[test]
190
    pub fn pruning_not_equals_column() {
1✔
191
        let column = FieldName::from("a");
1✔
192
        let other_col = FieldName::from("b");
1✔
193
        let not_eq_expr = not_eq(
1✔
194
            get_item_scope(column.clone()),
1✔
195
            get_item_scope(other_col.clone()),
1✔
196
        );
1✔
197

1✔
198
        let (converted, refs) = pruning_expr(&not_eq_expr).unwrap();
1✔
199
        assert_eq!(
1✔
200
            refs.map(),
1✔
201
            &HashMap::from_iter([
1✔
202
                (
1✔
203
                    FieldPath::from_name(column.clone()),
1✔
204
                    HashSet::from_iter([Stat::Min, Stat::Max])
1✔
205
                ),
1✔
206
                (
1✔
207
                    FieldPath::from_name(other_col.clone()),
1✔
208
                    HashSet::from_iter([Stat::Max, Stat::Min])
1✔
209
                )
1✔
210
            ])
1✔
211
        );
1✔
212
        let expected_expr = and(
1✔
213
            eq(
1✔
214
                get_item_scope(field_path_stat_field_name(
1✔
215
                    &FieldPath::from_name(column.clone()),
1✔
216
                    Stat::Min,
1✔
217
                )),
1✔
218
                get_item_scope(field_path_stat_field_name(
1✔
219
                    &FieldPath::from_name(other_col.clone()),
1✔
220
                    Stat::Max,
1✔
221
                )),
1✔
222
            ),
1✔
223
            eq(
1✔
224
                get_item_scope(field_path_stat_field_name(
1✔
225
                    &FieldPath::from_name(column),
1✔
226
                    Stat::Max,
1✔
227
                )),
1✔
228
                get_item_scope(field_path_stat_field_name(
1✔
229
                    &FieldPath::from_name(other_col),
1✔
230
                    Stat::Min,
1✔
231
                )),
1✔
232
            ),
1✔
233
        );
1✔
234

1✔
235
        assert_eq!(&converted, &expected_expr);
1✔
236
    }
1✔
237

238
    #[test]
239
    pub fn pruning_gt_column() {
1✔
240
        let column = FieldName::from("a");
1✔
241
        let other_col = FieldName::from("b");
1✔
242
        let other_expr = get_item_scope(other_col.clone());
1✔
243
        let not_eq_expr = gt(get_item_scope(column.clone()), other_expr.clone());
1✔
244

1✔
245
        let (converted, refs) = pruning_expr(&not_eq_expr).unwrap();
1✔
246
        assert_eq!(
1✔
247
            refs.map(),
1✔
248
            &HashMap::from_iter([
1✔
249
                (
1✔
250
                    FieldPath::from_name(column.clone()),
1✔
251
                    HashSet::from_iter([Stat::Max])
1✔
252
                ),
1✔
253
                (
1✔
254
                    FieldPath::from_name(other_col.clone()),
1✔
255
                    HashSet::from_iter([Stat::Min])
1✔
256
                )
1✔
257
            ])
1✔
258
        );
1✔
259
        let expected_expr = lt_eq(
1✔
260
            get_item_scope(field_path_stat_field_name(
1✔
261
                &FieldPath::from_name(column),
1✔
262
                Stat::Max,
1✔
263
            )),
1✔
264
            get_item_scope(field_path_stat_field_name(
1✔
265
                &FieldPath::from_name(other_col),
1✔
266
                Stat::Min,
1✔
267
            )),
1✔
268
        );
1✔
269
        assert_eq!(&converted, &expected_expr);
1✔
270
    }
1✔
271

272
    #[test]
273
    pub fn pruning_gt_value() {
1✔
274
        let column = FieldName::from("a");
1✔
275
        let other_col = lit(42);
1✔
276
        let not_eq_expr = gt(get_item_scope(column.clone()), other_col.clone());
1✔
277

1✔
278
        let (converted, refs) = pruning_expr(&not_eq_expr).unwrap();
1✔
279
        assert_eq!(
1✔
280
            refs.map(),
1✔
281
            &HashMap::from_iter([(
1✔
282
                FieldPath::from_name(column.clone()),
1✔
283
                HashSet::from_iter([Stat::Max])
1✔
284
            ),])
1✔
285
        );
1✔
286
        let expected_expr = lt_eq(
1✔
287
            get_item_scope(field_path_stat_field_name(
1✔
288
                &FieldPath::from_name(column),
1✔
289
                Stat::Max,
1✔
290
            )),
1✔
291
            other_col.clone(),
1✔
292
        );
1✔
293
        assert_eq!(&converted, &(expected_expr));
1✔
294
    }
1✔
295

296
    #[test]
297
    pub fn pruning_lt_column() {
1✔
298
        let column = FieldName::from("a");
1✔
299
        let other_col = FieldName::from("b");
1✔
300
        let other_expr = get_item_scope(other_col.clone());
1✔
301
        let not_eq_expr = lt(get_item_scope(column.clone()), other_expr.clone());
1✔
302

1✔
303
        let (converted, refs) = pruning_expr(&not_eq_expr).unwrap();
1✔
304
        assert_eq!(
1✔
305
            refs.map(),
1✔
306
            &HashMap::from_iter([
1✔
307
                (
1✔
308
                    FieldPath::from_name(column.clone()),
1✔
309
                    HashSet::from_iter([Stat::Min])
1✔
310
                ),
1✔
311
                (
1✔
312
                    FieldPath::from_name(other_col.clone()),
1✔
313
                    HashSet::from_iter([Stat::Max])
1✔
314
                )
1✔
315
            ])
1✔
316
        );
1✔
317
        let expected_expr = gt_eq(
1✔
318
            get_item_scope(field_path_stat_field_name(
1✔
319
                &FieldPath::from_name(column),
1✔
320
                Stat::Min,
1✔
321
            )),
1✔
322
            get_item_scope(field_path_stat_field_name(
1✔
323
                &FieldPath::from_name(other_col),
1✔
324
                Stat::Max,
1✔
325
            )),
1✔
326
        );
1✔
327
        assert_eq!(&converted, &expected_expr);
1✔
328
    }
1✔
329

330
    #[test]
331
    pub fn pruning_lt_value() {
1✔
332
        let column = FieldName::from("a");
1✔
333
        let other_col = lit(42);
1✔
334
        let not_eq_expr = lt(get_item_scope(column.clone()), other_col.clone());
1✔
335

1✔
336
        let (converted, refs) = pruning_expr(&not_eq_expr).unwrap();
1✔
337
        assert_eq!(
1✔
338
            refs.map(),
1✔
339
            &HashMap::from_iter([(
1✔
340
                FieldPath::from_name(column.clone()),
1✔
341
                HashSet::from_iter([Stat::Min])
1✔
342
            )])
1✔
343
        );
1✔
344
        let expected_expr = gt_eq(
1✔
345
            get_item_scope(field_path_stat_field_name(
1✔
346
                &FieldPath::from_name(column),
1✔
347
                Stat::Min,
1✔
348
            )),
1✔
349
            other_col.clone(),
1✔
350
        );
1✔
351
        assert_eq!(&converted, &expected_expr);
1✔
352
    }
1✔
353

354
    #[test]
355
    fn pruning_identity() {
1✔
356
        let expr = or(lt(root().clone(), lit(10)), gt(root().clone(), lit(50)));
1✔
357

1✔
358
        let (predicate, _) = pruning_expr(&expr).unwrap();
1✔
359

1✔
360
        let expected_expr = and(
1✔
361
            gt_eq(get_item_scope(FieldName::from("min")), lit(10)),
1✔
362
            lt_eq(get_item_scope(FieldName::from("max")), lit(50)),
1✔
363
        );
1✔
364
        assert_eq!(&predicate, &expected_expr)
1✔
365
    }
1✔
366
    #[test]
367
    pub fn pruning_and_or_operators() {
1✔
368
        // Test case: a > 10 AND a < 50
1✔
369
        let column = FieldName::from("a");
1✔
370
        let and_expr = and(
1✔
371
            gt(get_item_scope(column.clone()), lit(10)),
1✔
372
            lt(get_item_scope(column), lit(50)),
1✔
373
        );
1✔
374
        let (predicate, _) = pruning_expr(&and_expr).unwrap();
1✔
375

1✔
376
        // Expected: a_max <= 10 OR a_min >= 50
1✔
377
        assert_eq!(
1✔
378
            &predicate,
1✔
379
            &or(
1✔
380
                lt_eq(get_item_scope(FieldName::from("a_max")), lit(10)),
1✔
381
                gt_eq(get_item_scope(FieldName::from("a_min")), lit(50))
1✔
382
            ),
1✔
383
        );
1✔
384
    }
1✔
385

386
    #[test]
387
    fn test_gt_eq_with_booleans() {
1✔
388
        // Consider this unusual, but valid (in Arrow, BooleanArray implements ArrayOrd), filter expression:
1✔
389
        // x > (y > z)
1✔
390
        // The x column is a Boolean-valued column. The y and z columns are numeric. True > False.
1✔
391
        // Suppose we had a Vortex zone whose min/max statistics for each column were:
1✔
392
        // x: [True, True]
1✔
393
        // y: [1, 2]
1✔
394
        // z: [0, 2]
1✔
395
        // The pruning predicate will convert the aforementioned expression into:
1✔
396
        // x_max <= (y_min > z_min)
1✔
397
        // If we evaluate that pruning expression on our zone we get:
1✔
398
        // x_max <= (y_min > z_min)
1✔
399
        // x_max <= (1     > 0    )
1✔
400
        // x_max <= True
1✔
401
        // True <= True
1✔
402
        // True
1✔
403
        // If a pruning predicate evaluates to true then, as stated in PruningPredicate::evaluate:
1✔
404
        // > a true value means the chunk can be pruned.
1✔
405
        // But, the following record lies within the above intervals and *passes* the filter expression! We
1✔
406
        // cannot prune this zone because we need this record!
1✔
407
        // {x: True, y: 1, z: 2}
1✔
408
        // x > (y > z)
1✔
409
        // True > (1 > 2)
1✔
410
        // True > False
1✔
411
        // True
1✔
412
        let expr = gt_eq(col("x"), gt(col("y"), col("z")));
1✔
413
        assert!(pruning_expr(&expr).is_none());
1✔
414
        // TODO(DK): a sufficiently complex pruner would produce: `x_max <= (y_max > z_min)`
415
    }
1✔
416
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc