• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16288880812

15 Jul 2025 09:03AM UTC coverage: 81.533% (+0.05%) from 81.481%
16288880812

Pull #3870

github

web-flow
Merge 310a0eaf8 into b0be264bf
Pull Request #3870: chore[bench]: move tpcds into new benchmark format

46270 of 56750 relevant lines covered (81.53%)

145166.88 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.36
/vortex-expr/src/lib.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::any::Any;
5
use std::fmt::{Debug, Display, Formatter};
6
use std::hash::{Hash, Hasher};
7
use std::sync::Arc;
8

9
use dyn_eq::DynEq;
10
use dyn_hash::DynHash;
11
pub use exprs::*;
12
pub mod aliases;
13
mod analysis;
14
#[cfg(feature = "arbitrary")]
15
pub mod arbitrary;
16
mod encoding;
17
mod exprs;
18
mod field;
19
pub mod forms;
20
pub mod proto;
21
pub mod pruning;
22
mod registry;
23
mod scope;
24
mod scope_vars;
25
pub mod transform;
26
pub mod traversal;
27
mod vtable;
28

29
pub use analysis::*;
30
pub use between::*;
31
pub use binary::*;
32
pub use cast::*;
33
pub use encoding::*;
34
pub use get_item::*;
35
pub use is_null::*;
36
pub use like::*;
37
pub use list_contains::*;
38
pub use literal::*;
39
pub use merge::*;
40
pub use not::*;
41
pub use operators::*;
42
pub use pack::*;
43
pub use registry::*;
44
pub use root::*;
45
pub use scope::*;
46
pub use select::*;
47
use vortex_array::{Array, ArrayRef, SerializeMetadata};
48
use vortex_dtype::{DType, FieldName, FieldPath};
49
use vortex_error::{VortexExpect, VortexResult, VortexUnwrap, vortex_bail};
50
use vortex_utils::aliases::hash_set::HashSet;
51
pub use vtable::*;
52

53
use crate::traversal::{Node, ReferenceCollector};
54

55
pub trait IntoExpr {
56
    /// Convert this type into an expression reference.
57
    fn into_expr(self) -> ExprRef;
58
}
59

60
/// Reference-counted handle to a type-erased [`VortexExpr`].
pub type ExprRef = Arc<dyn VortexExpr>;
61

62
/// Represents logical operation on [`ArrayRef`]s
63
pub trait VortexExpr:
64
    'static + Send + Sync + Debug + Display + DynEq + DynHash + private::Sealed + AnalysisExpr
65
{
66
    /// Convert expression reference to reference of [`Any`] type
67
    fn as_any(&self) -> &dyn Any;
68

69
    /// Convert the expression to an [`ExprRef`].
70
    fn to_expr(&self) -> ExprRef;
71

72
    /// Return the encoding of the expression.
73
    fn encoding(&self) -> ExprEncodingRef;
74

75
    /// Serialize the metadata of this expression into a bytes vector.
76
    ///
77
    /// Returns `None` if the expression does not support serialization.
78
    fn metadata(&self) -> Option<Vec<u8>> {
×
79
        None
×
80
    }
×
81

82
    /// Compute result of expression on given batch producing a new batch
83
    ///
84
    /// "Unchecked" means that this function lacks a debug assertion that the returned array matches
85
    /// the [VortexExpr::return_dtype] method. Use instead the
86
    /// [`VortexExpr::evaluate`](./trait.VortexExpr.html#method.evaluate).
87
    /// function which includes such an assertion.
88
    fn unchecked_evaluate(&self, ctx: &Scope) -> VortexResult<ArrayRef>;
89

90
    /// Returns the children of this expression.
91
    fn children(&self) -> Vec<&ExprRef>;
92

93
    /// Returns a new instance of this expression with the children replaced.
94
    fn with_children(self: Arc<Self>, children: Vec<ExprRef>) -> VortexResult<ExprRef>;
95

96
    /// Compute the type of the array returned by
97
    /// [`VortexExpr::evaluate`](./trait.VortexExpr.html#method.evaluate).
98
    fn return_dtype(&self, scope: &DType) -> VortexResult<DType>;
99
}
100

101
dyn_eq::eq_trait_object!(VortexExpr);
102
dyn_hash::hash_trait_object!(VortexExpr);
103

104
impl dyn VortexExpr + '_ {
105
    pub fn id(&self) -> ExprId {
11✔
106
        self.encoding().id()
11✔
107
    }
11✔
108

109
    pub fn is<V: VTable>(&self) -> bool {
334,346✔
110
        self.as_opt::<V>().is_some()
334,346✔
111
    }
334,346✔
112

113
    pub fn as_<V: VTable>(&self) -> &V::Expr {
×
114
        self.as_opt::<V>()
×
115
            .vortex_expect("Expr is not of the expected type")
×
116
    }
×
117

118
    pub fn as_opt<V: VTable>(&self) -> Option<&V::Expr> {
1,070,591✔
119
        VortexExpr::as_any(self)
1,070,591✔
120
            .downcast_ref::<ExprAdapter<V>>()
1,070,591✔
121
            .map(|e| &e.0)
1,070,591✔
122
    }
1,070,591✔
123

124
    /// Compute result of expression on given batch producing a new batch
125
    pub fn evaluate(&self, scope: &Scope) -> VortexResult<ArrayRef> {
11,012✔
126
        let result = self.unchecked_evaluate(scope)?;
11,012✔
127
        assert_eq!(
11,011✔
128
            result.dtype(),
11,011✔
129
            &self.return_dtype(scope.dtype())?,
11,011✔
130
            "Expression {} returned dtype {} but declared return_dtype of {}",
×
131
            self,
×
132
            result.dtype(),
×
133
            self.return_dtype(scope.dtype())?,
×
134
        );
135
        Ok(result)
11,011✔
136
    }
11,012✔
137
}
138

139
pub trait VortexExprExt {
140
    /// Accumulate all field references from this expression and its children in a set
141
    fn field_references(&self) -> HashSet<FieldName>;
142
}
143

144
impl VortexExprExt for ExprRef {
145
    fn field_references(&self) -> HashSet<FieldName> {
×
146
        let mut collector = ReferenceCollector::new();
×
147
        // The collector is infallible, so we can unwrap the result
×
148
        self.accept(&mut collector).vortex_unwrap();
×
149
        collector.into_fields()
×
150
    }
×
151
}
152

153
#[derive(Clone)]
154
#[repr(transparent)]
155
pub struct ExprAdapter<V: VTable>(V::Expr);
156

157
impl<V: VTable> VortexExpr for ExprAdapter<V> {
158
    fn as_any(&self) -> &dyn Any {
1,070,743✔
159
        self
1,070,743✔
160
    }
1,070,743✔
161

162
    fn to_expr(&self) -> ExprRef {
12,302✔
163
        Arc::new(ExprAdapter::<V>(self.0.clone()))
12,302✔
164
    }
12,302✔
165

166
    fn encoding(&self) -> ExprEncodingRef {
11✔
167
        V::encoding(&self.0)
11✔
168
    }
11✔
169

170
    fn metadata(&self) -> Option<Vec<u8>> {
11✔
171
        V::metadata(&self.0).map(|m| m.serialize())
11✔
172
    }
11✔
173

174
    fn unchecked_evaluate(&self, ctx: &Scope) -> VortexResult<ArrayRef> {
42,321✔
175
        V::evaluate(&self.0, ctx)
42,321✔
176
    }
42,321✔
177

178
    fn children(&self) -> Vec<&ExprRef> {
994,710✔
179
        V::children(&self.0)
994,710✔
180
    }
994,710✔
181

182
    fn with_children(self: Arc<Self>, children: Vec<ExprRef>) -> VortexResult<ExprRef> {
12,302✔
183
        if self.children().len() != children.len() {
12,302✔
184
            vortex_bail!(
×
185
                "Expected {} children, got {}",
×
186
                self.children().len(),
×
187
                children.len()
×
188
            );
×
189
        }
12,302✔
190
        Ok(V::with_children(&self.0, children)?.to_expr())
12,302✔
191
    }
12,302✔
192

193
    fn return_dtype(&self, scope: &DType) -> VortexResult<DType> {
81,193✔
194
        V::return_dtype(&self.0, scope)
81,193✔
195
    }
81,193✔
196
}
197

198
impl<V: VTable> Debug for ExprAdapter<V> {
199
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
×
200
        Debug::fmt(&self.0, f)
×
201
    }
×
202
}
203

204
impl<V: VTable> Display for ExprAdapter<V> {
205
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
72✔
206
        Display::fmt(&self.0, f)
72✔
207
    }
72✔
208
}
209

210
impl<V: VTable> PartialEq for ExprAdapter<V> {
    /// Two adapters are equal exactly when their wrapped expressions are equal.
    fn eq(&self, other: &Self) -> bool {
        let (Self(lhs), Self(rhs)) = (self, other);
        PartialEq::eq(lhs, rhs)
    }
}
215

216
// Marker impl: promotes the `PartialEq` impl for `ExprAdapter<V>` to `Eq`.
impl<V: VTable> Eq for ExprAdapter<V> {}
217

218
impl<V: VTable> Hash for ExprAdapter<V> {
219
    fn hash<H: Hasher>(&self, state: &mut H) {
191,027✔
220
        Hash::hash(&self.0, state);
191,027✔
221
    }
191,027✔
222
}
223

224
impl<V: VTable> AnalysisExpr for ExprAdapter<V> {
225
    fn stat_falsification(&self, catalog: &mut dyn StatsCatalog) -> Option<ExprRef> {
680✔
226
        <V::Expr as AnalysisExpr>::stat_falsification(&self.0, catalog)
680✔
227
    }
680✔
228

229
    fn max(&self, catalog: &mut dyn StatsCatalog) -> Option<ExprRef> {
805✔
230
        <V::Expr as AnalysisExpr>::max(&self.0, catalog)
805✔
231
    }
805✔
232

233
    fn min(&self, catalog: &mut dyn StatsCatalog) -> Option<ExprRef> {
805✔
234
        <V::Expr as AnalysisExpr>::min(&self.0, catalog)
805✔
235
    }
805✔
236

237
    fn field_path(&self) -> Option<FieldPath> {
21✔
238
        <V::Expr as AnalysisExpr>::field_path(&self.0)
21✔
239
    }
21✔
240
}
241

242
mod private {
243
    use super::*;
244

245
    pub trait Sealed {}
246

247
    impl<V: VTable> Sealed for ExprAdapter<V> {}
248
}
249

250
/// Splits top level and operations into separate expressions
251
pub fn split_conjunction(expr: &ExprRef) -> Vec<ExprRef> {
2✔
252
    let mut conjunctions = vec![];
2✔
253
    split_inner(expr, &mut conjunctions);
2✔
254
    conjunctions
2✔
255
}
2✔
256

257
fn split_inner(expr: &ExprRef, exprs: &mut Vec<ExprRef>) {
4✔
258
    match expr.as_opt::<BinaryVTable>() {
4✔
259
        Some(bexp) if bexp.op() == Operator::And => {
2✔
260
            split_inner(bexp.lhs(), exprs);
1✔
261
            split_inner(bexp.rhs(), exprs);
1✔
262
        }
1✔
263
        Some(_) | None => {
3✔
264
            exprs.push(expr.clone());
3✔
265
        }
3✔
266
    }
267
}
4✔
268

269
/// An expression wrapper that performs pointer equality.
270
#[derive(Clone)]
271
pub struct ExactExpr(pub ExprRef);
272

273
impl PartialEq for ExactExpr {
274
    fn eq(&self, other: &Self) -> bool {
9,381✔
275
        Arc::ptr_eq(&self.0, &other.0)
9,381✔
276
    }
9,381✔
277
}
278

279
// Marker impl: pointer identity is reflexive, symmetric and transitive.
impl Eq for ExactExpr {}
280

281
impl Hash for ExactExpr {
282
    fn hash<H: Hasher>(&self, state: &mut H) {
13,484✔
283
        Arc::as_ptr(&self.0).hash(state)
13,484✔
284
    }
13,484✔
285
}
286

287
#[cfg(feature = "test-harness")]
pub mod test_harness {

    use vortex_dtype::{DType, Nullability, PType, StructFields};

    /// Builds the non-nullable struct dtype used as a test fixture: an `i32` field `a`, two
    /// nullable `u16` fields `col1` and `col2`, and two non-nullable booleans `bool1` and
    /// `bool2`.
    pub fn struct_dtype() -> DType {
        let field_dtypes = vec![
            DType::Primitive(PType::I32, Nullability::NonNullable),
            DType::Primitive(PType::U16, Nullability::Nullable),
            DType::Primitive(PType::U16, Nullability::Nullable),
            DType::Bool(Nullability::NonNullable),
            DType::Bool(Nullability::NonNullable),
        ];
        let fields = StructFields::new(
            ["a", "col1", "col2", "bool1", "bool2"].into(),
            field_dtypes,
        );
        DType::Struct(fields, Nullability::NonNullable)
    }
}
308

309
#[cfg(test)]
mod tests {
    use vortex_dtype::{DType, FieldNames, Nullability, PType, StructFields};
    use vortex_scalar::Scalar;

    use super::*;

    /// An expression without a top-level `and` comes back as a single conjunct.
    #[test]
    fn basic_expr_split_test() {
        let expr = eq(get_item("col1", root()), lit(1));
        assert_eq!(split_conjunction(&expr).len(), 1);
    }

    /// A top-level `and` is split into its two operands.
    #[test]
    fn basic_conjunction_split_test() {
        let expr = and(get_item("col1", root()), lit(1));
        let conjunction = split_conjunction(&expr);
        assert_eq!(conjunction.len(), 2, "Conjunction is {conjunction:?}");
    }

    /// Pins the `Display` rendering of columns, operators, selections and literals.
    #[test]
    fn expr_display() {
        assert_eq!(col("a").to_string(), "$.a");
        assert_eq!(root().to_string(), "$");

        let col1: ExprRef = col("col1");
        let col2: ExprRef = col("col2");

        // Binary operators render as parenthesised infix expressions.
        let binary_cases = [
            (and(col1.clone(), col2.clone()), "($.col1 and $.col2)"),
            (or(col1.clone(), col2.clone()), "($.col1 or $.col2)"),
            (eq(col1.clone(), col2.clone()), "($.col1 = $.col2)"),
            (not_eq(col1.clone(), col2.clone()), "($.col1 != $.col2)"),
            (gt(col1.clone(), col2.clone()), "($.col1 > $.col2)"),
            (gt_eq(col1.clone(), col2.clone()), "($.col1 >= $.col2)"),
            (lt(col1.clone(), col2.clone()), "($.col1 < $.col2)"),
            (lt_eq(col1.clone(), col2.clone()), "($.col1 <= $.col2)"),
        ];
        for (expr, expected) in binary_cases {
            assert_eq!(expr.to_string(), expected);
        }

        // Nested binary expressions keep their own parentheses.
        let nested = or(
            lt(col1.clone(), col2.clone()),
            not_eq(col1.clone(), col2.clone()),
        );
        assert_eq!(
            nested.to_string(),
            "(($.col1 < $.col2) or ($.col1 != $.col2))"
        );

        assert_eq!(not(col1.clone()).to_string(), "!$.col1");

        // Field selections and exclusions.
        let one_field = vec![FieldName::from("col1")];
        assert_eq!(select(one_field, root()).to_string(), "${col1}");

        let two_fields = vec![FieldName::from("col1"), FieldName::from("col2")];
        assert_eq!(select(two_fields, root()).to_string(), "${col1, col2}");

        let excluded = vec![FieldName::from("col1"), FieldName::from("col2")];
        assert_eq!(
            select_exclude(excluded, root()).to_string(),
            "$~{col1, col2}"
        );

        // Scalar literals.
        assert_eq!(lit(Scalar::from(0u8)).to_string(), "0u8");
        assert_eq!(lit(Scalar::from(0.0f32)).to_string(), "0f32");
        assert_eq!(
            lit(Scalar::from(i64::MAX)).to_string(),
            "9223372036854775807i64"
        );
        assert_eq!(lit(Scalar::from(true)).to_string(), "true");

        let null_bool = Scalar::null(DType::Bool(Nullability::Nullable));
        assert_eq!(lit(null_bool).to_string(), "null");

        // Struct literal.
        let pet_dtype = DType::Struct(
            StructFields::new(
                FieldNames::from(["dog", "cat"]),
                vec![
                    DType::Primitive(PType::U32, Nullability::NonNullable),
                    DType::Utf8(Nullability::NonNullable),
                ],
            ),
            Nullability::NonNullable,
        );
        let pet_values = vec![Scalar::from(32_u32), Scalar::from("rufus".to_string())];
        assert_eq!(
            lit(Scalar::struct_(pet_dtype, pet_values)).to_string(),
            "{dog: 32u32, cat: \"rufus\"}"
        );
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc