• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 17045413678

18 Aug 2025 03:43PM UTC coverage: 86.065% (-1.8%) from 87.913%
17045413678

Pull #4215

github

web-flow
Merge 2657b4c8e into cb2220961
Pull Request #4215: Ji/vectors

136 of 1803 new or added lines in 42 files covered. (7.54%)

127 existing lines in 26 files now uncovered.

56918 of 66134 relevant lines covered (86.06%)

612050.14 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.63
/vortex-expr/src/lib.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
//! Vortex's expression language.
5
//!
6
//! All expressions are serializable, and own their own wire format.
7
//!
8
//! The implementation takes inspiration from [Postgres] and [Apache Datafusion].
9
//!
10
//! [Postgres]: https://www.postgresql.org/docs/current/sql-expressions.html
11
//! [Apache Datafusion]: https://github.com/apache/datafusion/tree/5fac581efbaffd0e6a9edf931182517524526afd/datafusion/expr
12

13
use std::any::Any;
14
use std::fmt::{Debug, Display, Formatter};
15
use std::hash::{Hash, Hasher};
16
use std::sync::Arc;
17

18
use dyn_hash::DynHash;
19
pub use exprs::*;
20
pub mod aliases;
21
mod analysis;
22
#[cfg(feature = "arbitrary")]
23
pub mod arbitrary;
24
pub mod dyn_traits;
25
mod encoding;
26
mod exprs;
27
mod field;
28
pub mod forms;
29
pub mod proto;
30
pub mod pruning;
31
mod registry;
32
mod scope;
33
mod scope_vars;
34
pub mod transform;
35
pub mod traversal;
36
mod vtable;
37

38
pub use analysis::*;
39
pub use between::*;
40
pub use binary::*;
41
pub use cast::*;
42
pub use encoding::*;
43
pub use get_item::*;
44
pub use is_null::*;
45
pub use like::*;
46
pub use list_contains::*;
47
pub use literal::*;
48
pub use merge::*;
49
pub use not::*;
50
pub use operators::*;
51
pub use pack::*;
52
pub use registry::*;
53
pub use root::*;
54
pub use scope::*;
55
pub use scope_vars::*;
56
pub use select::*;
57
use vortex_array::{Array, ArrayRef, SerializeMetadata};
58
use vortex_dtype::{DType, FieldName, FieldPath};
59
use vortex_error::{VortexExpect, VortexResult, VortexUnwrap, vortex_bail};
60
use vortex_utils::aliases::hash_set::HashSet;
61
pub use vtable::*;
62

63
use crate::dyn_traits::DynEq;
64
use crate::traversal::{NodeExt, ReferenceCollector};
65

66
pub trait IntoExpr {
67
    /// Convert this type into an expression reference.
68
    fn into_expr(self) -> ExprRef;
69
}
70

71
/// Reference-counted handle to a type-erased [`VortexExpr`].
pub type ExprRef = Arc<dyn VortexExpr>;
72

73
/// Represents logical operation on [`ArrayRef`]s
74
pub trait VortexExpr:
75
    'static + Send + Sync + Debug + Display + DynEq + DynHash + private::Sealed + AnalysisExpr
76
{
77
    /// Convert expression reference to reference of [`Any`] type
78
    fn as_any(&self) -> &dyn Any;
79

80
    /// Convert the expression to an [`ExprRef`].
81
    fn to_expr(&self) -> ExprRef;
82

83
    /// Return the encoding of the expression.
84
    fn encoding(&self) -> ExprEncodingRef;
85

86
    /// Serialize the metadata of this expression into a bytes vector.
87
    ///
88
    /// Returns `None` if the expression does not support serialization.
89
    fn metadata(&self) -> Option<Vec<u8>> {
90
        None
91
    }
92

93
    /// Compute result of expression on given batch producing a new batch
94
    ///
95
    /// "Unchecked" means that this function lacks a debug assertion that the returned array matches
96
    /// the [VortexExpr::return_dtype] method. Use instead the
97
    /// [`VortexExpr::evaluate`](./trait.VortexExpr.html#method.evaluate).
98
    /// function which includes such an assertion.
99
    fn unchecked_evaluate(&self, ctx: &Scope) -> VortexResult<ArrayRef>;
100

101
    /// Returns the children of this expression.
102
    fn children(&self) -> Vec<&ExprRef>;
103

104
    /// Returns a new instance of this expression with the children replaced.
105
    fn with_children(self: Arc<Self>, children: Vec<ExprRef>) -> VortexResult<ExprRef>;
106

107
    /// Compute the type of the array returned by
108
    /// [`VortexExpr::evaluate`](./trait.VortexExpr.html#method.evaluate).
109
    fn return_dtype(&self, scope: &DType) -> VortexResult<DType>;
110
}
111

112
// NOTE(review): presumably generates the `Hash` impl for `dyn VortexExpr` trait objects —
// confirm against the `dyn_hash` crate documentation.
dyn_hash::hash_trait_object!(VortexExpr);
113

114
impl PartialEq for dyn VortexExpr {
115
    fn eq(&self, other: &Self) -> bool {
204,952✔
116
        self.dyn_eq(other.as_any())
204,952✔
117
    }
204,952✔
118
}
119

120
impl Eq for dyn VortexExpr {}
121

122
impl dyn VortexExpr + '_ {
123
    pub fn id(&self) -> ExprId {
117✔
124
        self.encoding().id()
179✔
125
    }
179✔
126

62✔
127
    pub fn is<V: VTable>(&self) -> bool {
89,509✔
128
        self.as_opt::<V>().is_some()
89,509✔
129
    }
89,509✔
130

131
    pub fn as_<V: VTable>(&self) -> &V::Expr {
UNCOV
132
        self.as_opt::<V>()
×
UNCOV
133
            .vortex_expect("Expr is not of the expected type")
×
UNCOV
134
    }
×
135

136
    pub fn as_opt<V: VTable>(&self) -> Option<&V::Expr> {
980,212✔
137
        VortexExpr::as_any(self)
980,212✔
138
            .downcast_ref::<ExprAdapter<V>>()
980,212✔
139
            .map(|e| &e.0)
980,188✔
140
    }
980,188✔
141

142
    /// Compute result of expression on given batch producing a new batch
143
    pub fn evaluate(&self, scope: &Scope) -> VortexResult<ArrayRef> {
15,567✔
144
        let result = self.unchecked_evaluate(scope)?;
15,567✔
145
        assert_eq!(
15,858✔
146
            result.dtype(),
15,858✔
147
            &self.return_dtype(scope.dtype())?,
15,858✔
148
            "Expression {} returned dtype {} but declared return_dtype of {}",
292✔
149
            self,
292✔
150
            result.dtype(),
151
            self.return_dtype(scope.dtype())?,
152
        );
6✔
153
        Ok(result)
15,572✔
154
    }
15,573✔
155
}
6✔
156

6✔
157
pub trait VortexExprExt {
158
    /// Accumulate all field references from this expression and its children in a set
159
    fn field_references(&self) -> HashSet<FieldName>;
160
}
161

162
impl VortexExprExt for ExprRef {
6✔
163
    fn field_references(&self) -> HashSet<FieldName> {
6✔
164
        let mut collector = ReferenceCollector::new();
165
        // The collector is infallible, so we can unwrap the result
166
        self.accept(&mut collector).vortex_unwrap();
167
        collector.into_fields()
168
    }
169
}
170

171
#[derive(Clone)]
172
#[repr(transparent)]
173
pub struct ExprAdapter<V: VTable>(V::Expr);
174

175
impl<V: VTable> VortexExpr for ExprAdapter<V> {
176
    fn as_any(&self) -> &dyn Any {
1,184,994✔
177
        self
1,184,994✔
178
    }
1,184,994✔
179

180
    fn to_expr(&self) -> ExprRef {
19,473✔
181
        Arc::new(ExprAdapter::<V>(self.0.clone()))
19,473✔
182
    }
19,473✔
183

184
    fn encoding(&self) -> ExprEncodingRef {
117✔
185
        V::encoding(&self.0)
117✔
186
    }
117✔
187

188
    fn metadata(&self) -> Option<Vec<u8>> {
117✔
189
        V::metadata(&self.0).map(|m| m.serialize())
117✔
190
    }
117✔
191

192
    fn unchecked_evaluate(&self, ctx: &Scope) -> VortexResult<ArrayRef> {
75,487✔
193
        V::evaluate(&self.0, ctx)
75,487✔
194
    }
75,487✔
195

196
    fn children(&self) -> Vec<&ExprRef> {
752,107✔
197
        V::children(&self.0)
752,107✔
198
    }
752,107✔
199

200
    fn with_children(self: Arc<Self>, children: Vec<ExprRef>) -> VortexResult<ExprRef> {
19,473✔
201
        if self.children().len() != children.len() {
19,827✔
202
            vortex_bail!(
354✔
203
                "Expected {} children, got {}",
354✔
204
                self.children().len(),
205
                children.len()
6✔
206
            );
6✔
207
        }
19,479✔
208
        Ok(V::with_children(&self.0, children)?.to_expr())
19,473✔
209
    }
19,473✔
210

211
    fn return_dtype(&self, scope: &DType) -> VortexResult<DType> {
130,937✔
212
        V::return_dtype(&self.0, scope)
130,937✔
213
    }
130,937✔
214
}
215

216
impl<V: VTable> Debug for ExprAdapter<V> {
217
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
22✔
218
        Debug::fmt(&self.0, f)
22✔
219
    }
22✔
220
}
221

228✔
222
impl<V: VTable> Display for ExprAdapter<V> {
228✔
223
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
330✔
224
        Display::fmt(&self.0, f)
102✔
225
    }
108✔
226
}
6✔
227

228
impl<V: VTable> PartialEq for ExprAdapter<V> {
229
    fn eq(&self, other: &Self) -> bool {
178,063✔
230
        PartialEq::eq(&self.0, &other.0)
178,063✔
231
    }
178,063✔
232
}
6✔
233

6✔
234
impl<V: VTable> Eq for ExprAdapter<V> {}
6✔
235

236
impl<V: VTable> Hash for ExprAdapter<V> {
48✔
237
    fn hash<H: Hasher>(&self, state: &mut H) {
310,442✔
238
        Hash::hash(&self.0, state);
310,442✔
239
    }
310,394✔
240
}
241

242
impl<V: VTable> AnalysisExpr for ExprAdapter<V> {
243
    fn stat_falsification(&self, catalog: &mut dyn StatsCatalog) -> Option<ExprRef> {
2,110✔
244
        <V::Expr as AnalysisExpr>::stat_falsification(&self.0, catalog)
2,110✔
245
    }
2,110✔
246

247
    fn max(&self, catalog: &mut dyn StatsCatalog) -> Option<ExprRef> {
1,682✔
248
        <V::Expr as AnalysisExpr>::max(&self.0, catalog)
1,682✔
249
    }
1,682✔
250

251
    fn min(&self, catalog: &mut dyn StatsCatalog) -> Option<ExprRef> {
1,681✔
252
        <V::Expr as AnalysisExpr>::min(&self.0, catalog)
1,681✔
253
    }
1,681✔
254

255
    fn nan_count(&self, catalog: &mut dyn StatsCatalog) -> Option<ExprRef> {
2,116✔
256
        <V::Expr as AnalysisExpr>::nan_count(&self.0, catalog)
2,116✔
257
    }
2,116✔
258

259
    fn field_path(&self) -> Option<FieldPath> {
1,337✔
260
        <V::Expr as AnalysisExpr>::field_path(&self.0)
1,337✔
261
    }
1,391✔
262
}
54✔
263

54✔
264
mod private {
265
    use super::*;
266

267
    pub trait Sealed {}
268

269
    impl<V: VTable> Sealed for ExprAdapter<V> {}
68✔
270
}
68✔
271

68✔
272
/// Splits top level and operations into separate expressions.
273
pub fn split_conjunction(expr: &ExprRef) -> Vec<ExprRef> {
2✔
274
    let mut conjunctions = vec![];
2✔
275
    split_inner(expr, &mut conjunctions);
2✔
276
    conjunctions
2✔
277
}
2✔
278

279
fn split_inner(expr: &ExprRef, exprs: &mut Vec<ExprRef>) {
4✔
280
    match expr.as_opt::<BinaryVTable>() {
4✔
281
        Some(bexp) if bexp.op() == Operator::And => {
2✔
282
            split_inner(bexp.lhs(), exprs);
1✔
283
            split_inner(bexp.rhs(), exprs);
1✔
284
        }
1✔
285
        Some(_) | None => {
3✔
286
            exprs.push(expr.clone());
3✔
287
        }
3✔
288
    }
289
}
4✔
290

291
/// An expression wrapper that performs pointer equality.
292
#[derive(Clone)]
293
pub struct ExactExpr(pub ExprRef);
294

295
impl PartialEq for ExactExpr {
296
    fn eq(&self, other: &Self) -> bool {
12,126✔
297
        Arc::ptr_eq(&self.0, &other.0)
12,126✔
298
    }
12,126✔
299
}
300

301
impl Eq for ExactExpr {}
302

303
impl Hash for ExactExpr {
304
    fn hash<H: Hasher>(&self, state: &mut H) {
18,649✔
305
        Arc::as_ptr(&self.0).hash(state)
18,649✔
306
    }
18,649✔
307
}
308

309
/// Fixtures that are compiled only when the `test-harness` feature is enabled.
#[cfg(feature = "test-harness")]
pub mod test_harness {
    use vortex_dtype::{DType, Nullability, PType, StructFields};

    /// Returns a non-nullable struct dtype with the fields `a` (non-nullable `i32`), `col1` and
    /// `col2` (nullable `u16`), and `bool1` and `bool2` (non-nullable `bool`).
    pub fn struct_dtype() -> DType {
        let field_dtypes = vec![
            DType::Primitive(PType::I32, Nullability::NonNullable),
            DType::Primitive(PType::U16, Nullability::Nullable),
            DType::Primitive(PType::U16, Nullability::Nullable),
            DType::Bool(Nullability::NonNullable),
            DType::Bool(Nullability::NonNullable),
        ];
        let fields = StructFields::new(
            ["a", "col1", "col2", "bool1", "bool2"].into(),
            field_dtypes,
        );
        DType::Struct(fields, Nullability::NonNullable)
    }
}
330

331
#[cfg(test)]
332
mod tests {
333
    use vortex_dtype::{DType, FieldNames, Nullability, PType, StructFields};
334
    use vortex_scalar::Scalar;
335

336
    use super::*;
4✔
337

4✔
338
    /// A lone comparison contains no top-level `And`, so it splits into exactly one part.
    #[test]
    fn basic_expr_split_test() {
        let comparison = eq(get_item("col1", root()), lit(1));
        let parts = split_conjunction(&comparison);
        assert_eq!(parts.len(), 1);
    }
1✔
346

347
    /// A single top-level `And` splits into its two operands.
    #[test]
    fn basic_conjunction_split_test() {
        let both = and(get_item("col1", root()), lit(1));
        let conjunction = split_conjunction(&both);
        assert_eq!(conjunction.len(), 2, "Conjunction is {conjunction:?}");
    }
1✔
355

356
    /// Pins the `Display` rendering of columns, operators, selections and literals.
    #[test]
    fn expr_display() {
        assert_eq!(col("a").to_string(), "$.a");
        assert_eq!(root().to_string(), "$");

        let col1: ExprRef = col("col1");
        let col2: ExprRef = col("col2");

        // Each binary operator paired with its expected rendering.
        let binary_cases = [
            (and(col1.clone(), col2.clone()), "($.col1 and $.col2)"),
            (or(col1.clone(), col2.clone()), "($.col1 or $.col2)"),
            (eq(col1.clone(), col2.clone()), "($.col1 = $.col2)"),
            (not_eq(col1.clone(), col2.clone()), "($.col1 != $.col2)"),
            (gt(col1.clone(), col2.clone()), "($.col1 > $.col2)"),
            (gt_eq(col1.clone(), col2.clone()), "($.col1 >= $.col2)"),
            (lt(col1.clone(), col2.clone()), "($.col1 < $.col2)"),
            (lt_eq(col1.clone(), col2.clone()), "($.col1 <= $.col2)"),
        ];
        for (expr, rendered) in binary_cases {
            assert_eq!(expr.to_string(), rendered);
        }

        let nested = or(
            lt(col1.clone(), col2.clone()),
            not_eq(col1.clone(), col2.clone()),
        );
        assert_eq!(
            nested.to_string(),
            "(($.col1 < $.col2) or ($.col1 != $.col2))"
        );

        assert_eq!(not(col1.clone()).to_string(), "(!$.col1)");

        let one_field = select(vec![FieldName::from("col1")], root());
        assert_eq!(one_field.to_string(), "${col1}");

        let two_fields = select(
            vec![FieldName::from("col1"), FieldName::from("col2")],
            root(),
        );
        assert_eq!(two_fields.to_string(), "${col1, col2}");

        let excluded = select_exclude(
            vec![FieldName::from("col1"), FieldName::from("col2")],
            root(),
        );
        assert_eq!(excluded.to_string(), "$~{col1, col2}");

        assert_eq!(lit(Scalar::from(0u8)).to_string(), "0u8");
        assert_eq!(lit(Scalar::from(0.0f32)).to_string(), "0f32");
        assert_eq!(
            lit(Scalar::from(i64::MAX)).to_string(),
            "9223372036854775807i64"
        );
        assert_eq!(lit(Scalar::from(true)).to_string(), "true");

        let null_bool = lit(Scalar::null(DType::Bool(Nullability::Nullable)));
        assert_eq!(null_bool.to_string(), "null");

        let pet_dtype = DType::Struct(
            StructFields::new(
                FieldNames::from(["dog", "cat"]),
                vec![
                    DType::Primitive(PType::U32, Nullability::NonNullable),
                    DType::Utf8(Nullability::NonNullable),
                ],
            ),
            Nullability::NonNullable,
        );
        let pet = lit(Scalar::struct_(
            pet_dtype,
            vec![Scalar::from(32_u32), Scalar::from("rufus".to_string())],
        ));
        assert_eq!(pet.to_string(), "{dog: 32u32, cat: \"rufus\"}");
    }
1✔
458
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc