• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16201397870

10 Jul 2025 05:05PM UTC coverage: 81.145% (+0.06%) from 81.084%
16201397870

push

github

web-flow
Remove var expression (#3829)

Fixes #3671 

* Removes the Var expression, leaving instead a `root()` expression for
resolving the scope root.
* The expression scope can hold context variables, useful for passing in
auth tokens for example, but not variables that would impact the
return_dtype of the expression.
* ScopeDType has therefore been removed, because the dtype of the scope
_is_ just the dtype of the root array.
* Simplifies some transformation / partitioning logic where vars no
longer need to be considered.

Signed-off-by: Nicholas Gates <nick@nickgates.com>
Co-authored-by: Will Manning <will@spiraldb.com>

164 of 175 new or added lines in 32 files covered. (93.71%)

18 existing lines in 6 files now uncovered.

45273 of 55793 relevant lines covered (81.14%)

146273.04 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.36
/vortex-expr/src/lib.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::any::Any;
5
use std::fmt::{Debug, Display, Formatter};
6
use std::hash::{Hash, Hasher};
7
use std::sync::Arc;
8

9
use dyn_eq::DynEq;
10
use dyn_hash::DynHash;
11
pub use exprs::*;
12
pub mod aliases;
13
mod analysis;
14
#[cfg(feature = "arbitrary")]
15
pub mod arbitrary;
16
mod encoding;
17
mod exprs;
18
mod field;
19
pub mod forms;
20
pub mod proto;
21
pub mod pruning;
22
mod registry;
23
mod scope;
24
mod scope_vars;
25
pub mod transform;
26
pub mod traversal;
27
mod vtable;
28

29
pub use analysis::*;
30
pub use between::*;
31
pub use binary::*;
32
pub use cast::*;
33
pub use encoding::*;
34
pub use get_item::*;
35
pub use is_null::*;
36
pub use like::*;
37
pub use list_contains::*;
38
pub use literal::*;
39
pub use merge::*;
40
pub use not::*;
41
pub use operators::*;
42
pub use pack::*;
43
pub use registry::*;
44
pub use root::*;
45
pub use scope::*;
46
pub use select::*;
47
use vortex_array::{Array, ArrayRef, SerializeMetadata};
48
use vortex_dtype::{DType, FieldName, FieldPath};
49
use vortex_error::{VortexExpect, VortexResult, VortexUnwrap, vortex_bail};
50
use vortex_utils::aliases::hash_set::HashSet;
51
pub use vtable::*;
52

53
use crate::traversal::{Node, ReferenceCollector};
54

55
pub trait IntoExpr {
56
    /// Convert this type into an expression reference.
57
    fn into_expr(self) -> ExprRef;
58
}
59

60
/// Shared, reference-counted handle to a type-erased [`VortexExpr`].
pub type ExprRef = Arc<dyn VortexExpr>;
61

62
/// Represents logical operation on [`ArrayRef`]s
63
pub trait VortexExpr:
64
    'static + Send + Sync + Debug + Display + DynEq + DynHash + private::Sealed + AnalysisExpr
65
{
66
    /// Convert expression reference to reference of [`Any`] type
67
    fn as_any(&self) -> &dyn Any;
68

69
    /// Convert the expression to an [`ExprRef`].
70
    fn to_expr(&self) -> ExprRef;
71

72
    /// Return the encoding of the expression.
73
    fn encoding(&self) -> ExprEncodingRef;
74

75
    /// Serialize the metadata of this expression into a bytes vector.
76
    ///
77
    /// Returns `None` if the expression does not support serialization.
78
    fn metadata(&self) -> Option<Vec<u8>> {
×
79
        None
×
80
    }
×
81

82
    /// Compute result of expression on given batch producing a new batch
83
    ///
84
    /// "Unchecked" means that this function lacks a debug assertion that the returned array matches
85
    /// the [VortexExpr::return_dtype] method. Use instead the [`\<dyn VortexExpr\>::evaluate`]
86
    /// function which includes such an assertion.
87
    fn unchecked_evaluate(&self, ctx: &Scope) -> VortexResult<ArrayRef>;
88

89
    /// Returns the children of this expression.
90
    fn children(&self) -> Vec<&ExprRef>;
91

92
    /// Returns a new instance of this expression with the children replaced.
93
    fn with_children(self: Arc<Self>, children: Vec<ExprRef>) -> VortexResult<ExprRef>;
94

95
    /// Compute the type of the array returned by [`\<dyn VortexExpr\>::evaluate`].
96
    fn return_dtype(&self, scope: &DType) -> VortexResult<DType>;
97
}
98

99
// Equality support for `dyn VortexExpr` trait objects. Presumably generates
// `PartialEq`/`Eq` on top of the `DynEq` supertrait — confirm against the
// `dyn_eq` crate documentation.
dyn_eq::eq_trait_object!(VortexExpr);
100
// Hashing support for `dyn VortexExpr` trait objects. Presumably generates
// `Hash` on top of the `DynHash` supertrait — confirm against the `dyn_hash`
// crate documentation.
dyn_hash::hash_trait_object!(VortexExpr);
101

102
impl dyn VortexExpr + '_ {
103
    pub fn id(&self) -> ExprId {
11✔
104
        self.encoding().id()
11✔
105
    }
11✔
106

107
    pub fn is<V: VTable>(&self) -> bool {
340,522✔
108
        self.as_opt::<V>().is_some()
340,522✔
109
    }
340,522✔
110

111
    pub fn as_<V: VTable>(&self) -> &V::Expr {
×
112
        self.as_opt::<V>()
×
113
            .vortex_expect("Expr is not of the expected type")
×
114
    }
×
115

116
    pub fn as_opt<V: VTable>(&self) -> Option<&V::Expr> {
1,089,188✔
117
        VortexExpr::as_any(self)
1,089,188✔
118
            .downcast_ref::<ExprAdapter<V>>()
1,089,188✔
119
            .map(|e| &e.0)
1,089,188✔
120
    }
1,089,188✔
121

122
    /// Compute result of expression on given batch producing a new batch
123
    pub fn evaluate(&self, scope: &Scope) -> VortexResult<ArrayRef> {
11,090✔
124
        let result = self.unchecked_evaluate(scope)?;
11,090✔
125
        assert_eq!(
11,089✔
126
            result.dtype(),
11,089✔
127
            &self.return_dtype(scope.dtype())?,
11,089✔
128
            "Expression {} returned dtype {} but declared return_dtype of {}",
×
129
            self,
×
130
            result.dtype(),
×
NEW
131
            self.return_dtype(scope.dtype())?,
×
132
        );
133
        Ok(result)
11,089✔
134
    }
11,090✔
135
}
136

137
pub trait VortexExprExt {
138
    /// Accumulate all field references from this expression and its children in a set
139
    fn field_references(&self) -> HashSet<FieldName>;
140
}
141

142
impl VortexExprExt for ExprRef {
143
    fn field_references(&self) -> HashSet<FieldName> {
×
144
        let mut collector = ReferenceCollector::new();
×
145
        // The collector is infallible, so we can unwrap the result
×
146
        self.accept(&mut collector).vortex_unwrap();
×
147
        collector.into_fields()
×
148
    }
×
149
}
150

151
#[derive(Clone)]
152
#[repr(transparent)]
153
pub struct ExprAdapter<V: VTable>(V::Expr);
154

155
impl<V: VTable> VortexExpr for ExprAdapter<V> {
156
    fn as_any(&self) -> &dyn Any {
1,089,329✔
157
        self
1,089,329✔
158
    }
1,089,329✔
159

160
    fn to_expr(&self) -> ExprRef {
12,521✔
161
        Arc::new(ExprAdapter::<V>(self.0.clone()))
12,521✔
162
    }
12,521✔
163

164
    fn encoding(&self) -> ExprEncodingRef {
11✔
165
        V::encoding(&self.0)
11✔
166
    }
11✔
167

168
    fn metadata(&self) -> Option<Vec<u8>> {
11✔
169
        V::metadata(&self.0).map(|m| m.serialize())
11✔
170
    }
11✔
171

172
    fn unchecked_evaluate(&self, ctx: &Scope) -> VortexResult<ArrayRef> {
42,763✔
173
        V::evaluate(&self.0, ctx)
42,763✔
174
    }
42,763✔
175

176
    fn children(&self) -> Vec<&ExprRef> {
1,011,427✔
177
        V::children(&self.0)
1,011,427✔
178
    }
1,011,427✔
179

180
    fn with_children(self: Arc<Self>, children: Vec<ExprRef>) -> VortexResult<ExprRef> {
12,521✔
181
        if self.children().len() != children.len() {
12,521✔
182
            vortex_bail!(
×
183
                "Expected {} children, got {}",
×
184
                self.children().len(),
×
185
                children.len()
×
186
            );
×
187
        }
12,521✔
188
        Ok(V::with_children(&self.0, children)?.to_expr())
12,521✔
189
    }
12,521✔
190

191
    fn return_dtype(&self, scope: &DType) -> VortexResult<DType> {
82,194✔
192
        V::return_dtype(&self.0, scope)
82,194✔
193
    }
82,194✔
194
}
195

196
impl<V: VTable> Debug for ExprAdapter<V> {
197
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
×
198
        Debug::fmt(&self.0, f)
×
199
    }
×
200
}
201

202
impl<V: VTable> Display for ExprAdapter<V> {
203
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
72✔
204
        Display::fmt(&self.0, f)
72✔
205
    }
72✔
206
}
207

208
impl<V: VTable> PartialEq for ExprAdapter<V> {
209
    fn eq(&self, other: &Self) -> bool {
128,424✔
210
        PartialEq::eq(&self.0, &other.0)
128,424✔
211
    }
128,424✔
212
}
213

214
impl<V: VTable> Eq for ExprAdapter<V> {}
215

216
impl<V: VTable> Hash for ExprAdapter<V> {
217
    fn hash<H: Hasher>(&self, state: &mut H) {
193,875✔
218
        Hash::hash(&self.0, state);
193,875✔
219
    }
193,875✔
220
}
221

222
impl<V: VTable> AnalysisExpr for ExprAdapter<V> {
223
    fn stat_falsification(&self, catalog: &mut dyn StatsCatalog) -> Option<ExprRef> {
693✔
224
        <V::Expr as AnalysisExpr>::stat_falsification(&self.0, catalog)
693✔
225
    }
693✔
226

227
    fn max(&self, catalog: &mut dyn StatsCatalog) -> Option<ExprRef> {
819✔
228
        <V::Expr as AnalysisExpr>::max(&self.0, catalog)
819✔
229
    }
819✔
230

231
    fn min(&self, catalog: &mut dyn StatsCatalog) -> Option<ExprRef> {
819✔
232
        <V::Expr as AnalysisExpr>::min(&self.0, catalog)
819✔
233
    }
819✔
234

235
    fn field_path(&self) -> Option<FieldPath> {
21✔
236
        <V::Expr as AnalysisExpr>::field_path(&self.0)
21✔
237
    }
21✔
238
}
239

240
mod private {
241
    use super::*;
242

243
    pub trait Sealed {}
244

245
    impl<V: VTable> Sealed for ExprAdapter<V> {}
246
}
247

248
/// Splits top level and operations into separate expressions
249
pub fn split_conjunction(expr: &ExprRef) -> Vec<ExprRef> {
2✔
250
    let mut conjunctions = vec![];
2✔
251
    split_inner(expr, &mut conjunctions);
2✔
252
    conjunctions
2✔
253
}
2✔
254

255
fn split_inner(expr: &ExprRef, exprs: &mut Vec<ExprRef>) {
4✔
256
    match expr.as_opt::<BinaryVTable>() {
4✔
257
        Some(bexp) if bexp.op() == Operator::And => {
2✔
258
            split_inner(bexp.lhs(), exprs);
1✔
259
            split_inner(bexp.rhs(), exprs);
1✔
260
        }
1✔
261
        Some(_) | None => {
3✔
262
            exprs.push(expr.clone());
3✔
263
        }
3✔
264
    }
265
}
4✔
266

267
/// An expression wrapper that performs pointer equality.
268
#[derive(Clone)]
269
pub struct ExactExpr(pub ExprRef);
270

271
impl PartialEq for ExactExpr {
272
    fn eq(&self, other: &Self) -> bool {
9,468✔
273
        Arc::ptr_eq(&self.0, &other.0)
9,468✔
274
    }
9,468✔
275
}
276

277
impl Eq for ExactExpr {}
278

279
impl Hash for ExactExpr {
280
    fn hash<H: Hasher>(&self, state: &mut H) {
13,649✔
281
        Arc::as_ptr(&self.0).hash(state)
13,649✔
282
    }
13,649✔
283
}
284

285
#[cfg(feature = "test-harness")]
pub mod test_harness {

    use vortex_dtype::{DType, Nullability, PType, StructFields};

    /// A non-nullable struct dtype with five fields: `a` (non-nullable `i32`),
    /// `col1` and `col2` (nullable `u16`), and `bool1` and `bool2`
    /// (non-nullable booleans).
    pub fn struct_dtype() -> DType {
        let nullable_u16 = || DType::Primitive(PType::U16, Nullability::Nullable);
        let required_bool = || DType::Bool(Nullability::NonNullable);

        let field_dtypes = vec![
            DType::Primitive(PType::I32, Nullability::NonNullable),
            nullable_u16(),
            nullable_u16(),
            required_bool(),
            required_bool(),
        ];
        let fields = StructFields::new(
            ["a", "col1", "col2", "bool1", "bool2"].into(),
            field_dtypes,
        );

        DType::Struct(fields, Nullability::NonNullable)
    }
}
306

307
#[cfg(test)]
mod tests {
    use vortex_dtype::{DType, FieldNames, Nullability, PType, StructFields};
    use vortex_scalar::Scalar;

    use super::*;

    /// A lone comparison has no top-level `and`, so it is returned as one piece.
    #[test]
    fn basic_expr_split_test() {
        let expr = eq(get_item("col1", root()), lit(1));
        let pieces = split_conjunction(&expr);
        assert_eq!(pieces.len(), 1);
    }

    /// A top-level `and` is split into its two operands.
    #[test]
    fn basic_conjunction_split_test() {
        let expr = and(get_item("col1", root()), lit(1));
        let conjunction = split_conjunction(&expr);
        assert_eq!(conjunction.len(), 2, "Conjunction is {conjunction:?}");
    }

    /// Pins the `Display` rendering of the expression constructors.
    #[test]
    fn expr_display() {
        assert_eq!(col("a").to_string(), "$.a");
        assert_eq!(root().to_string(), "$");

        let col1: Arc<dyn VortexExpr> = col("col1");
        let col2: Arc<dyn VortexExpr> = col("col2");

        // Binary operators, checked in the same order as they are listed.
        let binary_cases: Vec<(String, &str)> = vec![
            (
                and(col1.clone(), col2.clone()).to_string(),
                "($.col1 and $.col2)",
            ),
            (
                or(col1.clone(), col2.clone()).to_string(),
                "($.col1 or $.col2)",
            ),
            (
                eq(col1.clone(), col2.clone()).to_string(),
                "($.col1 = $.col2)",
            ),
            (
                not_eq(col1.clone(), col2.clone()).to_string(),
                "($.col1 != $.col2)",
            ),
            (
                gt(col1.clone(), col2.clone()).to_string(),
                "($.col1 > $.col2)",
            ),
            (
                gt_eq(col1.clone(), col2.clone()).to_string(),
                "($.col1 >= $.col2)",
            ),
            (
                lt(col1.clone(), col2.clone()).to_string(),
                "($.col1 < $.col2)",
            ),
            (
                lt_eq(col1.clone(), col2.clone()).to_string(),
                "($.col1 <= $.col2)",
            ),
        ];
        for (rendered, expected) in binary_cases {
            assert_eq!(rendered, expected);
        }

        // Nested binary expressions keep their own parentheses.
        let less_than = lt(col1.clone(), col2.clone());
        let different = not_eq(col1.clone(), col2.clone());
        assert_eq!(
            or(less_than, different).to_string(),
            "(($.col1 < $.col2) or ($.col1 != $.col2))"
        );

        assert_eq!(not(col1.clone()).to_string(), "!$.col1");

        // Field selection and exclusion.
        let only_col1 = vec![FieldName::from("col1")];
        assert_eq!(select(only_col1, root()).to_string(), "${col1}");

        let both_included = vec![FieldName::from("col1"), FieldName::from("col2")];
        assert_eq!(
            select(both_included, root()).to_string(),
            "${col1, col2}"
        );

        let both_excluded = vec![FieldName::from("col1"), FieldName::from("col2")];
        assert_eq!(
            select_exclude(both_excluded, root()).to_string(),
            "$~{col1, col2}"
        );

        // Literals.
        assert_eq!(lit(Scalar::from(0u8)).to_string(), "0u8");
        assert_eq!(lit(Scalar::from(0.0f32)).to_string(), "0f32");
        assert_eq!(
            lit(Scalar::from(i64::MAX)).to_string(),
            "9223372036854775807i64"
        );
        assert_eq!(lit(Scalar::from(true)).to_string(), "true");

        let null_bool = Scalar::null(DType::Bool(Nullability::Nullable));
        assert_eq!(lit(null_bool).to_string(), "null");

        // Struct literal.
        let pet_dtype = DType::Struct(
            StructFields::new(
                FieldNames::from(["dog", "cat"]),
                vec![
                    DType::Primitive(PType::U32, Nullability::NonNullable),
                    DType::Utf8(Nullability::NonNullable),
                ],
            ),
            Nullability::NonNullable,
        );
        let pet_values = vec![Scalar::from(32_u32), Scalar::from("rufus".to_string())];
        assert_eq!(
            lit(Scalar::struct_(pet_dtype, pet_values)).to_string(),
            "{dog: 32u32, cat: \"rufus\"}"
        );
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc