• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16843913118

09 Aug 2025 01:37AM UTC coverage: 86.313% (+0.08%) from 86.234%
16843913118

Pull #4180

github

web-flow
Merge 8fc67e235 into de5f71d7b
Pull Request #4180: feat: Support more datafusion features

236 of 257 new or added lines in 8 files covered. (91.83%)

1 existing line in 1 file now uncovered.

53667 of 62177 relevant lines covered (86.31%)

545791.6 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.86
/vortex-datafusion/src/convert/exprs.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::sync::Arc;
5

6
use datafusion::arrow::datatypes::{DataType, Schema};
7
use datafusion::logical_expr::Operator as DFOperator;
8
use datafusion::physical_expr::{PhysicalExpr, PhysicalExprRef, expressions};
9
use vortex::error::{VortexResult, vortex_bail, vortex_err};
10
use vortex::expr::{ExprRef, Operator, and};
11
use vortex::scalar::Scalar;
12

13
use crate::convert::{FromDataFusion, TryFromDataFusion};
14

15
const SUPPORTED_BINARY_OPS: &[DFOperator] = &[
16
    DFOperator::Eq,
17
    DFOperator::NotEq,
18
    DFOperator::Gt,
19
    DFOperator::GtEq,
20
    DFOperator::Lt,
21
    DFOperator::LtEq,
22
    DFOperator::Plus,
23
    DFOperator::Minus,
24
];
25

26
// If we cannot convert an expr to a vortex expr, we run no filter, since datafusion
27
// will rerun the filter expression anyway.
28
pub(crate) fn make_vortex_predicate(predicate: &[&Arc<dyn PhysicalExpr>]) -> Option<ExprRef> {
282✔
29
    // This splits expressions into conjunctions and converts them to vortex expressions.
30
    // Any inconvertible expressions are dropped since true /\ a == a.
31
    predicate
282✔
32
        .iter()
282✔
33
        .filter_map(|e| ExprRef::try_from_df(e.as_ref()).ok())
282✔
34
        .reduce(and)
282✔
35
}
282✔
36

37
// TODO(joe): Don't return an error when we have an unsupported node, bubble up "TRUE" as in keep
38
//  for that node, up to any `and` or `or` node.
39
impl TryFromDataFusion<dyn PhysicalExpr> for ExprRef {
40
    fn try_from_df(df: &dyn PhysicalExpr) -> VortexResult<Self> {
2,326✔
41
        use vortex::expr::{BinaryExpr, ExprRef, IntoExpr, LikeExpr, get_item, lit, root};
42

43
        if let Some(binary_expr) = df.as_any().downcast_ref::<expressions::BinaryExpr>() {
2,326✔
44
            let left = ExprRef::try_from_df(binary_expr.left().as_ref())?;
1,010✔
45
            let right = ExprRef::try_from_df(binary_expr.right().as_ref())?;
1,010✔
46
            let operator = Operator::try_from_df(binary_expr.op())?;
1,010✔
47

48
            return Ok(BinaryExpr::new_expr(left, operator, right));
1,010✔
49
        }
1,316✔
50

51
        if let Some(col_expr) = df.as_any().downcast_ref::<expressions::Column>() {
1,316✔
52
            return Ok(get_item(col_expr.name().to_owned(), root()));
730✔
53
        }
586✔
54

55
        if let Some(like) = df.as_any().downcast_ref::<expressions::LikeExpr>() {
586✔
56
            let child = ExprRef::try_from_df(like.expr().as_ref())?;
12✔
57
            let pattern = ExprRef::try_from_df(like.pattern().as_ref())?;
12✔
58
            return Ok(
12✔
59
                LikeExpr::new(child, pattern, like.negated(), like.case_insensitive()).into_expr(),
12✔
60
            );
12✔
61
        }
574✔
62

63
        if let Some(literal) = df.as_any().downcast_ref::<expressions::Literal>() {
574✔
64
            let value = Scalar::from_df(literal.value());
574✔
65
            return Ok(lit(value));
574✔
66
        }
×
67

68
        vortex_bail!("Couldn't convert DataFusion physical {df} expression to a vortex expression")
×
69
    }
2,326✔
70
}
71

72
impl TryFromDataFusion<DFOperator> for Operator {
73
    fn try_from_df(value: &DFOperator) -> VortexResult<Self> {
1,010✔
74
        match value {
1,010✔
75
            DFOperator::Eq => Ok(Operator::Eq),
134✔
76
            DFOperator::NotEq => Ok(Operator::NotEq),
2✔
77
            DFOperator::Lt => Ok(Operator::Lt),
138✔
78
            DFOperator::LtEq => Ok(Operator::Lte),
98✔
79
            DFOperator::Gt => Ok(Operator::Gt),
82✔
80
            DFOperator::GtEq => Ok(Operator::Gte),
186✔
81
            DFOperator::And => Ok(Operator::And),
302✔
82
            DFOperator::Or => Ok(Operator::Or),
68✔
NEW
83
            DFOperator::Plus => Ok(Operator::Add),
×
NEW
84
            DFOperator::Minus => Ok(Operator::Sub),
×
85
            DFOperator::IsDistinctFrom
86
            | DFOperator::IsNotDistinctFrom
87
            | DFOperator::RegexMatch
88
            | DFOperator::RegexIMatch
89
            | DFOperator::RegexNotMatch
90
            | DFOperator::RegexNotIMatch
91
            | DFOperator::LikeMatch
92
            | DFOperator::ILikeMatch
93
            | DFOperator::NotLikeMatch
94
            | DFOperator::NotILikeMatch
95
            | DFOperator::BitwiseAnd
96
            | DFOperator::BitwiseOr
97
            | DFOperator::BitwiseXor
98
            | DFOperator::BitwiseShiftRight
99
            | DFOperator::BitwiseShiftLeft
100
            | DFOperator::StringConcat
101
            | DFOperator::AtArrow
102
            | DFOperator::ArrowAt
103
            | DFOperator::Multiply
104
            | DFOperator::Divide
105
            | DFOperator::Modulo
106
            | DFOperator::Arrow
107
            | DFOperator::LongArrow
108
            | DFOperator::HashArrow
109
            | DFOperator::HashLongArrow
110
            | DFOperator::AtAt
111
            | DFOperator::IntegerDivide
112
            | DFOperator::HashMinus
113
            | DFOperator::AtQuestion
114
            | DFOperator::Question
115
            | DFOperator::QuestionAnd
116
            | DFOperator::QuestionPipe => {
117
                Err(vortex_err!("Unsupported datafusion operator {value}"))
×
118
            }
119
        }
120
    }
1,010✔
121
}
122

123
pub(crate) fn can_be_pushed_down(expr: &PhysicalExprRef, schema: &Schema) -> bool {
472✔
124
    use datafusion::physical_plan::expressions::{BinaryExpr, Column, LikeExpr, Literal};
125

126
    let expr = expr.as_any();
472✔
127
    if let Some(binary) = expr.downcast_ref::<BinaryExpr>() {
472✔
128
        (binary.op().is_logic_operator() || SUPPORTED_BINARY_OPS.contains(binary.op()))
160✔
129
            && can_be_pushed_down(binary.left(), schema)
160✔
130
            && can_be_pushed_down(binary.right(), schema)
154✔
131
    } else if let Some(col) = expr.downcast_ref::<Column>() {
312✔
132
        let field = schema.field(col.index());
156✔
133
        supported_data_types(field.data_type())
156✔
134
    } else if let Some(like) = expr.downcast_ref::<LikeExpr>() {
156✔
135
        can_be_pushed_down(like.expr(), schema) && can_be_pushed_down(like.pattern(), schema)
12✔
136
    } else if let Some(lit) = expr.downcast_ref::<Literal>() {
144✔
137
        supported_data_types(&lit.value().data_type())
136✔
138
    } else {
139
        log::debug!("DataFusion expression can't be pushed down: {expr:?}");
8✔
140
        false
8✔
141
    }
142
}
472✔
143

144
fn supported_data_types(dt: &DataType) -> bool {
292✔
145
    use DataType::*;
146
    let is_supported = dt.is_null()
292✔
147
        || dt.is_numeric()
292✔
NEW
148
        || matches!(
×
149
            dt,
180✔
150
            Boolean
151
                | Utf8
152
                | Utf8View
153
                | Binary
154
                | BinaryView
155
                | Date32
156
                | Date64
157
                | Timestamp(_, _)
158
                | Time32(_)
159
                | Time64(_)
160
        );
161

162
    if !is_supported {
292✔
NEW
163
        log::debug!("DataFusion data type {dt:?} is not supported");
×
164
    }
292✔
165

166
    is_supported
292✔
167
}
292✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc