• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16968296061

14 Aug 2025 02:36PM UTC coverage: 85.384%. First build
16968296061

Pull #4180

github

web-flow
Merge 4f1b98ecb into d1c2d9c66
Pull Request #4180: feat: Support more datafusion features

189 of 259 new or added lines in 6 files covered. (72.97%)

55334 of 64806 relevant lines covered (85.38%)

489107.11 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

74.68
/vortex-datafusion/src/convert/exprs.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::sync::Arc;
5

6
use datafusion::arrow::datatypes::{DataType, Schema};
7
use datafusion::logical_expr::Operator as DFOperator;
8
use datafusion::physical_expr::{PhysicalExpr, PhysicalExprRef, expressions};
9
use vortex::error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
10
use vortex::expr::{ExprRef, Operator, and};
11
use vortex::scalar::Scalar;
12

13
use crate::convert::{FromDataFusion, TryFromDataFusion};
14

15
const SUPPORTED_BINARY_OPS: &[DFOperator] = &[
16
    DFOperator::Eq,
17
    DFOperator::NotEq,
18
    DFOperator::Gt,
19
    DFOperator::GtEq,
20
    DFOperator::Lt,
21
    DFOperator::LtEq,
22
];
23

24
// If we cannot convert an expr to a vortex expr, we run no filter, since datafusion
25
// will rerun the filter expression anyway.
26
pub(crate) fn make_vortex_predicate(predicate: &[&Arc<dyn PhysicalExpr>]) -> Option<ExprRef> {
5✔
27
    // This splits expressions into conjunctions and converts them to vortex expressions.
28
    // Any inconvertible expressions are dropped since true /\ a == a.
29
    predicate
5✔
30
        .iter()
5✔
31
        .filter_map(|e| ExprRef::try_from_df(e.as_ref()).ok())
5✔
32
        .reduce(and)
5✔
33
}
5✔
34

35
// TODO(joe): Don't return an error when we have an unsupported node, bubble up "TRUE" as in keep
36
//  for that node, up to any `and` or `or` node.
37
impl TryFromDataFusion<dyn PhysicalExpr> for ExprRef {
38
    fn try_from_df(df: &dyn PhysicalExpr) -> VortexResult<Self> {
15✔
39
        use vortex::expr::{BinaryExpr, ExprRef, IntoExpr, LikeExpr, get_item, lit, root};
40

41
        if let Some(binary_expr) = df.as_any().downcast_ref::<expressions::BinaryExpr>() {
15✔
42
            let left = ExprRef::try_from_df(binary_expr.left().as_ref())?;
5✔
43
            let right = ExprRef::try_from_df(binary_expr.right().as_ref())?;
5✔
44
            let operator = Operator::try_from_df(binary_expr.op())?;
5✔
45

46
            return Ok(BinaryExpr::new_expr(left, operator, right));
5✔
47
        }
10✔
48

49
        if let Some(col_expr) = df.as_any().downcast_ref::<expressions::Column>() {
10✔
50
            return Ok(get_item(col_expr.name().to_owned(), root()));
3✔
51
        }
7✔
52

53
        if let Some(like) = df.as_any().downcast_ref::<expressions::LikeExpr>() {
7✔
54
            let child = ExprRef::try_from_df(like.expr().as_ref())?;
×
55
            let pattern = ExprRef::try_from_df(like.pattern().as_ref())?;
×
56
            return Ok(
×
57
                LikeExpr::new(child, pattern, like.negated(), like.case_insensitive()).into_expr(),
×
58
            );
×
59
        }
7✔
60

61
        if let Some(literal) = df.as_any().downcast_ref::<expressions::Literal>() {
7✔
62
            let value = Scalar::from_df(literal.value());
7✔
63
            return Ok(lit(value));
7✔
64
        }
×
65

66
        vortex_bail!("Couldn't convert DataFusion physical {df} expression to a vortex expression")
×
67
    }
15✔
68
}
69

70
impl TryFromDataFusion<DFOperator> for Operator {
71
    fn try_from_df(value: &DFOperator) -> VortexResult<Self> {
5✔
72
        match value {
5✔
73
            DFOperator::Eq => Ok(Operator::Eq),
2✔
74
            DFOperator::NotEq => Ok(Operator::NotEq),
×
75
            DFOperator::Lt => Ok(Operator::Lt),
3✔
76
            DFOperator::LtEq => Ok(Operator::Lte),
×
77
            DFOperator::Gt => Ok(Operator::Gt),
×
78
            DFOperator::GtEq => Ok(Operator::Gte),
×
79
            DFOperator::And => Ok(Operator::And),
×
80
            DFOperator::Or => Ok(Operator::Or),
×
81
            DFOperator::IsDistinctFrom
82
            | DFOperator::IsNotDistinctFrom
83
            | DFOperator::RegexMatch
84
            | DFOperator::RegexIMatch
85
            | DFOperator::RegexNotMatch
86
            | DFOperator::RegexNotIMatch
87
            | DFOperator::LikeMatch
88
            | DFOperator::ILikeMatch
89
            | DFOperator::NotLikeMatch
90
            | DFOperator::NotILikeMatch
91
            | DFOperator::BitwiseAnd
92
            | DFOperator::BitwiseOr
93
            | DFOperator::BitwiseXor
94
            | DFOperator::BitwiseShiftRight
95
            | DFOperator::BitwiseShiftLeft
96
            | DFOperator::StringConcat
97
            | DFOperator::AtArrow
98
            | DFOperator::ArrowAt
99
            | DFOperator::Plus
100
            | DFOperator::Minus
101
            | DFOperator::Multiply
102
            | DFOperator::Divide
103
            | DFOperator::Modulo
104
            | DFOperator::Arrow
105
            | DFOperator::LongArrow
106
            | DFOperator::HashArrow
107
            | DFOperator::HashLongArrow
108
            | DFOperator::AtAt
109
            | DFOperator::IntegerDivide
110
            | DFOperator::HashMinus
111
            | DFOperator::AtQuestion
112
            | DFOperator::Question
113
            | DFOperator::QuestionAnd
114
            | DFOperator::QuestionPipe => {
115
                Err(vortex_err!("Unsupported datafusion operator {value}"))
×
116
            }
117
        }
118
    }
5✔
119
}
120

121
pub(crate) fn can_be_pushed_down(expr: &PhysicalExprRef, schema: &Schema) -> bool {
22✔
122
    use datafusion::physical_plan::expressions::{BinaryExpr, Column, LikeExpr, Literal};
123

124
    let expr = expr.as_any();
22✔
125
    if let Some(binary) = expr.downcast_ref::<BinaryExpr>() {
22✔
126
        (binary.op().is_logic_operator() || SUPPORTED_BINARY_OPS.contains(binary.op()))
8✔
127
            && can_be_pushed_down(binary.left(), schema)
8✔
128
            && can_be_pushed_down(binary.right(), schema)
6✔
129
            && binary
6✔
130
                .left()
6✔
131
                .data_type(schema)
6✔
132
                .ok()
6✔
133
                .vortex_expect("never fails")
6✔
134
                == binary
6✔
135
                    .right()
6✔
136
                    .data_type(schema)
6✔
137
                    .ok()
6✔
138
                    .vortex_expect("never fails")
6✔
139
    } else if let Some(col) = expr.downcast_ref::<Column>() {
14✔
140
        schema
6✔
141
            .field_with_name(col.name())
6✔
142
            .ok()
6✔
143
            .is_some_and(|field| supported_data_types(field.data_type()))
6✔
144
    } else if let Some(like) = expr.downcast_ref::<LikeExpr>() {
8✔
NEW
145
        can_be_pushed_down(like.expr(), schema) && can_be_pushed_down(like.pattern(), schema)
×
146
    } else if let Some(lit) = expr.downcast_ref::<Literal>() {
8✔
147
        supported_data_types(&lit.value().data_type())
8✔
148
    } else {
NEW
149
        log::debug!("DataFusion expression can't be pushed down: {expr:?}");
×
NEW
150
        false
×
151
    }
152
}
22✔
153

154
fn supported_data_types(dt: &DataType) -> bool {
12✔
155
    use DataType::*;
156
    let is_supported = dt.is_null()
12✔
157
        || dt.is_numeric()
12✔
NEW
158
        || matches!(
×
NEW
159
            dt,
×
160
            Boolean
161
                | Utf8
162
                | Utf8View
163
                | Binary
164
                | BinaryView
165
                | Date32
166
                | Date64
167
                | Timestamp(_, _)
168
                | Time32(_)
169
                | Time64(_)
170
        );
171

172
    if !is_supported {
12✔
NEW
173
        log::debug!("DataFusion data type {dt:?} is not supported");
×
174
    }
12✔
175

176
    is_supported
12✔
177
}
12✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc