• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16398874017

20 Jul 2025 10:24AM UTC coverage: 81.523% (+0.01%) from 81.512%
16398874017

push

github

web-flow
Add cardinality callback to DuckDB TableFunction (#3941)

This helps DuckDB to produce far better joins

Signed-off-by: Nicholas Gates <nick@nickgates.com>

24 of 28 new or added lines in 3 files covered. (85.71%)

2 existing lines in 1 file now uncovered.

42047 of 51577 relevant lines covered (81.52%)

171533.82 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

76.47
/vortex-duckdb/src/duckdb/table_function/mod.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::ffi::{CStr, CString, c_void};
5
use std::fmt::Debug;
6
use std::ptr;
7

8
use vortex::error::{VortexExpect, VortexResult};
9
mod bind;
10
mod cardinality;
11
mod init;
12
mod pushdown_complex_filter;
13

14
pub use bind::*;
15
pub use init::*;
16

17
use crate::duckdb::LogicalType;
18
use crate::duckdb::connection::Connection;
19
use crate::duckdb::data_chunk::DataChunk;
20
use crate::duckdb::expr::Expression;
21
use crate::duckdb::table_function::cardinality::cardinality_callback;
22
use crate::duckdb::table_function::pushdown_complex_filter::pushdown_complex_filter_callback;
23
use crate::{cpp, duckdb_try};
24

25
/// A trait that defines the supported operations for a table function in DuckDB.
26
///
27
/// This trait does not yet cover the full C++ API, see table_function.hpp.
28
pub trait TableFunction: Sized + Debug {
29
    type BindData: Send + Clone;
30
    type GlobalState: Send + Sync;
31
    type LocalState;
32

33
    /// Whether the table function supports projection pushdown.
34
    /// If not supported a projection will be added that filters out unused columns.
35
    const PROJECTION_PUSHDOWN: bool = false;
36

37
    /// Whether the table function supports filter pushdown.
38
    /// If not supported a filter will be added that applies the table filter directly.
39
    const FILTER_PUSHDOWN: bool = false;
40

41
    /// Whether the table function can immediately prune out filter columns that are unused
42
    /// in the remainder of the query plan.
43
    /// e.g. "SELECT i FROM tbl WHERE j = 42;"
44
    ///   - j does not need to leave the table function at all.
45
    const FILTER_PRUNE: bool = false;
46

47
    /// Returns the parameters of the table function.
48
    fn parameters() -> Vec<LogicalType> {
×
49
        // By default, we don't have any parameters.
50
        vec![]
×
51
    }
×
52

53
    /// Returns the named parameters of the table function, if any.
54
    fn named_parameters() -> Vec<(CString, LogicalType)> {
36✔
55
        // By default, we don't have any named parameters.
56
        vec![]
36✔
57
    }
36✔
58

59
    /// This function is used for determining the schema of a table producing function and
60
    /// returning bind data.
61
    fn bind(input: &BindInput, result: &mut BindResult) -> VortexResult<Self::BindData>;
62

63
    /// The function is called during query execution and is responsible for producing the output
64
    fn scan(
65
        bind_data: &Self::BindData,
66
        init_local: &mut Self::LocalState,
67
        init_global: &mut Self::GlobalState,
68
        chunk: &mut DataChunk,
69
    ) -> VortexResult<()>;
70

71
    /// Initialize the global operator state of the function.
72
    ///
73
    /// The global operator state is used to keep track of the progress in the table function and
74
    /// is shared between all threads working on the table function.
75
    fn init_global(input: &TableInitInput<Self>) -> VortexResult<Self::GlobalState>;
76

77
    /// Initialize the local operator state of the function.
78
    ///
79
    /// The local operator state is used to keep track of the progress in the table function and
80
    /// is thread-local.
81
    fn init_local(
82
        init: &TableInitInput<Self>,
83
        global: &mut Self::GlobalState,
84
    ) -> VortexResult<Self::LocalState>;
85

86
    /// Pushes down a filter expression to the table function.
87
    ///
88
    /// Returns `true` if the filter was successfully pushed down (and stored on the bind data),
89
    /// or `false` if the filter could not be pushed down. In which case, the filter will be
90
    /// applied later in the query plan.
91
    fn pushdown_complex_filter(
×
92
        _bind_data: &mut Self::BindData,
×
93
        _expr: &Expression,
×
94
    ) -> VortexResult<bool> {
×
95
        Ok(false)
×
96
    }
×
97

98
    /// Returns the cardinality estimate of the table function.
NEW
99
    fn cardinality(_bind_data: &Self::BindData) -> Cardinality {
×
NEW
100
        Cardinality::Unknown
×
NEW
101
    }
×
102

103
    // TODO(ngates): there are many more callbacks that can be configured.
104
}
105

106
/// How many rows a table function expects to produce.
///
/// This is the return type of the table function's `cardinality` callback.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Cardinality {
    /// Completely unknown cardinality.
    Unknown,
    /// An estimate of the number of rows that will be returned by the table function.
    Estimate(u64),
    /// Will not return more than this number of rows.
    Maximum(u64),
}
114

115
impl Connection {
116
    pub fn register_table_function<T: TableFunction>(&self, name: &CStr) -> VortexResult<()> {
36✔
117
        // Set up the parameters.
118
        let parameters = T::parameters();
36✔
119
        let parameter_ptrs = parameters
36✔
120
            .iter()
36✔
121
            .map(|logical_type| logical_type.as_ptr())
36✔
122
            .collect::<Vec<_>>();
36✔
123

124
        let param_names = T::named_parameters();
36✔
125
        let (param_names_ptrs, param_types_ptr) = param_names
36✔
126
            .into_iter()
36✔
127
            .map(|(name, logical_type)| (name.as_ptr(), logical_type.as_ptr()))
36✔
128
            .unzip::<_, _, Vec<_>, Vec<_>>();
36✔
129

130
        let vtab = cpp::duckdb_vx_tfunc_vtab_t {
36✔
131
            name: name.as_ptr(),
36✔
132
            parameters: parameter_ptrs.as_ptr(),
36✔
133
            parameter_count: parameters.len() as _,
36✔
134
            named_parameter_names: param_names_ptrs.as_ptr(),
36✔
135
            named_parameter_types: param_types_ptr.as_ptr(),
36✔
136
            named_parameter_count: param_names_ptrs.len() as _,
36✔
137
            bind: Some(bind_callback::<T>),
36✔
138
            bind_data_clone: Some(bind_data_clone_callback::<T>),
36✔
139
            init_global: Some(init_global_callback::<T>),
36✔
140
            init_local: Some(init_local_callback::<T>),
36✔
141
            function: Some(function::<T>),
36✔
142
            statistics: ptr::null_mut::<c_void>(),
36✔
143
            cardinality: Some(cardinality_callback::<T>),
36✔
144
            pushdown_complex_filter: Some(pushdown_complex_filter_callback::<T>),
36✔
145
            pushdown_expression: ptr::null_mut::<c_void>(),
36✔
146
            table_scan_progress: ptr::null_mut::<c_void>(),
36✔
147
            projection_pushdown: T::PROJECTION_PUSHDOWN,
36✔
148
            filter_pushdown: T::FILTER_PUSHDOWN,
36✔
149
            filter_prune: T::FILTER_PRUNE,
36✔
150
            sampling_pushdown: false,
36✔
151
            late_materialization: false,
36✔
152
        };
36✔
153

154
        duckdb_try!(
36✔
155
            unsafe { cpp::duckdb_vx_tfunc_register(self.as_ptr(), &raw const vtab) },
36✔
156
            "Failed to register table function '{}'",
×
157
            name.to_string_lossy()
×
158
        );
159

160
        Ok(())
36✔
161
    }
36✔
162
}
163

164
/// The native function callback for a table function.
165
unsafe extern "C" fn function<T: TableFunction>(
10,666✔
166
    bind_data: *const c_void,
10,666✔
167
    global_init_data: *mut c_void,
10,666✔
168
    local_init_data: *mut c_void,
10,666✔
169
    output: cpp::duckdb_data_chunk,
10,666✔
170
    error_out: *mut cpp::duckdb_vx_error,
10,666✔
171
) {
10,666✔
172
    let bind_data = unsafe { &*(bind_data as *const T::BindData) };
10,666✔
173
    let global_init_data = unsafe { global_init_data.cast::<T::GlobalState>().as_mut() }
10,666✔
174
        .vortex_expect("global_init_data null pointer");
10,666✔
175
    let local_init_data = unsafe { local_init_data.cast::<T::LocalState>().as_mut() }
10,666✔
176
        .vortex_expect("local_init_data null pointer");
10,666✔
177
    let mut data_chunk = unsafe { DataChunk::borrow(output) };
10,666✔
178

179
    match T::scan(
10,666✔
180
        bind_data,
10,666✔
181
        local_init_data,
10,666✔
182
        global_init_data,
10,666✔
183
        &mut data_chunk,
10,666✔
184
    ) {
10,666✔
185
        Ok(()) => {
10,666✔
186
            // The data chunk is already filled by the function.
10,666✔
187
            // No need to do anything here.
10,666✔
188
        }
10,666✔
189
        Err(e) => unsafe {
×
190
            error_out.write(cpp::duckdb_vx_error_create(
×
191
                e.to_string().as_ptr().cast(),
×
192
                e.to_string().len(),
×
193
            ));
×
194
        },
×
195
    }
196
}
10,666✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc