• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 17044824718

18 Aug 2025 03:19PM UTC coverage: 87.851% (-0.06%) from 87.914%
17044824718

Pull #4272

github

web-flow
Merge e484357ff into 41301df7b
Pull Request #4272: chore[vortex-duckdb]: `explain` custom display

2 of 48 new or added lines in 2 files covered. (4.17%)

30 existing lines in 2 files now uncovered.

56586 of 64411 relevant lines covered (87.85%)

628422.11 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

63.64
/vortex-duckdb/src/duckdb/table_function/mod.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::ffi::{CStr, CString, c_void};
5
use std::fmt::Debug;
6
use std::ptr;
7

8
use vortex::error::{VortexExpect, VortexResult};
9
mod bind;
10
mod cardinality;
11
mod init;
12
mod partition;
13
mod pushdown_complex_filter;
14

15
pub use bind::*;
16
pub use init::*;
17

18
use crate::cpp::duckdb_vx_client_context;
19
use crate::duckdb::LogicalType;
20
use crate::duckdb::client_context::ClientContext;
21
use crate::duckdb::connection::Connection;
22
use crate::duckdb::data_chunk::DataChunk;
23
use crate::duckdb::expr::Expression;
24
use crate::duckdb::table_function::cardinality::cardinality_callback;
25
use crate::duckdb::table_function::partition::get_partition_data_callback;
26
use crate::duckdb::table_function::pushdown_complex_filter::pushdown_complex_filter_callback;
27
use crate::{cpp, duckdb_try};
28

29
/// A trait that defines the supported operations for a table function in DuckDB.
30
///
31
/// This trait does not yet cover the full C++ API, see table_function.hpp.
///
/// Implementors supply `bind`, `scan`, `init_global`, `init_local` and `partition_data`;
/// every other item has a default.
32
pub trait TableFunction: Sized + Debug {
33
    /// Data returned by `bind` and handed by reference to the other callbacks
    /// (`scan`, `pushdown_complex_filter`, `cardinality`, `partition_data`, `to_string`).
    type BindData: Send + Clone;
34
    /// State returned by `init_global`; see `init_global` for how it is shared.
    type GlobalState: Send + Sync;
35
    /// State returned by `init_local`; see `init_local`.
    type LocalState;
36

37
    /// Whether the table function supports projection pushdown.
38
    /// If not supported a projection will be added that filters out unused columns.
39
    const PROJECTION_PUSHDOWN: bool = false;
40

41
    /// Whether the table function supports filter pushdown.
42
    /// If not supported a filter will be added that applies the table filter directly.
43
    const FILTER_PUSHDOWN: bool = false;
44

45
    /// Whether the table function can immediately prune out filter columns that are unused
46
    /// in the remainder of the query plan.
47
    /// e.g. "SELECT i FROM tbl WHERE j = 42;"
48
    ///   - j does not need to leave the table function at all.
49
    const FILTER_PRUNE: bool = false;
50

51
    /// Returns the parameters of the table function.
52
    fn parameters() -> Vec<LogicalType> {
1✔
53
        // By default, we don't have any parameters.
54
        vec![]
1✔
55
    }
1✔
56

57
    /// Returns the named parameters of the table function, if any.
    ///
    /// Each entry pairs the parameter name with its logical type.
58
    fn named_parameters() -> Vec<(CString, LogicalType)> {
45✔
59
        // By default, we don't have any named parameters.
60
        vec![]
45✔
61
    }
45✔
62

63
    /// This function is used for determining the schema of a table producing function and
64
    /// returning bind data.
65
    fn bind(
66
        client_context: &ClientContext,
67
        input: &BindInput,
68
        result: &mut BindResult,
69
    ) -> VortexResult<Self::BindData>;
70

71
    /// The function is called during query execution and is responsible for producing the
    /// output, which is written into `chunk`.
72
    fn scan(
73
        client_context: &ClientContext,
74
        bind_data: &Self::BindData,
75
        init_local: &mut Self::LocalState,
76
        init_global: &mut Self::GlobalState,
77
        chunk: &mut DataChunk,
78
    ) -> VortexResult<()>;
79

80
    /// Initialize the global operator state of the function.
81
    ///
82
    /// The global operator state is used to keep track of the progress in the table function and
83
    /// is shared between all threads working on the table function.
84
    fn init_global(input: &TableInitInput<Self>) -> VortexResult<Self::GlobalState>;
85

86
    /// Initialize the local operator state of the function.
87
    ///
88
    /// The local operator state is used to keep track of the progress in the table function and
89
    /// is thread-local.
90
    fn init_local(
91
        init: &TableInitInput<Self>,
92
        global: &mut Self::GlobalState,
93
    ) -> VortexResult<Self::LocalState>;
94

95
    /// Pushes down a filter expression to the table function.
96
    ///
97
    /// Returns `true` if the filter was successfully pushed down (and stored on the bind data),
98
    /// or `false` if the filter could not be pushed down. In which case, the filter will be
99
    /// applied later in the query plan.
    ///
    /// The default implementation accepts nothing and returns `Ok(false)`.
100
    fn pushdown_complex_filter(
×
101
        _bind_data: &mut Self::BindData,
×
102
        _expr: &Expression,
×
103
    ) -> VortexResult<bool> {
×
104
        Ok(false)
×
105
    }
×
106

107
    /// Returns the cardinality estimate of the table function.
    ///
    /// The default implementation reports `Cardinality::Unknown`.
108
    fn cardinality(_bind_data: &Self::BindData) -> Cardinality {
2✔
109
        Cardinality::Unknown
2✔
110
    }
2✔
111

112
    /// Returns the idx of the current partition being processed by a local thread.
113
    /// This *must* be globally unique.
114
    fn partition_data(
115
        _bind_data: &Self::BindData,
116
        _global_init_data: &mut Self::GlobalState,
117
        _local_init_data: &mut Self::LocalState,
118
    ) -> VortexResult<u64>;
119

120
    /// Returns a string representation for EXPLAIN output.
    ///
    /// The default implementation returns `None`, i.e. no custom representation.
NEW
121
    fn to_string(_bind_data: &Self::BindData) -> Option<String> {
×
NEW
122
        None
×
NEW
123
    }
×
124

125
    // TODO(ngates): there are many more callbacks that can be configured.
126
}
127

128
/// Row-count information returned by `TableFunction::cardinality`.
///
/// The derives make the value printable in diagnostics, cheap to copy (it holds at most one
/// `u64`), and comparable in tests.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Cardinality {
    /// Completely unknown cardinality.
    Unknown,
    /// An estimate of the number of rows that will be returned by the table function.
    Estimate(u64),
    /// Will not return more than this number of rows.
    Maximum(u64),
}
136

137
impl Connection {
138
    pub fn register_table_function<T: TableFunction>(&self, name: &CStr) -> VortexResult<()> {
45✔
139
        // Set up the parameters.
140
        let parameters = T::parameters();
45✔
141
        let parameter_ptrs = parameters
45✔
142
            .iter()
45✔
143
            .map(|logical_type| logical_type.as_ptr())
45✔
144
            .collect::<Vec<_>>();
45✔
145

146
        let param_names = T::named_parameters();
45✔
147
        let (param_names_ptrs, param_types_ptr) = param_names
45✔
148
            .into_iter()
45✔
149
            .map(|(name, logical_type)| (name.as_ptr(), logical_type.as_ptr()))
45✔
150
            .unzip::<_, _, Vec<_>, Vec<_>>();
45✔
151

152
        let vtab = cpp::duckdb_vx_tfunc_vtab_t {
45✔
153
            name: name.as_ptr(),
45✔
154
            parameters: parameter_ptrs.as_ptr(),
45✔
155
            parameter_count: parameters.len() as _,
45✔
156
            named_parameter_names: param_names_ptrs.as_ptr(),
45✔
157
            named_parameter_types: param_types_ptr.as_ptr(),
45✔
158
            named_parameter_count: param_names_ptrs.len() as _,
45✔
159
            bind: Some(bind_callback::<T>),
45✔
160
            bind_data_clone: Some(bind_data_clone_callback::<T>),
45✔
161
            init_global: Some(init_global_callback::<T>),
45✔
162
            init_local: Some(init_local_callback::<T>),
45✔
163
            function: Some(function::<T>),
45✔
164
            statistics: ptr::null_mut::<c_void>(),
45✔
165
            cardinality: Some(cardinality_callback::<T>),
45✔
166
            pushdown_complex_filter: Some(pushdown_complex_filter_callback::<T>),
45✔
167
            pushdown_expression: ptr::null_mut::<c_void>(),
45✔
168
            to_string: Some(to_string_callback::<T>),
45✔
169
            free_string: Some(free_string_callback),
45✔
170
            table_scan_progress: ptr::null_mut::<c_void>(),
45✔
171
            get_partition_data: Some(get_partition_data_callback::<T>),
45✔
172
            projection_pushdown: T::PROJECTION_PUSHDOWN,
45✔
173
            filter_pushdown: T::FILTER_PUSHDOWN,
45✔
174
            filter_prune: T::FILTER_PRUNE,
45✔
175
            sampling_pushdown: false,
45✔
176
            late_materialization: false,
45✔
177
        };
45✔
178

179
        duckdb_try!(
45✔
180
            unsafe { cpp::duckdb_vx_tfunc_register(self.as_ptr(), &raw const vtab) },
45✔
181
            "Failed to register table function '{}'",
×
UNCOV
182
            name.to_string_lossy()
×
183
        );
184

185
        Ok(())
45✔
186
    }
45✔
187
}
188

189
/// The to_string callback for a table function.
NEW
190
unsafe extern "C-unwind" fn to_string_callback<T: TableFunction>(
×
NEW
191
    bind_data: *mut c_void,
×
NEW
192
    error_out: *mut cpp::duckdb_vx_error,
×
NEW
193
) -> *const std::os::raw::c_char {
×
NEW
194
    let bind_data = unsafe { &*(bind_data as *const T::BindData) };
×
195

NEW
196
    match T::to_string(bind_data) {
×
NEW
197
        Some(s) => {
×
198
            // Convert to CString and leak it - the C++ side is responsible for freeing it
NEW
199
            let c_str = match CString::new(s) {
×
NEW
200
                Ok(c) => c,
×
NEW
201
                Err(e) => {
×
NEW
202
                    unsafe {
×
NEW
203
                        error_out.write(cpp::duckdb_vx_error_create(
×
NEW
204
                            e.to_string().as_ptr().cast(),
×
NEW
205
                            e.to_string().len(),
×
NEW
206
                        ));
×
NEW
207
                    }
×
NEW
208
                    return ptr::null();
×
209
                }
210
            };
NEW
211
            c_str.into_raw()
×
212
        }
NEW
213
        None => ptr::null(),
×
214
    }
NEW
215
}
×
216

217
/// Releases a string that Rust previously handed out via `CString::into_raw`.
///
/// A null pointer is ignored.
unsafe extern "C-unwind" fn free_string_callback(s: *const std::os::raw::c_char) {
    if s.is_null() {
        return;
    }

    // Take ownership back and drop it immediately, which frees the allocation.
    // SAFETY: relies on the caller passing a pointer obtained from `CString::into_raw` that has
    // not been freed already; this is not checked here.
    drop(unsafe { CString::from_raw(s.cast_mut()) });
}
×
226

227
/// The native function callback for a table function.
228
unsafe extern "C-unwind" fn function<T: TableFunction>(
13,694✔
229
    duckdb_client_context: duckdb_vx_client_context,
13,694✔
230
    bind_data: *const c_void,
13,694✔
231
    global_init_data: *mut c_void,
13,694✔
232
    local_init_data: *mut c_void,
13,694✔
233
    output: cpp::duckdb_data_chunk,
13,694✔
234
    error_out: *mut cpp::duckdb_vx_error,
13,694✔
235
) {
13,694✔
236
    let client_context = unsafe { ClientContext::borrow(duckdb_client_context) };
13,694✔
237
    let bind_data = unsafe { &*(bind_data as *const T::BindData) };
13,694✔
238
    let global_init_data = unsafe { global_init_data.cast::<T::GlobalState>().as_mut() }
13,694✔
239
        .vortex_expect("global_init_data null pointer");
13,694✔
240
    let local_init_data = unsafe { local_init_data.cast::<T::LocalState>().as_mut() }
13,694✔
241
        .vortex_expect("local_init_data null pointer");
13,694✔
242
    let mut data_chunk = unsafe { DataChunk::borrow(output) };
13,694✔
243

244
    match T::scan(
13,694✔
245
        &client_context,
13,694✔
246
        bind_data,
13,694✔
247
        local_init_data,
13,694✔
248
        global_init_data,
13,694✔
249
        &mut data_chunk,
13,694✔
250
    ) {
13,694✔
251
        Ok(()) => {
13,694✔
252
            // The data chunk is already filled by the function.
13,694✔
253
            // No need to do anything here.
13,694✔
254
        }
13,694✔
UNCOV
255
        Err(e) => unsafe {
×
UNCOV
256
            error_out.write(cpp::duckdb_vx_error_create(
×
UNCOV
257
                e.to_string().as_ptr().cast(),
×
UNCOV
258
                e.to_string().len(),
×
UNCOV
259
            ));
×
UNCOV
260
        },
×
261
    }
262
}
13,694✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc