• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16416400962

21 Jul 2025 12:02PM UTC coverage: 81.576%. First build
16416400962

Pull #3949

github

web-flow
Merge 8fd8ea7ab into 301b27f5f
Pull Request #3949: [wip] Add row_id support to DuckDB

139 of 200 new or added lines in 11 files covered. (69.5%)

42124 of 51638 relevant lines covered (81.58%)

171468.07 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

75.28
/vortex-duckdb/src/duckdb/table_function/mod.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::ffi::{CStr, CString, c_void};
5
use std::fmt::Debug;
6
use std::ptr;
7

8
use vortex::error::{VortexExpect, VortexResult};
9
mod bind;
10
mod cardinality;
11
mod init;
12
mod pushdown_complex_filter;
13
mod row_id_cols;
14
mod virtual_cols;
15

16
use crate::duckdb::LogicalType;
17
use crate::duckdb::connection::Connection;
18
use crate::duckdb::data_chunk::DataChunk;
19
use crate::duckdb::expr::Expression;
20
use crate::duckdb::table_function::cardinality::cardinality_callback;
21
use crate::duckdb::table_function::pushdown_complex_filter::pushdown_complex_filter_callback;
22
use crate::{cpp, duckdb_try};
23
pub use bind::*;
24
pub use init::*;
25
pub use row_id_cols::*;
26
pub use virtual_cols::*;
27

28
/// A trait that defines the supported operations for a table function in DuckDB.
29
///
30
/// This trait does not yet cover the full C++ API, see table_function.hpp.
31
pub trait TableFunction: Sized + Debug {
32
    type BindData: Send + Clone;
33
    type GlobalState: Send + Sync;
34
    type LocalState;
35

36
    /// Whether the table function supports projection pushdown.
37
    /// If not supported a projection will be added that filters out unused columns.
38
    const PROJECTION_PUSHDOWN: bool = false;
39

40
    /// Whether the table function supports filter pushdown.
41
    /// If not supported a filter will be added that applies the table filter directly.
42
    const FILTER_PUSHDOWN: bool = false;
43

44
    /// Whether the table function can immediately prune out filter columns that are unused
45
    /// in the remainder of the query plan.
46
    /// e.g. "SELECT i FROM tbl WHERE j = 42;"
47
    ///   - j does not need to leave the table function at all.
48
    const FILTER_PRUNE: bool = false;
49

50
    /// Whether the table supports late materialization.
51
    const LATE_MATERIALIZATION: bool = false;
52

53
    /// Returns the parameters of the table function.
54
    fn parameters() -> Vec<LogicalType> {
×
55
        // By default, we don't have any parameters.
56
        vec![]
×
57
    }
×
58

59
    /// Returns the named parameters of the table function, if any.
60
    fn named_parameters() -> Vec<(CString, LogicalType)> {
36✔
61
        // By default, we don't have any named parameters.
62
        vec![]
36✔
63
    }
36✔
64

65
    /// This function is used for determining the schema of a table producing function and
66
    /// returning bind data.
67
    fn bind(input: &BindInput, result: &mut BindResult) -> VortexResult<Self::BindData>;
68

69
    /// The function is called during query execution and is responsible for producing the output
70
    fn scan(
71
        bind_data: &Self::BindData,
72
        init_local: &mut Self::LocalState,
73
        init_global: &mut Self::GlobalState,
74
        chunk: &mut DataChunk,
75
    ) -> VortexResult<()>;
76

77
    /// Initialize the global operator state of the function.
78
    ///
79
    /// The global operator state is used to keep track of the progress in the table function and
80
    /// is shared between all threads working on the table function.
81
    fn init_global(input: &TableInitInput<Self>) -> VortexResult<Self::GlobalState>;
82

83
    /// Initialize the local operator state of the function.
84
    ///
85
    /// The local operator state is used to keep track of the progress in the table function and
86
    /// is thread-local.
87
    fn init_local(
88
        init: &TableInitInput<Self>,
89
        global: &mut Self::GlobalState,
90
    ) -> VortexResult<Self::LocalState>;
91

92
    /// Pushes down a filter expression to the table function.
93
    ///
94
    /// Returns `true` if the filter was successfully pushed down (and stored on the bind data),
95
    /// or `false` if the filter could not be pushed down. In which case, the filter will be
96
    /// applied later in the query plan.
97
    fn pushdown_complex_filter(
×
98
        _bind_data: &mut Self::BindData,
×
99
        _expr: &Expression,
×
100
    ) -> VortexResult<bool> {
×
101
        Ok(false)
×
102
    }
×
103

104
    /// Returns the cardinality estimate of the table function.
105
    fn cardinality(_bind_data: &Self::BindData) -> Cardinality {
×
106
        Cardinality::Unknown
×
107
    }
×
108

109
    /// Return the columns that uniquely identify a row ID in the table function.
110
    /// Used for late-materialization and other optimizations.
NEW
111
    fn row_id_columns(_bind_data: &Self::BindData, _result: &mut RowIdColsResult) {}
×
112

113
    /// Returns the virtual columns of the table function.
NEW
114
    fn virtual_columns(_bind_data: &Self::BindData, _result: &mut VirtualColsResult) {}
×
115

116
    // TODO(ngates): there are many more callbacks that can be configured.
117
}
118

119
/// The row-count information a table function can report through
/// `TableFunction::cardinality`.
///
/// The type is a small plain value, so it derives the common traits: it can be printed in
/// diagnostics, copied freely, and compared in assertions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Cardinality {
    /// Completely unknown cardinality.
    Unknown,
    /// An estimate of the number of rows that will be returned by the table function.
    Estimate(u64),
    /// Will not return more than this number of rows.
    Maximum(u64),
}
127

128
impl Connection {
129
    pub fn register_table_function<T: TableFunction>(&self, name: &CStr) -> VortexResult<()> {
36✔
130
        // Set up the parameters.
131
        let parameters = T::parameters();
36✔
132
        let parameter_ptrs = parameters
36✔
133
            .iter()
36✔
134
            .map(|logical_type| logical_type.as_ptr())
36✔
135
            .collect::<Vec<_>>();
36✔
136

137
        let param_names = T::named_parameters();
36✔
138
        let (param_names_ptrs, param_types_ptr) = param_names
36✔
139
            .into_iter()
36✔
140
            .map(|(name, logical_type)| (name.as_ptr(), logical_type.as_ptr()))
36✔
141
            .unzip::<_, _, Vec<_>, Vec<_>>();
36✔
142

143
        let vtab = cpp::duckdb_vx_tfunc_vtab_t {
36✔
144
            name: name.as_ptr(),
36✔
145
            parameters: parameter_ptrs.as_ptr(),
36✔
146
            parameter_count: parameters.len() as _,
36✔
147
            named_parameter_names: param_names_ptrs.as_ptr(),
36✔
148
            named_parameter_types: param_types_ptr.as_ptr(),
36✔
149
            named_parameter_count: param_names_ptrs.len() as _,
36✔
150
            bind: Some(bind_callback::<T>),
36✔
151
            bind_data_clone: Some(bind_data_clone_callback::<T>),
36✔
152
            init_global: Some(init_global_callback::<T>),
36✔
153
            init_local: Some(init_local_callback::<T>),
36✔
154
            function: Some(function::<T>),
36✔
155
            statistics: ptr::null_mut::<c_void>(),
36✔
156
            cardinality: Some(cardinality_callback::<T>),
36✔
157
            pushdown_complex_filter: Some(pushdown_complex_filter_callback::<T>),
36✔
158
            get_virtual_columns: Some(get_virtual_columns_callback::<T>),
36✔
159
            get_row_id_columns: Some(get_row_id_columns_callback::<T>),
36✔
160
            pushdown_expression: ptr::null_mut::<c_void>(),
36✔
161
            table_scan_progress: ptr::null_mut::<c_void>(),
36✔
162
            projection_pushdown: T::PROJECTION_PUSHDOWN,
36✔
163
            filter_pushdown: T::FILTER_PUSHDOWN,
36✔
164
            filter_prune: T::FILTER_PRUNE,
36✔
165
            sampling_pushdown: false,
36✔
166
            late_materialization: T::LATE_MATERIALIZATION,
36✔
167
        };
36✔
168

169
        duckdb_try!(
36✔
170
            unsafe { cpp::duckdb_vx_tfunc_register(self.as_ptr(), &raw const vtab) },
36✔
171
            "Failed to register table function '{}'",
×
172
            name.to_string_lossy()
×
173
        );
174

175
        Ok(())
36✔
176
    }
36✔
177
}
178

179
/// The native function callback for a table function.
180
unsafe extern "C" fn function<T: TableFunction>(
10,666✔
181
    bind_data: *const c_void,
10,666✔
182
    global_init_data: *mut c_void,
10,666✔
183
    local_init_data: *mut c_void,
10,666✔
184
    output: cpp::duckdb_data_chunk,
10,666✔
185
    error_out: *mut cpp::duckdb_vx_error,
10,666✔
186
) {
10,666✔
187
    let bind_data = unsafe { &*(bind_data as *const T::BindData) };
10,666✔
188
    let global_init_data = unsafe { global_init_data.cast::<T::GlobalState>().as_mut() }
10,666✔
189
        .vortex_expect("global_init_data null pointer");
10,666✔
190
    let local_init_data = unsafe { local_init_data.cast::<T::LocalState>().as_mut() }
10,666✔
191
        .vortex_expect("local_init_data null pointer");
10,666✔
192
    let mut data_chunk = unsafe { DataChunk::borrow(output) };
10,666✔
193

194
    match T::scan(
10,666✔
195
        bind_data,
10,666✔
196
        local_init_data,
10,666✔
197
        global_init_data,
10,666✔
198
        &mut data_chunk,
10,666✔
199
    ) {
10,666✔
200
        Ok(()) => {
10,666✔
201
            // The data chunk is already filled by the function.
10,666✔
202
            // No need to do anything here.
10,666✔
203
        }
10,666✔
204
        Err(e) => unsafe {
×
205
            error_out.write(cpp::duckdb_vx_error_create(
×
206
                e.to_string().as_ptr().cast(),
×
207
                e.to_string().len(),
×
208
            ));
×
209
        },
×
210
    }
211
}
10,666✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc