vortex-data / vortex / build 16935267080

13 Aug 2025 11:00 AM UTC coverage: 24.312% (-63.3%) from 87.658%

Pull Request #4226: Support converting TimestampTZ to and from duckdb
Merge 81b48c7fb into baa6ea202 (via GitHub web-flow)

0 of 2 new or added lines in 1 file covered (0.0%).
20666 existing lines in 469 files are now uncovered.
8726 of 35892 relevant lines covered (24.31%).
147.74 hits per line.

Source File: /vortex-scan/src/multi_thread.rs (0.0% covered)
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use std::iter;
use std::sync::{Arc, LazyLock};

use futures::{StreamExt, stream};
use tokio::runtime::{Builder, Runtime};
use vortex_array::ArrayRef;
use vortex_array::iter::{ArrayIterator, ArrayIteratorAdapter};
use vortex_error::{VortexExpect, VortexResult, vortex_err};

use crate::ScanBuilder;

/// We create an internal Tokio runtime used exclusively for orchestrating work-stealing
/// of CPU-bound work for multithreaded scans.
///
/// It is intentionally not exposed to the user, not configurable, and does not enable I/O or
/// timers.
static CPU_RUNTIME: LazyLock<Runtime> = LazyLock::new(|| {
    Builder::new_multi_thread()
        .thread_name("vortex-multithread-scan")
        .build()
        .vortex_expect("Failed to create a new Tokio runtime")
});

impl ScanBuilder<ArrayRef> {
    /// Execute the scan on multiple worker threads.
    pub fn into_array_iter_multithread(self) -> VortexResult<impl ArrayIterator + Send + 'static> {
        let dtype = self.dtype()?;
        Ok(ArrayIteratorAdapter::new(
            dtype,
            self.into_iter_multithread(|a| a)?,
        ))
    }

    /// Execute the scan on multiple worker threads.
    ///
    /// A `map_fn` can be passed to further transform the results of the scan while still running
    /// on the thread pool.
    pub fn into_iter_multithread<T, F>(
        self,
        map_fn: F,
    ) -> VortexResult<impl Iterator<Item = T> + Send + 'static>
    where
        T: 'static + Send,
        F: Fn(VortexResult<ArrayRef>) -> T + Send + Sync + 'static,
    {
        let concurrency = self.concurrency;
        let num_workers = CPU_RUNTIME.metrics().num_workers();

        let tasks = self.build()?;
        // We need to clone and send the map_fn into each task.
        let map_fn = Arc::new(map_fn);
        let handle = CPU_RUNTIME.handle().clone();

        let mut stream = stream::iter(tasks)
            .map(move |task| {
                let map_fn = map_fn.clone();
                // We don't _need_ to spawn the work here. But it allows Tokio to make progress on
                // the tasks in the background, even if the consumer thread is not calling
                // poll_next.
                handle.spawn(async move { task.await.transpose().map(|t| map_fn(t)) })
            })
            // TODO(ngates): this is very crude indeed. This buffered call essentially controls how
            //  many splits we have in-flight at any given time. We multiply workers by concurrency
            //  to configure per-thread concurrency, which essentially means each thread can make
            //  progress on one split while waiting for the I/O of another split to complete.
            //  In an ideal world, the number of in-flight tasks would be dynamically adjusted
            //  based on how much I/O the tasks _actually_ require. For example, all pruning tasks
            //  could be spawned immediately since they all use a single segment; this would allow
            //  head-room to run ahead and figure out the I/O demands of subsequent tasks.
            .buffered(num_workers * concurrency);

        Ok(iter::from_fn(move || {
            tokio::task::block_in_place(|| CPU_RUNTIME.handle().block_on(stream.next()))
        })
        .filter_map(|result| {
            result
                .map_err(|e| vortex_err!("Failed to join on a spawned scan task {e}"))
                .vortex_expect("Failed to join on a spawned scan task")
        }))
    }
}
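
// An illustrative usage sketch (not part of the scanned source file): one way a
// caller might drive `into_iter_multithread`, doing per-batch work on the CPU
// runtime's workers via `map_fn`. The `builder` argument stands in for however a
// `ScanBuilder<ArrayRef>` is actually constructed, which this file does not show.
#[allow(dead_code)]
fn example_count_batches(builder: ScanBuilder<ArrayRef>) -> VortexResult<usize> {
    Ok(builder
        .into_iter_multithread(|batch| {
            // `batch` is a `VortexResult<ArrayRef>`; real code would decode or
            // aggregate it here, still running on the worker pool.
            batch.map(|_array| 1usize)
        })?
        // For this sketch, failed batches are simply dropped rather than propagated.
        .filter_map(|counted| counted.ok())
        .sum::<usize>())
}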

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_runtime_initialization() {
        // Access the runtime to ensure it's initialized
        let runtime = &*CPU_RUNTIME;
        assert!(runtime.metrics().num_workers() > 0);

        // Verify runtime metrics
        let handle = runtime.handle();
        assert!(handle.metrics().num_workers() > 0);
    }

    #[test]
    fn test_runtime_concurrency_calculation() {
        let num_workers = CPU_RUNTIME.metrics().num_workers();
        let concurrency = 2;

        // Test the buffering calculation
        let buffer_size = num_workers * concurrency;
        assert!(buffer_size > 0);
        assert_eq!(buffer_size, num_workers * 2);
    }
}
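
For readers who want the scheduling pattern above in isolation, here is a minimal, self-contained sketch of the same idea: spawn CPU-bound futures onto a dedicated multi-threaded Tokio runtime, cap how many are in flight with `buffered`, and drain the results through a plain blocking iterator. All names and the toy workload (WORKER_RUNTIME, run_blocking, slow_square, the buffer size) are invented for illustration and are not taken from the vortex codebase; it assumes the `tokio` crate with the `rt-multi-thread` feature and the `futures` crate.

use std::future::Future;
use std::iter;
use std::sync::LazyLock;

use futures::{StreamExt, stream};
use tokio::runtime::{Builder, Runtime};

// A dedicated runtime for CPU-bound work, mirroring CPU_RUNTIME above:
// no I/O driver, no timers, just worker threads.
static WORKER_RUNTIME: LazyLock<Runtime> = LazyLock::new(|| {
    Builder::new_multi_thread()
        .thread_name("example-cpu-worker")
        .build()
        .expect("failed to build the worker runtime")
});

/// Run `tasks` on the worker runtime, keeping at most `in_flight` of them
/// buffered at once, and expose the results (in order) as a synchronous iterator.
fn run_blocking<T, F>(tasks: Vec<F>, in_flight: usize) -> impl Iterator<Item = T>
where
    T: Send + 'static,
    F: Future<Output = T> + Send + 'static,
{
    let handle = WORKER_RUNTIME.handle().clone();
    let mut results = stream::iter(tasks)
        // Spawning lets the runtime make progress even while the consumer
        // is busy between calls to `next()`.
        .map(move |task| handle.spawn(task))
        // At most `in_flight` spawned tasks are awaited concurrently, in order.
        .buffered(in_flight);

    iter::from_fn(move || {
        // `block_in_place` tells an enclosing multi-threaded Tokio runtime (if any)
        // that this thread is about to block; outside a runtime it simply runs the
        // closure. `block_on` then drives the stream to its next result.
        tokio::task::block_in_place(|| WORKER_RUNTIME.handle().block_on(results.next()))
            .map(|joined| joined.expect("spawned task panicked"))
    })
}

fn main() {
    // Eight CPU-bound tasks; allow at most four in flight at a time.
    let tasks: Vec<_> = (0..8u64)
        .map(|i| async move { (i, slow_square(i)) })
        .collect();

    for (i, squared) in run_blocking(tasks, 4) {
        println!("task {i} -> {squared}");
    }
}

// Stand-in for real CPU-bound work.
fn slow_square(n: u64) -> u64 {
    std::thread::sleep(std::time::Duration::from_millis(10));
    n * n
}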