• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16331938722

16 Jul 2025 10:49PM UTC coverage: 80.702% (-0.9%) from 81.557%
16331938722

push

github

web-flow
feat: build with stable rust (#3881)

120 of 173 new or added lines in 28 files covered. (69.36%)

174 existing lines in 102 files now uncovered.

41861 of 51871 relevant lines covered (80.7%)

157487.71 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.22
/vortex-scan/src/split_by.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::collections::BTreeSet;
5
use std::ops::Range;
6

7
use itertools::Itertools;
8
use vortex_array::stats::StatBound;
9
use vortex_dtype::FieldMask;
10
use vortex_error::{VortexResult, vortex_err};
11

12
use crate::LayoutReader;
13

14
/// Strategy for dividing a Vortex file into the batches that are read from it.
///
/// Every split has to fit within the platform's maximum `usize`.
#[derive(Clone, Copy, Debug, Default)]
pub enum SplitBy {
    /// Start a new split at every chunk boundary found in the file.
    #[default]
    Layout,
    /// Start a new split every `n` rows.
    RowCount(usize),
    // UncompressedSize(u64),
}
26

27
impl SplitBy {
28
    /// Compute the splits for the given layout.
29
    // TODO(ngates): remove this once layout readers are stream based.
30
    pub(crate) fn splits(
1,353✔
31
        &self,
1,353✔
32
        layout_reader: &dyn LayoutReader,
1,353✔
33
        field_mask: &[FieldMask],
1,353✔
34
    ) -> VortexResult<Vec<Range<u64>>> {
1,353✔
35
        Ok(match *self {
1,353✔
36
            SplitBy::Layout => {
37
                let mut row_splits = BTreeSet::<u64>::new();
1,352✔
38
                row_splits.insert(0);
1,352✔
39

40
                // Register the splits for all the layouts.
41
                layout_reader.register_splits(field_mask, 0, &mut row_splits)?;
1,352✔
42

43
                row_splits
1,352✔
44
                    .into_iter()
1,352✔
45
                    .tuple_windows()
1,352✔
46
                    .map(|(start, end)| start..end)
4,592✔
47
                    .collect()
1,352✔
48
            }
49
            SplitBy::RowCount(n) => {
1✔
50
                let row_count = *layout_reader.row_count().to_exact().ok_or_else(|| {
1✔
51
                    vortex_err!("Cannot split layout by row count, row count is not exact")
×
UNCOV
52
                })?;
×
53
                let mut splits =
1✔
54
                    Vec::with_capacity(usize::try_from((row_count + n as u64) / n as u64)?);
1✔
55
                for start in (0..row_count).step_by(n) {
4✔
56
                    let end = (start + n as u64).min(row_count);
4✔
57
                    splits.push(start..end);
4✔
58
                }
4✔
59
                splits
1✔
60
            }
61
        })
62
    }
1,353✔
63
}
64

65
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use futures::executor::block_on;
    use futures::stream;
    use vortex_array::{ArrayContext, IntoArray};
    use vortex_buffer::buffer;
    use vortex_dtype::Nullability::NonNullable;
    use vortex_dtype::{DType, FieldPath, PType};
    use vortex_layout::layouts::flat::writer::FlatLayoutStrategy;
    use vortex_layout::segments::{SegmentSource, SequenceWriter, TestSegments};
    use vortex_layout::sequence::SequenceId;
    use vortex_layout::{LayoutStrategy, SequentialStreamAdapter, SequentialStreamExt as _};

    use super::*;

    /// Writes one ten-row `i32` array with `FlatLayoutStrategy` into `TestSegments`, opens a
    /// reader over the resulting layout, and returns the splits that `split_by` computes for
    /// the root field.
    fn splits_for_ten_rows(split_by: SplitBy) -> Vec<Range<u64>> {
        let test_segments = TestSegments::default();

        // A single-element stream carrying the whole array.
        let array_stream = SequentialStreamAdapter::new(
            DType::Primitive(PType::I32, NonNullable),
            stream::once(async {
                Ok((
                    SequenceId::root().downgrade(),
                    buffer![1_i32; 10].into_array(),
                ))
            }),
        )
        .sendable();

        let layout = block_on(FlatLayoutStrategy::default().write_stream(
            &ArrayContext::empty(),
            SequenceWriter::new(Box::new(test_segments.clone())),
            array_stream,
        ))
        .unwrap();

        let segment_source: Arc<dyn SegmentSource> = Arc::new(test_segments);
        let reader = layout.new_reader("".into(), segment_source).unwrap();

        split_by
            .splits(reader.as_ref(), &[FieldMask::Exact(FieldPath::root())])
            .unwrap()
    }

    #[test]
    fn test_layout_splits_flat() {
        let splits = splits_for_ten_rows(SplitBy::Layout);
        assert_eq!(splits, vec![0..10]);
    }

    #[test]
    fn test_row_count_splits() {
        let splits = splits_for_ten_rows(SplitBy::RowCount(3));
        assert_eq!(splits, vec![0..3, 3..6, 6..9, 9..10]);
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc