• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16198217859

10 Jul 2025 02:41PM UTC coverage: 78.257% (+0.07%) from 78.188%
16198217859

push

github

web-flow
Row idx layout reader (#3819)

Fixes #3613 

* Implements a RowIdx layout reader without using scope vars (in
preparation for removing them).
* Works the same way, by partitioning the expression into the row index
and non-row index parts.
* Abstracts out partitioned evaluations (shared with struct layouts that
partition by field)
* Introduces a RowIdx expression, ensuring that the identity expression
doesn't resolve to include this meta column.

---------

Signed-off-by: Nicholas Gates <nick@nickgates.com>

347 of 442 new or added lines in 9 files covered. (78.51%)

19 existing lines in 2 files now uncovered.

44183 of 56459 relevant lines covered (78.26%)

53569.79 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

77.5
/vortex-layout/src/reader.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::collections::BTreeSet;
5
use std::ops::Range;
6
use std::sync::Arc;
7

8
use async_trait::async_trait;
9
use futures::FutureExt;
10
use futures::future::{BoxFuture, Shared};
11
use once_cell::sync::OnceCell;
12
use vortex_array::stats::Precision;
13
use vortex_array::{ArrayContext, ArrayRef};
14
use vortex_dtype::{DType, FieldMask};
15
use vortex_error::{SharedVortexResult, VortexError, VortexResult, vortex_bail};
16
use vortex_expr::{ExprRef, ScopeDType};
17
use vortex_mask::Mask;
18

19
use crate::children::LayoutChildren;
20
use crate::segments::SegmentSource;
21

22
/// A shared, reference-counted handle to a dynamically dispatched [`LayoutReader`].
pub type LayoutReaderRef = Arc<dyn LayoutReader>;
23

24
/// A [`LayoutReader`] is used to read a [`crate::Layout`] in a way that can cache state across multiple
25
/// evaluation operations.
///
/// Implementors must be `'static + Send + Sync`. The three `*_evaluation` methods do not return
/// results directly; each returns a boxed evaluation object that is later invoked with a [`Mask`].
26
pub trait LayoutReader: 'static + Send + Sync {
27
    /// Returns the name of the layout reader for debugging.
28
    fn name(&self) -> &Arc<str>;
29

30
    /// Returns the un-projected dtype of the layout reader.
31
    fn dtype(&self) -> &DType;
32

33
    /// Returns the scope dtype. Pruning, filter, and projection expressions are evaluated in this scope.
34
    fn scope_dtype(&self) -> &ScopeDType;
35

36
    /// Returns the number of rows in the layout reader, wrapped in a [`Precision`].
37
    /// An inexact count may be larger or smaller than the actual row count.
38
    fn row_count(&self) -> Precision<u64>;
39

40
    /// Registers the splits of this layout reader by adding them to `splits`.
    ///
    /// NOTE(review): presumably `field_mask` restricts which fields contribute splits and
    /// `row_offset` is the absolute row position of this layout — confirm against implementors.
41
    // TODO(ngates): this is a temporary API until we make layout readers stream based.
42
    fn register_splits(
43
        &self,
44
        field_mask: &[FieldMask],
45
        row_offset: u64,
46
        splits: &mut BTreeSet<u64>,
47
    ) -> VortexResult<()>;
48

49
    /// Performs an approximate evaluation of the expression against the layout reader.
    ///
    /// Returns a [`PruningEvaluation`] for the given `row_range` and `expr`, or an error if the
    /// evaluation cannot be constructed.
50
    fn pruning_evaluation(
51
        &self,
52
        row_range: &Range<u64>,
53
        expr: &ExprRef,
54
    ) -> VortexResult<Box<dyn PruningEvaluation>>;
55

56
    /// Performs an exact evaluation of the expression against the layout reader.
    ///
    /// Returns a [`MaskEvaluation`] for the given `row_range` and `expr`, or an error if the
    /// evaluation cannot be constructed.
57
    fn filter_evaluation(
58
        &self,
59
        row_range: &Range<u64>,
60
        expr: &ExprRef,
61
    ) -> VortexResult<Box<dyn MaskEvaluation>>;
62

63
    /// Evaluates the expression against the layout.
    ///
    /// Returns an [`ArrayEvaluation`] for the given `row_range` and `expr`, or an error if the
    /// evaluation cannot be constructed.
64
    fn projection_evaluation(
65
        &self,
66
        row_range: &Range<u64>,
67
        expr: &ExprRef,
68
    ) -> VortexResult<Box<dyn ArrayEvaluation>>;
69
}
70

71
/// A boxed, shared future that resolves to a [`SharedVortexResult`] of a [`Mask`].
pub type MaskFuture = Shared<BoxFuture<'static, SharedVortexResult<Mask>>>;
72

73
/// Creates a [`MaskFuture`] that resolves to `Ok(mask)` without performing any other work.
74
pub fn mask_future_ready(mask: Mask) -> MaskFuture {
×
75
    // The error type is spelled out because nothing in the block would otherwise fix it.
    async move { Ok::<_, Arc<VortexError>>(mask) }
×
76
        .boxed() // box the anonymous future into a `BoxFuture`
×
77
        .shared() // wrap it in `Shared` to match the `MaskFuture` alias
×
78
}
×
79

80
/// Returns a mask in which every false value has been proven to be false for the given expression.
81
///
82
/// The returned mask **does not** need to have been intersected with the input mask.
83
#[async_trait]
84
pub trait PruningEvaluation: 'static + Send + Sync {
85
    /// Runs the pruning evaluation against `mask` and returns the resulting mask.
    async fn invoke(&self, mask: Mask) -> VortexResult<Mask>;
86
}
87

88
/// A [`PruningEvaluation`] that prunes nothing: it returns the input mask unchanged.
pub struct NoOpPruningEvaluation;
89

90
#[async_trait]
91
impl PruningEvaluation for NoOpPruningEvaluation {
92
    /// Returns `mask` as-is; this implementation never fails.
    async fn invoke(&self, mask: Mask) -> VortexResult<Mask> {
307✔
93
        Ok(mask)
307✔
94
    }
614✔
95
}
96

97
/// Refines the given mask, returning a mask equal in length to the input mask.
98
///
99
/// ## Post-conditions
100
///
101
/// The returned mask **MUST** have been intersected with the input mask.
102
#[async_trait]
103
pub trait MaskEvaluation: 'static + Send + Sync {
104
    /// Runs the evaluation against `mask` and returns the refined mask.
    async fn invoke(&self, mask: Mask) -> VortexResult<Mask>;
105
}
106

107
/// A [`MaskEvaluation`] that performs no refinement: it returns the input mask unchanged.
pub struct NoOpMaskEvaluation;
108

109
#[async_trait]
110
impl MaskEvaluation for NoOpMaskEvaluation {
NEW
111
    /// Returns `mask` as-is; this implementation never fails.
    async fn invoke(&self, mask: Mask) -> VortexResult<Mask> {
×
NEW
112
        Ok(mask)
×
NEW
113
    }
×
114
}
115

116
/// Evaluates an expression against an array, returning an array equal in length to the true count
117
/// of the input mask.
118
#[async_trait]
119
pub trait ArrayEvaluation: 'static + Send + Sync {
120
    /// Runs the evaluation for the rows selected by `mask` and returns the resulting array.
    async fn invoke(&self, mask: Mask) -> VortexResult<ArrayRef>;
121
}
122

123
/// Lazily constructs and caches a [`LayoutReaderRef`] for each child of a layout.
pub struct LazyReaderChildren {
124
    // Source of the child layouts, looked up by index in `get`.
    children: Arc<dyn LayoutChildren>,
125
    // Cloned into every child reader that gets constructed.
    segment_source: Arc<dyn SegmentSource>,
126
    // Cloned into every child reader that gets constructed.
    ctx: ArrayContext,
127

128
    // One initialise-once slot per child, indexed by child position.
    // TODO(ngates): we may want a hash map of some sort here?
129
    cache: Vec<OnceCell<LayoutReaderRef>>,
130
}
131

132
impl LazyReaderChildren {
133
    /// Creates the lazy child set with one empty cache slot per child.
    ///
    /// No child reader is constructed here; readers are built on demand by [`Self::get`].
    pub fn new(
655✔
134
        children: Arc<dyn LayoutChildren>,
655✔
135
        segment_source: Arc<dyn SegmentSource>,
655✔
136
        ctx: ArrayContext,
655✔
137
    ) -> Self {
655✔
138
        let nchildren = children.nchildren();
655✔
139
        // Allocate every slot up front so `get` can index the cache directly.
        let cache = (0..nchildren).map(|_| OnceCell::new()).collect();
1,029✔
140
        Self {
655✔
141
            children,
655✔
142
            segment_source,
655✔
143
            ctx,
655✔
144
            cache,
655✔
145
        }
655✔
146
    }
655✔
147

148
    /// Returns the reader for child `idx`, constructing and caching it on first access.
    ///
    /// `dtype` and `name` are only consulted when the slot is initialised; once a reader is
    /// cached for `idx`, later calls return it regardless of the `dtype` and `name` passed.
    ///
    /// # Errors
    ///
    /// Returns an error if `idx` is not less than the number of children, or if looking up the
    /// child layout or creating its reader fails.
    pub fn get(
2,393✔
149
        &self,
2,393✔
150
        idx: usize,
2,393✔
151
        dtype: &DType,
2,393✔
152
        name: &Arc<str>,
2,393✔
153
    ) -> VortexResult<&LayoutReaderRef> {
2,393✔
154
        // Report an error rather than panicking on the slice index below.
        if idx >= self.cache.len() {
2,393✔
155
            vortex_bail!("Child index out of bounds: {} of {}", idx, self.cache.len());
×
156
        }
2,393✔
157

2,393✔
158
        // The closure runs only when the slot is still empty.
        self.cache[idx].get_or_try_init(|| {
2,393✔
159
            let child = self.children.child(idx, dtype)?;
971✔
160
            child.new_reader(name.clone(), self.segment_source.clone(), self.ctx.clone())
971✔
161
        })
2,393✔
162
    }
2,393✔
163
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc