• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16598973893

29 Jul 2025 02:25PM UTC coverage: 82.692% (-0.01%) from 82.703%
16598973893

push

github

web-flow
Clean up stats propagation for slicing (#3356)

Reduces the amount we copy some stats (by removing into_iter that forces
a full stats copy)

---------

Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Robert Kruszewski <github@robertk.io>
Signed-off-by: Will Manning <will@willmanning.io>
Co-authored-by: Robert Kruszewski <github@robertk.io>
Co-authored-by: Will Manning <will@willmanning.io>

130 of 157 new or added lines in 15 files covered. (82.8%)

30 existing lines in 13 files now uncovered.

45215 of 54679 relevant lines covered (82.69%)

184610.34 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

73.26
/vortex-array/src/stats/array.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
//! Stats as they are stored on arrays.
5

6
use std::sync::Arc;
7

8
use parking_lot::RwLock;
9
use vortex_error::{VortexError, VortexResult, vortex_panic};
10
use vortex_scalar::ScalarValue;
11

12
use super::{
13
    Precision, Stat, StatType, StatsProvider, StatsProviderExt, StatsSet, StatsSetIntoIter,
14
};
15
use crate::Array;
16
use crate::compute::{
17
    MinMaxResult, is_constant, is_sorted, is_strict_sorted, min_max, nan_count, sum,
18
};
19

20
/// A shared [`StatsSet`] stored in an array. Can be shared by copies of the array and can also be mutated in place.
///
/// Cloning an `ArrayStats` clones the inner [`Arc`], so every clone reads and writes the same
/// lock-guarded set rather than an independent copy.
21
// TODO(adamg): This is a very bad name.
22
#[derive(Clone, Default, Debug)]
23
pub struct ArrayStats {
24
    // Reference-counted, lock-guarded stats set; all clones of this value point at it.
    inner: Arc<RwLock<StatsSet>>,
25
}
26

27
/// Reference to an array's [`StatsSet`]. Can be used to get and mutate the underlying stats.
28
///
29
/// Constructed by calling [`ArrayStats::to_ref`].
30
pub struct StatsSetRef<'a> {
31
    // We need to reference back to the array: `compute_stat` runs the compute functions over it
    // when a requested stat is not already stored as an exact value.
32
    dyn_array_ref: &'a dyn Array,
    // The shared stats storage that every read and write on this reference goes through.
33
    array_stats: &'a ArrayStats,
34
}
35

36
impl ArrayStats {
37
    pub fn to_ref<'a>(&'a self, array: &'a dyn Array) -> StatsSetRef<'a> {
2,244,351✔
38
        StatsSetRef {
2,244,351✔
39
            dyn_array_ref: array,
2,244,351✔
40
            array_stats: self,
2,244,351✔
41
        }
2,244,351✔
42
    }
2,244,351✔
43

44
    pub fn set(&self, stat: Stat, value: Precision<ScalarValue>) {
343,711✔
45
        self.inner.write().set(stat, value);
343,711✔
46
    }
343,711✔
47

48
    pub fn clear(&self, stat: Stat) {
×
49
        self.inner.write().clear(stat);
×
50
    }
×
51

52
    pub fn retain(&self, stats: &[Stat]) {
×
53
        self.inner.write().retain_only(stats);
×
54
    }
×
55
}
56

57
impl From<StatsSet> for ArrayStats {
UNCOV
58
    fn from(value: StatsSet) -> Self {
×
UNCOV
59
        Self {
×
UNCOV
60
            inner: Arc::new(RwLock::new(value)),
×
UNCOV
61
        }
×
UNCOV
62
    }
×
63
}
64

65
impl From<ArrayStats> for StatsSet {
66
    fn from(value: ArrayStats) -> Self {
×
67
        value.inner.read().clone()
×
68
    }
×
69
}
70

71
impl StatsProvider for ArrayStats {
72
    fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
864,679✔
73
        let guard = self.inner.read();
864,679✔
74
        guard.get(stat)
864,679✔
75
    }
864,679✔
76

77
    fn len(&self) -> usize {
×
78
        let guard = self.inner.read();
×
79
        guard.len()
×
80
    }
×
81
}
82

83
impl StatsSetRef<'_> {
84
    pub fn set_iter(&self, iter: StatsSetIntoIter) {
38✔
85
        let mut guard = self.array_stats.inner.write();
38✔
86
        for (stat, value) in iter {
304✔
87
            guard.set(stat, value);
266✔
88
        }
266✔
89
    }
38✔
90

NEW
91
    pub fn inherit_from(&self, stats: StatsSetRef<'_>) {
×
NEW
92
        stats.with_iter(|iter| self.inherit(iter));
×
NEW
93
    }
×
94

95
    pub fn inherit<'a>(&self, iter: impl Iterator<Item = &'a (Stat, Precision<ScalarValue>)>) {
56,314✔
96
        // TODO(ngates): depending on statistic, this should choose the more precise one
97
        let mut guard = self.array_stats.inner.write();
56,314✔
98
        for (stat, value) in iter {
58,345✔
99
            guard.set(*stat, value.clone());
2,031✔
100
        }
2,031✔
101
    }
56,314✔
102

103
    pub fn replace(&self, stats: StatsSet) {
442,512✔
104
        *self.array_stats.inner.write() = stats;
442,512✔
105
    }
442,512✔
106

107
    pub fn to_owned(&self) -> StatsSet {
556,573✔
108
        self.array_stats.inner.read().clone()
556,573✔
109
    }
556,573✔
110

111
    pub fn with_iter<
56,314✔
112
        F: for<'a> FnOnce(&mut dyn Iterator<Item = &'a (Stat, Precision<ScalarValue>)>) -> R,
56,314✔
113
        R,
56,314✔
114
    >(
56,314✔
115
        &self,
56,314✔
116
        f: F,
56,314✔
117
    ) -> R {
56,314✔
118
        let lock = self.array_stats.inner.read();
56,314✔
119
        f(&mut lock.iter())
56,314✔
120
    }
56,314✔
121

122
    pub fn compute_stat(&self, stat: Stat) -> VortexResult<Option<ScalarValue>> {
256,830✔
123
        // If it's already computed and exact, we can return it.
124
        if let Some(Precision::Exact(stat)) = self.get(stat) {
256,830✔
125
            return Ok(Some(stat));
145,073✔
126
        }
111,757✔
127

128
        Ok(match stat {
111,757✔
129
            Stat::Min => {
130
                min_max(self.dyn_array_ref)?.map(|MinMaxResult { min, max: _ }| min.into_value())
48,251✔
131
            }
132
            Stat::Max => {
133
                min_max(self.dyn_array_ref)?.map(|MinMaxResult { min: _, max }| max.into_value())
9,312✔
134
            }
135
            Stat::Sum => {
136
                Stat::Sum
14,613✔
137
                    .dtype(self.dyn_array_ref.dtype())
14,613✔
138
                    .is_some()
14,613✔
139
                    .then(|| {
14,613✔
140
                        // Sum is supported for this dtype.
141
                        sum(self.dyn_array_ref)
8,043✔
142
                    })
8,043✔
143
                    .transpose()?
14,613✔
144
                    .map(|s| s.into_value())
14,613✔
145
            }
146
            Stat::NullCount => Some(self.dyn_array_ref.invalid_count()?.into()),
4,984✔
147
            Stat::IsConstant => {
148
                if self.dyn_array_ref.is_empty() {
6,073✔
149
                    None
38✔
150
                } else {
151
                    is_constant(self.dyn_array_ref)?.map(ScalarValue::from)
6,035✔
152
                }
153
            }
154
            Stat::IsSorted => Some(is_sorted(self.dyn_array_ref)?.into()),
5,732✔
155
            Stat::IsStrictSorted => Some(is_strict_sorted(self.dyn_array_ref)?.into()),
5,745✔
156
            Stat::UncompressedSizeInBytes => {
157
                let nbytes: ScalarValue =
5,732✔
158
                    self.dyn_array_ref.to_canonical()?.as_ref().nbytes().into();
5,732✔
159
                self.set(stat, Precision::exact(nbytes.clone()));
5,732✔
160
                Some(nbytes)
5,732✔
161
            }
162
            Stat::NaNCount => {
163
                Stat::NaNCount
11,315✔
164
                    .dtype(self.dyn_array_ref.dtype())
11,315✔
165
                    .is_some()
11,315✔
166
                    .then(|| {
11,315✔
167
                        // NaNCount is supported for this dtype.
168
                        nan_count(self.dyn_array_ref)
114✔
169
                    })
114✔
170
                    .transpose()?
11,315✔
171
                    .map(|s| s.into())
11,315✔
172
            }
173
        })
174
    }
256,830✔
175

176
    pub fn compute_all(&self, stats: &[Stat]) -> VortexResult<StatsSet> {
11,282✔
177
        let mut stats_set = StatsSet::default();
11,282✔
178
        for &stat in stats {
90,606✔
179
            if let Some(s) = self.compute_stat(stat)? {
79,324✔
180
                stats_set.set(stat, Precision::exact(s))
56,179✔
181
            }
23,145✔
182
        }
183
        Ok(stats_set)
11,282✔
184
    }
11,282✔
185
}
186

187
impl StatsSetRef<'_> {
188
    pub fn get_as<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
343,747✔
189
        &self,
343,747✔
190
        stat: Stat,
343,747✔
191
    ) -> Option<Precision<U>> {
343,747✔
192
        StatsProviderExt::get_as::<U>(self, stat)
343,747✔
193
    }
343,747✔
194

195
    pub fn get_as_bound<S, U>(&self) -> Option<S::Bound>
×
196
    where
×
197
        S: StatType<U>,
×
198
        U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>,
×
199
    {
200
        StatsProviderExt::get_as_bound::<S, U>(self)
×
201
    }
×
202

203
    pub fn compute_as<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
96,945✔
204
        &self,
96,945✔
205
        stat: Stat,
96,945✔
206
    ) -> Option<U> {
96,945✔
207
        self.compute_stat(stat)
96,945✔
208
            .inspect_err(|e| log::warn!("Failed to compute stat {stat}: {e}"))
96,945✔
209
            .ok()
96,945✔
210
            .flatten()
96,945✔
211
            .map(|s| U::try_from(&s))
96,945✔
212
            .transpose()
96,945✔
213
            .unwrap_or_else(|err| {
96,945✔
214
                vortex_panic!(
×
215
                    err,
×
216
                    "Failed to compute stat {} as {}",
×
217
                    stat,
218
                    std::any::type_name::<U>()
×
219
                )
220
            })
221
    }
96,945✔
222

223
    pub fn set(&self, stat: Stat, value: Precision<ScalarValue>) {
343,711✔
224
        self.array_stats.set(stat, value);
343,711✔
225
    }
343,711✔
226

227
    pub fn clear(&self, stat: Stat) {
×
NEW
228
        self.array_stats.clear(stat);
×
229
    }
×
230

231
    pub fn retain(&self, stats: &[Stat]) {
×
NEW
232
        self.array_stats.retain(stats);
×
233
    }
×
234

235
    pub fn compute_min<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
15,537✔
236
        &self,
15,537✔
237
    ) -> Option<U> {
15,537✔
238
        self.compute_as(Stat::Min)
15,537✔
239
    }
15,537✔
240

241
    pub fn compute_max<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
18✔
242
        &self,
18✔
243
    ) -> Option<U> {
18✔
244
        self.compute_as(Stat::Max)
18✔
245
    }
18✔
246

247
    pub fn compute_is_sorted(&self) -> Option<bool> {
×
248
        self.compute_as(Stat::IsSorted)
×
249
    }
×
250

251
    pub fn compute_is_strict_sorted(&self) -> Option<bool> {
3,516✔
252
        self.compute_as(Stat::IsStrictSorted)
3,516✔
253
    }
3,516✔
254

255
    pub fn compute_is_constant(&self) -> Option<bool> {
379✔
256
        self.compute_as(Stat::IsConstant)
379✔
257
    }
379✔
258

259
    pub fn compute_null_count(&self) -> Option<usize> {
10,512✔
260
        self.compute_as(Stat::NullCount)
10,512✔
261
    }
10,512✔
262

263
    pub fn compute_uncompressed_size_in_bytes(&self) -> Option<usize> {
×
264
        self.compute_as(Stat::UncompressedSizeInBytes)
×
265
    }
×
266
}
267

268
impl StatsProvider for StatsSetRef<'_> {
269
    fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
864,679✔
270
        self.array_stats.get(stat)
864,679✔
271
    }
864,679✔
272

273
    fn len(&self) -> usize {
×
NEW
274
        self.array_stats.len()
×
275
    }
×
276
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc