• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16524157085

25 Jul 2025 02:15PM UTC coverage: 81.694% (-0.06%) from 81.758%
16524157085

Pull #3356

github

web-flow
Merge f8337491a into 45200f15d
Pull Request #3356: Clean up stats propagation for slicing

79 of 106 new or added lines in 12 files covered. (74.53%)

26 existing lines in 12 files now uncovered.

43118 of 52780 relevant lines covered (81.69%)

170587.43 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

73.26
/vortex-array/src/stats/array.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
//! Stats as they are stored on arrays.
5

6
use std::sync::Arc;
7

8
use parking_lot::RwLock;
9
use vortex_error::{VortexError, VortexResult, vortex_panic};
10
use vortex_scalar::ScalarValue;
11

12
use super::{
13
    Precision, Stat, StatType, StatsProvider, StatsProviderExt, StatsSet, StatsSetIntoIter,
14
};
15
use crate::Array;
16
use crate::compute::{
17
    MinMaxResult, is_constant, is_sorted, is_strict_sorted, min_max, nan_count, sum,
18
};
19

20
/// A shared [`StatsSet`] stored in an array. Can be shared by copies of the array and can also be mutated in place.
///
/// Cloning an `ArrayStats` clones the inner `Arc`, so the clone reads and writes the same set.
21
// TODO(adamg): This is a very bad name.
22
#[derive(Clone, Default, Debug)]
23
pub struct ArrayStats {
24
    // Reference-counted, lock-protected set; this is what clones of the struct share.
    inner: Arc<RwLock<StatsSet>>,
25
}
26

27
/// Reference to an array's [`StatsSet`]. Can be used to get and mutate the underlying stats.
28
///
29
/// Constructed by calling [`ArrayStats::to_ref`].
30
pub struct StatsSetRef<'a> {
31
    // The array these stats describe; `compute_stat` runs the compute functions over it.
32
    dyn_array_ref: &'a dyn Array,
    // The shared stats storage that every read and write on this reference goes through.
33
    array_stats: &'a ArrayStats,
34
}
35

36
impl ArrayStats {
37
    /// Pairs these stats with `array`, producing a [`StatsSetRef`] that borrows both for `'a`.
    pub fn to_ref<'a>(&'a self, array: &'a dyn Array) -> StatsSetRef<'a> {
1,773,803✔
38
        StatsSetRef {
1,773,803✔
39
            dyn_array_ref: array,
1,773,803✔
40
            array_stats: self,
1,773,803✔
41
        }
1,773,803✔
42
    }
1,773,803✔
43

44
    /// Forwards `stat` and `value` to `StatsSet::set` while holding the write lock.
    pub fn set(&self, stat: Stat, value: Precision<ScalarValue>) {
317,239✔
45
        self.inner.write().set(stat, value);
317,239✔
46
    }
317,239✔
47

48
    /// Forwards `stat` to `StatsSet::clear` while holding the write lock.
    pub fn clear(&self, stat: Stat) {
×
49
        self.inner.write().clear(stat);
×
50
    }
×
51

52
    /// Forwards `stats` to `StatsSet::retain_only` while holding the write lock.
    ///
    /// NOTE(review): presumably drops every stat not listed in `stats` — confirm against
    /// `StatsSet::retain_only`.
    pub fn retain(&self, stats: &[Stat]) {
×
53
        self.inner.write().retain_only(stats);
×
54
    }
×
55
}
56

57
/// Wraps an owned [`StatsSet`] in a fresh `Arc<RwLock<_>>`, so the result is not shared with
/// any existing `ArrayStats`.
impl From<StatsSet> for ArrayStats {
UNCOV
58
    fn from(value: StatsSet) -> Self {
×
UNCOV
59
        Self {
×
UNCOV
60
            inner: Arc::new(RwLock::new(value)),
×
UNCOV
61
        }
×
UNCOV
62
    }
×
63
}
64

65
/// Produces an owned copy of the stats currently held by an `ArrayStats`.
impl From<ArrayStats> for StatsSet {
66
    fn from(value: ArrayStats) -> Self {
×
67
        // The set is cloned under the read lock rather than moved out of the `Arc`.
        value.inner.read().clone()
×
68
    }
×
69
}
70

71
/// Read access to the shared set; each method takes the read lock for the length of the call.
impl StatsProvider for ArrayStats {
72
    /// Returns whatever `StatsSet::get` yields for `stat`.
    fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
745,028✔
73
        let guard = self.inner.read();
745,028✔
74
        guard.get(stat)
745,028✔
75
    }
745,028✔
76

77
    /// Returns whatever `StatsSet::len` reports for the shared set.
    fn len(&self) -> usize {
×
78
        let guard = self.inner.read();
×
79
        guard.len()
×
80
    }
×
81
}
82

83
impl StatsSetRef<'_> {
84
    /// Stores every `(stat, value)` pair yielded by `iter` via `StatsSet::set`.
    ///
    /// The write lock is taken once and held for the whole loop.
    pub fn set_iter(&self, iter: StatsSetIntoIter) {
37✔
85
        let mut guard = self.array_stats.inner.write();
37✔
86
        for (stat, value) in iter {
296✔
87
            guard.set(stat, value);
259✔
88
        }
259✔
89
    }
37✔
90

NEW
91
    pub fn inherit_from(&self, stats: StatsSetRef<'_>) {
×
NEW
92
        stats.with_iter(|iter| self.inherit(iter));
×
NEW
93
    }
×
94

95
    /// Clones each borrowed `(stat, value)` pair from `iter` into this array's stats.
    ///
    /// Every pair is passed to `StatsSet::set` as is. The write lock is taken once and held
    /// for the whole loop.
    pub fn inherit<'a>(&self, iter: impl Iterator<Item = &'a (Stat, Precision<ScalarValue>)>) {
53,920✔
96
        // TODO(ngates): depending on statistic, this should choose the more precise one
97
        let mut guard = self.array_stats.inner.write();
53,920✔
98
        for (stat, value) in iter {
56,203✔
99
            guard.set(*stat, value.clone());
2,283✔
100
        }
2,283✔
101
    }
53,920✔
102

103
    /// Overwrites the whole shared set with `stats`; the previous contents are dropped.
    pub fn replace(&self, stats: StatsSet) {
321,794✔
104
        *self.array_stats.inner.write() = stats;
321,794✔
105
    }
321,794✔
106

107
    /// Returns a clone of the current set, taken under the read lock.
    pub fn to_owned(&self) -> StatsSet {
355,743✔
108
        self.array_stats.inner.read().clone()
355,743✔
109
    }
355,743✔
110

111
    /// Calls `f` with an iterator over the stored `(stat, value)` pairs and returns its result.
    ///
    /// The read lock is held until `f` returns, so `f` must not take the write lock on the
    /// same set.
    pub fn with_iter<
53,920✔
112
        F: for<'a> FnOnce(&mut dyn Iterator<Item = &'a (Stat, Precision<ScalarValue>)>) -> R,
53,920✔
113
        R,
53,920✔
114
    >(
53,920✔
115
        &self,
53,920✔
116
        f: F,
53,920✔
117
    ) -> R {
53,920✔
118
        // The guard lives to the end of the function, i.e. across the call to `f`.
        let lock = self.array_stats.inner.read();
53,920✔
119
        f(&mut lock.iter())
53,920✔
120
    }
53,920✔
121

122
    /// Returns the value of `stat` for the referenced array, computing it when no exact value
    /// is stored.
    ///
    /// `Ok(None)` means no value was produced: `min_max` or `is_constant` yielded nothing,
    /// `Stat::dtype` returned `None` for the array's dtype (`Sum`, `NaNCount`), or the array
    /// is empty (`IsConstant`).
    ///
    /// Only the `UncompressedSizeInBytes` arm stores its result from here.
    /// NOTE(review): the other arms presumably rely on the compute functions to cache their
    /// own results — confirm.
    ///
    /// # Errors
    ///
    /// Propagates any error from the compute functions, `invalid_count`, or `to_canonical`.
    pub fn compute_stat(&self, stat: Stat) -> VortexResult<Option<ScalarValue>> {
251,389✔
123
        // If it's already computed and exact, we can return it (the inner `stat` binding
        // shadows the parameter with the stored value).
124
        if let Some(Precision::Exact(stat)) = self.get(stat) {
251,389✔
125
            return Ok(Some(stat));
142,045✔
126
        }
109,344✔
127

128
        Ok(match stat {
109,344✔
129
            // `min_max` yields both bounds in one call; each arm keeps the one it needs.
            Stat::Min => {
130
                min_max(self.dyn_array_ref)?.map(|MinMaxResult { min, max: _ }| min.into_value())
47,359✔
131
            }
132
            Stat::Max => {
133
                min_max(self.dyn_array_ref)?.map(|MinMaxResult { min: _, max }| max.into_value())
9,095✔
134
            }
135
            Stat::Sum => {
136
                Stat::Sum
14,333✔
137
                    .dtype(self.dyn_array_ref.dtype())
14,333✔
138
                    .is_some()
14,333✔
139
                    .then(|| {
14,333✔
140
                        // Sum is supported for this dtype.
141
                        sum(self.dyn_array_ref)
7,881✔
142
                    })
7,881✔
143
                    .transpose()?
14,333✔
144
                    .map(|s| s.into_value())
14,333✔
145
            }
146
            Stat::NullCount => Some(self.dyn_array_ref.invalid_count()?.into()),
4,912✔
147
            Stat::IsConstant => {
148
                // An empty array reports no value instead of being passed to `is_constant`.
                if self.dyn_array_ref.is_empty() {
5,942✔
149
                    None
37✔
150
                } else {
151
                    is_constant(self.dyn_array_ref)?.map(ScalarValue::from)
5,905✔
152
                }
153
            }
154
            Stat::IsSorted => Some(is_sorted(self.dyn_array_ref)?.into()),
5,606✔
155
            Stat::IsStrictSorted => Some(is_strict_sorted(self.dyn_array_ref)?.into()),
5,372✔
156
            Stat::UncompressedSizeInBytes => {
157
                // Size in bytes of the canonical form of the array, stored as an exact stat.
                let nbytes: ScalarValue =
5,606✔
158
                    self.dyn_array_ref.to_canonical()?.as_ref().nbytes().into();
5,606✔
159
                self.set(stat, Precision::exact(nbytes.clone()));
5,606✔
160
                Some(nbytes)
5,606✔
161
            }
162
            Stat::NaNCount => {
163
                Stat::NaNCount
11,119✔
164
                    .dtype(self.dyn_array_ref.dtype())
11,119✔
165
                    .is_some()
11,119✔
166
                    .then(|| {
11,119✔
167
                        // NaNCount is supported for this dtype.
168
                        nan_count(self.dyn_array_ref)
111✔
169
                    })
111✔
170
                    .transpose()?
11,119✔
171
                    .map(|s| s.into())
11,119✔
172
            }
173
        })
174
    }
251,389✔
175

176
    /// Runs `compute_stat` for each entry of `stats` and collects the results into a new
    /// [`StatsSet`], each marked exact.
    ///
    /// Stats for which `compute_stat` returns `None` are left out of the result.
    ///
    /// # Errors
    ///
    /// Stops at, and returns, the first error from `compute_stat`.
    pub fn compute_all(&self, stats: &[Stat]) -> VortexResult<StatsSet> {
11,087✔
177
        let mut stats_set = StatsSet::default();
11,087✔
178
        for &stat in stats {
88,932✔
179
            if let Some(s) = self.compute_stat(stat)? {
77,845✔
180
                stats_set.set(stat, Precision::exact(s))
55,146✔
181
            }
22,699✔
182
        }
183
        Ok(stats_set)
11,087✔
184
    }
11,087✔
185
}
186

187
impl StatsSetRef<'_> {
188
    /// Typed lookup of a stored stat; delegates to `StatsProviderExt::get_as`.
    pub fn get_as<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
274,749✔
189
        &self,
274,749✔
190
        stat: Stat,
274,749✔
191
    ) -> Option<Precision<U>> {
274,749✔
192
        StatsProviderExt::get_as::<U>(self, stat)
274,749✔
193
    }
274,749✔
194

195
    /// Typed lookup of the bound for stat type `S`; delegates to
    /// `StatsProviderExt::get_as_bound`.
    pub fn get_as_bound<S, U>(&self) -> Option<S::Bound>
×
196
    where
×
197
        S: StatType<U>,
×
198
        U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>,
×
199
    {
200
        StatsProviderExt::get_as_bound::<S, U>(self)
×
201
    }
×
202

203
    /// Computes `stat` with `compute_stat` and converts the value to `U`.
    ///
    /// A compute error is logged at warn level and reported as `None`, the same as a stat
    /// that produced no value.
    ///
    /// # Panics
    ///
    /// Panics via `vortex_panic!` if the computed value cannot be converted to `U`.
    pub fn compute_as<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
96,463✔
204
        &self,
96,463✔
205
        stat: Stat,
96,463✔
206
    ) -> Option<U> {
96,463✔
207
        self.compute_stat(stat)
96,463✔
208
            .inspect_err(|e| log::warn!("Failed to compute stat {stat}: {e}"))
96,463✔
209
            .ok()
96,463✔
210
            .flatten()
96,463✔
211
            .map(|s| U::try_from(&s))
96,463✔
212
            // Turn `Option<Result<U, _>>` into `Result<Option<U>, _>` so only a failed
            // conversion reaches the panic below.
            .transpose()
96,463✔
213
            .unwrap_or_else(|err| {
96,463✔
214
                vortex_panic!(
×
215
                    err,
×
216
                    "Failed to compute stat {} as {}",
×
217
                    stat,
218
                    std::any::type_name::<U>()
×
219
                )
220
            })
221
    }
96,463✔
222

223
    /// Forwards to `ArrayStats::set` on the shared stats.
    pub fn set(&self, stat: Stat, value: Precision<ScalarValue>) {
317,239✔
224
        self.array_stats.set(stat, value);
317,239✔
225
    }
317,239✔
226

227
    /// Forwards to `ArrayStats::clear` on the shared stats.
    pub fn clear(&self, stat: Stat) {
×
NEW
228
        self.array_stats.clear(stat);
×
229
    }
×
230

231
    /// Forwards to `ArrayStats::retain` on the shared stats.
    pub fn retain(&self, stats: &[Stat]) {
×
NEW
232
        self.array_stats.retain(stats);
×
233
    }
×
234

235
    /// Shorthand for `compute_as(Stat::Min)`.
    pub fn compute_min<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
15,588✔
236
        &self,
15,588✔
237
    ) -> Option<U> {
15,588✔
238
        self.compute_as(Stat::Min)
15,588✔
239
    }
15,588✔
240

241
    /// Shorthand for `compute_as(Stat::Max)`.
    pub fn compute_max<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
18✔
242
        &self,
18✔
243
    ) -> Option<U> {
18✔
244
        self.compute_as(Stat::Max)
18✔
245
    }
18✔
246

247
    /// Shorthand for `compute_as(Stat::IsSorted)`, converted to `bool`.
    pub fn compute_is_sorted(&self) -> Option<bool> {
×
248
        self.compute_as(Stat::IsSorted)
×
249
    }
×
250

251
    /// Shorthand for `compute_as(Stat::IsStrictSorted)`, converted to `bool`.
    pub fn compute_is_strict_sorted(&self) -> Option<bool> {
3,115✔
252
        self.compute_as(Stat::IsStrictSorted)
3,115✔
253
    }
3,115✔
254

255
    /// Shorthand for `compute_as(Stat::IsConstant)`, converted to `bool`.
    pub fn compute_is_constant(&self) -> Option<bool> {
373✔
256
        self.compute_as(Stat::IsConstant)
373✔
257
    }
373✔
258

259
    /// Shorthand for `compute_as(Stat::NullCount)`, converted to `usize`.
    pub fn compute_null_count(&self) -> Option<usize> {
10,316✔
260
        self.compute_as(Stat::NullCount)
10,316✔
261
    }
10,316✔
262

263
    /// Shorthand for `compute_as(Stat::UncompressedSizeInBytes)`, converted to `usize`.
    pub fn compute_uncompressed_size_in_bytes(&self) -> Option<usize> {
×
264
        self.compute_as(Stat::UncompressedSizeInBytes)
×
265
    }
×
266
}
267

268
/// Read access through the reference; both methods delegate to the underlying `ArrayStats`.
impl StatsProvider for StatsSetRef<'_> {
269
    /// Returns the stored entry for `stat`, if any, without computing anything.
    fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
745,028✔
270
        self.array_stats.get(stat)
745,028✔
271
    }
745,028✔
272

273
    /// Returns the length reported by the underlying `ArrayStats`.
    fn len(&self) -> usize {
×
NEW
274
        self.array_stats.len()
×
275
    }
×
276
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc