• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16811137738

07 Aug 2025 05:13PM UTC coverage: 85.139% (+0.3%) from 84.847%
16811137738

Pull #4152

github

web-flow
Merge e081c672c into 30635faae
Pull Request #4152: ScalarValue operations must go via Scalar

375 of 392 new or added lines in 23 files covered. (95.66%)

22 existing lines in 9 files now uncovered.

50960 of 59855 relevant lines covered (85.14%)

565634.41 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.02
/vortex-array/src/stats/array.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
//! Stats as they are stored on arrays.
5

6
use std::sync::Arc;
7

8
use parking_lot::RwLock;
9
use vortex_error::{VortexError, VortexResult, vortex_panic};
10
use vortex_scalar::ScalarValue;
11

12
use super::{
13
    Precision, Stat, StatType, StatsProvider, StatsProviderExt, StatsSet, StatsSetIntoIter,
14
};
15
use crate::Array;
16
use crate::compute::{
17
    MinMaxResult, is_constant, is_sorted, is_strict_sorted, min_max, nan_count, sum,
18
};
19

20
/// A shared [`StatsSet`] stored in an array. Can be shared by copies of the array and can also be mutated in place.
///
/// The set lives behind an `Arc<RwLock<_>>`, so the derived `Clone` copies the handle rather
/// than the statistics, and every mutating method only needs `&self`.
21
// TODO(adamg): This is a very bad name.
22
#[derive(Clone, Default, Debug)]
23
pub struct ArrayStats {
24
    // Reference-counted, lock-protected statistics shared by every clone of this handle.
    inner: Arc<RwLock<StatsSet>>,
25
}
26

27
/// Reference to an array's [`StatsSet`]. Can be used to get and mutate the underlying stats.
28
///
29
/// Constructed by calling [`ArrayStats::to_ref`].
///
/// Both fields are borrowed for `'a`, so the reference cannot outlive either the array or
/// its [`ArrayStats`].
30
pub struct StatsSetRef<'a> {
31
    // We need to reference back to the array
    // (the `compute_*` methods run the compute functions against it).
32
    dyn_array_ref: &'a dyn Array,
33
    // Shared stats storage that reads and writes on this reference go through.
    array_stats: &'a ArrayStats,
34
}
35

765,024✔
36
impl ArrayStats {
765,024✔
37
    /// Pairs this stats handle with `array`, returning a [`StatsSetRef`] that borrows both.
    pub fn to_ref<'a>(&'a self, array: &'a dyn Array) -> StatsSetRef<'a> {
19,570,779✔
38
        StatsSetRef {
19,570,779✔
39
            dyn_array_ref: array,
19,570,779✔
40
            array_stats: self,
19,570,779✔
41
        }
18,805,755✔
42
    }
18,914,970✔
43

109,215✔
44
    /// Stores `value` for `stat` in the shared set, holding the write lock for the call.
    pub fn set(&self, stat: Stat, value: Precision<ScalarValue>) {
650,276✔
45
        self.inner.write().set(stat, value);
541,061✔
46
    }
541,061✔
47

48
    /// Clears `stat` from the shared set (delegates to the set's `clear` under the write lock).
    pub fn clear(&self, stat: Stat) {
×
49
        self.inner.write().clear(stat);
50
    }
×
51

52
    /// Delegates to the set's `retain_only(stats)` under the write lock.
    // NOTE(review): presumably this drops every stat not listed in `stats` - confirm against
    // `StatsSet::retain_only`.
    pub fn retain(&self, stats: &[Stat]) {
×
53
        self.inner.write().retain_only(stats);
54
    }
55
}
56

57
/// Wraps an owned [`StatsSet`] in a fresh `Arc<RwLock<_>>`.
impl From<StatsSet> for ArrayStats {
58
    /// Takes ownership of `value` and makes it the initial contents of a new shared handle.
    fn from(value: StatsSet) -> Self {
×
59
        Self {
×
60
            inner: Arc::new(RwLock::new(value)),
×
61
        }
62
    }
63
}
64

65
/// Extracts an owned [`StatsSet`] from an [`ArrayStats`] handle.
impl From<ArrayStats> for StatsSet {
66
    /// Clones the current contents under the read lock; the shared storage itself is not
    /// modified, only this handle is dropped.
    fn from(value: ArrayStats) -> Self {
×
67
        value.inner.read().clone()
68
    }
69
}
70

71
/// Read-only [`StatsProvider`] access; each call takes the read lock for its own duration.
impl StatsProvider for ArrayStats {
72
    /// Looks up `stat` in the shared set under the read lock.
    fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
2,071,046✔
73
        let guard = self.inner.read();
2,071,046✔
74
        guard.get(stat)
2,071,046✔
75
    }
2,071,046✔
76

77
    /// Returns the shared set's `len()` under the read lock.
    fn len(&self) -> usize {
×
78
        let guard = self.inner.read();
×
79
        guard.len()
×
80
    }
81
}
45,795✔
82

83
impl StatsSetRef<'_> {
45,795✔
84
    /// Sets every `(stat, value)` pair yielded by `iter` on the underlying stats.
    ///
    /// The write lock is taken once and held for the whole batch.
    pub fn set_iter(&self, iter: StatsSetIntoIter) {
47,055✔
85
        let mut guard = self.array_stats.inner.write();
1,260✔
86
        for (stat, value) in iter {
1,533✔
87
            guard.set(stat, value);
46,068✔
88
        }
273✔
89
    }
144,870✔
90

144,831✔
91
    /// Copies every stat currently held by `stats` into this reference's stats.
    ///
    /// Iterates `stats` under its read lock (see [`Self::with_iter`]) while [`Self::inherit`]
    /// takes this reference's write lock.
    // NOTE(review): if `self` and `stats` point at the same underlying storage, the write lock
    // would be requested while the read lock is still held; `parking_lot::RwLock` is not
    // reentrant - confirm callers never pass a reference to the same stats.
    pub fn inherit_from(&self, stats: StatsSetRef<'_>) {
144,831✔
92
        stats.with_iter(|iter| self.inherit(iter));
93
    }
150,354✔
94

150,354✔
95
    /// Sets each borrowed `(stat, value)` pair from `iter` on the underlying stats, cloning
    /// the value.
    ///
    /// The write lock is held for the whole loop. Every incoming value is passed straight to
    /// `set`; nothing is compared against what is already stored (see the TODO below).
    pub fn inherit<'a>(&self, iter: impl Iterator<Item = &'a (Stat, Precision<ScalarValue>)>) {
2,692,616✔
96
        // TODO(ngates): depending on statistic, this should choose the more precise one
97
        let mut guard = self.array_stats.inner.write();
2,588,057✔
98
        for (stat, value) in iter {
2,592,134✔
99
            guard.set(*stat, value.clone());
49,872✔
100
        }
49,872✔
101
    }
2,588,057✔
102

45,795✔
103
    /// Replaces the entire underlying stats set with `stats`, discarding the previous contents.
    pub fn replace(&self, stats: StatsSet) {
4,660,949✔
104
        // Assign through the write guard so every holder of the shared storage sees the new set.
        *self.array_stats.inner.write() = stats;
4,660,949✔
105
    }
4,660,949✔
106

45,795✔
107
    /// Returns an owned clone of the current stats, taken under the read lock.
    ///
    /// Later changes to the shared stats are not reflected in the returned set.
    pub fn to_owned(&self) -> StatsSet {
4,842,762✔
108
        self.array_stats.inner.read().clone()
4,921,470✔
109
    }
4,842,762✔
110

78,708✔
111
    /// Runs `f` with an iterator over the stored `(Stat, Precision<ScalarValue>)` entries and
    /// returns whatever `f` returns.
    ///
    /// The read lock is held for the entire call to `f`, which is why the iterator is lent to
    /// a closure instead of being returned.
    // NOTE(review): since the read lock is held while `f` runs, `f` presumably must not write
    // to this same stats storage - confirm.
    pub fn with_iter<
2,588,687✔
112
        F: for<'a> FnOnce(&mut dyn Iterator<Item = &'a (Stat, Precision<ScalarValue>)>) -> R,
2,574,545✔
113
        R,
2,542,262✔
114
    >(
2,574,545✔
115
        &self,
2,556,539✔
116
        f: F,
2,543,402✔
117
    ) -> R {
2,542,262✔
118
        let lock = self.array_stats.inner.read();
2,547,764✔
119
        f(&mut lock.iter())
2,547,764✔
120
    }
2,547,764✔
121

5,502✔
122
    /// Returns the exact value of `stat` for the referenced array, computing it when needed.
    ///
    /// An `Exact` value already present in the stats is returned as-is. Otherwise the matching
    /// compute function is run against the array.
    ///
    /// Returns `Ok(None)` when no value is produced: `min_max` or `is_constant` yield `None`,
    /// the array is empty (for `IsConstant`), or `Stat::dtype` reports no dtype for `Sum` /
    /// `NaNCount` on this array's dtype.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the underlying compute call.
    // NOTE(review): only the `UncompressedSizeInBytes` arm stores its result from here; the
    // other arms presumably rely on the compute functions to record it - confirm.
    pub fn compute_stat(&self, stat: Stat) -> VortexResult<Option<ScalarValue>> {
232,295✔
123
        // If it's already computed and exact, we can return it.
2,259✔
124
        // (Inside this `if let`, `stat` is shadowed by the stored value.)
        if let Some(Precision::Exact(stat)) = self.get(stat) {
234,554✔
125
            return Ok(Some(stat));
140,100✔
126
        }
97,697✔
127

1,548✔
128
        Ok(match stat {
97,697✔
129
            // `min_max` yields both bounds; this arm keeps only the minimum.
            Stat::Min => {
1,182✔
130
                min_max(self.dyn_array_ref)?.map(|MinMaxResult { min, max: _ }| min.into_value())
42,188✔
131
            }
132
            // Same call as `Min`, keeping only the maximum.
            Stat::Max => {
1,182✔
133
                min_max(self.dyn_array_ref)?.map(|MinMaxResult { min: _, max }| max.into_value())
9,050✔
134
            }
135
            Stat::Sum => {
957✔
136
                Stat::Sum
12,961✔
137
                    .dtype(self.dyn_array_ref.dtype())
11,584✔
138
                    .is_some()
12,541✔
139
                    .then(|| {
12,541✔
140
                        // Sum is supported for this dtype.
957✔
141
                        sum(self.dyn_array_ref)
6,904✔
142
                    })
6,904✔
143
                    // `Option<Result<_>>` -> `Result<Option<_>>` so `?` can surface the error.
                    .transpose()?
16,927✔
144
                    .map(|s| s.into_value())
16,927✔
145
            }
5,343✔
146
            Stat::NullCount => Some(self.dyn_array_ref.invalid_count()?.into()),
9,594✔
147
            Stat::IsConstant => {
148
                // No value is reported for an empty array; `is_constant` is not called.
                if self.dyn_array_ref.is_empty() {
5,619✔
149
                    None
39✔
150
                } else {
5,343✔
151
                    is_constant(self.dyn_array_ref)?.map(ScalarValue::from)
10,923✔
152
                }
153
            }
154
            Stat::IsSorted => Some(is_sorted(self.dyn_array_ref)?.into()),
84,131✔
155
            Stat::IsStrictSorted => Some(is_strict_sorted(self.dyn_array_ref)?.into()),
5,929✔
156
            Stat::UncompressedSizeInBytes => {
5,343✔
157
                // Size is measured on the canonical form of the array.
                let nbytes: ScalarValue =
10,766✔
158
                    self.dyn_array_ref.to_canonical()?.as_ref().nbytes().into();
41,309✔
159
                // Record the result as exact so the early return above serves later calls.
                self.set(stat, Precision::exact(nbytes.clone()));
35,966✔
160
                Some(nbytes)
27,014✔
161
            }
8,952✔
162
            Stat::NaNCount => {
163
                Stat::NaNCount
13,573✔
164
                    .dtype(self.dyn_array_ref.dtype())
13,573✔
165
                    .is_some()
8,230✔
166
                    .then(|| {
8,230✔
167
                        // NaNCount is supported for this dtype.
168
                        nan_count(self.dyn_array_ref)
130,968✔
169
                    })
130,968✔
170
                    // Same `Option<Result<_>>` -> `Result<Option<_>>` flip as in the `Sum` arm.
                    .transpose()?
139,081✔
171
                    .map(|s| s.into())
139,081✔
172
            }
130,851✔
173
        })
71,868✔
174
    }
304,163✔
175

176
    /// Computes each stat in `stats` via [`Self::compute_stat`] and collects the results into
    /// a new [`StatsSet`].
    ///
    /// Stats for which `compute_stat` yields `None` are left out; every collected value is
    /// recorded with exact precision.
    ///
    /// # Errors
    ///
    /// Returns the first error produced by `compute_stat`; no partial set is returned.
    pub fn compute_all(&self, stats: &[Stat]) -> VortexResult<StatsSet> {
8,196✔
177
        let mut stats_set = StatsSet::default();
8,196✔
178
        for &stat in stats {
70,854✔
179
            if let Some(s) = self.compute_stat(stat)? {
62,658✔
180
                stats_set.set(stat, Precision::exact(s))
44,444✔
181
            }
18,214✔
182
        }
71,868✔
183
        Ok(stats_set)
80,064✔
184
    }
139,047✔
185
}
186

187
impl StatsSetRef<'_> {
188
    /// Inherent forwarding wrapper around [`StatsProviderExt::get_as`] for this reference.
    ///
    /// `U` must be constructible from a borrowed [`ScalarValue`] with a [`VortexError`] as the
    /// conversion error.
    pub fn get_as<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
1,130,449✔
189
        &self,
1,130,449✔
190
        stat: Stat,
1,130,449✔
191
    ) -> Option<Precision<U>> {
1,130,449✔
192
        StatsProviderExt::get_as::<U>(self, stat)
1,130,449✔
193
    }
1,130,449✔
194

30,606✔
195
    /// Inherent forwarding wrapper around [`StatsProviderExt::get_as_bound`] for this reference.
    ///
    /// The stat is selected by the type parameter `S`; the result is that stat type's `Bound`.
    pub fn get_as_bound<S, U>(&self) -> Option<S::Bound>
30,606✔
196
    where
30,606✔
197
        S: StatType<U>,
30,606✔
198
        U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>,
30,606✔
199
    {
30,606✔
200
        StatsProviderExt::get_as_bound::<S, U>(self)
30,606✔
201
    }
30,606✔
202

30,606✔
203
    /// Computes `stat` via [`Self::compute_stat`] and converts the result to `U`.
    ///
    /// A failure to compute is logged at `warn` level and reported as `None`, as is a stat
    /// for which no value is produced.
    ///
    /// # Panics
    ///
    /// Panics (via `vortex_panic!`) if the computed value cannot be converted into `U`.
    pub fn compute_as<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
127,688✔
204
        &self,
127,688✔
205
        stat: Stat,
97,082✔
206
    ) -> Option<U> {
97,082✔
207
        self.compute_stat(stat)
97,082✔
208
            .inspect_err(|e| log::warn!("Failed to compute stat {stat}: {e}"))
97,082✔
209
            // Compute errors are swallowed here: `Err(_)` becomes `None`.
            .ok()
97,082✔
210
            .flatten()
97,082✔
211
            .map(|s| U::try_from(&s))
97,082✔
212
            // `Option<Result<U, _>>` -> `Result<Option<U>, _>` so a conversion error can be handled.
            .transpose()
127,688✔
213
            .unwrap_or_else(|err| {
97,082✔
214
                vortex_panic!(
109,215✔
215
                    err,
109,215✔
216
                    "Failed to compute stat {} as {}",
109,215✔
217
                    stat,
218
                    std::any::type_name::<U>()
×
219
                )
220
            })
221
    }
97,082✔
222

223
    /// Stores `value` for `stat`; forwards to [`ArrayStats::set`].
    pub fn set(&self, stat: Stat, value: Precision<ScalarValue>) {
541,061✔
224
        self.array_stats.set(stat, value);
541,061✔
225
    }
541,061✔
226

6,024✔
227
    /// Clears `stat`; forwards to [`ArrayStats::clear`].
    pub fn clear(&self, stat: Stat) {
6,024✔
228
        self.array_stats.clear(stat);
6,024✔
229
    }
230

UNCOV
231
    /// Forwards to [`ArrayStats::retain`] with the given list of stats.
    pub fn retain(&self, stats: &[Stat]) {
×
UNCOV
232
        self.array_stats.retain(stats);
×
233
    }
234

235
    /// Computes [`Stat::Min`] and converts it to `U`; shorthand for `compute_as(Stat::Min)`.
    ///
    /// Returns `None` and panics under the same conditions as [`Self::compute_as`].
    pub fn compute_min<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
13,189✔
236
        &self,
13,189✔
237
    ) -> Option<U> {
13,189✔
238
        self.compute_as(Stat::Min)
14,416✔
239
    }
14,416✔
240

1,227✔
241
    /// Computes [`Stat::Max`] and converts it to `U`; shorthand for `compute_as(Stat::Max)`.
    ///
    /// Returns `None` and panics under the same conditions as [`Self::compute_as`].
    pub fn compute_max<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
18✔
242
        &self,
243✔
243
    ) -> Option<U> {
243✔
244
        self.compute_as(Stat::Max)
243✔
245
    }
18✔
246

2,088✔
247
    /// Computes [`Stat::IsSorted`] as a `bool`; shorthand for `compute_as(Stat::IsSorted)`.
    pub fn compute_is_sorted(&self) -> Option<bool> {
2,088✔
248
        self.compute_as(Stat::IsSorted)
2,088✔
249
    }
250

251
    /// Computes [`Stat::IsStrictSorted`] as a `bool`; shorthand for
    /// `compute_as(Stat::IsStrictSorted)`.
    pub fn compute_is_strict_sorted(&self) -> Option<bool> {
15,678✔
252
        self.compute_as(Stat::IsStrictSorted)
15,678✔
253
    }
15,678✔
254

255
    /// Computes [`Stat::IsConstant`] as a `bool`; shorthand for `compute_as(Stat::IsConstant)`.
    ///
    /// `None` is returned for an empty array, since [`Self::compute_stat`] produces no value then.
    pub fn compute_is_constant(&self) -> Option<bool> {
235✔
256
        self.compute_as(Stat::IsConstant)
289,756✔
257
    }
289,756✔
258

289,521✔
259
    /// Computes [`Stat::NullCount`] as a `usize`; shorthand for `compute_as(Stat::NullCount)`.
    pub fn compute_null_count(&self) -> Option<usize> {
299,349✔
260
        self.compute_as(Stat::NullCount)
299,349✔
261
    }
299,349✔
262

289,521✔
263
    /// Computes [`Stat::UncompressedSizeInBytes`] as a `usize`; shorthand for
    /// `compute_as(Stat::UncompressedSizeInBytes)`.
    pub fn compute_uncompressed_size_in_bytes(&self) -> Option<usize> {
264
        self.compute_as(Stat::UncompressedSizeInBytes)
×
NEW
265
    }
×
266
}
267

268
/// [`StatsProvider`] for the reference type; both methods forward to the borrowed
/// [`ArrayStats`].
impl StatsProvider for StatsSetRef<'_> {
269
    /// Looks up `stat` in the underlying stats without computing anything.
    fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
2,071,046✔
270
        self.array_stats.get(stat)
2,071,046✔
271
    }
2,071,046✔
272

273
    /// Returns the `len()` of the underlying stats.
    fn len(&self) -> usize {
274
        self.array_stats.len()
275
    }
276
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc