• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16832413242

08 Aug 2025 02:04PM UTC coverage: 84.935% (+0.06%) from 84.877%
16832413242

Pull #4161

github

web-flow
Merge 04e9b0a07 into c88d9ada1
Pull Request #4161: feat(python): Add Arrow FFI streaming support to write API

50657 of 59642 relevant lines covered (84.94%)

568241.59 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

72.94
/vortex-array/src/stats/array.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
//! Stats as they are stored on arrays.
5

6
use std::sync::Arc;
7

8
use parking_lot::RwLock;
9
use vortex_error::{VortexError, VortexResult, vortex_panic};
10
use vortex_scalar::{Scalar, ScalarValue};
11

12
use super::{Precision, Stat, StatType, StatsProvider, StatsSet, StatsSetIntoIter};
13
use crate::Array;
14
use crate::compute::{
15
    MinMaxResult, is_constant, is_sorted, is_strict_sorted, min_max, nan_count, sum,
16
};
17

18
/// A shared [`StatsSet`] stored in an array. Can be shared by copies of the array and can also be mutated in place.
19
// TODO(adamg): This is a very bad name.
20
#[derive(Clone, Default, Debug)]
21
pub struct ArrayStats {
22
    // The lock-protected set lives behind an `Arc`; the derived `Clone` copies the `Arc`, so
    // clones of this struct share one underlying `StatsSet`.
    inner: Arc<RwLock<StatsSet>>,
23
}
24

25
/// Reference to an array's [`StatsSet`]. Can be used to get and mutate the underlying stats.
26
///
27
/// Constructed by calling [`ArrayStats::to_ref`].
28
pub struct StatsSetRef<'a> {
29
    // We need to reference back to the array: it is the input when a missing stat has to be
    // computed, and it supplies the dtype used when reading stored values as typed scalars.
30
    dyn_array_ref: &'a dyn Array,
31
    // The shared stats storage that every read and write on this reference goes through.
    array_stats: &'a ArrayStats,
32
}
33

34
impl ArrayStats {
35
    pub fn to_ref<'a>(&'a self, array: &'a dyn Array) -> StatsSetRef<'a> {
19,659,863✔
36
        StatsSetRef {
19,659,863✔
37
            dyn_array_ref: array,
19,659,863✔
38
            array_stats: self,
19,659,863✔
39
        }
19,659,863✔
40
    }
19,659,863✔
41

42
    pub fn set(&self, stat: Stat, value: Precision<ScalarValue>) {
646,496✔
43
        self.inner.write().set(stat, value);
646,496✔
44
    }
646,496✔
45

46
    pub fn clear(&self, stat: Stat) {
×
47
        self.inner.write().clear(stat);
×
48
    }
×
49

50
    pub fn retain(&self, stats: &[Stat]) {
×
51
        self.inner.write().retain_only(stats);
×
52
    }
×
53
}
54

55
impl From<StatsSet> for ArrayStats {
56
    fn from(value: StatsSet) -> Self {
×
57
        Self {
×
58
            inner: Arc::new(RwLock::new(value)),
×
59
        }
×
60
    }
×
61
}
62

63
impl From<ArrayStats> for StatsSet {
64
    fn from(value: ArrayStats) -> Self {
×
65
        value.inner.read().clone()
×
66
    }
×
67
}
68

69
impl StatsSetRef<'_> {
70
    pub fn set_iter(&self, iter: StatsSetIntoIter) {
39✔
71
        let mut guard = self.array_stats.inner.write();
39✔
72
        for (stat, value) in iter {
312✔
73
            guard.set(stat, value);
273✔
74
        }
273✔
75
    }
39✔
76

77
    pub fn inherit_from(&self, stats: StatsSetRef<'_>) {
×
78
        stats.with_iter(|iter| self.inherit(iter));
×
79
    }
×
80

81
    pub fn inherit<'a>(&self, iter: impl Iterator<Item = &'a (Stat, Precision<ScalarValue>)>) {
2,586,053✔
82
        // TODO(ngates): depending on statistic, this should choose the more precise one
83
        let mut guard = self.array_stats.inner.write();
2,586,053✔
84
        for (stat, value) in iter {
2,591,351✔
85
            guard.set(*stat, value.clone());
5,298✔
86
        }
5,298✔
87
    }
2,586,053✔
88

89
    pub fn replace(&self, stats: StatsSet) {
4,758,356✔
90
        *self.array_stats.inner.write() = stats;
4,758,356✔
91
    }
4,758,356✔
92

93
    pub fn to_owned(&self) -> StatsSet {
4,991,486✔
94
        self.array_stats.inner.read().clone()
4,991,486✔
95
    }
4,991,486✔
96

97
    pub fn with_iter<
2,586,053✔
98
        F: for<'a> FnOnce(&mut dyn Iterator<Item = &'a (Stat, Precision<ScalarValue>)>) -> R,
2,586,053✔
99
        R,
2,586,053✔
100
    >(
2,586,053✔
101
        &self,
2,586,053✔
102
        f: F,
2,586,053✔
103
    ) -> R {
2,586,053✔
104
        let lock = self.array_stats.inner.read();
2,586,053✔
105
        f(&mut lock.iter())
2,586,053✔
106
    }
2,586,053✔
107

108
    pub fn compute_stat(&self, stat: Stat) -> VortexResult<Option<Scalar>> {
311,237✔
109
        // If it's already computed and exact, we can return it.
110
        if let Some(Precision::Exact(s)) = self.get(stat) {
311,237✔
111
            return Ok(Some(s));
181,023✔
112
        }
130,214✔
113

114
        Ok(match stat {
130,214✔
115
            Stat::Min => min_max(self.dyn_array_ref)?.map(|MinMaxResult { min, max: _ }| min),
56,465✔
116
            Stat::Max => min_max(self.dyn_array_ref)?.map(|MinMaxResult { min: _, max }| max),
10,190✔
117
            Stat::Sum => {
118
                Stat::Sum
17,086✔
119
                    .dtype(self.dyn_array_ref.dtype())
17,086✔
120
                    .is_some()
17,086✔
121
                    .then(|| {
17,086✔
122
                        // Sum is supported for this dtype.
123
                        sum(self.dyn_array_ref)
9,163✔
124
                    })
9,163✔
125
                    .transpose()?
17,086✔
126
            }
127
            Stat::NullCount => Some(self.dyn_array_ref.invalid_count()?.into()),
5,799✔
128
            Stat::IsConstant => {
129
                if self.dyn_array_ref.is_empty() {
6,801✔
130
                    None
39✔
131
                } else {
132
                    is_constant(self.dyn_array_ref)?.map(|v| v.into())
6,762✔
133
                }
134
            }
135
            Stat::IsSorted => Some(is_sorted(self.dyn_array_ref)?.into()),
6,380✔
136
            Stat::IsStrictSorted => Some(is_strict_sorted(self.dyn_array_ref)?.into()),
7,540✔
137
            Stat::UncompressedSizeInBytes => {
138
                let nbytes = self.dyn_array_ref.to_canonical()?.as_ref().nbytes();
6,380✔
139
                self.set(stat, Precision::exact(nbytes));
6,380✔
140
                Some(nbytes.into())
6,380✔
141
            }
142
            Stat::NaNCount => {
143
                Stat::NaNCount
13,573✔
144
                    .dtype(self.dyn_array_ref.dtype())
13,573✔
145
                    .is_some()
13,573✔
146
                    .then(|| {
13,573✔
147
                        // NaNCount is supported for this dtype.
148
                        nan_count(self.dyn_array_ref)
117✔
149
                    })
117✔
150
                    .transpose()?
13,573✔
151
                    .map(|s| s.into())
13,573✔
152
            }
153
        })
154
    }
311,237✔
155

156
    pub fn compute_all(&self, stats: &[Stat]) -> VortexResult<StatsSet> {
13,539✔
157
        let mut stats_set = StatsSet::default();
13,539✔
158
        for &stat in stats {
106,740✔
159
            if let Some(s) = self.compute_stat(stat)? {
93,201✔
160
                stats_set.set(stat, Precision::exact(s.into_value()))
66,035✔
161
            }
27,166✔
162
        }
163
        Ok(stats_set)
13,539✔
164
    }
13,539✔
165
}
166

167
impl StatsSetRef<'_> {
168
    pub fn get_as<U: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(
1,365,024✔
169
        &self,
1,365,024✔
170
        stat: Stat,
1,365,024✔
171
    ) -> Option<Precision<U>> {
1,365,024✔
172
        self.get(stat).map(|v| {
1,365,024✔
173
            v.map(|v| {
760,533✔
174
                U::try_from(&v).unwrap_or_else(|err| {
760,533✔
175
                    vortex_panic!(
×
176
                        err,
×
177
                        "Failed to get stat {} as {}",
×
178
                        stat,
179
                        std::any::type_name::<U>()
×
180
                    )
181
                })
182
            })
760,533✔
183
        })
760,533✔
184
    }
1,365,024✔
185

186
    pub fn get_as_bound<S, U>(&self) -> Option<S::Bound>
×
187
    where
×
188
        S: StatType<U>,
×
189
        U: for<'a> TryFrom<&'a Scalar, Error = VortexError>,
×
190
    {
191
        self.get_as::<U>(S::STAT).map(|v| v.bound::<S>())
×
192
    }
×
193

194
    pub fn compute_as<U: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(
127,922✔
195
        &self,
127,922✔
196
        stat: Stat,
127,922✔
197
    ) -> Option<U> {
127,922✔
198
        self.compute_stat(stat)
127,922✔
199
            .inspect_err(|e| log::warn!("Failed to compute stat {stat}: {e}"))
127,922✔
200
            .ok()
127,922✔
201
            .flatten()
127,922✔
202
            .map(|s| U::try_from(&s))
127,922✔
203
            .transpose()
127,922✔
204
            .unwrap_or_else(|err| {
127,922✔
205
                vortex_panic!(
×
206
                    err,
×
207
                    "Failed to compute stat {} as {}",
×
208
                    stat,
209
                    std::any::type_name::<U>()
×
210
                )
211
            })
212
    }
127,922✔
213

214
    pub fn set(&self, stat: Stat, value: Precision<ScalarValue>) {
646,496✔
215
        self.array_stats.set(stat, value);
646,496✔
216
    }
646,496✔
217

218
    pub fn clear(&self, stat: Stat) {
×
219
        self.array_stats.clear(stat);
×
220
    }
×
221

222
    pub fn retain(&self, stats: &[Stat]) {
×
223
        self.array_stats.retain(stats);
×
224
    }
×
225

226
    pub fn compute_min<U: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(&self) -> Option<U> {
19,213✔
227
        self.compute_as(Stat::Min)
19,213✔
228
    }
19,213✔
229

230
    pub fn compute_max<U: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(&self) -> Option<U> {
18✔
231
        self.compute_as(Stat::Max)
18✔
232
    }
18✔
233

234
    pub fn compute_is_sorted(&self) -> Option<bool> {
×
235
        self.compute_as(Stat::IsSorted)
×
236
    }
×
237

238
    pub fn compute_is_strict_sorted(&self) -> Option<bool> {
17,139✔
239
        self.compute_as(Stat::IsStrictSorted)
17,139✔
240
    }
17,139✔
241

242
    pub fn compute_is_constant(&self) -> Option<bool> {
460✔
243
        self.compute_as(Stat::IsConstant)
460✔
244
    }
460✔
245

246
    pub fn compute_null_count(&self) -> Option<usize> {
11,916✔
247
        self.compute_as(Stat::NullCount)
11,916✔
248
    }
11,916✔
249

250
    pub fn compute_uncompressed_size_in_bytes(&self) -> Option<usize> {
×
251
        self.compute_as(Stat::UncompressedSizeInBytes)
×
252
    }
×
253
}
254

255
impl StatsProvider for StatsSetRef<'_> {
256
    fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
2,460,513✔
257
        self.array_stats
2,460,513✔
258
            .inner
2,460,513✔
259
            .read()
2,460,513✔
260
            .as_typed_ref(self.dyn_array_ref.dtype())
2,460,513✔
261
            .get(stat)
2,460,513✔
262
    }
2,460,513✔
263

264
    fn len(&self) -> usize {
×
265
        self.array_stats.inner.read().len()
×
266
    }
×
267
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc