• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16598973893

29 Jul 2025 02:25PM UTC coverage: 82.692% (-0.01%) from 82.703%
16598973893

push

github

web-flow
Clean up stats propagation for slicing (#3356)

Reduces the amount we copy some stats (by removing into_iter that forces
a full stats copy)

---------

Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Robert Kruszewski <github@robertk.io>
Signed-off-by: Will Manning <will@willmanning.io>
Co-authored-by: Robert Kruszewski <github@robertk.io>
Co-authored-by: Will Manning <will@willmanning.io>

130 of 157 new or added lines in 15 files covered. (82.8%)

30 existing lines in 13 files now uncovered.

45215 of 54679 relevant lines covered (82.69%)

184610.34 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.69
/vortex-array/src/stats/stats_set.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::fmt::Debug;
5

6
use enum_iterator::{Sequence, all};
7
use num_traits::CheckedAdd;
8
use vortex_dtype::DType;
9
use vortex_error::{VortexExpect, VortexResult, vortex_err};
10
use vortex_scalar::{Scalar, ScalarValue};
11

12
use super::traits::StatsProvider;
13
use super::{IsSorted, IsStrictSorted, NaNCount, NullCount, StatType, UncompressedSizeInBytes};
14
use crate::stats::{IsConstant, Max, Min, Precision, Stat, StatBound, StatsProviderExt, Sum};
15

16
#[derive(Default, Debug, Clone)]
17
pub struct StatsSet {
18
    values: Vec<(Stat, Precision<ScalarValue>)>,
19
}
20

21
impl StatsSet {
22
    /// Create new StatSet without validating uniqueness of all the entries
23
    ///
24
    /// # Safety
25
    ///
26
    /// This method will not panic or trigger UB, but may lead to duplicate stats being stored.
27
    pub fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
38✔
28
        Self { values }
38✔
29
    }
38✔
30

31
    /// Specialized constructor for the case where the StatsSet represents
32
    /// an array consisting entirely of [null](vortex_dtype::DType::Null) values.
33
    pub fn nulls(len: usize) -> Self {
×
34
        let mut stats = Self::new_unchecked(vec![(Stat::NullCount, Precision::exact(len))]);
×
35

36
        if len > 0 {
×
37
            stats.set(Stat::IsConstant, Precision::exact(true));
×
38
            stats.set(Stat::IsSorted, Precision::exact(true));
×
39
            stats.set(Stat::IsStrictSorted, Precision::exact(len < 2));
×
40
        }
×
41

42
        stats
×
43
    }
×
44

45
    /// A convenience method for creating a stats set which will represent an empty array.
46
    pub fn empty_array() -> StatsSet {
×
47
        StatsSet::new_unchecked(vec![(Stat::NullCount, Precision::exact(0))])
×
48
    }
×
49

UNCOV
50
    pub fn bools_with_sum_and_null_count(true_count: usize, null_count: usize, len: usize) -> Self {
×
UNCOV
51
        StatsSet::new_unchecked(vec![
×
52
            (Stat::Sum, Precision::exact(true_count)),
×
53
            (Stat::NullCount, Precision::exact(null_count)),
×
54
            (Stat::Min, Precision::exact(true_count == len)),
×
55
            (Stat::Max, Precision::exact(true_count > 0)),
×
56
            (
57
                Stat::IsConstant,
×
58
                Precision::exact((true_count == 0 && null_count == 0) || true_count == len),
×
59
            ),
60
        ])
61
    }
×
62

63
    pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
36✔
64
        Self::new_unchecked(vec![(stat, value)])
36✔
65
    }
36✔
66

67
    fn reserve_full_capacity(&mut self) {
492,722✔
68
        if self.values.capacity() < Stat::CARDINALITY {
492,722✔
69
            self.values
205,861✔
70
                .reserve_exact(Stat::CARDINALITY - self.values.capacity());
205,861✔
71
        }
286,861✔
72
    }
492,722✔
73
}
74

75
// Getters and setters for individual stats.
76
impl StatsSet {
77
    /// Set the stat `stat` to `value`.
78
    pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
448,872✔
79
        self.reserve_full_capacity();
448,872✔
80

81
        if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
557,521✔
82
            *existing = (stat, value);
2,073✔
83
        } else {
446,799✔
84
            self.values.push((stat, value));
446,799✔
85
        }
446,799✔
86
    }
448,872✔
87

88
    /// Clear the stat `stat` from the set.
89
    pub fn clear(&mut self, stat: Stat) {
154✔
90
        self.values.retain(|(s, _)| *s != stat);
273✔
91
    }
154✔
92

93
    pub fn retain_only(&mut self, stats: &[Stat]) {
×
94
        self.values.retain(|(s, _)| stats.contains(s));
×
95
    }
×
96

97
    pub fn keep_inexact_stats(self, inexact_keep: &[Stat]) -> Self {
43,841✔
98
        self.values
43,841✔
99
            .into_iter()
43,841✔
100
            .filter_map(|(s, v)| inexact_keep.contains(&s).then(|| (s, v.into_inexact())))
44,809✔
101
            .collect()
43,841✔
102
    }
43,841✔
103

104
    /// Iterate over the statistic names and values in-place.
105
    ///
106
    /// See [Iterator].
107
    pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
61,479✔
108
        self.values.iter()
61,479✔
109
    }
61,479✔
110
}
111

112
// StatsSetIntoIter exists only to keep the current implementation from being exposed in the public API.
113

114
/// Owned iterator over the stats.
115
///
116
/// See [IntoIterator].
117
pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
118

119
impl Iterator for StatsSetIntoIter {
120
    type Item = (Stat, Precision<ScalarValue>);
121

122
    fn next(&mut self) -> Option<Self::Item> {
86,543✔
123
        self.0.next()
86,543✔
124
    }
86,543✔
125
}
126

127
impl IntoIterator for StatsSet {
128
    type Item = (Stat, Precision<ScalarValue>);
129
    type IntoIter = StatsSetIntoIter;
130

131
    fn into_iter(self) -> Self::IntoIter {
63,402✔
132
        StatsSetIntoIter(self.values.into_iter())
63,402✔
133
    }
63,402✔
134
}
135

136
impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
137
    fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
43,850✔
138
        let iter = iter.into_iter();
43,850✔
139
        let mut values = Vec::default();
43,850✔
140
        values.reserve_exact(Stat::CARDINALITY);
43,850✔
141

142
        let mut this = Self { values };
43,850✔
143
        this.extend(iter);
43,850✔
144
        this
43,850✔
145
    }
43,850✔
146
}
147

148
impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
    /// Inserts every `(stat, value)` pair from `iter` via `set`.
    #[inline]
    fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
        let entries = iter.into_iter();
        self.reserve_full_capacity();

        for (stat, value) in entries {
            self.set(stat, value);
        }
    }
}
157

158
// Merge helpers
159
impl StatsSet {
160
    /// Merge stats set `other` into `self`, with the semantic assumption that `other`
161
    /// contains stats from a disjoint array that is *appended* to the array represented by `self`.
162
    pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
20✔
163
        for s in all::<Stat>() {
180✔
164
            match s {
180✔
165
                Stat::IsConstant => self.merge_is_constant(other, dtype),
20✔
166
                Stat::IsSorted => self.merge_is_sorted(other, dtype),
20✔
167
                Stat::IsStrictSorted => self.merge_is_strict_sorted(other, dtype),
20✔
168
                Stat::Max => self.merge_max(other, dtype),
20✔
169
                Stat::Min => self.merge_min(other, dtype),
20✔
170
                Stat::Sum => self.merge_sum(other, dtype),
20✔
171
                Stat::NullCount => self.merge_null_count(other),
20✔
172
                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
20✔
173
                Stat::NaNCount => self.merge_nan_count(other),
20✔
174
            }
175
        }
176

177
        self
20✔
178
    }
20✔
179

180
    /// Merge stats set `other` into `self`, from a disjoint array, with no ordering assumptions.
181
    /// Stats that are not commutative (e.g., is_sorted) are dropped from the result.
182
    pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
1✔
183
        for s in all::<Stat>() {
9✔
184
            if !s.is_commutative() {
9✔
185
                self.clear(s);
2✔
186
                continue;
2✔
187
            }
7✔
188

189
            match s {
7✔
190
                Stat::IsConstant => self.merge_is_constant(other, dtype),
1✔
191
                Stat::Max => self.merge_max(other, dtype),
1✔
192
                Stat::Min => self.merge_min(other, dtype),
1✔
193
                Stat::Sum => self.merge_sum(other, dtype),
1✔
194
                Stat::NullCount => self.merge_null_count(other),
1✔
195
                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
1✔
196
                Stat::IsSorted | Stat::IsStrictSorted => {
197
                    unreachable!("not commutative")
×
198
                }
199
                Stat::NaNCount => self.merge_nan_count(other),
1✔
200
            }
201
        }
202

203
        self
1✔
204
    }
1✔
205

206
    /// Given two sets of stats (of differing precision) for the same array, combine them
207
    pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
43,805✔
208
        let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
43,805✔
209
        for s in other_stats {
44,366✔
210
            match s {
562✔
211
                Stat::Max => self.combine_bound::<Max>(other, dtype)?,
280✔
212
                Stat::Min => self.combine_bound::<Min>(other, dtype)?,
279✔
213
                Stat::UncompressedSizeInBytes => {
UNCOV
214
                    self.combine_bound::<UncompressedSizeInBytes>(other, dtype)?
×
215
                }
216
                Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
1✔
217
                Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
1✔
218
                Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
1✔
UNCOV
219
                Stat::NullCount => self.combine_bound::<NullCount>(other, dtype)?,
×
220
                Stat::Sum => self.combine_bound::<Sum>(other, dtype)?,
×
221
                Stat::NaNCount => self.combine_bound::<NaNCount>(other, dtype)?,
×
222
            }
223
        }
224
        Ok(())
43,804✔
225
    }
43,805✔
226

227
    fn combine_bound<S: StatType<Scalar>>(
559✔
228
        &mut self,
559✔
229
        other: &Self,
559✔
230
        dtype: &DType,
559✔
231
    ) -> VortexResult<()>
559✔
232
    where
559✔
233
        S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
559✔
234
    {
235
        match (
236
            self.get_scalar_bound::<S>(dtype),
559✔
237
            other.get_scalar_bound::<S>(dtype),
559✔
238
        ) {
239
            (Some(m1), Some(m2)) => {
2✔
240
                let meet = m1
2✔
241
                    .intersection(&m2)
2✔
242
                    .vortex_expect("can always compare scalar")
2✔
243
                    .ok_or_else(|| {
2✔
244
                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
×
245
                    })?;
×
246
                if meet != m1 {
2✔
247
                    self.set(S::STAT, meet.into_value().map(Scalar::into_value));
1✔
248
                }
1✔
249
            }
250
            (None, Some(m)) => self.set(S::STAT, m.into_value().map(Scalar::into_value)),
557✔
251
            (Some(_), _) => (),
×
252
            (None, None) => self.clear(S::STAT),
×
253
        }
254
        Ok(())
559✔
255
    }
559✔
256

257
    fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &Self) -> VortexResult<()>
6✔
258
    where
6✔
259
        S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
6✔
260
    {
261
        match (
262
            self.get_as_bound::<S, bool>(),
6✔
263
            other.get_as_bound::<S, bool>(),
6✔
264
        ) {
265
            (Some(m1), Some(m2)) => {
4✔
266
                let intersection = m1
4✔
267
                    .intersection(&m2)
4✔
268
                    .vortex_expect("can always compare boolean")
4✔
269
                    .ok_or_else(|| {
4✔
270
                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
1✔
271
                    })?;
1✔
272
                if intersection != m1 {
3✔
273
                    self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
×
274
                }
3✔
275
            }
276
            (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
2✔
277
            (Some(_), None) => (),
×
278
            (None, None) => self.clear(S::STAT),
×
279
        }
280
        Ok(())
5✔
281
    }
6✔
282

283
    fn merge_min(&mut self, other: &Self, dtype: &DType) {
21✔
284
        match (
285
            self.get_scalar_bound::<Min>(dtype),
21✔
286
            other.get_scalar_bound::<Min>(dtype),
21✔
287
        ) {
288
            (Some(m1), Some(m2)) => {
5✔
289
                let meet = m1.union(&m2).vortex_expect("can compare scalar");
5✔
290
                if meet != m1 {
5✔
291
                    self.set(Stat::Min, meet.into_value().map(Scalar::into_value));
1✔
292
                }
4✔
293
            }
294
            _ => self.clear(Stat::Min),
16✔
295
        }
296
    }
21✔
297

298
    fn merge_max(&mut self, other: &Self, dtype: &DType) {
21✔
299
        match (
300
            self.get_scalar_bound::<Max>(dtype),
21✔
301
            other.get_scalar_bound::<Max>(dtype),
21✔
302
        ) {
303
            (Some(m1), Some(m2)) => {
3✔
304
                let meet = m1.union(&m2).vortex_expect("can compare scalar");
3✔
305
                if meet != m1 {
3✔
306
                    self.set(Stat::Max, meet.into_value().map(Scalar::into_value));
2✔
307
                }
2✔
308
            }
309
            _ => self.clear(Stat::Max),
18✔
310
        }
311
    }
21✔
312

313
    fn merge_sum(&mut self, other: &Self, dtype: &DType) {
21✔
314
        match (
315
            self.get_scalar_bound::<Sum>(dtype),
21✔
316
            other.get_scalar_bound::<Sum>(dtype),
21✔
317
        ) {
318
            (Some(m1), Some(m2)) => {
1✔
319
                // If the combine sum is exact, then we can sum them.
320
                if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
1✔
321
                    s1.as_primitive()
1✔
322
                        .checked_add(&s2.as_primitive())
1✔
323
                        .map(|pscalar| {
1✔
324
                            pscalar
1✔
325
                                .pvalue()
1✔
326
                                .map(|pvalue| {
1✔
327
                                    Scalar::primitive_value(
1✔
328
                                        pvalue,
1✔
329
                                        pscalar.ptype(),
1✔
330
                                        pscalar.dtype().nullability(),
1✔
331
                                    )
332
                                    .into_value()
1✔
333
                                })
1✔
334
                                .unwrap_or_else(ScalarValue::null)
1✔
335
                        })
1✔
336
                }) {
1✔
337
                    self.set(Stat::Sum, Precision::Exact(scalar_value));
1✔
338
                }
1✔
339
            }
340
            _ => self.clear(Stat::Sum),
20✔
341
        }
342
    }
21✔
343

344
    fn merge_is_constant(&mut self, other: &Self, dtype: &DType) {
21✔
345
        let self_const = self.get_as(Stat::IsConstant);
21✔
346
        let other_const = other.get_as(Stat::IsConstant);
21✔
347
        let self_min = self.get_scalar(Stat::Min, dtype);
21✔
348
        let other_min = other.get_scalar(Stat::Min, dtype);
21✔
349

350
        if let (
351
            Some(Precision::Exact(self_const)),
1✔
352
            Some(Precision::Exact(other_const)),
1✔
353
            Some(Precision::Exact(self_min)),
1✔
354
            Some(Precision::Exact(other_min)),
1✔
355
        ) = (self_const, other_const, self_min, other_min)
21✔
356
        {
357
            if self_const && other_const && self_min == other_min {
1✔
358
                self.set(Stat::IsConstant, Precision::exact(true));
×
359
            } else {
1✔
360
                self.set(Stat::IsConstant, Precision::inexact(false));
1✔
361
            }
1✔
362
        }
20✔
363
        self.set(Stat::IsConstant, Precision::exact(false));
21✔
364
    }
21✔
365

366
    fn merge_is_sorted(&mut self, other: &Self, dtype: &DType) {
20✔
367
        self.merge_sortedness_stat(other, Stat::IsSorted, dtype, PartialOrd::le)
20✔
368
    }
20✔
369

370
    fn merge_is_strict_sorted(&mut self, other: &Self, dtype: &DType) {
20✔
371
        self.merge_sortedness_stat(other, Stat::IsStrictSorted, dtype, PartialOrd::lt)
20✔
372
    }
20✔
373

374
    fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
40✔
375
        &mut self,
40✔
376
        other: &Self,
40✔
377
        stat: Stat,
40✔
378
        dtype: &DType,
40✔
379
        cmp: F,
40✔
380
    ) {
40✔
381
        if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
40✔
382
            == (self.get_as(stat), other.get_as(stat))
40✔
383
        {
384
            // There might be no stat because it was dropped, or it doesn't exist
385
            // (e.g. an all null array).
386
            // We assume that it was the dropped case since the doesn't exist might imply sorted,
387
            // but this in-precision is correct.
388
            if let (Some(self_max), Some(other_min)) = (
3✔
389
                self.get_scalar_bound::<Max>(dtype),
4✔
390
                other.get_scalar_bound::<Min>(dtype),
4✔
391
            ) {
392
                return if cmp(&self_max.max_value(), &other_min.min_value()) {
3✔
393
                    // keep value
2✔
394
                } else {
2✔
395
                    self.set(stat, Precision::inexact(false));
1✔
396
                };
1✔
397
            }
1✔
398
        }
36✔
399
        self.clear(stat);
37✔
400
    }
40✔
401

402
    fn merge_null_count(&mut self, other: &Self) {
21✔
403
        self.merge_sum_stat(Stat::NullCount, other)
21✔
404
    }
21✔
405

406
    fn merge_nan_count(&mut self, other: &Self) {
21✔
407
        self.merge_sum_stat(Stat::NaNCount, other)
21✔
408
    }
21✔
409

410
    fn merge_uncompressed_size_in_bytes(&mut self, other: &Self) {
21✔
411
        self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
21✔
412
    }
21✔
413

414
    fn merge_sum_stat(&mut self, stat: Stat, other: &Self) {
63✔
415
        match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
63✔
416
            (Some(nc1), Some(nc2)) => {
2✔
417
                self.set(
2✔
418
                    stat,
2✔
419
                    nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
2✔
420
                );
421
            }
422
            _ => self.clear(stat),
61✔
423
        }
424
    }
63✔
425
}
426

427
impl StatsProvider for StatsSet {
428
    fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
1,173,551✔
429
        self.values
1,173,551✔
430
            .iter()
1,173,551✔
431
            .find(|(s, _)| *s == stat)
1,349,429✔
432
            .map(|(_, v)| v.clone())
1,173,551✔
433
    }
1,173,551✔
434

435
    fn len(&self) -> usize {
1✔
436
        self.values.len()
1✔
437
    }
1✔
438
}
439

440
#[cfg(test)]
441
mod test {
442
    use enum_iterator::all;
443
    use itertools::Itertools;
444
    use vortex_dtype::{DType, Nullability, PType};
445

446
    use crate::arrays::PrimitiveArray;
447
    use crate::stats::{IsConstant, Precision, Stat, StatsProvider, StatsProviderExt, StatsSet};
448

449
    #[test]
450
    fn test_iter() {
1✔
451
        let set = StatsSet::new_unchecked(vec![
1✔
452
            (Stat::Max, Precision::exact(100)),
1✔
453
            (Stat::Min, Precision::exact(42)),
1✔
454
        ]);
455
        let mut iter = set.iter();
1✔
456
        let first = iter.next().unwrap().clone();
1✔
457
        assert_eq!(first.0, Stat::Max);
1✔
458
        assert_eq!(
1✔
459
            first.1.map(|f| i32::try_from(&f).unwrap()),
1✔
460
            Precision::exact(100)
1✔
461
        );
462
        let snd = iter.next().unwrap().clone();
1✔
463
        assert_eq!(snd.0, Stat::Min);
1✔
464
        assert_eq!(snd.1.map(|s| i32::try_from(&s).unwrap()), 42);
1✔
465
    }
1✔
466

467
    #[test]
468
    fn into_iter() {
1✔
469
        let mut set = StatsSet::new_unchecked(vec![
1✔
470
            (Stat::Max, Precision::exact(100)),
1✔
471
            (Stat::Min, Precision::exact(42)),
1✔
472
        ])
473
        .into_iter();
1✔
474
        let (stat, first) = set.next().unwrap();
1✔
475
        assert_eq!(stat, Stat::Max);
1✔
476
        assert_eq!(
1✔
477
            first.map(|f| i32::try_from(&f).unwrap()),
1✔
478
            Precision::exact(100)
1✔
479
        );
480
        let snd = set.next().unwrap();
1✔
481
        assert_eq!(snd.0, Stat::Min);
1✔
482
        assert_eq!(
1✔
483
            snd.1.map(|s| i32::try_from(&s).unwrap()),
1✔
484
            Precision::exact(42)
1✔
485
        );
486
    }
1✔
487

488
    #[test]
489
    fn merge_constant() {
1✔
490
        let first = StatsSet::from_iter([
1✔
491
            (Stat::Min, Precision::exact(42)),
1✔
492
            (Stat::IsConstant, Precision::exact(true)),
1✔
493
        ])
1✔
494
        .merge_ordered(
1✔
495
            &StatsSet::from_iter([
1✔
496
                (Stat::Min, Precision::inexact(42)),
1✔
497
                (Stat::IsConstant, Precision::exact(true)),
1✔
498
            ]),
1✔
499
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
500
        );
501
        assert_eq!(
1✔
502
            first.get_as::<bool>(Stat::IsConstant),
1✔
503
            Some(Precision::exact(false))
1✔
504
        );
505
        assert_eq!(first.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
1✔
506
    }
1✔
507

508
    #[test]
509
    fn merge_into_min() {
1✔
510
        let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
1✔
511
            &StatsSet::default(),
1✔
512
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
513
        );
514
        assert!(first.get(Stat::Min).is_none());
1✔
515
    }
1✔
516

517
    #[test]
518
    fn merge_from_min() {
1✔
519
        let first = StatsSet::default().merge_ordered(
1✔
520
            &StatsSet::of(Stat::Min, Precision::exact(42)),
1✔
521
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
522
        );
523
        assert!(first.get(Stat::Min).is_none());
1✔
524
    }
1✔
525

526
    #[test]
527
    fn merge_mins() {
1✔
528
        let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
1✔
529
            &StatsSet::of(Stat::Min, Precision::exact(42)),
1✔
530
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
531
        );
532
        assert_eq!(first.get_as::<i32>(Stat::Min), Some(Precision::exact(37)));
1✔
533
    }
1✔
534

535
    #[test]
536
    fn merge_into_bound_max() {
1✔
537
        let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
1✔
538
            &StatsSet::default(),
1✔
539
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
540
        );
541
        assert!(first.get(Stat::Max).is_none());
1✔
542
    }
1✔
543

544
    #[test]
545
    fn merge_from_max() {
1✔
546
        let first = StatsSet::default().merge_ordered(
1✔
547
            &StatsSet::of(Stat::Max, Precision::exact(42)),
1✔
548
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
549
        );
550
        assert!(first.get(Stat::Max).is_none());
1✔
551
    }
1✔
552

553
    #[test]
554
    fn merge_maxes() {
1✔
555
        let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
1✔
556
            &StatsSet::of(Stat::Max, Precision::exact(42)),
1✔
557
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
558
        );
559
        assert_eq!(first.get_as::<i32>(Stat::Max), Some(Precision::exact(42)));
1✔
560
    }
1✔
561

562
    #[test]
563
    fn merge_maxes_bound() {
1✔
564
        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
1✔
565
        let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
1✔
566
            .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
1✔
567
        assert_eq!(first.get_as::<i32>(Stat::Max), Some(Precision::inexact(43)));
1✔
568
    }
1✔
569

570
    #[test]
571
    fn merge_into_scalar() {
1✔
572
        let first = StatsSet::of(Stat::Sum, Precision::exact(42)).merge_ordered(
1✔
573
            &StatsSet::default(),
1✔
574
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
575
        );
576
        assert!(first.get(Stat::Sum).is_none());
1✔
577
    }
1✔
578

579
    #[test]
580
    fn merge_from_scalar() {
1✔
581
        let first = StatsSet::default().merge_ordered(
1✔
582
            &StatsSet::of(Stat::Sum, Precision::exact(42)),
1✔
583
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
584
        );
585
        assert!(first.get(Stat::Sum).is_none());
1✔
586
    }
1✔
587

588
    #[test]
589
    fn merge_scalars() {
1✔
590
        let first = StatsSet::of(Stat::Sum, Precision::exact(37)).merge_ordered(
1✔
591
            &StatsSet::of(Stat::Sum, Precision::exact(42)),
1✔
592
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
593
        );
594
        assert_eq!(
1✔
595
            first.get_as::<usize>(Stat::Sum),
1✔
596
            Some(Precision::exact(79usize))
1✔
597
        );
598
    }
1✔
599

600
    #[test]
601
    fn merge_into_sortedness() {
1✔
602
        let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
1✔
603
            &StatsSet::default(),
1✔
604
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
605
        );
606
        assert!(first.get(Stat::IsStrictSorted).is_none());
1✔
607
    }
1✔
608

609
    #[test]
610
    fn merge_from_sortedness() {
1✔
611
        let first = StatsSet::default().merge_ordered(
1✔
612
            &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
1✔
613
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
614
        );
615
        assert!(first.get(Stat::IsStrictSorted).is_none());
1✔
616
    }
1✔
617

618
    #[test]
619
    fn merge_sortedness() {
1✔
620
        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
1✔
621
        first.set(Stat::Max, Precision::exact(1));
1✔
622
        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
1✔
623
        second.set(Stat::Min, Precision::exact(2));
1✔
624
        first = first.merge_ordered(
1✔
625
            &second,
1✔
626
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
627
        );
628
        assert_eq!(
1✔
629
            first.get_as::<bool>(Stat::IsStrictSorted),
1✔
630
            Some(Precision::exact(true))
1✔
631
        );
632
    }
1✔
633

634
    #[test]
635
    fn merge_sortedness_out_of_order() {
1✔
636
        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
1✔
637
        first.set(Stat::Min, Precision::exact(1));
1✔
638
        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
1✔
639
        second.set(Stat::Max, Precision::exact(2));
1✔
640
        second = second.merge_ordered(
1✔
641
            &first,
1✔
642
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
643
        );
644
        assert_eq!(
1✔
645
            second.get_as::<bool>(Stat::IsStrictSorted),
1✔
646
            Some(Precision::inexact(false))
1✔
647
        );
648
    }
1✔
649

650
    #[test]
    fn merge_sortedness_only_one_sorted() {
        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
        first.set(Stat::Max, Precision::exact(1));
        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
        second.set(Stat::Min, Precision::exact(2));

        // Check the merge result itself, not the untouched right-hand input.
        let merged = first.merge_ordered(
            &second,
            &DType::Primitive(PType::I32, Nullability::NonNullable),
        );

        // Only one side is exactly sorted, so the merge drops the stat.
        assert!(merged.get(Stat::IsStrictSorted).is_none());
    }
1✔
665

666
    #[test]
667
    fn merge_sortedness_missing_min() {
1✔
668
        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
1✔
669
        first.set(Stat::Max, Precision::exact(1));
1✔
670
        let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
1✔
671
        first = first.merge_ordered(
1✔
672
            &second,
1✔
673
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
674
        );
675
        assert!(first.get(Stat::IsStrictSorted).is_none());
1✔
676
    }
1✔
677

678
    #[test]
679
    fn merge_sortedness_bound_min() {
1✔
680
        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
1✔
681
        first.set(Stat::Max, Precision::exact(1));
1✔
682
        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
1✔
683
        second.set(Stat::Min, Precision::inexact(2));
1✔
684
        first = first.merge_ordered(
1✔
685
            &second,
1✔
686
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
687
        );
688
        assert_eq!(
1✔
689
            first.get_as::<bool>(Stat::IsStrictSorted),
1✔
690
            Some(Precision::exact(true))
1✔
691
        );
692
    }
1✔
693

694
    #[test]
695
    fn merge_unordered() {
1✔
696
        let array =
1✔
697
            PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
1✔
698
        let all_stats = all::<Stat>()
1✔
699
            .filter(|s| !matches!(s, Stat::Sum))
9✔
700
            .filter(|s| !matches!(s, Stat::NaNCount))
8✔
701
            .collect_vec();
1✔
702
        array.statistics().compute_all(&all_stats).unwrap();
1✔
703

704
        let stats = array.statistics().to_owned();
1✔
705
        for stat in &all_stats {
8✔
706
            assert!(stats.get(*stat).is_some(), "Stat {stat} is missing");
7✔
707
        }
708

709
        let merged = stats.clone().merge_unordered(
1✔
710
            &stats,
1✔
711
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
712
        );
713
        for stat in &all_stats {
8✔
714
            assert_eq!(
7✔
715
                merged.get(*stat).is_some(),
7✔
716
                stat.is_commutative(),
7✔
717
                "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
718
            )
719
        }
720

721
        assert_eq!(
1✔
722
            merged.get_as::<i32>(Stat::Min),
1✔
723
            stats.get_as::<i32>(Stat::Min)
1✔
724
        );
725
        assert_eq!(
1✔
726
            merged.get_as::<i32>(Stat::Max),
1✔
727
            stats.get_as::<i32>(Stat::Max)
1✔
728
        );
729
        assert_eq!(
1✔
730
            merged.get_as::<u64>(Stat::NullCount).unwrap(),
1✔
731
            stats.get_as::<u64>(Stat::NullCount).unwrap().map(|s| s * 2)
1✔
732
        );
733
    }
1✔
734

735
    /// An inexact `Min` of 5 merged (ordered) with an exact `Min` of 5 yields an exact 5.
    #[test]
    fn merge_min_bound_same() {
        // An inexact Min is only a lower bound: the data may hold any value >= the bound.
        // When the other side carries the same value exactly, the merged stat is exact.
        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
        let lower_bound = StatsSet::of(Stat::Min, Precision::inexact(5));
        let exact = StatsSet::of(Stat::Min, Precision::exact(5));

        let merged = lower_bound.merge_ordered(&exact, &dtype);

        assert_eq!(merged.get_as::<i32>(Stat::Min), Some(Precision::exact(5)));
    }
1✔
745

746
    /// An inexact `Min` of 4 merged (ordered) with an exact `Min` of 5 stays an inexact 4.
    #[test]
    fn merge_min_bound_bound_lower() {
        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
        let lower_bound = StatsSet::of(Stat::Min, Precision::inexact(4));
        let exact = StatsSet::of(Stat::Min, Precision::exact(5));

        let merged = lower_bound.merge_ordered(&exact, &dtype);

        // The smaller, inexact value is the one that survives the merge.
        assert_eq!(merged.get_as::<i32>(Stat::Min), Some(Precision::inexact(4)));
    }
1✔
754

755
    /// `keep_inexact_stats` keeps only the listed stats and reports them as inexact.
    #[test]
    fn retain_approx() {
        let original = StatsSet::from_iter([
            (Stat::Max, Precision::exact(100)),
            (Stat::Min, Precision::exact(50)),
            (Stat::Sum, Precision::inexact(10)),
        ]);

        let kept = original.keep_inexact_stats(&[Stat::Min, Stat::Max]);

        // Only the two requested stats remain.
        assert_eq!(kept.len(), 2);
        // Both were exact going in and are inexact coming out.
        assert_eq!(kept.get_as::<i32>(Stat::Max), Some(Precision::inexact(100)));
        assert_eq!(kept.get_as::<i32>(Stat::Min), Some(Precision::inexact(50)));
        // Sum was not requested, so it is gone.
        assert_eq!(kept.get_as::<i32>(Stat::Sum), None);
    }
1✔
770

771
    /// Exercises `combine_bool_stat::<IsConstant>` on three pairs of `IsConstant` values.
    #[test]
    fn test_combine_is_constant() {
        // Builds two single-stat sets, combines the second into the first and checks the
        // `IsConstant` value left in the first.
        let check = |lhs, rhs, expected| {
            let mut stats = StatsSet::of(Stat::IsConstant, lhs);
            let stats2 = StatsSet::of(Stat::IsConstant, rhs);
            stats.combine_bool_stat::<IsConstant>(&stats2).unwrap();
            assert_eq!(stats.get_as::<bool>(Stat::IsConstant), Some(expected));
        };

        // exact(true) + exact(true) -> exact(true)
        check(
            Precision::exact(true),
            Precision::exact(true),
            Precision::exact(true),
        );

        // exact(true) + inexact(false) -> exact(true)
        check(
            Precision::exact(true),
            Precision::inexact(false),
            Precision::exact(true),
        );

        // exact(false) + inexact(false) -> exact(false)
        check(
            Precision::exact(false),
            Precision::inexact(false),
            Precision::exact(false),
        );
    }
1✔
803

804
    /// Two sets that hold different exact `IsConstant` values: `combine_sets` is expected
    /// to return an error.
    #[test]
    fn test_combine_sets_boolean_conflict() {
        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);

        let mut constant_side = StatsSet::from_iter([
            (Stat::IsConstant, Precision::exact(true)),
            (Stat::IsSorted, Precision::exact(true)),
        ]);
        // Same IsSorted value, opposite IsConstant value.
        let non_constant_side = StatsSet::from_iter([
            (Stat::IsConstant, Precision::exact(false)),
            (Stat::IsSorted, Precision::exact(true)),
        ]);

        let outcome = constant_side.combine_sets(&non_constant_side, &dtype);

        assert!(outcome.is_err());
    }
1✔
822

823
    /// Combines two sets with no stat in common and checks the resulting contents of the
    /// receiving set.
    #[test]
    fn test_combine_sets_with_missing_stats() {
        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);

        let mut target = StatsSet::from_iter([
            (Stat::Min, Precision::exact(42)),
            (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
        ]);
        let incoming = StatsSet::from_iter([
            (Stat::Max, Precision::exact(100)),
            (Stat::IsStrictSorted, Precision::exact(true)),
        ]);

        target.combine_sets(&incoming, &dtype).unwrap();

        // Min exists only in the target and keeps its value.
        assert_eq!(target.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
        // Max exists only in the incoming set and is copied over.
        assert_eq!(target.get_as::<i32>(Stat::Max), Some(Precision::exact(100)));
        // The same holds for IsStrictSorted.
        let strict_sorted = target.get_as::<bool>(Stat::IsStrictSorted);
        assert_eq!(strict_sorted, Some(Precision::exact(true)));
    }
1✔
852

853
    /// Combines sets that mix exact and inexact `Min`/`Max` values and checks which value
    /// ends up in the receiving set.
    #[test]
    fn test_combine_sets_with_inexact() {
        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);

        let mut target = StatsSet::from_iter([
            (Stat::Min, Precision::exact(42)),
            (Stat::Max, Precision::inexact(100)),
            (Stat::IsConstant, Precision::exact(false)),
        ]);
        let incoming = StatsSet::from_iter([
            // The incoming Min has to be <= the target's Min.
            (Stat::Min, Precision::inexact(40)),
            (Stat::Max, Precision::exact(90)),
            (Stat::IsSorted, Precision::exact(true)),
        ]);

        target.combine_sets(&incoming, &dtype).unwrap();

        // The exact Min of 42 is tighter than the inexact 40, so it is kept.
        assert_eq!(target.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
        // The exact Max of 90 replaces the inexact 100.
        assert_eq!(target.get_as::<i32>(Stat::Max), Some(Precision::exact(90)));
        // IsSorted was absent from the target and is taken from the incoming set.
        let sorted = target.get_as::<bool>(Stat::IsSorted);
        assert_eq!(sorted, Some(Precision::exact(true)));
    }
1✔
885
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc