• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16331938722

16 Jul 2025 10:49PM UTC coverage: 80.702% (-0.9%) from 81.557%
16331938722

push

github

web-flow
feat: build with stable rust (#3881)

120 of 173 new or added lines in 28 files covered. (69.36%)

174 existing lines in 102 files now uncovered.

41861 of 51871 relevant lines covered (80.7%)

157487.71 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.29
/vortex-array/src/stats/stats_set.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::fmt::Debug;
5

6
use enum_iterator::{Sequence, all};
7
use num_traits::CheckedAdd;
8
use vortex_dtype::DType;
9
use vortex_error::{VortexExpect, VortexResult, vortex_err};
10
use vortex_scalar::{Scalar, ScalarValue};
11

12
use super::traits::StatsProvider;
13
use super::{IsSorted, IsStrictSorted, NaNCount, NullCount, StatType, UncompressedSizeInBytes};
14
use crate::stats::{IsConstant, Max, Min, Precision, Stat, StatBound, StatsProviderExt, Sum};
15

16
/// A set of statistics, stored as `(Stat, Precision<ScalarValue>)` pairs.
///
/// Entries live in a flat vector; `set` keeps at most one entry per `Stat`, while
/// `new_unchecked` stores whatever it is given.
#[derive(Default, Debug, Clone)]
pub struct StatsSet {
    /// The stored statistics, in insertion order.
    values: Vec<(Stat, Precision<ScalarValue>)>,
}
20

21
impl StatsSet {
22
    /// Create new StatSet without validating uniqueness of all the entries
23
    ///
24
    /// # Safety
25
    ///
26
    /// This method will not panic or trigger UB, but may lead to duplicate stats being stored.
27
    pub fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
38✔
28
        Self { values }
38✔
29
    }
38✔
30

31
    /// Specialized constructor for the case where the StatsSet represents
32
    /// an array consisting entirely of [null](vortex_dtype::DType::Null) values.
33
    pub fn nulls(len: usize) -> Self {
×
34
        let mut stats = Self::new_unchecked(vec![(Stat::NullCount, Precision::exact(len))]);
×
35

36
        if len > 0 {
×
37
            stats.set(Stat::IsConstant, Precision::exact(true));
×
38
            stats.set(Stat::IsSorted, Precision::exact(true));
×
39
            stats.set(Stat::IsStrictSorted, Precision::exact(len < 2));
×
40
        }
×
41

42
        stats
×
43
    }
×
44

45
    /// A convenience method for creating a stats set which will represent an empty array.
46
    pub fn empty_array() -> StatsSet {
×
47
        StatsSet::new_unchecked(vec![(Stat::NullCount, Precision::exact(0))])
×
48
    }
×
49

50
    pub fn constant(scalar: Scalar, length: usize) -> Self {
37,247✔
51
        let (dtype, sv) = scalar.into_parts();
37,247✔
52
        let mut stats = Self::default();
37,247✔
53
        if length > 0 {
37,247✔
54
            stats.extend([
37,247✔
55
                (Stat::IsConstant, Precision::exact(true)),
37,247✔
56
                (Stat::IsSorted, Precision::exact(true)),
37,247✔
57
                (Stat::IsStrictSorted, Precision::exact(length <= 1)),
37,247✔
58
            ]);
37,247✔
59
        }
37,247✔
60

61
        let null_count = if sv.is_null() { length as u64 } else { 0 };
37,247✔
62
        stats.set(Stat::NullCount, Precision::exact(null_count));
37,247✔
63

64
        if !sv.is_null() {
37,247✔
65
            stats.extend([
36,383✔
66
                (Stat::Min, Precision::exact(sv.clone())),
36,383✔
67
                (Stat::Max, Precision::exact(sv.clone())),
36,383✔
68
            ]);
36,383✔
69
        }
36,383✔
70

71
        if matches!(dtype, DType::Bool(_)) {
37,247✔
72
            let bool_val = <Option<bool>>::try_from(&sv).vortex_expect("Checked dtype");
13,571✔
73
            let true_count = bool_val
13,571✔
74
                .map(|b| if b { length as u64 } else { 0 })
13,571✔
75
                .unwrap_or(0);
13,571✔
76
            stats.set(Stat::Sum, Precision::exact(true_count));
13,571✔
77
        }
23,676✔
78

79
        stats
37,247✔
80
    }
37,247✔
81

82
    pub fn bools_with_sum_and_null_count(true_count: usize, null_count: usize, len: usize) -> Self {
×
83
        StatsSet::new_unchecked(vec![
×
84
            (Stat::Sum, Precision::exact(true_count)),
×
85
            (Stat::NullCount, Precision::exact(null_count)),
×
86
            (Stat::Min, Precision::exact(true_count == len)),
×
87
            (Stat::Max, Precision::exact(true_count > 0)),
×
88
            (
89
                Stat::IsConstant,
×
90
                Precision::exact((true_count == 0 && null_count == 0) || true_count == len),
×
91
            ),
92
        ])
93
    }
×
94

95
    pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
36✔
96
        Self::new_unchecked(vec![(stat, value)])
36✔
97
    }
36✔
98

99
    fn reserve_full_capacity(&mut self) {
1,519,070✔
100
        if self.values.capacity() < Stat::CARDINALITY {
1,519,070✔
101
            self.values
242,301✔
102
                .reserve_exact(Stat::CARDINALITY - self.values.capacity());
242,301✔
103
        }
1,276,769✔
104
    }
1,519,070✔
105
}
106

107
// Getters and setters for individual stats.
108
impl StatsSet {
109
    /// Set the stat `stat` to `value`.
110
    pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
1,388,136✔
111
        self.reserve_full_capacity();
1,388,136✔
112

113
        if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) {
2,823,937✔
114
            *existing = (stat, value);
393,910✔
115
        } else {
994,226✔
116
            self.values.push((stat, value));
994,226✔
117
        }
994,226✔
118
    }
1,388,136✔
119

120
    /// Clear the stat `stat` from the set.
121
    pub fn clear(&mut self, stat: Stat) {
154✔
122
        self.values.retain(|(s, _)| *s != stat);
273✔
123
    }
154✔
124

125
    pub fn retain_only(&mut self, stats: &[Stat]) {
×
126
        self.values.retain(|(s, _)| stats.contains(s));
×
127
    }
×
128

129
    /// Consumes the set and returns a new one containing only the stats listed in
    /// `inexact_keep`, each converted with `into_inexact`.
    pub fn keep_inexact_stats(self, inexact_keep: &[Stat]) -> Self {
        self.values
            .into_iter()
            .filter(|(s, _)| inexact_keep.contains(s))
            .map(|(s, v)| (s, v.into_inexact()))
            .collect()
    }
57,295✔
135

136
    /// Iterate over the statistic names and values in-place.
137
    ///
138
    /// See [Iterator].
139
    pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
1✔
140
        self.values.iter()
1✔
141
    }
1✔
142
}
143

144
// StatsSetIntoIter exists only to keep the current implementation out of the public API.
145

146
/// Owned iterator over the stats.
///
/// Wraps the `std::vec::IntoIter` of the underlying entry vector so that the concrete
/// iterator type is not named in the signature of `StatsSet::into_iter`.
///
/// See [IntoIterator].
pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
150

151
impl Iterator for StatsSetIntoIter {
152
    type Item = (Stat, Precision<ScalarValue>);
153

154
    fn next(&mut self) -> Option<Self::Item> {
982,452✔
155
        self.0.next()
982,452✔
156
    }
982,452✔
157
}
158

159
impl IntoIterator for StatsSet {
160
    type Item = (Stat, Precision<ScalarValue>);
161
    type IntoIter = StatsSetIntoIter;
162

163
    fn into_iter(self) -> Self::IntoIter {
376,692✔
164
        StatsSetIntoIter(self.values.into_iter())
376,692✔
165
    }
376,692✔
166
}
167

168
impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
169
    fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
57,304✔
170
        let iter = iter.into_iter();
57,304✔
171
        let mut values = Vec::default();
57,304✔
172
        values.reserve_exact(Stat::CARDINALITY);
57,304✔
173

174
        let mut this = Self { values };
57,304✔
175
        this.extend(iter);
57,304✔
176
        this
57,304✔
177
    }
57,304✔
178
}
179

180
impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
    /// Inserts every `(stat, value)` pair via `set`, so an incoming pair replaces any
    /// entry already stored for the same stat.
    #[inline]
    fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
        let entries = iter.into_iter();
        self.reserve_full_capacity();

        for (stat, value) in entries {
            self.set(stat, value);
        }
    }
}
189

190
// Merge helpers
191
impl StatsSet {
192
    /// Merge stats set `other` into `self`, with the semantic assumption that `other`
193
    /// contains stats from a disjoint array that is *appended* to the array represented by `self`.
194
    pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
20✔
195
        for s in all::<Stat>() {
180✔
196
            match s {
180✔
197
                Stat::IsConstant => self.merge_is_constant(other, dtype),
20✔
198
                Stat::IsSorted => self.merge_is_sorted(other, dtype),
20✔
199
                Stat::IsStrictSorted => self.merge_is_strict_sorted(other, dtype),
20✔
200
                Stat::Max => self.merge_max(other, dtype),
20✔
201
                Stat::Min => self.merge_min(other, dtype),
20✔
202
                Stat::Sum => self.merge_sum(other, dtype),
20✔
203
                Stat::NullCount => self.merge_null_count(other),
20✔
204
                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
20✔
205
                Stat::NaNCount => self.merge_nan_count(other),
20✔
206
            }
207
        }
208

209
        self
20✔
210
    }
20✔
211

212
    /// Merge stats set `other` into `self`, from a disjoint array, with no ordering assumptions.
213
    /// Stats that are not commutative (e.g., is_sorted) are dropped from the result.
214
    pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
1✔
215
        for s in all::<Stat>() {
9✔
216
            if !s.is_commutative() {
9✔
217
                self.clear(s);
2✔
218
                continue;
2✔
219
            }
7✔
220

221
            match s {
7✔
222
                Stat::IsConstant => self.merge_is_constant(other, dtype),
1✔
223
                Stat::Max => self.merge_max(other, dtype),
1✔
224
                Stat::Min => self.merge_min(other, dtype),
1✔
225
                Stat::Sum => self.merge_sum(other, dtype),
1✔
226
                Stat::NullCount => self.merge_null_count(other),
1✔
227
                Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
1✔
228
                Stat::IsSorted | Stat::IsStrictSorted => {
229
                    unreachable!("not commutative")
×
230
                }
231
                Stat::NaNCount => self.merge_nan_count(other),
1✔
232
            }
233
        }
234

235
        self
1✔
236
    }
1✔
237

238
    /// Given two sets of stats (of differing precision) for the same array, combine them
239
    pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
57,261✔
240
        let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
57,261✔
241
        for s in other_stats {
162,208✔
242
            match s {
104,948✔
243
                Stat::Max => self.combine_bound::<Max>(other, dtype)?,
31,730✔
244
                Stat::Min => self.combine_bound::<Min>(other, dtype)?,
31,729✔
245
                Stat::UncompressedSizeInBytes => {
246
                    self.combine_bound::<UncompressedSizeInBytes>(other, dtype)?
5,876✔
247
                }
248
                Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
1✔
249
                Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
1,151✔
250
                Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
841✔
251
                Stat::NullCount => self.combine_bound::<NullCount>(other, dtype)?,
33,620✔
252
                Stat::Sum => self.combine_bound::<Sum>(other, dtype)?,
×
253
                Stat::NaNCount => self.combine_bound::<NaNCount>(other, dtype)?,
×
254
            }
255
        }
256
        Ok(())
57,260✔
257
    }
57,261✔
258

259
    fn combine_bound<S: StatType<Scalar>>(
102,955✔
260
        &mut self,
102,955✔
261
        other: &Self,
102,955✔
262
        dtype: &DType,
102,955✔
263
    ) -> VortexResult<()>
102,955✔
264
    where
102,955✔
265
        S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
102,955✔
266
    {
267
        match (
268
            self.get_scalar_bound::<S>(dtype),
102,955✔
269
            other.get_scalar_bound::<S>(dtype),
102,955✔
270
        ) {
271
            (Some(m1), Some(m2)) => {
2✔
272
                let meet = m1
2✔
273
                    .intersection(&m2)
2✔
274
                    .vortex_expect("can always compare scalar")
2✔
275
                    .ok_or_else(|| {
2✔
276
                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
×
UNCOV
277
                    })?;
×
278
                if meet != m1 {
2✔
279
                    self.set(S::STAT, meet.into_value().map(Scalar::into_value));
1✔
280
                }
1✔
281
            }
282
            (None, Some(m)) => self.set(S::STAT, m.into_value().map(Scalar::into_value)),
102,953✔
283
            (Some(_), _) => (),
×
284
            (None, None) => self.clear(S::STAT),
×
285
        }
286
        Ok(())
102,955✔
287
    }
102,955✔
288

289
    fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &Self) -> VortexResult<()>
1,996✔
290
    where
1,996✔
291
        S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
1,996✔
292
    {
293
        match (
294
            self.get_as_bound::<S, bool>(),
1,996✔
295
            other.get_as_bound::<S, bool>(),
1,996✔
296
        ) {
297
            (Some(m1), Some(m2)) => {
4✔
298
                let intersection = m1
4✔
299
                    .intersection(&m2)
4✔
300
                    .vortex_expect("can always compare boolean")
4✔
301
                    .ok_or_else(|| {
4✔
302
                        vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
1✔
303
                    })?;
1✔
304
                if intersection != m1 {
3✔
305
                    self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
×
306
                }
3✔
307
            }
308
            (None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
1,992✔
309
            (Some(_), None) => (),
×
310
            (None, None) => self.clear(S::STAT),
×
311
        }
312
        Ok(())
1,995✔
313
    }
1,996✔
314

315
    fn merge_min(&mut self, other: &Self, dtype: &DType) {
21✔
316
        match (
317
            self.get_scalar_bound::<Min>(dtype),
21✔
318
            other.get_scalar_bound::<Min>(dtype),
21✔
319
        ) {
320
            (Some(m1), Some(m2)) => {
5✔
321
                let meet = m1.union(&m2).vortex_expect("can compare scalar");
5✔
322
                if meet != m1 {
5✔
323
                    self.set(Stat::Min, meet.into_value().map(Scalar::into_value));
1✔
324
                }
4✔
325
            }
326
            _ => self.clear(Stat::Min),
16✔
327
        }
328
    }
21✔
329

330
    fn merge_max(&mut self, other: &Self, dtype: &DType) {
21✔
331
        match (
332
            self.get_scalar_bound::<Max>(dtype),
21✔
333
            other.get_scalar_bound::<Max>(dtype),
21✔
334
        ) {
335
            (Some(m1), Some(m2)) => {
3✔
336
                let meet = m1.union(&m2).vortex_expect("can compare scalar");
3✔
337
                if meet != m1 {
3✔
338
                    self.set(Stat::Max, meet.into_value().map(Scalar::into_value));
2✔
339
                }
2✔
340
            }
341
            _ => self.clear(Stat::Max),
18✔
342
        }
343
    }
21✔
344

345
    fn merge_sum(&mut self, other: &Self, dtype: &DType) {
21✔
346
        match (
347
            self.get_scalar_bound::<Sum>(dtype),
21✔
348
            other.get_scalar_bound::<Sum>(dtype),
21✔
349
        ) {
350
            (Some(m1), Some(m2)) => {
1✔
351
                // If the combine sum is exact, then we can sum them.
352
                if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
1✔
353
                    s1.as_primitive()
1✔
354
                        .checked_add(&s2.as_primitive())
1✔
355
                        .map(|pscalar| {
1✔
356
                            pscalar
1✔
357
                                .pvalue()
1✔
358
                                .map(|pvalue| {
1✔
359
                                    Scalar::primitive_value(
1✔
360
                                        pvalue,
1✔
361
                                        pscalar.ptype(),
1✔
362
                                        pscalar.dtype().nullability(),
1✔
363
                                    )
364
                                    .into_value()
1✔
365
                                })
1✔
366
                                .unwrap_or_else(ScalarValue::null)
1✔
367
                        })
1✔
368
                }) {
1✔
369
                    self.set(Stat::Sum, Precision::Exact(scalar_value));
1✔
370
                }
1✔
371
            }
372
            _ => self.clear(Stat::Sum),
20✔
373
        }
374
    }
21✔
375

376
    fn merge_is_constant(&mut self, other: &Self, dtype: &DType) {
21✔
377
        let self_const = self.get_as(Stat::IsConstant);
21✔
378
        let other_const = other.get_as(Stat::IsConstant);
21✔
379
        let self_min = self.get_scalar(Stat::Min, dtype);
21✔
380
        let other_min = other.get_scalar(Stat::Min, dtype);
21✔
381

382
        if let (
383
            Some(Precision::Exact(self_const)),
1✔
384
            Some(Precision::Exact(other_const)),
1✔
385
            Some(Precision::Exact(self_min)),
1✔
386
            Some(Precision::Exact(other_min)),
1✔
387
        ) = (self_const, other_const, self_min, other_min)
21✔
388
        {
389
            if self_const && other_const && self_min == other_min {
1✔
390
                self.set(Stat::IsConstant, Precision::exact(true));
×
391
            } else {
1✔
392
                self.set(Stat::IsConstant, Precision::inexact(false));
1✔
393
            }
1✔
394
        }
20✔
395
        self.set(Stat::IsConstant, Precision::exact(false));
21✔
396
    }
21✔
397

398
    fn merge_is_sorted(&mut self, other: &Self, dtype: &DType) {
20✔
399
        self.merge_sortedness_stat(other, Stat::IsSorted, dtype, PartialOrd::le)
20✔
400
    }
20✔
401

402
    fn merge_is_strict_sorted(&mut self, other: &Self, dtype: &DType) {
20✔
403
        self.merge_sortedness_stat(other, Stat::IsStrictSorted, dtype, PartialOrd::lt)
20✔
404
    }
20✔
405

406
    fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
40✔
407
        &mut self,
40✔
408
        other: &Self,
40✔
409
        stat: Stat,
40✔
410
        dtype: &DType,
40✔
411
        cmp: F,
40✔
412
    ) {
40✔
413
        if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
40✔
414
            == (self.get_as(stat), other.get_as(stat))
40✔
415
        {
416
            // There might be no stat because it was dropped, or it doesn't exist
417
            // (e.g. an all null array).
418
            // We assume that it was the dropped case since the doesn't exist might imply sorted,
419
            // but this in-precision is correct.
420
            if let (Some(self_max), Some(other_min)) = (
3✔
421
                self.get_scalar_bound::<Max>(dtype),
4✔
422
                other.get_scalar_bound::<Min>(dtype),
4✔
423
            ) {
424
                return if cmp(&self_max.max_value(), &other_min.min_value()) {
3✔
425
                    // keep value
2✔
426
                } else {
2✔
427
                    self.set(stat, Precision::inexact(false));
1✔
428
                };
1✔
429
            }
1✔
430
        }
36✔
431
        self.clear(stat);
37✔
432
    }
40✔
433

434
    fn merge_null_count(&mut self, other: &Self) {
21✔
435
        self.merge_sum_stat(Stat::NullCount, other)
21✔
436
    }
21✔
437

438
    fn merge_nan_count(&mut self, other: &Self) {
21✔
439
        self.merge_sum_stat(Stat::NaNCount, other)
21✔
440
    }
21✔
441

442
    fn merge_uncompressed_size_in_bytes(&mut self, other: &Self) {
21✔
443
        self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
21✔
444
    }
21✔
445

446
    fn merge_sum_stat(&mut self, stat: Stat, other: &Self) {
63✔
447
        match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
63✔
448
            (Some(nc1), Some(nc2)) => {
2✔
449
                self.set(
2✔
450
                    stat,
2✔
451
                    nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
2✔
452
                );
453
            }
454
            _ => self.clear(stat),
61✔
455
        }
456
    }
63✔
457
}
458

459
impl StatsProvider for StatsSet {
460
    fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
1,435,754✔
461
        self.values
1,435,754✔
462
            .iter()
1,435,754✔
463
            .find(|(s, _)| *s == stat)
2,312,686✔
464
            .map(|(_, v)| v.clone())
1,435,754✔
465
    }
1,435,754✔
466

467
    fn len(&self) -> usize {
1✔
468
        self.values.len()
1✔
469
    }
1✔
470
}
471

472
#[cfg(test)]
473
mod test {
474
    use enum_iterator::all;
475
    use itertools::Itertools;
476
    use vortex_dtype::{DType, Nullability, PType};
477

478
    use crate::arrays::PrimitiveArray;
479
    use crate::stats::{IsConstant, Precision, Stat, StatsProvider, StatsProviderExt, StatsSet};
480

481
    #[test]
482
    fn test_iter() {
1✔
483
        let set = StatsSet::new_unchecked(vec![
1✔
484
            (Stat::Max, Precision::exact(100)),
1✔
485
            (Stat::Min, Precision::exact(42)),
1✔
486
        ]);
487
        let mut iter = set.iter();
1✔
488
        let first = iter.next().unwrap().clone();
1✔
489
        assert_eq!(first.0, Stat::Max);
1✔
490
        assert_eq!(
1✔
491
            first.1.map(|f| i32::try_from(&f).unwrap()),
1✔
492
            Precision::exact(100)
1✔
493
        );
494
        let snd = iter.next().unwrap().clone();
1✔
495
        assert_eq!(snd.0, Stat::Min);
1✔
496
        assert_eq!(snd.1.map(|s| i32::try_from(&s).unwrap()), 42);
1✔
497
    }
1✔
498

499
    #[test]
500
    fn into_iter() {
1✔
501
        let mut set = StatsSet::new_unchecked(vec![
1✔
502
            (Stat::Max, Precision::exact(100)),
1✔
503
            (Stat::Min, Precision::exact(42)),
1✔
504
        ])
505
        .into_iter();
1✔
506
        let (stat, first) = set.next().unwrap();
1✔
507
        assert_eq!(stat, Stat::Max);
1✔
508
        assert_eq!(
1✔
509
            first.map(|f| i32::try_from(&f).unwrap()),
1✔
510
            Precision::exact(100)
1✔
511
        );
512
        let snd = set.next().unwrap();
1✔
513
        assert_eq!(snd.0, Stat::Min);
1✔
514
        assert_eq!(
1✔
515
            snd.1.map(|s| i32::try_from(&s).unwrap()),
1✔
516
            Precision::exact(42)
1✔
517
        );
518
    }
1✔
519

520
    #[test]
521
    fn merge_constant() {
1✔
522
        let first = StatsSet::from_iter([
1✔
523
            (Stat::Min, Precision::exact(42)),
1✔
524
            (Stat::IsConstant, Precision::exact(true)),
1✔
525
        ])
1✔
526
        .merge_ordered(
1✔
527
            &StatsSet::from_iter([
1✔
528
                (Stat::Min, Precision::inexact(42)),
1✔
529
                (Stat::IsConstant, Precision::exact(true)),
1✔
530
            ]),
1✔
531
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
532
        );
533
        assert_eq!(
1✔
534
            first.get_as::<bool>(Stat::IsConstant),
1✔
535
            Some(Precision::exact(false))
1✔
536
        );
537
        assert_eq!(first.get_as::<i32>(Stat::Min), Some(Precision::exact(42)));
1✔
538
    }
1✔
539

540
    #[test]
541
    fn merge_into_min() {
1✔
542
        let first = StatsSet::of(Stat::Min, Precision::exact(42)).merge_ordered(
1✔
543
            &StatsSet::default(),
1✔
544
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
545
        );
546
        assert!(first.get(Stat::Min).is_none());
1✔
547
    }
1✔
548

549
    #[test]
550
    fn merge_from_min() {
1✔
551
        let first = StatsSet::default().merge_ordered(
1✔
552
            &StatsSet::of(Stat::Min, Precision::exact(42)),
1✔
553
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
554
        );
555
        assert!(first.get(Stat::Min).is_none());
1✔
556
    }
1✔
557

558
    #[test]
559
    fn merge_mins() {
1✔
560
        let first = StatsSet::of(Stat::Min, Precision::exact(37)).merge_ordered(
1✔
561
            &StatsSet::of(Stat::Min, Precision::exact(42)),
1✔
562
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
563
        );
564
        assert_eq!(first.get_as::<i32>(Stat::Min), Some(Precision::exact(37)));
1✔
565
    }
1✔
566

567
    #[test]
568
    fn merge_into_bound_max() {
1✔
569
        let first = StatsSet::of(Stat::Max, Precision::exact(42)).merge_ordered(
1✔
570
            &StatsSet::default(),
1✔
571
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
572
        );
573
        assert!(first.get(Stat::Max).is_none());
1✔
574
    }
1✔
575

576
    #[test]
577
    fn merge_from_max() {
1✔
578
        let first = StatsSet::default().merge_ordered(
1✔
579
            &StatsSet::of(Stat::Max, Precision::exact(42)),
1✔
580
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
581
        );
582
        assert!(first.get(Stat::Max).is_none());
1✔
583
    }
1✔
584

585
    #[test]
586
    fn merge_maxes() {
1✔
587
        let first = StatsSet::of(Stat::Max, Precision::exact(37)).merge_ordered(
1✔
588
            &StatsSet::of(Stat::Max, Precision::exact(42)),
1✔
589
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
590
        );
591
        assert_eq!(first.get_as::<i32>(Stat::Max), Some(Precision::exact(42)));
1✔
592
    }
1✔
593

594
    #[test]
595
    fn merge_maxes_bound() {
1✔
596
        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
1✔
597
        let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
1✔
598
            .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
1✔
599
        assert_eq!(first.get_as::<i32>(Stat::Max), Some(Precision::inexact(43)));
1✔
600
    }
1✔
601

602
    #[test]
603
    fn merge_into_scalar() {
1✔
604
        let first = StatsSet::of(Stat::Sum, Precision::exact(42)).merge_ordered(
1✔
605
            &StatsSet::default(),
1✔
606
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
607
        );
608
        assert!(first.get(Stat::Sum).is_none());
1✔
609
    }
1✔
610

611
    #[test]
612
    fn merge_from_scalar() {
1✔
613
        let first = StatsSet::default().merge_ordered(
1✔
614
            &StatsSet::of(Stat::Sum, Precision::exact(42)),
1✔
615
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
616
        );
617
        assert!(first.get(Stat::Sum).is_none());
1✔
618
    }
1✔
619

620
    #[test]
621
    fn merge_scalars() {
1✔
622
        let first = StatsSet::of(Stat::Sum, Precision::exact(37)).merge_ordered(
1✔
623
            &StatsSet::of(Stat::Sum, Precision::exact(42)),
1✔
624
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
625
        );
626
        assert_eq!(
1✔
627
            first.get_as::<usize>(Stat::Sum),
1✔
628
            Some(Precision::exact(79usize))
1✔
629
        );
630
    }
1✔
631

632
    #[test]
633
    fn merge_into_sortedness() {
1✔
634
        let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)).merge_ordered(
1✔
635
            &StatsSet::default(),
1✔
636
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
637
        );
638
        assert!(first.get(Stat::IsStrictSorted).is_none());
1✔
639
    }
1✔
640

641
    #[test]
642
    fn merge_from_sortedness() {
1✔
643
        let first = StatsSet::default().merge_ordered(
1✔
644
            &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
1✔
645
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
646
        );
647
        assert!(first.get(Stat::IsStrictSorted).is_none());
1✔
648
    }
1✔
649

650
    #[test]
651
    fn merge_sortedness() {
1✔
652
        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
1✔
653
        first.set(Stat::Max, Precision::exact(1));
1✔
654
        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
1✔
655
        second.set(Stat::Min, Precision::exact(2));
1✔
656
        first = first.merge_ordered(
1✔
657
            &second,
1✔
658
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
659
        );
660
        assert_eq!(
1✔
661
            first.get_as::<bool>(Stat::IsStrictSorted),
1✔
662
            Some(Precision::exact(true))
1✔
663
        );
664
    }
1✔
665

666
    #[test]
667
    fn merge_sortedness_out_of_order() {
1✔
668
        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
1✔
669
        first.set(Stat::Min, Precision::exact(1));
1✔
670
        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
1✔
671
        second.set(Stat::Max, Precision::exact(2));
1✔
672
        second = second.merge_ordered(
1✔
673
            &first,
1✔
674
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
675
        );
676
        assert_eq!(
1✔
677
            second.get_as::<bool>(Stat::IsStrictSorted),
1✔
678
            Some(Precision::inexact(false))
1✔
679
        );
680
    }
1✔
681

682
    /// Only one side is strictly sorted, so the merged set must not claim strict
    /// sortedness.
    #[test]
    fn merge_sortedness_only_one_sorted() {
        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
        first.set(Stat::Max, Precision::exact(1));
        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
        second.set(Stat::Min, Precision::exact(2));
        // The merge result was previously discarded, so the test only inspected the
        // untouched `second` and never exercised the merge.
        first = first.merge_ordered(
            &second,
            &DType::Primitive(PType::I32, Nullability::NonNullable),
        );
        assert!(first.get(Stat::IsStrictSorted).is_none());
        // `other` is taken by shared reference and must be left unchanged.
        assert_eq!(
            second.get_as::<bool>(Stat::IsStrictSorted),
            Some(Precision::exact(false))
        );
    }
1✔
697

698
    #[test]
699
    fn merge_sortedness_missing_min() {
1✔
700
        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
1✔
701
        first.set(Stat::Max, Precision::exact(1));
1✔
702
        let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
1✔
703
        first = first.merge_ordered(
1✔
704
            &second,
1✔
705
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
706
        );
707
        assert!(first.get(Stat::IsStrictSorted).is_none());
1✔
708
    }
1✔
709

710
    #[test]
711
    fn merge_sortedness_bound_min() {
1✔
712
        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
1✔
713
        first.set(Stat::Max, Precision::exact(1));
1✔
714
        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
1✔
715
        second.set(Stat::Min, Precision::inexact(2));
1✔
716
        first = first.merge_ordered(
1✔
717
            &second,
1✔
718
            &DType::Primitive(PType::I32, Nullability::NonNullable),
1✔
719
        );
720
        assert_eq!(
1✔
721
            first.get_as::<bool>(Stat::IsStrictSorted),
1✔
722
            Some(Precision::exact(true))
1✔
723
        );
724
    }
1✔
725

726
    /// Computes every stat except Sum and NaNCount on a small nullable array, merges
    /// the resulting set with itself via `merge_unordered`, and checks that a stat is
    /// present afterwards exactly when `is_commutative()` says so. Min and Max must be
    /// unchanged and the null count must be doubled.
    #[test]
    fn merge_unordered() {
        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
        let array =
            PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);

        // Sum and NaNCount are left out of the stats under test.
        let tracked = all::<Stat>()
            .filter(|s| !matches!(s, Stat::Sum | Stat::NaNCount))
            .collect_vec();
        array.statistics().compute_all(&tracked).unwrap();

        // Sanity check: everything we asked for was actually computed.
        let computed = array.statistics().to_owned();
        for &stat in &tracked {
            assert!(computed.get(stat).is_some(), "Stat {stat} is missing");
        }

        let merged = computed.clone().merge_unordered(&computed, &dtype);
        for &stat in &tracked {
            assert_eq!(
                merged.get(stat).is_some(),
                stat.is_commutative(),
                "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
            );
        }

        let merged_min = merged.get_as::<i32>(Stat::Min);
        assert_eq!(merged_min, computed.get_as::<i32>(Stat::Min));

        let merged_max = merged.get_as::<i32>(Stat::Max);
        assert_eq!(merged_max, computed.get_as::<i32>(Stat::Max));

        // Merging a set with itself should count every null twice.
        let merged_nulls = merged.get_as::<u64>(Stat::NullCount).unwrap();
        let doubled_nulls = computed
            .get_as::<u64>(Stat::NullCount)
            .unwrap()
            .map(|count| count * 2);
        assert_eq!(merged_nulls, doubled_nulls);
    }
1✔
766

767
    /// Merging an inexact Min of 5 with an exact Min of 5 is expected to yield an exact 5.
    #[test]
    fn merge_min_bound_same() {
        // An inexact Min is only a lower bound: the true minimum may be any value
        // >= the bound. Combined with an exact Min of the same value, the merged
        // result is expected to be exact.
        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
        let bounded = StatsSet::of(Stat::Min, Precision::inexact(5));
        let exact = StatsSet::of(Stat::Min, Precision::exact(5));

        let merged = bounded.merge_ordered(&exact, &dtype);
        assert_eq!(merged.get_as::<i32>(Stat::Min), Some(Precision::exact(5)));
    }
1✔
777

778
    /// Merging an inexact Min of 4 with an exact Min of 5 is expected to keep the
    /// lower, inexact value.
    #[test]
    fn merge_min_bound_bound_lower() {
        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
        let bounded = StatsSet::of(Stat::Min, Precision::inexact(4));
        let exact = StatsSet::of(Stat::Min, Precision::exact(5));

        let merged = bounded.merge_ordered(&exact, &dtype);
        assert_eq!(merged.get_as::<i32>(Stat::Min), Some(Precision::inexact(4)));
    }
1✔
786

787
    /// `keep_inexact_stats` with `[Min, Max]` is expected to keep only those two stats,
    /// downgrade them to inexact, and drop everything else (here: Sum).
    #[test]
    fn retain_approx() {
        let original = StatsSet::from_iter([
            (Stat::Max, Precision::exact(100)),
            (Stat::Min, Precision::exact(50)),
            (Stat::Sum, Precision::inexact(10)),
        ]);

        let retained = original.keep_inexact_stats(&[Stat::Min, Stat::Max]);

        assert_eq!(retained.len(), 2);

        let max = retained.get_as::<i32>(Stat::Max);
        assert_eq!(max, Some(Precision::inexact(100)));

        let min = retained.get_as::<i32>(Stat::Min);
        assert_eq!(min, Some(Precision::inexact(50)));

        let sum = retained.get_as::<i32>(Stat::Sum);
        assert_eq!(sum, None);
    }
1✔
802

803
    /// Exercises `combine_bool_stat::<IsConstant>` on three precision pairings and
    /// checks the value left in the receiving set each time.
    #[test]
    fn test_combine_is_constant() {
        // Case 1: exact(true) combined with exact(true) stays exact(true).
        let mut both_exact_true = StatsSet::of(Stat::IsConstant, Precision::exact(true));
        let other_exact_true = StatsSet::of(Stat::IsConstant, Precision::exact(true));
        both_exact_true
            .combine_bool_stat::<IsConstant>(&other_exact_true)
            .unwrap();
        let combined = both_exact_true.get_as::<bool>(Stat::IsConstant);
        assert_eq!(combined, Some(Precision::exact(true)));

        // Case 2: exact(true) combined with inexact(false) keeps the exact(true).
        let mut exact_true = StatsSet::of(Stat::IsConstant, Precision::exact(true));
        let inexact_false = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
        exact_true
            .combine_bool_stat::<IsConstant>(&inexact_false)
            .unwrap();
        let combined = exact_true.get_as::<bool>(Stat::IsConstant);
        assert_eq!(combined, Some(Precision::exact(true)));

        // Case 3: exact(false) combined with inexact(false) keeps the exact(false).
        let mut exact_false = StatsSet::of(Stat::IsConstant, Precision::exact(false));
        let inexact_false = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
        exact_false
            .combine_bool_stat::<IsConstant>(&inexact_false)
            .unwrap();
        let combined = exact_false.get_as::<bool>(Stat::IsConstant);
        assert_eq!(combined, Some(Precision::exact(false)));
    }
1✔
835

836
    /// Two sets that disagree on an exact IsConstant value (true vs. false) while
    /// agreeing on IsSorted: `combine_sets` is expected to return an error.
    #[test]
    fn test_combine_sets_boolean_conflict() {
        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);

        let mut claims_constant = StatsSet::from_iter([
            (Stat::IsConstant, Precision::exact(true)),
            (Stat::IsSorted, Precision::exact(true)),
        ]);
        let denies_constant = StatsSet::from_iter([
            (Stat::IsConstant, Precision::exact(false)),
            (Stat::IsSorted, Precision::exact(true)),
        ]);

        let outcome = claims_constant.combine_sets(&denies_constant, &dtype);
        assert!(outcome.is_err());
    }
1✔
854

855
    /// Combines two sets with disjoint stats and checks that the receiver keeps its
    /// own Min and gains the other set's Max and IsStrictSorted.
    #[test]
    fn test_combine_sets_with_missing_stats() {
        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);

        let mut target = StatsSet::from_iter([
            (Stat::Min, Precision::exact(42)),
            (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
        ]);
        let incoming = StatsSet::from_iter([
            (Stat::Max, Precision::exact(100)),
            (Stat::IsStrictSorted, Precision::exact(true)),
        ]);

        target.combine_sets(&incoming, &dtype).unwrap();

        // Min was only present on the receiver and must be untouched.
        let min = target.get_as::<i32>(Stat::Min);
        assert_eq!(min, Some(Precision::exact(42)));

        // Max was only present on the incoming set and must have been copied over.
        let max = target.get_as::<i32>(Stat::Max);
        assert_eq!(max, Some(Precision::exact(100)));

        // Likewise for IsStrictSorted.
        let strict_sorted = target.get_as::<bool>(Stat::IsStrictSorted);
        assert_eq!(strict_sorted, Some(Precision::exact(true)));
    }
1✔
884

885
    /// Combines sets whose Min/Max overlap with mixed precision and checks that the
    /// exact value is kept for each, and that IsSorted is picked up from the other set.
    #[test]
    fn test_combine_sets_with_inexact() {
        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);

        let mut target = StatsSet::from_iter([
            (Stat::Min, Precision::exact(42)),
            (Stat::Max, Precision::inexact(100)),
            (Stat::IsConstant, Precision::exact(false)),
        ]);
        let incoming = StatsSet::from_iter([
            // The incoming Min must be <= the receiver's Min.
            (Stat::Min, Precision::inexact(40)),
            (Stat::Max, Precision::exact(90)),
            (Stat::IsSorted, Precision::exact(true)),
        ]);

        target.combine_sets(&incoming, &dtype).unwrap();

        // The receiver's exact Min is more restrictive than the inexact incoming one,
        // so it must be unchanged.
        let min = target.get_as::<i32>(Stat::Min);
        assert_eq!(min, Some(Precision::exact(42)));

        // The receiver's inexact Max must have been replaced by the exact incoming value.
        let max = target.get_as::<i32>(Stat::Max);
        assert_eq!(max, Some(Precision::exact(90)));

        // IsSorted existed only on the incoming set and must have been added.
        let sorted = target.get_as::<bool>(Stat::IsSorted);
        assert_eq!(sorted, Some(Precision::exact(true)));
    }
1✔
917
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc