• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16486479294

24 Jul 2025 02:42AM UTC coverage: 81.018% (-0.05%) from 81.067%
16486479294

push

github

web-flow
fix: Pruning expressions check NanCount where appropriate (#3973)

Fixes #3958 

NanCount stat is checked in the pruning evaluation if it is present
(i.e. for float columns and literals)

- [x] Verify fixes fuzz failure
- [x] Add unit tests

Signed-off-by: Andrew Duffy <andrew@a10y.dev>

---------

Signed-off-by: Andrew Duffy <andrew@a10y.dev>
Signed-off-by: Robert Kruszewski <github@robertk.io>
Co-authored-by: Robert Kruszewski <github@robertk.io>

106 of 132 new or added lines in 12 files covered. (80.3%)

4 existing lines in 2 files now uncovered.

42012 of 51855 relevant lines covered (81.02%)

173899.63 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.82
/vortex-array/src/arrays/struct_/mod.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::fmt::Debug;
5
use std::iter::once;
6

7
use itertools::Itertools;
8
use vortex_dtype::{DType, FieldName, FieldNames, StructFields};
9
use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
10
use vortex_scalar::Scalar;
11

12
use crate::stats::{ArrayStats, StatsSetRef};
13
use crate::validity::Validity;
14
use crate::vtable::{
15
    ArrayVTable, CanonicalVTable, NotSupported, OperationsVTable, VTable, ValidityHelper,
16
    ValidityVTableFromValidityHelper,
17
};
18
use crate::{Array, ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, vtable};
19

20
mod compute;
21
mod serde;
22

23
vtable!(Struct);
24

25
impl VTable for StructVTable {
26
    type Array = StructArray;
27
    type Encoding = StructEncoding;
28

29
    type ArrayVTable = Self;
30
    type CanonicalVTable = Self;
31
    type OperationsVTable = Self;
32
    type ValidityVTable = ValidityVTableFromValidityHelper;
33
    type VisitorVTable = Self;
34
    type ComputeVTable = NotSupported;
35
    type EncodeVTable = NotSupported;
36
    type SerdeVTable = Self;
37

38
    fn id(_encoding: &Self::Encoding) -> EncodingId {
74,912✔
39
        EncodingId::new_ref("vortex.struct")
74,912✔
40
    }
74,912✔
41

42
    fn encoding(_array: &Self::Array) -> EncodingRef {
9,834✔
43
        EncodingRef::new_ref(StructEncoding.as_ref())
9,834✔
44
    }
9,834✔
45
}
46

47
#[derive(Clone, Debug)]
48
pub struct StructArray {
49
    len: usize,
50
    dtype: DType,
51
    fields: Vec<ArrayRef>,
52
    validity: Validity,
53
    stats_set: ArrayStats,
54
}
55

56
#[derive(Clone, Debug)]
57
pub struct StructEncoding;
58

59
impl StructArray {
60
    pub fn fields(&self) -> &[ArrayRef] {
69,133✔
61
        &self.fields
69,133✔
62
    }
69,133✔
63

64
    pub fn field_by_name(&self, name: impl AsRef<str>) -> VortexResult<&ArrayRef> {
15,351✔
65
        let name = name.as_ref();
15,351✔
66
        self.field_by_name_opt(name).ok_or_else(|| {
15,351✔
67
            vortex_err!(
21✔
68
                "Field {name} not found in struct array with names {:?}",
21✔
69
                self.names()
21✔
70
            )
71
        })
21✔
72
    }
15,351✔
73

74
    pub fn field_by_name_opt(&self, name: impl AsRef<str>) -> Option<&ArrayRef> {
23,101✔
75
        let name = name.as_ref();
23,101✔
76
        self.names()
23,101✔
77
            .iter()
23,101✔
78
            .position(|field_name| field_name.as_ref() == name)
55,948✔
79
            .map(|idx| &self.fields[idx])
23,101✔
80
    }
23,101✔
81

82
    pub fn names(&self) -> &FieldNames {
46,560✔
83
        self.struct_fields().names()
46,560✔
84
    }
46,560✔
85

86
    pub fn struct_fields(&self) -> &StructFields {
48,182✔
87
        let Some(struct_dtype) = &self.dtype.as_struct() else {
48,182✔
88
            unreachable!(
×
89
                "struct arrays must have be a DType::Struct, this is likely an internal bug."
90
            )
91
        };
92
        struct_dtype
48,182✔
93
    }
48,182✔
94

95
    /// Create a new `StructArray` with the given length, but without any fields.
96
    pub fn new_with_len(len: usize) -> Self {
38✔
97
        Self::try_new(
38✔
98
            FieldNames::default(),
38✔
99
            Vec::new(),
38✔
100
            len,
38✔
101
            Validity::NonNullable,
38✔
102
        )
103
        .vortex_expect("StructArray::new_with_len should not fail")
38✔
104
    }
38✔
105

106
    pub fn try_new(
23,016✔
107
        names: FieldNames,
23,016✔
108
        fields: Vec<ArrayRef>,
23,016✔
109
        length: usize,
23,016✔
110
        validity: Validity,
23,016✔
111
    ) -> VortexResult<Self> {
23,016✔
112
        let nullability = validity.nullability();
23,016✔
113

114
        if names.len() != fields.len() {
23,016✔
115
            vortex_bail!("Got {} names and {} fields", names.len(), fields.len());
×
116
        }
23,016✔
117

118
        for field in fields.iter() {
68,626✔
119
            if field.len() != length {
68,626✔
120
                vortex_bail!(
×
121
                    "Expected all struct fields to have length {length}, found {}",
×
122
                    fields.iter().map(|f| f.len()).format(","),
×
123
                );
124
            }
68,626✔
125
        }
126

127
        let field_dtypes: Vec<_> = fields.iter().map(|d| d.dtype()).cloned().collect();
68,626✔
128
        let dtype = DType::Struct(StructFields::new(names, field_dtypes), nullability);
23,016✔
129

130
        if length != validity.maybe_len().unwrap_or(length) {
23,016✔
131
            vortex_bail!(
×
132
                "array length {} and validity length must match {}",
×
133
                length,
134
                validity
×
135
                    .maybe_len()
×
136
                    .vortex_expect("can only fail if maybe is some")
×
137
            )
138
        }
23,016✔
139

140
        Ok(Self {
23,016✔
141
            len: length,
23,016✔
142
            dtype,
23,016✔
143
            fields,
23,016✔
144
            validity,
23,016✔
145
            stats_set: Default::default(),
23,016✔
146
        })
23,016✔
147
    }
23,016✔
148

149
    pub fn try_new_with_dtype(
1,261✔
150
        fields: Vec<ArrayRef>,
1,261✔
151
        dtype: StructFields,
1,261✔
152
        length: usize,
1,261✔
153
        validity: Validity,
1,261✔
154
    ) -> VortexResult<Self> {
1,261✔
155
        for (field, struct_dt) in fields.iter().zip(dtype.fields()) {
4,823✔
156
            if field.len() != length {
4,823✔
157
                vortex_bail!(
×
158
                    "Expected all struct fields to have length {length}, found {}",
×
159
                    field.len()
×
160
                );
161
            }
4,823✔
162

163
            if &struct_dt != field.dtype() {
4,823✔
164
                vortex_bail!(
×
165
                    "Expected all struct fields to have dtype {}, found {}",
×
166
                    struct_dt,
167
                    field.dtype()
×
168
                );
169
            }
4,823✔
170
        }
171

172
        Ok(Self {
1,261✔
173
            len: length,
1,261✔
174
            dtype: DType::Struct(dtype, validity.nullability()),
1,261✔
175
            fields,
1,261✔
176
            validity,
1,261✔
177
            stats_set: Default::default(),
1,261✔
178
        })
1,261✔
179
    }
1,261✔
180

181
    pub fn from_fields<N: AsRef<str>>(items: &[(N, ArrayRef)]) -> VortexResult<Self> {
257✔
182
        Self::try_from_iter(items.iter().map(|(a, b)| (a, b.to_array())))
611✔
183
    }
257✔
184

185
    pub fn try_from_iter_with_validity<
270✔
186
        N: AsRef<str>,
270✔
187
        A: IntoArray,
270✔
188
        T: IntoIterator<Item = (N, A)>,
270✔
189
    >(
270✔
190
        iter: T,
270✔
191
        validity: Validity,
270✔
192
    ) -> VortexResult<Self> {
270✔
193
        let (names, fields): (Vec<FieldName>, Vec<ArrayRef>) = iter
270✔
194
            .into_iter()
270✔
195
            .map(|(name, fields)| (FieldName::from(name.as_ref()), fields.into_array()))
629✔
196
            .unzip();
270✔
197
        let len = fields
270✔
198
            .first()
270✔
199
            .map(|f| f.len())
270✔
200
            .ok_or_else(|| vortex_err!("StructArray cannot be constructed from an empty slice of arrays because the length is unspecified"))?;
270✔
201

202
        Self::try_new(FieldNames::from_iter(names), fields, len, validity)
270✔
203
    }
270✔
204

205
    pub fn try_from_iter<N: AsRef<str>, A: IntoArray, T: IntoIterator<Item = (N, A)>>(
267✔
206
        iter: T,
267✔
207
    ) -> VortexResult<Self> {
267✔
208
        Self::try_from_iter_with_validity(iter, Validity::NonNullable)
267✔
209
    }
267✔
210

211
    // TODO(aduffy): Add equivalent function to support field masks for nested column access.
212
    /// Return a new StructArray with the given projection applied.
213
    ///
214
    /// Projection does not copy data arrays. Projection is defined by an ordinal array slice
215
    /// which specifies the new ordering of columns in the struct. The projection can be used to
216
    /// perform column re-ordering, deletion, or duplication at a logical level, without any data
217
    /// copying.
218
    #[allow(clippy::same_name_method)]
219
    pub fn project(&self, projection: &[FieldName]) -> VortexResult<Self> {
75✔
220
        let mut children = Vec::with_capacity(projection.len());
75✔
221
        let mut names = Vec::with_capacity(projection.len());
75✔
222

223
        for f_name in projection.iter() {
76✔
224
            let idx = self
76✔
225
                .names()
76✔
226
                .iter()
76✔
227
                .position(|name| name == f_name)
115✔
228
                .ok_or_else(|| vortex_err!("Unknown field {f_name}"))?;
76✔
229

230
            names.push(self.names()[idx].clone());
76✔
231
            children.push(self.fields()[idx].clone());
76✔
232
        }
233

234
        StructArray::try_new(
75✔
235
            FieldNames::from(names.as_slice()),
75✔
236
            children,
75✔
237
            self.len(),
75✔
238
            self.validity().clone(),
75✔
239
        )
240
    }
75✔
241

242
    /// Removes and returns a column from the struct array by name.
243
    /// If the column does not exist, returns `None`.
244
    pub fn remove_column(&mut self, name: impl Into<FieldName>) -> Option<ArrayRef> {
2✔
245
        let name = name.into();
2✔
246

247
        let struct_dtype = self.struct_fields().clone();
2✔
248

249
        let position = struct_dtype
2✔
250
            .names()
2✔
251
            .iter()
2✔
252
            .position(|field_name| field_name.as_ref() == name.as_ref())?;
2✔
253

254
        let field = self.fields.remove(position);
1✔
255

256
        let new_dtype = struct_dtype.without_field(position);
1✔
257
        self.dtype = DType::Struct(new_dtype, self.dtype.nullability());
1✔
258

259
        Some(field)
1✔
260
    }
2✔
261

262
    /// Create a new StructArray by appending a new column onto the existing array.
NEW
263
    pub fn with_column(&self, name: impl Into<FieldName>, array: ArrayRef) -> VortexResult<Self> {
×
NEW
264
        let name = name.into();
×
NEW
265
        let struct_dtype = self.struct_fields().clone();
×
266

NEW
267
        let names = struct_dtype.names().iter().cloned().chain(once(name));
×
NEW
268
        let types = struct_dtype.fields().chain(once(array.dtype().clone()));
×
NEW
269
        let new_fields = StructFields::new(names.collect(), types.collect());
×
270

NEW
271
        let mut children = self.fields.clone();
×
NEW
272
        children.push(array);
×
273

NEW
274
        Self::try_new_with_dtype(children, new_fields, self.len, self.validity.clone())
×
NEW
275
    }
×
276
}
277

278
impl ValidityHelper for StructArray {
279
    fn validity(&self) -> &Validity {
20,045✔
280
        &self.validity
20,045✔
281
    }
20,045✔
282
}
283

284
impl ArrayVTable<StructVTable> for StructVTable {
285
    fn len(array: &StructArray) -> usize {
132,571✔
286
        array.len
132,571✔
287
    }
132,571✔
288

289
    fn dtype(array: &StructArray) -> &DType {
166,692✔
290
        &array.dtype
166,692✔
291
    }
166,692✔
292

293
    fn stats(array: &StructArray) -> StatsSetRef<'_> {
93,154✔
294
        array.stats_set.to_ref(array.as_ref())
93,154✔
295
    }
93,154✔
296
}
297

298
impl CanonicalVTable<StructVTable> for StructVTable {
299
    fn canonicalize(array: &StructArray) -> VortexResult<Canonical> {
32,224✔
300
        Ok(Canonical::Struct(array.clone()))
32,224✔
301
    }
32,224✔
302
}
303

304
impl OperationsVTable<StructVTable> for StructVTable {
305
    fn slice(array: &StructArray, start: usize, stop: usize) -> VortexResult<ArrayRef> {
×
306
        let fields = array
×
307
            .fields()
×
308
            .iter()
×
309
            .map(|field| field.slice(start, stop))
×
310
            .try_collect()?;
×
311
        StructArray::try_new_with_dtype(
×
312
            fields,
×
313
            array.struct_fields().clone(),
×
314
            stop - start,
×
315
            array.validity().slice(start, stop)?,
×
316
        )
317
        .map(|a| a.into_array())
×
318
    }
×
319

320
    fn scalar_at(array: &StructArray, index: usize) -> VortexResult<Scalar> {
654✔
321
        Ok(Scalar::struct_(
654✔
322
            array.dtype().clone(),
654✔
323
            array
654✔
324
                .fields()
654✔
325
                .iter()
654✔
326
                .map(|field| field.scalar_at(index))
1,273✔
327
                .try_collect()?,
654✔
328
        ))
329
    }
654✔
330
}
331

332
#[cfg(test)]
mod test {
    use vortex_buffer::buffer;
    use vortex_dtype::{DType, FieldName, FieldNames, Nullability, PType};

    use crate::IntoArray;
    use crate::arrays::primitive::PrimitiveArray;
    use crate::arrays::struct_::StructArray;
    use crate::arrays::varbin::VarBinArray;
    use crate::arrays::{BoolArray, BoolVTable, PrimitiveVTable};
    use crate::validity::Validity;

    /// Projecting onto a reordered subset of columns keeps the requested names, the row
    /// count, and the contents of the selected children.
    #[test]
    fn test_project() {
        let ints = PrimitiveArray::new(buffer![0i64, 1, 2, 3, 4], Validity::NonNullable);
        let strings = VarBinArray::from_vec(
            vec!["a", "b", "c", "d", "e"],
            DType::Utf8(Nullability::NonNullable),
        );
        let flags = BoolArray::from_iter([true, true, true, false, false]);

        let source = StructArray::try_new(
            FieldNames::from(["xs", "ys", "zs"]),
            vec![ints.into_array(), strings.into_array(), flags.into_array()],
            5,
            Validity::NonNullable,
        )
        .unwrap();

        let projected = source
            .project(&[FieldName::from("zs"), FieldName::from("xs")])
            .unwrap();
        assert_eq!(
            projected.names().as_ref(),
            [FieldName::from("zs"), FieldName::from("xs")],
        );

        assert_eq!(projected.len(), 5);

        // First projected column is the boolean `zs`.
        let projected_flags = &projected.fields[0];
        assert_eq!(
            projected_flags
                .as_::<BoolVTable>()
                .boolean_buffer()
                .iter()
                .collect::<Vec<_>>(),
            vec![true, true, true, false, false]
        );

        // Second projected column is the integer `xs`.
        let projected_ints = &projected.fields[1];
        assert_eq!(
            projected_ints.as_::<PrimitiveVTable>().as_slice::<i64>(),
            [0i64, 1, 2, 3, 4]
        );
    }

    /// Removing a column returns it and shrinks the struct; removing an unknown column
    /// returns `None` and changes nothing.
    #[test]
    fn test_remove_column() {
        let signed = PrimitiveArray::new(buffer![0i64, 1, 2, 3, 4], Validity::NonNullable);
        let unsigned = PrimitiveArray::new(buffer![4u64, 5, 6, 7, 8], Validity::NonNullable);

        let mut subject = StructArray::try_new(
            FieldNames::from(["xs", "ys"]),
            vec![signed.into_array(), unsigned.into_array()],
            5,
            Validity::NonNullable,
        )
        .unwrap();

        // The removed column comes back intact.
        let taken = subject.remove_column("xs").unwrap();
        assert_eq!(
            taken.dtype(),
            &DType::Primitive(PType::I64, Nullability::NonNullable)
        );
        assert_eq!(
            taken.as_::<PrimitiveVTable>().as_slice::<i64>(),
            [0i64, 1, 2, 3, 4]
        );

        // Only `ys` is left behind, with its data untouched.
        assert_eq!(subject.names(), &[FieldName::from("ys")].into());
        assert_eq!(subject.fields.len(), 1);
        assert_eq!(subject.len(), 5);
        assert_eq!(
            subject.fields[0].dtype(),
            &DType::Primitive(PType::U64, Nullability::NonNullable)
        );
        assert_eq!(
            subject.fields[0]
                .as_::<PrimitiveVTable>()
                .as_slice::<u64>(),
            [4u64, 5, 6, 7, 8]
        );

        // Unknown names are a no-op.
        let missing = subject.remove_column("non_existent");
        assert!(
            missing.is_none(),
            "Expected None when removing non-existent column"
        );
        assert_eq!(subject.names(), &[FieldName::from("ys")].into());
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc