• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16204612549

10 Jul 2025 07:50PM UTC coverage: 81.152% (+2.9%) from 78.263%
16204612549

Pull #3825

github

web-flow
Merge d0d2717da into be9c2fd3e
Pull Request #3825: feat: Add optimize ArrayOp with VBView implementation

178 of 211 new or added lines in 4 files covered. (84.36%)

330 existing lines in 34 files now uncovered.

45433 of 55985 relevant lines covered (81.15%)

145951.87 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.41
/vortex-array/src/arrays/struct_/mod.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::fmt::Debug;
5

6
use itertools::Itertools;
7
use vortex_dtype::{DType, FieldName, FieldNames, StructFields};
8
use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
9
use vortex_scalar::Scalar;
10

11
use crate::stats::{ArrayStats, StatsSetRef};
12
use crate::validity::Validity;
13
use crate::vtable::{
14
    ArrayVTable, CanonicalVTable, NotSupported, OperationsVTable, VTable, ValidityHelper,
15
    ValidityVTableFromValidityHelper,
16
};
17
use crate::{Array, ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, vtable};
18

19
mod compute;
20
mod serde;
21

22
vtable!(Struct);
23

24
impl VTable for StructVTable {
25
    type Array = StructArray;
26
    type Encoding = StructEncoding;
27

28
    type ArrayVTable = Self;
29
    type CanonicalVTable = Self;
30
    type OperationsVTable = Self;
31
    type ValidityVTable = ValidityVTableFromValidityHelper;
32
    type VisitorVTable = Self;
33
    type ComputeVTable = NotSupported;
34
    type EncodeVTable = NotSupported;
35
    type SerdeVTable = Self;
36

37
    fn id(_encoding: &Self::Encoding) -> EncodingId {
63,776✔
38
        EncodingId::new_ref("vortex.struct")
63,776✔
39
    }
63,776✔
40

41
    fn encoding(_array: &Self::Array) -> EncodingRef {
9,615✔
42
        EncodingRef::new_ref(StructEncoding.as_ref())
9,615✔
43
    }
9,615✔
44
}
45

46
/// An array of struct values, stored as one child array per named field.
///
/// The constructors in this file (`try_new`, `try_new_with_dtype`) reject child arrays whose
/// length differs from the struct's length.
#[derive(Clone, Debug)]
47
pub struct StructArray {
48
    /// Number of rows; this is the value reported by `ArrayVTable::len`.
    len: usize,
49
    /// Always built as a `DType::Struct` by the constructors in this file.
    dtype: DType,
50
    /// Child arrays, indexed by the position of their name in `names()`.
    fields: Vec<ArrayRef>,
51
    /// Struct-level validity; its nullability becomes the nullability of `dtype`.
    validity: Validity,
52
    /// Statistics holder; the constructors initialise it with `Default::default()`.
    stats_set: ArrayStats,
53
}
54

55
/// Unit type named as the `Encoding` associated type of `StructVTable`.
#[derive(Clone, Debug)]
56
pub struct StructEncoding;
57

58
impl StructArray {
59
    /// Borrow the child arrays of this struct, one per field name.
    pub fn fields(&self) -> &[ArrayRef] {
        self.fields.as_slice()
    }
65,373✔
62

63
    pub fn field_by_name(&self, name: impl AsRef<str>) -> VortexResult<&ArrayRef> {
12,794✔
64
        let name = name.as_ref();
12,794✔
65
        self.field_by_name_opt(name).ok_or_else(|| {
12,794✔
66
            vortex_err!(
20✔
67
                "Field {name} not found in struct array with names {:?}",
20✔
68
                self.names()
20✔
69
            )
20✔
70
        })
12,794✔
71
    }
12,794✔
72

73
    /// Look up a child array by its field name, returning `None` when the name is unknown.
    ///
    /// When several fields share the name, the first one in `names()` wins.
    pub fn field_by_name_opt(&self, name: impl AsRef<str>) -> Option<&ArrayRef> {
        let wanted = name.as_ref();
        let idx = self
            .names()
            .iter()
            .position(|candidate| candidate.as_ref() == wanted)?;
        Some(&self.fields[idx])
    }
20,003✔
80

81
    /// The field names of this struct, taken from its struct dtype.
    pub fn names(&self) -> &FieldNames {
        let struct_fields = self.struct_fields();
        struct_fields.names()
    }
42,145✔
84

85
    /// The `StructFields` (names and field dtypes) carried by this array's dtype.
    ///
    /// # Panics
    ///
    /// Panics if the stored dtype is not a struct dtype, which would be an internal bug.
    pub fn struct_fields(&self) -> &StructFields {
        match self.dtype.as_struct() {
            Some(struct_dtype) => struct_dtype,
            None => unreachable!(
                "struct arrays must have be a DType::Struct, this is likely an internal bug."
            ),
        }
    }
43,722✔
93

94
    /// Create a new `StructArray` with the given length, but without any fields.
95
    pub fn new_with_len(len: usize) -> Self {
36✔
96
        Self::try_new(
36✔
97
            FieldNames::default(),
36✔
98
            Vec::new(),
36✔
99
            len,
36✔
100
            Validity::NonNullable,
36✔
101
        )
36✔
102
        .vortex_expect("StructArray::new_with_len should not fail")
36✔
103
    }
36✔
104

105
    pub fn try_new(
21,976✔
106
        names: FieldNames,
21,976✔
107
        fields: Vec<ArrayRef>,
21,976✔
108
        length: usize,
21,976✔
109
        validity: Validity,
21,976✔
110
    ) -> VortexResult<Self> {
21,976✔
111
        let nullability = validity.nullability();
21,976✔
112

21,976✔
113
        if names.len() != fields.len() {
21,976✔
UNCOV
114
            vortex_bail!("Got {} names and {} fields", names.len(), fields.len());
×
115
        }
21,976✔
116

117
        for field in fields.iter() {
64,920✔
118
            if field.len() != length {
64,920✔
119
                vortex_bail!(
×
120
                    "Expected all struct fields to have length {length}, found {}",
×
121
                    fields.iter().map(|f| f.len()).format(","),
×
122
                );
×
123
            }
64,920✔
124
        }
125

126
        let field_dtypes: Vec<_> = fields.iter().map(|d| d.dtype()).cloned().collect();
64,920✔
127
        let dtype = DType::Struct(StructFields::new(names, field_dtypes), nullability);
21,976✔
128

21,976✔
129
        if length != validity.maybe_len().unwrap_or(length) {
21,976✔
UNCOV
130
            vortex_bail!(
×
UNCOV
131
                "array length {} and validity length must match {}",
×
UNCOV
132
                length,
×
UNCOV
133
                validity
×
UNCOV
134
                    .maybe_len()
×
UNCOV
135
                    .vortex_expect("can only fail if maybe is some")
×
UNCOV
136
            )
×
137
        }
21,976✔
138

21,976✔
139
        Ok(Self {
21,976✔
140
            len: length,
21,976✔
141
            dtype,
21,976✔
142
            fields,
21,976✔
143
            validity,
21,976✔
144
            stats_set: Default::default(),
21,976✔
145
        })
21,976✔
146
    }
21,976✔
147

148
    pub fn try_new_with_dtype(
1,173✔
149
        fields: Vec<ArrayRef>,
1,173✔
150
        dtype: StructFields,
1,173✔
151
        length: usize,
1,173✔
152
        validity: Validity,
1,173✔
153
    ) -> VortexResult<Self> {
1,173✔
154
        for (field, struct_dt) in fields.iter().zip(dtype.fields()) {
4,501✔
155
            if field.len() != length {
4,501✔
156
                vortex_bail!(
×
UNCOV
157
                    "Expected all struct fields to have length {length}, found {}",
×
UNCOV
158
                    field.len()
×
UNCOV
159
                );
×
160
            }
4,501✔
161

4,501✔
162
            if &struct_dt != field.dtype() {
4,501✔
UNCOV
163
                vortex_bail!(
×
UNCOV
164
                    "Expected all struct fields to have dtype {}, found {}",
×
UNCOV
165
                    struct_dt,
×
UNCOV
166
                    field.dtype()
×
UNCOV
167
                );
×
168
            }
4,501✔
169
        }
170

171
        Ok(Self {
1,173✔
172
            len: length,
1,173✔
173
            dtype: DType::Struct(dtype, validity.nullability()),
1,173✔
174
            fields,
1,173✔
175
            validity,
1,173✔
176
            stats_set: Default::default(),
1,173✔
177
        })
1,173✔
178
    }
1,173✔
179

180
    pub fn from_fields<N: AsRef<str>>(items: &[(N, ArrayRef)]) -> VortexResult<Self> {
53✔
181
        Self::try_from_iter(items.iter().map(|(a, b)| (a, b.to_array())))
102✔
182
    }
53✔
183

184
    pub fn try_from_iter_with_validity<
65✔
185
        N: AsRef<str>,
65✔
186
        A: IntoArray,
65✔
187
        T: IntoIterator<Item = (N, A)>,
65✔
188
    >(
65✔
189
        iter: T,
65✔
190
        validity: Validity,
65✔
191
    ) -> VortexResult<Self> {
65✔
192
        let (names, fields): (Vec<FieldName>, Vec<ArrayRef>) = iter
65✔
193
            .into_iter()
65✔
194
            .map(|(name, fields)| (FieldName::from(name.as_ref()), fields.into_array()))
118✔
195
            .unzip();
65✔
196
        let len = fields
65✔
197
            .first()
65✔
198
            .map(|f| f.len())
65✔
199
            .ok_or_else(|| vortex_err!("StructArray cannot be constructed from an empty slice of arrays because the length is unspecified"))?;
65✔
200

201
        Self::try_new(FieldNames::from_iter(names), fields, len, validity)
65✔
202
    }
65✔
203

204
    pub fn try_from_iter<N: AsRef<str>, A: IntoArray, T: IntoIterator<Item = (N, A)>>(
63✔
205
        iter: T,
63✔
206
    ) -> VortexResult<Self> {
63✔
207
        Self::try_from_iter_with_validity(iter, Validity::NonNullable)
63✔
208
    }
63✔
209

210
    // TODO(aduffy): Add equivalent function to support field masks for nested column access.
211
    /// Return a new StructArray with the given projection applied.
212
    ///
213
    /// Projection does not copy data arrays. Projection is defined by an ordinal array slice
214
    /// which specifies the new ordering of columns in the struct. The projection can be used to
215
    /// perform column re-ordering, deletion, or duplication at a logical level, without any data
216
    /// copying.
217
    #[allow(clippy::same_name_method)]
218
    pub fn project(&self, projection: &[FieldName]) -> VortexResult<Self> {
73✔
219
        let mut children = Vec::with_capacity(projection.len());
73✔
220
        let mut names = Vec::with_capacity(projection.len());
73✔
221

222
        for f_name in projection.iter() {
74✔
223
            let idx = self
74✔
224
                .names()
74✔
225
                .iter()
74✔
226
                .position(|name| name == f_name)
112✔
227
                .ok_or_else(|| vortex_err!("Unknown field {f_name}"))?;
74✔
228

229
            names.push(self.names()[idx].clone());
74✔
230
            children.push(self.fields()[idx].clone());
74✔
231
        }
232

233
        StructArray::try_new(
73✔
234
            FieldNames::from(names.as_slice()),
73✔
235
            children,
73✔
236
            self.len(),
73✔
237
            self.validity().clone(),
73✔
238
        )
73✔
239
    }
73✔
240

241
    /// Removes and returns a column from the struct array by name.
242
    /// If the column does not exist, returns `None`.
243
    pub fn remove_column(&mut self, name: impl Into<FieldName>) -> Option<ArrayRef> {
2✔
244
        let name = name.into();
2✔
245

246
        let Some(struct_dtype) = self.dtype.as_struct() else {
2✔
UNCOV
247
            unreachable!(
×
UNCOV
248
                "struct arrays must have be a DType::Struct, this is likely an internal bug."
×
UNCOV
249
            )
×
250
        };
251

252
        let position = struct_dtype
2✔
253
            .names()
2✔
254
            .iter()
2✔
255
            .position(|field_name| field_name.as_ref() == name.as_ref())?;
2✔
256

257
        let field = self.fields.remove(position);
1✔
258

1✔
259
        let new_dtype = struct_dtype.without_field(position);
1✔
260
        self.dtype = DType::Struct(new_dtype, self.dtype.nullability());
1✔
261

1✔
262
        Some(field)
1✔
263
    }
2✔
264
}
265

266
impl ValidityHelper for StructArray {
267
    fn validity(&self) -> &Validity {
18,973✔
268
        &self.validity
18,973✔
269
    }
18,973✔
270
}
271

272
impl ArrayVTable<StructVTable> for StructVTable {
273
    fn len(array: &StructArray) -> usize {
120,232✔
274
        array.len
120,232✔
275
    }
120,232✔
276

277
    fn dtype(array: &StructArray) -> &DType {
155,213✔
278
        &array.dtype
155,213✔
279
    }
155,213✔
280

281
    fn stats(array: &StructArray) -> StatsSetRef<'_> {
84,799✔
282
        array.stats_set.to_ref(array.as_ref())
84,799✔
283
    }
84,799✔
284
}
285

286
impl CanonicalVTable<StructVTable> for StructVTable {
287
    fn canonicalize(array: &StructArray) -> VortexResult<Canonical> {
28,871✔
288
        Ok(Canonical::Struct(array.clone()))
28,871✔
289
    }
28,871✔
290
}
291

292
impl OperationsVTable<StructVTable> for StructVTable {
UNCOV
293
    fn slice(array: &StructArray, start: usize, stop: usize) -> VortexResult<ArrayRef> {
×
294
        let fields = array
×
295
            .fields()
×
UNCOV
296
            .iter()
×
UNCOV
297
            .map(|field| field.slice(start, stop))
×
UNCOV
298
            .try_collect()?;
×
299
        StructArray::try_new_with_dtype(
UNCOV
300
            fields,
×
UNCOV
301
            array.struct_fields().clone(),
×
UNCOV
302
            stop - start,
×
UNCOV
303
            array.validity().slice(start, stop)?,
×
304
        )
UNCOV
305
        .map(|a| a.into_array())
×
UNCOV
306
    }
×
307

308
    fn scalar_at(array: &StructArray, index: usize) -> VortexResult<Scalar> {
630✔
309
        Ok(Scalar::struct_(
630✔
310
            array.dtype().clone(),
630✔
311
            array
630✔
312
                .fields()
630✔
313
                .iter()
630✔
314
                .map(|field| field.scalar_at(index))
1,233✔
315
                .try_collect()?,
630✔
316
        ))
317
    }
630✔
318
}
319

320
#[cfg(test)]
321
mod test {
322
    use vortex_buffer::buffer;
323
    use vortex_dtype::{DType, FieldName, FieldNames, Nullability, PType};
324

325
    use crate::IntoArray;
326
    use crate::arrays::primitive::PrimitiveArray;
327
    use crate::arrays::struct_::StructArray;
328
    use crate::arrays::varbin::VarBinArray;
329
    use crate::arrays::{BoolArray, BoolVTable, PrimitiveVTable};
330
    use crate::validity::Validity;
331

332
    /// Projecting `[zs, xs]` out of `{xs, ys, zs}` re-orders the columns and keeps their data.
    #[test]
    fn test_project() {
        let xs = PrimitiveArray::new(buffer![0i64, 1, 2, 3, 4], Validity::NonNullable);
        let ys = VarBinArray::from_vec(
            vec!["a", "b", "c", "d", "e"],
            DType::Utf8(Nullability::NonNullable),
        );
        let zs = BoolArray::from_iter([true, true, true, false, false]);

        let columns = vec![xs.into_array(), ys.into_array(), zs.into_array()];
        let struct_a = StructArray::try_new(
            FieldNames::from(["xs", "ys", "zs"]),
            columns,
            5,
            Validity::NonNullable,
        )
        .unwrap();

        let projection = [FieldName::from("zs"), FieldName::from("xs")];
        let struct_b = struct_a.project(&projection).unwrap();

        assert_eq!(
            struct_b.names().as_ref(),
            [FieldName::from("zs"), FieldName::from("xs")],
        );
        assert_eq!(struct_b.len(), 5);

        // First projected column is the boolean `zs`.
        let bool_values = struct_b.fields[0]
            .as_::<BoolVTable>()
            .boolean_buffer()
            .iter()
            .collect::<Vec<_>>();
        assert_eq!(bool_values, vec![true, true, true, false, false]);

        // Second projected column is the primitive `xs`.
        let prims = &struct_b.fields[1];
        assert_eq!(
            prims.as_::<PrimitiveVTable>().as_slice::<i64>(),
            [0i64, 1, 2, 3, 4]
        );
    }
1✔
375

376
    /// Removing a column returns its array and shrinks the struct; unknown names are a no-op.
    #[test]
    fn test_remove_column() {
        let xs = PrimitiveArray::new(buffer![0i64, 1, 2, 3, 4], Validity::NonNullable);
        let ys = PrimitiveArray::new(buffer![4u64, 5, 6, 7, 8], Validity::NonNullable);

        let columns = vec![xs.into_array(), ys.into_array()];
        let mut struct_a = StructArray::try_new(
            FieldNames::from(["xs", "ys"]),
            columns,
            5,
            Validity::NonNullable,
        )
        .unwrap();

        // The removed column comes back with its dtype and data intact.
        let removed = struct_a.remove_column("xs").unwrap();
        assert_eq!(
            removed.dtype(),
            &DType::Primitive(PType::I64, Nullability::NonNullable)
        );
        assert_eq!(
            removed.as_::<PrimitiveVTable>().as_slice::<i64>(),
            [0i64, 1, 2, 3, 4]
        );

        // Only `ys` is left, and the row count is unchanged.
        assert_eq!(struct_a.names(), &[FieldName::from("ys")].into());
        assert_eq!(struct_a.fields.len(), 1);
        assert_eq!(struct_a.len(), 5);

        let remaining = &struct_a.fields[0];
        assert_eq!(
            remaining.dtype(),
            &DType::Primitive(PType::U64, Nullability::NonNullable)
        );
        assert_eq!(
            remaining.as_::<PrimitiveVTable>().as_slice::<u64>(),
            [4u64, 5, 6, 7, 8]
        );

        // Removing a name that does not exist changes nothing.
        let empty = struct_a.remove_column("non_existent");
        assert!(
            empty.is_none(),
            "Expected None when removing non-existent column"
        );
        assert_eq!(struct_a.names(), &[FieldName::from("ys")].into());
    }
1✔
420
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc