• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16474110289

23 Jul 2025 02:52PM UTC coverage: 81.205% (+0.1%) from 81.11%
16474110289

Pull #3992

github

web-flow
Merge d7bd8890a into e793ee468
Pull Request #3992: feat: teach SparseArray to canonicalize lists

235 of 246 new or added lines in 3 files covered. (95.53%)

8 existing lines in 4 files now uncovered.

42347 of 52148 relevant lines covered (81.21%)

173081.8 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.1
/vortex-array/src/builders/list.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::any::Any;
5
use std::sync::Arc;
6

7
use vortex_dtype::Nullability::NonNullable;
8
use vortex_dtype::{DType, NativePType, Nullability};
9
use vortex_error::{VortexExpect, VortexResult, vortex_bail};
10
use vortex_mask::Mask;
11
use vortex_scalar::ListScalar;
12

13
use crate::arrays::{ListArray, OffsetPType};
14
use crate::builders::lazy_validity_builder::LazyNullBufferBuilder;
15
use crate::builders::{ArrayBuilder, ArrayBuilderExt, PrimitiveBuilder, builder_with_capacity};
16
use crate::compute::{add_scalar, cast, sub_scalar};
17
use crate::{Array, ArrayRef, IntoArray, ToCanonical};
18

19
pub struct ListBuilder<O: NativePType> {
20
    value_builder: Box<dyn ArrayBuilder>,
21
    index_builder: PrimitiveBuilder<O>,
22
    nulls: LazyNullBufferBuilder,
23
    nullability: Nullability,
24
    dtype: DType,
25
}
26

27
impl<O: OffsetPType> ListBuilder<O> {
28
    /// Create a ListBuilder with the specified capacity for indices.
29
    ///
30
    /// # Notes
31
    ///
32
    /// The number of indices is one more than the number of lists in the array!
33
    ///
34
    /// See also: [ListBuilder::with_values_and_index_capacity].
35
    pub fn with_capacity(
267✔
36
        value_dtype: Arc<DType>,
267✔
37
        nullability: Nullability,
267✔
38
        index_capacity: usize,
267✔
39
    ) -> Self {
267✔
40
        // I would expect the list to have more than one value per index
41
        Self::with_values_and_index_capacity(
267✔
42
            value_dtype,
267✔
43
            nullability,
267✔
44
            2 * index_capacity,
267✔
45
            index_capacity,
267✔
46
        )
47
    }
267✔
48

49
    /// Create a ListBuilder with the specified capacity for indices and values.
50
    ///
51
    /// # Notes
52
    ///
53
    /// The number of indices is one more than the number of lists in the array!
54
    pub fn with_values_and_index_capacity(
267✔
55
        value_dtype: Arc<DType>,
267✔
56
        nullability: Nullability,
267✔
57
        values_capacity: usize,
267✔
58
        index_capacity: usize,
267✔
59
    ) -> Self {
267✔
60
        let value_builder = builder_with_capacity(value_dtype.as_ref(), values_capacity);
267✔
61
        let mut index_builder = PrimitiveBuilder::with_capacity(NonNullable, index_capacity);
267✔
62

63
        // The first index of the list, which is always 0 and represents an empty list.
64
        index_builder.append_zero();
267✔
65

66
        Self {
267✔
67
            value_builder,
267✔
68
            index_builder,
267✔
69
            nulls: LazyNullBufferBuilder::new(index_capacity),
267✔
70
            nullability,
267✔
71
            dtype: DType::List(value_dtype, nullability),
267✔
72
        }
267✔
73
    }
267✔
74

75
    pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
228✔
76
        match value.elements() {
228✔
77
            None => {
78
                if self.nullability == NonNullable {
2✔
79
                    vortex_bail!("Cannot append null value to non-nullable list");
1✔
80
                }
1✔
81
                self.append_null();
1✔
82
                Ok(())
1✔
83
            }
84
            Some(elements) => {
226✔
85
                for scalar in elements {
688✔
86
                    // TODO(joe): This is slow, we should be able to append multiple values at once,
87
                    // or the list scalar should hold an Array
88
                    self.value_builder.append_scalar(&scalar)?;
462✔
89
                }
90
                self.nulls.append_non_null();
226✔
91
                self.append_index(
226✔
92
                    O::from_usize(self.value_builder.len())
226✔
93
                        .vortex_expect("Failed to convert from usize to O"),
226✔
94
                )
95
            }
96
        }
97
    }
228✔
98

99
    fn append_index(&mut self, index: O) -> VortexResult<()> {
409✔
100
        self.index_builder.append_scalar(&index.into())
409✔
101
    }
409✔
102
}
103

104
impl<O: OffsetPType> ArrayBuilder for ListBuilder<O> {
105
    fn as_any(&self) -> &dyn Any {
×
106
        self
×
107
    }
×
108

109
    fn as_any_mut(&mut self) -> &mut dyn Any {
×
110
        self
×
111
    }
×
112

113
    fn dtype(&self) -> &DType {
×
114
        &self.dtype
×
115
    }
×
116

117
    fn len(&self) -> usize {
×
118
        self.nulls.len()
×
119
    }
×
120

121
    fn append_zeros(&mut self, n: usize) {
×
122
        let count = self.value_builder.len();
×
123
        self.value_builder.append_zeros(n);
×
124
        for i in 0..n {
×
125
            self.append_index(
×
126
                O::from_usize(count + i + 1).vortex_expect("Failed to convert from usize to <O>"),
×
127
            )
×
128
            .vortex_expect("Failed to append index");
×
129
        }
×
130
        self.nulls.append_n_non_nulls(n);
×
131
    }
×
132

133
    fn append_nulls(&mut self, n: usize) {
186✔
134
        let count = self.value_builder.len();
186✔
135
        for _ in 0..n {
186✔
136
            // A list with a null element is can be a list with a zero-span offset and a validity
183✔
137
            // bit set
183✔
138
            self.append_index(
183✔
139
                O::from_usize(count).vortex_expect("Failed to convert from usize to <O>"),
183✔
140
            )
183✔
141
            .vortex_expect("Failed to append index");
183✔
142
        }
183✔
143
        self.nulls.append_n_nulls(n);
186✔
144
    }
186✔
145

146
    fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()> {
47✔
147
        let list = array.to_list()?;
47✔
148
        if list.is_empty() {
47✔
149
            return Ok(());
8✔
150
        }
39✔
151

152
        let n_already_added_values = self.value_builder.len();
39✔
153
        let Some(n_already_added_values) = O::from_usize(n_already_added_values) else {
39✔
NEW
154
            vortex_bail!(
×
UNCOV
155
                "cannot convert length {} to type {:?}",
×
156
                n_already_added_values,
157
                O::PTYPE
158
            )
159
        };
160

161
        let offsets = list.offsets();
39✔
162
        let elements = list.elements();
39✔
163

164
        let index_dtype = self.index_builder.dtype();
39✔
165

166
        let n_leading_junk_values = offsets.scalar_at(0)?.cast(index_dtype)?;
39✔
167

168
        let casted_offsets = cast(&offsets.slice(1, offsets.len())?, index_dtype)?;
39✔
169
        let offsets_without_leading_junk =
39✔
170
            sub_scalar(&casted_offsets, n_leading_junk_values.clone())?;
39✔
171
        let offsets_into_builder =
39✔
172
            add_scalar(&offsets_without_leading_junk, n_already_added_values.into())?;
39✔
173

174
        let n_leading_junk_values = n_leading_junk_values
39✔
175
            .as_primitive()
39✔
176
            .pvalue()
39✔
177
            .vortex_expect("list offsets must be non-null");
39✔
178
        let n_leading_junk_values =
39✔
179
            usize::try_from(n_leading_junk_values).vortex_expect("list offsets fit in usize");
39✔
180
        let last_offset = offsets
39✔
181
            .scalar_at(offsets.len() - 1)?
39✔
182
            .as_primitive()
39✔
183
            .pvalue()
39✔
184
            .vortex_expect("list offsets must be non-null");
39✔
185
        let last_offset = usize::try_from(last_offset).vortex_expect("list offsets fit in usize");
39✔
186
        let non_junk_values = elements.slice(n_leading_junk_values, last_offset)?;
39✔
187

188
        self.nulls.append_validity_mask(array.validity_mask()?);
39✔
189
        self.index_builder
39✔
190
            .extend_from_array(&offsets_into_builder)?;
39✔
191
        self.value_builder.ensure_capacity(non_junk_values.len());
39✔
192
        self.value_builder.extend_from_array(&non_junk_values)?;
39✔
193

194
        Ok(())
39✔
195
    }
47✔
196

197
    fn ensure_capacity(&mut self, capacity: usize) {
×
198
        self.index_builder.ensure_capacity(capacity);
×
199
        self.value_builder.ensure_capacity(capacity);
×
200
        self.nulls.ensure_capacity(capacity);
×
201
    }
×
202

203
    fn set_validity(&mut self, validity: Mask) {
×
204
        self.nulls = LazyNullBufferBuilder::new(validity.len());
×
205
        self.nulls.append_validity_mask(validity);
×
206
    }
×
207

208
    fn finish(&mut self) -> ArrayRef {
266✔
209
        assert_eq!(
266✔
210
            self.index_builder.len(),
266✔
211
            self.nulls.len() + 1,
266✔
212
            "Indices length must be one more than nulls length."
×
213
        );
214

215
        ListArray::try_new(
266✔
216
            self.value_builder.finish(),
266✔
217
            self.index_builder.finish(),
266✔
218
            self.nulls.finish_with_nullability(self.nullability),
266✔
219
        )
266✔
220
        .vortex_expect("Buffer, offsets, and validity must have same length.")
266✔
221
        .into_array()
266✔
222
    }
266✔
223
}
224

225
#[cfg(test)]
226
mod tests {
227
    use std::sync::Arc;
228

229
    use Nullability::{NonNullable, Nullable};
230
    use vortex_buffer::buffer;
231
    use vortex_dtype::PType::I32;
232
    use vortex_dtype::{DType, Nullability};
233
    use vortex_scalar::Scalar;
234

235
    use crate::array::Array;
236
    use crate::arrays::{ChunkedArray, ListArray, OffsetPType};
237
    use crate::builders::ArrayBuilder;
238
    use crate::builders::list::ListBuilder;
239
    use crate::validity::Validity;
240
    use crate::vtable::ValidityHelper;
241
    use crate::{IntoArray as _, ToCanonical};
242

243
    #[test]
244
    fn test_empty() {
1✔
245
        let mut builder = ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0);
1✔
246

247
        let list = builder.finish();
1✔
248
        assert_eq!(list.len(), 0);
1✔
249
    }
1✔
250

251
    #[test]
252
    fn test_values() {
1✔
253
        let dtype: Arc<DType> = Arc::new(I32.into());
1✔
254
        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
1✔
255

256
        builder
1✔
257
            .append_value(
1✔
258
                Scalar::list(
1✔
259
                    dtype.clone(),
1✔
260
                    vec![1i32.into(), 2i32.into(), 3i32.into()],
1✔
261
                    NonNullable,
1✔
262
                )
1✔
263
                .as_list(),
1✔
264
            )
265
            .unwrap();
1✔
266

267
        builder
1✔
268
            .append_value(
1✔
269
                Scalar::list(
1✔
270
                    dtype,
1✔
271
                    vec![4i32.into(), 5i32.into(), 6i32.into()],
1✔
272
                    NonNullable,
1✔
273
                )
1✔
274
                .as_list(),
1✔
275
            )
276
            .unwrap();
1✔
277

278
        let list = builder.finish();
1✔
279
        assert_eq!(list.len(), 2);
1✔
280

281
        let list_array = list.to_list().unwrap();
1✔
282

283
        assert_eq!(list_array.elements_at(0).unwrap().len(), 3);
1✔
284
        assert_eq!(list_array.elements_at(1).unwrap().len(), 3);
1✔
285
    }
1✔
286

287
    #[test]
288
    fn test_non_null_fails() {
1✔
289
        let dtype: Arc<DType> = Arc::new(I32.into());
1✔
290
        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
1✔
291

292
        assert!(
1✔
293
            builder
1✔
294
                .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
1✔
295
                .is_err()
1✔
296
        )
297
    }
1✔
298

299
    #[test]
300
    fn test_nullable_values() {
1✔
301
        let dtype: Arc<DType> = Arc::new(I32.into());
1✔
302
        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), Nullable, 0);
1✔
303

304
        builder
1✔
305
            .append_value(
1✔
306
                Scalar::list(
1✔
307
                    dtype.clone(),
1✔
308
                    vec![1i32.into(), 2i32.into(), 3i32.into()],
1✔
309
                    NonNullable,
1✔
310
                )
1✔
311
                .as_list(),
1✔
312
            )
313
            .unwrap();
1✔
314

315
        builder
1✔
316
            .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
1✔
317
            .unwrap();
1✔
318

319
        builder
1✔
320
            .append_value(
1✔
321
                Scalar::list(
1✔
322
                    dtype,
1✔
323
                    vec![4i32.into(), 5i32.into(), 6i32.into()],
1✔
324
                    NonNullable,
1✔
325
                )
1✔
326
                .as_list(),
1✔
327
            )
328
            .unwrap();
1✔
329

330
        let list = builder.finish();
1✔
331
        assert_eq!(list.len(), 3);
1✔
332

333
        let list_array = list.to_list().unwrap();
1✔
334

335
        assert_eq!(list_array.elements_at(0).unwrap().len(), 3);
1✔
336
        assert_eq!(list_array.elements_at(1).unwrap().len(), 0);
1✔
337
        assert_eq!(list_array.elements_at(2).unwrap().len(), 3);
1✔
338
    }
1✔
339

340
    fn test_extend_builder_gen<O: OffsetPType>() {
8✔
341
        let list = ListArray::from_iter_opt_slow::<O, _, _>(
8✔
342
            [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
8✔
343
            Arc::new(I32.into()),
8✔
344
        )
345
        .unwrap();
8✔
346

347
        let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 6);
8✔
348

349
        builder.extend_from_array(&list).unwrap();
8✔
350
        builder.extend_from_array(&list).unwrap();
8✔
351
        builder
8✔
352
            .extend_from_array(&list.slice(0, 0).unwrap())
8✔
353
            .unwrap();
8✔
354
        builder
8✔
355
            .extend_from_array(&list.slice(1, 3).unwrap())
8✔
356
            .unwrap();
8✔
357

358
        let expected = ListArray::from_iter_opt_slow::<O, _, _>(
8✔
359
            [
8✔
360
                Some(vec![0, 1, 2]),
8✔
361
                None,
8✔
362
                Some(vec![4, 5]),
8✔
363
                Some(vec![0, 1, 2]),
8✔
364
                None,
8✔
365
                Some(vec![4, 5]),
8✔
366
                None,
8✔
367
                Some(vec![4, 5]),
8✔
368
            ],
8✔
369
            Arc::new(DType::Primitive(I32, NonNullable)),
8✔
370
        )
8✔
371
        .unwrap()
8✔
372
        .to_list()
8✔
373
        .unwrap();
8✔
374

375
        let actual = builder
8✔
376
            .finish()
8✔
377
            .to_canonical()
8✔
378
            .unwrap()
8✔
379
            .into_list()
8✔
380
            .unwrap();
8✔
381

382
        assert_eq!(
8✔
383
            actual.elements().to_primitive().unwrap().as_slice::<i32>(),
8✔
384
            expected
8✔
385
                .elements()
8✔
386
                .to_primitive()
8✔
387
                .unwrap()
8✔
388
                .as_slice::<i32>()
8✔
389
        );
390

391
        assert_eq!(
8✔
392
            actual.offsets().to_primitive().unwrap().as_slice::<O>(),
8✔
393
            expected.offsets().to_primitive().unwrap().as_slice::<O>()
8✔
394
        );
395

396
        assert_eq!(actual.validity(), expected.validity())
8✔
397
    }
8✔
398

399
    #[test]
400
    fn test_extend_builder() {
1✔
401
        test_extend_builder_gen::<i8>();
1✔
402
        test_extend_builder_gen::<i16>();
1✔
403
        test_extend_builder_gen::<i32>();
1✔
404
        test_extend_builder_gen::<i64>();
1✔
405

406
        test_extend_builder_gen::<u8>();
1✔
407
        test_extend_builder_gen::<u16>();
1✔
408
        test_extend_builder_gen::<u32>();
1✔
409
        test_extend_builder_gen::<u64>();
1✔
410
    }
1✔
411

412
    #[test]
413
    pub fn test_array_with_gap() {
1✔
414
        let one_trailing_unused_element = ListArray::try_new(
1✔
415
            buffer![1, 2, 3, 4].into_array(),
1✔
416
            buffer![0, 3].into_array(),
1✔
417
            Validity::NonNullable,
1✔
418
        )
419
        .unwrap();
1✔
420

421
        let second_array = ListArray::try_new(
1✔
422
            buffer![5, 6].into_array(),
1✔
423
            buffer![0, 2].into_array(),
1✔
424
            Validity::NonNullable,
1✔
425
        )
426
        .unwrap();
1✔
427

428
        let chunked_list = ChunkedArray::try_new(
1✔
429
            vec![
1✔
430
                one_trailing_unused_element.clone().into_array(),
1✔
431
                second_array.clone().into_array(),
1✔
432
            ],
433
            DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
1✔
434
        );
435

436
        let canon_values = chunked_list.unwrap().to_list().unwrap();
1✔
437

438
        assert_eq!(
1✔
439
            one_trailing_unused_element.scalar_at(0).unwrap(),
1✔
440
            canon_values.scalar_at(0).unwrap()
1✔
441
        );
442
        assert_eq!(
1✔
443
            second_array.scalar_at(0).unwrap(),
1✔
444
            canon_values.scalar_at(1).unwrap()
1✔
445
        );
446
    }
1✔
447
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc