• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16578751259

28 Jul 2025 07:46PM UTC coverage: 81.858% (+0.8%) from 81.087%
16578751259

Pull #3992

github

web-flow
Merge 0fe171459 into 1ae560509
Pull Request #3992: feat: teach SparseArray to canonicalize lists

222 of 241 new or added lines in 1 file covered. (92.12%)

495 existing lines in 40 files now uncovered.

43529 of 53176 relevant lines covered (81.86%)

169875.89 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

91.71
/vortex-array/src/builders/varbinview.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::any::Any;
5
use std::cmp::max;
6
use std::sync::Arc;
7

8
use vortex_buffer::{Buffer, BufferMut, ByteBuffer, ByteBufferMut};
9
use vortex_dtype::{DType, Nullability};
10
use vortex_error::{VortexExpect, VortexResult};
11
use vortex_mask::Mask;
12

13
use crate::arrays::{BinaryView, VarBinViewArray};
14
use crate::builders::ArrayBuilder;
15
use crate::builders::lazy_validity_builder::LazyNullBufferBuilder;
16
use crate::{Array, ArrayRef, IntoArray, ToCanonical};
17

18
pub struct VarBinViewBuilder {
19
    views_builder: BufferMut<BinaryView>,
20
    pub null_buffer_builder: LazyNullBufferBuilder,
21
    completed: Vec<ByteBuffer>,
22
    in_progress: ByteBufferMut,
23
    nullability: Nullability,
24
    dtype: DType,
25
}
26

27
impl VarBinViewBuilder {
28
    // TODO(joe): add a block growth strategy, from arrow
29
    const BLOCK_SIZE: u32 = 8 * 8 * 1024;
30

31
    pub fn with_capacity(dtype: DType, capacity: usize) -> Self {
6,633✔
32
        assert!(
6,633✔
33
            matches!(dtype, DType::Utf8(_) | DType::Binary(_)),
6,633✔
UNCOV
34
            "VarBinViewBuilder DType must be Utf8 or Binary."
×
35
        );
36
        Self {
6,633✔
37
            views_builder: BufferMut::<BinaryView>::with_capacity(capacity),
6,633✔
38
            null_buffer_builder: LazyNullBufferBuilder::new(capacity),
6,633✔
39
            completed: vec![],
6,633✔
40
            in_progress: ByteBufferMut::empty(),
6,633✔
41
            nullability: dtype.nullability(),
6,633✔
42
            dtype,
6,633✔
43
        }
6,633✔
44
    }
6,633✔
45

46
    fn append_value_view(&mut self, value: &[u8]) {
2,382,189✔
47
        let length =
2,382,189✔
48
            u32::try_from(value.len()).vortex_expect("cannot have a single string >2^32 in length");
2,382,189✔
49
        if length <= 12 {
2,382,189✔
50
            self.views_builder.push(BinaryView::make_view(value, 0, 0));
172,556✔
51
            return;
172,556✔
52
        }
2,209,633✔
53

54
        let required_cap = self.in_progress.len() + value.len();
2,209,633✔
55
        if self.in_progress.capacity() < required_cap {
2,209,633✔
56
            self.flush_in_progress();
1,821✔
57
            let to_reserve = max(value.len(), VarBinViewBuilder::BLOCK_SIZE as usize);
1,821✔
58
            self.in_progress.reserve(to_reserve);
1,821✔
59
        };
2,207,812✔
60

61
        let offset = u32::try_from(self.in_progress.len()).vortex_expect("too many buffers");
2,209,633✔
62
        self.in_progress.extend_from_slice(value);
2,209,633✔
63
        let view = BinaryView::make_view(
2,209,633✔
64
            value,
2,209,633✔
65
            // buffer offset
66
            u32::try_from(self.completed.len()).vortex_expect("too many buffers"),
2,209,633✔
67
            offset,
2,209,633✔
68
        );
69
        self.views_builder.push(view);
2,209,633✔
70
    }
2,382,189✔
71

72
    #[inline]
73
    pub fn append_value<S: AsRef<[u8]>>(&mut self, value: S) {
2,382,189✔
74
        self.append_value_view(value.as_ref());
2,382,189✔
75
        self.null_buffer_builder.append_non_null();
2,382,189✔
76
    }
2,382,189✔
77

78
    #[inline]
79
    pub fn append_option<S: AsRef<[u8]>>(&mut self, value: Option<S>) {
5,666✔
80
        match value {
5,666✔
81
            Some(value) => self.append_value(value),
5,665✔
82
            None => self.append_null(),
1✔
83
        }
84
    }
5,666✔
85

86
    #[inline]
87
    fn flush_in_progress(&mut self) {
15,514✔
88
        if !self.in_progress.is_empty() {
15,514✔
89
            let block = std::mem::take(&mut self.in_progress).freeze();
1,821✔
90
            self.push_completed(block)
1,821✔
91
        }
13,693✔
92
    }
15,514✔
93

94
    fn push_completed(&mut self, block: ByteBuffer) {
1,821✔
95
        assert!(block.len() < u32::MAX as usize, "Block too large");
1,821✔
96
        assert!(self.completed.len() < u32::MAX as usize, "Too many blocks");
1,821✔
97
        self.completed.push(block);
1,821✔
98
    }
1,821✔
99

100
    pub fn completed_block_count(&self) -> usize {
740✔
101
        self.completed.len()
740✔
102
    }
740✔
103

104
    // Pushes an array of values into the buffer, where the buffers are sections of a
105
    // VarBinView and the views are the BinaryView's of the VarBinView *already with their*
106
    // buffers adjusted.
107
    // The views must all point to sections of the buffers and the validity length must match
108
    // the view length.
109
    pub fn push_buffer_and_adjusted_views(
740✔
110
        &mut self,
740✔
111
        buffer: &[ByteBuffer],
740✔
112
        views: &Buffer<BinaryView>,
740✔
113
        validity_mask: Mask,
740✔
114
    ) {
740✔
115
        self.flush_in_progress();
740✔
116

117
        self.completed.extend(buffer.iter().cloned());
740✔
118
        self.views_builder.extend_trusted(views.iter().copied());
740✔
119
        self.push_only_validity_mask(validity_mask);
740✔
120

121
        debug_assert_eq!(self.null_buffer_builder.len(), self.views_builder.len())
740✔
122
    }
740✔
123

124
    pub fn finish_into_varbinview(&mut self) -> VarBinViewArray {
6,633✔
125
        self.flush_in_progress();
6,633✔
126
        let buffers = std::mem::take(&mut self.completed);
6,633✔
127

128
        assert_eq!(
6,633✔
129
            self.views_builder.len(),
6,633✔
130
            self.null_buffer_builder.len(),
6,633✔
UNCOV
131
            "View and validity length must match"
×
132
        );
133

134
        let validity = self
6,633✔
135
            .null_buffer_builder
6,633✔
136
            .finish_with_nullability(self.nullability);
6,633✔
137

138
        VarBinViewArray::try_new(
6,633✔
139
            std::mem::take(&mut self.views_builder).freeze(),
6,633✔
140
            Arc::from(buffers),
6,633✔
141
            std::mem::replace(&mut self.dtype, DType::Null),
6,633✔
142
            validity,
6,633✔
143
        )
144
        .vortex_expect("VarBinViewArray components should be valid.")
6,633✔
145
    }
6,633✔
146
}
147

148
impl VarBinViewBuilder {
149
    // Pushes a validity mask into the builder not affecting the views or buffers
150
    fn push_only_validity_mask(&mut self, validity_mask: Mask) {
7,060✔
151
        self.null_buffer_builder.append_validity_mask(validity_mask);
7,060✔
152
    }
7,060✔
153
}
154

155
impl ArrayBuilder for VarBinViewBuilder {
156
    fn as_any(&self) -> &dyn Any {
×
157
        self
×
UNCOV
158
    }
×
159

160
    fn as_any_mut(&mut self) -> &mut dyn Any {
6,403✔
161
        self
6,403✔
162
    }
6,403✔
163

164
    #[inline]
165
    fn dtype(&self) -> &DType {
24,730✔
166
        &self.dtype
24,730✔
167
    }
24,730✔
168

169
    #[inline]
170
    fn len(&self) -> usize {
15,518✔
171
        self.null_buffer_builder.len()
15,518✔
172
    }
15,518✔
173

174
    #[inline]
175
    fn append_zeros(&mut self, n: usize) {
1✔
176
        self.views_builder.push_n(BinaryView::empty_view(), n);
1✔
177
        self.null_buffer_builder.append_n_non_nulls(n);
1✔
178
    }
1✔
179

180
    #[inline]
181
    fn append_nulls(&mut self, n: usize) {
2,086✔
182
        self.views_builder.push_n(BinaryView::empty_view(), n);
2,086✔
183
        self.null_buffer_builder.append_n_nulls(n);
2,086✔
184
    }
2,086✔
185

186
    #[inline]
187
    fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()> {
6,320✔
188
        let array = array.to_varbinview()?;
6,320✔
189
        self.flush_in_progress();
6,320✔
190

191
        let buffers_offset = u32::try_from(self.completed.len())?;
6,320✔
192
        self.completed.extend_from_slice(array.buffers());
6,320✔
193

194
        self.views_builder.extend_trusted(
6,320✔
195
            array
6,320✔
196
                .views()
6,320✔
197
                .iter()
6,320✔
198
                .map(|view| view.offset_view(buffers_offset)),
392,933✔
199
        );
200

201
        self.push_only_validity_mask(array.validity_mask()?);
6,320✔
202

203
        Ok(())
6,320✔
204
    }
6,320✔
205

206
    fn ensure_capacity(&mut self, capacity: usize) {
×
207
        if capacity > self.views_builder.capacity() {
×
208
            self.views_builder
×
209
                .reserve(capacity - self.views_builder.len());
×
210
            self.null_buffer_builder.ensure_capacity(capacity);
×
211
        }
×
UNCOV
212
    }
×
213

214
    fn set_validity(&mut self, validity: Mask) {
×
215
        self.null_buffer_builder = LazyNullBufferBuilder::new(validity.len());
×
216
        self.null_buffer_builder.append_validity_mask(validity);
×
UNCOV
217
    }
×
218

219
    fn finish(&mut self) -> ArrayRef {
4,696✔
220
        self.finish_into_varbinview().into_array()
4,696✔
221
    }
4,696✔
222
}
223

224
#[cfg(test)]
mod tests {
    use std::str::from_utf8;

    use itertools::Itertools;
    use vortex_dtype::{DType, Nullability};

    use crate::ToCanonical;
    use crate::accessor::ArrayAccessor;
    use crate::arrays::VarBinViewVTable;
    use crate::builders::{ArrayBuilder, VarBinViewBuilder};

    /// Values, nulls and zero-fills appended one after another come back in order.
    #[test]
    fn test_utf8_builder() {
        let mut builder = VarBinViewBuilder::with_capacity(DType::Utf8(Nullability::Nullable), 10);

        builder.append_option(Some("Hello"));
        builder.append_option::<&str>(None);
        builder.append_value("World");

        builder.append_nulls(2);

        builder.append_zeros(2);
        builder.append_value("test");

        let finished = builder.finish();

        let decoded = finished
            .as_::<VarBinViewVTable>()
            .with_iterator(|iter| {
                iter.map(|item| item.map(|bytes| from_utf8(bytes).unwrap().to_string()))
                    .collect_vec()
            })
            .unwrap();

        let expected: Vec<Option<String>> = [
            Some("Hello"),
            None,
            Some("World"),
            None,
            None,
            Some(""),
            Some(""),
            Some("test"),
        ]
        .into_iter()
        .map(|item| item.map(str::to_string))
        .collect();

        assert_eq!(decoded.len(), 8);
        assert_eq!(decoded, expected);
    }

    /// Extending from another array keeps the elements appended before and after it.
    #[test]
    fn test_utf8_builder_with_extend() {
        let array = {
            let mut inner = VarBinViewBuilder::with_capacity(DType::Utf8(Nullability::Nullable), 10);
            inner.append_null();
            inner.append_value("Hello2");
            inner.finish()
        };
        let mut builder = VarBinViewBuilder::with_capacity(DType::Utf8(Nullability::Nullable), 10);

        builder.append_option(Some("Hello1"));
        builder.extend_from_array(&array).unwrap();
        builder.append_nulls(2);
        builder.append_value("Hello3");

        let finished = builder.finish().to_varbinview().unwrap();

        let decoded = finished
            .with_iterator(|iter| {
                iter.map(|item| item.map(|bytes| from_utf8(bytes).unwrap().to_string()))
                    .collect_vec()
            })
            .unwrap();

        let expected: Vec<Option<String>> = [
            Some("Hello1"),
            None,
            Some("Hello2"),
            None,
            None,
            Some("Hello3"),
        ]
        .into_iter()
        .map(|item| item.map(str::to_string))
        .collect();

        assert_eq!(decoded.len(), 6);
        assert_eq!(decoded, expected);
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc