• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16566687104

28 Jul 2025 10:31AM UTC coverage: 81.78%. Remained the same
16566687104

Pull #4035

github

web-flow
Merge 53789e31b into a0efcbe7a
Pull Request #4035: use arc slice for buffers in varbinview

22 of 23 new or added lines in 14 files covered. (95.65%)

90 existing lines in 8 files now uncovered.

43220 of 52849 relevant lines covered (81.78%)

170701.56 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

91.71
/vortex-array/src/builders/varbinview.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::any::Any;
5
use std::cmp::max;
6

7
use vortex_buffer::{Buffer, BufferMut, ByteBuffer, ByteBufferMut};
8
use vortex_dtype::{DType, Nullability};
9
use vortex_error::{VortexExpect, VortexResult};
10
use vortex_mask::Mask;
11

12
use crate::arrays::{BinaryView, VarBinViewArray};
13
use crate::builders::ArrayBuilder;
14
use crate::builders::lazy_validity_builder::LazyNullBufferBuilder;
15
use crate::{Array, ArrayRef, IntoArray, ToCanonical};
16

17
pub struct VarBinViewBuilder {
18
    views_builder: BufferMut<BinaryView>,
19
    pub null_buffer_builder: LazyNullBufferBuilder,
20
    completed: Vec<ByteBuffer>,
21
    in_progress: ByteBufferMut,
22
    nullability: Nullability,
23
    dtype: DType,
24
}
25

26
impl VarBinViewBuilder {
27
    // TODO(joe): add a block growth strategy, from arrow
28
    const BLOCK_SIZE: u32 = 8 * 8 * 1024;
29

30
    pub fn with_capacity(dtype: DType, capacity: usize) -> Self {
6,633✔
31
        assert!(
6,633✔
32
            matches!(dtype, DType::Utf8(_) | DType::Binary(_)),
6,633✔
UNCOV
33
            "VarBinViewBuilder DType must be Utf8 or Binary."
×
34
        );
35
        Self {
6,633✔
36
            views_builder: BufferMut::<BinaryView>::with_capacity(capacity),
6,633✔
37
            null_buffer_builder: LazyNullBufferBuilder::new(capacity),
6,633✔
38
            completed: vec![],
6,633✔
39
            in_progress: ByteBufferMut::empty(),
6,633✔
40
            nullability: dtype.nullability(),
6,633✔
41
            dtype,
6,633✔
42
        }
6,633✔
43
    }
6,633✔
44

45
    fn append_value_view(&mut self, value: &[u8]) {
2,382,189✔
46
        let length =
2,382,189✔
47
            u32::try_from(value.len()).vortex_expect("cannot have a single string >2^32 in length");
2,382,189✔
48
        if length <= 12 {
2,382,189✔
49
            self.views_builder.push(BinaryView::make_view(value, 0, 0));
172,556✔
50
            return;
172,556✔
51
        }
2,209,633✔
52

53
        let required_cap = self.in_progress.len() + value.len();
2,209,633✔
54
        if self.in_progress.capacity() < required_cap {
2,209,633✔
55
            self.flush_in_progress();
1,821✔
56
            let to_reserve = max(value.len(), VarBinViewBuilder::BLOCK_SIZE as usize);
1,821✔
57
            self.in_progress.reserve(to_reserve);
1,821✔
58
        };
2,207,812✔
59

60
        let offset = u32::try_from(self.in_progress.len()).vortex_expect("too many buffers");
2,209,633✔
61
        self.in_progress.extend_from_slice(value);
2,209,633✔
62
        let view = BinaryView::make_view(
2,209,633✔
63
            value,
2,209,633✔
64
            // buffer offset
65
            u32::try_from(self.completed.len()).vortex_expect("too many buffers"),
2,209,633✔
66
            offset,
2,209,633✔
67
        );
68
        self.views_builder.push(view);
2,209,633✔
69
    }
2,382,189✔
70

71
    #[inline]
72
    pub fn append_value<S: AsRef<[u8]>>(&mut self, value: S) {
2,382,189✔
73
        self.append_value_view(value.as_ref());
2,382,189✔
74
        self.null_buffer_builder.append_non_null();
2,382,189✔
75
    }
2,382,189✔
76

77
    #[inline]
78
    pub fn append_option<S: AsRef<[u8]>>(&mut self, value: Option<S>) {
5,666✔
79
        match value {
5,666✔
80
            Some(value) => self.append_value(value),
5,665✔
81
            None => self.append_null(),
1✔
82
        }
83
    }
5,666✔
84

85
    #[inline]
86
    fn flush_in_progress(&mut self) {
15,514✔
87
        if !self.in_progress.is_empty() {
15,514✔
88
            let block = std::mem::take(&mut self.in_progress).freeze();
1,821✔
89
            self.push_completed(block)
1,821✔
90
        }
13,693✔
91
    }
15,514✔
92

93
    fn push_completed(&mut self, block: ByteBuffer) {
1,821✔
94
        assert!(block.len() < u32::MAX as usize, "Block too large");
1,821✔
95
        assert!(self.completed.len() < u32::MAX as usize, "Too many blocks");
1,821✔
96
        self.completed.push(block);
1,821✔
97
    }
1,821✔
98

99
    pub fn completed_block_count(&self) -> usize {
740✔
100
        self.completed.len()
740✔
101
    }
740✔
102

103
    // Pushes an array of values into the buffer, where the buffers are sections of a
104
    // VarBinView and the views are the BinaryView's of the VarBinView *already with their*
105
    // buffers adjusted.
106
    // The views must all point to sections of the buffers and the validity length must match
107
    // the view length.
108
    pub fn push_buffer_and_adjusted_views(
740✔
109
        &mut self,
740✔
110
        buffer: &[ByteBuffer],
740✔
111
        views: &Buffer<BinaryView>,
740✔
112
        validity_mask: Mask,
740✔
113
    ) {
740✔
114
        self.flush_in_progress();
740✔
115

116
        self.completed.extend(buffer.iter().cloned());
740✔
117
        self.views_builder.extend_trusted(views.iter().copied());
740✔
118
        self.push_only_validity_mask(validity_mask);
740✔
119

120
        debug_assert_eq!(self.null_buffer_builder.len(), self.views_builder.len())
740✔
121
    }
740✔
122

123
    pub fn finish_into_varbinview(&mut self) -> VarBinViewArray {
6,633✔
124
        self.flush_in_progress();
6,633✔
125
        let buffers = std::mem::take(&mut self.completed);
6,633✔
126

127
        assert_eq!(
6,633✔
128
            self.views_builder.len(),
6,633✔
129
            self.null_buffer_builder.len(),
6,633✔
UNCOV
130
            "View and validity length must match"
×
131
        );
132

133
        let validity = self
6,633✔
134
            .null_buffer_builder
6,633✔
135
            .finish_with_nullability(self.nullability);
6,633✔
136

137
        VarBinViewArray::try_new(
6,633✔
138
            std::mem::take(&mut self.views_builder).freeze(),
6,633✔
139
            buffers,
6,633✔
140
            std::mem::replace(&mut self.dtype, DType::Null),
6,633✔
141
            validity,
6,633✔
142
        )
143
        .vortex_expect("VarBinViewArray components should be valid.")
6,633✔
144
    }
6,633✔
145
}
146

147
impl VarBinViewBuilder {
148
    // Pushes a validity mask into the builder not affecting the views or buffers
149
    fn push_only_validity_mask(&mut self, validity_mask: Mask) {
7,060✔
150
        self.null_buffer_builder.append_validity_mask(validity_mask);
7,060✔
151
    }
7,060✔
152
}
153

154
impl ArrayBuilder for VarBinViewBuilder {
UNCOV
155
    fn as_any(&self) -> &dyn Any {
×
156
        self
×
157
    }
×
158

159
    fn as_any_mut(&mut self) -> &mut dyn Any {
6,403✔
160
        self
6,403✔
161
    }
6,403✔
162

163
    #[inline]
164
    fn dtype(&self) -> &DType {
24,730✔
165
        &self.dtype
24,730✔
166
    }
24,730✔
167

168
    #[inline]
169
    fn len(&self) -> usize {
15,518✔
170
        self.null_buffer_builder.len()
15,518✔
171
    }
15,518✔
172

173
    #[inline]
174
    fn append_zeros(&mut self, n: usize) {
1✔
175
        self.views_builder.push_n(BinaryView::empty_view(), n);
1✔
176
        self.null_buffer_builder.append_n_non_nulls(n);
1✔
177
    }
1✔
178

179
    #[inline]
180
    fn append_nulls(&mut self, n: usize) {
2,086✔
181
        self.views_builder.push_n(BinaryView::empty_view(), n);
2,086✔
182
        self.null_buffer_builder.append_n_nulls(n);
2,086✔
183
    }
2,086✔
184

185
    #[inline]
186
    fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()> {
6,320✔
187
        let array = array.to_varbinview()?;
6,320✔
188
        self.flush_in_progress();
6,320✔
189

190
        let buffers_offset = u32::try_from(self.completed.len())?;
6,320✔
191
        self.completed.extend_from_slice(array.buffers());
6,320✔
192

193
        self.views_builder.extend_trusted(
6,320✔
194
            array
6,320✔
195
                .views()
6,320✔
196
                .iter()
6,320✔
197
                .map(|view| view.offset_view(buffers_offset)),
392,933✔
198
        );
199

200
        self.push_only_validity_mask(array.validity_mask()?);
6,320✔
201

202
        Ok(())
6,320✔
203
    }
6,320✔
204

UNCOV
205
    fn ensure_capacity(&mut self, capacity: usize) {
×
206
        if capacity > self.views_builder.capacity() {
×
207
            self.views_builder
×
208
                .reserve(capacity - self.views_builder.len());
×
209
            self.null_buffer_builder.ensure_capacity(capacity);
×
210
        }
×
211
    }
×
212

UNCOV
213
    fn set_validity(&mut self, validity: Mask) {
×
214
        self.null_buffer_builder = LazyNullBufferBuilder::new(validity.len());
×
215
        self.null_buffer_builder.append_validity_mask(validity);
×
216
    }
×
217

218
    fn finish(&mut self) -> ArrayRef {
4,696✔
219
        self.finish_into_varbinview().into_array()
4,696✔
220
    }
4,696✔
221
}
222

223
#[cfg(test)]
mod tests {
    use std::str::from_utf8;

    use itertools::Itertools;
    use vortex_dtype::{DType, Nullability};

    use crate::ToCanonical;
    use crate::accessor::ArrayAccessor;
    use crate::arrays::VarBinViewVTable;
    use crate::builders::{ArrayBuilder, VarBinViewBuilder};

    /// Turns borrowed expected rows into the owned form produced when decoding an array.
    fn owned_rows(rows: &[Option<&str>]) -> Vec<Option<String>> {
        rows.iter()
            .copied()
            .map(|row| row.map(|text| text.to_string()))
            .collect_vec()
    }

    /// Values, nulls and "zero" (empty) values appended one call at a time come back in order.
    #[test]
    fn test_utf8_builder() {
        let mut builder = VarBinViewBuilder::with_capacity(DType::Utf8(Nullability::Nullable), 10);

        builder.append_option(Some("Hello"));
        builder.append_option::<&str>(None);
        builder.append_value("World");

        builder.append_nulls(2);

        builder.append_zeros(2);
        builder.append_value("test");

        let finished = builder.finish();

        let decoded = finished
            .as_::<VarBinViewVTable>()
            .with_iterator(|values| {
                values
                    .map(|value| value.map(|bytes| from_utf8(bytes).unwrap().to_string()))
                    .collect_vec()
            })
            .unwrap();

        assert_eq!(decoded.len(), 8);
        assert_eq!(
            decoded,
            owned_rows(&[
                Some("Hello"),
                None,
                Some("World"),
                None,
                None,
                Some(""),
                Some(""),
                Some("test"),
            ])
        );
    }

    /// Extending from an existing array splices its elements between directly appended ones.
    #[test]
    fn test_utf8_builder_with_extend() {
        let source = {
            let mut inner =
                VarBinViewBuilder::with_capacity(DType::Utf8(Nullability::Nullable), 10);
            inner.append_null();
            inner.append_value("Hello2");
            inner.finish()
        };
        let mut builder = VarBinViewBuilder::with_capacity(DType::Utf8(Nullability::Nullable), 10);

        builder.append_option(Some("Hello1"));
        builder.extend_from_array(&source).unwrap();
        builder.append_nulls(2);
        builder.append_value("Hello3");

        let finished = builder.finish().to_varbinview().unwrap();

        let decoded = finished
            .with_iterator(|values| {
                values
                    .map(|value| value.map(|bytes| from_utf8(bytes).unwrap().to_string()))
                    .collect_vec()
            })
            .unwrap();

        assert_eq!(decoded.len(), 6);
        assert_eq!(
            decoded,
            owned_rows(&[
                Some("Hello1"),
                None,
                Some("Hello2"),
                None,
                None,
                Some("Hello3"),
            ])
        );
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc