• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 17042426005

18 Aug 2025 01:44PM UTC coverage: 87.995%. First build
17042426005

Pull #4216

github

web-flow
Merge 52d72e70b into c0b668f7f
Pull Request #4216: feat: better and more consistent validation in SerdeVTable::build

525 of 671 new or added lines in 80 files covered. (78.24%)

56705 of 64441 relevant lines covered (88.0%)

627615.79 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.72
/vortex-array/src/arrays/decimal/mod.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
mod compute;
5
mod ops;
6
mod patch;
7
mod serde;
8

9
use arrow_buffer::BooleanBufferBuilder;
10
use vortex_buffer::{Buffer, BufferMut, ByteBuffer};
11
use vortex_dtype::{DType, DecimalDType};
12
use vortex_error::{VortexExpect, VortexResult, vortex_ensure, vortex_panic};
13
use vortex_scalar::{DecimalValueType, NativeDecimalType};
14

15
use crate::builders::ArrayBuilder;
16
use crate::stats::{ArrayStats, StatsSetRef};
17
use crate::validity::Validity;
18
use crate::vtable::{
19
    ArrayVTable, CanonicalVTable, NotSupported, VTable, ValidityHelper,
20
    ValidityVTableFromValidityHelper, VisitorVTable,
21
};
22
use crate::{ArrayBufferVisitor, ArrayChildVisitor, Canonical, EncodingId, EncodingRef, vtable};
23

24
vtable!(Decimal);
25

26
impl VTable for DecimalVTable {
27
    type Array = DecimalArray;
28
    type Encoding = DecimalEncoding;
29

30
    type ArrayVTable = Self;
31
    type CanonicalVTable = Self;
32
    type OperationsVTable = Self;
33
    type ValidityVTable = ValidityVTableFromValidityHelper;
34
    type VisitorVTable = Self;
35
    type ComputeVTable = NotSupported;
36
    type EncodeVTable = NotSupported;
37
    type SerdeVTable = Self;
38

39
    fn id(_encoding: &Self::Encoding) -> EncodingId {
515,501✔
40
        EncodingId::new_ref("vortex.decimal")
515,501✔
41
    }
515,501✔
42

43
    fn encoding(_array: &Self::Array) -> EncodingRef {
8,282✔
44
        EncodingRef::new_ref(DecimalEncoding.as_ref())
8,282✔
45
    }
8,282✔
46
}
47

48
#[derive(Clone, Debug)]
49
pub struct DecimalEncoding;
50

51
/// Maps a decimal precision into the smallest type that can represent it.
52
pub fn smallest_storage_type(decimal_dtype: &DecimalDType) -> DecimalValueType {
2,673✔
53
    match decimal_dtype.precision() {
2,673✔
54
        1..=2 => DecimalValueType::I8,
2,673✔
55
        3..=4 => DecimalValueType::I16,
2,517✔
56
        5..=9 => DecimalValueType::I32,
2,283✔
57
        10..=18 => DecimalValueType::I64,
1,839✔
58
        19..=38 => DecimalValueType::I128,
675✔
59
        39..=76 => DecimalValueType::I256,
×
60
        0 => unreachable!("precision must be greater than 0"),
×
61
        p => unreachable!("precision larger than 76 is invalid found precision {p}"),
×
62
    }
63
}
2,673✔
64

65
/// True if `value_type` can represent every value of the type `dtype`.
66
pub fn compatible_storage_type(value_type: DecimalValueType, dtype: DecimalDType) -> bool {
39✔
67
    value_type >= smallest_storage_type(&dtype)
39✔
68
}
39✔
69

70
/// A decimal array that stores fixed-precision decimal numbers with configurable scale.
71
///
72
/// This mirrors the Apache Arrow Decimal encoding and provides exact arithmetic for
73
/// financial and scientific computations where floating-point precision loss is unacceptable.
74
///
75
/// ## Storage Format
76
///
77
/// Decimals are stored as scaled integers in a supported scalar value type.
78
///
79
/// The precisions supported for each scalar type are:
80
/// - **i8**: precision 1-2 digits
81
/// - **i16**: precision 3-4 digits
82
/// - **i32**: precision 5-9 digits
83
/// - **i64**: precision 10-18 digits
84
/// - **i128**: precision 19-38 digits
85
/// - **i256**: precision 39-76 digits
86
///
87
/// These are just the maximal ranges for each scalar type, but it is perfectly legal to store
88
/// values with precision that does not match this exactly. For example, a valid DecimalArray with
89
/// precision=39 may store its values in an `i8` if all of the actual values fit into it.
90
///
91
/// Similarly, a `DecimalArray` can be built that stores a set of precision=2 values in a
92
/// `Buffer<i256>`.
93
///
94
/// ## Precision and Scale
95
///
96
/// - **Precision**: Total number of significant digits (1-76, u8 range)
97
/// - **Scale**: Number of digits after the decimal point (-128 to 127, i8 range)
98
/// - **Value**: `stored_integer / 10^scale`
99
///
100
/// For example, with precision=5 and scale=2:
101
/// - Stored value 12345 represents 123.45
102
/// - Range: -999.99 to 999.99
103
///
104
/// ## Valid Scalar Types
105
///
106
/// The underlying storage uses one of these native value types:
107
/// - `DecimalValueType::I8`, `I16`, `I32`, `I64`, `I128`, `I256`
108
/// - The value type is taken from the element type of the buffer the array is built from
109
///
110
/// # Examples
111
///
112
/// ```
113
/// use vortex_array::arrays::DecimalArray;
114
/// use vortex_dtype::DecimalDType;
115
/// use vortex_buffer::{buffer, Buffer};
116
/// use vortex_array::validity::Validity;
117
///
118
/// // Create a decimal array with precision=5, scale=2 (e.g., 123.45)
119
/// let decimal_dtype = DecimalDType::new(5, 2);
120
/// let values = buffer![12345i32, 67890i32, -12300i32]; // 123.45, 678.90, -123.00
121
/// let array = DecimalArray::new(values, decimal_dtype, Validity::NonNullable);
122
///
123
/// assert_eq!(array.precision(), 5);
124
/// assert_eq!(array.scale(), 2);
125
/// assert_eq!(array.len(), 3);
126
/// ```
127
#[derive(Clone, Debug)]
128
pub struct DecimalArray {
129
    dtype: DType,
130
    values: ByteBuffer,
131
    values_type: DecimalValueType,
132
    validity: Validity,
133
    stats_set: ArrayStats,
134
}
135

136
impl DecimalArray {
137
    fn validate<T: NativeDecimalType>(buffer: &Buffer<T>, validity: &Validity) -> VortexResult<()> {
8,178✔
138
        if let Some(len) = validity.maybe_len() {
8,178✔
139
            vortex_ensure!(
774✔
140
                buffer.len() == len,
774✔
NEW
141
                "Buffer and validity length mismatch: buffer={}, validity={}",
×
NEW
142
                buffer.len(),
×
143
                len,
144
            );
145
        }
7,404✔
146

147
        Ok(())
8,178✔
148
    }
8,178✔
149
}
150

151
impl DecimalArray {
152
    /// Creates a new [`DecimalArray`] from a [`Buffer`] and [`Validity`], panicking if
153
    /// validation fails.
154
    ///
155
    /// # Panics
156
    ///
157
    /// Panics if the provided buffer and validity differ in length.
158
    ///
159
    /// See also [`DecimalArray::try_new`].
160
    pub fn new<T: NativeDecimalType>(
8,177✔
161
        buffer: Buffer<T>,
8,177✔
162
        decimal_dtype: DecimalDType,
8,177✔
163
        validity: Validity,
8,177✔
164
    ) -> Self {
8,177✔
165
        Self::try_new(buffer, decimal_dtype, validity).vortex_expect("DecimalArray new")
8,177✔
166
    }
8,177✔
167

168
    /// Build a new `DecimalArray` from its component `buffer`, `decimal_dtype` and `validity`.
169
    ///
170
    /// This constructor validates that the buffer and validity have equal lengths, returning
171
    /// an error otherwise.
172
    ///
173
    /// See [`DecimalArray::new`] for an infallible constructor that panics on validation errors.
174
    pub fn try_new<T: NativeDecimalType>(
8,178✔
175
        buffer: Buffer<T>,
8,178✔
176
        decimal_dtype: DecimalDType,
8,178✔
177
        validity: Validity,
8,178✔
178
    ) -> VortexResult<Self> {
8,178✔
179
        Self::validate(&buffer, &validity)?;
8,178✔
180

181
        Ok(Self {
8,178✔
182
            values: buffer.into_byte_buffer(),
8,178✔
183
            values_type: T::VALUES_TYPE,
8,178✔
184
            dtype: DType::Decimal(decimal_dtype, validity.nullability()),
8,178✔
185
            validity,
8,178✔
186
            stats_set: Default::default(),
8,178✔
187
        })
8,178✔
188
    }
8,178✔
189

190
    /// Returns the underlying [`ByteBuffer`] of the array.
191
    pub fn byte_buffer(&self) -> ByteBuffer {
20✔
192
        self.values.clone()
20✔
193
    }
20✔
194

195
    pub fn buffer<T: NativeDecimalType>(&self) -> Buffer<T> {
11,767✔
196
        if self.values_type != T::VALUES_TYPE {
11,767✔
197
            vortex_panic!(
×
198
                "Cannot extract Buffer<{:?}> for DecimalArray with values_type {:?}",
×
199
                T::VALUES_TYPE,
200
                self.values_type,
201
            );
202
        }
11,767✔
203
        Buffer::<T>::from_byte_buffer(self.values.clone())
11,767✔
204
    }
11,767✔
205

206
    /// Returns the decimal type information
207
    pub fn decimal_dtype(&self) -> DecimalDType {
14,379✔
208
        match &self.dtype {
14,379✔
209
            DType::Decimal(decimal_dtype, _) => *decimal_dtype,
14,379✔
210
            _ => vortex_panic!("Expected Decimal dtype, got {:?}", self.dtype),
×
211
        }
212
    }
14,379✔
213

214
    pub fn values_type(&self) -> DecimalValueType {
11,220✔
215
        self.values_type
11,220✔
216
    }
11,220✔
217

218
    pub fn precision(&self) -> u8 {
×
219
        self.decimal_dtype().precision()
×
220
    }
×
221

222
    pub fn scale(&self) -> i8 {
×
223
        self.decimal_dtype().scale()
×
224
    }
×
225

226
    pub fn from_option_iter<T: NativeDecimalType, I: IntoIterator<Item = Option<T>>>(
4✔
227
        iter: I,
4✔
228
        decimal_dtype: DecimalDType,
4✔
229
    ) -> Self {
4✔
230
        let iter = iter.into_iter();
4✔
231
        let mut values = BufferMut::with_capacity(iter.size_hint().0);
4✔
232
        let mut validity = BooleanBufferBuilder::new(values.capacity());
4✔
233

234
        for i in iter {
20✔
235
            match i {
16✔
236
                None => {
4✔
237
                    validity.append(false);
4✔
238
                    values.push(T::default());
4✔
239
                }
4✔
240
                Some(e) => {
12✔
241
                    validity.append(true);
12✔
242
                    values.push(e);
12✔
243
                }
12✔
244
            }
245
        }
246
        Self::new(
4✔
247
            values.freeze(),
4✔
248
            decimal_dtype,
4✔
249
            Validity::from(validity.finish()),
4✔
250
        )
251
    }
4✔
252
}
253

254
impl ArrayVTable<DecimalVTable> for DecimalVTable {
255
    fn len(array: &DecimalArray) -> usize {
83,922✔
256
        let divisor = match array.values_type {
83,922✔
257
            DecimalValueType::I8 => 1,
8,192✔
258
            DecimalValueType::I16 => 2,
6,508✔
259
            DecimalValueType::I32 => 4,
11,824✔
260
            DecimalValueType::I64 => 8,
10,885✔
261
            DecimalValueType::I128 => 16,
46,501✔
262
            DecimalValueType::I256 => 32,
12✔
263
            ty => vortex_panic!("unknown decimal value type {:?}", ty),
×
264
        };
265
        array.values.len() / divisor
83,922✔
266
    }
83,922✔
267

268
    fn dtype(array: &DecimalArray) -> &DType {
84,836✔
269
        &array.dtype
84,836✔
270
    }
84,836✔
271

272
    fn stats(array: &DecimalArray) -> StatsSetRef<'_> {
47,281✔
273
        array.stats_set.to_ref(array.as_ref())
47,281✔
274
    }
47,281✔
275
}
276

277
impl VisitorVTable<DecimalVTable> for DecimalVTable {
278
    fn visit_buffers(array: &DecimalArray, visitor: &mut dyn ArrayBufferVisitor) {
5,297✔
279
        visitor.visit_buffer(&array.values);
5,297✔
280
    }
5,297✔
281

282
    fn visit_children(array: &DecimalArray, visitor: &mut dyn ArrayChildVisitor) {
5,376✔
283
        visitor.visit_validity(array.validity(), array.len())
5,376✔
284
    }
5,376✔
285
}
286

287
impl CanonicalVTable<DecimalVTable> for DecimalVTable {
288
    fn canonicalize(array: &DecimalArray) -> VortexResult<Canonical> {
8,629✔
289
        Ok(Canonical::Decimal(array.clone()))
8,629✔
290
    }
8,629✔
291

292
    fn append_to_builder(array: &DecimalArray, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
990✔
293
        builder.extend_from_array(array.as_ref())
990✔
294
    }
990✔
295
}
296

297
impl ValidityHelper for DecimalArray {
298
    fn validity(&self) -> &Validity {
19,358✔
299
        &self.validity
19,358✔
300
    }
19,358✔
301
}
302

303
#[cfg(test)]
304
mod test {
305
    use arrow_array::Decimal128Array;
306

307
    #[test]
308
    fn test_decimal() {
1✔
309
        // They pass it b/c the DType carries the information. No other way to carry a
310
        // dtype except via the array.
311
        let value = Decimal128Array::new_null(100);
1✔
312
        let numeric = value.value(10);
1✔
313
        assert_eq!(numeric, 0i128);
1✔
314
    }
1✔
315
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc