17042426005

Committed 18 Aug 2025 01:44PM UTC coverage: 87.995%. First build

Build # 17042426005

Build Type

Pull #4216

github

Committed by

web-flow

Commit Message

Merge 52d72e70b into c0b668f7f

Pull Request #4216: feat: better and more consistent validation in SerdeVTable::build

Run Details

525 of 671 new or added lines in 80 files covered. (78.24%)

56705 of 64441 relevant lines covered (88.0%)

627615.79 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.72

/vortex-array/src/arrays/decimal/mod.rs

// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

mod compute;
mod ops;
mod patch;
mod serde;

use arrow_buffer::BooleanBufferBuilder;
use vortex_buffer::{Buffer, BufferMut, ByteBuffer};
use vortex_dtype::{DType, DecimalDType};
use vortex_error::{VortexExpect, VortexResult, vortex_ensure, vortex_panic};
use vortex_scalar::{DecimalValueType, NativeDecimalType};

use crate::builders::ArrayBuilder;
use crate::stats::{ArrayStats, StatsSetRef};
use crate::validity::Validity;
use crate::vtable::{
    ArrayVTable, CanonicalVTable, NotSupported, VTable, ValidityHelper,
    ValidityVTableFromValidityHelper, VisitorVTable,
};
use crate::{ArrayBufferVisitor, ArrayChildVisitor, Canonical, EncodingId, EncodingRef, vtable};

// NOTE(review): `DecimalVTable` is referenced throughout this file but never declared here;
// presumably this macro invocation generates it — confirm against the `vtable!` definition.
vtable!(Decimal);

/// Wires the decimal array and encoding types into the [`VTable`] machinery.
impl VTable for DecimalVTable {
    // The concrete array and encoding types described by this vtable.
    type Array = DecimalArray;
    type Encoding = DecimalEncoding;

    // Capabilities provided by `DecimalVTable` itself.
    type ArrayVTable = Self;
    type CanonicalVTable = Self;
    type OperationsVTable = Self;
    type VisitorVTable = Self;
    type SerdeVTable = Self;

    // Validity is served through the array's `ValidityHelper` implementation.
    type ValidityVTable = ValidityVTableFromValidityHelper;

    // Capabilities this encoding does not provide.
    type ComputeVTable = NotSupported;
    type EncodeVTable = NotSupported;

    /// Returns the identifier of the decimal encoding, `"vortex.decimal"`.
    fn id(_encoding: &Self::Encoding) -> EncodingId {
        const DECIMAL_ENCODING_ID: &str = "vortex.decimal";
        EncodingId::new_ref(DECIMAL_ENCODING_ID)
    }

    /// Returns a reference to the [`DecimalEncoding`]; the array argument is not inspected.
    fn encoding(_array: &Self::Array) -> EncodingRef {
        EncodingRef::new_ref(DecimalEncoding.as_ref())
    }
}

/// Field-less marker type for the decimal encoding.
///
/// It is used as the `Encoding` associated type of `DecimalVTable`.
#[derive(Clone, Debug)]
pub struct DecimalEncoding;

/// Returns the narrowest [`DecimalValueType`] able to hold every value of the given precision.
///
/// | precision | storage type |
/// |-----------|--------------|
/// | 1..=2     | `I8`         |
/// | 3..=4     | `I16`        |
/// | 5..=9     | `I32`        |
/// | 10..=18   | `I64`        |
/// | 19..=38   | `I128`       |
/// | 39..=76   | `I256`       |
///
/// # Panics
///
/// Panics if the precision is `0` or greater than `76`.
pub fn smallest_storage_type(decimal_dtype: &DecimalDType) -> DecimalValueType {
    let p = decimal_dtype.precision();

    // A precision of zero is rejected up front so the match below only sees positive values.
    if p == 0 {
        unreachable!("precision must be greater than 0");
    }

    match p {
        1..=2 => DecimalValueType::I8,
        3..=4 => DecimalValueType::I16,
        5..=9 => DecimalValueType::I32,
        10..=18 => DecimalValueType::I64,
        19..=38 => DecimalValueType::I128,
        39..=76 => DecimalValueType::I256,
        _ => unreachable!("precision larger than 76 is invalid found precision {p}"),
    }
}

/// Reports whether `value_type` is at least as wide as the smallest storage type that
/// [`smallest_storage_type`] selects for `dtype`.
pub fn compatible_storage_type(value_type: DecimalValueType, dtype: DecimalDType) -> bool {
    let required = smallest_storage_type(&dtype);
    value_type >= required
}

/// A decimal array that stores fixed-precision decimal numbers with configurable scale.
///
/// This mirrors the Apache Arrow Decimal encoding and provides exact arithmetic for
/// financial and scientific computations where floating-point precision loss is unacceptable.
///
/// ## Storage Format
///
/// Decimals are stored as scaled integers in a supported scalar value type.
///
/// The precisions supported for each scalar type are:
/// - **i8**: precision 1-2 digits
/// - **i16**: precision 3-4 digits
/// - **i32**: precision 5-9 digits
/// - **i64**: precision 10-18 digits
/// - **i128**: precision 19-38 digits
/// - **i256**: precision 39-76 digits
///
/// These are just the maximal ranges for each scalar type, but it is perfectly legal to store
/// values with precision that does not match this exactly. For example, a valid DecimalArray with
/// precision=39 may store its values in an `i8` if all of the actual values fit into it.
///
/// Similarly, a `DecimalArray` can be built that stores a set of precision=2 values in a
/// `Buffer<i256>`.
///
/// ## Precision and Scale
///
/// - **Precision**: Total number of significant digits (1-76, u8 range)
/// - **Scale**: Number of digits after the decimal point (-128 to 127, i8 range)
/// - **Value**: `stored_integer / 10^scale`
///
/// For example, with precision=5 and scale=2:
/// - Stored value 12345 represents 123.45
/// - Range: -999.99 to 999.99
///
/// ## Valid Scalar Types
///
/// The underlying storage uses one of these native types:
/// - `DecimalValueType::I8`, `I16`, `I32`, `I64`, `I128`, `I256`
/// - The value type is taken from the element type of the buffer handed to the constructor;
///   it is not derived from the precision.
///
/// # Examples
///
/// ```
/// use vortex_array::arrays::DecimalArray;
/// use vortex_dtype::DecimalDType;
/// use vortex_buffer::{buffer, Buffer};
/// use vortex_array::validity::Validity;
///
/// // Create a decimal array with precision=5, scale=2 (e.g., 123.45)
/// let decimal_dtype = DecimalDType::new(5, 2);
/// let values = buffer![12345i32, 67890i32, -12300i32]; // 123.45, 678.90, -123.00
/// let array = DecimalArray::new(values, decimal_dtype, Validity::NonNullable);
///
/// assert_eq!(array.precision(), 5);
/// assert_eq!(array.scale(), 2);
/// assert_eq!(array.len(), 3);
/// ```
#[derive(Clone, Debug)]
pub struct DecimalArray {
    // Logical type of the array: a `DType::Decimal` whose nullability is taken from `validity`
    // at construction time.
    dtype: DType,
    // Untyped bytes of the stored values, obtained from the typed buffer passed to the
    // constructor.
    values: ByteBuffer,
    // Native integer type of the elements held in `values`.
    values_type: DecimalValueType,
    // Validity (null information) for the elements.
    validity: Validity,
    // Statistics for this array; set to the default value on construction.
    stats_set: ArrayStats,
}

impl DecimalArray {
    /// Checks that `buffer` and `validity` agree on the number of elements.
    ///
    /// The check only applies when the validity reports a length, i.e. when
    /// `validity.maybe_len()` returns `Some`; otherwise validation succeeds unconditionally.
    ///
    /// # Errors
    ///
    /// Returns an error if the validity reports a length different from `buffer.len()`.
    fn validate<T: NativeDecimalType>(buffer: &Buffer<T>, validity: &Validity) -> VortexResult<()> {
        // Nothing to compare against when the validity carries no length of its own.
        let Some(validity_len) = validity.maybe_len() else {
            return Ok(());
        };

        vortex_ensure!(
            buffer.len() == validity_len,
            "Buffer and validity length mismatch: buffer={}, validity={}",
            buffer.len(),
            validity_len,
        );

        Ok(())
    }
}

impl DecimalArray {
    /// Creates a new [`DecimalArray`] from a [`Buffer`] and [`Validity`], without checking
    /// any invariants.
    ///
    /// # Panics
    ///
    /// Panics if the provided buffer and validity differ in length.
    ///
    /// See also [`DecimalArray::try_new`].
    pub fn new<T: NativeDecimalType>(
        buffer: Buffer<T>,
        decimal_dtype: DecimalDType,
        validity: Validity,
    ) -> Self {
        Self::try_new(buffer, decimal_dtype, validity).vortex_expect("DecimalArray new")
    }

    /// Build a new `DecimalArray` from a component `buffer`, decimal_dtype` and `validity`.
    ///
    /// This constructor validates the length of the buffer and validity are equal, returning
    /// an error otherwise.
    ///
    /// See [`DecimalArray::new`] for an infallible constructor that panics on validation errors.
    pub fn try_new<T: NativeDecimalType>(
        buffer: Buffer<T>,
        decimal_dtype: DecimalDType,
        validity: Validity,
    ) -> VortexResult<Self> {
        Self::validate(&buffer, &validity)?;

        Ok(Self {
            values: buffer.into_byte_buffer(),
            values_type: T::VALUES_TYPE,
            dtype: DType::Decimal(decimal_dtype, validity.nullability()),
            validity,
            stats_set: Default::default(),
        })
    }

    /// Returns the underlying [`ByteBuffer`] of the array.
    pub fn byte_buffer(&self) -> ByteBuffer {
        self.values.clone()
    }

    pub fn buffer<T: NativeDecimalType>(&self) -> Buffer<T> {
        if self.values_type != T::VALUES_TYPE {
            vortex_panic!(
                "Cannot extract Buffer<{:?}> for DecimalArray with values_type {:?}",
                T::VALUES_TYPE,
                self.values_type,
            );
        }
        Buffer::<T>::from_byte_buffer(self.values.clone())
    }

    /// Returns the decimal type information
    pub fn decimal_dtype(&self) -> DecimalDType {
        match &self.dtype {
            DType::Decimal(decimal_dtype, _) => *decimal_dtype,
            _ => vortex_panic!("Expected Decimal dtype, got {:?}", self.dtype),
        }
    }

    pub fn values_type(&self) -> DecimalValueType {
        self.values_type
    }

    pub fn precision(&self) -> u8 {
        self.decimal_dtype().precision()
    }

    pub fn scale(&self) -> i8 {
        self.decimal_dtype().scale()
    }

    pub fn from_option_iter<T: NativeDecimalType, I: IntoIterator<Item = Option<T>>>(
        iter: I,
        decimal_dtype: DecimalDType,
    ) -> Self {
        let iter = iter.into_iter();
        let mut values = BufferMut::with_capacity(iter.size_hint().0);
        let mut validity = BooleanBufferBuilder::new(values.capacity());

        for i in iter {
            match i {
                None => {
                    validity.append(false);
                    values.push(T::default());
                }
                Some(e) => {
                    validity.append(true);
                    values.push(e);
                }
            }
        }
        Self::new(
            values.freeze(),
            decimal_dtype,
            Validity::from(validity.finish()),
        )
    }
}

impl ArrayVTable<DecimalVTable> for DecimalVTable {
    /// Returns the number of elements, computed as the byte length of the values buffer
    /// divided by the byte width of the array's value type.
    ///
    /// # Panics
    ///
    /// Panics if the value type is not one of the six listed variants.
    fn len(array: &DecimalArray) -> usize {
        // Bytes occupied by a single stored value.
        let bytes_per_value = match array.values_type {
            DecimalValueType::I8 => 1,
            DecimalValueType::I16 => 2,
            DecimalValueType::I32 => 4,
            DecimalValueType::I64 => 8,
            DecimalValueType::I128 => 16,
            DecimalValueType::I256 => 32,
            other => vortex_panic!("unknown decimal value type {:?}", other),
        };

        let total_bytes = array.values.len();
        total_bytes / bytes_per_value
    }

    /// Returns the dtype stored on the array.
    fn dtype(array: &DecimalArray) -> &DType {
        &array.dtype
    }

    /// Returns a reference view over the array's statistics set.
    fn stats(array: &DecimalArray) -> StatsSetRef<'_> {
        let stats = &array.stats_set;
        stats.to_ref(array.as_ref())
    }
}

impl VisitorVTable<DecimalVTable> for DecimalVTable {
    /// Visits the single buffer of the array: its values bytes.
    fn visit_buffers(array: &DecimalArray, visitor: &mut dyn ArrayBufferVisitor) {
        let values = &array.values;
        visitor.visit_buffer(values);
    }

    /// Visits the array's validity together with the array length.
    fn visit_children(array: &DecimalArray, visitor: &mut dyn ArrayChildVisitor) {
        let validity = array.validity();
        let len = array.len();
        visitor.visit_validity(validity, len)
    }
}

impl CanonicalVTable<DecimalVTable> for DecimalVTable {
    /// Wraps a clone of the array in [`Canonical::Decimal`]; no conversion is performed.
    fn canonicalize(array: &DecimalArray) -> VortexResult<Canonical> {
        let canonical = Canonical::Decimal(array.clone());
        Ok(canonical)
    }

    /// Appends the array to `builder` by delegating to `ArrayBuilder::extend_from_array`.
    fn append_to_builder(array: &DecimalArray, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
        builder.extend_from_array(array.as_ref())
    }
}

/// Exposes the validity stored on a [`DecimalArray`] through [`ValidityHelper`].
impl ValidityHelper for DecimalArray {
    /// Returns a reference to the validity held by this array.
    fn validity(&self) -> &Validity {
        &self.validity
    }
}

#[cfg(test)]
mod test {
    use arrow_array::Decimal128Array;

    /// Reading a slot of an all-null Arrow `Decimal128Array` yields a zero value.
    #[test]
    fn test_decimal() {
        // Build 100 null entries and read one of them back as a raw i128.
        let all_null = Decimal128Array::new_null(100);
        assert_eq!(all_null.value(10), 0i128);
    }
}

1	// SPDX-License-Identifier: Apache-2.0
2	// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4	mod compute;
5	mod ops;
6	mod patch;
7	mod serde;
8
9	use arrow_buffer::BooleanBufferBuilder;
10	use vortex_buffer::{Buffer, BufferMut, ByteBuffer};
11	use vortex_dtype::{DType, DecimalDType};
12	use vortex_error::{VortexExpect, VortexResult, vortex_ensure, vortex_panic};
13	use vortex_scalar::{DecimalValueType, NativeDecimalType};
14
15	use crate::builders::ArrayBuilder;
16	use crate::stats::{ArrayStats, StatsSetRef};
17	use crate::validity::Validity;
18	use crate::vtable::{
19	ArrayVTable, CanonicalVTable, NotSupported, VTable, ValidityHelper,
20	ValidityVTableFromValidityHelper, VisitorVTable,
21	};
22	use crate::{ArrayBufferVisitor, ArrayChildVisitor, Canonical, EncodingId, EncodingRef, vtable};
23
24	vtable!(Decimal);
25
26	impl VTable for DecimalVTable {
27	type Array = DecimalArray;
28	type Encoding = DecimalEncoding;
29
30	type ArrayVTable = Self;
31	type CanonicalVTable = Self;
32	type OperationsVTable = Self;
33	type ValidityVTable = ValidityVTableFromValidityHelper;
34	type VisitorVTable = Self;
35	type ComputeVTable = NotSupported;
36	type EncodeVTable = NotSupported;
37	type SerdeVTable = Self;
38
39	fn id(_encoding: &Self::Encoding) -> EncodingId {	515,501✔
40	EncodingId::new_ref("vortex.decimal")	515,501✔
41	}	515,501✔
42
43	fn encoding(_array: &Self::Array) -> EncodingRef {	8,282✔
44	EncodingRef::new_ref(DecimalEncoding.as_ref())	8,282✔
45	}	8,282✔
46	}
47
48	#[derive(Clone, Debug)]
49	pub struct DecimalEncoding;
50
51	/// Maps a decimal precision into the smallest type that can represent it.
52	pub fn smallest_storage_type(decimal_dtype: &DecimalDType) -> DecimalValueType {	2,673✔
53	match decimal_dtype.precision() {	2,673✔
54	1..=2 => DecimalValueType::I8,	2,673✔
55	3..=4 => DecimalValueType::I16,	2,517✔
56	5..=9 => DecimalValueType::I32,	2,283✔
57	10..=18 => DecimalValueType::I64,	1,839✔
58	19..=38 => DecimalValueType::I128,	675✔
59	39..=76 => DecimalValueType::I256,	×
60	0 => unreachable!("precision must be greater than 0"),	×
61	p => unreachable!("precision larger than 76 is invalid found precision {p}"),	×
62	}
63	}	2,673✔
64
65	/// True if `value_type` can represent every value of the type `dtype`.
66	pub fn compatible_storage_type(value_type: DecimalValueType, dtype: DecimalDType) -> bool {	39✔
67	value_type >= smallest_storage_type(&dtype)	39✔
68	}	39✔
69
70	/// A decimal array that stores fixed-precision decimal numbers with configurable scale.
71	///
72	/// This mirrors the Apache Arrow Decimal encoding and provides exact arithmetic for
73	/// financial and scientific computations where floating-point precision loss is unacceptable.
74	///
75	/// ## Storage Format
76	///
77	/// Decimals are stored as scaled integers in a supported scalar value type.
78	///
79	/// The precisions supported for each scalar type are:
80	/// - i8: precision 1-2 digits
81	/// - i16: precision 3-4 digits
82	/// - i32: precision 5-9 digits
83	/// - i64: precision 10-18 digits
84	/// - i128: precision 19-38 digits
85	/// - i256: precision 39-76 digits
86	///
87	/// These are just the maximal ranges for each scalar type, but it is perfectly legal to store
88	/// values with precision that does not match this exactly. For example, a valid DecimalArray with
89	/// precision=39 may store its values in an `i8` if all of the actual values fit into it.
90	///
91	/// Similarly, a `DecimalArray` can be built that stores a set of precision=2 values in a
92	/// `Buffer<i256>`.
93	///
94	/// ## Precision and Scale
95	///
96	/// - Precision: Total number of significant digits (1-76, u8 range)
97	/// - Scale: Number of digits after the decimal point (-128 to 127, i8 range)
98	/// - Value: `stored_integer / 10^scale`
99	///
100	/// For example, with precision=5 and scale=2:
101	/// - Stored value 12345 represents 123.45
102	/// - Range: -999.99 to 999.99
103	///
104	/// ## Valid Scalar Types
105	///
106	/// The underlying storage uses these native types based on precision:
107	/// - `DecimalValueType::I8`, `I16`, `I32`, `I64`, `I128`, `I256`
108	/// - Type selection is automatic based on the required precision
109	///
110	/// # Examples
111	///
112	/// ```
113	/// use vortex_array::arrays::DecimalArray;
114	/// use vortex_dtype::DecimalDType;
115	/// use vortex_buffer::{buffer, Buffer};
116	/// use vortex_array::validity::Validity;
117	///
118	/// // Create a decimal array with precision=5, scale=2 (e.g., 123.45)
119	/// let decimal_dtype = DecimalDType::new(5, 2);
120	/// let values = buffer![12345i32, 67890i32, -12300i32]; // 123.45, 678.90, -123.00
121	/// let array = DecimalArray::new(values, decimal_dtype, Validity::NonNullable);
122	///
123	/// assert_eq!(array.precision(), 5);
124	/// assert_eq!(array.scale(), 2);
125	/// assert_eq!(array.len(), 3);
126	/// ```
127	#[derive(Clone, Debug)]
128	pub struct DecimalArray {
129	dtype: DType,
130	values: ByteBuffer,
131	values_type: DecimalValueType,
132	validity: Validity,
133	stats_set: ArrayStats,
134	}
135
136	impl DecimalArray {
137	fn validate<T: NativeDecimalType>(buffer: &Buffer<T>, validity: &Validity) -> VortexResult<()> {	8,178✔
138	if let Some(len) = validity.maybe_len() {	8,178✔
139	vortex_ensure!(	774✔
140	buffer.len() == len,	774✔
NEW 141	"Buffer and validity length mismatch: buffer={}, validity={}",	×
NEW 142	buffer.len(),	×
143	len,
144	);
145	}	7,404✔
146
147	Ok(())	8,178✔
148	}	8,178✔
149	}
150
151	impl DecimalArray {
152	/// Creates a new [`DecimalArray`] from a [`Buffer`] and [`Validity`], without checking
153	/// any invariants.
154	///
155	/// # Panics
156	///
157	/// Panics if the provided buffer and validity differ in length.
158	///
159	/// See also [`DecimalArray::try_new`].
160	pub fn new<T: NativeDecimalType>(	8,177✔
161	buffer: Buffer<T>,	8,177✔
162	decimal_dtype: DecimalDType,	8,177✔
163	validity: Validity,	8,177✔
164	) -> Self {	8,177✔
165	Self::try_new(buffer, decimal_dtype, validity).vortex_expect("DecimalArray new")	8,177✔
166	}	8,177✔
167
168	/// Build a new `DecimalArray` from a component `buffer`, decimal_dtype` and `validity`.
169	///
170	/// This constructor validates the length of the buffer and validity are equal, returning
171	/// an error otherwise.
172	///
173	/// See [`DecimalArray::new`] for an infallible constructor that panics on validation errors.
174	pub fn try_new<T: NativeDecimalType>(	8,178✔
175	buffer: Buffer<T>,	8,178✔
176	decimal_dtype: DecimalDType,	8,178✔
177	validity: Validity,	8,178✔
178	) -> VortexResult<Self> {	8,178✔
179	Self::validate(&buffer, &validity)?;	8,178✔
180
181	Ok(Self {	8,178✔
182	values: buffer.into_byte_buffer(),	8,178✔
183	values_type: T::VALUES_TYPE,	8,178✔
184	dtype: DType::Decimal(decimal_dtype, validity.nullability()),	8,178✔
185	validity,	8,178✔
186	stats_set: Default::default(),	8,178✔
187	})	8,178✔
188	}	8,178✔
189
190	/// Returns the underlying [`ByteBuffer`] of the array.
191	pub fn byte_buffer(&self) -> ByteBuffer {	20✔
192	self.values.clone()	20✔
193	}	20✔
194
195	pub fn buffer<T: NativeDecimalType>(&self) -> Buffer<T> {	11,767✔
196	if self.values_type != T::VALUES_TYPE {	11,767✔
197	vortex_panic!(	×
198	"Cannot extract Buffer<{:?}> for DecimalArray with values_type {:?}",	×
199	T::VALUES_TYPE,
200	self.values_type,
201	);
202	}	11,767✔
203	Buffer::<T>::from_byte_buffer(self.values.clone())	11,767✔
204	}	11,767✔
205
206	/// Returns the decimal type information
207	pub fn decimal_dtype(&self) -> DecimalDType {	14,379✔
208	match &self.dtype {	14,379✔
209	DType::Decimal(decimal_dtype, _) => *decimal_dtype,	14,379✔
210	_ => vortex_panic!("Expected Decimal dtype, got {:?}", self.dtype),	×
211	}
212	}	14,379✔
213
214	pub fn values_type(&self) -> DecimalValueType {	11,220✔
215	self.values_type	11,220✔
216	}	11,220✔
217
218	pub fn precision(&self) -> u8 {	×
219	self.decimal_dtype().precision()	×
220	}	×
221
222	pub fn scale(&self) -> i8 {	×
223	self.decimal_dtype().scale()	×
224	}	×
225
226	pub fn from_option_iter<T: NativeDecimalType, I: IntoIterator<Item = Option<T>>>(	4✔
227	iter: I,	4✔
228	decimal_dtype: DecimalDType,	4✔
229	) -> Self {	4✔
230	let iter = iter.into_iter();	4✔
231	let mut values = BufferMut::with_capacity(iter.size_hint().0);	4✔
232	let mut validity = BooleanBufferBuilder::new(values.capacity());	4✔
233
234	for i in iter {	20✔
235	match i {	16✔
236	None => {	4✔
237	validity.append(false);	4✔
238	values.push(T::default());	4✔
239	}	4✔
240	Some(e) => {	12✔
241	validity.append(true);	12✔
242	values.push(e);	12✔
243	}	12✔
244	}
245	}
246	Self::new(	4✔
247	values.freeze(),	4✔
248	decimal_dtype,	4✔
249	Validity::from(validity.finish()),	4✔
250	)
251	}	4✔
252	}
253
254	impl ArrayVTable<DecimalVTable> for DecimalVTable {
255	fn len(array: &DecimalArray) -> usize {	83,922✔
256	let divisor = match array.values_type {	83,922✔
257	DecimalValueType::I8 => 1,	8,192✔
258	DecimalValueType::I16 => 2,	6,508✔
259	DecimalValueType::I32 => 4,	11,824✔
260	DecimalValueType::I64 => 8,	10,885✔
261	DecimalValueType::I128 => 16,	46,501✔
262	DecimalValueType::I256 => 32,	12✔
263	ty => vortex_panic!("unknown decimal value type {:?}", ty),	×
264	};
265	array.values.len() / divisor	83,922✔
266	}	83,922✔
267
268	fn dtype(array: &DecimalArray) -> &DType {	84,836✔
269	&array.dtype	84,836✔
270	}	84,836✔
271
272	fn stats(array: &DecimalArray) -> StatsSetRef<'_> {	47,281✔
273	array.stats_set.to_ref(array.as_ref())	47,281✔
274	}	47,281✔
275	}
276
277	impl VisitorVTable<DecimalVTable> for DecimalVTable {
278	fn visit_buffers(array: &DecimalArray, visitor: &mut dyn ArrayBufferVisitor) {	5,297✔
279	visitor.visit_buffer(&array.values);	5,297✔
280	}	5,297✔
281
282	fn visit_children(array: &DecimalArray, visitor: &mut dyn ArrayChildVisitor) {	5,376✔
283	visitor.visit_validity(array.validity(), array.len())	5,376✔
284	}	5,376✔
285	}
286
287	impl CanonicalVTable<DecimalVTable> for DecimalVTable {
288	fn canonicalize(array: &DecimalArray) -> VortexResult<Canonical> {	8,629✔
289	Ok(Canonical::Decimal(array.clone()))	8,629✔
290	}	8,629✔
291
292	fn append_to_builder(array: &DecimalArray, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {	990✔
293	builder.extend_from_array(array.as_ref())	990✔
294	}	990✔
295	}
296
297	impl ValidityHelper for DecimalArray {
298	fn validity(&self) -> &Validity {	19,358✔
299	&self.validity	19,358✔
300	}	19,358✔
301	}
302
303	#[cfg(test)]
304	mod test {
305	use arrow_array::Decimal128Array;
306
307	#[test]
308	fn test_decimal() {	1✔
309	// They pass it b/c the DType carries the information. No other way to carry a
310	// dtype except via the array.
311	let value = Decimal128Array::new_null(100);	1✔
312	let numeric = value.value(10);	1✔
313	assert_eq!(numeric, 0i128);	1✔
314	}	1✔
315	}

vortex-data / vortex / 17042426005

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous