• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vortex-data / vortex / 16606450133

29 Jul 2025 08:14PM UTC coverage: 82.71% (+0.03%) from 82.684%
16606450133

Pull #4057

github

web-flow
Merge 9b2c80886 into 6fb0f3e49
Pull Request #4057: feat: `ArrayEquals` kernel

363 of 424 new or added lines in 2 files covered. (85.61%)

48 existing lines in 1 file now uncovered.

45580 of 55108 relevant lines covered (82.71%)

184996.32 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.92
/vortex-array/src/compute/array_equals.rs
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3

4
use std::any::Any;
5
use std::sync::LazyLock;
6

7
use arcref::ArcRef;
8
use vortex_dtype::{DType, Nullability};
9
use vortex_error::{VortexError, VortexExpect, VortexResult, vortex_bail, vortex_err};
10
use vortex_scalar::Scalar;
11

12
use crate::Array;
13
use crate::arrays::ConstantArray;
14
use crate::compute::{
15
    ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Operator, Options, Output, compare,
16
};
17
use crate::stats::{Precision, Stat, StatsProvider};
18
use crate::vtable::VTable;
19

20
pub fn array_equals(left: &dyn Array, right: &dyn Array) -> VortexResult<bool> {
33✔
21
    array_equals_opts(left, right, false)
33✔
22
}
33✔
23

24
pub fn array_equals_opts(
37✔
25
    left: &dyn Array,
37✔
26
    right: &dyn Array,
37✔
27
    ignore_nullability: bool,
37✔
28
) -> VortexResult<bool> {
37✔
29
    Ok(ARRAY_EQUALS_FN
37✔
30
        .invoke(&InvocationArgs {
37✔
31
            inputs: &[left.into(), right.into()],
37✔
32
            options: &ArrayEqualsOptions {
37✔
33
                ignore_nullability,
37✔
34
                batch_size: None,
37✔
35
            },
37✔
36
        })?
37✔
37
        .unwrap_scalar()?
37✔
38
        .as_bool()
37✔
39
        .value()
37✔
40
        .vortex_expect("non-nullable"))
37✔
41
}
37✔
42

43
#[derive(Clone, Copy)]
44
struct ArrayEqualsOptions {
45
    ignore_nullability: bool,
46
    batch_size: Option<usize>,
47
}
48

49
impl Options for ArrayEqualsOptions {
50
    fn as_any(&self) -> &dyn Any {
37✔
51
        self
37✔
52
    }
37✔
53
}
54

55
pub static ARRAY_EQUALS_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
16✔
56
    let compute = ComputeFn::new("array_equals".into(), ArcRef::new_ref(&ArrayEquals));
16✔
57
    for kernel in inventory::iter::<ArrayEqualsKernelRef> {
16✔
NEW
58
        compute.register_kernel(kernel.0.clone());
×
NEW
59
    }
×
60
    compute
16✔
61
});
16✔
62

63
struct ArrayEquals;
64
impl ComputeFnVTable for ArrayEquals {
65
    fn invoke(
37✔
66
        &self,
37✔
67
        args: &InvocationArgs,
37✔
68
        kernels: &[ArcRef<dyn Kernel>],
37✔
69
    ) -> VortexResult<Output> {
37✔
70
        let ArrayEqualsArgs {
71
            left,
37✔
72
            right,
37✔
73
            ignore_nullability,
37✔
74
            batch_size,
37✔
75
        } = ArrayEqualsArgs::try_from(args)?;
37✔
76

77
        if ignore_nullability && !left.dtype().eq_ignore_nullability(right.dtype()) {
37✔
NEW
78
            return Ok(Scalar::from(false).into());
×
79
        }
37✔
80

81
        if !ignore_nullability && !left.dtype().eq(right.dtype()) {
37✔
82
            return Ok(Scalar::from(false).into());
3✔
83
        }
34✔
84

85
        if left.len() != right.len() {
34✔
86
            return Ok(Scalar::from(false).into());
1✔
87
        }
33✔
88

89
        // Early return for empty arrays - they're equal regardless of type
90
        if left.is_empty() {
33✔
91
            return Ok(Scalar::from(true).into());
1✔
92
        }
32✔
93

94
        // Handle constant array comparisons
95
        match (left.as_constant(), right.as_constant()) {
32✔
96
            (Some(l_scalar), Some(r_scalar)) => {
8✔
97
                // Both are constants - compare scalars directly
98
                return Ok(Scalar::from(l_scalar.eq(&r_scalar)).into());
8✔
99
            }
100
            (Some(constant), None) | (None, Some(constant)) => {
2✔
101
                // One is constant, one is not - they can only be equal if all elements
102
                // of the non-constant array equal the constant
103
                let non_constant_array = if left.as_constant().is_some() {
4✔
104
                    right
2✔
105
                } else {
106
                    left
2✔
107
                };
108

109
                // Quick check using statistics
110
                if constant.is_null() {
4✔
111
                    // All elements must be null for equality
NEW
112
                    if let Some(Precision::Exact(null_count_value)) =
×
113
                        non_constant_array.statistics().get(Stat::NullCount)
2✔
114
                    {
NEW
115
                        let null_count_scalar = Scalar::new(
×
NEW
116
                            DType::Primitive(vortex_dtype::PType::U64, Nullability::NonNullable),
×
NEW
117
                            null_count_value,
×
118
                        );
NEW
119
                        if let Ok(Some(count)) = null_count_scalar.as_primitive().as_::<usize>() {
×
NEW
120
                            return Ok(Scalar::from(count == non_constant_array.len()).into());
×
NEW
121
                        }
×
122
                    }
2✔
123
                } else {
124
                    // Non-null constant - check if min/max statistics can rule out equality
125
                    let stats = non_constant_array.statistics();
2✔
NEW
126
                    if let (Some(Precision::Exact(min)), Some(Precision::Exact(max))) =
×
127
                        (stats.get(Stat::Min), stats.get(Stat::Max))
2✔
128
                    {
NEW
129
                        let min_scalar = Scalar::new(non_constant_array.dtype().clone(), min);
×
NEW
130
                        let max_scalar = Scalar::new(non_constant_array.dtype().clone(), max);
×
NEW
131
                        if !constant.eq(&min_scalar) || !constant.eq(&max_scalar) {
×
NEW
132
                            return Ok(Scalar::from(false).into());
×
NEW
133
                        }
×
134
                    }
2✔
135
                }
136

137
                // Use compare function to check if all elements equal the constant
138
                // Create a constant array of the same length for comparison
139
                let constant_array = ConstantArray::new(constant, non_constant_array.len());
4✔
140
                let compare_result =
4✔
141
                    compare(non_constant_array, constant_array.as_ref(), Operator::Eq)?;
4✔
142

143
                // Check if all comparison results are true (all elements equal the constant)
144
                if let Some(all_equal) = check_constant_result(&compare_result)? {
4✔
145
                    return Ok(Scalar::from(all_equal).into());
2✔
146
                }
2✔
147

148
                // Check via statistics if possible
149
                if let Some(all_true) = check_comparison_stats(&compare_result) {
2✔
NEW
150
                    return Ok(Scalar::from(all_true).into());
×
151
                }
2✔
152

153
                // Fall through to general case handling below
154
            }
155
            (None, None) => {
20✔
156
                // Neither is constant - continue with general algorithm
20✔
157
            }
20✔
158
        }
159

160
        // Check statistics for early exit
161
        // TODO(optimization): Add more sophisticated statistical comparisons for floating point arrays
162
        if !check_stats_equality(left, right) {
22✔
163
            return Ok(Scalar::from(false).into());
2✔
164
        }
20✔
165

166
        let args = InvocationArgs {
20✔
167
            inputs: &[left.into(), right.into()],
20✔
168
            options: &ArrayEqualsOptions {
20✔
169
                ignore_nullability,
20✔
170
                batch_size,
20✔
171
            },
20✔
172
        };
20✔
173

174
        for kernel in kernels {
20✔
NEW
175
            if let Some(output) = kernel.invoke(&args)? {
×
NEW
176
                return Ok(output);
×
NEW
177
            }
×
178
        }
179

180
        if let Some(output) = left.invoke(&ARRAY_EQUALS_FN, &args)? {
20✔
NEW
181
            return Ok(output);
×
182
        }
20✔
183

184
        // Try swapping arguments
185
        let swapped_args = InvocationArgs {
20✔
186
            inputs: &[right.into(), left.into()],
20✔
187
            options: &ArrayEqualsOptions {
20✔
188
                ignore_nullability,
20✔
189
                batch_size,
20✔
190
            },
20✔
191
        };
20✔
192
        if let Some(output) = right.invoke(&ARRAY_EQUALS_FN, &swapped_args)? {
20✔
NEW
193
            return Ok(output);
×
194
        }
20✔
195

196
        // Try canonical arrays if not already canonical
197
        if !left.is_canonical() || !right.is_canonical() {
20✔
198
            log::debug!(
4✔
NEW
199
                "Falling back to canonical array_equals for encodings {} and {}",
×
NEW
200
                left.encoding_id(),
×
NEW
201
                right.encoding_id()
×
202
            );
203

204
            let left_canonical = left.to_canonical()?;
4✔
205
            let right_canonical = right.to_canonical()?;
4✔
206

207
            return Ok(Scalar::from(array_equals_opts(
4✔
208
                left_canonical.as_ref(),
4✔
209
                right_canonical.as_ref(),
4✔
210
                ignore_nullability,
4✔
NEW
211
            )?)
×
212
            .into());
4✔
213
        }
16✔
214

215
        // Final fallback to chunked comparison for canonical arrays
216
        log::debug!(
16✔
NEW
217
            "Using chunked comparison fallback for canonical arrays {} and {}",
×
NEW
218
            left.encoding_id(),
×
NEW
219
            right.encoding_id()
×
220
        );
221

222
        let all_equal = compare_chunked(left, right, batch_size)?;
16✔
223
        Ok(Scalar::from(all_equal).into())
16✔
224
    }
37✔
225

226
    fn return_dtype(&self, _args: &InvocationArgs) -> VortexResult<DType> {
37✔
227
        Ok(DType::Bool(Nullability::NonNullable))
37✔
228
    }
37✔
229

230
    fn return_len(&self, _args: &InvocationArgs) -> VortexResult<usize> {
37✔
231
        Ok(1)
37✔
232
    }
37✔
233

234
    fn is_elementwise(&self) -> bool {
39✔
235
        false
39✔
236
    }
39✔
237
}
238

239
// todo: statistics
240
pub trait ArrayEqualsKernel: VTable {
241
    fn compare_array(
242
        &self,
243
        array: &Self::Array,
244
        other: &dyn Array,
245
        ignore_nullability: bool,
246
    ) -> VortexResult<Option<bool>>;
247
}
248

249
struct ArrayEqualsArgs<'a> {
250
    left: &'a dyn Array,
251
    right: &'a dyn Array,
252
    ignore_nullability: bool,
253
    batch_size: Option<usize>,
254
}
255

256
impl<'a> TryFrom<&InvocationArgs<'a>> for ArrayEqualsArgs<'a> {
257
    type Error = VortexError;
258

259
    fn try_from(value: &InvocationArgs<'a>) -> Result<Self, Self::Error> {
37✔
260
        if value.inputs.len() != 2 {
37✔
NEW
261
            vortex_bail!(
×
NEW
262
                "ArrayEquals function requires two arguments, got {}",
×
NEW
263
                value.inputs.len()
×
264
            );
265
        }
37✔
266
        let left = value.inputs[0]
37✔
267
            .array()
37✔
268
            .ok_or_else(|| vortex_err!("First argument must be an array"))?;
37✔
269

270
        let right = value.inputs[1]
37✔
271
            .array()
37✔
272
            .ok_or_else(|| vortex_err!("Second argument must be an array"))?;
37✔
273

274
        let options = value
37✔
275
            .options
37✔
276
            .as_any()
37✔
277
            .downcast_ref::<ArrayEqualsOptions>()
37✔
278
            .ok_or_else(|| vortex_err!("Invalid options type for array equals function"))?;
37✔
279

280
        Ok(ArrayEqualsArgs {
37✔
281
            left,
37✔
282
            right,
37✔
283
            ignore_nullability: options.ignore_nullability,
37✔
284
            batch_size: options.batch_size,
37✔
285
        })
37✔
286
    }
37✔
287
}
288

289
#[derive(Debug)]
290
pub struct ArrayEqualsKernelAdapter<V: VTable>(pub V);
291

292
pub struct ArrayEqualsKernelRef(ArcRef<dyn Kernel>);
293
inventory::collect!(ArrayEqualsKernelRef);
294

295
impl<V: VTable + ArrayEqualsKernel> ArrayEqualsKernelAdapter<V> {
NEW
296
    pub const fn lift(&'static self) -> ArrayEqualsKernelRef {
×
NEW
297
        ArrayEqualsKernelRef(ArcRef::new_ref(self))
×
NEW
298
    }
×
299
}
300

301
impl<V: VTable + ArrayEqualsKernel> Kernel for ArrayEqualsKernelAdapter<V> {
NEW
302
    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
×
303
        let ArrayEqualsArgs {
NEW
304
            left,
×
NEW
305
            right,
×
NEW
306
            ignore_nullability,
×
307
            batch_size: _, // Not used in kernel adapters
NEW
308
        } = ArrayEqualsArgs::try_from(args)?;
×
309

NEW
310
        let Some(left) = left.as_opt::<V>() else {
×
NEW
311
            return Ok(None);
×
312
        };
313

NEW
314
        let is_equal = V::compare_array(&self.0, left, right, ignore_nullability)?;
×
NEW
315
        Ok(is_equal.map(|b| Scalar::from(b).into()))
×
NEW
316
    }
×
317
}
318

319
/// Compare arrays in chunks to avoid loading entire arrays into memory
320
fn compare_chunked(
16✔
321
    left: &dyn Array,
16✔
322
    right: &dyn Array,
16✔
323
    batch_size: Option<usize>,
16✔
324
) -> VortexResult<bool> {
16✔
325
    const DEFAULT_BATCH_SIZE: usize = 65536; // 64K elements per batch
326
    let batch_size = batch_size.unwrap_or(DEFAULT_BATCH_SIZE);
16✔
327

328
    let mut offset = 0;
16✔
329
    while offset < left.len() {
28✔
330
        let end = (offset + batch_size).min(left.len());
20✔
331

332
        let left_slice = left.slice(offset, end)?;
20✔
333
        let right_slice = right.slice(offset, end)?;
20✔
334

335
        if !compare_batch(&left_slice, &right_slice)? {
20✔
336
            return Ok(false);
8✔
337
        }
12✔
338

339
        offset = end;
12✔
340
    }
341

342
    Ok(true)
8✔
343
}
16✔
344

345
/// Compare a single batch of arrays
346
fn compare_batch(left: &dyn Array, right: &dyn Array) -> VortexResult<bool> {
20✔
347
    let compare_result = compare(left, right, Operator::Eq)?;
20✔
348

349
    // Check if the comparison result indicates all equal
350
    if let Some(all_equal) = check_constant_result(&compare_result)? {
20✔
351
        return Ok(all_equal);
12✔
352
    }
8✔
353

354
    // Not constant - need to check each value
355
    check_non_constant_result(&compare_result, left, right)
8✔
356
}
20✔
357

358
/// Check if a constant comparison result indicates equality
359
fn check_constant_result(compare_result: &dyn Array) -> VortexResult<Option<bool>> {
24✔
360
    if let Some(constant_scalar) = compare_result.as_constant() {
24✔
361
        // If constant is true, all are equal
362
        Ok(Some(
363
            constant_scalar.is_valid() && constant_scalar.as_bool().value() == Some(true),
14✔
364
        ))
365
    } else {
366
        Ok(None)
10✔
367
    }
368
}
24✔
369

370
/// Check non-constant comparison results, handling null comparisons
371
fn check_non_constant_result(
8✔
372
    compare_result: &dyn Array,
8✔
373
    left: &dyn Array,
8✔
374
    right: &dyn Array,
8✔
375
) -> VortexResult<bool> {
8✔
376
    // First, check statistics for quick rejection
377
    if let Some(all_true) = check_comparison_stats(compare_result) {
8✔
NEW
378
        return Ok(all_true);
×
379
    }
8✔
380

381
    // Fallback to element-wise check
382
    for i in 0..compare_result.len() {
34,491✔
383
        let cmp_scalar = compare_result.scalar_at(i)?;
34,491✔
384

385
        // Check for definite inequality
386
        if cmp_scalar.is_valid() && cmp_scalar.as_bool().value() == Some(false) {
34,491✔
387
            return Ok(false);
6✔
388
        }
34,485✔
389

390
        // Handle null comparison results
391
        if cmp_scalar.is_null() && !check_null_equality(left, right, i)? {
34,485✔
392
            return Ok(false);
1✔
393
        }
34,484✔
394
    }
395

396
    Ok(true)
1✔
397
}
8✔
398

399
/// Check comparison statistics for quick determination
400
fn check_comparison_stats(compare_result: &dyn Array) -> Option<bool> {
10✔
401
    // If min is false, we have at least one false
402
    if let Some(Precision::Exact(min)) = compare_result.statistics().get(Stat::Min) {
10✔
NEW
403
        if min.as_bool().ok()? == Some(false) {
×
NEW
404
            return Some(false);
×
NEW
405
        }
×
406
    }
10✔
407

408
    // If both min and max are true, all are true
409
    if let Some(Precision::Exact(min)) = compare_result.statistics().get(Stat::Min) {
10✔
NEW
410
        if let Some(Precision::Exact(max)) = compare_result.statistics().get(Stat::Max) {
×
NEW
411
            if min.as_bool().ok()? == Some(true) && max.as_bool().ok()? == Some(true) {
×
NEW
412
                return Some(true);
×
NEW
413
            }
×
NEW
414
        }
×
415
    }
10✔
416

417
    None
10✔
418
}
10✔
419

420
/// Check if two potentially null values at a given index are equal
421
fn check_null_equality(left: &dyn Array, right: &dyn Array, index: usize) -> VortexResult<bool> {
2✔
422
    let left_val = left.scalar_at(index)?;
2✔
423
    let right_val = right.scalar_at(index)?;
2✔
424

425
    // Both null or both non-null means they could be equal
426
    // (if both non-null, the comparison would have returned true/false, not null)
427
    Ok(left_val.is_null() == right_val.is_null())
2✔
428
}
2✔
429

430
/// Check statistics equality for early exit
431
fn check_stats_equality(left: &dyn Array, right: &dyn Array) -> bool {
22✔
432
    let stats_to_check = [
22✔
433
        Stat::IsConstant,
22✔
434
        Stat::IsSorted,
22✔
435
        Stat::IsStrictSorted,
22✔
436
        Stat::Max,
22✔
437
        Stat::Min,
22✔
438
        Stat::Sum,
22✔
439
        Stat::NullCount,
22✔
440
        Stat::NaNCount,
22✔
441
    ];
22✔
442

443
    for stat in stats_to_check {
182✔
444
        match (left.statistics().get(stat), right.statistics().get(stat)) {
162✔
445
            (Some(Precision::Exact(left_v)), Some(Precision::Exact(right_v))) => {
22✔
446
                if !left_v.eq(&right_v) {
22✔
447
                    return false;
2✔
448
                }
20✔
449
            }
450
            _ => continue,
140✔
451
        }
452
    }
453

454
    true
20✔
455
}
22✔
456

457
#[cfg(test)]
mod tests {
    use super::*;
    use crate::IntoArray;
    use crate::arrays::{BoolArray, ChunkedArray, ConstantArray, PrimitiveArray, VarBinArray};
    use crate::validity::Validity;
    use vortex_dtype::{DType, Nullability, PType};

    /// Invokes the compute function with an explicit batch size, which the public helpers
    /// do not expose.
    fn array_equals_batched(left: &dyn Array, right: &dyn Array, batch_size: usize) -> bool {
        ARRAY_EQUALS_FN
            .invoke(&InvocationArgs {
                inputs: &[left.into(), right.into()],
                options: &ArrayEqualsOptions {
                    ignore_nullability: false,
                    batch_size: Some(batch_size),
                },
            })
            .unwrap()
            .unwrap_scalar()
            .unwrap()
            .as_bool()
            .value()
            .unwrap()
    }

    #[test]
    fn test_simple_equals() {
        let arr1 = PrimitiveArray::from_iter(vec![1i32, 2, 3, 4, 5]);
        let arr2 = PrimitiveArray::from_iter(vec![1i32, 2, 3, 4, 5]);
        let arr3 = PrimitiveArray::from_iter(vec![1i32, 2, 3, 4, 6]);

        assert!(array_equals(arr1.as_ref(), arr2.as_ref()).unwrap());
        assert!(!array_equals(arr1.as_ref(), arr3.as_ref()).unwrap());
    }

    #[test]
    fn test_stats_comparison() {
        // Arrays with different stats should be detected as different early
        let arr1 = PrimitiveArray::from_iter(vec![1i32, 2, 3, 4, 5]);
        let arr2 = PrimitiveArray::from_iter(vec![10i32, 20, 30, 40, 50]);

        assert!(!array_equals(arr1.as_ref(), arr2.as_ref()).unwrap());
    }

    #[test]
    fn test_constant_arrays() {
        let const1 = ConstantArray::new(Scalar::from(42i32), 100);
        let const2 = ConstantArray::new(Scalar::from(42i32), 100);
        let const3 = ConstantArray::new(Scalar::from(43i32), 100);

        assert!(array_equals(const1.as_ref(), const2.as_ref()).unwrap());
        assert!(!array_equals(const1.as_ref(), const3.as_ref()).unwrap());
    }

    #[test]
    fn test_different_types() {
        let int_arr = PrimitiveArray::from_iter(vec![1i32, 2, 3]);
        let float_arr = PrimitiveArray::from_iter(vec![1.0f32, 2.0, 3.0]);

        assert!(!array_equals(int_arr.as_ref(), float_arr.as_ref()).unwrap());
    }

    #[test]
    fn test_with_nulls() {
        let arr1 = PrimitiveArray::from_option_iter(vec![Some(1i32), None, Some(3), Some(4)]);
        let arr2 = PrimitiveArray::from_option_iter(vec![Some(1i32), None, Some(3), Some(4)]);
        let arr3 = PrimitiveArray::from_option_iter(vec![Some(1i32), Some(2), Some(3), Some(4)]);

        assert!(array_equals(arr1.as_ref(), arr2.as_ref()).unwrap());
        assert!(!array_equals(arr1.as_ref(), arr3.as_ref()).unwrap());
    }

    #[test]
    fn test_null_arrays() {
        let arr1 = PrimitiveArray::from_option_iter(vec![None::<i32>, None, None]);
        let arr2 = PrimitiveArray::from_option_iter(vec![None::<i32>, None, None]);

        assert!(array_equals(arr1.as_ref(), arr2.as_ref()).unwrap());
    }

    #[test]
    fn test_bool_arrays() {
        use arrow_buffer::BooleanBuffer;

        let arr1 = BoolArray::new(
            BooleanBuffer::from_iter([true, false, true, false]),
            Validity::AllValid,
        );
        let arr2 = BoolArray::new(
            BooleanBuffer::from_iter([true, false, true, false]),
            Validity::AllValid,
        );
        let arr3 = BoolArray::new(
            BooleanBuffer::from_iter([true, false, false, false]),
            Validity::AllValid,
        );

        assert!(array_equals(arr1.as_ref(), arr2.as_ref()).unwrap());
        assert!(!array_equals(arr1.as_ref(), arr3.as_ref()).unwrap());
    }

    #[test]
    fn test_empty_arrays() {
        let empty1 = PrimitiveArray::from_iter(Vec::<i32>::new());
        let empty2 = PrimitiveArray::from_iter(Vec::<i32>::new());

        assert!(array_equals(empty1.as_ref(), empty2.as_ref()).unwrap());
    }

    #[test]
    fn test_different_lengths() {
        let arr1 = PrimitiveArray::from_iter(vec![1i32, 2, 3]);
        let arr2 = PrimitiveArray::from_iter(vec![1i32, 2, 3, 4]);

        assert!(!array_equals(arr1.as_ref(), arr2.as_ref()).unwrap());
    }

    #[test]
    fn test_large_arrays() {
        // Arrays longer than the default batch size, differing only in the last element
        let data1: Vec<i64> = (0..100_000).collect();
        let data2: Vec<i64> = (0..100_000).collect();
        let mut data3 = data1.clone();
        data3[99_999] = 999_999;

        let arr1 = PrimitiveArray::from_iter(data1);
        let arr2 = PrimitiveArray::from_iter(data2);
        let arr3 = PrimitiveArray::from_iter(data3);

        assert!(array_equals(arr1.as_ref(), arr2.as_ref()).unwrap());
        assert!(!array_equals(arr1.as_ref(), arr3.as_ref()).unwrap());
    }

    #[test]
    fn test_non_canonical_arrays() {
        let varbin1 = VarBinArray::from_vec(
            vec!["hello".as_bytes(), "world".as_bytes()],
            DType::Utf8(Nullability::NonNullable),
        );
        let varbin2 = VarBinArray::from_vec(
            vec!["hello".as_bytes(), "world".as_bytes()],
            DType::Utf8(Nullability::NonNullable),
        );
        let varbin3 = VarBinArray::from_vec(
            vec!["hello".as_bytes(), "earth".as_bytes()],
            DType::Utf8(Nullability::NonNullable),
        );

        assert!(array_equals(varbin1.as_ref(), varbin2.as_ref()).unwrap());
        assert!(!array_equals(varbin1.as_ref(), varbin3.as_ref()).unwrap());
    }

    #[test]
    fn test_float_precision() {
        let arr1 = PrimitiveArray::from_iter(vec![1.0f64, 2.0, 3.0, 4.0, 5.0]);
        let arr2 = PrimitiveArray::from_iter(vec![1.0f64, 2.0, 3.0, 4.0, 5.0]);

        // Arrays with exact same values should be equal
        assert!(array_equals(arr1.as_ref(), arr2.as_ref()).unwrap());

        // Arrays with slightly different values should not be equal
        let arr3 = PrimitiveArray::from_iter(vec![1.0f64, 2.0, 3.0, 4.0, 5.0000000001]);
        assert!(!array_equals(arr1.as_ref(), arr3.as_ref()).unwrap());
    }

    #[test]
    fn test_batch_size_functionality() {
        // Default batch size on arrays that span several batches
        let data1: Vec<i32> = (0..150_000).collect();
        let data2: Vec<i32> = (0..150_000).collect();

        let arr1 = PrimitiveArray::from_iter(data1);
        let arr2 = PrimitiveArray::from_iter(data2);

        assert!(array_equals(arr1.as_ref(), arr2.as_ref()).unwrap());

        // Explicit batch sizes: dividing the length evenly, leaving a remainder, equal to
        // the length, and larger than the length.
        let base: Vec<i32> = (0..10_000).collect();
        let mut changed_tail = base.clone();
        changed_tail[9_999] = -1;

        let same1 = PrimitiveArray::from_iter(base.clone());
        let same2 = PrimitiveArray::from_iter(base);
        let different = PrimitiveArray::from_iter(changed_tail);

        for batch_size in [1_000, 4_096, 10_000, 1_000_000] {
            assert!(array_equals_batched(
                same1.as_ref(),
                same2.as_ref(),
                batch_size
            ));
            assert!(!array_equals_batched(
                same1.as_ref(),
                different.as_ref(),
                batch_size
            ));
        }
    }

    #[test]
    fn test_primitive_vs_dict_array() {
        // Compares a primitive array with a differently encoded array holding the same
        // logical values. A chunked array stands in for the non-canonical encoding.
        let primitive_arr = PrimitiveArray::from_iter(vec![1i32, 2, 1, 3, 2, 1]);

        let chunk1 = PrimitiveArray::from_iter(vec![1i32, 2, 1]);
        let chunk2 = PrimitiveArray::from_iter(vec![3i32, 2, 1]);
        let chunked_arr = ChunkedArray::try_new(
            vec![chunk1.into_array(), chunk2.into_array()],
            primitive_arr.dtype().clone(),
        )
        .unwrap();

        // Should be equal as they contain the same logical values
        assert!(array_equals(primitive_arr.as_ref(), chunked_arr.as_ref()).unwrap());

        // Test with different values
        let chunk1_copy = PrimitiveArray::from_iter(vec![1i32, 2, 1]);
        let different_chunk2 = PrimitiveArray::from_iter(vec![3i32, 2, 4]);
        let different_chunked = ChunkedArray::try_new(
            vec![chunk1_copy.into_array(), different_chunk2.into_array()],
            primitive_arr.dtype().clone(),
        )
        .unwrap();

        assert!(!array_equals(primitive_arr.as_ref(), different_chunked.as_ref()).unwrap());
    }

    #[test]
    fn test_constant_null_arrays() {
        // Constant null arrays should equal each other but not non-null constants
        let null_const1 = ConstantArray::new(
            Scalar::null(DType::Primitive(PType::I32, Nullability::Nullable)),
            5,
        );
        let null_const2 = ConstantArray::new(
            Scalar::null(DType::Primitive(PType::I32, Nullability::Nullable)),
            5,
        );
        let non_null_const = ConstantArray::new(Scalar::from(42i32), 5);

        // Both null constants should be equal
        assert!(array_equals(null_const1.as_ref(), null_const2.as_ref()).unwrap());

        // Null constant should not equal non-null constant
        assert!(!array_equals(null_const1.as_ref(), non_null_const.as_ref()).unwrap());
        assert!(!array_equals(non_null_const.as_ref(), null_const1.as_ref()).unwrap());
    }

    #[test]
    fn test_mixed_constant_non_constant() {
        // Test comparing constant arrays with non-constant arrays
        let constant_42 = ConstantArray::new(Scalar::from(42i32), 4);
        let all_42s = PrimitiveArray::from_iter(vec![42i32, 42, 42, 42]);
        let mixed_values = PrimitiveArray::from_iter(vec![42i32, 42, 43, 42]);

        // Constant should equal array with all same values
        assert!(array_equals(constant_42.as_ref(), all_42s.as_ref()).unwrap());
        assert!(array_equals(all_42s.as_ref(), constant_42.as_ref()).unwrap());

        // Constant should not equal array with different values
        assert!(!array_equals(constant_42.as_ref(), mixed_values.as_ref()).unwrap());
        assert!(!array_equals(mixed_values.as_ref(), constant_42.as_ref()).unwrap());

        // Test with null constant
        let null_constant = ConstantArray::new(
            Scalar::null(DType::Primitive(PType::I32, Nullability::Nullable)),
            3,
        );
        let all_nulls = PrimitiveArray::from_option_iter(vec![None::<i32>, None, None]);
        let mixed_nulls = PrimitiveArray::from_option_iter(vec![None::<i32>, Some(42), None]);

        // Null constant should equal array with all nulls
        assert!(array_equals(null_constant.as_ref(), all_nulls.as_ref()).unwrap());
        assert!(array_equals(all_nulls.as_ref(), null_constant.as_ref()).unwrap());

        // Null constant should not equal array with mixed nulls and values
        assert!(!array_equals(null_constant.as_ref(), mixed_nulls.as_ref()).unwrap());
        assert!(!array_equals(mixed_nulls.as_ref(), null_constant.as_ref()).unwrap());
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc