Skip to content

Commit 093008f

Browse files
committed
arrow-select: fix MutableArrayData interleave for ListView
The previous code did not extend child data buffers. Signed-off-by: Alfonso Subiotto Marques <alfonso.subiotto@polarsignals.com>
1 parent fcab5d2 commit 093008f

File tree

2 files changed

+69
-14
lines changed

2 files changed

+69
-14
lines changed

arrow-data/src/transform/list_view.rs

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,21 +27,21 @@ pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd>(
2727
let offsets = array.buffer::<T>(0);
2828
let sizes = array.buffer::<T>(1);
2929
Box::new(
30-
move |mutable: &mut _MutableArrayData, _index: usize, start: usize, len: usize| {
31-
let offset_buffer = &mut mutable.buffer1;
32-
let sizes_buffer = &mut mutable.buffer2;
30+
move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| {
31+
let mut new_offset = mutable.child_data[0].len();
3332

34-
for &offset in &offsets[start..start + len] {
35-
offset_buffer.push(offset);
36-
}
33+
for i in start..start + len {
34+
let size = sizes[i].as_usize();
3735

38-
// sizes
39-
for &size in &sizes[start..start + len] {
40-
sizes_buffer.push(size);
41-
}
36+
mutable.buffer1.push(T::usize_as(new_offset));
37+
mutable.buffer2.push(sizes[i]);
38+
new_offset += size;
4239

43-
// the beauty of views is that we don't need to copy child_data, we just splat
44-
// the offsets and sizes.
40+
if size > 0 {
41+
let child_start = offsets[i].as_usize();
42+
mutable.child_data[0].extend(index, child_start, child_start + size);
43+
}
44+
}
4545
},
4646
)
4747
}

arrow-select/src/interleave.rs

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ use arrow_array::cast::AsArray;
2424
use arrow_array::types::*;
2525
use arrow_array::*;
2626
use arrow_buffer::{ArrowNativeType, BooleanBuffer, MutableBuffer, NullBuffer, OffsetBuffer};
27-
use arrow_data::ByteView;
2827
use arrow_data::transform::MutableArrayData;
28+
use arrow_data::ByteView;
2929
use arrow_schema::{ArrowError, DataType, FieldRef, Fields};
3030
use std::sync::Arc;
3131

@@ -579,9 +579,10 @@ pub fn interleave_record_batch(
579579
#[cfg(test)]
580580
mod tests {
581581
use super::*;
582-
use arrow_array::Int32RunArray;
583582
use arrow_array::builder::{GenericListBuilder, Int32Builder, PrimitiveRunBuilder};
584583
use arrow_array::types::Int8Type;
584+
use arrow_array::Int32RunArray;
585+
use arrow_buffer::ScalarBuffer;
585586
use arrow_schema::Field;
586587

587588
#[test]
@@ -1489,4 +1490,58 @@ mod tests {
14891490
Err(ArrowError::OffsetOverflowError(_))
14901491
));
14911492
}
1493+
1494+
#[test]
1495+
fn test_interleave_list_view() {
1496+
// `interleave` for ListView falls through to `interleave_fallback`, which uses
1497+
// `MutableArrayData`. `list_view::build_extend` copies offsets/sizes but never
1498+
// extends the child array, so the result contains offsets/sizes that reference
1499+
// positions in the now-absent original child arrays while the child is empty.
1500+
//
1501+
// lv_a: [[1, 2], [3]] (values=[1,2,3], offsets=[0,2], sizes=[2,1])
1502+
// lv_b: [[4, 5, 6]] (values=[4,5,6], offsets=[0], sizes=[3])
1503+
// interleave at [(0,0), (1,0), (0,1)] should produce [[1, 2], [4, 5, 6], [3]]
1504+
let field = Arc::new(Field::new_list_field(DataType::Int64, false));
1505+
1506+
let lv_a = ListViewArray::new(
1507+
Arc::clone(&field),
1508+
ScalarBuffer::from(vec![0i32, 2]),
1509+
ScalarBuffer::from(vec![2i32, 1]),
1510+
Arc::new(Int64Array::from(vec![1_i64, 2, 3])),
1511+
None,
1512+
);
1513+
let lv_b = ListViewArray::new(
1514+
field,
1515+
ScalarBuffer::from(vec![0i32]),
1516+
ScalarBuffer::from(vec![3i32]),
1517+
Arc::new(Int64Array::from(vec![4_i64, 5, 6])),
1518+
None,
1519+
);
1520+
1521+
let result = interleave(
1522+
&[&lv_a as &dyn Array, &lv_b as &dyn Array],
1523+
&[(0, 0), (1, 0), (0, 1)],
1524+
)
1525+
.unwrap();
1526+
1527+
result
1528+
.to_data()
1529+
.validate_full()
1530+
.expect("interleaved ListViewArray must be internally consistent");
1531+
1532+
let result_lv = result.as_list_view::<i32>();
1533+
assert_eq!(result_lv.len(), 3);
1534+
assert_eq!(
1535+
result_lv.value(0).as_primitive::<Int64Type>().values(),
1536+
&[1, 2]
1537+
);
1538+
assert_eq!(
1539+
result_lv.value(1).as_primitive::<Int64Type>().values(),
1540+
&[4, 5, 6]
1541+
);
1542+
assert_eq!(
1543+
result_lv.value(2).as_primitive::<Int64Type>().values(),
1544+
&[3]
1545+
);
1546+
}
14921547
}

0 commit comments

Comments
 (0)