Skip to content

Commit 64096b3

Browse files
committed
arrow-select: fix MutableArrayData interleave for ListView
The previous code did not extend child data buffers. Signed-off-by: Alfonso Subiotto Marques <alfonso.subiotto@polarsignals.com>
1 parent 19889a3 commit 64096b3

File tree

2 files changed

+66
-12
lines changed

2 files changed

+66
-12
lines changed

arrow-data/src/transform/list_view.rs

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,21 +27,20 @@ pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd>(
2727
let offsets = array.buffer::<T>(0);
2828
let sizes = array.buffer::<T>(1);
2929
Box::new(
30-
move |mutable: &mut _MutableArrayData, _index: usize, start: usize, len: usize| {
31-
let offset_buffer = &mut mutable.buffer1;
32-
let sizes_buffer = &mut mutable.buffer2;
30+
move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| {
31+
let mut new_offset = T::usize_as(mutable.child_data[0].len());
3332

34-
for &offset in &offsets[start..start + len] {
35-
offset_buffer.push(offset);
36-
}
33+
for i in start..start + len {
34+
mutable.buffer1.push(new_offset);
35+
mutable.buffer2.push(sizes[i]);
36+
new_offset = new_offset.checked_add(&sizes[i]).expect("offset overflow");
3737

38-
// sizes
39-
for &size in &sizes[start..start + len] {
40-
sizes_buffer.push(size);
38+
let size = sizes[i].as_usize();
39+
if size > 0 {
40+
let child_start = offsets[i].as_usize();
41+
mutable.child_data[0].extend(index, child_start, child_start + size);
42+
}
4143
}
42-
43-
// the beauty of views is that we don't need to copy child_data, we just splat
44-
// the offsets and sizes.
4544
},
4645
)
4746
}

arrow-select/src/interleave.rs

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,7 @@ mod tests {
582582
use arrow_array::Int32RunArray;
583583
use arrow_array::builder::{GenericListBuilder, Int32Builder, PrimitiveRunBuilder};
584584
use arrow_array::types::Int8Type;
585+
use arrow_buffer::ScalarBuffer;
585586
use arrow_schema::Field;
586587

587588
#[test]
@@ -1489,4 +1490,58 @@ mod tests {
14891490
Err(ArrowError::OffsetOverflowError(_))
14901491
));
14911492
}
1493+
1494+
#[test]
1495+
fn test_interleave_list_view() {
1496+
// `interleave` for ListView falls through to `interleave_fallback`, which uses
1497+
// `MutableArrayData`. `list_view::build_extend` copies offsets/sizes but never
1498+
// extends the child array, so the result contains offsets/sizes that reference
1499+
// positions in the now-absent original child arrays while the child is empty.
1500+
//
1501+
// lv_a: [[1, 2], [3]] (values=[1,2,3], offsets=[0,2], sizes=[2,1])
1502+
// lv_b: [[4, 5, 6]] (values=[4,5,6], offsets=[0], sizes=[3])
1503+
// interleave at [(0,0), (1,0), (0,1)] should produce [[1, 2], [4, 5, 6], [3]]
1504+
let field = Arc::new(Field::new_list_field(DataType::Int64, false));
1505+
1506+
let lv_a = ListViewArray::new(
1507+
Arc::clone(&field),
1508+
ScalarBuffer::from(vec![0i32, 2]),
1509+
ScalarBuffer::from(vec![2i32, 1]),
1510+
Arc::new(Int64Array::from(vec![1_i64, 2, 3])),
1511+
None,
1512+
);
1513+
let lv_b = ListViewArray::new(
1514+
field,
1515+
ScalarBuffer::from(vec![0i32]),
1516+
ScalarBuffer::from(vec![3i32]),
1517+
Arc::new(Int64Array::from(vec![4_i64, 5, 6])),
1518+
None,
1519+
);
1520+
1521+
let result = interleave(
1522+
&[&lv_a as &dyn Array, &lv_b as &dyn Array],
1523+
&[(0, 0), (1, 0), (0, 1)],
1524+
)
1525+
.unwrap();
1526+
1527+
result
1528+
.to_data()
1529+
.validate_full()
1530+
.expect("interleaved ListViewArray must be internally consistent");
1531+
1532+
let result_lv = result.as_list_view::<i32>();
1533+
assert_eq!(result_lv.len(), 3);
1534+
assert_eq!(
1535+
result_lv.value(0).as_primitive::<Int64Type>().values(),
1536+
&[1, 2]
1537+
);
1538+
assert_eq!(
1539+
result_lv.value(1).as_primitive::<Int64Type>().values(),
1540+
&[4, 5, 6]
1541+
);
1542+
assert_eq!(
1543+
result_lv.value(2).as_primitive::<Int64Type>().values(),
1544+
&[3]
1545+
);
1546+
}
14921547
}

0 commit comments

Comments
 (0)