Skip to content

Commit d049805

Browse files
committed
arrow-select: fix MutableArrayData interleave for ListView
The previous code did not extend child data buffers. Signed-off-by: Alfonso Subiotto Marques <alfonso.subiotto@polarsignals.com>
1 parent fcab5d2 commit d049805

File tree

2 files changed

+66
-12
lines changed

2 files changed

+66
-12
lines changed

arrow-data/src/transform/list_view.rs

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,21 +27,21 @@ pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd>(
2727
let offsets = array.buffer::<T>(0);
2828
let sizes = array.buffer::<T>(1);
2929
Box::new(
30-
move |mutable: &mut _MutableArrayData, _index: usize, start: usize, len: usize| {
31-
let offset_buffer = &mut mutable.buffer1;
32-
let sizes_buffer = &mut mutable.buffer2;
30+
move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| {
31+
let mut new_offset = mutable.child_data[0].len();
3332

34-
for &offset in &offsets[start..start + len] {
35-
offset_buffer.push(offset);
36-
}
33+
for i in start..start + len {
34+
let size = sizes[i].as_usize();
3735

38-
// sizes
39-
for &size in &sizes[start..start + len] {
40-
sizes_buffer.push(size);
41-
}
36+
mutable.buffer1.push(T::usize_as(new_offset));
37+
mutable.buffer2.push(sizes[i]);
38+
new_offset += size;
4239

43-
// the beauty of views is that we don't need to copy child_data, we just splat
44-
// the offsets and sizes.
40+
if size > 0 {
41+
let child_start = offsets[i].as_usize();
42+
mutable.child_data[0].extend(index, child_start, child_start + size);
43+
}
44+
}
4545
},
4646
)
4747
}

arrow-select/src/interleave.rs

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,7 @@ mod tests {
582582
use arrow_array::Int32RunArray;
583583
use arrow_array::builder::{GenericListBuilder, Int32Builder, PrimitiveRunBuilder};
584584
use arrow_array::types::Int8Type;
585+
use arrow_buffer::ScalarBuffer;
585586
use arrow_schema::Field;
586587

587588
#[test]
@@ -1489,4 +1490,57 @@ mod tests {
14891490
Err(ArrowError::OffsetOverflowError(_))
14901491
));
14911492
}
1493+
1494+
fn test_interleave_list_view() {
1495+
// `interleave` for ListView falls through to `interleave_fallback`, which uses
1496+
// `MutableArrayData`. `list_view::build_extend` copies offsets/sizes but never
1497+
// extends the child array, so the result contains offsets/sizes that reference
1498+
// positions in the now-absent original child arrays while the child is empty.
1499+
//
1500+
// lv_a: [[1, 2], [3]] (values=[1,2,3], offsets=[0,2], sizes=[2,1])
1501+
// lv_b: [[4, 5, 6]] (values=[4,5,6], offsets=[0], sizes=[3])
1502+
// interleave at [(0,0), (1,0), (0,1)] should produce [[1, 2], [4, 5, 6], [3]]
1503+
let field = Arc::new(Field::new_list_field(DataType::Int64, false));
1504+
1505+
let lv_a = ListViewArray::new(
1506+
Arc::clone(&field),
1507+
ScalarBuffer::from(vec![0i32, 2]),
1508+
ScalarBuffer::from(vec![2i32, 1]),
1509+
Arc::new(Int64Array::from(vec![1_i64, 2, 3])),
1510+
None,
1511+
);
1512+
let lv_b = ListViewArray::new(
1513+
field,
1514+
ScalarBuffer::from(vec![0i32]),
1515+
ScalarBuffer::from(vec![3i32]),
1516+
Arc::new(Int64Array::from(vec![4_i64, 5, 6])),
1517+
None,
1518+
);
1519+
1520+
let result = interleave(
1521+
&[&lv_a as &dyn Array, &lv_b as &dyn Array],
1522+
&[(0, 0), (1, 0), (0, 1)],
1523+
)
1524+
.unwrap();
1525+
1526+
result
1527+
.to_data()
1528+
.validate_full()
1529+
.expect("interleaved ListViewArray must be internally consistent");
1530+
1531+
let result_lv = result.as_list_view::<i32>();
1532+
assert_eq!(result_lv.len(), 3);
1533+
assert_eq!(
1534+
result_lv.value(0).as_primitive::<Int64Type>().values(),
1535+
&[1, 2]
1536+
);
1537+
assert_eq!(
1538+
result_lv.value(1).as_primitive::<Int64Type>().values(),
1539+
&[4, 5, 6]
1540+
);
1541+
assert_eq!(
1542+
result_lv.value(2).as_primitive::<Int64Type>().values(),
1543+
&[3]
1544+
);
1545+
}
14921546
}

0 commit comments

Comments
 (0)