Skip to content

Commit a1131f2

Browse files
committed
Implement native interleave for ListView
1 parent fcab5d2 commit a1131f2

File tree

2 files changed

+116
-0
lines changed

2 files changed

+116
-0
lines changed

arrow-select/src/interleave.rs

Lines changed: 109 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,8 @@ pub fn interleave(
108108
DataType::Struct(fields) => interleave_struct(fields, values, indices),
109109
DataType::List(field) => interleave_list::<i32>(values, indices, field),
110110
DataType::LargeList(field) => interleave_list::<i64>(values, indices, field),
111+
DataType::ListView(field) => interleave_list_view::<i32>(values, indices, field),
112+
DataType::LargeListView(field) => interleave_list_view::<i64>(values, indices, field),
111113
_ => interleave_fallback(values, indices)
112114
}
113115
}
@@ -411,6 +413,58 @@ fn interleave_list<O: OffsetSizeTrait>(
411413
Ok(Arc::new(list_array))
412414
}
413415

416+
fn interleave_list_view<O: OffsetSizeTrait>(
417+
values: &[&dyn Array],
418+
indices: &[(usize, usize)],
419+
field: &FieldRef,
420+
) -> Result<ArrayRef, ArrowError> {
421+
let interleaved = Interleave::<'_, GenericListViewArray<O>>::new(values, indices);
422+
423+
// Collect child indices for each referenced list element and build
424+
// new offsets/sizes that point into the interleaved child array
425+
let mut capacity = 0usize;
426+
let mut offsets = Vec::with_capacity(indices.len());
427+
let mut sizes = Vec::with_capacity(indices.len());
428+
for &(array_idx, row_idx) in indices {
429+
let list = interleaved.arrays[array_idx];
430+
let size = list.sizes()[row_idx].as_usize();
431+
offsets.push(
432+
O::from_usize(capacity).ok_or_else(|| ArrowError::OffsetOverflowError(capacity))?
433+
);
434+
sizes.push(
435+
O::from_usize(size).ok_or_else(|| ArrowError::OffsetOverflowError(capacity))?
436+
);
437+
capacity += size;
438+
}
439+
440+
// Build child indices for recursive interleave of child values
441+
let mut child_indices = Vec::with_capacity(capacity);
442+
for &(array_idx, row_idx) in indices {
443+
let list = interleaved.arrays[array_idx];
444+
let start = list.offsets()[row_idx].as_usize();
445+
let size = list.sizes()[row_idx].as_usize();
446+
child_indices.extend((start..start + size).map(|i| (array_idx, i)));
447+
}
448+
449+
let child_arrays: Vec<&dyn Array> = interleaved
450+
.arrays
451+
.iter()
452+
.map(|list| list.values().as_ref())
453+
.collect();
454+
455+
let interleaved_values = interleave(&child_arrays, &child_indices)?;
456+
457+
let list_view_array = GenericListViewArray::<O>::new(
458+
field.clone(),
459+
offsets.into(),
460+
sizes.into(),
461+
interleaved_values,
462+
interleaved.nulls,
463+
);
464+
465+
Ok(Arc::new(list_view_array))
466+
}
467+
414468
/// Fallback implementation of interleave using [`MutableArrayData`]
415469
fn interleave_fallback(
416470
values: &[&dyn Array],
@@ -769,6 +823,61 @@ mod tests {
769823
test_interleave_lists::<i64>();
770824
}
771825

826+
fn test_interleave_list_views<O: OffsetSizeTrait>() {
827+
// [[1, 2], null, [3]]
828+
let mut a = GenericListBuilder::<O, _>::new(Int32Builder::new());
829+
a.values().append_value(1);
830+
a.values().append_value(2);
831+
a.append(true);
832+
a.append(false);
833+
a.values().append_value(3);
834+
a.append(true);
835+
let a: GenericListViewArray<O> = a.finish().into();
836+
837+
// [[4], null, [5, 6, null]]
838+
let mut b = GenericListBuilder::<O, _>::new(Int32Builder::new());
839+
b.values().append_value(4);
840+
b.append(true);
841+
b.append(false);
842+
b.values().append_value(5);
843+
b.values().append_value(6);
844+
b.values().append_null();
845+
b.append(true);
846+
let b: GenericListViewArray<O> = b.finish().into();
847+
848+
let values = interleave(&[&a, &b], &[(0, 2), (0, 1), (1, 0), (1, 2), (1, 1)]).unwrap();
849+
let v = values
850+
.as_any()
851+
.downcast_ref::<GenericListViewArray<O>>()
852+
.unwrap();
853+
854+
// [[3], null, [4], [5, 6, null], null]
855+
let mut expected = GenericListBuilder::<O, _>::new(Int32Builder::new());
856+
expected.values().append_value(3);
857+
expected.append(true);
858+
expected.append(false);
859+
expected.values().append_value(4);
860+
expected.append(true);
861+
expected.values().append_value(5);
862+
expected.values().append_value(6);
863+
expected.values().append_null();
864+
expected.append(true);
865+
expected.append(false);
866+
let expected: GenericListViewArray<O> = expected.finish().into();
867+
868+
assert_eq!(v, &expected);
869+
}
870+
871+
/// Runs the list-view interleave check with `i32` offsets.
#[test]
fn test_list_views() {
    test_interleave_list_views::<i32>()
}
875+
876+
/// Runs the list-view interleave check with `i64` offsets.
#[test]
fn test_large_list_views() {
    test_interleave_list_views::<i64>()
}
880+
772881
#[test]
773882
fn test_struct_without_nulls() {
774883
let fields = Fields::from(vec![

arrow/benches/interleave_kernels.rs

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -121,6 +121,11 @@ fn add_benchmark(c: &mut Criterion) {
121121
let list_i64_no_nulls =
122122
create_primitive_list_array_with_seed::<i32, Int64Type>(8192, 0.0, 0.0, 20, 42);
123123

124+
let list_view_i64: ListViewArray =
125+
create_primitive_list_array_with_seed::<i32, Int64Type>(8192, 0.1, 0.1, 20, 42).into();
126+
let list_view_i64_no_nulls: ListViewArray =
127+
create_primitive_list_array_with_seed::<i32, Int64Type>(8192, 0.0, 0.0, 20, 42).into();
128+
124129
let cases: &[(&str, &dyn Array)] = &[
125130
("i32(0.0)", &i32),
126131
("i32(0.5)", &i32_opt),
@@ -143,6 +148,8 @@ fn add_benchmark(c: &mut Criterion) {
143148
),
144149
("list<i64>(0.1,0.1,20)", &list_i64),
145150
("list<i64>(0.0,0.0,20)", &list_i64_no_nulls),
151+
("list_view<i64>(0.1,0.1,20)", &list_view_i64),
152+
("list_view<i64>(0.0,0.0,20)", &list_view_i64_no_nulls),
146153
];
147154

148155
for (prefix, base) in cases {

0 commit comments

Comments
 (0)