From c0169725b51e8bda9480ae84c8ed7b6cdb880271 Mon Sep 17 00:00:00 2001 From: Vegard Stikbakke Date: Wed, 18 Feb 2026 21:43:00 +0100 Subject: [PATCH 1/5] Implement native interleave for ListView --- arrow-select/src/interleave.rs | 107 ++++++++++++++++++++++++++++ arrow/benches/interleave_kernels.rs | 7 ++ 2 files changed, 114 insertions(+) diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs index f5904bc171ee..78d7930140b8 100644 --- a/arrow-select/src/interleave.rs +++ b/arrow-select/src/interleave.rs @@ -108,6 +108,8 @@ pub fn interleave( DataType::Struct(fields) => interleave_struct(fields, values, indices), DataType::List(field) => interleave_list::(values, indices, field), DataType::LargeList(field) => interleave_list::(values, indices, field), + DataType::ListView(field) => interleave_list_view::(values, indices, field), + DataType::LargeListView(field) => interleave_list_view::(values, indices, field), _ => interleave_fallback(values, indices) } } @@ -411,6 +413,56 @@ fn interleave_list( Ok(Arc::new(list_array)) } +fn interleave_list_view( + values: &[&dyn Array], + indices: &[(usize, usize)], + field: &FieldRef, +) -> Result { + let interleaved = Interleave::<'_, GenericListViewArray>::new(values, indices); + + // Collect child indices for each referenced list element and build + // new offsets/sizes that point into the interleaved child array + let mut capacity = 0usize; + let mut offsets = Vec::with_capacity(indices.len()); + let mut sizes = Vec::with_capacity(indices.len()); + for &(array_idx, row_idx) in indices { + let list = interleaved.arrays[array_idx]; + let size = list.sizes()[row_idx].as_usize(); + offsets.push( + O::from_usize(capacity).ok_or_else(|| ArrowError::OffsetOverflowError(capacity))?, + ); + sizes.push(O::from_usize(size).ok_or_else(|| ArrowError::OffsetOverflowError(capacity))?); + capacity += size; + } + + // Build child indices for recursive interleave of child values + let mut child_indices = Vec::with_capacity(capacity); + for &(array_idx, row_idx) in indices { + let list = interleaved.arrays[array_idx]; + let start = list.offsets()[row_idx].as_usize(); + let size = list.sizes()[row_idx].as_usize(); + child_indices.extend((start..start + size).map(|i| (array_idx, i))); + } + + let child_arrays: Vec<&dyn Array> = interleaved + .arrays + .iter() + .map(|list| list.values().as_ref()) + .collect(); + + let interleaved_values = interleave(&child_arrays, &child_indices)?; + + let list_view_array = GenericListViewArray::::new( + field.clone(), + offsets.into(), + sizes.into(), + interleaved_values, + interleaved.nulls, + ); + + Ok(Arc::new(list_view_array)) +} + /// Fallback implementation of interleave using [`MutableArrayData`] fn interleave_fallback( values: &[&dyn Array], @@ -770,6 +822,61 @@ mod tests { test_interleave_lists::(); } + fn test_interleave_list_views() { + // [[1, 2], null, [3]] + let mut a = GenericListBuilder::::new(Int32Builder::new()); + a.values().append_value(1); + a.values().append_value(2); + a.append(true); + a.append(false); + a.values().append_value(3); + a.append(true); + let a: GenericListViewArray = a.finish().into(); + + // [[4], null, [5, 6, null]] + let mut b = GenericListBuilder::::new(Int32Builder::new()); + b.values().append_value(4); + b.append(true); + b.append(false); + b.values().append_value(5); + b.values().append_value(6); + b.values().append_null(); + b.append(true); + let b: GenericListViewArray = b.finish().into(); + + let values = interleave(&[&a, &b], &[(0, 2), (0, 1), (1, 0), (1, 2), (1, 1)]).unwrap(); + let v = values + .as_any() + .downcast_ref::>() + .unwrap(); + + // [[3], null, [4], [5, 6, null], null] + let mut expected = GenericListBuilder::::new(Int32Builder::new()); + expected.values().append_value(3); + expected.append(true); + expected.append(false); + expected.values().append_value(4); + expected.append(true); + expected.values().append_value(5); + expected.values().append_value(6); + expected.values().append_null(); + expected.append(true); + expected.append(false); + let expected: GenericListViewArray = expected.finish().into(); + + assert_eq!(v, &expected); + } + + #[test] + fn test_list_views() { + test_interleave_list_views::(); + } + + #[test] + fn test_large_list_views() { + test_interleave_list_views::(); + } + #[test] fn test_struct_without_nulls() { let fields = Fields::from(vec![ diff --git a/arrow/benches/interleave_kernels.rs b/arrow/benches/interleave_kernels.rs index 8daf42a14414..182f48f5a646 100644 --- a/arrow/benches/interleave_kernels.rs +++ b/arrow/benches/interleave_kernels.rs @@ -121,6 +121,11 @@ fn add_benchmark(c: &mut Criterion) { let list_i64_no_nulls = create_primitive_list_array_with_seed::(8192, 0.0, 0.0, 20, 42); + let list_view_i64: ListViewArray = + create_primitive_list_array_with_seed::(8192, 0.1, 0.1, 20, 42).into(); + let list_view_i64_no_nulls: ListViewArray = + create_primitive_list_array_with_seed::(8192, 0.0, 0.0, 20, 42).into(); + let cases: &[(&str, &dyn Array)] = &[ ("i32(0.0)", &i32), ("i32(0.5)", &i32_opt), @@ -143,6 +148,8 @@ fn add_benchmark(c: &mut Criterion) { ), ("list(0.1,0.1,20)", &list_i64), ("list(0.0,0.0,20)", &list_i64_no_nulls), + ("list_view(0.1,0.1,20)", &list_view_i64), + ("list_view(0.0,0.0,20)", &list_view_i64_no_nulls), ]; for (prefix, base) in cases { From a2f54b0434011b8c1ab500153e48723404dac033 Mon Sep 17 00:00:00 2001 From: Vegard Stikbakke Date: Sun, 15 Mar 2026 16:48:04 +0100 Subject: [PATCH 2/5] Add test showing fallback implementation fails --- arrow-select/src/interleave.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs index 78d7930140b8..ce02458a17e9 100644 --- a/arrow-select/src/interleave.rs +++ b/arrow-select/src/interleave.rs @@ -632,7 +632,9 @@ pub fn interleave_record_batch( mod tests { use super::*; use arrow_array::Int32RunArray; - use arrow_array::builder::{GenericListBuilder, Int32Builder, PrimitiveRunBuilder}; + use arrow_array::builder::{ + GenericListBuilder, Int32Builder, Int64Builder, PrimitiveRunBuilder, + }; use arrow_array::types::Int8Type; use arrow_buffer::ScalarBuffer; use arrow_schema::Field; From b18c3a61b120fb539c8b5f3ea18ca433516013c0 Mon Sep 17 00:00:00 2001 From: Vegard Stikbakke Date: Thu, 19 Mar 2026 12:56:28 +0100 Subject: [PATCH 3/5] Bulk copy child value ranges into flat array instead of recursing --- arrow-select/src/interleave.rs | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs index ce02458a17e9..1b304df108b1 100644 --- a/arrow-select/src/interleave.rs +++ b/arrow-select/src/interleave.rs @@ -420,8 +420,7 @@ fn interleave_list_view( ) -> Result { let interleaved = Interleave::<'_, GenericListViewArray>::new(values, indices); - // Collect child indices for each referenced list element and build - // new offsets/sizes that point into the interleaved child array + // Build new offsets/sizes and compute total child capacity let mut capacity = 0usize; let mut offsets = Vec::with_capacity(indices.len()); let mut sizes = Vec::with_capacity(indices.len()); @@ -431,27 +430,28 @@ fn interleave_list_view( offsets.push( O::from_usize(capacity).ok_or_else(|| ArrowError::OffsetOverflowError(capacity))?, ); - sizes.push(O::from_usize(size).ok_or_else(|| ArrowError::OffsetOverflowError(capacity))?); + sizes.push(O::from_usize(size).ok_or_else(|| ArrowError::OffsetOverflowError(size))?); capacity += size; } - // Build child indices for recursive interleave of child values - let mut child_indices = Vec::with_capacity(capacity); + // Bulk-copy child value ranges into a single flat child array + let child_data: Vec<_> = interleaved + .arrays + .iter() + .map(|list| list.values().to_data()) + .collect(); + let child_data_refs: Vec<_> = child_data.iter().collect(); + let mut mutable_child = MutableArrayData::new(child_data_refs, false, capacity); for &(array_idx, row_idx) in indices { let list = interleaved.arrays[array_idx]; let start = list.offsets()[row_idx].as_usize(); let size = list.sizes()[row_idx].as_usize(); - child_indices.extend((start..start + size).map(|i| (array_idx, i))); + if size > 0 { + mutable_child.extend(array_idx, start, start + size); + } } - let child_arrays: Vec<&dyn Array> = interleaved - .arrays - .iter() - .map(|list| list.values().as_ref()) - .collect(); - - let interleaved_values = interleave(&child_arrays, &child_indices)?; - + let interleaved_values = make_array(mutable_child.freeze()); let list_view_array = GenericListViewArray::::new( field.clone(), offsets.into(), From df8181771700f76dfece11795dd04e9b6f363f64 Mon Sep 17 00:00:00 2001 From: Vegard Stikbakke Date: Thu, 19 Mar 2026 14:03:06 +0100 Subject: [PATCH 4/5] Remove unused import in test block --- arrow-select/src/interleave.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs index 1b304df108b1..fdc640edde5a 100644 --- a/arrow-select/src/interleave.rs +++ b/arrow-select/src/interleave.rs @@ -633,7 +633,7 @@ mod tests { use super::*; use arrow_array::Int32RunArray; use arrow_array::builder::{ - GenericListBuilder, Int32Builder, Int64Builder, PrimitiveRunBuilder, + GenericListBuilder, Int32Builder, PrimitiveRunBuilder, }; use arrow_array::types::Int8Type; use arrow_buffer::ScalarBuffer; From 084b5f5896bac5b304d1471014a1852ce00f5d63 Mon Sep 17 00:00:00 2001 From: Vegard Stikbakke Date: Thu, 19 Mar 2026 15:11:29 +0100 Subject: [PATCH 5/5] Fix lint --- arrow-select/src/interleave.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs index fdc640edde5a..f8a753bdb76c 100644 --- a/arrow-select/src/interleave.rs +++ b/arrow-select/src/interleave.rs @@ -632,9 +632,7 @@ pub fn interleave_record_batch( mod tests { use super::*; use arrow_array::Int32RunArray; - use arrow_array::builder::{ - GenericListBuilder, Int32Builder, PrimitiveRunBuilder, - }; + use arrow_array::builder::{GenericListBuilder, Int32Builder, PrimitiveRunBuilder}; use arrow_array::types::Int8Type; use arrow_buffer::ScalarBuffer; use arrow_schema::Field;