diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs index f5904bc171ee..f8a753bdb76c 100644 --- a/arrow-select/src/interleave.rs +++ b/arrow-select/src/interleave.rs @@ -108,6 +108,8 @@ pub fn interleave( DataType::Struct(fields) => interleave_struct(fields, values, indices), DataType::List(field) => interleave_list::(values, indices, field), DataType::LargeList(field) => interleave_list::(values, indices, field), + DataType::ListView(field) => interleave_list_view::(values, indices, field), + DataType::LargeListView(field) => interleave_list_view::(values, indices, field), _ => interleave_fallback(values, indices) } } @@ -411,6 +413,56 @@ fn interleave_list( Ok(Arc::new(list_array)) } +fn interleave_list_view( + values: &[&dyn Array], + indices: &[(usize, usize)], + field: &FieldRef, +) -> Result { + let interleaved = Interleave::<'_, GenericListViewArray>::new(values, indices); + + // Build new offsets/sizes and compute total child capacity + let mut capacity = 0usize; + let mut offsets = Vec::with_capacity(indices.len()); + let mut sizes = Vec::with_capacity(indices.len()); + for &(array_idx, row_idx) in indices { + let list = interleaved.arrays[array_idx]; + let size = list.sizes()[row_idx].as_usize(); + offsets.push( + O::from_usize(capacity).ok_or_else(|| ArrowError::OffsetOverflowError(capacity))?, + ); + sizes.push(O::from_usize(size).ok_or_else(|| ArrowError::OffsetOverflowError(size))?); + capacity += size; + } + + // Bulk-copy child value ranges into a single flat child array + let child_data: Vec<_> = interleaved + .arrays + .iter() + .map(|list| list.values().to_data()) + .collect(); + let child_data_refs: Vec<_> = child_data.iter().collect(); + let mut mutable_child = MutableArrayData::new(child_data_refs, false, capacity); + for &(array_idx, row_idx) in indices { + let list = interleaved.arrays[array_idx]; + let start = list.offsets()[row_idx].as_usize(); + let size = list.sizes()[row_idx].as_usize(); + if size > 0 { + mutable_child.extend(array_idx, start, start + size); + } + } + + let interleaved_values = make_array(mutable_child.freeze()); + let list_view_array = GenericListViewArray::::new( + field.clone(), + offsets.into(), + sizes.into(), + interleaved_values, + interleaved.nulls, + ); + + Ok(Arc::new(list_view_array)) +} + /// Fallback implementation of interleave using [`MutableArrayData`] fn interleave_fallback( values: &[&dyn Array], @@ -770,6 +822,61 @@ mod tests { test_interleave_lists::(); } + fn test_interleave_list_views() { + // [[1, 2], null, [3]] + let mut a = GenericListBuilder::::new(Int32Builder::new()); + a.values().append_value(1); + a.values().append_value(2); + a.append(true); + a.append(false); + a.values().append_value(3); + a.append(true); + let a: GenericListViewArray = a.finish().into(); + + // [[4], null, [5, 6, null]] + let mut b = GenericListBuilder::::new(Int32Builder::new()); + b.values().append_value(4); + b.append(true); + b.append(false); + b.values().append_value(5); + b.values().append_value(6); + b.values().append_null(); + b.append(true); + let b: GenericListViewArray = b.finish().into(); + + let values = interleave(&[&a, &b], &[(0, 2), (0, 1), (1, 0), (1, 2), (1, 1)]).unwrap(); + let v = values + .as_any() + .downcast_ref::>() + .unwrap(); + + // [[3], null, [4], [5, 6, null], null] + let mut expected = GenericListBuilder::::new(Int32Builder::new()); + expected.values().append_value(3); + expected.append(true); + expected.append(false); + expected.values().append_value(4); + expected.append(true); + expected.values().append_value(5); + expected.values().append_value(6); + expected.values().append_null(); + expected.append(true); + expected.append(false); + let expected: GenericListViewArray = expected.finish().into(); + + assert_eq!(v, &expected); + } + + #[test] + fn test_list_views() { + test_interleave_list_views::(); + } + + #[test] + fn test_large_list_views() { + test_interleave_list_views::(); + } + #[test] fn test_struct_without_nulls() { let fields = Fields::from(vec![ diff --git a/arrow/benches/interleave_kernels.rs b/arrow/benches/interleave_kernels.rs index 8daf42a14414..182f48f5a646 100644 --- a/arrow/benches/interleave_kernels.rs +++ b/arrow/benches/interleave_kernels.rs @@ -121,6 +121,11 @@ fn add_benchmark(c: &mut Criterion) { let list_i64_no_nulls = create_primitive_list_array_with_seed::(8192, 0.0, 0.0, 20, 42); + let list_view_i64: ListViewArray = + create_primitive_list_array_with_seed::(8192, 0.1, 0.1, 20, 42).into(); + let list_view_i64_no_nulls: ListViewArray = + create_primitive_list_array_with_seed::(8192, 0.0, 0.0, 20, 42).into(); + let cases: &[(&str, &dyn Array)] = &[ ("i32(0.0)", &i32), ("i32(0.5)", &i32_opt), @@ -143,6 +148,8 @@ fn add_benchmark(c: &mut Criterion) { ), ("list(0.1,0.1,20)", &list_i64), ("list(0.0,0.0,20)", &list_i64_no_nulls), + ("list_view(0.1,0.1,20)", &list_view_i64), + ("list_view(0.0,0.0,20)", &list_view_i64_no_nulls), ]; for (prefix, base) in cases {