Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 107 additions & 0 deletions arrow-select/src/interleave.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ pub fn interleave(
DataType::Struct(fields) => interleave_struct(fields, values, indices),
DataType::List(field) => interleave_list::<i32>(values, indices, field),
DataType::LargeList(field) => interleave_list::<i64>(values, indices, field),
DataType::ListView(field) => interleave_list_view::<i32>(values, indices, field),
DataType::LargeListView(field) => interleave_list_view::<i64>(values, indices, field),
_ => interleave_fallback(values, indices)
}
}
Expand Down Expand Up @@ -411,6 +413,56 @@ fn interleave_list<O: OffsetSizeTrait>(
Ok(Arc::new(list_array))
}

fn interleave_list_view<O: OffsetSizeTrait>(
values: &[&dyn Array],
indices: &[(usize, usize)],
field: &FieldRef,
) -> Result<ArrayRef, ArrowError> {
let interleaved = Interleave::<'_, GenericListViewArray<O>>::new(values, indices);

// Build new offsets/sizes and compute total child capacity
let mut capacity = 0usize;
let mut offsets = Vec::with_capacity(indices.len());
let mut sizes = Vec::with_capacity(indices.len());
for &(array_idx, row_idx) in indices {
let list = interleaved.arrays[array_idx];
let size = list.sizes()[row_idx].as_usize();
offsets.push(
O::from_usize(capacity).ok_or_else(|| ArrowError::OffsetOverflowError(capacity))?,
);
sizes.push(O::from_usize(size).ok_or_else(|| ArrowError::OffsetOverflowError(size))?);
capacity += size;
}

// Bulk-copy child value ranges into a single flat child array
let child_data: Vec<_> = interleaved
.arrays
.iter()
.map(|list| list.values().to_data())
.collect();
let child_data_refs: Vec<_> = child_data.iter().collect();
let mut mutable_child = MutableArrayData::new(child_data_refs, false, capacity);
for &(array_idx, row_idx) in indices {
let list = interleaved.arrays[array_idx];
let start = list.offsets()[row_idx].as_usize();
let size = list.sizes()[row_idx].as_usize();
if size > 0 {
mutable_child.extend(array_idx, start, start + size);
}
}

let interleaved_values = make_array(mutable_child.freeze());
let list_view_array = GenericListViewArray::<O>::new(
field.clone(),
offsets.into(),
sizes.into(),
interleaved_values,
interleaved.nulls,
);

Ok(Arc::new(list_view_array))
}

/// Fallback implementation of interleave using [`MutableArrayData`]
fn interleave_fallback(
values: &[&dyn Array],
Expand Down Expand Up @@ -770,6 +822,61 @@ mod tests {
test_interleave_lists::<i64>();
}

fn test_interleave_list_views<O: OffsetSizeTrait>() {
// [[1, 2], null, [3]]
let mut a = GenericListBuilder::<O, _>::new(Int32Builder::new());
a.values().append_value(1);
a.values().append_value(2);
a.append(true);
a.append(false);
a.values().append_value(3);
a.append(true);
let a: GenericListViewArray<O> = a.finish().into();

// [[4], null, [5, 6, null]]
let mut b = GenericListBuilder::<O, _>::new(Int32Builder::new());
b.values().append_value(4);
b.append(true);
b.append(false);
b.values().append_value(5);
b.values().append_value(6);
b.values().append_null();
b.append(true);
let b: GenericListViewArray<O> = b.finish().into();

let values = interleave(&[&a, &b], &[(0, 2), (0, 1), (1, 0), (1, 2), (1, 1)]).unwrap();
let v = values
.as_any()
.downcast_ref::<GenericListViewArray<O>>()
.unwrap();

// [[3], null, [4], [5, 6, null], null]
let mut expected = GenericListBuilder::<O, _>::new(Int32Builder::new());
expected.values().append_value(3);
expected.append(true);
expected.append(false);
expected.values().append_value(4);
expected.append(true);
expected.values().append_value(5);
expected.values().append_value(6);
expected.values().append_null();
expected.append(true);
expected.append(false);
let expected: GenericListViewArray<O> = expected.finish().into();

assert_eq!(v, &expected);
}

#[test]
fn test_list_views() {
test_interleave_list_views::<i32>();
}

#[test]
fn test_large_list_views() {
test_interleave_list_views::<i64>();
}

#[test]
fn test_struct_without_nulls() {
let fields = Fields::from(vec![
Expand Down
7 changes: 7 additions & 0 deletions arrow/benches/interleave_kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,11 @@ fn add_benchmark(c: &mut Criterion) {
let list_i64_no_nulls =
create_primitive_list_array_with_seed::<i32, Int64Type>(8192, 0.0, 0.0, 20, 42);

let list_view_i64: ListViewArray =
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we please add this benchmark as a separate PR (to make it easier to run the automated benchmark runners)?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Definitely!

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

merged!

create_primitive_list_array_with_seed::<i32, Int64Type>(8192, 0.1, 0.1, 20, 42).into();
let list_view_i64_no_nulls: ListViewArray =
create_primitive_list_array_with_seed::<i32, Int64Type>(8192, 0.0, 0.0, 20, 42).into();

let cases: &[(&str, &dyn Array)] = &[
("i32(0.0)", &i32),
("i32(0.5)", &i32_opt),
Expand All @@ -143,6 +148,8 @@ fn add_benchmark(c: &mut Criterion) {
),
("list<i64>(0.1,0.1,20)", &list_i64),
("list<i64>(0.0,0.0,20)", &list_i64_no_nulls),
("list_view<i64>(0.1,0.1,20)", &list_view_i64),
("list_view<i64>(0.0,0.0,20)", &list_view_i64_no_nulls),
];

for (prefix, base) in cases {
Expand Down
Loading