Skip to content

Commit fbc5aab

Browse files
vegarsti, alamb, dqkqd
authored
Add cast support for (Large)ListView <-> (Large)List (#8735)
# Which issue does this PR close?

Related to tracking issue #5375 for `ListView`.

# Rationale for this change

We need cast support for `ListView` and `LargeListView`.

# Are these changes tested?

Yes

---------

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
Co-authored-by: Khanh Duong <dqkqdlot@gmail.com>
1 parent e3a670e commit fbc5aab

File tree

3 files changed

+352
-22
lines changed

3 files changed

+352
-22
lines changed

arrow-array/src/array/list_view_array.rs

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ use crate::array::{make_array, print_long_array};
2626
use crate::builder::{GenericListViewBuilder, PrimitiveBuilder};
2727
use crate::iterator::GenericListViewArrayIter;
2828
use crate::{
29-
Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray, OffsetSizeTrait,
30-
new_empty_array,
29+
Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray, GenericListArray,
30+
OffsetSizeTrait, new_empty_array,
3131
};
3232

3333
/// A [`GenericListViewArray`] of variable size lists, storing offsets as `i32`.
@@ -498,6 +498,29 @@ impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListViewArray<Offse
498498
}
499499
}
500500

501+
impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>>
502+
for GenericListViewArray<OffsetSize>
503+
{
504+
fn from(value: GenericListArray<OffsetSize>) -> Self {
505+
let (field, offsets, values, nulls) = value.into_parts();
506+
let len = offsets.len() - 1;
507+
let mut sizes = Vec::with_capacity(len);
508+
let mut view_offsets = Vec::with_capacity(len);
509+
for (i, offset) in offsets.iter().enumerate().take(len) {
510+
view_offsets.push(*offset);
511+
sizes.push(offsets[i + 1] - offsets[i]);
512+
}
513+
514+
Self::new(
515+
field,
516+
ScalarBuffer::from(view_offsets),
517+
ScalarBuffer::from(sizes),
518+
values,
519+
nulls,
520+
)
521+
}
522+
}
523+
501524
impl<OffsetSize: OffsetSizeTrait> From<GenericListViewArray<OffsetSize>> for ArrayData {
502525
fn from(array: GenericListViewArray<OffsetSize>) -> Self {
503526
let len = array.len();

arrow-cast/src/cast/list_view.rs

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use crate::cast::*;
19+
20+
/// Helper function to cast a list view to a list
21+
pub(crate) fn cast_list_view_to_list<O: OffsetSizeTrait>(
22+
array: &dyn Array,
23+
to: &FieldRef,
24+
cast_options: &CastOptions,
25+
) -> Result<ArrayRef, ArrowError> {
26+
let list_view = array.as_list_view::<O>();
27+
let list_view_offsets = list_view.offsets();
28+
let sizes = list_view.sizes();
29+
let source_values = list_view.values();
30+
31+
// Construct the indices and offsets for the new list array by iterating over the list view subarrays
32+
let mut indices = Vec::with_capacity(list_view.values().len());
33+
let mut offsets = Vec::with_capacity(list_view.len() + 1);
34+
// Add the offset for the first subarray
35+
offsets.push(O::usize_as(0));
36+
for i in 0..list_view.len() {
37+
// For each subarray, add the indices of the values to take
38+
let offset = list_view_offsets[i].as_usize();
39+
let size = sizes[i].as_usize();
40+
let end = offset + size;
41+
for j in offset..end {
42+
indices.push(j as i32);
43+
}
44+
// Add the offset for the next subarray
45+
offsets.push(O::usize_as(indices.len()));
46+
}
47+
48+
// Take the values from the source values using the indices, creating a new array
49+
let values = arrow_select::take::take(source_values, &Int32Array::from(indices), None)?;
50+
51+
// Cast the values to the target data type
52+
let values = cast_with_options(&values, to.data_type(), cast_options)?;
53+
54+
Ok(Arc::new(GenericListArray::<O>::try_new(
55+
to.clone(),
56+
OffsetBuffer::new(offsets.into()),
57+
values,
58+
list_view.nulls().cloned(),
59+
)?))
60+
}
61+
62+
pub(crate) fn cast_list_view<I: OffsetSizeTrait, O: OffsetSizeTrait>(
63+
array: &dyn Array,
64+
to_field: &FieldRef,
65+
cast_options: &CastOptions,
66+
) -> Result<ArrayRef, ArrowError> {
67+
let list_view = array.as_list_view::<I>();
68+
let (_field, offsets, sizes, values, nulls) = list_view.clone().into_parts();
69+
70+
// Recursively cast values
71+
let values = cast_with_options(&values, to_field.data_type(), cast_options)?;
72+
73+
let new_offsets: Vec<_> = offsets.iter().map(|x| O::usize_as(x.as_usize())).collect();
74+
let new_sizes: Vec<_> = sizes.iter().map(|x| O::usize_as(x.as_usize())).collect();
75+
Ok(Arc::new(GenericListViewArray::<O>::try_new(
76+
to_field.clone(),
77+
new_offsets.into(),
78+
new_sizes.into(),
79+
values,
80+
nulls,
81+
)?))
82+
}
83+
84+
pub(crate) fn cast_list_to_list_view<OffsetSize>(array: &dyn Array) -> Result<ArrayRef, ArrowError>
85+
where
86+
OffsetSize: OffsetSizeTrait,
87+
{
88+
let list = array.as_list::<OffsetSize>();
89+
let list_view: GenericListViewArray<OffsetSize> = list.clone().into();
90+
Ok(Arc::new(list_view))
91+
}

0 commit comments

Comments
 (0)