11// SPDX-License-Identifier: Apache-2.0
22// SPDX-FileCopyrightText: Copyright the Vortex contributors
33
4- use std:: any:: type_name;
54use std:: sync:: Arc ;
65
76use arrow_array:: ArrayRef as ArrowArrayRef ;
@@ -13,26 +12,23 @@ use vortex_buffer::BufferMut;
1312use vortex_error:: VortexExpect ;
1413use vortex_error:: VortexResult ;
1514use vortex_error:: vortex_ensure;
16- use vortex_error:: vortex_err;
1715
1816use crate :: Array ;
1917use crate :: ArrayRef ;
2018use crate :: Canonical ;
2119use crate :: ExecutionCtx ;
22- use crate :: IntoArray ;
2320use crate :: arrays:: ListArray ;
2421use crate :: arrays:: ListVTable ;
2522use crate :: arrays:: ListViewArray ;
2623use crate :: arrays:: ListViewArrayParts ;
24+ use crate :: arrays:: ListViewRebuildMode ;
2725use crate :: arrays:: ListViewVTable ;
28- use crate :: arrays:: PrimitiveArray ;
2926use crate :: arrow:: ArrowArrayExecutor ;
3027use crate :: arrow:: executor:: validity:: to_arrow_null_buffer;
3128use crate :: builtins:: ArrayBuiltins ;
3229use crate :: dtype:: DType ;
3330use crate :: dtype:: NativePType ;
3431use crate :: dtype:: Nullability ;
35- use crate :: validity:: Validity ;
3632use crate :: vtable:: ValidityHelper ;
3733
3834/// Convert a Vortex array into an Arrow GenericListArray.
@@ -46,37 +42,30 @@ pub(super) fn to_arrow_list<O: OffsetSizeTrait + NativePType>(
4642 return list_to_list :: < O > ( array, elements_field, ctx) ;
4743 }
4844
49- // If the Vortex array is a ListViewArray, we check for our magic cheap conversion flag .
45+ // If the Vortex array is a ListViewArray, rebuild to ZCTL if needed and convert .
5046 let array = match array. try_into :: < ListViewVTable > ( ) {
5147 Ok ( array) => {
52- if array. is_zero_copy_to_list ( ) {
53- return list_view_zctl :: < O > ( array, elements_field , ctx ) ;
48+ let zctl = if array. is_zero_copy_to_list ( ) {
49+ array
5450 } else {
55- return list_view_to_list :: < O > ( array, elements_field, ctx) ;
56- }
51+ array. rebuild ( ListViewRebuildMode :: MakeZeroCopyToList ) ?
52+ } ;
53+ return list_view_zctl :: < O > ( zctl, elements_field, ctx) ;
5754 }
5855 Err ( a) => a,
5956 } ;
6057
61- // TODO(ngates): we should do the slightly more expensive thing which is to verify ZCTL.
62- // In other words, check that offsets + sizes are monotonically increasing.
63-
64- // Otherwise, we execute the array to become a ListViewArray .
58+ // Otherwise, we execute the array to become a ListViewArray, then rebuild to ZCTL.
59+ // Note: arrow_cast::cast supports ListView → List (apache/arrow-rs#8735), but it
60+ // unconditionally uses take. Our rebuild uses a heuristic that picks list-by-list
61+ // for large lists, which avoids materializing a large index buffer .
6562 let list_view = array. execute :: < ListViewArray > ( ctx) ?;
66- if list_view. is_zero_copy_to_list ( ) {
67- list_view_zctl :: < O > ( list_view, elements_field , ctx )
63+ let zctl = if list_view. is_zero_copy_to_list ( ) {
64+ list_view
6865 } else {
69- list_view_to_list :: < O > ( list_view, elements_field, ctx)
70- }
71-
72- // FIXME(ngates): we need this PR from arrow-rs:
73- // https://github.com/apache/arrow-rs/pull/8735
74- // let list_view = array.execute(session)?.into_arrow()?;
75- // match O::IS_LARGE {
76- // true => arrow_cast::cast(&list_view, &DataType::LargeList(elements_field.clone())),
77- // false => arrow_cast::cast(&list_view, &DataType::List(elements_field.clone())),
78- // }
79- // .map_err(VortexError::from)
66+ list_view. rebuild ( ListViewRebuildMode :: MakeZeroCopyToList ) ?
67+ } ;
68+ list_view_zctl :: < O > ( zctl, elements_field, ctx)
8069}
8170
8271/// Convert a Vortex VarBinArray into an Arrow GenericBinaryArray.
@@ -190,70 +179,6 @@ fn list_view_zctl<O: OffsetSizeTrait + NativePType>(
190179 ) ) )
191180}
192181
193- fn list_view_to_list < O : OffsetSizeTrait + NativePType > (
194- array : ListViewArray ,
195- elements_field : & FieldRef ,
196- ctx : & mut ExecutionCtx ,
197- ) -> VortexResult < ArrowArrayRef > {
198- let ListViewArrayParts {
199- elements,
200- offsets,
201- sizes,
202- validity,
203- ..
204- } = array. into_parts ( ) ;
205-
206- let offsets = offsets
207- . cast ( DType :: Primitive ( O :: PTYPE , Nullability :: NonNullable ) ) ?
208- . execute :: < Canonical > ( ctx) ?
209- . into_primitive ( )
210- . to_buffer :: < O > ( ) ;
211- let sizes = sizes
212- . cast ( DType :: Primitive ( O :: PTYPE , Nullability :: NonNullable ) ) ?
213- . execute :: < Canonical > ( ctx) ?
214- . into_primitive ( )
215- . to_buffer :: < O > ( ) ;
216-
217- // We create a new offsets buffer for the final list array.
218- // And we also create an `indices` buffer for taking the elements.
219- let mut new_offsets = BufferMut :: < O > :: with_capacity ( offsets. len ( ) + 1 ) ;
220- let mut take_indices = BufferMut :: < u32 > :: with_capacity ( elements. len ( ) ) ;
221-
222- // Add the offset for the first subarray
223- new_offsets. push ( O :: zero ( ) ) ;
224- for ( offset, size) in offsets. iter ( ) . zip ( sizes. iter ( ) ) {
225- let offset = offset. as_usize ( ) ;
226- let size = size. as_usize ( ) ;
227- let end = offset + size;
228- for j in offset..end {
229- take_indices. push ( u32:: try_from ( j) . map_err ( |_| {
230- vortex_err ! ( "List array too large for {} indices" , type_name:: <O >( ) )
231- } ) ?) ;
232- }
233- new_offsets. push ( O :: usize_as ( take_indices. len ( ) ) ) ;
234- }
235- assert_eq ! ( new_offsets. len( ) , offsets. len( ) + 1 ) ;
236-
237- // Now we can "take" the elements using the computed indices.
238- let elements =
239- elements. take ( PrimitiveArray :: new ( take_indices, Validity :: NonNullable ) . into_array ( ) ) ?;
240-
241- let elements = elements. execute_arrow ( Some ( elements_field. data_type ( ) ) , ctx) ?;
242- vortex_ensure ! (
243- elements_field. is_nullable( ) || elements. null_count( ) == 0 ,
244- "Cannot convert to non-nullable Arrow array with null elements"
245- ) ;
246-
247- let null_buffer = to_arrow_null_buffer ( validity, sizes. len ( ) , ctx) ?;
248-
249- Ok ( Arc :: new ( GenericListArray :: < O > :: new (
250- elements_field. clone ( ) ,
251- new_offsets. freeze ( ) . into_arrow_offset_buffer ( ) ,
252- elements,
253- null_buffer,
254- ) ) )
255- }
256-
257182#[ cfg( test) ]
258183mod tests {
259184 use std:: sync:: Arc ;
@@ -364,6 +289,45 @@ mod tests {
364289 Ok ( ( ) )
365290 }
366291
/// Converting a list view whose lists overlap must still yield the correct Arrow list.
#[test]
fn test_to_arrow_list_non_zctl() -> VortexResult<()> {
    // Backing elements are [1, 2, 3, 4]. View 0 covers [1, 2, 3] and view 1 covers
    // [2, 3, 4], so the two views share indices 1-2 and the array is not
    // zero-copy-to-list.
    let values = PrimitiveArray::new(buffer![1i32, 2, 3, 4], Validity::NonNullable).into_array();
    let view_offsets = PrimitiveArray::new(buffer![0i32, 1], Validity::NonNullable).into_array();
    let view_sizes = PrimitiveArray::new(buffer![3i32, 3], Validity::NonNullable).into_array();

    let overlapping = ListViewArray::new(values, view_offsets, view_sizes, Validity::NonNullable);
    assert!(!overlapping.is_zero_copy_to_list());

    let target_type = DataType::List(Field::new("item", DataType::Int32, false).into());
    let converted = overlapping.into_array().into_arrow(&target_type)?;
    let lists = converted
        .as_any()
        .downcast_ref::<GenericListArray<i32>>()
        .unwrap();
    assert_eq!(lists.len(), 2);

    // Each output list must be materialized independently despite the shared elements.
    let expected: [&[i32]; 2] = [&[1, 2, 3], &[2, 3, 4]];
    for (row, want) in expected.iter().enumerate() {
        let child = lists.value(row);
        assert_eq!(child.len(), 3);
        let ints = child.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(&ints.values()[..], *want);
    }
    Ok(())
}
330+
367331 #[ test]
368332 fn test_to_arrow_list_empty_zctl ( ) -> VortexResult < ( ) > {
369333 let dtype = DType :: List (
0 commit comments