@@ -1705,6 +1705,36 @@ fn get_list_array_buffers<O: OffsetSizeTrait>(data: &ArrayData) -> (Buffer, Arra
17051705 ( offsets, child_data)
17061706}
17071707
1708+ /// Returns the offsets, sizes, and child data buffers for a ListView array.
1709+ ///
1710+ /// Unlike List arrays, ListView arrays store both offsets and sizes explicitly,
1711+ /// and offsets can be non-monotonic. When slicing, we simply pass through the
1712+ /// offsets and sizes without re-encoding, and do not slice the child data.
1713+ fn get_list_view_array_buffers < O : OffsetSizeTrait > (
1714+ data : & ArrayData ,
1715+ ) -> ( Buffer , Buffer , ArrayData ) {
1716+ if data. is_empty ( ) {
1717+ return (
1718+ MutableBuffer :: new ( 0 ) . into ( ) ,
1719+ MutableBuffer :: new ( 0 ) . into ( ) ,
1720+ data. child_data ( ) [ 0 ] . slice ( 0 , 0 ) ,
1721+ ) ;
1722+ }
1723+
1724+ let offsets = & data. buffers ( ) [ 0 ] ;
1725+ let sizes = & data. buffers ( ) [ 1 ] ;
1726+
1727+ let element_size = std:: mem:: size_of :: < O > ( ) ;
1728+ let offsets_slice =
1729+ offsets. slice_with_length ( data. offset ( ) * element_size, data. len ( ) * element_size) ;
1730+ let sizes_slice =
1731+ sizes. slice_with_length ( data. offset ( ) * element_size, data. len ( ) * element_size) ;
1732+
1733+ let child_data = data. child_data ( ) [ 0 ] . clone ( ) ;
1734+
1735+ ( offsets_slice, sizes_slice, child_data)
1736+ }
1737+
17081738/// Returns the sliced views [`Buffer`] for a BinaryView/Utf8View array.
17091739///
17101740/// The views buffer is sliced to only include views in the valid range based on
@@ -1901,6 +1931,52 @@ fn write_array_data(
19011931 write_options,
19021932 ) ?;
19031933 return Ok ( offset) ;
1934+ } else if matches ! (
1935+ data_type,
1936+ DataType :: ListView ( _) | DataType :: LargeListView ( _)
1937+ ) {
1938+ assert_eq ! ( array_data. buffers( ) . len( ) , 2 ) ; // offsets + sizes
1939+ assert_eq ! ( array_data. child_data( ) . len( ) , 1 ) ;
1940+
1941+ let ( offsets, sizes, child_data) = match data_type {
1942+ DataType :: ListView ( _) => get_list_view_array_buffers :: < i32 > ( array_data) ,
1943+ DataType :: LargeListView ( _) => get_list_view_array_buffers :: < i64 > ( array_data) ,
1944+ _ => unreachable ! ( ) ,
1945+ } ;
1946+
1947+ offset = write_buffer (
1948+ offsets. as_slice ( ) ,
1949+ buffers,
1950+ arrow_data,
1951+ offset,
1952+ compression_codec,
1953+ compression_context,
1954+ write_options. alignment ,
1955+ ) ?;
1956+
1957+ offset = write_buffer (
1958+ sizes. as_slice ( ) ,
1959+ buffers,
1960+ arrow_data,
1961+ offset,
1962+ compression_codec,
1963+ compression_context,
1964+ write_options. alignment ,
1965+ ) ?;
1966+
1967+ offset = write_array_data (
1968+ & child_data,
1969+ buffers,
1970+ arrow_data,
1971+ nodes,
1972+ offset,
1973+ child_data. len ( ) ,
1974+ child_data. null_count ( ) ,
1975+ compression_codec,
1976+ compression_context,
1977+ write_options,
1978+ ) ?;
1979+ return Ok ( offset) ;
19041980 } else if let DataType :: FixedSizeList ( _, fixed_size) = data_type {
19051981 assert_eq ! ( array_data. child_data( ) . len( ) , 1 ) ;
19061982 let fixed_size = * fixed_size as usize ;
@@ -2043,7 +2119,9 @@ mod tests {
20432119 use arrow_array:: builder:: MapBuilder ;
20442120 use arrow_array:: builder:: StringViewBuilder ;
20452121 use arrow_array:: builder:: UnionBuilder ;
2046- use arrow_array:: builder:: { GenericListBuilder , ListBuilder , StringBuilder } ;
2122+ use arrow_array:: builder:: {
2123+ GenericListBuilder , GenericListViewBuilder , ListBuilder , StringBuilder ,
2124+ } ;
20472125 use arrow_array:: builder:: { PrimitiveRunBuilder , UInt32Builder } ;
20482126 use arrow_array:: types:: * ;
20492127 use arrow_buffer:: ScalarBuffer ;
@@ -3212,6 +3290,118 @@ mod tests {
32123290 roundtrip_ensure_sliced_smaller ( in_batch, 1000 ) ;
32133291 }
32143292
3293+ fn generate_list_view_data < O : OffsetSizeTrait > ( ) -> GenericListViewArray < O > {
3294+ let mut builder = GenericListViewBuilder :: < O , _ > :: new ( UInt32Builder :: new ( ) ) ;
3295+
3296+ for i in 0u32 ..100_000 {
3297+ if i. is_multiple_of ( 10_000 ) {
3298+ builder. append ( false ) ;
3299+ continue ;
3300+ }
3301+ for value in [ i, i, i] {
3302+ builder. values ( ) . append_value ( value) ;
3303+ }
3304+ builder. append ( true ) ;
3305+ }
3306+
3307+ builder. finish ( )
3308+ }
3309+
/// A `ListView<UInt32>` column must survive a write/read cycle through the
/// IPC file format unchanged.
#[test]
fn encode_list_view_arrays() {
    let item_field = Arc::new(Field::new_list_field(DataType::UInt32, true));
    let schema = Arc::new(Schema::new(vec![Field::new(
        "val",
        DataType::ListView(item_field),
        true,
    )]));

    let column = Arc::new(generate_list_view_data::<i32>());
    let in_batch = RecordBatch::try_new(schema, vec![column]).unwrap();

    let out_batch = deserialize_file(serialize_file(&in_batch));
    assert_eq!(in_batch, out_batch);
}
3322+
/// A `LargeListView<UInt32>` column must survive a write/read cycle through
/// the IPC file format unchanged.
#[test]
fn encode_large_list_view_arrays() {
    let item_field = Arc::new(Field::new_list_field(DataType::UInt32, true));
    let schema = Arc::new(Schema::new(vec![Field::new(
        "val",
        DataType::LargeListView(item_field),
        true,
    )]));

    let column = Arc::new(generate_list_view_data::<i64>());
    let in_batch = RecordBatch::try_new(schema, vec![column]).unwrap();

    let out_batch = deserialize_file(serialize_file(&in_batch));
    assert_eq!(in_batch, out_batch);
}
3335+
/// Slices of a `ListView<UInt32>` batch (single row, head, middle, and tail)
/// must each round-trip through the IPC file format unchanged.
#[test]
fn check_sliced_list_view_array() {
    let item_field = Arc::new(Field::new_list_field(DataType::UInt32, true));
    let schema = Arc::new(Schema::new(vec![Field::new(
        "val",
        DataType::ListView(item_field),
        true,
    )]));
    let values = Arc::new(generate_list_view_data::<i32>());

    // The unsliced batch is the same for every case, so build it once.
    let full_batch = RecordBatch::try_new(schema, vec![values.clone()]).unwrap();

    // (start row, row count) windows to exercise.
    let windows = [(999, 1), (0, 13), (47, 12), (values.len() - 13, 13)];
    for (start, count) in windows {
        let in_batch = full_batch.slice(start, count);
        let out_batch = deserialize_file(serialize_file(&in_batch));
        assert_eq!(in_batch, out_batch);
    }
}
3351+
/// Slices of a `LargeListView<UInt32>` batch (single row, head, middle, and
/// tail) must each round-trip through the IPC file format unchanged.
#[test]
fn check_sliced_large_list_view_array() {
    let item_field = Arc::new(Field::new_list_field(DataType::UInt32, true));
    let schema = Arc::new(Schema::new(vec![Field::new(
        "val",
        DataType::LargeListView(item_field),
        true,
    )]));
    let values = Arc::new(generate_list_view_data::<i64>());

    // The unsliced batch is the same for every case, so build it once.
    let full_batch = RecordBatch::try_new(schema, vec![values.clone()]).unwrap();

    // (start row, row count) windows to exercise.
    let windows = [(999, 1), (0, 13), (47, 12), (values.len() - 13, 13)];
    for (start, count) in windows {
        let in_batch = full_batch.slice(start, count);
        let out_batch = deserialize_file(serialize_file(&in_batch));
        assert_eq!(in_batch, out_batch);
    }
}
3367+
3368+ fn generate_nested_list_view_data < O : OffsetSizeTrait > ( ) -> GenericListViewArray < O > {
3369+ let inner_builder = UInt32Builder :: new ( ) ;
3370+ let middle_builder = GenericListViewBuilder :: < O , _ > :: new ( inner_builder) ;
3371+ let mut outer_builder = GenericListViewBuilder :: < O , _ > :: new ( middle_builder) ;
3372+
3373+ for i in 0u32 ..10_000 {
3374+ if i. is_multiple_of ( 1_000 ) {
3375+ outer_builder. append ( false ) ;
3376+ continue ;
3377+ }
3378+
3379+ for _ in 0 ..3 {
3380+ for value in [ i, i + 1 , i + 2 ] {
3381+ outer_builder. values ( ) . values ( ) . append_value ( value) ;
3382+ }
3383+ outer_builder. values ( ) . append ( true ) ;
3384+ }
3385+ outer_builder. append ( true ) ;
3386+ }
3387+
3388+ outer_builder. finish ( )
3389+ }
3390+
/// A `ListView<ListView<UInt32>>` column must survive a write/read cycle
/// through the IPC file format unchanged.
#[test]
fn encode_nested_list_views() {
    let leaf_field = Arc::new(Field::new_list_field(DataType::UInt32, true));
    let middle_field = Arc::new(Field::new_list_field(DataType::ListView(leaf_field), true));
    let schema = Arc::new(Schema::new(vec![Field::new(
        "val",
        DataType::ListView(middle_field),
        true,
    )]));

    let column = Arc::new(generate_nested_list_view_data::<i32>());
    let in_batch = RecordBatch::try_new(schema, vec![column]).unwrap();

    let out_batch = deserialize_file(serialize_file(&in_batch));
    assert_eq!(in_batch, out_batch);
}
3404+
32153405 #[ test]
32163406 fn test_decimal128_alignment16_is_sufficient ( ) {
32173407 const IPC_ALIGNMENT : usize = 16 ;
0 commit comments