@@ -1711,16 +1711,20 @@ fn get_list_array_buffers<O: OffsetSizeTrait>(data: &ArrayData) -> (Buffer, Arra
17111711/// the array's offset and length. This helps reduce the encoded size of sliced
17121712/// arrays
17131713///
1714- fn get_view_buffer ( data : & ArrayData ) -> Buffer {
1715- const VIEW_SIZE : usize = 16 ;
1716-
1717- if data. is_empty ( ) {
1718- return MutableBuffer :: new ( 0 ) . into ( ) ;
// Returns buffer 0 of `array_data`: a slice starting at the array's offset when
// `buffer_need_truncate` says truncation is needed, otherwise a clone of the
// whole buffer.
// NOTE(review): the removed `get_view_buffer` returned an empty buffer for empty
// arrays; this version has no explicit empty check -- confirm that case is still
// handled as intended.
1714+ fn get_or_truncate_buffer ( array_data : & ArrayData ) -> Buffer {
1715+ let buffer = & array_data. buffers ( ) [ 0 ] ; // only the first buffer is considered
1716+ let layout = layout ( array_data. data_type ( ) ) ;
1717+ let spec = & layout. buffers [ 0 ] ; // first buffer spec of the layout for this data type
1718+
1719+ let byte_width = get_buffer_element_width ( spec) ; // presumably bytes per element -- confirm against the helper
1720+ let min_length = array_data. len ( ) * byte_width; // array length times byte_width
1721+ if buffer_need_truncate ( array_data. offset ( ) , buffer, spec, min_length) {
1722+ let byte_offset = array_data. offset ( ) * byte_width; // array offset times byte_width: start of the slice
// Clamp the slice length to what remains in the buffer after `byte_offset`.
// NOTE(review): `buffer. len ( ) - byte_offset` would underflow if byte_offset
// exceeded the buffer length; presumably ruled out by `buffer_need_truncate` -- confirm.
1723+ let buffer_length = min ( min_length, buffer. len ( ) - byte_offset) ;
1724+ buffer. slice_with_length ( byte_offset, buffer_length)
1725+ } else {
1726+ buffer. clone ( ) // no truncation requested: hand back the buffer as-is
17191727 }
1720-
1721- let views_start = data. offset ( ) * VIEW_SIZE ;
1722- let views_len = data. len ( ) * VIEW_SIZE ;
1723- data. buffers ( ) [ 0 ] . slice_with_length ( views_start, views_len)
17241728}
17251729
17261730/// Write array data to a vector of bytes
@@ -1790,7 +1794,7 @@ fn write_array_data(
17901794 // The current implementation just serializes the raw arrays as given and does not try to optimize anything.
17911795 // If users want to "compact" the arrays prior to sending them over IPC,
17921796 // they should consider the gc API suggested in #5513
1793- let views = get_view_buffer ( array_data) ;
1797+ let views = get_or_truncate_buffer ( array_data) ;
17941798 offset = write_buffer (
17951799 views. as_slice ( ) ,
17961800 buffers,
@@ -1835,21 +1839,9 @@ fn write_array_data(
18351839 // Truncate values
18361840 assert_eq ! ( array_data. buffers( ) . len( ) , 1 ) ;
18371841
1838- let buffer = & array_data. buffers ( ) [ 0 ] ;
1839- let layout = layout ( data_type) ;
1840- let spec = & layout. buffers [ 0 ] ;
1841-
1842- let byte_width = get_buffer_element_width ( spec) ;
1843- let min_length = array_data. len ( ) * byte_width;
1844- let buffer_slice = if buffer_need_truncate ( array_data. offset ( ) , buffer, spec, min_length) {
1845- let byte_offset = array_data. offset ( ) * byte_width;
1846- let buffer_length = min ( min_length, buffer. len ( ) - byte_offset) ;
1847- & buffer. as_slice ( ) [ byte_offset..( byte_offset + buffer_length) ]
1848- } else {
1849- buffer. as_slice ( )
1850- } ;
1842+ let buffer = get_or_truncate_buffer ( array_data) ;
18511843 offset = write_buffer (
1852- buffer_slice ,
1844+ buffer . as_slice ( ) ,
18531845 buffers,
18541846 arrow_data,
18551847 offset,
@@ -3146,11 +3138,7 @@ mod tests {
31463138
31473139 let values = Arc :: new ( generate_list_data :: < i64 > ( ) ) ;
31483140
3149- let in_batch = RecordBatch :: try_new ( schema, vec ! [ values] )
3150- . unwrap ( )
3151- . slice ( 999 , 1 ) ;
3152- let out_batch = deserialize_file ( serialize_file ( & in_batch) ) ;
3153- assert_eq ! ( in_batch, out_batch) ;
3141+ check_sliced_list_array ( schema, values) ;
31543142 }
31553143
31563144 #[ test]
@@ -3161,11 +3149,7 @@ mod tests {
31613149
31623150 let values = Arc :: new ( generate_string_list_data :: < i64 > ( ) ) ;
31633151
3164- let in_batch = RecordBatch :: try_new ( schema, vec ! [ values] )
3165- . unwrap ( )
3166- . slice ( 999 , 1 ) ;
3167- let out_batch = deserialize_file ( serialize_file ( & in_batch) ) ;
3168- assert_eq ! ( in_batch, out_batch) ;
3152+ check_sliced_list_array ( schema, values) ;
31693153 }
31703154
31713155 #[ test]
@@ -3176,12 +3160,17 @@ mod tests {
31763160
31773161 let values = Arc :: new ( generate_utf8view_list_data :: < i64 > ( ) ) ;
31783162
3179- let in_batch = RecordBatch :: try_new ( schema, vec ! [ values] )
3180- . unwrap ( )
3181- . slice ( 999 , 1 ) ;
3182- dbg ! ( & in_batch) ;
3183- let out_batch = deserialize_file ( serialize_file ( & in_batch) ) ;
3184- assert_eq ! ( in_batch, out_batch) ;
3163+ check_sliced_list_array ( schema, values) ;
3164+ }
3165+
// Test helper: for each (offset, len) pair below, builds a RecordBatch from
// `values`, slices it, round-trips it through `serialize_file` /
// `deserialize_file`, and asserts the result equals the sliced input.
// NOTE(review): offset 999 and `values. len ( ) - 13` assume the generated list
// has at least 1000 entries -- confirm against the generator functions.
3166+ fn check_sliced_list_array ( schema : Arc < Schema > , values : Arc < GenericListArray < i64 > > ) {
// Cases: a single element at index 999, a slice from the start (0, 13), an
// interior slice (47, 12), and a slice ending at the last element (len - 13, 13).
3167+ for ( offset, len) in [ ( 999 , 1 ) , ( 0 , 13 ) , ( 47 , 12 ) , ( values. len ( ) - 13 , 13 ) ] {
3168+ let in_batch = RecordBatch :: try_new ( schema. clone ( ) , vec ! [ values. clone( ) ] )
3169+ . unwrap ( )
3170+ . slice ( offset, len) ;
3171+ let out_batch = deserialize_file ( serialize_file ( & in_batch) ) ; // serialize, then read back
3172+ assert_eq ! ( in_batch, out_batch) ;
3173+ }
31853174 }
31863175
31873176 #[ test]
0 commit comments