@@ -164,7 +164,7 @@ use std::hash::{Hash, Hasher};
164164use std:: sync:: Arc ;
165165
166166use arrow_array:: cast:: * ;
167- use arrow_array:: types:: ArrowDictionaryKeyType ;
167+ use arrow_array:: types:: { ArrowDictionaryKeyType , ByteArrayType } ;
168168use arrow_array:: * ;
169169use arrow_buffer:: { ArrowNativeType , Buffer , OffsetBuffer , ScalarBuffer } ;
170170use arrow_data:: { ArrayData , ArrayDataBuilder } ;
@@ -1498,31 +1498,15 @@ fn row_lengths(cols: &[ArrayRef], encoders: &[Encoder]) -> LengthTracker {
14981498 array => tracker. push_fixed( fixed:: encoded_len( array) ) ,
14991499 DataType :: Null => { } ,
15001500 DataType :: Boolean => tracker. push_fixed( bool :: ENCODED_LEN ) ,
1501- DataType :: Binary => tracker. push_variable(
1502- as_generic_binary_array:: <i32 >( array)
1503- . iter( )
1504- . map( |slice| variable:: encoded_len( slice) )
1505- ) ,
1506- DataType :: LargeBinary => tracker. push_variable(
1507- as_generic_binary_array:: <i64 >( array)
1508- . iter( )
1509- . map( |slice| variable:: encoded_len( slice) )
1510- ) ,
1501+ DataType :: Binary => push_generic_byte_array_lengths( & mut tracker, as_generic_binary_array:: <i32 >( array) ) ,
1502+ DataType :: LargeBinary => push_generic_byte_array_lengths( & mut tracker, as_generic_binary_array:: <i64 >( array) ) ,
15111503 DataType :: BinaryView => tracker. push_variable(
15121504 array. as_binary_view( )
15131505 . iter( )
15141506 . map( |slice| variable:: encoded_len( slice) )
15151507 ) ,
1516- DataType :: Utf8 => tracker. push_variable(
1517- array. as_string:: <i32 >( )
1518- . iter( )
1519- . map( |slice| variable:: encoded_len( slice. map( |x| x. as_bytes( ) ) ) )
1520- ) ,
1521- DataType :: LargeUtf8 => tracker. push_variable(
1522- array. as_string:: <i64 >( )
1523- . iter( )
1524- . map( |slice| variable:: encoded_len( slice. map( |x| x. as_bytes( ) ) ) )
1525- ) ,
1508+ DataType :: Utf8 => push_generic_byte_array_lengths( & mut tracker, array. as_string:: <i32 >( ) ) ,
1509+ DataType :: LargeUtf8 => push_generic_byte_array_lengths( & mut tracker, array. as_string:: <i64 >( ) ) ,
15261510 DataType :: Utf8View => tracker. push_variable(
15271511 array. as_string_view( )
15281512 . iter( )
@@ -1617,6 +1601,31 @@ fn row_lengths(cols: &[ArrayRef], encoders: &[Encoder]) -> LengthTracker {
16171601 tracker
16181602}
16191603
1604+ /// Add to [`LengthTracker`] the encoded length of each item in the [`GenericByteArray`]
1605+ fn push_generic_byte_array_lengths < T : ByteArrayType > (
1606+ tracker : & mut LengthTracker ,
1607+ array : & GenericByteArray < T > ,
1608+ ) {
1609+ if let Some ( nulls) = array. nulls ( ) . filter ( |n| n. null_count ( ) > 0 ) {
1610+ tracker. push_variable (
1611+ array
1612+ . offsets ( )
1613+ . lengths ( )
1614+ . zip ( nulls. iter ( ) )
1615+ . map ( |( length, is_valid) | if is_valid { Some ( length) } else { None } )
1616+ . map ( variable:: padded_length) ,
1617+ )
1618+ } else {
1619+ tracker. push_variable (
1620+ array
1621+ . offsets ( )
1622+ . lengths ( )
1623+ . map ( Some )
1624+ . map ( variable:: padded_length) ,
1625+ )
1626+ }
1627+ }
1628+
16201629/// Encodes a column to the provided [`Rows`] incrementing the offsets as it progresses
16211630fn encode_column (
16221631 data : & mut [ u8 ] ,
0 commit comments