@@ -40,14 +40,17 @@ use crate::util::bit_util::FromBytes;
4040/// A macro to reduce verbosity of [`make_byte_array_dictionary_reader`]
4141macro_rules! make_reader {
4242 (
43- ( $pages: expr, $column_desc: expr, $data_type: expr) => match ( $k: expr, $v: expr) {
43+ ( $pages: expr, $column_desc: expr, $data_type: expr, $batch_size : expr ) => match ( $k: expr, $v: expr) {
4444 $( ( $key_arrow: pat, $value_arrow: pat) => ( $key_type: ty, $value_type: ty) , ) +
4545 }
4646 ) => {
4747 match ( ( $k, $v) ) {
4848 $(
4949 ( $key_arrow, $value_arrow) => {
50- let reader = GenericRecordReader :: new( $column_desc) ;
50+ let reader = match $batch_size {
51+ Some ( capacity) => GenericRecordReader :: new_with_capacity( $column_desc, capacity) ,
52+ None => GenericRecordReader :: new( $column_desc) ,
53+ } ;
5154 Ok ( Box :: new( ByteArrayDictionaryReader :: <$key_type, $value_type>:: new(
5255 $pages, $data_type, reader,
5356 ) ) )
@@ -73,10 +76,13 @@ macro_rules! make_reader {
7376/// It is therefore recommended that if `pages` contains data from multiple column chunks,
7477/// that the read batch size used is a divisor of the row group size
7578///
79+ /// If `batch_size` is `Some(capacity)`, the record reader's internal buffers are pre-allocated
80+ /// with that capacity, avoiding reallocations when reading the first batch; `None` skips pre-allocation.
7681pub fn make_byte_array_dictionary_reader (
7782 pages : Box < dyn PageIterator > ,
7883 column_desc : ColumnDescPtr ,
7984 arrow_type : Option < ArrowType > ,
85+ batch_size : Option < usize > ,
8086) -> Result < Box < dyn ArrayReader > > {
8187 // Check if Arrow type is specified, else create it from Parquet type
8288 let data_type = match arrow_type {
@@ -89,7 +95,7 @@ pub fn make_byte_array_dictionary_reader(
8995 match & data_type {
9096 ArrowType :: Dictionary ( key_type, value_type) => {
9197 make_reader ! {
92- ( pages, column_desc, data_type) => match ( key_type. as_ref( ) , value_type. as_ref( ) ) {
98+ ( pages, column_desc, data_type, batch_size ) => match ( key_type. as_ref( ) , value_type. as_ref( ) ) {
9399 ( ArrowType :: UInt8 , ArrowType :: Binary | ArrowType :: Utf8 | ArrowType :: FixedSizeBinary ( _) ) => ( u8 , i32 ) ,
94100 ( ArrowType :: UInt8 , ArrowType :: LargeBinary | ArrowType :: LargeUtf8 ) => ( u8 , i64 ) ,
95101 ( ArrowType :: Int8 , ArrowType :: Binary | ArrowType :: Utf8 | ArrowType :: FixedSizeBinary ( _) ) => ( i8 , i32 ) ,
0 commit comments