@@ -58,6 +58,8 @@ pub struct GenericRecordReader<V, CV> {
5858 num_values : usize ,
5959 /// Number of buffered records
6060 num_records : usize ,
61+ /// Capacity hint for pre-allocating buffers based on batch size
62+ capacity_hint : usize ,
6163}
6264
6365impl < V , CV > GenericRecordReader < V , CV >
@@ -67,19 +69,23 @@ where
6769{
6870 /// Create a new [`GenericRecordReader`]
6971 pub fn new ( desc : ColumnDescPtr ) -> Self {
72+ // Start with a reasonable default capacity to avoid zero reallocations on first batch
73+ const DEFAULT_CAPACITY : usize = 1024 ;
74+
7075 let def_levels = ( desc. max_def_level ( ) > 0 )
7176 . then ( || DefinitionLevelBuffer :: new ( & desc, packed_null_mask ( & desc) ) ) ;
7277
7378 let rep_levels = ( desc. max_rep_level ( ) > 0 ) . then ( Vec :: new) ;
7479
7580 Self {
76- values : V :: default ( ) ,
81+ values : V :: with_capacity ( DEFAULT_CAPACITY ) ,
7782 def_levels,
7883 rep_levels,
7984 column_reader : None ,
8085 column_desc : desc,
8186 num_values : 0 ,
8287 num_records : 0 ,
88+ capacity_hint : DEFAULT_CAPACITY ,
8389 }
8490 }
8591
@@ -169,7 +175,9 @@ where
169175 /// Returns currently stored buffer data.
170176 /// The side effect is similar to `consume_def_levels`.
171177 pub fn consume_record_data ( & mut self ) -> V {
172- std:: mem:: take ( & mut self . values )
178+ // Replace the buffer with a new one that has the same capacity
179+ // This avoids reallocations on subsequent batches
180+ std:: mem:: replace ( & mut self . values , V :: with_capacity ( self . capacity_hint ) )
173181 }
174182
175183 /// Returns currently stored null bitmap data for nullable columns.
@@ -208,6 +216,11 @@ where
208216
209217 /// Try to read one batch of data returning the number of records read
210218 fn read_one_batch ( & mut self , batch_size : usize ) -> Result < usize > {
219+ // Update capacity hint to the largest batch size seen
220+ if batch_size > self . capacity_hint {
221+ self . capacity_hint = batch_size;
222+ }
223+
211224 let ( records_read, values_read, levels_read) =
212225 self . column_reader . as_mut ( ) . unwrap ( ) . read_records (
213226 batch_size,
0 commit comments