Skip to content

Commit cdceb7e

Browse files
committed
[Parquet] perf: Create StructArrays directly rather than use ArrayData
1 parent 96637fc commit cdceb7e

File tree

1 file changed

+18
-14
lines changed

1 file changed

+18
-14
lines changed

parquet/src/arrow/array_reader/struct_array.rs

Lines changed: 18 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -18,10 +18,10 @@
1818
use crate::arrow::array_reader::ArrayReader;
1919
use crate::errors::{ParquetError, Result};
2020
use arrow_array::{Array, ArrayRef, StructArray, builder::BooleanBufferBuilder};
21-
use arrow_data::{ArrayData, ArrayDataBuilder};
22-
use arrow_schema::DataType as ArrowType;
21+
use arrow_schema::{DataType as ArrowType, DataType};
2322
use std::any::Any;
2423
use std::sync::Arc;
24+
use arrow_buffer::NullBuffer;
2525

2626
/// Implementation of struct array reader.
2727
pub struct StructArrayReader {
@@ -124,16 +124,13 @@ impl ArrayReader for StructArrayReader {
124124
return Err(general_err!("Not all children array length are the same!"));
125125
}
126126

127-
// Now we can build array data
128-
let mut array_data_builder = ArrayDataBuilder::new(self.data_type.clone())
129-
.len(children_array_len)
130-
.child_data(
131-
children_array
132-
.into_iter()
133-
.map(|x| x.into_data())
134-
.collect::<Vec<ArrayData>>(),
135-
);
127+
// Now we can build the struct array
136128

129+
let DataType::Struct(fields) = &self.data_type else {
130+
return Err(general_err!("Internal error: StructArrayReader must have struct data type"));
131+
};
132+
133+
let mut nulls = None;
137134
if self.nullable {
138135
// calculate struct def level data
139136

@@ -169,11 +166,18 @@ impl ArrayReader for StructArrayReader {
169166
return Err(general_err!("Failed to decode level data for struct array"));
170167
}
171168

172-
array_data_builder = array_data_builder.null_bit_buffer(Some(bitmap_builder.into()));
169+
nulls = Some(NullBuffer::new(bitmap_builder.finish()));
170+
}
171+
// Safety: all children array data have same length and correct type
172+
unsafe {
173+
Ok(Arc::new(StructArray::new_unchecked_with_length(
174+
fields.clone(), // cloning Fields is cheap (an Arc internally)
175+
children_array,
176+
nulls,
177+
children_array_len
178+
)))
173179
}
174180

175-
let array_data = unsafe { array_data_builder.build_unchecked() };
176-
Ok(Arc::new(StructArray::from(array_data)))
177181
}
178182

179183
fn skip_records(&mut self, num_records: usize) -> Result<usize> {

0 commit comments

Comments
 (0)