Skip to content

Commit 2ea4097

Browse files
authored
Avoid a clone when creating UnionArray from ArrayData (#9188)
# Which issue does this PR close?

- Part of #9061
- Broken out of #9058

# Rationale for this change

Let's make arrow-rs the fastest we can — the fewer allocations the better.

# What changes are included in this PR?

Apply the pattern from #9114: consume the `ArrayData` via `into_parts()` instead of cloning its buffers and child data.

# Are these changes tested?

Covered by existing tests.

# Are there any user-facing changes?

No.
1 parent ac640da commit 2ea4097

File tree

1 file changed

+22
-18
lines changed

1 file changed

+22
-18
lines changed

arrow-array/src/array/union_array.rs

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
use crate::{Array, ArrayRef, make_array};
2020
use arrow_buffer::bit_chunk_iterator::{BitChunkIterator, BitChunks};
2121
use arrow_buffer::buffer::NullBuffer;
22-
use arrow_buffer::{BooleanBuffer, MutableBuffer, ScalarBuffer};
22+
use arrow_buffer::{BooleanBuffer, Buffer, MutableBuffer, ScalarBuffer};
2323
use arrow_data::{ArrayData, ArrayDataBuilder};
2424
use arrow_schema::{ArrowError, DataType, UnionFields, UnionMode};
2525
/// Contains the `UnionArray` type.
@@ -680,32 +680,36 @@ impl UnionArray {
680680

681681
impl From<ArrayData> for UnionArray {
682682
fn from(data: ArrayData) -> Self {
683-
let (fields, mode) = match data.data_type() {
684-
DataType::Union(fields, mode) => (fields, *mode),
683+
let (data_type, len, _nulls, offset, buffers, child_data) = data.into_parts();
684+
685+
let (fields, mode) = match &data_type {
686+
DataType::Union(fields, mode) => (fields, mode),
685687
d => panic!("UnionArray expected ArrayData with type Union got {d}"),
686688
};
689+
687690
let (type_ids, offsets) = match mode {
688-
UnionMode::Sparse => (
689-
ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len()),
690-
None,
691-
),
692-
UnionMode::Dense => (
693-
ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len()),
694-
Some(ScalarBuffer::new(
695-
data.buffers()[1].clone(),
696-
data.offset(),
697-
data.len(),
698-
)),
699-
),
691+
UnionMode::Sparse => {
692+
let [buffer]: [Buffer; 1] = buffers.try_into().expect("1 buffer for type_ids");
693+
(ScalarBuffer::new(buffer, offset, len), None)
694+
}
695+
UnionMode::Dense => {
696+
let [type_ids_buffer, offsets_buffer]: [Buffer; 2] = buffers
697+
.try_into()
698+
.expect("2 buffers for type_ids and offsets");
699+
(
700+
ScalarBuffer::new(type_ids_buffer, offset, len),
701+
Some(ScalarBuffer::new(offsets_buffer, offset, len)),
702+
)
703+
}
700704
};
701705

702706
let max_id = fields.iter().map(|(i, _)| i).max().unwrap_or_default() as usize;
703707
let mut boxed_fields = vec![None; max_id + 1];
704-
for (cd, (field_id, _)) in data.child_data().iter().zip(fields.iter()) {
705-
boxed_fields[field_id as usize] = Some(make_array(cd.clone()));
708+
for (cd, (field_id, _)) in child_data.into_iter().zip(fields.iter()) {
709+
boxed_fields[field_id as usize] = Some(make_array(cd));
706710
}
707711
Self {
708-
data_type: data.data_type().clone(),
712+
data_type,
709713
type_ids,
710714
offsets,
711715
fields: boxed_fields,

0 commit comments

Comments (0)