Skip to content

Commit 100c421

Browse files
authored
Merge branch 'main' into issue-9645-ree-optimization
2 parents 573c6f8 + 711fac8 commit 100c421

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+3251
-506
lines changed

.github/workflows/take.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ jobs:
2828
if: (!github.event.issue.pull_request) && github.event.comment.body == 'take'
2929
runs-on: ubuntu-latest
3030
steps:
31-
- uses: actions/github-script@v8
31+
- uses: actions/github-script@v9
3232
with:
3333
script: |
3434
github.rest.issues.addAssignees({

arrow-array/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ num-complex = { version = "0.4.6", default-features = false, features = ["std"]
4949
num-integer = { version = "0.1.46", default-features = false, features = ["std"] }
5050
num-traits = { version = "0.2.19", default-features = false, features = ["std"] }
5151
half = { version = "2.1", default-features = false, features = ["num-traits"] }
52-
hashbrown = { version = "0.16.0", default-features = false }
52+
hashbrown = { version = "0.17.0", default-features = false }
5353

5454
[package.metadata.docs.rs]
5555
all-features = true

arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,10 @@ where
201201
fn finish_cloned(&self) -> ArrayRef {
202202
Arc::new(self.finish_cloned())
203203
}
204+
205+
fn finish_preserve_values(&mut self) -> ArrayRef {
206+
Arc::new(self.finish_preserve_values())
207+
}
204208
}
205209

206210
impl<K> FixedSizeBinaryDictionaryBuilder<K>

arrow-array/src/builder/fixed_size_list_builder.rs

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,10 @@ where
140140
fn finish_cloned(&self) -> ArrayRef {
141141
Arc::new(self.finish_cloned())
142142
}
143+
144+
fn finish_preserve_values(&mut self) -> ArrayRef {
145+
Arc::new(self.finish_preserve_values())
146+
}
143147
}
144148

145149
impl<T: ArrayBuilder> FixedSizeListBuilder<T>
@@ -211,6 +215,28 @@ where
211215
FixedSizeListArray::new(field, self.list_len, values, nulls)
212216
}
213217

218+
fn finish_preserve_values(&mut self) -> FixedSizeListArray {
219+
let len = self.len();
220+
let values = self.values_builder.finish_preserve_values();
221+
let nulls = self.null_buffer_builder.finish();
222+
223+
assert_eq!(
224+
values.len(),
225+
len * self.list_len as usize,
226+
"Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).",
227+
values.len(),
228+
self.list_len,
229+
len,
230+
);
231+
232+
let field = self
233+
.field
234+
.clone()
235+
.unwrap_or_else(|| Arc::new(Field::new_list_field(values.data_type().clone(), true)));
236+
237+
FixedSizeListArray::new(field, self.list_len, values, nulls)
238+
}
239+
214240
/// Returns the current null buffer as a slice
215241
pub fn validity_slice(&self) -> Option<&[u8]> {
216242
self.null_buffer_builder.as_slice()
@@ -224,7 +250,7 @@ mod tests {
224250

225251
use crate::Array;
226252
use crate::Int32Array;
227-
use crate::builder::Int32Builder;
253+
use crate::builder::{Int32Builder, tests::PreserveValuesMock};
228254

229255
fn make_list_builder(
230256
include_null_element: bool,
@@ -491,4 +517,18 @@ mod tests {
491517

492518
builder.finish();
493519
}
520+
521+
#[test]
522+
fn test_finish_preserve_values() {
523+
let mut builder = FixedSizeListBuilder::new(PreserveValuesMock::default(), 2);
524+
525+
builder.values().inner.append_value(0);
526+
builder.values().inner.append_value(1);
527+
builder.append(true);
528+
529+
let arr = builder.finish_preserve_values();
530+
531+
assert_eq!(1, arr.len());
532+
assert_eq!(1, builder.values().called);
533+
}
494534
}

arrow-array/src/builder/generic_bytes_dictionary_builder.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,10 @@ where
256256
fn finish_cloned(&self) -> ArrayRef {
257257
Arc::new(self.finish_cloned())
258258
}
259+
260+
fn finish_preserve_values(&mut self) -> ArrayRef {
261+
Arc::new(self.finish_preserve_values())
262+
}
259263
}
260264

261265
impl<K, T> GenericByteDictionaryBuilder<K, T>

arrow-array/src/builder/generic_list_builder.rs

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,10 @@ where
166166
fn finish_cloned(&self) -> ArrayRef {
167167
Arc::new(self.finish_cloned())
168168
}
169+
170+
fn finish_preserve_values(&mut self) -> ArrayRef {
171+
Arc::new(self.finish_preserve_values())
172+
}
169173
}
170174

171175
impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListBuilder<OffsetSize, T>
@@ -329,6 +333,23 @@ where
329333
GenericListArray::new(field, offsets, values, nulls)
330334
}
331335

336+
fn finish_preserve_values(&mut self) -> GenericListArray<OffsetSize> {
337+
let values = self.values_builder.finish_preserve_values();
338+
let nulls = self.null_buffer_builder.finish();
339+
340+
let offsets = Buffer::from_vec(std::mem::take(&mut self.offsets_builder));
341+
// Safety: Safe by construction
342+
let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
343+
self.offsets_builder.push(OffsetSize::zero());
344+
345+
let field = match &self.field {
346+
Some(f) => f.clone(),
347+
None => Arc::new(Field::new_list_field(values.data_type().clone(), true)),
348+
};
349+
350+
GenericListArray::new(field, offsets, values, nulls)
351+
}
352+
332353
/// Returns the current offsets buffer as a slice
333354
pub fn offsets_slice(&self) -> &[OffsetSize] {
334355
self.offsets_builder.as_slice()
@@ -364,7 +385,7 @@ where
364385
mod tests {
365386
use super::*;
366387
use crate::Int32Array;
367-
use crate::builder::{Int32Builder, ListBuilder, make_builder};
388+
use crate::builder::{Int32Builder, ListBuilder, make_builder, tests::PreserveValuesMock};
368389
use crate::cast::AsArray;
369390
use crate::types::Int32Type;
370391
use arrow_schema::DataType;
@@ -818,4 +839,17 @@ mod tests {
818839
builder.append_value([Some(1)]);
819840
builder.finish();
820841
}
842+
843+
#[test]
844+
fn test_finish_preserve_values() {
845+
let mut builder = ListBuilder::new(PreserveValuesMock::default());
846+
847+
builder.values().inner.append_value(1);
848+
builder.append(true);
849+
850+
let arr = builder.finish_preserve_values();
851+
852+
assert_eq!(1, arr.len());
853+
assert_eq!(1, builder.values().called);
854+
}
821855
}

arrow-array/src/builder/generic_list_view_builder.rs

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,10 @@ impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> ArrayBuilder
7171
fn finish_cloned(&self) -> ArrayRef {
7272
Arc::new(self.finish_cloned())
7373
}
74+
75+
fn finish_preserve_values(&mut self) -> ArrayRef {
76+
Arc::new(self.finish_preserve_values())
77+
}
7478
}
7579

7680
impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListViewBuilder<OffsetSize, T> {
@@ -216,6 +220,23 @@ where
216220
GenericListViewArray::new(field, offsets, sizes, values, nulls)
217221
}
218222

223+
fn finish_preserve_values(&mut self) -> GenericListViewArray<OffsetSize> {
224+
let values = self.values_builder.finish_preserve_values();
225+
let nulls = self.null_buffer_builder.finish();
226+
let offsets = Buffer::from_vec(std::mem::take(&mut self.offsets_builder));
227+
self.current_offset = OffsetSize::zero();
228+
229+
// Safety: Safe by construction
230+
let offsets = ScalarBuffer::from(offsets);
231+
let sizes = Buffer::from_vec(std::mem::take(&mut self.sizes_builder));
232+
let sizes = ScalarBuffer::from(sizes);
233+
let field = match &self.field {
234+
Some(f) => f.clone(),
235+
None => Arc::new(Field::new("item", values.data_type().clone(), true)),
236+
};
237+
GenericListViewArray::new(field, offsets, sizes, values, nulls)
238+
}
239+
219240
/// Returns the current offsets buffer as a slice
220241
pub fn offsets_slice(&self) -> &[OffsetSize] {
221242
self.offsets_builder.as_slice()
@@ -245,7 +266,7 @@ where
245266
#[cfg(test)]
246267
mod tests {
247268
use super::*;
248-
use crate::builder::{Int32Builder, ListViewBuilder, make_builder};
269+
use crate::builder::{Int32Builder, ListViewBuilder, make_builder, tests::PreserveValuesMock};
249270
use crate::cast::AsArray;
250271
use crate::types::Int32Type;
251272
use crate::{Array, Int32Array};
@@ -703,4 +724,17 @@ mod tests {
703724
builder.append_value([Some(1)]);
704725
builder.finish();
705726
}
727+
728+
#[test]
729+
fn test_finish_preserve_values() {
730+
let mut builder = ListViewBuilder::new(PreserveValuesMock::default());
731+
732+
builder.values().inner.append_value(1);
733+
builder.append(true);
734+
735+
let arr = builder.finish_preserve_values();
736+
737+
assert_eq!(1, arr.len());
738+
assert_eq!(1, builder.values().called);
739+
}
706740
}

arrow-array/src/builder/map_builder.rs

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,18 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
214214
self.finish_helper(keys_arr, values_arr, offset_buffer, nulls, len)
215215
}
216216

217+
fn finish_preserve_values(&mut self) -> MapArray {
218+
let len = self.len();
219+
// Build the keys
220+
let keys_arr = self.key_builder.finish_preserve_values();
221+
let values_arr = self.value_builder.finish_preserve_values();
222+
let offset_buffer = Buffer::from_vec(std::mem::take(&mut self.offsets_builder));
223+
self.offsets_builder.push(0);
224+
let null_bit_buffer = self.null_buffer_builder.finish();
225+
226+
self.finish_helper(keys_arr, values_arr, offset_buffer, null_bit_buffer, len)
227+
}
228+
217229
fn finish_helper(
218230
&self,
219231
keys_arr: Arc<dyn Array>,
@@ -287,6 +299,10 @@ impl<K: ArrayBuilder, V: ArrayBuilder> ArrayBuilder for MapBuilder<K, V> {
287299
Arc::new(self.finish_cloned())
288300
}
289301

302+
fn finish_preserve_values(&mut self) -> ArrayRef {
303+
Arc::new(self.finish_preserve_values())
304+
}
305+
290306
fn as_any(&self) -> &dyn Any {
291307
self
292308
}
@@ -303,7 +319,7 @@ impl<K: ArrayBuilder, V: ArrayBuilder> ArrayBuilder for MapBuilder<K, V> {
303319
#[cfg(test)]
304320
mod tests {
305321
use super::*;
306-
use crate::builder::{Int32Builder, StringBuilder, make_builder};
322+
use crate::builder::{Int32Builder, StringBuilder, make_builder, tests::PreserveValuesMock};
307323
use crate::{Int32Array, StringArray};
308324
use std::collections::HashMap;
309325

@@ -516,4 +532,23 @@ mod tests {
516532

517533
builder.finish();
518534
}
535+
536+
#[test]
537+
fn test_finish_preserve_values() {
538+
let mut builder = MapBuilder::new(
539+
None,
540+
PreserveValuesMock::default(),
541+
PreserveValuesMock::default(),
542+
);
543+
544+
builder.keys().inner.append_value(1);
545+
builder.values().inner.append_value(2);
546+
builder.append(true).unwrap();
547+
548+
let map = builder.finish_preserve_values();
549+
550+
assert_eq!(1, map.len());
551+
assert_eq!(1, builder.keys().called);
552+
assert_eq!(1, builder.values().called);
553+
}
519554
}

arrow-array/src/builder/mod.rs

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,18 @@ pub trait ArrayBuilder: Any + Send + Sync {
341341
/// Builds the array without resetting the underlying builder.
342342
fn finish_cloned(&self) -> ArrayRef;
343343

344+
/// Builds the array without resetting the values builder.
345+
///
346+
/// This is relevant for dictionary builders but also for composite builders.
347+
/// Those are not affected directly, but will call the corresponding method
348+
/// on their constituent builders.
349+
///
350+
/// The default implementation just calls [`finish`][Self::finish] which is sufficient
351+
/// for all but the above mentioned builders.
352+
fn finish_preserve_values(&mut self) -> ArrayRef {
353+
self.finish()
354+
}
355+
344356
/// Returns the builder as a non-mutable `Any` reference.
345357
///
346358
/// This is most useful when one wants to call non-mutable APIs on a specific builder
@@ -376,6 +388,10 @@ impl ArrayBuilder for Box<dyn ArrayBuilder> {
376388
(**self).finish_cloned()
377389
}
378390

391+
fn finish_preserve_values(&mut self) -> ArrayRef {
392+
(**self).finish_preserve_values()
393+
}
394+
379395
fn as_any(&self) -> &dyn Any {
380396
(**self).as_any()
381397
}
@@ -613,3 +629,45 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
613629
t => unimplemented!("Data type {t} is not currently supported"),
614630
}
615631
}
632+
633+
#[cfg(test)]
634+
mod tests {
635+
use super::*;
636+
637+
#[derive(Default)]
638+
pub struct PreserveValuesMock {
639+
pub called: usize,
640+
pub inner: Int32Builder,
641+
}
642+
643+
impl ArrayBuilder for PreserveValuesMock {
644+
fn as_any(&self) -> &dyn Any {
645+
self
646+
}
647+
648+
fn as_any_mut(&mut self) -> &mut dyn Any {
649+
self
650+
}
651+
652+
fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
653+
self
654+
}
655+
656+
fn len(&self) -> usize {
657+
self.inner.len()
658+
}
659+
660+
fn finish(&mut self) -> ArrayRef {
661+
panic!("finish should never be called on PreserveValuesMock")
662+
}
663+
664+
fn finish_cloned(&self) -> ArrayRef {
665+
panic!("finish_cloned should never be called on PreserveValuesMock")
666+
}
667+
668+
fn finish_preserve_values(&mut self) -> ArrayRef {
669+
self.called += 1;
670+
self.inner.finish_preserve_values()
671+
}
672+
}
673+
}

arrow-array/src/builder/primitive_dictionary_builder.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,10 @@ where
268268
fn finish_cloned(&self) -> ArrayRef {
269269
Arc::new(self.finish_cloned())
270270
}
271+
272+
fn finish_preserve_values(&mut self) -> ArrayRef {
273+
Arc::new(self.finish_preserve_values())
274+
}
271275
}
272276

273277
impl<K, V> PrimitiveDictionaryBuilder<K, V>

0 commit comments

Comments
 (0)