Skip to content

Commit 8b159ad

Browse files
authored
Add finish_preserve_values to ArrayBuilder trait (#9601)
# Which issue does this PR close? - Closes #9600. # Rationale for this change Delta dictionaries require user code to call `finish_preserve_values` instead of `finish` on the dictionary builders. But this is generally not possible if those builders are nested within composite builders like lists, maps or structs. Using the new trait method, user code can call this generically which has no effect on e.g. primitive builders due to the default implementation but forwards to this method for composite ones. # What changes are included in this PR? Adds a new method `finish_preserve_values` to `ArrayBuilder` with a default implementation that forwards to `finish`. Implements this method for dictionary and composite builders. # Are these changes tested? Adds tests where new `finish_preserve_values` inherent methods were created to check that the correct method on the constituent builders is called using a shared mock builder `PreserveValuesMock`. # Are there any user-facing changes? This adds a new method `finish_preserve_values` to the central `ArrayBuilder` trait, but it has a default implementation so this should be backwards compatible.
1 parent dad0be4 commit 8b159ad

9 files changed

Lines changed: 266 additions & 5 deletions

arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,10 @@ where
201201
fn finish_cloned(&self) -> ArrayRef {
202202
Arc::new(self.finish_cloned())
203203
}
204+
205+
fn finish_preserve_values(&mut self) -> ArrayRef {
206+
Arc::new(self.finish_preserve_values())
207+
}
204208
}
205209

206210
impl<K> FixedSizeBinaryDictionaryBuilder<K>

arrow-array/src/builder/fixed_size_list_builder.rs

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,10 @@ where
140140
fn finish_cloned(&self) -> ArrayRef {
141141
Arc::new(self.finish_cloned())
142142
}
143+
144+
fn finish_preserve_values(&mut self) -> ArrayRef {
145+
Arc::new(self.finish_preserve_values())
146+
}
143147
}
144148

145149
impl<T: ArrayBuilder> FixedSizeListBuilder<T>
@@ -211,6 +215,28 @@ where
211215
FixedSizeListArray::new(field, self.list_len, values, nulls)
212216
}
213217

218+
fn finish_preserve_values(&mut self) -> FixedSizeListArray {
219+
let len = self.len();
220+
let values = self.values_builder.finish_preserve_values();
221+
let nulls = self.null_buffer_builder.finish();
222+
223+
assert_eq!(
224+
values.len(),
225+
len * self.list_len as usize,
226+
"Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).",
227+
values.len(),
228+
self.list_len,
229+
len,
230+
);
231+
232+
let field = self
233+
.field
234+
.clone()
235+
.unwrap_or_else(|| Arc::new(Field::new_list_field(values.data_type().clone(), true)));
236+
237+
FixedSizeListArray::new(field, self.list_len, values, nulls)
238+
}
239+
214240
/// Returns the current null buffer as a slice
215241
pub fn validity_slice(&self) -> Option<&[u8]> {
216242
self.null_buffer_builder.as_slice()
@@ -224,7 +250,7 @@ mod tests {
224250

225251
use crate::Array;
226252
use crate::Int32Array;
227-
use crate::builder::Int32Builder;
253+
use crate::builder::{Int32Builder, tests::PreserveValuesMock};
228254

229255
fn make_list_builder(
230256
include_null_element: bool,
@@ -491,4 +517,18 @@ mod tests {
491517

492518
builder.finish();
493519
}
520+
521+
#[test]
522+
fn test_finish_preserve_values() {
523+
let mut builder = FixedSizeListBuilder::new(PreserveValuesMock::default(), 2);
524+
525+
builder.values().inner.append_value(0);
526+
builder.values().inner.append_value(1);
527+
builder.append(true);
528+
529+
let arr = builder.finish_preserve_values();
530+
531+
assert_eq!(1, arr.len());
532+
assert_eq!(1, builder.values().called);
533+
}
494534
}

arrow-array/src/builder/generic_bytes_dictionary_builder.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,10 @@ where
256256
fn finish_cloned(&self) -> ArrayRef {
257257
Arc::new(self.finish_cloned())
258258
}
259+
260+
fn finish_preserve_values(&mut self) -> ArrayRef {
261+
Arc::new(self.finish_preserve_values())
262+
}
259263
}
260264

261265
impl<K, T> GenericByteDictionaryBuilder<K, T>

arrow-array/src/builder/generic_list_builder.rs

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,10 @@ where
166166
fn finish_cloned(&self) -> ArrayRef {
167167
Arc::new(self.finish_cloned())
168168
}
169+
170+
fn finish_preserve_values(&mut self) -> ArrayRef {
171+
Arc::new(self.finish_preserve_values())
172+
}
169173
}
170174

171175
impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListBuilder<OffsetSize, T>
@@ -329,6 +333,23 @@ where
329333
GenericListArray::new(field, offsets, values, nulls)
330334
}
331335

336+
fn finish_preserve_values(&mut self) -> GenericListArray<OffsetSize> {
337+
let values = self.values_builder.finish_preserve_values();
338+
let nulls = self.null_buffer_builder.finish();
339+
340+
let offsets = Buffer::from_vec(std::mem::take(&mut self.offsets_builder));
341+
// Safety: Safe by construction
342+
let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
343+
self.offsets_builder.push(OffsetSize::zero());
344+
345+
let field = match &self.field {
346+
Some(f) => f.clone(),
347+
None => Arc::new(Field::new_list_field(values.data_type().clone(), true)),
348+
};
349+
350+
GenericListArray::new(field, offsets, values, nulls)
351+
}
352+
332353
/// Returns the current offsets buffer as a slice
333354
pub fn offsets_slice(&self) -> &[OffsetSize] {
334355
self.offsets_builder.as_slice()
@@ -364,7 +385,7 @@ where
364385
mod tests {
365386
use super::*;
366387
use crate::Int32Array;
367-
use crate::builder::{Int32Builder, ListBuilder, make_builder};
388+
use crate::builder::{Int32Builder, ListBuilder, make_builder, tests::PreserveValuesMock};
368389
use crate::cast::AsArray;
369390
use crate::types::Int32Type;
370391
use arrow_schema::DataType;
@@ -818,4 +839,17 @@ mod tests {
818839
builder.append_value([Some(1)]);
819840
builder.finish();
820841
}
842+
843+
#[test]
844+
fn test_finish_preserve_values() {
845+
let mut builder = ListBuilder::new(PreserveValuesMock::default());
846+
847+
builder.values().inner.append_value(1);
848+
builder.append(true);
849+
850+
let arr = builder.finish_preserve_values();
851+
852+
assert_eq!(1, arr.len());
853+
assert_eq!(1, builder.values().called);
854+
}
821855
}

arrow-array/src/builder/generic_list_view_builder.rs

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,10 @@ impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> ArrayBuilder
7171
fn finish_cloned(&self) -> ArrayRef {
7272
Arc::new(self.finish_cloned())
7373
}
74+
75+
fn finish_preserve_values(&mut self) -> ArrayRef {
76+
Arc::new(self.finish_preserve_values())
77+
}
7478
}
7579

7680
impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListViewBuilder<OffsetSize, T> {
@@ -216,6 +220,23 @@ where
216220
GenericListViewArray::new(field, offsets, sizes, values, nulls)
217221
}
218222

223+
fn finish_preserve_values(&mut self) -> GenericListViewArray<OffsetSize> {
224+
let values = self.values_builder.finish_preserve_values();
225+
let nulls = self.null_buffer_builder.finish();
226+
let offsets = Buffer::from_vec(std::mem::take(&mut self.offsets_builder));
227+
self.current_offset = OffsetSize::zero();
228+
229+
// Safety: Safe by construction
230+
let offsets = ScalarBuffer::from(offsets);
231+
let sizes = Buffer::from_vec(std::mem::take(&mut self.sizes_builder));
232+
let sizes = ScalarBuffer::from(sizes);
233+
let field = match &self.field {
234+
Some(f) => f.clone(),
235+
None => Arc::new(Field::new("item", values.data_type().clone(), true)),
236+
};
237+
GenericListViewArray::new(field, offsets, sizes, values, nulls)
238+
}
239+
219240
/// Returns the current offsets buffer as a slice
220241
pub fn offsets_slice(&self) -> &[OffsetSize] {
221242
self.offsets_builder.as_slice()
@@ -245,7 +266,7 @@ where
245266
#[cfg(test)]
246267
mod tests {
247268
use super::*;
248-
use crate::builder::{Int32Builder, ListViewBuilder, make_builder};
269+
use crate::builder::{Int32Builder, ListViewBuilder, make_builder, tests::PreserveValuesMock};
249270
use crate::cast::AsArray;
250271
use crate::types::Int32Type;
251272
use crate::{Array, Int32Array};
@@ -703,4 +724,17 @@ mod tests {
703724
builder.append_value([Some(1)]);
704725
builder.finish();
705726
}
727+
728+
#[test]
729+
fn test_finish_preserve_values() {
730+
let mut builder = ListViewBuilder::new(PreserveValuesMock::default());
731+
732+
builder.values().inner.append_value(1);
733+
builder.append(true);
734+
735+
let arr = builder.finish_preserve_values();
736+
737+
assert_eq!(1, arr.len());
738+
assert_eq!(1, builder.values().called);
739+
}
706740
}

arrow-array/src/builder/map_builder.rs

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,18 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
214214
self.finish_helper(keys_arr, values_arr, offset_buffer, nulls, len)
215215
}
216216

217+
fn finish_preserve_values(&mut self) -> MapArray {
218+
let len = self.len();
219+
// Build the keys
220+
let keys_arr = self.key_builder.finish_preserve_values();
221+
let values_arr = self.value_builder.finish_preserve_values();
222+
let offset_buffer = Buffer::from_vec(std::mem::take(&mut self.offsets_builder));
223+
self.offsets_builder.push(0);
224+
let null_bit_buffer = self.null_buffer_builder.finish();
225+
226+
self.finish_helper(keys_arr, values_arr, offset_buffer, null_bit_buffer, len)
227+
}
228+
217229
fn finish_helper(
218230
&self,
219231
keys_arr: Arc<dyn Array>,
@@ -287,6 +299,10 @@ impl<K: ArrayBuilder, V: ArrayBuilder> ArrayBuilder for MapBuilder<K, V> {
287299
Arc::new(self.finish_cloned())
288300
}
289301

302+
fn finish_preserve_values(&mut self) -> ArrayRef {
303+
Arc::new(self.finish_preserve_values())
304+
}
305+
290306
fn as_any(&self) -> &dyn Any {
291307
self
292308
}
@@ -303,7 +319,7 @@ impl<K: ArrayBuilder, V: ArrayBuilder> ArrayBuilder for MapBuilder<K, V> {
303319
#[cfg(test)]
304320
mod tests {
305321
use super::*;
306-
use crate::builder::{Int32Builder, StringBuilder, make_builder};
322+
use crate::builder::{Int32Builder, StringBuilder, make_builder, tests::PreserveValuesMock};
307323
use crate::{Int32Array, StringArray};
308324
use std::collections::HashMap;
309325

@@ -516,4 +532,23 @@ mod tests {
516532

517533
builder.finish();
518534
}
535+
536+
#[test]
537+
fn test_finish_preserve_values() {
538+
let mut builder = MapBuilder::new(
539+
None,
540+
PreserveValuesMock::default(),
541+
PreserveValuesMock::default(),
542+
);
543+
544+
builder.keys().inner.append_value(1);
545+
builder.values().inner.append_value(2);
546+
builder.append(true).unwrap();
547+
548+
let map = builder.finish_preserve_values();
549+
550+
assert_eq!(1, map.len());
551+
assert_eq!(1, builder.keys().called);
552+
assert_eq!(1, builder.values().called);
553+
}
519554
}

arrow-array/src/builder/mod.rs

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,18 @@ pub trait ArrayBuilder: Any + Send + Sync {
341341
/// Builds the array without resetting the underlying builder.
342342
fn finish_cloned(&self) -> ArrayRef;
343343

344+
/// Builds the array without resetting the values builder.
345+
///
346+
/// This is relevant for dictionary builders but also for composite builders.
347+
/// Those are not affected directly, but will call the corresponding method
348+
/// on their constituent builders.
349+
///
350+
/// The default implementation just calls [`finish`][Self::finish] which is sufficient
351+
/// for all but the above mentioned builders.
352+
fn finish_preserve_values(&mut self) -> ArrayRef {
353+
self.finish()
354+
}
355+
344356
/// Returns the builder as a non-mutable `Any` reference.
345357
///
346358
/// This is most useful when one wants to call non-mutable APIs on a specific builder
@@ -376,6 +388,10 @@ impl ArrayBuilder for Box<dyn ArrayBuilder> {
376388
(**self).finish_cloned()
377389
}
378390

391+
fn finish_preserve_values(&mut self) -> ArrayRef {
392+
(**self).finish_preserve_values()
393+
}
394+
379395
fn as_any(&self) -> &dyn Any {
380396
(**self).as_any()
381397
}
@@ -613,3 +629,45 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
613629
t => unimplemented!("Data type {t} is not currently supported"),
614630
}
615631
}
632+
633+
#[cfg(test)]
634+
mod tests {
635+
use super::*;
636+
637+
#[derive(Default)]
638+
pub struct PreserveValuesMock {
639+
pub called: usize,
640+
pub inner: Int32Builder,
641+
}
642+
643+
impl ArrayBuilder for PreserveValuesMock {
644+
fn as_any(&self) -> &dyn Any {
645+
self
646+
}
647+
648+
fn as_any_mut(&mut self) -> &mut dyn Any {
649+
self
650+
}
651+
652+
fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
653+
self
654+
}
655+
656+
fn len(&self) -> usize {
657+
self.inner.len()
658+
}
659+
660+
fn finish(&mut self) -> ArrayRef {
661+
panic!("finish should never be called on PreserveValuesMock")
662+
}
663+
664+
fn finish_cloned(&self) -> ArrayRef {
665+
panic!("finish_cloned should never be called on PreserveValuesMock")
666+
}
667+
668+
fn finish_preserve_values(&mut self) -> ArrayRef {
669+
self.called += 1;
670+
self.inner.finish_preserve_values()
671+
}
672+
}
673+
}

arrow-array/src/builder/primitive_dictionary_builder.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,10 @@ where
268268
fn finish_cloned(&self) -> ArrayRef {
269269
Arc::new(self.finish_cloned())
270270
}
271+
272+
fn finish_preserve_values(&mut self) -> ArrayRef {
273+
Arc::new(self.finish_preserve_values())
274+
}
271275
}
272276

273277
impl<K, V> PrimitiveDictionaryBuilder<K, V>

0 commit comments

Comments
 (0)