Skip to content

Commit 2b851d9

Browse files
authored
Add List and ListView take benchmarks (#9626)
# Which issue does this PR close? - Closes #9627. # Rationale for this change Adding benchmarks makes it easier to measure performance and evaluate the impact of changes to the implementation. I also have a PR including some significant improvements, but figured it's worth splitting it into two parts; let me know if it's better to do that in one step. # What changes are included in this PR? Add a couple of utility functions to generate list and list_view arrays without providing a seed # Are these changes tested? Benchmarks run locally, same setup as other benchmarks. # Are there any user-facing changes? No
1 parent 07a3636 commit 2b851d9

File tree

2 files changed

+134
-0
lines changed

2 files changed

+134
-0
lines changed

arrow/benches/take_kernels.rs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,66 @@ fn add_benchmark(c: &mut Criterion) {
186186
b.iter(|| bench_take(&values, &indices))
187187
});
188188

189+
let values = create_primitive_list_array::<i32, Int32Type>(512, 0.0, 0.0, 20);
190+
let indices = create_random_index(512, 0.0);
191+
c.bench_function("take list i32 512", |b| {
192+
b.iter(|| bench_take(&values, &indices))
193+
});
194+
195+
let values = create_primitive_list_array::<i32, Int32Type>(1024, 0.0, 0.0, 20);
196+
let indices = create_random_index(1024, 0.0);
197+
c.bench_function("take list i32 1024", |b| {
198+
b.iter(|| bench_take(&values, &indices))
199+
});
200+
201+
let values = create_primitive_list_array::<i32, Int32Type>(1024, 0.5, 0.0, 20);
202+
let indices = create_random_index(1024, 0.0);
203+
c.bench_function("take list i32 null values 1024", |b| {
204+
b.iter(|| bench_take(&values, &indices))
205+
});
206+
207+
// Keep max_list_size at 20, matching the other list/listview cases (including the
// "take listview i32 null indices 1024" counterpart), so the results stay comparable.
let values = create_primitive_list_array::<i32, Int32Type>(1024, 0.0, 0.0, 20);
208+
let indices = create_random_index(1024, 0.5);
209+
c.bench_function("take list i32 null indices 1024", |b| {
210+
b.iter(|| bench_take(&values, &indices))
211+
});
212+
213+
let values = create_primitive_list_array::<i32, Int32Type>(1024, 0.5, 0.5, 20);
214+
let indices = create_random_index(1024, 0.5);
215+
c.bench_function("take list i32 null values null indices 1024", |b| {
216+
b.iter(|| bench_take(&values, &indices))
217+
});
218+
219+
let values = create_primitive_list_view_array::<i32, Int32Type>(512, 0.0, 0.0, 20);
220+
let indices = create_random_index(512, 0.0);
221+
c.bench_function("take listview i32 512", |b| {
222+
b.iter(|| bench_take(&values, &indices))
223+
});
224+
225+
let values = create_primitive_list_view_array::<i32, Int32Type>(1024, 0.0, 0.0, 20);
226+
let indices = create_random_index(1024, 0.0);
227+
c.bench_function("take listview i32 1024", |b| {
228+
b.iter(|| bench_take(&values, &indices))
229+
});
230+
231+
let values = create_primitive_list_view_array::<i32, Int32Type>(1024, 0.5, 0.0, 20);
232+
let indices = create_random_index(1024, 0.0);
233+
c.bench_function("take listview i32 null values 1024", |b| {
234+
b.iter(|| bench_take(&values, &indices))
235+
});
236+
237+
let values = create_primitive_list_view_array::<i32, Int32Type>(1024, 0.0, 0.0, 20);
238+
let indices = create_random_index(1024, 0.5);
239+
c.bench_function("take listview i32 null indices 1024", |b| {
240+
b.iter(|| bench_take(&values, &indices))
241+
});
242+
243+
let values = create_primitive_list_view_array::<i32, Int32Type>(1024, 0.5, 0.5, 20);
244+
let indices = create_random_index(1024, 0.5);
245+
c.bench_function("take listview i32 null values null indices 1024", |b| {
246+
b.iter(|| bench_take(&values, &indices))
247+
});
248+
189249
let values = create_primitive_run_array::<Int32Type, Int32Type>(1024, 512);
190250
let indices = create_random_index(1024, 0.0);
191251
c.bench_function(

arrow/src/util/bench_util.rs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,80 @@ where
491491
GenericListArray::<O>::from_iter_primitive::<T, _, _>(values)
492492
}
493493

494+
/// Create a List/LargeList Array of primitive values using a fixed seed
495+
///
496+
/// See [`create_primitive_list_array_with_seed`] for details on arguments.
497+
pub fn create_primitive_list_array<O, T>(
498+
size: usize,
499+
null_density: f32,
500+
list_null_density: f32,
501+
max_list_size: usize,
502+
) -> GenericListArray<O>
503+
where
504+
O: OffsetSizeTrait,
505+
T: ArrowPrimitiveType,
506+
StandardUniform: Distribution<T::Native>,
507+
{
508+
let mut rng = seedable_rng();
509+
510+
let values = (0..size).map(|_| {
511+
if rng.random::<f32>() < null_density {
512+
None
513+
} else {
514+
let list_size = rng.random_range(0..=max_list_size);
515+
let list_values: Vec<Option<T::Native>> = (0..list_size)
516+
.map(|_| {
517+
if rng.random::<f32>() < list_null_density {
518+
None
519+
} else {
520+
Some(rng.random())
521+
}
522+
})
523+
.collect();
524+
Some(list_values)
525+
}
526+
});
527+
528+
GenericListArray::<O>::from_iter_primitive::<T, _, _>(values)
529+
}
530+
531+
/// Create a ListViewArray of primitive values using a fixed seed
532+
///
533+
/// See [`create_primitive_list_array_with_seed`] for details on arguments.
534+
pub fn create_primitive_list_view_array<O, T>(
535+
size: usize,
536+
null_density: f32,
537+
list_null_density: f32,
538+
max_list_size: usize,
539+
) -> GenericListViewArray<O>
540+
where
541+
T: ArrowPrimitiveType,
542+
StandardUniform: Distribution<T::Native>,
543+
O: OffsetSizeTrait,
544+
{
545+
let mut rng = seedable_rng();
546+
547+
let values = (0..size).map(|_| {
548+
if rng.random::<f32>() < null_density {
549+
None
550+
} else {
551+
let list_size = rng.random_range(0..=max_list_size);
552+
let list_values: Vec<Option<T::Native>> = (0..list_size)
553+
.map(|_| {
554+
if rng.random::<f32>() < list_null_density {
555+
None
556+
} else {
557+
Some(rng.random())
558+
}
559+
})
560+
.collect();
561+
Some(list_values)
562+
}
563+
});
564+
565+
GenericListViewArray::<O>::from_iter_primitive::<T, _, _>(values)
566+
}
567+
494568
/// Create primitive run array for given logical and physical array lengths
495569
pub fn create_primitive_run_array<R: RunEndIndexType, V: ArrowPrimitiveType>(
496570
logical_array_len: usize,

0 commit comments

Comments
 (0)