Skip to content

Commit 026a260

Browse files
authored
Add List to interleave_kernels benchmark (#8980)
# Which issue does this PR close? - Part of #8953 # Rationale for this change While reviewing #8953 from @asubiotto I noticed there was no benchmark for interleave with ListArray. Let's add some so we can evaluate the performance impact of #8953 and future changes. # What changes are included in this PR? Add benchmark for list interleaving # Are these changes tested? I ran the benchmarks manually ```shell cargo bench --bench interleave_kernels -- list ``` # Are there any user-facing changes? No
1 parent c3226a4 commit 026a260

File tree

2 files changed

+50
-0
lines changed

2 files changed

+50
-0
lines changed

arrow/benches/interleave_kernels.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,11 @@ fn add_benchmark(c: &mut Criterion) {
116116

117117
let string_view = create_string_view_array(1024, 0.0);
118118

119+
// use 8192 lists per array for better coverage (each list holds at most 20 values)
120+
let list_i64 = create_primitive_list_array_with_seed::<i32, Int64Type>(8192, 0.1, 0.1, 20, 42);
121+
let list_i64_no_nulls =
122+
create_primitive_list_array_with_seed::<i32, Int64Type>(8192, 0.0, 0.0, 20, 42);
123+
119124
let cases: &[(&str, &dyn Array)] = &[
120125
("i32(0.0)", &i32),
121126
("i32(0.5)", &i32_opt),
@@ -136,6 +141,8 @@ fn add_benchmark(c: &mut Criterion) {
136141
"struct(i32(0.0), str(20, 0.0)",
137142
&struct_i32_no_nulls_string_no_nulls,
138143
),
144+
("list<i64>(0.1,0.1,20)", &list_i64),
145+
("list<i64>(0.0,0.0,20)", &list_i64_no_nulls),
139146
];
140147

141148
for (prefix, base) in cases {

arrow/src/util/bench_util.rs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,49 @@ pub fn create_string_dict_array<K: ArrowDictionaryKeyType>(
398398
data.iter().map(|x| x.as_deref()).collect()
399399
}
400400

401+
/// Create a List/LargeList Array of primitive values
402+
///
403+
/// Arguments:
404+
/// - `size`: number of lists in the array
405+
/// - `null_density`: density of nulls in the list array
406+
/// - `list_null_density`: density of nulls in the primitive arrays inside the lists
407+
/// - `max_list_size`: maximum size of each list (actual size is random between 0 and `max_list_size`, inclusive)
408+
/// - `seed`: seed for the random number generator
409+
pub fn create_primitive_list_array_with_seed<O, T>(
410+
size: usize,
411+
null_density: f32,
412+
list_null_density: f32,
413+
max_list_size: usize,
414+
seed: u64,
415+
) -> GenericListArray<O>
416+
where
417+
O: OffsetSizeTrait,
418+
T: ArrowPrimitiveType,
419+
StandardUniform: Distribution<T::Native>,
420+
{
421+
let mut rng = StdRng::seed_from_u64(seed);
422+
423+
let values = (0..size).map(|_| {
424+
if rng.random::<f32>() < null_density {
425+
None
426+
} else {
427+
let list_size = rng.random_range(0..=max_list_size);
428+
let list_values: Vec<Option<T::Native>> = (0..list_size)
429+
.map(|_| {
430+
if rng.random::<f32>() < list_null_density {
431+
None
432+
} else {
433+
Some(rng.random())
434+
}
435+
})
436+
.collect();
437+
Some(list_values)
438+
}
439+
});
440+
441+
GenericListArray::<O>::from_iter_primitive::<T, _, _>(values)
442+
}
443+
401444
/// Create primitive run array for given logical and physical array lengths
402445
pub fn create_primitive_run_array<R: RunEndIndexType, V: ArrowPrimitiveType>(
403446
logical_array_len: usize,

0 commit comments

Comments
 (0)