Skip to content

Commit 93a5b60

Browse files
committed
Add benchmarks for Utf8View scalars for zip
1 parent c3226a4 commit 93a5b60

File tree

1 file changed

+153
-2
lines changed

1 file changed

+153
-2
lines changed

arrow/benches/zip_kernels.rs

Lines changed: 153 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -133,6 +133,34 @@ where
133133
}
134134
}
135135

136+
struct GenerateStringView<T: ByteViewType> {
137+
str_len: usize,
138+
description: String,
139+
_marker: std::marker::PhantomData<T>,
140+
}
141+
142+
impl InputGenerator for GenerateStringView<StringViewType> {
143+
fn name(&self) -> &str {
144+
self.description.as_str()
145+
}
146+
fn generate_scalar_with_null_value(&self) -> ArrayRef {
147+
new_null_array(&DataType::Utf8View, 1)
148+
}
149+
150+
fn generate_non_null_scalars(&self, seed: u64, number_of_scalars: usize) -> Vec<ArrayRef> {
151+
let array = self.generate_array(seed, number_of_scalars, 0.0);
152+
(0..number_of_scalars).map(|i| array.slice(i, 1)).collect()
153+
}
154+
155+
fn generate_array(&self, _seed: u64, array_length: usize, null_percentage: f32) -> ArrayRef {
156+
Arc::new(create_string_view_array_with_fixed_len(
157+
array_length,
158+
null_percentage,
159+
self.str_len,
160+
))
161+
}
162+
}
163+
136164
fn mask_cases(len: usize) -> Vec<(&'static str, BooleanArray)> {
137165
vec![
138166
("all_true", create_boolean_array(len, 0.0, 1.0)),
@@ -145,10 +173,9 @@ fn mask_cases(len: usize) -> Vec<(&'static str, BooleanArray)> {
145173
("50pct_nulls", create_boolean_array(len, 0.5, 0.5)),
146174
]
147175
}
176+
/// Length of the masks built by the scalar benchmarks; also embedded in the benchmark group names.
const ARRAY_LEN: usize = 8192;
148177

149178
fn bench_zip_on_input_generator(c: &mut Criterion, input_generator: &impl InputGenerator) {
150-
const ARRAY_LEN: usize = 8192;
151-
152179
let mut group =
153180
c.benchmark_group(format!("zip_{ARRAY_LEN}_from_{}", input_generator.name()).as_str());
154181

@@ -224,6 +251,61 @@ fn bench_zip_input_on_all_masks(
224251
}
225252
}
226253

254+
/// Runs the scalar benchmarks with a single generator supplying both the
/// truthy and the falsy operand.
fn bench_zip_on_input_generator_for_scalars(
    c: &mut Criterion,
    input_generator: &impl InputGenerator,
) {
    // The same generator plays both roles.
    let (first, second) = (input_generator, input_generator);
    bench_zip_on_input_generators_for_scalars(c, first, second);
}
260+
261+
fn bench_zip_on_input_generators_for_scalars(
262+
c: &mut Criterion,
263+
input_generator_1: &impl InputGenerator,
264+
input_generator_2: &impl InputGenerator,
265+
) {
266+
let mut group = c.benchmark_group(
267+
format!(
268+
"zip_{ARRAY_LEN}_from_{} and {}",
269+
input_generator_1.name(),
270+
input_generator_2.name()
271+
)
272+
.as_str(),
273+
);
274+
275+
let null_scalar = input_generator_1.generate_scalar_with_null_value();
276+
277+
let [non_null_scalar_1]: [_; 1] = input_generator_1
278+
.generate_non_null_scalars(42, 1)
279+
.try_into()
280+
.unwrap();
281+
282+
let [non_null_scalar_2]: [_; 1] = input_generator_2
283+
.generate_non_null_scalars(18, 1)
284+
.try_into()
285+
.unwrap();
286+
287+
let masks = mask_cases(ARRAY_LEN);
288+
289+
for (description, truthy, falsy) in &[
290+
("null_vs_non_null_scalar", &null_scalar, &non_null_scalar_1),
291+
(
292+
"non_null_scalar_vs_null_scalar",
293+
&non_null_scalar_1,
294+
&null_scalar,
295+
),
296+
("non_nulls_scalars", &non_null_scalar_1, &non_null_scalar_2),
297+
] {
298+
bench_zip_input_on_all_masks(
299+
description,
300+
&mut group,
301+
&masks,
302+
&Scalar::new(truthy),
303+
&Scalar::new(falsy),
304+
);
305+
}
306+
group.finish();
307+
}
308+
227309
fn add_benchmark(c: &mut Criterion) {
228310
// Primitive
229311
bench_zip_on_input_generator(
@@ -273,6 +355,75 @@ fn add_benchmark(c: &mut Criterion) {
273355
_marker: std::marker::PhantomData,
274356
},
275357
);
358+
359+
bench_zip_on_input_generator_for_scalars(
360+
c,
361+
&GenerateStringView {
362+
description: "string_views size 3".to_string(),
363+
str_len: 3,
364+
_marker: std::marker::PhantomData,
365+
},
366+
);
367+
368+
bench_zip_on_input_generator_for_scalars(
369+
c,
370+
&GenerateStringView {
371+
description: "string_views size 10".to_string(),
372+
str_len: 10,
373+
_marker: std::marker::PhantomData,
374+
},
375+
);
376+
377+
// Length-100 string views on both sides. `str_len` must agree with the label:
// it was previously 10, which silently re-ran the size-10 benchmark under the
// "size 100" name.
bench_zip_on_input_generator_for_scalars(
    c,
    &GenerateStringView {
        description: "string_views size 100".to_string(),
        str_len: 100,
        _marker: std::marker::PhantomData,
    },
);
385+
386+
bench_zip_on_input_generators_for_scalars(
387+
c,
388+
&GenerateStringView {
389+
description: "string_views size 3".to_string(),
390+
str_len: 3,
391+
_marker: std::marker::PhantomData,
392+
},
393+
&GenerateStringView {
394+
description: "string_views size 10".to_string(),
395+
str_len: 10,
396+
_marker: std::marker::PhantomData,
397+
},
398+
);
399+
400+
bench_zip_on_input_generators_for_scalars(
401+
c,
402+
&GenerateStringView {
403+
description: "string_views size 3".to_string(),
404+
str_len: 3,
405+
_marker: std::marker::PhantomData,
406+
},
407+
&GenerateStringView {
408+
description: "string_views size 100".to_string(),
409+
str_len: 100,
410+
_marker: std::marker::PhantomData,
411+
},
412+
);
413+
414+
bench_zip_on_input_generators_for_scalars(
415+
c,
416+
&GenerateStringView {
417+
description: "string_views size 10".to_string(),
418+
str_len: 10,
419+
_marker: std::marker::PhantomData,
420+
},
421+
&GenerateStringView {
422+
description: "string_views size 100".to_string(),
423+
str_len: 100,
424+
_marker: std::marker::PhantomData,
425+
},
426+
);
276427
}
277428

278429
criterion_group!(benches, add_benchmark);

0 commit comments

Comments
 (0)