Skip to content

Commit 995992b

Browse files
committed
Add range for string view zip benchmarks
1 parent 2c558de commit 995992b

File tree

2 files changed

+42
-63
lines changed

2 files changed

+42
-63
lines changed

arrow/benches/zip_kernels.rs

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ where
134134
}
135135

136136
struct GenerateStringView {
137-
str_len: usize,
137+
range_length: std::ops::Range<usize>,
138138
description: String,
139139
_marker: std::marker::PhantomData<StringViewType>,
140140
}
@@ -153,10 +153,11 @@ impl InputGenerator for GenerateStringView {
153153
}
154154

155155
fn generate_array(&self, seed: u64, array_length: usize, null_percentage: f32) -> ArrayRef {
156-
Arc::new(create_string_view_array_with_fixed_len_with_seed(
156+
Arc::new(create_string_view_array_with_len_range_and_seed(
157157
array_length,
158158
null_percentage,
159-
self.str_len,
159+
self.range_length.start,
160+
self.range_length.end - 1,
160161
seed,
161162
))
162163
}
@@ -306,26 +307,17 @@ fn add_benchmark(c: &mut Criterion) {
306307
bench_zip_on_input_generator(
307308
c,
308309
&GenerateStringView {
309-
description: "string_views size 3".to_string(),
310-
str_len: 3,
311-
_marker: std::marker::PhantomData,
312-
},
313-
);
314-
315-
bench_zip_on_input_generator(
316-
c,
317-
&GenerateStringView {
318-
description: "string_views size 10".to_string(),
319-
str_len: 10,
310+
description: "string_views size (3..10)".to_string(),
311+
range_length: 3..10,
320312
_marker: std::marker::PhantomData,
321313
},
322314
);
323315

324316
bench_zip_on_input_generator(
325317
c,
326318
&GenerateStringView {
327-
description: "string_views size 100".to_string(),
328-
str_len: 100,
319+
description: "string_views size (10..100)".to_string(),
320+
range_length: 10..100,
329321
_marker: std::marker::PhantomData,
330322
},
331323
);

arrow/src/util/bench_util.rs

Lines changed: 34 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,39 @@ pub fn create_string_array_with_len_range_and_prefix_and_seed<Offset: OffsetSize
208208
})
209209
.collect()
210210
}
211+
/// Creates a string view array of a given size, null density and range of string lengths
212+
///
213+
/// Arguments:
214+
/// - `size`: number of elements in the string view array
215+
/// - `null_density`: density of nulls in the string view array
216+
/// - `min_str_len`: minimum length (inclusive) of each string in the string view array
217+
/// - `max_str_len`: maximum length (inclusive) of each string in the string view array
218+
/// - `seed`: seed for the random number generator
219+
pub fn create_string_view_array_with_len_range_and_seed(
220+
size: usize,
221+
null_density: f32,
222+
min_str_len: usize,
223+
max_str_len: usize,
224+
seed: u64,
225+
) -> StringViewArray {
226+
assert!(
227+
min_str_len <= max_str_len,
228+
"min_str_len must be <= max_str_len"
229+
);
230+
let rng = &mut StdRng::seed_from_u64(seed);
231+
(0..size)
232+
.map(|_| {
233+
if rng.random::<f32>() < null_density {
234+
None
235+
} else {
236+
let str_len = rng.random_range(min_str_len..=max_str_len);
237+
let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
238+
let value = String::from_utf8(value).unwrap();
239+
Some(value)
240+
}
241+
})
242+
.collect()
243+
}
211244

212245
fn create_string_view_array_with_len_range_and_prefix(
213246
size: usize,
@@ -319,58 +352,12 @@ pub fn create_string_view_array_with_max_len(
319352
}
320353

321354
/// Creates a random (but fixed-seeded) array of a given size, null density and length
322-
///
323-
/// Arguments:
324-
/// - `size`: number of string view array
325-
/// - `null_density`: density of nulls in the string view array
326-
/// - `str_len`: size of each string in the string view array
327355
pub fn create_string_view_array_with_fixed_len(
328356
size: usize,
329357
null_density: f32,
330358
str_len: usize,
331359
) -> StringViewArray {
332-
create_string_view_array_with_fixed_len_with_rng(
333-
size,
334-
null_density,
335-
str_len,
336-
&mut seedable_rng(),
337-
)
338-
}
339-
340-
/// Creates a string view array of a given size, null density and length
341-
///
342-
/// Arguments:
343-
/// - `size`: number of string view array
344-
/// - `null_density`: density of nulls in the string view array
345-
/// - `str_len`: size of each string in the string view array
346-
/// - `seed`: seed for the random number generator
347-
pub fn create_string_view_array_with_fixed_len_with_seed(
348-
size: usize,
349-
null_density: f32,
350-
str_len: usize,
351-
seed: u64,
352-
) -> StringViewArray {
353-
create_string_view_array_with_fixed_len_with_rng(
354-
size,
355-
null_density,
356-
str_len,
357-
&mut StdRng::seed_from_u64(seed),
358-
)
359-
}
360-
361-
/// Creates a string view array of a given size, null density and length
362-
///
363-
/// Arguments:
364-
/// - `size`: number of string view array
365-
/// - `null_density`: density of nulls in the string view array
366-
/// - `str_len`: size of each string in the string view array
367-
/// - `rng` random number generator
368-
fn create_string_view_array_with_fixed_len_with_rng(
369-
size: usize,
370-
null_density: f32,
371-
str_len: usize,
372-
rng: &mut StdRng,
373-
) -> StringViewArray {
360+
let rng = &mut seedable_rng();
374361
(0..size)
375362
.map(|_| {
376363
if rng.random::<f32>() < null_density {

0 commit comments

Comments
 (0)