@@ -133,6 +133,34 @@ where
133133 }
134134}
135135
136+ struct GenerateStringView < T : ByteViewType > {
137+ str_len : usize ,
138+ description : String ,
139+ _marker : std:: marker:: PhantomData < T > ,
140+ }
141+
142+ impl InputGenerator for GenerateStringView < StringViewType > {
143+ fn name ( & self ) -> & str {
144+ self . description . as_str ( )
145+ }
146+ fn generate_scalar_with_null_value ( & self ) -> ArrayRef {
147+ new_null_array ( & DataType :: Utf8View , 1 )
148+ }
149+
150+ fn generate_non_null_scalars ( & self , seed : u64 , number_of_scalars : usize ) -> Vec < ArrayRef > {
151+ let array = self . generate_array ( seed, number_of_scalars, 0.0 ) ;
152+ ( 0 ..number_of_scalars) . map ( |i| array. slice ( i, 1 ) ) . collect ( )
153+ }
154+
155+ fn generate_array ( & self , _seed : u64 , array_length : usize , null_percentage : f32 ) -> ArrayRef {
156+ Arc :: new ( create_string_view_array_with_fixed_len (
157+ array_length,
158+ null_percentage,
159+ self . str_len ,
160+ ) )
161+ }
162+ }
163+
136164fn mask_cases ( len : usize ) -> Vec < ( & ' static str , BooleanArray ) > {
137165 vec ! [
138166 ( "all_true" , create_boolean_array( len, 0.0 , 1.0 ) ) ,
@@ -145,10 +173,9 @@ fn mask_cases(len: usize) -> Vec<(&'static str, BooleanArray)> {
145173 ( "50pct_nulls" , create_boolean_array( len, 0.5 , 0.5 ) ) ,
146174 ]
147175}
/// Length handed to `mask_cases` and interpolated into the benchmark group names.
const ARRAY_LEN: usize = 8_192;
148177
149178fn bench_zip_on_input_generator ( c : & mut Criterion , input_generator : & impl InputGenerator ) {
150- const ARRAY_LEN : usize = 8192 ;
151-
152179 let mut group =
153180 c. benchmark_group ( format ! ( "zip_{ARRAY_LEN}_from_{}" , input_generator. name( ) ) . as_str ( ) ) ;
154181
@@ -224,6 +251,61 @@ fn bench_zip_input_on_all_masks(
224251 }
225252}
226253
254+ fn bench_zip_on_input_generator_for_scalars (
255+ c : & mut Criterion ,
256+ input_generator : & impl InputGenerator ,
257+ ) {
258+ bench_zip_on_input_generators_for_scalars ( c, input_generator, input_generator) ;
259+ }
260+
261+ fn bench_zip_on_input_generators_for_scalars (
262+ c : & mut Criterion ,
263+ input_generator_1 : & impl InputGenerator ,
264+ input_generator_2 : & impl InputGenerator ,
265+ ) {
266+ let mut group = c. benchmark_group (
267+ format ! (
268+ "zip_{ARRAY_LEN}_from_{} and {}" ,
269+ input_generator_1. name( ) ,
270+ input_generator_2. name( )
271+ )
272+ . as_str ( ) ,
273+ ) ;
274+
275+ let null_scalar = input_generator_1. generate_scalar_with_null_value ( ) ;
276+
277+ let [ non_null_scalar_1] : [ _ ; 1 ] = input_generator_1
278+ . generate_non_null_scalars ( 42 , 1 )
279+ . try_into ( )
280+ . unwrap ( ) ;
281+
282+ let [ non_null_scalar_2] : [ _ ; 1 ] = input_generator_2
283+ . generate_non_null_scalars ( 18 , 1 )
284+ . try_into ( )
285+ . unwrap ( ) ;
286+
287+ let masks = mask_cases ( ARRAY_LEN ) ;
288+
289+ for ( description, truthy, falsy) in & [
290+ ( "null_vs_non_null_scalar" , & null_scalar, & non_null_scalar_1) ,
291+ (
292+ "non_null_scalar_vs_null_scalar" ,
293+ & non_null_scalar_1,
294+ & null_scalar,
295+ ) ,
296+ ( "non_nulls_scalars" , & non_null_scalar_1, & non_null_scalar_2) ,
297+ ] {
298+ bench_zip_input_on_all_masks (
299+ description,
300+ & mut group,
301+ & masks,
302+ & Scalar :: new ( truthy) ,
303+ & Scalar :: new ( falsy) ,
304+ ) ;
305+ }
306+ group. finish ( ) ;
307+ }
308+
227309fn add_benchmark ( c : & mut Criterion ) {
228310 // Primitive
229311 bench_zip_on_input_generator (
@@ -273,6 +355,75 @@ fn add_benchmark(c: &mut Criterion) {
273355 _marker : std:: marker:: PhantomData ,
274356 } ,
275357 ) ;
358+
359+ bench_zip_on_input_generator_for_scalars (
360+ c,
361+ & GenerateStringView {
362+ description : "string_views size 3" . to_string ( ) ,
363+ str_len : 3 ,
364+ _marker : std:: marker:: PhantomData ,
365+ } ,
366+ ) ;
367+
368+ bench_zip_on_input_generator_for_scalars (
369+ c,
370+ & GenerateStringView {
371+ description : "string_views size 10" . to_string ( ) ,
372+ str_len : 10 ,
373+ _marker : std:: marker:: PhantomData ,
374+ } ,
375+ ) ;
376+
377+ bench_zip_on_input_generator_for_scalars (
378+ c,
379+ & GenerateStringView {
380+ description : "string_views size 100" . to_string ( ) ,
381+ str_len : 10 ,
382+ _marker : std:: marker:: PhantomData ,
383+ } ,
384+ ) ;
385+
386+ bench_zip_on_input_generators_for_scalars (
387+ c,
388+ & GenerateStringView {
389+ description : "string_views size 3" . to_string ( ) ,
390+ str_len : 3 ,
391+ _marker : std:: marker:: PhantomData ,
392+ } ,
393+ & GenerateStringView {
394+ description : "string_views size 10" . to_string ( ) ,
395+ str_len : 10 ,
396+ _marker : std:: marker:: PhantomData ,
397+ } ,
398+ ) ;
399+
400+ bench_zip_on_input_generators_for_scalars (
401+ c,
402+ & GenerateStringView {
403+ description : "string_views size 3" . to_string ( ) ,
404+ str_len : 3 ,
405+ _marker : std:: marker:: PhantomData ,
406+ } ,
407+ & GenerateStringView {
408+ description : "string_views size 100" . to_string ( ) ,
409+ str_len : 100 ,
410+ _marker : std:: marker:: PhantomData ,
411+ } ,
412+ ) ;
413+
414+ bench_zip_on_input_generators_for_scalars (
415+ c,
416+ & GenerateStringView {
417+ description : "string_views size 10" . to_string ( ) ,
418+ str_len : 10 ,
419+ _marker : std:: marker:: PhantomData ,
420+ } ,
421+ & GenerateStringView {
422+ description : "string_views size 100" . to_string ( ) ,
423+ str_len : 100 ,
424+ _marker : std:: marker:: PhantomData ,
425+ } ,
426+ ) ;
276427}
277428
278429criterion_group ! ( benches, add_benchmark) ;
0 commit comments