@@ -23,13 +23,15 @@ use arrow::array::ArrayRef;
2323use arrow:: datatypes:: { Int64Type , UInt64Type } ;
2424use arrow:: row:: { RowConverter , SortField } ;
2525use arrow:: util:: bench_util:: {
26- create_boolean_array, create_dict_from_values, create_primitive_array,
27- create_string_array_with_len, create_string_dict_array, create_string_view_array_with_len,
26+ create_boolean_array, create_boolean_array_with_seed, create_dict_from_values,
27+ create_f64_array_with_seed, create_primitive_array, create_primitive_array_with_seed,
28+ create_string_array_with_len, create_string_array_with_len_range_and_prefix_and_seed,
29+ create_string_dict_array, create_string_view_array_with_len,
2830 create_string_view_array_with_max_len,
2931} ;
3032use arrow:: util:: data_gen:: create_random_array;
3133use arrow_array:: Array ;
32- use arrow_array:: types:: Int32Type ;
34+ use arrow_array:: types:: { Int8Type , Int32Type } ;
3335use arrow_schema:: { DataType , Field } ;
3436use criterion:: Criterion ;
3537use std:: { hint, sync:: Arc } ;
@@ -85,6 +87,102 @@ fn bench_iter(c: &mut Criterion) {
8587 } ) ;
8688}
8789
90+ /// A single benchmark with a medium number of columns (around 50) without nested columns for real-world use cases
91+ /// This also makes sure there is a large gap between each value in the column and how it is laid out in the row format.
92+ /// and it is on the edge of not fitting in L3 on some machines
93+ fn run_benchmark_on_medium_amount_and_types_of_columns_without_nesting (
94+ batch_size : usize ,
95+ c : & mut Criterion ,
96+ ) {
97+ let mut seed = 0 ;
98+
99+ let mut cols: Vec < ArrayRef > = vec ! [ ] ;
100+
101+ for nulls in [ 0.0 , 0.1 , 0.2 , 0.5 ] {
102+ seed += 1 ;
103+ cols. push ( Arc :: new ( create_primitive_array_with_seed :: < Int8Type > (
104+ batch_size, nulls, seed,
105+ ) ) as ArrayRef ) ;
106+ }
107+
108+ for nulls in [ 0.0 , 0.1 , 0.2 , 0.5 ] {
109+ seed += 1 ;
110+ cols. push ( Arc :: new ( create_primitive_array_with_seed :: < Int32Type > (
111+ batch_size, nulls, seed,
112+ ) ) as ArrayRef ) ;
113+ }
114+
115+ for nulls in [ 0.0 , 0.1 , 0.2 , 0.5 ] {
116+ seed += 1 ;
117+ cols. push ( Arc :: new ( create_primitive_array_with_seed :: < Int64Type > (
118+ batch_size, nulls, seed,
119+ ) ) as ArrayRef ) ;
120+ }
121+
122+ for _ in 0 ..10 {
123+ seed += 1 ;
124+ cols. push ( Arc :: new ( create_primitive_array_with_seed :: < Int64Type > (
125+ batch_size, 0.0 , seed,
126+ ) ) as ArrayRef ) ;
127+ }
128+
129+ for nulls in [ 0.0 , 0.1 , 0.2 , 0.5 ] {
130+ seed += 1 ;
131+ cols. push ( Arc :: new (
132+ create_string_array_with_len_range_and_prefix_and_seed :: < i32 > (
133+ batch_size, nulls, 0 , 50 , "" , seed,
134+ ) ,
135+ ) ) ;
136+ }
137+
138+ for _ in 0 ..3 {
139+ seed += 1 ;
140+ cols. push ( Arc :: new (
141+ create_string_array_with_len_range_and_prefix_and_seed :: < i32 > (
142+ batch_size, 0.0 , 0 , 10 , "" , seed,
143+ ) ,
144+ ) ) ;
145+ }
146+ for _ in 0 ..3 {
147+ seed += 1 ;
148+ cols. push ( Arc :: new (
149+ create_string_array_with_len_range_and_prefix_and_seed :: < i32 > (
150+ batch_size, 0.0 , 10 , 20 , "" , seed,
151+ ) ,
152+ ) ) ;
153+ }
154+ for _ in 0 ..3 {
155+ seed += 1 ;
156+ cols. push ( Arc :: new (
157+ create_string_array_with_len_range_and_prefix_and_seed :: < i32 > (
158+ batch_size, 0.0 , 20 , 30 , "" , seed,
159+ ) ,
160+ ) ) ;
161+ }
162+
163+ for nulls in [ 0.0 , 0.1 , 0.2 , 0.5 ] {
164+ seed += 1 ;
165+ cols. push ( Arc :: new ( create_boolean_array_with_seed (
166+ batch_size, nulls, 0.5 , seed,
167+ ) ) ) ;
168+ }
169+
170+ for _ in 0 ..10 {
171+ seed += 1 ;
172+ cols. push ( Arc :: new ( create_primitive_array_with_seed :: < Int64Type > (
173+ batch_size, 0.0 , seed,
174+ ) ) as ArrayRef ) ;
175+ }
176+
177+ for nulls in [ 0.0 , 0.1 , 0.2 , 0.5 ] {
178+ seed += 1 ;
179+ cols. push ( Arc :: new ( create_f64_array_with_seed ( batch_size, nulls, seed) ) as ArrayRef ) ;
180+ }
181+
182+ assert_eq ! ( cols. len( ) , 53 ) ;
183+ do_bench ( c, format ! ( "{batch_size} 53 columns" ) . as_str ( ) , cols) ;
184+ }
185+
88186fn row_bench ( c : & mut Criterion ) {
89187 let cols = vec ! [ Arc :: new( create_primitive_array:: <UInt64Type >( 4096 , 0. ) ) as ArrayRef ] ;
90188 do_bench ( c, "4096 u64(0)" , cols) ;
@@ -279,6 +377,9 @@ fn row_bench(c: &mut Criterion) {
279377 ] ;
280378 do_bench ( c, "4096 large_list(0) sliced to 10 of u64(0)" , cols) ;
281379
380+ run_benchmark_on_medium_amount_and_types_of_columns_without_nesting ( 4096 , c) ;
381+ run_benchmark_on_medium_amount_and_types_of_columns_without_nesting ( 8192 , c) ;
382+
282383 bench_iter ( c) ;
283384}
284385
0 commit comments