Skip to content

Commit 1d9d799

Browse files
committed
feat: update benchmarks for wide JSON decoding and projection with increased row and batch sizes
1 parent 2879b22 commit 1d9d799

File tree

3 files changed

+18
-30
lines changed

3 files changed

+18
-30
lines changed

arrow-json/benches/binary_hex.rs

Lines changed: 7 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -21,14 +21,15 @@ use criterion::{Criterion, criterion_group, criterion_main};
2121
use std::hint::black_box;
2222
use std::sync::Arc;
2323

24-
const BINARY_ROWS: usize = 1 << 15;
24+
const BINARY_ROWS: usize = 1 << 17;
2525
const BINARY_BYTES: usize = 64;
26+
const BINARY_BATCH_SIZE: usize = 1 << 13;
2627

27-
fn bench_decode(c: &mut Criterion, name: &str, data: &[u8], field: Arc<Field>, rows: usize) {
28+
fn bench_decode(c: &mut Criterion, name: &str, data: &[u8], field: Arc<Field>) {
2829
c.bench_function(name, |b| {
2930
b.iter(|| {
3031
let mut decoder = ReaderBuilder::new_with_field(field.clone())
31-
.with_batch_size(rows)
32+
.with_batch_size(BINARY_BATCH_SIZE)
3233
.build_decoder()
3334
.unwrap();
3435

@@ -72,35 +73,17 @@ fn criterion_benchmark(c: &mut Criterion) {
7273
let binary_data = build_hex_lines(BINARY_ROWS, BINARY_BYTES);
7374

7475
let binary_field = Arc::new(Field::new("item", DataType::Binary, false));
75-
bench_decode(
76-
c,
77-
"decode_binary_hex_json",
78-
&binary_data,
79-
binary_field,
80-
BINARY_ROWS,
81-
);
76+
bench_decode(c, "decode_binary_hex_json", &binary_data, binary_field);
8277

8378
let fixed_field = Arc::new(Field::new(
8479
"item",
8580
DataType::FixedSizeBinary(BINARY_BYTES as i32),
8681
false,
8782
));
88-
bench_decode(
89-
c,
90-
"decode_fixed_binary_hex_json",
91-
&binary_data,
92-
fixed_field,
93-
BINARY_ROWS,
94-
);
83+
bench_decode(c, "decode_fixed_binary_hex_json", &binary_data, fixed_field);
9584

9685
let view_field = Arc::new(Field::new("item", DataType::BinaryView, false));
97-
bench_decode(
98-
c,
99-
"decode_binary_view_hex_json",
100-
&binary_data,
101-
view_field,
102-
BINARY_ROWS,
103-
);
86+
bench_decode(c, "decode_binary_view_hex_json", &binary_data, view_field);
10487
}
10588

10689
criterion_group!(benches, criterion_benchmark);

arrow-json/benches/wide_object.rs

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,9 @@ use serde_json::{Map, Number, Value};
2222
use std::fmt::Write;
2323
use std::sync::Arc;
2424

25+
const WIDE_ROWS: usize = 1 << 17; // 128K rows
26+
const WIDE_BATCH_SIZE: usize = 1 << 13; // 8K rows per batch
27+
2528
fn build_schema(field_count: usize) -> Arc<Schema> {
2629
// Builds a schema with fields named f0..f{field_count-1}, all Int64 and non-nullable.
2730
let fields: Vec<Field> = (0..field_count)
@@ -67,15 +70,15 @@ fn build_wide_values(rows: usize, fields: usize) -> Vec<Value> {
6770
}
6871

6972
fn bench_decode_wide_object(c: &mut Criterion) {
70-
let rows = 4096;
73+
let rows = WIDE_ROWS;
7174
let fields = 64;
7275
let data = build_wide_json(rows, fields);
7376
let schema = build_schema(fields);
7477

7578
c.bench_function("decode_wide_object_i64_json", |b| {
7679
b.iter(|| {
7780
let mut decoder = ReaderBuilder::new(schema.clone())
78-
.with_batch_size(1024)
81+
.with_batch_size(WIDE_BATCH_SIZE)
7982
.build_decoder()
8083
.unwrap();
8184

@@ -93,15 +96,15 @@ fn bench_decode_wide_object(c: &mut Criterion) {
9396
}
9497

9598
fn bench_serialize_wide_object(c: &mut Criterion) {
96-
let rows = 4096;
99+
let rows = WIDE_ROWS;
97100
let fields = 64;
98101
let values = build_wide_values(rows, fields);
99102
let schema = build_schema(fields);
100103

101104
c.bench_function("decode_wide_object_i64_serialize", |b| {
102105
b.iter(|| {
103106
let mut decoder = ReaderBuilder::new(schema.clone())
104-
.with_batch_size(1024)
107+
.with_batch_size(WIDE_BATCH_SIZE)
105108
.build_decoder()
106109
.unwrap();
107110

arrow-json/benches/wide_projection.rs

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,8 +25,9 @@ use std::hint::black_box;
2525
use std::sync::Arc;
2626

2727
// Projection benchmark constants
28-
const WIDE_PROJECTION_ROWS: usize = 1 << 14; // 16K rows
28+
const WIDE_PROJECTION_ROWS: usize = 1 << 17; // 128K rows
2929
const WIDE_PROJECTION_TOTAL_FIELDS: usize = 100; // 100 fields total, select only 3
30+
const WIDE_PROJECTION_BATCH_SIZE: usize = 1 << 13; // 8K rows per batch
3031

3132
fn bench_decode_schema(
3233
c: &mut Criterion,
@@ -45,7 +46,8 @@ fn bench_decode_schema(
4546
group.bench_function(BenchmarkId::from_parameter(rows), |b| {
4647
b.iter(|| {
4748
let mut decoder = ReaderBuilder::new(schema.clone())
48-
.with_batch_size(rows)
49+
.with_batch_size(WIDE_PROJECTION_BATCH_SIZE)
50+
.with_projection(projection)
4951
.build_decoder()
5052
.unwrap();
5153

0 commit comments

Comments
 (0)