Skip to content

Commit f43baa5

Browse files
committed
feat: add benchmark
1 parent 251494b commit f43baa5

File tree

2 files changed

+126
-0
lines changed

2 files changed

+126
-0
lines changed

arrow-json/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,7 @@ rand = { version = "0.9", default-features = false, features = ["std", "std_rng"
6565
[[bench]]
6666
name = "serde"
6767
harness = false
68+
69+
[[bench]]
70+
name = "reader"
71+
harness = false

arrow-json/benches/reader.rs

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use arrow_json::ReaderBuilder;
19+
use arrow_schema::{DataType, Field, Schema};
20+
use criterion::{
21+
BenchmarkId, Criterion, SamplingMode, Throughput, criterion_group, criterion_main,
22+
};
23+
use std::fmt::Write;
24+
use std::hint::black_box;
25+
use std::sync::Arc;
26+
27+
// Projection benchmark constants

/// Number of JSON rows generated for the wide-projection workload (16K rows).
const WIDE_PROJECTION_ROWS: usize = 1 << 14;
/// Number of fields in every generated row; the projected benchmark selects only 3 of them.
const WIDE_PROJECTION_TOTAL_FIELDS: usize = 100;
30+
31+
fn bench_decode_schema(
32+
c: &mut Criterion,
33+
name: &str,
34+
data: &[u8],
35+
schema: Arc<Schema>,
36+
rows: usize,
37+
) {
38+
let mut group = c.benchmark_group(name);
39+
group.throughput(Throughput::Bytes(data.len() as u64));
40+
group.sample_size(50);
41+
group.measurement_time(std::time::Duration::from_secs(5));
42+
group.warm_up_time(std::time::Duration::from_secs(2));
43+
group.sampling_mode(SamplingMode::Flat);
44+
group.bench_function(BenchmarkId::from_parameter(rows), |b| {
45+
b.iter(|| {
46+
let mut decoder = ReaderBuilder::new(schema.clone())
47+
.with_batch_size(rows)
48+
.build_decoder()
49+
.unwrap();
50+
51+
let mut offset = 0;
52+
while offset < data.len() {
53+
let read = decoder.decode(black_box(&data[offset..])).unwrap();
54+
if read == 0 {
55+
break;
56+
}
57+
offset += read;
58+
}
59+
60+
let batch = decoder.flush().unwrap();
61+
black_box(batch);
62+
})
63+
});
64+
group.finish();
65+
}
66+
67+
/// Builds newline-delimited JSON with `rows` identical objects, each holding
/// `total_fields` integer fields named `f0`, `f1`, ….
///
/// Field `fN` always carries the value `N`, zero-padded to 7 digits, so every
/// row has the same byte length. Returns the UTF-8 bytes of the document;
/// every row, including the last, ends with `\n`.
fn build_wide_projection_json(rows: usize, total_fields: usize) -> Vec<u8> {
    // Estimate: each field ~15 bytes ("fXX":VVVVVVV,), plus braces/newline overhead.
    let per_row_size = total_fields * 15 + 10;
    let mut data = String::with_capacity(rows * per_row_size);

    for _row in 0..rows {
        data.push('{');
        for i in 0..total_fields {
            if i > 0 {
                data.push(',');
            }
            // Use fixed-width values for stable benchmarks: 7 digits.
            // NOTE(review): zero-padded numbers are not strict RFC 8259 JSON;
            // this presumably relies on the decoder being lenient — confirm.
            write!(data, "\"f{}\":{:07}", i, i).expect("writing to a String cannot fail");
        }
        data.push('}');
        data.push('\n');
    }
    data.into_bytes()
}
86+
87+
fn criterion_benchmark(c: &mut Criterion) {
88+
// Wide projection workload: tests overhead of parsing unused fields
89+
let wide_projection_data =
90+
build_wide_projection_json(WIDE_PROJECTION_ROWS, WIDE_PROJECTION_TOTAL_FIELDS);
91+
92+
// Full schema: all 100 fields
93+
let mut full_fields = Vec::new();
94+
for i in 0..WIDE_PROJECTION_TOTAL_FIELDS {
95+
full_fields.push(Field::new(format!("f{}", i), DataType::Int64, false));
96+
}
97+
let full_schema = Arc::new(Schema::new(full_fields));
98+
bench_decode_schema(
99+
c,
100+
"decode_wide_projection_full_json",
101+
&wide_projection_data,
102+
full_schema,
103+
WIDE_PROJECTION_ROWS,
104+
);
105+
106+
// Projected schema: only 3 fields (f0, f10, f50) out of 100
107+
let projected_schema = Arc::new(Schema::new(vec![
108+
Field::new("f0", DataType::Int64, false),
109+
Field::new("f10", DataType::Int64, false),
110+
Field::new("f50", DataType::Int64, false),
111+
]));
112+
bench_decode_schema(
113+
c,
114+
"decode_wide_projection_narrow_json",
115+
&wide_projection_data,
116+
projected_schema,
117+
WIDE_PROJECTION_ROWS,
118+
);
119+
}
120+
121+
criterion_group!(benches, criterion_benchmark);
122+
criterion_main!(benches);

0 commit comments

Comments
 (0)