Skip to content

Commit cfaea69

Browse files
committed
Add bench for json-writer
1 parent e702a23 commit cfaea69

File tree

2 files changed

+133
-0
lines changed

2 files changed

+133
-0
lines changed

arrow-json/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,7 @@ harness = false
6969
[[bench]]
7070
name = "json_reader"
7171
harness = false
72+
73+
[[bench]]
74+
name = "json_writer"
75+
harness = false

arrow-json/benches/json_writer.rs

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use arrow_array::builder::{FixedSizeListBuilder, Int64Builder, ListBuilder};
19+
use arrow_array::{Array, RecordBatch};
20+
use arrow_json::LineDelimitedWriter;
21+
use arrow_schema::{DataType, Field, Schema};
22+
use criterion::{Criterion, Throughput, criterion_group, criterion_main};
23+
use std::sync::Arc;
24+
25+
// Number of rows in every benchmark batch.
const ROWS: usize = 1 << 17; // 128K rows
26+
// Elements per row for the "short" benchmark cases.
const LIST_SHORT_ELEMENTS: usize = 5;
27+
// Elements per row for the "long" benchmark cases.
const LIST_LONG_ELEMENTS: usize = 100;
28+
29+
fn build_list_batch(rows: usize, elements: usize) -> RecordBatch {
30+
let mut list_builder = ListBuilder::new(Int64Builder::new());
31+
for row in 0..rows {
32+
for i in 0..elements {
33+
list_builder.values().append_value((row + i) as i64);
34+
}
35+
list_builder.append(true);
36+
}
37+
let list_array = list_builder.finish();
38+
39+
let schema = Arc::new(Schema::new(vec![Field::new(
40+
"list",
41+
DataType::List(Arc::new(Field::new_list_field(DataType::Int64, false))),
42+
false,
43+
)]));
44+
45+
RecordBatch::try_new(schema, vec![Arc::new(list_array)]).unwrap()
46+
}
47+
48+
fn bench_write_list(c: &mut Criterion) {
49+
let short_batch = build_list_batch(ROWS, LIST_SHORT_ELEMENTS);
50+
let long_batch = build_list_batch(ROWS, LIST_LONG_ELEMENTS);
51+
52+
let mut group = c.benchmark_group("write_list_i64");
53+
// Short lists: tests per-list overhead (few elements per row)
54+
group.throughput(Throughput::Elements(ROWS as u64));
55+
group.bench_function("short", |b| {
56+
b.iter(|| {
57+
let mut buf = Vec::with_capacity(ROWS * LIST_SHORT_ELEMENTS * 8);
58+
let mut writer = LineDelimitedWriter::new(&mut buf);
59+
writer.write(&short_batch).unwrap();
60+
writer.finish().unwrap();
61+
buf
62+
})
63+
});
64+
65+
// Long lists: tests child element encode throughput (many elements per row)
66+
group.bench_function("long", |b| {
67+
b.iter(|| {
68+
let mut buf = Vec::with_capacity(ROWS * LIST_LONG_ELEMENTS * 8);
69+
let mut writer = LineDelimitedWriter::new(&mut buf);
70+
writer.write(&long_batch).unwrap();
71+
writer.finish().unwrap();
72+
buf
73+
})
74+
});
75+
76+
group.finish();
77+
}
78+
79+
fn build_fixed_size_list_batch(rows: usize, elements: usize) -> RecordBatch {
80+
let mut builder = FixedSizeListBuilder::new(Int64Builder::new(), elements as i32);
81+
for row in 0..rows {
82+
for i in 0..elements {
83+
builder.values().append_value((row + i) as i64);
84+
}
85+
builder.append(true);
86+
}
87+
let fsl_array = builder.finish();
88+
89+
let schema = Arc::new(Schema::new(vec![Field::new(
90+
"fixed_size_list",
91+
fsl_array.data_type().clone(),
92+
false,
93+
)]));
94+
95+
RecordBatch::try_new(schema, vec![Arc::new(fsl_array)]).unwrap()
96+
}
97+
98+
fn bench_write_fixed_size_list(c: &mut Criterion) {
99+
let short_batch = build_fixed_size_list_batch(ROWS, LIST_SHORT_ELEMENTS);
100+
let long_batch = build_fixed_size_list_batch(ROWS, LIST_LONG_ELEMENTS);
101+
102+
let mut group = c.benchmark_group("write_fixed_size_list_i64");
103+
group.throughput(Throughput::Elements(ROWS as u64));
104+
105+
group.bench_function("short", |b| {
106+
b.iter(|| {
107+
let mut buf = Vec::with_capacity(ROWS * LIST_SHORT_ELEMENTS * 8);
108+
let mut writer = LineDelimitedWriter::new(&mut buf);
109+
writer.write(&short_batch).unwrap();
110+
writer.finish().unwrap();
111+
buf
112+
})
113+
});
114+
115+
group.bench_function("long", |b| {
116+
b.iter(|| {
117+
let mut buf = Vec::with_capacity(ROWS * LIST_LONG_ELEMENTS * 8);
118+
let mut writer = LineDelimitedWriter::new(&mut buf);
119+
writer.write(&long_batch).unwrap();
120+
writer.finish().unwrap();
121+
buf
122+
})
123+
});
124+
125+
group.finish();
126+
}
127+
128+
// Collect both benchmark functions into the `benches` group and hand that
// group to Criterion's generated entry point.
criterion_group!(benches, bench_write_list, bench_write_fixed_size_list);
129+
criterion_main!(benches);

0 commit comments

Comments
 (0)