Skip to content

Commit 85c10b4

Browse files
committed
feat(parquet): add struct-column writer benchmarks
Add three new benchmark cases to the arrow_writer benchmark suite for evaluating write performance on struct columns at varying null densities:

- `struct_non_null`: a nullable struct with 0% null rows and non-nullable primitive children;
- `struct_sparse_99pct_null`: a nullable struct with 99% null rows, exercising null batching through one level of struct nesting;
- `struct_all_null`: a nullable struct with 100% null rows, exercising the uniform-null path through struct nesting.

Baseline results (Apple M1 Max):

    struct_non_null/default              29.9 ms
    struct_non_null/parquet_2            38.2 ms
    struct_non_null/zstd_parquet_2       50.9 ms
    struct_sparse_99pct_null/default      7.2 ms
    struct_sparse_99pct_null/parquet_2    7.3 ms
    struct_sparse_99pct_null/zstd_p2      8.1 ms
    struct_all_null/default              83.3 µs
    struct_all_null/parquet_2            82.5 µs
    struct_all_null/zstd_parquet_2      106.6 µs

Signed-off-by: Hippolyte Barraud <hippolyte.barraud@datadoghq.com>
1 parent aac969d commit 85c10b4

File tree

1 file changed

+28
-0
lines changed

1 file changed

+28
-0
lines changed

parquet/benches/arrow_writer.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,25 @@ fn create_list_primitive_bench_batch_non_null(
266266
)?)
267267
}
268268

269+
fn create_struct_bench_batch(size: usize, null_density: f32) -> Result<RecordBatch> {
270+
let fields = vec![Field::new(
271+
"_1",
272+
DataType::Struct(Fields::from(vec![
273+
Field::new("_1", DataType::Int32, false),
274+
Field::new("_2", DataType::Int64, false),
275+
Field::new("_3", DataType::Float32, false),
276+
])),
277+
true,
278+
)];
279+
let schema = Schema::new(fields);
280+
Ok(create_random_batch(
281+
Arc::new(schema),
282+
size,
283+
null_density,
284+
0.75,
285+
)?)
286+
}
287+
269288
fn _create_nested_bench_batch(
270289
size: usize,
271290
null_density: f32,
@@ -400,6 +419,15 @@ fn create_batches() -> Vec<(&'static str, RecordBatch)> {
400419
let batch = create_primitive_bench_batch(BATCH_SIZE, 1.0, 0.75).unwrap();
401420
batches.push(("primitive_all_null", batch));
402421

422+
let batch = create_struct_bench_batch(BATCH_SIZE, 0.0).unwrap();
423+
batches.push(("struct_non_null", batch));
424+
425+
let batch = create_struct_bench_batch(BATCH_SIZE, 0.99).unwrap();
426+
batches.push(("struct_sparse_99pct_null", batch));
427+
428+
let batch = create_struct_bench_batch(BATCH_SIZE, 1.0).unwrap();
429+
batches.push(("struct_all_null", batch));
430+
403431
batches
404432
}
405433

0 commit comments

Comments
 (0)