Skip to content

Commit b10a0a3

Browse files
committed
refactor(parquet): reuse existing write_with_cdc_options in regression tests
1 parent 44a5860 commit b10a0a3

File tree

1 file changed

+24
-22
lines changed
  • parquet/src/column/chunker

1 file changed

+24
-22
lines changed

parquet/src/column/chunker/cdc.rs

Lines changed: 24 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -2205,23 +2205,6 @@ mod arrow_tests {
22052205
);
22062206
}
22072207

2208-
/// Helper to write a batch with CDC and read it back.
2209-
fn cdc_roundtrip(batch: &RecordBatch) -> RecordBatch {
2210-
let props = WriterProperties::builder()
2211-
.set_content_defined_chunking(Some(CdcOptions::default()))
2212-
.build();
2213-
let mut buffer = Vec::new();
2214-
let mut writer = ArrowWriter::try_new(&mut buffer, batch.schema(), Some(props)).unwrap();
2215-
writer.write(batch).unwrap();
2216-
writer.close().unwrap();
2217-
2218-
let reader = ParquetRecordBatchReaderBuilder::try_new(bytes::Bytes::from(buffer))
2219-
.unwrap()
2220-
.build()
2221-
.unwrap();
2222-
reader.into_iter().next().unwrap().unwrap()
2223-
}
2224-
22252208
/// Regression test for <https://github.com/apache/arrow-rs/issues/9637>
22262209
///
22272210
/// Writing nested list data with CDC enabled panicked with an out-of-bounds
@@ -2245,8 +2228,14 @@ mod arrow_tests {
22452228
true,
22462229
),
22472230
]));
2248-
let batch = create_random_batch(schema, 2, 0.25, 0.75).unwrap();
2249-
assert_eq!(cdc_roundtrip(&batch), batch);
2231+
let batch = create_random_batch(schema, 10_000, 0.25, 0.75).unwrap();
2232+
write_with_cdc_options(
2233+
&[&batch],
2234+
CDC_MIN_CHUNK_SIZE,
2235+
CDC_MAX_CHUNK_SIZE,
2236+
None,
2237+
true,
2238+
);
22502239
}
22512240

22522241
/// Test CDC with deeply nested types: List<List<Int32>>, List<Struct<List<Int32>>>
@@ -2268,8 +2257,14 @@ mod arrow_tests {
22682257
Field::new("list_list", list_list_type, true),
22692258
Field::new("list_struct_list", list_struct_type, true),
22702259
]));
2271-
let batch = create_random_batch(schema, 200, 0.25, 0.75).unwrap();
2272-
assert_eq!(cdc_roundtrip(&batch), batch);
2260+
let batch = create_random_batch(schema, 10_000, 0.25, 0.75).unwrap();
2261+
write_with_cdc_options(
2262+
&[&batch],
2263+
CDC_MIN_CHUNK_SIZE,
2264+
CDC_MAX_CHUNK_SIZE,
2265+
None,
2266+
true,
2267+
);
22732268
}
22742269

22752270
/// Test CDC with list arrays that have non-empty null segments.
@@ -2306,7 +2301,14 @@ mod arrow_tests {
23062301
let schema = Arc::new(Schema::new(vec![Field::new("col", list_type, true)]));
23072302
let batch = RecordBatch::try_new(schema, vec![list_array]).unwrap();
23082303

2309-
let read = cdc_roundtrip(&batch);
2304+
let buf = write_with_cdc_options(
2305+
&[&batch],
2306+
CDC_MIN_CHUNK_SIZE,
2307+
CDC_MAX_CHUNK_SIZE,
2308+
None,
2309+
true,
2310+
);
2311+
let read = concat_batches(&read_batches(&buf));
23102312
let read_list = read.column(0).as_list::<i32>();
23112313
assert_eq!(read_list.len(), 5);
23122314
assert!(read_list.is_valid(0));

0 commit comments

Comments (0)