Skip to content

Commit 5ad31fd

Browse files
committed
Add a from-scratch round-trip test
1 parent 6af5aa0 commit 5ad31fd

1 file changed

Lines changed: 170 additions & 4 deletions

File tree

parquet/tests/ieee754_nan_interop.rs

Lines changed: 170 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,11 @@
2222
use bytes::Bytes;
2323
use core::f32;
2424
use half::f16;
25-
use std::{fs, path::PathBuf, sync::Arc};
25+
use std::{path::PathBuf, sync::Arc};
2626

2727
use arrow::util::test_util::parquet_test_data;
28-
use arrow_array::{Array, Float16Array, Float32Array, Float64Array, UInt64Array};
29-
use arrow_schema::Schema;
28+
use arrow_array::{Array, Float16Array, Float32Array, Float64Array, RecordBatch, UInt64Array};
29+
use arrow_schema::{DataType, Field, Schema};
3030
use parquet::{
3131
arrow::{
3232
ArrowWriter,
@@ -266,7 +266,173 @@ fn test_ieee754_interop() {
266266
.expect("validate written metadata");
267267
}
268268

269-
fs::write("output.pq", outbuf.clone()).unwrap();
269+
//fs::write("output.pq", outbuf.clone()).unwrap();
270+
271+
// now re-validate the bit we've written
272+
let options = ArrowReaderOptions::new()
273+
.with_page_index_policy(parquet::file::metadata::PageIndexPolicy::Required);
274+
let builder = ArrowReaderBuilder::try_new_with_options(Bytes::from(outbuf), options).unwrap();
275+
let file_metadata = builder.metadata().clone();
276+
let schema = builder.schema().clone();
277+
let parquet_schema = builder.parquet_schema().clone();
278+
279+
println!("validate from rust output");
280+
validate_metadata(file_metadata.as_ref(), schema.as_ref(), &parquet_schema)
281+
.expect("validate re-read metadata");
282+
}
283+
284+
// This test replicates the data produced by the parquet-java code that generated
285+
// parquet-testing/data/floating_orders_nan_count.parquet
286+
#[test]
287+
fn test_ieee754_interop2() {
288+
// define schema
289+
let schema = Schema::new(vec![
290+
Field::new("float_ieee754", DataType::Float32, false),
291+
Field::new("double_ieee754", DataType::Float64, false),
292+
Field::new("float16_ieee754", DataType::Float16, false),
293+
]);
294+
let schema = Arc::new(schema);
295+
296+
let mut outbuf = Vec::new();
297+
{
298+
let writer_options = WriterProperties::builder()
299+
.set_max_row_group_row_count(Some(10))
300+
.build();
301+
let mut writer = ArrowWriter::try_new(&mut outbuf, schema.clone(), Some(writer_options))
302+
.expect("create arrow writer");
303+
304+
// this only works for non-NaN cases
305+
let make_batch = |data: &[f32]| -> RecordBatch {
306+
let arr1 = Float32Array::from(data.to_vec());
307+
let arr2 = Float64Array::from(data.iter().map(|v| *v as f64).collect::<Vec<_>>());
308+
let arr3 =
309+
Float16Array::from(data.iter().map(|v| f16::from_f32(*v)).collect::<Vec<_>>());
310+
311+
RecordBatch::try_new(
312+
schema.clone(),
313+
vec![Arc::new(arr1), Arc::new(arr2), Arc::new(arr3)],
314+
)
315+
.unwrap()
316+
};
317+
318+
// batch 1: no NaNs
319+
let batch = make_batch(&[-2.0f32, -1.0, -0.0, 0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0]);
320+
writer.write(&batch).expect("writing batch1");
321+
322+
// batch 2: mixed
323+
let float_data = vec![
324+
FLOAT_NEG_NAN_SMALL,
325+
-2.0,
326+
FLOAT_NEG_NAN_LARGE,
327+
-1.0,
328+
-0.0,
329+
0.0,
330+
1.0,
331+
FLOAT_NAN_SMALL,
332+
3.0,
333+
FLOAT_NAN_LARGE,
334+
];
335+
let double_data = vec![
336+
DOUBLE_NEG_NAN_SMALL,
337+
-2.0,
338+
DOUBLE_NEG_NAN_LARGE,
339+
-1.0,
340+
-0.0,
341+
0.0,
342+
1.0,
343+
DOUBLE_NAN_SMALL,
344+
3.0,
345+
DOUBLE_NAN_LARGE,
346+
];
347+
let float16_data = vec![
348+
FLOAT16_NEG_NAN_SMALL,
349+
f16::from_f32(-2.0),
350+
FLOAT16_NEG_NAN_LARGE,
351+
f16::from_f32(-1.0),
352+
f16::from_f32(-0.0),
353+
f16::from_f32(0.0),
354+
f16::from_f32(1.0),
355+
FLOAT16_NAN_SMALL,
356+
f16::from_f32(3.0),
357+
FLOAT16_NAN_LARGE,
358+
];
359+
let batch = RecordBatch::try_new(
360+
schema.clone(),
361+
vec![
362+
Arc::new(Float32Array::from(float_data)),
363+
Arc::new(Float64Array::from(double_data)),
364+
Arc::new(Float16Array::from(float16_data)),
365+
],
366+
)
367+
.unwrap();
368+
writer.write(&batch).expect("writing batch2");
369+
370+
// batch 3: all NaN
371+
let float_data = vec![
372+
FLOAT_NEG_NAN_SMALL,
373+
FLOAT_NEG_NAN_LARGE,
374+
FLOAT_NAN_SMALL,
375+
FLOAT_NAN_LARGE,
376+
FLOAT_NEG_NAN_SMALL,
377+
FLOAT_NEG_NAN_LARGE,
378+
FLOAT_NAN_SMALL,
379+
FLOAT_NAN_LARGE,
380+
FLOAT_NEG_NAN_SMALL,
381+
FLOAT_NAN_LARGE,
382+
];
383+
let double_data = vec![
384+
DOUBLE_NEG_NAN_SMALL,
385+
DOUBLE_NEG_NAN_LARGE,
386+
DOUBLE_NAN_SMALL,
387+
DOUBLE_NAN_LARGE,
388+
DOUBLE_NEG_NAN_SMALL,
389+
DOUBLE_NEG_NAN_LARGE,
390+
DOUBLE_NAN_SMALL,
391+
DOUBLE_NAN_LARGE,
392+
DOUBLE_NEG_NAN_SMALL,
393+
DOUBLE_NAN_LARGE,
394+
];
395+
let float16_data = vec![
396+
FLOAT16_NEG_NAN_SMALL,
397+
FLOAT16_NEG_NAN_LARGE,
398+
FLOAT16_NAN_SMALL,
399+
FLOAT16_NAN_LARGE,
400+
FLOAT16_NEG_NAN_SMALL,
401+
FLOAT16_NEG_NAN_LARGE,
402+
FLOAT16_NAN_SMALL,
403+
FLOAT16_NAN_LARGE,
404+
FLOAT16_NEG_NAN_SMALL,
405+
FLOAT16_NAN_LARGE,
406+
];
407+
let batch = RecordBatch::try_new(
408+
schema.clone(),
409+
vec![
410+
Arc::new(Float32Array::from(float_data)),
411+
Arc::new(Float64Array::from(double_data)),
412+
Arc::new(Float16Array::from(float16_data)),
413+
],
414+
)
415+
.unwrap();
416+
writer.write(&batch).expect("writing batch3");
417+
418+
// batch 4: 0 min
419+
let batch = make_batch(&[0.0f32, 0.0, 0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 5.0]);
420+
writer.write(&batch).expect("writing batch4");
421+
422+
// batch 5: -0 max
423+
let batch = make_batch(&[
424+
-5.0f32, -4.0, -3.0, -2.0, -1.5, -1.0, -0.5, -0.0, -0.0, -0.0,
425+
]);
426+
writer.write(&batch).expect("writing batch5");
427+
428+
let write_meta = writer.close().expect("closing file");
429+
let parquet_schema = write_meta.file_metadata().schema_descr();
430+
println!("validate writer output");
431+
validate_metadata(&write_meta, schema.as_ref(), parquet_schema)
432+
.expect("validate written metadata");
433+
}
434+
435+
//fs::write("output2.pq", outbuf.clone()).unwrap();
270436

271437
// now re-validate the bit we've written
272438
let options = ArrowReaderOptions::new()

0 commit comments

Comments
 (0)