Skip to content

Commit aae2c0d

Browse files
committed
Test more files and add tolerance
1 parent e4662ee commit aae2c0d

File tree

1 file changed

+40
-9
lines changed

1 file changed

+40
-9
lines changed

parquet/tests/metadata_memory.rs

Lines changed: 40 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
2020
use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions};
2121
use parquet::encryption::decrypt::FileDecryptionProperties;
22+
use parquet::file::metadata::PageIndexPolicy;
2223
use std::alloc::{GlobalAlloc, Layout, System};
2324
use std::fs::File;
2425
use std::hint::black_box;
@@ -69,10 +70,24 @@ fn test_metadata_heap_memory() {
6970
// Run test cases sequentially so that heap allocations
7071
// are restricted to a single test case at a time.
7172
let test_data = arrow::util::test_util::parquet_test_data();
73+
let reader_options =
74+
ArrowReaderOptions::default().with_page_index_policy(PageIndexPolicy::Required);
7275

7376
{
7477
let path = format!("{test_data}/alltypes_dictionary.parquet");
75-
verify_metadata_heap_memory(&path, ArrowReaderOptions::default);
78+
verify_metadata_heap_memory(&path, 0.0, || reader_options.clone());
79+
}
80+
81+
{
82+
// Calculated heap size doesn't match exactly, possibly due to extra overhead not accounted
83+
// for in the HeapSize implementation for parquet::data_type::ByteArray.
84+
let path = format!("{test_data}/alltypes_tiny_pages_plain.parquet");
85+
verify_metadata_heap_memory(&path, 0.02, || reader_options.clone());
86+
}
87+
88+
{
89+
let path = format!("{test_data}/data_index_bloom_encoding_with_length.parquet");
90+
verify_metadata_heap_memory(&path, 0.02, || reader_options.clone());
7691
}
7792

7893
{
@@ -90,14 +105,16 @@ fn test_metadata_heap_memory() {
90105
.with_column_key("float_field", column_2_key.into())
91106
.build()
92107
.unwrap();
93-
ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties)
108+
reader_options
109+
.clone()
110+
.with_file_decryption_properties(decryption_properties)
94111
};
95112

96-
verify_metadata_heap_memory(&path, get_options);
113+
verify_metadata_heap_memory(&path, 0.0, get_options);
97114
}
98115
}
99116

100-
fn verify_metadata_heap_memory<F>(path: &str, get_options: F)
117+
fn verify_metadata_heap_memory<F>(path: &str, rel_tol: f64, get_options: F)
101118
where
102119
F: FnOnce() -> ArrowReaderOptions,
103120
{
@@ -117,9 +134,23 @@ where
117134
black_box(metadata);
118135

119136
assert!(metadata_heap_size > 0);
120-
assert_eq!(
121-
metadata_heap_size, allocated,
122-
"Calculated heap size {} doesn't match allocated size {} for file {}",
123-
metadata_heap_size, allocated, path
124-
);
137+
if rel_tol == 0.0 {
138+
assert_eq!(
139+
metadata_heap_size, allocated,
140+
"Calculated heap size {} doesn't match the allocated size {} for file {}",
141+
metadata_heap_size, allocated, path
142+
);
143+
} else {
144+
assert!(rel_tol > 0.0 && rel_tol < 1.0);
145+
let min_size = ((allocated as f64) * (1.0 - rel_tol)) as usize;
146+
let max_size = ((allocated as f64) * (1.0 + rel_tol)) as usize;
147+
assert!(
148+
metadata_heap_size >= min_size && metadata_heap_size <= max_size,
149+
"Calculated heap size {} doesn't match the allocated size {} within a relative tolerance of {} for file {}",
150+
metadata_heap_size,
151+
allocated,
152+
rel_tol,
153+
path
154+
);
155+
}
125156
}

0 commit comments

Comments
 (0)