1919
2020use parquet:: arrow:: arrow_reader:: { ArrowReaderMetadata , ArrowReaderOptions } ;
2121use parquet:: encryption:: decrypt:: FileDecryptionProperties ;
22+ use parquet:: file:: metadata:: PageIndexPolicy ;
2223use std:: alloc:: { GlobalAlloc , Layout , System } ;
2324use std:: fs:: File ;
2425use std:: hint:: black_box;
@@ -69,10 +70,24 @@ fn test_metadata_heap_memory() {
6970 // Run test cases sequentially so that heap allocations
7071 // are restricted to a single test case at a time.
7172 let test_data = arrow:: util:: test_util:: parquet_test_data ( ) ;
73+ let reader_options =
74+ ArrowReaderOptions :: default ( ) . with_page_index_policy ( PageIndexPolicy :: Required ) ;
7275
7376 {
7477 let path = format ! ( "{test_data}/alltypes_dictionary.parquet" ) ;
75- verify_metadata_heap_memory ( & path, ArrowReaderOptions :: default) ;
78+ verify_metadata_heap_memory ( & path, 0.0 , || reader_options. clone ( ) ) ;
79+ }
80+
81+ {
82+ // Calculated heap size doesn't match exactly, possibly due to extra overhead not accounted
83+ // for in the HeapSize implementation for parquet::data_type::ByteArray.
84+ let path = format ! ( "{test_data}/alltypes_tiny_pages_plain.parquet" ) ;
85+ verify_metadata_heap_memory ( & path, 0.02 , || reader_options. clone ( ) ) ;
86+ }
87+
88+ {
89+ let path = format ! ( "{test_data}/data_index_bloom_encoding_with_length.parquet" ) ;
90+ verify_metadata_heap_memory ( & path, 0.02 , || reader_options. clone ( ) ) ;
7691 }
7792
7893 {
@@ -90,14 +105,16 @@ fn test_metadata_heap_memory() {
90105 . with_column_key ( "float_field" , column_2_key. into ( ) )
91106 . build ( )
92107 . unwrap ( ) ;
93- ArrowReaderOptions :: default ( ) . with_file_decryption_properties ( decryption_properties)
108+ reader_options
109+ . clone ( )
110+ . with_file_decryption_properties ( decryption_properties)
94111 } ;
95112
96- verify_metadata_heap_memory ( & path, get_options) ;
113+ verify_metadata_heap_memory ( & path, 0.0 , get_options) ;
97114 }
98115}
99116
100- fn verify_metadata_heap_memory < F > ( path : & str , get_options : F )
117+ fn verify_metadata_heap_memory < F > ( path : & str , rel_tol : f64 , get_options : F )
101118where
102119 F : FnOnce ( ) -> ArrowReaderOptions ,
103120{
@@ -117,9 +134,23 @@ where
117134 black_box ( metadata) ;
118135
119136 assert ! ( metadata_heap_size > 0 ) ;
120- assert_eq ! (
121- metadata_heap_size, allocated,
122- "Calculated heap size {} doesn't match allocated size {} for file {}" ,
123- metadata_heap_size, allocated, path
124- ) ;
137+ if rel_tol == 0.0 {
138+ assert_eq ! (
139+ metadata_heap_size, allocated,
140+ "Calculated heap size {} doesn't match the allocated size {} for file {}" ,
141+ metadata_heap_size, allocated, path
142+ ) ;
143+ } else {
144+ assert ! ( rel_tol > 0.0 && rel_tol < 1.0 ) ;
145+ let min_size = ( ( allocated as f64 ) * ( 1.0 - rel_tol) ) as usize ;
146+ let max_size = ( ( allocated as f64 ) * ( 1.0 + rel_tol) ) as usize ;
147+ assert ! (
148+ metadata_heap_size >= min_size && metadata_heap_size <= max_size,
149+ "Calculated heap size {} doesn't match the allocated size {} within a relative tolerance of {} for file {}" ,
150+ metadata_heap_size,
151+ allocated,
152+ rel_tol,
153+ path
154+ ) ;
155+ }
125156}
0 commit comments