Skip to content

Commit 3a58d23

Browse files
fix: remove with_encoding_stats_as_mask and modify benchmarks
1 parent 8afb8a6 commit 3a58d23

File tree

3 files changed

+27
-58
lines changed

3 files changed

+27
-58
lines changed

parquet/benches/metadata.rs

Lines changed: 12 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use std::sync::Arc;
2121
use parquet::basic::{Encoding, PageType, Type as PhysicalType};
2222
use parquet::file::metadata::{
2323
ColumnChunkMetaData, FileMetaData, PageEncodingStats, ParquetMetaData, ParquetMetaDataOptions,
24-
ParquetMetaDataReader, ParquetMetaDataWriter, ParquetStatisticsPolicy, RowGroupMetaData,
24+
ParquetMetaDataReader, ParquetMetaDataWriter, RowGroupMetaData,
2525
};
2626
use parquet::file::statistics::Statistics;
2727
use parquet::file::writer::TrackedWrite;
@@ -158,67 +158,42 @@ fn criterion_benchmark(c: &mut Criterion) {
158158
});
159159

160160
let meta_data = get_footer_bytes(data.clone());
161+
let options = ParquetMetaDataOptions::new().with_encoding_stats_as_mask(false);
161162
c.bench_function("decode parquet metadata", |b| {
162-
b.iter(|| {
163-
ParquetMetaDataReader::decode_metadata(&meta_data).unwrap();
164-
})
165-
});
166-
167-
let schema = ParquetMetaDataReader::decode_schema(&meta_data).unwrap();
168-
let options = ParquetMetaDataOptions::new().with_schema(schema);
169-
c.bench_function("decode metadata with schema", |b| {
170163
b.iter(|| {
171164
ParquetMetaDataReader::decode_metadata_with_options(&meta_data, Some(&options))
172165
.unwrap();
173166
})
174167
});
175168

176-
let options = ParquetMetaDataOptions::new().with_encoding_stats_as_mask(true);
177-
c.bench_function("decode metadata with stats mask", |b| {
178-
b.iter(|| {
179-
ParquetMetaDataReader::decode_metadata_with_options(&meta_data, Some(&options))
180-
.unwrap();
181-
})
182-
});
183-
184-
let options =
185-
ParquetMetaDataOptions::new().with_encoding_stats_policy(ParquetStatisticsPolicy::SkipAll);
186-
c.bench_function("decode metadata with skip PES", |b| {
169+
let schema = ParquetMetaDataReader::decode_schema(&meta_data).unwrap();
170+
let options = ParquetMetaDataOptions::new()
171+
.with_schema(schema)
172+
.with_encoding_stats_as_mask(false);
173+
c.bench_function("decode metadata with schema", |b| {
187174
b.iter(|| {
188175
ParquetMetaDataReader::decode_metadata_with_options(&meta_data, Some(&options))
189176
.unwrap();
190177
})
191178
});
192179

193180
let buf: Bytes = black_box(encoded_meta()).into();
181+
let options = ParquetMetaDataOptions::new().with_encoding_stats_as_mask(false);
194182
c.bench_function("decode parquet metadata (wide)", |b| {
195183
b.iter(|| {
196-
ParquetMetaDataReader::decode_metadata(&buf).unwrap();
184+
ParquetMetaDataReader::decode_metadata_with_options(&buf, Some(&options)).unwrap();
197185
})
198186
});
199187

200188
let schema = ParquetMetaDataReader::decode_schema(&buf).unwrap();
201-
let options = ParquetMetaDataOptions::new().with_schema(schema);
189+
let options = ParquetMetaDataOptions::new()
190+
.with_schema(schema)
191+
.with_encoding_stats_as_mask(false);
202192
c.bench_function("decode metadata (wide) with schema", |b| {
203193
b.iter(|| {
204194
ParquetMetaDataReader::decode_metadata_with_options(&buf, Some(&options)).unwrap();
205195
})
206196
});
207-
208-
let options = ParquetMetaDataOptions::new().with_encoding_stats_as_mask(true);
209-
c.bench_function("decode metadata (wide) with stats mask", |b| {
210-
b.iter(|| {
211-
ParquetMetaDataReader::decode_metadata_with_options(&buf, Some(&options)).unwrap();
212-
})
213-
});
214-
215-
let options =
216-
ParquetMetaDataOptions::new().with_encoding_stats_policy(ParquetStatisticsPolicy::SkipAll);
217-
c.bench_function("decode metadata (wide) with skip PES", |b| {
218-
b.iter(|| {
219-
ParquetMetaDataReader::decode_metadata_with_options(&buf, Some(&options)).unwrap();
220-
})
221-
});
222197
}
223198

224199
criterion_group!(benches, criterion_benchmark);

parquet/src/arrow/mod.rs

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -494,7 +494,9 @@ pub fn parquet_column<'a>(
494494
#[cfg(test)]
495495
mod test {
496496
use crate::arrow::ArrowWriter;
497-
use crate::file::metadata::{ParquetMetaData, ParquetMetaDataReader, ParquetMetaDataWriter};
497+
use crate::file::metadata::{
498+
ParquetMetaData, ParquetMetaDataOptions, ParquetMetaDataReader, ParquetMetaDataWriter,
499+
};
498500
use crate::file::properties::{EnabledStatistics, WriterProperties};
499501
use crate::schema::parser::parse_message_type;
500502
use crate::schema::types::SchemaDescriptor;
@@ -511,15 +513,17 @@ mod test {
511513
let parquet_bytes = create_parquet_file();
512514

513515
// read the metadata from the file WITHOUT the page index structures
516+
let options = ParquetMetaDataOptions::new().with_encoding_stats_as_mask(false);
514517
let original_metadata = ParquetMetaDataReader::new()
515-
.with_encoding_stats_as_mask(false)
518+
.with_metadata_options(Some(options))
516519
.parse_and_finish(&parquet_bytes)
517520
.unwrap();
518521

519522
// this should error because the page indexes are not present, but have offsets specified
520523
let metadata_bytes = metadata_to_bytes(&original_metadata);
524+
let options = ParquetMetaDataOptions::new().with_encoding_stats_as_mask(false);
521525
let err = ParquetMetaDataReader::new()
522-
.with_encoding_stats_as_mask(false)
526+
.with_metadata_options(Some(options))
523527
.with_page_indexes(true) // there are no page indexes in the metadata
524528
.parse_and_finish(&metadata_bytes)
525529
.err()
@@ -535,8 +539,9 @@ mod test {
535539
let parquet_bytes = create_parquet_file();
536540

537541
// read the metadata from the file
542+
let options = ParquetMetaDataOptions::new().with_encoding_stats_as_mask(false);
538543
let original_metadata = ParquetMetaDataReader::new()
539-
.with_encoding_stats_as_mask(false)
544+
.with_metadata_options(Some(options))
540545
.parse_and_finish(&parquet_bytes)
541546
.unwrap();
542547

@@ -548,8 +553,9 @@ mod test {
548553
"metadata is subset of parquet"
549554
);
550555

556+
let options = ParquetMetaDataOptions::new().with_encoding_stats_as_mask(false);
551557
let roundtrip_metadata = ParquetMetaDataReader::new()
552-
.with_encoding_stats_as_mask(false)
558+
.with_metadata_options(Some(options))
553559
.parse_and_finish(&metadata_bytes)
554560
.unwrap();
555561

@@ -563,16 +569,18 @@ mod test {
563569

564570
// read the metadata from the file including the page index structures
565571
// (which are stored elsewhere in the footer)
572+
let options = ParquetMetaDataOptions::new().with_encoding_stats_as_mask(false);
566573
let original_metadata = ParquetMetaDataReader::new()
567-
.with_encoding_stats_as_mask(false)
574+
.with_metadata_options(Some(options))
568575
.with_page_indexes(true)
569576
.parse_and_finish(&parquet_bytes)
570577
.unwrap();
571578

572579
// read metadata back from the serialized bytes and ensure it is the same
573580
let metadata_bytes = metadata_to_bytes(&original_metadata);
581+
let options = ParquetMetaDataOptions::new().with_encoding_stats_as_mask(false);
574582
let roundtrip_metadata = ParquetMetaDataReader::new()
575-
.with_encoding_stats_as_mask(false)
583+
.with_metadata_options(Some(options))
576584
.with_page_indexes(true)
577585
.parse_and_finish(&metadata_bytes)
578586
.unwrap();

parquet/src/file/metadata/reader.rs

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -152,20 +152,6 @@ impl ParquetMetaDataReader {
152152
.with_offset_index_policy(policy)
153153
}
154154

155-
/// Sets whether to read [`PageEncodingStats`] as a bitmask.
156-
///
157-
/// See [`ParquetMetaDataOptions::with_encoding_stats_as_mask`] for more details.
158-
pub fn with_encoding_stats_as_mask(mut self, val: bool) -> Self {
159-
let mut options = self
160-
.metadata_options
161-
.as_deref()
162-
.cloned()
163-
.unwrap_or_default();
164-
options = options.with_encoding_stats_as_mask(val);
165-
self.metadata_options = Some(Arc::new(options));
166-
self
167-
}
168-
169155
/// Sets the [`PageIndexPolicy`] for the column index
170156
pub fn with_column_index_policy(mut self, policy: PageIndexPolicy) -> Self {
171157
self.column_index = policy;

0 commit comments

Comments
 (0)