@@ -67,6 +67,8 @@ pub const DEFAULT_STATISTICS_TRUNCATE_LENGTH: Option<usize> = Some(64);
6767pub const DEFAULT_OFFSET_INDEX_DISABLED : bool = false ;
6868/// Default value for [`WriterProperties::coerce_types`]
6969pub const DEFAULT_COERCE_TYPES : bool = false ;
70+ /// Default value for [`WriterProperties::write_path_in_schema`]: `true` (the field is written).
71+ pub const DEFAULT_WRITE_PATH_IN_SCHEMA : bool = true ;
7072/// Default minimum chunk size for content-defined chunking: 256 KiB.
7173pub const DEFAULT_CDC_MIN_CHUNK_SIZE : usize = 256 * 1024 ;
7274/// Default maximum chunk size for content-defined chunking: 1024 KiB.
@@ -233,6 +235,7 @@ pub struct WriterProperties {
233235 statistics_truncate_length : Option < usize > ,
234236 coerce_types : bool ,
235237 content_defined_chunking : Option < CdcOptions > ,
238+ write_path_in_schema : bool ,
236239 #[ cfg( feature = "encryption" ) ]
237240 pub ( crate ) file_encryption_properties : Option < Arc < FileEncryptionProperties > > ,
238241}
@@ -429,6 +432,14 @@ impl WriterProperties {
429432 self . coerce_types
430433 }
431434
435+ /// Returns `true` if the `path_in_schema` field of the `ColumnMetaData` Thrift struct
436+ /// should be written (the builder default is [`DEFAULT_WRITE_PATH_IN_SCHEMA`]).
437+ ///
438+ /// For more details see [`WriterPropertiesBuilder::set_write_path_in_schema`].
439+ pub fn write_path_in_schema ( & self ) -> bool {
440+ self . write_path_in_schema
441+ }
442+
432443 /// EXPERIMENTAL: Returns content-defined chunking options, or `None` if CDC is disabled.
433444 ///
434445 /// For more details see [`WriterPropertiesBuilder::set_content_defined_chunking`]
@@ -560,6 +571,7 @@ pub struct WriterPropertiesBuilder {
560571 statistics_truncate_length : Option < usize > ,
561572 coerce_types : bool ,
562573 content_defined_chunking : Option < CdcOptions > ,
574+ write_path_in_schema : bool ,
563575 #[ cfg( feature = "encryption" ) ]
564576 file_encryption_properties : Option < Arc < FileEncryptionProperties > > ,
565577}
@@ -584,6 +596,7 @@ impl Default for WriterPropertiesBuilder {
584596 statistics_truncate_length : DEFAULT_STATISTICS_TRUNCATE_LENGTH ,
585597 coerce_types : DEFAULT_COERCE_TYPES ,
586598 content_defined_chunking : None ,
599+ write_path_in_schema : DEFAULT_WRITE_PATH_IN_SCHEMA ,
587600 #[ cfg( feature = "encryption" ) ]
588601 file_encryption_properties : None ,
589602 }
@@ -622,6 +635,7 @@ impl WriterPropertiesBuilder {
622635 statistics_truncate_length : self . statistics_truncate_length ,
623636 coerce_types : self . coerce_types ,
624637 content_defined_chunking : self . content_defined_chunking ,
638+ write_path_in_schema : self . write_path_in_schema ,
625639 #[ cfg( feature = "encryption" ) ]
626640 file_encryption_properties : self . file_encryption_properties ,
627641 }
@@ -837,6 +851,22 @@ impl WriterPropertiesBuilder {
837851 self
838852 }
839853
854+ /// Sets whether the writer should emit the `path_in_schema` element of the
855+ /// `ColumnMetaData` Thrift struct.
856+ ///
857+ /// The `path_in_schema` field in the Thrift metadata is redundant and wastes a great
858+ /// deal of space. Parquet file footers can be made much smaller by omitting this field.
859+ /// Because the field was originally a mandatory field, this property defaults to `true`
860+ /// to maintain compatibility with older readers that expect this field to be present.
861+ /// If one knows that all readers one plans to use are tolerant of the absence of this field,
862+ /// this may be safely set to `false`.
863+ ///
864+ /// At some point in the future this will default to `false`.
865+ pub fn set_write_path_in_schema ( mut self , write_path_in_schema : bool ) -> Self {
866+ self . write_path_in_schema = write_path_in_schema;
867+ self
868+ }
869+
840870 /// EXPERIMENTAL: Sets content-defined chunking options, or disables CDC with `None`.
841871 ///
842872 /// When enabled, data page boundaries are determined by a rolling hash of the
@@ -1157,6 +1187,7 @@ impl From<WriterProperties> for WriterPropertiesBuilder {
11571187 statistics_truncate_length : props. statistics_truncate_length ,
11581188 coerce_types : props. coerce_types ,
11591189 content_defined_chunking : props. content_defined_chunking ,
1190+ write_path_in_schema : props. write_path_in_schema ,
11601191 #[ cfg( feature = "encryption" ) ]
11611192 file_encryption_properties : props. file_encryption_properties ,
11621193 }
0 commit comments