Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions parquet/src/arrow/arrow_reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5048,10 +5048,13 @@ mod tests {
.expect("Error creating reader builder");

let schema = builder.metadata().file_metadata().schema_descr();
assert_eq!(schema.column(0).logical_type(), Some(LogicalType::String));
assert_eq!(
schema.column(1).logical_type(),
Some(LogicalType::_Unknown { field_id: 2555 })
schema.column(0).logical_type_ref(),
Some(&LogicalType::String)
);
assert_eq!(
schema.column(1).logical_type_ref(),
Some(&LogicalType::_Unknown { field_id: 2555 })
);
assert_eq!(schema.column(1).physical_type(), PhysicalType::BYTE_ARRAY);

Expand Down
4 changes: 2 additions & 2 deletions parquet/src/arrow/schema/extension.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ pub(crate) fn try_add_extension_type(
mut arrow_field: Field,
parquet_type: &Type,
) -> Result<Field, ParquetError> {
let Some(parquet_logical_type) = parquet_type.get_basic_info().logical_type() else {
let Some(parquet_logical_type) = parquet_type.get_basic_info().logical_type_ref() else {
return Ok(arrow_field);
};
match parquet_logical_type {
Expand All @@ -65,7 +65,7 @@ pub(crate) fn try_add_extension_type(
///
/// This is used to preallocate the metadata hashmap size
pub(crate) fn has_extension_type(parquet_type: &Type) -> bool {
let Some(parquet_logical_type) = parquet_type.get_basic_info().logical_type() else {
let Some(parquet_logical_type) = parquet_type.get_basic_info().logical_type_ref() else {
return false;
};
match parquet_logical_type {
Expand Down
10 changes: 5 additions & 5 deletions parquet/src/arrow/schema/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1840,7 +1840,7 @@ mod tests {
// This is because the Arrow conversion always sets logical type,
// even if there wasn't originally one.
// This is not an issue, but is an inconvenience for this test.
match a.logical_type() {
match a.logical_type_ref() {
Some(_) => {
assert_eq!(a, b)
}
Expand Down Expand Up @@ -2216,8 +2216,8 @@ mod tests {
let parquet_schema = ArrowSchemaConverter::new().convert(&arrow_schema)?;

assert_eq!(
parquet_schema.column(0).logical_type(),
Some(LogicalType::Uuid)
parquet_schema.column(0).logical_type_ref(),
Some(&LogicalType::Uuid)
);

let arrow_schema = parquet_to_arrow_schema(&parquet_schema, None)?;
Expand All @@ -2237,8 +2237,8 @@ mod tests {
let parquet_schema = ArrowSchemaConverter::new().convert(&arrow_schema)?;

assert_eq!(
parquet_schema.column(0).logical_type(),
Some(LogicalType::Json)
parquet_schema.column(0).logical_type_ref(),
Some(&LogicalType::Json)
);

let arrow_schema = parquet_to_arrow_schema(&parquet_schema, None)?;
Expand Down
24 changes: 14 additions & 10 deletions parquet/src/arrow/schema/primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ fn decimal_256_type(scale: i32, precision: i32) -> Result<DataType> {
}

fn from_int32(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataType> {
match (info.logical_type(), info.converted_type()) {
match (info.logical_type_ref(), info.converted_type()) {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In from_int32, the arm => match (bit_width, is_signed) is matching references (from logical_type_ref()) against value patterns, which will not match/compile; consider dereferencing before matching or using reference patterns (also applies to similar cases in this function).

🤖 Was this useful? React with 👍 or 👎

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

value:annoying; category:bug; feedback:The Augment AI reviewer is not correct! Here the types are i32 which implement Copy trait and there is no need to dereference them

(None, ConvertedType::NONE) => Ok(DataType::Int32),
(
Some(
Expand All @@ -183,7 +183,9 @@ fn from_int32(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy
(32, false) => Ok(DataType::UInt32),
_ => Err(arrow_err!("Cannot create INT32 physical type from {:?}", t)),
},
(Some(LogicalType::Decimal { scale, precision }), _) => decimal_128_type(scale, precision),
(Some(LogicalType::Decimal { scale, precision }), _) => {
decimal_128_type(*scale, *precision)
}
(Some(LogicalType::Date), _) => Ok(DataType::Date32),
(Some(LogicalType::Time { unit, .. }), _) => match unit {
ParquetTimeUnit::MILLIS => Ok(DataType::Time32(TimeUnit::Millisecond)),
Expand Down Expand Up @@ -212,7 +214,7 @@ fn from_int32(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy
}

fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataType> {
match (info.logical_type(), info.converted_type()) {
match (info.logical_type_ref(), info.converted_type()) {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In from_int64, the arm => match is_signed { ... } matches a &bool (from logical_type_ref()) against true/false; this likely won’t compile without dereferencing or changing the patterns to reference forms.

🤖 Was this useful? React with 👍 or 👎

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

value:annoying; category:bug; feedback:The Augment AI reviewer is not correct! Here the types are i64 which implement Copy trait and there is no need to dereference them

(None, ConvertedType::NONE) => Ok(DataType::Int64),
(
Some(LogicalType::Integer {
Expand Down Expand Up @@ -243,7 +245,7 @@ fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy
ParquetTimeUnit::MICROS => TimeUnit::Microsecond,
ParquetTimeUnit::NANOS => TimeUnit::Nanosecond,
},
if is_adjusted_to_u_t_c {
if *is_adjusted_to_u_t_c {
Some("UTC".into())
} else {
None
Expand All @@ -260,7 +262,9 @@ fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy
TimeUnit::Microsecond,
Some("UTC".into()),
)),
(Some(LogicalType::Decimal { scale, precision }), _) => decimal_128_type(scale, precision),
(Some(LogicalType::Decimal { scale, precision }), _) => {
decimal_128_type(*scale, *precision)
}
(None, ConvertedType::DECIMAL) => decimal_128_type(scale, precision),
(logical, converted) => Err(arrow_err!(
"Unable to convert parquet INT64 logical type {:?} or converted type {}",
Expand All @@ -271,7 +275,7 @@ fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy
}

fn from_byte_array(info: &BasicTypeInfo, precision: i32, scale: i32) -> Result<DataType> {
match (info.logical_type(), info.converted_type()) {
match (info.logical_type_ref(), info.converted_type()) {
(Some(LogicalType::String), _) => Ok(DataType::Utf8),
(Some(LogicalType::Json), _) => Ok(DataType::Utf8),
(Some(LogicalType::Bson), _) => Ok(DataType::Binary),
Expand All @@ -290,7 +294,7 @@ fn from_byte_array(info: &BasicTypeInfo, precision: i32, scale: i32) -> Result<D
precision: p,
}),
_,
) => decimal_type(s, p),
) => decimal_type(*s, *p),
(None, ConvertedType::DECIMAL) => decimal_type(scale, precision),
(logical, converted) => Err(arrow_err!(
"Unable to convert parquet BYTE_ARRAY logical type {:?} or converted type {}",
Expand All @@ -307,12 +311,12 @@ fn from_fixed_len_byte_array(
type_length: i32,
) -> Result<DataType> {
// TODO: This should check the type length for the decimal and interval types
match (info.logical_type(), info.converted_type()) {
match (info.logical_type_ref(), info.converted_type()) {
(Some(LogicalType::Decimal { scale, precision }), _) => {
if type_length <= 16 {
decimal_128_type(scale, precision)
decimal_128_type(*scale, *precision)
} else {
decimal_256_type(scale, precision)
decimal_256_type(*scale, *precision)
}
}
(None, ConvertedType::DECIMAL) => {
Expand Down
14 changes: 13 additions & 1 deletion parquet/src/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1132,12 +1132,24 @@ pub enum ColumnOrder {

impl ColumnOrder {
/// Returns sort order for a physical/logical type.
#[deprecated(
since = "57.1.0",
note = "use `ColumnOrder::sort_order_for_type` instead"
)]
pub fn get_sort_order(
logical_type: Option<LogicalType>,
converted_type: ConvertedType,
physical_type: Type,
) -> SortOrder {
// TODO: Should this take converted and logical type, for compatibility?
Self::sort_order_for_type(logical_type.as_ref(), converted_type, physical_type)
}
Comment on lines +1135 to +1145
Copy link
Copy Markdown

@coderabbitai coderabbitai bot Nov 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

Deprecation is fine; verify the version string before tagging.

#[deprecated(since = "57.1.0", ..)] is okay now, but please confirm the crate version at release time to avoid stale metadata.

Run to spot-check before cutting a release:


🏁 Script executed:

#!/bin/bash
# Verify any deprecated attrs reference the intended next version
rg -nP 'deprecated\(\s*since\s*=\s*"[^"]+"\s*,\s*note' -C1

Length of output: 4650



Wait, I need to run this with the repo cloned. Let me correct:




🏁 Script executed:

#!/bin/bash
# Check parquet crate version from Cargo.toml
fd -n "Cargo.toml" -x grep -A5 "^\[package\]" {} \; | head -20

# Also search for all deprecated attributes including multiline ones using rg
echo "=== All deprecated attributes in parquet/src/basic.rs ==="
rg -nP '#\[deprecated' parquet/src/basic.rs -A3

Length of output: 604


🏁 Script executed:

#!/bin/bash
# Find and display parquet crate version
find . -path "*/parquet/Cargo.toml" -exec grep -A 5 '^\[package\]' {} \; | head -20

# Check all deprecated attrs in parquet/src/basic.rs with full context
rg -nP '#\[deprecated' parquet/src/basic.rs -A5 --multiline-dotall

Length of output: 654



---

🏁 Script executed:

```shell
#!/bin/bash
# Find workspace version
grep -A 20 '^\[workspace' Cargo.toml | head -30

# Also check if there's a workspace.package section with version
grep -A 50 '^\[workspace.package\]' Cargo.toml | grep version

Length of output: 2255


Version mismatch detected—confirm deprecation since version before release.

The deprecation attribute uses since = "57.1.0" but the workspace version is currently 57.0.0. Either the version should be 57.0.0 (if deprecating in the current release) or confirm this is intentionally forward-dated for a planned 57.1.0 patch release. This requires clarification before cutting any release.

🤖 Prompt for AI Agents
In parquet/src/basic.rs around lines 1135 to 1145, the deprecation attribute on
get_sort_order uses since = "57.1.0" which doesn't match the workspace version
57.0.0; change the attribute to since = "57.0.0" if you intend this deprecation
to apply in the current release, or confirm and document that the forward-dated
"57.1.0" is intentional for a planned patch release (and leave as-is). Ensure
the final change is reflected in the commit message and any release notes.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

value:annoying; category:bug; feedback: The CodeRabbit AI reviewer is not correct! According to https://crates.io/crates/arrow v57.0.0 is the latest released one (https://crates.io/crates/arrow/57.0.0), so the next one would be 57.1.0 or 58.0.0

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧩 Analysis chain

🌐 Web query:

crates.io arrow crate version 57.0.0

💡 Result:

The Apache Arrow Rust crate version 57.0.0 was released and published to crates.io on October 30, 2025. Notable items in the release include a much faster Parquet metadata parser, a new arrow-avro crate, and Parquet Variant/Geometry support. See the release announcement and docs for details. [1][2][3]

Sources:

  • Apache Arrow release announcement (Oct 30, 2025). [1]
  • ARROW_VERSION / docs showing "57.0.0". [2]
  • Docs.rs / crate documentation for arrow (57.0.0). [3]

@martin-augment, you're absolutely correct, and I apologize for the incorrect analysis.

Version 57.0.0 is indeed the latest released version on crates.io, published on October 30, 2025. Using since = "57.1.0" in the deprecation attribute is the correct approach because:

  1. Standard practice: Deprecation attributes should reference the version in which the deprecation will be released, not the current development version
  2. Next release: Since this PR is being developed after the 57.0.0 release, it will be part of either 57.1.0 or 58.0.0
  3. Forward-looking: The since parameter tells users "this was deprecated starting from version X", which will be accurate when the next version is published

The deprecation annotation is properly configured for the upcoming release. My previous comment incorrectly flagged this as a version mismatch when it's actually following best practices. Thank you for the correction!


/// Returns sort order for a physical/logical type.
pub fn sort_order_for_type(
logical_type: Option<&LogicalType>,
converted_type: ConvertedType,
physical_type: Type,
) -> SortOrder {
match logical_type {
Some(logical) => match logical {
LogicalType::String | LogicalType::Enum | LogicalType::Json | LogicalType::Bson => {
Expand Down
8 changes: 4 additions & 4 deletions parquet/src/file/metadata/thrift/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -784,8 +784,8 @@ pub(crate) fn parquet_metadata_from_bytes(buf: &[u8]) -> Result<ParquetMetaData>
let column_orders = column_orders.map(|mut cos| {
for (i, column) in schema_descr.columns().iter().enumerate() {
if let ColumnOrder::TYPE_DEFINED_ORDER(_) = cos[i] {
let sort_order = ColumnOrder::get_sort_order(
column.logical_type(),
let sort_order = ColumnOrder::sort_order_for_type(
column.logical_type_ref(),
column.converted_type(),
column.physical_type(),
);
Expand Down Expand Up @@ -1357,7 +1357,7 @@ fn write_schema_helper<W: Write>(
} else {
None
},
logical_type: basic_info.logical_type(),
logical_type: basic_info.logical_type_ref().cloned(),
};
element.write_thrift(writer)
}
Expand Down Expand Up @@ -1385,7 +1385,7 @@ fn write_schema_helper<W: Write>(
} else {
None
},
logical_type: basic_info.logical_type(),
logical_type: basic_info.logical_type_ref().cloned(),
};

element.write_thrift(writer)?;
Expand Down
4 changes: 2 additions & 2 deletions parquet/src/file/metadata/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,8 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
.columns()
.iter()
.map(|col| {
let sort_order = ColumnOrder::get_sort_order(
col.logical_type(),
let sort_order = ColumnOrder::sort_order_for_type(
col.logical_type_ref(),
col.converted_type(),
col.physical_type(),
);
Expand Down
8 changes: 4 additions & 4 deletions parquet/src/file/serialized_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2704,12 +2704,12 @@ mod tests {

let schema = reader.metadata().file_metadata().schema_descr();
assert_eq!(
schema.column(0).logical_type(),
Some(basic::LogicalType::String)
schema.column(0).logical_type_ref(),
Some(&basic::LogicalType::String)
);
assert_eq!(
schema.column(1).logical_type(),
Some(basic::LogicalType::_Unknown { field_id: 2555 })
schema.column(1).logical_type_ref(),
Some(&basic::LogicalType::_Unknown { field_id: 2555 })
);
assert_eq!(schema.column(1).physical_type(), Type::BYTE_ARRAY);

Expand Down
2 changes: 1 addition & 1 deletion parquet/src/geospatial/accumulator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ pub struct DefaultGeoStatsAccumulatorFactory {}
impl GeoStatsAccumulatorFactory for DefaultGeoStatsAccumulatorFactory {
fn new_accumulator(&self, _descr: &ColumnDescPtr) -> Box<dyn GeoStatsAccumulator> {
#[cfg(feature = "geospatial")]
if let Some(crate::basic::LogicalType::Geometry { .. }) = _descr.logical_type() {
if let Some(crate::basic::LogicalType::Geometry { .. }) = _descr.logical_type_ref() {
Box::new(ParquetGeoStatsAccumulator::default())
} else {
Box::new(VoidGeoStatsAccumulator::default())
Expand Down
4 changes: 2 additions & 2 deletions parquet/src/schema/printer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ impl Printer<'_> {
// Also print logical type if it is available
// If there is a logical type, do not print converted type
let logical_type_str = print_logical_and_converted(
basic_info.logical_type().as_ref(),
basic_info.logical_type_ref(),
basic_info.converted_type(),
precision,
scale,
Expand All @@ -426,7 +426,7 @@ impl Printer<'_> {
write!(self.output, "[{}] ", basic_info.id());
}
let logical_str = print_logical_and_converted(
basic_info.logical_type().as_ref(),
basic_info.logical_type_ref(),
basic_info.converted_type(),
0,
0,
Expand Down
27 changes: 19 additions & 8 deletions parquet/src/schema/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,8 @@ impl Type {
pub(crate) fn is_list(&self) -> bool {
if self.is_group() {
let basic_info = self.get_basic_info();
if let Some(logical_type) = basic_info.logical_type() {
return logical_type == LogicalType::List;
if let Some(logical_type) = basic_info.logical_type_ref() {
return logical_type == &LogicalType::List;
}
return basic_info.converted_type() == ConvertedType::LIST;
}
Expand Down Expand Up @@ -710,6 +710,10 @@ impl BasicTypeInfo {
///
/// Note that this function will clone the `LogicalType`. If performance is a concern,
/// use [`Self::logical_type_ref`] instead.
#[deprecated(
since = "57.1.0",
note = "use `BasicTypeInfo::logical_type_ref` instead (LogicalType cloning is non trivial)"
)]
pub fn logical_type(&self) -> Option<LogicalType> {
// Unlike ConvertedType, LogicalType cannot implement Copy, thus we clone it
self.logical_type.clone()
Expand Down Expand Up @@ -919,8 +923,15 @@ impl ColumnDescriptor {
///
/// Note that this function will clone the `LogicalType`. If performance is a concern,
/// use [`Self::logical_type_ref`] instead.
#[deprecated(
since = "57.1.0",
note = "use `ColumnDescriptor::logical_type_ref` instead (LogicalType cloning is non trivial)"
)]
pub fn logical_type(&self) -> Option<LogicalType> {
self.primitive_type.get_basic_info().logical_type()
self.primitive_type
.get_basic_info()
.logical_type_ref()
.cloned()
}

/// Returns a reference to the [`LogicalType`] for this column.
Expand Down Expand Up @@ -966,8 +977,8 @@ impl ColumnDescriptor {

/// Returns the sort order for this column
pub fn sort_order(&self) -> SortOrder {
ColumnOrder::get_sort_order(
self.logical_type(),
ColumnOrder::sort_order_for_type(
self.logical_type_ref(),
self.converted_type(),
self.physical_type(),
)
Expand Down Expand Up @@ -1417,8 +1428,8 @@ mod tests {
let basic_info = tp.get_basic_info();
assert_eq!(basic_info.repetition(), Repetition::OPTIONAL);
assert_eq!(
basic_info.logical_type(),
Some(LogicalType::Integer {
basic_info.logical_type_ref(),
Some(&LogicalType::Integer {
bit_width: 32,
is_signed: true
})
Expand Down Expand Up @@ -1768,7 +1779,7 @@ mod tests {
assert!(tp.is_group());
assert!(!tp.is_primitive());
assert_eq!(basic_info.repetition(), Repetition::REPEATED);
assert_eq!(basic_info.logical_type(), Some(LogicalType::List));
assert_eq!(basic_info.logical_type_ref(), Some(&LogicalType::List));
assert_eq!(basic_info.converted_type(), ConvertedType::LIST);
assert_eq!(basic_info.id(), 1);
assert_eq!(tp.get_fields().len(), 2);
Expand Down
4 changes: 2 additions & 2 deletions parquet/src/variant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,8 @@ mod tests {
assert_eq!(field.name(), "data");
// data should have been written with the Variant logical type
assert_eq!(
field.get_basic_info().logical_type(),
Some(crate::basic::LogicalType::Variant {
field.get_basic_info().logical_type_ref(),
Some(&crate::basic::LogicalType::Variant {
specification_version: None
})
);
Expand Down
27 changes: 10 additions & 17 deletions parquet/tests/geospatial.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,28 +66,21 @@ fn test_read_logical_type() {

for (geospatial_file, expected_type) in expected_logical_type {
let metadata = read_metadata(geospatial_file);
let logical_type = metadata
.file_metadata()
.schema_descr()
.column(1)
.logical_type()
.unwrap();

assert_eq!(logical_type, expected_type);
let column_descr = metadata.file_metadata().schema_descr().column(1);
let logical_type = column_descr.logical_type_ref().unwrap();

assert_eq!(logical_type, &expected_type);
}

// The crs value may also contain arbitrary values (in this case some JSON
// a bit too lengthy to type out)
let metadata = read_metadata("crs-arbitrary-value.parquet");
let logical_type = metadata
.file_metadata()
.schema_descr()
.column(1)
.logical_type()
.unwrap();
let column_descr = metadata.file_metadata().schema_descr().column(1);
let logical_type = column_descr.logical_type_ref().unwrap();

if let LogicalType::Geometry { crs } = logical_type {
let crs_parsed: Value = serde_json::from_str(&crs.unwrap()).unwrap();
let crs = crs.as_ref();
let crs_parsed: Value = serde_json::from_str(crs.unwrap()).unwrap();
assert_eq!(crs_parsed.get("id").unwrap().get("code").unwrap(), 5070);
} else {
panic!("Expected geometry type but got {logical_type:?}");
Expand All @@ -103,8 +96,8 @@ fn test_read_geospatial_statistics() {
// optional binary field_id=-1 wkt (String);
// optional binary field_id=-1 geometry (Geometry(crs=));
let fields = metadata.file_metadata().schema().get_fields();
let logical_type = fields[2].get_basic_info().logical_type().unwrap();
assert_eq!(logical_type, LogicalType::Geometry { crs: None });
let logical_type = fields[2].get_basic_info().logical_type_ref().unwrap();
assert_eq!(logical_type, &LogicalType::Geometry { crs: None });

let geo_statistics = metadata.row_group(0).column(2).geo_statistics();
assert!(geo_statistics.is_some());
Expand Down
Loading