-
Notifications
You must be signed in to change notification settings - Fork 0
8789: [Parquet] Avoid copying LogicalType in ColumnOrder::get_sort_order, deprecate get_logical_type
#15
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
8789: [Parquet] Avoid copying LogicalType in ColumnOrder::get_sort_order, deprecate get_logical_type
#15
Changes from all commits
3dacc15
ff1ffd6
9636941
2b6b876
0e5c0c7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -164,7 +164,7 @@ fn decimal_256_type(scale: i32, precision: i32) -> Result<DataType> { | |
| } | ||
|
|
||
| fn from_int32(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataType> { | ||
| match (info.logical_type(), info.converted_type()) { | ||
| match (info.logical_type_ref(), info.converted_type()) { | ||
| (None, ConvertedType::NONE) => Ok(DataType::Int32), | ||
| ( | ||
| Some( | ||
|
|
@@ -183,7 +183,9 @@ fn from_int32(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy | |
| (32, false) => Ok(DataType::UInt32), | ||
| _ => Err(arrow_err!("Cannot create INT32 physical type from {:?}", t)), | ||
| }, | ||
| (Some(LogicalType::Decimal { scale, precision }), _) => decimal_128_type(scale, precision), | ||
| (Some(LogicalType::Decimal { scale, precision }), _) => { | ||
| decimal_128_type(*scale, *precision) | ||
| } | ||
| (Some(LogicalType::Date), _) => Ok(DataType::Date32), | ||
| (Some(LogicalType::Time { unit, .. }), _) => match unit { | ||
| ParquetTimeUnit::MILLIS => Ok(DataType::Time32(TimeUnit::Millisecond)), | ||
|
|
@@ -212,7 +214,7 @@ fn from_int32(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy | |
| } | ||
|
|
||
| fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataType> { | ||
| match (info.logical_type(), info.converted_type()) { | ||
| match (info.logical_type_ref(), info.converted_type()) { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In 🤖 Was this useful? React with 👍 or 👎
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. value:annoying; category:bug; feedback:The Augment AI reviewer is not correct! Here the types are i64 which implement Copy trait and there is no need to dereference them |
||
| (None, ConvertedType::NONE) => Ok(DataType::Int64), | ||
| ( | ||
| Some(LogicalType::Integer { | ||
|
|
@@ -243,7 +245,7 @@ fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy | |
| ParquetTimeUnit::MICROS => TimeUnit::Microsecond, | ||
| ParquetTimeUnit::NANOS => TimeUnit::Nanosecond, | ||
| }, | ||
| if is_adjusted_to_u_t_c { | ||
| if *is_adjusted_to_u_t_c { | ||
| Some("UTC".into()) | ||
| } else { | ||
| None | ||
|
|
@@ -260,7 +262,9 @@ fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy | |
| TimeUnit::Microsecond, | ||
| Some("UTC".into()), | ||
| )), | ||
| (Some(LogicalType::Decimal { scale, precision }), _) => decimal_128_type(scale, precision), | ||
| (Some(LogicalType::Decimal { scale, precision }), _) => { | ||
| decimal_128_type(*scale, *precision) | ||
| } | ||
| (None, ConvertedType::DECIMAL) => decimal_128_type(scale, precision), | ||
| (logical, converted) => Err(arrow_err!( | ||
| "Unable to convert parquet INT64 logical type {:?} or converted type {}", | ||
|
|
@@ -271,7 +275,7 @@ fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy | |
| } | ||
|
|
||
| fn from_byte_array(info: &BasicTypeInfo, precision: i32, scale: i32) -> Result<DataType> { | ||
| match (info.logical_type(), info.converted_type()) { | ||
| match (info.logical_type_ref(), info.converted_type()) { | ||
| (Some(LogicalType::String), _) => Ok(DataType::Utf8), | ||
| (Some(LogicalType::Json), _) => Ok(DataType::Utf8), | ||
| (Some(LogicalType::Bson), _) => Ok(DataType::Binary), | ||
|
|
@@ -290,7 +294,7 @@ fn from_byte_array(info: &BasicTypeInfo, precision: i32, scale: i32) -> Result<D | |
| precision: p, | ||
| }), | ||
| _, | ||
| ) => decimal_type(s, p), | ||
| ) => decimal_type(*s, *p), | ||
| (None, ConvertedType::DECIMAL) => decimal_type(scale, precision), | ||
| (logical, converted) => Err(arrow_err!( | ||
| "Unable to convert parquet BYTE_ARRAY logical type {:?} or converted type {}", | ||
|
|
@@ -307,12 +311,12 @@ fn from_fixed_len_byte_array( | |
| type_length: i32, | ||
| ) -> Result<DataType> { | ||
| // TODO: This should check the type length for the decimal and interval types | ||
| match (info.logical_type(), info.converted_type()) { | ||
| match (info.logical_type_ref(), info.converted_type()) { | ||
| (Some(LogicalType::Decimal { scale, precision }), _) => { | ||
| if type_length <= 16 { | ||
| decimal_128_type(scale, precision) | ||
| decimal_128_type(*scale, *precision) | ||
| } else { | ||
| decimal_256_type(scale, precision) | ||
| decimal_256_type(*scale, *precision) | ||
| } | ||
| } | ||
| (None, ConvertedType::DECIMAL) => { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1132,12 +1132,24 @@ pub enum ColumnOrder { | |
|
|
||
| impl ColumnOrder { | ||
| /// Returns sort order for a physical/logical type. | ||
| #[deprecated( | ||
| since = "57.1.0", | ||
| note = "use `ColumnOrder::sort_order_for_type` instead" | ||
| )] | ||
| pub fn get_sort_order( | ||
| logical_type: Option<LogicalType>, | ||
| converted_type: ConvertedType, | ||
| physical_type: Type, | ||
| ) -> SortOrder { | ||
| // TODO: Should this take converted and logical type, for compatibility? | ||
| Self::sort_order_for_type(logical_type.as_ref(), converted_type, physical_type) | ||
| } | ||
|
Comment on lines
+1135
to
+1145
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chainDeprecation is fine; verify the version string before tagging.
Run to spot-check before cutting a release: 🏁 Script executed: #!/bin/bash
# Verify any deprecated attrs reference the intended next version
rg -nP 'deprecated\(\s*since\s*=\s*"[^"]+"\s*,\s*note' -C1Length of output: 4650 🏁 Script executed: #!/bin/bash
# Check parquet crate version from Cargo.toml
fd -n "Cargo.toml" -x grep -A5 "^\[package\]" {} \; | head -20
# Also search for all deprecated attributes including multiline ones using rg
echo "=== All deprecated attributes in parquet/src/basic.rs ==="
rg -nP '#\[deprecated' parquet/src/basic.rs -A3Length of output: 604 🏁 Script executed: #!/bin/bash
# Find and display parquet crate version
find . -path "*/parquet/Cargo.toml" -exec grep -A 5 '^\[package\]' {} \; | head -20
# Check all deprecated attrs in parquet/src/basic.rs with full context
rg -nP '#\[deprecated' parquet/src/basic.rs -A5 --multiline-dotallLength of output: 654 Length of output: 2255 Version mismatch detected—confirm deprecation The deprecation attribute uses 🤖 Prompt for AI Agents
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. value:annoying; category:bug; feedback: The CodeRabbit AI reviewer is not correct! According to https://crates.io/crates/arrow v57.0.0 is the latest released one (https://crates.io/crates/arrow/57.0.0), so the next one would be 57.1.0 or 58.0.0 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🌐 Web query: 💡 Result: The Apache Arrow Rust crate version 57.0.0 was released and published to crates.io on October 30, 2025. Notable items in the release include a much faster Parquet metadata parser, a new arrow-avro crate, and Parquet Variant/Geometry support. See the release announcement and docs for details. [1][2][3] Sources:
@martin-augment, you're absolutely correct, and I apologize for the incorrect analysis. Version 57.0.0 is indeed the latest released version on crates.io, published on October 30, 2025. Using
The deprecation annotation is properly configured for the upcoming release. My previous comment incorrectly flagged this as a version mismatch when it's actually following best practices. Thank you for the correction! |
||
|
|
||
| /// Returns sort order for a physical/logical type. | ||
| pub fn sort_order_for_type( | ||
| logical_type: Option<&LogicalType>, | ||
| converted_type: ConvertedType, | ||
| physical_type: Type, | ||
| ) -> SortOrder { | ||
| match logical_type { | ||
| Some(logical) => match logical { | ||
| LogicalType::String | LogicalType::Enum | LogicalType::Json | LogicalType::Bson => { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In
from_int32, the arm=> match (bit_width, is_signed)is matching references (fromlogical_type_ref()) against value patterns, which will not match/compile; consider dereferencing before matching or using reference patterns (also applies to similar cases in this function).🤖 Was this useful? React with 👍 or 👎
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
value:annoying; category:bug; feedback:The Augment AI reviewer is not correct! Here the types are i32 which implement Copy trait and there is no need to dereference them