Skip to content

Commit d0ed407

Browse files
authored
Move extension type construction logic out of Field (#9266)
# Which issue does this PR close? - Part of #8987 # Rationale for this change The logic to instantiate a type extension does not really depend on `Field`, other than indirectly because that struct happens to contain all the necessary bits of information. As part of the work to make the JSON decoder support extension types, it was [observed](#9021 (comment)) that a field is not always available (or at least, not desirable because it creates redundancy). This change addresses the concern by making it possible to work directly with extension types instead of being forced to route through a `Field` instance. # What changes are included in this PR? Factor out the body of `Field::try_extension_type` as a new associated function `ExtensionType::try_new_from_field_metadata` that takes data type and field metadata map and delegates to `ExtensionType::try_new`. `Field::try_extension_type` then simply calls that new method. # Are these changes tested? Code movement. Existing unit tests validate it. # Are there any user-facing changes? New provided trait method.
1 parent fab8e75 commit d0ed407

10 files changed

Lines changed: 53 additions & 28 deletions

File tree

arrow-schema/src/extension/canonical/bool8.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ mod tests {
9696
}
9797

9898
#[test]
99-
#[should_panic(expected = "Field extension type name missing")]
99+
#[should_panic(expected = "Extension type name missing")]
100100
fn missing_name() {
101101
let field = Field::new("", DataType::Int8, false).with_metadata(
102102
[(EXTENSION_TYPE_METADATA_KEY.to_owned(), "".to_owned())]

arrow-schema/src/extension/canonical/fixed_shape_tensor.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -471,7 +471,7 @@ mod tests {
471471
}
472472

473473
#[test]
474-
#[should_panic(expected = "Field extension type name missing")]
474+
#[should_panic(expected = "Extension type name missing")]
475475
fn missing_name() {
476476
let field =
477477
Field::new_fixed_size_list("", Field::new("", DataType::Float32, false), 3, false)

arrow-schema/src/extension/canonical/json.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ mod tests {
222222
}
223223

224224
#[test]
225-
#[should_panic(expected = "Field extension type name missing")]
225+
#[should_panic(expected = "Extension type name missing")]
226226
fn missing_name() {
227227
let field = Field::new("", DataType::Int8, false).with_metadata(
228228
[(EXTENSION_TYPE_METADATA_KEY.to_owned(), "{}".to_owned())]

arrow-schema/src/extension/canonical/opaque.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ mod tests {
285285
}
286286

287287
#[test]
288-
#[should_panic(expected = "Field extension type name missing")]
288+
#[should_panic(expected = "Extension type name missing")]
289289
fn missing_name() {
290290
let field = Field::new("", DataType::Null, false).with_metadata(
291291
[(

arrow-schema/src/extension/canonical/timestamp_with_offset.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ mod tests {
300300
}
301301

302302
#[test]
303-
#[should_panic(expected = "Field extension type name missing")]
303+
#[should_panic(expected = "Extension type name missing")]
304304
fn missing_name() {
305305
let field = make_valid_field_primitive(TimeUnit::Second)
306306
.with_metadata([(EXTENSION_TYPE_METADATA_KEY.to_owned(), "".to_owned())].into());

arrow-schema/src/extension/canonical/uuid.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ mod tests {
100100
}
101101

102102
#[test]
103-
#[should_panic(expected = "Field extension type name missing")]
103+
#[should_panic(expected = "Extension type name missing")]
104104
fn missing_name() {
105105
let field = Field::new("", DataType::FixedSizeBinary(16), false);
106106
field.extension_type::<Uuid>();

arrow-schema/src/extension/canonical/variable_shape_tensor.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -529,7 +529,7 @@ mod tests {
529529
}
530530

531531
#[test]
532-
#[should_panic(expected = "Field extension type name missing")]
532+
#[should_panic(expected = "Extension type name missing")]
533533
fn missing_name() {
534534
let field = Field::new_struct(
535535
"",

arrow-schema/src/extension/mod.rs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ mod canonical;
2323
pub use canonical::*;
2424

2525
use crate::{ArrowError, DataType};
26+
use std::collections::HashMap;
2627

2728
/// The metadata key for the string name identifying an [`ExtensionType`].
2829
pub const EXTENSION_TYPE_NAME_KEY: &str = "ARROW:extension:name";
@@ -255,4 +256,46 @@ pub trait ExtensionType: Sized {
255256
/// This should return an error if the given data type is not supported by
256257
/// this extension type.
257258
fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError>;
259+
260+
/// Construct this extension type from field metadata and data type.
261+
///
262+
/// This is a provided method that extracts extension type information from
263+
/// metadata (using [`EXTENSION_TYPE_NAME_KEY`] and
264+
/// [`EXTENSION_TYPE_METADATA_KEY`]) and delegates to [`Self::try_new`].
265+
///
266+
/// Returns an error if:
267+
/// - The extension type name is missing or doesn't match [`Self::NAME`]
268+
/// - Metadata deserialization fails
269+
/// - The data type is not supported
270+
///
271+
/// This method enables extension type checking without requiring a full
272+
/// [`Field`] instance, useful when only metadata and data type are available.
273+
///
274+
/// [`Field`]: crate::Field
275+
fn try_new_from_field_metadata(
276+
data_type: &DataType,
277+
metadata: &HashMap<String, String>,
278+
) -> Result<Self, ArrowError> {
279+
// Check the extension name in the metadata
280+
match metadata.get(EXTENSION_TYPE_NAME_KEY).map(|s| s.as_str()) {
281+
// It should match the name of the given extension type
282+
Some(name) if name == Self::NAME => {
283+
// Deserialize the metadata and try to construct the extension type
284+
let ext_metadata = metadata
285+
.get(EXTENSION_TYPE_METADATA_KEY)
286+
.map(|s| s.as_str());
287+
let parsed = Self::deserialize_metadata(ext_metadata)?;
288+
Self::try_new(data_type, parsed)
289+
}
290+
// Name mismatch
291+
Some(name) => Err(ArrowError::InvalidArgumentError(format!(
292+
"Extension type name mismatch: expected {}, got {name}",
293+
Self::NAME
294+
))),
295+
// Name missing
296+
None => Err(ArrowError::InvalidArgumentError(
297+
"Extension type name missing".to_string(),
298+
)),
299+
}
300+
}
258301
}

arrow-schema/src/field.rs

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -575,25 +575,7 @@ impl Field {
575575
/// }
576576
/// ```
577577
pub fn try_extension_type<E: ExtensionType>(&self) -> Result<E, ArrowError> {
578-
// Check the extension name in the metadata
579-
match self.extension_type_name() {
580-
// It should match the name of the given extension type
581-
Some(name) if name == E::NAME => {
582-
// Deserialize the metadata and try to construct the extension
583-
// type
584-
E::deserialize_metadata(self.extension_type_metadata())
585-
.and_then(|metadata| E::try_new(self.data_type(), metadata))
586-
}
587-
// Name mismatch
588-
Some(name) => Err(ArrowError::InvalidArgumentError(format!(
589-
"Field extension type name mismatch, expected {}, found {name}",
590-
E::NAME
591-
))),
592-
// Name missing
593-
None => Err(ArrowError::InvalidArgumentError(
594-
"Field extension type name missing".to_owned(),
595-
)),
596-
}
578+
E::try_new_from_field_metadata(self.data_type(), self.metadata())
597579
}
598580

599581
/// Returns an instance of the given [`ExtensionType`] of this [`Field`],

parquet/src/arrow/schema/virtual_type.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ mod tests {
143143
}
144144

145145
#[test]
146-
#[should_panic(expected = "Field extension type name missing")]
146+
#[should_panic(expected = "Extension type name missing")]
147147
fn row_number_missing_name() {
148148
let field = Field::new("", DataType::Int64, false).with_metadata(
149149
[(EXTENSION_TYPE_METADATA_KEY.to_owned(), "".to_owned())]
@@ -203,7 +203,7 @@ mod tests {
203203
}
204204

205205
#[test]
206-
#[should_panic(expected = "Field extension type name missing")]
206+
#[should_panic(expected = "Extension type name missing")]
207207
fn row_group_index_missing_name() {
208208
let field = Field::new("", DataType::Int64, false).with_metadata(
209209
[(EXTENSION_TYPE_METADATA_KEY.to_owned(), "".to_owned())]

0 commit comments

Comments
 (0)