@@ -24,6 +24,7 @@ use arrow_schema::{ArrowError, DataType, FieldRef};
2424use parquet_variant:: { VariantPath , VariantPathElement } ;
2525
2626use crate :: VariantArray ;
27+ use crate :: VariantArrayBuilder ;
2728use crate :: VariantType ;
2829use crate :: variant_array:: BorrowedShreddingState ;
2930use crate :: variant_to_arrow:: make_variant_to_arrow_row_builder;
@@ -90,17 +91,10 @@ pub(crate) fn follow_shredded_path_element<'a>(
9091 // The field might be a VariantArray (StructArray) if shredded,
9192 // or it might be a primitive array. Only proceed if it's a StructArray.
9293 let Some ( struct_array) = field. as_struct_opt ( ) else {
93- // Field exists but is not a StructArray (VariantArray),
94- // which means it's not shredded further.
95- if !cast_options. safe {
96- return Err ( ArrowError :: CastError ( format ! (
97- "Expected Struct array while following path, got {}" ,
98- field. data_type( ) ,
99- ) ) ) ;
100- }
101- // In safe mode, return NotShredded to let the caller
102- // handle it via value column.
103- return Ok ( ShreddedPathStep :: NotShredded ) ;
94+ // Field exists but is not a StructArray, so it cannot be
95+ // followed further. Fall back to the value column if present,
96+ // otherwise the path is missing.
97+ return Ok ( missing_path_step ( ) ) ;
10498 } ;
10599
106100 let state = BorrowedShreddingState :: try_from ( struct_array) ?;
@@ -224,47 +218,48 @@ fn shredded_get_path(
224218 // further, and build up the final struct from those individually shredded results.
225219 if let DataType :: Struct ( fields) = as_field. data_type ( ) {
226220 let mut updated_fields = Vec :: with_capacity ( fields. len ( ) ) ;
227- let children: Result < Vec < _ > > = fields
228- . iter ( )
229- . map ( |field| {
230- // If the field has VariantType extension metadata, extract it as a
231- // VariantArray instead of casting to the declared data type. This allows
232- // callers to request structs where some fields remain as variants.
233- // See test_struct_extraction_with_variant_fields for usage example.
234- let is_strongly_typed = field. try_extension_type :: < VariantType > ( ) . is_err ( ) ;
235- let field_as_type = is_strongly_typed. then ( || field. as_ref ( ) ) ;
236- let child = shredded_get_path (
237- & target,
238- & [ VariantPathElement :: from ( field. name ( ) . as_str ( ) ) ] ,
239- field_as_type,
240- cast_options,
241- ) ?;
242-
243- // Update field type if it was a Variant marker (extracted as VariantArray).
244- // The actual data type will be the internal structure of VariantArray.
245- // Preserve VariantType extension metadata so downstream consumers
246- // can recognize this field as a Variant column.
247- //
248- // When the field is entirely absent in the data, shredded_get_path
249- // returns a NullArray (DataType::Null). VariantType only supports
250- // Struct storage, so we must skip the extension in that case.
251- let is_variant_field = !is_strongly_typed;
252- let new_field = field
253- . as_ref ( )
254- . clone ( )
255- . with_data_type ( child. data_type ( ) . clone ( ) ) ;
256- let updated_field =
257- if is_variant_field && matches ! ( child. data_type( ) , DataType :: Struct ( _) ) {
258- new_field. with_extension_type ( VariantType )
259- } else {
260- new_field
261- } ;
262- updated_fields. push ( updated_field) ;
221+ let mut children = Vec :: with_capacity ( fields. len ( ) ) ;
222+ for field in fields. iter ( ) {
223+ // If the field has VariantType extension metadata, extract it as a
224+ // VariantArray instead of casting to the declared data type. This allows
225+ // callers to request structs where some fields remain as variants.
226+ // See test_struct_extraction_with_variant_fields for usage example.
227+ let is_variant_field = field. try_extension_type :: < VariantType > ( ) . is_ok ( ) ;
228+ let field_as_type = ( !is_variant_field) . then ( || field. as_ref ( ) ) ;
229+ let child = shredded_get_path (
230+ & target,
231+ & [ VariantPathElement :: from ( field. name ( ) . as_str ( ) ) ] ,
232+ field_as_type,
233+ cast_options,
234+ ) ?;
235+
236+ // When the field is entirely absent in the data, shredded_get_path
237+ // returns a NullArray. For variant fields, construct an all-null
238+ // VariantArray so the extension metadata is preserved.
239+ let child = if is_variant_field && child. data_type ( ) == & DataType :: Null {
240+ let mut builder = VariantArrayBuilder :: new ( child. len ( ) ) ;
241+ for _ in 0 ..child. len ( ) {
242+ builder. append_null ( ) ;
243+ }
244+ let null_variant = builder. build ( ) ;
245+ Arc :: new ( null_variant. into_inner ( ) ) as ArrayRef
246+ } else {
247+ child
248+ } ;
263249
264- Ok ( child)
265- } )
266- . collect ( ) ;
267- let children = children?;
250+ // Update field data type to match the actual child array.
251+ // Preserve VariantType extension metadata for variant fields so
252+ // downstream consumers can recognize them as Variant columns.
253+ let mut new_field = field
254+ . as_ref ( )
255+ . clone ( )
256+ . with_data_type ( child. data_type ( ) . clone ( ) ) ;
257+ if is_variant_field {
258+ new_field = new_field. with_extension_type ( VariantType ) ;
259+ }
260+ updated_fields. push ( new_field) ;
261+ children. push ( child) ;
262+ }
268263
269264 let struct_nulls = target. nulls ( ) . cloned ( ) ;
270265
@@ -4383,7 +4378,9 @@ mod test {
43834378
43844379 // Row 0: data = {"score": 95}
43854380 let data0 = data_variant_array. value ( 0 ) ;
4386- assert ! ( matches!( data0, Variant :: Object ( _) ) ) ;
4381+ let obj0 = data0. as_object ( ) . expect ( "row 0 data should be an object" ) ;
4382+ let score = obj0. get ( "score" ) . expect ( "row 0 data should have 'score'" ) ;
4383+ assert_eq ! ( score. as_int16( ) , Some ( 95 ) ) ;
43874384
43884385 // Row 1: data = null
43894386 assert ! (
@@ -4392,7 +4389,9 @@ mod test {
43924389
43934390 // Row 2: data = {"level": 5}
43944391 let data2 = data_variant_array. value ( 2 ) ;
4395- assert ! ( matches!( data2, Variant :: Object ( _) ) ) ;
4392+ let obj2 = data2. as_object ( ) . expect ( "row 2 data should be an object" ) ;
4393+ let level = obj2. get ( "level" ) . expect ( "row 2 data should have 'level'" ) ;
4394+ assert_eq ! ( level. as_int8( ) , Some ( 5 ) ) ;
43964395 }
43974396
43984397 /// Test that requesting a variant field absent in all rows does not panic.
@@ -4429,5 +4428,25 @@ mod test {
44294428 // The missing variant field should be all nulls
44304429 let missing_col = struct_result. column ( 1 ) ;
44314430 assert_eq ! ( missing_col. null_count( ) , missing_col. len( ) ) ;
4431+
4432+ // The missing variant field should preserve VariantType extension metadata
4433+ let missing_schema_field = struct_result
4434+ . fields ( )
4435+ . iter ( )
4436+ . find ( |f| f. name ( ) == "missing_field" )
4437+ . unwrap ( ) ;
4438+ assert ! (
4439+ missing_schema_field
4440+ . try_extension_type:: <VariantType >( )
4441+ . is_ok( ) ,
4442+ "missing variant field should preserve VariantType extension metadata"
4443+ ) ;
4444+
4445+ // The missing variant field should be a valid VariantArray
4446+ let missing_variant = VariantArray :: try_new ( missing_col) ;
4447+ assert ! (
4448+ missing_variant. is_ok( ) ,
4449+ "missing variant field should be a valid VariantArray"
4450+ ) ;
44324451 }
44334452}
0 commit comments