From 6d23937416504f1cf1092cd9d2f59b8483eede93 Mon Sep 17 00:00:00 2001 From: sdf-jkl Date: Wed, 8 Apr 2026 11:04:57 -0400 Subject: [PATCH 1/3] Follow JSONPath semantics for Field path element --- parquet-variant-compute/src/variant_get.rs | 71 ++++++---------------- 1 file changed, 18 insertions(+), 53 deletions(-) diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 29e28c850be..6b847c9e472 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -50,7 +50,7 @@ pub(crate) enum ShreddedPathStep<'a> { pub(crate) fn follow_shredded_path_element<'a>( shredding_state: &BorrowedShreddingState<'a>, path_element: &VariantPathElement<'_>, - cast_options: &CastOptions, + _cast_options: &CastOptions, ) -> Result> { // If the requested path element is not present in `typed_value`, and `value` is missing, then // we know it does not exist; it, and all paths under it, are all-NULL. @@ -68,15 +68,7 @@ pub(crate) fn follow_shredded_path_element<'a>( // Try to step into the requested field name of a struct. // First, try to downcast to StructArray let Some(struct_array) = typed_value.as_any().downcast_ref::() else { - // Downcast failure - if strict cast options are enabled, this should be an error - if !cast_options.safe { - return Err(ArrowError::CastError(format!( - "Cannot access field '{}' on non-struct type: {}", - name, - typed_value.data_type() - ))); - } - // With safe cast options, return NULL (missing_path_step) + // Object field path step follows JSONPath semantics and returns missing path step (NotShredded/Missing) on non-struct path return Ok(missing_path_step()); }; @@ -2456,53 +2448,26 @@ mod test { } #[test] - fn test_strict_cast_options_downcast_failure() { - use arrow::compute::CastOptions; - use arrow::datatypes::{DataType, Field}; - use arrow::error::ArrowError; - use parquet_variant::VariantPath; - use std::sync::Arc; - + fn test_field_path_non_struct_returns_missing_path_step() { // Use the existing simple test data that has Int32 as typed_value let variant_array = perfectly_shredded_int32_variant_array(); - // Try to access a field with safe cast options (should return NULLs) - let safe_options = GetOptions { - path: VariantPath::try_from("nonexistent_field").unwrap(), - as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))), - cast_options: CastOptions::default(), // safe = true - }; - - let variant_array_ref: Arc = variant_array.clone(); - let result = variant_get(&variant_array_ref, safe_options); - // Should succeed and return NULLs (safe behavior) - assert!(result.is_ok()); - let result_array = result.unwrap(); - assert_eq!(result_array.len(), 3); - assert!(result_array.is_null(0)); - assert!(result_array.is_null(1)); - assert!(result_array.is_null(2)); - - // Try to access a field with strict cast options (should error) - let strict_options = GetOptions { - path: VariantPath::try_from("nonexistent_field").unwrap(), - as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))), - cast_options: CastOptions { - safe: false, - ..Default::default() - }, - }; + for safe in [true, false] { + let options = GetOptions { + path: VariantPath::try_from("nonexistent_field").unwrap(), + as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))), + cast_options: CastOptions { + safe, + ..Default::default() + }, + }; - let result = variant_get(&variant_array_ref, strict_options); - // Should fail with a cast error - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(matches!(error, ArrowError::CastError(_))); - assert!( - error - .to_string() - .contains("Cannot access field 'nonexistent_field' on non-struct type") - ); + let result_array = variant_get(&variant_array, options).unwrap(); + assert_eq!(result_array.len(), 3); + assert!(result_array.is_null(0)); + assert!(result_array.is_null(1)); + assert!(result_array.is_null(2)); + } } #[test] From dac3cf3aad5ad18139ed485a9ef347f740d38202 Mon Sep 17 00:00:00 2001 From: sdf-jkl Date: Wed, 8 Apr 2026 11:05:13 -0400 Subject: [PATCH 2/3] Same clean up for tests nearby --- parquet-variant-compute/src/variant_get.rs | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 6b847c9e472..ea1efea0fdf 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -2539,11 +2539,6 @@ mod test { #[test] fn test_null_buffer_union_for_shredded_paths() { - use arrow::compute::CastOptions; - use arrow::datatypes::{DataType, Field}; - use parquet_variant::VariantPath; - use std::sync::Arc; - // Test that null buffers are properly unioned when traversing shredded paths // This test verifies scovich's null buffer union requirement @@ -2564,8 +2559,7 @@ mod test { cast_options: CastOptions::default(), }; - let variant_array_ref: Arc = variant_array.clone(); - let result = variant_get(&variant_array_ref, options).unwrap(); + let result = variant_get(&variant_array, options).unwrap(); // Verify the result length matches input assert_eq!(result.len(), variant_array.len()); @@ -2586,13 +2580,6 @@ mod test { #[test] fn test_struct_null_mask_union_from_children() { - use arrow::compute::CastOptions; - use arrow::datatypes::{DataType, Field, Fields}; - use parquet_variant::VariantPath; - use std::sync::Arc; - - use arrow::array::StringArray; - // Test that struct null masks properly union nulls from children field extractions // This verifies scovich's concern about incomplete null masks in struct construction @@ -2666,13 +2653,6 @@ mod test { #[test] fn test_field_nullability_preservation() { - use arrow::compute::CastOptions; - use arrow::datatypes::{DataType, Field}; - use parquet_variant::VariantPath; - use std::sync::Arc; - - use arrow::array::StringArray; - // Test that field nullability from GetOptions.as_type is preserved in the result let json_strings = vec![ From 417ec8c0365dc2572ca104e813d0050120598af0 Mon Sep 17 00:00:00 2001 From: sdf-jkl Date: Thu, 9 Apr 2026 11:59:47 -0400 Subject: [PATCH 3/3] Fix stale doc --- parquet-variant-compute/src/variant_get.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index ea1efea0fdf..83df6879aa5 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -43,8 +43,9 @@ pub(crate) enum ShreddedPathStep<'a> { } /// Given a shredded variant field -- a `(value?, typed_value?)` pair -- try to take one path step -/// deeper. For a `VariantPathElement::Field`, the step fails if there is no `typed_value` at this -/// level, or if `typed_value` is not a struct, or if the requested field name does not exist. +/// deeper. For a `VariantPathElement::Field`, if there is no `typed_value` at this level, if +/// `typed_value` is not a struct, or if the requested field name does not exist, traversal returns +/// a missing-path step (`Missing` or `NotShredded` depending on whether `value` exists). /// /// TODO: Support `VariantPathElement::Index`? It wouldn't be easy, and maybe not even possible. pub(crate) fn follow_shredded_path_element<'a>(