Skip to content

Commit b36beac

Browse files
authored
[Variant] variant_get should follow JSONPath semantics for Field path element (#9676)
# Which issue does this PR close? Currently this is the only place in `main` that handles `Path` in `variant_get`. Other `variant_get` related PRs already follow the JSONPath sementics. (#9598 and #8354) - Closes #9606. # Rationale for this change Check issue # What changes are included in this PR? - Changed `variant_get` field path handling when can't cast to Struct - Updated the related unit test to check the new logic - Cleaned up some nearby tests # Are these changes tested? Yes, unit tests # Are there any user-facing changes? Yes, behavior change for `variant_get` kernel
1 parent adf9308 commit b36beac

File tree

1 file changed

+22
-76
lines changed

1 file changed

+22
-76
lines changed

parquet-variant-compute/src/variant_get.rs

Lines changed: 22 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,15 @@ pub(crate) enum ShreddedPathStep<'a> {
4343
}
4444

4545
/// Given a shredded variant field -- a `(value?, typed_value?)` pair -- try to take one path step
46-
/// deeper. For a `VariantPathElement::Field`, the step fails if there is no `typed_value` at this
47-
/// level, or if `typed_value` is not a struct, or if the requested field name does not exist.
46+
/// deeper. For a `VariantPathElement::Field`, if there is no `typed_value` at this level, if
47+
/// `typed_value` is not a struct, or if the requested field name does not exist, traversal returns
48+
/// a missing-path step (`Missing` or `NotShredded` depending on whether `value` exists).
4849
///
4950
/// TODO: Support `VariantPathElement::Index`? It wouldn't be easy, and maybe not even possible.
5051
pub(crate) fn follow_shredded_path_element<'a>(
5152
shredding_state: &BorrowedShreddingState<'a>,
5253
path_element: &VariantPathElement<'_>,
53-
cast_options: &CastOptions,
54+
_cast_options: &CastOptions,
5455
) -> Result<ShreddedPathStep<'a>> {
5556
// If the requested path element is not present in `typed_value`, and `value` is missing, then
5657
// we know it does not exist; it, and all paths under it, are all-NULL.
@@ -68,15 +69,7 @@ pub(crate) fn follow_shredded_path_element<'a>(
6869
// Try to step into the requested field name of a struct.
6970
// First, try to downcast to StructArray
7071
let Some(struct_array) = typed_value.as_any().downcast_ref::<StructArray>() else {
71-
// Downcast failure - if strict cast options are enabled, this should be an error
72-
if !cast_options.safe {
73-
return Err(ArrowError::CastError(format!(
74-
"Cannot access field '{}' on non-struct type: {}",
75-
name,
76-
typed_value.data_type()
77-
)));
78-
}
79-
// With safe cast options, return NULL (missing_path_step)
72+
// Object field path step follows JSONPath semantics and returns missing path step (NotShredded/Missing) on non-struct path
8073
return Ok(missing_path_step());
8174
};
8275

@@ -2456,53 +2449,26 @@ mod test {
24562449
}
24572450

24582451
#[test]
2459-
fn test_strict_cast_options_downcast_failure() {
2460-
use arrow::compute::CastOptions;
2461-
use arrow::datatypes::{DataType, Field};
2462-
use arrow::error::ArrowError;
2463-
use parquet_variant::VariantPath;
2464-
use std::sync::Arc;
2465-
2452+
fn test_field_path_non_struct_returns_missing_path_step() {
24662453
// Use the existing simple test data that has Int32 as typed_value
24672454
let variant_array = perfectly_shredded_int32_variant_array();
24682455

2469-
// Try to access a field with safe cast options (should return NULLs)
2470-
let safe_options = GetOptions {
2471-
path: VariantPath::try_from("nonexistent_field").unwrap(),
2472-
as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
2473-
cast_options: CastOptions::default(), // safe = true
2474-
};
2475-
2476-
let variant_array_ref: Arc<dyn Array> = variant_array.clone();
2477-
let result = variant_get(&variant_array_ref, safe_options);
2478-
// Should succeed and return NULLs (safe behavior)
2479-
assert!(result.is_ok());
2480-
let result_array = result.unwrap();
2481-
assert_eq!(result_array.len(), 3);
2482-
assert!(result_array.is_null(0));
2483-
assert!(result_array.is_null(1));
2484-
assert!(result_array.is_null(2));
2485-
2486-
// Try to access a field with strict cast options (should error)
2487-
let strict_options = GetOptions {
2488-
path: VariantPath::try_from("nonexistent_field").unwrap(),
2489-
as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
2490-
cast_options: CastOptions {
2491-
safe: false,
2492-
..Default::default()
2493-
},
2494-
};
2456+
for safe in [true, false] {
2457+
let options = GetOptions {
2458+
path: VariantPath::try_from("nonexistent_field").unwrap(),
2459+
as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
2460+
cast_options: CastOptions {
2461+
safe,
2462+
..Default::default()
2463+
},
2464+
};
24952465

2496-
let result = variant_get(&variant_array_ref, strict_options);
2497-
// Should fail with a cast error
2498-
assert!(result.is_err());
2499-
let error = result.unwrap_err();
2500-
assert!(matches!(error, ArrowError::CastError(_)));
2501-
assert!(
2502-
error
2503-
.to_string()
2504-
.contains("Cannot access field 'nonexistent_field' on non-struct type")
2505-
);
2466+
let result_array = variant_get(&variant_array, options).unwrap();
2467+
assert_eq!(result_array.len(), 3);
2468+
assert!(result_array.is_null(0));
2469+
assert!(result_array.is_null(1));
2470+
assert!(result_array.is_null(2));
2471+
}
25062472
}
25072473

25082474
#[test]
@@ -2574,11 +2540,6 @@ mod test {
25742540

25752541
#[test]
25762542
fn test_null_buffer_union_for_shredded_paths() {
2577-
use arrow::compute::CastOptions;
2578-
use arrow::datatypes::{DataType, Field};
2579-
use parquet_variant::VariantPath;
2580-
use std::sync::Arc;
2581-
25822543
// Test that null buffers are properly unioned when traversing shredded paths
25832544
// This test verifies scovich's null buffer union requirement
25842545

@@ -2599,8 +2560,7 @@ mod test {
25992560
cast_options: CastOptions::default(),
26002561
};
26012562

2602-
let variant_array_ref: Arc<dyn Array> = variant_array.clone();
2603-
let result = variant_get(&variant_array_ref, options).unwrap();
2563+
let result = variant_get(&variant_array, options).unwrap();
26042564

26052565
// Verify the result length matches input
26062566
assert_eq!(result.len(), variant_array.len());
@@ -2621,13 +2581,6 @@ mod test {
26212581

26222582
#[test]
26232583
fn test_struct_null_mask_union_from_children() {
2624-
use arrow::compute::CastOptions;
2625-
use arrow::datatypes::{DataType, Field, Fields};
2626-
use parquet_variant::VariantPath;
2627-
use std::sync::Arc;
2628-
2629-
use arrow::array::StringArray;
2630-
26312584
// Test that struct null masks properly union nulls from children field extractions
26322585
// This verifies scovich's concern about incomplete null masks in struct construction
26332586

@@ -2701,13 +2654,6 @@ mod test {
27012654

27022655
#[test]
27032656
fn test_field_nullability_preservation() {
2704-
use arrow::compute::CastOptions;
2705-
use arrow::datatypes::{DataType, Field};
2706-
use parquet_variant::VariantPath;
2707-
use std::sync::Arc;
2708-
2709-
use arrow::array::StringArray;
2710-
27112657
// Test that field nullability from GetOptions.as_type is preserved in the result
27122658

27132659
let json_strings = vec![

0 commit comments

Comments
 (0)