Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 22 additions & 76 deletions parquet-variant-compute/src/variant_get.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,15 @@ pub(crate) enum ShreddedPathStep<'a> {
}

/// Given a shredded variant field -- a `(value?, typed_value?)` pair -- try to take one path step
/// deeper. For a `VariantPathElement::Field`, the step fails if there is no `typed_value` at this
/// level, or if `typed_value` is not a struct, or if the requested field name does not exist.
/// deeper. For a `VariantPathElement::Field`, if there is no `typed_value` at this level, if
/// `typed_value` is not a struct, or if the requested field name does not exist, traversal returns
/// a missing-path step (`Missing` or `NotShredded` depending on whether `value` exists).
///
/// TODO: Support `VariantPathElement::Index`? It wouldn't be easy, and maybe not even possible.
pub(crate) fn follow_shredded_path_element<'a>(
shredding_state: &BorrowedShreddingState<'a>,
path_element: &VariantPathElement<'_>,
cast_options: &CastOptions,
_cast_options: &CastOptions,
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we still want to keep it if it doesn't needed

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this PR goes first, the next variant_get PR will return it.

) -> Result<ShreddedPathStep<'a>> {
// If the requested path element is not present in `typed_value`, and `value` is missing, then
// we know it does not exist; it, and all paths under it, are all-NULL.
Expand All @@ -68,15 +69,7 @@ pub(crate) fn follow_shredded_path_element<'a>(
// Try to step into the requested field name of a struct.
// First, try to downcast to StructArray
let Some(struct_array) = typed_value.as_any().downcast_ref::<StructArray>() else {
// Downcast failure - if strict cast options are enabled, this should be an error
if !cast_options.safe {
return Err(ArrowError::CastError(format!(
"Cannot access field '{}' on non-struct type: {}",
name,
typed_value.data_type()
)));
}
// With safe cast options, return NULL (missing_path_step)
// Object field path step follows JSONPath semantics and returns missing path step (NotShredded/Missing) on non-struct path
return Ok(missing_path_step());
};

Expand Down Expand Up @@ -2456,53 +2449,26 @@ mod test {
}

#[test]
fn test_strict_cast_options_downcast_failure() {
use arrow::compute::CastOptions;
use arrow::datatypes::{DataType, Field};
use arrow::error::ArrowError;
use parquet_variant::VariantPath;
use std::sync::Arc;

fn test_field_path_non_struct_returns_missing_path_step() {
// Use the existing simple test data that has Int32 as typed_value
let variant_array = perfectly_shredded_int32_variant_array();

// Try to access a field with safe cast options (should return NULLs)
let safe_options = GetOptions {
path: VariantPath::try_from("nonexistent_field").unwrap(),
as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
cast_options: CastOptions::default(), // safe = true
};

let variant_array_ref: Arc<dyn Array> = variant_array.clone();
let result = variant_get(&variant_array_ref, safe_options);
// Should succeed and return NULLs (safe behavior)
assert!(result.is_ok());
let result_array = result.unwrap();
assert_eq!(result_array.len(), 3);
assert!(result_array.is_null(0));
assert!(result_array.is_null(1));
assert!(result_array.is_null(2));

// Try to access a field with strict cast options (should error)
let strict_options = GetOptions {
path: VariantPath::try_from("nonexistent_field").unwrap(),
as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
cast_options: CastOptions {
safe: false,
..Default::default()
},
};
for safe in [true, false] {
let options = GetOptions {
path: VariantPath::try_from("nonexistent_field").unwrap(),
as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
cast_options: CastOptions {
safe,
..Default::default()
},
};

let result = variant_get(&variant_array_ref, strict_options);
// Should fail with a cast error
assert!(result.is_err());
let error = result.unwrap_err();
assert!(matches!(error, ArrowError::CastError(_)));
assert!(
error
.to_string()
.contains("Cannot access field 'nonexistent_field' on non-struct type")
);
let result_array = variant_get(&variant_array, options).unwrap();
assert_eq!(result_array.len(), 3);
assert!(result_array.is_null(0));
assert!(result_array.is_null(1));
assert!(result_array.is_null(2));
}
}

#[test]
Expand Down Expand Up @@ -2574,11 +2540,6 @@ mod test {

#[test]
fn test_null_buffer_union_for_shredded_paths() {
use arrow::compute::CastOptions;
use arrow::datatypes::{DataType, Field};
use parquet_variant::VariantPath;
use std::sync::Arc;

// Test that null buffers are properly unioned when traversing shredded paths
// This test verifies scovich's null buffer union requirement

Expand All @@ -2599,8 +2560,7 @@ mod test {
cast_options: CastOptions::default(),
};

let variant_array_ref: Arc<dyn Array> = variant_array.clone();
let result = variant_get(&variant_array_ref, options).unwrap();
let result = variant_get(&variant_array, options).unwrap();

// Verify the result length matches input
assert_eq!(result.len(), variant_array.len());
Expand All @@ -2621,13 +2581,6 @@ mod test {

#[test]
fn test_struct_null_mask_union_from_children() {
use arrow::compute::CastOptions;
use arrow::datatypes::{DataType, Field, Fields};
use parquet_variant::VariantPath;
use std::sync::Arc;

use arrow::array::StringArray;

// Test that struct null masks properly union nulls from children field extractions
// This verifies scovich's concern about incomplete null masks in struct construction

Expand Down Expand Up @@ -2701,13 +2654,6 @@ mod test {

#[test]
fn test_field_nullability_preservation() {
use arrow::compute::CastOptions;
use arrow::datatypes::{DataType, Field};
use parquet_variant::VariantPath;
use std::sync::Arc;

use arrow::array::StringArray;

// Test that field nullability from GetOptions.as_type is preserved in the result

let json_strings = vec![
Expand Down
Loading