Skip to content

Commit 61f78ed

Browse files
Add test to distinguish missing vs JSON null
Add a unit test in parquet-variant-compute/src/variant_get.rs that verifies VariantType field extraction distinguishes between an explicit JSON null (which should yield a present Variant::Null) and a missing field (which should be SQL NULL). Co-Authored-By: Konstantin Tarasov <33369833+sdf-jkl@users.noreply.github.com>
1 parent c0fe077 commit 61f78ed

File tree

1 file changed

+65
-2
lines changed

1 file changed

+65
-2
lines changed

parquet-variant-compute/src/variant_get.rs

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4439,8 +4439,7 @@ mod test {
44394439
/// Fields with VariantType extension metadata should be extracted as VariantArrays.
44404440
#[test]
44414441
fn test_struct_extraction_with_variant_fields() {
4442-
// Create test data: [{"id": 1, "name": "Alice", "data": {"score": 95}},
4443-
// {"id": 2, "name": "Bob", "data": null}]
4442+
// Create test data
44444443
let json_strings = vec![
44454444
r#"{"id": 1, "name": "Alice", "data": {"score": 95}}"#,
44464445
r#"{"id": 2, "name": "Bob", "data": null}"#,
@@ -4597,4 +4596,68 @@ mod test {
45974596
"missing variant field should be a valid VariantArray"
45984597
);
45994598
}
4599+
4600+
/// Runs `variant_get` with a struct `as_type` whose "data" field carries the `VariantType`
/// extension, then inspects that field row by row as a `VariantArray`.
///
/// VariantType field extraction should distinguish:
4601+
/// - explicit JSON null => present Variant::Null
4602+
/// - missing path => SQL NULL
4603+
#[test]
4604+
fn test_struct_variant_field_distinguishes_missing_and_variant_null() {
4605+
// Three input rows: "data" is an explicit JSON null, absent, and a nested object.
let json_strings = vec![
4606+
r#"{"id": 1, "data": null}"#,
4607+
r#"{"id": 2}"#,
4608+
r#"{"id": 3, "data": {"score": 95}}"#,
4609+
];
4610+
let string_array: Arc<dyn Array> = Arc::new(StringArray::from(json_strings));
4611+
let variant_array = json_to_variant(&string_array).unwrap();
4612+
4613+
// Requested output shape: a nullable Int32 "id" plus a nullable "data" field tagged with
// the VariantType extension.
// NOTE(review): the empty-struct storage type on "data" looks like a placeholder that the
// extension type overrides — confirm against variant_get's handling.
let struct_fields = Fields::from(vec![
4614+
Field::new("id", DataType::Int32, true),
4615+
Field::new("data", DataType::Struct(Fields::empty()), true)
4616+
.with_extension_type(VariantType),
4617+
]);
4618+
let struct_type = DataType::Struct(struct_fields);
4619+
4620+
// Default path combined with the struct `as_type` built above.
// NOTE(review): presumably the default path addresses each row's top-level value — confirm.
let options = GetOptions {
4621+
path: VariantPath::default(),
4622+
as_type: Some(Arc::new(Field::new("result", struct_type, true))),
4623+
cast_options: CastOptions::default(),
4624+
};
4625+
4626+
let variant_array_ref = ArrayRef::from(variant_array);
4627+
let result = variant_get(&variant_array_ref, options).unwrap();
4628+
4629+
let struct_result = result.as_any().downcast_ref::<StructArray>().unwrap();
4630+
assert_eq!(struct_result.len(), 3);
4631+
4632+
// Column 1 is "data" (the second entry in `struct_fields`); wrap it as a VariantArray so
// each row can be checked for nullness and value.
let data_field = struct_result.column(1);
4633+
let data_variant_array = VariantArray::try_new(data_field).unwrap();
4634+
assert_eq!(data_variant_array.len(), 3);
4635+
4636+
// Row 0: explicit JSON null => present Variant::Null (NOT SQL NULL)
4637+
assert!(
4638+
!data_variant_array.is_null(0),
4639+
"explicit JSON null should be a present value, not SQL NULL"
4640+
);
4641+
assert!(
4642+
matches!(data_variant_array.value(0), Variant::Null),
4643+
"explicit JSON null should surface as Variant::Null"
4644+
);
4645+
4646+
// Row 1: missing path => SQL NULL
4647+
assert!(
4648+
data_variant_array.is_null(1),
4649+
"missing field should be SQL NULL"
4650+
);
4651+
4652+
// Row 2: present object
4653+
assert!(!data_variant_array.is_null(2));
4654+
let row2 = data_variant_array.value(2);
4655+
let obj = row2.as_object().expect("row 2 data should be an object");
4656+
assert_eq!(
4657+
obj.get("score")
4658+
.expect("row 2 should have 'score'")
4659+
.as_int16(),
4660+
Some(95)
4661+
);
4662+
}
46004663
}

0 commit comments

Comments
 (0)