Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 18 additions & 15 deletions parquet-variant-compute/src/from_json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
//! Module for transforming a batch of JSON strings into a batch of Variants represented as
//! STRUCT<metadata: BINARY, value: BINARY>

use crate::{VariantArray, VariantArrayBuilder};
use crate::variant_array::VariantArray;
use crate::variant_array_builder::VariantArrayBuilder;
use arrow::array::{Array, ArrayRef, StringArray};
use arrow_schema::ArrowError;
use parquet_variant::VariantBuilder;
Expand Down Expand Up @@ -47,15 +48,14 @@ pub fn batch_json_string_to_variant(input: &ArrayRef) -> Result<VariantArray, Ar
variant_array_builder.append_variant_buffers(&metadata, &value);
}
}
Ok(variant_array_builder.build())
Ok(variant_array_builder.finish())
}

#[cfg(test)]
mod test {
use crate::batch_json_string_to_variant;
use arrow::array::{Array, ArrayRef, AsArray, StringArray};
use arrow_schema::ArrowError;
use parquet_variant::{Variant, VariantBuilder};
use std::sync::Arc;

#[test]
Expand All @@ -70,40 +70,43 @@ mod test {
let array_ref: ArrayRef = Arc::new(input);
let variant_array = batch_json_string_to_variant(&array_ref).unwrap();

let metadata_array = variant_array.metadata_field().as_binary_view();
let value_array = variant_array.value_field().as_binary_view();
let metadata_array = variant_array.metadata_field();
let value_array = variant_array.value_field();

// Compare row 0
assert!(!variant_array.is_null(0));
assert_eq!(variant_array.value(0), Variant::Int8(1));
assert_eq!(variant_array.value(0).as_int8(), Some(1));

// Compare row 1
assert!(variant_array.is_null(1));

// Compare row 2
assert!(!variant_array.is_null(2));
{
let mut vb = VariantBuilder::new();
let mut ob = vb.new_object();
ob.insert("a", Variant::Int8(32));
ob.finish()?;
let (object_metadata, object_value) = vb.finish();
let expected = Variant::new(&object_metadata, &object_value);
assert_eq!(variant_array.value(2), expected);
let variant = variant_array.value(2);
let obj = variant.as_object().unwrap();
assert_eq!(obj.len(), 1);
assert_eq!(obj.get("a").unwrap().as_int8(), Some(32));
}

// Compare row 3 (Note this is a variant NULL, not a null row)
assert!(!variant_array.is_null(3));
assert_eq!(variant_array.value(3), Variant::Null);
assert_eq!(variant_array.value(3).as_null(), Some(()));

// Compare row 4
assert!(variant_array.is_null(4));

// Ensure that the subfields are not nullable
// Ensure that the subfields are non-nullable but have 0-length entries for null rows
assert!(!metadata_array.is_null(1));
assert!(!value_array.is_null(1));
assert!(!metadata_array.is_null(4));
assert!(!value_array.is_null(4));

// Null rows should have 0-length metadata and value
assert_eq!(metadata_array.as_binary_view().value(1).len(), 0);
assert_eq!(value_array.as_binary_view().value(1).len(), 0);
assert_eq!(metadata_array.as_binary_view().value(4).len(), 0);
assert_eq!(value_array.as_binary_view().value(4).len(), 0);
Ok(())
}
}
16 changes: 9 additions & 7 deletions parquet-variant-compute/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,16 @@
// specific language governing permissions and limitations
// under the License.

mod from_json;
mod to_json;
mod variant_array;
mod variant_array_builder;
pub mod variant_get;
//! Parquet variant compute functions

pub use variant_array::VariantArray;
pub use variant_array_builder::VariantArrayBuilder;
pub mod from_json;
pub mod to_json;
pub mod variant_array;
pub mod variant_array_builder;
pub mod variant_get;

pub use from_json::batch_json_string_to_variant;
pub use to_json::batch_variant_to_json_string;
pub use variant_array::VariantArray;
pub use variant_array_builder::VariantArrayBuilder;
pub use variant_get::{variant_get, GetOptions};
179 changes: 179 additions & 0 deletions parquet-variant-compute/src/variant_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,64 @@ impl VariantArray {
// spec says fields order is not guaranteed, so we search by name
&self.value_ref
}

/// Get the field names for an object at the given index
///
/// Returns an empty vector when `index` is past the end of the array, when the
/// row is null, or when the row's variant is not an object.
///
/// # Panics
///
/// Panics if the object yields no name for a position below its own length.
pub fn get_field_names(&self, index: usize) -> Vec<String> {
    // Out-of-range positions and null rows have nothing to report
    if index >= self.len() || self.is_null(index) {
        return Vec::new();
    }

    let variant = self.value(index);
    match variant.as_object() {
        Some(obj) => (0..obj.len())
            .map(|position| obj.field_name(position).unwrap().to_string())
            .collect(),
        // Anything other than an object carries no field names
        None => Vec::new(),
    }
}

/// Create a new VariantArray in which one field has been removed from all variants
///
/// Convenience form of `with_fields_removed` for a single field name; the
/// result, including any error, is whatever that call returns.
pub fn with_field_removed(&self, field_name: &str) -> Result<Self, ArrowError> {
    let single_field = [field_name];
    self.with_fields_removed(&single_field)
}

/// Create a new VariantArray with multiple fields removed from all variants
///
/// The result has the same number of rows as `self`, rebuilt row by row:
///
/// * null rows stay null,
/// * rows holding an object are re-encoded without any top-level field whose
///   name appears in `field_names`,
/// * rows holding any other kind of variant are appended unchanged.
///
/// Only the top-level fields of each object are compared against
/// `field_names`; the values of the retained fields are copied as they are.
/// Names that do not occur in a given row are simply ignored.
///
/// # Errors
///
/// Returns an error if finishing a rebuilt object fails.
pub fn with_fields_removed(&self, field_names: &[&str]) -> Result<Self, ArrowError> {
    use parquet_variant::VariantBuilder;
    use std::collections::HashSet;

    // Build the lookup set once so each per-field membership test is a hash lookup
    let fields_to_remove: HashSet<&str> = field_names.iter().copied().collect();
    let mut builder = crate::variant_array_builder::VariantArrayBuilder::new(self.len());

    for i in 0..self.len() {
        if self.is_null(i) {
            builder.append_null();
            continue;
        }

        let variant = self.value(i);

        // If it's an object, create a new object without the specified fields
        if let Some(obj) = variant.as_object() {
            let mut variant_builder = VariantBuilder::new();
            let mut object_builder = variant_builder.new_object();

            // Add all fields except the ones to remove
            for (field_name, field_value) in obj.iter() {
                if !fields_to_remove.contains(field_name) {
                    object_builder.insert(field_name, field_value);
                }
            }

            // This function already returns a Result, so surface a failure to
            // the caller instead of panicking
            object_builder.finish()?;
            let (metadata, value) = variant_builder.finish();
            builder.append_variant_buffers(&metadata, &value);
        } else {
            // Not an object, append as-is
            builder.append_variant(variant);
        }
    }

    Ok(builder.build())
}
}

impl Array for VariantArray {
Expand Down Expand Up @@ -224,8 +282,10 @@ impl Array for VariantArray {
#[cfg(test)]
mod test {
use super::*;
use crate::variant_array_builder::VariantArrayBuilder;
use arrow::array::{BinaryArray, BinaryViewArray};
use arrow_schema::{Field, Fields};
use parquet_variant::VariantBuilder;

#[test]
fn invalid_not_a_struct_array() {
Expand Down Expand Up @@ -298,6 +358,125 @@ mod test {
);
}

/// Builds the two-row fixture shared by the tests below:
/// row 0 is `{"name": "Alice", "age": 30}` and
/// row 1 is `{"name": "Bob", "age": 25, "city": "NYC"}`.
fn create_test_variant_array() -> VariantArray {
    let mut rows = VariantArrayBuilder::new(2);

    // Row 0: {"name": "Alice", "age": 30}
    let (alice_metadata, alice_value) = {
        let mut vb = VariantBuilder::new();
        vb.new_object()
            .with_field("name", "Alice")
            .with_field("age", 30i32)
            .finish()
            .unwrap();
        vb.finish()
    };
    rows.append_variant_buffers(&alice_metadata, &alice_value);

    // Row 1: {"name": "Bob", "age": 25, "city": "NYC"}
    let (bob_metadata, bob_value) = {
        let mut vb = VariantBuilder::new();
        vb.new_object()
            .with_field("name", "Bob")
            .with_field("age", 25i32)
            .with_field("city", "NYC")
            .finish()
            .unwrap();
        vb.finish()
    };
    rows.append_variant_buffers(&bob_metadata, &bob_value);

    rows.build()
}

/// The fixture has two rows and each row's fields read back with the values
/// they were built with.
#[test]
fn test_variant_array_basic() {
    let variants = create_test_variant_array();
    assert_eq!(variants.len(), 2);
    assert!(!variants.is_empty());

    // Row 0: Alice
    let alice = variants.value(0);
    let alice_name = alice.get_object_field("name").unwrap();
    assert_eq!(alice_name.as_string(), Some("Alice"));
    let alice_age = alice.get_object_field("age").unwrap();
    assert_eq!(alice_age.as_int32(), Some(30));

    // Row 1: Bob, who additionally has a city
    let bob = variants.value(1);
    let bob_name = bob.get_object_field("name").unwrap();
    assert_eq!(bob_name.as_string(), Some("Bob"));
    let bob_age = bob.get_object_field("age").unwrap();
    assert_eq!(bob_age.as_int32(), Some(25));
    let bob_city = bob.get_object_field("city").unwrap();
    assert_eq!(bob_city.as_string(), Some("NYC"));
}

/// `get_field_names` reports exactly the field names each fixture row was built with.
#[test]
fn test_get_field_names() {
    let array = create_test_variant_array();

    // Row 0 was built with two fields
    let row0_names = array.get_field_names(0);
    assert_eq!(row0_names.len(), 2);
    for expected in ["name", "age"] {
        assert!(row0_names.contains(&expected.to_string()));
    }

    // Row 1 was built with three fields
    let row1_names = array.get_field_names(1);
    assert_eq!(row1_names.len(), 3);
    for expected in ["name", "age", "city"] {
        assert!(row1_names.contains(&expected.to_string()));
    }
}

// Note: `test_get_path` was removed because it exercised a duplicate `VariantPath` implementation.
// Use the official `parquet_variant::VariantPath` together with `variant_get` instead.

/// Removing `"age"` drops that field from every row and leaves the others in place.
#[test]
fn test_with_field_removed() {
    let original = create_test_variant_array();
    let stripped = original.with_field_removed("age").unwrap();

    // Row 0 started with {name, age}: only "name" should remain
    let row0 = stripped.value(0);
    let row0_obj = row0.as_object().unwrap();
    assert_eq!(row0_obj.len(), 1);
    assert!(row0_obj.get("name").is_some());
    assert!(row0_obj.get("age").is_none());

    // Row 1 started with {name, age, city}: "name" and "city" should remain
    let row1 = stripped.value(1);
    let row1_obj = row1.as_object().unwrap();
    assert_eq!(row1_obj.len(), 2);
    assert!(row1_obj.get("name").is_some());
    assert!(row1_obj.get("age").is_none());
    assert!(row1_obj.get("city").is_some());
}

/// The metadata and value child arrays have one entry per row, and every
/// entry of the fixture carries at least one byte.
#[test]
fn test_metadata_and_value_fields() {
    let array = create_test_variant_array();

    let metadata = array.metadata_field();
    let values = array.value_field();

    // One entry per row in each child array
    assert_eq!(metadata.len(), 2);
    assert_eq!(values.len(), 2);

    // Both fixture rows are non-null objects, so neither buffer may be empty
    let metadata_bytes = metadata.as_binary_view();
    let value_bytes = values.as_binary_view();
    for row in 0..2 {
        assert!(!metadata_bytes.value(row).is_empty());
        assert!(!value_bytes.value(row).is_empty());
    }
}

/// Builds a single-element `BinaryViewArray` holding the bytes `b"test"`,
/// wrapped in an `ArrayRef`.
fn make_binary_view_array() -> ArrayRef {
    let payload: &[u8] = b"test";
    let array = BinaryViewArray::from(vec![payload]);
    Arc::new(array)
}
Expand Down
12 changes: 9 additions & 3 deletions parquet-variant-compute/src/variant_array_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,10 @@ use std::sync::Arc;
/// // append a pre-constructed metadata and value buffers
/// let (metadata, value) = {
/// let mut vb = VariantBuilder::new();
/// let mut obj = vb.new_object();
/// obj.insert("foo", "bar");
/// obj.finish().unwrap();
/// vb.new_object()
/// .with_field("foo", "bar")
/// .finish()
/// .unwrap();
/// vb.finish()
/// };
/// builder.append_variant_buffers(&metadata, &value);
Expand Down Expand Up @@ -132,6 +133,11 @@ impl VariantArrayBuilder {
VariantArray::try_new(Arc::new(inner)).expect("valid VariantArray by construction")
}

/// Finish building the VariantArray (alias for build for compatibility)
///
/// Takes the builder by value and returns exactly what `build` returns;
/// it performs no additional work of its own.
pub fn finish(self) -> VariantArray {
self.build()
}

/// Appends a null row to the builder.
pub fn append_null(&mut self) {
self.nulls.append_null();
Expand Down
Loading
Loading