Skip to content

Commit 3890e89

Browse files
committed
More CR comments + pull some stuff to other PR
1 parent 7635dd6 commit 3890e89

File tree

3 files changed

+50
-119
lines changed

3 files changed

+50
-119
lines changed

parquet-variant-compute/src/shred_variant.rs

Lines changed: 8 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -692,7 +692,7 @@ impl VariantSchemaNode {
692692
mod tests {
693693
use super::*;
694694
use crate::VariantArrayBuilder;
695-
use crate::variant_array::binary_array_value;
695+
use crate::variant_array::{binary_array_value, variant_from_arrays_at};
696696
use arrow::array::{
697697
Array, BinaryViewArray, FixedSizeBinaryArray, Float64Array, GenericListArray,
698698
GenericListViewArray, Int64Array, LargeBinaryArray, LargeStringArray, ListArray,
@@ -1256,10 +1256,7 @@ mod tests {
12561256
assert!(!value_field.is_null(1)); // value should contain original
12571257
assert!(typed_value_field.is_null(1)); // typed_value should be null
12581258
assert_eq!(
1259-
Variant::new(
1260-
binary_array_value(metadata_field.as_ref(), 1).unwrap(),
1261-
binary_array_value(value_field.as_ref(), 1).unwrap()
1262-
),
1259+
variant_from_arrays_at(metadata_field, value_field, 1).unwrap(),
12631260
Variant::from("hello")
12641261
);
12651262

@@ -1275,10 +1272,7 @@ mod tests {
12751272
assert!(!result.is_null(4));
12761273
assert!(!value_field.is_null(4)); // should contain Variant::Null
12771274
assert_eq!(
1278-
Variant::new(
1279-
binary_array_value(metadata_field.as_ref(), 4).unwrap(),
1280-
binary_array_value(value_field.as_ref(), 4).unwrap()
1281-
),
1275+
variant_from_arrays_at(metadata_field, value_field, 4).unwrap(),
12821276
Variant::Null
12831277
);
12841278
assert!(typed_value_field.is_null(4));
@@ -1355,10 +1349,7 @@ mod tests {
13551349
assert!(value.is_valid(1));
13561350
assert!(typed_value.is_null(1));
13571351
assert_eq!(
1358-
Variant::new(
1359-
binary_array_value(metadata.as_ref(), 1).unwrap(),
1360-
binary_array_value(value.as_ref(), 1).unwrap()
1361-
),
1352+
variant_from_arrays_at(metadata, value, 1).unwrap(),
13621353
Variant::from(42i64)
13631354
);
13641355

@@ -1372,10 +1363,7 @@ mod tests {
13721363
assert!(value.is_valid(3));
13731364
assert!(typed_value.is_null(3));
13741365
assert_eq!(
1375-
Variant::new(
1376-
binary_array_value(metadata.as_ref(), 3).unwrap(),
1377-
binary_array_value(value.as_ref(), 3).unwrap()
1378-
),
1366+
variant_from_arrays_at(metadata, value, 3).unwrap(),
13791367
Variant::Null
13801368
);
13811369

@@ -1417,10 +1405,7 @@ mod tests {
14171405
assert!(value.is_valid(1));
14181406
assert!(typed_value.is_null(1));
14191407
assert_eq!(
1420-
Variant::new(
1421-
binary_array_value(metadata.as_ref(), 1).unwrap(),
1422-
binary_array_value(value.as_ref(), 1).unwrap()
1423-
),
1408+
variant_from_arrays_at(metadata, value, 1).unwrap(),
14241409
Variant::from("not_binary")
14251410
);
14261411

@@ -1434,10 +1419,7 @@ mod tests {
14341419
assert!(value.is_valid(3));
14351420
assert!(typed_value.is_null(3));
14361421
assert_eq!(
1437-
Variant::new(
1438-
binary_array_value(metadata.as_ref(), 3).unwrap(),
1439-
binary_array_value(value.as_ref(), 3).unwrap()
1440-
),
1422+
variant_from_arrays_at(metadata, value, 3).unwrap(),
14411423
Variant::Null
14421424
);
14431425

@@ -1944,10 +1926,7 @@ mod tests {
19441926
metadata: &'m dyn Array,
19451927
value: &'v dyn Array,
19461928
) -> Variant<'m, 'v> {
1947-
Variant::new(
1948-
binary_array_value(metadata, i).unwrap(),
1949-
binary_array_value(value, i).unwrap(),
1950-
)
1929+
variant_from_arrays_at(metadata, value, i).unwrap()
19511930
}
19521931
let expect = |i, expected_result: Option<ShreddedValue<ShreddedStruct>>| {
19531932
match expected_result {

parquet-variant-compute/src/variant_array.rs

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,18 @@ pub(crate) fn binary_array_value(array: &dyn Array, index: usize) -> Option<&[u8
5151
}
5252
}
5353

54+
/// Returns a [`Variant`] from a `metadata` and `value` byte arrays, returns `None`
55+
/// if one of them is of invalid type.
56+
pub(crate) fn variant_from_arrays_at<'m, 'v>(
57+
metadata: &'m dyn Array,
58+
value: &'v dyn Array,
59+
index: usize,
60+
) -> Option<Variant<'m, 'v>> {
61+
let metadata = binary_array_value(metadata, index)?;
62+
let value = binary_array_value(value, index)?;
63+
Some(Variant::new(metadata, value))
64+
}
65+
5466
/// Validates that an array has a binary-like data type.
5567
fn validate_binary_array(array: &dyn Array, field_name: &str) -> Result<()> {
5668
match array.data_type() {
@@ -387,22 +399,18 @@ impl VariantArray {
387399
typed_value_to_variant(typed_value, value, index)
388400
}
389401
// Otherwise fall back to value, if available
390-
(_, Some(value)) if value.is_valid(index) => {
391-
let metadata =
392-
binary_array_value(self.metadata.as_ref(), index).ok_or_else(|| {
393-
ArrowError::InvalidArgumentError(format!(
394-
"metadata field must be a binary-like array, instead got {}",
395-
self.metadata.data_type(),
396-
))
397-
})?;
398-
let value = binary_array_value(value.as_ref(), index).ok_or_else(|| {
399-
ArrowError::InvalidArgumentError(format!(
400-
"value field must be a binary-like array, instead got {}",
401-
value.data_type(),
402-
))
403-
})?;
404-
Ok(Variant::new(metadata, value))
405-
}
402+
(_, Some(value)) if value.is_valid(index) => variant_from_arrays_at(
403+
&self.metadata,
404+
value,
405+
index,
406+
)
407+
.ok_or_else(|| {
408+
ArrowError::InvalidArgumentError(format!(
409+
"metadata and value fields must be binary-like arrays, instead got {} and {}",
410+
self.metadata.data_type(),
411+
value.data_type()
412+
))
413+
}),
406414
// It is technically invalid for neither value nor typed_value fields to be available,
407415
// but the spec specifically requires readers to return Variant::Null in this case.
408416
_ => Ok(Variant::Null),

parquet-variant-compute/src/variant_get.rs

Lines changed: 18 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717
use arrow::{
18-
array::{self, Array, ArrayRef, StructArray, make_array},
18+
array::{self, Array, ArrayRef, StructArray},
1919
buffer::NullBuffer,
2020
compute::CastOptions,
2121
datatypes::Field,
@@ -268,23 +268,8 @@ fn try_perfect_shredding(variant_array: &VariantArray, as_field: &Field) -> Opti
268268
// 2. If every row in the `value` column is null
269269

270270
// This is a perfect shredding, where the value is entirely shredded out,
271-
// so we can just return the typed value after merging the accumulated nulls.
272-
let parent_nulls = variant_array.nulls();
273-
274-
let target_array = if parent_nulls.is_none() {
275-
typed_value.clone()
276-
} else {
277-
let merged_nulls = NullBuffer::union(parent_nulls, typed_value.nulls());
278-
let data = typed_value
279-
.to_data()
280-
.into_builder()
281-
.nulls(merged_nulls)
282-
.build()
283-
.ok()?;
284-
make_array(data)
285-
};
286-
287-
return Some(target_array);
271+
// so we can just return the typed value.
272+
return Some(typed_value.clone());
288273
}
289274

290275
None
@@ -1071,13 +1056,7 @@ mod test {
10711056
EMPTY_VARIANT_METADATA_BYTES,
10721057
typed_value.len(),
10731058
));
1074-
VariantArray::from_parts(
1075-
Arc::new(metadata) as ArrayRef,
1076-
None,
1077-
Some(typed_value),
1078-
None,
1079-
)
1080-
.into()
1059+
VariantArray::from_parts(Arc::new(metadata), None, Some(typed_value), None).into()
10811060
}
10821061
};
10831062
}
@@ -1718,41 +1697,6 @@ mod test {
17181697
])
17191698
);
17201699

1721-
#[test]
1722-
fn test_variant_get_perfectly_shredded_binary_preserves_top_level_nulls() {
1723-
let metadata =
1724-
BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 3));
1725-
let typed_value: ArrayRef = Arc::new(BinaryArray::from(vec![
1726-
Some(b"Apache" as &[u8]),
1727-
Some(b"masked-null" as &[u8]),
1728-
Some(b"Parquet-variant" as &[u8]),
1729-
]));
1730-
let variant_array: ArrayRef = VariantArray::from_parts(
1731-
Arc::new(metadata) as _,
1732-
None,
1733-
Some(typed_value),
1734-
Some(NullBuffer::from(vec![true, false, true])),
1735-
)
1736-
.into();
1737-
1738-
let result = variant_get(
1739-
&variant_array,
1740-
GetOptions::new().with_as_type(Some(FieldRef::from(Field::new(
1741-
"result",
1742-
DataType::Binary,
1743-
true,
1744-
)))),
1745-
)
1746-
.unwrap();
1747-
1748-
let result = result.as_binary::<i32>();
1749-
assert_eq!(result.len(), 3);
1750-
assert_eq!(result.null_count(), 1);
1751-
assert_eq!(result.value(0), b"Apache");
1752-
assert!(result.is_null(1));
1753-
assert_eq!(result.value(2), b"Parquet-variant");
1754-
}
1755-
17561700
/// Return a VariantArray that represents an "all null" variant
17571701
/// for the following example (3 null values):
17581702
///
@@ -1781,7 +1725,7 @@ mod test {
17811725
BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 3));
17821726

17831727
ArrayRef::from(VariantArray::from_parts(
1784-
Arc::new(metadata) as ArrayRef,
1728+
Arc::new(metadata),
17851729
None,
17861730
None,
17871731
Some(nulls),
@@ -1891,8 +1835,8 @@ mod test {
18911835

18921836
// Create the main VariantArray
18931837
ArrayRef::from(VariantArray::from_parts(
1894-
Arc::new(metadata_array) as ArrayRef,
1895-
Some(Arc::new(value_array) as ArrayRef),
1838+
Arc::new(metadata_array),
1839+
Some(Arc::new(value_array)),
18961840
Some(Arc::new(typed_value_struct)),
18971841
None,
18981842
))
@@ -2268,8 +2212,8 @@ mod test {
22682212

22692213
// Build final VariantArray
22702214
ArrayRef::from(VariantArray::from_parts(
2271-
Arc::new(metadata_array) as ArrayRef,
2272-
Some(Arc::new(value_array) as ArrayRef),
2215+
Arc::new(metadata_array),
2216+
Some(Arc::new(value_array)),
22732217
Some(Arc::new(typed_value_struct)),
22742218
None,
22752219
))
@@ -2379,8 +2323,8 @@ mod test {
23792323

23802324
// Build final VariantArray
23812325
ArrayRef::from(VariantArray::from_parts(
2382-
Arc::new(metadata_array) as ArrayRef,
2383-
Some(Arc::new(value_array) as ArrayRef),
2326+
Arc::new(metadata_array),
2327+
Some(Arc::new(value_array)),
23842328
Some(Arc::new(typed_value_struct)),
23852329
None,
23862330
))
@@ -2510,8 +2454,8 @@ mod test {
25102454

25112455
// Build final VariantArray
25122456
ArrayRef::from(VariantArray::from_parts(
2513-
Arc::new(metadata_array) as ArrayRef,
2514-
Some(Arc::new(value_array) as ArrayRef),
2457+
Arc::new(metadata_array),
2458+
Some(Arc::new(value_array)),
25152459
Some(Arc::new(typed_value_struct)),
25162460
None,
25172461
))
@@ -3324,7 +3268,7 @@ mod test {
33243268

33253269
// Build final VariantArray with top-level nulls
33263270
ArrayRef::from(VariantArray::from_parts(
3327-
Arc::new(metadata_array) as ArrayRef,
3271+
Arc::new(metadata_array),
33283272
None,
33293273
Some(Arc::new(typed_value_struct)),
33303274
Some(nulls),
@@ -3383,7 +3327,7 @@ mod test {
33833327
false, // row 3: top-level NULL
33843328
]);
33853329
ArrayRef::from(VariantArray::from_parts(
3386-
Arc::new(metadata_array) as ArrayRef,
3330+
Arc::new(metadata_array),
33873331
None,
33883332
Some(Arc::new(typed_value)),
33893333
Some(nulls),
@@ -3452,8 +3396,8 @@ mod test {
34523396
// Top-level null is encoded in the main StructArray's null mask
34533397
let variant_nulls = NullBuffer::from(vec![true, true, true, false]); // Row 3 is top-level null
34543398
ArrayRef::from(VariantArray::from_parts(
3455-
Arc::new(metadata_array) as ArrayRef,
3456-
Some(Arc::new(value_array) as ArrayRef),
3399+
Arc::new(metadata_array),
3400+
Some(Arc::new(value_array)),
34573401
Some(Arc::new(typed_value_struct)),
34583402
Some(variant_nulls),
34593403
))
@@ -4131,7 +4075,7 @@ mod test {
41314075
all_nulls_values.len(),
41324076
));
41334077
let variant_array: ArrayRef = VariantArray::from_parts(
4134-
Arc::new(metadata) as ArrayRef,
4078+
Arc::new(metadata),
41354079
None,
41364080
Some(Arc::new(typed_value_struct)),
41374081
None,

0 commit comments

Comments
 (0)