|
19 | 19 |
|
20 | 20 | use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder}; |
21 | 21 | use crate::variant_to_arrow::{ |
22 | | - PrimitiveVariantToArrowRowBuilder, make_primitive_variant_to_arrow_row_builder, |
| 22 | + ArrayVariantToArrowRowBuilder, PrimitiveVariantToArrowRowBuilder, |
| 23 | + make_primitive_variant_to_arrow_row_builder, |
23 | 24 | }; |
24 | 25 | use crate::{VariantArray, VariantValueArrayBuilder}; |
25 | | -use arrow::array::{ |
26 | | - ArrayRef, BinaryViewArray, GenericListArray, GenericListViewArray, NullBufferBuilder, |
27 | | - OffsetSizeTrait, |
28 | | -}; |
29 | | -use arrow::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer}; |
| 26 | +use arrow::array::{ArrayRef, BinaryViewArray, NullBufferBuilder}; |
| 27 | +use arrow::buffer::NullBuffer; |
30 | 28 | use arrow::compute::CastOptions; |
31 | | -use arrow::datatypes::{ArrowNativeTypeOp, DataType, Field, FieldRef, Fields, TimeUnit}; |
| 29 | +use arrow::datatypes::{DataType, Field, FieldRef, Fields, TimeUnit}; |
32 | 30 | use arrow::error::{ArrowError, Result}; |
33 | 31 | use indexmap::IndexMap; |
34 | | -use parquet_variant::{Variant, VariantBuilderExt, VariantList, VariantPath, VariantPathElement}; |
| 32 | +use parquet_variant::{Variant, VariantBuilderExt, VariantPath, VariantPathElement}; |
35 | 33 | use std::collections::BTreeMap; |
36 | 34 | use std::sync::Arc; |
37 | 35 |
|
@@ -123,19 +121,15 @@ pub(crate) fn make_variant_to_shredded_variant_arrow_row_builder<'a>( |
123 | 121 | DataType::List(_) |
124 | 122 | | DataType::LargeList(_) |
125 | 123 | | DataType::ListView(_) |
126 | | - | DataType::LargeListView(_) => { |
| 124 | + | DataType::LargeListView(_) |
| 125 | + | DataType::FixedSizeList(..) => { |
127 | 126 | let typed_value_builder = VariantToShreddedArrayVariantRowBuilder::try_new( |
128 | 127 | data_type, |
129 | 128 | cast_options, |
130 | 129 | capacity, |
131 | 130 | )?; |
132 | 131 | VariantToShreddedVariantRowBuilder::Array(typed_value_builder) |
133 | 132 | } |
134 | | - DataType::FixedSizeList(..) => { |
135 | | - return Err(ArrowError::NotYetImplemented( |
136 | | - "Shredding variant array values as fixed-size lists".to_string(), |
137 | | - )); |
138 | | - } |
139 | 133 | // Supported shredded primitive types, see Variant shredding spec: |
140 | 134 | // https://github.com/apache/parquet-format/blob/master/VariantShredding.md#shredded-value-types |
141 | 135 | DataType::Boolean |
@@ -312,171 +306,6 @@ impl<'a> VariantToShreddedArrayVariantRowBuilder<'a> { |
312 | 306 | } |
313 | 307 | } |
314 | 308 |
|
315 | | -enum ArrayVariantToArrowRowBuilder<'a> { |
316 | | - List(VariantToListArrowRowBuilder<'a, i32, false>), |
317 | | - LargeList(VariantToListArrowRowBuilder<'a, i64, false>), |
318 | | - ListView(VariantToListArrowRowBuilder<'a, i32, true>), |
319 | | - LargeListView(VariantToListArrowRowBuilder<'a, i64, true>), |
320 | | -} |
321 | | - |
322 | | -impl<'a> ArrayVariantToArrowRowBuilder<'a> { |
323 | | - fn try_new( |
324 | | - data_type: &'a DataType, |
325 | | - cast_options: &'a CastOptions, |
326 | | - capacity: usize, |
327 | | - ) -> Result<Self> { |
328 | | - use ArrayVariantToArrowRowBuilder::*; |
329 | | - |
330 | | - // Make List/ListView builders without repeating the constructor boilerplate. |
331 | | - macro_rules! make_list_builder { |
332 | | - ($variant:ident, $offset:ty, $is_view:expr, $field:ident) => { |
333 | | - $variant(VariantToListArrowRowBuilder::<$offset, $is_view>::try_new( |
334 | | - $field.clone(), |
335 | | - $field.data_type(), |
336 | | - cast_options, |
337 | | - capacity, |
338 | | - )?) |
339 | | - }; |
340 | | - } |
341 | | - |
342 | | - let builder = match data_type { |
343 | | - DataType::List(field) => make_list_builder!(List, i32, false, field), |
344 | | - DataType::LargeList(field) => make_list_builder!(LargeList, i64, false, field), |
345 | | - DataType::ListView(field) => make_list_builder!(ListView, i32, true, field), |
346 | | - DataType::LargeListView(field) => make_list_builder!(LargeListView, i64, true, field), |
347 | | - other => { |
348 | | - return Err(ArrowError::InvalidArgumentError(format!( |
349 | | - "Casting to {other:?} is not applicable for array Variant types" |
350 | | - ))); |
351 | | - } |
352 | | - }; |
353 | | - Ok(builder) |
354 | | - } |
355 | | - |
356 | | - fn append_null(&mut self) { |
357 | | - match self { |
358 | | - Self::List(builder) => builder.append_null(), |
359 | | - Self::LargeList(builder) => builder.append_null(), |
360 | | - Self::ListView(builder) => builder.append_null(), |
361 | | - Self::LargeListView(builder) => builder.append_null(), |
362 | | - } |
363 | | - } |
364 | | - |
365 | | - fn append_value(&mut self, list: VariantList<'_, '_>) -> Result<()> { |
366 | | - match self { |
367 | | - Self::List(builder) => builder.append_value(list), |
368 | | - Self::LargeList(builder) => builder.append_value(list), |
369 | | - Self::ListView(builder) => builder.append_value(list), |
370 | | - Self::LargeListView(builder) => builder.append_value(list), |
371 | | - } |
372 | | - } |
373 | | - |
374 | | - fn finish(self) -> Result<ArrayRef> { |
375 | | - match self { |
376 | | - Self::List(builder) => builder.finish(), |
377 | | - Self::LargeList(builder) => builder.finish(), |
378 | | - Self::ListView(builder) => builder.finish(), |
379 | | - Self::LargeListView(builder) => builder.finish(), |
380 | | - } |
381 | | - } |
382 | | -} |
383 | | - |
384 | | -struct VariantToListArrowRowBuilder<'a, O, const IS_VIEW: bool> |
385 | | -where |
386 | | - O: OffsetSizeTrait + ArrowNativeTypeOp, |
387 | | -{ |
388 | | - field: FieldRef, |
389 | | - offsets: Vec<O>, |
390 | | - element_builder: Box<VariantToShreddedVariantRowBuilder<'a>>, |
391 | | - nulls: NullBufferBuilder, |
392 | | - current_offset: O, |
393 | | -} |
394 | | - |
395 | | -impl<'a, O, const IS_VIEW: bool> VariantToListArrowRowBuilder<'a, O, IS_VIEW> |
396 | | -where |
397 | | - O: OffsetSizeTrait + ArrowNativeTypeOp, |
398 | | -{ |
399 | | - fn try_new( |
400 | | - field: FieldRef, |
401 | | - element_data_type: &'a DataType, |
402 | | - cast_options: &'a CastOptions, |
403 | | - capacity: usize, |
404 | | - ) -> Result<Self> { |
405 | | - if capacity >= isize::MAX as usize { |
406 | | - return Err(ArrowError::ComputeError( |
407 | | - "Capacity exceeds isize::MAX when reserving list offsets".to_string(), |
408 | | - )); |
409 | | - } |
410 | | - let mut offsets = Vec::with_capacity(capacity + 1); |
411 | | - offsets.push(O::ZERO); |
412 | | - let element_builder = make_variant_to_shredded_variant_arrow_row_builder( |
413 | | - element_data_type, |
414 | | - cast_options, |
415 | | - capacity, |
416 | | - false, |
417 | | - )?; |
418 | | - Ok(Self { |
419 | | - field, |
420 | | - offsets, |
421 | | - element_builder: Box::new(element_builder), |
422 | | - nulls: NullBufferBuilder::new(capacity), |
423 | | - current_offset: O::ZERO, |
424 | | - }) |
425 | | - } |
426 | | - |
427 | | - fn append_null(&mut self) { |
428 | | - self.offsets.push(self.current_offset); |
429 | | - self.nulls.append_null(); |
430 | | - } |
431 | | - |
432 | | - fn append_value(&mut self, list: VariantList<'_, '_>) -> Result<()> { |
433 | | - for element in list.iter() { |
434 | | - self.element_builder.append_value(element)?; |
435 | | - self.current_offset = self.current_offset.add_checked(O::ONE)?; |
436 | | - } |
437 | | - self.offsets.push(self.current_offset); |
438 | | - self.nulls.append_non_null(); |
439 | | - Ok(()) |
440 | | - } |
441 | | - |
442 | | - fn finish(mut self) -> Result<ArrayRef> { |
443 | | - let (value, typed_value, nulls) = self.element_builder.finish()?; |
444 | | - let element_array = |
445 | | - ShreddedVariantFieldArray::from_parts(Some(value), Some(typed_value), nulls); |
446 | | - let field = Arc::new( |
447 | | - self.field |
448 | | - .as_ref() |
449 | | - .clone() |
450 | | - .with_data_type(element_array.data_type().clone()), |
451 | | - ); |
452 | | - |
453 | | - if IS_VIEW { |
454 | | - // NOTE: `offsets` is never empty (constructor pushes an entry) |
455 | | - let mut sizes = Vec::with_capacity(self.offsets.len() - 1); |
456 | | - for i in 1..self.offsets.len() { |
457 | | - sizes.push(self.offsets[i] - self.offsets[i - 1]); |
458 | | - } |
459 | | - self.offsets.pop(); |
460 | | - let list_view_array = GenericListViewArray::<O>::new( |
461 | | - field, |
462 | | - ScalarBuffer::from(self.offsets), |
463 | | - ScalarBuffer::from(sizes), |
464 | | - ArrayRef::from(element_array), |
465 | | - self.nulls.finish(), |
466 | | - ); |
467 | | - Ok(Arc::new(list_view_array)) |
468 | | - } else { |
469 | | - let list_array = GenericListArray::<O>::new( |
470 | | - field, |
471 | | - OffsetBuffer::<O>::new(ScalarBuffer::from(self.offsets)), |
472 | | - ArrayRef::from(element_array), |
473 | | - self.nulls.finish(), |
474 | | - ); |
475 | | - Ok(Arc::new(list_array)) |
476 | | - } |
477 | | - } |
478 | | -} |
479 | | - |
480 | 309 | pub(crate) struct VariantToShreddedObjectVariantRowBuilder<'a> { |
481 | 310 | value_builder: VariantValueArrayBuilder, |
482 | 311 | typed_value_builders: IndexMap<&'a str, VariantToShreddedVariantRowBuilder<'a>>, |
@@ -1513,6 +1342,22 @@ mod tests { |
1513 | 1342 | ); |
1514 | 1343 | } |
1515 | 1344 |
|
| 1345 | + #[test] |
| 1346 | + fn test_array_shredding_as_fixed_size_list() { |
| 1347 | + let input = build_variant_array(vec![VariantRow::List(vec![ |
| 1348 | + VariantValue::from(1i64), |
| 1349 | + VariantValue::from(2i64), |
| 1350 | + VariantValue::from(3i64), |
| 1351 | + ])]); |
| 1352 | + let list_schema = |
| 1353 | + DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int64, true)), 2); |
| 1354 | + let err = shred_variant(&input, &list_schema).unwrap_err(); |
| 1355 | + assert_eq!( |
| 1356 | + err.to_string(), |
| 1357 | + "Not yet implemented: Converting unshredded variant arrays to arrow fixed-size lists" |
| 1358 | + ); |
| 1359 | + } |
| 1360 | + |
1516 | 1361 | #[test] |
1517 | 1362 | fn test_array_shredding_with_array_elements() { |
1518 | 1363 | let input = build_variant_array(vec![ |
|
0 commit comments