|
19 | 19 |
|
20 | 20 | use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder}; |
21 | 21 | use crate::variant_to_arrow::{ |
22 | | - PrimitiveVariantToArrowRowBuilder, make_primitive_variant_to_arrow_row_builder, |
| 22 | + ArrayVariantToArrowRowBuilder, PrimitiveVariantToArrowRowBuilder, |
| 23 | + make_primitive_variant_to_arrow_row_builder, |
23 | 24 | }; |
24 | 25 | use crate::{VariantArray, VariantValueArrayBuilder}; |
25 | | -use arrow::array::{ |
26 | | - ArrayRef, BinaryViewArray, GenericListArray, GenericListViewArray, NullBufferBuilder, |
27 | | - OffsetSizeTrait, |
28 | | -}; |
29 | | -use arrow::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer}; |
| 26 | +use arrow::array::{ArrayRef, BinaryViewArray, NullBufferBuilder}; |
| 27 | +use arrow::buffer::NullBuffer; |
30 | 28 | use arrow::compute::CastOptions; |
31 | | -use arrow::datatypes::{ArrowNativeTypeOp, DataType, Field, FieldRef, Fields, TimeUnit}; |
| 29 | +use arrow::datatypes::{DataType, Field, FieldRef, Fields, TimeUnit}; |
32 | 30 | use arrow::error::{ArrowError, Result}; |
33 | 31 | use indexmap::IndexMap; |
34 | | -use parquet_variant::{Variant, VariantBuilderExt, VariantList, VariantPath, VariantPathElement}; |
| 32 | +use parquet_variant::{Variant, VariantBuilderExt, VariantPath, VariantPathElement}; |
35 | 33 | use std::collections::BTreeMap; |
36 | 34 | use std::sync::Arc; |
37 | 35 |
|
@@ -308,176 +306,6 @@ impl<'a> VariantToShreddedArrayVariantRowBuilder<'a> { |
308 | 306 | } |
309 | 307 | } |
310 | 308 |
|
311 | | -enum ArrayVariantToArrowRowBuilder<'a> { |
312 | | - List(VariantToListArrowRowBuilder<'a, i32, false>), |
313 | | - LargeList(VariantToListArrowRowBuilder<'a, i64, false>), |
314 | | - ListView(VariantToListArrowRowBuilder<'a, i32, true>), |
315 | | - LargeListView(VariantToListArrowRowBuilder<'a, i64, true>), |
316 | | -} |
317 | | - |
318 | | -impl<'a> ArrayVariantToArrowRowBuilder<'a> { |
319 | | - fn try_new( |
320 | | - data_type: &'a DataType, |
321 | | - cast_options: &'a CastOptions, |
322 | | - capacity: usize, |
323 | | - ) -> Result<Self> { |
324 | | - use ArrayVariantToArrowRowBuilder::*; |
325 | | - |
326 | | - // Make List/ListView builders without repeating the constructor boilerplate. |
327 | | - macro_rules! make_list_builder { |
328 | | - ($variant:ident, $offset:ty, $is_view:expr, $field:ident) => { |
329 | | - $variant(VariantToListArrowRowBuilder::<$offset, $is_view>::try_new( |
330 | | - $field.clone(), |
331 | | - $field.data_type(), |
332 | | - cast_options, |
333 | | - capacity, |
334 | | - )?) |
335 | | - }; |
336 | | - } |
337 | | - |
338 | | - let builder = match data_type { |
339 | | - DataType::List(field) => make_list_builder!(List, i32, false, field), |
340 | | - DataType::LargeList(field) => make_list_builder!(LargeList, i64, false, field), |
341 | | - DataType::ListView(field) => make_list_builder!(ListView, i32, true, field), |
342 | | - DataType::LargeListView(field) => make_list_builder!(LargeListView, i64, true, field), |
343 | | - DataType::FixedSizeList(..) => { |
344 | | - return Err(ArrowError::NotYetImplemented( |
345 | | - "Converting unshredded variant arrays to arrow fixed-size lists".to_string(), |
346 | | - )); |
347 | | - } |
348 | | - other => { |
349 | | - return Err(ArrowError::InvalidArgumentError(format!( |
350 | | - "Casting to {other:?} is not applicable for array Variant types" |
351 | | - ))); |
352 | | - } |
353 | | - }; |
354 | | - Ok(builder) |
355 | | - } |
356 | | - |
357 | | - fn append_null(&mut self) { |
358 | | - match self { |
359 | | - Self::List(builder) => builder.append_null(), |
360 | | - Self::LargeList(builder) => builder.append_null(), |
361 | | - Self::ListView(builder) => builder.append_null(), |
362 | | - Self::LargeListView(builder) => builder.append_null(), |
363 | | - } |
364 | | - } |
365 | | - |
366 | | - fn append_value(&mut self, list: VariantList<'_, '_>) -> Result<()> { |
367 | | - match self { |
368 | | - Self::List(builder) => builder.append_value(list), |
369 | | - Self::LargeList(builder) => builder.append_value(list), |
370 | | - Self::ListView(builder) => builder.append_value(list), |
371 | | - Self::LargeListView(builder) => builder.append_value(list), |
372 | | - } |
373 | | - } |
374 | | - |
375 | | - fn finish(self) -> Result<ArrayRef> { |
376 | | - match self { |
377 | | - Self::List(builder) => builder.finish(), |
378 | | - Self::LargeList(builder) => builder.finish(), |
379 | | - Self::ListView(builder) => builder.finish(), |
380 | | - Self::LargeListView(builder) => builder.finish(), |
381 | | - } |
382 | | - } |
383 | | -} |
384 | | - |
385 | | -struct VariantToListArrowRowBuilder<'a, O, const IS_VIEW: bool> |
386 | | -where |
387 | | - O: OffsetSizeTrait + ArrowNativeTypeOp, |
388 | | -{ |
389 | | - field: FieldRef, |
390 | | - offsets: Vec<O>, |
391 | | - element_builder: Box<VariantToShreddedVariantRowBuilder<'a>>, |
392 | | - nulls: NullBufferBuilder, |
393 | | - current_offset: O, |
394 | | -} |
395 | | - |
396 | | -impl<'a, O, const IS_VIEW: bool> VariantToListArrowRowBuilder<'a, O, IS_VIEW> |
397 | | -where |
398 | | - O: OffsetSizeTrait + ArrowNativeTypeOp, |
399 | | -{ |
400 | | - fn try_new( |
401 | | - field: FieldRef, |
402 | | - element_data_type: &'a DataType, |
403 | | - cast_options: &'a CastOptions, |
404 | | - capacity: usize, |
405 | | - ) -> Result<Self> { |
406 | | - if capacity >= isize::MAX as usize { |
407 | | - return Err(ArrowError::ComputeError( |
408 | | - "Capacity exceeds isize::MAX when reserving list offsets".to_string(), |
409 | | - )); |
410 | | - } |
411 | | - let mut offsets = Vec::with_capacity(capacity + 1); |
412 | | - offsets.push(O::ZERO); |
413 | | - let element_builder = make_variant_to_shredded_variant_arrow_row_builder( |
414 | | - element_data_type, |
415 | | - cast_options, |
416 | | - capacity, |
417 | | - false, |
418 | | - )?; |
419 | | - Ok(Self { |
420 | | - field, |
421 | | - offsets, |
422 | | - element_builder: Box::new(element_builder), |
423 | | - nulls: NullBufferBuilder::new(capacity), |
424 | | - current_offset: O::ZERO, |
425 | | - }) |
426 | | - } |
427 | | - |
428 | | - fn append_null(&mut self) { |
429 | | - self.offsets.push(self.current_offset); |
430 | | - self.nulls.append_null(); |
431 | | - } |
432 | | - |
433 | | - fn append_value(&mut self, list: VariantList<'_, '_>) -> Result<()> { |
434 | | - for element in list.iter() { |
435 | | - self.element_builder.append_value(element)?; |
436 | | - self.current_offset = self.current_offset.add_checked(O::ONE)?; |
437 | | - } |
438 | | - self.offsets.push(self.current_offset); |
439 | | - self.nulls.append_non_null(); |
440 | | - Ok(()) |
441 | | - } |
442 | | - |
443 | | - fn finish(mut self) -> Result<ArrayRef> { |
444 | | - let (value, typed_value, nulls) = self.element_builder.finish()?; |
445 | | - let element_array = |
446 | | - ShreddedVariantFieldArray::from_parts(Some(value), Some(typed_value), nulls); |
447 | | - let field = Arc::new( |
448 | | - self.field |
449 | | - .as_ref() |
450 | | - .clone() |
451 | | - .with_data_type(element_array.data_type().clone()), |
452 | | - ); |
453 | | - |
454 | | - if IS_VIEW { |
455 | | - // NOTE: `offsets` is never empty (constructor pushes an entry) |
456 | | - let mut sizes = Vec::with_capacity(self.offsets.len() - 1); |
457 | | - for i in 1..self.offsets.len() { |
458 | | - sizes.push(self.offsets[i] - self.offsets[i - 1]); |
459 | | - } |
460 | | - self.offsets.pop(); |
461 | | - let list_view_array = GenericListViewArray::<O>::new( |
462 | | - field, |
463 | | - ScalarBuffer::from(self.offsets), |
464 | | - ScalarBuffer::from(sizes), |
465 | | - ArrayRef::from(element_array), |
466 | | - self.nulls.finish(), |
467 | | - ); |
468 | | - Ok(Arc::new(list_view_array)) |
469 | | - } else { |
470 | | - let list_array = GenericListArray::<O>::new( |
471 | | - field, |
472 | | - OffsetBuffer::<O>::new(ScalarBuffer::from(self.offsets)), |
473 | | - ArrayRef::from(element_array), |
474 | | - self.nulls.finish(), |
475 | | - ); |
476 | | - Ok(Arc::new(list_array)) |
477 | | - } |
478 | | - } |
479 | | -} |
480 | | - |
481 | 309 | pub(crate) struct VariantToShreddedObjectVariantRowBuilder<'a> { |
482 | 310 | value_builder: VariantValueArrayBuilder, |
483 | 311 | typed_value_builders: IndexMap<&'a str, VariantToShreddedVariantRowBuilder<'a>>, |
|
0 commit comments