Skip to content

Commit 33fdb1e

Browse files
committed
Move ArrayVariantToArrowRowBuilder and VariantToListArrowRowBuilder to variant_to_arrow
1 parent b2f8493 commit 33fdb1e

File tree

2 files changed

+191
-189
lines changed

2 files changed

+191
-189
lines changed

parquet-variant-compute/src/shred_variant.rs

Lines changed: 6 additions & 178 deletions
Original file line number | Diff line number | Diff line change
@@ -19,19 +19,17 @@
1919
2020
use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder};
2121
use crate::variant_to_arrow::{
22-
PrimitiveVariantToArrowRowBuilder, make_primitive_variant_to_arrow_row_builder,
22+
ArrayVariantToArrowRowBuilder, PrimitiveVariantToArrowRowBuilder,
23+
make_primitive_variant_to_arrow_row_builder,
2324
};
2425
use crate::{VariantArray, VariantValueArrayBuilder};
25-
use arrow::array::{
26-
ArrayRef, BinaryViewArray, GenericListArray, GenericListViewArray, NullBufferBuilder,
27-
OffsetSizeTrait,
28-
};
29-
use arrow::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer};
26+
use arrow::array::{ArrayRef, BinaryViewArray, NullBufferBuilder};
27+
use arrow::buffer::NullBuffer;
3028
use arrow::compute::CastOptions;
31-
use arrow::datatypes::{ArrowNativeTypeOp, DataType, Field, FieldRef, Fields, TimeUnit};
29+
use arrow::datatypes::{DataType, Field, FieldRef, Fields, TimeUnit};
3230
use arrow::error::{ArrowError, Result};
3331
use indexmap::IndexMap;
34-
use parquet_variant::{Variant, VariantBuilderExt, VariantList, VariantPath, VariantPathElement};
32+
use parquet_variant::{Variant, VariantBuilderExt, VariantPath, VariantPathElement};
3533
use std::collections::BTreeMap;
3634
use std::sync::Arc;
3735

@@ -308,176 +306,6 @@ impl<'a> VariantToShreddedArrayVariantRowBuilder<'a> {
308306
}
309307
}
310308

311-
/// Row builder that converts variant lists into one of the four Arrow list layouts.
///
/// Each variant wraps a `VariantToListArrowRowBuilder` instantiated with the offset
/// type (`i32` or `i64`) and the `IS_VIEW` flag that match the target layout.
enum ArrayVariantToArrowRowBuilder<'a> {
312-
/// Built for `DataType::List`: `i32` offsets, non-view layout.
List(VariantToListArrowRowBuilder<'a, i32, false>),
313-
/// Built for `DataType::LargeList`: `i64` offsets, non-view layout.
LargeList(VariantToListArrowRowBuilder<'a, i64, false>),
314-
/// Built for `DataType::ListView`: `i32` offsets, view layout.
ListView(VariantToListArrowRowBuilder<'a, i32, true>),
315-
/// Built for `DataType::LargeListView`: `i64` offsets, view layout.
LargeListView(VariantToListArrowRowBuilder<'a, i64, true>),
316-
}
317-
318-
impl<'a> ArrayVariantToArrowRowBuilder<'a> {
319-
/// Creates the list builder that matches `data_type`.
///
/// `cast_options` and `capacity` are forwarded unchanged to the underlying
/// `VariantToListArrowRowBuilder`, together with the list's element field and that
/// field's data type.
///
/// # Errors
///
/// * `ArrowError::NotYetImplemented` when `data_type` is `DataType::FixedSizeList`.
/// * `ArrowError::InvalidArgumentError` when `data_type` is any other non-list type.
/// * Any error returned by `VariantToListArrowRowBuilder::try_new`.
fn try_new(
320-
data_type: &'a DataType,
321-
cast_options: &'a CastOptions,
322-
capacity: usize,
323-
) -> Result<Self> {
324-
use ArrayVariantToArrowRowBuilder::*;
325-
326-
// Make List/ListView builders without repeating the constructor boilerplate.
327-
// Arguments: enum variant to wrap in, offset type, view flag, and the element field
// bound by the surrounding `match` arm.
macro_rules! make_list_builder {
328-
($variant:ident, $offset:ty, $is_view:expr, $field:ident) => {
329-
$variant(VariantToListArrowRowBuilder::<$offset, $is_view>::try_new(
330-
$field.clone(),
331-
$field.data_type(),
332-
cast_options,
333-
capacity,
334-
)?)
335-
};
336-
}
337-
338-
let builder = match data_type {
339-
DataType::List(field) => make_list_builder!(List, i32, false, field),
340-
DataType::LargeList(field) => make_list_builder!(LargeList, i64, false, field),
341-
DataType::ListView(field) => make_list_builder!(ListView, i32, true, field),
342-
DataType::LargeListView(field) => make_list_builder!(LargeListView, i64, true, field),
343-
// Fixed-size lists are a list type, but are not supported by this builder yet.
DataType::FixedSizeList(..) => {
344-
return Err(ArrowError::NotYetImplemented(
345-
"Converting unshredded variant arrays to arrow fixed-size lists".to_string(),
346-
));
347-
}
348-
// Every remaining data type is not a list type at all.
other => {
349-
return Err(ArrowError::InvalidArgumentError(format!(
350-
"Casting to {other:?} is not applicable for array Variant types"
351-
)));
352-
}
353-
};
354-
Ok(builder)
355-
}
356-
357-
/// Appends a null list row by delegating to the wrapped builder.
fn append_null(&mut self) {
358-
match self {
359-
Self::List(builder) => builder.append_null(),
360-
Self::LargeList(builder) => builder.append_null(),
361-
Self::ListView(builder) => builder.append_null(),
362-
Self::LargeListView(builder) => builder.append_null(),
363-
}
364-
}
365-
366-
/// Appends one non-null list row built from `list` by delegating to the wrapped builder.
///
/// Returns whatever error the wrapped builder's `append_value` returns.
fn append_value(&mut self, list: VariantList<'_, '_>) -> Result<()> {
367-
match self {
368-
Self::List(builder) => builder.append_value(list),
369-
Self::LargeList(builder) => builder.append_value(list),
370-
Self::ListView(builder) => builder.append_value(list),
371-
Self::LargeListView(builder) => builder.append_value(list),
372-
}
373-
}
374-
375-
/// Consumes the builder and returns the finished list array from the wrapped builder.
fn finish(self) -> Result<ArrayRef> {
376-
match self {
377-
Self::List(builder) => builder.finish(),
378-
Self::LargeList(builder) => builder.finish(),
379-
Self::ListView(builder) => builder.finish(),
380-
Self::LargeListView(builder) => builder.finish(),
381-
}
382-
}
383-
}
384-
385-
/// Row builder that accumulates variant lists into an Arrow list or list-view array.
///
/// `O` is the offset type of the produced array. `IS_VIEW` selects, in `finish`, between
/// a `GenericListViewArray` (`true`) and a `GenericListArray` (`false`).
struct VariantToListArrowRowBuilder<'a, O, const IS_VIEW: bool>
386-
where
387-
O: OffsetSizeTrait + ArrowNativeTypeOp,
388-
{
389-
// Field of the target list; `finish` clones it and swaps in the data type of the
// element array that was actually built.
field: FieldRef,
390-
// Starts as `[O::ZERO]`; every appended row (null or not) pushes the offset at which
// that row ends.
offsets: Vec<O>,
391-
// Receives every element of every appended list.
element_builder: Box<VariantToShreddedVariantRowBuilder<'a>>,
392-
// Validity of each list row.
nulls: NullBufferBuilder,
393-
// Total number of elements appended so far, i.e. the end offset of the latest row.
current_offset: O,
394-
}
395-
396-
impl<'a, O, const IS_VIEW: bool> VariantToListArrowRowBuilder<'a, O, IS_VIEW>
397-
where
398-
O: OffsetSizeTrait + ArrowNativeTypeOp,
399-
{
400-
/// Creates an empty builder for lists whose elements have `element_data_type`.
///
/// `field` is kept for building the final list field in `finish`. `capacity` is the
/// expected number of list rows; it sizes the offsets vector and the null buffer and
/// is also handed to the element builder.
///
/// # Errors
///
/// * `ArrowError::ComputeError` when `capacity >= isize::MAX`.
/// * Any error from `make_variant_to_shredded_variant_arrow_row_builder`.
fn try_new(
401-
field: FieldRef,
402-
element_data_type: &'a DataType,
403-
cast_options: &'a CastOptions,
404-
capacity: usize,
405-
) -> Result<Self> {
406-
// Reject oversized capacities up front; this also guarantees that `capacity + 1`
// below cannot overflow.
if capacity >= isize::MAX as usize {
407-
return Err(ArrowError::ComputeError(
408-
"Capacity exceeds isize::MAX when reserving list offsets".to_string(),
409-
));
410-
}
411-
// One offset per row plus the leading zero offset.
let mut offsets = Vec::with_capacity(capacity + 1);
412-
offsets.push(O::ZERO);
413-
// NOTE(review): the meaning of the trailing `false` argument is defined by
// `make_variant_to_shredded_variant_arrow_row_builder`, which is not visible here.
let element_builder = make_variant_to_shredded_variant_arrow_row_builder(
414-
element_data_type,
415-
cast_options,
416-
capacity,
417-
false,
418-
)?;
419-
Ok(Self {
420-
field,
421-
offsets,
422-
element_builder: Box::new(element_builder),
423-
nulls: NullBufferBuilder::new(capacity),
424-
current_offset: O::ZERO,
425-
})
426-
}
427-
428-
/// Appends a null list row.
///
/// The current offset is pushed unchanged, so the null row spans zero elements.
fn append_null(&mut self) {
429-
self.offsets.push(self.current_offset);
430-
self.nulls.append_null();
431-
}
432-
433-
/// Appends a non-null list row containing every element of `list`.
///
/// # Errors
///
/// Propagates errors from the element builder's `append_value` and from the checked
/// increment of the running offset. If an error occurs part-way through, elements
/// already appended stay in the element builder and no offset is pushed for the row.
fn append_value(&mut self, list: VariantList<'_, '_>) -> Result<()> {
434-
for element in list.iter() {
435-
self.element_builder.append_value(element)?;
436-
// Checked add: the `?` surfaces a failure as an error instead of wrapping.
self.current_offset = self.current_offset.add_checked(O::ONE)?;
437-
}
438-
self.offsets.push(self.current_offset);
439-
self.nulls.append_non_null();
440-
Ok(())
441-
}
442-
443-
/// Consumes the builder and returns the finished array.
///
/// The element builder's output is wrapped in a `ShreddedVariantFieldArray`, and the
/// stored list field is cloned with its data type replaced by that array's data type.
/// The result is a `GenericListViewArray<O>` when `IS_VIEW` is true and a
/// `GenericListArray<O>` otherwise.
fn finish(mut self) -> Result<ArrayRef> {
444-
let (value, typed_value, nulls) = self.element_builder.finish()?;
445-
let element_array =
446-
ShreddedVariantFieldArray::from_parts(Some(value), Some(typed_value), nulls);
447-
let field = Arc::new(
448-
self.field
449-
.as_ref()
450-
.clone()
451-
.with_data_type(element_array.data_type().clone()),
452-
);
453-
454-
if IS_VIEW {
455-
// NOTE: `offsets` is never empty (constructor pushes an entry)
456-
let mut sizes = Vec::with_capacity(self.offsets.len() - 1);
457-
// Each row's size is the difference between consecutive offsets.
for i in 1..self.offsets.len() {
458-
sizes.push(self.offsets[i] - self.offsets[i - 1]);
459-
}
460-
// Drop the trailing end offset so `offsets` holds one start offset per row,
// matching `sizes` in length. This must run after the sizes are computed.
self.offsets.pop();
461-
let list_view_array = GenericListViewArray::<O>::new(
462-
field,
463-
ScalarBuffer::from(self.offsets),
464-
ScalarBuffer::from(sizes),
465-
ArrayRef::from(element_array),
466-
self.nulls.finish(),
467-
);
468-
Ok(Arc::new(list_view_array))
469-
} else {
470-
// The non-view layout uses the full offsets vector, including the trailing end offset.
let list_array = GenericListArray::<O>::new(
471-
field,
472-
OffsetBuffer::<O>::new(ScalarBuffer::from(self.offsets)),
473-
ArrayRef::from(element_array),
474-
self.nulls.finish(),
475-
);
476-
Ok(Arc::new(list_array))
477-
}
478-
}
479-
}
480-
481309
pub(crate) struct VariantToShreddedObjectVariantRowBuilder<'a> {
482310
value_builder: VariantValueArrayBuilder,
483311
typed_value_builders: IndexMap<&'a str, VariantToShreddedVariantRowBuilder<'a>>,

0 commit comments

Comments
 (0)