Skip to content

Commit c199811

Browse files
committed
Merge branch 'main' into fix-encoder-nullability-bug
2 parents d96fc62 + c3226a4 commit c199811

File tree

31 files changed

+2290
-115
lines changed

31 files changed

+2290
-115
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ chrono = { version = "0.4.40", default-features = false, features = ["clock"] }
110110

111111
simdutf8 = { version = "0.1.5", default-features = false }
112112

113-
criterion = { version = "0.7.0", default-features = false }
113+
criterion = { version = "0.8.0", default-features = false }
114114

115115
# release inherited profile keeping debug information and symbols
116116
# for mem/cpu profiling

arrow-array/src/array/list_view_array.rs

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,12 @@ use std::ops::Add;
2323
use std::sync::Arc;
2424

2525
use crate::array::{make_array, print_long_array};
26+
use crate::builder::{GenericListViewBuilder, PrimitiveBuilder};
2627
use crate::iterator::GenericListViewArrayIter;
27-
use crate::{Array, ArrayAccessor, ArrayRef, FixedSizeListArray, OffsetSizeTrait, new_empty_array};
28+
use crate::{
29+
Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray, OffsetSizeTrait,
30+
new_empty_array,
31+
};
2832

2933
/// A [`GenericListViewArray`] of variable size lists, storing offsets as `i32`.
3034
pub type ListViewArray = GenericListViewArray<i32>;
@@ -357,6 +361,46 @@ impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
357361
value_sizes: self.value_sizes.slice(offset, length),
358362
}
359363
}
364+
365+
/// Creates a [`GenericListViewArray`] from an iterator of primitive values
366+
/// # Example
367+
/// ```
368+
/// # use arrow_array::ListViewArray;
369+
/// # use arrow_array::types::Int32Type;
370+
///
371+
/// let data = vec![
372+
/// Some(vec![Some(0), Some(1), Some(2)]),
373+
/// None,
374+
/// Some(vec![Some(3), None, Some(5)]),
375+
/// Some(vec![Some(6), Some(7)]),
376+
/// ];
377+
/// let list_array = ListViewArray::from_iter_primitive::<Int32Type, _, _>(data);
378+
/// println!("{:?}", list_array);
379+
/// ```
380+
pub fn from_iter_primitive<T, P, I>(iter: I) -> Self
381+
where
382+
T: ArrowPrimitiveType,
383+
P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>,
384+
I: IntoIterator<Item = Option<P>>,
385+
{
386+
let iter = iter.into_iter();
387+
let size_hint = iter.size_hint().0;
388+
let mut builder =
389+
GenericListViewBuilder::with_capacity(PrimitiveBuilder::<T>::new(), size_hint);
390+
391+
for i in iter {
392+
match i {
393+
Some(p) => {
394+
for t in p {
395+
builder.values().append_option(t);
396+
}
397+
builder.append(true);
398+
}
399+
None => builder.append(false),
400+
}
401+
}
402+
builder.finish()
403+
}
360404
}
361405

362406
impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListViewArray<OffsetSize> {
@@ -559,7 +603,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
559603

560604
#[cfg(test)]
561605
mod tests {
562-
use arrow_buffer::{BooleanBuffer, Buffer, ScalarBuffer, bit_util};
606+
use arrow_buffer::{BooleanBuffer, Buffer, NullBufferBuilder, ScalarBuffer, bit_util};
563607
use arrow_schema::Field;
564608

565609
use crate::builder::{FixedSizeListBuilder, Int32Builder};
@@ -1127,4 +1171,29 @@ mod tests {
11271171
let array = ListViewArray::new_null(field, 5);
11281172
assert_eq!(array.len(), 5);
11291173
}
1174+
1175+
#[test]
1176+
fn test_from_iter_primitive() {
1177+
let data = vec![
1178+
Some(vec![Some(0), Some(1), Some(2)]),
1179+
None,
1180+
Some(vec![Some(3), Some(4), Some(5)]),
1181+
Some(vec![Some(6), Some(7)]),
1182+
];
1183+
let list_array = ListViewArray::from_iter_primitive::<Int32Type, _, _>(data);
1184+
1185+
// [[0, 1, 2], NULL, [3, 4, 5], [6, 7]]
1186+
let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
1187+
let offsets = ScalarBuffer::from(vec![0, 3, 3, 6]);
1188+
let sizes = ScalarBuffer::from(vec![3, 0, 3, 2]);
1189+
let field = Arc::new(Field::new_list_field(DataType::Int32, true));
1190+
1191+
let mut nulls = NullBufferBuilder::new(4);
1192+
nulls.append(true);
1193+
nulls.append(false);
1194+
nulls.append_n_non_nulls(2);
1195+
let another = ListViewArray::new(field, offsets, sizes, Arc::new(values), nulls.finish());
1196+
1197+
assert_eq!(list_array, another)
1198+
}
11301199
}

arrow-array/src/array/mod.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -824,20 +824,20 @@ pub fn make_array(data: ArrayData) -> ArrayRef {
824824
DataType::UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)) as ArrayRef,
825825
DataType::UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)) as ArrayRef,
826826
DataType::UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)) as ArrayRef,
827-
dt => panic!("Unexpected dictionary key type {dt}"),
827+
dt => unimplemented!("Unexpected dictionary key type {dt}"),
828828
},
829829
DataType::RunEndEncoded(run_ends_type, _) => match run_ends_type.data_type() {
830830
DataType::Int16 => Arc::new(RunArray::<Int16Type>::from(data)) as ArrayRef,
831831
DataType::Int32 => Arc::new(RunArray::<Int32Type>::from(data)) as ArrayRef,
832832
DataType::Int64 => Arc::new(RunArray::<Int64Type>::from(data)) as ArrayRef,
833-
dt => panic!("Unexpected data type for run_ends array {dt}"),
833+
dt => unimplemented!("Unexpected data type for run_ends array {dt}"),
834834
},
835835
DataType::Null => Arc::new(NullArray::from(data)) as ArrayRef,
836836
DataType::Decimal32(_, _) => Arc::new(Decimal32Array::from(data)) as ArrayRef,
837837
DataType::Decimal64(_, _) => Arc::new(Decimal64Array::from(data)) as ArrayRef,
838838
DataType::Decimal128(_, _) => Arc::new(Decimal128Array::from(data)) as ArrayRef,
839839
DataType::Decimal256(_, _) => Arc::new(Decimal256Array::from(data)) as ArrayRef,
840-
dt => panic!("Unexpected data type {dt}"),
840+
dt => unimplemented!("Unexpected data type {dt}"),
841841
}
842842
}
843843

arrow-array/src/builder/mod.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,7 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
594594
LargeBinaryDictionaryBuilder::with_capacity(capacity, 256, 1024);
595595
Box::new(dict_builder)
596596
}
597-
t => panic!("Dictionary value type {t} is not currently supported"),
597+
t => unimplemented!("Dictionary value type {t} is not currently supported"),
598598
}
599599
};
600600
}
@@ -604,10 +604,12 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
604604
DataType::Int32 => dict_builder!(Int32Type),
605605
DataType::Int64 => dict_builder!(Int64Type),
606606
_ => {
607-
panic!("Data type {t} with key type {key_type} is not currently supported")
607+
unimplemented!(
608+
"Data type {t} with key type {key_type} is not currently supported"
609+
)
608610
}
609611
}
610612
}
611-
t => panic!("Data type {t} is not currently supported"),
613+
t => unimplemented!("Data type {t} is not currently supported"),
612614
}
613615
}

arrow-array/src/ffi.rs

Lines changed: 143 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -140,8 +140,8 @@ pub unsafe fn export_array_into_raw(
140140
Ok(())
141141
}
142142

143-
// returns the number of bits that buffer `i` (in the C data interface) is expected to have.
144-
// This is set by the Arrow specification
143+
/// returns the number of bits that buffer `i` (in the C data interface) is expected to have.
144+
/// This is set by the Arrow specification
145145
fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
146146
if let Some(primitive) = data_type.primitive_width() {
147147
return match i {
@@ -180,6 +180,10 @@ fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
180180
| (DataType::List(_), 1)
181181
| (DataType::Map(_, _), 1) => i32::BITS as _,
182182
(DataType::Utf8, 2) | (DataType::Binary, 2) => u8::BITS as _,
183+
// List views have two i32 buffers, offsets and sizes
184+
(DataType::ListView(_), 1) | (DataType::ListView(_), 2) => i32::BITS as _,
185+
// Large list views have two i64 buffers, offsets and sizes
186+
(DataType::LargeListView(_), 1) | (DataType::LargeListView(_), 2) => i64::BITS as _,
183187
(DataType::List(_), _) | (DataType::Map(_, _), _) => {
184188
return Err(ArrowError::CDataInterface(format!(
185189
"The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
@@ -351,6 +355,8 @@ impl ImportedArrowArray<'_> {
351355
DataType::List(field)
352356
| DataType::FixedSizeList(field, _)
353357
| DataType::LargeList(field)
358+
| DataType::ListView(field)
359+
| DataType::LargeListView(field)
354360
| DataType::Map(field, _) => Ok([self.consume_child(0, field.data_type())?].to_vec()),
355361
DataType::Struct(fields) => {
356362
assert!(fields.len() == self.array.num_children());
@@ -471,6 +477,14 @@ impl ImportedArrowArray<'_> {
471477
debug_assert_eq!(bits % 8, 0);
472478
(length + 1) * (bits / 8)
473479
}
480+
(DataType::ListView(_), 1)
481+
| (DataType::ListView(_), 2)
482+
| (DataType::LargeListView(_), 1)
483+
| (DataType::LargeListView(_), 2) => {
484+
let bits = bit_width(data_type, i)?;
485+
debug_assert_eq!(bits % 8, 0);
486+
length * (bits / 8)
487+
}
474488
(DataType::Utf8, 2) | (DataType::Binary, 2) => {
475489
if self.array.is_empty() {
476490
return Ok(0);
@@ -553,7 +567,7 @@ mod tests_to_then_from_ffi {
553567
use std::collections::HashMap;
554568
use std::mem::ManuallyDrop;
555569

556-
use arrow_buffer::NullBuffer;
570+
use arrow_buffer::{ArrowNativeType, NullBuffer};
557571
use arrow_schema::Field;
558572

559573
use crate::builder::UnionBuilder;
@@ -783,6 +797,71 @@ mod tests_to_then_from_ffi {
783797
test_generic_list::<i64>()
784798
}
785799

800+
fn test_generic_list_view<Offset: OffsetSizeTrait + ArrowNativeType>() -> Result<()> {
801+
// Construct a value array
802+
let value_data = ArrayData::builder(DataType::Int16)
803+
.len(8)
804+
.add_buffer(Buffer::from_slice_ref([0_i16, 1, 2, 3, 4, 5, 6, 7]))
805+
.build()
806+
.unwrap();
807+
808+
// Construct a buffer for value offsets, for the nested array:
809+
// [[0, 1, 2], [3, 4, 5], [6, 7]]
810+
let value_offsets = [0_usize, 3, 6]
811+
.iter()
812+
.map(|i| Offset::from_usize(*i).unwrap())
813+
.collect::<Buffer>();
814+
815+
let sizes_buffer = [3_usize, 3, 2]
816+
.iter()
817+
.map(|i| Offset::from_usize(*i).unwrap())
818+
.collect::<Buffer>();
819+
820+
// Construct a list array from the above two
821+
let list_view_dt = GenericListViewArray::<Offset>::DATA_TYPE_CONSTRUCTOR(Arc::new(
822+
Field::new_list_field(DataType::Int16, false),
823+
));
824+
825+
let list_data = ArrayData::builder(list_view_dt)
826+
.len(3)
827+
.add_buffer(value_offsets)
828+
.add_buffer(sizes_buffer)
829+
.add_child_data(value_data)
830+
.build()
831+
.unwrap();
832+
833+
let original = GenericListViewArray::<Offset>::from(list_data.clone());
834+
835+
// export it
836+
let (array, schema) = to_ffi(&original.to_data())?;
837+
838+
// (simulate consumer) import it
839+
let data = unsafe { from_ffi(array, &schema) }?;
840+
let array = make_array(data);
841+
842+
// downcast
843+
let array = array
844+
.as_any()
845+
.downcast_ref::<GenericListViewArray<Offset>>()
846+
.unwrap();
847+
848+
assert_eq!(&array.value(0), &original.value(0));
849+
assert_eq!(&array.value(1), &original.value(1));
850+
assert_eq!(&array.value(2), &original.value(2));
851+
852+
Ok(())
853+
}
854+
855+
#[test]
856+
fn test_list_view() -> Result<()> {
857+
test_generic_list_view::<i32>()
858+
}
859+
860+
#[test]
861+
fn test_large_list_view() -> Result<()> {
862+
test_generic_list_view::<i64>()
863+
}
864+
786865
fn test_generic_binary<Offset: OffsetSizeTrait>() -> Result<()> {
787866
// create an array natively
788867
let array: Vec<Option<&[u8]>> = vec![Some(b"a"), None, Some(b"aaa")];
@@ -1315,6 +1394,7 @@ mod tests_from_ffi {
13151394
use std::ptr::NonNull;
13161395
use std::sync::Arc;
13171396

1397+
use arrow_buffer::NullBuffer;
13181398
#[cfg(not(feature = "force_validate"))]
13191399
use arrow_buffer::{ScalarBuffer, bit_util, buffer::Buffer};
13201400
#[cfg(feature = "force_validate")]
@@ -1325,6 +1405,7 @@ mod tests_from_ffi {
13251405
use arrow_schema::{DataType, Field};
13261406

13271407
use super::Result;
1408+
13281409
use crate::builder::GenericByteViewBuilder;
13291410
use crate::types::{BinaryViewType, ByteViewType, Int32Type, StringViewType};
13301411
use crate::{
@@ -1528,6 +1609,65 @@ mod tests_from_ffi {
15281609
test_round_trip(&data)
15291610
}
15301611

1612+
#[test]
1613+
fn test_list_view() -> Result<()> {
1614+
// Construct a value array
1615+
let value_data = ArrayData::builder(DataType::Int16)
1616+
.len(8)
1617+
.add_buffer(Buffer::from_slice_ref([0_i16, 1, 2, 3, 4, 5, 6, 7]))
1618+
.build()
1619+
.unwrap();
1620+
1621+
// Construct a buffer for value offsets, for the nested array:
1622+
// [[0, 1, 2], [3, 4, 5], [6, 7]]
1623+
let value_offsets = Buffer::from(vec![0_i32, 3, 6]);
1624+
let sizes_buffer = Buffer::from(vec![3_i32, 3, 2]);
1625+
1626+
// Construct a list array from the above two
1627+
let list_view_dt =
1628+
DataType::ListView(Arc::new(Field::new_list_field(DataType::Int16, false)));
1629+
1630+
let list_view_data = ArrayData::builder(list_view_dt)
1631+
.len(3)
1632+
.add_buffer(value_offsets)
1633+
.add_buffer(sizes_buffer)
1634+
.add_child_data(value_data)
1635+
.build()
1636+
.unwrap();
1637+
1638+
test_round_trip(&list_view_data)
1639+
}
1640+
1641+
#[test]
1642+
fn test_list_view_with_nulls() -> Result<()> {
1643+
// Construct a value array
1644+
let value_data = ArrayData::builder(DataType::Int16)
1645+
.len(8)
1646+
.add_buffer(Buffer::from_slice_ref([0_i16, 1, 2, 3, 4, 5, 6, 7]))
1647+
.build()
1648+
.unwrap();
1649+
1650+
// Construct a buffer for value offsets, for the nested array:
1651+
// [[0, 1, 2], [3, 4, 5], [6, 7], null]
1652+
let value_offsets = Buffer::from(vec![0_i32, 3, 6, 8]);
1653+
let sizes_buffer = Buffer::from(vec![3_i32, 3, 2, 0]);
1654+
1655+
// Construct a list array from the above two
1656+
let list_view_dt =
1657+
DataType::ListView(Arc::new(Field::new_list_field(DataType::Int16, true)));
1658+
1659+
let list_view_data = ArrayData::builder(list_view_dt)
1660+
.len(4)
1661+
.add_buffer(value_offsets)
1662+
.add_buffer(sizes_buffer)
1663+
.add_child_data(value_data)
1664+
.nulls(Some(NullBuffer::from(vec![true, true, true, false])))
1665+
.build()
1666+
.unwrap();
1667+
1668+
test_round_trip(&list_view_data)
1669+
}
1670+
15311671
#[test]
15321672
#[cfg(not(feature = "force_validate"))]
15331673
fn test_empty_string_with_non_zero_offset() -> Result<()> {

0 commit comments

Comments
 (0)