diff --git a/arrow-array/src/array/byte_array.rs b/arrow-array/src/array/byte_array.rs
index bd85bffcfe44..8e8ad91ceaeb 100644
--- a/arrow-array/src/array/byte_array.rs
+++ b/arrow-array/src/array/byte_array.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::array::{get_offsets, print_long_array};
+use crate::array::{get_offsets_from_buffer, print_long_array};
 use crate::builder::GenericByteBuilder;
 use crate::iterator::ArrayIter;
 use crate::types::ByteArrayType;
@@ -542,30 +542,34 @@ impl<'a, T: ByteArrayType> ArrayAccessor for &'a GenericByteArray {
 
 impl From for GenericByteArray {
     fn from(data: ArrayData) -> Self {
+        let (data_type, len, nulls, offset, mut buffers, _child_data) = data.into_parts();
         assert_eq!(
-            data.data_type(),
-            &Self::DATA_TYPE,
+            data_type,
+            Self::DATA_TYPE,
             "{}{}Array expects DataType::{}",
             T::Offset::PREFIX,
             T::PREFIX,
             Self::DATA_TYPE
         );
         assert_eq!(
-            data.buffers().len(),
+            buffers.len(),
             2,
             "{}{}Array data should contain 2 buffers only (offsets and values)",
             T::Offset::PREFIX,
             T::PREFIX,
         );
+        // `buffers` is ordered [offsets, values] (len == 2 asserted above), so pop in reverse: values, then offsets
+        let value_data = buffers.pop().expect("checked above");
+        let offset_buffer = buffers.pop().expect("checked above");
+
         // SAFETY:
         // ArrayData is valid, and verified type above
-        let value_offsets = unsafe { get_offsets(&data) };
-        let value_data = data.buffers()[1].clone();
+        let value_offsets = unsafe { get_offsets_from_buffer(offset_buffer, offset, len) };
         Self {
             value_offsets,
             value_data,
-            data_type: T::DATA_TYPE,
-            nulls: data.nulls().cloned(),
+            data_type,
+            nulls,
         }
     }
 }
diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs
index aae382ace7b4..6fcb80c533fe 100644
--- a/arrow-array/src/array/mod.rs
+++ b/arrow-array/src/array/mod.rs
@@ -20,7 +20,7 @@
 mod binary_array;
 
 use crate::types::*;
-use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer, ScalarBuffer};
+use arrow_buffer::{ArrowNativeType,
Buffer, NullBuffer, OffsetBuffer, ScalarBuffer};
 use arrow_data::ArrayData;
 use arrow_schema::{DataType, IntervalUnit, TimeUnit};
 use std::any::Any;
@@ -939,6 +939,27 @@ unsafe fn get_offsets(data: &ArrayData) -> OffsetBuffer {
     }
 }
 
+/// Helper function that creates an [`OffsetBuffer`] from an offsets [`Buffer`] and the array's `offset` and `len`
+///
+/// # Safety
+///
+/// - `buffer` must contain valid arrow offsets (see [`OffsetBuffer`]) for the
+///   given `len` and `offset`; they are passed to [`OffsetBuffer::new_unchecked`].
+unsafe fn get_offsets_from_buffer(
+    buffer: Buffer,
+    offset: usize,
+    len: usize,
+) -> OffsetBuffer {
+    if len == 0 && buffer.is_empty() {
+        return OffsetBuffer::new_empty();
+    }
+
+    let scalar_buffer = ScalarBuffer::new(buffer, offset, len + 1);
+    // SAFETY:
+    // The caller guarantees `buffer` holds valid offsets for `offset` and `len` (see `# Safety`)
+    unsafe { OffsetBuffer::new_unchecked(scalar_buffer) }
+}
+
 /// Helper function for printing potentially long arrays.
 fn print_long_array(array: &A, f: &mut std::fmt::Formatter, print_item: F) -> std::fmt::Result
 where