diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs index 28c65c5994bf..a54e4f62ec7e 100644 --- a/arrow-row/src/lib.rs +++ b/arrow-row/src/lib.rs @@ -180,6 +180,7 @@ mod fixed; mod list; mod run; mod variable; +pub mod unordered_row; /// Converts [`ArrayRef`] columns into a [row-oriented](self) format. /// diff --git a/arrow-row/src/unordered_row/boolean.rs b/arrow-row/src/unordered_row/boolean.rs new file mode 100644 index 000000000000..30fbca698f53 --- /dev/null +++ b/arrow-row/src/unordered_row/boolean.rs @@ -0,0 +1,118 @@ +use arrow_array::BooleanArray; +use arrow_buffer::{bit_util, BooleanBuffer, MutableBuffer, NullBuffer}; +use arrow_data::ArrayDataBuilder; +use arrow_schema::{DataType}; +use super::fixed::{FixedLengthEncoding, split_off}; + +pub(super) const FIXED_SIZE: usize = 1; + +// Inline always to make sure the other dedicated functions will have optimized away the valid +#[inline(always)] +fn encode_bool_and_null(mut value: bool, valid: bool) -> u8 { + // if valid is false, set value to false + // if valid is true take the value + value = value & valid; + + let value_bit = value as u8; + let valid_bit = valid as u8; + + // Doing shift on the valid bit and not on the value bit, so in case when there is no nulls we can avoid the shift and it will be optimized away + valid_bit << 1 | value_bit +} + +fn encode_bool_and_nullable_with_no_nulls(value: bool) -> u8 { + encode_bool_and_null(value, true) +} + +fn decode_null_and_bool(encoded: u8) -> (bool, bool) { + // we know if the value is valid if it is not 0 + // as for invalid we set also the value bit to 0 + let is_valid = encoded != 0; + let value = encoded & 1 == 1; + + (is_valid, value) +} + +/// Boolean values are encoded as +/// +/// - 1 byte `0` if null or `1` if valid +/// - bytes of [`crate::unordered_row::fixed::FixedLengthEncoding`] +pub fn encode_boolean( + data: &mut [u8], + offsets: &mut [usize], + values: &BooleanBuffer, + nulls: &NullBuffer, +) { + for (idx, (value, is_valid)) in 
values.iter().zip(nulls.iter()).enumerate() { + let offset = &mut offsets[idx + 1]; + data[*offset] = encode_bool_and_null(value, is_valid); + *offset += 1; + } +} + +/// Encoding for non-nullable boolean arrays. +/// Iterates directly over `values`, and skips NULLs-checking. +pub fn encode_boolean_not_null( + data: &mut [u8], + offsets: &mut [usize], + values: &BooleanBuffer, +) { + for (idx, value) in values.iter().enumerate() { + let offset = &mut offsets[idx + 1]; + data[*offset] = encode_bool_and_nullable_with_no_nulls(value); + *offset += 1; + } +} + +/// Decodes a `BooleanArray` from rows +pub fn decode_bool(rows: &mut [&[u8]]) -> BooleanArray { + let len = rows.len(); + + let mut null_count = 0; + let mut nulls = MutableBuffer::new(bit_util::ceil(len, 64) * 8); + let mut values = MutableBuffer::new(bit_util::ceil(len, 64) * 8); + + let chunks = len / 64; + let remainder = len % 64; + for chunk in 0..chunks { + let mut null_packed = 0; + let mut values_packed = 0; + + for bit_idx in 0..64 { + let i = split_off(&mut rows[bit_idx + chunk * 64], 1); + let (is_valid, value) = decode_null_and_bool(i[0]); + null_count += !is_valid as usize; + null_packed |= (is_valid as u64) << bit_idx; + values_packed |= (value as u64) << bit_idx; + } + + nulls.push(null_packed); + values.push(values_packed); + } + + if remainder != 0 { + let mut null_packed = 0; + let mut values_packed = 0; + + for bit_idx in 0..remainder { + let i = split_off(&mut rows[bit_idx + chunks * 64], 1); + let (is_valid, value) = decode_null_and_bool(i[0]); + null_count += !is_valid as usize; + null_packed |= (is_valid as u64) << bit_idx; + values_packed |= (value as u64) << bit_idx; + } + + nulls.push(null_packed); + values.push(values_packed); + } + + let builder = ArrayDataBuilder::new(DataType::Boolean) + .len(rows.len()) + .null_count(null_count) + .add_buffer(values.into()) + .null_bit_buffer(Some(nulls.into())); + + // SAFETY: + // Buffers are the correct length + unsafe { 
BooleanArray::from(builder.build_unchecked()) } +} diff --git a/arrow-row/src/unordered_row/fixed.rs b/arrow-row/src/unordered_row/fixed.rs new file mode 100644 index 000000000000..2459bbf10f48 --- /dev/null +++ b/arrow-row/src/unordered_row/fixed.rs @@ -0,0 +1,942 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use super::null_sentinel; +use crate::array::PrimitiveArray; +use arrow_array::builder::BufferBuilder; +use arrow_array::{Array, ArrowPrimitiveType, BooleanArray, FixedSizeBinaryArray}; +use arrow_buffer::{ + ArrowNativeType, BooleanBuffer, Buffer, IntervalDayTime, IntervalMonthDayNano, MutableBuffer, + NullBuffer, bit_util, i256, +}; +use arrow_data::{ArrayData, ArrayDataBuilder}; +use arrow_schema::DataType; +use half::f16; + +pub trait FromSlice { + fn from_slice(slice: &[u8]) -> Self; +} + +impl<const N: usize> FromSlice for [u8; N] { + #[inline] + fn from_slice(slice: &[u8]) -> Self { + let mut t: Self = slice.try_into().unwrap(); + t + } +} + +/// Encodes a value of a particular fixed width type into bytes according to the rules +/// described on [`super::UnorderedRowConverter`] +pub trait FixedLengthEncoding: Copy { + const ENCODED_LEN: usize = std::mem::size_of::<Self::Encoded>(); + + type Encoded: Sized + Copy + FromSlice + AsRef<[u8]> + AsMut<[u8]>; + + fn encode(self) -> Self::Encoded; + + fn encode_with_null(self, is_valid: bool) -> Self::Encoded { + if is_valid { + self.encode() + } else { + unimplemented!("encode_with_null not implemented for this type") + } + } + + fn decode(encoded: Self::Encoded) -> Self; +} + +macro_rules!
encode_signed { + ($n:expr, $t:ty) => { + impl FixedLengthEncoding for $t { + type Encoded = [u8; $n]; + + fn encode_with_null(self, is_valid: bool) -> Self::Encoded { + (self * (is_valid as $t)).encode() + } + + fn encode(self) -> [u8; $n] { + let mut b = self.to_be_bytes(); + b + } + + fn decode(mut encoded: Self::Encoded) -> Self { + Self::from_be_bytes(encoded) + } + } + }; +} + +encode_signed!(1, i8); +encode_signed!(2, i16); +encode_signed!(4, i32); +encode_signed!(8, i64); +encode_signed!(16, i128); +// encode_signed!(32, i256); +impl FixedLengthEncoding for i256 { + type Encoded = [u8; 32]; + + fn encode_with_null(self, is_valid: bool) -> Self::Encoded { + (self * (i256::usize_as(is_valid as usize))).encode() + } + + fn encode(self) -> [u8; 32] { + self.to_be_bytes() + } + + fn decode(mut encoded: Self::Encoded) -> Self { + Self::from_be_bytes(encoded) + } +} +// impl FixedLengthEncoding for i32 { +// type Encoded = [u8; 4]; +// +// fn encode(self) -> [u8; 4] { +// // (self as u32).swap_bytes() +// +// let mut b = self.to_be_bytes(); +// +// b[0] ^= 0x80; +// b +// } +// +// fn decode(mut encoded: Self::Encoded) -> Self { +// encoded[0] ^= 0x80; +// Self::from_be_bytes(encoded) +// } +// } +macro_rules! 
encode_unsigned { + ($n:expr, $t:ty) => { + impl FixedLengthEncoding for $t { + type Encoded = [u8; $n]; + + fn encode(self) -> [u8; $n] { + self.to_be_bytes() + } + + fn encode_with_null(self, is_valid: bool) -> Self::Encoded { + (self * (is_valid as $t)).encode() + } + + fn decode(encoded: Self::Encoded) -> Self { + Self::from_be_bytes(encoded) + } + } + }; +} + +encode_unsigned!(1, u8); +encode_unsigned!(2, u16); +encode_unsigned!(4, u32); +encode_unsigned!(8, u64); + +impl FixedLengthEncoding for f16 { + type Encoded = [u8; 2]; + + fn encode(self) -> [u8; 2] { + // https://github.com/rust-lang/rust/blob/9c20b2a8cc7588decb6de25ac6a7912dcef24d65/library/core/src/num/f32.rs#L1176-L1260 + let s = self.to_bits() as i16; + let val = s ^ (((s >> 15) as u16) >> 1) as i16; + val.encode() + } + + fn encode_with_null(self, is_valid: bool) -> Self::Encoded { + let value = if is_valid { self } else { f16::ZERO }; + value.encode() + } + + fn decode(encoded: Self::Encoded) -> Self { + let bits = i16::decode(encoded); + let val = bits ^ (((bits >> 15) as u16) >> 1) as i16; + Self::from_bits(val as u16) + } +} + +impl FixedLengthEncoding for f32 { + type Encoded = [u8; 4]; + + fn encode(self) -> [u8; 4] { + // https://github.com/rust-lang/rust/blob/9c20b2a8cc7588decb6de25ac6a7912dcef24d65/library/core/src/num/f32.rs#L1176-L1260 + let s = self.to_bits() as i32; + let val = s ^ (((s >> 31) as u32) >> 1) as i32; + val.encode() + } + + fn encode_with_null(self, is_valid: bool) -> Self::Encoded { + let value = if is_valid { self } else { 0.0 }; + value.encode() + } + + fn decode(encoded: Self::Encoded) -> Self { + let bits = i32::decode(encoded); + let val = bits ^ (((bits >> 31) as u32) >> 1) as i32; + Self::from_bits(val as u32) + } +} + +impl FixedLengthEncoding for f64 { + type Encoded = [u8; 8]; + + fn encode(self) -> [u8; 8] { + // https://github.com/rust-lang/rust/blob/9c20b2a8cc7588decb6de25ac6a7912dcef24d65/library/core/src/num/f32.rs#L1176-L1260 + let s = self.to_bits() as 
i64; + let val = s ^ (((s >> 63) as u64) >> 1) as i64; + val.encode() + } + fn encode_with_null(self, is_valid: bool) -> Self::Encoded { + let value = if is_valid { self } else { 0.0 }; + value.encode() + } + + fn decode(encoded: Self::Encoded) -> Self { + let bits = i64::decode(encoded); + let val = bits ^ (((bits >> 63) as u64) >> 1) as i64; + Self::from_bits(val as u64) + } +} + +impl FixedLengthEncoding for IntervalDayTime { + type Encoded = [u8; 8]; + + fn encode(self) -> Self::Encoded { + let mut out = [0_u8; 8]; + out[..4].copy_from_slice(&self.days.encode()); + out[4..].copy_from_slice(&self.milliseconds.encode()); + out + } + + fn encode_with_null(self, is_valid: bool) -> Self::Encoded { + let value = if is_valid { self } else { Self::ZERO }; + value.encode() + } + + fn decode(encoded: Self::Encoded) -> Self { + Self { + days: i32::decode(encoded[..4].try_into().unwrap()), + milliseconds: i32::decode(encoded[4..].try_into().unwrap()), + } + } +} + +impl FixedLengthEncoding for IntervalMonthDayNano { + type Encoded = [u8; 16]; + + fn encode(self) -> Self::Encoded { + let mut out = [0_u8; 16]; + out[..4].copy_from_slice(&self.months.encode()); + out[4..8].copy_from_slice(&self.days.encode()); + out[8..].copy_from_slice(&self.nanoseconds.encode()); + out + } + fn encode_with_null(self, is_valid: bool) -> Self::Encoded { + let value = if is_valid { self } else { Self::ZERO }; + value.encode() + } + + fn decode(encoded: Self::Encoded) -> Self { + Self { + months: i32::decode(encoded[..4].try_into().unwrap()), + days: i32::decode(encoded[4..8].try_into().unwrap()), + nanoseconds: i64::decode(encoded[8..].try_into().unwrap()), + } + } +} + +/// Returns the total encoded length (including null byte) for a value of type `T::Native` +pub const fn encoded_len(_col: &PrimitiveArray) -> usize +where + T: ArrowPrimitiveType, + T::Native: FixedLengthEncoding, +{ + T::Native::ENCODED_LEN +} + +/// Fixed width types are encoded as +/// +/// - 1 byte `0` if null or `1` if 
valid +/// - bytes of [`FixedLengthEncoding`] +pub fn encode( + data: &mut [u8], + offsets: &mut [usize], + values: &[T], + nulls: &NullBuffer, +) { + for ((value, is_valid), offset) in values.iter().zip(nulls.iter()).zip(offsets.iter_mut().skip(1)) { + let end_offset = *offset + T::ENCODED_LEN; + if is_valid { + let to_write = &mut data[*offset..end_offset]; + let mut encoded = (value).encode_with_null(is_valid); + to_write.copy_from_slice(encoded.as_ref()); + } + *offset = end_offset; + } +} + + + +/// Encoding for non-nullable primitive arrays. +/// Iterates directly over the `values`, and skips NULLs-checking. +pub fn encode_fixed( + data: &mut [u8], + offsets: &mut [usize], + arrays: [&PrimitiveArray; N], + // iters: [impl ExactSizeIterator; N], +) where + T::Native: FixedLengthEncoding, +{ + let iters = arrays.map(|a| a.values().iter().copied().zip(a.nulls().unwrap().iter())); + match N { + 0 => panic!("N must be greater than 0"), + 1 => unimplemented!(), + 2 => { + let iter = iters[0].clone().zip(iters[1].clone()); + for (value_idx, ((val1, is_valid1), (val2, is_valid2))) in iter.enumerate() { + let offset = &mut offsets[value_idx + 1]; + let end_offset = *offset + T::Native::ENCODED_LEN * N; + + let to_write = &mut data[*offset..end_offset]; + { + let mut encoded = val1.encode_with_null(is_valid1); + to_write[..T::Native::ENCODED_LEN].copy_from_slice(encoded.as_ref()); + } + + { + let mut encoded = val2.encode_with_null(is_valid2); + to_write[T::Native::ENCODED_LEN..].copy_from_slice(encoded.as_ref()); + } + + *offset = end_offset; + } + } + 3 => { + let iter = iters[0].clone().zip(iters[1].clone()).zip(iters[2].clone()); + for (value_idx, (((val1, is_valid_1), (val2, is_valid_2)), (val3, is_valid_3))) in iter.enumerate() { + let offset = &mut offsets[value_idx + 1]; + let end_offset = *offset + T::Native::ENCODED_LEN * N; + + let to_write = &mut data[*offset..end_offset]; + + { + let mut encoded = val1.encode_with_null(is_valid_1); + 
to_write[T::Native::ENCODED_LEN * 0..T::Native::ENCODED_LEN * 1] + .copy_from_slice(encoded.as_ref()); + } + + { + let mut encoded = val2.encode_with_null(is_valid_2); + to_write[T::Native::ENCODED_LEN * 1..T::Native::ENCODED_LEN * 2] + .copy_from_slice(encoded.as_ref()); + } + + { + let mut encoded = val3.encode_with_null(is_valid_3); + to_write[T::Native::ENCODED_LEN * 2..T::Native::ENCODED_LEN * 3] + .copy_from_slice(encoded.as_ref()); + } + + *offset = end_offset; + } + } + 4 => { + let iter = iters[0] + .clone() + .zip(iters[1].clone()) + .zip(iters[2].clone()) + .zip(iters[3].clone()); + for (value_idx, ((((val1, is_valid_1), (val2, is_valid_2)), (val3, is_valid_3)), (val4, is_valid_4))) in iter.enumerate() { + let offset = &mut offsets[value_idx + 1]; + let end_offset = *offset + T::Native::ENCODED_LEN * N; + + let to_write = &mut data[*offset..end_offset]; + + { + let mut encoded = val1.encode_with_null(is_valid_1); + to_write[T::Native::ENCODED_LEN * 0..T::Native::ENCODED_LEN * 1] + .copy_from_slice(encoded.as_ref()); + } + + { + let mut encoded = val2.encode_with_null(is_valid_2); + to_write[T::Native::ENCODED_LEN * 1..T::Native::ENCODED_LEN * 2] + .copy_from_slice(encoded.as_ref()); + } + + { + let mut encoded = val3.encode_with_null(is_valid_3); + to_write[T::Native::ENCODED_LEN * 2..T::Native::ENCODED_LEN * 3] + .copy_from_slice(encoded.as_ref()); + } + + { + let mut encoded = val4.encode_with_null(is_valid_4); + to_write[T::Native::ENCODED_LEN * 3..T::Native::ENCODED_LEN * 4] + .copy_from_slice(encoded.as_ref()); + } + + *offset = end_offset; + } + } + _ => panic!("N must be less than or equal to 8"), + } + // + // let zip_iter = zip_array::<_, N>(arrays.map(|a| a.values().iter().copied())); + // for (value_idx, array) in zip_iter.enumerate() { + // let offset = &mut offsets[value_idx + 1]; + // let end_offset = *offset + (T::Native::ENCODED_LEN - 1) * N; + // + // let to_write = &mut data[*offset..end_offset]; + // // for i in 0..N { + // // 
to_write[i * T::Native::ENCODED_LEN] = 1; + // // } + // to_write[0] = valid_bits; + // for (i, val) in array.iter().enumerate() { + // let mut encoded = val.encode(); + // to_write[1 + i * (T::Native::ENCODED_LEN - 1)..(i + 1) * (T::Native::ENCODED_LEN - 1) + 1].copy_from_slice(encoded.as_ref()); + // } + // + // *offset = end_offset; + // } +} + +/// Encoding for non-nullable primitive arrays. +/// Iterates directly over the `values`, and skips NULLs-checking. +pub fn encode_not_null( + data: &mut [u8], + offsets: &mut [usize], + values: &[T], +) { + for (value_idx, val) in values.iter().enumerate() { + let offset = &mut offsets[value_idx + 1]; + let end_offset = *offset + T::ENCODED_LEN; + + let to_write = &mut data[*offset..end_offset]; + let mut encoded = val.encode(); + to_write.copy_from_slice(encoded.as_ref()); + + *offset = end_offset; + } +} + +/// Encoding for non-nullable primitive arrays. +/// Iterates directly over the `values`, and skips NULLs-checking. +pub fn encode_not_null_double( + data: &mut [u8], + offsets: &mut [usize], + values_1: impl Iterator, + values_2: impl Iterator, +) { + for (value_idx, (val1, val2)) in values_1.zip(values_2).enumerate() { + let offset = &mut offsets[value_idx + 1]; + let end_offset = *offset + T::ENCODED_LEN * 2; + + let to_write = &mut data[*offset..end_offset]; + + { + let mut encoded = val1.encode(); + to_write[..T::ENCODED_LEN].copy_from_slice(encoded.as_ref()); + } + + { + let mut encoded = val2.encode(); + to_write[T::ENCODED_LEN..].copy_from_slice(encoded.as_ref()); + } + + *offset = end_offset; + } +} + +pub struct ZipArraySameLength { + array: [T; N], +} + +pub fn zip_array(array: [T; N]) -> ZipArraySameLength { + assert_ne!(N, 0); + + ZipArraySameLength { array } +} + +impl Iterator for ZipArraySameLength { + type Item = [T::Item; N]; + + fn next(&mut self) -> Option { + // SAFETY: It is always valid to `assume_init()` an array of `MaybeUninit`s (can be replaced + // with `MaybeUninit::uninit_array()` once 
stable). + let mut result: [std::mem::MaybeUninit; N] = + unsafe { std::mem::MaybeUninit::uninit().assume_init() }; + for (item, iterator) in std::iter::zip(&mut result, &mut self.array) { + item.write(iterator.next()?); + } + // SAFETY: We initialized the array above (can be replaced with `MaybeUninit::array_assume_init()` + // once stable). + Some(unsafe { + std::mem::transmute_copy::<[std::mem::MaybeUninit; N], [T::Item; N]>(&result) + }) + } +} + +impl ExactSizeIterator for ZipArraySameLength { + fn len(&self) -> usize { + self.array[0].len() + } +} + +/// Encoding for non-nullable primitive arrays. +/// Iterates directly over the `values`, and skips NULLs-checking. +pub fn encode_not_null_fixed_2( + data: &mut [u8], + offsets: &mut [usize], + arrays: [&PrimitiveArray; N], + // iters: [impl ExactSizeIterator; N], +) where + T::Native: FixedLengthEncoding, +{ + let zip_iter = zip_array::<_, N>(arrays.map(|a| a.values().iter().copied())); + for (value_idx, array) in zip_iter.enumerate() { + let offset = &mut offsets[value_idx + 1]; + let end_offset = *offset + T::Native::ENCODED_LEN * N; + + let to_write = &mut data[*offset..end_offset]; + for (i, val) in array.iter().enumerate() { + let mut encoded = val.encode(); + to_write[i * (T::Native::ENCODED_LEN)..(i + 1) * (T::Native::ENCODED_LEN)] + .copy_from_slice(encoded.as_ref()); + } + + *offset = end_offset; + } +} + +/// Encoding for non-nullable primitive arrays. +/// Iterates directly over the `values`, and skips NULLs-checking. 
+pub fn encode_not_null_fixed( + data: &mut [u8], + offsets: &mut [usize], + arrays: [&PrimitiveArray; N], + // iters: [impl ExactSizeIterator; N], +) where + T::Native: FixedLengthEncoding, +{ + let iters = arrays.map(|a| a.values().iter().copied()); + match N { + 0 => panic!("N must be greater than 0"), + 1 => unimplemented!(), + 2 => { + let iter = iters[0].clone().zip(iters[1].clone()); + for (value_idx, (val1, val2)) in iter.enumerate() { + let offset = &mut offsets[value_idx + 1]; + let end_offset = *offset + T::Native::ENCODED_LEN * N; + + let to_write = &mut data[*offset..end_offset]; + { + let mut encoded = val1.encode(); + to_write[..T::Native::ENCODED_LEN].copy_from_slice(encoded.as_ref()); + } + + { + let mut encoded = val2.encode(); + to_write[T::Native::ENCODED_LEN..].copy_from_slice(encoded.as_ref()); + } + + *offset = end_offset; + } + } + 3 => { + let iter = iters[0].clone().zip(iters[1].clone()).zip(iters[2].clone()); + for (value_idx, ((val1, val2), val3)) in iter.enumerate() { + let offset = &mut offsets[value_idx + 1]; + let end_offset = *offset + T::Native::ENCODED_LEN * N; + + let to_write = &mut data[*offset..end_offset]; + + { + let mut encoded = val1.encode(); + to_write[T::Native::ENCODED_LEN * 0..T::Native::ENCODED_LEN * 1] + .copy_from_slice(encoded.as_ref()); + } + + { + let mut encoded = val2.encode(); + to_write[T::Native::ENCODED_LEN * 1..T::Native::ENCODED_LEN * 2] + .copy_from_slice(encoded.as_ref()); + } + + { + let mut encoded = val3.encode(); + to_write[T::Native::ENCODED_LEN * 2..T::Native::ENCODED_LEN * 3] + .copy_from_slice(encoded.as_ref()); + } + + *offset = end_offset; + } + } + 4 => { + let iter = iters[0] + .clone() + .zip(iters[1].clone()) + .zip(iters[2].clone()) + .zip(iters[3].clone()); + for (value_idx, (((val1, val2), val3), val4)) in iter.enumerate() { + let offset = &mut offsets[value_idx + 1]; + let end_offset = *offset + T::Native::ENCODED_LEN * N; + + let to_write = &mut data[*offset..end_offset]; + + { + let 
mut encoded = val1.encode(); + to_write[T::Native::ENCODED_LEN * 0..T::Native::ENCODED_LEN * 1] + .copy_from_slice(encoded.as_ref()); + } + + { + let mut encoded = val2.encode(); + to_write[T::Native::ENCODED_LEN * 1..T::Native::ENCODED_LEN * 2] + .copy_from_slice(encoded.as_ref()); + } + + { + let mut encoded = val3.encode(); + to_write[T::Native::ENCODED_LEN * 2..T::Native::ENCODED_LEN * 3] + .copy_from_slice(encoded.as_ref()); + } + + { + let mut encoded = val4.encode(); + to_write[T::Native::ENCODED_LEN * 3..T::Native::ENCODED_LEN * 4] + .copy_from_slice(encoded.as_ref()); + } + + *offset = end_offset; + } + } + _ => panic!("N must be less than or equal to 8"), + } + // + // let zip_iter = zip_array::<_, N>(arrays.map(|a| a.values().iter().copied())); + // for (value_idx, array) in zip_iter.enumerate() { + // let offset = &mut offsets[value_idx + 1]; + // let end_offset = *offset + (T::Native::ENCODED_LEN - 1) * N; + // + // let to_write = &mut data[*offset..end_offset]; + // // for i in 0..N { + // // to_write[i * T::Native::ENCODED_LEN] = 1; + // // } + // to_write[0] = valid_bits; + // for (i, val) in array.iter().enumerate() { + // let mut encoded = val.encode(); + // to_write[1 + i * (T::Native::ENCODED_LEN - 1)..(i + 1) * (T::Native::ENCODED_LEN - 1) + 1].copy_from_slice(encoded.as_ref()); + // } + // + // *offset = end_offset; + // } +} +// +// /// Encoding for non-nullable primitive arrays. +// /// Iterates directly over the `values`, and skips NULLs-checking. 
+// pub fn encode_not_null_four<'a>( +// data: &'a mut [u8], +// offsets: &'a mut [usize], +// values_1: (usize, &'a Buffer), +// values_2: (usize, &'a Buffer), +// values_3: (usize, &'a Buffer), +// values_4: (usize, &'a Buffer), +// ) { +// let shift_1 = 1; +// let values_1_slice = values_1.1.as_slice(); +// let shift_2 = shift_1 + values_1.0; +// let values_2_slice = values_2.1.as_slice(); +// let shift_3 = shift_2 + values_2.0; +// let values_3_slice = values_3.1.as_slice(); +// let shift_4 = shift_3 + values_3.0; +// let values_4_slice = values_4.1.as_slice(); +// +// let total_size = shift_4 + values_4.0; +// for (value_idx, offset) in offsets.iter_mut().skip(1).enumerate() +// { +// // let offset = &mut offsets[value_idx + 1]; +// +// // let val1 = values_1_slice.; +// let end_offset = *offset + 1 + values_1.0 + values_2.0 + values_3.0 + values_4.0; +// +// let to_write = &mut data[*offset..end_offset]; +// +// +// // let size = std::mem::size_of::(); +// // data[*offset..*offset + slice.len()].copy_from_slice(slice.as_slice()); +// // +// // let slice = [val1, val2, val3, val4].concat(); +// +// // all valid +// let valid_bits = 0b0000_1111; +// to_write[0] = valid_bits; +// +// unsafe { to_write.get_unchecked_mut(1..1 + values_1.0).copy_from_slice(values_1_slice.get_unchecked((value_idx * values_1.0)..(value_idx + 1) * values_1.0)); } +// let to_write = &mut to_write[1 + values_1.0..]; +// unsafe { to_write.get_unchecked_mut(..values_2.0).copy_from_slice(values_2_slice.get_unchecked((value_idx * values_2.0)..(value_idx + 1) * values_2.0)); } +// let to_write = &mut to_write[values_2.0..]; +// unsafe { to_write.get_unchecked_mut(..values_3.0).copy_from_slice(values_3_slice.get_unchecked((value_idx * values_3.0)..(value_idx + 1) * values_3.0)); } +// let to_write = &mut to_write[values_3.0..]; +// unsafe { to_write.get_unchecked_mut(..).copy_from_slice(values_4_slice.get_unchecked((value_idx * values_4.0)..(value_idx + 1) * values_4.0)); } +// // to_write[1 
+ values_1.0..1 + values_1.0 + values_2.0].copy_from_slice(&values_2_slice[(value_idx * values_2.0)..(value_idx + 1) * values_2.0]); +// +// // { +// // let mut encoded = val1; +// // data[*offset..*offset + slice.len()].copy_from_slice(slice.as_slice()); +// // *offset += slice.len(); +// // } +// // +// // { +// // let mut encoded = val2; +// // data[*offset..*offset + val2.len()].copy_from_slice(encoded); +// // *offset += val2.len(); +// // // to_write[shift_2..shift_3].copy_from_slice(encoded); +// // } +// // +// // { +// // let mut encoded = val3; +// // data[*offset..*offset + val3.len()].copy_from_slice(encoded); +// // *offset += val3.len(); +// // // to_write[shift_3..shift_4].copy_from_slice(encoded); +// // } +// // +// // { +// // let mut encoded = val4; +// // data[*offset..*offset + val4.len()].copy_from_slice(encoded); +// // *offset += val4.len(); +// // // to_write[shift_4..].copy_from_slice(encoded); +// // } +// +// *offset = end_offset; +// } +// } + +pub fn encode_fixed_size_binary( + data: &mut [u8], + offsets: &mut [usize], + array: &FixedSizeBinaryArray, +) { + let len = array.value_length() as usize; + for (offset, maybe_val) in offsets.iter_mut().skip(1).zip(array.iter()) { + let end_offset = *offset + len; + if let Some(val) = maybe_val { + let to_write = &mut data[*offset..end_offset]; + to_write.copy_from_slice(&val[..len]); + } else { + data[*offset] = null_sentinel(); + } + *offset = end_offset; + } +} + +/// Splits `len` bytes from `src` +#[inline] +pub(super) fn split_off<'a>(src: &mut &'a [u8], len: usize) -> &'a [u8] { + let v = &src[..len]; + *src = &src[len..]; + v +} + +/// Decodes a single byte from each row, interpreting `0x01` as a valid value +/// and all other values as a null +/// +/// Returns the null count and null buffer +pub fn decode_nulls(rows: &[&[u8]]) -> (usize, Buffer) { + let mut null_count = 0; + let buffer = MutableBuffer::collect_bool(rows.len(), |idx| { + let valid = rows[idx][0] == 1; + null_count += 
!valid as usize; + valid + }) + .into(); + (null_count, buffer) +} + +/// Decodes a `ArrayData` from rows based on the provided `FixedLengthEncoding` `T` +/// +/// # Safety +/// +/// `data_type` must be appropriate native type for `T` +unsafe fn decode_fixed<T: FixedLengthEncoding>( + rows: &mut [&[u8]], + data_type: DataType, + nulls: Option<NullBuffer>, +) -> ArrayData { + let len = rows.len(); + + let mut values = BufferBuilder::<T>::new(len); + + for row in rows { + let i = split_off(row, T::ENCODED_LEN); + let value = T::Encoded::from_slice(i); + values.append(T::decode(value)); + } + let null_count = nulls.as_ref().map(|n| n.null_count()).unwrap_or(0); + + let builder = ArrayDataBuilder::new(data_type) + .len(len) + .add_buffer(values.finish()) + .nulls(nulls) + .null_count(null_count); + + // SAFETY: Buffers correct length + unsafe { builder.build_unchecked() } +} + +/// Decodes a `ArrayData` from rows based on the provided `FixedLengthEncoding` `T` +/// +/// # Safety +/// +/// `data_type` must be appropriate native type for `T` +unsafe fn decode_fixed_four<T: FixedLengthEncoding>( + rows: &mut [&[u8]], + data_types: [DataType; 4], + nulls: [Option<NullBuffer>; 4], +) -> [ArrayData; 4] { + let len = rows.len(); + + let mut values1 = BufferBuilder::<T>::new(len); + let mut values2 = BufferBuilder::<T>::new(len); + let mut values3 = BufferBuilder::<T>::new(len); + let mut values4 = BufferBuilder::<T>::new(len); + // let (null_count, nulls) = decode_nulls(rows); + + // (null_count, buffer) + + for row in rows { + let size = std::mem::size_of::<T::Encoded>(); + let i = split_off(row, size * 4 + 1); + + { + let value = T::Encoded::from_slice(&i[size * 0..size * 1]); + values1.append(T::decode(value)); + } + + { + let value = T::Encoded::from_slice(&i[size * 1..size * 2]); + values2.append(T::decode(value)); + } + + { + let value = T::Encoded::from_slice(&i[size * 2..size * 3]); + values3.append(T::decode(value)); + } + + { + let value = T::Encoded::from_slice(&i[size * 3..size * 4]); + values4.append(T::decode(value)); + } + } + + // TODO - assert all have the
same length + + let [data_type1, data_type2, data_type3, data_type4] = data_types; + let [nulls1, nulls2, nulls3, nulls4] = nulls; + let null_count1 = nulls1.as_ref().map(|n| n.null_count()).unwrap_or(0); + let null_count2 = nulls2.as_ref().map(|n| n.null_count()).unwrap_or(0); + let null_count3 = nulls3.as_ref().map(|n| n.null_count()).unwrap_or(0); + let null_count4 = nulls4.as_ref().map(|n| n.null_count()).unwrap_or(0); + + let builder1 = ArrayDataBuilder::new(data_type1) + .len(len) + .add_buffer(values1.finish()) + .nulls(nulls1) + .null_count(null_count1); + + let builder2 = ArrayDataBuilder::new(data_type2) + .len(len) + .add_buffer(values2.finish()) + .nulls(nulls2) + .null_count(null_count2); + + let builder3 = ArrayDataBuilder::new(data_type3) + .len(len) + .add_buffer(values3.finish()) + .nulls(nulls3) + .null_count(null_count3); + + let builder4 = ArrayDataBuilder::new(data_type4) + .len(len) + .add_buffer(values4.finish()) + .nulls(nulls4) + .null_count(null_count4); + + // SAFETY: Buffers correct length + let array1 = unsafe { builder1.build_unchecked() }; + // SAFETY: Buffers correct length + let array2 = unsafe { builder2.build_unchecked() }; + // SAFETY: Buffers correct length + let array3 = unsafe { builder3.build_unchecked() }; + // SAFETY: Buffers correct length + let array4 = unsafe { builder4.build_unchecked() }; + + [array1, array2, array3, array4] +} + +/// Decodes a `PrimitiveArray` from rows +pub fn decode_primitive( + rows: &mut [&[u8]], + data_type: DataType, + nulls: Option, +) -> PrimitiveArray +where + T::Native: FixedLengthEncoding, +{ + assert!(PrimitiveArray::::is_compatible(&data_type)); + // SAFETY: + // Validated data type above + unsafe { decode_fixed::(rows, data_type, nulls).into() } +} + +/// Decodes a `PrimitiveArray` from rows +pub fn decode_primitive4( + rows: &mut [&[u8]], + data_types: [DataType; 4], + nulls: [Option; 4], +) -> [PrimitiveArray; 4] +where + T::Native: FixedLengthEncoding, +{ + for data_type in 
&data_types { + assert!(PrimitiveArray::<T>::is_compatible(data_type)); + } + + // SAFETY: + // Validated data type above + let datas = unsafe { decode_fixed_four::<T>(rows, data_types, nulls) }; + + datas.map(Into::into) +} + +/// Decodes a `FixedLengthBinary` from rows +pub(crate) fn decode_fixed_size_binary(rows: &mut [&[u8]], size: i32, nulls: Option<NullBuffer>) -> FixedSizeBinaryArray { + let len = rows.len(); + + let mut values = MutableBuffer::new(size as usize * rows.len()); + + let encoded_len = size as usize; + + for row in rows { + let i = split_off(row, encoded_len); + values.extend_from_slice(i); + } + + let builder = ArrayDataBuilder::new(DataType::FixedSizeBinary(size)) + .len(len) + .nulls(nulls) + .add_buffer(values.into()); + + // SAFETY: Buffers correct length + unsafe { builder.build_unchecked().into() } +} diff --git a/arrow-row/src/unordered_row/list.rs b/arrow-row/src/unordered_row/list.rs new file mode 100644 index 000000000000..a5828a7bf58e --- /dev/null +++ b/arrow-row/src/unordered_row/list.rs @@ -0,0 +1,364 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
+ +use super::{LengthTracker, UnorderedRowConverter, UnorderedRows, fixed, null_sentinel}; +use arrow_array::{Array, FixedSizeListArray, GenericListArray, OffsetSizeTrait, new_null_array}; +use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer, NullBuffer}; +use arrow_data::ArrayDataBuilder; +use arrow_schema::{ArrowError, DataType, Field}; +use std::{ops::Range, sync::Arc}; + +pub fn compute_lengths( + lengths: &mut [usize], + rows: &UnorderedRows, + array: &GenericListArray, +) { + let shift = array.value_offsets()[0].as_usize(); + + let offsets = array.value_offsets().windows(2); + lengths + .iter_mut() + .zip(offsets) + .enumerate() + .for_each(|(idx, (length, offsets))| { + let start = offsets[0].as_usize() - shift; + let end = offsets[1].as_usize() - shift; + let range = array.is_valid(idx).then_some(start..end); + *length += encoded_len(rows, range); + }); +} + +fn encoded_len(rows: &UnorderedRows, range: Option>) -> usize { + // super::variable::encoded_len( + // match range { + // None => None, + // Some(range) if range.is_empty() => Some(&[]), + // Some(range) => Some(rows.data_range(range)) + // } + // ) + match range.filter(|r| !r.is_empty()) { + None => + // Only the ctrl byte + 1, + Some(range) => { + // Number of items + super::variable::length_of_encoding_length(range.len()) + + // ctrl byte for the length type that will be used for all lengths here + 1 + + // what is the worst case scenerio for how much bytes are needed to encode the length of a row + // if the range is a single item (this is worst case scenerio as we don't know how much each row will take) + super::variable::get_number_of_bytes_needed_to_encode(rows.data_range_len(&range)) * range.len() + + // The bytes themselves + super::variable::padded_length(Some(rows.data_range(range).len())) + } + } +} + +/// Encodes the provided `GenericListArray` to `out` with the provided `SortOptions` +/// +/// `rows` should contain the encoded child elements +pub fn encode( + data: &mut [u8], + 
offsets: &mut [usize], + rows: &UnorderedRows, + array: &GenericListArray, +) { + let shift = array.value_offsets()[0].as_usize(); + + offsets + .iter_mut() + .skip(1) + .zip(array.value_offsets().windows(2)) + .enumerate() + .for_each(|(idx, (offset, offsets))| { + let start = offsets[0].as_usize() - shift; + let end = offsets[1].as_usize() - shift; + let range = array.is_valid(idx).then_some(start..end); + let out = &mut data[*offset..]; + *offset += encode_one(out, rows, range) + }); +} + +#[inline] +fn encode_one(out: &mut [u8], rows: &UnorderedRows, range: Option>) -> usize { + + match range.filter(|r| !r.is_empty()) { + None => { + super::variable::encode_empty(out) + }, + Some(range) => { + let mut offset = 0; + + // Encode the number of items in the list + offset += super::variable::encode_len(&mut out[offset..], range.len()); + + // Encode the type of the lengths of the rows and the lengths themselves + // this is used to avoid using more memory than needed for small rows + offset += super::variable::encode_lengths_with_prefix( + &mut out[offset..], + + // Encode using the worst case if there is a single row + // as we don't know the maximum length of the rows without iterating over them + // so we use the worst case scenario + rows.data_range_len(&range), + rows.lengths_from(&range), + ); + + // Encode the whole list in one go + offset += super::variable::fast_encode_bytes( + &mut out[offset..], + rows.data_range(range.clone()), + ); + + offset + } + } +} + +/// Decodes an array from `rows` with the provided `options` +/// +/// # Safety +/// +/// `rows` must contain valid data for the provided `converter` +pub unsafe fn decode( + converter: &UnorderedRowConverter, + rows: &mut [&[u8]], + field: &Field, + validate_utf8: bool, + list_nulls: Option, +) -> Result, ArrowError> { + let mut values_bytes = 0; + + let mut offset = 0; + let mut offsets = Vec::with_capacity(rows.len() + 1); + offsets.push(O::usize_as(0)); + + for row in rows.iter_mut() { + let mut 
row_offset = 0; + + let (number_of_items, start_offset) = super::variable::decode_len(&row[row_offset..]); + row_offset += start_offset; + + offset += number_of_items; + offsets.push(O::usize_as(offset)); + + if number_of_items == 0 { + continue; + } + + // TODO - encode the bytes first and then the lengths so we don't have to jump here in memory only to get to the number + // of bytes the lengths is using + // read ctrl byte + let byte_size = super::variable::get_number_of_bytes_used_to_encode_from_ctrl_byte(row[row_offset]); + // Skip the ctrl byte + row_offset += 1; + + // Skip the lengths + row_offset += byte_size * number_of_items; + + let (number_of_bytes, start_offset) = super::variable::decode_len(&row[row_offset..]); + row_offset += start_offset; + + values_bytes += number_of_bytes; + } + O::from_usize(offset).expect("overflow"); + + let mut values_offsets = Vec::with_capacity(offset); + values_offsets.push(0); + let mut values_bytes = Vec::with_capacity(values_bytes); + for row in rows.iter_mut() { + let mut row_offset = 0; + + // Decode the number of items in the list + let (number_of_items, start_offset) = super::variable::decode_len(&&row[row_offset..]); + row_offset += start_offset; + + if number_of_items == 0 { + *row = &row[row_offset..]; + continue; + } + + // decode the lengths of the rows + let mut initial_value_offset = values_bytes.len(); + row_offset += super::variable::decode_lengths_with_prefix(&row[row_offset..], number_of_items, |len: usize| { + initial_value_offset += len; + + values_offsets.push(initial_value_offset); + }); + + // copy the rows bytes in a single pass + let decoded = super::variable::decode_blocks(&row[row_offset..], |x| { + values_bytes.extend_from_slice(x) + }); + row_offset += decoded; + *row = &row[row_offset..]; + } + + let mut child_rows: Vec<_> = values_offsets + .windows(2) + .map(|start_and_end| { + let v = &values_bytes[start_and_end[0]..start_and_end[1]]; + v + }) + .collect(); + + let child = unsafe { 
converter.convert_raw(&mut child_rows, validate_utf8) }?; + assert_eq!(child.len(), 1); + + let child_data = child[0].to_data(); + + // Since RowConverter flattens certain data types (i.e. Dictionary), + // we need to use updated data type instead of original field + let corrected_type = match field.data_type() { + DataType::List(inner_field) => DataType::List(Arc::new( + inner_field + .as_ref() + .clone() + .with_data_type(child_data.data_type().clone()), + )), + DataType::LargeList(inner_field) => DataType::LargeList(Arc::new( + inner_field + .as_ref() + .clone() + .with_data_type(child_data.data_type().clone()), + )), + _ => unreachable!(), + }; + + let builder = ArrayDataBuilder::new(corrected_type) + .len(rows.len()) + .nulls(list_nulls) + .add_buffer(Buffer::from_vec(offsets)) + .add_child_data(child_data); + + Ok(GenericListArray::from(unsafe { builder.build_unchecked() })) +} + +pub fn compute_lengths_fixed_size_list( + tracker: &mut LengthTracker, + rows: &UnorderedRows, + array: &FixedSizeListArray, +) { + let value_length = array.value_length().as_usize(); + tracker.push_variable((0..array.len()).map(|idx| { + match array.is_valid(idx) { + true => { + 1 + ((idx * value_length)..(idx + 1) * value_length) + .map(|child_idx| rows.row(child_idx).as_ref().len()) + .sum::() + } + false => 1, + } + })) +} + +/// Encodes the provided `FixedSizeListArray` to `out` with the provided `SortOptions` +/// +/// `rows` should contain the encoded child elements +pub fn encode_fixed_size_list( + data: &mut [u8], + offsets: &mut [usize], + rows: &UnorderedRows, + array: &FixedSizeListArray, +) { + let null_sentinel = null_sentinel(); + offsets + .iter_mut() + .skip(1) + .enumerate() + .for_each(|(idx, offset)| { + let value_length = array.value_length().as_usize(); + match array.is_valid(idx) { + true => { + data[*offset] = 0x01; + *offset += 1; + for child_idx in (idx * value_length)..(idx + 1) * value_length { + let row = rows.row(child_idx); + let end_offset = *offset + 
row.as_ref().len(); + data[*offset..end_offset].copy_from_slice(row.as_ref()); + *offset = end_offset; + } + } + false => { + data[*offset] = null_sentinel; + *offset += 1; + } + }; + }) +} + +/// Decodes a fixed size list array from `rows` with the provided `options` +/// +/// # Safety +/// +/// `rows` must contain valid data for the provided `converter` +pub unsafe fn decode_fixed_size_list( + converter: &UnorderedRowConverter, + rows: &mut [&[u8]], + field: &Field, + validate_utf8: bool, + value_length: usize, + nulls: Option, +) -> Result { + let list_type = field.data_type(); + let element_type = match list_type { + DataType::FixedSizeList(element_field, _) => element_field.data_type(), + _ => { + return Err(ArrowError::InvalidArgumentError(format!( + "Expected FixedSizeListArray, found: {list_type}", + ))); + } + }; + + let len = rows.len(); + + let null_element_encoded = converter.convert_columns(&[new_null_array(element_type, 1)])?; + let null_element_encoded = null_element_encoded.row(0); + let null_element_slice = null_element_encoded.as_ref(); + + let mut child_rows = Vec::new(); + for row in rows { + let valid = row[0] == 1; + let mut row_offset = 1; + if !valid { + for _ in 0..value_length { + child_rows.push(null_element_slice); + } + } else { + for _ in 0..value_length { + let mut temp_child_rows = vec![&row[row_offset..]]; + unsafe { converter.convert_raw(&mut temp_child_rows, validate_utf8) }?; + let decoded_bytes = row.len() - row_offset - temp_child_rows[0].len(); + let next_offset = row_offset + decoded_bytes; + child_rows.push(&row[row_offset..next_offset]); + row_offset = next_offset; + } + } + *row = &row[row_offset..]; // Update row for the next decoder + } + + let children = unsafe { converter.convert_raw(&mut child_rows, validate_utf8) }?; + let child_data = children.iter().map(|c| c.to_data()).collect(); + let builder = ArrayDataBuilder::new(list_type.clone()) + .len(len) + .nulls(nulls) + .child_data(child_data); + + 
Ok(FixedSizeListArray::from(unsafe { + builder.build_unchecked() + })) +} diff --git a/arrow-row/src/unordered_row/mod.rs b/arrow-row/src/unordered_row/mod.rs new file mode 100644 index 000000000000..3e5464e052a4 --- /dev/null +++ b/arrow-row/src/unordered_row/mod.rs @@ -0,0 +1,5090 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! A comparable row-oriented representation of a collection of [`Array`]. +//! +//! [`UnorderedRow`]s are [normalized for sorting], and can therefore be very efficiently [compared], +//! using [`memcmp`] under the hood, or used in [non-comparison sorts] such as [radix sort]. +//! This makes the row format ideal for implementing efficient multi-column sorting, +//! grouping, aggregation, windowing and more, as described in more detail +//! [in this blog post](https://arrow.apache.org/blog/2022/11/07/multi-column-sorts-in-arrow-rust-part-1/). +//! +//! For example, given three input [`Array`], [`UnorderedRowConverter`] creates byte +//! sequences that [compare] the same as when using [`lexsort`]. +//! +//! ```text +//! ┌─────┐ ┌─────┐ ┌─────┐ +//! │ │ │ │ │ │ +//! ├─────┤ ┌ ┼─────┼ ─ ┼─────┼ ┐ ┏━━━━━━━━━━━━━┓ +//! │ │ │ │ │ │ ─────────────▶┃ ┃ +//! 
├─────┤ └ ┼─────┼ ─ ┼─────┼ ┘ ┗━━━━━━━━━━━━━┛ +//! │ │ │ │ │ │ +//! └─────┘ └─────┘ └─────┘ +//! ... +//! ┌─────┐ ┌ ┬─────┬ ─ ┬─────┬ ┐ ┏━━━━━━━━┓ +//! │ │ │ │ │ │ ─────────────▶┃ ┃ +//! └─────┘ └ ┴─────┴ ─ ┴─────┴ ┘ ┗━━━━━━━━┛ +//! UInt64 Utf8 F64 +//! +//! Input Arrays Row Format +//! (Columns) +//! ``` +//! +//! _[`UnorderedRows`] must be generated by the same [`UnorderedRowConverter`] for the comparison +//! to be meaningful._ +//! +//! # Basic Example +//! ``` +//! # use std::sync::Arc; +//! # use arrow_row::{RowConverter, SortField}; +//! # use arrow_array::{ArrayRef, Int32Array, StringArray}; +//! # use arrow_array::cast::{AsArray, as_string_array}; +//! # use arrow_array::types::Int32Type; +//! # use arrow_schema::DataType; +//! +//! let a1 = Arc::new(Int32Array::from_iter_values([-1, -1, 0, 3, 3])) as ArrayRef; +//! let a2 = Arc::new(StringArray::from_iter_values(["a", "b", "c", "d", "d"])) as ArrayRef; +//! let arrays = vec![a1, a2]; +//! +//! // Convert arrays to rows +//! let converter = RowConverter::new(vec![ +//! SortField::new(DataType::Int32), +//! SortField::new(DataType::Utf8), +//! ]).unwrap(); +//! let rows = converter.convert_columns(&arrays).unwrap(); +//! +//! // Compare rows +//! for i in 0..4 { +//! assert!(rows.row(i) <= rows.row(i + 1)); +//! } +//! assert_eq!(rows.row(3), rows.row(4)); +//! +//! // Convert rows back to arrays +//! let converted = converter.convert_rows(&rows).unwrap(); +//! assert_eq!(arrays, converted); +//! +//! // Compare rows from different arrays +//! let a1 = Arc::new(Int32Array::from_iter_values([3, 4])) as ArrayRef; +//! let a2 = Arc::new(StringArray::from_iter_values(["e", "f"])) as ArrayRef; +//! let arrays = vec![a1, a2]; +//! let rows2 = converter.convert_columns(&arrays).unwrap(); +//! +//! assert!(rows.row(4) < rows2.row(0)); +//! assert!(rows.row(4) < rows2.row(1)); +//! +//! // Convert selection of rows back to arrays +//! let selection = [rows.row(0), rows2.row(1), rows.row(2), rows2.row(0)]; +//! 
let converted = converter.convert_rows(selection).unwrap(); +//! let c1 = converted[0].as_primitive::(); +//! assert_eq!(c1.values(), &[-1, 4, 0, 3]); +//! +//! let c2 = converted[1].as_string::(); +//! let c2_values: Vec<_> = c2.iter().flatten().collect(); +//! assert_eq!(&c2_values, &["a", "f", "c", "e"]); +//! ``` +//! +//! # Lexicographic Sorts (lexsort) +//! +//! The row format can also be used to implement a fast multi-column / lexicographic sort +//! +//! ``` +//! # use arrow_row::{RowConverter, SortField}; +//! # use arrow_array::{ArrayRef, UInt32Array}; +//! fn lexsort_to_indices(arrays: &[ArrayRef]) -> UInt32Array { +//! let fields = arrays +//! .iter() +//! .map(|a| SortField::new(a.data_type().clone())) +//! .collect(); +//! let converter = RowConverter::new(fields).unwrap(); +//! let rows = converter.convert_columns(arrays).unwrap(); +//! let mut sort: Vec<_> = rows.iter().enumerate().collect(); +//! sort.sort_unstable_by(|(_, a), (_, b)| a.cmp(b)); +//! UInt32Array::from_iter_values(sort.iter().map(|(i, _)| *i as u32)) +//! } +//! ``` +//! +//! # Flattening Dictionaries +//! +//! For performance reasons, dictionary arrays are flattened ("hydrated") to their +//! underlying values during row conversion. See [the issue] for more details. +//! +//! This means that the arrays that come out of [`UnorderedRowConverter::convert_rows`] +//! may not have the same data types as the input arrays. For example, encoding +//! a `Dictionary` and then will come out as a `Utf8` array. +//! +//! ``` +//! # use arrow_array::{Array, ArrayRef, DictionaryArray}; +//! # use arrow_array::types::Int8Type; +//! # use arrow_row::{RowConverter, SortField}; +//! # use arrow_schema::DataType; +//! # use std::sync::Arc; +//! // Input is a Dictionary array +//! let dict: DictionaryArray:: = ["a", "b", "c", "a", "b"].into_iter().collect(); +//! let sort_fields = vec![SortField::new(dict.data_type().clone())]; +//! let arrays = vec![Arc::new(dict) as ArrayRef]; +//! 
let converter = RowConverter::new(sort_fields).unwrap(); +//! // Convert to rows +//! let rows = converter.convert_columns(&arrays).unwrap(); +//! let converted = converter.convert_rows(&rows).unwrap(); +//! // result was a Utf8 array, not a Dictionary array +//! assert_eq!(converted[0].data_type(), &DataType::Utf8); +//! ``` +//! +//! [non-comparison sorts]: https://en.wikipedia.org/wiki/Sorting_algorithm#Non-comparison_sorts +//! [radix sort]: https://en.wikipedia.org/wiki/Radix_sort +//! [normalized for sorting]: http://wwwlgis.informatik.uni-kl.de/archiv/wwwdvs.informatik.uni-kl.de/courses/DBSREAL/SS2005/Vorlesungsunterlagen/Implementing_Sorting.pdf +//! [`memcmp`]: https://www.man7.org/linux/man-pages/man3/memcmp.3.html +//! [`lexsort`]: https://docs.rs/arrow-ord/latest/arrow_ord/sort/fn.lexsort.html +//! [compared]: PartialOrd +//! [compare]: PartialOrd +//! [the issue]: https://github.com/apache/arrow-rs/issues/4811 + +use std::hash::{Hash, Hasher}; +use std::ops::Range; +use std::sync::Arc; + +use arrow_array::cast::*; +use arrow_array::types::ArrowDictionaryKeyType; +use arrow_array::*; +use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, OffsetBuffer, ScalarBuffer}; +use arrow_data::{ArrayData, ArrayDataBuilder}; +use arrow_schema::*; +use variable::{decode_binary_view, decode_string_view}; + +use crate::unordered_row::fixed::{FixedLengthEncoding, decode_primitive4, encode_not_null_fixed}; +use arrow_array::types::{Int16Type, Int32Type, Int64Type}; +use fixed::{decode_fixed_size_binary, decode_primitive}; +use list::{compute_lengths_fixed_size_list, encode_fixed_size_list}; +use variable::{decode_binary, decode_string}; +use crate::SortField; +use crate::unordered_row::nulls::encode_nulls_naive; + +mod boolean; +mod fixed; +mod list; +mod run; +mod variable; +mod nulls; + +/// Converts [`ArrayRef`] columns into a [row-oriented](self) format. 
+/// +/// *Note: The encoding of the row format may change from release to release.* +/// +/// ## Overview +/// +/// The row format is a variable length byte sequence created by +/// concatenating the encoded form of each column. The encoding for +/// each column depends on its datatype (and sort options). +/// +/// The encoding is carefully designed in such a way that escaping is +/// unnecessary: it is never ambiguous as to whether a byte is part of +/// a sentinel (e.g. null) or a value. +/// +/// ## Unsigned Integer Encoding +/// +/// A null integer is encoded as a `0_u8`, followed by a zero-ed number of bytes corresponding +/// to the integer's length. +/// +/// A valid integer is encoded as `1_u8`, followed by the big-endian representation of the +/// integer. +/// +/// ```text +/// ┌──┬──┬──┬──┐ ┌──┬──┬──┬──┬──┐ +/// 3 │03│00│00│00│ │01│00│00│00│03│ +/// └──┴──┴──┴──┘ └──┴──┴──┴──┴──┘ +/// ┌──┬──┬──┬──┐ ┌──┬──┬──┬──┬──┐ +/// 258 │02│01│00│00│ │01│00│00│01│02│ +/// └──┴──┴──┴──┘ └──┴──┴──┴──┴──┘ +/// ┌──┬──┬──┬──┐ ┌──┬──┬──┬──┬──┐ +/// 23423 │7F│5B│00│00│ │01│00│00│5B│7F│ +/// └──┴──┴──┴──┘ └──┴──┴──┴──┴──┘ +/// ┌──┬──┬──┬──┐ ┌──┬──┬──┬──┬──┐ +/// NULL │??│??│??│??│ │00│00│00│00│00│ +/// └──┴──┴──┴──┘ └──┴──┴──┴──┴──┘ +/// +/// 32-bit (4 bytes) Row Format +/// Value Little Endian +/// ``` +/// +/// ## Signed Integer Encoding +/// +/// Signed integers have their most significant sign bit flipped, and are then encoded in the +/// same manner as an unsigned integer. 
+/// +/// ```text +/// ┌──┬──┬──┬──┐ ┌──┬──┬──┬──┐ ┌──┬──┬──┬──┬──┐ +/// 5 │05│00│00│00│ │05│00│00│80│ │01│80│00│00│05│ +/// └──┴──┴──┴──┘ └──┴──┴──┴──┘ └──┴──┴──┴──┴──┘ +/// ┌──┬──┬──┬──┐ ┌──┬──┬──┬──┐ ┌──┬──┬──┬──┬──┐ +/// -5 │FB│FF│FF│FF│ │FB│FF│FF│7F│ │01│7F│FF│FF│FB│ +/// └──┴──┴──┴──┘ └──┴──┴──┴──┘ └──┴──┴──┴──┴──┘ +/// +/// Value 32-bit (4 bytes) High bit flipped Row Format +/// Little Endian +/// ``` +/// +/// ## Float Encoding +/// +/// Floats are converted from IEEE 754 representation to a signed integer representation +/// by flipping all bar the sign bit if they are negative. +/// +/// They are then encoded in the same manner as a signed integer. +/// +/// ## Fixed Length Bytes Encoding +/// +/// Fixed length bytes are encoded in the same fashion as primitive types above. +/// +/// For a fixed length array of length `n`: +/// +/// A null is encoded as `0_u8` null sentinel followed by `n` `0_u8` bytes +/// +/// A valid value is encoded as `1_u8` followed by the value bytes +/// +/// ## Variable Length Bytes (including Strings) Encoding +/// +/// A null is encoded as a `0_u8`. +/// +/// An empty byte array is encoded as `1_u8`. +/// +/// A non-null, non-empty byte array is encoded as `2_u8` followed by the byte array +/// encoded using a block based scheme described below. +/// +/// The byte array is broken up into fixed-width blocks, each block is written in turn +/// to the output, followed by `0xFF_u8`. The final block is padded to 32-bytes +/// with `0_u8` and written to the output, followed by the un-padded length in bytes +/// of this final block as a `u8`. The first 4 blocks have a length of 8, with subsequent +/// blocks using a length of 32, this is to reduce space amplification for small strings. 
+/// +/// Note the following example encodings use a block size of 4 bytes for brevity: +/// +/// ```text +/// ┌───┬───┬───┬───┬───┬───┐ +/// "MEEP" │02 │'M'│'E'│'E'│'P'│04 │ +/// └───┴───┴───┴───┴───┴───┘ +/// +/// ┌───┐ +/// "" │01 | +/// └───┘ +/// +/// NULL ┌───┐ +/// │00 │ +/// └───┘ +/// +/// "Defenestration" ┌───┬───┬───┬───┬───┬───┐ +/// │02 │'D'│'e'│'f'│'e'│FF │ +/// └───┼───┼───┼───┼───┼───┤ +/// │'n'│'e'│'s'│'t'│FF │ +/// ├───┼───┼───┼───┼───┤ +/// │'r'│'a'│'t'│'r'│FF │ +/// ├───┼───┼───┼───┼───┤ +/// │'a'│'t'│'i'│'o'│FF │ +/// ├───┼───┼───┼───┼───┤ +/// │'n'│00 │00 │00 │01 │ +/// └───┴───┴───┴───┴───┘ +/// ``` +/// +/// This approach is loosely inspired by [COBS] encoding, and chosen over more traditional +/// [byte stuffing] as it is more amenable to vectorisation, in particular AVX-256. +/// +/// ## Dictionary Encoding +/// +/// Dictionary encoded arrays are hydrated to their underlying values +/// +/// ## REE Encoding +/// +/// REE (Run End Encoding) arrays, A form of Run Length Encoding, are hydrated to their underlying values. +/// +/// ## Struct Encoding +/// +/// A null is encoded as a `0_u8`. +/// +/// A valid value is encoded as `1_u8` followed by the row encoding of each child. +/// +/// This encoding effectively flattens the schema in a depth-first fashion. +/// +/// For example +/// +/// ```text +/// ┌───────┬────────────────────────┬───────┐ +/// │ Int32 │ Struct[Int32, Float32] │ Int32 │ +/// └───────┴────────────────────────┴───────┘ +/// ``` +/// +/// Is encoded as +/// +/// ```text +/// ┌───────┬───────────────┬───────┬─────────┬───────┐ +/// │ Int32 │ Null Sentinel │ Int32 │ Float32 │ Int32 │ +/// └───────┴───────────────┴───────┴─────────┴───────┘ +/// ``` +/// +/// ## List Encoding +/// +/// Lists are encoded by first encoding all child elements to the row format. 
+/// +/// A list value is then encoded as the concatenation of each of the child elements, +/// separately encoded using the variable length encoding described above, followed +/// by the variable length encoding of an empty byte array. +/// +/// For example given: +/// +/// ```text +/// [1_u8, 2_u8, 3_u8] +/// [1_u8, null] +/// [] +/// null +/// ``` +/// +/// The elements would be converted to: +/// +/// ```text +/// ┌──┬──┐ ┌──┬──┐ ┌──┬──┐ ┌──┬──┐ ┌──┬──┐ +/// 1 │01│01│ 2 │01│02│ 3 │01│03│ 1 │01│01│ null │00│00│ +/// └──┴──┘ └──┴──┘ └──┴──┘ └──┴──┘ └──┴──┘ +///``` +/// +/// Which would be encoded as +/// +/// ```text +/// ┌──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┐ +/// [1_u8, 2_u8, 3_u8] │02│01│01│00│00│02│02│01│02│00│00│02│02│01│03│00│00│02│01│ +/// └──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┘ +/// └──── 1_u8 ────┘ └──── 2_u8 ────┘ └──── 3_u8 ────┘ +/// +/// ┌──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┐ +/// [1_u8, null] │02│01│01│00│00│02│02│00│00│00│00│02│01│ +/// └──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┘ +/// └──── 1_u8 ────┘ └──── null ────┘ +/// +///``` +/// +/// With `[]` represented by an empty byte array, and `null` a null byte array. +/// +/// ## Fixed Size List Encoding +/// +/// Fixed Size Lists are encoded by first encoding all child elements to the row format. +/// +/// A non-null list value is then encoded as 0x01 followed by the concatenation of each +/// of the child elements. A null list value is encoded as a null marker. 
+/// +/// For example given: +/// +/// ```text +/// [1_u8, 2_u8] +/// [3_u8, null] +/// null +/// ``` +/// +/// The elements would be converted to: +/// +/// ```text +/// ┌──┬──┐ ┌──┬──┐ ┌──┬──┐ ┌──┬──┐ +/// 1 │01│01│ 2 │01│02│ 3 │01│03│ null │00│00│ +/// └──┴──┘ └──┴──┘ └──┴──┘ └──┴──┘ +///``` +/// +/// Which would be encoded as +/// +/// ```text +/// ┌──┬──┬──┬──┬──┐ +/// [1_u8, 2_u8] │01│01│01│01│02│ +/// └──┴──┴──┴──┴──┘ +/// └ 1 ┘ └ 2 ┘ +/// ┌──┬──┬──┬──┬──┐ +/// [3_u8, null] │01│01│03│00│00│ +/// └──┴──┴──┴──┴──┘ +/// └ 1 ┘ └null┘ +/// ┌──┐ +/// null │00│ +/// └──┘ +/// +///``` +/// +/// # Ordering +/// +/// ## Float Ordering +/// +/// Floats are totally ordered in accordance to the `totalOrder` predicate as defined +/// in the IEEE 754 (2008 revision) floating point standard. +/// +/// The ordering established by this does not always agree with the +/// [`PartialOrd`] and [`PartialEq`] implementations of `f32`. For example, +/// they consider negative and positive zero equal, while this does not +/// +/// ## Null Ordering +/// +/// The encoding described above will order nulls first, this can be inverted by representing +/// nulls as `0xFF_u8` instead of `0_u8` +/// +/// ## Reverse Column Ordering +/// +/// The order of a given column can be reversed by negating the encoded bytes of non-null values +/// +/// [COBS]: https://en.wikipedia.org/wiki/Consistent_Overhead_Byte_Stuffing +/// [byte stuffing]: https://en.wikipedia.org/wiki/High-Level_Data_Link_Control#Asynchronous_framing +#[derive(Debug)] +pub struct UnorderedRowConverter { + fields: Fields, + indices: Vec, + + /// Reverse mapping for indices + reverse_indices: Vec, + /// State for codecs + codecs: Vec, +} + +#[derive(Debug)] +enum Codec { + /// No additional codec state is necessary + Stateless, + /// A row converter for the dictionary values + /// and the encoding of a row containing only nulls + Dictionary(UnorderedRowConverter, OwnedUnorderedRow), + /// A row converter for the child fields + /// 
and the encoding of a row containing only nulls + Struct(UnorderedRowConverter, OwnedUnorderedRow), + /// A row converter for the child field + List(UnorderedRowConverter), + /// A row converter for the values array of a run-end encoded array + RunEndEncoded(UnorderedRowConverter), + /// Row converters for each union field (indexed by type_id) + /// and the encoding of null rows for each field + Union(Vec, Vec), +} + +impl Codec { + fn new(sort_field: &FieldRef) -> Result { + match sort_field.data_type() { + DataType::Dictionary(_, values) => { + // let sort_field = + // SortField::new_with_options(values.as_ref().clone(), sort_field.options); + + // Should take the nullable from the field + let converter = UnorderedRowConverter::new( + vec![Field::new( + "col_1", + values.as_ref().clone(), + sort_field.is_nullable(), + )] + .into(), + )?; + let null_array = new_null_array(values.as_ref(), 1); + let nulls = converter.convert_columns(&[null_array])?; + + let owned = OwnedUnorderedRow { + data: nulls.buffer.into(), + config: nulls.config, + }; + Ok(Self::Dictionary(converter, owned)) + } + DataType::RunEndEncoded(_, values) => { + // Similar to List implementation + // let options = SortOptions { + // descending: false, + // nulls_first: sort_field.options.nulls_first != sort_field.options.descending, + // }; + + // let field = SortField::new_with_options(values.data_type().clone(), options); + let converter = UnorderedRowConverter::new(vec![values.clone()].into())?; + Ok(Self::RunEndEncoded(converter)) + } + d if !d.is_nested() => Ok(Self::Stateless), + DataType::List(f) | DataType::LargeList(f) => { + // The encoded contents will be inverted if descending is set to true + // As such we set `descending` to false and negate nulls first if it + // it set to true + // let options = SortOptions { + // descending: false, + // nulls_first: sort_field.options.nulls_first != sort_field.options.descending, + // }; + + // let field = 
SortField::new_with_options(f.data_type().clone(), options); + let converter = UnorderedRowConverter::new(vec![f.clone()].into())?; + Ok(Self::List(converter)) + } + DataType::FixedSizeList(f, _) => { + // let field = SortField::new_with_options(f.data_type().clone(), sort_field.options); + let converter = UnorderedRowConverter::new(vec![f.clone()].into())?; + Ok(Self::List(converter)) + } + DataType::Struct(f) => { + // let sort_fields = f + // .iter() + // .map(|x| SortField::new_with_options(x.data_type().clone(), sort_field.options)) + // .collect(); + + let converter = UnorderedRowConverter::new(f.clone())?; + let nulls: Vec<_> = f.iter().map(|x| new_null_array(x.data_type(), 1)).collect(); + + let nulls = converter.convert_columns(&nulls)?; + let owned = OwnedUnorderedRow { + data: nulls.buffer.into(), + config: nulls.config, + }; + + Ok(Self::Struct(converter, owned)) + } + DataType::Union(fields, _mode) => { + // similar to dictionaries and lists, we set descending to false and negate nulls_first + // since the encoded contents will be inverted if descending is set + // let options = SortOptions { + // descending: false, + // nulls_first: sort_field.options.nulls_first != sort_field.options.descending, + // }; + + let mut converters = Vec::with_capacity(fields.len()); + let mut null_rows = Vec::with_capacity(fields.len()); + + for (_type_id, field) in fields.iter() { + // let sort_field = + // SortField::new_with_options(field.data_type().clone(), options); + let converter = UnorderedRowConverter::new(vec![field.clone()].into())?; + + let null_array = new_null_array(field.data_type(), 1); + let nulls = converter.convert_columns(&[null_array])?; + let owned = OwnedUnorderedRow { + data: nulls.buffer.into(), + config: nulls.config, + }; + + converters.push(converter); + null_rows.push(owned); + } + + Ok(Self::Union(converters, null_rows)) + } + _ => Err(ArrowError::NotYetImplemented(format!( + "not yet implemented: {:?}", + sort_field.data_type() + ))), + } + 
} + + fn encoder(&self, array: &dyn Array) -> Result, ArrowError> { + match self { + Codec::Stateless => Ok(Encoder::Stateless), + Codec::Dictionary(converter, nulls) => { + let values = array.as_any_dictionary().values().clone(); + let rows = converter.convert_columns(&[values])?; + Ok(Encoder::Dictionary(rows, nulls.row())) + } + Codec::Struct(converter, null) => { + let v = as_struct_array(array); + let rows = converter.convert_columns(v.columns())?; + Ok(Encoder::Struct(rows, null.row())) + } + Codec::List(converter) => { + let values = match array.data_type() { + DataType::List(_) => { + let list_array = as_list_array(array); + let first_offset = list_array.offsets()[0] as usize; + let last_offset = + list_array.offsets()[list_array.offsets().len() - 1] as usize; + + // values can include more data than referenced in the ListArray, only encode + // the referenced values. + list_array + .values() + .slice(first_offset, last_offset - first_offset) + } + DataType::LargeList(_) => { + let list_array = as_large_list_array(array); + + let first_offset = list_array.offsets()[0] as usize; + let last_offset = + list_array.offsets()[list_array.offsets().len() - 1] as usize; + + // values can include more data than referenced in the LargeListArray, only encode + // the referenced values. 
+ list_array + .values() + .slice(first_offset, last_offset - first_offset) + } + DataType::FixedSizeList(_, _) => { + as_fixed_size_list_array(array).values().clone() + } + _ => unreachable!(), + }; + let rows = converter.convert_columns(&[values])?; + Ok(Encoder::List(rows)) + } + Codec::RunEndEncoded(converter) => { + let values = match array.data_type() { + DataType::RunEndEncoded(r, _) => match r.data_type() { + DataType::Int16 => array.as_run::().values(), + DataType::Int32 => array.as_run::().values(), + DataType::Int64 => array.as_run::().values(), + _ => unreachable!("Unsupported run end index type: {r:?}"), + }, + _ => unreachable!(), + }; + let rows = converter.convert_columns(std::slice::from_ref(values))?; + Ok(Encoder::RunEndEncoded(rows)) + } + Codec::Union(converters, _) => { + let union_array = array + .as_any() + .downcast_ref::() + .expect("expected Union array"); + + let type_ids = union_array.type_ids().clone(); + let offsets = union_array.offsets().cloned(); + + let mut child_rows = Vec::with_capacity(converters.len()); + for (type_id, converter) in converters.iter().enumerate() { + let child_array = union_array.child(type_id as i8); + let rows = converter.convert_columns(std::slice::from_ref(child_array))?; + child_rows.push(rows); + } + + Ok(Encoder::Union { + child_rows, + type_ids, + offsets, + }) + } + } + } + + fn size(&self) -> usize { + match self { + Codec::Stateless => 0, + Codec::Dictionary(converter, nulls) => converter.size() + nulls.data.len(), + Codec::Struct(converter, nulls) => converter.size() + nulls.data.len(), + Codec::List(converter) => converter.size(), + Codec::RunEndEncoded(converter) => converter.size(), + Codec::Union(converters, null_rows) => { + converters.iter().map(|c| c.size()).sum::() + + null_rows.iter().map(|n| n.data.len()).sum::() + } + } + } +} + +#[derive(Debug)] +enum Encoder<'a> { + /// No additional encoder state is necessary + Stateless, + /// The encoding of the child array and the encoding of a null 
row + Dictionary(UnorderedRows, UnorderedRow<'a>), + /// The row encoding of the child arrays and the encoding of a null row + /// + /// It is necessary to encode to a temporary [`UnorderedRows`] to avoid serializing + /// values that are masked by a null in the parent StructArray, otherwise + /// this would establish an ordering between semantically null values + Struct(UnorderedRows, UnorderedRow<'a>), + /// The row encoding of the child array + List(UnorderedRows), + /// The row encoding of the values array + RunEndEncoded(UnorderedRows), + /// The row encoding of each union field's child array, type_ids buffer, offsets buffer (for Dense), and mode + Union { + child_rows: Vec, + type_ids: ScalarBuffer, + offsets: Option>, + }, +} + +/// Groups consecutive elements in a slice by a key function. +/// +/// Elements are grouped together as long as they produce the same key. +/// When the key changes, a new group starts. +/// +/// # Example +/// ``` +/// let numbers = [1, 1, 2, 2, 2, 3, 1, 1]; +/// let groups = group_by(&numbers, |&x| x); +/// // Results in: [[1, 1], [2, 2, 2], [3], [1, 1]] +/// ``` +fn group_by(slice: &[T], key_fn: F) -> Vec<&[T]> +where + K: PartialEq, + F: Fn(&T) -> K, +{ + if slice.is_empty() { + return Vec::new(); + } + + let mut result = Vec::new(); + let mut start = 0; + + for i in 1..slice.len() { + if key_fn(&slice[i]) != key_fn(&slice[start]) { + result.push(&slice[start..i]); + start = i; + } + } + + // Don't forget the last group + result.push(&slice[start..]); + + result +} + +impl UnorderedRowConverter { + /// Create a new [`UnorderedRowConverter`] with the provided schema + pub fn new(fields: Fields) -> Result { + if !Self::supports_fields(&fields) { + return Err(ArrowError::NotYetImplemented(format!( + "Unordered row format support not yet implemented for: {fields:?}" + ))); + } + + let sort_by_data_type = Self::optimize_field_ordering(fields.as_ref()); + + // Split to 2 vectors + let (indices, sort_by_data_type): (Vec, 
Vec<&FieldRef>) = sort_by_data_type.into_iter().unzip(); + + // let a = indices.iter().enumarate().map(|(index, original_idx)| (original_idx, index)).collect::>(); + + let reverse_mapping_indices = { + let mut reverse_indices = indices.iter().copied().enumerate().collect::>(); + // Sort by the original index of the column + reverse_indices.sort_by(|(_, original_idx_a), (_, original_idx_b)| original_idx_a.cmp(original_idx_b)); + + reverse_indices.into_iter().map(|(mapped_index, _)| mapped_index).collect::>() + }; + + let sorted_fields = Fields::from_iter(sort_by_data_type.into_iter().map(|x| x.clone())); + + let codecs = sorted_fields.iter().map(Codec::new).collect::>()?; + Ok(Self { + fields: sorted_fields.into(), + indices, + reverse_indices: reverse_mapping_indices, + codecs, + }) + } + + fn optimize_field_ordering(fields: &[FieldRef]) -> Vec<(usize, &FieldRef)> { + let mut sort_by_data_type = fields.iter().enumerate().collect::>(); + sort_by_data_type + .sort_by(|(_, a), (_, b)| { + let a_data_type = a.data_type(); + let b_data_type = b.data_type(); + match (a_data_type.primitive_width(), b_data_type.primitive_width()) { + // Make variable types come last + (Some(_), None) => { + // a has a primitive width, b does not, a comes first + return std::cmp::Ordering::Less + } + (None, Some(_)) => { + // b has a primitive width, a does not, b comes first + return std::cmp::Ordering::Greater + } + _ => {} + } + + // Sort by largest first and if same size sort by same type + let res = a.data_type().primitive_width().cmp(&b.data_type().primitive_width()).reverse(); + + // If both have the same primitive width, sort by data type to group same types together + let res = match res { + std::cmp::Ordering::Equal => a.data_type().cmp(b.data_type()), + _ => res + }; + + // If both have the same data type, sort by nullable to group nullable types together + match res { + std::cmp::Ordering::Equal => a.is_nullable().cmp(&b.is_nullable()), + _ => res + } + }); + + sort_by_data_type 
+ } + + /// Check if the given fields are supported by the row format. + pub fn supports_fields(fields: &Fields) -> bool { + fields + .iter() + .all(|x| Self::supports_datatype(&x.data_type())) + } + + fn supports_datatype(d: &DataType) -> bool { + match d { + _ if !d.is_nested() => true, + DataType::List(f) | DataType::LargeList(f) | DataType::FixedSizeList(f, _) => { + Self::supports_datatype(f.data_type()) + } + DataType::Struct(f) => f.iter().all(|x| Self::supports_datatype(x.data_type())), + DataType::RunEndEncoded(_, values) => Self::supports_datatype(values.data_type()), + DataType::Union(fs, _mode) => fs + .iter() + .all(|(_, f)| Self::supports_datatype(f.data_type())), + _ => false, + } + } + + /// Reorder columns based on the indices + fn reorder_columns(&self, columns: &[ArrayRef]) -> Vec { + self.indices.iter().map(|&i| columns[i].clone()).collect() + } + + /// Reorder columns based on the indices + fn reverse_reorder_columns(&self, columns: Vec) -> Vec { + self.reverse_indices.iter().map(|&i| columns[i].clone()).collect() + } + + /// Convert [`ArrayRef`] columns into [`UnorderedRows`] + /// + /// See [`UnorderedRow`] for information on when [`UnorderedRow`] can be compared + /// + /// See [`Self::convert_rows`] for converting [`UnorderedRows`] back into [`ArrayRef`] + /// + /// # Panics + /// + /// Panics if the schema of `columns` does not match that provided to [`UnorderedRowConverter::new`] + pub fn convert_columns(&self, columns: &[ArrayRef]) -> Result { + let num_rows = columns.first().map(|x| x.len()).unwrap_or(0); + let mut rows = self.empty_rows(num_rows, 0); + self.append(&mut rows, columns)?; + Ok(rows) + } + + /// Convert [`ArrayRef`] columns appending to an existing [`UnorderedRows`] + /// + /// See [`UnorderedRow`] for information on when [`UnorderedRow`] can be compared + /// + /// # Panics + /// + /// Panics if + /// * The schema of `columns` does not match that provided to [`UnorderedRowConverter::new`] + /// * The provided 
[`UnorderedRows`] were not created by this [`UnorderedRowConverter`] + /// + /// ``` + /// # use std::sync::Arc; + /// # use std::collections::HashSet; + /// # use arrow_array::cast::AsArray; + /// # use arrow_array::StringArray; + /// # use arrow_row::{Row, RowConverter, SortField}; + /// # use arrow_schema::DataType; + /// # + /// let converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]).unwrap(); + /// let a1 = StringArray::from(vec!["hello", "world"]); + /// let a2 = StringArray::from(vec!["a", "a", "hello"]); + /// + /// let mut rows = converter.empty_rows(5, 128); + /// converter.append(&mut rows, &[Arc::new(a1)]).unwrap(); + /// converter.append(&mut rows, &[Arc::new(a2)]).unwrap(); + /// + /// let back = converter.convert_rows(&rows).unwrap(); + /// let values: Vec<_> = back[0].as_string::().iter().map(Option::unwrap).collect(); + /// assert_eq!(&values, &["hello", "world", "a", "a", "hello"]); + /// ``` + pub fn append(&self, rows: &mut UnorderedRows, columns: &[ArrayRef]) -> Result<(), ArrowError> { + // TODO - return this + // assert!( + // Arc::ptr_eq(&rows.config.fields, &self.fields), + // "rows were not produced by this RowConverter" + // ); + + // group columns by same data types + let columns = self.reorder_columns(columns); + let columns = columns.as_slice(); + + if columns.len() != self.fields.len() { + return Err(ArrowError::InvalidArgumentError(format!( + "Incorrect number of arrays provided to RowConverter, expected {} got {}", + self.fields.len(), + columns.len() + ))); + } + for colum in columns.iter().skip(1) { + if colum.len() != columns[0].len() { + return Err(ArrowError::InvalidArgumentError(format!( + "RowConverter columns must all have the same length, expected {} got {}", + columns[0].len(), + colum.len() + ))); + } + } + + + let encoders = columns + .iter() + .zip(&self.codecs) + .zip(self.fields.iter()) + .map(|((column, codec), field)| { + if !column.data_type().equals_datatype(field.data_type()) { + return 
Err(ArrowError::InvalidArgumentError(format!( + "RowConverter column schema mismatch, expected {} got {}", + field.data_type(), + column.data_type() + ))); + } + codec.encoder(column.as_ref()) + }) + .collect::, _>>()?; + + let write_offset = rows.num_rows(); + let lengths = row_lengths(columns, &encoders, &self.fields); + let total = lengths.extend_offsets(rows.offsets[write_offset], &mut rows.offsets); + rows.buffer.resize(total, 0); + + + // Encode all nulls separately + { + let nulls = columns + .iter() + .zip(get_fields_should_encode_nulls_for(&self.fields)) + .filter(|(c, should_encode)| *should_encode) + .map(|(c, _)| c.logical_nulls()) + .collect::>(); + let logical_nulls = nulls + .iter() + .map(|n| n.as_ref()) + .collect::>(); + encode_nulls_naive( + &mut rows.buffer, + &mut rows.offsets[write_offset..], + logical_nulls, + columns[0].len() + ); + } + + // grouping by same type + enum ColumnChunk<'a> { + ContinuesSamePrimitiveType { + arrays: &'a [&'a dyn Array], + encoders: Vec>, + }, + ContinuesSamePrimitiveTypeWithNulls { + arrays: &'a [&'a dyn Array], + encoders: Vec>, + }, + SingleColumn { + array: &'a dyn Array, + encoder: Encoder<'a>, + }, + } + + let columns_array = columns.iter().map(|col| col.as_ref()).collect::>(); + let subslices = group_by(&columns_array, |col| (col.null_count() > 0, col.data_type().clone())); + + let mut encoders_iter = encoders.into_iter(); + + let mut chunks: Vec> = vec![]; + + + for slice in subslices { + // If all the same type + if slice[0].data_type().is_primitive() && slice.len() > 1 { + if slice[0].null_count() == 0 { + let encoders = encoders_iter.by_ref().take(slice.len()).collect::>(); + chunks.push(ColumnChunk::ContinuesSamePrimitiveType { + encoders, + arrays: slice, + }); + } else { + let encoders = encoders_iter.by_ref().take(slice.len()).collect::>(); + chunks.push(ColumnChunk::ContinuesSamePrimitiveTypeWithNulls { + encoders, + arrays: slice, + }); + } + } else { + slice.iter().for_each(|&array| { + 
chunks.push(ColumnChunk::SingleColumn { + array, + encoder: encoders_iter.next().unwrap(), + }); + }); + } + } + + + + for chunk in chunks { + match chunk { + ColumnChunk::ContinuesSamePrimitiveType { + encoders, + arrays, + } => { + let column1 = &arrays[0]; + + fn find_matching_size(rows: &mut UnorderedRows, write_offset: usize, arrays: &[&dyn Array]) + where T: ArrowPrimitiveType, + ::Native: fixed::FixedLengthEncoding, + { + let data = &mut rows.buffer; + let offsets = &mut rows.offsets[write_offset..]; + match arrays.len() { + 0 => {}, + 1 => { + encode_column_fixed::<1, T>( + data, + offsets, + arrays, + ) + } + 2 => encode_column_fixed::<2, T>( + data, + offsets, + arrays, + ), + 3 => encode_column_fixed::<3, T>( + data, + offsets, + arrays, + ), + 4 => encode_column_fixed::<4, T>( + data, + offsets, + arrays, + ), + _ => { + // + let iter = arrays.chunks_exact(4); + let remainder = iter.remainder(); + + iter.for_each(|chunk| { + encode_column_fixed::<4, T>( + data, + offsets, + chunk, + ) + }); + + find_matching_size::(rows, write_offset, remainder); + } + } + } + + macro_rules! decode_primitive_helper { + ($t:ty) => { + find_matching_size::<$t>(rows, write_offset, arrays) + }; + } + + downcast_primitive! 
{ + arrays[0].data_type() => (decode_primitive_helper), + + _ => unreachable!("unsupported data type: {}", arrays[0].data_type()), + } + + } + ColumnChunk::ContinuesSamePrimitiveTypeWithNulls { + encoders, + arrays, + } => { + let column1 = &arrays[0]; + + fn find_matching_size(rows: &mut UnorderedRows, write_offset: usize, arrays: &[&dyn Array]) + where T: ArrowPrimitiveType, + ::Native: fixed::FixedLengthEncoding, + { + let data = &mut rows.buffer; + let offsets = &mut rows.offsets[write_offset..]; + match arrays.len() { + 0 => {}, + 1 => { + encode_column_nulls_fixed::<1, T>( + data, + offsets, + arrays, + ) + } + 2 => encode_column_nulls_fixed::<2, T>( + data, + offsets, + arrays, + ), + 3 => encode_column_nulls_fixed::<3, T>( + data, + offsets, + arrays, + ), + // 4 => encode_column_nulls_fixed::<4, T>( + // data, + // offsets, + // arrays, + // ), + _ => { + // + let iter = arrays.chunks_exact(4); + let remainder = iter.remainder(); + + iter.for_each(|chunk| { + encode_column_nulls_fixed::<4, T>( + data, + offsets, + chunk, + ) + }); + + find_matching_size::(rows, write_offset, remainder); + } + } + } + + macro_rules! decode_primitive_helper { + ($t:ty) => { + find_matching_size::<$t>(rows, write_offset, arrays) + }; + } + + downcast_primitive! 
{ + arrays[0].data_type() => (decode_primitive_helper), + + _ => unreachable!("unsupported data type: {}", arrays[0].data_type()), + } + + } + ColumnChunk::SingleColumn { + array, + encoder, + } => { + // We encode a column at a time to minimise dispatch overheads + encode_column( + &mut rows.buffer, + &mut rows.offsets[write_offset..], + array, + &encoder, + ) + } + } + } + + // if columns.len() == 2 + // && self.fields.len() == 2 + // && self.fields[0].data_type() == self.fields[1].data_type() + // && columns[0].null_count() == 0 + // && columns[1].null_count() == 0 + // && self.fields[0].data_type().is_primitive() + // { + // let column1 = &columns[0]; + // let column2 = &columns[1]; + // + // downcast_primitive_array! { + // column1 => { + // encode_column_double( + // &mut rows.buffer, + // &mut rows.offsets[write_offset..], + // column1, + // column2, + // ); + // } + // _ => unreachable!("unsupported data type: {}", column1.data_type()), + // } + // } + // // else if columns.len() == 4 + // // && self.fields.len() == 4 + // // && self + // // .fields + // // .iter() + // // .all(|item| item.data_type().is_primitive()) + // // && columns.iter().all(|col| col.null_count() == 0) + // // { + // // let column1 = &columns[0]; + // // let column2 = &columns[1]; + // // let column3 = &columns[2]; + // // let column4 = &columns[3]; + // // + // // encode_column_four_primitive( + // // &mut rows.buffer, + // // &mut rows.offsets[write_offset..], + // // column1, + // // column2, + // // column3, + // // column4, + // // ); + // // } + // // else if columns.len() == 4 + // // && self.fields.len() == 4 + // // && self.fields[0].data_type().is_primitive() + // // && self + // // .fields + // // .iter() + // // .all(|item| item.data_type() == self.fields[0].data_type()) + // // && columns.iter().all(|col| col.null_count() == 0) + // // { + // // let column1 = &columns[0]; + // // let column2 = &columns[1]; + // // let column3 = &columns[2]; + // // let column4 = 
&columns[3]; + // // + // // downcast_primitive_array! { + // // column1 => { + // // encode_column_four( + // // &mut rows.buffer, + // // &mut rows.offsets[write_offset..], + // // column1, + // // column2, + // // column3, + // // column4, + // // ); + // // } + // // _ => unreachable!("unsupported data type: {}", column1.data_type()), + // // } + // // } + // else { + // for ((column, field), encoder) in columns.iter().zip(self.fields.iter()).zip(encoders) { + // // We encode a column at a time to minimise dispatch overheads + // encode_column( + // &mut rows.buffer, + // &mut rows.offsets[write_offset..], + // column.as_ref(), + // &encoder, + // ) + // } + // } + + if cfg!(debug_assertions) { + assert_eq!(*rows.offsets.last().unwrap(), rows.buffer.len()); + rows.offsets + .windows(2) + .for_each(|w| assert!(w[0] <= w[1], "offsets should be monotonic")); + } + + Ok(()) + } + + /// Convert [`UnorderedRows`] columns into [`ArrayRef`] + /// + /// See [`Self::convert_columns`] for converting [`ArrayRef`] into [`UnorderedRows`] + /// + /// # Panics + /// + /// Panics if the rows were not produced by this [`UnorderedRowConverter`] + pub fn convert_rows<'a, I>(&self, rows: I) -> Result, ArrowError> + where + I: IntoIterator>, + { + let mut validate_utf8 = false; + let mut rows: Vec<_> = rows + .into_iter() + .map(|row| { + // TODO - return this + // assert!( + // Arc::ptr_eq(&row.config.fields, &self.fields), + // "rows were not produced by this RowConverter" + // ); + validate_utf8 |= row.config.validate_utf8; + row.data + }) + .collect(); + + // SAFETY + // We have validated that the rows came from this [`RowConverter`] + // and therefore must be valid + let result = unsafe { self.convert_raw(&mut rows, validate_utf8) }?; + + if cfg!(test) { + for (i, row) in rows.iter().enumerate() { + if !row.is_empty() { + return Err(ArrowError::InvalidArgumentError(format!( + "Codecs {codecs:?} did not consume all bytes for row {i}, remaining bytes: {row:?}", + codecs = 
&self.codecs + ))); + } + } + } + + Ok(result) + } + + /// Returns an empty [`UnorderedRows`] with capacity for `row_capacity` rows with + /// a total length of `data_capacity` + /// + /// This can be used to buffer a selection of [`UnorderedRow`] + /// + /// ``` + /// # use std::sync::Arc; + /// # use std::collections::HashSet; + /// # use arrow_array::cast::AsArray; + /// # use arrow_array::StringArray; + /// # use arrow_row::{Row, RowConverter, SortField}; + /// # use arrow_schema::DataType; + /// # + /// let converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]).unwrap(); + /// let array = StringArray::from(vec!["hello", "world", "a", "a", "hello"]); + /// + /// // Convert to row format and deduplicate + /// let converted = converter.convert_columns(&[Arc::new(array)]).unwrap(); + /// let mut distinct_rows = converter.empty_rows(3, 100); + /// let mut dedup: HashSet = HashSet::with_capacity(3); + /// converted.iter().filter(|row| dedup.insert(*row)).for_each(|row| distinct_rows.push(row)); + /// + /// // Note: we could skip buffering and feed the filtered iterator directly + /// // into convert_rows, this is done for demonstration purposes only + /// let distinct = converter.convert_rows(&distinct_rows).unwrap(); + /// let values: Vec<_> = distinct[0].as_string::().iter().map(Option::unwrap).collect(); + /// assert_eq!(&values, &["hello", "world", "a"]); + /// ``` + pub fn empty_rows(&self, row_capacity: usize, data_capacity: usize) -> UnorderedRows { + let mut offsets = Vec::with_capacity(row_capacity.saturating_add(1)); + offsets.push(0); + + UnorderedRows { + offsets, + buffer: Vec::with_capacity(data_capacity), + config: UnorderedRowConfig { + fields: self.fields.clone(), + validate_utf8: false, + }, + } + } + + /// Create a new [UnorderedRows] instance from the given binary data. 
+ /// + /// ``` + /// # use std::sync::Arc; + /// # use std::collections::HashSet; + /// # use arrow_array::cast::AsArray; + /// # use arrow_array::StringArray; + /// # use arrow_row::{OwnedRow, Row, RowConverter, RowParser, SortField}; + /// # use arrow_schema::DataType; + /// # + /// let converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]).unwrap(); + /// let array = StringArray::from(vec!["hello", "world", "a", "a", "hello"]); + /// let rows = converter.convert_columns(&[Arc::new(array)]).unwrap(); + /// + /// // We can convert rows into binary format and back in batch. + /// let values: Vec = rows.iter().map(|r| r.owned()).collect(); + /// let binary = rows.try_into_binary().expect("known-small array"); + /// let converted = converter.from_binary(binary.clone()); + /// assert!(converted.iter().eq(values.iter().map(|r| r.row()))); + /// ``` + /// + /// # Panics + /// + /// This function expects the passed [BinaryArray] to contain valid row data as produced by this + /// [UnorderedRowConverter]. It will panic if any rows are null. Operations on the returned [UnorderedRows] may + /// panic if the data is malformed. + pub fn from_binary(&self, array: BinaryArray) -> UnorderedRows { + assert_eq!( + array.null_count(), + 0, + "can't construct Rows instance from array with nulls" + ); + let (offsets, values, _) = array.into_parts(); + let offsets = offsets.iter().map(|&i| i.as_usize()).collect(); + // Try zero-copy, if it does not succeed, fall back to copying the values. 
+ let buffer = values.into_vec().unwrap_or_else(|values| values.to_vec()); + UnorderedRows { + buffer, + offsets, + config: UnorderedRowConfig { + fields: self.fields.clone(), + validate_utf8: true, + }, + } + } + + /// Convert raw bytes into [`ArrayRef`] + /// + /// # Safety + /// + /// `rows` must contain valid data for this [`UnorderedRowConverter`] + unsafe fn convert_raw( + &self, + rows: &mut [&[u8]], + validate_utf8: bool, + ) -> Result, ArrowError> { + let null_buffer_for_fields = { + let fields_indices_that_have_nulls = get_fields_should_encode_nulls_for(&self.fields); + let number_of_encoded_nulls = fields_indices_that_have_nulls.filter(|&n| n).count(); + let null_buffers = nulls::decode_packed_nulls_in_rows(rows, number_of_encoded_nulls); + let mut null_buffers = null_buffers.into_iter(); + + get_fields_should_encode_nulls_for(&self.fields).map(|should_encode_nulls| { + if should_encode_nulls { + null_buffers.next().unwrap() + } else { + None + } + }).collect::>() + }; + + + if self.fields.len() == 4 + && self.fields[0].data_type().is_primitive() + && self + .fields + .iter() + .all(|item| item.data_type() == self.fields[0].data_type()) + { + let data_type = self.fields[0].data_type(); + + macro_rules! decode_primitive_helper { + ($t:ty, $rows:ident) => { + decode_column_four::<$t>(&self.fields, $rows, null_buffer_for_fields) + }; + } + + let results = downcast_primitive! 
{ + data_type => (decode_primitive_helper, rows), + + _ => unreachable!("unsupported data type: {data_type}"), + }?; + + let results = self.reverse_reorder_columns(results); + + + Ok(results) + } else { + let results = self.fields + .iter() + .zip(&self.codecs) + .zip(null_buffer_for_fields.into_iter()) + .map(|((field, codec), nulls)| unsafe { decode_column(field, rows, codec, validate_utf8, nulls) }) + .collect::, _>>()?; + + let results = self.reverse_reorder_columns(results); + + Ok(results) + } + } + + /// Returns a [`UnorderedRowParser`] that can be used to parse [`UnorderedRow`] from bytes + pub fn parser(&self) -> UnorderedRowParser { + UnorderedRowParser::new(self.fields.clone()) + } + + /// Returns the size of this instance in bytes + /// + /// Includes the size of `Self`. + pub fn size(&self) -> usize { + std::mem::size_of::() + + self.fields.iter().map(|x| x.size()).sum::() + + self.codecs.capacity() * std::mem::size_of::() + + self.codecs.iter().map(Codec::size).sum::() + } +} + +/// A [`UnorderedRowParser`] can be created from a [`UnorderedRowConverter`] and used to parse bytes to [`UnorderedRow`] +#[derive(Debug)] +pub struct UnorderedRowParser { + config: UnorderedRowConfig, +} + +impl UnorderedRowParser { + fn new(fields: Fields) -> Self { + Self { + config: UnorderedRowConfig { + fields, + validate_utf8: true, + }, + } + } + + /// Creates a [`UnorderedRow`] from the provided `bytes`. 
+ /// + /// `bytes` must be a [`UnorderedRow`] produced by the [`UnorderedRowConverter`] associated with + /// this [`UnorderedRowParser`], otherwise subsequent operations with the produced [`UnorderedRow`] may panic + pub fn parse<'a>(&'a self, bytes: &'a [u8]) -> UnorderedRow<'a> { + UnorderedRow { + data: bytes, + config: &self.config, + } + } +} + +/// The config of a given set of [`UnorderedRow`] +#[derive(Debug, Clone)] +struct UnorderedRowConfig { + /// The schema for these rows + fields: Fields, + /// Whether to run UTF-8 validation when converting to arrow arrays + validate_utf8: bool, +} + +/// A row-oriented representation of arrow data, that is normalized for comparison. +/// +/// See the [module level documentation](self) and [`UnorderedRowConverter`] for more details. +#[derive(Debug)] +pub struct UnorderedRows { + /// Underlying row bytes + buffer: Vec, + /// Row `i` has data `&buffer[offsets[i]..offsets[i+1]]` + offsets: Vec, + /// The config for these rows + config: UnorderedRowConfig, +} + +impl UnorderedRows { + /// Append a [`UnorderedRow`] to this [`UnorderedRows`] + pub fn push(&mut self, row: UnorderedRow<'_>) { + // TODO - returned this + // assert!( + // Arc::ptr_eq(&row.config.fields, &self.config.fields), + // "row was not produced by this RowConverter" + // ); + self.config.validate_utf8 |= row.config.validate_utf8; + self.buffer.extend_from_slice(row.data); + self.offsets.push(self.buffer.len()) + } + + /// Return the length of each row in this [`Rows`] + pub fn lengths(&self) -> impl ExactSizeIterator + '_ { + self.offsets.windows(2).map(|x| x[1] - x[0]) + } + /// Return the length of each row in this [`Rows`] + pub fn lengths_from(&self, data_range: &Range) -> impl ExactSizeIterator + '_ { + self.offsets[data_range.start..].windows(2).map(|x| x[1] - x[0]).take(data_range.len()) + } + + /// Returns the row at index `row` + pub fn row(&self, row: usize) -> UnorderedRow<'_> { + assert!(row + 1 < self.offsets.len()); + unsafe { 
self.row_unchecked(row) } + } + + /// Returns the row at `index` without bounds checking + /// + /// # Safety + /// Caller must ensure that `index` is less than the number of offsets (#rows + 1) + pub unsafe fn row_unchecked(&self, index: usize) -> UnorderedRow<'_> { + let end = unsafe { self.offsets.get_unchecked(index + 1) }; + let start = unsafe { self.offsets.get_unchecked(index) }; + let data = unsafe { self.buffer.get_unchecked(*start..*end) }; + UnorderedRow { + data, + config: &self.config, + } + } + + /// Get data for rows in start..end + pub(crate) fn data_range(&self, data_range: Range) -> &[u8] { + assert!(data_range.start < self.offsets.len()); + assert!(data_range.end < self.offsets.len()); + // We want to exclude end, so we take the one before it + let end_row = data_range.end - 1; + + { + let end = unsafe { self.offsets.get_unchecked(end_row + 1) }; + let start = unsafe { self.offsets.get_unchecked(data_range.start) }; + let data = unsafe { self.buffer.get_unchecked(*start..*end) }; + + data + } + // + // let start = self.offsets[data_range.start]; + // let end = self.offsets[data_range.end]; + // &self.buffer[start..end] + } + + /// Get the number of bytes the rows will take + pub(crate) fn data_range_len(&self, data_range: &Range) -> usize { + assert!(data_range.start < self.offsets.len()); + assert!(data_range.end < self.offsets.len()); + // We want to exclude end, so we take the one before it + let end_row = data_range.end - 1; + + let end = unsafe { self.offsets.get_unchecked(end_row + 1) }; + let start = unsafe { self.offsets.get_unchecked(data_range.start) }; + + *end - *start + } + + /// Sets the length of this [`UnorderedRows`] to 0 + pub fn clear(&mut self) { + self.offsets.truncate(1); + self.buffer.clear(); + } + + /// Returns the number of [`UnorderedRow`] in this [`UnorderedRows`] + pub fn num_rows(&self) -> usize { + self.offsets.len() - 1 + } + + /// Returns an iterator over the [`UnorderedRow`] in this [`UnorderedRows`] + pub fn 
iter(&self) -> UnorderedRowsIter<'_> { + self.into_iter() + } + + /// Returns the size of this instance in bytes + /// + /// Includes the size of `Self`. + pub fn size(&self) -> usize { + // Size of fields is accounted for as part of RowConverter + std::mem::size_of::() + + self.buffer.capacity() + + self.offsets.capacity() * std::mem::size_of::() + } + + /// Create a [BinaryArray] from the [UnorderedRows] data without reallocating the + /// underlying bytes. + /// + /// + /// ``` + /// # use std::sync::Arc; + /// # use std::collections::HashSet; + /// # use arrow_array::cast::AsArray; + /// # use arrow_array::StringArray; + /// # use arrow_row::{OwnedRow, Row, RowConverter, RowParser, SortField}; + /// # use arrow_schema::DataType; + /// # + /// let converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]).unwrap(); + /// let array = StringArray::from(vec!["hello", "world", "a", "a", "hello"]); + /// let rows = converter.convert_columns(&[Arc::new(array)]).unwrap(); + /// + /// // We can convert rows into binary format and back. + /// let values: Vec = rows.iter().map(|r| r.owned()).collect(); + /// let binary = rows.try_into_binary().expect("known-small array"); + /// let parser = converter.parser(); + /// let parsed: Vec = + /// binary.iter().flatten().map(|b| parser.parse(b).owned()).collect(); + /// assert_eq!(values, parsed); + /// ``` + /// + /// # Errors + /// + /// This function will return an error if there is more data than can be stored in + /// a [BinaryArray] -- i.e. if the total data size is more than 2GiB. + pub fn try_into_binary(self) -> Result { + if self.buffer.len() > i32::MAX as usize { + return Err(ArrowError::InvalidArgumentError(format!( + "{}-byte rows buffer too long to convert into a i32-indexed BinaryArray", + self.buffer.len() + ))); + } + // We've checked that the buffer length fits in an i32; so all offsets into that buffer should fit as well. 
+ let offsets_scalar = ScalarBuffer::from_iter(self.offsets.into_iter().map(i32::usize_as)); + // SAFETY: offsets buffer is nonempty, monotonically increasing, and all represent valid indexes into buffer. + let array = unsafe { + BinaryArray::new_unchecked( + OffsetBuffer::new_unchecked(offsets_scalar), + Buffer::from_vec(self.buffer), + None, + ) + }; + Ok(array) + } +} + +impl<'a> IntoIterator for &'a UnorderedRows { + type Item = UnorderedRow<'a>; + type IntoIter = UnorderedRowsIter<'a>; + + fn into_iter(self) -> Self::IntoIter { + UnorderedRowsIter { + rows: self, + start: 0, + end: self.num_rows(), + } + } +} + +/// An iterator over [`UnorderedRows`] +#[derive(Debug)] +pub struct UnorderedRowsIter<'a> { + rows: &'a UnorderedRows, + start: usize, + end: usize, +} + +impl<'a> Iterator for UnorderedRowsIter<'a> { + type Item = UnorderedRow<'a>; + + fn next(&mut self) -> Option { + if self.end == self.start { + return None; + } + + // SAFETY: We have checked that `start` is less than `end` + let row = unsafe { self.rows.row_unchecked(self.start) }; + self.start += 1; + Some(row) + } + + fn size_hint(&self) -> (usize, Option) { + let len = self.len(); + (len, Some(len)) + } +} + +impl ExactSizeIterator for UnorderedRowsIter<'_> { + fn len(&self) -> usize { + self.end - self.start + } +} + +impl DoubleEndedIterator for UnorderedRowsIter<'_> { + fn next_back(&mut self) -> Option { + if self.end == self.start { + return None; + } + // Safety: We have checked that `start` is less than `end` + let row = unsafe { self.rows.row_unchecked(self.end) }; + self.end -= 1; + Some(row) + } +} + +/// A comparable representation of a row. +/// +/// See the [module level documentation](self) for more details. +/// +/// Two [`UnorderedRow`] can only be compared if they both belong to [`UnorderedRows`] +/// returned by calls to [`UnorderedRowConverter::convert_columns`] on the same +/// [`UnorderedRowConverter`]. 
If different [`UnorderedRowConverter`]s are used, any +/// ordering established by comparing the [`UnorderedRow`] is arbitrary. +#[derive(Debug, Copy, Clone)] +pub struct UnorderedRow<'a> { + data: &'a [u8], + config: &'a UnorderedRowConfig, +} + +impl<'a> UnorderedRow<'a> { + /// Create owned version of the row to detach it from the shared [`UnorderedRows`]. + pub fn owned(&self) -> OwnedUnorderedRow { + OwnedUnorderedRow { + data: self.data.into(), + config: self.config.clone(), + } + } + + /// The row's bytes, with the lifetime of the underlying data. + pub fn data(&self) -> &'a [u8] { + self.data + } +} + +// Manually derive these as don't wish to include `fields` + +impl PartialEq for UnorderedRow<'_> { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.data.eq(other.data) + } +} + +impl Eq for UnorderedRow<'_> {} + +impl Hash for UnorderedRow<'_> { + #[inline] + fn hash(&self, state: &mut H) { + self.data.hash(state) + } +} + +impl AsRef<[u8]> for UnorderedRow<'_> { + #[inline] + fn as_ref(&self) -> &[u8] { + self.data + } +} + +/// Owned version of a [`UnorderedRow`] that can be moved/cloned freely. +/// +/// This contains the data for the one specific row (not the entire buffer of all rows). +#[derive(Debug, Clone)] +pub struct OwnedUnorderedRow { + data: Box<[u8]>, + config: UnorderedRowConfig, +} + +impl OwnedUnorderedRow { + /// Get borrowed [`UnorderedRow`] from owned version. + /// + /// This is helpful if you want to compare an [`OwnedUnorderedRow`] with a [`UnorderedRow`]. + pub fn row(&self) -> UnorderedRow<'_> { + UnorderedRow { + data: &self.data, + config: &self.config, + } + } +} + +// Manually derive these as don't wish to include `fields`. Also we just want to use the same `Row` implementations here. 
+ +impl PartialEq for OwnedUnorderedRow { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.row().eq(&other.row()) + } +} + +impl Eq for OwnedUnorderedRow {} + +impl Hash for OwnedUnorderedRow { + #[inline] + fn hash(&self, state: &mut H) { + self.row().hash(state) + } +} + +impl AsRef<[u8]> for OwnedUnorderedRow { + #[inline] + fn as_ref(&self) -> &[u8] { + &self.data + } +} + +/// Returns the null sentinel, negated if `invert` is true +#[inline] +const fn null_sentinel() -> u8 { + 0 +} + +/// Stores the lengths of the rows. Lazily materializes lengths for columns with fixed-size types. +enum LengthTracker { + /// Fixed state: All rows have length `length` + Fixed { length: usize, num_rows: usize }, + /// Variable state: The length of row `i` is `lengths[i] + fixed_length` + Variable { + fixed_length: usize, + lengths: Vec, + }, +} + +impl LengthTracker { + fn new(num_rows: usize) -> Self { + Self::Fixed { + length: 0, + num_rows, + } + } + + /// Adds a column of fixed-length elements, each of size `new_length` to the LengthTracker + fn push_fixed(&mut self, new_length: usize) { + match self { + LengthTracker::Fixed { length, .. } => *length += new_length, + LengthTracker::Variable { fixed_length, .. } => *fixed_length += new_length, + } + } + + /// Adds a column of possibly variable-length elements, element `i` has length `new_lengths.nth(i)` + fn push_variable(&mut self, new_lengths: impl ExactSizeIterator) { + match self { + LengthTracker::Fixed { length, .. } => { + *self = LengthTracker::Variable { + fixed_length: *length, + lengths: new_lengths.collect(), + } + } + LengthTracker::Variable { lengths, .. 
} => { + assert_eq!(lengths.len(), new_lengths.len()); + lengths + .iter_mut() + .zip(new_lengths) + .for_each(|(length, new_length)| *length += new_length); + } + } + } + + /// Returns the tracked row lengths as a slice + fn materialized(&mut self) -> &mut [usize] { + if let LengthTracker::Fixed { length, num_rows } = *self { + *self = LengthTracker::Variable { + fixed_length: length, + lengths: vec![0; num_rows], + }; + } + + match self { + LengthTracker::Variable { lengths, .. } => lengths, + LengthTracker::Fixed { .. } => unreachable!(), + } + } + + /// Initializes the offsets using the tracked lengths. Returns the sum of the + /// lengths of the rows added. + /// + /// We initialize the offsets shifted down by one row index. + /// + /// As the rows are appended to the offsets will be incremented to match + /// + /// For example, consider the case of 3 rows of length 3, 4, and 6 respectively. + /// The offsets would be initialized to `0, 0, 3, 7` + /// + /// Writing the first row entirely would yield `0, 3, 3, 7` + /// The second, `0, 3, 7, 7` + /// The third, `0, 3, 7, 13` + // + /// This would be the final offsets for reading + // + /// In this way offsets tracks the position during writing whilst eventually serving + fn extend_offsets(&self, initial_offset: usize, offsets: &mut Vec) -> usize { + match self { + LengthTracker::Fixed { length, num_rows } => { + offsets.extend((0..*num_rows).map(|i| initial_offset + i * length)); + + initial_offset + num_rows * length + } + LengthTracker::Variable { + fixed_length, + lengths, + } => { + let mut acc = initial_offset; + + offsets.extend(lengths.iter().map(|length| { + let current = acc; + acc += length + fixed_length; + current + })); + + acc + } + } + } +} + +fn get_fields_should_encode_nulls_for(fields: &Fields) -> impl ExactSizeIterator { + fields + .iter() + .map(|field| should_encode_null_for_field(field)) +} + +fn should_encode_null_for_field(field: &Field) -> bool { + // Only account for nulls for nullable 
fields + field.is_nullable() && + // Boolean nulls are encoded together + // and NullArray is not encoded at all + !matches!(field.data_type(), + // Boolean encode its own nulls + DataType::Boolean | + // NullArray is not encoded at all + DataType::Null | + // Dictionary encodes its own nulls + DataType::Dictionary(_, _) + ) +} + +/// Computes the length of each encoded [`UnorderedRows`] and returns an empty [`UnorderedRows`] +fn row_lengths(cols: &[ArrayRef], encoders: &[Encoder], fields: &Fields) -> LengthTracker { + use fixed::FixedLengthEncoding; + + let num_rows = cols.first().map(|x| x.len()).unwrap_or(0); + let mut tracker = LengthTracker::new(num_rows); + + // Account for nulls as they are handled separately + tracker.push_fixed(nulls::get_number_of_bytes_for_nulls( + get_fields_should_encode_nulls_for(fields).filter(|should_encode| *should_encode).count() + )); + + for (array, encoder) in cols.iter().zip(encoders) { + match encoder { + Encoder::Stateless => { + downcast_primitive_array! 
{ + array => tracker.push_fixed(fixed::encoded_len(array)), + DataType::Null => {}, + DataType::Boolean => tracker.push_fixed(boolean::FIXED_SIZE), + DataType::Binary => tracker.push_variable( + as_generic_binary_array::(array) + .iter() + .map(|slice| variable::encoded_len(slice)) + ), + DataType::LargeBinary => tracker.push_variable( + as_generic_binary_array::(array) + .iter() + .map(|slice| variable::encoded_len(slice)) + ), + DataType::BinaryView => tracker.push_variable( + array.as_binary_view() + .iter() + .map(|slice| variable::encoded_len(slice)) + ), + DataType::Utf8 => tracker.push_variable( + array.as_string::() + .iter() + .map(|slice| variable::encoded_len(slice.map(|x| x.as_bytes()))) + ), + DataType::LargeUtf8 => tracker.push_variable( + array.as_string::() + .iter() + .map(|slice| variable::encoded_len(slice.map(|x| x.as_bytes()))) + ), + DataType::Utf8View => tracker.push_variable( + array.as_string_view() + .iter() + .map(|slice| variable::encoded_len(slice.map(|x| x.as_bytes()))) + ), + DataType::FixedSizeBinary(len) => { + let len = len.to_usize().unwrap(); + tracker.push_fixed(len) + } + _ => unimplemented!("unsupported data type: {}", array.data_type()), + } + } + Encoder::Dictionary(values, null) => { + downcast_dictionary_array! 
{ + array => { + tracker.push_variable( + array.keys().iter().map(|v| match v { + Some(k) => values.row(k.as_usize()).data.len(), + // TODO - handle nulls + None => null.data.len(), + }) + ) + } + _ => unreachable!(), + } + } + Encoder::Struct(rows, null) => { + let array = as_struct_array(array); + tracker.push_variable((0..array.len()).map(|idx| match array.is_valid(idx) { + true => 1 + rows.row(idx).as_ref().len(), + // TODO - handle nulls + false => 1 + null.data.len(), + })); + } + Encoder::List(rows) => match array.data_type() { + DataType::List(_) => { + list::compute_lengths(tracker.materialized(), rows, as_list_array(array)) + } + DataType::LargeList(_) => { + list::compute_lengths(tracker.materialized(), rows, as_large_list_array(array)) + } + DataType::FixedSizeList(_, _) => compute_lengths_fixed_size_list( + &mut tracker, + rows, + as_fixed_size_list_array(array), + ), + _ => unreachable!(), + }, + Encoder::RunEndEncoded(rows) => match array.data_type() { + DataType::RunEndEncoded(r, _) => match r.data_type() { + DataType::Int16 => run::compute_lengths( + tracker.materialized(), + rows, + array.as_run::(), + ), + DataType::Int32 => run::compute_lengths( + tracker.materialized(), + rows, + array.as_run::(), + ), + DataType::Int64 => run::compute_lengths( + tracker.materialized(), + rows, + array.as_run::(), + ), + _ => unreachable!("Unsupported run end index type: {r:?}"), + }, + _ => unreachable!(), + }, + Encoder::Union { + child_rows, + type_ids, + offsets, + } => { + let union_array = array + .as_any() + .downcast_ref::() + .expect("expected UnionArray"); + + let lengths = (0..union_array.len()).map(|i| { + let type_id = type_ids[i]; + let child_row_i = offsets.as_ref().map(|o| o[i] as usize).unwrap_or(i); + let child_row = child_rows[type_id as usize].row(child_row_i); + + // length: 1 byte type_id + child row bytes + 1 + child_row.as_ref().len() + }); + + tracker.push_variable(lengths); + } + } + } + + tracker +} + +/// Encodes a column to the 
provided [`UnorderedRows`] incrementing the offsets as it progresses +fn encode_column( + data: &mut [u8], + offsets: &mut [usize], + column: &dyn Array, + encoder: &Encoder<'_>, +) { + match encoder { + Encoder::Stateless => { + downcast_primitive_array! { + column => { + if let Some(nulls) = column.nulls().filter(|n| n.null_count() > 0){ + fixed::encode(data, offsets, column.values(), nulls) + } else { + fixed::encode_not_null(data, offsets, column.values()) + } + } + DataType::Null => {} + DataType::Boolean => { + if let Some(nulls) = column.nulls().filter(|n| n.null_count() > 0){ + boolean::encode_boolean(data, offsets, column.as_boolean().values(), nulls) + } else { + boolean::encode_boolean_not_null(data, offsets, column.as_boolean().values()) + } + } + DataType::Binary => { + variable::encode_generic_byte_array(data, offsets, as_generic_binary_array::(column)) + } + DataType::BinaryView => { + variable::encode(data, offsets, column.as_binary_view().iter()) + } + DataType::LargeBinary => { + variable::encode_generic_byte_array(data, offsets, as_generic_binary_array::(column)) + } + DataType::Utf8 => variable::encode_generic_byte_array( + data, offsets, + column.as_string::(), + ), + DataType::LargeUtf8 => variable::encode_generic_byte_array( + data, offsets, + column.as_string::(), + ), + DataType::Utf8View => variable::encode( + data, offsets, + column.as_string_view().iter().map(|x| x.map(|x| x.as_bytes())), + ), + DataType::FixedSizeBinary(_) => { + let array = column.as_any().downcast_ref().unwrap(); + fixed::encode_fixed_size_binary(data, offsets, array) + } + _ => unimplemented!("unsupported data type: {}", column.data_type()), + } + } + Encoder::Dictionary(values, nulls) => { + downcast_dictionary_array! 
{ + column => encode_dictionary_values(data, offsets, column, values, nulls), + _ => unreachable!() + } + } + Encoder::Struct(rows, null) => { + let array = as_struct_array(column); + let null_sentinel = null_sentinel(); + offsets + .iter_mut() + .skip(1) + .enumerate() + .for_each(|(idx, offset)| { + let (row, sentinel) = match array.is_valid(idx) { + true => (rows.row(idx), 0x01), + false => (*null, null_sentinel), + }; + let end_offset = *offset + 1 + row.as_ref().len(); + data[*offset] = sentinel; + data[*offset + 1..end_offset].copy_from_slice(row.as_ref()); + *offset = end_offset; + }) + } + Encoder::List(rows) => match column.data_type() { + DataType::List(_) => list::encode(data, offsets, rows, as_list_array(column)), + DataType::LargeList(_) => { + list::encode(data, offsets, rows, as_large_list_array(column)) + } + DataType::FixedSizeList(_, _) => { + encode_fixed_size_list(data, offsets, rows, as_fixed_size_list_array(column)) + } + _ => unreachable!(), + }, + Encoder::RunEndEncoded(rows) => match column.data_type() { + DataType::RunEndEncoded(r, _) => match r.data_type() { + DataType::Int16 => run::encode(data, offsets, rows, column.as_run::()), + DataType::Int32 => run::encode(data, offsets, rows, column.as_run::()), + DataType::Int64 => run::encode(data, offsets, rows, column.as_run::()), + _ => unreachable!("Unsupported run end index type: {r:?}"), + }, + _ => unreachable!(), + }, + Encoder::Union { + child_rows, + type_ids, + offsets: offsets_buf, + } => { + offsets + .iter_mut() + .skip(1) + .enumerate() + .for_each(|(i, offset)| { + let type_id = type_ids[i]; + + let child_row_idx = offsets_buf.as_ref().map(|o| o[i] as usize).unwrap_or(i); + let child_row = child_rows[type_id as usize].row(child_row_idx); + let child_bytes = child_row.as_ref(); + + let type_id_byte = type_id as u8; + data[*offset] = type_id_byte; + + let child_start = *offset + 1; + let child_end = child_start + child_bytes.len(); + 
data[child_start..child_end].copy_from_slice(child_bytes); + + *offset = child_end; + }); + } + } +} + +/// Encodes a column to the provided [`UnorderedRows`] incrementing the offsets as it progresses +fn encode_column_double( + data: &mut [u8], + offsets: &mut [usize], + column1: &PrimitiveArray, + column2: &dyn Array, +) where + ::Native: fixed::FixedLengthEncoding, +{ + let col2 = column2.as_primitive::(); + if let Some(_) = column1 + .nulls() + .filter(|n| n.null_count() > 0) + .or_else(|| col2.nulls()) + .filter(|n| n.null_count() > 0) + { + unreachable!() + } else { + fixed::encode_not_null_double( + data, + offsets, + column1.values().iter().copied(), + col2.values().iter().copied(), + ) + } +} + +/// Encodes a column to the provided [`UnorderedRows`] incrementing the offsets as it progresses +fn encode_column_fixed( + data: &mut [u8], + offsets: &mut [usize], + columns: &[&dyn Array], +) where + ::Native: fixed::FixedLengthEncoding, +{ + for col in columns { + assert_eq!(col.null_count(), 0); + } + if N == 1 { + fixed::encode_not_null(data, offsets, columns[0].as_primitive::().values()); + return; + } + + let columns_arr: [&dyn Array; N] = columns.to_vec().try_into().unwrap(); + let values = columns_arr.map(|col| col.as_primitive::()); + + fixed::encode_not_null_fixed::( + data, + offsets, + values + ) +} +/// Encodes a column to the provided [`UnorderedRows`] incrementing the offsets as it progresses +fn encode_column_nulls_fixed( + data: &mut [u8], + offsets: &mut [usize], + columns: &[&dyn Array], +) where + ::Native: fixed::FixedLengthEncoding, +{ + for col in columns { + assert_ne!(col.null_count(), 0); + } + if N == 1 { + fixed::encode(data, offsets, columns[0].as_primitive::().values(), columns[0].nulls().unwrap()); + return; + } + + let columns_arr: [&dyn Array; N] = columns.to_vec().try_into().unwrap(); + let values = columns_arr.map(|col| col.as_primitive::()); + + fixed::encode_fixed::( + data, + offsets, + values + ) +} +// +// /// Encodes a 
column to the provided [`UnorderedRows`] incrementing the offsets as it progresses +// fn encode_column_four( +// data: &mut [u8], +// offsets: &mut [usize], +// column1: &PrimitiveArray, +// column2: &dyn Array, +// column3: &dyn Array, +// column4: &dyn Array, +// ) where +// ::Native: fixed::FixedLengthEncoding, +// { +// let col1 = column1; +// let col2 = column2.as_primitive::(); +// let col3 = column3.as_primitive::(); +// let col4 = column4.as_primitive::(); +// if let Some(_) = column1 +// .nulls() +// .filter(|n| n.null_count() > 0) +// .or_else(|| col2.nulls()) +// .filter(|n| n.null_count() > 0) +// { +// unreachable!() +// } else { +// fixed::encode_not_null_four( +// data, +// offsets, +// ( +// 1, +// Box::new(col1.values().iter().copied().map(|v| v.encode().as_ref())), +// ), +// ( +// 1, +// Box::new(col2.values().iter().copied().map(|v| v.encode().as_ref())), +// ), +// ( +// 1, +// Box::new(col3.values().iter().copied().map(|v| v.encode().as_ref())), +// ), +// ( +// 1, +// Box::new(col4.values().iter().copied().map(|v| v.encode().as_ref())), +// ), +// ) +// } +// } + + +/// Encodes a column to the provided [`UnorderedRows`] incrementing the offsets as it progresses +// fn encode_column_four_primitive( +// data: &mut [u8], +// offsets: &mut [usize], +// column1: &dyn Array, +// column2: &dyn Array, +// column3: &dyn Array, +// column4: &dyn Array, +// ) { +// [column1, column2, column3, column4].iter().for_each(|col| { +// assert_eq!(col.null_count(), 0); +// }); +// +// fixed::encode_not_null_four( +// data, +// offsets, +// get_primitive_iterator_with_size(column1), +// get_primitive_iterator_with_size(column2), +// get_primitive_iterator_with_size(column3), +// get_primitive_iterator_with_size(column4), +// ); +// +// +// } + + +fn get_primitive_iterator_with_size_for_primitive_array(array: &dyn Array) -> (usize, &Buffer) where + T: ArrowPrimitiveType, + ::Native: fixed::FixedLengthEncoding, +{ + let iter = array.as_primitive::() + 
.values().inner(); + + (size_of::<::Encoded>(), iter) +} + +fn get_primitive_iterator_with_size(array: &dyn Array) -> (usize, &Buffer) { + + macro_rules! decode_primitive_helper { + ($t:ty) => { + get_primitive_iterator_with_size_for_primitive_array::<$t>(array) + }; + } + + downcast_primitive! { + array.data_type() => (decode_primitive_helper), + + _ => unreachable!("unsupported data type: {}", array.data_type()), + } +} + +/// Encode dictionary values not preserving the dictionary encoding +pub fn encode_dictionary_values( + data: &mut [u8], + offsets: &mut [usize], + column: &DictionaryArray, + values: &UnorderedRows, + null: &UnorderedRow<'_>, +) { + for (offset, k) in offsets.iter_mut().skip(1).zip(column.keys()) { + let row = match k { + Some(k) => values.row(k.as_usize()).data, + None => null.data, + }; + let end_offset = *offset + row.len(); + data[*offset..end_offset].copy_from_slice(row); + *offset = end_offset; + } +} + +macro_rules! decode_primitive_helper { + ($t:ty, $rows:ident, $data_type:ident, $nulls:ident) => { + Arc::new(decode_primitive::<$t>($rows, $data_type, $nulls)) + }; +} + +/// Decodes a the provided `field` from `rows` +/// +/// # Safety +/// +/// Rows must contain valid data for the provided field +unsafe fn decode_column_four( + fields: &Fields, + rows: &mut [&[u8]], + nulls: Vec> +) -> Result, ArrowError> +where + T::Native: FixedLengthEncoding, +{ + assert_eq!(fields.len(), 4); + assert_eq!(nulls.len(), fields.len()); + + let nulls: [Option; 4] = nulls.try_into().unwrap(); + let arrays = decode_primitive4::( + rows, + [fields[0].data_type().clone(), fields[1].data_type().clone(), fields[2].data_type().clone(), fields[3].data_type().clone()], + nulls, + ); + + Ok(arrays.map(|array| Arc::new(array) as ArrayRef).to_vec()) +} + +/// Decodes a the provided `field` from `rows` +/// +/// # Safety +/// +/// Rows must contain valid data for the provided field +unsafe fn decode_column( + field: &Field, + rows: &mut [&[u8]], + codec: &Codec, + 
validate_utf8: bool, + nulls: Option, +) -> Result { + let array: ArrayRef = match codec { + Codec::Stateless => { + let data_type = field.data_type().clone(); + downcast_primitive! { + data_type => (decode_primitive_helper, rows, data_type, nulls), + DataType::Null => Arc::new(NullArray::new(rows.len())), + DataType::Boolean => { + assert_eq!(nulls, None, "Boolean columns encode its own nulls"); + Arc::new(boolean::decode_bool(rows)) + } + DataType::Binary => Arc::new(decode_binary::(rows, nulls)), + DataType::LargeBinary => Arc::new(decode_binary::(rows, nulls)), + DataType::BinaryView => Arc::new(decode_binary_view(rows, nulls)), + DataType::FixedSizeBinary(size) => Arc::new(decode_fixed_size_binary(rows, size, nulls)), + DataType::Utf8 => Arc::new(unsafe{ decode_string::(rows, validate_utf8, nulls) }), + DataType::LargeUtf8 => Arc::new(unsafe { decode_string::(rows, validate_utf8, nulls) }), + DataType::Utf8View => Arc::new(unsafe { decode_string_view(rows, validate_utf8, nulls) }), + _ => return Err(ArrowError::NotYetImplemented(format!("unsupported data type: {data_type}" ))) + } + } + Codec::Dictionary(converter, _) => { + assert_eq!(nulls, None, "Dictionary columns encode its own nulls"); + + let cols = unsafe { converter.convert_raw(rows, validate_utf8) }?; + cols.into_iter().next().unwrap() + } + Codec::Struct(converter, _) => { + let null_count = nulls.as_ref().map_or(0, |n| n.null_count()); + rows.iter_mut().for_each(|row| *row = &row[1..]); + let children = unsafe { converter.convert_raw(rows, validate_utf8) }?; + + let child_data: Vec = children.iter().map(|c| c.to_data()).collect(); + // Since RowConverter flattens certain data types (i.e. 
Dictionary), + // we need to use updated data type instead of original field + let corrected_fields: Vec = match field.data_type() { + DataType::Struct(struct_fields) => struct_fields + .iter() + .zip(child_data.iter()) + .map(|(orig_field, child_array)| { + orig_field + .as_ref() + .clone() + .with_data_type(child_array.data_type().clone()) + }) + .collect(), + _ => unreachable!("Only Struct types should be corrected here"), + }; + let corrected_struct_type = DataType::Struct(corrected_fields.into()); + let builder = ArrayDataBuilder::new(corrected_struct_type) + .len(rows.len()) + .child_data(child_data) + .nulls(nulls) + .null_count(null_count); + + Arc::new(StructArray::from(unsafe { builder.build_unchecked() })) + } + Codec::List(converter) => match field.data_type() { + DataType::List(_) => { + Arc::new(unsafe { list::decode::(converter, rows, field, validate_utf8, nulls) }?) + } + DataType::LargeList(_) => { + Arc::new(unsafe { list::decode::(converter, rows, field, validate_utf8, nulls) }?) + } + DataType::FixedSizeList(_, value_length) => Arc::new(unsafe { + list::decode_fixed_size_list( + converter, + rows, + field, + validate_utf8, + value_length.as_usize(), + nulls, + ) + }?), + _ => unreachable!(), + }, + Codec::RunEndEncoded(converter) => match field.data_type() { + DataType::RunEndEncoded(run_ends, _) => match run_ends.data_type() { + DataType::Int16 => { + Arc::new(unsafe { run::decode::(converter, rows, validate_utf8) }?) + } + DataType::Int32 => { + Arc::new(unsafe { run::decode::(converter, rows, validate_utf8) }?) + } + DataType::Int64 => { + Arc::new(unsafe { run::decode::(converter, rows, validate_utf8) }?) 
+ } + _ => unreachable!(), + }, + _ => unreachable!(), + }, + Codec::Union(converters, null_rows) => { + let len = rows.len(); + + let DataType::Union(union_fields, mode) = field.data_type() else { + unreachable!() + }; + + let mut type_ids = Vec::with_capacity(len); + let mut rows_by_field: Vec> = vec![Vec::new(); converters.len()]; + + for (idx, row) in rows.iter_mut().enumerate() { + let type_id_byte = { + let id = row[0]; + id + }; + + let type_id = type_id_byte as i8; + type_ids.push(type_id); + + let field_idx = type_id as usize; + + let child_row = &row[1..]; + rows_by_field[field_idx].push((idx, child_row)); + + *row = &row[row.len()..]; + } + + let mut child_arrays: Vec = Vec::with_capacity(converters.len()); + + let mut offsets = (*mode == UnionMode::Dense).then(|| Vec::with_capacity(len)); + + for (field_idx, converter) in converters.iter().enumerate() { + let field_rows = &rows_by_field[field_idx]; + + match &mode { + UnionMode::Dense => { + if field_rows.is_empty() { + let (_, field) = union_fields.iter().nth(field_idx).unwrap(); + child_arrays.push(arrow_array::new_empty_array(field.data_type())); + continue; + } + + let mut child_data = field_rows + .iter() + .map(|(_, bytes)| *bytes) + .collect::>(); + + let child_array = + unsafe { converter.convert_raw(&mut child_data, validate_utf8) }?; + + child_arrays.push(child_array.into_iter().next().unwrap()); + } + UnionMode::Sparse => { + let mut sparse_data: Vec<&[u8]> = Vec::with_capacity(len); + let mut field_row_iter = field_rows.iter().peekable(); + let null_row_bytes: &[u8] = &null_rows[field_idx].data; + + for idx in 0..len { + if let Some((next_idx, bytes)) = field_row_iter.peek() { + if *next_idx == idx { + sparse_data.push(*bytes); + + field_row_iter.next(); + continue; + } + } + sparse_data.push(null_row_bytes); + } + + let child_array = + unsafe { converter.convert_raw(&mut sparse_data, validate_utf8) }?; + child_arrays.push(child_array.into_iter().next().unwrap()); + } + } + } + + // build 
offsets for dense unions + if let Some(ref mut offsets_vec) = offsets { + let mut count = vec![0i32; converters.len()]; + for type_id in &type_ids { + let field_idx = *type_id as usize; + offsets_vec.push(count[field_idx]); + + count[field_idx] += 1; + } + } + + let type_ids_buffer = ScalarBuffer::from(type_ids); + let offsets_buffer = offsets.map(ScalarBuffer::from); + + let union_array = UnionArray::try_new( + union_fields.clone(), + type_ids_buffer, + offsets_buffer, + child_arrays, + )?; + + // note: union arrays don't support physical null buffers + // nulls are represented logically though child arrays + Arc::new(union_array) + } + }; + Ok(array) +} + +#[cfg(test)] +mod tests { + use arrow_array::builder::*; + use arrow_array::types::*; + use arrow_array::*; + use arrow_buffer::{Buffer, OffsetBuffer}; + use arrow_buffer::{NullBuffer, i256}; + use arrow_cast::display::{ArrayFormatter, FormatOptions}; + use arrow_ord::sort::{LexicographicalComparator, SortColumn}; + use rand::distr::uniform::SampleUniform; + use rand::distr::{Distribution, StandardUniform}; + use rand::rngs::StdRng; + use rand::{Rng, RngCore, SeedableRng, rng}; + use std::cmp::Ordering; + + use super::*; + + #[test] + fn test_fixed_width() { + let cols = [ + Arc::new(Int16Array::from_iter([ + Some(1), + Some(2), + None, + Some(-5), + Some(2), + Some(2), + Some(0), + ])) as ArrayRef, + Arc::new(Float32Array::from_iter([ + Some(1.3), + Some(2.5), + None, + Some(4.), + Some(0.1), + Some(-4.), + Some(-0.), + ])) as ArrayRef, + ]; + + let converter = UnorderedRowConverter::new( + vec![ + Field::new("col_1", DataType::Int16, true), + Field::new("col_2", DataType::Float32, true), + ] + .into(), + ) + .unwrap(); + let rows = converter.convert_columns(&cols).unwrap(); + + // assert_eq!(rows.offsets, &[0, 8, 16, 24, 32, 40, 48, 56]); + // assert_eq!( + // rows.buffer, + // &[ + // 1, 128, 1, // + // 1, 191, 166, 102, 102, // + // 1, 128, 2, // + // 1, 192, 32, 0, 0, // + // 0, 0, 0, // + // 0, 0, 0, 0, 
0, // + // 1, 127, 251, // + // 1, 192, 128, 0, 0, // + // 1, 128, 2, // + // 1, 189, 204, 204, 205, // + // 1, 128, 2, // + // 1, 63, 127, 255, 255, // + // 1, 128, 0, // + // 1, 127, 255, 255, 255 // + // ] + // ); + + // assert!(rows.row(3) < rows.row(6)); + // assert!(rows.row(0) < rows.row(1)); + // assert!(rows.row(3) < rows.row(0)); + // assert!(rows.row(4) < rows.row(1)); + // assert!(rows.row(5) < rows.row(4)); + + let back = converter.convert_rows(&rows).unwrap(); + for (expected, actual) in cols.iter().zip(&back) { + assert_eq!(expected, actual); + } + } + + #[test] + fn test_decimal32() { + let converter = UnorderedRowConverter::new( + vec![Field::new( + "col_1", + DataType::Decimal32(DECIMAL32_MAX_PRECISION, 7), + true, + )] + .into(), + ) + .unwrap(); + let col = Arc::new( + Decimal32Array::from_iter([ + None, + Some(i32::MIN), + Some(-13), + Some(46_i32), + Some(5456_i32), + Some(i32::MAX), + ]) + .with_precision_and_scale(9, 7) + .unwrap(), + ) as ArrayRef; + + let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap(); + // for i in 0..rows.num_rows() - 1 { + // assert!(rows.row(i) < rows.row(i + 1)); + // } + + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(back.len(), 1); + assert_eq!(col.as_ref(), back[0].as_ref()) + } + + #[test] + fn test_decimal64() { + let converter = UnorderedRowConverter::new( + vec![Field::new( + "col_1", + DataType::Decimal64(DECIMAL64_MAX_PRECISION, 7), + true, + )] + .into(), + ) + .unwrap(); + let col = Arc::new( + Decimal64Array::from_iter([ + None, + Some(i64::MIN), + Some(-13), + Some(46_i64), + Some(5456_i64), + Some(i64::MAX), + ]) + .with_precision_and_scale(18, 7) + .unwrap(), + ) as ArrayRef; + + let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap(); + // for i in 0..rows.num_rows() - 1 { + // assert!(rows.row(i) < rows.row(i + 1)); + // } + + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(back.len(), 1); + assert_eq!(col.as_ref(), back[0].as_ref()) + } + 
+ #[test] + fn test_decimal128() { + let converter = UnorderedRowConverter::new( + vec![Field::new( + "col_1", + DataType::Decimal128(DECIMAL128_MAX_PRECISION, 7), + true, + )] + .into(), + ) + .unwrap(); + let col = Arc::new( + Decimal128Array::from_iter([ + None, + Some(i128::MIN), + Some(-13), + Some(46_i128), + Some(5456_i128), + Some(i128::MAX), + ]) + .with_precision_and_scale(38, 7) + .unwrap(), + ) as ArrayRef; + + let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap(); + // for i in 0..rows.num_rows() - 1 { + // assert!(rows.row(i) < rows.row(i + 1)); + // } + + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(back.len(), 1); + assert_eq!(col.as_ref(), back[0].as_ref()) + } + + #[test] + fn test_decimal256() { + let converter = UnorderedRowConverter::new( + vec![Field::new( + "col_1", + DataType::Decimal256(DECIMAL256_MAX_PRECISION, 7), + true, + )] + .into(), + ) + .unwrap(); + let col = Arc::new( + Decimal256Array::from_iter([ + None, + Some(i256::MIN), + Some(i256::from_parts(0, -1)), + Some(i256::from_parts(u128::MAX, -1)), + Some(i256::from_parts(u128::MAX, 0)), + Some(i256::from_parts(0, 46_i128)), + Some(i256::from_parts(5, 46_i128)), + Some(i256::MAX), + ]) + .with_precision_and_scale(DECIMAL256_MAX_PRECISION, 7) + .unwrap(), + ) as ArrayRef; + + let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap(); + // for i in 0..rows.num_rows() - 1 { + // assert!(rows.row(i) < rows.row(i + 1)); + // } + + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(back.len(), 1); + assert_eq!(col.as_ref(), back[0].as_ref()) + } + + #[test] + fn test_bool() { + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", DataType::Boolean, true)].into()) + .unwrap(); + + let col = Arc::new(BooleanArray::from_iter([None, Some(false), Some(true)])) as ArrayRef; + + let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap(); + // assert!(rows.row(2) > rows.row(1)); + // assert!(rows.row(2) > 
rows.row(0)); + // assert!(rows.row(1) > rows.row(0)); + + let cols = converter.convert_rows(&rows).unwrap(); + assert_eq!(&cols[0], &col); + + let converter = UnorderedRowConverter::new( + vec![Field::new( + "col_1", + ( + DataType::Boolean + // SortOptions::default().desc().with_nulls_first(false), + ), + true, + )] + .into(), + ) + .unwrap(); + + let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap(); + // assert!(rows.row(2) < rows.row(1)); + // assert!(rows.row(2) < rows.row(0)); + // assert!(rows.row(1) < rows.row(0)); + let cols = converter.convert_rows(&rows).unwrap(); + assert_eq!(&cols[0], &col); + } + + #[test] + fn test_timezone() { + let a = + TimestampNanosecondArray::from(vec![1, 2, 3, 4, 5]).with_timezone("+01:00".to_string()); + let d = a.data_type().clone(); + + let converter = UnorderedRowConverter::new( + vec![Field::new("col_1", a.data_type().clone(), true)].into(), + ) + .unwrap(); + let rows = converter.convert_columns(&[Arc::new(a) as _]).unwrap(); + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(back.len(), 1); + assert_eq!(back[0].data_type(), &d); + + // Test dictionary + let mut a = PrimitiveDictionaryBuilder::::new(); + a.append(34).unwrap(); + a.append_null(); + a.append(345).unwrap(); + + // Construct dictionary with a timezone + let dict = a.finish(); + let values = TimestampNanosecondArray::from(dict.values().to_data()); + let dict_with_tz = dict.with_values(Arc::new(values.with_timezone("+02:00"))); + let v = DataType::Timestamp(TimeUnit::Nanosecond, Some("+02:00".into())); + let d = DataType::Dictionary(Box::new(DataType::Int32), Box::new(v.clone())); + + assert_eq!(dict_with_tz.data_type(), &d); + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", d.clone(), true)].into()).unwrap(); + let rows = converter + .convert_columns(&[Arc::new(dict_with_tz) as _]) + .unwrap(); + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(back.len(), 1); + assert_eq!(back[0].data_type(), 
&v); + } + + #[test] + fn test_null_encoding() { + let col = Arc::new(NullArray::new(10)); + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", DataType::Null, true)].into()) + .unwrap(); + let rows = converter.convert_columns(&[col]).unwrap(); + assert_eq!(rows.num_rows(), 10); + assert_eq!(rows.row(1).data.len(), 0); + } + + #[test] + fn test_variable_width() { + let col = Arc::new(StringArray::from_iter([ + Some("hello"), + Some("he"), + None, + Some("foo"), + Some(""), + ])) as ArrayRef; + + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", DataType::Utf8, true)].into()) + .unwrap(); + let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap(); + + // assert!(rows.row(1) < rows.row(0)); + // assert!(rows.row(2) < rows.row(4)); + // assert!(rows.row(3) < rows.row(0)); + // assert!(rows.row(3) < rows.row(1)); + + let cols = converter.convert_rows(&rows).unwrap(); + assert_eq!(&cols[0], &col); + + let col = Arc::new(BinaryArray::from_iter([ + None, + Some(vec![0_u8; 0]), + Some(vec![0_u8; 6]), + Some(vec![0_u8; variable::MINI_BLOCK_SIZE]), + Some(vec![0_u8; variable::MINI_BLOCK_SIZE + 1]), + Some(vec![0_u8; variable::BLOCK_SIZE]), + Some(vec![0_u8; variable::BLOCK_SIZE + 1]), + Some(vec![1_u8; 6]), + Some(vec![1_u8; variable::MINI_BLOCK_SIZE]), + Some(vec![1_u8; variable::MINI_BLOCK_SIZE + 1]), + Some(vec![1_u8; variable::BLOCK_SIZE]), + Some(vec![1_u8; variable::BLOCK_SIZE + 1]), + Some(vec![0xFF_u8; 6]), + Some(vec![0xFF_u8; variable::MINI_BLOCK_SIZE]), + Some(vec![0xFF_u8; variable::MINI_BLOCK_SIZE + 1]), + Some(vec![0xFF_u8; variable::BLOCK_SIZE]), + Some(vec![0xFF_u8; variable::BLOCK_SIZE + 1]), + ])) as ArrayRef; + + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", DataType::Binary, true)].into()) + .unwrap(); + let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap(); + // + // for i in 0..rows.num_rows() { + // for j in i + 1..rows.num_rows() { + // assert!( + // rows.row(i) < 
rows.row(j), + // "{} < {} - {:?} < {:?}", + // i, + // j, + // rows.row(i), + // rows.row(j) + // ); + // } + // } + + let cols = converter.convert_rows(&rows).unwrap(); + assert_eq!(&cols[0], &col); + + let converter = UnorderedRowConverter::new( + vec![Field::new( + "col_1", + DataType::Binary, // SortOptions::default().desc().with_nulls_first(false), + true, + )] + .into(), + ) + .unwrap(); + let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap(); + + // for i in 0..rows.num_rows() { + // for j in i + 1..rows.num_rows() { + // assert!( + // rows.row(i) > rows.row(j), + // "{} > {} - {:?} > {:?}", + // i, + // j, + // rows.row(i), + // rows.row(j) + // ); + // } + // } + + let cols = converter.convert_rows(&rows).unwrap(); + assert_eq!(&cols[0], &col); + } + + /// If `exact` is false performs a logical comparison between a and dictionary-encoded b + fn dictionary_eq(a: &dyn Array, b: &dyn Array) { + match b.data_type() { + DataType::Dictionary(_, v) => { + assert_eq!(a.data_type(), v.as_ref()); + let b = arrow_cast::cast(b, v).unwrap(); + assert_eq!(a, b.as_ref()) + } + _ => assert_eq!(a, b), + } + } + + #[test] + fn test_string_dictionary() { + let a = Arc::new(DictionaryArray::::from_iter([ + Some("foo"), + Some("hello"), + Some("he"), + None, + Some("hello"), + Some(""), + Some("hello"), + Some("hello"), + ])) as ArrayRef; + + let field = Field::new("col_1", a.data_type().clone(), true); + let converter = UnorderedRowConverter::new(vec![field].into()).unwrap(); + let rows_a = converter.convert_columns(&[Arc::clone(&a)]).unwrap(); + + // assert!(rows_a.row(3) < rows_a.row(5)); + // assert!(rows_a.row(2) < rows_a.row(1)); + // assert!(rows_a.row(0) < rows_a.row(1)); + // assert!(rows_a.row(3) < rows_a.row(0)); + + assert_eq!(rows_a.row(1), rows_a.row(4)); + assert_eq!(rows_a.row(1), rows_a.row(6)); + assert_eq!(rows_a.row(1), rows_a.row(7)); + + let cols = converter.convert_rows(&rows_a).unwrap(); + dictionary_eq(&cols[0], &a); + + let b = 
Arc::new(DictionaryArray::::from_iter([ + Some("hello"), + None, + Some("cupcakes"), + ])) as ArrayRef; + + let rows_b = converter.convert_columns(&[Arc::clone(&b)]).unwrap(); + assert_eq!(rows_a.row(1), rows_b.row(0)); + assert_eq!(rows_a.row(3), rows_b.row(1)); + // assert!(rows_b.row(2) < rows_a.row(0)); + + let cols = converter.convert_rows(&rows_b).unwrap(); + dictionary_eq(&cols[0], &b); + + let converter = UnorderedRowConverter::new( + vec![Field::new( + "col_1", + a.data_type().clone(), + true, + // SortOptions::default().desc().with_nulls_first(false), + )] + .into(), + ) + .unwrap(); + + let rows_c = converter.convert_columns(&[Arc::clone(&a)]).unwrap(); + // assert!(rows_c.row(3) > rows_c.row(5)); + // assert!(rows_c.row(2) > rows_c.row(1)); + // assert!(rows_c.row(0) > rows_c.row(1)); + // assert!(rows_c.row(3) > rows_c.row(0)); + + let cols = converter.convert_rows(&rows_c).unwrap(); + dictionary_eq(&cols[0], &a); + + let converter = UnorderedRowConverter::new( + vec![Field::new( + "col_1", + a.data_type().clone(), + true, + // SortOptions::default().desc().with_nulls_first(true), + )] + .into(), + ) + .unwrap(); + + let rows_c = converter.convert_columns(&[Arc::clone(&a)]).unwrap(); + // assert!(rows_c.row(3) < rows_c.row(5)); + // assert!(rows_c.row(2) > rows_c.row(1)); + // assert!(rows_c.row(0) > rows_c.row(1)); + // assert!(rows_c.row(3) < rows_c.row(0)); + + let cols = converter.convert_rows(&rows_c).unwrap(); + dictionary_eq(&cols[0], &a); + } + + #[test] + fn test_struct() { + // Test basic + let a = Arc::new(Int32Array::from(vec![1, 1, 2, 2])) as ArrayRef; + let a_f = Arc::new(Field::new("int", DataType::Int32, false)); + let u = Arc::new(StringArray::from(vec!["a", "b", "c", "d"])) as ArrayRef; + let u_f = Arc::new(Field::new("s", DataType::Utf8, false)); + let s1 = Arc::new(StructArray::from(vec![(a_f, a), (u_f, u)])) as ArrayRef; + + let sort_fields = vec![Field::new("col_1", s1.data_type().clone(), true)].into(); + let converter = 
UnorderedRowConverter::new(sort_fields).unwrap(); + let r1 = converter.convert_columns(&[Arc::clone(&s1)]).unwrap(); + + // for (a, b) in r1.iter().zip(r1.iter().skip(1)) { + // assert!(a < b); + // } + + let back = converter.convert_rows(&r1).unwrap(); + assert_eq!(back.len(), 1); + assert_eq!(&back[0], &s1); + + // Test struct nullability + let data = s1 + .to_data() + .into_builder() + .null_bit_buffer(Some(Buffer::from_slice_ref([0b00001010]))) + .null_count(2) + .build() + .unwrap(); + + let s2 = Arc::new(StructArray::from(data)) as ArrayRef; + let r2 = converter.convert_columns(&[Arc::clone(&s2)]).unwrap(); + assert_eq!(r2.row(0), r2.row(2)); // Nulls equal + // assert!(r2.row(0) < r2.row(1)); // Nulls first + assert_ne!(r1.row(0), r2.row(0)); // Value does not equal null + assert_eq!(r1.row(1), r2.row(1)); // Values equal + + let back = converter.convert_rows(&r2).unwrap(); + assert_eq!(back.len(), 1); + assert_eq!(&back[0], &s2); + + back[0].to_data().validate_full().unwrap(); + } + + #[test] + fn test_dictionary_in_struct() { + let builder = StringDictionaryBuilder::::new(); + let mut struct_builder = StructBuilder::new( + vec![Field::new_dictionary( + "foo", + DataType::Int32, + DataType::Utf8, + true, + )], + vec![Box::new(builder)], + ); + + let dict_builder = struct_builder + .field_builder::>(0) + .unwrap(); + + // Flattened: ["a", null, "a", "b"] + dict_builder.append_value("a"); + dict_builder.append_null(); + dict_builder.append_value("a"); + dict_builder.append_value("b"); + + for _ in 0..4 { + struct_builder.append(true); + } + + let s = Arc::new(struct_builder.finish()) as ArrayRef; + let sort_fields = vec![Field::new("col_1", s.data_type().clone(), true)].into(); + let converter = UnorderedRowConverter::new(sort_fields).unwrap(); + let r = converter.convert_columns(&[Arc::clone(&s)]).unwrap(); + + let back = converter.convert_rows(&r).unwrap(); + let [s2] = back.try_into().unwrap(); + + // RowConverter flattens Dictionary + // s.ty = 
Struct("foo": Dictionary(Int32, Utf8)), s2.ty = Struct("foo": Utf8) + assert_ne!(&s.data_type(), &s2.data_type()); + s2.to_data().validate_full().unwrap(); + + // Check if the logical data remains the same + // Keys: [0, null, 0, 1] + // Values: ["a", "b"] + let s1_struct = s.as_struct(); + let s1_0 = s1_struct.column(0); + let s1_idx_0 = s1_0.as_dictionary::(); + let keys = s1_idx_0.keys(); + let values = s1_idx_0.values().as_string::(); + // Flattened: ["a", null, "a", "b"] + let s2_struct = s2.as_struct(); + let s2_0 = s2_struct.column(0); + let s2_idx_0 = s2_0.as_string::(); + + for i in 0..keys.len() { + if keys.is_null(i) { + assert!(s2_idx_0.is_null(i)); + } else { + let dict_index = keys.value(i) as usize; + assert_eq!(values.value(dict_index), s2_idx_0.value(i)); + } + } + } + + #[test] + fn test_dictionary_in_struct_empty() { + let ty = DataType::Struct( + vec![Field::new_dictionary( + "foo", + DataType::Int32, + DataType::Int32, + false, + )] + .into(), + ); + let s = arrow_array::new_empty_array(&ty); + + let sort_fields = vec![Field::new("col_1", s.data_type().clone(), true)].into(); + let converter = UnorderedRowConverter::new(sort_fields).unwrap(); + let r = converter.convert_columns(&[Arc::clone(&s)]).unwrap(); + + let back = converter.convert_rows(&r).unwrap(); + let [s2] = back.try_into().unwrap(); + + // RowConverter flattens Dictionary + // s.ty = Struct("foo": Dictionary(Int32, Int32)), s2.ty = Struct("foo": Int32) + assert_ne!(&s.data_type(), &s2.data_type()); + s2.to_data().validate_full().unwrap(); + assert_eq!(s.len(), 0); + assert_eq!(s2.len(), 0); + } + + #[test] + fn test_list_of_string_dictionary() { + let mut builder = ListBuilder::>::default(); + // List[0] = ["a", "b", "zero", null, "c", "b", "d" (dict)] + builder.values().append("a").unwrap(); + builder.values().append("b").unwrap(); + builder.values().append("zero").unwrap(); + builder.values().append_null(); + builder.values().append("c").unwrap(); + 
builder.values().append("b").unwrap(); + builder.values().append("d").unwrap(); + builder.append(true); + // List[1] = null + builder.append(false); + // List[2] = ["e", "zero", "a" (dict)] + builder.values().append("e").unwrap(); + builder.values().append("zero").unwrap(); + builder.values().append("a").unwrap(); + builder.append(true); + + let a = Arc::new(builder.finish()) as ArrayRef; + let data_type = a.data_type().clone(); + + let field = Field::new("col_1", data_type.clone(), true); + let converter = UnorderedRowConverter::new(vec![field].into()).unwrap(); + let rows = converter.convert_columns(&[Arc::clone(&a)]).unwrap(); + + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(back.len(), 1); + let [a2] = back.try_into().unwrap(); + + // RowConverter flattens Dictionary + // a.ty: List(Dictionary(Int32, Utf8)), a2.ty: List(Utf8) + assert_ne!(&a.data_type(), &a2.data_type()); + + a2.to_data().validate_full().unwrap(); + + let a2_list = a2.as_list::(); + let a1_list = a.as_list::(); + + // Check if the logical data remains the same + // List[0] = ["a", "b", "zero", null, "c", "b", "d" (dict)] + let a1_0 = a1_list.value(0); + let a1_idx_0 = a1_0.as_dictionary::(); + let keys = a1_idx_0.keys(); + let values = a1_idx_0.values().as_string::(); + let a2_0 = a2_list.value(0); + let a2_idx_0 = a2_0.as_string::(); + + for i in 0..keys.len() { + if keys.is_null(i) { + assert!(a2_idx_0.is_null(i)); + } else { + let dict_index = keys.value(i) as usize; + assert_eq!(values.value(dict_index), a2_idx_0.value(i)); + } + } + + // List[1] = null + assert!(a1_list.is_null(1)); + assert!(a2_list.is_null(1)); + + // List[2] = ["e", "zero", "a" (dict)] + let a1_2 = a1_list.value(2); + let a1_idx_2 = a1_2.as_dictionary::(); + let keys = a1_idx_2.keys(); + let values = a1_idx_2.values().as_string::(); + let a2_2 = a2_list.value(2); + let a2_idx_2 = a2_2.as_string::(); + + for i in 0..keys.len() { + if keys.is_null(i) { + assert!(a2_idx_2.is_null(i)); + } else { + let 
dict_index = keys.value(i) as usize; + assert_eq!(values.value(dict_index), a2_idx_2.value(i)); + } + } + } + + #[test] + fn test_primitive_dictionary() { + let mut builder = PrimitiveDictionaryBuilder::::new(); + builder.append(2).unwrap(); + builder.append(3).unwrap(); + builder.append(0).unwrap(); + builder.append_null(); + builder.append(5).unwrap(); + builder.append(3).unwrap(); + builder.append(-1).unwrap(); + + let a = builder.finish(); + let data_type = a.data_type().clone(); + let columns = [Arc::new(a) as ArrayRef]; + + let field = Field::new("col_1", data_type.clone(), true); + let converter = UnorderedRowConverter::new(vec![field].into()).unwrap(); + let rows = converter.convert_columns(&columns).unwrap(); + // assert!(rows.row(0) < rows.row(1)); + // assert!(rows.row(2) < rows.row(0)); + // assert!(rows.row(3) < rows.row(2)); + // assert!(rows.row(6) < rows.row(2)); + // assert!(rows.row(3) < rows.row(6)); + + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(back.len(), 1); + back[0].to_data().validate_full().unwrap(); + } + + #[test] + fn test_dictionary_nulls() { + let values = Int32Array::from_iter([Some(1), Some(-1), None, Some(4), None]).into_data(); + let keys = + Int32Array::from_iter([Some(0), Some(0), Some(1), Some(2), Some(4), None]).into_data(); + + let data_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Int32)); + let data = keys + .into_builder() + .data_type(data_type.clone()) + .child_data(vec![values]) + .build() + .unwrap(); + + let columns = [Arc::new(DictionaryArray::::from(data)) as ArrayRef]; + let field = Field::new("col_1", data_type.clone(), true); + let converter = UnorderedRowConverter::new(vec![field].into()).unwrap(); + let rows = converter.convert_columns(&columns).unwrap(); + + assert_eq!(rows.row(0), rows.row(1)); + assert_eq!(rows.row(3), rows.row(4)); + assert_eq!(rows.row(4), rows.row(5)); + // assert!(rows.row(3) < rows.row(0)); + } + + #[test] + fn 
test_from_binary_shared_buffer() { + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", DataType::Binary, true)].into()) + .unwrap(); + let array = Arc::new(BinaryArray::from_iter_values([&[0xFF]])) as _; + let rows = converter.convert_columns(&[array]).unwrap(); + let binary_rows = rows.try_into_binary().expect("known-small rows"); + let _binary_rows_shared_buffer = binary_rows.clone(); + + let parsed = converter.from_binary(binary_rows); + + converter.convert_rows(parsed.iter()).unwrap(); + } + + #[test] + #[should_panic(expected = "Encountered non UTF-8 data")] + fn test_invalid_utf8() { + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", DataType::Binary, true)].into()) + .unwrap(); + let array = Arc::new(BinaryArray::from_iter_values([&[0xFF]])) as _; + let rows = converter.convert_columns(&[array]).unwrap(); + let binary_row = rows.row(0); + + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", DataType::Utf8, true)].into()) + .unwrap(); + let parser = converter.parser(); + let utf8_row = parser.parse(binary_row.as_ref()); + + converter.convert_rows(std::iter::once(utf8_row)).unwrap(); + } + + #[test] + #[should_panic(expected = "Encountered non UTF-8 data")] + fn test_invalid_utf8_array() { + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", DataType::Binary, true)].into()) + .unwrap(); + let array = Arc::new(BinaryArray::from_iter_values([&[0xFF]])) as _; + let rows = converter.convert_columns(&[array]).unwrap(); + let binary_rows = rows.try_into_binary().expect("known-small rows"); + + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", DataType::Utf8, true)].into()) + .unwrap(); + let parsed = converter.from_binary(binary_rows); + + converter.convert_rows(parsed.iter()).unwrap(); + } + + #[test] + #[should_panic(expected = "index out of bounds")] + fn test_invalid_empty() { + let binary_row: &[u8] = &[]; + + let converter = + 
UnorderedRowConverter::new(vec![Field::new("col_1", DataType::Utf8, true)].into()) + .unwrap(); + let parser = converter.parser(); + let utf8_row = parser.parse(binary_row.as_ref()); + + converter.convert_rows(std::iter::once(utf8_row)).unwrap(); + } + + #[test] + #[should_panic(expected = "index out of bounds")] + fn test_invalid_empty_array() { + let row: &[u8] = &[]; + let binary_rows = BinaryArray::from(vec![row]); + + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", DataType::Utf8, true)].into()) + .unwrap(); + let parsed = converter.from_binary(binary_rows); + + converter.convert_rows(parsed.iter()).unwrap(); + } + + #[test] + #[should_panic(expected = "index out of bounds")] + fn test_invalid_truncated() { + let binary_row: &[u8] = &[0x02]; + + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", DataType::Utf8, true)].into()) + .unwrap(); + let parser = converter.parser(); + let utf8_row = parser.parse(binary_row.as_ref()); + + converter.convert_rows(std::iter::once(utf8_row)).unwrap(); + } + + #[test] + #[should_panic(expected = "index out of bounds")] + fn test_invalid_truncated_array() { + let row: &[u8] = &[0x02]; + let binary_rows = BinaryArray::from(vec![row]); + + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", DataType::Utf8, true)].into()) + .unwrap(); + let parsed = converter.from_binary(binary_rows); + + converter.convert_rows(parsed.iter()).unwrap(); + } + + // #[test] + #[ignore] + #[should_panic(expected = "rows were not produced by this UnorderedRowConverter")] + fn test_different_converter() { + let values = Arc::new(Int32Array::from_iter([Some(1), Some(-1)])); + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", DataType::Int32, true)].into()) + .unwrap(); + let rows = converter.convert_columns(&[values]).unwrap(); + + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", DataType::Int32, true)].into()) + .unwrap(); + let _ = 
converter.convert_rows(&rows); + } + + fn test_single_list() { + let mut builder = GenericListBuilder::::new(Int32Builder::new()); + builder.values().append_value(32); + builder.values().append_value(52); + builder.values().append_value(32); + builder.append(true); + builder.values().append_value(32); + builder.values().append_value(52); + builder.values().append_value(12); + builder.append(true); + builder.values().append_value(32); + builder.values().append_value(52); + builder.append(true); + builder.values().append_value(32); // MASKED + builder.values().append_value(52); // MASKED + builder.append(false); + builder.values().append_value(32); + builder.values().append_null(); + builder.append(true); + builder.append(true); + builder.values().append_value(17); // MASKED + builder.values().append_null(); // MASKED + builder.append(false); + + let list = Arc::new(builder.finish()) as ArrayRef; + let d = list.data_type().clone(); + + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", d.clone(), true)].into()).unwrap(); + + let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap(); + // assert!(rows.row(0) > rows.row(1)); // [32, 52, 32] > [32, 52, 12] + // assert!(rows.row(2) < rows.row(1)); // [32, 52] < [32, 52, 12] + // assert!(rows.row(3) < rows.row(2)); // null < [32, 52] + // assert!(rows.row(4) < rows.row(2)); // [32, null] < [32, 52] + // assert!(rows.row(5) < rows.row(2)); // [] < [32, 52] + // assert!(rows.row(3) < rows.row(5)); // null < [] + assert_eq!(rows.row(3), rows.row(6)); // null = null (different masked values) + + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(back.len(), 1); + back[0].to_data().validate_full().unwrap(); + assert_eq!(&back[0], &list); + + let field = Field::new("col_1", d.clone(), true); + let converter = UnorderedRowConverter::new(vec![field].into()).unwrap(); + let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap(); + + // assert!(rows.row(0) > rows.row(1)); // [32, 
52, 32] > [32, 52, 12] + // assert!(rows.row(2) < rows.row(1)); // [32, 52] < [32, 52, 12] + // assert!(rows.row(3) > rows.row(2)); // null > [32, 52] + // assert!(rows.row(4) > rows.row(2)); // [32, null] > [32, 52] + // assert!(rows.row(5) < rows.row(2)); // [] < [32, 52] + // assert!(rows.row(3) > rows.row(5)); // null > [] + assert_eq!(rows.row(3), rows.row(6)); // null = null (different masked values) + + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(back.len(), 1); + back[0].to_data().validate_full().unwrap(); + assert_eq!(&back[0], &list); + + let options = SortOptions::default().desc().with_nulls_first(false); + let field = Field::new("col_1", d.clone(), true); + let converter = UnorderedRowConverter::new(vec![field].into()).unwrap(); + let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap(); + + // assert!(rows.row(0) < rows.row(1)); // [32, 52, 32] < [32, 52, 12] + // assert!(rows.row(2) > rows.row(1)); // [32, 52] > [32, 52, 12] + // assert!(rows.row(3) > rows.row(2)); // null > [32, 52] + // assert!(rows.row(4) > rows.row(2)); // [32, null] > [32, 52] + // assert!(rows.row(5) > rows.row(2)); // [] > [32, 52] + // assert!(rows.row(3) > rows.row(5)); // null > [] + assert_eq!(rows.row(3), rows.row(6)); // null = null (different masked values) + + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(back.len(), 1); + back[0].to_data().validate_full().unwrap(); + assert_eq!(&back[0], &list); + + let options = SortOptions::default().desc().with_nulls_first(true); + let field = Field::new("col_1", d, true); + let converter = UnorderedRowConverter::new(vec![field].into()).unwrap(); + let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap(); + + // assert!(rows.row(0) < rows.row(1)); // [32, 52, 32] < [32, 52, 12] + // assert!(rows.row(2) > rows.row(1)); // [32, 52] > [32, 52, 12] + // assert!(rows.row(3) < rows.row(2)); // null < [32, 52] + // assert!(rows.row(4) < rows.row(2)); // [32, null] < [32, 52] + // 
assert!(rows.row(5) > rows.row(2)); // [] > [32, 52] + // assert!(rows.row(3) < rows.row(5)); // null < [] + assert_eq!(rows.row(3), rows.row(6)); // null = null (different masked values) + + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(back.len(), 1); + back[0].to_data().validate_full().unwrap(); + assert_eq!(&back[0], &list); + + let sliced_list = list.slice(1, 5); + let rows_on_sliced_list = converter + .convert_columns(&[Arc::clone(&sliced_list)]) + .unwrap(); + + // assert!(rows_on_sliced_list.row(1) > rows_on_sliced_list.row(0)); // [32, 52] > [32, 52, 12] + // assert!(rows_on_sliced_list.row(2) < rows_on_sliced_list.row(1)); // null < [32, 52] + // assert!(rows_on_sliced_list.row(3) < rows_on_sliced_list.row(1)); // [32, null] < [32, 52] + // assert!(rows_on_sliced_list.row(4) > rows_on_sliced_list.row(1)); // [] > [32, 52] + // assert!(rows_on_sliced_list.row(2) < rows_on_sliced_list.row(4)); // null < [] + + let back = converter.convert_rows(&rows_on_sliced_list).unwrap(); + assert_eq!(back.len(), 1); + back[0].to_data().validate_full().unwrap(); + assert_eq!(&back[0], &sliced_list); + } + + fn test_nested_list() { + let mut builder = + GenericListBuilder::::new(GenericListBuilder::::new(Int32Builder::new())); + + builder.values().values().append_value(1); + builder.values().values().append_value(2); + builder.values().append(true); + builder.values().values().append_value(1); + builder.values().values().append_null(); + builder.values().append(true); + builder.append(true); + + builder.values().values().append_value(1); + builder.values().values().append_null(); + builder.values().append(true); + builder.values().values().append_value(1); + builder.values().values().append_null(); + builder.values().append(true); + builder.append(true); + + builder.values().values().append_value(1); + builder.values().values().append_null(); + builder.values().append(true); + builder.values().append(false); + builder.append(true); + 
builder.append(false); + + builder.values().values().append_value(1); + builder.values().values().append_value(2); + builder.values().append(true); + builder.append(true); + + let list = Arc::new(builder.finish()) as ArrayRef; + let d = list.data_type().clone(); + + // [ + // [[1, 2], [1, null]], + // [[1, null], [1, null]], + // [[1, null], null] + // null + // [[1, 2]] + // ] + let options = SortOptions::default().asc().with_nulls_first(true); + let field = Field::new("col_1", d.clone(), true); + let converter = UnorderedRowConverter::new(vec![field].into()).unwrap(); + let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap(); + + // assert!(rows.row(0) > rows.row(1)); + // assert!(rows.row(1) > rows.row(2)); + // assert!(rows.row(2) > rows.row(3)); + // assert!(rows.row(4) < rows.row(0)); + // assert!(rows.row(4) > rows.row(1)); + + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(back.len(), 1); + back[0].to_data().validate_full().unwrap(); + assert_eq!(&back[0], &list); + + let options = SortOptions::default().desc().with_nulls_first(true); + let field = Field::new("col_1", d.clone(), true); + let converter = UnorderedRowConverter::new(vec![field].into()).unwrap(); + let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap(); + + // assert!(rows.row(0) > rows.row(1)); + // assert!(rows.row(1) > rows.row(2)); + // assert!(rows.row(2) > rows.row(3)); + // assert!(rows.row(4) > rows.row(0)); + // assert!(rows.row(4) > rows.row(1)); + + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(back.len(), 1); + back[0].to_data().validate_full().unwrap(); + assert_eq!(&back[0], &list); + + let options = SortOptions::default().desc().with_nulls_first(false); + let field = Field::new("col_1", d, true); + let converter = UnorderedRowConverter::new(vec![field].into()).unwrap(); + let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap(); + + // assert!(rows.row(0) < rows.row(1)); + // assert!(rows.row(1) < 
rows.row(2)); + // assert!(rows.row(2) < rows.row(3)); + // assert!(rows.row(4) > rows.row(0)); + // assert!(rows.row(4) < rows.row(1)); + + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(back.len(), 1); + back[0].to_data().validate_full().unwrap(); + assert_eq!(&back[0], &list); + + let sliced_list = list.slice(1, 3); + let rows = converter + .convert_columns(&[Arc::clone(&sliced_list)]) + .unwrap(); + + // assert!(rows.row(0) < rows.row(1)); + // assert!(rows.row(1) < rows.row(2)); + + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(back.len(), 1); + back[0].to_data().validate_full().unwrap(); + assert_eq!(&back[0], &sliced_list); + } + + #[test] + fn test_list() { + test_single_list::(); + test_nested_list::(); + } + + #[test] + fn test_large_list() { + test_single_list::(); + test_nested_list::(); + } + + #[test] + fn test_fixed_size_list() { + let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3); + builder.values().append_value(32); + builder.values().append_value(52); + builder.values().append_value(32); + builder.append(true); + builder.values().append_value(32); + builder.values().append_value(52); + builder.values().append_value(12); + builder.append(true); + builder.values().append_value(32); + builder.values().append_value(52); + builder.values().append_null(); + builder.append(true); + builder.values().append_value(32); // MASKED + builder.values().append_value(52); // MASKED + builder.values().append_value(13); // MASKED + builder.append(false); + builder.values().append_value(32); + builder.values().append_null(); + builder.values().append_null(); + builder.append(true); + builder.values().append_null(); + builder.values().append_null(); + builder.values().append_null(); + builder.append(true); + builder.values().append_value(17); // MASKED + builder.values().append_null(); // MASKED + builder.values().append_value(77); // MASKED + builder.append(false); + + let list = Arc::new(builder.finish()) as 
ArrayRef; + let d = list.data_type().clone(); + + // Default sorting (ascending, nulls first) + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", d.clone(), true)].into()).unwrap(); + + let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap(); + // assert!(rows.row(0) > rows.row(1)); // [32, 52, 32] > [32, 52, 12] + // assert!(rows.row(2) < rows.row(1)); // [32, 52, null] < [32, 52, 12] + // assert!(rows.row(3) < rows.row(2)); // null < [32, 52, null] + // assert!(rows.row(4) < rows.row(2)); // [32, null, null] < [32, 52, null] + // assert!(rows.row(5) < rows.row(2)); // [null, null, null] < [32, 52, null] + // assert!(rows.row(3) < rows.row(5)); // null < [null, null, null] + assert_eq!(rows.row(3), rows.row(6)); // null = null (different masked values) + + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(back.len(), 1); + back[0].to_data().validate_full().unwrap(); + assert_eq!(&back[0], &list); + + // Ascending, null last + let options = SortOptions::default().asc().with_nulls_first(false); + let field = Field::new("col_1", d.clone(), true); + let converter = UnorderedRowConverter::new(vec![field].into()).unwrap(); + let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap(); + // assert!(rows.row(0) > rows.row(1)); // [32, 52, 32] > [32, 52, 12] + // assert!(rows.row(2) > rows.row(1)); // [32, 52, null] > [32, 52, 12] + // assert!(rows.row(3) > rows.row(2)); // null > [32, 52, null] + // assert!(rows.row(4) > rows.row(2)); // [32, null, null] > [32, 52, null] + // assert!(rows.row(5) > rows.row(2)); // [null, null, null] > [32, 52, null] + // assert!(rows.row(3) > rows.row(5)); // null > [null, null, null] + assert_eq!(rows.row(3), rows.row(6)); // null = null (different masked values) + + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(back.len(), 1); + back[0].to_data().validate_full().unwrap(); + assert_eq!(&back[0], &list); + + // Descending, nulls last + let options = 
SortOptions::default().desc().with_nulls_first(false); + let field = Field::new("col_1", d.clone(), true); + let converter = UnorderedRowConverter::new(vec![field].into()).unwrap(); + let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap(); + // assert!(rows.row(0) < rows.row(1)); // [32, 52, 32] < [32, 52, 12] + // assert!(rows.row(2) > rows.row(1)); // [32, 52, null] > [32, 52, 12] + // assert!(rows.row(3) > rows.row(2)); // null > [32, 52, null] + // assert!(rows.row(4) > rows.row(2)); // [32, null, null] > [32, 52, null] + // assert!(rows.row(5) > rows.row(2)); // [null, null, null] > [32, 52, null] + // assert!(rows.row(3) > rows.row(5)); // null > [null, null, null] + assert_eq!(rows.row(3), rows.row(6)); // null = null (different masked values) + + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(back.len(), 1); + back[0].to_data().validate_full().unwrap(); + assert_eq!(&back[0], &list); + + // Descending, nulls first + let options = SortOptions::default().desc().with_nulls_first(true); + let field = Field::new("col_1", d, true); + let converter = UnorderedRowConverter::new(vec![field].into()).unwrap(); + let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap(); + + // assert!(rows.row(0) < rows.row(1)); // [32, 52, 32] < [32, 52, 12] + // assert!(rows.row(2) < rows.row(1)); // [32, 52, null] > [32, 52, 12] + // assert!(rows.row(3) < rows.row(2)); // null < [32, 52, null] + // assert!(rows.row(4) < rows.row(2)); // [32, null, null] < [32, 52, null] + // assert!(rows.row(5) < rows.row(2)); // [null, null, null] > [32, 52, null] + // assert!(rows.row(3) < rows.row(5)); // null < [null, null, null] + assert_eq!(rows.row(3), rows.row(6)); // null = null (different masked values) + + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(back.len(), 1); + back[0].to_data().validate_full().unwrap(); + assert_eq!(&back[0], &list); + + let sliced_list = list.slice(1, 5); + let rows_on_sliced_list = converter + 
.convert_columns(&[Arc::clone(&sliced_list)]) + .unwrap(); + + // assert!(rows_on_sliced_list.row(2) < rows_on_sliced_list.row(1)); // null < [32, 52, null] + // assert!(rows_on_sliced_list.row(3) < rows_on_sliced_list.row(1)); // [32, null, null] < [32, 52, null] + // assert!(rows_on_sliced_list.row(4) < rows_on_sliced_list.row(1)); // [null, null, null] > [32, 52, null] + // assert!(rows_on_sliced_list.row(2) < rows_on_sliced_list.row(4)); // null < [null, null, null] + + let back = converter.convert_rows(&rows_on_sliced_list).unwrap(); + assert_eq!(back.len(), 1); + back[0].to_data().validate_full().unwrap(); + assert_eq!(&back[0], &sliced_list); + } + + #[test] + fn test_two_fixed_size_lists() { + let mut first = FixedSizeListBuilder::new(UInt8Builder::new(), 1); + // 0: [100] + first.values().append_value(100); + first.append(true); + // 1: [101] + first.values().append_value(101); + first.append(true); + // 2: [102] + first.values().append_value(102); + first.append(true); + // 3: [null] + first.values().append_null(); + first.append(true); + // 4: null + first.values().append_null(); // MASKED + first.append(false); + let first = Arc::new(first.finish()) as ArrayRef; + let first_type = first.data_type().clone(); + + let mut second = FixedSizeListBuilder::new(UInt8Builder::new(), 1); + // 0: [200] + second.values().append_value(200); + second.append(true); + // 1: [201] + second.values().append_value(201); + second.append(true); + // 2: [202] + second.values().append_value(202); + second.append(true); + // 3: [null] + second.values().append_null(); + second.append(true); + // 4: null + second.values().append_null(); // MASKED + second.append(false); + let second = Arc::new(second.finish()) as ArrayRef; + let second_type = second.data_type().clone(); + + let converter = UnorderedRowConverter::new( + vec![ + Field::new("col_1", first_type.clone(), true), + Field::new("col_1", second_type.clone(), true), + ] + .into(), + ) + .unwrap(); + + let rows = converter + 
.convert_columns(&[Arc::clone(&first), Arc::clone(&second)]) + .unwrap(); + + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(back.len(), 2); + back[0].to_data().validate_full().unwrap(); + assert_eq!(&back[0], &first); + back[1].to_data().validate_full().unwrap(); + assert_eq!(&back[1], &second); + } + + #[test] + fn test_fixed_size_list_with_variable_width_content() { + let mut first = FixedSizeListBuilder::new( + StructBuilder::from_fields( + vec![ + Field::new( + "timestamp", + DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from("UTC"))), + false, + ), + Field::new("offset_minutes", DataType::Int16, false), + Field::new("time_zone", DataType::Utf8, false), + ], + 1, + ), + 1, + ); + // 0: null + first + .values() + .field_builder::(0) + .unwrap() + .append_null(); + first + .values() + .field_builder::(1) + .unwrap() + .append_null(); + first + .values() + .field_builder::(2) + .unwrap() + .append_null(); + first.values().append(false); + first.append(false); + // 1: [null] + first + .values() + .field_builder::(0) + .unwrap() + .append_null(); + first + .values() + .field_builder::(1) + .unwrap() + .append_null(); + first + .values() + .field_builder::(2) + .unwrap() + .append_null(); + first.values().append(false); + first.append(true); + // 2: [1970-01-01 00:00:00.000000 UTC] + first + .values() + .field_builder::(0) + .unwrap() + .append_value(0); + first + .values() + .field_builder::(1) + .unwrap() + .append_value(0); + first + .values() + .field_builder::(2) + .unwrap() + .append_value("UTC"); + first.values().append(true); + first.append(true); + // 3: [2005-09-10 13:30:00.123456 Europe/Warsaw] + first + .values() + .field_builder::(0) + .unwrap() + .append_value(1126351800123456); + first + .values() + .field_builder::(1) + .unwrap() + .append_value(120); + first + .values() + .field_builder::(2) + .unwrap() + .append_value("Europe/Warsaw"); + first.values().append(true); + first.append(true); + let first = 
Arc::new(first.finish()) as ArrayRef; + let first_type = first.data_type().clone(); + + let mut second = StringBuilder::new(); + second.append_value("somewhere near"); + second.append_null(); + second.append_value("Greenwich"); + second.append_value("Warsaw"); + let second = Arc::new(second.finish()) as ArrayRef; + let second_type = second.data_type().clone(); + + let converter = UnorderedRowConverter::new( + vec![ + Field::new("col_1", first_type.clone(), true), + Field::new("col_1", second_type.clone(), true), + ] + .into(), + ) + .unwrap(); + + let rows = converter + .convert_columns(&[Arc::clone(&first), Arc::clone(&second)]) + .unwrap(); + + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(back.len(), 2); + back[0].to_data().validate_full().unwrap(); + assert_eq!(&back[0], &first); + back[1].to_data().validate_full().unwrap(); + assert_eq!(&back[1], &second); + } + + fn generate_primitive_array( + rng: &mut impl RngCore, + len: usize, + valid_percent: f64, + ) -> PrimitiveArray + where + K: ArrowPrimitiveType, + StandardUniform: Distribution, + { + (0..len) + .map(|_| rng.random_bool(valid_percent).then(|| rng.random())) + .collect() + } + + fn generate_boolean_array( + rng: &mut impl RngCore, + len: usize, + valid_percent: f64, + ) -> BooleanArray { + (0..len) + .map(|_| rng.random_bool(valid_percent).then(|| rng.random_bool(0.5))) + .collect() + } + + fn generate_strings( + rng: &mut impl RngCore, + len: usize, + valid_percent: f64, + ) -> GenericStringArray { + (0..len) + .map(|_| { + rng.random_bool(valid_percent).then(|| { + let len = rng.random_range(0..100); + let bytes = (0..len).map(|_| rng.random_range(0..128)).collect(); + String::from_utf8(bytes).unwrap() + }) + }) + .collect() + } + + fn generate_string_view( + rng: &mut impl RngCore, + len: usize, + valid_percent: f64, + ) -> StringViewArray { + (0..len) + .map(|_| { + rng.random_bool(valid_percent).then(|| { + let len = rng.random_range(0..100); + let bytes = (0..len).map(|_| 
rng.random_range(0..128)).collect(); + String::from_utf8(bytes).unwrap() + }) + }) + .collect() + } + + fn generate_byte_view( + rng: &mut impl RngCore, + len: usize, + valid_percent: f64, + ) -> BinaryViewArray { + (0..len) + .map(|_| { + rng.random_bool(valid_percent).then(|| { + let len = rng.random_range(0..100); + let bytes: Vec<_> = (0..len).map(|_| rng.random_range(0..128)).collect(); + bytes + }) + }) + .collect() + } + + fn generate_fixed_stringview_column(len: usize) -> StringViewArray { + let edge_cases = vec![ + Some("bar".to_string()), + Some("bar\0".to_string()), + Some("LongerThan12Bytes".to_string()), + Some("LongerThan12Bytez".to_string()), + Some("LongerThan12Bytes\0".to_string()), + Some("LongerThan12Byt".to_string()), + Some("backend one".to_string()), + Some("backend two".to_string()), + Some("a".repeat(257)), + Some("a".repeat(300)), + ]; + + // Fill up to `len` by repeating edge cases and trimming + let mut values = Vec::with_capacity(len); + for i in 0..len { + values.push( + edge_cases + .get(i % edge_cases.len()) + .cloned() + .unwrap_or(None), + ); + } + + StringViewArray::from(values) + } + + fn generate_dictionary( + rng: &mut impl RngCore, + values: ArrayRef, + len: usize, + valid_percent: f64, + ) -> DictionaryArray + where + K: ArrowDictionaryKeyType, + K::Native: SampleUniform, + { + let min_key = K::Native::from_usize(0).unwrap(); + let max_key = K::Native::from_usize(values.len()).unwrap(); + let keys: PrimitiveArray = (0..len) + .map(|_| { + rng.random_bool(valid_percent) + .then(|| rng.random_range(min_key..max_key)) + }) + .collect(); + + let data_type = + DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(values.data_type().clone())); + + let data = keys + .into_data() + .into_builder() + .data_type(data_type) + .add_child_data(values.to_data()) + .build() + .unwrap(); + + DictionaryArray::from(data) + } + + fn generate_fixed_size_binary( + rng: &mut impl RngCore, + len: usize, + valid_percent: f64, + ) -> 
FixedSizeBinaryArray { + let width = rng.random_range(0..20); + let mut builder = FixedSizeBinaryBuilder::new(width); + + let mut b = vec![0; width as usize]; + for _ in 0..len { + match rng.random_bool(valid_percent) { + true => { + b.iter_mut().for_each(|x| *x = rng.random()); + builder.append_value(&b).unwrap(); + } + false => builder.append_null(), + } + } + + builder.finish() + } + + fn generate_struct(rng: &mut impl RngCore, len: usize, valid_percent: f64) -> StructArray { + let nulls = NullBuffer::from_iter((0..len).map(|_| rng.random_bool(valid_percent))); + let a = generate_primitive_array::(rng, len, valid_percent); + let b = generate_strings::(rng, len, valid_percent); + let fields = Fields::from(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Utf8, true), + ]); + let values = vec![Arc::new(a) as _, Arc::new(b) as _]; + StructArray::new(fields, values, Some(nulls)) + } + + fn generate_list( + rng: &mut R, + len: usize, + valid_percent: f64, + values: F, + ) -> ListArray + where + F: FnOnce(&mut R, usize) -> ArrayRef, + { + let offsets = OffsetBuffer::::from_lengths((0..len).map(|_| rng.random_range(0..10))); + let values_len = offsets.last().unwrap().to_usize().unwrap(); + let values = values(rng, values_len); + let nulls = NullBuffer::from_iter((0..len).map(|_| rng.random_bool(valid_percent))); + let field = Arc::new(Field::new_list_field(values.data_type().clone(), true)); + ListArray::new(field, offsets, values, Some(nulls)) + } + + fn generate_nulls(rng: &mut impl RngCore, len: usize) -> Option { + Some(NullBuffer::from_iter( + (0..len).map(|_| rng.random_bool(0.8)), + )) + } + + fn generate_column(rng: &mut impl RngCore, len: usize) -> ArrayRef { + match rng.random_range(0..19) { + 0 => Arc::new(generate_primitive_array::(rng, len, 0.8)), + 1 => Arc::new(generate_primitive_array::(rng, len, 0.8)), + 2 => Arc::new(generate_primitive_array::(rng, len, 0.8)), + 3 => Arc::new(generate_primitive_array::(rng, len, 0.8)), + 4 => 
Arc::new(generate_primitive_array::(rng, len, 0.8)), + 5 => Arc::new(generate_primitive_array::(rng, len, 0.8)), + 6 => Arc::new(generate_strings::(rng, len, 0.8)), + 7 => { + let dict_values_len = rng.random_range(1..len); + // Cannot test dictionaries containing null values because of #2687 + let strings = Arc::new(generate_strings::(rng, dict_values_len, 1.0)); + Arc::new(generate_dictionary::(rng, strings, len, 0.8)) + } + 8 => { + let dict_values_len = rng.random_range(1..len); + // Cannot test dictionaries containing null values because of #2687 + let values = Arc::new(generate_primitive_array::( + rng, + dict_values_len, + 1.0, + )); + Arc::new(generate_dictionary::(rng, values, len, 0.8)) + } + 9 => Arc::new(generate_fixed_size_binary(rng, len, 0.8)), + 10 => Arc::new(generate_struct(rng, len, 0.8)), + 11 => Arc::new(generate_list(rng, len, 0.8, |rng, values_len| { + Arc::new(generate_primitive_array::(rng, values_len, 0.8)) + })), + 12 => Arc::new(generate_list(rng, len, 0.8, |rng, values_len| { + Arc::new(generate_strings::(rng, values_len, 0.8)) + })), + 13 => Arc::new(generate_list(rng, len, 0.8, |rng, values_len| { + Arc::new(generate_struct(rng, values_len, 0.8)) + })), + 14 => Arc::new(generate_string_view(rng, len, 0.8)), + 15 => Arc::new(generate_byte_view(rng, len, 0.8)), + 16 => Arc::new(generate_fixed_stringview_column(len)), + 17 => Arc::new( + generate_list(rng, len + 1000, 0.8, |rng, values_len| { + Arc::new(generate_primitive_array::(rng, values_len, 0.8)) + }) + .slice(500, len), + ), + 18 => Arc::new(generate_boolean_array(rng, len, 0.8)), + _ => unreachable!(), + } + } + + fn print_row(cols: &[SortColumn], row: usize) -> String { + let t: Vec<_> = cols + .iter() + .map(|x| match x.values.is_valid(row) { + true => { + let opts = FormatOptions::default().with_null("NULL"); + let formatter = ArrayFormatter::try_new(x.values.as_ref(), &opts).unwrap(); + formatter.value(row).to_string() + } + false => "NULL".to_string(), + }) + .collect(); + 
t.join(",") + } + + fn print_col_types(cols: &[SortColumn]) -> String { + let t: Vec<_> = cols + .iter() + .map(|x| x.values.data_type().to_string()) + .collect(); + t.join(",") + } + + fn change_underline_null_values_for_primitive(array: &PrimitiveArray) -> PrimitiveArray { + let (dt, values, nulls) = array.clone().into_parts(); + + let new_values = ScalarBuffer::::from_iter( + values.iter().zip(nulls.as_ref().unwrap().iter()) + .map(|(val, is_valid)| { + if is_valid { + *val + } else { + val.add_wrapping(T::Native::usize_as(1)) + } + }) + ); + + PrimitiveArray::new( + new_values, + nulls, + ).with_data_type(dt) + } + + fn change_underline_null_values_for_byte_array(array: &GenericByteArray) -> GenericByteArray { + + let (offsets, values, nulls) = array.clone().into_parts(); + + let new_offsets = OffsetBuffer::::from_lengths( + offsets.lengths().zip(nulls.as_ref().unwrap().iter()) + .map(|(len, is_valid)| { + if is_valid { + len + } else { + len + 1 + } + }) + ); + + let mut new_bytes = Vec::::with_capacity(new_offsets[new_offsets.len() - 1].as_usize()); + + offsets.windows(2).zip(nulls.as_ref().unwrap().iter()).for_each(|(start_and_end, is_valid)| { + let start = start_and_end[0].as_usize(); + let end = start_and_end[1].as_usize(); + new_bytes.extend_from_slice(&values.as_slice()[start..end]); + + // add an extra byte + if !is_valid { + new_bytes.push(b'c'); + } + }); + + + GenericByteArray::::new( + new_offsets, + Buffer::from_vec(new_bytes), + nulls, + ) + } + + fn change_underline_null_values(array: &ArrayRef) -> ArrayRef { + if array.null_count() == 0 { + return Arc::clone(array) + } + + downcast_primitive_array!( + array => { + let output = change_underline_null_values_for_primitive(array); + + Arc::new(output) + } + + DataType::Utf8 => { + Arc::new(change_underline_null_values_for_byte_array(array.as_string::())) + } + DataType::LargeUtf8 => { + Arc::new(change_underline_null_values_for_byte_array(array.as_string::())) + } + DataType::Binary => { + 
Arc::new(change_underline_null_values_for_byte_array(array.as_binary::())) + } + DataType::LargeBinary => { + Arc::new(change_underline_null_values_for_byte_array(array.as_binary::())) + } + _ => { + Arc::clone(array) + } + ) + } + + #[test] + #[cfg_attr(miri, ignore)] + fn fuzz_test() { + #[derive(Debug, PartialEq)] + enum Nulls { + /// Keep the generated array as is + HaveNulls, + + /// Replace the null buffer with different null buffer to point to different positions as null + DifferentNulls, + + /// Keep nullable field but remove all nulls + NullableWithNoNulls, + + /// Remove all nulls and mark field as not nullable + NoNulls, + } + let mut rng = StdRng::seed_from_u64(42); + for index in 0..100 { + for n in [ + Nulls::HaveNulls, + Nulls::DifferentNulls, + Nulls::NullableWithNoNulls, + Nulls::NoNulls, + ] { + let mut num_columns = rng.random_range(1..5); + let len = rng.random_range(5..100); + let mut arrays: Vec<_> = (0..num_columns) + .map(|_| generate_column(&mut rng, len)) + .collect(); + + match n { + Nulls::HaveNulls => { + // Keep as is + } + Nulls::DifferentNulls => { + // Remove nulls + arrays = arrays + .into_iter() + .map(|a| a.into_data().into_builder()) + .map(|d| { + make_array( + d.nulls(None) + .null_count(0) + .null_bit_buffer(None) + .nulls(generate_nulls(&mut rng, len)) + .build() + .unwrap(), + ) + }) + .collect() + } + // TODO - what about nested + Nulls::NoNulls | Nulls::NullableWithNoNulls => { + // Remove nulls + arrays = arrays + .into_iter() + .map(|a| a.into_data().into_builder()) + .map(|d| { + make_array( + d.nulls(None) + .null_count(0) + .null_bit_buffer(None) + .build() + .unwrap(), + ) + }) + .collect() + } + } + + let options: Vec<_> = (0..num_columns) + .map(|_| SortOptions { + descending: rng.random_bool(0.5), + nulls_first: rng.random_bool(0.5), + }) + .collect(); + + let sort_columns: Vec<_> = options + .iter() + .zip(&arrays) + .map(|(o, c)| SortColumn { + values: Arc::clone(c), + options: Some(*o), + }) + .collect(); + + 
let comparator = LexicographicalComparator::try_new(&sort_columns).unwrap(); + + let columns: Fields = options + .into_iter() + .zip(&arrays) + .map(|(o, a)| { + Field::new("col_1", a.data_type().clone(), !matches!(n, Nulls::NoNulls)) + }) + .collect(); + + let converter = UnorderedRowConverter::new(columns.clone()).unwrap(); + let rows = converter.convert_columns(&arrays).unwrap(); + let maybe_compare = if matches!(n, Nulls::DifferentNulls) { + let converter = UnorderedRowConverter::new(columns).unwrap(); + let arrays_with_different_data_behind_nulls = arrays.iter().map(|arr| change_underline_null_values(arr)).collect::>(); + let rows = converter.convert_columns(&arrays_with_different_data_behind_nulls).unwrap(); + + Some(rows) + } else { + None + }; + + for i in 0..rows.num_rows() { + if let Some(different_underline_nulls_rows) = &maybe_compare { + assert_eq!(different_underline_nulls_rows.row(i), rows.row(i), + "rows with different underline null values should be equal at row {}", i + ); + } + + for j in 0..rows.num_rows() { + let row_i = rows.row(i); + let row_j = rows.row(j); + let lex_cmp = comparator.compare(i, j); + match lex_cmp { + Ordering::Equal => { + assert_eq!(row_i, row_j); + } + _ => { + assert_ne!( + row_i, row_j, + "rows {} and {} should not be equal", + i, j + ); + } + } + // assert_eq!( + // row_cmp, + // lex_cmp, + // "({:?} vs {:?}) vs ({:?} vs {:?}) for types {}", + // print_row(&sort_columns, i), + // print_row(&sort_columns, j), + // row_i, + // row_j, + // print_col_types(&sort_columns) + // ); + } + } + + // Convert rows produced from convert_columns(). 
+ // Note: validate_utf8 is set to false since Row is initialized through empty_rows() + let back = converter.convert_rows(&rows).expect( + format!( + "index: {index} {n:?} - {:?}", + arrays + .iter() + .map(|item| item.data_type()) + .collect::>() + ) + .as_str(), + ); + for (actual, expected) in back.iter().zip(&arrays) { + actual.to_data().validate_full().unwrap(); + dictionary_eq(actual, expected) + } + + // Check that we can convert rows into ByteArray and then parse, convert it back to array + // Note: validate_utf8 is set to true since Row is initialized through RowParser + let rows = rows.try_into_binary().expect("reasonable size"); + let parser = converter.parser(); + let back = converter + .convert_rows(rows.iter().map(|b| parser.parse(b.expect("valid bytes")))) + .unwrap(); + for (actual, expected) in back.iter().zip(&arrays) { + actual.to_data().validate_full().unwrap(); + dictionary_eq(actual, expected) + } + + let rows = converter.from_binary(rows); + let back = converter.convert_rows(&rows).unwrap(); + for (actual, expected) in back.iter().zip(&arrays) { + actual.to_data().validate_full().unwrap(); + dictionary_eq(actual, expected) + } + } + } + } + + #[test] + fn test_clear() { + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", DataType::Int32, true)].into()) + .unwrap(); + let mut rows = converter.empty_rows(3, 128); + + let first = Int32Array::from(vec![None, Some(2), Some(4)]); + let second = Int32Array::from(vec![Some(2), None, Some(4)]); + let arrays = [Arc::new(first) as ArrayRef, Arc::new(second) as ArrayRef]; + + for array in arrays.iter() { + rows.clear(); + converter + .append(&mut rows, std::slice::from_ref(array)) + .unwrap(); + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(&back[0], array); + } + + let mut rows_expected = converter.empty_rows(3, 128); + converter.append(&mut rows_expected, &arrays[1..]).unwrap(); + + for (i, (actual, expected)) in rows.iter().zip(rows_expected.iter()).enumerate() { 
+ assert_eq!( + actual, expected, + "For row {i}: expected {expected:?}, actual: {actual:?}", + ); + } + } + + #[test] + fn test_append_codec_dictionary_binary() { + use DataType::*; + // Dictionary RowConverter + let converter = UnorderedRowConverter::new( + vec![Field::new( + "col_1", + Dictionary(Box::new(Int32), Box::new(Binary)), + true, + )] + .into(), + ) + .unwrap(); + let mut rows = converter.empty_rows(4, 128); + + let keys = Int32Array::from_iter_values([0, 1, 2, 3]); + let values = BinaryArray::from(vec![ + Some("a".as_bytes()), + Some(b"b"), + Some(b"c"), + Some(b"d"), + ]); + let dict_array = DictionaryArray::new(keys, Arc::new(values)); + + rows.clear(); + let array = Arc::new(dict_array) as ArrayRef; + converter + .append(&mut rows, std::slice::from_ref(&array)) + .unwrap(); + let back = converter.convert_rows(&rows).unwrap(); + + dictionary_eq(&back[0], &array); + } + + #[test] + fn test_list_prefix() { + let mut a = ListBuilder::new(Int8Builder::new()); + a.append_value([None]); + a.append_value([None, None]); + let a = a.finish(); + + let converter = UnorderedRowConverter::new( + vec![Field::new("col_1", a.data_type().clone(), true)].into(), + ) + .unwrap(); + let rows = converter.convert_columns(&[Arc::new(a) as _]).unwrap(); + // assert_eq!(rows.row(0).cmp(&rows.row(1)), Ordering::Less); + } + + #[test] + fn map_should_be_marked_as_unsupported() { + let map_data_type = Field::new_map( + "map", + "entries", + Field::new("key", DataType::Utf8, false), + Field::new("value", DataType::Utf8, true), + false, + true, + ) + .data_type() + .clone(); + + let fields = vec![Field::new("col_1", map_data_type, true)].into(); + let is_supported = UnorderedRowConverter::supports_fields(&fields); + + assert!(!is_supported, "Map should not be supported"); + } + + #[test] + fn should_fail_to_create_row_converter_for_unsupported_map_type() { + let map_data_type = Field::new_map( + "map", + "entries", + Field::new("key", DataType::Utf8, false), + 
Field::new("value", DataType::Utf8, true), + false, + true, + ) + .data_type() + .clone(); + + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", map_data_type, true)].into()); + + match converter { + Err(ArrowError::NotYetImplemented(message)) => { + assert!( + message.contains("Unordered row format support not yet implemented for"), + "Expected NotYetImplemented error for map data type, got: {message}", + ); + } + Err(e) => panic!("Expected NotYetImplemented error, got: {e}"), + Ok(_) => panic!("Expected NotYetImplemented error for map data type"), + } + } + + #[test] + fn test_values_buffer_smaller_when_utf8_validation_disabled() { + fn get_values_buffer_len(col: ArrayRef) -> (usize, usize) { + // 1. Convert cols into rows + let converter = UnorderedRowConverter::new( + vec![Field::new("col_1", DataType::Utf8View, true)].into(), + ) + .unwrap(); + + // 2a. Convert rows into colsa (validate_utf8 = false) + let rows = converter.convert_columns(&[col]).unwrap(); + let converted = converter.convert_rows(&rows).unwrap(); + let unchecked_values_len = converted[0].as_string_view().data_buffers()[0].len(); + + // 2b. Convert rows into cols (validate_utf8 = true since Row is initialized through RowParser) + let rows = rows.try_into_binary().expect("reasonable size"); + let parser = converter.parser(); + let converted = converter + .convert_rows(rows.iter().map(|b| parser.parse(b.expect("valid bytes")))) + .unwrap(); + let checked_values_len = converted[0].as_string_view().data_buffers()[0].len(); + (unchecked_values_len, checked_values_len) + } + + // Case1. 
StringViewArray with inline strings + let col = Arc::new(StringViewArray::from_iter([ + Some("hello"), // short(5) + None, // null + Some("short"), // short(5) + Some("tiny"), // short(4) + ])) as ArrayRef; + + let (unchecked_values_len, checked_values_len) = get_values_buffer_len(col); + // Since there are no long (>12) strings, len of values buffer is 0 + assert_eq!(unchecked_values_len, 0); + // When utf8 validation enabled, values buffer includes inline strings (5+5+4) + assert_eq!(checked_values_len, 14); + + // Case2. StringViewArray with long(>12) strings + let col = Arc::new(StringViewArray::from_iter([ + Some("this is a very long string over 12 bytes"), + Some("another long string to test the buffer"), + ])) as ArrayRef; + + let (unchecked_values_len, checked_values_len) = get_values_buffer_len(col); + // Since there are no inline strings, expected length of values buffer is the same + assert!(unchecked_values_len > 0); + assert_eq!(unchecked_values_len, checked_values_len); + + // Case3. 
StringViewArray with both short and long strings + let col = Arc::new(StringViewArray::from_iter([ + Some("tiny"), // 4 (short) + Some("thisisexact13"), // 13 (long) + None, + Some("short"), // 5 (short) + ])) as ArrayRef; + + let (unchecked_values_len, checked_values_len) = get_values_buffer_len(col); + // Since there is single long string, len of values buffer is 13 + assert_eq!(unchecked_values_len, 13); + assert!(checked_values_len > unchecked_values_len); + } + + #[test] + fn test_sparse_union() { + // create a sparse union with Int32 (type_id = 0) and Utf8 (type_id = 1) + let int_array = Int32Array::from(vec![Some(1), None, Some(3), None, Some(5)]); + let str_array = StringArray::from(vec![None, Some("b"), None, Some("d"), None]); + + // [1, "b", 3, "d", 5] + let type_ids = vec![0, 1, 0, 1, 0].into(); + + let union_fields = [ + (0, Arc::new(Field::new("int", DataType::Int32, false))), + (1, Arc::new(Field::new("str", DataType::Utf8, false))), + ] + .into_iter() + .collect(); + + let union_array = UnionArray::try_new( + union_fields, + type_ids, + None, + vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)], + ) + .unwrap(); + + let union_type = union_array.data_type().clone(); + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", union_type, true)].into()).unwrap(); + + let rows = converter + .convert_columns(&[Arc::new(union_array.clone())]) + .unwrap(); + + // round trip + let back = converter.convert_rows(&rows).unwrap(); + let back_union = back[0].as_any().downcast_ref::().unwrap(); + + assert_eq!(union_array.len(), back_union.len()); + for i in 0..union_array.len() { + assert_eq!(union_array.type_id(i), back_union.type_id(i)); + } + } + + #[test] + fn test_sparse_union_with_nulls() { + // create a sparse union with Int32 (type_id = 0) and Utf8 (type_id = 1) + let int_array = Int32Array::from(vec![Some(1), None, Some(3), None, Some(5)]); + let str_array = StringArray::from(vec![None::<&str>; 5]); + + // [1, null (both children 
null), 3, null (both children null), 5] + let type_ids = vec![0, 1, 0, 1, 0].into(); + + let union_fields = [ + (0, Arc::new(Field::new("int", DataType::Int32, true))), + (1, Arc::new(Field::new("str", DataType::Utf8, true))), + ] + .into_iter() + .collect(); + + let union_array = UnionArray::try_new( + union_fields, + type_ids, + None, + vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)], + ) + .unwrap(); + + let union_type = union_array.data_type().clone(); + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", union_type, true)].into()).unwrap(); + + let rows = converter + .convert_columns(&[Arc::new(union_array.clone())]) + .unwrap(); + + // round trip + let back = converter.convert_rows(&rows).unwrap(); + let back_union = back[0].as_any().downcast_ref::().unwrap(); + + assert_eq!(union_array.len(), back_union.len()); + for i in 0..union_array.len() { + let expected_null = union_array.is_null(i); + let actual_null = back_union.is_null(i); + assert_eq!(expected_null, actual_null, "Null mismatch at index {i}"); + if !expected_null { + assert_eq!(union_array.type_id(i), back_union.type_id(i)); + } + } + } + + #[test] + fn test_dense_union() { + // create a dense union with Int32 (type_id = 0) and use Utf8 (type_id = 1) + let int_array = Int32Array::from(vec![1, 3, 5]); + let str_array = StringArray::from(vec!["a", "b"]); + + let type_ids = vec![0, 1, 0, 1, 0].into(); + + // [1, "a", 3, "b", 5] + let offsets = vec![0, 0, 1, 1, 2].into(); + + let union_fields = [ + (0, Arc::new(Field::new("int", DataType::Int32, false))), + (1, Arc::new(Field::new("str", DataType::Utf8, false))), + ] + .into_iter() + .collect(); + + let union_array = UnionArray::try_new( + union_fields, + type_ids, + Some(offsets), // Dense mode + vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)], + ) + .unwrap(); + + let union_type = union_array.data_type().clone(); + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", union_type, 
true)].into()).unwrap(); + + let rows = converter + .convert_columns(&[Arc::new(union_array.clone())]) + .unwrap(); + + // round trip + let back = converter.convert_rows(&rows).unwrap(); + let back_union = back[0].as_any().downcast_ref::().unwrap(); + + assert_eq!(union_array.len(), back_union.len()); + for i in 0..union_array.len() { + assert_eq!(union_array.type_id(i), back_union.type_id(i)); + } + } + + #[test] + fn test_dense_union_with_nulls() { + // create a dense union with Int32 (type_id = 0) and Utf8 (type_id = 1) + let int_array = Int32Array::from(vec![Some(1), None, Some(5)]); + let str_array = StringArray::from(vec![Some("a"), None]); + + // [1, "a", 5, null (str null), null (int null)] + let type_ids = vec![0, 1, 0, 1, 0].into(); + let offsets = vec![0, 0, 1, 1, 2].into(); + + let union_fields = [ + (0, Arc::new(Field::new("int", DataType::Int32, true))), + (1, Arc::new(Field::new("str", DataType::Utf8, true))), + ] + .into_iter() + .collect(); + + let union_array = UnionArray::try_new( + union_fields, + type_ids, + Some(offsets), + vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)], + ) + .unwrap(); + + let union_type = union_array.data_type().clone(); + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", union_type, true)].into()).unwrap(); + + let rows = converter + .convert_columns(&[Arc::new(union_array.clone())]) + .unwrap(); + + // round trip + let back = converter.convert_rows(&rows).unwrap(); + let back_union = back[0].as_any().downcast_ref::().unwrap(); + + assert_eq!(union_array.len(), back_union.len()); + for i in 0..union_array.len() { + let expected_null = union_array.is_null(i); + let actual_null = back_union.is_null(i); + assert_eq!(expected_null, actual_null, "Null mismatch at index {i}"); + if !expected_null { + assert_eq!(union_array.type_id(i), back_union.type_id(i)); + } + } + } + + #[test] + fn test_union_ordering() { + let int_array = Int32Array::from(vec![100, 5, 20]); + let str_array = 
StringArray::from(vec!["z", "a"]); + + // [100, "z", 5, "a", 20] + let type_ids = vec![0, 1, 0, 1, 0].into(); + let offsets = vec![0, 0, 1, 1, 2].into(); + + let union_fields = [ + (0, Arc::new(Field::new("int", DataType::Int32, false))), + (1, Arc::new(Field::new("str", DataType::Utf8, false))), + ] + .into_iter() + .collect(); + + let union_array = UnionArray::try_new( + union_fields, + type_ids, + Some(offsets), + vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)], + ) + .unwrap(); + + let union_type = union_array.data_type().clone(); + let converter = + UnorderedRowConverter::new(vec![Field::new("col_1", union_type, true)].into()).unwrap(); + + let rows = converter.convert_columns(&[Arc::new(union_array)]).unwrap(); + + /* + expected ordering + + row 2: 5 - type_id 0 + row 4: 20 - type_id 0 + row 0: 100 - type id 0 + row 3: "a" - type id 1 + row 1: "z" - type id 1 + */ + // + // // 5 < "z" + // assert!(rows.row(2) < rows.row(1)); + // + // // 100 < "a" + // assert!(rows.row(0) < rows.row(3)); + // + // // among ints + // // 5 < 20 + // assert!(rows.row(2) < rows.row(4)); + // // 20 < 100 + // assert!(rows.row(4) < rows.row(0)); + // + // // among strigns + // // "a" < "z" + // assert!(rows.row(3) < rows.row(1)); + } + + #[test] + fn rows_size_should_count_for_capacity() { + let row_converter = + UnorderedRowConverter::new(vec![Field::new("col_1", DataType::UInt8, true)].into()) + .unwrap(); + + let empty_rows_size_with_preallocate_rows_and_data = { + let rows = row_converter.empty_rows(1000, 1000); + + rows.size() + }; + let empty_rows_size_with_preallocate_rows = { + let rows = row_converter.empty_rows(1000, 0); + + rows.size() + }; + let empty_rows_size_with_preallocate_data = { + let rows = row_converter.empty_rows(0, 1000); + + rows.size() + }; + let empty_rows_size_without_preallocate = { + let rows = row_converter.empty_rows(0, 0); + + rows.size() + }; + + assert!( + empty_rows_size_with_preallocate_rows_and_data > 
empty_rows_size_with_preallocate_rows, + "{empty_rows_size_with_preallocate_rows_and_data} should be larger than {empty_rows_size_with_preallocate_rows}" + ); + assert!( + empty_rows_size_with_preallocate_rows_and_data > empty_rows_size_with_preallocate_data, + "{empty_rows_size_with_preallocate_rows_and_data} should be larger than {empty_rows_size_with_preallocate_data}" + ); + assert!( + empty_rows_size_with_preallocate_rows > empty_rows_size_without_preallocate, + "{empty_rows_size_with_preallocate_rows} should be larger than {empty_rows_size_without_preallocate}" + ); + assert!( + empty_rows_size_with_preallocate_data > empty_rows_size_without_preallocate, + "{empty_rows_size_with_preallocate_data} should be larger than {empty_rows_size_without_preallocate}" + ); + } + +} diff --git a/arrow-row/src/unordered_row/nulls.rs b/arrow-row/src/unordered_row/nulls.rs new file mode 100644 index 000000000000..b91e72c5b2b5 --- /dev/null +++ b/arrow-row/src/unordered_row/nulls.rs @@ -0,0 +1,645 @@ +use crate::unordered_row::fixed::split_off; +use arrow_buffer::bit_chunk_iterator::BitChunkIterator; +use arrow_buffer::{bit_util, BooleanBuffer, Buffer, MutableBuffer, NullBuffer, NullBufferBuilder}; +use std::iter::{Chain, Once}; +use arrow_array::BooleanArray; + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +#[repr(u8)] +enum MetadataEncodingType { + None = 0, + FullByte = 1, + SingleBit = 2, +} + +impl MetadataEncodingType { + #[inline] + fn is_known_to_be_all_valid(&self, byte: u8) -> bool { + match self { + // No metadata so unknown + MetadataEncodingType::None => false, + MetadataEncodingType::FullByte => byte == u8::MAX, + MetadataEncodingType::SingleBit => (byte & 1) != 0, + } + } +} + +impl From for MetadataEncodingType { + // Always inline to make sure that converting to MetadataEncodingType is hopefully + // done at compile time + #[inline(always)] + fn from(value: u8) -> Self { + match value { + 0 => Self::None, + 1 => Self::FullByte, + 2 => Self::SingleBit, + _ => 
unreachable!("invalid metadata type: {value}"), + } + } +} + +fn get_metadata_encoding_type(number_of_columns: usize) -> MetadataEncodingType { + // If we have less than 8 columns having a metadata bit is unnecessary as we can compare the value to u8::MAX + if number_of_columns <= 8 { + return MetadataEncodingType::None; + } + + // If we have a multiple of 8 columns, we will use an extra byte for metadata to avoid bit ops + if number_of_columns % 8 == 0 { + return MetadataEncodingType::FullByte; + } + + MetadataEncodingType::SingleBit +} + +#[inline(always)] +pub(crate) fn get_number_of_bytes_for_nulls(number_of_columns: usize) -> usize { + get_number_of_bytes_for_nulls_from_metadata( + get_metadata_encoding_type(number_of_columns), + number_of_columns, + ) +} + +#[inline(always)] +fn get_number_of_bytes_for_nulls_from_metadata( + metadata: MetadataEncodingType, + number_of_columns: usize, +) -> usize { + if number_of_columns == 1 { + return 1; + } + match metadata { + MetadataEncodingType::None => bit_util::ceil(number_of_columns, 8), + MetadataEncodingType::FullByte => 1 + bit_util::ceil(number_of_columns, 8), + MetadataEncodingType::SingleBit => bit_util::ceil(1 + number_of_columns, 8), + } +} + +/// Get bytes to use when all columns are valid +#[inline] +fn get_all_valid_bytes(number_of_columns: usize) -> Vec { + let metadata_type = get_metadata_encoding_type(number_of_columns); + + let number_of_bytes = + get_number_of_bytes_for_nulls_from_metadata(metadata_type, number_of_columns); + + // Unused bit are set as well for simplicity, there is no benefit in setting them to 0 + vec![u8::MAX; number_of_bytes] +} + +fn encode_nulls_to_slice( + mut output: &mut [u8], + merge_iters: &mut [MergeIter], +) { + let metadata_type = MetadataEncodingType::from(METADATA_TYPE); + + let mut are_all_valid = true; + + for (mut index, merge_iter) in merge_iters.iter_mut().enumerate() { + if metadata_type == MetadataEncodingType::FullByte { + // Skip the initial byte + index += 1; 
+ } + + let byte = unsafe { merge_iter.next().unwrap_unchecked() } ; + // Unused bytes are set to u8::MAX as well + are_all_valid = are_all_valid && byte == u8::MAX; + output[index] = byte; + } + + match metadata_type { + MetadataEncodingType::None => {} + MetadataEncodingType::FullByte => { + // as we have the metadata bit + output[0] = if are_all_valid { u8::MAX } else { 0 }; + } + MetadataEncodingType::SingleBit => { + if are_all_valid { + output[0] |= 1; + } else { + output[0] &= !1; + } + } + } +} + +struct MergeIter<'a> { + inner: [Option, Once>>; 8], + scratch: [u8; 8], + current: [u64; 8], + bit_index: usize, + number_of_bits_remaining: usize, +} + +impl<'a> MergeIter<'a> { + fn new(nulls: &'a [Option<&'a NullBuffer>], len: usize) -> Self { + Self::new_with_offset_all_valid(nulls, len, 0) + } + + + /// Having offset and not getting a vector to make it simpler with the lifetimes + fn new_with_offset_all_valid(nulls: &'a [Option<&'a NullBuffer>], len: usize, offset: usize) -> Self { + assert!( + nulls.len() + offset <= 8, + "MergeIter only supports up to 8 null buffers" + ); + assert_ne!(nulls.len(), 0, "Must have columns nulls to encode"); + assert_ne!(len, 0, "Must have columns with data to encode"); + assert!( + nulls.iter().all(|n| n.is_none_or(|n| n.len() == len)), + "All null buffers must have the same length as the data" + ); + + let normalized_iterators = nulls + .iter() + .map(|n| match n { + None => None, + Some(null_buffer) => Some(null_buffer.inner().bit_chunks()), + }) + .map(|n| { + n.map(|bit_chunks| { + bit_chunks + .iter() + .chain(std::iter::once(bit_chunks.remainder_bits())) + }) + }) + .collect::>(); + + let mut inner = [const { None }; 8]; + for (i, it) in normalized_iterators.into_iter().enumerate() { + inner[i + offset] = it; + } + + let mut current = { + let mut current = [0; 8]; + inner + .iter_mut() + .zip(current.iter_mut()) + .for_each(|(inner, current)| { + *current = match inner { + None => u64::MAX, + Some(it) => { + // We 
already asserted that length cannot be 0 + it.next().unwrap() + } + } + }); + + current + }; + + MergeIter { + inner, + current, + bit_index: 0, + number_of_bits_remaining: len, + scratch: [0; 8], + } + } + + fn advance_to_next_iter(&mut self) { + assert_ne!( + self.number_of_bits_remaining, 0, + "Should have at least one u64 remaining" + ); + + self.inner + .iter_mut() + .zip(self.current.iter_mut()) + .for_each(|(inner, current)| { + match inner { + None => { + // We don't modify current for None iterators, so it should already match u64::MAX + assert_eq!(current, &u64::MAX); + } + Some(inner) => { + *current = unsafe { inner.next().unwrap_unchecked() }; + } + } + }); + + // Reset bit index to start over + self.bit_index = 0; + } +} + +impl<'a> Iterator for MergeIter<'a> { + type Item = u8; + + fn next(&mut self) -> Option { + if self.number_of_bits_remaining == 0 { + return None; + } + + if self.bit_index > 63 { + self.advance_to_next_iter(); + } + + self.number_of_bits_remaining -= 1; + + let item = fetch_and_shift(self.current, self.bit_index, &mut self.scratch); + + self.bit_index += 1; + + Some(item) + } + + fn size_hint(&self) -> (usize, Option) { + ( + self.number_of_bits_remaining, + Some(self.number_of_bits_remaining), + ) + } +} + +impl ExactSizeIterator for MergeIter<'_> { + fn len(&self) -> usize { + self.number_of_bits_remaining + } +} + +/// Decode single row nulls +fn decode_nulls_from_slice(bitpacked: &[u8], length: usize) -> Vec { + let number_of_bytes = bit_util::ceil(length, 8); + let mut result = vec![false; length]; + + let mut index = 0; + + for byte_index in 0..(number_of_bytes - 1) { + let byte = bitpacked[byte_index]; + for bit_index in 0..8 { + let overall_index = byte_index * 8 + bit_index; + if overall_index >= length { + break; + } + let is_valid = (byte & (1 << bit_index)) != 0; + result[index] = is_valid; + index += 1; + } + } + + for bit_index in 0..(length % 8) { + let byte = bitpacked[number_of_bytes - 1]; + let is_valid = (byte 
& (1 << bit_index)) != 0; + result[index] = is_valid; + index += 1; + } + + result +} + +// Naive implementation of encoding nulls +pub(crate) fn encode_nulls_naive( + data: &mut [u8], + offsets: &mut [usize], + mut nulls: Vec>, + number_of_rows: usize, +) { + let number_of_columns = nulls.len(); + + // If nothing to encode + if number_of_columns == 0 { + return; + } + + assert!( + nulls.iter().all(|n| n.is_none_or(|n| n.len() == number_of_rows)), + "All null buffers must have the same length as the data" + ); + + // Replace all Null buffers with no nulls with None for normalization + nulls.iter_mut().for_each(|n| { + if n.is_some_and(|n| n.null_count() == 0) { + *n = None; + } + }); + + // Fast path, if all valid + if nulls.iter().all(|n| n.is_none()) { + encode_all_valid(data, offsets, number_of_columns); + return; + } + + if number_of_columns == 1 { + let nulls = nulls.into_iter().next().unwrap(); + + // Unwrap as we know there are nulls as we checked above + let nulls = nulls.unwrap(); + encode_all_as_single_byte(data, offsets, nulls); + + return; + } + + let mut merge_iters: Vec = vec![]; + + match get_metadata_encoding_type(number_of_columns) { + MetadataEncodingType::None => { + { + let mut left_nulls = nulls.as_mut_slice(); + while !left_nulls.is_empty() { + let (current_chunk, next_slice) = + left_nulls.split_at_mut(std::cmp::min(8, left_nulls.len())); + let merge_iter = MergeIter::new(current_chunk, number_of_rows); + merge_iters.push(merge_iter); + left_nulls = next_slice; + } + } + + encode_slice_with_metadata_const::<{ MetadataEncodingType::None as u8 }>( + data, + offsets, + merge_iters, + number_of_columns, + ); + } + MetadataEncodingType::FullByte => { + { + let mut left_nulls = nulls.as_mut_slice(); + while !left_nulls.is_empty() { + let (current_chunk, next_slice) = + left_nulls.split_at_mut(std::cmp::min(8, left_nulls.len())); + let merge_iter = MergeIter::new(current_chunk, number_of_rows); + merge_iters.push(merge_iter); + left_nulls = 
next_slice; + } + } + + encode_slice_with_metadata_const::<{ MetadataEncodingType::FullByte as u8 }>( + data, + offsets, + merge_iters, + number_of_columns, + ); + } + MetadataEncodingType::SingleBit => { + { + let take = std::cmp::min(7, nulls.len()); + let mut left_nulls = nulls.as_mut_slice(); + let (current_chunk, next_slice) = left_nulls.split_at_mut(take); + left_nulls = next_slice; + + // First None to reserve space for the metadata bit + let mut first_byte = vec![None]; + first_byte.extend(current_chunk.iter().copied()); + let merge_iter = MergeIter::new(current_chunk, number_of_rows); + merge_iters.push(merge_iter); + + while !left_nulls.is_empty() { + let (current_chunk, next_slice) = + left_nulls.split_at_mut(std::cmp::min(8, left_nulls.len())); + let merge_iter = MergeIter::new(current_chunk, number_of_rows); + merge_iters.push(merge_iter); + left_nulls = next_slice; + } + } + + encode_slice_with_metadata_const::<{ MetadataEncodingType::SingleBit as u8 }>( + data, + offsets, + merge_iters, + number_of_columns, + ); + } + } +} + +fn encode_slice_with_metadata_const( + data: &mut [u8], + offsets: &mut [usize], + mut merge_iters: Vec, + number_of_columns: usize, +) { + let number_of_bytes = { + let metadata_type = MetadataEncodingType::from(METADATA_TYPE); + assert_eq!( + metadata_type, + get_metadata_encoding_type(number_of_columns), + "metadata type mismatch" + ); + + get_number_of_bytes_for_nulls_from_metadata(metadata_type, number_of_columns) + }; + for offset in offsets.iter_mut().skip(1) { + encode_nulls_to_slice::(&mut data[*offset..], merge_iters.as_mut_slice()); + *offset += number_of_bytes; + } +} + +// Optimized implementation when all columns don't have nulls in them +fn encode_all_valid(data: &mut [u8], offsets: &mut [usize], null_bits: usize) { + assert_ne!(null_bits, 0, "Number of null bits must be greater than 0"); + + if null_bits == 1 { + for offset in offsets.iter_mut().skip(1) { + data[*offset] = true as u8; + *offset += 1; + } + } else 
{ + let bytes_to_copy = get_all_valid_bytes(null_bits); + let number_of_bytes = bytes_to_copy.len(); + + for offset in offsets.iter_mut().skip(1) { + data[*offset..*offset + number_of_bytes].copy_from_slice(&bytes_to_copy); + *offset += number_of_bytes; + } + } +} + +fn encode_all_as_single_byte(data: &mut [u8], offsets: &mut [usize], nulls: &NullBuffer) { + for (offset, is_valid) in offsets.iter_mut().skip(1).zip(nulls.iter()) { + data[*offset] = is_valid as u8; + *offset += 1; + } +} + +/// Decodes packed nulls from rows +/// +/// TODO - maybe have a function to only do for 8 nulls and then we avoid slicing and maybe we could shift each bit by the position of the column and then shift again to the +pub(crate) fn decode_packed_nulls_in_rows( + rows: &mut [&[u8]], + number_of_columns: usize, +) -> Vec> { + if number_of_columns == 0 { + return vec![]; + } + + // If only 1 column than we use a single byte + if number_of_columns == 1 { + let mut null_count = 0; + let buffer = MutableBuffer::collect_bool(rows.len(), |idx| { + let valid = rows[idx][0] == 1; + null_count += !valid as usize; + + // Advance the row slice + let row = rows[idx]; + rows[idx] = &row[1..]; + valid + }) + .into(); + + if null_count == 0 { + return vec![None]; + } + + let boolean_buffer= BooleanBuffer::new(buffer, 0, rows.len()); + + // SAFETY: we know that the buffer is valid as we just created it + let null_buffer = unsafe {NullBuffer::new_unchecked( + boolean_buffer, + null_count + )}; + + return vec![Some(null_buffer)]; + } + + match get_metadata_encoding_type(number_of_columns) { + MetadataEncodingType::None => decode_packed_nulls_in_rows_with_metadata_type::< + { MetadataEncodingType::None as u8 }, + >(rows, number_of_columns), + MetadataEncodingType::FullByte => decode_packed_nulls_in_rows_with_metadata_type::< + { MetadataEncodingType::FullByte as u8 }, + >(rows, number_of_columns), + MetadataEncodingType::SingleBit => decode_packed_nulls_in_rows_with_metadata_type::< + { 
MetadataEncodingType::SingleBit as u8 }, + >(rows, number_of_columns), + } +} + +/// Decodes packed nulls from rows +/// +/// TODO - maybe have a function to only do for 8 nulls and then we avoid slicing and maybe we could shift each bit by the position of the column and then shift again to the +pub fn decode_packed_nulls_in_rows_with_metadata_type( + rows: &mut [&[u8]], + number_of_columns: usize, +) -> Vec> { + let metadata_type = MetadataEncodingType::from(METADATA_TYPE); + assert_eq!( + metadata_type, + get_metadata_encoding_type(number_of_columns), + "metadata type mismatch" + ); + + let number_of_rows = rows.len(); + let mut builders = (0..number_of_columns).map(|_| NullBufferBuilder::new(number_of_rows)).collect::>(); + let number_of_bytes = + get_number_of_bytes_for_nulls_from_metadata(metadata_type, number_of_columns); + + let unset_metadata_bit = if metadata_type == MetadataEncodingType::SingleBit { + // All bits are set except the first one + 0b1111_1110 + } else { + u8::MAX + }; + + for row in rows.iter_mut() { + let mut null_bytes = split_off(row, number_of_bytes); + let known_to_be_all_valid = metadata_type.is_known_to_be_all_valid(null_bytes[0]); + + if known_to_be_all_valid { + builders.iter_mut().for_each(|b| b.append(true)); + continue; + } + + let mut builders_slice = builders.as_mut_slice(); + + match metadata_type { + MetadataEncodingType::None => {} + MetadataEncodingType::FullByte => { + // Skip the first byte + null_bytes = &null_bytes[1..]; + } + MetadataEncodingType::SingleBit => { + // Adding this assertion as the implementation assume that + assert_ne!( + null_bytes.len(), + 1, + "Must have more bytes when using single bit metadata" + ); + + let byte_builders; + (byte_builders, builders_slice) = builders_slice.split_at_mut(7); + + decode_to_builder::< + // Has metadata bit as we are in the first byte + true, + >( + null_bytes[0], + // Because we already asserted that there are null bits, we need to check with the metadata bit unset + 
unset_metadata_bit, + byte_builders, + ); + + null_bytes = &null_bytes[1..]; + } + } + + for &byte in &null_bytes[..null_bytes.len() - 1] { + let byte_builders; + (byte_builders, builders_slice) = builders_slice.split_at_mut(8); + // No metadata bit in this byte as we already handled that + decode_to_builder::(byte, u8::MAX, byte_builders); + } + + // No metadata bit in this byte as we already handled that + decode_to_builder::(null_bytes[null_bytes.len() - 1], u8::MAX, builders_slice); + } + + // Finalize null buffers + builders + .into_iter() + .map(|mut builder| builder.finish()) + .collect() +} + +fn decode_to_builder( + null_byte: u8, + all_valid_byte: u8, + byte_builders: &mut [NullBufferBuilder], +) { + // assert to verify that we won't shift by too many bits + assert!(byte_builders.len() <= if HAS_METADATA_BIT { 7 } else { 8 }); + + // The all valid should account the metadata bit if has. + // + // No all null condition as it is not that column that all the columns are nulls, I think + // so avoid adding a condition in the hot path + if null_byte == all_valid_byte { + // All valid + byte_builders.iter_mut().for_each(|b| b.append(true)); + } else { + for (mut bit_index, builder) in byte_builders.iter_mut().enumerate() { + if HAS_METADATA_BIT { + bit_index += 1; + } + let is_valid = (null_byte & (1 << bit_index)) != 0; + builder.append(is_valid); + } + } +} + +/// Create a bit packed from 8 u64 items at bit index +/// +/// This is carefully done to be vectorized +pub fn fetch_and_shift(bitpacked: [u64; 8], bit_index: usize, scratch: &mut [u8; 8]) -> u8 { + // Each bit should be shift by bit_index + + const SHIFT: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + + // single value logic: + // shift bitpacked by bit_index and mask with 1 + // then shift by the corresponding SHIFT value + // to make it in the correct position + // and then OR with the rest of the items. 
+ + // Not doing manual loop as it will not be vectorized + bitpacked + .iter() + .map(|&item| ((item >> bit_index) & 1) as u8) + .zip(SHIFT) + .map(|(item, shift)| item << shift) + // Collecting as the fold break the vectorization + .zip(scratch.iter_mut()) + .for_each(|(item, scratch)| *scratch = item); + + scratch.iter().fold(0, |acc, item| acc | item) +} diff --git a/arrow-row/src/unordered_row/run.rs b/arrow-row/src/unordered_row/run.rs new file mode 100644 index 000000000000..fcdcef02a593 --- /dev/null +++ b/arrow-row/src/unordered_row/run.rs @@ -0,0 +1,569 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use super::{UnorderedRowConverter, UnorderedRows, variable}; +use arrow_array::types::RunEndIndexType; +use arrow_array::{PrimitiveArray, RunArray}; +use arrow_buffer::{ArrowNativeType, ScalarBuffer}; +use arrow_schema::{ArrowError}; + +/// Computes the lengths of each row for a RunEndEncodedArray +pub fn compute_lengths( + lengths: &mut [usize], + rows: &UnorderedRows, + array: &RunArray, +) { + let run_ends = array.run_ends().values(); + let mut logical_start = 0; + + // Iterate over each run and apply the same length to all logical positions in the run + for (physical_idx, &run_end) in run_ends.iter().enumerate() { + let logical_end = run_end.as_usize(); + let row = rows.row(physical_idx); + let encoded_len = variable::encoded_len(Some(row.data)); + + // Add the same length for all logical positions in this run + for length in &mut lengths[logical_start..logical_end] { + *length += encoded_len; + } + + logical_start = logical_end; + } +} + +/// Encodes the provided `RunEndEncodedArray` to `out` with the provided `SortOptions` +/// +/// `rows` should contain the encoded values +pub fn encode( + data: &mut [u8], + offsets: &mut [usize], + rows: &UnorderedRows, + array: &RunArray, +) { + let run_ends = array.run_ends(); + + let mut logical_idx = 0; + let mut offset_idx = 1; // Skip first offset + + // Iterate over each run + for physical_idx in 0..run_ends.values().len() { + let run_end = run_ends.values()[physical_idx].as_usize(); + + // Process all elements in this run + while logical_idx < run_end && offset_idx < offsets.len() { + let offset = &mut offsets[offset_idx]; + let out = &mut data[*offset..]; + + // Use variable-length encoding to make the data self-describing + let row = rows.row(physical_idx); + let bytes_written = variable::encode_one(out, Some(row.data)); + *offset += bytes_written; + + logical_idx += 1; + offset_idx += 1; + } + + // Break if we've processed all offsets + if offset_idx >= offsets.len() { + break; + } + } +} + +/// Decodes a 
RunEndEncodedArray from `rows` with the provided `options` +/// +/// # Safety +/// +/// `rows` must contain valid data for the provided `converter` +pub unsafe fn decode( + converter: &UnorderedRowConverter, + rows: &mut [&[u8]], + validate_utf8: bool, +) -> Result, ArrowError> { + if rows.is_empty() { + let values = unsafe { converter.convert_raw(&mut [], validate_utf8) }?; + let run_ends_array = PrimitiveArray::::try_new(ScalarBuffer::from(vec![]), None)?; + return RunArray::::try_new(&run_ends_array, &values[0]); + } + + // Decode each row's REE data and collect the decoded values + let mut decoded_values = Vec::new(); + let mut run_ends = Vec::new(); + let mut unique_row_indices = Vec::new(); + + // Process each row to extract its REE data (following decode_binary pattern) + let mut decoded_data = Vec::new(); + for (idx, row) in rows.iter_mut().enumerate() { + decoded_data.clear(); + // Extract the decoded value data from this row + let consumed = variable::decode_blocks(row, |block| { + decoded_data.extend_from_slice(block); + }); + + // Update the row to point past the consumed REE data + *row = &row[consumed..]; + + // Check if this decoded value is the same as the previous one to identify runs + let is_new_run = + idx == 0 || decoded_data != decoded_values[*unique_row_indices.last().unwrap()]; + + if is_new_run { + // This is a new unique value - end the previous run if any + if idx > 0 { + run_ends.push(R::Native::usize_as(idx)); + } + unique_row_indices.push(decoded_values.len()); + let capacity = decoded_data.capacity(); + decoded_values.push(std::mem::replace( + &mut decoded_data, + Vec::with_capacity(capacity), + )); + } + } + // Add the final run end + run_ends.push(R::Native::usize_as(rows.len())); + + // Convert the unique decoded values using the row converter + let mut unique_rows: Vec<&[u8]> = decoded_values.iter().map(|v| v.as_slice()).collect(); + let values = if unique_rows.is_empty() { + unsafe { converter.convert_raw(&mut [], validate_utf8) 
}? + } else { + unsafe { converter.convert_raw(&mut unique_rows, validate_utf8) }? + }; + + // Create run ends array + let run_ends_array = PrimitiveArray::::try_new(ScalarBuffer::from(run_ends), None)?; + + // Create the RunEndEncodedArray + RunArray::::try_new(&run_ends_array, &values[0]) +} + +#[cfg(test)] +mod tests { + use super::{UnorderedRowConverter}; + use arrow_array::cast::AsArray; + use arrow_array::types::{Int16Type, Int32Type, Int64Type, RunEndIndexType}; + use arrow_array::{Array, Int64Array, PrimitiveArray, RunArray, StringArray}; + use arrow_schema::{DataType, Field, SortOptions}; + use std::sync::Arc; + + fn assert_roundtrip( + array: &RunArray, + run_end_type: DataType, + values_type: DataType, + ) { + let sort_field = Field::new("col_1", DataType::RunEndEncoded( + Arc::new(arrow_schema::Field::new("run_ends", run_end_type, false)), + Arc::new(arrow_schema::Field::new("values", values_type, true)), + ), true); + + let converter = UnorderedRowConverter::new(vec![sort_field].into()).unwrap(); + + let rows = converter + .convert_columns(&[Arc::new(array.clone())]) + .unwrap(); + + let arrays = converter.convert_rows(&rows).unwrap(); + let result = arrays[0].as_any().downcast_ref::>().unwrap(); + + assert_eq!(array, result); + } + + #[test] + fn test_run_end_encoded_supports_datatype() { + // Test that the UnorderedRowConverter correctly supports run-end encoded arrays + assert!(UnorderedRowConverter::supports_datatype(&DataType::RunEndEncoded( + Arc::new(arrow_schema::Field::new("run_ends", DataType::Int32, false)), + Arc::new(arrow_schema::Field::new("values", DataType::Utf8, true)), + ))); + } + + #[test] + fn test_run_end_encoded_round_trip_int16_int64s() { + // Test round-trip correctness for RunEndEncodedArray with Int64 values making sure it + // doesn't just work with eg. strings (which are all the other tests). 
+ + let values = Int64Array::from(vec![100, 200, 100, 300]); + let run_ends = vec![2, 3, 5, 6]; + let array: RunArray = + RunArray::try_new(&PrimitiveArray::from(run_ends), &values).unwrap(); + + assert_roundtrip(&array, DataType::Int16, DataType::Int64); + } + + #[test] + fn test_run_end_encoded_round_trip_int32_int64s() { + // Test round-trip correctness for RunEndEncodedArray with Int64 values making sure it + // doesn't just work with eg. strings (which are all the other tests). + + let values = Int64Array::from(vec![100, 200, 100, 300]); + let run_ends = vec![2, 3, 5, 6]; + let array: RunArray = + RunArray::try_new(&PrimitiveArray::from(run_ends), &values).unwrap(); + + assert_roundtrip(&array, DataType::Int32, DataType::Int64); + } + + #[test] + fn test_run_end_encoded_round_trip_int64_int64s() { + // Test round-trip correctness for RunEndEncodedArray with Int64 values making sure it + // doesn't just work with eg. strings (which are all the other tests). + + let values = Int64Array::from(vec![100, 200, 100, 300]); + let run_ends = vec![2, 3, 5, 6]; + let array: RunArray = + RunArray::try_new(&PrimitiveArray::from(run_ends), &values).unwrap(); + + assert_roundtrip(&array, DataType::Int64, DataType::Int64); + } + + #[test] + fn test_run_end_encoded_round_trip_strings() { + // Test round-trip correctness for RunEndEncodedArray with strings + + let array: RunArray = vec!["b", "b", "a"].into_iter().collect(); + + assert_roundtrip(&array, DataType::Int32, DataType::Utf8); + } + + #[test] + fn test_run_end_encoded_round_trip_strings_with_nulls() { + // Test round-trip correctness for RunEndEncodedArray with nulls + + let array: RunArray = vec![Some("b"), Some("b"), None, Some("a")] + .into_iter() + .collect(); + + assert_roundtrip(&array, DataType::Int32, DataType::Utf8); + } + + #[test] + fn test_run_end_encoded_ascending_descending_round_trip() { + // Test round-trip correctness for ascending vs descending sort options + + let values_asc = + 
arrow_array::StringArray::from(vec![Some("apple"), Some("banana"), Some("cherry")]); + let run_ends_asc = vec![2, 4, 6]; + let run_array_asc: RunArray = RunArray::try_new( + &arrow_array::PrimitiveArray::from(run_ends_asc), + &values_asc, + ) + .unwrap(); + + // Test ascending order + assert_roundtrip( + &run_array_asc, + DataType::Int32, + DataType::Utf8, + ); + + // Test descending order + assert_roundtrip( + &run_array_asc, + DataType::Int32, + DataType::Utf8, + ); + } + + #[test] + fn test_run_end_encoded_sort_configurations_basic() { + // Test that different sort configurations work and can round-trip successfully + + let test_array: RunArray = vec!["test"].into_iter().collect(); + + // Test ascending order + assert_roundtrip( + &test_array, + DataType::Int32, + DataType::Utf8, + ); + + // Test descending order + assert_roundtrip( + &test_array, + DataType::Int32, + DataType::Utf8, + ); + } + + #[test] + fn test_run_end_encoded_nulls_first_last_configurations() { + // Test that nulls_first vs nulls_last configurations work + + let simple_array: RunArray = vec!["simple"].into_iter().collect(); + + let converter_nulls_first = UnorderedRowConverter::new(vec![Field::new("col_1", + DataType::RunEndEncoded( + Arc::new(arrow_schema::Field::new("run_ends", DataType::Int32, false)), + Arc::new(arrow_schema::Field::new("values", DataType::Utf8, true)), + ), + true, + )].into()) + .unwrap(); + + let converter_nulls_last = UnorderedRowConverter::new(vec![Field::new("col_1", + DataType::RunEndEncoded( + Arc::new(arrow_schema::Field::new("run_ends", DataType::Int32, false)), + Arc::new(arrow_schema::Field::new("values", DataType::Utf8, true)), + ), + true)].into()) + .unwrap(); + + // Test that both configurations can handle simple arrays + let rows_nulls_first = converter_nulls_first + .convert_columns(&[Arc::new(simple_array.clone())]) + .unwrap(); + let arrays_nulls_first = converter_nulls_first + .convert_rows(&rows_nulls_first) + .unwrap(); + let result_nulls_first = 
arrays_nulls_first[0] + .as_any() + .downcast_ref::>() + .unwrap(); + + let rows_nulls_last = converter_nulls_last + .convert_columns(&[Arc::new(simple_array.clone())]) + .unwrap(); + let arrays_nulls_last = converter_nulls_last.convert_rows(&rows_nulls_last).unwrap(); + let result_nulls_last = arrays_nulls_last[0] + .as_any() + .downcast_ref::>() + .unwrap(); + + // Both should successfully convert the simple array + assert_eq!(simple_array.len(), result_nulls_first.len()); + assert_eq!(simple_array.len(), result_nulls_last.len()); + } + + #[test] + fn test_run_end_encoded_row_consumption() { + // This test verifies that ALL rows are properly consumed during decoding, + // not just the unique values. We test this by ensuring multi-column conversion + // works correctly - if rows aren't consumed properly, the second column would fail. + + // Create a REE array with multiple runs + let array: RunArray = vec!["a", "a", "b", "b", "b", "c"].into_iter().collect(); + let string_array = StringArray::from(vec!["x", "y", "z", "w", "u", "v"]); + + let multi_converter = UnorderedRowConverter::new(vec![ + Field::new("col_1", DataType::RunEndEncoded( + Arc::new(arrow_schema::Field::new("run_ends", DataType::Int32, false)), + Arc::new(arrow_schema::Field::new("values", DataType::Utf8, true)), + ), true), + Field::new("col_2", DataType::Utf8, true), + ].into()) + .unwrap(); + + let multi_rows = multi_converter + .convert_columns(&[Arc::new(array.clone()), Arc::new(string_array.clone())]) + .unwrap(); + + // Convert back - this will test that all rows are consumed properly + let arrays = multi_converter.convert_rows(&multi_rows).unwrap(); + + // Verify both columns round-trip correctly + let result_ree = arrays[0] + .as_any() + .downcast_ref::>() + .unwrap(); + + let result_string = arrays[1].as_any().downcast_ref::().unwrap(); + + // This should pass - both arrays should be identical to originals + assert_eq!(result_ree.values().as_ref(), array.values().as_ref()); + 
assert_eq!(result_ree.run_ends().values(), array.run_ends().values()); + assert_eq!(*result_string, string_array); + } + + #[test] + fn test_run_end_encoded_sorting_behavior() { + // Test that the binary row encoding actually produces the correct sort order + + // Create REE arrays with different values to test sorting + let array1: RunArray = vec!["apple", "apple"].into_iter().collect(); + let array2: RunArray = vec!["banana", "banana"].into_iter().collect(); + let array3: RunArray = vec!["cherry", "cherry"].into_iter().collect(); + + // Test ascending sort + let converter_asc = UnorderedRowConverter::new(vec![Field::new("col_1", DataType::RunEndEncoded( + Arc::new(arrow_schema::Field::new("run_ends", DataType::Int32, false)), + Arc::new(arrow_schema::Field::new("values", DataType::Utf8, true)), + ), true)].into()) + .unwrap(); + + let rows1_asc = converter_asc + .convert_columns(&[Arc::new(array1.clone())]) + .unwrap(); + let rows2_asc = converter_asc + .convert_columns(&[Arc::new(array2.clone())]) + .unwrap(); + let rows3_asc = converter_asc + .convert_columns(&[Arc::new(array3.clone())]) + .unwrap(); + + + // Test descending sort + let converter_desc = UnorderedRowConverter::new(vec![Field::new( + "col_1", + DataType::RunEndEncoded( + Arc::new(arrow_schema::Field::new("run_ends", DataType::Int32, false)), + Arc::new(arrow_schema::Field::new("values", DataType::Utf8, true)), + ), + true, + )].into()) + .unwrap(); + + let rows1_desc = converter_desc + .convert_columns(&[Arc::new(array1.clone())]) + .unwrap(); + let rows2_desc = converter_desc + .convert_columns(&[Arc::new(array2.clone())]) + .unwrap(); + let rows3_desc = converter_desc + .convert_columns(&[Arc::new(array3.clone())]) + .unwrap(); + + assert_eq!(rows1_asc.iter().collect::>(), rows1_desc.iter().collect::>()); + assert_eq!(rows2_asc.iter().collect::>(), rows2_desc.iter().collect::>()); + assert_eq!(rows3_asc.iter().collect::>(), rows3_desc.iter().collect::>()); + } + + #[test] + fn 
test_run_end_encoded_null_sorting() { + // Test null handling in sort order + + let array_with_nulls: RunArray = vec![None, None].into_iter().collect(); + let array_with_values: RunArray = vec!["apple", "apple"].into_iter().collect(); + + // Test nulls_first = true + let converter_nulls_first = UnorderedRowConverter::new(vec![Field::new("col_1", + DataType::RunEndEncoded( + Arc::new(arrow_schema::Field::new("run_ends", DataType::Int32, false)), + Arc::new(arrow_schema::Field::new("values", DataType::Utf8, true)), + ), + true + )].into()) + .unwrap(); + + let rows_nulls = converter_nulls_first + .convert_columns(&[Arc::new(array_with_nulls.clone())]) + .unwrap(); + let rows_values = converter_nulls_first + .convert_columns(&[Arc::new(array_with_values.clone())]) + .unwrap(); + + // nulls should come before values when nulls_first = true + // assert!( + // rows_nulls.row(0) < rows_values.row(0), + // "nulls should come before values when nulls_first=true" + // ); + + // Test nulls_first = false + let converter_nulls_last = UnorderedRowConverter::new(vec![Field::new("col_1", + DataType::RunEndEncoded( + Arc::new(arrow_schema::Field::new("run_ends", DataType::Int32, false)), + Arc::new(arrow_schema::Field::new("values", DataType::Utf8, true)), + ), + true + )].into()) + .unwrap(); + + let rows_nulls_last = converter_nulls_last + .convert_columns(&[Arc::new(array_with_nulls.clone())]) + .unwrap(); + let rows_values_last = converter_nulls_last + .convert_columns(&[Arc::new(array_with_values.clone())]) + .unwrap(); + + // values should come before nulls when nulls_first = false + // assert!( + // rows_values_last.row(0) < rows_nulls_last.row(0), + // "values should come before nulls when nulls_first=false" + // ); + } + + #[test] + fn test_run_end_encoded_mixed_sorting() { + // Test sorting with mixed values and nulls to ensure complex scenarios work + + let array1: RunArray = vec![Some("apple"), None].into_iter().collect(); + let array2: RunArray = vec![None, 
Some("banana")].into_iter().collect(); + let array3: RunArray = + vec![Some("cherry"), Some("cherry")].into_iter().collect(); + + let converter = UnorderedRowConverter::new(vec![Field::new("col_1", + DataType::RunEndEncoded( + Arc::new(arrow_schema::Field::new("run_ends", DataType::Int32, false)), + Arc::new(arrow_schema::Field::new("values", DataType::Utf8, true)), + ), + true + // arrow_schema::SortOptions { + // descending: false, + // nulls_first: true, + // }, + )].into()) + .unwrap(); + + let rows1 = converter.convert_columns(&[Arc::new(array1)]).unwrap(); + let rows2 = converter.convert_columns(&[Arc::new(array2)]).unwrap(); + let rows3 = converter.convert_columns(&[Arc::new(array3)]).unwrap(); + + // With nulls_first=true, ascending: + // Row 0: array1[0]="apple", array2[0]=null, array3[0]="cherry" -> null < apple < cherry + // Row 1: array1[1]=null, array2[1]="banana", array3[1]="cherry" -> null < banana < cherry + + // Compare first rows: null < apple < cherry + // assert!(rows2.row(0) < rows1.row(0), "null should come before apple"); + // assert!( + // rows1.row(0) < rows3.row(0), + // "apple should come before cherry" + // ); + // + // // Compare second rows: null < banana < cherry + // assert!( + // rows1.row(1) < rows2.row(1), + // "null should come before banana" + // ); + // assert!( + // rows2.row(1) < rows3.row(1), + // "banana should come before cherry" + // ); + } + + #[test] + fn test_run_end_encoded_empty() { + // Test converting / decoding an empty RunEndEncodedArray + let values: Vec<&str> = vec![]; + let array: RunArray = values.into_iter().collect(); + + let converter = UnorderedRowConverter::new(vec![Field::new("col_1", DataType::RunEndEncoded( + Arc::new(arrow_schema::Field::new("run_ends", DataType::Int32, false)), + Arc::new(arrow_schema::Field::new("values", DataType::Utf8, true)), + ), true)].into()) + .unwrap(); + + let rows = converter.convert_columns(&[Arc::new(array)]).unwrap(); + assert_eq!(rows.num_rows(), 0); + + // Likewise 
converting empty rows should yield an empty RunEndEncodedArray + let arrays = converter.convert_rows(&rows).unwrap(); + assert_eq!(arrays.len(), 1); + // Verify both columns round-trip correctly + let result_ree = arrays[0].as_run::(); + assert_eq!(result_ree.len(), 0); + } +} diff --git a/arrow-row/src/unordered_row/variable.rs b/arrow-row/src/unordered_row/variable.rs new file mode 100644 index 000000000000..dfa3bc988571 --- /dev/null +++ b/arrow-row/src/unordered_row/variable.rs @@ -0,0 +1,746 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use super::null_sentinel; +use arrow_array::builder::BufferBuilder; +use arrow_array::types::ByteArrayType; +use arrow_array::*; +use arrow_buffer::bit_util::ceil; +use arrow_buffer::{ArrowNativeType, MutableBuffer, NullBuffer}; +use arrow_data::{ArrayDataBuilder, MAX_INLINE_VIEW_LEN}; +use arrow_schema::DataType; +use builder::make_view; + + +/// The block size of the variable length encoding +pub const BLOCK_SIZE: usize = 32; + +/// The first block is split into `MINI_BLOCK_COUNT` mini-blocks +/// +/// This helps to reduce the space amplification for small strings +pub const MINI_BLOCK_COUNT: usize = 4; + +/// The mini block size +pub const MINI_BLOCK_SIZE: usize = BLOCK_SIZE / MINI_BLOCK_COUNT; + +/// The continuation token +pub const BLOCK_CONTINUATION: u8 = 0xFF; + +pub const EMPTY_SENTINEL: u8 = 0b00000001; + +/// Indicates a non-empty string +pub const NON_EMPTY_SENTINEL: u8 = 0b00000010; +// pub const NULL_SENTINEL: u8 = null_sentinel(); + +// u8 must be smaller value than u16 in the bit representation so we can sort by them +pub const LENGTH_TYPE_U8: u8 = 0b00000100; +pub const LENGTH_TYPE_U16: u8 = 0b00001000; +pub const LENGTH_TYPE_U32: u8 = 0b00010000; +pub const LENGTH_TYPE_U64: u8 = 0b00100000; + +/// Returns the length of the encoded representation of a byte array, including the null byte +#[inline] +pub fn encoded_len(a: Option<&[u8]>) -> usize { + padded_length(a.map(|x| x.len())) +} + +/// How many bytes are needed to encode the length WITHOUT encoding the ctrl byte (which includes the length type) +#[inline] +pub(crate) fn get_number_of_bytes_needed_to_encode(len: usize) -> usize { + (usize::BITS as usize - len.leading_zeros() as usize + 7) / 8 +} + +/// How many bytes are needed to encode the length +#[inline] +pub(crate) fn length_of_encoding_length(len: usize) -> usize { + // + 1 for the ctrl byte + 1 + get_number_of_bytes_needed_to_encode(len) +} + +/// Returns the padded length of the encoded length of the given length +#[inline] +pub fn 
padded_length(a: Option) -> usize { + let value_len = match a { + // None should be encoded as empty + None => 0, + Some(a) if a == 0 => 0, + Some(a) => get_number_of_bytes_needed_to_encode(a) + a, + }; + + value_len + // ctrl byte + + 1 +} + +/// Variable length values are encoded as +/// +/// - single `0_u8` if null +/// - single `1_u8` if empty array +/// - `2_u8` if not empty, followed by one or more blocks +/// +/// where a block is encoded as +/// +/// - [`BLOCK_SIZE`] bytes of string data, padded with 0s +/// - `0xFF_u8` if this is not the last block for this string +/// - otherwise the length of the block as a `u8` +pub fn encode<'a, I: Iterator>>( + data: &mut [u8], + offsets: &mut [usize], + i: I, +) { + for (offset, maybe_val) in offsets.iter_mut().skip(1).zip(i) { + *offset += encode_one(&mut data[*offset..], maybe_val); + } +} + +/// Calls [`encode`] with optimized iterator for generic byte arrays +pub(crate) fn encode_generic_byte_array( + data: &mut [u8], + offsets: &mut [usize], + input_array: &GenericByteArray, +) { + let input_offsets = input_array.value_offsets(); + let bytes = input_array.values().as_slice(); + + if let Some(null_buffer) = input_array.nulls().filter(|x| x.null_count() > 0) { + let input_iter = + input_offsets + .windows(2) + .zip(null_buffer.iter()) + .map(|(start_end, is_valid)| { + if is_valid { + let item_range = start_end[0].as_usize()..start_end[1].as_usize(); + // SAFETY: the offsets of the input are valid by construction + // so it is ok to use unsafe here + let item = unsafe { bytes.get_unchecked(item_range) }; + Some(item) + } else { + None + } + }); + + encode(data, offsets, input_iter); + } else { + // Skip null checks + let input_iter = input_offsets.windows(2).map(|start_end| { + let item_range = start_end[0].as_usize()..start_end[1].as_usize(); + // SAFETY: the offsets of the input are valid by construction + // so it is ok to use unsafe here + let item = unsafe { bytes.get_unchecked(item_range) }; + Some(item) + 
}); + + encode(data, offsets, input_iter); + } +} +// +// pub fn encode_null(out: &mut [u8]) -> usize { +// out[0] = null_sentinel(); +// 1 +// } + +#[inline] +pub fn encode_one(out: &mut [u8], val: Option<&[u8]>) -> usize { + match val { + None => encode_empty(out), + Some(val) => fast_encode_bytes(out, val), + } +} + +#[inline] +pub(crate) fn encode_len(out: &mut [u8], len: usize) -> usize { + let start_data_offset = { + match get_number_of_bytes_needed_to_encode(len) { + // It is more common to have short strings than empty strings than long strings + 1 => { + out[0] = NON_EMPTY_SENTINEL | LENGTH_TYPE_U8; + + // encode length + let start_data_offset = 1 + size_of::(); + out[1] = len as u8; + + start_data_offset + } + 0 => { + return encode_empty(out); + } + 2 => { + out[0] = NON_EMPTY_SENTINEL | LENGTH_TYPE_U16; + + // encode length + let start_data_offset = 1 + size_of::(); + unsafe { out.get_unchecked_mut(1..start_data_offset) } + .copy_from_slice(&(len as u16).to_be_bytes()); + + start_data_offset + } + 4 => { + out[0] = NON_EMPTY_SENTINEL | LENGTH_TYPE_U32; + + // encode length + let start_data_offset = 1 + size_of::(); + unsafe { out.get_unchecked_mut(1..start_data_offset) } + .copy_from_slice(&(len as u32).to_be_bytes()); + + start_data_offset + } + 8 => { + out[0] = NON_EMPTY_SENTINEL | LENGTH_TYPE_U64; + + // encode length + let start_data_offset = 1 + size_of::(); + unsafe { out.get_unchecked_mut(1..start_data_offset) } + .copy_from_slice(&(len as u64).to_be_bytes()); + + start_data_offset + } + bits_required => { + unreachable!("invalid length type {len}. 
numbr of bits required {bits_required}"); + } + } + }; + + start_data_offset +} + + +/// Encode all lengths using the same encoding size determined by `len_to_encode_by` +#[inline] +pub(crate) fn encode_lengths_with_prefix(out: &mut [u8], len_to_encode_by: usize, lengths: impl ExactSizeIterator) -> usize { + let start_data_offset = { + match get_number_of_bytes_needed_to_encode(len_to_encode_by) { + 0 => { + return encode_empty(out); + } + // It is more common to have short strings than empty strings than long strings + 1 => { + out[0] = NON_EMPTY_SENTINEL | LENGTH_TYPE_U8; + + let number_of_lengths = lengths.len(); + + lengths.enumerate().for_each(|(index, length)| { + out[index + 1] = length as u8; + }); + + // encode length + let offset = + // ctrl byte + 1 + + // the lengths themselves + size_of::() * number_of_lengths; + + offset + } + 2 => { + out[0] = NON_EMPTY_SENTINEL | LENGTH_TYPE_U16; + let encoded_len_size = size_of::(); + + let number_of_lengths = lengths.len(); + + let out_length_only = &mut out[1..]; + let out_length_only_sizes = out_length_only.chunks_exact_mut(encoded_len_size); + + lengths.zip(out_length_only_sizes).for_each(|(length, encode_dest)| { + encode_dest + .copy_from_slice(&(length as u16).to_be_bytes()); + }); + + // encode length + let offset = + // ctrl byte + 1 + + // the lengths themselves + encoded_len_size * number_of_lengths; + + offset + } + 4 => { + out[0] = NON_EMPTY_SENTINEL | LENGTH_TYPE_U32; + + let encoded_len_size = size_of::(); + + let number_of_lengths = lengths.len(); + + let out_length_only = &mut out[1..]; + let out_length_only_sizes = out_length_only.chunks_exact_mut(encoded_len_size); + + lengths.zip(out_length_only_sizes).for_each(|(length, encode_dest)| { + encode_dest + .copy_from_slice(&(length as u32).to_be_bytes()); + }); + + // encode length + let offset = + // ctrl byte + 1 + + // the lengths themselves + encoded_len_size * number_of_lengths; + + offset + } + 8 => { + out[0] = NON_EMPTY_SENTINEL | 
LENGTH_TYPE_U64; + + let encoded_len_size = size_of::(); + + let number_of_lengths = lengths.len(); + + let out_length_only = &mut out[1..]; + let out_length_only_sizes = out_length_only.chunks_exact_mut(encoded_len_size); + + lengths.zip(out_length_only_sizes).for_each(|(length, encode_dest)| { + encode_dest + .copy_from_slice(&(length as u64).to_be_bytes()); + }); + + // encode length + let offset = + // ctrl byte + 1 + + // the lengths themselves + encoded_len_size * number_of_lengths; + + offset + } + bits_required => { + unreachable!("invalid length type {len_to_encode_by}. numbr of bits required {bits_required}"); + } + } + }; + + start_data_offset +} + +#[inline] +pub(crate) fn get_ctrl_byte(len: usize) -> u8 { + let number_of_bytes = get_number_of_bytes_needed_to_encode(len); + debug_assert!(number_of_bytes == 0 || number_of_bytes == 1 || number_of_bytes == 2 || number_of_bytes == 4 || number_of_bytes == 8, "unknown number of bytes {number_of_bytes} needed to encode length {len}"); + let length_bit = 0b00000010 << number_of_bytes; + + let result = length_bit | NON_EMPTY_SENTINEL; + + if number_of_bytes == 0 { + EMPTY_SENTINEL + } else { + // Make sure that we provide the correct result + if cfg!(debug_assertions) { + // TODO - all non empty can be changed to be just bit op without branches + match number_of_bytes { + 0 => { + unreachable!("should already handle empty"); + } + // It is more common to have short strings than empty strings than long strings + 1 => { + assert_eq!(result, NON_EMPTY_SENTINEL | LENGTH_TYPE_U8, "should match u8"); + } + 2 => { + assert_eq!(result, NON_EMPTY_SENTINEL | LENGTH_TYPE_U16, "should match u16"); + } + 4 => { + assert_eq!(result, NON_EMPTY_SENTINEL | LENGTH_TYPE_U32, "should match u32"); + } + 8 => { + assert_eq!(result, NON_EMPTY_SENTINEL | LENGTH_TYPE_U64, "should match u64"); + } + bits_required => { + unreachable!("invalid length type {len}. 
numbr of bits required {bits_required}"); + } + } + } + result + } +} + +/// Faster encode_blocks that first copy all the data and then iterate over it and +#[inline] +pub(crate) fn fast_encode_bytes(out: &mut [u8], val: &[u8]) -> usize { + // Encode the length using the smallest type possible + let start_data_offset = encode_len(out, val.len()); + + let len = start_data_offset + val.len(); + + // Copy the data in one go + out[start_data_offset..len].copy_from_slice(val); + + len +} + +#[inline] +pub(crate) fn encode_empty(out: &mut [u8]) -> usize { + out[0] = EMPTY_SENTINEL; + 1 +} + +/// Decodes a single block of data +/// The `f` function accepts a slice of the decoded data, it may be called multiple times +pub fn decode_blocks_fast(row: &[u8], f: impl FnMut(&[u8])) -> usize { + decode_blocks_fast_order(row, f) +} + +/// Decodes a single block of data +/// The `f` function accepts a slice of the decoded data, it may be called multiple times +pub fn decode_blocks_fast_order(row: &[u8], mut f: impl FnMut(&[u8])) -> usize { + let (len, start_offset) = decode_len(&row); + + if len == 0 { + return start_offset; + } + + let start_offset = start_offset; + + f(&row[start_offset..start_offset + len]); + start_offset + len +} + +/// Return (length, start_offset) +#[inline] +pub(crate) fn decode_len(row: &[u8]) -> (usize, usize) { + // TODO - we can avoid the no if we change the ifs + let normalized_ctrl_byte = row[0]; + + if normalized_ctrl_byte == EMPTY_SENTINEL { + // Empty or null string + return (0, 1); + } + + let (len, start_offset) = if normalized_ctrl_byte & LENGTH_TYPE_U8 > 0 { + let len_normalized = row[1]; + let len = len_normalized as usize; + (len, size_of::()) + } else if normalized_ctrl_byte & LENGTH_TYPE_U16 > 0 { + let bytes = &row[1..3]; + let bytes_array: [u8; 2] = bytes.try_into().unwrap(); + // let bytes_needed: [u8; 2] = row[1..=1 + size_of::()].try_into().unwrap(); + let raw_len = u16::from_be_bytes(bytes_array); + let len_normalized = raw_len; + + 
(len_normalized as usize, size_of::()) + } else if normalized_ctrl_byte & LENGTH_TYPE_U32 > 0 { + let bytes_needed: [u8; 4] = row[1..=1 + size_of::()].try_into().unwrap(); + let raw_len = u32::from_be_bytes(bytes_needed); + let len_normalized = raw_len; + + (len_normalized as usize, size_of::()) + } else if normalized_ctrl_byte & LENGTH_TYPE_U64 > 0 { + let bytes_needed: [u8; 8] = row[1..=1 + size_of::()].try_into().unwrap(); + let raw_len = u64::from_be_bytes(bytes_needed); + let len_normalized = raw_len; + + (len_normalized as usize, size_of::()) + } else { + unreachable!("invalid length type"); + }; + + // Asserting no mismatch + debug_assert_eq!( + get_number_of_bytes_used_to_encode_from_ctrl_byte(normalized_ctrl_byte), + start_offset, + ); + + // + 1 for the control byte + let start_offset = start_offset + 1; + + (len, start_offset) +} + + + +/// Decode all lengths using the same encoding size determined by `len_to_encode_by` +#[inline] +pub(crate) fn decode_lengths_with_prefix(input: &[u8], number_of_items: usize, mut call_on_length: impl FnMut(usize)) -> usize { + // TODO - we can avoid the no if we change the ifs + let normalized_ctrl_byte = input[0]; + + if normalized_ctrl_byte == EMPTY_SENTINEL { + assert_eq!(number_of_items, 0); + return 1; + } + + let size_encoding_len = if normalized_ctrl_byte & LENGTH_TYPE_U8 > 0 { + input[1..1 + number_of_items].iter().for_each(|b| { + let len_normalized = *b; + let len = len_normalized as usize; + call_on_length(len); + }); + + size_of::() + } else if normalized_ctrl_byte & LENGTH_TYPE_U16 > 0 { + let size_to_encode_byte = size_of::(); + + input[1..1 + size_to_encode_byte * number_of_items].chunks_exact(size_to_encode_byte).for_each(|bytes| { + let bytes_array: [u8; 2] = bytes.try_into().unwrap(); + let raw_len = u16::from_be_bytes(bytes_array); + let len = raw_len as usize; + call_on_length(len); + }); + + size_to_encode_byte + } else if normalized_ctrl_byte & LENGTH_TYPE_U32 > 0 { + let size_to_encode_byte = 
size_of::(); + + input[1..1 + size_to_encode_byte * number_of_items].chunks_exact(size_to_encode_byte).for_each(|bytes| { + let bytes_array: [u8; 4] = bytes.try_into().unwrap(); + let raw_len = u32::from_be_bytes(bytes_array); + let len = raw_len as usize; + call_on_length(len); + }); + + size_to_encode_byte + } else if normalized_ctrl_byte & LENGTH_TYPE_U64 > 0 { + let size_to_encode_byte = size_of::(); + + input[1..1 + size_to_encode_byte * number_of_items].chunks_exact(size_to_encode_byte).for_each(|bytes| { + let bytes_array: [u8; 8] = bytes.try_into().unwrap(); + let raw_len = u64::from_be_bytes(bytes_array); + let len = raw_len as usize; + call_on_length(len); + }); + + size_to_encode_byte + } else { + unreachable!("invalid length type"); + }; + + // Asserting no mismatch + debug_assert_eq!( + get_number_of_bytes_used_to_encode_from_ctrl_byte(normalized_ctrl_byte), + size_encoding_len, + ); + + // 1 for the control byte + 1 + size_encoding_len * number_of_items +} + + +/// Return the number of bytes needed to encode the length +#[inline] +pub(crate) fn get_number_of_bytes_used_to_encode_from_ctrl_byte(ctrl_byte: u8) -> usize { + // TODO - we can probably avoid the if by some bitwise ops + + if ctrl_byte == EMPTY_SENTINEL { + // Empty or null string + 0 + } else if ctrl_byte & LENGTH_TYPE_U8 > 0 { + size_of::() + } else if ctrl_byte & LENGTH_TYPE_U16 > 0 { + size_of::() + } else if ctrl_byte & LENGTH_TYPE_U32 > 0 { + size_of::() + } else if ctrl_byte & LENGTH_TYPE_U64 > 0 { + size_of::() + } else { + unreachable!("invalid length type"); + } +} +// +// /// Writes `val` in `SIZE` blocks with the appropriate continuation tokens +// #[inline] +// fn encode_mini_blocks(out: &mut [u8], val: &[u8]) -> usize { +// const SIZE: usize = MINI_BLOCK_SIZE; +// +// +// let block_count = ceil(val.len(), SIZE); +// let end_offset = block_count * (SIZE + 1); +// let to_write = &mut out[..end_offset]; +// +// let chunks = val.chunks_exact(SIZE); +// let remainder = 
chunks.remainder(); +// for (input, output) in chunks.clone().zip(to_write.chunks_exact_mut(SIZE + 1)) { +// let input: &[u8; SIZE] = input.try_into().unwrap(); +// let out_block: &mut [u8; SIZE] = (&mut output[..SIZE]).try_into().unwrap(); +// +// *out_block = *input; +// +// // Indicate that there are further blocks to follow +// output[SIZE] = BLOCK_CONTINUATION; +// } +// +// if !remainder.is_empty() { +// let start_offset = (block_count - 1) * (SIZE + 1); +// to_write[start_offset..start_offset + remainder.len()].copy_from_slice(remainder); +// *to_write.last_mut().unwrap() = remainder.len() as u8; +// } else { +// // We must overwrite the continuation marker written by the loop above +// *to_write.last_mut().unwrap() = SIZE as u8; +// } +// end_offset +// } + +/// Decodes a single block of data +/// The `f` function accepts a slice of the decoded data, it may be called multiple times +pub fn decode_blocks(row: &[u8], mut f: impl FnMut(&[u8])) -> usize { + decode_blocks_fast(row, &mut f) +} + +/// Returns the number of bytes of encoded data +fn decoded_len(row: &[u8]) -> usize { + let mut len = 0; + decode_blocks(row, |block| len += block.len()); + len +} + +/// Decodes a binary array from `rows` with the provided `options` +pub fn decode_binary( + rows: &mut [&[u8]], + nulls: Option, +) -> GenericBinaryArray { + let len = rows.len(); + + let values_capacity = rows.iter().map(|row| decoded_len(row)).sum(); + let mut offsets = BufferBuilder::::new(len + 1); + offsets.append(I::zero()); + let mut values = MutableBuffer::new(values_capacity); + + for row in rows { + let offset = decode_blocks(row, |b| values.extend_from_slice(b)); + *row = &row[offset..]; + offsets.append(I::from_usize(values.len()).expect("offset overflow")) + } + + let d = match I::IS_LARGE { + true => DataType::LargeBinary, + false => DataType::Binary, + }; + + let builder = ArrayDataBuilder::new(d) + .len(len) + .nulls(nulls) + .add_buffer(offsets.finish()) + .add_buffer(values.into()); + + 
// SAFETY: + // Valid by construction above + unsafe { GenericBinaryArray::from(builder.build_unchecked()) } +} + +fn decode_binary_view_inner( + rows: &mut [&[u8]], + validate_utf8: bool, + nulls: Option, +) -> BinaryViewArray { + let len = rows.len(); + let inline_str_max_len = MAX_INLINE_VIEW_LEN as usize; + + // If we are validating UTF-8, decode all string values (including short strings) + // into the values buffer and validate UTF-8 once. If not validating, + // we save memory by only copying long strings to the values buffer, as short strings + // will be inlined into the view and do not need to be stored redundantly. + let values_capacity = if validate_utf8 { + // Capacity for all long and short strings + rows.iter().map(|row| decoded_len(row)).sum() + } else { + // Capacity for all long strings plus room for one short string + rows.iter().fold(0, |acc, row| { + let len = decoded_len(row); + if len > inline_str_max_len { + acc + len + } else { + acc + } + }) + inline_str_max_len + }; + let mut values = MutableBuffer::new(values_capacity); + + let mut views = BufferBuilder::::new(len); + for row in rows { + let start_offset = values.len(); + let offset = decode_blocks(row, |b| values.extend_from_slice(b)); + // assert_eq!(values.len(), start_offset + offset, "offset is too large"); + // Measure string length via change in values buffer. + // Used to check if decoded value should be truncated (short string) when validate_utf8 is false + let decoded_len = values.len() - start_offset; + if row[0] == null_sentinel() { + debug_assert_eq!(offset, 1); + debug_assert_eq!(start_offset, values.len()); + views.append(0); + } else { + // Safety: we just appended the data to the end of the buffer + let val = unsafe { values.get_unchecked_mut(start_offset..) 
}; + + let view = make_view(val, 0, start_offset as u32); + views.append(view); + + // truncate inline string in values buffer if validate_utf8 is false + if !validate_utf8 && decoded_len <= inline_str_max_len { + values.truncate(start_offset); + } + } + *row = &row[offset..]; + } + + if validate_utf8 { + // the values contains all data, no matter if it is short or long + // we can validate utf8 in one go. + std::str::from_utf8(values.as_slice()).unwrap(); + } + + let builder = ArrayDataBuilder::new(DataType::BinaryView) + .len(len) + .nulls(nulls) + .add_buffer(views.finish()) + .add_buffer(values.into()); + + // SAFETY: + // Valid by construction above + unsafe { BinaryViewArray::from(builder.build_unchecked()) } +} + +/// Decodes a binary view array from `rows` with the provided `options` +pub fn decode_binary_view(rows: &mut [&[u8]], nulls: Option) -> BinaryViewArray { + decode_binary_view_inner(rows, false, nulls) +} + +/// Decodes a string array from `rows` with the provided `options` +/// +/// # Safety +/// +/// The row must contain valid UTF-8 data +pub unsafe fn decode_string( + rows: &mut [&[u8]], + validate_utf8: bool, + nulls: Option, +) -> GenericStringArray { + let decoded = decode_binary::(rows, nulls); + + if validate_utf8 { + return GenericStringArray::from(decoded); + } + + let builder = decoded + .into_data() + .into_builder() + .data_type(GenericStringArray::::DATA_TYPE); + + // SAFETY: + // Row data must have come from a valid UTF-8 array + GenericStringArray::from(unsafe { builder.build_unchecked() }) +} + +/// Decodes a string view array from `rows` with the provided `options` +/// +/// # Safety +/// +/// The row must contain valid UTF-8 data +pub unsafe fn decode_string_view( + rows: &mut [&[u8]], + validate_utf8: bool, + nulls: Option, +) -> StringViewArray { + let view = decode_binary_view_inner(rows, validate_utf8, nulls); + unsafe { view.to_string_view_unchecked() } +} + diff --git a/arrow/benches/row_format.rs 
b/arrow/benches/row_format.rs index 1c120bb2f24e..c8b381c60ab0 100644 --- a/arrow/benches/row_format.rs +++ b/arrow/benches/row_format.rs @@ -21,7 +21,7 @@ extern crate core; use arrow::array::ArrayRef; use arrow::datatypes::{Int64Type, UInt64Type}; -use arrow::row::{RowConverter, SortField}; +use arrow::row::{RowConverter, SortField, unordered_row::UnorderedRowConverter}; use arrow::util::bench_util::{ create_boolean_array, create_boolean_array_with_seed, create_dict_from_values, create_f64_array_with_seed, create_primitive_array, create_primitive_array_with_seed, @@ -32,24 +32,30 @@ use arrow::util::bench_util::{ use arrow::util::data_gen::create_random_array; use arrow_array::Array; use arrow_array::types::{Int8Type, Int32Type}; -use arrow_schema::{DataType, Field}; +use arrow_schema::{DataType, Field, Fields}; use criterion::Criterion; use std::{hint, sync::Arc}; fn do_bench(c: &mut Criterion, name: &str, cols: Vec) { let fields: Vec<_> = cols - .iter() - .map(|x| SortField::new(x.data_type().clone())) - .collect(); + .iter() + .enumerate() + .map(|x| Field::new( + format!("c{}", x.0).as_str(), + x.1.data_type().clone(), + x.1.null_count() > 0, + )) + .collect(); + let fields: Fields = fields.into(); c.bench_function(&format!("convert_columns {name}"), |b| { b.iter(|| { - let converter = RowConverter::new(fields.clone()).unwrap(); + let converter = UnorderedRowConverter::new(fields.clone()).unwrap(); hint::black_box(converter.convert_columns(&cols).unwrap()) }); }); - let converter = RowConverter::new(fields).unwrap(); + let converter = UnorderedRowConverter::new(fields).unwrap(); let rows = converter.convert_columns(&cols).unwrap(); // using a pre-prepared row converter should be faster than the first time c.bench_function(&format!("convert_columns_prepared {name}"), |b| { @@ -75,8 +81,8 @@ fn bench_iter(c: &mut Criterion) { let col = create_string_view_array_with_len(4096, 0., 100, false); let converter = 
RowConverter::new(vec![SortField::new(col.data_type().clone())]).unwrap(); let rows = converter - .convert_columns(&[Arc::new(col) as ArrayRef]) - .unwrap(); + .convert_columns(&[Arc::new(col) as ArrayRef]) + .unwrap(); c.bench_function("iterate rows", |b| { b.iter(|| { @@ -296,7 +302,7 @@ fn row_bench(c: &mut Criterion) { 0., 1.0, ) - .unwrap(), + .unwrap(), ]; do_bench(c, "4096 list(0) of u64(0)", cols); @@ -311,7 +317,7 @@ fn row_bench(c: &mut Criterion) { 0., 1.0, ) - .unwrap(), + .unwrap(), ]; do_bench(c, "4096 large_list(0) of u64(0)", cols); @@ -326,7 +332,7 @@ fn row_bench(c: &mut Criterion) { 0., 1.0, ) - .unwrap(), + .unwrap(), ]; do_bench(c, "10 list(0) of u64(0)", cols); @@ -341,7 +347,7 @@ fn row_bench(c: &mut Criterion) { 0., 1.0, ) - .unwrap(), + .unwrap(), ]; do_bench(c, "10 large_list(0) of u64(0)", cols); @@ -356,8 +362,8 @@ fn row_bench(c: &mut Criterion) { 0., 1.0, ) - .unwrap() - .slice(10, 20), + .unwrap() + .slice(10, 20), ]; do_bench(c, "4096 list(0) sliced to 10 of u64(0)", cols); @@ -372,8 +378,8 @@ fn row_bench(c: &mut Criterion) { 0., 1.0, ) - .unwrap() - .slice(10, 20), + .unwrap() + .slice(10, 20), ]; do_bench(c, "4096 large_list(0) sliced to 10 of u64(0)", cols); diff --git a/parquet-testing b/parquet-testing index a3d96a65e11e..f4d7ed772a62 160000 --- a/parquet-testing +++ b/parquet-testing @@ -1 +1 @@ -Subproject commit a3d96a65e11e2bbca7d22a894e8313ede90a33a3 +Subproject commit f4d7ed772a62a95111db50fbcad2460833e8c882