Skip to content

Commit 0c30129

Browse files
committed
Merge remote-tracking branch 'apache/main' into fix/ipc-null-dictionary
2 parents b9e63df + 65ad652 commit 0c30129

File tree

54 files changed

+3716
-948
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+3716
-948
lines changed

.github/workflows/miri.sh

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,8 @@ cargo miri setup
1212
cargo clean
1313

1414
echo "Starting Arrow MIRI run..."
15-
cargo miri test -p arrow-buffer
16-
cargo miri test -p arrow-data --features ffi
17-
cargo miri test -p arrow-schema --features ffi
18-
cargo miri test -p arrow-ord
19-
cargo miri test -p arrow-array
20-
cargo miri test -p arrow-arith
15+
cargo miri nextest run \
16+
-p arrow-buffer -p arrow-data \
17+
-p arrow-schema -p arrow-ord \
18+
-p arrow-array -p arrow-arith \
19+
--features ffi --no-fail-fast

.github/workflows/miri.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ jobs:
5555
rustup toolchain install nightly --component miri
5656
rustup override set nightly
5757
cargo miri setup
58+
- name: Set up nextest
59+
run: |
60+
cargo install cargo-nextest --version 0.9.132 --locked
5861
- name: Run Miri Checks
5962
env:
6063
RUST_BACKTRACE: full

arrow-arith/src/aggregate.rs

Lines changed: 18 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
use arrow_array::cast::*;
2121
use arrow_array::iterator::ArrayIter;
2222
use arrow_array::*;
23-
use arrow_buffer::{ArrowNativeType, NullBuffer};
23+
use arrow_buffer::NullBuffer;
2424
use arrow_data::bit_iterator::try_for_each_valid_idx;
2525
use arrow_schema::*;
2626
use std::borrow::BorrowMut;
@@ -541,11 +541,9 @@ pub fn min_string_view(array: &StringViewArray) -> Option<&str> {
541541
///
542542
/// This doesn't detect overflow. Once overflowing, the result will wrap around.
543543
/// For an overflow-checking variant, use [`sum_array_checked`] instead.
544-
pub fn sum_array<T, A: ArrayAccessor<Item = T::Native>>(array: A) -> Option<T::Native>
545-
where
546-
T: ArrowNumericType,
547-
T::Native: ArrowNativeTypeOp,
548-
{
544+
pub fn sum_array<T: ArrowNumericType, A: ArrayAccessor<Item = T::Native>>(
545+
array: A,
546+
) -> Option<T::Native> {
549547
match array.data_type() {
550548
DataType::Dictionary(_, _) => {
551549
let null_count = array.null_count();
@@ -583,13 +581,9 @@ where
583581
/// use [`sum_array`] instead.
584582
/// Additionally returns an `Err` on run-end-encoded arrays with a provided
585583
/// values type parameter that is incorrect.
586-
pub fn sum_array_checked<T, A: ArrayAccessor<Item = T::Native>>(
584+
pub fn sum_array_checked<T: ArrowNumericType, A: ArrayAccessor<Item = T::Native>>(
587585
array: A,
588-
) -> Result<Option<T::Native>, ArrowError>
589-
where
590-
T: ArrowNumericType,
591-
T::Native: ArrowNativeTypeOp,
592-
{
586+
) -> Result<Option<T::Native>, ArrowError> {
593587
match array.data_type() {
594588
DataType::Dictionary(_, _) => {
595589
let null_count = array.null_count();
@@ -717,21 +711,17 @@ mod ree {
717711

718712
/// Returns the min of values in the array of `ArrowNumericType` type, or dictionary
719713
/// array with value of `ArrowNumericType` type.
720-
pub fn min_array<T, A: ArrayAccessor<Item = T::Native>>(array: A) -> Option<T::Native>
721-
where
722-
T: ArrowNumericType,
723-
T::Native: ArrowNativeType,
724-
{
714+
pub fn min_array<T: ArrowNumericType, A: ArrayAccessor<Item = T::Native>>(
715+
array: A,
716+
) -> Option<T::Native> {
725717
min_max_array_helper::<T, A, _, _>(array, |a, b| a.is_gt(*b), min)
726718
}
727719

728720
/// Returns the max of values in the array of `ArrowNumericType` type, or dictionary
729721
/// array with value of `ArrowNumericType` type.
730-
pub fn max_array<T, A: ArrayAccessor<Item = T::Native>>(array: A) -> Option<T::Native>
731-
where
732-
T: ArrowNumericType,
733-
T::Native: ArrowNativeTypeOp,
734-
{
722+
pub fn max_array<T: ArrowNumericType, A: ArrayAccessor<Item = T::Native>>(
723+
array: A,
724+
) -> Option<T::Native> {
735725
min_max_array_helper::<T, A, _, _>(array, |a, b| a.is_lt(*b), max)
736726
}
737727

@@ -874,11 +864,9 @@ pub fn bool_or(array: &BooleanArray) -> Option<bool> {
874864
///
875865
/// This detects overflow and returns an `Err` for that. For a non-overflow-checking variant,
876866
/// use [`sum`] instead.
877-
pub fn sum_checked<T>(array: &PrimitiveArray<T>) -> Result<Option<T::Native>, ArrowError>
878-
where
879-
T: ArrowNumericType,
880-
T::Native: ArrowNativeTypeOp,
881-
{
867+
pub fn sum_checked<T: ArrowNumericType>(
868+
array: &PrimitiveArray<T>,
869+
) -> Result<Option<T::Native>, ArrowError> {
882870
let null_count = array.null_count();
883871

884872
if null_count == array.len() {
@@ -922,10 +910,7 @@ where
922910
///
923911
/// This doesn't detect overflow in release mode by default. Once overflowing, the result will
924912
/// wrap around. For an overflow-checking variant, use [`sum_checked`] instead.
925-
pub fn sum<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
926-
where
927-
T::Native: ArrowNativeTypeOp,
928-
{
913+
pub fn sum<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native> {
929914
aggregate::<T::Native, T, SumAccumulator<T::Native>>(array)
930915
}
931916

@@ -940,10 +925,7 @@ where
940925
/// let result = min(&array);
941926
/// assert_eq!(result, Some(2));
942927
/// ```
943-
pub fn min<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
944-
where
945-
T::Native: PartialOrd,
946-
{
928+
pub fn min<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native> {
947929
aggregate::<T::Native, T, MinAccumulator<T::Native>>(array)
948930
}
949931

@@ -958,10 +940,7 @@ where
958940
/// let result = max(&array);
959941
/// assert_eq!(result, Some(8));
960942
/// ```
961-
pub fn max<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
962-
where
963-
T::Native: PartialOrd,
964-
{
943+
pub fn max<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native> {
965944
aggregate::<T::Native, T, MaxAccumulator<T::Native>>(array)
966945
}
967946

arrow-arith/src/arithmetic.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -170,11 +170,7 @@ pub fn multiply_fixed_point(
170170
}
171171

172172
/// Divide a decimal native value by given divisor and round the result.
173-
fn divide_and_round<I>(input: I::Native, div: I::Native) -> I::Native
174-
where
175-
I: DecimalType,
176-
I::Native: ArrowNativeTypeOp,
177-
{
173+
fn divide_and_round<I: DecimalType>(input: I::Native, div: I::Native) -> I::Native {
178174
let d = input.div_wrapping(div);
179175
let r = input.mod_wrapping(div);
180176

arrow-array/src/array/byte_view_array.rs

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -670,6 +670,37 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
670670
}
671671
}
672672

673+
/// Returns the total number of bytes of all non-null values in this array.
674+
///
675+
/// Unlike [`Self::total_buffer_bytes_used`], this method includes inlined strings
676+
/// (those with length ≤ [`MAX_INLINE_VIEW_LEN`]), making it suitable as a
677+
/// capacity hint when pre-allocating output buffers.
678+
///
679+
/// Null values are excluded from the sum.
680+
///
681+
/// # Example
682+
///
683+
/// ```rust
684+
/// # use arrow_array::StringViewArray;
685+
/// let array = StringViewArray::from_iter(vec![
686+
/// Some("hello"), // 5 bytes, inlined
687+
/// None, // excluded
688+
/// Some("large payload over 12 bytes"), // 27 bytes, non-inlined
689+
/// ]);
690+
/// assert_eq!(array.total_bytes_len(), 5 + 27);
691+
/// ```
692+
pub fn total_bytes_len(&self) -> usize {
693+
match self.nulls() {
694+
None => self.views().iter().map(|v| (*v as u32) as usize).sum(),
695+
Some(nulls) => self
696+
.views()
697+
.iter()
698+
.zip(nulls.iter())
699+
.map(|(v, is_valid)| if is_valid { (*v as u32) as usize } else { 0 })
700+
.sum(),
701+
}
702+
}
703+
673704
/// Returns the total number of bytes used by all non inlined views in all
674705
/// buffers.
675706
///
@@ -1809,4 +1840,41 @@ mod tests {
18091840
assert!(from_utf8(array.value(2)).is_ok());
18101841
array
18111842
}
1843+
1844+
#[test]
1845+
fn test_total_bytes_len() {
1846+
// inlined: "hello"=5, "world"=5, "lulu"=4 → 14
1847+
// non-inlined: "large payload over 12 bytes"=27
1848+
// null: should not count
1849+
let mut builder = StringViewBuilder::new();
1850+
builder.append_value("hello");
1851+
builder.append_value("world");
1852+
builder.append_value("lulu");
1853+
builder.append_null();
1854+
builder.append_value("large payload over 12 bytes");
1855+
let array = builder.finish();
1856+
assert_eq!(array.total_bytes_len(), 5 + 5 + 4 + 27);
1857+
}
1858+
1859+
#[test]
1860+
fn test_total_bytes_len_empty() {
1861+
let array = StringViewArray::from_iter::<Vec<Option<&str>>>(vec![]);
1862+
assert_eq!(array.total_bytes_len(), 0);
1863+
}
1864+
1865+
#[test]
1866+
fn test_total_bytes_len_all_nulls() {
1867+
let array = StringViewArray::new_null(5);
1868+
assert_eq!(array.total_bytes_len(), 0);
1869+
}
1870+
1871+
#[test]
1872+
fn test_total_bytes_len_binary_view() {
1873+
let array = BinaryViewArray::from_iter(vec![
1874+
Some(b"hi".as_ref()),
1875+
None,
1876+
Some(b"large payload over 12 bytes".as_ref()),
1877+
]);
1878+
assert_eq!(array.total_bytes_len(), 2 + 27);
1879+
}
18121880
}

arrow-buffer/src/buffer/mutable.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -450,7 +450,13 @@ impl MutableBuffer {
450450

451451
/// Clear all existing data from this buffer.
452452
pub fn clear(&mut self) {
453-
self.len = 0
453+
self.len = 0;
454+
#[cfg(feature = "pool")]
455+
{
456+
if let Some(reservation) = self.reservation.lock().unwrap().as_mut() {
457+
reservation.resize(self.len);
458+
}
459+
}
454460
}
455461

456462
/// Returns the data stored in this buffer as a slice.
@@ -1371,7 +1377,7 @@ mod tests {
13711377
assert_eq!(pool.used(), 40);
13721378

13731379
// Clear the buffer (resets the length to zero)
1374-
buffer.truncate(0);
1380+
buffer.clear();
13751381
assert_eq!(buffer.len(), 0);
13761382
assert_eq!(pool.used(), 0);
13771383
}

arrow-cast/src/cast/decimal.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -812,7 +812,7 @@ where
812812
T: ArrowPrimitiveType,
813813
<T as ArrowPrimitiveType>::Native: NumCast,
814814
D: DecimalType + ArrowPrimitiveType,
815-
<D as ArrowPrimitiveType>::Native: ArrowNativeTypeOp + ToPrimitive,
815+
<D as ArrowPrimitiveType>::Native: ToPrimitive,
816816
{
817817
let array = array.as_primitive::<D>();
818818

0 commit comments

Comments
 (0)