From a810e50714493d69d90e342fa2cb2c841f5266c7 Mon Sep 17 00:00:00 2001 From: Tobias Schwarzinger Date: Wed, 3 Dec 2025 22:27:39 +0100 Subject: [PATCH 1/5] Check validity of indices in take_fixed_size_binary --- arrow-select/src/take.rs | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs index a42e28410ca7..38844d7359da 100644 --- a/arrow-select/src/take.rs +++ b/arrow-select/src/take.rs @@ -24,8 +24,8 @@ use arrow_array::cast::AsArray; use arrow_array::types::*; use arrow_array::*; use arrow_buffer::{ - ArrowNativeType, BooleanBuffer, Buffer, MutableBuffer, NullBuffer, OffsetBuffer, ScalarBuffer, - bit_util, + bit_util, ArrowNativeType, BooleanBuffer, Buffer, MutableBuffer, NullBuffer, OffsetBuffer, + ScalarBuffer, }; use arrow_data::ArrayDataBuilder; use arrow_schema::{ArrowError, DataType, FieldRef, UnionMode}; @@ -703,10 +703,13 @@ fn take_fixed_size_binary( ) -> Result { let nulls = values.nulls(); let array_iter = indices - .values() .iter() .map(|idx| { - let idx = maybe_usize::(*idx)?; + let Some(idx) = idx else { + return Ok(None); + }; + + let idx = maybe_usize::(idx)?; if nulls.map(|n| n.is_valid(idx)).unwrap_or(true) { Ok(Some(values.value(idx))) } else { @@ -2079,6 +2082,32 @@ mod tests { ); } + #[test] + fn test_take_fixed_size_binary_with_nulls_indices() { + let fsb = FixedSizeBinaryArray::try_from_sparse_iter_with_size( + [ + Some(vec![0x01, 0x01, 0x01, 0x01]), + Some(vec![0x02, 0x02, 0x02, 0x02]), + Some(vec![0x03, 0x03, 0x03, 0x03]), + Some(vec![0x04, 0x04, 0x04, 0x04]), + ] + .into_iter(), + 4, + ) + .unwrap(); + + // The two middle indices are null -> Should be null in the output. + let indices = UInt32Array::from(vec![Some(0), None, None, Some(3)]); + + let result = take_fixed_size_binary(&fsb, &indices, 4).unwrap(); + assert_eq!(result.len(), 4); + assert_eq!(result.null_count(), 2); + assert_eq!( + result.nulls().unwrap().iter().collect::>(), + vec![true, false, false, true] + ); + } + #[test] #[should_panic(expected = "index out of bounds: the len is 4 but the index is 1000")] fn test_take_list_out_of_bounds() { From 10425c73be49787a340273fbf1203c9ce4b05158 Mon Sep 17 00:00:00 2001 From: Tobias Schwarzinger Date: Wed, 3 Dec 2025 22:37:58 +0100 Subject: [PATCH 2/5] Formatting --- arrow-select/src/take.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs index 38844d7359da..9f177d73d41e 100644 --- a/arrow-select/src/take.rs +++ b/arrow-select/src/take.rs @@ -24,8 +24,8 @@ use arrow_array::cast::AsArray; use arrow_array::types::*; use arrow_array::*; use arrow_buffer::{ - bit_util, ArrowNativeType, BooleanBuffer, Buffer, MutableBuffer, NullBuffer, OffsetBuffer, - ScalarBuffer, + ArrowNativeType, BooleanBuffer, Buffer, MutableBuffer, NullBuffer, OffsetBuffer, ScalarBuffer, + bit_util, }; use arrow_data::ArrayDataBuilder; use arrow_schema::{ArrowError, DataType, FieldRef, UnionMode}; From 8941959984aae85d9f8227ed0e999c70f4ea9072 Mon Sep 17 00:00:00 2001 From: Tobias Schwarzinger Date: Wed, 10 Dec 2025 21:36:44 +0100 Subject: [PATCH 3/5] Use a different approach to take fixed size binary that applies null buffers separately at the end --- arrow-select/src/take.rs | 50 ++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs index 9f177d73d41e..8fd4b77604ef 100644 --- a/arrow-select/src/take.rs +++ b/arrow-select/src/take.rs @@ -367,13 +367,6 @@ pub struct TakeOptions { pub check_bounds: bool, } -#[inline(always)] -fn maybe_usize(index: I) -> Result { - index - .to_usize() - .ok_or_else(|| ArrowError::ComputeError("Cast to usize failed".to_string())) -} - /// `take` implementation for all primitive arrays /// /// This checks if an `indices` slot is populated, and gets the value from `values` @@ -701,25 +694,32 @@ fn take_fixed_size_binary( indices: &PrimitiveArray, size: i32, ) -> Result { - let nulls = values.nulls(); - let array_iter = indices - .iter() - .map(|idx| { - let Some(idx) = idx else { - return Ok(None); - }; - - let idx = maybe_usize::(idx)?; - if nulls.map(|n| n.is_valid(idx)).unwrap_or(true) { - Ok(Some(values.value(idx))) - } else { - Ok(None) - } - }) - .collect::, ArrowError>>()? - .into_iter(); + let size_usize = usize::try_from(size).map_err(|_| { + ArrowError::InvalidArgumentError(format!("Cannot convert size '{}' to usize", size)) + })?; + + let values_buffer = values.values().as_slice(); + let mut values_buffer_builder = BufferBuilder::new(indices.len() * size_usize); + let array_iter = indices.values().iter().map(|idx| { + let offset = idx.as_usize() * size_usize; + &values_buffer[offset..offset + size_usize] + }); + for slice in array_iter { + values_buffer_builder.append_slice(slice); + } + let values_buffer = values_buffer_builder.finish(); + + let value_nulls = take_nulls(values.nulls(), indices); + let final_nulls = NullBuffer::union(value_nulls.as_ref(), indices.nulls()); + + let array_data = ArrayDataBuilder::new(DataType::FixedSizeBinary(size)) + .len(indices.len()) + .nulls(final_nulls) + .offset(0) + .add_buffer(values_buffer) + .build()?; - FixedSizeBinaryArray::try_from_sparse_iter_with_size(array_iter, size) + Ok(FixedSizeBinaryArray::from(array_data)) } /// `take` implementation for dictionary arrays From 286ddfe7fbe939bd2b45ecfe5180f58136815d6f Mon Sep 17 00:00:00 2001 From: Tobias Schwarzinger Date: Wed, 10 Dec 2025 21:44:20 +0100 Subject: [PATCH 4/5] Add small Comment --- arrow-select/src/take.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs index 8fd4b77604ef..5e49c1b4255d 100644 --- a/arrow-select/src/take.rs +++ b/arrow-select/src/take.rs @@ -689,6 +689,11 @@ fn take_fixed_size_list( Ok(FixedSizeListArray::from(list_data)) } +/// The take kernel implementation for `FixedSizeBinaryArray`. +/// +/// The computation is done in two steps: +/// - Compute the values buffer +/// - Compute the null buffer fn take_fixed_size_binary( values: &FixedSizeBinaryArray, indices: &PrimitiveArray, From 885f31e26ec2019132f7b1512b9b3f08dc873c1a Mon Sep 17 00:00:00 2001 From: Tobias Schwarzinger Date: Wed, 10 Dec 2025 22:05:09 +0100 Subject: [PATCH 5/5] Ensure that a null index cannot create a panic --- arrow-select/src/take.rs | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs index 5e49c1b4255d..8b32f0ec7631 100644 --- a/arrow-select/src/take.rs +++ b/arrow-select/src/take.rs @@ -705,15 +705,33 @@ fn take_fixed_size_binary( let values_buffer = values.values().as_slice(); let mut values_buffer_builder = BufferBuilder::new(indices.len() * size_usize); - let array_iter = indices.values().iter().map(|idx| { - let offset = idx.as_usize() * size_usize; - &values_buffer[offset..offset + size_usize] - }); - for slice in array_iter { - values_buffer_builder.append_slice(slice); + + if indices.null_count() == 0 { + let array_iter = indices.values().iter().map(|idx| { + let offset = idx.as_usize() * size_usize; + &values_buffer[offset..offset + size_usize] + }); + for slice in array_iter { + values_buffer_builder.append_slice(slice); + } + } else { + // The indices nullability cannot be ignored here because the values buffer may contain + // nulls which should not cause a panic. + let array_iter = indices.iter().map(|idx| { + idx.map(|idx| { + let offset = idx.as_usize() * size_usize; + &values_buffer[offset..offset + size_usize] + }) + }); + for slice in array_iter { + match slice { + None => values_buffer_builder.append_n(size_usize, 0), + Some(slice) => values_buffer_builder.append_slice(slice), + } + } } - let values_buffer = values_buffer_builder.finish(); + let values_buffer = values_buffer_builder.finish(); let value_nulls = take_nulls(values.nulls(), indices); let final_nulls = NullBuffer::union(value_nulls.as_ref(), indices.nulls());