Skip to content

Commit 9293c89

Browse files
alambtobixdev
andauthored
Take fsb null indices (#8981)
# Which issue does this PR close? - redo of #8948 from @tobixdev - closes #8947 # Rationale for this change I messed up @tobixdev 's PR / branch by accident in #8948 This PR contains the previous commits that were in that PR # What changes are included in this PR? - See #8948 # Are these changes tested? Yes, by CI # Are there any user-facing changes? Less bugs, faster code --------- Co-authored-by: Tobias Schwarzinger <tobias.schwarzinger@tuwien.ac.at>
1 parent 55bd73f commit 9293c89

File tree

1 file changed

+73
-21
lines changed

1 file changed

+73
-21
lines changed

arrow-select/src/take.rs

Lines changed: 73 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -384,13 +384,6 @@ pub struct TakeOptions {
384384
pub check_bounds: bool,
385385
}
386386

387-
#[inline(always)]
388-
fn maybe_usize<I: ArrowNativeType>(index: I) -> Result<usize, ArrowError> {
389-
index
390-
.to_usize()
391-
.ok_or_else(|| ArrowError::ComputeError("Cast to usize failed".to_string()))
392-
}
393-
394387
/// `take` implementation for all primitive arrays
395388
///
396389
/// This checks if an `indices` slot is populated, and gets the value from `values`
@@ -713,27 +706,60 @@ fn take_fixed_size_list<IndexType: ArrowPrimitiveType>(
713706
Ok(FixedSizeListArray::from(list_data))
714707
}
715708

709+
/// The take kernel implementation for `FixedSizeBinaryArray`.
710+
///
711+
/// The computation is done in two steps:
712+
/// - Compute the values buffer
713+
/// - Compute the null buffer
716714
fn take_fixed_size_binary<IndexType: ArrowPrimitiveType>(
717715
values: &FixedSizeBinaryArray,
718716
indices: &PrimitiveArray<IndexType>,
719717
size: i32,
720718
) -> Result<FixedSizeBinaryArray, ArrowError> {
721-
let nulls = values.nulls();
722-
let array_iter = indices
723-
.values()
724-
.iter()
725-
.map(|idx| {
726-
let idx = maybe_usize::<IndexType::Native>(*idx)?;
727-
if nulls.map(|n| n.is_valid(idx)).unwrap_or(true) {
728-
Ok(Some(values.value(idx)))
729-
} else {
730-
Ok(None)
719+
let size_usize = usize::try_from(size).map_err(|_| {
720+
ArrowError::InvalidArgumentError(format!("Cannot convert size '{}' to usize", size))
721+
})?;
722+
723+
let values_buffer = values.values().as_slice();
724+
let mut values_buffer_builder = BufferBuilder::new(indices.len() * size_usize);
725+
726+
if indices.null_count() == 0 {
727+
let array_iter = indices.values().iter().map(|idx| {
728+
let offset = idx.as_usize() * size_usize;
729+
&values_buffer[offset..offset + size_usize]
730+
});
731+
for slice in array_iter {
732+
values_buffer_builder.append_slice(slice);
733+
}
734+
} else {
735+
// The indices nullability cannot be ignored here because the values buffer may contain
736+
// nulls which should not cause a panic.
737+
let array_iter = indices.iter().map(|idx| {
738+
idx.map(|idx| {
739+
let offset = idx.as_usize() * size_usize;
740+
&values_buffer[offset..offset + size_usize]
741+
})
742+
});
743+
for slice in array_iter {
744+
match slice {
745+
None => values_buffer_builder.append_n(size_usize, 0),
746+
Some(slice) => values_buffer_builder.append_slice(slice),
731747
}
732-
})
733-
.collect::<Result<Vec<_>, ArrowError>>()?
734-
.into_iter();
748+
}
749+
}
735750

736-
FixedSizeBinaryArray::try_from_sparse_iter_with_size(array_iter, size)
751+
let values_buffer = values_buffer_builder.finish();
752+
let value_nulls = take_nulls(values.nulls(), indices);
753+
let final_nulls = NullBuffer::union(value_nulls.as_ref(), indices.nulls());
754+
755+
let array_data = ArrayDataBuilder::new(DataType::FixedSizeBinary(size))
756+
.len(indices.len())
757+
.nulls(final_nulls)
758+
.offset(0)
759+
.add_buffer(values_buffer)
760+
.build()?;
761+
762+
Ok(FixedSizeBinaryArray::from(array_data))
737763
}
738764

739765
/// `take` implementation for dictionary arrays
@@ -2096,6 +2122,32 @@ mod tests {
20962122
);
20972123
}
20982124

2125+
#[test]
2126+
fn test_take_fixed_size_binary_with_nulls_indices() {
2127+
let fsb = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
2128+
[
2129+
Some(vec![0x01, 0x01, 0x01, 0x01]),
2130+
Some(vec![0x02, 0x02, 0x02, 0x02]),
2131+
Some(vec![0x03, 0x03, 0x03, 0x03]),
2132+
Some(vec![0x04, 0x04, 0x04, 0x04]),
2133+
]
2134+
.into_iter(),
2135+
4,
2136+
)
2137+
.unwrap();
2138+
2139+
// The two middle indices are null -> Should be null in the output.
2140+
let indices = UInt32Array::from(vec![Some(0), None, None, Some(3)]);
2141+
2142+
let result = take_fixed_size_binary(&fsb, &indices, 4).unwrap();
2143+
assert_eq!(result.len(), 4);
2144+
assert_eq!(result.null_count(), 2);
2145+
assert_eq!(
2146+
result.nulls().unwrap().iter().collect::<Vec<_>>(),
2147+
vec![true, false, false, true]
2148+
);
2149+
}
2150+
20992151
#[test]
21002152
#[should_panic(expected = "index out of bounds: the len is 4 but the index is 1000")]
21012153
fn test_take_list_out_of_bounds() {

0 commit comments

Comments
 (0)