Skip to content

Commit c39a455

Browse files
committed
More speed
1 parent 6ee1f04 commit c39a455

File tree

2 files changed

+92
-21
lines changed

2 files changed

+92
-21
lines changed

arrow-buffer/src/builder/null.rs

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,57 @@ impl NullBufferBuilder {
193193
}
194194
}
195195

196+
/// Extends this builder with validity values from a trusted length iterator.
197+
///
198+
/// This is more efficient than calling `append` in a loop as it processes
199+
/// 64 bits at a time internally.
200+
///
201+
/// # Safety
202+
///
203+
/// The iterator must report its length correctly via `size_hint()`.
204+
/// Using an iterator that reports an incorrect length is undefined behavior.
205+
///
206+
/// # Example
207+
/// ```
208+
/// # use arrow_buffer::NullBufferBuilder;
209+
/// let mut builder = NullBufferBuilder::new(8);
210+
/// let validities = [true, false, true, true];
211+
/// // SAFETY: slice iterator reports correct length
212+
/// unsafe { builder.extend_from_trusted_len_iter(validities.iter().copied()) };
213+
/// assert_eq!(builder.len(), 4);
214+
/// ```
215+
pub unsafe fn extend_from_trusted_len_iter<I: Iterator<Item = bool>>(&mut self, iter: I) {
216+
let (_, upper) = iter.size_hint();
217+
let len = upper.expect("extend_from_trusted_len_iter requires an upper limit");
218+
219+
if len == 0 {
220+
return;
221+
}
222+
223+
// Materialize since we're about to append bits
224+
self.materialize_if_needed();
225+
226+
let buf = self.bitmap_builder.as_mut().unwrap();
227+
let start_len = buf.len();
228+
// Advance to allocate space, initializing new bits to 0
229+
buf.advance(len);
230+
231+
let slice = buf.as_slice_mut();
232+
let mut bit_idx = start_len;
233+
234+
// Process bits - set bit if true (buffer initialized to 0, so false bits are already correct)
235+
for valid in iter {
236+
if valid {
237+
let byte_idx = bit_idx / 8;
238+
let bit_offset = bit_idx % 8;
239+
slice[byte_idx] |= 1 << bit_offset;
240+
}
241+
bit_idx += 1;
242+
}
243+
244+
debug_assert_eq!(bit_idx, start_len + len);
245+
}
246+
196247
/// Builds the null buffer and resets the builder.
197248
/// Returns `None` if the builder only contains `true`s.
198249
pub fn finish(&mut self) -> Option<NullBuffer> {

arrow-select/src/coalesce/primitive.rs

Lines changed: 41 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use crate::coalesce::InProgressArray;
1919
use crate::filter::{FilterPredicate, IndexIterator, IterationStrategy, SlicesIterator};
2020
use arrow_array::cast::AsArray;
2121
use arrow_array::{Array, ArrayRef, ArrowPrimitiveType, PrimitiveArray};
22-
use arrow_buffer::{NullBufferBuilder, ScalarBuffer};
22+
use arrow_buffer::{NullBufferBuilder, ScalarBuffer, bit_util};
2323
use arrow_schema::{ArrowError, DataType};
2424
use std::fmt::Debug;
2525
use std::sync::Arc;
@@ -153,16 +153,28 @@ impl<T: ArrowPrimitiveType + Debug> InProgressArray for InProgressPrimitiveArray
153153
IterationStrategy::IndexIterator => {
154154
// Copy values and nulls for each index
155155
if let Some(nulls) = s.nulls().filter(|n| n.null_count() > 0) {
156-
let indices = IndexIterator::new(filter.filter_array(), count);
157-
self.current.extend(indices.map(|idx: usize| {
158-
if nulls.is_null(idx) {
159-
self.nulls.append_null();
160-
} else {
161-
self.nulls.append_non_null();
162-
}
163-
// SAFETY: idx is derived from filter predicate
164-
unsafe { *values.get_unchecked(idx) }
165-
}));
156+
let null_buffer = nulls.inner();
157+
let null_ptr = null_buffer.values().as_ptr();
158+
let null_offset = null_buffer.offset();
159+
160+
// Collect indices for reuse (values + nulls)
161+
let indices: Vec<usize> =
162+
IndexIterator::new(filter.filter_array(), count).collect();
163+
164+
// Efficiently extend null buffer
165+
// SAFETY: indices iterator reports correct length
166+
unsafe {
167+
self.nulls.extend_from_trusted_len_iter(
168+
indices
169+
.iter()
170+
.map(|&idx| bit_util::get_bit_raw(null_ptr, idx + null_offset)),
171+
);
172+
}
173+
174+
// Copy values
175+
// SAFETY: indices are derived from filter predicate
176+
self.current
177+
.extend(indices.iter().map(|&idx| unsafe { *values.get_unchecked(idx) }));
166178
} else {
167179
self.nulls.append_n_non_nulls(count);
168180
let indices = IndexIterator::new(filter.filter_array(), count);
@@ -174,16 +186,24 @@ impl<T: ArrowPrimitiveType + Debug> InProgressArray for InProgressPrimitiveArray
174186
IterationStrategy::Indices(indices) => {
175187
// Copy values and nulls using precomputed indices
176188
if let Some(nulls) = s.nulls().filter(|n| n.null_count() > 0) {
177-
self.current.extend(indices.iter().map(|&idx| {
178-
// TODO: speed up
179-
if nulls.is_null(idx) {
180-
self.nulls.append_null();
181-
} else {
182-
self.nulls.append_non_null();
183-
}
184-
// SAFETY: indices are derived from filter predicate
185-
unsafe { *values.get_unchecked(idx) }
186-
}));
189+
let null_buffer = nulls.inner();
190+
let null_ptr = null_buffer.values().as_ptr();
191+
let null_offset = null_buffer.offset();
192+
193+
// Efficiently extend null buffer
194+
// SAFETY: indices iterator reports correct length
195+
unsafe {
196+
self.nulls.extend_from_trusted_len_iter(
197+
indices
198+
.iter()
199+
.map(|&idx| bit_util::get_bit_raw(null_ptr, idx + null_offset)),
200+
);
201+
}
202+
203+
// Copy values
204+
// SAFETY: indices are derived from filter predicate
205+
self.current
206+
.extend(indices.iter().map(|&idx| unsafe { *values.get_unchecked(idx) }));
187207
} else {
188208
self.nulls.append_n_non_nulls(count);
189209
// SAFETY: indices are derived from filter predicate

0 commit comments

Comments
 (0)