Skip to content

Commit 786f161

Browse files
committed
Add BooleanBufferBuilder::extend
1 parent 96637fc commit 786f161

File tree

1 file changed

+187
-0
lines changed

1 file changed

+187
-0
lines changed

arrow-buffer/src/builder/boolean.rs

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,132 @@ impl BooleanBufferBuilder {
259259
pub fn finish_cloned(&self) -> BooleanBuffer {
260260
BooleanBuffer::new(Buffer::from_slice_ref(self.as_slice()), 0, self.len)
261261
}
262+
263+
/// Advances the buffer by `additional` bits without initializing the new bytes.
264+
///
265+
/// # Safety
266+
/// Callers must ensure that all newly added bits are written before the buffer is read.
267+
#[inline]
268+
unsafe fn advance_uninit(&mut self, additional: usize) {
269+
let new_len = self.len + additional;
270+
let new_len_bytes = bit_util::ceil(new_len, 8);
271+
if new_len_bytes > self.buffer.len() {
272+
self.buffer.reserve(new_len_bytes - self.buffer.len());
273+
// SAFETY: caller will initialize all newly exposed bytes
274+
unsafe { self.buffer.set_len(new_len_bytes) };
275+
}
276+
self.len = new_len;
277+
}
278+
279+
/// Extends this builder with boolean values.
280+
///
281+
/// This requires `iter` to report an exact size via `size_hint`.
282+
#[inline]
283+
pub fn extend<I: Iterator<Item = bool>>(&mut self, iter: I) {
284+
let (lower, upper) = iter.size_hint();
285+
let len = upper.expect("Iterator must have exact size_hint");
286+
assert_eq!(lower, len, "Iterator must have exact size_hint");
287+
288+
if len == 0 {
289+
return;
290+
}
291+
292+
let start_len = self.len;
293+
let end_bit = start_len + len;
294+
295+
// SAFETY: we will initialize all newly exposed bytes before they are read
296+
unsafe { self.advance_uninit(len) };
297+
let slice = self.buffer.as_slice_mut();
298+
299+
let mut iter = iter;
300+
let mut bit_idx = start_len;
301+
302+
// ---- Unaligned prefix: advance to the next 64-bit boundary ----
303+
let misalignment = bit_idx & 63;
304+
let prefix_bits = if misalignment == 0 {
305+
0
306+
} else {
307+
(64 - misalignment).min(end_bit - bit_idx)
308+
};
309+
310+
if prefix_bits != 0 {
311+
let byte_start = bit_idx / 8;
312+
let byte_end = bit_util::ceil(bit_idx + prefix_bits, 8);
313+
let bit_offset = bit_idx % 8;
314+
315+
// Clear any newly-visible bits in the existing partial byte
316+
if bit_offset != 0 {
317+
let keep_mask = (1u8 << bit_offset).wrapping_sub(1);
318+
slice[byte_start] &= keep_mask;
319+
}
320+
321+
// Zero any new bytes we will partially fill in this prefix
322+
let zero_from = if bit_offset == 0 {
323+
byte_start
324+
} else {
325+
byte_start + 1
326+
};
327+
if byte_end > zero_from {
328+
slice[zero_from..byte_end].fill(0);
329+
}
330+
331+
for _ in 0..prefix_bits {
332+
let v = iter.next().unwrap();
333+
if v {
334+
let byte_idx = bit_idx / 8;
335+
let bit = bit_idx % 8;
336+
slice[byte_idx] |= 1 << bit;
337+
}
338+
bit_idx += 1;
339+
}
340+
}
341+
342+
if bit_idx < end_bit {
343+
// ---- Aligned middle: write u64 chunks ----
344+
debug_assert_eq!(bit_idx & 63, 0);
345+
let remaining_bits = end_bit - bit_idx;
346+
let chunks = remaining_bits / 64;
347+
348+
let words_start = bit_idx / 8;
349+
let words_end = words_start + chunks * 8;
350+
for dst in slice[words_start..words_end].chunks_exact_mut(8) {
351+
let mut packed: u64 = 0;
352+
for i in 0..64 {
353+
packed |= (iter.next().unwrap() as u64) << i;
354+
}
355+
dst.copy_from_slice(&packed.to_le_bytes());
356+
bit_idx += 64;
357+
}
358+
359+
// ---- Unaligned suffix: remaining < 64 bits ----
360+
let suffix_bits = end_bit - bit_idx;
361+
if suffix_bits != 0 {
362+
debug_assert_eq!(bit_idx % 8, 0);
363+
let byte_start = bit_idx / 8;
364+
let byte_end = bit_util::ceil(end_bit, 8);
365+
slice[byte_start..byte_end].fill(0);
366+
367+
for _ in 0..suffix_bits {
368+
let v = iter.next().unwrap();
369+
if v {
370+
let byte_idx = bit_idx / 8;
371+
let bit = bit_idx % 8;
372+
slice[byte_idx] |= 1 << bit;
373+
}
374+
bit_idx += 1;
375+
}
376+
}
377+
}
378+
379+
// Clear any unused bits in the last byte
380+
let remainder = end_bit % 8;
381+
if remainder != 0 {
382+
let mask = (1u8 << remainder).wrapping_sub(1);
383+
slice[bit_util::ceil(end_bit, 8) - 1] &= mask;
384+
}
385+
386+
debug_assert_eq!(bit_idx, end_bit);
387+
}
262388
}
263389

264390
impl From<BooleanBufferBuilder> for Buffer {
@@ -526,4 +652,65 @@ mod tests {
526652
assert_eq!(buf.len(), buf2.inner().len());
527653
assert_eq!(buf.as_slice(), buf2.values());
528654
}
655+
656+
#[test]
fn test_extend() {
    // Short input: nine values, spilling one bit into a second byte
    let expected = vec![true, false, true, true, false, true, true, true, false];
    let mut builder = BooleanBufferBuilder::new(0);
    builder.extend(expected.iter().copied());
    assert_eq!(builder.len(), 9);

    let finished = builder.finish();
    for (i, &v) in expected.iter().enumerate() {
        assert_eq!(finished.value(i), v);
    }

    // Test > 64 bits
    let expected: Vec<bool> = (0..100).map(|i| i % 3 == 0 || i % 7 == 0).collect();
    let mut builder = BooleanBufferBuilder::new(0);
    builder.extend(expected.iter().copied());
    assert_eq!(builder.len(), 100);

    let finished = builder.finish();
    for (i, &v) in expected.iter().enumerate() {
        assert_eq!(finished.value(i), v, "at index {}", i);
    }
}
677+
678+
#[test]
fn test_extend_misaligned() {
    // The payload is the same for every starting offset, so build it once
    let payload: Vec<bool> = (0..100).map(|i| i % 3 == 0 || i % 7 == 0).collect();

    // Test misaligned start
    for offset in 1..=64 {
        let mut builder = BooleanBufferBuilder::new(0);
        builder.append_n(offset, false);
        builder.extend(payload.iter().copied());
        assert_eq!(builder.len(), offset + 100);

        let finished = builder.finish();

        // The padding written before `extend` must be left untouched
        for i in 0..offset {
            assert!(!finished.value(i));
        }

        // The payload must appear shifted by `offset` bits
        for (i, &v) in payload.iter().enumerate() {
            assert_eq!(finished.value(offset + i), v, "at index {}", offset + i);
        }
    }
}
698+
699+
#[test]
fn test_extend_misaligned_end() {
    for len in 1..130 {
        // Alternating pattern, appended twice so the second `extend`
        // starts wherever the first one happened to end
        let half: Vec<bool> = (0..len).map(|i| i % 2 == 0).collect();

        let mut builder = BooleanBufferBuilder::new(0);
        builder.extend(half.iter().copied());
        builder.extend(half.iter().copied());
        assert_eq!(builder.len(), 2 * len);

        let expected = half.repeat(2);
        let finished = builder.finish();
        for (i, &v) in expected.iter().enumerate() {
            assert_eq!(finished.value(i), v, "at index {} for len {}", i, len);
        }
    }
}
529716
}

0 commit comments

Comments
 (0)