Skip to content

Commit 1acccc7

Browse files
committed
faster null handling
1 parent 82acfe1 commit 1acccc7

File tree

1 file changed

+80
-7
lines changed

1 file changed

+80
-7
lines changed

arrow-buffer/src/builder/null.rs

Lines changed: 80 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -223,15 +223,50 @@ impl NullBufferBuilder {
223223

224224
let slice = buf.as_slice_mut();
225225
let mut bit_idx = start_len;
226+
let end_bit = start_len + len;
227+
228+
// Process in chunks of 64 bits when byte-aligned for better performance
229+
if start_len % 8 == 0 {
230+
let start_byte = start_len / 8;
231+
let mut iter = iter.peekable();
232+
233+
// Process full u64 chunks (64 bits at a time)
234+
while bit_idx + 64 <= end_bit && iter.peek().is_some() {
235+
let mut chunk: u64 = 0;
236+
for i in 0..64 {
237+
if let Some(valid) = iter.next() {
238+
if valid {
239+
chunk |= 1u64 << i;
240+
}
241+
} else {
242+
break;
243+
}
244+
}
245+
let byte_idx = (bit_idx - start_len) / 8 + start_byte;
246+
// Write the u64 chunk as 8 bytes
247+
slice[byte_idx..byte_idx + 8].copy_from_slice(&chunk.to_le_bytes());
248+
bit_idx += 64;
249+
}
226250

227-
// Process bits - set bit if true (buffer initialized to 0, so false bits are already correct)
228-
for valid in iter {
229-
if valid {
230-
let byte_idx = bit_idx / 8;
231-
let bit_offset = bit_idx % 8;
232-
slice[byte_idx] |= 1 << bit_offset;
251+
// Process remaining bits
252+
for valid in iter {
253+
if valid {
254+
let byte_idx = bit_idx / 8;
255+
let bit_offset = bit_idx % 8;
256+
slice[byte_idx] |= 1 << bit_offset;
257+
}
258+
bit_idx += 1;
259+
}
260+
} else {
261+
// Non-aligned case: process bit by bit
262+
for valid in iter {
263+
if valid {
264+
let byte_idx = bit_idx / 8;
265+
let bit_offset = bit_idx % 8;
266+
slice[byte_idx] |= 1 << bit_offset;
267+
}
268+
bit_idx += 1;
233269
}
234-
bit_idx += 1;
235270
}
236271

237272
debug_assert_eq!(bit_idx, start_len + len);
@@ -446,4 +481,42 @@ mod tests {
446481

447482
assert_eq!(builder.finish(), None);
448483
}
484+
485+
/// Exercises `NullBufferBuilder::extend` across the size and alignment cases
/// that select different code paths: a short run, exactly one 64-bit chunk,
/// a chunk followed by a bit-by-bit tail, a byte-aligned non-zero start, and
/// a non-byte-aligned start.
#[test]
fn test_extend() {
    // Fewer than 64 bits: too short for a full 64-bit chunk.
    let mut builder = NullBufferBuilder::new(0);
    builder.extend([true, false, true, true].iter().copied());
    assert_eq!(builder.as_slice().unwrap(), &[0b1011_u8]);

    // Exactly 64 bits starting at bit 0: a single full 64-bit chunk.
    let mut builder = NullBufferBuilder::new(0);
    let pattern: Vec<bool> = (0..64).map(|i| i % 2 == 0).collect();
    builder.extend(pattern.iter().copied());
    // Even positions are valid. Bits are packed LSB-first within each byte,
    // so every byte is 0b0101_0101 (0x55).
    assert_eq!(
        builder.as_slice().unwrap(),
        &[0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55]
    );

    // More than 64 bits: one full chunk followed by a 36-bit tail.
    let mut builder = NullBufferBuilder::new(0);
    let pattern: Vec<bool> = (0..100).map(|i| i % 3 == 0).collect();
    builder.extend(pattern.iter().copied());
    assert_eq!(builder.len(), 100);
    // Check every bit rather than a sample, so that an error at the
    // chunk/tail boundary (bits 63 and 64) cannot go unnoticed.
    let buf = builder.finish().unwrap();
    for (i, &expected) in pattern.iter().enumerate() {
        assert_eq!(buf.is_valid(i), expected, "mismatch at bit {}", i);
    }

    // Byte-aligned but non-zero start: 8 existing bits, then 70 new ones.
    // This covers writing a 64-bit chunk at a byte offset other than 0.
    let mut builder = NullBufferBuilder::new(0);
    for _ in 0..8 {
        builder.append_non_null();
    }
    let pattern: Vec<bool> = (0..70).map(|i| i % 3 == 0).collect();
    builder.extend(pattern.iter().copied());
    assert_eq!(builder.len(), 78);
    let buf = builder.finish().unwrap();
    for i in 0..8 {
        assert!(buf.is_valid(i), "pre-existing bit {} must stay valid", i);
    }
    for (i, &expected) in pattern.iter().enumerate() {
        assert_eq!(buf.is_valid(8 + i), expected, "mismatch at bit {}", 8 + i);
    }

    // Non-byte-aligned start: one existing bit forces the bit-by-bit path.
    let mut builder = NullBufferBuilder::new(0);
    builder.append_non_null(); // Next write lands at bit 1.
    builder.extend([false, true, false, true].iter().copied());
    assert_eq!(builder.as_slice().unwrap(), &[0b10101_u8]);
}
449522
}

0 commit comments

Comments
 (0)