@@ -169,9 +169,10 @@ impl BooleanBuffer {
169169 /// * The output always has zero offset
170170 ///
171171 /// # See Also
172+ /// - [`BooleanBuffer::from_bitwise_binary_op`] to create a new buffer from a binary operation
172173 /// - [`apply_bitwise_unary_op`](bit_util::apply_bitwise_unary_op) for in-place unary bitwise operations
173174 ///
174- /// # Example: Create new [`BooleanBuffer`] from bitwise `NOT` of an input [`Buffer`]
175+ /// # Example: Create new [`BooleanBuffer`] from bitwise `NOT` of a byte slice
175176 /// ```
176177 /// # use arrow_buffer::BooleanBuffer;
177178 /// let input = [0b11001100u8, 0b10111010u8]; // 2 bytes = 16 bits
@@ -221,9 +222,8 @@ impl BooleanBuffer {
221222 result. truncate ( chunks. num_bytes ( ) ) ;
222223 }
223224
224- let buffer = Buffer :: from ( result) ;
225225 BooleanBuffer {
226- buffer,
226+ buffer : Buffer :: from ( result ) ,
227227 bit_offset : 0 ,
228228 bit_len : len_in_bits,
229229 }
@@ -254,6 +254,112 @@ impl BooleanBuffer {
254254 Some ( BooleanBuffer :: new ( buffer, 0 , len_in_bits) )
255255 }
256256
257+ /// Create a new [`BooleanBuffer`] by applying the bitwise operation `op` to
258+ /// the relevant bits from two input buffers.
259+ ///
260+ /// This function is faster than applying the operation bit by bit as
261+ /// it processes input buffers in chunks of 64 bits (8 bytes) at a time
262+ ///
263+ /// # Notes:
264+ /// See notes on [Self::from_bitwise_unary_op]
265+ ///
266+ /// # See Also
267+ /// - [`BooleanBuffer::from_bitwise_unary_op`] for unary operations on a single input buffer.
268+ /// - [`apply_bitwise_binary_op`](bit_util::apply_bitwise_binary_op) for in-place binary bitwise operations
269+ ///
270+ /// # Example: Create new [`BooleanBuffer`] from bitwise `AND` of two [`Buffer`]s
271+ /// ```
272+ /// # use arrow_buffer::{Buffer, BooleanBuffer};
273+ /// let left = Buffer::from(vec![0b11001100u8, 0b10111010u8]); // 2 bytes = 16 bits
274+ /// let right = Buffer::from(vec![0b10101010u8, 0b11011100u8, 0b11110000u8]); // 3 bytes = 24 bits
275+ /// // AND of the first 12 bits
276+ /// let result = BooleanBuffer::from_bitwise_binary_op(
277+ /// &left, 0, &right, 0, 12, |a, b| a & b
278+ /// );
279+ /// assert_eq!(result.inner().as_slice(), &[0b10001000u8, 0b00001000u8]);
280+ /// ```
281+ ///
282+ /// # Example: Create new [`BooleanBuffer`] from bitwise `OR` of two byte slices
283+ /// ```
284+ /// # use arrow_buffer::BooleanBuffer;
285+ /// let left = [0b11001100u8, 0b10111010u8];
286+ /// let right = [0b10101010u8, 0b11011100u8];
287+ /// // OR of bits 4..16 from left and bits 0..12 from right
288+ /// let result = BooleanBuffer::from_bitwise_binary_op(
289+ /// &left, 4, &right, 0, 12, |a, b| a | b
290+ /// );
291+ /// assert_eq!(result.inner().as_slice(), &[0b10101110u8, 0b00001111u8]);
292+ /// ```
293+ pub fn from_bitwise_binary_op < F > (
294+ left : impl AsRef < [ u8 ] > ,
295+ left_offset_in_bits : usize ,
296+ right : impl AsRef < [ u8 ] > ,
297+ right_offset_in_bits : usize ,
298+ len_in_bits : usize ,
299+ mut op : F ,
300+ ) -> Self
301+ where
302+ F : FnMut ( u64 , u64 ) -> u64 ,
303+ {
304+ let left = left. as_ref ( ) ;
305+ let right = right. as_ref ( ) ;
306+ // try fast path for aligned input
307+ // If the underlying buffers are aligned to u64 we can apply the operation directly on the u64 slices
308+ // to improve performance.
309+ if left_offset_in_bits & 0x7 == 0 && right_offset_in_bits & 0x7 == 0 {
310+ // align to byte boundary
311+ let left = & left[ left_offset_in_bits / 8 ..] ;
312+ let right = & right[ right_offset_in_bits / 8 ..] ;
313+
314+ unsafe {
315+ let ( left_prefix, left_u64s, left_suffix) = left. align_to :: < u64 > ( ) ;
316+ let ( right_prefix, right_u64s, right_suffix) = right. align_to :: < u64 > ( ) ;
317+ // if there is no prefix or suffix, both buffers are aligned and
318+ // we can do the operation directly on u64s.
319+ // TODO: consider `slice::as_chunks` and `u64::from_le_bytes` when MSRV reaches 1.88.
320+ // https://github.com/apache/arrow-rs/pull/9022#discussion_r2639949361
321+ if left_prefix. is_empty ( )
322+ && right_prefix. is_empty ( )
323+ && left_suffix. is_empty ( )
324+ && right_suffix. is_empty ( )
325+ {
326+ let result_u64s = left_u64s
327+ . iter ( )
328+ . zip ( right_u64s. iter ( ) )
329+ . map ( |( l, r) | op ( * l, * r) )
330+ . collect :: < Vec < u64 > > ( ) ;
331+ return BooleanBuffer {
332+ buffer : Buffer :: from ( result_u64s) ,
333+ bit_offset : 0 ,
334+ bit_len : len_in_bits,
335+ } ;
336+ }
337+ }
338+ }
339+ let left_chunks = BitChunks :: new ( left, left_offset_in_bits, len_in_bits) ;
340+ let right_chunks = BitChunks :: new ( right, right_offset_in_bits, len_in_bits) ;
341+
342+ let chunks = left_chunks
343+ . iter ( )
344+ . zip ( right_chunks. iter ( ) )
345+ . map ( |( left, right) | op ( left, right) ) ;
346+ // Soundness: `BitChunks` is a `BitChunks` iterator which
347+ // correctly reports its upper bound
348+ let mut buffer = unsafe { MutableBuffer :: from_trusted_len_iter ( chunks) } ;
349+
350+ let remainder_bytes = bit_util:: ceil ( left_chunks. remainder_len ( ) , 8 ) ;
351+ let rem = op ( left_chunks. remainder_bits ( ) , right_chunks. remainder_bits ( ) ) ;
352+ // we are counting its starting from the least significant bit, to to_le_bytes should be correct
353+ let rem = & rem. to_le_bytes ( ) [ 0 ..remainder_bytes] ;
354+ buffer. extend_from_slice ( rem) ;
355+
356+ BooleanBuffer {
357+ buffer : Buffer :: from ( buffer) ,
358+ bit_offset : 0 ,
359+ bit_len : len_in_bits,
360+ }
361+ }
362+
257363 /// Returns the number of set bits in this buffer
258364 pub fn count_set_bits ( & self ) -> usize {
259365 self . buffer
@@ -656,4 +762,42 @@ mod tests {
656762 assert_eq ! ( result, expected) ;
657763 }
658764 }
765+
/// Checks `BooleanBuffer::from_bitwise_binary_op` against a bit-by-bit `AND`
/// over random inputs, for many combinations of bit offsets and lengths.
#[test]
fn test_from_bitwise_binary_op() {
    // Size of each random input, in bits.
    const TOTAL_BITS: usize = 1024;
    // Bit offsets applied to the right-hand input.
    const RIGHT_OFFSETS: [usize; 11] = [0, 4, 5, 17, 33, 24, 45, 64, 65, 100, 200];
    // Number of bits dropped from the end of the compared range.
    const TAIL_TRIMS: [usize; 7] = [0, 1, 44, 100, 256, 300, 512];

    // Both inputs are drawn the same way: `TOTAL_BITS` random booleans.
    let random_bits = || -> Vec<bool> {
        std::iter::repeat_with(rand::random::<bool>)
            .take(TOTAL_BITS)
            .collect()
    };
    let lhs_bools = random_bits();
    let rhs_bools = random_bits();
    let lhs_buffer = BooleanBuffer::from(lhs_bools.as_slice());
    let rhs_buffer = BooleanBuffer::from(rhs_bools.as_slice());

    for lhs_start in 0..200usize {
        for rhs_start in RIGHT_OFFSETS {
            // The later of the two starting points limits how many bits remain.
            let furthest_start = lhs_start.max(rhs_start);
            for trim in TAIL_TRIMS {
                // Keeps both ranges inside the inputs.
                let bit_count = TOTAL_BITS - trim - furthest_start;

                // Result produced by the function under test, using AND.
                let actual = BooleanBuffer::from_bitwise_binary_op(
                    lhs_buffer.values(),
                    lhs_start,
                    rhs_buffer.values(),
                    rhs_start,
                    bit_count,
                    |l, r| l & r,
                );

                // Reference result computed directly from the booleans.
                let expected: BooleanBuffer = lhs_bools[lhs_start..]
                    .iter()
                    .zip(&rhs_bools[rhs_start..])
                    .take(bit_count)
                    .map(|(&l, &r)| l & r)
                    .collect();

                assert_eq!(actual, expected);
            }
        }
    }
}
659803}
0 commit comments