@@ -253,6 +253,107 @@ impl BooleanBuffer {
253253 Some ( BooleanBuffer :: new ( buffer, 0 , len_in_bits) )
254254 }
255255
256+ /// Create a new [`BooleanBuffer`] by applying the bitwise operation `op` to
257+ /// the relevant bits from two input buffers.
258+ ///
259+ /// This function is faster than applying the operation bit by bit as
260+ /// it processes input buffers in chunks of 64 bits (8 bytes) at a time
261+ ///
262+ /// # Notes:
263+ /// See notes on [Self::from_bitwise_unary_op]
264+ ///
265+ /// # See Also
266+ /// - [`BooleanBuffer::from_bitwise_unary_op`] for unary operations on a single input buffer.
267+ /// - [`apply_bitwise_binary_op`](bit_util::apply_bitwise_binary_op) for in-place binary bitwise operations
268+ ///
269+ /// # Example: Create new [`BooleanBuffer`] from bitwise `AND` of two [`Buffer`]s
270+ /// ```
271+ /// # use arrow_buffer::{Buffer, BooleanBuffer};
272+ /// let left = Buffer::from(vec![0b11001100u8, 0b10111010u8]); // 2 bytes = 16 bits
273+ /// let right = Buffer::from(vec![0b10101010u8, 0b11011100u8, 0b11110000u8]); // 3 bytes = 24 bits
274+ /// // AND of the first 12 bits
275+ /// let result = BooleanBuffer::from_bitwise_binary_op(
276+ /// &left, 0, &right, 0, 12, |a, b| a & b
277+ /// );
278+ /// assert_eq!(result.inner().as_slice(), &[0b10001000u8, 0b00001000u8]);
279+ /// ```
280+ ///
281+ /// # Example: Create new [`BooleanBuffer`] from bitwise `OR` of two byte slices
282+ /// ```
283+ /// # use arrow_buffer::BooleanBuffer;
284+ /// let left = [0b11001100u8, 0b10111010u8];
285+ /// let right = [0b10101010u8, 0b11011100u8];
286+ /// // OR of bits 4..16 from left and bits 0..12 from right
287+ /// let result = BooleanBuffer::from_bitwise_binary_op(
288+ /// &left, 4, &right, 0, 12, |a, b| a | b
289+ /// );
290+ /// assert_eq!(result.inner().as_slice(), &[0b10101110u8, 0b00001111u8]);
291+ /// ```
292+ pub fn from_bitwise_binary_op < F > (
293+ left : impl AsRef < [ u8 ] > ,
294+ left_offset_in_bits : usize ,
295+ right : impl AsRef < [ u8 ] > ,
296+ right_offset_in_bits : usize ,
297+ len_in_bits : usize ,
298+ mut op : F ,
299+ ) -> Self
300+ where
301+ F : FnMut ( u64 , u64 ) -> u64 ,
302+ {
303+ let left = left. as_ref ( ) ;
304+ let right = right. as_ref ( ) ;
305+ // try fast path for aligned input
306+ // If the underlying buffers are aligned to u64 we can apply the operation directly on the u64 slices
307+ // to improve performance.
308+ if left_offset_in_bits == 0 && right_offset_in_bits == 0 {
309+ unsafe {
310+ let ( left_prefix, left_u64s, left_suffix) = left. align_to :: < u64 > ( ) ;
311+ let ( right_prefix, right_u64s, right_suffix) = right. align_to :: < u64 > ( ) ;
312+ // if there is no prefix or suffix, both buffers are aligned and we can do the operation directly
313+ // on u64s
314+ // TODO also handle non empty suffixes by processing them separately
315+ if left_prefix. is_empty ( )
316+ && right_prefix. is_empty ( )
317+ && left_suffix. is_empty ( )
318+ && right_suffix. is_empty ( )
319+ {
320+ let result_u64s = left_u64s
321+ . iter ( )
322+ . zip ( right_u64s. iter ( ) )
323+ . map ( |( l, r) | op ( * l, * r) )
324+ . collect :: < Vec < u64 > > ( ) ;
325+ return BooleanBuffer {
326+ buffer : Buffer :: from ( result_u64s) ,
327+ bit_offset : 0 ,
328+ bit_len : len_in_bits,
329+ }
330+ }
331+ }
332+ }
333+ let left_chunks = BitChunks :: new ( left, left_offset_in_bits, len_in_bits) ;
334+ let right_chunks = BitChunks :: new ( right, right_offset_in_bits, len_in_bits) ;
335+
336+ let chunks = left_chunks
337+ . iter ( )
338+ . zip ( right_chunks. iter ( ) )
339+ . map ( |( left, right) | op ( left, right) ) ;
340+ // Soundness: `BitChunks` is a `BitChunks` iterator which
341+ // correctly reports its upper bound
342+ let mut buffer = unsafe { MutableBuffer :: from_trusted_len_iter ( chunks) } ;
343+
344+ let remainder_bytes = bit_util:: ceil ( left_chunks. remainder_len ( ) , 8 ) ;
345+ let rem = op ( left_chunks. remainder_bits ( ) , right_chunks. remainder_bits ( ) ) ;
346+ // we are counting its starting from the least significant bit, to to_le_bytes should be correct
347+ let rem = & rem. to_le_bytes ( ) [ 0 ..remainder_bytes] ;
348+ buffer. extend_from_slice ( rem) ;
349+
350+ BooleanBuffer {
351+ buffer : Buffer :: from ( buffer) ,
352+ bit_offset : 0 ,
353+ bit_len : len_in_bits,
354+ }
355+ }
356+
256357 /// Returns the number of set bits in this buffer
257358 pub fn count_set_bits ( & self ) -> usize {
258359 self . buffer
@@ -655,4 +756,42 @@ mod tests {
655756 assert_eq ! ( result, expected) ;
656757 }
657758 }
759+
/// Checks `BooleanBuffer::from_bitwise_binary_op` with a bitwise `AND`
/// against a bool-by-bool reference, over many combinations of left offset,
/// right offset and length on randomly generated inputs.
#[test]
fn test_from_bitwise_binary_op() {
    const NUM_BITS: usize = 1024;
    const RIGHT_OFFSETS: [usize; 11] = [0, 4, 5, 17, 33, 24, 45, 64, 65, 100, 200];
    const LEN_REDUCTIONS: [usize; 7] = [0, 1, 44, 100, 256, 300, 512];

    // Random operands: the left one is generated first, then the right one.
    let random_bools = || {
        (0..NUM_BITS)
            .map(|_| rand::random::<bool>())
            .collect::<Vec<bool>>()
    };
    let lhs_bools = random_bools();
    let rhs_bools = random_bools();
    let lhs_buffer = BooleanBuffer::from(lhs_bools.as_slice());
    let rhs_buffer = BooleanBuffer::from(rhs_bools.as_slice());

    for left_offset in 0..200usize {
        for &right_offset in &RIGHT_OFFSETS {
            for &len_reduction in &LEN_REDUCTIONS {
                // The larger offset bounds the length so neither side is read
                // past its end.
                let len = NUM_BITS - len_reduction - left_offset.max(right_offset);

                // AND computed through the chunked implementation
                let actual = BooleanBuffer::from_bitwise_binary_op(
                    lhs_buffer.values(),
                    left_offset,
                    rhs_buffer.values(),
                    right_offset,
                    len,
                    |l, r| l & r,
                );

                // AND computed one bool at a time as the reference
                let expected = (0..len)
                    .map(|i| lhs_bools[left_offset + i] & rhs_bools[right_offset + i])
                    .collect::<BooleanBuffer>();

                assert_eq!(actual, expected);
            }
        }
    }
}
658797}
0 commit comments