@@ -495,14 +495,19 @@ fn take_boolean<IndexType: ArrowPrimitiveType>(
495495/// # Safety
496496/// Each `(start, end)` in `ranges` must be in-bounds of `src`, and
497497/// `capacity` must equal the total bytes across all ranges.
498- unsafe fn copy_byte_ranges ( src : & [ u8 ] , ranges : & [ ( usize , usize ) ] , capacity : usize ) -> Vec < u8 > {
498+ unsafe fn copy_byte_ranges (
499+ src : & [ u8 ] ,
500+ ranges : & [ ( usize , usize ) ] ,
501+ capacity : usize ,
502+ values : & mut Vec < u8 > ,
503+ ) {
504+ values. reserve ( capacity) ;
499505 debug_assert_eq ! (
500506 ranges. iter( ) . map( |( s, e) | e - s) . sum:: <usize >( ) ,
501507 capacity,
502508 "capacity must equal total bytes across all ranges"
503509 ) ;
504510 let src_len = src. len ( ) ;
505- let mut values = Vec :: with_capacity ( capacity) ;
506511 let src = src. as_ptr ( ) ;
507512 let mut dst = values. as_mut_ptr ( ) ;
508513 for & ( start, end) in ranges {
@@ -523,14 +528,14 @@ unsafe fn copy_byte_ranges(src: &[u8], ranges: &[(usize, usize)], capacity: usiz
523528 // SAFETY: caller guarantees `capacity` == total bytes across all ranges,
524529 // so the loop above wrote exactly `capacity` bytes.
525530 unsafe { values. set_len ( capacity) } ;
526- values
527531}
528532
529533/// `take` implementation for string arrays
530534fn take_bytes < T : ByteArrayType , IndexType : ArrowPrimitiveType > (
531535 array : & GenericByteArray < T > ,
532536 indices : & PrimitiveArray < IndexType > ,
533537) -> Result < GenericByteArray < T > , ArrowError > {
538+ let mut values = Vec :: new ( ) ;
534539 let mut offsets = Vec :: with_capacity ( indices. len ( ) + 1 ) ;
535540 offsets. push ( T :: Offset :: default ( ) ) ;
536541
@@ -539,12 +544,10 @@ fn take_bytes<T: ByteArrayType, IndexType: ArrowPrimitiveType>(
539544 let mut capacity = 0 ;
540545 let nulls = take_nulls ( array. nulls ( ) , indices) ;
541546
542- // Pass 1: compute offsets and collect byte ranges.
543547 // Branch on output nulls — `None` means every output slot is valid.
544- let ranges = match nulls. as_ref ( ) . filter ( |n| n. null_count ( ) > 0 ) {
548+ match nulls. as_ref ( ) . filter ( |n| n. null_count ( ) > 0 ) {
545549 // Fast path: no nulls in output, every index is valid.
546550 None => {
547- let mut ranges = Vec :: with_capacity ( indices. len ( ) ) ;
548551 for index in indices. values ( ) {
549552 let index = index. as_usize ( ) ;
550553 let start = input_offsets[ index] . as_usize ( ) ;
@@ -554,9 +557,26 @@ fn take_bytes<T: ByteArrayType, IndexType: ArrowPrimitiveType>(
554557 T :: Offset :: from_usize ( capacity)
555558 . ok_or_else ( || ArrowError :: OffsetOverflowError ( capacity) ) ?,
556559 ) ;
557- ranges. push ( ( start, end) ) ;
558560 }
559- ranges
561+
562+ values. reserve ( capacity) ;
563+
564+ let mut dst = values. as_mut_ptr ( ) ;
565+
566+ for index in indices. values ( ) {
567+ // SAFETY: in-bounds proven by the first loop's bounds-checked offset access.
568+ // dst stays within reserved capacity computed from the same indices.
569+ unsafe {
570+ let data: & [ u8 ] = array. value_unchecked ( index. as_usize ( ) ) . as_ref ( ) ;
571+ std:: ptr:: copy_nonoverlapping ( data. as_ptr ( ) , dst, data. len ( ) ) ;
572+ dst = dst. add ( data. len ( ) ) ;
573+ }
574+ }
575+
576+ // SAFETY: wrote exactly `capacity` bytes above; reserved on line above.
577+ unsafe {
578+ values. set_len ( capacity) ;
579+ }
560580 }
561581 // Nullable path: only process valid (non-null) output positions.
562582 Some ( output_nulls) => {
@@ -566,6 +586,7 @@ fn take_bytes<T: ByteArrayType, IndexType: ArrowPrimitiveType>(
566586 // Pre-fill offsets; we overwrite valid positions below.
567587 offsets. resize ( indices. len ( ) + 1 , T :: Offset :: default ( ) ) ;
568588
589+ // Pass 1: find all valid ranges that need to be copied.
569590 for i in output_nulls. valid_indices ( ) {
570591 let current_offset = T :: Offset :: from_usize ( capacity)
571592 . ok_or_else ( || ArrowError :: OffsetOverflowError ( capacity) ) ?;
@@ -589,15 +610,11 @@ fn take_bytes<T: ByteArrayType, IndexType: ArrowPrimitiveType>(
589610 let final_offset = T :: Offset :: from_usize ( capacity)
590611 . ok_or_else ( || ArrowError :: OffsetOverflowError ( capacity) ) ?;
591612 offsets[ last_filled + 1 ..] . fill ( final_offset) ;
592- ranges
613+ // Pass 2: copy byte data for all collected ranges.
614+ unsafe { copy_byte_ranges ( input_values, & ranges, capacity, & mut values) } ;
593615 }
594616 } ;
595617
596- // Pass 2: copy byte data for all collected ranges.
597- let values = unsafe { copy_byte_ranges ( input_values, & ranges, capacity) } ;
598-
599- debug_assert_eq ! ( capacity, values. len( ) ) ;
600-
601618 // SAFETY: offsets are monotonically increasing and in-bounds of `values`,
602619 // and `nulls` (if present) has length == `indices.len()`.
603620 let array = unsafe {
0 commit comments