@@ -45,9 +45,10 @@ mod variable_size;
4545type ExtendNullBits < ' a > = Box < dyn Fn ( & mut _MutableArrayData , usize , usize ) + ' a > ;
4646// function that extends `[start..start+len]` to the mutable array.
4747// this is dynamic because different data_types influence how buffers and children are extended.
48- type Extend < ' a > = Box < dyn Fn ( & mut _MutableArrayData , usize , usize , usize ) + ' a > ;
48+ type Extend < ' a > =
49+ Box < dyn Fn ( & mut _MutableArrayData , usize , usize , usize ) -> Result < ( ) , ArrowError > + ' a > ;
4950
50- type ExtendNulls = Box < dyn Fn ( & mut _MutableArrayData , usize ) > ;
51+ type ExtendNulls = Box < dyn Fn ( & mut _MutableArrayData , usize ) -> Result < ( ) , ArrowError > > ;
5152
5253/// A mutable [ArrayData] that knows how to freeze itself into an [ArrayData].
5354/// This is just a data container.
@@ -230,7 +231,8 @@ fn build_extend_view(array: &ArrayData, buffer_offset: u32) -> Extend<'_> {
230231 let mut view = ByteView :: from ( * v) ;
231232 view. buffer_index += buffer_offset;
232233 view. into ( )
233- } ) )
234+ } ) ) ;
235+ Ok ( ( ) )
234236 } ,
235237 )
236238}
@@ -628,7 +630,10 @@ impl<'a> MutableArrayData<'a> {
628630 let mut mutable = MutableArrayData :: new ( dictionaries, false , capacity) ;
629631
630632 for ( i, len) in lengths. iter ( ) . enumerate ( ) {
631- mutable. extend ( i, 0 , * len)
633+ mutable. try_extend ( i, 0 , * len) . expect (
634+ "extend failed while building dictionary; \
635+ this is a bug in MutableArrayData",
636+ )
632637 }
633638
634639 ( Some ( mutable. freeze ( ) ) , true )
@@ -716,36 +721,103 @@ impl<'a> MutableArrayData<'a> {
716721 }
717722 }
718723
719- /// Extends the in progress array with a region of the input arrays
724+ /// Extends the in progress array with a region of the input arrays, returning an error on
725+ /// overflow.
726+ ///
727+ /// Prefer this over [`extend`](Self::extend) to handle cases where the data exceeds the
728+ /// capacity of the offset type (e.g. more than 2 GiB in a `StringArray`). The error message
729+ /// will indicate which array type overflowed and suggest a larger type.
720730 ///
721731 /// # Arguments
722- /// * `index` - the index of array that you what to copy values from
732+ /// * `index` - the index of array that you want to copy values from
723733 /// * `start` - the start index of the chunk (inclusive)
724734 /// * `end` - the end index of the chunk (exclusive)
725735 ///
736+ /// # Errors
737+ /// Returns an error if offset arithmetic overflows the underlying integer type.
738+ ///
726739 /// # Panic
727740 /// This function panics if there is an invalid index,
728741 /// i.e. `index` >= the number of source arrays
729742 /// or `end` > the length of the `index`th array
730- pub fn extend ( & mut self , index : usize , start : usize , end : usize ) {
743+ pub fn try_extend (
744+ & mut self ,
745+ index : usize ,
746+ start : usize ,
747+ end : usize ,
748+ ) -> Result < ( ) , ArrowError > {
731749 let len = end - start;
732750 ( self . extend_null_bits [ index] ) ( & mut self . data , start, len) ;
733- ( self . extend_values [ index] ) ( & mut self . data , index, start, len) ;
751+ // Snapshot buffer lengths before attempting the extend so we can roll
752+ // back to a consistent state if it fails.
753+ let buf1_len = self . data . buffer1 . len ( ) ;
754+ let buf2_len = self . data . buffer2 . len ( ) ;
755+ if let Err ( e) = ( self . extend_values [ index] ) ( & mut self . data , index, start, len) {
756+ // Restore buffers to their pre-call lengths so the array remains
757+ // in a valid state for the caller to inspect or retry.
758+ self . data . buffer1 . truncate ( buf1_len) ;
759+ self . data . buffer2 . truncate ( buf2_len) ;
760+ return Err ( e) ;
761+ }
734762 self . data . len += len;
763+ Ok ( ( ) )
735764 }
736765
737- /// Extends the in progress array with null elements, ignoring the input arrays.
766+
767+ /// Extends the in progress array with a region of the input arrays.
768+ ///
769+ /// # Deprecated
770+ /// Use [`try_extend`](Self::try_extend) instead, which returns an [`ArrowError`] on overflow
771+ /// rather than panicking.
772+ ///
773+ /// # Panic
774+ /// This function panics if there is an invalid index,
775+ /// i.e. `index` >= the number of source arrays,
776+ /// `end` > the length of the `index`th array,
777+ /// or the offset type overflows (e.g. more than 2 GiB in a `StringArray`).
778+ #[ deprecated(
779+ note = "Use `try_extend` which returns an error on overflow instead of panicking"
780+ ) ]
781+ pub fn extend ( & mut self , index : usize , start : usize , end : usize ) {
782+ self . try_extend ( index, start, end)
783+ . expect ( "extend failed due to offset overflow" )
784+ }
785+
786+ /// Extends the in progress array with null elements, ignoring the input arrays, returning an
787+ /// error on overflow.
788+ ///
789+ /// Prefer this over [`extend_nulls`](Self::extend_nulls) to handle cases where the run-end
790+ /// counter overflows (relevant for `RunEndEncoded` arrays).
738791 ///
739792 /// # Panics
740793 ///
741794 /// Panics if [`MutableArrayData`] not created with `use_nulls` or nullable source arrays
742- pub fn extend_nulls ( & mut self , len : usize ) {
795+ pub fn try_extend_nulls ( & mut self , len : usize ) -> Result < ( ) , ArrowError > {
743796 self . data . len += len;
744797 let bit_len = bit_util:: ceil ( self . data . len , 8 ) ;
745798 let nulls = self . data . null_buffer ( ) ;
746799 nulls. resize ( bit_len, 0 ) ;
747800 self . data . null_count += len;
748- ( self . extend_nulls ) ( & mut self . data , len) ;
801+ ( self . extend_nulls ) ( & mut self . data , len) ?;
802+ Ok ( ( ) )
803+ }
804+
805+ /// Extends the in progress array with null elements, ignoring the input arrays.
806+ ///
807+ /// # Deprecated
808+ /// Use [`try_extend_nulls`](Self::try_extend_nulls) instead, which returns an [`ArrowError`]
809+ /// on overflow rather than panicking.
810+ ///
811+ /// # Panics
812+ ///
813+ /// Panics if [`MutableArrayData`] not created with `use_nulls` or nullable source arrays,
814+ /// or if the run-end counter overflows for `RunEndEncoded` arrays.
815+ #[ deprecated(
816+ note = "Use `try_extend_nulls` which returns an error on overflow instead of panicking"
817+ ) ]
818+ pub fn extend_nulls ( & mut self , len : usize ) {
819+ self . try_extend_nulls ( len)
820+ . expect ( "extend_nulls failed due to overflow" )
749821 }
750822
751823 /// Returns the current length
0 commit comments