@@ -2205,23 +2205,6 @@ mod arrow_tests {
22052205 ) ;
22062206 }
22072207
2208- /// Test helper: writes `batch` into an in-memory `Vec` with `ArrowWriter` using default `CdcOptions`, reads it back, and returns only the first batch the reader yields; every write/read step is `unwrap`ped, so any failure (or an empty reader) panics.
2209- fn cdc_roundtrip ( batch : & RecordBatch ) -> RecordBatch {
2210- let props = WriterProperties :: builder ( )
2211- . set_content_defined_chunking ( Some ( CdcOptions :: default ( ) ) )
2212- . build ( ) ;
2213- let mut buffer = Vec :: new ( ) ;
2214- let mut writer = ArrowWriter :: try_new ( & mut buffer, batch. schema ( ) , Some ( props) ) . unwrap ( ) ;
2215- writer. write ( batch) . unwrap ( ) ;
2216- writer. close ( ) . unwrap ( ) ;
2217-
2218- let reader = ParquetRecordBatchReaderBuilder :: try_new ( bytes:: Bytes :: from ( buffer) )
2219- . unwrap ( )
2220- . build ( )
2221- . unwrap ( ) ;
2222- reader. into_iter ( ) . next ( ) . unwrap ( ) . unwrap ( ) // NOTE(review): only the first batch from the reader is returned; if the reader splits the data into several batches the rest are dropped, so equality checks against a large input would not cover all rows.
2223- }
2224-
22252208 /// Regression test for <https://github.com/apache/arrow-rs/issues/9637>
22262209 ///
22272210 /// Writing nested list data with CDC enabled panicked with an out-of-bounds
@@ -2245,8 +2228,14 @@ mod arrow_tests {
22452228 true ,
22462229 ) ,
22472230 ] ) ) ;
2248- let batch = create_random_batch ( schema, 2 , 0.25 , 0.75 ) . unwrap ( ) ;
2249- assert_eq ! ( cdc_roundtrip( & batch) , batch) ;
2231+ let batch = create_random_batch ( schema, 10_000 , 0.25 , 0.75 ) . unwrap ( ) ;
2232+ write_with_cdc_options (
2233+ & [ & batch] ,
2234+ CDC_MIN_CHUNK_SIZE ,
2235+ CDC_MAX_CHUNK_SIZE ,
2236+ None ,
2237+ true ,
2238+ ) ;
22502239 }
22512240
22522241 /// Test CDC with deeply nested types: List<List<Int32>>, List<Struct<List<Int32>>>
@@ -2268,8 +2257,14 @@ mod arrow_tests {
22682257 Field :: new( "list_list" , list_list_type, true ) ,
22692258 Field :: new( "list_struct_list" , list_struct_type, true ) ,
22702259 ] ) ) ;
2271- let batch = create_random_batch ( schema, 200 , 0.25 , 0.75 ) . unwrap ( ) ;
2272- assert_eq ! ( cdc_roundtrip( & batch) , batch) ;
2260+ let batch = create_random_batch ( schema, 10_000 , 0.25 , 0.75 ) . unwrap ( ) ;
2261+ write_with_cdc_options (
2262+ & [ & batch] ,
2263+ CDC_MIN_CHUNK_SIZE ,
2264+ CDC_MAX_CHUNK_SIZE ,
2265+ None ,
2266+ true ,
2267+ ) ;
22732268 }
22742269
22752270 /// Test CDC with list arrays that have non-empty null segments.
@@ -2306,7 +2301,14 @@ mod arrow_tests {
23062301 let schema = Arc :: new ( Schema :: new ( vec ! [ Field :: new( "col" , list_type, true ) ] ) ) ;
23072302 let batch = RecordBatch :: try_new ( schema, vec ! [ list_array] ) . unwrap ( ) ;
23082303
2309- let read = cdc_roundtrip ( & batch) ;
2304+ let buf = write_with_cdc_options (
2305+ & [ & batch] ,
2306+ CDC_MIN_CHUNK_SIZE ,
2307+ CDC_MAX_CHUNK_SIZE ,
2308+ None ,
2309+ true ,
2310+ ) ;
2311+ let read = concat_batches ( & read_batches ( & buf) ) ;
23102312 let read_list = read. column ( 0 ) . as_list :: < i32 > ( ) ;
23112313 assert_eq ! ( read_list. len( ) , 5 ) ;
23122314 assert ! ( read_list. is_valid( 0 ) ) ;
0 commit comments