@@ -805,37 +805,29 @@ impl ArrayLevels {
805805
806806 /// Create a sliced view of this `ArrayLevels` for a CDC chunk.
807807 ///
808- /// Note: `def_levels`, `rep_levels`, and `non_null_indices` are copied (not zero-copy),
809- /// while `array` is sliced without copying.
808+ /// The chunk's `value_offset`/`num_values` select the relevant slice of
809+ /// `non_null_indices`. The array is sliced to the range covered by
810+ /// those indices, and they are shifted to be relative to the slice.
810811 pub ( crate ) fn slice_for_chunk ( & self , chunk : & CdcChunk ) -> Self {
811- let level_offset = chunk. level_offset ;
812- let num_levels = chunk. num_levels ;
813- let value_offset = chunk. value_offset ;
814- let num_values = chunk. num_values ;
815- let def_levels = self
816- . def_levels
817- . as_ref ( )
818- . map ( |levels| levels[ level_offset..level_offset + num_levels] . to_vec ( ) ) ;
819- let rep_levels = self
820- . rep_levels
821- . as_ref ( )
822- . map ( |levels| levels[ level_offset..level_offset + num_levels] . to_vec ( ) ) ;
823-
824- // Filter non_null_indices to [value_offset, value_offset + num_values)
825- // and shift by -value_offset. Use binary search since the slice is sorted.
826- let value_end = value_offset + num_values;
827- let start = self
828- . non_null_indices
829- . partition_point ( |& idx| idx < value_offset) ;
830- let end = self
831- . non_null_indices
832- . partition_point ( |& idx| idx < value_end) ;
833- let non_null_indices: Vec < usize > = self . non_null_indices [ start..end]
834- . iter ( )
835- . map ( |& idx| idx - value_offset)
836- . collect ( ) ;
812+ let def_levels = self . def_levels . as_ref ( ) . map ( |levels| {
813+ levels[ chunk. level_offset ..chunk. level_offset + chunk. num_levels ] . to_vec ( )
814+ } ) ;
815+ let rep_levels = self . rep_levels . as_ref ( ) . map ( |levels| {
816+ levels[ chunk. level_offset ..chunk. level_offset + chunk. num_levels ] . to_vec ( )
817+ } ) ;
837818
838- let array = self . array . slice ( value_offset, num_values) ;
819+ // Select the non-null indices for this chunk.
820+ let nni = & self . non_null_indices [ chunk. value_offset ..chunk. value_offset + chunk. num_values ] ;
821+ // Compute the array range spanned by the non-null indices.
822+ // When nni is empty (all-null chunk), start=0, end=0 → zero-length
823+ // array slice; write_batch_internal will process only the def/rep
824+ // levels and write no values.
825+ let start = nni. first ( ) . copied ( ) . unwrap_or ( 0 ) ;
826+ let end = nni. last ( ) . map_or ( 0 , |& i| i + 1 ) ;
827+ // Shift indices to be relative to the sliced array.
828+ let non_null_indices = nni. iter ( ) . map ( |& idx| idx - start) . collect ( ) ;
829+ // Slice the array to the computed range.
830+ let array = self . array . slice ( start, end - start) ;
839831 let logical_nulls = array. logical_nulls ( ) ;
840832
841833 Self {
@@ -2149,9 +2141,8 @@ mod tests {
21492141 fn test_slice_for_chunk_flat ( ) {
21502142 // Case 1: required field (max_def_level=0, no def/rep levels stored).
21512143 // Array has 6 values; all are non-null so non_null_indices covers every position.
2152- // The chunk selects value_offset=2, num_values=3 → the sub-array [3, 4, 5].
2153- // Since there are no levels, num_levels=0 and level_offset are irrelevant.
2154- // non_null_indices [0,1,2,3,4,5] filtered to [2,4) and shifted by -2 → [0,1,2].
2144+ // value_offset=2, num_values=3 → non_null_indices[2..5] = [2,3,4].
2145+ // Array is sliced (no def_levels → write_batch_internal uses values.len()).
21552146 let array: ArrayRef = Arc :: new ( Int32Array :: from ( vec ! [ 1 , 2 , 3 , 4 , 5 , 6 ] ) ) ;
21562147 let logical_nulls = array. logical_nulls ( ) ;
21572148 let levels = ArrayLevels {
@@ -2176,14 +2167,9 @@ mod tests {
21762167
21772168 // Case 2: optional field (max_def_level=1, def levels present, no rep levels).
21782169 // Array: [Some(1), None, Some(3), None, Some(5), Some(6)]
2179- // def_levels: [1, 0, 1, 0, 1, 1] (1=non-null, 0=null)
2180- // non_null_indices: [0, 2, 4, 5] (array positions of the four non-null values)
2181- //
2182- // The chunk selects level_offset=1, num_levels=3, value_offset=1, num_values=3:
2183- // - def_levels[1..4] = [0, 1, 0] → null, non-null, null
2184- // - sub-array slice(1, 3) = [None, Some(3), None]
2185- // - non_null_indices filtered to [value_offset=1, value_end=4): only index 2 qualifies,
2186- // shifted by -1 → [1] (position of Some(3) within the sliced sub-array)
2170+ // non_null_indices: [0, 2, 4, 5]
2171+ // value_offset=1, num_values=1 → non_null_indices[1..2] = [2].
2172+ // Array is not sliced (def_levels present → num_levels from def_levels.len()).
21872173 let array: ArrayRef = Arc :: new ( Int32Array :: from ( vec ! [
21882174 Some ( 1 ) ,
21892175 None ,
@@ -2206,90 +2192,111 @@ mod tests {
22062192 level_offset : 1 ,
22072193 num_levels : 3 ,
22082194 value_offset : 1 ,
2209- num_values : 3 ,
2195+ num_values : 1 ,
22102196 } ) ;
22112197 assert_eq ! ( sliced. def_levels, Some ( vec![ 0 , 1 , 0 ] ) ) ;
22122198 assert ! ( sliced. rep_levels. is_none( ) ) ;
2213- assert_eq ! ( sliced. non_null_indices, vec![ 1 ] ) ;
2214- assert_eq ! ( sliced. array. len( ) , 3 ) ;
2199+ assert_eq ! ( sliced. non_null_indices, vec![ 0 ] ) ; // [2] shifted by -2 (nni[0])
2200+ assert_eq ! ( sliced. array. len( ) , 1 ) ;
22152201 }
22162202
22172203 #[ test]
2218- fn test_slice_for_chunk_nested ( ) {
2219- // [[1,2],[3],[4,5]]: def=[2,2,2,2,2], rep=[0,1,0,0,1]
2220- // Slice levels 2..5 (def=[2,2,2], rep=[0,0,1]), values 2..5
2221- let array: ArrayRef = Arc :: new ( Int32Array :: from ( vec ! [ 1 , 2 , 3 , 4 , 5 ] ) ) ;
2204+ fn test_slice_for_chunk_nested_with_nulls ( ) {
2205+ // Regression test for https://github.com/apache/arrow-rs/issues/9637
2206+ //
2207+ // Simulates a List<Int32?> where null list entries have non-zero child
2208+ // ranges (valid per Arrow spec: "a null value may correspond to a
2209+ // non-empty segment in the child array"). This creates gaps in the
2210+ // leaf array that don't correspond to any levels.
2211+ //
2212+ // 5 rows with 2 null list entries owning non-empty child ranges:
2213+ // row 0: [1] → leaf[0]
2214+ // row 1: null list → owns leaf[1..3] (gap of 2)
2215+ // row 2: [2, null] → leaf[3], leaf[4]=null element
2216+ // row 3: null list → owns leaf[5..8] (gap of 3)
2217+ // row 4: [4, 5] → leaf[8], leaf[9]
2218+ //
2219+ // def_levels: [3, 0, 3, 2, 0, 3, 3]
2220+ // rep_levels: [0, 0, 0, 1, 0, 0, 1]
2221+ // non_null_indices: [0, 3, 8, 9]
2222+ // gaps in array: 0→3 (skip 1,2), 3→8 (skip 5,6,7)
2223+ let array: ArrayRef = Arc :: new ( Int32Array :: from ( vec ! [
2224+ Some ( 1 ) , // 0: row 0
2225+ None , // 1: gap (null list row 1)
2226+ None , // 2: gap (null list row 1)
2227+ Some ( 2 ) , // 3: row 2
2228+ None , // 4: row 2, null element
2229+ None , // 5: gap (null list row 3)
2230+ None , // 6: gap (null list row 3)
2231+ None , // 7: gap (null list row 3)
2232+ Some ( 4 ) , // 8: row 4
2233+ Some ( 5 ) , // 9: row 4
2234+ ] ) ) ;
22222235 let logical_nulls = array. logical_nulls ( ) ;
22232236 let levels = ArrayLevels {
2224- def_levels : Some ( vec ! [ 2 , 2 , 2 , 2 , 2 ] ) ,
2225- rep_levels : Some ( vec ! [ 0 , 1 , 0 , 0 , 1 ] ) ,
2226- non_null_indices : vec ! [ 0 , 1 , 2 , 3 , 4 ] ,
2227- max_def_level : 2 ,
2237+ def_levels : Some ( vec ! [ 3 , 0 , 3 , 2 , 0 , 3 , 3 ] ) ,
2238+ rep_levels : Some ( vec ! [ 0 , 0 , 0 , 1 , 0 , 0 , 1 ] ) ,
2239+ non_null_indices : vec ! [ 0 , 3 , 8 , 9 ] ,
2240+ max_def_level : 3 ,
22282241 max_rep_level : 1 ,
22292242 array,
22302243 logical_nulls,
22312244 } ;
2232- let sliced = levels. slice_for_chunk ( & CdcChunk {
2245+
2246+ // Chunk 0: rows 0-1, nni=[0] → array sliced to [0..1]
2247+ let chunk0 = levels. slice_for_chunk ( & CdcChunk {
2248+ level_offset : 0 ,
2249+ num_levels : 2 ,
2250+ value_offset : 0 ,
2251+ num_values : 1 ,
2252+ } ) ;
2253+ assert_eq ! ( chunk0. non_null_indices, vec![ 0 ] ) ;
2254+ assert_eq ! ( chunk0. array. len( ) , 1 ) ;
2255+
2256+ // Chunk 1: rows 2-3, nni=[3] → array sliced to [3..4]
2257+ let chunk1 = levels. slice_for_chunk ( & CdcChunk {
22332258 level_offset : 2 ,
22342259 num_levels : 3 ,
2260+ value_offset : 1 ,
2261+ num_values : 1 ,
2262+ } ) ;
2263+ assert_eq ! ( chunk1. non_null_indices, vec![ 0 ] ) ;
2264+ assert_eq ! ( chunk1. array. len( ) , 1 ) ;
2265+
2266+ // Chunk 2: row 4, nni=[8, 9] → array sliced to [8..10]
2267+ let chunk2 = levels. slice_for_chunk ( & CdcChunk {
2268+ level_offset : 5 ,
2269+ num_levels : 2 ,
22352270 value_offset : 2 ,
2236- num_values : 3 ,
2271+ num_values : 2 ,
22372272 } ) ;
2238- assert_eq ! ( sliced. def_levels, Some ( vec![ 2 , 2 , 2 ] ) ) ;
2239- assert_eq ! ( sliced. rep_levels, Some ( vec![ 0 , 0 , 1 ] ) ) ;
2240- // [0,1,2,3,4] filtered to [2,5) → [2,3,4] → shifted -2 → [0,1,2]
2241- assert_eq ! ( sliced. non_null_indices, vec![ 0 , 1 , 2 ] ) ;
2242- assert_eq ! ( sliced. array. len( ) , 3 ) ;
2273+ assert_eq ! ( chunk2. non_null_indices, vec![ 0 , 1 ] ) ;
2274+ assert_eq ! ( chunk2. array. len( ) , 2 ) ;
22432275 }
22442276
22452277 #[ test]
2246- fn test_slice_for_chunk_non_null_indices_boundary ( ) {
2247- // [1, null, 3]: non_null_indices=[0, 2]; test inclusive lower / exclusive upper bounds
2248- let array: ArrayRef = Arc :: new ( Int32Array :: from ( vec ! [ Some ( 1 ) , None , Some ( 3 ) ] ) ) ;
2278+ fn test_slice_for_chunk_all_null ( ) {
2279+ // All- null chunk: num_values=0 → empty nni slice → zero-length array.
2280+ let array: ArrayRef = Arc :: new ( Int32Array :: from ( vec ! [ Some ( 1 ) , None , None , Some ( 4 ) ] ) ) ;
22492281 let logical_nulls = array. logical_nulls ( ) ;
22502282 let levels = ArrayLevels {
2251- def_levels : Some ( vec ! [ 1 , 0 , 1 ] ) ,
2283+ def_levels : Some ( vec ! [ 1 , 0 , 0 , 1 ] ) ,
22522284 rep_levels : None ,
2253- non_null_indices : vec ! [ 0 , 2 ] ,
2285+ non_null_indices : vec ! [ 0 , 3 ] ,
22542286 max_def_level : 1 ,
22552287 max_rep_level : 0 ,
22562288 array,
22572289 logical_nulls,
22582290 } ;
2259- assert_eq ! (
2260- levels
2261- . slice_for_chunk( & CdcChunk {
2262- level_offset: 0 ,
2263- num_levels: 1 ,
2264- value_offset: 0 ,
2265- num_values: 1
2266- } )
2267- . non_null_indices,
2268- vec![ 0 ]
2269- ) ;
2270- // idx 2 in range [1,3), shifted -1 → 1
2271- assert_eq ! (
2272- levels
2273- . slice_for_chunk( & CdcChunk {
2274- level_offset: 1 ,
2275- num_levels: 2 ,
2276- value_offset: 1 ,
2277- num_values: 2
2278- } )
2279- . non_null_indices,
2280- vec![ 1 ]
2281- ) ;
2282- // idx 2 excluded from [1,2)
2283- assert_eq ! (
2284- levels
2285- . slice_for_chunk( & CdcChunk {
2286- level_offset: 1 ,
2287- num_levels: 1 ,
2288- value_offset: 1 ,
2289- num_values: 1
2290- } )
2291- . non_null_indices,
2292- Vec :: <usize >:: new( )
2293- ) ;
2291+ // Chunk covering only the two null rows (levels 1..3), zero non-null values.
2292+ let sliced = levels. slice_for_chunk ( & CdcChunk {
2293+ level_offset : 1 ,
2294+ num_levels : 2 ,
2295+ value_offset : 1 ,
2296+ num_values : 0 ,
2297+ } ) ;
2298+ assert_eq ! ( sliced. def_levels, Some ( vec![ 0 , 0 ] ) ) ;
2299+ assert_eq ! ( sliced. non_null_indices, Vec :: <usize >:: new( ) ) ;
2300+ assert_eq ! ( sliced. array. len( ) , 0 ) ;
22942301 }
22952302}
0 commit comments