Skip to content

Commit ce1020c

Browse files
committed
test(parquet): test slice for all null chunk
1 parent 67213db commit ce1020c

File tree

2 files changed

+41
-3
lines changed

2 files changed

+41
-3
lines changed

parquet/src/arrow/arrow_writer/levels.rs

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -818,7 +818,10 @@ impl ArrayLevels {
818818

819819
// Select the non-null indices for this chunk.
820820
let nni = &self.non_null_indices[chunk.value_offset..chunk.value_offset + chunk.num_values];
821-
// Compute the array range spanned by the non-null indices
821+
// Compute the array range spanned by the non-null indices.
822+
// When nni is empty (all-null chunk), start=0, end=0 → zero-length
823+
// array slice; write_batch_internal will process only the def/rep
824+
// levels and write no values.
822825
let start = nni.first().copied().unwrap_or(0);
823826
let end = nni.last().map_or(0, |&i| i + 1);
824827
// Shift indices to be relative to the sliced array.
@@ -2270,4 +2273,30 @@ mod tests {
22702273
assert_eq!(chunk2.non_null_indices, vec![0, 1]);
22712274
assert_eq!(chunk2.array.len(), 2);
22722275
}
2276+
2277+
#[test]
2278+
fn test_slice_for_chunk_all_null() {
2279+
// All-null chunk: num_values=0 → empty nni slice → zero-length array.
2280+
let array: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), None, None, Some(4)]));
2281+
let logical_nulls = array.logical_nulls();
2282+
let levels = ArrayLevels {
2283+
def_levels: Some(vec![1, 0, 0, 1]),
2284+
rep_levels: None,
2285+
non_null_indices: vec![0, 3],
2286+
max_def_level: 1,
2287+
max_rep_level: 0,
2288+
array,
2289+
logical_nulls,
2290+
};
2291+
// Chunk covering only the two null rows (levels 1..3), zero non-null values.
2292+
let sliced = levels.slice_for_chunk(&CdcChunk {
2293+
level_offset: 1,
2294+
num_levels: 2,
2295+
value_offset: 1,
2296+
num_values: 0,
2297+
});
2298+
assert_eq!(sliced.def_levels, Some(vec![0, 0]));
2299+
assert_eq!(sliced.non_null_indices, Vec::<usize>::new());
2300+
assert_eq!(sliced.array.len(), 0);
2301+
}
22732302
}

parquet/src/column/chunker/cdc.rs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,9 @@ impl ContentDefinedChunker {
328328
let def_level = def_levels[offset];
329329
self.roll_level(def_level);
330330
if def_level == self.max_def_level {
331+
// For non-nested data, the leaf array has one slot per
332+
// level (nulls are array elements), so `offset` (the
333+
// level index) is the correct array index for hashing.
331334
roll_value(self, offset);
332335
}
333336
// Check boundary before incrementing value_offset so that
@@ -690,17 +693,23 @@ mod tests {
690693

691694
let num_levels = 20;
692695
// def_level=1 means non-null, def_level=0 means null
696+
// Pattern: null at indices 0, 3, 6, 9, 12, 15, 18 → 7 nulls, 13 non-null
693697
let def_levels: Vec<i16> = (0..num_levels)
694698
.map(|i| if i % 3 == 0 { 0 } else { 1 })
695699
.collect();
700+
let expected_non_null: usize = def_levels.iter().filter(|&&d| d == 1).count();
696701

697702
let chunks = chunker.calculate(Some(&def_levels), None, num_levels, |c, i| {
698703
c.roll_fixed::<4>(&(i as i32).to_le_bytes());
699704
});
700705

701706
assert!(!chunks.is_empty());
702-
let total: usize = chunks.iter().map(|c| c.num_levels).sum();
703-
assert_eq!(total, num_levels);
707+
let total_levels: usize = chunks.iter().map(|c| c.num_levels).sum();
708+
let total_values: usize = chunks.iter().map(|c| c.num_values).sum();
709+
assert_eq!(total_levels, num_levels);
710+
assert_eq!(total_values, expected_non_null);
711+
// With nulls present, total_values < total_levels
712+
assert!(total_values < total_levels);
704713
}
705714
}
706715

0 commit comments

Comments (0)