From 5aa9f015695e51c740fbe7fd604f545ecffad589 Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Tue, 14 Apr 2026 13:20:02 +0300 Subject: [PATCH] feat: add `is_there_null_pointing_to_non_empty_value` helper function in `OffsetBuffer` --- arrow-buffer/src/buffer/offset.rs | 538 +++++++++++++++++++++++++++++- 1 file changed, 537 insertions(+), 1 deletion(-) diff --git a/arrow-buffer/src/buffer/offset.rs b/arrow-buffer/src/buffer/offset.rs index bb34c8b23892..1df88bb7e107 100644 --- a/arrow-buffer/src/buffer/offset.rs +++ b/arrow-buffer/src/buffer/offset.rs @@ -16,7 +16,7 @@ // under the License. use crate::buffer::ScalarBuffer; -use crate::{ArrowNativeType, MutableBuffer, OffsetBufferBuilder}; +use crate::{ArrowNativeType, MutableBuffer, NullBuffer, OffsetBufferBuilder}; use std::ops::Deref; /// A non-empty buffer of monotonically increasing, positive integers. @@ -238,6 +238,97 @@ impl OffsetBuffer { pub fn ptr_eq(&self, other: &Self) -> bool { self.0.ptr_eq(&other.0) } + + /// Check if any null positions in the `null_buffer` correspond to + /// non-empty ranges in this [`OffsetBuffer`]. + /// + /// In variable-length array types (e.g., `StringArray`, `ListArray`), + /// null entries may or may not have empty offset ranges. This method + /// detects cases where a null entry has a non-empty range + /// (i.e., `offsets[i] != offsets[i+1]`), which means the underlying + /// data buffer contains data behind nulls. + /// + /// This matters because unwrapping (flattening) a list array exposes + /// the child values, including those behind null entries. If null + /// entries point to non-empty ranges, the unwrapped values will + /// contain data that may not be meaningful to operate on and could + /// cause errors (e.g., division by zero in the child values). + /// + /// Returns `false` if `null_buffer` is `None` or contains no nulls. + /// + /// # Example + /// + /// ``` + /// # use arrow_buffer::{OffsetBuffer, ScalarBuffer, NullBuffer}; + /// // Offsets where null at index 1 has an empty range (3..3) + /// let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 3, 6])); + /// let nulls = NullBuffer::from(vec![true, false, true]); + /// assert!(!offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + /// + /// // Offsets where null at index 1 has a non-empty range (3..7) + /// let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 7, 10])); + /// let nulls = NullBuffer::from(vec![true, false, true]); + /// assert!(offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + /// ``` + /// + /// # Panics + /// + /// Panics if the length of the `null_buffer` does not equal `self.len() - 1`. + pub fn is_there_null_pointing_to_non_empty_value( + &self, + null_buffer: Option<&NullBuffer>, + ) -> bool { + let Some(null_buffer) = null_buffer else { + return false; + }; + + assert_eq!( + self.len() - 1, + null_buffer.len(), + "The length of the offsets should be 1 more than the length of the null buffer" + ); + + if null_buffer.null_count() == 0 { + return false; + } + + // Offsets always have at least 1 value + let initial_offset = self[0]; + let last_offset = self[self.len() - 1]; + + // If all the values are null (offsets have 1 more value than the length of the array) + if null_buffer.null_count() == self.len() - 1 { + return last_offset != initial_offset; + } + + let mut valid_slices_iter = null_buffer.valid_slices(); + + // This is safe as we validated that are at least 1 valid value in the array + let (start, end) = valid_slices_iter.next().unwrap(); + + // If the nulls before have length greater than 0 + if self[start] != initial_offset { + return true; + } + + // End is exclusive, so it already point to the last offset value + // This is valid as the length of the array is always 1 less than the length of the offsets + let mut end_offset_of_last_valid_value = self[end]; + + for (start, end) in valid_slices_iter { + // If there is a null value that point to a non-empty value than the start offset of the valid value + // will be different that the end offset of the last valid value + if self[start] != end_offset_of_last_valid_value { + return true; + } + + // End is exclusive, so it already point to the last offset value + // This is valid as the length of the array is always 1 less than the length of the offsets + end_offset_of_last_valid_value = self[end]; + } + + end_offset_of_last_valid_value != last_offset + } } impl Deref for OffsetBuffer { @@ -471,4 +562,449 @@ mod tests { &[0, third_max as i32, (third_max * 2) as i32] ); } + + // --------------------------------------------------------------- + // Tests for is_there_null_pointing_to_non_empty_value + // --------------------------------------------------------------- + + #[test] + fn null_pointing_none_null_buffer() { + // No null buffer at all -> false + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 5, 8])); + assert!(!offsets.is_there_null_pointing_to_non_empty_value(None)); + } + + #[test] + fn null_pointing_all_valid() { + // Null buffer with zero nulls -> false (early return via filter) + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 5, 8])); + let nulls = NullBuffer::new_valid(3); + assert!(!offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_all_null_empty_offsets() { + // All values are null and all offsets are equal (no data behind nulls) -> false + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 0, 0, 0])); + let nulls = NullBuffer::new_null(3); + assert!(!offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_all_null_non_empty_offsets() { + // All values are null but offsets span data -> true + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 2, 5, 7])); + let nulls = NullBuffer::new_null(3); + assert!(offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_all_null_nonzero_but_equal_offsets() { + // All null, offsets start at non-zero but are all equal -> false + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![5, 5, 5])); + let nulls = NullBuffer::new_null(2); + assert!(!offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_leading_nulls_with_data() { + // Nulls at the beginning that point to non-empty ranges -> true + // offsets: [0, 3, 5, 8] nulls: [false, true, true] + // Index 0 is null with range 0..3 (non-empty) + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 5, 8])); + let nulls = NullBuffer::from(vec![false, true, true]); + assert!(offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_leading_nulls_without_data() { + // Nulls at the beginning with empty ranges -> continue checking + // offsets: [0, 0, 3, 6] nulls: [false, true, true] + // Index 0 is null with range 0..0 (empty) + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 0, 3, 6])); + let nulls = NullBuffer::from(vec![false, true, true]); + assert!(!offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_trailing_nulls_with_data() { + // Nulls at the end that point to non-empty ranges -> true + // offsets: [0, 3, 5, 8] nulls: [true, true, false] + // Index 2 is null with range 5..8 (non-empty) + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 5, 8])); + let nulls = NullBuffer::from(vec![true, true, false]); + assert!(offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_trailing_nulls_without_data() { + // Nulls at the end with empty ranges -> false + // offsets: [0, 3, 6, 6] nulls: [true, true, false] + // Index 2 is null with range 6..6 (empty) + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 6, 6])); + let nulls = NullBuffer::from(vec![true, true, false]); + assert!(!offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_middle_nulls_with_data() { + // Null in the middle with non-empty range -> true + // offsets: [0, 3, 7, 10] nulls: [true, false, true] + // Index 1 is null with range 3..7 (non-empty) + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 7, 10])); + let nulls = NullBuffer::from(vec![true, false, true]); + assert!(offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_middle_nulls_without_data() { + // Null in the middle with empty range -> false + // offsets: [0, 3, 3, 6] nulls: [true, false, true] + // Index 1 is null with range 3..3 (empty) + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 3, 6])); + let nulls = NullBuffer::from(vec![true, false, true]); + assert!(!offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_multiple_null_regions_all_empty() { + // Multiple null regions, all with empty ranges -> false + // offsets: [0, 0, 3, 3, 6, 6] nulls: [false, true, false, true, false] + // Nulls at index 0 (0..0) and index 2 (3..3) are empty + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 0, 3, 3, 6, 6])); + let nulls = NullBuffer::from(vec![false, true, false, true, false]); + assert!(!offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_multiple_null_regions_second_has_data() { + // Two null regions: first empty, second non-empty -> true + // offsets: [0, 0, 3, 5, 6] nulls: [false, true, false, true] + // Null at index 0 (0..0 empty), null at index 2 (3..5 non-empty) + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 0, 3, 5, 6])); + let nulls = NullBuffer::from(vec![false, true, false, true]); + assert!(offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_multiple_null_regions_later_gap_has_data() { + // Three null regions: first two empty, third non-empty -> true + // offsets: [0, 0, 3, 3, 6, 8, 10] nulls: [false, true, false, true, false, true] + // valid_slices: (1,2), (3,4), (5,6) + // first slice: start=1, self[1]=0 == initial_offset=0 OK, end_offset=self[2]=3 + // loop iter 1: start=3, self[3]=3 == 3 OK (first gap empty), end_offset=self[4]=6 + // loop iter 2: start=5, self[5]=8 != 6 -> true (second gap has data) + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 0, 3, 3, 6, 8, 10])); + let nulls = NullBuffer::from(vec![false, true, false, true, false, true]); + assert!(offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_single_element_null_empty() { + // Single element, null with empty range -> false + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 0])); + let nulls = NullBuffer::new_null(1); + assert!(!offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_single_element_null_non_empty() { + // Single element, null with non-empty range -> true + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 5])); + let nulls = NullBuffer::new_null(1); + assert!(offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_single_element_valid() { + // Single element, valid -> false (no nulls at all) + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 5])); + let nulls = NullBuffer::new_valid(1); + assert!(!offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_consecutive_nulls_between_valid_slices() { + // Multiple consecutive nulls between valid regions + // offsets: [0, 2, 2, 2, 5, 8] nulls: [true, false, false, true, true] + // Valid: [0], nulls: [1,2], valid: [3,4] + // Null region [1,2] has offsets 2..2..2 (empty) -> false + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 2, 2, 2, 5, 8])); + let nulls = NullBuffer::from(vec![true, false, false, true, true]); + assert!(!offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_consecutive_nulls_between_valid_slices_with_data() { + // Multiple consecutive nulls between valid regions, nulls have data + // offsets: [0, 2, 3, 4, 5, 8] nulls: [true, false, false, true, true] + // valid_slices: (0,1), (3,5) + // first slice: start=0, end=1 -> self[0]=0 == initial_offset=0 OK + // end_offset_of_last_valid_value = self[1] = 2 + // second slice: start=3, end=5 -> self[3]=4 != 2 -> true + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 2, 3, 4, 5, 8])); + let nulls = NullBuffer::from(vec![true, false, false, true, true]); + assert!(offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_nonzero_initial_offset_all_null_equal() { + // Non-zero starting offset, all null, all offsets equal -> false + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![10, 10, 10])); + let nulls = NullBuffer::new_null(2); + assert!(!offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_nonzero_initial_offset_with_data() { + // Non-zero starting offset, null has data + // offsets: [10, 15, 20] nulls: [false, true] + // Null at index 0 with range 10..15 (non-empty) -> true + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![10, 15, 20])); + let nulls = NullBuffer::from(vec![false, true]); + assert!(offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_i64_offsets() { + // Test with i64 offset type + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 0, 5, 5])); + let nulls = NullBuffer::from(vec![false, true, false]); + // Null at index 0 (0..0 empty), valid at index 1 (0..5), null at index 2 (5..5 empty) -> false + assert!(!offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_i64_offsets_with_data_in_null() { + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 5, 10])); + let nulls = NullBuffer::from(vec![true, false, true]); + // Valid at index 0 (0..3), null at index 1 (3..5 non-empty) -> true + assert!(offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + #[should_panic( + expected = "The length of the offsets should be 1 more than the length of the null buffer" + )] + fn null_pointing_mismatched_lengths() { + // Offset buffer length - 1 != null buffer length -> panic + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 5, 8])); + let nulls = NullBuffer::from(vec![true, false]); // length 2, but offsets has 3 elements + offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls)); + } + + #[test] + fn null_pointing_alternating_null_valid_all_empty_nulls() { + // Alternating null/valid pattern, all null ranges are empty + // offsets: [0, 0, 3, 3, 6, 6, 9] nulls: [false, true, false, true, false, true] + // Nulls at 0,2,4 all have empty ranges + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 0, 3, 3, 6, 6, 9])); + let nulls = NullBuffer::from(vec![false, true, false, true, false, true]); + assert!(!offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_only_trailing_null_has_data() { + // Only the trailing null region has data, everything else is clean + // offsets: [0, 0, 3, 6, 8] nulls: [false, true, true, false] + // Null at 0 (0..0 empty), valid at 1,2 (0..3, 3..6), null at 3 (6..8 non-empty) + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 0, 3, 6, 8])); + let nulls = NullBuffer::from(vec![false, true, true, false]); + assert!(offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_sliced_no_nulls_in_null_region() { + // Original: [0, 3, 3, 6, 6, 9] -> slice(1, 3) -> [3, 3, 6, 6] + // initial_offset=3, last_offset=6 + // nulls: [false, true, false] (null at index 0 has range 3..3 = empty) + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 3, 6, 6, 9])); + let sliced = offsets.slice(1, 3); + let nulls = NullBuffer::from(vec![false, true, false]); + assert!(!sliced.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_sliced_null_has_data() { + // Original: [0, 3, 7, 10, 15] -> slice(1, 2) -> [3, 7, 10] + // initial_offset=3, last_offset=10 + // nulls: [false, true] (null at index 0 has range 3..7 = non-empty) + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 7, 10, 15])); + let sliced = offsets.slice(1, 2); + let nulls = NullBuffer::from(vec![false, true]); + assert!(sliced.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_sliced_all_null_equal_offsets() { + // Original: [0, 5, 5, 5, 10] -> slice(1, 2) -> [5, 5, 5] + // All null, initial_offset=5, last_offset=5 -> false + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 5, 5, 5, 10])); + let sliced = offsets.slice(1, 2); + let nulls = NullBuffer::new_null(2); + assert!(!sliced.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_sliced_all_null_different_offsets() { + // Original: [0, 3, 5, 8, 12] -> slice(1, 2) -> [3, 5, 8] + // All null, initial_offset=3, last_offset=8, 3 != 8 -> true + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 5, 8, 12])); + let sliced = offsets.slice(1, 2); + let nulls = NullBuffer::new_null(2); + assert!(sliced.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_sliced_leading_null_with_data() { + // Original: [0, 2, 5, 8, 10] -> slice(1, 3) -> [2, 5, 8, 10] + // nulls: [false, true, true] + // Null at index 0: range 2..5 (non-empty), initial_offset=2, self[start=1]=5 != 2 -> true + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 2, 5, 8, 10])); + let sliced = offsets.slice(1, 3); + let nulls = NullBuffer::from(vec![false, true, true]); + assert!(sliced.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_sliced_trailing_null_with_data() { + // Original: [0, 3, 6, 9, 12] -> slice(1, 2) -> [3, 6, 9] + // nulls: [true, false] + // Valid at 0: (3..6), null at 1: range 6..9 (non-empty) + // end_offset_of_last_valid_value = self[1]=6, last_offset=9, 6 != 9 -> true + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 6, 9, 12])); + let sliced = offsets.slice(1, 2); + let nulls = NullBuffer::from(vec![true, false]); + assert!(sliced.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_sliced_trailing_null_without_data() { + // Original: [0, 3, 6, 6, 10] -> slice(1, 2) -> [3, 6, 6] + // nulls: [true, false] + // Valid at 0: (3..6), null at 1: range 6..6 (empty) + // end_offset_of_last_valid_value = self[1]=6, last_offset=6 -> false + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 6, 6, 10])); + let sliced = offsets.slice(1, 2); + let nulls = NullBuffer::from(vec![true, false]); + assert!(!sliced.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_sliced_middle_null_with_data() { + // Original: [0, 2, 5, 9, 12, 15] -> slice(1, 3) -> [2, 5, 9, 12] + // nulls: [true, false, true] + // valid_slices: (0,1), (2,3) + // first: start=0 -> self[0]=2 == initial_offset=2 OK, end_offset=self[1]=5 + // second: start=2 -> self[2]=9 != 5 -> true + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 2, 5, 9, 12, 15])); + let sliced = offsets.slice(1, 3); + let nulls = NullBuffer::from(vec![true, false, true]); + assert!(sliced.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_sliced_middle_null_without_data() { + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 2, 5, 5, 8, 12])); + let sliced = offsets.slice(1, 3); + let nulls = NullBuffer::from(vec![true, false, true]); + assert!(!sliced.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_sliced_single_element_null_empty() { + // Original: [0, 3, 3, 6] -> slice(1, 1) -> [3, 3] + // Single element, null, range 3..3 (empty) -> false + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 3, 6])); + let sliced = offsets.slice(1, 1); + let nulls = NullBuffer::new_null(1); + assert!(!sliced.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_sliced_single_element_null_non_empty() { + // Original: [0, 3, 7, 10] -> slice(1, 1) -> [3, 7] + // Single element, null, range 3..7 (non-empty) -> true + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 7, 10])); + let sliced = offsets.slice(1, 1); + let nulls = NullBuffer::new_null(1); + assert!(sliced.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_sliced_from_start() { + // Slice from the beginning (offset=0) -- offsets stay the same + // Original: [0, 3, 3, 6] -> slice(0, 2) -> [0, 3, 3] + // nulls: [true, false] (null at 1 has range 3..3 = empty) -> false + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 3, 6])); + let sliced = offsets.slice(0, 2); + let nulls = NullBuffer::from(vec![true, false]); + assert!(!sliced.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_sliced_to_end() { + // Slice to the end + // Original: [0, 3, 6, 9, 12] -> slice(2, 2) -> [6, 9, 12] + // nulls: [false, true] (null at 0 has range 6..9 = non-empty) -> true + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 6, 9, 12])); + let sliced = offsets.slice(2, 2); + let nulls = NullBuffer::from(vec![false, true]); + assert!(sliced.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + fn null_pointing_sliced_all_valid_no_nulls() { + // Sliced buffer with all-valid nulls -> false (early return) + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 6, 9, 12])); + let sliced = offsets.slice(1, 2); + let nulls = NullBuffer::new_valid(2); + assert!(!sliced.is_there_null_pointing_to_non_empty_value(Some(&nulls))); + } + + #[test] + #[should_panic( + expected = "The length of the offsets should be 1 more than the length of the null buffer" + )] + fn null_pointing_mismatched_lengths_null_buffer_too_long() { + // Null buffer longer than offsets - 1 -> panic + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 5])); + // offsets has 3 elements -> expects null buffer of length 2 + let nulls = NullBuffer::from(vec![false, true, false]); + offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls)); + } + + #[test] + #[should_panic( + expected = "The length of the offsets should be 1 more than the length of the null buffer" + )] + fn null_pointing_sliced_mismatched_lengths() { + // Sliced offset buffer with wrong null buffer length -> panic + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 6, 9, 12])); + let sliced = offsets.slice(1, 2); // [3, 6, 9] -> expects null buffer of length 2 + let nulls = NullBuffer::from(vec![false, true, false]); // length 3 + sliced.is_there_null_pointing_to_non_empty_value(Some(&nulls)); + } + + #[test] + #[should_panic( + expected = "The length of the offsets should be 1 more than the length of the null buffer" + )] + fn null_pointing_all_valid_mismatched_lengths_too_short() { + // All-valid null buffer with wrong length should still panic + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 5, 8])); + let nulls = NullBuffer::new_valid(2); // expects 3 + offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls)); + } + + #[test] + #[should_panic( + expected = "The length of the offsets should be 1 more than the length of the null buffer" + )] + fn null_pointing_all_valid_mismatched_lengths_too_long() { + // All-valid null buffer with wrong length should still panic + let offsets = OffsetBuffer::new(ScalarBuffer::::from(vec![0, 3, 5, 8])); + let nulls = NullBuffer::new_valid(5); // expects 3 + offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls)); + } }