Skip to content

Commit aa9432c

Browse files
adriangbclaudeJefffrey
authored
Fix extend_nulls panic for UnionArray (#9607)
## Summary - Fix `MutableArrayData::extend_nulls` which previously panicked unconditionally for both sparse and dense Union arrays - For sparse unions: append the first type_id and extend nulls in all children - For dense unions: append the first type_id, compute offsets into the first child, and extend nulls in that child only ## Background This bug was discovered via DataFusion. `CaseExpr` uses `MutableArrayData` via `scatter()` to build result arrays. When a `CASE` expression returns a Union type (e.g., from `json_get` which returns a JSON union) and there are rows where no `WHEN` branch matches (implicit `ELSE NULL`), `scatter` calls `extend_nulls` which panics with "cannot call extend_nulls on UnionArray as cannot infer type". Any query like: ```sql SELECT CASE WHEN condition THEN returns_union(col, 'key') END FROM table ``` would panic if `condition` is false for any row. ## Root Cause The `extend_nulls` implementation for Union arrays unconditionally panicked because it claimed it "cannot infer type". However, the Union's field definitions (child types and type IDs) are available in the `MutableArrayData`'s data type — there's enough information to produce valid null entries by picking the first declared type_id. ## Test plan - [x] Added test for sparse union `extend_nulls` - [x] Added test for dense union `extend_nulls` - [x] Existing `test_union_dense` continues to pass - [x] All `array_transform` tests pass 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Co-authored-by: Jeffrey Vo <jeffrey.vo.australia@gmail.com>
1 parent c194e54 commit aa9432c

File tree

3 files changed

+127
-6
lines changed

3 files changed

+127
-6
lines changed

arrow-data/src/transform/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -813,8 +813,8 @@ impl<'a> MutableArrayData<'a> {
813813
};
814814

815815
let nulls = match data.data_type {
816-
// RunEndEncoded and Null arrays cannot have top-level null bitmasks
817-
DataType::RunEndEncoded(_, _) | DataType::Null => None,
816+
// RunEndEncoded, Null, and Union arrays cannot have top-level null bitmasks
817+
DataType::RunEndEncoded(_, _) | DataType::Null | DataType::Union(_, _) => None,
818818
_ => data
819819
.null_buffer
820820
.map(|nulls| {

arrow-data/src/transform/union.rs

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
use super::{_MutableArrayData, Extend};
1919
use crate::ArrayData;
20+
use arrow_schema::DataType;
2021

2122
pub(super) fn build_extend_sparse(array: &ArrayData) -> Extend<'_> {
2223
let type_ids = array.buffer::<i8>(0);
@@ -68,10 +69,42 @@ pub(super) fn build_extend_dense(array: &ArrayData) -> Extend<'_> {
6869
)
6970
}
7071

71-
pub(super) fn extend_nulls_dense(_mutable: &mut _MutableArrayData, _len: usize) {
72-
panic!("cannot call extend_nulls on UnionArray as cannot infer type");
72+
pub(super) fn extend_nulls_dense(mutable: &mut _MutableArrayData, len: usize) {
73+
let DataType::Union(fields, _) = &mutable.data_type else {
74+
unreachable!()
75+
};
76+
let first_type_id = fields
77+
.iter()
78+
.next()
79+
.expect("union must have at least one field")
80+
.0;
81+
82+
// Extend type_ids buffer
83+
mutable.buffer1.extend_from_slice(&vec![first_type_id; len]);
84+
85+
// Dense: extend offsets pointing into the first child, then extend nulls in that child
86+
let child_offset = mutable.child_data[0].len();
87+
let (start, end) = (child_offset as i32, (child_offset + len) as i32);
88+
mutable.buffer2.extend(start..end);
89+
mutable.child_data[0].extend_nulls(len);
7390
}
7491

75-
pub(super) fn extend_nulls_sparse(_mutable: &mut _MutableArrayData, _len: usize) {
76-
panic!("cannot call extend_nulls on UnionArray as cannot infer type");
92+
pub(super) fn extend_nulls_sparse(mutable: &mut _MutableArrayData, len: usize) {
93+
let DataType::Union(fields, _) = &mutable.data_type else {
94+
unreachable!()
95+
};
96+
let first_type_id = fields
97+
.iter()
98+
.next()
99+
.expect("union must have at least one field")
100+
.0;
101+
102+
// Extend type_ids buffer
103+
mutable.buffer1.extend_from_slice(&vec![first_type_id; len]);
104+
105+
// Sparse: extend nulls in ALL children
106+
mutable
107+
.child_data
108+
.iter_mut()
109+
.for_each(|child| child.extend_nulls(len));
77110
}

arrow/tests/array_transform.rs

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1151,3 +1151,91 @@ fn test_fixed_size_list_append() {
11511151
.unwrap();
11521152
assert_eq!(finished, expected_fixed_size_list_data);
11531153
}
1154+
1155+
#[test]
1156+
fn test_extend_nulls_sparse_union() {
1157+
let fields = UnionFields::try_new(
1158+
vec![0, 1],
1159+
vec![
1160+
Field::new("null", DataType::Null, true),
1161+
Field::new("str", DataType::Utf8, true),
1162+
],
1163+
)
1164+
.unwrap();
1165+
1166+
let type_ids = ScalarBuffer::from(vec![1i8]);
1167+
let child_null = Arc::new(NullArray::new(1)) as ArrayRef;
1168+
let child_str = Arc::new(StringArray::from(vec![Some("hello")])) as ArrayRef;
1169+
let union_array = UnionArray::try_new(
1170+
fields.clone(),
1171+
type_ids,
1172+
None, // sparse
1173+
vec![child_null, child_str],
1174+
)
1175+
.unwrap();
1176+
1177+
let data = union_array.to_data();
1178+
let mut mutable = MutableArrayData::new(vec![&data], true, 4);
1179+
mutable.extend(0, 0, 1); // copy the first element
1180+
mutable.extend_nulls(2); // add two nulls
1181+
let result = mutable.freeze();
1182+
1183+
// Union arrays must not have a null bitmap per Arrow spec
1184+
assert!(result.nulls().is_none());
1185+
1186+
let result_array = UnionArray::from(result);
1187+
assert_eq!(result_array.len(), 3);
1188+
// First element should be type_id 1 (str)
1189+
assert_eq!(result_array.type_id(0), 1);
1190+
// Null elements use the first type_id (0)
1191+
assert_eq!(result_array.type_id(1), 0);
1192+
assert_eq!(result_array.type_id(2), 0);
1193+
// All children should have length 3 (sparse invariant)
1194+
assert_eq!(result_array.child(0).len(), 3);
1195+
assert_eq!(result_array.child(1).len(), 3);
1196+
}
1197+
1198+
#[test]
1199+
fn test_extend_nulls_dense_union() {
1200+
let fields = UnionFields::try_new(
1201+
vec![0, 1],
1202+
vec![
1203+
Field::new("i", DataType::Int32, true),
1204+
Field::new("str", DataType::Utf8, true),
1205+
],
1206+
)
1207+
.unwrap();
1208+
1209+
let type_ids = ScalarBuffer::from(vec![1i8]);
1210+
let offsets = ScalarBuffer::from(vec![0i32]);
1211+
let child_int = Arc::new(Int32Array::new_null(0)) as ArrayRef;
1212+
let child_str = Arc::new(StringArray::from(vec![Some("hello")])) as ArrayRef;
1213+
let union_array = UnionArray::try_new(
1214+
fields.clone(),
1215+
type_ids,
1216+
Some(offsets),
1217+
vec![child_int, child_str],
1218+
)
1219+
.unwrap();
1220+
1221+
let data = union_array.to_data();
1222+
let mut mutable = MutableArrayData::new(vec![&data], true, 4);
1223+
mutable.extend(0, 0, 1); // copy the first element
1224+
mutable.extend_nulls(2); // add two nulls
1225+
let result = mutable.freeze();
1226+
1227+
// Union arrays must not have a null bitmap per Arrow spec
1228+
assert!(result.nulls().is_none());
1229+
1230+
let result_array = UnionArray::from(result);
1231+
assert_eq!(result_array.len(), 3);
1232+
// First element is type_id 1 (str)
1233+
assert_eq!(result_array.type_id(0), 1);
1234+
// Null elements use the first type_id (0)
1235+
assert_eq!(result_array.type_id(1), 0);
1236+
assert_eq!(result_array.type_id(2), 0);
1237+
// First child (int) should have 2 null entries from extend_nulls
1238+
assert_eq!(result_array.child(0).len(), 2);
1239+
// Second child (str) should have 1 entry from extend
1240+
assert_eq!(result_array.child(1).len(), 1);
1241+
}

0 commit comments

Comments
 (0)