Skip to content

Commit 0d23573

Browse files
author
Damien Couture
committed
arrow-ipc: Ensure writer conforms to specs on length-0 variable-size arrays
1 parent 711fac8 commit 0d23573

1 file changed

Lines changed: 63 additions & 6 deletions

File tree

arrow-ipc/src/writer.rs

Lines changed: 63 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ use arrow_array::cast::*;
3737
use arrow_array::types::{Int16Type, Int32Type, Int64Type, RunEndIndexType};
3838
use arrow_array::*;
3939
use arrow_buffer::bit_util;
40-
use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer};
40+
use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer, ToByteSlice};
4141
use arrow_data::{ArrayData, ArrayDataBuilder, BufferSpec, layout};
4242
use arrow_schema::*;
4343

@@ -1724,7 +1724,9 @@ fn reencode_offsets<O: OffsetSizeTrait>(
17241724
/// size of sliced arrays, as values that have been sliced away are not encoded
17251725
fn get_byte_array_buffers<O: OffsetSizeTrait>(data: &ArrayData) -> (Buffer, Buffer) {
17261726
if data.is_empty() {
1727-
return (MutableBuffer::new(0).into(), MutableBuffer::new(0).into());
1727+
let mut offsets = MutableBuffer::new(size_of::<O>());
1728+
offsets.extend_from_slice(O::usize_as(0).to_byte_slice());
1729+
return (offsets.into(), MutableBuffer::new(0).into());
17281730
}
17291731

17301732
let (offsets, original_start_offset, len) = reencode_offsets::<O>(&data.buffers()[0], data);
@@ -1736,10 +1738,9 @@ fn get_byte_array_buffers<O: OffsetSizeTrait>(data: &ArrayData) -> (Buffer, Buff
17361738
/// of a values buffer.
17371739
fn get_list_array_buffers<O: OffsetSizeTrait>(data: &ArrayData) -> (Buffer, ArrayData) {
17381740
if data.is_empty() {
1739-
return (
1740-
MutableBuffer::new(0).into(),
1741-
data.child_data()[0].slice(0, 0),
1742-
);
1741+
let mut offsets = MutableBuffer::new(size_of::<O>());
1742+
offsets.extend_from_slice(O::usize_as(0).to_byte_slice());
1743+
return (offsets.into(), data.child_data()[0].slice(0, 0));
17431744
}
17441745

17451746
let (offsets, original_start_offset, len) = reencode_offsets::<O>(&data.buffers()[0], data);
@@ -2372,6 +2373,62 @@ mod tests {
23722373
}
23732374
}
23742375

2376+
#[test]
2377+
fn test_empty_utf8_ipc_writes_nonempty_offsets_buffer() {
2378+
let name = StringArray::from(Vec::<String>::new());
2379+
let (offsets, values) = get_byte_array_buffers::<i32>(&name.to_data());
2380+
2381+
assert_eq!(name.len(), 0);
2382+
assert_eq!(
2383+
offsets.len(),
2384+
std::mem::size_of::<i32>(),
2385+
"offsets buffer should contain one zero i32 offset"
2386+
);
2387+
assert_eq!(values.len(), 0, "values buffer should remain empty");
2388+
}
2389+
2390+
#[test]
2391+
fn test_empty_large_utf8_ipc_writes_nonempty_offsets_buffer() {
2392+
let name = LargeStringArray::from(Vec::<String>::new());
2393+
let (offsets, values) = get_byte_array_buffers::<i64>(&name.to_data());
2394+
2395+
assert_eq!(name.len(), 0);
2396+
assert_eq!(
2397+
offsets.len(),
2398+
std::mem::size_of::<i64>(),
2399+
"offsets buffer should contain one zero i64 offset"
2400+
);
2401+
assert_eq!(values.len(), 0, "values buffer should remain empty");
2402+
}
2403+
2404+
#[test]
2405+
fn test_empty_list_ipc_writes_nonempty_offsets_buffer() {
2406+
let list = GenericListBuilder::<i32, _>::new(UInt32Builder::new()).finish();
2407+
let (offsets, child_data) = get_list_array_buffers::<i32>(&list.to_data());
2408+
2409+
assert_eq!(list.len(), 0);
2410+
assert_eq!(
2411+
offsets.len(),
2412+
std::mem::size_of::<i32>(),
2413+
"offsets buffer should contain one zero i32 offset"
2414+
);
2415+
assert_eq!(child_data.len(), 0, "child data should remain empty");
2416+
}
2417+
2418+
#[test]
2419+
fn test_empty_large_list_ipc_writes_nonempty_offsets_buffer() {
2420+
let list = GenericListBuilder::<i64, _>::new(UInt32Builder::new()).finish();
2421+
let (offsets, child_data) = get_list_array_buffers::<i64>(&list.to_data());
2422+
2423+
assert_eq!(list.len(), 0);
2424+
assert_eq!(
2425+
offsets.len(),
2426+
std::mem::size_of::<i64>(),
2427+
"offsets buffer should contain one zero i64 offset"
2428+
);
2429+
assert_eq!(child_data.len(), 0, "child data should remain empty");
2430+
}
2431+
23752432
fn write_null_file(options: IpcWriteOptions) {
23762433
let schema = Schema::new(vec![
23772434
Field::new("nulls", DataType::Null, true),

0 commit comments

Comments
 (0)