Skip to content

Commit 81de0ec

Browse files
committed
Extend the kinds of offsets we test for in sliced list arrays (arrow-ipc)
1 parent 03d3b10 commit 81de0ec

File tree

1 file changed

+29
-40
lines changed

1 file changed

+29
-40
lines changed

arrow-ipc/src/writer.rs

Lines changed: 29 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1711,16 +1711,20 @@ fn get_list_array_buffers<O: OffsetSizeTrait>(data: &ArrayData) -> (Buffer, Arra
17111711
/// the array's offset and length. This helps reduce the encoded size of sliced
17121712
/// arrays
17131713
///
1714-
fn get_view_buffer(data: &ArrayData) -> Buffer {
1715-
const VIEW_SIZE: usize = 16;
1716-
1717-
if data.is_empty() {
1718-
return MutableBuffer::new(0).into();
1714+
fn get_or_truncate_buffer(array_data: &ArrayData) -> Buffer {
1715+
let buffer = &array_data.buffers()[0];
1716+
let layout = layout(array_data.data_type());
1717+
let spec = &layout.buffers[0];
1718+
1719+
let byte_width = get_buffer_element_width(spec);
1720+
let min_length = array_data.len() * byte_width;
1721+
if buffer_need_truncate(array_data.offset(), buffer, spec, min_length) {
1722+
let byte_offset = array_data.offset() * byte_width;
1723+
let buffer_length = min(min_length, buffer.len() - byte_offset);
1724+
buffer.slice_with_length(byte_offset, buffer_length)
1725+
} else {
1726+
buffer.clone()
17191727
}
1720-
1721-
let views_start = data.offset() * VIEW_SIZE;
1722-
let views_len = data.len() * VIEW_SIZE;
1723-
data.buffers()[0].slice_with_length(views_start, views_len)
17241728
}
17251729

17261730
/// Write array data to a vector of bytes
@@ -1790,7 +1794,7 @@ fn write_array_data(
17901794
// Current implementation just serialize the raw arrays as given and not try to optimize anything.
17911795
// If users wants to "compact" the arrays prior to sending them over IPC,
17921796
// they should consider the gc API suggested in #5513
1793-
let views = get_view_buffer(array_data);
1797+
let views = get_or_truncate_buffer(array_data);
17941798
offset = write_buffer(
17951799
views.as_slice(),
17961800
buffers,
@@ -1835,21 +1839,9 @@ fn write_array_data(
18351839
// Truncate values
18361840
assert_eq!(array_data.buffers().len(), 1);
18371841

1838-
let buffer = &array_data.buffers()[0];
1839-
let layout = layout(data_type);
1840-
let spec = &layout.buffers[0];
1841-
1842-
let byte_width = get_buffer_element_width(spec);
1843-
let min_length = array_data.len() * byte_width;
1844-
let buffer_slice = if buffer_need_truncate(array_data.offset(), buffer, spec, min_length) {
1845-
let byte_offset = array_data.offset() * byte_width;
1846-
let buffer_length = min(min_length, buffer.len() - byte_offset);
1847-
&buffer.as_slice()[byte_offset..(byte_offset + buffer_length)]
1848-
} else {
1849-
buffer.as_slice()
1850-
};
1842+
let buffer = get_or_truncate_buffer(array_data);
18511843
offset = write_buffer(
1852-
buffer_slice,
1844+
buffer.as_slice(),
18531845
buffers,
18541846
arrow_data,
18551847
offset,
@@ -3146,11 +3138,7 @@ mod tests {
31463138

31473139
let values = Arc::new(generate_list_data::<i64>());
31483140

3149-
let in_batch = RecordBatch::try_new(schema, vec![values])
3150-
.unwrap()
3151-
.slice(999, 1);
3152-
let out_batch = deserialize_file(serialize_file(&in_batch));
3153-
assert_eq!(in_batch, out_batch);
3141+
check_sliced_list_array(schema, values);
31543142
}
31553143

31563144
#[test]
@@ -3161,11 +3149,7 @@ mod tests {
31613149

31623150
let values = Arc::new(generate_string_list_data::<i64>());
31633151

3164-
let in_batch = RecordBatch::try_new(schema, vec![values])
3165-
.unwrap()
3166-
.slice(999, 1);
3167-
let out_batch = deserialize_file(serialize_file(&in_batch));
3168-
assert_eq!(in_batch, out_batch);
3152+
check_sliced_list_array(schema, values);
31693153
}
31703154

31713155
#[test]
@@ -3176,12 +3160,17 @@ mod tests {
31763160

31773161
let values = Arc::new(generate_utf8view_list_data::<i64>());
31783162

3179-
let in_batch = RecordBatch::try_new(schema, vec![values])
3180-
.unwrap()
3181-
.slice(999, 1);
3182-
dbg!(&in_batch);
3183-
let out_batch = deserialize_file(serialize_file(&in_batch));
3184-
assert_eq!(in_batch, out_batch);
3163+
check_sliced_list_array(schema, values);
3164+
}
3165+
3166+
fn check_sliced_list_array(schema: Arc<Schema>, values: Arc<GenericListArray<i64>>) {
3167+
for (offset, len) in [(999, 1), (0, 13), (47, 12), (values.len() - 13, 13)] {
3168+
let in_batch = RecordBatch::try_new(schema.clone(), vec![values.clone()])
3169+
.unwrap()
3170+
.slice(offset, len);
3171+
let out_batch = deserialize_file(serialize_file(&in_batch));
3172+
assert_eq!(in_batch, out_batch);
3173+
}
31853174
}
31863175

31873176
#[test]

0 commit comments

Comments
 (0)