Skip to content

Commit 5842759

Browse files
committed
fix: [9018] Fixed RunArray slice offsets (row, cast, eq)
1 parent 3c6ca57 commit 5842759

File tree

7 files changed

+298
-64
lines changed

7 files changed

+298
-64
lines changed

arrow-array/src/array/run_array.rs

Lines changed: 109 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -141,6 +141,9 @@ impl<R: RunEndIndexType> RunArray<R> {
141141
///
142142
/// [`values`]: Self::values
143143
pub fn values_slice(&self) -> ArrayRef {
144+
if self.is_empty() {
145+
return self.values.slice(0, 0);
146+
}
144147
let start = self.get_start_physical_index();
145148
let end = self.get_end_physical_index();
146149
self.values.slice(start, end - start + 1)
@@ -655,6 +658,7 @@ mod tests {
655658
use super::*;
656659
use crate::builder::PrimitiveRunBuilder;
657660
use crate::cast::AsArray;
661+
use crate::new_empty_array;
658662
use crate::types::{Int8Type, UInt32Type};
659663
use crate::{Int16Array, Int32Array, StringArray};
660664

@@ -752,6 +756,26 @@ mod tests {
752756
assert_eq!(run_ends.values(), &run_ends_values);
753757
}
754758

759+
/// Checks that a `RunArray` built from empty run ends and empty values, and a
/// zero-length slice of it, both report an empty state through the
/// physical-index and run-end accessors.
#[test]
760+
fn test_run_array_empty() {
761+
// Empty Int16 run ends paired with empty Int64 values.
let runs = new_empty_array(&DataType::Int16);
762+
let runs = runs.as_primitive::<Int16Type>();
763+
let values = new_empty_array(&DataType::Int64);
764+
let array = RunArray::try_new(runs, &values).unwrap();
765+
766+
// Shared checks, applied below to the array itself and to a slice of it.
fn assertions(array: &RunArray<Int16Type>) {
767+
assert!(array.is_empty());
768+
// Both physical bounds are expected to be 0 for an empty array.
assert_eq!(array.get_start_physical_index(), 0);
769+
assert_eq!(array.get_end_physical_index(), 0);
770+
// Looking up no logical indices must succeed and yield nothing.
assert!(array.get_physical_indices::<i16>(&[]).unwrap().is_empty());
771+
assert!(array.run_ends().is_empty());
772+
// Iterating the sliced run ends must produce no items.
assert_eq!(array.run_ends().sliced_values().count(), 0);
773+
}
774+
775+
assertions(&array);
776+
assertions(&array.slice(0, 0));
777+
}
778+
755779
#[test]
756780
fn test_run_array_fmt_debug() {
757781
let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(3);
@@ -1186,4 +1210,89 @@ mod tests {
11861210
let values_slice2 = values_slice2.as_primitive::<Int32Type>();
11871211
assert_eq!(values_slice2.values(), &[1]);
11881212
}
1213+
1214+
/// Checks that `values_slice` on a zero-length slice of a non-empty
/// `RunArray` returns an empty values array that keeps the `Utf8` data type.
#[test]
1215+
fn test_run_array_values_slice_empty() {
1216+
let run_ends = Int32Array::from(vec![2, 5, 10]);
1217+
let values = StringArray::from(vec!["a", "b", "c"]);
1218+
let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1219+
1220+
// Zero-length slice taken at the start of the non-empty array.
let slice = array.slice(0, 0);
1221+
assert_eq!(slice.len(), 0);
1222+
1223+
let values_slice = slice.values_slice();
1224+
assert_eq!(values_slice.len(), 0);
1225+
// The empty result must still carry the values' data type.
assert_eq!(values_slice.data_type(), &DataType::Utf8);
1226+
}
1227+
1228+
/// Checks that zero-length slices of the same `RunArray` compare equal to one
/// another regardless of their offset, and equal to a freshly created empty
/// array of the same data type.
#[test]
1229+
fn test_run_array_eq_empty() {
1230+
let run_ends = Int32Array::from(vec![2, 5, 10]);
1231+
let values = StringArray::from(vec!["a", "b", "c"]);
1232+
let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1233+
1234+
// Zero-length slices at the start, in the middle, and at the very end
// (offset 10 equals the last run end).
let slice1 = array.slice(0, 0);
1235+
let slice2 = array.slice(1, 0);
1236+
let slice3 = array.slice(10, 0);
1237+
1238+
assert_eq!(slice1, slice2);
1239+
assert_eq!(slice2, slice3);
1240+
1241+
// An empty array built directly from the data type, viewed as a RunArray.
let empty_array = new_empty_array(array.data_type());
1242+
let empty_array = crate::cast::as_run_array::<Int32Type>(empty_array.as_ref());
1243+
1244+
assert_eq!(&slice1, empty_array);
1245+
}
1246+
1247+
/// Checks that two `RunArray`s with different physical encodings but the same
/// logical contents compare equal.
#[test]
1248+
fn test_run_array_eq_diff_physical_same_logical() {
1249+
// Three runs. Logical: a, b, b, c, c, c
let run_ends1 = Int32Array::from(vec![1, 3, 6]);
1250+
let values1 = StringArray::from(vec!["a", "b", "c"]);
1251+
let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1252+
1253+
// Six runs of length one. Logical: a, b, b, c, c, c
let run_ends2 = Int32Array::from(vec![1, 2, 3, 4, 5, 6]);
1254+
let values2 = StringArray::from(vec!["a", "b", "b", "c", "c", "c"]);
1255+
let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1256+
1257+
assert_eq!(array1, array2);
1258+
}
1259+
1260+
/// Checks that a sliced `RunArray` compares equal to an unsliced `RunArray`
/// holding the same logical values, for a slice spanning all three runs and
/// for a slice that lies inside a single run.
#[test]
1261+
fn test_run_array_eq_sliced() {
1262+
let run_ends1 = Int32Array::from(vec![2, 5, 10]);
1263+
let values1 = StringArray::from(vec!["a", "b", "c"]);
1264+
let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1265+
// Logical: a, a, b, b, b, c, c, c, c, c
1266+
1267+
// Slice starts mid-way through the first run and ends inside the last.
let slice1 = array1.slice(1, 6);
1268+
// Logical: a, b, b, b, c, c
1269+
1270+
// Unsliced array written out with the run ends the slice should expose.
let run_ends2 = Int32Array::from(vec![1, 4, 6]);
1271+
let values2 = StringArray::from(vec!["a", "b", "c"]);
1272+
let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1273+
// Logical: a, b, b, b, c, c
1274+
1275+
assert_eq!(slice1, array2);
1276+
1277+
// Slice covering exactly the middle run.
let slice2 = array1.slice(2, 3);
1278+
// Logical: b, b, b
1279+
let run_ends3 = Int32Array::from(vec![3]);
1280+
let values3 = StringArray::from(vec!["b"]);
1281+
let array3 = RunArray::<Int32Type>::try_new(&run_ends3, &values3).unwrap();
1282+
assert_eq!(slice2, array3);
1283+
}
1284+
1285+
/// Checks that equality of sliced `RunArray`s follows their logical contents:
/// identical slices are equal, while a slice of the same length at a different
/// offset, holding different values, is not.
#[test]
1286+
fn test_run_array_eq_sliced_different_offsets() {
1287+
// Logical: a, a, b, b, b, c, c, c, c, c
let run_ends1 = Int32Array::from(vec![2, 5, 10]);
1288+
let values1 = StringArray::from(vec!["a", "b", "c"]);
1289+
let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1290+
1291+
let slice1 = array1.slice(1, 4); // a, b, b, b
1292+
let slice2 = array1.slice(1, 4);
1293+
assert_eq!(slice1, slice2);
1294+
1295+
// Same length as slice1 but shifted by one, so the logical values differ.
let slice3 = array1.slice(0, 4); // a, a, b, b
1296+
assert_ne!(slice1, slice3);
1297+
}
11891298
}

arrow-buffer/src/buffer/run.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -199,9 +199,16 @@ where
199199
pub fn sliced_values(&self) -> impl Iterator<Item = E> + '_ {
200200
let offset = self.logical_offset;
201201
let len = self.logical_length;
202-
let start = self.get_start_physical_index();
203-
let end = self.get_end_physical_index();
204-
self.run_ends[start..=end].iter().map(move |&val| {
202+
// Use an empty sub-slice instead of `std::iter::empty()`: both branches must
203+
// produce the same concrete iterator type behind the `impl Iterator` return.
204+
let physical_slice = if self.is_empty() {
205+
&self.run_ends[0..0]
206+
} else {
207+
let start = self.get_start_physical_index();
208+
let end = self.get_end_physical_index();
209+
&self.run_ends[start..=end]
210+
};
211+
physical_slice.iter().map(move |&val| {
205212
let val = val.as_usize().saturating_sub(offset).min(len);
206213
E::from_usize(val).unwrap()
207214
})

arrow-cast/src/cast/mod.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12451,4 +12451,32 @@ mod tests {
1245112451
assert_eq!(casted.as_ref(), &expected);
1245212452
}
1245312453
}
12454+
12455+
/// Checks that casting a sliced run-end-encoded array to another
/// run-end-encoded type (Int16 run ends to Int64 run ends, Utf8 values) yields
/// only the logical values covered by the slice.
#[test]
12456+
fn test_cast_between_sliced_run_end_encoded() {
12457+
// Logical: a, a, b, b, b, c, c, c
let run_ends = Int16Array::from(vec![2, 5, 8]);
12458+
let values = StringArray::from(vec!["a", "b", "c"]);
12459+
12460+
let ree_array = RunArray::<Int16Type>::try_new(&run_ends, &values).unwrap();
12461+
// Slice covers logical indices 1 and 2. Logical: a, b
let ree_array = ree_array.slice(1, 2);
12462+
let array_ref = Arc::new(ree_array) as ArrayRef;
12463+
12464+
// Target keeps the run-end encoding but widens the run ends to Int64.
let target_type = DataType::RunEndEncoded(
12465+
Arc::new(Field::new("run_ends", DataType::Int64, false)),
12466+
Arc::new(Field::new("values", DataType::Utf8, true)),
12467+
);
12468+
let cast_options = CastOptions {
12469+
safe: false,
12470+
format_options: FormatOptions::default(),
12471+
};
12472+
12473+
let result = cast_with_options(&array_ref, &target_type, &cast_options).unwrap();
12474+
let run_array = result.as_run::<Int64Type>();
12475+
let run_array = run_array.downcast::<StringArray>().unwrap();
12476+
12477+
let expected = vec!["a", "b"];
12478+
// Expand the cast result to its logical values, dropping any `None` entries.
let actual = run_array.into_iter().flatten().collect::<Vec<_>>();
12479+
12480+
assert_eq!(expected, actual);
12481+
}
1245412482
}

arrow-cast/src/cast/run_array.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,18 @@ pub(crate) fn run_end_encoded_cast<K: RunEndIndexType>(
3232
.downcast_ref::<RunArray<K>>()
3333
.ok_or_else(|| ArrowError::CastError("Expected RunArray".to_string()))?;
3434

35-
let values = run_array.values();
36-
3735
match to_type {
3836
// Stay as RunEndEncoded, cast only the values
3937
DataType::RunEndEncoded(target_index_field, target_value_field) => {
40-
let cast_values =
41-
cast_with_options(values, target_value_field.data_type(), cast_options)?;
38+
let values = run_array.values_slice();
39+
let cast_values = cast_with_options(
40+
values.as_ref(),
41+
target_value_field.data_type(),
42+
cast_options,
43+
)?;
4244

43-
let run_ends_array = PrimitiveArray::<K>::from_iter_values(
44-
run_array.run_ends().values().iter().copied(),
45-
);
45+
let run_ends_array =
46+
PrimitiveArray::<K>::from_iter_values(run_array.run_ends().sliced_values());
4647
let cast_run_ends = cast_with_options(
4748
&run_ends_array,
4849
target_index_field.data_type(),
@@ -72,6 +73,7 @@ pub(crate) fn run_end_encoded_cast<K: RunEndIndexType>(
7273

7374
// Expand to logical form
7475
_ => {
76+
let values = run_array.values();
7577
let len = run_array.len();
7678
let offset = run_array.offset();
7779
let run_ends = run_array.run_ends().values();

0 commit comments

Comments
 (0)