Skip to content

Commit e9e4306

Browse files
sort
1 parent 7d4e680 commit e9e4306

File tree

3 files changed: 12 additions and 59 deletions.

arrow-array/src/array/union_array.rs

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1721,9 +1721,8 @@ mod tests {
17211721
.len(7)
17221722
.buffers(vec![type_ids, value_offsets])
17231723
.child_data(vec![
1724-
// Child arrays must be in sorted order by type ID: 4, 8, 9
1725-
int_array.into_data(),
17261724
string_array.into_data(),
1725+
int_array.into_data(),
17271726
float_array.into_data(),
17281727
])
17291728
.build()
@@ -1834,9 +1833,8 @@ mod tests {
18341833
.len(7)
18351834
.buffers(vec![type_ids, value_offsets])
18361835
.child_data(vec![
1837-
// Child arrays must be in sorted order by type ID: 4, 8, 9
1838-
int_array.into_data(),
18391836
string_array.into_data(),
1837+
int_array.into_data(),
18401838
float_array.into_data(),
18411839
])
18421840
.build()
@@ -1864,10 +1862,9 @@ mod tests {
18641862
],
18651863
)
18661864
.unwrap();
1867-
// Child arrays must be in sorted order by type ID: 2, 3
18681865
let children = vec![
1869-
Arc::new(StringArray::from_iter_values(["c", "d"])) as _,
18701866
Arc::new(StringArray::from_iter_values(["a", "b"])) as _,
1867+
Arc::new(StringArray::from_iter_values(["c", "d"])) as _,
18711868
];
18721869

18731870
let type_ids = vec![3, 3, 2].into();
@@ -1893,10 +1890,9 @@ mod tests {
18931890
"Invalid argument error: Type Ids values must match one of the field type ids"
18941891
);
18951892

1896-
// Child arrays must be in sorted order by type ID: 2, 3
18971893
let children = vec![
1898-
Arc::new(StringArray::from_iter_values(["c"])) as _,
18991894
Arc::new(StringArray::from_iter_values(["a", "b"])) as _,
1895+
Arc::new(StringArray::from_iter_values(["c"])) as _,
19001896
];
19011897
let type_ids = ScalarBuffer::from(vec![3_i8, 3, 2]);
19021898
let offsets = Some(vec![0, 1, 0].into());

arrow-avro/src/reader/record.rs

Lines changed: 5 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -4330,17 +4330,8 @@ mod tests {
43304330
avro_children.push(AvroDataType::new(codec, Default::default(), None));
43314331
fields.push(arrow_schema::Field::new(name, dt, true));
43324332
}
4333-
let union_fields = UnionFields::try_new(type_ids.clone(), fields).unwrap();
4334-
4335-
// UnionFields are sorted by type_id, so we need to reorder avro_children to match
4336-
let mut sorted_indices: Vec<usize> = (0..type_ids.len()).collect();
4337-
sorted_indices.sort_by_key(|&i| type_ids[i]);
4338-
let sorted_avro_children: Vec<AvroDataType> = sorted_indices
4339-
.iter()
4340-
.map(|&i| avro_children[i].clone())
4341-
.collect();
4342-
4343-
let union_codec = Codec::Union(sorted_avro_children.into(), union_fields, UnionMode::Dense);
4333+
let union_fields = UnionFields::try_new(type_ids, fields).unwrap();
4334+
let union_codec = Codec::Union(avro_children.into(), union_fields, UnionMode::Dense);
43444335
AvroDataType::new(union_codec, Default::default(), None)
43454336
}
43464337

@@ -4405,13 +4396,11 @@ mod tests {
44054396
vec![42, 7],
44064397
);
44074398
let mut dec = Decoder::try_new(&union_dt).unwrap();
4408-
// after sorting by type_id, schema order is [string(7), null(42)]
4409-
// to encode null, use branch 1; to encode string, use branch 0
4410-
let r1 = encode_avro_long(1);
4399+
let r1 = encode_avro_long(0);
44114400
let mut r2 = Vec::new();
4412-
r2.extend_from_slice(&encode_avro_long(0));
4401+
r2.extend_from_slice(&encode_avro_long(1));
44134402
r2.extend_from_slice(&encode_avro_bytes(b"abc"));
4414-
let r3 = encode_avro_long(1);
4403+
let r3 = encode_avro_long(0);
44154404
dec.decode(&mut AvroCursor::new(&r1)).unwrap();
44164405
dec.decode(&mut AvroCursor::new(&r2)).unwrap();
44174406
dec.decode(&mut AvroCursor::new(&r3)).unwrap();

arrow-schema/src/fields.rs

Lines changed: 3 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,8 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
use std::ops::Deref;
1819
use std::sync::Arc;
19-
use std::{hash::Hash, ops::Deref};
2020

2121
use crate::{ArrowError, DataType, Field, FieldRef};
2222

@@ -318,7 +318,7 @@ impl<'a> IntoIterator for &'a Fields {
318318
}
319319

320320
/// A cheaply cloneable, owned collection of [`FieldRef`] and their corresponding type ids
321-
#[derive(Clone, PartialEq, Eq, Hash, Ord, PartialOrd)]
321+
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
322322
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
323323
#[cfg_attr(feature = "serde", serde(transparent))]
324324
pub struct UnionFields(Arc<[(i8, FieldRef)]>);
@@ -402,7 +402,7 @@ impl UnionFields {
402402

403403
loop {
404404
match (type_ids_iter.next(), fields_iter.next()) {
405-
(None, None) => break,
405+
(None, None) => return Ok(Self(out.into())),
406406
(Some(type_id), Some(field)) => {
407407
// check type id is non-negative
408408
if type_id < 0 {
@@ -435,11 +435,6 @@ impl UnionFields {
435435
}
436436
}
437437
}
438-
439-
// sort by type ids to produce a consistent ordering
440-
out.sort_unstable_by_key(|&(i, _)| i);
441-
442-
Ok(Self(out.into()))
443438
}
444439

445440
/// Create a new [`UnionFields`] from a collection of fields with automatically
@@ -1044,31 +1039,4 @@ mod tests {
10441039
assert!(res.is_ok());
10451040
assert_eq!(res.unwrap().len(), 3);
10461041
}
1047-
1048-
#[test]
1049-
fn test_union_field_equality() {
1050-
let ids = vec![0, 1, 2];
1051-
let fields = vec![
1052-
Field::new("a", DataType::Binary, true),
1053-
Field::new("b", DataType::Utf8, true),
1054-
Field::new("c", DataType::Int16, true),
1055-
];
1056-
1057-
let u = UnionFields::try_new(ids.clone(), fields.clone()).unwrap();
1058-
assert_eq!(u.clone(), u.clone());
1059-
1060-
let u_rev =
1061-
UnionFields::try_new(ids.clone().into_iter().rev(), fields.into_iter().rev()).unwrap();
1062-
assert_eq!(u, u_rev);
1063-
1064-
let fields_2 = vec![
1065-
Field::new("a", DataType::Binary, true),
1066-
Field::new("b", DataType::Utf8, true),
1067-
// everything is the same from `fields` except Field "c" is not nullable
1068-
Field::new("c", DataType::Int16, false),
1069-
];
1070-
1071-
let u2 = UnionFields::try_new(ids.clone(), fields_2.clone()).unwrap();
1072-
assert_ne!(u, u2);
1073-
}
10741042
}

0 commit comments

Comments
 (0)