Skip to content

Commit 7d4e680

Browse files
Fix up tests
1 parent 7bcc85f commit 7d4e680

File tree

3 files changed

+25
-25
lines changed

3 files changed

+25
-25
lines changed

arrow-array/src/array/union_array.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1721,8 +1721,9 @@ mod tests {
17211721
.len(7)
17221722
.buffers(vec![type_ids, value_offsets])
17231723
.child_data(vec![
1724-
string_array.into_data(),
1724+
// Child arrays must be in sorted order by type ID: 4, 8, 9
17251725
int_array.into_data(),
1726+
string_array.into_data(),
17261727
float_array.into_data(),
17271728
])
17281729
.build()
@@ -1833,8 +1834,9 @@ mod tests {
18331834
.len(7)
18341835
.buffers(vec![type_ids, value_offsets])
18351836
.child_data(vec![
1836-
string_array.into_data(),
1837+
// Child arrays must be in sorted order by type ID: 4, 8, 9
18371838
int_array.into_data(),
1839+
string_array.into_data(),
18381840
float_array.into_data(),
18391841
])
18401842
.build()
@@ -1862,9 +1864,10 @@ mod tests {
18621864
],
18631865
)
18641866
.unwrap();
1867+
// Child arrays must be in sorted order by type ID: 2, 3
18651868
let children = vec![
1866-
Arc::new(StringArray::from_iter_values(["a", "b"])) as _,
18671869
Arc::new(StringArray::from_iter_values(["c", "d"])) as _,
1870+
Arc::new(StringArray::from_iter_values(["a", "b"])) as _,
18681871
];
18691872

18701873
let type_ids = vec![3, 3, 2].into();
@@ -1890,9 +1893,10 @@ mod tests {
18901893
"Invalid argument error: Type Ids values must match one of the field type ids"
18911894
);
18921895

1896+
// Child arrays must be in sorted order by type ID: 2, 3
18931897
let children = vec![
1894-
Arc::new(StringArray::from_iter_values(["a", "b"])) as _,
18951898
Arc::new(StringArray::from_iter_values(["c"])) as _,
1899+
Arc::new(StringArray::from_iter_values(["a", "b"])) as _,
18961900
];
18971901
let type_ids = ScalarBuffer::from(vec![3_i8, 3, 2]);
18981902
let offsets = Some(vec![0, 1, 0].into());

arrow-avro/src/reader/record.rs

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4330,8 +4330,17 @@ mod tests {
43304330
avro_children.push(AvroDataType::new(codec, Default::default(), None));
43314331
fields.push(arrow_schema::Field::new(name, dt, true));
43324332
}
4333-
let union_fields = UnionFields::try_new(type_ids, fields).unwrap();
4334-
let union_codec = Codec::Union(avro_children.into(), union_fields, UnionMode::Dense);
4333+
let union_fields = UnionFields::try_new(type_ids.clone(), fields).unwrap();
4334+
4335+
// UnionFields are sorted by type_id, so we need to reorder avro_children to match
4336+
let mut sorted_indices: Vec<usize> = (0..type_ids.len()).collect();
4337+
sorted_indices.sort_by_key(|&i| type_ids[i]);
4338+
let sorted_avro_children: Vec<AvroDataType> = sorted_indices
4339+
.iter()
4340+
.map(|&i| avro_children[i].clone())
4341+
.collect();
4342+
4343+
let union_codec = Codec::Union(sorted_avro_children.into(), union_fields, UnionMode::Dense);
43354344
AvroDataType::new(union_codec, Default::default(), None)
43364345
}
43374346

@@ -4396,11 +4405,13 @@ mod tests {
43964405
vec![42, 7],
43974406
);
43984407
let mut dec = Decoder::try_new(&union_dt).unwrap();
4399-
let r1 = encode_avro_long(0);
4408+
// after sorting by type_id, schema order is [string(7), null(42)]
4409+
// to encode null, use branch 1; to encode string, use branch 0
4410+
let r1 = encode_avro_long(1);
44004411
let mut r2 = Vec::new();
4401-
r2.extend_from_slice(&encode_avro_long(1));
4412+
r2.extend_from_slice(&encode_avro_long(0));
44024413
r2.extend_from_slice(&encode_avro_bytes(b"abc"));
4403-
let r3 = encode_avro_long(0);
4414+
let r3 = encode_avro_long(1);
44044415
dec.decode(&mut AvroCursor::new(&r1)).unwrap();
44054416
dec.decode(&mut AvroCursor::new(&r2)).unwrap();
44064417
dec.decode(&mut AvroCursor::new(&r3)).unwrap();

arrow-schema/src/fields.rs

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,7 @@ impl<'a> IntoIterator for &'a Fields {
318318
}
319319

320320
/// A cheaply cloneable, owned collection of [`FieldRef`] and their corresponding type ids
321-
#[derive(Clone, Eq, Ord, PartialOrd)]
321+
#[derive(Clone, PartialEq, Eq, Hash, Ord, PartialOrd)]
322322
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
323323
#[cfg_attr(feature = "serde", serde(transparent))]
324324
pub struct UnionFields(Arc<[(i8, FieldRef)]>);
@@ -345,21 +345,6 @@ impl std::ops::Index<usize> for UnionFields {
345345
}
346346
}
347347

348-
impl PartialEq for UnionFields {
349-
fn eq(&self, other: &Self) -> bool {
350-
self.len() == other.len() && self.iter().all(|a| other.iter().any(|b| a == b))
351-
}
352-
}
353-
354-
impl Hash for UnionFields {
355-
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
356-
let mut v = self.0.iter().collect::<Vec<_>>();
357-
v.sort_by_key(|(id, _)| *id);
358-
359-
v.hash(state);
360-
}
361-
}
362-
363348
impl UnionFields {
364349
/// Create a new [`UnionFields`] with no fields
365350
pub fn empty() -> Self {

0 commit comments

Comments
 (0)