Skip to content

Commit e0b049e

Browse files
authored
Merge branch 'main' into feat/fixed-size-binary-builder/append-array
2 parents 1e1469f + 91234b5 commit e0b049e

File tree

36 files changed

+2504
-463
lines changed

36 files changed

+2504
-463
lines changed

arrow-array/benches/union_array.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -54,10 +54,10 @@ fn criterion_benchmark(c: &mut Criterion) {
5454
|b| {
5555
let type_ids = 0..with_nulls+without_nulls;
5656

57-
let fields = UnionFields::new(
57+
let fields = UnionFields::try_new(
5858
type_ids.clone(),
5959
type_ids.clone().map(|i| Field::new(format!("f{i}"), DataType::Int32, true)),
60-
);
60+
).unwrap();
6161

6262
let array = UnionArray::try_new(
6363
fields,

arrow-array/src/array/mod.rs

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -620,10 +620,11 @@ impl<'a> StringArrayType<'a> for &'a StringViewArray {
620620
}
621621
}
622622

623-
/// A trait for Arrow String Arrays, currently three types are supported:
623+
/// A trait for Arrow Binary Arrays, currently four types are supported:
624624
/// - `BinaryArray`
625625
/// - `LargeBinaryArray`
626626
/// - `BinaryViewArray`
627+
/// - `FixedSizeBinaryArray`
627628
///
628629
/// This trait helps to abstract over the different types of binary arrays
629630
/// so that we don't need to duplicate the implementation for each type.
@@ -642,6 +643,11 @@ impl<'a> BinaryArrayType<'a> for &'a BinaryViewArray {
642643
BinaryViewArray::iter(self)
643644
}
644645
}
646+
impl<'a> BinaryArrayType<'a> for &'a FixedSizeBinaryArray {
647+
fn iter(&self) -> ArrayIter<Self> {
648+
FixedSizeBinaryArray::iter(self)
649+
}
650+
}
645651

646652
impl PartialEq for dyn Array + '_ {
647653
fn eq(&self, other: &Self) -> bool {
@@ -1067,13 +1073,14 @@ mod tests {
10671073
fn test_null_union() {
10681074
for mode in [UnionMode::Sparse, UnionMode::Dense] {
10691075
let data_type = DataType::Union(
1070-
UnionFields::new(
1076+
UnionFields::try_new(
10711077
vec![2, 1],
10721078
vec![
10731079
Field::new("foo", DataType::Int32, true),
10741080
Field::new("bar", DataType::Int64, true),
10751081
],
1076-
),
1082+
)
1083+
.unwrap(),
10771084
mode,
10781085
);
10791086
let array = new_null_array(&data_type, 4);

arrow-array/src/array/union_array.rs

Lines changed: 15 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1682,14 +1682,15 @@ mod tests {
16821682
#[test]
16831683
fn test_custom_type_ids() {
16841684
let data_type = DataType::Union(
1685-
UnionFields::new(
1685+
UnionFields::try_new(
16861686
vec![8, 4, 9],
16871687
vec![
16881688
Field::new("strings", DataType::Utf8, false),
16891689
Field::new("integers", DataType::Int32, false),
16901690
Field::new("floats", DataType::Float64, false),
16911691
],
1692-
),
1692+
)
1693+
.unwrap(),
16931694
UnionMode::Dense,
16941695
);
16951696

@@ -1796,14 +1797,15 @@ mod tests {
17961797
fn into_parts_custom_type_ids() {
17971798
let set_field_type_ids: [i8; 3] = [8, 4, 9];
17981799
let data_type = DataType::Union(
1799-
UnionFields::new(
1800+
UnionFields::try_new(
18001801
set_field_type_ids,
18011802
[
18021803
Field::new("strings", DataType::Utf8, false),
18031804
Field::new("integers", DataType::Int32, false),
18041805
Field::new("floats", DataType::Float64, false),
18051806
],
1806-
),
1807+
)
1808+
.unwrap(),
18071809
UnionMode::Dense,
18081810
);
18091811
let string_array = StringArray::from(vec!["foo", "bar", "baz"]);
@@ -1836,13 +1838,14 @@ mod tests {
18361838

18371839
#[test]
18381840
fn test_invalid() {
1839-
let fields = UnionFields::new(
1841+
let fields = UnionFields::try_new(
18401842
[3, 2],
18411843
[
18421844
Field::new("a", DataType::Utf8, false),
18431845
Field::new("b", DataType::Utf8, false),
18441846
],
1845-
);
1847+
)
1848+
.unwrap();
18461849
let children = vec![
18471850
Arc::new(StringArray::from_iter_values(["a", "b"])) as _,
18481851
Arc::new(StringArray::from_iter_values(["c", "d"])) as _,
@@ -1912,13 +1915,14 @@ mod tests {
19121915

19131916
assert_eq!(array.logical_nulls(), None);
19141917

1915-
let fields = UnionFields::new(
1918+
let fields = UnionFields::try_new(
19161919
[1, 3],
19171920
[
19181921
Field::new("a", DataType::Int8, false), // non nullable
19191922
Field::new("b", DataType::Int8, false), // non nullable
19201923
],
1921-
);
1924+
)
1925+
.unwrap();
19221926
let array = UnionArray::try_new(
19231927
fields,
19241928
vec![1].into(),
@@ -1932,13 +1936,14 @@ mod tests {
19321936

19331937
assert_eq!(array.logical_nulls(), None);
19341938

1935-
let nullable_fields = UnionFields::new(
1939+
let nullable_fields = UnionFields::try_new(
19361940
[1, 3],
19371941
[
19381942
Field::new("a", DataType::Int8, true), // nullable but without nulls
19391943
Field::new("b", DataType::Int8, true), // nullable but without nulls
19401944
],
1941-
);
1945+
)
1946+
.unwrap();
19421947
let array = UnionArray::try_new(
19431948
nullable_fields.clone(),
19441949
vec![1, 1].into(),

arrow-avro/benches/avro_writer.rs

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -688,14 +688,15 @@ static ENUM_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
688688

689689
static UNION_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
690690
// Basic Dense Union of three types: Utf8, Int32, Float64
691-
let union_fields = UnionFields::new(
691+
let union_fields = UnionFields::try_new(
692692
vec![0, 1, 2],
693693
vec![
694694
Field::new("u_str", DataType::Utf8, true),
695695
Field::new("u_int", DataType::Int32, true),
696696
Field::new("u_f64", DataType::Float64, true),
697697
],
698-
);
698+
)
699+
.expect("UnionFields should be valid");
699700
let union_dt = DataType::Union(union_fields.clone(), UnionMode::Dense);
700701
let schema = schema_single("field1", union_dt);
701702

arrow-avro/src/codec.rs

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -993,13 +993,13 @@ fn union_branch_name(dt: &AvroDataType) -> String {
993993
dt.codec.union_field_name()
994994
}
995995

996-
fn build_union_fields(encodings: &[AvroDataType]) -> UnionFields {
996+
fn build_union_fields(encodings: &[AvroDataType]) -> Result<UnionFields, ArrowError> {
997997
let arrow_fields: Vec<Field> = encodings
998998
.iter()
999999
.map(|encoding| encoding.field_with_name(&union_branch_name(encoding)))
10001000
.collect();
10011001
let type_ids: Vec<i8> = (0..arrow_fields.len()).map(|i| i as i8).collect();
1002-
UnionFields::new(type_ids, arrow_fields)
1002+
UnionFields::try_new(type_ids, arrow_fields)
10031003
}
10041004

10051005
/// Resolves Avro type names to [`AvroDataType`]
@@ -1267,7 +1267,7 @@ impl<'a> Maker<'a> {
12671267
.map(|s| self.parse_type(s, namespace))
12681268
.collect::<Result<_, _>>()?;
12691269
// Build Arrow layout once here
1270-
let union_fields = build_union_fields(&children);
1270+
let union_fields = build_union_fields(&children)?;
12711271
Ok(AvroDataType::new(
12721272
Codec::Union(Arc::from(children), union_fields, UnionMode::Dense),
12731273
Default::default(),
@@ -1620,7 +1620,7 @@ impl<'a> Maker<'a> {
16201620
for writer in writer_variants {
16211621
writer_to_reader.push(self.find_best_promotion(writer, reader_variants, namespace));
16221622
}
1623-
let union_fields = build_union_fields(&reader_encodings);
1623+
let union_fields = build_union_fields(&reader_encodings)?;
16241624
let mut dt = AvroDataType::new(
16251625
Codec::Union(reader_encodings.into(), union_fields, UnionMode::Dense),
16261626
Default::default(),

arrow-avro/src/reader/mod.rs

Lines changed: 27 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -7839,35 +7839,38 @@ mod test {
78397839
let uuid1 = uuid16_from_str("fe7bc30b-4ce8-4c5e-b67c-2234a2d38e66");
78407840
let uuid2 = uuid16_from_str("0826cc06-d2e3-4599-b4ad-af5fa6905cdb");
78417841
let item_name = Field::LIST_FIELD_DEFAULT_NAME;
7842-
let uf_tri = UnionFields::new(
7842+
let uf_tri = UnionFields::try_new(
78437843
vec![0, 1, 2],
78447844
vec![
78457845
Field::new("int", DataType::Int32, false),
78467846
Field::new("string", DataType::Utf8, false),
78477847
Field::new("boolean", DataType::Boolean, false),
78487848
],
7849-
);
7850-
let uf_arr_items = UnionFields::new(
7849+
)
7850+
.unwrap();
7851+
let uf_arr_items = UnionFields::try_new(
78517852
vec![0, 1, 2],
78527853
vec![
78537854
Field::new("null", DataType::Null, false),
78547855
Field::new("string", DataType::Utf8, false),
78557856
Field::new("long", DataType::Int64, false),
78567857
],
7857-
);
7858+
)
7859+
.unwrap();
78587860
let arr_items_field = Arc::new(Field::new(
78597861
item_name,
78607862
DataType::Union(uf_arr_items.clone(), UnionMode::Dense),
78617863
true,
78627864
));
7863-
let uf_map_vals = UnionFields::new(
7865+
let uf_map_vals = UnionFields::try_new(
78647866
vec![0, 1, 2],
78657867
vec![
78667868
Field::new("string", DataType::Utf8, false),
78677869
Field::new("double", DataType::Float64, false),
78687870
Field::new("null", DataType::Null, false),
78697871
],
7870-
);
7872+
)
7873+
.unwrap();
78717874
let map_entries_field = Arc::new(Field::new(
78727875
"entries",
78737876
DataType::Struct(Fields::from(vec![
@@ -7928,7 +7931,7 @@ mod test {
79287931
);
79297932
m
79307933
};
7931-
let uf_union_big = UnionFields::new(
7934+
let uf_union_big = UnionFields::try_new(
79327935
vec![0, 1, 2, 3, 4],
79337936
vec![
79347937
Field::new(
@@ -7960,7 +7963,8 @@ mod test {
79607963
)
79617964
.with_metadata(enum_md_color.clone()),
79627965
],
7963-
);
7966+
)
7967+
.unwrap();
79647968
let fx4_md = {
79657969
let mut m = HashMap::<String, String>::new();
79667970
m.insert(AVRO_NAME_METADATA_KEY.to_string(), "Fx4".to_string());
@@ -7970,7 +7974,7 @@ mod test {
79707974
);
79717975
m
79727976
};
7973-
let uf_date_fixed4 = UnionFields::new(
7977+
let uf_date_fixed4 = UnionFields::try_new(
79747978
vec![0, 1],
79757979
vec![
79767980
Field::new(
@@ -7981,7 +7985,8 @@ mod test {
79817985
.with_metadata(fx4_md.clone()),
79827986
Field::new("date", DataType::Date32, false),
79837987
],
7984-
);
7988+
)
7989+
.unwrap();
79857990
let dur12u_md = {
79867991
let mut m = HashMap::<String, String>::new();
79877992
m.insert(AVRO_NAME_METADATA_KEY.to_string(), "Dur12U".to_string());
@@ -7991,7 +7996,7 @@ mod test {
79917996
);
79927997
m
79937998
};
7994-
let uf_dur_or_str = UnionFields::new(
7999+
let uf_dur_or_str = UnionFields::try_new(
79958000
vec![0, 1],
79968001
vec![
79978002
Field::new("string", DataType::Utf8, false),
@@ -8002,7 +8007,8 @@ mod test {
80028007
)
80038008
.with_metadata(dur12u_md.clone()),
80048009
],
8005-
);
8010+
)
8011+
.unwrap();
80068012
let fx10_md = {
80078013
let mut m = HashMap::<String, String>::new();
80088014
m.insert(AVRO_NAME_METADATA_KEY.to_string(), "Fx10".to_string());
@@ -8012,7 +8018,7 @@ mod test {
80128018
);
80138019
m
80148020
};
8015-
let uf_uuid_or_fx10 = UnionFields::new(
8021+
let uf_uuid_or_fx10 = UnionFields::try_new(
80168022
vec![0, 1],
80178023
vec![
80188024
Field::new(
@@ -8023,15 +8029,17 @@ mod test {
80238029
.with_metadata(fx10_md.clone()),
80248030
add_uuid_ext_union(Field::new("uuid", DataType::FixedSizeBinary(16), false)),
80258031
],
8026-
);
8027-
let uf_kv_val = UnionFields::new(
8032+
)
8033+
.unwrap();
8034+
let uf_kv_val = UnionFields::try_new(
80288035
vec![0, 1, 2],
80298036
vec![
80308037
Field::new("null", DataType::Null, false),
80318038
Field::new("int", DataType::Int32, false),
80328039
Field::new("long", DataType::Int64, false),
80338040
],
8034-
);
8041+
)
8042+
.unwrap();
80358043
let kv_fields = Fields::from(vec![
80368044
Field::new("key", DataType::Utf8, false),
80378045
Field::new(
@@ -8053,7 +8061,7 @@ mod test {
80538061
])),
80548062
false,
80558063
));
8056-
let uf_map_or_array = UnionFields::new(
8064+
let uf_map_or_array = UnionFields::try_new(
80578065
vec![0, 1],
80588066
vec![
80598067
Field::new(
@@ -8063,7 +8071,8 @@ mod test {
80638071
),
80648072
Field::new("map", DataType::Map(map_int_entries.clone(), false), false),
80658073
],
8066-
);
8074+
)
8075+
.unwrap();
80678076
let mut enum_md_status = {
80688077
let mut m = HashMap::<String, String>::new();
80698078
m.insert(

arrow-avro/src/reader/record.rs

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3674,7 +3674,7 @@ mod tests {
36743674
avro_children.push(AvroDataType::new(codec, Default::default(), None));
36753675
fields.push(arrow_schema::Field::new(name, dt, true));
36763676
}
3677-
let union_fields = UnionFields::new(type_ids, fields);
3677+
let union_fields = UnionFields::try_new(type_ids, fields).unwrap();
36783678
let union_codec = Codec::Union(avro_children.into(), union_fields, UnionMode::Dense);
36793679
AvroDataType::new(union_codec, Default::default(), None)
36803680
}
@@ -3823,13 +3823,14 @@ mod tests {
38233823
AvroDataType::new(Codec::Int32, Default::default(), None),
38243824
AvroDataType::new(Codec::Utf8, Default::default(), None),
38253825
];
3826-
let uf = UnionFields::new(
3826+
let uf = UnionFields::try_new(
38273827
vec![1, 3],
38283828
vec![
38293829
arrow_schema::Field::new("i", DataType::Int32, true),
38303830
arrow_schema::Field::new("s", DataType::Utf8, true),
38313831
],
3832-
);
3832+
)
3833+
.unwrap();
38333834
let codec = Codec::Union(children.into(), uf, UnionMode::Sparse);
38343835
let dt = AvroDataType::new(codec, Default::default(), None);
38353836
let err = Decoder::try_new(&dt).expect_err("sparse union should not be supported");

0 commit comments

Comments
 (0)