Skip to content

Commit 5c7c509

Browse files
Add try_from_fields
1 parent d678756 commit 5c7c509

File tree

1 file changed

+137
-1
lines changed

1 file changed

+137
-1
lines changed

arrow-schema/src/fields.rs

Lines changed: 137 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,9 @@ impl UnionFields {
432432
///
433433
/// Panics if the number of fields exceeds 128 (type IDs are `i8` values, so only IDs 0 through 127 can be assigned).
434434
///
435+
/// If you want to avoid panics, use [`UnionFields::try_from_fields`] instead, which
436+
/// returns a `Result`.
437+
///
435438
/// # Examples
436439
///
437440
/// ```
@@ -454,13 +457,67 @@ impl UnionFields {
454457
.into_iter()
455458
.enumerate()
456459
.map(|(i, field)| {
457-
let id = i8::try_from(i).expect("UnionFields cannot contain more than 127 fields");
460+
let id = i8::try_from(i).expect("UnionFields cannot contain more than 128 fields");
458461

459462
(id, field.into())
460463
})
461464
.collect()
462465
}
463466

467+
/// Create a new [`UnionFields`] from a collection of fields with automatically
468+
/// assigned type IDs starting from 0.
469+
///
470+
/// The type IDs are assigned in increasing order: 0, 1, 2, 3, etc.
471+
///
472+
/// This is the non-panicking version of [`UnionFields::from_fields`].
473+
///
474+
/// See <https://arrow.apache.org/docs/format/Columnar.html#union-layout>
475+
///
476+
/// # Errors
477+
///
478+
/// Returns an error if the number of fields exceeds 127 (the maximum value for i8 type IDs).
479+
///
480+
/// # Examples
481+
///
482+
/// ```
483+
/// use arrow_schema::{DataType, Field, UnionFields};
484+
/// // Create a new UnionFields with automatic type id assignment
485+
/// // 0 -> DataType::UInt8
486+
/// // 1 -> DataType::Utf8
487+
/// let result = UnionFields::try_from_fields(vec![
488+
/// Field::new("field1", DataType::UInt8, false),
489+
/// Field::new("field2", DataType::Utf8, false),
490+
/// ]);
491+
/// assert!(result.is_ok());
492+
/// assert_eq!(result.unwrap().len(), 2);
493+
///
494+
/// // This will fail with too many fields
495+
/// let many_fields: Vec<_> = (0..200)
496+
/// .map(|i| Field::new(format!("field{}", i), DataType::Int32, false))
497+
/// .collect();
498+
/// let result = UnionFields::try_from_fields(many_fields);
499+
/// assert!(result.is_err());
500+
/// ```
501+
pub fn try_from_fields<F>(fields: F) -> Result<Self, ArrowError>
502+
where
503+
F: IntoIterator,
504+
F::Item: Into<FieldRef>,
505+
{
506+
let mut out = Vec::with_capacity(i8::MAX as usize + 1);
507+
508+
for (i, field) in fields.into_iter().enumerate() {
509+
let id = i8::try_from(i).map_err(|_| {
510+
ArrowError::InvalidArgumentError(
511+
"UnionFields cannot contain more than 128 fields".into(),
512+
)
513+
})?;
514+
515+
out.push((id, field.into()));
516+
}
517+
518+
Ok(Self(out.into()))
519+
}
520+
464521
/// Create a new [`UnionFields`] from a [`Fields`] and array of type_ids
465522
///
466523
/// See <https://arrow.apache.org/docs/format/Columnar.html#union-layout>
@@ -850,4 +907,83 @@ mod tests {
850907
assert_eq!(union_fields.len(), 1);
851908
assert_eq!(union_fields.iter().next().unwrap().0, 42);
852909
}
910+
911+
/// An empty collection of fields is accepted and produces an empty `UnionFields`.
#[test]
fn test_union_fields_try_from_fields_empty() {
    let no_fields: Vec<Field> = Vec::new();
    let outcome = UnionFields::try_from_fields(no_fields);

    // Success and emptiness are checked together: any `Err` or any non-empty
    // result fails the test.
    assert!(matches!(&outcome, Ok(union_fields) if union_fields.is_empty()));
}
917+
918+
/// A single field is accepted and receives type ID 0.
#[test]
fn test_union_fields_try_from_fields_single() {
    let lone_field = Field::new("only", DataType::Int64, false);

    // `unwrap` fails the test if construction is rejected.
    let union_fields = UnionFields::try_from_fields(vec![lone_field]).unwrap();
    assert_eq!(union_fields.len(), 1);

    let (first_id, _) = union_fields.iter().next().unwrap();
    assert_eq!(first_id, 0);
}
926+
927+
/// 200 fields is far beyond the limit; the error must carry the expected message.
#[test]
fn test_union_fields_try_from_fields_too_many() {
    let mut many_fields = Vec::new();
    for idx in 0..200 {
        many_fields.push(Field::new(format!("field{}", idx), DataType::Int32, false));
    }

    // `unwrap_err` fails the test if construction unexpectedly succeeds.
    let error = UnionFields::try_from_fields(many_fields).unwrap_err();
    let message = error.to_string();
    assert!(message.contains("UnionFields cannot contain more than 128 fields"));
}
940+
941+
/// Exactly 128 fields (type IDs 0 through 127) is the largest accepted input.
#[test]
fn test_union_fields_try_from_fields_max_valid() {
    let mut fields = Vec::new();
    for idx in 0..=i8::MAX {
        fields.push(Field::new(format!("field{}", idx), DataType::Int32, false));
    }

    // `unwrap` fails the test if the boundary case is rejected.
    let union_fields = UnionFields::try_from_fields(fields).unwrap();
    assert_eq!(union_fields.len(), 128);

    let smallest_id = union_fields.iter().map(|(type_id, _)| type_id).min();
    let largest_id = union_fields.iter().map(|(type_id, _)| type_id).max();
    assert_eq!(smallest_id.unwrap(), 0);
    assert_eq!(largest_id.unwrap(), 127);
}
953+
954+
/// One field past the limit (129 in total) must be rejected.
#[test]
fn test_union_fields_try_from_fields_over_max() {
    // 129 fields should fail
    let mut one_too_many = Vec::new();
    for idx in 0..129 {
        one_too_many.push(Field::new(format!("field{}", idx), DataType::Int32, false));
    }

    assert!(UnionFields::try_from_fields(one_too_many).is_err());
}
963+
964+
/// Nested child types (struct, list, dictionary) are accepted like primitive ones.
#[test]
fn test_union_fields_try_from_fields_complex_types() {
    let struct_children = Fields::from(vec![
        Field::new("a", DataType::Int32, false),
        Field::new("b", DataType::Utf8, true),
    ]);
    let struct_field = Field::new("struct_field", DataType::Struct(struct_children), false);

    let list_item = Field::new("item", DataType::Float64, true);
    let list_field = Field::new_list("list_field", list_item, true);

    let dict_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
    let dict_field = Field::new("dict_field", dict_type, false);

    // `unwrap` fails the test if any of the nested types is rejected.
    let union_fields =
        UnionFields::try_from_fields(vec![struct_field, list_field, dict_field]).unwrap();
    assert_eq!(union_fields.len(), 3);
}
853989
}

0 commit comments

Comments
 (0)