@@ -45,16 +45,22 @@ fn same_type_family(a: &DataType, b: &DataType) -> bool {
4545 )
4646}
4747
48- // variant selection heuristic — 3 passes with decreasing specificity:
49- //
50- // first pass: field type == target type
51- // second pass: field and target are in the same equivalence class
52- // (e.g., Utf8 and Utf8View are both strings)
53- // third pass: field can be cast to target
54- // note: this is the most permissive and may lose information
55- // also, the matching logic is greedy so it will pick the first 'castable' variant
56- //
57- // each pass picks the first matching variant by type_id order.
48+ /// Selects the best-matching child array from a [`UnionArray`] for a given target type.
49+ ///
50+ /// The goal is to find the source field whose type is closest to the target,
51+ /// so that the subsequent cast is as lossless as possible. The heuristic uses
52+ /// three passes with decreasing specificity:
53+ ///
54+ /// 1. **Exact match**: field type equals the target type.
55+ /// 2. **Same type family**: field and target belong to the same logical family
56+ /// (e.g. `Utf8` and `Utf8View` are both strings). This avoids a greedy
57+ /// cross-family cast in pass 3 (e.g. picking `Int32` over `Utf8` when the
58+ /// target is `Utf8View`, since `can_cast_types(Int32, Utf8View)` is true).
59+ /// 3. **Castable**: `can_cast_types` reports the field can be cast to the target.
60+ /// Nested target types are skipped here because union extraction introduces
61+ /// nulls, which can conflict with non-nullable inner fields.
62+ ///
63+ /// Each pass greedily picks the first matching field by type_id order.
5864pub ( crate ) fn resolve_variant < ' a > (
5965 fields : & ' a UnionFields ,
6066 target_type : & DataType ,
@@ -81,7 +87,7 @@ pub(crate) fn resolve_variant<'a>(
8187 . map ( |( _, f) | f)
8288}
8389
84- /// Extracts the best-matching variant from a union array for a given target type,
90+ /// Extracts the best-matching child array from a [`UnionArray`] for a given target type,
8591/// and casts it to that type.
8692///
8793/// Rows where a different variant is active become NULL.
0 commit comments