Skip to content

Commit a768913

Browse files
committed
fix: use writer types in Skipper for resolved named record types
When a writer-only field references a named Avro type that was previously resolved against a reader schema, `parse_type` returns the registered reader-resolved type from the shared resolver. This caused two problems:

1. The Skipper built its struct sub-skippers from the reader's field list, which omits writer-only fields. Their bytes were never consumed, leaving the cursor at the wrong position for all subsequent records.
2. Reader fields carry resolution-induced nullability (e.g. a writer's plain `long` matched against a reader's `["null", "long"]` gains `nullability = Some(NullFirst)`). The Skipper then read a union-tag byte that was never written, causing "Unexpected EOF" errors.

Fix: store the writer's data type in `ResolvedField::ToReader` alongside the reader index. The Skipper's `Codec::Struct` arm now iterates `rec.writer_fields` and uses the writer type from every entry — both `ToReader(_, wdt)` and `Skip(wdt)` — so it always follows the writer's wire format.
1 parent 322f9ce commit a768913

File tree

2 files changed

+577
-15
lines changed

2 files changed

+577
-15
lines changed

arrow-avro/src/codec.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,9 @@ pub(crate) struct ResolvedRecord {
9494
#[derive(Debug, Clone, PartialEq)]
9595
pub(crate) enum ResolvedField {
9696
/// Resolves to a field indexed in the reader schema.
97-
ToReader(usize),
97+
/// The `AvroDataType` is the writer's type for this field, used by the Skipper
98+
/// to correctly consume writer bytes when the whole record is being skipped.
99+
ToReader(usize, AvroDataType),
98100
/// For fields present in the writer's schema but not the reader's, this stores their data type.
99101
/// This is needed to correctly skip over these fields during deserialization.
100102
Skip(AvroDataType),
@@ -2341,10 +2343,10 @@ impl<'a> Maker<'a> {
23412343
.iter()
23422344
.enumerate()
23432345
.map(|(writer_index, writer_field)| {
2346+
let dt = self.parse_type(&writer_field.r#type, writer_ns)?;
23442347
if let Some(reader_index) = writer_to_reader[writer_index] {
2345-
Ok(ResolvedField::ToReader(reader_index))
2348+
Ok(ResolvedField::ToReader(reader_index, dt))
23462349
} else {
2347-
let dt = self.parse_type(&writer_field.r#type, writer_ns)?;
23482350
Ok(ResolvedField::Skip(dt))
23492351
}
23502352
})
@@ -2888,7 +2890,7 @@ mod tests {
28882890
default_fields,
28892891
}) => {
28902892
assert_eq!(writer_fields.len(), 1);
2891-
assert_eq!(writer_fields[0], ResolvedField::ToReader(0));
2893+
assert!(matches!(writer_fields[0], ResolvedField::ToReader(0, _)));
28922894
assert_eq!(default_fields.len(), 1);
28932895
assert_eq!(default_fields[0], 1);
28942896
}
@@ -2981,7 +2983,7 @@ mod tests {
29812983
default_fields,
29822984
}) => {
29832985
assert_eq!(writer_fields.len(), 1);
2984-
assert_eq!(writer_fields[0], ResolvedField::ToReader(0));
2986+
assert!(matches!(writer_fields[0], ResolvedField::ToReader(0, _)));
29852987
assert_eq!(default_fields.len(), 1);
29862988
assert_eq!(default_fields[0], 1);
29872989
}
@@ -3802,9 +3804,9 @@ mod tests {
38023804
assert!(matches!(
38033805
&rec.writer_fields[..],
38043806
&[
3805-
ResolvedField::ToReader(1),
3807+
ResolvedField::ToReader(1, _),
38063808
ResolvedField::Skip(_),
3807-
ResolvedField::ToReader(0),
3809+
ResolvedField::ToReader(0, _),
38083810
]
38093811
));
38103812
assert_eq!(rec.default_fields.as_ref(), &[2usize, 3usize]);

0 commit comments

Comments
 (0)