From 07a261f46c0080242c64753d17b4e9f9ed51d041 Mon Sep 17 00:00:00 2001 From: default Date: Tue, 3 Mar 2026 13:52:59 +0000 Subject: [PATCH] feat: Replace `from_avro_datum*` functions with `GenericDatumReader` --- avro/src/lib.rs | 21 +- avro/src/reader/datum.rs | 351 ++++++++++++++++++++++ avro/src/reader/mod.rs | 174 +---------- avro/src/schema/mod.rs | 12 +- avro/src/schema/resolve.rs | 2 +- avro/src/serde/de.rs | 6 +- avro/src/writer/datum.rs | 8 +- avro/tests/avro-3786.rs | 32 +- avro/tests/avro-3787.rs | 12 +- avro/tests/io.rs | 63 ++-- avro/tests/schema.rs | 14 +- avro/tests/to_from_avro_datum_schemata.rs | 24 +- 12 files changed, 472 insertions(+), 247 deletions(-) create mode 100644 avro/src/reader/datum.rs diff --git a/avro/src/lib.rs b/avro/src/lib.rs index f2f99757..314d5965 100644 --- a/avro/src/lib.rs +++ b/avro/src/lib.rs @@ -56,13 +56,13 @@ mod decimal; mod decode; mod duration; mod encode; -mod reader; #[cfg(doc)] pub mod documentation; pub mod error; pub mod headers; pub mod rabin; +pub mod reader; pub mod schema; pub mod schema_compatibility; pub mod schema_equality; @@ -90,8 +90,13 @@ pub use codec::{Codec, DeflateSettings}; pub use decimal::Decimal; pub use duration::{Days, Duration, Millis, Months}; pub use error::Error; +#[expect( + deprecated, + reason = "Still need to export it until we remove it completely" +)] pub use reader::{ - Reader, from_avro_datum, from_avro_datum_reader_schemata, from_avro_datum_schemata, + Reader, + datum::{from_avro_datum, from_avro_datum_reader_schemata, from_avro_datum_schemata}, read_marker, single_object::{GenericSingleObjectReader, SpecificSingleObjectReader}, }; @@ -154,9 +159,11 @@ pub fn set_serde_human_readable(human_readable: bool) -> bool { #[cfg(test)] mod tests { use crate::{ - Codec, Reader, Schema, Writer, from_avro_datum, + Codec, Reader, Schema, Writer, + reader::datum::GenericDatumReader, types::{Record, Value}, }; + use apache_avro_test_helper::TestResult; use pretty_assertions::assert_eq; //TODO: move where it fits better @@ -301,7 +308,7 @@ mod tests { } #[test] - fn test_illformed_length() { + fn test_illformed_length() -> TestResult { let raw_schema = r#" { "type": "record", @@ -318,7 +325,11 @@ mod tests { // Would allocate 18446744073709551605 bytes let illformed: &[u8] = &[0x3e, 0x15, 0xff, 0x1f, 0x15, 0xff]; - let value = from_avro_datum(&schema, &mut &*illformed, None); + let value = GenericDatumReader::builder(&schema) + .build()? + .read_value(&mut &*illformed); assert!(value.is_err()); + + Ok(()) } } diff --git a/avro/src/reader/datum.rs b/avro/src/reader/datum.rs new file mode 100644 index 00000000..beed61bb --- /dev/null +++ b/avro/src/reader/datum.rs @@ -0,0 +1,351 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::io::Read; + +use bon::bon; + +use crate::{AvroResult, Schema, decode::decode_internal, schema::ResolvedSchema, types::Value}; + +/// Reader for reading raw Avro data. +/// +/// This is most likely not what you need. Most users should use [`Reader`][crate::Reader], +/// [`GenericSingleObjectReader`][crate::GenericSingleObjectReader], or +/// [`SpecificSingleObjectReader`][crate::SpecificSingleObjectReader] instead. +pub struct GenericDatumReader<'s> { + writer: &'s Schema, + resolved: ResolvedSchema<'s>, + reader: Option<(&'s Schema, ResolvedSchema<'s>)>, +} + +#[bon] +impl<'s> GenericDatumReader<'s> { + /// Build a [`DatumReader`]. + /// + /// This is most likely not what you need. Most users should use [`Reader`][crate::Reader], + /// [`GenericSingleObjectReader`][crate::GenericSingleObjectReader], or + /// [`SpecificSingleObjectReader`][crate::SpecificSingleObjectReader] instead. + #[builder] + pub fn new( + /// The schema that was used to write the Avro datum. + #[builder(start_fn)] + writer_schema: &'s Schema, + /// Already resolved schemata that will be used to resolve references in the writer's schema. + resolved_writer_schemata: Option>, + /// The schema that will be used to resolve the value to conform the the new schema. + reader_schema: Option<&'s Schema>, + /// Already resolved schemata that will be used to resolve references in the reader's schema. + resolved_reader_schemata: Option>, + ) -> AvroResult { + let resolved_writer_schemata = if let Some(resolved) = resolved_writer_schemata { + resolved + } else { + ResolvedSchema::try_from(writer_schema)? + }; + + let reader = if let Some(reader) = reader_schema { + if let Some(resolved) = resolved_reader_schemata { + Some((reader, resolved)) + } else { + Some((reader, ResolvedSchema::try_from(reader)?)) + } + } else { + None + }; + + Ok(Self { + writer: writer_schema, + resolved: resolved_writer_schemata, + reader, + }) + } +} + +impl<'s, S: generic_datum_reader_builder::State> GenericDatumReaderBuilder<'s, S> { + /// Set the schemata that will be used to resolve any references in the writer's schema. + /// + /// This is equivalent to `.resolved_writer_schemata(ResolvedSchema::new_with_schemata(schemata)?)`. + /// If you already have a [`ResolvedSchema`], use that function instead. + pub fn writer_schemata( + self, + schemata: Vec<&'s Schema>, + ) -> AvroResult< + GenericDatumReaderBuilder<'s, generic_datum_reader_builder::SetResolvedWriterSchemata>, + > + where + S::ResolvedWriterSchemata: generic_datum_reader_builder::IsUnset, + { + let resolved = ResolvedSchema::new_with_schemata(schemata)?; + Ok(self.resolved_writer_schemata(resolved)) + } + + /// Set the schemata that will be used to resolve any references in the reader's schema. + /// + /// This is equivalent to `.resolved_reader_schemata(ResolvedSchema::new_with_schemata(schemata)?)`. + /// If you already have a [`ResolvedSchema`], use that function instead. + /// + /// This function can only be called after the reader schema is set. + pub fn reader_schemata( + self, + schemata: Vec<&'s Schema>, + ) -> AvroResult< + GenericDatumReaderBuilder<'s, generic_datum_reader_builder::SetResolvedReaderSchemata>, + > + where + S::ResolvedReaderSchemata: generic_datum_reader_builder::IsUnset, + S::ReaderSchema: generic_datum_reader_builder::IsSet, + { + let resolved = ResolvedSchema::new_with_schemata(schemata)?; + Ok(self.resolved_reader_schemata(resolved)) + } +} + +impl<'s> GenericDatumReader<'s> { + /// Read a Avro datum from the reader. + pub fn read_value(&self, reader: &mut R) -> AvroResult { + let value = decode_internal(self.writer, self.resolved.get_names(), None, reader)?; + if let Some((reader, resolved)) = &self.reader { + value.resolve_internal(reader, resolved.get_names(), None, &None) + } else { + Ok(value) + } + } +} + +/// Deprecated. +/// +/// This is equivalent to +/// ```ignore +/// GenericDatumReader::builder(writer_schema) +/// .maybe_reader_schema(reader_schema) +/// .build()? +/// .read_value(reader) +/// ``` +/// +/// Decode a `Value` encoded in Avro format given its `Schema` and anything implementing `io::Read` +/// to read from. +/// +/// In case a reader `Schema` is provided, schema resolution will also be performed. +/// +/// **NOTE** This function has a quite small niche of usage and does NOT take care of reading the +/// header and consecutive data blocks; use [`Reader`](struct.Reader.html) if you don't know what +/// you are doing, instead. +#[deprecated(since = "0.22.0", note = "Use `DatumReader` instead")] +pub fn from_avro_datum( + writer_schema: &Schema, + reader: &mut R, + reader_schema: Option<&Schema>, +) -> AvroResult { + GenericDatumReader::builder(writer_schema) + .maybe_reader_schema(reader_schema) + .build()? + .read_value(reader) +} + +/// Deprecated. +/// +/// This is equivalent to +/// ```ignore +/// GenericDatumReader::builder(writer_schema) +/// .writer_schemata(writer_schemata)? +/// .maybe_reader_schema(reader_schema) +/// .build()? +/// .read_value(reader) +/// ``` +/// +/// Decode a `Value` from raw Avro data. +/// +/// If the writer schema is incomplete, i.e. contains `Schema::Ref`s then it will use the provided +/// schemata to resolve any dependencies. +/// +/// When a reader `Schema` is provided, schema resolution will also be performed. +#[deprecated(since = "0.22.0", note = "Use `DatumReader` instead")] +pub fn from_avro_datum_schemata( + writer_schema: &Schema, + writer_schemata: Vec<&Schema>, + reader: &mut R, + reader_schema: Option<&Schema>, +) -> AvroResult { + GenericDatumReader::builder(writer_schema) + .writer_schemata(writer_schemata)? + .maybe_reader_schema(reader_schema) + .build()? + .read_value(reader) +} + +/// Deprecated. +/// +/// This is equivalent to +/// ```ignore +/// GenericDatumReader::builder(writer_schema) +/// .writer_schemata(writer_schemata)? +/// .maybe_reader_schema(reader_schema) +/// .reader_schemata(reader_schemata)? +/// .build()? +/// .read_value(reader) +/// ``` +/// +/// Decode a `Value` from raw Avro data. +/// +/// If the writer schema is incomplete, i.e. contains `Schema::Ref`s then it will use the provided +/// schemata to resolve any dependencies. +/// +/// When a reader `Schema` is provided, schema resolution will also be performed. +#[deprecated(since = "0.22.0", note = "Use `DatumReader` instead")] +pub fn from_avro_datum_reader_schemata( + writer_schema: &Schema, + writer_schemata: Vec<&Schema>, + reader: &mut R, + reader_schema: Option<&Schema>, + reader_schemata: Vec<&Schema>, +) -> AvroResult { + GenericDatumReader::builder(writer_schema) + .writer_schemata(writer_schemata)? + .maybe_reader_schema(reader_schema) + .reader_schemata(reader_schemata)? + .build()? + .read_value(reader) +} + +#[cfg(test)] +mod tests { + use apache_avro_test_helper::TestResult; + use serde::Deserialize; + + use crate::{ + Schema, from_value, + reader::datum::GenericDatumReader, + types::{Record, Value}, + }; + + #[test] + fn test_from_avro_datum() -> TestResult { + let schema = Schema::parse_str( + r#"{ + "type": "record", + "name": "test", + "fields": [ + { + "name": "a", + "type": "long", + "default": 42 + }, + { + "name": "b", + "type": "string" + } + ] + }"#, + )?; + let mut encoded: &'static [u8] = &[54, 6, 102, 111, 111]; + + let mut record = Record::new(&schema).unwrap(); + record.put("a", 27i64); + record.put("b", "foo"); + let expected = record.into(); + + let avro_datum = GenericDatumReader::builder(&schema) + .build()? + .read_value(&mut encoded)?; + + assert_eq!(avro_datum, expected); + + Ok(()) + } + + #[test] + fn test_from_avro_datum_with_union_to_struct() -> TestResult { + const TEST_RECORD_SCHEMA_3240: &str = r#" + { + "type": "record", + "name": "test", + "fields": [ + { + "name": "a", + "type": "long", + "default": 42 + }, + { + "name": "b", + "type": "string" + }, + { + "name": "a_nullable_array", + "type": ["null", {"type": "array", "items": {"type": "string"}}], + "default": null + }, + { + "name": "a_nullable_boolean", + "type": ["null", {"type": "boolean"}], + "default": null + }, + { + "name": "a_nullable_string", + "type": ["null", {"type": "string"}], + "default": null + } + ] + } + "#; + #[derive(Default, Debug, Deserialize, PartialEq, Eq)] + struct TestRecord3240 { + a: i64, + b: String, + a_nullable_array: Option>, + // we are missing the 'a_nullable_boolean' field to simulate missing keys + // a_nullable_boolean: Option, + a_nullable_string: Option, + } + + let schema = Schema::parse_str(TEST_RECORD_SCHEMA_3240)?; + let mut encoded: &'static [u8] = &[54, 6, 102, 111, 111]; + + let expected_record: TestRecord3240 = TestRecord3240 { + a: 27i64, + b: String::from("foo"), + a_nullable_array: None, + a_nullable_string: None, + }; + + let avro_datum = GenericDatumReader::builder(&schema) + .build()? + .read_value(&mut encoded)?; + let parsed_record: TestRecord3240 = match &avro_datum { + Value::Record(_) => from_value::(&avro_datum)?, + unexpected => { + panic!("could not map avro data to struct, found unexpected: {unexpected:?}") + } + }; + + assert_eq!(parsed_record, expected_record); + + Ok(()) + } + + #[test] + fn test_null_union() -> TestResult { + let schema = Schema::parse_str(r#"["null", "long"]"#)?; + let mut encoded: &'static [u8] = &[2, 0]; + + let avro_datum = GenericDatumReader::builder(&schema) + .build()? + .read_value(&mut encoded)?; + assert_eq!(avro_datum, Value::Union(1, Box::new(Value::Long(0)))); + + Ok(()) + } +} diff --git a/avro/src/reader/mod.rs b/avro/src/reader/mod.rs index 0113e505..94af9092 100644 --- a/avro/src/reader/mod.rs +++ b/avro/src/reader/mod.rs @@ -18,14 +18,10 @@ //! Logic handling reading from Avro format at user level. mod block; +pub mod datum; pub mod single_object; -use crate::{ - AvroResult, - decode::{decode, decode_internal}, - schema::{ResolvedSchema, Schema}, - types::Value, -}; +use crate::{AvroResult, schema::Schema, types::Value}; use block::Block; use bon::bon; use std::{collections::HashMap, io::Read}; @@ -137,74 +133,6 @@ impl Iterator for Reader<'_, R> { } } -/// Decode a `Value` encoded in Avro format given its `Schema` and anything implementing `io::Read` -/// to read from. -/// -/// In case a reader `Schema` is provided, schema resolution will also be performed. -/// -/// **NOTE** This function has a quite small niche of usage and does NOT take care of reading the -/// header and consecutive data blocks; use [`Reader`](struct.Reader.html) if you don't know what -/// you are doing, instead. -pub fn from_avro_datum( - writer_schema: &Schema, - reader: &mut R, - reader_schema: Option<&Schema>, -) -> AvroResult { - let value = decode(writer_schema, reader)?; - match reader_schema { - Some(schema) => value.resolve(schema), - None => Ok(value), - } -} - -/// Decode a `Value` from raw Avro data. -/// -/// If the writer schema is incomplete, i.e. contains `Schema::Ref`s then it will use the provided -/// schemata to resolve any dependencies. -/// -/// When a reader `Schema` is provided, schema resolution will also be performed. -pub fn from_avro_datum_schemata( - writer_schema: &Schema, - writer_schemata: Vec<&Schema>, - reader: &mut R, - reader_schema: Option<&Schema>, -) -> AvroResult { - from_avro_datum_reader_schemata( - writer_schema, - writer_schemata, - reader, - reader_schema, - Vec::with_capacity(0), - ) -} - -/// Decode a `Value` from raw Avro data. -/// -/// If the writer schema is incomplete, i.e. contains `Schema::Ref`s then it will use the provided -/// schemata to resolve any dependencies. -/// -/// When a reader `Schema` is provided, schema resolution will also be performed. -pub fn from_avro_datum_reader_schemata( - writer_schema: &Schema, - writer_schemata: Vec<&Schema>, - reader: &mut R, - reader_schema: Option<&Schema>, - reader_schemata: Vec<&Schema>, -) -> AvroResult { - let rs = ResolvedSchema::try_from(writer_schemata)?; - let value = decode_internal(writer_schema, rs.get_names(), None, reader)?; - match reader_schema { - Some(schema) => { - if reader_schemata.is_empty() { - value.resolve(schema) - } else { - value.resolve_schemata(schema, reader_schemata) - } - } - None => Ok(value), - } -} - /// Reads the marker bytes from Avro bytes generated earlier by a `Writer` pub fn read_marker(bytes: &[u8]) -> [u8; 16] { assert!( @@ -219,11 +147,9 @@ pub fn read_marker(bytes: &[u8]) -> [u8; 16] { #[cfg(test)] mod tests { use super::*; - use crate::from_value; use crate::types::Record; use apache_avro_test_helper::TestResult; use pretty_assertions::assert_eq; - use serde::Deserialize; use std::io::Cursor; const SCHEMA: &str = r#" @@ -243,7 +169,6 @@ mod tests { ] } "#; - const UNION_SCHEMA: &str = r#"["null", "long"]"#; const ENCODED: &[u8] = &[ 79u8, 98u8, 106u8, 1u8, 4u8, 22u8, 97u8, 118u8, 114u8, 111u8, 46u8, 115u8, 99u8, 104u8, 101u8, 109u8, 97u8, 222u8, 1u8, 123u8, 34u8, 116u8, 121u8, 112u8, 101u8, 34u8, 58u8, 34u8, @@ -261,101 +186,6 @@ mod tests { 207u8, 108u8, 180u8, 158u8, 57u8, 114u8, 40u8, 173u8, 199u8, 228u8, 239u8, ]; - #[test] - fn test_from_avro_datum() -> TestResult { - let schema = Schema::parse_str(SCHEMA)?; - let mut encoded: &'static [u8] = &[54, 6, 102, 111, 111]; - - let mut record = Record::new(&schema).unwrap(); - record.put("a", 27i64); - record.put("b", "foo"); - let expected = record.into(); - - assert_eq!(from_avro_datum(&schema, &mut encoded, None)?, expected); - - Ok(()) - } - - #[test] - fn test_from_avro_datum_with_union_to_struct() -> TestResult { - const TEST_RECORD_SCHEMA_3240: &str = r#" - { - "type": "record", - "name": "test", - "fields": [ - { - "name": "a", - "type": "long", - "default": 42 - }, - { - "name": "b", - "type": "string" - }, - { - "name": "a_nullable_array", - "type": ["null", {"type": "array", "items": {"type": "string"}}], - "default": null - }, - { - "name": "a_nullable_boolean", - "type": ["null", {"type": "boolean"}], - "default": null - }, - { - "name": "a_nullable_string", - "type": ["null", {"type": "string"}], - "default": null - } - ] - } - "#; - #[derive(Default, Debug, Deserialize, PartialEq, Eq)] - struct TestRecord3240 { - a: i64, - b: String, - a_nullable_array: Option>, - // we are missing the 'a_nullable_boolean' field to simulate missing keys - // a_nullable_boolean: Option, - a_nullable_string: Option, - } - - let schema = Schema::parse_str(TEST_RECORD_SCHEMA_3240)?; - let mut encoded: &'static [u8] = &[54, 6, 102, 111, 111]; - - let expected_record: TestRecord3240 = TestRecord3240 { - a: 27i64, - b: String::from("foo"), - a_nullable_array: None, - a_nullable_string: None, - }; - - let avro_datum = from_avro_datum(&schema, &mut encoded, None)?; - let parsed_record: TestRecord3240 = match &avro_datum { - Value::Record(_) => from_value::(&avro_datum)?, - unexpected => { - panic!("could not map avro data to struct, found unexpected: {unexpected:?}") - } - }; - - assert_eq!(parsed_record, expected_record); - - Ok(()) - } - - #[test] - fn test_null_union() -> TestResult { - let schema = Schema::parse_str(UNION_SCHEMA)?; - let mut encoded: &'static [u8] = &[2, 0]; - - assert_eq!( - from_avro_datum(&schema, &mut encoded, None)?, - Value::Union(1, Box::new(Value::Long(0))) - ); - - Ok(()) - } - #[test] fn test_reader_iterator() -> TestResult { let schema = Schema::parse_str(SCHEMA)?; diff --git a/avro/src/schema/mod.rs b/avro/src/schema/mod.rs index 91b0450d..893b13c2 100644 --- a/avro/src/schema/mod.rs +++ b/avro/src/schema/mod.rs @@ -1169,7 +1169,7 @@ fn field_ordering_position(field: &str) -> Option { mod tests { use super::*; use crate::writer::datum::GenericDatumWriter; - use crate::{error::Details, rabin::Rabin}; + use crate::{error::Details, rabin::Rabin, reader::datum::GenericDatumReader}; use apache_avro_test_helper::{ TestResult, logger::{assert_logged, assert_not_logged}, @@ -2998,7 +2998,10 @@ mod tests { .write_value_to_vec(avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + let deser_value = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut x)?; match deser_value { types::Value::Record(fields) => { assert_eq!(fields.len(), 2); @@ -3585,7 +3588,10 @@ mod tests { let mut x = &datum[..]; // Deserialization should succeed and we should be able to resolve the schema. - let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + let deser_value = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut x)?; assert!(deser_value.validate(&reader_schema)); // Verify that we can read a field from the record. diff --git a/avro/src/schema/resolve.rs b/avro/src/schema/resolve.rs index 449436b4..5087c8e7 100644 --- a/avro/src/schema/resolve.rs +++ b/avro/src/schema/resolve.rs @@ -23,7 +23,7 @@ use crate::schema::{ use crate::{AvroResult, Error, Schema}; use std::collections::HashMap; -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ResolvedSchema<'s> { pub(super) names_ref: NamesRef<'s>, schemata: Vec<&'s Schema>, diff --git a/avro/src/serde/de.rs b/avro/src/serde/de.rs index ee9dec17..82d40d5a 100644 --- a/avro/src/serde/de.rs +++ b/avro/src/serde/de.rs @@ -953,8 +953,8 @@ mod tests { use apache_avro_test_helper::TestResult; use super::*; - use crate::Decimal; use crate::writer::datum::GenericDatumWriter; + use crate::{Decimal, reader::datum::GenericDatumReader}; #[derive(PartialEq, Eq, Serialize, Deserialize, Debug)] pub struct StringEnum { @@ -997,7 +997,9 @@ mod tests { ); // decode from avro - let value = crate::from_avro_datum(&schema, &mut buf, None)?; + let value = GenericDatumReader::builder(&schema) + .build()? + .read_value(&mut buf)?; let decoded_data: StringEnum = crate::from_value(&value)?; diff --git a/avro/src/writer/datum.rs b/avro/src/writer/datum.rs index c5eb4a2d..30195543 100644 --- a/avro/src/writer/datum.rs +++ b/avro/src/writer/datum.rs @@ -211,6 +211,8 @@ pub fn to_avro_datum_schemata>( mod tests { use apache_avro_test_helper::TestResult; + use super::*; + use crate::reader::datum::GenericDatumReader; use crate::{ Days, Decimal, Duration, Millis, Months, schema::{DecimalSchema, FixedSchema, InnerDecimalSchema, Name}, @@ -218,8 +220,6 @@ mod tests { util::zig_i64, }; - use super::*; - const SCHEMA: &str = r#" { "type": "record", @@ -346,7 +346,9 @@ mod tests { // Should deserialize from the schema into the logical type. let mut r = ser.as_slice(); - let de = crate::from_avro_datum(&schema, &mut r, None)?; + let de = GenericDatumReader::builder(&schema) + .build()? + .read_value(&mut r)?; assert_eq!(de, value); Ok(()) } diff --git a/avro/tests/avro-3786.rs b/avro/tests/avro-3786.rs index 461de7c1..9719fd0c 100644 --- a/avro/tests/avro-3786.rs +++ b/avro/tests/avro-3786.rs @@ -16,7 +16,7 @@ // under the License. use apache_avro::writer::datum::GenericDatumWriter; -use apache_avro::{Schema, from_avro_datum, to_value, types}; +use apache_avro::{Schema, reader::datum::GenericDatumReader, to_value, types}; use apache_avro_test_helper::TestResult; #[test] @@ -138,7 +138,10 @@ fn avro_3786_deserialize_union_with_different_enum_order() -> TestResult { .write_value_to_vec(avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + let deser_value = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut x)?; match deser_value { types::Value::Record(fields) => { assert_eq!(fields.len(), 2); @@ -264,7 +267,10 @@ fn avro_3786_deserialize_union_with_different_enum_order_defined_in_record() -> .write_value_to_vec(avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + let deser_value = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut x)?; match deser_value { types::Value::Record(fields) => { assert_eq!(fields.len(), 1); @@ -379,7 +385,10 @@ fn test_avro_3786_deserialize_union_with_different_enum_order_defined_in_record_ .write_value_to_vec(avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + let deser_value = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut x)?; match deser_value { types::Value::Record(fields) => { assert_eq!(fields.len(), 1); @@ -494,7 +503,10 @@ fn test_avro_3786_deserialize_union_with_different_enum_order_defined_in_record_ .write_value_to_vec(avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + let deser_value = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut x)?; match deser_value { types::Value::Record(fields) => { assert_eq!(fields.len(), 1); @@ -609,7 +621,10 @@ fn deserialize_union_with_different_enum_order_defined_in_record() -> TestResult .write_value_to_vec(avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + let deser_value = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut x)?; match deser_value { types::Value::Record(fields) => { assert_eq!(fields.len(), 1); @@ -885,7 +900,10 @@ fn deserialize_union_with_record_with_enum_defined_inline_reader_has_different_i .write_value_to_vec(avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + let deser_value = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut x)?; match deser_value { types::Value::Record(fields) => { assert_eq!(fields.len(), 3); diff --git a/avro/tests/avro-3787.rs b/avro/tests/avro-3787.rs index 03d61587..b1558d9d 100644 --- a/avro/tests/avro-3787.rs +++ b/avro/tests/avro-3787.rs @@ -16,7 +16,7 @@ // under the License. use apache_avro::writer::datum::GenericDatumWriter; -use apache_avro::{Schema, from_avro_datum, to_value, types}; +use apache_avro::{Schema, reader::datum::GenericDatumReader, to_value, types}; use apache_avro_test_helper::TestResult; #[test] @@ -139,7 +139,10 @@ fn avro_3787_deserialize_union_with_unknown_symbol() -> TestResult { .write_value_to_vec(avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + let deser_value = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut x)?; match deser_value { types::Value::Record(fields) => { assert_eq!(fields.len(), 2); @@ -272,7 +275,10 @@ fn avro_3787_deserialize_union_with_unknown_symbol_no_ref() -> TestResult { .write_value_to_vec(avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + let deser_value = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut x)?; match deser_value { types::Value::Record(fields) => { assert_eq!(fields.len(), 1); diff --git a/avro/tests/io.rs b/avro/tests/io.rs index ae51a79d..2998a2cb 100644 --- a/avro/tests/io.rs +++ b/avro/tests/io.rs @@ -17,7 +17,7 @@ //! Port of use apache_avro::writer::datum::GenericDatumWriter; -use apache_avro::{Error, Schema, error::Details, from_avro_datum, types::Value}; +use apache_avro::{Error, Schema, error::Details, reader::datum::GenericDatumReader, types::Value}; use apache_avro_test_helper::TestResult; use pretty_assertions::assert_eq; use std::{io::Cursor, sync::OnceLock}; @@ -235,7 +235,9 @@ fn test_round_trip() -> TestResult { let encoded = GenericDatumWriter::builder(&schema) .build()? .write_value_to_vec(value.clone())?; - let decoded = from_avro_datum(&schema, &mut Cursor::new(encoded), None).unwrap(); + let decoded = GenericDatumReader::builder(&schema) + .build()? + .read_value(&mut Cursor::new(encoded))?; assert_eq!(value, &decoded); } @@ -285,14 +287,10 @@ fn test_schema_promotion() -> TestResult { let encoded = GenericDatumWriter::builder(&writer_schema) .build()? .write_value_to_vec(original_value.clone())?; - let decoded = from_avro_datum( - &writer_schema, - &mut Cursor::new(encoded), - Some(&reader_schema), - ) - .unwrap_or_else(|_| { - panic!("failed to decode {original_value:?} with schema: {reader_raw_schema:?}",) - }); + let decoded = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut Cursor::new(encoded))?; assert_eq!(decoded, promotable_values[j]); } } @@ -310,11 +308,10 @@ fn test_unknown_symbol() -> TestResult { let encoded = GenericDatumWriter::builder(&writer_schema) .build()? .write_value_to_vec(original_value.clone())?; - let decoded = from_avro_datum( - &writer_schema, - &mut Cursor::new(encoded), - Some(&reader_schema), - ); + let decoded = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut Cursor::new(encoded)); assert!(decoded.is_err()); Ok(()) @@ -336,11 +333,10 @@ fn test_default_value() -> TestResult { let encoded = GenericDatumWriter::builder(long_record_schema()) .build()? .write_value_to_vec(long_record_datum().clone())?; - let datum_read = from_avro_datum( - long_record_schema(), - &mut Cursor::new(encoded), - Some(&reader_schema), - )?; + let datum_read = GenericDatumReader::builder(long_record_schema()) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut Cursor::new(encoded))?; match default_datum { // For float/double, NaN != NaN, so we check specially here. @@ -395,11 +391,10 @@ fn test_no_default_value() -> TestResult { let encoded = GenericDatumWriter::builder(long_record_schema()) .build()? .write_value_to_vec(long_record_datum().clone())?; - let result = from_avro_datum( - long_record_schema(), - &mut Cursor::new(encoded), - Some(&reader_schema), - ); + let result = GenericDatumReader::builder(long_record_schema()) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut Cursor::new(encoded)); assert!(result.is_err()); Ok(()) @@ -426,11 +421,10 @@ fn test_projection() -> TestResult { let encoded = GenericDatumWriter::builder(long_record_schema()) .build()? .write_value_to_vec(long_record_datum().clone())?; - let datum_read = from_avro_datum( - long_record_schema(), - &mut Cursor::new(encoded), - Some(&reader_schema), - )?; + let datum_read = GenericDatumReader::builder(long_record_schema()) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut Cursor::new(encoded))?; assert_eq!(datum_to_read, datum_read); Ok(()) @@ -457,11 +451,10 @@ fn test_field_order() -> TestResult { let encoded = GenericDatumWriter::builder(long_record_schema()) .build()? .write_value_to_vec(long_record_datum().clone())?; - let datum_read = from_avro_datum( - long_record_schema(), - &mut Cursor::new(encoded), - Some(&reader_schema), - )?; + let datum_read = GenericDatumReader::builder(long_record_schema()) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut Cursor::new(encoded))?; assert_eq!(datum_to_read, datum_read); Ok(()) diff --git a/avro/tests/schema.rs b/avro/tests/schema.rs index 3542657e..89e328ae 100644 --- a/avro/tests/schema.rs +++ b/avro/tests/schema.rs @@ -19,7 +19,8 @@ use apache_avro::writer::datum::GenericDatumWriter; use apache_avro::{ Codec, Error, Reader, Schema, Writer, error::Details, - from_avro_datum, from_value, + from_value, + reader::datum::GenericDatumReader, schema::{EnumSchema, FixedSchema, Name, RecordField, RecordSchema}, to_value, types::{Record, Value}, @@ -862,8 +863,10 @@ fn avro_old_issue_47() -> TestResult { .build()? .write_value_to_vec(ser_value)?; - let de_value = &from_avro_datum(&schema, &mut &*serialized_bytes, None)?; - let deserialized_record = from_value::(de_value)?; + let de_value = GenericDatumReader::builder(&schema) + .build()? + .read_value(&mut &*serialized_bytes)?; + let deserialized_record = from_value::(&de_value)?; assert_eq!(record, deserialized_record); Ok(()) @@ -988,7 +991,10 @@ fn test_avro_3785_deserialize_namespace_with_nullable_type_containing_reference_ .write_value_to_vec(avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + let deser_value = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut x)?; match deser_value { Value::Record(fields) => { assert_eq!(fields.len(), 2); diff --git a/avro/tests/to_from_avro_datum_schemata.rs b/avro/tests/to_from_avro_datum_schemata.rs index 5cc79730..08493679 100644 --- a/avro/tests/to_from_avro_datum_schemata.rs +++ b/avro/tests/to_from_avro_datum_schemata.rs @@ -15,11 +15,9 @@ // specific language governing permissions and limitations // under the License. +use apache_avro::reader::datum::GenericDatumReader; use apache_avro::writer::datum::GenericDatumWriter; -use apache_avro::{ - Codec, Reader, Schema, Writer, from_avro_datum_reader_schemata, from_avro_datum_schemata, - types::Value, -}; +use apache_avro::{Codec, Reader, Schema, Writer, types::Value}; use apache_avro_test_helper::{TestResult, init}; static SCHEMA_A_STR: &str = r#"{ @@ -59,7 +57,10 @@ fn test_avro_3683_multiple_schemata_to_from_avro_datum() -> TestResult { .write_value_to_vec(record.clone())?; assert_eq!(actual, expected); - let value = from_avro_datum_schemata(schema_b, schemata, &mut actual.as_slice(), None)?; + let value = GenericDatumReader::builder(schema_b) + .writer_schemata(schemata)? + .build()? + .read_value(&mut actual.as_slice())?; assert_eq!(value, record); Ok(()) @@ -86,13 +87,12 @@ fn avro_rs_106_test_multiple_schemata_to_from_avro_datum_with_resolution() -> Te .write_value_to_vec(record.clone())?; assert_eq!(actual, expected); - let value = from_avro_datum_reader_schemata( - schema_b, - schemata.clone(), - &mut actual.as_slice(), - Some(schema_b), - schemata, - )?; + let value = GenericDatumReader::builder(schema_b) + .writer_schemata(schemata.clone())? + .reader_schema(schema_b) + .reader_schemata(schemata)? + .build()? + .read_value(&mut actual.as_slice())?; assert_eq!(value, record); Ok(())