From 58cfb998b3643e03ee3fa052fc3c70e1d781ddc7 Mon Sep 17 00:00:00 2001 From: Martin Tzvetanov Grigorov Date: Thu, 19 Feb 2026 13:48:34 +0200 Subject: [PATCH 1/5] feat!: Use bon builder style instead of several methods Instead of having several methods like: Schema::array(Schema) and Schema::array_with_attributes(Schema, BTreeMap) we could use Bon's function builders with start_fn: ``` Schema::array(Schema).call() Schema::array(Schema).attributes(BTreeMap).call() ``` The only annoying part is the `.call()` --- avro/src/decode.rs | 4 ++-- avro/src/encode.rs | 7 +++++-- avro/src/schema/mod.rs | 30 ++++++++++++++---------------- avro/src/schema_equality.rs | 4 ++-- avro/src/serde/derive.rs | 10 +++++----- avro/src/types.rs | 4 ++-- 6 files changed, 30 insertions(+), 29 deletions(-) diff --git a/avro/src/decode.rs b/avro/src/decode.rs index d88ab5fb..cfe069db 100644 --- a/avro/src/decode.rs +++ b/avro/src/decode.rs @@ -376,7 +376,7 @@ mod tests { #[test] fn test_decode_array_without_size() -> TestResult { let mut input: &[u8] = &[6, 2, 4, 6, 0]; - let result = decode(&Schema::array(Schema::Int), &mut input); + let result = decode(&Schema::array(Schema::Int).call(), &mut input); assert_eq!(Array(vec!(Int(1), Int(2), Int(3))), result?); Ok(()) @@ -385,7 +385,7 @@ mod tests { #[test] fn test_decode_array_with_size() -> TestResult { let mut input: &[u8] = &[5, 6, 2, 4, 6, 0]; - let result = decode(&Schema::array(Schema::Int), &mut input); + let result = decode(&Schema::array(Schema::Int).call(), &mut input); assert_eq!(Array(vec!(Int(1), Int(2), Int(3))), result?); Ok(()) diff --git a/avro/src/encode.rs b/avro/src/encode.rs index 7efe4a6b..a21a326c 100644 --- a/avro/src/encode.rs +++ b/avro/src/encode.rs @@ -392,10 +392,13 @@ pub(crate) mod tests { let empty: Vec = Vec::new(); encode( &Value::Array(empty.clone()), - &Schema::array(Schema::Int), + &Schema::array(Schema::Int).call(), &mut buf, ) - .expect(&success(&Value::Array(empty), &Schema::array(Schema::Int))); + .expect(&success( + &Value::Array(empty), + &Schema::array(Schema::Int).call(), + )); assert_eq!(vec![0u8], buf); } diff --git a/avro/src/schema/mod.rs b/avro/src/schema/mod.rs index 35372cef..01fc2534 100644 --- a/avro/src/schema/mod.rs +++ b/avro/src/schema/mod.rs @@ -56,6 +56,7 @@ pub use crate::schema::{ resolve::ResolvedSchema, union::UnionSchema, }; +use bon::bon; /// Represents documentation for complex Avro schemas. pub type Documentation = Option; @@ -383,6 +384,7 @@ type DecimalMetadata = usize; pub(crate) type Precision = DecimalMetadata; pub(crate) type Scale = DecimalMetadata; +#[bon] impl Schema { /// Converts `self` into its [Parsing Canonical Form]. /// @@ -664,16 +666,12 @@ impl Schema { } /// Returns a `Schema::Array` with the given items. - pub fn array(items: Schema) -> Self { - Schema::Array(ArraySchema { - items: Box::new(items), - default: None, - attributes: Default::default(), - }) - } - - /// Returns a `Schema::Array` with the given items and custom attributes. - pub fn array_with_attributes(items: Schema, attributes: BTreeMap) -> Self { + #[builder] + pub fn array( + #[builder(start_fn)] items: Schema, + attributes: Option>, + ) -> Self { + let attributes = attributes.unwrap_or_default(); Schema::Array(ArraySchema { items: Box::new(items), default: None, @@ -1154,7 +1152,7 @@ mod tests { #[test] fn test_array_schema() -> TestResult { let schema = Schema::parse_str(r#"{"type": "array", "items": "string"}"#)?; - assert_eq!(Schema::array(Schema::String), schema); + assert_eq!(Schema::array(Schema::String).call(), schema); Ok(()) } @@ -1607,7 +1605,8 @@ mod tests { aliases: None, schema: Schema::array(Schema::Ref { name: Name::new("Node")?, - }), + }) + .call(), order: RecordFieldOrder::Ascending, position: 1, custom_attributes: Default::default(), @@ -4682,10 +4681,9 @@ mod tests { #[test] fn test_avro_3927_serialize_array_with_custom_attributes() -> TestResult { - let expected = Schema::array_with_attributes( - Schema::Long, - BTreeMap::from([("field-id".to_string(), "1".into())]), - ); + let expected = Schema::array(Schema::Long) + .attributes(BTreeMap::from([("field-id".to_string(), "1".into())])) + .call(); let value = serde_json::to_value(&expected)?; let serialized = serde_json::to_string(&value)?; diff --git a/avro/src/schema_equality.rs b/avro/src/schema_equality.rs index e65e28cc..58b1b016 100644 --- a/avro/src/schema_equality.rs +++ b/avro/src/schema_equality.rs @@ -475,11 +475,11 @@ mod tests { #[test] fn test_avro_3939_compare_array_schemata() { - let schema_one = Schema::array(Schema::Boolean); + let schema_one = Schema::array(Schema::Boolean).call(); assert!(!SPECIFICATION_EQ.compare(&schema_one, &Schema::Boolean)); assert!(!STRUCT_FIELD_EQ.compare(&schema_one, &Schema::Boolean)); - let schema_two = Schema::array(Schema::Boolean); + let schema_two = Schema::array(Schema::Boolean).call(); let specification_eq_res = SPECIFICATION_EQ.compare(&schema_one, &schema_two); let struct_field_eq_res = STRUCT_FIELD_EQ.compare(&schema_one, &schema_two); diff --git a/avro/src/serde/derive.rs b/avro/src/serde/derive.rs index c631db98..b17f259c 100644 --- a/avro/src/serde/derive.rs +++ b/avro/src/serde/derive.rs @@ -524,7 +524,7 @@ macro_rules! impl_array_schema ( ($type:ty where T: AvroSchemaComponent) => ( impl AvroSchemaComponent for $type { fn get_schema_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Schema { - Schema::array(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)) + Schema::array(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)).call() } fn get_record_fields_in_ctxt(_: usize, _: &mut Names, _: &Namespace) -> Option> { @@ -544,7 +544,7 @@ where T: AvroSchemaComponent, { fn get_schema_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Schema { - Schema::array(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)) + Schema::array(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)).call() } fn get_record_fields_in_ctxt( @@ -784,7 +784,7 @@ mod tests { #[test] fn avro_rs_401_slice() -> TestResult { let schema = <[u8]>::get_schema(); - assert_eq!(schema, Schema::array(Schema::Int)); + assert_eq!(schema, Schema::array(Schema::Int).call()); Ok(()) } @@ -792,7 +792,7 @@ mod tests { #[test] fn avro_rs_401_array() -> TestResult { let schema = <[u8; 55]>::get_schema(); - assert_eq!(schema, Schema::array(Schema::Int)); + assert_eq!(schema, Schema::array(Schema::Int).call()); Ok(()) } @@ -804,7 +804,7 @@ mod tests { schema, Schema::union(vec![ Schema::Null, - Schema::array(Schema::array(Schema::Int)) + Schema::array(Schema::array(Schema::Int).call()).call() ])? ); diff --git a/avro/src/types.rs b/avro/src/types.rs index 9c1f05f3..53028519 100644 --- a/avro/src/types.rs +++ b/avro/src/types.rs @@ -1351,13 +1351,13 @@ mod tests { ), ( Value::Array(vec![Value::Long(42i64)]), - Schema::array(Schema::Long), + Schema::array(Schema::Long).call(), true, "", ), ( Value::Array(vec![Value::Boolean(true)]), - Schema::array(Schema::Long), + Schema::array(Schema::Long).call(), false, "Invalid value: Array([Boolean(true)]) for schema: Array(ArraySchema { items: Long, default: None, attributes: {} }). Reason: Unsupported value-schema combination! Value: Boolean(true), schema: Long", ), From 605771f785b3b9c52afffde1e72c93b7440c1f72 Mon Sep 17 00:00:00 2001 From: Martin Tzvetanov Grigorov Date: Thu, 19 Feb 2026 13:58:33 +0200 Subject: [PATCH 2/5] Fix doc tests. --- avro/src/schema_compatibility.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/avro/src/schema_compatibility.rs b/avro/src/schema_compatibility.rs index 7d9c611b..9e47a312 100644 --- a/avro/src/schema_compatibility.rs +++ b/avro/src/schema_compatibility.rs @@ -29,8 +29,8 @@ //! //! ``` //! # use apache_avro::{Schema, schema_compatibility::{Compatibility, SchemaCompatibility}}; -//! let writers_schema = Schema::array(Schema::Int); -//! let readers_schema = Schema::array(Schema::Long); +//! let writers_schema = Schema::array(Schema::Int).call(); +//! let readers_schema = Schema::array(Schema::Long).call(); //! assert_eq!(SchemaCompatibility::can_read(&writers_schema, &readers_schema), Ok(Compatibility::Full)); //! ``` //! @@ -40,8 +40,8 @@ //! //! ``` //! # use apache_avro::{Schema, schema_compatibility::SchemaCompatibility}; -//! let writers_schema = Schema::array(Schema::Long); -//! let readers_schema = Schema::array(Schema::Int); +//! let writers_schema = Schema::array(Schema::Long).call(); +//! let readers_schema = Schema::array(Schema::Int).call(); //! assert!(SchemaCompatibility::can_read(&writers_schema, &readers_schema).is_err()); //! ``` //! From b2900826a56889ca89056d44331d057ad8bffce2 Mon Sep 17 00:00:00 2001 From: Martin Tzvetanov Grigorov Date: Thu, 19 Feb 2026 15:11:59 +0200 Subject: [PATCH 3/5] Use `.build()` as a finish_fn for the bon style function builders --- avro/src/decode.rs | 4 ++-- avro/src/encode.rs | 4 ++-- avro/src/schema/mod.rs | 10 +++++----- avro/src/schema_compatibility.rs | 8 ++++---- avro/src/schema_equality.rs | 4 ++-- avro/src/serde/derive.rs | 10 +++++----- avro/src/types.rs | 4 ++-- 7 files changed, 22 insertions(+), 22 deletions(-) diff --git a/avro/src/decode.rs b/avro/src/decode.rs index cfe069db..9e389d47 100644 --- a/avro/src/decode.rs +++ b/avro/src/decode.rs @@ -376,7 +376,7 @@ mod tests { #[test] fn test_decode_array_without_size() -> TestResult { let mut input: &[u8] = &[6, 2, 4, 6, 0]; - let result = decode(&Schema::array(Schema::Int).call(), &mut input); + let result = decode(&Schema::array(Schema::Int).build(), &mut input); assert_eq!(Array(vec!(Int(1), Int(2), Int(3))), result?); Ok(()) @@ -385,7 +385,7 @@ mod tests { #[test] fn test_decode_array_with_size() -> TestResult { let mut input: &[u8] = &[5, 6, 2, 4, 6, 0]; - let result = decode(&Schema::array(Schema::Int).call(), &mut input); + let result = decode(&Schema::array(Schema::Int).build(), &mut input); assert_eq!(Array(vec!(Int(1), Int(2), Int(3))), result?); Ok(()) diff --git a/avro/src/encode.rs b/avro/src/encode.rs index a21a326c..32ec10b6 100644 --- a/avro/src/encode.rs +++ b/avro/src/encode.rs @@ -392,12 +392,12 @@ pub(crate) mod tests { let empty: Vec = Vec::new(); encode( &Value::Array(empty.clone()), - &Schema::array(Schema::Int).call(), + &Schema::array(Schema::Int).build(), &mut buf, ) .expect(&success( &Value::Array(empty), - &Schema::array(Schema::Int).call(), + &Schema::array(Schema::Int).build(), )); assert_eq!(vec![0u8], buf); } diff --git a/avro/src/schema/mod.rs b/avro/src/schema/mod.rs index 01fc2534..280a6e8f 100644 --- a/avro/src/schema/mod.rs +++ b/avro/src/schema/mod.rs @@ -665,8 +665,8 @@ impl Schema { }) } - /// Returns a `Schema::Array` with the given items. - #[builder] + /// Returns a `Schema::Array` with the given items and optional custom attributes. + #[builder(finish_fn = build)] pub fn array( #[builder(start_fn)] items: Schema, attributes: Option>, @@ -1152,7 +1152,7 @@ mod tests { #[test] fn test_array_schema() -> TestResult { let schema = Schema::parse_str(r#"{"type": "array", "items": "string"}"#)?; - assert_eq!(Schema::array(Schema::String).call(), schema); + assert_eq!(Schema::array(Schema::String).build(), schema); Ok(()) } @@ -1606,7 +1606,7 @@ mod tests { schema: Schema::array(Schema::Ref { name: Name::new("Node")?, }) - .call(), + .build(), order: RecordFieldOrder::Ascending, position: 1, custom_attributes: Default::default(), @@ -4683,7 +4683,7 @@ mod tests { fn test_avro_3927_serialize_array_with_custom_attributes() -> TestResult { let expected = Schema::array(Schema::Long) .attributes(BTreeMap::from([("field-id".to_string(), "1".into())])) - .call(); + .build(); let value = serde_json::to_value(&expected)?; let serialized = serde_json::to_string(&value)?; diff --git a/avro/src/schema_compatibility.rs b/avro/src/schema_compatibility.rs index 9e47a312..926b4e06 100644 --- a/avro/src/schema_compatibility.rs +++ b/avro/src/schema_compatibility.rs @@ -29,8 +29,8 @@ //! //! ``` //! # use apache_avro::{Schema, schema_compatibility::{Compatibility, SchemaCompatibility}}; -//! let writers_schema = Schema::array(Schema::Int).call(); -//! let readers_schema = Schema::array(Schema::Long).call(); +//! let writers_schema = Schema::array(Schema::Int).build(); +//! let readers_schema = Schema::array(Schema::Long).build(); //! assert_eq!(SchemaCompatibility::can_read(&writers_schema, &readers_schema), Ok(Compatibility::Full)); //! ``` //! @@ -40,8 +40,8 @@ //! //! ``` //! # use apache_avro::{Schema, schema_compatibility::SchemaCompatibility}; -//! let writers_schema = Schema::array(Schema::Long).call(); -//! let readers_schema = Schema::array(Schema::Int).call(); +//! let writers_schema = Schema::array(Schema::Long).build(); +//! let readers_schema = Schema::array(Schema::Int).build(); //! assert!(SchemaCompatibility::can_read(&writers_schema, &readers_schema).is_err()); //! ``` //! diff --git a/avro/src/schema_equality.rs b/avro/src/schema_equality.rs index 58b1b016..37df9ed9 100644 --- a/avro/src/schema_equality.rs +++ b/avro/src/schema_equality.rs @@ -475,11 +475,11 @@ mod tests { #[test] fn test_avro_3939_compare_array_schemata() { - let schema_one = Schema::array(Schema::Boolean).call(); + let schema_one = Schema::array(Schema::Boolean).build(); assert!(!SPECIFICATION_EQ.compare(&schema_one, &Schema::Boolean)); assert!(!STRUCT_FIELD_EQ.compare(&schema_one, &Schema::Boolean)); - let schema_two = Schema::array(Schema::Boolean).call(); + let schema_two = Schema::array(Schema::Boolean).build(); let specification_eq_res = SPECIFICATION_EQ.compare(&schema_one, &schema_two); let struct_field_eq_res = STRUCT_FIELD_EQ.compare(&schema_one, &schema_two); diff --git a/avro/src/serde/derive.rs b/avro/src/serde/derive.rs index 4f91dfeb..fb2a6095 100644 --- a/avro/src/serde/derive.rs +++ b/avro/src/serde/derive.rs @@ -529,7 +529,7 @@ macro_rules! impl_array_schema ( ($type:ty where T: AvroSchemaComponent) => ( impl AvroSchemaComponent for $type { fn get_schema_in_ctxt(named_schemas: &mut HashSet, enclosing_namespace: &Namespace) -> Schema { - Schema::array(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)).call() + Schema::array(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)).build() } fn get_record_fields_in_ctxt(_: usize, _: &mut HashSet, _: &Namespace) -> Option> { @@ -552,7 +552,7 @@ where named_schemas: &mut HashSet, enclosing_namespace: &Namespace, ) -> Schema { - Schema::array(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)).call() + Schema::array(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)).build() } fn get_record_fields_in_ctxt( @@ -808,7 +808,7 @@ mod tests { #[test] fn avro_rs_401_slice() -> TestResult { let schema = <[u8]>::get_schema(); - assert_eq!(schema, Schema::array(Schema::Int).call()); + assert_eq!(schema, Schema::array(Schema::Int).build()); Ok(()) } @@ -816,7 +816,7 @@ mod tests { #[test] fn avro_rs_401_array() -> TestResult { let schema = <[u8; 55]>::get_schema(); - assert_eq!(schema, Schema::array(Schema::Int).call()); + assert_eq!(schema, Schema::array(Schema::Int).build()); Ok(()) } @@ -828,7 +828,7 @@ mod tests { schema, Schema::union(vec![ Schema::Null, - Schema::array(Schema::array(Schema::Int).call()).call() + Schema::array(Schema::array(Schema::Int).build()).build() ])? ); diff --git a/avro/src/types.rs b/avro/src/types.rs index 53028519..51762608 100644 --- a/avro/src/types.rs +++ b/avro/src/types.rs @@ -1351,13 +1351,13 @@ mod tests { ), ( Value::Array(vec![Value::Long(42i64)]), - Schema::array(Schema::Long).call(), + Schema::array(Schema::Long).build(), true, "", ), ( Value::Array(vec![Value::Boolean(true)]), - Schema::array(Schema::Long).call(), + Schema::array(Schema::Long).build(), false, "Invalid value: Array([Boolean(true)]) for schema: Array(ArraySchema { items: Long, default: None, attributes: {} }). Reason: Unsupported value-schema combination! Value: Boolean(true), schema: Long", ), From 73431f17efd6d3631098aeea9f126610ba8d6b2c Mon Sep 17 00:00:00 2001 From: Martin Tzvetanov Grigorov Date: Thu, 19 Feb 2026 15:20:31 +0200 Subject: [PATCH 4/5] Use bon function builder for Schema::map() too --- WARP.md | 237 ++++++++++++++++++++++++++++++++++++ avro/src/decode.rs | 4 +- avro/src/encode.rs | 7 +- avro/src/reader.rs | 2 +- avro/src/schema/mod.rs | 27 ++-- avro/src/schema_equality.rs | 44 ++++--- avro/src/serde/derive.rs | 2 +- avro/src/writer.rs | 6 +- 8 files changed, 288 insertions(+), 41 deletions(-) create mode 100644 WARP.md diff --git a/WARP.md b/WARP.md new file mode 100644 index 00000000..b6261000 --- /dev/null +++ b/WARP.md @@ -0,0 +1,237 @@ +# WARP.md + +This file provides guidance to WARP (warp.dev) when working with code in this repository. + +## Project Overview + +This is the Apache Avro Rust SDK, a comprehensive library for working with Apache Avro data serialization format. It's structured as a Cargo workspace with multiple interdependent crates that provide different aspects of Avro functionality. + +## Workspace Structure + +This is a multi-crate workspace with the following components: + +### Core Crates +- **`avro/`** - Main library (`apache-avro` crate) with full Avro functionality +- **`avro_derive/`** - Proc-macro crate for automatic schema derivation (`apache-avro-derive`) +- **`avro_test_helper/`** - Testing utilities shared across the workspace + +### Additional Components +- **`wasm-demo/`** - WebAssembly demonstration application +- **`fuzz/`** - Fuzzing tests (excluded from workspace) + +## Common Development Commands + +### Building and Testing + +```bash +# Build entire workspace +cargo build --all-features + +# Build release version +cargo build --all-features --release + +# Run all tests (includes doc tests and pre-commit hooks) +make test + +# Run tests without pre-commit setup +cargo test --all-features --all-targets +cargo test --doc + +# Run specific workspace member +cargo test -p apache-avro +cargo test -p apache-avro-derive + +# Build with specific codec features +cargo build --features snappy,zstandard,bzip,xz +``` + +### Quality and Linting + +```bash +# Format all code +make lint +# or directly: +cargo fmt + +# Run clippy with strict settings +make clippy +# or directly: +cargo clippy --all-features --all-targets -- -Dclippy::all -Dunused_imports + +# Check without building +cargo check --all-features + +# Security audit +cargo audit +``` + +### Documentation and Development Tools + +```bash +# Generate and open local documentation +make doc-local + +# Generate documentation without opening +make doc + +# Update README files from doc comments +make readme + +# Run benchmarks +make benchmark +``` + +### Apache Avro Specific Commands + +```bash +# Generate interoperability test data +./build.sh interop-data-generate + +# Test interoperability with other Avro implementations +./build.sh interop-data-test + +# Distribution build +./build.sh dist +``` + +### Pre-commit Setup + +```bash +# Install pre-commit hooks (includes Python venv setup) +make install-hooks + +# Clean pre-commit setup +make clean-hooks + +# Manual pre-commit run +.venv/bin/pre-commit run --all-files +``` + +## Architecture Overview + +### Multi-Layer Design + +The codebase follows a layered architecture: + +1. **Schema Layer** (`schema.rs`) - Avro schema parsing, validation, and representation +2. **Type System** (`types.rs`) - Avro value types and native Rust type mappings +3. **Encoding/Decoding** (`encode.rs`, `decode.rs`) - Low-level binary format handling +4. **Reader/Writer API** (`reader.rs`, `writer.rs`) - High-level streaming interfaces +5. **Serde Integration** (`ser.rs`, `de.rs`) - Rust serde framework integration + +### Key Components + +#### Schema Management +- JSON schema parsing with dependency resolution +- Programmatic schema construction +- Schema compatibility checking (`schema_compatibility.rs`) +- Custom validation and naming rules (`validator.rs`) + +#### Data Processing Approaches +1. **Native Avro Types**: Using `Value`, `Record` types with schema validation +2. **Serde Integration**: Direct serialization/deserialization of Rust structs +3. **Derive Macros**: Automatic schema generation from Rust types + +#### Codec Support +Configurable compression codecs via feature flags: +- `snappy` - Google Snappy compression +- `zstandard` - Facebook Zstandard compression +- `bzip` - BZip2 compression +- `xz` - XZ/LZMA compression +- Built-in: Null (uncompressed) and Deflate + +#### Logical Types Support +Built-in support for Avro logical types: +- Decimal (using `num-bigint`) +- UUID (using `uuid` crate) +- Date, Time, Timestamp variants +- Duration with months/days/millis + +## Development Considerations + +### Feature Flag Strategy +The library uses feature flags for optional functionality: +- Use `--all-features` for comprehensive testing +- Individual codec features for minimal builds +- `derive` feature for procedural macro functionality + +### Error Handling Pattern +- Custom `Error` enum with detailed context (replaced `failure` crate in v0.11) +- Extensive use of `thiserror` for error derivation +- Schema validation errors provide precise location information + +### Memory Safety and Security +- Built-in allocation limits (default 512MB) to prevent malicious data attacks +- Use `max_allocation_bytes()` before processing if expecting large data +- Robust schema validation prevents many attack vectors + +### Compatibility and Migration +- Check `migration_guide.md` for breaking changes between versions +- MSRV (Minimum Supported Rust Version): 1.85.0 +- Schema compatibility checking available via `SchemaCompatibility::can_read()` + +### Testing Strategy +- Comprehensive unit tests for all modules +- Integration tests with real Avro files +- Interoperability testing with Apache Avro implementations +- Fuzzing tests (separate `fuzz/` directory) +- Property-based testing patterns + +### Code Organization Patterns +- Single responsibility modules (encoding, decoding, schema, etc.) +- Trait-based design for extensibility +- Builder patterns for complex configurations +- Iterator-based APIs for memory efficiency + +## Customization Points + +### Schema Validation +Implement `SchemaNameValidator` trait for custom naming rules: +```rust +set_schema_name_validator(Box::new(MyCustomValidator)); +``` + +### Schema Equality +Implement `SchemataEq` trait for custom schema comparison: +- Default: `StructFieldEq` (fast structural comparison) +- Alternative: `SpecificationEq` (canonical JSON comparison) + +### Fingerprinting +Built-in support for schema fingerprinting: +- SHA-256, MD5, Rabin fingerprints +- Used for schema registry integration + +## Apache Foundation Requirements + +### Licensing +- All files must include Apache 2.0 license headers +- Use provided license header templates +- `deny.toml` enforces allowed license dependencies + +### Release Process +- Follow Apache release guidelines in `RELEASE.md` +- Version bumps must be coordinated across workspace +- Update `CHANGELOG.md` for all changes + +### Contribution Requirements +- All contributions licensed under Apache 2.0 +- Pre-commit hooks enforce formatting and linting +- Consider backward compatibility impact +- Update migration guide for breaking changes + +## Performance Considerations + +### Benchmarking +- Benchmarks available in `benches/` directory +- Compare against serde_json for baseline performance +- Memory allocation patterns matter for large datasets + +### Memory Efficiency +- Streaming APIs for large data processing +- Batch processing capabilities in Reader/Writer +- Allocation limit controls for security + +### Feature Selection +- Minimal feature sets for smaller binaries +- Codec features only when compression needed +- WebAssembly compatibility considerations \ No newline at end of file diff --git a/avro/src/decode.rs b/avro/src/decode.rs index 9e389d47..dfa4bd3b 100644 --- a/avro/src/decode.rs +++ b/avro/src/decode.rs @@ -394,7 +394,7 @@ mod tests { #[test] fn test_decode_map_without_size() -> TestResult { let mut input: &[u8] = &[0x02, 0x08, 0x74, 0x65, 0x73, 0x74, 0x02, 0x00]; - let result = decode(&Schema::map(Schema::Int), &mut input); + let result = decode(&Schema::map(Schema::Int).build(), &mut input); let mut expected = HashMap::new(); expected.insert(String::from("test"), Int(1)); assert_eq!(Map(expected), result?); @@ -405,7 +405,7 @@ mod tests { #[test] fn test_decode_map_with_size() -> TestResult { let mut input: &[u8] = &[0x01, 0x0C, 0x08, 0x74, 0x65, 0x73, 0x74, 0x02, 0x00]; - let result = decode(&Schema::map(Schema::Int), &mut input); + let result = decode(&Schema::map(Schema::Int).build(), &mut input); let mut expected = HashMap::new(); expected.insert(String::from("test"), Int(1)); assert_eq!(Map(expected), result?); diff --git a/avro/src/encode.rs b/avro/src/encode.rs index 32ec10b6..610e1b8a 100644 --- a/avro/src/encode.rs +++ b/avro/src/encode.rs @@ -408,10 +408,13 @@ pub(crate) mod tests { let empty: HashMap = HashMap::new(); encode( &Value::Map(empty.clone()), - &Schema::map(Schema::Int), + &Schema::map(Schema::Int).build(), &mut buf, ) - .expect(&success(&Value::Map(empty), &Schema::map(Schema::Int))); + .expect(&success( + &Value::Map(empty), + &Schema::map(Schema::Int).build(), + )); assert_eq!(vec![0u8], buf); } diff --git a/avro/src/reader.rs b/avro/src/reader.rs index 8af522c9..9716d73c 100644 --- a/avro/src/reader.rs +++ b/avro/src/reader.rs @@ -88,7 +88,7 @@ impl<'r, R: Read> Block<'r, R> { return Err(Details::HeaderMagic.into()); } - let meta_schema = Schema::map(Schema::Bytes); + let meta_schema = Schema::map(Schema::Bytes).build(); match decode(&meta_schema, &mut self.reader)? { Value::Map(metadata) => { self.read_writer_schema(&metadata)?; diff --git a/avro/src/schema/mod.rs b/avro/src/schema/mod.rs index 280a6e8f..2fec6559 100644 --- a/avro/src/schema/mod.rs +++ b/avro/src/schema/mod.rs @@ -647,17 +647,13 @@ impl Schema { } } - /// Returns a `Schema::Map` with the given types. - pub fn map(types: Schema) -> Self { - Schema::Map(MapSchema { - types: Box::new(types), - default: None, - attributes: Default::default(), - }) - } - - /// Returns a `Schema::Map` with the given types and custom attributes. - pub fn map_with_attributes(types: Schema, attributes: BTreeMap) -> Self { + /// Returns a `Schema::Map` with the given types and optional custom attributes. + #[builder(finish_fn = build)] + pub fn map( + #[builder(start_fn)] types: Schema, + attributes: Option>, + ) -> Self { + let attributes = attributes.unwrap_or_default(); Schema::Map(MapSchema { types: Box::new(types), default: None, @@ -1159,7 +1155,7 @@ mod tests { #[test] fn test_map_schema() -> TestResult { let schema = Schema::parse_str(r#"{"type": "map", "values": "double"}"#)?; - assert_eq!(Schema::map(Schema::Double), schema); + assert_eq!(Schema::map(Schema::Double).build(), schema); Ok(()) } @@ -4703,10 +4699,9 @@ mod tests { #[test] fn test_avro_3927_serialize_map_with_custom_attributes() -> TestResult { - let expected = Schema::map_with_attributes( - Schema::Long, - BTreeMap::from([("field-id".to_string(), "1".into())]), - ); + let expected = Schema::map(Schema::Long) + .attributes(BTreeMap::from([("field-id".to_string(), "1".into())])) + .build(); let value = serde_json::to_value(&expected)?; let serialized = serde_json::to_string(&value)?; diff --git a/avro/src/schema_equality.rs b/avro/src/schema_equality.rs index 37df9ed9..f0b8ce5f 100644 --- a/avro/src/schema_equality.rs +++ b/avro/src/schema_equality.rs @@ -424,14 +424,18 @@ mod tests { #[test] fn test_avro_3939_compare_schemata_not_including_attributes() { - let schema_one = Schema::map_with_attributes( - Schema::Boolean, - BTreeMap::from_iter([("key1".to_string(), Value::Bool(true))]), - ); - let schema_two = Schema::map_with_attributes( - Schema::Boolean, - BTreeMap::from_iter([("key2".to_string(), Value::Bool(true))]), - ); + let schema_one = Schema::map(Schema::Boolean) + .attributes(BTreeMap::from_iter([( + "key1".to_string(), + Value::Bool(true), + )])) + .build(); + let schema_two = Schema::map(Schema::Boolean) + .attributes(BTreeMap::from_iter([( + "key2".to_string(), + Value::Bool(true), + )])) + .build(); // STRUCT_FIELD_EQ does not include attributes ! assert!(STRUCT_FIELD_EQ.compare(&schema_one, &schema_two)); } @@ -441,24 +445,28 @@ mod tests { let struct_field_eq = StructFieldEq { include_attributes: true, }; - let schema_one = Schema::map_with_attributes( - Schema::Boolean, - BTreeMap::from_iter([("key1".to_string(), Value::Bool(true))]), - ); - let schema_two = Schema::map_with_attributes( - Schema::Boolean, - BTreeMap::from_iter([("key2".to_string(), Value::Bool(true))]), - ); + let schema_one = Schema::map(Schema::Boolean) + .attributes(BTreeMap::from_iter([( + "key1".to_string(), + Value::Bool(true), + )])) + .build(); + let schema_two = Schema::map(Schema::Boolean) + .attributes(BTreeMap::from_iter([( + "key2".to_string(), + Value::Bool(true), + )])) + .build(); assert!(!struct_field_eq.compare(&schema_one, &schema_two)); } #[test] fn test_avro_3939_compare_map_schemata() { - let schema_one = Schema::map(Schema::Boolean); + let schema_one = Schema::map(Schema::Boolean).build(); assert!(!SPECIFICATION_EQ.compare(&schema_one, &Schema::Boolean)); assert!(!STRUCT_FIELD_EQ.compare(&schema_one, &Schema::Boolean)); - let schema_two = Schema::map(Schema::Boolean); + let schema_two = Schema::map(Schema::Boolean).build(); let specification_eq_res = SPECIFICATION_EQ.compare(&schema_one, &schema_two); let struct_field_eq_res = STRUCT_FIELD_EQ.compare(&schema_one, &schema_two); diff --git a/avro/src/serde/derive.rs b/avro/src/serde/derive.rs index fb2a6095..8a6b4324 100644 --- a/avro/src/serde/derive.rs +++ b/avro/src/serde/derive.rs @@ -572,7 +572,7 @@ where named_schemas: &mut HashSet, enclosing_namespace: &Namespace, ) -> Schema { - Schema::map(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)) + Schema::map(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)).build() } fn get_record_fields_in_ctxt( diff --git a/avro/src/writer.rs b/avro/src/writer.rs index 61c02de6..29fd7cc1 100644 --- a/avro/src/writer.rs +++ b/avro/src/writer.rs @@ -524,7 +524,11 @@ impl<'a, W: Write> Writer<'a, W> { let mut header = Vec::new(); header.extend_from_slice(AVRO_OBJECT_HEADER); - encode(&metadata.into(), &Schema::map(Schema::Bytes), &mut header)?; + encode( + &metadata.into(), + &Schema::map(Schema::Bytes).build(), + &mut header, + )?; header.extend_from_slice(&self.marker); Ok(header) From 7554dce56cf718682fd2dbf0a0074461bb5c1d5d Mon Sep 17 00:00:00 2001 From: Martin Tzvetanov Grigorov Date: Thu, 19 Feb 2026 16:04:09 +0200 Subject: [PATCH 5/5] Use Bon function style builder for Reader constructors --- WARP.md | 237 ---------------------- avro/benches/serde.rs | 2 +- avro/examples/benchmark.rs | 4 +- avro/src/documentation/dynamic.rs | 4 +- avro/src/lib.rs | 7 +- avro/src/reader.rs | 70 +++---- avro/src/schema_compatibility.rs | 4 +- avro/src/serde/ser_schema.rs | 2 +- avro/src/writer.rs | 2 +- avro/tests/schema.rs | 34 ++-- avro/tests/shared.rs | 12 +- avro/tests/to_from_avro_datum_schemata.rs | 5 +- avro/tests/union_schema.rs | 5 +- avro_derive/tests/derive.rs | 5 +- avro_derive/tests/serde.rs | 2 +- 15 files changed, 82 insertions(+), 313 deletions(-) delete mode 100644 WARP.md diff --git a/WARP.md b/WARP.md deleted file mode 100644 index b6261000..00000000 --- a/WARP.md +++ /dev/null @@ -1,237 +0,0 @@ -# WARP.md - -This file provides guidance to WARP (warp.dev) when working with code in this repository. - -## Project Overview - -This is the Apache Avro Rust SDK, a comprehensive library for working with Apache Avro data serialization format. It's structured as a Cargo workspace with multiple interdependent crates that provide different aspects of Avro functionality. - -## Workspace Structure - -This is a multi-crate workspace with the following components: - -### Core Crates -- **`avro/`** - Main library (`apache-avro` crate) with full Avro functionality -- **`avro_derive/`** - Proc-macro crate for automatic schema derivation (`apache-avro-derive`) -- **`avro_test_helper/`** - Testing utilities shared across the workspace - -### Additional Components -- **`wasm-demo/`** - WebAssembly demonstration application -- **`fuzz/`** - Fuzzing tests (excluded from workspace) - -## Common Development Commands - -### Building and Testing - -```bash -# Build entire workspace -cargo build --all-features - -# Build release version -cargo build --all-features --release - -# Run all tests (includes doc tests and pre-commit hooks) -make test - -# Run tests without pre-commit setup -cargo test --all-features --all-targets -cargo test --doc - -# Run specific workspace member -cargo test -p apache-avro -cargo test -p apache-avro-derive - -# Build with specific codec features -cargo build --features snappy,zstandard,bzip,xz -``` - -### Quality and Linting - -```bash -# Format all code -make lint -# or directly: -cargo fmt - -# Run clippy with strict settings -make clippy -# or directly: -cargo clippy --all-features --all-targets -- -Dclippy::all -Dunused_imports - -# Check without building -cargo check --all-features - -# Security audit -cargo audit -``` - -### Documentation and Development Tools - -```bash -# Generate and open local documentation -make doc-local - -# Generate documentation without opening -make doc - -# Update README files from doc comments -make readme - -# Run benchmarks -make benchmark -``` - -### Apache Avro Specific Commands - -```bash -# Generate interoperability test data -./build.sh interop-data-generate - -# Test interoperability with other Avro implementations -./build.sh interop-data-test - -# Distribution build -./build.sh dist -``` - -### Pre-commit Setup - -```bash -# Install pre-commit hooks (includes Python venv setup) -make install-hooks - -# Clean pre-commit setup -make clean-hooks - -# Manual pre-commit run -.venv/bin/pre-commit run --all-files -``` - -## Architecture Overview - -### Multi-Layer Design - -The codebase follows a layered architecture: - -1. **Schema Layer** (`schema.rs`) - Avro schema parsing, validation, and representation -2. **Type System** (`types.rs`) - Avro value types and native Rust type mappings -3. **Encoding/Decoding** (`encode.rs`, `decode.rs`) - Low-level binary format handling -4. **Reader/Writer API** (`reader.rs`, `writer.rs`) - High-level streaming interfaces -5. **Serde Integration** (`ser.rs`, `de.rs`) - Rust serde framework integration - -### Key Components - -#### Schema Management -- JSON schema parsing with dependency resolution -- Programmatic schema construction -- Schema compatibility checking (`schema_compatibility.rs`) -- Custom validation and naming rules (`validator.rs`) - -#### Data Processing Approaches -1. **Native Avro Types**: Using `Value`, `Record` types with schema validation -2. **Serde Integration**: Direct serialization/deserialization of Rust structs -3. **Derive Macros**: Automatic schema generation from Rust types - -#### Codec Support -Configurable compression codecs via feature flags: -- `snappy` - Google Snappy compression -- `zstandard` - Facebook Zstandard compression -- `bzip` - BZip2 compression -- `xz` - XZ/LZMA compression -- Built-in: Null (uncompressed) and Deflate - -#### Logical Types Support -Built-in support for Avro logical types: -- Decimal (using `num-bigint`) -- UUID (using `uuid` crate) -- Date, Time, Timestamp variants -- Duration with months/days/millis - -## Development Considerations - -### Feature Flag Strategy -The library uses feature flags for optional functionality: -- Use `--all-features` for comprehensive testing -- Individual codec features for minimal builds -- `derive` feature for procedural macro functionality - -### Error Handling Pattern -- Custom `Error` enum with detailed context (replaced `failure` crate in v0.11) -- Extensive use of `thiserror` for error derivation -- Schema validation errors provide precise location information - -### Memory Safety and Security -- Built-in allocation limits (default 512MB) to prevent malicious data attacks -- Use `max_allocation_bytes()` before processing if expecting large data -- Robust schema validation prevents many attack vectors - -### Compatibility and Migration -- Check `migration_guide.md` for breaking changes between versions -- MSRV (Minimum Supported Rust Version): 1.85.0 -- Schema compatibility checking available via `SchemaCompatibility::can_read()` - -### Testing Strategy -- Comprehensive unit tests for all modules -- Integration tests with real Avro files -- Interoperability testing with Apache Avro implementations -- Fuzzing tests (separate `fuzz/` directory) -- Property-based testing patterns - -### Code Organization Patterns -- Single responsibility modules (encoding, decoding, schema, etc.) -- Trait-based design for extensibility -- Builder patterns for complex configurations -- Iterator-based APIs for memory efficiency - -## Customization Points - -### Schema Validation -Implement `SchemaNameValidator` trait for custom naming rules: -```rust -set_schema_name_validator(Box::new(MyCustomValidator)); -``` - -### Schema Equality -Implement `SchemataEq` trait for custom schema comparison: -- Default: `StructFieldEq` (fast structural comparison) -- Alternative: `SpecificationEq` (canonical JSON comparison) - -### Fingerprinting -Built-in support for schema fingerprinting: -- SHA-256, MD5, Rabin fingerprints -- Used for schema registry integration - -## Apache Foundation Requirements - -### Licensing -- All files must include Apache 2.0 license headers -- Use provided license header templates -- `deny.toml` enforces allowed license dependencies - -### Release Process -- Follow Apache release guidelines in `RELEASE.md` -- Version bumps must be coordinated across workspace -- Update `CHANGELOG.md` for all changes - -### Contribution Requirements -- All contributions licensed under Apache 2.0 -- Pre-commit hooks enforce formatting and linting -- Consider backward compatibility impact -- Update migration guide for breaking changes - -## Performance Considerations - -### Benchmarking -- Benchmarks available in `benches/` directory -- Compare against serde_json for baseline performance -- Memory allocation patterns matter for large datasets - -### Memory Efficiency -- Streaming APIs for large data processing -- Batch processing capabilities in Reader/Writer -- Allocation limit controls for security - -### Feature Selection -- Minimal feature sets for smaller binaries -- Codec features only when compression needed -- WebAssembly compatibility considerations \ No newline at end of file diff --git a/avro/benches/serde.rs b/avro/benches/serde.rs index fad858b2..37e0985e 100644 --- a/avro/benches/serde.rs +++ b/avro/benches/serde.rs @@ -251,7 +251,7 @@ fn write_ser(schema: &Schema, records: &[T]) -> AvroResult } fn read(schema: &Schema, bytes: &[u8]) -> Result<(), Box> { - let reader = Reader::with_schema(schema, bytes)?; + let reader = Reader::builder(bytes).schema(schema).build()?; for record in reader { let _ = record?; diff --git a/avro/examples/benchmark.rs b/avro/examples/benchmark.rs index 7d5922bf..3200f90c 100644 --- a/avro/examples/benchmark.rs +++ b/avro/examples/benchmark.rs @@ -78,7 +78,9 @@ fn benchmark( for _ in 0..runs { let start = Instant::now(); - let reader = Reader::with_schema(schema, BufReader::new(&bytes[..]))?; + let reader = Reader::builder(BufReader::new(&bytes[..])) + .schema(schema) + .build()?; let mut read_records = Vec::with_capacity(count); for record in reader { diff --git a/avro/src/documentation/dynamic.rs b/avro/src/documentation/dynamic.rs index 4a426fb5..4c3f9992 100644 --- a/avro/src/documentation/dynamic.rs +++ b/avro/src/documentation/dynamic.rs @@ -208,7 +208,7 @@ //! let reader_schema = Schema::parse_str(reader_raw_schema).unwrap(); //! //! // reader creation can fail in case the input to read from is not Avro-compatible or malformed -//! let reader = Reader::with_schema(&reader_schema, &input[..]).unwrap(); +//! let reader = Reader::builder(&input[..]).schema(&reader_schema).build().unwrap(); //! //! // value is a Result of an Avro Value in case the read operation fails //! for value in reader { @@ -268,7 +268,7 @@ //! writer.append_ser(test)?; //! //! let input = writer.into_inner()?; -//! let reader = Reader::with_schema(&schema, &input[..])?; +//! let reader = Reader::builder(&input[..]).schema(&schema).build()?; //! //! for record in reader { //! println!("{:?}", from_value::(&record?)); diff --git a/avro/src/lib.rs b/avro/src/lib.rs index d166c51f..9c9cef61 100644 --- a/avro/src/lib.rs +++ b/avro/src/lib.rs @@ -193,7 +193,10 @@ mod tests { record.put("b", "foo"); writer.append_value(record).unwrap(); let input = writer.into_inner().unwrap(); - let mut reader = Reader::with_schema(&reader_schema, &input[..]).unwrap(); + let mut reader = Reader::builder(&input[..]) + .schema(&reader_schema) + .build() + .unwrap(); assert_eq!( reader.next().unwrap().unwrap(), Value::Record(vec![ @@ -235,7 +238,7 @@ mod tests { record.put("c", "clubs"); writer.append_value(record).unwrap(); let input = writer.into_inner().unwrap(); - let mut reader = Reader::with_schema(&schema, &input[..]).unwrap(); + let mut reader = Reader::builder(&input[..]).schema(&schema).build().unwrap(); assert_eq!( reader.next().unwrap().unwrap(), Value::Record(vec![ diff --git a/avro/src/reader.rs b/avro/src/reader.rs index 9716d73c..79ebc3bb 100644 --- a/avro/src/reader.rs +++ b/avro/src/reader.rs @@ -30,6 +30,7 @@ use crate::{ types::Value, util, }; +use bon::bon; use log::warn; use serde::de::DeserializeOwned; use serde_json::from_slice; @@ -337,57 +338,39 @@ pub struct Reader<'a, R> { should_resolve_schema: bool, } +#[bon] impl<'a, R: Read> Reader<'a, R> { - /// Creates a `Reader` given something implementing the `io::Read` trait to read from. - /// No reader `Schema` will be set. - /// - /// **NOTE** The avro header is going to be read automatically upon creation of the `Reader`. pub fn new(reader: R) -> AvroResult> { - let block = Block::new(reader, vec![])?; - let reader = Reader { - block, - reader_schema: None, - errored: false, - should_resolve_schema: false, - }; - Ok(reader) - } - - /// Creates a `Reader` given a reader `Schema` and something implementing the `io::Read` trait - /// to read from. - /// - /// **NOTE** The avro header is going to be read automatically upon creation of the `Reader`. - pub fn with_schema(schema: &'a Schema, reader: R) -> AvroResult> { - let block = Block::new(reader, vec![schema])?; - let mut reader = Reader { - block, - reader_schema: Some(schema), - errored: false, - should_resolve_schema: false, - }; - // Check if the reader and writer schemas disagree. - reader.should_resolve_schema = reader.writer_schema() != schema; - Ok(reader) + Reader::builder(reader).build() } - - /// Creates a `Reader` given a reader `Schema` and something implementing the `io::Read` trait - /// to read from. + /// Creates a `Reader` given something implementing the `io::Read` trait to read from. + /// With an optional reader `Schema` and optional schemata to use for resolving schema + /// references. /// /// **NOTE** The avro header is going to be read automatically upon creation of the `Reader`. - pub fn with_schemata( - schema: &'a Schema, - schemata: Vec<&'a Schema>, - reader: R, + #[builder(finish_fn = build)] + pub fn builder( + #[builder(start_fn)] reader: R, + schema: Option<&'a Schema>, + schemata: Option>, ) -> AvroResult> { + let schemata = match schemata { + Some(schemata) => schemata, + None => match schema { + Some(schema) => vec![schema], + None => vec![], + }, + }; let block = Block::new(reader, schemata)?; let mut reader = Reader { block, - reader_schema: Some(schema), + reader_schema: schema, errored: false, should_resolve_schema: false, }; // Check if the reader and writer schemas disagree. - reader.should_resolve_schema = reader.writer_schema() != schema; + reader.should_resolve_schema = + schema.is_some_and(|reader_schema| reader.writer_schema() != reader_schema); Ok(reader) } @@ -744,7 +727,7 @@ mod tests { #[test] fn test_reader_iterator() -> TestResult { let schema = Schema::parse_str(SCHEMA)?; - let reader = Reader::with_schema(&schema, ENCODED)?; + let reader = Reader::builder(ENCODED).schema(&schema).build()?; let mut record1 = Record::new(&schema).unwrap(); record1.put("a", 27i64); @@ -767,7 +750,12 @@ mod tests { fn test_reader_invalid_header() -> TestResult { let schema = Schema::parse_str(SCHEMA)?; let mut invalid = &ENCODED[1..]; - assert!(Reader::with_schema(&schema, &mut invalid).is_err()); + assert!( + Reader::builder(&mut invalid) + .schema(&schema) + .build() + .is_err() + ); Ok(()) } @@ -776,7 +764,7 @@ mod tests { fn test_reader_invalid_block() -> TestResult { let schema = Schema::parse_str(SCHEMA)?; let mut invalid = &ENCODED[0..ENCODED.len() - 19]; - let reader = Reader::with_schema(&schema, &mut invalid)?; + let reader = Reader::builder(&mut invalid).schema(&schema).build()?; for value in reader { assert!(value.is_err()); } diff --git a/avro/src/schema_compatibility.rs b/avro/src/schema_compatibility.rs index 926b4e06..eb26828c 100644 --- a/avro/src/schema_compatibility.rs +++ b/avro/src/schema_compatibility.rs @@ -1333,7 +1333,7 @@ mod tests { record.put("c", "clubs"); writer.append_value(record).unwrap(); let input = writer.into_inner()?; - let mut reader = Reader::with_schema(&reader_schema, &input[..])?; + let mut reader = Reader::builder(&input[..]).schema(&reader_schema).build()?; assert_eq!( reader.next().unwrap().unwrap(), Value::Record(vec![ @@ -1397,7 +1397,7 @@ mod tests { record.put("c", "hearts"); writer.append_value(record).unwrap(); let input = writer.into_inner()?; - let mut reader = Reader::with_schema(&reader_schema, &input[..])?; + let mut reader = Reader::builder(&input[..]).schema(&reader_schema).build()?; assert_eq!( reader.next().unwrap().unwrap(), Value::Record(vec![ diff --git a/avro/src/serde/ser_schema.rs b/avro/src/serde/ser_schema.rs index fb222a01..af391a67 100644 --- a/avro/src/serde/ser_schema.rs +++ b/avro/src/serde/ser_schema.rs @@ -3270,7 +3270,7 @@ mod tests { e: 5, })?; let encoded = writer.into_inner()?; - let mut reader = Reader::with_schema(&schema, &encoded[..])?; + let mut reader = Reader::builder(&encoded[..]).schema(&schema).build()?; let decoded = from_value::(&reader.next().unwrap()?)?; assert_eq!( decoded, diff --git a/avro/src/writer.rs b/avro/src/writer.rs index 29fd7cc1..6150cfc3 100644 --- a/avro/src/writer.rs +++ b/avro/src/writer.rs @@ -1477,7 +1477,7 @@ mod tests { writer.add_user_metadata("a".to_string(), "b")?; let result = writer.into_inner()?; - let reader = Reader::with_schema(&schema, &result[..])?; + let reader = Reader::builder(&result[..]).schema(&schema).build()?; let mut expected = HashMap::new(); expected.insert("a".to_string(), vec![b'b']); assert_eq!(reader.user_metadata(), &expected); diff --git a/avro/tests/schema.rs b/avro/tests/schema.rs index 667f0564..ecc5c7f6 100644 --- a/avro/tests/schema.rs +++ b/avro/tests/schema.rs @@ -1058,7 +1058,7 @@ fn test_avro_3847_union_field_with_default_value_of_ref() -> TestResult { "#; let reader_schema = Schema::parse_str(reader_schema_str)?; let input = writer.into_inner()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; + let reader = Reader::builder(&input[..]).schema(&reader_schema).build()?; let result = reader.collect::, _>>()?; assert_eq!(1, result.len()); @@ -1124,7 +1124,7 @@ fn test_avro_3847_union_field_with_default_value_of_ref() -> TestResult { "#; let reader_schema = Schema::parse_str(reader_schema_str)?; let input = writer.into_inner()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; + let reader = Reader::builder(&input[..]).schema(&reader_schema).build()?; let result = reader.collect::, _>>()?; assert_eq!(1, result.len()); @@ -1184,7 +1184,7 @@ fn test_avro_3847_union_field_with_default_value_of_ref() -> TestResult { "#; let reader_schema = Schema::parse_str(reader_schema_str)?; let input = writer.into_inner()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; + let reader = Reader::builder(&input[..]).schema(&reader_schema).build()?; let result = reader.collect::, _>>()?; assert_eq!(1, result.len()); @@ -1263,7 +1263,7 @@ fn test_avro_3847_union_field_with_default_value_of_ref_with_namespace() -> Test "#; let reader_schema = Schema::parse_str(reader_schema_str)?; let input = writer.into_inner()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; + let reader = Reader::builder(&input[..]).schema(&reader_schema).build()?; let result = reader.collect::, _>>()?; assert_eq!(1, result.len()); @@ -1331,7 +1331,7 @@ fn test_avro_3847_union_field_with_default_value_of_ref_with_namespace() -> Test "#; let reader_schema = Schema::parse_str(reader_schema_str)?; let input = writer.into_inner()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; + let reader = Reader::builder(&input[..]).schema(&reader_schema).build()?; let result = reader.collect::, _>>()?; assert_eq!(1, result.len()); @@ -1393,7 +1393,7 @@ fn test_avro_3847_union_field_with_default_value_of_ref_with_namespace() -> Test "#; let reader_schema = Schema::parse_str(reader_schema_str)?; let input = writer.into_inner()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; + let reader = Reader::builder(&input[..]).schema(&reader_schema).build()?; let result = reader.collect::, _>>()?; assert_eq!(1, result.len()); @@ -1472,7 +1472,7 @@ fn test_avro_3847_union_field_with_default_value_of_ref_with_enclosing_namespace "#; let reader_schema = Schema::parse_str(reader_schema_str)?; let input = writer.into_inner()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; + let reader = Reader::builder(&input[..]).schema(&reader_schema).build()?; let result = reader.collect::, _>>()?; assert_eq!(1, result.len()); @@ -1540,7 +1540,7 @@ fn test_avro_3847_union_field_with_default_value_of_ref_with_enclosing_namespace "#; let reader_schema = Schema::parse_str(reader_schema_str)?; let input = writer.into_inner()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; + let reader = Reader::builder(&input[..]).schema(&reader_schema).build()?; let result = reader.collect::, _>>()?; assert_eq!(1, result.len()); @@ -1602,7 +1602,7 @@ fn test_avro_3847_union_field_with_default_value_of_ref_with_enclosing_namespace "#; let reader_schema = Schema::parse_str(reader_schema_str)?; let input = writer.into_inner()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; + let reader = Reader::builder(&input[..]).schema(&reader_schema).build()?; let result = reader.collect::, _>>()?; assert_eq!(1, result.len()); @@ -1666,7 +1666,7 @@ fn test_avro_3851_read_default_value_for_simple_record_field() -> TestResult { "#; let reader_schema = Schema::parse_str(reader_schema_str)?; let input = write_schema_for_default_value_test()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; + let reader = Reader::builder(&input[..]).schema(&reader_schema).build()?; let result = reader.collect::, _>>()?; assert_eq!(1, result.len()); @@ -1713,7 +1713,7 @@ fn test_avro_3851_read_default_value_for_nested_record_field() -> TestResult { "#; let reader_schema = Schema::parse_str(reader_schema_str)?; let input = write_schema_for_default_value_test()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; + let reader = Reader::builder(&input[..]).schema(&reader_schema).build()?; let result = reader.collect::, _>>()?; assert_eq!(1, result.len()); @@ -1756,7 +1756,7 @@ fn test_avro_3851_read_default_value_for_enum_record_field() -> TestResult { "#; let reader_schema = Schema::parse_str(reader_schema_str)?; let input = write_schema_for_default_value_test()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; + let reader = Reader::builder(&input[..]).schema(&reader_schema).build()?; let result = reader.collect::, _>>()?; assert_eq!(1, result.len()); @@ -1796,7 +1796,7 @@ fn test_avro_3851_read_default_value_for_fixed_record_field() -> TestResult { "#; let reader_schema = Schema::parse_str(reader_schema_str)?; let input = write_schema_for_default_value_test()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; + let reader = Reader::builder(&input[..]).schema(&reader_schema).build()?; let result = reader.collect::, _>>()?; assert_eq!(1, result.len()); @@ -1833,7 +1833,7 @@ fn test_avro_3851_read_default_value_for_array_record_field() -> TestResult { "#; let reader_schema = Schema::parse_str(reader_schema_str)?; let input = write_schema_for_default_value_test()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; + let reader = Reader::builder(&input[..]).schema(&reader_schema).build()?; let result = reader.collect::, _>>()?; assert_eq!(1, result.len()); @@ -1873,7 +1873,7 @@ fn test_avro_3851_read_default_value_for_map_record_field() -> TestResult { "#; let reader_schema = Schema::parse_str(reader_schema_str)?; let input = write_schema_for_default_value_test()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; + let reader = Reader::builder(&input[..]).schema(&reader_schema).build()?; let result = reader.collect::, _>>()?; assert_eq!(1, result.len()); @@ -1951,7 +1951,7 @@ fn test_avro_3851_read_default_value_for_ref_record_field() -> TestResult { "#; let reader_schema = Schema::parse_str(reader_schema_str)?; let input = writer.into_inner()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; + let reader = Reader::builder(&input[..]).schema(&reader_schema).build()?; let result = reader.collect::, _>>()?; assert_eq!(1, result.len()); @@ -1997,7 +1997,7 @@ fn test_avro_3851_read_default_value_for_enum() -> TestResult { "#; let reader_schema = Schema::parse_str(reader_schema_str)?; let input = writer.into_inner()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; + let reader = Reader::builder(&input[..]).schema(&reader_schema).build()?; let result = reader.collect::, _>>()?; assert_eq!(1, result.len()); diff --git a/avro/tests/shared.rs b/avro/tests/shared.rs index c0a51b64..00bcb611 100644 --- a/avro/tests/shared.rs +++ b/avro/tests/shared.rs @@ -111,8 +111,10 @@ fn test_folder(folder: &Path) -> Result<(), ErrorsDesc> { )); } else { let file: File = File::open(data_path).expect("Can't open data.avro"); - let reader = - Reader::with_schema(&schema, BufReader::new(&file)).expect("Can't read data.avro"); + let reader = Reader::builder(BufReader::new(&file)) + .schema(&schema) + .build() + .expect("Can't read data.avro"); let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Null).unwrap(); @@ -128,8 +130,10 @@ fn test_folder(folder: &Path) -> Result<(), ErrorsDesc> { writer.flush().expect("Error on flush"); let bytes: Vec = writer.into_inner().unwrap(); - let reader_bis = - Reader::with_schema(&schema, &bytes[..]).expect("Can't read flushed vector"); + let reader_bis = Reader::builder(&bytes[..]) + .schema(&schema) + .build() + .expect("Can't read flushed vector"); let mut records_iter: Iter = records.iter(); for r2 in reader_bis { diff --git a/avro/tests/to_from_avro_datum_schemata.rs b/avro/tests/to_from_avro_datum_schemata.rs index 8617c56b..e2f4dac3 100644 --- a/avro/tests/to_from_avro_datum_schemata.rs +++ b/avro/tests/to_from_avro_datum_schemata.rs @@ -112,7 +112,10 @@ fn test_avro_3683_multiple_schemata_writer_reader() -> TestResult { writer.flush()?; drop(writer); //drop the writer so that `output` is no more referenced mutably - let reader = Reader::with_schemata(schema_b, schemata, output.as_slice())?; + let reader = Reader::builder(output.as_slice()) + .schema(schema_b) + .schemata(schemata) + .build()?; let value = reader.into_iter().next().unwrap()?; assert_eq!(value, record); diff --git a/avro/tests/union_schema.rs b/avro/tests/union_schema.rs index 6e48e479..e368d746 100644 --- a/avro/tests/union_schema.rs +++ b/avro/tests/union_schema.rs @@ -76,7 +76,10 @@ where writer.flush()?; drop(writer); //drop the writer so that `encoded` is no more referenced mutably - let mut reader = Reader::with_schemata(schema, schemata.iter().collect(), encoded.as_slice())?; + let mut reader = Reader::builder(encoded.as_slice()) + .schema(schema) + .schemata(schemata.iter().collect()) + .build()?; from_value::(&reader.next().expect("")?) } diff --git a/avro_derive/tests/derive.rs b/avro_derive/tests/derive.rs index 631e8250..4be02c92 100644 --- a/avro_derive/tests/derive.rs +++ b/avro_derive/tests/derive.rs @@ -62,7 +62,10 @@ where { assert!(!encoded.is_empty()); let schema = T::get_schema(); - let mut reader = Reader::with_schema(&schema, &encoded[..]).unwrap(); + let mut reader = Reader::builder(&encoded[..]) + .schema(&schema) + .build() + .unwrap(); if let Some(res) = reader.next() { match res { Ok(value) => { diff --git a/avro_derive/tests/serde.rs b/avro_derive/tests/serde.rs index e14503de..f82971c3 100644 --- a/avro_derive/tests/serde.rs +++ b/avro_derive/tests/serde.rs @@ -63,7 +63,7 @@ where { assert!(!encoded.is_empty()); let schema = T::get_schema(); - let mut reader = Reader::with_schema(&schema, &encoded[..])?; + let mut reader = Reader::builder(&encoded[..]).schema(&schema).build()?; if let Some(res) = reader.next() { return res.and_then(|v| from_value::(&v)); }