From 5bb5c4becd5c2040837271369f18faf9dcf1d9e1 Mon Sep 17 00:00:00 2001 From: Laurent Valdes Date: Sat, 11 Apr 2026 16:42:03 +0200 Subject: [PATCH] fix: accept JSON string defaults for decimal fields in union MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Avro 1.12.0 Specification, §"Complex Types / Records", the JSON encoding of a `bytes` field's default value is a string whose codepoints 0-255 map to byte values 0-255 (e.g. `"\u00FF"`). The same section specifies that a union-typed field's default must correspond to the first schema that matches in the union. `decimal` is defined as a logical type over `bytes`, so this rule transitively applies: a nullable decimal field expressed as `[{bytes, logicalType: decimal}, null]` with a JSON string default requires `resolve_decimal` to accept `Value::String` when validating defaults at schema parse time. Before this change the parser rejected such schemas with `GetDefaultUnion(Decimal, String)`, even though Java and Python Avro accept them. The added arm walks the string's codepoints, rejecting any above 0xFF, and returns a `Value::Decimal`. The precision check is skipped because the spec does not require a default's byte length to cover the declared precision — it only requires a valid `bytes` value. Wire-level decoded records always reach `resolve_decimal` as `Value::Bytes`, so this arm is exclusively a default-validation path. Tests cover `\u0000`, a full 0..=255 round-trip, codepoints > 0xFF being rejected, and end-to-end parsing of a nullable decimal record schema. --- avro/src/types.rs | 77 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/avro/src/types.rs b/avro/src/types.rs index ad0258b5..2a6b1a0a 100644 --- a/avro/src/types.rs +++ b/avro/src/types.rs @@ -846,6 +846,20 @@ impl Value { Ok(Value::Decimal(Decimal::from(bytes))) } } + // JSON string defaults per spec §Records: codepoints 0-255 + // map to byte values 0-255. 
No precision check — defaults + need only be valid `bytes`, not fit the declared precision. + Value::String(s) => { + let mut bytes = Vec::with_capacity(s.len()); + for c in s.chars() { + let cp = c as u32; + if cp > 0xFF { + return Err(Details::ResolveDecimal(Value::String(s)).into()); + } + bytes.push(cp as u8); + } + Ok(Value::Decimal(Decimal::from(bytes))) + } other => Err(Details::ResolveDecimal(other).into()), } } @@ -1776,6 +1790,69 @@ Field with name '"b"' is not a member of the map items"#, Ok(()) } + #[test] + fn resolve_decimal_from_string_default() -> TestResult { + let value = Value::String("\u{0000}".to_string()); + let resolved = value.resolve(&Schema::Decimal(DecimalSchema { + precision: 10, + scale: 4, + inner: InnerDecimalSchema::Bytes, + }))?; + assert_eq!(resolved, Value::Decimal(Decimal::from(vec![0u8]))); + + let mut all_bytes_str = String::new(); + for b in 0u8..=255u8 { + all_bytes_str.push(char::from_u32(b as u32).unwrap()); + } + let resolved = Value::String(all_bytes_str).resolve(&Schema::Decimal(DecimalSchema { + precision: 10, + scale: 0, + inner: InnerDecimalSchema::Bytes, + }))?; + assert_eq!( + resolved, + Value::Decimal(Decimal::from((0u8..=255u8).collect::<Vec<u8>>())) + ); + + let value = Value::String("\u{0100}".to_string()); + assert!( + value + .resolve(&Schema::Decimal(DecimalSchema { + precision: 10, + scale: 4, + inner: InnerDecimalSchema::Bytes, + })) + .is_err() + ); + + Ok(()) + } + + #[test] + fn parse_schema_with_nullable_decimal_string_default() -> TestResult { + let schema_json = r#"{ + "type": "record", + "name": "NullableDecimal", + "fields": [ + { + "name": "amount", + "type": [ + { + "type": "bytes", + "scale": 4, + "precision": 10, + "logicalType": "decimal" + }, + "null" + ], + "default": "\u0000" + } + ] + }"#; + Schema::parse_str(schema_json)?; + Ok(()) + } + #[test] fn resolve_decimal_invalid_scale() { let value = Value::Decimal(Decimal::from(vec![1, 2]));