Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 92 additions & 5 deletions avro/src/serde/derive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ use std::collections::{HashMap, HashSet};
///
/// Set the `doc` attribute of the schema. Defaults to the documentation of the type.
///
/// - `#[avro(default = r#"{"field": 42, "other": "Spam"}"#)]`
///
/// Provide the default value for this type when it is used in a field.
///
/// - `#[avro(alias = "name")]`
///
/// Set the `alias` attribute of the schema. Can be specified multiple times.
Expand Down Expand Up @@ -113,11 +117,22 @@ use std::collections::{HashMap, HashSet};
///
/// Set the `doc` attribute of the field. Defaults to the documentation of the field.
///
/// - `#[avro(default = "null")]`
/// - `#[avro(default = ..)]`
///
/// Control the `default` attribute of the field. When not used, it will use [`AvroSchemaComponent::field_default`]
/// to get the default value for a type. To remove the `default` attribute for a field, set `default` to `false`: `#[avro(default = false)]`.
///
/// Set the `default` attribute of the field.
/// To override or set a default value, provide a JSON string:
///
/// _Note:_ This is a JSON value not a Rust value, as this is put in the schema itself.
/// - Null: `#[avro(default = "null")]`
/// - Boolean: `#[avro(default = "true")]`.
/// - Number: `#[avro(default = "42")]` or `#[avro(default = "42.5")]`
/// - String: `#[avro(default = r#""String needs extra quotes""#)]`.
/// - Array: `#[avro(default = r#"["One", "Two", "Three"]"#)]`.
/// - Object: `#[avro(default = r#"{"One": 1}"#)]`.
///
/// See [the specification](https://avro.apache.org/docs/++version++/specification/#schema-record)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doc link contains ++version++, which looks like a placeholder and likely won’t resolve as-is; consider linking to a concrete Avro spec version or a stable URL.

Severity: low

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

value:incorrect-but-reasonable; category:documentation; feedback: The Augment AI reviewer is not correct! ++version++ is a special placeholder for the next version that is not yet released. It is always valid.

/// for details on how to map a type to a JSON value.
///
/// - `#[serde(alias = "name")]`
///
Expand Down Expand Up @@ -220,6 +235,11 @@ pub trait AvroSchema {
/// fn get_record_fields_in_ctxt(_: usize, _: &mut HashSet<Name>, _: &Namespace) -> Option<Vec<RecordField>> {
/// None // A Schema::Int is not a Schema::Record so there are no fields to return
/// }
///
/// fn field_default() -> Option<serde_json::Value> {
/// // Zero as default value. Can also be None if you don't want to provide a default value
/// Some(0u8.into())
/// }
///}
/// ```
///
Expand All @@ -242,6 +262,10 @@ pub trait AvroSchema {
/// fn get_record_fields_in_ctxt(first_field_position: usize, named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace) -> Option<Vec<RecordField>> {
/// T::get_record_fields_in_ctxt(first_field_position, named_schemas, enclosing_namespace)
/// }
///
/// fn field_default() -> Option<serde_json::Value> {
/// T::field_default()
/// }
///}
/// ```
///
Expand All @@ -256,6 +280,7 @@ pub trait AvroSchema {
/// - Implement `get_record_fields_in_ctxt` as the default implementation has to be implemented
/// with backtracking and a lot of cloning.
/// - Even if your schema is not a record, still implement the function and just return `None`
/// - Implement `field_default()` if you want to use `#[serde(skip_serializing{,_if})]`.
///
/// ```
/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, Namespace, RecordField, RecordSchema}};
Expand Down Expand Up @@ -305,6 +330,11 @@ pub trait AvroSchema {
/// .build(),
/// ])
/// }
///
/// fn field_default() -> Option<serde_json::Value> {
/// // This type does not provide a default value
/// None
/// }
///}
/// ```
pub trait AvroSchemaComponent {
Expand Down Expand Up @@ -332,6 +362,16 @@ pub trait AvroSchemaComponent {
Self::get_schema_in_ctxt,
)
}

/// The default value of this type when used for a record field.
///
/// `None` means no default value, which is also the default implementation.
///
/// Implementations of this trait provided by this crate return `None` except for `Option<T>`
/// which returns `Some(serde_json::Value::Null)`.
fn field_default() -> Option<serde_json::Value> {
None
}
}

/// Get the record fields from `schema_fn` without polluting `named_schemas` or causing duplicate names
Expand Down Expand Up @@ -487,6 +527,10 @@ macro_rules! impl_schema (
fn get_record_fields_in_ctxt(_: usize, _: &mut HashSet<Name>, _: &Namespace) -> Option<Vec<RecordField>> {
None
}

fn field_default() -> Option<serde_json::Value> {
None
}
}
);
);
Expand Down Expand Up @@ -515,6 +559,10 @@ macro_rules! impl_passthrough_schema (
fn get_record_fields_in_ctxt(first_field_position: usize, named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace) -> Option<Vec<RecordField>> {
T::get_record_fields_in_ctxt(first_field_position, named_schemas, enclosing_namespace)
}

fn field_default() -> Option<serde_json::Value> {
T::field_default()
}
}
);
);
Expand All @@ -535,6 +583,10 @@ macro_rules! impl_array_schema (
fn get_record_fields_in_ctxt(_: usize, _: &mut HashSet<Name>, _: &Namespace) -> Option<Vec<RecordField>> {
None
}

fn field_default() -> Option<serde_json::Value> {
None
}
}
);
);
Expand Down Expand Up @@ -562,6 +614,11 @@ where
) -> Option<Vec<RecordField>> {
None
}

/// If `T` has a field default, this will return an array of elements with that default. Otherwise there is no default.
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The documentation for field_default on [T; N] is misleading. It states that it will return an array of default elements if the inner type T has a default, but the implementation always returns None. This could cause confusion. The comment should be updated to accurately reflect that arrays do not have a default value by default.

Suggested change
/// If `T` has a field default, this will return an array of elements with that default. Otherwise there is no default.
/// Arrays do not have a default value by default.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

value:good-to-have; category:documentation; feedback: The Gemini AI reviewer is correct! The docstring is obsolete since now there is no default value by default for all types but std::option::Option. The method could be removed because it does the same as the default implementation in the AvroSchemaComponent trait.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The doc comment says the array implementation will derive a default from T’s field_default, but field_default() currently always returns None; could we align the comment with the actual behavior (or vice-versa)?

Severity: low

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

value:good-to-have; category:documentation; feedback: The Augment AI reviewer is correct! The docstring is obsolete since now there is no default value by default for all types but std::option::Option. The method could be removed because it does the same as the default implementation in the AvroSchemaComponent trait.

fn field_default() -> Option<serde_json::Value> {
None
}
Comment on lines +617 to +621
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Stale doc comment contradicts the implementation.

The doc comment says "If T has a field default, this will return an array of elements with that default" but the method unconditionally returns None. Either update the comment to reflect the current behavior, or implement the described logic.

Proposed fix (update doc to match implementation)
-    /// If `T` has a field default, this will return an array of elements with that default. Otherwise there is no default.
-    fn field_default() -> Option<serde_json::Value> {
-        None
-    }
+    fn field_default() -> Option<serde_json::Value> {
+        None
+    }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
/// If `T` has a field default, this will return an array of elements with that default. Otherwise there is no default.
fn field_default() -> Option<serde_json::Value> {
None
}
fn field_default() -> Option<serde_json::Value> {
None
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@avro/src/serde/derive.rs` around lines 617 - 621, The docstring for fn
field_default() currently claims it "will return an array of elements with that
default" for types with field defaults, but the implementation always returns
None; update the comment on field_default() to accurately reflect the current
behavior (e.g., "Returns None; no field defaults are provided by this derive
implementation") and remove the misleading mention of arrays/field defaults, or
alternatively implement the described logic inside field_default() if you intend
to support returning default arrays for T—refer to the field_default() function
and any surrounding derive logic when making the change.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

value:good-to-have; category:documentation; feedback: The CodeRabbit AI reviewer is correct! The docstring is obsolete since now there is no default value by default for all types but std::option::Option. The method could be removed because it does the same as the default implementation in the AvroSchemaComponent trait.

}

impl<T> AvroSchemaComponent for HashMap<String, T>
Expand All @@ -582,6 +639,10 @@ where
) -> Option<Vec<RecordField>> {
None
}

fn field_default() -> Option<serde_json::Value> {
None
}
}

impl<T> AvroSchemaComponent for Option<T>
Expand Down Expand Up @@ -609,6 +670,10 @@ where
) -> Option<Vec<RecordField>> {
None
}

fn field_default() -> Option<serde_json::Value> {
Some(serde_json::Value::Null)
}
}

impl AvroSchemaComponent for core::time::Duration {
Expand Down Expand Up @@ -644,6 +709,10 @@ impl AvroSchemaComponent for core::time::Duration {
) -> Option<Vec<RecordField>> {
None
}

fn field_default() -> Option<serde_json::Value> {
None
}
}

impl AvroSchemaComponent for uuid::Uuid {
Expand Down Expand Up @@ -679,6 +748,10 @@ impl AvroSchemaComponent for uuid::Uuid {
) -> Option<Vec<RecordField>> {
None
}

fn field_default() -> Option<serde_json::Value> {
None
}
}

impl AvroSchemaComponent for u64 {
Expand Down Expand Up @@ -712,6 +785,10 @@ impl AvroSchemaComponent for u64 {
) -> Option<Vec<RecordField>> {
None
}

fn field_default() -> Option<serde_json::Value> {
None
}
}

impl AvroSchemaComponent for u128 {
Expand Down Expand Up @@ -745,6 +822,10 @@ impl AvroSchemaComponent for u128 {
) -> Option<Vec<RecordField>> {
None
}

fn field_default() -> Option<serde_json::Value> {
None
}
}

impl AvroSchemaComponent for i128 {
Expand Down Expand Up @@ -778,12 +859,18 @@ impl AvroSchemaComponent for i128 {
) -> Option<Vec<RecordField>> {
None
}

fn field_default() -> Option<serde_json::Value> {
None
}
}

#[cfg(test)]
mod tests {
use crate::schema::{FixedSchema, Name};
use crate::{AvroSchema, Schema};
use crate::{
AvroSchema, Schema,
schema::{FixedSchema, Name},
};
use apache_avro_test_helper::TestResult;

#[test]
Expand Down
7 changes: 7 additions & 0 deletions avro/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,13 @@ impl Value {
}
Value::Uuid(Uuid::from_slice(bytes).map_err(Details::ConvertSliceToUuid)?)
}
(Value::String(ref string), UuidSchema::Fixed(_)) => {
let bytes = string.as_bytes();
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For UuidSchema::Fixed, using string.as_bytes() interprets the JSON string as UTF-8 bytes; Avro fixed defaults are effectively “one Unicode code point per byte”, so any \u00XX values (>0x7F) will become multi-byte and either fail the length check or produce the wrong UUID.

Severity: high

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

value:annoying; category:bug; feedback: The Augment AI reviewer is not correct! By specification UUID v4 could contain only alphanumeric characters and the '-'. So, there are no multi-byte characters in it. If the String length is not 16 then the value is invalid.

if bytes.len() != 16 {
return Err(Details::ConvertFixedToUuid(bytes.len()).into());
}
Value::Uuid(Uuid::from_slice(bytes).map_err(Details::ConvertSliceToUuid)?)
}
(other, _) => return Err(Details::GetUuid(other).into()),
};
Ok(value)
Expand Down
6 changes: 5 additions & 1 deletion avro_derive/src/attributes/avro.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
//! Although a user will mostly use the Serde attributes, there are some Avro specific attributes
//! a user can use. These add extra metadata to the generated schema.

use crate::attributes::FieldDefault;
use crate::case::RenameRule;
use darling::FromMeta;
use proc_macro2::Span;
Expand Down Expand Up @@ -53,6 +54,9 @@ pub struct ContainerAttributes {
/// [`serde::ContainerAttributes::rename_all`]: crate::attributes::serde::ContainerAttributes::rename_all
#[darling(default)]
pub rename_all: RenameRule,
/// Set the default value if this schema is used as a field
#[darling(default)]
pub default: Option<String>,
}

impl ContainerAttributes {
Expand Down Expand Up @@ -125,7 +129,7 @@ pub struct FieldAttributes {
///
/// This is also used as the default when `skip_serializing{_if}` is used.
#[darling(default)]
pub default: Option<String>,
pub default: FieldDefault,
/// Deprecated. Use [`serde::FieldAttributes::alias`] instead.
///
/// Adds the `aliases` field to the schema.
Expand Down
54 changes: 50 additions & 4 deletions avro_derive/src/attributes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@

use crate::case::RenameRule;
use darling::{FromAttributes, FromMeta};
use proc_macro2::Span;
use proc_macro2::{Span, TokenStream};
use quote::quote;
use syn::{AttrStyle, Attribute, Expr, Ident, Path, spanned::Spanned};

mod avro;
Expand All @@ -30,6 +31,7 @@ pub struct NamedTypeOptions {
pub aliases: Vec<String>,
pub rename_all: RenameRule,
pub transparent: bool,
pub default: TokenStream,
}

impl NamedTypeOptions {
Expand Down Expand Up @@ -116,12 +118,29 @@ impl NamedTypeOptions {

let doc = avro.doc.or_else(|| extract_rustdoc(attributes));

let default = match avro.default {
None => quote! { None },
Some(default_value) => {
let _: serde_json::Value =
serde_json::from_str(&default_value[..]).map_err(|e| {
vec![syn::Error::new(
ident.span(),
format!("Invalid Avro `default` JSON: \n{e}"),
)]
})?;
quote! {
Some(serde_json::from_str(#default_value).expect(format!("Invalid JSON: {:?}", #default_value).as_str()))
}
}
};

Ok(Self {
name: full_schema_name,
doc,
aliases: avro.alias,
rename_all: serde.rename_all.serialize,
transparent: serde.transparent,
default,
})
}
}
Expand Down Expand Up @@ -210,11 +229,38 @@ impl With {
}
}
}
/// How to get the default value for a value.
#[derive(Debug, PartialEq, Default)]
pub enum FieldDefault {
/// Use `<T as AvroSchemaComponent>::field_default`.
#[default]
Trait,
/// Don't set a default.
Disabled,
/// Use this JSON value.
Value(String),
}

impl FromMeta for FieldDefault {
fn from_string(value: &str) -> darling::Result<Self> {
Ok(Self::Value(value.to_string()))
}

fn from_bool(value: bool) -> darling::Result<Self> {
if value {
Err(darling::Error::custom(
"Expected `false` or a JSON string, got `true`",
))
} else {
Ok(Self::Disabled)
}
}
}

#[derive(Default)]
pub struct FieldOptions {
pub doc: Option<String>,
pub default: Option<String>,
pub default: FieldDefault,
pub alias: Vec<String>,
pub rename: Option<String>,
pub skip: bool,
Expand Down Expand Up @@ -274,11 +320,11 @@ impl FieldOptions {
}
if ((serde.skip_serializing && !serde.skip_deserializing)
|| serde.skip_serializing_if.is_some())
&& avro.default.is_none()
&& avro.default == FieldDefault::Disabled
{
errors.push(syn::Error::new(
span,
r#"`#[serde(skip_serializing)]` and `#[serde(skip_serializing_if)]` require `#[avro(default = "..")]`"#
"`#[serde(skip_serializing)]` and `#[serde(skip_serializing_if)]` are incompatible with `#[avro(default = false)]`"
));
}
let with = match With::from_avro_and_serde(&avro.with, &serde.with, span) {
Expand Down
Loading