Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 96 additions & 7 deletions avro/src/serde/derive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ use crate::schema::{
use std::borrow::Cow;
use std::collections::{HashMap, HashSet};

const FIXED_8_DEFAULT: &str = "\0\0\0\0\0\0\0\0";
const FIXED_12_DEFAULT: &str = "\0\0\0\0\0\0\0\0\0\0\0\0";
const FIXED_16_DEFAULT: &str = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";

/// Trait for types that serve as an Avro data model.
///
/// **Do not implement directly!** Either derive it or implement [`AvroSchemaComponent`] to get this trait
Expand Down Expand Up @@ -81,6 +85,10 @@ use std::collections::{HashMap, HashSet};
///
/// Set the `doc` attribute of the schema. Defaults to the documentation of the type.
///
/// - `#[avro(default = r#"{"field": 42, "other": "Spam"}"#)]`
///
/// Provide the default value for this type when it is used in a field.
///
/// - `#[avro(alias = "name")]`
///
/// Set the `alias` attribute of the schema. Can be specified multiple times.
Expand Down Expand Up @@ -113,11 +121,14 @@ use std::collections::{HashMap, HashSet};
///
/// Set the `doc` attribute of the field. Defaults to the documentation of the field.
///
/// - `#[avro(default = "null")]`
/// - `#[avro(default = "null")]` or `#[avro(default = false)]`
///
/// Set the `default` attribute of the field.
/// Control the `default` attribute of the field. When not used, it will use [`AvroSchemaComponent::field_default`]
/// to get the default value for a type. This default value can be overriden by providing a JSON string.
/// To remove the `default` attribute for a field, set `default` to `false`.
///
/// _Note:_ This is a JSON value not a Rust value, as this is put in the schema itself.
/// _Note:_ This is a JSON value not a Rust value, as this is put in the schema itself. To encode a JSON string
/// you need to use double quotes: `#[avro(default = r#""Some string value""#)]`.
///
/// - `#[serde(alias = "name")]`
///
Expand Down Expand Up @@ -220,6 +231,11 @@ pub trait AvroSchema {
/// fn get_record_fields_in_ctxt(_: usize, _: &mut HashSet<Name>, _: &Namespace) -> Option<Vec<RecordField>> {
/// None // A Schema::Int is not a Schema::Record so there are no fields to return
/// }
///
/// fn field_default() -> Option<serde_json::Value> {
/// // Zero as default value. Can also be None if you don't want to provide a default value
/// Some(0u8.into())
/// }
///}
/// ```
///
Expand All @@ -242,6 +258,10 @@ pub trait AvroSchema {
/// fn get_record_fields_in_ctxt(first_field_position: usize, named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace) -> Option<Vec<RecordField>> {
/// T::get_record_fields_in_ctxt(first_field_position, named_schemas, enclosing_namespace)
/// }
///
/// fn field_default() -> Option<serde_json::Value> {
/// T::field_default()
/// }
///}
/// ```
///
Expand All @@ -256,6 +276,7 @@ pub trait AvroSchema {
/// - Implement `get_record_fields_in_ctxt` as the default implementation has to be implemented
/// with backtracking and a lot of cloning.
/// - Even if your schema is not a record, still implement the function and just return `None`
/// - Implement `field_default()` if you want to use `#[serde(skip_serializing{,_if})]`.
///
/// ```
/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, Namespace, RecordField, RecordSchema}};
Expand Down Expand Up @@ -305,6 +326,11 @@ pub trait AvroSchema {
/// .build(),
/// ])
/// }
///
/// fn field_default() -> Option<serde_json::Value> {
/// // This type does not provide a default value
/// None
/// }
///}
/// ```
pub trait AvroSchemaComponent {
Expand Down Expand Up @@ -332,6 +358,16 @@ pub trait AvroSchemaComponent {
Self::get_schema_in_ctxt,
)
}

/// The default value of this type when used for a record field.
///
/// `None` means no default value, which is also the default implementation.
///
/// Implementations of this trait provided by this crate use the [`Default::default`] value of
/// the type.
fn field_default() -> Option<serde_json::Value> {
None
}
}

/// Get the record fields from `schema_fn` without polluting `named_schemas` or causing duplicate names
Expand Down Expand Up @@ -479,6 +515,10 @@ where

macro_rules! impl_schema (
($type:ty, $variant_constructor:expr) => (
impl_schema!($type, $variant_constructor, <$type as Default>::default());
);

($type:ty, $variant_constructor:expr, $default_constructor:expr) => (
impl AvroSchemaComponent for $type {
fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Schema {
$variant_constructor
Expand All @@ -487,6 +527,10 @@ macro_rules! impl_schema (
fn get_record_fields_in_ctxt(_: usize, _: &mut HashSet<Name>, _: &Namespace) -> Option<Vec<RecordField>> {
None
}

fn field_default() -> Option<serde_json::Value> {
Some(serde_json::Value::from($default_constructor))
}
}
);
);
Expand All @@ -502,8 +546,8 @@ impl_schema!(u32, Schema::Long);
impl_schema!(f32, Schema::Float);
impl_schema!(f64, Schema::Double);
impl_schema!(String, Schema::String);
impl_schema!(str, Schema::String);
impl_schema!(char, Schema::String);
impl_schema!(str, Schema::String, String::default());
impl_schema!(char, Schema::String, String::from(char::default()));

macro_rules! impl_passthrough_schema (
($type:ty where T: AvroSchemaComponent + ?Sized $(+ $bound:tt)*) => (
Expand All @@ -515,6 +559,10 @@ macro_rules! impl_passthrough_schema (
fn get_record_fields_in_ctxt(first_field_position: usize, named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace) -> Option<Vec<RecordField>> {
T::get_record_fields_in_ctxt(first_field_position, named_schemas, enclosing_namespace)
}

fn field_default() -> Option<serde_json::Value> {
T::field_default()
}
}
);
);
Expand All @@ -535,6 +583,10 @@ macro_rules! impl_array_schema (
fn get_record_fields_in_ctxt(_: usize, _: &mut HashSet<Name>, _: &Namespace) -> Option<Vec<RecordField>> {
None
}

fn field_default() -> Option<serde_json::Value> {
Some(serde_json::Value::Array(Vec::new()))
}
}
);
);
Expand Down Expand Up @@ -562,6 +614,13 @@ where
) -> Option<Vec<RecordField>> {
None
}

/// If `T` has a field default, this will return an array of with that default. Otherwise there is no default.
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

There is a small typo in the doc comment: "of with" should be "with".

Suggested change
/// If `T` has a field default, this will return an array of with that default. Otherwise there is no default.
/// If `T` has a field default, this will return an array with that default. Otherwise there is no default.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

value:valid-but-wont-fix; category:bug; feedback: The Gemini AI reviewer is not correct! The grammar would be better if "elements" is added instead of removing the "of": this will return an array of elements with that default

fn field_default() -> Option<serde_json::Value> {
T::field_default().map(|default| {
serde_json::Value::Array(std::array::from_fn::<_, N, _>(|_| default.clone()).to_vec())
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Using vec![default; N] is a simpler and more idiomatic way to create a vector with N cloned elements than std::array::from_fn followed by to_vec().

Suggested change
serde_json::Value::Array(std::array::from_fn::<_, N, _>(|_| default.clone()).to_vec())
serde_json::Value::Array(vec![default; N])

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

value:good-to-have; category:bug; feedback: The Gemini AI reviewer is correct! Using the vec! initializer will be shorter and more succint.

})
}
}

impl<T> AvroSchemaComponent for HashMap<String, T>
Expand All @@ -582,6 +641,10 @@ where
) -> Option<Vec<RecordField>> {
None
}

fn field_default() -> Option<serde_json::Value> {
Some(serde_json::Value::Object(serde_json::Map::new()))
}
}

impl<T> AvroSchemaComponent for Option<T>
Expand Down Expand Up @@ -609,6 +672,10 @@ where
) -> Option<Vec<RecordField>> {
None
}

fn field_default() -> Option<serde_json::Value> {
Some(serde_json::Value::Null)
}
}

impl AvroSchemaComponent for core::time::Duration {
Expand Down Expand Up @@ -644,6 +711,10 @@ impl AvroSchemaComponent for core::time::Duration {
) -> Option<Vec<RecordField>> {
None
}

fn field_default() -> Option<serde_json::Value> {
Some(serde_json::Value::String(FIXED_12_DEFAULT.to_string()))
}
}

impl AvroSchemaComponent for uuid::Uuid {
Expand Down Expand Up @@ -679,6 +750,10 @@ impl AvroSchemaComponent for uuid::Uuid {
) -> Option<Vec<RecordField>> {
None
}

fn field_default() -> Option<serde_json::Value> {
Some(serde_json::Value::String(FIXED_16_DEFAULT.to_string()))
}
}

impl AvroSchemaComponent for u64 {
Expand Down Expand Up @@ -712,6 +787,10 @@ impl AvroSchemaComponent for u64 {
) -> Option<Vec<RecordField>> {
None
}

fn field_default() -> Option<serde_json::Value> {
Some(serde_json::Value::String(FIXED_8_DEFAULT.to_string()))
}
}

impl AvroSchemaComponent for u128 {
Expand Down Expand Up @@ -745,6 +824,10 @@ impl AvroSchemaComponent for u128 {
) -> Option<Vec<RecordField>> {
None
}

fn field_default() -> Option<serde_json::Value> {
Some(serde_json::Value::String(FIXED_16_DEFAULT.to_string()))
}
}

impl AvroSchemaComponent for i128 {
Expand Down Expand Up @@ -778,12 +861,18 @@ impl AvroSchemaComponent for i128 {
) -> Option<Vec<RecordField>> {
None
}

fn field_default() -> Option<serde_json::Value> {
Some(serde_json::Value::String(FIXED_16_DEFAULT.to_string()))
}
}

#[cfg(test)]
mod tests {
use crate::schema::{FixedSchema, Name};
use crate::{AvroSchema, Schema};
use crate::{
AvroSchema, Schema,
schema::{FixedSchema, Name},
};
use apache_avro_test_helper::TestResult;

#[test]
Expand Down
7 changes: 7 additions & 0 deletions avro/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,13 @@ impl Value {
}
Value::Uuid(Uuid::from_slice(bytes).map_err(Details::ConvertSliceToUuid)?)
}
(Value::String(ref string), UuidSchema::Fixed(_)) => {
let bytes = string.as_bytes();
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For UuidSchema::Fixed, using string.as_bytes() treats the JSON default as UTF-8 bytes; this can change both the length and the actual byte values for non-ASCII \u00xx escapes, so fixed/uuid defaults may resolve incorrectly. It may be worth ensuring this follows Avro’s “fixed/bytes default as JSON string of raw bytes” interpretation (1 character → 1 byte).

Severity: medium

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

if bytes.len() != 16 {
return Err(Details::ConvertFixedToUuid(bytes.len()).into());
}
Value::Uuid(Uuid::from_slice(bytes).map_err(Details::ConvertSliceToUuid)?)
}
(other, _) => return Err(Details::GetUuid(other).into()),
};
Ok(value)
Expand Down
6 changes: 5 additions & 1 deletion avro_derive/src/attributes/avro.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
//! Although a user will mostly use the Serde attributes, there are some Avro specific attributes
//! a user can use. These add extra metadata to the generated schema.

use crate::attributes::FieldDefault;
use crate::case::RenameRule;
use darling::FromMeta;
use proc_macro2::Span;
Expand Down Expand Up @@ -53,6 +54,9 @@ pub struct ContainerAttributes {
/// [`serde::ContainerAttributes::rename_all`]: crate::attributes::serde::ContainerAttributes::rename_all
#[darling(default)]
pub rename_all: RenameRule,
/// Set the default value if this schema is used as a field
#[darling(default)]
pub default: Option<String>,
}

impl ContainerAttributes {
Expand Down Expand Up @@ -125,7 +129,7 @@ pub struct FieldAttributes {
///
/// This is also used as the default when `skip_serializing{_if}` is used.
#[darling(default)]
pub default: Option<String>,
pub default: FieldDefault,
/// Deprecated. Use [`serde::FieldAttributes::alias`] instead.
///
/// Adds the `aliases` field to the schema.
Expand Down
54 changes: 50 additions & 4 deletions avro_derive/src/attributes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@

use crate::case::RenameRule;
use darling::{FromAttributes, FromMeta};
use proc_macro2::Span;
use proc_macro2::{Span, TokenStream};
use quote::quote;
use syn::{AttrStyle, Attribute, Expr, Ident, Path, spanned::Spanned};

mod avro;
Expand All @@ -30,6 +31,7 @@ pub struct NamedTypeOptions {
pub aliases: Vec<String>,
pub rename_all: RenameRule,
pub transparent: bool,
pub default: TokenStream,
}

impl NamedTypeOptions {
Expand Down Expand Up @@ -116,12 +118,29 @@ impl NamedTypeOptions {

let doc = avro.doc.or_else(|| extract_rustdoc(attributes));

let default = match avro.default {
None => quote! { None },
Some(default_value) => {
let _: serde_json::Value =
serde_json::from_str(&default_value[..]).map_err(|e| {
vec![syn::Error::new(
ident.span(),
format!("Invalid avro default json: \n{e}"),
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

"Avro" and "JSON" should be capitalized in the error message for consistency and correctness.

Suggested change
format!("Invalid avro default json: \n{e}"),
format!("Invalid Avro default JSON: \n{e}"),

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

value:good-to-have; category:documentation; feedback: The Gemini AI reviewer is correct! The names and abbreviations should be capitalized/upper-cased to emphasise their meaning.

)]
})?;
quote! {
Some(serde_json::from_str(#default_value).expect(format!("Invalid JSON: {:?}", #default_value).as_str()))
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Since the JSON string has already been validated at compile time (lines 124-130), using format! and as_str() inside expect is redundant. A simple static message is sufficient.

Suggested change
Some(serde_json::from_str(#default_value).expect(format!("Invalid JSON: {:?}", #default_value).as_str()))
Some(serde_json::from_str(#default_value).expect("Invalid JSON"))

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

value:good-to-have; category:documentation; feedback: The Gemini AI reviewer is correct! There is no need to parse the JSON twice. The successful result could be assigned and quoted instead. This way there won't be a need to expect() and provide a panic message.

}
}
};

Ok(Self {
name: full_schema_name,
doc,
aliases: avro.alias,
rename_all: serde.rename_all.serialize,
transparent: serde.transparent,
default,
})
}
}
Expand Down Expand Up @@ -210,11 +229,38 @@ impl With {
}
}
}
/// How to get the default value for a value.
#[derive(Debug, PartialEq, Default)]
pub enum FieldDefault {
/// Use `<T as AvroSchemaComponent>::field_default`.
#[default]
Trait,
/// Don't set a default.
Disabled,
/// Use this JSON value.
Value(String),
}

impl FromMeta for FieldDefault {
fn from_string(value: &str) -> darling::Result<Self> {
Ok(Self::Value(value.to_string()))
}

fn from_bool(value: bool) -> darling::Result<Self> {
if value {
Err(darling::Error::custom(
"Expected `false` or a JSON string, got `true`",
))
} else {
Ok(Self::Disabled)
}
}
}

#[derive(Default)]
pub struct FieldOptions {
pub doc: Option<String>,
pub default: Option<String>,
pub default: FieldDefault,
pub alias: Vec<String>,
pub rename: Option<String>,
pub skip: bool,
Expand Down Expand Up @@ -274,11 +320,11 @@ impl FieldOptions {
}
if ((serde.skip_serializing && !serde.skip_deserializing)
|| serde.skip_serializing_if.is_some())
&& avro.default.is_none()
&& avro.default == FieldDefault::Disabled
{
errors.push(syn::Error::new(
span,
"`#[serde(skip_serializing)]` and `#[serde(skip_serializing_if)]` require `#[avro(default = \"..\")]`"
"`#[serde(skip_serializing)]` and `#[serde(skip_serializing_if)]` are incompatible with `#[avro(default = false)]`"
));
}
let with = match With::from_avro_and_serde(&avro.with, &serde.with, span) {
Expand Down
Loading