Skip to content

Commit faeb5ba

Browse files
Kriskras99martin-g
authored andcommitted
fix: Review feedback
--------- Co-authored-by: Martin Grigorov <martin-g@users.noreply.github.com>
1 parent 3d03097 commit faeb5ba

File tree

19 files changed

+294
-91
lines changed

19 files changed

+294
-91
lines changed

avro/src/documentation/avro_data_model_to_serde.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
//! how the different data models are mapped. When mapping from Rust types to an Avro schema,
2222
//! see [the documentation for the reverse](super::serde_data_model_to_avro).
2323
//!
24-
//! Only the the mapping as defined here is supported. Any other behavior might change in a minor version.
24+
//! Only the mapping as defined here is supported. Any other behavior might change in a minor version.
2525
//!
2626
//! ## Primitive Types
2727
//! - `null`: `()`
@@ -38,11 +38,11 @@
3838
//! - `records`: A struct with the same name and fields or a tuple with the same fields.
3939
//! - Extra fields can be added to the struct if they are marked with `#[serde(skip)]`
4040
//! - `enums`: A enum with the same name and unit variants for every symbol
41-
//! - The index of the symbol most match the index of the variant
41+
//! - The index of the symbol must match the index of the variant
4242
//! - `arrays`: [`Vec`] (or any type that uses [`Serialize::serialize_seq`](serde::Serialize), [`Deserialize::deserialize_seq`](serde::Deserialize))
4343
//! - `[T; N]` is (de)serialized as a tuple by Serde, to (de)serialize them as an `array` use [`apache_avro::serde::array`]
4444
//! - `maps`: [`HashMap<String, _>`](std::collections::HashMap) (or any type that uses [`Serialize::serialize_map`](serde::Serialize), [`Deserialize::deserialize_map`](serde::Deserialize))
45-
//! - `unions`: A enum with a variant for each variant
45+
//! - `unions`: An enum with a variant for each union variant
4646
//! - The index of the union variant must match the enum variant
4747
//! - A `null` can be a unit variant or a newtype variant with a unit type
4848
//! - All other variants must be newtype variants, struct variants, or tuple variants.

avro/src/documentation/serde_data_model_to_avro.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
//!
2020
//! When manually mapping Rust types to an Avro schema, or the reverse, it is important to understand
2121
//! how the different data models are mapped. When mapping from an Avro schema to Rust types,
22-
//! see [the documentation for the reverse](super::serde_data_model_to_avro).
22+
//! see [the documentation for the reverse](super::avro_data_model_to_serde).
2323
//!
2424
//! Only the schemas generated by the [`AvroSchema`] derive and the mapping as defined here are
2525
//! supported. Any other behavior might change in a minor version.
@@ -47,7 +47,7 @@
4747
//! - **newtype variant** => See [Enums](##Enums)
4848
//! - **seq** => [`Schema::Array`]
4949
//! - **tuple**
50-
//! - For tuples with one element, the schema will be the schema the only element
50+
//! - For tuples with one element, the schema will be the schema of the only element
5151
//! - For tuples with more than one element, the schema will be a [`Schema::Record`] with as many fields as there are elements.
5252
//! The schema must have the attribute `org.apache.avro.rust.tuple` with the value set to `true`.
5353
//! - **Note:** Serde (de)serializes arrays (`[T; N]`) as tuples. To (de)serialize an array as a
@@ -91,7 +91,7 @@
9191
//! `content` tag and use the [`Schema::Union`] schema.
9292
//!
9393
//! ### Untagged
94-
//! This enum representation is ued by Serde if the attribute `#[serde(untagged)]` is used. It maps
94+
//! This enum representation is used by Serde if the attribute `#[serde(untagged)]` is used. It maps
9595
//! to a [`Schema::Union`] with the following schemas:
9696
//! - **unit_variant** => [`Schema::Null`].
9797
//! - **newtype_variant** => The schema of the inner type.

avro/src/error.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,9 @@ pub enum Details {
157157
#[error("Union index {index} out of bounds: {num_variants}")]
158158
GetUnionVariant { index: i64, num_variants: usize },
159159

160-
#[error("Enum symbol index out of bounds: {num_variants}")]
160+
#[error(
161+
"Enum symbol index out of bounds: got {index} but there are only {num_variants} variants"
162+
)]
161163
EnumSymbolIndex { index: usize, num_variants: usize },
162164

163165
#[error("Enum symbol not found {0}")]
@@ -553,7 +555,7 @@ pub enum Details {
553555
#[error("Decoded integer out of range for i32: {1}: {0}")]
554556
ZagI32(#[source] std::num::TryFromIntError, i64),
555557

556-
#[error("unable to read block")]
558+
#[error("Did not read any bytes, block is corrupt")]
557559
ReadBlock,
558560

559561
#[error("Failed to serialize value into Avro value: {0}")]
@@ -661,6 +663,14 @@ pub enum Details {
661663
"The implementation of `SchemaNameValidator` is incorrect! It returned an out-of-bounds index or provided a regex that did not capture a group named `name`"
662664
)]
663665
InvalidSchemaNameValidatorImplementation,
666+
667+
#[error(
668+
"Not all tuple fields were serialized, expected to serialize element at position {position} of a {total_elements}-tuple but `SerializeTuple::end()` was called"
669+
)]
670+
SerializeTupleMissingElements {
671+
position: usize,
672+
total_elements: usize,
673+
},
664674
}
665675

666676
#[derive(thiserror::Error, PartialEq)]

avro/src/reader/block.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,8 @@ impl<'r, R: Read> Block<'r, R> {
230230
let b_original = block_bytes.len();
231231

232232
let item = if read_schema.is_some() {
233-
todo!("Schema aware deserialisation does not resolve schemas yet");
233+
// TODO: Implement SchemaAwareResolvingDeserializer
234+
panic!("Schema aware deserialisation does not resolve schemas yet");
234235
} else {
235236
let config = Config {
236237
names: &self.names_refs,

avro/src/reader/datum.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,8 @@ impl<'s> GenericDatumReader<'s> {
146146
// `reader` is `impl Read` instead of a generic on the function like T so it's easier to
147147
// specify the type wanted (`read_deser<String>` vs `read_deser<String, _>`)
148148
if let Some((_, _)) = &self.reader {
149-
todo!("Schema aware deserialisation does not resolve schemas yet");
149+
// TODO: Implement SchemaAwareResolvingDeserializer
150+
panic!("Schema aware deserialisation does not resolve schemas yet");
150151
} else {
151152
T::deserialize(SchemaAwareDeserializer::new(
152153
reader,

avro/src/reader/mod.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,10 @@ impl<'a, R: Read> Reader<'a, R> {
6969
#[builder(finish_fn = build)]
7070
pub fn builder(
7171
#[builder(start_fn)] reader: R,
72+
/// The schema the written data needs to be adapted to.
73+
///
74+
/// This is currently not compatible with [`ReaderDeser`] and will panic during reading if the
75+
/// writer schema is not the same as the reader schema.
7276
reader_schema: Option<&'a Schema>,
7377
schemata: Option<Vec<&'a Schema>>,
7478
#[builder(default = is_human_readable())] human_readable: bool,
@@ -127,6 +131,12 @@ impl<'a, R: Read> Reader<'a, R> {
127131
}
128132

129133
fn read_next_deser<T: DeserializeOwned>(&mut self) -> AvroResult<Option<T>> {
134+
// TODO: Implement SchemaAwareResolvingDeserializer
135+
assert!(
136+
!self.should_resolve_schema,
137+
"Schema aware deserialisation does not resolve schemas yet"
138+
);
139+
130140
self.block.read_next_deser(self.reader_schema)
131141
}
132142
}

avro/src/schema/mod.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -841,6 +841,7 @@ impl Schema {
841841
| Schema::Ref { name } => {
842842
let name: String = name
843843
.to_string()
844+
.replace('_', "__")
844845
.chars()
845846
.map(|c| if c.is_ascii_alphanumeric() { c } else { '_' })
846847
.collect();
@@ -5182,4 +5183,29 @@ mod tests {
51825183

51835184
Ok(())
51845185
}
5186+
5187+
#[test]
5188+
fn avro_rs_512_unique_normalized_name_must_be_unique() -> TestResult {
5189+
let one = Schema::Ref {
5190+
name: "a.b.c".parse()?,
5191+
};
5192+
let two = Schema::Ref {
5193+
name: "a_b_c".parse()?,
5194+
};
5195+
let three = Schema::Ref {
5196+
name: "a__b__c".parse()?,
5197+
};
5198+
let four = Schema::Ref {
5199+
name: "a._b._c".parse()?,
5200+
};
5201+
5202+
assert_ne!(one, two);
5203+
assert_ne!(one, three);
5204+
assert_ne!(one, four);
5205+
assert_ne!(two, three);
5206+
assert_ne!(two, four);
5207+
assert_ne!(three, four);
5208+
5209+
Ok(())
5210+
}
51855211
}

avro/src/serde/deser_schema/map.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ impl<'s, 'r, R: Read, S: Borrow<Schema>> MapDeserializer<'s, 'r, R, S> {
6060
let _bytes = zag_i64(reader)?;
6161
}
6262
if remaining == 0 {
63-
// If the block size is zero the array is finished
63+
// If the block size is zero the map is finished
6464
Ok(None)
6565
} else {
6666
Ok(Some(remaining.unsigned_abs()))

0 commit comments

Comments
 (0)