Skip to content

Commit a4d8dd5

Browse files
roofdivermartin-gCopilot
authored
Commiting Unit Test with Failing Byte Array Deserialization (#293)
* Commiting Unit Test with Failing Byte Array Deserialization * updating readme with solution to byte array deserialization issues * adding recommended changes to example and unit tests * removing example avro file * Minor cleanup of the new bytes IT test Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org> * Move the new documentation about serde byte arrays from README.md to lib.rs Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org> * Derive PartialEq to ExampleByteArray Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org> * Update README Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org> * Fix grammar Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Fix grammar Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org> Co-authored-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org> Co-authored-by: Martin Grigorov <martin-g@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent b36f472 commit a4d8dd5

File tree

3 files changed

+278
-0
lines changed

3 files changed

+278
-0
lines changed

avro/README.md

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -746,6 +746,89 @@ set_schemata_equality_comparator(Box::new(MyCustomSchemataEq));
746746
If the application parses schemas before setting a comparator, the default comparator will be
747747
registered and used!
748748

749+
### Deserializing Avro Byte Arrays
750+
751+
When using Serde to deserialize Avro files, fields that hold byte arrays sometimes need a special
752+
`#[serde(with = "...")]` field attribute, as shown in the examples below.
753+
754+
```rust
755+
use serde::{Deserialize, Serialize};
756+
757+
#[derive(Debug, Deserialize, Serialize)]
758+
struct SampleStruct {
759+
#[serde(with = "apache_avro::serde_avro_bytes")]
760+
non_optional_bytes: Vec<u8>,
761+
#[serde(with = "apache_avro::serde_avro_bytes_opt")]
762+
optional_bytes: Option<Vec<u8>>,
763+
#[serde(with = "apache_avro::serde_avro_fixed")]
764+
non_optional_fixed: [u8; 6],
765+
#[serde(with = "apache_avro::serde_avro_fixed_opt")]
766+
optional_fixed: Option<[u8; 6]>,
767+
}
768+
```
769+
770+
Here is a complete example of a serde round trip of a struct with a nullable byte array:
771+
772+
```rust
773+
use serde::{Deserialize, Serialize};
774+
775+
#[derive(Debug, Deserialize, PartialEq, Serialize)]
776+
struct ExampleByteArray {
777+
#[serde(with = "apache_avro::serde_avro_bytes_opt")]
778+
data_bytes: Option<Vec<u8>>,
779+
description: Option<String>,
780+
}
781+
782+
fn serde_byte_array() {
783+
let raw_schema = r#"
784+
{
785+
"type": "record",
786+
"name": "SimpleRecord",
787+
"fields": [
788+
{"name": "data_bytes", "type": ["null", "bytes"], "default": null},
789+
{"name": "description", "type": ["null", "string"], "default": null}
790+
]
791+
}"#;
792+
793+
let schema = apache_avro::Schema::parse_str(raw_schema).unwrap();
794+
795+
// Create vector of ExampleByteArray
796+
let records = vec![
797+
ExampleByteArray {
798+
data_bytes: Some(vec![1, 2, 3, 4, 5]),
799+
description: Some("First record".to_string()),
800+
},
801+
ExampleByteArray {
802+
data_bytes: None,
803+
description: Some("Second record".to_string()),
804+
},
805+
ExampleByteArray {
806+
data_bytes: Some(vec![10, 20, 30]),
807+
description: None,
808+
},
809+
];
810+
811+
// Serialize records to Avro binary format with the schema
812+
let mut writer = apache_avro::Writer::new(&schema, Vec::new());
813+
for record in &records {
814+
writer.append_ser(record).unwrap();
815+
}
816+
817+
let avro_data = writer.into_inner().unwrap();
818+
819+
820+
// Deserialize Avro binary data back into ExampleByteArray structs
821+
let reader = apache_avro::Reader::new(&avro_data[..]).unwrap();
822+
let deserialized_records: Vec<ExampleByteArray> = reader
823+
.map(|value| apache_avro::from_value::<ExampleByteArray>(&value.unwrap()).unwrap())
824+
.collect();
825+
826+
assert_eq!(records, deserialized_records);
827+
}
828+
```
829+
830+
The full implementation, along with other options such as fixed-size byte arrays, can be found in `src/bytes.rs`.
831+
749832
<!-- cargo-rdme end -->
750833

751834
## License

avro/src/lib.rs

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -858,6 +858,89 @@
858858
//! If the application parses schemas before setting a comparator, the default comparator will be
859859
//! registered and used!
860860
//!
861+
//! ## Deserializing Avro Byte Arrays
862+
//!
863+
//! When using Serde to deserialize Avro files, fields that hold byte arrays sometimes need a special
864+
//! `#[serde(with = "...")]` field attribute, as shown in the examples below.
865+
//!
866+
//! ```rust
867+
//! use serde::{Deserialize, Serialize};
868+
//!
869+
//! #[derive(Debug, Deserialize, Serialize)]
870+
//! struct SampleStruct {
871+
//! #[serde(with = "apache_avro::serde_avro_bytes")]
872+
//! non_optional_bytes: Vec<u8>,
873+
//! #[serde(with = "apache_avro::serde_avro_bytes_opt")]
874+
//! optional_bytes: Option<Vec<u8>>,
875+
//! #[serde(with = "apache_avro::serde_avro_fixed")]
876+
//! non_optional_fixed: [u8; 6],
877+
//! #[serde(with = "apache_avro::serde_avro_fixed_opt")]
878+
//! optional_fixed: Option<[u8; 6]>,
879+
//! }
880+
//! ```
881+
//!
882+
//! Here is a complete example of a serde round trip of a struct with a nullable byte array:
883+
//!
884+
//! ```rust
885+
//! use serde::{Deserialize, Serialize};
886+
//!
887+
//! #[derive(Debug, Deserialize, PartialEq, Serialize)]
888+
//! struct ExampleByteArray {
889+
//! #[serde(with = "apache_avro::serde_avro_bytes_opt")]
890+
//! data_bytes: Option<Vec<u8>>,
891+
//! description: Option<String>,
892+
//! }
893+
//!
894+
//! fn serde_byte_array() {
895+
//! let raw_schema = r#"
896+
//! {
897+
//! "type": "record",
898+
//! "name": "SimpleRecord",
899+
//! "fields": [
900+
//! {"name": "data_bytes", "type": ["null", "bytes"], "default": null},
901+
//! {"name": "description", "type": ["null", "string"], "default": null}
902+
//! ]
903+
//! }"#;
904+
//!
905+
//! let schema = apache_avro::Schema::parse_str(raw_schema).unwrap();
906+
//!
907+
//! // Create vector of ExampleByteArray
908+
//! let records = vec![
909+
//! ExampleByteArray {
910+
//! data_bytes: Some(vec![1, 2, 3, 4, 5]),
911+
//! description: Some("First record".to_string()),
912+
//! },
913+
//! ExampleByteArray {
914+
//! data_bytes: None,
915+
//! description: Some("Second record".to_string()),
916+
//! },
917+
//! ExampleByteArray {
918+
//! data_bytes: Some(vec![10, 20, 30]),
919+
//! description: None,
920+
//! },
921+
//! ];
922+
//!
923+
//! // Serialize records to Avro binary format with the schema
924+
//! let mut writer = apache_avro::Writer::new(&schema, Vec::new());
925+
//! for record in &records {
926+
//! writer.append_ser(record).unwrap();
927+
//! }
928+
//!
929+
//! let avro_data = writer.into_inner().unwrap();
930+
//!
931+
//!
932+
//! // Deserialize Avro binary data back into ExampleByteArray structs
933+
//! let reader = apache_avro::Reader::new(&avro_data[..]).unwrap();
934+
//! let deserialized_records: Vec<ExampleByteArray> = reader
935+
//! .map(|value| apache_avro::from_value::<ExampleByteArray>(&value.unwrap()).unwrap())
936+
//! .collect();
937+
//!
938+
//! assert_eq!(records, deserialized_records);
939+
//! }
//! # serde_byte_array();
940+
//! ```
941+
//!
942+
//! The full implementation, along with other options such as fixed-size byte arrays, can be found in `src/bytes.rs`.
943+
//!
861944
862945
mod bigdecimal;
863946
mod bytes;
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
use apache_avro_test_helper::TestResult;
2+
use serde::{Deserialize, Serialize};
3+
4+
/// Record type written and read back by the round-trip tests in this file: an optional byte
/// payload plus an optional description.
#[derive(Debug, Deserialize, PartialEq, Serialize)]
5+
struct ExampleByteArray {
6+
// (De)serialized through `apache_avro::serde_avro_bytes_opt` instead of serde's default handling.
#[serde(with = "apache_avro::serde_avro_bytes_opt")]
7+
data_bytes: Option<Vec<u8>>,
8+
description: Option<String>,
9+
}
10+
11+
/// Reduced counterpart of `ExampleByteArray` that declares only `description`; it has no
/// `data_bytes` field, so it is used to read records while leaving the byte payload out.
#[derive(Deserialize, Serialize)]
12+
struct ExampleByteArrayFiltered {
13+
description: Option<String>,
14+
}
15+
16+
/// Writes three `ExampleByteArray` records (bytes present, bytes absent, description absent)
/// with an Avro `Writer`, reads them back with a `Reader`, and checks that the deserialized
/// records equal the originals.
///
/// Every fallible step is propagated with `?`, so a failure surfaces as the returned error
/// rather than as a panic inside the iterator closure.
#[test]
fn avro_rs_285_bytes_deserialization_round_trip() -> TestResult {
    // Record schema with two nullable fields: `data_bytes` (bytes) and `description` (string).
    let raw_schema = r#"
    {
        "type": "record",
        "name": "SimpleRecord",
        "fields": [
            {"name": "data_bytes", "type": ["null", "bytes"], "default": null},
            {"name": "description", "type": ["null", "string"], "default": null}
        ]
    }
    "#;

    let schema = apache_avro::Schema::parse_str(raw_schema)?;

    let records = vec![
        ExampleByteArray {
            data_bytes: Some(vec![1, 2, 3, 4, 5]),
            description: Some("First record".to_string()),
        },
        ExampleByteArray {
            data_bytes: None,
            description: Some("Second record".to_string()),
        },
        ExampleByteArray {
            data_bytes: Some(vec![10, 20, 30]),
            description: None,
        },
    ];

    // Serialize the records to Avro binary format with the schema.
    let mut writer = apache_avro::Writer::new(&schema, Vec::new());
    for record in &records {
        writer.append_ser(record)?;
    }

    let avro_data = writer.into_inner()?;

    // Deserialize the Avro binary data back into `ExampleByteArray` structs. Collecting into a
    // `Result` stops at the first read or conversion error and hands it to `?`.
    let reader = apache_avro::Reader::new(&avro_data[..])?;
    let deserialized_records: Vec<ExampleByteArray> = reader
        .map(|value| apache_avro::from_value::<ExampleByteArray>(&value?))
        .collect::<Result<_, apache_avro::Error>>()?;

    assert_eq!(records, deserialized_records);
    Ok(())
}
64+
65+
/// Writes three `ExampleByteArray` records with an Avro `Writer`, then reads them back as
/// `ExampleByteArrayFiltered`, a struct that declares only `description`.
///
/// Besides checking the record count, the test compares each `description` with the original,
/// so a record that deserializes with the wrong content is caught as well. Every fallible step
/// is propagated with `?` instead of panicking inside the iterator closure.
#[test]
fn avro_rs_285_bytes_deserialization_filtered_round_trip() -> TestResult {
    // Record schema with two nullable fields: `data_bytes` (bytes) and `description` (string).
    let raw_schema = r#"
    {
        "type": "record",
        "name": "SimpleRecord",
        "fields": [
            {"name": "data_bytes", "type": ["null", "bytes"], "default": null},
            {"name": "description", "type": ["null", "string"], "default": null}
        ]
    }
    "#;

    let schema = apache_avro::Schema::parse_str(raw_schema)?;

    let records = vec![
        ExampleByteArray {
            data_bytes: Some(vec![1, 2, 3, 4, 5]),
            description: Some("First record".to_string()),
        },
        ExampleByteArray {
            data_bytes: None,
            description: Some("Second record".to_string()),
        },
        ExampleByteArray {
            data_bytes: Some(vec![10, 20, 30]),
            description: None,
        },
    ];

    // Serialize the records to Avro binary format with the schema.
    let mut writer = apache_avro::Writer::new(&schema, Vec::new());
    for record in &records {
        writer.append_ser(record)?;
    }

    let avro_data = writer.into_inner()?;

    // Deserialize the Avro binary data back into `ExampleByteArrayFiltered` structs. Collecting
    // into a `Result` stops at the first read or conversion error and hands it to `?`.
    let reader = apache_avro::Reader::new(&avro_data[..])?;
    let deserialized_records: Vec<ExampleByteArrayFiltered> = reader
        .map(|value| apache_avro::from_value::<ExampleByteArrayFiltered>(&value?))
        .collect::<Result<_, apache_avro::Error>>()?;

    assert_eq!(records.len(), deserialized_records.len());

    // The filtered struct derives neither `Debug` nor `PartialEq`, so compare the one field it
    // carries rather than whole values.
    for (original, filtered) in records.iter().zip(&deserialized_records) {
        assert_eq!(original.description, filtered.description);
    }

    Ok(())
}

0 commit comments

Comments
 (0)