Skip to content

Commit 673bec2

Browse files
elad-yosifonElad Yosifonmartin-g
authored
fix #226: Index-Out-Of-Bounds panic when using #[serde(skip_serializing_if=..)] (#227)
* fix #226: Index-Out-Of-Bounds panic when using #[serde(skip_serializing_if = "Option::is_none")] * Issue #225 - Compare the deserialized result with the serialized one Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org> * Issue #226: Serialize the RecordField's default when skipping a field We need to serialize something, otherwise the deserialization does not know that something has been skipped and tries to read a value according to the schema Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org> * fmt Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org> * Do not try to track the fields with item_count. Serde's `skip` breaks it because it does not notify us when `#[serde(skip_serialize)]` is used. Drop the support for Struct tuple (e.g. `struct S(A, B, C)` - it is hard to map it to Avro record schema. It should still work for Avro array. Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org> * clippy + fmt Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org> * Add ASLv2 header for the new IT test Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org> --------- Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org> Co-authored-by: Elad Yosifon <me@elad-yosifon.xyz> Co-authored-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
1 parent 1e64604 commit 673bec2

File tree

2 files changed

+183
-96
lines changed

2 files changed

+183
-96
lines changed

avro/src/ser_schema.rs

Lines changed: 53 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,12 @@ use crate::{
2222
bigdecimal::big_decimal_as_bytes,
2323
encode::{encode_int, encode_long},
2424
error::{Details, Error},
25-
schema::{Name, NamesRef, Namespace, RecordSchema, Schema},
25+
schema::{Name, NamesRef, Namespace, RecordField, RecordSchema, Schema},
2626
};
2727
use bigdecimal::BigDecimal;
28-
use serde::ser;
28+
use serde::{Serialize, ser};
2929
use std::{borrow::Cow, io::Write, str::FromStr};
3030

31-
const RECORD_FIELD_INIT_BUFFER_SIZE: usize = 64;
3231
const COLLECTION_SERIALIZER_ITEM_LIMIT: usize = 1024;
3332
const COLLECTION_SERIALIZER_DEFAULT_INIT_ITEM_CAPACITY: usize = 32;
3433
const SINGLE_VALUE_INIT_BUFFER_SIZE: usize = 128;
@@ -250,68 +249,39 @@ impl<W: Write> ser::SerializeMap for SchemaAwareWriteSerializeMap<'_, '_, W> {
250249
pub struct SchemaAwareWriteSerializeStruct<'a, 's, W: Write> {
251250
ser: &'a mut SchemaAwareWriteSerializer<'s, W>,
252251
record_schema: &'s RecordSchema,
253-
item_count: usize,
254-
buffered_fields: Vec<Option<Vec<u8>>>,
255252
bytes_written: usize,
256253
}
257254

258255
impl<'a, 's, W: Write> SchemaAwareWriteSerializeStruct<'a, 's, W> {
259256
fn new(
260257
ser: &'a mut SchemaAwareWriteSerializer<'s, W>,
261258
record_schema: &'s RecordSchema,
262-
len: usize,
263259
) -> SchemaAwareWriteSerializeStruct<'a, 's, W> {
264260
SchemaAwareWriteSerializeStruct {
265261
ser,
266262
record_schema,
267-
item_count: 0,
268-
buffered_fields: vec![None; len],
269263
bytes_written: 0,
270264
}
271265
}
272266

273-
fn serialize_next_field<T>(&mut self, value: &T) -> Result<(), Error>
267+
fn serialize_next_field<T>(&mut self, field: &RecordField, value: &T) -> Result<(), Error>
274268
where
275269
T: ?Sized + ser::Serialize,
276270
{
277-
let next_field = self.record_schema.fields.get(self.item_count).expect(
278-
"Validity of the next field index was expected to have been checked by the caller",
279-
);
280-
281271
// If we receive fields in order, write them directly to the main writer
282272
let mut value_ser = SchemaAwareWriteSerializer::new(
283273
&mut *self.ser.writer,
284-
&next_field.schema,
274+
&field.schema,
285275
self.ser.names,
286276
self.ser.enclosing_namespace.clone(),
287277
);
288278
self.bytes_written += value.serialize(&mut value_ser)?;
289279

290-
self.item_count += 1;
291-
292-
// Write any buffered data to the stream that has now become next in line
293-
while let Some(buffer) = self
294-
.buffered_fields
295-
.get_mut(self.item_count)
296-
.and_then(|b| b.take())
297-
{
298-
self.bytes_written += self
299-
.ser
300-
.writer
301-
.write(buffer.as_slice())
302-
.map_err(Details::WriteBytes)?;
303-
self.item_count += 1;
304-
}
305-
306280
Ok(())
307281
}
308282

309283
fn end(self) -> Result<usize, Error> {
310-
if self.item_count != self.record_schema.fields.len() {
311-
Err(Details::GetField(self.record_schema.fields[self.item_count].name.clone()).into())
312-
} else {
313-
Ok(self.bytes_written)
314-
}
284+
Ok(self.bytes_written)
315285
}
316286
}
317287

@@ -323,63 +293,50 @@ impl<W: Write> ser::SerializeStruct for SchemaAwareWriteSerializeStruct<'_, '_,
323293
where
324294
T: ?Sized + ser::Serialize,
325295
{
326-
if self.item_count >= self.record_schema.fields.len() {
327-
return Err(Details::FieldName(String::from(key)).into());
328-
}
329-
330-
let next_field = &self.record_schema.fields[self.item_count];
331-
let next_field_matches = match &next_field.aliases {
332-
Some(aliases) => {
333-
key == next_field.name.as_str() || aliases.iter().any(|a| key == a.as_str())
334-
}
335-
None => key == next_field.name.as_str(),
336-
};
337-
338-
if next_field_matches {
339-
self.serialize_next_field(&value).map_err(|e| {
340-
Details::SerializeRecordFieldWithSchema {
341-
field_name: key,
342-
record_schema: Schema::Record(self.record_schema.clone()),
343-
error: Box::new(e),
344-
}
345-
})?;
346-
Ok(())
347-
} else {
348-
if self.item_count < self.record_schema.fields.len() {
349-
for i in self.item_count..self.record_schema.fields.len() {
350-
let field = &self.record_schema.fields[i];
351-
let field_matches = match &field.aliases {
352-
Some(aliases) => {
353-
key == field.name.as_str() || aliases.iter().any(|a| key == a.as_str())
354-
}
355-
None => key == field.name.as_str(),
356-
};
357-
358-
if field_matches {
359-
let mut buffer: Vec<u8> = Vec::with_capacity(RECORD_FIELD_INIT_BUFFER_SIZE);
360-
let mut value_ser = SchemaAwareWriteSerializer::new(
361-
&mut buffer,
362-
&field.schema,
363-
self.ser.names,
364-
self.ser.enclosing_namespace.clone(),
365-
);
366-
value.serialize(&mut value_ser).map_err(|e| {
367-
Details::SerializeRecordFieldWithSchema {
368-
field_name: key,
369-
record_schema: Schema::Record(self.record_schema.clone()),
370-
error: Box::new(e),
371-
}
372-
})?;
373-
374-
self.buffered_fields[i] = Some(buffer);
375-
376-
return Ok(());
296+
let record_field = self
297+
.record_schema
298+
.lookup
299+
.get(key)
300+
.and_then(|idx| self.record_schema.fields.get(*idx));
301+
302+
match record_field {
303+
Some(field) => {
304+
// self.item_count += 1;
305+
self.serialize_next_field(field, value).map_err(|e| {
306+
Details::SerializeRecordFieldWithSchema {
307+
field_name: key,
308+
record_schema: Schema::Record(self.record_schema.clone()),
309+
error: Box::new(e),
377310
}
378-
}
311+
.into()
312+
})
379313
}
314+
None => Err(Details::FieldName(String::from(key)).into()),
315+
}
316+
}
380317

381-
Err(Details::FieldName(String::from(key)).into())
318+
fn skip_field(&mut self, key: &'static str) -> Result<(), Self::Error> {
319+
let skipped_field = self
320+
.record_schema
321+
.lookup
322+
.get(key)
323+
.and_then(|idx| self.record_schema.fields.get(*idx));
324+
325+
if let Some(skipped_field) = skipped_field {
326+
// self.item_count += 1;
327+
skipped_field
328+
.default
329+
.serialize(&mut SchemaAwareWriteSerializer::new(
330+
self.ser.writer,
331+
&skipped_field.schema,
332+
self.ser.names,
333+
self.ser.enclosing_namespace.clone(),
334+
))?;
335+
} else {
336+
return Err(Details::GetField(key.to_string()).into());
382337
}
338+
339+
Ok(())
383340
}
384341

385342
fn end(self) -> Result<Self::Ok, Self::Error> {
@@ -418,7 +375,9 @@ impl<W: Write> SchemaAwareWriteSerializeTupleStruct<'_, '_, W> {
418375
{
419376
use SchemaAwareWriteSerializeTupleStruct::*;
420377
match self {
421-
Record(record_ser) => record_ser.serialize_next_field(&value),
378+
Record(_record_ser) => {
379+
unimplemented!("Tuple struct serialization to record is not supported!");
380+
}
422381
Array(array_ser) => array_ser.serialize_element(&value),
423382
}
424383
}
@@ -1127,7 +1086,7 @@ impl<'s, W: Write> SchemaAwareWriteSerializer<'s, W> {
11271086
match variant_schema {
11281087
Schema::Null => { /* skip */ }
11291088
_ => {
1130-
encode_int(i as i32, &mut *self.writer)?;
1089+
encode_long(i as i64, &mut *self.writer)?;
11311090
let mut variant_ser = SchemaAwareWriteSerializer::new(
11321091
&mut *self.writer,
11331092
variant_schema,
@@ -1406,7 +1365,7 @@ impl<'s, W: Write> SchemaAwareWriteSerializer<'s, W> {
14061365
SchemaAwareWriteSerializeSeq::new(self, &sch.items, Some(len)),
14071366
)),
14081367
Schema::Record(sch) => Ok(SchemaAwareWriteSerializeTupleStruct::Record(
1409-
SchemaAwareWriteSerializeStruct::new(self, sch, len),
1368+
SchemaAwareWriteSerializeStruct::new(self, sch),
14101369
)),
14111370
Schema::Ref { name: ref_name } => {
14121371
let ref_schema = self.get_ref_schema(ref_name)?;
@@ -1543,11 +1502,9 @@ impl<'s, W: Write> SchemaAwareWriteSerializer<'s, W> {
15431502
};
15441503

15451504
match schema {
1546-
Schema::Record(record_schema) => Ok(SchemaAwareWriteSerializeStruct::new(
1547-
self,
1548-
record_schema,
1549-
len,
1550-
)),
1505+
Schema::Record(record_schema) => {
1506+
Ok(SchemaAwareWriteSerializeStruct::new(self, record_schema))
1507+
}
15511508
Schema::Ref { name: ref_name } => {
15521509
let ref_schema = self.get_ref_schema(ref_name)?;
15531510
self.serialize_struct_with_schema(name, len, ref_schema)

avro/tests/avro-rs-226.rs

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use apache_avro::{AvroSchema, Schema, Writer, from_value};
19+
use apache_avro_test_helper::TestResult;
20+
use serde::{Deserialize, Serialize, de::DeserializeOwned};
21+
use std::fmt::Debug;
22+
23+
fn ser_deser<T>(schema: &Schema, record: T) -> TestResult
24+
where
25+
T: Serialize + DeserializeOwned + Debug + PartialEq + Clone,
26+
{
27+
let record2 = record.clone();
28+
let mut writer = Writer::new(schema, vec![]);
29+
writer.append_ser(record)?;
30+
let bytes_written = writer.into_inner()?;
31+
32+
let reader = apache_avro::Reader::new(&bytes_written[..])?;
33+
for value in reader {
34+
let value = value?;
35+
let deserialized = from_value::<T>(&value)?;
36+
assert_eq!(deserialized, record2);
37+
}
38+
39+
Ok(())
40+
}
41+
42+
#[test]
43+
fn avro_rs_226_index_out_of_bounds_with_serde_skip_serializing_skip_middle_field() -> TestResult {
44+
#[derive(AvroSchema, Clone, Debug, Deserialize, PartialEq, Serialize)]
45+
struct T {
46+
x: Option<i8>,
47+
#[serde(skip_serializing_if = "Option::is_none")]
48+
y: Option<String>,
49+
z: Option<i8>,
50+
}
51+
52+
ser_deser::<T>(
53+
&T::get_schema(),
54+
T {
55+
x: None,
56+
y: None,
57+
z: Some(1),
58+
},
59+
)
60+
}
61+
62+
#[test]
63+
fn avro_rs_226_index_out_of_bounds_with_serde_skip_serializing_skip_first_field() -> TestResult {
64+
#[derive(AvroSchema, Clone, Debug, Deserialize, PartialEq, Serialize)]
65+
struct T {
66+
#[serde(skip_serializing_if = "Option::is_none")]
67+
x: Option<i8>,
68+
y: Option<String>,
69+
z: Option<i8>,
70+
}
71+
72+
ser_deser::<T>(
73+
&T::get_schema(),
74+
T {
75+
x: None,
76+
y: None,
77+
z: Some(1),
78+
},
79+
)
80+
}
81+
82+
#[test]
83+
fn avro_rs_226_index_out_of_bounds_with_serde_skip_serializing_skip_last_field() -> TestResult {
84+
#[derive(AvroSchema, Clone, Debug, Deserialize, PartialEq, Serialize)]
85+
struct T {
86+
x: Option<i8>,
87+
y: Option<String>,
88+
#[serde(skip_serializing_if = "Option::is_none")]
89+
z: Option<i8>,
90+
}
91+
92+
ser_deser::<T>(
93+
&T::get_schema(),
94+
T {
95+
x: Some(0),
96+
y: None,
97+
z: None,
98+
},
99+
)
100+
}
101+
102+
#[test]
103+
#[ignore = "This test should be re-enabled once the serde-driven deserialization is implemented! PR #227"]
104+
fn avro_rs_226_index_out_of_bounds_with_serde_skip_multiple_fields() -> TestResult {
105+
#[derive(AvroSchema, Clone, Debug, Deserialize, PartialEq, Serialize)]
106+
struct T {
107+
no_skip1: Option<i8>,
108+
#[serde(skip_serializing)]
109+
skip_serializing: Option<String>,
110+
#[serde(skip_serializing_if = "Option::is_none")]
111+
skip_serializing_if: Option<i8>,
112+
#[serde(skip_deserializing)]
113+
skip_deserializing: Option<String>,
114+
#[serde(skip)]
115+
skip: Option<String>,
116+
no_skip2: Option<i8>,
117+
}
118+
119+
ser_deser::<T>(
120+
&T::get_schema(),
121+
T {
122+
no_skip1: Some(1),
123+
skip_serializing: None,
124+
skip_serializing_if: None,
125+
skip_deserializing: None,
126+
skip: None,
127+
no_skip2: Some(2),
128+
},
129+
)
130+
}

0 commit comments

Comments
 (0)