Skip to content

Commit b7130ea

Browse files
committed
feat: avro_1_12 feature
Introduce the "avro_1_12" feature flag and use it to guard the behavior of JSON null defaults for union types having null schema in a position other than the first.
1 parent 8e92144 commit b7130ea

File tree

3 files changed

+95
-16
lines changed

3 files changed

+95
-16
lines changed

arrow-avro/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ md5 = ["dep:md5"]
4444
sha256 = ["dep:sha2"]
4545
small_decimals = []
4646
avro_custom_types = ["dep:arrow-select"]
47+
avro_1_12 = []
4748

4849
# Enable async APIs
4950
async = ["futures", "tokio"]

arrow-avro/src/codec.rs

Lines changed: 67 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,52 @@ impl AvroDataType {
251251
self.nullability
252252
}
253253

254+
// Returns `Ok` if this data type accepts a JSON null default value,
255+
// according to Avro schema rules prior to spec version 1.12, otherwise
256+
// returns an `Err` with a schema error.
257+
// Prior to 1.12, Avro only allowed default values matching the first branch of a union.
258+
#[cfg(not(feature = "avro_1_12"))]
259+
fn validate_null_default(&self) -> Result<(), ArrowError> {
260+
match self.codec() {
261+
Codec::Null => Ok(()),
262+
Codec::Union(encodings, _, _)
263+
if encodings
264+
.first()
265+
.map_or(false, |enc| matches!(enc.codec(), Codec::Null)) =>
266+
{
267+
Ok(())
268+
}
269+
_ if self.nullability() == Some(Nullability::NullFirst) => Ok(()),
270+
_ => Err(ArrowError::SchemaError(
271+
"JSON null default is only valid for `null` type or for a union whose first branch is `null`"
272+
.to_string(),
273+
)),
274+
}
275+
}
276+
277+
// Returns `Ok` if this data type accepts a JSON null default value,
278+
// according to Avro schema rules for spec version 1.12 and later, otherwise
279+
// returns an `Err` with a schema error.
280+
// Since 1.12, Avro allows default values matching any branch of a union.
281+
#[cfg(feature = "avro_1_12")]
282+
fn validate_null_default(&self) -> Result<(), ArrowError> {
283+
match self.codec() {
284+
Codec::Null => Ok(()),
285+
Codec::Union(encodings, _, _)
286+
if encodings
287+
.iter()
288+
.any(|enc| matches!(enc.codec(), Codec::Null)) =>
289+
{
290+
Ok(())
291+
}
292+
_ if self.nullability().is_some() => Ok(()),
293+
_ => Err(ArrowError::SchemaError(
294+
"JSON null default is only valid for `null` type or for a union with a `null` branch"
295+
.to_string(),
296+
)),
297+
}
298+
}
299+
254300
#[inline]
255301
fn parse_default_literal(&self, default_json: &Value) -> Result<AvroLiteral, ArrowError> {
256302
fn expect_string<'v>(
@@ -313,21 +359,10 @@ impl AvroDataType {
313359
}
314360
}
315361

316-
// Handle JSON nulls per-spec: allowed only for `null` type or unions with null
362+
// Handle JSON nulls per the spec rules
317363
if default_json.is_null() {
318-
return match self.codec() {
319-
Codec::Null => Ok(AvroLiteral::Null),
320-
Codec::Union(encodings, _, _)
321-
if encodings.iter().any(|enc| matches!(enc.codec(), Codec::Null)) =>
322-
{
323-
Ok(AvroLiteral::Null)
324-
}
325-
_ if self.nullability().is_some() => Ok(AvroLiteral::Null),
326-
_ => Err(ArrowError::SchemaError(
327-
"JSON null default is only valid for `null` type or for a union with a `null` branch"
328-
.to_string(),
329-
)),
330-
};
364+
self.validate_null_default()?;
365+
return Ok(AvroLiteral::Null);
331366
}
332367
let lit = match self.codec() {
333368
Codec::Null => {
@@ -3282,6 +3317,24 @@ mod tests {
32823317
let lit2 = dt_int_nf.parse_and_store_default(&Value::Null).unwrap();
32833318
assert_eq!(lit2, AvroLiteral::Null);
32843319
assert_default_stored(&dt_int_nf, &Value::Null);
3320+
}
3321+
3322+
#[cfg(not(feature = "avro_1_12"))]
3323+
#[test]
3324+
fn test_validate_and_store_default_null_and_nullability_rules_avro_1_11() {
3325+
let mut dt_int_ns =
3326+
AvroDataType::new(Codec::Int32, HashMap::new(), Some(Nullability::NullSecond));
3327+
let err2 = dt_int_ns.parse_and_store_default(&Value::Null).unwrap_err();
3328+
assert!(
3329+
err2.to_string()
3330+
.contains("JSON null default is only valid for `null` type"),
3331+
"unexpected error: {err2}"
3332+
);
3333+
}
3334+
3335+
#[cfg(feature = "avro_1_12")]
3336+
#[test]
3337+
fn test_validate_and_store_default_null_and_nullability_rules_avro_1_12() {
32853338
let mut dt_int_ns =
32863339
AvroDataType::new(Codec::Int32, HashMap::new(), Some(Nullability::NullSecond));
32873340
let lit3 = dt_int_ns.parse_and_store_default(&Value::Null).unwrap();

arrow-avro/src/reader/mod.rs

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2950,15 +2950,14 @@ mod test {
29502950
},"default":{"x":7}}),
29512951
serde_json::json!({"name":"d_nullable_null","type":["null","int"],"default":null}),
29522952
serde_json::json!({"name":"d_nullable_value","type":["int","null"],"default":123}),
2953-
serde_json::json!({"name":"d_nullable_null_second","type":["int","null"],"default":null}),
29542953
],
29552954
);
29562955
let actual = read_alltypes_with_reader_schema(path, reader_schema);
29572956
let num_rows = actual.num_rows();
29582957
assert!(num_rows > 0, "skippable_types.avro should contain rows");
29592958
assert_eq!(
29602959
actual.num_columns(),
2961-
23,
2960+
22,
29622961
"expected exactly our defaulted fields"
29632962
);
29642963
let mut arrays: Vec<Arc<dyn Array>> = Vec::with_capacity(22);
@@ -3085,6 +3084,32 @@ mod test {
30853084
arrays.push(Arc::new(Int32Array::from_iter_values(std::iter::repeat_n(
30863085
123, num_rows,
30873086
))));
3087+
let expected = RecordBatch::try_new(actual.schema(), arrays).unwrap();
3088+
assert_eq!(
3089+
actual, expected,
3090+
"defaults should materialize correctly for all fields"
3091+
);
3092+
}
3093+
3094+
#[cfg(feature = "avro_1_12")]
3095+
#[test]
3096+
fn test_schema_resolution_defaults_cases_supported_by_avro_1_12() {
3097+
let path = "test/data/skippable_types.avro";
3098+
let reader_schema = make_reader_schema_with_default_fields(
3099+
path,
3100+
vec![
3101+
serde_json::json!({"name":"d_nullable_null_second","type":["int","null"],"default":null}),
3102+
],
3103+
);
3104+
let actual = read_alltypes_with_reader_schema(path, reader_schema);
3105+
let num_rows = actual.num_rows();
3106+
assert!(num_rows > 0, "skippable_types.avro should contain rows");
3107+
assert_eq!(
3108+
actual.num_columns(),
3109+
1,
3110+
"expected exactly our defaulted fields"
3111+
);
3112+
let mut arrays: Vec<Arc<dyn Array>> = Vec::with_capacity(22);
30883113
arrays.push(Arc::new(Int32Array::from_iter(std::iter::repeat_n(
30893114
None::<i32>,
30903115
num_rows,

0 commit comments

Comments
 (0)