Skip to content

Commit ddde66d

Browse files
alamb and Jefffrey authored
Add a test for reading nested REE data in json (#9634)
# Which issue does this PR close?

- Related to #9497

# Rationale for this change

I (well, Codex) found a regression in the changes in #9497, but it is not covered by a test, so we should add one.

# What changes are included in this PR?

Add a test for reading nested REE data from JSON (one that currently passes on main but fails on #9497).

# Are these changes tested?

Only tests.

# Are there any user-facing changes?

No, this is just a test.

Co-authored-by: Jeffrey Vo <jeffrey.vo.australia@gmail.com>
1 parent 2b8a761 commit ddde66d

File tree

1 file changed

+39
-0
lines changed

1 file changed

+39
-0
lines changed

arrow-json/src/reader/mod.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3457,4 +3457,43 @@ mod tests {
34573457
assert_eq!(run_array.len(), 3);
34583458
assert_eq!(run_array.run_ends().values(), &[2i16, 3]);
34593459
}
3460+
3461+
#[test]
fn test_read_nested_run_end_encoded() {
    use arrow_array::types::Int64Type;

    // Wraps `values` in a run-end-encoded type whose run ends are
    // non-nullable Int64 and whose values field is nullable.
    let ree_of = |values: DataType| {
        DataType::RunEndEncoded(
            Arc::new(Field::new("run_ends", DataType::Int64, false)),
            Arc::new(Field::new("values", values, true)),
        )
    };

    // Column "a" is REE(REE(Utf8)): the outer encoding collapses repeated
    // rows, and the inner encoding collapses repeats among the strings that
    // remain as the outer encoding's physical values.
    let schema = Arc::new(Schema::new(vec![Field::new(
        "a",
        ree_of(ree_of(DataType::Utf8)),
        true,
    )]));

    // Three rows of newline-delimited JSON; the first two are identical.
    let input = r#"
{"a": "x"}
{"a": "x"}
{"a": "y"}
"#;

    let decoded = do_read(input, 1024, false, false, schema);
    assert_eq!(decoded.len(), 1);

    // Outer level: three logical rows, stored as two runs ending at 2 and 3
    // (i.e. ["x", "x"] followed by ["y"]).
    let outer_runs = decoded[0].column(0).as_run::<Int64Type>();
    assert_eq!(outer_runs.len(), 3);
    assert_eq!(outer_runs.run_ends().values(), &[2, 3]);

    // Inner level: the outer array's values are themselves run-end encoded,
    // holding one run per distinct string.
    let inner_runs = outer_runs.values().as_run::<Int64Type>();
    assert_eq!(inner_runs.len(), 2);
    assert_eq!(inner_runs.run_ends().values(), &[1, 2]);

    // Innermost level: the plain string values, in order of first appearance.
    let strings = inner_runs.values().as_string::<i32>();
    assert_eq!(strings.len(), 2);
    for (idx, expected) in ["x", "y"].into_iter().enumerate() {
        assert_eq!(strings.value(idx), expected);
    }
}
34603499
}

0 commit comments

Comments
 (0)