@@ -3053,4 +3053,43 @@ mod tests {
30533053 assert_eq ! ( run_array. len( ) , 3 ) ;
30543054 assert_eq ! ( run_array. run_ends( ) . values( ) , & [ 2i16 , 3 ] ) ;
30553055 }
3056+
#[test]
fn test_read_nested_run_end_encoded() {
    use arrow_array::types::Int64Type;

    // Both REE levels use the same non-nullable Int64 `run_ends` child field.
    let run_ends_field = || Arc::new(Field::new("run_ends", DataType::Int64, false));

    // Inner level: run-end encoded Utf8 strings.
    let string_runs_type = DataType::RunEndEncoded(
        run_ends_field(),
        Arc::new(Field::new("values", DataType::Utf8, true)),
    );
    // Outer level: a run-end encoded column whose `values` child is itself
    // the run-end encoded string type above.
    let nested_runs_type = DataType::RunEndEncoded(
        run_ends_field(),
        Arc::new(Field::new("values", string_runs_type, true)),
    );
    let schema = Arc::new(Schema::new(vec![Field::new("a", nested_runs_type, true)]));

    // Three logical rows: "x" twice, then "y".
    let json_rows = r#"
    {"a": "x"}
    {"a": "x"}
    {"a": "y"}
    "#;
    let batches = do_read(json_rows, 1024, false, false, schema);
    assert_eq!(batches.len(), 1);

    // Outer level: three logical rows expected to collapse into two runs,
    // ending at logical offsets 2 ("x", "x") and 3 ("y").
    let outer_runs = batches[0].column(0).as_run::<Int64Type>();
    assert_eq!(outer_runs.len(), 3);
    assert_eq!(outer_runs.run_ends().values(), &[2, 3]);

    // Inner level: the outer array's physical values form a two-element REE
    // in which every run has length one.
    let inner_runs = outer_runs.values().as_run::<Int64Type>();
    assert_eq!(inner_runs.len(), 2);
    assert_eq!(inner_runs.run_ends().values(), &[1, 2]);

    // Leaf level: the distinct strings, in order of first appearance.
    let leaf_strings = inner_runs.values().as_string::<i32>();
    assert_eq!(leaf_strings.len(), 2);
    for (idx, expected) in ["x", "y"].into_iter().enumerate() {
        assert_eq!(leaf_strings.value(idx), expected);
    }
}
30563095}
0 commit comments