From 39f0aadeb6d15a9253fe4a4ca6d8a6769ccc91a5 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Mon, 29 Dec 2025 18:32:42 -0600 Subject: [PATCH] disable array reader cache for nested fields --- parquet/src/arrow/array_reader/builder.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/parquet/src/arrow/array_reader/builder.rs b/parquet/src/arrow/array_reader/builder.rs index 82c8e77f6393..a1ec04dfd916 100644 --- a/parquet/src/arrow/array_reader/builder.rs +++ b/parquet/src/arrow/array_reader/builder.rs @@ -152,7 +152,12 @@ impl<'a> ArrayReaderBuilder<'a> { return Ok(Some(reader)); }; - if cache_options.projection_mask.leaf_included(col_idx) { + // Skip caching for columns that are nullable or nested under nullable/repeated ancestors (def_level > 0) + // because CachedArrayReader doesn't support get_def_levels() yet. + // See: https://github.com/apache/arrow-rs/issues/XXXX + if cache_options.projection_mask.leaf_included(col_idx) + && field.def_level == 0 + { Ok(Some(Box::new(CachedArrayReader::new( reader, Arc::clone(cache_options.cache),