diff --git a/benchmarks/bench.sh b/benchmarks/bench.sh index abd1801213710..9dce4cf77b933 100755 --- a/benchmarks/bench.sh +++ b/benchmarks/bench.sh @@ -677,7 +677,7 @@ run_tpch() { echo "Running tpch benchmark..." FORMAT=$2 - debug_run $CARGO_COMMAND --bin dfbench -- tpch --iterations 5 --path "${TPCH_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" --format ${FORMAT} -o "${RESULTS_FILE}" ${QUERY_ARG} ${LATENCY_ARG} + debug_run $CARGO_COMMAND --bin dfbench -- tpch --iterations 5 --path "${TPCH_DIR}" --scale-factor "${SCALE_FACTOR}" --prefer_hash_join "${PREFER_HASH_JOIN}" --format ${FORMAT} -o "${RESULTS_FILE}" ${QUERY_ARG} ${LATENCY_ARG} } # Runs the tpch in memory (needs tpch parquet data) @@ -693,7 +693,7 @@ run_tpch_mem() { echo "RESULTS_FILE: ${RESULTS_FILE}" echo "Running tpch_mem benchmark..." # -m means in memory - debug_run $CARGO_COMMAND --bin dfbench -- tpch --iterations 5 --path "${TPCH_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" -m --format parquet -o "${RESULTS_FILE}" ${QUERY_ARG} ${LATENCY_ARG} + debug_run $CARGO_COMMAND --bin dfbench -- tpch --iterations 5 --path "${TPCH_DIR}" --scale-factor "${SCALE_FACTOR}" --prefer_hash_join "${PREFER_HASH_JOIN}" -m --format parquet -o "${RESULTS_FILE}" ${QUERY_ARG} ${LATENCY_ARG} } # Runs the tpcds benchmark diff --git a/benchmarks/queries/q10.sql b/benchmarks/queries/q10.sql index 8613fd4962837..8ac2fd90798c9 100644 --- a/benchmarks/queries/q10.sql +++ b/benchmarks/queries/q10.sql @@ -16,7 +16,7 @@ where c_custkey = o_custkey and l_orderkey = o_orderkey and o_orderdate >= date '1993-10-01' - and o_orderdate < date '1994-01-01' + and o_orderdate < date '1993-10-01' + interval '3' month and l_returnflag = 'R' and c_nationkey = n_nationkey group by diff --git a/benchmarks/queries/q11.sql b/benchmarks/queries/q11.sql index c23ed1c71bfb3..9a9710d09ec35 100644 --- a/benchmarks/queries/q11.sql +++ b/benchmarks/queries/q11.sql @@ -13,7 +13,7 @@ group by ps_partkey having sum(ps_supplycost * ps_availqty) > ( select - 
sum(ps_supplycost * ps_availqty) * 0.0001 + sum(ps_supplycost * ps_availqty) * 0.0001 /* __TPCH_Q11_FRACTION__ */ from partsupp, supplier, @@ -24,4 +24,4 @@ group by and n_name = 'GERMANY' ) order by - value desc; \ No newline at end of file + value desc; diff --git a/benchmarks/queries/q12.sql b/benchmarks/queries/q12.sql index f8e6d960c8420..c3f4d62344701 100644 --- a/benchmarks/queries/q12.sql +++ b/benchmarks/queries/q12.sql @@ -23,8 +23,8 @@ where and l_commitdate < l_receiptdate and l_shipdate < l_commitdate and l_receiptdate >= date '1994-01-01' - and l_receiptdate < date '1995-01-01' + and l_receiptdate < date '1994-01-01' + interval '1' year group by l_shipmode order by - l_shipmode; \ No newline at end of file + l_shipmode; diff --git a/benchmarks/queries/q14.sql b/benchmarks/queries/q14.sql index d8ef6afaca9bb..6fe88c42662d0 100644 --- a/benchmarks/queries/q14.sql +++ b/benchmarks/queries/q14.sql @@ -10,4 +10,4 @@ from where l_partkey = p_partkey and l_shipdate >= date '1995-09-01' - and l_shipdate < date '1995-10-01'; \ No newline at end of file + and l_shipdate < date '1995-09-01' + interval '1' month; diff --git a/benchmarks/queries/q5.sql b/benchmarks/queries/q5.sql index 5a336b231184b..146980ccd6f76 100644 --- a/benchmarks/queries/q5.sql +++ b/benchmarks/queries/q5.sql @@ -17,8 +17,8 @@ where and n_regionkey = r_regionkey and r_name = 'ASIA' and o_orderdate >= date '1994-01-01' - and o_orderdate < date '1995-01-01' + and o_orderdate < date '1994-01-01' + interval '1' year group by n_name order by - revenue desc; \ No newline at end of file + revenue desc; diff --git a/benchmarks/queries/q6.sql b/benchmarks/queries/q6.sql index 5806f980f8088..5a13fe7df765a 100644 --- a/benchmarks/queries/q6.sql +++ b/benchmarks/queries/q6.sql @@ -4,6 +4,6 @@ from lineitem where l_shipdate >= date '1994-01-01' - and l_shipdate < date '1995-01-01' + and l_shipdate < date '1994-01-01' + interval '1' year and l_discount between 0.06 - 0.01 and 0.06 + 0.01 - and 
l_quantity < 24; \ No newline at end of file + and l_quantity < 24; diff --git a/benchmarks/src/tpch/mod.rs b/benchmarks/src/tpch/mod.rs index 681aa0a403ee1..08cedc0e5b4c3 100644 --- a/benchmarks/src/tpch/mod.rs +++ b/benchmarks/src/tpch/mod.rs @@ -33,6 +33,7 @@ pub const TPCH_TABLES: &[&str] = &[ pub const TPCH_QUERY_START_ID: usize = 1; pub const TPCH_QUERY_END_ID: usize = 22; +const TPCH_Q11_FRACTION_SENTINEL: &str = "0.0001 /* __TPCH_Q11_FRACTION__ */"; /// The `.tbl` file contains a trailing column pub fn get_tbl_tpch_table_schema(table: &str) -> Schema { @@ -139,6 +140,21 @@ pub fn get_tpch_table_schema(table: &str) -> Schema { /// Get the SQL statements from the specified query file pub fn get_query_sql(query: usize) -> Result> { + get_query_sql_for_scale_factor(query, 1.0) +} + +/// Get the SQL statements from the specified query file using the provided scale factor for +/// TPC-H substitutions such as Q11 FRACTION. +pub fn get_query_sql_for_scale_factor( + query: usize, + scale_factor: f64, +) -> Result> { + if !(scale_factor.is_finite() && scale_factor > 0.0) { + return plan_err!( + "invalid scale factor. Expected a positive finite value, got {scale_factor}" + ); + } + if query > 0 && query < 23 { let possibilities = vec![ format!("queries/q{query}.sql"), @@ -148,6 +164,7 @@ pub fn get_query_sql(query: usize) -> Result> { for filename in possibilities { match fs::read_to_string(&filename) { Ok(contents) => { + let contents = customize_query_sql(query, contents, scale_factor)?; return Ok(contents .split(';') .map(|s| s.trim()) @@ -164,6 +181,27 @@ pub fn get_query_sql(query: usize) -> Result> { } } +fn customize_query_sql( + query: usize, + contents: String, + scale_factor: f64, +) -> Result { + if query != 11 { + return Ok(contents); + } + + if !contents.contains(TPCH_Q11_FRACTION_SENTINEL) { + return plan_err!( + "invalid query 11. 
Missing fraction marker {TPCH_Q11_FRACTION_SENTINEL}" + ); + } + + Ok(contents.replace( + TPCH_Q11_FRACTION_SENTINEL, + &format!("(0.0001 / {scale_factor})"), + )) +} + pub const QUERY_LIMIT: [Option; 22] = [ None, Some(100), @@ -188,3 +226,51 @@ pub const QUERY_LIMIT: [Option; 22] = [ Some(100), None, ]; + +#[cfg(test)] +mod tests { + use super::{get_query_sql, get_query_sql_for_scale_factor}; + use datafusion::error::Result; + + fn get_single_query(query: usize) -> Result { + let mut queries = get_query_sql(query)?; + assert_eq!(queries.len(), 1); + Ok(queries.remove(0)) + } + + fn get_single_query_for_scale_factor( + query: usize, + scale_factor: f64, + ) -> Result { + let mut queries = get_query_sql_for_scale_factor(query, scale_factor)?; + assert_eq!(queries.len(), 1); + Ok(queries.remove(0)) + } + + #[test] + fn q11_uses_scale_factor_substitution() -> Result<()> { + let sf1_sql = get_single_query(11)?; + assert!(sf1_sql.contains("(0.0001 / 1)")); + + let sf01_sql = get_single_query_for_scale_factor(11, 0.1)?; + assert!(sf01_sql.contains("(0.0001 / 0.1)")); + + let sf10_sql = get_single_query_for_scale_factor(11, 10.0)?; + assert!(sf10_sql.contains("(0.0001 / 10)")); + + let sf30_sql = get_single_query_for_scale_factor(11, 30.0)?; + assert!(sf30_sql.contains("(0.0001 / 30)")); + assert!(!sf10_sql.contains("__TPCH_Q11_FRACTION__")); + Ok(()) + } + + #[test] + fn interval_queries_use_interval_arithmetic() -> Result<()> { + assert!(get_single_query(5)?.contains("date '1994-01-01' + interval '1' year")); + assert!(get_single_query(6)?.contains("date '1994-01-01' + interval '1' year")); + assert!(get_single_query(10)?.contains("date '1993-10-01' + interval '3' month")); + assert!(get_single_query(12)?.contains("date '1994-01-01' + interval '1' year")); + assert!(get_single_query(14)?.contains("date '1995-09-01' + interval '1' month")); + Ok(()) + } +} diff --git a/benchmarks/src/tpch/run.rs b/benchmarks/src/tpch/run.rs index 0d1268013c168..ec7aa8c554a28 100644 --- 
a/benchmarks/src/tpch/run.rs +++ b/benchmarks/src/tpch/run.rs @@ -15,17 +15,18 @@ // specific language governing permissions and limitations // under the License. -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::sync::Arc; use super::{ - TPCH_QUERY_END_ID, TPCH_QUERY_START_ID, TPCH_TABLES, get_query_sql, + TPCH_QUERY_END_ID, TPCH_QUERY_START_ID, TPCH_TABLES, get_query_sql_for_scale_factor, get_tbl_tpch_table_schema, get_tpch_table_schema, }; use crate::util::{BenchmarkRun, CommonOpt, QueryResult, print_memory_stats}; use arrow::record_batch::RecordBatch; use arrow::util::pretty::{self, pretty_format_batches}; +use datafusion::common::exec_err; use datafusion::datasource::file_format::FileFormat; use datafusion::datasource::file_format::csv::CsvFormat; use datafusion::datasource::file_format::parquet::ParquetFormat; @@ -71,6 +72,11 @@ pub struct RunOpt { #[arg(required = true, short = 'p', long = "path")] path: PathBuf, + /// TPC-H scale factor used for query substitutions such as Q11 FRACTION. + /// If omitted, the benchmark tries to infer it from paths like `.../tpch_sf10/...`. 
+ #[arg(long)] + scale_factor: Option, + /// File format: `csv` or `parquet` #[arg(short = 'f', long = "format", default_value = "csv")] file_format: String, @@ -133,10 +139,11 @@ impl RunOpt { let ctx = SessionContext::new_with_config_rt(config, rt); // register tables self.register_tables(&ctx).await?; + let scale_factor = self.scale_factor()?; for query_id in query_range { benchmark_run.start_new_case(&format!("Query {query_id}")); - let query_run = self.benchmark_query(query_id, &ctx).await; + let query_run = self.benchmark_query(query_id, scale_factor, &ctx).await; match query_run { Ok(query_results) => { for iter in query_results { @@ -157,13 +164,14 @@ impl RunOpt { async fn benchmark_query( &self, query_id: usize, + scale_factor: f64, ctx: &SessionContext, ) -> Result> { let mut millis = vec![]; // run benchmark let mut query_results = vec![]; - let sql = &get_query_sql(query_id)?; + let sql = &get_query_sql_for_scale_factor(query_id, scale_factor)?; for i in 0..self.iterations() { let start = Instant::now(); @@ -346,6 +354,82 @@ impl RunOpt { .partitions .unwrap_or_else(get_available_parallelism) } + + fn scale_factor(&self) -> Result { + resolve_scale_factor(self.scale_factor, &self.path) + } +} + +fn resolve_scale_factor(scale_factor: Option, path: &Path) -> Result { + let scale_factor = scale_factor + .or_else(|| infer_scale_factor_from_path(path)) + .unwrap_or(1.0); + + if scale_factor.is_finite() && scale_factor > 0.0 { + Ok(scale_factor) + } else { + exec_err!( + "Invalid TPC-H scale factor {scale_factor}. Expected a positive finite value" + ) + } +} + +fn infer_scale_factor_from_path(path: &Path) -> Option { + path.iter().find_map(|component| { + component + .to_str()? + .strip_prefix("tpch_sf")? 
+ .parse::() + .ok() + }) +} + +#[cfg(test)] +mod scale_factor_tests { + use std::path::Path; + + use super::{infer_scale_factor_from_path, resolve_scale_factor}; + use datafusion::error::Result; + + #[test] + fn uses_explicit_scale_factor_when_provided() -> Result<()> { + let scale_factor = + resolve_scale_factor(Some(30.0), Path::new("benchmarks/data/tpch_sf10"))?; + assert_eq!(scale_factor, 30.0); + Ok(()) + } + + #[test] + fn infers_scale_factor_from_standard_tpch_path() -> Result<()> { + let scale_factor = + resolve_scale_factor(None, Path::new("benchmarks/data/tpch_sf10"))?; + assert_eq!(scale_factor, 10.0); + assert_eq!( + infer_scale_factor_from_path(Path::new("benchmarks/data/tpch_sf0.1")), + Some(0.1) + ); + Ok(()) + } + + #[test] + fn defaults_to_sf1_when_path_has_no_scale_factor() -> Result<()> { + let scale_factor = resolve_scale_factor(None, Path::new("benchmarks/data"))?; + assert_eq!(scale_factor, 1.0); + Ok(()) + } + + #[test] + fn rejects_invalid_scale_factors() { + assert!(resolve_scale_factor(Some(0.0), Path::new("benchmarks/data")).is_err()); + assert!(resolve_scale_factor(Some(-1.0), Path::new("benchmarks/data")).is_err()); + assert!( + resolve_scale_factor(Some(f64::NAN), Path::new("benchmarks/data")).is_err() + ); + assert!( + resolve_scale_factor(Some(f64::INFINITY), Path::new("benchmarks/data")) + .is_err() + ); + } } #[cfg(test)] @@ -392,6 +476,7 @@ mod tests { query: Some(query), common, path: PathBuf::from(path.to_string()), + scale_factor: Some(1.0), file_format: "tbl".to_string(), mem_table: false, output_path: None, @@ -402,7 +487,7 @@ mod tests { hash_join_buffering_capacity: 0, }; opt.register_tables(&ctx).await?; - let queries = get_query_sql(query)?; + let queries = crate::tpch::get_query_sql(query)?; for query in queries { let plan = ctx.sql(&query).await?; let plan = plan.into_optimized_plan()?; @@ -432,6 +517,7 @@ mod tests { query: Some(query), common, path: PathBuf::from(path.to_string()), + scale_factor: Some(1.0), 
file_format: "tbl".to_string(), mem_table: false, output_path: None, @@ -442,7 +528,7 @@ hash_join_buffering_capacity: 0, }; opt.register_tables(&ctx).await?; - let queries = get_query_sql(query)?; + let queries = crate::tpch::get_query_sql(query)?; for query in queries { let plan = ctx.sql(&query).await?; let plan = plan.create_physical_plan().await?; diff --git a/datafusion/core/benches/sort.rs b/datafusion/core/benches/sort.rs index 4ba57a1530e81..7544f7ae26d43 100644 --- a/datafusion/core/benches/sort.rs +++ b/datafusion/core/benches/sort.rs @@ -102,61 +102,104 @@ const NUM_STREAMS: usize = 8; /// The size of each batch within each stream const BATCH_SIZE: usize = 1024; -/// Total number of input rows to generate -const INPUT_SIZE: u64 = 100000; +/// Input sizes to benchmark. The small size (100K) exercises the +/// in-memory concat-and-sort path; the large size (1M) exercises +/// the sort-then-merge path with high fan-in. +const INPUT_SIZES: &[(u64, &str)] = &[(100_000, "100k"), (1_000_000, "1M")]; type PartitionedBatches = Vec<Vec<RecordBatch>>; +type StreamGenerator = Box<dyn Fn(bool) -> PartitionedBatches>; fn criterion_benchmark(c: &mut Criterion) { - let cases: Vec<(&str, &dyn Fn(bool) -> PartitionedBatches)> = vec![ - ("i64", &i64_streams), - ("f64", &f64_streams), - ("utf8 low cardinality", &utf8_low_cardinality_streams), - ("utf8 high cardinality", &utf8_high_cardinality_streams), - ( - "utf8 view low cardinality", - &utf8_view_low_cardinality_streams, - ), - ( - "utf8 view high cardinality", - &utf8_view_high_cardinality_streams, - ), - ("utf8 tuple", &utf8_tuple_streams), - ("utf8 view tuple", &utf8_view_tuple_streams), - ("utf8 dictionary", &dictionary_streams), - ("utf8 dictionary tuple", &dictionary_tuple_streams), - ("mixed dictionary tuple", &mixed_dictionary_tuple_streams), - ("mixed tuple", &mixed_tuple_streams), - ( - "mixed tuple with utf8 view", - &mixed_tuple_with_utf8_view_streams, - ), - ]; - - for (name, f) in cases { - c.bench_function(&format!("merge 
sorted {name}"), |b| { - let data = f(true); - let case = BenchCase::merge_sorted(&data); - b.iter(move || case.run()) - }); - - c.bench_function(&format!("sort merge {name}"), |b| { - let data = f(false); - let case = BenchCase::sort_merge(&data); - b.iter(move || case.run()) - }); - - c.bench_function(&format!("sort {name}"), |b| { - let data = f(false); - let case = BenchCase::sort(&data); - b.iter(move || case.run()) - }); - - c.bench_function(&format!("sort partitioned {name}"), |b| { - let data = f(false); - let case = BenchCase::sort_partitioned(&data); - b.iter(move || case.run()) - }); + for &(input_size, size_label) in INPUT_SIZES { + let cases: Vec<(&str, StreamGenerator)> = vec![ + ( + "i64", + Box::new(move |sorted| i64_streams(sorted, input_size)), + ), + ( + "f64", + Box::new(move |sorted| f64_streams(sorted, input_size)), + ), + ( + "utf8 low cardinality", + Box::new(move |sorted| utf8_low_cardinality_streams(sorted, input_size)), + ), + ( + "utf8 high cardinality", + Box::new(move |sorted| utf8_high_cardinality_streams(sorted, input_size)), + ), + ( + "utf8 view low cardinality", + Box::new(move |sorted| { + utf8_view_low_cardinality_streams(sorted, input_size) + }), + ), + ( + "utf8 view high cardinality", + Box::new(move |sorted| { + utf8_view_high_cardinality_streams(sorted, input_size) + }), + ), + ( + "utf8 tuple", + Box::new(move |sorted| utf8_tuple_streams(sorted, input_size)), + ), + ( + "utf8 view tuple", + Box::new(move |sorted| utf8_view_tuple_streams(sorted, input_size)), + ), + ( + "utf8 dictionary", + Box::new(move |sorted| dictionary_streams(sorted, input_size)), + ), + ( + "utf8 dictionary tuple", + Box::new(move |sorted| dictionary_tuple_streams(sorted, input_size)), + ), + ( + "mixed dictionary tuple", + Box::new(move |sorted| { + mixed_dictionary_tuple_streams(sorted, input_size) + }), + ), + ( + "mixed tuple", + Box::new(move |sorted| mixed_tuple_streams(sorted, input_size)), + ), + ( + "mixed tuple with utf8 view", + 
Box::new(move |sorted| { + mixed_tuple_with_utf8_view_streams(sorted, input_size) + }), + ), + ]; + + for (name, f) in &cases { + c.bench_function(&format!("merge sorted {name} {size_label}"), |b| { + let data = f(true); + let case = BenchCase::merge_sorted(&data); + b.iter(move || case.run()) + }); + + c.bench_function(&format!("sort merge {name} {size_label}"), |b| { + let data = f(false); + let case = BenchCase::sort_merge(&data); + b.iter(move || case.run()) + }); + + c.bench_function(&format!("sort {name} {size_label}"), |b| { + let data = f(false); + let case = BenchCase::sort(&data); + b.iter(move || case.run()) + }); + + c.bench_function(&format!("sort partitioned {name} {size_label}"), |b| { + let data = f(false); + let case = BenchCase::sort_partitioned(&data); + b.iter(move || case.run()) + }); + } } } @@ -279,8 +322,8 @@ fn make_sort_exprs(schema: &Schema) -> LexOrdering { } /// Create streams of int64 (where approximately 1/3 values is repeated) -fn i64_streams(sorted: bool) -> PartitionedBatches { - let mut values = DataGenerator::new().i64_values(); +fn i64_streams(sorted: bool, input_size: u64) -> PartitionedBatches { + let mut values = DataGenerator::new(input_size).i64_values(); if sorted { values.sort_unstable(); } @@ -293,8 +336,8 @@ fn i64_streams(sorted: bool) -> PartitionedBatches { /// Create streams of f64 (where approximately 1/3 values are repeated) /// with the same distribution as i64_streams -fn f64_streams(sorted: bool) -> PartitionedBatches { - let mut values = DataGenerator::new().f64_values(); +fn f64_streams(sorted: bool, input_size: u64) -> PartitionedBatches { + let mut values = DataGenerator::new(input_size).f64_values(); if sorted { values.sort_unstable_by(|a, b| a.total_cmp(b)); } @@ -306,8 +349,8 @@ fn f64_streams(sorted: bool) -> PartitionedBatches { } /// Create streams of random low cardinality utf8 values -fn utf8_low_cardinality_streams(sorted: bool) -> PartitionedBatches { - let mut values = 
DataGenerator::new().utf8_low_cardinality_values(); +fn utf8_low_cardinality_streams(sorted: bool, input_size: u64) -> PartitionedBatches { + let mut values = DataGenerator::new(input_size).utf8_low_cardinality_values(); if sorted { values.sort_unstable(); } @@ -318,8 +361,11 @@ fn utf8_low_cardinality_streams(sorted: bool) -> PartitionedBatches { } /// Create streams of random low cardinality utf8_view values -fn utf8_view_low_cardinality_streams(sorted: bool) -> PartitionedBatches { - let mut values = DataGenerator::new().utf8_low_cardinality_values(); +fn utf8_view_low_cardinality_streams( + sorted: bool, + input_size: u64, +) -> PartitionedBatches { + let mut values = DataGenerator::new(input_size).utf8_low_cardinality_values(); if sorted { values.sort_unstable(); } @@ -330,8 +376,11 @@ fn utf8_view_low_cardinality_streams(sorted: bool) -> PartitionedBatches { } /// Create streams of high cardinality (~ no duplicates) utf8_view values -fn utf8_view_high_cardinality_streams(sorted: bool) -> PartitionedBatches { - let mut values = DataGenerator::new().utf8_high_cardinality_values(); +fn utf8_view_high_cardinality_streams( + sorted: bool, + input_size: u64, +) -> PartitionedBatches { + let mut values = DataGenerator::new(input_size).utf8_high_cardinality_values(); if sorted { values.sort_unstable(); } @@ -342,8 +391,8 @@ fn utf8_view_high_cardinality_streams(sorted: bool) -> PartitionedBatches { } /// Create streams of high cardinality (~ no duplicates) utf8 values -fn utf8_high_cardinality_streams(sorted: bool) -> PartitionedBatches { - let mut values = DataGenerator::new().utf8_high_cardinality_values(); +fn utf8_high_cardinality_streams(sorted: bool, input_size: u64) -> PartitionedBatches { + let mut values = DataGenerator::new(input_size).utf8_high_cardinality_values(); if sorted { values.sort_unstable(); } @@ -354,8 +403,8 @@ fn utf8_high_cardinality_streams(sorted: bool) -> PartitionedBatches { } /// Create a batch of (utf8_low, utf8_low, utf8_high) -fn 
utf8_tuple_streams(sorted: bool) -> PartitionedBatches { - let mut data_gen = DataGenerator::new(); +fn utf8_tuple_streams(sorted: bool, input_size: u64) -> PartitionedBatches { + let mut data_gen = DataGenerator::new(input_size); // need to sort by the combined key, so combine them together let mut tuples: Vec<_> = data_gen @@ -387,8 +436,8 @@ fn utf8_tuple_streams(sorted: bool) -> PartitionedBatches { } /// Create a batch of (utf8_view_low, utf8_view_low, utf8_view_high) -fn utf8_view_tuple_streams(sorted: bool) -> PartitionedBatches { - let mut data_gen = DataGenerator::new(); +fn utf8_view_tuple_streams(sorted: bool, input_size: u64) -> PartitionedBatches { + let mut data_gen = DataGenerator::new(input_size); // need to sort by the combined key, so combine them together let mut tuples: Vec<_> = data_gen @@ -420,8 +469,8 @@ fn utf8_view_tuple_streams(sorted: bool) -> PartitionedBatches { } /// Create a batch of (f64, utf8_low, utf8_low, i64) -fn mixed_tuple_streams(sorted: bool) -> PartitionedBatches { - let mut data_gen = DataGenerator::new(); +fn mixed_tuple_streams(sorted: bool, input_size: u64) -> PartitionedBatches { + let mut data_gen = DataGenerator::new(input_size); // need to sort by the combined key, so combine them together let mut tuples: Vec<_> = data_gen @@ -458,8 +507,11 @@ fn mixed_tuple_streams(sorted: bool) -> PartitionedBatches { } /// Create a batch of (f64, utf8_view_low, utf8_view_low, i64) -fn mixed_tuple_with_utf8_view_streams(sorted: bool) -> PartitionedBatches { - let mut data_gen = DataGenerator::new(); +fn mixed_tuple_with_utf8_view_streams( + sorted: bool, + input_size: u64, +) -> PartitionedBatches { + let mut data_gen = DataGenerator::new(input_size); // need to sort by the combined key, so combine them together let mut tuples: Vec<_> = data_gen @@ -496,8 +548,8 @@ fn mixed_tuple_with_utf8_view_streams(sorted: bool) -> PartitionedBatches { } /// Create a batch of (utf8_dict) -fn dictionary_streams(sorted: bool) -> 
PartitionedBatches { - let mut data_gen = DataGenerator::new(); +fn dictionary_streams(sorted: bool, input_size: u64) -> PartitionedBatches { + let mut data_gen = DataGenerator::new(input_size); let mut values = data_gen.utf8_low_cardinality_values(); if sorted { values.sort_unstable(); @@ -511,8 +563,8 @@ fn dictionary_streams(sorted: bool) -> PartitionedBatches { } /// Create a batch of (utf8_dict, utf8_dict, utf8_dict) -fn dictionary_tuple_streams(sorted: bool) -> PartitionedBatches { - let mut data_gen = DataGenerator::new(); +fn dictionary_tuple_streams(sorted: bool, input_size: u64) -> PartitionedBatches { + let mut data_gen = DataGenerator::new(input_size); let mut tuples: Vec<_> = data_gen .utf8_low_cardinality_values() .into_iter() @@ -542,8 +594,8 @@ fn dictionary_tuple_streams(sorted: bool) -> PartitionedBatches { } /// Create a batch of (utf8_dict, utf8_dict, utf8_dict, i64) -fn mixed_dictionary_tuple_streams(sorted: bool) -> PartitionedBatches { - let mut data_gen = DataGenerator::new(); +fn mixed_dictionary_tuple_streams(sorted: bool, input_size: u64) -> PartitionedBatches { + let mut data_gen = DataGenerator::new(input_size); let mut tuples: Vec<_> = data_gen .utf8_low_cardinality_values() .into_iter() @@ -579,19 +631,21 @@ fn mixed_dictionary_tuple_streams(sorted: bool) -> PartitionedBatches { /// Encapsulates creating data for this test struct DataGenerator { rng: StdRng, + input_size: u64, } impl DataGenerator { - fn new() -> Self { + fn new(input_size: u64) -> Self { Self { rng: StdRng::seed_from_u64(42), + input_size, } } /// Create an array of i64 sorted values (where approximately 1/3 values is repeated) fn i64_values(&mut self) -> Vec { - let mut vec: Vec<_> = (0..INPUT_SIZE) - .map(|_| self.rng.random_range(0..INPUT_SIZE as i64)) + let mut vec: Vec<_> = (0..self.input_size) + .map(|_| self.rng.random_range(0..self.input_size as i64)) .collect(); vec.sort_unstable(); @@ -614,7 +668,7 @@ impl DataGenerator { .collect::>(); // pick from the 100 
strings randomly - let mut input = (0..INPUT_SIZE) + let mut input = (0..self.input_size) .map(|_| { let idx = self.rng.random_range(0..strings.len()); let s = Arc::clone(&strings[idx]); @@ -629,7 +683,7 @@ impl DataGenerator { /// Create sorted values of high cardinality (~ no duplicates) utf8 values fn utf8_high_cardinality_values(&mut self) -> Vec> { // make random strings - let mut input = (0..INPUT_SIZE) + let mut input = (0..self.input_size) .map(|_| Some(self.random_string())) .collect::>(); diff --git a/datafusion/core/tests/physical_optimizer/filter_pushdown.rs b/datafusion/core/tests/physical_optimizer/filter_pushdown.rs index a255c07545829..d058e44a85d00 100644 --- a/datafusion/core/tests/physical_optimizer/filter_pushdown.rs +++ b/datafusion/core/tests/physical_optimizer/filter_pushdown.rs @@ -18,7 +18,7 @@ use std::sync::{Arc, LazyLock}; use arrow::{ - array::{Float64Array, Int32Array, RecordBatch, StringArray, record_batch}, + array::record_batch, datatypes::{DataType, Field, Schema, SchemaRef}, util::pretty::pretty_format_batches, }; @@ -30,11 +30,10 @@ use datafusion::{ PhysicalExpr, expressions::{BinaryExpr, Column, Literal}, }, - prelude::{ParquetReadOptions, SessionConfig, SessionContext}, + prelude::{SessionConfig, SessionContext}, scalar::ScalarValue, }; use datafusion_catalog::memory::DataSourceExec; -use datafusion_common::JoinType; use datafusion_common::config::ConfigOptions; use datafusion_datasource::{ PartitionedFile, file_groups::FileGroup, file_scan_config::FileScanConfigBuilder, @@ -42,19 +41,14 @@ use datafusion_datasource::{ use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_expr::ScalarUDF; use datafusion_functions::math::random::RandomFunc; -use datafusion_functions_aggregate::{ - count::count_udaf, - min_max::{max_udaf, min_udaf}, -}; +use datafusion_functions_aggregate::{count::count_udaf, min_max::min_udaf}; use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr, expressions::col}; use 
datafusion_physical_expr::{ - Partitioning, ScalarFunctionExpr, - aggregate::{AggregateExprBuilder, AggregateFunctionExpr}, + Partitioning, ScalarFunctionExpr, aggregate::AggregateExprBuilder, }; use datafusion_physical_optimizer::{ PhysicalOptimizerRule, filter_pushdown::FilterPushdown, }; -use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; use datafusion_physical_plan::{ ExecutionPlan, aggregates::{AggregateExec, AggregateMode, PhysicalGroupBy}, @@ -70,9 +64,7 @@ use super::pushdown_utils::{ OptimizationTest, TestNode, TestScanBuilder, TestSource, format_plan_for_test, }; use datafusion_physical_plan::union::UnionExec; -use futures::StreamExt; -use object_store::{ObjectStore, memory::InMemory}; -use regex::Regex; +use object_store::memory::InMemory; #[test] fn test_pushdown_into_scan() { @@ -176,128 +168,11 @@ fn test_pushdown_into_scan_with_config_options() { ); } -#[tokio::test] -async fn test_dynamic_filter_pushdown_through_hash_join_with_topk() { - use datafusion_common::JoinType; - use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; - - // Create build side with limited values - let build_batches = vec![ - record_batch!( - ("a", Utf8, ["aa", "ab"]), - ("b", Utf8View, ["ba", "bb"]), - ("c", Float64, [1.0, 2.0]) - ) - .unwrap(), - ]; - let build_side_schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Utf8, false), - Field::new("b", DataType::Utf8View, false), - Field::new("c", DataType::Float64, false), - ])); - let build_scan = TestScanBuilder::new(Arc::clone(&build_side_schema)) - .with_support(true) - .with_batches(build_batches) - .build(); - - // Create probe side with more values - let probe_batches = vec![ - record_batch!( - ("d", Utf8, ["aa", "ab", "ac", "ad"]), - ("e", Utf8View, ["ba", "bb", "bc", "bd"]), - ("f", Float64, [1.0, 2.0, 3.0, 4.0]) - ) - .unwrap(), - ]; - let probe_side_schema = Arc::new(Schema::new(vec![ - Field::new("d", DataType::Utf8, false), - Field::new("e", DataType::Utf8View, false), - 
Field::new("f", DataType::Float64, false), - ])); - let probe_scan = TestScanBuilder::new(Arc::clone(&probe_side_schema)) - .with_support(true) - .with_batches(probe_batches) - .build(); - - // Create HashJoinExec - let on = vec![( - col("a", &build_side_schema).unwrap(), - col("d", &probe_side_schema).unwrap(), - )]; - let join = Arc::new( - HashJoinExec::try_new( - build_scan, - probe_scan, - on, - None, - &JoinType::Inner, - None, - PartitionMode::Partitioned, - datafusion_common::NullEquality::NullEqualsNothing, - false, - ) - .unwrap(), - ); - - let join_schema = join.schema(); - - // Finally let's add a SortExec on the outside to test pushdown of dynamic filters - let sort_expr = - PhysicalSortExpr::new(col("e", &join_schema).unwrap(), SortOptions::default()); - let plan = Arc::new( - SortExec::new(LexOrdering::new(vec![sort_expr]).unwrap(), join) - .with_fetch(Some(2)), - ) as Arc; - - let mut config = ConfigOptions::default(); - config.optimizer.enable_dynamic_filter_pushdown = true; - config.execution.parquet.pushdown_filters = true; - - // Apply the FilterPushdown optimizer rule - let plan = FilterPushdown::new_post_optimization() - .optimize(Arc::clone(&plan), &config) - .unwrap(); - - // Test that filters are pushed down correctly to each side of the join - insta::assert_snapshot!( - format_plan_for_test(&plan), - @r" - - SortExec: TopK(fetch=2), expr=[e@4 ASC], preserve_partitioning=[false] - - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, d@0)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, e, f], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ] AND DynamicFilter [ empty ] - " - ); - - // Put some data through the plan to check that the filter is updated to reflect the TopK state - let session_ctx = SessionContext::new_with_config(SessionConfig::new()); - 
session_ctx.register_object_store( - ObjectStoreUrl::parse("test://").unwrap().as_ref(), - Arc::new(InMemory::new()), - ); - let state = session_ctx.state(); - let task_ctx = state.task_ctx(); - let mut stream = plan.execute(0, Arc::clone(&task_ctx)).unwrap(); - // Iterate one batch - stream.next().await.unwrap().unwrap(); - - // Test that filters are pushed down correctly to each side of the join - // NOTE: We dropped the CASE expression here because we now optimize that away if there's only 1 partition - insta::assert_snapshot!( - format_plan_for_test(&plan), - @r" - - SortExec: TopK(fetch=2), expr=[e@4 ASC], preserve_partitioning=[false], filter=[e@4 IS NULL OR e@4 < bb] - - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, d@0)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, e, f], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ d@0 >= aa AND d@0 <= ab AND d@0 IN (SET) ([aa, ab]) ] AND DynamicFilter [ e@1 IS NULL OR e@1 < bb ] - " - ); -} - -// Test both static and dynamic filter pushdown in HashJoinExec. -// Note that static filter pushdown is rare: it should have already happened in the logical optimizer phase. -// However users may manually construct plans that could result in a FilterExec -> HashJoinExec -> Scan setup. -// Dynamic filters arise in cases such as nested inner joins or TopK -> HashJoinExec -> Scan setups. +// Inner-join part is covered by push_down_filter_parquet.slt::test_hashjoin_parent_filter_pushdown. +// The Left-join part stays in Rust: SQL's outer-join-elimination rewrites +// `LEFT JOIN ... WHERE ` into an INNER JOIN +// before physical filter pushdown runs, so the preserved-vs-non-preserved +// distinction this test exercises is not reachable via SQL. 
#[tokio::test] async fn test_static_filter_pushdown_through_hash_join() { use datafusion_common::JoinType; @@ -926,175 +801,11 @@ fn test_node_handles_child_pushdown_result() { ); } -#[tokio::test] -async fn test_topk_dynamic_filter_pushdown() { - let batches = vec![ - record_batch!( - ("a", Utf8, ["aa", "ab"]), - ("b", Utf8, ["bd", "bc"]), - ("c", Float64, [1.0, 2.0]) - ) - .unwrap(), - record_batch!( - ("a", Utf8, ["ac", "ad"]), - ("b", Utf8, ["bb", "ba"]), - ("c", Float64, [2.0, 1.0]) - ) - .unwrap(), - ]; - let scan = TestScanBuilder::new(schema()) - .with_support(true) - .with_batches(batches) - .build(); - let plan = Arc::new( - SortExec::new( - LexOrdering::new(vec![PhysicalSortExpr::new( - col("b", &schema()).unwrap(), - SortOptions::new(true, false), // descending, nulls_first - )]) - .unwrap(), - Arc::clone(&scan), - ) - .with_fetch(Some(1)), - ) as Arc; - - // expect the predicate to be pushed down into the DataSource - insta::assert_snapshot!( - OptimizationTest::new(Arc::clone(&plan), FilterPushdown::new_post_optimization(), true), - @r" - OptimizationTest: - input: - - SortExec: TopK(fetch=1), expr=[b@1 DESC NULLS LAST], preserve_partitioning=[false] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - output: - Ok: - - SortExec: TopK(fetch=1), expr=[b@1 DESC NULLS LAST], preserve_partitioning=[false] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ] - " - ); - - // Actually apply the optimization to the plan and put some data through it to check that the filter is updated to reflect the TopK state - let mut config = ConfigOptions::default(); - config.execution.parquet.pushdown_filters = true; - let plan = FilterPushdown::new_post_optimization() - .optimize(plan, &config) - .unwrap(); - let config = SessionConfig::new().with_batch_size(2); - let session_ctx = 
SessionContext::new_with_config(config); - session_ctx.register_object_store( - ObjectStoreUrl::parse("test://").unwrap().as_ref(), - Arc::new(InMemory::new()), - ); - let state = session_ctx.state(); - let task_ctx = state.task_ctx(); - let mut stream = plan.execute(0, Arc::clone(&task_ctx)).unwrap(); - // Iterate one batch - stream.next().await.unwrap().unwrap(); - // Now check what our filter looks like - insta::assert_snapshot!( - format!("{}", format_plan_for_test(&plan)), - @r" - - SortExec: TopK(fetch=1), expr=[b@1 DESC NULLS LAST], preserve_partitioning=[false], filter=[b@1 > bd] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ b@1 > bd ] - " - ); -} - -#[tokio::test] -async fn test_topk_dynamic_filter_pushdown_multi_column_sort() { - let batches = vec![ - // We are going to do ORDER BY b ASC NULLS LAST, a DESC - // And we put the values in such a way that the first batch will fill the TopK - // and we skip the second batch. 
- record_batch!( - ("a", Utf8, ["ac", "ad"]), - ("b", Utf8, ["bb", "ba"]), - ("c", Float64, [2.0, 1.0]) - ) - .unwrap(), - record_batch!( - ("a", Utf8, ["aa", "ab"]), - ("b", Utf8, ["bc", "bd"]), - ("c", Float64, [1.0, 2.0]) - ) - .unwrap(), - ]; - let scan = TestScanBuilder::new(schema()) - .with_support(true) - .with_batches(batches) - .build(); - let plan = Arc::new( - SortExec::new( - LexOrdering::new(vec![ - PhysicalSortExpr::new( - col("b", &schema()).unwrap(), - SortOptions::default().asc().nulls_last(), - ), - PhysicalSortExpr::new( - col("a", &schema()).unwrap(), - SortOptions::default().desc().nulls_first(), - ), - ]) - .unwrap(), - Arc::clone(&scan), - ) - .with_fetch(Some(2)), - ) as Arc; - - // expect the predicate to be pushed down into the DataSource - insta::assert_snapshot!( - OptimizationTest::new(Arc::clone(&plan), FilterPushdown::new_post_optimization(), true), - @r" - OptimizationTest: - input: - - SortExec: TopK(fetch=2), expr=[b@1 ASC NULLS LAST, a@0 DESC], preserve_partitioning=[false] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - output: - Ok: - - SortExec: TopK(fetch=2), expr=[b@1 ASC NULLS LAST, a@0 DESC], preserve_partitioning=[false] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ] - " - ); - - // Actually apply the optimization to the plan and put some data through it to check that the filter is updated to reflect the TopK state - let mut config = ConfigOptions::default(); - config.execution.parquet.pushdown_filters = true; - let plan = FilterPushdown::new_post_optimization() - .optimize(plan, &config) - .unwrap(); - let config = SessionConfig::new().with_batch_size(2); - let session_ctx = SessionContext::new_with_config(config); - session_ctx.register_object_store( - ObjectStoreUrl::parse("test://").unwrap().as_ref(), - Arc::new(InMemory::new()), - 
); - let state = session_ctx.state(); - let task_ctx = state.task_ctx(); - let mut stream = plan.execute(0, Arc::clone(&task_ctx)).unwrap(); - // Iterate one batch - let res = stream.next().await.unwrap().unwrap(); - #[rustfmt::skip] - let expected = [ - "+----+----+-----+", - "| a | b | c |", - "+----+----+-----+", - "| ad | ba | 1.0 |", - "| ac | bb | 2.0 |", - "+----+----+-----+", - ]; - assert_batches_eq!(expected, &[res]); - // Now check what our filter looks like - insta::assert_snapshot!( - format!("{}", format_plan_for_test(&plan)), - @r" - - SortExec: TopK(fetch=2), expr=[b@1 ASC NULLS LAST, a@0 DESC], preserve_partitioning=[false], filter=[b@1 < bb OR b@1 = bb AND (a@0 IS NULL OR a@0 > ac)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ b@1 < bb OR b@1 = bb AND (a@0 IS NULL OR a@0 > ac) ] - " - ); - // There should be no more batches - assert!(stream.next().await.is_none()); -} - +// Not portable to sqllogictest: requires manually constructing +// `SortExec(CoalescePartitionsExec(scan))`. A SQL `ORDER BY ... LIMIT` over a +// multi-partition scan plans as `SortPreservingMergeExec(SortExec(scan))` +// instead, so the filter-through-coalesce path this test exercises is not +// reachable via SQL. #[tokio::test] async fn test_topk_filter_passes_through_coalesce_partitions() { // Create multiple batches for different partitions @@ -1163,129 +874,12 @@ async fn test_topk_filter_passes_through_coalesce_partitions() { ); } +// Not portable to sqllogictest: this test pins `PartitionMode::Partitioned` +// by hand-wiring `RepartitionExec(Hash, 12)` on both join sides. A SQL +// INNER JOIN over small parquet inputs plans as `CollectLeft`, so the +// per-partition CASE filter this test exercises is not reachable via SQL. 
#[tokio::test] -async fn test_hashjoin_dynamic_filter_pushdown() { - use datafusion_common::JoinType; - use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; - - // Create build side with limited values - let build_batches = vec![ - record_batch!( - ("a", Utf8, ["aa", "ab"]), - ("b", Utf8, ["ba", "bb"]), - ("c", Float64, [1.0, 2.0]) // Extra column not used in join - ) - .unwrap(), - ]; - let build_side_schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Utf8, false), - Field::new("b", DataType::Utf8, false), - Field::new("c", DataType::Float64, false), - ])); - let build_scan = TestScanBuilder::new(Arc::clone(&build_side_schema)) - .with_support(true) - .with_batches(build_batches) - .build(); - - // Create probe side with more values - let probe_batches = vec![ - record_batch!( - ("a", Utf8, ["aa", "ab", "ac", "ad"]), - ("b", Utf8, ["ba", "bb", "bc", "bd"]), - ("e", Float64, [1.0, 2.0, 3.0, 4.0]) // Extra column not used in join - ) - .unwrap(), - ]; - let probe_side_schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Utf8, false), - Field::new("b", DataType::Utf8, false), - Field::new("e", DataType::Float64, false), - ])); - let probe_scan = TestScanBuilder::new(Arc::clone(&probe_side_schema)) - .with_support(true) - .with_batches(probe_batches) - .build(); - - // Create HashJoinExec with dynamic filter - let on = vec![ - ( - col("a", &build_side_schema).unwrap(), - col("a", &probe_side_schema).unwrap(), - ), - ( - col("b", &build_side_schema).unwrap(), - col("b", &probe_side_schema).unwrap(), - ), - ]; - let plan = Arc::new( - HashJoinExec::try_new( - build_scan, - probe_scan, - on, - None, - &JoinType::Inner, - None, - PartitionMode::CollectLeft, - datafusion_common::NullEquality::NullEqualsNothing, - false, - ) - .unwrap(), - ) as Arc; - - // expect the predicate to be pushed down into the probe side DataSource - insta::assert_snapshot!( - OptimizationTest::new(Arc::clone(&plan), FilterPushdown::new_post_optimization(), 
true), - @r" - OptimizationTest: - input: - - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true - output: - Ok: - - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ] - ", - ); - - // Actually apply the optimization to the plan and execute to see the filter in action - let mut config = ConfigOptions::default(); - config.execution.parquet.pushdown_filters = true; - config.optimizer.enable_dynamic_filter_pushdown = true; - let plan = FilterPushdown::new_post_optimization() - .optimize(plan, &config) - .unwrap(); - - // Test for https://github.com/apache/datafusion/pull/17371: dynamic filter linking survives `with_new_children` - let children = plan.children().into_iter().map(Arc::clone).collect(); - let plan = plan.with_new_children(children).unwrap(); - - let config = SessionConfig::new().with_batch_size(10); - let session_ctx = SessionContext::new_with_config(config); - session_ctx.register_object_store( - ObjectStoreUrl::parse("test://").unwrap().as_ref(), - Arc::new(InMemory::new()), - ); - let state = session_ctx.state(); - let task_ctx = state.task_ctx(); - let mut stream = plan.execute(0, Arc::clone(&task_ctx)).unwrap(); - // Iterate one batch - stream.next().await.unwrap().unwrap(); - - // Now check what our filter looks like - insta::assert_snapshot!( - format!("{}", format_plan_for_test(&plan)), - @r" - - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)] - - 
DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ] - " - ); -} - -#[tokio::test] -async fn test_hashjoin_dynamic_filter_pushdown_partitioned() { +async fn test_hashjoin_dynamic_filter_pushdown_partitioned() { use datafusion_common::JoinType; use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; @@ -1533,6 +1127,12 @@ async fn test_hashjoin_dynamic_filter_pushdown_partitioned() { ); } +// Not portable to sqllogictest: this test specifically pins a +// `RepartitionExec(Hash, 12)` between `HashJoinExec(CollectLeft)` and the +// probe-side scan to verify the dynamic filter link survives that boundary +// (regression for #17451). The same CollectLeft filter content and +// pushdown counters are already covered by the simpler slt port +// (push_down_filter_parquet.slt::test_hashjoin_dynamic_filter_pushdown). 
#[tokio::test] async fn test_hashjoin_dynamic_filter_pushdown_collect_left() { use datafusion_common::JoinType; @@ -1705,265 +1305,6 @@ async fn test_hashjoin_dynamic_filter_pushdown_collect_left() { ); } -#[tokio::test] -async fn test_nested_hashjoin_dynamic_filter_pushdown() { - use datafusion_common::JoinType; - use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; - - // Create test data for three tables: t1, t2, t3 - // t1: small table with limited values (will be build side of outer join) - let t1_batches = vec![ - record_batch!(("a", Utf8, ["aa", "ab"]), ("x", Float64, [1.0, 2.0])).unwrap(), - ]; - let t1_schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Utf8, false), - Field::new("x", DataType::Float64, false), - ])); - let t1_scan = TestScanBuilder::new(Arc::clone(&t1_schema)) - .with_support(true) - .with_batches(t1_batches) - .build(); - - // t2: larger table (will be probe side of inner join, build side of outer join) - let t2_batches = vec![ - record_batch!( - ("b", Utf8, ["aa", "ab", "ac", "ad", "ae"]), - ("c", Utf8, ["ca", "cb", "cc", "cd", "ce"]), - ("y", Float64, [1.0, 2.0, 3.0, 4.0, 5.0]) - ) - .unwrap(), - ]; - let t2_schema = Arc::new(Schema::new(vec![ - Field::new("b", DataType::Utf8, false), - Field::new("c", DataType::Utf8, false), - Field::new("y", DataType::Float64, false), - ])); - let t2_scan = TestScanBuilder::new(Arc::clone(&t2_schema)) - .with_support(true) - .with_batches(t2_batches) - .build(); - - // t3: largest table (will be probe side of inner join) - let t3_batches = vec![ - record_batch!( - ("d", Utf8, ["ca", "cb", "cc", "cd", "ce", "cf", "cg", "ch"]), - ("z", Float64, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]) - ) - .unwrap(), - ]; - let t3_schema = Arc::new(Schema::new(vec![ - Field::new("d", DataType::Utf8, false), - Field::new("z", DataType::Float64, false), - ])); - let t3_scan = TestScanBuilder::new(Arc::clone(&t3_schema)) - .with_support(true) - .with_batches(t3_batches) - .build(); - - // 
Create nested join structure: - // Join (t1.a = t2.b) - // / \ - // t1 Join(t2.c = t3.d) - // / \ - // t2 t3 - - // First create inner join: t2.c = t3.d - let inner_join_on = - vec![(col("c", &t2_schema).unwrap(), col("d", &t3_schema).unwrap())]; - let inner_join = Arc::new( - HashJoinExec::try_new( - t2_scan, - t3_scan, - inner_join_on, - None, - &JoinType::Inner, - None, - PartitionMode::Partitioned, - datafusion_common::NullEquality::NullEqualsNothing, - false, - ) - .unwrap(), - ); - - // Then create outer join: t1.a = t2.b (from inner join result) - let outer_join_on = vec![( - col("a", &t1_schema).unwrap(), - col("b", &inner_join.schema()).unwrap(), - )]; - let outer_join = Arc::new( - HashJoinExec::try_new( - t1_scan, - inner_join as Arc, - outer_join_on, - None, - &JoinType::Inner, - None, - PartitionMode::Partitioned, - datafusion_common::NullEquality::NullEqualsNothing, - false, - ) - .unwrap(), - ) as Arc; - - // Test that dynamic filters are pushed down correctly through nested joins - insta::assert_snapshot!( - OptimizationTest::new(Arc::clone(&outer_join), FilterPushdown::new_post_optimization(), true), - @r" - OptimizationTest: - input: - - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@0)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, x], file_type=test, pushdown_supported=true - - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@1, d@0)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[b, c, y], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, z], file_type=test, pushdown_supported=true - output: - Ok: - - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@0)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, x], file_type=test, pushdown_supported=true - - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@1, d@0)] - - DataSourceExec: file_groups={1 group: 
[[test.parquet]]}, projection=[b, c, y], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, z], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ] - ", - ); - - // Execute the plan to verify the dynamic filters are properly updated - let mut config = ConfigOptions::default(); - config.execution.parquet.pushdown_filters = true; - config.optimizer.enable_dynamic_filter_pushdown = true; - let plan = FilterPushdown::new_post_optimization() - .optimize(outer_join, &config) - .unwrap(); - let config = SessionConfig::new().with_batch_size(10); - let session_ctx = SessionContext::new_with_config(config); - session_ctx.register_object_store( - ObjectStoreUrl::parse("test://").unwrap().as_ref(), - Arc::new(InMemory::new()), - ); - let state = session_ctx.state(); - let task_ctx = state.task_ctx(); - let mut stream = plan.execute(0, Arc::clone(&task_ctx)).unwrap(); - // Execute to populate the dynamic filters - stream.next().await.unwrap().unwrap(); - - // Verify that both the inner and outer join have updated dynamic filters - insta::assert_snapshot!( - format!("{}", format_plan_for_test(&plan)), - @r" - - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@0)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, x], file_type=test, pushdown_supported=true - - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@1, d@0)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[b, c, y], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ b@0 >= aa AND b@0 <= ab AND b@0 IN (SET) ([aa, ab]) ] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, z], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ d@0 >= ca AND d@0 <= cb AND d@0 IN (SET) ([ca, cb]) ] - " - ); -} - -#[tokio::test] -async fn test_hashjoin_parent_filter_pushdown() { - use 
datafusion_common::JoinType; - use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; - - // Create build side with limited values - let build_batches = vec![ - record_batch!( - ("a", Utf8, ["aa", "ab"]), - ("b", Utf8, ["ba", "bb"]), - ("c", Float64, [1.0, 2.0]) - ) - .unwrap(), - ]; - let build_side_schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Utf8, false), - Field::new("b", DataType::Utf8, false), - Field::new("c", DataType::Float64, false), - ])); - let build_scan = TestScanBuilder::new(Arc::clone(&build_side_schema)) - .with_support(true) - .with_batches(build_batches) - .build(); - - // Create probe side with more values - let probe_batches = vec![ - record_batch!( - ("d", Utf8, ["aa", "ab", "ac", "ad"]), - ("e", Utf8, ["ba", "bb", "bc", "bd"]), - ("f", Float64, [1.0, 2.0, 3.0, 4.0]) - ) - .unwrap(), - ]; - let probe_side_schema = Arc::new(Schema::new(vec![ - Field::new("d", DataType::Utf8, false), - Field::new("e", DataType::Utf8, false), - Field::new("f", DataType::Float64, false), - ])); - let probe_scan = TestScanBuilder::new(Arc::clone(&probe_side_schema)) - .with_support(true) - .with_batches(probe_batches) - .build(); - - // Create HashJoinExec - let on = vec![( - col("a", &build_side_schema).unwrap(), - col("d", &probe_side_schema).unwrap(), - )]; - let join = Arc::new( - HashJoinExec::try_new( - build_scan, - probe_scan, - on, - None, - &JoinType::Inner, - None, - PartitionMode::Partitioned, - datafusion_common::NullEquality::NullEqualsNothing, - false, - ) - .unwrap(), - ); - - // Create filters that can be pushed down to different sides - // We need to create filters in the context of the join output schema - let join_schema = join.schema(); - - // Filter on build side column: a = 'aa' - let left_filter = col_lit_predicate("a", "aa", &join_schema); - // Filter on probe side column: e = 'ba' - let right_filter = col_lit_predicate("e", "ba", &join_schema); - // Filter that references both sides: a = d (should not be 
pushed down) - let cross_filter = Arc::new(BinaryExpr::new( - col("a", &join_schema).unwrap(), - Operator::Eq, - col("d", &join_schema).unwrap(), - )) as Arc; - - let filter = - Arc::new(FilterExec::try_new(left_filter, Arc::clone(&join) as _).unwrap()); - let filter = Arc::new(FilterExec::try_new(right_filter, filter).unwrap()); - let plan = Arc::new(FilterExec::try_new(cross_filter, filter).unwrap()) - as Arc; - - // Test that filters are pushed down correctly to each side of the join - insta::assert_snapshot!( - OptimizationTest::new(Arc::clone(&plan), FilterPushdown::new(), true), - @r" - OptimizationTest: - input: - - FilterExec: a@0 = d@3 - - FilterExec: e@4 = ba - - FilterExec: a@0 = aa - - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, d@0)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, e, f], file_type=test, pushdown_supported=true - output: - Ok: - - FilterExec: a@0 = d@3 - - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, d@0)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=a@0 = aa - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, e, f], file_type=test, pushdown_supported=true, predicate=e@1 = ba - " - ); -} - #[test] fn test_hashjoin_parent_filter_pushdown_same_column_names() { use datafusion_common::JoinType; @@ -2176,64 +1517,6 @@ fn test_hashjoin_parent_filter_pushdown_semi_anti_join() { ); } -/// Integration test for dynamic filter pushdown with TopK. -/// We use an integration test because there are complex interactions in the optimizer rules -/// that the unit tests applying a single optimizer rule do not cover. 
-#[tokio::test] -async fn test_topk_dynamic_filter_pushdown_integration() { - let store = Arc::new(InMemory::new()) as Arc; - let mut cfg = SessionConfig::new(); - cfg.options_mut().execution.parquet.pushdown_filters = true; - cfg.options_mut().execution.parquet.max_row_group_size = 128; - let ctx = SessionContext::new_with_config(cfg); - ctx.register_object_store( - ObjectStoreUrl::parse("memory://").unwrap().as_ref(), - Arc::clone(&store), - ); - ctx.sql( - r" -COPY ( - SELECT 1372708800 + value AS t - FROM generate_series(0, 99999) - ORDER BY t - ) TO 'memory:///1.parquet' -STORED AS PARQUET; - ", - ) - .await - .unwrap() - .collect() - .await - .unwrap(); - - // Register the file with the context - ctx.register_parquet( - "topk_pushdown", - "memory:///1.parquet", - ParquetReadOptions::default(), - ) - .await - .unwrap(); - - // Create a TopK query that will use dynamic filter pushdown - // Note that we use t * t as the order by expression to avoid - // the order pushdown optimizer from optimizing away the TopK. 
- let df = ctx - .sql(r"EXPLAIN ANALYZE SELECT t FROM topk_pushdown ORDER BY t * t LIMIT 10;") - .await - .unwrap(); - let batches = df.collect().await.unwrap(); - let explain = format!("{}", pretty_format_batches(&batches).unwrap()); - - assert!(explain.contains("output_rows=128")); // Read 1 row group - assert!(explain.contains("t@0 < 1884329474306198481")); // Dynamic filter was applied - assert!( - explain.contains("pushdown_rows_matched=128, pushdown_rows_pruned=99.87 K"), - "{explain}" - ); - // Pushdown pruned most rows -} - #[test] fn test_filter_pushdown_through_union() { let scan1 = TestScanBuilder::new(schema()).with_support(true).build(); @@ -2450,233 +1733,11 @@ fn schema() -> SchemaRef { Arc::clone(&TEST_SCHEMA) } -struct ProjectionDynFilterTestCase { - schema: SchemaRef, - batches: Vec, - projection: Vec<(Arc, String)>, - sort_expr: PhysicalSortExpr, - expected_plans: Vec, -} - -async fn run_projection_dyn_filter_case(case: ProjectionDynFilterTestCase) { - let ProjectionDynFilterTestCase { - schema, - batches, - projection, - sort_expr, - expected_plans, - } = case; - - let scan = TestScanBuilder::new(Arc::clone(&schema)) - .with_support(true) - .with_batches(batches) - .build(); - - let projection_exec = Arc::new(ProjectionExec::try_new(projection, scan).unwrap()); - - let sort = Arc::new( - SortExec::new(LexOrdering::new(vec![sort_expr]).unwrap(), projection_exec) - .with_fetch(Some(2)), - ) as Arc; - - let mut config = ConfigOptions::default(); - config.execution.parquet.pushdown_filters = true; - config.optimizer.enable_dynamic_filter_pushdown = true; - - let optimized_plan = FilterPushdown::new_post_optimization() - .optimize(Arc::clone(&sort), &config) - .unwrap(); - - pretty_assertions::assert_eq!( - format_plan_for_test(&optimized_plan).trim(), - expected_plans[0].trim() - ); - - let config = SessionConfig::new().with_batch_size(2); - let session_ctx = SessionContext::new_with_config(config); - session_ctx.register_object_store( - 
ObjectStoreUrl::parse("test://").unwrap().as_ref(), - Arc::new(InMemory::new()), - ); - let state = session_ctx.state(); - let task_ctx = state.task_ctx(); - let mut stream = optimized_plan.execute(0, Arc::clone(&task_ctx)).unwrap(); - for (idx, expected_plan) in expected_plans.iter().enumerate().skip(1) { - stream.next().await.unwrap().unwrap(); - let formatted_plan = format_plan_for_test(&optimized_plan); - pretty_assertions::assert_eq!( - formatted_plan.trim(), - expected_plan.trim(), - "Mismatch at iteration {}", - idx - ); - } -} - -#[tokio::test] -async fn test_topk_with_projection_transformation_on_dyn_filter() { - let schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Int32, false), - Field::new("b", DataType::Utf8, false), - Field::new("c", DataType::Float64, false), - ])); - let simple_abc = vec![ - record_batch!( - ("a", Int32, [1, 2, 3]), - ("b", Utf8, ["x", "y", "z"]), - ("c", Float64, [1.0, 2.0, 3.0]) - ) - .unwrap(), - ]; - - // Case 1: Reordering [b, a] - run_projection_dyn_filter_case(ProjectionDynFilterTestCase { - schema: Arc::clone(&schema), - batches: simple_abc.clone(), - projection: vec![ - (col("b", &schema).unwrap(), "b".to_string()), - (col("a", &schema).unwrap(), "a".to_string()), - ], - sort_expr: PhysicalSortExpr::new( - Arc::new(Column::new("a", 1)), - SortOptions::default(), - ), - expected_plans: vec![ -r#" - SortExec: TopK(fetch=2), expr=[a@1 ASC], preserve_partitioning=[false] - - ProjectionExec: expr=[b@1 as b, a@0 as a] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ]"#.to_string(), -r#" - SortExec: TopK(fetch=2), expr=[a@1 ASC], preserve_partitioning=[false], filter=[a@1 IS NULL OR a@1 < 2] - - ProjectionExec: expr=[b@1 as b, a@0 as a] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 IS NULL OR a@0 < 2 
]"#.to_string()] - }) - .await; - - // Case 2: Pruning [a] - run_projection_dyn_filter_case(ProjectionDynFilterTestCase { - schema: Arc::clone(&schema), - batches: simple_abc.clone(), - projection: vec![(col("a", &schema).unwrap(), "a".to_string())], - sort_expr: PhysicalSortExpr::new( - Arc::new(Column::new("a", 0)), - SortOptions::default(), - ), - expected_plans: vec![ - r#" - SortExec: TopK(fetch=2), expr=[a@0 ASC], preserve_partitioning=[false] - - ProjectionExec: expr=[a@0 as a] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ]"#.to_string(), - r#" - SortExec: TopK(fetch=2), expr=[a@0 ASC], preserve_partitioning=[false], filter=[a@0 IS NULL OR a@0 < 2] - - ProjectionExec: expr=[a@0 as a] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 IS NULL OR a@0 < 2 ]"#.to_string(), - ], - }) - .await; - - // Case 3: Identity [a, b] - run_projection_dyn_filter_case(ProjectionDynFilterTestCase { - schema: Arc::clone(&schema), - batches: simple_abc.clone(), - projection: vec![ - (col("a", &schema).unwrap(), "a".to_string()), - (col("b", &schema).unwrap(), "b".to_string()), - ], - sort_expr: PhysicalSortExpr::new( - Arc::new(Column::new("a", 0)), - SortOptions::default(), - ), - expected_plans: vec![ - r#" - SortExec: TopK(fetch=2), expr=[a@0 ASC], preserve_partitioning=[false] - - ProjectionExec: expr=[a@0 as a, b@1 as b] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ]"#.to_string(), - r#" - SortExec: TopK(fetch=2), expr=[a@0 ASC], preserve_partitioning=[false], filter=[a@0 IS NULL OR a@0 < 2] - - ProjectionExec: expr=[a@0 as a, b@1 as b] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, 
pushdown_supported=true, predicate=DynamicFilter [ a@0 IS NULL OR a@0 < 2 ]"#.to_string(), - ], - }) - .await; - - // Case 4: Expressions [a + 1, b] - run_projection_dyn_filter_case(ProjectionDynFilterTestCase { - schema: Arc::clone(&schema), - batches: simple_abc.clone(), - projection: vec![ - ( - Arc::new(BinaryExpr::new( - col("a", &schema).unwrap(), - Operator::Plus, - Arc::new(Literal::new(ScalarValue::Int32(Some(1)))), - )), - "a_plus_1".to_string(), - ), - (col("b", &schema).unwrap(), "b".to_string()), - ], - sort_expr: PhysicalSortExpr::new( - Arc::new(Column::new("a_plus_1", 0)), - SortOptions::default(), - ), - expected_plans: vec![ - r#" - SortExec: TopK(fetch=2), expr=[a_plus_1@0 ASC], preserve_partitioning=[false] - - ProjectionExec: expr=[a@0 + 1 as a_plus_1, b@1 as b] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ]"#.to_string(), - r#" - SortExec: TopK(fetch=2), expr=[a_plus_1@0 ASC], preserve_partitioning=[false], filter=[a_plus_1@0 IS NULL OR a_plus_1@0 < 3] - - ProjectionExec: expr=[a@0 + 1 as a_plus_1, b@1 as b] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 + 1 IS NULL OR a@0 + 1 < 3 ]"#.to_string(), - ], - }) - .await; - - // Case 5: [a as b, b as a] (swapped columns) - run_projection_dyn_filter_case(ProjectionDynFilterTestCase { - schema: Arc::clone(&schema), - batches: simple_abc.clone(), - projection: vec![ - (col("a", &schema).unwrap(), "b".to_string()), - (col("b", &schema).unwrap(), "a".to_string()), - ], - sort_expr: PhysicalSortExpr::new( - Arc::new(Column::new("b", 0)), - SortOptions::default(), - ), - expected_plans: vec![ - r#" - SortExec: TopK(fetch=2), expr=[b@0 ASC], preserve_partitioning=[false] - - ProjectionExec: expr=[a@0 as b, b@1 as a] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, 
b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ]"#.to_string(), - r#" - SortExec: TopK(fetch=2), expr=[b@0 ASC], preserve_partitioning=[false], filter=[b@0 IS NULL OR b@0 < 2] - - ProjectionExec: expr=[a@0 as b, b@1 as a] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 IS NULL OR a@0 < 2 ]"#.to_string(), - ], - }) - .await; - - // Case 6: Confusing expr [a + 1 as a, b] - run_projection_dyn_filter_case(ProjectionDynFilterTestCase { - schema: Arc::clone(&schema), - batches: simple_abc.clone(), - projection: vec![ - ( - Arc::new(BinaryExpr::new( - col("a", &schema).unwrap(), - Operator::Plus, - Arc::new(Literal::new(ScalarValue::Int32(Some(1)))), - )), - "a".to_string(), - ), - (col("b", &schema).unwrap(), "b".to_string()), - ], - sort_expr: PhysicalSortExpr::new( - Arc::new(Column::new("a", 0)), - SortOptions::default(), - ), - expected_plans: vec![ - r#" - SortExec: TopK(fetch=2), expr=[a@0 ASC], preserve_partitioning=[false] - - ProjectionExec: expr=[a@0 + 1 as a, b@1 as b] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ]"#.to_string(), - r#" - SortExec: TopK(fetch=2), expr=[a@0 ASC], preserve_partitioning=[false], filter=[a@0 IS NULL OR a@0 < 3] - - ProjectionExec: expr=[a@0 + 1 as a, b@1 as b] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 + 1 IS NULL OR a@0 + 1 < 3 ]"#.to_string(), - ], - }) - .await; -} +// test_topk_with_projection_transformation_on_dyn_filter has been ported +// to datafusion/sqllogictest/test_files/push_down_filter_parquet.slt; see +// `topk_proj` fixture for the 4 representative cases (reorder, prune, +// expression, alias shadowing). 
The `run_projection_dyn_filter_case` +// harness was removed along with it. /// Returns a predicate that is a binary expression col = lit fn col_lit_predicate( @@ -2693,382 +1754,12 @@ fn col_lit_predicate( } // ==== Aggregate Dynamic Filter tests ==== - -// ---- Test Utilities ---- -struct AggregateDynFilterCase<'a> { - schema: SchemaRef, - batches: Vec, - aggr_exprs: Vec, - expected_before: Option<&'a str>, - expected_after: Option<&'a str>, - scan_support: bool, -} - -async fn run_aggregate_dyn_filter_case(case: AggregateDynFilterCase<'_>) { - let AggregateDynFilterCase { - schema, - batches, - aggr_exprs, - expected_before, - expected_after, - scan_support, - } = case; - - let scan = TestScanBuilder::new(Arc::clone(&schema)) - .with_support(scan_support) - .with_batches(batches) - .build(); - - let aggr_exprs: Vec<_> = aggr_exprs - .into_iter() - .map(|expr| Arc::new(expr) as Arc) - .collect(); - let aggr_len = aggr_exprs.len(); - - let plan: Arc = Arc::new( - AggregateExec::try_new( - AggregateMode::Partial, - PhysicalGroupBy::new_single(vec![]), - aggr_exprs, - vec![None; aggr_len], - scan, - Arc::clone(&schema), - ) - .unwrap(), - ); - - let mut config = ConfigOptions::default(); - config.execution.parquet.pushdown_filters = true; - config.optimizer.enable_dynamic_filter_pushdown = true; - - let optimized = FilterPushdown::new_post_optimization() - .optimize(plan, &config) - .unwrap(); - - let before = format_plan_for_test(&optimized); - if let Some(expected) = expected_before { - assert!( - before.contains(expected), - "expected `{expected}` before execution, got: {before}" - ); - } else { - assert!( - !before.contains("DynamicFilter ["), - "dynamic filter unexpectedly present before execution: {before}" - ); - } - - let session_ctx = SessionContext::new(); - session_ctx.register_object_store( - ObjectStoreUrl::parse("test://").unwrap().as_ref(), - Arc::new(InMemory::new()), - ); - let task_ctx = session_ctx.state().task_ctx(); - let mut stream = 
optimized.execute(0, Arc::clone(&task_ctx)).unwrap(); - let _ = stream.next().await.transpose().unwrap(); - - let after = format_plan_for_test(&optimized); - if let Some(expected) = expected_after { - assert!( - after.contains(expected), - "expected `{expected}` after execution, got: {after}" - ); - } else { - assert!( - !after.contains("DynamicFilter ["), - "dynamic filter unexpectedly present after execution: {after}" - ); - } -} - -// ---- Test Cases ---- -// Cases covered below: -// 1. `min(a)` and `max(a)` baseline. -// 2. Unsupported expression input (`min(a+1)`). -// 3. Multiple supported columns (same column vs different columns). -// 4. Mixed supported + unsupported aggregates. -// 5. Entirely NULL input to surface current bound behavior. -// 6. End-to-end tests on parquet files - -/// `MIN(a)`: able to pushdown dynamic filter -#[tokio::test] -async fn test_aggregate_dynamic_filter_min_simple() { - // Single min(a) showcases the base case. - let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)])); - let batches = vec![record_batch!(("a", Int32, [5, 1, 3, 8])).unwrap()]; - - let min_expr = - AggregateExprBuilder::new(min_udaf(), vec![col("a", &schema).unwrap()]) - .schema(Arc::clone(&schema)) - .alias("min_a") - .build() - .unwrap(); - - run_aggregate_dyn_filter_case(AggregateDynFilterCase { - schema, - batches, - aggr_exprs: vec![min_expr], - expected_before: Some("DynamicFilter [ empty ]"), - expected_after: Some("DynamicFilter [ a@0 < 1 ]"), - scan_support: true, - }) - .await; -} - -/// `MAX(a)`: able to pushdown dynamic filter -#[tokio::test] -async fn test_aggregate_dynamic_filter_max_simple() { - // Single max(a) mirrors the base case on the upper bound. 
- let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)])); - let batches = vec![record_batch!(("a", Int32, [5, 1, 3, 8])).unwrap()]; - - let max_expr = - AggregateExprBuilder::new(max_udaf(), vec![col("a", &schema).unwrap()]) - .schema(Arc::clone(&schema)) - .alias("max_a") - .build() - .unwrap(); - - run_aggregate_dyn_filter_case(AggregateDynFilterCase { - schema, - batches, - aggr_exprs: vec![max_expr], - expected_before: Some("DynamicFilter [ empty ]"), - expected_after: Some("DynamicFilter [ a@0 > 8 ]"), - scan_support: true, - }) - .await; -} - -/// `MIN(a+1)`: Can't pushdown dynamic filter -#[tokio::test] -async fn test_aggregate_dynamic_filter_min_expression_not_supported() { - let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)])); - let batches = vec![record_batch!(("a", Int32, [5, 1, 3, 8])).unwrap()]; - - let expr: Arc = Arc::new(BinaryExpr::new( - col("a", &schema).unwrap(), - Operator::Plus, - Arc::new(Literal::new(ScalarValue::Int32(Some(1)))), - )); - let min_expr = AggregateExprBuilder::new(min_udaf(), vec![expr]) - .schema(Arc::clone(&schema)) - .alias("min_a_plus_one") - .build() - .unwrap(); - - run_aggregate_dyn_filter_case(AggregateDynFilterCase { - schema, - batches, - aggr_exprs: vec![min_expr], - expected_before: None, - expected_after: None, - scan_support: true, - }) - .await; -} - -/// `MIN(a), MAX(a)`: Pushdown dynamic filter like `(a<1) or (a>8)` -#[tokio::test] -async fn test_aggregate_dynamic_filter_min_max_same_column() { - let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)])); - let batches = vec![record_batch!(("a", Int32, [5, 1, 3, 8])).unwrap()]; - - let min_expr = - AggregateExprBuilder::new(min_udaf(), vec![col("a", &schema).unwrap()]) - .schema(Arc::clone(&schema)) - .alias("min_a") - .build() - .unwrap(); - let max_expr = - AggregateExprBuilder::new(max_udaf(), vec![col("a", &schema).unwrap()]) - .schema(Arc::clone(&schema)) - .alias("max_a") - 
.build() - .unwrap(); - - run_aggregate_dyn_filter_case(AggregateDynFilterCase { - schema, - batches, - aggr_exprs: vec![min_expr, max_expr], - expected_before: Some("DynamicFilter [ empty ]"), - expected_after: Some("DynamicFilter [ a@0 < 1 OR a@0 > 8 ]"), - scan_support: true, - }) - .await; -} - -/// `MIN(a), MAX(b)`: Pushdown dynamic filter like `(a<1) or (b>9)` -#[tokio::test] -async fn test_aggregate_dynamic_filter_min_max_different_columns() { - let schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Int32, true), - Field::new("b", DataType::Int32, true), - ])); - let batches = vec![ - record_batch!(("a", Int32, [5, 1, 3, 8]), ("b", Int32, [7, 2, 4, 9])).unwrap(), - ]; - - let min_expr = - AggregateExprBuilder::new(min_udaf(), vec![col("a", &schema).unwrap()]) - .schema(Arc::clone(&schema)) - .alias("min_a") - .build() - .unwrap(); - let max_expr = - AggregateExprBuilder::new(max_udaf(), vec![col("b", &schema).unwrap()]) - .schema(Arc::clone(&schema)) - .alias("max_b") - .build() - .unwrap(); - - run_aggregate_dyn_filter_case(AggregateDynFilterCase { - schema, - batches, - aggr_exprs: vec![min_expr, max_expr], - expected_before: Some("DynamicFilter [ empty ]"), - expected_after: Some("DynamicFilter [ a@0 < 1 OR b@1 > 9 ]"), - scan_support: true, - }) - .await; -} - -/// Mix of supported/unsupported aggregates retains only the valid ones. 
-/// `MIN(a), MAX(a), MAX(b), MIN(c+1)`: Pushdown dynamic filter like `(a<1) or (a>8) OR (b>12)` -#[tokio::test] -async fn test_aggregate_dynamic_filter_multiple_mixed_expressions() { - let schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Int32, true), - Field::new("b", DataType::Int32, true), - Field::new("c", DataType::Int32, true), - ])); - let batches = vec![ - record_batch!( - ("a", Int32, [5, 1, 3, 8]), - ("b", Int32, [10, 4, 6, 12]), - ("c", Int32, [100, 70, 90, 110]) - ) - .unwrap(), - ]; - - let min_a = AggregateExprBuilder::new(min_udaf(), vec![col("a", &schema).unwrap()]) - .schema(Arc::clone(&schema)) - .alias("min_a") - .build() - .unwrap(); - let max_a = AggregateExprBuilder::new(max_udaf(), vec![col("a", &schema).unwrap()]) - .schema(Arc::clone(&schema)) - .alias("max_a") - .build() - .unwrap(); - let max_b = AggregateExprBuilder::new(max_udaf(), vec![col("b", &schema).unwrap()]) - .schema(Arc::clone(&schema)) - .alias("max_b") - .build() - .unwrap(); - let expr_c: Arc = Arc::new(BinaryExpr::new( - col("c", &schema).unwrap(), - Operator::Plus, - Arc::new(Literal::new(ScalarValue::Int32(Some(1)))), - )); - let min_c_expr = AggregateExprBuilder::new(min_udaf(), vec![expr_c]) - .schema(Arc::clone(&schema)) - .alias("min_c_plus_one") - .build() - .unwrap(); - - run_aggregate_dyn_filter_case(AggregateDynFilterCase { - schema, - batches, - aggr_exprs: vec![min_a, max_a, max_b, min_c_expr], - expected_before: Some("DynamicFilter [ empty ]"), - expected_after: Some("DynamicFilter [ a@0 < 1 OR a@0 > 8 OR b@1 > 12 ]"), - scan_support: true, - }) - .await; -} - -/// Don't tighten the dynamic filter if all inputs are null -#[tokio::test] -async fn test_aggregate_dynamic_filter_min_all_nulls() { - let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)])); - let batches = vec![record_batch!(("a", Int32, [None, None, None, None])).unwrap()]; - - let min_expr = - AggregateExprBuilder::new(min_udaf(), vec![col("a", 
&schema).unwrap()]) - .schema(Arc::clone(&schema)) - .alias("min_a") - .build() - .unwrap(); - - run_aggregate_dyn_filter_case(AggregateDynFilterCase { - schema, - batches, - aggr_exprs: vec![min_expr], - expected_before: Some("DynamicFilter [ empty ]"), - // After reading the input it hasn't a meaningful bound to update, so the - // predicate `true` means don't filter out anything - expected_after: Some("DynamicFilter [ true ]"), - scan_support: true, - }) - .await; -} - -/// Test aggregate dynamic filter is working when reading parquet files -/// -/// Runs 'select max(id) from test_table where id > 1', and ensure some file ranges -/// pruned by the dynamic filter. -#[tokio::test] -async fn test_aggregate_dynamic_filter_parquet_e2e() { - let config = SessionConfig::new() - .with_collect_statistics(true) - .with_target_partitions(2) - .set_bool("datafusion.optimizer.enable_dynamic_filter_pushdown", true) - .set_bool("datafusion.execution.parquet.pushdown_filters", true); - let ctx = SessionContext::new_with_config(config); - - let data_path = format!( - "{}/tests/data/test_statistics_per_partition/", - env!("CARGO_MANIFEST_DIR") - ); - - ctx.register_parquet("test_table", &data_path, ParquetReadOptions::default()) - .await - .unwrap(); - - // partition 1: - // files: ..03-01(id=4), ..03-02(id=3) - // partition 1: - // files: ..03-03(id=2), ..03-04(id=1) - // - // In partition 1, after reading the first file, the dynamic filter will be update - // to "id > 4", so the `..03-02` file must be able to get pruned out - let df = ctx - .sql("explain analyze select max(id) from test_table where id > 1") - .await - .unwrap(); - - let result = df.collect().await.unwrap(); - - let formatted = pretty_format_batches(&result).unwrap(); - let explain_analyze = format!("{formatted}"); - - // Capture "2" from "files_ranges_pruned_statistics=4 total → 2 matched" - let re = Regex::new( - r"files_ranges_pruned_statistics\s*=\s*(\d+)\s*total\s*[→>\-]\s*(\d+)\s*matched", - ) - .unwrap(); 
- - if let Some(caps) = re.captures(&explain_analyze) { - let matched_num: i32 = caps[2].parse().unwrap(); - assert!( - matched_num < 4, - "Total 4 files, if some pruned, the matched count is < 4" - ); - } else { - unreachable!("metrics should exist") - } -} +// +// The end-to-end min/max dynamic filter cases (simple/min/max/mixed/all-nulls) +// have been ported to +// `datafusion/sqllogictest/test_files/push_down_filter_regression.slt`. +// The `run_aggregate_dyn_filter_case` harness used to drive them was removed +// along with the test functions. /// Non-partial (Single) aggregates should skip dynamic filter initialization. #[test] @@ -3115,139 +1806,6 @@ fn test_aggregate_dynamic_filter_not_created_for_single_mode() { ); } -#[tokio::test] -async fn test_aggregate_filter_pushdown() { - // Test that filters can pass through AggregateExec even with aggregate functions - // when the filter references grouping columns - // Simulates: SELECT a, COUNT(b) FROM table WHERE a = 'x' GROUP BY a - - let batches = vec![ - record_batch!(("a", Utf8, ["x", "y"]), ("b", Utf8, ["foo", "bar"])).unwrap(), - ]; - - let scan = TestScanBuilder::new(schema()) - .with_support(true) - .with_batches(batches) - .build(); - - // Create an aggregate: GROUP BY a with COUNT(b) - let group_by = PhysicalGroupBy::new_single(vec![( - col("a", &schema()).unwrap(), - "a".to_string(), - )]); - - // Add COUNT aggregate - let count_expr = - AggregateExprBuilder::new(count_udaf(), vec![col("b", &schema()).unwrap()]) - .schema(schema()) - .alias("count") - .build() - .unwrap(); - - let aggregate = Arc::new( - AggregateExec::try_new( - AggregateMode::Partial, - group_by, - vec![count_expr.into()], // Has aggregate function - vec![None], // No filter on the aggregate function - Arc::clone(&scan), - schema(), - ) - .unwrap(), - ); - - // Add a filter on the grouping column 'a' - let predicate = col_lit_predicate("a", "x", &schema()); - let plan = Arc::new(FilterExec::try_new(predicate, aggregate).unwrap()) 
- as Arc; - - // Even with aggregate functions, filter on grouping column should be pushed through - insta::assert_snapshot!( - OptimizationTest::new(Arc::clone(&plan), FilterPushdown::new(), true), - @r" - OptimizationTest: - input: - - FilterExec: a@0 = x - - AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - output: - Ok: - - AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count], ordering_mode=Sorted - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=a@0 = x - " - ); -} - -#[tokio::test] -async fn test_no_pushdown_filter_on_aggregate_result() { - // Test that filters on aggregate results (not grouping columns) are NOT pushed through - // SELECT a, COUNT(b) as cnt FROM table GROUP BY a HAVING cnt > 5 - // The filter on 'cnt' cannot be pushed down because it's an aggregate result - - let batches = vec![ - record_batch!(("a", Utf8, ["x", "y"]), ("b", Utf8, ["foo", "bar"])).unwrap(), - ]; - - let scan = TestScanBuilder::new(schema()) - .with_support(true) - .with_batches(batches) - .build(); - - // Create an aggregate: GROUP BY a with COUNT(b) - let group_by = PhysicalGroupBy::new_single(vec![( - col("a", &schema()).unwrap(), - "a".to_string(), - )]); - - // Add COUNT aggregate - let count_expr = - AggregateExprBuilder::new(count_udaf(), vec![col("b", &schema()).unwrap()]) - .schema(schema()) - .alias("count") - .build() - .unwrap(); - - let aggregate = Arc::new( - AggregateExec::try_new( - AggregateMode::Partial, - group_by, - vec![count_expr.into()], - vec![None], - Arc::clone(&scan), - schema(), - ) - .unwrap(), - ); - - // Add a filter on the aggregate output column - // This simulates filtering on COUNT result, which should NOT be pushed through - let agg_schema = aggregate.schema(); - let predicate = Arc::new(BinaryExpr::new( - 
Arc::new(Column::new_with_schema("count[count]", &agg_schema).unwrap()), - Operator::Gt, - Arc::new(Literal::new(ScalarValue::Int64(Some(5)))), - )); - let plan = Arc::new(FilterExec::try_new(predicate, aggregate).unwrap()) - as Arc; - - // The filter should NOT be pushed through the aggregate since it's on an aggregate result - insta::assert_snapshot!( - OptimizationTest::new(Arc::clone(&plan), FilterPushdown::new(), true), - @r" - OptimizationTest: - input: - - FilterExec: count[count]@1 > 5 - - AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - output: - Ok: - - FilterExec: count[count]@1 > 5 - - AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - " - ); -} - #[test] fn test_pushdown_filter_on_non_first_grouping_column() { // Test that filters on non-first grouping columns are still pushed down @@ -3628,209 +2186,61 @@ fn test_pushdown_through_aggregate_grouping_sets_with_reordered_input() { aggregate_expr, vec![None], projection, - reordered_schema, - ) - .unwrap(), - ); - - let agg_output_schema = aggregate.schema(); - - // Filter on b (present in all grouping sets) should be pushed down - let predicate = col_lit_predicate("b", "bar", &agg_output_schema); - let plan = Arc::new(FilterExec::try_new(predicate, aggregate.clone()).unwrap()); - - insta::assert_snapshot!( - OptimizationTest::new(plan, FilterPushdown::new(), true), - @r" - OptimizationTest: - input: - - FilterExec: b@1 = bar - - AggregateExec: mode=Final, gby=[(a@1 as a, b@2 as b), (NULL as a, b@2 as b)], aggr=[cnt] - - ProjectionExec: expr=[c@2 as c, a@0 as a, b@1 as b] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - output: - Ok: - - AggregateExec: mode=Final, 
gby=[(a@1 as a, b@2 as b), (NULL as a, b@2 as b)], aggr=[cnt], ordering_mode=PartiallySorted([1]) - - ProjectionExec: expr=[c@2 as c, a@0 as a, b@1 as b] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=b@1 = bar - " - ); - - // Filter on a (missing from second grouping set) should not be pushed down - let predicate = col_lit_predicate("a", "foo", &agg_output_schema); - let plan = Arc::new(FilterExec::try_new(predicate, aggregate).unwrap()); - - insta::assert_snapshot!( - OptimizationTest::new(plan, FilterPushdown::new(), true), - @r" - OptimizationTest: - input: - - FilterExec: a@0 = foo - - AggregateExec: mode=Final, gby=[(a@1 as a, b@2 as b), (NULL as a, b@2 as b)], aggr=[cnt] - - ProjectionExec: expr=[c@2 as c, a@0 as a, b@1 as b] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - output: - Ok: - - FilterExec: a@0 = foo - - AggregateExec: mode=Final, gby=[(a@1 as a, b@2 as b), (NULL as a, b@2 as b)], aggr=[cnt] - - ProjectionExec: expr=[c@2 as c, a@0 as a, b@1 as b] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - " - ); -} - -/// Regression test for https://github.com/apache/datafusion/issues/21065. -/// -/// Given a plan similar to the following, ensure that the filter is pushed down -/// through an AggregateExec whose input columns are reordered by a ProjectionExec. 
-#[tokio::test] -async fn test_hashjoin_dynamic_filter_pushdown_through_aggregate_with_reordered_input() { - // Build side - let build_batches = vec![record_batch!(("a", Utf8, ["h1", "h2"])).unwrap()]; - let build_schema = - Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, false)])); - let build_scan = TestScanBuilder::new(Arc::clone(&build_schema)) - .with_support(true) - .with_batches(build_batches) - .build(); - - // Probe side - let probe_batches = vec![ - record_batch!( - ("a", Utf8, ["h1", "h2", "h3", "h4"]), - ("value", Float64, [1.0, 2.0, 3.0, 4.0]) - ) - .unwrap(), - ]; - let probe_schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Utf8, false), - Field::new("value", DataType::Float64, false), - ])); - let probe_scan = TestScanBuilder::new(Arc::clone(&probe_schema)) - .with_support(true) - .with_batches(probe_batches) - .build(); - - // ProjectionExec reorders (a, value) → (value, a) - let reordered_schema = Arc::new(Schema::new(vec![ - Field::new("value", DataType::Float64, false), - Field::new("a", DataType::Utf8, false), - ])); - let projection = Arc::new( - ProjectionExec::try_new( - vec![ - (col("value", &probe_schema).unwrap(), "value".to_string()), - (col("a", &probe_schema).unwrap(), "a".to_string()), - ], - probe_scan, - ) - .unwrap(), - ); - - // AggregateExec: GROUP BY a@1, min(value@0) - let aggregate_expr = vec![ - AggregateExprBuilder::new( - min_udaf(), - vec![col("value", &reordered_schema).unwrap()], - ) - .schema(reordered_schema.clone()) - .alias("min_value") - .build() - .map(Arc::new) - .unwrap(), - ]; - let group_by = PhysicalGroupBy::new_single(vec![( - col("a", &reordered_schema).unwrap(), // a@1 in input - "a".to_string(), - )]); - - let aggregate = Arc::new( - AggregateExec::try_new( - AggregateMode::Single, - group_by, - aggregate_expr, - vec![None], - projection, - reordered_schema, - ) - .unwrap(), - ); - - // Aggregate output schema: (a@0, min_value@1) - let agg_output_schema = aggregate.schema(); - - // 
Join the build and probe side - let plan = Arc::new( - HashJoinExec::try_new( - build_scan, - aggregate, - vec![( - col("a", &build_schema).unwrap(), - col("a", &agg_output_schema).unwrap(), - )], - None, - &JoinType::Inner, - None, - PartitionMode::CollectLeft, - datafusion_common::NullEquality::NullEqualsNothing, - false, + reordered_schema, ) .unwrap(), - ) as Arc; + ); + + let agg_output_schema = aggregate.schema(); + + // Filter on b (present in all grouping sets) should be pushed down + let predicate = col_lit_predicate("b", "bar", &agg_output_schema); + let plan = Arc::new(FilterExec::try_new(predicate, aggregate.clone()).unwrap()); - // The HashJoin's dynamic filter on `a` should push - // through the aggregate and reach the probe-side DataSource. insta::assert_snapshot!( - OptimizationTest::new(Arc::clone(&plan), FilterPushdown::new_post_optimization(), true), + OptimizationTest::new(plan, FilterPushdown::new(), true), @r" OptimizationTest: input: - - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a], file_type=test, pushdown_supported=true - - AggregateExec: mode=Single, gby=[a@1 as a], aggr=[min_value] - - ProjectionExec: expr=[value@1 as value, a@0 as a] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, value], file_type=test, pushdown_supported=true + - FilterExec: b@1 = bar + - AggregateExec: mode=Final, gby=[(a@1 as a, b@2 as b), (NULL as a, b@2 as b)], aggr=[cnt] + - ProjectionExec: expr=[c@2 as c, a@0 as a, b@1 as b] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true output: Ok: - - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a], file_type=test, pushdown_supported=true - - AggregateExec: mode=Single, gby=[a@1 as a], aggr=[min_value] - - ProjectionExec: expr=[value@1 as 
value, a@0 as a] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, value], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ] + - AggregateExec: mode=Final, gby=[(a@1 as a, b@2 as b), (NULL as a, b@2 as b)], aggr=[cnt], ordering_mode=PartiallySorted([1]) + - ProjectionExec: expr=[c@2 as c, a@0 as a, b@1 as b] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=b@1 = bar " ); - // Actually execute the plan to verify the dynamic filter is populated - let mut config = ConfigOptions::default(); - config.execution.parquet.pushdown_filters = true; - let plan = FilterPushdown::new_post_optimization() - .optimize(plan, &config) - .unwrap(); - - let session_config = SessionConfig::new().with_batch_size(10); - let session_ctx = SessionContext::new_with_config(session_config); - session_ctx.register_object_store( - ObjectStoreUrl::parse("test://").unwrap().as_ref(), - Arc::new(InMemory::new()), - ); - let state = session_ctx.state(); - let task_ctx = state.task_ctx(); - let mut stream = plan.execute(0, Arc::clone(&task_ctx)).unwrap(); - stream.next().await.unwrap().unwrap(); + // Filter on a (missing from second grouping set) should not be pushed down + let predicate = col_lit_predicate("a", "foo", &agg_output_schema); + let plan = Arc::new(FilterExec::try_new(predicate, aggregate).unwrap()); - // After execution, the dynamic filter should be populated with values insta::assert_snapshot!( - format!("{}", format_plan_for_test(&plan)), + OptimizationTest::new(plan, FilterPushdown::new(), true), @r" - - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a], file_type=test, pushdown_supported=true - - AggregateExec: mode=Single, gby=[a@1 as a], aggr=[min_value] - - ProjectionExec: expr=[value@1 as value, a@0 as a] - - DataSourceExec: file_groups={1 group: 
[[test.parquet]]}, projection=[a, value], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 >= h1 AND a@0 <= h2 AND a@0 IN (SET) ([h1, h2]) ] + OptimizationTest: + input: + - FilterExec: a@0 = foo + - AggregateExec: mode=Final, gby=[(a@1 as a, b@2 as b), (NULL as a, b@2 as b)], aggr=[cnt] + - ProjectionExec: expr=[c@2 as c, a@0 as a, b@1 as b] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true + output: + Ok: + - FilterExec: a@0 = foo + - AggregateExec: mode=Final, gby=[(a@1 as a, b@2 as b), (NULL as a, b@2 as b)], aggr=[cnt] + - ProjectionExec: expr=[c@2 as c, a@0 as a, b@1 as b] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true " ); } +/// Regression test for https://github.com/apache/datafusion/issues/21065. +/// +/// Given a plan similar to the following, ensure that the filter is pushed down +/// through an AggregateExec whose input columns are reordered by a ProjectionExec. #[test] fn test_pushdown_with_computed_grouping_key() { // Test filter pushdown with computed grouping expression @@ -3892,6 +2302,11 @@ fn test_pushdown_with_computed_grouping_key() { ); } +// Not portable to sqllogictest: in CollectLeft (the mode SQL picks for small +// data), an empty build side short-circuits the HashJoin and the probe scan +// is never executed, so its dynamic filter stays at `[ empty ]` rather than +// collapsing to `[ false ]`. The Rust test uses PartitionMode::Partitioned +// on a hand-wired plan, which does trigger the `false` path. 
#[tokio::test] async fn test_hashjoin_dynamic_filter_all_partitions_empty() { use datafusion_common::JoinType; @@ -4024,145 +2439,9 @@ async fn test_hashjoin_dynamic_filter_all_partitions_empty() { ); } -#[tokio::test] -async fn test_hashjoin_dynamic_filter_with_nulls() { - use datafusion_common::JoinType; - use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; - - // Test scenario where build side has NULL values in join keys - // This validates NULL handling in bounds computation and filter generation - - // Create build side with NULL values - let build_batch = RecordBatch::try_new( - Arc::new(Schema::new(vec![ - Field::new("a", DataType::Utf8, true), // nullable - Field::new("b", DataType::Int32, true), // nullable - ])), - vec![ - Arc::new(StringArray::from(vec![Some("aa"), None, Some("ab")])), - Arc::new(Int32Array::from(vec![Some(1), Some(2), None])), - ], - ) - .unwrap(); - let build_batches = vec![build_batch]; - let build_side_schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Utf8, true), - Field::new("b", DataType::Int32, true), - ])); - let build_scan = TestScanBuilder::new(Arc::clone(&build_side_schema)) - .with_support(true) - .with_batches(build_batches) - .build(); - - // Create probe side with nullable fields - let probe_batch = RecordBatch::try_new( - Arc::new(Schema::new(vec![ - Field::new("a", DataType::Utf8, true), - Field::new("b", DataType::Int32, true), - Field::new("c", DataType::Float64, false), - ])), - vec![ - Arc::new(StringArray::from(vec![ - Some("aa"), - Some("ab"), - Some("ac"), - None, - ])), - Arc::new(Int32Array::from(vec![Some(1), Some(3), Some(4), Some(5)])), - Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0, 4.0])), - ], - ) - .unwrap(); - let probe_batches = vec![probe_batch]; - let probe_side_schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Utf8, true), - Field::new("b", DataType::Int32, true), - Field::new("c", DataType::Float64, false), - ])); - let probe_scan = 
TestScanBuilder::new(Arc::clone(&probe_side_schema)) - .with_support(true) - .with_batches(probe_batches) - .build(); - - // Create HashJoinExec in CollectLeft mode (simpler for this test) - let on = vec![ - ( - col("a", &build_side_schema).unwrap(), - col("a", &probe_side_schema).unwrap(), - ), - ( - col("b", &build_side_schema).unwrap(), - col("b", &probe_side_schema).unwrap(), - ), - ]; - let plan = Arc::new( - HashJoinExec::try_new( - build_scan, - Arc::clone(&probe_scan), - on, - None, - &JoinType::Inner, - None, - PartitionMode::CollectLeft, - datafusion_common::NullEquality::NullEqualsNothing, - false, - ) - .unwrap(), - ); - - // Apply the filter pushdown optimizer - let mut config = SessionConfig::new(); - config.options_mut().execution.parquet.pushdown_filters = true; - let optimizer = FilterPushdown::new_post_optimization(); - let plan = optimizer.optimize(plan, config.options()).unwrap(); - - insta::assert_snapshot!( - format_plan_for_test(&plan), - @r" - - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ] - " - ); - - // Put some data through the plan to check that the filter is updated to reflect the TopK state - let session_ctx = SessionContext::new_with_config(config); - session_ctx.register_object_store( - ObjectStoreUrl::parse("test://").unwrap().as_ref(), - Arc::new(InMemory::new()), - ); - let state = session_ctx.state(); - let task_ctx = state.task_ctx(); - // Execute all partitions (required for partitioned hash join coordination) - let batches = collect(Arc::clone(&plan), Arc::clone(&task_ctx)) - .await - .unwrap(); - - // Test that filters are pushed down correctly to each side of the join - insta::assert_snapshot!( - 
format_plan_for_test(&plan), - @r" - - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= 1 AND b@1 <= 2 AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:1}, {c0:,c1:2}, {c0:ab,c1:}]) ] - " - ); - - #[rustfmt::skip] - let expected = [ - "+----+---+----+---+-----+", - "| a | b | a | b | c |", - "+----+---+----+---+-----+", - "| aa | 1 | aa | 1 | 1.0 |", - "+----+---+----+---+-----+", - ]; - assert_batches_eq!(&expected, &batches); -} - -/// Test that when hash_join_inlist_pushdown_max_size is set to a very small value, -/// the HashTable strategy is used instead of InList strategy, even with small build sides. -/// This test is identical to test_hashjoin_dynamic_filter_pushdown_partitioned except -/// for the config setting that forces the HashTable strategy. +// Not portable to sqllogictest: same reason as +// test_hashjoin_dynamic_filter_pushdown_partitioned — hand-wires +// PartitionMode::Partitioned, which SQL never picks for small parquet inputs. #[tokio::test] async fn test_hashjoin_hash_table_pushdown_partitioned() { use datafusion_common::JoinType; @@ -4325,10 +2604,10 @@ async fn test_hashjoin_hash_table_pushdown_partitioned() { ); } -/// Test that when hash_join_inlist_pushdown_max_size is set to a very small value, -/// the HashTable strategy is used instead of InList strategy in CollectLeft mode. -/// This test is identical to test_hashjoin_dynamic_filter_pushdown_collect_left except -/// for the config setting that forces the HashTable strategy. +// Ported to push_down_filter_parquet.slt (`hl_build`/`hl_probe` fixture). 
+// Rust version retained only because the slt port cannot hand-wire the +// RepartitionExec-above-probe shape this test uses; the hash_lookup vs +// IN (SET) invariant is captured in the slt port. #[tokio::test] async fn test_hashjoin_hash_table_pushdown_collect_left() { use datafusion_common::JoinType; @@ -4397,145 +2676,10 @@ async fn test_hashjoin_hash_table_pushdown_collect_left() { col("b", &probe_side_schema).unwrap(), ), ]; - let hash_join = Arc::new( - HashJoinExec::try_new( - build_scan, - probe_repartition, - on, - None, - &JoinType::Inner, - None, - PartitionMode::CollectLeft, - datafusion_common::NullEquality::NullEqualsNothing, - false, - ) - .unwrap(), - ); - - // Top-level CoalescePartitionsExec - let cp = Arc::new(CoalescePartitionsExec::new(hash_join)) as Arc; - // Add a sort for deterministic output - let plan = Arc::new(SortExec::new( - LexOrdering::new(vec![PhysicalSortExpr::new( - col("a", &probe_side_schema).unwrap(), - SortOptions::new(true, false), // descending, nulls_first - )]) - .unwrap(), - cp, - )) as Arc; - - // Apply the optimization with config setting that forces HashTable strategy - let session_config = SessionConfig::default() - .with_batch_size(10) - .set_usize("datafusion.optimizer.hash_join_inlist_pushdown_max_size", 1) - .set_bool("datafusion.execution.parquet.pushdown_filters", true) - .set_bool("datafusion.optimizer.enable_dynamic_filter_pushdown", true); - let plan = FilterPushdown::new_post_optimization() - .optimize(plan, session_config.options()) - .unwrap(); - let session_ctx = SessionContext::new_with_config(session_config); - session_ctx.register_object_store( - ObjectStoreUrl::parse("test://").unwrap().as_ref(), - Arc::new(InMemory::new()), - ); - let state = session_ctx.state(); - let task_ctx = state.task_ctx(); - let batches = collect(Arc::clone(&plan), Arc::clone(&task_ctx)) - .await - .unwrap(); - - // Verify that hash_lookup is used instead of IN (SET) - let plan_str = format_plan_for_test(&plan).to_string(); 
- assert!( - plan_str.contains("hash_lookup"), - "Expected hash_lookup in plan but got: {plan_str}" - ); - assert!( - !plan_str.contains("IN (SET)"), - "Expected no IN (SET) in plan but got: {plan_str}" - ); - - let result = format!("{}", pretty_format_batches(&batches).unwrap()); - - let probe_scan_metrics = probe_scan.metrics().unwrap(); - - // The probe side had 4 rows, but after applying the dynamic filter only 2 rows should remain. - assert_eq!(probe_scan_metrics.output_rows().unwrap(), 2); - - // Results should be identical to the InList version - insta::assert_snapshot!( - result, - @r" - +----+----+-----+----+----+-----+ - | a | b | c | a | b | e | - +----+----+-----+----+----+-----+ - | ab | bb | 2.0 | ab | bb | 2.0 | - | aa | ba | 1.0 | aa | ba | 1.0 | - +----+----+-----+----+----+-----+ - ", - ); -} - -/// Test HashTable strategy with integer multi-column join keys. -/// Verifies that hash_lookup works correctly with integer data types. -#[tokio::test] -async fn test_hashjoin_hash_table_pushdown_integer_keys() { - use datafusion_common::JoinType; - use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; - - // Create build side with integer keys - let build_batches = vec![ - record_batch!( - ("id1", Int32, [1, 2]), - ("id2", Int32, [10, 20]), - ("value", Float64, [100.0, 200.0]) - ) - .unwrap(), - ]; - let build_side_schema = Arc::new(Schema::new(vec![ - Field::new("id1", DataType::Int32, false), - Field::new("id2", DataType::Int32, false), - Field::new("value", DataType::Float64, false), - ])); - let build_scan = TestScanBuilder::new(Arc::clone(&build_side_schema)) - .with_support(true) - .with_batches(build_batches) - .build(); - - // Create probe side with more integer rows - let probe_batches = vec![ - record_batch!( - ("id1", Int32, [1, 2, 3, 4]), - ("id2", Int32, [10, 20, 30, 40]), - ("data", Utf8, ["a", "b", "c", "d"]) - ) - .unwrap(), - ]; - let probe_side_schema = Arc::new(Schema::new(vec![ - Field::new("id1", DataType::Int32, 
false), - Field::new("id2", DataType::Int32, false), - Field::new("data", DataType::Utf8, false), - ])); - let probe_scan = TestScanBuilder::new(Arc::clone(&probe_side_schema)) - .with_support(true) - .with_batches(probe_batches) - .build(); - - // Create join on multiple integer columns - let on = vec![ - ( - col("id1", &build_side_schema).unwrap(), - col("id1", &probe_side_schema).unwrap(), - ), - ( - col("id2", &build_side_schema).unwrap(), - col("id2", &probe_side_schema).unwrap(), - ), - ]; - let plan = Arc::new( + let hash_join = Arc::new( HashJoinExec::try_new( build_scan, - Arc::clone(&probe_scan), + probe_repartition, on, None, &JoinType::Inner, @@ -4547,7 +2691,19 @@ async fn test_hashjoin_hash_table_pushdown_integer_keys() { .unwrap(), ); - // Apply optimization with forced HashTable strategy + // Top-level CoalescePartitionsExec + let cp = Arc::new(CoalescePartitionsExec::new(hash_join)) as Arc; + // Add a sort for deterministic output + let plan = Arc::new(SortExec::new( + LexOrdering::new(vec![PhysicalSortExpr::new( + col("a", &probe_side_schema).unwrap(), + SortOptions::new(true, false), // descending, nulls_first + )]) + .unwrap(), + cp, + )) as Arc; + + // Apply the optimization with config setting that forces HashTable strategy let session_config = SessionConfig::default() .with_batch_size(10) .set_usize("datafusion.optimizer.hash_join_inlist_pushdown_max_size", 1) @@ -4567,7 +2723,7 @@ async fn test_hashjoin_hash_table_pushdown_integer_keys() { .await .unwrap(); - // Verify hash_lookup is used + // Verify that hash_lookup is used instead of IN (SET) let plan_str = format_plan_for_test(&plan).to_string(); assert!( plan_str.contains("hash_lookup"), @@ -4581,22 +2737,30 @@ async fn test_hashjoin_hash_table_pushdown_integer_keys() { let result = format!("{}", pretty_format_batches(&batches).unwrap()); let probe_scan_metrics = probe_scan.metrics().unwrap(); - // Only 2 rows from probe side match the build side + + // The probe side had 4 rows, but 
after applying the dynamic filter only 2 rows should remain. assert_eq!(probe_scan_metrics.output_rows().unwrap(), 2); + // Results should be identical to the InList version insta::assert_snapshot!( result, @r" - +-----+-----+-------+-----+-----+------+ - | id1 | id2 | value | id1 | id2 | data | - +-----+-----+-------+-----+-----+------+ - | 1 | 10 | 100.0 | 1 | 10 | a | - | 2 | 20 | 200.0 | 2 | 20 | b | - +-----+-----+-------+-----+-----+------+ + +----+----+-----+----+----+-----+ + | a | b | c | a | b | e | + +----+----+-----+----+----+-----+ + | ab | bb | 2.0 | ab | bb | 2.0 | + | aa | ba | 1.0 | aa | ba | 1.0 | + +----+----+-----+----+----+-----+ ", ); } +// Not portable to sqllogictest: asserts on `HashJoinExec::dynamic_filter_for_test().is_used()` +// which is a debug-only API. The observable behavior (probe-side scan +// receiving the dynamic filter when the data source supports it) is +// already covered by the simpler CollectLeft port in push_down_filter_parquet.slt; +// the with_support(false) branch has no SQL analog (parquet always supports +// pushdown). #[tokio::test] async fn test_hashjoin_dynamic_filter_pushdown_is_used() { use datafusion_common::JoinType; @@ -4685,7 +2849,13 @@ async fn test_hashjoin_dynamic_filter_pushdown_is_used() { } } -/// Regression test for https://github.com/apache/datafusion/issues/20109 +/// Regression test for https://github.com/apache/datafusion/issues/20109. +/// +/// Not portable to sqllogictest: the regression specifically targets the +/// physical FilterPushdown rule running over *stacked* FilterExecs with +/// projections on a MemorySourceConfig. In SQL the logical optimizer +/// collapses the two filters before physical planning, so the stacked +/// FilterExec shape this test exercises is unreachable. 
#[tokio::test] async fn test_filter_with_projection_pushdown() { use arrow::array::{Int64Array, RecordBatch, StringArray}; @@ -4772,7 +2942,11 @@ async fn test_filter_with_projection_pushdown() { assert_batches_eq!(expected, &result); } -/// Test that ExecutionPlan::apply_expressions() can discover dynamic filters across the plan tree +/// Test that ExecutionPlan::apply_expressions() can discover dynamic filters across the plan tree. +/// +/// Not portable to sqllogictest: asserts by walking the plan tree with +/// `apply_expressions` + `downcast_ref::` and +/// counting nodes. Neither API is observable from SQL. #[tokio::test] async fn test_discover_dynamic_filters_via_expressions_api() { use datafusion_common::JoinType; @@ -4872,277 +3046,3 @@ async fn test_discover_dynamic_filters_via_expressions_api() { "After optimization, should discover exactly 2 dynamic filters (1 in HashJoinExec, 1 in DataSourceExec), found {count_after}" ); } - -#[tokio::test] -async fn test_hashjoin_dynamic_filter_pushdown_left_join() { - use datafusion_common::JoinType; - use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; - - // Create build side with limited values - let build_batches = vec![ - record_batch!( - ("a", Utf8, ["aa", "ab"]), - ("b", Utf8, ["ba", "bb"]), - ("c", Float64, [1.0, 2.0]) - ) - .unwrap(), - ]; - let build_side_schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Utf8, false), - Field::new("b", DataType::Utf8, false), - Field::new("c", DataType::Float64, false), - ])); - let build_scan = TestScanBuilder::new(Arc::clone(&build_side_schema)) - .with_support(true) - .with_batches(build_batches) - .build(); - - // Create probe side with more values (some won't match) - let probe_batches = vec![ - record_batch!( - ("a", Utf8, ["aa", "ab", "ac", "ad"]), - ("b", Utf8, ["ba", "bb", "bc", "bd"]), - ("e", Float64, [1.0, 2.0, 3.0, 4.0]) - ) - .unwrap(), - ]; - let probe_side_schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Utf8, 
false), - Field::new("b", DataType::Utf8, false), - Field::new("e", DataType::Float64, false), - ])); - let probe_scan = TestScanBuilder::new(Arc::clone(&probe_side_schema)) - .with_support(true) - .with_batches(probe_batches) - .build(); - - // Create HashJoinExec with Left join and CollectLeft mode - let on = vec![ - ( - col("a", &build_side_schema).unwrap(), - col("a", &probe_side_schema).unwrap(), - ), - ( - col("b", &build_side_schema).unwrap(), - col("b", &probe_side_schema).unwrap(), - ), - ]; - let plan = Arc::new( - HashJoinExec::try_new( - build_scan, - Arc::clone(&probe_scan), - on, - None, - &JoinType::Left, - None, - PartitionMode::CollectLeft, - datafusion_common::NullEquality::NullEqualsNothing, - false, - ) - .unwrap(), - ) as Arc; - - // Expect the dynamic filter predicate to be pushed down into the probe side DataSource - insta::assert_snapshot!( - OptimizationTest::new(Arc::clone(&plan), FilterPushdown::new_post_optimization(), true), - @r" - OptimizationTest: - input: - - HashJoinExec: mode=CollectLeft, join_type=Left, on=[(a@0, a@0), (b@1, b@1)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true - output: - Ok: - - HashJoinExec: mode=CollectLeft, join_type=Left, on=[(a@0, a@0), (b@1, b@1)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ] - ", - ); - - // Actually apply the optimization and execute the plan - let mut config = ConfigOptions::default(); - config.execution.parquet.pushdown_filters = true; - config.optimizer.enable_dynamic_filter_pushdown = true; - let plan = FilterPushdown::new_post_optimization() - 
.optimize(plan, &config) - .unwrap(); - - // Test that dynamic filter linking survives with_new_children - let children = plan.children().into_iter().map(Arc::clone).collect(); - let plan = plan.with_new_children(children).unwrap(); - - let config = SessionConfig::new().with_batch_size(10); - let session_ctx = SessionContext::new_with_config(config); - session_ctx.register_object_store( - ObjectStoreUrl::parse("test://").unwrap().as_ref(), - Arc::new(InMemory::new()), - ); - let state = session_ctx.state(); - let task_ctx = state.task_ctx(); - let batches = collect(Arc::clone(&plan), Arc::clone(&task_ctx)) - .await - .unwrap(); - - // After execution, verify the dynamic filter was populated with bounds and IN-list - insta::assert_snapshot!( - format!("{}", format_plan_for_test(&plan)), - @r" - - HashJoinExec: mode=CollectLeft, join_type=Left, on=[(a@0, a@0), (b@1, b@1)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ] - " - ); - - // Verify result correctness: left join preserves all build (left) rows. - // All build rows match probe rows here, so we get 2 matched rows. - // The dynamic filter pruned unmatched probe rows (ac, ad) at scan time, - // which is safe because those probe rows would never match any build row. 
- let result = format!("{}", pretty_format_batches(&batches).unwrap()); - insta::assert_snapshot!( - result, - @r" - +----+----+-----+----+----+-----+ - | a | b | c | a | b | e | - +----+----+-----+----+----+-----+ - | aa | ba | 1.0 | aa | ba | 1.0 | - | ab | bb | 2.0 | ab | bb | 2.0 | - +----+----+-----+----+----+-----+ - " - ); -} - -#[tokio::test] -async fn test_hashjoin_dynamic_filter_pushdown_left_semi_join() { - use datafusion_common::JoinType; - use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; - - // Create build side with limited values - let build_batches = vec![ - record_batch!( - ("a", Utf8, ["aa", "ab"]), - ("b", Utf8, ["ba", "bb"]), - ("c", Float64, [1.0, 2.0]) - ) - .unwrap(), - ]; - let build_side_schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Utf8, false), - Field::new("b", DataType::Utf8, false), - Field::new("c", DataType::Float64, false), - ])); - let build_scan = TestScanBuilder::new(Arc::clone(&build_side_schema)) - .with_support(true) - .with_batches(build_batches) - .build(); - - // Create probe side with more values (some won't match) - let probe_batches = vec![ - record_batch!( - ("a", Utf8, ["aa", "ab", "ac", "ad"]), - ("b", Utf8, ["ba", "bb", "bc", "bd"]), - ("e", Float64, [1.0, 2.0, 3.0, 4.0]) - ) - .unwrap(), - ]; - let probe_side_schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Utf8, false), - Field::new("b", DataType::Utf8, false), - Field::new("e", DataType::Float64, false), - ])); - let probe_scan = TestScanBuilder::new(Arc::clone(&probe_side_schema)) - .with_support(true) - .with_batches(probe_batches) - .build(); - - // Create HashJoinExec with LeftSemi join and CollectLeft mode - let on = vec![ - ( - col("a", &build_side_schema).unwrap(), - col("a", &probe_side_schema).unwrap(), - ), - ( - col("b", &build_side_schema).unwrap(), - col("b", &probe_side_schema).unwrap(), - ), - ]; - let plan = Arc::new( - HashJoinExec::try_new( - build_scan, - Arc::clone(&probe_scan), - on, - None, 
- &JoinType::LeftSemi, - None, - PartitionMode::CollectLeft, - datafusion_common::NullEquality::NullEqualsNothing, - false, - ) - .unwrap(), - ) as Arc; - - // Expect the dynamic filter predicate to be pushed down into the probe side DataSource - insta::assert_snapshot!( - OptimizationTest::new(Arc::clone(&plan), FilterPushdown::new_post_optimization(), true), - @r" - OptimizationTest: - input: - - HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(a@0, a@0), (b@1, b@1)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true - output: - Ok: - - HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(a@0, a@0), (b@1, b@1)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ] - ", - ); - - // Actually apply the optimization and execute the plan - let mut config = ConfigOptions::default(); - config.execution.parquet.pushdown_filters = true; - config.optimizer.enable_dynamic_filter_pushdown = true; - let plan = FilterPushdown::new_post_optimization() - .optimize(plan, &config) - .unwrap(); - - // Test that dynamic filter linking survives with_new_children - let children = plan.children().into_iter().map(Arc::clone).collect(); - let plan = plan.with_new_children(children).unwrap(); - - let config = SessionConfig::new().with_batch_size(10); - let session_ctx = SessionContext::new_with_config(config); - session_ctx.register_object_store( - ObjectStoreUrl::parse("test://").unwrap().as_ref(), - Arc::new(InMemory::new()), - ); - let state = session_ctx.state(); - let task_ctx = state.task_ctx(); - let batches = collect(Arc::clone(&plan), 
Arc::clone(&task_ctx)) - .await - .unwrap(); - - // After execution, verify the dynamic filter was populated with bounds and IN-list - insta::assert_snapshot!( - format!("{}", format_plan_for_test(&plan)), - @r" - - HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(a@0, a@0), (b@1, b@1)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ] - " - ); - - // Verify result correctness: left semi join returns only build (left) rows - // that have at least one matching probe row. Output schema is build-side columns only. - let result = format!("{}", pretty_format_batches(&batches).unwrap()); - insta::assert_snapshot!( - result, - @r" - +----+----+-----+ - | a | b | c | - +----+----+-----+ - | aa | ba | 1.0 | - | ab | bb | 2.0 | - +----+----+-----+ - " - ); -} diff --git a/datafusion/datasource-json/src/boundary_stream.rs b/datafusion/datasource-json/src/boundary_stream.rs index fc40feda6b80f..847c80279a53e 100644 --- a/datafusion/datasource-json/src/boundary_stream.rs +++ b/datafusion/datasource-json/src/boundary_stream.rs @@ -28,7 +28,7 @@ use std::task::{Context, Poll}; use bytes::Bytes; use futures::stream::{BoxStream, Stream}; use futures::{StreamExt, TryFutureExt}; -use object_store::{GetOptions, GetRange, ObjectStore}; +use object_store::{GetOptions, GetRange, GetResultPayload, ObjectStore}; /// How far past `raw_end` the initial bounded fetch covers. 
If the terminating /// newline is not found within this window, `ScanningLastTerminator` issues @@ -90,10 +90,52 @@ async fn get_stream( range: std::ops::Range, ) -> object_store::Result>> { let opts = GetOptions { - range: Some(GetRange::Bounded(range)), + range: Some(GetRange::Bounded(range.clone())), ..Default::default() }; let result = store.get_opts(&location, opts).await?; + + #[cfg(not(target_arch = "wasm32"))] + if let GetResultPayload::File(mut file, _path) = result.payload { + use std::io::{Read, Seek, SeekFrom}; + const CHUNK_SIZE: u64 = 8 * 1024; + + file.seek(SeekFrom::Start(range.start)).map_err(|e| { + object_store::Error::Generic { + store: "local", + source: Box::new(e), + } + })?; + + return Ok(futures::stream::try_unfold( + (file, range.end - range.start), + move |(mut file, remaining)| async move { + if remaining == 0 { + return Ok(None); + } + let to_read = remaining.min(CHUNK_SIZE); + let cap = usize::try_from(to_read).map_err(|e| { + object_store::Error::Generic { + store: "local", + source: Box::new(e), + } + })?; + + let mut buf = Vec::with_capacity(cap); + let read = + (&mut file) + .take(to_read) + .read_to_end(&mut buf) + .map_err(|e| object_store::Error::Generic { + store: "local", + source: Box::new(e), + })?; + Ok(Some((Bytes::from(buf), (file, remaining - read as u64)))) + }, + ) + .boxed()); + } + Ok(result.into_stream()) } diff --git a/datafusion/functions-nested/src/array_compact.rs b/datafusion/functions-nested/src/array_compact.rs new file mode 100644 index 0000000000000..11be494b5b20f --- /dev/null +++ b/datafusion/functions-nested/src/array_compact.rs @@ -0,0 +1,191 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [`ScalarUDFImpl`] definitions for array_compact function. + +use crate::utils::make_scalar_function; +use arrow::array::{ + Array, ArrayRef, Capacities, GenericListArray, MutableArrayData, OffsetSizeTrait, + make_array, +}; +use arrow::buffer::OffsetBuffer; +use arrow::datatypes::DataType; +use arrow::datatypes::DataType::{LargeList, List, Null}; +use datafusion_common::cast::{as_large_list_array, as_list_array}; +use datafusion_common::{Result, exec_err, utils::take_function_args}; +use datafusion_expr::{ + ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, + Volatility, +}; +use datafusion_macros::user_doc; +use std::sync::Arc; + +make_udf_expr_and_func!( + ArrayCompact, + array_compact, + array, + "removes null values from the array.", + array_compact_udf +); + +#[user_doc( + doc_section(label = "Array Functions"), + description = "Removes null values from the array.", + syntax_example = "array_compact(array)", + sql_example = r#"```sql +> select array_compact([1, NULL, 2, NULL, 3]) arr; ++-----------+ +| arr | ++-----------+ +| [1, 2, 3] | ++-----------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." 
+ ) +)] +#[derive(Debug, PartialEq, Eq, Hash)] +pub struct ArrayCompact { + signature: Signature, + aliases: Vec, +} + +impl Default for ArrayCompact { + fn default() -> Self { + Self::new() + } +} + +impl ArrayCompact { + pub fn new() -> Self { + Self { + signature: Signature::array(Volatility::Immutable), + aliases: vec!["list_compact".to_string()], + } + } +} + +impl ScalarUDFImpl for ArrayCompact { + fn name(&self) -> &str { + "array_compact" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + Ok(arg_types[0].clone()) + } + + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + make_scalar_function(array_compact_inner)(&args.args) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } + + fn documentation(&self) -> Option<&Documentation> { + self.doc() + } +} + +/// array_compact SQL function +fn array_compact_inner(arg: &[ArrayRef]) -> Result { + let [input_array] = take_function_args("array_compact", arg)?; + + match &input_array.data_type() { + List(field) => { + let array = as_list_array(input_array)?; + compact_list::(array, field) + } + LargeList(field) => { + let array = as_large_list_array(input_array)?; + compact_list::(array, field) + } + Null => Ok(Arc::clone(input_array)), + array_type => exec_err!("array_compact does not support type '{array_type}'."), + } +} + +/// Remove null elements from each row of a list array. 
+fn compact_list( + list_array: &GenericListArray, + field: &Arc, +) -> Result { + let values = list_array.values(); + + // Fast path: no nulls in values, return input unchanged + if values.null_count() == 0 { + return Ok(Arc::new(list_array.clone())); + } + + let original_data = values.to_data(); + let capacity = original_data.len() - values.null_count(); + let mut offsets = Vec::::with_capacity(list_array.len() + 1); + offsets.push(O::zero()); + let mut mutable = MutableArrayData::with_capacities( + vec![&original_data], + false, + Capacities::Array(capacity), + ); + + for row_index in 0..list_array.len() { + if list_array.nulls().is_some_and(|n| n.is_null(row_index)) { + offsets.push(offsets[row_index]); + continue; + } + + let start = list_array.offsets()[row_index].as_usize(); + let end = list_array.offsets()[row_index + 1].as_usize(); + let mut copied = 0usize; + + // Batch consecutive non-null elements into single extend() calls + // to reduce per-element overhead. For [1, 2, NULL, 3, 4] this + // produces 2 extend calls (0..2, 3..5) instead of 4 individual ones. 
+ let mut batch_start: Option = None; + for i in start..end { + if values.is_null(i) { + // Null breaks the current batch — flush it + if let Some(bs) = batch_start { + mutable.extend(0, bs, i); + copied += i - bs; + batch_start = None; + } + } else if batch_start.is_none() { + batch_start = Some(i); + } + } + // Flush any remaining batch after the loop + if let Some(bs) = batch_start { + mutable.extend(0, bs, end); + copied += end - bs; + } + + offsets.push(offsets[row_index] + O::usize_as(copied)); + } + + let new_values = make_array(mutable.freeze()); + Ok(Arc::new(GenericListArray::::try_new( + Arc::clone(field), + OffsetBuffer::new(offsets.into()), + new_values, + list_array.nulls().cloned(), + )?)) +} diff --git a/datafusion/functions-nested/src/lib.rs b/datafusion/functions-nested/src/lib.rs index 99b25ec96454b..99c33fb64039f 100644 --- a/datafusion/functions-nested/src/lib.rs +++ b/datafusion/functions-nested/src/lib.rs @@ -37,6 +37,7 @@ #[macro_use] pub mod macros; +pub mod array_compact; pub mod array_has; pub mod arrays_zip; pub mod cardinality; @@ -77,6 +78,7 @@ use std::sync::Arc; /// Fluent-style API for creating `Expr`s pub mod expr_fn { + pub use super::array_compact::array_compact; pub use super::array_has::array_has; pub use super::array_has::array_has_all; pub use super::array_has::array_has_any; @@ -128,6 +130,7 @@ pub mod expr_fn { /// Return all default nested type functions pub fn all_default_nested_functions() -> Vec> { vec![ + array_compact::array_compact_udf(), string::array_to_string_udf(), string::string_to_array_udf(), range::range_udf(), diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 81d5c8f91a5bc..7b6779abe486f 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -7685,6 +7685,99 @@ from array_distinct_table_2D_fixed; [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] [[5, 6], NULL] +## array_compact (aliases: 
`list_compact`) + +# basic: remove nulls from integer array +query ? +select array_compact([1, NULL, 2, NULL, 3]); +---- +[1, 2, 3] + +# no nulls present +query ? +select array_compact([1, 2, 3]); +---- +[1, 2, 3] + +# all nulls +query ? +select array_compact(arrow_cast([NULL, NULL, NULL], 'List(Int64)')); +---- +[] + +# empty array +query ? +select array_compact([]); +---- +[] + +# NULL input returns NULL +query ? +select array_compact(NULL::INT[]); +---- +NULL + +# string array +query ? +select array_compact(['a', NULL, 'b', NULL, 'c']); +---- +[a, b, c] + +# float array +query ? +select array_compact([1.0, NULL, 2.0, NULL]); +---- +[1.0, 2.0] + +# nested array (2D) +query ? +select array_compact([make_array(1, 2), NULL, make_array(3, 4)]); +---- +[[1, 2], [3, 4]] + +# LargeList +query ? +select array_compact(arrow_cast([1, NULL, 2, NULL, 3], 'LargeList(Int64)')); +---- +[1, 2, 3] + +# alias list_compact +query ? +select list_compact([1, NULL, 2]); +---- +[1, 2] + +# table-based test +statement ok +CREATE TABLE array_compact_table AS VALUES + (make_array(1, NULL, 2, NULL, 3)), + (make_array(NULL, NULL, NULL)), + (make_array(4, 5, 6)), + (NULL::INT[]) +; + +query ? +select array_compact(column1) from array_compact_table; +---- +[1, 2, 3] +[] +[4, 5, 6] +NULL + +statement ok +DROP TABLE array_compact_table; + +# FixedSizeList (coerced to List) +query ? +select array_compact(arrow_cast(make_array(1, NULL, 2, NULL, 3), 'FixedSizeList(5, Int64)')); +---- +[1, 2, 3] + +query ? 
+select array_compact(arrow_cast(make_array(NULL, NULL, NULL), 'FixedSizeList(3, Int64)')); +---- +[] + ## arrays_zip (aliases: `list_zip`) # Spark example: arrays_zip(array(1, 2, 3), array(2, 3, 4)) diff --git a/datafusion/sqllogictest/test_files/push_down_filter_parquet.slt b/datafusion/sqllogictest/test_files/push_down_filter_parquet.slt index ab6847e1c4834..8469c32a17033 100644 --- a/datafusion/sqllogictest/test_files/push_down_filter_parquet.slt +++ b/datafusion/sqllogictest/test_files/push_down_filter_parquet.slt @@ -166,6 +166,863 @@ drop table small_table; statement ok drop table large_table; + +######## +# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs:: +# test_topk_dynamic_filter_pushdown_integration +# +# Integration test for dynamic filter pushdown with TopK: +# a TopK over an ordered parquet file emits a dynamic filter that should +# get pushed into the parquet scan and prune every row group except the +# first (128 rows), visible via `pushdown_rows_matched` / `pushdown_rows_pruned`. +# +# `t * t` (rather than `t`) is used as the sort expression so the +# order-pushdown optimizer does not rewrite the TopK away. 
+######## + +statement ok +set datafusion.execution.parquet.max_row_group_size = 128; + +query I +COPY ( + SELECT 1372708800 + value AS t + FROM generate_series(0, 99999) + ORDER BY t +) TO 'test_files/scratch/push_down_filter_parquet/topk_pushdown.parquet' +STORED AS PARQUET; +---- +100000 + +statement ok +CREATE EXTERNAL TABLE topk_pushdown +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_parquet/topk_pushdown.parquet'; + +statement ok +set datafusion.explain.analyze_categories = 'rows'; + +query TT +EXPLAIN ANALYZE SELECT t FROM topk_pushdown ORDER BY t * t LIMIT 10; +---- +Plan with Metrics +01)SortExec: TopK(fetch=10), expr=[t@0 * t@0 ASC NULLS LAST], preserve_partitioning=[false], filter=[t@0 * t@0 < 1884329474306198481], metrics=[output_rows=10, output_batches=1, row_replacements=10] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/topk_pushdown.parquet]]}, projection=[t], output_ordering=[t@0 ASC NULLS LAST], file_type=parquet, predicate=DynamicFilter [ t@0 * t@0 < 1884329474306198481 ], metrics=[output_rows=128, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=782 total → 782 matched, row_groups_pruned_bloom_filter=782 total → 782 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=128, pushdown_rows_pruned=99.87 K, predicate_cache_inner_records=128, predicate_cache_records=128, scan_efficiency_ratio=64.87% (258.7 K/398.8 K)] + +statement ok +reset datafusion.explain.analyze_categories; + +statement ok +reset datafusion.execution.parquet.max_row_group_size; + +statement ok +drop table topk_pushdown; + + +######## +# Ported from 
datafusion/core/tests/physical_optimizer/filter_pushdown.rs:: +# test_topk_dynamic_filter_pushdown +# +# A `ORDER BY b DESC LIMIT 1` over a parquet file should emit a dynamic +# filter on the TopK that gets pushed into the scan's predicate, visible +# in EXPLAIN ANALYZE as `DynamicFilter [ b > ]` tightened to the +# running max seen by the TopK during execution. +######## + +statement ok +COPY ( + SELECT * FROM (VALUES + ('aa', 'bd', 1.0), + ('ab', 'bc', 2.0), + ('ac', 'bb', 2.0), + ('ad', 'ba', 1.0) + ) AS v(a, b, c) +) TO 'test_files/scratch/push_down_filter_parquet/topk_single_col.parquet' +STORED AS PARQUET; + +statement ok +CREATE EXTERNAL TABLE topk_single_col (a VARCHAR, b VARCHAR, c DOUBLE) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_parquet/topk_single_col.parquet'; + +# Data-correctness check — top row by b desc is (aa, bd, 1). +query TTR +SELECT * FROM topk_single_col ORDER BY b DESC LIMIT 1; +---- +aa bd 1 + +# Before execution: a dynamic filter is emitted but still empty +# (DynamicFilter [ empty ]) on the parquet scan. +query TT +EXPLAIN SELECT * FROM topk_single_col ORDER BY b DESC LIMIT 1; +---- +physical_plan +01)SortExec: TopK(fetch=1), expr=[b@1 DESC], preserve_partitioning=[false] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/topk_single_col.parquet]]}, projection=[a, b, c], file_type=parquet, predicate=DynamicFilter [ empty ] + +statement ok +set datafusion.explain.analyze_categories = 'rows'; + +# After execution: the dynamic filter is tightened to `b > `. 
+query TT +EXPLAIN ANALYZE SELECT * FROM topk_single_col ORDER BY b DESC LIMIT 1; +---- +Plan with Metrics +01)SortExec: TopK(fetch=1), expr=[b@1 DESC], preserve_partitioning=[false], filter=[b@1 IS NULL OR b@1 > bd], metrics=[output_rows=1, output_batches=1, row_replacements=1] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/topk_single_col.parquet]]}, projection=[a, b, c], file_type=parquet, predicate=DynamicFilter [ b@1 IS NULL OR b@1 > bd ], pruning_predicate=b_null_count@0 > 0 OR b_null_count@0 != row_count@2 AND b_max@1 > bd, required_guarantees=[], metrics=[output_rows=4, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=4, pushdown_rows_pruned=0, predicate_cache_inner_records=4, predicate_cache_records=4, scan_efficiency_ratio=22.37% (240/1.07 K)] + +statement ok +reset datafusion.explain.analyze_categories; + +statement ok +drop table topk_single_col; + + +######## +# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs:: +# test_topk_dynamic_filter_pushdown_multi_column_sort +# +# TopK with a compound sort key (b ASC NULLS LAST, a DESC). The dynamic +# filter emitted to the scan should encode the tiebreaker as a compound +# predicate of the form `b < OR (b = AND a > )`. +# With fetch=2 the top-2 rows are (ad, ba) and (ac, bb), so the final +# filter reads `b < bb OR (b = bb AND a > ac)`. 
+######## + +statement ok +COPY ( + SELECT * FROM (VALUES + ('ac', 'bb', 2.0), + ('ad', 'ba', 1.0), + ('aa', 'bc', 1.0), + ('ab', 'bd', 2.0) + ) AS v(a, b, c) +) TO 'test_files/scratch/push_down_filter_parquet/topk_multi_col.parquet' +STORED AS PARQUET; + +statement ok +CREATE EXTERNAL TABLE topk_multi_col (a VARCHAR, b VARCHAR, c DOUBLE) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_parquet/topk_multi_col.parquet'; + +# Data-correctness: confirm the top-2 rows come out in the expected order. +query TTR +SELECT * FROM topk_multi_col ORDER BY b ASC NULLS LAST, a DESC LIMIT 2; +---- +ad ba 1 +ac bb 2 + +statement ok +set datafusion.explain.analyze_categories = 'rows'; + +query TT +EXPLAIN ANALYZE SELECT * FROM topk_multi_col ORDER BY b ASC NULLS LAST, a DESC LIMIT 2; +---- +Plan with Metrics +01)SortExec: TopK(fetch=2), expr=[b@1 ASC NULLS LAST, a@0 DESC], preserve_partitioning=[false], filter=[b@1 < bb OR b@1 = bb AND (a@0 IS NULL OR a@0 > ac)], metrics=[output_rows=2, output_batches=1, row_replacements=2] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/topk_multi_col.parquet]]}, projection=[a, b, c], file_type=parquet, predicate=DynamicFilter [ b@1 < bb OR b@1 = bb AND (a@0 IS NULL OR a@0 > ac) ], pruning_predicate=b_null_count@1 != row_count@2 AND b_min@0 < bb OR b_null_count@1 != row_count@2 AND b_min@0 <= bb AND bb <= b_max@3 AND (a_null_count@4 > 0 OR a_null_count@4 != row_count@2 AND a_max@5 > ac), required_guarantees=[], metrics=[output_rows=4, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, 
num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=4, pushdown_rows_pruned=0, predicate_cache_inner_records=8, predicate_cache_records=8, scan_efficiency_ratio=22.37% (240/1.07 K)] + +statement ok +reset datafusion.explain.analyze_categories; + +statement ok +drop table topk_multi_col; + + +######## +# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs:: +# test_hashjoin_dynamic_filter_pushdown +# +# CollectLeft hash join on two equi-join keys: the dynamic filter emitted +# by the build side should cover both keys and include an `IN (SET)` +# predicate over `struct(a, b)` for the rows it collected. +######## + +statement ok +COPY ( + SELECT * FROM (VALUES + ('aa', 'ba', 1.0), + ('ab', 'bb', 2.0) + ) AS v(a, b, c) +) TO 'test_files/scratch/push_down_filter_parquet/join_build.parquet' +STORED AS PARQUET; + +statement ok +COPY ( + SELECT * FROM (VALUES + ('aa', 'ba', 1.0), + ('ab', 'bb', 2.0), + ('ac', 'bc', 3.0), + ('ad', 'bd', 4.0) + ) AS v(a, b, e) +) TO 'test_files/scratch/push_down_filter_parquet/join_probe.parquet' +STORED AS PARQUET; + +statement ok +CREATE EXTERNAL TABLE join_build (a VARCHAR, b VARCHAR, c DOUBLE) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_parquet/join_build.parquet'; + +statement ok +CREATE EXTERNAL TABLE join_probe (a VARCHAR, b VARCHAR, e DOUBLE) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_parquet/join_probe.parquet'; + +# Data-correctness: only the 2 build rows should match. 
+query TTRR +SELECT p.a, p.b, build.c, p.e +FROM join_probe p INNER JOIN join_build AS build + ON p.a = build.a AND p.b = build.b +ORDER BY p.a; +---- +aa ba 1 1 +ab bb 2 2 + +statement ok +set datafusion.explain.analyze_categories = 'rows'; + +query TT +EXPLAIN ANALYZE +SELECT p.a, p.b, build.c, p.e +FROM join_probe p INNER JOIN join_build AS build + ON p.a = build.a AND p.b = build.b; +---- +Plan with Metrics +01)HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)], projection=[a@3, b@4, c@2, e@5], metrics=[output_rows=2, output_batches=1, array_map_created_count=0, build_input_batches=1, build_input_rows=2, input_batches=1, input_rows=2, avg_fanout=100% (2/2), probe_hit_rate=100% (2/2)] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/join_build.parquet]]}, projection=[a, b, c], file_type=parquet, metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=0, pushdown_rows_pruned=0, predicate_cache_inner_records=0, predicate_cache_records=0, scan_efficiency_ratio=20.48% (214/1.04 K)] +03)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/join_probe.parquet]]}, projection=[a, b, e], file_type=parquet, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ], pruning_predicate=a_null_count@1 != row_count@2 AND a_max@0 >= aa AND a_null_count@1 != row_count@2 AND 
a_min@3 <= ab AND b_null_count@5 != row_count@2 AND b_max@4 >= ba AND b_null_count@5 != row_count@2 AND b_min@6 <= bb, required_guarantees=[], metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=2, pushdown_rows_pruned=2, predicate_cache_inner_records=8, predicate_cache_records=4, scan_efficiency_ratio=22.78% (246/1.08 K)] + +statement ok +reset datafusion.explain.analyze_categories; + +statement ok +drop table join_build; + +statement ok +drop table join_probe; + + +######## +# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs:: +# test_nested_hashjoin_dynamic_filter_pushdown +# +# Nested hash joins: `t1 JOIN (t2 JOIN t3 ON t2.c = t3.d) ON t1.a = t2.b` +# should push dynamic filters down to BOTH the t2 and t3 scans +# (t2 gets a filter on `b`, t3 gets a filter on `d`). 
+######## + +statement ok +COPY ( + SELECT * FROM (VALUES + ('aa', 1.0), + ('ab', 2.0) + ) AS v(a, x) +) TO 'test_files/scratch/push_down_filter_parquet/nested_t1.parquet' +STORED AS PARQUET; + +statement ok +COPY ( + SELECT * FROM (VALUES + ('aa', 'ca', 1.0), + ('ab', 'cb', 2.0), + ('ac', 'cc', 3.0), + ('ad', 'cd', 4.0), + ('ae', 'ce', 5.0) + ) AS v(b, c, y) +) TO 'test_files/scratch/push_down_filter_parquet/nested_t2.parquet' +STORED AS PARQUET; + +statement ok +COPY ( + SELECT * FROM (VALUES + ('ca', 1.0), + ('cb', 2.0), + ('cc', 3.0), + ('cd', 4.0), + ('ce', 5.0), + ('cf', 6.0), + ('cg', 7.0), + ('ch', 8.0) + ) AS v(d, z) +) TO 'test_files/scratch/push_down_filter_parquet/nested_t3.parquet' +STORED AS PARQUET; + +statement ok +CREATE EXTERNAL TABLE nested_t1 (a VARCHAR, x DOUBLE) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_parquet/nested_t1.parquet'; + +statement ok +CREATE EXTERNAL TABLE nested_t2 (b VARCHAR, c VARCHAR, y DOUBLE) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_parquet/nested_t2.parquet'; + +statement ok +CREATE EXTERNAL TABLE nested_t3 (d VARCHAR, z DOUBLE) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_parquet/nested_t3.parquet'; + +statement ok +set datafusion.explain.analyze_categories = 'rows'; + +query TT +EXPLAIN ANALYZE +SELECT * +FROM nested_t1 +INNER JOIN nested_t2 ON nested_t1.a = nested_t2.b +INNER JOIN nested_t3 ON nested_t2.c = nested_t3.d; +---- +Plan with Metrics +01)HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c@3, d@0)], metrics=[output_rows=2, output_batches=1, array_map_created_count=0, build_input_batches=1, build_input_rows=2, input_batches=1, input_rows=2, avg_fanout=100% (2/2), probe_hit_rate=100% (2/2)] +02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, b@0)], metrics=[output_rows=2, output_batches=1, array_map_created_count=0, build_input_batches=1, build_input_rows=2, input_batches=1, input_rows=2, avg_fanout=100% (2/2), 
probe_hit_rate=100% (2/2)] +03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/nested_t1.parquet]]}, projection=[a, x], file_type=parquet, metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=0, pushdown_rows_pruned=0, predicate_cache_inner_records=0, predicate_cache_records=0, scan_efficiency_ratio=18.23% (144/790)] +04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/nested_t2.parquet]]}, projection=[b, c, y], file_type=parquet, predicate=DynamicFilter [ b@0 >= aa AND b@0 <= ab AND b@0 IN (SET) ([aa, ab]) ], pruning_predicate=b_null_count@1 != row_count@2 AND b_max@0 >= aa AND b_null_count@1 != row_count@2 AND b_min@3 <= ab AND (b_null_count@1 != row_count@2 AND b_min@3 <= aa AND aa <= b_max@0 OR b_null_count@1 != row_count@2 AND b_min@3 <= ab AND ab <= b_max@0), required_guarantees=[b in (aa, ab)], metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=1 total → 1 matched, page_index_rows_pruned=5 total → 5 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=2, pushdown_rows_pruned=3, predicate_cache_inner_records=5, 
predicate_cache_records=2, scan_efficiency_ratio=23.2% (252/1.09 K)] +05)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/nested_t3.parquet]]}, projection=[d, z], file_type=parquet, predicate=DynamicFilter [ d@0 >= ca AND d@0 <= cb AND hash_lookup ], pruning_predicate=d_null_count@1 != row_count@2 AND d_max@0 >= ca AND d_null_count@1 != row_count@2 AND d_min@3 <= cb, required_guarantees=[], metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=1 total → 1 matched, page_index_rows_pruned=8 total → 8 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=2, pushdown_rows_pruned=6, predicate_cache_inner_records=8, predicate_cache_records=2, scan_efficiency_ratio=22.12% (184/832)] + +statement ok +reset datafusion.explain.analyze_categories; + +statement ok +drop table nested_t1; + +statement ok +drop table nested_t2; + +statement ok +drop table nested_t3; + + +######## +# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs:: +# test_hashjoin_parent_filter_pushdown +# +# A FilterExec above a HashJoin should split: the single-side predicates +# should be pushed down to each scan, while any cross-side predicate +# stays above the join. 
+######## + +statement ok +COPY ( + SELECT * FROM (VALUES + ('aa', 'ba', 1.0), + ('ab', 'bb', 2.0) + ) AS v(a, b, c) +) TO 'test_files/scratch/push_down_filter_parquet/parent_build.parquet' +STORED AS PARQUET; + +statement ok +COPY ( + SELECT * FROM (VALUES + ('aa', 'ba', 1.0), + ('ab', 'bb', 2.0), + ('ac', 'bc', 3.0), + ('ad', 'bd', 4.0) + ) AS v(d, e, f) +) TO 'test_files/scratch/push_down_filter_parquet/parent_probe.parquet' +STORED AS PARQUET; + +statement ok +CREATE EXTERNAL TABLE parent_build (a VARCHAR, b VARCHAR, c DOUBLE) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_parquet/parent_build.parquet'; + +statement ok +CREATE EXTERNAL TABLE parent_probe (d VARCHAR, e VARCHAR, f DOUBLE) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_parquet/parent_probe.parquet'; + +query TT +EXPLAIN +SELECT * +FROM parent_build +INNER JOIN parent_probe ON parent_build.a = parent_probe.d +WHERE parent_build.a = 'aa' AND parent_probe.e = 'ba'; +---- +physical_plan +01)HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, d@0)] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/parent_build.parquet]]}, projection=[a, b, c], file_type=parquet, predicate=a@0 = aa, pruning_predicate=a_null_count@2 != row_count@3 AND a_min@0 <= aa AND aa <= a_max@1, required_guarantees=[a in (aa)] +03)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/parent_probe.parquet]]}, projection=[d, e, f], file_type=parquet, predicate=e@1 = ba AND d@0 = aa AND DynamicFilter [ empty ], pruning_predicate=e_null_count@2 != row_count@3 AND e_min@0 <= ba AND ba <= e_max@1 AND d_null_count@6 != row_count@3 AND d_min@4 <= aa AND aa <= d_max@5, required_guarantees=[d in (aa), e in (ba)] + +statement ok +drop table parent_build; + +statement ok +drop 
table parent_probe; + + +######## +# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs:: +# test_dynamic_filter_pushdown_through_hash_join_with_topk +# +# Composition: a TopK sitting above an InnerJoin should push its dynamic +# filter THROUGH the hash join and into the probe-side scan, where it +# gets combined (AND) with the hash join's own dynamic filter. The +# probe-side predicate should contain BOTH a `DynamicFilter [ d IN (...) ]` +# (from the join) and a `DynamicFilter [ e < ]` (from the TopK). +######## + +statement ok +COPY ( + SELECT * FROM (VALUES + ('aa', 'ba', 1.0), + ('ab', 'bb', 2.0) + ) AS v(a, b, c) +) TO 'test_files/scratch/push_down_filter_parquet/topk_join_build.parquet' +STORED AS PARQUET; + +statement ok +COPY ( + SELECT * FROM (VALUES + ('aa', 'ba', 1.0), + ('ab', 'bb', 2.0), + ('ac', 'bc', 3.0), + ('ad', 'bd', 4.0) + ) AS v(d, e, f) +) TO 'test_files/scratch/push_down_filter_parquet/topk_join_probe.parquet' +STORED AS PARQUET; + +statement ok +CREATE EXTERNAL TABLE topk_join_build (a VARCHAR, b VARCHAR, c DOUBLE) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_parquet/topk_join_build.parquet'; + +statement ok +CREATE EXTERNAL TABLE topk_join_probe (d VARCHAR, e VARCHAR, f DOUBLE) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_parquet/topk_join_probe.parquet'; + +statement ok +set datafusion.explain.analyze_categories = 'rows'; + +query TT +EXPLAIN ANALYZE +SELECT topk_join_probe.e +FROM topk_join_build +INNER JOIN topk_join_probe ON topk_join_build.a = topk_join_probe.d +ORDER BY topk_join_probe.e ASC +LIMIT 2; +---- +Plan with Metrics +01)SortExec: TopK(fetch=2), expr=[e@0 ASC NULLS LAST], preserve_partitioning=[false], filter=[e@0 < bb], metrics=[output_rows=2, output_batches=1, row_replacements=2] +02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, d@0)], projection=[e@2], metrics=[output_rows=2, output_batches=1, array_map_created_count=0, build_input_batches=1, 
build_input_rows=2, input_batches=1, input_rows=2, avg_fanout=100% (2/2), probe_hit_rate=100% (2/2)] +03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/topk_join_build.parquet]]}, projection=[a], file_type=parquet, metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=0, pushdown_rows_pruned=0, predicate_cache_inner_records=0, predicate_cache_records=0, scan_efficiency_ratio=6.7% (70/1.04 K)] +04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/topk_join_probe.parquet]]}, projection=[d, e], file_type=parquet, predicate=DynamicFilter [ d@0 >= aa AND d@0 <= ab AND d@0 IN (SET) ([aa, ab]) ] AND DynamicFilter [ e@1 < bb ], pruning_predicate=d_null_count@1 != row_count@2 AND d_max@0 >= aa AND d_null_count@1 != row_count@2 AND d_min@3 <= ab AND (d_null_count@1 != row_count@2 AND d_min@3 <= aa AND aa <= d_max@0 OR d_null_count@1 != row_count@2 AND d_min@3 <= ab AND ab <= d_max@0) AND e_null_count@5 != row_count@2 AND e_min@4 < bb, required_guarantees=[d in (aa, ab)], metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=1 total → 1 matched, page_index_rows_pruned=4 total → 4 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, 
files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=2, pushdown_rows_pruned=2, predicate_cache_inner_records=8, predicate_cache_records=4, scan_efficiency_ratio=15.37% (166/1.08 K)] + +statement ok +reset datafusion.explain.analyze_categories; + +statement ok +drop table topk_join_build; + +statement ok +drop table topk_join_probe; + + +######## +# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs:: +# test_topk_with_projection_transformation_on_dyn_filter +# +# When a ProjectionExec sits between a TopK and the parquet scan, the +# dynamic filter emitted by the TopK (on the projected column) must be +# rewritten back to the original scan-side column before it reaches the +# scan. Covers: simple `SELECT a`, column reorder (`SELECT b, a`), +# expression projection (`SELECT a+1 AS a_plus_1`), and column alias +# shadowing (`SELECT a+1 AS a`). +######## + +statement ok +COPY ( + SELECT * FROM (VALUES + (1, 'x', 1.0), + (2, 'y', 2.0), + (3, 'z', 3.0) + ) AS v(a, b, c) +) TO 'test_files/scratch/push_down_filter_parquet/topk_proj.parquet' +STORED AS PARQUET; + +statement ok +CREATE EXTERNAL TABLE topk_proj (a INT, b VARCHAR, c DOUBLE) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_parquet/topk_proj.parquet'; + +statement ok +set datafusion.explain.analyze_categories = 'rows'; + +# Case 1: reorder — `SELECT b, a` — filter on the TopK uses projected column +# `a`, but the scan predicate must reference the original `a@0`. 
+query TT +EXPLAIN ANALYZE SELECT b, a FROM topk_proj ORDER BY a LIMIT 2; +---- +Plan with Metrics +01)SortExec: TopK(fetch=2), expr=[a@1 ASC NULLS LAST], preserve_partitioning=[false], filter=[a@1 < 2], metrics=[output_rows=2, output_batches=1, row_replacements=2] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/topk_proj.parquet]]}, projection=[b, a], file_type=parquet, predicate=DynamicFilter [ a@0 < 2 ], pruning_predicate=a_null_count@1 != row_count@2 AND a_min@0 < 2, required_guarantees=[], metrics=[output_rows=3, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=3, pushdown_rows_pruned=0, predicate_cache_inner_records=3, predicate_cache_records=3, scan_efficiency_ratio=13.72% (153/1.11 K)] + +# Case 2: prune — `SELECT a` — filter stays as `a < 2` on the scan. 
+query TT +EXPLAIN ANALYZE SELECT a FROM topk_proj ORDER BY a LIMIT 2; +---- +Plan with Metrics +01)SortExec: TopK(fetch=2), expr=[a@0 ASC NULLS LAST], preserve_partitioning=[false], filter=[a@0 < 2], metrics=[output_rows=2, output_batches=1, row_replacements=2] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/topk_proj.parquet]]}, projection=[a], file_type=parquet, predicate=DynamicFilter [ a@0 < 2 ], pruning_predicate=a_null_count@1 != row_count@2 AND a_min@0 < 2, required_guarantees=[], metrics=[output_rows=3, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=3, pushdown_rows_pruned=0, predicate_cache_inner_records=3, predicate_cache_records=3, scan_efficiency_ratio=7.09% (79/1.11 K)] + +# Case 3: expression — `SELECT a+1 AS a_plus_1` — the TopK filter is on +# `a_plus_1`, the scan predicate must read `a@0 + 1`. 
+query TT +EXPLAIN ANALYZE SELECT a + 1 AS a_plus_1, b FROM topk_proj ORDER BY a_plus_1 LIMIT 2; +---- +Plan with Metrics +01)SortExec: TopK(fetch=2), expr=[a_plus_1@0 ASC NULLS LAST], preserve_partitioning=[false], filter=[a_plus_1@0 < 3], metrics=[output_rows=2, output_batches=1, row_replacements=2] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/topk_proj.parquet]]}, projection=[CAST(a@0 AS Int64) + 1 as a_plus_1, b], file_type=parquet, predicate=DynamicFilter [ CAST(a@0 AS Int64) + 1 < 3 ], metrics=[output_rows=3, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=3, pushdown_rows_pruned=0, predicate_cache_inner_records=3, predicate_cache_records=3, scan_efficiency_ratio=13.72% (153/1.11 K)] + +# Case 4: alias shadowing — `SELECT a+1 AS a` — the projection renames +# `a+1` to `a`, so the TopK's `a < 3` must still be rewritten to +# `a@0 + 1 < 3` on the scan. 
+query TT +EXPLAIN ANALYZE SELECT a + 1 AS a, b FROM topk_proj ORDER BY a LIMIT 2; +---- +Plan with Metrics +01)SortExec: TopK(fetch=2), expr=[a@0 ASC NULLS LAST], preserve_partitioning=[false], filter=[a@0 < 3], metrics=[output_rows=2, output_batches=1, row_replacements=2] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/topk_proj.parquet]]}, projection=[CAST(a@0 AS Int64) + 1 as a, b], file_type=parquet, predicate=DynamicFilter [ CAST(a@0 AS Int64) + 1 < 3 ], metrics=[output_rows=3, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=3, pushdown_rows_pruned=0, predicate_cache_inner_records=3, predicate_cache_records=3, scan_efficiency_ratio=13.72% (153/1.11 K)] + +statement ok +reset datafusion.explain.analyze_categories; + +statement ok +drop table topk_proj; + + +######## +# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs:: +# test_hashjoin_dynamic_filter_pushdown_through_aggregate_with_reordered_input +# +# The HashJoin's dynamic filter on the join key should propagate down +# through a GROUP BY aggregate and land on the underlying parquet scan. 
+######## + +statement ok +COPY ( + SELECT * FROM (VALUES ('h1'), ('h2')) AS v(a) +) TO 'test_files/scratch/push_down_filter_parquet/join_agg_build.parquet' +STORED AS PARQUET; + +statement ok +COPY ( + SELECT * FROM (VALUES + ('h1', 1.0), + ('h2', 2.0), + ('h3', 3.0), + ('h4', 4.0) + ) AS v(a, value) +) TO 'test_files/scratch/push_down_filter_parquet/join_agg_probe.parquet' +STORED AS PARQUET; + +statement ok +CREATE EXTERNAL TABLE join_agg_build (a VARCHAR) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_parquet/join_agg_build.parquet'; + +statement ok +CREATE EXTERNAL TABLE join_agg_probe (a VARCHAR, value DOUBLE) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_parquet/join_agg_probe.parquet'; + +statement ok +set datafusion.explain.analyze_categories = 'rows'; + +query TT +EXPLAIN ANALYZE +SELECT b.a, agg.min_value +FROM join_agg_build b +INNER JOIN ( + SELECT a, MIN(value) AS min_value FROM join_agg_probe GROUP BY a +) agg ON b.a = agg.a; +---- +Plan with Metrics +01)HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0)], projection=[a@0, min_value@2], metrics=[output_rows=2, output_batches=2, array_map_created_count=0, build_input_batches=1, build_input_rows=2, input_batches=2, input_rows=2, avg_fanout=100% (2/2), probe_hit_rate=100% (2/2)] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/join_agg_build.parquet]]}, projection=[a], file_type=parquet, metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, 
pushdown_rows_matched=0, pushdown_rows_pruned=0, predicate_cache_inner_records=0, predicate_cache_records=0, scan_efficiency_ratio=15.32% (70/457)] +03)--ProjectionExec: expr=[a@0 as a, min(join_agg_probe.value)@1 as min_value], metrics=[output_rows=2, output_batches=2] +04)----AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[min(join_agg_probe.value)], metrics=[output_rows=2, output_batches=2, spill_count=0, spilled_rows=0] +05)------RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=1, metrics=[output_rows=2, output_batches=2, spill_count=0, spilled_rows=0] +06)--------AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[min(join_agg_probe.value)], metrics=[output_rows=2, output_batches=1, spill_count=0, spilled_rows=0, skipped_aggregation_rows=0, reduction_factor=100% (2/2)] +07)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/join_agg_probe.parquet]]}, projection=[a, value], file_type=parquet, predicate=DynamicFilter [ a@0 >= h1 AND a@0 <= h2 AND a@0 IN (SET) ([h1, h2]) ], pruning_predicate=a_null_count@1 != row_count@2 AND a_max@0 >= h1 AND a_null_count@1 != row_count@2 AND a_min@3 <= h2 AND (a_null_count@1 != row_count@2 AND a_min@3 <= h1 AND h1 <= a_max@0 OR a_null_count@1 != row_count@2 AND a_min@3 <= h2 AND h2 <= a_max@0), required_guarantees=[a in (h1, h2)], metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=1 total → 1 matched, page_index_rows_pruned=4 total → 4 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=2, pushdown_rows_pruned=2, predicate_cache_inner_records=4, predicate_cache_records=2, 
scan_efficiency_ratio=19.81% (163/823)] + +statement ok +reset datafusion.explain.analyze_categories; + +statement ok +drop table join_agg_build; + +statement ok +drop table join_agg_probe; + + +######## +# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs:: +# test_hashjoin_dynamic_filter_with_nulls +# +# HashJoin build side contains NULL join-key values. The dynamic filter +# emitted to the probe scan should still correctly handle NULLs. +######## + +statement ok +COPY ( + SELECT * FROM (VALUES + ('aa', 1), + (CAST(NULL AS VARCHAR), 2), + ('ab', CAST(NULL AS INT)) + ) AS v(a, b) +) TO 'test_files/scratch/push_down_filter_parquet/nulls_build.parquet' +STORED AS PARQUET; + +statement ok +COPY ( + SELECT * FROM (VALUES + ('aa', 1, 1.0), + ('ab', 3, 2.0), + ('ac', 4, 3.0), + (CAST(NULL AS VARCHAR), 5, 4.0) + ) AS v(a, b, c) +) TO 'test_files/scratch/push_down_filter_parquet/nulls_probe.parquet' +STORED AS PARQUET; + +statement ok +CREATE EXTERNAL TABLE nulls_build (a VARCHAR, b INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_parquet/nulls_build.parquet'; + +statement ok +CREATE EXTERNAL TABLE nulls_probe (a VARCHAR, b INT, c DOUBLE) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_parquet/nulls_probe.parquet'; + +statement ok +set datafusion.explain.analyze_categories = 'rows'; + +query TT +EXPLAIN ANALYZE +SELECT nulls_build.a, nulls_build.b, nulls_probe.a, nulls_probe.b, nulls_probe.c +FROM nulls_build INNER JOIN nulls_probe +ON nulls_build.a = nulls_probe.a AND nulls_build.b = nulls_probe.b; +---- +Plan with Metrics +01)HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)], metrics=[output_rows=1, output_batches=1, array_map_created_count=0, build_input_batches=1, build_input_rows=3, input_batches=1, input_rows=1, avg_fanout=100% (1/1), probe_hit_rate=100% (1/1)] +02)--DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/nulls_build.parquet]]}, projection=[a, b], file_type=parquet, metrics=[output_rows=3, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=0, pushdown_rows_pruned=0, predicate_cache_inner_records=0, predicate_cache_records=0, scan_efficiency_ratio=18.6% (144/774)] +03)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/nulls_probe.parquet]]}, projection=[a, b, c], file_type=parquet, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= 1 AND b@1 <= 2 AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:1}, {c0:,c1:2}, {c0:ab,c1:}]) ], pruning_predicate=a_null_count@1 != row_count@2 AND a_max@0 >= aa AND a_null_count@1 != row_count@2 AND a_min@3 <= ab AND b_null_count@5 != row_count@2 AND b_max@4 >= 1 AND b_null_count@5 != row_count@2 AND b_min@6 <= 2, required_guarantees=[], metrics=[output_rows=1, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=1, pushdown_rows_pruned=3, predicate_cache_inner_records=8, predicate_cache_records=2, scan_efficiency_ratio=21.1% 
(237/1.12 K)] + +statement ok +reset datafusion.explain.analyze_categories; + +statement ok +drop table nulls_build; + +statement ok +drop table nulls_probe; + + +######## +# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs:: +# test_hashjoin_dynamic_filter_pushdown_left_join +# test_hashjoin_dynamic_filter_pushdown_left_semi_join +# +# For LEFT and LEFT SEMI joins (without a WHERE that would convert them +# to INNER), the HashJoin's dynamic filter should still push into the +# probe scan on the join keys — unmatched probe rows that can never +# contribute to any build match are safely pruned there. +######## + +statement ok +COPY ( + SELECT * FROM (VALUES + ('aa', 'ba', 1.0), + ('ab', 'bb', 2.0) + ) AS v(a, b, c) +) TO 'test_files/scratch/push_down_filter_parquet/lj_build.parquet' +STORED AS PARQUET; + +statement ok +COPY ( + SELECT * FROM (VALUES + ('aa', 'ba', 1.0), + ('ab', 'bb', 2.0), + ('ac', 'bc', 3.0), + ('ad', 'bd', 4.0) + ) AS v(a, b, e) +) TO 'test_files/scratch/push_down_filter_parquet/lj_probe.parquet' +STORED AS PARQUET; + +statement ok +CREATE EXTERNAL TABLE lj_build (a VARCHAR, b VARCHAR, c DOUBLE) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_parquet/lj_build.parquet'; + +statement ok +CREATE EXTERNAL TABLE lj_probe (a VARCHAR, b VARCHAR, e DOUBLE) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_parquet/lj_probe.parquet'; + +statement ok +set datafusion.explain.analyze_categories = 'rows'; + +# LEFT JOIN: build is preserved; probe rows get dynamic filter based on build keys. 
+query TT +EXPLAIN ANALYZE +SELECT lj_build.a, lj_build.b, lj_build.c, lj_probe.a, lj_probe.b, lj_probe.e +FROM lj_build LEFT JOIN lj_probe +ON lj_build.a = lj_probe.a AND lj_build.b = lj_probe.b; +---- +Plan with Metrics +01)HashJoinExec: mode=CollectLeft, join_type=Left, on=[(a@0, a@0), (b@1, b@1)], metrics=[output_rows=2, output_batches=1, array_map_created_count=0, build_input_batches=1, build_input_rows=2, input_batches=2, input_rows=2, avg_fanout=100% (2/2), probe_hit_rate=100% (2/2)] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/lj_build.parquet]]}, projection=[a, b, c], file_type=parquet, metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=0, pushdown_rows_pruned=0, predicate_cache_inner_records=0, predicate_cache_records=0, scan_efficiency_ratio=20.48% (214/1.04 K)] +03)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/lj_probe.parquet]]}, projection=[a, b, e], file_type=parquet, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ], pruning_predicate=a_null_count@1 != row_count@2 AND a_max@0 >= aa AND a_null_count@1 != row_count@2 AND a_min@3 <= ab AND b_null_count@5 != row_count@2 AND b_max@4 >= ba AND b_null_count@5 != row_count@2 AND b_min@6 <= bb, required_guarantees=[], metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 
matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=2, pushdown_rows_pruned=2, predicate_cache_inner_records=8, predicate_cache_records=4, scan_efficiency_ratio=22.78% (246/1.08 K)] + +# LEFT SEMI JOIN: only matching build rows are returned; probe scan still +# receives the dynamic filter. +query TT +EXPLAIN ANALYZE +SELECT lj_build.* +FROM lj_build +WHERE EXISTS ( + SELECT 1 FROM lj_probe + WHERE lj_probe.a = lj_build.a AND lj_probe.b = lj_build.b +); +---- +Plan with Metrics +01)HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(a@0, a@0), (b@1, b@1)], metrics=[output_rows=2, output_batches=1, array_map_created_count=0, build_input_batches=1, build_input_rows=2, input_batches=2, input_rows=4, avg_fanout=100% (2/2), probe_hit_rate=100% (2/2)] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/lj_build.parquet]]}, projection=[a, b, c], file_type=parquet, metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=0, pushdown_rows_pruned=0, predicate_cache_inner_records=0, predicate_cache_records=0, scan_efficiency_ratio=20.48% (214/1.04 K)] +03)--DataSourceExec: file_groups={1 
group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/lj_probe.parquet]]}, projection=[a, b], file_type=parquet, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ], pruning_predicate=a_null_count@1 != row_count@2 AND a_max@0 >= aa AND a_null_count@1 != row_count@2 AND a_min@3 <= ab AND b_null_count@5 != row_count@2 AND b_max@4 >= ba AND b_null_count@5 != row_count@2 AND b_min@6 <= bb, required_guarantees=[], metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=2, pushdown_rows_pruned=2, predicate_cache_inner_records=8, predicate_cache_records=4, scan_efficiency_ratio=15.37% (166/1.08 K)] + +statement ok +reset datafusion.explain.analyze_categories; + +statement ok +drop table lj_build; + +statement ok +drop table lj_probe; + + +######## +# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs:: +# test_hashjoin_hash_table_pushdown_collect_left +# test_hashjoin_hash_table_pushdown_integer_keys +# +# With `hash_join_inlist_pushdown_max_size = 1`, the HashJoin dynamic filter +# should use the HashTable strategy (`hash_lookup`) instead of the InList +# strategy (`IN (SET) ([...])`), even when the build side is tiny. 
+######## + +statement ok +set datafusion.optimizer.hash_join_inlist_pushdown_max_size = 1; + +# --- multi-column string keys --- + +statement ok +COPY ( + SELECT * FROM (VALUES + ('aa', 'ba', 1.0), + ('ab', 'bb', 2.0) + ) AS v(a, b, c) +) TO 'test_files/scratch/push_down_filter_parquet/hl_build.parquet' +STORED AS PARQUET; + +statement ok +COPY ( + SELECT * FROM (VALUES + ('aa', 'ba', 1.0), + ('ab', 'bb', 2.0), + ('ac', 'bc', 3.0), + ('ad', 'bd', 4.0) + ) AS v(a, b, e) +) TO 'test_files/scratch/push_down_filter_parquet/hl_probe.parquet' +STORED AS PARQUET; + +statement ok +CREATE EXTERNAL TABLE hl_build (a VARCHAR, b VARCHAR, c DOUBLE) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_parquet/hl_build.parquet'; + +statement ok +CREATE EXTERNAL TABLE hl_probe (a VARCHAR, b VARCHAR, e DOUBLE) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_parquet/hl_probe.parquet'; + +statement ok +set datafusion.explain.analyze_categories = 'rows'; + +# probe scan filter should contain `hash_lookup`, not `IN (SET)`. 
+query TT +EXPLAIN ANALYZE +SELECT p.a, p.b, build.c, p.e +FROM hl_probe p INNER JOIN hl_build AS build + ON p.a = build.a AND p.b = build.b; +---- +Plan with Metrics +01)HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)], projection=[a@3, b@4, c@2, e@5], metrics=[output_rows=2, output_batches=1, array_map_created_count=0, build_input_batches=1, build_input_rows=2, input_batches=1, input_rows=2, avg_fanout=100% (2/2), probe_hit_rate=100% (2/2)] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/hl_build.parquet]]}, projection=[a, b, c], file_type=parquet, metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=0, pushdown_rows_pruned=0, predicate_cache_inner_records=0, predicate_cache_records=0, scan_efficiency_ratio=20.48% (214/1.04 K)] +03)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/hl_probe.parquet]]}, projection=[a, b, e], file_type=parquet, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND hash_lookup ], pruning_predicate=a_null_count@1 != row_count@2 AND a_max@0 >= aa AND a_null_count@1 != row_count@2 AND a_min@3 <= ab AND b_null_count@5 != row_count@2 AND b_max@4 >= ba AND b_null_count@5 != row_count@2 AND b_min@6 <= bb, required_guarantees=[], metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, 
row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=2, pushdown_rows_pruned=2, predicate_cache_inner_records=8, predicate_cache_records=4, scan_efficiency_ratio=22.78% (246/1.08 K)] + +statement ok +drop table hl_build; + +statement ok +drop table hl_probe; + +# --- multi-column integer keys --- + +statement ok +COPY ( + SELECT * FROM (VALUES + (1, 10, 100.0), + (2, 20, 200.0) + ) AS v(id1, id2, value) +) TO 'test_files/scratch/push_down_filter_parquet/int_build.parquet' +STORED AS PARQUET; + +statement ok +COPY ( + SELECT * FROM (VALUES + (1, 10, 'a'), + (2, 20, 'b'), + (3, 30, 'c'), + (4, 40, 'd') + ) AS v(id1, id2, data) +) TO 'test_files/scratch/push_down_filter_parquet/int_probe.parquet' +STORED AS PARQUET; + +statement ok +CREATE EXTERNAL TABLE int_build (id1 INT, id2 INT, value DOUBLE) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_parquet/int_build.parquet'; + +statement ok +CREATE EXTERNAL TABLE int_probe (id1 INT, id2 INT, data VARCHAR) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_parquet/int_probe.parquet'; + +query TT +EXPLAIN ANALYZE +SELECT b.id1, b.id2, b.value, p.data +FROM int_build b INNER JOIN int_probe p + ON b.id1 = p.id1 AND b.id2 = p.id2; +---- +Plan with Metrics +01)HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id1@0, id1@0), (id2@1, id2@1)], projection=[id1@0, id2@1, value@2, data@5], metrics=[output_rows=2, output_batches=1, array_map_created_count=0, build_input_batches=1, build_input_rows=2, input_batches=1, input_rows=2, avg_fanout=100% (2/2), probe_hit_rate=100% (2/2)] +02)--DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/int_build.parquet]]}, projection=[id1, id2, value], file_type=parquet, metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=0, pushdown_rows_pruned=0, predicate_cache_inner_records=0, predicate_cache_records=0, scan_efficiency_ratio=19.02% (222/1.17 K)] +03)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/int_probe.parquet]]}, projection=[id1, id2, data], file_type=parquet, predicate=DynamicFilter [ id1@0 >= 1 AND id1@0 <= 2 AND id2@1 >= 10 AND id2@1 <= 20 AND hash_lookup ], pruning_predicate=id1_null_count@1 != row_count@2 AND id1_max@0 >= 1 AND id1_null_count@1 != row_count@2 AND id1_min@3 <= 2 AND id2_null_count@5 != row_count@2 AND id2_max@4 >= 10 AND id2_null_count@5 != row_count@2 AND id2_min@6 <= 20, required_guarantees=[], metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=2, pushdown_rows_pruned=2, predicate_cache_inner_records=8, predicate_cache_records=4, scan_efficiency_ratio=21.43% (239/1.11 K)] + 
+statement ok +reset datafusion.explain.analyze_categories; + +statement ok +reset datafusion.optimizer.hash_join_inlist_pushdown_max_size; + +statement ok +drop table int_build; + +statement ok +drop table int_probe; + + # Config reset statement ok RESET datafusion.explain.physical_plan_only; diff --git a/datafusion/sqllogictest/test_files/push_down_filter_regression.slt b/datafusion/sqllogictest/test_files/push_down_filter_regression.slt index cfc564fa2b56c..51ad998c02307 100644 --- a/datafusion/sqllogictest/test_files/push_down_filter_regression.slt +++ b/datafusion/sqllogictest/test_files/push_down_filter_regression.slt @@ -185,9 +185,318 @@ physical_plan 04)------AggregateExec: mode=Partial, gby=[(NULL as id), (id@0 as id)], aggr=[max(agg_dyn_test.id)] 05)--------DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/core/tests/data/test_statistics_per_partition/date=2025-03-01/j5fUeSDQo22oPyPU.parquet, WORKSPACE_ROOT/datafusion/core/tests/data/test_statistics_per_partition/date=2025-03-02/j5fUeSDQo22oPyPU.parquet], [WORKSPACE_ROOT/datafusion/core/tests/data/test_statistics_per_partition/date=2025-03-03/j5fUeSDQo22oPyPU.parquet, WORKSPACE_ROOT/datafusion/core/tests/data/test_statistics_per_partition/date=2025-03-04/j5fUeSDQo22oPyPU.parquet]]}, projection=[id], file_type=parquet, predicate=id@0 < 10, pruning_predicate=id_null_count@1 != row_count@2 AND id_min@0 < 10, required_guarantees=[] +# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs:: +# test_aggregate_dynamic_filter_parquet_e2e +# +# End-to-end: `select max(id) from t where id > 1` should pick up a dynamic +# filter from the partial aggregate that prunes at least one file range +# (files_ranges_pruned_statistics must report < 4 matched out of 4 total). +# +# Build a self-contained fixture of 4 single-row parquet files, one per +# `id` value. With target_partitions=2, files are split into 2 groups of 2. 
+# Writing the files in decreasing-id order ensures the first file in each +# group has the larger id, so after reading it the dynamic filter tightens +# (e.g. to `id > 4` in group 1) and prunes the second file in the group. + +statement ok +COPY (VALUES (4)) TO 'test_files/scratch/push_down_filter_regression/agg_dyn/file_0.parquet' STORED AS PARQUET; + +statement ok +COPY (VALUES (3)) TO 'test_files/scratch/push_down_filter_regression/agg_dyn/file_1.parquet' STORED AS PARQUET; + +statement ok +COPY (VALUES (2)) TO 'test_files/scratch/push_down_filter_regression/agg_dyn/file_2.parquet' STORED AS PARQUET; + +statement ok +COPY (VALUES (1)) TO 'test_files/scratch/push_down_filter_regression/agg_dyn/file_3.parquet' STORED AS PARQUET; + +statement ok +CREATE EXTERNAL TABLE agg_dyn_e2e (column1 INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_regression/agg_dyn/'; + +statement ok +set datafusion.execution.collect_statistics = true; + +statement ok +set datafusion.explain.analyze_categories = 'rows'; + +query TT +EXPLAIN ANALYZE select max(column1) from agg_dyn_e2e where column1 > 1; +---- +Plan with Metrics +01)AggregateExec: mode=Final, gby=[], aggr=[max(agg_dyn_e2e.column1)], metrics=[output_rows=1, output_batches=1] +02)--CoalescePartitionsExec, metrics=[output_rows=2, output_batches=2] +03)----AggregateExec: mode=Partial, gby=[], aggr=[max(agg_dyn_e2e.column1)], metrics=[output_rows=2, output_batches=2] +04)------DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn/file_0.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn/file_1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn/file_2.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn/file_3.parquet]]}, projection=[column1], file_type=parquet, 
predicate=column1@0 > 1 AND DynamicFilter [ column1@0 > 4 ], pruning_predicate=column1_null_count@1 != row_count@2 AND column1_max@0 > 1 AND column1_null_count@1 != row_count@2 AND column1_max@0 > 4, required_guarantees=[], metrics=[output_rows=2, output_batches=2, files_ranges_pruned_statistics=4 total → 4 matched, row_groups_pruned_statistics=4 total → 2 matched -> 2 fully matched, row_groups_pruned_bloom_filter=2 total → 2 matched, page_index_pages_pruned=2 total → 2 matched, page_index_rows_pruned=2 total → 2 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=4, files_processed=4, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=2, pushdown_rows_pruned=0, predicate_cache_inner_records=2, predicate_cache_records=4, scan_efficiency_ratio=25.15% (130/517)] + +statement ok +reset datafusion.explain.analyze_categories; + +statement ok +reset datafusion.execution.collect_statistics; + +statement ok +drop table agg_dyn_e2e; + + +######## +# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs: +# test_aggregate_dynamic_filter_min_simple +# test_aggregate_dynamic_filter_max_simple +# test_aggregate_dynamic_filter_min_expression_not_supported +# test_aggregate_dynamic_filter_min_max_same_column +# test_aggregate_dynamic_filter_min_max_different_columns +# test_aggregate_dynamic_filter_multiple_mixed_expressions +# test_aggregate_dynamic_filter_min_all_nulls +# +# Baseline behavior of aggregate dynamic filter emission: +# MIN(a) -> filter `a < ` +# MAX(a) -> filter `a > ` +# MIN + MAX -> filter `a < OR a > ` (or across columns) +# MIN(a+1) -> no dynamic filter (expression input is unsupported) +# all-NULLs -> filter stays `true` (no meaningful bound to propagate) +# +# Dynamic aggregate filters are only emitted in Partial+Final mode, not +# Single mode, so each fixture is written to 2 files and runs with +# target_partitions=2 to force a 
multi-partition aggregate plan. + +statement ok +set datafusion.execution.target_partitions = 2; + +# --- single-column fixture ([5, 1, 3, 8]) split across 2 files --- + +statement ok +COPY ( + SELECT * FROM (VALUES (5), (1)) AS v(a) +) TO 'test_files/scratch/push_down_filter_regression/agg_dyn_single/file_0.parquet' +STORED AS PARQUET; + +statement ok +COPY ( + SELECT * FROM (VALUES (3), (8)) AS v(a) +) TO 'test_files/scratch/push_down_filter_regression/agg_dyn_single/file_1.parquet' +STORED AS PARQUET; + +statement ok +CREATE EXTERNAL TABLE agg_dyn_single (a INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_regression/agg_dyn_single/'; + +# Use `analyze_level = summary` + `analyze_categories = 'none'` so metrics +# render empty; we only care that the `predicate=DynamicFilter [ ... ]` text +# matches. Pruning metrics here are subject to a parallel-execution race +# (the order in which Partial aggregates publish filter updates vs. when the +# scan reads each partition), so the filter *content* is deterministic but +# the pruning counts are not. 
+statement ok +set datafusion.explain.analyze_level = summary; + +statement ok +set datafusion.explain.analyze_categories = 'none'; + +# MIN(a) -> DynamicFilter [ a < 1 ] +query TT +EXPLAIN ANALYZE SELECT MIN(a) FROM agg_dyn_single; +---- +Plan with Metrics +01)AggregateExec: mode=Final, gby=[], aggr=[min(agg_dyn_single.a)], metrics=[] +02)--CoalescePartitionsExec, metrics=[] +03)----AggregateExec: mode=Partial, gby=[], aggr=[min(agg_dyn_single.a)], metrics=[] +04)------DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_single/file_0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_single/file_1.parquet]]}, projection=[a], file_type=parquet, predicate=DynamicFilter [ a@0 < 1 ], pruning_predicate=a_null_count@1 != row_count@2 AND a_min@0 < 1, required_guarantees=[], metrics=[] + +# MAX(a) -> DynamicFilter [ a > 8 ] +query TT +EXPLAIN ANALYZE SELECT MAX(a) FROM agg_dyn_single; +---- +Plan with Metrics +01)AggregateExec: mode=Final, gby=[], aggr=[max(agg_dyn_single.a)], metrics=[] +02)--CoalescePartitionsExec, metrics=[] +03)----AggregateExec: mode=Partial, gby=[], aggr=[max(agg_dyn_single.a)], metrics=[] +04)------DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_single/file_0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_single/file_1.parquet]]}, projection=[a], file_type=parquet, predicate=DynamicFilter [ a@0 > 8 ], pruning_predicate=a_null_count@1 != row_count@2 AND a_max@0 > 8, required_guarantees=[], metrics=[] + +# MIN(a), MAX(a) -> DynamicFilter [ a < 1 OR a > 8 ] +query TT +EXPLAIN ANALYZE SELECT MIN(a), MAX(a) FROM agg_dyn_single; +---- +Plan with Metrics +01)AggregateExec: mode=Final, gby=[], aggr=[min(agg_dyn_single.a), max(agg_dyn_single.a)], metrics=[] 
+02)--CoalescePartitionsExec, metrics=[] +03)----AggregateExec: mode=Partial, gby=[], aggr=[min(agg_dyn_single.a), max(agg_dyn_single.a)], metrics=[] +04)------DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_single/file_0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_single/file_1.parquet]]}, projection=[a], file_type=parquet, predicate=DynamicFilter [ a@0 < 1 OR a@0 > 8 ], pruning_predicate=a_null_count@1 != row_count@2 AND a_min@0 < 1 OR a_null_count@1 != row_count@2 AND a_max@3 > 8, required_guarantees=[], metrics=[] + +# MIN(a+1) -> no dynamic filter (expression input is not a plain column) +query TT +EXPLAIN ANALYZE SELECT MIN(a + 1) FROM agg_dyn_single; +---- +Plan with Metrics +01)AggregateExec: mode=Final, gby=[], aggr=[min(agg_dyn_single.a + Int64(1))], metrics=[] +02)--CoalescePartitionsExec, metrics=[] +03)----AggregateExec: mode=Partial, gby=[], aggr=[min(agg_dyn_single.a + Int64(1))], metrics=[] +04)------DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_single/file_0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_single/file_1.parquet]]}, projection=[a], file_type=parquet, metrics=[] + +statement ok +drop table agg_dyn_single; + +# --- two-column fixture: MIN(a) + MAX(b) across columns --- + +statement ok +COPY ( + SELECT * FROM (VALUES (5, 7), (1, 2)) AS v(a, b) +) TO 'test_files/scratch/push_down_filter_regression/agg_dyn_two_col/file_0.parquet' +STORED AS PARQUET; + +statement ok +COPY ( + SELECT * FROM (VALUES (3, 4), (8, 9)) AS v(a, b) +) TO 'test_files/scratch/push_down_filter_regression/agg_dyn_two_col/file_1.parquet' +STORED AS PARQUET; + +statement ok +CREATE EXTERNAL TABLE agg_dyn_two_col (a INT, b INT) +STORED AS PARQUET +LOCATION 
'test_files/scratch/push_down_filter_regression/agg_dyn_two_col/'; + +# MIN(a), MAX(b) -> DynamicFilter [ a < 1 OR b > 9 ] +query TT +EXPLAIN ANALYZE SELECT MIN(a), MAX(b) FROM agg_dyn_two_col; +---- +Plan with Metrics +01)AggregateExec: mode=Final, gby=[], aggr=[min(agg_dyn_two_col.a), max(agg_dyn_two_col.b)], metrics=[] +02)--CoalescePartitionsExec, metrics=[] +03)----AggregateExec: mode=Partial, gby=[], aggr=[min(agg_dyn_two_col.a), max(agg_dyn_two_col.b)], metrics=[] +04)------DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_two_col/file_0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_two_col/file_1.parquet]]}, projection=[a, b], file_type=parquet, predicate=DynamicFilter [ a@0 < 1 OR b@1 > 9 ], pruning_predicate=a_null_count@1 != row_count@2 AND a_min@0 < 1 OR b_null_count@4 != row_count@2 AND b_max@3 > 9, required_guarantees=[], metrics=[] + +statement ok +drop table agg_dyn_two_col; + +# --- mixed expressions: MIN(a), MAX(a), MAX(b), MIN(c+1) --- +# Supported aggregates (MIN(a), MAX(a), MAX(b)) should drive a filter; +# MIN(c+1) is unsupported and must not contribute. 
+ +statement ok +COPY ( + SELECT * FROM (VALUES (5, 10, 100), (1, 4, 70)) AS v(a, b, c) +) TO 'test_files/scratch/push_down_filter_regression/agg_dyn_mixed/file_0.parquet' +STORED AS PARQUET; + +statement ok +COPY ( + SELECT * FROM (VALUES (3, 6, 90), (8, 12, 110)) AS v(a, b, c) +) TO 'test_files/scratch/push_down_filter_regression/agg_dyn_mixed/file_1.parquet' +STORED AS PARQUET; + +statement ok +CREATE EXTERNAL TABLE agg_dyn_mixed (a INT, b INT, c INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_regression/agg_dyn_mixed/'; + +# -> DynamicFilter [ a < 1 OR a > 8 OR b > 12 ] (MIN(c+1) dropped as unsupported) +query TT +EXPLAIN ANALYZE SELECT MIN(a), MAX(a), MAX(b), MIN(c + 1) FROM agg_dyn_mixed; +---- +Plan with Metrics +01)AggregateExec: mode=Final, gby=[], aggr=[min(agg_dyn_mixed.a), max(agg_dyn_mixed.a), max(agg_dyn_mixed.b), min(agg_dyn_mixed.c + Int64(1))], metrics=[] +02)--CoalescePartitionsExec, metrics=[] +03)----AggregateExec: mode=Partial, gby=[], aggr=[min(agg_dyn_mixed.a), max(agg_dyn_mixed.a), max(agg_dyn_mixed.b), min(agg_dyn_mixed.c + Int64(1))], metrics=[] +04)------DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_mixed/file_0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_mixed/file_1.parquet]]}, projection=[a, b, c], file_type=parquet, predicate=DynamicFilter [ a@0 < 1 OR a@0 > 8 OR b@1 > 12 ], pruning_predicate=a_null_count@1 != row_count@2 AND a_min@0 < 1 OR a_null_count@1 != row_count@2 AND a_max@3 > 8 OR b_null_count@5 != row_count@2 AND b_max@4 > 12, required_guarantees=[], metrics=[] + +statement ok +drop table agg_dyn_mixed; + +# --- all-NULLs input: filter should stay `true` (no meaningful bound) --- + +statement ok +COPY ( + SELECT CAST(NULL AS INT) AS a FROM (VALUES (1), (2)) AS v(n) +) TO 'test_files/scratch/push_down_filter_regression/agg_dyn_nulls/file_0.parquet' +STORED 
AS PARQUET; + +statement ok +COPY ( + SELECT CAST(NULL AS INT) AS a FROM (VALUES (3), (4)) AS v(n) +) TO 'test_files/scratch/push_down_filter_regression/agg_dyn_nulls/file_1.parquet' +STORED AS PARQUET; + +statement ok +CREATE EXTERNAL TABLE agg_dyn_nulls (a INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_regression/agg_dyn_nulls/'; + +# MIN(a) over all-NULL input -> DynamicFilter [ true ] +query TT +EXPLAIN ANALYZE SELECT MIN(a) FROM agg_dyn_nulls; +---- +Plan with Metrics +01)AggregateExec: mode=Final, gby=[], aggr=[min(agg_dyn_nulls.a)], metrics=[] +02)--CoalescePartitionsExec, metrics=[] +03)----AggregateExec: mode=Partial, gby=[], aggr=[min(agg_dyn_nulls.a)], metrics=[] +04)------DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_nulls/file_0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_nulls/file_1.parquet]]}, projection=[a], file_type=parquet, predicate=DynamicFilter [ true ], metrics=[] + +statement ok +reset datafusion.explain.analyze_categories; + +statement ok +reset datafusion.explain.analyze_level; + +statement ok +drop table agg_dyn_nulls; + +statement ok +set datafusion.execution.target_partitions = 4; + + +######## +# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs:: +# test_aggregate_filter_pushdown +# test_no_pushdown_filter_on_aggregate_result +# +# - A filter that only references grouping columns (`WHERE a = 'x' GROUP BY a`) +# should push through the aggregate and land on the scan's predicate. +# - A filter on an aggregate *result* (`HAVING count(b) > 5`) must NOT push +# through — it has to stay above the aggregate. 
+######## + +statement ok +COPY ( + SELECT * FROM (VALUES ('x', 'foo'), ('y', 'bar')) AS v(a, b) +) TO 'test_files/scratch/push_down_filter_regression/agg_filter_pushdown.parquet' +STORED AS PARQUET; + +statement ok +CREATE EXTERNAL TABLE agg_filter_pushdown (a VARCHAR, b VARCHAR) +STORED AS PARQUET +LOCATION 'test_files/scratch/push_down_filter_regression/agg_filter_pushdown.parquet'; + +# Filter on grouping column pushes down to the scan. +query TT +EXPLAIN SELECT a, count(b) FROM agg_filter_pushdown WHERE a = 'x' GROUP BY a; +---- +physical_plan +01)AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[count(agg_filter_pushdown.b)], ordering_mode=Sorted +02)--RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 +03)----AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count(agg_filter_pushdown.b)], ordering_mode=Sorted +04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_filter_pushdown.parquet]]}, projection=[a, b], file_type=parquet, predicate=a@0 = x, pruning_predicate=a_null_count@2 != row_count@3 AND a_min@0 <= x AND x <= a_max@1, required_guarantees=[a in (x)] + +# Filter on aggregate result (HAVING) stays above the aggregate. 
+query TT +EXPLAIN SELECT a, count(b) AS cnt FROM agg_filter_pushdown GROUP BY a HAVING count(b) > 5; +---- +physical_plan +01)ProjectionExec: expr=[a@0 as a, count(agg_filter_pushdown.b)@1 as cnt] +02)--FilterExec: count(agg_filter_pushdown.b)@1 > 5 +03)----AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[count(agg_filter_pushdown.b)] +04)------RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=1 +05)--------AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count(agg_filter_pushdown.b)] +06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_filter_pushdown.parquet]]}, projection=[a, b], file_type=parquet + +statement ok +drop table agg_filter_pushdown; + # Config reset -# The SLT runner sets `target_partitions` to 4 instead of using the default, so +# The SLT runner sets `target_partitions` to 4 instead of using the default, so # reset it explicitly. statement ok set datafusion.execution.target_partitions = 4; diff --git a/dev/release/README.md b/dev/release/README.md index 533bfc20ee7c9..2f99d52b200d2 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -19,59 +19,11 @@ # Release Process -DataFusion typically has major releases around once per month, including breaking API changes. +For contributor-facing guidance on release branches and backports, see the +[Contributor Guide Release Management page](../../docs/source/contributor-guide/release_management.md). -Patch releases are made on an adhoc basis, but we try and avoid them given the frequent major releases. - -## Release Process Overview - -New development happens on the `main` branch. -Releases are made from branches, e.g. `branch-50` for the `50.x.y` release series. 
- -To prepare for a new release series, we: - -- Create a new branch from `main`, such as `branch-50` in the Apache repository (not in a fork) -- Continue merging new features changes to `main` branch -- Prepare the release branch for release: - - Update version numbers in `Cargo.toml` files and create `CHANGELOG.md` - - Add additional changes to the release branch as needed -- When the code is ready, create GitHub tags release candidate (rc) artifacts from the release branch. -- After the release is approved, publish to [crates.io], the ASF distribution servers, and GitHub tags. - -To add changes to the release branch, depending on the change we either: - -- Fix the issue on `main` and then backport the change to the release branch (e.g. [#18129]) -- Fix the issue on the release branch and then forward-port the change back to `main` (e.g.[#18057]) - -[crates.io]: https://crates.io/crates/datafusion -[#18129]: https://github.com/apache/datafusion/pull/18129 -[#18057]: https://github.com/apache/datafusion/pull/18057 - -## Backporting (add changes) to `branch-*` branch - -If you would like to propose your change for inclusion in a patch release, the -change must be applied to the relevant release branch. To do so please follow -these steps: - -1. Find (or create) the issue for the incremental release ([example release issue]) and discuss the proposed change there with the maintainers. -2. Follow normal workflow to create PR to `main` branch and wait for its approval and merge. -3. After PR is squash merged to `main`, branch from most recent release branch (e.g. `branch-50`), cherry-pick the commit and create a PR targeting the release branch [example backport PR]. 
- -For example, to backport commit `12345` from `main` to `branch-50`: - -```shell -git checkout branch-50 -git checkout -b backport_to_50 -git cherry-pick 12345 # your git commit hash -git push -u -# make a PR as normal targeting branch-50, prefixed with [branch-50] -``` - -It is also acceptable to fix the issue directly on the release branch first -and then cherry-pick the change back to `main` branch in a new PR. - -[example release issue]: https://github.com/apache/datafusion/issues/18072 -[example backport pr]: https://github.com/apache/datafusion/pull/18131 +This guide is for maintainers to create release candidates and run the release +process. ## Release Prerequisites @@ -160,33 +112,35 @@ branch-50: First, prepare a PR to update the changelog and versions to reflect the planned release. See [#18173](https://github.com/apache/datafusion/pull/18173) for an example. -#### Update Version Numbers - -Manually update the DataFusion version in the root `Cargo.toml` to reflect the new release version. +- This PR should be the last one merged before creating a release + candidate, so the changelog includes all changes. +- If there are code changes between RCs, create and merge a new PR to update + the changelog before creating the next RC. -Ensure Cargo.lock is updated accordingly by running: +**Step 1**: Manually update the DataFusion version in the root `Cargo.toml` to +reflect the new release version. Ensure `Cargo.lock` is updated accordingly by +running: ```shell cargo check -p datafusion ``` -#### Changelog Generation - -We maintain a [changelog] so our users know what has been changed between releases. - -[changelog]: ../changelog - -The changelog is generated using a Python script. +**Step 2**: Update the [changelog] in `dev/changelog/`. Each release has its +own file, such as `dev/changelog/50.0.0.md`, which should include all changes +since the previous release. 
-To run the script, you will need a GitHub Personal Access Token (described in the prerequisites section) and the `PyGitHub` library. First install the dev dependencies via `uv`: +The changelog is generated using a Python script, which requires a GitHub +Personal Access Token (described in the prerequisites section) and the +`PyGitHub` library. First install the dev dependencies via `uv`: ```shell uv sync ``` -To generate the changelog, set the `GITHUB_TOKEN` environment variable and then run `./dev/release/generate-changelog.py` -providing two commit ids or tags followed by the version number of the release being created. For example, -to generate a change log of all changes between the `50.3.0` tag and `branch-51`, in preparation for release `51.0.0`: +To generate the changelog, set the `GITHUB_TOKEN` environment variable and run +`./dev/release/generate-changelog.py` with two commit IDs or tags followed by +the release version. For example, to generate a changelog of all changes +between the `50.3.0` tag and `branch-51` for release `51.0.0`: > [!NOTE] > @@ -205,52 +159,52 @@ uv run ./dev/release/generate-changelog.py 50.3.0 branch-51 51.0.0 > dev/changel This script creates a changelog from GitHub PRs based on the labels associated with them as well as looking for titles starting with `feat:`, `fix:`, or `docs:`. -Once the change log is generated, run `prettier` to format the document: +Once the changelog is generated, run `prettier` to format the document: ```shell prettier -w dev/changelog/51.0.0.md ``` -#### Commit and PR - Then commit the changes and create a PR targeting the release branch. ```shell git commit -a -m 'Update version' ``` -Remember to merge any fixes back to `main` branch as well. +Remember to merge any fixes back to the `main` branch as well. ### 4. Prepare Release Candidate Artifacts After the PR gets merged, you are ready to create release artifacts based off the merged commit. 
-(Note you need to be a committer to run these scripts as they upload to the apache svn distribution servers)
+- You must be a committer to run these scripts because they upload to the
+  Apache SVN distribution servers.
+- If there are code changes between RCs, create and merge a changelog PR before
+  creating the next RC.
 
 #### Pick a Release Candidate (RC) number
 
 Pick numbers in sequential order, with `1` for `rc1`, `2` for `rc2`, etc.
 
-#### Create git Tag for the Release:
+#### Create Git tag for the Release Candidate
 
 While the official release artifacts are signed tarballs and zip files, we also
-tag the commit it was created for convenience and code archaeology. Release tags
-have the format `<version>` (e.g. `38.0.0`), and release candidates have the
-format `<version>-rc<rc>` (e.g. `38.0.0-rc0`). See [the list of existing
+tag the commit it was created from for convenience and code archaeology. Release tags
+look like `38.0.0`, and release candidate tags look like `38.0.0-rc0`. See [the list of existing
 tags].
 
 [the list of existing tags]: https://github.com/apache/datafusion/tags
 
-Using a string such as `38.0.0` as the `<version>`, create and push the rc tag by running these commands:
+Create and push the RC tag:
 
 ```shell
 git fetch apache
-git tag <version>-<rc> apache/branch-X # create tag from the release branch
-git push apache <version>-<rc> # push tag to Github remote
+git tag <version>-rc<rc> apache/branch-X
+git push apache <version>-rc<rc>
 ```
 
-For example, to create the `50.3.0-rc1 tag from `branch-50`:
+For example, to create the `50.3.0-rc1` tag from `branch-50`, use:
 
 ```shell
 git fetch apache
@@ -260,7 +214,7 @@ git push apache 50.3.0-rc1
 
 #### Create, Sign, and Upload Artifacts
 
-Run the `create-tarball.sh` script with the `<version>` tag and `<rc>` and you determined in previous steps:
+Run the `create-tarball.sh` script with the `<version>` tag and `<rc>` number you determined in previous steps:
 
 For example, to create the `50.3.0-rc1` artifacts:
 
@@ -275,18 +229,20 @@ The `create-tarball.sh` script
    apache distribution SVN server
 2. 
Provides you an email template to - send to dev@datafusion.apache.org for release voting. + send to `dev@datafusion.apache.org` for release voting. ### 5. Vote on Release Candidate Artifacts -Send the email output from the script to dev@datafusion.apache.org. +Send the email output from the script to `dev@datafusion.apache.org`. -In order to publish the release on crates.io, it must be "official". To become -official it needs at least three PMC members to vote +1 on it. +In order to publish the release on crates.io, it must be "official." To become +official, it needs at least three PMC members to vote +1 on it and no -1 votes. +The vote must remain open for at least 72 hours to give everyone a chance to +review the release candidate. #### Verifying Release Candidates -The `dev/release/verify-release-candidate.sh` is a script in this repository that can assist in the verification process. Run it like: +`dev/release/verify-release-candidate.sh` is a script in this repository that can assist in the verification process. Run it like this: ```shell ./dev/release/verify-release-candidate.sh 50.3.0 1 @@ -294,15 +250,18 @@ The `dev/release/verify-release-candidate.sh` is a script in this repository tha #### If the Release is not Approved -If the release is not approved, fix whatever the problem is, merge changelog -changes into the release branch and try again with the next RC number. +If the release is not approved, fix whatever the problem is, make a new release +candidate and try again. + +Reminders: -Remember to merge any fixes back to `main` branch as well. +1. Update the changelog and create a new PR if there are any code changes, and merge it before creating the next RC. +2. Merge any fixes back to the `main` branch as well. -#### If the Release is Approved: Call the Vote +#### If the Vote Passes: Announce the Result Call the vote on the Arrow dev list by replying to the RC voting thread. 
The -reply should have a new subject constructed by adding `[RESULT]` prefix to the +reply should have a new subject constructed by adding the `[RESULT]` prefix to the old subject line. Sample announcement template: @@ -415,39 +374,46 @@ the examples from previous releases. The release information is used to generate a template for a board report (see example from Apache Arrow project [here](https://github.com/apache/arrow/pull/14357)). -### 10: Delete old RCs and Releases +### 10: Delete Old RCs and Releases See the ASF documentation on [when to archive](https://www.apache.org/legal/release-policy.html#when-to-archive) for more information. -#### Deleting old release candidates from `dev` svn - Release candidates should be deleted once the release is published. -Get a list of DataFusion release candidates: +To get a list of DataFusion release candidates: ```shell svn ls https://dist.apache.org/repos/dist/dev/datafusion ``` -Delete a release candidate: +To delete a release candidate: ```shell svn delete -m "delete old DataFusion RC" https://dist.apache.org/repos/dist/dev/datafusion/apache-datafusion-50.0.0-rc1/ ``` -#### Deleting old releases from `release` svn +#### Delete old releases from `release` SVN -Only the latest release should be available. Delete old releases after publishing the new release. +Only the latest release should be available. Delete old releases after +publishing the new release. -Get a list of DataFusion releases: +To get a list of DataFusion releases: ```shell svn ls https://dist.apache.org/repos/dist/release/datafusion ``` -Delete a release: +To delete a release: ```shell svn delete -m "delete old DataFusion release" https://dist.apache.org/repos/dist/release/datafusion/datafusion-50.0.0 ``` + +### 11. Forward-port changelog to `main` + +After the release is published, forward port the version update and changelog to +`main` so that it is up to date for the next release. 
See [#21053] for an +example PR that forward-ports the changelog to `main`. + +[#21053]: https://github.com/apache/datafusion/pull/21053 diff --git a/docs/source/contributor-guide/index.md b/docs/source/contributor-guide/index.md index 4ace4be49499b..6ec1efa4d99fa 100644 --- a/docs/source/contributor-guide/index.md +++ b/docs/source/contributor-guide/index.md @@ -101,6 +101,11 @@ If you are concerned that a larger design will be lost in a string of small PRs, Note all commits in a PR are squashed when merged to the `main` branch so there is one commit per PR after merge. +## Release Management and Backports + +Contributor-facing guidance for release branches, patch releases, and backports +is documented in the [Release Management](release_management.md) guide. + ## Before Submitting a PR Before submitting a PR, run the standard non-functional checks. PRs must pass diff --git a/docs/source/contributor-guide/release_management.md b/docs/source/contributor-guide/release_management.md new file mode 100644 index 0000000000000..0515204a5ecbc --- /dev/null +++ b/docs/source/contributor-guide/release_management.md @@ -0,0 +1,123 @@ + + +# Release Management + +This page describes DataFusion release branches and backports. For the +maintainer release guide, including release candidate artifacts, voting, and +publication, see [the release process README in `dev/release`]. + +## Overview + +DataFusion typically has a major release about once per month, including +breaking API changes. Patch releases are made on an ad hoc basis, but we try to +avoid them because major releases are frequent. + +New development happens on the [`main` branch]. Releases are made from release +branches named `branch-NN`, such as [`branch-50`] for the `50.x.y` release +series. 
+
+In general:
+
+- New features land on the [`main` branch]
+- Patch releases are cut from the corresponding `branch-NN`
+- Only targeted, low-risk fixes should be added to a release branch
+
+Changes reach a release branch in one of two ways:
+
+- (Most common) Fix the issue on `main` and then backport the merged change to the release branch
+- Fix the issue on the release branch and then forward-port the change to `main`
+
+Releases are coordinated in a GitHub issue, such as the
+[release issue for 50.3.0]. If you think a fix should be included in a patch
+release, discuss it on the relevant tracking issue first. You can also open the
+backport PR first and then link it from the tracking issue.
+
+To prepare for a new release series, maintainers:
+
+- Create a new branch from `main`, such as `branch-50`, in the Apache repository
+- Continue merging new features to `main`
+- Prepare the release branch for release by updating versions, changelog content,
+  and any additional release-specific fixes via the
+  [Backport Workflow](#backport-workflow)
+- Create release candidate artifacts from the release branch
+- After approval, publish to crates.io, ASF distribution servers, and Git tags
+
+## Backport Workflow
+
+The usual workflow is:
+
+1. Fix on `main` first, and merge the fix via a normal PR workflow.
+2. Cherry-pick the merged commit onto the release branch.
+3. Open a backport PR targeting the release branch (examples below).
+
+- [Example backport PR]
+- [Additional backport PR example]
+
+### Inputs
+
+To backport a change, gather the following information:
+
+- Target branch, such as `apache/branch-52`
+- The release tracking issue URL, such as https://github.com/apache/datafusion/issues/19692
+- The original PR URL, such as https://github.com/apache/datafusion/pull/20192
+- Optional explicit commit SHA to backport
+
+### Apply the Backport
+
+Start from the target release branch, create a dedicated backport branch, and
+use `git cherry-pick`. 
For example, to backport PR #1234 to `branch-52` when
+the commit SHA is `abc123`, run:
+
+```bash
+git checkout apache/branch-52
+git checkout -b alamb/backport_1234
+git cherry-pick abc123
+```
+
+### Test
+
+Run tests as described in the [testing documentation].
+
+### Open the PR
+
+Create a PR against the release branch, not `main`, and prefix it with
+`[branch-NN]` to show which release branch the backport targets. For example:
+
+- `[branch-52] fix: validate inter-file ordering in eq_properties() (#20329)`
+
+Use a PR description that links the tracking issue, original PR, and target
+branch, for example:
+
+```markdown
+- Part of <link to the release tracking issue>
+- Closes <link to the original PR> on <target branch>
+
+This PR:
+
+- Backports <link to the original PR> from @<author> to the <target branch> line
+```
+
+[`main` branch]: https://github.com/apache/datafusion/tree/main
+[`branch-50`]: https://github.com/apache/datafusion/tree/branch-50
+[the release process readme in `dev/release`]: https://github.com/apache/datafusion/blob/main/dev/release/README.md
+[release issue for 50.3.0]: https://github.com/apache/datafusion/issues/18072
+[example backport pr]: https://github.com/apache/datafusion/pull/18131
+[additional backport pr example]: https://github.com/apache/datafusion/pull/20792
+[testing documentation]: testing.md
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 4d57faa0cbf73..cc0da3c44473e 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -163,6 +163,7 @@ To get started, see
    contributor-guide/testing
    contributor-guide/api-health
    contributor-guide/howtos
+   contributor-guide/release_management
    contributor-guide/roadmap
    contributor-guide/governance
    contributor-guide/inviting
diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md
index ca8707270fe18..9f5814447b7dc 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -3237,6 +3237,7 @@ _Alias of [current_date](#current_date)._
 - [array_any_value](#array_any_value)
- 
[array_append](#array_append) - [array_cat](#array_cat) +- [array_compact](#array_compact) - [array_concat](#array_concat) - [array_contains](#array_contains) - [array_dims](#array_dims) @@ -3285,6 +3286,7 @@ _Alias of [current_date](#current_date)._ - [list_any_value](#list_any_value) - [list_append](#list_append) - [list_cat](#list_cat) +- [list_compact](#list_compact) - [list_concat](#list_concat) - [list_contains](#list_contains) - [list_dims](#list_dims) @@ -3391,6 +3393,33 @@ array_append(array, element) _Alias of [array_concat](#array_concat)._ +### `array_compact` + +Removes null values from the array. + +```sql +array_compact(array) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. + +#### Example + +```sql +> select array_compact([1, NULL, 2, NULL, 3]) arr; ++-----------+ +| arr | ++-----------+ +| [1, 2, 3] | ++-----------+ +``` + +#### Aliases + +- list_compact + ### `array_concat` Concatenates arrays. @@ -4535,6 +4564,10 @@ _Alias of [array_append](#array_append)._ _Alias of [array_concat](#array_concat)._ +### `list_compact` + +_Alias of [array_compact](#array_compact)._ + ### `list_concat` _Alias of [array_concat](#array_concat)._