From 2373589a9a559dedeb65093466f9493b015263ba Mon Sep 17 00:00:00 2001 From: Qi Zhu <821684824@qq.com> Date: Wed, 25 Mar 2026 10:15:50 +0800 Subject: [PATCH] Upgrade to DF 52.4 --- Cargo.toml | 18 +++++++-------- src/materialized/dependencies.rs | 38 +++++++++++++++++++++----------- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1ba3bee..45c7cfc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,15 +33,15 @@ arrow = "57.0.0" arrow-schema = "57.0.0" async-trait = "0.1.89" dashmap = "6" -datafusion = { git = "https://github.com/massive-com/arrow-datafusion", rev = "9cd0824" } -datafusion-common = { git = "https://github.com/massive-com/arrow-datafusion", rev = "9cd0824" } -datafusion-expr = { git = "https://github.com/massive-com/arrow-datafusion", rev = "9cd0824" } -datafusion-functions = { git = "https://github.com/massive-com/arrow-datafusion", rev = "9cd0824" } -datafusion-functions-aggregate = { git = "https://github.com/massive-com/arrow-datafusion", rev = "9cd0824" } -datafusion-optimizer = { git = "https://github.com/massive-com/arrow-datafusion", rev = "9cd0824" } -datafusion-physical-expr = { git = "https://github.com/massive-com/arrow-datafusion", rev = "9cd0824" } -datafusion-physical-plan = { git = "https://github.com/massive-com/arrow-datafusion", rev = "9cd0824" } -datafusion-sql = { git = "https://github.com/massive-com/arrow-datafusion", rev = "9cd0824" } +datafusion = { git = "https://github.com/massive-com/arrow-datafusion", rev = "cd6aaa6" } +datafusion-common = { git = "https://github.com/massive-com/arrow-datafusion", rev = "cd6aaa6" } +datafusion-expr = { git = "https://github.com/massive-com/arrow-datafusion", rev = "cd6aaa6" } +datafusion-functions = { git = "https://github.com/massive-com/arrow-datafusion", rev = "cd6aaa6" } +datafusion-functions-aggregate = { git = "https://github.com/massive-com/arrow-datafusion", rev = "cd6aaa6" } +datafusion-optimizer = { git = 
"https://github.com/massive-com/arrow-datafusion", rev = "cd6aaa6" } +datafusion-physical-expr = { git = "https://github.com/massive-com/arrow-datafusion", rev = "cd6aaa6" } +datafusion-physical-plan = { git = "https://github.com/massive-com/arrow-datafusion", rev = "cd6aaa6" } +datafusion-sql = { git = "https://github.com/massive-com/arrow-datafusion", rev = "cd6aaa6" } futures = "0.3" itertools = "0.14" log = "0.4" diff --git a/src/materialized/dependencies.rs b/src/materialized/dependencies.rs index 808978e..07f7f7e 100644 --- a/src/materialized/dependencies.rs +++ b/src/materialized/dependencies.rs @@ -1830,20 +1830,32 @@ mod test { SELECT year, column1 AS column2 FROM t3 ", projection: &["year"], + // In DF 52.4, the plan for coalesce changed due to apache/datafusion#20879 // ("Ensure columns are casted to the correct names with Unions"). // Previously, `coerce_exprs_for_schema` would alias any cast expression // with the original column name, which kept `coalesce(CAST(...), t2.year)` // as a single opaque expression. After the fix, only bare `Expr::Column` // references get an alias on cast, so the inner `CAST(t1.year AS Utf8View)` // is now visible to the optimizer. This triggers: // 1. CSE (common subexpression elimination) extracts the repeated // `CAST(t1.year AS Utf8View)` (it occurs twice once coalesce is expanded; the join // condition keeps its own inline cast) into `__common_expr_2`. // 2. `coalesce(a, b)` is expanded to `CASE WHEN a IS NOT NULL THEN a ELSE b END`. 
expected_plan: vec![ - "+--------------+--------------------------------------------------------------------+", - "| plan_type | plan |", - "+--------------+--------------------------------------------------------------------+", - "| logical_plan | Union |", - "| | Projection: coalesce(CAST(t1.year AS Utf8View), t2.year) AS year |", - "| | Full Join: Using CAST(t1.year AS Utf8View) = t2.year |", - "| | SubqueryAlias: t1 |", - "| | Projection: t1.column1 AS year |", - "| | TableScan: t1 projection=[column1] |", - "| | SubqueryAlias: t2 |", - "| | TableScan: t2 projection=[year] |", - "| | TableScan: t3 projection=[year] |", - "+--------------+--------------------------------------------------------------------+", + "+--------------+---------------------------------------------------------------------------------------------------+", + "| plan_type | plan |", + "+--------------+---------------------------------------------------------------------------------------------------+", + "| logical_plan | Union |", + "| | Projection: CASE WHEN __common_expr_2 IS NOT NULL THEN __common_expr_2 ELSE t2.year END AS year |", + "| | Projection: CAST(t1.year AS Utf8View) AS __common_expr_2, t2.year |", + "| | Full Join: Using CAST(t1.year AS Utf8View) = t2.year |", + "| | SubqueryAlias: t1 |", + "| | Projection: t1.column1 AS year |", + "| | TableScan: t1 projection=[column1] |", + "| | SubqueryAlias: t2 |", + "| | TableScan: t2 projection=[year] |", + "| | TableScan: t3 projection=[year] |", + "+--------------+---------------------------------------------------------------------------------------------------+", ], expected_output: vec![ "+------+",