From 641767153398d31e754c2f76b990dd11dee2953b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 24 Jun 2025 14:54:00 +0200 Subject: [PATCH 01/11] chore: skip order calculation / exponential planning --- .../src/equivalence/properties/union.rs | 40 +++++++++++++++++-- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/datafusion/physical-expr/src/equivalence/properties/union.rs b/datafusion/physical-expr/src/equivalence/properties/union.rs index d77129472a8ba..702688011668d 100644 --- a/datafusion/physical-expr/src/equivalence/properties/union.rs +++ b/datafusion/physical-expr/src/equivalence/properties/union.rs @@ -67,16 +67,43 @@ fn calculate_union_binary( }) .collect::>(); + // TEMP HACK WORKAROUND + // Revert code from https://github.com/apache/datafusion/pull/12562 + // Context: https://github.com/apache/datafusion/issues/13748 + // Context: https://github.com/influxdata/influxdb_iox/issues/13038 + // Next, calculate valid orderings for the union by searching for prefixes // in both sides. - let mut orderings = UnionEquivalentOrderingBuilder::new(); - orderings.add_satisfied_orderings(&lhs, &rhs)?; - orderings.add_satisfied_orderings(&rhs, &lhs)?; - let orderings = orderings.build(); + let mut orderings = vec![]; + for ordering in lhs.normalized_oeq_class().into_iter() { + let mut ordering: Vec = ordering.into(); + + // Progressively shorten the ordering to search for a satisfied prefix: + while !rhs.ordering_satisfy(ordering.clone())? { + ordering.pop(); + } + // There is a non-trivial satisfied prefix, add it as a valid ordering: + if !ordering.is_empty() { + orderings.push(ordering); + } + } + for ordering in rhs.normalized_oeq_class().into_iter() { + let mut ordering: Vec = ordering.into(); + + // Progressively shorten the ordering to search for a satisfied prefix: + while !lhs.ordering_satisfy(ordering.clone())? { + ordering.pop(); + } + // There is a non-trivial satisfied prefix, add it as a valid ordering: + if !ordering.is_empty() { + orderings.push(ordering); + } + } let mut eq_properties = EquivalenceProperties::new(lhs.schema); eq_properties.add_constants(constants)?; eq_properties.add_orderings(orderings); + Ok(eq_properties) } @@ -122,6 +149,7 @@ struct UnionEquivalentOrderingBuilder { orderings: Vec, } +#[expect(unused)] impl UnionEquivalentOrderingBuilder { fn new() -> Self { Self { orderings: vec![] } @@ -504,6 +532,7 @@ mod tests { } #[test] + #[ignore = "InfluxData patch: chore: skip order calculation / exponential planning"] fn test_union_equivalence_properties_constants_fill_gaps() -> Result<()> { let schema = create_test_schema().unwrap(); UnionEquivalenceTest::new(&schema) @@ -579,6 +608,7 @@ mod tests { } #[test] + #[ignore = "InfluxData patch: chore: skip order calculation / exponential planning"] fn test_union_equivalence_properties_constants_fill_gaps_non_symmetric() -> Result<()> { let schema = create_test_schema().unwrap(); @@ -607,6 +637,7 @@ mod tests { } #[test] + #[ignore = "InfluxData patch: chore: skip order calculation / exponential planning"] fn test_union_equivalence_properties_constants_gap_fill_symmetric() -> Result<()> { let schema = create_test_schema().unwrap(); UnionEquivalenceTest::new(&schema) @@ -658,6 +689,7 @@ mod tests { } #[test] + #[ignore = "InfluxData patch: chore: skip order calculation / exponential planning"] fn test_union_equivalence_properties_constants_middle_desc() -> Result<()> { let schema = create_test_schema().unwrap(); UnionEquivalenceTest::new(&schema) From 4277f8fe0cb7b5527a6a6ba35efdd63104fd66ca Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 16 Jul 2024 12:14:19 -0400 Subject: [PATCH 02/11] (New) Test + workaround for SanityCheck plan --- datafusion/physical-optimizer/src/sanity_checker.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/datafusion/physical-optimizer/src/sanity_checker.rs b/datafusion/physical-optimizer/src/sanity_checker.rs index bff33a281556d..8eb00327143dc 100644 --- a/datafusion/physical-optimizer/src/sanity_checker.rs +++ b/datafusion/physical-optimizer/src/sanity_checker.rs @@ -32,6 +32,8 @@ use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_physical_expr::intervals::utils::{check_support, is_datatype_supported}; use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; use datafusion_physical_plan::joins::SymmetricHashJoinExec; +use datafusion_physical_plan::sorts::sort::SortExec; +use datafusion_physical_plan::union::UnionExec; use datafusion_physical_plan::{ExecutionPlanProperties, get_plan_string}; use crate::PhysicalOptimizerRule; @@ -136,6 +138,14 @@ pub fn check_plan_sanity( plan.required_input_ordering(), plan.required_input_distribution(), ) { + // TEMP HACK WORKAROUND https://github.com/apache/datafusion/issues/11492 + if child.as_any().downcast_ref::().is_some() { + continue; + } + if child.as_any().downcast_ref::().is_some() { + continue; + } + let child_eq_props = child.equivalence_properties(); if let Some(sort_req) = sort_req { let sort_req = sort_req.into_single(); From 64dc11c50a50e0dde6f39c5fb6d130344ea183bf Mon Sep 17 00:00:00 2001 From: Adam Curtis Date: Tue, 21 Oct 2025 13:33:16 -0400 Subject: [PATCH 03/11] chore: add debug logging and skip error on physical schema check --- datafusion/core/src/physical_planner.rs | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index cc7d534776d7e..b006d8ae39a9c 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -802,10 +802,17 @@ impl DefaultPhysicalPlanner { )); } } - return internal_err!( + debug!( "Physical input schema should be the same as the one converted from logical input schema. Differences: {}", differences.iter().map(|s| format!("\n\t- {s}")).join("") ); + + //influx: temporarily remove error and only log so that we can find a + //reproducer in production + // return internal_err!("Physical input schema should be the same as the one converted from logical input schema. Differences: {}", differences + // .iter() + // .map(|s| format!("\n\t- {s}")) + // .join("")); } let groups = self.create_grouping_physical_expr( @@ -4207,6 +4214,8 @@ digraph { } #[tokio::test] + // Ignored due to disabling the physical schema check skip. + #[ignore] async fn test_aggregate_schema_mismatch_metadata() { let logical_schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, false)])); @@ -4227,6 +4236,8 @@ digraph { } #[tokio::test] + // Ignored due to disabling the physical schema check skip. + #[ignore] async fn test_aggregate_schema_mismatch_field_count() { let logical_schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, false)])); @@ -4247,6 +4258,8 @@ digraph { } #[tokio::test] + // Ignored due to disabling the physical schema check skip. + #[ignore] async fn test_aggregate_schema_mismatch_field_name() { let logical_schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, false)])); @@ -4268,6 +4281,8 @@ digraph { } #[tokio::test] + // Ignored due to disabling the physical schema check skip. + #[ignore] async fn test_aggregate_schema_mismatch_field_type() { let logical_schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, false)])); @@ -4286,6 +4301,8 @@ digraph { } #[tokio::test] + // Ignored due to disabling the physical schema check skip. + #[ignore] async fn test_aggregate_schema_mismatch_field_nullability() { let logical_schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, false)])); @@ -4304,6 +4321,8 @@ digraph { } #[tokio::test] + // Ignored due to disabling the physical schema check skip. + #[ignore] async fn test_aggregate_schema_mismatch_field_metadata() { let logical_schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, false)])); @@ -4324,6 +4343,8 @@ digraph { } #[tokio::test] + // Ignored due to disabling the physical schema check skip. + #[ignore] async fn test_aggregate_schema_mismatch_multiple() { let logical_schema = Arc::new(Schema::new(vec![ Field::new("c1", DataType::Int32, false), From 506dd6de584193f787018548a806178941207e94 Mon Sep 17 00:00:00 2001 From: Adam Curtis Date: Tue, 16 Dec 2025 12:21:06 -0500 Subject: [PATCH 04/11] fix: wrap join operators with cooperative() for cancellation support --- datafusion/physical-plan/src/joins/cross_join.rs | 9 +++++---- datafusion/physical-plan/src/joins/hash_join/exec.rs | 5 +++-- datafusion/physical-plan/src/joins/nested_loop_join.rs | 5 +++-- .../physical-plan/src/joins/sort_merge_join/exec.rs | 5 +++-- .../physical-plan/src/joins/symmetric_hash_join.rs | 9 +++++---- 5 files changed, 19 insertions(+), 14 deletions(-) diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index 4f32b6176ec39..1774a8ea95e53 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -25,6 +25,7 @@ use super::utils::{ OnceAsync, OnceFut, StatefulStreamResult, adjust_right_output_partitioning, reorder_output_after_swap, }; +use crate::coop::cooperative; use crate::execution_plan::{EmissionType, boundedness_from_children}; use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::projection::{ @@ -332,7 +333,7 @@ impl ExecutionPlan for CrossJoinExec { })?; if enforce_batch_size_in_joins { - Ok(Box::pin(CrossJoinStream { + Ok(Box::pin(cooperative(CrossJoinStream { schema: Arc::clone(&self.schema), left_fut, right: stream, @@ -341,9 +342,9 @@ impl ExecutionPlan for CrossJoinExec { state: CrossJoinStreamState::WaitBuildSide, left_data: RecordBatch::new_empty(self.left().schema()), batch_transformer: BatchSplitter::new(batch_size), - })) + }))) } else { - Ok(Box::pin(CrossJoinStream { + Ok(Box::pin(cooperative(CrossJoinStream { schema: Arc::clone(&self.schema), left_fut, right: stream, @@ -352,7 +353,7 @@ impl ExecutionPlan for CrossJoinExec { state: CrossJoinStreamState::WaitBuildSide, left_data: RecordBatch::new_empty(self.left().schema()), batch_transformer: NoopBatchTransformer::new(), - })) + }))) } } diff --git a/datafusion/physical-plan/src/joins/hash_join/exec.rs b/datafusion/physical-plan/src/joins/hash_join/exec.rs index 91fc1ee4436ee..0a45887eff3e4 100644 --- a/datafusion/physical-plan/src/joins/hash_join/exec.rs +++ b/datafusion/physical-plan/src/joins/hash_join/exec.rs @@ -22,6 +22,7 @@ use std::sync::{Arc, OnceLock}; use std::{any::Any, vec}; use crate::ExecutionPlanProperties; +use crate::coop::cooperative; use crate::execution_plan::{EmissionType, boundedness_from_children}; use crate::filter_pushdown::{ ChildPushdownResult, FilterDescription, FilterPushdownPhase, @@ -1061,7 +1062,7 @@ impl ExecutionPlan for HashJoinExec { .map(|(_, right_expr)| Arc::clone(right_expr)) .collect::>(); - Ok(Box::pin(HashJoinStream::new( + Ok(Box::pin(cooperative(HashJoinStream::new( partition, self.schema(), on_right, @@ -1079,7 +1080,7 @@ impl ExecutionPlan for HashJoinExec { self.right.output_ordering().is_some(), build_accumulator, self.mode, - ))) + )))) } fn metrics(&self) -> Option { diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index 44637321a7e35..fa435c0d815b1 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -29,6 +29,7 @@ use super::utils::{ reorder_output_after_swap, swap_join_projection, }; use crate::common::can_project; +use crate::coop::cooperative; use crate::execution_plan::{EmissionType, boundedness_from_children}; use crate::joins::SharedBitmapBuilder; use crate::joins::utils::{ @@ -529,7 +530,7 @@ impl ExecutionPlan for NestedLoopJoinExec { None => self.column_indices.clone(), }; - Ok(Box::pin(NestedLoopJoinStream::new( + Ok(Box::pin(cooperative(NestedLoopJoinStream::new( self.schema(), self.filter.clone(), self.join_type, @@ -538,7 +539,7 @@ impl ExecutionPlan for NestedLoopJoinExec { column_indices_after_projection, metrics, batch_size, - ))) + )))) } fn metrics(&self) -> Option { diff --git a/datafusion/physical-plan/src/joins/sort_merge_join/exec.rs b/datafusion/physical-plan/src/joins/sort_merge_join/exec.rs index ae7a5fa764bcc..82929107806de 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join/exec.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join/exec.rs @@ -23,6 +23,7 @@ use std::any::Any; use std::fmt::Formatter; use std::sync::Arc; +use crate::coop::cooperative; use crate::execution_plan::{EmissionType, boundedness_from_children}; use crate::expressions::PhysicalSortExpr; use crate::joins::sort_merge_join::metrics::SortMergeJoinMetrics; @@ -497,7 +498,7 @@ impl ExecutionPlan for SortMergeJoinExec { .register(context.memory_pool()); // create join stream - Ok(Box::pin(SortMergeJoinStream::try_new( + Ok(Box::pin(cooperative(SortMergeJoinStream::try_new( context.session_config().spill_compression(), Arc::clone(&self.schema), self.sort_options.clone(), @@ -512,7 +513,7 @@ impl ExecutionPlan for SortMergeJoinExec { SortMergeJoinMetrics::new(partition, &self.metrics), reservation, context.runtime_env(), - )?)) + )?))) } fn metrics(&self) -> Option { diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index 1f6bc703a0300..a54f930114c60 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -33,6 +33,7 @@ use std::task::{Context, Poll}; use std::vec; use crate::common::SharedMemoryReservation; +use crate::coop::cooperative; use crate::execution_plan::{boundedness_from_children, emission_type_from_children}; use crate::joins::stream_join_utils::{ PruningJoinHashMap, SortedFilterExpr, StreamJoinMetrics, @@ -534,7 +535,7 @@ impl ExecutionPlan for SymmetricHashJoinExec { } if enforce_batch_size_in_joins { - Ok(Box::pin(SymmetricHashJoinStream { + Ok(Box::pin(cooperative(SymmetricHashJoinStream { left_stream, right_stream, schema: self.schema(), @@ -552,9 +553,9 @@ impl ExecutionPlan for SymmetricHashJoinExec { state: SHJStreamState::PullRight, reservation, batch_transformer: BatchSplitter::new(batch_size), - })) + }))) } else { - Ok(Box::pin(SymmetricHashJoinStream { + Ok(Box::pin(cooperative(SymmetricHashJoinStream { left_stream, right_stream, schema: self.schema(), @@ -572,7 +573,7 @@ impl ExecutionPlan for SymmetricHashJoinExec { state: SHJStreamState::PullRight, reservation, batch_transformer: NoopBatchTransformer::new(), - })) + }))) } } From d615ad91a7f75a8f34eedae60cec4458ac8d277d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 4 Feb 2026 08:44:14 -0500 Subject: [PATCH 05/11] Update bytes to v1.11.1 to avoid security audit --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5ab0b8c84a563..e9167c639a774 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1225,9 +1225,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" [[package]] name = "bytes-utils" From d87aafd4122b632ff8c8f2a07e6f6f087ce73f6d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 6 Feb 2026 14:13:02 -0500 Subject: [PATCH 06/11] chore: Update time to avoid rustsec error --- Cargo.lock | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e9167c639a774..c2e9312a24723 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4193,9 +4193,9 @@ dependencies = [ [[package]] name = "num-conv" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" [[package]] name = "num-integer" @@ -6044,30 +6044,30 @@ dependencies = [ [[package]] name = "time" -version = "0.3.44" +version = "0.3.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" dependencies = [ "deranged", "itoa", "num-conv", "powerfmt", - "serde", + "serde_core", "time-core", "time-macros", ] [[package]] name = "time-core" -version = "0.1.6" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" [[package]] name = "time-macros" -version = "0.2.24" +version = "0.2.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" dependencies = [ "num-conv", "time-core", From a21b158cb073c0dffd8475e0e359af9f514a355b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 9 Feb 2026 14:57:20 -0500 Subject: [PATCH 07/11] Fix incorrect SortExec removal before AggregateExec --- .../src/enforce_sorting/sort_pushdown.rs | 74 +++++++ .../sqllogictest/test_files/sort_pushdown.slt | 184 ++++++++++++++++++ 2 files changed, 258 insertions(+) diff --git a/datafusion/physical-optimizer/src/enforce_sorting/sort_pushdown.rs b/datafusion/physical-optimizer/src/enforce_sorting/sort_pushdown.rs index 698fdea8e766e..267faeda0c1bb 100644 --- a/datafusion/physical-optimizer/src/enforce_sorting/sort_pushdown.rs +++ b/datafusion/physical-optimizer/src/enforce_sorting/sort_pushdown.rs @@ -35,6 +35,7 @@ use datafusion_physical_expr_common::sort_expr::{ LexOrdering, LexRequirement, OrderingRequirements, PhysicalSortExpr, PhysicalSortRequirement, }; +use datafusion_physical_plan::aggregates::AggregateExec; use datafusion_physical_plan::execution_plan::CardinalityEffect; use datafusion_physical_plan::filter::FilterExec; use datafusion_physical_plan::joins::utils::{ @@ -353,6 +354,8 @@ fn pushdown_requirement_to_children( Ok(None) } } + } else if let Some(aggregate_exec) = plan.as_any().downcast_ref::() { + handle_aggregate_pushdown(aggregate_exec, parent_required) } else if maintains_input_order.is_empty() || !maintains_input_order.iter().any(|o| *o) || plan.as_any().is::() @@ -388,6 +391,77 @@ fn pushdown_requirement_to_children( // TODO: Add support for Projection push down } +/// Try to push sorting through [`AggregateExec`] +/// +/// `AggregateExec` only preserves the input order of its group by columns +/// (not aggregates in general, which are formed from arbitrary expressions over +/// input) +/// +/// Thus function rewrites the parent required ordering in terms of the +/// aggregate input if possible. This rewritten requirement represents the +/// ordering of the `AggregateExec`'s **input** that would also satisfy the +/// **parent** ordering. +/// +/// If no such mapping is possible (e.g. because the sort references aggregate +/// columns), returns None. +fn handle_aggregate_pushdown( + aggregate_exec: &AggregateExec, + parent_required: OrderingRequirements, +) -> Result>>> { + if !aggregate_exec + .maintains_input_order() + .into_iter() + .any(|o| o) + { + return Ok(None); + } + + let group_expr = aggregate_exec.group_expr(); + // GROUPING SETS introduce additional output columns and NULL substitutions; + // skip pushdown until we can map those cases safely. + if group_expr.has_grouping_set() { + return Ok(None); + } + + let group_input_exprs = group_expr.input_exprs(); + let parent_requirement = parent_required.into_single(); + let mut child_requirement = Vec::with_capacity(parent_requirement.len()); + + for req in parent_requirement { + // Sort above AggregateExec should reference its output columns. Map each + // output group-by column to its original input expression. + let Some(column) = req.expr.as_any().downcast_ref::() else { + return Ok(None); + }; + if column.index() >= group_input_exprs.len() { + // AggregateExec does not produce output that is sorted on aggregate + // columns so those can not be pushed through. + return Ok(None); + } + child_requirement.push(PhysicalSortRequirement::new( + Arc::clone(&group_input_exprs[column.index()]), + req.options, + )); + } + + let Some(child_requirement) = LexRequirement::new(child_requirement) else { + return Ok(None); + }; + + // Keep sort above aggregate unless input ordering already satisfies the + // mapped requirement. + if aggregate_exec + .input() + .equivalence_properties() + .ordering_satisfy_requirement(child_requirement.iter().cloned())? + { + let child_requirements = OrderingRequirements::new(child_requirement); + Ok(Some(vec![Some(child_requirements)])) + } else { + Ok(None) + } +} + /// Return true if pushing the sort requirements through a node would violate /// the input sorting requirements for the plan fn pushdown_would_violate_requirements( diff --git a/datafusion/sqllogictest/test_files/sort_pushdown.slt b/datafusion/sqllogictest/test_files/sort_pushdown.slt index 58d9915a24be2..4148de1d62f73 100644 --- a/datafusion/sqllogictest/test_files/sort_pushdown.slt +++ b/datafusion/sqllogictest/test_files/sort_pushdown.slt @@ -851,6 +851,184 @@ LIMIT 3; 5 4 2 -3 +# Test 3.7: Aggregate ORDER BY expression should keep SortExec +# Source pattern declared on parquet scan: [x ASC, y ASC]. +# Requested pattern in ORDER BY: [x ASC, CAST(y AS BIGINT) % 2 ASC]. +# Example for x=1 input y order 1,2,3 gives bucket order 1,0,1, which does not +# match requested bucket ASC order. SortExec is required above AggregateExec. +statement ok +SET datafusion.execution.target_partitions = 1; + +statement ok +CREATE TABLE agg_expr_data(x INT, y INT, v INT) AS VALUES +(1, 1, 10), +(1, 2, 20), +(1, 3, 30), +(2, 1, 40), +(2, 2, 50), +(2, 3, 60); + +query I +COPY (SELECT * FROM agg_expr_data ORDER BY x, y) +TO 'test_files/scratch/sort_pushdown/agg_expr_sorted.parquet'; +---- +6 + +statement ok +CREATE EXTERNAL TABLE agg_expr_parquet(x INT, y INT, v INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/sort_pushdown/agg_expr_sorted.parquet' +WITH ORDER (x ASC, y ASC); + +query TT +EXPLAIN SELECT + x, + CAST(y AS BIGINT) % 2, + SUM(v) +FROM agg_expr_parquet +GROUP BY x, CAST(y AS BIGINT) % 2 +ORDER BY x, CAST(y AS BIGINT) % 2; +---- +logical_plan +01)Sort: agg_expr_parquet.x ASC NULLS LAST, agg_expr_parquet.y % Int64(2) ASC NULLS LAST +02)--Aggregate: groupBy=[[agg_expr_parquet.x, CAST(agg_expr_parquet.y AS Int64) % Int64(2)]], aggr=[[sum(CAST(agg_expr_parquet.v AS Int64))]] +03)----TableScan: agg_expr_parquet projection=[x, y, v] +physical_plan +01)SortExec: expr=[x@0 ASC NULLS LAST, agg_expr_parquet.y % Int64(2)@1 ASC NULLS LAST], preserve_partitioning=[false] +02)--AggregateExec: mode=Single, gby=[x@0 as x, CAST(y@1 AS Int64) % 2 as agg_expr_parquet.y % Int64(2)], aggr=[sum(agg_expr_parquet.v)], ordering_mode=PartiallySorted([0]) +03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/agg_expr_sorted.parquet]]}, projection=[x, y, v], output_ordering=[x@0 ASC NULLS LAST, y@1 ASC NULLS LAST], file_type=parquet + +# Expected output pattern from ORDER BY [x, bucket]: +# rows grouped by x, and within each x bucket appears as 0 then 1. +query III +SELECT + x, + CAST(y AS BIGINT) % 2, + SUM(v) +FROM agg_expr_parquet +GROUP BY x, CAST(y AS BIGINT) % 2 +ORDER BY x, CAST(y AS BIGINT) % 2; +---- +1 0 20 +1 1 40 +2 0 50 +2 1 100 + +# Test 3.8: Aggregate ORDER BY monotonic expression can push down (no SortExec) +query TT +EXPLAIN SELECT + x, + CAST(y AS BIGINT), + SUM(v) +FROM agg_expr_parquet +GROUP BY x, CAST(y AS BIGINT) +ORDER BY x, CAST(y AS BIGINT); +---- +logical_plan +01)Sort: agg_expr_parquet.x ASC NULLS LAST, agg_expr_parquet.y ASC NULLS LAST +02)--Aggregate: groupBy=[[agg_expr_parquet.x, CAST(agg_expr_parquet.y AS Int64)]], aggr=[[sum(CAST(agg_expr_parquet.v AS Int64))]] +03)----TableScan: agg_expr_parquet projection=[x, y, v] +physical_plan +01)AggregateExec: mode=Single, gby=[x@0 as x, CAST(y@1 AS Int64) as agg_expr_parquet.y], aggr=[sum(agg_expr_parquet.v)], ordering_mode=Sorted +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/agg_expr_sorted.parquet]]}, projection=[x, y, v], output_ordering=[x@0 ASC NULLS LAST, y@1 ASC NULLS LAST], file_type=parquet + +query III +SELECT + x, + CAST(y AS BIGINT), + SUM(v) +FROM agg_expr_parquet +GROUP BY x, CAST(y AS BIGINT) +ORDER BY x, CAST(y AS BIGINT); +---- +1 1 10 +1 2 20 +1 3 30 +2 1 40 +2 2 50 +2 3 60 + +# Test 3.9: Aggregate ORDER BY aggregate output should keep SortExec +query TT +EXPLAIN SELECT x, SUM(v) +FROM agg_expr_parquet +GROUP BY x +ORDER BY SUM(v); +---- +logical_plan +01)Sort: sum(agg_expr_parquet.v) ASC NULLS LAST +02)--Aggregate: groupBy=[[agg_expr_parquet.x]], aggr=[[sum(CAST(agg_expr_parquet.v AS Int64))]] +03)----TableScan: agg_expr_parquet projection=[x, v] +physical_plan +01)SortExec: expr=[sum(agg_expr_parquet.v)@1 ASC NULLS LAST], preserve_partitioning=[false] +02)--AggregateExec: mode=Single, gby=[x@0 as x], aggr=[sum(agg_expr_parquet.v)], ordering_mode=Sorted +03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/agg_expr_sorted.parquet]]}, projection=[x, v], output_ordering=[x@0 ASC NULLS LAST], file_type=parquet + +query II +SELECT x, SUM(v) +FROM agg_expr_parquet +GROUP BY x +ORDER BY SUM(v); +---- +1 60 +2 150 + +# Test 3.10: Aggregate with non-preserved input order should keep SortExec +# v is not part of the order by +query TT +EXPLAIN SELECT v, SUM(y) +FROM agg_expr_parquet +GROUP BY v +ORDER BY v; +---- +logical_plan +01)Sort: agg_expr_parquet.v ASC NULLS LAST +02)--Aggregate: groupBy=[[agg_expr_parquet.v]], aggr=[[sum(CAST(agg_expr_parquet.y AS Int64))]] +03)----TableScan: agg_expr_parquet projection=[y, v] +physical_plan +01)SortExec: expr=[v@0 ASC NULLS LAST], preserve_partitioning=[false] +02)--AggregateExec: mode=Single, gby=[v@1 as v], aggr=[sum(agg_expr_parquet.y)] +03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/agg_expr_sorted.parquet]]}, projection=[y, v], file_type=parquet + +query II +SELECT v, SUM(y) +FROM agg_expr_parquet +GROUP BY v +ORDER BY v; +---- +10 1 +20 2 +30 3 +40 1 +50 2 +60 3 + +# Test 3.11: Aggregate ORDER BY non-column expression (unsatisfied) keeps SortExec +# (though note in theory DataFusion could figure out that data sorted by x will also be sorted by x+1) +query TT +EXPLAIN SELECT x, SUM(v) +FROM agg_expr_parquet +GROUP BY x +ORDER BY x + 1 DESC; +---- +logical_plan +01)Sort: CAST(agg_expr_parquet.x AS Int64) + Int64(1) DESC NULLS FIRST +02)--Aggregate: groupBy=[[agg_expr_parquet.x]], aggr=[[sum(CAST(agg_expr_parquet.v AS Int64))]] +03)----TableScan: agg_expr_parquet projection=[x, v] +physical_plan +01)SortExec: expr=[CAST(x@0 AS Int64) + 1 DESC], preserve_partitioning=[false] +02)--AggregateExec: mode=Single, gby=[x@0 as x], aggr=[sum(agg_expr_parquet.v)], ordering_mode=Sorted +03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/agg_expr_sorted.parquet]]}, projection=[x, v], output_ordering=[x@0 ASC NULLS LAST], file_type=parquet + +query II +SELECT x, SUM(v) +FROM agg_expr_parquet +GROUP BY x +ORDER BY x + 1 DESC; +---- +2 150 +1 60 + # Cleanup statement ok DROP TABLE timestamp_data; @@ -882,5 +1060,11 @@ DROP TABLE signed_data; statement ok DROP TABLE signed_parquet; +statement ok +DROP TABLE agg_expr_data; + +statement ok +DROP TABLE agg_expr_parquet; + statement ok SET datafusion.optimizer.enable_sort_pushdown = true; From b62786424824e3f87230f8fd1923937199802e7a Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sat, 14 Feb 2026 14:04:01 -0500 Subject: [PATCH 08/11] fix: validate inter-file ordering in eq_properties() (#20329) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Discovered this bug while working on #19724. TLDR: just because the files themselves are sorted doesn't mean the partition streams are sorted. - **`eq_properties()` in `FileScanConfig` blindly trusted `output_ordering`** (set from Parquet `sorting_columns` metadata) without verifying that files within a group are in the correct inter-file order - `EnforceSorting` then removed `SortExec` based on this unvalidated ordering, producing **wrong results** when filesystem order didn't match data order - Added `validated_output_ordering()` that filters orderings using `MinMaxStatistics::new_from_files()` + `is_sorted()` to verify inter-file sort order before reporting them to the optimizer - Added `validated_output_ordering()` method on `FileScanConfig` that validates each output ordering against actual file group statistics - Changed `eq_properties()` to call `self.validated_output_ordering()` instead of `self.output_ordering.clone()` Added 8 new regression tests (Tests 4-11): | Test | Scenario | Key assertion | |------|----------|---------------| | **4** | Reversed filesystem order (inferred ordering) | SortExec retained — wrong inter-file order detected | | **5** | Overlapping file ranges (inferred ordering) | SortExec retained — overlapping ranges detected | | **6** | `WITH ORDER` + reversed filesystem order | SortExec retained despite explicit ordering | | **7** | Correctly ordered multi-file group (positive) | SortExec eliminated — validation passes | | **8** | DESC ordering with wrong inter-file DESC order | SortExec retained for DESC direction | | **9** | Multi-column sort key (overlapping vs non-overlapping) | Conservative rejection with overlapping stats; passes with clean boundaries | | **10** | Correctly ordered + `WITH ORDER` (positive) | SortExec eliminated — both ordering and stats agree | | **11** | Multiple partitions (one file per group) | `SortPreservingMergeExec` merges; no per-partition sort needed | - [x] `cargo test --test sqllogictests -- sort_pushdown` — all new + existing tests pass - [x] `cargo test -p datafusion-datasource` — 97 unit tests + 6 doc tests pass - [x] Existing Test 1 (single-file sort pushdown with `WITH ORDER`) still eliminates SortExec (no regression) 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude Opus 4.6 --- .../partition_statistics.rs | 2 +- datafusion/datasource/src/file_scan_config.rs | 162 ++++-- datafusion/datasource/src/statistics.rs | 3 +- .../test_files/parquet_sorted_statistics.slt | 2 +- .../sqllogictest/test_files/sort_pushdown.slt | 539 ++++++++++++++++++ 5 files changed, 661 insertions(+), 47 deletions(-) diff --git a/datafusion/core/tests/physical_optimizer/partition_statistics.rs b/datafusion/core/tests/physical_optimizer/partition_statistics.rs index ba53d079e3059..36fa95aa9f57d 100644 --- a/datafusion/core/tests/physical_optimizer/partition_statistics.rs +++ b/datafusion/core/tests/physical_optimizer/partition_statistics.rs @@ -864,7 +864,7 @@ mod test { let plan_string = get_plan_string(&aggregate_exec_partial).swap_remove(0); assert_snapshot!( plan_string, - @"AggregateExec: mode=Partial, gby=[id@0 as id, 1 + id@0 as expr], aggr=[COUNT(c)], ordering_mode=Sorted" + @"AggregateExec: mode=Partial, gby=[id@0 as id, 1 + id@0 as expr], aggr=[COUNT(c)]" ); let p0_statistics = aggregate_exec_partial.partition_statistics(Some(0))?; diff --git a/datafusion/datasource/src/file_scan_config.rs b/datafusion/datasource/src/file_scan_config.rs index c8636343ccc5a..facd12d7cbdfe 100644 --- a/datafusion/datasource/src/file_scan_config.rs +++ b/datafusion/datasource/src/file_scan_config.rs @@ -665,7 +665,7 @@ impl DataSource for FileScanConfig { let schema = self.file_source.table_schema().table_schema(); let mut eq_properties = EquivalenceProperties::new_with_orderings( Arc::clone(schema), - self.output_ordering.clone(), + self.validated_output_ordering(), ) .with_constraints(self.constraints.clone()); @@ -853,6 +853,40 @@ impl DataSource for FileScanConfig { } impl FileScanConfig { + /// Returns only the output orderings that are validated against actual + /// file group statistics. + /// + /// For example, individual files may be ordered by `col1 ASC`, + /// but if we have files with these min/max statistics in a single partition / file group: + /// + /// - file1: min(col1) = 10, max(col1) = 20 + /// - file2: min(col1) = 5, max(col1) = 15 + /// + /// Because reading file1 followed by file2 would produce out-of-order output (there is overlap + /// in the ranges), we cannot retain `col1 ASC` as a valid output ordering. + /// + /// Similarly this would not be a valid order (non-overlapping ranges but not ordered): + /// + /// - file1: min(col1) = 20, max(col1) = 30 + /// - file2: min(col1) = 10, max(col1) = 15 + /// + /// On the other hand if we had: + /// + /// - file1: min(col1) = 5, max(col1) = 15 + /// - file2: min(col1) = 16, max(col1) = 25 + /// + /// Then we know that reading file1 followed by file2 will produce ordered output, + /// so `col1 ASC` would be retained. + /// + /// Note that we are checking for ordering *within* *each* file group / partition, + /// files in different partitions are read independently and do not affect each other's ordering. + /// Merging of the multiple partition streams into a single ordered stream is handled + /// upstream e.g. by `SortPreservingMergeExec`. + fn validated_output_ordering(&self) -> Vec { + let schema = self.file_source.table_schema().table_schema(); + validate_orderings(&self.output_ordering, schema, &self.file_groups, None) + } + /// Get the file schema (schema of the files without partition columns) pub fn file_schema(&self) -> &SchemaRef { self.file_source.table_schema().file_schema() @@ -1202,6 +1236,51 @@ fn ordered_column_indices_from_projection( .collect::>>() } +/// Check whether a given ordering is valid for all file groups by verifying +/// that files within each group are sorted according to their min/max statistics. +/// +/// For single-file (or empty) groups, the ordering is trivially valid. +/// For multi-file groups, we check that the min/max statistics for the sort +/// columns are in order and non-overlapping (or touching at boundaries). +/// +/// `projection` maps projected column indices back to table-schema indices +/// when validating after projection; pass `None` when validating at +/// table-schema level. +fn is_ordering_valid_for_file_groups( + file_groups: &[FileGroup], + ordering: &LexOrdering, + schema: &SchemaRef, + projection: Option<&[usize]>, +) -> bool { + file_groups.iter().all(|group| { + if group.len() <= 1 { + return true; // single-file groups are trivially sorted + } + match MinMaxStatistics::new_from_files(ordering, schema, projection, group.iter()) + { + Ok(stats) => stats.is_sorted(), + Err(_) => false, // can't prove sorted → reject + } + }) +} + +/// Filters orderings to retain only those valid for all file groups, +/// verified via min/max statistics. +fn validate_orderings( + orderings: &[LexOrdering], + schema: &SchemaRef, + file_groups: &[FileGroup], + projection: Option<&[usize]>, +) -> Vec { + orderings + .iter() + .filter(|ordering| { + is_ordering_valid_for_file_groups(file_groups, ordering, schema, projection) + }) + .cloned() + .collect() +} + /// The various listing tables does not attempt to read all files /// concurrently, instead they will read files in sequence within a /// partition. This is an important property as it allows plans to @@ -1268,52 +1347,47 @@ fn get_projected_output_ordering( let projected_orderings = project_orderings(&base_config.output_ordering, projected_schema); - let mut all_orderings = vec![]; - for new_ordering in projected_orderings { - // Check if any file groups are not sorted - if base_config.file_groups.iter().any(|group| { - if group.len() <= 1 { - // File groups with <= 1 files are always sorted - return false; - } - - let Some(indices) = base_config - .file_source - .projection() - .as_ref() - .map(|p| ordered_column_indices_from_projection(p)) - else { - // Can't determine if ordered without a simple projection - return true; - }; - - let statistics = match MinMaxStatistics::new_from_files( - &new_ordering, + let indices = base_config + .file_source + .projection() + .as_ref() + .map(|p| ordered_column_indices_from_projection(p)); + + match indices { + Some(Some(indices)) => { + // Simple column projection — validate with statistics + validate_orderings( + &projected_orderings, projected_schema, - indices.as_deref(), - group.iter(), - ) { - Ok(statistics) => statistics, - Err(e) => { - log::trace!("Error fetching statistics for file group: {e}"); - // we can't prove that it's ordered, so we have to reject it - return true; - } - }; - - !statistics.is_sorted() - }) { - debug!( - "Skipping specified output ordering {:?}. \ - Some file groups couldn't be determined to be sorted: {:?}", - base_config.output_ordering[0], base_config.file_groups - ); - continue; + &base_config.file_groups, + Some(indices.as_slice()), + ) + } + None => { + // No projection — validate with statistics (no remapping needed) + validate_orderings( + &projected_orderings, + projected_schema, + &base_config.file_groups, + None, + ) + } + Some(None) => { + // Complex projection (expressions, not simple columns) — can't + // determine column indices for statistics. Still valid if all + // file groups have at most one file. + if base_config.file_groups.iter().all(|g| g.len() <= 1) { + projected_orderings + } else { + debug!( + "Skipping specified output orderings. \ + Some file groups couldn't be determined to be sorted: {:?}", + base_config.file_groups + ); + vec![] + } } - - all_orderings.push(new_ordering); } - all_orderings } /// Convert type to a type suitable for use as a `ListingTable` diff --git a/datafusion/datasource/src/statistics.rs b/datafusion/datasource/src/statistics.rs index 2f34ca032e132..b1a56e096c222 100644 --- a/datafusion/datasource/src/statistics.rs +++ b/datafusion/datasource/src/statistics.rs @@ -266,11 +266,12 @@ impl MinMaxStatistics { } /// Check if the min/max statistics are in order and non-overlapping + /// (or touching at boundaries) pub fn is_sorted(&self) -> bool { self.max_by_sort_order .iter() .zip(self.min_by_sort_order.iter().skip(1)) - .all(|(max, next_min)| max < next_min) + .all(|(max, next_min)| max <= next_min) } } diff --git a/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt b/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt index 5a559bdb94835..fd3a40ca17079 100644 --- a/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt +++ b/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt @@ -274,4 +274,4 @@ logical_plan 02)--TableScan: test_table projection=[constant_col] physical_plan 01)SortPreservingMergeExec: [constant_col@0 ASC NULLS LAST] -02)--DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=A/0.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=B/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=C/2.parquet]]}, projection=[constant_col], file_type=parquet +02)--DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=A/0.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=B/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=C/2.parquet]]}, projection=[constant_col], output_ordering=[constant_col@0 ASC NULLS LAST], file_type=parquet diff --git a/datafusion/sqllogictest/test_files/sort_pushdown.slt b/datafusion/sqllogictest/test_files/sort_pushdown.slt index 4148de1d62f73..a9ccb050b72fe 100644 --- a/datafusion/sqllogictest/test_files/sort_pushdown.slt +++ b/datafusion/sqllogictest/test_files/sort_pushdown.slt @@ -1068,3 +1068,542 @@ DROP TABLE agg_expr_parquet; statement ok SET datafusion.optimizer.enable_sort_pushdown = true; + +# Test 4: Reversed filesystem order with inferred ordering +# Create 3 parquet files with non-overlapping id ranges, named so filesystem +# order is OPPOSITE to data order. Each file is internally sorted by id ASC. +# Force target_partitions=1 so all files end up in one file group, which is +# where the inter-file ordering bug manifests. +# Without inter-file validation, the optimizer would incorrectly trust the +# inferred ordering and remove SortExec. + +# Save current target_partitions and set to 1 to force single file group +statement ok +SET datafusion.execution.target_partitions = 1; + +statement ok +CREATE TABLE reversed_high(id INT, value INT) AS VALUES (7, 700), (8, 800), (9, 900); + +statement ok +CREATE TABLE reversed_mid(id INT, value INT) AS VALUES (4, 400), (5, 500), (6, 600); + +statement ok +CREATE TABLE reversed_low(id INT, value INT) AS VALUES (1, 100), (2, 200), (3, 300); + +query I +COPY (SELECT * FROM reversed_high ORDER BY id ASC) +TO 'test_files/scratch/sort_pushdown/reversed/a_high.parquet'; +---- +3 + +query I +COPY (SELECT * FROM reversed_mid ORDER BY id ASC) +TO 'test_files/scratch/sort_pushdown/reversed/b_mid.parquet'; +---- +3 + +query I +COPY (SELECT * FROM reversed_low ORDER BY id ASC) +TO 'test_files/scratch/sort_pushdown/reversed/c_low.parquet'; +---- +3 + +# External table with NO "WITH ORDER" — relies on inferred ordering from parquet metadata +statement ok +CREATE EXTERNAL TABLE reversed_parquet(id INT, value INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/sort_pushdown/reversed/'; + +# Test 4.1: SortExec must be present because files are not in inter-file order +query TT +EXPLAIN SELECT * FROM reversed_parquet ORDER BY id ASC; +---- +logical_plan +01)Sort: reversed_parquet.id ASC NULLS LAST +02)--TableScan: reversed_parquet projection=[id, value] +physical_plan +01)SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/a_high.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/b_mid.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/c_low.parquet]]}, projection=[id, value], file_type=parquet + +# Test 4.2: Results must be correct +query II +SELECT * FROM reversed_parquet ORDER BY id ASC; +---- +1 100 +2 200 +3 300 +4 400 +5 500 +6 600 +7 700 +8 800 +9 900 + +# Test 5: Overlapping files with inferred ordering +# Create files with overlapping id ranges + +statement ok +CREATE TABLE overlap_x(id INT, value INT) AS VALUES (1, 100), (3, 300), (5, 500); + +statement ok +CREATE TABLE overlap_y(id INT, value INT) AS VALUES (2, 200), (4, 400), (6, 600); + +query I +COPY (SELECT * FROM overlap_x ORDER BY id ASC) +TO 'test_files/scratch/sort_pushdown/overlap/file_x.parquet'; +---- +3 + +query I +COPY (SELECT * FROM overlap_y ORDER BY id ASC) +TO 'test_files/scratch/sort_pushdown/overlap/file_y.parquet'; +---- +3 + +statement ok +CREATE EXTERNAL TABLE overlap_parquet(id INT, value INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/sort_pushdown/overlap/'; + +# Test 5.1: SortExec must be present because files have overlapping ranges +query TT +EXPLAIN SELECT * FROM overlap_parquet ORDER BY id ASC; +---- +logical_plan +01)Sort: overlap_parquet.id ASC NULLS LAST +02)--TableScan: overlap_parquet projection=[id, value] +physical_plan +01)SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/overlap/file_x.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/overlap/file_y.parquet]]}, projection=[id, value], file_type=parquet + +# Test 5.2: Results must be correct +query II +SELECT * FROM overlap_parquet ORDER BY id ASC; +---- +1 100 +2 200 +3 300 +4 400 +5 500 +6 600 + +# Test 6: WITH ORDER + reversed filesystem order +# Same file setup as Test 4 but explicitly declaring ordering via WITH ORDER. +# Even with WITH ORDER, the optimizer should detect that inter-file order is wrong +# and keep SortExec. + +statement ok +CREATE EXTERNAL TABLE reversed_with_order_parquet(id INT, value INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/sort_pushdown/reversed/' +WITH ORDER (id ASC); + +# Test 6.1: SortExec must be present despite WITH ORDER +query TT +EXPLAIN SELECT * FROM reversed_with_order_parquet ORDER BY id ASC; +---- +logical_plan +01)Sort: reversed_with_order_parquet.id ASC NULLS LAST +02)--TableScan: reversed_with_order_parquet projection=[id, value] +physical_plan +01)SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/a_high.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/b_mid.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/c_low.parquet]]}, projection=[id, value], file_type=parquet + +# Test 6.2: Results must be correct +query II +SELECT * FROM reversed_with_order_parquet ORDER BY id ASC; +---- +1 100 +2 200 +3 300 +4 400 +5 500 +6 600 +7 700 +8 800 +9 900 + +# Test 7: Correctly ordered multi-file single group (positive case) +# Files are in CORRECT inter-file order within a single group. +# The validation should PASS and SortExec should be eliminated. + +statement ok +CREATE TABLE correct_low(id INT, value INT) AS VALUES (1, 100), (2, 200), (3, 300); + +statement ok +CREATE TABLE correct_mid(id INT, value INT) AS VALUES (4, 400), (5, 500), (6, 600); + +statement ok +CREATE TABLE correct_high(id INT, value INT) AS VALUES (7, 700), (8, 800), (9, 900); + +query I +COPY (SELECT * FROM correct_low ORDER BY id ASC) +TO 'test_files/scratch/sort_pushdown/correct/a_low.parquet'; +---- +3 + +query I +COPY (SELECT * FROM correct_mid ORDER BY id ASC) +TO 'test_files/scratch/sort_pushdown/correct/b_mid.parquet'; +---- +3 + +query I +COPY (SELECT * FROM correct_high ORDER BY id ASC) +TO 'test_files/scratch/sort_pushdown/correct/c_high.parquet'; +---- +3 + +statement ok +CREATE EXTERNAL TABLE correct_parquet(id INT, value INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/sort_pushdown/correct/' +WITH ORDER (id ASC); + +# Test 7.1: SortExec should be ELIMINATED — files are in correct inter-file order +query TT +EXPLAIN SELECT * FROM correct_parquet ORDER BY id ASC; +---- +logical_plan +01)Sort: correct_parquet.id ASC NULLS LAST +02)--TableScan: correct_parquet projection=[id, value] +physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/correct/a_low.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/correct/b_mid.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/correct/c_high.parquet]]}, projection=[id, value], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet + +# Test 7.2: Results must be correct +query II +SELECT * FROM correct_parquet ORDER BY id ASC; +---- +1 100 +2 200 +3 300 +4 400 +5 500 +6 600 +7 700 +8 800 +9 900 + +# Test 7.3: DESC query on correctly ordered ASC files should still use SortExec +# Note: reverse_row_groups=true reverses the file list in the plan display +query TT +EXPLAIN SELECT * FROM correct_parquet ORDER BY id DESC; +---- +logical_plan +01)Sort: correct_parquet.id DESC NULLS FIRST +02)--TableScan: correct_parquet projection=[id, value] +physical_plan +01)SortExec: expr=[id@0 DESC], preserve_partitioning=[false] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/correct/c_high.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/correct/b_mid.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/correct/a_low.parquet]]}, projection=[id, value], file_type=parquet, reverse_row_groups=true + +query II +SELECT * FROM correct_parquet ORDER BY id DESC; +---- +9 900 +8 800 +7 700 +6 600 +5 500 +4 400 +3 300 +2 200 +1 100 + +# Test 8: DESC ordering with files in wrong inter-file DESC order +# Create files internally sorted by id DESC, but named so filesystem order +# is WRONG for DESC ordering (low values first in filesystem order). + +statement ok +CREATE TABLE desc_low(id INT, value INT) AS VALUES (3, 300), (2, 200), (1, 100); + +statement ok +CREATE TABLE desc_high(id INT, value INT) AS VALUES (9, 900), (8, 800), (7, 700); + +query I +COPY (SELECT * FROM desc_low ORDER BY id DESC) +TO 'test_files/scratch/sort_pushdown/desc_reversed/a_low.parquet'; +---- +3 + +query I +COPY (SELECT * FROM desc_high ORDER BY id DESC) +TO 'test_files/scratch/sort_pushdown/desc_reversed/b_high.parquet'; +---- +3 + +statement ok +CREATE EXTERNAL TABLE desc_reversed_parquet(id INT, value INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/sort_pushdown/desc_reversed/' +WITH ORDER (id DESC); + +# Test 8.1: SortExec must be present — files are in wrong inter-file DESC order +# (a_low has 1-3, b_high has 7-9; for DESC, b_high should come first) +query TT +EXPLAIN SELECT * FROM desc_reversed_parquet ORDER BY id DESC; +---- +logical_plan +01)Sort: desc_reversed_parquet.id DESC NULLS FIRST +02)--TableScan: desc_reversed_parquet projection=[id, value] +physical_plan +01)SortExec: expr=[id@0 DESC], preserve_partitioning=[false] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/desc_reversed/a_low.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/desc_reversed/b_high.parquet]]}, projection=[id, value], file_type=parquet + +# Test 8.2: Results must be correct +query II +SELECT * FROM desc_reversed_parquet ORDER BY id DESC; +---- +9 900 +8 800 +7 700 +3 300 +2 200 +1 100 + +# Test 9: Multi-column sort key validation +# Files have (category, id) ordering. Files share a boundary value on category='B' +# so column-level min/max statistics overlap on the primary key column. +# The validation conservatively rejects this because column-level stats can't +# precisely represent row-level boundaries for multi-column keys. + +statement ok +CREATE TABLE multi_col_a(category VARCHAR, id INT, value INT) AS VALUES +('A', 1, 10), ('A', 2, 20), ('B', 1, 30); + +statement ok +CREATE TABLE multi_col_b(category VARCHAR, id INT, value INT) AS VALUES +('B', 2, 40), ('C', 1, 50), ('C', 2, 60); + +query I +COPY (SELECT * FROM multi_col_a ORDER BY category ASC, id ASC) +TO 'test_files/scratch/sort_pushdown/multi_col/a_first.parquet'; +---- +3 + +query I +COPY (SELECT * FROM multi_col_b ORDER BY category ASC, id ASC) +TO 'test_files/scratch/sort_pushdown/multi_col/b_second.parquet'; +---- +3 + +statement ok +CREATE EXTERNAL TABLE multi_col_parquet(category VARCHAR, id INT, value INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/sort_pushdown/multi_col/' +WITH ORDER (category ASC, id ASC); + +# Test 9.1: SortExec is present — validation conservatively rejects because +# column-level stats overlap on category='B' across both files +query TT +EXPLAIN SELECT * FROM multi_col_parquet ORDER BY category ASC, id ASC; +---- +logical_plan +01)Sort: multi_col_parquet.category ASC NULLS LAST, multi_col_parquet.id ASC NULLS LAST +02)--TableScan: multi_col_parquet projection=[category, id, value] +physical_plan +01)SortExec: expr=[category@0 ASC NULLS LAST, id@1 ASC NULLS LAST], preserve_partitioning=[false] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/multi_col/a_first.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/multi_col/b_second.parquet]]}, projection=[category, id, value], file_type=parquet + +# Test 9.2: Results must be correct +query TII +SELECT * FROM multi_col_parquet ORDER BY category ASC, id ASC; +---- +A 1 10 +A 2 20 +B 1 30 +B 2 40 +C 1 50 +C 2 60 + +# Test 9.3: Multi-column sort with non-overlapping primary key across files +# When files don't overlap on the primary column, validation succeeds. + +statement ok +CREATE TABLE multi_col_x(category VARCHAR, id INT, value INT) AS VALUES +('A', 1, 10), ('A', 2, 20); + +statement ok +CREATE TABLE multi_col_y(category VARCHAR, id INT, value INT) AS VALUES +('B', 1, 30), ('B', 2, 40); + +query I +COPY (SELECT * FROM multi_col_x ORDER BY category ASC, id ASC) +TO 'test_files/scratch/sort_pushdown/multi_col_clean/x_first.parquet'; +---- +2 + +query I +COPY (SELECT * FROM multi_col_y ORDER BY category ASC, id ASC) +TO 'test_files/scratch/sort_pushdown/multi_col_clean/y_second.parquet'; +---- +2 + +statement ok +CREATE EXTERNAL TABLE multi_col_clean_parquet(category VARCHAR, id INT, value INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/sort_pushdown/multi_col_clean/' +WITH ORDER (category ASC, id ASC); + +# Test 9.3a: SortExec should be eliminated — non-overlapping primary column +query TT +EXPLAIN SELECT * FROM multi_col_clean_parquet ORDER BY category ASC, id ASC; +---- +logical_plan +01)Sort: multi_col_clean_parquet.category ASC NULLS LAST, multi_col_clean_parquet.id ASC NULLS LAST +02)--TableScan: multi_col_clean_parquet projection=[category, id, value] +physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/multi_col_clean/x_first.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/multi_col_clean/y_second.parquet]]}, projection=[category, id, value], output_ordering=[category@0 ASC NULLS LAST, id@1 ASC NULLS LAST], file_type=parquet + +# Test 9.3b: Results must be correct +query TII +SELECT * FROM multi_col_clean_parquet ORDER BY category ASC, id ASC; +---- +A 1 10 +A 2 20 +B 1 30 +B 2 40 + +# Test 10: Correctly ordered files WITH ORDER (positive counterpart to Test 6) +# Files in correct_parquet are in correct ASC order — WITH ORDER should pass validation +# and SortExec should be eliminated. + +statement ok +CREATE EXTERNAL TABLE correct_with_order_parquet(id INT, value INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/sort_pushdown/correct/' +WITH ORDER (id ASC); + +# Test 10.1: SortExec should be ELIMINATED — files are in correct order +query TT +EXPLAIN SELECT * FROM correct_with_order_parquet ORDER BY id ASC; +---- +logical_plan +01)Sort: correct_with_order_parquet.id ASC NULLS LAST +02)--TableScan: correct_with_order_parquet projection=[id, value] +physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/correct/a_low.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/correct/b_mid.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/correct/c_high.parquet]]}, projection=[id, value], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet + +# Test 10.2: Results must be correct +query II +SELECT * FROM correct_with_order_parquet ORDER BY id ASC; +---- +1 100 +2 200 +3 300 +4 400 +5 500 +6 600 +7 700 +8 800 +9 900 + +# Test 11: Multiple file groups (target_partitions > 1) — each group has one file +# When files are spread across separate partitions (one file per group), each +# partition is trivially sorted and SortPreservingMergeExec handles the merge. + +# Restore higher target_partitions so files go into separate groups +statement ok +SET datafusion.execution.target_partitions = 4; + +statement ok +CREATE EXTERNAL TABLE multi_partition_parquet(id INT, value INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/sort_pushdown/reversed/' +WITH ORDER (id ASC); + +# Test 11.1: With separate partitions, each file is trivially sorted. +# SortPreservingMergeExec merges, no SortExec needed per-partition. +query TT +EXPLAIN SELECT * FROM multi_partition_parquet ORDER BY id ASC; +---- +logical_plan +01)Sort: multi_partition_parquet.id ASC NULLS LAST +02)--TableScan: multi_partition_parquet projection=[id, value] +physical_plan +01)SortPreservingMergeExec: [id@0 ASC NULLS LAST] +02)--DataSourceExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/a_high.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/b_mid.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/c_low.parquet]]}, projection=[id, value], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet + +# Test 11.2: Results must be correct +query II +SELECT * FROM multi_partition_parquet ORDER BY id ASC; +---- +1 100 +2 200 +3 300 +4 400 +5 500 +6 600 +7 700 +8 800 +9 900 + +# Restore target_partitions to 1 for remaining cleanup +statement ok +SET datafusion.execution.target_partitions = 2; + +statement ok +DROP TABLE reversed_high; + +statement ok +DROP TABLE reversed_mid; + +statement ok +DROP TABLE reversed_low; + +statement ok +DROP TABLE reversed_parquet; + +statement ok +DROP TABLE overlap_x; + +statement ok +DROP TABLE overlap_y; + +statement ok +DROP TABLE overlap_parquet; + +statement ok +DROP TABLE reversed_with_order_parquet; + +statement ok +DROP TABLE correct_low; + +statement ok +DROP TABLE correct_mid; + +statement ok +DROP TABLE correct_high; + +statement ok +DROP TABLE correct_parquet; + +statement ok +DROP TABLE desc_low; + +statement ok +DROP TABLE desc_high; + +statement ok +DROP TABLE desc_reversed_parquet; + +statement ok +DROP TABLE multi_col_a; + +statement ok +DROP TABLE multi_col_b; + +statement ok +DROP TABLE multi_col_parquet; + +statement ok +DROP TABLE multi_col_x; + +statement ok +DROP TABLE multi_col_y; + +statement ok +DROP TABLE multi_col_clean_parquet; + +statement ok +DROP TABLE correct_with_order_parquet; + +statement ok +DROP TABLE multi_partition_parquet; + From 866a4f01e70418a86375d7439d1285c5c7760313 Mon Sep 17 00:00:00 2001 From: Tim-53 <82676248+Tim-53@users.noreply.github.com> Date: Tue, 24 Feb 2026 13:10:15 +0100 Subject: [PATCH 09/11] fix: HashJoin panic with dictionary-encoded columns in multi-key joins (#20441) - Closes #20437 `flatten_dictionary_array` returned only the unique values rather then the full expanded array when being called on a `DictionaryArray`. When building a `StructArray` this caused a length mismatch panic. Replaced `array.values()` with `arrow::compute::cast(array, value_type)` in `flatten_dictionary_array`, which properly expands the dictionary into a full length array matching the row count. Yes, both a new unit test aswell as a regression test were added. Nope --------- Co-authored-by: Andrew Lamb --- .../src/joins/hash_join/inlist_builder.rs | 58 ++++++-- datafusion/sqllogictest/test_files/joins.slt | 125 ++++++++++++++++++ 2 files changed, 174 insertions(+), 9 deletions(-) diff --git a/datafusion/physical-plan/src/joins/hash_join/inlist_builder.rs b/datafusion/physical-plan/src/joins/hash_join/inlist_builder.rs index 7dccc5b0ba7c2..9bf59d9e333d6 100644 --- a/datafusion/physical-plan/src/joins/hash_join/inlist_builder.rs +++ b/datafusion/physical-plan/src/joins/hash_join/inlist_builder.rs @@ -20,8 +20,8 @@ use std::sync::Arc; use arrow::array::{ArrayRef, StructArray}; +use arrow::compute::cast; use arrow::datatypes::{Field, FieldRef, Fields}; -use arrow::downcast_dictionary_array; use arrow_schema::DataType; use datafusion_common::Result; @@ -33,15 +33,16 @@ pub(super) fn build_struct_fields(data_types: &[DataType]) -> Result { .collect() } -/// Flattens dictionary-encoded arrays to their underlying value arrays. +/// Casts dictionary-encoded arrays to their underlying value type, preserving row count. /// Non-dictionary arrays are returned as-is. -fn flatten_dictionary_array(array: &ArrayRef) -> ArrayRef { - downcast_dictionary_array! { - array => { +fn flatten_dictionary_array(array: &ArrayRef) -> Result { + match array.data_type() { + DataType::Dictionary(_, value_type) => { + let casted = cast(array, value_type)?; // Recursively flatten in case of nested dictionaries - flatten_dictionary_array(array.values()) + flatten_dictionary_array(&casted) } - _ => Arc::clone(array) + _ => Ok(Arc::clone(array)), } } @@ -68,7 +69,7 @@ pub(super) fn build_struct_inlist_values( let flattened_arrays: Vec = join_key_arrays .iter() .map(flatten_dictionary_array) - .collect(); + .collect::>>()?; // Build the source array/struct let source_array: ArrayRef = if flattened_arrays.len() == 1 { @@ -99,7 +100,9 @@ pub(super) fn build_struct_inlist_values( #[cfg(test)] mod tests { use super::*; - use arrow::array::{Int32Array, StringArray}; + use arrow::array::{ + DictionaryArray, Int8Array, Int32Array, StringArray, StringDictionaryBuilder, + }; use arrow_schema::DataType; use std::sync::Arc; @@ -130,4 +133,41 @@ mod tests { ) ); } + + #[test] + fn test_build_multi_column_inlist_with_dictionary() { + let mut builder = StringDictionaryBuilder::::new(); + builder.append_value("foo"); + builder.append_value("foo"); + builder.append_value("foo"); + let dict_array = Arc::new(builder.finish()) as ArrayRef; + + let int_array = Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef; + + let result = build_struct_inlist_values(&[dict_array, int_array]) + .unwrap() + .unwrap(); + + assert_eq!(result.len(), 3); + assert_eq!( + *result.data_type(), + DataType::Struct( + build_struct_fields(&[DataType::Utf8, DataType::Int32]).unwrap() + ) + ); + } + + #[test] + fn test_build_single_column_dictionary_inlist() { + let keys = Int8Array::from(vec![0i8, 0, 0]); + let values = Arc::new(StringArray::from(vec!["foo"])); + let dict_array = Arc::new(DictionaryArray::new(keys, values)) as ArrayRef; + + let result = build_struct_inlist_values(std::slice::from_ref(&dict_array)) + .unwrap() + .unwrap(); + + assert_eq!(result.len(), 3); + assert_eq!(*result.data_type(), DataType::Utf8); + } } diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index 38037ede21db2..e020885a447e0 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -5198,3 +5198,128 @@ DROP TABLE t1_c; statement ok DROP TABLE t2_c; + +# Reproducer of https://github.com/apache/datafusion/issues/19067 +statement count 0 +set datafusion.explain.physical_plan_only = true; + +# Setup Left Table with FixedSizeBinary(4) +statement count 0 +CREATE TABLE issue_19067_left AS +SELECT + column1 as id, + arrow_cast(decode(column2, 'hex'), 'FixedSizeBinary(4)') as join_key +FROM (VALUES + (1, 'AAAAAAAA'), + (2, 'BBBBBBBB'), + (3, 'CCCCCCCC') +); + +# Setup Right Table with FixedSizeBinary(4) +statement count 0 +CREATE TABLE issue_19067_right AS +SELECT + arrow_cast(decode(column1, 'hex'), 'FixedSizeBinary(4)') as join_key, + column2 as value +FROM (VALUES + ('AAAAAAAA', 1000), + ('BBBBBBBB', 2000) +); + +# Perform Left Join. Third row should contain NULL in `right_key`. +query I??I +SELECT + l.id, + l.join_key as left_key, + r.join_key as right_key, + r.value +FROM issue_19067_left l +LEFT JOIN issue_19067_right r ON l.join_key = r.join_key +ORDER BY l.id; +---- +1 aaaaaaaa aaaaaaaa 1000 +2 bbbbbbbb bbbbbbbb 2000 +3 cccccccc NULL NULL + +# Ensure usage of HashJoinExec +query TT +EXPLAIN +SELECT + l.id, + l.join_key as left_key, + r.join_key as right_key, + r.value +FROM issue_19067_left l +LEFT JOIN issue_19067_right r ON l.join_key = r.join_key +ORDER BY l.id; +---- +physical_plan +01)SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] +02)--ProjectionExec: expr=[id@2 as id, join_key@3 as left_key, join_key@0 as right_key, value@1 as value] +03)----HashJoinExec: mode=CollectLeft, join_type=Right, on=[(join_key@0, join_key@1)] +04)------DataSourceExec: partitions=1, partition_sizes=[1] +05)------DataSourceExec: partitions=1, partition_sizes=[1] + +statement count 0 +set datafusion.explain.physical_plan_only = false; + +statement count 0 +DROP TABLE issue_19067_left; + +statement count 0 +DROP TABLE issue_19067_right; + +# Test that empty projections pushed into joins produce correct row counts at runtime. +# When count(1) is used over a RIGHT/FULL JOIN, the optimizer embeds an empty projection +# (projection=[]) into the HashJoinExec. This validates that the runtime batch construction +# handles zero-column output correctly, preserving the correct number of rows. + +statement ok +CREATE TABLE empty_proj_left AS VALUES (1, 'a'), (2, 'b'), (3, 'c'); + +statement ok +CREATE TABLE empty_proj_right AS VALUES (1, 'x'), (2, 'y'), (4, 'z'); + +query I +SELECT count(1) FROM empty_proj_left RIGHT JOIN empty_proj_right ON empty_proj_left.column1 = empty_proj_right.column1; +---- +3 + +query I +SELECT count(1) FROM empty_proj_left FULL JOIN empty_proj_right ON empty_proj_left.column1 = empty_proj_right.column1; +---- +4 + +statement count 0 +DROP TABLE empty_proj_left; + +statement count 0 +DROP TABLE empty_proj_right; + +# Issue #20437: HashJoin panic with dictionary-encoded columns in multi-key joins +# https://github.com/apache/datafusion/issues/20437 + +statement ok +CREATE TABLE issue_20437_small AS +SELECT id, arrow_cast(region, 'Dictionary(Int32, Utf8)') AS region +FROM (VALUES (1, 'west'), (2, 'west')) AS t(id, region); + +statement ok +CREATE TABLE issue_20437_large AS +SELECT id, region, value +FROM (VALUES (1, 'west', 100), (2, 'west', 200), (3, 'east', 300)) AS t(id, region, value); + +query ITI +SELECT s.id, s.region, l.value +FROM issue_20437_small s +JOIN issue_20437_large l ON s.id = l.id AND s.region = l.region +ORDER BY s.id; +---- +1 west 100 +2 west 200 + +statement count 0 +DROP TABLE issue_20437_small; + +statement count 0 +DROP TABLE issue_20437_large; From 20598115a943fb0da68f9fe8507b510d8f292a9c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 23 Feb 2026 14:38:11 -0500 Subject: [PATCH 10/11] Avoid flattening dictionaries in Join InLists --- .../src/joins/hash_join/inlist_builder.rs | 39 +++++++------------ 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/datafusion/physical-plan/src/joins/hash_join/inlist_builder.rs b/datafusion/physical-plan/src/joins/hash_join/inlist_builder.rs index 9bf59d9e333d6..0ca338265ecc6 100644 --- a/datafusion/physical-plan/src/joins/hash_join/inlist_builder.rs +++ b/datafusion/physical-plan/src/joins/hash_join/inlist_builder.rs @@ -20,7 +20,6 @@ use std::sync::Arc; use arrow::array::{ArrayRef, StructArray}; -use arrow::compute::cast; use arrow::datatypes::{Field, FieldRef, Fields}; use arrow_schema::DataType; use datafusion_common::Result; @@ -33,19 +32,6 @@ pub(super) fn build_struct_fields(data_types: &[DataType]) -> Result { .collect() } -/// Casts dictionary-encoded arrays to their underlying value type, preserving row count. -/// Non-dictionary arrays are returned as-is. -fn flatten_dictionary_array(array: &ArrayRef) -> Result { - match array.data_type() { - DataType::Dictionary(_, value_type) => { - let casted = cast(array, value_type)?; - // Recursively flatten in case of nested dictionaries - flatten_dictionary_array(&casted) - } - _ => Ok(Arc::clone(array)), - } -} - /// Builds InList values from join key column arrays. /// /// If `join_key_arrays` is: @@ -65,20 +51,14 @@ fn flatten_dictionary_array(array: &ArrayRef) -> Result { pub(super) fn build_struct_inlist_values( join_key_arrays: &[ArrayRef], ) -> Result> { - // Flatten any dictionary-encoded arrays - let flattened_arrays: Vec = join_key_arrays - .iter() - .map(flatten_dictionary_array) - .collect::>>()?; - // Build the source array/struct - let source_array: ArrayRef = if flattened_arrays.len() == 1 { + let source_array: ArrayRef = if join_key_arrays.len() == 1 { // Single column: use directly - Arc::clone(&flattened_arrays[0]) + Arc::clone(&join_key_arrays[0]) } else { // Multi-column: build StructArray once from all columns let fields = build_struct_fields( - &flattened_arrays + &join_key_arrays .iter() .map(|arr| arr.data_type().clone()) .collect::>(), @@ -88,7 +68,7 @@ pub(super) fn build_struct_inlist_values( let arrays_with_fields: Vec<(FieldRef, ArrayRef)> = fields .iter() .cloned() - .zip(flattened_arrays.iter().cloned()) + .zip(join_key_arrays.iter().cloned()) .collect(); Arc::new(StructArray::from(arrays_with_fields)) @@ -152,7 +132,14 @@ mod tests { assert_eq!( *result.data_type(), DataType::Struct( - build_struct_fields(&[DataType::Utf8, DataType::Int32]).unwrap() + build_struct_fields(&[ + DataType::Dictionary( + Box::new(DataType::Int8), + Box::new(DataType::Utf8) + ), + DataType::Int32 + ]) + .unwrap() ) ); } @@ -168,6 +155,6 @@ mod tests { .unwrap(); assert_eq!(result.len(), 3); - assert_eq!(*result.data_type(), DataType::Utf8); + assert_eq!(result.data_type(), dict_array.data_type()); } } From 6a0206e08361c7b325f03f37da71e760b1cfaf18 Mon Sep 17 00:00:00 2001 From: Adam Curtis Date: Wed, 4 Mar 2026 10:47:39 -0500 Subject: [PATCH 11/11] chore: bump arrow/parquet to 57.3.0 Includes fix for FixedSizeBinary LEFT JOIN bug - https://github.com/apache/arrow-rs/pull/8981 Cherry-picked test and API updates from - https://github.com/apache/datafusion/pull/19355 --- Cargo.lock | 64 +++++++++---------- Cargo.toml | 14 ++-- datafusion/common/src/scalar/mod.rs | 7 +- .../src/avro_to_arrow/schema.rs | 4 +- .../functions/src/core/union_extract.rs | 5 +- datafusion/physical-plan/src/filter.rs | 5 +- datafusion/proto-common/src/from_proto/mod.rs | 21 +++--- .../tests/cases/roundtrip_logical_plan.rs | 12 ++-- datafusion/sqllogictest/src/test_context.rs | 5 +- datafusion/sqllogictest/test_files/case.slt | 5 +- .../test_files/spark/hash/crc32.slt | 6 +- datafusion/sqllogictest/test_files/struct.slt | 39 ++++++++--- 12 files changed, 109 insertions(+), 78 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c2e9312a24723..3bdae442123fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -223,9 +223,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb372a7cbcac02a35d3fb7b3fc1f969ec078e871f9bb899bf00a2e1809bec8a3" +checksum = "e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" dependencies = [ "arrow-arith", "arrow-array", @@ -246,9 +246,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f377dcd19e440174596d83deb49cd724886d91060c07fec4f67014ef9d54049" +checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" dependencies = [ "arrow-array", "arrow-buffer", @@ -260,9 +260,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eaff85a44e9fa914660fb0d0bb00b79c4a3d888b5334adb3ea4330c84f002" +checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" dependencies = [ "ahash", "arrow-buffer", @@ -279,9 +279,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2819d893750cb3380ab31ebdc8c68874dd4429f90fd09180f3c93538bd21626" +checksum = "c697ddca96183182f35b3a18e50b9110b11e916d7b7799cbfd4d34662f2c56c2" dependencies = [ "bytes", "half", @@ -291,9 +291,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d131abb183f80c450d4591dc784f8d7750c50c6e2bc3fcaad148afc8361271" +checksum = "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" dependencies = [ "arrow-array", "arrow-buffer", @@ -313,9 +313,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2275877a0e5e7e7c76954669366c2aa1a829e340ab1f612e647507860906fb6b" +checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" dependencies = [ "arrow-array", "arrow-cast", @@ -328,9 +328,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05738f3d42cb922b9096f7786f606fcb8669260c2640df8490533bb2fa38c9d3" +checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" dependencies = [ "arrow-buffer", "arrow-schema", @@ -341,9 +341,9 @@ dependencies = [ [[package]] name = "arrow-flight" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b5f57c3d39d1b1b7c1376a772ea86a131e7da310aed54ebea9363124bb885e3" +checksum = "58c5b083668e6230eae3eab2fc4b5fb989974c845d0aa538dde61a4327c78675" dependencies = [ "arrow-arith", "arrow-array", @@ -369,9 +369,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d09446e8076c4b3f235603d9ea7c5494e73d441b01cd61fb33d7254c11964b3" +checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" dependencies = [ "arrow-array", "arrow-buffer", @@ -385,9 +385,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "371ffd66fa77f71d7628c63f209c9ca5341081051aa32f9c8020feb0def787c0" +checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" dependencies = [ "arrow-array", "arrow-buffer", @@ -409,9 +409,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc94fc7adec5d1ba9e8cd1b1e8d6f72423b33fe978bf1f46d970fafab787521" +checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" dependencies = [ "arrow-array", "arrow-buffer", @@ -422,9 +422,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "169676f317157dc079cc5def6354d16db63d8861d61046d2f3883268ced6f99f" +checksum = "18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" dependencies = [ "arrow-array", "arrow-buffer", @@ -435,9 +435,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d27609cd7dd45f006abae27995c2729ef6f4b9361cde1ddd019dc31a5aa017e0" +checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" dependencies = [ "bitflags", "serde", @@ -447,9 +447,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae980d021879ea119dd6e2a13912d81e64abed372d53163e804dfe84639d8010" +checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" dependencies = [ "ahash", "arrow-array", @@ -461,9 +461,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf35e8ef49dcf0c5f6d175edee6b8af7b45611805333129c541a8b89a0fc0534" +checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" dependencies = [ "arrow-array", "arrow-buffer", @@ -4380,9 +4380,9 @@ dependencies = [ [[package]] name = "parquet" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be3e4f6d320dd92bfa7d612e265d7d08bba0a240bab86af3425e1d255a511d89" +checksum = "6ee96b29972a257b855ff2341b37e61af5f12d6af1158b6dcdb5b31ea07bb3cb" dependencies = [ "ahash", "arrow-array", diff --git a/Cargo.toml b/Cargo.toml index 6424f512cc3df..8bc8006daf147 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -91,19 +91,19 @@ ahash = { version = "0.8", default-features = false, features = [ "runtime-rng", ] } apache-avro = { version = "0.21", default-features = false } -arrow = { version = "57.1.0", features = [ +arrow = { version = "57.2.0", features = [ "prettyprint", "chrono-tz", ] } -arrow-buffer = { version = "57.1.0", default-features = false } -arrow-flight = { version = "57.1.0", features = [ +arrow-buffer = { version = "57.2.0", default-features = false } +arrow-flight = { version = "57.2.0", features = [ "flight-sql-experimental", ] } -arrow-ipc = { version = "57.1.0", default-features = false, features = [ +arrow-ipc = { version = "57.2.0", default-features = false, features = [ "lz4", ] } -arrow-ord = { version = "57.1.0", default-features = false } -arrow-schema = { version = "57.1.0", default-features = false } +arrow-ord = { version = "57.2.0", default-features = false } +arrow-schema = { version = "57.2.0", default-features = false } async-trait = "0.1.89" bigdecimal = "0.4.8" bytes = "1.11" @@ -166,7 +166,7 @@ log = "^0.4" num-traits = { version = "0.2" } object_store = { version = "0.12.4", default-features = false } parking_lot = "0.12" -parquet = { version = "57.1.0", default-features = false, features = [ +parquet = { version = "57.2.0", default-features = false, features = [ "arrow", "async", "object_store", diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index e4e048ad3c0d8..eda4952cf590b 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -8868,7 +8868,7 @@ mod tests { .unwrap(), ScalarValue::try_new_null(&DataType::Map(map_field_ref, false)).unwrap(), ScalarValue::try_new_null(&DataType::Union( - UnionFields::new(vec![42], vec![field_ref]), + UnionFields::try_new(vec![42], vec![field_ref]).unwrap(), UnionMode::Dense, )) .unwrap(), @@ -8971,13 +8971,14 @@ mod tests { } // Test union type - let union_fields = UnionFields::new( + let union_fields = UnionFields::try_new( vec![0, 1], vec![ Field::new("i32", DataType::Int32, false), Field::new("f64", DataType::Float64, false), ], - ); + ) + .unwrap(); let union_result = ScalarValue::new_default(&DataType::Union( union_fields.clone(), UnionMode::Sparse, diff --git a/datafusion/datasource-avro/src/avro_to_arrow/schema.rs b/datafusion/datasource-avro/src/avro_to_arrow/schema.rs index 0e8f2a4d56088..053be3c9aff94 100644 --- a/datafusion/datasource-avro/src/avro_to_arrow/schema.rs +++ b/datafusion/datasource-avro/src/avro_to_arrow/schema.rs @@ -117,8 +117,8 @@ fn schema_to_field_with_props( .iter() .map(|s| schema_to_field_with_props(s, None, has_nullable, None)) .collect::>>()?; - let type_ids = 0_i8..fields.len() as i8; - DataType::Union(UnionFields::new(type_ids, fields), UnionMode::Dense) + // Assign type_ids based on the order in which they appear + DataType::Union(UnionFields::from_fields(fields), UnionMode::Dense) } } AvroSchema::Record(RecordSchema { fields, .. }) => { diff --git a/datafusion/functions/src/core/union_extract.rs b/datafusion/functions/src/core/union_extract.rs index 56d4f23cc4e2e..8d915fb2e2c07 100644 --- a/datafusion/functions/src/core/union_extract.rs +++ b/datafusion/functions/src/core/union_extract.rs @@ -189,13 +189,14 @@ mod tests { fn test_scalar_value() -> Result<()> { let fun = UnionExtractFun::new(); - let fields = UnionFields::new( + let fields = UnionFields::try_new( vec![1, 3], vec![ Field::new("str", DataType::Utf8, false), Field::new("int", DataType::Int32, false), ], - ); + ) + .unwrap(); let args = vec![ ColumnarValue::Scalar(ScalarValue::Union( diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index e724cdad64840..e78e50c9868fb 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -1540,13 +1540,14 @@ mod tests { #[test] fn test_equivalence_properties_union_type() -> Result<()> { let union_type = DataType::Union( - UnionFields::new( + UnionFields::try_new( vec![0, 1], vec![ Field::new("f1", DataType::Int32, true), Field::new("f2", DataType::Utf8, true), ], - ), + ) + .unwrap(), UnionMode::Sparse, ); diff --git a/datafusion/proto-common/src/from_proto/mod.rs b/datafusion/proto-common/src/from_proto/mod.rs index e8e71c3884586..3c41b8cad9ed1 100644 --- a/datafusion/proto-common/src/from_proto/mod.rs +++ b/datafusion/proto-common/src/from_proto/mod.rs @@ -304,13 +304,16 @@ impl TryFrom<&protobuf::arrow_type::ArrowTypeEnum> for DataType { }; let union_fields = parse_proto_fields_to_fields(&union.union_types)?; - // Default to index based type ids if not provided - let type_ids: Vec<_> = match union.type_ids.is_empty() { - true => (0..union_fields.len() as i8).collect(), - false => union.type_ids.iter().map(|i| *i as i8).collect(), + // Default to index based type ids if not explicitly provided + let union_fields = if union.type_ids.is_empty() { + UnionFields::from_fields(union_fields) + } else { + let type_ids = union.type_ids.iter().map(|i| *i as i8); + UnionFields::try_new(type_ids, union_fields).map_err(|e| { + DataFusionError::from(e).context("Deserializing Union DataType") + })? }; - - DataType::Union(UnionFields::new(type_ids, union_fields), union_mode) + DataType::Union(union_fields, union_mode) } arrow_type::ArrowTypeEnum::Dictionary(dict) => { let key_datatype = dict.as_ref().key.as_deref().required("key")?; @@ -602,7 +605,9 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue { .collect::>>(); let fields = fields.ok_or_else(|| Error::required("UnionField"))?; let fields = parse_proto_fields_to_fields(&fields)?; - let fields = UnionFields::new(ids, fields); + let union_fields = UnionFields::try_new(ids, fields).map_err(|e| { + DataFusionError::from(e).context("Deserializing Union ScalarValue") + })?; let v_id = val.value_id as i8; let val = match &val.value { None => None, @@ -614,7 +619,7 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue { Some((v_id, Box::new(val))) } }; - Self::Union(val, fields, mode) + Self::Union(val, union_fields, mode) } Value::FixedSizeBinaryValue(v) => { Self::FixedSizeBinary(v.length, Some(v.clone().values)) diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index bcfda648b53e5..b9af9fc9352b2 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -1780,19 +1780,20 @@ fn round_trip_datatype() { ), ])), DataType::Union( - UnionFields::new( + UnionFields::try_new( vec![7, 5, 3], vec![ Field::new("nullable", DataType::Boolean, false), Field::new("name", DataType::Utf8, false), Field::new("datatype", DataType::Binary, false), ], - ), + ) + .unwrap(), UnionMode::Sparse, ), DataType::Union( - UnionFields::new( - vec![5, 8, 1], + UnionFields::try_new( + vec![5, 8, 1, 100], vec![ Field::new("nullable", DataType::Boolean, false), Field::new("name", DataType::Utf8, false), @@ -1807,7 +1808,8 @@ fn round_trip_datatype() { true, ), ], - ), + ) + .unwrap(), UnionMode::Dense, ), DataType::Dictionary( diff --git a/datafusion/sqllogictest/src/test_context.rs b/datafusion/sqllogictest/src/test_context.rs index 9ec085b41eec0..a69bdac663002 100644 --- a/datafusion/sqllogictest/src/test_context.rs +++ b/datafusion/sqllogictest/src/test_context.rs @@ -436,14 +436,15 @@ fn create_example_udf() -> ScalarUDF { fn register_union_table(ctx: &SessionContext) { let union = UnionArray::try_new( - UnionFields::new( + UnionFields::try_new( // typeids: 3 for int, 1 for string vec![3, 1], vec![ Field::new("int", DataType::Int32, false), Field::new("string", DataType::Utf8, false), ], - ), + ) + .unwrap(), ScalarBuffer::from(vec![3, 1, 3]), None, vec![ diff --git a/datafusion/sqllogictest/test_files/case.slt b/datafusion/sqllogictest/test_files/case.slt index 074d216ac7524..481dde5be9f5c 100644 --- a/datafusion/sqllogictest/test_files/case.slt +++ b/datafusion/sqllogictest/test_files/case.slt @@ -384,8 +384,7 @@ SELECT column2, column3, column4 FROM t; {foo: a, xxx: b} {xxx: c, foo: d} {xxx: e} # coerce structs with different field orders, -# (note the *value*s are from column2 but the field name is 'xxx', as the coerced -# type takes the field name from the last argument (column3) +# should keep the same field values query ? SELECT case @@ -394,7 +393,7 @@ SELECT end FROM t; ---- -{xxx: a, foo: b} +{xxx: b, foo: a} # coerce structs with different field orders query ? diff --git a/datafusion/sqllogictest/test_files/spark/hash/crc32.slt b/datafusion/sqllogictest/test_files/spark/hash/crc32.slt index 6fbeb11fb9a36..df5588c75837d 100644 --- a/datafusion/sqllogictest/test_files/spark/hash/crc32.slt +++ b/datafusion/sqllogictest/test_files/spark/hash/crc32.slt @@ -81,7 +81,7 @@ SELECT crc32(arrow_cast('Spark', 'BinaryView')); ---- 1557323817 -# Upstream arrow-rs issue: https://github.com/apache/arrow-rs/issues/8841 -# This should succeed after we receive the fix -query error Arrow error: Compute error: Internal Error: Cannot cast BinaryView to BinaryArray of expected type +query I select crc32(arrow_cast(null, 'Dictionary(Int32, Utf8)')) +---- +NULL diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index d985af1104da3..a91a5e7f870a9 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -492,9 +492,18 @@ Struct("r": Utf8, "c": Float64) statement ok drop table t; -query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'a' to value of Float64 type +statement ok create table t as values({r: 'a', c: 1}), ({c: 2.3, r: 'b'}); +query ? +select * from t; +---- +{c: 1.0, r: a} +{c: 2.3, r: b} + +statement ok +drop table t; + ################################## ## Test Coalesce with Struct ################################## @@ -560,10 +569,18 @@ create table t(a struct(r varchar, c int), b struct(r varchar, c float)) as valu (row('purple', 1), row('green', 2.3)); # out of order struct literal -# TODO: This query should not fail -statement error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'b' to value of Int32 type +statement ok create table t(a struct(r varchar, c int)) as values ({r: 'a', c: 1}), ({c: 2, r: 'b'}); +query ? +select * from t; +---- +{r: a, c: 1} +{r: b, c: 2} + +statement ok +drop table t; + ################################## ## Test Array of Struct ################################## @@ -573,9 +590,11 @@ select [{r: 'a', c: 1}, {r: 'b', c: 2}]; ---- [{r: a, c: 1}, {r: b, c: 2}] -# Can't create a list of struct with different field types -query error +# Create a list of struct with different field types +query ? select [{r: 'a', c: 1}, {c: 2, r: 'b'}]; +---- +[{c: 1, r: a}, {c: 2, r: b}] statement ok create table t(a struct(r varchar, c int), b struct(r varchar, c float)) as values (row('a', 1), row('b', 2.3)); @@ -592,9 +611,11 @@ drop table t; statement ok create table t(a struct(r varchar, c int), b struct(c float, r varchar)) as values (row('a', 1), row(2.3, 'b')); -# create array with different struct type is not valid -query error +# create array with different struct type should be cast +query T select arrow_typeof([a, b]) from t; +---- +List(Struct("c": Float32, "r": Utf8View)) statement ok drop table t; @@ -602,13 +623,13 @@ drop table t; statement ok create table t(a struct(r varchar, c int, g float), b struct(r varchar, c float, g int)) as values (row('a', 1, 2.3), row('b', 2.3, 2)); -# type of each column should not coerced but perserve as it is +# type of each column should not coerced but preserve as it is query T select arrow_typeof(a) from t; ---- Struct("r": Utf8View, "c": Int32, "g": Float32) -# type of each column should not coerced but perserve as it is +# type of each column should not coerced but preserve as it is query T select arrow_typeof(b) from t; ----