From 641767153398d31e754c2f76b990dd11dee2953b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 24 Jun 2025 14:54:00 +0200 Subject: [PATCH 01/12] chore: skip order calculation / exponential planning --- .../src/equivalence/properties/union.rs | 40 +++++++++++++++++-- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/datafusion/physical-expr/src/equivalence/properties/union.rs b/datafusion/physical-expr/src/equivalence/properties/union.rs index d77129472a8ba..702688011668d 100644 --- a/datafusion/physical-expr/src/equivalence/properties/union.rs +++ b/datafusion/physical-expr/src/equivalence/properties/union.rs @@ -67,16 +67,43 @@ fn calculate_union_binary( }) .collect::>(); + // TEMP HACK WORKAROUND + // Revert code from https://github.com/apache/datafusion/pull/12562 + // Context: https://github.com/apache/datafusion/issues/13748 + // Context: https://github.com/influxdata/influxdb_iox/issues/13038 + // Next, calculate valid orderings for the union by searching for prefixes // in both sides. - let mut orderings = UnionEquivalentOrderingBuilder::new(); - orderings.add_satisfied_orderings(&lhs, &rhs)?; - orderings.add_satisfied_orderings(&rhs, &lhs)?; - let orderings = orderings.build(); + let mut orderings = vec![]; + for ordering in lhs.normalized_oeq_class().into_iter() { + let mut ordering: Vec = ordering.into(); + + // Progressively shorten the ordering to search for a satisfied prefix: + while !rhs.ordering_satisfy(ordering.clone())? { + ordering.pop(); + } + // There is a non-trivial satisfied prefix, add it as a valid ordering: + if !ordering.is_empty() { + orderings.push(ordering); + } + } + for ordering in rhs.normalized_oeq_class().into_iter() { + let mut ordering: Vec = ordering.into(); + + // Progressively shorten the ordering to search for a satisfied prefix: + while !lhs.ordering_satisfy(ordering.clone())? 
{ + ordering.pop(); + } + // There is a non-trivial satisfied prefix, add it as a valid ordering: + if !ordering.is_empty() { + orderings.push(ordering); + } + } let mut eq_properties = EquivalenceProperties::new(lhs.schema); eq_properties.add_constants(constants)?; eq_properties.add_orderings(orderings); + Ok(eq_properties) } @@ -122,6 +149,7 @@ struct UnionEquivalentOrderingBuilder { orderings: Vec, } +#[expect(unused)] impl UnionEquivalentOrderingBuilder { fn new() -> Self { Self { orderings: vec![] } @@ -504,6 +532,7 @@ mod tests { } #[test] + #[ignore = "InfluxData patch: chore: skip order calculation / exponential planning"] fn test_union_equivalence_properties_constants_fill_gaps() -> Result<()> { let schema = create_test_schema().unwrap(); UnionEquivalenceTest::new(&schema) @@ -579,6 +608,7 @@ mod tests { } #[test] + #[ignore = "InfluxData patch: chore: skip order calculation / exponential planning"] fn test_union_equivalence_properties_constants_fill_gaps_non_symmetric() -> Result<()> { let schema = create_test_schema().unwrap(); @@ -607,6 +637,7 @@ mod tests { } #[test] + #[ignore = "InfluxData patch: chore: skip order calculation / exponential planning"] fn test_union_equivalence_properties_constants_gap_fill_symmetric() -> Result<()> { let schema = create_test_schema().unwrap(); UnionEquivalenceTest::new(&schema) @@ -658,6 +689,7 @@ mod tests { } #[test] + #[ignore = "InfluxData patch: chore: skip order calculation / exponential planning"] fn test_union_equivalence_properties_constants_middle_desc() -> Result<()> { let schema = create_test_schema().unwrap(); UnionEquivalenceTest::new(&schema) From 4277f8fe0cb7b5527a6a6ba35efdd63104fd66ca Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 16 Jul 2024 12:14:19 -0400 Subject: [PATCH 02/12] (New) Test + workaround for SanityCheck plan --- datafusion/physical-optimizer/src/sanity_checker.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/datafusion/physical-optimizer/src/sanity_checker.rs 
b/datafusion/physical-optimizer/src/sanity_checker.rs index bff33a281556d..8eb00327143dc 100644 --- a/datafusion/physical-optimizer/src/sanity_checker.rs +++ b/datafusion/physical-optimizer/src/sanity_checker.rs @@ -32,6 +32,8 @@ use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_physical_expr::intervals::utils::{check_support, is_datatype_supported}; use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; use datafusion_physical_plan::joins::SymmetricHashJoinExec; +use datafusion_physical_plan::sorts::sort::SortExec; +use datafusion_physical_plan::union::UnionExec; use datafusion_physical_plan::{ExecutionPlanProperties, get_plan_string}; use crate::PhysicalOptimizerRule; @@ -136,6 +138,14 @@ pub fn check_plan_sanity( plan.required_input_ordering(), plan.required_input_distribution(), ) { + // TEMP HACK WORKAROUND https://github.com/apache/datafusion/issues/11492 + if child.as_any().downcast_ref::().is_some() { + continue; + } + if child.as_any().downcast_ref::().is_some() { + continue; + } + let child_eq_props = child.equivalence_properties(); if let Some(sort_req) = sort_req { let sort_req = sort_req.into_single(); From 64dc11c50a50e0dde6f39c5fb6d130344ea183bf Mon Sep 17 00:00:00 2001 From: Adam Curtis Date: Tue, 21 Oct 2025 13:33:16 -0400 Subject: [PATCH 03/12] chore: add debug logging and skip error on physical schema check --- datafusion/core/src/physical_planner.rs | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index cc7d534776d7e..b006d8ae39a9c 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -802,10 +802,17 @@ impl DefaultPhysicalPlanner { )); } } - return internal_err!( + debug!( "Physical input schema should be the same as the one converted from logical input schema. 
Differences: {}", differences.iter().map(|s| format!("\n\t- {s}")).join("") ); + + //influx: temporarily remove error and only log so that we can find a + //reproducer in production + // return internal_err!("Physical input schema should be the same as the one converted from logical input schema. Differences: {}", differences + // .iter() + // .map(|s| format!("\n\t- {s}")) + // .join("")); } let groups = self.create_grouping_physical_expr( @@ -4207,6 +4214,8 @@ digraph { } #[tokio::test] + // Ignored due to disabling the physical schema check skip. + #[ignore] async fn test_aggregate_schema_mismatch_metadata() { let logical_schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, false)])); @@ -4227,6 +4236,8 @@ digraph { } #[tokio::test] + // Ignored due to disabling the physical schema check skip. + #[ignore] async fn test_aggregate_schema_mismatch_field_count() { let logical_schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, false)])); @@ -4247,6 +4258,8 @@ digraph { } #[tokio::test] + // Ignored due to disabling the physical schema check skip. + #[ignore] async fn test_aggregate_schema_mismatch_field_name() { let logical_schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, false)])); @@ -4268,6 +4281,8 @@ digraph { } #[tokio::test] + // Ignored due to disabling the physical schema check skip. + #[ignore] async fn test_aggregate_schema_mismatch_field_type() { let logical_schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, false)])); @@ -4286,6 +4301,8 @@ digraph { } #[tokio::test] + // Ignored due to disabling the physical schema check skip. + #[ignore] async fn test_aggregate_schema_mismatch_field_nullability() { let logical_schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, false)])); @@ -4304,6 +4321,8 @@ digraph { } #[tokio::test] + // Ignored due to disabling the physical schema check skip. 
+ #[ignore] async fn test_aggregate_schema_mismatch_field_metadata() { let logical_schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, false)])); @@ -4324,6 +4343,8 @@ digraph { } #[tokio::test] + // Ignored due to disabling the physical schema check skip. + #[ignore] async fn test_aggregate_schema_mismatch_multiple() { let logical_schema = Arc::new(Schema::new(vec![ Field::new("c1", DataType::Int32, false), From 506dd6de584193f787018548a806178941207e94 Mon Sep 17 00:00:00 2001 From: Adam Curtis Date: Tue, 16 Dec 2025 12:21:06 -0500 Subject: [PATCH 04/12] fix: wrap join operators with cooperative() for cancellation support --- datafusion/physical-plan/src/joins/cross_join.rs | 9 +++++---- datafusion/physical-plan/src/joins/hash_join/exec.rs | 5 +++-- datafusion/physical-plan/src/joins/nested_loop_join.rs | 5 +++-- .../physical-plan/src/joins/sort_merge_join/exec.rs | 5 +++-- .../physical-plan/src/joins/symmetric_hash_join.rs | 9 +++++---- 5 files changed, 19 insertions(+), 14 deletions(-) diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index 4f32b6176ec39..1774a8ea95e53 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -25,6 +25,7 @@ use super::utils::{ OnceAsync, OnceFut, StatefulStreamResult, adjust_right_output_partitioning, reorder_output_after_swap, }; +use crate::coop::cooperative; use crate::execution_plan::{EmissionType, boundedness_from_children}; use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::projection::{ @@ -332,7 +333,7 @@ impl ExecutionPlan for CrossJoinExec { })?; if enforce_batch_size_in_joins { - Ok(Box::pin(CrossJoinStream { + Ok(Box::pin(cooperative(CrossJoinStream { schema: Arc::clone(&self.schema), left_fut, right: stream, @@ -341,9 +342,9 @@ impl ExecutionPlan for CrossJoinExec { state: CrossJoinStreamState::WaitBuildSide, left_data: 
RecordBatch::new_empty(self.left().schema()), batch_transformer: BatchSplitter::new(batch_size), - })) + }))) } else { - Ok(Box::pin(CrossJoinStream { + Ok(Box::pin(cooperative(CrossJoinStream { schema: Arc::clone(&self.schema), left_fut, right: stream, @@ -352,7 +353,7 @@ impl ExecutionPlan for CrossJoinExec { state: CrossJoinStreamState::WaitBuildSide, left_data: RecordBatch::new_empty(self.left().schema()), batch_transformer: NoopBatchTransformer::new(), - })) + }))) } } diff --git a/datafusion/physical-plan/src/joins/hash_join/exec.rs b/datafusion/physical-plan/src/joins/hash_join/exec.rs index 91fc1ee4436ee..0a45887eff3e4 100644 --- a/datafusion/physical-plan/src/joins/hash_join/exec.rs +++ b/datafusion/physical-plan/src/joins/hash_join/exec.rs @@ -22,6 +22,7 @@ use std::sync::{Arc, OnceLock}; use std::{any::Any, vec}; use crate::ExecutionPlanProperties; +use crate::coop::cooperative; use crate::execution_plan::{EmissionType, boundedness_from_children}; use crate::filter_pushdown::{ ChildPushdownResult, FilterDescription, FilterPushdownPhase, @@ -1061,7 +1062,7 @@ impl ExecutionPlan for HashJoinExec { .map(|(_, right_expr)| Arc::clone(right_expr)) .collect::>(); - Ok(Box::pin(HashJoinStream::new( + Ok(Box::pin(cooperative(HashJoinStream::new( partition, self.schema(), on_right, @@ -1079,7 +1080,7 @@ impl ExecutionPlan for HashJoinExec { self.right.output_ordering().is_some(), build_accumulator, self.mode, - ))) + )))) } fn metrics(&self) -> Option { diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index 44637321a7e35..fa435c0d815b1 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -29,6 +29,7 @@ use super::utils::{ reorder_output_after_swap, swap_join_projection, }; use crate::common::can_project; +use crate::coop::cooperative; use crate::execution_plan::{EmissionType, boundedness_from_children}; use 
crate::joins::SharedBitmapBuilder; use crate::joins::utils::{ @@ -529,7 +530,7 @@ impl ExecutionPlan for NestedLoopJoinExec { None => self.column_indices.clone(), }; - Ok(Box::pin(NestedLoopJoinStream::new( + Ok(Box::pin(cooperative(NestedLoopJoinStream::new( self.schema(), self.filter.clone(), self.join_type, @@ -538,7 +539,7 @@ impl ExecutionPlan for NestedLoopJoinExec { column_indices_after_projection, metrics, batch_size, - ))) + )))) } fn metrics(&self) -> Option { diff --git a/datafusion/physical-plan/src/joins/sort_merge_join/exec.rs b/datafusion/physical-plan/src/joins/sort_merge_join/exec.rs index ae7a5fa764bcc..82929107806de 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join/exec.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join/exec.rs @@ -23,6 +23,7 @@ use std::any::Any; use std::fmt::Formatter; use std::sync::Arc; +use crate::coop::cooperative; use crate::execution_plan::{EmissionType, boundedness_from_children}; use crate::expressions::PhysicalSortExpr; use crate::joins::sort_merge_join::metrics::SortMergeJoinMetrics; @@ -497,7 +498,7 @@ impl ExecutionPlan for SortMergeJoinExec { .register(context.memory_pool()); // create join stream - Ok(Box::pin(SortMergeJoinStream::try_new( + Ok(Box::pin(cooperative(SortMergeJoinStream::try_new( context.session_config().spill_compression(), Arc::clone(&self.schema), self.sort_options.clone(), @@ -512,7 +513,7 @@ impl ExecutionPlan for SortMergeJoinExec { SortMergeJoinMetrics::new(partition, &self.metrics), reservation, context.runtime_env(), - )?)) + )?))) } fn metrics(&self) -> Option { diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index 1f6bc703a0300..a54f930114c60 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -33,6 +33,7 @@ use std::task::{Context, Poll}; use std::vec; use crate::common::SharedMemoryReservation; 
+use crate::coop::cooperative; use crate::execution_plan::{boundedness_from_children, emission_type_from_children}; use crate::joins::stream_join_utils::{ PruningJoinHashMap, SortedFilterExpr, StreamJoinMetrics, @@ -534,7 +535,7 @@ impl ExecutionPlan for SymmetricHashJoinExec { } if enforce_batch_size_in_joins { - Ok(Box::pin(SymmetricHashJoinStream { + Ok(Box::pin(cooperative(SymmetricHashJoinStream { left_stream, right_stream, schema: self.schema(), @@ -552,9 +553,9 @@ impl ExecutionPlan for SymmetricHashJoinExec { state: SHJStreamState::PullRight, reservation, batch_transformer: BatchSplitter::new(batch_size), - })) + }))) } else { - Ok(Box::pin(SymmetricHashJoinStream { + Ok(Box::pin(cooperative(SymmetricHashJoinStream { left_stream, right_stream, schema: self.schema(), @@ -572,7 +573,7 @@ impl ExecutionPlan for SymmetricHashJoinExec { state: SHJStreamState::PullRight, reservation, batch_transformer: NoopBatchTransformer::new(), - })) + }))) } } From d615ad91a7f75a8f34eedae60cec4458ac8d277d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 4 Feb 2026 08:44:14 -0500 Subject: [PATCH 05/12] Update bytes to v1.11.1 to avoid security audit --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5ab0b8c84a563..e9167c639a774 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1225,9 +1225,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" [[package]] name = "bytes-utils" From d87aafd4122b632ff8c8f2a07e6f6f087ce73f6d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 6 Feb 2026 14:13:02 -0500 Subject: [PATCH 06/12] chore: Update time to avoid rustsec error --- Cargo.lock | 18 +++++++++--------- 
1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e9167c639a774..c2e9312a24723 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4193,9 +4193,9 @@ dependencies = [ [[package]] name = "num-conv" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" [[package]] name = "num-integer" @@ -6044,30 +6044,30 @@ dependencies = [ [[package]] name = "time" -version = "0.3.44" +version = "0.3.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" dependencies = [ "deranged", "itoa", "num-conv", "powerfmt", - "serde", + "serde_core", "time-core", "time-macros", ] [[package]] name = "time-core" -version = "0.1.6" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" [[package]] name = "time-macros" -version = "0.2.24" +version = "0.2.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" dependencies = [ "num-conv", "time-core", From a21b158cb073c0dffd8475e0e359af9f514a355b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 9 Feb 2026 14:57:20 -0500 Subject: [PATCH 07/12] Fix incorrect SortExec removal before AggregateExec --- .../src/enforce_sorting/sort_pushdown.rs | 74 +++++++ .../sqllogictest/test_files/sort_pushdown.slt | 184 ++++++++++++++++++ 2 files changed, 258 insertions(+) diff --git 
a/datafusion/physical-optimizer/src/enforce_sorting/sort_pushdown.rs b/datafusion/physical-optimizer/src/enforce_sorting/sort_pushdown.rs index 698fdea8e766e..267faeda0c1bb 100644 --- a/datafusion/physical-optimizer/src/enforce_sorting/sort_pushdown.rs +++ b/datafusion/physical-optimizer/src/enforce_sorting/sort_pushdown.rs @@ -35,6 +35,7 @@ use datafusion_physical_expr_common::sort_expr::{ LexOrdering, LexRequirement, OrderingRequirements, PhysicalSortExpr, PhysicalSortRequirement, }; +use datafusion_physical_plan::aggregates::AggregateExec; use datafusion_physical_plan::execution_plan::CardinalityEffect; use datafusion_physical_plan::filter::FilterExec; use datafusion_physical_plan::joins::utils::{ @@ -353,6 +354,8 @@ fn pushdown_requirement_to_children( Ok(None) } } + } else if let Some(aggregate_exec) = plan.as_any().downcast_ref::() { + handle_aggregate_pushdown(aggregate_exec, parent_required) } else if maintains_input_order.is_empty() || !maintains_input_order.iter().any(|o| *o) || plan.as_any().is::() @@ -388,6 +391,77 @@ fn pushdown_requirement_to_children( // TODO: Add support for Projection push down } +/// Try to push sorting through [`AggregateExec`] +/// +/// `AggregateExec` only preserves the input order of its group by columns +/// (not aggregates in general, which are formed from arbitrary expressions over +/// input) +/// +/// Thus function rewrites the parent required ordering in terms of the +/// aggregate input if possible. This rewritten requirement represents the +/// ordering of the `AggregateExec`'s **input** that would also satisfy the +/// **parent** ordering. +/// +/// If no such mapping is possible (e.g. because the sort references aggregate +/// columns), returns None. 
+fn handle_aggregate_pushdown( + aggregate_exec: &AggregateExec, + parent_required: OrderingRequirements, +) -> Result>>> { + if !aggregate_exec + .maintains_input_order() + .into_iter() + .any(|o| o) + { + return Ok(None); + } + + let group_expr = aggregate_exec.group_expr(); + // GROUPING SETS introduce additional output columns and NULL substitutions; + // skip pushdown until we can map those cases safely. + if group_expr.has_grouping_set() { + return Ok(None); + } + + let group_input_exprs = group_expr.input_exprs(); + let parent_requirement = parent_required.into_single(); + let mut child_requirement = Vec::with_capacity(parent_requirement.len()); + + for req in parent_requirement { + // Sort above AggregateExec should reference its output columns. Map each + // output group-by column to its original input expression. + let Some(column) = req.expr.as_any().downcast_ref::() else { + return Ok(None); + }; + if column.index() >= group_input_exprs.len() { + // AggregateExec does not produce output that is sorted on aggregate + // columns so those can not be pushed through. + return Ok(None); + } + child_requirement.push(PhysicalSortRequirement::new( + Arc::clone(&group_input_exprs[column.index()]), + req.options, + )); + } + + let Some(child_requirement) = LexRequirement::new(child_requirement) else { + return Ok(None); + }; + + // Keep sort above aggregate unless input ordering already satisfies the + // mapped requirement. + if aggregate_exec + .input() + .equivalence_properties() + .ordering_satisfy_requirement(child_requirement.iter().cloned())? 
+ { + let child_requirements = OrderingRequirements::new(child_requirement); + Ok(Some(vec![Some(child_requirements)])) + } else { + Ok(None) + } +} + /// Return true if pushing the sort requirements through a node would violate /// the input sorting requirements for the plan fn pushdown_would_violate_requirements( diff --git a/datafusion/sqllogictest/test_files/sort_pushdown.slt b/datafusion/sqllogictest/test_files/sort_pushdown.slt index 58d9915a24be2..4148de1d62f73 100644 --- a/datafusion/sqllogictest/test_files/sort_pushdown.slt +++ b/datafusion/sqllogictest/test_files/sort_pushdown.slt @@ -851,6 +851,184 @@ LIMIT 3; 5 4 2 -3 +# Test 3.7: Aggregate ORDER BY expression should keep SortExec +# Source pattern declared on parquet scan: [x ASC, y ASC]. +# Requested pattern in ORDER BY: [x ASC, CAST(y AS BIGINT) % 2 ASC]. +# Example for x=1 input y order 1,2,3 gives bucket order 1,0,1, which does not +# match requested bucket ASC order. SortExec is required above AggregateExec. +statement ok +SET datafusion.execution.target_partitions = 1; + +statement ok +CREATE TABLE agg_expr_data(x INT, y INT, v INT) AS VALUES +(1, 1, 10), +(1, 2, 20), +(1, 3, 30), +(2, 1, 40), +(2, 2, 50), +(2, 3, 60); + +query I +COPY (SELECT * FROM agg_expr_data ORDER BY x, y) +TO 'test_files/scratch/sort_pushdown/agg_expr_sorted.parquet'; +---- +6 + +statement ok +CREATE EXTERNAL TABLE agg_expr_parquet(x INT, y INT, v INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/sort_pushdown/agg_expr_sorted.parquet' +WITH ORDER (x ASC, y ASC); + +query TT +EXPLAIN SELECT + x, + CAST(y AS BIGINT) % 2, + SUM(v) +FROM agg_expr_parquet +GROUP BY x, CAST(y AS BIGINT) % 2 +ORDER BY x, CAST(y AS BIGINT) % 2; +---- +logical_plan +01)Sort: agg_expr_parquet.x ASC NULLS LAST, agg_expr_parquet.y % Int64(2) ASC NULLS LAST +02)--Aggregate: groupBy=[[agg_expr_parquet.x, CAST(agg_expr_parquet.y AS Int64) % Int64(2)]], aggr=[[sum(CAST(agg_expr_parquet.v AS Int64))]] +03)----TableScan: agg_expr_parquet projection=[x, 
y, v] +physical_plan +01)SortExec: expr=[x@0 ASC NULLS LAST, agg_expr_parquet.y % Int64(2)@1 ASC NULLS LAST], preserve_partitioning=[false] +02)--AggregateExec: mode=Single, gby=[x@0 as x, CAST(y@1 AS Int64) % 2 as agg_expr_parquet.y % Int64(2)], aggr=[sum(agg_expr_parquet.v)], ordering_mode=PartiallySorted([0]) +03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/agg_expr_sorted.parquet]]}, projection=[x, y, v], output_ordering=[x@0 ASC NULLS LAST, y@1 ASC NULLS LAST], file_type=parquet + +# Expected output pattern from ORDER BY [x, bucket]: +# rows grouped by x, and within each x bucket appears as 0 then 1. +query III +SELECT + x, + CAST(y AS BIGINT) % 2, + SUM(v) +FROM agg_expr_parquet +GROUP BY x, CAST(y AS BIGINT) % 2 +ORDER BY x, CAST(y AS BIGINT) % 2; +---- +1 0 20 +1 1 40 +2 0 50 +2 1 100 + +# Test 3.8: Aggregate ORDER BY monotonic expression can push down (no SortExec) +query TT +EXPLAIN SELECT + x, + CAST(y AS BIGINT), + SUM(v) +FROM agg_expr_parquet +GROUP BY x, CAST(y AS BIGINT) +ORDER BY x, CAST(y AS BIGINT); +---- +logical_plan +01)Sort: agg_expr_parquet.x ASC NULLS LAST, agg_expr_parquet.y ASC NULLS LAST +02)--Aggregate: groupBy=[[agg_expr_parquet.x, CAST(agg_expr_parquet.y AS Int64)]], aggr=[[sum(CAST(agg_expr_parquet.v AS Int64))]] +03)----TableScan: agg_expr_parquet projection=[x, y, v] +physical_plan +01)AggregateExec: mode=Single, gby=[x@0 as x, CAST(y@1 AS Int64) as agg_expr_parquet.y], aggr=[sum(agg_expr_parquet.v)], ordering_mode=Sorted +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/agg_expr_sorted.parquet]]}, projection=[x, y, v], output_ordering=[x@0 ASC NULLS LAST, y@1 ASC NULLS LAST], file_type=parquet + +query III +SELECT + x, + CAST(y AS BIGINT), + SUM(v) +FROM agg_expr_parquet +GROUP BY x, CAST(y AS BIGINT) +ORDER BY x, CAST(y AS BIGINT); +---- +1 1 10 +1 2 20 +1 3 30 +2 1 40 +2 2 50 +2 3 60 + +# 
Test 3.9: Aggregate ORDER BY aggregate output should keep SortExec +query TT +EXPLAIN SELECT x, SUM(v) +FROM agg_expr_parquet +GROUP BY x +ORDER BY SUM(v); +---- +logical_plan +01)Sort: sum(agg_expr_parquet.v) ASC NULLS LAST +02)--Aggregate: groupBy=[[agg_expr_parquet.x]], aggr=[[sum(CAST(agg_expr_parquet.v AS Int64))]] +03)----TableScan: agg_expr_parquet projection=[x, v] +physical_plan +01)SortExec: expr=[sum(agg_expr_parquet.v)@1 ASC NULLS LAST], preserve_partitioning=[false] +02)--AggregateExec: mode=Single, gby=[x@0 as x], aggr=[sum(agg_expr_parquet.v)], ordering_mode=Sorted +03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/agg_expr_sorted.parquet]]}, projection=[x, v], output_ordering=[x@0 ASC NULLS LAST], file_type=parquet + +query II +SELECT x, SUM(v) +FROM agg_expr_parquet +GROUP BY x +ORDER BY SUM(v); +---- +1 60 +2 150 + +# Test 3.10: Aggregate with non-preserved input order should keep SortExec +# v is not part of the order by +query TT +EXPLAIN SELECT v, SUM(y) +FROM agg_expr_parquet +GROUP BY v +ORDER BY v; +---- +logical_plan +01)Sort: agg_expr_parquet.v ASC NULLS LAST +02)--Aggregate: groupBy=[[agg_expr_parquet.v]], aggr=[[sum(CAST(agg_expr_parquet.y AS Int64))]] +03)----TableScan: agg_expr_parquet projection=[y, v] +physical_plan +01)SortExec: expr=[v@0 ASC NULLS LAST], preserve_partitioning=[false] +02)--AggregateExec: mode=Single, gby=[v@1 as v], aggr=[sum(agg_expr_parquet.y)] +03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/agg_expr_sorted.parquet]]}, projection=[y, v], file_type=parquet + +query II +SELECT v, SUM(y) +FROM agg_expr_parquet +GROUP BY v +ORDER BY v; +---- +10 1 +20 2 +30 3 +40 1 +50 2 +60 3 + +# Test 3.11: Aggregate ORDER BY non-column expression (unsatisfied) keeps SortExec +# (though note in theory DataFusion could figure out that data sorted by x will also be sorted by x+1) +query TT 
+EXPLAIN SELECT x, SUM(v) +FROM agg_expr_parquet +GROUP BY x +ORDER BY x + 1 DESC; +---- +logical_plan +01)Sort: CAST(agg_expr_parquet.x AS Int64) + Int64(1) DESC NULLS FIRST +02)--Aggregate: groupBy=[[agg_expr_parquet.x]], aggr=[[sum(CAST(agg_expr_parquet.v AS Int64))]] +03)----TableScan: agg_expr_parquet projection=[x, v] +physical_plan +01)SortExec: expr=[CAST(x@0 AS Int64) + 1 DESC], preserve_partitioning=[false] +02)--AggregateExec: mode=Single, gby=[x@0 as x], aggr=[sum(agg_expr_parquet.v)], ordering_mode=Sorted +03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/agg_expr_sorted.parquet]]}, projection=[x, v], output_ordering=[x@0 ASC NULLS LAST], file_type=parquet + +query II +SELECT x, SUM(v) +FROM agg_expr_parquet +GROUP BY x +ORDER BY x + 1 DESC; +---- +2 150 +1 60 + # Cleanup statement ok DROP TABLE timestamp_data; @@ -882,5 +1060,11 @@ DROP TABLE signed_data; statement ok DROP TABLE signed_parquet; +statement ok +DROP TABLE agg_expr_data; + +statement ok +DROP TABLE agg_expr_parquet; + statement ok SET datafusion.optimizer.enable_sort_pushdown = true; From b62786424824e3f87230f8fd1923937199802e7a Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sat, 14 Feb 2026 14:04:01 -0500 Subject: [PATCH 08/12] fix: validate inter-file ordering in eq_properties() (#20329) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Discovered this bug while working on #19724. TLDR: just because the files themselves are sorted doesn't mean the partition streams are sorted. 
- **`eq_properties()` in `FileScanConfig` blindly trusted `output_ordering`** (set from Parquet `sorting_columns` metadata) without verifying that files within a group are in the correct inter-file order - `EnforceSorting` then removed `SortExec` based on this unvalidated ordering, producing **wrong results** when filesystem order didn't match data order - Added `validated_output_ordering()` that filters orderings using `MinMaxStatistics::new_from_files()` + `is_sorted()` to verify inter-file sort order before reporting them to the optimizer - Added `validated_output_ordering()` method on `FileScanConfig` that validates each output ordering against actual file group statistics - Changed `eq_properties()` to call `self.validated_output_ordering()` instead of `self.output_ordering.clone()` Added 8 new regression tests (Tests 4-11): | Test | Scenario | Key assertion | |------|----------|---------------| | **4** | Reversed filesystem order (inferred ordering) | SortExec retained — wrong inter-file order detected | | **5** | Overlapping file ranges (inferred ordering) | SortExec retained — overlapping ranges detected | | **6** | `WITH ORDER` + reversed filesystem order | SortExec retained despite explicit ordering | | **7** | Correctly ordered multi-file group (positive) | SortExec eliminated — validation passes | | **8** | DESC ordering with wrong inter-file DESC order | SortExec retained for DESC direction | | **9** | Multi-column sort key (overlapping vs non-overlapping) | Conservative rejection with overlapping stats; passes with clean boundaries | | **10** | Correctly ordered + `WITH ORDER` (positive) | SortExec eliminated — both ordering and stats agree | | **11** | Multiple partitions (one file per group) | `SortPreservingMergeExec` merges; no per-partition sort needed | - [x] `cargo test --test sqllogictests -- sort_pushdown` — all new + existing tests pass - [x] `cargo test -p datafusion-datasource` — 97 unit tests + 6 doc tests pass - [x] Existing Test 1 
(single-file sort pushdown with `WITH ORDER`) still eliminates SortExec (no regression) 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude Opus 4.6 --- .../partition_statistics.rs | 2 +- datafusion/datasource/src/file_scan_config.rs | 162 ++++-- datafusion/datasource/src/statistics.rs | 3 +- .../test_files/parquet_sorted_statistics.slt | 2 +- .../sqllogictest/test_files/sort_pushdown.slt | 539 ++++++++++++++++++ 5 files changed, 661 insertions(+), 47 deletions(-) diff --git a/datafusion/core/tests/physical_optimizer/partition_statistics.rs b/datafusion/core/tests/physical_optimizer/partition_statistics.rs index ba53d079e3059..36fa95aa9f57d 100644 --- a/datafusion/core/tests/physical_optimizer/partition_statistics.rs +++ b/datafusion/core/tests/physical_optimizer/partition_statistics.rs @@ -864,7 +864,7 @@ mod test { let plan_string = get_plan_string(&aggregate_exec_partial).swap_remove(0); assert_snapshot!( plan_string, - @"AggregateExec: mode=Partial, gby=[id@0 as id, 1 + id@0 as expr], aggr=[COUNT(c)], ordering_mode=Sorted" + @"AggregateExec: mode=Partial, gby=[id@0 as id, 1 + id@0 as expr], aggr=[COUNT(c)]" ); let p0_statistics = aggregate_exec_partial.partition_statistics(Some(0))?; diff --git a/datafusion/datasource/src/file_scan_config.rs b/datafusion/datasource/src/file_scan_config.rs index c8636343ccc5a..facd12d7cbdfe 100644 --- a/datafusion/datasource/src/file_scan_config.rs +++ b/datafusion/datasource/src/file_scan_config.rs @@ -665,7 +665,7 @@ impl DataSource for FileScanConfig { let schema = self.file_source.table_schema().table_schema(); let mut eq_properties = EquivalenceProperties::new_with_orderings( Arc::clone(schema), - self.output_ordering.clone(), + self.validated_output_ordering(), ) .with_constraints(self.constraints.clone()); @@ -853,6 +853,40 @@ impl DataSource for FileScanConfig { } impl FileScanConfig { + /// Returns only the output orderings that are validated against actual + /// file group 
statistics. + /// + /// For example, individual files may be ordered by `col1 ASC`, + /// but if we have files with these min/max statistics in a single partition / file group: + /// + /// - file1: min(col1) = 10, max(col1) = 20 + /// - file2: min(col1) = 5, max(col1) = 15 + /// + /// Because reading file1 followed by file2 would produce out-of-order output (there is overlap + /// in the ranges), we cannot retain `col1 ASC` as a valid output ordering. + /// + /// Similarly this would not be a valid order (non-overlapping ranges but not ordered): + /// + /// - file1: min(col1) = 20, max(col1) = 30 + /// - file2: min(col1) = 10, max(col1) = 15 + /// + /// On the other hand if we had: + /// + /// - file1: min(col1) = 5, max(col1) = 15 + /// - file2: min(col1) = 16, max(col1) = 25 + /// + /// Then we know that reading file1 followed by file2 will produce ordered output, + /// so `col1 ASC` would be retained. + /// + /// Note that we are checking for ordering *within* *each* file group / partition, + /// files in different partitions are read independently and do not affect each other's ordering. + /// Merging of the multiple partition streams into a single ordered stream is handled + /// upstream e.g. by `SortPreservingMergeExec`. + fn validated_output_ordering(&self) -> Vec { + let schema = self.file_source.table_schema().table_schema(); + validate_orderings(&self.output_ordering, schema, &self.file_groups, None) + } + /// Get the file schema (schema of the files without partition columns) pub fn file_schema(&self) -> &SchemaRef { self.file_source.table_schema().file_schema() @@ -1202,6 +1236,51 @@ fn ordered_column_indices_from_projection( .collect::>>() } +/// Check whether a given ordering is valid for all file groups by verifying +/// that files within each group are sorted according to their min/max statistics. +/// +/// For single-file (or empty) groups, the ordering is trivially valid. 
+/// For multi-file groups, we check that the min/max statistics for the sort +/// columns are in order and non-overlapping (or touching at boundaries). +/// +/// `projection` maps projected column indices back to table-schema indices +/// when validating after projection; pass `None` when validating at +/// table-schema level. +fn is_ordering_valid_for_file_groups( + file_groups: &[FileGroup], + ordering: &LexOrdering, + schema: &SchemaRef, + projection: Option<&[usize]>, +) -> bool { + file_groups.iter().all(|group| { + if group.len() <= 1 { + return true; // single-file groups are trivially sorted + } + match MinMaxStatistics::new_from_files(ordering, schema, projection, group.iter()) + { + Ok(stats) => stats.is_sorted(), + Err(_) => false, // can't prove sorted → reject + } + }) +} + +/// Filters orderings to retain only those valid for all file groups, +/// verified via min/max statistics. +fn validate_orderings( + orderings: &[LexOrdering], + schema: &SchemaRef, + file_groups: &[FileGroup], + projection: Option<&[usize]>, +) -> Vec { + orderings + .iter() + .filter(|ordering| { + is_ordering_valid_for_file_groups(file_groups, ordering, schema, projection) + }) + .cloned() + .collect() +} + /// The various listing tables does not attempt to read all files /// concurrently, instead they will read files in sequence within a /// partition. 
This is an important property as it allows plans to @@ -1268,52 +1347,47 @@ fn get_projected_output_ordering( let projected_orderings = project_orderings(&base_config.output_ordering, projected_schema); - let mut all_orderings = vec![]; - for new_ordering in projected_orderings { - // Check if any file groups are not sorted - if base_config.file_groups.iter().any(|group| { - if group.len() <= 1 { - // File groups with <= 1 files are always sorted - return false; - } - - let Some(indices) = base_config - .file_source - .projection() - .as_ref() - .map(|p| ordered_column_indices_from_projection(p)) - else { - // Can't determine if ordered without a simple projection - return true; - }; - - let statistics = match MinMaxStatistics::new_from_files( - &new_ordering, + let indices = base_config + .file_source + .projection() + .as_ref() + .map(|p| ordered_column_indices_from_projection(p)); + + match indices { + Some(Some(indices)) => { + // Simple column projection — validate with statistics + validate_orderings( + &projected_orderings, projected_schema, - indices.as_deref(), - group.iter(), - ) { - Ok(statistics) => statistics, - Err(e) => { - log::trace!("Error fetching statistics for file group: {e}"); - // we can't prove that it's ordered, so we have to reject it - return true; - } - }; - - !statistics.is_sorted() - }) { - debug!( - "Skipping specified output ordering {:?}. \ - Some file groups couldn't be determined to be sorted: {:?}", - base_config.output_ordering[0], base_config.file_groups - ); - continue; + &base_config.file_groups, + Some(indices.as_slice()), + ) + } + None => { + // No projection — validate with statistics (no remapping needed) + validate_orderings( + &projected_orderings, + projected_schema, + &base_config.file_groups, + None, + ) + } + Some(None) => { + // Complex projection (expressions, not simple columns) — can't + // determine column indices for statistics. Still valid if all + // file groups have at most one file. 
+ if base_config.file_groups.iter().all(|g| g.len() <= 1) { + projected_orderings + } else { + debug!( + "Skipping specified output orderings. \ + Some file groups couldn't be determined to be sorted: {:?}", + base_config.file_groups + ); + vec![] + } } - - all_orderings.push(new_ordering); } - all_orderings } /// Convert type to a type suitable for use as a `ListingTable` diff --git a/datafusion/datasource/src/statistics.rs b/datafusion/datasource/src/statistics.rs index 2f34ca032e132..b1a56e096c222 100644 --- a/datafusion/datasource/src/statistics.rs +++ b/datafusion/datasource/src/statistics.rs @@ -266,11 +266,12 @@ impl MinMaxStatistics { } /// Check if the min/max statistics are in order and non-overlapping + /// (or touching at boundaries) pub fn is_sorted(&self) -> bool { self.max_by_sort_order .iter() .zip(self.min_by_sort_order.iter().skip(1)) - .all(|(max, next_min)| max < next_min) + .all(|(max, next_min)| max <= next_min) } } diff --git a/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt b/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt index 5a559bdb94835..fd3a40ca17079 100644 --- a/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt +++ b/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt @@ -274,4 +274,4 @@ logical_plan 02)--TableScan: test_table projection=[constant_col] physical_plan 01)SortPreservingMergeExec: [constant_col@0 ASC NULLS LAST] -02)--DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=A/0.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=B/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=C/2.parquet]]}, projection=[constant_col], file_type=parquet +02)--DataSourceExec: file_groups={2 groups: 
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=A/0.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=B/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=C/2.parquet]]}, projection=[constant_col], output_ordering=[constant_col@0 ASC NULLS LAST], file_type=parquet diff --git a/datafusion/sqllogictest/test_files/sort_pushdown.slt b/datafusion/sqllogictest/test_files/sort_pushdown.slt index 4148de1d62f73..a9ccb050b72fe 100644 --- a/datafusion/sqllogictest/test_files/sort_pushdown.slt +++ b/datafusion/sqllogictest/test_files/sort_pushdown.slt @@ -1068,3 +1068,542 @@ DROP TABLE agg_expr_parquet; statement ok SET datafusion.optimizer.enable_sort_pushdown = true; + +# Test 4: Reversed filesystem order with inferred ordering +# Create 3 parquet files with non-overlapping id ranges, named so filesystem +# order is OPPOSITE to data order. Each file is internally sorted by id ASC. +# Force target_partitions=1 so all files end up in one file group, which is +# where the inter-file ordering bug manifests. +# Without inter-file validation, the optimizer would incorrectly trust the +# inferred ordering and remove SortExec. 
+ +# Save current target_partitions and set to 1 to force single file group +statement ok +SET datafusion.execution.target_partitions = 1; + +statement ok +CREATE TABLE reversed_high(id INT, value INT) AS VALUES (7, 700), (8, 800), (9, 900); + +statement ok +CREATE TABLE reversed_mid(id INT, value INT) AS VALUES (4, 400), (5, 500), (6, 600); + +statement ok +CREATE TABLE reversed_low(id INT, value INT) AS VALUES (1, 100), (2, 200), (3, 300); + +query I +COPY (SELECT * FROM reversed_high ORDER BY id ASC) +TO 'test_files/scratch/sort_pushdown/reversed/a_high.parquet'; +---- +3 + +query I +COPY (SELECT * FROM reversed_mid ORDER BY id ASC) +TO 'test_files/scratch/sort_pushdown/reversed/b_mid.parquet'; +---- +3 + +query I +COPY (SELECT * FROM reversed_low ORDER BY id ASC) +TO 'test_files/scratch/sort_pushdown/reversed/c_low.parquet'; +---- +3 + +# External table with NO "WITH ORDER" — relies on inferred ordering from parquet metadata +statement ok +CREATE EXTERNAL TABLE reversed_parquet(id INT, value INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/sort_pushdown/reversed/'; + +# Test 4.1: SortExec must be present because files are not in inter-file order +query TT +EXPLAIN SELECT * FROM reversed_parquet ORDER BY id ASC; +---- +logical_plan +01)Sort: reversed_parquet.id ASC NULLS LAST +02)--TableScan: reversed_parquet projection=[id, value] +physical_plan +01)SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/a_high.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/b_mid.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/c_low.parquet]]}, projection=[id, value], file_type=parquet + +# Test 4.2: Results must be correct +query II +SELECT * FROM reversed_parquet ORDER BY id ASC; +---- +1 100 +2 200 +3 300 +4 400 +5 500 +6 600 +7 700 +8 800 +9 900 + 
+# Test 5: Overlapping files with inferred ordering +# Create files with overlapping id ranges + +statement ok +CREATE TABLE overlap_x(id INT, value INT) AS VALUES (1, 100), (3, 300), (5, 500); + +statement ok +CREATE TABLE overlap_y(id INT, value INT) AS VALUES (2, 200), (4, 400), (6, 600); + +query I +COPY (SELECT * FROM overlap_x ORDER BY id ASC) +TO 'test_files/scratch/sort_pushdown/overlap/file_x.parquet'; +---- +3 + +query I +COPY (SELECT * FROM overlap_y ORDER BY id ASC) +TO 'test_files/scratch/sort_pushdown/overlap/file_y.parquet'; +---- +3 + +statement ok +CREATE EXTERNAL TABLE overlap_parquet(id INT, value INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/sort_pushdown/overlap/'; + +# Test 5.1: SortExec must be present because files have overlapping ranges +query TT +EXPLAIN SELECT * FROM overlap_parquet ORDER BY id ASC; +---- +logical_plan +01)Sort: overlap_parquet.id ASC NULLS LAST +02)--TableScan: overlap_parquet projection=[id, value] +physical_plan +01)SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/overlap/file_x.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/overlap/file_y.parquet]]}, projection=[id, value], file_type=parquet + +# Test 5.2: Results must be correct +query II +SELECT * FROM overlap_parquet ORDER BY id ASC; +---- +1 100 +2 200 +3 300 +4 400 +5 500 +6 600 + +# Test 6: WITH ORDER + reversed filesystem order +# Same file setup as Test 4 but explicitly declaring ordering via WITH ORDER. +# Even with WITH ORDER, the optimizer should detect that inter-file order is wrong +# and keep SortExec. 
+ +statement ok +CREATE EXTERNAL TABLE reversed_with_order_parquet(id INT, value INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/sort_pushdown/reversed/' +WITH ORDER (id ASC); + +# Test 6.1: SortExec must be present despite WITH ORDER +query TT +EXPLAIN SELECT * FROM reversed_with_order_parquet ORDER BY id ASC; +---- +logical_plan +01)Sort: reversed_with_order_parquet.id ASC NULLS LAST +02)--TableScan: reversed_with_order_parquet projection=[id, value] +physical_plan +01)SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/a_high.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/b_mid.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/c_low.parquet]]}, projection=[id, value], file_type=parquet + +# Test 6.2: Results must be correct +query II +SELECT * FROM reversed_with_order_parquet ORDER BY id ASC; +---- +1 100 +2 200 +3 300 +4 400 +5 500 +6 600 +7 700 +8 800 +9 900 + +# Test 7: Correctly ordered multi-file single group (positive case) +# Files are in CORRECT inter-file order within a single group. +# The validation should PASS and SortExec should be eliminated. 
+ +statement ok +CREATE TABLE correct_low(id INT, value INT) AS VALUES (1, 100), (2, 200), (3, 300); + +statement ok +CREATE TABLE correct_mid(id INT, value INT) AS VALUES (4, 400), (5, 500), (6, 600); + +statement ok +CREATE TABLE correct_high(id INT, value INT) AS VALUES (7, 700), (8, 800), (9, 900); + +query I +COPY (SELECT * FROM correct_low ORDER BY id ASC) +TO 'test_files/scratch/sort_pushdown/correct/a_low.parquet'; +---- +3 + +query I +COPY (SELECT * FROM correct_mid ORDER BY id ASC) +TO 'test_files/scratch/sort_pushdown/correct/b_mid.parquet'; +---- +3 + +query I +COPY (SELECT * FROM correct_high ORDER BY id ASC) +TO 'test_files/scratch/sort_pushdown/correct/c_high.parquet'; +---- +3 + +statement ok +CREATE EXTERNAL TABLE correct_parquet(id INT, value INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/sort_pushdown/correct/' +WITH ORDER (id ASC); + +# Test 7.1: SortExec should be ELIMINATED — files are in correct inter-file order +query TT +EXPLAIN SELECT * FROM correct_parquet ORDER BY id ASC; +---- +logical_plan +01)Sort: correct_parquet.id ASC NULLS LAST +02)--TableScan: correct_parquet projection=[id, value] +physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/correct/a_low.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/correct/b_mid.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/correct/c_high.parquet]]}, projection=[id, value], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet + +# Test 7.2: Results must be correct +query II +SELECT * FROM correct_parquet ORDER BY id ASC; +---- +1 100 +2 200 +3 300 +4 400 +5 500 +6 600 +7 700 +8 800 +9 900 + +# Test 7.3: DESC query on correctly ordered ASC files should still use SortExec +# Note: reverse_row_groups=true reverses the file list in the plan display +query TT +EXPLAIN SELECT * FROM correct_parquet ORDER BY id DESC; +---- +logical_plan +01)Sort: 
correct_parquet.id DESC NULLS FIRST +02)--TableScan: correct_parquet projection=[id, value] +physical_plan +01)SortExec: expr=[id@0 DESC], preserve_partitioning=[false] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/correct/c_high.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/correct/b_mid.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/correct/a_low.parquet]]}, projection=[id, value], file_type=parquet, reverse_row_groups=true + +query II +SELECT * FROM correct_parquet ORDER BY id DESC; +---- +9 900 +8 800 +7 700 +6 600 +5 500 +4 400 +3 300 +2 200 +1 100 + +# Test 8: DESC ordering with files in wrong inter-file DESC order +# Create files internally sorted by id DESC, but named so filesystem order +# is WRONG for DESC ordering (low values first in filesystem order). + +statement ok +CREATE TABLE desc_low(id INT, value INT) AS VALUES (3, 300), (2, 200), (1, 100); + +statement ok +CREATE TABLE desc_high(id INT, value INT) AS VALUES (9, 900), (8, 800), (7, 700); + +query I +COPY (SELECT * FROM desc_low ORDER BY id DESC) +TO 'test_files/scratch/sort_pushdown/desc_reversed/a_low.parquet'; +---- +3 + +query I +COPY (SELECT * FROM desc_high ORDER BY id DESC) +TO 'test_files/scratch/sort_pushdown/desc_reversed/b_high.parquet'; +---- +3 + +statement ok +CREATE EXTERNAL TABLE desc_reversed_parquet(id INT, value INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/sort_pushdown/desc_reversed/' +WITH ORDER (id DESC); + +# Test 8.1: SortExec must be present — files are in wrong inter-file DESC order +# (a_low has 1-3, b_high has 7-9; for DESC, b_high should come first) +query TT +EXPLAIN SELECT * FROM desc_reversed_parquet ORDER BY id DESC; +---- +logical_plan +01)Sort: desc_reversed_parquet.id DESC NULLS FIRST +02)--TableScan: desc_reversed_parquet projection=[id, value] +physical_plan +01)SortExec: expr=[id@0 DESC], 
preserve_partitioning=[false] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/desc_reversed/a_low.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/desc_reversed/b_high.parquet]]}, projection=[id, value], file_type=parquet + +# Test 8.2: Results must be correct +query II +SELECT * FROM desc_reversed_parquet ORDER BY id DESC; +---- +9 900 +8 800 +7 700 +3 300 +2 200 +1 100 + +# Test 9: Multi-column sort key validation +# Files have (category, id) ordering. Files share a boundary value on category='B' +# so column-level min/max statistics overlap on the primary key column. +# The validation conservatively rejects this because column-level stats can't +# precisely represent row-level boundaries for multi-column keys. + +statement ok +CREATE TABLE multi_col_a(category VARCHAR, id INT, value INT) AS VALUES +('A', 1, 10), ('A', 2, 20), ('B', 1, 30); + +statement ok +CREATE TABLE multi_col_b(category VARCHAR, id INT, value INT) AS VALUES +('B', 2, 40), ('C', 1, 50), ('C', 2, 60); + +query I +COPY (SELECT * FROM multi_col_a ORDER BY category ASC, id ASC) +TO 'test_files/scratch/sort_pushdown/multi_col/a_first.parquet'; +---- +3 + +query I +COPY (SELECT * FROM multi_col_b ORDER BY category ASC, id ASC) +TO 'test_files/scratch/sort_pushdown/multi_col/b_second.parquet'; +---- +3 + +statement ok +CREATE EXTERNAL TABLE multi_col_parquet(category VARCHAR, id INT, value INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/sort_pushdown/multi_col/' +WITH ORDER (category ASC, id ASC); + +# Test 9.1: SortExec is present — validation conservatively rejects because +# column-level stats overlap on category='B' across both files +query TT +EXPLAIN SELECT * FROM multi_col_parquet ORDER BY category ASC, id ASC; +---- +logical_plan +01)Sort: multi_col_parquet.category ASC NULLS LAST, multi_col_parquet.id ASC NULLS LAST +02)--TableScan: multi_col_parquet projection=[category, id, value] 
+physical_plan +01)SortExec: expr=[category@0 ASC NULLS LAST, id@1 ASC NULLS LAST], preserve_partitioning=[false] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/multi_col/a_first.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/multi_col/b_second.parquet]]}, projection=[category, id, value], file_type=parquet + +# Test 9.2: Results must be correct +query TII +SELECT * FROM multi_col_parquet ORDER BY category ASC, id ASC; +---- +A 1 10 +A 2 20 +B 1 30 +B 2 40 +C 1 50 +C 2 60 + +# Test 9.3: Multi-column sort with non-overlapping primary key across files +# When files don't overlap on the primary column, validation succeeds. + +statement ok +CREATE TABLE multi_col_x(category VARCHAR, id INT, value INT) AS VALUES +('A', 1, 10), ('A', 2, 20); + +statement ok +CREATE TABLE multi_col_y(category VARCHAR, id INT, value INT) AS VALUES +('B', 1, 30), ('B', 2, 40); + +query I +COPY (SELECT * FROM multi_col_x ORDER BY category ASC, id ASC) +TO 'test_files/scratch/sort_pushdown/multi_col_clean/x_first.parquet'; +---- +2 + +query I +COPY (SELECT * FROM multi_col_y ORDER BY category ASC, id ASC) +TO 'test_files/scratch/sort_pushdown/multi_col_clean/y_second.parquet'; +---- +2 + +statement ok +CREATE EXTERNAL TABLE multi_col_clean_parquet(category VARCHAR, id INT, value INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/sort_pushdown/multi_col_clean/' +WITH ORDER (category ASC, id ASC); + +# Test 9.3a: SortExec should be eliminated — non-overlapping primary column +query TT +EXPLAIN SELECT * FROM multi_col_clean_parquet ORDER BY category ASC, id ASC; +---- +logical_plan +01)Sort: multi_col_clean_parquet.category ASC NULLS LAST, multi_col_clean_parquet.id ASC NULLS LAST +02)--TableScan: multi_col_clean_parquet projection=[category, id, value] +physical_plan DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/multi_col_clean/x_first.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/multi_col_clean/y_second.parquet]]}, projection=[category, id, value], output_ordering=[category@0 ASC NULLS LAST, id@1 ASC NULLS LAST], file_type=parquet + +# Test 9.3b: Results must be correct +query TII +SELECT * FROM multi_col_clean_parquet ORDER BY category ASC, id ASC; +---- +A 1 10 +A 2 20 +B 1 30 +B 2 40 + +# Test 10: Correctly ordered files WITH ORDER (positive counterpart to Test 6) +# Files in correct_parquet are in correct ASC order — WITH ORDER should pass validation +# and SortExec should be eliminated. + +statement ok +CREATE EXTERNAL TABLE correct_with_order_parquet(id INT, value INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/sort_pushdown/correct/' +WITH ORDER (id ASC); + +# Test 10.1: SortExec should be ELIMINATED — files are in correct order +query TT +EXPLAIN SELECT * FROM correct_with_order_parquet ORDER BY id ASC; +---- +logical_plan +01)Sort: correct_with_order_parquet.id ASC NULLS LAST +02)--TableScan: correct_with_order_parquet projection=[id, value] +physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/correct/a_low.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/correct/b_mid.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/correct/c_high.parquet]]}, projection=[id, value], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet + +# Test 10.2: Results must be correct +query II +SELECT * FROM correct_with_order_parquet ORDER BY id ASC; +---- +1 100 +2 200 +3 300 +4 400 +5 500 +6 600 +7 700 +8 800 +9 900 + +# Test 11: Multiple file groups (target_partitions > 1) — each group has one file +# When files are spread across separate partitions (one file per group), each +# partition is trivially sorted and 
SortPreservingMergeExec handles the merge. + +# Restore higher target_partitions so files go into separate groups +statement ok +SET datafusion.execution.target_partitions = 4; + +statement ok +CREATE EXTERNAL TABLE multi_partition_parquet(id INT, value INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/sort_pushdown/reversed/' +WITH ORDER (id ASC); + +# Test 11.1: With separate partitions, each file is trivially sorted. +# SortPreservingMergeExec merges, no SortExec needed per-partition. +query TT +EXPLAIN SELECT * FROM multi_partition_parquet ORDER BY id ASC; +---- +logical_plan +01)Sort: multi_partition_parquet.id ASC NULLS LAST +02)--TableScan: multi_partition_parquet projection=[id, value] +physical_plan +01)SortPreservingMergeExec: [id@0 ASC NULLS LAST] +02)--DataSourceExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/a_high.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/b_mid.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/c_low.parquet]]}, projection=[id, value], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet + +# Test 11.2: Results must be correct +query II +SELECT * FROM multi_partition_parquet ORDER BY id ASC; +---- +1 100 +2 200 +3 300 +4 400 +5 500 +6 600 +7 700 +8 800 +9 900 + +# Restore target_partitions to 1 for remaining cleanup +statement ok +SET datafusion.execution.target_partitions = 2; + +statement ok +DROP TABLE reversed_high; + +statement ok +DROP TABLE reversed_mid; + +statement ok +DROP TABLE reversed_low; + +statement ok +DROP TABLE reversed_parquet; + +statement ok +DROP TABLE overlap_x; + +statement ok +DROP TABLE overlap_y; + +statement ok +DROP TABLE overlap_parquet; + +statement ok +DROP TABLE reversed_with_order_parquet; + +statement ok +DROP TABLE correct_low; + +statement ok +DROP TABLE correct_mid; + +statement ok +DROP TABLE correct_high; + +statement ok +DROP TABLE 
correct_parquet; + +statement ok +DROP TABLE desc_low; + +statement ok +DROP TABLE desc_high; + +statement ok +DROP TABLE desc_reversed_parquet; + +statement ok +DROP TABLE multi_col_a; + +statement ok +DROP TABLE multi_col_b; + +statement ok +DROP TABLE multi_col_parquet; + +statement ok +DROP TABLE multi_col_x; + +statement ok +DROP TABLE multi_col_y; + +statement ok +DROP TABLE multi_col_clean_parquet; + +statement ok +DROP TABLE correct_with_order_parquet; + +statement ok +DROP TABLE multi_partition_parquet; + From 866a4f01e70418a86375d7439d1285c5c7760313 Mon Sep 17 00:00:00 2001 From: Tim-53 <82676248+Tim-53@users.noreply.github.com> Date: Tue, 24 Feb 2026 13:10:15 +0100 Subject: [PATCH 09/12] fix: HashJoin panic with dictionary-encoded columns in multi-key joins (#20441) - Closes #20437 `flatten_dictionary_array` returned only the unique values rather than the full expanded array when being called on a `DictionaryArray`. When building a `StructArray` this caused a length mismatch panic. Replaced `array.values()` with `arrow::compute::cast(array, value_type)` in `flatten_dictionary_array`, which properly expands the dictionary into a full length array matching the row count. Yes, both a new unit test as well as a regression test were added. 
Nope --------- Co-authored-by: Andrew Lamb --- .../src/joins/hash_join/inlist_builder.rs | 58 ++++++-- datafusion/sqllogictest/test_files/joins.slt | 125 ++++++++++++++++++ 2 files changed, 174 insertions(+), 9 deletions(-) diff --git a/datafusion/physical-plan/src/joins/hash_join/inlist_builder.rs b/datafusion/physical-plan/src/joins/hash_join/inlist_builder.rs index 7dccc5b0ba7c2..9bf59d9e333d6 100644 --- a/datafusion/physical-plan/src/joins/hash_join/inlist_builder.rs +++ b/datafusion/physical-plan/src/joins/hash_join/inlist_builder.rs @@ -20,8 +20,8 @@ use std::sync::Arc; use arrow::array::{ArrayRef, StructArray}; +use arrow::compute::cast; use arrow::datatypes::{Field, FieldRef, Fields}; -use arrow::downcast_dictionary_array; use arrow_schema::DataType; use datafusion_common::Result; @@ -33,15 +33,16 @@ pub(super) fn build_struct_fields(data_types: &[DataType]) -> Result { .collect() } -/// Flattens dictionary-encoded arrays to their underlying value arrays. +/// Casts dictionary-encoded arrays to their underlying value type, preserving row count. /// Non-dictionary arrays are returned as-is. -fn flatten_dictionary_array(array: &ArrayRef) -> ArrayRef { - downcast_dictionary_array! 
{ - array => { +fn flatten_dictionary_array(array: &ArrayRef) -> Result { + match array.data_type() { + DataType::Dictionary(_, value_type) => { + let casted = cast(array, value_type)?; // Recursively flatten in case of nested dictionaries - flatten_dictionary_array(array.values()) + flatten_dictionary_array(&casted) } - _ => Arc::clone(array) + _ => Ok(Arc::clone(array)), } } @@ -68,7 +69,7 @@ pub(super) fn build_struct_inlist_values( let flattened_arrays: Vec = join_key_arrays .iter() .map(flatten_dictionary_array) - .collect(); + .collect::>>()?; // Build the source array/struct let source_array: ArrayRef = if flattened_arrays.len() == 1 { @@ -99,7 +100,9 @@ pub(super) fn build_struct_inlist_values( #[cfg(test)] mod tests { use super::*; - use arrow::array::{Int32Array, StringArray}; + use arrow::array::{ + DictionaryArray, Int8Array, Int32Array, StringArray, StringDictionaryBuilder, + }; use arrow_schema::DataType; use std::sync::Arc; @@ -130,4 +133,41 @@ mod tests { ) ); } + + #[test] + fn test_build_multi_column_inlist_with_dictionary() { + let mut builder = StringDictionaryBuilder::::new(); + builder.append_value("foo"); + builder.append_value("foo"); + builder.append_value("foo"); + let dict_array = Arc::new(builder.finish()) as ArrayRef; + + let int_array = Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef; + + let result = build_struct_inlist_values(&[dict_array, int_array]) + .unwrap() + .unwrap(); + + assert_eq!(result.len(), 3); + assert_eq!( + *result.data_type(), + DataType::Struct( + build_struct_fields(&[DataType::Utf8, DataType::Int32]).unwrap() + ) + ); + } + + #[test] + fn test_build_single_column_dictionary_inlist() { + let keys = Int8Array::from(vec![0i8, 0, 0]); + let values = Arc::new(StringArray::from(vec!["foo"])); + let dict_array = Arc::new(DictionaryArray::new(keys, values)) as ArrayRef; + + let result = build_struct_inlist_values(std::slice::from_ref(&dict_array)) + .unwrap() + .unwrap(); + + assert_eq!(result.len(), 3); + 
assert_eq!(*result.data_type(), DataType::Utf8); + } } diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index 38037ede21db2..e020885a447e0 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -5198,3 +5198,128 @@ DROP TABLE t1_c; statement ok DROP TABLE t2_c; + +# Reproducer of https://github.com/apache/datafusion/issues/19067 +statement count 0 +set datafusion.explain.physical_plan_only = true; + +# Setup Left Table with FixedSizeBinary(4) +statement count 0 +CREATE TABLE issue_19067_left AS +SELECT + column1 as id, + arrow_cast(decode(column2, 'hex'), 'FixedSizeBinary(4)') as join_key +FROM (VALUES + (1, 'AAAAAAAA'), + (2, 'BBBBBBBB'), + (3, 'CCCCCCCC') +); + +# Setup Right Table with FixedSizeBinary(4) +statement count 0 +CREATE TABLE issue_19067_right AS +SELECT + arrow_cast(decode(column1, 'hex'), 'FixedSizeBinary(4)') as join_key, + column2 as value +FROM (VALUES + ('AAAAAAAA', 1000), + ('BBBBBBBB', 2000) +); + +# Perform Left Join. Third row should contain NULL in `right_key`. 
+query I??I +SELECT + l.id, + l.join_key as left_key, + r.join_key as right_key, + r.value +FROM issue_19067_left l +LEFT JOIN issue_19067_right r ON l.join_key = r.join_key +ORDER BY l.id; +---- +1 aaaaaaaa aaaaaaaa 1000 +2 bbbbbbbb bbbbbbbb 2000 +3 cccccccc NULL NULL + +# Ensure usage of HashJoinExec +query TT +EXPLAIN +SELECT + l.id, + l.join_key as left_key, + r.join_key as right_key, + r.value +FROM issue_19067_left l +LEFT JOIN issue_19067_right r ON l.join_key = r.join_key +ORDER BY l.id; +---- +physical_plan +01)SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] +02)--ProjectionExec: expr=[id@2 as id, join_key@3 as left_key, join_key@0 as right_key, value@1 as value] +03)----HashJoinExec: mode=CollectLeft, join_type=Right, on=[(join_key@0, join_key@1)] +04)------DataSourceExec: partitions=1, partition_sizes=[1] +05)------DataSourceExec: partitions=1, partition_sizes=[1] + +statement count 0 +set datafusion.explain.physical_plan_only = false; + +statement count 0 +DROP TABLE issue_19067_left; + +statement count 0 +DROP TABLE issue_19067_right; + +# Test that empty projections pushed into joins produce correct row counts at runtime. +# When count(1) is used over a RIGHT/FULL JOIN, the optimizer embeds an empty projection +# (projection=[]) into the HashJoinExec. This validates that the runtime batch construction +# handles zero-column output correctly, preserving the correct number of rows. 
+ +statement ok +CREATE TABLE empty_proj_left AS VALUES (1, 'a'), (2, 'b'), (3, 'c'); + +statement ok +CREATE TABLE empty_proj_right AS VALUES (1, 'x'), (2, 'y'), (4, 'z'); + +query I +SELECT count(1) FROM empty_proj_left RIGHT JOIN empty_proj_right ON empty_proj_left.column1 = empty_proj_right.column1; +---- +3 + +query I +SELECT count(1) FROM empty_proj_left FULL JOIN empty_proj_right ON empty_proj_left.column1 = empty_proj_right.column1; +---- +4 + +statement count 0 +DROP TABLE empty_proj_left; + +statement count 0 +DROP TABLE empty_proj_right; + +# Issue #20437: HashJoin panic with dictionary-encoded columns in multi-key joins +# https://github.com/apache/datafusion/issues/20437 + +statement ok +CREATE TABLE issue_20437_small AS +SELECT id, arrow_cast(region, 'Dictionary(Int32, Utf8)') AS region +FROM (VALUES (1, 'west'), (2, 'west')) AS t(id, region); + +statement ok +CREATE TABLE issue_20437_large AS +SELECT id, region, value +FROM (VALUES (1, 'west', 100), (2, 'west', 200), (3, 'east', 300)) AS t(id, region, value); + +query ITI +SELECT s.id, s.region, l.value +FROM issue_20437_small s +JOIN issue_20437_large l ON s.id = l.id AND s.region = l.region +ORDER BY s.id; +---- +1 west 100 +2 west 200 + +statement count 0 +DROP TABLE issue_20437_small; + +statement count 0 +DROP TABLE issue_20437_large; From 20598115a943fb0da68f9fe8507b510d8f292a9c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 23 Feb 2026 14:38:11 -0500 Subject: [PATCH 10/12] Avoid flattening dictionaries in Join InLists --- .../src/joins/hash_join/inlist_builder.rs | 39 +++++++------------ 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/datafusion/physical-plan/src/joins/hash_join/inlist_builder.rs b/datafusion/physical-plan/src/joins/hash_join/inlist_builder.rs index 9bf59d9e333d6..0ca338265ecc6 100644 --- a/datafusion/physical-plan/src/joins/hash_join/inlist_builder.rs +++ b/datafusion/physical-plan/src/joins/hash_join/inlist_builder.rs @@ -20,7 +20,6 @@ use 
std::sync::Arc; use arrow::array::{ArrayRef, StructArray}; -use arrow::compute::cast; use arrow::datatypes::{Field, FieldRef, Fields}; use arrow_schema::DataType; use datafusion_common::Result; @@ -33,19 +32,6 @@ pub(super) fn build_struct_fields(data_types: &[DataType]) -> Result { .collect() } -/// Casts dictionary-encoded arrays to their underlying value type, preserving row count. -/// Non-dictionary arrays are returned as-is. -fn flatten_dictionary_array(array: &ArrayRef) -> Result { - match array.data_type() { - DataType::Dictionary(_, value_type) => { - let casted = cast(array, value_type)?; - // Recursively flatten in case of nested dictionaries - flatten_dictionary_array(&casted) - } - _ => Ok(Arc::clone(array)), - } -} - /// Builds InList values from join key column arrays. /// /// If `join_key_arrays` is: @@ -65,20 +51,14 @@ fn flatten_dictionary_array(array: &ArrayRef) -> Result { pub(super) fn build_struct_inlist_values( join_key_arrays: &[ArrayRef], ) -> Result> { - // Flatten any dictionary-encoded arrays - let flattened_arrays: Vec = join_key_arrays - .iter() - .map(flatten_dictionary_array) - .collect::>>()?; - // Build the source array/struct - let source_array: ArrayRef = if flattened_arrays.len() == 1 { + let source_array: ArrayRef = if join_key_arrays.len() == 1 { // Single column: use directly - Arc::clone(&flattened_arrays[0]) + Arc::clone(&join_key_arrays[0]) } else { // Multi-column: build StructArray once from all columns let fields = build_struct_fields( - &flattened_arrays + &join_key_arrays .iter() .map(|arr| arr.data_type().clone()) .collect::>(), @@ -88,7 +68,7 @@ pub(super) fn build_struct_inlist_values( let arrays_with_fields: Vec<(FieldRef, ArrayRef)> = fields .iter() .cloned() - .zip(flattened_arrays.iter().cloned()) + .zip(join_key_arrays.iter().cloned()) .collect(); Arc::new(StructArray::from(arrays_with_fields)) @@ -152,7 +132,14 @@ mod tests { assert_eq!( *result.data_type(), DataType::Struct( - 
build_struct_fields(&[DataType::Utf8, DataType::Int32]).unwrap() + build_struct_fields(&[ + DataType::Dictionary( + Box::new(DataType::Int8), + Box::new(DataType::Utf8) + ), + DataType::Int32 + ]) + .unwrap() ) ); } @@ -168,6 +155,6 @@ mod tests { .unwrap(); assert_eq!(result.len(), 3); - assert_eq!(*result.data_type(), DataType::Utf8); + assert_eq!(result.data_type(), dict_array.data_type()); } } From 6a0206e08361c7b325f03f37da71e760b1cfaf18 Mon Sep 17 00:00:00 2001 From: Adam Curtis Date: Wed, 4 Mar 2026 10:47:39 -0500 Subject: [PATCH 11/12] chore: bump arrow/parquet to 57.3.0 Includes fix for FixedSizeBinary LEFT JOIN bug - https://github.com/apache/arrow-rs/pull/8981 Cherry-picked test and API updates from - https://github.com/apache/datafusion/pull/19355 --- Cargo.lock | 64 +++++++++---------- Cargo.toml | 14 ++-- datafusion/common/src/scalar/mod.rs | 7 +- .../src/avro_to_arrow/schema.rs | 4 +- .../functions/src/core/union_extract.rs | 5 +- datafusion/physical-plan/src/filter.rs | 5 +- datafusion/proto-common/src/from_proto/mod.rs | 21 +++--- .../tests/cases/roundtrip_logical_plan.rs | 12 ++-- datafusion/sqllogictest/src/test_context.rs | 5 +- datafusion/sqllogictest/test_files/case.slt | 5 +- .../test_files/spark/hash/crc32.slt | 6 +- datafusion/sqllogictest/test_files/struct.slt | 39 ++++++++--- 12 files changed, 109 insertions(+), 78 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c2e9312a24723..3bdae442123fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -223,9 +223,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb372a7cbcac02a35d3fb7b3fc1f969ec078e871f9bb899bf00a2e1809bec8a3" +checksum = "e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" dependencies = [ "arrow-arith", "arrow-array", @@ -246,9 +246,9 @@ dependencies = [ [[package]] name = "arrow-arith" 
-version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f377dcd19e440174596d83deb49cd724886d91060c07fec4f67014ef9d54049" +checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" dependencies = [ "arrow-array", "arrow-buffer", @@ -260,9 +260,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eaff85a44e9fa914660fb0d0bb00b79c4a3d888b5334adb3ea4330c84f002" +checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" dependencies = [ "ahash", "arrow-buffer", @@ -279,9 +279,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2819d893750cb3380ab31ebdc8c68874dd4429f90fd09180f3c93538bd21626" +checksum = "c697ddca96183182f35b3a18e50b9110b11e916d7b7799cbfd4d34662f2c56c2" dependencies = [ "bytes", "half", @@ -291,9 +291,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d131abb183f80c450d4591dc784f8d7750c50c6e2bc3fcaad148afc8361271" +checksum = "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" dependencies = [ "arrow-array", "arrow-buffer", @@ -313,9 +313,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2275877a0e5e7e7c76954669366c2aa1a829e340ab1f612e647507860906fb6b" +checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" dependencies = [ "arrow-array", "arrow-cast", @@ -328,9 +328,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"05738f3d42cb922b9096f7786f606fcb8669260c2640df8490533bb2fa38c9d3" +checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" dependencies = [ "arrow-buffer", "arrow-schema", @@ -341,9 +341,9 @@ dependencies = [ [[package]] name = "arrow-flight" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b5f57c3d39d1b1b7c1376a772ea86a131e7da310aed54ebea9363124bb885e3" +checksum = "58c5b083668e6230eae3eab2fc4b5fb989974c845d0aa538dde61a4327c78675" dependencies = [ "arrow-arith", "arrow-array", @@ -369,9 +369,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d09446e8076c4b3f235603d9ea7c5494e73d441b01cd61fb33d7254c11964b3" +checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" dependencies = [ "arrow-array", "arrow-buffer", @@ -385,9 +385,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "371ffd66fa77f71d7628c63f209c9ca5341081051aa32f9c8020feb0def787c0" +checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" dependencies = [ "arrow-array", "arrow-buffer", @@ -409,9 +409,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc94fc7adec5d1ba9e8cd1b1e8d6f72423b33fe978bf1f46d970fafab787521" +checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" dependencies = [ "arrow-array", "arrow-buffer", @@ -422,9 +422,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "169676f317157dc079cc5def6354d16db63d8861d61046d2f3883268ced6f99f" +checksum = 
"18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" dependencies = [ "arrow-array", "arrow-buffer", @@ -435,9 +435,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d27609cd7dd45f006abae27995c2729ef6f4b9361cde1ddd019dc31a5aa017e0" +checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" dependencies = [ "bitflags", "serde", @@ -447,9 +447,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae980d021879ea119dd6e2a13912d81e64abed372d53163e804dfe84639d8010" +checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" dependencies = [ "ahash", "arrow-array", @@ -461,9 +461,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf35e8ef49dcf0c5f6d175edee6b8af7b45611805333129c541a8b89a0fc0534" +checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" dependencies = [ "arrow-array", "arrow-buffer", @@ -4380,9 +4380,9 @@ dependencies = [ [[package]] name = "parquet" -version = "57.1.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be3e4f6d320dd92bfa7d612e265d7d08bba0a240bab86af3425e1d255a511d89" +checksum = "6ee96b29972a257b855ff2341b37e61af5f12d6af1158b6dcdb5b31ea07bb3cb" dependencies = [ "ahash", "arrow-array", diff --git a/Cargo.toml b/Cargo.toml index 6424f512cc3df..8bc8006daf147 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -91,19 +91,19 @@ ahash = { version = "0.8", default-features = false, features = [ "runtime-rng", ] } apache-avro = { version = "0.21", default-features = false } -arrow = { version = "57.1.0", features = [ +arrow = { version = "57.2.0", features = [ "prettyprint", 
"chrono-tz", ] } -arrow-buffer = { version = "57.1.0", default-features = false } -arrow-flight = { version = "57.1.0", features = [ +arrow-buffer = { version = "57.2.0", default-features = false } +arrow-flight = { version = "57.2.0", features = [ "flight-sql-experimental", ] } -arrow-ipc = { version = "57.1.0", default-features = false, features = [ +arrow-ipc = { version = "57.2.0", default-features = false, features = [ "lz4", ] } -arrow-ord = { version = "57.1.0", default-features = false } -arrow-schema = { version = "57.1.0", default-features = false } +arrow-ord = { version = "57.2.0", default-features = false } +arrow-schema = { version = "57.2.0", default-features = false } async-trait = "0.1.89" bigdecimal = "0.4.8" bytes = "1.11" @@ -166,7 +166,7 @@ log = "^0.4" num-traits = { version = "0.2" } object_store = { version = "0.12.4", default-features = false } parking_lot = "0.12" -parquet = { version = "57.1.0", default-features = false, features = [ +parquet = { version = "57.2.0", default-features = false, features = [ "arrow", "async", "object_store", diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index e4e048ad3c0d8..eda4952cf590b 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -8868,7 +8868,7 @@ mod tests { .unwrap(), ScalarValue::try_new_null(&DataType::Map(map_field_ref, false)).unwrap(), ScalarValue::try_new_null(&DataType::Union( - UnionFields::new(vec![42], vec![field_ref]), + UnionFields::try_new(vec![42], vec![field_ref]).unwrap(), UnionMode::Dense, )) .unwrap(), @@ -8971,13 +8971,14 @@ mod tests { } // Test union type - let union_fields = UnionFields::new( + let union_fields = UnionFields::try_new( vec![0, 1], vec![ Field::new("i32", DataType::Int32, false), Field::new("f64", DataType::Float64, false), ], - ); + ) + .unwrap(); let union_result = ScalarValue::new_default(&DataType::Union( union_fields.clone(), UnionMode::Sparse, diff --git 
a/datafusion/datasource-avro/src/avro_to_arrow/schema.rs b/datafusion/datasource-avro/src/avro_to_arrow/schema.rs index 0e8f2a4d56088..053be3c9aff94 100644 --- a/datafusion/datasource-avro/src/avro_to_arrow/schema.rs +++ b/datafusion/datasource-avro/src/avro_to_arrow/schema.rs @@ -117,8 +117,8 @@ fn schema_to_field_with_props( .iter() .map(|s| schema_to_field_with_props(s, None, has_nullable, None)) .collect::>>()?; - let type_ids = 0_i8..fields.len() as i8; - DataType::Union(UnionFields::new(type_ids, fields), UnionMode::Dense) + // Assign type_ids based on the order in which they appear + DataType::Union(UnionFields::from_fields(fields), UnionMode::Dense) } } AvroSchema::Record(RecordSchema { fields, .. }) => { diff --git a/datafusion/functions/src/core/union_extract.rs b/datafusion/functions/src/core/union_extract.rs index 56d4f23cc4e2e..8d915fb2e2c07 100644 --- a/datafusion/functions/src/core/union_extract.rs +++ b/datafusion/functions/src/core/union_extract.rs @@ -189,13 +189,14 @@ mod tests { fn test_scalar_value() -> Result<()> { let fun = UnionExtractFun::new(); - let fields = UnionFields::new( + let fields = UnionFields::try_new( vec![1, 3], vec![ Field::new("str", DataType::Utf8, false), Field::new("int", DataType::Int32, false), ], - ); + ) + .unwrap(); let args = vec![ ColumnarValue::Scalar(ScalarValue::Union( diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index e724cdad64840..e78e50c9868fb 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -1540,13 +1540,14 @@ mod tests { #[test] fn test_equivalence_properties_union_type() -> Result<()> { let union_type = DataType::Union( - UnionFields::new( + UnionFields::try_new( vec![0, 1], vec![ Field::new("f1", DataType::Int32, true), Field::new("f2", DataType::Utf8, true), ], - ), + ) + .unwrap(), UnionMode::Sparse, ); diff --git a/datafusion/proto-common/src/from_proto/mod.rs 
b/datafusion/proto-common/src/from_proto/mod.rs index e8e71c3884586..3c41b8cad9ed1 100644 --- a/datafusion/proto-common/src/from_proto/mod.rs +++ b/datafusion/proto-common/src/from_proto/mod.rs @@ -304,13 +304,16 @@ impl TryFrom<&protobuf::arrow_type::ArrowTypeEnum> for DataType { }; let union_fields = parse_proto_fields_to_fields(&union.union_types)?; - // Default to index based type ids if not provided - let type_ids: Vec<_> = match union.type_ids.is_empty() { - true => (0..union_fields.len() as i8).collect(), - false => union.type_ids.iter().map(|i| *i as i8).collect(), + // Default to index based type ids if not explicitly provided + let union_fields = if union.type_ids.is_empty() { + UnionFields::from_fields(union_fields) + } else { + let type_ids = union.type_ids.iter().map(|i| *i as i8); + UnionFields::try_new(type_ids, union_fields).map_err(|e| { + DataFusionError::from(e).context("Deserializing Union DataType") + })? }; - - DataType::Union(UnionFields::new(type_ids, union_fields), union_mode) + DataType::Union(union_fields, union_mode) } arrow_type::ArrowTypeEnum::Dictionary(dict) => { let key_datatype = dict.as_ref().key.as_deref().required("key")?; @@ -602,7 +605,9 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue { .collect::>>(); let fields = fields.ok_or_else(|| Error::required("UnionField"))?; let fields = parse_proto_fields_to_fields(&fields)?; - let fields = UnionFields::new(ids, fields); + let union_fields = UnionFields::try_new(ids, fields).map_err(|e| { + DataFusionError::from(e).context("Deserializing Union ScalarValue") + })?; let v_id = val.value_id as i8; let val = match &val.value { None => None, @@ -614,7 +619,7 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue { Some((v_id, Box::new(val))) } }; - Self::Union(val, fields, mode) + Self::Union(val, union_fields, mode) } Value::FixedSizeBinaryValue(v) => { Self::FixedSizeBinary(v.length, Some(v.clone().values)) diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs 
b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index bcfda648b53e5..b9af9fc9352b2 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -1780,19 +1780,20 @@ fn round_trip_datatype() { ), ])), DataType::Union( - UnionFields::new( + UnionFields::try_new( vec![7, 5, 3], vec![ Field::new("nullable", DataType::Boolean, false), Field::new("name", DataType::Utf8, false), Field::new("datatype", DataType::Binary, false), ], - ), + ) + .unwrap(), UnionMode::Sparse, ), DataType::Union( - UnionFields::new( - vec![5, 8, 1], + UnionFields::try_new( + vec![5, 8, 1, 100], vec![ Field::new("nullable", DataType::Boolean, false), Field::new("name", DataType::Utf8, false), @@ -1807,7 +1808,8 @@ fn round_trip_datatype() { true, ), ], - ), + ) + .unwrap(), UnionMode::Dense, ), DataType::Dictionary( diff --git a/datafusion/sqllogictest/src/test_context.rs b/datafusion/sqllogictest/src/test_context.rs index 9ec085b41eec0..a69bdac663002 100644 --- a/datafusion/sqllogictest/src/test_context.rs +++ b/datafusion/sqllogictest/src/test_context.rs @@ -436,14 +436,15 @@ fn create_example_udf() -> ScalarUDF { fn register_union_table(ctx: &SessionContext) { let union = UnionArray::try_new( - UnionFields::new( + UnionFields::try_new( // typeids: 3 for int, 1 for string vec![3, 1], vec![ Field::new("int", DataType::Int32, false), Field::new("string", DataType::Utf8, false), ], - ), + ) + .unwrap(), ScalarBuffer::from(vec![3, 1, 3]), None, vec![ diff --git a/datafusion/sqllogictest/test_files/case.slt b/datafusion/sqllogictest/test_files/case.slt index 074d216ac7524..481dde5be9f5c 100644 --- a/datafusion/sqllogictest/test_files/case.slt +++ b/datafusion/sqllogictest/test_files/case.slt @@ -384,8 +384,7 @@ SELECT column2, column3, column4 FROM t; {foo: a, xxx: b} {xxx: c, foo: d} {xxx: e} # coerce structs with different field orders, -# (note the *value*s are from column2 but the field name is 'xxx', as the coerced 
-# type takes the field name from the last argument (column3) +# should keep the same field values query ? SELECT case @@ -394,7 +393,7 @@ SELECT end FROM t; ---- -{xxx: a, foo: b} +{xxx: b, foo: a} # coerce structs with different field orders query ? diff --git a/datafusion/sqllogictest/test_files/spark/hash/crc32.slt b/datafusion/sqllogictest/test_files/spark/hash/crc32.slt index 6fbeb11fb9a36..df5588c75837d 100644 --- a/datafusion/sqllogictest/test_files/spark/hash/crc32.slt +++ b/datafusion/sqllogictest/test_files/spark/hash/crc32.slt @@ -81,7 +81,7 @@ SELECT crc32(arrow_cast('Spark', 'BinaryView')); ---- 1557323817 -# Upstream arrow-rs issue: https://github.com/apache/arrow-rs/issues/8841 -# This should succeed after we receive the fix -query error Arrow error: Compute error: Internal Error: Cannot cast BinaryView to BinaryArray of expected type +query I select crc32(arrow_cast(null, 'Dictionary(Int32, Utf8)')) +---- +NULL diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index d985af1104da3..a91a5e7f870a9 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -492,9 +492,18 @@ Struct("r": Utf8, "c": Float64) statement ok drop table t; -query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'a' to value of Float64 type +statement ok create table t as values({r: 'a', c: 1}), ({c: 2.3, r: 'b'}); +query ? 
+select * from t; +---- +{c: 1.0, r: a} +{c: 2.3, r: b} + +statement ok +drop table t; + ################################## ## Test Coalesce with Struct ################################## @@ -560,10 +569,18 @@ create table t(a struct(r varchar, c int), b struct(r varchar, c float)) as valu (row('purple', 1), row('green', 2.3)); # out of order struct literal -# TODO: This query should not fail -statement error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'b' to value of Int32 type +statement ok create table t(a struct(r varchar, c int)) as values ({r: 'a', c: 1}), ({c: 2, r: 'b'}); +query ? +select * from t; +---- +{r: a, c: 1} +{r: b, c: 2} + +statement ok +drop table t; + ################################## ## Test Array of Struct ################################## @@ -573,9 +590,11 @@ select [{r: 'a', c: 1}, {r: 'b', c: 2}]; ---- [{r: a, c: 1}, {r: b, c: 2}] -# Can't create a list of struct with different field types -query error +# Create a list of struct with different field types +query ? 
select [{r: 'a', c: 1}, {c: 2, r: 'b'}]; +---- +[{c: 1, r: a}, {c: 2, r: b}] statement ok create table t(a struct(r varchar, c int), b struct(r varchar, c float)) as values (row('a', 1), row('b', 2.3)); @@ -592,9 +611,11 @@ drop table t; statement ok create table t(a struct(r varchar, c int), b struct(c float, r varchar)) as values (row('a', 1), row(2.3, 'b')); -# create array with different struct type is not valid -query error +# create array with different struct type should be cast +query T select arrow_typeof([a, b]) from t; +---- +List(Struct("c": Float32, "r": Utf8View)) statement ok drop table t; @@ -602,13 +623,13 @@ drop table t; statement ok create table t(a struct(r varchar, c int, g float), b struct(r varchar, c float, g int)) as values (row('a', 1, 2.3), row('b', 2.3, 2)); -# type of each column should not coerced but perserve as it is +# type of each column should not coerced but preserve as it is query T select arrow_typeof(a) from t; ---- Struct("r": Utf8View, "c": Int32, "g": Float32) -# type of each column should not coerced but perserve as it is +# type of each column should not coerced but preserve as it is query T select arrow_typeof(b) from t; ---- From 7c614015f8b827dc7f38e85e8f8f4ef63775ea65 Mon Sep 17 00:00:00 2001 From: Adam Curtis Date: Mon, 9 Mar 2026 16:34:43 -0400 Subject: [PATCH 12/12] Revert "Respect execution timezone in to_timestamp and related functions (#19078)" This reverts commit ada0923a3927d16dc5d810637cfbea4146c54f54. 
--- Cargo.lock | 1 - datafusion/functions/Cargo.toml | 3 +- datafusion/functions/benches/to_timestamp.rs | 24 +- datafusion/functions/src/datetime/common.rs | 153 +--- datafusion/functions/src/datetime/mod.rs | 46 +- datafusion/functions/src/datetime/to_date.rs | 6 +- .../functions/src/datetime/to_timestamp.rs | 840 ++++-------------- .../functions/src/datetime/to_unixtime.rs | 2 +- datafusion/functions/src/macros.rs | 29 - .../test_files/datetime/timestamps.slt | 12 +- .../test_files/to_timestamp_timezone.slt | 204 ----- .../source/user-guide/sql/scalar_functions.md | 79 +- 12 files changed, 240 insertions(+), 1159 deletions(-) delete mode 100644 datafusion/sqllogictest/test_files/to_timestamp_timezone.slt diff --git a/Cargo.lock b/Cargo.lock index 3bdae442123fa..125ec00468a87 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2242,7 +2242,6 @@ dependencies = [ "blake2", "blake3", "chrono", - "chrono-tz", "criterion", "ctor", "datafusion-common", diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index 2bdc05abe3806..a4f9211fe017c 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -40,7 +40,7 @@ workspace = true [features] crypto_expressions = ["md-5", "sha2", "blake2", "blake3"] # enable datetime functions -datetime_expressions = ["chrono-tz"] +datetime_expressions = [] # Enable encoding by default so the doctests work. In general don't automatically enable all packages. 
default = [ "datetime_expressions", @@ -71,7 +71,6 @@ base64 = { version = "0.22", optional = true } blake2 = { version = "^0.10.2", optional = true } blake3 = { version = "1.8", optional = true } chrono = { workspace = true } -chrono-tz = { version = "0.10.4", optional = true } datafusion-common = { workspace = true } datafusion-doc = { workspace = true } datafusion-execution = { workspace = true } diff --git a/datafusion/functions/benches/to_timestamp.rs b/datafusion/functions/benches/to_timestamp.rs index ed865fa6e8d50..d95a197872eb3 100644 --- a/datafusion/functions/benches/to_timestamp.rs +++ b/datafusion/functions/benches/to_timestamp.rs @@ -114,21 +114,16 @@ fn criterion_benchmark(c: &mut Criterion) { Field::new("f", DataType::Timestamp(TimeUnit::Nanosecond, None), true).into(); let arg_field = Field::new("a", DataType::Utf8, false).into(); let arg_fields = vec![arg_field]; - let mut options = ConfigOptions::default(); - options.execution.time_zone = Some("UTC".into()); - let config_options = Arc::new(options); - - let to_timestamp_udf = to_timestamp(config_options.as_ref()); + let config_options = Arc::new(ConfigOptions::default()); c.bench_function("to_timestamp_no_formats_utf8", |b| { - let to_timestamp_udf = Arc::clone(&to_timestamp_udf); let arr_data = data(); let batch_len = arr_data.len(); let string_array = ColumnarValue::Array(Arc::new(arr_data) as ArrayRef); b.iter(|| { black_box( - to_timestamp_udf + to_timestamp() .invoke_with_args(ScalarFunctionArgs { args: vec![string_array.clone()], arg_fields: arg_fields.clone(), @@ -142,14 +137,13 @@ fn criterion_benchmark(c: &mut Criterion) { }); c.bench_function("to_timestamp_no_formats_largeutf8", |b| { - let to_timestamp_udf = Arc::clone(&to_timestamp_udf); let data = cast(&data(), &DataType::LargeUtf8).unwrap(); let batch_len = data.len(); let string_array = ColumnarValue::Array(Arc::new(data) as ArrayRef); b.iter(|| { black_box( - to_timestamp_udf + to_timestamp() .invoke_with_args(ScalarFunctionArgs { 
args: vec![string_array.clone()], arg_fields: arg_fields.clone(), @@ -163,14 +157,13 @@ fn criterion_benchmark(c: &mut Criterion) { }); c.bench_function("to_timestamp_no_formats_utf8view", |b| { - let to_timestamp_udf = Arc::clone(&to_timestamp_udf); let data = cast(&data(), &DataType::Utf8View).unwrap(); let batch_len = data.len(); let string_array = ColumnarValue::Array(Arc::new(data) as ArrayRef); b.iter(|| { black_box( - to_timestamp_udf + to_timestamp() .invoke_with_args(ScalarFunctionArgs { args: vec![string_array.clone()], arg_fields: arg_fields.clone(), @@ -184,7 +177,6 @@ fn criterion_benchmark(c: &mut Criterion) { }); c.bench_function("to_timestamp_with_formats_utf8", |b| { - let to_timestamp_udf = Arc::clone(&to_timestamp_udf); let (inputs, format1, format2, format3) = data_with_formats(); let batch_len = inputs.len(); @@ -204,7 +196,7 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| { black_box( - to_timestamp_udf + to_timestamp() .invoke_with_args(ScalarFunctionArgs { args: args.clone(), arg_fields: arg_fields.clone(), @@ -218,7 +210,6 @@ fn criterion_benchmark(c: &mut Criterion) { }); c.bench_function("to_timestamp_with_formats_largeutf8", |b| { - let to_timestamp_udf = Arc::clone(&to_timestamp_udf); let (inputs, format1, format2, format3) = data_with_formats(); let batch_len = inputs.len(); @@ -246,7 +237,7 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| { black_box( - to_timestamp_udf + to_timestamp() .invoke_with_args(ScalarFunctionArgs { args: args.clone(), arg_fields: arg_fields.clone(), @@ -260,7 +251,6 @@ fn criterion_benchmark(c: &mut Criterion) { }); c.bench_function("to_timestamp_with_formats_utf8view", |b| { - let to_timestamp_udf = Arc::clone(&to_timestamp_udf); let (inputs, format1, format2, format3) = data_with_formats(); let batch_len = inputs.len(); @@ -289,7 +279,7 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| { black_box( - to_timestamp_udf + to_timestamp() .invoke_with_args(ScalarFunctionArgs { args: 
args.clone(), arg_fields: arg_fields.clone(), diff --git a/datafusion/functions/src/datetime/common.rs b/datafusion/functions/src/datetime/common.rs index 2db64beafa9b7..5b152081b64ca 100644 --- a/datafusion/functions/src/datetime/common.rs +++ b/datafusion/functions/src/datetime/common.rs @@ -15,57 +15,31 @@ // specific language governing permissions and limitations // under the License. -use std::sync::{Arc, LazyLock}; +use std::sync::Arc; -use arrow::array::timezone::Tz; use arrow::array::{ Array, ArrowPrimitiveType, AsArray, GenericStringArray, PrimitiveArray, StringArrayType, StringViewArray, }; -use arrow::compute::DecimalCast; -use arrow::compute::kernels::cast_utils::string_to_datetime; -use arrow::datatypes::{DataType, TimeUnit}; -use arrow_buffer::ArrowNativeType; +use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos; +use arrow::datatypes::DataType; use chrono::LocalResult::Single; use chrono::format::{Parsed, StrftimeItems, parse}; use chrono::{DateTime, TimeZone, Utc}; + use datafusion_common::cast::as_generic_string_array; use datafusion_common::{ - DataFusionError, Result, ScalarValue, exec_datafusion_err, exec_err, - internal_datafusion_err, unwrap_or_internal_err, + DataFusionError, Result, ScalarType, ScalarValue, exec_datafusion_err, exec_err, + unwrap_or_internal_err, }; use datafusion_expr::ColumnarValue; /// Error message if nanosecond conversion request beyond supported interval const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804"; -static UTC: LazyLock = LazyLock::new(|| "UTC".parse().expect("UTC is always valid")); - -/// Converts a string representation of a date‑time into a timestamp expressed in -/// nanoseconds since the Unix epoch. -/// -/// This helper is a thin wrapper around the more general `string_to_datetime` -/// function. 
It accepts an optional `timezone` which, if `None`, defaults to -/// Coordinated Universal Time (UTC). The string `s` must contain a valid -/// date‑time format that can be parsed by the underlying chrono parser. -/// -/// # Return Value -/// -/// * `Ok(i64)` – The number of nanoseconds since `1970‑01‑01T00:00:00Z`. -/// * `Err(DataFusionError)` – If the string cannot be parsed, the parsed -/// value is out of range (between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804) -/// or the parsed value does not correspond to an unambiguous time. -pub(crate) fn string_to_timestamp_nanos_with_timezone( - timezone: &Option, - s: &str, -) -> Result { - let tz = timezone.as_ref().unwrap_or(&UTC); - let dt = string_to_datetime(tz, s)?; - let parsed = dt - .timestamp_nanos_opt() - .ok_or_else(|| exec_datafusion_err!("{ERR_NANOSECONDS_NOT_SUPPORTED}"))?; - - Ok(parsed) +/// Calls string_to_timestamp_nanos and converts the error type +pub(crate) fn string_to_timestamp_nanos_shim(s: &str) -> Result { + string_to_timestamp_nanos(s).map_err(|e| e.into()) } /// Checks that all the arguments from the second are of type [Utf8], [LargeUtf8] or [Utf8View] @@ -95,12 +69,13 @@ pub(crate) fn validate_data_types(args: &[ColumnarValue], name: &str) -> Result< /// Accepts a string and parses it using the [`chrono::format::strftime`] specifiers /// relative to the provided `timezone` /// +/// [IANA timezones] are only supported if the `arrow-array/chrono-tz` feature is enabled +/// +/// * `2023-01-01 040506 America/Los_Angeles` +/// /// If a timestamp is ambiguous, for example as a result of daylight-savings time, an error /// will be returned /// -/// Note that parsing [IANA timezones] is not supported yet in chrono - -/// and this implementation only supports named timezones at the end of the string preceded by a space. 
-/// /// [`chrono::format::strftime`]: https://docs.rs/chrono/latest/chrono/format/strftime/index.html /// [IANA timezones]: https://www.iana.org/time-zones pub(crate) fn string_to_datetime_formatted( @@ -114,55 +89,11 @@ pub(crate) fn string_to_datetime_formatted( ) }; - let mut datetime_str = s; - let mut format = format; - - // Manually handle the most common case of a named timezone at the end of the timestamp. - // Note that %+ handles 'Z' at the end of the string without a space. This code doesn't - // handle named timezones with no preceding space since that would require writing a - // custom parser (or switching to Jiff) - let tz: Option = if format.trim_end().ends_with(" %Z") { - // grab the string after the last space as the named timezone - if let Some((dt_str, timezone_name)) = datetime_str.trim_end().rsplit_once(' ') { - datetime_str = dt_str; - - // attempt to parse the timezone name - let result: Result = - timezone_name.parse(); - let Ok(tz) = result else { - return Err(err(&result.unwrap_err().to_string())); - }; - - // successfully parsed the timezone name, remove the ' %Z' from the format - format = &format[..format.len() - 3]; - - Some(tz) - } else { - None - } - } else if format.contains("%Z") { - return Err(err( - "'%Z' is only supported at the end of the format string preceded by a space", - )); - } else { - None - }; - let mut parsed = Parsed::new(); - parse(&mut parsed, datetime_str, StrftimeItems::new(format)) - .map_err(|e| err(&e.to_string()))?; + parse(&mut parsed, s, StrftimeItems::new(format)).map_err(|e| err(&e.to_string()))?; - let dt = match tz { - Some(tz) => { - // A timezone was manually parsed out, convert it to a fixed offset - match parsed.to_datetime_with_timezone(&tz) { - Ok(dt) => Ok(dt.fixed_offset()), - Err(e) => Err(e), - } - } - // default to parse the string assuming it has a timezone - None => parsed.to_datetime(), - }; + // attempt to parse the string assuming it has a timezone + let dt = parsed.to_datetime(); if 
let Err(e) = &dt { // no timezone or other failure, try without a timezone @@ -184,7 +115,7 @@ pub(crate) fn string_to_datetime_formatted( } /// Accepts a string with a `chrono` format and converts it to a -/// nanosecond precision timestamp relative to the provided `timezone`. +/// nanosecond precision timestamp. /// /// See [`chrono::format::strftime`] for the full set of supported formats. /// @@ -210,21 +141,19 @@ pub(crate) fn string_to_datetime_formatted( /// /// [`chrono::format::strftime`]: https://docs.rs/chrono/latest/chrono/format/strftime/index.html #[inline] -pub(crate) fn string_to_timestamp_nanos_formatted_with_timezone( - timezone: &Option, +pub(crate) fn string_to_timestamp_nanos_formatted( s: &str, format: &str, ) -> Result { - let dt = string_to_datetime_formatted(timezone.as_ref().unwrap_or(&UTC), s, format)?; - let parsed = dt + string_to_datetime_formatted(&Utc, s, format)? + .naive_utc() + .and_utc() .timestamp_nanos_opt() - .ok_or_else(|| exec_datafusion_err!("{ERR_NANOSECONDS_NOT_SUPPORTED}"))?; - - Ok(parsed) + .ok_or_else(|| exec_datafusion_err!("{ERR_NANOSECONDS_NOT_SUPPORTED}")) } /// Accepts a string with a `chrono` format and converts it to a -/// millisecond precision timestamp relative to the provided `timezone`. +/// millisecond precision timestamp. /// /// See [`chrono::format::strftime`] for the full set of supported formats. /// @@ -247,14 +176,14 @@ pub(crate) fn string_to_timestamp_millis_formatted(s: &str, format: &str) -> Res .timestamp_millis()) } -pub(crate) fn handle( +pub(crate) fn handle( args: &[ColumnarValue], op: F, name: &str, - dt: &DataType, ) -> Result where O: ArrowPrimitiveType, + S: ScalarType, F: Fn(&str) -> Result, { match &args[0] { @@ -281,13 +210,8 @@ where }, ColumnarValue::Scalar(scalar) => match scalar.try_as_str() { Some(a) => { - let result = a - .as_ref() - .map(|x| op(x)) - .transpose()? 
- .and_then(|v| v.to_i64()); - let s = scalar_value(dt, result)?; - Ok(ColumnarValue::Scalar(s)) + let result = a.as_ref().map(|x| op(x)).transpose()?; + Ok(ColumnarValue::Scalar(S::scalar(result))) } _ => exec_err!("Unsupported data type {scalar:?} for function {name}"), }, @@ -297,15 +221,15 @@ where // Given a function that maps a `&str`, `&str` to an arrow native type, // returns a `ColumnarValue` where the function is applied to either a `ArrayRef` or `ScalarValue` // depending on the `args`'s variant. -pub(crate) fn handle_multiple( +pub(crate) fn handle_multiple( args: &[ColumnarValue], op: F, op2: M, name: &str, - dt: &DataType, ) -> Result where O: ArrowPrimitiveType, + S: ScalarType, F: Fn(&str, &str) -> Result, M: Fn(O::Native) -> O::Native, { @@ -374,9 +298,9 @@ where if let Some(s) = x { match op(a, s.as_str()) { Ok(r) => { - let result = op2(r).to_i64(); - let s = scalar_value(dt, result)?; - ret = Some(Ok(ColumnarValue::Scalar(s))); + ret = Some(Ok(ColumnarValue::Scalar(S::scalar(Some( + op2(r), + ))))); break; } Err(e) => ret = Some(Err(e)), @@ -530,16 +454,3 @@ where // first map is the iterator, second is for the `Option<_>` array.iter().map(|x| x.map(&op).transpose()).collect() } - -fn scalar_value(dt: &DataType, r: Option) -> Result { - match dt { - DataType::Date32 => Ok(ScalarValue::Date32(r.and_then(|v| v.to_i32()))), - DataType::Timestamp(u, tz) => match u { - TimeUnit::Second => Ok(ScalarValue::TimestampSecond(r, tz.clone())), - TimeUnit::Millisecond => Ok(ScalarValue::TimestampMillisecond(r, tz.clone())), - TimeUnit::Microsecond => Ok(ScalarValue::TimestampMicrosecond(r, tz.clone())), - TimeUnit::Nanosecond => Ok(ScalarValue::TimestampNanosecond(r, tz.clone())), - }, - t => Err(internal_datafusion_err!("Unsupported data type: {t:?}")), - } -} diff --git a/datafusion/functions/src/datetime/mod.rs b/datafusion/functions/src/datetime/mod.rs index 39b9453295df6..9872db3faf556 100644 --- a/datafusion/functions/src/datetime/mod.rs +++ 
b/datafusion/functions/src/datetime/mod.rs @@ -53,14 +53,11 @@ make_udf_function!(to_date::ToDateFunc, to_date); make_udf_function!(to_local_time::ToLocalTimeFunc, to_local_time); make_udf_function!(to_time::ToTimeFunc, to_time); make_udf_function!(to_unixtime::ToUnixtimeFunc, to_unixtime); -make_udf_function_with_config!(to_timestamp::ToTimestampFunc, to_timestamp); -make_udf_function_with_config!( - to_timestamp::ToTimestampSecondsFunc, - to_timestamp_seconds -); -make_udf_function_with_config!(to_timestamp::ToTimestampMillisFunc, to_timestamp_millis); -make_udf_function_with_config!(to_timestamp::ToTimestampMicrosFunc, to_timestamp_micros); -make_udf_function_with_config!(to_timestamp::ToTimestampNanosFunc, to_timestamp_nanos); +make_udf_function!(to_timestamp::ToTimestampFunc, to_timestamp); +make_udf_function!(to_timestamp::ToTimestampSecondsFunc, to_timestamp_seconds); +make_udf_function!(to_timestamp::ToTimestampMillisFunc, to_timestamp_millis); +make_udf_function!(to_timestamp::ToTimestampMicrosFunc, to_timestamp_micros); +make_udf_function!(to_timestamp::ToTimestampNanosFunc, to_timestamp_nanos); // create UDF with config make_udf_function_with_config!(now::NowFunc, now); @@ -121,24 +118,24 @@ pub mod expr_fn { args, ),( to_timestamp, - "converts a string and optional formats to a `Timestamp(Nanoseconds, TimeZone)`", - @config args, + "converts a string and optional formats to a `Timestamp(Nanoseconds, None)`", + args, ),( to_timestamp_seconds, - "converts a string and optional formats to a `Timestamp(Seconds, TimeZone)`", - @config args, + "converts a string and optional formats to a `Timestamp(Seconds, None)`", + args, ),( to_timestamp_millis, - "converts a string and optional formats to a `Timestamp(Milliseconds, TimeZone)`", - @config args, + "converts a string and optional formats to a `Timestamp(Milliseconds, None)`", + args, ),( to_timestamp_micros, - "converts a string and optional formats to a `Timestamp(Microseconds, TimeZone)`", - @config args, 
+ "converts a string and optional formats to a `Timestamp(Microseconds, None)`", + args, ),( to_timestamp_nanos, - "converts a string and optional formats to a `Timestamp(Nanoseconds, TimeZone)`", - @config args, + "converts a string and optional formats to a `Timestamp(Nanoseconds, None)`", + args, )); /// Returns a string representation of a date, time, timestamp or duration based @@ -274,7 +271,6 @@ pub mod expr_fn { /// Returns all DataFusion functions defined in this package pub fn functions() -> Vec> { use datafusion_common::config::ConfigOptions; - let config = ConfigOptions::default(); vec![ current_date(), current_time(), @@ -284,16 +280,16 @@ pub fn functions() -> Vec> { from_unixtime(), make_date(), make_time(), - now(&config), + now(&ConfigOptions::default()), to_char(), to_date(), to_local_time(), to_time(), to_unixtime(), - to_timestamp(&config), - to_timestamp_seconds(&config), - to_timestamp_millis(&config), - to_timestamp_micros(&config), - to_timestamp_nanos(&config), + to_timestamp(), + to_timestamp_seconds(), + to_timestamp_millis(), + to_timestamp_micros(), + to_timestamp_nanos(), ] } diff --git a/datafusion/functions/src/datetime/to_date.rs b/datafusion/functions/src/datetime/to_date.rs index 60c6fdf2df975..471995089ba3b 100644 --- a/datafusion/functions/src/datetime/to_date.rs +++ b/datafusion/functions/src/datetime/to_date.rs @@ -84,7 +84,7 @@ impl ToDateFunc { fn to_date(&self, args: &[ColumnarValue]) -> Result { match args.len() { - 1 => handle::( + 1 => handle::( args, |s| match Date32Type::parse(s) { Some(v) => Ok(v), @@ -94,9 +94,8 @@ impl ToDateFunc { )), }, "to_date", - &Date32, ), - 2.. => handle_multiple::( + 2.. 
=> handle_multiple::( args, |s, format| { string_to_timestamp_millis_formatted(s, format) @@ -109,7 +108,6 @@ impl ToDateFunc { }, |n| n, "to_date", - &Date32, ), 0 => exec_err!("Unsupported 0 argument count for function to_date"), } diff --git a/datafusion/functions/src/datetime/to_timestamp.rs b/datafusion/functions/src/datetime/to_timestamp.rs index 58077694b07a0..fdaf8a764ad8a 100644 --- a/datafusion/functions/src/datetime/to_timestamp.rs +++ b/datafusion/functions/src/datetime/to_timestamp.rs @@ -20,37 +20,25 @@ use std::sync::Arc; use crate::datetime::common::*; use arrow::array::Float64Array; -use arrow::array::timezone::Tz; use arrow::datatypes::DataType::*; use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second}; use arrow::datatypes::{ - ArrowTimestampType, DataType, TimestampMicrosecondType, TimestampMillisecondType, - TimestampNanosecondType, TimestampSecondType, + ArrowTimestampType, DataType, TimeUnit, TimestampMicrosecondType, + TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, }; -use datafusion_common::config::ConfigOptions; use datafusion_common::format::DEFAULT_CAST_OPTIONS; use datafusion_common::{Result, ScalarType, ScalarValue, exec_err}; use datafusion_expr::{ - ColumnarValue, Documentation, ScalarUDF, ScalarUDFImpl, Signature, Volatility, + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; use datafusion_macros::user_doc; #[user_doc( doc_section(label = "Time and Date Functions"), description = r#" -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000`) in the session time zone. Supports strings, -integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') -if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the -session time zone, or UTC if no session time zone is set. 
-Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). - -Note: `to_timestamp` returns `Timestamp(ns, TimeZone)` where the time zone is the session time zone. The supported range -for integer input is between`-9223372037` and `9223372036`. Supported range for string input is between -`1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` -for the input outside of supported bounds. - -The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`. -The time zone can be a value like +00:00, 'Europe/London' etc. +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats] are provided. Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp. + +Note: `to_timestamp` returns `Timestamp(ns)`. The supported range for integer input is between `-9223372037` and `9223372036`. Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` for the input outside of supported bounds. "#, syntax_example = "to_timestamp(expression[, ..., format_n])", sql_example = r#"```sql @@ -75,32 +63,17 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo ), argument( name = "format_n", - description = r#" -Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. -Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully -parse the expression an error will be returned. Note: parsing of named timezones (e.g. 
'America/New_York') using %Z is -only supported at the end of the string preceded by a space. -"# + description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned." ) )] #[derive(Debug, PartialEq, Eq, Hash)] pub struct ToTimestampFunc { signature: Signature, - timezone: Option>, } #[user_doc( doc_section(label = "Time and Date Functions"), - description = r#" -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00`) in the session time zone. Supports strings, -integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') -if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the -session time zone, or UTC if no session time zone is set. -Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). - -The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`. -The time zone can be a value like +00:00, 'Europe/London' etc. -"#, + description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). 
Returns the corresponding timestamp.", syntax_example = "to_timestamp_seconds(expression[, ..., format_n])", sql_example = r#"```sql > select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00'); @@ -124,32 +97,17 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo ), argument( name = "format_n", - description = r#" -Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. -Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully -parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is -only supported at the end of the string preceded by a space. -"# + description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned." ) )] #[derive(Debug, PartialEq, Eq, Hash)] pub struct ToTimestampSecondsFunc { signature: Signature, - timezone: Option>, } #[user_doc( doc_section(label = "Time and Date Functions"), - description = r#" -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000`) in the session time zone. Supports strings, -integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') -if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the -session time zone, or UTC if no session time zone is set. -Integers, unsigned integers, and doubles are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). - -The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`. 
-The time zone can be a value like +00:00, 'Europe/London' etc. -"#, + description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.", syntax_example = "to_timestamp_millis(expression[, ..., format_n])", sql_example = r#"```sql > select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00'); @@ -173,32 +131,17 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo ), argument( name = "format_n", - description = r#" -Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. -Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully -parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is -only supported at the end of the string preceded by a space. -"# + description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned." ) )] #[derive(Debug, PartialEq, Eq, Hash)] pub struct ToTimestampMillisFunc { signature: Signature, - timezone: Option>, } #[user_doc( doc_section(label = "Time and Date Functions"), - description = r#" -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000`) in the session time zone. Supports strings, -integer, unsigned integer, and double types as input. 
Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') -if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the -session time zone, or UTC if no session time zone is set. -Integers, unsigned integers, and doubles are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`). - -The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`. -The time zone can be a value like +00:00, 'Europe/London' etc. -"#, + description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`) Returns the corresponding timestamp.", syntax_example = "to_timestamp_micros(expression[, ..., format_n])", sql_example = r#"```sql > select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00'); @@ -222,31 +165,17 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo ), argument( name = "format_n", - description = r#" -Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. -Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully -parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is -only supported at the end of the string preceded by a space. -"# + description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. 
If none of the formats successfully parse the expression an error will be returned." ) )] #[derive(Debug, PartialEq, Eq, Hash)] pub struct ToTimestampMicrosFunc { signature: Signature, - timezone: Option>, } #[user_doc( doc_section(label = "Time and Date Functions"), - description = r#" -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000`) in the session time zone. Supports strings, -integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') -if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the -session time zone. Integers, unsigned integers, and doubles are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). - -The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`. -The time zone can be a value like +00:00, 'Europe/London' etc. -"#, + description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.", syntax_example = "to_timestamp_nanos(expression[, ..., format_n])", sql_example = r#"```sql > select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00'); @@ -270,60 +199,83 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo ), argument( name = "format_n", - description = r#" -Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. -Formats will be tried in the order they appear with the first successful one being returned. 
If none of the formats successfully -parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is -only supported at the end of the string preceded by a space. -"# + description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned." ) )] #[derive(Debug, PartialEq, Eq, Hash)] pub struct ToTimestampNanosFunc { signature: Signature, - timezone: Option>, } -/// Macro to generate boilerplate constructors and config methods for ToTimestamp* functions. -/// Generates: Default impl, deprecated new(), new_with_config(), and extracts timezone from ConfigOptions. -macro_rules! impl_to_timestamp_constructors { - ($func:ty) => { - impl Default for $func { - fn default() -> Self { - Self::new_with_config(&ConfigOptions::default()) - } +impl Default for ToTimestampFunc { + fn default() -> Self { + Self::new() + } +} + +impl ToTimestampFunc { + pub fn new() -> Self { + Self { + signature: Signature::variadic_any(Volatility::Immutable), } + } +} - impl $func { - #[deprecated(since = "52.0.0", note = "use `new_with_config` instead")] - /// Deprecated constructor retained for backwards compatibility. - /// - /// Prefer `new_with_config` which allows specifying the - /// timezone via [`ConfigOptions`]. This helper now mirrors the - /// canonical default offset (None) provided by `ConfigOptions::default()`. 
- pub fn new() -> Self { - Self::new_with_config(&ConfigOptions::default()) - } +impl Default for ToTimestampSecondsFunc { + fn default() -> Self { + Self::new() + } +} - pub fn new_with_config(config: &ConfigOptions) -> Self { - Self { - signature: Signature::variadic_any(Volatility::Immutable), - timezone: config - .execution - .time_zone - .as_ref() - .map(|tz| Arc::from(tz.as_str())), - } - } +impl ToTimestampSecondsFunc { + pub fn new() -> Self { + Self { + signature: Signature::variadic_any(Volatility::Immutable), } - }; + } } -impl_to_timestamp_constructors!(ToTimestampFunc); -impl_to_timestamp_constructors!(ToTimestampSecondsFunc); -impl_to_timestamp_constructors!(ToTimestampMillisFunc); -impl_to_timestamp_constructors!(ToTimestampMicrosFunc); -impl_to_timestamp_constructors!(ToTimestampNanosFunc); +impl Default for ToTimestampMillisFunc { + fn default() -> Self { + Self::new() + } +} + +impl ToTimestampMillisFunc { + pub fn new() -> Self { + Self { + signature: Signature::variadic_any(Volatility::Immutable), + } + } +} + +impl Default for ToTimestampMicrosFunc { + fn default() -> Self { + Self::new() + } +} + +impl ToTimestampMicrosFunc { + pub fn new() -> Self { + Self { + signature: Signature::variadic_any(Volatility::Immutable), + } + } +} + +impl Default for ToTimestampNanosFunc { + fn default() -> Self { + Self::new() + } +} + +impl ToTimestampNanosFunc { + pub fn new() -> Self { + Self { + signature: Signature::variadic_any(Volatility::Immutable), + } + } +} /// to_timestamp SQL function /// @@ -331,15 +283,6 @@ impl_to_timestamp_constructors!(ToTimestampNanosFunc); /// The supported range for integer input is between `-9223372037` and `9223372036`. /// Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. /// Please use `to_timestamp_seconds` for the input outside of supported bounds. -/// Macro to generate the with_updated_config method for ToTimestamp* functions. -macro_rules! 
impl_with_updated_config { - () => { - fn with_updated_config(&self, config: &ConfigOptions) -> Option { - Some(Self::new_with_config(config).into()) - } - }; -} - impl ScalarUDFImpl for ToTimestampFunc { fn as_any(&self) -> &dyn Any { self @@ -353,18 +296,15 @@ impl ScalarUDFImpl for ToTimestampFunc { &self.signature } - fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(Timestamp(Nanosecond, self.timezone.clone())) + fn return_type(&self, arg_types: &[DataType]) -> Result { + Ok(return_type_for(&arg_types[0], Nanosecond)) } - impl_with_updated_config!(); - fn invoke_with_args( &self, args: datafusion_expr::ScalarFunctionArgs, ) -> Result { - let datafusion_expr::ScalarFunctionArgs { args, .. } = args; - + let args = args.args; if args.is_empty() { return exec_err!( "to_timestamp function requires 1 or more arguments, got {}", @@ -377,13 +317,13 @@ impl ScalarUDFImpl for ToTimestampFunc { validate_data_types(&args, "to_timestamp")?; } - let tz = self.timezone.clone(); - match args[0].data_type() { Int32 | Int64 => args[0] .cast_to(&Timestamp(Second, None), None)? 
- .cast_to(&Timestamp(Nanosecond, tz), None), - Null | Timestamp(_, _) => args[0].cast_to(&Timestamp(Nanosecond, tz), None), + .cast_to(&Timestamp(Nanosecond, None), None), + Null | Timestamp(_, None) => { + args[0].cast_to(&Timestamp(Nanosecond, None), None) + } Float64 => { let rescaled = arrow::compute::kernels::numeric::mul( &args[0].to_array(1)?, @@ -393,12 +333,15 @@ impl ScalarUDFImpl for ToTimestampFunc { )?; Ok(ColumnarValue::Array(arrow::compute::cast_with_options( &rescaled, - &Timestamp(Nanosecond, tz), + &Timestamp(Nanosecond, None), &DEFAULT_CAST_OPTIONS, )?)) } + Timestamp(_, Some(tz)) => { + args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None) + } Utf8View | LargeUtf8 | Utf8 => { - to_timestamp_impl::(&args, "to_timestamp", &tz) + to_timestamp_impl::(&args, "to_timestamp") } Decimal128(_, _) => { match &args[0] { @@ -411,12 +354,14 @@ impl ScalarUDFImpl for ToTimestampFunc { let scale_factor = 10_i128.pow(*scale as u32); let seconds = value / scale_factor; let fraction = value % scale_factor; + let nanos = (fraction * 1_000_000_000) / scale_factor; + let timestamp_nanos = seconds * 1_000_000_000 + nanos; Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond( Some(timestamp_nanos as i64), - tz, + None, ))) } _ => exec_err!("Invalid decimal value"), @@ -427,7 +372,6 @@ impl ScalarUDFImpl for ToTimestampFunc { } } } - fn documentation(&self) -> Option<&Documentation> { self.doc() } @@ -446,18 +390,15 @@ impl ScalarUDFImpl for ToTimestampSecondsFunc { &self.signature } - fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(Timestamp(Second, self.timezone.clone())) + fn return_type(&self, arg_types: &[DataType]) -> Result { + Ok(return_type_for(&arg_types[0], Second)) } - impl_with_updated_config!(); - fn invoke_with_args( &self, args: datafusion_expr::ScalarFunctionArgs, ) -> Result { - let datafusion_expr::ScalarFunctionArgs { args, .. 
} = args; - + let args = args.args; if args.is_empty() { return exec_err!( "to_timestamp_seconds function requires 1 or more arguments, got {}", @@ -470,17 +411,14 @@ impl ScalarUDFImpl for ToTimestampSecondsFunc { validate_data_types(&args, "to_timestamp")?; } - let tz = self.timezone.clone(); - match args[0].data_type() { - Null | Int32 | Int64 | Timestamp(_, _) | Decimal128(_, _) => { - args[0].cast_to(&Timestamp(Second, tz), None) + Null | Int32 | Int64 | Timestamp(_, None) | Decimal128(_, _) => { + args[0].cast_to(&Timestamp(Second, None), None) + } + Timestamp(_, Some(tz)) => args[0].cast_to(&Timestamp(Second, Some(tz)), None), + Utf8View | LargeUtf8 | Utf8 => { + to_timestamp_impl::(&args, "to_timestamp_seconds") } - Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::( - &args, - "to_timestamp_seconds", - &self.timezone, - ), other => { exec_err!( "Unsupported data type {} for function to_timestamp_seconds", @@ -489,7 +427,6 @@ impl ScalarUDFImpl for ToTimestampSecondsFunc { } } } - fn documentation(&self) -> Option<&Documentation> { self.doc() } @@ -508,18 +445,15 @@ impl ScalarUDFImpl for ToTimestampMillisFunc { &self.signature } - fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(Timestamp(Millisecond, self.timezone.clone())) + fn return_type(&self, arg_types: &[DataType]) -> Result { + Ok(return_type_for(&arg_types[0], Millisecond)) } - impl_with_updated_config!(); - fn invoke_with_args( &self, args: datafusion_expr::ScalarFunctionArgs, ) -> Result { - let datafusion_expr::ScalarFunctionArgs { args, .. 
} = args; - + let args = args.args; if args.is_empty() { return exec_err!( "to_timestamp_millis function requires 1 or more arguments, got {}", @@ -533,13 +467,15 @@ impl ScalarUDFImpl for ToTimestampMillisFunc { } match args[0].data_type() { - Null | Int32 | Int64 | Timestamp(_, _) => { - args[0].cast_to(&Timestamp(Millisecond, self.timezone.clone()), None) + Null | Int32 | Int64 | Timestamp(_, None) => { + args[0].cast_to(&Timestamp(Millisecond, None), None) + } + Timestamp(_, Some(tz)) => { + args[0].cast_to(&Timestamp(Millisecond, Some(tz)), None) } Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::( &args, "to_timestamp_millis", - &self.timezone, ), other => { exec_err!( @@ -549,7 +485,6 @@ impl ScalarUDFImpl for ToTimestampMillisFunc { } } } - fn documentation(&self) -> Option<&Documentation> { self.doc() } @@ -568,18 +503,15 @@ impl ScalarUDFImpl for ToTimestampMicrosFunc { &self.signature } - fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(Timestamp(Microsecond, self.timezone.clone())) + fn return_type(&self, arg_types: &[DataType]) -> Result { + Ok(return_type_for(&arg_types[0], Microsecond)) } - impl_with_updated_config!(); - fn invoke_with_args( &self, args: datafusion_expr::ScalarFunctionArgs, ) -> Result { - let datafusion_expr::ScalarFunctionArgs { args, .. 
} = args; - + let args = args.args; if args.is_empty() { return exec_err!( "to_timestamp_micros function requires 1 or more arguments, got {}", @@ -593,13 +525,15 @@ impl ScalarUDFImpl for ToTimestampMicrosFunc { } match args[0].data_type() { - Null | Int32 | Int64 | Timestamp(_, _) => { - args[0].cast_to(&Timestamp(Microsecond, self.timezone.clone()), None) + Null | Int32 | Int64 | Timestamp(_, None) => { + args[0].cast_to(&Timestamp(Microsecond, None), None) + } + Timestamp(_, Some(tz)) => { + args[0].cast_to(&Timestamp(Microsecond, Some(tz)), None) } Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::( &args, "to_timestamp_micros", - &self.timezone, ), other => { exec_err!( @@ -609,7 +543,6 @@ impl ScalarUDFImpl for ToTimestampMicrosFunc { } } } - fn documentation(&self) -> Option<&Documentation> { self.doc() } @@ -628,18 +561,15 @@ impl ScalarUDFImpl for ToTimestampNanosFunc { &self.signature } - fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(Timestamp(Nanosecond, self.timezone.clone())) + fn return_type(&self, arg_types: &[DataType]) -> Result { + Ok(return_type_for(&arg_types[0], Nanosecond)) } - impl_with_updated_config!(); - fn invoke_with_args( &self, args: datafusion_expr::ScalarFunctionArgs, ) -> Result { - let datafusion_expr::ScalarFunctionArgs { args, .. 
} = args; - + let args = args.args; if args.is_empty() { return exec_err!( "to_timestamp_nanos function requires 1 or more arguments, got {}", @@ -653,14 +583,15 @@ impl ScalarUDFImpl for ToTimestampNanosFunc { } match args[0].data_type() { - Null | Int32 | Int64 | Timestamp(_, _) => { - args[0].cast_to(&Timestamp(Nanosecond, self.timezone.clone()), None) + Null | Int32 | Int64 | Timestamp(_, None) => { + args[0].cast_to(&Timestamp(Nanosecond, None), None) + } + Timestamp(_, Some(tz)) => { + args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None) + } + Utf8View | LargeUtf8 | Utf8 => { + to_timestamp_impl::(&args, "to_timestamp_nanos") } - Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::( - &args, - "to_timestamp_nanos", - &self.timezone, - ), other => { exec_err!( "Unsupported data type {} for function to_timestamp_nanos", @@ -669,16 +600,23 @@ impl ScalarUDFImpl for ToTimestampNanosFunc { } } } - fn documentation(&self) -> Option<&Documentation> { self.doc() } } +/// Returns the return type for the to_timestamp_* function, preserving +/// the timezone if it exists. 
+fn return_type_for(arg: &DataType, unit: TimeUnit) -> DataType { + match arg { + Timestamp(_, Some(tz)) => Timestamp(unit, Some(Arc::clone(tz))), + _ => Timestamp(unit, None), + } +} + fn to_timestamp_impl>( args: &[ColumnarValue], name: &str, - timezone: &Option>, ) -> Result { let factor = match T::UNIT { Second => 1_000_000_000, @@ -687,26 +625,17 @@ fn to_timestamp_impl>( Nanosecond => 1, }; - let tz = match timezone.clone() { - Some(tz) => Some(tz.parse::()?), - None => None, - }; - match args.len() { - 1 => handle::( + 1 => handle::( args, - move |s| string_to_timestamp_nanos_with_timezone(&tz, s).map(|n| n / factor), + |s| string_to_timestamp_nanos_shim(s).map(|n| n / factor), name, - &Timestamp(T::UNIT, timezone.clone()), ), - n if n >= 2 => handle_multiple::( + n if n >= 2 => handle_multiple::( args, - move |s, format| { - string_to_timestamp_nanos_formatted_with_timezone(&tz, s, format) - }, + string_to_timestamp_nanos_formatted, |n| n / factor, name, - &Timestamp(T::UNIT, timezone.clone()), ), _ => exec_err!("Unsupported 0 argument count for function {name}"), } @@ -723,110 +652,35 @@ mod tests { }; use arrow::array::{ArrayRef, Int64Array, StringBuilder}; use arrow::datatypes::{Field, TimeUnit}; - use chrono::{DateTime, FixedOffset, Utc}; + use chrono::Utc; use datafusion_common::config::ConfigOptions; use datafusion_common::{DataFusionError, ScalarValue, assert_contains}; - use datafusion_expr::{ScalarFunctionArgs, ScalarFunctionImplementation}; + use datafusion_expr::ScalarFunctionImplementation; use super::*; fn to_timestamp(args: &[ColumnarValue]) -> Result { - let timezone: Option> = Some("UTC".into()); - to_timestamp_impl::(args, "to_timestamp", &timezone) + to_timestamp_impl::(args, "to_timestamp") } /// to_timestamp_millis SQL function fn to_timestamp_millis(args: &[ColumnarValue]) -> Result { - let timezone: Option> = Some("UTC".into()); - to_timestamp_impl::( - args, - "to_timestamp_millis", - &timezone, - ) + to_timestamp_impl::(args, 
"to_timestamp_millis") } /// to_timestamp_micros SQL function fn to_timestamp_micros(args: &[ColumnarValue]) -> Result { - let timezone: Option> = Some("UTC".into()); - to_timestamp_impl::( - args, - "to_timestamp_micros", - &timezone, - ) + to_timestamp_impl::(args, "to_timestamp_micros") } /// to_timestamp_nanos SQL function fn to_timestamp_nanos(args: &[ColumnarValue]) -> Result { - let timezone: Option> = Some("UTC".into()); - to_timestamp_impl::( - args, - "to_timestamp_nanos", - &timezone, - ) + to_timestamp_impl::(args, "to_timestamp_nanos") } /// to_timestamp_seconds SQL function fn to_timestamp_seconds(args: &[ColumnarValue]) -> Result { - let timezone: Option> = Some("UTC".into()); - to_timestamp_impl::(args, "to_timestamp_seconds", &timezone) - } - - fn udfs_and_timeunit() -> Vec<(Box, TimeUnit)> { - let udfs: Vec<(Box, TimeUnit)> = vec![ - ( - Box::new(ToTimestampFunc::new_with_config(&ConfigOptions::default())), - Nanosecond, - ), - ( - Box::new(ToTimestampSecondsFunc::new_with_config( - &ConfigOptions::default(), - )), - Second, - ), - ( - Box::new(ToTimestampMillisFunc::new_with_config( - &ConfigOptions::default(), - )), - Millisecond, - ), - ( - Box::new(ToTimestampMicrosFunc::new_with_config( - &ConfigOptions::default(), - )), - Microsecond, - ), - ( - Box::new(ToTimestampNanosFunc::new_with_config( - &ConfigOptions::default(), - )), - Nanosecond, - ), - ]; - udfs - } - - fn validate_expected_error( - options: &mut ConfigOptions, - args: ScalarFunctionArgs, - expected_err: &str, - ) { - let udfs = udfs_and_timeunit(); - - for (udf, _) in udfs { - match udf - .with_updated_config(options) - .unwrap() - .invoke_with_args(args.clone()) - { - Ok(_) => panic!("Expected error but got success"), - Err(e) => { - assert!( - e.to_string().contains(expected_err), - "Can not find expected error '{expected_err}'. 
Actual error '{e}'" - ); - } - } - } + to_timestamp_impl::(args, "to_timestamp_seconds") } #[test] @@ -897,368 +751,6 @@ mod tests { Ok(()) } - #[test] - fn to_timestamp_respects_execution_timezone() -> Result<()> { - let udfs = udfs_and_timeunit(); - - let mut options = ConfigOptions::default(); - options.execution.time_zone = Some("-05:00".to_string()); - - let time_zone: Option> = options - .execution - .time_zone - .as_ref() - .map(|tz| Arc::from(tz.as_str())); - - for (udf, time_unit) in udfs { - let field = Field::new("arg", Utf8, true).into(); - - let args = ScalarFunctionArgs { - args: vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some( - "2020-09-08T13:42:29".to_string(), - )))], - arg_fields: vec![field], - number_rows: 1, - return_field: Field::new( - "f", - Timestamp(time_unit, Some("-05:00".into())), - true, - ) - .into(), - config_options: Arc::new(options.clone()), - }; - - let result = udf - .with_updated_config(&options.clone()) - .unwrap() - .invoke_with_args(args)?; - let result = match time_unit { - Second => { - let ColumnarValue::Scalar(ScalarValue::TimestampSecond( - Some(value), - tz, - )) = result - else { - panic!("expected scalar timestamp"); - }; - - assert_eq!(tz, time_zone); - - value - } - Millisecond => { - let ColumnarValue::Scalar(ScalarValue::TimestampMillisecond( - Some(value), - tz, - )) = result - else { - panic!("expected scalar timestamp"); - }; - - assert_eq!(tz, time_zone); - - value - } - Microsecond => { - let ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond( - Some(value), - tz, - )) = result - else { - panic!("expected scalar timestamp"); - }; - - assert_eq!(tz, time_zone); - - value - } - Nanosecond => { - let ColumnarValue::Scalar(ScalarValue::TimestampNanosecond( - Some(value), - tz, - )) = result - else { - panic!("expected scalar timestamp"); - }; - - assert_eq!(tz, time_zone); - - value - } - }; - - let scale = match time_unit { - Second => 1_000_000_000, - Millisecond => 1_000_000, - Microsecond => 1_000, - 
Nanosecond => 1, - }; - - let offset = FixedOffset::west_opt(5 * 3600).unwrap(); - let result = Some( - DateTime::::from_timestamp_nanos(result * scale) - .with_timezone(&offset) - .to_string(), - ); - - assert_eq!(result, Some("2020-09-08 13:42:29 -05:00".to_string())); - } - - Ok(()) - } - - #[test] - fn to_timestamp_formats_respects_execution_timezone() -> Result<()> { - let udfs = udfs_and_timeunit(); - - let mut options = ConfigOptions::default(); - options.execution.time_zone = Some("-05:00".to_string()); - - let time_zone: Option> = options - .execution - .time_zone - .as_ref() - .map(|tz| Arc::from(tz.as_str())); - - let expr_field = Field::new("arg", Utf8, true).into(); - let format_field: Arc = Field::new("fmt", Utf8, true).into(); - - for (udf, time_unit) in udfs { - for (value, format, expected_str) in [ - ( - "2020-09-08 09:42:29 -05:00", - "%Y-%m-%d %H:%M:%S %z", - Some("2020-09-08 09:42:29 -05:00"), - ), - ( - "2020-09-08T13:42:29Z", - "%+", - Some("2020-09-08 08:42:29 -05:00"), - ), - ( - "2020-09-08 13:42:29 UTC", - "%Y-%m-%d %H:%M:%S %Z", - Some("2020-09-08 08:42:29 -05:00"), - ), - ( - "+0000 2024-01-01 12:00:00", - "%z %Y-%m-%d %H:%M:%S", - Some("2024-01-01 07:00:00 -05:00"), - ), - ( - "20200908134229+0100", - "%Y%m%d%H%M%S%z", - Some("2020-09-08 07:42:29 -05:00"), - ), - ( - "2020-09-08+0230 13:42", - "%Y-%m-%d%z %H:%M", - Some("2020-09-08 06:12:00 -05:00"), - ), - ] { - let args = ScalarFunctionArgs { - args: vec![ - ColumnarValue::Scalar(ScalarValue::Utf8(Some(value.to_string()))), - ColumnarValue::Scalar(ScalarValue::Utf8(Some( - format.to_string(), - ))), - ], - arg_fields: vec![Arc::clone(&expr_field), Arc::clone(&format_field)], - number_rows: 1, - return_field: Field::new( - "f", - Timestamp(time_unit, Some("-05:00".into())), - true, - ) - .into(), - config_options: Arc::new(options.clone()), - }; - let result = udf - .with_updated_config(&options.clone()) - .unwrap() - .invoke_with_args(args)?; - let result = match time_unit { - Second 
=> { - let ColumnarValue::Scalar(ScalarValue::TimestampSecond( - Some(value), - tz, - )) = result - else { - panic!("expected scalar timestamp"); - }; - - assert_eq!(tz, time_zone); - - value - } - Millisecond => { - let ColumnarValue::Scalar(ScalarValue::TimestampMillisecond( - Some(value), - tz, - )) = result - else { - panic!("expected scalar timestamp"); - }; - - assert_eq!(tz, time_zone); - - value - } - Microsecond => { - let ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond( - Some(value), - tz, - )) = result - else { - panic!("expected scalar timestamp"); - }; - - assert_eq!(tz, time_zone); - - value - } - Nanosecond => { - let ColumnarValue::Scalar(ScalarValue::TimestampNanosecond( - Some(value), - tz, - )) = result - else { - panic!("expected scalar timestamp"); - }; - - assert_eq!(tz, time_zone); - - value - } - }; - - let scale = match time_unit { - Second => 1_000_000_000, - Millisecond => 1_000_000, - Microsecond => 1_000, - Nanosecond => 1, - }; - let offset = FixedOffset::west_opt(5 * 3600).unwrap(); - let result = Some( - DateTime::::from_timestamp_nanos(result * scale) - .with_timezone(&offset) - .to_string(), - ); - - assert_eq!(result, expected_str.map(|s| s.to_string())); - } - } - - Ok(()) - } - - #[test] - fn to_timestamp_invalid_execution_timezone_behavior() -> Result<()> { - let field: Arc = Field::new("arg", Utf8, true).into(); - let return_field: Arc = - Field::new("f", Timestamp(Nanosecond, None), true).into(); - - let mut options = ConfigOptions::default(); - options.execution.time_zone = Some("Invalid/Timezone".to_string()); - - let args = ScalarFunctionArgs { - args: vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some( - "2020-09-08T13:42:29Z".to_string(), - )))], - arg_fields: vec![Arc::clone(&field)], - number_rows: 1, - return_field: Arc::clone(&return_field), - config_options: Arc::new(options.clone()), - }; - - let expected_err = - "Invalid timezone \"Invalid/Timezone\": failed to parse timezone"; - - 
validate_expected_error(&mut options, args, expected_err); - - Ok(()) - } - - #[test] - fn to_timestamp_formats_invalid_execution_timezone_behavior() -> Result<()> { - let expr_field: Arc = Field::new("arg", Utf8, true).into(); - let format_field: Arc = Field::new("fmt", Utf8, true).into(); - let return_field: Arc = - Field::new("f", Timestamp(Nanosecond, None), true).into(); - - let mut options = ConfigOptions::default(); - options.execution.time_zone = Some("Invalid/Timezone".to_string()); - - let expected_err = - "Invalid timezone \"Invalid/Timezone\": failed to parse timezone"; - - let make_args = |value: &str, format: &str| ScalarFunctionArgs { - args: vec![ - ColumnarValue::Scalar(ScalarValue::Utf8(Some(value.to_string()))), - ColumnarValue::Scalar(ScalarValue::Utf8(Some(format.to_string()))), - ], - arg_fields: vec![Arc::clone(&expr_field), Arc::clone(&format_field)], - number_rows: 1, - return_field: Arc::clone(&return_field), - config_options: Arc::new(options.clone()), - }; - - for (value, format, _expected_str) in [ - ( - "2020-09-08 09:42:29 -05:00", - "%Y-%m-%d %H:%M:%S %z", - Some("2020-09-08 09:42:29 -05:00"), - ), - ( - "2020-09-08T13:42:29Z", - "%+", - Some("2020-09-08 08:42:29 -05:00"), - ), - ( - "2020-09-08 13:42:29 +0000", - "%Y-%m-%d %H:%M:%S %z", - Some("2020-09-08 08:42:29 -05:00"), - ), - ( - "+0000 2024-01-01 12:00:00", - "%z %Y-%m-%d %H:%M:%S", - Some("2024-01-01 07:00:00 -05:00"), - ), - ( - "20200908134229+0100", - "%Y%m%d%H%M%S%z", - Some("2020-09-08 07:42:29 -05:00"), - ), - ( - "2020-09-08+0230 13:42", - "%Y-%m-%d%z %H:%M", - Some("2020-09-08 06:12:00 -05:00"), - ), - ] { - let args = make_args(value, format); - validate_expected_error(&mut options.clone(), args, expected_err); - } - - let args = ScalarFunctionArgs { - args: vec![ - ColumnarValue::Scalar(ScalarValue::Utf8(Some( - "2020-09-08T13:42:29".to_string(), - ))), - ColumnarValue::Scalar(ScalarValue::Utf8(Some( - "%Y-%m-%dT%H:%M:%S".to_string(), - ))), - ], - arg_fields: 
vec![Arc::clone(&expr_field), Arc::clone(&format_field)], - number_rows: 1, - return_field: Arc::clone(&return_field), - config_options: Arc::new(options.clone()), - }; - - validate_expected_error(&mut options.clone(), args, expected_err); - - Ok(()) - } - #[test] fn to_timestamp_invalid_input_type() -> Result<()> { // pass the wrong type of input array to to_timestamp and test @@ -1428,11 +920,7 @@ mod tests { } fn parse_timestamp_formatted(s: &str, format: &str) -> Result { - let result = string_to_timestamp_nanos_formatted_with_timezone( - &Some("UTC".parse()?), - s, - format, - ); + let result = string_to_timestamp_nanos_formatted(s, format); if let Err(e) = &result { eprintln!("Error parsing timestamp '{s}' using format '{format}': {e:?}"); } @@ -1500,21 +988,13 @@ mod tests { } #[test] - fn test_no_tz() { + fn test_tz() { let udfs: Vec> = vec![ - Box::new(ToTimestampFunc::new_with_config(&ConfigOptions::default())), - Box::new(ToTimestampSecondsFunc::new_with_config( - &ConfigOptions::default(), - )), - Box::new(ToTimestampMillisFunc::new_with_config( - &ConfigOptions::default(), - )), - Box::new(ToTimestampNanosFunc::new_with_config( - &ConfigOptions::default(), - )), - Box::new(ToTimestampSecondsFunc::new_with_config( - &ConfigOptions::default(), - )), + Box::new(ToTimestampFunc::new()), + Box::new(ToTimestampSecondsFunc::new()), + Box::new(ToTimestampMillisFunc::new()), + Box::new(ToTimestampNanosFunc::new()), + Box::new(ToTimestampSecondsFunc::new()), ]; let mut nanos_builder = TimestampNanosecondArray::builder(2); @@ -1547,8 +1027,8 @@ mod tests { for array in arrays { let rt = udf.return_type(&[array.data_type()]).unwrap(); let arg_field = Field::new("arg", array.data_type().clone(), true).into(); - assert!(matches!(rt, Timestamp(_, None))); - let args = ScalarFunctionArgs { + assert!(matches!(rt, Timestamp(_, Some(_)))); + let args = datafusion_expr::ScalarFunctionArgs { args: vec![array.clone()], arg_fields: vec![arg_field], number_rows: 4, @@ -1563,7 
+1043,7 @@ mod tests { _ => panic!("Expected a columnar array"), }; let ty = array.data_type(); - assert!(matches!(ty, Timestamp(_, None))); + assert!(matches!(ty, Timestamp(_, Some(_)))); } } @@ -1598,7 +1078,7 @@ mod tests { let rt = udf.return_type(&[array.data_type()]).unwrap(); assert!(matches!(rt, Timestamp(_, None))); let arg_field = Field::new("arg", array.data_type().clone(), true).into(); - let args = ScalarFunctionArgs { + let args = datafusion_expr::ScalarFunctionArgs { args: vec![array.clone()], arg_fields: vec![arg_field], number_rows: 5, diff --git a/datafusion/functions/src/datetime/to_unixtime.rs b/datafusion/functions/src/datetime/to_unixtime.rs index 5ebcce0a7cfc2..fb6315d8f0e06 100644 --- a/datafusion/functions/src/datetime/to_unixtime.rs +++ b/datafusion/functions/src/datetime/to_unixtime.rs @@ -135,7 +135,7 @@ impl ScalarUDFImpl for ToUnixtimeFunc { .cast_to(&DataType::Timestamp(TimeUnit::Second, tz), None)? .cast_to(&DataType::Int64, None), DataType::Utf8View | DataType::LargeUtf8 | DataType::Utf8 => { - ToTimestampSecondsFunc::new_with_config(args.config_options.as_ref()) + ToTimestampSecondsFunc::new() .invoke_with_args(args)? .cast_to(&DataType::Int64, None) } diff --git a/datafusion/functions/src/macros.rs b/datafusion/functions/src/macros.rs index 4adc331fef669..3d728a178497a 100644 --- a/datafusion/functions/src/macros.rs +++ b/datafusion/functions/src/macros.rs @@ -41,17 +41,6 @@ /// - `Vec` argument (single argument followed by a comma) /// - Variable number of `Expr` arguments (zero or more arguments, must be without commas) /// - Functions that require config (marked with `@config` prefix) -/// -/// Note on configuration construction paths: -/// - The convenience wrappers generated for `@config` functions call the inner -/// constructor with `ConfigOptions::default()`. These wrappers are intended -/// primarily for programmatic `Expr` construction and convenience usage. 
-/// - When functions are registered in a session, DataFusion will call -/// `with_updated_config()` to create a `ScalarUDF` instance using the session's -/// actual `ConfigOptions`. This also happens when configuration changes at runtime -/// (e.g., via `SET` statements). In short: the macro uses the default config for -/// convenience constructors; the session config is applied when functions are -/// registered or when configuration is updated. #[macro_export] macro_rules! export_functions { ($(($FUNC:ident, $DOC:expr, $($arg:tt)*)),*) => { @@ -70,24 +59,6 @@ macro_rules! export_functions { } }; - // function that requires config and takes a vector argument - (single $FUNC:ident, $DOC:expr, @config $arg:ident,) => { - #[doc = $DOC] - pub fn $FUNC($arg: Vec) -> datafusion_expr::Expr { - use datafusion_common::config::ConfigOptions; - super::$FUNC(&ConfigOptions::default()).call($arg) - } - }; - - // function that requires config and variadic arguments - (single $FUNC:ident, $DOC:expr, @config $($arg:ident)*) => { - #[doc = $DOC] - pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr { - use datafusion_common::config::ConfigOptions; - super::$FUNC(&ConfigOptions::default()).call(vec![$($arg),*]) - } - }; - // single vector argument (a single argument followed by a comma) (single $FUNC:ident, $DOC:expr, $arg:ident,) => { #[doc = $DOC] diff --git a/datafusion/sqllogictest/test_files/datetime/timestamps.slt b/datafusion/sqllogictest/test_files/datetime/timestamps.slt index dbb924ef7aa63..5749b1c53d852 100644 --- a/datafusion/sqllogictest/test_files/datetime/timestamps.slt +++ b/datafusion/sqllogictest/test_files/datetime/timestamps.slt @@ -193,8 +193,6 @@ SELECT TIMESTAMPTZ '2000-01-01T01:01:01' ---- 2000-01-01T01:01:01Z -statement ok -RESET datafusion.execution.time_zone ########## ## cast tests @@ -2333,7 +2331,7 @@ CREATE TABLE foo (time TIMESTAMPTZ) AS VALUES ('2020-01-01T03:00:00+05:00') statement ok -RESET datafusion.execution.time_zone +SET 
TIME ZONE = '+00' # verify column type query T @@ -2447,7 +2445,7 @@ true true true true true true # known issues. currently overflows (expects default precision to be microsecond instead of nanoseconds. Work pending) #verify extreme values #query PPPPPPPP -#SELECT to_timestamp(-62125747200), to_timestamp(1926632005177), -62125747200::timestamp as t1, 1926632005177::timestamp, cast(-62125747200 as timestamp), cast(1926632005177 as timestamp) as t2 +#SELECT to_timestamp(-62125747200), to_timestamp(1926632005177), -62125747200::timestamp, 1926632005177::timestamp, cast(-62125747200 as timestamp), cast(1926632005177 as timestamp) #---- #0001-04-25T00:00:00 +63022-07-16T12:59:37 0001-04-25T00:00:00 +63022-07-16T12:59:37 0001-04-25T00:00:00 +63022-07-16T12:59:37 @@ -2773,8 +2771,8 @@ SELECT t1.ts, t1.ts + INTERVAL '1' SECOND FROM t1; query PT SELECT t1.ts::timestamptz, arrow_typeof(t1.ts::timestamptz) FROM t1; ---- -2018-07-01T06:00:00 Timestamp(ns) -2018-07-01T07:00:00 Timestamp(ns) +2018-07-01T06:00:00Z Timestamp(ns, "+00") +2018-07-01T07:00:00Z Timestamp(ns, "+00") query D SELECT 0::TIME @@ -4341,7 +4339,7 @@ SELECT arrow_cast(a, 'LargeUtf8') FROM (SELECT CAST('2005-09-10 13:31:00 +02:00' AS timestamp with time zone) AS a) ---- -Timestamp(ns) 2005-09-10T11:31:00 2005-09-10T11:31:00 2005-09-10T11:31:00 2005-09-10T11:31:00 +Timestamp(ns, "+00") 2005-09-10T11:31:00Z 2005-09-10T11:31:00Z 2005-09-10T11:31:00Z 2005-09-10T11:31:00Z query P SELECT diff --git a/datafusion/sqllogictest/test_files/to_timestamp_timezone.slt b/datafusion/sqllogictest/test_files/to_timestamp_timezone.slt deleted file mode 100644 index d48e41d1204de..0000000000000 --- a/datafusion/sqllogictest/test_files/to_timestamp_timezone.slt +++ /dev/null @@ -1,204 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -########## -## to_timestamp timezone tests -########## - -## Reset timezone for other tests -statement ok -RESET datafusion.execution.time_zone - -## Test 1: Default timezone (None) with naive timestamp -## Naive timestamps (without explicit timezone) should be interpreted as UTC by default -query P -SELECT to_timestamp('2020-09-08T13:42:29'); ----- -2020-09-08T13:42:29 - -## Test 2: Explicit UTC timezone ('Z' suffix) -## Explicit timezone should be respected regardless of session timezone -query P -SELECT to_timestamp('2020-09-08T13:42:29Z'); ----- -2020-09-08T13:42:29 - -## Test 3: Explicit timezone offset (+05:00) -## Explicit offset should be respected - this is 13:42:29+05:00 which is 08:42:29 UTC -query P -SELECT to_timestamp('2020-09-08T13:42:29+05:00'); ----- -2020-09-08T08:42:29 - -## Test 4: Explicit timezone offset without colon (+0500) -## Should handle offset formats without colons -query P -SELECT to_timestamp('2020-09-08T13:42:29+0500'); ----- -2020-09-08T08:42:29 - -## Test 5: Negative timezone offset -query P -SELECT to_timestamp('2020-09-08T13:42:29-03:30'); ----- -2020-09-08T17:12:29 - -## Test 6: Configure session timezone to America/New_York -statement ok -SET datafusion.execution.time_zone = 'America/New_York'; - -## Test 7: Naive timestamp with configured timezone -## '2020-09-08T13:42:29' in America/New_York is EDT (UTC-4) -## So this should become 
'2020-09-08T13:42:29-04:00' -query P -SELECT to_timestamp('2020-09-08T13:42:29'); ----- -2020-09-08T13:42:29-04:00 - -## Test 8: Explicit UTC should be transformed to configured timezone -query P -SELECT to_timestamp('2020-09-08T13:42:29Z'); ----- -2020-09-08T09:42:29-04:00 - -## Test 9: Explicit offset should be transformed to configured timezone -query P -SELECT to_timestamp('2020-09-08T13:42:29+05:00'); ----- -2020-09-08T04:42:29-04:00 - -## Test 10: Check arrow_typeof returns timstamp in configured timezone -## Result should be Timestamp(Nanosecond, "America/New_York") regardless of input timezone -query T -SELECT arrow_typeof(to_timestamp('2020-09-08T13:42:29Z')); ----- -Timestamp(ns, "America/New_York") - -## Test 11: Configure to offset-based timezone -statement ok -SET datafusion.execution.time_zone = '+05:30'; - -## Test 12: Naive timestamp with offset-based configured timezone -query P -SELECT to_timestamp('2020-09-08T13:42:29'); ----- -2020-09-08T13:42:29+05:30 - -## Test 13: Reset to None -statement ok -RESET datafusion.execution.time_zone - -## Test 14: Naive timestamp -query P -SELECT to_timestamp('2020-09-08T13:42:29'); ----- -2020-09-08T13:42:29 - -query P -SELECT to_timestamp('2020-09-08T13:42:29Z'); ----- -2020-09-08T13:42:29 - -query P -SELECT to_timestamp('2020-09-08T13:42:29+05:00'); ----- -2020-09-08T08:42:29 - -statement ok -SET datafusion.execution.time_zone = 'America/New_York'; - -## Test 15: to_timestamp with format string - naive timestamp with session timezone - -query P -SELECT to_timestamp('2020-09-08 13:42:29', '%Y-%m-%d %H:%M:%S'); ----- -2020-09-08T13:42:29-04:00 - -## Test 16: to_timestamp with format string - explicit timezone should be respected -statement ok -SET datafusion.execution.time_zone = 'UTC'; - -query P -SELECT to_timestamp('2020-09-08 13:42:29 +0000', '%Y-%m-%d %H:%M:%S %z'); ----- -2020-09-08T13:42:29Z - -query P -SELECT to_timestamp('2020-09-08 13:42:29 America/Toronto', '%Y-%m-%d %H:%M:%S %Z'); ----- 
-2020-09-08T17:42:29Z - -query error Error parsing timestamp from '2020-09-08 13:42:29America/Toronto' using format '%Y-%m-%d %H:%M:%S%Z': '%Z' is only supported at the end of the format string preceded by a space -SELECT to_timestamp('2020-09-08 13:42:29America/Toronto', '%Y-%m-%d %H:%M:%S%Z'); - -## Test 17: Test all precision variants respect timezone -statement ok -SET datafusion.execution.time_zone = 'America/New_York'; - -## to_timestamp_seconds -query P -SELECT to_timestamp_seconds('2020-09-08T13:42:29'); ----- -2020-09-08T13:42:29-04:00 - -## to_timestamp_millis -query P -SELECT to_timestamp_millis('2020-09-08T13:42:29.123'); ----- -2020-09-08T13:42:29.123-04:00 - -## to_timestamp_micros -query P -SELECT to_timestamp_micros('2020-09-08T13:42:29.123456'); ----- -2020-09-08T13:42:29.123456-04:00 - -## to_timestamp_nanos -query P -SELECT to_timestamp_nanos('2020-09-08T13:42:29.123456789'); ----- -2020-09-08T13:42:29.123456789-04:00 - -## test integers -query T -select arrow_typeof(to_timestamp_seconds(61)) ----- -Timestamp(s, "America/New_York") - -query T -select arrow_typeof(to_timestamp_millis(61)) ----- -Timestamp(ms, "America/New_York") - -query T -select arrow_typeof(to_timestamp_micros(61)) ----- -Timestamp(µs, "America/New_York") - -query T -select arrow_typeof(to_timestamp_nanos(61)) ----- -Timestamp(ns, "America/New_York") - -query T -select arrow_typeof(to_timestamp(61)) ----- -Timestamp(ns, "America/New_York") - -## Reset timezone for other tests -statement ok -RESET datafusion.execution.time_zone diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index fe1ed1cab6bd7..360311552f025 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -2867,19 +2867,9 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo ### `to_timestamp` -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000`) in the session 
time zone. Supports strings, -integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') -if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the -session time zone, or UTC if no session time zone is set. -Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats] are provided. Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp. -Note: `to_timestamp` returns `Timestamp(ns, TimeZone)` where the time zone is the session time zone. The supported range -for integer input is between`-9223372037` and `9223372036`. Supported range for string input is between -`1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` -for the input outside of supported bounds. - -The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`. -The time zone can be a value like +00:00, 'Europe/London' etc. +Note: `to_timestamp` returns `Timestamp(ns)`. The supported range for integer input is between `-9223372037` and `9223372036`. Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` for the input outside of supported bounds. ```sql to_timestamp(expression[, ..., format_n]) @@ -2888,11 +2878,7 @@ to_timestamp(expression[, ..., format_n]) #### Arguments - **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. 
-- **format_n**: - Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. - Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully - parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is - only supported at the end of the string preceded by a space. +- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. #### Example @@ -2915,14 +2901,7 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo ### `to_timestamp_micros` -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000`) in the session time zone. Supports strings, -integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') -if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the -session time zone, or UTC if no session time zone is set. -Integers, unsigned integers, and doubles are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`). - -The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`. -The time zone can be a value like +00:00, 'Europe/London' etc. +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. 
Integers and unsigned integers are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`) Returns the corresponding timestamp. ```sql to_timestamp_micros(expression[, ..., format_n]) @@ -2931,11 +2910,7 @@ to_timestamp_micros(expression[, ..., format_n]) #### Arguments - **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. -- **format_n**: - Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. - Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully - parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is - only supported at the end of the string preceded by a space. +- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. #### Example @@ -2958,14 +2933,7 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo ### `to_timestamp_millis` -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000`) in the session time zone. Supports strings, -integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') -if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the -session time zone, or UTC if no session time zone is set. -Integers, unsigned integers, and doubles are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). - -The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`. 
-The time zone can be a value like +00:00, 'Europe/London' etc. +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp. ```sql to_timestamp_millis(expression[, ..., format_n]) @@ -2974,11 +2942,7 @@ to_timestamp_millis(expression[, ..., format_n]) #### Arguments - **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. -- **format_n**: - Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. - Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully - parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is - only supported at the end of the string preceded by a space. +- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. #### Example @@ -3001,13 +2965,7 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo ### `to_timestamp_nanos` -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000`) in the session time zone. Supports strings, -integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') -if no [Chrono formats] are provided. 
Strings that parse without a time zone are treated as if they are in the -session time zone. Integers, unsigned integers, and doubles are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). - -The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`. -The time zone can be a value like +00:00, 'Europe/London' etc. +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp. ```sql to_timestamp_nanos(expression[, ..., format_n]) @@ -3016,11 +2974,7 @@ to_timestamp_nanos(expression[, ..., format_n]) #### Arguments - **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. -- **format_n**: - Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. - Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully - parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is - only supported at the end of the string preceded by a space. +- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned.
#### Example @@ -3043,14 +2997,7 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo ### `to_timestamp_seconds` -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00`) in the session time zone. Supports strings, -integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') -if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the -session time zone, or UTC if no session time zone is set. -Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). - -The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`. -The time zone can be a value like +00:00, 'Europe/London' etc. +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp. ```sql to_timestamp_seconds(expression[, ..., format_n]) @@ -3059,11 +3006,7 @@ to_timestamp_seconds(expression[, ..., format_n]) #### Arguments - **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. -- **format_n**: - Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. - Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully - parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is - only supported at the end of the string preceded by a space.
+- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. #### Example