Skip to content

Commit 89f816a

Browse files
cj-zhukovalamb
andauthored
Disallow order by within ordered-set aggregate functions argument lists (apache#20421)
## Which issue does this PR close? <!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes apache#123` indicates that this PR will close issue apache#123. --> - Closes #apache#18281. ## Rationale for this change This PR fixes incorrect handling of ordered-set aggregate syntax. <!-- Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed. Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. --> ## What changes are included in this PR? - Updated `datafusion/sql/src/expr/function.rs` to return a planning error when `ORDER BY` is used inline - Added test coverage in `datafusion/sqllogictest/test_files/aggregate.slt` to ensure these invalid forms now return a syntax/planning error <!-- There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. --> ## Are these changes tested? Yes, these changes are tested. <!-- We typically require tests for all PRs in order to: 1. Prevent the code from being accidentally broken by subsequent changes 2. Serve as another way to document the expected behavior of the code If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? --> ## Are there any user-facing changes? Yes - this is a public API change. This query: ``` select quantile_cont(col0, 0.75 order by col0) from values (1, 3), (2, 2), (3, 1) t(col0, col1); ``` now returns an error: `ORDER BY must be specified using WITHIN GROUP` <!-- If there are user-facing changes then we may require documentation to be updated before approving the PR. --> <!-- If there are any breaking changes to public APIs, please add the `api change` label. --> --------- Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
1 parent 3f475b1 commit 89f816a

File tree

2 files changed

+85
-16
lines changed

2 files changed

+85
-16
lines changed

datafusion/sql/src/expr/function.rs

Lines changed: 55 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -542,32 +542,70 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
542542
// accept a WITHIN GROUP clause.
543543
let supports_within_group = fm.supports_within_group_clause();
544544

545-
if !within_group.is_empty() && !supports_within_group {
545+
// Built-in ordered-set aggregates must also support WITHIN GROUP
546+
let is_builtin_ordered_set = matches!(
547+
name.as_str(),
548+
"percentile_cont"
549+
| "quantile_cont"
550+
| "approx_percentile_cont"
551+
| "approx_percentile_cont_with_weight"
552+
);
553+
554+
let supports_within_group =
555+
supports_within_group || is_builtin_ordered_set;
556+
557+
let mut within_group = within_group;
558+
let mut order_by = order_by;
559+
560+
if supports_within_group
561+
&& within_group.is_empty()
562+
&& !order_by.is_empty()
563+
{
564+
// Inline ORDER BY syntax:
565+
// quantile_cont(value, percentile ORDER BY value)
566+
if args.len() >= 2 {
567+
args.remove(0);
568+
arg_names.remove(0);
569+
}
570+
571+
within_group = order_by;
572+
order_by = vec![];
573+
}
574+
575+
if !supports_within_group && !within_group.is_empty() {
546576
return plan_err!(
547577
"WITHIN GROUP is only supported for ordered-set aggregate functions"
548578
);
549579
}
550580

551-
// If the UDAF supports WITHIN GROUP, convert the ordering into
552-
// sort expressions and prepend them as unnamed function args.
553-
let order_by = if supports_within_group {
554-
let (within_group_sorts, new_args, new_arg_names) = self
555-
.extract_and_prepend_within_group_args(
581+
let order_by: Vec<SortExpr> = if supports_within_group {
582+
if !within_group.is_empty() {
583+
// WITHIN GROUP syntax
584+
let sorts = self.order_by_to_sort_expr(
556585
within_group,
557-
args,
558-
arg_names,
559586
schema,
560587
planner_context,
588+
false,
589+
None,
561590
)?;
562-
args = new_args;
563-
arg_names = new_arg_names;
564-
within_group_sorts
565-
} else {
566-
let order_by = if !order_by.is_empty() {
567-
order_by
591+
592+
if sorts.len() != 1 {
593+
return plan_err!(
594+
"Only a single ordering expression is permitted in WITHIN GROUP clause"
595+
);
596+
}
597+
598+
// Prepend ordered value expression to args
599+
let value_expr = sorts[0].expr.clone();
600+
arg_names = std::iter::once(None).chain(arg_names).collect();
601+
args = std::iter::once(value_expr).chain(args).collect();
602+
603+
sorts
568604
} else {
569-
within_group
570-
};
605+
vec![]
606+
}
607+
} else {
608+
// Normal aggregate behavior
571609
self.order_by_to_sort_expr(
572610
order_by,
573611
schema,
@@ -867,6 +905,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
867905
Ok((exprs, names))
868906
}
869907

908+
#[expect(dead_code)]
870909
fn extract_and_prepend_within_group_args(
871910
&self,
872911
within_group: Vec<OrderByExpr>,

datafusion/sqllogictest/test_files/aggregate.slt

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,36 @@ CREATE TABLE group_median_table_nullable (
125125
( 'group1', 125, 32766, 2147483646, arrow_cast(9223372036854775806,'Int64'), 100, 101, 4294967294, arrow_cast(100,'UInt64'), 3.2, 5.5, arrow_cast('NAN','Float64'), 0.0004, 0.0004 ),
126126
( 'group1', 127, 32767, 2147483647, arrow_cast(9223372036854775807,'Int64'), 255, 65535, 4294967295, 18446744073709551615, 2.2, 2.2, arrow_cast('NAN','Float64'), 0.0005, 0.0005 )
127127

128+
query R
129+
select quantile_cont(col0, 0.75 order by col0)
130+
from values (1, 3), (2, 2), (3, 1) t(col0, col1);
131+
----
132+
2.5
133+
134+
query R
135+
select quantile_cont(col0, 0.75 order by col0 desc)
136+
from values (1, 3), (2, 2), (3, 1) t(col0, col1);
137+
----
138+
1.5
139+
140+
query R
141+
select quantile_cont(0.75 order by col0)
142+
from values (1, 3), (2, 2), (3, 1) t(col0, col1);
143+
----
144+
2.5
145+
146+
query R
147+
select quantile_cont(0.75 order by col0 desc)
148+
from values (1, 3), (2, 2), (3, 1) t(col0, col1);
149+
----
150+
1.5
151+
152+
query R
153+
select quantile_cont(0.75) within group (order by col0)
154+
from values (1), (2), (3) t(col0);
155+
----
156+
2.5
157+
128158
#######
129159
# Error tests
130160
#######

0 commit comments

Comments
 (0)