Skip to content

Commit 7c5eb20

Browse files
Merge branch 'apache:main' into main
2 parents c3aadf9 + 28bd4bc commit 7c5eb20

File tree

40 files changed

+1251
-343
lines changed

40 files changed

+1251
-343
lines changed
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
name: Label new issues with requires-triage
19+
20+
on:
21+
issues:
22+
types: [opened]
23+
24+
permissions:
25+
issues: write
26+
27+
jobs:
28+
add-triage-label:
29+
runs-on: ubuntu-latest
30+
steps:
31+
- uses: actions/github-script@v7
32+
with:
33+
script: |
34+
await github.rest.issues.addLabels({
35+
owner: context.repo.owner,
36+
repo: context.repo.repo,
37+
issue_number: context.issue.number,
38+
labels: ['requires-triage']
39+
})

common/src/main/scala/org/apache/comet/CometConf.scala

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -798,6 +798,20 @@ object CometConf extends ShimCometConf {
798798
.longConf
799799
.createWithDefault(3000L)
800800

801+
val COMET_METRICS_ENABLED: ConfigEntry[Boolean] =
802+
conf("spark.comet.metrics.enabled")
803+
.category(CATEGORY_EXEC)
804+
.doc(
805+
"Whether to enable Comet metrics reporting through Spark's external monitoring system. " +
806+
"When enabled, Comet exposes metrics such as native operators, Spark operators, " +
807+
"queries planned, transitions, and acceleration ratio. These metrics can be " +
808+
"visualized through tools like Grafana when a metrics sink (e.g., Prometheus) is " +
809+
"configured. Disabled by default because Spark plan traversal adds overhead and " +
810+
"metrics require a sink to be useful. " +
811+
"This config must be set before the SparkSession is created to take effect.")
812+
.booleanConf
813+
.createWithDefault(false)
814+
801815
val COMET_LIBHDFS_SCHEMES_KEY = "fs.comet.libhdfs.schemes"
802816

803817
val COMET_LIBHDFS_SCHEMES: OptionalConfigEntry[String] =

dev/diffs/3.5.8.diff

Lines changed: 48 additions & 300 deletions
Large diffs are not rendered by default.

dev/diffs/4.0.1.diff

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,21 @@ index aa3d02dc2fb..c4f878d9908 100644
245245
-- Test cases with unicode_rtrim.
246246
WITH t(c1) AS (SELECT replace(listagg(DISTINCT col1 COLLATE unicode_rtrim) COLLATE utf8_binary, ' ', '') FROM (VALUES ('xbc '), ('xbc '), ('a'), ('xbc'))) SELECT len(c1), regexp_count(c1, 'a'), regexp_count(c1, 'xbc') FROM t;
247247
WITH t(c1) AS (SELECT listagg(col1) WITHIN GROUP (ORDER BY col1 COLLATE unicode_rtrim) FROM (VALUES ('abc '), ('abc\n'), ('abc'), ('x'))) SELECT replace(replace(c1, ' ', ''), '\n', '$') FROM t;
248+
diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part3.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part3.sql
249+
index 0000000..0000000 100644
250+
--- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part3.sql
251+
+++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part3.sql
252+
@@ -6,6 +6,10 @@
253+
-- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/aggregates.sql#L352-L605
254+
255+
-- Test aggregate operator with codegen on and off.
256+
+
257+
+-- Floating-point precision difference between DataFusion and JVM for FILTER aggregates
258+
+--SET spark.comet.enabled = false
259+
+
260+
--CONFIG_DIM1 spark.sql.codegen.wholeStage=true
261+
--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY
262+
--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN
248263
diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/int4.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/int4.sql
249264
index 3a409eea348..26e9aaf215c 100644
250265
--- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/int4.sql

dev/ensure-jars-have-correct-contents.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ allowed_expr+="|^org/apache/spark/sql/$"
9393
allowed_expr+="|^org/apache/spark/sql/ExtendedExplainGenerator.*$"
9494
allowed_expr+="|^org/apache/spark/CometPlugin.class$"
9595
allowed_expr+="|^org/apache/spark/CometDriverPlugin.*$"
96+
allowed_expr+="|^org/apache/spark/CometSource.*$"
9697
allowed_expr+="|^org/apache/spark/CometTaskMemoryManager.class$"
9798
allowed_expr+="|^org/apache/spark/CometTaskMemoryManager.*$"
9899
allowed_expr+="|^scala-collection-compat.properties$"
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
<!---
2+
Licensed to the Apache Software Foundation (ASF) under one
3+
or more contributor license agreements. See the NOTICE file
4+
distributed with this work for additional information
5+
regarding copyright ownership. The ASF licenses this file
6+
to you under the Apache License, Version 2.0 (the
7+
"License"); you may not use this file except in compliance
8+
with the License. You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing,
13+
software distributed under the License is distributed on an
14+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
KIND, either express or implied. See the License for the
16+
specific language governing permissions and limitations
17+
under the License.
18+
-->
19+
20+
# Bug Triage Guide
21+
22+
This guide describes how we prioritize and triage bugs in the Comet project. The goal is to ensure
23+
that the most impactful bugs — especially correctness issues that produce wrong results — are
24+
identified and addressed before less critical issues.
25+
26+
## Priority Labels
27+
28+
Every bug should have exactly one priority label. When filing or triaging a bug, apply the
29+
appropriate label from the table below.
30+
31+
| Label | Color | Description | Examples |
32+
| ------------------- | ------ | ------------------------------------------------------------------------------------ | --------------------------------------------------------------------- |
33+
| `priority:critical` | Red | Data corruption, silent wrong results, security vulnerabilities | Wrong aggregation results, FFI data corruption, incorrect cast output |
34+
| `priority:high` | Orange | Crashes, panics, segfaults, major functional breakage affecting production workloads | Native engine panic, JVM segfault, NPE on supported code path |
35+
| `priority:medium` | Yellow | Functional bugs, performance regressions, broken features that have workarounds | Missing expression support, writer feature gaps, excessive spilling |
36+
| `priority:low` | Green | Minor issues, test-only failures, tooling, CI flakes, cosmetic issues | Flaky CI test, build script edge case, documentation generator bug |
37+
38+
### How to Choose a Priority
39+
40+
Use this decision tree:
41+
42+
1. **Can this bug cause silent wrong results?** If yes → `priority:critical`. These are the most
43+
dangerous bugs because users may not notice the incorrect output.
44+
2. **Does this bug crash the JVM or native engine?** If yes → `priority:high`. Crashes are
45+
disruptive but at least visible to the user.
46+
3. **Does this bug break a feature or cause significant performance degradation?** If yes →
47+
`priority:medium`. The user can work around it (e.g., falling back to Spark) but it impacts
48+
the value of Comet.
49+
4. **Everything else** → `priority:low`. Test failures, CI issues, tooling, and cosmetic problems.
50+
51+
### Escalation Triggers
52+
53+
A bug should be escalated to a higher priority if:
54+
55+
- A `priority:high` crash is discovered to also produce wrong results silently in some cases →
56+
escalate to `priority:critical`
57+
- A `priority:medium` bug is reported by multiple users or affects a common workload → consider
58+
escalating to `priority:high`
59+
- A `priority:low` CI flake is blocking PR merges consistently → escalate to `priority:medium`
60+
61+
## Area Labels
62+
63+
Area labels indicate which subsystem is affected. A bug may have multiple area labels. These
64+
help contributors find bugs in their area of expertise.
65+
66+
| Label | Description |
67+
| ------------------ | ----------------------------------------- |
68+
| `area:writer` | Native writer (Parquet and other formats) |
69+
| `area:shuffle` | Shuffle (JVM and native) |
70+
| `area:aggregation` | Hash aggregates, aggregate expressions |
71+
| `area:scan` | Data source scan (Parquet, CSV, Iceberg) |
72+
| `area:expressions` | Expression evaluation |
73+
| `area:ffi` | Arrow FFI / JNI boundary |
74+
| `area:ci` | CI/CD, GitHub Actions, build tooling |
75+
76+
The following pre-existing labels also serve as area indicators: `native_datafusion`,
77+
`native_iceberg_compat`, `spark 4`, `spark sql tests`.
78+
79+
## Triage Process
80+
81+
Every new issue is automatically labeled with `requires-triage` when it is opened. This makes it
82+
easy to find issues that have not yet been triaged by filtering on that label. Once an issue has
83+
been triaged, remove the `requires-triage` label and apply the appropriate priority and area labels.
84+
85+
### For New Issues
86+
87+
When a new bug is filed:
88+
89+
1. **Reproduce or verify** the issue if possible. If the report lacks reproduction steps, ask
90+
the reporter for more details.
91+
2. **Assess correctness impact first.** Ask: "Could this produce wrong results silently?" This
92+
is more important than whether it crashes.
93+
3. **Apply a priority label** using the decision tree above.
94+
4. **Apply area labels** to indicate the affected subsystem(s).
95+
5. **Apply `good first issue`** if the fix is likely straightforward and well-scoped.
96+
6. **Remove the `requires-triage` label** to indicate triage is complete.
97+
98+
### For Existing Bugs
99+
100+
Periodically review open bugs to ensure priorities are still accurate:
101+
102+
- Has a `priority:medium` bug been open for a long time with user reports? Consider escalating.
103+
- Has a `priority:high` bug been fixed by a related change? Close it.
104+
- Are there clusters of related bugs that should be tracked under an EPIC?
105+
106+
### Prioritization Principles
107+
108+
1. **Correctness over crashes.** A bug that silently returns wrong results is worse than one that
109+
crashes, because crashes are at least visible.
110+
2. **User-reported over test-only.** A bug hit by a real user on a real workload takes priority
111+
over one found only in test suites.
112+
3. **Core path over experimental.** Bugs in the default scan mode (`native_comet`) or widely-used
113+
expressions take priority over bugs in experimental features like `native_datafusion` or
114+
`native_iceberg_compat`.
115+
4. **Production safety over feature completeness.** Fixing a data corruption bug is more important
116+
than adding support for a new expression.
117+
118+
## Common Bug Categories
119+
120+
### Correctness Bugs (`priority:critical`)
121+
122+
These are bugs where Comet produces different results than Spark without any error or warning.
123+
Examples include:
124+
125+
- Incorrect cast behavior (e.g., negative zero to string)
126+
- Aggregate functions ignoring configuration (e.g., `ignoreNulls`)
127+
- Data corruption in FFI boundary (e.g., boolean arrays with non-zero offset)
128+
- Type mismatches between partial and final aggregation stages
129+
130+
When fixing correctness bugs, always add a regression test that verifies the output matches Spark.
131+
132+
### Crash Bugs (`priority:high`)
133+
134+
These are bugs where the native engine panics, segfaults, or throws an unhandled exception.
135+
Common patterns include:
136+
137+
- **All-scalar inputs:** Some expressions assume at least one columnar input and panic when all
138+
inputs are literals (e.g., when `ConstantFolding` is disabled)
139+
- **Type mismatches:** Downcasting to the wrong Arrow array type
140+
- **Memory safety:** FFI boundary issues, unaligned arrays, GlobalRef lifecycle
141+
142+
### Aggregate Planning Bugs
143+
144+
Several bugs relate to how Comet plans hash aggregates across stage boundaries. The key issue is
145+
that Spark's AQE may materialize a Comet partial aggregate but then run the final aggregate in
146+
Spark (or vice versa), and the intermediate formats may not be compatible. See the
147+
[EPIC #2892](https://github.com/apache/datafusion-comet/issues/2892) for the full picture.
148+
149+
### Native Writer Bugs
150+
151+
The native Parquet writer has a cluster of known test failures tracked as individual issues
152+
(#3417–#3430). These are lower priority since the native writer is still maturing, but they
153+
should be addressed before the writer is promoted to production-ready status.
154+
155+
## How to Help with Triage
156+
157+
Triage is a valuable contribution that doesn't require writing code. You can help by:
158+
159+
- Reviewing new issues and suggesting a priority label
160+
- Reproducing reported bugs and adding details
161+
- Identifying duplicate issues
162+
- Linking related issues together
163+
- Testing whether old bugs have been fixed by recent changes

docs/source/contributor-guide/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ Profiling Native Code <profiling_native_code>
3939
Spark SQL Tests <spark-sql-tests.md>
4040
Iceberg Spark Tests <iceberg-spark-tests.md>
4141
SQL File Tests <sql-file-tests.md>
42+
Bug Triage <bug_triage>
4243
Roadmap <roadmap.md>
4344
Release Process <release_process>
4445
Github and Issue Tracker <https://github.com/apache/datafusion-comet>

native/Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

native/core/src/execution/planner.rs

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -973,14 +973,28 @@ impl PhysicalPlanner {
973973
.map(|expr| self.create_agg_expr(expr, Arc::clone(&schema)))
974974
.collect();
975975

976-
let num_agg = agg.agg_exprs.len();
977976
let aggr_expr = agg_exprs?.into_iter().map(Arc::new).collect();
977+
978+
// Build per-aggregate filter expressions from the FILTER (WHERE ...) clause.
979+
// Filters are only present in Partial mode; Final/PartialMerge always get None.
980+
let filter_exprs: Result<Vec<Option<Arc<dyn PhysicalExpr>>>, ExecutionError> = agg
981+
.agg_exprs
982+
.iter()
983+
.map(|expr| {
984+
if let Some(f) = expr.filter.as_ref() {
985+
self.create_expr(f, Arc::clone(&schema)).map(Some)
986+
} else {
987+
Ok(None)
988+
}
989+
})
990+
.collect();
991+
978992
let aggregate: Arc<dyn ExecutionPlan> = Arc::new(
979993
datafusion::physical_plan::aggregates::AggregateExec::try_new(
980994
mode,
981995
group_by,
982996
aggr_expr,
983-
vec![None; num_agg], // no filter expressions
997+
filter_exprs?,
984998
Arc::clone(&child.native_plan),
985999
Arc::clone(&schema),
9861000
)?,

native/proto/src/proto/expr.proto

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,10 @@ message AggExpr {
141141
BloomFilterAgg bloomFilterAgg = 16;
142142
}
143143

144+
// Optional filter expression for SQL FILTER (WHERE ...) clause.
145+
// Only set in Partial aggregation mode; absent in Final/PartialMerge.
146+
optional Expr filter = 89;
147+
144148
// Optional QueryContext for error reporting (contains SQL text and position)
145149
optional QueryContext query_context = 90;
146150

0 commit comments

Comments
 (0)