martin-augment
diff --git a/‎.github/workflows/label_new_issues.yml‎
Lines changed: 39 additions & 0 deletions b/‎.github/workflows/label_new_issues.yml‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎common/src/main/scala/org/apache/comet/CometConf.scala‎
Lines changed: 14 additions & 0 deletions b/‎common/src/main/scala/org/apache/comet/CometConf.scala‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎dev/diffs/3.5.8.diff‎
Lines changed: 48 additions & 300 deletions b/‎dev/diffs/3.5.8.diff‎
Lines changed: 48 additions & 300 deletions
diff --git a/‎dev/diffs/4.0.1.diff‎
Lines changed: 15 additions & 0 deletions b/‎dev/diffs/4.0.1.diff‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎dev/ensure-jars-have-correct-contents.sh‎
Lines changed: 1 addition & 0 deletions b/‎dev/ensure-jars-have-correct-contents.sh‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/source/contributor-guide/bug_triage.md‎
Lines changed: 163 additions & 0 deletions b/‎docs/source/contributor-guide/bug_triage.md‎
Lines changed: 163 additions & 0 deletions
diff --git a/‎docs/source/contributor-guide/index.md‎
Lines changed: 1 addition & 0 deletions b/‎docs/source/contributor-guide/index.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎native/Cargo.lock‎
Lines changed: 1 addition & 0 deletions b/‎native/Cargo.lock‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎native/core/src/execution/planner.rs‎
Lines changed: 16 additions & 2 deletions b/‎native/core/src/execution/planner.rs‎
Lines changed: 16 additions & 2 deletions
diff --git a/‎native/proto/src/proto/expr.proto‎
Lines changed: 4 additions & 0 deletions b/‎native/proto/src/proto/expr.proto‎
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,39 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Label new issues with requires-triage
+
+on:
+  issues:
+    types: [opened]
+
+permissions:
+  issues: write
+
+jobs:
+  add-triage-label:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/github-script@v7
+        with:
+          script: |
+            await github.rest.issues.addLabels({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              labels: ['requires-triage']
+            })
@@ -798,6 +798,20 @@ object CometConf extends ShimCometConf {
       .longConf
       .createWithDefault(3000L)
 
+  val COMET_METRICS_ENABLED: ConfigEntry[Boolean] =
+    conf("spark.comet.metrics.enabled")
+      .category(CATEGORY_EXEC)
+      .doc(
+        "Whether to enable Comet metrics reporting through Spark's external monitoring system. " +
+          "When enabled, Comet exposes metrics such as native operators, Spark operators, " +
+          "queries planned, transitions, and acceleration ratio. These metrics can be " +
+          "visualized through tools like Grafana when a metrics sink (e.g., Prometheus) is " +
+          "configured. Disabled by default because Spark plan traversal adds overhead and " +
+          "metrics require a sink to be useful. " +
+          "This config must be set before the SparkSession is created to take effect.")
+      .booleanConf
+      .createWithDefault(false)
+
   val COMET_LIBHDFS_SCHEMES_KEY = "fs.comet.libhdfs.schemes"
 
   val COMET_LIBHDFS_SCHEMES: OptionalConfigEntry[String] =
 
@@ -245,6 +245,21 @@ index aa3d02dc2fb..c4f878d9908 100644
  -- Test cases with unicode_rtrim.
  WITH t(c1) AS (SELECT replace(listagg(DISTINCT col1 COLLATE unicode_rtrim) COLLATE utf8_binary, ' ', '') FROM (VALUES ('xbc  '), ('xbc '), ('a'), ('xbc'))) SELECT len(c1), regexp_count(c1, 'a'), regexp_count(c1, 'xbc') FROM t;
  WITH t(c1) AS (SELECT listagg(col1) WITHIN GROUP (ORDER BY col1 COLLATE unicode_rtrim) FROM (VALUES ('abc '), ('abc\n'), ('abc'), ('x'))) SELECT replace(replace(c1, ' ', ''), '\n', '$') FROM t;
+diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part3.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part3.sql
+index 0000000..0000000 100644
+--- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part3.sql
++++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part3.sql
+@@ -6,6 +6,10 @@
+ -- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/aggregates.sql#L352-L605
+
+ -- Test aggregate operator with codegen on and off.
++
++-- Floating-point precision difference between DataFusion and JVM for FILTER aggregates
++--SET spark.comet.enabled = false
++
+ --CONFIG_DIM1 spark.sql.codegen.wholeStage=true
+ --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY
+ --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN
 diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/int4.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/int4.sql
 index 3a409eea348..26e9aaf215c 100644
 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/int4.sql
 
@@ -93,6 +93,7 @@ allowed_expr+="|^org/apache/spark/sql/$"
 allowed_expr+="|^org/apache/spark/sql/ExtendedExplainGenerator.*$"
 allowed_expr+="|^org/apache/spark/CometPlugin.class$"
 allowed_expr+="|^org/apache/spark/CometDriverPlugin.*$"
+allowed_expr+="|^org/apache/spark/CometSource.*$"
 allowed_expr+="|^org/apache/spark/CometTaskMemoryManager.class$"
 allowed_expr+="|^org/apache/spark/CometTaskMemoryManager.*$"
 allowed_expr+="|^scala-collection-compat.properties$"
 
@@ -0,0 +1,163 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# Bug Triage Guide
+
+This guide describes how we prioritize and triage bugs in the Comet project. The goal is to ensure
+that the most impactful bugs — especially correctness issues that produce wrong results — are
+identified and addressed before less critical issues.
+
+## Priority Labels
+
+Every bug should have exactly one priority label. When filing or triaging a bug, apply the
+appropriate label from the table below.
+
+| Label               | Color  | Description                                                                          | Examples                                                              |
+| ------------------- | ------ | ------------------------------------------------------------------------------------ | --------------------------------------------------------------------- |
+| `priority:critical` | Red    | Data corruption, silent wrong results, security vulnerabilities                      | Wrong aggregation results, FFI data corruption, incorrect cast output |
+| `priority:high`     | Orange | Crashes, panics, segfaults, major functional breakage affecting production workloads | Native engine panic, JVM segfault, NPE on supported code path         |
+| `priority:medium`   | Yellow | Functional bugs, performance regressions, broken features that have workarounds      | Missing expression support, writer feature gaps, excessive spilling   |
+| `priority:low`      | Green  | Minor issues, test-only failures, tooling, CI flakes, cosmetic issues                | Flaky CI test, build script edge case, documentation generator bug    |
+
+### How to Choose a Priority
+
+Use this decision tree:
+
+1. **Can this bug cause silent wrong results, data corruption, or a security vulnerability?** If
+   yes → `priority:critical`. These are the most dangerous bugs because users may not notice
+   that anything went wrong.
+2. **Does this bug crash the JVM or native engine?** If yes → `priority:high`. Crashes are
+   disruptive but at least visible to the user.
+3. **Does this bug break a feature or cause significant performance degradation?** If yes →
+   `priority:medium`. The user can work around it (e.g., by falling back to Spark), but it
+   reduces the value of Comet.
+4. **Everything else** → `priority:low`. Test failures, CI issues, tooling, and cosmetic problems.
+
+### Escalation Triggers
+
+A bug should be escalated to a higher priority if:
+
+- A `priority:high` crash is discovered to also produce wrong results silently in some cases →
+  escalate to `priority:critical`
+- A `priority:medium` bug is reported by multiple users or affects a common workload → consider
+  escalating to `priority:high`
+- A `priority:low` CI flake is blocking PR merges consistently → escalate to `priority:medium`
+
+## Area Labels
+
+Area labels indicate which subsystem is affected. A bug may have multiple area labels. These
+help contributors find bugs in their area of expertise.
+
+| Label              | Description                               |
+| ------------------ | ----------------------------------------- |
+| `area:writer`      | Native writer (Parquet and other formats) |
+| `area:shuffle`     | Shuffle (JVM and native)                  |
+| `area:aggregation` | Hash aggregates, aggregate expressions    |
+| `area:scan`        | Data source scan (Parquet, CSV, Iceberg)  |
+| `area:expressions` | Expression evaluation                     |
+| `area:ffi`         | Arrow FFI / JNI boundary                  |
+| `area:ci`          | CI/CD, GitHub Actions, build tooling      |
+
+The following pre-existing labels also serve as area indicators: `native_datafusion`,
+`native_iceberg_compat`, `spark 4`, `spark sql tests`.
+
+## Triage Process
+
+Every new issue is automatically labeled with `requires-triage` when it is opened. This makes it
+easy to find issues that have not yet been triaged by filtering on that label. Once an issue has
+been triaged, remove the `requires-triage` label and apply the appropriate priority and area labels.
+
+### For New Issues
+
+When a new bug is filed:
+
+1. **Reproduce or verify** the issue if possible. If the report lacks reproduction steps, ask
+   the reporter for more details.
+2. **Assess correctness impact first.** Ask: "Could this produce wrong results silently?" This
+   is more important than whether it crashes.
+3. **Apply a priority label** using the decision tree above.
+4. **Apply area labels** to indicate the affected subsystem(s).
+5. **Apply `good first issue`** if the fix is likely straightforward and well-scoped.
+6. **Remove the `requires-triage` label** to indicate triage is complete.
+
+### For Existing Bugs
+
+Periodically review open bugs to ensure priorities are still accurate:
+
+- Has a `priority:medium` bug been open for a long time with user reports? Consider escalating.
+- Has a `priority:high` bug been fixed by a related change? Close it.
+- Are there clusters of related bugs that should be tracked under an EPIC?
+
+### Prioritization Principles
+
+1. **Correctness over crashes.** A bug that silently returns wrong results is worse than one that
+   crashes, because crashes are at least visible.
+2. **User-reported over test-only.** A bug hit by a real user on a real workload takes priority
+   over one found only in test suites.
+3. **Core path over experimental.** Bugs in the default scan mode (`native_comet`) or widely-used
+   expressions take priority over bugs in experimental features like `native_datafusion` or
+   `native_iceberg_compat`.
+4. **Production safety over feature completeness.** Fixing a data corruption bug is more important
+   than adding support for a new expression.
+
+## Common Bug Categories
+
+### Correctness Bugs (`priority:critical`)
+
+These are bugs where Comet produces different results than Spark without any error or warning.
+Examples include:
+
+- Incorrect cast behavior (e.g., negative zero to string)
+- Aggregate functions ignoring configuration (e.g., `ignoreNulls`)
+- Data corruption at the FFI boundary (e.g., boolean arrays with non-zero offset)
+- Type mismatches between partial and final aggregation stages
+
+When fixing correctness bugs, always add a regression test that verifies the output matches Spark.
+
+### Crash Bugs (`priority:high`)
+
+These are bugs where the native engine panics, segfaults, or throws an unhandled exception.
+Common patterns include:
+
+- **All-scalar inputs:** Some expressions assume at least one columnar input and panic when all
+  inputs are literals (e.g., when `ConstantFolding` is disabled)
+- **Type mismatches:** Downcasting to the wrong Arrow array type
+- **Memory safety:** FFI boundary issues, unaligned arrays, GlobalRef lifecycle
+
+### Aggregate Planning Bugs
+
+Several bugs relate to how Comet plans hash aggregates across stage boundaries. The key issue is
+that Spark's AQE may materialize a Comet partial aggregate but then run the final aggregate in
+Spark (or vice versa), and the intermediate formats may not be compatible. See
+[EPIC #2892](https://github.com/apache/datafusion-comet/issues/2892) for the full picture.
+
+### Native Writer Bugs
+
+The native Parquet writer has a cluster of known test failures tracked as individual issues
+(#3417–#3430). These are lower priority since the native writer is still maturing, but they
+should be addressed before the writer is promoted to production-ready status.
+
+## How to Help with Triage
+
+Triage is a valuable contribution that doesn't require writing code. You can help by:
+
+- Reviewing new issues and suggesting a priority label
+- Reproducing reported bugs and adding details
+- Identifying duplicate issues
+- Linking related issues together
+- Testing whether old bugs have been fixed by recent changes
@@ -39,6 +39,7 @@ Profiling Native Code <profiling_native_code>
 Spark SQL Tests <spark-sql-tests.md>
 Iceberg Spark Tests <iceberg-spark-tests.md>
 SQL File Tests <sql-file-tests.md>
+Bug Triage <bug_triage>
 Roadmap <roadmap.md>
 Release Process <release_process>
 Github and Issue Tracker <https://github.com/apache/datafusion-comet>
 
@@ -973,14 +973,28 @@ impl PhysicalPlanner {
                     .map(|expr| self.create_agg_expr(expr, Arc::clone(&schema)))
                     .collect();
 
-                let num_agg = agg.agg_exprs.len();
                 let aggr_expr = agg_exprs?.into_iter().map(Arc::new).collect();
+
+                // Build per-aggregate filter expressions from the FILTER (WHERE ...) clause.
+                // Filters are only present in Partial mode; Final/PartialMerge always get None.
+                let filter_exprs: Result<Vec<Option<Arc<dyn PhysicalExpr>>>, ExecutionError> = agg
+                    .agg_exprs
+                    .iter()
+                    .map(|expr| {
+                        if let Some(f) = expr.filter.as_ref() {
+                            self.create_expr(f, Arc::clone(&schema)).map(Some)
+                        } else {
+                            Ok(None)
+                        }
+                    })
+                    .collect();
+
                 let aggregate: Arc<dyn ExecutionPlan> = Arc::new(
                     datafusion::physical_plan::aggregates::AggregateExec::try_new(
                         mode,
                         group_by,
                         aggr_expr,
-                        vec![None; num_agg], // no filter expressions
+                        filter_exprs?,
                         Arc::clone(&child.native_plan),
                         Arc::clone(&schema),
                     )?,
 
@@ -141,6 +141,10 @@ message AggExpr {
     BloomFilterAgg bloomFilterAgg = 16;
   }
 
+  // Optional filter expression for SQL FILTER (WHERE ...) clause.
+  // Only set in Partial aggregation mode; absent in Final/PartialMerge.
+  optional Expr filter = 89;
+
   // Optional QueryContext for error reporting (contains SQL text and position)
   optional QueryContext query_context = 90;
Original file line number	Diff line number	Diff line change
`@@ -141,6 +141,10 @@ message AggExpr {`
`141`	`141`	`BloomFilterAgg bloomFilterAgg = 16;`
`142`	`142`	`}`
`143`	`143`
	`144`	`+ // Optional filter expression for SQL FILTER (WHERE ...) clause.`
	`145`	`+ // Only set in Partial aggregation mode; absent in Final/PartialMerge.`
	`146`	`+ optional Expr filter = 89;`
	`147`	`+`
`144`	`148`	`// Optional QueryContext for error reporting (contains SQL text and position)`
`145`	`149`	`optional QueryContext query_context = 90;`
`146`	`150`