diff --git a/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala index 302b4aa603d3..4c877bc53d60 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala @@ -496,12 +496,17 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite { } } - // FIXME: Ignored: https://github.com/apache/incubator-gluten/issues/7600. - ignore("monotonically_increasintestg_id") { + test("monotonically_increasing_id") { runQueryAndCompare("""SELECT monotonically_increasing_id(), l_orderkey | from lineitem limit 100""".stripMargin) { checkGlutenPlan[ProjectExecTransformer] } + // Multiple calls must produce independent results (issue #7628). + runQueryAndCompare( + """SELECT monotonically_increasing_id(), monotonically_increasing_id() + | from lineitem limit 100""".stripMargin) { + checkGlutenPlan[ProjectExecTransformer] + } } test("sequence function optimized by Spark constant folding") { diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc index babcaf0e5f64..3a1e9888da64 100644 --- a/cpp/velox/compute/WholeStageResultIterator.cc +++ b/cpp/velox/compute/WholeStageResultIterator.cc @@ -666,6 +666,14 @@ std::unordered_map WholeStageResultIterator::getQueryC configs[velox::core::QueryConfig::kSparkPartitionId] = std::to_string(taskInfo_.partitionId); + // Disable expression deduplication for non-deterministic functions to + // match Spark semantics. Spark creates separate instances for each + // non-deterministic expression, each with independent state. Without + // this, Velox merges structurally identical non-deterministic calls + // (e.g. two monotonically_increasing_id() in the same query) into one + // shared instance, causing incorrect results. + configs[velox::core::QueryConfig::kExprDedupNonDeterministic] = "false"; + // Enable Spark legacy date formatter if spark.sql.legacy.timeParserPolicy is set to 'LEGACY' // or 'legacy' if (veloxCfg_->get(kSparkLegacyTimeParserPolicy, "") == "LEGACY") { diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala index b13aced2a62c..b190419e4b27 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala @@ -290,6 +290,7 @@ object ExpressionMappings { Sig[MakeDecimal](MAKE_DECIMAL), Sig[PromotePrecision](PROMOTE_PRECISION), Sig[SparkPartitionID](SPARK_PARTITION_ID), + Sig[MonotonicallyIncreasingID](MONOTONICALLY_INCREASING_ID), Sig[AtLeastNNonNulls](AT_LEAST_N_NON_NULLS), Sig[WidthBucket](WIDTH_BUCKET), Sig[ReplicateRows](REPLICATE_ROWS),