From 5c458099a2b77443ab79f9c5a5ab2f303a53e2e5 Mon Sep 17 00:00:00 2001
From: Allen Xu <allxu@nvidia.com>
Date: Wed, 11 Mar 2026 17:07:39 +0800
Subject: [PATCH 1/2] [AutoSparkUT] Fix GpuScalar to preserve -0.0 for
 float/double (issue #14116)

cuDF's Scalar.fromDouble(-0.0) normalizes -0.0 to 0.0, losing the sign
bit. This caused GPU scalar subqueries to return 0.0 where CPU correctly
returns -0.0, violating GPU-CPU parity.

Root cause: the JNI path Scalar.fromDouble -> makeFloat64Scalar drops
the IEEE 754 sign bit of negative zero during scalar creation.

Fix: in GpuScalar.from(), create float/double scalars by building a
1-element ColumnVector and calling getScalarElement(0) on it, instead of
calling Scalar.fromDouble/fromFloat. The column-based path preserves the
exact bit pattern.

This re-enables the previously excluded test "normalize special floating
numbers in subquery" in RapidsSQLQuerySuite.

Closes #14116

Signed-off-by: Allen Xu <allxu@nvidia.com>
Made-with: Cursor
---
 .../com/nvidia/spark/rapids/literals.scala    | 19 ++++++++++++++++---
 .../sql/rapids/utils/RapidsTestSettings.scala |  1 -
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/literals.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/literals.scala
index fc6566dc222..c6ec1b80d7d 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/literals.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/literals.scala
@@ -280,8 +280,16 @@ object GpuScalar extends Logging {
         s" for LongType, expecting Long, or Int.")
     }
     case DoubleType => v match {
-        case d: Double => Scalar.fromDouble(d)
-        case f: Float => Scalar.fromDouble(f.toDouble)
+        case d: Double =>
+          // cuDF Scalar.fromDouble normalizes -0.0 to 0.0 (see #14116).
+          // Create via a 1-element column to preserve the exact bit pattern.
+          withResource(ColumnVector.fromDoubles(d)) { cv =>
+            cv.getScalarElement(0)
+          }
+        case f: Float =>
+          withResource(ColumnVector.fromDoubles(f.toDouble)) { cv =>
+            cv.getScalarElement(0)
+          }
         case _ => throw new IllegalArgumentException(s"'$v: ${v.getClass}' is not supported" +
           s" for DoubleType, expecting Double or Float.")
     }
@@ -314,7 +322,12 @@ object GpuScalar extends Logging {
         s" for DateType, expecting Int or LocalDate")
     }
     case FloatType => v match {
-      case f: Float => Scalar.fromFloat(f)
+      case f: Float =>
+        // cuDF Scalar.fromFloat normalizes -0.0f to 0.0f (see #14116).
+        // Create via a 1-element column to preserve the exact bit pattern.
+        withResource(ColumnVector.fromFloats(f)) { cv =>
+          cv.getScalarElement(0)
+        }
       case _ => throw new IllegalArgumentException(s"'$v: ${v.getClass}' is not supported" +
         s" for FloatType, expecting Float.")
     }
diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala
index 085d823d383..2cf300092bf 100644
--- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala
+++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala
@@ -226,7 +226,6 @@ class RapidsTestSettings extends BackendTestSettings {
     .exclude("SPARK-17515: CollectLimit.execute() should perform per-partition limits", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/14109"))
     .exclude("SPARK-19650: An action on a Command should not trigger a Spark job", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/14110"))
     .exclude("SPARK-31594: Do not display the seed of rand/randn with no argument in output schema", ADJUST_UT("Replaced by testRapids version with a correct regex expression to match the projectExplainOutput, randn isn't supported now. See https://github.com/NVIDIA/spark-rapids/issues/11613"))
-    .exclude("normalize special floating numbers in subquery", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/14116"))
     .exclude("SPARK-33677: LikeSimplification should be skipped if pattern contains any escapeChar", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/14117"))
     .exclude("SPARK-33593: Vector reader got incorrect data with binary partition value", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/14118"))
     .exclude("SPARK-33084: Add jar support Ivy URI in SQL -- jar contains udf class", ADJUST_UT("Replaced by testRapids version that uses testFile() to access Spark test resources instead of getContextClassLoader"))

From dbc46f01651f51342d4f51a8bf889d8477496795 Mon Sep 17 00:00:00 2001
From: Allen Xu <allxu@nvidia.com>
Date: Thu, 12 Mar 2026 14:38:25 +0800
Subject: [PATCH 2/2] Optimize -0.0 workaround: only use ColumnVector path for
 negative zero

Address review feedback: instead of routing all float/double scalar
creation through ColumnVector (which allocates device memory), detect
-0.0 via raw bit comparison and only use the slow path for that specific
value. All other values continue to use the fast Scalar.fromDouble/
Scalar.fromFloat path, so the common case adds only one raw-bit
comparison.

Signed-off-by: Allen Xu <allxu@nvidia.com>
Made-with: Cursor
---
 .../com/nvidia/spark/rapids/literals.scala    | 31 +++++++++++++------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/literals.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/literals.scala
index c6ec1b80d7d..cb2e2101f41 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/literals.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/literals.scala
@@ -280,18 +280,26 @@ object GpuScalar extends Logging {
         s" for LongType, expecting Long, or Int.")
     }
     case DoubleType => v match {
-        case d: Double =>
-          // cuDF Scalar.fromDouble normalizes -0.0 to 0.0 (see #14116).
-          // Create via a 1-element column to preserve the exact bit pattern.
+      case d: Double =>
+        // cuDF Scalar.fromDouble normalizes -0.0 to 0.0 (see #14116).
+        if (JDouble.doubleToRawLongBits(d) == JDouble.doubleToRawLongBits(-0.0d)) {
           withResource(ColumnVector.fromDoubles(d)) { cv =>
             cv.getScalarElement(0)
           }
-        case f: Float =>
-          withResource(ColumnVector.fromDoubles(f.toDouble)) { cv =>
+        } else {
+          Scalar.fromDouble(d)
+        }
+      case f: Float =>
+        val d = f.toDouble
+        if (JDouble.doubleToRawLongBits(d) == JDouble.doubleToRawLongBits(-0.0d)) {
+          withResource(ColumnVector.fromDoubles(d)) { cv =>
             cv.getScalarElement(0)
           }
-        case _ => throw new IllegalArgumentException(s"'$v: ${v.getClass}' is not supported" +
-          s" for DoubleType, expecting Double or Float.")
+        } else {
+          Scalar.fromDouble(d)
+        }
+      case _ => throw new IllegalArgumentException(s"'$v: ${v.getClass}' is not supported" +
+        s" for DoubleType, expecting Double or Float.")
     }
     case TimestampType => v match {
       // Usually the timestamp will be used by the `add/sub` operators for date/time related
@@ -324,9 +332,12 @@ object GpuScalar extends Logging {
     case FloatType => v match {
       case f: Float =>
         // cuDF Scalar.fromFloat normalizes -0.0f to 0.0f (see #14116).
-        // Create via a 1-element column to preserve the exact bit pattern.
-        withResource(ColumnVector.fromFloats(f)) { cv =>
-          cv.getScalarElement(0)
+        if (JFloat.floatToRawIntBits(f) == JFloat.floatToRawIntBits(-0.0f)) {
+          withResource(ColumnVector.fromFloats(f)) { cv =>
+            cv.getScalarElement(0)
+          }
+        } else {
+          Scalar.fromFloat(f)
         }
       case _ => throw new IllegalArgumentException(s"'$v: ${v.getClass}' is not supported" +
         s" for FloatType, expecting Float.")