Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions docs/compatibility.md
Original file line number Diff line number Diff line change
Expand Up @@ -753,12 +753,6 @@ The following formats/patterns are supported on the GPU. Timezone of UTC is assu
| `"tomorrow"` | Yes |
| `"yesterday"` | Yes |

### String to Timestamp
The GPU aligns with Spark except for one known case, which is actually a Spark bug.
Spark 3.5.x supports the following case (leading spaces + `Thh:mm:ss`), while Spark 4.0.0 does not:
cast(' T00:00:00' as timestamp)
For more details, refer to [bug link](https://github.com/NVIDIA/spark-rapids-jni/issues/3401)

### Constant Folding

ConstantFolding is an operator optimization rule in Catalyst that replaces expressions that can
Expand Down
20 changes: 1 addition & 19 deletions integration_tests/src/main/python/cast_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -935,6 +935,7 @@ def _gen_df(spark):
("T23:17:50",),
("T23:17:50",),
("T23:17:50",),
(" \r\n\tT23:17:50",), # This is testing issue: https://github.com/NVIDIA/spark-rapids-jni/issues/3401
("T23:17:50 \r\n\t",),
("T00",),
("T1:2",),
Expand Down Expand Up @@ -963,25 +964,6 @@ def _query(spark):
assert_gpu_and_cpu_are_equal_collect(lambda spark: _query(spark))


# Spark 400 and DB35 can not handle pattern: left spaces + Thh:mm:ss, refer to the bug link
@pytest.mark.skipif(is_spark_400_or_later() or is_databricks_version_or_later(14, 3),
                    reason="https://github.com/NVIDIA/spark-rapids-jni/issues/3401")
def test_cast_string_to_timestamp_for_just_time_spaces_leading():
    """Cast time-only strings that start with whitespace to timestamp and
    check that the CPU and GPU results match."""
    # Both rows begin with " \r\n\t"; the first one also ends with it.
    rows = [
        (" \r\n\tT23:17:50 \r\n\t",),
        (" \r\n\tT23:17:50",),
    ]

    def _cast_rows(spark):
        # Populate the JNI timezone database cache before building the query.
        spark._jvm.com.nvidia.spark.rapids.jni.GpuTimeZoneDB.cacheDatabase(2200)
        df = spark.createDataFrame(rows, 'str_col string')
        return df.selectExpr("cast(str_col as timestamp)")

    assert_gpu_and_cpu_are_equal_collect(_cast_rows)


def test_cast_string_to_timestamp_valid_just_time_with_timezone():
# For the just time strings, will get current date to fill the missing date.
# E.g.: "T00:00:00" will be "2025-05-23T00:00:00"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1370,8 +1370,8 @@ object GpuCast {
defaultTimeZone: Option[String] = Option.empty[String]): ColumnVector = {
val tz = defaultTimeZone.getOrElse("Z")
val normalizedTZ = ZoneId.of(tz, ZoneId.SHORT_IDS).normalized().toString
val isSpark320 = VersionUtils.cmpSparkVersion(3, 2, 0) == 0
closeOnExcept(CastStrings.toTimestamp(input, normalizedTZ, ansiMode, isSpark320)) { result =>
val versionForJni = VersionUtils.getVersionForJni
closeOnExcept(CastStrings.toTimestamp(input, normalizedTZ, ansiMode, versionForJni)) { result =>
if (ansiMode && result == null) {
throw new DateTimeException("One or more values is not a valid timestamp")
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@

package com.nvidia.spark.rapids

import com.nvidia.spark.rapids.jni.{Version => VersionForJni, Platform => PlatformForJni}

object VersionUtils {

lazy val isSpark320OrLater: Boolean = cmpSparkVersion(3, 2, 0) >= 0
Expand Down Expand Up @@ -43,4 +45,20 @@ object VersionUtils {
val sparkFullVersion = ((sparkMajor.toLong * 1000) + sparkMinor) * 1000 + sparkBugfix
sparkFullVersion.compareTo(fullVersion)
}

/**
 * Build the version object that is handed to the JNI layer.
 *
 * The JNI interface only accepts `com.nvidia.spark.rapids.jni.Version`, so the
 * current shim version is translated into that type here. The platform is
 * chosen from the kind of shim version; the fourth field carried by the
 * Databricks and Cloudera shim versions is not forwarded.
 *
 * @return the platform plus three version numbers of the loaded shim
 */
def getVersionForJni: VersionForJni = ShimLoader.getShimVersion match {
  case SparkShimVersion(major, minor, patch) =>
    new VersionForJni(PlatformForJni.SPARK, major, minor, patch)
  case DatabricksShimVersion(major, minor, patch, _) =>
    new VersionForJni(PlatformForJni.DATABRICKS, major, minor, patch)
  case ClouderaShimVersion(major, minor, patch, _) =>
    new VersionForJni(PlatformForJni.CLOUDERA, major, minor, patch)
}
}
Loading