diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
index 8b2193b58042..41dc79cd32aa 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
@@ -17,6 +17,7 @@
 package org.apache.gluten.backendsapi.velox
 
 import org.apache.gluten.backendsapi.{BackendsApiManager, ValidatorApi}
+import org.apache.gluten.config.VeloxConfig
 import org.apache.gluten.execution.ValidationResult
 import org.apache.gluten.substrait.`type`.TypeNode
 import org.apache.gluten.substrait.SubstraitContext
@@ -104,11 +105,18 @@ class VeloxValidatorApi extends ValidatorApi {
 
 object VeloxValidatorApi {
   private def isPrimitiveType(dataType: DataType): Boolean = {
+    val enableTimestampNtzValidation = VeloxConfig.get.enableTimestampNtzValidation
     dataType match {
       case BooleanType | ByteType | ShortType | IntegerType | LongType | FloatType | DoubleType |
           StringType | BinaryType | _: DecimalType | DateType | TimestampType |
           YearMonthIntervalType.DEFAULT | NullType =>
         true
+      case dt if !enableTimestampNtzValidation && dt.typeName == "timestamp_ntz" =>
+        // Allow TimestampNTZ when validation is disabled (for development/testing).
+        // Compare by typeName instead of referencing TimestampNTZType directly, which avoids
+        // a compile-time dependency on Spark 3.4+. getClass.getSimpleName is not usable here:
+        // for the Scala singleton it yields "TimestampNTZType$", so an exact match never hits.
+        true
       case _ => false
     }
   }
diff --git a/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala b/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
index 46dcb55fe9d3..c8a04b164dbf 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
@@ -94,6 +94,8 @@ class VeloxConfig(conf: SQLConf) extends GlutenConfig(conf) {
 
   def hashProbeDynamicFilterPushdownEnabled: Boolean =
     getConf(HASH_PROBE_DYNAMIC_FILTER_PUSHDOWN_ENABLED)
+
+  def enableTimestampNtzValidation: Boolean = getConf(ENABLE_TIMESTAMP_NTZ_VALIDATION)
 }
 
 object VeloxConfig extends ConfigRegistry {
@@ -731,4 +733,13 @@ object VeloxConfig extends ConfigRegistry {
       .doc("Maps table field names to file field names using names, not indices for Parquet files.")
       .booleanConf
       .createWithDefault(true)
+
+  val ENABLE_TIMESTAMP_NTZ_VALIDATION =
+    buildConf("spark.gluten.sql.columnar.backend.velox.enableTimestampNtzValidation")
+      .doc(
+        "Enable validation fallback for TimestampNTZ type. When true (default), any plan " +
+          "containing TimestampNTZ will fall back to Spark execution. Set to false during " +
+          "development/testing of TimestampNTZ support to allow native execution.")
+      .booleanConf
+      .createWithDefault(true)
 }
diff --git a/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala
index 302b4aa603d3..30446087071f 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala
@@ -39,6 +39,30 @@ class ScalarFunctionsValidateSuiteRasOn extends ScalarFunctionsValidateSuite {
   }
 }
 
+// Dedicated test suite for TimestampNTZ foldable expression tests
+class TimestampNtzFoldableTestSuite extends FunctionsValidateSuite {
+  disableFallbackCheck
+
+  override protected def sparkConf: SparkConf = {
+    super.sparkConf
+      .set(GlutenConfig.RAS_ENABLED.key, "false")
+      .set("spark.gluten.sql.columnar.backend.velox.enableTimestampNtzValidation", "false")
+  }
+
+  test("localtimestamp_folded_no_fallback") {
+    // Verify localtimestamp() is folded to a literal by Spark's optimizer.
+    // The scan should execute natively, though Project may fall back due to TimestampNTZ.
+    val df = spark.sql("SELECT l_orderkey, localtimestamp() FROM lineitem LIMIT 1")
+    val optimizedPlan = df.queryExecution.optimizedPlan.toString()
+    assert(
+      !optimizedPlan.contains("LocalTimestamp"),
+      s"Expected LocalTimestamp to be folded to a literal, but got: $optimizedPlan"
+    )
+    checkGlutenPlan[BatchScanExecTransformer](df)
+    df.collect()
+  }
+}
+
 abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite {
   disableFallbackCheck
 
diff --git a/docs/velox-configuration.md b/docs/velox-configuration.md
index 358dc419626b..490703625336 100644
--- a/docs/velox-configuration.md
+++ b/docs/velox-configuration.md
@@ -26,6 +26,7 @@ nav_order: 16
 | spark.gluten.sql.columnar.backend.velox.cudf.memoryPercent | 50 | The initial percent of GPU memory to allocate for memory resource for one thread. |
 | spark.gluten.sql.columnar.backend.velox.cudf.memoryResource | async | GPU RMM memory resource. |
 | spark.gluten.sql.columnar.backend.velox.directorySizeGuess | 32KB | Deprecated, rename to spark.gluten.sql.columnar.backend.velox.footerEstimatedSize |
+| spark.gluten.sql.columnar.backend.velox.enableTimestampNtzValidation | true | Enable validation fallback for TimestampNTZ type. When true (default), any plan containing TimestampNTZ will fall back to Spark execution. Set to false during development/testing of TimestampNTZ support to allow native execution. |
 | spark.gluten.sql.columnar.backend.velox.fileHandleCacheEnabled | false | Disables caching if false. File handle cache should be disabled if files are mutable, i.e. file content may change while file path stays the same. |
 | spark.gluten.sql.columnar.backend.velox.filePreloadThreshold | 1MB | Set the file preload threshold for velox file scan, refer to Velox's file-preload-threshold |
 | spark.gluten.sql.columnar.backend.velox.floatingPointMode | loose | Config used to control the tolerance of floating point operations alignment with Spark. When the mode is set to strict, flushing is disabled for sum(float/double)and avg(float/double). When set to loose, flushing will be enabled. |