diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ArrowCsvScanSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ArrowCsvScanSuite.scala index 36e3c6636d32..6a59ad1fd1df 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/ArrowCsvScanSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ArrowCsvScanSuite.scala @@ -21,6 +21,7 @@ import org.apache.gluten.datasource.ArrowCSVFileFormat import org.apache.spark.SparkConf import org.apache.spark.sql.execution.{ArrowFileSourceScanExec, BaseArrowScanExec, ColumnarToRowExec} +import org.apache.spark.sql.execution.columnar.SparkCacheUtil import org.apache.spark.sql.types.{IntegerType, StringType, StructType} import org.scalatest.Ignore @@ -83,6 +84,18 @@ class ArrowCsvScanWithTableCacheSuite extends ArrowCsvScanSuiteBase { .set(GlutenConfig.COLUMNAR_TABLE_CACHE_ENABLED.key, "true") } + override def beforeAll(): Unit = { + super.beforeAll() + // Clear the cache serializer, as is common practice in Spark's own tests, + // in case it was already set as the default row-based serializer. + SparkCacheUtil.clearCacheSerializer() + } + + override def afterAll(): Unit = { + SparkCacheUtil.clearCacheSerializer() + super.afterAll() + } + /** * Test for GLUTEN-8453: https://github.com/apache/incubator-gluten/issues/8453. To make sure no * error is thrown when caching an Arrow Java query plan. 
diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxColumnarCacheSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxColumnarCacheSuite.scala index e4f59051bdd3..35733bedcf78 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxColumnarCacheSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxColumnarCacheSuite.scala @@ -23,7 +23,7 @@ import org.apache.spark.SparkConf import org.apache.spark.sql.Row import org.apache.spark.sql.execution.{ColumnarToRowExec, SparkPlan} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper -import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec +import org.apache.spark.sql.execution.columnar.{InMemoryTableScanExec, SparkCacheUtil} import org.apache.spark.sql.types.{LongType, Metadata, MetadataBuilder, StructType} import org.apache.spark.storage.StorageLevel @@ -35,9 +35,17 @@ class VeloxColumnarCacheSuite extends VeloxWholeStageTransformerSuite with Adapt override def beforeAll(): Unit = { super.beforeAll() + // Clear the cache serializer, as is common practice in Spark's own tests, + // in case it was already set as the default row-based serializer. 
+ SparkCacheUtil.clearCacheSerializer() createTPCHNotNullTables() } + override protected def afterAll(): Unit = { + SparkCacheUtil.clearCacheSerializer() + super.afterAll() + } + override protected def sparkConf: SparkConf = { super.sparkConf .set("spark.shuffle.manager", "org.apache.spark.shuffle.sort.ColumnarShuffleManager") diff --git a/gluten-core/src/main/scala/org/apache/spark/sql/execution/columnar/SparkCacheUtil.scala b/gluten-core/src/main/scala/org/apache/spark/sql/execution/columnar/SparkCacheUtil.scala new file mode 100644 index 000000000000..b45c14ebc4b3 --- /dev/null +++ b/gluten-core/src/main/scala/org/apache/spark/sql/execution/columnar/SparkCacheUtil.scala @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql.execution.columnar + +object SparkCacheUtil { + def clearCacheSerializer(): Unit = { + InMemoryRelation.clearSerializer() + } +} diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/config/GlutenConfig.scala b/gluten-substrait/src/main/scala/org/apache/gluten/config/GlutenConfig.scala index 3b8e7c6568ff..6e686210ef2a 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/config/GlutenConfig.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/config/GlutenConfig.scala @@ -963,7 +963,7 @@ object GlutenConfig { .createWithDefault(100000) val COLUMNAR_TABLE_CACHE_ENABLED = - buildConf("spark.gluten.sql.columnar.tableCache") + buildStaticConf("spark.gluten.sql.columnar.tableCache") .doc("Enable or disable columnar table cache.") .booleanConf .createWithDefault(false)