diff --git a/build/buildall b/build/buildall
index 4ec9e691c0d..087e6aabbc3 100755
--- a/build/buildall
+++ b/build/buildall
@@ -1,6 +1,6 @@
#!/bin/bash
#
-# Copyright (c) 2021-2025, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2021-2026, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -38,8 +38,8 @@ function print_usage() {
echo " -gb, --generate-bloop"
echo " generate projects for Bloop clients: IDE (Scala Metals, IntelliJ) or Bloop CLI"
echo " -p=DIST_PROFILE, --profile=DIST_PROFILE"
- echo " use this profile for the dist module, default: noSnapshots, also supported: snapshots, minimumFeatureVersionMix,"
- echo " snapshotsWithDatabricks, noSnapshotsWithDatabricks, noSnapshotsScala213, snapshotsScala213."
+ echo " use this profile for the dist module, default: noSnapshots, also supported: snapshots,"
+ echo " snapshotsWithDatabricks, noSnapshotsWithDatabricks"
echo " NOTE: the Databricks-related spark3XYdb shims are not built locally, the jars are fetched prebuilt from a"
echo " . remote Maven repo. You can also supply a comma-separated list of build versions. E.g., --profile=330,331 will"
echo " build only the distribution jar only for 3.3.0 and 3.3.1"
@@ -54,6 +54,8 @@ function print_usage() {
echo " use this option to build project with maven. E.g., --option='-Dcudf.version=cuda12'"
echo " --rebuild-dist-only"
echo " repackage the dist module artifact using installed dependencies"
+ echo " --scala213"
+ echo " build 2.13 shims"
}
function bloopInstall() {
@@ -152,7 +154,7 @@ case "$1" in
;;
-o=*|--option=*)
- MVN_OPT="${1#*=}"
+ export MVN_OPT="${1#*=}"
;;
*)
@@ -172,41 +174,35 @@ if [[ "$DIST_PROFILE" == *Scala213 ]]; then
SCALA213=1
fi
+MVN=${MVN:-"mvn"}
# include options to mvn command
-export MVN="mvn -Dmaven.wagon.http.retryHandler.count=3 ${MVN_OPT}"
+export MVN="$MVN -Dmaven.wagon.http.retryHandler.count=3 ${MVN_OPT}"
if [[ "$SCALA213" == "1" ]]; then
- MVN="$MVN -f scala2.13/"
- DIST_PROFILE=${DIST_PROFILE:-"noSnapshotsScala213"}
+ POM_FILE="scala2.13/pom.xml"
+ export MVN="$MVN -f scala2.13/"
$(dirname $0)/make-scala-version-build-files.sh 2.13
-else
- DIST_PROFILE=${DIST_PROFILE:-"noSnapshots"}
+else
+ POM_FILE="pom.xml"
fi
+DIST_PROFILE=${DIST_PROFILE:-"noSnapshots"}
+
+
[[ "$MODULE" != "" ]] && MODULE_OPT="--projects $MODULE --also-make" || MODULE_OPT=""
echo "Collecting Spark versions..."
case $DIST_PROFILE in
- snapshotsScala213)
- SPARK_SHIM_VERSIONS=($(versionsFromReleaseProfiles "snap_and_no_snap" "scala2.13/pom.xml"))
- ;;
-
- noSnapshotsScala213)
- SPARK_SHIM_VERSIONS=($(versionsFromReleaseProfiles "no_snapshots" "scala2.13/pom.xml"))
- ;;
-
snapshots?(WithDatabricks))
- SPARK_SHIM_VERSIONS=($(versionsFromReleaseProfiles "snap_and_no_snap" "pom.xml"))
+ SPARK_SHIM_VERSIONS=($(versionsFromReleaseProfiles "snap_and_no_snap" $POM_FILE))
;;
noSnapshots?(WithDatabricks))
- SPARK_SHIM_VERSIONS=($(versionsFromReleaseProfiles "no_snapshots" "pom.xml"))
+ SPARK_SHIM_VERSIONS=($(versionsFromReleaseProfiles "no_snapshots" $POM_FILE))
;;
- minimumFeatureVersionMix)
- SPARK_SHIM_VERSIONS=($(versionsFromDistProfile "minimumFeatureVersionMix"))
- ;;
+
[34]*)
<<< $DIST_PROFILE IFS="," read -ra SPARK_SHIM_VERSIONS
diff --git a/datagen/src/main/spark400/scala/org/apache/spark/sql/tests/datagen/DataGenExprShims.scala b/datagen/src/main/spark400/scala/org/apache/spark/sql/tests/datagen/DataGenExprShims.scala
index fe9c6f468ec..ad42aa6bfdd 100644
--- a/datagen/src/main/spark400/scala/org/apache/spark/sql/tests/datagen/DataGenExprShims.scala
+++ b/datagen/src/main/spark400/scala/org/apache/spark/sql/tests/datagen/DataGenExprShims.scala
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2024-2025, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2026, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
/*** spark-rapids-shim-json-lines
{"spark": "400"}
{"spark": "401"}
+{"spark": "411"}
spark-rapids-shim-json-lines ***/
package org.apache.spark.sql.tests.datagen
diff --git a/delta-lake/common/src/main/delta-33x-40x/scala/com/nvidia/spark/rapids/delta/common/DeltaProviderBase.scala b/delta-lake/common/src/main/delta-33x-40x/scala/com/nvidia/spark/rapids/delta/common/DeltaProviderBase.scala
index e86f33299c2..474f79ff6e3 100644
--- a/delta-lake/common/src/main/delta-33x-40x/scala/com/nvidia/spark/rapids/delta/common/DeltaProviderBase.scala
+++ b/delta-lake/common/src/main/delta-33x-40x/scala/com/nvidia/spark/rapids/delta/common/DeltaProviderBase.scala
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2025, NVIDIA CORPORATION.
+ * Copyright (c) 2025-2026, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -20,6 +20,7 @@ import com.nvidia.spark.rapids._
import com.nvidia.spark.rapids.RapidsPluginImplicits._
import com.nvidia.spark.rapids.delta.{DeltaIOProvider, GpuDeltaDataSource, RapidsDeltaUtils}
import com.nvidia.spark.rapids.shims._
+import com.nvidia.spark.rapids.shims.InvalidateCacheShims
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.SparkSession
@@ -132,7 +133,7 @@ abstract class DeltaProviderBase extends DeltaIOProvider {
cpuExec.tableSpec,
cpuExec.writeOptions,
cpuExec.orCreate,
- cpuExec.invalidateCache)
+ InvalidateCacheShims.getInvalidateCache(cpuExec.invalidateCache))
}
diff --git a/delta-lake/common/src/main/delta-33x-40x/scala/com/nvidia/spark/rapids/delta/common/GpuDeltaParquetFileFormatBase.scala b/delta-lake/common/src/main/delta-33x-40x/scala/com/nvidia/spark/rapids/delta/common/GpuDeltaParquetFileFormatBase.scala
index c512721c5f1..824788c03e2 100644
--- a/delta-lake/common/src/main/delta-33x-40x/scala/com/nvidia/spark/rapids/delta/common/GpuDeltaParquetFileFormatBase.scala
+++ b/delta-lake/common/src/main/delta-33x-40x/scala/com/nvidia/spark/rapids/delta/common/GpuDeltaParquetFileFormatBase.scala
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2025, NVIDIA CORPORATION.
+ * Copyright (c) 2025-2026, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -17,7 +17,6 @@
package com.nvidia.spark.rapids.delta.common
import ai.rapids.cudf._
-import ai.rapids.cudf.HostColumnVector._
import com.nvidia.spark.rapids._
import com.nvidia.spark.rapids.Arm.withResource
import com.nvidia.spark.rapids.RapidsPluginImplicits._
diff --git a/delta-lake/common/src/main/delta-33x/scala/org/apache/spark/sql/delta/rapids/GpuOptimizeWriteExchangeExec.scala b/delta-lake/common/src/main/delta-33x/scala/org/apache/spark/sql/delta/rapids/GpuOptimizeWriteExchangeExec.scala
index 1f332fc66d9..5640376de72 100644
--- a/delta-lake/common/src/main/delta-33x/scala/org/apache/spark/sql/delta/rapids/GpuOptimizeWriteExchangeExec.scala
+++ b/delta-lake/common/src/main/delta-33x/scala/org/apache/spark/sql/delta/rapids/GpuOptimizeWriteExchangeExec.scala
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2025, NVIDIA CORPORATION.
+ * Copyright (c) 2025-2026, NVIDIA CORPORATION.
*
* This file was derived from OptimizeWriteExchange.scala
* in the Delta Lake project at https://github.com/delta-io/delta
@@ -26,8 +26,7 @@ import scala.concurrent.Future
import scala.concurrent.duration.Duration
import com.nvidia.spark.rapids.{GpuColumnarBatchSerializer, GpuExec, GpuMetric, GpuPartitioning, GpuRoundRobinPartitioning, RapidsConf}
-import com.nvidia.spark.rapids.GpuMetric.{OP_TIME_NEW_SHUFFLE_READ, OP_TIME_NEW_SHUFFLE_WRITE}
-import com.nvidia.spark.rapids.GpuMetric.{DESCRIPTION_OP_TIME_NEW_SHUFFLE_READ, DESCRIPTION_OP_TIME_NEW_SHUFFLE_WRITE, MODERATE_LEVEL}
+import com.nvidia.spark.rapids.GpuMetric._
import com.nvidia.spark.rapids.delta.RapidsDeltaSQLConf
import com.nvidia.spark.rapids.shims.GpuHashPartitioning
@@ -60,7 +59,6 @@ case class GpuOptimizeWriteExchangeExec(
partitioning: GpuPartitioning,
override val child: SparkPlan,
@transient deltaLog: DeltaLog) extends Exchange with GpuExec with DeltaLogging {
- import GpuMetric._
// Use 150% of target file size hint config considering parquet compression.
// Still the result file can be smaller/larger than the config due to data skew or
diff --git a/delta-lake/common/src/main/delta-40x/scala/org/apache/spark/sql/delta/hooks/GpuAutoCompact.scala b/delta-lake/common/src/main/delta-40x/scala/org/apache/spark/sql/delta/hooks/GpuAutoCompact.scala
index 4ac4cac83a9..eed441e4006 100644
--- a/delta-lake/common/src/main/delta-40x/scala/org/apache/spark/sql/delta/hooks/GpuAutoCompact.scala
+++ b/delta-lake/common/src/main/delta-40x/scala/org/apache/spark/sql/delta/hooks/GpuAutoCompact.scala
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2025, NVIDIA CORPORATION.
+ * Copyright (c) 2025-2026, NVIDIA CORPORATION.
*
* This file was derived from OptimisticTransaction.scala and TransactionalWrite.scala
* in the Delta Lake project at https://github.com/delta-io/delta.
@@ -23,11 +23,8 @@ package org.apache.spark.sql.delta.hooks
import org.apache.spark.internal.MDC
import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.catalog.CatalogTable
import org.apache.spark.sql.delta._
import org.apache.spark.sql.delta.actions._
-import org.apache.spark.sql.delta.commands.DeltaOptimizeContext
-import org.apache.spark.sql.delta.commands.optimize._
import org.apache.spark.sql.delta.logging.DeltaLogKeys
import org.apache.spark.sql.delta.rapids.GpuOptimisticTransactionBase
import org.apache.spark.sql.delta.stats.AutoCompactPartitionStats
diff --git a/delta-lake/common/src/main/delta-40x/scala/org/apache/spark/sql/delta/rapids/GpuWriteIntoDelta.scala b/delta-lake/common/src/main/delta-40x/scala/org/apache/spark/sql/delta/rapids/GpuWriteIntoDelta.scala
index 9f7bf72ec37..d3ee99d43b7 100644
--- a/delta-lake/common/src/main/delta-40x/scala/org/apache/spark/sql/delta/rapids/GpuWriteIntoDelta.scala
+++ b/delta-lake/common/src/main/delta-40x/scala/org/apache/spark/sql/delta/rapids/GpuWriteIntoDelta.scala
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2025, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2026, NVIDIA CORPORATION.
*
* This file was derived from WriteIntoDelta.scala
* in the Delta Lake project at https://github.com/delta-io/delta.
@@ -45,8 +45,6 @@ import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
import org.apache.spark.sql.functions.{array, col, explode, lit, struct}
import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.nvidia.DFUDFShims
-import org.apache.spark.sql.rapids.shims.TrampolineConnectShims.SparkSession
import org.apache.spark.sql.types.StructType
/** GPU version of Delta Lake's WriteIntoDelta. */
diff --git a/delta-lake/delta-40x/src/main/scala/org/apache/spark/sql/delta/rapids/delta40x/GpuMergeIntoCommand.scala b/delta-lake/delta-40x/src/main/scala/org/apache/spark/sql/delta/rapids/delta40x/GpuMergeIntoCommand.scala
index e491febc243..717bd641b47 100644
--- a/delta-lake/delta-40x/src/main/scala/org/apache/spark/sql/delta/rapids/delta40x/GpuMergeIntoCommand.scala
+++ b/delta-lake/delta-40x/src/main/scala/org/apache/spark/sql/delta/rapids/delta40x/GpuMergeIntoCommand.scala
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2025, NVIDIA CORPORATION.
+ * Copyright (c) 2025-2026, NVIDIA CORPORATION.
*
* This file was derived from MergeIntoCommand.scala
* in the Delta Lake project at https://github.com/delta-io/delta.
@@ -30,11 +30,11 @@ import com.nvidia.spark.rapids.RapidsConf
import com.nvidia.spark.rapids.delta._
import org.apache.spark.SparkContext
-import org.apache.spark.sql.{DataFrame, Row, SparkSession => SqlSparkSession}
+import org.apache.spark.sql.{Row, SparkSession => SqlSparkSession}
import org.apache.spark.sql.catalyst.catalog.CatalogTable
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, Literal, Or}
import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.classic.{ColumnNodeToExpressionConverter, ExpressionUtils, SparkSession => ClassicSparkSession}
+import org.apache.spark.sql.classic.{SparkSession => ClassicSparkSession}
import org.apache.spark.sql.delta._
import org.apache.spark.sql.delta.actions.{AddFile, FileAction}
import org.apache.spark.sql.delta.commands.MergeIntoCommandBase
@@ -384,7 +384,7 @@ case class GpuMergeIntoCommand(
}
}
commitAndRecordStats(
- org.apache.spark.sql.classic.SparkSession.active,
+ ClassicSparkSession.active,
gpuDeltaTxn,
mergeActions,
startTime,
@@ -583,7 +583,6 @@ case class GpuMergeIntoCommand(
val matchedRowCounts = collectTouchedFiles.groupBy(ROW_ID_COL).agg(sum("one").as("count"))
// Get multiple matches and simultaneously collect (using touchedFilesAccum) the file names
- import org.apache.spark.sql.delta.implicits._
val mmRow = matchedRowCounts
.filter(col("count") > lit(1))
.select(
diff --git a/delta-lake/delta-40x/src/main/scala/org/apache/spark/sql/delta/rapids/delta40x/GpuOptimisticTransaction.scala b/delta-lake/delta-40x/src/main/scala/org/apache/spark/sql/delta/rapids/delta40x/GpuOptimisticTransaction.scala
index b0acf681806..c3ad36504b1 100644
--- a/delta-lake/delta-40x/src/main/scala/org/apache/spark/sql/delta/rapids/delta40x/GpuOptimisticTransaction.scala
+++ b/delta-lake/delta-40x/src/main/scala/org/apache/spark/sql/delta/rapids/delta40x/GpuOptimisticTransaction.scala
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2025, NVIDIA CORPORATION.
+ * Copyright (c) 2025-2026, NVIDIA CORPORATION.
*
* This file was derived from OptimisticTransaction.scala and TransactionalWrite.scala
* in the Delta Lake project at https://github.com/delta-io/delta.
@@ -32,7 +32,7 @@ import org.apache.spark.sql.{DataFrame, Dataset}
import org.apache.spark.sql.{SparkSession => SqlSparkSession}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.catalog.CatalogTable
-import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, RuntimeReplaceable}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
import org.apache.spark.sql.delta._
import org.apache.spark.sql.delta.actions.{AddFile, FileAction}
@@ -47,7 +47,6 @@ import org.apache.spark.sql.execution.metric.SQLMetric
import org.apache.spark.sql.functions.to_json
import org.apache.spark.sql.rapids.{BasicColumnarWriteJobStatsTracker, ColumnarWriteJobStatsTracker, GpuWriteJobStatsTracker}
import org.apache.spark.sql.rapids.delta.GpuIdentityColumn
-import org.apache.spark.sql.rapids.shims.TrampolineConnectShims
import org.apache.spark.sql.rapids.shims.TrampolineConnectShims.SparkSession
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.vectorized.ColumnarBatch
diff --git a/dist/README.md b/dist/README.md
index af53b7a39e0..aa23b6a6332 100644
--- a/dist/README.md
+++ b/dist/README.md
@@ -30,7 +30,7 @@ for each version of Spark supported in the jar, i.e., spark330/, spark341/, etc.
If you have to change the contents of the uber jar the following files control what goes into the base jar as classes that are not shaded.
-1. `unshimmed-common-from-spark320.txt` - This has classes and files that should go into the base jar with their normal
+1. `unshimmed-common-from-single-shim.txt` - This has classes and files that should go into the base jar with their normal
package name (not shaded). This includes user visible classes (i.e., com/nvidia/spark/SQLPlugin), python files,
and other files that aren't version specific. Uses Spark 3.2.0 built jar for these base classes as explained above.
2. `unshimmed-from-each-spark3xx.txt` - This is applied to all the individual Spark specific version jars to pull
diff --git a/dist/build/package-parallel-worlds.py b/dist/build/package-parallel-worlds.py
index ef64a4cd6bd..652b34410cf 100644
--- a/dist/build/package-parallel-worlds.py
+++ b/dist/build/package-parallel-worlds.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+# Copyright (c) 2023-2026, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -29,6 +29,7 @@ def shell_exec(shell_cmd):
artifacts = attributes.get('artifact_csv').split(',')
buildver_list = re.sub(r'\s+', '', project.getProperty('included_buildvers'),
flags=re.UNICODE).split(',')
+buildver_list = sorted(buildver_list, reverse=True)
source_basedir = project.getProperty('spark.rapids.source.basedir')
project_basedir = project.getProperty('spark.rapids.project.basedir')
project_version = project.getProperty('project.version')
@@ -73,8 +74,8 @@ def shell_exec(shell_cmd):
shell_exec(mvn_cmd)
dist_dir = os.sep.join([source_basedir, 'dist'])
- with open(os.sep.join([dist_dir, 'unshimmed-common-from-spark320.txt']), 'r') as f:
- from_spark320 = f.read().splitlines()
+ with open(os.sep.join([dist_dir, 'unshimmed-common-from-single-shim.txt']), 'r') as f:
+ from_single_shim = f.read().splitlines()
with open(os.sep.join([dist_dir, 'unshimmed-from-each-spark3xx.txt']), 'r') as f:
from_each = f.read().splitlines()
with zipfile.ZipFile(os.sep.join([deps_dir, art_jar]), 'r') as zip_handle:
@@ -82,13 +83,13 @@ def shell_exec(shell_cmd):
zip_handle.extractall(path=top_dist_jar_dir)
else:
zip_handle.extractall(path=os.sep.join([top_dist_jar_dir, classifier]))
- # IMPORTANT unconditional extract from first to the top
+ # IMPORTANT unconditional extract from the highest Spark version to the top
if bv == buildver_list[0] and art == 'sql-plugin-api':
zip_handle.extractall(path=top_dist_jar_dir)
# TODO deprecate
namelist = zip_handle.namelist()
matching_members = []
- glob_list = from_spark320 + from_each if bv == buildver_list[0] else from_each
+ glob_list = from_single_shim + from_each if bv == buildver_list[0] else from_each
for pat in glob_list:
new_matches = fnmatch.filter(namelist, pat)
matching_members += new_matches
diff --git a/dist/maven-antrun/build-parallel-worlds.xml b/dist/maven-antrun/build-parallel-worlds.xml
index bc4d7c9991c..9f422bc95fa 100644
--- a/dist/maven-antrun/build-parallel-worlds.xml
+++ b/dist/maven-antrun/build-parallel-worlds.xml
@@ -1,6 +1,6 @@
+ includesfile="${spark.rapids.source.basedir}/${rapids.module}/unshimmed-common-from-single-shim.txt"/>
diff --git a/dist/scripts/binary-dedupe.sh b/dist/scripts/binary-dedupe.sh
index b761ea57826..6813818bcd1 100755
--- a/dist/scripts/binary-dedupe.sh
+++ b/dist/scripts/binary-dedupe.sh
@@ -1,6 +1,6 @@
#!/bin/bash
-# Copyright (c) 2021-2025, NVIDIA CORPORATION.
+# Copyright (c) 2021-2026, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -85,9 +85,6 @@ function retain_single_copy() {
package_class_parts=("${path_parts[@]:2}")
- package_len=$((${#package_class_parts[@]} - 1))
- package_parts=("${package_class_parts[@]::$package_len}")
-
package_class_with_spaces="${package_class_parts[*]}"
# com/nvidia/spark/udf/Repr\$UnknownCapturedArg\$.class
package_class="${package_class_with_spaces// //}"
@@ -164,12 +161,16 @@ function verify_same_sha_for_unshimmed() {
# sha1 look up if there is an entry with the unshimmed class as a suffix
class_file_quoted=$(printf '%q' "$class_file")
-
# TODO currently RapidsShuffleManager is "removed" from /spark* by construction in
# dist pom.xml via ant. We could delegate this logic to this script
# and make both simmpler
- if [[ ! "$class_file_quoted" =~ com/nvidia/spark/rapids/spark[34].*/.*ShuffleManager.class ]]; then
-
+ #
+    # TODO ParquetCachedBatchSerializer is not bitwise-identical across shims after Spark 4.1.1,
+    # but it remains compatible with previous versions because it merely adds a new method.
+    # We might need to replace this strict bitwise check with a MiMa-style compatibility check:
+ # https://github.com/apache/spark/blob/7011706a0a8dbec6adb5b5b121921b29b314335f/sql/core/src/main/scala/org/apache/spark/sql/columnar/CachedBatchSerializer.scala#L75-L95
+ if [[ ! "$class_file_quoted" =~ com/nvidia/spark/rapids/spark[34].*/.*ShuffleManager.class && \
+ "$class_file_quoted" != "com/nvidia/spark/ParquetCachedBatchSerializer.class" ]]; then
if ! grep -q "/spark.\+/$class_file_quoted" "$SPARK_SHARED_TXT"; then
echo >&2 "$class_file is not bitwise-identical across shims"
exit 255
diff --git a/dist/unshimmed-common-from-spark320.txt b/dist/unshimmed-common-from-single-shim.txt
similarity index 83%
rename from dist/unshimmed-common-from-spark320.txt
rename to dist/unshimmed-common-from-single-shim.txt
index 3871188e1fc..81a5c61003c 100644
--- a/dist/unshimmed-common-from-spark320.txt
+++ b/dist/unshimmed-common-from-single-shim.txt
@@ -1,8 +1,6 @@
META-INF/DEPENDENCIES
META-INF/LICENSE
META-INF/NOTICE
-com/nvidia/spark/GpuCachedBatchSerializer*
-com/nvidia/spark/ParquetCachedBatchSerializer*
com/nvidia/spark/rapids/ExplainPlan.class
com/nvidia/spark/rapids/ExplainPlan$.class
com/nvidia/spark/rapids/ExplainPlanBase.class
diff --git a/integration_tests/requirements.txt b/integration_tests/requirements.txt
index 616e9318949..ce0fc4318ec 100644
--- a/integration_tests/requirements.txt
+++ b/integration_tests/requirements.txt
@@ -17,6 +17,7 @@ pandas
pyarrow == 17.0.0 ; python_version == '3.8'
pyarrow == 19.0.1 ; python_version >= '3.9'
pytest-xdist >= 2.0.0
+pytz
findspark
fsspec == 2025.3.0
fastparquet == 2024.5.0 ; python_version >= '3.9'
diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh
index eb63174d6a9..a619e6c66ae 100755
--- a/integration_tests/run_pyspark_from_build.sh
+++ b/integration_tests/run_pyspark_from_build.sh
@@ -79,7 +79,7 @@ else
# PySpark uses ".dev0" for "-SNAPSHOT" and either ".dev" for "preview" or ".devN" for "previewN"
# https://github.com/apache/spark/blob/66f25e314032d562567620806057fcecc8b71f08/dev/create-release/release-build.sh#L267
VERSION_STRING=$(PYTHONPATH=${SPARK_HOME}/python:${PY4J_FILE} python -c \
- "import pyspark, re; print(re.sub('\.dev[012]?$', '', pyspark.__version__))"
+ "import pyspark, re; print(re.sub(r'\.dev[012]?$', '', pyspark.__version__))"
)
SCALA_VERSION=`$SPARK_HOME/bin/pyspark --version 2>&1| grep Scala | awk '{split($4,v,"."); printf "%s.%s", v[1], v[2]}'`
diff --git a/integration_tests/src/main/python/asserts.py b/integration_tests/src/main/python/asserts.py
index b211b8b2bcc..cc9013cd845 100644
--- a/integration_tests/src/main/python/asserts.py
+++ b/integration_tests/src/main/python/asserts.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2025, NVIDIA CORPORATION.
+# Copyright (c) 2020-2026, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -95,6 +95,9 @@ def _assert_equal(cpu, gpu, float_check, path):
assert cpu == gpu, f"GPU ({gpu}) and CPU ({cpu}) decimal values are different at {path}"
elif isinstance(cpu, bytearray):
assert cpu == gpu, f"GPU ({gpu}) and CPU ({cpu}) bytearray values are different at {path}"
+ elif isinstance(cpu, bytes):
+ # Spark 4.1.0+ returns bytes instead of bytearray for binary data
+ assert cpu == gpu, f"GPU ({gpu}) and CPU ({cpu}) bytes values are different at {path}"
elif isinstance(cpu, timedelta):
# Used by interval type DayTimeInterval for Pyspark 3.3.0+
assert cpu == gpu, f"GPU ({gpu}) and CPU ({cpu}) timedelta values are different at {path}"
diff --git a/integration_tests/src/main/python/udf_test.py b/integration_tests/src/main/python/udf_test.py
index 19961e30cb7..51b3f4f2857 100644
--- a/integration_tests/src/main/python/udf_test.py
+++ b/integration_tests/src/main/python/udf_test.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2025, NVIDIA CORPORATION.
+# Copyright (c) 2020-2026, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -49,6 +49,8 @@
arrow_udf_conf = {
'spark.sql.execution.arrow.pyspark.enabled': 'true',
'spark.rapids.sql.exec.WindowInPandasExec': 'true',
+ # ArrowWindowPythonExec is the new name for WindowInPandasExec in Spark 4.1+
+ 'spark.rapids.sql.exec.ArrowWindowPythonExec': 'true',
'spark.rapids.sql.exec.FlatMapCoGroupsInPandasExec': 'true'
}
@@ -148,7 +150,7 @@ def pandas_sum(to_process: pd.Series) -> int:
lambda spark : binary_op_df(spark, data_gen)\
.groupBy('a')\
.agg(pandas_sum(f.col('b'))),
- conf=arrow_udf_conf)
+ conf=arrow_udf_conf_unsafe)
@ignore_order(local=True)
diff --git a/integration_tests/src/test/spark400/scala/org/apache/spark/sql/rapids/shims/TrampolineUtilShim.scala b/integration_tests/src/test/spark400/scala/org/apache/spark/sql/rapids/shims/TrampolineUtilShim.scala
index cf3556baec9..61924942c45 100644
--- a/integration_tests/src/test/spark400/scala/org/apache/spark/sql/rapids/shims/TrampolineUtilShim.scala
+++ b/integration_tests/src/test/spark400/scala/org/apache/spark/sql/rapids/shims/TrampolineUtilShim.scala
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2025, NVIDIA CORPORATION.
+ * Copyright (c) 2025-2026, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
/*** spark-rapids-shim-json-lines
{"spark": "400"}
{"spark": "401"}
+{"spark": "411"}
spark-rapids-shim-json-lines ***/
package org.apache.spark.sql.rapids.shims
diff --git a/jdk-profiles/pom.xml b/jdk-profiles/pom.xml
index 3d743f857bb..8b51165431d 100644
--- a/jdk-profiles/pom.xml
+++ b/jdk-profiles/pom.xml
@@ -1,6 +1,6 @@
[9,)
+
+ buildver
+ !411
+
@@ -63,7 +65,6 @@
net.alchim31.maven
scala-maven-plugin
- ${scala.plugin.version}
${java.major.version}
@@ -72,5 +73,31 @@
+
+ release411
+
+
+ buildver
+ 411
+
+
+
+
+
+
+ net.alchim31.maven
+ scala-maven-plugin
+
+
+ -Xlint:all,-serial,-path,-try,-processing|-target|${java.major.version}
+
+
+
+
+
+
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 5747fac6193..038331d7dc5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -769,6 +769,26 @@
delta-lake/delta-40x
+
+ release411
+
+
+ buildver
+ 411
+
+
+
+ 411
+ ${spark411.version}
+ ${spark411.version}
+ 1.13.1
+ rapids-4-spark-delta-stub
+ 2.0.7
+
+
+ delta-lake/delta-stub
+
+
-->
source-javadoc
@@ -794,14 +814,14 @@
scala-2.12
2.12
- 2.12.15
+ 2.12.21
scala-2.13
2.13
- 2.13.14
+ 2.13.18
@@ -894,7 +914,7 @@
26.02.0-SNAPSHOT
2.12
incremental
- 2.12.15
+ 2.12.21
4.9.2
@@ -1469,10 +1490,8 @@ This will force full Scala code rebuild in downstream modules.
-Wconf:cat=lint-adapted-args:e
diff --git a/scala2.13/jdk-profiles/pom.xml b/scala2.13/jdk-profiles/pom.xml
index d20e01a02ef..81ab0c08e5c 100644
--- a/scala2.13/jdk-profiles/pom.xml
+++ b/scala2.13/jdk-profiles/pom.xml
@@ -1,6 +1,6 @@
[9,)
+
+ buildver
+ !411
+
@@ -63,7 +65,6 @@
net.alchim31.maven
scala-maven-plugin
- ${scala.plugin.version}
${java.major.version}
@@ -72,5 +73,31 @@
+
+ release411
+
+
+ buildver
+ 411
+
+
+
+
+
+
+ net.alchim31.maven
+ scala-maven-plugin
+
+
+ -Xlint:all,-serial,-path,-try,-processing|-target|${java.major.version}
+
+
+
+
+
+
\ No newline at end of file
diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml
index 9ae5365d117..0c208bd2230 100644
--- a/scala2.13/pom.xml
+++ b/scala2.13/pom.xml
@@ -769,6 +769,26 @@
delta-lake/delta-40x
+
+ release411
+
+
+ buildver
+ 411
+
+
+
+ 411
+ ${spark411.version}
+ ${spark411.version}
+ 1.13.1
+ rapids-4-spark-delta-stub
+ 2.0.7
+
+
+ delta-lake/delta-stub
+
+
source-javadoc
@@ -794,14 +814,14 @@
scala-2.12
2.12
- 2.12.15
+ 2.12.21
scala-2.13
2.13
- 2.13.14
+ 2.13.18
@@ -894,7 +914,7 @@
26.02.0-SNAPSHOT
2.13
incremental
- 2.13.14
+ 2.13.18
4.9.2
@@ -1469,10 +1490,8 @@ This will force full Scala code rebuild in downstream modules.
-Wconf:cat=lint-adapted-args:e
-Xsource:2.13
- -Ywarn-unused:locals,patvars,privates
-Wconf:cat=deprecation:e,any:e
-Wconf:cat=scaladoc:wv
- -Wconf:cat=lint-multiarg-infix:wv
-Wconf:cat=other-nullary-override:e
-Wconf:msg=^(?=.*?method|value|type|object|trait|inheritance)(?=.*?deprecated)(?=.*?since 2.13).+$:s
-Wconf:msg=^(?=.*?Widening conversion from)(?=.*?is deprecated because it loses precision).+$:s
@@ -1480,6 +1499,14 @@ This will force full Scala code rebuild in downstream modules.
-Wconf:cat=unchecked&msg=outer reference:s
-Wconf:cat=unchecked&msg=eliminated by erasure:s
-Wconf:msg=^(?=.*?a value of type)(?=.*?cannot also be).+$:s
+ -Wconf:cat=unused:e
+ -Wconf:cat=unused-imports:e
+ -Wconf:cat=unused-locals:e
+ -Wconf:cat=unused-nowarn:e
+ -Wconf:cat=unused-params:e
+ -Wconf:cat=unused-pat-vars:e
+ -Wconf:cat=unused-privates:e
+ -Wunused:imports,locals,patvars,privates
diff --git a/scala2.13/sql-plugin-api/pom.xml b/scala2.13/sql-plugin-api/pom.xml
index c3c28b8c3c9..f4a79c3cde5 100644
--- a/scala2.13/sql-plugin-api/pom.xml
+++ b/scala2.13/sql-plugin-api/pom.xml
@@ -1,6 +1,6 @@