Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions ci/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# limitations under the License.
#

ARG CUDA_VERSION=12.0.1
ARG CUDA_VERSION=12.2.2
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04

# ubuntu22
Expand Down Expand Up @@ -47,6 +47,6 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86
&& conda config --set solver libmamba

# install cuML
ARG CUML_VER=25.10
RUN conda install -y -c rapidsai -c conda-forge -c nvidia cuml=$CUML_VER cuvs=$CUML_VER python=3.10 cuda-version=12.0 numpy~=1.0 \
ARG RAPIDS_VERSION=25.12
RUN conda install -y -c rapidsai -c conda-forge -c nvidia cuml=$RAPIDS_VERSION cuvs=$RAPIDS_VERSION python=3.10 pylibraft=$RAPIDS_VERSION raft-dask=$RAPIDS_VERSION cuda-version=12.2 numpy~=1.0 \
&& conda clean --all -f -y
115 changes: 0 additions & 115 deletions docker/Dockerfile

This file was deleted.

6 changes: 4 additions & 2 deletions docker/Dockerfile.pip
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@
# limitations under the License.
#

ARG CUDA_VERSION=12.0.1
ARG CUDA_VERSION=12.2.2
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04

ARG PYSPARK_VERSION=3.3.1
ARG RAPIDS_VERSION=25.10.0
ARG RAPIDS_VERSION=25.12.0
ARG ARCH=amd64
#ARG ARCH=arm64

Expand Down Expand Up @@ -50,6 +50,8 @@ RUN pip install --no-cache-dir \
cudf-cu12~=${RAPIDS_VERSION} \
cuml-cu12~=${RAPIDS_VERSION} \
cuvs-cu12~=${RAPIDS_VERSION} \
pylibraft-cu12~=${RAPIDS_VERSION} \
raft-dask-cu12~=${RAPIDS_VERSION} \
numpy~=1.0 \
--extra-index-url=https://pypi.nvidia.com

Expand Down
6 changes: 3 additions & 3 deletions docker/Dockerfile.python
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
# limitations under the License.
#

ARG CUDA_VERSION=12.0.1
ARG CUDA_VERSION=12.2.2
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04

ARG CUML_VERSION=25.10
ARG RAPIDS_VERSION=25.12

# ubuntu22
RUN sed -i -e 's|http://archive.ubuntu.com/ubuntu|https://archive.ubuntu.com/ubuntu|g' \
Expand Down Expand Up @@ -47,7 +47,7 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py38_4.10.3-Linu

# install cuML

RUN conda install -y -c rapidsai -c conda-forge -c nvidia python=3.10 cuda-version=12.0 cuml=$CUML_VERSION numpy~=1.0 \
RUN conda install -y -c rapidsai -c conda-forge -c nvidia python=3.10 cuda-version=12.2 cuml=$RAPIDS_VERSION cudf=$RAPIDS_VERSION cuvs=$RAPIDS_VERSION pylibraft=$RAPIDS_VERSION raft-dask=$RAPIDS_VERSION numpy~=1.0 \
&& conda clean --all -f -y

# install python dependencies
Expand Down
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
project = 'spark-rapids-ml'
copyright = '2025, NVIDIA'
author = 'NVIDIA'
release = '25.10.0'
release = '25.12.0'

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
Expand Down
6 changes: 3 additions & 3 deletions jvm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ including setting up the server and running client-side tests.
To start the Spark Connect server with Spark Rapids ML support, follow these steps:

```shell
conda activate rapids-25.10 # from spark-rapids-ml installation
conda activate rapids-25.12 # from spark-rapids-ml installation
export SPARK_HOME=<directory where spark was installed above>
export PYSPARK_PYTHON=$(which python)
export PLUGIN_JAR=$(pip show spark-rapids-ml | grep Location: | cut -d ' ' -f 2 )/spark_rapids_ml/jars/com.nvidia.rapids.ml-25.10.0.jar
export PLUGIN_JAR=$(pip show spark-rapids-ml | grep Location: | cut -d ' ' -f 2 )/spark_rapids_ml/jars/com.nvidia.rapids.ml-25.12.0.jar
$SPARK_HOME/sbin/start-connect-server.sh --master local[*] \
--jars $PLUGIN_JAR \
--conf spark.driver.memory=20G
Expand Down Expand Up @@ -107,7 +107,7 @@ mvn clean package -DskipTests
If you would like to compile the plugin and run the unit tests, install the `spark-rapids-ml` Python package and its dependencies per the above instructions and run the following command:

``` shell
conda activate rapids-25.10
conda activate rapids-25.12
export PYSPARK_PYTHON=$(which python)
mvn clean package
```
Expand Down
2 changes: 1 addition & 1 deletion jvm/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

<groupId>com.nvidia.rapids</groupId>
<artifactId>ml</artifactId>
<version>25.10.0</version>
<version>25.12.0</version>
<packaging>jar</packaging>

<properties>
Expand Down
7 changes: 5 additions & 2 deletions notebooks/aws-emr/init-bootstrap-action.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,20 @@ sudo bash -c "wget https://www.python.org/ftp/python/3.10.9/Python-3.10.9.tgz &&
tar xzf Python-3.10.9.tgz && cd Python-3.10.9 && \
./configure --enable-optimizations && make altinstall"

RAPIDS_VERSION=25.10.0
RAPIDS_VERSION=25.12.0

sudo /usr/local/bin/pip3.10 install --upgrade pip

# install scikit-learn
sudo /usr/local/bin/pip3.10 install scikit-learn

# install cudf and cuml
sudo /usr/local/bin/pip3.10 install --no-cache-dir cudf-cu12~=${RAPIDS_VERSION} \
sudo /usr/local/bin/pip3.10 install --no-cache-dir \
cudf-cu12~=${RAPIDS_VERSION} \
cuml-cu12~=${RAPIDS_VERSION} \
cuvs-cu12~=${RAPIDS_VERSION} \
pylibraft-cu12~=${RAPIDS_VERSION} \
raft-dask-cu12~=${RAPIDS_VERSION} \
--extra-index-url=https://pypi.nvidia.com --verbose
sudo /usr/local/bin/pip3.10 install spark-rapids-ml
sudo /usr/local/bin/pip3.10 list
Expand Down
6 changes: 3 additions & 3 deletions notebooks/databricks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@ If you already have a Databricks account, you can run the example notebooks on a
```bash
export WS_SAVE_DIR="/path/to/directory/in/workspace"
databricks workspace mkdirs ${WS_SAVE_DIR} --profile ${PROFILE}
databricks workspace import --format AUTO --file init-pip-cuda-12.0.sh ${WS_SAVE_DIR}/init-pip-cuda-12.0.sh --profile ${PROFILE}
databricks workspace import --format AUTO --file init-pip-cuda-12.sh ${WS_SAVE_DIR}/init-pip-cuda-12.sh --profile ${PROFILE}
```
**Note**: the init script does the following on each Spark node:
- updates the CUDA runtime to 12.0 (required for Spark Rapids ML dependencies).
- updates the CUDA runtime (required for Spark Rapids ML dependencies).
- downloads and installs the [Spark-Rapids](https://github.com/NVIDIA/spark-rapids) plugin for accelerating data loading and Spark SQL.
- installs various `cuXX` dependencies via pip.
- if the cluster environment variable `SPARK_RAPIDS_ML_NO_IMPORT_ENABLED=1` is defined (see below), the init script also modifies a Databricks notebook kernel startup script to enable no-import change UX for the cluster. See [no-import-change](../README.md#no-import-change).
- Create a cluster using **Databricks 13.3 LTS ML GPU Runtime** using at least two single-gpu workers and add the following configurations to the **Advanced options**.
- **Init Scripts**
- add the workspace path to the uploaded init script `${WS_SAVE_DIR}/init-pip-cuda-12.0.sh` as set above (but substitute variables manually in the form).
- add the workspace path to the uploaded init script `${WS_SAVE_DIR}/init-pip-cuda-12.sh` as set above (but substitute variables manually in the form).
- **Spark**
- **Spark config**
```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,31 @@ set -ex
# IMPORTANT: specify RAPIDS_VERSION fully 23.10.0 and not 23.10
# also in general, RAPIDS_VERSION (python) fields should omit any leading 0 in month/minor field (i.e. 23.8.0 and not 23.08.0)
# while SPARK_RAPIDS_VERSION (jar) should have leading 0 in month/minor (e.g. 23.08.2 and not 23.8.2)
RAPIDS_VERSION=25.10.0
RAPIDS_VERSION=25.12.0
SPARK_RAPIDS_VERSION=25.08.0

curl -L https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/${SPARK_RAPIDS_VERSION}/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}-cuda12.jar -o /databricks/jars/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}.jar

# install cudatoolkit 12.0 via runfile approach
wget https://developer.download.nvidia.com/compute/cuda/12.0.1/local_installers/cuda_12.0.1_525.85.12_linux.run
sh cuda_12.0.1_525.85.12_linux.run --silent --toolkit
# install cudatoolkit 12.2 via runfile approach
wget https://developer.download.nvidia.com/compute/cuda/12.2.2/local_installers/cuda_12.2.2_535.104.05_linux.run
sh cuda_12.2.2_535.104.05_linux.run --silent --toolkit

# reset symlink and update library loading paths
rm /usr/local/cuda
ln -s /usr/local/cuda-12.0 /usr/local/cuda
ln -s /usr/local/cuda-12.2 /usr/local/cuda

# upgrade pip
/databricks/python/bin/pip install --upgrade pip

# install cudf, cuml and their rapids dependencies
# using ~= pulls in latest micro version patches
/databricks/python/bin/pip install cudf-cu12~=${RAPIDS_VERSION} \
/databricks/python/bin/pip install --no-cache-dir \
cudf-cu12~=${RAPIDS_VERSION} \
cuml-cu12~=${RAPIDS_VERSION} \
cuvs-cu12~=${RAPIDS_VERSION} \
pylibraft-cu12~=${RAPIDS_VERSION} \
raft-dask-cu12~=${RAPIDS_VERSION} \
numpy~=1.0 \
--extra-index-url=https://pypi.nvidia.com

# install spark-rapids-ml
Expand Down
2 changes: 1 addition & 1 deletion notebooks/dataproc/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ If you already have a Dataproc account, you can run the example notebooks on a D
If you wish to enable [no-import-change](../README.md#no-import-change) UX for the cluster, change the `spark-rapids-ml-no-import-enabled` metadata value to `1` in the command. The initialization script `spark_rapids_ml.sh` checks this metadata value and modifies the runtime accordingly.

```
export RAPIDS_VERSION=25.10.0
export RAPIDS_VERSION=25.12.0

gcloud dataproc clusters create $USER-spark-rapids-ml \
--image-version=2.2-ubuntu22 \
Expand Down
9 changes: 7 additions & 2 deletions notebooks/dataproc/spark_rapids_ml.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,17 @@

set -ex

RAPIDS_VERSION=25.10.0
RAPIDS_VERSION=25.12.0


# install cudf and cuml
pip install --upgrade pip
pip install cudf-cu12~=${RAPIDS_VERSION} cuml-cu12~=${RAPIDS_VERSION} cuvs-cu12~=${RAPIDS_VERSION} \
pip install --no-cache-dir \
cudf-cu12~=${RAPIDS_VERSION} \
cuml-cu12~=${RAPIDS_VERSION} \
cuvs-cu12~=${RAPIDS_VERSION} \
pylibraft-cu12~=${RAPIDS_VERSION} \
raft-dask-cu12~=${RAPIDS_VERSION} \
--extra-index-url=https://pypi.nvidia.com

# install spark-rapids-ml
Expand Down
3 changes: 2 additions & 1 deletion notebooks/logistic-regression.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -739,9 +739,10 @@
"outputs": [],
"source": [
"from spark_rapids_ml.classification import LogisticRegression as GPULR\n",
"from spark_rapids_ml.metrics.utils import logistic_regression_objective\n",
"gpu_lr, gpu_model, gpu_fit_time, gpu_test_logLoss = sparse_vectors_compat(GPULR)\n",
"print(f\"GPU fit took: {gpu_fit_time} sec\")\n",
"print(f\"GPU training objective: {gpu_model.objective}\")\n",
"print(f\"GPU training objective: {logistic_regression_objective(df_train, gpu_model)}\")\n",
"print(f\"GPU test logLoss: {gpu_test_logLoss}\")"
]
},
Expand Down
8 changes: 4 additions & 4 deletions python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ This PySpark-compatible API leverages the RAPIDS cuML python API to provide GPU-

For simplicity, the following instructions just use Spark local mode, assuming a server with at least one GPU.

First, install RAPIDS cuML per [these instructions](https://rapids.ai/start.html). Example for CUDA Toolkit 12.0:
First, install RAPIDS cuML per [these instructions](https://rapids.ai/start.html). Example for CUDA Toolkit 12.2:
```bash
conda create -n rapids-25.10 \
conda create -n rapids-25.12 \
-c rapidsai -c conda-forge -c nvidia \
cuml=25.10 cuvs=25.10 python=3.10 cuda-version=12.0 numpy~=1.0
python=3.10 cuml=25.12 cuvs=25.12 pylibraft=25.12 raft-dask=25.12 cuda-version=12.2 numpy~=1.0
```

**Note**: while testing, we recommend using conda or docker to simplify installation and isolate your environment while experimenting. Once you have a working environment, you can then try installing directly, if necessary.
Expand All @@ -31,7 +31,7 @@ conda create -n rapids-25.10 \

Once you have the conda environment, activate it and install the required packages.
```bash
conda activate rapids-25.10
conda activate rapids-25.12

## for development access to notebooks, tests, and benchmarks
git clone --branch main https://github.com/NVIDIA/spark-rapids-ml.git
Expand Down
2 changes: 1 addition & 1 deletion python/benchmark/databricks/cpu_cluster_spec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ cat <<EOF
"spark_conf": {},
"aws_attributes": {
"first_on_demand": 1,
"availability": "SPOT_WITH_FALLBACK",
"availability": "ON_DEMAND",
"zone_id": "us-west-2a",
"spot_bid_price_percent": 100,
"ebs_volume_type": "GENERAL_PURPOSE_SSD",
Expand Down
7 changes: 5 additions & 2 deletions python/benchmark/databricks/gpu_cluster_spec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,12 @@ cat <<EOF
"spark.sql.files.maxPartitionBytes": "2000000000000",
"spark.databricks.delta.optimizeWrite.enabled": "false"
},
"spark_env_vars": {
"LD_LIBRARY_PATH": "/usr/local/cuda/compat"
},
"aws_attributes": {
"first_on_demand": 1,
"availability": "SPOT_WITH_FALLBACK",
"availability": "ON_DEMAND",
"zone_id": "us-west-2a",
"spot_bid_price_percent": 100,
"ebs_volume_count": 0
Expand All @@ -55,7 +58,7 @@ cat <<EOF
"init_scripts": [
{
"workspace": {
"destination": "${INIT_SCRIPT_DIR}/init-pip-cuda-12.0.sh"
"destination": "${INIT_SCRIPT_DIR}/init-pip-cuda-12.sh"
}
}
],
Expand Down
Loading