---
# NOTE(review): removed web-scrape artifacts ("Skip to content" and repeated
# PR-title lines) that preceded the workflow and made the file invalid YAML.
name: "Delta Spark Python"
# `on` is a YAML 1.1 boolean-looking key; GitHub's loader handles it correctly.
on:
  push:
    paths-ignore:
      - '**.md'
      - '**.txt'
  pull_request:
    paths-ignore:
      - '**.md'
      - '**.txt'
jobs:
  # Generate Spark versions matrix from CrossSparkVersions.scala.
  # This workflow tests against released versions only (no snapshots).
  generate-matrix:
    name: "Generate Released Spark Versions Matrix"
    runs-on: ubuntu-24.04
    outputs:
      spark_versions: ${{ steps.generate.outputs.spark_versions }}
    steps:
      - uses: actions/checkout@v3
      - name: install java
        uses: actions/setup-java@v3
        with:
          distribution: "zulu"
          java-version: "17"
      - name: Generate released Spark versions matrix
        id: generate
        run: |
          # Get only released versions (exclude snapshots)
          SPARK_VERSIONS=$(python3 project/scripts/get_spark_version_info.py --released-spark-versions)
          echo "spark_versions=$SPARK_VERSIONS" >> $GITHUB_OUTPUT
          echo "Generated released Spark versions: $SPARK_VERSIONS"
  test:
    name: "DSP (${{ matrix.spark_version }})"
    runs-on: ubuntu-24.04
    needs: generate-matrix
    strategy:
      matrix:
        # Spark versions are dynamically generated - released versions only
        spark_version: ${{ fromJson(needs.generate-matrix.outputs.spark_versions) }}
        # These Scala versions must match those in the build.sbt.
        # Quoted so no YAML parser can ever mis-type the version string.
        scala: ["2.13.16"]
    env:
      SCALA_VERSION: ${{ matrix.scala }}
      SPARK_VERSION: ${{ matrix.spark_version }}
    steps:
      - uses: actions/checkout@v3
      - name: Get Spark version details
        id: spark-details
        run: |
          # Get JVM version and full Spark version for this matrix entry.
          # FIX: `jq -r` without a filter is a usage error (jq requires a
          # filter argument); the identity filter `.` is what strips the JSON
          # quoting from the returned scalar.
          JVM_VERSION=$(python3 project/scripts/get_spark_version_info.py --get-field "${{ matrix.spark_version }}" targetJvm | jq -r .)
          FULL_VERSION=$(python3 project/scripts/get_spark_version_info.py --get-field "${{ matrix.spark_version }}" fullVersion | jq -r .)
          echo "jvm_version=$JVM_VERSION" >> $GITHUB_OUTPUT
          echo "spark_full_version=$FULL_VERSION" >> $GITHUB_OUTPUT
          echo "Using JVM $JVM_VERSION for Spark ${{ matrix.spark_version }} (full: $FULL_VERSION)"
      - name: install java
        uses: actions/setup-java@v3
        with:
          distribution: "zulu"
          # JVM version is chosen per Spark version by the matrix step above.
          java-version: ${{ steps.spark-details.outputs.jvm_version }}
      - name: Cache Scala, SBT
        uses: actions/cache@v3
        with:
          path: |
            ~/.sbt
            ~/.ivy2
            ~/.cache/coursier
          # Change the key if dependencies are changed. For each key, GitHub Actions will cache the
          # the above directories when we use the key for the first time. After that, each run will
          # just use the cache. The cache is immutable so we need to use a new key when trying to
          # cache new stuff.
          key: delta-sbt-cache-spark${{ matrix.spark_version }}-scala${{ matrix.scala }}
      - name: Install Job dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev libffi-dev liblzma-dev python3-openssl git
          sudo apt install libedit-dev
          curl -LO https://github.com/bufbuild/buf/releases/download/v1.28.1/buf-Linux-x86_64.tar.gz
          mkdir -p ~/buf
          tar -xvzf buf-Linux-x86_64.tar.gz -C ~/buf --strip-components 1
          rm buf-Linux-x86_64.tar.gz
          sudo apt install python3-pip --fix-missing
          sudo pip3 install pipenv==2024.4.1
          curl https://pyenv.run | bash
          export PATH="~/.pyenv/bin:$PATH"
          eval "$(pyenv init -)"
          eval "$(pyenv virtualenv-init -)"
          pyenv install 3.10
          pyenv global system 3.10
          pipenv --python 3.10 install
          # Update the pip version to 24.0. By default `pyenv.run` installs the latest pip version
          # available. From version 24.1, `pip` doesn't allow installing python packages
          # with version string containing `-`. In Delta-Spark case, the pypi package generated has
          # `-SNAPSHOT` in version (e.g. `3.3.0-SNAPSHOT`) as the version is picked up from
          # the`version.sbt` file.
          pipenv run pip install pip==24.0 setuptools==69.5.1 wheel==0.43.0
          # Install pyspark matching the full spark version
          pipenv run pip install pyspark==${{ steps.spark-details.outputs.spark_full_version }}
          pipenv run pip install flake8==3.9.0
          pipenv run pip install black==23.12.1
          pipenv run pip install importlib_metadata==3.10.0
          pipenv run pip install mypy==1.8.0
          pipenv run pip install mypy-protobuf==3.3.0
          pipenv run pip install cryptography==37.0.4
          pipenv run pip install twine==4.0.1
          # NOTE(review): the next two lines downgrade wheel (0.43.0 -> 0.33.4)
          # and setuptools (69.5.1 -> 41.1.0) installed above — looks
          # intentional (legacy packaging toolchain) but worth confirming.
          pipenv run pip install wheel==0.33.4
          pipenv run pip install setuptools==41.1.0
          pipenv run pip install pydocstyle==3.0.0
          pipenv run pip install pandas==2.2.0
          pipenv run pip install pyarrow==15.0.0
          pipenv run pip install pypandoc==1.3.3
          pipenv run pip install numpy==1.22.4
          pipenv run pip install googleapis-common-protos-stubs==2.2.0
          pipenv run pip install grpc-stubs==1.24.11
          # Version-specific dependencies for Spark Connect compatibility
          if [[ "${{ matrix.spark_version }}" == "4.0" ]]; then
            pipenv run pip install grpcio==1.67.0
            pipenv run pip install grpcio-status==1.67.0
            pipenv run pip install googleapis-common-protos==1.65.0
            pipenv run pip install protobuf==5.29.1
          else
            # Spark 4.1+ requirements from https://github.com/apache/spark/blob/branch-4.1/dev/requirements.txt
            pipenv run pip install grpcio==1.76.0
            pipenv run pip install grpcio-status==1.76.0
            pipenv run pip install googleapis-common-protos==1.71.0
            pipenv run pip install protobuf==6.33.0
            pipenv run pip install zstandard==0.25.0
          fi
      - name: Run Python tests
        # when changing TEST_PARALLELISM_COUNT make sure to also change it in spark_test.yaml
        run: |
          TEST_PARALLELISM_COUNT=4 pipenv run python run-tests.py --group spark-python --spark-version ${{ matrix.spark_version }}