# Skip to content
#
# trivial -> alwaysTrue #6
#
# trivial -> alwaysTrue
#
# trivial -> alwaysTrue #6
#
# Workflow file for this run

# Workflow name and triggers.
# Runs on pushes and pull requests; `paths-ignore` is scoped to each event so
# that changes touching only Markdown or text files do not trigger a run.
name: "Delta Spark"
on:
  push:
    paths-ignore:
      - '**.md'
      - '**.txt'
  pull_request:
    paths-ignore:
      - '**.md'
      - '**.txt'
jobs:
  # Generate Spark versions matrix from CrossSparkVersions.scala
  # This ensures the workflow always uses the versions defined in the build
  generate-matrix:
    name: "Generate Spark Versions Matrix"
    runs-on: ubuntu-24.04
    outputs:
      # Consumed by the `test` job's matrix through fromJson().
      spark_versions: ${{ steps.generate.outputs.spark_versions }}
    steps:
      - uses: actions/checkout@v3
      - name: install java
        uses: actions/setup-java@v3
        with:
          distribution: "zulu"
          java-version: "17"
      - name: Generate Spark versions matrix
        id: generate
        run: |
          # The script automatically generates spark-versions.json from CrossSparkVersions.scala
          SPARK_VERSIONS=$(python3 project/scripts/get_spark_version_info.py --all-spark-versions)
          echo "spark_versions=$SPARK_VERSIONS" >> "$GITHUB_OUTPUT"
          echo "Generated Spark versions: $SPARK_VERSIONS"

  # Runs the Spark test group once per (Spark version, Scala version, shard) combination.
  test:
    name: "DS: Spark ${{ matrix.spark_version }}, Scala ${{ matrix.scala }}, Shard ${{ matrix.shard }}"
    runs-on: ubuntu-24.04
    needs: generate-matrix
    strategy:
      fail-fast: false
      matrix:
        # Spark versions are dynamically generated from CrossSparkVersions.scala
        # DO NOT hardcode versions here - they are automatically loaded from the build configuration
        spark_version: ${{ fromJson(needs.generate-matrix.outputs.spark_versions) }}
        # These Scala versions must match those in the build.sbt
        scala: ["2.13.16"]
        # Important: This list of shards must be [0..NUM_SHARDS - 1]
        shard: [0, 1, 2, 3]
    env:
      SCALA_VERSION: ${{ matrix.scala }}
      SPARK_VERSION: ${{ matrix.spark_version }}
      # Important: This must be the same as the length of shards in matrix
      NUM_SHARDS: 4
    steps:
      - uses: actions/checkout@v3
      - name: Get Spark version details
        id: spark-details
        run: |
          # The script automatically generates spark-versions.json if needed
          # SPARK_VERSION comes from the job-level env (same value as matrix.spark_version);
          # reading it from the environment avoids expanding the expression inside the script.
          JVM_VERSION=$(python3 project/scripts/get_spark_version_info.py --get-field "$SPARK_VERSION" targetJvm | jq -r '.')
          echo "jvm_version=$JVM_VERSION" >> "$GITHUB_OUTPUT"
          echo "Using JVM version: $JVM_VERSION for Spark $SPARK_VERSION"
      - name: install java
        uses: actions/setup-java@v3
        with:
          distribution: "zulu"
          java-version: ${{ steps.spark-details.outputs.jvm_version }}
      - name: Cache Scala, SBT
        uses: actions/cache@v3
        with:
          path: |
            ~/.sbt
            ~/.ivy2
            ~/.cache/coursier
          # Change the key if dependencies are changed. For each key, GitHub Actions will cache
          # the above directories when we use the key for the first time. After that, each run will
          # just use the cache. The cache is immutable so we need to use a new key when trying to
          # cache new stuff.
          key: delta-sbt-cache-spark${{ matrix.spark_version }}-scala${{ matrix.scala }}
      - name: Install Job dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev libffi-dev liblzma-dev python3-openssl git
          # -y keeps apt from stopping at a confirmation prompt on the non-interactive runner
          sudo apt install -y libedit-dev
          # -f makes curl fail on an HTTP error instead of saving the error page as the archive
          curl -fLO https://github.com/bufbuild/buf/releases/download/v1.28.1/buf-Linux-x86_64.tar.gz
          mkdir -p ~/buf
          tar -xvzf buf-Linux-x86_64.tar.gz -C ~/buf --strip-components 1
          rm buf-Linux-x86_64.tar.gz
          sudo apt install -y python3-pip --fix-missing
          sudo pip3 install pipenv==2024.4.1
          curl -fsSL https://pyenv.run | bash
          # Use $HOME rather than "~": a tilde inside double quotes is not expanded, which
          # leaves a literal "~" entry in PATH.
          export PATH="$HOME/.pyenv/bin:$PATH"
          eval "$(pyenv init -)"
          eval "$(pyenv virtualenv-init -)"
          pyenv install 3.9
          pyenv global system 3.9
          pipenv --python 3.9 install
          # Update the pip version to 24.0. By default `pyenv.run` installs the latest pip version
          # available. From version 24.1, `pip` doesn't allow installing python packages
          # with version string containing `-`. In Delta-Spark case, the pypi package generated has
          # `-SNAPSHOT` in version (e.g. `3.3.0-SNAPSHOT`) as the version is picked up from
          # the `version.sbt` file.
          pipenv run pip install pip==24.0 setuptools==69.5.1 wheel==0.43.0
          pipenv run pip install flake8==3.9.0
          pipenv run pip install black==23.12.1
          pipenv run pip install importlib_metadata==3.10.0
          pipenv run pip install mypy==1.8.0
          pipenv run pip install mypy-protobuf==3.3.0
          pipenv run pip install cryptography==37.0.4
          pipenv run pip install twine==4.0.1
          # NOTE(review): the next two installs replace the wheel and setuptools versions that
          # were pinned together with pip==24.0 above, so these older pins are the ones left in
          # the environment. Confirm which pair is intended and drop the other.
          pipenv run pip install wheel==0.33.4
          pipenv run pip install setuptools==41.1.0
          pipenv run pip install pydocstyle==3.0.0
          pipenv run pip install pandas==2.2.0
          pipenv run pip install pyarrow==11.0.0
          pipenv run pip install pypandoc==1.3.3
          pipenv run pip install numpy==1.22.4
          pipenv run pip install grpcio==1.67.0
          pipenv run pip install grpcio-status==1.67.0
          pipenv run pip install googleapis-common-protos==1.65.0
          pipenv run pip install protobuf==5.29.1
          pipenv run pip install googleapis-common-protos-stubs==2.2.0
          pipenv run pip install grpc-stubs==1.24.11
      - name: Scala structured logging style check
        run: |
          # The check is optional: it only runs when the script exists in the checkout.
          if [ -f ./dev/spark_structured_logging_style.py ]; then
              python3 ./dev/spark_structured_logging_style.py
          fi
      - name: Run Scala/Java tests
        # when changing TEST_PARALLELISM_COUNT make sure to also change it in spark_python_test.yaml
        run: |
          TEST_PARALLELISM_COUNT=4 pipenv run python run-tests.py --group spark --shard ${{ matrix.shard }} --spark-version "$SPARK_VERSION"