[GLUTEN-6887][VL] Daily Update Velox Version (2026_03_11) #4462
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Velox Backend (x86)

# Run only for pull requests that touch code affecting the Velox backend build
# or its test tooling.
on:
  pull_request:
    paths:
      - '.github/workflows/velox_backend_x86.yml'
      - '.github/workflows/util/install-resources.sh' # TODO: remove after image update
      - 'pom.xml'
      - 'backends-velox/**'
      - 'gluten-uniffle/**'
      - 'gluten-celeborn/**'
      - 'gluten-ras/**'
      - 'gluten-core/**'
      - 'gluten-substrait/**'
      - 'gluten-arrow/**'
      - 'gluten-delta/**'
      - 'gluten-iceberg/**'
      - 'gluten-hudi/**'
      - 'gluten-paimon/**'
      - 'gluten-ut/**'
      - 'package/**'
      - 'shims/**'
      - 'tools/gluten-it/**'
      - 'ep/build-velox/**'
      - 'cpp/**'
      - 'dev/**'
      - 'build/mvn'

env:
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  # Shared Maven / wget invocations used by every job below.
  MVN_CMD: 'build/mvn -ntp'
  WGET_CMD: 'wget -nv'
  CCACHE_DIR: "${{ github.workspace }}/.ccache"
  # spark.sql.ansi.enabled defaults to false.
  SPARK_ANSI_SQL_MODE: false

# Cancel superseded runs of this workflow for the same branch/commit.
concurrency:
  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
  cancel-in-progress: true
jobs:
  # Builds the Gluten native libraries once inside a CentOS 7 vcpkg container
  # and publishes them (plus the patched Arrow jars) as artifacts for all
  # downstream test jobs.
  build-native-lib-centos-7:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Get Ccache
        uses: actions/cache/restore@v4
        with:
          path: '${{ env.CCACHE_DIR }}'
          key: ccache-centos7-release-default-${{github.sha}}
          restore-keys: |
            ccache-centos7-release-default
      - name: Build Gluten native libraries
        run: |
          docker pull apache/gluten:vcpkg-centos-7
          docker run -v $GITHUB_WORKSPACE:/work -w /work apache/gluten:vcpkg-centos-7 bash -c "
            set -e
            yum install tzdata -y
            df -a
            cd /work
            export CCACHE_DIR=/work/.ccache
            export CCACHE_MAXSIZE=1G
            mkdir -p /work/.ccache
            ccache -sz
            bash dev/ci-velox-buildstatic-centos-7.sh
            ccache -s
            mkdir -p /work/.m2/repository/org/apache/arrow/
            cp -r /root/.m2/repository/org/apache/arrow/* /work/.m2/repository/org/apache/arrow/
          "
      # Save the cache even when the build step fails, so partial compilations
      # still speed up the next run.
      - name: "Save ccache"
        if: always()
        uses: actions/cache/save@v4
        id: ccache
        with:
          path: '${{ env.CCACHE_DIR }}'
          key: ccache-centos7-release-default-${{github.sha}}
      - uses: actions/upload-artifact@v4
        with:
          name: velox-native-lib-centos-7-${{github.sha}}
          path: ./cpp/build/
          if-no-files-found: error
      - uses: actions/upload-artifact@v4
        with:
          name: arrow-jars-centos-7-${{github.sha}}
          path: .m2/repository/org/apache/arrow/
          if-no-files-found: error
| tpc-test-ubuntu: | |
| needs: build-native-lib-centos-7 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| os: [ "ubuntu:20.04", "ubuntu:22.04" ] | |
| spark: [ "spark-3.3", "spark-3.4", "spark-3.5", "spark-4.0", "spark-4.1" ] | |
| java: [ "java-8", "java-11", "java-17", "java-21" ] | |
| # Spark supports JDK17 since 3.3. | |
| exclude: | |
| - spark: spark-3.3 | |
| java: java-21 | |
| - spark: spark-3.4 | |
| java: java-21 | |
| - spark: spark-3.5 | |
| java: java-21 | |
| - spark: spark-3.4 | |
| java: java-17 | |
| - spark: spark-3.5 | |
| java: java-17 | |
| - spark: spark-3.3 | |
| java: java-11 | |
| - spark: spark-3.4 | |
| java: java-11 | |
| - os: ubuntu:20.04 | |
| java: java-17 | |
| - os: ubuntu:20.04 | |
| java: java-11 | |
| - os: ubuntu:20.04 | |
| java: java-21 | |
| - spark: spark-4.0 | |
| java: java-8 | |
| - spark: spark-4.0 | |
| java: java-11 | |
| - spark: spark-4.1 | |
| java: java-8 | |
| - spark: spark-4.1 | |
| java: java-11 | |
| runs-on: ubuntu-22.04 | |
| container: ${{ matrix.os }} | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Download All Native Artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: velox-native-lib-centos-7-${{github.sha}} | |
| path: ./cpp/build/ | |
| - name: Download All Arrow Jar Artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: arrow-jars-centos-7-${{github.sha}} | |
| path: /root/.m2/repository/org/apache/arrow/ | |
| - name: Setup tzdata | |
| run: | | |
| #sed -i 's|http://archive|http://us.archive|g' /etc/apt/sources.list | |
| if [ "${{ matrix.os }}" = "ubuntu:22.04" ]; then | |
| apt-get update | |
| TZ="Etc/GMT" DEBIAN_FRONTEND=noninteractive apt-get install -y tzdata | |
| fi | |
| - name: Setup java | |
| run: | | |
| if [ "${{ matrix.java }}" = "java-17" ]; then | |
| apt-get update && apt-get install -y openjdk-17-jdk wget | |
| apt remove openjdk-11* -y | |
| elif [ "${{ matrix.java }}" = "java-21" ]; then | |
| apt-get update && apt-get install -y openjdk-21-jdk wget | |
| elif [ "${{ matrix.java }}" = "java-11" ]; then | |
| apt-get update && apt-get install -y openjdk-11-jdk wget | |
| else | |
| apt-get update && apt-get install -y openjdk-8-jdk wget | |
| apt remove openjdk-11* -y | |
| fi | |
| ls -l /root/.m2/repository/org/apache/arrow/arrow-dataset/15.0.0-gluten/ | |
| - name: Install Hadoop & Setup HDFS | |
| if: matrix.os == 'ubuntu:22.04' && matrix.spark == 'spark-3.5' && matrix.java == 'java-8' | |
| shell: bash | |
| run: | | |
| export JAVA_HOME=/usr/lib/jvm/${{ matrix.java }}-openjdk-amd64 | |
| source .github/workflows/util/install-resources.sh | |
| install_hadoop | |
| setup_hdfs | |
| - name: Install MinIO | |
| if: matrix.os == 'ubuntu:22.04' && matrix.spark == 'spark-3.5' && matrix.java == 'java-8' | |
| shell: bash | |
| run: | | |
| export JAVA_HOME=/usr/lib/jvm/${{ matrix.java }}-openjdk-amd64 | |
| source .github/workflows/util/install-resources.sh | |
| install_minio | |
| - name: Build and run TPC-H / TPC-DS | |
| shell: bash | |
| run: | | |
| cd $GITHUB_WORKSPACE/ | |
| export JAVA_HOME=/usr/lib/jvm/${{ matrix.java }}-openjdk-amd64 | |
| echo "JAVA_HOME: $JAVA_HOME" | |
| case "${{ matrix.spark }}" in | |
| spark-4.0|spark-4.1) | |
| $MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pscala-2.13 -Pbackends-velox -DskipTests | |
| ;; | |
| *) | |
| $MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pbackends-velox -DskipTests | |
| ;; | |
| esac | |
| cd $GITHUB_WORKSPACE/tools/gluten-it | |
| $GITHUB_WORKSPACE/$MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }} | |
| # Setup S3 JARs after gluten-it build | |
| if [ "${{ matrix.os }}" = "ubuntu:22.04" ] && \ | |
| [ "${{ matrix.spark }}" = "spark-3.5" ] && \ | |
| [ "${{ matrix.java }}" = "java-8" ]; then | |
| source $GITHUB_WORKSPACE/.github/workflows/util/install-resources.sh | |
| SPARK_VERSION=$(echo "${{ matrix.spark }}" | sed 's/spark-//') | |
| setup_minio "$SPARK_VERSION" | |
| fi | |
| GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ | |
| --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ | |
| && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \ | |
| --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 | |
| if [ "${{ matrix.os }}" = "ubuntu:22.04" ] && \ | |
| [ "${{ matrix.spark }}" = "spark-3.5" ] && \ | |
| [ "${{ matrix.java }}" = "java-8" ]; then | |
| GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \ | |
| --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ | |
| --queries=q1 --data-dir="hdfs://localhost:9000/test" | |
| GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \ | |
| --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ | |
| --queries=q1 --data-dir="s3a://gluten-it/test" \ | |
| --extra-conf=spark.hadoop.fs.s3a.endpoint=http://localhost:9100 \ | |
| --extra-conf=spark.hadoop.fs.s3a.access.key=admin \ | |
| --extra-conf=spark.hadoop.fs.s3a.secret.key=admin123 \ | |
| --extra-conf=spark.hadoop.fs.s3a.path.style.access=true \ | |
| --extra-conf=spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem | |
| fi | |
| tpc-test-centos8: | |
| needs: build-native-lib-centos-7 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| os: [ "centos:8" ] | |
| spark: [ "spark-3.3", "spark-3.4", "spark-3.5", "spark-4.0", "spark-4.1" ] | |
| java: [ "java-8", "java-11", "java-17" ] | |
| # Spark supports JDK17 since 3.3. | |
| exclude: | |
| - spark: spark-3.4 | |
| java: java-17 | |
| - spark: spark-3.5 | |
| java: java-17 | |
| - spark: spark-3.3 | |
| java: java-11 | |
| - spark: spark-3.4 | |
| java: java-11 | |
| - spark: spark-4.0 | |
| java: java-8 | |
| - spark: spark-4.0 | |
| java: java-11 | |
| - spark: spark-4.1 | |
| java: java-8 | |
| - spark: spark-4.1 | |
| java: java-11 | |
| runs-on: ubuntu-22.04 | |
| container: ${{ matrix.os }} | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Download All Native Artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: velox-native-lib-centos-7-${{github.sha}} | |
| path: ./cpp/build/ | |
| - name: Download All Arrow Jar Artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: arrow-jars-centos-7-${{github.sha}} | |
| path: /root/.m2/repository/org/apache/arrow/ | |
| - name: Update mirror list | |
| run: | | |
| if [ "${{ matrix.os }}" = "centos:7" ] || [ "${{ matrix.os }}" = "centos:8" ]; then | |
| sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* || true | |
| sed -i -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" /etc/yum.repos.d/CentOS-* || true | |
| fi | |
| - name: Setup java | |
| run: | | |
| if [ "${{ matrix.java }}" = "java-17" ]; then | |
| yum update -y && yum install -y java-17-openjdk-devel wget | |
| elif [ "${{ matrix.java }}" = "java-11" ]; then | |
| yum update -y && yum install -y java-11-openjdk-devel wget | |
| else | |
| yum update -y && yum install -y java-1.8.0-openjdk-devel wget | |
| fi | |
| - name: Set environment variables | |
| run: | | |
| if [ "${{ matrix.java }}" = "java-17" ]; then | |
| echo "JAVA_HOME=/usr/lib/jvm/java-17-openjdk" >> $GITHUB_ENV | |
| elif [ "${{ matrix.java }}" = "java-11" ]; then | |
| echo "JAVA_HOME=/usr/lib/jvm/java-11-openjdk" >> $GITHUB_ENV | |
| else | |
| echo "JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk" >> $GITHUB_ENV | |
| fi | |
| - name: Build gluten-it | |
| run: | | |
| echo "JAVA_HOME: $JAVA_HOME" | |
| cd $GITHUB_WORKSPACE/ | |
| case "${{ matrix.spark }}" in | |
| spark-4.0|spark-4.1) | |
| $MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pscala-2.13 -Pbackends-velox -DskipTests | |
| ;; | |
| *) | |
| $MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pbackends-velox -DskipTests | |
| ;; | |
| esac | |
| cd $GITHUB_WORKSPACE/tools/gluten-it | |
| $GITHUB_WORKSPACE/$MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }} | |
| - name: Run TPC-H / TPC-DS | |
| run: | | |
| echo "JAVA_HOME: $JAVA_HOME" | |
| cd $GITHUB_WORKSPACE/tools/gluten-it | |
| GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ | |
| --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 | |
| GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \ | |
| --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 | |
| - name: Run TPC-H / TPC-DS with RAS | |
| run: | | |
| echo "JAVA_HOME: $JAVA_HOME" | |
| cd $GITHUB_WORKSPACE/tools/gluten-it | |
| SPARK41_CONF="" | |
| if [ "${{ matrix.spark }}" = "spark-4.1" ]; then | |
| SPARK41_CONF="--extra-conf=spark.sql.unionOutputPartitioning=false" | |
| fi | |
| GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ | |
| --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ | |
| --extra-conf=spark.gluten.ras.enabled=true $SPARK41_CONF \ | |
| && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \ | |
| --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ | |
| --extra-conf=spark.gluten.ras.enabled=true $SPARK41_CONF | |
| tpc-test-centos7: | |
| needs: build-native-lib-centos-7 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| spark: [ "spark-3.3", "spark-3.4", "spark-3.5" ] | |
| java: [ "java-8" ] | |
| runs-on: ubuntu-22.04 | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Download All Native Artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: velox-native-lib-centos-7-${{github.sha}} | |
| path: ./cpp/build/ | |
| - name: Download All Arrow Jar Artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: arrow-jars-centos-7-${{github.sha}} | |
| path: .m2/repository/org/apache/arrow/ | |
| - name: Build and run TPCH/DS tests | |
| run: | | |
| docker pull centos:7 | |
| docker run -v $GITHUB_WORKSPACE:/work -v /$GITHUB_WORKSPACE/.m2:/root/.m2/ -w /work \ | |
| -e matrix.java=${{ matrix.java }} -e matrix.spark=${{ matrix.spark }} \ | |
| centos:7 \ | |
| bash -c " | |
| set -e | |
| sed -i -e 's|mirrorlist=|#mirrorlist=|g' /etc/yum.repos.d/CentOS-* || true | |
| sed -i -e 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-* || true | |
| yum update -y && yum install -y java-1.8.0-openjdk-devel wget tzdata python3-pip | |
| # Set environment variables | |
| export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk | |
| # Build gluten | |
| build/mvn -ntp clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pbackends-velox -DskipTests | |
| # Build gluten-it | |
| cd /work/tools/gluten-it | |
| /work/build/mvn -ntp clean install -P${{ matrix.spark }} -P${{ matrix.java }} | |
| # Run TPC-H / TPC-DS | |
| GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ | |
| --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 | |
| GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \ | |
| --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 | |
| # Run TPC-H / TPC-DS with RAS | |
| cd /work/tools/gluten-it | |
| GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ | |
| --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ | |
| --extra-conf=spark.gluten.ras.enabled=true \ | |
| && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \ | |
| --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ | |
| --extra-conf=spark.gluten.ras.enabled=true | |
| " | |
| tpc-test-ubuntu-oom: | |
| needs: build-native-lib-centos-7 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| spark: [ "spark-3.3" ] | |
| runs-on: ubuntu-22.04 | |
| steps: | |
| - name: Maximize build disk space | |
| shell: bash | |
| run: | | |
| df -h | |
| set -euo pipefail | |
| echo "Removing unwanted software... " | |
| sudo rm -rf /usr/share/dotnet | |
| sudo rm -rf /usr/local/lib/android | |
| sudo rm -rf /opt/ghc | |
| sudo rm -rf /opt/hostedtoolcache/CodeQL | |
| sudo docker image prune --all --force > /dev/null | |
| df -h | |
| - uses: actions/checkout@v4 | |
| - name: Download All Native Artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: velox-native-lib-centos-7-${{github.sha}} | |
| path: ./cpp/build/ | |
| - name: Download All Arrow Jar Artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: arrow-jars-centos-7-${{github.sha}} | |
| path: /home/runner/.m2/repository/org/apache/arrow/ | |
| - name: Setup java | |
| run: | | |
| sudo apt-get update | |
| sudo apt-get install -y openjdk-8-jdk wget | |
| - name: Set environment variables | |
| run: | | |
| echo "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> $GITHUB_ENV | |
| - name: Build for Spark ${{ matrix.spark }} | |
| run: | | |
| cd $GITHUB_WORKSPACE/ | |
| $MVN_CMD clean install -P${{ matrix.spark }} -Pbackends-velox -DskipTests | |
| cd $GITHUB_WORKSPACE/tools/gluten-it | |
| $GITHUB_WORKSPACE/$MVN_CMD clean install -P${{ matrix.spark }} | |
| GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh data-gen-only --local --benchmark-type=ds -s=30.0 --threads=12 | |
| - name: TPC-DS SF30.0 Parquet local spark3.3 Q67/Q95 low memory, memory isolation off | |
| run: | | |
| cd tools/gluten-it \ | |
| && GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \ | |
| --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q67,q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ | |
| --data-gen=skip -m=OffHeapExecutionMemory \ | |
| -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \ | |
| -d=OFFHEAP_SIZE:6g,spark.memory.offHeap.size=6g \ | |
| -d=OFFHEAP_SIZE:4g,spark.memory.offHeap.size=4g \ | |
| -d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \ | |
| -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5 \ | |
| --excluded-dims=OFFHEAP_SIZE:4g | |
| - name: TPC-DS SF30.0 Parquet local spark3.3 Q67 low memory, memory isolation on | |
| run: | | |
| cd tools/gluten-it \ | |
| && GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \ | |
| --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q67 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ | |
| --data-gen=skip -m=OffHeapExecutionMemory \ | |
| -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \ | |
| -d=OFFHEAP_SIZE:6g,spark.memory.offHeap.size=6g \ | |
| -d=OFFHEAP_SIZE:4g,spark.memory.offHeap.size=4g \ | |
| -d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \ | |
| -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5 | |
| - name: TPC-DS SF30.0 Parquet local spark3.3 Q95 low memory, memory isolation on | |
| run: | | |
| cd tools/gluten-it \ | |
| && GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \ | |
| --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ | |
| --data-gen=skip -m=OffHeapExecutionMemory \ | |
| -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \ | |
| -d=OFFHEAP_SIZE:6g,spark.memory.offHeap.size=6g \ | |
| -d=OFFHEAP_SIZE:4g,spark.memory.offHeap.size=4g \ | |
| -d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \ | |
| -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5 | |
| - name: TPC-DS SF30.0 Parquet local spark3.3 Q23A/Q23B low memory | |
| run: | | |
| cd tools/gluten-it \ | |
| && GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \ | |
| --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q23a,q23b -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ | |
| --data-gen=skip -m=OffHeapExecutionMemory \ | |
| -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \ | |
| -d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \ | |
| -d=FLUSH_MODE:DISABLED,spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \ | |
| -d=FLUSH_MODE:ABANDONED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \ | |
| -d=FLUSH_MODE:FLUSHED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.05,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.1,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 | |
| - name: TPC-DS SF30.0 Parquet local spark3.3 Q23A/Q23B low memory, memory isolation on | |
| run: | | |
| cd tools/gluten-it \ | |
| && GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \ | |
| --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q23a,q23b -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ | |
| --data-gen=skip -m=OffHeapExecutionMemory \ | |
| -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \ | |
| -d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \ | |
| -d=FLUSH_MODE:DISABLED,spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \ | |
| -d=FLUSH_MODE:ABANDONED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \ | |
| -d=FLUSH_MODE:FLUSHED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.05,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.1,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 | |
| - name: TPC-DS SF30.0 Parquet local spark3.3 Q97 low memory | |
| run: | | |
| cd tools/gluten-it \ | |
| && GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \ | |
| --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q97 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ | |
| --data-gen=skip -m=OffHeapExecutionMemory \ | |
| --extra-conf=spark.gluten.sql.columnar.backend.velox.IOThreads=0 \ | |
| -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \ | |
| -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \ | |
| -d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \ | |
| -d=OFFHEAP_SIZE:1g,spark.memory.offHeap.size=1g \ | |
| -d=IO_THREADS:12,spark.gluten.sql.columnar.backend.velox.IOThreads=12 \ | |
| -d=IO_THREADS:0,spark.gluten.sql.columnar.backend.velox.IOThreads=0 | |
| tpc-test-ubuntu-randomkill: | |
| needs: build-native-lib-centos-7 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| spark: [ "spark-3.3" ] | |
| runs-on: ubuntu-22.04 | |
| steps: | |
| - name: Maximize build disk space | |
| shell: bash | |
| run: | | |
| df -h | |
| set -euo pipefail | |
| echo "Removing unwanted software... " | |
| sudo rm -rf /usr/share/dotnet | |
| sudo rm -rf /usr/local/lib/android | |
| sudo rm -rf /opt/ghc | |
| sudo rm -rf /opt/hostedtoolcache/CodeQL | |
| sudo docker image prune --all --force > /dev/null | |
| df -h | |
| - uses: actions/checkout@v4 | |
| - name: Download All Native Artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: velox-native-lib-centos-7-${{github.sha}} | |
| path: ./cpp/build/ | |
| - name: Download All Arrow Jar Artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: arrow-jars-centos-7-${{github.sha}} | |
| path: /home/runner/.m2/repository/org/apache/arrow/ | |
| - name: Setup java | |
| run: | | |
| sudo apt-get update | |
| sudo apt-get install -y openjdk-8-jdk wget | |
| - name: Set environment variables | |
| run: | | |
| echo "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> $GITHUB_ENV | |
| - name: Build for Spark ${{ matrix.spark }} | |
| run: | | |
| cd $GITHUB_WORKSPACE/ | |
| $MVN_CMD clean install -P${{ matrix.spark }} -Pbackends-velox -DskipTests | |
| cd $GITHUB_WORKSPACE/tools/gluten-it | |
| $GITHUB_WORKSPACE/$MVN_CMD clean install -P${{ matrix.spark }} | |
| GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh data-gen-only --local --benchmark-type=ds -s=30.0 --threads=12 | |
| - name: TPC-DS SF30.0 Parquet local spark3.3 random kill tasks | |
| run: | | |
| cd tools/gluten-it \ | |
| && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries \ | |
| --local --preset=velox --benchmark-type=ds --error-on-memleak -s=30.0 --off-heap-size=8g --threads=12 --shuffle-partitions=72 --iterations=1 \ | |
| --data-gen=skip --random-kill-tasks --no-session-reuse | |
| tpc-test-centos8-uniffle: | |
| needs: build-native-lib-centos-7 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| spark: [ "spark-3.3" ] | |
| uniffle: [ "0.10.0" ] | |
| hadoop: [ "2.8.5" ] | |
| runs-on: ubuntu-22.04 | |
| container: apache/gluten:centos-8-jdk8 | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Download All Native Artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: velox-native-lib-centos-7-${{github.sha}} | |
| path: ./cpp/build/ | |
| - name: Download All Arrow Jar Artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: arrow-jars-centos-7-${{github.sha}} | |
| path: /root/.m2/repository/org/apache/arrow/ | |
| - name: Install Uniffle ${{ matrix.uniffle }} | |
| run: | | |
| export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk | |
| if [ ! -e "/opt/apache-uniffle-${{ matrix.uniffle }}-bin.tar.gz" ]; then | |
| ${WGET_CMD} https://archive.apache.org/dist/uniffle/${{ matrix.uniffle }}/apache-uniffle-${{ matrix.uniffle }}-bin.tar.gz -P /opt | |
| fi | |
| if [ ! -e "/opt/hadoop-${{ matrix.hadoop }}.tar.gz" ]; then | |
| ${WGET_CMD} https://archive.apache.org/dist/hadoop/common/hadoop-${{ matrix.hadoop }}/hadoop-${{ matrix.hadoop }}.tar.gz -P /opt | |
| fi | |
| cd /opt && rm -rf shims && \ | |
| mkdir /opt/uniffle && tar xzf apache-uniffle-${{ matrix.uniffle }}-bin.tar.gz -C /opt/uniffle --strip-components=1 && \ | |
| tar xzf hadoop-${{ matrix.hadoop }}.tar.gz -C /opt/ && \ | |
| cd /opt/uniffle && mkdir shuffle_data && \ | |
| bash -c "echo -e 'XMX_SIZE=16g\nHADOOP_HOME=/opt/hadoop-${{ matrix.hadoop }}' > ./conf/rss-env.sh" && \ | |
| bash -c "echo -e 'rss.coordinator.shuffle.nodes.max 1\nrss.rpc.server.port 19999' > ./conf/coordinator.conf" && \ | |
| bash -c "echo -e 'rss.server.app.expired.withoutHeartbeat 7200000\nrss.server.heartbeat.delay 3000\nrss.rpc.server.port 19997\nrss.rpc.server.type GRPC_NETTY\nrss.jetty.http.port 19996\nrss.server.netty.port 19995\nrss.storage.basePath /opt/uniffle/shuffle_data\nrss.storage.type MEMORY_LOCALFILE\nrss.coordinator.quorum localhost:19999\nrss.server.flush.thread.alive 10\nrss.server.single.buffer.flush.threshold 64m' > ./conf/server.conf" && \ | |
| bash ./bin/start-coordinator.sh && bash ./bin/start-shuffle-server.sh | |
| - name: Build for Spark ${{ matrix.spark }} | |
| run: | | |
| cd $GITHUB_WORKSPACE/ && \ | |
| $MVN_CMD clean install -P${{ matrix.spark }} -Pbackends-velox -Puniffle -DskipTests | |
| - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.3 with uniffle-${{ matrix.uniffle }} | |
| run: | | |
| export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk && \ | |
| cd $GITHUB_WORKSPACE/tools/gluten-it && \ | |
| $GITHUB_WORKSPACE/$MVN_CMD clean install -P${{ matrix.spark }} -Puniffle && \ | |
| GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ | |
| --local --preset=velox-with-uniffle --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 | |
| tpc-test-ubuntu-2204-celeborn: | |
| needs: build-native-lib-centos-7 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| spark: [ "spark-3.3" ] | |
| celeborn: [ "celeborn-0.6.1", "celeborn-0.5.4"] | |
| writer: ["sort", "hash"] | |
| runs-on: ubuntu-22.04 | |
| container: centos:8 | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Download All Native Artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: velox-native-lib-centos-7-${{github.sha}} | |
| path: ./cpp/build/ | |
| - name: Download All Arrow Jar Artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: arrow-jars-centos-7-${{github.sha}} | |
| path: /root/.m2/repository/org/apache/arrow/ | |
| - name: Update mirror list | |
| run: | | |
| sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* || true | |
| sed -i -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" /etc/yum.repos.d/CentOS-* || true | |
| - name: Setup java and maven | |
| run: | | |
| yum update -y && yum install -y java-1.8.0-openjdk-devel wget | |
| - name: Set environment variables | |
| run: | | |
| echo "JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk" >> $GITHUB_ENV | |
| - name: Build for Spark ${{ matrix.spark }} | |
| run: | | |
| cd $GITHUB_WORKSPACE/ | |
| $MVN_CMD clean install -P${{ matrix.spark }} -Pbackends-velox -Pceleborn -DskipTests | |
      # Runs TPC-H and TPC-DS query-compare suites against a locally started
      # Celeborn master + worker, for the writer mode selected by the matrix.
      - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.3 with ${{ matrix.celeborn }}
        run: |
          # Map the Celeborn release under test to its Maven profile.
          EXTRA_PROFILE=""
          if [ "${{ matrix.celeborn }}" = "celeborn-0.5.4" ]; then
            EXTRA_PROFILE="-Pceleborn-0.5"
          elif [ "${{ matrix.celeborn }}" = "celeborn-0.6.1" ]; then
            EXTRA_PROFILE="-Pceleborn-0.6"
          fi
          echo "EXTRA_PROFILE: ${EXTRA_PROFILE}"
          # Download the Celeborn tarball only if the image does not already
          # ship it (downloads from apache.org are throttled).
          if [ ! -e "/opt/apache-${{ matrix.celeborn }}-bin.tgz" ]; then
            echo "WARNING: please pre-install your required package in docker image since the downloading is throttled by this site."
            wget -nv https://www.apache.org/dyn/closer.lua/celeborn/${{ matrix.celeborn }}/apache-${{ matrix.celeborn }}-bin.tgz?action=download -O /opt/apache-${{ matrix.celeborn }}-bin.tgz
          fi
          # Unpack Celeborn, write memory/thread configs, start master + worker,
          # build gluten-it for Spark 3.3, then run the TPC-H and TPC-DS compares.
          # NOTE(review): the env template is renamed to celeborn-env.sh but the
          # echo below uses '>' and overwrites the file entirely, so the rename
          # has no effect on the final content - confirm '>>' was not intended.
          cd /opt && rm -rf shims && mkdir -p celeborn && \
          tar xzf apache-${{ matrix.celeborn }}-bin.tgz -C /opt/celeborn --strip-components=1 && cd celeborn && \
          mv ./conf/celeborn-env.sh.template ./conf/celeborn-env.sh && \
          bash -c "echo -e 'CELEBORN_MASTER_MEMORY=8g\nCELEBORN_WORKER_MEMORY=8g\nCELEBORN_WORKER_OFFHEAP_MEMORY=16g' > ./conf/celeborn-env.sh" && \
          bash -c "echo -e 'celeborn.worker.commitFiles.threads 32\nceleborn.worker.sortPartition.threads 16' > ./conf/celeborn-defaults.conf" && \
          bash ./sbin/start-master.sh && bash ./sbin/start-worker.sh && \
          cd $GITHUB_WORKSPACE/tools/gluten-it && $GITHUB_WORKSPACE/$MVN_CMD clean install -Pspark-3.3 -Pceleborn ${EXTRA_PROFILE} && \
          GLUTEN_IT_JVM_ARGS=-Xmx16G sbin/gluten-it.sh queries-compare \
            --extra-conf=spark.celeborn.client.spark.shuffle.writer=${{ matrix.writer }} \
            --extra-conf=spark.sql.shuffle.partitions=16 \
            --extra-conf=spark.celeborn.client.eagerlyCreateInputStream.threads=4 \
            --local --preset=velox-with-celeborn --benchmark-type=h --error-on-memleak --off-heap-size=16g -s=1.0 --threads=16 --iterations=1 && \
          GLUTEN_IT_JVM_ARGS=-Xmx16G sbin/gluten-it.sh queries-compare \
            --local --preset=velox-with-celeborn --extra-conf=spark.celeborn.client.spark.shuffle.writer=${{ matrix.writer }} \
            --extra-conf=spark.gluten.sql.columnar.shuffle.celeborn.useRssSort=true \
            --extra-conf=spark.sql.shuffle.partitions=16 \
            --extra-conf=spark.celeborn.client.eagerlyCreateInputStream.threads=4 \
            --benchmark-type=ds --error-on-memleak \
            --off-heap-size=16g -s=1.0 --threads=16 --iterations=1
          # For the sort writer, additionally run TPC-DS with the rss-sort path disabled.
          if [ "${{ matrix.writer }}" = "sort" ]; then
            GLUTEN_IT_JVM_ARGS=-Xmx16G sbin/gluten-it.sh queries-compare \
              --local --preset=velox-with-celeborn --extra-conf=spark.celeborn.client.spark.shuffle.writer=${{ matrix.writer }} \
              --extra-conf=spark.gluten.sql.columnar.shuffle.celeborn.useRssSort=false \
              --extra-conf=spark.celeborn.client.eagerlyCreateInputStream.threads=4 \
              --extra-conf=spark.sql.shuffle.partitions=16 \
              --benchmark-type=ds --error-on-memleak \
              --off-heap-size=16g -s=1.0 --threads=16 --iterations=1
          fi
  # Spark 3.3.1 unit tests (everything except the ExtendedSQLTest "slow" suites),
  # run on JDK 17 against the prebuilt native lib and Arrow jar artifacts.
  spark-test-spark33:
    needs: build-native-lib-centos-7
    runs-on: ubuntu-22.04
    container: apache/gluten:centos-8-jdk8
    steps:
      - uses: actions/checkout@v4
      - name: Download All Artifacts
        uses: actions/download-artifact@v4
        with:
          name: velox-native-lib-centos-7-${{github.sha}}
          path: ./cpp/build/
      - name: Download Arrow Jars
        uses: actions/download-artifact@v4
        with:
          name: arrow-jars-centos-7-${{github.sha}}
          path: /root/.m2/repository/org/apache/arrow/
      # PySpark/pandas/pyarrow are required by the Python-related test suites.
      - name: Prepare
        run: |
          dnf module -y install python39 && \
          alternatives --set python3 /usr/bin/python3.9 && \
          pip3 install setuptools==77.0.3 && \
          pip3 install pyspark==3.3.1 cython && \
          pip3 install pandas==2.2.3 pyarrow==20.0.0
      - name: Build and Run unit test for Spark 3.3.1 (other tests)
        run: |
          cd $GITHUB_WORKSPACE/
          export SPARK_SCALA_VERSION=2.12
          # Tests run on JDK 17 even though the container default is JDK 8.
          yum install -y java-17-openjdk-devel
          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
          export PATH=$JAVA_HOME/bin:$PATH
          java -version
          $MVN_CMD clean test -Pspark-3.3 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Phudi -Ppaimon -Pspark-ut \
            -DargLine="-Dspark.test.home=/opt/shims/spark33/spark_home/" \
            -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest
      - name: Upload test report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.job }}-report
          path: '**/surefire-reports/TEST-*.xml'
      - name: Upload unit tests log files
        if: ${{ !success() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.job }}-test-log
          path: |
            **/target/*.log
            **/gluten-ut/**/hs_err_*.log
            **/gluten-ut/**/core.*
      # Golden plan files help diagnose plan-comparison failures.
      - name: Upload golden files
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.job }}-golden-files
          path: /tmp/tpch-approved-plan/**
  # Spark 3.3.1 slow unit tests (only suites tagged ExtendedSQLTest).
  spark-test-spark33-slow:
    needs: build-native-lib-centos-7
    runs-on: ubuntu-22.04
    container: apache/gluten:centos-8-jdk8
    steps:
      - uses: actions/checkout@v4
      - name: Download All Artifacts
        uses: actions/download-artifact@v4
        with:
          name: velox-native-lib-centos-7-${{github.sha}}
          path: ./cpp/build/
      - name: Download Arrow Jars
        uses: actions/download-artifact@v4
        with:
          name: arrow-jars-centos-7-${{github.sha}}
          path: /root/.m2/repository/org/apache/arrow/
      - name: Build and Run unit test for Spark 3.3.1 (slow tests)
        run: |
          cd $GITHUB_WORKSPACE/
          # Tests run on JDK 17 even though the container default is JDK 8.
          yum install -y java-17-openjdk-devel
          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
          export PATH=$JAVA_HOME/bin:$PATH
          java -version
          $MVN_CMD clean test -Pspark-3.3 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Phudi -Ppaimon -Pspark-ut \
            -DargLine="-Dspark.test.home=/opt/shims/spark33/spark_home/" \
            -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
      - name: Upload test report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.job }}-report
          path: '**/surefire-reports/TEST-*.xml'
      - name: Upload unit tests log files
        if: ${{ !success() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.job }}-test-log
          path: |
            **/target/*.log
            **/gluten-ut/**/hs_err_*.log
            **/gluten-ut/**/core.*
  # Spark 3.4.4 unit tests (non-slow suites), including the iceberg-test profile.
  spark-test-spark34:
    needs: build-native-lib-centos-7
    runs-on: ubuntu-22.04
    container: apache/gluten:centos-8-jdk8
    steps:
      - uses: actions/checkout@v4
      - name: Download All Artifacts
        uses: actions/download-artifact@v4
        with:
          name: velox-native-lib-centos-7-${{github.sha}}
          path: ./cpp/build/
      - name: Download Arrow Jars
        uses: actions/download-artifact@v4
        with:
          name: arrow-jars-centos-7-${{github.sha}}
          path: /root/.m2/repository/org/apache/arrow/
      # NOTE(review): despite its name, this step only installs Python test
      # dependencies (like the "Prepare" steps of the sibling jobs).
      - name: Prepare spark.test.home for Spark 3.4.4 (other tests)
        run: |
          dnf module -y install python39 && \
          alternatives --set python3 /usr/bin/python3.9 && \
          pip3 install setuptools==77.0.3 && \
          pip3 install pyspark==3.4.4 cython && \
          pip3 install pandas==2.2.3 pyarrow==20.0.0
      - name: Build and Run unit test for Spark 3.4.4 (other tests)
        run: |
          cd $GITHUB_WORKSPACE/
          export SPARK_SCALA_VERSION=2.12
          # Tests run on JDK 17 even though the container default is JDK 8.
          yum install -y java-17-openjdk-devel
          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
          export PATH=$JAVA_HOME/bin:$PATH
          java -version
          export SPARK_HOME=/opt/shims/spark34/spark_home/
          ls -l $SPARK_HOME
          $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Piceberg -Piceberg-test -Pdelta -Phudi -Ppaimon -Pspark-ut \
            -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest \
            -DargLine="-Dspark.test.home=$SPARK_HOME"
      - name: Upload test report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.job }}-report
          path: '**/surefire-reports/TEST-*.xml'
      - name: Upload unit tests log files
        if: ${{ !success() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.job }}-test-log
          path: |
            **/target/*.log
            **/gluten-ut/**/hs_err_*.log
            **/gluten-ut/**/core.*
      # Golden plan files help diagnose plan-comparison failures.
      - name: Upload golden files
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.job }}-golden-files
          path: /tmp/tpch-approved-plan/**
  # Spark 3.4.4 slow unit tests (only suites tagged ExtendedSQLTest).
  spark-test-spark34-slow:
    needs: build-native-lib-centos-7
    runs-on: ubuntu-22.04
    container: apache/gluten:centos-8-jdk8
    steps:
      - uses: actions/checkout@v4
      - name: Download All Artifacts
        uses: actions/download-artifact@v4
        with:
          name: velox-native-lib-centos-7-${{github.sha}}
          path: ./cpp/build/
      - name: Download Arrow Jars
        uses: actions/download-artifact@v4
        with:
          name: arrow-jars-centos-7-${{github.sha}}
          path: /root/.m2/repository/org/apache/arrow/
      - name: Build and Run unit test for Spark 3.4.4 (slow tests)
        run: |
          cd $GITHUB_WORKSPACE/
          # Tests run on JDK 17 even though the container default is JDK 8.
          yum install -y java-17-openjdk-devel
          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
          export PATH=$JAVA_HOME/bin:$PATH
          java -version
          export SPARK_HOME=/opt/shims/spark34/spark_home/
          ls -l $SPARK_HOME
          $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Phudi -Ppaimon -Pspark-ut \
            -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest \
            -DargLine="-Dspark.test.home=$SPARK_HOME"
      - name: Upload test report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.job }}-report
          path: '**/surefire-reports/TEST-*.xml'
      - name: Upload unit tests log files
        if: ${{ !success() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.job }}-test-log
          path: |
            **/target/*.log
            **/gluten-ut/**/hs_err_*.log
            **/gluten-ut/**/core.*
  # Spark 3.5.5 unit tests (non-slow suites) on Scala 2.12.
  spark-test-spark35:
    needs: build-native-lib-centos-7
    runs-on: ubuntu-22.04
    env:
      # Some Spark test utilities change behavior based on this flag.
      SPARK_TESTING: true
    container: apache/gluten:centos-8-jdk8
    steps:
      - uses: actions/checkout@v4
      - name: Download All Artifacts
        uses: actions/download-artifact@v4
        with:
          name: velox-native-lib-centos-7-${{github.sha}}
          path: ./cpp/build/
      - name: Download Arrow Jars
        uses: actions/download-artifact@v4
        with:
          name: arrow-jars-centos-7-${{github.sha}}
          path: /root/.m2/repository/org/apache/arrow/
      - name: Prepare
        run: |
          dnf module -y install python39 && \
          alternatives --set python3 /usr/bin/python3.9 && \
          pip3 install setuptools==77.0.3 && \
          pip3 install pyspark==3.5.5 cython && \
          pip3 install pandas==2.2.3 pyarrow==20.0.0
      - name: Build and Run unit test for Spark 3.5.5 (other tests)
        run: |
          cd $GITHUB_WORKSPACE/
          export SPARK_SCALA_VERSION=2.12
          # Tests run on JDK 17 even though the container default is JDK 8.
          yum install -y java-17-openjdk-devel
          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
          export PATH=$JAVA_HOME/bin:$PATH
          java -version
          $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Phudi -Ppaimon -Pspark-ut \
            -DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/" \
            -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest
      - name: Upload test report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.job }}-report
          path: '**/surefire-reports/TEST-*.xml'
      - name: Upload unit tests log files
        if: ${{ !success() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.job }}-test-log
          path: |
            **/target/*.log
            **/gluten-ut/**/hs_err_*.log
            **/gluten-ut/**/core.*
      # Golden plan files help diagnose plan-comparison failures.
      - name: Upload golden files
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.job }}-golden-files
          path: /tmp/tpch-approved-plan/**
  # Spark 3.5.5 unit tests on Scala 2.13 (reduced profile set: no hudi/paimon).
  spark-test-spark35-scala213:
    needs: build-native-lib-centos-7
    runs-on: ubuntu-22.04
    env:
      # Some Spark test utilities change behavior based on this flag.
      SPARK_TESTING: true
    container: apache/gluten:centos-8-jdk8
    steps:
      - uses: actions/checkout@v4
      - name: Download All Artifacts
        uses: actions/download-artifact@v4
        with:
          name: velox-native-lib-centos-7-${{github.sha}}
          path: ./cpp/build/
      - name: Download Arrow Jars
        uses: actions/download-artifact@v4
        with:
          name: arrow-jars-centos-7-${{github.sha}}
          path: /root/.m2/repository/org/apache/arrow/
      - name: Prepare
        run: |
          dnf module -y install python39 && \
          alternatives --set python3 /usr/bin/python3.9 && \
          pip3 install setuptools==77.0.3 && \
          pip3 install pyspark==3.5.5 cython && \
          pip3 install pandas==2.2.3 pyarrow==20.0.0
      - name: Build and Run unit test for Spark 3.5.5 with scala-2.13 (other tests)
        run: |
          cd $GITHUB_WORKSPACE/
          export SPARK_SCALA_VERSION=2.13
          # Tests run on JDK 17 even though the container default is JDK 8.
          yum install -y java-17-openjdk-devel
          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
          export PATH=$JAVA_HOME/bin:$PATH
          java -version
          $MVN_CMD clean test -Pspark-3.5 -Pscala-2.13 -Pjava-17 -Pbackends-velox -Piceberg \
            -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=/opt/shims/spark35-scala-2.13/spark_home/" \
            -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest
      - name: Upload test report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.job }}-report
          path: '**/surefire-reports/TEST-*.xml'
      - name: Upload unit tests log files
        if: ${{ !success() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.job }}-test-log
          path: |
            **/target/*.log
            **/gluten-ut/**/hs_err_*.log
            **/gluten-ut/**/core.*
  # Spark 3.5.5 slow unit tests (only suites tagged ExtendedSQLTest).
  spark-test-spark35-slow:
    needs: build-native-lib-centos-7
    runs-on: ubuntu-22.04
    env:
      # Some Spark test utilities change behavior based on this flag.
      SPARK_TESTING: true
    container: apache/gluten:centos-8-jdk8
    steps:
      - uses: actions/checkout@v4
      - name: Download All Artifacts
        uses: actions/download-artifact@v4
        with:
          name: velox-native-lib-centos-7-${{github.sha}}
          path: ./cpp/build/
      - name: Download Arrow Jars
        uses: actions/download-artifact@v4
        with:
          name: arrow-jars-centos-7-${{github.sha}}
          path: /root/.m2/repository/org/apache/arrow/
      - name: Build and Run unit test for Spark 3.5.5 (slow tests)
        run: |
          cd $GITHUB_WORKSPACE/
          # Tests run on JDK 17 even though the container default is JDK 8.
          yum install -y java-17-openjdk-devel
          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
          export PATH=$JAVA_HOME/bin:$PATH
          java -version
          $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Phudi -Ppaimon -Pspark-ut \
            -DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/" \
            -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
      - name: Upload test report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.job }}-report
          path: '**/surefire-reports/TEST-*.xml'
      - name: Upload unit tests log files
        if: ${{ !success() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.job }}-test-log
          path: |
            **/target/*.log
            **/gluten-ut/**/hs_err_*.log
            **/gluten-ut/**/core.*
| spark-test-spark35-ras: | |
| needs: build-native-lib-centos-7 | |
| runs-on: ubuntu-22.04 | |
| env: | |
| SPARK_TESTING: true | |
| container: apache/gluten:centos-8-jdk8 | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Download All Artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: velox-native-lib-centos-7-${{github.sha}} | |
| path: ./cpp/build/ | |
| - name: Download Arrow Jars | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: arrow-jars-centos-7-${{github.sha}} | |
| path: /root/.m2/repository/org/apache/arrow/ | |
| - name: Prepare | |
| run: | | |
| dnf module -y install python39 && \ | |
| alternatives --set python3 /usr/bin/python3.9 && \ | |
| pip3 install setuptools==77.0.3 && \ | |
| pip3 install pyspark==3.5.5 cython && \ | |
| pip3 install pandas==2.2.3 pyarrow==20.0.0 | |
| - name: Build and Run unit test for Spark 3.5.5 (other tests) | |
| run: | | |
| cd $GITHUB_WORKSPACE/ | |
| export SPARK_SCALA_VERSION=2.12 | |
| yum install -y java-17-openjdk-devel | |
| export JAVA_HOME=/usr/lib/jvm/java-17-openjdk | |
| export PATH=$JAVA_HOME/bin:$PATH | |
| java -version | |
| $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Ppaimon -Pspark-ut \ | |
| -DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/ -Dspark.gluten.ras.enabled=true" \ | |
| -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest | |
| - name: Upload test report | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: ${{ github.job }}-report | |
| path: '**/surefire-reports/TEST-*.xml' | |
| - name: Upload unit tests log files | |
| if: ${{ !success() }} | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: ${{ github.job }}-test-log | |
| path: | | |
| **/target/*.log | |
| **/gluten-ut/**/hs_err_*.log | |
| **/gluten-ut/**/core.* | |
| spark-test-spark35-slow-ras: | |
| needs: build-native-lib-centos-7 | |
| runs-on: ubuntu-22.04 | |
| env: | |
| SPARK_TESTING: true | |
| container: apache/gluten:centos-8-jdk8 | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Download All Artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: velox-native-lib-centos-7-${{github.sha}} | |
| path: ./cpp/build/ | |
| - name: Download Arrow Jars | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: arrow-jars-centos-7-${{github.sha}} | |
| path: /root/.m2/repository/org/apache/arrow/ | |
| - name: Build and Run unit test for Spark 3.5.5 (slow tests) | |
| run: | | |
| cd $GITHUB_WORKSPACE/ | |
| yum install -y java-17-openjdk-devel | |
| export JAVA_HOME=/usr/lib/jvm/java-17-openjdk | |
| export PATH=$JAVA_HOME/bin:$PATH | |
| java -version | |
| $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Ppaimon -Pspark-ut \ | |
| -DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/ -Dspark.gluten.ras.enabled=true" \ | |
| -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest | |
| - name: Upload test report | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: ${{ github.job }}-report | |
| path: '**/surefire-reports/TEST-*.xml' | |
| - name: Upload unit tests log files | |
| if: ${{ !success() }} | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: ${{ github.job }}-test-log | |
| path: | | |
| **/target/*.log | |
| **/gluten-ut/**/hs_err_*.log | |
| **/gluten-ut/**/core.* | |
| spark-test-spark35-smj: | |
| needs: build-native-lib-centos-7 | |
| runs-on: ubuntu-22.04 | |
| env: | |
| SPARK_TESTING: true | |
| container: apache/gluten:centos-8-jdk8 | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Download All Artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: velox-native-lib-centos-7-${{github.sha}} | |
| path: ./cpp/build/ | |
| - name: Download Arrow Jars | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: arrow-jars-centos-7-${{github.sha}} | |
| path: /root/.m2/repository/org/apache/arrow/ | |
| - name: Prepare | |
| run: | | |
| dnf module -y install python39 && \ | |
| alternatives --set python3 /usr/bin/python3.9 && \ | |
| pip3 install setuptools==77.0.3 && \ | |
| pip3 install pyspark==3.5.5 cython && \ | |
| pip3 install pandas==2.2.3 pyarrow==20.0.0 | |
| - name: Build and Run unit test for Spark 3.5.5 (other tests) | |
| run: | | |
| cd $GITHUB_WORKSPACE/ | |
| export SPARK_SCALA_VERSION=2.12 | |
| yum install -y java-17-openjdk-devel | |
| export JAVA_HOME=/usr/lib/jvm/java-17-openjdk | |
| export PATH=$JAVA_HOME/bin:$PATH | |
| java -version | |
| $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Ppaimon -Pspark-ut \ | |
| -DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/ -Dspark.gluten.sql.columnar.forceShuffledHashJoin=false" \ | |
| -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest | |
| - name: Upload test report | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: ${{ github.job }}-report | |
| path: '**/surefire-reports/TEST-*.xml' | |
| - name: Upload unit tests log files | |
| if: ${{ !success() }} | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: ${{ github.job }}-test-log | |
| path: | | |
| **/target/*.log | |
| **/gluten-ut/**/hs_err_*.log | |
| **/gluten-ut/**/core.* | |
| spark-test-spark35-slow-smj: | |
| needs: build-native-lib-centos-7 | |
| runs-on: ubuntu-22.04 | |
| env: | |
| SPARK_TESTING: true | |
| container: apache/gluten:centos-8-jdk8 | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Download All Artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: velox-native-lib-centos-7-${{github.sha}} | |
| path: ./cpp/build/ | |
| - name: Download Arrow Jars | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: arrow-jars-centos-7-${{github.sha}} | |
| path: /root/.m2/repository/org/apache/arrow/ | |
| - name: Build and Run unit test for Spark 3.5.5 (slow tests) | |
| run: | | |
| cd $GITHUB_WORKSPACE/ | |
| yum install -y java-17-openjdk-devel | |
| export JAVA_HOME=/usr/lib/jvm/java-17-openjdk | |
| export PATH=$JAVA_HOME/bin:$PATH | |
| java -version | |
| $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Ppaimon -Pspark-ut \ | |
| -DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/ -Dspark.gluten.sql.columnar.forceShuffledHashJoin=false" \ | |
| -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest | |
| - name: Upload test report | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: ${{ github.job }}-report | |
| path: '**/surefire-reports/TEST-*.xml' | |
| - name: Upload unit tests log files | |
| if: ${{ !success() }} | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: ${{ github.job }}-test-log | |
| path: | | |
| **/target/*.log | |
| **/gluten-ut/**/hs_err_*.log | |
| **/gluten-ut/**/core.* | |
| cpp-test-udf-test: | |
| runs-on: ubuntu-22.04 | |
| container: apache/gluten:centos-8-jdk8 | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Get Ccache | |
| uses: actions/cache/restore@v4 | |
| with: | |
| path: '${{ env.CCACHE_DIR }}' | |
| key: ccache-centos8-release-shared-${{runner.arch}}-${{github.sha}} | |
| restore-keys: | | |
| ccache-centos8-release-shared-${{runner.arch}} | |
| - name: Build Gluten native libraries | |
| run: | | |
| df -a | |
| bash dev/ci-velox-buildshared-centos-8.sh | |
| ccache -s | |
| - name: Run CPP unit test | |
| run: | | |
| cd ./cpp/build && ctest -V | |
| - name: Run CPP benchmark test | |
| run: | | |
| $MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -pl backends-velox -am \ | |
| -DtagsToInclude="org.apache.gluten.tags.GenerateExample" -Dtest=none -DfailIfNoTests=false -Dexec.skip | |
| # This test depends on files generated by the above mvn test. | |
| ./cpp/build/velox/benchmarks/generic_benchmark --with-shuffle --partitioning hash --threads 1 --iterations 1 \ | |
| --conf $(realpath backends-velox/generated-native-benchmark/conf_12_0_*.ini) \ | |
| --plan $(realpath backends-velox/generated-native-benchmark/plan_12_0_*.json) \ | |
| --data $(realpath backends-velox/generated-native-benchmark/data_12_0_*_0.parquet),$(realpath backends-velox/generated-native-benchmark/data_12_0_*_1.parquet) | |
| - name: Run UDF test | |
| run: | | |
| yum install -y java-17-openjdk-devel | |
| export JAVA_HOME=/usr/lib/jvm/java-17-openjdk | |
| export PATH=$JAVA_HOME/bin:$PATH | |
| java -version | |
| # Depends on --build_example=ON. | |
| $MVN_CMD test -Pspark-3.5 -Pbackends-velox -Pjava-17 -Piceberg -Pdelta -Ppaimon -DtagsToExclude=org.apache.gluten.tags.EnhancedFeaturesTest \ | |
| -DtagsToInclude=org.apache.gluten.tags.UDFTest \ | |
| -DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/" | |
| - name: Run CPP benchmark test for Spark 4.0 | |
| run: | | |
| export SPARK_SCALA_VERSION=2.13 | |
| yum install -y java-17-openjdk-devel | |
| export JAVA_HOME=/usr/lib/jvm/java-17-openjdk | |
| export PATH=$JAVA_HOME/bin:$PATH | |
| $MVN_CMD clean test -Pspark-4.0 -Pjava-17 -Pscala-2.13 -Pbackends-velox -pl backends-velox -am \ | |
| -DtagsToInclude="org.apache.gluten.tags.GenerateExample" -Dtest=none -DfailIfNoTests=false -Dexec.skip | |
| # This test depends on files generated by the above mvn test. | |
| ./cpp/build/velox/benchmarks/generic_benchmark --with-shuffle --partitioning hash --threads 1 --iterations 1 \ | |
| --conf $(realpath backends-velox/generated-native-benchmark/conf_12_0_*.ini) \ | |
| --plan $(realpath backends-velox/generated-native-benchmark/plan_12_0_*.json) \ | |
| --data $(realpath backends-velox/generated-native-benchmark/data_12_0_*_0.parquet),$(realpath backends-velox/generated-native-benchmark/data_12_0_*_1.parquet) | |
| - name: Upload test report | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: ${{ github.job }}-report | |
| path: '**/surefire-reports/TEST-*.xml' | |
| - name: Upload unit tests log files | |
| if: ${{ !success() }} | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: ${{ github.job }}-test-log | |
| path: "**/target/*.log" | |
  # Builds the native libraries with GPU (cuDF) support inside a CentOS 9 CUDA
  # container, then packages the Spark 3.4 jars as a smoke check. Build only -
  # no GPU tests are executed (the hosted runner has no GPU).
  build-cudf-centos-9:
    runs-on: ubuntu-22.04
    steps:
      - name: "node-cleanup" # by default the free runner does not have enough disk space
        run: |
          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL
          sudo docker image prune --all --force
          sudo docker builder prune -a
      - run: df -h | sort -k 5 -nr # check disk space for debug
      - uses: actions/checkout@v4
      - name: Get Ccache
        uses: actions/cache/restore@v4
        with:
          path: '${{ env.CCACHE_DIR }}'
          key: ccache-centos9-release-shared-${{runner.arch}}-${{github.sha}}
          restore-keys: |
            ccache-centos9-release-shared-${{runner.arch}}
      # The build runs inside an explicit `docker run` (not a job container) so
      # the cleanup steps above can reclaim host disk space first.
      - name: Build Gluten native libraries
        run: |
          docker run -v $GITHUB_WORKSPACE:/work -w /work apache/gluten:centos-9-jdk8-cudf bash -c "
            set -e
            rm -rf /opt/rh/gcc-toolset-12 && ln -s /opt/rh/gcc-toolset-14 /opt/rh/gcc-toolset-12 # hack to use gcc 14, should upgrade in Velox build script later
            df -a
            dnf autoremove -y && dnf clean all
            dnf remove -y cuda-toolkit-12* && dnf install -y cuda-toolkit-13-1
            ls -l /usr/local/
            source /opt/rh/gcc-toolset-12/enable
            export CMAKE_BUILD_PARALLEL_LEVEL=4
            export NUM_THREADS=4
            export CCACHE_DIR=/work/.ccache
            export LD_LIBRARY_PATH=/work/ep/build-velox/build/velox_ep/_build/release/_deps/curl-build/lib:\$LD_LIBRARY_PATH
            mkdir -p /work/.ccache
            cd /work
            bash dev/builddeps-veloxbe.sh --run_setup_script=OFF --build_arrow=OFF --build_tests=ON --build_benchmarks=ON --enable_gpu=ON
            rm -rf ep/build-velox/build/velox_ep
            build/mvn clean package -Pbackends-velox -Pspark-3.4 -DskipTests
            ccache -s
          "
  # Compile-only smoke check of the fast-build profile (no native build, no tests).
  build-fast-build-test:
    runs-on: ubuntu-22.04
    container: apache/gluten:centos-8-jdk17
    steps:
      - uses: actions/checkout@v4
      - name: Build with fast-build profile (Spark 4.0, Java 17)
        run: |
          cd $GITHUB_WORKSPACE/
          $MVN_CMD clean test-compile -Pspark-4.0 -Pscala-2.13 -Pbackends-velox -Pspark-ut -Piceberg,iceberg-test,delta,paimon -Pfast-build
| spark-test-spark40: | |
| needs: build-native-lib-centos-7 | |
| runs-on: ubuntu-22.04 | |
| env: | |
| SPARK_TESTING: true | |
| container: apache/gluten:centos-8-jdk17 | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Download All Artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: velox-native-lib-centos-7-${{github.sha}} | |
| path: ./cpp/build/ | |
| - name: Download Arrow Jars | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: arrow-jars-centos-7-${{github.sha}} | |
| path: /root/.m2/repository/org/apache/arrow/ | |
| - name: Prepare | |
| run: | | |
| dnf module -y install python39 && \ | |
| alternatives --set python3 /usr/bin/python3.9 && \ | |
| pip3 install setuptools==77.0.3 && \ | |
| pip3 install pyspark==3.5.5 cython && \ | |
| pip3 install pandas==2.2.3 pyarrow==20.0.0 | |
| - name: Prepare Spark Resources for Spark 4.0.1 #TODO remove after image update | |
| run: | | |
| rm -rf /opt/shims/spark40 | |
| bash .github/workflows/util/install-resources.sh 4.0 | |
| mv /opt/shims/spark40/spark_home/assembly/target/scala-2.12 /opt/shims/spark40/spark_home/assembly/target/scala-2.13 | |
| - name: Build and Run unit test for Spark 4.0.0 with scala-2.13 (other tests) | |
| run: | | |
| cd $GITHUB_WORKSPACE/ | |
| export SPARK_SCALA_VERSION=2.13 | |
| yum install -y java-17-openjdk-devel | |
| export JAVA_HOME=/usr/lib/jvm/java-17-openjdk | |
| export PATH=$JAVA_HOME/bin:$PATH | |
| java -version | |
| $MVN_CMD clean test -Pspark-4.0 -Pscala-2.13 -Pjava-17 -Pbackends-velox \ | |
| -Pspark-ut -DargLine="-Dspark.test.home=/opt/shims/spark40/spark_home/" \ | |
| -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest | |
| - name: Upload test report | |
| if: always() | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: ${{ github.job }}-report | |
| path: '**/surefire-reports/TEST-*.xml' | |
| - name: Upload unit tests log files | |
| if: ${{ !success() }} | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: ${{ github.job }}-test-log | |
| path: | | |
| **/target/*.log | |
| **/gluten-ut/**/hs_err_*.log | |
| **/gluten-ut/**/core.* | |
  # Spark 4.0 slow unit tests (only suites tagged ExtendedSQLTest).
  spark-test-spark40-slow:
    needs: build-native-lib-centos-7
    runs-on: ubuntu-22.04
    env:
      # Some Spark test utilities change behavior based on this flag.
      SPARK_TESTING: true
    container: apache/gluten:centos-8-jdk17
    steps:
      - uses: actions/checkout@v4
      - name: Download All Artifacts
        uses: actions/download-artifact@v4
        with:
          name: velox-native-lib-centos-7-${{github.sha}}
          path: ./cpp/build/
      - name: Download Arrow Jars
        uses: actions/download-artifact@v4
        with:
          name: arrow-jars-centos-7-${{github.sha}}
          path: /root/.m2/repository/org/apache/arrow/
      - name: Prepare Spark Resources for Spark 4.0.1 #TODO remove after image update
        run: |
          rm -rf /opt/shims/spark40
          bash .github/workflows/util/install-resources.sh 4.0
          mv /opt/shims/spark40/spark_home/assembly/target/scala-2.12 /opt/shims/spark40/spark_home/assembly/target/scala-2.13
      - name: Build and Run unit test for Spark 4.0 (slow tests)
        run: |
          cd $GITHUB_WORKSPACE/
          yum install -y java-17-openjdk-devel
          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
          export PATH=$JAVA_HOME/bin:$PATH
          java -version
          $MVN_CMD clean test -Pspark-4.0 -Pscala-2.13 -Pjava-17 -Pbackends-velox -Pspark-ut \
            -DargLine="-Dspark.test.home=/opt/shims/spark40/spark_home/" \
            -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
      - name: Upload test report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.job }}-report
          path: '**/surefire-reports/TEST-*.xml'
      - name: Upload unit tests log files
        if: ${{ !success() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.job }}-test-log
          path: |
            **/target/*.log
            **/gluten-ut/**/hs_err_*.log
            **/gluten-ut/**/core.*
  # Spark 4.1.0 unit tests (non-slow suites) on Scala 2.13 / JDK 17, CentOS 9.
  spark-test-spark41:
    needs: build-native-lib-centos-7
    runs-on: ubuntu-22.04
    env:
      # Some Spark test utilities change behavior based on this flag.
      SPARK_TESTING: true
    container: apache/gluten:centos-9-jdk17
    steps:
      - uses: actions/checkout@v4
      - name: Download All Artifacts
        uses: actions/download-artifact@v4
        with:
          name: velox-native-lib-centos-7-${{github.sha}}
          path: ./cpp/build/
      - name: Download Arrow Jars
        uses: actions/download-artifact@v4
        with:
          name: arrow-jars-centos-7-${{github.sha}}
          path: /root/.m2/repository/org/apache/arrow/
      # NOTE(review): installs pyspark==3.5.5 although this job tests Spark 4.1
      # (same as the Spark 4.0 jobs) - confirm this is intentional.
      - name: Prepare
        run: |
          dnf install -y python3.11 python3.11-pip python3.11-devel && \
          ls -la /usr/bin/python3.11 && \
          alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 && \
          alternatives --set python3 /usr/bin/python3.11 && \
          pip3 install setuptools==77.0.3 && \
          pip3 install pyspark==3.5.5 cython && \
          pip3 install pandas==2.2.3 pyarrow==20.0.0
      - name: Prepare Spark Resources for Spark 4.1.0 #TODO remove after image update
        run: |
          rm -rf /opt/shims/spark41
          bash .github/workflows/util/install-resources.sh 4.1
          mv /opt/shims/spark41/spark_home/assembly/target/scala-2.12 /opt/shims/spark41/spark_home/assembly/target/scala-2.13
      - name: Build and Run unit test for Spark 4.1.0 with scala-2.13 (other tests)
        run: |
          cd $GITHUB_WORKSPACE/
          export SPARK_SCALA_VERSION=2.13
          yum install -y java-17-openjdk-devel
          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
          export PATH=$JAVA_HOME/bin:$PATH
          java -version
          $MVN_CMD clean test -Pspark-4.1 -Pscala-2.13 -Pjava-17 -Pbackends-velox \
            -Pspark-ut -DargLine="-Dspark.test.home=/opt/shims/spark41/spark_home/" \
            -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest
      - name: Upload test report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.job }}-report
          path: '**/surefire-reports/TEST-*.xml'
      - name: Upload unit tests log files
        if: ${{ !success() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.job }}-test-log
          path: |
            **/target/*.log
            **/gluten-ut/**/hs_err_*.log
            **/gluten-ut/**/core.*
| spark-test-spark41-slow: | |
| needs: build-native-lib-centos-7 | |
| runs-on: ubuntu-22.04 | |
| env: | |
| SPARK_TESTING: true | |
| container: apache/gluten:centos-9-jdk17 | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Download All Artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: velox-native-lib-centos-7-${{github.sha}} | |
| path: ./cpp/build/ | |
| - name: Download Arrow Jars | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: arrow-jars-centos-7-${{github.sha}} | |
| path: /root/.m2/repository/org/apache/arrow/ | |
| - name: Prepare Spark Resources for Spark 4.1.0 #TODO remove after image update | |
| run: | | |
| rm -rf /opt/shims/spark41 | |
| bash .github/workflows/util/install-resources.sh 4.1 | |
| mv /opt/shims/spark41/spark_home/assembly/target/scala-2.12 /opt/shims/spark41/spark_home/assembly/target/scala-2.13 | |
| - name: Build and Run unit test for Spark 4.0 (slow tests) | |
| run: | | |
| cd $GITHUB_WORKSPACE/ | |
| yum install -y java-17-openjdk-devel | |
| export JAVA_HOME=/usr/lib/jvm/java-17-openjdk | |
| export PATH=$JAVA_HOME/bin:$PATH | |
| java -version | |
| $MVN_CMD clean test -Pspark-4.1 -Pscala-2.13 -Pjava-17 -Pbackends-velox -Pspark-ut \ | |
| -DargLine="-Dspark.test.home=/opt/shims/spark41/spark_home/" \ | |
| -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest | |
| - name: Upload test report | |
| if: always() | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: ${{ github.job }}-report | |
| path: '**/surefire-reports/TEST-*.xml' | |
| - name: Upload unit tests log files | |
| if: ${{ !success() }} | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: ${{ github.job }}-test-log | |
| path: | | |
| **/target/*.log | |
| **/gluten-ut/**/hs_err_*.log | |
| **/gluten-ut/**/core.* |