CI/build: run the cuDF-enabled Velox build through `docker run`, on CUDA 13.

What this patch does:
  * velox_backend_cache.yml, velox_backend_x86.yml: drop the job-level
    `container:`. A "node-cleanup" step removes /usr/share/dotnet,
    /usr/local/lib/android, /opt/ghc and the hosted CodeQL tool cache and
    prunes Docker images and build cache; the build then runs inside
    apache/gluten:centos-9-jdk8-cudf with the workspace mounted at /work and
    CCACHE_DIR set to /work/.ccache.
  * The inner shell is started with `-e`: `docker run` reports only the exit
    status of the script's last command (`ccache -s`), so without it a failed
    build command would not fail the workflow step.
  * `docker builder prune` is passed `--force`; without it the command asks
    for confirmation.
  * /opt/rh/gcc-toolset-12 becomes a symlink to gcc-toolset-14 instead of a
    copy.
  * cuda-toolkit-12* is replaced by cuda-toolkit-13-1; CMakeLists.txt and
    build-velox.sh refer to /usr/local/cuda instead of /usr/local/cuda-12.8,
    and CMAKE_CUDA_ARCHITECTURES changes from 70 to 75.
  * The native build is invoked with --build_tests=ON.
  * The commented-out ccache-native-lib-*-velox-ut jobs are deleted.

diff --git a/.github/workflows/velox_backend_cache.yml b/.github/workflows/velox_backend_cache.yml
index eb060c3a90c1..8d0a3da0c91e 100644
--- a/.github/workflows/velox_backend_cache.yml
+++ b/.github/workflows/velox_backend_cache.yml
@@ -145,9 +145,13 @@ jobs:
     strategy:
       matrix:
         os: [ ubuntu-22.04 ]
-    container: apache/gluten:centos-9-jdk8-cudf
     steps:
-      - uses: actions/checkout@v2
+      - name: "node-cleanup" # by default the free runner does not have enough disk space
+        run: |
+          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL
+          sudo docker image prune --all --force
+          sudo docker builder prune --all --force
+      - uses: actions/checkout@v4
       - name: Get Ccache
         uses: actions/cache/restore@v3
         with:
@@ -157,14 +161,26 @@ jobs:
             ccache-centos9-release-shared-${{runner.arch}}
       - name: Build Gluten shared libraries
         run: |
-          export CCACHE_MAXSIZE=1G
-          dnf autoremove -y
+          docker run -v $GITHUB_WORKSPACE:/work -w /work apache/gluten:centos-9-jdk8-cudf bash -e -c "
+          rm -rf /opt/rh/gcc-toolset-12 && ln -s /opt/rh/gcc-toolset-14 /opt/rh/gcc-toolset-12 # hack to use gcc 14, should upgrade in Velox build script later
           df -a
-          rm -rf /opt/rh/gcc-toolset-12 && cp -r /opt/rh/gcc-toolset-14 /opt/rh/gcc-toolset-12 # hack to use gcc 14, should upgrade in Velox build script later
+          dnf autoremove -y && dnf clean all
+          dnf remove -y cuda-toolkit-12* && dnf install -y cuda-toolkit-13-1
+          ls -l /usr/local/
           source /opt/rh/gcc-toolset-12/enable
+
+          export CMAKE_BUILD_PARALLEL_LEVEL=4
           export NUM_THREADS=4
-          bash dev/builddeps-veloxbe.sh --run_setup_script=OFF --build_arrow=OFF --build_tests=OFF --build_benchmarks=ON --enable_gpu=ON # TODO: re-enable tests with more disk space
+          export CCACHE_MAXSIZE=1G
+          export CCACHE_DIR=/work/.ccache
+          mkdir -p /work/.ccache
+
+          cd /work
+          bash dev/builddeps-veloxbe.sh --run_setup_script=OFF --build_arrow=OFF --build_tests=ON --build_benchmarks=ON --enable_gpu=ON
           rm -rf ep/build-velox/build/velox_ep
+          mvn clean package -Pbackends-velox -Pspark-3.4 -DskipTests
+          ccache -s
+          "
       - name: Save Ccache
         if: always()
         uses: actions/cache/save@v3
@@ -172,75 +188,3 @@ jobs:
         with:
           path: '${{ env.CCACHE_DIR }}'
           key: ccache-centos9-release-shared-${{runner.arch}}-${{github.sha}}
-
-  # ccache-native-lib-ubuntu-velox-ut:
-  #   runs-on: ubuntu-22.04
-  #   env:
-  #     CCACHE_DIR: "${{ github.workspace }}/.ccache"
-  #   container: ghcr.io/facebookincubator/velox-dev:amd64-ubuntu-22.04-avx
-  #   steps:
-  #     - uses: actions/checkout@v2
-  #     - name: Get Ccache
-  #       uses: actions/cache/restore@v3
-  #       with:
-  #         path: '${{ env.CCACHE_DIR }}'
-  #         key: ccache-ubuntu-release-default
-  #     - name: Ensure Cache Dirs Exists
-  #       working-directory: ${{ github.workspace }}
-  #       run: |
-  #         mkdir -p '${{ env.CCACHE_DIR }}'
-  #     - name: Build Gluten native libraries
-  #       run: |
-  #         rm -rf /opt/miniconda-for-velox/
-  #         cd ep/build-velox/src && \
-  #         ./get-velox.sh
-  #         cd ../build/velox_ep/
-  #         make EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_PARQUET=ON -DVELOX_BUILD_TESTING=ON -DVELOX_BUILD_TEST_UTILS=ON"
-
-  #     - name: CCache after
-  #       run: |
-  #         ccache -vs
-
-  #     - uses: actions/cache/save@v3
-  #       with:
-  #         path: '${{ env.CCACHE_DIR }}'
-  #         key: ccache-ubuntu-release-default
-#  ccache-native-lib-centos-velox-ut:
-#    runs-on: ubuntu-22.04
-#    env:
-#      CCACHE_DIR: "${{ github.workspace }}/.ccache"
-#    container: ghcr.io/facebookincubator/velox-dev:centos8
-#    steps:
-#      - uses: actions/checkout@v2
-#      - name: Setup java and maven
-#        run: |
-#          yum install sudo patch java-1.8.0-openjdk-devel wget -y && \
-#          wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
-#          tar -xvf apache-maven-3.8.8-bin.tar.gz
-#          mv apache-maven-3.8.8 /usr/lib/maven
-#      - name: Get Ccache
-#        uses: actions/cache/restore@v3
-#        with:
-#          path: '${{ env.CCACHE_DIR }}'
-#          key: ccache-centos-release-default
-#      - name: Ensure Cache Dirs Exists
-#        working-directory: ${{ github.workspace }}
-#        run: |
-#          mkdir -p '${{ env.CCACHE_DIR }}'
-#      - name: Build Gluten native libraries
-#        run: |
-#          rm -rf /opt/miniconda-for-velox/
-#          cd ep/build-velox/src && \
-#          ./get-velox.sh
-#          cd ../build/velox_ep/
-#          source /opt/rh/gcc-toolset-9/enable
-#          make EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_PARQUET=ON -DVELOX_BUILD_TESTING=ON -DVELOX_BUILD_TEST_UTILS=ON"
-#
-#      - name: CCache after
-#        run: |
-#          ccache -s
-#
-#      - uses: actions/cache/save@v3
-#        with:
-#          path: '${{ env.CCACHE_DIR }}'
-#          key: ccache-centos-release-default
diff --git a/.github/workflows/velox_backend_x86.yml b/.github/workflows/velox_backend_x86.yml
index 5618aba857cb..1bcf1f97efe8 100644
--- a/.github/workflows/velox_backend_x86.yml
+++ b/.github/workflows/velox_backend_x86.yml
@@ -1314,9 +1314,15 @@ jobs:
 
   build-cudf-centos-9:
     runs-on: ubuntu-22.04
-    container: apache/gluten:centos-9-jdk8-cudf
     steps:
-      - uses: actions/checkout@v2
+      - name: "node-cleanup" # by default the free runner does not have enough disk space
+        run: |
+          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL
+          sudo docker image prune --all --force
+          sudo docker builder prune --all --force
+      - run: df -h | sort -k 5 -nr # check disk space for debug
+
+      - uses: actions/checkout@v4
       - name: Get Ccache
         uses: actions/cache/restore@v4
         with:
@@ -1326,15 +1332,25 @@ jobs:
             ccache-centos9-release-shared-${{runner.arch}}
       - name: Build Gluten native libraries
         run: |
-          rm -rf /opt/rh/gcc-toolset-12 && cp -r /opt/rh/gcc-toolset-14 /opt/rh/gcc-toolset-12 # hack to use gcc 14, should upgrade in Velox build script later
-          dnf autoremove -y
+          docker run -v $GITHUB_WORKSPACE:/work -w /work apache/gluten:centos-9-jdk8-cudf bash -e -c "
+          rm -rf /opt/rh/gcc-toolset-12 && ln -s /opt/rh/gcc-toolset-14 /opt/rh/gcc-toolset-12 # hack to use gcc 14, should upgrade in Velox build script later
           df -a
+          dnf autoremove -y && dnf clean all
+          dnf remove -y cuda-toolkit-12* && dnf install -y cuda-toolkit-13-1
+          ls -l /usr/local/
           source /opt/rh/gcc-toolset-12/enable
+
+          export CMAKE_BUILD_PARALLEL_LEVEL=4
           export NUM_THREADS=4
-          # bash dev/builddeps-veloxbe.sh --run_setup_script=OFF --build_arrow=OFF --build_tests=OFF --build_benchmarks=ON --enable_gpu=ON # TODO: re-enable tests with more disk space
-          # rm -rf ep/build-velox/build/velox_ep
-          # mvn clean package -Pbackends-velox -Pspark-3.4 -DskipTests
+          export CCACHE_DIR=/work/.ccache
+          mkdir -p /work/.ccache
+
+          cd /work
+          bash dev/builddeps-veloxbe.sh --run_setup_script=OFF --build_arrow=OFF --build_tests=ON --build_benchmarks=ON --enable_gpu=ON
+          rm -rf ep/build-velox/build/velox_ep
+          mvn clean package -Pbackends-velox -Pspark-3.4 -DskipTests
           ccache -s
+          "
 
   spark-test-spark40:
     needs: build-native-lib-centos-7
diff --git a/cpp/velox/CMakeLists.txt b/cpp/velox/CMakeLists.txt
index e389113c4e04..9547cde0e254 100644
--- a/cpp/velox/CMakeLists.txt
+++ b/cpp/velox/CMakeLists.txt
@@ -439,6 +439,7 @@ if(ENABLE_GPU)
             ${VELOX_BUILD_PATH}/_deps/nvtx3-src/c/include
             ${VELOX_BUILD_PATH}/_deps/nvcomp_proprietary_binary-src/include
             ${VELOX_BUILD_PATH}/_deps/rapids_logger-src/include
+            /usr/local/cuda/include/cccl
             /usr/local/cuda/include)
 
   target_compile_definitions(
@@ -458,7 +459,7 @@ if(ENABLE_GPU)
             ${VELOX_BUILD_PATH}/_deps/nvcomp_proprietary_binary-src/lib64/libnvcomp.so
             ${VELOX_BUILD_PATH}/_deps/nvcomp_proprietary_binary-src/lib64/libnvcomp_cpu.so
             ${VELOX_BUILD_PATH}/_deps/rapids_logger-build/librapids_logger.so
-            /usr/local/cuda-12.8/lib64/libcudart.so.12)
+            /usr/local/cuda/lib64/libcudart.so)
 endif()
 
 add_custom_command(
diff --git a/dev/docker/cudf/Dockerfile b/dev/docker/cudf/Dockerfile
index 42258a69c94b..c900c632ea1f 100644
--- a/dev/docker/cudf/Dockerfile
+++ b/dev/docker/cudf/Dockerfile
@@ -28,7 +28,10 @@ ENV CUDA_ARCHITECTURES=70
 
 WORKDIR /opt/gluten
 
-RUN rm -rf /opt/rh/gcc-toolset-12 && cp -r /opt/rh/gcc-toolset-14 /opt/rh/gcc-toolset-12; \
+RUN rm -rf /opt/rh/gcc-toolset-12 && ln -s /opt/rh/gcc-toolset-14 /opt/rh/gcc-toolset-12; \
+    dnf remove -y cuda-toolkit-12* && dnf install -y cuda-toolkit-13-1; \
+    dnf autoremove -y && dnf clean all; \
+    source /opt/rh/gcc-toolset-12/enable; \
     bash ./dev/buildbundle-veloxbe.sh --run_setup_script=OFF --build_arrow=ON --spark_version=3.4 --build_tests=ON --build_benchmarks=ON --enable_gpu=ON && rm -rf /opt/gluten
 
 # You can try the data in folder backends-velox/src/test/resources/tpch-data-parquet
diff --git a/ep/build-velox/src/build-velox.sh b/ep/build-velox/src/build-velox.sh
index 3e0f6be4fb25..ac62f8fc27f3 100755
--- a/ep/build-velox/src/build-velox.sh
+++ b/ep/build-velox/src/build-velox.sh
@@ -134,8 +134,8 @@ function compile {
   if [ $ENABLE_GPU == "ON" ]; then
     # the cuda default options are for Centos9 image from Meta
     echo "enable GPU support."
-    COMPILE_OPTION="$COMPILE_OPTION -DVELOX_ENABLE_GPU=ON -DVELOX_ENABLE_CUDF=ON -DCMAKE_CUDA_ARCHITECTURES=70 \
-      -DCMAKE_CUDA_COMPILER=/usr/local/cuda-12.8/bin/nvcc"
+    COMPILE_OPTION="$COMPILE_OPTION -DVELOX_ENABLE_GPU=ON -DVELOX_ENABLE_CUDF=ON -DCMAKE_CUDA_ARCHITECTURES=75 \
+      -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc"
   fi
   if [ -n "${GLUTEN_VCPKG_ENABLED:-}" ]; then
     COMPILE_OPTION="$COMPILE_OPTION -DVELOX_GFLAGS_TYPE=static"