diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000000..c87e6c56f85f --- /dev/null +++ b/.dockerignore @@ -0,0 +1,8 @@ +build/ +graphbolt/build/ +dgl_sparse/build/ +tensoradapter/pytorch/build/ +python/build/ +python/dist/ +python/*.egg-info/ +python/libdgl.so diff --git a/CMakeLists.txt b/CMakeLists.txt index 00491e4f6ad2..3dca01dac1ed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -634,6 +634,7 @@ if (BUILD_GRAPHBOLT) ALL ${CMAKE_COMMAND} -E env CMAKE_COMMAND=${CMAKE_CMD} + CMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" PYTORCH_ROCM_ARCH=${CMAKE_HIP_ARCHITECTURES} GPU_TARGETS=${CMAKE_HIP_ARCHITECTURES} CMAKE_HIP_ARCHITECTURES=${CMAKE_HIP_ARCHITECTURES} diff --git a/CMakePresets.json b/CMakePresets.json index f02ab2f6629c..00926fbcc23b 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -23,7 +23,7 @@ "CMAKE_CXX_FLAGS": "-fdiagnostics-color=always", "CMAKE_HIP_FLAGS": "-ftime-trace -fdiagnostics-color=always", "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", - "CMAKE_PREFIX_PATH": "/opt/rocm/lib/cmake", + "CMAKE_PREFIX_PATH": "/opt/rocm/lib/cmake:/opt/rocm/lib/rapids/cmake", "CMAKE_COLOR_DIAGNOSTICS": "ON" } }, diff --git a/docker/Dockerfile.ci_gpu_rocm b/docker/Dockerfile.ci_gpu_rocm index 5d13c774c96d..d8758f41a464 100644 --- a/docker/Dockerfile.ci_gpu_rocm +++ b/docker/Dockerfile.ci_gpu_rocm @@ -2,7 +2,7 @@ # Licensed under the Apache License Version 2.0" ############################################################################# -ARG BASE_IMAGE=rocm/pytorch:rocm7.0_ubuntu24.04_py3.12_pytorch_release_2.6.0 +ARG BASE_IMAGE=rocm/pytorch:rocm7.0.2_ubuntu24.04_py3.12_pytorch_release_2.6.0 FROM ${BASE_IMAGE} AS dgl_build # NOTE: This dockerfile **assumes** that BASE_IMAGE comes with the appropriate @@ -28,13 +28,6 @@ ENV DGL_SRC_DIR="/src/dgl" RUN mkdir -p ${DGL_SRC_DIR} COPY . 
${DGL_SRC_DIR} -# Clean up remnants of any previous builds -RUN rm -rf ${DGL_SRC_DIR}/build -RUN rm -rf ${DGL_SRC_DIR}/graphbolt/build -RUN rm -rf ${DGL_SRC_DIR}/dgl_sparse/build -RUN rm -rf ${DGL_SRC_DIR}/tensoradapter/pytorch/build -RUN rm -rf ${DGL_SRC_DIR}/python/build ${DGL_SRC_DIR}/python/dist ${DGL_SRC_DIR}/python/*.egg-info ${DGL_SRC_DIR}/python/libdgl.so - # Set GPU build targets ARG ARG_GPU_BUILD_TARGETS="gfx90a,gfx942" ENV GPU_BUILD_TARGETS=${ARG_GPU_BUILD_TARGETS} diff --git a/graphbolt/src/cuda/cooperative_minibatching_utils.cu b/graphbolt/src/cuda/cooperative_minibatching_utils.cu index 969296597475..00ad8341900c 100644 --- a/graphbolt/src/cuda/cooperative_minibatching_utils.cu +++ b/graphbolt/src/cuda/cooperative_minibatching_utils.cu @@ -54,12 +54,7 @@ torch::Tensor RankAssignment( THRUST_CALL( transform, nodes_ptr, nodes_ptr + nodes.numel(), part_ids_ptr, -#ifdef GRAPHBOLT_USE_HIP - ::proclaim_return_type -#else - ::cuda::proclaim_return_type -#endif - ( + ::cuda::proclaim_return_type( [rank = static_cast(rank), world_size = static_cast( world_size)] __device__(index_t id) -> part_t { diff --git a/graphbolt/src/cuda/extension/gpu_graph_cache.cu b/graphbolt/src/cuda/extension/gpu_graph_cache.cu index 46732e0aa8f4..3990ac498713 100644 --- a/graphbolt/src/cuda/extension/gpu_graph_cache.cu +++ b/graphbolt/src/cuda/extension/gpu_graph_cache.cu @@ -23,11 +23,7 @@ #include #ifdef GRAPHBOLT_USE_HIP -#include #include -namespace cuda { -using stream_ref = cuco::cuda_stream_ref; -} #define C10_CUDA_KERNEL_LAUNCH_CHECK C10_HIP_KERNEL_LAUNCH_CHECK #else #include @@ -510,12 +506,10 @@ std::tuple> GpuGraphCache::Replace( } if (edge_id_offsets) { // Append the edge ids as the last element of the output. 
- output_edge_tensors.push_back( - ops::IndptrEdgeIdsImpl( - output_indptr, output_indptr.scalar_type(), - *edge_id_offsets, - static_cast( - static_cast(output_size)))); + output_edge_tensors.push_back(ops::IndptrEdgeIdsImpl( + output_indptr, output_indptr.scalar_type(), + *edge_id_offsets, + static_cast(static_cast(output_size)))); } { diff --git a/graphbolt/src/cuda/extension/unique_and_compact_map.cu b/graphbolt/src/cuda/extension/unique_and_compact_map.cu index b305247824d1..6a5625634065 100644 --- a/graphbolt/src/cuda/extension/unique_and_compact_map.cu +++ b/graphbolt/src/cuda/extension/unique_and_compact_map.cu @@ -25,10 +25,6 @@ #ifdef GRAPHBOLT_USE_HIP #include -#include -namespace cuda{ - using stream_ref = cuco::cuda_stream_ref; -} #define C10_CUDA_KERNEL_LAUNCH_CHECK C10_HIP_KERNEL_LAUNCH_CHECK #else #include @@ -209,12 +205,8 @@ UniqueAndCompactBatchedHashMapBased( cub::ArgIndexInputIterator index_it(indexes.data_ptr()); auto input_it = thrust::make_transform_iterator( index_it, - #ifdef GRAPHBOLT_USE_HIP - ::proclaim_return_type - #else - ::cuda::proclaim_return_type - #endif - <::cuda::std::tuple>( + ::cuda::proclaim_return_type< + ::cuda::std::tuple>( [=, map = map.ref(cuco::find)] __device__(auto it) -> ::cuda::std::tuple { const auto i = it.key; @@ -247,12 +239,7 @@ UniqueAndCompactBatchedHashMapBased( auto unique_ids_offsets_dev_ptr = unique_ids_offsets_dev.data_ptr(); auto output_it = thrust::make_tabulate_output_iterator( - #ifdef GRAPHBOLT_USE_HIP - ::proclaim_return_type - #else - ::cuda::proclaim_return_type - #endif - ( + ::cuda::proclaim_return_type( [=, unique_ids_ptr = unique_ids.data_ptr(), part_ids_ptr = part_ids ? 
part_ids->data_ptr() : nullptr, @@ -276,12 +263,7 @@ UniqueAndCompactBatchedHashMapBased( DeviceSelect::If, input_it, output_it, unique_ids_offsets_dev_ptr + num_batches, offsets_ptr[2 * num_batches], - #ifdef GRAPHBOLT_USE_HIP - ::proclaim_return_type - #else - ::cuda::proclaim_return_type - #endif - ([] __device__(const auto& t) { + ::cuda::proclaim_return_type([] __device__(const auto& t) { return ::cuda::std::get<3>(t); })); auto unique_ids_offsets = torch::empty( @@ -300,12 +282,7 @@ UniqueAndCompactBatchedHashMapBased( thrust::make_zip_iterator( unique_ids_offsets_dev2.data_ptr(), unique_ids_offsets.data_ptr()), - #ifdef GRAPHBOLT_USE_HIP - ::proclaim_return_type - #else - ::cuda::proclaim_return_type - #endif - < + ::cuda::proclaim_return_type< thrust::tuple>( [=] __device__(const auto x) { return thrust::make_tuple(x, x); diff --git a/script/install_graphbolt_deps.sh b/script/install_graphbolt_deps.sh index d119994d6d78..0a67bba493d0 100644 --- a/script/install_graphbolt_deps.sh +++ b/script/install_graphbolt_deps.sh @@ -1,66 +1,41 @@ #!/usr/bin/env bash -export CC=/opt/rocm/llvm/bin/clang -export CXX=/opt/rocm/llvm/bin/clang++ -# set the install prefix to the cwd/install -# INSTALL_PREFIX=$(pwd)/install -INSTALL_PREFIX=/opt/rocm -FILE_SOURCE_DIR=$(dirname $(realpath $0)) -DEPS_DIR=$(pwd) +ROCM_ROOT=/opt/rocm -# Not installed by default -git clone https://github.com/ROCm/libhipcxx.git -cd libhipcxx -git checkout v2.2.0 -cmake -B build \ - -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -cmake --build build --target install -cd ${DEPS_DIR} +export CC=${ROCM_ROOT}/llvm/bin/clang +export CXX=${ROCM_ROOT}/llvm/bin/clang++ -# Need to patch for https://github.com/ROCm/rocm-libraries/issues/101. 
-# Should be fixed in -# https://github.com/ROCm/rocm-libraries/commit/e403601a2abe4a305cafd6526af2dc9bc69823e2#diff-7579081ee4dda43a07274a2397b8277bfa022af6d485ba086efc66a124ee8f5b -git clone https://github.com/tpopp/rocThrust.git -cd rocThrust -git checkout 613db9a025709fb18f2a676543a17850bd231b04 -cmake -B build \ - -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -cmake --build build --target install -cd ${DEPS_DIR} +set -x +INSTALL_PREFIX=${ROCM_ROOT} +FILE_SOURCE_DIR=$(dirname $(realpath $0)) +DEPS_DIR=$(pwd) +export CMAKE_PREFIX_PATH="/opt/rocm/hip/lib/cmake;/opt/rocm/lib/cmake" -# Need to patch for https://github.com/ROCm/hipCollections/issues/7, https://github.com/ROCm/hipCollections/issues/8, https://github.com/ROCm/hipCollections/issues/9 -git clone https://github.com/tpopp/hipCollections.git +git clone https://github.com/ROCm/hipCollections.git -b release/rocmds-25.10 +export RAPIDS_CMAKE_SCRIPT_BRANCH=release/rocmds-25.10 cd hipCollections -git checkout 6e31da8fd309f229d28adde8583a30bb4efaf1b7 cmake -B build \ -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DINSTALL_CUCO=ON -DBUILD_TESTS=OFF -DBUILD_BENCHMARKS=OFF -DBUILD_EXAMPLES=OFF cmake --build build --target install cd ${DEPS_DIR} - -# if ROCM < 7.0 we also need to install rocThrust -ROCM_VERSION=$(/opt/rocm/bin/hipconfig --version) -#strip the major version from the ROCM_VERSION (before the dot) -ROCM_VERSION=${ROCM_VERSION%%.*} -echo "Working with ROCm Major Version: $ROCM_VERSION" -if [ "$ROCM_VERSION" -lt "7" ]; then - - # Need to patch for https://github.com/ROCm/rocm-libraries/issues/94. 
Fixed in https://github.com/ROCm/rocm-libraries/commit/2539bb2e1cd17d287f532a65125b662bf0b658dc - git clone https://github.com/tpopp/hipCUB.git - cd hipCUB - git checkout f342111197dd020f1c4210b16aa550b08992e97b - cmake -B build \ - -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} - cmake --build build --target install - cd ${DEPS_DIR} -else - echo "ROCm Major Version is 7.0 or higher, skipping hipCUB installation" - # TODO remove this once the patches are merged - # Right now we need to patch the rocPRIM headers to fix the build because these - # config headers are missing gfx942 (I've added them manually) - cp ${FILE_SOURCE_DIR}/*.hpp ${INSTALL_PREFIX}/include/rocprim/device/detail/config/. - -fi - - - +# TODO this is an unacceptable way to do this, +# see https://github.com/ROCm/libhipcxx/issues/10 for more details +# This was implicitly not allowed in previous releases we were using, +# but with v2.7.0 they are explicitly not allowed. + +# We only use semaphores for a counter of IO operations in graphbolt, +# that only runs on the host (not on the device) so we should be "safe" +# to use this for now. +sed -i '/#error semaphore is not supported on AMD hardware and should not be included/d' ${INSTALL_PREFIX}/include/rapids/libhipcxx/cuda/semaphore +sed -i '/#error semaphore is not supported on AMD hardware and should not be included/d' ${INSTALL_PREFIX}/include/rapids/libhipcxx/hip/semaphore +sed -i '/#error semaphore is not supported on AMD hardware and should not be included/d' ${INSTALL_PREFIX}/include/rapids/libhipcxx/cuda/std/semaphore +sed -i '/#error semaphore is not supported on AMD hardware and should not be included/d' ${INSTALL_PREFIX}/include/rapids/libhipcxx/hip/std/semaphore + +# TODO remove this once the patches are merged +# the patches for this were merged in https://github.com/ROCm/rocm-libraries/pull/1883 +# but may take more time to be released. 
+
+# Workaround: the installed rocPRIM config headers are missing gfx942, so copy
+# the manually patched *.hpp files kept next to this script over them.
+cp "${FILE_SOURCE_DIR}"/*.hpp "${INSTALL_PREFIX}/include/rocprim/device/detail/config/."