Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
c1e6aca
Enabling ci build for py-torch 2.9, 2.10 on rocm
renjithravindrankannath Feb 6, 2026
7faae32
Merge branch 'develop' into py-torch-2.10-rocm
renjithravindrankannath Feb 9, 2026
22b06cb
Removing mkldnn check and manual variant
renjithravindrankannath Feb 9, 2026
29e0c97
Merge branch 'develop' into py-torch-2.10-rocm
renjithravindrankannath Feb 9, 2026
c397270
version check correction for aotriton
renjithravindrankannath Feb 11, 2026
58629f9
Increase timeout for ck
renjithravindrankannath Feb 12, 2026
a5e4a0a
24h for long ROCm/ML rebuilds; GitLab project/runner max must allow this
renjithravindrankannath Feb 12, 2026
12b2710
Reverting commit 58629f9140f04ba4c23f6ff16a5448cf28c37db5 timeout for ck
renjithravindrankannath Feb 12, 2026
441d218
py-torchvision requires rocm math lib paths indirectly when py-torch …
renjithravindrankannath Feb 17, 2026
02e36f6
Only add paths for packages that are in the spec to avoid KeyError
renjithravindrankannath Feb 18, 2026
bc1a5f0
libtorch_hip.so needs aotriton and hip libs at runtime
renjithravindrankannath Feb 19, 2026
16ac8a8
Correcting the library path with prefix
renjithravindrankannath Feb 20, 2026
ee6e80e
Add prefix lib dirs when they exist so the loader can find .so files
renjithravindrankannath Feb 21, 2026
a3fb110
style error fix
renjithravindrankannath Feb 21, 2026
6087d68
import NoLibrariesError
renjithravindrankannath Feb 23, 2026
be4d3c3
Changing dependency to rebuild aotriton
renjithravindrankannath Feb 24, 2026
a71c9aa
reverting the recent change
renjithravindrankannath Feb 24, 2026
92ea3ac
Changing dependency to rebuild aotriton
renjithravindrankannath Feb 24, 2026
bebce47
Merge branch 'develop' into py-torch-2.10-rocm
renjithravindrankannath Feb 24, 2026
10aa531
Revert "Changing dependency to rebuild aotriton"
renjithravindrankannath Feb 25, 2026
d6e94b3
Revert "reverting the recent change"
renjithravindrankannath Feb 25, 2026
c038ae4
Revert "Changing dependency to rebuild aotriton"
renjithravindrankannath Feb 25, 2026
9d5dac8
Changing dependency again to rebuild aotriton
renjithravindrankannath Feb 26, 2026
8520f97
Revert "Changing dependency again to rebuild aotriton"
renjithravindrankannath Feb 26, 2026
8ed5223
Limiting ci build to py-torch as remaining are not yet ready
renjithravindrankannath Feb 26, 2026
324f587
py-llvmlite@0.46.0 needs llvm@20
renjithravindrankannath Feb 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .ci/gitlab/configs/linux/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ ci:
- wrf
build-job:
tags: [ "spack", "huge" ]
# 24h for long ROCm/ML rebuilds; GitLab project/runner max must allow this
timeout: 1440 minutes
variables:
CI_JOB_SIZE: huge
SPACK_BUILD_JOBS: "12"
Expand Down
3 changes: 2 additions & 1 deletion repos/spack_repo/builtin/packages/hwloc/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,8 @@ class Hwloc(AutotoolsPackage, CudaPackage, ROCmPackage):
depends_on("mpi", when="+netloc")

with when("+rocm"):
depends_on("rocm-smi-lib")
depends_on("rocm-smi-lib@:6.4", when="@:2.11.1")
depends_on("rocm-smi-lib@7.0:", when="@2.12.2:")
depends_on("rocm-opencl", when="+opencl")
# Avoid a circular dependency since the openmp
# variant of llvm-amdgpu depends on hwloc.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,18 @@ index 9be7f37..39d0f24 100644
endif()

diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake
index 1c0d3a2..e0de4b1 100644
index 1c0d3a2..83f9f9d 100644
--- a/cmake/public/LoadHIP.cmake
+++ b/cmake/public/LoadHIP.cmake
@@ -167,6 +167,10 @@ if(HIP_FOUND)
@@ -167,6 +167,11 @@ if(HIP_FOUND)
find_package_and_print_version(hipsolver REQUIRED)
find_package_and_print_version(hiprtc REQUIRED)

+ list(APPEND ROCM_INCLUDE ${rocthrust_INCLUDE_DIR})
+ list(APPEND ROCM_INCLUDE ${rocprim_INCLUDE_DIR})
+ list(APPEND ROCM_INCLUDE ${hipcub_INCLUDE_DIR})
+ list(APPEND ROCM_INCLUDE ${rocRAND_INCLUDE_DIR})
+ list(APPEND ROCM_INCLUDE $ENV{AOTRITON_INSTALLED_PREFIX}/include)

find_library(PYTORCH_HIP_LIBRARIES amdhip64 HINTS ${ROCM_PATH}/lib)
# TODO: miopen_LIBRARIES should return fullpath to the library file,
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index d2d23b7..620a89f 100644
index d2d23b7ab65..620a89f65cb 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -1379,13 +1379,6 @@ if(USE_ROCM)
Expand All @@ -26,7 +26,7 @@ index d2d23b7..620a89f 100644
endif()

diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake
index 58c74dd..d3e1ad4 100644
index 58c74ddda35..54f96871372 100644
--- a/cmake/public/LoadHIP.cmake
+++ b/cmake/public/LoadHIP.cmake
@@ -26,12 +26,6 @@ else()
Expand Down Expand Up @@ -78,7 +78,15 @@ index 58c74dd..d3e1ad4 100644
find_package_and_print_version(amd_comgr REQUIRED)
find_package_and_print_version(rocrand REQUIRED)
find_package_and_print_version(hiprand REQUIRED)
@@ -171,7 +168,11 @@ if(HIP_FOUND)
@@ -157,6 +154,7 @@ if(HIP_FOUND)
find_package_and_print_version(hipcub REQUIRED)
find_package_and_print_version(rocthrust REQUIRED)
find_package_and_print_version(hipsolver REQUIRED)
+ list(APPEND ROCM_INCLUDE_DIRS $ENV{AOTRITON_INSTALLED_PREFIX}/include)
# workaround cmake 4 build issue
if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
message(WARNING "Work around hiprtc cmake failure for cmake >= 4")
@@ -171,7 +169,11 @@ if(HIP_FOUND)
if(UNIX)
find_package_and_print_version(rccl)
find_package_and_print_version(hsa-runtime64 REQUIRED)
Expand Down
24 changes: 19 additions & 5 deletions repos/spack_repo/builtin/packages/py_torch/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage):
conflicts("+gloo+rocm")
conflicts("+rocm", when="@2.3", msg="Rocm doesn't support py-torch 2.3 release")
conflicts("+rocm", when="@2.4", msg="Rocm doesn't support py-torch 2.4 release")
conflicts("+rocm", when="@2.8", msg="Rocm doesn't support py-torch 2.8 release")
conflicts("+tensorpipe", when="+rocm ^hip@:5.1", msg="TensorPipe not supported until ROCm 5.2")
conflicts("+breakpad", when="target=ppc64:")
conflicts("+breakpad", when="target=ppc64le:")
Expand Down Expand Up @@ -305,7 +306,8 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage):
depends_on("valgrind", when="+valgrind")
with when("+rocm"):
depends_on("hsa-rocr-dev")
depends_on("hip")
depends_on("hip@7.0:", when="@2.9:")
depends_on("hip@:6.4", when="@:2.7")
depends_on("rccl", when="+nccl")
depends_on("rocprim")
depends_on("hipcub")
Expand All @@ -320,11 +322,15 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage):
depends_on("rocfft")
depends_on("rocblas")
depends_on("miopen-hip")
depends_on("composable-kernel")
depends_on("hipblaslt")
# Ensure hipblaslt version for 2.9+
depends_on("hipblaslt@7.0:", when="@2.9:")
depends_on("rocminfo")
depends_on("aotriton@0.8.1b", when="@2.5:2.6")
depends_on("aotriton@0.9.1b", when="@2.7:")
depends_on("composable-kernel@:6.3.2", when="@2.5")
depends_on("composable-kernel@6.3.2:", when="@2.6:")
depends_on("hipsparselt@7.0:", when="@2.9:")
depends_on("aotriton@0.8b", when="@2.5:2.6")
depends_on("aotriton@0.9.2b", when="@2.7")
depends_on("aotriton@0.10b", when="@2.8:")
depends_on("mpi", when="+mpi")
depends_on("ucc", when="+ucc")
depends_on("ucx", when="+ucc")
Expand Down Expand Up @@ -568,6 +574,14 @@ def patch(self):
"torch_global_deps PROPERTIES LINKER_LANGUAGE CXX",
"caffe2/CMakeLists.txt",
)
if self.spec.satisfies("@2.5:+rocm"):
filter_file(
"find_library(ROCM_ROCTX_LIB roctx64 HINTS ${ROCM_PATH}/lib)",
"find_library(ROCM_ROCTX_LIB roctx64 HINTS ${ROCM_PATH}/lib)\n"
"set(ROCTRACER_INCLUDE_DIR $ENV{ROCTRACER_INCLUDE_DIR})",
"cmake/public/LoadHIP.cmake",
string=True,
)
if self.spec.satisfies("@2.1:2.7+rocm"):
filter_file(
"${ROCM_INCLUDE_DIRS}/rocm-core/rocm_version.h",
Expand Down
4 changes: 2 additions & 2 deletions stacks/ml-linux-x86_64-rocm/spack.yaml
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@alalazo this will make your #3175 a bit simpler

Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ spack:
# Keras
- py-keras backend=tensorflow
# - py-keras backend=jax
# - py-keras backend=torch
- py-keras backend=torch

# PyTorch
# Does not yet support Spack-installed ROCm
Expand All @@ -54,7 +54,7 @@ spack:
# - py-pytorch-lightning
# - py-segmentation-models-pytorch
# - py-timm
# - py-torch
- py-torch
# - py-torch-geometric
# - py-torch-nvidia-apex
# - py-torchaudio
Expand Down
Loading