Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions .github/workflows/build_portable_linux_pytorch_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,8 @@ jobs:
./external-builds/pytorch/pytorch_vision_repo.py checkout --require-related-commit
./external-builds/pytorch/pytorch_triton_repo.py checkout

- name: Create pip cache directory
run: mkdir -p /tmp/pipcache

# Note: determine_version.py sets optional_build_prod_arguments in
# GITHUB_ENV, which includes --rocm-sdk-version and --version-suffix.
- name: Determine optional arguments passed to `build_prod_wheels.py`
if: ${{ inputs.rocm_version }}
run: |
Expand All @@ -192,7 +191,6 @@ jobs:
./external-builds/pytorch/build_prod_wheels.py \
build \
--install-rocm \
--pip-cache-dir /tmp/pipcache \
--index-url "${{ inputs.cloudfront_url }}/${{ inputs.amdgpu_family }}/" \
--clean \
--output-dir ${{ env.PACKAGE_DIST_DIR }} ${{ env.optional_build_prod_arguments }}
Expand Down Expand Up @@ -310,7 +308,7 @@ jobs:
--include "torch-${TORCH_VERSION}-${CP_VERSION}-linux_x86_64.whl" \
--include "torchaudio-${TORCHAUDIO_VERSION}-${CP_VERSION}-linux_x86_64.whl" \
--include "torchvision-${TORCHVISION_VERSION}-${CP_VERSION}-linux_x86_64.whl" \
--include "triton-${TRITON_VERSION}-${CP_VERSION}-linux_x86_64.whl"
--include "triton-${TRITON_VERSION}-${CP_VERSION}-linux_x86_64.whl" \
--include "apex-${APEX_VERSION}-${CP_VERSION}-linux_x86_64.whl"

- name: (Re-)Generate Python package release index
Expand Down
133 changes: 133 additions & 0 deletions .github/workflows/build_portable_linux_pytorch_wheels_ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# CI variant of build_portable_linux_pytorch_wheels.yml.
#
# Key differences from the release workflow:
# - Installs ROCm packages via --find-links (CI artifacts) instead of
# --index-url (release index)
# - Builds torch only (no torchvision, torchaudio, or triton)
# - No S3 upload, staging, testing, or promotion — just build + sanity check
# - (Not yet implemented) this can use an explicit separate sccache bucket
#
# TODO(#3291): Build more packages (torchvision, torchaudio, triton, etc.)
# TODO(#3291): Upload packages to S3 (via upload_python_packages.py?)
#
# Both workflows share build_prod_wheels.py for the actual build logic.
# See https://github.com/ROCm/TheRock/issues/3291 for convergence plans.

name: Build Portable Linux PyTorch Wheels (CI)

# Triggers: this workflow can be called from another workflow (workflow_call)
# or started by hand (workflow_dispatch). Both declare the same five inputs.
on:
# Reusable-workflow entry point.
workflow_call:
inputs:
# Required here with no default; the manual trigger below supplies one.
artifact_group:
type: string
required: true
python_version:
type: string
default: "3.12"
pytorch_git_ref:
description: PyTorch ref to checkout (typically "release/X.Y")
type: string
default: "release/2.10"
rocm_package_find_links_url:
description: URL for pip --find-links to install ROCm packages
type: string
required: true
rocm_version:
description: ROCm package version to install and build against (e.g. 7.10.0.dev0)
type: string
required: true
# Manual entry point.
workflow_dispatch:
inputs:
# Optional here: defaults to gfx94X-dcgpu when not provided.
artifact_group:
description: "The artifact group to build (e.g. gfx94X-dcgpu, gfx120X-all)"
type: string
default: gfx94X-dcgpu
python_version:
type: string
default: "3.12"
pytorch_git_ref:
description: PyTorch ref to checkout (typically "release/X.Y")
type: string
default: "release/2.10"
rocm_package_find_links_url:
description: URL for pip --find-links to install ROCm packages
type: string
required: true
rocm_version:
description: ROCm package version to install and build against (e.g. 7.10.0.dev0)
type: string
required: true

permissions:
contents: read

run-name: Build portable Linux PyTorch Wheels CI (${{ inputs.artifact_group }}, py${{ inputs.python_version }}, ${{ inputs.pytorch_git_ref }})

jobs:
# Single job: builds the torch wheel inside the manylinux build container.
build_pytorch_wheels:
name: Build PyTorch | ${{ inputs.artifact_group }} | torch ${{ inputs.pytorch_git_ref }} | py${{ inputs.python_version }}
# Uses the 'azure-linux-scale-rocm' runner label when the repository owner is
# 'ROCm'; any other owner (e.g. a fork) gets 'ubuntu-24.04'.
runs-on: ${{ github.repository_owner == 'ROCm' && 'azure-linux-scale-rocm' || 'ubuntu-24.04' }}
container:
# Image is pinned by sha256 digest rather than by a mutable tag.
image: ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:db2b63f938941dde2abc80b734e64b45b9995a282896d513a0f3525d4591d6cb
env:
# Directory handed to build_prod_wheels.py as --output-dir and to the sanity check.
PACKAGE_DIST_DIR: ${{ github.workspace }}/output/packages/dist
# Empty by default; per the note on the "Determine optional arguments" step,
# determine_version.py overrides this through GITHUB_ENV.
optional_build_prod_arguments: ""
steps:
# Check out this repository. The action is pinned to a full commit SHA; the
# trailing comment records the release tag that SHA corresponds to.
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

# Set a global git author name/email inside the job container.
# NOTE(review): presumably required because the later PyTorch checkout
# scripts create commits or apply patches - confirm against those scripts.
- name: Configure Git Identity
run: |
git config --global user.name "therockbot"
git config --global user.email "therockbot@amd.com"

# Hand the requested Python version to python_to_cp_version.py.
# The input is passed through an environment variable and quoted instead of
# being expanded by `${{ }}` directly into the script text, so its value can
# never be parsed as shell syntax.
# NOTE(review): the next step reads `env.cp_version`, so this script
# presumably exports `cp_version` through GITHUB_ENV - confirm in the script.
- name: Select Python version
  env:
    PYTHON_VERSION: ${{ inputs.python_version }}
  run: |
    python build_tools/github_actions/python_to_cp_version.py \
      --python-version "${PYTHON_VERSION}"

# Prepend the selected interpreter's bin directory to PATH for later steps.
# Fails the job early when /opt/python/<cp_version>/bin/python is not an
# executable file in the container.
- name: Add selected Python version to PATH
  run: |
    python_dir="/opt/python/${{ env.cp_version }}"
    [ -x "${python_dir}/bin/python" ] || {
      echo "ERROR: Could not find python: ${python_dir}"
      exit 1
    }
    echo "${python_dir}/bin" >> "$GITHUB_PATH"

# Runs only when the requested ref is exactly 'nightly'; the stable variant
# of this step covers every other ref.
- name: Checkout PyTorch source (nightly)
  if: inputs.pytorch_git_ref == 'nightly'
  run: ./external-builds/pytorch/pytorch_torch_repo.py checkout --repo-hashtag nightly

# Runs for every ref other than 'nightly' and checks the ref out from the
# ROCm fork of PyTorch.
# The ref is passed through an environment variable and quoted instead of
# being expanded by `${{ }}` directly into the script text, so a ref that
# contains shell metacharacters cannot alter the command.
- name: Checkout PyTorch source (stable)
  if: ${{ inputs.pytorch_git_ref != 'nightly' }}
  env:
    PYTORCH_GIT_REF: ${{ inputs.pytorch_git_ref }}
  run: |
    ./external-builds/pytorch/pytorch_torch_repo.py checkout \
      --gitrepo-origin https://github.com/ROCm/pytorch.git \
      --repo-hashtag "${PYTORCH_GIT_REF}"

# Note: determine_version.py sets optional_build_prod_arguments in
# GITHUB_ENV, which includes --rocm-sdk-version and --version-suffix.
# The version is passed through an environment variable and quoted instead
# of being expanded by `${{ }}` directly into the script text, so its value
# can never be parsed as shell syntax.
- name: Determine optional arguments passed to `build_prod_wheels.py`
  if: ${{ inputs.rocm_version }}
  env:
    ROCM_VERSION: ${{ inputs.rocm_version }}
  run: |
    pip install packaging
    python build_tools/github_actions/determine_version.py \
      --rocm-version "${ROCM_VERSION}"

# Build the wheel with build_prod_wheels.py, installing ROCm packages from
# the supplied --find-links URL and writing output to PACKAGE_DIST_DIR.
# The URL is passed through an environment variable: expanding it with
# `${{ }}` inside a double-quoted shell string would let a value containing
# `"` or `$(...)` break out of the quotes.
- name: Build PyTorch wheels
  env:
    ROCM_PACKAGE_FIND_LINKS_URL: ${{ inputs.rocm_package_find_links_url }}
  run: |
    # optional_build_prod_arguments is deliberately left unquoted: it holds
    # several space-separated flags that must expand to separate arguments.
    ./external-builds/pytorch/build_prod_wheels.py \
      build \
      --install-rocm \
      --find-links "${ROCM_PACKAGE_FIND_LINKS_URL}" \
      --clean \
      --output-dir ${{ env.PACKAGE_DIST_DIR }} \
      ${{ env.optional_build_prod_arguments }}

# Run the repository's wheel sanity-check script against the build output
# directory. The folded scalar joins the two lines into a single command.
- name: Sanity check wheel
  run: >-
    python external-builds/pytorch/sanity_check_wheel.py
    ${{ env.PACKAGE_DIST_DIR }}/
2 changes: 2 additions & 0 deletions .github/workflows/build_windows_pytorch_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,8 @@ jobs:
--torch-dir ${{ env.CHECKOUT_ROOT }}/torch \
--require-related-commit

# Note: determine_version.py sets optional_build_prod_arguments in
# GITHUB_ENV, which includes --rocm-sdk-version and --version-suffix.
- name: Determine optional arguments passed to `build_prod_wheels.py`
if: ${{ inputs.rocm_version }}
run: |
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ jobs:
rocm_package_version: ${{ needs.setup.outputs.rocm_package_version }}
test_type: ${{ needs.setup.outputs.test_type }}
sanity_check_only_for_family: ${{ matrix.variant.sanity_check_only_for_family == true }}
build_pytorch: ${{ matrix.variant.build_pytorch == true }}
permissions:
contents: read
id-token: write
Expand Down
25 changes: 25 additions & 0 deletions .github/workflows/ci_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ on:
type: string
sanity_check_only_for_family:
type: boolean
build_pytorch:
type: boolean
default: false

permissions:
contents: read
Expand Down Expand Up @@ -164,3 +167,25 @@ jobs:
package_find_links_url: ${{ needs.build_portable_linux_python_packages.outputs.package_find_links_url }}
python_version: "3.12"
rocm_version: ${{ inputs.rocm_package_version }}

# TODO(#3291): Add test_pytorch_wheels job
# Calls the CI PyTorch wheel workflow after the portable Linux Python
# packages job, using that job's package_find_links_url output.
build_portable_linux_pytorch_wheels_ci:
needs: [build_portable_linux_python_packages]
name: Build PyTorch
# `!failure() && !cancelled()` replaces the default success() check, so this
# job is still considered when a needed job was skipped.
# NOTE(review): the use_prebuilt_artifacts clause accepts both 'false' and
# 'true', so it only excludes other values (e.g. empty) - confirm intended.
# The job is gated on the build_pytorch input being true.
if: >-
${{
!failure() &&
!cancelled() &&
(
inputs.use_prebuilt_artifacts == 'false' ||
inputs.use_prebuilt_artifacts == 'true'
) &&
inputs.build_pytorch == true
}}
uses: ./.github/workflows/build_portable_linux_pytorch_wheels_ci.yml
with:
artifact_group: ${{ inputs.artifact_group }}
# Python version and PyTorch ref are fixed here rather than taken from inputs.
python_version: "3.12"
pytorch_git_ref: "release/2.10"
rocm_package_find_links_url: ${{ needs.build_portable_linux_python_packages.outputs.package_find_links_url }}
rocm_version: ${{ inputs.rocm_package_version }}
1 change: 1 addition & 0 deletions .github/workflows/ci_nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ jobs:
rocm_package_version: ${{ needs.setup.outputs.rocm_package_version }}
test_type: ${{ needs.setup.outputs.test_type }}
sanity_check_only_for_family: ${{ matrix.variant.sanity_check_only_for_family == true }}
build_pytorch: ${{ matrix.variant.build_pytorch == true }}
permissions:
contents: read
id-token: write
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/ci_windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -164,3 +164,6 @@ jobs:
package_find_links_url: ${{ needs.build_windows_python_packages.outputs.package_find_links_url }}
python_version: "3.12"
rocm_version: ${{ inputs.rocm_package_version }}

# TODO(#3291): Add build_windows_pytorch_wheels_ci
# TODO(#3291): Add test_pytorch_wheels job
33 changes: 24 additions & 9 deletions BUILD_TOPOLOGY.toml
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ artifact_groups = ["third-party-sysdeps", "base"]
description = "Compiler, runtimes, and core profiling"
artifact_groups = [
"compiler",
"core-amdsmi",
"core-runtime",
"third-party-libs",
"hip-runtime",
Expand Down Expand Up @@ -208,6 +209,12 @@ type = "generic"
artifact_group_deps = ["base", "third-party-sysdeps"]
source_sets = ["rocm-systems"]

[artifact_groups.core-amdsmi]
description = "AMD SMI tool (amdsmi)"
type = "generic"
artifact_group_deps = ["base", "third-party-sysdeps"]
source_sets = ["rocm-systems"]

[artifact_groups.compiler]
description = "AMD LLVM toolchain and compiler infrastructure"
type = "generic"
Expand Down Expand Up @@ -267,9 +274,9 @@ source_sets = ["rocm-systems"] # rocprofiler-sdk is in rocm-systems
[artifact_groups.dctools-core]
description = "Data center management tools with minimal dependencies"
type = "generic"
artifact_group_deps = ["core-runtime", "profiler-core"]
artifact_group_deps = ["core-runtime", "core-amdsmi", "profiler-core"]
# TODO: rocm-systems included for projects/hip/VERSION (see CMakeLists.txt)
source_sets = ["base", "rocm-systems"] # RDC uses amdsmi from base
source_sets = ["base", "rocm-systems"] # RDC uses amdsmi from core-amdsmi

# Future artifact groups
# [artifact_groups.dctools-rocm]
Expand All @@ -280,7 +287,7 @@ source_sets = ["base", "rocm-systems"] # RDC uses amdsmi from base
[artifact_groups.profiler-apps]
description = "Profiler applications and analysis tools"
type = "generic"
artifact_group_deps = ["profiler-core", "compiler"]
artifact_group_deps = ["profiler-core", "compiler", "core-amdsmi"]
source_sets = ["rocm-systems", "profiler-extras"] # rocprofiler-systems + trace decoder

[artifact_groups.iree-libs]
Expand Down Expand Up @@ -437,6 +444,14 @@ feature_name = "CORE_RUNTIME"
feature_group = "CORE"
disable_platforms = ["windows"]

[artifacts.core-amdsmi]
artifact_group = "core-amdsmi"
type = "target-neutral"
artifact_deps = ["base", "sysdeps"]
feature_name = "CORE_AMDSMI"
feature_group = "CORE"
disable_platforms = ["windows"]

[artifacts.core-hip]
artifact_group = "hip-runtime"
type = "target-neutral"
Expand All @@ -454,7 +469,7 @@ feature_group = "CORE" # Part of core, enabled by default
[artifacts.rocrtst]
artifact_group = "rocrtst"
type = "target-neutral"
artifact_deps = ["core-runtime", "core-ocl"]
artifact_deps = ["core-runtime", "core-ocl", "sysdeps-hwloc"]
feature_name = "CORE_RUNTIME_TESTS"
feature_group = "CORE"
disable_platforms = ["windows"]
Expand All @@ -479,7 +494,7 @@ feature_group = "CORE" # Part of core, enabled by default
[artifacts.blas]
artifact_group = "math-libs"
type = "target-specific"
artifact_deps = ["core-runtime", "core-hip", "host-blas", "host-suite-sparse", "rocprofiler-sdk"]
artifact_deps = ["core-runtime", "core-hip", "core-amdsmi", "host-blas", "host-suite-sparse", "rocprofiler-sdk"]
split_databases = ["rocblas", "hipblaslt"]

[artifacts.fft]
Expand Down Expand Up @@ -528,7 +543,7 @@ artifact_deps = []
[artifacts.composable-kernel]
artifact_group = "ml-libs"
type = "target-specific"
artifact_deps = ["core-runtime", "core-hip"]
artifact_deps = ["core-runtime", "core-hip", "rand", "rocprofiler-sdk"]

[artifacts.miopen]
artifact_group = "ml-libs"
Expand Down Expand Up @@ -568,7 +583,7 @@ disable_platforms = ["windows"]
[artifacts.rccl]
artifact_group = "comm-libs"
type = "target-specific"
artifact_deps = ["core-runtime", "core-hip", "hipify", "rocprofiler-sdk"]
artifact_deps = ["core-runtime", "core-hip", "hipify", "rocprofiler-sdk", "core-amdsmi"]
disable_platforms = ["windows"]

# --- Profiler Tools ---
Expand Down Expand Up @@ -598,7 +613,7 @@ disable_platforms = ["windows"]
[artifacts.rocprofiler-systems]
artifact_group = "profiler-apps"
type = "target-neutral"
artifact_deps = ["amd-llvm", "core-hip", "rocprofiler-sdk"]
artifact_deps = ["amd-llvm", "core-hip", "rocprofiler-sdk", "core-amdsmi"]
feature_name = "ROCPROFSYS"
feature_group = "PROFILER"
disable_platforms = ["windows"]
Expand All @@ -608,7 +623,7 @@ disable_platforms = ["windows"]
[artifacts.rdc]
artifact_group = "dctools-core"
type = "target-neutral"
artifact_deps = ["core-hip", "rocprofiler-sdk", "sysdeps"]
artifact_deps = ["core-hip", "rocprofiler-sdk", "sysdeps", "core-amdsmi"]
feature_group = "DC_TOOLS" # Part of DC tools, enabled by default via THEROCK_ENABLE_ALL
disable_platforms = ["windows"]

Expand Down
31 changes: 0 additions & 31 deletions base/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,37 +56,6 @@ therock_cmake_subproject_provide_package(rocm-core rocm-core lib/cmake/rocm-core
therock_cmake_subproject_activate(rocm-core)


################################################################################
# amdsmi
################################################################################

if(NOT WIN32) # TODO(#36): Enable on Windows and/or make subproject inclusion generally optional

therock_cmake_subproject_declare(amdsmi
EXTERNAL_SOURCE_DIR "${THEROCK_ROCM_SYSTEMS_SOURCE_DIR}/projects/amdsmi"
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/amdsmi"
USE_DIST_AMDGPU_TARGETS
BACKGROUND_BUILD
CMAKE_ARGS
"-DCMAKE_VERBOSE_MAKEFILE=OFF"
-DBUILD_TESTS=${THEROCK_BUILD_TESTING}
BUILD_DEPS
therock-googletest
RUNTIME_DEPS
rocm-core
${THEROCK_BUNDLED_LIBDRM}
INSTALL_RPATH_DIRS
lib
INTERFACE_INSTALL_RPATH_DIRS
lib
)
therock_cmake_subproject_provide_package(amdsmi
amd_smi lib/cmake)
therock_cmake_subproject_activate(amdsmi)
list(APPEND _optional_artifact_deps amdsmi)

endif()


################################################################################
# rocm_smi_lib
Expand Down
Loading
Loading