diff --git a/.github/workflows/build_portable_linux_pytorch_wheels.yml b/.github/workflows/build_portable_linux_pytorch_wheels.yml index 5e627a9460..2547246c18 100644 --- a/.github/workflows/build_portable_linux_pytorch_wheels.yml +++ b/.github/workflows/build_portable_linux_pytorch_wheels.yml @@ -175,9 +175,8 @@ jobs: ./external-builds/pytorch/pytorch_vision_repo.py checkout --require-related-commit ./external-builds/pytorch/pytorch_triton_repo.py checkout - - name: Create pip cache directory - run: mkdir -p /tmp/pipcache - + # Note: determine_version.py sets optional_build_prod_arguments in + # GITHUB_ENV, which includes --rocm-sdk-version and --version-suffix. - name: Determine optional arguments passed to `build_prod_wheels.py` if: ${{ inputs.rocm_version }} run: | @@ -192,7 +191,6 @@ jobs: ./external-builds/pytorch/build_prod_wheels.py \ build \ --install-rocm \ - --pip-cache-dir /tmp/pipcache \ --index-url "${{ inputs.cloudfront_url }}/${{ inputs.amdgpu_family }}/" \ --clean \ --output-dir ${{ env.PACKAGE_DIST_DIR }} ${{ env.optional_build_prod_arguments }} @@ -310,7 +308,7 @@ jobs: --include "torch-${TORCH_VERSION}-${CP_VERSION}-linux_x86_64.whl" \ --include "torchaudio-${TORCHAUDIO_VERSION}-${CP_VERSION}-linux_x86_64.whl" \ --include "torchvision-${TORCHVISION_VERSION}-${CP_VERSION}-linux_x86_64.whl" \ - --include "triton-${TRITON_VERSION}-${CP_VERSION}-linux_x86_64.whl" + --include "triton-${TRITON_VERSION}-${CP_VERSION}-linux_x86_64.whl" \ --include "apex-${APEX_VERSION}-${CP_VERSION}-linux_x86_64.whl" - name: (Re-)Generate Python package release index diff --git a/.github/workflows/build_portable_linux_pytorch_wheels_ci.yml b/.github/workflows/build_portable_linux_pytorch_wheels_ci.yml new file mode 100644 index 0000000000..f5d9430a8c --- /dev/null +++ b/.github/workflows/build_portable_linux_pytorch_wheels_ci.yml @@ -0,0 +1,133 @@ +# CI variant of build_portable_linux_pytorch_wheels.yml. 
+# +# Key differences from the release workflow: +# - Installs ROCm packages via --find-links (CI artifacts) instead of +# --index-url (release index) +# - Builds torch only (no torchvision, torchaudio, or triton) +# - No S3 upload, staging, testing, or promotion — just build + sanity check +# - (Not yet implemented) this can use an explicit separate sccache bucket +# +# TODO(#3291): Build more packages (torchvision, torchaudio, triton, etc.) +# TODO(#3291): Upload packages to S3 (via upload_python_packages.py?) +# +# Both workflows share build_prod_wheels.py for the actual build logic. +# See https://github.com/ROCm/TheRock/issues/3291 for convergence plans. + +name: Build Portable Linux PyTorch Wheels (CI) + +on: + workflow_call: + inputs: + artifact_group: + type: string + required: true + python_version: + type: string + default: "3.12" + pytorch_git_ref: + description: PyTorch ref to checkout (typically "release/X.Y") + type: string + default: "release/2.10" + rocm_package_find_links_url: + description: URL for pip --find-links to install ROCm packages + type: string + required: true + rocm_version: + description: ROCm package version to install and build against (e.g. 7.10.0.dev0) + type: string + required: true + workflow_dispatch: + inputs: + artifact_group: + description: "The artifact group to build (e.g. gfx94X-dcgpu, gfx120X-all)" + type: string + default: gfx94X-dcgpu + python_version: + type: string + default: "3.12" + pytorch_git_ref: + description: PyTorch ref to checkout (typically "release/X.Y") + type: string + default: "release/2.10" + rocm_package_find_links_url: + description: URL for pip --find-links to install ROCm packages + type: string + required: true + rocm_version: + description: ROCm package version to install and build against (e.g. 
7.10.0.dev0) + type: string + required: true + +permissions: + contents: read + +run-name: Build portable Linux PyTorch Wheels CI (${{ inputs.artifact_group }}, py${{ inputs.python_version }}, ${{ inputs.pytorch_git_ref }}) + +jobs: + build_pytorch_wheels: + name: Build PyTorch | ${{ inputs.artifact_group }} | torch ${{ inputs.pytorch_git_ref }} | py${{ inputs.python_version }} + runs-on: ${{ github.repository_owner == 'ROCm' && 'azure-linux-scale-rocm' || 'ubuntu-24.04' }} + container: + image: ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:db2b63f938941dde2abc80b734e64b45b9995a282896d513a0f3525d4591d6cb + env: + PACKAGE_DIST_DIR: ${{ github.workspace }}/output/packages/dist + optional_build_prod_arguments: "" + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Configure Git Identity + run: | + git config --global user.name "therockbot" + git config --global user.email "therockbot@amd.com" + + - name: Select Python version + run: | + python build_tools/github_actions/python_to_cp_version.py \ + --python-version ${{ inputs.python_version }} + + - name: Add selected Python version to PATH + run: | + python_dir="/opt/python/${{ env.cp_version }}" + if ! [ -x "${python_dir}/bin/python" ]; then + echo "ERROR: Could not find python: ${python_dir}" + exit 1 + fi + echo "${python_dir}/bin" >> "$GITHUB_PATH" + + - name: Checkout PyTorch source (nightly) + if: ${{ inputs.pytorch_git_ref == 'nightly' }} + run: | + ./external-builds/pytorch/pytorch_torch_repo.py checkout \ + --repo-hashtag nightly + + - name: Checkout PyTorch source (stable) + if: ${{ inputs.pytorch_git_ref != 'nightly' }} + run: | + ./external-builds/pytorch/pytorch_torch_repo.py checkout \ + --gitrepo-origin https://github.com/ROCm/pytorch.git \ + --repo-hashtag ${{ inputs.pytorch_git_ref }} + + # Note: determine_version.py sets optional_build_prod_arguments in + # GITHUB_ENV, which includes --rocm-sdk-version and --version-suffix. 
+ - name: Determine optional arguments passed to `build_prod_wheels.py` + if: ${{ inputs.rocm_version }} + run: | + pip install packaging + python build_tools/github_actions/determine_version.py \ + --rocm-version ${{ inputs.rocm_version }} + + - name: Build PyTorch wheels + run: | + ./external-builds/pytorch/build_prod_wheels.py \ + build \ + --install-rocm \ + --find-links "${{ inputs.rocm_package_find_links_url }}" \ + --clean \ + --output-dir ${{ env.PACKAGE_DIST_DIR }} \ + ${{ env.optional_build_prod_arguments }} + + - name: Sanity check wheel + run: | + python external-builds/pytorch/sanity_check_wheel.py \ + ${{ env.PACKAGE_DIST_DIR }}/ diff --git a/.github/workflows/build_windows_pytorch_wheels.yml b/.github/workflows/build_windows_pytorch_wheels.yml index 6c9db1be7d..965a534d8a 100644 --- a/.github/workflows/build_windows_pytorch_wheels.yml +++ b/.github/workflows/build_windows_pytorch_wheels.yml @@ -195,6 +195,8 @@ jobs: --torch-dir ${{ env.CHECKOUT_ROOT }}/torch \ --require-related-commit + # Note: determine_version.py sets optional_build_prod_arguments in + # GITHUB_ENV, which includes --rocm-sdk-version and --version-suffix. 
- name: Determine optional arguments passed to `build_prod_wheels.py` if: ${{ inputs.rocm_version }} run: | diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f6238ab0fd..af8ce5c705 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -102,6 +102,7 @@ jobs: rocm_package_version: ${{ needs.setup.outputs.rocm_package_version }} test_type: ${{ needs.setup.outputs.test_type }} sanity_check_only_for_family: ${{ matrix.variant.sanity_check_only_for_family == true }} + build_pytorch: ${{ matrix.variant.build_pytorch == true }} permissions: contents: read id-token: write diff --git a/.github/workflows/ci_linux.yml b/.github/workflows/ci_linux.yml index 8940861787..d3da978ad6 100644 --- a/.github/workflows/ci_linux.yml +++ b/.github/workflows/ci_linux.yml @@ -32,6 +32,9 @@ on: type: string sanity_check_only_for_family: type: boolean + build_pytorch: + type: boolean + default: false permissions: contents: read @@ -164,3 +167,25 @@ jobs: package_find_links_url: ${{ needs.build_portable_linux_python_packages.outputs.package_find_links_url }} python_version: "3.12" rocm_version: ${{ inputs.rocm_package_version }} + + # TODO(#3291): Add test_pytorch_wheels job + build_portable_linux_pytorch_wheels_ci: + needs: [build_portable_linux_python_packages] + name: Build PyTorch + if: >- + ${{ + !failure() && + !cancelled() && + ( + inputs.use_prebuilt_artifacts == 'false' || + inputs.use_prebuilt_artifacts == 'true' + ) && + inputs.build_pytorch == true + }} + uses: ./.github/workflows/build_portable_linux_pytorch_wheels_ci.yml + with: + artifact_group: ${{ inputs.artifact_group }} + python_version: "3.12" + pytorch_git_ref: "release/2.10" + rocm_package_find_links_url: ${{ needs.build_portable_linux_python_packages.outputs.package_find_links_url }} + rocm_version: ${{ inputs.rocm_package_version }} diff --git a/.github/workflows/ci_nightly.yml b/.github/workflows/ci_nightly.yml index fcd9af7d46..4f6b402c1b 100644 --- 
a/.github/workflows/ci_nightly.yml +++ b/.github/workflows/ci_nightly.yml @@ -85,6 +85,7 @@ jobs: rocm_package_version: ${{ needs.setup.outputs.rocm_package_version }} test_type: ${{ needs.setup.outputs.test_type }} sanity_check_only_for_family: ${{ matrix.variant.sanity_check_only_for_family == true }} + build_pytorch: ${{ matrix.variant.build_pytorch == true }} permissions: contents: read id-token: write diff --git a/.github/workflows/ci_windows.yml b/.github/workflows/ci_windows.yml index 05f7ba1d7d..f58b8d6b70 100644 --- a/.github/workflows/ci_windows.yml +++ b/.github/workflows/ci_windows.yml @@ -164,3 +164,6 @@ jobs: package_find_links_url: ${{ needs.build_windows_python_packages.outputs.package_find_links_url }} python_version: "3.12" rocm_version: ${{ inputs.rocm_package_version }} + + # TODO(#3291): Add build_windows_pytorch_wheels_ci + # TODO(#3291): Add test_pytorch_wheels job diff --git a/BUILD_TOPOLOGY.toml b/BUILD_TOPOLOGY.toml index 8a8529eb50..eeb552b8eb 100644 --- a/BUILD_TOPOLOGY.toml +++ b/BUILD_TOPOLOGY.toml @@ -137,6 +137,7 @@ artifact_groups = ["third-party-sysdeps", "base"] description = "Compiler, runtimes, and core profiling" artifact_groups = [ "compiler", + "core-amdsmi", "core-runtime", "third-party-libs", "hip-runtime", @@ -208,6 +209,12 @@ type = "generic" artifact_group_deps = ["base", "third-party-sysdeps"] source_sets = ["rocm-systems"] +[artifact_groups.core-amdsmi] +description = "AMD SMI tool (amdsmi)" +type = "generic" +artifact_group_deps = ["base", "third-party-sysdeps"] +source_sets = ["rocm-systems"] + [artifact_groups.compiler] description = "AMD LLVM toolchain and compiler infrastructure" type = "generic" @@ -267,9 +274,9 @@ source_sets = ["rocm-systems"] # rocprofiler-sdk is in rocm-systems [artifact_groups.dctools-core] description = "Data center management tools with minimal dependencies" type = "generic" -artifact_group_deps = ["core-runtime", "profiler-core"] +artifact_group_deps = ["core-runtime", "core-amdsmi", 
"profiler-core"] # TODO: rocm-systems included for projects/hip/VERSION (see CMakeLists.txt) -source_sets = ["base", "rocm-systems"] # RDC uses amdsmi from base +source_sets = ["base", "rocm-systems"] # RDC uses amdsmi from core-amdsmi # Future artifact groups # [artifact_groups.dctools-rocm] @@ -280,7 +287,7 @@ source_sets = ["base", "rocm-systems"] # RDC uses amdsmi from base [artifact_groups.profiler-apps] description = "Profiler applications and analysis tools" type = "generic" -artifact_group_deps = ["profiler-core", "compiler"] +artifact_group_deps = ["profiler-core", "compiler", "core-amdsmi"] source_sets = ["rocm-systems", "profiler-extras"] # rocprofiler-systems + trace decoder [artifact_groups.iree-libs] @@ -437,6 +444,14 @@ feature_name = "CORE_RUNTIME" feature_group = "CORE" disable_platforms = ["windows"] +[artifacts.core-amdsmi] +artifact_group = "core-amdsmi" +type = "target-neutral" +artifact_deps = ["base", "sysdeps"] +feature_name = "CORE_AMDSMI" +feature_group = "CORE" +disable_platforms = ["windows"] + [artifacts.core-hip] artifact_group = "hip-runtime" type = "target-neutral" @@ -454,7 +469,7 @@ feature_group = "CORE" # Part of core, enabled by default [artifacts.rocrtst] artifact_group = "rocrtst" type = "target-neutral" -artifact_deps = ["core-runtime", "core-ocl"] +artifact_deps = ["core-runtime", "core-ocl", "sysdeps-hwloc"] feature_name = "CORE_RUNTIME_TESTS" feature_group = "CORE" disable_platforms = ["windows"] @@ -479,7 +494,7 @@ feature_group = "CORE" # Part of core, enabled by default [artifacts.blas] artifact_group = "math-libs" type = "target-specific" -artifact_deps = ["core-runtime", "core-hip", "host-blas", "host-suite-sparse", "rocprofiler-sdk"] +artifact_deps = ["core-runtime", "core-hip", "core-amdsmi", "host-blas", "host-suite-sparse", "rocprofiler-sdk"] split_databases = ["rocblas", "hipblaslt"] [artifacts.fft] @@ -528,7 +543,7 @@ artifact_deps = [] [artifacts.composable-kernel] artifact_group = "ml-libs" type = 
"target-specific" -artifact_deps = ["core-runtime", "core-hip"] +artifact_deps = ["core-runtime", "core-hip", "rand", "rocprofiler-sdk"] [artifacts.miopen] artifact_group = "ml-libs" @@ -568,7 +583,7 @@ disable_platforms = ["windows"] [artifacts.rccl] artifact_group = "comm-libs" type = "target-specific" -artifact_deps = ["core-runtime", "core-hip", "hipify", "rocprofiler-sdk"] +artifact_deps = ["core-runtime", "core-hip", "hipify", "rocprofiler-sdk", "core-amdsmi"] disable_platforms = ["windows"] # --- Profiler Tools --- @@ -598,7 +613,7 @@ disable_platforms = ["windows"] [artifacts.rocprofiler-systems] artifact_group = "profiler-apps" type = "target-neutral" -artifact_deps = ["amd-llvm", "core-hip", "rocprofiler-sdk"] +artifact_deps = ["amd-llvm", "core-hip", "rocprofiler-sdk", "core-amdsmi"] feature_name = "ROCPROFSYS" feature_group = "PROFILER" disable_platforms = ["windows"] @@ -608,7 +623,7 @@ disable_platforms = ["windows"] [artifacts.rdc] artifact_group = "dctools-core" type = "target-neutral" -artifact_deps = ["core-hip", "rocprofiler-sdk", "sysdeps"] +artifact_deps = ["core-hip", "rocprofiler-sdk", "sysdeps", "core-amdsmi"] feature_group = "DC_TOOLS" # Part of DC tools, enabled by default via THEROCK_ENABLE_ALL disable_platforms = ["windows"] diff --git a/base/CMakeLists.txt b/base/CMakeLists.txt index 4f31c4e246..0ff96e7b50 100644 --- a/base/CMakeLists.txt +++ b/base/CMakeLists.txt @@ -56,37 +56,6 @@ therock_cmake_subproject_provide_package(rocm-core rocm-core lib/cmake/rocm-core therock_cmake_subproject_activate(rocm-core) -################################################################################ -# amdsmi -################################################################################ - -if(NOT WIN32) # TODO(#36): Enable on Windows and/or make subproject inclusion generally optional - -therock_cmake_subproject_declare(amdsmi - EXTERNAL_SOURCE_DIR "${THEROCK_ROCM_SYSTEMS_SOURCE_DIR}/projects/amdsmi" - BINARY_DIR 
"${CMAKE_CURRENT_BINARY_DIR}/amdsmi" - USE_DIST_AMDGPU_TARGETS - BACKGROUND_BUILD - CMAKE_ARGS - "-DCMAKE_VERBOSE_MAKEFILE=OFF" - -DBUILD_TESTS=${THEROCK_BUILD_TESTING} - BUILD_DEPS - therock-googletest - RUNTIME_DEPS - rocm-core - ${THEROCK_BUNDLED_LIBDRM} - INSTALL_RPATH_DIRS - lib - INTERFACE_INSTALL_RPATH_DIRS - lib -) -therock_cmake_subproject_provide_package(amdsmi - amd_smi lib/cmake) -therock_cmake_subproject_activate(amdsmi) -list(APPEND _optional_artifact_deps amdsmi) - -endif() - ################################################################################ # rocm_smi_lib diff --git a/base/artifact.toml b/base/artifact.toml index c1d45d32d4..8e1b169b8b 100644 --- a/base/artifact.toml +++ b/base/artifact.toml @@ -2,22 +2,6 @@ [components.dev."base/half/stage"] [components.doc."base/half/stage"] -# amdsmi -[components.dbg."base/amdsmi/stage"] -optional = "windows" -[components.dev."base/amdsmi/stage"] -optional = "windows" -[components.doc."base/amdsmi/stage"] -optional = "windows" -[components.lib."base/amdsmi/stage"] -optional = "windows" -[components.run."base/amdsmi/stage"] -optional = "windows" -include = [ - "bin/**", - "libexec/**", - "share/amd_smi/**", -] # rocm_smi_lib [components.dbg."base/rocm_smi_lib/stage"] diff --git a/base/rocm-kpack b/base/rocm-kpack index 93eb7be7ec..fe9dd03a73 160000 --- a/base/rocm-kpack +++ b/base/rocm-kpack @@ -1 +1 @@ -Subproject commit 93eb7be7ec16adc30acadd59d623da9c46cec2cf +Subproject commit fe9dd03a738e6810b2876635de093b2dde1ce243 diff --git a/build_tools/build_python_packages.py b/build_tools/build_python_packages.py index f02bbd94ae..43921f21a9 100755 --- a/build_tools/build_python_packages.py +++ b/build_tools/build_python_packages.py @@ -86,6 +86,7 @@ def core_artifact_filter(an: ArtifactName) -> bool: core = an.name in [ "amd-llvm", "base", + "core-amdsmi", "core-hip", "core-ocl", "core-hipinfo", diff --git a/build_tools/github_actions/amdgpu_family_matrix.py 
b/build_tools/github_actions/amdgpu_family_matrix.py index b16103ccc0..10471bb3c3 100644 --- a/build_tools/github_actions/amdgpu_family_matrix.py +++ b/build_tools/github_actions/amdgpu_family_matrix.py @@ -147,20 +147,17 @@ }, }, "gfx101x": { - # TODO(#1926): Resolve bgemm kernel hip file generation error, to enable PyTorch builds + # TODO(#1926): Resolve bgemm kernel hip file generation error to enable PyTorch builds "linux": { "test-runs-on": "", "family": "gfx101X-dgpu", - "expect_failure": True, "build_variants": ["release"], "expect_pytorch_failure": True, }, - # TODO(#1925): Enable arch for aotriton to enable PyTorch builds "windows": { "test-runs-on": "", "family": "gfx101X-dgpu", "build_variants": ["release"], - "expect_pytorch_failure": True, }, }, "gfx103x": { diff --git a/build_tools/github_actions/configure_ci.py b/build_tools/github_actions/configure_ci.py index 175c99538b..0fbac377ca 100755 --- a/build_tools/github_actions/configure_ci.py +++ b/build_tools/github_actions/configure_ci.py @@ -215,6 +215,8 @@ def generate_multi_arch_matrix( # Extract family names for dist_amdgpu_families family_names = [f["amdgpu_family"] for f in family_info_list] + expect_failure = info.get("expect_failure", False) + expect_pytorch_failure = info.get("expect_pytorch_failure", False) matrix_row = { "matrix_per_family_json": json.dumps(family_info_list), "dist_amdgpu_families": ";".join(family_names), @@ -222,7 +224,8 @@ def generate_multi_arch_matrix( "build_variant_label": info["build_variant_label"], "build_variant_suffix": info["build_variant_suffix"], "build_variant_cmake_preset": info["build_variant_cmake_preset"], - "expect_failure": info.get("expect_failure", False), + "expect_failure": expect_failure, + "build_pytorch": not expect_failure and not expect_pytorch_failure, } matrix_output.append(matrix_row) @@ -489,6 +492,15 @@ def matrix_generator( # But if not, honor what is already there. 
if build_variant_info.get("expect_failure", False): matrix_row["expect_failure"] = True + + # Enable pytorch builds for families without known build failures. + # TODO(#3291): Add finer-grained controls over when pytorch is built + expect_failure = matrix_row.get("expect_failure", False) + expect_pytorch_failure = matrix_row.get("expect_pytorch_failure", False) + matrix_row["build_pytorch"] = ( + not expect_failure and not expect_pytorch_failure + ) + del matrix_row["build_variants"] matrix_row.update(build_variant_info) @@ -644,7 +656,16 @@ def format_variants(variants): result = [] for item in variants: if "family" in item: - result.append(item["family"]) + label = item["family"] + # Also show flags for the family, if any. + flags = [] + if item.get("expect_failure"): + flags.append("expect_failure") + if item.get("build_pytorch"): + flags.append("build_pytorch") + if flags: + label += f" ({', '.join(flags)})" + result.append(label) elif "matrix_per_family_json" in item: # Multi-arch mode: show the families from the JSON families = json.loads(item["matrix_per_family_json"]) diff --git a/build_tools/github_actions/configure_ci_path_filters.py b/build_tools/github_actions/configure_ci_path_filters.py index 81dbfb81ec..37f01a6280 100644 --- a/build_tools/github_actions/configure_ci_path_filters.py +++ b/build_tools/github_actions/configure_ci_path_filters.py @@ -151,18 +151,6 @@ def is_ci_run_required(paths: Optional[Iterable[str]]) -> bool: ".github/dependabot.yml", "*CODEOWNERS", "*LICENSE", - # Changes to 'external-builds/' (e.g. PyTorch) do not affect "CI" workflows. 
- # At time of writing, workflows run in this sequence: - # `ci.yml` - # `ci_linux.yml` - # `build_linux_artifacts.yml` - # `test_artifacts.yml` - # `test_component.yml` - # If we add external-builds tests there, we can revisit this, maybe leaning - # on options like LINUX_USE_PREBUILT_ARTIFACTS or sufficient caching to keep - # workflows efficient when only nodes closer to the edges of the build graph - # are changed. - "external-builds/*", # Changes to dockerfiles do not currently affect CI workflows directly. # Docker images are built and published after commits are pushed, then # workflows can be updated to use the new image sha256 values. @@ -179,8 +167,10 @@ def is_ci_run_required(paths: Optional[Iterable[str]]) -> bool: "ci*.yml", "multi_arch*.yml", "build*artifact*.yml", + "build*ci.yml", "build*python_packages.yml", "test*artifacts.yml", + "test_rocm_wheels.yml", "test_sanity_check.yml", "test_component.yml", ] diff --git a/build_tools/github_actions/fetch_test_configurations.py b/build_tools/github_actions/fetch_test_configurations.py index 85b79b5936..7c5e007c2d 100644 --- a/build_tools/github_actions/fetch_test_configurations.py +++ b/build_tools/github_actions/fetch_test_configurations.py @@ -255,6 +255,14 @@ def _get_script_path(script_name: str) -> str: "platform": ["linux", "windows"], "total_shards": 1, }, + "composablekernel": { + "job_name": "composablekernel", + "fetch_artifact_args": "--composablekernel --tests", + "timeout_minutes": 60, + "test_script": f"python {_get_script_path('test_composablekernel.py')}", + "platform": ["linux", "windows"], + "total_shards": 1, + }, # TODO(iree-org/fusilli/issues/57): Enable fusilli tests once build is # enabled by default. 
# "fusilli_plugin": { diff --git a/build_tools/github_actions/test_executable_scripts/test_composablekernel.py b/build_tools/github_actions/test_executable_scripts/test_composablekernel.py new file mode 100644 index 0000000000..788625d8a9 --- /dev/null +++ b/build_tools/github_actions/test_executable_scripts/test_composablekernel.py @@ -0,0 +1,24 @@ +import logging +import os +import shlex +import subprocess +from pathlib import Path + +THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR") +SCRIPT_DIR = Path(__file__).resolve().parent +THEROCK_DIR = SCRIPT_DIR.parent.parent.parent +AMDGPU_FAMILIES = os.getenv("AMDGPU_FAMILIES") + +logging.basicConfig(level=logging.INFO) + +cmd = [ + "test_ck_tile_pooling", +] + +logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(cmd)}") + +subprocess.run( + cmd, + cwd=THEROCK_DIR, + check=True, +) diff --git a/build_tools/github_actions/test_executable_scripts/test_miopen.py b/build_tools/github_actions/test_executable_scripts/test_miopen.py index f605621e36..9f73f1e1a2 100644 --- a/build_tools/github_actions/test_executable_scripts/test_miopen.py +++ b/build_tools/github_actions/test_executable_scripts/test_miopen.py @@ -220,6 +220,13 @@ "*CPU_UnitTestConvSolverImplicitGemmGroupWrwXdlopsDevApplicability_FP16.ConvHipImplicitGemmGroupWrwXdlops*" ) + # Disable long running tests + negative_filter.append("Full/GPU_Softmax_FP32*") # 24 min + negative_filter.append("Full/GPU_Softmax_BFP16*") # 13 min + negative_filter.append("Full/GPU_Softmax_FP16*") # 11.5 min + negative_filter.append("Smoke/GPU_Reduce_FP32*") # 6.5 min + negative_filter.append("Smoke/GPU_Reduce_FP16*") # 4.5 min + #################################################### # Creating a smoke test filter diff --git a/build_tools/github_actions/tests/configure_ci_path_filters_test.py b/build_tools/github_actions/tests/configure_ci_path_filters_test.py index 47023422e8..d2b9317f9f 100644 --- a/build_tools/github_actions/tests/configure_ci_path_filters_test.py +++ 
b/build_tools/github_actions/tests/configure_ci_path_filters_test.py @@ -20,12 +20,7 @@ def test_dont_run_ci_if_only_markdown_files_edited(self): run_ci = is_ci_run_required(paths) self.assertFalse(run_ci) - def test_dont_run_ci_if_only_external_builds_edited(self): - paths = ["external-builds/pytorch/CMakeLists.txt"] - run_ci = is_ci_run_required(paths) - self.assertFalse(run_ci) - - def test_dont_run_ci_if_only_external_builds_edited(self): + def test_dont_run_ci_if_only_experimental_files_edited(self): paths = ["experimental/file.h"] run_ci = is_ci_run_required(paths) self.assertFalse(run_ci) diff --git a/build_tools/github_actions/tests/configure_ci_test.py b/build_tools/github_actions/tests/configure_ci_test.py index d7ed0e6d2a..31cf0e34fc 100644 --- a/build_tools/github_actions/tests/configure_ci_test.py +++ b/build_tools/github_actions/tests/configure_ci_test.py @@ -355,6 +355,73 @@ def test_windows_schedule_matrix_generator(self): ) self.assertEqual(windows_test_labels, []) + def test_build_pytorch_disabled_when_expect_failure(self): + """build_pytorch should be False when expect_failure is True.""" + # Schedule trigger includes all families, some with expect_failure + linux_target_output, _ = configure_ci.matrix_generator( + is_pull_request=False, + is_workflow_dispatch=False, + is_push=False, + is_schedule=True, + base_args={"build_variant": "release"}, + families={}, + platform="linux", + ) + for entry in linux_target_output: + if entry.get("expect_failure", False): + self.assertFalse( + entry.get("build_pytorch", False), + f"build_pytorch should be False when expect_failure is True " + f"for family {entry.get('family')}", + ) + + def test_build_pytorch_disabled_when_expect_pytorch_failure(self): + """build_pytorch should be False when expect_pytorch_failure is True.""" + # Use schedule trigger on windows to include gfx90x which has + # expect_pytorch_failure on windows + windows_target_output, _ = configure_ci.matrix_generator( + is_pull_request=False, 
+ is_workflow_dispatch=False, + is_push=False, + is_schedule=True, + base_args={"build_variant": "release"}, + families={}, + platform="windows", + ) + for entry in windows_target_output: + if entry.get("expect_pytorch_failure", False): + self.assertFalse( + entry.get("build_pytorch", False), + f"build_pytorch should be False when expect_pytorch_failure " + f"is True for family {entry.get('family')}", + ) + + def test_build_pytorch_enabled_for_supported_families(self): + """build_pytorch should be True for families without known failures.""" + # Presubmit families (gfx94x, gfx110x, etc.) should have build_pytorch + # enabled if they don't have expect_failure or expect_pytorch_failure + linux_target_output, _ = configure_ci.matrix_generator( + is_pull_request=True, + is_workflow_dispatch=False, + is_push=False, + is_schedule=False, + base_args={ + "pr_labels": '{"labels":[]}', + "build_variant": "release", + }, + families={}, + platform="linux", + ) + for entry in linux_target_output: + if not entry.get("expect_failure", False) and not entry.get( + "expect_pytorch_failure", False + ): + self.assertTrue( + entry.get("build_pytorch", False), + f"build_pytorch should be True for supported family " + f"{entry.get('family')}", + ) + def test_determine_long_lived_branch(self): """Test to correctly determine long-lived branch that expect more testing.""" diff --git a/build_tools/install_rocm_from_artifacts.py b/build_tools/install_rocm_from_artifacts.py index 82774e71b5..bee5a9e4cc 100644 --- a/build_tools/install_rocm_from_artifacts.py +++ b/build_tools/install_rocm_from_artifacts.py @@ -24,6 +24,7 @@ [--hipdnn-samples | --no-hipdnn-samples] [--miopen | --no-miopen] [--miopen-plugin | --no-miopen-plugin] + [--composablekernel | --no-composablekernel] [--fusilli-plugin | --no-fusilli-plugin] [--hipblaslt-plugin | --no-hipblaslt-plugin] [--prim | --no-prim] @@ -316,6 +317,8 @@ def retrieve_artifacts_by_run_id(args): "base_lib", "amd-llvm_run", "amd-llvm_lib", + 
"core-amdsmi_run", + "core-amdsmi_lib", "core-hip_lib", "core-hip_dev", "core-ocl_lib", @@ -336,6 +339,7 @@ def retrieve_artifacts_by_run_id(args): args.hipdnn_samples, args.miopen, args.miopen_plugin, + args.composablekernel, args.fusilli_plugin, args.hipblaslt_plugin, args.prim, @@ -386,6 +390,8 @@ def retrieve_artifacts_by_run_id(args): argv.append("rand_dev") if args.miopen_plugin: extra_artifacts.append("miopen-plugin") + if args.composablekernel: + extra_artifacts.append("composablekernel") if args.fusilli_plugin: extra_artifacts.append("fusilli-plugin") if args.hipblaslt_plugin: @@ -649,6 +655,13 @@ def main(argv): action=argparse.BooleanOptionalAction, ) + artifacts_group.add_argument( + "--composablekernel", + default=False, + help="Include 'composablekernel' artifacts", + action=argparse.BooleanOptionalAction, + ) + artifacts_group.add_argument( "--fusilli-plugin", default=False, diff --git a/build_tools/packaging/linux/package.json b/build_tools/packaging/linux/package.json index 0e631cd8f9..05be8718a6 100644 --- a/build_tools/packaging/linux/package.json +++ b/build_tools/packaging/linux/package.json @@ -24,7 +24,7 @@ "Homepage": "https://github.com/ROCm/rocm-systems", "Artifactory": [ { - "Artifact": "base", + "Artifact": "core-amdsmi", "Artifact_Subdir": [ { "Name": "amdsmi", diff --git a/build_tools/packaging/python/templates/rocm/src/rocm_sdk/tests/core_test.py b/build_tools/packaging/python/templates/rocm/src/rocm_sdk/tests/core_test.py index 8ed35f54d0..a959006c37 100644 --- a/build_tools/packaging/python/templates/rocm/src/rocm_sdk/tests/core_test.py +++ b/build_tools/packaging/python/templates/rocm/src/rocm_sdk/tests/core_test.py @@ -112,6 +112,10 @@ def testSharedLibrariesLoad(self): if "libtest_linking_lib" in str(so_path): # rocprim unit tests, not actual library files continue + if "opencl" in str(so_path): + # We use OpenCL ICD from distro rather than TheRock + # and we do not build it + continue with self.subTest(msg="Check shared library 
loads", so_path=so_path): # Load each in an isolated process because not all libraries in the tree # are designed to load into the same process (i.e. LLVM runtime libs, diff --git a/build_tools/packaging/python/templates/rocm/src/rocm_sdk/tests/devel_test.py b/build_tools/packaging/python/templates/rocm/src/rocm_sdk/tests/devel_test.py index 38b24097e1..ac23081ec6 100644 --- a/build_tools/packaging/python/templates/rocm/src/rocm_sdk/tests/devel_test.py +++ b/build_tools/packaging/python/templates/rocm/src/rocm_sdk/tests/devel_test.py @@ -152,6 +152,10 @@ def testSharedLibrariesLoad(self): if "libtest_linking_lib" in str(so_path): # rocprim unit tests, not actual library files continue + if "opencl" in str(so_path): + # We use OpenCL ICD from distro rather than TheRock + # and we do not build it + continue with self.subTest(msg="Check shared library loads", so_path=so_path): # Load each in an isolated process because not all libraries in the tree # are designed to load into the same process (i.e. LLVM runtime libs, diff --git a/comm-libs/CMakeLists.txt b/comm-libs/CMakeLists.txt index ac9c1f6f60..de3d6663b2 100644 --- a/comm-libs/CMakeLists.txt +++ b/comm-libs/CMakeLists.txt @@ -7,6 +7,18 @@ if(THEROCK_ENABLE_RCCL AND THEROCK_SANITIZER STREQUAL "") list(APPEND optional_profiler_deps roctracer rocprofiler-sdk) endif() + # RCCL can optionally use amdsmi/rocm_smi_lib on Linux, but those may not be + # enabled/declared depending on feature selection. 
+ set(_rccl_optional_runtime_deps) + if(NOT WIN32) + if(TARGET amdsmi) + list(APPEND _rccl_optional_runtime_deps amdsmi) + endif() + if(TARGET rocm_smi_lib) + list(APPEND _rccl_optional_runtime_deps rocm_smi_lib) + endif() + endif() + ############################################################################## # rccl ############################################################################## @@ -35,11 +47,10 @@ if(THEROCK_ENABLE_RCCL AND THEROCK_SANITIZER STREQUAL "") therock-fmt therock-googletest RUNTIME_DEPS - amdsmi hip-clr hipify - rocm_smi_lib rocprofiler-register + ${_rccl_optional_runtime_deps} ${optional_profiler_deps} ) therock_cmake_subproject_glob_c_sources(rccl diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 0ef7650549..f464a3515d 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -9,6 +9,52 @@ if(WIN32) set(_system_toolchain "") endif() +if(THEROCK_ENABLE_CORE_AMDSMI) + + ################################################################################ + # amdsmi + ################################################################################ + + if(NOT WIN32) # TODO(#36): Enable on Windows and/or make subproject inclusion generally optional + + therock_cmake_subproject_declare(amdsmi + EXTERNAL_SOURCE_DIR "${THEROCK_ROCM_SYSTEMS_SOURCE_DIR}/projects/amdsmi" + BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/amdsmi" + USE_DIST_AMDGPU_TARGETS + BACKGROUND_BUILD + CMAKE_ARGS + "-DCMAKE_VERBOSE_MAKEFILE=OFF" + "-DBUILD_TESTS=${THEROCK_BUILD_TESTING}" + BUILD_DEPS + therock-googletest + RUNTIME_DEPS + rocm-core + ${THEROCK_BUNDLED_LIBDRM} + INSTALL_RPATH_DIRS + lib + INTERFACE_INSTALL_RPATH_DIRS + lib + ) + therock_cmake_subproject_provide_package(amdsmi amd_smi lib/cmake) + therock_cmake_subproject_activate(amdsmi) + + therock_provide_artifact(core-amdsmi + TARGET_NEUTRAL + DESCRIPTOR artifact-core-amdsmi.toml + COMPONENTS + dbg + dev + doc + lib + run + SUBPROJECT_DEPS + amdsmi + ) + + endif() # NOT WIN32 
+endif(THEROCK_ENABLE_CORE_AMDSMI) + + if(THEROCK_ENABLE_CORE_RUNTIME) ############################################################################## # ROCR-Runtime @@ -300,6 +346,9 @@ if(THEROCK_ENABLE_OCL_RUNTIME) rocprofiler-register ROCR-Runtime ) + list(APPEND OCL_CLR_CMAKE_ARGS + "-DBUILD_TESTS=${THEROCK_BUILD_TESTING}" + ) endif() therock_cmake_subproject_declare(ocl-clr @@ -343,6 +392,7 @@ if(THEROCK_ENABLE_OCL_RUNTIME) doc lib run + test SUBPROJECT_DEPS ocl-clr ) @@ -374,6 +424,15 @@ endif(THEROCK_ENABLE_OCL_RUNTIME) if(THEROCK_BUILD_TESTING) if(THEROCK_ENABLE_CORE_RUNTIME_TESTS) + # rocrtst may optionally build against amdsmi when available (Linux). + set(_rocrtst_build_deps + amd-llvm + ocl-clr + ) + if(TARGET amdsmi) + list(APPEND _rocrtst_build_deps amdsmi) + endif() + therock_cmake_subproject_declare(rocrtst USE_DIST_AMDGPU_TARGETS EXTERNAL_SOURCE_DIR "${THEROCK_ROCM_SYSTEMS_SOURCE_DIR}/projects/rocr-runtime/rocrtst/suites/test_common" @@ -386,15 +445,18 @@ if(THEROCK_BUILD_TESTING) COMPILER_TOOLCHAIN "${_system_toolchain}" BUILD_DEPS - amd-llvm - amdsmi - ocl-clr + ${_rocrtst_build_deps} RUNTIME_DEPS ROCR-Runtime rocprofiler-register ${THEROCK_BUNDLED_ELFUTILS} + ${THEROCK_BUNDLED_HWLOC} ${THEROCK_BUNDLED_LIBDRM} ${THEROCK_BUNDLED_NUMACTL} + INTERFACE_LINK_DIRS + "lib/rocm_sysdeps/lib" + INTERFACE_INSTALL_RPATH_DIRS + "lib/rocm_sysdeps/lib" ) therock_cmake_subproject_glob_c_sources(rocrtst SUBDIRS .) 
therock_cmake_subproject_activate(rocrtst) diff --git a/core/artifact-core-amdsmi.toml b/core/artifact-core-amdsmi.toml new file mode 100644 index 0000000000..abfe69a661 --- /dev/null +++ b/core/artifact-core-amdsmi.toml @@ -0,0 +1,12 @@ +# amdsmi +[components.dbg."core/amdsmi/stage"] +[components.dev."core/amdsmi/stage"] +[components.doc."core/amdsmi/stage"] +[components.lib."core/amdsmi/stage"] +include = [ "lib/**" ] +[components.run."core/amdsmi/stage"] +include = [ + "bin/**", + "libexec/**", + "share/amd_smi/**", +] diff --git a/core/artifact-core-ocl.toml b/core/artifact-core-ocl.toml index abee8565dd..eedde0fe65 100644 --- a/core/artifact-core-ocl.toml +++ b/core/artifact-core-ocl.toml @@ -3,10 +3,12 @@ [components.dev."core/ocl-clr/stage"] [components.doc."core/ocl-clr/stage"] [components.lib."core/ocl-clr/stage"] -include = [ - "share/ocl/**", -] [components.run."core/ocl-clr/stage"] include = [ "bin/**", ] +[components.test."core/ocl-clr/stage"] +include = [ + "tests/**", + "share/opencl/**", +] diff --git a/core/artifact-core-rocrtst.toml b/core/artifact-core-rocrtst.toml index 24dd542bbe..5c3683f8d6 100644 --- a/core/artifact-core-rocrtst.toml +++ b/core/artifact-core-rocrtst.toml @@ -2,6 +2,9 @@ [components.dbg."core/rocrtst/stage"] [components.dev."core/rocrtst/stage"] [components.doc."core/rocrtst/stage"] -[components.lib."core/rocrtst/stage"] [components.run."core/rocrtst/stage"] [components.test."core/rocrtst/stage"] +exclude = [ + "lib/rocrtst/lib/libhwloc.so*", + "lib/rocrtst/lib/LICENSE", +] diff --git a/base/post_hook_amdsmi.cmake b/core/post_hook_amdsmi.cmake similarity index 100% rename from base/post_hook_amdsmi.cmake rename to core/post_hook_amdsmi.cmake diff --git a/docs/development/artifacts.md b/docs/development/artifacts.md index 386aab4705..e49b465666 100644 --- a/docs/development/artifacts.md +++ b/docs/development/artifacts.md @@ -208,6 +208,7 @@ These artifacts are built if any project features requiring them are enabled: ### Core 
Artifacts - `base`: Base ROCM tools and structural components. ROCM sub-projects that do not depend on anything outside of this set are included here so that everything can depend on them. +- `core-amdsmi`: AMD System Management Interface (amdsmi) library and tools for GPU and driver management, packaged as a standalone core artifact due to distinct product and usage semantics. - `core-runtime`: Low level runtime components used for interfacing with kernel drivers. - `core-hip`: HIP runtime, compiler interface, and build tools. diff --git a/docs/development/windows_support.md b/docs/development/windows_support.md index 9a33c67013..806b128918 100644 --- a/docs/development/windows_support.md +++ b/docs/development/windows_support.md @@ -26,7 +26,6 @@ mainline, in open source, using MSVC, etc.). | Component subset | Subproject | Supported | Notes | | ------------------- | ------------------------------------------------------------------------------------------------------------------------ | --------- | --------------------------------------------- | | base | aux-overlay | ✅ | | -| base | [amdsmi](https://github.com/ROCm/amdsmi) | ❌ | Unsupported | | base | [rocm-cmake](https://github.com/ROCm/rocm-cmake) | ✅ | | | base | [rocm-core](https://github.com/ROCm/rocm-core) | ✅ | | | base | [rocm_smi_lib](https://github.com/ROCm/rocm_smi_lib) | ❌ | Unsupported | @@ -38,6 +37,7 @@ mainline, in open source, using MSVC, etc.). 
| compiler | [hipcc](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/hipcc) | ✅ | | | compiler | [hipify](https://github.com/ROCm/HIPIFY) | ✅ | | | | | | | +| core | [amdsmi](https://github.com/ROCm/amdsmi) | ❌ | Unsupported | | core | [ROCR-Runtime](https://github.com/ROCm/ROCR-Runtime) | ❌ | Unsupported | | core | [rocminfo](https://github.com/ROCm/rocminfo) | ❌ | Unsupported | | core | [hipInfo from hip-tests](https://github.com/ROCm/hip-tests) | ✅ | | diff --git a/external-builds/pytorch/build_prod_wheels.py b/external-builds/pytorch/build_prod_wheels.py index 61c89b0e6c..10e878f800 100755 --- a/external-builds/pytorch/build_prod_wheels.py +++ b/external-builds/pytorch/build_prod_wheels.py @@ -292,17 +292,22 @@ def directory_if_exists(dir: Path) -> Path | None: def do_install_rocm(args: argparse.Namespace): - # Optional cache dir arguments - cache_dir_args = ( - ["--cache-dir", str(args.pip_cache_dir)] if args.pip_cache_dir else [] - ) - # Because the rocm package caches current GPU selection and such, we # always purge it to ensure a clean rebuild. - run_command( - [sys.executable, "-m", "pip", "cache", "remove", "rocm"] + cache_dir_args, - cwd=Path.cwd(), + # + # This can fail in environments where the pip cache is disabled or + # unwritable (e.g. manylinux containers), which is fine — if there's no + # cache, there's nothing stale to purge. + cache_dir_args = ( + ["--cache-dir", str(args.pip_cache_dir)] if args.pip_cache_dir else [] ) + try: + run_command( + [sys.executable, "-m", "pip", "cache", "remove", "rocm"] + cache_dir_args, + cwd=Path.cwd(), + ) + except subprocess.CalledProcessError: + print("Warning: pip cache remove failed (cache may be disabled), continuing") # Do the main pip install. 
pip_args = [ @@ -319,8 +324,7 @@ def do_install_rocm(args: argparse.Namespace): if args.find_links: pip_args.extend(["--find-links", args.find_links]) if args.pip_cache_dir: - pip_args.extend(["--cache-dir", args.pip_cache_dir]) - pip_args += cache_dir_args + pip_args.extend(["--cache-dir", str(args.pip_cache_dir)]) rocm_sdk_version = args.rocm_sdk_version if args.rocm_sdk_version else "" pip_args.extend([f"rocm[libraries,devel]{rocm_sdk_version}"]) run_command(pip_args, cwd=Path.cwd()) @@ -658,8 +662,16 @@ def do_build_pytorch( pytorch_build_version_parsed = parse(pytorch_build_version) print(f" Using PYTORCH_BUILD_VERSION: {pytorch_build_version}") - # Detect exactly PyTorch 2.9.x is_pytorch_2_9 = pytorch_build_version_parsed.release[:2] == (2, 9) + is_pytorch_2_11_or_later = pytorch_build_version_parsed.release[:2] >= (2, 11) + + # aotriton is not supported on certain architectures yet. + # gfx101X/gfx103X: https://github.com/ROCm/TheRock/issues/1925 + AOTRITON_UNSUPPORTED_ARCHS = ["gfx101", "gfx103"] + # gfx1152/53: supported in aotriton 0.11.2b+ (https://github.com/ROCm/aotriton/pull/142), + # which is pinned by pytorch >= 2.11. Older versions don't include it. 
+ if not is_pytorch_2_11_or_later: + AOTRITON_UNSUPPORTED_ARCHS += ["gfx1152", "gfx1153"] ## Enable FBGEMM_GENAI on Linux for PyTorch, as it is available only for 2.9 on rocm/pytorch ## and causes build failures for other PyTorch versions @@ -698,11 +710,6 @@ def do_build_pytorch( # Default behavior — determined by if triton is build use_flash_attention = "ON" if triton_requirement else "OFF" - # no aotriton support for gfx103X - # - # temporarily disable aotriton for gfx1152/53 until pytorch - # uses a commit that enables it ( https://github.com/ROCm/aotriton/pull/142 ) - AOTRITON_UNSUPPORTED_ARCHS = ["gfx103", "gfx1152", "gfx1153"] if any( arch in env["PYTORCH_ROCM_ARCH"] for arch in AOTRITON_UNSUPPORTED_ARCHS ): @@ -759,11 +766,6 @@ def do_build_pytorch( use_flash_attention = "0" - # no aotriton support for gfx103X - # - # temporarily prevent enabling aotriton for gfx1152/53 until pytorch - # uses a commit that enables it ( https://github.com/ROCm/aotriton/pull/142 ) - AOTRITON_UNSUPPORTED_ARCHS = ["gfx103", "gfx1152", "gfx1153"] if args.enable_pytorch_flash_attention_windows and not any( arch in env["PYTORCH_ROCM_ARCH"] for arch in AOTRITON_UNSUPPORTED_ARCHS ): diff --git a/math-libs/BLAS/CMakeLists.txt b/math-libs/BLAS/CMakeLists.txt index db45573925..559a22c91d 100644 --- a/math-libs/BLAS/CMakeLists.txt +++ b/math-libs/BLAS/CMakeLists.txt @@ -94,13 +94,15 @@ endif() # hipBLASLt ############################################################################## -set(hipBLASLt_optional_deps) +set(hipBLASLt_runtime_deps) if(NOT WIN32) - # hipBLASLt is hard-coded to not expect rocm-smi and amdsmi on Windows. - list(APPEND hipBLASLt_optional_deps - amdsmi - rocm_smi_lib - ) + # Required on Linux. + list(APPEND hipBLASLt_runtime_deps amdsmi) + + # rocm_smi_lib remains optional (guarded). 
+ if(TARGET rocm_smi_lib) + list(APPEND hipBLASLt_runtime_deps rocm_smi_lib) + endif() endif() set(hipBLASLt_rocRoller_deps) @@ -146,11 +148,11 @@ therock_cmake_subproject_declare(hipBLASLt hip-clr therock-host-blas ${hipBLASLt_rocRoller_runtime_deps} - ${hipBLASLt_optional_deps} + ${hipBLASLt_runtime_deps} ${optional_profiler_deps} ) therock_cmake_subproject_glob_c_sources(hipBLASLt -SUBDIRS + SUBDIRS . ) therock_cmake_subproject_provide_package(hipBLASLt hipblaslt lib/cmake/hipblaslt) @@ -165,7 +167,9 @@ list(APPEND _blas_subproject_names hipBLASLt) set(rocBLAS_optional_runtime_deps) if(NOT WIN32) # rocBLAS is hard-coded to not expect rocm-smi. - list(APPEND rocBLAS_optional_runtime_deps rocm_smi_lib) + if(TARGET rocm_smi_lib) + list(APPEND rocBLAS_optional_runtime_deps rocm_smi_lib) + endif() elseif(THEROCK_BUILD_TESTING) list(APPEND rocBLAS_optional_runtime_deps therock-host-blas) endif() @@ -289,13 +293,15 @@ if(THEROCK_ENABLE_SPARSE) ############################################################################## if(NOT WIN32) # Remove this block once hipSPARSELt is supported on Windows - set(hipSPARSELt_optional_deps) - if(NOT WIN32) - # hipSPARSELt is hard-coded to not expect rocm-smi and amdsmi on Windows. - list(APPEND hipSPARSELt_optional_deps - amdsmi - rocm_smi_lib - ) + # hipSPARSELt expects amd_smi on Linux. + set(hipSPARSELt_runtime_deps) + + # Required on Linux. + list(APPEND hipSPARSELt_runtime_deps amdsmi) + + # rocm_smi_lib is optional. 
+ if(TARGET rocm_smi_lib) + list(APPEND hipSPARSELt_runtime_deps rocm_smi_lib) endif() therock_cmake_subproject_declare(hipSPARSELt @@ -320,7 +326,7 @@ if(THEROCK_ENABLE_SPARSE) RUNTIME_DEPS hip-clr therock-host-blas - ${hipSPARSELt_optional_deps} + ${hipSPARSELt_runtime_deps} ${optional_profiler_deps} ) therock_cmake_subproject_glob_c_sources(hipSPARSELt diff --git a/ml-libs/CMakeLists.txt b/ml-libs/CMakeLists.txt index a1b815f080..61eb2ad400 100644 --- a/ml-libs/CMakeLists.txt +++ b/ml-libs/CMakeLists.txt @@ -25,16 +25,21 @@ if(THEROCK_ENABLE_COMPOSABLE_KERNEL) ############################################################################## # TODO: Move this to math-libs + # In the context of building miopen, do a narrow build of CK. + set(optional_ck_cmake_args) + if(THEROCK_MIOPEN_USE_COMPOSABLE_KERNEL) + list(APPEND optional_ck_cmake_args -DMIOPEN_REQ_LIBS_ONLY=ON) + endif() + therock_cmake_subproject_declare(composable_kernel EXTERNAL_SOURCE_DIR "${THEROCK_ROCM_LIBRARIES_SOURCE_DIR}/projects/composablekernel" BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/composable_kernel" - BACKGROUND_BUILD CMAKE_ARGS -DHIP_PLATFORM=amd -DROCM_PATH= -DROCM_DIR= "-DBUILD_TESTING=${THEROCK_BUILD_TESTING}" - -DMIOPEN_REQ_LIBS_ONLY=ON + ${optional_ck_cmake_args} CMAKE_INCLUDES therock_explicit_finders.cmake COMPILER_TOOLCHAIN diff --git a/ml-libs/artifact-composable-kernel.toml b/ml-libs/artifact-composable-kernel.toml index faa101f25b..061c35d15d 100644 --- a/ml-libs/artifact-composable-kernel.toml +++ b/ml-libs/artifact-composable-kernel.toml @@ -6,4 +6,8 @@ include = [ "share/composable_kernel/**", ] +[components.test."ml-libs/composable_kernel/stage"] +include = [ + "bin/test_*", +] [components.run."ml-libs/composable_kernel/stage"]