From 8f0bff50c553f747435e0f5db99b292f452929f8 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Fri, 14 Nov 2025 12:36:23 -0500 Subject: [PATCH 1/6] Add basic build and publish workflow --- .github/workflows/build-gfx90a.yml | 57 ++++++++++++++++++++++++++++++ AGENTS.md | 22 ++++++++++++ README.md | 2 ++ 3 files changed, 81 insertions(+) create mode 100644 .github/workflows/build-gfx90a.yml create mode 100644 AGENTS.md diff --git a/.github/workflows/build-gfx90a.yml b/.github/workflows/build-gfx90a.yml new file mode 100644 index 0000000..2278ad7 --- /dev/null +++ b/.github/workflows/build-gfx90a.yml @@ -0,0 +1,57 @@ +name: Build gfx90a Docker image + +on: + push: + branches: + - main + paths: + - envs/x86/gfx90a/** + - .github/workflows/build-gfx90a.yml + - AGENTS.md + - README.md + pull_request: + paths: + - envs/x86/gfx90a/** + - .github/workflows/build-gfx90a.yml + - AGENTS.md + - README.md + workflow_dispatch: + +jobs: + build: + name: Build gfx90a image + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Check out repository + uses: actions/checkout@v4 + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: Set up Buildx + uses: docker/setup-buildx-action@v3 + - name: Cache Docker layers + uses: actions/cache@v4 + with: + path: /tmp/.buildx-cache + key: ${{ runner.os }}-buildx-gfx90a-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-buildx-gfx90a- + - name: Build docker image + uses: docker/build-push-action@v5 + with: + context: . 
+ file: envs/x86/gfx90a/Dockerfile + push: true + tags: | + higherordermethods/selfish:gfx90a + higherordermethods/selfish:gfx90a-${{ github.sha }} + cache-from: type=local,src=/tmp/.buildx-cache + cache-to: type=local,dest=/tmp/.buildx-cache-new,mode=max + - name: Move cache + run: | + rm -rf /tmp/.buildx-cache + mv /tmp/.buildx-cache-new /tmp/.buildx-cache diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..73620c0 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,22 @@ +# Repository Guidelines + +## Project Structure & Module Organization +Source files live under `envs/<cpu_platform>/<gpu_platform>/`, where each leaf directory owns a `spack.yaml` manifest and (optionally) a generated `Dockerfile`. Keep CPU targets (`x86`, …) and accelerator targets (`gfx90a`, `sm72`, `none`) granular so images stay purpose-built, and limit the root `README.md` to high-level context. + +## Build, Test, and Development Commands +- `spack spec -e envs/x86/gfx90a/spack.yaml` — concretizes the manifest locally; run this before opening a PR so dependency drift is caught early. +- `spack containerize envs/x86/gfx90a/spack.yaml > envs/x86/gfx90a/Dockerfile` — regenerates the Dockerfile after manifest edits (avoid hand-tuning output). +- `docker build -f envs/x86/gfx90a/Dockerfile -t selfish:gfx90a .` — builds the shareable runtime image; tag images `<name>:<gpu_arch>` for clarity. +- `docker run --rm selfish:gfx90a spack find hdf5` — smoke-tests that the expected view was installed inside the image. + +## Coding Style & Naming Conventions +Spack YAML uses 2-space indentation, lowercase keys, and quoted constraint strings (`"target=x86_64_v3"`). Group `specs` alphabetically, keep `packages` overrides sorted by scope, and rely on multiline `RUN` blocks with trailing `\` alignment plus brief comments for non-obvious workarounds. Name new environments after the hardware tuple (`x86/gfx942`, `x86/none`) so downstream scripts can glob predictably. 
+ +## Testing Guidelines +For each environment change, run `spack spec` followed by `spack install --fail-fast` inside a disposable builder container to verify that the environment concretizes and builds. Container builds must pass `docker build` locally before review; capture the last ~20 lines for the PR description. When adding MPI/HDF5 variants, run `docker run --rm <image> mpichversion` (or another representative binary) to prove runtime availability. There is no coverage gate, but every new spec should ship with at least one build log, and GitHub Actions now double-checks gfx90a builds and publishes them to `higherordermethods/selfish`. + +## Commit & Pull Request Guidelines +Existing history uses short, imperative subject lines (“Initial commit”); follow the same format and include the touched environment in parentheses when practical, e.g., `Add feq-parse 2.2.2 (x86/gfx90a)`. One logical change per commit keeps bisects clean. PRs should describe the motivation, list updated directories, attach the relevant `spack spec` or `docker build` excerpt, and link any upstream SELF issues. Paste terminal snippets when reviewing GPU-specific behavior. + +## Security & Configuration Tips +Pin base images (`rockylinux:9`) and Spack refs in manifests, and run `dnf update -y` at build time to pick up security fixes. Never embed registry credentials or cluster hostnames in `spack.yaml`; rely on build-time secrets where required. Before publishing, scan the resulting image with `docker scout cves selfish:gfx90a` (or equivalent) to catch dependency vulnerabilities. diff --git a/README.md b/README.md index d4c97f7..e35ad4f 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,8 @@ While SELF does support bare-metal builds and those are regularly tested, the co The core SELF team at Fluid Numerics has adopted enroot+pyxis with Slurm for our deployment model due to positive experience with this approach. +See [Repository Guidelines](AGENTS.md) for contributor expectations, build commands, and review checklists. 
+ More docs coming soon From 33bc3caad73f3438e98c102b9734c2abf246a47c Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Fri, 14 Nov 2025 13:56:54 -0500 Subject: [PATCH 2/6] Install rocm from yum repo --- envs/x86/gfx90a/Dockerfile | 18 ++++++++++++++++++ envs/x86/gfx90a/rocm.repo | 7 +++++++ 2 files changed, 25 insertions(+) create mode 100644 envs/x86/gfx90a/rocm.repo diff --git a/envs/x86/gfx90a/Dockerfile b/envs/x86/gfx90a/Dockerfile index fe8347c..cb0748a 100644 --- a/envs/x86/gfx90a/Dockerfile +++ b/envs/x86/gfx90a/Dockerfile @@ -33,6 +33,13 @@ RUN dnf update -y \ && rm -rf /var/cache/dnf \ && dnf clean all +# Install HIP # +COPY envs/x86_64/gfx90a/rocm.repo /etc/yum.repos.d/rocm.repo + +RUN dnf clean all && \ + dnf update -y && \ + dnf install rocm-hip-libraries rocm-hip-runtime + RUN mkdir $SPACK_ROOT && cd $SPACK_ROOT && \ git init --quiet && git remote add origin https://github.com/spack/spack.git && git fetch --depth=1 origin v1.0.2 && git checkout --detach FETCH_HEAD && \ mkdir -p $SPACK_ROOT/opt/spack @@ -87,6 +94,11 @@ set -o noclobber \ && echo ' - target=x86_64_v3' \ && echo ' prefer:' \ && echo ' - amdgpu_target=gfx942' \ +&& echo ' hip:' \ +&& echo ' buildable: false' \ +&& echo ' externals:' \ +&& echo ' spec: "hip@6.4.1"' \ +&& echo ' prefix: /opt/rocm"' \ && echo '' \ && echo ' concretizer:' \ && echo ' unify: true' \ @@ -115,6 +127,12 @@ FROM docker.io/rockylinux:9 COPY --from=builder /opt/spack-environment /opt/spack-environment COPY --from=builder /opt/software /opt/software +# Install HIP # +COPY envs/x86_64/gfx90a/rocm.repo /etc/yum.repos.d/rocm.repo +RUN dnf clean all && \ + dnf update -y && \ + dnf install rocm-hip-libraries rocm-hip-runtime + # paths.view is a symlink, so copy the parent to avoid dereferencing and duplicating it COPY --from=builder /opt/views /opt/views diff --git a/envs/x86/gfx90a/rocm.repo b/envs/x86/gfx90a/rocm.repo new file mode 100644 index 0000000..b56cd1d --- /dev/null +++ b/envs/x86/gfx90a/rocm.repo @@ -0,0 
+1,7 @@ +[rocm] +name=ROCm 6.4.1 repository +baseurl=https://repo.radeon.com/rocm/el9/6.4.1/main +enabled=1 +priority=50 +gpgcheck=1 +gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key From 3b8836e3116fb6f4600949fbc3df09ae0d7489bc Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Fri, 14 Nov 2025 14:04:03 -0500 Subject: [PATCH 3/6] Set copy path relative to context path --- envs/x86/gfx90a/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/envs/x86/gfx90a/Dockerfile b/envs/x86/gfx90a/Dockerfile index cb0748a..628886a 100644 --- a/envs/x86/gfx90a/Dockerfile +++ b/envs/x86/gfx90a/Dockerfile @@ -34,7 +34,7 @@ RUN dnf update -y \ && dnf clean all # Install HIP # -COPY envs/x86_64/gfx90a/rocm.repo /etc/yum.repos.d/rocm.repo +COPY ./envs/x86_64/gfx90a/rocm.repo /etc/yum.repos.d/rocm.repo RUN dnf clean all && \ dnf update -y && \ @@ -128,7 +128,7 @@ COPY --from=builder /opt/spack-environment /opt/spack-environment COPY --from=builder /opt/software /opt/software # Install HIP # -COPY envs/x86_64/gfx90a/rocm.repo /etc/yum.repos.d/rocm.repo +COPY ./envs/x86_64/gfx90a/rocm.repo /etc/yum.repos.d/rocm.repo RUN dnf clean all && \ dnf update -y && \ dnf install rocm-hip-libraries rocm-hip-runtime From ec9695ec6df1c351ee471caa49ede336d45d3a78 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Tue, 3 Feb 2026 11:34:16 -0500 Subject: [PATCH 4/6] Add multi-arch ROCm workflow (x86/rocm) Replace single-arch gfx90a workflow with matrix-based build supporting gfx906, gfx90a, and gfx942 GPU architectures. 
Images now follow the documented naming scheme: higherordermethods/selfish:--- Co-Authored-By: Claude Opus 4.5 --- .github/workflows/build-gfx90a.yml | 57 ----------------- .github/workflows/build-rocm.yml | 93 ++++++++++++++++++++++++++++ AGENTS.md => CLAUDE.md | 0 README.md | 81 +++++++++++++++++++++++- envs/x86/{gfx90a => rocm}/Dockerfile | 0 envs/x86/{gfx90a => rocm}/rocm.repo | 0 envs/x86/{gfx90a => rocm}/spack.yaml | 0 7 files changed, 172 insertions(+), 59 deletions(-) delete mode 100644 .github/workflows/build-gfx90a.yml create mode 100644 .github/workflows/build-rocm.yml rename AGENTS.md => CLAUDE.md (100%) rename envs/x86/{gfx90a => rocm}/Dockerfile (100%) rename envs/x86/{gfx90a => rocm}/rocm.repo (100%) rename envs/x86/{gfx90a => rocm}/spack.yaml (100%) diff --git a/.github/workflows/build-gfx90a.yml b/.github/workflows/build-gfx90a.yml deleted file mode 100644 index 2278ad7..0000000 --- a/.github/workflows/build-gfx90a.yml +++ /dev/null @@ -1,57 +0,0 @@ -name: Build gfx90a Docker image - -on: - push: - branches: - - main - paths: - - envs/x86/gfx90a/** - - .github/workflows/build-gfx90a.yml - - AGENTS.md - - README.md - pull_request: - paths: - - envs/x86/gfx90a/** - - .github/workflows/build-gfx90a.yml - - AGENTS.md - - README.md - workflow_dispatch: - -jobs: - build: - name: Build gfx90a image - runs-on: ubuntu-latest - permissions: - contents: read - steps: - - name: Check out repository - uses: actions/checkout@v4 - - name: Log in to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Set up Buildx - uses: docker/setup-buildx-action@v3 - - name: Cache Docker layers - uses: actions/cache@v4 - with: - path: /tmp/.buildx-cache - key: ${{ runner.os }}-buildx-gfx90a-${{ github.sha }} - restore-keys: | - ${{ runner.os }}-buildx-gfx90a- - - name: Build docker image - uses: docker/build-push-action@v5 - with: - context: . 
- file: envs/x86/gfx90a/Dockerfile - push: true - tags: | - higherordermethods/selfish:gfx90a - higherordermethods/selfish:gfx90a-${{ github.sha }} - cache-from: type=local,src=/tmp/.buildx-cache - cache-to: type=local,dest=/tmp/.buildx-cache-new,mode=max - - name: Move cache - run: | - rm -rf /tmp/.buildx-cache - mv /tmp/.buildx-cache-new /tmp/.buildx-cache diff --git a/.github/workflows/build-rocm.yml b/.github/workflows/build-rocm.yml new file mode 100644 index 0000000..25ddde8 --- /dev/null +++ b/.github/workflows/build-rocm.yml @@ -0,0 +1,93 @@ +name: Build ROCm Docker images + +on: + push: + branches: + - main + paths: + - envs/x86/rocm/** + - .github/workflows/build-rocm.yml + pull_request: + paths: + - envs/x86/rocm/** + - .github/workflows/build-rocm.yml + workflow_dispatch: + +env: + REGISTRY: docker.io + IMAGE_NAME: higherordermethods/selfish + +jobs: + build: + name: Build ${{ matrix.gpu_arch }} image + runs-on: ubuntu-latest + permissions: + contents: read + strategy: + fail-fast: false + matrix: + gpu_arch: [gfx906, gfx90a, gfx942] + gpu_backend_version: ["6.4.3"] + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Log in to Docker Hub + if: github.event_name != 'pull_request' + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Set up Buildx + uses: docker/setup-buildx-action@v3 + + - name: Generate image metadata + id: meta + run: | + # Convert GPU_BACKEND_VERSION 6.4.3 -> rocm643 + VERSION_NO_DOTS=$(echo "${{ matrix.gpu_backend_version }}" | tr -d '.') + GPU_BACKEND="rocm${VERSION_NO_DOTS}" + + # Build tag components + CPU_PLATFORM="x86" + GPU_ARCH="${{ matrix.gpu_arch }}" + + # Multiline "tags" output (heredoc form): <version>-<cpu_platform>-<gpu_backend>-<gpu_arch> + echo "tags<<EOF" >> $GITHUB_OUTPUT + echo "${{ env.IMAGE_NAME }}:latest-${CPU_PLATFORM}-${GPU_BACKEND}-${GPU_ARCH}" >> $GITHUB_OUTPUT + echo "${{ env.IMAGE_NAME }}:${{ github.sha }}-${CPU_PLATFORM}-${GPU_BACKEND}-${GPU_ARCH}" >> 
$GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT + + echo "gpu_backend=${GPU_BACKEND}" >> $GITHUB_OUTPUT + + - name: Cache Docker layers + uses: actions/cache@v4 + with: + path: /tmp/.buildx-cache + key: ${{ runner.os }}-buildx-${{ matrix.gpu_arch }}-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-buildx-${{ matrix.gpu_arch }}- + + - name: Build and push Docker image + uses: docker/build-push-action@v5 + with: + context: . + file: envs/x86/rocm/Dockerfile + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + build-args: | + GPU_ARCH=${{ matrix.gpu_arch }} + GPU_BACKEND_VERSION=${{ matrix.gpu_backend_version }} + cache-from: type=local,src=/tmp/.buildx-cache + cache-to: type=local,dest=/tmp/.buildx-cache-new,mode=max + labels: | + com.fluidnumerics.rocm.target=${{ matrix.gpu_arch }} + com.fluidnumerics.rocm.version=${{ matrix.gpu_backend_version }} + org.opencontainers.image.source=${{ github.server_url }}/${{ github.repository }} + org.opencontainers.image.revision=${{ github.sha }} + + - name: Move cache + run: | + rm -rf /tmp/.buildx-cache + mv /tmp/.buildx-cache-new /tmp/.buildx-cache diff --git a/AGENTS.md b/CLAUDE.md similarity index 100% rename from AGENTS.md rename to CLAUDE.md diff --git a/README.md b/README.md index e35ad4f..7aa295b 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ While SELF does support bare-metal builds and those are regularly tested, the co The core SELF team at Fluid Numerics has adopted enroot+pyxis with Slurm for our deployment model due to positive experience with this approach. -See [Repository Guidelines](AGENTS.md) for contributor expectations, build commands, and review checklists. +See [Repository Guidelines](CLAUDE.md) for contributor expectations, build commands, and review checklists. 
More docs coming soon @@ -15,4 +15,81 @@ More docs coming soon ## Organization -The `envs/` subdirectory defines all of the base environments that are aimed at providing base images with all the dependencies required for developing SELF. The subdirectory structure is as `envs/{cpu_platform}/{gpu_platform}`. When `{gpu_platform}=none`, that environment is an environment for working with non-gpu accelerated implementations of SELF. +The `envs/` subdirectory defines all of the base environments that are aimed at providing base images with all the dependencies required for developing SELF. The subdirectory structure is `envs/{cpu_platform}/{gpu_backend}`. When `{gpu_backend}=none`, that environment is an environment for working with non-GPU-accelerated implementations of SELF. + +## Container Images + +SELFish provides pre-built container images with all dependencies for GPU-accelerated spectral element computations. Images are tagged using a **version-architecture** naming scheme to support multiple GPU targets. + +### Image Tagging Scheme + +Images follow the pattern: `higherordermethods/selfish:<version>-<cpu_platform>-<gpu_backend>-<gpu_arch>` + +- **`<version>`**: Semantic version (e.g., `v1.2.3`) or release channel (`latest`, `dev`) +- **`<cpu_platform>`**: Target CPU architecture (e.g., `x86`, `arm`) +- **`<gpu_backend>`**: GPU backend provider with version (e.g., 
`rocm643`, `cuda112`) +- **`<gpu_arch>`**: Target GPU architecture (e.g., `gfx90a`, `gfx906`, `gfx942`) + +#### Examples: +```bash +# Stable release for MI210/MI250 (gfx90a) +docker pull higherordermethods/selfish:v1.2.3-x86-rocm643-gfx90a + +# Latest stable for Radeon Instinct MI100 (gfx908) +docker pull higherordermethods/selfish:latest-x86-rocm643-gfx908 + +# Development build for MI300A (gfx942) +docker pull higherordermethods/selfish:dev-x86-rocm643-gfx942 +``` + +### Supported GPU Architectures + +| Architecture | GPU Models | Tag Suffix | +|--------------|------------|------------| +| gfx90a | MI210, MI250, MI250X | `-gfx90a` | +| gfx908 | MI100 | `-gfx908` | +| gfx906 | MI50, MI60, Radeon VII | `-gfx906` | +| gfx942 | MI300A, MI300X | `-gfx942` | +| sm_72 | V100 | `-sm72` | + +### Determining Your GPU Architecture + +If you're unsure which image to use, check your GPU architecture. + +For AMD GPUs: +```bash +# Using rocminfo +rocminfo | grep "Name:" | grep "gfx" + +# Using rocm-smi +rocm-smi --showproductname +``` + +### Using with Slurm + +Specify the architecture-specific image in your job script: +```bash +#!/bin/bash +#SBATCH --gpus=1 +#SBATCH --container-image=higherordermethods/selfish:v1.2.3-x86-rocm643-gfx90a + +./run_simulation.sh +``` + +### Version Pinning Recommendations + +- **Production**: Pin to specific versions (e.g., `v1.2.3-x86-rocm643-gfx90a`) for reproducibility +- **Development**: Use `latest-<cpu_platform>-<gpu_backend>-<gpu_arch>` for convenience (auto-updates with new releases) +- **Testing CI**: Use `dev-<cpu_platform>-<gpu_backend>-<gpu_arch>` to test against bleeding-edge builds + +### Image Metadata + +All images include OCI labels for programmatic inspection: +```bash +docker inspect higherordermethods/selfish:v1.2.3-x86-rocm643-gfx90a | grep -A5 Labels +``` + +Key labels: +- `com.fluidnumerics.rocm.target`: GPU architecture target +- `com.fluidnumerics.rocm.version`: ROCm version the image was built against +- `org.opencontainers.image.revision`: Git commit the image was built from diff --git a/envs/x86/gfx90a/Dockerfile b/envs/x86/rocm/Dockerfile similarity index 100% rename from envs/x86/gfx90a/Dockerfile rename to 
envs/x86/rocm/Dockerfile diff --git a/envs/x86/gfx90a/rocm.repo b/envs/x86/rocm/rocm.repo similarity index 100% rename from envs/x86/gfx90a/rocm.repo rename to envs/x86/rocm/rocm.repo diff --git a/envs/x86/gfx90a/spack.yaml b/envs/x86/rocm/spack.yaml similarity index 100% rename from envs/x86/gfx90a/spack.yaml rename to envs/x86/rocm/spack.yaml From 54e95f2a03c0c4a368d87e7e61fa34d114eb8234 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Tue, 3 Feb 2026 11:44:42 -0500 Subject: [PATCH 5/6] Fix rocm.repo COPY path and add version substitution (x86/rocm) Co-Authored-By: Claude Opus 4.5 --- envs/x86/rocm/Dockerfile | 61 ++++++++++++++++++++++++++++------- envs/x86/rocm/feq-parse.patch | 12 +++++++ envs/x86/rocm/rocm.repo | 4 +-- 3 files changed, 63 insertions(+), 14 deletions(-) create mode 100644 envs/x86/rocm/feq-parse.patch diff --git a/envs/x86/rocm/Dockerfile b/envs/x86/rocm/Dockerfile index 628886a..184a47f 100644 --- a/envs/x86/rocm/Dockerfile +++ b/envs/x86/rocm/Dockerfile @@ -1,5 +1,8 @@ FROM docker.io/rockylinux:9 AS bootstrap +ARG GPU_ARCH=gfx90a +ARG GPU_BACKEND_VERSION=6.4.3 + ENV SPACK_ROOT=/opt/spack \ CURRENTLY_BUILDING_DOCKER_IMAGE=1 \ container=docker @@ -9,6 +12,7 @@ RUN dnf update -y \ && dnf update -y \ && dnf --enablerepo epel install -y \ bzip2 \ + cmake \ curl-minimal \ file \ findutils \ @@ -34,14 +38,17 @@ RUN dnf update -y \ && dnf clean all # Install HIP # -COPY ./envs/x86_64/gfx90a/rocm.repo /etc/yum.repos.d/rocm.repo +COPY ./envs/x86/rocm/rocm.repo /etc/yum.repos.d/rocm.repo +RUN sed -i "s/@GPU_BACKEND_VERSION@/${GPU_BACKEND_VERSION}/g" /etc/yum.repos.d/rocm.repo RUN dnf clean all && \ dnf update -y && \ - dnf install rocm-hip-libraries rocm-hip-runtime + dnf install -y rocm-hip-sdk rocm-llvm rocm-smi-lib rocminfo + +RUN ls -l /opt/rocm-${GPU_BACKEND_VERSION}/include/hip/hip_version.h RUN mkdir $SPACK_ROOT && cd $SPACK_ROOT && \ - git init --quiet && git remote add origin https://github.com/spack/spack.git && git fetch --depth=1 origin 
v1.0.2 && git checkout --detach FETCH_HEAD && \ + git init --quiet && git remote add origin https://github.com/spack/spack.git && git fetch --depth=1 origin develop && git checkout --detach FETCH_HEAD && \ mkdir -p $SPACK_ROOT/opt/spack RUN ln -s $SPACK_ROOT/share/spack/docker/entrypoint.bash \ @@ -86,28 +93,56 @@ set -o noclobber \ && (echo spack: \ && echo ' specs:' \ && echo ' - feq-parse@2.2.2' \ -&& echo ' - mpich@4.2.3 +rocm' \ +&& echo ' - openmpi@5.0.8 +rocm' \ && echo ' - hdf5@1.14.5 +fortran +mpi' \ && echo ' packages:' \ && echo ' all:' \ && echo ' require:' \ && echo ' - target=x86_64_v3' \ && echo ' prefer:' \ -&& echo ' - amdgpu_target=gfx942' \ +&& echo " - amdgpu_target=$GPU_ARCH" \ +&& echo ' cmake:' \ +&& echo ' buildable: false' \ +&& echo ' externals:' \ +&& echo ' - spec: "cmake@3.26.5"' \ +&& echo ' prefix: "/usr"' \ +&& echo ' rocm-smi-lib:' \ +&& echo ' buildable: false' \ +&& echo ' externals:' \ +&& echo ' - spec: "rocm-smi-lib@${GPU_BACKEND_VERSION}"' \ +&& echo ' prefix: "/opt/rocm-${GPU_BACKEND_VERSION}"' \ && echo ' hip:' \ && echo ' buildable: false' \ && echo ' externals:' \ -&& echo ' spec: "hip@6.4.1"' \ -&& echo ' prefix: /opt/rocm"' \ +&& echo ' - spec: "hip@${GPU_BACKEND_VERSION}"' \ +&& echo ' prefix: "/opt/rocm-${GPU_BACKEND_VERSION}"' \ +&& echo ' hsa-rocr-dev:' \ +&& echo ' buildable: false' \ +&& echo ' externals:' \ +&& echo ' - spec: "hsa-rocr-dev@${GPU_BACKEND_VERSION}"' \ +&& echo ' prefix: "/opt/rocm-${GPU_BACKEND_VERSION}"' \ +&& echo ' llvm-amdgpu:' \ +&& echo ' buildable: false' \ +&& echo ' externals:' \ +&& echo ' - spec: "llvm-amdgpu@${GPU_BACKEND_VERSION}"' \ +&& echo ' prefix: "/opt/rocm-${GPU_BACKEND_VERSION}"' \ && echo '' \ && echo ' concretizer:' \ && echo ' unify: true' \ && echo ' config:' \ -&& echo ' install_tree: /opt/software' \ +&& echo ' install_tree:' \ +&& echo ' root: /opt/software' \ && echo ' view: /opt/views/view') > /opt/spack-environment/spack.yaml +# Apply feq-parse patch to add "c" build 
dependency +COPY ./envs/x86/rocm/feq-parse.patch /tmp/feq-parse.patch +# +RUN SPACK_PKGS_ROOT=$(spack repo list | awk '{print $NF}') &&\ + SPACK_BUILTIN_PKGS_ROOT=${SPACK_PKGS_ROOT/repos\/spack_repo\/builtin} &&\ + patch -p1 -d $SPACK_BUILTIN_PKGS_ROOT < /tmp/feq-parse.patch + # Install the software, remove unnecessary deps -RUN cd /opt/spack-environment && spack env activate . && spack install --fail-fast && spack gc -y +RUN cd /opt/spack-environment && spack env activate . && spack repo list && spack install --fail-fast && spack gc -y # Strip all the binaries RUN find -L /opt/views/view/* -type f -exec readlink -f '{}' \; | \ @@ -128,10 +163,12 @@ COPY --from=builder /opt/spack-environment /opt/spack-environment COPY --from=builder /opt/software /opt/software # Install HIP # -COPY ./envs/x86_64/gfx90a/rocm.repo /etc/yum.repos.d/rocm.repo -RUN dnf clean all && \ +COPY ./envs/x86/rocm/rocm.repo /etc/yum.repos.d/rocm.repo +ARG GPU_BACKEND_VERSION=6.4.3 +RUN sed -i "s/@GPU_BACKEND_VERSION@/${GPU_BACKEND_VERSION}/g" /etc/yum.repos.d/rocm.repo && \ + dnf clean all && \ dnf update -y && \ - dnf install rocm-hip-libraries rocm-hip-runtime + dnf install -y rocm-hip-libraries rocm-hip-runtime # paths.view is a symlink, so copy the parent to avoid dereferencing and duplicating it COPY --from=builder /opt/views /opt/views diff --git a/envs/x86/rocm/feq-parse.patch b/envs/x86/rocm/feq-parse.patch new file mode 100644 index 0000000..399f387 --- /dev/null +++ b/envs/x86/rocm/feq-parse.patch @@ -0,0 +1,12 @@ +diff --git a/repos/spack_repo/builtin/packages/feq_parse/package.py b/repos/spack_repo/builtin/packages/feq_parse/package.py +index e4b960b7..bc0916b9 100644 +--- a/repos/spack_repo/builtin/packages/feq_parse/package.py ++++ b/repos/spack_repo/builtin/packages/feq_parse/package.py +@@ -29,6 +29,7 @@ class FeqParse(CMakePackage): + version("1.0.2", sha256="1cd1db7562908ea16fc65dc5268b654405d0b3d9dcfe11f409949c431b48a3e8") + + depends_on("fortran", type="build") # generated 
++ depends_on("c", type="build") # generated + + depends_on("cmake@3.0.2:", type="build") + diff --git a/envs/x86/rocm/rocm.repo b/envs/x86/rocm/rocm.repo index b56cd1d..2df98d1 100644 --- a/envs/x86/rocm/rocm.repo +++ b/envs/x86/rocm/rocm.repo @@ -1,6 +1,6 @@ [rocm] -name=ROCm 6.4.1 repository -baseurl=https://repo.radeon.com/rocm/el9/6.4.1/main +name=ROCm @GPU_BACKEND_VERSION@ repository +baseurl=https://repo.radeon.com/rocm/el9/@GPU_BACKEND_VERSION@/main enabled=1 priority=50 gpgcheck=1 From 4a1c56b3a57ba77b941a204805c386978525f91e Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Tue, 3 Feb 2026 11:52:50 -0500 Subject: [PATCH 6/6] Fix variable expansion in spack.yaml generation (x86/rocm) Co-Authored-By: Claude Opus 4.5 --- envs/x86/rocm/Dockerfile | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/envs/x86/rocm/Dockerfile b/envs/x86/rocm/Dockerfile index 184a47f..7f5ce6e 100644 --- a/envs/x86/rocm/Dockerfile +++ b/envs/x86/rocm/Dockerfile @@ -109,23 +109,23 @@ set -o noclobber \ && echo ' rocm-smi-lib:' \ && echo ' buildable: false' \ && echo ' externals:' \ -&& echo ' - spec: "rocm-smi-lib@${GPU_BACKEND_VERSION}"' \ -&& echo ' prefix: "/opt/rocm-${GPU_BACKEND_VERSION}"' \ +&& echo " - spec: \"rocm-smi-lib@${GPU_BACKEND_VERSION}\"" \ +&& echo " prefix: \"/opt/rocm-${GPU_BACKEND_VERSION}\"" \ && echo ' hip:' \ && echo ' buildable: false' \ && echo ' externals:' \ -&& echo ' - spec: "hip@${GPU_BACKEND_VERSION}"' \ -&& echo ' prefix: "/opt/rocm-${GPU_BACKEND_VERSION}"' \ +&& echo " - spec: \"hip@${GPU_BACKEND_VERSION}\"" \ +&& echo " prefix: \"/opt/rocm-${GPU_BACKEND_VERSION}\"" \ && echo ' hsa-rocr-dev:' \ && echo ' buildable: false' \ && echo ' externals:' \ -&& echo ' - spec: "hsa-rocr-dev@${GPU_BACKEND_VERSION}"' \ -&& echo ' prefix: "/opt/rocm-${GPU_BACKEND_VERSION}"' \ +&& echo " - spec: \"hsa-rocr-dev@${GPU_BACKEND_VERSION}\"" \ +&& echo " prefix: \"/opt/rocm-${GPU_BACKEND_VERSION}\"" \ && echo ' llvm-amdgpu:' \ && echo ' 
buildable: false' \ && echo ' externals:' \ -&& echo ' - spec: "llvm-amdgpu@${GPU_BACKEND_VERSION}"' \ -&& echo ' prefix: "/opt/rocm-${GPU_BACKEND_VERSION}"' \ +&& echo " - spec: \"llvm-amdgpu@${GPU_BACKEND_VERSION}\"" \ +&& echo " prefix: \"/opt/rocm-${GPU_BACKEND_VERSION}\"" \ && echo '' \ && echo ' concretizer:' \ && echo ' unify: true' \