From 3afc4624e0787ac74597087af1188ef2fe5fbbc9 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Sat, 14 Mar 2026 20:20:41 -0400 Subject: [PATCH 1/4] Split Buildkite pipeline and add NVIDIA V100 coverage tests Move MI210 pipeline to its own file and add a new V100 coverage pipeline using CUDA backend. The parent pipeline.yml now uploads both sub-pipelines. Co-Authored-By: Claude Opus 4.6 (1M context) --- .buildkite/amd-mi210-debug-tests.yml | 54 +++++++++++++++++++++++++ .buildkite/nvidia-v100-debug-tests.yml | 53 ++++++++++++++++++++++++ .buildkite/pipeline.yml | 56 ++------------------------ 3 files changed, 111 insertions(+), 52 deletions(-) create mode 100644 .buildkite/amd-mi210-debug-tests.yml create mode 100644 .buildkite/nvidia-v100-debug-tests.yml diff --git a/.buildkite/amd-mi210-debug-tests.yml b/.buildkite/amd-mi210-debug-tests.yml new file mode 100644 index 00000000..5a685a2d --- /dev/null +++ b/.buildkite/amd-mi210-debug-tests.yml @@ -0,0 +1,54 @@ +steps: + - label: ":partyparrot: Build & Test AMD MI210 (Coverage)" + key: "build-test-mi210" + command: | + source /opt/spack-environment/activate.sh + export WORKSPACE=/workspace + mkdir -p /workspace/build + rocminfo + cd /workspace/build + FC=gfortran cmake -DCMAKE_INSTALL_PREFIX=/workspace/opt/self \ + -DCMAKE_BUILD_TYPE="coverage" \ + -DSELF_ENABLE_GPU=ON \ + -DSELF_GPU_BACKEND=HIP \ + -DSELF_ENABLE_TESTING=ON \ + -DCMAKE_HIP_ARCHITECTURES="gfx90a" \ + -DGPU_TARGETS="gfx90a" \ + -DSELF_ENABLE_EXAMPLES=ON \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ + /workspace/ + make -j"$$(nproc)" + lcov --capture --initial \ + --directory /workspace/build/src/ \ + --output-file /workspace/initial.info + ctest --test-dir /workspace/build --output-on-failure + lcov --capture \ + --directory /workspace/build/src/ \ + --output-file /workspace/ctest-capture.info + lcov --add-tracefile /workspace/initial.info \ + --add-tracefile /workspace/ctest-capture.info \ + --output-file /workspace/coverage.info + env: + slurm_partition: "main" + 
slurm_gres: "gpu:mi210:2" + slurm_time: "01:00:00" + slurm_nodes: 1 + slurm_ntasks: 2 + slurm_cpus_per_task: 8 + slurm_container_image: "docker://higherordermethods/selfish:latest-x86-rocm643-gfx90a" + agents: + queue: "galapagos" + + - label: ":codecov: Upload Coverage" + key: "upload-coverage" + depends_on: "build-test-mi210" + command: | + curl -Os https://uploader.codecov.io/latest/linux/codecov + chmod +x codecov + ./codecov -t "$${CODECOV_TOKEN}" \ + -f "$${BUILDKITE_BUILD_CHECKOUT_PATH}/coverage.info" \ + -F buildkite-rocm-mi210 + secrets: + - CODECOV_TOKEN + agents: + queue: "galapagos" diff --git a/.buildkite/nvidia-v100-debug-tests.yml b/.buildkite/nvidia-v100-debug-tests.yml new file mode 100644 index 00000000..fdf8c4c5 --- /dev/null +++ b/.buildkite/nvidia-v100-debug-tests.yml @@ -0,0 +1,53 @@ +steps: + - label: ":partyparrot: Build & Test NVIDIA V100 (Coverage)" + key: "build-test-v100" + command: | + source /opt/spack-environment/activate.sh + export WORKSPACE=/workspace + mkdir -p /workspace/build + nvidia-smi + cd /workspace/build + FC=gfortran cmake -DCMAKE_INSTALL_PREFIX=/workspace/opt/self \ + -DCMAKE_BUILD_TYPE="coverage" \ + -DSELF_ENABLE_GPU=ON \ + -DSELF_GPU_BACKEND=CUDA \ + -DSELF_ENABLE_TESTING=ON \ + -DCMAKE_CUDA_ARCHITECTURES="70" \ + -DSELF_ENABLE_EXAMPLES=ON \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ + /workspace/ + make -j"$$(nproc)" + lcov --capture --initial \ + --directory /workspace/build/src/ \ + --output-file /workspace/initial.info + ctest --test-dir /workspace/build --output-on-failure + lcov --capture \ + --directory /workspace/build/src/ \ + --output-file /workspace/ctest-capture.info + lcov --add-tracefile /workspace/initial.info \ + --add-tracefile /workspace/ctest-capture.info \ + --output-file /workspace/coverage.info + env: + slurm_partition: "main" + slurm_gres: "gpu:v100:2" + slurm_time: "01:00:00" + slurm_nodes: 1 + slurm_ntasks: 2 + slurm_cpus_per_task: 8 + slurm_container_image: 
"docker://higherordermethods/selfish:latest-x86-cuda124-sm70" + agents: + queue: "galapagos" + + - label: ":codecov: Upload Coverage (V100)" + key: "upload-coverage-v100" + depends_on: "build-test-v100" + command: | + curl -Os https://uploader.codecov.io/latest/linux/codecov + chmod +x codecov + ./codecov -t "$${CODECOV_TOKEN}" \ + -f "$${BUILDKITE_BUILD_CHECKOUT_PATH}/coverage.info" \ + -F buildkite-cuda-v100 + secrets: + - CODECOV_TOKEN + agents: + queue: "galapagos" diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 5a685a2d..a21b3010 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -1,54 +1,6 @@ steps: - - label: ":partyparrot: Build & Test AMD MI210 (Coverage)" - key: "build-test-mi210" - command: | - source /opt/spack-environment/activate.sh - export WORKSPACE=/workspace - mkdir -p /workspace/build - rocminfo - cd /workspace/build - FC=gfortran cmake -DCMAKE_INSTALL_PREFIX=/workspace/opt/self \ - -DCMAKE_BUILD_TYPE="coverage" \ - -DSELF_ENABLE_GPU=ON \ - -DSELF_GPU_BACKEND=HIP \ - -DSELF_ENABLE_TESTING=ON \ - -DCMAKE_HIP_ARCHITECTURES="gfx90a" \ - -DGPU_TARGETS="gfx90a" \ - -DSELF_ENABLE_EXAMPLES=ON \ - -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ - /workspace/ - make -j - lcov --capture --initial \ - --directory /workspace/build/src/ \ - --output-file /workspace/initial.info - ctest --test-dir /workspace/build --output-on-failure - lcov --capture \ - --directory /workspace/build/src/ \ - --output-file /workspace/ctest-capture.info - lcov --add-tracefile /workspace/initial.info \ - --add-tracefile /workspace/ctest-capture.info \ - --output-file /workspace/coverage.info - env: - slurm_partition: "main" - slurm_gres: "gpu:mi210:2" - slurm_time: "01:00:00" - slurm_nodes: 1 - slurm_ntasks: 2 - slurm_cpus_per_task: 8 - slurm_container_image: "docker://higherordermethods/selfish:latest-x86-rocm643-gfx90a" - agents: - queue: "galapagos" + - label: ":pipeline: AMD MI210 Debug Tests" + command: buildkite-agent pipeline upload 
.buildkite/amd-mi210-debug-tests.yml - - label: ":codecov: Upload Coverage" - key: "upload-coverage" - depends_on: "build-test-mi210" - command: | - curl -Os https://uploader.codecov.io/latest/linux/codecov - chmod +x codecov - ./codecov -t "$${CODECOV_TOKEN}" \ - -f "$${BUILDKITE_BUILD_CHECKOUT_PATH}/coverage.info" \ - -F buildkite-rocm-mi210 - secrets: - - CODECOV_TOKEN - agents: - queue: "galapagos" + - label: ":pipeline: NVIDIA V100 Debug Tests" + command: buildkite-agent pipeline upload .buildkite/nvidia-v100-debug-tests.yml From 2dd7eebeee04b94ace3e8fdb966707ed3cb45551 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Sat, 14 Mar 2026 20:43:34 -0400 Subject: [PATCH 2/4] Add release build pipelines and Dockerfiles for all targets Add Dockerfiles for x86_gfx90a, x86_sm70, and x86 (CPU-only) release builds. Add CPU coverage test pipeline. Add release-builds pipeline that builds, tests, and pushes Docker images to dockerhub after coverage tests pass. Co-Authored-By: Claude Opus 4.6 (1M context) --- .buildkite/pipeline.yml | 10 +- .buildkite/release-builds.yml | 143 +++++++++++++++++++++++++++++ .buildkite/x86-cpu-debug-tests.yml | 49 ++++++++++ .dockerignore | 15 +++ docker/x86/Dockerfile | 23 +++++ docker/x86_gfx90a/Dockerfile | 26 ++++++ docker/x86_sm70/Dockerfile | 25 +++++ 7 files changed, 289 insertions(+), 2 deletions(-) create mode 100644 .buildkite/release-builds.yml create mode 100644 .buildkite/x86-cpu-debug-tests.yml create mode 100644 .dockerignore create mode 100644 docker/x86/Dockerfile create mode 100644 docker/x86_gfx90a/Dockerfile create mode 100644 docker/x86_sm70/Dockerfile diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index a21b3010..4c15ff75 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -1,6 +1,12 @@ steps: - - label: ":pipeline: AMD MI210 Debug Tests" + - label: ":pipeline: AMD MI210 Coverage Tests" command: buildkite-agent pipeline upload .buildkite/amd-mi210-debug-tests.yml - - label: ":pipeline: 
NVIDIA V100 Debug Tests" + - label: ":pipeline: NVIDIA V100 Coverage Tests" command: buildkite-agent pipeline upload .buildkite/nvidia-v100-debug-tests.yml + + - label: ":pipeline: x86 CPU Coverage Tests" + command: buildkite-agent pipeline upload .buildkite/x86-cpu-debug-tests.yml + + - label: ":pipeline: Release Builds" + command: buildkite-agent pipeline upload .buildkite/release-builds.yml diff --git a/.buildkite/release-builds.yml b/.buildkite/release-builds.yml new file mode 100644 index 00000000..9c5397c9 --- /dev/null +++ b/.buildkite/release-builds.yml @@ -0,0 +1,143 @@ +steps: + # ---- AMD MI210 (gfx90a) Release ---- + - label: ":docker: Build Release Image (x86-rocm643-gfx90a)" + key: "docker-build-gfx90a" + depends_on: "build-test-mi210" + command: | + SELFISH_IMAGE="higherordermethods/selfish:latest-x86-rocm643-gfx90a" + docker pull "$${SELFISH_IMAGE}" + SELFISH_SHA=$$(docker inspect --format='{{index .RepoDigests 0}}' "$${SELFISH_IMAGE}" | cut -d@ -f2) + docker build \ + --build-arg SELFISH_IMAGE="$${SELFISH_IMAGE}" \ + --build-arg SELFISH_SHA="$${SELFISH_SHA}" \ + -t higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86-rocm643-gfx90a \ + -f docker/x86_gfx90a/Dockerfile . 
+ docker push higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86-rocm643-gfx90a + env: + slurm_nodelist: "oram" + agents: + queue: "galapagos" + + - label: ":test_tube: Test Release Image (x86-rocm643-gfx90a)" + key: "test-release-gfx90a" + depends_on: "docker-build-gfx90a" + command: | + source /opt/spack-environment/activate.sh + ctest --test-dir /opt/self/build --output-on-failure + env: + slurm_partition: "main" + slurm_gres: "gpu:mi210:2" + slurm_time: "01:00:00" + slurm_nodes: 1 + slurm_ntasks: 2 + slurm_cpus_per_task: 8 + slurm_container_image: "docker://higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86-rocm643-gfx90a" + agents: + queue: "galapagos" + + - label: ":rocket: Push Release Image (x86-rocm643-gfx90a)" + key: "push-release-gfx90a" + depends_on: "test-release-gfx90a" + command: | + docker tag higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86-rocm643-gfx90a \ + higherordermethods/self:latest-x86-rocm643-gfx90a + docker push higherordermethods/self:latest-x86-rocm643-gfx90a + env: + slurm_nodelist: "oram" + agents: + queue: "galapagos" + + # ---- NVIDIA V100 (sm70) Release ---- + - label: ":docker: Build Release Image (x86-cuda124-sm70)" + key: "docker-build-sm70" + depends_on: "build-test-v100" + command: | + SELFISH_IMAGE="higherordermethods/selfish:latest-x86-cuda124-sm70" + docker pull "$${SELFISH_IMAGE}" + SELFISH_SHA=$$(docker inspect --format='{{index .RepoDigests 0}}' "$${SELFISH_IMAGE}" | cut -d@ -f2) + docker build \ + --build-arg SELFISH_IMAGE="$${SELFISH_IMAGE}" \ + --build-arg SELFISH_SHA="$${SELFISH_SHA}" \ + -t higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86-cuda124-sm70 \ + -f docker/x86_sm70/Dockerfile . 
+ docker push higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86-cuda124-sm70 + env: + slurm_nodelist: "oram" + agents: + queue: "galapagos" + + - label: ":test_tube: Test Release Image (x86-cuda124-sm70)" + key: "test-release-sm70" + depends_on: "docker-build-sm70" + command: | + source /opt/spack-environment/activate.sh + ctest --test-dir /opt/self/build --output-on-failure + env: + slurm_partition: "main" + slurm_gres: "gpu:v100:2" + slurm_time: "01:00:00" + slurm_nodes: 1 + slurm_ntasks: 2 + slurm_cpus_per_task: 8 + slurm_container_image: "docker://higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86-cuda124-sm70" + agents: + queue: "galapagos" + + - label: ":rocket: Push Release Image (x86-cuda124-sm70)" + key: "push-release-sm70" + depends_on: "test-release-sm70" + command: | + docker tag higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86-cuda124-sm70 \ + higherordermethods/self:latest-x86-cuda124-sm70 + docker push higherordermethods/self:latest-x86-cuda124-sm70 + env: + slurm_nodelist: "oram" + agents: + queue: "galapagos" + + # ---- x86 CPU-only Release ---- + - label: ":docker: Build Release Image (x86)" + key: "docker-build-x86" + depends_on: "build-test-x86-cpu" + command: | + SELFISH_IMAGE="higherordermethods/selfish:latest-x86" + docker pull "$${SELFISH_IMAGE}" + SELFISH_SHA=$$(docker inspect --format='{{index .RepoDigests 0}}' "$${SELFISH_IMAGE}" | cut -d@ -f2) + docker build \ + --build-arg SELFISH_IMAGE="$${SELFISH_IMAGE}" \ + --build-arg SELFISH_SHA="$${SELFISH_SHA}" \ + -t higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86 \ + -f docker/x86/Dockerfile . 
+ docker push higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86 + env: + slurm_nodelist: "oram" + agents: + queue: "galapagos" + + - label: ":test_tube: Test Release Image (x86)" + key: "test-release-x86" + depends_on: "docker-build-x86" + command: | + source /opt/spack-environment/activate.sh + ctest --test-dir /opt/self/build --output-on-failure + env: + slurm_partition: "main" + slurm_time: "01:00:00" + slurm_nodes: 1 + slurm_ntasks: 2 + slurm_cpus_per_task: 8 + slurm_container_image: "docker://higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86" + agents: + queue: "galapagos" + + - label: ":rocket: Push Release Image (x86)" + key: "push-release-x86" + depends_on: "test-release-x86" + command: | + docker tag higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86 \ + higherordermethods/self:latest-x86 + docker push higherordermethods/self:latest-x86 + env: + slurm_nodelist: "oram" + agents: + queue: "galapagos" diff --git a/.buildkite/x86-cpu-debug-tests.yml b/.buildkite/x86-cpu-debug-tests.yml new file mode 100644 index 00000000..2c689100 --- /dev/null +++ b/.buildkite/x86-cpu-debug-tests.yml @@ -0,0 +1,49 @@ +steps: + - label: ":partyparrot: Build & Test x86 CPU (Coverage)" + key: "build-test-x86-cpu" + command: | + source /opt/spack-environment/activate.sh + export WORKSPACE=/workspace + mkdir -p /workspace/build + cd /workspace/build + FC=gfortran cmake -DCMAKE_INSTALL_PREFIX=/workspace/opt/self \ + -DCMAKE_BUILD_TYPE="coverage" \ + -DSELF_ENABLE_GPU=OFF \ + -DSELF_ENABLE_TESTING=ON \ + -DSELF_ENABLE_EXAMPLES=ON \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ + /workspace/ + make -j"$$(nproc)" + lcov --capture --initial \ + --directory /workspace/build/src/ \ + --output-file /workspace/initial.info + ctest --test-dir /workspace/build --output-on-failure + lcov --capture \ + --directory /workspace/build/src/ \ + --output-file /workspace/ctest-capture.info + lcov --add-tracefile /workspace/initial.info \ + --add-tracefile /workspace/ctest-capture.info \ + --output-file 
/workspace/coverage.info + env: + slurm_partition: "main" + slurm_time: "01:00:00" + slurm_nodes: 1 + slurm_ntasks: 2 + slurm_cpus_per_task: 8 + slurm_container_image: "docker://higherordermethods/selfish:latest-x86" + agents: + queue: "galapagos" + + - label: ":codecov: Upload Coverage (x86 CPU)" + key: "upload-coverage-x86-cpu" + depends_on: "build-test-x86-cpu" + command: | + curl -Os https://uploader.codecov.io/latest/linux/codecov + chmod +x codecov + ./codecov -t "$${CODECOV_TOKEN}" \ + -f "$${BUILDKITE_BUILD_CHECKOUT_PATH}/coverage.info" \ + -F buildkite-x86-cpu + secrets: + - CODECOV_TOKEN + agents: + queue: "galapagos" diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..17b79431 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,15 @@ +.git +build/ +env/ +*.o +*.mod +*.gcno +*.gcda +*.gcov +*.info +*.sif +*.out +*.err +*.log +.vscode/ +.spack-env diff --git a/docker/x86/Dockerfile b/docker/x86/Dockerfile new file mode 100644 index 00000000..b20814dc --- /dev/null +++ b/docker/x86/Dockerfile @@ -0,0 +1,23 @@ +ARG SELFISH_IMAGE=higherordermethods/selfish:latest-x86 +FROM ${SELFISH_IMAGE} + +ARG SELFISH_SHA="" +LABEL org.opencontainers.image.base.name="${SELFISH_IMAGE}" +LABEL org.opencontainers.image.base.digest="${SELFISH_SHA}" + +COPY . 
/opt/self/src + +RUN . /opt/spack-environment/activate.sh && \ + mkdir -p /opt/self/build && \ + cd /opt/self/build && \ + FC=gfortran cmake \ + -DCMAKE_INSTALL_PREFIX=/opt/self/install \ + -DCMAKE_BUILD_TYPE=Release \ + -DSELF_ENABLE_GPU=OFF \ + -DSELF_ENABLE_TESTING=ON \ + -DSELF_ENABLE_EXAMPLES=ON \ + /opt/self/src && \ + make -j$(nproc) && \ + make install + +WORKDIR /opt/self/build diff --git a/docker/x86_gfx90a/Dockerfile b/docker/x86_gfx90a/Dockerfile new file mode 100644 index 00000000..c7df3157 --- /dev/null +++ b/docker/x86_gfx90a/Dockerfile @@ -0,0 +1,26 @@ +ARG SELFISH_IMAGE=higherordermethods/selfish:latest-x86-rocm643-gfx90a +FROM ${SELFISH_IMAGE} + +ARG SELFISH_SHA="" +LABEL org.opencontainers.image.base.name="${SELFISH_IMAGE}" +LABEL org.opencontainers.image.base.digest="${SELFISH_SHA}" + +COPY . /opt/self/src + +RUN . /opt/spack-environment/activate.sh && \ + mkdir -p /opt/self/build && \ + cd /opt/self/build && \ + FC=gfortran cmake \ + -DCMAKE_INSTALL_PREFIX=/opt/self/install \ + -DCMAKE_BUILD_TYPE=Release \ + -DSELF_ENABLE_GPU=ON \ + -DSELF_GPU_BACKEND=HIP \ + -DSELF_ENABLE_TESTING=ON \ + -DCMAKE_HIP_ARCHITECTURES="gfx90a" \ + -DGPU_TARGETS="gfx90a" \ + -DSELF_ENABLE_EXAMPLES=ON \ + /opt/self/src && \ + make -j$(nproc) && \ + make install + +WORKDIR /opt/self/build diff --git a/docker/x86_sm70/Dockerfile b/docker/x86_sm70/Dockerfile new file mode 100644 index 00000000..f8c7d8da --- /dev/null +++ b/docker/x86_sm70/Dockerfile @@ -0,0 +1,25 @@ +ARG SELFISH_IMAGE=higherordermethods/selfish:latest-x86-cuda124-sm70 +FROM ${SELFISH_IMAGE} + +ARG SELFISH_SHA="" +LABEL org.opencontainers.image.base.name="${SELFISH_IMAGE}" +LABEL org.opencontainers.image.base.digest="${SELFISH_SHA}" + +COPY . 
/opt/self/src + +RUN . /opt/spack-environment/activate.sh && \ + mkdir -p /opt/self/build && \ + cd /opt/self/build && \ + FC=gfortran cmake \ + -DCMAKE_INSTALL_PREFIX=/opt/self/install \ + -DCMAKE_BUILD_TYPE=Release \ + -DSELF_ENABLE_GPU=ON \ + -DSELF_GPU_BACKEND=CUDA \ + -DSELF_ENABLE_TESTING=ON \ + -DCMAKE_CUDA_ARCHITECTURES="70" \ + -DSELF_ENABLE_EXAMPLES=ON \ + /opt/self/src && \ + make -j$(nproc) && \ + make install + +WORKDIR /opt/self/build From 215dfa550ad9215669e48cacb3016ba36c46cc82 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Sat, 14 Mar 2026 20:52:52 -0400 Subject: [PATCH 3/4] Split coverage pipelines into separate build, test, and coverage steps Build steps no longer request GPU resources. Only test steps require GPUs. Coverage generation and upload run without GPU allocation. Release pipeline depends_on updated to match new test step keys. Co-Authored-By: Claude Opus 4.6 (1M context) --- .buildkite/amd-mi210-debug-tests.yml | 43 +++++++++++++++++++++----- .buildkite/nvidia-v100-debug-tests.yml | 39 ++++++++++++++++++++--- .buildkite/release-builds.yml | 6 ++-- .buildkite/x86-cpu-debug-tests.yml | 36 +++++++++++++++++++-- 4 files changed, 106 insertions(+), 18 deletions(-) diff --git a/.buildkite/amd-mi210-debug-tests.yml b/.buildkite/amd-mi210-debug-tests.yml index 5a685a2d..c215c738 100644 --- a/.buildkite/amd-mi210-debug-tests.yml +++ b/.buildkite/amd-mi210-debug-tests.yml @@ -1,11 +1,10 @@ steps: - - label: ":partyparrot: Build & Test AMD MI210 (Coverage)" - key: "build-test-mi210" + - label: ":cmake: Build AMD MI210 (Coverage)" + key: "build-mi210" command: | source /opt/spack-environment/activate.sh export WORKSPACE=/workspace mkdir -p /workspace/build - rocminfo cd /workspace/build FC=gfortran cmake -DCMAKE_INSTALL_PREFIX=/workspace/opt/self \ -DCMAKE_BUILD_TYPE="coverage" \ @@ -21,7 +20,38 @@ steps: lcov --capture --initial \ --directory /workspace/build/src/ \ --output-file /workspace/initial.info + env: + slurm_partition: "main" 
+ slurm_time: "01:00:00" + slurm_nodes: 1 + slurm_ntasks: 2 + slurm_cpus_per_task: 8 + slurm_container_image: "docker://higherordermethods/selfish:latest-x86-rocm643-gfx90a" + agents: + queue: "galapagos" + + - label: ":test_tube: Test AMD MI210 (Coverage)" + key: "test-mi210" + depends_on: "build-mi210" + command: | + source /opt/spack-environment/activate.sh ctest --test-dir /workspace/build --output-on-failure + env: + slurm_partition: "main" + slurm_gres: "gpu:mi210:2" + slurm_time: "01:00:00" + slurm_nodes: 1 + slurm_ntasks: 2 + slurm_cpus_per_task: 8 + slurm_container_image: "docker://higherordermethods/selfish:latest-x86-rocm643-gfx90a" + agents: + queue: "galapagos" + + - label: ":coverage: Coverage AMD MI210" + key: "coverage-mi210" + depends_on: "test-mi210" + command: | + source /opt/spack-environment/activate.sh lcov --capture \ --directory /workspace/build/src/ \ --output-file /workspace/ctest-capture.info @@ -30,7 +60,6 @@ steps: --output-file /workspace/coverage.info env: slurm_partition: "main" - slurm_gres: "gpu:mi210:2" slurm_time: "01:00:00" slurm_nodes: 1 slurm_ntasks: 2 @@ -39,9 +68,9 @@ steps: agents: queue: "galapagos" - - label: ":codecov: Upload Coverage" - key: "upload-coverage" - depends_on: "build-test-mi210" + - label: ":codecov: Upload Coverage (MI210)" + key: "upload-coverage-mi210" + depends_on: "coverage-mi210" command: | curl -Os https://uploader.codecov.io/latest/linux/codecov chmod +x codecov diff --git a/.buildkite/nvidia-v100-debug-tests.yml b/.buildkite/nvidia-v100-debug-tests.yml index fdf8c4c5..7992b4dc 100644 --- a/.buildkite/nvidia-v100-debug-tests.yml +++ b/.buildkite/nvidia-v100-debug-tests.yml @@ -1,11 +1,10 @@ steps: - - label: ":partyparrot: Build & Test NVIDIA V100 (Coverage)" - key: "build-test-v100" + - label: ":cmake: Build NVIDIA V100 (Coverage)" + key: "build-v100" command: | source /opt/spack-environment/activate.sh export WORKSPACE=/workspace mkdir -p /workspace/build - nvidia-smi cd /workspace/build 
FC=gfortran cmake -DCMAKE_INSTALL_PREFIX=/workspace/opt/self \ -DCMAKE_BUILD_TYPE="coverage" \ @@ -20,7 +19,38 @@ steps: lcov --capture --initial \ --directory /workspace/build/src/ \ --output-file /workspace/initial.info + env: + slurm_partition: "main" + slurm_time: "01:00:00" + slurm_nodes: 1 + slurm_ntasks: 2 + slurm_cpus_per_task: 8 + slurm_container_image: "docker://higherordermethods/selfish:latest-x86-cuda124-sm70" + agents: + queue: "galapagos" + + - label: ":test_tube: Test NVIDIA V100 (Coverage)" + key: "test-v100" + depends_on: "build-v100" + command: | + source /opt/spack-environment/activate.sh ctest --test-dir /workspace/build --output-on-failure + env: + slurm_partition: "main" + slurm_gres: "gpu:v100:2" + slurm_time: "01:00:00" + slurm_nodes: 1 + slurm_ntasks: 2 + slurm_cpus_per_task: 8 + slurm_container_image: "docker://higherordermethods/selfish:latest-x86-cuda124-sm70" + agents: + queue: "galapagos" + + - label: ":coverage: Coverage NVIDIA V100" + key: "coverage-v100" + depends_on: "test-v100" + command: | + source /opt/spack-environment/activate.sh lcov --capture \ --directory /workspace/build/src/ \ --output-file /workspace/ctest-capture.info @@ -29,7 +59,6 @@ steps: --output-file /workspace/coverage.info env: slurm_partition: "main" - slurm_gres: "gpu:v100:2" slurm_time: "01:00:00" slurm_nodes: 1 slurm_ntasks: 2 @@ -40,7 +69,7 @@ steps: - label: ":codecov: Upload Coverage (V100)" key: "upload-coverage-v100" - depends_on: "build-test-v100" + depends_on: "coverage-v100" command: | curl -Os https://uploader.codecov.io/latest/linux/codecov chmod +x codecov diff --git a/.buildkite/release-builds.yml b/.buildkite/release-builds.yml index 9c5397c9..f864cd7b 100644 --- a/.buildkite/release-builds.yml +++ b/.buildkite/release-builds.yml @@ -2,7 +2,7 @@ steps: # ---- AMD MI210 (gfx90a) Release ---- - label: ":docker: Build Release Image (x86-rocm643-gfx90a)" key: "docker-build-gfx90a" - depends_on: "build-test-mi210" + depends_on: "test-mi210" command: 
| SELFISH_IMAGE="higherordermethods/selfish:latest-x86-rocm643-gfx90a" docker pull "$${SELFISH_IMAGE}" @@ -50,7 +50,7 @@ steps: # ---- NVIDIA V100 (sm70) Release ---- - label: ":docker: Build Release Image (x86-cuda124-sm70)" key: "docker-build-sm70" - depends_on: "build-test-v100" + depends_on: "test-v100" command: | SELFISH_IMAGE="higherordermethods/selfish:latest-x86-cuda124-sm70" docker pull "$${SELFISH_IMAGE}" @@ -98,7 +98,7 @@ steps: # ---- x86 CPU-only Release ---- - label: ":docker: Build Release Image (x86)" key: "docker-build-x86" - depends_on: "build-test-x86-cpu" + depends_on: "test-x86-cpu" command: | SELFISH_IMAGE="higherordermethods/selfish:latest-x86" docker pull "$${SELFISH_IMAGE}" diff --git a/.buildkite/x86-cpu-debug-tests.yml b/.buildkite/x86-cpu-debug-tests.yml index 2c689100..e8fe9909 100644 --- a/.buildkite/x86-cpu-debug-tests.yml +++ b/.buildkite/x86-cpu-debug-tests.yml @@ -1,6 +1,6 @@ steps: - - label: ":partyparrot: Build & Test x86 CPU (Coverage)" - key: "build-test-x86-cpu" + - label: ":cmake: Build x86 CPU (Coverage)" + key: "build-x86-cpu" command: | source /opt/spack-environment/activate.sh export WORKSPACE=/workspace @@ -17,7 +17,37 @@ steps: lcov --capture --initial \ --directory /workspace/build/src/ \ --output-file /workspace/initial.info + env: + slurm_partition: "main" + slurm_time: "01:00:00" + slurm_nodes: 1 + slurm_ntasks: 2 + slurm_cpus_per_task: 8 + slurm_container_image: "docker://higherordermethods/selfish:latest-x86" + agents: + queue: "galapagos" + + - label: ":test_tube: Test x86 CPU (Coverage)" + key: "test-x86-cpu" + depends_on: "build-x86-cpu" + command: | + source /opt/spack-environment/activate.sh ctest --test-dir /workspace/build --output-on-failure + env: + slurm_partition: "main" + slurm_time: "01:00:00" + slurm_nodes: 1 + slurm_ntasks: 2 + slurm_cpus_per_task: 8 + slurm_container_image: "docker://higherordermethods/selfish:latest-x86" + agents: + queue: "galapagos" + + - label: ":coverage: Coverage x86 CPU" + 
key: "coverage-x86-cpu" + depends_on: "test-x86-cpu" + command: | + source /opt/spack-environment/activate.sh lcov --capture \ --directory /workspace/build/src/ \ --output-file /workspace/ctest-capture.info @@ -36,7 +66,7 @@ steps: - label: ":codecov: Upload Coverage (x86 CPU)" key: "upload-coverage-x86-cpu" - depends_on: "build-test-x86-cpu" + depends_on: "coverage-x86-cpu" command: | curl -Os https://uploader.codecov.io/latest/linux/codecov chmod +x codecov From 27ed891de9989bc7a269d2ca4d2acea993a4bf8b Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Sat, 14 Mar 2026 21:08:19 -0400 Subject: [PATCH 4/4] Separate PR and main branch pipeline behavior PRs run coverage tests then release build/test (no push). Main branch runs only release build, test, and push to dockerhub. Uses Buildkite if conditions to select the appropriate pipeline. Co-Authored-By: Claude Opus 4.6 (1M context) --- .buildkite/pipeline.yml | 10 +++ .buildkite/release-and-publish.yml | 137 +++++++++++++++++++++++++++++ .buildkite/release-builds.yml | 39 -------- 3 files changed, 147 insertions(+), 39 deletions(-) create mode 100644 .buildkite/release-and-publish.yml diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 4c15ff75..d2eaa9a3 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -1,12 +1,22 @@ steps: + # PR builds: coverage tests + release build/test (no push) - label: ":pipeline: AMD MI210 Coverage Tests" + if: build.branch != "main" command: buildkite-agent pipeline upload .buildkite/amd-mi210-debug-tests.yml - label: ":pipeline: NVIDIA V100 Coverage Tests" + if: build.branch != "main" command: buildkite-agent pipeline upload .buildkite/nvidia-v100-debug-tests.yml - label: ":pipeline: x86 CPU Coverage Tests" + if: build.branch != "main" command: buildkite-agent pipeline upload .buildkite/x86-cpu-debug-tests.yml - label: ":pipeline: Release Builds" + if: build.branch != "main" command: buildkite-agent pipeline upload .buildkite/release-builds.yml + + # 
Main branch: release build, test, and publish only + - label: ":pipeline: Release & Publish" + if: build.branch == "main" + command: buildkite-agent pipeline upload .buildkite/release-and-publish.yml diff --git a/.buildkite/release-and-publish.yml b/.buildkite/release-and-publish.yml new file mode 100644 index 00000000..1afba292 --- /dev/null +++ b/.buildkite/release-and-publish.yml @@ -0,0 +1,137 @@ +steps: + # ---- AMD MI210 (gfx90a) Release ---- + - label: ":docker: Build Release Image (x86-rocm643-gfx90a)" + key: "docker-build-gfx90a" + command: | + SELFISH_IMAGE="higherordermethods/selfish:latest-x86-rocm643-gfx90a" + docker pull "$${SELFISH_IMAGE}" + SELFISH_SHA=$$(docker inspect --format='{{index .RepoDigests 0}}' "$${SELFISH_IMAGE}" | cut -d@ -f2) + docker build \ + --build-arg SELFISH_IMAGE="$${SELFISH_IMAGE}" \ + --build-arg SELFISH_SHA="$${SELFISH_SHA}" \ + -t higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86-rocm643-gfx90a \ + -t higherordermethods/self:latest-x86-rocm643-gfx90a \ + -f docker/x86_gfx90a/Dockerfile . 
+ env: + slurm_nodelist: "oram" + agents: + queue: "galapagos" + + - label: ":test_tube: Test Release Image (x86-rocm643-gfx90a)" + key: "test-release-gfx90a" + depends_on: "docker-build-gfx90a" + command: | + source /opt/spack-environment/activate.sh + ctest --test-dir /opt/self/build --output-on-failure + env: + slurm_partition: "main" + slurm_gres: "gpu:mi210:2" + slurm_time: "01:00:00" + slurm_nodes: 1 + slurm_ntasks: 2 + slurm_cpus_per_task: 8 + slurm_container_image: "docker://higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86-rocm643-gfx90a" + agents: + queue: "galapagos" + + - label: ":rocket: Push Release Image (x86-rocm643-gfx90a)" + key: "push-release-gfx90a" + depends_on: "test-release-gfx90a" + command: | + docker push higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86-rocm643-gfx90a + docker push higherordermethods/self:latest-x86-rocm643-gfx90a + env: + slurm_nodelist: "oram" + agents: + queue: "galapagos" + + # ---- NVIDIA V100 (sm70) Release ---- + - label: ":docker: Build Release Image (x86-cuda124-sm70)" + key: "docker-build-sm70" + command: | + SELFISH_IMAGE="higherordermethods/selfish:latest-x86-cuda124-sm70" + docker pull "$${SELFISH_IMAGE}" + SELFISH_SHA=$$(docker inspect --format='{{index .RepoDigests 0}}' "$${SELFISH_IMAGE}" | cut -d@ -f2) + docker build \ + --build-arg SELFISH_IMAGE="$${SELFISH_IMAGE}" \ + --build-arg SELFISH_SHA="$${SELFISH_SHA}" \ + -t higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86-cuda124-sm70 \ + -t higherordermethods/self:latest-x86-cuda124-sm70 \ + -f docker/x86_sm70/Dockerfile . 
+ env: + slurm_nodelist: "oram" + agents: + queue: "galapagos" + + - label: ":test_tube: Test Release Image (x86-cuda124-sm70)" + key: "test-release-sm70" + depends_on: "docker-build-sm70" + command: | + source /opt/spack-environment/activate.sh + ctest --test-dir /opt/self/build --output-on-failure + env: + slurm_partition: "main" + slurm_gres: "gpu:v100:2" + slurm_time: "01:00:00" + slurm_nodes: 1 + slurm_ntasks: 2 + slurm_cpus_per_task: 8 + slurm_container_image: "docker://higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86-cuda124-sm70" + agents: + queue: "galapagos" + + - label: ":rocket: Push Release Image (x86-cuda124-sm70)" + key: "push-release-sm70" + depends_on: "test-release-sm70" + command: | + docker push higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86-cuda124-sm70 + docker push higherordermethods/self:latest-x86-cuda124-sm70 + env: + slurm_nodelist: "oram" + agents: + queue: "galapagos" + + # ---- x86 CPU-only Release ---- + - label: ":docker: Build Release Image (x86)" + key: "docker-build-x86" + command: | + SELFISH_IMAGE="higherordermethods/selfish:latest-x86" + docker pull "$${SELFISH_IMAGE}" + SELFISH_SHA=$$(docker inspect --format='{{index .RepoDigests 0}}' "$${SELFISH_IMAGE}" | cut -d@ -f2) + docker build \ + --build-arg SELFISH_IMAGE="$${SELFISH_IMAGE}" \ + --build-arg SELFISH_SHA="$${SELFISH_SHA}" \ + -t higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86 \ + -t higherordermethods/self:latest-x86 \ + -f docker/x86/Dockerfile . 
+ env: + slurm_nodelist: "oram" + agents: + queue: "galapagos" + + - label: ":test_tube: Test Release Image (x86)" + key: "test-release-x86" + depends_on: "docker-build-x86" + command: | + source /opt/spack-environment/activate.sh + ctest --test-dir /opt/self/build --output-on-failure + env: + slurm_partition: "main" + slurm_time: "01:00:00" + slurm_nodes: 1 + slurm_ntasks: 2 + slurm_cpus_per_task: 8 + slurm_container_image: "docker://higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86" + agents: + queue: "galapagos" + + - label: ":rocket: Push Release Image (x86)" + key: "push-release-x86" + depends_on: "test-release-x86" + command: | + docker push higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86 + docker push higherordermethods/self:latest-x86 + env: + slurm_nodelist: "oram" + agents: + queue: "galapagos" diff --git a/.buildkite/release-builds.yml b/.buildkite/release-builds.yml index f864cd7b..7b6ef92b 100644 --- a/.buildkite/release-builds.yml +++ b/.buildkite/release-builds.yml @@ -12,7 +12,6 @@ steps: --build-arg SELFISH_SHA="$${SELFISH_SHA}" \ -t higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86-rocm643-gfx90a \ -f docker/x86_gfx90a/Dockerfile . 
- docker push higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86-rocm643-gfx90a env: slurm_nodelist: "oram" agents: @@ -35,18 +34,6 @@ steps: agents: queue: "galapagos" - - label: ":rocket: Push Release Image (x86-rocm643-gfx90a)" - key: "push-release-gfx90a" - depends_on: "test-release-gfx90a" - command: | - docker tag higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86-rocm643-gfx90a \ - higherordermethods/self:latest-x86-rocm643-gfx90a - docker push higherordermethods/self:latest-x86-rocm643-gfx90a - env: - slurm_nodelist: "oram" - agents: - queue: "galapagos" - # ---- NVIDIA V100 (sm70) Release ---- - label: ":docker: Build Release Image (x86-cuda124-sm70)" key: "docker-build-sm70" @@ -60,7 +47,6 @@ steps: --build-arg SELFISH_SHA="$${SELFISH_SHA}" \ -t higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86-cuda124-sm70 \ -f docker/x86_sm70/Dockerfile . - docker push higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86-cuda124-sm70 env: slurm_nodelist: "oram" agents: @@ -83,18 +69,6 @@ steps: agents: queue: "galapagos" - - label: ":rocket: Push Release Image (x86-cuda124-sm70)" - key: "push-release-sm70" - depends_on: "test-release-sm70" - command: | - docker tag higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86-cuda124-sm70 \ - higherordermethods/self:latest-x86-cuda124-sm70 - docker push higherordermethods/self:latest-x86-cuda124-sm70 - env: - slurm_nodelist: "oram" - agents: - queue: "galapagos" - # ---- x86 CPU-only Release ---- - label: ":docker: Build Release Image (x86)" key: "docker-build-x86" @@ -108,7 +82,6 @@ steps: --build-arg SELFISH_SHA="$${SELFISH_SHA}" \ -t higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86 \ -f docker/x86/Dockerfile . 
- docker push higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86 env: slurm_nodelist: "oram" agents: @@ -129,15 +102,3 @@ steps: slurm_container_image: "docker://higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86" agents: queue: "galapagos" - - - label: ":rocket: Push Release Image (x86)" - key: "push-release-x86" - depends_on: "test-release-x86" - command: | - docker tag higherordermethods/self:$${BUILDKITE_BUILD_NUMBER}-x86 \ - higherordermethods/self:latest-x86 - docker push higherordermethods/self:latest-x86 - env: - slurm_nodelist: "oram" - agents: - queue: "galapagos"