diff --git a/.github/workflows/build-nvidia.yml b/.github/workflows/build-nvidia.yml
new file mode 100644
index 0000000..d000527
--- /dev/null
+++ b/.github/workflows/build-nvidia.yml
@@ -0,0 +1,96 @@
+name: Build NVIDIA Docker images
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - envs/x86/sm70/**
+      - envs/x86/sm100/**
+      - .github/workflows/build-nvidia.yml
+  pull_request:
+    paths:
+      - envs/x86/sm70/**
+      - envs/x86/sm100/**
+      - .github/workflows/build-nvidia.yml
+  workflow_dispatch:
+
+env:
+  REGISTRY: docker.io
+  IMAGE_NAME: higherordermethods/selfish
+
+jobs:
+  build:
+    name: Build ${{ matrix.gpu_arch }} image
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - gpu_arch: sm70
+            cuda_version: "12.4"
+          - gpu_arch: sm100
+            cuda_version: "13.0"
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Log in to Docker Hub
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Set up Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Generate image metadata
+        id: meta
+        run: |
+          # Convert CUDA version 12.4 -> cuda124
+          VERSION_NO_DOTS=$(echo "${{ matrix.cuda_version }}" | tr -d '.')
+          GPU_BACKEND="cuda${VERSION_NO_DOTS}"
+
+          CPU_PLATFORM="x86"
+          GPU_ARCH="${{ matrix.gpu_arch }}"
+
+          # Tags: <latest|git sha>-<cpu platform>-<gpu backend>-<gpu arch>
+          echo "tags<<EOF" >> $GITHUB_OUTPUT
+          echo "${{ env.IMAGE_NAME }}:latest-${CPU_PLATFORM}-${GPU_BACKEND}-${GPU_ARCH}" >> $GITHUB_OUTPUT
+          echo "${{ env.IMAGE_NAME }}:${{ github.sha }}-${CPU_PLATFORM}-${GPU_BACKEND}-${GPU_ARCH}" >> $GITHUB_OUTPUT
+          echo "EOF" >> $GITHUB_OUTPUT
+
+          echo "gpu_backend=${GPU_BACKEND}" >> $GITHUB_OUTPUT
+
+      - name: Cache Docker layers
+        uses: actions/cache@v4
+        with:
+          path: /tmp/.buildx-cache
+          key: ${{ runner.os }}-buildx-${{ matrix.gpu_arch }}-${{ github.sha }}
+          restore-keys: |
+            ${{ runner.os }}-buildx-${{ matrix.gpu_arch }}-
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: envs/x86/${{ matrix.gpu_arch }}/Dockerfile
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.meta.outputs.tags }}
+          build-args: |
+            CUDA_VERSION=${{ matrix.cuda_version }}
+          cache-from: type=local,src=/tmp/.buildx-cache
+          cache-to: type=local,dest=/tmp/.buildx-cache-new,mode=max
+          labels: |
+            com.fluidnumerics.cuda.target=${{ matrix.gpu_arch }}
+            com.fluidnumerics.cuda.version=${{ matrix.cuda_version }}
+            org.opencontainers.image.source=${{ github.server_url }}/${{ github.repository }}
+            org.opencontainers.image.revision=${{ github.sha }}
+
+      - name: Move cache
+        run: |
+          rm -rf /tmp/.buildx-cache
+          mv /tmp/.buildx-cache-new /tmp/.buildx-cache
diff --git a/envs/x86/sm100/Dockerfile b/envs/x86/sm100/Dockerfile
new file mode 100644
index 0000000..f3be8ee
--- /dev/null
+++ b/envs/x86/sm100/Dockerfile
@@ -0,0 +1,182 @@
+FROM docker.io/rockylinux:9 AS bootstrap
+
+ARG CUDA_VERSION=13.0
+
+ENV SPACK_ROOT=/opt/spack \
+    CURRENTLY_BUILDING_DOCKER_IMAGE=1 \
+    container=docker
+
+RUN dnf update -y \
+ && dnf install -y epel-release \
+ && dnf update -y \
+ && dnf --enablerepo epel install -y \
+        bzip2 \
+        cmake \
+        curl-minimal \
+        file \
+        findutils \
+        gcc-c++ \
+        gcc \
+        gcc-gfortran \
+        git \
+        gnupg2 \
+        hg \
+        hostname \
+        iproute \
+        make \
+        patch \
+        python3 \
+        python3-pip \
+        python3-setuptools \
+        svn \
+        unzip \
+        xz \
+        zstd \
+ && pip3 install boto3 \
+ && rm -rf /var/cache/dnf \
+ && dnf clean all
+
+# Install CUDA toolkit from NVIDIA repo
+RUN dnf config-manager \
+        --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
+ && dnf clean all \
+ && dnf update -y \
+ && dnf install -y cuda-toolkit-$(echo ${CUDA_VERSION} | tr '.' '-')
+
+RUN ls -l /usr/local/cuda-${CUDA_VERSION}/include/cuda.h
+
+RUN mkdir $SPACK_ROOT && cd $SPACK_ROOT && \
+    git init --quiet && git remote add origin https://github.com/spack/spack.git && git fetch --depth=1 origin develop && git checkout --detach FETCH_HEAD && \
+    mkdir -p $SPACK_ROOT/opt/spack
+
+RUN ln -s $SPACK_ROOT/share/spack/docker/entrypoint.bash \
+          /usr/local/bin/docker-shell \
+ && ln -s $SPACK_ROOT/share/spack/docker/entrypoint.bash \
+          /usr/local/bin/interactive-shell \
+ && ln -s $SPACK_ROOT/share/spack/docker/entrypoint.bash \
+          /usr/local/bin/spack-env
+
+RUN mkdir -p /root/.spack \
+ && cp $SPACK_ROOT/share/spack/docker/modules.yaml \
+        /root/.spack/modules.yaml \
+ && rm -rf /root/*.* /run/nologin
+
+# [WORKAROUND]
+# https://superuser.com/questions/1241548/
+#     xubuntu-16-04-ttyname-failed-inappropriate-ioctl-for-device#1253889
+RUN [ -f ~/.profile ] \
+ && sed -i 's/mesg n/( tty -s \\&\\& mesg n || true )/g' ~/.profile \
+ || true
+
+
+WORKDIR /root
+SHELL ["docker-shell"]
+
+# Creates the package cache
+RUN spack bootstrap now \
+    && spack bootstrap status --optional \
+    && spack spec hdf5+mpi
+
+ENTRYPOINT ["/bin/bash", "/opt/spack/share/spack/docker/entrypoint.bash"]
+CMD ["interactive-shell"]
+
+# Build stage with Spack pre-installed and ready to be used
+FROM bootstrap AS builder
+
+
+# What we want to install and how we want to install it
+# is specified in a manifest file (spack.yaml)
+RUN mkdir -p /opt/spack-environment && \
+set -o noclobber \
+&&  (echo spack: \
+&&   echo '  specs:' \
+&&   echo '  - feq-parse@2.2.2' \
+&&   echo '  - openmpi@5.0.8 +cuda cuda_arch=100' \
+&&   echo '  - hdf5@1.14.5 +fortran +mpi' \
+&&   echo '  - cmake@3.31.11'\
+&&   echo '  packages:' \
+&&   echo '    all:' \
+&&   echo '      require:' \
+&&   echo '      - target=x86_64_v3' \
+&&   echo '      prefer:' \
+&&   echo '      - cuda_arch=100' \
+&&   echo '    cuda:' \
+&&   echo '      buildable: false' \
+&&   echo '      externals:' \
+&&   echo "      - spec: \"cuda@${CUDA_VERSION}\"" \
+&&   echo "        prefix: \"/usr/local/cuda-${CUDA_VERSION}\"" \
+&&   echo '' \
+&&   echo '  concretizer:' \
+&&   echo '    unify: true' \
+&&   echo '  config:' \
+&&   echo '    install_tree:' \
+&&   echo '      root: /opt/software' \
+&&   echo '  view: /opt/views/view') > /opt/spack-environment/spack.yaml
+
+# Apply feq-parse patch to add "c" build dependency
+COPY ./envs/x86/sm100/feq-parse.patch /tmp/feq-parse.patch
+# Drop the repos/spack_repo/builtin suffix so that patch -p1 resolves the patch's repos/... paths
+RUN SPACK_PKGS_ROOT=$(spack repo list | awk '{print $NF}') &&\
+    SPACK_BUILTIN_PKGS_ROOT=${SPACK_PKGS_ROOT/repos\/spack_repo\/builtin} &&\
+    patch -p1 -d $SPACK_BUILTIN_PKGS_ROOT < /tmp/feq-parse.patch
+
+# Install the software, remove unnecessary deps
+RUN cd /opt/spack-environment && spack env activate . && spack repo list && spack install --fail-fast && spack gc -y
+
+# Strip all the binaries
+RUN find -L /opt/views/view/* -type f -exec readlink -f '{}' \; | \
+    xargs file -i | \
+    grep 'charset=binary' | \
+    grep 'x-executable\|x-archive\|x-sharedlib' | \
+    awk -F: '{print $1}' | xargs strip
+
+# Modifications to the environment that are necessary to run
+RUN cd /opt/spack-environment && \
+    spack env activate --sh -d . > activate.sh
+
+
+# Bare OS image to run the installed executables
+FROM docker.io/rockylinux:9
+
+COPY --from=builder /opt/spack-environment /opt/spack-environment
+COPY --from=builder /opt/software /opt/software
+
+RUN dnf update -y \
+ && dnf install -y epel-release \
+ && dnf update -y \
+ && dnf --enablerepo epel install -y \
+        bzip2 \
+        cmake \
+        curl-minimal \
+        file \
+        findutils \
+        gcc-c++ \
+        gcc \
+        gcc-gfortran \
+        lcov
+
+# Install CUDA runtime libraries
+ARG CUDA_VERSION=13.0
+RUN dnf config-manager \
+        --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
+ && dnf clean all \
+ && dnf update -y \
+ && dnf install -y \
+        cuda-libraries-$(echo ${CUDA_VERSION} | tr '.' '-') \
+        cuda-nvtx-$(echo ${CUDA_VERSION} | tr '.' '-')
+
+# paths.view is a symlink, so copy the parent to avoid dereferencing and duplicating it
+COPY --from=builder /opt/views /opt/views
+
+RUN { \
+      echo '#!/bin/sh' \
+      && echo '.' /opt/spack-environment/activate.sh \
+      && echo 'exec "$@"'; \
+    } > /entrypoint.sh \
+&& chmod a+x /entrypoint.sh \
+&& ln -s /opt/views/view /opt/view
+
+
+LABEL "mpi"="openmpi"
+ENTRYPOINT [ "/entrypoint.sh" ]
+CMD [ "/bin/bash" ]
diff --git a/envs/x86/sm100/feq-parse.patch b/envs/x86/sm100/feq-parse.patch
new file mode 100644
index 0000000..399f387
--- /dev/null
+++ b/envs/x86/sm100/feq-parse.patch
@@ -0,0 +1,12 @@
+diff --git a/repos/spack_repo/builtin/packages/feq_parse/package.py b/repos/spack_repo/builtin/packages/feq_parse/package.py
+index e4b960b7..bc0916b9 100644
+--- a/repos/spack_repo/builtin/packages/feq_parse/package.py
++++ b/repos/spack_repo/builtin/packages/feq_parse/package.py
+@@ -29,6 +29,7 @@ class FeqParse(CMakePackage):
+     version("1.0.2", sha256="1cd1db7562908ea16fc65dc5268b654405d0b3d9dcfe11f409949c431b48a3e8")
+ 
+     depends_on("fortran", type="build")  # generated
++    depends_on("c", type="build")  # generated
+ 
+     depends_on("cmake@3.0.2:", type="build")
+ 
diff --git a/envs/x86/sm100/spack.yaml b/envs/x86/sm100/spack.yaml
new file mode 100644
index 0000000..01f0c8b
--- /dev/null
+++ b/envs/x86/sm100/spack.yaml
@@ -0,0 +1,24 @@
+spack:
+  specs:
+  - feq-parse@2.2.2
+  - openmpi@5.0.8 +cuda cuda_arch=100
+  - hdf5@1.14.5 +fortran +mpi
+
+  packages:
+    all:
+      require:
+      - "target=x86_64_v3"
+      prefer:
+      - "cuda_arch=100"
+
+  container:
+    format: docker
+    images:
+      os: rockylinux:9
+      spack:
+        ref: v1.0.2
+
+    strip: true
+
+    labels:
+      mpi: openmpi
diff --git a/envs/x86/sm70/Dockerfile b/envs/x86/sm70/Dockerfile
new file mode 100644
index 0000000..5ae2dab
--- /dev/null
+++ b/envs/x86/sm70/Dockerfile
@@ -0,0 +1,182 @@
+FROM docker.io/rockylinux:9 AS bootstrap
+
+ARG CUDA_VERSION=12.4
+
+ENV SPACK_ROOT=/opt/spack \
+    CURRENTLY_BUILDING_DOCKER_IMAGE=1 \
+    container=docker
+
+RUN dnf update -y \
+ && dnf install -y epel-release \
+ && dnf update -y \
+ && dnf --enablerepo epel install -y \
+        bzip2 \
+        cmake \
+        curl-minimal \
+        file \
+        findutils \
+        gcc-c++ \
+        gcc \
+        gcc-gfortran \
+        git \
+        gnupg2 \
+        hg \
+        hostname \
+        iproute \
+        make \
+        patch \
+        python3 \
+        python3-pip \
+        python3-setuptools \
+        svn \
+        unzip \
+        xz \
+        zstd \
+ && pip3 install boto3 \
+ && rm -rf /var/cache/dnf \
+ && dnf clean all
+
+# Install CUDA toolkit from NVIDIA repo
+RUN dnf config-manager \
+        --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
+ && dnf clean all \
+ && dnf update -y \
+ && dnf install -y cuda-toolkit-$(echo ${CUDA_VERSION} | tr '.' '-')
+
+RUN ls -l /usr/local/cuda-${CUDA_VERSION}/include/cuda.h
+
+RUN mkdir $SPACK_ROOT && cd $SPACK_ROOT && \
+    git init --quiet && git remote add origin https://github.com/spack/spack.git && git fetch --depth=1 origin develop && git checkout --detach FETCH_HEAD && \
+    mkdir -p $SPACK_ROOT/opt/spack
+
+RUN ln -s $SPACK_ROOT/share/spack/docker/entrypoint.bash \
+          /usr/local/bin/docker-shell \
+ && ln -s $SPACK_ROOT/share/spack/docker/entrypoint.bash \
+          /usr/local/bin/interactive-shell \
+ && ln -s $SPACK_ROOT/share/spack/docker/entrypoint.bash \
+          /usr/local/bin/spack-env
+
+RUN mkdir -p /root/.spack \
+ && cp $SPACK_ROOT/share/spack/docker/modules.yaml \
+        /root/.spack/modules.yaml \
+ && rm -rf /root/*.* /run/nologin
+
+# [WORKAROUND]
+# https://superuser.com/questions/1241548/
+#     xubuntu-16-04-ttyname-failed-inappropriate-ioctl-for-device#1253889
+RUN [ -f ~/.profile ] \
+ && sed -i 's/mesg n/( tty -s \\&\\& mesg n || true )/g' ~/.profile \
+ || true
+
+
+WORKDIR /root
+SHELL ["docker-shell"]
+
+# Creates the package cache
+RUN spack bootstrap now \
+    && spack bootstrap status --optional \
+    && spack spec hdf5+mpi
+
+ENTRYPOINT ["/bin/bash", "/opt/spack/share/spack/docker/entrypoint.bash"]
+CMD ["interactive-shell"]
+
+# Build stage with Spack pre-installed and ready to be used
+FROM bootstrap AS builder
+
+
+# What we want to install and how we want to install it
+# is specified in a manifest file (spack.yaml)
+RUN mkdir -p /opt/spack-environment && \
+set -o noclobber \
+&&  (echo spack: \
+&&   echo '  specs:' \
+&&   echo '  - feq-parse@2.2.2' \
+&&   echo '  - openmpi@5.0.8 +cuda cuda_arch=70' \
+&&   echo '  - hdf5@1.14.5 +fortran +mpi' \
+&&   echo '  - cmake@3.31.11'\
+&&   echo '  packages:' \
+&&   echo '    all:' \
+&&   echo '      require:' \
+&&   echo '      - target=x86_64_v3' \
+&&   echo '      prefer:' \
+&&   echo '      - cuda_arch=70' \
+&&   echo '    cuda:' \
+&&   echo '      buildable: false' \
+&&   echo '      externals:' \
+&&   echo "      - spec: \"cuda@${CUDA_VERSION}\"" \
+&&   echo "        prefix: \"/usr/local/cuda-${CUDA_VERSION}\"" \
+&&   echo '' \
+&&   echo '  concretizer:' \
+&&   echo '    unify: true' \
+&&   echo '  config:' \
+&&   echo '    install_tree:' \
+&&   echo '      root: /opt/software' \
+&&   echo '  view: /opt/views/view') > /opt/spack-environment/spack.yaml
+
+# Apply feq-parse patch to add "c" build dependency
+COPY ./envs/x86/sm70/feq-parse.patch /tmp/feq-parse.patch
+# Drop the repos/spack_repo/builtin suffix so that patch -p1 resolves the patch's repos/... paths
+RUN SPACK_PKGS_ROOT=$(spack repo list | awk '{print $NF}') &&\
+    SPACK_BUILTIN_PKGS_ROOT=${SPACK_PKGS_ROOT/repos\/spack_repo\/builtin} &&\
+    patch -p1 -d $SPACK_BUILTIN_PKGS_ROOT < /tmp/feq-parse.patch
+
+# Install the software, remove unnecessary deps
+RUN cd /opt/spack-environment && spack env activate . && spack repo list && spack install --fail-fast && spack gc -y
+
+# Strip all the binaries
+RUN find -L /opt/views/view/* -type f -exec readlink -f '{}' \; | \
+    xargs file -i | \
+    grep 'charset=binary' | \
+    grep 'x-executable\|x-archive\|x-sharedlib' | \
+    awk -F: '{print $1}' | xargs strip
+
+# Modifications to the environment that are necessary to run
+RUN cd /opt/spack-environment && \
+    spack env activate --sh -d . > activate.sh
+
+
+# Bare OS image to run the installed executables
+FROM docker.io/rockylinux:9
+
+COPY --from=builder /opt/spack-environment /opt/spack-environment
+COPY --from=builder /opt/software /opt/software
+
+RUN dnf update -y \
+ && dnf install -y epel-release \
+ && dnf update -y \
+ && dnf --enablerepo epel install -y \
+        bzip2 \
+        cmake \
+        curl-minimal \
+        file \
+        findutils \
+        gcc-c++ \
+        gcc \
+        gcc-gfortran \
+        lcov
+
+# Install CUDA runtime libraries
+ARG CUDA_VERSION=12.4
+RUN dnf config-manager \
+        --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
+ && dnf clean all \
+ && dnf update -y \
+ && dnf install -y \
+        cuda-libraries-$(echo ${CUDA_VERSION} | tr '.' '-') \
+        cuda-nvtx-$(echo ${CUDA_VERSION} | tr '.' '-')
+
+# paths.view is a symlink, so copy the parent to avoid dereferencing and duplicating it
+COPY --from=builder /opt/views /opt/views
+
+RUN { \
+      echo '#!/bin/sh' \
+      && echo '.' /opt/spack-environment/activate.sh \
+      && echo 'exec "$@"'; \
+    } > /entrypoint.sh \
+&& chmod a+x /entrypoint.sh \
+&& ln -s /opt/views/view /opt/view
+
+
+LABEL "mpi"="openmpi"
+ENTRYPOINT [ "/entrypoint.sh" ]
+CMD [ "/bin/bash" ]
diff --git a/envs/x86/sm70/feq-parse.patch b/envs/x86/sm70/feq-parse.patch
new file mode 100644
index 0000000..399f387
--- /dev/null
+++ b/envs/x86/sm70/feq-parse.patch
@@ -0,0 +1,12 @@
+diff --git a/repos/spack_repo/builtin/packages/feq_parse/package.py b/repos/spack_repo/builtin/packages/feq_parse/package.py
+index e4b960b7..bc0916b9 100644
+--- a/repos/spack_repo/builtin/packages/feq_parse/package.py
++++ b/repos/spack_repo/builtin/packages/feq_parse/package.py
+@@ -29,6 +29,7 @@ class FeqParse(CMakePackage):
+     version("1.0.2", sha256="1cd1db7562908ea16fc65dc5268b654405d0b3d9dcfe11f409949c431b48a3e8")
+ 
+     depends_on("fortran", type="build")  # generated
++    depends_on("c", type="build")  # generated
+ 
+     depends_on("cmake@3.0.2:", type="build")
+ 
diff --git a/envs/x86/sm70/spack.yaml b/envs/x86/sm70/spack.yaml
new file mode 100644
index 0000000..90f6c16
--- /dev/null
+++ b/envs/x86/sm70/spack.yaml
@@ -0,0 +1,24 @@
+spack:
+  specs:
+  - feq-parse@2.2.2
+  - openmpi@5.0.8 +cuda cuda_arch=70
+  - hdf5@1.14.5 +fortran +mpi
+
+  packages:
+    all:
+      require:
+      - "target=x86_64_v3"
+      prefer:
+      - "cuda_arch=70"
+
+  container:
+    format: docker
+    images:
+      os: rockylinux:9
+      spack:
+        ref: v1.0.2
+
+    strip: true
+
+    labels:
+      mpi: openmpi