Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions .github/workflows/build-nvidia.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# Workflow that builds one CUDA-enabled container image per GPU architecture.
name: Build NVIDIA Docker images

# Triggers:
#   - pushes to main that touch an NVIDIA environment directory or this file
#   - pull requests that touch the same paths
#   - manual runs from the Actions tab
on:
  push:
    branches:
      - main
    paths:
      - 'envs/x86/sm70/**'
      - 'envs/x86/sm100/**'
      - '.github/workflows/build-nvidia.yml'
  pull_request:
    paths:
      - 'envs/x86/sm70/**'
      - 'envs/x86/sm100/**'
      - '.github/workflows/build-nvidia.yml'
  workflow_dispatch:

# Workflow-level environment, visible to every job and step.
env:
  # Registry host. NOTE(review): no step in this workflow reads REGISTRY;
  # either wire it into the login/tag steps or drop it.
  REGISTRY: docker.io
  # Image repository that every generated tag is prefixed with.
  IMAGE_NAME: higherordermethods/selfish

jobs:
  # Builds the image for each (gpu_arch, cuda_version) pair in the matrix.
  # Images are pushed for every event except pull_request, where the build
  # only serves as a check.
  build:
    name: Build ${{ matrix.gpu_arch }} image
    runs-on: ubuntu-latest
    permissions:
      contents: read
    strategy:
      # Let the other architecture finish even if one build fails.
      fail-fast: false
      matrix:
        include:
          # Versions are quoted so they stay strings ("13.0", not 13).
          - gpu_arch: sm70
            cuda_version: "12.4"
          - gpu_arch: sm100
            cuda_version: "13.0"
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      # Credentials are only needed when the image is going to be pushed.
      - name: Log in to Docker Hub
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Set up Buildx
        uses: docker/setup-buildx-action@v3

      # Emits two outputs:
      #   tags        - newline-separated list of image tags
      #   gpu_backend - e.g. "cuda124" for CUDA 12.4
      - name: Generate image metadata
        id: meta
        # Expression values are handed to the script through the environment
        # instead of being spliced into the script text.
        env:
          CUDA_VERSION: ${{ matrix.cuda_version }}
          GPU_ARCH: ${{ matrix.gpu_arch }}
          COMMIT_SHA: ${{ github.sha }}
        run: |
          set -euo pipefail

          # Convert CUDA version 12.4 -> cuda124
          gpu_backend="cuda${CUDA_VERSION//./}"
          cpu_platform="x86"

          # Tags: <version>-<cpu_platform>-<gpu_backend>-<gpu_arch>
          tag_suffix="${cpu_platform}-${gpu_backend}-${GPU_ARCH}"

          {
            echo "tags<<EOF"
            echo "${IMAGE_NAME}:latest-${tag_suffix}"
            echo "${IMAGE_NAME}:${COMMIT_SHA}-${tag_suffix}"
            echo "EOF"
            echo "gpu_backend=${gpu_backend}"
          } >> "$GITHUB_OUTPUT"

      # The key is unique per commit; restore-keys falls back to the most
      # recent cache for the same architecture.
      - name: Cache Docker layers
        uses: actions/cache@v4
        with:
          path: /tmp/.buildx-cache
          key: ${{ runner.os }}-buildx-${{ matrix.gpu_arch }}-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-buildx-${{ matrix.gpu_arch }}-

      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: envs/x86/${{ matrix.gpu_arch }}/Dockerfile
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          build-args: |
            CUDA_VERSION=${{ matrix.cuda_version }}
          cache-from: type=local,src=/tmp/.buildx-cache
          # Written to a fresh directory; the next step swaps it into place.
          cache-to: type=local,dest=/tmp/.buildx-cache-new,mode=max
          labels: |
            com.fluidnumerics.cuda.target=${{ matrix.gpu_arch }}
            com.fluidnumerics.cuda.version=${{ matrix.cuda_version }}
            org.opencontainers.image.source=${{ github.server_url }}/${{ github.repository }}
            org.opencontainers.image.revision=${{ github.sha }}

      # Replace the restored cache with the one just written so the saved
      # cache contains only the layers from this build.
      - name: Move cache
        run: |
          rm -rf /tmp/.buildx-cache
          mv /tmp/.buildx-cache-new /tmp/.buildx-cache
182 changes: 182 additions & 0 deletions envs/x86/sm100/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
# Stage 1: Rocky Linux 9 with build tools, the CUDA toolkit and a bootstrapped
# Spack checkout under /opt/spack.
FROM docker.io/rockylinux:9 AS bootstrap

# CUDA release in <major>.<minor> form; overridable with --build-arg.
ARG CUDA_VERSION=13.0

ENV SPACK_ROOT=/opt/spack \
    CURRENTLY_BUILDING_DOCKER_IMAGE=1 \
    container=docker

# Compilers and utilities used while building packages with Spack.
RUN dnf update -y \
 && dnf install -y epel-release \
 && dnf update -y \
 && dnf --enablerepo epel install -y \
        bzip2 \
        cmake \
        curl-minimal \
        file \
        findutils \
        gcc-c++ \
        gcc \
        gcc-gfortran \
        git \
        gnupg2 \
        hg \
        hostname \
        iproute \
        make \
        patch \
        python3 \
        python3-pip \
        python3-setuptools \
        svn \
        unzip \
        xz \
        zstd \
 && pip3 install boto3 \
 && rm -rf /var/cache/dnf \
 && dnf clean all

# Install CUDA toolkit from NVIDIA repo.
# dnf-plugins-core provides the `config-manager` subcommand; it is installed
# explicitly so this step does not depend on it arriving as a side effect of
# another package.
RUN dnf install -y dnf-plugins-core \
 && dnf config-manager \
        --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
 && dnf clean all \
 && dnf update -y \
 && dnf install -y cuda-toolkit-$(echo ${CUDA_VERSION} | tr '.' '-')

# Fail early if the toolkit did not land in the versioned prefix that the
# Spack manifest later points at.
RUN ls -l /usr/local/cuda-${CUDA_VERSION}/include/cuda.h

# Shallow checkout of Spack.
# NOTE(review): this tracks the moving `develop` branch, so rebuilds are not
# reproducible; consider pinning a tag or commit.
RUN mkdir $SPACK_ROOT && cd $SPACK_ROOT && \
    git init --quiet && git remote add origin https://github.com/spack/spack.git && git fetch --depth=1 origin develop && git checkout --detach FETCH_HEAD && \
    mkdir -p $SPACK_ROOT/opt/spack

# Expose Spack's entrypoint script under the three names used below
# (SHELL, CMD) and by downstream images.
RUN ln -s $SPACK_ROOT/share/spack/docker/entrypoint.bash \
          /usr/local/bin/docker-shell \
 && ln -s $SPACK_ROOT/share/spack/docker/entrypoint.bash \
          /usr/local/bin/interactive-shell \
 && ln -s $SPACK_ROOT/share/spack/docker/entrypoint.bash \
          /usr/local/bin/spack-env

RUN mkdir -p /root/.spack \
 && cp $SPACK_ROOT/share/spack/docker/modules.yaml \
        /root/.spack/modules.yaml \
 && rm -rf /root/*.* /run/nologin

# [WORKAROUND]
# https://superuser.com/questions/1241548/
#     xubuntu-16-04-ttyname-failed-inappropriate-ioctl-for-device#1253889
RUN [ -f ~/.profile ]                                               \
 && sed -i 's/mesg n/( tty -s \\&\\& mesg n || true )/g' ~/.profile \
 || true


WORKDIR /root
# From here on, RUN instructions execute through Spack's entrypoint script.
SHELL ["docker-shell"]

# Creates the package cache
RUN spack bootstrap now \
 && spack bootstrap status --optional \
 && spack spec hdf5+mpi

ENTRYPOINT ["/bin/bash", "/opt/spack/share/spack/docker/entrypoint.bash"]
CMD ["interactive-shell"]

# Stage 2: build stage with Spack pre-installed and ready to be used.
# Writes the environment manifest, patches the feq-parse recipe, installs the
# environment into /opt/software and exposes it through /opt/views/view.
FROM bootstrap AS builder


# What we want to install and how we want to install it
# is specified in a manifest file (spack.yaml).
# The leading spaces inside each quoted string are significant: they are the
# YAML nesting of the generated file. `noclobber` makes the redirect fail
# rather than overwrite an existing manifest.
# NOTE(review): CUDA_VERSION is only declared in the parent stage; confirm it
# is visible here with the builder in use.
RUN mkdir -p /opt/spack-environment && \
    set -o noclobber \
&&  (echo spack: \
&&   echo '  specs:' \
&&   echo '  - feq-parse@2.2.2' \
&&   echo '  - openmpi@5.0.8 +cuda cuda_arch=100' \
&&   echo '  - hdf5@1.14.5 +fortran +mpi' \
&&   echo '  - cmake@3.31.11' \
&&   echo '  packages:' \
&&   echo '    all:' \
&&   echo '      require:' \
&&   echo '      - target=x86_64_v3' \
&&   echo '      prefer:' \
&&   echo '      - cuda_arch=100' \
&&   echo '    cuda:' \
&&   echo '      buildable: false' \
&&   echo '      externals:' \
&&   echo "      - spec: \"cuda@${CUDA_VERSION}\"" \
&&   echo "        prefix: \"/usr/local/cuda-${CUDA_VERSION}\"" \
&&   echo '' \
&&   echo '  concretizer:' \
&&   echo '    unify: true' \
&&   echo '  config:' \
&&   echo '    install_tree:' \
&&   echo '      root: /opt/software' \
&&   echo '  view: /opt/views/view') > /opt/spack-environment/spack.yaml

# Apply feq-parse patch to add "c" build dependency
COPY ./envs/x86/sm100/feq-parse.patch /tmp/feq-parse.patch

# The patch paths start at repos/spack_repo/builtin, so that suffix is removed
# from the repository path reported by `spack repo list` to obtain the
# directory that `patch -p1` must run from.
RUN SPACK_PKGS_ROOT=$(spack repo list | awk '{print $NF}') &&\
    SPACK_BUILTIN_PKGS_ROOT=${SPACK_PKGS_ROOT/repos\/spack_repo\/builtin} &&\
    patch -p1 -d $SPACK_BUILTIN_PKGS_ROOT < /tmp/feq-parse.patch

# Install the software, remove unnecessary deps
RUN cd /opt/spack-environment && spack env activate . && spack repo list && spack install --fail-fast && spack gc -y

# Strip all the binaries
RUN find -L /opt/views/view/* -type f -exec readlink -f '{}' \; | \
    xargs file -i | \
    grep 'charset=binary' | \
    grep 'x-executable\|x-archive\|x-sharedlib' | \
    awk -F: '{print $1}' | xargs strip

# Modifications to the environment that are necessary to run
RUN cd /opt/spack-environment && \
    spack env activate --sh -d . > activate.sh


# Stage 3: bare OS image to run the installed executables.
# Receives the Spack install tree, view and activation script from the builder
# stage and adds compilers plus the CUDA runtime libraries.
FROM docker.io/rockylinux:9

COPY --from=builder /opt/spack-environment /opt/spack-environment
COPY --from=builder /opt/software /opt/software

# Package caches are removed in the same layer that created them so they do
# not end up in the shipped image.
RUN dnf update -y \
 && dnf install -y epel-release \
 && dnf update -y \
 && dnf --enablerepo epel install -y \
        bzip2 \
        cmake \
        curl-minimal \
        file \
        findutils \
        gcc-c++ \
        gcc \
        gcc-gfortran \
        lcov \
 && dnf clean all \
 && rm -rf /var/cache/dnf

# Install CUDA runtime libraries.
# ARG is re-declared because this stage starts from a fresh base image.
# dnf-plugins-core provides the `config-manager` subcommand used below.
ARG CUDA_VERSION=13.0
RUN dnf install -y dnf-plugins-core \
 && dnf config-manager \
        --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
 && dnf clean all \
 && dnf update -y \
 && dnf install -y \
        cuda-libraries-$(echo ${CUDA_VERSION} | tr '.' '-') \
        cuda-nvtx-$(echo ${CUDA_VERSION} | tr '.' '-') \
 && dnf clean all \
 && rm -rf /var/cache/dnf

# paths.view is a symlink, so copy the parent to avoid dereferencing and duplicating it
COPY --from=builder /opt/views /opt/views

# Generate an entrypoint that sources the Spack activation script and then
# replaces itself with the requested command; also expose the view at /opt/view.
RUN { \
      echo '#!/bin/sh' \
      && echo '.' /opt/spack-environment/activate.sh \
      && echo 'exec "$@"'; \
    } > /entrypoint.sh \
 && chmod a+x /entrypoint.sh \
 && ln -s /opt/views/view /opt/view


LABEL "mpi"="openmpi"
ENTRYPOINT [ "/entrypoint.sh" ]
CMD [ "/bin/bash" ]
12 changes: 12 additions & 0 deletions envs/x86/sm100/feq-parse.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Add the C language as a build dependency of the feq-parse Spack package.

diff --git a/repos/spack_repo/builtin/packages/feq_parse/package.py b/repos/spack_repo/builtin/packages/feq_parse/package.py
index e4b960b7..bc0916b9 100644
--- a/repos/spack_repo/builtin/packages/feq_parse/package.py
+++ b/repos/spack_repo/builtin/packages/feq_parse/package.py
@@ -29,6 +29,7 @@ class FeqParse(CMakePackage):
     version("1.0.2", sha256="1cd1db7562908ea16fc65dc5268b654405d0b3d9dcfe11f409949c431b48a3e8")
 
     depends_on("fortran", type="build")  # generated
+    depends_on("c", type="build")  # generated
 
     depends_on("cmake@3.0.2:", type="build")
 
24 changes: 24 additions & 0 deletions envs/x86/sm100/spack.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Spack environment for the x86 / sm100 image.
#
# NOTE(review): the inline manifest written by envs/x86/sm100/Dockerfile is
# not identical to this file. It additionally lists cmake@3.31.11, declares
# CUDA as a non-buildable external, and the Dockerfile clones Spack `develop`
# rather than the ref given below. Confirm which one is authoritative.
spack:
  specs:
    - feq-parse@2.2.2
    - openmpi@5.0.8 +cuda cuda_arch=100
    - hdf5@1.14.5 +fortran +mpi

  packages:
    all:
      # Applied to every package in the environment.
      require:
        - "target=x86_64_v3"
      prefer:
        - "cuda_arch=100"

  # Settings in the layout read by `spack containerize`.
  container:
    format: docker
    images:
      # Quoted so the image reference is always read as a single string.
      os: "rockylinux:9"
      spack:
        ref: v1.0.2

    strip: true

    labels:
      mpi: openmpi
Loading