Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 42 additions & 17 deletions .github/actions/provision-cuda/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,36 @@ runs:
method: 'network'
sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "visual_studio_integration"]'

# Linux x64: Install from NVIDIA repos
- name: Install CUDA (Linux x64)
if: runner.os == 'Linux' && inputs.arch == 'x64'
# Linux: derive the major.minor part of the requested CUDA version
# (e.g. 12.2.2 -> 12.2). The result is exposed as the `major-minor` step
# output, which names the /usr/local/cuda-<major.minor> toolkit directory.
- name: Compute CUDA version info
  id: cuda-info
  if: runner.os == 'Linux'
  shell: bash
  env:
    VERSION: ${{ inputs.version }}
  run: |
    # Quote every expansion so the input is passed through verbatim
    # (no word splitting or glob expansion on the version string).
    major_minor="$(printf '%s\n' "$VERSION" | cut -d. -f1,2)"
    echo "major-minor=${major_minor}" >> "$GITHUB_OUTPUT"

# Linux: keep the installed toolkit directory in the Actions cache
# (~2-3 GB; a cache hit saves the 3-5 min install).
# The key is built from the requested CUDA version and architecture.
- name: Cache CUDA toolkit
  id: cuda-cache
  if: runner.os == 'Linux'
  uses: actions/cache@v4
  with:
    key: cuda-toolkit-${{ inputs.version }}-${{ inputs.arch }}
    path: /usr/local/cuda-${{ steps.cuda-info.outputs.major-minor }}

# Linux: compiler toolchain and CMake. This step has no cache condition,
# so it runs on every Linux job (always needed, fast from apt cache).
- name: Install build tools
  shell: bash
  if: runner.os == 'Linux'
  run: |
    sudo apt-get update -qq
    sudo apt-get install -y -qq build-essential cmake

# Linux x64: Install CUDA toolkit (cache miss only)
- name: Install CUDA toolkit (Linux x64)
if: runner.os == 'Linux' && inputs.arch == 'x64' && steps.cuda-cache.outputs.cache-hit != 'true'
shell: bash
env:
VERSION: ${{ inputs.version }}
Expand All @@ -42,18 +69,11 @@ runs:
wget -q https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update -qq
sudo apt-get install -y -qq cuda-toolkit-${version_slug} build-essential cmake
sudo apt-get install -y -qq cuda-toolkit-${version_slug}

cuda_path="/usr/local/cuda-${version_major_minor}"
echo "CUDA_PATH=${cuda_path}" >> $GITHUB_ENV
echo "${cuda_path}/bin" >> $GITHUB_PATH
echo "LD_LIBRARY_PATH=${cuda_path}/lib64:${LD_LIBRARY_PATH}" >> $GITHUB_ENV

echo "CUDA installed at: ${cuda_path}"

# Linux ARM64: Install from NVIDIA repos
- name: Install CUDA (Linux ARM64)
if: runner.os == 'Linux' && inputs.arch == 'arm64'
# Linux ARM64: Install CUDA toolkit (cache miss only)
- name: Install CUDA toolkit (Linux ARM64)
if: runner.os == 'Linux' && inputs.arch == 'arm64' && steps.cuda-cache.outputs.cache-hit != 'true'
shell: bash
env:
VERSION: ${{ inputs.version }}
Expand All @@ -65,14 +85,19 @@ runs:
wget -q https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update -qq
sudo apt-get install -y -qq cuda-toolkit-${version_slug} build-essential cmake
sudo apt-get install -y -qq cuda-toolkit-${version_slug}

cuda_path="/usr/local/cuda-${version_major_minor}"
# Linux: export CUDA environment variables (always - cached or fresh install).
# Writes CUDA_PATH and LD_LIBRARY_PATH to GITHUB_ENV and adds the toolkit's
# bin directory to GITHUB_PATH.
- name: Set CUDA environment
  if: runner.os == 'Linux'
  shell: bash
  env:
    # Passed through the environment rather than interpolated into the script.
    CUDA_MAJOR_MINOR: ${{ steps.cuda-info.outputs.major-minor }}
  run: |
    cuda_path="/usr/local/cuda-${CUDA_MAJOR_MINOR}"
    echo "CUDA_PATH=${cuda_path}" >> "$GITHUB_ENV"
    echo "${cuda_path}/bin" >> "$GITHUB_PATH"
    # Append the previous LD_LIBRARY_PATH only when it is non-empty: a bare
    # trailing ':' creates an empty entry, which the dynamic loader treats as
    # the current working directory.
    echo "LD_LIBRARY_PATH=${cuda_path}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" >> "$GITHUB_ENV"

    echo "CUDA ready at: ${cuda_path}"

# Set output
- name: Set CUDA path output
Expand Down
113 changes: 91 additions & 22 deletions .github/workflows/gpu-test.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
name: GPU Tests (CUDA)

on:
pull_request:
branches: [ main ]
paths:
- 'liblloyal'
- 'llama.cpp'
- 'lib/**'
- 'src/**'
- 'test/**'
- 'ci/**'
- 'CMakeLists.txt'
workflow_dispatch:
inputs:
skip_build:
Expand Down Expand Up @@ -38,25 +48,29 @@ jobs:
run: node scripts/sync-llama-cpp.js --check
shell: bash

- name: Install build tools
run: |
sudo apt-get update
sudo apt-get install -y build-essential cmake

# The Cloud Run L4 GPU (driver 535.x) requires CUDA 12.2.2.
# The provision-cuda action also installs build-essential + cmake.
# The version is quoted so YAML keeps it as a string.
- name: Provision CUDA toolkit
  uses: ./.github/actions/provision-cuda
  with:
    arch: x64
    version: '12.2.2'

# Set up ccache via the hendrikmuhs/ccache-action; the cache key is
# `cuda-build-<runner OS>`.
- name: Setup ccache
  uses: hendrikmuhs/ccache-action@v1.2
  with:
    key: 'cuda-build-${{ runner.os }}'

# Install dependencies with `npm ci`, skipping package lifecycle scripts
# (--ignore-scripts). The step must carry exactly one `run:` key: a duplicate
# key is invalid YAML and most parsers silently keep only the last one.
- name: Install npm dependencies
  run: npm ci --ignore-scripts

# Run the npm build script with LLOYAL_GPU=cuda. The CMake compiler-launcher
# variables for C, C++ and CUDA are all set to ccache.
- name: Build native module
  env:
    CMAKE_CUDA_COMPILER_LAUNCHER: ccache
    CMAKE_CXX_COMPILER_LAUNCHER: ccache
    CMAKE_C_COMPILER_LAUNCHER: ccache
    LLOYAL_GPU: cuda
  run: npm run build

- name: Create platform package
run: node scripts/create-platform-package.js linux-x64-cuda ubuntu-22.04 x64
Expand All @@ -67,6 +81,7 @@ jobs:
name: package-linux-x64-cuda
path: packages/linux-x64-cuda/
retention-days: 1
compression-level: 0

# GPU Integration Tests via Cloud Run
# Runs real GPU tests on NVIDIA L4
Expand Down Expand Up @@ -129,15 +144,15 @@ jobs:
--image="${IMAGE}" \
--service-account="${{ secrets.GCP_SA_EMAIL }}" \
--set-env-vars=LLOYAL_GPU=cuda,LLOYAL_NO_FALLBACK=1 \
--task-timeout=10m \
--task-timeout=20m \
--no-gpu-zonal-redundancy
else
gcloud run jobs create $JOB_NAME \
--region=us-east4 \
--image="${IMAGE}" \
--service-account="${{ secrets.GCP_SA_EMAIL }}" \
--set-env-vars=LLOYAL_GPU=cuda,LLOYAL_NO_FALLBACK=1 \
--task-timeout=10m \
--task-timeout=20m \
--gpu=1 \
--gpu-type=nvidia-l4 \
--memory=16Gi \
Expand All @@ -149,20 +164,74 @@ jobs:
# Execute the Cloud Run job, stream its logs into the workflow log while it
# runs, then pass or fail the step based on the execution's "Completed"
# condition.
- name: Run GPU tests
  run: |
    JOB_NAME="lloyal-gpu-test-cuda"
    REGION="us-east4"
    POLL_INTERVAL=10
    # Give up after this many consecutive failed status queries instead of
    # polling forever on a persistent gcloud error.
    MAX_DESCRIBE_FAILURES=6

    # Launch job asynchronously so we can stream logs
    EXEC=$(gcloud run jobs execute "$JOB_NAME" \
      --region="$REGION" \
      --async \
      --format='value(metadata.name)')

    # Without an execution name neither the log filter nor the status
    # query below can work.
    if [ -z "$EXEC" ]; then
      echo "❌ Could not determine execution name"
      exit 1
    fi

    echo "Execution: $EXEC"
    echo "Streaming logs (container startup may take ~30s)..."
    echo ""

    # Filter for this specific execution's logs
    LOG_FILTER="resource.type=\"cloud_run_job\" AND resource.labels.job_name=\"$JOB_NAME\" AND labels.\"run.googleapis.com/execution_name\"=\"$EXEC\""

    # Number of log lines already printed.
    SEEN=0

    # Fetch all logs for this execution in chronological order and print
    # only the lines beyond the first $SEEN. Log-read errors are tolerated
    # (best effort); the next poll simply retries.
    print_new_logs() {
      local logs total
      logs=$(gcloud logging read "$LOG_FILTER" \
        --limit=10000 \
        --order=asc \
        --format='value(textPayload)' 2>/dev/null || true)
      [ -n "$logs" ] || return 0
      total=$(printf '%s\n' "$logs" | wc -l | tr -d ' ')
      if [ "$total" -gt "$SEEN" ]; then
        printf '%s\n' "$logs" | tail -n +"$((SEEN + 1))"
        SEEN=$total
      fi
    }

    # Poll loop: stream new log lines + check for completion
    failures=0
    while true; do
      # completionTime is empty until the execution has finished.
      if COMPLETION=$(gcloud run jobs executions describe "$EXEC" \
        --region="$REGION" \
        --format='value(status.completionTime)' 2>/dev/null); then
        failures=0
      else
        COMPLETION=""
        failures=$((failures + 1))
        if [ "$failures" -ge "$MAX_DESCRIBE_FAILURES" ]; then
          echo ""
          echo "❌ Unable to query execution status ($failures consecutive failures)"
          exit 1
        fi
      fi

      print_new_logs

      # If done, do one final fetch for stragglers then break
      if [ -n "$COMPLETION" ]; then
        sleep 5
        print_new_logs
        break
      fi

      sleep "$POLL_INTERVAL"
    done

    # Determine pass/fail from execution status. `[]?` keeps jq from
    # erroring (and aborting the script under `bash -e`) when the
    # conditions list is missing; an empty result counts as failure.
    SUCCEEDED=$(gcloud run jobs executions describe "$EXEC" \
      --region="$REGION" \
      --format=json 2>/dev/null | \
      jq -r '.status.conditions[]? | select(.type == "Completed") | .status')

    if [ "$SUCCEEDED" = "True" ]; then
      echo ""
      echo "✅ GPU Tests Passed"
    else
      echo ""
      echo "❌ GPU Tests Failed"
      exit 1
    fi
Loading
Loading