Adopt the new BranchStore API from liblloyal for high throughput multi-branch operations #28
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CUDA GPU test workflow.
# Triggers: pull requests against main that touch the listed paths,
# manual dispatch, and invocation from another workflow.
name: GPU Tests (CUDA)

on:
  pull_request:
    branches:
      - main
    # A pull-request run is started only when a changed file matches
    # one of these path filters.
    paths:
      - 'liblloyal'
      - 'llama.cpp'
      - 'lib/**'
      - 'src/**'
      - 'test/**'
      - 'CMakeLists.txt'

  workflow_dispatch:

  workflow_call:
    inputs:
      # Read by the build job's `if:` condition; defaults to true, so a
      # calling workflow skips the build unless it passes false.
      skip_build:
        description: 'Skip build step (packages already built by caller)'
        type: boolean
        default: true
jobs:
  # Builds the native module with CUDA enabled and uploads the resulting
  # platform package as the `package-linux-x64-cuda` artifact.
  build-cuda-package:
    name: Build linux-x64-cuda
    # Runs only in lloyal-ai/lloyal.node, and only when skip_build is not
    # true (the input is unset for pull_request / workflow_dispatch runs).
    if: ${{ github.repository == 'lloyal-ai/lloyal.node' && inputs.skip_build != true }}
    runs-on: ubuntu-22.04
    # Least privilege: the steps below only read the repository. Matches
    # the explicit permissions block on the gpu-integration job.
    permissions:
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '24'
          registry-url: 'https://registry.npmjs.org'

      - name: Validate llama.cpp version
        run: node scripts/sync-llama-cpp.js --check
        shell: bash

      # CUDA 12.2.2 required for Cloud Run L4 GPU (driver 535.x)
      # provision-cuda also installs build-essential + cmake
      - name: Provision CUDA toolkit
        uses: ./.github/actions/provision-cuda
        with:
          version: '12.2.2'
          arch: x64

      - name: Setup ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: cuda-build-${{ runner.os }}

      # Lifecycle scripts are skipped here; the native build is invoked
      # explicitly by the next step.
      - name: Install npm dependencies
        run: npm ci --ignore-scripts

      - name: Build native module
        run: npm run build
        env:
          LLOYAL_GPU: cuda
          # CMake compiler-launcher variables: C, C++ and CUDA compiler
          # invocations are wrapped by ccache.
          CMAKE_C_COMPILER_LAUNCHER: ccache
          CMAKE_CXX_COMPILER_LAUNCHER: ccache
          CMAKE_CUDA_COMPILER_LAUNCHER: ccache

      - name: Create platform package
        run: node scripts/create-platform-package.js linux-x64-cuda ubuntu-22.04 x64

      # Short-lived, uncompressed artifact handed to the gpu-integration
      # job, which downloads it by the same name.
      - name: Upload platform package artifact
        uses: actions/upload-artifact@v4
        with:
          name: package-linux-x64-cuda
          path: packages/linux-x64-cuda/
          retention-days: 1
          compression-level: 0
# Job nested under `jobs:`. Packages the CUDA build into a GPU test image
# and pushes it to Artifact Registry for the subsequent deploy step.
  gpu-integration:
    name: GPU Tests (L4)
    needs: build-cuda-package
    runs-on: ubuntu-latest
    # Proceeds when the build job succeeded or was skipped (packages
    # supplied by the caller). `!cancelled()` replaces the implicit
    # success() check, which would otherwise skip this job whenever the
    # build job is skipped.
    # NOTE(review): github.repository is the base repository, so this does
    # not exclude pull requests from forks, where secrets are not
    # available - confirm fork PRs are handled as intended.
    if: ${{ github.repository == 'lloyal-ai/lloyal.node' && !cancelled() && (needs.build-cuda-package.result == 'success' || needs.build-cuda-package.result == 'skipped') }}
    permissions:
      contents: read
      # Lets the GCP auth step request an OIDC token for workload
      # identity federation.
      id-token: write
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # Second checkout: infrastructure repo placed under ./ci; it provides
      # the Dockerfile and deploy script used by later steps.
      - name: Checkout infrastructure scripts
        uses: actions/checkout@v4
        with:
          repository: lloyal-ai/lloyal-infra
          token: ${{ secrets.INFRA_REPO_PAT }}
          path: ci

      - name: Authenticate to GCP
        uses: google-github-actions/auth@v2
        with:
          workload_identity_provider: ${{ secrets.GCP_WIF_PROVIDER }}
          service_account: ${{ secrets.GCP_SA_EMAIL }}

      - name: Set up Cloud SDK
        uses: google-github-actions/setup-gcloud@v2

      # Secrets reach the shell through env rather than being expanded
      # into the script text, matching the deploy step of this job.
      - name: Configure Docker for Artifact Registry
        run: gcloud auth configure-docker "${GCP_REGION}-docker.pkg.dev" --quiet
        env:
          GCP_REGION: ${{ secrets.GCP_REGION }}

      # NOTE(review): the build job uploads from packages/linux-x64-cuda/
      # but the artifact is extracted into packages/package-linux-x64-cuda
      # here - confirm ci/Dockerfile.gpu-tests expects this directory.
      - name: Download package artifact
        uses: actions/download-artifact@v4
        with:
          name: package-linux-x64-cuda
          path: packages/package-linux-x64-cuda

      # Tags the image with the commit SHA and exports IMAGE to later
      # steps through GITHUB_ENV.
      - name: Build and push GPU test image
        run: |
          IMAGE="${GCP_REGION}-docker.pkg.dev/${GCP_PROJECT_ID}/${GCP_AR_REPO}/gpu-tests:${COMMIT_SHA}-cuda"
          docker build -f ci/Dockerfile.gpu-tests -t "$IMAGE" .
          docker push "$IMAGE"
          echo "IMAGE=$IMAGE" >> "$GITHUB_ENV"
        env:
          GCP_REGION: ${{ secrets.GCP_REGION }}
          GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
          GCP_AR_REPO: ${{ secrets.GCP_AR_REPO }}
          COMMIT_SHA: ${{ github.sha }}
| - name: Deploy and run GPU tests | |
| run: bash ci/deploy-gpu-tests.sh | |
| env: | |
| GCP_REGION: ${{ secrets.GCP_REGION }} | |
| GCP_SA_EMAIL: ${{ secrets.GCP_SA_EMAIL }} |