From f60c34c958d39e9d9ed7f28a19be3bdac80fca08 Mon Sep 17 00:00:00 2001 From: LLoyal Research Date: Thu, 12 Feb 2026 19:36:39 +1100 Subject: [PATCH 1/3] feat(ci): infra integration --- .github/workflows/gpu-test.yml | 162 ++------------------------------- .github/workflows/release.yml | 6 +- .gitignore | 5 +- ci/Dockerfile.gpu-tests | 53 ----------- ci/run-gpu-tests.sh | 157 -------------------------------- ci/setup-infra.sh | 111 ---------------------- 6 files changed, 14 insertions(+), 480 deletions(-) delete mode 100644 ci/Dockerfile.gpu-tests delete mode 100755 ci/run-gpu-tests.sh delete mode 100755 ci/setup-infra.sh diff --git a/.github/workflows/gpu-test.yml b/.github/workflows/gpu-test.yml index 2057193..e07dbd8 100644 --- a/.github/workflows/gpu-test.yml +++ b/.github/workflows/gpu-test.yml @@ -9,7 +9,6 @@ on: - 'lib/**' - 'src/**' - 'test/**' - - 'ci/**' - 'CMakeLists.txt' workflow_dispatch: inputs: @@ -17,19 +16,13 @@ on: description: 'Skip build step (use existing artifacts)' type: boolean default: false - workflow_call: - inputs: - skip_build: - description: 'Skip build step (packages already built by caller)' - type: boolean - default: true jobs: # Build CUDA package for testing # Skipped when called from release.yml (packages already built) build-cuda-package: name: Build linux-x64-cuda - if: ${{ inputs.skip_build != true }} + if: ${{ github.repository == 'lloyal-ai/lloyal.node' && inputs.skip_build != true }} runs-on: ubuntu-22.04 steps: @@ -83,155 +76,14 @@ jobs: retention-days: 1 compression-level: 0 - # GPU Integration Tests via Cloud Run - # Runs real GPU tests on NVIDIA L4 - # - # L4 GPU Requirements (as of 2024): - # - Driver: 535.216.03 (supports CUDA 12.2.2 max) - # - Minimum: 4 CPU, 16 GiB memory - # - Regions: us-central1, us-east4, europe-west1, europe-west4, asia-southeast1 - # - Quota: 3 L4 GPUs per region (default) + # GPU Integration Tests via Cloud Run (L4) + # Infrastructure details are in the private lloyal-infra repo gpu-integration: name: GPU Tests (L4) needs: build-cuda-package - runs-on: ubuntu-latest - # Run if build succeeded OR was skipped (packages from caller) - if: ${{ !cancelled() && (needs.build-cuda-package.result == 'success' || needs.build-cuda-package.result == 'skipped') }} - + if: ${{ github.repository == 'lloyal-ai/lloyal.node' && !cancelled() && (needs.build-cuda-package.result == 'success' || needs.build-cuda-package.result == 'skipped') }} + uses: lloyal-ai/lloyal-infra/.github/workflows/gpu-integration.yml@main + secrets: inherit permissions: contents: read - id-token: write # Required for Workload Identity Federation - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Authenticate to GCP - uses: google-github-actions/auth@v2 - with: - workload_identity_provider: ${{ secrets.GCP_WIF_PROVIDER }} - service_account: ${{ secrets.GCP_SA_EMAIL }} - - - name: Set up Cloud SDK - uses: google-github-actions/setup-gcloud@v2 - - - name: Configure Docker for Artifact Registry - run: gcloud auth configure-docker us-east4-docker.pkg.dev --quiet - - - name: Download package artifact - uses: actions/download-artifact@v4 - with: - name: package-linux-x64-cuda - path: packages/package-linux-x64-cuda - - - name: Build GPU test image - run: | - IMAGE="us-east4-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/lloyal-ci/gpu-tests:${{ github.sha }}-cuda" - docker build \ - -f ci/Dockerfile.gpu-tests \ - -t "$IMAGE" . - docker push "$IMAGE" - echo "IMAGE=$IMAGE" >> $GITHUB_ENV - - - name: Deploy Cloud Run Job - run: | - JOB_NAME="lloyal-gpu-test-cuda" - - # Check if job exists - if gcloud run jobs describe $JOB_NAME --region=us-east4 2>/dev/null; then - gcloud run jobs update $JOB_NAME \ - --region=us-east4 \ - --image="${IMAGE}" \ - --service-account="${{ secrets.GCP_SA_EMAIL }}" \ - --set-env-vars=LLOYAL_GPU=cuda,LLOYAL_NO_FALLBACK=1 \ - --task-timeout=20m \ - --no-gpu-zonal-redundancy - else - gcloud run jobs create $JOB_NAME \ - --region=us-east4 \ - --image="${IMAGE}" \ - --service-account="${{ secrets.GCP_SA_EMAIL }}" \ - --set-env-vars=LLOYAL_GPU=cuda,LLOYAL_NO_FALLBACK=1 \ - --task-timeout=20m \ - --gpu=1 \ - --gpu-type=nvidia-l4 \ - --memory=16Gi \ - --cpu=4 \ - --max-retries=0 \ - --no-gpu-zonal-redundancy - fi - - - name: Run GPU tests - run: | - JOB_NAME="lloyal-gpu-test-cuda" - REGION="us-east4" - - # Launch job asynchronously so we can stream logs - EXEC=$(gcloud run jobs execute $JOB_NAME \ - --region=$REGION \ - --async \ - --format='value(metadata.name)') - - echo "Execution: $EXEC" - echo "Streaming logs (container startup may take ~30s)..." - echo "" - - # Filter for this specific execution's logs - LOG_FILTER="resource.type=\"cloud_run_job\" AND resource.labels.job_name=\"$JOB_NAME\" AND labels.\"run.googleapis.com/execution_name\"=\"$EXEC\"" - - # Poll loop: stream new log lines + check for completion - SEEN=0 - while true; do - # Check if execution has completed - COMPLETION=$(gcloud run jobs executions describe "$EXEC" \ - --region="$REGION" \ - --format='value(status.completionTime)' 2>/dev/null || true) - - # Fetch all logs for this execution in chronological order - LOGS=$(gcloud logging read "$LOG_FILTER" \ - --limit=10000 \ - --order=asc \ - --format='value(textPayload)' 2>/dev/null || true) - - # Print only lines we haven't seen yet - if [ -n "$LOGS" ]; then - TOTAL=$(echo "$LOGS" | wc -l | tr -d ' ') - if [ "$TOTAL" -gt "$SEEN" ]; then - echo "$LOGS" | tail -n +$((SEEN + 1)) - SEEN=$TOTAL - fi - fi - - # If done, do one final fetch for stragglers then break - if [ -n "$COMPLETION" ]; then - sleep 5 - LOGS=$(gcloud logging read "$LOG_FILTER" \ - --limit=10000 \ - --order=asc \ - --format='value(textPayload)' 2>/dev/null || true) - if [ -n "$LOGS" ]; then - TOTAL=$(echo "$LOGS" | wc -l | tr -d ' ') - if [ "$TOTAL" -gt "$SEEN" ]; then - echo "$LOGS" | tail -n +$((SEEN + 1)) - fi - fi - break - fi - - sleep 10 - done - - # Determine pass/fail from execution status - SUCCEEDED=$(gcloud run jobs executions describe "$EXEC" \ - --region="$REGION" \ - --format=json 2>/dev/null | \ - jq -r '.status.conditions[] | select(.type == "Completed") | .status') - - if [ "$SUCCEEDED" = "True" ]; then - echo "" - echo "✅ GPU Tests Passed" - else - echo "" - echo "❌ GPU Tests Failed" - exit 1 - fi + id-token: write diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2f130df..06306b9 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -307,15 +307,15 @@ jobs: path: packages/${{ matrix.package }}/ retention-days: 1 - # GPU Integration Tests (reusable workflow) + # GPU Integration Tests (reusable workflow from private infra repo) gpu-tests: name: GPU Tests needs: build-and-test - uses: ./.github/workflows/gpu-test.yml + uses: lloyal-ai/lloyal-infra/.github/workflows/gpu-integration.yml@main secrets: inherit permissions: contents: read - id-token: write # Required for GCP Workload Identity Federation + id-token: write publish: name: Publish all packages diff --git a/.gitignore b/.gitignore index 1f2f1b8..a7dddba 100644 --- a/.gitignore +++ b/.gitignore @@ -38,4 +38,7 @@ Thumbs.db tmp/ -packages/darwin-arm64 \ No newline at end of file +packages/darwin-arm64 + +# CI infra scripts (injected from lloyal-infra during CI) +ci/ \ No newline at end of file diff --git a/ci/Dockerfile.gpu-tests b/ci/Dockerfile.gpu-tests deleted file mode 100644 index cf53766..0000000 --- a/ci/Dockerfile.gpu-tests +++ /dev/null @@ -1,53 +0,0 @@ -# GPU Testing Image for lloyal.node (CUDA only) -# Runs integration tests on NVIDIA L4 GPU via Cloud Run Jobs -# -# Note: Vulkan is tested separately on GitHub Actions with software rendering -# because Cloud Run only mounts CUDA libs, not Vulkan/graphics driver components. -# -# Build: docker build -f ci/Dockerfile.gpu-tests -t gpu-tests . -# Run: docker run --gpus all gpu-tests - -# CUDA 12.2.2 required for Cloud Run L4 GPU (driver 535.x supports up to 12.2.x) -FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04 - -# Install runtime dependencies -# - libgomp1: OpenMP runtime (required by llama.cpp CUDA build) -RUN apt-get update && apt-get install -y --no-install-recommends \ - wget \ - curl \ - ca-certificates \ - jq \ - libgomp1 \ - && rm -rf /var/lib/apt/lists/* - -# Install Node.js 24 -RUN wget -qO- https://deb.nodesource.com/setup_24.x | bash - \ - && apt-get install -y nodejs \ - && rm -rf /var/lib/apt/lists/* - -WORKDIR /app - -# Copy package.json first for dependency caching -COPY package.json ./ - -# Install dependencies (omit dev deps - only need runtime deps like @lloyal-labs/tsampler) -RUN npm install --omit=dev - -# Copy pre-built packages from release workflow -# These are downloaded as artifacts before docker build -COPY packages/ ./packages/ - -# Copy test files, examples, lib, and scripts -COPY test/ ./test/ -COPY examples/ ./examples/ -COPY lib/ ./lib/ -COPY scripts/download-test-models.sh ./scripts/ - -# Models downloaded at runtime via run-gpu-tests.sh -# This allows testing multiple models from test/matrix.json -# without baking large files into the image - -# Test script -COPY ci/run-gpu-tests.sh ./ - -ENTRYPOINT ["bash", "run-gpu-tests.sh"] diff --git a/ci/run-gpu-tests.sh b/ci/run-gpu-tests.sh deleted file mode 100755 index 8a6df8f..0000000 --- a/ci/run-gpu-tests.sh +++ /dev/null @@ -1,157 +0,0 @@ -#!/bin/bash -set -e - -echo "=== GPU Test Environment ===" -echo "CUDA: $(nvcc --version 2>/dev/null | grep release || echo 'runtime-only')" -echo "GPU: $(nvidia-smi --query-gpu=name,driver_version --format=csv,noheader 2>/dev/null || echo 'N/A')" -echo "" - -# GPU backend to test (passed as arg or env) -GPU_BACKEND="${1:-${LLOYAL_GPU:-cuda}}" - -# Validate GPU backend to avoid injection and invalid values -case "${GPU_BACKEND}" in - cuda|vulkan) - ;; - *) - echo "Error: Invalid GPU backend '${GPU_BACKEND}'. Allowed values are: cuda, vulkan." >&2 - exit 1 - ;; -esac - -PACKAGE_NAME="linux-x64-${GPU_BACKEND}" - -echo "Testing backend: ${GPU_BACKEND}" -echo "Installing package: ${PACKAGE_NAME}" - -# Install the pre-built package -cd /app -if [ -d "./packages/package-${PACKAGE_NAME}" ]; then - npm install "./packages/package-${PACKAGE_NAME}" -else - echo "Error: Package directory not found: ./packages/package-${PACKAGE_NAME}" - ls -la ./packages/ - exit 1 -fi - -echo "" -echo "=== Downloading Test Models ===" -./scripts/download-test-models.sh --all - -echo "" -echo "=== Verifying Backend ===" -node -e " -const { loadBinary } = require('./lib'); -process.env.LLOYAL_GPU = '${GPU_BACKEND}'; -process.env.LLOYAL_NO_FALLBACK = '1'; -try { - const addon = loadBinary(); - console.log('✓ Package loaded successfully'); - console.log(' Exports:', Object.keys(addon)); -} catch (e) { - console.error('Failed to load binary:', e); - process.exit(1); -} -" - -# Common env for all test runs -export LLOYAL_GPU="${GPU_BACKEND}" -export LLOYAL_NO_FALLBACK=1 -export LLAMA_CTX_SIZE=4096 - -echo "" -echo "=== Running Model Matrix (nCtx=${LLAMA_CTX_SIZE}) ===" - -# Read model list from matrix.json -MODELS=$(jq -c '.models[]' test/matrix.json) - -# Per-model results tracking -TOTAL=0 -PASS=0 -FAIL=0 -declare -a RESULTS=() -declare -a FAIL_DETAILS=() - -# Don't exit on per-model failure — track results and report at end -set +e - -while IFS= read -r model; do - name=$(echo "$model" | jq -r '.name') - file=$(echo "$model" | jq -r '.file') - - echo "" - echo "══════════════════════════════════════" - echo "MODEL: $name ($file)" - echo "══════════════════════════════════════" - - TOTAL=$((TOTAL + 1)) - MODEL_LOG=$(mktemp) - MODEL_FAILED=false - - # --- Integration tests --- - echo "── Integration Tests ──" - LLAMA_TEST_MODEL="models/$file" \ - node test/integration.js 2>&1 | tee "$MODEL_LOG" - INT_EXIT=${PIPESTATUS[0]} - - if [ $INT_EXIT -ne 0 ]; then - MODEL_FAILED=true - fi - - # --- Example tests --- - echo "" - echo "── Example Tests ──" - LLAMA_TEST_MODEL="models/$file" \ - node test/examples.js 2>&1 | tee -a "$MODEL_LOG" - EX_EXIT=${PIPESTATUS[0]} - - if [ $EX_EXIT -ne 0 ]; then - MODEL_FAILED=true - fi - - # Per-model summary - if [ "$MODEL_FAILED" = false ]; then - RESULTS+=("✅ $name") - PASS=$((PASS + 1)) - else - RESULTS+=("❌ $name") - FAIL=$((FAIL + 1)) - # Extract failure lines for the final summary - FAILURES=$(grep -E '\[FAIL\]|❌ FAILED|Assertion failed|Fatal error' "$MODEL_LOG" | head -10) - FAIL_DETAILS+=("── $name ──"$'\n'"$FAILURES") - fi - - rm -f "$MODEL_LOG" -done <<< "$MODELS" - -set -e - -# Final summary table -echo "" -echo "══════════════════════════════════════" -echo "MODEL MATRIX RESULTS" -echo "══════════════════════════════════════" -for r in "${RESULTS[@]}"; do echo " $r"; done -echo "" -echo "Total: $PASS passed, $FAIL failed out of $TOTAL models" - -if [ $FAIL -gt 0 ] && [ ${#FAIL_DETAILS[@]} -gt 0 ]; then - echo "" - echo "══════════════════════════════════════" - echo "FAILURE DETAILS" - echo "══════════════════════════════════════" - for d in "${FAIL_DETAILS[@]}"; do - echo "$d" - echo "" - done -fi - -if [ $FAIL -eq 0 ]; then - echo "" - echo "=== ✅ GPU Tests Passed ===" - exit 0 -else - echo "" - echo "=== ❌ GPU Tests Failed ===" - exit 1 -fi diff --git a/ci/setup-infra.sh b/ci/setup-infra.sh deleted file mode 100755 index 91742d1..0000000 --- a/ci/setup-infra.sh +++ /dev/null @@ -1,111 +0,0 @@ -#!/bin/bash -set -e - -# --- CONFIGURATION --- -PROJECT_ID="lloyal-node" -GITHUB_REPO="lloyal-ai/lloyal.node" -REGION="us-east4" - -# Infrastructure Names -SA_NAME="github-ci-runner" -POOL_NAME="github-pool" -PROVIDER_NAME="github-provider" -AR_REPO="lloyal-ci" - -echo "=== Provisioning GCP Infrastructure for $GITHUB_REPO ===" -echo "Project: $PROJECT_ID" - -# 1. Setup Project & APIs -gcloud config set project "$PROJECT_ID" - -echo "Enabling APIs..." -gcloud services enable \ - iam.googleapis.com \ - iamcredentials.googleapis.com \ - artifactregistry.googleapis.com \ - run.googleapis.com \ - logging.googleapis.com - -# 2. Artifact Registry -if ! gcloud artifacts repositories describe "$AR_REPO" --location="$REGION" &>/dev/null; then - echo "Creating Artifact Registry repo..." - gcloud artifacts repositories create "$AR_REPO" \ - --repository-format=docker \ - --location="$REGION" \ - --description="Docker repository for lloyal.node CI" -else - echo "Artifact Registry repo exists." -fi - -# 3. Service Account -SA_EMAIL="${SA_NAME}@${PROJECT_ID}.iam.gserviceaccount.com" -if ! gcloud iam service-accounts describe "$SA_EMAIL" &>/dev/null; then - echo "Creating Service Account..." - gcloud iam service-accounts create "$SA_NAME" --display-name="GitHub Actions CI Runner" -else - echo "Service Account exists." -fi - -# 4. Assign Roles -echo "Assigning IAM roles..." - -# Grant Artifact Registry Writer (Push images) -gcloud artifacts repositories add-iam-policy-binding "$AR_REPO" \ - --location="$REGION" \ - --member="serviceAccount:$SA_EMAIL" \ - --role="roles/artifactregistry.writer" > /dev/null - -# Grant Cloud Run Developer (Create/Update Jobs) -gcloud projects add-iam-policy-binding "$PROJECT_ID" \ - --member="serviceAccount:$SA_EMAIL" \ - --role="roles/run.developer" > /dev/null - -# Grant Cloud Run Invoker (Execute Jobs) -gcloud projects add-iam-policy-binding "$PROJECT_ID" \ - --member="serviceAccount:$SA_EMAIL" \ - --role="roles/run.invoker" > /dev/null - -# Grant Logging Viewer (Read logs back to CI) -gcloud projects add-iam-policy-binding "$PROJECT_ID" \ - --member="serviceAccount:$SA_EMAIL" \ - --role="roles/logging.viewer" > /dev/null - -# Grant Service Account User (Act as itself) -gcloud iam service-accounts add-iam-policy-binding "$SA_EMAIL" \ - --member="serviceAccount:$SA_EMAIL" \ - --role="roles/iam.serviceAccountUser" > /dev/null - -# 5. Workload Identity Federation -if ! gcloud iam workload-identity-pools describe "$POOL_NAME" --location="global" &>/dev/null; then - echo "Creating Identity Pool..." - gcloud iam workload-identity-pools create "$POOL_NAME" \ - --location="global" \ - --display-name="GitHub Actions Pool" -fi - -POOL_ID=$(gcloud iam workload-identity-pools describe "$POOL_NAME" --location="global" --format="value(name)") - -# Create Provider with Security Condition -if ! gcloud iam workload-identity-pools providers describe "$PROVIDER_NAME" --location="global" --workload-identity-pool="$POOL_NAME" &>/dev/null; then - echo "Creating Identity Provider..." - gcloud iam workload-identity-pools providers create-oidc "$PROVIDER_NAME" \ - --location="global" \ - --workload-identity-pool="$POOL_NAME" \ - --display-name="GitHub Provider" \ - --attribute-mapping="google.subject=assertion.sub,attribute.actor=assertion.actor,attribute.repository=assertion.repository" \ - --attribute-condition="assertion.repository_owner == 'lloyal-ai'" \ - --issuer-uri="https://token.actions.githubusercontent.com" -fi - -echo "Binding GitHub Repo to Service Account..." -gcloud iam service-accounts add-iam-policy-binding "$SA_EMAIL" \ - --role="roles/iam.workloadIdentityUser" \ - --member="principalSet://iam.googleapis.com/${POOL_ID}/attribute.repository/${GITHUB_REPO}" > /dev/null - -# --- OUTPUT --- -PROVIDER_FULL_PATH=$(gcloud iam workload-identity-pools providers describe "$PROVIDER_NAME" --location="global" --workload-identity-pool="$POOL_NAME" --format="value(name)") - -echo "DONE:" -echo "GCP_PROJECT_ID : $PROJECT_ID" -echo "GCP_SA_EMAIL : $SA_EMAIL" -echo "GCP_WIF_PROVIDER : $PROVIDER_FULL_PATH" \ No newline at end of file From ca5d40550d352c46a526355e9488f69be1e6e34c Mon Sep 17 00:00:00 2001 From: LLoyal Research Date: Thu, 12 Feb 2026 20:02:31 +1100 Subject: [PATCH 2/3] feat(ci): infra integration - fix tests --- .github/workflows/tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6861128..b8e01c8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -64,6 +64,9 @@ jobs: - name: Build from submodules run: npm run build + env: + # Force CPU — GitHub Actions paravirtual Metal GPU has driver bugs + LLOYAL_GPU: cpu # This runs scripts/build.js which: # 1. Builds llama.cpp from llama.cpp/ # 2. Builds liblloyal from liblloyal/ From 09fe6308116eb591d5cb32b4a06d9a373c627c8e Mon Sep 17 00:00:00 2001 From: LLoyal Research Date: Thu, 12 Feb 2026 20:18:00 +1100 Subject: [PATCH 3/3] feat(ci): infra integration --- .github/workflows/gpu-test.yml | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/.github/workflows/gpu-test.yml b/.github/workflows/gpu-test.yml index e07dbd8..5717180 100644 --- a/.github/workflows/gpu-test.yml +++ b/.github/workflows/gpu-test.yml @@ -11,18 +11,11 @@ on: - 'test/**' - 'CMakeLists.txt' workflow_dispatch: - inputs: - skip_build: - description: 'Skip build step (use existing artifacts)' - type: boolean - default: false jobs: - # Build CUDA package for testing - # Skipped when called from release.yml (packages already built) build-cuda-package: name: Build linux-x64-cuda - if: ${{ github.repository == 'lloyal-ai/lloyal.node' && inputs.skip_build != true }} + if: ${{ github.repository == 'lloyal-ai/lloyal.node' }} runs-on: ubuntu-22.04 steps: @@ -81,7 +74,7 @@ jobs: gpu-integration: name: GPU Tests (L4) needs: build-cuda-package - if: ${{ github.repository == 'lloyal-ai/lloyal.node' && !cancelled() && (needs.build-cuda-package.result == 'success' || needs.build-cuda-package.result == 'skipped') }} + if: ${{ github.repository == 'lloyal-ai/lloyal.node' && needs.build-cuda-package.result == 'success' }} uses: lloyal-ai/lloyal-infra/.github/workflows/gpu-integration.yml@main secrets: inherit permissions: