From f60c34c958d39e9d9ed7f28a19be3bdac80fca08 Mon Sep 17 00:00:00 2001
From: LLoyal Research <research@lloyal.ai>
Date: Thu, 12 Feb 2026 19:36:39 +1100
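Subject: [PATCH 1/3] feat(ci): infra integration

Move the GPU test infrastructure out of this repository:

* gpu-test.yml: replace the inline Cloud Run steps of the gpu-integration
  job with a call to the reusable workflow
  lloyal-ai/lloyal-infra/.github/workflows/gpu-integration.yml@main,
  drop the workflow_call trigger and the 'ci/**' path filter, and only
  run the jobs when github.repository is 'lloyal-ai/lloyal.node'.
* release.yml: call the lloyal-infra reusable workflow directly instead
  of ./.github/workflows/gpu-test.yml.
* Delete ci/Dockerfile.gpu-tests, ci/run-gpu-tests.sh and
  ci/setup-infra.sh, and add ci/ to .gitignore.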

---
 .github/workflows/gpu-test.yml | 162 ++-------------------------------
 .github/workflows/release.yml  |   6 +-
 .gitignore                     |   5 +-
 ci/Dockerfile.gpu-tests        |  53 -----------
 ci/run-gpu-tests.sh            | 157 --------------------------------
 ci/setup-infra.sh              | 111 ----------------------
 6 files changed, 14 insertions(+), 480 deletions(-)
 delete mode 100644 ci/Dockerfile.gpu-tests
 delete mode 100755 ci/run-gpu-tests.sh
 delete mode 100755 ci/setup-infra.sh

diff --git a/.github/workflows/gpu-test.yml b/.github/workflows/gpu-test.yml
index 2057193..e07dbd8 100644
--- a/.github/workflows/gpu-test.yml
+++ b/.github/workflows/gpu-test.yml
@@ -9,7 +9,6 @@ on:
       - 'lib/**'
       - 'src/**'
       - 'test/**'
-      - 'ci/**'
       - 'CMakeLists.txt'
   workflow_dispatch:
     inputs:
@@ -17,19 +16,13 @@ on:
         description: 'Skip build step (use existing artifacts)'
         type: boolean
         default: false
-  workflow_call:
-    inputs:
-      skip_build:
-        description: 'Skip build step (packages already built by caller)'
-        type: boolean
-        default: true
 
 jobs:
   # Build CUDA package for testing
   # Skipped when called from release.yml (packages already built)
   build-cuda-package:
     name: Build linux-x64-cuda
-    if: ${{ inputs.skip_build != true }}
+    if: ${{ github.repository == 'lloyal-ai/lloyal.node' && inputs.skip_build != true }}
     runs-on: ubuntu-22.04
 
     steps:
@@ -83,155 +76,14 @@ jobs:
           retention-days: 1
           compression-level: 0
 
-  # GPU Integration Tests via Cloud Run
-  # Runs real GPU tests on NVIDIA L4
-  #
-  # L4 GPU Requirements (as of 2024):
-  #   - Driver: 535.216.03 (supports CUDA 12.2.2 max)
-  #   - Minimum: 4 CPU, 16 GiB memory
-  #   - Regions: us-central1, us-east4, europe-west1, europe-west4, asia-southeast1
-  #   - Quota: 3 L4 GPUs per region (default)
+  # GPU Integration Tests via Cloud Run (L4)
+  # Infrastructure details are in the private lloyal-infra repo
   gpu-integration:
     name: GPU Tests (L4)
     needs: build-cuda-package
-    runs-on: ubuntu-latest
-    # Run if build succeeded OR was skipped (packages from caller)
-    if: ${{ !cancelled() && (needs.build-cuda-package.result == 'success' || needs.build-cuda-package.result == 'skipped') }}
-
+    if: ${{ github.repository == 'lloyal-ai/lloyal.node' && !cancelled() && (needs.build-cuda-package.result == 'success' || needs.build-cuda-package.result == 'skipped') }}
+    uses: lloyal-ai/lloyal-infra/.github/workflows/gpu-integration.yml@main
+    secrets: inherit
     permissions:
       contents: read
-      id-token: write  # Required for Workload Identity Federation
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Authenticate to GCP
-        uses: google-github-actions/auth@v2
-        with:
-          workload_identity_provider: ${{ secrets.GCP_WIF_PROVIDER }}
-          service_account: ${{ secrets.GCP_SA_EMAIL }}
-
-      - name: Set up Cloud SDK
-        uses: google-github-actions/setup-gcloud@v2
-
-      - name: Configure Docker for Artifact Registry
-        run: gcloud auth configure-docker us-east4-docker.pkg.dev --quiet
-
-      - name: Download package artifact
-        uses: actions/download-artifact@v4
-        with:
-          name: package-linux-x64-cuda
-          path: packages/package-linux-x64-cuda
-
-      - name: Build GPU test image
-        run: |
-          IMAGE="us-east4-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/lloyal-ci/gpu-tests:${{ github.sha }}-cuda"
-          docker build \
-            -f ci/Dockerfile.gpu-tests \
-            -t "$IMAGE" .
-          docker push "$IMAGE"
-          echo "IMAGE=$IMAGE" >> $GITHUB_ENV
-
-      - name: Deploy Cloud Run Job
-        run: |
-          JOB_NAME="lloyal-gpu-test-cuda"
-
-          # Check if job exists
-          if gcloud run jobs describe $JOB_NAME --region=us-east4 2>/dev/null; then
-            gcloud run jobs update $JOB_NAME \
-              --region=us-east4 \
-              --image="${IMAGE}" \
-              --service-account="${{ secrets.GCP_SA_EMAIL }}" \
-              --set-env-vars=LLOYAL_GPU=cuda,LLOYAL_NO_FALLBACK=1 \
-              --task-timeout=20m \
-              --no-gpu-zonal-redundancy
-          else
-            gcloud run jobs create $JOB_NAME \
-              --region=us-east4 \
-              --image="${IMAGE}" \
-              --service-account="${{ secrets.GCP_SA_EMAIL }}" \
-              --set-env-vars=LLOYAL_GPU=cuda,LLOYAL_NO_FALLBACK=1 \
-              --task-timeout=20m \
-              --gpu=1 \
-              --gpu-type=nvidia-l4 \
-              --memory=16Gi \
-              --cpu=4 \
-              --max-retries=0 \
-              --no-gpu-zonal-redundancy
-          fi
-
-      - name: Run GPU tests
-        run: |
-          JOB_NAME="lloyal-gpu-test-cuda"
-          REGION="us-east4"
-
-          # Launch job asynchronously so we can stream logs
-          EXEC=$(gcloud run jobs execute $JOB_NAME \
-            --region=$REGION \
-            --async \
-            --format='value(metadata.name)')
-
-          echo "Execution: $EXEC"
-          echo "Streaming logs (container startup may take ~30s)..."
-          echo ""
-
-          # Filter for this specific execution's logs
-          LOG_FILTER="resource.type=\"cloud_run_job\" AND resource.labels.job_name=\"$JOB_NAME\" AND labels.\"run.googleapis.com/execution_name\"=\"$EXEC\""
-
-          # Poll loop: stream new log lines + check for completion
-          SEEN=0
-          while true; do
-            # Check if execution has completed
-            COMPLETION=$(gcloud run jobs executions describe "$EXEC" \
-              --region="$REGION" \
-              --format='value(status.completionTime)' 2>/dev/null || true)
-
-            # Fetch all logs for this execution in chronological order
-            LOGS=$(gcloud logging read "$LOG_FILTER" \
-              --limit=10000 \
-              --order=asc \
-              --format='value(textPayload)' 2>/dev/null || true)
-
-            # Print only lines we haven't seen yet
-            if [ -n "$LOGS" ]; then
-              TOTAL=$(echo "$LOGS" | wc -l | tr -d ' ')
-              if [ "$TOTAL" -gt "$SEEN" ]; then
-                echo "$LOGS" | tail -n +$((SEEN + 1))
-                SEEN=$TOTAL
-              fi
-            fi
-
-            # If done, do one final fetch for stragglers then break
-            if [ -n "$COMPLETION" ]; then
-              sleep 5
-              LOGS=$(gcloud logging read "$LOG_FILTER" \
-                --limit=10000 \
-                --order=asc \
-                --format='value(textPayload)' 2>/dev/null || true)
-              if [ -n "$LOGS" ]; then
-                TOTAL=$(echo "$LOGS" | wc -l | tr -d ' ')
-                if [ "$TOTAL" -gt "$SEEN" ]; then
-                  echo "$LOGS" | tail -n +$((SEEN + 1))
-                fi
-              fi
-              break
-            fi
-
-            sleep 10
-          done
-
-          # Determine pass/fail from execution status
-          SUCCEEDED=$(gcloud run jobs executions describe "$EXEC" \
-            --region="$REGION" \
-            --format=json 2>/dev/null | \
-            jq -r '.status.conditions[] | select(.type == "Completed") | .status')
-
-          if [ "$SUCCEEDED" = "True" ]; then
-            echo ""
-            echo "✅ GPU Tests Passed"
-          else
-            echo ""
-            echo "❌ GPU Tests Failed"
-            exit 1
-          fi
+      id-token: write  # Lets the called workflow request an OIDC token (previously used for Workload Identity Federation)
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 2f130df..06306b9 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -307,15 +307,15 @@ jobs:
           path: packages/${{ matrix.package }}/
           retention-days: 1
 
-  # GPU Integration Tests (reusable workflow)
+  # GPU Integration Tests (reusable workflow from private infra repo)
   gpu-tests:
     name: GPU Tests
     needs: build-and-test
-    uses: ./.github/workflows/gpu-test.yml
+    uses: lloyal-ai/lloyal-infra/.github/workflows/gpu-integration.yml@main  # NOTE(review): @main is a mutable ref and secrets are inherited; consider pinning to a commit SHA
     secrets: inherit
     permissions:
       contents: read
-      id-token: write  # Required for GCP Workload Identity Federation
+      id-token: write  # Lets the called workflow request an OIDC token (previously used for GCP Workload Identity Federation)
 
   publish:
     name: Publish all packages
diff --git a/.gitignore b/.gitignore
index 1f2f1b8..a7dddba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -38,4 +38,7 @@ Thumbs.db
 
 tmp/
 
-packages/darwin-arm64
\ No newline at end of file
+packages/darwin-arm64
+
+# CI infra scripts (injected from lloyal-infra during CI)
+ci/
diff --git a/ci/Dockerfile.gpu-tests b/ci/Dockerfile.gpu-tests
deleted file mode 100644
index cf53766..0000000
--- a/ci/Dockerfile.gpu-tests
+++ /dev/null
@@ -1,53 +0,0 @@
-# GPU Testing Image for lloyal.node (CUDA only)
-# Runs integration tests on NVIDIA L4 GPU via Cloud Run Jobs
-#
-# Note: Vulkan is tested separately on GitHub Actions with software rendering
-# because Cloud Run only mounts CUDA libs, not Vulkan/graphics driver components.
-#
-# Build: docker build -f ci/Dockerfile.gpu-tests -t gpu-tests .
-# Run:   docker run --gpus all gpu-tests
-
-# CUDA 12.2.2 required for Cloud Run L4 GPU (driver 535.x supports up to 12.2.x)
-FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04
-
-# Install runtime dependencies
-# - libgomp1: OpenMP runtime (required by llama.cpp CUDA build)
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    wget \
-    curl \
-    ca-certificates \
-    jq \
-    libgomp1 \
-    && rm -rf /var/lib/apt/lists/*
-
-# Install Node.js 24
-RUN wget -qO- https://deb.nodesource.com/setup_24.x | bash - \
-    && apt-get install -y nodejs \
-    && rm -rf /var/lib/apt/lists/*
-
-WORKDIR /app
-
-# Copy package.json first for dependency caching
-COPY package.json ./
-
-# Install dependencies (omit dev deps - only need runtime deps like @lloyal-labs/tsampler)
-RUN npm install --omit=dev
-
-# Copy pre-built packages from release workflow
-# These are downloaded as artifacts before docker build
-COPY packages/ ./packages/
-
-# Copy test files, examples, lib, and scripts
-COPY test/ ./test/
-COPY examples/ ./examples/
-COPY lib/ ./lib/
-COPY scripts/download-test-models.sh ./scripts/
-
-# Models downloaded at runtime via run-gpu-tests.sh
-# This allows testing multiple models from test/matrix.json
-# without baking large files into the image
-
-# Test script
-COPY ci/run-gpu-tests.sh ./
-
-ENTRYPOINT ["bash", "run-gpu-tests.sh"]
diff --git a/ci/run-gpu-tests.sh b/ci/run-gpu-tests.sh
deleted file mode 100755
index 8a6df8f..0000000
--- a/ci/run-gpu-tests.sh
+++ /dev/null
@@ -1,157 +0,0 @@
-#!/bin/bash
-set -e
-
-echo "=== GPU Test Environment ==="
-echo "CUDA:   $(nvcc --version 2>/dev/null | grep release || echo 'runtime-only')"
-echo "GPU:    $(nvidia-smi --query-gpu=name,driver_version --format=csv,noheader 2>/dev/null || echo 'N/A')"
-echo ""
-
-# GPU backend to test (passed as arg or env)
-GPU_BACKEND="${1:-${LLOYAL_GPU:-cuda}}"
-
-# Validate GPU backend to avoid injection and invalid values
-case "${GPU_BACKEND}" in
-  cuda|vulkan)
-    ;;
-  *)
-    echo "Error: Invalid GPU backend '${GPU_BACKEND}'. Allowed values are: cuda, vulkan." >&2
-    exit 1
-    ;;
-esac
-
-PACKAGE_NAME="linux-x64-${GPU_BACKEND}"
-
-echo "Testing backend: ${GPU_BACKEND}"
-echo "Installing package: ${PACKAGE_NAME}"
-
-# Install the pre-built package
-cd /app
-if [ -d "./packages/package-${PACKAGE_NAME}" ]; then
-  npm install "./packages/package-${PACKAGE_NAME}"
-else
-  echo "Error: Package directory not found: ./packages/package-${PACKAGE_NAME}"
-  ls -la ./packages/
-  exit 1
-fi
-
-echo ""
-echo "=== Downloading Test Models ==="
-./scripts/download-test-models.sh --all
-
-echo ""
-echo "=== Verifying Backend ==="
-node -e "
-const { loadBinary } = require('./lib');
-process.env.LLOYAL_GPU = '${GPU_BACKEND}';
-process.env.LLOYAL_NO_FALLBACK = '1';
-try {
-  const addon = loadBinary();
-  console.log('✓ Package loaded successfully');
-  console.log('  Exports:', Object.keys(addon));
-} catch (e) {
-  console.error('Failed to load binary:', e);
-  process.exit(1);
-}
-"
-
-# Common env for all test runs
-export LLOYAL_GPU="${GPU_BACKEND}"
-export LLOYAL_NO_FALLBACK=1
-export LLAMA_CTX_SIZE=4096
-
-echo ""
-echo "=== Running Model Matrix (nCtx=${LLAMA_CTX_SIZE}) ==="
-
-# Read model list from matrix.json
-MODELS=$(jq -c '.models[]' test/matrix.json)
-
-# Per-model results tracking
-TOTAL=0
-PASS=0
-FAIL=0
-declare -a RESULTS=()
-declare -a FAIL_DETAILS=()
-
-# Don't exit on per-model failure — track results and report at end
-set +e
-
-while IFS= read -r model; do
-  name=$(echo "$model" | jq -r '.name')
-  file=$(echo "$model" | jq -r '.file')
-
-  echo ""
-  echo "══════════════════════════════════════"
-  echo "MODEL: $name ($file)"
-  echo "══════════════════════════════════════"
-
-  TOTAL=$((TOTAL + 1))
-  MODEL_LOG=$(mktemp)
-  MODEL_FAILED=false
-
-  # --- Integration tests ---
-  echo "── Integration Tests ──"
-  LLAMA_TEST_MODEL="models/$file" \
-  node test/integration.js 2>&1 | tee "$MODEL_LOG"
-  INT_EXIT=${PIPESTATUS[0]}
-
-  if [ $INT_EXIT -ne 0 ]; then
-    MODEL_FAILED=true
-  fi
-
-  # --- Example tests ---
-  echo ""
-  echo "── Example Tests ──"
-  LLAMA_TEST_MODEL="models/$file" \
-  node test/examples.js 2>&1 | tee -a "$MODEL_LOG"
-  EX_EXIT=${PIPESTATUS[0]}
-
-  if [ $EX_EXIT -ne 0 ]; then
-    MODEL_FAILED=true
-  fi
-
-  # Per-model summary
-  if [ "$MODEL_FAILED" = false ]; then
-    RESULTS+=("✅ $name")
-    PASS=$((PASS + 1))
-  else
-    RESULTS+=("❌ $name")
-    FAIL=$((FAIL + 1))
-    # Extract failure lines for the final summary
-    FAILURES=$(grep -E '\[FAIL\]|❌ FAILED|Assertion failed|Fatal error' "$MODEL_LOG" | head -10)
-    FAIL_DETAILS+=("── $name ──"$'\n'"$FAILURES")
-  fi
-
-  rm -f "$MODEL_LOG"
-done <<< "$MODELS"
-
-set -e
-
-# Final summary table
-echo ""
-echo "══════════════════════════════════════"
-echo "MODEL MATRIX RESULTS"
-echo "══════════════════════════════════════"
-for r in "${RESULTS[@]}"; do echo "  $r"; done
-echo ""
-echo "Total: $PASS passed, $FAIL failed out of $TOTAL models"
-
-if [ $FAIL -gt 0 ] && [ ${#FAIL_DETAILS[@]} -gt 0 ]; then
-  echo ""
-  echo "══════════════════════════════════════"
-  echo "FAILURE DETAILS"
-  echo "══════════════════════════════════════"
-  for d in "${FAIL_DETAILS[@]}"; do
-    echo "$d"
-    echo ""
-  done
-fi
-
-if [ $FAIL -eq 0 ]; then
-  echo ""
-  echo "=== ✅ GPU Tests Passed ==="
-  exit 0
-else
-  echo ""
-  echo "=== ❌ GPU Tests Failed ==="
-  exit 1
-fi
diff --git a/ci/setup-infra.sh b/ci/setup-infra.sh
deleted file mode 100755
index 91742d1..0000000
--- a/ci/setup-infra.sh
+++ /dev/null
@@ -1,111 +0,0 @@
-#!/bin/bash
-set -e
-
-# --- CONFIGURATION ---
-PROJECT_ID="lloyal-node"
-GITHUB_REPO="lloyal-ai/lloyal.node"
-REGION="us-east4"
-
-# Infrastructure Names
-SA_NAME="github-ci-runner"
-POOL_NAME="github-pool"
-PROVIDER_NAME="github-provider"
-AR_REPO="lloyal-ci"
-
-echo "=== Provisioning GCP Infrastructure for $GITHUB_REPO ==="
-echo "Project: $PROJECT_ID"
-
-# 1. Setup Project & APIs
-gcloud config set project "$PROJECT_ID"
-
-echo "Enabling APIs..."
-gcloud services enable \
-  iam.googleapis.com \
-  iamcredentials.googleapis.com \
-  artifactregistry.googleapis.com \
-  run.googleapis.com \
-  logging.googleapis.com
-
-# 2. Artifact Registry
-if ! gcloud artifacts repositories describe "$AR_REPO" --location="$REGION" &>/dev/null; then
-    echo "Creating Artifact Registry repo..."
-    gcloud artifacts repositories create "$AR_REPO" \
-      --repository-format=docker \
-      --location="$REGION" \
-      --description="Docker repository for lloyal.node CI"
-else
-    echo "Artifact Registry repo exists."
-fi
-
-# 3. Service Account
-SA_EMAIL="${SA_NAME}@${PROJECT_ID}.iam.gserviceaccount.com"
-if ! gcloud iam service-accounts describe "$SA_EMAIL" &>/dev/null; then
-    echo "Creating Service Account..."
-    gcloud iam service-accounts create "$SA_NAME" --display-name="GitHub Actions CI Runner"
-else
-    echo "Service Account exists."
-fi
-
-# 4. Assign Roles
-echo "Assigning IAM roles..."
-
-# Grant Artifact Registry Writer (Push images)
-gcloud artifacts repositories add-iam-policy-binding "$AR_REPO" \
-  --location="$REGION" \
-  --member="serviceAccount:$SA_EMAIL" \
-  --role="roles/artifactregistry.writer" > /dev/null
-
-# Grant Cloud Run Developer (Create/Update Jobs)
-gcloud projects add-iam-policy-binding "$PROJECT_ID" \
-  --member="serviceAccount:$SA_EMAIL" \
-  --role="roles/run.developer" > /dev/null
-
-# Grant Cloud Run Invoker (Execute Jobs)
-gcloud projects add-iam-policy-binding "$PROJECT_ID" \
-  --member="serviceAccount:$SA_EMAIL" \
-  --role="roles/run.invoker" > /dev/null
-
-# Grant Logging Viewer (Read logs back to CI)
-gcloud projects add-iam-policy-binding "$PROJECT_ID" \
-  --member="serviceAccount:$SA_EMAIL" \
-  --role="roles/logging.viewer" > /dev/null
-
-# Grant Service Account User (Act as itself)
-gcloud iam service-accounts add-iam-policy-binding "$SA_EMAIL" \
-  --member="serviceAccount:$SA_EMAIL" \
-  --role="roles/iam.serviceAccountUser" > /dev/null
-
-# 5. Workload Identity Federation
-if ! gcloud iam workload-identity-pools describe "$POOL_NAME" --location="global" &>/dev/null; then
-    echo "Creating Identity Pool..."
-    gcloud iam workload-identity-pools create "$POOL_NAME" \
-      --location="global" \
-      --display-name="GitHub Actions Pool"
-fi
-
-POOL_ID=$(gcloud iam workload-identity-pools describe "$POOL_NAME" --location="global" --format="value(name)")
-
-# Create Provider with Security Condition
-if ! gcloud iam workload-identity-pools providers describe "$PROVIDER_NAME" --location="global" --workload-identity-pool="$POOL_NAME" &>/dev/null; then
-    echo "Creating Identity Provider..."
-    gcloud iam workload-identity-pools providers create-oidc "$PROVIDER_NAME" \
-      --location="global" \
-      --workload-identity-pool="$POOL_NAME" \
-      --display-name="GitHub Provider" \
-      --attribute-mapping="google.subject=assertion.sub,attribute.actor=assertion.actor,attribute.repository=assertion.repository" \
-      --attribute-condition="assertion.repository_owner == 'lloyal-ai'" \
-      --issuer-uri="https://token.actions.githubusercontent.com"
-fi
-
-echo "Binding GitHub Repo to Service Account..."
-gcloud iam service-accounts add-iam-policy-binding "$SA_EMAIL" \
-  --role="roles/iam.workloadIdentityUser" \
-  --member="principalSet://iam.googleapis.com/${POOL_ID}/attribute.repository/${GITHUB_REPO}" > /dev/null
-
-# --- OUTPUT ---
-PROVIDER_FULL_PATH=$(gcloud iam workload-identity-pools providers describe "$PROVIDER_NAME" --location="global" --workload-identity-pool="$POOL_NAME" --format="value(name)")
-
-echo "DONE:"
-echo "GCP_PROJECT_ID   : $PROJECT_ID"
-echo "GCP_SA_EMAIL     : $SA_EMAIL"
-echo "GCP_WIF_PROVIDER : $PROVIDER_FULL_PATH"
\ No newline at end of file

From ca5d40550d352c46a526355e9488f69be1e6e34c Mon Sep 17 00:00:00 2001
From: LLoyal Research <research@lloyal.ai>
Date: Thu, 12 Feb 2026 20:02:31 +1100
Subject: [PATCH 2/3] feat(ci): infra integration - fix tests

---
 .github/workflows/tests.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 6861128..b8e01c8 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -64,6 +64,9 @@ jobs:
 
       - name: Build from submodules
+        env:
+          # Force the CPU backend — GitHub Actions paravirtual Metal GPU has driver bugs
+          LLOYAL_GPU: cpu
         run: npm run build
         # This runs scripts/build.js which:
         # 1. Builds llama.cpp from llama.cpp/
         # 2. Builds liblloyal from liblloyal/

From 09fe6308116eb591d5cb32b4a06d9a373c627c8e Mon Sep 17 00:00:00 2001
From: LLoyal Research <research@lloyal.ai>
Date: Thu, 12 Feb 2026 20:18:00 +1100
Subject: [PATCH 3/3] feat(ci): infra integration - remove skip_build input

---
 .github/workflows/gpu-test.yml | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/gpu-test.yml b/.github/workflows/gpu-test.yml
index e07dbd8..5717180 100644
--- a/.github/workflows/gpu-test.yml
+++ b/.github/workflows/gpu-test.yml
@@ -11,18 +11,11 @@ on:
       - 'test/**'
       - 'CMakeLists.txt'
   workflow_dispatch:
-    inputs:
-      skip_build:
-        description: 'Skip build step (use existing artifacts)'
-        type: boolean
-        default: false
 
 jobs:
-  # Build CUDA package for testing
-  # Skipped when called from release.yml (packages already built)
   build-cuda-package:
     name: Build linux-x64-cuda
-    if: ${{ github.repository == 'lloyal-ai/lloyal.node' && inputs.skip_build != true }}
+    if: ${{ github.repository == 'lloyal-ai/lloyal.node' }}
     runs-on: ubuntu-22.04
 
     steps:
@@ -81,7 +74,7 @@ jobs:
   gpu-integration:
     name: GPU Tests (L4)
     needs: build-cuda-package
-    if: ${{ github.repository == 'lloyal-ai/lloyal.node' && !cancelled() && (needs.build-cuda-package.result == 'success' || needs.build-cuda-package.result == 'skipped') }}
+    if: ${{ github.repository == 'lloyal-ai/lloyal.node' }}
     uses: lloyal-ai/lloyal-infra/.github/workflows/gpu-integration.yml@main
     secrets: inherit
     permissions: