github · pelikhan · Apr 4, 2026 · Apr 4, 2026 · Apr 4, 2026 · Apr 4, 2026
diff --git a/actions/setup/js/check_skip_if_check_failing.cjs b/actions/setup/js/check_skip_if_check_failing.cjs
@@ -1,7 +1,7 @@
 // @ts-check
 /// <reference types="@actions/github-script" />
 
-const { getErrorMessage } = require("./error_helpers.cjs");
+const { getErrorMessage, isRateLimitError } = require("./error_helpers.cjs");
 const { ERR_API } = require("./error_codes.cjs");
 const { getBaseBranch } = require("./get_base_branch.cjs");
 
@@ -212,7 +212,18 @@ async function main() {
     core.info(`✓ No failing checks found on "${ref}", workflow can proceed`);
     core.setOutput("skip_if_check_failing_ok", "true");
   } catch (error) {
-    core.setFailed(`${ERR_API}: Failed to fetch check runs for ref "${ref}": ${getErrorMessage(error)}`);
+    const errorMsg = getErrorMessage(error);
+    // Gracefully handle API rate limit errors (fail-open) to avoid blocking the workflow
+    // due to transient GitHub API availability issues. When multiple workflows run
+    // simultaneously, they can exhaust the installation API rate limit, causing this
+    // check to fail. Failing open matches the behavior of other pre-activation checks.
+    if (isRateLimitError(error)) {
+      core.warning(`⚠️ API rate limit exceeded while checking CI status for ref "${ref}": ${errorMsg}`);
+      core.warning(`Allowing workflow to proceed (fail-open on rate limit)`);
+      core.setOutput("skip_if_check_failing_ok", "true");
+    } else {
+      core.setFailed(`${ERR_API}: Failed to fetch check runs for ref "${ref}": ${errorMsg}`);
+    }
   }
 }
 

diff --git a/actions/setup/js/check_skip_if_check_failing.test.cjs b/actions/setup/js/check_skip_if_check_failing.test.cjs
@@ -268,14 +268,38 @@ describe("check_skip_if_check_failing.cjs", () => {
     expect(mockCore.setOutput).toHaveBeenCalledWith("skip_if_check_failing_ok", "true");
   });
 
-  it("should fail with error message when API call fails", async () => {
-    mockGithub.paginate.mockRejectedValue(new Error("Rate limit exceeded"));
+  it("should allow workflow when API call fails due to rate limiting (fail-open)", async () => {
+    const rateLimitError = new Error("API rate limit exceeded for installation");
+    mockGithub.paginate.mockRejectedValue(rateLimitError);
+    const { main: runCheck } = await import("./check_skip_if_check_failing.cjs");
+    await runCheck();
+
+    // Fail-open: a rate-limited API call must not fail the step; it warns and lets the workflow run
+    expect(mockCore.setFailed).not.toHaveBeenCalled();
+    expect(mockCore.warning).toHaveBeenCalledWith(expect.stringContaining("rate limit"));
+    expect(mockCore.setOutput).toHaveBeenCalledWith("skip_if_check_failing_ok", "true");
+  });
+
+  it("should allow workflow when API call fails with 'rate limit exceeded' message (fail-open)", async () => {
+    const retryAfterError = new Error("rate limit exceeded: please retry after 60 seconds");
+    mockGithub.paginate.mockRejectedValue(retryAfterError);
+    const { main: runCheck } = await import("./check_skip_if_check_failing.cjs");
+    await runCheck();
+
+    // The bare 'rate limit exceeded' wording is handled the same way: warn, do not fail, allow the run
+    expect(mockCore.setFailed).not.toHaveBeenCalled();
+    expect(mockCore.warning).toHaveBeenCalledWith(expect.stringContaining("rate limit"));
+    expect(mockCore.setOutput).toHaveBeenCalledWith("skip_if_check_failing_ok", "true");
+  });
+
+  it("should fail with error message when non-rate-limit API call fails", async () => {
+    mockGithub.paginate.mockRejectedValue(new Error("Network connection error"));
 
     const { main } = await import("./check_skip_if_check_failing.cjs");
     await main();
 
     expect(mockCore.setFailed).toHaveBeenCalledWith(expect.stringContaining("Failed to fetch check runs"));
-    expect(mockCore.setFailed).toHaveBeenCalledWith(expect.stringContaining("Rate limit exceeded"));
+    expect(mockCore.setFailed).toHaveBeenCalledWith(expect.stringContaining("Network connection error"));
     expect(mockCore.setOutput).not.toHaveBeenCalled();
   });
 

diff --git a/actions/setup/js/error_helpers.cjs b/actions/setup/js/error_helpers.cjs
@@ -40,4 +40,18 @@ function isLockedError(error) {
   return hasLockedMessage;
 }
 
-module.exports = { getErrorMessage, isLockedError };
+/**
+ * Determine whether an error was caused by exceeding a GitHub API rate limit.
+ * The error's message is matched case-insensitively against the phrases
+ * "api rate limit" and "rate limit exceeded". Callers use the result to
+ * fail-open (let the workflow proceed) instead of hard-failing on a transient error.
+ *
+ * @param {unknown} error - The error value to inspect
+ * @returns {boolean} True when the message indicates API rate limiting, false otherwise
+ */
+function isRateLimitError(error) {
+  const rateLimitPattern = /\bapi rate limit\b|\brate limit exceeded\b/i;
+  return rateLimitPattern.test(getErrorMessage(error));
+}
+
+module.exports = { getErrorMessage, isLockedError, isRateLimitError };
diff --git a/actions/setup/js/error_helpers.test.cjs b/actions/setup/js/error_helpers.test.cjs
@@ -1,5 +1,5 @@
 import { describe, it, expect } from "vitest";
-import { getErrorMessage, isLockedError } from "./error_helpers.cjs";
+import { getErrorMessage, isLockedError, isRateLimitError } from "./error_helpers.cjs";
 
 describe("error_helpers", () => {
   describe("getErrorMessage", () => {
@@ -89,4 +89,35 @@ describe("error_helpers", () => {
       expect(isLockedError(error)).toBe(true);
     });
   });
+
+  describe("isRateLimitError", () => {
+    // Rate-limit wording matches regardless of casing; other errors and null/undefined do not.
+    it("should return true for 'API rate limit exceeded' message", () => {
+      const installationLimit = new Error("API rate limit exceeded for installation");
+      expect(isRateLimitError(installationLimit)).toBe(true);
+    });
+    it("should return true for 'rate limit exceeded' message", () => {
+      const retryAfterLimit = new Error("rate limit exceeded: please retry after 60 seconds");
+      expect(isRateLimitError(retryAfterLimit)).toBe(true);
+    });
+    it("should return true for mixed-case 'API Rate Limit' message", () => {
+      const mixedCaseLimit = new Error("API Rate Limit exceeded");
+      expect(isRateLimitError(mixedCaseLimit)).toBe(true);
+    });
+
+    it("should return false for unrelated API errors", () => {
+      const networkFailure = new Error("Network connection error");
+      expect(isRateLimitError(networkFailure)).toBe(false);
+    });
+    it("should return false for null error", () => {
+      expect(isRateLimitError(null)).toBe(false);
+    });
+    it("should return false for undefined error", () => {
+      expect(isRateLimitError(undefined)).toBe(false);
+    });
+    it("should return false for non-rate-limit 403 errors", () => {
+      const forbidden = new Error("Forbidden: insufficient permissions");
+      expect(isRateLimitError(forbidden)).toBe(false);
+    });
+  });
 });
diff --git a/actions/setup/sh/install_awf_binary.sh b/actions/setup/sh/install_awf_binary.sh
@@ -209,16 +209,18 @@ else
   install_platform_binary
 fi
 
-# Verify installation
-# Use the absolute path to avoid PATH issues on self-hosted or GPU runners
-# where ${AWF_INSTALL_DIR} may not be in the user PATH. The binary is always
-# accessible in subsequent steps via sudo (which includes /usr/local/bin).
+# Verify installation by running --version with sudo.
+# Use sudo to match how awf is invoked in subsequent steps (sudo -E awf ...).
+# On GPU runners (e.g. aw-gpu-runner-T4), /usr/local/bin may be inaccessible
+# to the current non-root user due to filesystem or security policy restrictions,
+# so running the version check without sudo would fail with "Permission denied".
+# A successful run prints the version string (e.g. "0.25.13") to stdout.
 # Also clear DIFC (Data Integrity and Filtering Controls) proxy env vars
 # set by start_difc_proxy.sh. When the DIFC proxy is active, GITHUB_API_URL
 # and GITHUB_GRAPHQL_URL point to localhost:18443 and GH_HOST is overridden.
 # The AWF bundle may try to reach these endpoints on startup, causing the
 # version check to fail with a connection error if the proxy rejects the request.
-env -u GITHUB_API_URL -u GITHUB_GRAPHQL_URL -u GH_HOST \
+sudo env -u GITHUB_API_URL -u GITHUB_GRAPHQL_URL -u GH_HOST \
-sudo env -u GITHUB_API_URL -u GITHUB_GRAPHQL_URL -u GH_HOST \
+# Preserve the current PATH so bundle installs that execute `node` via PATH keep using the
+# validated Node.js under sudo. The assignment follows -u: env parses no options after NAME=VALUE.
+sudo env -u GITHUB_API_URL -u GITHUB_GRAPHQL_URL -u GH_HOST PATH="${PATH}" \
-sudo env -u GITHUB_API_URL -u GITHUB_GRAPHQL_URL -u GH_HOST \
+# Preserve the current PATH so bundle installs that execute `node` via PATH keep using the
+# validated Node.js under sudo. The assignment follows -u: env parses no options after NAME=VALUE.
+sudo env -u GITHUB_API_URL -u GITHUB_GRAPHQL_URL -u GH_HOST PATH="${PATH}" \
     "${AWF_INSTALL_DIR}/${AWF_INSTALL_NAME}" --version
 
 echo "✓ AWF installation complete"