diff --git a/actions/setup/js/check_skip_if_check_failing.cjs b/actions/setup/js/check_skip_if_check_failing.cjs index 915c5a26c0..fff1ebd93b 100644 --- a/actions/setup/js/check_skip_if_check_failing.cjs +++ b/actions/setup/js/check_skip_if_check_failing.cjs @@ -1,7 +1,7 @@ // @ts-check /// -const { getErrorMessage } = require("./error_helpers.cjs"); +const { getErrorMessage, isRateLimitError } = require("./error_helpers.cjs"); const { ERR_API } = require("./error_codes.cjs"); const { getBaseBranch } = require("./get_base_branch.cjs"); @@ -212,7 +212,18 @@ async function main() { core.info(`✓ No failing checks found on "${ref}", workflow can proceed`); core.setOutput("skip_if_check_failing_ok", "true"); } catch (error) { - core.setFailed(`${ERR_API}: Failed to fetch check runs for ref "${ref}": ${getErrorMessage(error)}`); + const errorMsg = getErrorMessage(error); + // Gracefully handle API rate limit errors (fail-open) to avoid blocking the workflow + // due to transient GitHub API availability issues. When multiple workflows run + // simultaneously, they can exhaust the installation API rate limit, causing this + // check to fail. Failing open matches the behavior of other pre-activation checks. 
+ if (isRateLimitError(error)) { + core.warning(`⚠️ API rate limit exceeded while checking CI status for ref "${ref}": ${errorMsg}`); + core.warning(`Allowing workflow to proceed (fail-open on rate limit)`); + core.setOutput("skip_if_check_failing_ok", "true"); + } else { + core.setFailed(`${ERR_API}: Failed to fetch check runs for ref "${ref}": ${errorMsg}`); + } } } diff --git a/actions/setup/js/check_skip_if_check_failing.test.cjs b/actions/setup/js/check_skip_if_check_failing.test.cjs index 15d7598881..a1e8e30311 100644 --- a/actions/setup/js/check_skip_if_check_failing.test.cjs +++ b/actions/setup/js/check_skip_if_check_failing.test.cjs @@ -268,14 +268,38 @@ describe("check_skip_if_check_failing.cjs", () => { expect(mockCore.setOutput).toHaveBeenCalledWith("skip_if_check_failing_ok", "true"); }); - it("should fail with error message when API call fails", async () => { - mockGithub.paginate.mockRejectedValue(new Error("Rate limit exceeded")); + it("should allow workflow when API call fails due to rate limiting (fail-open)", async () => { + mockGithub.paginate.mockRejectedValue(new Error("API rate limit exceeded for installation")); + + const { main } = await import("./check_skip_if_check_failing.cjs"); + await main(); + + // Rate limit errors should fail-open: allow the workflow to proceed + expect(mockCore.setFailed).not.toHaveBeenCalled(); + expect(mockCore.warning).toHaveBeenCalledWith(expect.stringContaining("rate limit")); + expect(mockCore.setOutput).toHaveBeenCalledWith("skip_if_check_failing_ok", "true"); + }); + + it("should allow workflow when API call fails with 'rate limit exceeded' message (fail-open)", async () => { + mockGithub.paginate.mockRejectedValue(new Error("rate limit exceeded: please retry after 60 seconds")); + + const { main } = await import("./check_skip_if_check_failing.cjs"); + await main(); + + // 'rate limit exceeded' variant should also fail-open + expect(mockCore.setFailed).not.toHaveBeenCalled(); + 
expect(mockCore.warning).toHaveBeenCalledWith(expect.stringContaining("rate limit")); + expect(mockCore.setOutput).toHaveBeenCalledWith("skip_if_check_failing_ok", "true"); + }); + + it("should fail with error message when non-rate-limit API call fails", async () => { + mockGithub.paginate.mockRejectedValue(new Error("Network connection error")); const { main } = await import("./check_skip_if_check_failing.cjs"); await main(); expect(mockCore.setFailed).toHaveBeenCalledWith(expect.stringContaining("Failed to fetch check runs")); - expect(mockCore.setFailed).toHaveBeenCalledWith(expect.stringContaining("Rate limit exceeded")); + expect(mockCore.setFailed).toHaveBeenCalledWith(expect.stringContaining("Network connection error")); expect(mockCore.setOutput).not.toHaveBeenCalled(); }); diff --git a/actions/setup/js/error_helpers.cjs b/actions/setup/js/error_helpers.cjs index e762b6aa65..aaba9e8483 100644 --- a/actions/setup/js/error_helpers.cjs +++ b/actions/setup/js/error_helpers.cjs @@ -40,4 +40,18 @@ function isLockedError(error) { return hasLockedMessage; } -module.exports = { getErrorMessage, isLockedError }; +/** + * Check if an error is due to a GitHub API rate limit being exceeded. + * This includes both installation-level and user-level rate limits. + * Used to determine if a check should fail-open (allow workflow to proceed) + * rather than hard-failing when the error is transient. Detection is textual only: the value returned by getErrorMessage(error) is tested, case-insensitively, for the whole-word phrases "api rate limit" or "rate limit exceeded". NOTE(review): a message containing neither phrase (e.g. "exceeded a secondary rate limit") is not classified as a rate-limit error - confirm that is intended. 
+ * + * @param {unknown} error - The error value to check + * @returns {boolean} True if the error message matches one of the rate-limit phrases, false otherwise + */ +function isRateLimitError(error) { + const errorMessage = getErrorMessage(error); + return /\bapi rate limit\b|\brate limit exceeded\b/i.test(errorMessage); +} + +module.exports = { getErrorMessage, isLockedError, isRateLimitError }; diff --git a/actions/setup/js/error_helpers.test.cjs b/actions/setup/js/error_helpers.test.cjs index fbb9555c7c..7aa5357215 100644 --- a/actions/setup/js/error_helpers.test.cjs +++ b/actions/setup/js/error_helpers.test.cjs @@ -1,5 +1,5 @@ import { describe, it, expect } from "vitest"; -import { getErrorMessage, isLockedError } from "./error_helpers.cjs"; +import { getErrorMessage, isLockedError, isRateLimitError } from "./error_helpers.cjs"; describe("error_helpers", () => { describe("getErrorMessage", () => { @@ -89,4 +89,35 @@ describe("error_helpers", () => { expect(isLockedError(error)).toBe(true); }); }); + + describe("isRateLimitError", () => { + it("should return true for 'API rate limit exceeded' message", () => { + expect(isRateLimitError(new Error("API rate limit exceeded for installation"))).toBe(true); + }); + + it("should return true for 'rate limit exceeded' message", () => { + expect(isRateLimitError(new Error("rate limit exceeded: please retry after 60 seconds"))).toBe(true); + }); + + it("should return true for mixed-case 'API Rate Limit' message", () => { + expect(isRateLimitError(new Error("API Rate Limit exceeded"))).toBe(true); + }); + + it("should return false for unrelated API errors", () => { + expect(isRateLimitError(new Error("Network connection error"))).toBe(false); + }); + + it("should return false for null error", () => { + expect(isRateLimitError(null)).toBe(false); + }); + + it("should return false for undefined error", () => { + expect(isRateLimitError(undefined)).toBe(false); + }); + + it("should return false for non-rate-limit 403 errors", () => { + const 
error = new Error("Forbidden: insufficient permissions"); + expect(isRateLimitError(error)).toBe(false); + }); + }); }); diff --git a/actions/setup/sh/install_awf_binary.sh b/actions/setup/sh/install_awf_binary.sh index dcb0519990..a79e72771d 100755 --- a/actions/setup/sh/install_awf_binary.sh +++ b/actions/setup/sh/install_awf_binary.sh @@ -209,16 +209,18 @@ else install_platform_binary fi -# Verify installation -# Use the absolute path to avoid PATH issues on self-hosted or GPU runners -# where ${AWF_INSTALL_DIR} may not be in the user PATH. The binary is always -# accessible in subsequent steps via sudo (which includes /usr/local/bin). +# Verify installation by running --version with sudo. +# Use sudo to match how awf is invoked in subsequent steps (sudo -E awf ...). +# On GPU runners (e.g. aw-gpu-runner-T4), /usr/local/bin may be inaccessible +# to the current non-root user due to filesystem or security policy restrictions, +# so running the version check without sudo would fail with "Permission denied". +# A successful run prints the version string (e.g. "0.25.13") to stdout. # Also clear DIFC (Data Integrity and Filtering Controls) proxy env vars # set by start_difc_proxy.sh. When the DIFC proxy is active, GITHUB_API_URL # and GITHUB_GRAPHQL_URL point to localhost:18443 and GH_HOST is overridden. # The AWF bundle may try to reach these endpoints on startup, causing the # version check to fail with a connection error if the proxy rejects the request. -env -u GITHUB_API_URL -u GITHUB_GRAPHQL_URL -u GH_HOST \ +sudo env -u GITHUB_API_URL -u GITHUB_GRAPHQL_URL -u GH_HOST \ "${AWF_INSTALL_DIR}/${AWF_INSTALL_NAME}" --version echo "✓ AWF installation complete"