From 2ba0a47ef89a22dd844f2d72806e27d5978097df Mon Sep 17 00:00:00 2001 From: Alisha Kawaguchi Date: Mon, 23 Mar 2026 12:18:59 -0700 Subject: [PATCH 01/14] fix: replace slash commands with explicit Read instructions in e2e-triage workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit claude-code-action@v1 does not install project plugins, so /e2e:triage-ci and /e2e:implement slash commands were not resolved. The triage step completed in 21ms with $0 API cost — the model was never called, producing empty output. Replace slash commands with explicit "Read and follow" instructions that use the Read tool (already in allowedTools) to load skill files directly. Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: 77de372b0fce --- .github/workflows/e2e-triage.yml | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/.github/workflows/e2e-triage.yml b/.github/workflows/e2e-triage.yml index e26a2bb23..d2885b4c4 100644 --- a/.github/workflows/e2e-triage.yml +++ b/.github/workflows/e2e-triage.yml @@ -188,7 +188,14 @@ jobs: uses: anthropics/claude-code-action@v1 with: prompt: | - /e2e:triage-ci ${{ steps.artifacts.outputs.path }} --agent ${{ matrix.agent }} --sha ${{ needs.matrix-setup.outputs.sha }} + Read and follow the full E2E triage procedure from .claude/skills/e2e/triage-ci.md. + + Inputs: + - Local artifact path: ${{ steps.artifacts.outputs.path }} + - Agent: ${{ matrix.agent }} + - SHA: ${{ needs.matrix-setup.outputs.sha }} + + This is a CI artifact analysis (local path provided) -- skip Steps L2-L5 and go straight to Shared Analysis. 
anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} claude_args: "--allowedTools 'Read,Grep,Glob'" display_report: true @@ -205,7 +212,12 @@ jobs: COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} with: prompt: | - /e2e:triage-ci ${{ env.RUN_URL }} --agent ${{ matrix.agent }} --sha ${{ needs.matrix-setup.outputs.sha }} + Read and follow the full E2E triage procedure from .claude/skills/e2e/triage-ci.md. + + Inputs: + - CI run URL: ${{ env.RUN_URL }} + - Agent: ${{ matrix.agent }} + - SHA: ${{ needs.matrix-setup.outputs.sha }} anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} claude_args: "--allowedTools 'Read,Grep,Glob,Bash(mise:*),Bash(scripts:*)'" display_report: true @@ -251,7 +263,9 @@ jobs: uses: anthropics/claude-code-action@v1 with: prompt: | - /e2e:implement Read the triage findings at ${{ github.workspace }}/e2e-triage-artifacts/${{ matrix.agent }}/triage.md for agent ${{ matrix.agent }}. + Read and follow the fix implementation procedure from .claude/skills/e2e/implement.md. + + Read the triage findings at ${{ github.workspace }}/e2e-triage-artifacts/${{ matrix.agent }}/triage.md for agent ${{ matrix.agent }}. anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} claude_args: "--allowedTools 'Read,Grep,Glob,Write,EnterPlanMode,ExitPlanMode,Bash(mise:*)'" display_report: true From fb140e834a38a1c4cee981c716857f5206a54d00 Mon Sep 17 00:00:00 2001 From: Alisha Kawaguchi Date: Mon, 23 Mar 2026 12:34:29 -0700 Subject: [PATCH 02/14] fix: add github_token to bypass OIDC branch validation for testing claude-code-action's OIDC token exchange requires the workflow file to match the default branch, preventing testing on feature branches. Pass github_token directly to bypass this restriction. 
Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: d73c731f6abc --- .github/workflows/e2e-triage.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/e2e-triage.yml b/.github/workflows/e2e-triage.yml index d2885b4c4..d59b13570 100644 --- a/.github/workflows/e2e-triage.yml +++ b/.github/workflows/e2e-triage.yml @@ -199,6 +199,7 @@ jobs: anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} claude_args: "--allowedTools 'Read,Grep,Glob'" display_report: true + github_token: ${{ github.token }} - name: Run triage (with re-runs) id: triage_rerun @@ -221,6 +222,7 @@ jobs: anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} claude_args: "--allowedTools 'Read,Grep,Glob,Bash(mise:*),Bash(scripts:*)'" display_report: true + github_token: ${{ github.token }} - name: Resolve triage outcome id: triage @@ -269,6 +271,7 @@ jobs: anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} claude_args: "--allowedTools 'Read,Grep,Glob,Write,EnterPlanMode,ExitPlanMode,Bash(mise:*)'" display_report: true + github_token: ${{ github.token }} - name: Extract plan output id: plan_output From 5aa090ee39356ddaf67b7f179be45aa7e67421e8 Mon Sep 17 00:00:00 2001 From: Alisha Kawaguchi Date: Mon, 23 Mar 2026 13:06:19 -0700 Subject: [PATCH 03/14] feat: combine triage + plan into single plan-mode prompt Merge the two claude-code-action invocations (triage + plan) into a single prompt that runs in plan mode. Claude writes both triage findings and fix plan to the plan file, which is then extracted and split into triage.md and plan.md artifacts. 
Benefits: - Single invocation reduces cost (~$0.60 vs $0.86) - Plan mode gives structured reasoning for fix plans - Plan content captured from file (fixes empty plan artifact) Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: 9a30a28f697c --- .github/workflows/e2e-triage.yml | 74 ++++++++++++++++---------------- 1 file changed, 36 insertions(+), 38 deletions(-) diff --git a/.github/workflows/e2e-triage.yml b/.github/workflows/e2e-triage.yml index d59b13570..749ec44b4 100644 --- a/.github/workflows/e2e-triage.yml +++ b/.github/workflows/e2e-triage.yml @@ -188,7 +188,7 @@ jobs: uses: anthropics/claude-code-action@v1 with: prompt: | - Read and follow the full E2E triage procedure from .claude/skills/e2e/triage-ci.md. + Enter plan mode, then read and follow the full E2E triage procedure from .claude/skills/e2e/triage-ci.md. Inputs: - Local artifact path: ${{ steps.artifacts.outputs.path }} @@ -196,8 +196,14 @@ jobs: - SHA: ${{ needs.matrix-setup.outputs.sha }} This is a CI artifact analysis (local path provided) -- skip Steps L2-L5 and go straight to Shared Analysis. + + Write ALL output to the plan file -- both the triage findings report and the fix plan. Structure it as: + 1. Triage findings (per-test findings blocks + summary table from the skill) + 2. A "## Fix Plan" section with specific code changes for each actionable finding + + When done, call ExitPlanMode. anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} - claude_args: "--allowedTools 'Read,Grep,Glob'" + claude_args: "--allowedTools 'Read,Grep,Glob,Write,EnterPlanMode,ExitPlanMode'" display_report: true github_token: ${{ github.token }} @@ -213,14 +219,20 @@ jobs: COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} with: prompt: | - Read and follow the full E2E triage procedure from .claude/skills/e2e/triage-ci.md. + Enter plan mode, then read and follow the full E2E triage procedure from .claude/skills/e2e/triage-ci.md. 
Inputs: - CI run URL: ${{ env.RUN_URL }} - Agent: ${{ matrix.agent }} - SHA: ${{ needs.matrix-setup.outputs.sha }} + + Write ALL output to the plan file -- both the triage findings report and the fix plan. Structure it as: + 1. Triage findings (per-test findings blocks + summary table from the skill) + 2. A "## Fix Plan" section with specific code changes for each actionable finding + + When done, call ExitPlanMode. anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} - claude_args: "--allowedTools 'Read,Grep,Glob,Bash(mise:*),Bash(scripts:*)'" + claude_args: "--allowedTools 'Read,Grep,Glob,Write,EnterPlanMode,ExitPlanMode,Bash(mise:*),Bash(scripts:*)'" display_report: true github_token: ${{ github.token }} @@ -250,43 +262,29 @@ jobs: env: EXECUTION_FILE: ${{ steps.triage.outputs.execution_file }} TRIAGE_OUTPUT_FILE: ${{ github.workspace }}/e2e-triage-artifacts/${{ matrix.agent }}/triage.md + PLAN_FILE: ${{ github.workspace }}/e2e-triage-artifacts/${{ matrix.agent }}/plan.md run: | set -euo pipefail - mkdir -p "$(dirname "$TRIAGE_OUTPUT_FILE")" - # Extract assistant text content from execution JSON - jq -r '[.[] | select(.type == "assistant") | .message.content[] - | select(.type == "text") | .text] | join("\n")' \ - "$EXECUTION_FILE" > "$TRIAGE_OUTPUT_FILE" - - - name: Generate fix plan - id: plan - if: steps.triage.outputs.outcome == 'success' - uses: anthropics/claude-code-action@v1 - with: - prompt: | - Read and follow the fix implementation procedure from .claude/skills/e2e/implement.md. - - Read the triage findings at ${{ github.workspace }}/e2e-triage-artifacts/${{ matrix.agent }}/triage.md for agent ${{ matrix.agent }}. 
- anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} - claude_args: "--allowedTools 'Read,Grep,Glob,Write,EnterPlanMode,ExitPlanMode,Bash(mise:*)'" - display_report: true - github_token: ${{ github.token }} - - name: Extract plan output - id: plan_output - if: steps.plan.outcome == 'success' && steps.plan.outputs.execution_file != '' - shell: bash - env: - EXECUTION_FILE: ${{ steps.plan.outputs.execution_file }} - PLAN_FILE: ${{ github.workspace }}/e2e-triage-artifacts/${{ matrix.agent }}/plan.md - run: | - set -euo pipefail + # Try to extract from plan file on disk (plan mode writes here) + plan_src=$(find /home/runner/.claude/plans -name '*.md' -type f 2>/dev/null | head -1) + if [ -n "$plan_src" ] && [ -s "$plan_src" ]; then + combined="$(cat "$plan_src")" + else + # Fallback: extract assistant text from execution JSON + combined=$(jq -r '[.[] | select(.type == "assistant") | .message.content[] + | select(.type == "text") | .text] | join("\n")' "$EXECUTION_FILE") + fi - mkdir -p "$(dirname "$PLAN_FILE")" - jq -r '[.[] | select(.type == "assistant") | .message.content[] - | select(.type == "text") | .text] | join("\n")' \ - "$EXECUTION_FILE" > "$PLAN_FILE" + # Split on "## Fix Plan" header + if echo "$combined" | grep -q "^## Fix Plan"; then + echo "$combined" | sed '/^## Fix Plan/,$d' > "$TRIAGE_OUTPUT_FILE" + echo "$combined" | sed -n '/^## Fix Plan/,$p' > "$PLAN_FILE" + else + echo "$combined" > "$TRIAGE_OUTPUT_FILE" + echo "$combined" > "$PLAN_FILE" + fi - name: Post triage completion if: ${{ always() && (steps.triage.outputs.outcome == 'success' || steps.triage.outputs.outcome == 'failure') && env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != '' }} @@ -305,7 +303,7 @@ jobs: scripts/post-slack-message.sh "$message" - name: Post fix plan to Slack - if: steps.plan_output.outcome == 'success' && env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != '' + if: steps.triage_output.outcome == 'success' && 
env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != '' shell: bash env: PLAN_FILE: ${{ github.workspace }}/e2e-triage-artifacts/${{ matrix.agent }}/plan.md @@ -336,7 +334,7 @@ jobs: retention-days: 7 - name: Upload plan artifact - if: always() && steps.plan_output.outcome == 'success' + if: always() && steps.triage_output.outcome == 'success' uses: actions/upload-artifact@v7 with: name: e2e-plan-${{ matrix.agent }} From 357c9b34af0c119fd647ef65900ae5e3d26caac2 Mon Sep 17 00:00:00 2001 From: Alisha Kawaguchi Date: Mon, 23 Mar 2026 13:24:20 -0700 Subject: [PATCH 04/14] fix: clean summary, remove duplicate artifact, add execution.json - Set display_report: false to remove tool call noise from summary tab - Add custom job summary step that shows triage + plan markdown - Remove redundant "Upload plan artifact" (triage upload has both files) - Copy execution.json to artifacts for debugging Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: 609909ae979f --- .github/workflows/e2e-triage.yml | 40 +++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/.github/workflows/e2e-triage.yml b/.github/workflows/e2e-triage.yml index 749ec44b4..5deb41323 100644 --- a/.github/workflows/e2e-triage.yml +++ b/.github/workflows/e2e-triage.yml @@ -204,7 +204,7 @@ jobs: When done, call ExitPlanMode. anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} claude_args: "--allowedTools 'Read,Grep,Glob,Write,EnterPlanMode,ExitPlanMode'" - display_report: true + display_report: false github_token: ${{ github.token }} - name: Run triage (with re-runs) @@ -233,7 +233,7 @@ jobs: When done, call ExitPlanMode. 
anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} claude_args: "--allowedTools 'Read,Grep,Glob,Write,EnterPlanMode,ExitPlanMode,Bash(mise:*),Bash(scripts:*)'" - display_report: true + display_report: false github_token: ${{ github.token }} - name: Resolve triage outcome @@ -286,6 +286,35 @@ jobs: echo "$combined" > "$PLAN_FILE" fi + # Copy execution JSON to artifacts for debugging + cp "$EXECUTION_FILE" "$(dirname "$TRIAGE_OUTPUT_FILE")/execution.json" 2>/dev/null || true + + - name: Write job summary + if: always() && steps.triage_output.outcome == 'success' + shell: bash + env: + TRIAGE_FILE: ${{ github.workspace }}/e2e-triage-artifacts/${{ matrix.agent }}/triage.md + PLAN_FILE: ${{ github.workspace }}/e2e-triage-artifacts/${{ matrix.agent }}/plan.md + run: | + set -euo pipefail + { + echo "# E2E Triage: ${{ matrix.agent }}" + echo "" + if [ -s "$TRIAGE_FILE" ]; then + cat "$TRIAGE_FILE" + else + echo "_No triage findings._" + fi + echo "" + echo "---" + echo "" + if [ -s "$PLAN_FILE" ]; then + cat "$PLAN_FILE" + else + echo "_No fix plan._" + fi + } >> "$GITHUB_STEP_SUMMARY" + - name: Post triage completion if: ${{ always() && (steps.triage.outputs.outcome == 'success' || steps.triage.outputs.outcome == 'failure') && env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != '' }} shell: bash @@ -333,10 +362,3 @@ jobs: path: e2e-triage-artifacts/ retention-days: 7 - - name: Upload plan artifact - if: always() && steps.triage_output.outcome == 'success' - uses: actions/upload-artifact@v7 - with: - name: e2e-plan-${{ matrix.agent }} - path: e2e-triage-artifacts/${{ matrix.agent }}/plan.md - retention-days: 7 From 17f71a0378c88d8ee874903f0d10ccb537ad978f Mon Sep 17 00:00:00 2001 From: Alisha Kawaguchi Date: Mon, 23 Mar 2026 14:17:41 -0700 Subject: [PATCH 05/14] fix: collapse triage matrix into single job, add early Slack notification, log Slack errors - Collapse per-agent matrix into single triage job so Claude can correlate failures across 
agents and find shared root causes - Add "triage starting" Slack notification in setup job before triage begins - Log actual Slack API error field in post-slack-message.sh (was silently swallowing the error, making failures impossible to diagnose) - Update e2e-fix workflow to download single unified triage artifact - URL-encode failed_agents in Fix It URL since it may contain commas Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: 74562887fe7f --- .github/workflows/e2e-fix.yml | 20 ++--- .github/workflows/e2e-triage.yml | 124 +++++++++++++++++-------------- scripts/post-slack-message.sh | 4 +- 3 files changed, 77 insertions(+), 71 deletions(-) diff --git a/.github/workflows/e2e-fix.yml b/.github/workflows/e2e-fix.yml index c110c948b..3c42d0411 100644 --- a/.github/workflows/e2e-fix.yml +++ b/.github/workflows/e2e-fix.yml @@ -65,24 +65,18 @@ jobs: env: GH_TOKEN: ${{ github.token }} TRIAGE_RUN_ID: ${{ inputs.triage_run_id }} - FAILED_AGENTS: ${{ inputs.failed_agents }} shell: bash run: | set -euo pipefail mkdir -p triage-plans - IFS=',' read -ra agents <<< "$FAILED_AGENTS" - for agent in "${agents[@]}"; do - agent="$(echo "$agent" | xargs)" # trim whitespace - echo "Downloading plan for $agent..." - gh run download "$TRIAGE_RUN_ID" \ - --name "e2e-plan-${agent}" \ - --dir "triage-plans/${agent}" || { - echo "warning: no plan artifact found for $agent" >&2 - continue - } - done + gh run download "$TRIAGE_RUN_ID" \ + --name "e2e-triage" \ + --dir "triage-plans" || { + echo "error: no triage artifact found for run $TRIAGE_RUN_ID" >&2 + exit 1 + } echo "Downloaded plans:" find triage-plans -name '*.md' -type f @@ -92,7 +86,7 @@ jobs: uses: anthropics/claude-code-action@v1 with: prompt: | - Read the fix plans in the triage-plans/ directory. Each subdirectory contains a plan.md for one agent. + Read the fix plan in triage-plans/plan.md (and triage findings in triage-plans/triage.md for context). Execute all fixes exactly as specified in the plans. 
After applying fixes, run: 1. mise run fmt diff --git a/.github/workflows/e2e-triage.yml b/.github/workflows/e2e-triage.yml index 5deb41323..432d8f529 100644 --- a/.github/workflows/e2e-triage.yml +++ b/.github/workflows/e2e-triage.yml @@ -39,16 +39,22 @@ concurrency: cancel-in-progress: true jobs: - matrix-setup: + setup: runs-on: ubuntu-latest outputs: - agents: ${{ steps.set.outputs.agents }} + agents_csv: ${{ steps.set.outputs.agents_csv }} run_url: ${{ steps.set.outputs.run_url }} sha: ${{ steps.set.outputs.sha }} slack_channel: ${{ steps.set.outputs.slack_channel }} slack_thread_ts: ${{ steps.set.outputs.slack_thread_ts }} steps: - - name: Validate payload and build matrix + - name: Checkout repository + uses: actions/checkout@v6 + with: + fetch-depth: 1 + sparse-checkout: scripts + + - name: Validate payload id: set shell: bash env: @@ -62,10 +68,6 @@ jobs: run: | set -euo pipefail - csv_to_json() { - printf '%s' "$1" | jq -R -s -c 'split(",") | map(gsub("^\\s+|\\s+$"; "")) | map(select(length > 0))' - } - run_url="$RUN_URL_INPUT" sha="${SHA_INPUT:-}" slack_channel="${SLACK_CHANNEL_INPUT:-}" @@ -83,16 +85,17 @@ jobs: sha=$(echo "$run_data" | jq -r '.headSha') fi if [ -z "$FAILED_AGENTS_INPUT" ]; then - agents_json=$(echo "$run_data" | jq -c '[.jobs[] + # Extract agent names from failed job names like "e2e (gemini-cli)" + agents_csv=$(echo "$run_data" | jq -r '[.jobs[] | select(.conclusion == "failure") | (.name | (try capture("\\((?<agent>[^)]+)\\)").agent catch null)) | select(. 
!= null) - ]') + ] | join(", ")') fi fi if [ -n "$FAILED_AGENTS_INPUT" ]; then - agents_json="$(csv_to_json "$FAILED_AGENTS_INPUT")" + agents_csv="$FAILED_AGENTS_INPUT" fi if [ -z "$run_url" ]; then @@ -103,7 +106,7 @@ jobs: echo "sha is required" >&2 exit 1 fi - if [ -z "$agents_json" ] || [ "$agents_json" = "[]" ] || [ "$agents_json" = "null" ]; then + if [ -z "$agents_csv" ]; then echo "agents is required (provide failed_agents input or ensure failed job names contain '(agent-name)')" >&2 exit 1 fi @@ -112,36 +115,36 @@ jobs: echo "sha=$sha" >> "$GITHUB_OUTPUT" echo "slack_channel=$slack_channel" >> "$GITHUB_OUTPUT" echo "slack_thread_ts=$slack_thread_ts" >> "$GITHUB_OUTPUT" - echo "agents=$agents_json" >> "$GITHUB_OUTPUT" + echo "agents_csv=$agents_csv" >> "$GITHUB_OUTPUT" + + - name: Post triage starting + if: ${{ env.SLACK_BOT_TOKEN != '' && steps.set.outputs.slack_channel != '' && steps.set.outputs.slack_thread_ts != '' }} + env: + SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} + SLACK_CHANNEL: ${{ steps.set.outputs.slack_channel }} + SLACK_THREAD_TS: ${{ steps.set.outputs.slack_thread_ts }} + AGENTS_CSV: ${{ steps.set.outputs.agents_csv }} + TRIAGE_RUN_URL: ${{ steps.set.outputs.run_url }} + shell: bash + run: | + scripts/post-slack-message.sh "Starting E2E triage for \`${AGENTS_CSV}\` on <${TRIAGE_RUN_URL}|this run>." 
triage: - needs: [matrix-setup] + needs: [setup] runs-on: ubuntu-latest timeout-minutes: ${{ inputs.rerun == true && 90 || 45 }} env: - RUN_URL: ${{ needs.matrix-setup.outputs.run_url }} - E2E_AGENT: ${{ matrix.agent }} + RUN_URL: ${{ needs.setup.outputs.run_url }} + FAILED_AGENTS: ${{ needs.setup.outputs.agents_csv }} SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} - SLACK_CHANNEL: ${{ needs.matrix-setup.outputs.slack_channel }} - SLACK_THREAD_TS: ${{ needs.matrix-setup.outputs.slack_thread_ts }} - strategy: - fail-fast: false - matrix: - agent: ${{ fromJson(needs.matrix-setup.outputs.agents) }} + SLACK_CHANNEL: ${{ needs.setup.outputs.slack_channel }} + SLACK_THREAD_TS: ${{ needs.setup.outputs.slack_thread_ts }} steps: - name: Checkout repository uses: actions/checkout@v6 with: fetch-depth: 1 - - name: Post triage started - if: ${{ env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != '' }} - shell: bash - run: | - set -euo pipefail - - scripts/post-slack-message.sh "Starting E2E triage for \`$E2E_AGENT\` on <$RUN_URL|this run>." 
- - name: Setup mise uses: jdx/mise-action@v4 @@ -158,22 +161,26 @@ jobs: if: inputs.rerun == true run: sudo apt-get update && sudo apt-get install -y tmux - - name: Install agent CLI + - name: Install agent CLIs if: inputs.rerun == true run: | - case "${{ matrix.agent }}" in - claude-code) curl -fsSL https://claude.ai/install.sh | bash ;; - opencode) curl -fsSL https://opencode.ai/install | bash ;; - gemini-cli) npm install -g @google/gemini-cli ;; - cursor-cli) curl https://cursor.com/install -fsS | bash ;; - factoryai-droid) curl -fsSL https://app.factory.ai/cli | sh ;; - copilot-cli) npm install -g @github/copilot ;; - roger-roger) ;; # installed by mise (see mise.toml) - esac + IFS=',' read -ra agents <<< "$FAILED_AGENTS" + for agent in "${agents[@]}"; do + agent="$(echo "$agent" | xargs)" + case "$agent" in + claude-code) curl -fsSL https://claude.ai/install.sh | bash ;; + opencode) curl -fsSL https://opencode.ai/install | bash ;; + gemini-cli) npm install -g @google/gemini-cli ;; + cursor-cli) curl https://cursor.com/install -fsS | bash ;; + factoryai-droid) curl -fsSL https://app.factory.ai/cli | sh ;; + copilot-cli) npm install -g @github/copilot ;; + roger-roger) ;; # installed by mise (see mise.toml) + esac + done echo "$HOME/.local/bin" >> $GITHUB_PATH - - name: Bootstrap agent - if: inputs.rerun == true && matrix.agent != 'roger-roger' + - name: Bootstrap agents + if: inputs.rerun == true env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} @@ -192,10 +199,11 @@ jobs: Inputs: - Local artifact path: ${{ steps.artifacts.outputs.path }} - - Agent: ${{ matrix.agent }} - - SHA: ${{ needs.matrix-setup.outputs.sha }} + - Failed agents: ${{ env.FAILED_AGENTS }} + - SHA: ${{ needs.setup.outputs.sha }} This is a CI artifact analysis (local path provided) -- skip Steps L2-L5 and go straight to Shared Analysis. + Analyze ALL failed agents together -- failures may share a common root cause. 
Write ALL output to the plan file -- both the triage findings report and the fix plan. Structure it as: 1. Triage findings (per-test findings blocks + summary table from the skill) @@ -223,8 +231,10 @@ jobs: Inputs: - CI run URL: ${{ env.RUN_URL }} - - Agent: ${{ matrix.agent }} - - SHA: ${{ needs.matrix-setup.outputs.sha }} + - Failed agents: ${{ env.FAILED_AGENTS }} + - SHA: ${{ needs.setup.outputs.sha }} + + Analyze ALL failed agents together -- failures may share a common root cause. Write ALL output to the plan file -- both the triage findings report and the fix plan. Structure it as: 1. Triage findings (per-test findings blocks + summary table from the skill) @@ -261,8 +271,8 @@ jobs: shell: bash env: EXECUTION_FILE: ${{ steps.triage.outputs.execution_file }} - TRIAGE_OUTPUT_FILE: ${{ github.workspace }}/e2e-triage-artifacts/${{ matrix.agent }}/triage.md - PLAN_FILE: ${{ github.workspace }}/e2e-triage-artifacts/${{ matrix.agent }}/plan.md + TRIAGE_OUTPUT_FILE: ${{ github.workspace }}/e2e-triage-artifacts/triage.md + PLAN_FILE: ${{ github.workspace }}/e2e-triage-artifacts/plan.md run: | set -euo pipefail mkdir -p "$(dirname "$TRIAGE_OUTPUT_FILE")" @@ -293,12 +303,12 @@ jobs: if: always() && steps.triage_output.outcome == 'success' shell: bash env: - TRIAGE_FILE: ${{ github.workspace }}/e2e-triage-artifacts/${{ matrix.agent }}/triage.md - PLAN_FILE: ${{ github.workspace }}/e2e-triage-artifacts/${{ matrix.agent }}/plan.md + TRIAGE_FILE: ${{ github.workspace }}/e2e-triage-artifacts/triage.md + PLAN_FILE: ${{ github.workspace }}/e2e-triage-artifacts/plan.md run: | set -euo pipefail { - echo "# E2E Triage: ${{ matrix.agent }}" + echo "# E2E Triage: ${FAILED_AGENTS}" echo "" if [ -s "$TRIAGE_FILE" ]; then cat "$TRIAGE_FILE" @@ -324,9 +334,9 @@ jobs: set -euo pipefail if [ "$TRIAGE_OUTCOME" = "success" ]; then - message="E2E triage complete for \`$E2E_AGENT\`." + message="E2E triage complete for \`$FAILED_AGENTS\`." 
else - message="E2E triage failed for \`$E2E_AGENT\`." + message="E2E triage failed for \`$FAILED_AGENTS\`." fi scripts/post-slack-message.sh "$message" @@ -335,7 +345,7 @@ jobs: if: steps.triage_output.outcome == 'success' && env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != '' shell: bash env: - PLAN_FILE: ${{ github.workspace }}/e2e-triage-artifacts/${{ matrix.agent }}/plan.md + PLAN_FILE: ${{ github.workspace }}/e2e-triage-artifacts/plan.md TRIAGE_RUN_ID: ${{ github.run_id }} run: | set -euo pipefail @@ -345,9 +355,10 @@ jobs: # Construct Fix It URL encoded_run_url="$(python3 -c "import urllib.parse, os; print(urllib.parse.quote(os.environ['RUN_URL'], safe=''))")" - fix_url="https://e2e-triage.entireio.workers.dev/fix?triage_run_id=${TRIAGE_RUN_ID}&run_url=${encoded_run_url}&failed_agents=${E2E_AGENT}&slack_channel=${SLACK_CHANNEL}&slack_thread_ts=${SLACK_THREAD_TS}" + encoded_agents="$(python3 -c "import urllib.parse, os; print(urllib.parse.quote(os.environ['FAILED_AGENTS'], safe=''))")" + fix_url="https://e2e-triage.entireio.workers.dev/fix?triage_run_id=${TRIAGE_RUN_ID}&run_url=${encoded_run_url}&failed_agents=${encoded_agents}&slack_channel=${SLACK_CHANNEL}&slack_thread_ts=${SLACK_THREAD_TS}" - message="Fix plan ready for \`$E2E_AGENT\`: + message="Fix plan ready for \`$FAILED_AGENTS\`: ${summary} <${fix_url}|Fix It> — applies the plan and creates a draft PR" @@ -358,7 +369,6 @@ jobs: if: always() uses: actions/upload-artifact@v7 with: - name: e2e-triage-${{ matrix.agent }} + name: e2e-triage path: e2e-triage-artifacts/ retention-days: 7 - diff --git a/scripts/post-slack-message.sh b/scripts/post-slack-message.sh index 76546dc0b..04af6a381 100755 --- a/scripts/post-slack-message.sh +++ b/scripts/post-slack-message.sh @@ -20,5 +20,7 @@ if ! response="$(curl -fsS https://slack.com/api/chat.postMessage \ fi if ! 
jq -e '.ok == true' >/dev/null <<<"$response"; then - echo "warning: slack notification returned non-ok response" >&2 + error="$(jq -r '.error // "unknown"' <<<"$response")" + echo "warning: slack notification returned non-ok response: $error" >&2 + echo "warning: full response: $response" >&2 fi From f6358265b4d2d90040baee9d5d8fe83a1d7cb984 Mon Sep 17 00:00:00 2001 From: Alisha Kawaguchi Date: Mon, 23 Mar 2026 14:36:54 -0700 Subject: [PATCH 06/14] fix: move SLACK_BOT_TOKEN to job-level env so if-condition can evaluate it The "Post triage starting" step was silently skipped because env.SLACK_BOT_TOKEN was only set at the step level, but GitHub Actions evaluates if: conditions before step env is applied. Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: 585bce40d0d2 --- .github/workflows/e2e-triage.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/e2e-triage.yml b/.github/workflows/e2e-triage.yml index 432d8f529..a6541469c 100644 --- a/.github/workflows/e2e-triage.yml +++ b/.github/workflows/e2e-triage.yml @@ -41,6 +41,8 @@ concurrency: jobs: setup: runs-on: ubuntu-latest + env: + SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} outputs: agents_csv: ${{ steps.set.outputs.agents_csv }} run_url: ${{ steps.set.outputs.run_url }} @@ -120,7 +122,6 @@ jobs: - name: Post triage starting if: ${{ env.SLACK_BOT_TOKEN != '' && steps.set.outputs.slack_channel != '' && steps.set.outputs.slack_thread_ts != '' }} env: - SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} SLACK_CHANNEL: ${{ steps.set.outputs.slack_channel }} SLACK_THREAD_TS: ${{ steps.set.outputs.slack_thread_ts }} AGENTS_CSV: ${{ steps.set.outputs.agents_csv }} From 7f0641bba646a7b344bb204d08f692f5c49ad94e Mon Sep 17 00:00:00 2001 From: Alisha Kawaguchi Date: Mon, 23 Mar 2026 14:58:11 -0700 Subject: [PATCH 07/14] fix: normalize Slack thread_ts when dot is stripped by dispatch pipeline Slack thread_ts must be in dot-decimal format (e.g., "1482960137.003543"). 
The dot gets stripped somewhere in the dispatch pipeline (URL encoding or GitHub Actions numeric coercion), causing Slack to reject with invalid_thread_ts. Re-insert the dot assuming 6 decimal places. Ref: https://api.slack.com/messaging/retrieving Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: 0009b566bbbc --- .github/workflows/e2e-triage.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/e2e-triage.yml b/.github/workflows/e2e-triage.yml index a6541469c..824329cf1 100644 --- a/.github/workflows/e2e-triage.yml +++ b/.github/workflows/e2e-triage.yml @@ -75,6 +75,13 @@ jobs: slack_channel="${SLACK_CHANNEL_INPUT:-}" slack_thread_ts="${SLACK_THREAD_TS_INPUT:-}" + # Normalize thread_ts — re-insert dot if stripped (always 6 decimal places) + # Slack ts format is "seconds.microseconds" e.g. "1482960137.003543" + # See: https://api.slack.com/messaging/retrieving + if [ -n "$slack_thread_ts" ] && ! echo "$slack_thread_ts" | grep -q '\.'; then + slack_thread_ts="$(echo "$slack_thread_ts" | sed 's/\(.*\)\(.\{6\}\)$/\1.\2/')" + fi + # Derive missing values from run URL via GitHub API if [ -z "$sha" ] || [ -z "$FAILED_AGENTS_INPUT" ]; then run_id=$(echo "$run_url" | grep -oE '/runs/[0-9]+' | grep -oE '[0-9]+') From 7b20a89fac1a71b31922a4c20fd03d6c25ed4ccd Mon Sep 17 00:00:00 2001 From: Alisha Kawaguchi Date: Mon, 23 Mar 2026 15:13:55 -0700 Subject: [PATCH 08/14] feat: clean up triage Slack messages with Block Kit button - Simplify "triage started" message: remove agent list, link to triage run - Merge "triage completion" and "fix plan" steps into single "triage result" - Use Block Kit with green "Fix It" button instead of plain text link - Remove raw plan/markdown dump from Slack thread - Add --payload flag to post-slack-message.sh for Block Kit support Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: be8091cb744b --- .github/workflows/e2e-triage.yml | 69 +++++++++++++++++++------------- scripts/post-slack-message.sh | 
25 +++++++++--- 2 files changed, 61 insertions(+), 33 deletions(-) diff --git a/.github/workflows/e2e-triage.yml b/.github/workflows/e2e-triage.yml index 824329cf1..8f97f942d 100644 --- a/.github/workflows/e2e-triage.yml +++ b/.github/workflows/e2e-triage.yml @@ -131,11 +131,10 @@ jobs: env: SLACK_CHANNEL: ${{ steps.set.outputs.slack_channel }} SLACK_THREAD_TS: ${{ steps.set.outputs.slack_thread_ts }} - AGENTS_CSV: ${{ steps.set.outputs.agents_csv }} - TRIAGE_RUN_URL: ${{ steps.set.outputs.run_url }} + TRIAGE_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} shell: bash run: | - scripts/post-slack-message.sh "Starting E2E triage for \`${AGENTS_CSV}\` on <${TRIAGE_RUN_URL}|this run>." + scripts/post-slack-message.sh ":hourglass_flowing_sand: E2E triage started — <${TRIAGE_RUN_URL}|view triage run>" triage: needs: [setup] @@ -333,45 +332,61 @@ jobs: fi } >> "$GITHUB_STEP_SUMMARY" - - name: Post triage completion + - name: Post triage result if: ${{ always() && (steps.triage.outputs.outcome == 'success' || steps.triage.outputs.outcome == 'failure') && env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != '' }} shell: bash env: TRIAGE_OUTCOME: ${{ steps.triage.outputs.outcome }} + TRIAGE_OUTPUT_OUTCOME: ${{ steps.triage_output.outcome }} + TRIAGE_RUN_ID: ${{ github.run_id }} + SUMMARY_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} run: | set -euo pipefail - if [ "$TRIAGE_OUTCOME" = "success" ]; then - message="E2E triage complete for \`$FAILED_AGENTS\`." - else - message="E2E triage failed for \`$FAILED_AGENTS\`." 
+ if [ "$TRIAGE_OUTCOME" != "success" ]; then + scripts/post-slack-message.sh ":x: E2E triage failed — <${SUMMARY_URL}|view run>" + exit 0 fi - scripts/post-slack-message.sh "$message" - - - name: Post fix plan to Slack - if: steps.triage_output.outcome == 'success' && env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != '' - shell: bash - env: - PLAN_FILE: ${{ github.workspace }}/e2e-triage-artifacts/plan.md - TRIAGE_RUN_ID: ${{ github.run_id }} - run: | - set -euo pipefail - - # Extract first few lines as summary - summary="$(head -20 "$PLAN_FILE" | sed '/^$/d' | head -5)" + if [ "$TRIAGE_OUTPUT_OUTCOME" != "success" ]; then + scripts/post-slack-message.sh ":warning: E2E triage completed but output extraction failed — <${SUMMARY_URL}|view run>" + exit 0 + fi # Construct Fix It URL encoded_run_url="$(python3 -c "import urllib.parse, os; print(urllib.parse.quote(os.environ['RUN_URL'], safe=''))")" encoded_agents="$(python3 -c "import urllib.parse, os; print(urllib.parse.quote(os.environ['FAILED_AGENTS'], safe=''))")" fix_url="https://e2e-triage.entireio.workers.dev/fix?triage_run_id=${TRIAGE_RUN_ID}&run_url=${encoded_run_url}&failed_agents=${encoded_agents}&slack_channel=${SLACK_CHANNEL}&slack_thread_ts=${SLACK_THREAD_TS}" - message="Fix plan ready for \`$FAILED_AGENTS\`: - ${summary} - - <${fix_url}|Fix It> — applies the plan and creates a draft PR" - - scripts/post-slack-message.sh "$message" + # Block Kit payload with button + payload="$(jq -n \ + --arg summary_url "$SUMMARY_URL" \ + --arg fix_url "$fix_url" \ + '{ + text: "E2E triage complete", + blocks: [ + { + type: "section", + text: { + type: "mrkdwn", + text: (":white_check_mark: E2E triage complete — <" + $summary_url + "|view results>") + } + }, + { + type: "actions", + elements: [ + { + type: "button", + text: { type: "plain_text", text: "Fix It" }, + url: $fix_url, + style: "primary" + } + ] + } + ] + }')" + + scripts/post-slack-message.sh --payload "$payload" - name: Upload 
triage output if: always() diff --git a/scripts/post-slack-message.sh b/scripts/post-slack-message.sh index 04af6a381..5a3aab297 100755 --- a/scripts/post-slack-message.sh +++ b/scripts/post-slack-message.sh @@ -3,13 +3,26 @@ set -euo pipefail # Post a message to a Slack thread using the chat.postMessage API. # Requires SLACK_BOT_TOKEN, SLACK_CHANNEL, and SLACK_THREAD_TS env vars. +# +# Usage: +# post-slack-message.sh "plain text message" +# post-slack-message.sh --payload '{"text":"fallback","blocks":[...]}' +# +# With --payload, channel and thread_ts are injected automatically. A missing message or payload argument fails with an explicit error (arguments are read via ${N:-}/${N:?} because set -u is active). -text="${1:?message is required}" -payload="$(jq -n \ - --arg channel "$SLACK_CHANNEL" \ - --arg thread_ts "$SLACK_THREAD_TS" \ - --arg text "$text" \ - '{channel: $channel, thread_ts: $thread_ts, text: $text}')" +if [ "${1:-}" = "--payload" ]; then + payload="$(echo "${2:?--payload requires a JSON argument}" | jq \ + --arg channel "$SLACK_CHANNEL" \ + --arg thread_ts "$SLACK_THREAD_TS" \ + '. + {channel: $channel, thread_ts: $thread_ts}')" +else + text="${1:?message is required}" + payload="$(jq -n \ + --arg channel "$SLACK_CHANNEL" \ + --arg thread_ts "$SLACK_THREAD_TS" \ + --arg text "$text" \ + '{channel: $channel, thread_ts: $thread_ts, text: $text}')" +fi if ! 
response="$(curl -fsS https://slack.com/api/chat.postMessage \ -H "Authorization: Bearer ${SLACK_BOT_TOKEN}" \ From 5a2f90e3d54fa29dce273145d6e0e34ef17aa1a4 Mon Sep 17 00:00:00 2001 From: Alisha Kawaguchi Date: Mon, 23 Mar 2026 15:23:06 -0700 Subject: [PATCH 09/14] fix: add context line explaining Fix It button creates a draft PR Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: 6ce3944ef17d --- .github/workflows/e2e-triage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/e2e-triage.yml b/.github/workflows/e2e-triage.yml index 8f97f942d..57d63f2e4 100644 --- a/.github/workflows/e2e-triage.yml +++ b/.github/workflows/e2e-triage.yml @@ -369,7 +369,7 @@ jobs: type: "section", text: { type: "mrkdwn", - text: (":white_check_mark: E2E triage complete — <" + $summary_url + "|view results>") + text: (":white_check_mark: E2E triage complete — <" + $summary_url + "|view results>\nClick *Fix It* to apply the plan and create a draft PR.") } }, { From 5a1a9901359c3d8763a405c097e92db6b37304d5 Mon Sep 17 00:00:00 2001 From: Alisha Kawaguchi Date: Mon, 23 Mar 2026 15:31:32 -0700 Subject: [PATCH 10/14] feat: clean up e2e-fix Slack messages, add thread_ts normalization - Add thread_ts dot normalization (same fix as triage workflow) - Simplify "fix started" message with emoji, link to fix run - Success: broadcast ":review: E2E fix applied: " to channel using reply_broadcast + unfurl_links - Failure: ":x: E2E fix failed" with run link (thread only) Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: dc9d1a373abd --- .github/workflows/e2e-fix.yml | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/.github/workflows/e2e-fix.yml b/.github/workflows/e2e-fix.yml index 3c42d0411..6568f2e0e 100644 --- a/.github/workflows/e2e-fix.yml +++ b/.github/workflows/e2e-fix.yml @@ -48,15 +48,22 @@ jobs: with: fetch-depth: 0 + - name: Normalize Slack thread_ts + shell: bash + run: | + 
ts="${SLACK_THREAD_TS}" + if [ -n "$ts" ] && ! echo "$ts" | grep -q '\.'; then + ts="$(echo "$ts" | sed 's/\(.*\)\(.\{6\}\)$/\1.\2/')" + echo "SLACK_THREAD_TS=$ts" >> "$GITHUB_ENV" + fi + - name: Post fix started if: ${{ env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != '' }} shell: bash env: - FAILED_AGENTS: ${{ inputs.failed_agents }} + FIX_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} run: | - set -euo pipefail - - scripts/post-slack-message.sh "Starting E2E fix for \`${FAILED_AGENTS}\`." + scripts/post-slack-message.sh ":hourglass_flowing_sand: Applying E2E fix — <${FIX_RUN_URL}|view fix run>" - name: Setup mise uses: jdx/mise-action@v4 @@ -110,25 +117,25 @@ jobs: run: | set -euo pipefail - # Find the draft PR URL from the fix step output pr_url="$(gh pr list --head "$FIX_BRANCH" --json url -q '.[0].url' 2>/dev/null || true)" if [ -n "$pr_url" ]; then - message="E2E fix complete — draft PR ready: <${pr_url}|Review PR>" + payload="$(jq -n \ + --arg pr_url "$pr_url" \ + '{ + text: (":review: E2E fix applied: " + $pr_url), + reply_broadcast: true, + unfurl_links: true + }')" + scripts/post-slack-message.sh --payload "$payload" else - message="E2E fix complete — changes applied but no PR was created. Check the <${RUN_URL}|workflow run> for details." + scripts/post-slack-message.sh ":warning: E2E fix complete but no PR was created — <${RUN_URL}|view run>" fi - scripts/post-slack-message.sh "$message" - - name: Post failure to Slack if: failure() && env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != '' shell: bash env: RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} run: | - set -euo pipefail - - message="E2E fix failed. Check the <${RUN_URL}|workflow run> for details." 
- - scripts/post-slack-message.sh "$message" + scripts/post-slack-message.sh ":x: E2E fix failed — <${RUN_URL}|view run>" From 14b0f2c2f3e6076d20d8f937ffbe835e7620fd52 Mon Sep 17 00:00:00 2001 From: Alisha Kawaguchi Date: Mon, 23 Mar 2026 16:13:59 -0700 Subject: [PATCH 11/14] feat: add E2E test verification with retry to fix workflow After applying fixes, the workflow now installs agent CLIs and runs the actual failing E2E tests twice per agent to confirm the fix works. If verification fails, Claude Code gets one more attempt with the failure output as context, then tests run again. PR is only created after E2E verification passes, and Slack messages now report verification status (attempt count, pass/fail). Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: 8eae20f22eab --- .github/workflows/e2e-fix.yml | 183 ++++++++++++++++++++++++++++++---- scripts/verify-e2e-tests.sh | 41 ++++++++ 2 files changed, 203 insertions(+), 21 deletions(-) create mode 100755 scripts/verify-e2e-tests.sh diff --git a/.github/workflows/e2e-fix.yml b/.github/workflows/e2e-fix.yml index 6568f2e0e..1ffa1e365 100644 --- a/.github/workflows/e2e-fix.yml +++ b/.github/workflows/e2e-fix.yml @@ -37,7 +37,7 @@ concurrency: jobs: fix: runs-on: ubuntu-latest - timeout-minutes: 30 + timeout-minutes: 90 env: SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} SLACK_CHANNEL: ${{ inputs.slack_channel }} @@ -100,42 +100,183 @@ jobs: 2. mise run lint 3. mise run test:e2e:canary - If verification passes, create a git branch fix/e2e-${{ github.run_id }}, commit all changes, - push, and create a draft PR with a summary of what was fixed. - If verification fails, fix the issues and retry. Do not give up without attempting to fix lint/format errors. + + Do NOT create a git branch or PR yet — E2E verification will happen in a later step. 
+ anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + claude_args: "--allowedTools 'Edit,Write,Read,Glob,Grep,Bash(git:*),Bash(mise:*),Bash(gh:*)'" + + - name: Install system dependencies + run: sudo apt-get update && sudo apt-get install -y tmux + + - name: Install agent CLIs + env: + FAILED_AGENTS: ${{ inputs.failed_agents }} + run: | + for agent in $(echo "$FAILED_AGENTS" | tr ',' ' ' | xargs); do + case "$agent" in + claude-code) curl -fsSL https://claude.ai/install.sh | bash ;; + opencode) curl -fsSL https://opencode.ai/install | bash ;; + gemini-cli) npm install -g @google/gemini-cli ;; + cursor-cli) curl https://cursor.com/install -fsS | bash ;; + factoryai-droid) curl -fsSL https://app.factory.ai/cli | sh ;; + copilot-cli) npm install -g @github/copilot ;; + roger-roger) ;; # installed by mise + esac + done + echo "$HOME/.local/bin" >> $GITHUB_PATH + + - name: Bootstrap agents + env: + FAILED_AGENTS: ${{ inputs.failed_agents }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} + CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }} + FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }} + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + run: | + # Skip bootstrap if only roger-roger + agents=$(echo "$FAILED_AGENTS" | tr ',' '\n' | sed 's/^ *//;s/ *$//' | grep -v '^roger-roger$' || true) + if [ -n "$agents" ]; then + go run ./e2e/bootstrap + fi + + - name: Verify E2E tests (attempt 1) + id: verify1 + continue-on-error: true + env: + FAILED_AGENTS: ${{ inputs.failed_agents }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} + CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }} + FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }} + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + run: scripts/verify-e2e-tests.sh 1 e2e-verify-output.txt + + - name: Retry fix with failure context + id: fix2 + if: steps.verify1.outcome == 'failure' + uses: 
anthropics/claude-code-action@v1 + with: + prompt: | + The previous E2E fix was applied but the E2E tests still fail. + Read the test failure output in e2e-verify-output.txt. + + Also read the original triage findings in triage-plans/triage.md and fix plan in triage-plans/plan.md for context. + + Diagnose why the tests are still failing and apply additional fixes. After fixing, run: + 1. mise run fmt + 2. mise run lint + 3. mise run test:e2e:canary + + Do NOT create a git branch or PR yet. anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} claude_args: "--allowedTools 'Edit,Write,Read,Glob,Grep,Bash(git:*),Bash(mise:*),Bash(gh:*)'" - - name: Post success to Slack - if: success() && env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != '' + - name: Clean artifacts between attempts + if: steps.verify1.outcome == 'failure' + run: rm -rf e2e/artifacts/ + + - name: Verify E2E tests (attempt 2) + id: verify2 + if: steps.verify1.outcome == 'failure' + continue-on-error: true + env: + FAILED_AGENTS: ${{ inputs.failed_agents }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} + CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }} + FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }} + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + run: scripts/verify-e2e-tests.sh 2 e2e-verify-output-2.txt + + - name: Create fix PR + id: create_pr + if: steps.verify1.outcome == 'success' || steps.verify2.outcome == 'success' + env: + GH_TOKEN: ${{ github.token }} + FIX_BRANCH: fix/e2e-${{ github.run_id }} + ORIGINAL_RUN_URL: ${{ inputs.run_url }} + FAILED_AGENTS: ${{ inputs.failed_agents }} + TRIAGE_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ inputs.triage_run_id }} + VERIFY1_OUTCOME: ${{ steps.verify1.outcome }} + run: | + set -euo pipefail + + attempt="1" + if [ "$VERIFY1_OUTCOME" != "success" ]; then + attempt="2" + fi + + git checkout -b "$FIX_BRANCH" + git add -A + git commit -m 
"$(cat < + COMMIT_EOF + )" + git push -u origin "$FIX_BRANCH" + + gh pr create --draft \ + --title "fix: resolve E2E test failures" \ + --body "$(cat </dev/null || true)" + if [ "$FIX_OUTCOME" != "success" ]; then + scripts/post-slack-message.sh ":x: E2E fix failed to apply — <${RUN_URL}|view run>" + exit 0 + fi + + if [ "$VERIFY1_OUTCOME" = "success" ] || [ "$VERIFY2_OUTCOME" = "success" ]; then + pr_url="$(gh pr list --head "$FIX_BRANCH" --json url -q '.[0].url' 2>/dev/null || true)" + attempt="1" + [ "$VERIFY1_OUTCOME" != "success" ] && attempt="2" - if [ -n "$pr_url" ]; then - payload="$(jq -n \ - --arg pr_url "$pr_url" \ - '{ - text: (":review: E2E fix applied: " + $pr_url), + if [ -n "$pr_url" ]; then + payload="$(jq -n --arg pr_url "$pr_url" --arg attempt "$attempt" '{ + text: (":white_check_mark: E2E fix verified (" + $attempt + "/2 attempts, 2/2 test passes): " + $pr_url), reply_broadcast: true, unfurl_links: true }')" - scripts/post-slack-message.sh --payload "$payload" + scripts/post-slack-message.sh --payload "$payload" + else + scripts/post-slack-message.sh ":warning: E2E fix verified but no PR created — <${RUN_URL}|view run>" + fi else - scripts/post-slack-message.sh ":warning: E2E fix complete but no PR was created — <${RUN_URL}|view run>" + scripts/post-slack-message.sh ":x: E2E fix failed verification after 2 attempts — <${RUN_URL}|view run>" fi - - name: Post failure to Slack - if: failure() && env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != '' - shell: bash - env: - RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} - run: | - scripts/post-slack-message.sh ":x: E2E fix failed — <${RUN_URL}|view run>" + - name: Upload verification artifacts + if: always() + uses: actions/upload-artifact@v7 + with: + name: e2e-fix-verification + path: | + e2e/artifacts/ + e2e-verify-output*.txt + retention-days: 7 diff --git a/scripts/verify-e2e-tests.sh b/scripts/verify-e2e-tests.sh new file mode 
100755 index 000000000..67ae12239 --- /dev/null +++ b/scripts/verify-e2e-tests.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# verify-e2e-tests.sh — Run E2E tests twice per agent to verify a fix. +# +# Usage: verify-e2e-tests.sh <attempt> <output_file> +# attempt - attempt number (for log messages) +# output_file - file to append test output to +# +# Required env vars: +# FAILED_AGENTS - comma-separated list of agents to verify + +set -euo pipefail + +attempt="${1:?usage: verify-e2e-tests.sh <attempt> <output_file>}" +output_file="${2:?usage: verify-e2e-tests.sh <attempt> <output_file>}" + +failed="" +for agent in $(echo "$FAILED_AGENTS" | tr ',' ' ' | xargs); do + limit="" + test_filter="" + case "$agent" in + gemini-cli) limit="6" ;; + factoryai-droid) limit="1" ;; + roger-roger) test_filter="TestExternalAgent" ;; + esac + export E2E_CONCURRENT_TEST_LIMIT="$limit" + + for run in 1 2; do + echo "=== $agent: verification run $run/2 (attempt $attempt) ===" + if ! mise run test:e2e --agent "$agent" ${test_filter:+"$test_filter"} 2>&1 | tee -a "$output_file"; then + failed="$failed $agent(run$run)" + echo "=== $agent: run $run FAILED ===" + break + fi + echo "=== $agent: run $run passed ===" + done +done + +if [ -n "$failed" ]; then + echo "FAILED:$failed" >> "$output_file" + exit 1 +fi From 7aad3510663adef75a0869dfe1ffa68124af5e4d Mon Sep 17 00:00:00 2001 From: Alisha Kawaguchi Date: Mon, 23 Mar 2026 16:25:25 -0700 Subject: [PATCH 12/14] feat: auto-detect run_url and failed_agents from triage artifacts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fix workflow now only requires triage_run_id — run_url and failed_agents are auto-detected from metadata.json in the triage artifacts. Explicit inputs still take precedence for backward compatibility with the Slack "Fix It" button. The triage workflow now writes metadata.json (run_url, failed_agents, sha) alongside its existing plan/triage artifacts. 
Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: ce6d58f81698 --- .github/workflows/e2e-fix.yml | 56 +++++++++++++++++++++++++------- .github/workflows/e2e-triage.yml | 15 +++++++++ 2 files changed, 60 insertions(+), 11 deletions(-) diff --git a/.github/workflows/e2e-fix.yml b/.github/workflows/e2e-fix.yml index 1ffa1e365..b218f57f1 100644 --- a/.github/workflows/e2e-fix.yml +++ b/.github/workflows/e2e-fix.yml @@ -8,12 +8,12 @@ on: required: true type: string run_url: - description: Original failed E2E run URL - required: true + description: Original failed E2E run URL (auto-detected from triage artifacts if omitted) + required: false type: string failed_agents: - description: Comma-separated list of agents to fix - required: true + description: Comma-separated list of agents to fix (auto-detected from triage artifacts if omitted) + required: false type: string slack_channel: description: Slack channel ID for thread replies @@ -86,7 +86,41 @@ jobs: } echo "Downloaded plans:" - find triage-plans -name '*.md' -type f + find triage-plans -type f + + - name: Resolve inputs from triage metadata + shell: bash + env: + INPUT_RUN_URL: ${{ inputs.run_url }} + INPUT_FAILED_AGENTS: ${{ inputs.failed_agents }} + run: | + set -euo pipefail + + meta="triage-plans/metadata.json" + if [ ! 
-f "$meta" ]; then + if [ -z "$INPUT_RUN_URL" ] || [ -z "$INPUT_FAILED_AGENTS" ]; then + echo "error: run_url and failed_agents are required when triage artifacts lack metadata.json" >&2 + exit 1 + fi + echo "RESOLVED_RUN_URL=$INPUT_RUN_URL" >> "$GITHUB_ENV" + echo "RESOLVED_FAILED_AGENTS=$INPUT_FAILED_AGENTS" >> "$GITHUB_ENV" + exit 0 + fi + + run_url="${INPUT_RUN_URL:-$(jq -r '.run_url' "$meta")}" + failed_agents="${INPUT_FAILED_AGENTS:-$(jq -r '.failed_agents' "$meta")}" + + if [ -z "$run_url" ] || [ "$run_url" = "null" ]; then + echo "error: could not determine run_url" >&2 + exit 1 + fi + if [ -z "$failed_agents" ] || [ "$failed_agents" = "null" ]; then + echo "error: could not determine failed_agents" >&2 + exit 1 + fi + + echo "RESOLVED_RUN_URL=$run_url" >> "$GITHUB_ENV" + echo "RESOLVED_FAILED_AGENTS=$failed_agents" >> "$GITHUB_ENV" - name: Apply fixes id: fix @@ -111,7 +145,7 @@ jobs: - name: Install agent CLIs env: - FAILED_AGENTS: ${{ inputs.failed_agents }} + FAILED_AGENTS: ${{ env.RESOLVED_FAILED_AGENTS }} run: | for agent in $(echo "$FAILED_AGENTS" | tr ',' ' ' | xargs); do case "$agent" in @@ -128,7 +162,7 @@ jobs: - name: Bootstrap agents env: - FAILED_AGENTS: ${{ inputs.failed_agents }} + FAILED_AGENTS: ${{ env.RESOLVED_FAILED_AGENTS }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }} @@ -145,7 +179,7 @@ jobs: id: verify1 continue-on-error: true env: - FAILED_AGENTS: ${{ inputs.failed_agents }} + FAILED_AGENTS: ${{ env.RESOLVED_FAILED_AGENTS }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }} @@ -182,7 +216,7 @@ jobs: if: steps.verify1.outcome == 'failure' continue-on-error: true env: - FAILED_AGENTS: ${{ inputs.failed_agents }} + FAILED_AGENTS: ${{ env.RESOLVED_FAILED_AGENTS }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} GEMINI_API_KEY: ${{ 
secrets.GEMINI_API_KEY }} CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }} @@ -196,8 +230,8 @@ jobs: env: GH_TOKEN: ${{ github.token }} FIX_BRANCH: fix/e2e-${{ github.run_id }} - ORIGINAL_RUN_URL: ${{ inputs.run_url }} - FAILED_AGENTS: ${{ inputs.failed_agents }} + ORIGINAL_RUN_URL: ${{ env.RESOLVED_RUN_URL }} + FAILED_AGENTS: ${{ env.RESOLVED_FAILED_AGENTS }} TRIAGE_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ inputs.triage_run_id }} VERIFY1_OUTCOME: ${{ steps.verify1.outcome }} run: | diff --git a/.github/workflows/e2e-triage.yml b/.github/workflows/e2e-triage.yml index 57d63f2e4..349ec832d 100644 --- a/.github/workflows/e2e-triage.yml +++ b/.github/workflows/e2e-triage.yml @@ -388,6 +388,21 @@ jobs: scripts/post-slack-message.sh --payload "$payload" + - name: Write triage metadata + if: always() + shell: bash + env: + META_RUN_URL: ${{ needs.setup.outputs.run_url }} + META_AGENTS_CSV: ${{ needs.setup.outputs.agents_csv }} + META_SHA: ${{ needs.setup.outputs.sha }} + run: | + jq -n \ + --arg run_url "$META_RUN_URL" \ + --arg failed_agents "$META_AGENTS_CSV" \ + --arg sha "$META_SHA" \ + '{run_url: $run_url, failed_agents: $failed_agents, sha: $sha}' \ + > e2e-triage-artifacts/metadata.json + - name: Upload triage output if: always() uses: actions/upload-artifact@v7 From 4b7406e526c3e4fe8f5c6db674086d9dc2ddb65d Mon Sep 17 00:00:00 2001 From: Alisha Kawaguchi Date: Mon, 23 Mar 2026 16:37:31 -0700 Subject: [PATCH 13/14] fix: normalize triage_run_id when a full URL is passed gh run download expects a numeric run ID but the Cloudflare Worker or manual trigger may pass a full GitHub Actions URL. Extract the numeric ID from the URL before calling gh run download. 
Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: 0c1ad5d2c425 --- .github/workflows/e2e-fix.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/e2e-fix.yml b/.github/workflows/e2e-fix.yml index b218f57f1..39fe72054 100644 --- a/.github/workflows/e2e-fix.yml +++ b/.github/workflows/e2e-fix.yml @@ -78,6 +78,15 @@ jobs: mkdir -p triage-plans + # Normalize: extract numeric ID if a full URL was passed ("|| true" keeps errexit from aborting on a non-matching URL so the empty-ID check below can report it) + if echo "$TRIAGE_RUN_ID" | grep -qE '^https?://'; then + TRIAGE_RUN_ID=$(echo "$TRIAGE_RUN_ID" | grep -oE '/runs/[0-9]+' | grep -oE '[0-9]+' || true) + if [ -z "$TRIAGE_RUN_ID" ]; then + echo "error: could not extract run ID from triage_run_id URL" >&2 + exit 1 + fi + fi + gh run download "$TRIAGE_RUN_ID" \ --name "e2e-triage" \ --dir "triage-plans" || { From 70ca9ed5ae2e99282eedd47cda7e27ca8872318c Mon Sep 17 00:00:00 2001 From: Alisha Kawaguchi Date: Mon, 23 Mar 2026 17:34:29 -0700 Subject: [PATCH 14/14] fix: add github_token to claude-code-action steps The action validates that the workflow file matches the default branch. Passing github_token allows it to authenticate and run on feature branches. Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: 887ffbfe48ff --- .github/workflows/e2e-fix.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/e2e-fix.yml b/.github/workflows/e2e-fix.yml index 39fe72054..a559ff77f 100644 --- a/.github/workflows/e2e-fix.yml +++ b/.github/workflows/e2e-fix.yml @@ -147,6 +147,7 @@ jobs: Do NOT create a git branch or PR yet — E2E verification will happen in a later step. anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + github_token: ${{ github.token }} claude_args: "--allowedTools 'Edit,Write,Read,Glob,Grep,Bash(git:*),Bash(mise:*),Bash(gh:*)'" - name: Install system dependencies @@ -214,6 +215,7 @@ jobs: Do NOT create a git branch or PR yet. 
anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + github_token: ${{ github.token }} claude_args: "--allowedTools 'Edit,Write,Read,Glob,Grep,Bash(git:*),Bash(mise:*),Bash(gh:*)'" - name: Clean artifacts between attempts