Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
14 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
271 changes: 229 additions & 42 deletions .github/workflows/e2e-fix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ on:
required: true
type: string
run_url:
description: Original failed E2E run URL
required: true
description: Original failed E2E run URL (auto-detected from triage artifacts if omitted)
required: false
type: string
failed_agents:
description: Comma-separated list of agents to fix
required: true
description: Comma-separated list of agents to fix (auto-detected from triage artifacts if omitted)
required: false
type: string
slack_channel:
description: Slack channel ID for thread replies
Expand All @@ -37,7 +37,7 @@ concurrency:
jobs:
fix:
runs-on: ubuntu-latest
timeout-minutes: 30
timeout-minutes: 90
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
SLACK_CHANNEL: ${{ inputs.slack_channel }}
Expand All @@ -48,15 +48,22 @@ jobs:
with:
fetch-depth: 0

- name: Normalize Slack thread_ts
  shell: bash
  run: |
    # Nothing to normalize when no thread timestamp is present.
    raw_ts="${SLACK_THREAD_TS}"
    if [ -z "$raw_ts" ]; then
      exit 0
    fi

    # A value that already contains a dot is left exactly as it is.
    if echo "$raw_ts" | grep -q '\.'; then
      exit 0
    fi

    # Insert a dot before the last six characters and re-export the value
    # through GITHUB_ENV so that subsequent steps see the dotted form.
    dotted_ts="$(echo "$raw_ts" | sed 's/\(.*\)\(.\{6\}\)$/\1.\2/')"
    echo "SLACK_THREAD_TS=$dotted_ts" >> "$GITHUB_ENV"

- name: Post fix started
if: ${{ env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != '' }}
shell: bash
env:
FAILED_AGENTS: ${{ inputs.failed_agents }}
FIX_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
run: |
set -euo pipefail

scripts/post-slack-message.sh "Starting E2E fix for \`${FAILED_AGENTS}\`."
scripts/post-slack-message.sh ":hourglass_flowing_sand: Applying E2E fix — <${FIX_RUN_URL}|view fix run>"

- name: Setup mise
uses: jdx/mise-action@v4
Expand All @@ -65,76 +72,256 @@ jobs:
env:
GH_TOKEN: ${{ github.token }}
TRIAGE_RUN_ID: ${{ inputs.triage_run_id }}
FAILED_AGENTS: ${{ inputs.failed_agents }}
shell: bash
run: |
set -euo pipefail

mkdir -p triage-plans

IFS=',' read -ra agents <<< "$FAILED_AGENTS"
for agent in "${agents[@]}"; do
agent="$(echo "$agent" | xargs)" # trim whitespace
echo "Downloading plan for $agent..."
gh run download "$TRIAGE_RUN_ID" \
--name "e2e-plan-${agent}" \
--dir "triage-plans/${agent}" || {
echo "warning: no plan artifact found for $agent" >&2
continue
}
done
# Normalize: accept either a bare numeric run ID or a full run URL.
#
# This script runs under `set -euo pipefail`. A grep pipeline inside $(...)
# that finds no match returns non-zero, which aborts the step on the
# assignment itself and makes any follow-up "is it empty?" check unreachable.
# Matching with the bash regex operator keeps the failure path explicit, so
# the error message below is actually printed.
if [[ "$TRIAGE_RUN_ID" =~ ^https?:// ]]; then
  if [[ "$TRIAGE_RUN_ID" =~ /runs/([0-9]+) ]]; then
    TRIAGE_RUN_ID="${BASH_REMATCH[1]}"
  else
    echo "error: could not extract run ID from triage_run_id URL" >&2
    exit 1
  fi
fi

# Download the artifact named "e2e-triage" into triage-plans/; a missing
# artifact is fatal for this step.
gh run download "$TRIAGE_RUN_ID" \
  --name "e2e-triage" \
  --dir "triage-plans" || {
  echo "error: no triage artifact found for run $TRIAGE_RUN_ID" >&2
  exit 1
}

echo "Downloaded plans:"
find triage-plans -name '*.md' -type f
find triage-plans -type f

- name: Resolve inputs from triage metadata
  shell: bash
  env:
    INPUT_RUN_URL: ${{ inputs.run_url }}
    INPUT_FAILED_AGENTS: ${{ inputs.failed_agents }}
  run: |
    set -euo pipefail

    # Explicit workflow inputs take precedence; metadata.json only fills in
    # values that were left empty.
    metadata_file="triage-plans/metadata.json"

    if [ -f "$metadata_file" ]; then
      resolved_url="${INPUT_RUN_URL:-}"
      if [ -z "$resolved_url" ]; then
        resolved_url="$(jq -r '.run_url' "$metadata_file")"
      fi

      resolved_agents="${INPUT_FAILED_AGENTS:-}"
      if [ -z "$resolved_agents" ]; then
        resolved_agents="$(jq -r '.failed_agents' "$metadata_file")"
      fi

      # Reject both an empty value and the literal string "null".
      case "$resolved_url" in
        '' | null)
          echo "error: could not determine run_url" >&2
          exit 1
          ;;
      esac
      case "$resolved_agents" in
        '' | null)
          echo "error: could not determine failed_agents" >&2
          exit 1
          ;;
      esac
    else
      # No metadata file to fall back on: both inputs must have been given.
      if [ -z "$INPUT_RUN_URL" ] || [ -z "$INPUT_FAILED_AGENTS" ]; then
        echo "error: run_url and failed_agents are required when triage artifacts lack metadata.json" >&2
        exit 1
      fi
      resolved_url="$INPUT_RUN_URL"
      resolved_agents="$INPUT_FAILED_AGENTS"
    fi

    # Publish the resolved values for the steps that follow.
    {
      echo "RESOLVED_RUN_URL=$resolved_url"
      echo "RESOLVED_FAILED_AGENTS=$resolved_agents"
    } >> "$GITHUB_ENV"

- name: Apply fixes
id: fix
uses: anthropics/claude-code-action@v1
with:
prompt: |
Read the fix plans in the triage-plans/ directory. Each subdirectory contains a plan.md for one agent.
Read the fix plan in triage-plans/plan.md (and triage findings in triage-plans/triage.md for context).

Execute all fixes exactly as specified in the plans. After applying fixes, run:
1. mise run fmt
2. mise run lint
3. mise run test:e2e:canary

If verification passes, create a git branch fix/e2e-${{ github.run_id }}, commit all changes,
push, and create a draft PR with a summary of what was fixed.

If verification fails, fix the issues and retry. Do not give up without attempting to fix lint/format errors.

Do NOT create a git branch or PR yet — E2E verification will happen in a later step.
anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
github_token: ${{ github.token }}
claude_args: "--allowedTools 'Edit,Write,Read,Glob,Grep,Bash(git:*),Bash(mise:*),Bash(gh:*)'"

- name: Post success to Slack
if: success() && env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != ''
shell: bash
# Refresh the apt package index, then install tmux.
- name: Install system dependencies
  run: >-
    sudo apt-get update &&
    sudo apt-get install -y tmux

# Installs the CLI for every agent listed in RESOLVED_FAILED_AGENTS
# (comma-separated), then adds ~/.local/bin to PATH for later steps.
- name: Install agent CLIs
  shell: bash
  env:
    FAILED_AGENTS: ${{ env.RESOLVED_FAILED_AGENTS }}
  run: |
    # pipefail matters here: without it a failed download in `curl ... | bash`
    # is hidden, because bash reads empty stdin and exits 0.
    set -euo pipefail

    for agent in $(echo "$FAILED_AGENTS" | tr ',' ' ' | xargs); do
      case "$agent" in
        claude-code) curl -fsSL https://claude.ai/install.sh | bash ;;
        opencode) curl -fsSL https://opencode.ai/install | bash ;;
        gemini-cli) npm install -g @google/gemini-cli ;;
        cursor-cli) curl https://cursor.com/install -fsS | bash ;;
        factoryai-droid) curl -fsSL https://app.factory.ai/cli | sh ;;
        copilot-cli) npm install -g @github/copilot ;;
        roger-roger) ;; # installed by mise
        # Surface unrecognised names instead of skipping them silently.
        *) echo "warning: no installer defined for agent '$agent'" >&2 ;;
      esac
    done

    # Quoted so a path containing spaces or glob characters cannot break
    # the redirection.
    echo "$HOME/.local/bin" >> "$GITHUB_PATH"

# Runs the e2e bootstrap program unless roger-roger is the only agent listed.
- name: Bootstrap agents
  env:
    FAILED_AGENTS: ${{ env.RESOLVED_FAILED_AGENTS }}
    ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
    GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
    CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }}
    FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }}
    COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
  run: |
    # One agent per line, surrounding spaces trimmed, roger-roger removed.
    # `|| true` keeps grep's "no lines left" exit status from failing the step.
    remaining="$(
      echo "$FAILED_AGENTS" \
        | tr ',' '\n' \
        | sed 's/^ *//;s/ *$//' \
        | grep -v '^roger-roger$' \
        || true
    )"

    # Skip bootstrap if only roger-roger
    if [ -z "$remaining" ]; then
      exit 0
    fi

    go run ./e2e/bootstrap

# First verification pass. Invokes scripts/verify-e2e-tests.sh with the
# arguments `1` and `e2e-verify-output.txt` (presumably the attempt number and
# the file the test output is written to — confirm against the script).
- name: Verify E2E tests (attempt 1)
id: verify1
# A failure here must not fail the job: the retry, cleanup, second
# verification and PR-creation steps all branch on steps.verify1.outcome.
continue-on-error: true
env:
# Agent list resolved earlier and exported through GITHUB_ENV.
FAILED_AGENTS: ${{ env.RESOLVED_FAILED_AGENTS }}
# Credentials passed through to the verification script.
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }}
FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }}
COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
run: scripts/verify-e2e-tests.sh 1 e2e-verify-output.txt

# Second fix pass. Runs only when the first verification step failed, and
# points the agent at the captured output of that run (e2e-verify-output.txt)
# plus the original triage findings and plan.
- name: Retry fix with failure context
id: fix2
if: steps.verify1.outcome == 'failure'
uses: anthropics/claude-code-action@v1
with:
# The prompt explicitly forbids creating a branch or PR from inside the
# action. The tool allow-list is passed separately via claude_args.
prompt: |
The previous E2E fix was applied but the E2E tests still fail.
Read the test failure output in e2e-verify-output.txt.

Also read the original triage findings in triage-plans/triage.md and fix plan in triage-plans/plan.md for context.

Diagnose why the tests are still failing and apply additional fixes. After fixing, run:
1. mise run fmt
2. mise run lint
3. mise run test:e2e:canary

Do NOT create a git branch or PR yet.
anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
github_token: ${{ github.token }}
claude_args: "--allowedTools 'Edit,Write,Read,Glob,Grep,Bash(git:*),Bash(mise:*),Bash(gh:*)'"

# Deletes e2e/artifacts/ when the first verification failed, before the second
# verification runs (presumably so attempt 2 starts from a clean directory —
# confirm). NOTE(review): the final upload step collects e2e/artifacts/, so
# attempt-1 artifacts are not uploaded once this step has run.
- name: Clean artifacts between attempts
if: steps.verify1.outcome == 'failure'
run: rm -rf e2e/artifacts/

# Second verification pass. Runs only when attempt 1 failed. Invokes the same
# script with the arguments `2` and `e2e-verify-output-2.txt`, so the output
# of the first attempt is kept in its own file.
- name: Verify E2E tests (attempt 2)
id: verify2
if: steps.verify1.outcome == 'failure'
# A failure is recorded in steps.verify2.outcome instead of failing the job,
# so later steps can still run and branch on the outcome.
continue-on-error: true
env:
# Agent list resolved earlier and exported through GITHUB_ENV.
FAILED_AGENTS: ${{ env.RESOLVED_FAILED_AGENTS }}
# Credentials passed through to the verification script.
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }}
FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }}
COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
run: scripts/verify-e2e-tests.sh 2 e2e-verify-output-2.txt

- name: Create fix PR
id: create_pr
if: steps.verify1.outcome == 'success' || steps.verify2.outcome == 'success'
env:
GH_TOKEN: ${{ github.token }}
FIX_BRANCH: fix/e2e-${{ github.run_id }}
RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
ORIGINAL_RUN_URL: ${{ env.RESOLVED_RUN_URL }}
FAILED_AGENTS: ${{ env.RESOLVED_FAILED_AGENTS }}
TRIAGE_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ inputs.triage_run_id }}
VERIFY1_OUTCOME: ${{ steps.verify1.outcome }}
run: |
set -euo pipefail

# Find the draft PR URL from the fix step output
pr_url="$(gh pr list --head "$FIX_BRANCH" --json url -q '.[0].url' 2>/dev/null || true)"

if [ -n "$pr_url" ]; then
message="E2E fix complete — draft PR ready: <${pr_url}|Review PR>"
else
message="E2E fix complete — changes applied but no PR was created. Check the <${RUN_URL}|workflow run> for details."
attempt="1"
if [ "$VERIFY1_OUTCOME" != "success" ]; then
attempt="2"
fi

scripts/post-slack-message.sh "$message"
git checkout -b "$FIX_BRANCH"
git add -A
git commit -m "$(cat <<COMMIT_EOF
fix: resolve E2E test failures

Automated fix applied by E2E fix workflow.
Original failure: ${ORIGINAL_RUN_URL}

Co-Authored-By: Claude <noreply@anthropic.com>
COMMIT_EOF
)"
git push -u origin "$FIX_BRANCH"

gh pr create --draft \
--title "fix: resolve E2E test failures" \
--body "$(cat <<PR_EOF
## Summary
Automated fix for E2E test failures.

- Original failure: ${ORIGINAL_RUN_URL}
- Failed agents: ${FAILED_AGENTS}
- Fix verified: E2E tests passed 2/2 runs (attempt $attempt)
- Triage run: ${TRIAGE_RUN_URL}
PR_EOF
)"

- name: Post failure to Slack
if: failure() && env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != ''
- name: Post result to Slack
if: always() && env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != ''
shell: bash
env:
GH_TOKEN: ${{ github.token }}
FIX_BRANCH: fix/e2e-${{ github.run_id }}
RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
FIX_OUTCOME: ${{ steps.fix.outcome }}
VERIFY1_OUTCOME: ${{ steps.verify1.outcome }}
VERIFY2_OUTCOME: ${{ steps.verify2.outcome }}
run: |
set -euo pipefail

message="E2E fix failed. Check the <${RUN_URL}|workflow run> for details."
if [ "$FIX_OUTCOME" != "success" ]; then
scripts/post-slack-message.sh ":x: E2E fix failed to apply — <${RUN_URL}|view run>"
exit 0
fi

if [ "$VERIFY1_OUTCOME" = "success" ] || [ "$VERIFY2_OUTCOME" = "success" ]; then
pr_url="$(gh pr list --head "$FIX_BRANCH" --json url -q '.[0].url' 2>/dev/null || true)"
attempt="1"
[ "$VERIFY1_OUTCOME" != "success" ] && attempt="2"

scripts/post-slack-message.sh "$message"
if [ -n "$pr_url" ]; then
payload="$(jq -n --arg pr_url "$pr_url" --arg attempt "$attempt" '{
text: (":white_check_mark: E2E fix verified (" + $attempt + "/2 attempts, 2/2 test passes): " + $pr_url),
reply_broadcast: true,
unfurl_links: true
}')"
scripts/post-slack-message.sh --payload "$payload"
else
scripts/post-slack-message.sh ":warning: E2E fix verified but no PR created — <${RUN_URL}|view run>"
fi
else
scripts/post-slack-message.sh ":x: E2E fix failed verification after 2 attempts — <${RUN_URL}|view run>"
fi

# Uploads the verification evidence as the artifact "e2e-fix-verification".
# `always()` makes the step run even when earlier steps failed.
- name: Upload verification artifacts
if: always()
uses: actions/upload-artifact@v7
with:
name: e2e-fix-verification
# The artifacts directory plus the output files of both verification
# attempts (the glob matches e2e-verify-output.txt and
# e2e-verify-output-2.txt).
path: |
e2e/artifacts/
e2e-verify-output*.txt
# The artifact is kept for seven days.
retention-days: 7
Loading
Loading