diff --git a/.github/workflows/e2e-fix.yml b/.github/workflows/e2e-fix.yml index c110c948b..a559ff77f 100644 --- a/.github/workflows/e2e-fix.yml +++ b/.github/workflows/e2e-fix.yml @@ -8,12 +8,12 @@ on: required: true type: string run_url: - description: Original failed E2E run URL - required: true + description: Original failed E2E run URL (auto-detected from triage artifacts if omitted) + required: false type: string failed_agents: - description: Comma-separated list of agents to fix - required: true + description: Comma-separated list of agents to fix (auto-detected from triage artifacts if omitted) + required: false type: string slack_channel: description: Slack channel ID for thread replies @@ -37,7 +37,7 @@ concurrency: jobs: fix: runs-on: ubuntu-latest - timeout-minutes: 30 + timeout-minutes: 90 env: SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} SLACK_CHANNEL: ${{ inputs.slack_channel }} @@ -48,15 +48,22 @@ jobs: with: fetch-depth: 0 + - name: Normalize Slack thread_ts + shell: bash + run: | + ts="${SLACK_THREAD_TS}" + if [ -n "$ts" ] && ! echo "$ts" | grep -q '\.'; then + ts="$(echo "$ts" | sed 's/\(.*\)\(.\{6\}\)$/\1.\2/')" + echo "SLACK_THREAD_TS=$ts" >> "$GITHUB_ENV" + fi + - name: Post fix started if: ${{ env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != '' }} shell: bash env: - FAILED_AGENTS: ${{ inputs.failed_agents }} + FIX_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} run: | - set -euo pipefail - - scripts/post-slack-message.sh "Starting E2E fix for \`${FAILED_AGENTS}\`." + scripts/post-slack-message.sh ":hourglass_flowing_sand: Applying E2E fix — <${FIX_RUN_URL}|view fix run>" - name: Setup mise uses: jdx/mise-action@v4 @@ -65,76 +72,256 @@ jobs: env: GH_TOKEN: ${{ github.token }} TRIAGE_RUN_ID: ${{ inputs.triage_run_id }} - FAILED_AGENTS: ${{ inputs.failed_agents }} shell: bash run: | set -euo pipefail mkdir -p triage-plans - IFS=',' read -ra agents <<< "$FAILED_AGENTS" - for agent in "${agents[@]}"; do - agent="$(echo "$agent" | xargs)" # trim whitespace - echo "Downloading plan for $agent..." - gh run download "$TRIAGE_RUN_ID" \ - --name "e2e-plan-${agent}" \ - --dir "triage-plans/${agent}" || { - echo "warning: no plan artifact found for $agent" >&2 - continue - } - done + # Normalize: extract numeric ID if a full URL was passed + if echo "$TRIAGE_RUN_ID" | grep -qE '^https?://'; then + TRIAGE_RUN_ID=$(echo "$TRIAGE_RUN_ID" | grep -oE '/runs/[0-9]+' | grep -oE '[0-9]+') + if [ -z "$TRIAGE_RUN_ID" ]; then + echo "error: could not extract run ID from triage_run_id URL" >&2 + exit 1 + fi + fi + + gh run download "$TRIAGE_RUN_ID" \ + --name "e2e-triage" \ + --dir "triage-plans" || { + echo "error: no triage artifact found for run $TRIAGE_RUN_ID" >&2 + exit 1 + } echo "Downloaded plans:" - find triage-plans -name '*.md' -type f + find triage-plans -type f + + - name: Resolve inputs from triage metadata + shell: bash + env: + INPUT_RUN_URL: ${{ inputs.run_url }} + INPUT_FAILED_AGENTS: ${{ inputs.failed_agents }} + run: | + set -euo pipefail + + meta="triage-plans/metadata.json" + if [ ! -f "$meta" ]; then + if [ -z "$INPUT_RUN_URL" ] || [ -z "$INPUT_FAILED_AGENTS" ]; then + echo "error: run_url and failed_agents are required when triage artifacts lack metadata.json" >&2 + exit 1 + fi + echo "RESOLVED_RUN_URL=$INPUT_RUN_URL" >> "$GITHUB_ENV" + echo "RESOLVED_FAILED_AGENTS=$INPUT_FAILED_AGENTS" >> "$GITHUB_ENV" + exit 0 + fi + + run_url="${INPUT_RUN_URL:-$(jq -r '.run_url' "$meta")}" + failed_agents="${INPUT_FAILED_AGENTS:-$(jq -r '.failed_agents' "$meta")}" + + if [ -z "$run_url" ] || [ "$run_url" = "null" ]; then + echo "error: could not determine run_url" >&2 + exit 1 + fi + if [ -z "$failed_agents" ] || [ "$failed_agents" = "null" ]; then + echo "error: could not determine failed_agents" >&2 + exit 1 + fi + + echo "RESOLVED_RUN_URL=$run_url" >> "$GITHUB_ENV" + echo "RESOLVED_FAILED_AGENTS=$failed_agents" >> "$GITHUB_ENV" - name: Apply fixes id: fix uses: anthropics/claude-code-action@v1 with: prompt: | - Read the fix plans in the triage-plans/ directory. Each subdirectory contains a plan.md for one agent. + Read the fix plan in triage-plans/plan.md (and triage findings in triage-plans/triage.md for context). Execute all fixes exactly as specified in the plans. After applying fixes, run: 1. mise run fmt 2. mise run lint 3. mise run test:e2e:canary - If verification passes, create a git branch fix/e2e-${{ github.run_id }}, commit all changes, - push, and create a draft PR with a summary of what was fixed. - If verification fails, fix the issues and retry. Do not give up without attempting to fix lint/format errors. + + Do NOT create a git branch or PR yet — E2E verification will happen in a later step. anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + github_token: ${{ github.token }} claude_args: "--allowedTools 'Edit,Write,Read,Glob,Grep,Bash(git:*),Bash(mise:*),Bash(gh:*)'" - - name: Post success to Slack - if: success() && env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != '' - shell: bash + - name: Install system dependencies + run: sudo apt-get update && sudo apt-get install -y tmux + + - name: Install agent CLIs + env: + FAILED_AGENTS: ${{ env.RESOLVED_FAILED_AGENTS }} + run: | + for agent in $(echo "$FAILED_AGENTS" | tr ',' ' ' | xargs); do + case "$agent" in + claude-code) curl -fsSL https://claude.ai/install.sh | bash ;; + opencode) curl -fsSL https://opencode.ai/install | bash ;; + gemini-cli) npm install -g @google/gemini-cli ;; + cursor-cli) curl https://cursor.com/install -fsS | bash ;; + factoryai-droid) curl -fsSL https://app.factory.ai/cli | sh ;; + copilot-cli) npm install -g @github/copilot ;; + roger-roger) ;; # installed by mise + esac + done + echo "$HOME/.local/bin" >> $GITHUB_PATH + + - name: Bootstrap agents + env: + FAILED_AGENTS: ${{ env.RESOLVED_FAILED_AGENTS }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} + CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }} + FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }} + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + run: | + # Skip bootstrap if only roger-roger + agents=$(echo "$FAILED_AGENTS" | tr ',' '\n' | sed 's/^ *//;s/ *$//' | grep -v '^roger-roger$' || true) + if [ -n "$agents" ]; then + go run ./e2e/bootstrap + fi + + - name: Verify E2E tests (attempt 1) + id: verify1 + continue-on-error: true + env: + FAILED_AGENTS: ${{ env.RESOLVED_FAILED_AGENTS }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} + CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }} + FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }} + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + run: scripts/verify-e2e-tests.sh 1 e2e-verify-output.txt + + - name: Retry fix with failure context + id: fix2 + if: steps.verify1.outcome == 'failure' + uses: anthropics/claude-code-action@v1 + with: + prompt: | + The previous E2E fix was applied but the E2E tests still fail. + Read the test failure output in e2e-verify-output.txt. + + Also read the original triage findings in triage-plans/triage.md and fix plan in triage-plans/plan.md for context. + + Diagnose why the tests are still failing and apply additional fixes. After fixing, run: + 1. mise run fmt + 2. mise run lint + 3. mise run test:e2e:canary + + Do NOT create a git branch or PR yet. + anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + github_token: ${{ github.token }} + claude_args: "--allowedTools 'Edit,Write,Read,Glob,Grep,Bash(git:*),Bash(mise:*),Bash(gh:*)'" + + - name: Clean artifacts between attempts + if: steps.verify1.outcome == 'failure' + run: rm -rf e2e/artifacts/ + + - name: Verify E2E tests (attempt 2) + id: verify2 + if: steps.verify1.outcome == 'failure' + continue-on-error: true + env: + FAILED_AGENTS: ${{ env.RESOLVED_FAILED_AGENTS }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} + CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }} + FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }} + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + run: scripts/verify-e2e-tests.sh 2 e2e-verify-output-2.txt + + - name: Create fix PR + id: create_pr + if: steps.verify1.outcome == 'success' || steps.verify2.outcome == 'success' env: GH_TOKEN: ${{ github.token }} FIX_BRANCH: fix/e2e-${{ github.run_id }} - RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + ORIGINAL_RUN_URL: ${{ env.RESOLVED_RUN_URL }} + FAILED_AGENTS: ${{ env.RESOLVED_FAILED_AGENTS }} + TRIAGE_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ inputs.triage_run_id }} + VERIFY1_OUTCOME: ${{ steps.verify1.outcome }} run: | set -euo pipefail - # Find the draft PR URL from the fix step output - pr_url="$(gh pr list --head "$FIX_BRANCH" --json url -q '.[0].url' 2>/dev/null || true)" - - if [ -n "$pr_url" ]; then - message="E2E fix complete — draft PR ready: <${pr_url}|Review PR>" - else - message="E2E fix complete — changes applied but no PR was created. Check the <${RUN_URL}|workflow run> for details." + attempt="1" + if [ "$VERIFY1_OUTCOME" != "success" ]; then + attempt="2" fi - scripts/post-slack-message.sh "$message" + git checkout -b "$FIX_BRANCH" + git add -A + git commit -m "$(cat < + COMMIT_EOF + )" + git push -u origin "$FIX_BRANCH" + + gh pr create --draft \ + --title "fix: resolve E2E test failures" \ + --body "$(cat <" + exit 0 + fi + + if [ "$VERIFY1_OUTCOME" = "success" ] || [ "$VERIFY2_OUTCOME" = "success" ]; then + pr_url="$(gh pr list --head "$FIX_BRANCH" --json url -q '.[0].url' 2>/dev/null || true)" + attempt="1" + [ "$VERIFY1_OUTCOME" != "success" ] && attempt="2" - scripts/post-slack-message.sh "$message" + if [ -n "$pr_url" ]; then + payload="$(jq -n --arg pr_url "$pr_url" --arg attempt "$attempt" '{ + text: (":white_check_mark: E2E fix verified (" + $attempt + "/2 attempts, 2/2 test passes): " + $pr_url), + reply_broadcast: true, + unfurl_links: true + }')" + scripts/post-slack-message.sh --payload "$payload" + else + scripts/post-slack-message.sh ":warning: E2E fix verified but no PR created — <${RUN_URL}|view run>" + fi + else + scripts/post-slack-message.sh ":x: E2E fix failed verification after 2 attempts — <${RUN_URL}|view run>" + fi + + - name: Upload verification artifacts + if: always() + uses: actions/upload-artifact@v7 + with: + name: e2e-fix-verification + path: | + e2e/artifacts/ + e2e-verify-output*.txt + retention-days: 7 diff --git a/.github/workflows/e2e-triage.yml b/.github/workflows/e2e-triage.yml index e26a2bb23..349ec832d 100644 --- a/.github/workflows/e2e-triage.yml +++ b/.github/workflows/e2e-triage.yml @@ -39,16 +39,24 @@ concurrency: cancel-in-progress: true jobs: - matrix-setup: + setup: runs-on: ubuntu-latest + env: + SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} outputs: - agents: ${{ steps.set.outputs.agents }} + agents_csv: ${{ steps.set.outputs.agents_csv }} run_url: ${{ steps.set.outputs.run_url }} sha: ${{ steps.set.outputs.sha }} slack_channel: ${{ steps.set.outputs.slack_channel }} slack_thread_ts: ${{ steps.set.outputs.slack_thread_ts }} steps: - - name: Validate payload and build matrix + - name: Checkout repository + uses: actions/checkout@v6 + with: + fetch-depth: 1 + sparse-checkout: scripts + + - name: Validate payload id: set shell: bash env: @@ -62,15 +70,18 @@ jobs: run: | set -euo pipefail - csv_to_json() { - printf '%s' "$1" | jq -R -s -c 'split(",") | map(gsub("^\\s+|\\s+$"; "")) | map(select(length > 0))' - } - run_url="$RUN_URL_INPUT" sha="${SHA_INPUT:-}" slack_channel="${SLACK_CHANNEL_INPUT:-}" slack_thread_ts="${SLACK_THREAD_TS_INPUT:-}" + # Normalize thread_ts — re-insert dot if stripped (always 6 decimal places) + # Slack ts format is "seconds.microseconds" e.g. "1482960137.003543" + # See: https://api.slack.com/messaging/retrieving + if [ -n "$slack_thread_ts" ] && ! echo "$slack_thread_ts" | grep -q '\.'; then + slack_thread_ts="$(echo "$slack_thread_ts" | sed 's/\(.*\)\(.\{6\}\)$/\1.\2/')" + fi + # Derive missing values from run URL via GitHub API if [ -z "$sha" ] || [ -z "$FAILED_AGENTS_INPUT" ]; then run_id=$(echo "$run_url" | grep -oE '/runs/[0-9]+' | grep -oE '[0-9]+') @@ -83,16 +94,17 @@ jobs: sha=$(echo "$run_data" | jq -r '.headSha') fi if [ -z "$FAILED_AGENTS_INPUT" ]; then - agents_json=$(echo "$run_data" | jq -c '[.jobs[] + # Extract agent names from failed job names like "e2e (gemini-cli)" + agents_csv=$(echo "$run_data" | jq -r '[.jobs[] | select(.conclusion == "failure") | (.name | (try capture("\\((?[^)]+)\\)").agent catch null)) | select(. != null) - ]') + ] | join(", ")') fi fi if [ -n "$FAILED_AGENTS_INPUT" ]; then - agents_json="$(csv_to_json "$FAILED_AGENTS_INPUT")" + agents_csv="$FAILED_AGENTS_INPUT" fi if [ -z "$run_url" ]; then @@ -103,7 +115,7 @@ jobs: echo "sha is required" >&2 exit 1 fi - if [ -z "$agents_json" ] || [ "$agents_json" = "[]" ] || [ "$agents_json" = "null" ]; then + if [ -z "$agents_csv" ]; then echo "agents is required (provide failed_agents input or ensure failed job names contain '(agent-name)')" >&2 exit 1 fi @@ -112,36 +124,34 @@ jobs: echo "sha=$sha" >> "$GITHUB_OUTPUT" echo "slack_channel=$slack_channel" >> "$GITHUB_OUTPUT" echo "slack_thread_ts=$slack_thread_ts" >> "$GITHUB_OUTPUT" - echo "agents=$agents_json" >> "$GITHUB_OUTPUT" + echo "agents_csv=$agents_csv" >> "$GITHUB_OUTPUT" + + - name: Post triage starting + if: ${{ env.SLACK_BOT_TOKEN != '' && steps.set.outputs.slack_channel != '' && steps.set.outputs.slack_thread_ts != '' }} + env: + SLACK_CHANNEL: ${{ steps.set.outputs.slack_channel }} + SLACK_THREAD_TS: ${{ steps.set.outputs.slack_thread_ts }} + TRIAGE_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + shell: bash + run: | + scripts/post-slack-message.sh ":hourglass_flowing_sand: E2E triage started — <${TRIAGE_RUN_URL}|view triage run>" triage: - needs: [matrix-setup] + needs: [setup] runs-on: ubuntu-latest timeout-minutes: ${{ inputs.rerun == true && 90 || 45 }} env: - RUN_URL: ${{ needs.matrix-setup.outputs.run_url }} - E2E_AGENT: ${{ matrix.agent }} + RUN_URL: ${{ needs.setup.outputs.run_url }} + FAILED_AGENTS: ${{ needs.setup.outputs.agents_csv }} SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} - SLACK_CHANNEL: ${{ needs.matrix-setup.outputs.slack_channel }} - SLACK_THREAD_TS: ${{ needs.matrix-setup.outputs.slack_thread_ts }} - strategy: - fail-fast: false - matrix: - agent: ${{ fromJson(needs.matrix-setup.outputs.agents) }} + SLACK_CHANNEL: ${{ needs.setup.outputs.slack_channel }} + SLACK_THREAD_TS: ${{ needs.setup.outputs.slack_thread_ts }} steps: - name: Checkout repository uses: actions/checkout@v6 with: fetch-depth: 1 - - name: Post triage started - if: ${{ env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != '' }} - shell: bash - run: | - set -euo pipefail - - scripts/post-slack-message.sh "Starting E2E triage for \`$E2E_AGENT\` on <$RUN_URL|this run>." - - name: Setup mise uses: jdx/mise-action@v4 @@ -158,22 +168,26 @@ jobs: if: inputs.rerun == true run: sudo apt-get update && sudo apt-get install -y tmux - - name: Install agent CLI + - name: Install agent CLIs if: inputs.rerun == true run: | - case "${{ matrix.agent }}" in - claude-code) curl -fsSL https://claude.ai/install.sh | bash ;; - opencode) curl -fsSL https://opencode.ai/install | bash ;; - gemini-cli) npm install -g @google/gemini-cli ;; - cursor-cli) curl https://cursor.com/install -fsS | bash ;; - factoryai-droid) curl -fsSL https://app.factory.ai/cli | sh ;; - copilot-cli) npm install -g @github/copilot ;; - roger-roger) ;; # installed by mise (see mise.toml) - esac + IFS=',' read -ra agents <<< "$FAILED_AGENTS" + for agent in "${agents[@]}"; do + agent="$(echo "$agent" | xargs)" + case "$agent" in + claude-code) curl -fsSL https://claude.ai/install.sh | bash ;; + opencode) curl -fsSL https://opencode.ai/install | bash ;; + gemini-cli) npm install -g @google/gemini-cli ;; + cursor-cli) curl https://cursor.com/install -fsS | bash ;; + factoryai-droid) curl -fsSL https://app.factory.ai/cli | sh ;; + copilot-cli) npm install -g @github/copilot ;; + roger-roger) ;; # installed by mise (see mise.toml) + esac + done echo "$HOME/.local/bin" >> $GITHUB_PATH - - name: Bootstrap agent - if: inputs.rerun == true && matrix.agent != 'roger-roger' + - name: Bootstrap agents + if: inputs.rerun == true env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} @@ -188,10 +202,25 @@ jobs: uses: anthropics/claude-code-action@v1 with: prompt: | - /e2e:triage-ci ${{ steps.artifacts.outputs.path }} --agent ${{ matrix.agent }} --sha ${{ needs.matrix-setup.outputs.sha }} + Enter plan mode, then read and follow the full E2E triage procedure from .claude/skills/e2e/triage-ci.md. + + Inputs: + - Local artifact path: ${{ steps.artifacts.outputs.path }} + - Failed agents: ${{ env.FAILED_AGENTS }} + - SHA: ${{ needs.setup.outputs.sha }} + + This is a CI artifact analysis (local path provided) -- skip Steps L2-L5 and go straight to Shared Analysis. + Analyze ALL failed agents together -- failures may share a common root cause. + + Write ALL output to the plan file -- both the triage findings report and the fix plan. Structure it as: + 1. Triage findings (per-test findings blocks + summary table from the skill) + 2. A "## Fix Plan" section with specific code changes for each actionable finding + + When done, call ExitPlanMode. anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} - claude_args: "--allowedTools 'Read,Grep,Glob'" - display_report: true + claude_args: "--allowedTools 'Read,Grep,Glob,Write,EnterPlanMode,ExitPlanMode'" + display_report: false + github_token: ${{ github.token }} - name: Run triage (with re-runs) id: triage_rerun @@ -205,10 +234,24 @@ jobs: COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} with: prompt: | - /e2e:triage-ci ${{ env.RUN_URL }} --agent ${{ matrix.agent }} --sha ${{ needs.matrix-setup.outputs.sha }} + Enter plan mode, then read and follow the full E2E triage procedure from .claude/skills/e2e/triage-ci.md. + + Inputs: + - CI run URL: ${{ env.RUN_URL }} + - Failed agents: ${{ env.FAILED_AGENTS }} + - SHA: ${{ needs.setup.outputs.sha }} + + Analyze ALL failed agents together -- failures may share a common root cause. + + Write ALL output to the plan file -- both the triage findings report and the fix plan. Structure it as: + 1. Triage findings (per-test findings blocks + summary table from the skill) + 2. A "## Fix Plan" section with specific code changes for each actionable finding + + When done, call ExitPlanMode. anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} - claude_args: "--allowedTools 'Read,Grep,Glob,Bash(mise:*),Bash(scripts:*)'" - display_report: true + claude_args: "--allowedTools 'Read,Grep,Glob,Write,EnterPlanMode,ExitPlanMode,Bash(mise:*),Bash(scripts:*)'" + display_report: false + github_token: ${{ github.token }} - name: Resolve triage outcome id: triage @@ -235,93 +278,135 @@ jobs: shell: bash env: EXECUTION_FILE: ${{ steps.triage.outputs.execution_file }} - TRIAGE_OUTPUT_FILE: ${{ github.workspace }}/e2e-triage-artifacts/${{ matrix.agent }}/triage.md + TRIAGE_OUTPUT_FILE: ${{ github.workspace }}/e2e-triage-artifacts/triage.md + PLAN_FILE: ${{ github.workspace }}/e2e-triage-artifacts/plan.md run: | set -euo pipefail - mkdir -p "$(dirname "$TRIAGE_OUTPUT_FILE")" - # Extract assistant text content from execution JSON - jq -r '[.[] | select(.type == "assistant") | .message.content[] - | select(.type == "text") | .text] | join("\n")' \ - "$EXECUTION_FILE" > "$TRIAGE_OUTPUT_FILE" - - - name: Generate fix plan - id: plan - if: steps.triage.outputs.outcome == 'success' - uses: anthropics/claude-code-action@v1 - with: - prompt: | - /e2e:implement Read the triage findings at ${{ github.workspace }}/e2e-triage-artifacts/${{ matrix.agent }}/triage.md for agent ${{ matrix.agent }}. - anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} - claude_args: "--allowedTools 'Read,Grep,Glob,Write,EnterPlanMode,ExitPlanMode,Bash(mise:*)'" - display_report: true - - name: Extract plan output - id: plan_output - if: steps.plan.outcome == 'success' && steps.plan.outputs.execution_file != '' + # Try to extract from plan file on disk (plan mode writes here) + plan_src=$(find /home/runner/.claude/plans -name '*.md' -type f 2>/dev/null | head -1) + if [ -n "$plan_src" ] && [ -s "$plan_src" ]; then + combined="$(cat "$plan_src")" + else + # Fallback: extract assistant text from execution JSON + combined=$(jq -r '[.[] | select(.type == "assistant") | .message.content[] + | select(.type == "text") | .text] | join("\n")' "$EXECUTION_FILE") + fi + + # Split on "## Fix Plan" header + if echo "$combined" | grep -q "^## Fix Plan"; then + echo "$combined" | sed '/^## Fix Plan/,$d' > "$TRIAGE_OUTPUT_FILE" + echo "$combined" | sed -n '/^## Fix Plan/,$p' > "$PLAN_FILE" + else + echo "$combined" > "$TRIAGE_OUTPUT_FILE" + echo "$combined" > "$PLAN_FILE" + fi + + # Copy execution JSON to artifacts for debugging + cp "$EXECUTION_FILE" "$(dirname "$TRIAGE_OUTPUT_FILE")/execution.json" 2>/dev/null || true + + - name: Write job summary + if: always() && steps.triage_output.outcome == 'success' shell: bash env: - EXECUTION_FILE: ${{ steps.plan.outputs.execution_file }} - PLAN_FILE: ${{ github.workspace }}/e2e-triage-artifacts/${{ matrix.agent }}/plan.md + TRIAGE_FILE: ${{ github.workspace }}/e2e-triage-artifacts/triage.md + PLAN_FILE: ${{ github.workspace }}/e2e-triage-artifacts/plan.md run: | set -euo pipefail + { + echo "# E2E Triage: ${FAILED_AGENTS}" + echo "" + if [ -s "$TRIAGE_FILE" ]; then + cat "$TRIAGE_FILE" + else + echo "_No triage findings._" + fi + echo "" + echo "---" + echo "" + if [ -s "$PLAN_FILE" ]; then + cat "$PLAN_FILE" + else + echo "_No fix plan._" + fi + } >> "$GITHUB_STEP_SUMMARY" - mkdir -p "$(dirname "$PLAN_FILE")" - jq -r '[.[] | select(.type == "assistant") | .message.content[] - | select(.type == "text") | .text] | join("\n")' \ - "$EXECUTION_FILE" > "$PLAN_FILE" - - - name: Post triage completion + - name: Post triage result if: ${{ always() && (steps.triage.outputs.outcome == 'success' || steps.triage.outputs.outcome == 'failure') && env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != '' }} shell: bash env: TRIAGE_OUTCOME: ${{ steps.triage.outputs.outcome }} + TRIAGE_OUTPUT_OUTCOME: ${{ steps.triage_output.outcome }} + TRIAGE_RUN_ID: ${{ github.run_id }} + SUMMARY_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} run: | set -euo pipefail - if [ "$TRIAGE_OUTCOME" = "success" ]; then - message="E2E triage complete for \`$E2E_AGENT\`." - else - message="E2E triage failed for \`$E2E_AGENT\`." + if [ "$TRIAGE_OUTCOME" != "success" ]; then + scripts/post-slack-message.sh ":x: E2E triage failed — <${SUMMARY_URL}|view run>" + exit 0 fi - scripts/post-slack-message.sh "$message" + if [ "$TRIAGE_OUTPUT_OUTCOME" != "success" ]; then + scripts/post-slack-message.sh ":warning: E2E triage completed but output extraction failed — <${SUMMARY_URL}|view run>" + exit 0 + fi - - name: Post fix plan to Slack - if: steps.plan_output.outcome == 'success' && env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != '' + # Construct Fix It URL + encoded_run_url="$(python3 -c "import urllib.parse, os; print(urllib.parse.quote(os.environ['RUN_URL'], safe=''))")" + encoded_agents="$(python3 -c "import urllib.parse, os; print(urllib.parse.quote(os.environ['FAILED_AGENTS'], safe=''))")" + fix_url="https://e2e-triage.entireio.workers.dev/fix?triage_run_id=${TRIAGE_RUN_ID}&run_url=${encoded_run_url}&failed_agents=${encoded_agents}&slack_channel=${SLACK_CHANNEL}&slack_thread_ts=${SLACK_THREAD_TS}" + + # Block Kit payload with button + payload="$(jq -n \ + --arg summary_url "$SUMMARY_URL" \ + --arg fix_url "$fix_url" \ + '{ + text: "E2E triage complete", + blocks: [ + { + type: "section", + text: { + type: "mrkdwn", + text: (":white_check_mark: E2E triage complete — <" + $summary_url + "|view results>\nClick *Fix It* to apply the plan and create a draft PR.") + } + }, + { + type: "actions", + elements: [ + { + type: "button", + text: { type: "plain_text", text: "Fix It" }, + url: $fix_url, + style: "primary" + } + ] + } + ] + }')" + + scripts/post-slack-message.sh --payload "$payload" + + - name: Write triage metadata + if: always() shell: bash env: - PLAN_FILE: ${{ github.workspace }}/e2e-triage-artifacts/${{ matrix.agent }}/plan.md - TRIAGE_RUN_ID: ${{ github.run_id }} + META_RUN_URL: ${{ needs.setup.outputs.run_url }} + META_AGENTS_CSV: ${{ needs.setup.outputs.agents_csv }} + META_SHA: ${{ needs.setup.outputs.sha }} run: | - set -euo pipefail - - # Extract first few lines as summary - summary="$(head -20 "$PLAN_FILE" | sed '/^$/d' | head -5)" - - # Construct Fix It URL - encoded_run_url="$(python3 -c "import urllib.parse, os; print(urllib.parse.quote(os.environ['RUN_URL'], safe=''))")" - fix_url="https://e2e-triage.entireio.workers.dev/fix?triage_run_id=${TRIAGE_RUN_ID}&run_url=${encoded_run_url}&failed_agents=${E2E_AGENT}&slack_channel=${SLACK_CHANNEL}&slack_thread_ts=${SLACK_THREAD_TS}" - - message="Fix plan ready for \`$E2E_AGENT\`: - ${summary} - - <${fix_url}|Fix It> — applies the plan and creates a draft PR" - - scripts/post-slack-message.sh "$message" + jq -n \ + --arg run_url "$META_RUN_URL" \ + --arg failed_agents "$META_AGENTS_CSV" \ + --arg sha "$META_SHA" \ + '{run_url: $run_url, failed_agents: $failed_agents, sha: $sha}' \ + > e2e-triage-artifacts/metadata.json - name: Upload triage output if: always() uses: actions/upload-artifact@v7 with: - name: e2e-triage-${{ matrix.agent }} + name: e2e-triage path: e2e-triage-artifacts/ retention-days: 7 - - - name: Upload plan artifact - if: always() && steps.plan_output.outcome == 'success' - uses: actions/upload-artifact@v7 - with: - name: e2e-plan-${{ matrix.agent }} - path: e2e-triage-artifacts/${{ matrix.agent }}/plan.md - retention-days: 7 diff --git a/scripts/post-slack-message.sh b/scripts/post-slack-message.sh index 76546dc0b..5a3aab297 100755 --- a/scripts/post-slack-message.sh +++ b/scripts/post-slack-message.sh @@ -3,13 +3,26 @@ set -euo pipefail # Post a message to a Slack thread using the chat.postMessage API. # Requires SLACK_BOT_TOKEN, SLACK_CHANNEL, and SLACK_THREAD_TS env vars. +# +# Usage: +# post-slack-message.sh "plain text message" +# post-slack-message.sh --payload '{"text":"fallback","blocks":[...]}' +# +# With --payload, channel and thread_ts are injected automatically. -text="${1:?message is required}" -payload="$(jq -n \ - --arg channel "$SLACK_CHANNEL" \ - --arg thread_ts "$SLACK_THREAD_TS" \ - --arg text "$text" \ - '{channel: $channel, thread_ts: $thread_ts, text: $text}')" +if [ "$1" = "--payload" ]; then + payload="$(echo "$2" | jq \ + --arg channel "$SLACK_CHANNEL" \ + --arg thread_ts "$SLACK_THREAD_TS" \ + '. + {channel: $channel, thread_ts: $thread_ts}')" +else + text="${1:?message is required}" + payload="$(jq -n \ + --arg channel "$SLACK_CHANNEL" \ + --arg thread_ts "$SLACK_THREAD_TS" \ + --arg text "$text" \ + '{channel: $channel, thread_ts: $thread_ts, text: $text}')" +fi if ! response="$(curl -fsS https://slack.com/api/chat.postMessage \ -H "Authorization: Bearer ${SLACK_BOT_TOKEN}" \ @@ -20,5 +33,7 @@ if ! response="$(curl -fsS https://slack.com/api/chat.postMessage \ fi if ! jq -e '.ok == true' >/dev/null <<<"$response"; then - echo "warning: slack notification returned non-ok response" >&2 + error="$(jq -r '.error // "unknown"' <<<"$response")" + echo "warning: slack notification returned non-ok response: $error" >&2 + echo "warning: full response: $response" >&2 fi diff --git a/scripts/verify-e2e-tests.sh b/scripts/verify-e2e-tests.sh new file mode 100755 index 000000000..67ae12239 --- /dev/null +++ b/scripts/verify-e2e-tests.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# verify-e2e-tests.sh — Run E2E tests twice per agent to verify a fix. +# +# Usage: verify-e2e-tests.sh +# attempt - attempt number (for log messages) +# output_file - file to append test output to +# +# Required env vars: +# FAILED_AGENTS - comma-separated list of agents to verify + +set -euo pipefail + +attempt="${1:?usage: verify-e2e-tests.sh }" +output_file="${2:?usage: verify-e2e-tests.sh }" + +failed="" +for agent in $(echo "$FAILED_AGENTS" | tr ',' ' ' | xargs); do + limit="" + test_filter="" + case "$agent" in + gemini-cli) limit="6" ;; + factoryai-droid) limit="1" ;; + roger-roger) test_filter="TestExternalAgent" ;; + esac + export E2E_CONCURRENT_TEST_LIMIT="$limit" + + for run in 1 2; do + echo "=== $agent: verification run $run/2 (attempt $attempt) ===" + if ! mise run test:e2e --agent "$agent" ${test_filter:+"$test_filter"} 2>&1 | tee -a "$output_file"; then + failed="$failed $agent(run$run)" + echo "=== $agent: run $run FAILED ===" + break + fi + echo "=== $agent: run $run passed ===" + done +done + +if [ -n "$failed" ]; then + echo "FAILED:$failed" >> "$output_file" + exit 1 +fi