diff --git a/README.md b/README.md index 61c86b0..49348be 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,34 @@ Template repository for Rust workspace projects in the tensor4all organization. 2. Add crates to `Cargo.toml` `members` 3. Adjust `coverage-thresholds.json` as needed +## Agentic Bug Sweep + +Use `bash scripts/agentic-bug-sweep.sh` to run a bounded headless Codex bug sweep that can create, update, or consolidate GitHub issues. + +Requirements: + +- `codex` +- `gh` + +Primary mode is remote-repository analysis: + +```bash +bash scripts/agentic-bug-sweep.sh \ + --repo-url https://github.com/tensor4all/tenferro-rs \ + --ref main \ + --iterations 20 \ + --max-consecutive-none 3 +``` + +`--workdir` remains available as a local override when you already have a checked-out repository. + +The workflow always stops after the configured `--iterations` limit or after `--max-consecutive-none` dry runs in a row. + +Artifacts: + +- durable reports: `docs/test-reports/agentic-bug-sweep/` +- ephemeral execution state: `target/agentic-bug-sweep/` + ## Coverage Coverage is checked per-file against thresholds in `coverage-thresholds.json`. diff --git a/ai/agentic-bug-sweep.md b/ai/agentic-bug-sweep.md new file mode 100644 index 0000000..626f307 --- /dev/null +++ b/ai/agentic-bug-sweep.md @@ -0,0 +1,31 @@ +You are running one iteration of an automated bug sweep for this repository. + +Your job on each iteration is to: + +1. Inspect open bug issues in the target GitHub repository. +2. Inspect prior bug-sweep reports from `docs/test-reports/agentic-bug-sweep/`. +3. Choose the next unexplored or highest-yield area to investigate. +4. Use the installed `test-feature` skill from `agentic-tests` to investigate that area. +5. 
Decide whether the result means: + - `create`: a new issue should be created + - `update`: an existing issue should be updated + - `merge`: the finding is the same bug as an existing canonical issue and duplicates should be closed + - `none`: no actionable bug was found + +Relationship rules: + +- If the finding is the same bug as an existing issue, use `merge`. +- If the finding is not the same bug, but it likely shares the same root cause as an existing issue, keep the primary action as `create` or `update` and populate `related_issue_numbers`. +- Only use `duplicates_to_close` for true duplicates of the same bug. + +Output rules: + +- Return only JSON that matches the provided schema. +- Always include a short `summary` and the generated `report_path`. +- The schema requires every top-level field to be present. Use `null` for fields that are irrelevant to the chosen action. +- For new issues, provide `issue.title`, `issue.body`, and `issue.labels`. +- For issue updates, provide `canonical_issue_number` and `issue_comment`. +- For duplicate consolidation, provide `canonical_issue_number`, `issue_comment`, `duplicates_to_close`, and `duplicate_comment`. +- If you provide `related_issue_numbers`, also provide `related_comment`. + +Do not run raw GitHub issue mutations yourself. The shell script will apply any create, update, merge, or related-issue actions. 
diff --git a/ai/agentic-bug-sweep.schema.json b/ai/agentic-bug-sweep.schema.json new file mode 100644 index 0000000..62bdbc3 --- /dev/null +++ b/ai/agentic-bug-sweep.schema.json @@ -0,0 +1,72 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "additionalProperties": false, + "properties": { + "summary": { + "type": "string" + }, + "report_path": { + "type": "string" + }, + "action": { + "type": "string", + "enum": ["create", "update", "merge", "none"] + }, + "issue": { + "type": ["object", "null"], + "additionalProperties": false, + "properties": { + "title": { + "type": "string" + }, + "body": { + "type": "string" + }, + "labels": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": ["title", "body", "labels"] + }, + "canonical_issue_number": { + "type": ["integer", "null"] + }, + "related_issue_numbers": { + "type": ["array", "null"], + "items": { + "type": "integer" + } + }, + "duplicates_to_close": { + "type": ["array", "null"], + "items": { + "type": "integer" + } + }, + "duplicate_comment": { + "type": ["string", "null"] + }, + "issue_comment": { + "type": ["string", "null"] + }, + "related_comment": { + "type": ["string", "null"] + } + }, + "required": [ + "summary", + "report_path", + "action", + "issue", + "canonical_issue_number", + "related_issue_numbers", + "duplicates_to_close", + "duplicate_comment", + "issue_comment", + "related_comment" + ] +} diff --git a/docs/plans/2026-03-08-agentic-bug-sweep-design.md b/docs/plans/2026-03-08-agentic-bug-sweep-design.md new file mode 100644 index 0000000..2b92c06 --- /dev/null +++ b/docs/plans/2026-03-08-agentic-bug-sweep-design.md @@ -0,0 +1,219 @@ +# Agentic Bug Sweep Design + +## Goal + +Add a headless Codex-driven bug sweep workflow that repeatedly runs `agentic-tests`, triages findings against existing GitHub issues, creates or consolidates issues, records related same-root-cause issues, and stops after either a fixed iteration budget or too 
many consecutive dry runs. + +## Scope + +This workflow belongs in the repository as a reusable automation surface. It is responsible for: + +- running a bounded number of headless Codex iterations +- letting Codex choose the next exploration target autonomously +- persisting reports and machine-readable iteration results +- creating new GitHub issues when a new bug is found +- consolidating findings into existing issues when the bug is already tracked +- recording links to related issues when a finding appears to share the same root cause but is still a distinct symptom +- closing duplicates when a canonical issue is selected + +It is not responsible for: + +- fixing bugs automatically +- pushing branches or creating pull requests +- continuing forever without an explicit iteration bound + +## Recommended Approach + +Use a shell script as the orchestration layer and use headless Codex only for the judgment-heavy parts. + +The shell script should own deterministic operations: + +- iteration counting +- lock handling +- environment checks +- report and log storage +- GitHub mutations through `gh` + +Codex should own non-deterministic reasoning: + +- choosing the next high-yield exploration area +- interpreting `agentic-tests` results +- deciding whether a finding should create a new issue, update an existing issue, merge into a canonical issue, or produce no action +- identifying existing issues that are probably related because they appear to share the same root cause + +This split keeps side effects auditable and makes failures easier to recover from than a single fully autonomous prompt. + +## Architecture + +The workflow should consist of three checked-in files plus artifact directories: + +- `scripts/agentic-bug-sweep.sh` +- `ai/agentic-bug-sweep.md` +- `ai/agentic-bug-sweep.schema.json` +- durable artifacts in `docs/test-reports/agentic-bug-sweep/` +- ephemeral state in `target/agentic-bug-sweep/` + +`scripts/agentic-bug-sweep.sh` is the entrypoint. 
It gathers repository context, invokes `codex exec`, validates the returned JSON, and applies GitHub side effects. + +`ai/agentic-bug-sweep.md` is the fixed prompt that tells Codex to inspect prior reports, inspect existing issues, choose the next target, run the installed `agentic-tests` flow, and emit only schema-valid JSON. + +`ai/agentic-bug-sweep.schema.json` defines the exact contract that the shell script accepts from Codex. + +## Iteration Flow + +Each iteration should run in this order: + +1. Acquire a lock so only one sweep runs at a time. +2. Snapshot relevant GitHub state such as open bug issues and recent issue metadata. +3. Gather prior sweep reports from `docs/test-reports/agentic-bug-sweep/`. +4. Invoke `codex exec` headlessly with: + - the target repository path + - the fixed prompt file + - the JSON schema + - a durable output file for the last message +5. Validate the returned JSON strictly. +6. Apply the requested GitHub action: + - `create`: create a new issue + - `update`: comment on an existing issue + - `merge`: update the canonical issue, comment on duplicates, and close duplicates + - `none`: record the dry run and do not mutate GitHub +7. Preserve any same-root-cause relationships returned through `related_issue_numbers`. +8. Persist iteration metadata and report references. +9. Update counters and decide whether to continue. + +The shell should never let Codex perform raw GitHub mutations directly. Codex returns intent and payload; the shell applies the side effects. 
+ +## Stop Policy + +The workflow should use two explicit limits: + +- `--iterations N`: hard upper bound for total iterations +- `--max-consecutive-none M`: early-stop threshold for consecutive dry runs + +Behavior: + +- every iteration consumes one unit from `N` +- `action=none` increments the dry-run counter +- any of `create`, `update`, or `merge` resets the dry-run counter to zero +- the workflow stops as soon as either `N` iterations are reached or `M` consecutive `none` results occur + +Stop reasons should be recorded explicitly, for example: + +- `completed_max_iterations` +- `completed_consecutive_none_threshold` +- `failed_codex_exec` +- `failed_invalid_json` +- `failed_github_mutation` + +## JSON Contract + +The Codex output should be minimal and action-oriented. Required top-level fields: + +```json +{ + "summary": "short human-readable iteration summary", + "report_path": "docs/test-reports/agentic-bug-sweep/bug-sweep-20260308-123456.md", + "action": "create", + "issue": { + "title": "Bug: ...", + "body": "Markdown body", + "labels": ["bug", "prio/p1", "area/einsum"] + }, + "canonical_issue_number": 123, + "related_issue_numbers": [140, 141], + "duplicates_to_close": [124, 130], + "duplicate_comment": "Closing in favor of #123 because ...", + "issue_comment": "New evidence from automated sweep: ...", + "related_comment": "This newly discovered bug likely shares the same root cause as #123." 
+} +``` + +Contract rules: + +- `create` + - requires `issue` +- `update` + - requires `canonical_issue_number` + - requires `issue_comment` +- `merge` + - requires `canonical_issue_number` + - requires `issue_comment` + - requires `duplicates_to_close` + - requires `duplicate_comment` +- `none` + - requires only `summary` and `report_path` +- any non-`none` action may include `related_issue_numbers` +- if `related_issue_numbers` is present and the workflow should notify those issues directly, require `related_comment` + +The schema should reject any missing fields for the selected action. + +## Issue Consolidation Policy + +Consolidation should mean operational unification, not a Git merge. + +The workflow should distinguish two cases: + +- duplicate or same bug + - use `merge` +- same likely root cause but distinct user-visible bug + - keep the primary action as `create` or `update` + - record the relationship through `related_issue_numbers` + +When Codex selects `merge`: + +1. Comment on the canonical issue with the new evidence. +2. Comment on each duplicate issue with a pointer to the canonical issue. +3. Close each duplicate issue. + +This ordering preserves information even if a later GitHub command fails. + +When Codex returns `related_issue_numbers`, the workflow should preserve that relationship in the primary issue body or comment, and may also comment on the related issues when `related_comment` is provided. + +## Failure Policy + +The workflow should stop on the first hard failure. + +- If `codex exec` fails, stop and preserve logs. +- If Codex returns invalid JSON, stop and preserve the raw response. +- If a GitHub mutation fails, stop immediately after recording which mutation failed. +- If report generation succeeds but issue mutation fails, keep the report path and iteration payload so the run can be inspected and resumed manually. + +The script should prefer conservative failure over silent continuation after partial side effects. 
+ +## File Layout + +Durable files: + +- `docs/test-reports/agentic-bug-sweep/` for reports, iteration summaries, and any audit trail worth keeping + +Ephemeral files: + +- `target/agentic-bug-sweep/lock` +- `target/agentic-bug-sweep/context/` +- `target/agentic-bug-sweep/output/` + +This split keeps long-lived artifacts in versioned paths and temporary execution state out of the main tree. + +## Testing Strategy + +Verification should focus on deterministic shell behavior and schema enforcement. + +- unit-style tests for argument parsing and stop-condition bookkeeping +- tests that stub `codex exec` output and verify `create`, `update`, `merge`, and `none` branches +- tests that verify related-issue handling for same-root-cause findings +- tests that verify duplicate close ordering +- tests that verify early stop on consecutive `none` +- tests that verify failure on invalid JSON or failed `gh` commands + +The headless Codex behavior itself should be validated by schema conformance and by preserving raw iteration outputs for inspection. + +## Non-Goals + +- automatic bug fixing +- automatic branch creation or PR creation +- unbounded autonomous exploration +- opaque direct GitHub mutations from inside Codex prompts + +## Recommended Next Step + +Implement the shell orchestrator, the fixed prompt, and the schema together. Then add tests that stub `codex exec` and `gh` so the control flow can be validated without making live network mutations. diff --git a/docs/plans/2026-03-08-agentic-bug-sweep.md b/docs/plans/2026-03-08-agentic-bug-sweep.md new file mode 100644 index 0000000..ed65e45 --- /dev/null +++ b/docs/plans/2026-03-08-agentic-bug-sweep.md @@ -0,0 +1,322 @@ +# Agentic Bug Sweep Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. 
+ +**Goal:** Add a bounded headless Codex workflow that runs `agentic-tests`, triages findings against existing GitHub issues, creates or consolidates issues, records related same-root-cause issues, and stops after either a configured iteration count or too many consecutive `none` results. + +**Architecture:** Keep deterministic control flow in a Bash entrypoint under `scripts/`, keep Codex reasoning in a fixed prompt under `ai/`, and enforce the handoff with a checked-in JSON schema. Store durable reports under `docs/test-reports/agentic-bug-sweep/` and ephemeral execution state under `target/agentic-bug-sweep/`. + +**Tech Stack:** Bash, JSON Schema, GitHub CLI, headless `codex exec`, Python `unittest`, existing repository `ai/` and `scripts/` conventions + +--- + +### Task 1: Add the prompt and schema surfaces + +**Files:** +- Create: `ai/agentic-bug-sweep.md` +- Create: `ai/agentic-bug-sweep.schema.json` +- Create: `tests/test_agentic_bug_sweep.py` +- Reference: `docs/plans/2026-03-08-agentic-bug-sweep-design.md` + +**Step 1: Write the failing test** + +Add a `unittest` case that asserts: + +- `ai/agentic-bug-sweep.md` exists and instructs Codex to inspect open issues and prior reports +- `ai/agentic-bug-sweep.schema.json` parses as JSON +- the schema recognizes `create`, `update`, `merge`, and `none` +- the schema contains `related_issue_numbers` support for same-root-cause findings + +**Step 2: Run the test to verify it fails** + +Run: `python3 -m unittest tests.test_agentic_bug_sweep.AgenticBugSweepTests.test_prompt_and_schema_contract -v` +Expected: FAIL because the prompt, schema, and test file do not yet exist. + +**Step 3: Write the minimal prompt and schema** + +Implement the fixed prompt and the JSON schema with conditional requirements for each action. + +**Step 4: Run the test to verify it passes** + +Run: `python3 -m unittest tests.test_agentic_bug_sweep.AgenticBugSweepTests.test_prompt_and_schema_contract -v` +Expected: PASS. 
+ +**Step 5: Commit** + +```bash +git add ai/agentic-bug-sweep.md ai/agentic-bug-sweep.schema.json tests/test_agentic_bug_sweep.py +git commit -m "feat: add agentic bug sweep prompt contract" +``` + +### Task 2: Scaffold the shell entrypoint + +**Files:** +- Create: `scripts/agentic-bug-sweep.sh` +- Modify: `tests/test_agentic_bug_sweep.py` +- Reference: `scripts/create-pr.sh` + +**Step 1: Write the failing help-path test** + +Add a `unittest` case that runs `bash scripts/agentic-bug-sweep.sh --help` in a temp repo fixture and asserts the usage text mentions: + +- `--iterations` +- `--max-consecutive-none` +- `--repo` +- `--workdir` + +**Step 2: Run the test to verify it fails** + +Run: `python3 -m unittest tests.test_agentic_bug_sweep.AgenticBugSweepTests.test_help_path -v` +Expected: FAIL because the script does not exist yet. + +**Step 3: Add the CLI surface** + +Implement argument parsing, preflight checks, and state-directory creation in `scripts/agentic-bug-sweep.sh`. + +**Step 4: Run the test to verify it passes** + +Run: `python3 -m unittest tests.test_agentic_bug_sweep.AgenticBugSweepTests.test_help_path -v` +Expected: PASS. + +**Step 5: Commit** + +```bash +git add scripts/agentic-bug-sweep.sh tests/test_agentic_bug_sweep.py +git commit -m "feat: scaffold agentic bug sweep entrypoint" +``` + +### Task 3: Implement one iteration of headless Codex execution + +**Files:** +- Modify: `scripts/agentic-bug-sweep.sh` +- Modify: `tests/test_agentic_bug_sweep.py` + +**Step 1: Write the failing test for a single successful iteration** + +Add a `unittest` case that stubs `codex exec` to emit a valid `action=create` payload and asserts that the shell script records the iteration output in the expected directories. + +**Step 2: Run the test to verify it fails** + +Run: `python3 -m unittest tests.test_agentic_bug_sweep.AgenticBugSweepTests.test_single_iteration_create -v` +Expected: FAIL because the script does not yet invoke `codex exec` or persist iteration output. 
+ +**Step 3: Add minimal Codex invocation** + +Implement one iteration that: + +- builds a context payload from open issues and prior reports +- calls `codex exec` with the fixed prompt and schema +- saves raw output and parsed JSON to `target/agentic-bug-sweep/output/` + +**Step 4: Run the test to verify it passes** + +Run: `python3 -m unittest tests.test_agentic_bug_sweep.AgenticBugSweepTests.test_single_iteration_create -v` +Expected: PASS. + +**Step 5: Commit** + +```bash +git add scripts/agentic-bug-sweep.sh tests/test_agentic_bug_sweep.py +git commit -m "feat: run one headless codex bug sweep iteration" +``` + +### Task 4: Implement GitHub mutation branches + +**Files:** +- Modify: `scripts/agentic-bug-sweep.sh` +- Modify: `tests/test_agentic_bug_sweep.py` + +**Step 1: Write failing tests for `create`, `update`, `merge`, `none`, and related-issue recording** + +Add tests that stub: + +- `gh issue create` for `create` +- `gh issue comment` for `update` +- canonical comment plus duplicate comment and close ordering for `merge` +- related issue comments or body linking for same-root-cause findings +- no `gh` mutation for `none` + +**Step 2: Run the tests to verify they fail** + +Run: `python3 -m unittest tests.test_agentic_bug_sweep.AgenticBugSweepTests.test_github_actions -v` +Expected: FAIL because the mutation branches are incomplete. + +**Step 3: Implement the mutation helpers** + +Add Bash helpers that: + +- create a new issue from title, body, and labels +- comment on an existing issue +- comment on duplicates and close them after the canonical issue update +- record and optionally comment on `related_issue_numbers` for same-root-cause findings +- skip GitHub mutation entirely for `none` + +**Step 4: Run the tests to verify they pass** + +Run: `python3 -m unittest tests.test_agentic_bug_sweep.AgenticBugSweepTests.test_github_actions -v` +Expected: PASS. 
+ +**Step 5: Commit** + +```bash +git add scripts/agentic-bug-sweep.sh tests/test_agentic_bug_sweep.py +git commit -m "feat: apply github actions for bug sweep outcomes" +``` + +### Task 5: Implement iteration bookkeeping and stop conditions + +**Files:** +- Modify: `scripts/agentic-bug-sweep.sh` +- Modify: `tests/test_agentic_bug_sweep.py` + +**Step 1: Write failing tests for loop control** + +Add tests that verify: + +- the script never exceeds `--iterations` +- `action=none` increments the dry-run counter +- `create`, `update`, and `merge` reset the dry-run counter +- the script stops early after `--max-consecutive-none` + +**Step 2: Run the tests to verify they fail** + +Run: `python3 -m unittest tests.test_agentic_bug_sweep.AgenticBugSweepTests.test_stop_conditions -v` +Expected: FAIL because loop control and reset behavior are incomplete. + +**Step 3: Implement the loop and counters** + +Track: + +- current iteration number +- current consecutive-`none` count +- final stop reason + +Persist a small summary artifact for each run. + +**Step 4: Run the tests to verify they pass** + +Run: `python3 -m unittest tests.test_agentic_bug_sweep.AgenticBugSweepTests.test_stop_conditions -v` +Expected: PASS. + +**Step 5: Commit** + +```bash +git add scripts/agentic-bug-sweep.sh tests/test_agentic_bug_sweep.py +git commit -m "feat: add bounded loop control to bug sweep" +``` + +### Task 6: Harden failure handling and audit artifacts + +**Files:** +- Modify: `scripts/agentic-bug-sweep.sh` +- Modify: `tests/test_agentic_bug_sweep.py` + +**Step 1: Write failing tests for hard failures** + +Add tests for: + +- failed `codex exec` +- invalid JSON output +- failed `gh` mutation after a valid report +- lock acquisition failure + +**Step 2: Run the tests to verify they fail** + +Run: `python3 -m unittest tests.test_agentic_bug_sweep.AgenticBugSweepTests.test_failure_paths -v` +Expected: FAIL because the error handling and audit artifacts are incomplete. 
+ +**Step 3: Implement conservative failure behavior** + +Ensure the script: + +- exits non-zero on the first hard failure +- preserves raw Codex output on invalid JSON +- preserves report references after partial success +- records the stop reason clearly + +**Step 4: Run the tests to verify they pass** + +Run: `python3 -m unittest tests.test_agentic_bug_sweep.AgenticBugSweepTests.test_failure_paths -v` +Expected: PASS. + +**Step 5: Commit** + +```bash +git add scripts/agentic-bug-sweep.sh tests/test_agentic_bug_sweep.py +git commit -m "feat: preserve audit state on bug sweep failures" +``` + +### Task 7: Document usage and repository expectations + +**Files:** +- Modify: `README.md` +- Reference: `AGENTS.md` +- Reference: `docs/plans/2026-03-08-agentic-bug-sweep-design.md` + +**Step 1: Add a short automation note to the README** + +Document: + +- the purpose of `scripts/agentic-bug-sweep.sh` +- the required tools (`codex`, `gh`) +- the bounded iteration model +- where reports and logs are stored + +Keep the README concise and avoid duplicating the full prompt logic. + +**Step 2: Verify the README remains high-level** + +Check that the README points readers to the script and generated artifacts without re-embedding the internal JSON contract. + +**Step 3: Commit** + +```bash +git add README.md +git commit -m "docs: add agentic bug sweep usage note" +``` + +### Task 8: Run full verification + +**Files:** +- Test: `scripts/agentic-bug-sweep.sh` +- Test: `tests/test_agentic_bug_sweep.py` +- Test: `ai/agentic-bug-sweep.md` +- Test: `ai/agentic-bug-sweep.schema.json` + +**Step 1: Run formatter and tests** + +Run: + +```bash +cargo fmt --all +python3 -m unittest tests.test_agentic_bug_sweep -v +``` + +Expected: formatting succeeds and the bug sweep tests pass. 
+ +**Step 2: Run the broader Python test suite** + +Run: + +```bash +python3 -m unittest tests.test_create_pr_script tests.test_monitor_pr_checks tests.test_repo_settings_scripts tests.test_new_repo_script tests.test_agentic_bug_sweep -v +``` + +Expected: existing script tests still pass with the new automation files present. + +**Step 3: Run a help-path smoke test** + +Run: + +```bash +bash scripts/agentic-bug-sweep.sh --help +``` + +Expected: usage text prints successfully. + +**Step 4: Commit** + +```bash +git add README.md scripts/agentic-bug-sweep.sh tests/test_agentic_bug_sweep.py ai/agentic-bug-sweep.md ai/agentic-bug-sweep.schema.json +git commit -m "test: verify agentic bug sweep workflow" +``` diff --git a/scripts/agentic-bug-sweep.sh b/scripts/agentic-bug-sweep.sh new file mode 100644 index 0000000..9d3ff53 --- /dev/null +++ b/scripts/agentic-bug-sweep.sh @@ -0,0 +1,555 @@ +#!/usr/bin/env bash +set -euo pipefail + +ITERATIONS="" +MAX_CONSECUTIVE_NONE="" +REPO="" +REPO_URL="" +REF="" +TARGET_WORKDIR="" +TARGET_REPORT_ROOT="" +MODEL="" +LAST_ITERATION_OUTPUT_PATH="" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/.." 
&& pwd)" +PROMPT_PATH="${REPO_ROOT}/ai/agentic-bug-sweep.md" +SCHEMA_PATH="${REPO_ROOT}/ai/agentic-bug-sweep.schema.json" +STATE_ROOT="${REPO_ROOT}/target/agentic-bug-sweep" +REPORT_ROOT="${REPO_ROOT}/docs/test-reports/agentic-bug-sweep" +LOCK_PATH="${STATE_ROOT}/lock" + +usage() { + cat <<'EOF' +Usage: bash scripts/agentic-bug-sweep.sh [options] + +Options: + --iterations N Maximum number of Codex iterations to run + --max-consecutive-none N Stop after N consecutive `none` results + --repo OWNER/REPO Target GitHub repository slug + --repo-url URL Remote repository URL to clone and analyze + --ref REF Git ref to clone when using --repo-url + --workdir PATH Target repository working directory + --model MODEL Optional model override for `codex exec` + --help Show this help text +EOF +} + +log() { + printf '%s\n' "$*" +} + +require_command() { + if ! command -v "$1" >/dev/null 2>&1; then + log "missing required command: $1" + exit 1 + fi +} + +ensure_inputs() { + if [[ -z "$ITERATIONS" || -z "$MAX_CONSECUTIVE_NONE" ]]; then + log "missing required arguments" + usage + exit 1 + fi + if [[ -z "$TARGET_WORKDIR" && -z "$REPO_URL" ]]; then + log "either --workdir or --repo-url is required" + usage + exit 1 + fi + if [[ -z "$REPO" && -z "$REPO_URL" ]]; then + log "either --repo or --repo-url is required" + usage + exit 1 + fi + if ! [[ "$ITERATIONS" =~ ^[0-9]+$ ]] || ! [[ "$MAX_CONSECUTIVE_NONE" =~ ^[0-9]+$ ]]; then + log "--iterations and --max-consecutive-none must be non-negative integers" + exit 1 + fi + if [[ "$ITERATIONS" -eq 0 ]]; then + log "--iterations must be greater than 0" + exit 1 + fi +} + +ensure_paths() { + if [[ ! -f "$PROMPT_PATH" ]]; then + log "missing prompt file: $PROMPT_PATH" + exit 1 + fi + if [[ ! -f "$SCHEMA_PATH" ]]; then + log "missing schema file: $SCHEMA_PATH" + exit 1 + fi + if [[ ! 
-d "$TARGET_WORKDIR" ]]; then + log "target workdir does not exist: $TARGET_WORKDIR" + exit 1 + fi + TARGET_REPORT_ROOT="${TARGET_WORKDIR}/docs/test-reports/agentic-bug-sweep" + mkdir -p "$TARGET_REPORT_ROOT" +} + +ensure_tools() { + require_command codex + require_command gh + require_command python3 + if [[ -n "$REPO_URL" ]]; then + require_command git + fi + gh auth status >/dev/null 2>&1 +} + +prepare_state_dirs() { + mkdir -p "${STATE_ROOT}/context" + mkdir -p "${STATE_ROOT}/output" + mkdir -p "${STATE_ROOT}/repos" + mkdir -p "${STATE_ROOT}/tmp" + mkdir -p "$REPORT_ROOT" +} + +release_lock() { + rmdir "$LOCK_PATH" >/dev/null 2>&1 || true +} + +acquire_lock() { + if ! mkdir "$LOCK_PATH" >/dev/null 2>&1; then + log "failed to acquire lock: $LOCK_PATH" + exit 1 + fi + trap release_lock EXIT +} + +capture_open_issues() { + gh issue list \ + --repo "$REPO" \ + --state open \ + --label bug \ + --limit 200 \ + --json number,title,body,labels,url >"${STATE_ROOT}/context/open-issues.json" +} + +capture_prior_reports() { + find "$TARGET_REPORT_ROOT" -maxdepth 1 -type f -name '*.md' | sort >"${STATE_ROOT}/context/prior-reports.txt" +} + +parse_repo_slug_from_url() { + python3 - "$1" <<'PY' +import re +import sys +from urllib.parse import urlparse + +url = sys.argv[1] +if "://" in url: + parsed = urlparse(url) + path = parsed.path +else: + match = re.match(r"^[^@]+@[^:]+:(.+)$", url) + if not match: + raise SystemExit(f"unsupported repo url: {url}") + path = "/" + match.group(1) + +parts = [part for part in path.split("/") if part] +if len(parts) < 2: + raise SystemExit(f"unsupported repo url: {url}") +owner, repo = parts[-2], parts[-1] +if repo.endswith(".git"): + repo = repo[:-4] +print(f"{owner}/{repo}") +PY +} + +resolve_target_repo() { + local clone_dir + local -a clone_args + + if [[ -z "$REPO" && -n "$REPO_URL" ]]; then + REPO="$(parse_repo_slug_from_url "$REPO_URL")" + fi + + if [[ -n "$TARGET_WORKDIR" ]]; then + TARGET_WORKDIR="$(cd "$TARGET_WORKDIR" && pwd)" + 
return + fi + + clone_dir="${STATE_ROOT}/repos/${REPO//\//-}" + rm -rf "$clone_dir" + clone_args=(clone --depth 1) + if [[ -n "$REF" ]]; then + clone_args+=(--branch "$REF") + fi + clone_args+=("$REPO_URL" "$clone_dir") + + if ! git "${clone_args[@]}"; then + log "failed to clone repository: $REPO_URL" + exit 1 + fi + TARGET_WORKDIR="$clone_dir" +} + +validate_json_file() { + python3 - "$1" <<'PY' +import json +import sys + +with open(sys.argv[1], "r", encoding="utf-8") as handle: + json.load(handle) +PY +} + +json_get_string() { + python3 - "$1" "$2" <<'PY' +import json +import sys + +path, key = sys.argv[1], sys.argv[2] +with open(path, "r", encoding="utf-8") as handle: + data = json.load(handle) +for part in key.split("."): + data = data[part] +if isinstance(data, (list, dict)): + raise SystemExit(f"{key} does not resolve to a scalar") +print(data) +PY +} + +json_get_lines() { + python3 - "$1" "$2" <<'PY' +import json +import sys + +path, key = sys.argv[1], sys.argv[2] +with open(path, "r", encoding="utf-8") as handle: + data = json.load(handle) +for part in key.split("."): + data = data[part] +if not isinstance(data, list): + raise SystemExit(f"{key} is not a list") +for item in data: + print(item) +PY +} + +json_has_value() { + python3 - "$1" "$2" <<'PY' +import json +import sys + +path, key = sys.argv[1], sys.argv[2] +with open(path, "r", encoding="utf-8") as handle: + data = json.load(handle) +try: + for part in key.split("."): + data = data[part] +except (KeyError, TypeError): + raise SystemExit(1) +if data in (None, "", []): + raise SystemExit(1) +raise SystemExit(0) +PY +} + +write_text_with_related() { + python3 - "$1" "$2" "$3" <<'PY' +import json +import sys + +path, key, dest = sys.argv[1], sys.argv[2], sys.argv[3] +with open(path, "r", encoding="utf-8") as handle: + data = json.load(handle) +value = data +for part in key.split("."): + value = value[part] +related = data.get("related_issue_numbers", []) + +with open(dest, "w", encoding="utf-8") as 
handle: + handle.write(value) + if related: + if not value.endswith("\n"): + handle.write("\n") + handle.write("\n## Related issues\n\n") + for issue_number in related: + handle.write(f"- #{issue_number}\n") +PY +} + +write_text_exact() { + python3 - "$1" "$2" "$3" <<'PY' +import json +import sys + +path, key, dest = sys.argv[1], sys.argv[2], sys.argv[3] +with open(path, "r", encoding="utf-8") as handle: + data = json.load(handle) +value = data +for part in key.split("."): + value = value[part] +with open(dest, "w", encoding="utf-8") as handle: + handle.write(value) +PY +} + +create_issue() { + local result_path="$1" + local title + local body_file + local -a labels + local -a create_args + + title="$(json_get_string "$result_path" "issue.title")" + body_file="$(mktemp "${STATE_ROOT}/output/create-body.XXXXXX.md")" + write_text_with_related "$result_path" "issue.body" "$body_file" + mapfile -t labels < <(json_get_lines "$result_path" "issue.labels") + + create_args=(issue create --repo "$REPO" --title "$title" --body-file "$body_file") + for label in "${labels[@]}"; do + create_args+=(--label "$label") + done + if ! gh "${create_args[@]}" >/dev/null; then + fail_run "failed_github_mutation" "failed to create issue" + fi +} + +update_issue() { + local result_path="$1" + local issue_number + local comment_file + + issue_number="$(json_get_string "$result_path" "canonical_issue_number")" + comment_file="$(mktemp "${STATE_ROOT}/output/update-comment.XXXXXX.md")" + write_text_with_related "$result_path" "issue_comment" "$comment_file" + if ! 
gh issue comment "$issue_number" --repo "$REPO" --body-file "$comment_file" >/dev/null; then + fail_run "failed_github_mutation" "failed to comment on issue ${issue_number}" + fi +} + +merge_issue() { + local result_path="$1" + local canonical_issue_number + local canonical_comment_file + local duplicate_comment_file + local duplicate_issue_number + + canonical_issue_number="$(json_get_string "$result_path" "canonical_issue_number")" + canonical_comment_file="$(mktemp "${STATE_ROOT}/output/merge-canonical-comment.XXXXXX.md")" + duplicate_comment_file="$(mktemp "${STATE_ROOT}/output/merge-duplicate-comment.XXXXXX.md")" + + write_text_with_related "$result_path" "issue_comment" "$canonical_comment_file" + write_text_exact "$result_path" "duplicate_comment" "$duplicate_comment_file" + if ! gh issue comment "$canonical_issue_number" --repo "$REPO" --body-file "$canonical_comment_file" >/dev/null; then + fail_run "failed_github_mutation" "failed to comment on canonical issue ${canonical_issue_number}" + fi + + while IFS= read -r duplicate_issue_number; do + if ! gh issue comment "$duplicate_issue_number" --repo "$REPO" --body-file "$duplicate_comment_file" >/dev/null; then + fail_run "failed_github_mutation" "failed to comment on duplicate issue ${duplicate_issue_number}" + fi + if ! 
gh issue close "$duplicate_issue_number" --repo "$REPO" --reason not planned >/dev/null; then + fail_run "failed_github_mutation" "failed to close duplicate issue ${duplicate_issue_number}" + fi + done < <(json_get_lines "$result_path" "duplicates_to_close") +} + +apply_action() { + local result_path="$1" + local action="$2" + case "$action" in + create) + create_issue "$result_path" + ;; + update) + update_issue "$result_path" + ;; + merge) + merge_issue "$result_path" + ;; + none) + ;; + *) + log "unsupported action: $action" + exit 1 + ;; + esac +} + +write_run_summary() { + python3 - "$1" "$2" "$3" "$4" <<'PY' +import json +import sys + +summary_path, iterations_run, consecutive_none_count, stop_reason = sys.argv[1:] +with open(summary_path, "w", encoding="utf-8") as handle: + json.dump( + { + "iterations_run": int(iterations_run), + "consecutive_none_count": int(consecutive_none_count), + "stop_reason": stop_reason, + }, + handle, + indent=2, + ) +PY +} + +fail_run() { + local stop_reason="$1" + local message="$2" + + write_run_summary \ + "${STATE_ROOT}/output/run-summary.json" \ + "$iteration_number" \ + "$consecutive_none_count" \ + "$stop_reason" + log "$message" + exit 1 +} + +validate_result_contract() { + local result_path="$1" + local action + + action="$(json_get_string "$result_path" "action")" + case "$action" in + create) + json_has_value "$result_path" "issue.title" || fail_run "failed_invalid_contract" "missing issue.title for create action" + json_has_value "$result_path" "issue.body" || fail_run "failed_invalid_contract" "missing issue.body for create action" + json_has_value "$result_path" "issue.labels" || fail_run "failed_invalid_contract" "missing issue.labels for create action" + ;; + update) + json_has_value "$result_path" "canonical_issue_number" || fail_run "failed_invalid_contract" "missing canonical_issue_number for update action" + json_has_value "$result_path" "issue_comment" || fail_run "failed_invalid_contract" "missing 
issue_comment for update action" + ;; + merge) + json_has_value "$result_path" "canonical_issue_number" || fail_run "failed_invalid_contract" "missing canonical_issue_number for merge action" + json_has_value "$result_path" "issue_comment" || fail_run "failed_invalid_contract" "missing issue_comment for merge action" + json_has_value "$result_path" "duplicates_to_close" || fail_run "failed_invalid_contract" "missing duplicates_to_close for merge action" + json_has_value "$result_path" "duplicate_comment" || fail_run "failed_invalid_contract" "missing duplicate_comment for merge action" + ;; + none) + ;; + *) + fail_run "failed_invalid_contract" "unsupported action returned by codex: ${action}" + ;; + esac + + if json_has_value "$result_path" "related_issue_numbers"; then + json_has_value "$result_path" "related_comment" || fail_run "failed_invalid_contract" "missing related_comment for related issues" + fi +} + +run_iteration() { + local iteration_number="$1" + local iteration_tag + local output_path + local prompt_text + local -a codex_args + + printf -v iteration_tag '%03d' "$iteration_number" + output_path="${STATE_ROOT}/output/iteration-${iteration_tag}.json" + prompt_text="$(cat "$PROMPT_PATH") + +Target repository: ${REPO} +Target workdir: ${TARGET_WORKDIR} +Open issues JSON: ${STATE_ROOT}/context/open-issues.json +Prior bug-sweep report index: ${STATE_ROOT}/context/prior-reports.txt +Target report root: ${TARGET_REPORT_ROOT}" + + codex_args=(exec --cd "$TARGET_WORKDIR" --sandbox workspace-write --output-schema "$SCHEMA_PATH" -o "$output_path") + if [[ -n "$MODEL" ]]; then + codex_args+=(--model "$MODEL") + fi + codex_args+=("$prompt_text") + + if ! TMPDIR="${STATE_ROOT}/tmp" codex "${codex_args[@]}"; then + fail_run "failed_codex_exec" "codex exec failed on iteration ${iteration_number}" + fi + if ! 
validate_json_file "$output_path"; then + fail_run "failed_invalid_json" "invalid JSON returned by codex on iteration ${iteration_number}" + fi + validate_result_contract "$output_path" + LAST_ITERATION_OUTPUT_PATH="$output_path" +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --iterations) + ITERATIONS="$2" + shift 2 + ;; + --max-consecutive-none) + MAX_CONSECUTIVE_NONE="$2" + shift 2 + ;; + --repo) + REPO="$2" + shift 2 + ;; + --repo-url) + REPO_URL="$2" + shift 2 + ;; + --ref) + REF="$2" + shift 2 + ;; + --workdir) + TARGET_WORKDIR="$2" + shift 2 + ;; + --model) + MODEL="$2" + shift 2 + ;; + --help) + usage + exit 0 + ;; + *) + log "Unknown argument: $1" + usage + exit 1 + ;; + esac +done + +ensure_inputs +ensure_tools +prepare_state_dirs +iteration_number=0 +consecutive_none_count=0 +stop_reason="" +acquire_lock +resolve_target_repo +ensure_paths + +while (( iteration_number < ITERATIONS )); do + iteration_number=$((iteration_number + 1)) + capture_open_issues + capture_prior_reports + + run_iteration "$iteration_number" + iteration_output_path="$LAST_ITERATION_OUTPUT_PATH" + iteration_action="$(json_get_string "$iteration_output_path" "action")" + apply_action "$iteration_output_path" "$iteration_action" + + if [[ "$iteration_action" == "none" ]]; then + consecutive_none_count=$((consecutive_none_count + 1)) + if (( MAX_CONSECUTIVE_NONE > 0 && consecutive_none_count >= MAX_CONSECUTIVE_NONE )); then + stop_reason="completed_consecutive_none_threshold" + break + fi + else + consecutive_none_count=0 + fi +done + +if [[ -z "$stop_reason" ]]; then + stop_reason="completed_max_iterations" +fi + +write_run_summary \ + "${STATE_ROOT}/output/run-summary.json" \ + "$iteration_number" \ + "$consecutive_none_count" \ + "$stop_reason" + +log "agentic bug sweep iteration completed" diff --git a/tests/test_agentic_bug_sweep.py b/tests/test_agentic_bug_sweep.py new file mode 100644 index 0000000..3c50e95 --- /dev/null +++ b/tests/test_agentic_bug_sweep.py @@ -0,0 +1,664 @@ 
"""Integration-test fixtures for scripts/agentic-bug-sweep.sh.

The tests run the real shell script with fake ``gh``, ``codex`` (and, where
needed, ``git``) executables placed first on PATH.  The fakes record every
invocation under ``FAKE_STATE_DIR`` so assertions can inspect what the script
actually executed.
"""

import json
import os
import shutil
import stat
import subprocess
import tempfile
import textwrap
import unittest
from pathlib import Path


# Anchors into the repository under test: prompt, output schema, and script.
REPO_ROOT = Path(__file__).resolve().parents[1]
PROMPT_PATH = REPO_ROOT / "ai" / "agentic-bug-sweep.md"
SCHEMA_PATH = REPO_ROOT / "ai" / "agentic-bug-sweep.schema.json"
SOURCE_SCRIPT = REPO_ROOT / "scripts" / "agentic-bug-sweep.sh"


def write_executable(path: Path, content: str) -> None:
    """Write *content* to *path* and set the owner-execute bit."""
    path.write_text(content, encoding="utf-8")
    path.chmod(path.stat().st_mode | stat.S_IXUSR)


def setup_fake_repo(root: Path, *, gh_script: str, codex_script: str) -> None:
    """Build a throwaway checkout under *root* with fake gh/codex on bin/.

    Copies the real script, prompt, and schema into place and creates the
    ``state`` directory the fakes log into.
    """
    (root / "scripts").mkdir()
    (root / "ai").mkdir()
    (root / "bin").mkdir()
    (root / "state").mkdir()
    (root / "docs" / "test-reports" / "agentic-bug-sweep").mkdir(parents=True)

    shutil.copy2(SOURCE_SCRIPT, root / "scripts" / "agentic-bug-sweep.sh")
    shutil.copy2(PROMPT_PATH, root / "ai" / "agentic-bug-sweep.md")
    shutil.copy2(SCHEMA_PATH, root / "ai" / "agentic-bug-sweep.schema.json")

    write_executable(root / "bin" / "gh", gh_script)
    write_executable(root / "bin" / "codex", codex_script)


def run_bug_sweep(
    root: Path,
    *,
    iterations: str = "1",
    max_consecutive_none: str = "1",
    repo: str = "tensor4all/template-rs",
    workdir: Path | None = None,
    repo_url: str | None = None,
    ref: str | None = None,
    extra_env: dict[str, str] | None = None,
) -> subprocess.CompletedProcess[str]:
    """Invoke the sweep script under *root* and capture its output.

    When neither *workdir* nor *repo_url* is given the test root itself is
    used as the local workdir.  ``check=False``: callers assert on the
    returncode themselves.
    """
    env = os.environ.copy()
    # Fakes must shadow any real gh/codex/git on the host.
    env["PATH"] = f"{root / 'bin'}:{env['PATH']}"
    env["FAKE_STATE_DIR"] = str(root / "state")
    if extra_env:
        env.update(extra_env)

    args = [
        "bash",
        "scripts/agentic-bug-sweep.sh",
        "--iterations",
        iterations,
        "--max-consecutive-none",
        max_consecutive_none,
        "--repo",
        repo,
    ]
    if workdir is None and repo_url is None:
        workdir = root
    if workdir is not None:
        args.extend(["--workdir", str(workdir)])
    if repo_url is not None:
        args.extend(["--repo-url", repo_url])
    if ref is not None:
        args.extend(["--ref", ref])

    return subprocess.run(
        args,
        cwd=root,
        text=True,
        capture_output=True,
        env=env,
        check=False,
    )


def codex_script_for_payloads(
    payloads: list[dict[str, object]], *, stdout_lines: list[str] | None = None
) -> str:
    """Build a fake ``codex`` executable that replays *payloads* in order.

    Each invocation logs its args and TMPDIR, locates the ``-o``/
    ``--output-last-message`` path from the command line, and writes the next
    payload there (a counter file tracks which invocation this is).  Extra
    *stdout_lines* simulate progress chatter the script must tolerate.
    """
    responses_text = json.dumps(payloads)
    stdout_prefix = ""
    if stdout_lines:
        stdout_prefix = "".join(f"printf '%s\\n' {line!r}\n" for line in stdout_lines)
    return (
        "#!/usr/bin/env bash\n"
        "set -euo pipefail\n"
        f"{stdout_prefix}"
        "printf 'call\\n' >>\"${FAKE_STATE_DIR:?}/codex.log\"\n"
        "printf '%q ' \"$@\" >>\"${FAKE_STATE_DIR:?}/codex-args.log\"\n"
        "printf '\\n' >>\"${FAKE_STATE_DIR:?}/codex-args.log\"\n"
        "printf '%s\\n' \"${TMPDIR:-}\" >>\"${FAKE_STATE_DIR:?}/codex-tmpdir.log\"\n"
        "counter_file=\"${FAKE_STATE_DIR:?}/codex-counter.txt\"\n"
        "if [[ ! -f \"$counter_file\" ]]; then\n"
        "  printf '0\\n' >\"$counter_file\"\n"
        "fi\n"
        "counter=\"$(cat \"$counter_file\")\"\n"
        "\n"
        "output_path=\"\"\n"
        "prev=\"\"\n"
        "for arg in \"$@\"; do\n"
        "  if [[ \"$prev\" == \"-o\" || \"$prev\" == \"--output-last-message\" ]]; then\n"
        "    output_path=\"$arg\"\n"
        "  fi\n"
        "  prev=\"$arg\"\n"
        "done\n"
        "\n"
        "python3 - \"$counter_file\" \"${output_path:?}\" <<'PY'\n"
        "import json\n"
        "import sys\n"
        "\n"
        f"payloads = json.loads({responses_text!r})\n"
        "counter_path, output_path = sys.argv[1], sys.argv[2]\n"
        "with open(counter_path, 'r', encoding='utf-8') as handle:\n"
        "    index = int(handle.read().strip())\n"
        "if index >= len(payloads):\n"
        "    raise SystemExit(f'no payload configured for invocation {index}')\n"
        "with open(output_path, 'w', encoding='utf-8') as handle:\n"
        "    json.dump(payloads[index], handle, indent=2)\n"
        "with open(counter_path, 'w', encoding='utf-8') as handle:\n"
        "    handle.write(str(index + 1))\n"
        "PY\n"
        "exit 0\n"
    )


def codex_script_for_payload(
    payload: dict[str, object], *, stdout_lines: list[str] | None = None
) -> str:
    """Single-payload convenience wrapper over codex_script_for_payloads."""
    return codex_script_for_payloads([payload], stdout_lines=stdout_lines)


def gh_script_with_mutations(*, fail_comment: bool = False) -> str:
    """Build a fake ``gh`` that logs calls and mimics auth/list/create/comment/close.

    With *fail_comment* the ``issue comment`` branch exits non-zero to drive
    the script's failed_github_mutation path.
    NOTE(review): the ``issue close`` branch accepts and ignores any extra
    arguments (it never validates ``--reason``), so a malformed reason value
    that the real gh CLI would reject passes silently here — consider
    tightening the fake.
    """
    comment_failure_branch = ""
    if fail_comment:
        comment_failure_branch = (
            "    printf 'simulated comment failure\\n' >&2\n"
            "    exit 1\n"
        )

    return textwrap.dedent(
        f"""\
#!/usr/bin/env bash
set -euo pipefail
printf '%s\\n' "$*" >>"${{FAKE_STATE_DIR:?}}/gh.log"

if [[ "$1" == "auth" && "$2" == "status" ]]; then
  exit 0
fi

if [[ "$1" == "issue" && "$2" == "list" ]]; then
  printf '[{{"number":1,"title":"Tracked bug","body":"details","labels":[{{"name":"bug"}}],"url":"https://example.invalid/issues/1"}}]\\n'
  exit 0
fi

if [[ "$1" == "issue" && "$2" == "create" ]]; then
  title=""
  body_file=""
  prev=""
  for arg in "$@"; do
    if [[ "$prev" == "--title" ]]; then
      title="$arg"
    fi
    if [[ "$prev" == "--body-file" || "$prev" == "-F" ]]; then
      body_file="$arg"
    fi
    prev="$arg"
  done
  printf '%s' "$title" >"${{FAKE_STATE_DIR:?}}/create-title.txt"
  cp "${{body_file:?}}" "${{FAKE_STATE_DIR:?}}/create-body.md"
  printf 'https://example.invalid/issues/99\\n'
  exit 0
fi

if [[ "$1" == "issue" && "$2" == "comment" ]]; then
  issue_number="$3"
  body_text=""
  body_file=""
  prev=""
  for arg in "$@"; do
    if [[ "$prev" == "--body" || "$prev" == "-b" ]]; then
      body_text="$arg"
    fi
    if [[ "$prev" == "--body-file" || "$prev" == "-F" ]]; then
      body_file="$arg"
    fi
    prev="$arg"
  done
{comment_failure_branch}  if [[ -n "$body_file" ]]; then
    cp "$body_file" "${{FAKE_STATE_DIR:?}}/comment-${{issue_number}}.md"
  else
    printf '%s' "$body_text" >"${{FAKE_STATE_DIR:?}}/comment-${{issue_number}}.md"
  fi
  exit 0
fi

if [[ "$1" == "issue" && "$2" == "close" ]]; then
  issue_number="$3"
  printf '%s' "$issue_number" >>"${{FAKE_STATE_DIR:?}}/closed.log"
  printf '\\n' >>"${{FAKE_STATE_DIR:?}}/closed.log"
  exit 0
fi

printf 'unexpected gh invocation: %s\\n' "$*" >&2
exit 1
"""
    )
class AgenticBugSweepTests(unittest.TestCase):
    """End-to-end tests of agentic-bug-sweep.sh against fake gh/codex/git."""

    def test_prompt_and_schema_contract(self) -> None:
        # Prompt and schema must agree on the action vocabulary and keep the
        # schema flat (no allOf/if) with every property required.
        self.assertTrue(PROMPT_PATH.is_file(), msg=f"missing prompt file: {PROMPT_PATH}")
        prompt = PROMPT_PATH.read_text(encoding="utf-8")
        self.assertIn("open bug issues", prompt)
        self.assertIn("prior bug-sweep reports", prompt)
        self.assertIn("test-feature", prompt)
        self.assertIn("related_issue_numbers", prompt)

        self.assertTrue(SCHEMA_PATH.is_file(), msg=f"missing schema file: {SCHEMA_PATH}")
        schema = json.loads(SCHEMA_PATH.read_text(encoding="utf-8"))

        action_enum = schema["properties"]["action"]["enum"]
        self.assertEqual(action_enum, ["create", "update", "merge", "none"])
        self.assertIn("related_issue_numbers", schema["properties"])
        self.assertNotIn("allOf", schema)
        self.assertNotIn("if", schema)
        self.assertEqual(set(schema["required"]), set(schema["properties"]))
        self.assertEqual(schema["properties"]["issue"]["type"], ["object", "null"])

    def test_help_path(self) -> None:
        # --help must exit 0 and document every supported flag.
        result = subprocess.run(
            ["bash", str(SOURCE_SCRIPT), "--help"],
            cwd=REPO_ROOT,
            text=True,
            capture_output=True,
            check=False,
        )

        self.assertEqual(result.returncode, 0, msg=f"stdout={result.stdout}\nstderr={result.stderr}")
        self.assertIn("--iterations", result.stdout)
        self.assertIn("--max-consecutive-none", result.stdout)
        self.assertIn("--repo", result.stdout)
        self.assertIn("--repo-url", result.stdout)
        self.assertIn("--ref", result.stdout)
        self.assertIn("--workdir", result.stdout)

    def test_single_iteration_create(self) -> None:
        # Happy path: one iteration with a "create" payload; verifies the
        # codex invocation flags, TMPDIR confinement, and state artifacts.
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            setup_fake_repo(
                root,
                gh_script=gh_script_with_mutations(),
                codex_script=codex_script_for_payload(
                    {
                        "summary": "Found a new bug",
                        "report_path": "docs/test-reports/bug-sweep-20260308-000001.md",
                        "action": "create",
                        "issue": {
                            "title": "Bug: sample",
                            "body": "body",
                            "labels": ["bug", "prio/p1"],
                        },
                    },
                    stdout_lines=["codex progress chatter"],
                ),
            )
            result = run_bug_sweep(root, workdir=root)

            self.assertEqual(result.returncode, 0, msg=f"stdout={result.stdout}\nstderr={result.stderr}")
            self.assertTrue((root / "target" / "agentic-bug-sweep" / "context" / "open-issues.json").is_file())
            self.assertTrue((root / "target" / "agentic-bug-sweep" / "output" / "iteration-001.json").is_file())

            codex_invocations = (root / "state" / "codex-args.log").read_text(encoding="utf-8")
            self.assertIn("exec", codex_invocations)
            self.assertIn("--output-schema", codex_invocations)
            self.assertIn("--sandbox", codex_invocations)
            self.assertIn("workspace-write", codex_invocations)
            codex_tmpdir = (root / "state" / "codex-tmpdir.log").read_text(encoding="utf-8").strip()
            self.assertEqual(codex_tmpdir, str(root / "target" / "agentic-bug-sweep" / "tmp"))

    def test_remote_repo_url_clone_mode(self) -> None:
        # Clone mode: a fake git copies REMOTE_SOURCE_DIR into the clone
        # destination; verifies the clone command line, that codex runs in
        # the clone, and that prior reports from the clone are indexed.
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            setup_fake_repo(
                root,
                gh_script=gh_script_with_mutations(),
                codex_script=codex_script_for_payload(
                    {
                        "summary": "No actionable bug found",
                        "report_path": "docs/test-reports/agentic-bug-sweep/bug-sweep-20260308-remote.md",
                        "action": "none",
                    }
                ),
            )

            remote_source = root / "remote-source"
            (remote_source / "docs" / "test-reports" / "agentic-bug-sweep").mkdir(parents=True)
            (remote_source / "README.md").write_text("# remote repo\n", encoding="utf-8")
            (remote_source / "docs" / "test-reports" / "agentic-bug-sweep" / "existing.md").write_text(
                "existing report\n", encoding="utf-8"
            )

            write_executable(
                root / "bin" / "git",
                textwrap.dedent(
                    """\
                    #!/usr/bin/env bash
                    set -euo pipefail
                    printf '%s\\n' "$*" >>"${FAKE_STATE_DIR:?}/git.log"

                    if [[ "$1" == "clone" ]]; then
                        dest="${@: -1}"
                        mkdir -p "$(dirname "$dest")"
                        cp -R "${REMOTE_SOURCE_DIR:?}" "$dest"
                        exit 0
                    fi

                    printf 'unexpected git invocation: %s\\n' "$*" >&2
                    exit 1
                    """
                ),
            )

            result = run_bug_sweep(
                root,
                repo="tensor4all/demo-repo",
                repo_url="https://github.com/tensor4all/demo-repo.git",
                ref="main",
                extra_env={"REMOTE_SOURCE_DIR": str(remote_source)},
            )

            self.assertEqual(result.returncode, 0, msg=f"stdout={result.stdout}\nstderr={result.stderr}")

            git_log = (root / "state" / "git.log").read_text(encoding="utf-8")
            self.assertIn("clone --depth 1 --branch main https://github.com/tensor4all/demo-repo.git", git_log)

            codex_invocations = (root / "state" / "codex-args.log").read_text(encoding="utf-8")
            self.assertIn(str(root / "target" / "agentic-bug-sweep" / "repos" / "tensor4all-demo-repo"), codex_invocations)
            gh_log = (root / "state" / "gh.log").read_text(encoding="utf-8")
            self.assertIn("issue list --repo tensor4all/demo-repo", gh_log)

            prior_reports = (root / "target" / "agentic-bug-sweep" / "context" / "prior-reports.txt").read_text(
                encoding="utf-8"
            )
            self.assertIn("existing.md", prior_reports)

    def test_github_actions(self) -> None:
        # One subtest per action kind; assertions inspect the gh fake's logs
        # (and, for merge, the comment-before-close ordering).
        cases = [
            {
                "name": "create_with_related",
                "payload": {
                    "summary": "Found a new bug",
                    "report_path": "docs/test-reports/bug-sweep-20260308-000002.md",
                    "action": "create",
                    "issue": {
                        "title": "Bug: create path",
                        "body": "Primary repro",
                        "labels": ["bug", "prio/p1"],
                    },
                    "related_issue_numbers": [12],
                    "related_comment": "Likely same root cause as this new finding.",
                },
            },
            {
                "name": "update",
                "payload": {
                    "summary": "Expanded existing issue",
                    "report_path": "docs/test-reports/bug-sweep-20260308-000003.md",
                    "action": "update",
                    "canonical_issue_number": 21,
                    "issue_comment": "New evidence from automation.",
                },
            },
            {
                "name": "merge",
                "payload": {
                    "summary": "Duplicate of an existing issue",
                    "report_path": "docs/test-reports/bug-sweep-20260308-000004.md",
                    "action": "merge",
                    "canonical_issue_number": 31,
                    "issue_comment": "Canonical issue updated with new repro.",
                    "duplicates_to_close": [32, 33],
                    "duplicate_comment": "Closing in favor of #31.",
                },
            },
            {
                "name": "none",
                "payload": {
                    "summary": "No actionable bug found",
                    "report_path": "docs/test-reports/bug-sweep-20260308-000005.md",
                    "action": "none",
                },
            },
        ]

        for case in cases:
            with self.subTest(case=case["name"]):
                with tempfile.TemporaryDirectory() as tmpdir:
                    root = Path(tmpdir)
                    setup_fake_repo(
                        root,
                        gh_script=gh_script_with_mutations(),
                        codex_script=codex_script_for_payload(case["payload"]),
                    )
                    result = run_bug_sweep(root)

                    self.assertEqual(result.returncode, 0, msg=f"stdout={result.stdout}\nstderr={result.stderr}")
                    gh_log = (root / "state" / "gh.log").read_text(encoding="utf-8")

                    if case["name"] == "create_with_related":
                        self.assertIn("issue create", gh_log)
                        create_body = (root / "state" / "create-body.md").read_text(encoding="utf-8")
                        self.assertIn("Primary repro", create_body)
                        self.assertIn("Related issues", create_body)
                        self.assertIn("#12", create_body)
                    elif case["name"] == "update":
                        self.assertIn("issue comment 21", gh_log)
                        comment = (root / "state" / "comment-21.md").read_text(encoding="utf-8")
                        self.assertIn("New evidence from automation.", comment)
                    elif case["name"] == "merge":
                        # Canonical comment must precede duplicate comment,
                        # which must precede the duplicate close.
                        log_lines = gh_log.splitlines()
                        canonical_index = next(i for i, line in enumerate(log_lines) if "issue comment 31" in line)
                        duplicate_comment_index = next(i for i, line in enumerate(log_lines) if "issue comment 32" in line)
                        duplicate_close_index = next(i for i, line in enumerate(log_lines) if "issue close 32" in line)
                        self.assertLess(canonical_index, duplicate_comment_index)
                        self.assertLess(duplicate_comment_index, duplicate_close_index)
                    elif case["name"] == "none":
                        # "none" must perform no GitHub mutations at all.
                        self.assertNotIn("issue create", gh_log)
                        self.assertNotIn("issue comment", gh_log)
                        self.assertNotIn("issue close", gh_log)

    def test_stop_conditions(self) -> None:
        # Verifies the two termination paths (iteration cap, consecutive-none
        # threshold) and that a productive iteration resets the none counter.
        cases = [
            {
                "name": "max_iterations",
                "iterations": "2",
                "max_consecutive_none": "5",
                "payloads": [
                    {
                        "summary": "No actionable bug found",
                        "report_path": "docs/test-reports/bug-sweep-20260308-000010.md",
                        "action": "none",
                    },
                    {
                        "summary": "No actionable bug found again",
                        "report_path": "docs/test-reports/bug-sweep-20260308-000011.md",
                        "action": "none",
                    },
                ],
                "expected_invocations": 2,
                "expected_stop_reason": "completed_max_iterations",
            },
            {
                "name": "consecutive_none_threshold",
                "iterations": "5",
                "max_consecutive_none": "2",
                "payloads": [
                    {
                        "summary": "No actionable bug found",
                        "report_path": "docs/test-reports/bug-sweep-20260308-000012.md",
                        "action": "none",
                    },
                    {
                        "summary": "No actionable bug found again",
                        "report_path": "docs/test-reports/bug-sweep-20260308-000013.md",
                        "action": "none",
                    },
                    {
                        # Third payload intentionally unreachable: the sweep
                        # must stop after two consecutive "none" results.
                        "summary": "This payload should not be consumed",
                        "report_path": "docs/test-reports/bug-sweep-20260308-000014.md",
                        "action": "create",
                        "issue": {
                            "title": "Bug: unreachable",
                            "body": "body",
                            "labels": ["bug"],
                        },
                    },
                ],
                "expected_invocations": 2,
                "expected_stop_reason": "completed_consecutive_none_threshold",
            },
            {
                "name": "productive_iteration_resets_none_counter",
                "iterations": "4",
                "max_consecutive_none": "2",
                "payloads": [
                    {
                        "summary": "No actionable bug found",
                        "report_path": "docs/test-reports/bug-sweep-20260308-000015.md",
                        "action": "none",
                    },
                    {
                        "summary": "Found a new bug",
                        "report_path": "docs/test-reports/bug-sweep-20260308-000016.md",
                        "action": "create",
                        "issue": {
                            "title": "Bug: reset counter",
                            "body": "body",
                            "labels": ["bug"],
                        },
                    },
                    {
                        "summary": "No actionable bug found after create",
                        "report_path": "docs/test-reports/bug-sweep-20260308-000017.md",
                        "action": "none",
                    },
                    {
                        "summary": "No actionable bug found again",
                        "report_path": "docs/test-reports/bug-sweep-20260308-000018.md",
                        "action": "none",
                    },
                ],
                "expected_invocations": 4,
                "expected_stop_reason": "completed_consecutive_none_threshold",
            },
        ]

        for case in cases:
            with self.subTest(case=case["name"]):
                with tempfile.TemporaryDirectory() as tmpdir:
                    root = Path(tmpdir)
                    setup_fake_repo(
                        root,
                        gh_script=gh_script_with_mutations(),
                        codex_script=codex_script_for_payloads(case["payloads"]),
                    )
                    result = run_bug_sweep(
                        root,
                        iterations=case["iterations"],
                        max_consecutive_none=case["max_consecutive_none"],
                    )

                    self.assertEqual(result.returncode, 0, msg=f"stdout={result.stdout}\nstderr={result.stderr}")

                    # The fake codex's counter file records how many times
                    # it was actually invoked.
                    codex_invocation_count = int((root / "state" / "codex-counter.txt").read_text(encoding="utf-8"))
                    self.assertEqual(codex_invocation_count, case["expected_invocations"])

                    summary_path = root / "target" / "agentic-bug-sweep" / "output" / "run-summary.json"
                    self.assertTrue(summary_path.is_file())
                    summary = json.loads(summary_path.read_text(encoding="utf-8"))
                    self.assertEqual(summary["iterations_run"], case["expected_invocations"])
                    self.assertEqual(summary["stop_reason"], case["expected_stop_reason"])

    def test_failure_paths(self) -> None:
        # Each failure mode must exit non-zero; most also record a stop_reason
        # in run-summary.json (lock failure aborts before any summary exists).
        cases = [
            {
                "name": "failed_codex_exec",
                "gh_script": gh_script_with_mutations(),
                "codex_script": (
                    "#!/usr/bin/env bash\n"
                    "set -euo pipefail\n"
                    "printf 'call\\n' >>\"${FAKE_STATE_DIR:?}/codex.log\"\n"
                    "exit 23\n"
                ),
                "expected_stop_reason": "failed_codex_exec",
                "expect_summary": True,
                "expect_iteration_output": False,
            },
            {
                "name": "failed_invalid_json",
                "gh_script": gh_script_with_mutations(),
                "codex_script": (
                    "#!/usr/bin/env bash\n"
                    "set -euo pipefail\n"
                    "printf 'call\\n' >>\"${FAKE_STATE_DIR:?}/codex.log\"\n"
                    "output_path=\"\"\n"
                    "prev=\"\"\n"
                    "for arg in \"$@\"; do\n"
                    "  if [[ \"$prev\" == \"-o\" || \"$prev\" == \"--output-last-message\" ]]; then\n"
                    "    output_path=\"$arg\"\n"
                    "  fi\n"
                    "  prev=\"$arg\"\n"
                    "done\n"
                    "printf 'not-json\\n' >\"${output_path:?}\"\n"
                    "exit 0\n"
                ),
                "expected_stop_reason": "failed_invalid_json",
                "expect_summary": True,
                "expect_iteration_output": True,
            },
            {
                "name": "failed_github_mutation",
                "gh_script": gh_script_with_mutations(fail_comment=True),
                "codex_script": codex_script_for_payload(
                    {
                        "summary": "Expanded existing issue",
                        "report_path": "docs/test-reports/bug-sweep-20260308-000020.md",
                        "action": "update",
                        "canonical_issue_number": 21,
                        "issue_comment": "New evidence from automation.",
                    }
                ),
                "expected_stop_reason": "failed_github_mutation",
                "expect_summary": True,
                "expect_iteration_output": True,
            },
            {
                "name": "failed_lock_acquisition",
                "gh_script": gh_script_with_mutations(),
                "codex_script": codex_script_for_payload(
                    {
                        "summary": "No actionable bug found",
                        "report_path": "docs/test-reports/bug-sweep-20260308-000021.md",
                        "action": "none",
                    }
                ),
                "expected_message": "failed to acquire lock",
                "expect_summary": False,
                "expect_iteration_output": False,
                "precreate_lock": True,
            },
            {
                "name": "failed_invalid_contract",
                "gh_script": gh_script_with_mutations(),
                "codex_script": codex_script_for_payload(
                    {
                        # "create" without an issue object violates the contract.
                        "summary": "Bad create payload",
                        "report_path": "docs/test-reports/bug-sweep-20260308-000022.md",
                        "action": "create",
                    }
                ),
                "expected_stop_reason": "failed_invalid_contract",
                "expect_summary": True,
                "expect_iteration_output": True,
            },
        ]

        for case in cases:
            with self.subTest(case=case["name"]):
                with tempfile.TemporaryDirectory() as tmpdir:
                    root = Path(tmpdir)
                    setup_fake_repo(
                        root,
                        gh_script=case["gh_script"],
                        codex_script=case["codex_script"],
                    )
                    if case.get("precreate_lock"):
                        # Simulate a concurrent run holding the lock directory.
                        (root / "target" / "agentic-bug-sweep" / "lock").mkdir(parents=True)

                    result = run_bug_sweep(root)

                    self.assertNotEqual(result.returncode, 0)
                    if "expected_message" in case:
                        self.assertIn(case["expected_message"], result.stdout + result.stderr)

                    summary_path = root / "target" / "agentic-bug-sweep" / "output" / "run-summary.json"
                    self.assertEqual(summary_path.is_file(), case["expect_summary"])
                    if case["expect_summary"]:
                        summary = json.loads(summary_path.read_text(encoding="utf-8"))
                        self.assertEqual(summary["stop_reason"], case["expected_stop_reason"])

                    iteration_output_path = (
                        root / "target" / "agentic-bug-sweep" / "output" / "iteration-001.json"
                    )
                    self.assertEqual(iteration_output_path.is_file(), case["expect_iteration_output"])


if __name__ == "__main__":
    unittest.main()