Scope coding-loop iteration checkpoints by build_id, and read permission_mode
from ExecutionConfig instead of hard-coding an empty string in the coding loop.
Adds regression tests covering both behaviours.

diff --git a/swe_af/execution/coding_loop.py b/swe_af/execution/coding_loop.py
index 2f964df..0e1e334 100644
--- a/swe_af/execution/coding_loop.py
+++ b/swe_af/execution/coding_loop.py
@@ -43,14 +43,20 @@ async def _call_with_timeout(coro, timeout: int = 2700, label: str = ""):
 # ---------------------------------------------------------------------------
 
 
-def _iteration_state_path(artifacts_dir: str, issue_name: str) -> str:
+def _iteration_state_path(artifacts_dir: str, issue_name: str, build_id: str = "") -> str:
     if not artifacts_dir:
         return ""
+    if build_id:
+        # Scope iteration checkpoints by build_id so parallel/sequential builds
+        # against the same repo do not resume stale state from prior runs.
+        return os.path.join(
+            artifacts_dir, "execution", "iterations", build_id, f"{issue_name}.json",
+        )
     return os.path.join(artifacts_dir, "execution", "iterations", f"{issue_name}.json")
 
 
-def _save_iteration_state(artifacts_dir: str, issue_name: str, state: dict) -> None:
-    path = _iteration_state_path(artifacts_dir, issue_name)
+def _save_iteration_state(artifacts_dir: str, issue_name: str, state: dict, build_id: str = "") -> None:
+    path = _iteration_state_path(artifacts_dir, issue_name, build_id=build_id)
     if not path:
         return
     os.makedirs(os.path.dirname(path), exist_ok=True)
@@ -58,8 +64,8 @@ def _save_iteration_state(artifacts_dir: str, issue_name: str, state: dict) -> N
         json.dump(state, f, indent=2, default=str)
 
 
-def _load_iteration_state(artifacts_dir: str, issue_name: str) -> dict | None:
-    path = _iteration_state_path(artifacts_dir, issue_name)
+def _load_iteration_state(artifacts_dir: str, issue_name: str, build_id: str = "") -> dict | None:
+    path = _iteration_state_path(artifacts_dir, issue_name, build_id=build_id)
     if not path or not os.path.exists(path):
         return None
     with open(path, "r") as f:
@@ -297,7 +303,7 @@ async def _run_default_path(
 
     Returns (action, summary, review_result).
     """
-    permission_mode = ""
+    permission_mode = config.permission_mode
 
     try:
         review_result = await _call_with_timeout(
@@ -371,7 +377,7 @@ async def _run_flagged_path(
 
     Returns (action, summary, review_result, qa_result, synthesis_result).
     """
-    permission_mode = ""
+    permission_mode = config.permission_mode
 
     # QA + reviewer in parallel
     try:
@@ -531,7 +537,7 @@ async def run_coding_loop(
     branch_name = issue.get("branch_name", "")
     max_iterations = config.max_coding_iterations
     timeout = config.agent_timeout_seconds
-    permission_mode = ""  # inherits from agent config
+    permission_mode = config.permission_mode
 
     # Multi-repo context (None for single-repo builds)
     target_repo = issue.get("target_repo", "")
@@ -574,7 +580,7 @@ async def run_coding_loop(
     is_first_success = len(dag_state.completed_issues) == 0
 
     # Resume from iteration checkpoint if available
-    existing_state = _load_iteration_state(dag_state.artifacts_dir, issue_name)
+    existing_state = _load_iteration_state(dag_state.artifacts_dir, issue_name, build_id=dag_state.build_id)
     if existing_state:
         start_iteration = existing_state.get("iteration", 0) + 1
         feedback = existing_state.get("feedback", "")
@@ -717,7 +723,7 @@ async def run_coding_loop(
             "feedback": summary,
             "files_changed": files_changed,
             "iteration_history": iteration_history,
-        })
+        }, build_id=dag_state.build_id)
 
         # --- 3. WRITE TO MEMORY ---
         if action == "approve":
diff --git a/swe_af/execution/schemas.py b/swe_af/execution/schemas.py
index a12fa59..edf161a 100644
--- a/swe_af/execution/schemas.py
+++ b/swe_af/execution/schemas.py
@@ -710,6 +710,7 @@ def to_execution_config_dict(self) -> dict:
         return {
             "runtime": self.runtime,
             "models": self.models,
+            "permission_mode": self.permission_mode,
             "max_retries_per_issue": self.max_retries_per_issue,
             "max_replans": self.max_replans,
             "enable_replanning": self.enable_replanning,
@@ -785,6 +786,7 @@ class ExecutionConfig(BaseModel):
     enable_integration_testing: bool = True
     max_coding_iterations: int = 5
     agent_max_turns: int = DEFAULT_AGENT_MAX_TURNS
+    permission_mode: str = ""
     agent_timeout_seconds: int = 2700  # 45 min
     max_advisor_invocations: int = 2
     enable_issue_advisor: bool = True
diff --git a/tests/test_coding_loop_regressions.py b/tests/test_coding_loop_regressions.py
new file mode 100644
index 0000000..b9af44d
--- /dev/null
+++ b/tests/test_coding_loop_regressions.py
@@ -0,0 +1,100 @@
+import asyncio
+import json
+from pathlib import Path
+
+from swe_af.execution.coding_loop import run_coding_loop
+from swe_af.execution.schemas import DAGState, ExecutionConfig, IssueOutcome
+
+
+def _make_dag_state(tmp_path: Path, build_id: str) -> DAGState:
+    return DAGState(
+        repo_path=str(tmp_path),
+        artifacts_dir=str(tmp_path / ".artifacts"),
+        build_id=build_id,
+    )
+
+
+def test_run_coding_loop_ignores_legacy_iteration_state_when_build_id_present(tmp_path: Path) -> None:
+    artifacts_dir = tmp_path / ".artifacts"
+    legacy_state = artifacts_dir / "execution" / "iterations" / "create-hello-script.json"
+    legacy_state.parent.mkdir(parents=True, exist_ok=True)
+    legacy_state.write_text(
+        json.dumps(
+            {
+                "iteration": 1,
+                "feedback": "approved",
+                "files_changed": [],
+                "iteration_history": [
+                    {
+                        "iteration": 1,
+                        "action": "approve",
+                        "summary": "legacy",
+                        "path": "default",
+                    },
+                ],
+            },
+        ),
+    )
+
+    dag_state = _make_dag_state(tmp_path, build_id="newbuild1")
+    config = ExecutionConfig(max_coding_iterations=1, permission_mode="bypassPermissions")
+    issue = {"name": "create-hello-script", "guidance": {"needs_deeper_qa": False}}
+
+    async def call_fn(target: str, **kwargs):
+        if target.endswith(".run_coder"):
+            return {"files_changed": []}
+        if target.endswith(".run_code_reviewer"):
+            return {"approved": True, "blocking": False, "summary": "ok", "debt_items": []}
+        raise AssertionError(f"Unexpected target: {target}")
+
+    result = asyncio.run(
+        run_coding_loop(
+            issue=issue,
+            dag_state=dag_state,
+            call_fn=call_fn,
+            node_id="swe-planner",
+            config=config,
+            note_fn=None,
+            memory_fn=None,
+        ),
+    )
+
+    assert result.outcome == IssueOutcome.COMPLETED
+    assert result.attempts == 1
+    assert result.iteration_history
+    assert result.iteration_history[0]["summary"] == "ok"
+
+
+def test_run_coding_loop_propagates_permission_mode_to_all_agents(tmp_path: Path) -> None:
+    dag_state = _make_dag_state(tmp_path, build_id="permtest1")
+    config = ExecutionConfig(max_coding_iterations=1, permission_mode="bypassPermissions")
+    issue = {"name": "flagged-issue", "guidance": {"needs_deeper_qa": True}}
+    observed_modes: dict[str, str] = {}
+
+    async def call_fn(target: str, **kwargs):
+        observed_modes[target.split(".")[-1]] = kwargs.get("permission_mode", "")
+        if target.endswith(".run_coder"):
+            return {"files_changed": []}
+        if target.endswith(".run_qa"):
+            return {"passed": True, "summary": "qa ok", "test_failures": []}
+        if target.endswith(".run_code_reviewer"):
+            return {"approved": True, "blocking": False, "summary": "review ok", "debt_items": []}
+        if target.endswith(".run_qa_synthesizer"):
+            return {"action": "approve", "summary": "approved", "stuck": False}
+        raise AssertionError(f"Unexpected target: {target}")
+
+    result = asyncio.run(
+        run_coding_loop(
+            issue=issue,
+            dag_state=dag_state,
+            call_fn=call_fn,
+            node_id="swe-planner",
+            config=config,
+            note_fn=None,
+            memory_fn=None,
+        ),
+    )
+
+    assert result.outcome == IssueOutcome.COMPLETED
+    for agent_name in ("run_coder", "run_qa", "run_code_reviewer", "run_qa_synthesizer"):
+        assert observed_modes[agent_name] == "bypassPermissions"