From 0330b0a99f7c445d00f293f0382988a10faa7fc5 Mon Sep 17 00:00:00 2001 From: santoshkumarradha Date: Fri, 13 Mar 2026 00:09:05 +0530 Subject: [PATCH 1/9] feat: migrate planning agents to AgentField native .harness() Replace AgentAI(...).run() with router.harness() in all 4 planning agents: run_product_manager, run_architect, run_tech_lead, run_sprint_planner. - Remove swe_af.agent_ai imports - Map Tool enums to string tool names - Add provider mapping: claude -> claude-code - Drop log_file parameter (not supported by harness) - Preserve all prompts, schemas, and error handling Fixes #22 --- swe_af/reasoners/pipeline.py | 147 +++++++++++++++++------------------ 1 file changed, 72 insertions(+), 75 deletions(-) diff --git a/swe_af/reasoners/pipeline.py b/swe_af/reasoners/pipeline.py index af1ad1e..5d553f1 100644 --- a/swe_af/reasoners/pipeline.py +++ b/swe_af/reasoners/pipeline.py @@ -14,8 +14,6 @@ from pydantic import BaseModel -from swe_af.agent_ai import AgentAI, AgentAIConfig -from swe_af.agent_ai.types import Tool from swe_af.execution.schemas import DEFAULT_AGENT_MAX_TURNS from swe_af.reasoners.schemas import ( Architecture, @@ -31,6 +29,7 @@ # Pure helpers (NOT reasoners) # --------------------------------------------------------------------------- + def _ensure_paths(base: str) -> dict[str, str]: """Create artifact directories under *base* and return a path map.""" paths = { @@ -89,9 +88,7 @@ def _compute_levels(issues: list[dict]) -> list[list[str]]: return levels -def _validate_file_conflicts( - issues: list[dict], levels: list[list[str]] -) -> list[dict]: +def _validate_file_conflicts(issues: list[dict], levels: list[list[str]]) -> list[dict]: """Detect file conflicts between issues scheduled at the same parallel level. For each level, collects ``files_to_modify`` and ``files_to_create`` across @@ -155,6 +152,7 @@ def _assign_sequence_numbers(issues: list[dict], levels: list[list[str]]) -> lis # Reasoners # --------------------------------------------------------------------------- + @router.reasoner() async def run_product_manager( goal: str, @@ -172,26 +170,19 @@ async def run_product_manager( base = os.path.join(os.path.abspath(repo_path), artifacts_dir) paths = _ensure_paths(base) - log_path = os.path.join(base, "logs", "product_manager.jsonl") - - ai = AgentAI(AgentAIConfig( - model=model, - provider=ai_provider, - cwd=repo_path, - max_turns=max_turns, - allowed_tools=[Tool.READ, Tool.GLOB, Tool.GREP, Tool.BASH], - permission_mode=permission_mode or None, - )) from swe_af.prompts.product_manager import product_manager_prompts, pm_task_prompt # noqa: PLC0415 from swe_af.execution.schemas import WorkspaceManifest # noqa: PLC0415 + system_prompt, _ = product_manager_prompts( goal=goal, repo_path=repo_path, prd_path=paths["prd"], additional_context=additional_context, ) - ws_manifest = WorkspaceManifest(**workspace_manifest) if workspace_manifest else None + ws_manifest = ( + WorkspaceManifest(**workspace_manifest) if workspace_manifest else None + ) task_prompt = pm_task_prompt( goal=goal, repo_path=repo_path, @@ -199,17 +190,23 @@ async def run_product_manager( additional_context=additional_context, workspace_manifest=ws_manifest, ) - response = await ai.run( - task_prompt, + provider = "claude-code" if ai_provider == "claude" else ai_provider + result = await router.harness( + prompt=task_prompt, + schema=PRD, + provider=provider, + model=model, + max_turns=max_turns, + tools=["Read", "Glob", "Grep", "Bash"], + permission_mode=permission_mode or None, system_prompt=system_prompt, - output_schema=PRD, - log_file=log_path, + cwd=repo_path, ) - if response.parsed is None: + if result.parsed is None: raise RuntimeError("Product manager failed to produce a valid PRD") router.note("PM complete", tags=["pm", "complete"]) - return response.parsed.model_dump() + return result.parsed.model_dump() @router.reasoner() @@ -229,20 +226,11 @@ async def run_architect( base = os.path.join(os.path.abspath(repo_path), artifacts_dir) paths = _ensure_paths(base) - log_path = os.path.join(base, "logs", "architect.jsonl") - - ai = AgentAI(AgentAIConfig( - model=model, - provider=ai_provider, - cwd=repo_path, - max_turns=max_turns, - allowed_tools=[Tool.READ, Tool.WRITE, Tool.GLOB, Tool.GREP, Tool.BASH], - permission_mode=permission_mode or None, - )) prd_obj = PRD(**prd) from swe_af.prompts.architect import architect_prompts, architect_task_prompt # noqa: PLC0415 from swe_af.execution.schemas import WorkspaceManifest # noqa: PLC0415 + system_prompt, _ = architect_prompts( prd=prd_obj, repo_path=repo_path, @@ -250,7 +238,9 @@ async def run_architect( architecture_path=paths["architecture"], feedback=feedback or None, ) - ws_manifest = WorkspaceManifest(**workspace_manifest) if workspace_manifest else None + ws_manifest = ( + WorkspaceManifest(**workspace_manifest) if workspace_manifest else None + ) task_prompt = architect_task_prompt( prd=prd_obj, repo_path=repo_path, @@ -259,17 +249,23 @@ async def run_architect( feedback=feedback or None, workspace_manifest=ws_manifest, ) - response = await ai.run( - task_prompt, + provider = "claude-code" if ai_provider == "claude" else ai_provider + result = await router.harness( + prompt=task_prompt, + schema=Architecture, + provider=provider, + model=model, + max_turns=max_turns, + tools=["Read", "Write", "Glob", "Grep", "Bash"], + permission_mode=permission_mode or None, system_prompt=system_prompt, - output_schema=Architecture, - log_file=log_path, + cwd=repo_path, ) - if response.parsed is None: + if result.parsed is None: raise RuntimeError("Architect failed to produce a valid architecture") router.note("Architect complete", tags=["architect", "complete"]) - return response.parsed.model_dump() + return result.parsed.model_dump() @router.reasoner() @@ -289,41 +285,40 @@ async def run_tech_lead( base = os.path.join(os.path.abspath(repo_path), artifacts_dir) paths = _ensure_paths(base) - log_path = os.path.join(base, "logs", "tech_lead.jsonl") - - ai = AgentAI(AgentAIConfig( - model=model, - provider=ai_provider, - cwd=repo_path, - max_turns=max_turns, - allowed_tools=[Tool.READ, Tool.GLOB, Tool.GREP], - permission_mode=permission_mode or None, - )) from swe_af.prompts.tech_lead import tech_lead_prompts, tech_lead_task_prompt # noqa: PLC0415 from swe_af.execution.schemas import WorkspaceManifest # noqa: PLC0415 + system_prompt, _ = tech_lead_prompts( prd_path=paths["prd"], architecture_path=paths["architecture"], revision_number=revision_number, ) - ws_manifest = WorkspaceManifest(**workspace_manifest) if workspace_manifest else None + ws_manifest = ( + WorkspaceManifest(**workspace_manifest) if workspace_manifest else None + ) task_prompt = tech_lead_task_prompt( prd_path=paths["prd"], architecture_path=paths["architecture"], revision_number=revision_number, workspace_manifest=ws_manifest, ) - response = await ai.run( - task_prompt, + provider = "claude-code" if ai_provider == "claude" else ai_provider + result = await router.harness( + prompt=task_prompt, + schema=ReviewResult, + provider=provider, + model=model, + max_turns=max_turns, + tools=["Read", "Glob", "Grep"], + permission_mode=permission_mode or None, system_prompt=system_prompt, - output_schema=ReviewResult, - log_file=log_path, + cwd=repo_path, ) - if response.parsed is None: + if result.parsed is None: raise RuntimeError("Tech lead failed to produce a valid review") - review = response.parsed.model_dump() + review = result.parsed.model_dump() review_json_path = os.path.join(base, "plan", "review.json") with open(review_json_path, "w") as f: json.dump(review, f, indent=2, default=str) @@ -356,21 +351,15 @@ class SprintPlanOutput(BaseModel): base = os.path.join(os.path.abspath(repo_path), artifacts_dir) paths = _ensure_paths(base) - log_path = os.path.join(base, "logs", "sprint_planner.jsonl") - - ai = AgentAI(AgentAIConfig( - model=model, - provider=ai_provider, - cwd=repo_path, - max_turns=max_turns, - allowed_tools=[Tool.READ, Tool.GLOB, Tool.GREP], - permission_mode=permission_mode or None, - )) prd_obj = PRD(**prd) arch_obj = Architecture(**architecture) - from swe_af.prompts.sprint_planner import sprint_planner_prompts, sprint_planner_task_prompt # noqa: PLC0415 + from swe_af.prompts.sprint_planner import ( + sprint_planner_prompts, + sprint_planner_task_prompt, + ) # noqa: PLC0415 from swe_af.execution.schemas import WorkspaceManifest # noqa: PLC0415 + system_prompt, _ = sprint_planner_prompts( prd=prd_obj, architecture=arch_obj, @@ -378,7 +367,9 @@ class SprintPlanOutput(BaseModel): prd_path=paths["prd"], architecture_path=paths["architecture"], ) - ws_manifest = WorkspaceManifest(**workspace_manifest) if workspace_manifest else None + ws_manifest = ( + WorkspaceManifest(**workspace_manifest) if workspace_manifest else None + ) task_prompt = sprint_planner_task_prompt( goal=prd_obj.validated_description, prd=prd_obj, @@ -388,17 +379,23 @@ class SprintPlanOutput(BaseModel): prd_path=paths["prd"], architecture_path=paths["architecture"], ) - response = await ai.run( - task_prompt, + provider = "claude-code" if ai_provider == "claude" else ai_provider + result = await router.harness( + prompt=task_prompt, + schema=SprintPlanOutput, + provider=provider, + model=model, + max_turns=max_turns, + tools=["Read", "Glob", "Grep"], + permission_mode=permission_mode or None, system_prompt=system_prompt, - output_schema=SprintPlanOutput, - log_file=log_path, + cwd=repo_path, ) - if response.parsed is None: + if result.parsed is None: raise RuntimeError("Sprint planner failed to produce valid issues") router.note("Sprint Planner complete", tags=["sprint_planner", "complete"]) return { - "issues": [issue.model_dump() for issue in response.parsed.issues], - "rationale": response.parsed.rationale, + "issues": [issue.model_dump() for issue in result.parsed.issues], + "rationale": result.parsed.rationale, } From 751b7cf293b93253702fb58b23728b42f460274b Mon Sep 17 00:00:00 2001 From: santoshkumarradha Date: Fri, 13 Mar 2026 00:11:04 +0530 Subject: [PATCH 2/9] feat: migrate planning agents to AgentField native .harness() (fixes #22) --- swe_af/reasoners/pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swe_af/reasoners/pipeline.py b/swe_af/reasoners/pipeline.py index 5d553f1..d791668 100644 --- a/swe_af/reasoners/pipeline.py +++ b/swe_af/reasoners/pipeline.py @@ -1,7 +1,7 @@ """Internal reasoners for the SWE planning pipeline. Each reasoner wraps a single agent role (PM, Architect, Tech Lead, Sprint Planner) -and uses AgentAI for actual AI execution. The @router.reasoner() decorator provides +and uses router.harness() for actual AI execution. The @router.reasoner() decorator provides FastAPI endpoints, workflow DAG tracking, and observability via router.note(). """ From e901def67ee3ee4e0b687de91cadabbb1a2ce94e Mon Sep 17 00:00:00 2001 From: santoshkumarradha Date: Fri, 13 Mar 2026 00:24:49 +0530 Subject: [PATCH 3/9] feat: migrate coding loop agents to AgentField native .harness() Replace AgentAI(...).run() with router.harness() in run_coder, run_qa, run_code_reviewer. - Remove swe_af.agent_ai imports - Map Tool enums to string tool names - Add provider mapping: claude -> claude-code - Drop log_file parameter - Preserve all fallbacks (reviewer: approved=True on failure) - Preserve iteration_id injection Fixes #23 --- swe_af/reasoners/execution_agents.py | 106 +++++++++++---------------- 1 file changed, 41 insertions(+), 65 deletions(-) diff --git a/swe_af/reasoners/execution_agents.py b/swe_af/reasoners/execution_agents.py index e60e0d0..dbc1bd7 100644 --- a/swe_af/reasoners/execution_agents.py +++ b/swe_af/reasoners/execution_agents.py @@ -930,10 +930,6 @@ async def run_coder( """ project_context = project_context or {} issue_name = issue.get("name", "?") - _artifacts_dir = project_context.get("artifacts_dir", "") - log_dir = os.path.join(_artifacts_dir, "logs") if _artifacts_dir else None - log_path = os.path.join(log_dir, f"coder_{issue_name}_iter_{iteration}.jsonl") if log_dir else None - router.note( f"Coder starting: {issue_name} (iteration {iteration})", tags=["coder", "start"], @@ -952,35 +948,30 @@ async def run_coder( target_repo=target_repo, ) - ai = AgentAI(AgentAIConfig( - model=model, - provider=ai_provider, - cwd=worktree_path, - max_turns=DEFAULT_AGENT_MAX_TURNS, - allowed_tools=[ - Tool.READ, Tool.WRITE, Tool.EDIT, - Tool.BASH, Tool.GLOB, Tool.GREP, - ], - permission_mode=permission_mode or None, - )) + provider = "claude-code" if ai_provider == "claude" else ai_provider try: - response = await ai.run( + result = await router.harness( task_prompt, system_prompt=CODER_SYSTEM_PROMPT, + model=model, + provider=provider, + tools=["Read", "Write", "Edit", "Bash", "Glob", "Grep"], output_schema=CoderResult, - log_file=log_path, + cwd=worktree_path, + max_turns=DEFAULT_AGENT_MAX_TURNS, + permission_mode=permission_mode or None, ) - if response.parsed is not None: + out = result.model_dump() + if out is not None: router.note( f"Coder complete: {issue_name}, " - f"files={len(response.parsed.files_changed)}, " - f"complete={response.parsed.complete}", + f"files={len(result.files_changed)}, " + f"complete={result.complete}", tags=["coder", "complete"], ) - result = response.parsed.model_dump() - result["iteration_id"] = iteration_id - return result + out["iteration_id"] = iteration_id + return out except Exception as e: router.note( f"Coder agent failed: {issue_name}: {e}", @@ -1014,10 +1005,6 @@ async def run_qa( """ project_context = project_context or {} issue_name = issue.get("name", "?") - _artifacts_dir = project_context.get("artifacts_dir", "") - log_dir = os.path.join(_artifacts_dir, "logs") if _artifacts_dir else None - log_path = os.path.join(log_dir, f"qa_{issue_name}_iter_{iteration_id}.jsonl") if log_dir else None - router.note( f"QA starting: {issue_name}", tags=["qa", "start"], @@ -1035,33 +1022,28 @@ async def run_qa( target_repo=target_repo, ) - ai = AgentAI(AgentAIConfig( - model=model, - provider=ai_provider, - cwd=worktree_path, - max_turns=DEFAULT_AGENT_MAX_TURNS, - allowed_tools=[ - Tool.READ, Tool.WRITE, Tool.EDIT, - Tool.BASH, Tool.GLOB, Tool.GREP, - ], - permission_mode=permission_mode or None, - )) + provider = "claude-code" if ai_provider == "claude" else ai_provider try: - response = await ai.run( + result = await router.harness( task_prompt, system_prompt=QA_SYSTEM_PROMPT, + model=model, + provider=provider, + tools=["Read", "Write", "Edit", "Bash", "Glob", "Grep"], output_schema=QAResult, - log_file=log_path, + cwd=worktree_path, + max_turns=DEFAULT_AGENT_MAX_TURNS, + permission_mode=permission_mode or None, ) - if response.parsed is not None: + out = result.model_dump() + if out is not None: router.note( - f"QA complete: {issue_name}, passed={response.parsed.passed}", + f"QA complete: {issue_name}, passed={result.passed}", tags=["qa", "complete"], ) - result = response.parsed.model_dump() - result["iteration_id"] = iteration_id - return result + out["iteration_id"] = iteration_id + return out except Exception as e: router.note( f"QA agent failed: {issue_name}: {e}", @@ -1098,10 +1080,6 @@ async def run_code_reviewer( """ project_context = project_context or {} issue_name = issue.get("name", "?") - _artifacts_dir = project_context.get("artifacts_dir", "") - log_dir = os.path.join(_artifacts_dir, "logs") if _artifacts_dir else None - log_path = os.path.join(log_dir, f"reviewer_{issue_name}_iter_{iteration_id}.jsonl") if log_dir else None - router.note( f"Code reviewer starting: {issue_name}", tags=["code_reviewer", "start"], @@ -1121,32 +1099,30 @@ async def run_code_reviewer( target_repo=target_repo, ) - ai = AgentAI(AgentAIConfig( - model=model, - provider=ai_provider, - cwd=worktree_path, - max_turns=DEFAULT_AGENT_MAX_TURNS, - allowed_tools=[Tool.READ, Tool.GLOB, Tool.GREP, Tool.BASH], - permission_mode=permission_mode or None, - )) + provider = "claude-code" if ai_provider == "claude" else ai_provider try: - response = await ai.run( + result = await router.harness( task_prompt, system_prompt=CODE_REVIEWER_SYSTEM_PROMPT, + model=model, + provider=provider, + tools=["Read", "Glob", "Grep", "Bash"], output_schema=CodeReviewResult, - log_file=log_path, + cwd=worktree_path, + max_turns=DEFAULT_AGENT_MAX_TURNS, + permission_mode=permission_mode or None, ) - if response.parsed is not None: + out = result.model_dump() + if out is not None: router.note( f"Code reviewer complete: {issue_name}, " - f"approved={response.parsed.approved}, " - f"blocking={response.parsed.blocking}", + f"approved={result.approved}, " + f"blocking={result.blocking}", tags=["code_reviewer", "complete"], ) - result = response.parsed.model_dump() - result["iteration_id"] = iteration_id - return result + out["iteration_id"] = iteration_id + return out except Exception as e: router.note( f"Code reviewer agent failed: {issue_name}: {e}", From 7048b65b46adc2d12279b251676c983d47ab2785 Mon Sep 17 00:00:00 2001 From: santoshkumarradha Date: Fri, 13 Mar 2026 00:43:08 +0530 Subject: [PATCH 4/9] feat: migrate all execution agents to AgentField native .harness()/.ai() (fixes #23, fixes #24, fixes #25, fixes #26, fixes #27) --- swe_af/reasoners/execution_agents.py | 511 ++++++++++++--------------- 1 file changed, 227 insertions(+), 284 deletions(-) diff --git a/swe_af/reasoners/execution_agents.py b/swe_af/reasoners/execution_agents.py index dbc1bd7..783438c 100644 --- a/swe_af/reasoners/execution_agents.py +++ b/swe_af/reasoners/execution_agents.py @@ -10,8 +10,6 @@ from pydantic import BaseModel -from swe_af.agent_ai import AgentAI, AgentAIConfig -from swe_af.agent_ai.types import Tool from swe_af.execution.schemas import ( DEFAULT_AGENT_MAX_TURNS, AdvisorAction, @@ -45,7 +43,9 @@ from swe_af.prompts.github_pr import github_pr_task_prompt from swe_af.prompts.repo_finalize import SYSTEM_PROMPT as REPO_FINALIZE_SYSTEM_PROMPT from swe_af.prompts.repo_finalize import repo_finalize_task_prompt -from swe_af.prompts.integration_tester import SYSTEM_PROMPT as INTEGRATION_TESTER_SYSTEM_PROMPT +from swe_af.prompts.integration_tester import ( + SYSTEM_PROMPT as INTEGRATION_TESTER_SYSTEM_PROMPT, +) from swe_af.prompts.integration_tester import integration_tester_task_prompt from swe_af.prompts.issue_writer import SYSTEM_PROMPT as ISSUE_WRITER_SYSTEM_PROMPT from swe_af.prompts.issue_writer import issue_writer_task_prompt @@ -61,9 +61,16 @@ from swe_af.prompts.retry_advisor import retry_advisor_task_prompt from swe_af.prompts.verifier import SYSTEM_PROMPT as VERIFIER_SYSTEM_PROMPT from swe_af.prompts.verifier import verifier_task_prompt -from swe_af.prompts.workspace import CLEANUP_SYSTEM_PROMPT as WORKSPACE_CLEANUP_SYSTEM_PROMPT -from swe_af.prompts.workspace import SETUP_SYSTEM_PROMPT as WORKSPACE_SETUP_SYSTEM_PROMPT -from swe_af.prompts.workspace import workspace_cleanup_task_prompt, workspace_setup_task_prompt +from swe_af.prompts.workspace import ( + CLEANUP_SYSTEM_PROMPT as WORKSPACE_CLEANUP_SYSTEM_PROMPT, +) +from swe_af.prompts.workspace import ( + SETUP_SYSTEM_PROMPT as WORKSPACE_SETUP_SYSTEM_PROMPT, +) +from swe_af.prompts.workspace import ( + workspace_cleanup_task_prompt, + workspace_setup_task_prompt, +) from . import router @@ -73,6 +80,7 @@ def _maybe_workspace_manifest(raw: dict | None): if raw is None: return None from swe_af.execution.schemas import WorkspaceManifest + return WorkspaceManifest(**raw) @@ -80,15 +88,18 @@ def _maybe_workspace_manifest(raw: dict | None): # Helper for the replanner: reconstruct DAGState from dict # --------------------------------------------------------------------------- + def _build_dag_state(dag_state_dict: dict): """Reconstruct a DAGState from a dict (for prompt building).""" from swe_af.execution.schemas import DAGState + return DAGState(**dag_state_dict) def _build_issue_results(failed_issues: list[dict]): """Reconstruct IssueResult list from dicts (for prompt building).""" from swe_af.execution.schemas import IssueResult + return [IssueResult(**f) for f in failed_issues] @@ -96,6 +107,7 @@ def _build_issue_results(failed_issues: list[dict]): # Reasoners # --------------------------------------------------------------------------- + @router.reasoner() async def run_retry_advisor( issue: dict, @@ -137,33 +149,27 @@ async def run_retry_advisor( workspace_manifest=ws_manifest, ) - issue_name = issue.get("name", "unknown") - log_dir = os.path.join(artifacts_dir, "logs") if artifacts_dir else None - log_path = os.path.join(log_dir, f"retry_advisor_{issue_name}_{attempt_number}.jsonl") if log_dir else None - - ai = AgentAI(AgentAIConfig( - model=model, - provider=ai_provider, - cwd=repo_path, - max_turns=DEFAULT_AGENT_MAX_TURNS, - allowed_tools=[Tool.READ, Tool.GLOB, Tool.GREP, Tool.BASH], - permission_mode=permission_mode or None, - )) + provider = "claude-code" if ai_provider == "claude" else ai_provider try: - response = await ai.run( + result = await router.harness( task_prompt, system_prompt=RETRY_ADVISOR_SYSTEM_PROMPT, - output_schema=RetryAdvice, - log_file=log_path, + schema=RetryAdvice, + model=model, + provider=provider, + tools=["Read", "Glob", "Grep", "Bash"], + cwd=repo_path, + max_turns=DEFAULT_AGENT_MAX_TURNS, + permission_mode=permission_mode or None, ) - if response.parsed is not None: + if result.parsed is not None: router.note( - f"Retry advisor: should_retry={response.parsed.should_retry}, " - f"confidence={response.parsed.confidence}", + f"Retry advisor: should_retry={result.parsed.should_retry}, " + f"confidence={result.parsed.confidence}", tags=["retry_advisor", "complete"], ) - return response.parsed.model_dump() + return result.parsed.model_dump() except Exception as e: router.note( f"Retry advisor agent failed: {e}", @@ -222,33 +228,27 @@ async def run_issue_advisor( workspace_manifest=ws_manifest, ) - artifacts_dir = dag_state_summary.get("artifacts_dir", "") - log_dir = os.path.join(artifacts_dir, "logs") if artifacts_dir else None - log_path = os.path.join(log_dir, f"issue_advisor_{issue_name}_{advisor_invocation}.jsonl") if log_dir else None - cwd = worktree_path or dag_state_summary.get("repo_path", ".") - ai = AgentAI(AgentAIConfig( - model=model, - provider=ai_provider, - cwd=cwd, - max_turns=DEFAULT_AGENT_MAX_TURNS, - allowed_tools=[Tool.READ, Tool.GLOB, Tool.GREP, Tool.BASH], - permission_mode=permission_mode or None, - )) + provider = "claude-code" if ai_provider == "claude" else ai_provider try: - response = await ai.run( + result = await router.harness( task_prompt, system_prompt=ISSUE_ADVISOR_SYSTEM_PROMPT, - output_schema=IssueAdvisorDecision, - log_file=log_path, + schema=IssueAdvisorDecision, + model=model, + provider=provider, + tools=["Read", "Glob", "Grep", "Bash"], + cwd=cwd, + max_turns=DEFAULT_AGENT_MAX_TURNS, + permission_mode=permission_mode or None, ) - if response.parsed is not None: + if result.parsed is not None: router.note( - f"Issue advisor decision: {response.parsed.action.value} — {response.parsed.summary}", + f"Issue advisor decision: {result.parsed.action.value} — {result.parsed.summary}", tags=["issue_advisor", "complete"], ) - return response.parsed.model_dump() + return result.parsed.model_dump() except Exception as e: router.note( f"Issue advisor agent failed: {e}", @@ -297,50 +297,51 @@ async def run_replanner( ) task_prompt = replanner_task_prompt( - state, failures, + state, + failures, escalation_notes=escalation_notes, - adaptation_history=state.adaptation_history if hasattr(state, "adaptation_history") else [], + adaptation_history=state.adaptation_history + if hasattr(state, "adaptation_history") + else [], ) log_dir = os.path.join(state.artifacts_dir, "logs") if state.artifacts_dir else None - log_path = os.path.join(log_dir, f"replanner_{state.replan_count}.jsonl") if log_dir else None - - ai = AgentAI(AgentAIConfig( - model=replan_model, - provider=ai_provider, - cwd=state.repo_path or ".", - max_turns=DEFAULT_AGENT_MAX_TURNS, - allowed_tools=[Tool.READ, Tool.GLOB, Tool.GREP, Tool.BASH], - permission_mode=permission_mode or None, - )) + provider = "claude-code" if ai_provider == "claude" else ai_provider current_prompt = task_prompt for attempt in range(2): try: - response = await ai.run( + result = await router.harness( current_prompt, system_prompt=REPLANNER_SYSTEM_PROMPT, - output_schema=ReplanDecision, - log_file=log_path, + schema=ReplanDecision, + model=replan_model, + provider=provider, + tools=["Read", "Glob", "Grep", "Bash"], + cwd=state.repo_path or ".", + max_turns=DEFAULT_AGENT_MAX_TURNS, + permission_mode=permission_mode or None, ) # Log raw response for debugging (even on parse failure) if log_dir: - raw_log = os.path.join(log_dir, f"replanner_{state.replan_count}_raw_{attempt}.txt") + raw_log = os.path.join( + log_dir, f"replanner_{state.replan_count}_raw_{attempt}.txt" + ) os.makedirs(log_dir, exist_ok=True) with open(raw_log, "w") as f: - f.write(response.text or "(empty)") + f.write(getattr(result, "text", "") or "(empty)") - if response.parsed is not None: + if result.parsed is not None: router.note( - f"Replan decision: {response.parsed.action.value} — {response.parsed.summary}", + f"Replan decision: {result.parsed.action.value} — {result.parsed.summary}", tags=["replanner", "complete"], ) - return response.parsed.model_dump() + return result.parsed.model_dump() # Parse failed — retry with tighter prompt router.note( f"Replanner produced unparseable output (attempt {attempt + 1}): " - f"{(response.text or '')[:500]}", + f"{(getattr(result, 'text', '') or '')[:500]}", tags=["replanner", "parse_error"], ) current_prompt = ( @@ -395,11 +396,6 @@ async def run_issue_writer( Multiple instances can run in parallel (one per issue). """ issue_name = issue.get("name", "unknown") - # issues_dir is /plan/issues — derive log_dir from grandparent - _artifacts_base = os.path.dirname(os.path.dirname(issues_dir)) if issues_dir else "" - log_dir = os.path.join(_artifacts_base, "logs") if _artifacts_base else None - log_path = os.path.join(log_dir, f"issue_writer_{issue_name}.jsonl") if log_dir else None - router.note( f"Issue writer starting for {issue_name}", tags=["issue_writer", "start"], @@ -423,28 +419,26 @@ class IssueWriterOutput(BaseModel): issue_file_path: str success: bool - ai = AgentAI(AgentAIConfig( - model=model, - provider=ai_provider, - cwd=repo_path, - max_turns=DEFAULT_AGENT_MAX_TURNS, - allowed_tools=[Tool.READ, Tool.WRITE, Tool.GLOB, Tool.GREP], - permission_mode=permission_mode or None, - )) + provider = "claude-code" if ai_provider == "claude" else ai_provider try: - response = await ai.run( + result = await router.harness( task_prompt, system_prompt=ISSUE_WRITER_SYSTEM_PROMPT, - output_schema=IssueWriterOutput, - log_file=log_path, + schema=IssueWriterOutput, + model=model, + provider=provider, + tools=["Read", "Write", "Glob", "Grep"], + cwd=repo_path, + max_turns=DEFAULT_AGENT_MAX_TURNS, + permission_mode=permission_mode or None, ) - if response.parsed is not None: + if result.parsed is not None: router.note( - f"Issue writer complete for {issue_name}: {response.parsed.issue_file_path}", + f"Issue writer complete for {issue_name}: {result.parsed.issue_file_path}", tags=["issue_writer", "complete"], ) - return response.parsed.model_dump() + return result.parsed.model_dump() except Exception as e: router.note( f"Issue writer failed for {issue_name}: {e}", @@ -476,9 +470,6 @@ async def run_verifier( Returns a VerificationResult dict. """ - log_dir = os.path.join(artifacts_dir, "logs") if artifacts_dir else None - log_path = os.path.join(log_dir, "verifier.jsonl") if log_dir else None - router.note("Verifier starting", tags=["verifier", "start"]) ws_manifest = _maybe_workspace_manifest(workspace_manifest) @@ -492,29 +483,26 @@ async def run_verifier( workspace_manifest=ws_manifest, ) - ai = AgentAI(AgentAIConfig( - model=model, - provider=ai_provider, - cwd=repo_path, - max_turns=DEFAULT_AGENT_MAX_TURNS, - allowed_tools=[Tool.READ, Tool.GLOB, Tool.GREP, Tool.BASH], - permission_mode=permission_mode or None, - )) + provider = "claude-code" if ai_provider == "claude" else ai_provider try: - response = await ai.run( + result = await router.harness( task_prompt, system_prompt=VERIFIER_SYSTEM_PROMPT, - output_schema=VerificationResult, - log_file=log_path, + schema=VerificationResult, + model=model, + provider=provider, + tools=["Read", "Glob", "Grep", "Bash"], + cwd=repo_path, + max_turns=DEFAULT_AGENT_MAX_TURNS, + permission_mode=permission_mode or None, ) - if response.parsed is not None: + if result.parsed is not None: router.note( - f"Verifier complete: passed={response.parsed.passed}, " - f"summary={response.parsed.summary}", + f"Verifier complete: passed={result.parsed.passed}, summary={result.parsed.summary}", tags=["verifier", "complete"], ) - return response.parsed.model_dump() + return result.parsed.model_dump() except Exception as e: router.note( f"Verifier agent failed: {e}", @@ -555,15 +543,14 @@ async def run_git_init( will be injected into the system prompt to help the agent learn from the previous failure. """ - log_dir = os.path.join(artifacts_dir, "logs") if artifacts_dir else None - log_path = os.path.join(log_dir, "git_init.jsonl") if log_dir else None - router.note( f"Git init starting for: {goal[:80]}", tags=["git_init", "start"], ) - task_prompt = git_init_task_prompt(repo_path=repo_path, goal=goal, build_id=build_id) + task_prompt = git_init_task_prompt( + repo_path=repo_path, goal=goal, build_id=build_id + ) # Build system prompt with error context if retrying system_prompt = GIT_INIT_SYSTEM_PROMPT @@ -578,29 +565,27 @@ async def run_git_init( "- If the error indicates a parsing issue, ensure your output is valid JSON\n" ) - ai = AgentAI(AgentAIConfig( - model=model, - provider=ai_provider, - cwd=repo_path, - max_turns=DEFAULT_AGENT_MAX_TURNS, - allowed_tools=[Tool.BASH], - permission_mode=permission_mode or None, - )) + provider = "claude-code" if ai_provider == "claude" else ai_provider try: - response = await ai.run( + result = await router.harness( task_prompt, system_prompt=system_prompt, - output_schema=GitInitResult, - log_file=log_path, + schema=GitInitResult, + model=model, + provider=provider, + tools=["Bash"], + cwd=repo_path, + max_turns=DEFAULT_AGENT_MAX_TURNS, + permission_mode=permission_mode or None, ) - if response.parsed is not None: + if result.parsed is not None: router.note( - f"Git init complete: mode={response.parsed.mode}, " - f"integration_branch={response.parsed.integration_branch}", + f"Git init complete: mode={result.parsed.mode}, " + f"integration_branch={result.parsed.integration_branch}", tags=["git_init", "complete"], ) - return response.parsed.model_dump() + return result.parsed.model_dump() except Exception as e: router.note( f"Git init agent failed: {e}", @@ -635,9 +620,6 @@ async def run_workspace_setup( Returns {workspaces: [WorkspaceInfo, ...], success: bool}. """ - log_dir = os.path.join(artifacts_dir, "logs") if artifacts_dir else None - log_path = os.path.join(log_dir, f"workspace_setup_level_{level}.jsonl") if log_dir else None - issue_names = [i.get("name", "?") for i in issues] router.note( f"Workspace setup: creating {len(issues)} worktrees for {issue_names}", @@ -656,28 +638,26 @@ class WorkspaceSetupResult(BaseModel): workspaces: list[WorkspaceInfo] success: bool - ai = AgentAI(AgentAIConfig( - model=model, - provider=ai_provider, - cwd=repo_path, - max_turns=DEFAULT_AGENT_MAX_TURNS, - allowed_tools=[Tool.BASH], - permission_mode=permission_mode or None, - )) + provider = "claude-code" if ai_provider == "claude" else ai_provider try: - response = await ai.run( + result = await router.harness( task_prompt, system_prompt=WORKSPACE_SETUP_SYSTEM_PROMPT, - output_schema=WorkspaceSetupResult, - log_file=log_path, + schema=WorkspaceSetupResult, + model=model, + provider=provider, + tools=["Bash"], + cwd=repo_path, + max_turns=DEFAULT_AGENT_MAX_TURNS, + permission_mode=permission_mode or None, ) - if response.parsed is not None: + if result.parsed is not None: router.note( - f"Workspace setup complete: {len(response.parsed.workspaces)} worktrees created", + f"Workspace setup complete: {len(result.parsed.workspaces)} worktrees created", tags=["workspace_setup", "complete"], ) - return response.parsed.model_dump() + return result.parsed.model_dump() except Exception as e: router.note( f"Workspace setup agent failed: {e}", @@ -706,9 +686,6 @@ async def run_merger( Returns a MergeResult dict. """ branch_names = [b.get("branch_name", "?") for b in branches_to_merge] - log_dir = os.path.join(artifacts_dir, "logs") if artifacts_dir else None - log_path = os.path.join(log_dir, f"merger_level_{level}.jsonl") if log_dir else None - router.note( f"Merger starting: {len(branches_to_merge)} branches {branch_names}", tags=["merger", "start"], @@ -723,30 +700,28 @@ async def run_merger( architecture_summary=architecture_summary, ) - ai = AgentAI(AgentAIConfig( - model=model, - provider=ai_provider, - cwd=repo_path, - max_turns=DEFAULT_AGENT_MAX_TURNS, - allowed_tools=[Tool.BASH, Tool.READ, Tool.GLOB, Tool.GREP], - permission_mode=permission_mode or None, - )) + provider = "claude-code" if ai_provider == "claude" else ai_provider try: - response = await ai.run( + result = await router.harness( task_prompt, system_prompt=MERGER_SYSTEM_PROMPT, - output_schema=MergeResult, - log_file=log_path, + schema=MergeResult, + model=model, + provider=provider, + tools=["Bash", "Read", "Glob", "Grep"], + cwd=repo_path, + max_turns=DEFAULT_AGENT_MAX_TURNS, + permission_mode=permission_mode or None, ) - if response.parsed is not None: + if result.parsed is not None: router.note( - f"Merger complete: merged={response.parsed.merged_branches}, " - f"failed={response.parsed.failed_branches}, " - f"needs_test={response.parsed.needs_integration_test}", + f"Merger complete: merged={result.parsed.merged_branches}, " + f"failed={result.parsed.failed_branches}, " + f"needs_test={result.parsed.needs_integration_test}", tags=["merger", "complete"], ) - return response.parsed.model_dump() + return result.parsed.model_dump() except Exception as e: router.note( f"Merger agent failed: {e}", @@ -781,9 +756,6 @@ async def run_integration_tester( Returns an IntegrationTestResult dict. """ - log_dir = os.path.join(artifacts_dir, "logs") if artifacts_dir else None - log_path = os.path.join(log_dir, f"integration_tester_level_{level}.jsonl") if log_dir else None - router.note( f"Integration tester starting: {len(merged_branches)} merged branches", tags=["integration_tester", "start"], @@ -801,29 +773,27 @@ async def run_integration_tester( workspace_manifest=ws_manifest, ) - ai = AgentAI(AgentAIConfig( - model=model, - provider=ai_provider, - cwd=repo_path, - max_turns=DEFAULT_AGENT_MAX_TURNS, - allowed_tools=[Tool.BASH, Tool.READ, Tool.WRITE, Tool.GLOB, Tool.GREP], - permission_mode=permission_mode or None, - )) + provider = "claude-code" if ai_provider == "claude" else ai_provider try: - response = await ai.run( + result = await router.harness( task_prompt, system_prompt=INTEGRATION_TESTER_SYSTEM_PROMPT, - output_schema=IntegrationTestResult, - log_file=log_path, + schema=IntegrationTestResult, + model=model, + provider=provider, + tools=["Bash", "Read", "Write", "Glob", "Grep"], + cwd=repo_path, + max_turns=DEFAULT_AGENT_MAX_TURNS, + permission_mode=permission_mode or None, ) - if response.parsed is not None: + if result.parsed is not None: router.note( - f"Integration tester complete: passed={response.parsed.passed}, " - f"{response.parsed.tests_passed}/{response.parsed.tests_run} tests passed", + f"Integration tester complete: passed={result.parsed.passed}, " + f"{result.parsed.tests_passed}/{result.parsed.tests_run} tests passed", tags=["integration_tester", "complete"], ) - return response.parsed.model_dump() + return result.parsed.model_dump() except Exception as e: router.note( f"Integration tester agent failed: {e}", @@ -854,9 +824,6 @@ async def run_workspace_cleanup( Returns {success: bool, cleaned: list[str]}. """ - log_dir = os.path.join(artifacts_dir, "logs") if artifacts_dir else None - log_path = os.path.join(log_dir, f"workspace_cleanup_level_{level}.jsonl") if log_dir else None - router.note( f"Workspace cleanup: {len(branches_to_clean)} branches to clean", tags=["workspace_cleanup", "start"], @@ -872,28 +839,26 @@ class WorkspaceCleanupResult(BaseModel): success: bool cleaned: list[str] = [] - ai = AgentAI(AgentAIConfig( - model=model, - provider=ai_provider, - cwd=repo_path, - max_turns=DEFAULT_AGENT_MAX_TURNS, - allowed_tools=[Tool.BASH], - permission_mode=permission_mode or None, - )) + provider = "claude-code" if ai_provider == "claude" else ai_provider try: - response = await ai.run( + result = await router.harness( task_prompt, system_prompt=WORKSPACE_CLEANUP_SYSTEM_PROMPT, - output_schema=WorkspaceCleanupResult, - log_file=log_path, + schema=WorkspaceCleanupResult, + model=model, + provider=provider, + tools=["Bash"], + cwd=repo_path, + max_turns=DEFAULT_AGENT_MAX_TURNS, + permission_mode=permission_mode or None, ) - if response.parsed is not None: + if result.parsed is not None: router.note( - f"Workspace cleanup complete: {len(response.parsed.cleaned)} cleaned", + f"Workspace cleanup complete: {len(result.parsed.cleaned)} cleaned", tags=["workspace_cleanup", "complete"], ) - return response.parsed.model_dump() + return result.parsed.model_dump() except Exception as e: router.note( f"Workspace cleanup agent failed: {e}", @@ -954,22 +919,22 @@ async def run_coder( result = await router.harness( task_prompt, system_prompt=CODER_SYSTEM_PROMPT, + schema=CoderResult, model=model, provider=provider, tools=["Read", "Write", "Edit", "Bash", "Glob", "Grep"], - output_schema=CoderResult, cwd=worktree_path, max_turns=DEFAULT_AGENT_MAX_TURNS, permission_mode=permission_mode or None, ) - out = result.model_dump() - if out is not None: + if result.parsed is not None: router.note( f"Coder complete: {issue_name}, " - f"files={len(result.files_changed)}, " - f"complete={result.complete}", + f"files={len(result.parsed.files_changed)}, " + f"complete={result.parsed.complete}", tags=["coder", "complete"], ) + out = result.parsed.model_dump() out["iteration_id"] = iteration_id return out except Exception as e: @@ -1028,20 +993,20 @@ async def run_qa( result = await router.harness( task_prompt, system_prompt=QA_SYSTEM_PROMPT, + schema=QAResult, model=model, provider=provider, tools=["Read", "Write", "Edit", "Bash", "Glob", "Grep"], - output_schema=QAResult, cwd=worktree_path, max_turns=DEFAULT_AGENT_MAX_TURNS, permission_mode=permission_mode or None, ) - out = result.model_dump() - if out is not None: + if result.parsed is not None: router.note( - f"QA complete: {issue_name}, passed={result.passed}", + f"QA complete: {issue_name}, passed={result.parsed.passed}", tags=["qa", "complete"], ) + out = result.parsed.model_dump() out["iteration_id"] = iteration_id return out except Exception as e: @@ -1105,22 +1070,22 @@ async def run_code_reviewer( result = await router.harness( task_prompt, system_prompt=CODE_REVIEWER_SYSTEM_PROMPT, + schema=CodeReviewResult, model=model, provider=provider, tools=["Read", "Glob", "Grep", "Bash"], - output_schema=CodeReviewResult, cwd=worktree_path, max_turns=DEFAULT_AGENT_MAX_TURNS, permission_mode=permission_mode or None, ) - out = result.model_dump() - if out is not None: + if result.parsed is not None: router.note( f"Code reviewer complete: {issue_name}, " - f"approved={result.approved}, " - f"blocking={result.blocking}", + f"approved={result.parsed.approved}, " + f"blocking={result.parsed.blocking}", tags=["code_reviewer", "complete"], ) + out = result.parsed.model_dump() out["iteration_id"] = iteration_id return out except Exception as e: @@ -1157,10 +1122,6 @@ async def run_qa_synthesizer( Returns a QASynthesisResult dict with action, summary, stuck. """ issue_summary = issue_summary or {} - _issue_name = issue_summary.get("name", "unknown") - log_dir = os.path.join(artifacts_dir, "logs") if artifacts_dir else None - log_path = os.path.join(log_dir, f"synthesizer_{_issue_name}_iter_{iteration_id}.jsonl") if log_dir else None - router.note( "QA synthesizer starting", tags=["qa_synthesizer", "start"], @@ -1178,31 +1139,22 @@ async def run_qa_synthesizer( workspace_manifest=ws_manifest, ) - ai = AgentAI(AgentAIConfig( - model=model, - provider=ai_provider, - cwd=worktree_path or ".", - max_turns=DEFAULT_AGENT_MAX_TURNS, - allowed_tools=[], - permission_mode=permission_mode or None, - )) - try: - response = await ai.run( + result = await router.ai( task_prompt, - system_prompt=QA_SYNTHESIZER_SYSTEM_PROMPT, - output_schema=QASynthesisResult, - log_file=log_path, + system=QA_SYNTHESIZER_SYSTEM_PROMPT, + schema=QASynthesisResult, + model=model, ) - if response.parsed is not None: + if result.parsed is not None: router.note( - f"QA synthesizer complete: action={response.parsed.action.value}, " - f"stuck={response.parsed.stuck}", + f"QA synthesizer complete: action={result.parsed.action.value}, " + f"stuck={result.parsed.stuck}", tags=["qa_synthesizer", "complete"], ) - result = response.parsed.model_dump() - result["iteration_id"] = iteration_id - return result + out = result.parsed.model_dump() + out["iteration_id"] = iteration_id + return out except Exception as e: router.note( f"QA synthesizer agent failed: {e}", @@ -1216,10 +1168,14 @@ async def run_qa_synthesizer( if tests_passed and review_approved and not review_blocking: fallback_action = "approve" - fallback_summary = "Synthesizer failed but QA passed and review approved — approving." + fallback_summary = ( + "Synthesizer failed but QA passed and review approved — approving." + ) elif review_blocking: fallback_action = "block" - fallback_summary = "Synthesizer failed and review has blocking issues — blocking." + fallback_summary = ( + "Synthesizer failed and review has blocking issues — blocking." + ) else: fallback_action = "fix" fallback_summary = ( @@ -1261,9 +1217,6 @@ async def generate_fix_issues( ) repo_path = dag_state.get("repo_path", ".") - log_dir = os.path.join(artifacts_dir, "logs") if artifacts_dir else None - log_path = os.path.join(log_dir, "fix_generator.jsonl") if log_dir else None - task_prompt = fix_generator_task_prompt( failed_criteria=failed_criteria, dag_state_summary=dag_state, @@ -1280,36 +1233,36 @@ async def generate_fix_issues( "applied to. Available repos:\n" ) for repo in ws_manifest.repos: - task_prompt += f"- **{repo.repo_name}** (role: {repo.role}): `{repo.absolute_path}`\n" + task_prompt += ( + f"- **{repo.repo_name}** (role: {repo.role}): `{repo.absolute_path}`\n" + ) class FixGeneratorOutput(BaseModel): fix_issues: list[dict] = [] debt_items: list[dict] = [] summary: str = "" - ai = AgentAI(AgentAIConfig( - model=model, - provider=ai_provider, - cwd=repo_path, - max_turns=DEFAULT_AGENT_MAX_TURNS, - allowed_tools=[Tool.READ, Tool.GLOB, Tool.GREP, Tool.BASH], - permission_mode=permission_mode or None, - )) + provider = "claude-code" if ai_provider == "claude" else ai_provider try: - response = await ai.run( + result = await router.harness( task_prompt, system_prompt=FIX_GENERATOR_SYSTEM_PROMPT, - output_schema=FixGeneratorOutput, - log_file=log_path, + schema=FixGeneratorOutput, + model=model, + provider=provider, + tools=["Read", "Glob", "Grep", "Bash"], + cwd=repo_path, + max_turns=DEFAULT_AGENT_MAX_TURNS, + permission_mode=permission_mode or None, ) - if response.parsed is not None: + if result.parsed is not None: router.note( - f"Fix generator complete: {len(response.parsed.fix_issues)} fix issues, " - f"{len(response.parsed.debt_items)} debt items", + f"Fix generator complete: {len(result.parsed.fix_issues)} fix issues, " + f"{len(result.parsed.debt_items)} debt items", tags=["fix_generator", "complete"], ) - return response.parsed.model_dump() + return result.parsed.model_dump() except Exception as e: router.note( f"Fix generator agent failed: {e}", @@ -1349,36 +1302,31 @@ async def run_repo_finalize( Returns a RepoFinalizeResult dict. Non-blocking: failure does not affect build success. """ - log_dir = os.path.join(artifacts_dir, "logs") if artifacts_dir else None - log_path = os.path.join(log_dir, "repo_finalize.jsonl") if log_dir else None - router.note("Repo finalize starting", tags=["repo_finalize", "start"]) task_prompt = repo_finalize_task_prompt(repo_path=repo_path) - ai = AgentAI(AgentAIConfig( - model=model, - provider=ai_provider, - cwd=repo_path, - max_turns=DEFAULT_AGENT_MAX_TURNS, - allowed_tools=[Tool.BASH, Tool.READ, Tool.GLOB, Tool.GREP], - permission_mode=permission_mode or None, - )) + provider = "claude-code" if ai_provider == "claude" else ai_provider try: - response = await ai.run( + result = await router.harness( task_prompt, system_prompt=REPO_FINALIZE_SYSTEM_PROMPT, - output_schema=RepoFinalizeResult, - log_file=log_path, + schema=RepoFinalizeResult, + model=model, + provider=provider, + tools=["Bash", "Read", "Glob", "Grep"], + cwd=repo_path, + max_turns=DEFAULT_AGENT_MAX_TURNS, + permission_mode=permission_mode or None, ) - if response.parsed is not None: + if result.parsed is not None: router.note( - f"Repo finalize complete: {len(response.parsed.files_removed)} files removed, " - f"gitignore_updated={response.parsed.gitignore_updated}", + f"Repo finalize complete: {len(result.parsed.files_removed)} files removed, " + f"gitignore_updated={result.parsed.gitignore_updated}", tags=["repo_finalize", "complete"], ) - return response.parsed.model_dump() + return result.parsed.model_dump() except Exception as e: router.note( f"Repo finalize agent failed: {e}", @@ -1414,9 +1362,6 @@ async def run_github_pr( Returns a GitHubPRResult dict. """ - log_dir = os.path.join(artifacts_dir, "logs") if artifacts_dir else None - log_path = os.path.join(log_dir, "github_pr.jsonl") if log_dir else None - router.note( f"GitHub PR: pushing {integration_branch} and creating draft PR", tags=["github_pr", "start"], @@ -1432,28 +1377,26 @@ async def run_github_pr( accumulated_debt=accumulated_debt, ) - ai = AgentAI(AgentAIConfig( - model=model, - provider=ai_provider, - cwd=repo_path, - max_turns=DEFAULT_AGENT_MAX_TURNS, - allowed_tools=[Tool.BASH], - permission_mode=permission_mode or None, - )) + provider = "claude-code" if ai_provider == "claude" else ai_provider try: - response = await ai.run( + result = await router.harness( task_prompt, system_prompt=GITHUB_PR_SYSTEM_PROMPT, - output_schema=GitHubPRResult, - log_file=log_path, + schema=GitHubPRResult, + model=model, + provider=provider, + tools=["Bash"], + cwd=repo_path, + max_turns=DEFAULT_AGENT_MAX_TURNS, + permission_mode=permission_mode or None, ) - if response.parsed is not None: + if result.parsed is not None: router.note( - f"GitHub PR complete: {response.parsed.pr_url}", + f"GitHub PR complete: {result.parsed.pr_url}", tags=["github_pr", "complete"], ) - return response.parsed.model_dump() + return result.parsed.model_dump() except Exception as e: router.note( f"GitHub PR agent failed: {e}", From 26a260b168f94a85cef400bd5462cd5b75a91f8d Mon Sep 17 00:00:00 2001 From: santoshkumarradha Date: Fri, 13 Mar 2026 00:58:16 +0530 Subject: [PATCH 5/9] feat: add provider normalization for AgentField native harness (fixes #30) --- _worktrees/issue-22-planning-agents | 1 + _worktrees/issue-23-coding-loop | 1 + _worktrees/issue-24-qa-synthesizer | 1 + _worktrees/issue-25-advisory-agents | 1 + _worktrees/issue-26-git-workflow | 1 + _worktrees/issue-27-verification-output | 1 + _worktrees/issue-28-fast-module | 1 + swe_af/execution/schemas.py | 135 +++++++++++++++--------- 8 files changed, 94 insertions(+), 48 deletions(-) create mode 160000 _worktrees/issue-22-planning-agents create mode 160000 _worktrees/issue-23-coding-loop create mode 160000 _worktrees/issue-24-qa-synthesizer create mode 160000 _worktrees/issue-25-advisory-agents create mode 160000 _worktrees/issue-26-git-workflow create mode 160000 _worktrees/issue-27-verification-output create mode 160000 _worktrees/issue-28-fast-module diff --git a/_worktrees/issue-22-planning-agents b/_worktrees/issue-22-planning-agents new file mode 160000 index 0000000..751b7cf --- /dev/null +++ b/_worktrees/issue-22-planning-agents @@ -0,0 +1 @@ +Subproject commit 751b7cf293b93253702fb58b23728b42f460274b diff --git a/_worktrees/issue-23-coding-loop b/_worktrees/issue-23-coding-loop new file mode 160000 index 0000000..7048b65 --- /dev/null +++ b/_worktrees/issue-23-coding-loop @@ -0,0 +1 @@ +Subproject commit 7048b65b46adc2d12279b251676c983d47ab2785 diff --git a/_worktrees/issue-24-qa-synthesizer b/_worktrees/issue-24-qa-synthesizer new file mode 160000 index 0000000..1e19da0 --- /dev/null +++ b/_worktrees/issue-24-qa-synthesizer @@ -0,0 +1 @@ +Subproject commit 1e19da00d38c1046bd7ea7eeb706cf36593ba39d diff --git a/_worktrees/issue-25-advisory-agents b/_worktrees/issue-25-advisory-agents new file mode 160000 index 0000000..3088f66 --- /dev/null +++ b/_worktrees/issue-25-advisory-agents @@ -0,0 +1 @@ +Subproject commit 3088f669bbd4a9cb68581093ba9f4d0b762038b3 diff --git a/_worktrees/issue-26-git-workflow b/_worktrees/issue-26-git-workflow new file mode 160000 index 0000000..4369c9f --- /dev/null +++ b/_worktrees/issue-26-git-workflow @@ -0,0 +1 @@ +Subproject commit 4369c9fafdedd0786e2f85df382077028286253b diff --git a/_worktrees/issue-27-verification-output b/_worktrees/issue-27-verification-output new file mode 160000 index 0000000..d32dc02 --- /dev/null +++ b/_worktrees/issue-27-verification-output @@ -0,0 +1 @@ +Subproject commit d32dc02e6053e27be84047cb7b435aaf81f23cf5 diff --git a/_worktrees/issue-28-fast-module b/_worktrees/issue-28-fast-module new file mode 160000 index 0000000..01e17b7 --- /dev/null +++ b/_worktrees/issue-28-fast-module @@ -0,0 +1 @@ +Subproject commit 01e17b7a5b53d3fe8c5adce3246afd920002a93c diff --git a/swe_af/execution/schemas.py b/swe_af/execution/schemas.py index edf161a..cb3b086 100644 --- a/swe_af/execution/schemas.py +++ b/swe_af/execution/schemas.py @@ -6,12 +6,32 @@ from enum import Enum from typing import Any, Literal -from pydantic import BaseModel, ConfigDict, PrivateAttr, field_validator, model_validator +from pydantic import ( + BaseModel, + ConfigDict, + PrivateAttr, + field_validator, + model_validator, +) # Global default for all agent max_turns. Change this one value to adjust everywhere. DEFAULT_AGENT_MAX_TURNS: int = 150 +# --------------------------------------------------------------------------- +# Provider normalization +# --------------------------------------------------------------------------- + + +def _normalize_provider(ai_provider: str) -> str: + """Map legacy provider names to AgentField native names. + + Ensures backward compatibility between old "claude" provider name + and AgentField's native "claude-code" provider name. + """ + return {"claude": "claude-code"}.get(ai_provider, ai_provider) + + # --------------------------------------------------------------------------- # Multi-repo helper # --------------------------------------------------------------------------- @@ -42,13 +62,13 @@ def _derive_repo_name(url: str) -> str: class RepoSpec(BaseModel): """Specification for a single repository in a multi-repo build.""" - repo_url: str = "" # GitHub/git URL (required if repo_path empty) - repo_path: str = "" # Absolute path to an existing local repo - role: str # 'primary' or 'dependency' - branch: str = "" # Branch to checkout (empty = default branch) + repo_url: str = "" # GitHub/git URL (required if repo_path empty) + repo_path: str = "" # Absolute path to an existing local repo + role: str # 'primary' or 'dependency' + branch: str = "" # Branch to checkout (empty = default branch) sparse_paths: list[str] = [] # For sparse checkout; empty = full checkout - mount_point: str = "" # Workspace subdirectory override - create_pr: bool = True # Whether to create a PR for this repo + mount_point: str = "" # Workspace subdirectory override + create_pr: bool = True # Whether to create a PR for this repo @field_validator("role") @classmethod @@ -60,23 +80,25 @@ def _validate_role(cls, v: str) -> str: @field_validator("repo_url") @classmethod def _validate_repo_url(cls, v: str) -> str: - if v and not (v.startswith("http://") or v.startswith("https://") or v.startswith("git@")): - raise ValueError( - f"repo_url must be an HTTP(S) or SSH git URL, got {v!r}" - ) + if v and not ( + v.startswith("http://") or v.startswith("https://") or v.startswith("git@") + ): + raise ValueError(f"repo_url must be an HTTP(S) or SSH git URL, got {v!r}") return v class WorkspaceRepo(BaseModel): """A repository that has been cloned into the workspace.""" - model_config = ConfigDict(frozen=False) # Mutable: git_init_result assigned post-clone + model_config = ConfigDict( + frozen=False + ) # Mutable: git_init_result assigned post-clone - repo_name: str # Derived name (from _derive_repo_name) - repo_url: str # Original git URL - role: str # 'primary' or 'dependency' - absolute_path: str # Path where the repo was cloned - branch: str # Actual checked-out branch + repo_name: str # Derived name (from _derive_repo_name) + repo_url: str # Original git URL + role: str # 'primary' or 'dependency' + absolute_path: str # Path where the repo was cloned + branch: str # Actual checked-out branch sparse_paths: list[str] = [] create_pr: bool = True git_init_result: dict | None = None # Populated by _init_all_repos after cloning @@ -85,9 +107,9 @@ class WorkspaceRepo(BaseModel): class WorkspaceManifest(BaseModel): """Snapshot of all repositories cloned for a multi-repo build.""" - workspace_root: str # Parent directory containing all repos + workspace_root: str # Parent directory containing all repos repos: list[WorkspaceRepo] # All cloned repos - primary_repo_name: str # Name of the primary repo + primary_repo_name: str # Name of the primary repo @property def primary_repo(self) -> WorkspaceRepo | None: @@ -112,10 +134,10 @@ class RepoPRResult(BaseModel): class AdvisorAction(str, Enum): """What the Issue Advisor decided to do after a coding loop failure.""" - RETRY_MODIFIED = "retry_modified" # Relax ACs, retry coding loop - RETRY_APPROACH = "retry_approach" # Keep ACs, different strategy - SPLIT = "split" # Break into sub-issues - ACCEPT_WITH_DEBT = "accept_with_debt" # Close enough, record gaps + RETRY_MODIFIED = "retry_modified" # Relax ACs, retry coding loop + RETRY_APPROACH = "retry_approach" # Keep ACs, different strategy + SPLIT = "split" # Break into sub-issues + ACCEPT_WITH_DEBT = "accept_with_debt" # Close enough, record gaps ESCALATE_TO_REPLAN = "escalate_to_replan" # Flag for outer loop @@ -123,11 +145,11 @@ class IssueOutcome(str, Enum): """Outcome of executing a single issue.""" COMPLETED = "completed" - COMPLETED_WITH_DEBT = "completed_with_debt" # Accepted via ACCEPT_WITH_DEBT + COMPLETED_WITH_DEBT = "completed_with_debt" # Accepted via ACCEPT_WITH_DEBT FAILED_RETRYABLE = "failed_retryable" FAILED_UNRECOVERABLE = "failed_unrecoverable" - FAILED_NEEDS_SPLIT = "failed_needs_split" # Advisor wants to split - FAILED_ESCALATED = "failed_escalated" # Advisor escalated to replanner + FAILED_NEEDS_SPLIT = "failed_needs_split" # Advisor wants to split + FAILED_ESCALATED = "failed_escalated" # Advisor escalated to replanner SKIPPED = "skipped" @@ -165,7 +187,7 @@ class IssueAdvisorDecision(BaseModel): action: AdvisorAction failure_diagnosis: str - failure_category: str = "" # environment|logic|dependency|approach|scope + failure_category: str = "" # environment|logic|dependency|approach|scope rationale: str confidence: float = 0.5 # RETRY_MODIFIED @@ -293,7 +315,9 @@ class DAGState(BaseModel): adaptation_history: list[dict] = [] # --- Multi-repo workspace --- - workspace_manifest: dict | None = None # Serialised WorkspaceManifest (dict for JSON compat) + workspace_manifest: dict | None = ( + None # Serialised WorkspaceManifest (dict for JSON compat) + ) class GitInitResult(BaseModel): @@ -305,9 +329,9 @@ class GitInitResult(BaseModel): initial_commit_sha: str # commit SHA before any work success: bool error_message: str = "" - remote_url: str = "" # origin URL (set if repo was cloned) - remote_default_branch: str = "" # e.g. "main" — for PR base - repo_name: str = "" # Repo this result belongs to (multi-repo) + remote_url: str = "" # origin URL (set if repo was cloned) + remote_default_branch: str = "" # e.g. "main" — for PR base + repo_name: str = "" # Repo this result belongs to (multi-repo) class WorkspaceInfo(BaseModel): @@ -385,11 +409,11 @@ class CoderResult(BaseModel): summary: str = "" complete: bool = True iteration_id: str = "" - tests_passed: bool | None = None # Self-reported: did tests pass? - test_summary: str = "" # Brief test run output - codebase_learnings: list[str] = [] # Conventions discovered (for shared memory) - agent_retro: dict = {} # What worked, what didn't (for shared memory) - repo_name: str = "" # Repo where coder ran (multi-repo) + tests_passed: bool | None = None # Self-reported: did tests pass? + test_summary: str = "" # Brief test run output + codebase_learnings: list[str] = [] # Conventions discovered (for shared memory) + agent_retro: dict = {} # What worked, what didn't (for shared memory) + repo_name: str = "" # Repo where coder ran (multi-repo) class QAResult(BaseModel): @@ -398,7 +422,7 @@ class QAResult(BaseModel): passed: bool summary: str = "" test_failures: list[dict] = [] # [{test_name, file, error, expected, actual}] - coverage_gaps: list[str] = [] # ACs without test coverage + coverage_gaps: list[str] = [] # ACs without test coverage iteration_id: str = "" @@ -491,7 +515,9 @@ def _runtime_to_provider(runtime: str) -> Literal["claude", "opencode"]: return "claude" if runtime == "open_code": return "opencode" - raise ValueError(f"Unsupported runtime {runtime!r}. Valid runtimes: {', '.join(RUNTIME_VALUES)}") + raise ValueError( + f"Unsupported runtime {runtime!r}. Valid runtimes: {', '.join(RUNTIME_VALUES)}" + ) def _legacy_hint_for_model_key(key: str) -> str: @@ -614,16 +640,20 @@ class BuildConfig(BaseModel): agent_max_turns: int = DEFAULT_AGENT_MAX_TURNS execute_fn_target: str = "" permission_mode: str = "" - repo_url: str = "" # GitHub URL to clone (single-repo shorthand) - repos: list[RepoSpec] = [] # Multi-repo list; normalised by _normalize_repos - enable_github_pr: bool = True # Create draft PR after build - github_pr_base: str = "" # PR base branch (default: repo's default branch) + repo_url: str = "" # GitHub URL to clone (single-repo shorthand) + repos: list[RepoSpec] = [] # Multi-repo list; normalised by _normalize_repos + enable_github_pr: bool = True # Create draft PR after build + github_pr_base: str = "" # PR base branch (default: repo's default branch) agent_timeout_seconds: int = 2700 max_advisor_invocations: int = 2 enable_issue_advisor: bool = True - enable_learning: bool = False # Cross-issue shared memory (conventions, failure patterns, bug patterns) - max_concurrent_issues: int = 3 # max parallel issues per level (0 = unlimited) - level_failure_abort_threshold: float = 0.8 # abort DAG when >= this fraction of a level fails + enable_learning: bool = ( + False # Cross-issue shared memory (conventions, failure patterns, bug patterns) + ) + max_concurrent_issues: int = 3 # max parallel issues per level (0 = unlimited) + level_failure_abort_threshold: float = ( + 0.8 # abort DAG when >= this fraction of a level fails + ) @model_validator(mode="before") @classmethod @@ -787,18 +817,27 @@ class ExecutionConfig(BaseModel): max_coding_iterations: int = 5 agent_max_turns: int = DEFAULT_AGENT_MAX_TURNS permission_mode: str = "" - agent_timeout_seconds: int = 2700 # 45 min + agent_timeout_seconds: int = 2700 # 45 min max_advisor_invocations: int = 2 enable_issue_advisor: bool = True enable_learning: bool = False - max_concurrent_issues: int = 3 # max parallel issues per level (0 = unlimited) - level_failure_abort_threshold: float = 0.8 # abort DAG when >= this fraction of a level fails + max_concurrent_issues: int = 3 # max parallel issues per level (0 = unlimited) + level_failure_abort_threshold: float = ( + 0.8 # abort DAG when >= this fraction of a level fails + ) @model_validator(mode="before") @classmethod def _validate_v2_keys(cls, data: Any) -> Any: return _reject_legacy_config_keys(data) + @model_validator(mode="after") + def _normalize_provider_field(self) -> "ExecutionConfig": + # Normalize legacy provider names at config boundary for defense-in-depth + # (inline mappings in execution_agents.py/pipeline.py provide first layer) + self.runtime = "claude_code" if self.runtime == "claude" else self.runtime + return self + def model_post_init(self, __context: Any) -> None: """Resolve runtime model selection once at construction time.""" self._resolved_models = resolve_runtime_models( From 04321f2cf097f05564a43f4ac30971414697c7da Mon Sep 17 00:00:00 2001 From: santoshkumarradha Date: Fri, 13 Mar 2026 01:01:20 +0530 Subject: [PATCH 6/9] chore: delete swe_af/agent_ai/ module and fix remaining references (fixes #29) --- swe_af/agent_ai/README.md | 18 - swe_af/agent_ai/__init__.py | 28 - swe_af/agent_ai/client.py | 86 --- swe_af/agent_ai/factory.py | 77 --- swe_af/agent_ai/providers/__init__.py | 1 - swe_af/agent_ai/providers/base.py | 53 -- swe_af/agent_ai/providers/claude/__init__.py | 3 - swe_af/agent_ai/providers/claude/adapter.py | 37 -- swe_af/agent_ai/providers/claude/client.py | 586 ------------------ swe_af/agent_ai/providers/codex/__init__.py | 3 - swe_af/agent_ai/providers/codex/adapter.py | 102 --- swe_af/agent_ai/providers/codex/client.py | 364 ----------- .../agent_ai/providers/opencode/__init__.py | 9 - swe_af/agent_ai/providers/opencode/client.py | 432 ------------- swe_af/agent_ai/types.py | 136 ---- swe_af/execution/_replanner_compat.py | 67 +- ..._init_executor_planner_verifier_routing.py | 188 +++--- tests/test_agent_ai_provider.py | 22 - tests/test_claude_provider_compat.py | 60 -- tests/test_codex_adapter.py | 71 --- 20 files changed, 141 insertions(+), 2202 deletions(-) delete mode 100644 swe_af/agent_ai/README.md delete mode 100644 swe_af/agent_ai/__init__.py delete mode 100644 swe_af/agent_ai/client.py delete mode 100644 swe_af/agent_ai/factory.py delete mode 100644 swe_af/agent_ai/providers/__init__.py delete mode 100644 swe_af/agent_ai/providers/base.py delete mode 100644 swe_af/agent_ai/providers/claude/__init__.py delete mode 100644 swe_af/agent_ai/providers/claude/adapter.py delete mode 100644 swe_af/agent_ai/providers/claude/client.py delete mode 100644 swe_af/agent_ai/providers/codex/__init__.py delete mode 100644 swe_af/agent_ai/providers/codex/adapter.py delete mode 100644 swe_af/agent_ai/providers/codex/client.py delete mode 100644 swe_af/agent_ai/providers/opencode/__init__.py delete mode 100644 swe_af/agent_ai/providers/opencode/client.py delete mode 100644 swe_af/agent_ai/types.py delete mode 100644 tests/test_agent_ai_provider.py delete mode 100644 tests/test_claude_provider_compat.py delete mode 100644 tests/test_codex_adapter.py diff --git a/swe_af/agent_ai/README.md b/swe_af/agent_ai/README.md deleted file mode 100644 index f5c76e2..0000000 --- a/swe_af/agent_ai/README.md +++ /dev/null @@ -1,18 +0,0 @@ -# agent_ai - -Provider-agnostic AI runtime for the SWE pipeline. - -## Providers -- `claude`: backed by Claude Code SDK (`claude_agent_sdk`) -- `opencode`: backed by OpenCode CLI (`opencode run -m model`) for 75+ LLM providers (OpenRouter, OpenAI, Google, Anthropic) - -## Selection -Public pipeline config selects `runtime`: -- `claude_code` -> internal provider `claude` -- `open_code` -> internal provider `opencode` - -The resolved provider is exposed to internals as: -- `BuildConfig.ai_provider` -- `ExecutionConfig.ai_provider` - -A single run should use one provider end-to-end. diff --git a/swe_af/agent_ai/__init__.py b/swe_af/agent_ai/__init__.py deleted file mode 100644 index 0b1a862..0000000 --- a/swe_af/agent_ai/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -from swe_af.agent_ai.client import AgentAI, AgentAIConfig, ClaudeAI, ClaudeAIConfig -from swe_af.agent_ai.types import ( - AgentResponse, - ClaudeResponse, - Message, - Metrics, - TextContent, - ThinkingContent, - Tool, - ToolResultContent, - ToolUseContent, -) - -__all__ = [ - "AgentAI", - "AgentAIConfig", - "AgentResponse", - "ClaudeAI", - "ClaudeAIConfig", - "ClaudeResponse", - "Message", - "TextContent", - "ToolUseContent", - "ToolResultContent", - "ThinkingContent", - "Metrics", - "Tool", -] diff --git a/swe_af/agent_ai/client.py b/swe_af/agent_ai/client.py deleted file mode 100644 index 10a4b7a..0000000 --- a/swe_af/agent_ai/client.py +++ /dev/null @@ -1,86 +0,0 @@ -"""Provider-agnostic AI client facade.""" - -from __future__ import annotations - -from dataclasses import dataclass, field -from pathlib import Path -from typing import Literal, Type - -from pydantic import BaseModel - -from swe_af.agent_ai.factory import build_provider_client -from swe_af.agent_ai.types import AgentResponse, Tool - -DEFAULT_TOOLS: list[str] = [ - Tool.READ, - Tool.WRITE, - Tool.EDIT, - Tool.BASH, - Tool.GLOB, - Tool.GREP, -] - - -@dataclass -class AgentAIConfig: - """Configuration for AgentAI.""" - - provider: Literal["claude", "codex", "opencode"] = "claude" - codex_bin: str = "codex" - opencode_bin: str = "opencode" - model: str = "sonnet" - cwd: str | Path = "." - max_turns: int = 10 - allowed_tools: list[str] = field(default_factory=lambda: list(DEFAULT_TOOLS)) - system_prompt: str | None = None - max_retries: int = 3 - initial_delay: float = 1.0 - max_delay: float = 30.0 - backoff_factor: float = 2.0 - permission_mode: str | None = None - max_budget_usd: float | None = None - env: dict[str, str] = field(default_factory=dict) - - -class AgentAI: - """Async facade that dispatches requests to the selected provider client.""" - - def __init__(self, config: AgentAIConfig | None = None) -> None: - self.config = config or AgentAIConfig() - - async def run( - self, - prompt: str, - *, - model: str | None = None, - cwd: str | Path | None = None, - max_turns: int | None = None, - allowed_tools: list[str] | None = None, - system_prompt: str | None = None, - output_schema: Type[BaseModel] | None = None, - max_retries: int | None = None, - max_budget_usd: float | None = None, - permission_mode: str | None = None, - env: dict[str, str] | None = None, - log_file: str | Path | None = None, - ) -> AgentResponse[BaseModel]: - provider_client = build_provider_client(self.config) - return await provider_client.run( - prompt, - model=model, - cwd=cwd, - max_turns=max_turns, - allowed_tools=allowed_tools, - system_prompt=system_prompt, - output_schema=output_schema, - max_retries=max_retries, - max_budget_usd=max_budget_usd, - permission_mode=permission_mode, - env=env, - log_file=log_file, - ) - - -# Backward-compatible aliases retained during migration. -ClaudeAI = AgentAI -ClaudeAIConfig = AgentAIConfig diff --git a/swe_af/agent_ai/factory.py b/swe_af/agent_ai/factory.py deleted file mode 100644 index e7a9602..0000000 --- a/swe_af/agent_ai/factory.py +++ /dev/null @@ -1,77 +0,0 @@ -"""Provider factory for AgentAI.""" - -from __future__ import annotations - -from typing import TYPE_CHECKING - -from swe_af.agent_ai.providers.base import ProviderClient - -if TYPE_CHECKING: - from swe_af.agent_ai.client import AgentAIConfig - - -def build_provider_client(config: "AgentAIConfig") -> ProviderClient: - """Build the provider-specific client for the current config.""" - if config.provider == "claude": - from swe_af.agent_ai.providers.claude import ClaudeProviderClient, ClaudeProviderConfig - - provider_cfg = ClaudeProviderConfig( - model=config.model, - cwd=config.cwd, - max_turns=config.max_turns, - allowed_tools=list(config.allowed_tools), - system_prompt=config.system_prompt, - max_retries=config.max_retries, - initial_delay=config.initial_delay, - max_delay=config.max_delay, - backoff_factor=config.backoff_factor, - permission_mode=config.permission_mode, - max_budget_usd=config.max_budget_usd, - env=dict(config.env), - ) - return ClaudeProviderClient(provider_cfg) - - if config.provider == "codex": - from swe_af.agent_ai.providers.codex import CodexProviderClient, CodexProviderConfig - - provider_cfg = CodexProviderConfig( - codex_bin=config.codex_bin, - model=config.model, - cwd=config.cwd, - max_turns=config.max_turns, - allowed_tools=list(config.allowed_tools), - system_prompt=config.system_prompt, - max_retries=config.max_retries, - initial_delay=config.initial_delay, - max_delay=config.max_delay, - backoff_factor=config.backoff_factor, - permission_mode=config.permission_mode, - max_budget_usd=config.max_budget_usd, - env=dict(config.env), - ) - return CodexProviderClient(provider_cfg) - - if config.provider == "opencode": - from swe_af.agent_ai.providers.opencode import ( - OpenCodeProviderClient, - OpenCodeProviderConfig, - ) - - provider_cfg = OpenCodeProviderConfig( - opencode_bin=config.opencode_bin, - model=config.model, - cwd=config.cwd, - max_turns=config.max_turns, - allowed_tools=list(config.allowed_tools), - system_prompt=config.system_prompt, - max_retries=config.max_retries, - initial_delay=config.initial_delay, - max_delay=config.max_delay, - backoff_factor=config.backoff_factor, - permission_mode=config.permission_mode, - max_budget_usd=config.max_budget_usd, - env=dict(config.env), - ) - return OpenCodeProviderClient(provider_cfg) - - raise ValueError(f"Unsupported provider: {config.provider}") diff --git a/swe_af/agent_ai/providers/__init__.py b/swe_af/agent_ai/providers/__init__.py deleted file mode 100644 index 9b01cc0..0000000 --- a/swe_af/agent_ai/providers/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""AI providers.""" diff --git a/swe_af/agent_ai/providers/base.py b/swe_af/agent_ai/providers/base.py deleted file mode 100644 index 91d0a03..0000000 --- a/swe_af/agent_ai/providers/base.py +++ /dev/null @@ -1,53 +0,0 @@ -"""Provider interface for AI backends.""" - -from __future__ import annotations - -from typing import Any, Protocol, Type, TypeVar - -from pydantic import BaseModel - -from swe_af.agent_ai.types import AgentResponse - -T = TypeVar("T", bound=BaseModel) - - -class ProviderClient(Protocol): - """Protocol implemented by provider-specific clients.""" - - async def run( - self, - prompt: str, - *, - model: str | None = None, - cwd: str | None = None, - max_turns: int | None = None, - allowed_tools: list[str] | None = None, - system_prompt: str | None = None, - output_schema: Type[T] | None = None, - max_retries: int | None = None, - max_budget_usd: float | None = None, - permission_mode: str | None = None, - env: dict[str, str] | None = None, - log_file: str | None = None, - ) -> AgentResponse[T]: - ... - - -def make_empty_response(is_error: bool = True) -> AgentResponse[Any]: - """Small helper for providers that need a safe fallback value.""" - from swe_af.agent_ai.types import Metrics - - return AgentResponse( - result=None, - parsed=None, - messages=[], - metrics=Metrics( - duration_ms=0, - duration_api_ms=0, - num_turns=0, - total_cost_usd=None, - usage=None, - session_id="", - ), - is_error=is_error, - ) diff --git a/swe_af/agent_ai/providers/claude/__init__.py b/swe_af/agent_ai/providers/claude/__init__.py deleted file mode 100644 index f785cb0..0000000 --- a/swe_af/agent_ai/providers/claude/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from swe_af.agent_ai.providers.claude.client import ClaudeProviderClient, ClaudeProviderConfig, DEFAULT_TOOLS - -__all__ = ["ClaudeProviderClient", "ClaudeProviderConfig", "DEFAULT_TOOLS"] diff --git a/swe_af/agent_ai/providers/claude/adapter.py b/swe_af/agent_ai/providers/claude/adapter.py deleted file mode 100644 index 38f3eda..0000000 --- a/swe_af/agent_ai/providers/claude/adapter.py +++ /dev/null @@ -1,37 +0,0 @@ -"""Claude SDK adapter helpers.""" - -from __future__ import annotations - -from typing import Any - -from claude_agent_sdk import ( - TextBlock as _TextBlock, - ThinkingBlock as _ThinkingBlock, - ToolResultBlock as _ToolResultBlock, - ToolUseBlock as _ToolUseBlock, -) - -from swe_af.agent_ai.types import ( - Content, - TextContent, - ThinkingContent, - ToolResultContent, - ToolUseContent, -) - - -def convert_content_block(block: Any) -> Content: - """Map Claude SDK content blocks to provider-agnostic content dataclasses.""" - if isinstance(block, _TextBlock): - return TextContent(text=block.text) - if isinstance(block, _ToolUseBlock): - return ToolUseContent(id=block.id, name=block.name, input=block.input) - if isinstance(block, _ToolResultBlock): - return ToolResultContent( - tool_use_id=block.tool_use_id, - content=block.content, - is_error=block.is_error or False, - ) - if isinstance(block, _ThinkingBlock): - return ThinkingContent(thinking=block.thinking, signature=block.signature) - return TextContent(text=str(block)[:500]) diff --git a/swe_af/agent_ai/providers/claude/client.py b/swe_af/agent_ai/providers/claude/client.py deleted file mode 100644 index 39e8cf9..0000000 --- a/swe_af/agent_ai/providers/claude/client.py +++ /dev/null @@ -1,586 +0,0 @@ -"""Claude provider client backed by ``claude_agent_sdk``.""" - -from __future__ import annotations - -import asyncio -import importlib.metadata -import json -import os -import time -import uuid -from dataclasses import dataclass, field -from pathlib import Path -from typing import IO, Any, Type, TypeVar - -from pydantic import BaseModel - -from claude_agent_sdk import ( - AssistantMessage as _AssistantMessage, - ClaudeAgentOptions, - ResultMessage as _ResultMessage, - query as _query, -) - -from swe_af.agent_ai.providers.claude.adapter import convert_content_block -from swe_af.agent_ai.types import ( - AgentResponse, - Content, - ErrorKind, - Message, - Metrics, - TextContent, - ThinkingContent, - Tool, - ToolResultContent, - ToolUseContent, -) - -T = TypeVar("T", bound=BaseModel) - -_TRANSIENT_PATTERNS = frozenset( - { - "rate limit", - "rate_limit", - "overloaded", - "timeout", - "timed out", - "connection reset", - "connection refused", - "temporarily unavailable", - "service unavailable", - "503", - "502", - "504", - "internal server error", - "500", - } -) - -_SDK_PROTOCOL_ERROR_PATTERNS = frozenset( - { - "unknown message type: rate_limit_event", - } -) - -# Keep in sync with dependency pins in pyproject/requirements. -_STABLE_SDK_VERSION = "0.1.20" - -DEFAULT_TOOLS: list[str] = [ - Tool.READ, - Tool.WRITE, - Tool.EDIT, - Tool.BASH, - Tool.GLOB, - Tool.GREP, -] - -_SCHEMA_FILE_TOOLS: list[str] = [Tool.WRITE, Tool.READ] - - -def _is_transient(error: str) -> bool: - if _is_sdk_protocol_error(error): - return False - low = error.lower() - return any(p in low for p in _TRANSIENT_PATTERNS) - - -def _is_sdk_protocol_error(error: str) -> bool: - low = error.lower() - return any(p in low for p in _SDK_PROTOCOL_ERROR_PATTERNS) - - -def _installed_sdk_version() -> str: - try: - return importlib.metadata.version("claude-agent-sdk") - except Exception: - return "unknown" - - -def _build_sdk_protocol_error_message(raw_error: str, *, sdk_version: str | None = None) -> str: - version = sdk_version or _installed_sdk_version() - return ( - f"{raw_error}. " - f"Detected claude-agent-sdk version={version}. " - "This is a known stream compatibility failure in some SDK versions. " - f"Use claude-agent-sdk=={_STABLE_SDK_VERSION} for SWE-AF stability." - ) - - -def _schema_output_path(cwd: str) -> str: - """Generate a unique temp file path for structured JSON output.""" - name = f".claude_output_{uuid.uuid4().hex[:12]}.json" - return os.path.join(os.path.abspath(cwd), name) - - -def _build_schema_suffix(output_path: str, schema_json: str) -> str: - """Prompt suffix instructing the agent to write structured output to a file.""" - return ( - f"\n\n---\n" - f"IMPORTANT — STRUCTURED OUTPUT REQUIREMENT:\n" - f"After completing the task, you MUST write your final structured output " - f"as a single valid JSON object to this file:\n" - f" {output_path}\n\n" - f"The JSON must conform to this schema:\n" - f"```json\n{schema_json}\n```\n\n" - f"Write ONLY valid JSON to the file — no markdown fences, no explanation, " - f"just the raw JSON object. Use the Write tool to create the file." - ) - - -def _read_and_parse_json_file(path: str, schema: Type[T]) -> T | None: - """Read a JSON file and parse against schema. Returns None on failure.""" - try: - if not os.path.exists(path): - return None - with open(path, "r", encoding="utf-8") as f: - raw = f.read() - text = raw.strip() - if text.startswith("```"): - lines = text.split("\n", 1) - text = lines[1] if len(lines) > 1 else text - if text.endswith("```"): - text = text[: -len("```")] - text = text.strip() - data = json.loads(text) - return schema.model_validate(data) - except Exception: - return None - - -def _cleanup_files(paths: list[str]) -> None: - """Remove all temp files, silently ignoring missing/errors.""" - for p in paths: - try: - if os.path.exists(p): - os.remove(p) - except OSError: - pass - - -def _content_to_dict(c: Content) -> dict[str, Any]: - """Convert a Content dataclass to a JSON-serializable dict.""" - if isinstance(c, TextContent): - return {"type": "text", "text": c.text[:500]} - if isinstance(c, ToolUseContent): - return {"type": "tool_use", "name": c.name, "id": c.id} - if isinstance(c, ToolResultContent): - return { - "type": "tool_result", - "tool_use_id": c.tool_use_id, - "is_error": c.is_error, - } - if isinstance(c, ThinkingContent): - return {"type": "thinking", "length": len(c.thinking)} - return {"type": "unknown"} - - -def _write_log(fh: IO[str], event: str, **data: Any) -> None: - """Append a single JSONL event to the log file handle.""" - entry = {"ts": time.time(), "event": event, **data} - fh.write(json.dumps(entry, default=str) + "\n") - fh.flush() - - -def _open_log(log_file: str | Path | None) -> IO[str] | None: - """Open a log file for appending. Returns None if no log_file.""" - if log_file is None: - return None - path = Path(log_file) - path.parent.mkdir(parents=True, exist_ok=True) - return open(path, "a", encoding="utf-8") - - -@dataclass -class ClaudeProviderConfig: - """Configuration for the Claude provider client.""" - - model: str = "sonnet" - cwd: str | Path = "." - max_turns: int = 10 - allowed_tools: list[str] = field(default_factory=lambda: list(DEFAULT_TOOLS)) - system_prompt: str | None = None - max_retries: int = 3 - initial_delay: float = 1.0 - max_delay: float = 30.0 - backoff_factor: float = 2.0 - permission_mode: str | None = None - max_budget_usd: float | None = None - env: dict[str, str] = field(default_factory=dict) - - -class ClaudeProviderClient: - """Async client for invoking Claude Code as an AI agent.""" - - def __init__(self, config: ClaudeProviderConfig | None = None) -> None: - self.config = config or ClaudeProviderConfig() - - async def run( - self, - prompt: str, - *, - model: str | None = None, - cwd: str | Path | None = None, - max_turns: int | None = None, - allowed_tools: list[str] | None = None, - system_prompt: str | None = None, - output_schema: Type[T] | None = None, - max_retries: int | None = None, - max_budget_usd: float | None = None, - permission_mode: str | None = None, - env: dict[str, str] | None = None, - log_file: str | Path | None = None, - ) -> AgentResponse[T]: - """Run a prompt through Claude Code.""" - cfg = self.config - effective_model = model or cfg.model - effective_cwd = str(cwd or cfg.cwd) - effective_turns = max_turns or cfg.max_turns - effective_tools = allowed_tools if allowed_tools is not None else list(cfg.allowed_tools) - effective_retries = max_retries if max_retries is not None else cfg.max_retries - effective_env = {**cfg.env, **(env or {})} - effective_system = system_prompt or cfg.system_prompt - effective_budget = max_budget_usd or cfg.max_budget_usd - effective_perm = permission_mode or cfg.permission_mode - - output_path: str | None = None - final_prompt = prompt - if output_schema: - output_path = _schema_output_path(effective_cwd) - schema_json = json.dumps(output_schema.model_json_schema(), indent=2) - final_prompt = prompt + _build_schema_suffix(output_path, schema_json) - for t in _SCHEMA_FILE_TOOLS: - if t not in effective_tools: - effective_tools.append(t) - - _stderr_lines: list[str] = [] - - def _stderr_callback(line: str) -> None: - _stderr_lines.append(line) - if len(_stderr_lines) > 200: - _stderr_lines.pop(0) - - opts_kwargs: dict[str, Any] = { - "model": effective_model, - "cwd": effective_cwd, - "max_turns": effective_turns, - "stderr": _stderr_callback, - } - if effective_tools: - opts_kwargs["allowed_tools"] = effective_tools - if effective_system: - opts_kwargs["system_prompt"] = effective_system - if effective_budget: - opts_kwargs["max_budget_usd"] = effective_budget - if effective_perm: - opts_kwargs["permission_mode"] = effective_perm - if effective_env: - opts_kwargs["env"] = effective_env - - options = ClaudeAgentOptions(**opts_kwargs) - - _temp_files: list[str] = [] - if output_path: - _temp_files.append(output_path) - - log_fh = _open_log(log_file) - try: - return await self._run_with_retries( - prompt=prompt, - final_prompt=final_prompt, - options=options, - output_schema=output_schema, - output_path=output_path, - effective_cwd=effective_cwd, - effective_model=effective_model, - effective_env=effective_env, - effective_perm=effective_perm, - effective_retries=effective_retries, - temp_files=_temp_files, - log_fh=log_fh, - stderr_lines=_stderr_lines, - ) - finally: - if log_fh: - log_fh.close() - _cleanup_files(_temp_files) - - async def _run_with_retries( - self, - *, - prompt: str, - final_prompt: str, - options: ClaudeAgentOptions, - output_schema: Type[T] | None, - output_path: str | None, - effective_cwd: str, - effective_model: str, - effective_env: dict[str, str], - effective_perm: str | None, - effective_retries: int, - temp_files: list[str], - log_fh: IO[str] | None = None, - stderr_lines: list[str] | None = None, - ) -> AgentResponse[T]: - cfg = self.config - delay = cfg.initial_delay - last_exc: Exception | None = None - - if log_fh: - _write_log(log_fh, "start", prompt=prompt, model=options.model, max_turns=options.max_turns) - - for attempt in range(effective_retries + 1): - try: - response = await self._execute(final_prompt, options, log_fh=log_fh) - - if not output_schema or output_path is None: - if log_fh: - _write_log( - log_fh, - "end", - is_error=response.is_error, - num_turns=response.metrics.num_turns, - cost_usd=response.metrics.total_cost_usd, - ) - return response - - parsed = _read_and_parse_json_file(output_path, output_schema) - if parsed is not None: - resp = AgentResponse( - result=response.result, - parsed=parsed, - messages=response.messages, - metrics=response.metrics, - is_error=False, - ) - if log_fh: - _write_log( - log_fh, - "end", - is_error=False, - num_turns=response.metrics.num_turns, - cost_usd=response.metrics.total_cost_usd, - ) - return resp - - if log_fh: - _write_log(log_fh, "backup_start", reason="structured output parse failed") - - backup_log_file = f"{log_fh.name}_backup" if log_fh else None - backup_log_fh = _open_log(backup_log_file) - try: - parsed = await self._backup_schema_agent( - original_prompt=prompt, - output_schema=output_schema, - cwd=effective_cwd, - model=effective_model, - env=effective_env, - perm=effective_perm, - temp_files=temp_files, - log_fh=backup_log_fh, - ) - finally: - if backup_log_fh: - backup_log_fh.close() - - if parsed is not None: - resp = AgentResponse( - result=response.result, - parsed=parsed, - messages=response.messages, - metrics=response.metrics, - is_error=False, - ) - if log_fh: - _write_log( - log_fh, - "end", - is_error=False, - backup_used=True, - num_turns=response.metrics.num_turns, - cost_usd=response.metrics.total_cost_usd, - ) - return resp - - if log_fh: - _write_log(log_fh, "end", is_error=True, reason="schema parse failed after backup") - return AgentResponse( - result=response.result, - parsed=None, - messages=response.messages, - metrics=response.metrics, - is_error=True, - ) - - except Exception as e: - raw_error = str(e) - if _is_sdk_protocol_error(raw_error): - compat_error = RuntimeError(_build_sdk_protocol_error_message(raw_error)) - if log_fh: - _write_log(log_fh, "end", is_error=True, error=str(compat_error)) - raise compat_error from e - - last_exc = e - _captured_stderr = "\n".join(stderr_lines[-50:]) if stderr_lines else "" - if attempt < effective_retries and _is_transient(raw_error): - if log_fh: - _write_log( - log_fh, - "retry", - attempt=attempt + 1, - error=str(e), - delay=delay, - stderr=_captured_stderr or None, - ) - if stderr_lines: - stderr_lines.clear() - await asyncio.sleep(delay) - delay = min(delay * cfg.backoff_factor, cfg.max_delay) - if output_schema: - output_path = _schema_output_path(effective_cwd) - temp_files.append(output_path) - schema_json = json.dumps(output_schema.model_json_schema(), indent=2) - final_prompt = prompt + _build_schema_suffix(output_path, schema_json) - continue - if log_fh: - _write_log(log_fh, "end", is_error=True, error=str(e), stderr=_captured_stderr or None) - raise - - raise last_exc # type: ignore[misc] - - async def _backup_schema_agent( - self, - original_prompt: str, - output_schema: Type[T], - cwd: str, - model: str, - env: dict[str, str], - perm: str | None, - temp_files: list[str], - log_fh: IO[str] | None = None, - ) -> T | None: - """Run a backup pass to reconstruct only the required JSON output.""" - output_path = _schema_output_path(cwd) - temp_files.append(output_path) - schema_json = json.dumps(output_schema.model_json_schema(), indent=2) - - backup_prompt = ( - f"A previous agent was given the following task and has ALREADY completed " - f"the work (files are written, changes are made). However, it failed to " - f"produce the required structured JSON output.\n\n" - f"Original task:\n{original_prompt}\n\n" - f"Your ONLY job is to inspect the current state of the working directory, " - f"understand what was done, and write a JSON file that accurately summarizes " - f"the result.\n\n" - f"Write the JSON to:\n {output_path}\n\n" - f"Schema:\n```json\n{schema_json}\n```\n\n" - f"Write ONLY valid JSON — no markdown fences, no explanation. " - f"Use the Write tool." - ) - - backup_tools = [Tool.READ, Tool.WRITE, Tool.GLOB, Tool.GREP] - opts_kwargs: dict[str, Any] = { - "model": model, - "cwd": cwd, - "max_turns": 5, - "allowed_tools": backup_tools, - } - if env: - opts_kwargs["env"] = env - if perm: - opts_kwargs["permission_mode"] = perm - - options = ClaudeAgentOptions(**opts_kwargs) - - if log_fh: - _write_log(log_fh, "start", prompt="[backup schema agent]", model=model, max_turns=5) - - try: - async for msg in _query(prompt=backup_prompt, options=options): - if log_fh and isinstance(msg, _AssistantMessage): - content = [convert_content_block(b) for b in (msg.content or [])] - _write_log( - log_fh, - "assistant", - turn="backup", - content=[_content_to_dict(c) for c in content], - ) - except Exception: - pass - - if log_fh: - _write_log(log_fh, "end") - - return _read_and_parse_json_file(output_path, output_schema) - - async def _execute( - self, - prompt: str, - options: ClaudeAgentOptions, - *, - log_fh: IO[str] | None = None, - ) -> AgentResponse[Any]: - """Execute a single query against the SDK and map to AgentResponse.""" - messages: list[Message] = [] - result_text: str | None = None - metrics_data: dict[str, Any] = {} - turn = 0 - - async for msg in _query(prompt=prompt, options=options): - if isinstance(msg, _AssistantMessage): - turn += 1 - content = [convert_content_block(b) for b in (msg.content or [])] - error = ErrorKind(msg.error) if msg.error else None - messages.append( - Message( - role="assistant", - content=content, - model=msg.model, - error=error, - parent_tool_use_id=msg.parent_tool_use_id, - ) - ) - if log_fh: - _write_log( - log_fh, - "assistant", - turn=turn, - model=msg.model, - content=[_content_to_dict(c) for c in content], - ) - elif isinstance(msg, _ResultMessage): - result_text = msg.result - metrics_data = { - "duration_ms": msg.duration_ms, - "duration_api_ms": msg.duration_api_ms, - "num_turns": msg.num_turns, - "total_cost_usd": msg.total_cost_usd, - "usage": msg.usage, - "session_id": msg.session_id, - } - if log_fh: - _write_log( - log_fh, - "result", - num_turns=msg.num_turns, - cost_usd=msg.total_cost_usd, - duration_ms=msg.duration_ms, - ) - - metrics = Metrics(**metrics_data) if metrics_data else Metrics( - duration_ms=0, - duration_api_ms=0, - num_turns=0, - total_cost_usd=None, - usage=None, - session_id="", - ) - - is_error = metrics_data.get("is_error", False) if metrics_data else False - - return AgentResponse( - result=result_text, - parsed=None, - messages=messages, - metrics=metrics, - is_error=is_error, - ) diff --git a/swe_af/agent_ai/providers/codex/__init__.py b/swe_af/agent_ai/providers/codex/__init__.py deleted file mode 100644 index 63b3d28..0000000 --- a/swe_af/agent_ai/providers/codex/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from swe_af.agent_ai.providers.codex.client import CodexProviderClient, CodexProviderConfig, DEFAULT_TOOLS - -__all__ = ["CodexProviderClient", "CodexProviderConfig", "DEFAULT_TOOLS"] diff --git a/swe_af/agent_ai/providers/codex/adapter.py b/swe_af/agent_ai/providers/codex/adapter.py deleted file mode 100644 index 04e654a..0000000 --- a/swe_af/agent_ai/providers/codex/adapter.py +++ /dev/null @@ -1,102 +0,0 @@ -"""Codex CLI adapter helpers.""" - -from __future__ import annotations - -import json -from typing import Any - -CLAUDE_ALIAS_MODELS = {"haiku", "sonnet", "opus"} - - -def should_pass_model(model: str | None) -> bool: - """Only pass model to codex when it is not a Claude alias.""" - if not model: - return False - return model.lower() not in CLAUDE_ALIAS_MODELS - - -def build_codex_command( - *, - codex_bin: str, - cwd: str, - prompt: str, - model: str | None, - output_schema_path: str | None, - output_last_message_path: str, -) -> list[str]: - """Build the codex CLI argv list.""" - cmd = [ - codex_bin, - "exec", - "--json", - "-c", - "mcp_servers.figma.enabled=false", - "--dangerously-bypass-approvals-and-sandbox", - "-C", - cwd, - "--skip-git-repo-check", - "--output-last-message", - output_last_message_path, - ] - if should_pass_model(model): - cmd.extend(["-m", model]) - if output_schema_path: - cmd.extend(["--output-schema", output_schema_path]) - cmd.append(prompt) - return cmd - - -def parse_codex_jsonl(stdout_text: str) -> tuple[str | None, dict[str, Any] | None, list[dict[str, Any]]]: - """Parse codex JSONL event stream for final message and usage.""" - final_text: str | None = None - usage: dict[str, Any] | None = None - events: list[dict[str, Any]] = [] - - for raw_line in stdout_text.splitlines(): - line = raw_line.strip() - if not line: - continue - try: - event = json.loads(line) - except Exception: - continue - if isinstance(event, dict): - events.append(event) - if event.get("type") == "item.completed": - item = event.get("item") or {} - if item.get("type") == "agent_message" and isinstance(item.get("text"), str): - final_text = item["text"] - elif event.get("type") == "turn.completed": - turn_usage = event.get("usage") - if isinstance(turn_usage, dict): - usage = turn_usage - - return final_text, usage, events - - -def normalize_schema_for_codex(schema: dict[str, Any]) -> dict[str, Any]: - """Normalize JSON schema for codex strict-mode requirements. - - Codex structured output currently requires object schemas to: - - explicitly set ``additionalProperties: false`` - - provide ``required`` as an array including all object properties - """ - - def _walk(node: Any) -> Any: - if isinstance(node, dict): - node_type = node.get("type") - if node_type == "object": - node["additionalProperties"] = False - properties = node.get("properties") - if isinstance(properties, dict): - node["required"] = list(properties.keys()) - else: - node["required"] = [] - for key, value in list(node.items()): - node[key] = _walk(value) - elif isinstance(node, list): - return [_walk(v) for v in node] - return node - - copied = json.loads(json.dumps(schema)) - return _walk(copied) diff --git a/swe_af/agent_ai/providers/codex/client.py b/swe_af/agent_ai/providers/codex/client.py deleted file mode 100644 index ab809d8..0000000 --- a/swe_af/agent_ai/providers/codex/client.py +++ /dev/null @@ -1,364 +0,0 @@ -"""Codex provider client backed by ``codex exec`` CLI.""" - -from __future__ import annotations - -import asyncio -import json -import os -import time -import uuid -from dataclasses import dataclass, field -from pathlib import Path -from typing import IO, Any, Type, TypeVar - -from pydantic import BaseModel - -from swe_af.agent_ai.providers.codex.adapter import ( - build_codex_command, - normalize_schema_for_codex, - parse_codex_jsonl, -) -from swe_af.agent_ai.types import AgentResponse, Message, Metrics, TextContent, Tool - -T = TypeVar("T", bound=BaseModel) - -_TRANSIENT_PATTERNS = frozenset( - { - "rate limit", - "rate_limit", - "overloaded", - "timeout", - "timed out", - "connection reset", - "connection refused", - "temporarily unavailable", - "service unavailable", - "503", - "502", - "504", - "internal server error", - "500", - } -) - -DEFAULT_TOOLS: list[str] = [ - Tool.READ, - Tool.WRITE, - Tool.EDIT, - Tool.BASH, - Tool.GLOB, - Tool.GREP, -] - - -def _is_transient(error: str) -> bool: - low = error.lower() - return any(p in low for p in _TRANSIENT_PATTERNS) - - -def _tmp_path(cwd: str, prefix: str) -> str: - name = f".{prefix}_{uuid.uuid4().hex[:12]}.json" - return os.path.join(os.path.abspath(cwd), name) - - -def _read_json(path: str) -> dict[str, Any] | None: - try: - if not os.path.exists(path): - return None - with open(path, "r", encoding="utf-8") as f: - text = f.read().strip() - return json.loads(text) - except Exception: - return None - - -def _read_and_parse_json_file(path: str, schema: Type[T]) -> T | None: - """Read a JSON file and parse against schema. Returns None on failure.""" - data = _read_json(path) - if data is None: - return None - try: - return schema.model_validate(data) - except Exception: - return None - - -def _cleanup_files(paths: list[str]) -> None: - for p in paths: - try: - if os.path.exists(p): - os.remove(p) - except OSError: - pass - - -def _write_log(fh: IO[str], event: str, **data: Any) -> None: - entry = {"ts": time.time(), "event": event, **data} - fh.write(json.dumps(entry, default=str) + "\n") - fh.flush() - - -def _open_log(log_file: str | Path | None) -> IO[str] | None: - if log_file is None: - return None - path = Path(log_file) - path.parent.mkdir(parents=True, exist_ok=True) - return open(path, "a", encoding="utf-8") - - -@dataclass -class CodexProviderConfig: - """Configuration for the Codex CLI provider.""" - - codex_bin: str = "codex" - model: str = "sonnet" - cwd: str | Path = "." - max_turns: int = 10 - allowed_tools: list[str] = field(default_factory=lambda: list(DEFAULT_TOOLS)) - system_prompt: str | None = None - max_retries: int = 3 - initial_delay: float = 1.0 - max_delay: float = 30.0 - backoff_factor: float = 2.0 - permission_mode: str | None = None - max_budget_usd: float | None = None - env: dict[str, str] = field(default_factory=dict) - - -class CodexProviderClient: - """Async client for invoking Codex via CLI.""" - - def __init__(self, config: CodexProviderConfig | None = None) -> None: - self.config = config or CodexProviderConfig() - - async def run( - self, - prompt: str, - *, - model: str | None = None, - cwd: str | Path | None = None, - max_turns: int | None = None, - allowed_tools: list[str] | None = None, - system_prompt: str | None = None, - output_schema: Type[T] | None = None, - max_retries: int | None = None, - max_budget_usd: float | None = None, - permission_mode: str | None = None, - env: dict[str, str] | None = None, - log_file: str | Path | None = None, - ) -> AgentResponse[T]: - """Run a prompt through Codex CLI with optional schema-constrained output.""" - cfg = self.config - effective_model = model or cfg.model - effective_cwd = str(cwd or cfg.cwd) - effective_retries = max_retries if max_retries is not None else cfg.max_retries - effective_env = {**cfg.env, **(env or {})} - effective_system = system_prompt or cfg.system_prompt - - output_schema_path: str | None = None - output_last_message_path = _tmp_path(effective_cwd, "codex_output") - temp_files: list[str] = [output_last_message_path] - - if output_schema: - output_schema_path = _tmp_path(effective_cwd, "codex_schema") - temp_files.append(output_schema_path) - schema_obj = normalize_schema_for_codex(output_schema.model_json_schema()) - with open(output_schema_path, "w", encoding="utf-8") as f: - json.dump(schema_obj, f, indent=2) - - final_prompt = prompt - if effective_system: - final_prompt = f"System Instructions:\n{effective_system}\n\nUser Task:\n{prompt}" - - log_fh = _open_log(log_file) - try: - return await self._run_with_retries( - prompt=prompt, - final_prompt=final_prompt, - effective_model=effective_model, - effective_cwd=effective_cwd, - effective_env=effective_env, - output_schema=output_schema, - output_schema_path=output_schema_path, - output_last_message_path=output_last_message_path, - effective_retries=effective_retries, - log_fh=log_fh, - ) - finally: - if log_fh: - log_fh.close() - _cleanup_files(temp_files) - - async def _run_with_retries( - self, - *, - prompt: str, - final_prompt: str, - effective_model: str, - effective_cwd: str, - effective_env: dict[str, str], - output_schema: Type[T] | None, - output_schema_path: str | None, - output_last_message_path: str, - effective_retries: int, - log_fh: IO[str] | None, - ) -> AgentResponse[T]: - cfg = self.config - delay = cfg.initial_delay - last_exc: Exception | None = None - - if log_fh: - _write_log(log_fh, "start", prompt=prompt, model=effective_model, provider="codex") - - for attempt in range(effective_retries + 1): - try: - response = await self._execute( - prompt=final_prompt, - model=effective_model, - cwd=effective_cwd, - env=effective_env, - output_schema_path=output_schema_path, - output_last_message_path=output_last_message_path, - log_fh=log_fh, - ) - - if not output_schema: - if log_fh: - _write_log( - log_fh, - "end", - is_error=response.is_error, - num_turns=response.metrics.num_turns, - cost_usd=response.metrics.total_cost_usd, - ) - return response - - parsed = _read_and_parse_json_file(output_last_message_path, output_schema) - if parsed is not None: - parsed_response = AgentResponse( - result=response.result, - parsed=parsed, - messages=response.messages, - metrics=response.metrics, - is_error=False, - ) - if log_fh: - _write_log( - log_fh, - "end", - is_error=False, - num_turns=parsed_response.metrics.num_turns, - cost_usd=parsed_response.metrics.total_cost_usd, - ) - return parsed_response - - if log_fh: - _write_log(log_fh, "end", is_error=True, reason="schema parse failed") - return AgentResponse( - result=response.result, - parsed=None, - messages=response.messages, - metrics=response.metrics, - is_error=True, - ) - - except Exception as e: - last_exc = e - if attempt < effective_retries and _is_transient(str(e)): - if log_fh: - _write_log(log_fh, "retry", attempt=attempt + 1, error=str(e), delay=delay) - await asyncio.sleep(delay) - delay = min(delay * cfg.backoff_factor, cfg.max_delay) - continue - if log_fh: - _write_log(log_fh, "end", is_error=True, error=str(e)) - raise - - raise last_exc # type: ignore[misc] - - async def _execute( - self, - *, - prompt: str, - model: str, - cwd: str, - env: dict[str, str], - output_schema_path: str | None, - output_last_message_path: str, - log_fh: IO[str] | None, - ) -> AgentResponse[Any]: - started = time.time() - - cmd = build_codex_command( - codex_bin=self.config.codex_bin, - cwd=cwd, - prompt=prompt, - model=model, - output_schema_path=output_schema_path, - output_last_message_path=output_last_message_path, - ) - - proc = await asyncio.create_subprocess_exec( - *cmd, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - cwd=cwd, - env={**os.environ, **env}, - ) - stdout_b, stderr_b = await proc.communicate() - - stdout_text = stdout_b.decode("utf-8", errors="replace") - stderr_text = stderr_b.decode("utf-8", errors="replace") - - if proc.returncode != 0: - raise RuntimeError( - f"codex exec failed with exit code {proc.returncode}: {stderr_text[-800:]}" - ) - - final_text, usage, events = parse_codex_jsonl(stdout_text) - if final_text is None and os.path.exists(output_last_message_path): - try: - with open(output_last_message_path, "r", encoding="utf-8") as f: - final_text = f.read().strip() - except OSError: - final_text = None - - duration_ms = int((time.time() - started) * 1000) - output_tokens = usage.get("output_tokens", 0) if isinstance(usage, dict) else 0 - - metrics = Metrics( - duration_ms=duration_ms, - duration_api_ms=duration_ms, - num_turns=1, - total_cost_usd=None, - usage=usage, - session_id="", - ) - messages = [] - if final_text: - messages.append( - Message( - role="assistant", - content=[TextContent(text=final_text)], - model=model, - ) - ) - - if log_fh: - _write_log( - log_fh, - "result", - provider="codex", - output_tokens=output_tokens, - duration_ms=duration_ms, - stderr_tail=stderr_text[-400:] if stderr_text else None, - event_count=len(events), - ) - - return AgentResponse( - result=final_text, - parsed=None, - messages=messages, - metrics=metrics, - is_error=False, - ) diff --git a/swe_af/agent_ai/providers/opencode/__init__.py b/swe_af/agent_ai/providers/opencode/__init__.py deleted file mode 100644 index de70651..0000000 --- a/swe_af/agent_ai/providers/opencode/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -"""OpenCode provider for AgentAI.""" - -from swe_af.agent_ai.providers.opencode.client import ( - OpenCodeProviderClient, - OpenCodeProviderConfig, - DEFAULT_TOOLS, -) - -__all__ = ["OpenCodeProviderClient", "OpenCodeProviderConfig", "DEFAULT_TOOLS"] diff --git a/swe_af/agent_ai/providers/opencode/client.py b/swe_af/agent_ai/providers/opencode/client.py deleted file mode 100644 index ef83898..0000000 --- a/swe_af/agent_ai/providers/opencode/client.py +++ /dev/null @@ -1,432 +0,0 @@ -"""OpenCode provider client backed by ``opencode acp`` CLI.""" - -from __future__ import annotations - -import asyncio -import json -import os -import time -import uuid -from dataclasses import dataclass, field -from pathlib import Path -from typing import IO, Any, Type, TypeVar - -from pydantic import BaseModel - -from swe_af.agent_ai.types import ( - AgentResponse, - Content, - Message, - Metrics, - TextContent, - Tool, -) - -T = TypeVar("T", bound=BaseModel) - -_TRANSIENT_PATTERNS = frozenset( - { - "rate limit", - "rate_limit", - "overloaded", - "timeout", - "timed out", - "connection reset", - "connection refused", - "temporarily unavailable", - "service unavailable", - "503", - "502", - "504", - "internal server error", - "500", - } -) - -DEFAULT_TOOLS: list[str] = [ - Tool.READ, - Tool.WRITE, - Tool.EDIT, - Tool.BASH, - Tool.GLOB, - Tool.GREP, -] - -_SCHEMA_FILE_TOOLS: list[str] = [Tool.WRITE, Tool.READ] - - -def _is_transient(error: str) -> bool: - """Check if an error message indicates a transient failure.""" - low = error.lower() - return any(p in low for p in _TRANSIENT_PATTERNS) - - -def _schema_output_path(cwd: str) -> str: - """Generate a unique temp file path for structured JSON output.""" - name = f".opencode_output_{uuid.uuid4().hex[:12]}.json" - return os.path.join(os.path.abspath(cwd), name) - - -def _build_schema_suffix(output_path: str, schema_json: str) -> str: - """Prompt suffix instructing the agent to write structured output to a file.""" - return ( - f"\n\n---\n" - f"IMPORTANT — STRUCTURED OUTPUT REQUIREMENT:\n" - f"After completing the task, you MUST write your final structured output " - f"as a single valid JSON object to this file:\n" - f" {output_path}\n\n" - f"The JSON must conform to this schema:\n" - f"```json\n{schema_json}\n```\n\n" - f"Write ONLY valid JSON to the file — no markdown fences, no explanation, " - f"just the raw JSON object. Use the Write tool to create the file." - ) - - -def _read_and_parse_json_file(path: str, schema: Type[T]) -> T | None: - """Read a JSON file and parse against schema. Returns None on failure.""" - try: - if not os.path.exists(path): - return None - with open(path, "r", encoding="utf-8") as f: - raw = f.read() - text = raw.strip() - # Strip markdown fences if present - if text.startswith("```"): - lines = text.split("\n", 1) - text = lines[1] if len(lines) > 1 else text - if text.endswith("```"): - text = text[: -len("```")] - text = text.strip() - data = json.loads(text) - return schema.model_validate(data) - except Exception: - return None - - -def _cleanup_files(paths: list[str]) -> None: - """Remove all temp files, silently ignoring missing/errors.""" - for p in paths: - try: - if os.path.exists(p): - os.remove(p) - except OSError: - pass - - -def _content_to_dict(c: Content) -> dict[str, Any]: - """Convert a Content dataclass to a JSON-serializable dict.""" - if isinstance(c, TextContent): - return {"type": "text", "text": c.text[:500]} - return {"type": "unknown"} - - -def _write_log(fh: IO[str], event: str, **data: Any) -> None: - """Append a single JSONL event to the log file handle.""" - entry = {"ts": time.time(), "event": event, **data} - fh.write(json.dumps(entry, default=str) + "\n") - fh.flush() - - -def _open_log(log_file: str | Path | None) -> IO[str] | None: - """Open a log file for appending. Returns None if no log_file.""" - if log_file is None: - return None - path = Path(log_file) - path.parent.mkdir(parents=True, exist_ok=True) - return open(path, "a", encoding="utf-8") - - -@dataclass -class OpenCodeProviderConfig: - """Configuration for the OpenCode provider client.""" - - opencode_bin: str = "opencode" - model: str = "sonnet" - cwd: str | Path = "." - max_turns: int = 10 - allowed_tools: list[str] = field(default_factory=lambda: list(DEFAULT_TOOLS)) - system_prompt: str | None = None - max_retries: int = 3 - initial_delay: float = 1.0 - max_delay: float = 30.0 - backoff_factor: float = 2.0 - permission_mode: str | None = None - max_budget_usd: float | None = None - env: dict[str, str] = field(default_factory=dict) - - -class OpenCodeProviderClient: - """Async client for invoking OpenCode via CLI with prompt-based structured output.""" - - def __init__(self, config: OpenCodeProviderConfig | None = None) -> None: - self.config = config or OpenCodeProviderConfig() - - async def run( - self, - prompt: str, - *, - model: str | None = None, - cwd: str | Path | None = None, - max_turns: int | None = None, - allowed_tools: list[str] | None = None, - system_prompt: str | None = None, - output_schema: Type[T] | None = None, - max_retries: int | None = None, - max_budget_usd: float | None = None, - permission_mode: str | None = None, - env: dict[str, str] | None = None, - log_file: str | Path | None = None, - ) -> AgentResponse[T]: - """Run a prompt through OpenCode ACP.""" - cfg = self.config - effective_model = model or cfg.model - effective_cwd = str(cwd or cfg.cwd) - effective_turns = max_turns or cfg.max_turns - effective_tools = allowed_tools if allowed_tools is not None else list(cfg.allowed_tools) - effective_retries = max_retries if max_retries is not None else cfg.max_retries - effective_env = {**cfg.env, **(env or {})} - effective_system = system_prompt or cfg.system_prompt - - output_path: str | None = None - final_prompt = prompt - if output_schema: - output_path = _schema_output_path(effective_cwd) - schema_json = json.dumps(output_schema.model_json_schema(), indent=2) - final_prompt = prompt + _build_schema_suffix(output_path, schema_json) - # Ensure Write and Read tools are available for structured output - for t in _SCHEMA_FILE_TOOLS: - if t not in effective_tools: - effective_tools.append(t) - - temp_files: list[str] = [] - if output_path: - temp_files.append(output_path) - - log_fh = _open_log(log_file) - try: - return await self._run_with_retries( - prompt=prompt, - final_prompt=final_prompt, - output_schema=output_schema, - output_path=output_path, - effective_cwd=effective_cwd, - effective_model=effective_model, - effective_turns=effective_turns, - effective_tools=effective_tools, - effective_system=effective_system, - effective_env=effective_env, - effective_retries=effective_retries, - temp_files=temp_files, - log_fh=log_fh, - ) - finally: - if log_fh: - log_fh.close() - _cleanup_files(temp_files) - - async def _run_with_retries( - self, - *, - prompt: str, - final_prompt: str, - output_schema: Type[T] | None, - output_path: str | None, - effective_cwd: str, - effective_model: str, - effective_turns: int, - effective_tools: list[str], - effective_system: str | None, - effective_env: dict[str, str], - effective_retries: int, - temp_files: list[str], - log_fh: IO[str] | None = None, - ) -> AgentResponse[T]: - """Execute with retry logic for transient errors.""" - cfg = self.config - delay = cfg.initial_delay - last_exc: Exception | None = None - - if log_fh: - _write_log( - log_fh, - "start", - prompt=prompt, - model=effective_model, - max_turns=effective_turns, - ) - - for attempt in range(effective_retries + 1): - try: - response = await self._execute( - prompt=final_prompt, - model=effective_model, - cwd=effective_cwd, - max_turns=effective_turns, - tools=effective_tools, - system_prompt=effective_system, - env=effective_env, - log_fh=log_fh, - ) - - # If no output schema, return as-is - if not output_schema or output_path is None: - if log_fh: - _write_log( - log_fh, - "end", - is_error=response.is_error, - num_turns=response.metrics.num_turns, - cost_usd=response.metrics.total_cost_usd, - ) - return response - - # Try to parse structured output from file - parsed = _read_and_parse_json_file(output_path, output_schema) - if parsed is not None: - resp = AgentResponse( - result=response.result, - parsed=parsed, - messages=response.messages, - metrics=response.metrics, - is_error=False, - ) - if log_fh: - _write_log( - log_fh, - "end", - is_error=False, - num_turns=response.metrics.num_turns, - cost_usd=response.metrics.total_cost_usd, - ) - return resp - - # Structured output parsing failed - if log_fh: - _write_log(log_fh, "end", is_error=True, reason="schema parse failed") - return AgentResponse( - result=response.result, - parsed=None, - messages=response.messages, - metrics=response.metrics, - is_error=True, - ) - - except Exception as e: - last_exc = e - # Check if error is transient and we have retries left - if attempt < effective_retries and _is_transient(str(e)): - if log_fh: - _write_log( - log_fh, - "retry", - attempt=attempt + 1, - error=str(e), - delay=delay, - ) - await asyncio.sleep(delay) - delay = min(delay * cfg.backoff_factor, cfg.max_delay) - # Generate new temp file path for retry - if output_schema: - output_path = _schema_output_path(effective_cwd) - temp_files.append(output_path) - schema_json = json.dumps(output_schema.model_json_schema(), indent=2) - final_prompt = prompt + _build_schema_suffix(output_path, schema_json) - continue - # Non-transient error or out of retries - if log_fh: - _write_log(log_fh, "end", is_error=True, error=str(e)) - raise - - raise last_exc # type: ignore[misc] - - async def _execute( - self, - *, - prompt: str, - model: str, - cwd: str, - max_turns: int, - tools: list[str], - system_prompt: str | None, - env: dict[str, str], - log_fh: IO[str] | None = None, - ) -> AgentResponse[Any]: - """Execute single OpenCode invocation via subprocess.""" - start_time = time.time() - - # Build command - OpenCode v1.2+ uses 'run' with -m flag for model selection - cmd = [ - self.config.opencode_bin, - "run", - "-m", - model, - prompt, - ] - - # Construct full environment (inherit + add user env) - full_env = { - **os.environ, - **env, - } - - # Execute OpenCode in headless mode - # CRITICAL: Set stdin=DEVNULL to prevent OpenCode from trying to open /dev/tty - proc = await asyncio.create_subprocess_exec( - *cmd, - stdin=asyncio.subprocess.DEVNULL, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - cwd=cwd, - env=full_env, - ) - - # Wait for completion - stdout_b, stderr_b = await proc.communicate() - duration_ms = int((time.time() - start_time) * 1000) - - stdout_text = stdout_b.decode("utf-8", errors="replace") - stderr_text = stderr_b.decode("utf-8", errors="replace") - - if proc.returncode != 0: - error_msg = f"opencode failed with exit code {proc.returncode}: {stderr_text}" - raise RuntimeError(error_msg) - - # Parse output - OpenCode writes response to stdout - final_text = stdout_text.strip() or None - - # Build metrics - metrics = Metrics( - duration_ms=duration_ms, - duration_api_ms=duration_ms, - num_turns=1, - total_cost_usd=None, - usage=None, - session_id="", - ) - - # Build messages - messages = [ - Message( - role="assistant", - content=[TextContent(text=final_text)] if final_text else [], - model=model, - error=None, - parent_tool_use_id=None, - ) - ] - - if log_fh: - _write_log( - log_fh, - "result", - num_turns=1, - duration_ms=duration_ms, - ) - - return AgentResponse( - result=final_text, - parsed=None, - messages=messages, - metrics=metrics, - is_error=False, - ) diff --git a/swe_af/agent_ai/types.py b/swe_af/agent_ai/types.py deleted file mode 100644 index e750483..0000000 --- a/swe_af/agent_ai/types.py +++ /dev/null @@ -1,136 +0,0 @@ -"""Shared typed schemas for provider-agnostic AI responses and configuration.""" - -from __future__ import annotations - -from dataclasses import dataclass -from enum import Enum -from typing import Any, Literal - - -class Tool(str, Enum): - """Available coding-agent tools.""" - - READ = "Read" - WRITE = "Write" - EDIT = "Edit" - BASH = "Bash" - GLOB = "Glob" - GREP = "Grep" - NOTEBOOK_EDIT = "NotebookEdit" - TASK = "Task" - WEB_FETCH = "WebFetch" - WEB_SEARCH = "WebSearch" - - -class Model(str, Enum): - """Common model aliases retained for compatibility.""" - - HAIKU = "haiku" - SONNET = "sonnet" - OPUS = "opus" - - -class ErrorKind(str, Enum): - """Error categories from agent backends.""" - - AUTH = "authentication_failed" - BILLING = "billing_error" - RATE_LIMIT = "rate_limit" - INVALID_REQUEST = "invalid_request" - SERVER = "server_error" - UNKNOWN = "unknown" - - -@dataclass(frozen=True, slots=True) -class TextContent: - """Text block from assistant.""" - - text: str - - -@dataclass(frozen=True, slots=True) -class ToolUseContent: - """Tool invocation by assistant.""" - - id: str - name: str - input: dict[str, Any] - - -@dataclass(frozen=True, slots=True) -class ToolResultContent: - """Result returned from a tool.""" - - tool_use_id: str - content: str | list[dict[str, Any]] | None = None - is_error: bool = False - - -@dataclass(frozen=True, slots=True) -class ThinkingContent: - """Extended thinking block.""" - - thinking: str - signature: str = "" - - -Content = TextContent | ToolUseContent | ToolResultContent | ThinkingContent - - -@dataclass(frozen=True, slots=True) -class Message: - """A single assistant message in a conversation.""" - - role: Literal["assistant"] - content: list[Content] - model: str - error: ErrorKind | None = None - parent_tool_use_id: str | None = None - - -@dataclass(frozen=True, slots=True) -class Metrics: - """Execution metrics.""" - - duration_ms: int - duration_api_ms: int - num_turns: int - total_cost_usd: float | None - usage: dict[str, Any] | None - session_id: str - - -@dataclass(frozen=True, slots=True) -class AgentResponse[T]: - """Typed response from an AI invocation.""" - - result: str | None - parsed: T | None - messages: list[Message] - metrics: Metrics - is_error: bool - - @property - def text(self) -> str: - """Last text content from the conversation, or result.""" - if self.result: - return self.result - for msg in reversed(self.messages): - for block in reversed(msg.content): - if isinstance(block, TextContent): - return block.text - return "" - - @property - def tool_uses(self) -> list[ToolUseContent]: - """All tool invocations across messages.""" - out: list[ToolUseContent] = [] - for msg in self.messages: - for block in msg.content: - if isinstance(block, ToolUseContent): - out.append(block) - return out - - -# Backward-compatible alias retained during migration. -ClaudeResponse = AgentResponse diff --git a/swe_af/execution/_replanner_compat.py b/swe_af/execution/_replanner_compat.py index a57206c..3718610 100644 --- a/swe_af/execution/_replanner_compat.py +++ b/swe_af/execution/_replanner_compat.py @@ -1,20 +1,20 @@ -"""Replanner agent — invokes AgentAI to restructure the DAG after failures.""" +"""Replanner agent — backward-compat direct invocation via router.harness().""" from __future__ import annotations import os from typing import Callable -from swe_af.agent_ai import AgentAI, AgentAIConfig -from swe_af.agent_ai.types import Tool from swe_af.execution.schemas import ( DAGState, + DEFAULT_AGENT_MAX_TURNS, ExecutionConfig, IssueResult, ReplanAction, ReplanDecision, ) from swe_af.prompts.replanner import SYSTEM_PROMPT, replanner_task_prompt +from swe_af.reasoners import router async def invoke_replanner( @@ -23,22 +23,7 @@ async def invoke_replanner( config: ExecutionConfig, note_fn: Callable | None = None, ) -> ReplanDecision: - """Call the replanner agent to decide how to handle unrecoverable failures. - - The replanner gets read-only codebase access and the full DAG context - (completed work, failures with error context, remaining issues, PRD, - architecture). It returns a structured ReplanDecision. - - Args: - dag_state: Current execution state with all context. - failed_issues: The unrecoverable failures that triggered replanning. - config: Execution configuration (model, etc.). - note_fn: Optional callback for observability notes. - - Returns: - ReplanDecision from the replanner agent. Falls back to ABORT if the - agent fails to produce valid output. - """ + """Call the replanner agent to decide how to handle unrecoverable failures.""" if note_fn: failed_names = [f.issue_name for f in failed_issues] note_fn( @@ -49,32 +34,42 @@ async def invoke_replanner( task_prompt = replanner_task_prompt(dag_state, failed_issues) - log_dir = os.path.join(dag_state.artifacts_dir, "logs") if dag_state.artifacts_dir else None - log_path = os.path.join(log_dir, f"replanner_{dag_state.replan_count}.jsonl") if log_dir else None - - ai = AgentAI(AgentAIConfig( - model=config.replan_model, - provider=config.ai_provider, - cwd=dag_state.repo_path or ".", - max_turns=15, - allowed_tools=[Tool.READ, Tool.GLOB, Tool.GREP, Tool.BASH], - )) + log_dir = ( + os.path.join(dag_state.artifacts_dir, "logs") + if dag_state.artifacts_dir + else None + ) + provider = "claude-code" if config.ai_provider == "claude" else config.ai_provider try: - response = await ai.run( - task_prompt, + result = await router.harness( + prompt=task_prompt, + schema=ReplanDecision, + provider=provider, + model=config.replan_model, + max_turns=DEFAULT_AGENT_MAX_TURNS, + tools=["Read", "Glob", "Grep", "Bash"], + permission_mode=None, system_prompt=SYSTEM_PROMPT, - output_schema=ReplanDecision, - log_file=log_path, + cwd=dag_state.repo_path or ".", ) - if response.parsed is not None: + # Log raw response for debugging + if log_dir: + raw_log = os.path.join( + log_dir, f"replanner_{dag_state.replan_count}_raw.txt" + ) + os.makedirs(log_dir, exist_ok=True) + with open(raw_log, "w") as f: + f.write(getattr(result, "text", "") or "(empty)") + + if result.parsed is not None: if note_fn: note_fn( - f"Replan decision: {response.parsed.action.value} — {response.parsed.summary}", + f"Replan decision: {result.parsed.action.value} — {result.parsed.summary}", tags=["execution", "replan", "complete"], ) - return response.parsed + return result.parsed except Exception as e: if note_fn: diff --git a/tests/fast/test_fast_init_executor_planner_verifier_routing.py b/tests/fast/test_fast_init_executor_planner_verifier_routing.py index 62aac7c..afb6c6d 100644 --- a/tests/fast/test_fast_init_executor_planner_verifier_routing.py +++ b/tests/fast/test_fast_init_executor_planner_verifier_routing.py @@ -81,7 +81,7 @@ def _run_subprocess( ) -> subprocess.CompletedProcess: """Run python -c in a fresh subprocess with clean env.""" env = os.environ.copy() - for key in (unset_keys or []): + for key in unset_keys or []: env.pop(key, None) env.setdefault("AGENTFIELD_SERVER", "http://localhost:9999") if extra_env: @@ -99,6 +99,7 @@ def _run_subprocess( def _patch_fast_router_note(): """Suppress fast_router.note() calls to avoid 'Router not attached' errors.""" import swe_af.fast as fast_pkg # noqa: PLC0415 + router = fast_pkg.fast_router old = router.__dict__.get("note", None) router.__dict__["note"] = MagicMock(return_value=None) @@ -122,6 +123,7 @@ class TestFastInitThinWrapperDelegation: def test_run_coder_wrapper_delegates_to_execution_agents(self) -> None: """run_coder wrapper in __init__ must call _ea.run_coder via lazy import.""" import swe_af.fast as fast_pkg # noqa: PLC0415 + src = inspect.getsource(fast_pkg) assert "_ea.run_coder" in src, ( @@ -132,6 +134,7 @@ def test_run_coder_wrapper_delegates_to_execution_agents(self) -> None: def test_run_verifier_wrapper_delegates_to_execution_agents(self) -> None: """run_verifier wrapper in __init__ must call _ea.run_verifier via lazy import.""" import swe_af.fast as fast_pkg # noqa: PLC0415 + src = inspect.getsource(fast_pkg) assert "_ea.run_verifier" in src, ( @@ -142,6 +145,7 @@ def test_run_verifier_wrapper_delegates_to_execution_agents(self) -> None: def test_run_git_init_wrapper_delegates_to_execution_agents(self) -> None: """run_git_init wrapper in __init__ must call _ea.run_git_init via lazy import.""" import swe_af.fast as fast_pkg # noqa: PLC0415 + src = inspect.getsource(fast_pkg) assert "_ea.run_git_init" in src, ( @@ -151,6 +155,7 @@ def test_run_git_init_wrapper_delegates_to_execution_agents(self) -> None: def test_run_repo_finalize_wrapper_delegates_to_execution_agents(self) -> None: """run_repo_finalize wrapper in __init__ must call _ea.run_repo_finalize.""" import swe_af.fast as fast_pkg # noqa: PLC0415 + src = inspect.getsource(fast_pkg) assert "_ea.run_repo_finalize" in src, ( @@ -160,6 +165,7 @@ def test_run_repo_finalize_wrapper_delegates_to_execution_agents(self) -> None: def test_run_github_pr_wrapper_delegates_to_execution_agents(self) -> None: """run_github_pr wrapper in __init__ must call _ea.run_github_pr via lazy import.""" import swe_af.fast as fast_pkg # noqa: PLC0415 + src = inspect.getsource(fast_pkg) assert "_ea.run_github_pr" in src, ( @@ -169,6 +175,7 @@ def test_run_github_pr_wrapper_delegates_to_execution_agents(self) -> None: def test_fast_init_source_has_lazy_imports_for_all_wrappers(self) -> None: """__init__ wrappers must all use lazy imports (inside function body).""" import swe_af.fast as fast_pkg # noqa: PLC0415 + src = inspect.getsource(fast_pkg) # All thin wrappers must use lazy import of execution_agents @@ -179,8 +186,7 @@ def test_fast_init_source_has_lazy_imports_for_all_wrappers(self) -> None: # The imports must be inside function bodies (indented) lines = src.splitlines() import_lines = [ - line for line in lines - if "import swe_af.reasoners.execution_agents" in line + line for line in lines if "import swe_af.reasoners.execution_agents" in line ] assert import_lines, "Must have execution_agents imports" for line in import_lines: @@ -197,19 +203,23 @@ def test_fast_init_does_not_call_pipeline_agents_in_code(self) -> None: """ import ast # noqa: PLC0415 import swe_af.fast as fast_pkg # noqa: PLC0415 + src = inspect.getsource(fast_pkg) # Parse AST to check actual imported names (not docstring text) tree = ast.parse(src) pipeline_agents = { - "run_architect", "run_tech_lead", "run_sprint_planner", - "run_product_manager", "run_issue_writer", + "run_architect", + "run_tech_lead", + "run_sprint_planner", + "run_product_manager", + "run_issue_writer", } # Verify: no direct import of pipeline agents at the module level for node in ast.walk(tree): if isinstance(node, ast.ImportFrom) and node.module: - for alias in (node.names or []): + for alias in node.names or []: name = alias.name or "" assert name not in pipeline_agents, ( f"__init__ must not import pipeline agent {name!r} " @@ -218,19 +228,24 @@ def test_fast_init_does_not_call_pipeline_agents_in_code(self) -> None: # Verify: fast_router tag is 'swe-fast' (not reusing a pipeline router) from swe_af.fast import fast_router # noqa: PLC0415 + tags = getattr(fast_router, "tags", None) or getattr(fast_router, "_tags", []) assert "swe-fast" in tags, ( f"fast_router tags must be 'swe-fast', got {tags!r}; " "this ensures it's not mistakenly using the pipeline router" ) + def test_all_five_thin_wrappers_registered_on_fast_router(self) -> None: """All five thin wrappers must be registered on fast_router at import time.""" import swe_af.fast as fast_pkg # noqa: PLC0415 names = {r["func"].__name__ for r in fast_pkg.fast_router.reasoners} expected_wrappers = { - "run_git_init", "run_coder", "run_verifier", - "run_repo_finalize", "run_github_pr", + "run_git_init", + "run_coder", + "run_verifier", + "run_repo_finalize", + "run_github_pr", } missing = expected_wrappers - names assert not missing, ( @@ -258,8 +273,7 @@ def test_pipeline_not_loaded_after_importing_fast_init(self) -> None: """ result = _run_subprocess(code, unset_keys=["NODE_ID"]) assert result.returncode == 0, ( - f"swe_af.fast import must NOT load pipeline.py; " - f"stderr={result.stderr!r}" + f"swe_af.fast import must NOT load pipeline.py; stderr={result.stderr!r}" ) assert "OK" in result.stdout @@ -289,8 +303,7 @@ def test_pipeline_not_loaded_after_importing_all_fast_submodules(self) -> None: """ result = _run_subprocess(code, unset_keys=["NODE_ID"]) assert result.returncode == 0, ( - f"No fast submodule should load pipeline; " - f"stderr={result.stderr!r}" + f"No fast submodule should load pipeline; stderr={result.stderr!r}" ) assert "OK" in result.stdout @@ -337,8 +350,7 @@ def test_executor_routes_to_swe_fast_run_coder_when_node_id_unset(self) -> None: """ result = _run_subprocess(code, unset_keys=["NODE_ID"]) assert result.returncode == 0, ( - f"executor must use 'swe-fast.run_coder' route; " - f"stderr={result.stderr!r}" + f"executor must use 'swe-fast.run_coder' route; stderr={result.stderr!r}" ) assert "OK" in result.stdout @@ -436,8 +448,10 @@ def test_fallback_prd_dict_forwarded_to_fast_verify_unchanged(self) -> None: called_prd: list = [] verify_response = { - "passed": True, "summary": "ok", - "criteria_results": [], "suggested_fixes": [], + "passed": True, + "summary": "ok", + "criteria_results": [], + "suggested_fixes": [], } mock_app = MagicMock() @@ -450,15 +464,17 @@ async def _capture_call(route: str, **kwargs: Any) -> Any: with patch.dict("sys.modules", {"swe_af.fast.app": mock_app}): from swe_af.fast.verifier import fast_verify # noqa: PLC0415 - _run_coro(fast_verify( - prd=fallback_prd, - repo_path="/tmp/repo", - task_results=[], - verifier_model="haiku", - permission_mode="", - ai_provider="claude", - artifacts_dir="", - )) + _run_coro( + fast_verify( + prd=fallback_prd, + repo_path="/tmp/repo", + task_results=[], + verifier_model="haiku", + permission_mode="", + ai_provider="claude", + artifacts_dir="", + ) + ) assert len(called_prd) == 1, "fast_verify must call app.call once" received_prd = called_prd[0] @@ -507,8 +523,14 @@ async def test_coder_complete_false_yields_failed_outcome(self) -> None: import swe_af.fast.executor as ex # noqa: PLC0415 result = await ex.fast_execute_tasks( - tasks=[{"name": "t1", "title": "T1", "description": "d", - "acceptance_criteria": ["ac"]}], + tasks=[ + { + "name": "t1", + "title": "T1", + "description": "d", + "acceptance_criteria": ["ac"], + } + ], repo_path="/tmp/repo", task_timeout_seconds=30, ) @@ -541,8 +563,14 @@ async def test_coder_complete_true_yields_completed_outcome(self) -> None: import swe_af.fast.executor as ex # noqa: PLC0415 result = await ex.fast_execute_tasks( - tasks=[{"name": "t1", "title": "T1", "description": "d", - "acceptance_criteria": ["ac"]}], + tasks=[ + { + "name": "t1", + "title": "T1", + "description": "d", + "acceptance_criteria": ["ac"], + } + ], repo_path="/tmp/repo", task_timeout_seconds=30, ) @@ -571,8 +599,14 @@ async def test_coder_missing_complete_field_defaults_to_failed(self) -> None: import swe_af.fast.executor as ex # noqa: PLC0415 result = await ex.fast_execute_tasks( - tasks=[{"name": "t1", "title": "T1", "description": "d", - "acceptance_criteria": ["ac"]}], + tasks=[ + { + "name": "t1", + "title": "T1", + "description": "d", + "acceptance_criteria": ["ac"], + } + ], repo_path="/tmp/repo", task_timeout_seconds=30, ) @@ -602,8 +636,14 @@ async def test_executor_files_changed_forwarded_from_coder(self) -> None: import swe_af.fast.executor as ex # noqa: PLC0415 result = await ex.fast_execute_tasks( - tasks=[{"name": "api-task", "title": "API Task", "description": "d", - "acceptance_criteria": ["ac"]}], + tasks=[ + { + "name": "api-task", + "title": "API Task", + "description": "d", + "acceptance_criteria": ["ac"], + } + ], repo_path="/tmp/repo", task_timeout_seconds=30, ) @@ -636,15 +676,17 @@ def test_partial_result_from_app_call_is_completed_by_schema(self) -> None: with patch.dict("sys.modules", {"swe_af.fast.app": mock_app}): from swe_af.fast.verifier import fast_verify # noqa: PLC0415 - result = _run_coro(fast_verify( - prd="goal", - repo_path="/tmp", - task_results=[], - verifier_model="haiku", - permission_mode="", - ai_provider="claude", - artifacts_dir="", - )) + result = _run_coro( + fast_verify( + prd="goal", + repo_path="/tmp", + task_results=[], + verifier_model="haiku", + permission_mode="", + ai_provider="claude", + artifacts_dir="", + ) + ) # FastVerificationResult defaults: criteria_results=[], suggested_fixes=[] assert "passed" in result and result["passed"] is True @@ -672,15 +714,17 @@ def test_extra_fields_from_app_call_are_ignored_gracefully(self) -> None: with patch.dict("sys.modules", {"swe_af.fast.app": mock_app}): from swe_af.fast.verifier import fast_verify # noqa: PLC0415 - result = _run_coro(fast_verify( - prd="goal", - repo_path="/tmp", - task_results=[], - verifier_model="haiku", - permission_mode="", - ai_provider="claude", - artifacts_dir="", - )) + result = _run_coro( + fast_verify( + prd="goal", + repo_path="/tmp", + task_results=[], + verifier_model="haiku", + permission_mode="", + ai_provider="claude", + artifacts_dir="", + ) + ) assert result["passed"] is False assert result["summary"] == "failed" @@ -692,7 +736,8 @@ def test_verifier_result_can_be_stored_in_fast_build_result(self) -> None: for verification, expected_passed in [ ( { - "passed": True, "summary": "All criteria met", + "passed": True, + "summary": "All criteria met", "criteria_results": [{"name": "ac-1", "passed": True}], "suggested_fixes": [], }, @@ -700,7 +745,8 @@ def test_verifier_result_can_be_stored_in_fast_build_result(self) -> None: ), ( { - "passed": False, "summary": "2 criteria failed", + "passed": False, + "summary": "2 criteria failed", "criteria_results": [], "suggested_fixes": ["fix A", "fix B"], }, @@ -709,7 +755,11 @@ def test_verifier_result_can_be_stored_in_fast_build_result(self) -> None: ]: build_result = FastBuildResult( plan_result={"tasks": []}, - execution_result={"completed_count": 1, "failed_count": 0, "task_results": []}, + execution_result={ + "completed_count": 1, + "failed_count": 0, + "task_results": [], + }, verification=verification, success=expected_passed, summary="test", @@ -848,9 +898,7 @@ def test_fast_verify_registered_on_fast_router(self) -> None: import swe_af.fast.verifier # noqa: F401, PLC0415 names = {r["func"].__name__ for r in fast_pkg.fast_router.reasoners} - assert "fast_verify" in names, ( - "fast_verify must be registered on fast_router" - ) + assert "fast_verify" in names, "fast_verify must be registered on fast_router" def test_no_pipeline_reasoners_on_fast_router(self) -> None: """fast_router must not contain any swe-planner pipeline planning agents.""" @@ -861,8 +909,11 @@ def test_no_pipeline_reasoners_on_fast_router(self) -> None: names = {r["func"].__name__ for r in fast_pkg.fast_router.reasoners} pipeline_forbidden = { - "run_architect", "run_tech_lead", "run_sprint_planner", - "run_product_manager", "run_issue_writer", + "run_architect", + "run_tech_lead", + "run_sprint_planner", + "run_product_manager", + "run_issue_writer", } leaked = pipeline_forbidden & names assert not leaked, ( @@ -896,25 +947,6 @@ def test_open_code_runtime_maps_to_opencode_provider(self) -> None: f"_runtime_to_provider('open_code') must return 'opencode', got {provider!r}" ) - def test_runtime_to_provider_aligns_with_agentai_config_provider_field(self) -> None: - """AgentAIConfig.provider must accept the values returned by _runtime_to_provider.""" - from swe_af.agent_ai import AgentAIConfig # noqa: PLC0415 - import swe_af.fast.app as fast_app # noqa: PLC0415 - - for runtime in ("claude_code", "open_code"): - provider = fast_app._runtime_to_provider(runtime) - try: - cfg = AgentAIConfig(provider=provider, model="haiku", cwd="/tmp") - assert cfg.provider == provider, ( - f"AgentAIConfig.provider must accept {provider!r} from " - f"_runtime_to_provider({runtime!r})" - ) - except Exception as exc: - pytest.fail( - f"AgentAIConfig rejected provider={provider!r} " - f"(from runtime={runtime!r}): {exc}" - ) - def test_build_source_uses_runtime_to_provider(self) -> None: """build() must call _runtime_to_provider to convert config.runtime to ai_provider.""" import swe_af.fast.app as fast_app # noqa: PLC0415 diff --git a/tests/test_agent_ai_provider.py b/tests/test_agent_ai_provider.py deleted file mode 100644 index 7d886d5..0000000 --- a/tests/test_agent_ai_provider.py +++ /dev/null @@ -1,22 +0,0 @@ -import unittest - -from swe_af.agent_ai.client import AgentAIConfig -from swe_af.agent_ai.factory import build_provider_client -from swe_af.execution.schemas import BuildConfig, ExecutionConfig - - -class AgentAIProviderTests(unittest.TestCase): - def test_config_defaults(self) -> None: - self.assertEqual(BuildConfig().runtime, "claude_code") - self.assertEqual(ExecutionConfig().runtime, "claude_code") - self.assertEqual(BuildConfig().ai_provider, "claude") - self.assertEqual(ExecutionConfig().ai_provider, "claude") - - def test_codex_provider_factory(self) -> None: - cfg = AgentAIConfig(provider="codex") - client = build_provider_client(cfg) - self.assertEqual(client.__class__.__name__, "CodexProviderClient") - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_claude_provider_compat.py b/tests/test_claude_provider_compat.py deleted file mode 100644 index 733b5d9..0000000 --- a/tests/test_claude_provider_compat.py +++ /dev/null @@ -1,60 +0,0 @@ -import unittest -from unittest.mock import AsyncMock - -from claude_agent_sdk import ClaudeAgentOptions -from swe_af.agent_ai.providers.claude.client import ( - ClaudeProviderClient, - _build_sdk_protocol_error_message, - _is_sdk_protocol_error, - _is_transient, -) - - -class ClaudeProviderCompatTests(unittest.TestCase): - def test_rate_limit_event_is_protocol_error(self) -> None: - err = "Unknown message type: rate_limit_event" - self.assertTrue(_is_sdk_protocol_error(err)) - - def test_rate_limit_event_is_not_treated_as_transient(self) -> None: - err = "Unknown message type: rate_limit_event" - self.assertFalse(_is_transient(err)) - - def test_protocol_error_message_contains_sdk_guidance(self) -> None: - err = "Unknown message type: rate_limit_event" - msg = _build_sdk_protocol_error_message(err, sdk_version="0.1.39") - self.assertIn("version=0.1.39", msg) - self.assertIn("claude-agent-sdk==0.1.20", msg) - - def test_protocol_error_fails_fast_without_retries(self) -> None: - err = "Unknown message type: rate_limit_event" - client = ClaudeProviderClient() - client._execute = AsyncMock(side_effect=RuntimeError(err)) - options = ClaudeAgentOptions(model="sonnet", cwd=".", max_turns=1) - - async def _run() -> None: - await client._run_with_retries( - prompt="test", - final_prompt="test", - options=options, - output_schema=None, - output_path=None, - effective_cwd=".", - effective_model="sonnet", - effective_env={}, - effective_perm=None, - effective_retries=3, - temp_files=[], - stderr_lines=[], - ) - - with self.assertRaises(RuntimeError) as ctx: - import asyncio - - asyncio.run(_run()) - - self.assertIn("claude-agent-sdk==0.1.20", str(ctx.exception)) - self.assertEqual(client._execute.await_count, 1) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_codex_adapter.py b/tests/test_codex_adapter.py deleted file mode 100644 index 9b42c14..0000000 --- a/tests/test_codex_adapter.py +++ /dev/null @@ -1,71 +0,0 @@ -import unittest - -from swe_af.agent_ai.providers.codex.adapter import ( - build_codex_command, - normalize_schema_for_codex, - parse_codex_jsonl, -) - - -class CodexAdapterTests(unittest.TestCase): - def test_build_command_omits_model_for_claude_alias(self) -> None: - cmd = build_codex_command( - codex_bin="codex", - cwd=".", - prompt="hello", - model="sonnet", - output_schema_path=None, - output_last_message_path="/tmp/out.json", - ) - self.assertNotIn("-m", cmd) - - def test_build_command_includes_model_and_schema(self) -> None: - cmd = build_codex_command( - codex_bin="codex", - cwd=".", - prompt="hello", - model="gpt-5.3-codex", - output_schema_path="/tmp/schema.json", - output_last_message_path="/tmp/out.json", - ) - self.assertIn("-m", cmd) - self.assertIn("--output-schema", cmd) - self.assertEqual(cmd[0:3], ["codex", "exec", "--json"]) - - def test_parse_jsonl_extracts_last_message_and_usage(self) -> None: - stream = "\n".join( - [ - '{"type":"item.completed","item":{"type":"agent_message","text":"{\\"ok\\":true}"}}', - '{"type":"turn.completed","usage":{"input_tokens":10,"output_tokens":2}}', - ] - ) - final_text, usage, events = parse_codex_jsonl(stream) - self.assertEqual(final_text, '{"ok":true}') - self.assertEqual(usage["output_tokens"], 2) - self.assertEqual(len(events), 2) - - def test_normalize_schema_sets_additional_properties_false(self) -> None: - schema = { - "type": "object", - "properties": { - "item": { - "type": "object", - "properties": {"name": {"type": "string"}}, - } - }, - } - normalized = normalize_schema_for_codex(schema) - self.assertIs(normalized["additionalProperties"], False) - self.assertIs( - normalized["properties"]["item"]["additionalProperties"], - False, - ) - self.assertEqual(normalized["required"], ["item"]) - self.assertEqual( - normalized["properties"]["item"]["required"], - ["name"], - ) - - -if __name__ == "__main__": - unittest.main() From de28876028ee35be3990d99d4dbbe169e34df921 Mon Sep 17 00:00:00 2001 From: santoshkumarradha Date: Fri, 13 Mar 2026 08:49:24 +0530 Subject: [PATCH 7/9] chore: update git identity to eng@agentfield.ai and add .dockerignore --- .dockerignore | 41 +++++++++++++++++++++++++++++++++++++++++ Dockerfile | 6 +++--- 2 files changed, 44 insertions(+), 3 deletions(-) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..4f17c42 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,41 @@ +.git +.venv +venv +__pycache__ +*.pyc +*.pyo +*.pyd +.pytest_cache +.mypy_cache +.ruff_cache +.coverage +htmlcov +.env +.env.* +!.env.example +.DS_Store +.idea +.vscode +*.swp +*.swo +*.bak + +examples/ +_worktrees/ +assets/ +docs/ +tests/ +swe_af.egg-info/ +.plandb.db + +docker-compose*.yml +Dockerfile +.dockerignore +.github/ +*.md +LICENSE +CODEOWNERS +CODE_OF_CONDUCT.md +SECURITY.md +Makefile +railway.toml diff --git a/Dockerfile b/Dockerfile index aa44397..dcd088b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -25,14 +25,14 @@ ENV PATH="/root/.opencode/bin:${PATH}" # Git identity — env vars take highest precedence and are inherited by all # subprocesses including Claude Code agent instances spawned by the SDK ENV GIT_AUTHOR_NAME="SWE-AF" \ - GIT_AUTHOR_EMAIL="contact@agentfield.com" \ + GIT_AUTHOR_EMAIL="eng@agentfield.ai" \ GIT_COMMITTER_NAME="SWE-AF" \ - GIT_COMMITTER_EMAIL="contact@agentfield.com" + GIT_COMMITTER_EMAIL="eng@agentfield.ai" # Configure git identity and use gh CLI as credential helper so all git # HTTPS operations (clone, push, fetch) authenticate via GH_TOKEN at runtime. RUN git config --global user.name "SWE-AF" && \ - git config --global user.email "contact@agentfield.com" && \ + git config --global user.email "eng@agentfield.ai" && \ gh auth setup-git --hostname github.com --force # Install uv for fast package installation From 59a8a42bc45836dbdb43021c6225a71b75351022 Mon Sep 17 00:00:00 2001 From: santoshkumarradha Date: Fri, 13 Mar 2026 09:10:27 +0530 Subject: [PATCH 8/9] chore: remove accidentally committed _worktrees/ and add to .gitignore --- .gitignore | 2 ++ _worktrees/issue-22-planning-agents | 1 - _worktrees/issue-23-coding-loop | 1 - _worktrees/issue-24-qa-synthesizer | 1 - _worktrees/issue-25-advisory-agents | 1 - _worktrees/issue-26-git-workflow | 1 - _worktrees/issue-27-verification-output | 1 - _worktrees/issue-28-fast-module | 1 - 8 files changed, 2 insertions(+), 7 deletions(-) delete mode 160000 _worktrees/issue-22-planning-agents delete mode 160000 _worktrees/issue-23-coding-loop delete mode 160000 _worktrees/issue-24-qa-synthesizer delete mode 160000 _worktrees/issue-25-advisory-agents delete mode 160000 _worktrees/issue-26-git-workflow delete mode 160000 _worktrees/issue-27-verification-output delete mode 160000 _worktrees/issue-28-fast-module diff --git a/.gitignore b/.gitignore index 1abb3c5..46bd037 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,8 @@ Thumbs.db # AgentField runtime worktrees .worktrees/ **/.worktrees/ +_worktrees/ +**/_worktrees/ # Root runtime artifacts for this node (example artifacts are versioned separately) /.artifacts/ diff --git a/_worktrees/issue-22-planning-agents b/_worktrees/issue-22-planning-agents deleted file mode 160000 index 751b7cf..0000000 --- a/_worktrees/issue-22-planning-agents +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 751b7cf293b93253702fb58b23728b42f460274b diff --git a/_worktrees/issue-23-coding-loop b/_worktrees/issue-23-coding-loop deleted file mode 160000 index 7048b65..0000000 --- a/_worktrees/issue-23-coding-loop +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 7048b65b46adc2d12279b251676c983d47ab2785 diff --git a/_worktrees/issue-24-qa-synthesizer b/_worktrees/issue-24-qa-synthesizer deleted file mode 160000 index 1e19da0..0000000 --- a/_worktrees/issue-24-qa-synthesizer +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 1e19da00d38c1046bd7ea7eeb706cf36593ba39d diff --git a/_worktrees/issue-25-advisory-agents b/_worktrees/issue-25-advisory-agents deleted file mode 160000 index 3088f66..0000000 --- a/_worktrees/issue-25-advisory-agents +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 3088f669bbd4a9cb68581093ba9f4d0b762038b3 diff --git a/_worktrees/issue-26-git-workflow b/_worktrees/issue-26-git-workflow deleted file mode 160000 index 4369c9f..0000000 --- a/_worktrees/issue-26-git-workflow +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 4369c9fafdedd0786e2f85df382077028286253b diff --git a/_worktrees/issue-27-verification-output b/_worktrees/issue-27-verification-output deleted file mode 160000 index d32dc02..0000000 --- a/_worktrees/issue-27-verification-output +++ /dev/null @@ -1 +0,0 @@ -Subproject commit d32dc02e6053e27be84047cb7b435aaf81f23cf5 diff --git a/_worktrees/issue-28-fast-module b/_worktrees/issue-28-fast-module deleted file mode 160000 index 01e17b7..0000000 --- a/_worktrees/issue-28-fast-module +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 01e17b7a5b53d3fe8c5adce3246afd920002a93c From d3e39f4f81701899de36aa4d8b673419d4674bd9 Mon Sep 17 00:00:00 2001 From: Abir Abbas Date: Tue, 17 Mar 2026 13:59:55 -0400 Subject: [PATCH 9/9] fix: add Write tool to all schema-constrained harness calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The harness instructs subprocesses to write structured JSON output to .agentfield_output.json using the Write tool, but 15 of 21 agents had tools lists that didn't include "Write". This caused all schema-constrained agents to fail silently — the subprocess couldn't create the output file. Discovered during manual integration testing of the harness migration. Co-Authored-By: Claude Opus 4.6 (1M context) --- swe_af/execution/_replanner_compat.py | 2 +- swe_af/reasoners/execution_agents.py | 24 ++++++++++++------------ swe_af/reasoners/pipeline.py | 6 +++--- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/swe_af/execution/_replanner_compat.py b/swe_af/execution/_replanner_compat.py index 3718610..14b9fdc 100644 --- a/swe_af/execution/_replanner_compat.py +++ b/swe_af/execution/_replanner_compat.py @@ -48,7 +48,7 @@ async def invoke_replanner( provider=provider, model=config.replan_model, max_turns=DEFAULT_AGENT_MAX_TURNS, - tools=["Read", "Glob", "Grep", "Bash"], + tools=["Read", "Write", "Glob", "Grep", "Bash"], permission_mode=None, system_prompt=SYSTEM_PROMPT, cwd=dag_state.repo_path or ".", diff --git a/swe_af/reasoners/execution_agents.py b/swe_af/reasoners/execution_agents.py index 783438c..c9f5430 100644 --- a/swe_af/reasoners/execution_agents.py +++ b/swe_af/reasoners/execution_agents.py @@ -158,7 +158,7 @@ async def run_retry_advisor( schema=RetryAdvice, model=model, provider=provider, - tools=["Read", "Glob", "Grep", "Bash"], + tools=["Read", "Write", "Glob", "Grep", "Bash"], cwd=repo_path, max_turns=DEFAULT_AGENT_MAX_TURNS, permission_mode=permission_mode or None, @@ -238,7 +238,7 @@ async def run_issue_advisor( schema=IssueAdvisorDecision, model=model, provider=provider, - tools=["Read", "Glob", "Grep", "Bash"], + tools=["Read", "Write", "Glob", "Grep", "Bash"], cwd=cwd, max_turns=DEFAULT_AGENT_MAX_TURNS, permission_mode=permission_mode or None, @@ -317,7 +317,7 @@ async def run_replanner( schema=ReplanDecision, model=replan_model, provider=provider, - tools=["Read", "Glob", "Grep", "Bash"], + tools=["Read", "Write", "Glob", "Grep", "Bash"], cwd=state.repo_path or ".", max_turns=DEFAULT_AGENT_MAX_TURNS, permission_mode=permission_mode or None, @@ -492,7 +492,7 @@ async def run_verifier( schema=VerificationResult, model=model, provider=provider, - tools=["Read", "Glob", "Grep", "Bash"], + tools=["Read", "Write", "Glob", "Grep", "Bash"], cwd=repo_path, max_turns=DEFAULT_AGENT_MAX_TURNS, permission_mode=permission_mode or None, @@ -574,7 +574,7 @@ async def run_git_init( schema=GitInitResult, model=model, provider=provider, - tools=["Bash"], + tools=["Bash", "Write"], cwd=repo_path, max_turns=DEFAULT_AGENT_MAX_TURNS, permission_mode=permission_mode or None, @@ -647,7 +647,7 @@ class WorkspaceSetupResult(BaseModel): schema=WorkspaceSetupResult, model=model, provider=provider, - tools=["Bash"], + tools=["Bash", "Write"], cwd=repo_path, max_turns=DEFAULT_AGENT_MAX_TURNS, permission_mode=permission_mode or None, @@ -709,7 +709,7 @@ async def run_merger( schema=MergeResult, model=model, provider=provider, - tools=["Bash", "Read", "Glob", "Grep"], + tools=["Bash", "Read", "Write", "Glob", "Grep"], cwd=repo_path, max_turns=DEFAULT_AGENT_MAX_TURNS, permission_mode=permission_mode or None, @@ -848,7 +848,7 @@ class WorkspaceCleanupResult(BaseModel): schema=WorkspaceCleanupResult, model=model, provider=provider, - tools=["Bash"], + tools=["Bash", "Write"], cwd=repo_path, max_turns=DEFAULT_AGENT_MAX_TURNS, permission_mode=permission_mode or None, @@ -1073,7 +1073,7 @@ async def run_code_reviewer( schema=CodeReviewResult, model=model, provider=provider, - tools=["Read", "Glob", "Grep", "Bash"], + tools=["Read", "Write", "Glob", "Grep", "Bash"], cwd=worktree_path, max_turns=DEFAULT_AGENT_MAX_TURNS, permission_mode=permission_mode or None, @@ -1251,7 +1251,7 @@ class FixGeneratorOutput(BaseModel): schema=FixGeneratorOutput, model=model, provider=provider, - tools=["Read", "Glob", "Grep", "Bash"], + tools=["Read", "Write", "Glob", "Grep", "Bash"], cwd=repo_path, max_turns=DEFAULT_AGENT_MAX_TURNS, permission_mode=permission_mode or None, @@ -1315,7 +1315,7 @@ async def run_repo_finalize( schema=RepoFinalizeResult, model=model, provider=provider, - tools=["Bash", "Read", "Glob", "Grep"], + tools=["Bash", "Read", "Write", "Glob", "Grep"], cwd=repo_path, max_turns=DEFAULT_AGENT_MAX_TURNS, permission_mode=permission_mode or None, @@ -1386,7 +1386,7 @@ async def run_github_pr( schema=GitHubPRResult, model=model, provider=provider, - tools=["Bash"], + tools=["Bash", "Write"], cwd=repo_path, max_turns=DEFAULT_AGENT_MAX_TURNS, permission_mode=permission_mode or None, diff --git a/swe_af/reasoners/pipeline.py b/swe_af/reasoners/pipeline.py index d791668..902adac 100644 --- a/swe_af/reasoners/pipeline.py +++ b/swe_af/reasoners/pipeline.py @@ -197,7 +197,7 @@ async def run_product_manager( provider=provider, model=model, max_turns=max_turns, - tools=["Read", "Glob", "Grep", "Bash"], + tools=["Read", "Write", "Glob", "Grep", "Bash"], permission_mode=permission_mode or None, system_prompt=system_prompt, cwd=repo_path, @@ -310,7 +310,7 @@ async def run_tech_lead( provider=provider, model=model, max_turns=max_turns, - tools=["Read", "Glob", "Grep"], + tools=["Read", "Write", "Glob", "Grep"], permission_mode=permission_mode or None, system_prompt=system_prompt, cwd=repo_path, @@ -386,7 +386,7 @@ class SprintPlanOutput(BaseModel): provider=provider, model=model, max_turns=max_turns, - tools=["Read", "Glob", "Grep"], + tools=["Read", "Write", "Glob", "Grep"], permission_mode=permission_mode or None, system_prompt=system_prompt, cwd=repo_path,