@@ -1287,6 +1421,14 @@ def list_revisions(
const approvalsBody = document.getElementById("approvals-body");
const skillsBody = document.getElementById("skills-body");
const skillDetailPre = document.getElementById("skill-detail-pre");
+const auditTrailSummary = document.getElementById("audit-trail-summary");
+const auditTrailBody = document.getElementById("audit-trail-body");
+const auditStatusFilter = document.getElementById("audit-status-filter");
+const auditRoleFilter = document.getElementById("audit-role-filter");
+const auditDetailMeta = document.getElementById("audit-detail-meta");
+const auditInputPre = document.getElementById("audit-input-pre");
+const auditPatchPre = document.getElementById("audit-patch-pre");
+const auditErrorPre = document.getElementById("audit-error-pre");
const revisionsPre = document.getElementById("revisions-pre");
const tokenFromQuery = new URLSearchParams(window.location.search).get("token") || "";
let adminToken = localStorage.getItem("autoresearch_admin_token") || tokenFromQuery;
@@ -1296,6 +1438,7 @@ def list_revisions(
let channelFormMode = "create";
let editingChannelId = null;
let editingChannelStatus = "active";
+let selectedAuditEntryId = "";
function setAdminToken() {
const input = prompt("请输入 Bearer Token(不含 Bearer 前缀)", adminToken || "");
@@ -1337,6 +1480,20 @@ def list_revisions(
return new Date(value).toLocaleString("zh-CN");
}
/**
 * Format a millisecond duration for display.
 * Returns "-" for missing values, whole milliseconds under one second,
 * and seconds otherwise (one decimal below 10 s, none at or above).
 */
function fmtDuration(value) {
  // Both null and undefined mean "no measurement recorded".
  if (value == null) return "-";
  if (value < 1000) {
    return `${Math.round(value)} ms`;
  }
  const digits = value >= 10000 ? 0 : 1;
  return `${(value / 1000).toFixed(digits)} s`;
}
+
/**
 * Join up to `limit` truthy entries with ", " and append "+N" for the rest.
 * Returns "-" when the list is missing or has no truthy entries.
 */
function compactList(values, limit = 2) {
  const kept = (values || []).filter(Boolean);
  if (kept.length === 0) return "-";
  const head = kept.slice(0, limit).join(", ");
  const hidden = kept.length - limit;
  if (hidden > 0) {
    return `${head} +${hidden}`;
  }
  return head;
}
+
function csvToList(raw) {
if (!raw) return [];
return raw
@@ -1568,6 +1725,99 @@ def list_revisions(
`;
}
/** Reset the audit-detail panes and forget the current selection. */
function clearAuditDetail() {
  selectedAuditEntryId = "";
  const placeholder = "暂无";
  auditInputPre.textContent = placeholder;
  auditPatchPre.textContent = placeholder;
  auditErrorPre.textContent = placeholder;
  auditDetailMeta.textContent = "点击单条记录查看输入、patch diff 和失败原因。";
}
+
+// Render one audit-trail entry as a table row for the admin list view.
+function auditTrailRow(item) {
+ // Prefer the terminal status; fall back to the live status, then "-".
+ const finalStatus = item.final_status || item.status || "-";
+ // Failure-like terminal states get the "inactive" pill style.
+ const pillClass = ["failed", "blocked", "interrupted", "human_review", "stalled_no_progress"].includes(finalStatus) ? "inactive" : "active";
+ // NOTE(review): the template literal below appears to have had its HTML tags
+ // stripped in this copy (pillClass is computed but never referenced here) —
+ // verify the row markup against the original source before relying on it.
+ return `
+
+ | ${fmtDate(item.recorded_at)} |
+ ${item.agent_role} / ${item.source} |
+ ${item.run_id} |
+ ${finalStatus} |
+ ${fmtDuration(item.duration_ms)} |
+ ${item.files_changed || 0} |
+ ${compactList(item.scope_paths, 2)} |
+ ${compactList(item.changed_paths, 2)} |
+ |
+
`;
+}
+
+// Load the full record for one audit entry and populate the detail panes
+// (meta summary, input JSON, patch text, error/traceback).
+async function loadAuditDetail(entryId) {
+ // Remember the selection so loadAuditTrail() can re-sync after a refresh.
+ selectedAuditEntryId = entryId;
+ auditDetailMeta.textContent = "加载详情中...";
+ try {
+ const detail = await callApi(`/api/v1/admin/audit-trail/${encodeURIComponent(entryId)}`);
+ const entry = detail.entry || {};
+ // One-line meta summary; each missing field degrades to "-".
+ const detailLines = [
+ `Role: ${entry.agent_role || "-"}`,
+ `Source: ${entry.source || "-"}`,
+ `Run: ${entry.run_id || "-"}`,
+ `Title: ${entry.title || "-"}`,
+ `Status: ${entry.final_status || entry.status || "-"}`,
+ `Raw: ${entry.status || "-"}`,
+ `Recorded: ${fmtDate(entry.recorded_at)}`,
+ `First progress: ${fmtDuration(entry.first_progress_ms)}`,
+ `First write: ${fmtDuration(entry.first_scoped_write_ms)}`,
+ `First state: ${fmtDuration(entry.first_state_heartbeat_ms)}`,
+ `Patch: ${entry.patch_uri || "-"}`,
+ `Workspace: ${entry.isolated_workspace || "-"}`,
+ detail.patch_truncated ? "Patch preview: truncated" : "Patch preview: full",
+ ];
+ auditDetailMeta.textContent = detailLines.join(" | ");
+ // Inputs pane: everything the worker run was given, rendered as JSON.
+ auditInputPre.textContent = asJSON({
+ prompt: detail.input_prompt || null,
+ job_spec: detail.job_spec || {},
+ worker_spec: detail.worker_spec || {},
+ controlled_request: detail.controlled_request || {},
+ raw_record: detail.raw_record || {},
+ });
+ auditPatchPre.textContent = detail.patch_text || (entry.patch_uri ? `Patch file: ${entry.patch_uri}` : "暂无 patch");
+ // Failure pane: reason and traceback, joined when both exist.
+ // NOTE(review): "\\n\\n" suggests this script is embedded in an outer
+ // (host-language) string literal — confirm it reaches JS as "\n\n".
+ auditErrorPre.textContent = detail.error_reason || detail.traceback
+ ? [detail.error_reason || "no error reason", detail.traceback || ""].filter(Boolean).join("\\n\\n")
+ : "无失败细节";
+ } catch (err) {
+ auditDetailMeta.textContent = `加载详情失败: ${err.message}`;
+ auditInputPre.textContent = "加载失败";
+ auditPatchPre.textContent = "加载失败";
+ auditErrorPre.textContent = String(err);
+ }
+}
+
+// Fetch the latest audit-trail page (respecting the status/role filter
+// controls), render the table and summary line, and keep the detail pane
+// consistent with the current selection.
+async function loadAuditTrail() {
+ try {
+ // Build the query string from the filter controls; "all" disables a filter.
+ const params = new URLSearchParams();
+ params.set("limit", "20");
+ params.set("status_filter", auditStatusFilter.value || "all");
+ params.set("agent_role", auditRoleFilter.value || "all");
+ const snapshot = await callApi(`/api/v1/admin/audit-trail?${params.toString()}`);
+ const items = snapshot.items || [];
+ const stats = snapshot.stats || {};
+ // NOTE(review): the fallback literal below spans lines and looks like its
+ // HTML tags were stripped in this copy — verify against the original markup.
+ auditTrailBody.innerHTML = items.map(auditTrailRow).join("")
+ || "
| 暂无 |
";
+ auditTrailSummary.textContent =
+ `Recent: ${items.length} | Success: ${stats.succeeded || 0} | Failed: ${stats.failed || 0} | Running: ${stats.running || 0} | Queued: ${stats.queued || 0} | Filter: ${(auditStatusFilter.value || "all")}/${(auditRoleFilter.value || "all")}`;
+ // Re-sync the detail pane: reload the selected entry if still listed,
+ // reset when nothing is selected, otherwise explain why it disappeared.
+ if (selectedAuditEntryId && items.some((item) => item.entry_id === selectedAuditEntryId)) {
+ await loadAuditDetail(selectedAuditEntryId);
+ } else if (!selectedAuditEntryId) {
+ clearAuditDetail();
+ } else {
+ clearAuditDetail();
+ auditDetailMeta.textContent = "当前筛选结果不包含已选记录。";
+ }
+ } catch (err) {
+ auditTrailSummary.textContent = `加载失败: ${err.message}`;
+ auditTrailBody.innerHTML = "
| 加载失败 |
";
+ }
+}
+
async function refreshAll() {
try {
const [agents, channels, capabilitySnapshot, approvals, skillSnapshot] = await Promise.all([
@@ -1585,12 +1835,16 @@ def list_revisions(
approvalsBody.innerHTML = approvals.map(approvalRow).join("") || "
| 暂无 |
";
skillsBody.innerHTML = skillItems.map(skillRow).join("") || "
| 暂无 |
";
summary.textContent = `Agents: ${agents.length} | Channels: ${channels.length} | Providers: ${(capabilitySnapshot.providers || []).length} | Approvals: ${approvals.length} | Skills: ${skillItems.length} | API: /api/v1/admin`;
+ await loadAuditTrail();
await loadRevisions();
} catch (err) {
summary.textContent = `加载失败: ${err.message}`;
capabilitiesBody.innerHTML = "
| 加载失败 |
";
approvalsBody.innerHTML = "
| 加载失败 |
";
skillsBody.innerHTML = "
| 加载失败 |
";
+ auditTrailSummary.textContent = `加载失败: ${err.message}`;
+ auditTrailBody.innerHTML = "
| 加载失败 |
";
+ clearAuditDetail();
}
}
diff --git a/src/autoresearch/api/routers/autoresearch_plans.py b/src/autoresearch/api/routers/autoresearch_plans.py
new file mode 100644
index 00000000..d3d7878e
--- /dev/null
+++ b/src/autoresearch/api/routers/autoresearch_plans.py
@@ -0,0 +1,258 @@
+from __future__ import annotations
+
+from urllib.parse import urlparse
+
+from fastapi import APIRouter, Depends, HTTPException, status
+
+from autoresearch.api.dependencies import (
+ get_autoresearch_planner_service,
+ get_housekeeper_service,
+ get_panel_access_service,
+ get_telegram_notifier_service,
+)
+from autoresearch.core.services.autoresearch_planner import AutoResearchPlannerService
+from autoresearch.core.services.housekeeper import HousekeeperService
+from autoresearch.core.services.panel_access import PanelAccessService
+from autoresearch.core.services.telegram_notify import TelegramNotifierService
+from autoresearch.shared.autoresearch_planner_contract import (
+ AutoResearchPlanRead,
+ AutoResearchPlannerRequest,
+ UpstreamWatchDecision,
+)
+
+
+router = APIRouter(prefix="/api/v1/autoresearch/plans", tags=["autoresearch-plans"])
+
+
@router.post(
    "",
    response_model=AutoResearchPlanRead,
    status_code=status.HTTP_202_ACCEPTED,
)
def create_autoresearch_plan(
    payload: AutoResearchPlannerRequest,
    service: AutoResearchPlannerService = Depends(get_autoresearch_planner_service),
    housekeeper_service: HousekeeperService = Depends(get_housekeeper_service),
    panel_access_service: PanelAccessService = Depends(get_panel_access_service),
    notifier: TelegramNotifierService = Depends(get_telegram_notifier_service),
) -> AutoResearchPlanRead:
    """Create an AutoResearch plan and best-effort notify the operator.

    Flow:
      1. Housekeeper normalises the request (``trigger_source="api"``).
      2. The notification uid is resolved and validated against panel access
         (a disallowed uid raises 400 before any plan is created).
      3. The plan is created with that uid stamped into the request.
      4. If a candidate was selected, a panel deep-link is built and a plan
         notification is attempted; an upstream-watch skip notice may also go
         out regardless of candidate selection.
      5. Delivery metadata is persisted via ``service.update_delivery`` unless
         nothing changed (no candidate, nothing sent, uid already on the plan).
    """
    prepared, _, _ = housekeeper_service.prepare_planner_request(payload, trigger_source="api")
    telegram_uid = _select_plan_notification_uid(payload=prepared, panel_access_service=panel_access_service)
    plan = service.create(prepared.model_copy(update={"telegram_uid": telegram_uid}))
    panel_action_url = None
    notification_sent = False

    if plan.selected_candidate is not None:
        # Only a plan with an actionable candidate gets an approval deep-link.
        panel_action_url = _build_plan_panel_action_url(
            panel_access_service=panel_access_service,
            plan_id=plan.plan_id,
            telegram_uid=telegram_uid,
        )
        notification_sent = _send_plan_notification(
            notifier=notifier,
            panel_access_service=panel_access_service,
            plan=plan,
            panel_action_url=panel_action_url,
            telegram_uid=telegram_uid,
        )
    # The upstream-watch notice is independent of candidate selection; keep
    # notification_sent true if either message went out.
    notification_sent = _send_upstream_watch_notification(
        notifier=notifier,
        plan=plan,
        telegram_uid=telegram_uid,
    ) or notification_sent
    if plan.selected_candidate is None and not notification_sent and telegram_uid == plan.telegram_uid:
        # Nothing to record: no candidate, no message sent, uid unchanged.
        return plan
    return service.update_delivery(
        plan.plan_id,
        telegram_uid=telegram_uid,
        panel_action_url=panel_action_url,
        notification_sent=notification_sent,
    )
+
+
@router.get("", response_model=list[AutoResearchPlanRead])
def list_autoresearch_plans(
    service: AutoResearchPlannerService = Depends(get_autoresearch_planner_service),
) -> list[AutoResearchPlanRead]:
    """Return every stored AutoResearch plan."""
    plans = service.list()
    return plans
+
+
@router.get("/{plan_id}", response_model=AutoResearchPlanRead)
def get_autoresearch_plan(
    plan_id: str,
    service: AutoResearchPlannerService = Depends(get_autoresearch_planner_service),
) -> AutoResearchPlanRead:
    """Fetch a single plan by id; responds 404 when the id is unknown."""
    found = service.get(plan_id)
    if found is not None:
        return found
    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="AutoResearch plan not found")
+
+
+def _select_plan_notification_uid(
+ *,
+ payload: AutoResearchPlannerRequest,
+ panel_access_service: PanelAccessService,
+) -> str | None:
+ candidate = (payload.telegram_uid or "").strip()
+ allowed_uids = panel_access_service.allowed_uids
+ if candidate:
+ if allowed_uids and candidate not in allowed_uids:
+ raise HTTPException(
+ status_code=status.HTTP_400_BAD_REQUEST,
+ detail="telegram uid is not allowed for panel access",
+ )
+ return candidate
+ if len(allowed_uids) == 1:
+ return allowed_uids[0]
+ return None
+
+
+def _build_plan_panel_action_url(
+ *,
+ panel_access_service: PanelAccessService,
+ plan_id: str,
+ telegram_uid: str | None,
+) -> str:
+ return panel_access_service.build_action_url(
+ query_params={"planId": plan_id},
+ telegram_uid=telegram_uid,
+ prefer_mini_app=True,
+ )
+
+
def _build_plan_notification_markup(
    *,
    panel_access_service: PanelAccessService,
    panel_action_url: str,
) -> dict[str, object] | None:
    """Return the preferred (first) reply markup from the fallback chain."""
    chain = _build_plan_notification_markups(
        panel_access_service=panel_access_service,
        panel_action_url=panel_action_url,
    )
    if not chain:
        return None
    return chain[0]
+
+
+def _build_plan_notification_markups(
+ *,
+ panel_access_service: PanelAccessService,
+ panel_action_url: str,
+) -> list[dict[str, object] | None]:
+ parsed = urlparse(panel_action_url)
+ if parsed.scheme != "https":
+ return [None]
+ url_markup = {
+ "inline_keyboard": [
+ [
+ {
+ "text": "打开 Panel 审批",
+ "url": panel_action_url,
+ }
+ ]
+ ]
+ }
+ if panel_access_service.mini_app_url:
+ return [
+ {
+ "inline_keyboard": [
+ [
+ {
+ "text": "打开 Mini App 审批",
+ "web_app": {"url": panel_action_url},
+ }
+ ]
+ ]
+ },
+ url_markup,
+ None,
+ ]
+ return [url_markup, None]
+
+
def _send_plan_notification(
    *,
    notifier: TelegramNotifierService,
    panel_access_service: PanelAccessService,
    plan: AutoResearchPlanRead,
    panel_action_url: str,
    telegram_uid: str | None,
) -> bool:
    """Send the plan notification, degrading through the markup fallback chain.

    Returns True as soon as one variant is accepted by Telegram; False when the
    notifier is disabled, no recipient exists, or every variant failed.
    """
    if not notifier.enabled or not telegram_uid:
        return False
    fallback_chain = _build_plan_notification_markups(
        panel_access_service=panel_access_service,
        panel_action_url=panel_action_url,
    )
    for markup in fallback_chain:
        # The message wording depends on how the approval link is delivered.
        delivery_kind = _plan_notification_delivery(markup)
        message = _build_plan_notification_text(plan, delivery=delivery_kind)
        accepted = notifier.send_message(
            chat_id=telegram_uid,
            text=message,
            reply_markup=markup,
        )
        if accepted:
            return True
    return False
+
+
+def _plan_notification_delivery(reply_markup: dict[str, object] | None) -> str:
+ if reply_markup is None:
+ return "text"
+ try:
+ button = reply_markup["inline_keyboard"][0][0]
+ except (KeyError, IndexError, TypeError):
+ return "panel"
+ if isinstance(button, dict) and "web_app" in button:
+ return "mini_app"
+ return "panel"
+
+
+def _build_plan_notification_text(plan: AutoResearchPlanRead, *, delivery: str) -> str:
+ candidate = plan.selected_candidate
+ if candidate is None:
+ return "🔍 AutoResearch 完成扫描,但暂时没有生成可执行的规划单。"
+ estimated_changes = max(1, len(candidate.allowed_paths))
+ approval_hint = "请前往 Panel 审批执行。"
+ if delivery == "mini_app":
+ approval_hint = "请前往 Mini App 审批执行。"
+ return (
+ f"🔍 AutoResearch 发现新优化点: {candidate.title}\n"
+ f"- target: {candidate.source_path}\n"
+ f"- category: {candidate.category}\n"
+ f"- estimated_changes: {estimated_changes}\n"
+ f"{approval_hint}"
+ )
+
+
def _send_upstream_watch_notification(
    *,
    notifier: TelegramNotifierService,
    plan: AutoResearchPlanRead,
    telegram_uid: str | None,
) -> bool:
    """Notify the operator when the upstream watch auto-skipped this cycle.

    Fires only for a SKIP decision with an enabled notifier and a resolvable
    recipient; returns whether Telegram accepted the message.
    """
    watch = plan.upstream_watch
    if not notifier.enabled or not telegram_uid or watch is None:
        return False
    # Only SKIP decisions are announced; other outcomes stay silent here.
    if watch.decision is not UpstreamWatchDecision.SKIP:
        return False
    message = _build_upstream_watch_notification_text(plan)
    return notifier.send_message(chat_id=telegram_uid, text=message)
+
+
+def _build_upstream_watch_notification_text(plan: AutoResearchPlanRead) -> str:
+ upstream_watch = plan.upstream_watch
+ if upstream_watch is None:
+ return "🛡️ 已完成上游巡检,当前没有需要同步的核心更新。"
+ focus_labels = [_format_upstream_focus_area(item) for item in upstream_watch.focus_areas if item != "repo-meta"]
+ focus_hint = "/".join(focus_labels[:3]) or "近期扩展"
+ return (
+ f"🛡️ 已完成上游巡检,最新变更({focus_hint} 修复)与核心基建无关,"
+ "已自动拦截跳过。"
+ )
+
+
+def _format_upstream_focus_area(focus_area: str) -> str:
+ if focus_area.startswith("extension:"):
+ name = focus_area.split(":", 1)[1]
+ if name.lower() == "line":
+ return "LINE"
+ return name.replace("-", " ").title()
+ return focus_area.replace("-", " ")
diff --git a/src/autoresearch/api/routers/gateway_telegram.py b/src/autoresearch/api/routers/gateway_telegram.py
index a94906fc..9591fdf5 100644
--- a/src/autoresearch/api/routers/gateway_telegram.py
+++ b/src/autoresearch/api/routers/gateway_telegram.py
@@ -13,6 +13,10 @@
get_approval_store_service,
get_capability_provider_registry,
get_claude_agent_service,
+ get_github_issue_service,
+ get_housekeeper_service,
+ get_manager_agent_service,
+ get_media_job_service,
get_openclaw_compat_service,
get_openclaw_memory_service,
get_panel_access_service,
@@ -23,7 +27,11 @@
from autoresearch.core.services.admin_config import AdminConfigService
from autoresearch.core.services.approval_store import ApprovalStoreService
from autoresearch.core.services.claude_agents import ClaudeAgentService
+from autoresearch.core.services.github_issue_service import GitHubIssueRead, GitHubIssueService
from autoresearch.core.services.group_access import GroupAccessManager
+from autoresearch.core.services.housekeeper import HousekeeperService
+from autoresearch.agents.manager_agent import ManagerAgentService
+from autoresearch.core.services.media_jobs import MediaJobService
from autoresearch.core.services.openclaw_compat import OpenClawCompatService
from autoresearch.core.services.openclaw_memory import OpenClawMemoryService
from autoresearch.core.services.panel_access import PanelAccessService
@@ -35,7 +43,10 @@
from autoresearch.shared.models import (
AdminChannelConfigCreateRequest,
AdminChannelConfigUpdateRequest,
+ ActorRole,
ApprovalDecisionRequest,
+ ApprovalRequestCreateRequest,
+ ApprovalRisk,
ApprovalStatus,
AssistantScope,
ClaudeAgentCreateRequest,
@@ -50,6 +61,8 @@
ChatType,
TelegramWebhookAck,
)
+from autoresearch.shared.manager_agent_contract import ManagerDispatchRead, ManagerDispatchRequest
+from autoresearch.shared.media_job_contract import MediaJobRead, MediaJobStatus
router = APIRouter(prefix="/api/v1/gateway/telegram", tags=["gateway", "telegram"])
@@ -80,6 +93,10 @@ def telegram_webhook(
memory_service: OpenClawMemoryService = Depends(get_openclaw_memory_service),
approval_service: ApprovalStoreService = Depends(get_approval_store_service),
agent_service: ClaudeAgentService = Depends(get_claude_agent_service),
+ housekeeper_service: HousekeeperService = Depends(get_housekeeper_service),
+ manager_service: ManagerAgentService = Depends(get_manager_agent_service),
+ media_job_service: MediaJobService = Depends(get_media_job_service),
+ github_issue_service: GitHubIssueService = Depends(get_github_issue_service),
capability_registry: CapabilityProviderRegistry = Depends(get_capability_provider_registry),
panel_access_service: PanelAccessService = Depends(get_panel_access_service),
notifier: TelegramNotifierService = Depends(get_telegram_notifier_service),
@@ -93,6 +110,10 @@ def telegram_webhook(
memory_service=memory_service,
approval_service=approval_service,
agent_service=agent_service,
+ housekeeper_service=housekeeper_service,
+ manager_service=manager_service,
+ media_job_service=media_job_service,
+ github_issue_service=github_issue_service,
capability_registry=capability_registry,
panel_access_service=panel_access_service,
notifier=notifier,
@@ -113,6 +134,10 @@ def legacy_telegram_webhook(
memory_service: OpenClawMemoryService = Depends(get_openclaw_memory_service),
approval_service: ApprovalStoreService = Depends(get_approval_store_service),
agent_service: ClaudeAgentService = Depends(get_claude_agent_service),
+ housekeeper_service: HousekeeperService = Depends(get_housekeeper_service),
+ manager_service: ManagerAgentService = Depends(get_manager_agent_service),
+ media_job_service: MediaJobService = Depends(get_media_job_service),
+ github_issue_service: GitHubIssueService = Depends(get_github_issue_service),
capability_registry: CapabilityProviderRegistry = Depends(get_capability_provider_registry),
panel_access_service: PanelAccessService = Depends(get_panel_access_service),
notifier: TelegramNotifierService = Depends(get_telegram_notifier_service),
@@ -126,6 +151,10 @@ def legacy_telegram_webhook(
memory_service=memory_service,
approval_service=approval_service,
agent_service=agent_service,
+ housekeeper_service=housekeeper_service,
+ manager_service=manager_service,
+ media_job_service=media_job_service,
+ github_issue_service=github_issue_service,
capability_registry=capability_registry,
panel_access_service=panel_access_service,
notifier=notifier,
@@ -142,6 +171,10 @@ def _handle_telegram_webhook(
memory_service: OpenClawMemoryService,
approval_service: ApprovalStoreService,
agent_service: ClaudeAgentService,
+ housekeeper_service: HousekeeperService,
+ manager_service: ManagerAgentService,
+ media_job_service: MediaJobService,
+ github_issue_service: GitHubIssueService,
capability_registry: CapabilityProviderRegistry,
panel_access_service: PanelAccessService,
notifier: TelegramNotifierService,
@@ -228,6 +261,22 @@ def _handle_telegram_webhook(
session_identity=session_identity,
)
+ if _is_task_command(text):
+ return _handle_task_command(
+ chat_id=chat_id,
+ update=update,
+ extracted=extracted,
+ background_tasks=background_tasks,
+ openclaw_service=openclaw_service,
+ approval_service=approval_service,
+ housekeeper_service=housekeeper_service,
+ manager_service=manager_service,
+ media_job_service=media_job_service,
+ github_issue_service=github_issue_service,
+ notifier=notifier,
+ session_identity=session_identity,
+ )
+
if _is_approve_command(text):
return _handle_approve_command(
chat_id=chat_id,
@@ -235,6 +284,7 @@ def _handle_telegram_webhook(
extracted=extracted,
background_tasks=background_tasks,
approval_service=approval_service,
+ github_issue_service=github_issue_service,
notifier=notifier,
session_identity=session_identity,
)
@@ -617,6 +667,241 @@ def _find_or_create_telegram_session(
)
def _handle_task_command(
    *,
    chat_id: str,
    update: dict[str, Any],
    extracted: dict[str, Any],
    background_tasks: BackgroundTasks,
    openclaw_service: OpenClawCompatService,
    approval_service: ApprovalStoreService,
    housekeeper_service: HousekeeperService,
    manager_service: ManagerAgentService,
    media_job_service: MediaJobService,
    github_issue_service: GitHubIssueService,
    notifier: TelegramNotifierService,
    session_identity: TelegramSessionIdentityRead,
) -> TelegramWebhookAck:
    """Handle a Telegram ``/task`` command.

    Routes the request down one of three paths:
      * no payload  -> usage help reply, ack with ``accepted=False``;
      * media task  -> queue a media job and execute it in the background;
      * otherwise   -> create a manager dispatch (optionally seeded from a
        GitHub issue via ``/task issue ...``) and, when auto-dispatched,
        execute it in the background.
    Side effects: records the user event on the OpenClaw session, updates the
    session status/metadata, and schedules Telegram notifications.
    """
    task_query, approval_requested = _parse_task_command(extracted["text"])
    if not task_query:
        # No payload: reply with usage help and acknowledge without accepting.
        # NOTE(review): the usage literal below is reproduced as found in this
        # copy; an angle-bracketed placeholder appears to have been stripped
        # from the "/task issue" line — confirm against the original source.
        message_text = (
            "用法:\n"
            "/task <需求>\n"
            "/task --approve <需求>\n"
            "/task issue
[补充说明]"
        )
        if notifier.enabled:
            background_tasks.add_task(notifier.send_message, chat_id=chat_id, text=message_text)
        return TelegramWebhookAck(
            accepted=False,
            update_id=_safe_int(update.get("update_id")),
            chat_id=chat_id,
            reason="missing task payload",
            metadata={"source": "telegram_manager_task", "scope": session_identity.scope.value},
        )

    # Bind the command to a session and record the raw user message first.
    session = _find_or_create_telegram_session(
        openclaw_service=openclaw_service,
        chat_id=chat_id,
        session_identity=session_identity,
    )
    _append_user_event(
        openclaw_service=openclaw_service,
        session=session,
        text=extracted["text"],
        update=update,
        extracted=extracted,
        session_identity=session_identity,
    )

    issue: GitHubIssueRead | None = None
    manager_prompt = task_query
    task_source = "prompt"
    operator_note = ""
    issue_reference: str | None = None
    issue_url: str | None = None
    # "--approve" is honoured only for actors allowed to self-approve.
    approval_granted = approval_requested and _can_telegram_task_self_approve(
        session_identity=session_identity
    )

    if approval_requested and not approval_granted and notifier.enabled:
        # Tell the requester their --approve flag was ignored.
        background_tasks.add_task(
            notifier.send_message,
            chat_id=chat_id,
            text="`--approve` 仅对 owner/partner 生效;本次仍按常规审批流执行。",
        )

    # Media path: if the query parses as a media task it is handled entirely
    # by the media-job pipeline and never reaches the manager agent.
    media_request = media_job_service.parse_telegram_task(task_query)
    if media_request is not None:
        # Stamp Telegram provenance into the job metadata for the audit trail.
        media_request = media_request.model_copy(
            update={
                "metadata": {
                    **media_request.metadata,
                    "source": "telegram_media_task",
                    "telegram_chat_id": chat_id,
                    "telegram_user_id": session_identity.actor.user_id,
                    "telegram_session_id": session.session_id,
                    "telegram_scope": session_identity.scope.value,
                    "raw_task_query": task_query,
                }
            }
        )
        media_job = media_job_service.create(media_request)
        openclaw_service.append_event(
            session_id=session.session_id,
            request=OpenClawSessionEventAppendRequest(
                role="status",
                content=f"media job queued: {media_job.job_id}",
                metadata={
                    "source": "telegram_media_task",
                    "job_id": media_job.job_id,
                    "mode": media_job.mode.value,
                    "target_bucket": media_job.target_bucket.value,
                },
            ),
        )
        openclaw_service.set_status(
            session_id=session.session_id,
            status=JobStatus.QUEUED,
            metadata_updates={"latest_media_job_id": media_job.job_id},
        )
        if notifier.enabled:
            background_tasks.add_task(
                notifier.send_message,
                chat_id=chat_id,
                text=_build_media_job_queued_message(media_job),
            )
        # Execution happens after the webhook returns.
        background_tasks.add_task(
            _execute_media_job_and_notify,
            media_job_service=media_job_service,
            housekeeper_service=housekeeper_service,
            openclaw_service=openclaw_service,
            notifier=notifier,
            chat_id=chat_id,
            session_id=session.session_id,
            job_id=media_job.job_id,
        )
        return TelegramWebhookAck(
            accepted=True,
            update_id=_safe_int(update.get("update_id")),
            chat_id=chat_id,
            session_id=session.session_id,
            metadata={
                "source": "telegram_media_task",
                "job_id": media_job.job_id,
                "mode": media_job.mode.value,
                "target_bucket": media_job.target_bucket.value,
                "scope": session_identity.scope.value,
            },
        )

    # Issue path: "/task issue <ref> [note]" seeds the prompt from GitHub.
    if task_query.casefold().startswith("issue "):
        issue_reference, operator_note = _extract_issue_task_parts(task_query)
        try:
            issue = github_issue_service.fetch_issue(issue_reference)
        except Exception as exc:
            # Fetch failure: reject the update, surfacing the error as reason.
            return TelegramWebhookAck(
                accepted=False,
                update_id=_safe_int(update.get("update_id")),
                chat_id=chat_id,
                session_id=session.session_id,
                reason=str(exc),
                metadata={
                    "source": "telegram_manager_task",
                    "task_source": "issue",
                    "scope": session_identity.scope.value,
                },
            )
        manager_prompt = github_issue_service.build_manager_prompt(issue, operator_note=operator_note or None)
        task_source = "issue"
        # Normalise to the canonical display form of the resolved issue.
        issue_reference = issue.reference.display
        issue_url = issue.url

    # Manager path: housekeeper prepares the dispatch request (provenance and
    # approval flags travel in metadata), then the dispatch is persisted.
    dispatch_request, _, _ = housekeeper_service.prepare_manager_request(
        ManagerDispatchRequest(
            prompt=manager_prompt,
            approval_granted=approval_granted,
            auto_dispatch=True,
            metadata={
                "source": "telegram_manager_task",
                "task_source": task_source,
                "telegram_chat_id": chat_id,
                "telegram_user_id": session_identity.actor.user_id,
                "telegram_session_id": session.session_id,
                "telegram_scope": session_identity.scope.value,
                "raw_task_query": task_query,
                "operator_note": operator_note,
                "github_issue_reference": issue_reference,
                "github_issue_url": issue_url,
                "github_issue_title": issue.title if issue is not None else None,
                "approval_requested": approval_requested,
                "approval_granted": approval_granted,
                "approval_source": "telegram_task_flag" if approval_granted else None,
            },
        ),
        manager_service=manager_service,
        trigger_source="telegram",
    )
    dispatch = manager_service.create_dispatch(dispatch_request)
    openclaw_service.append_event(
        session_id=session.session_id,
        request=OpenClawSessionEventAppendRequest(
            role="status",
            content=f"manager dispatch queued: {dispatch.dispatch_id}",
            metadata={
                "source": "telegram_manager_task",
                "dispatch_id": dispatch.dispatch_id,
                "task_source": task_source,
                "issue_reference": issue_reference,
            },
        ),
    )
    # Housekeeper may have turned auto_dispatch off; reflect that in status.
    openclaw_service.set_status(
        session_id=session.session_id,
        status=JobStatus.QUEUED if dispatch_request.auto_dispatch else JobStatus.CREATED,
        metadata_updates={"latest_manager_dispatch_id": dispatch.dispatch_id},
    )

    if notifier.enabled:
        background_tasks.add_task(
            notifier.send_message,
            chat_id=chat_id,
            text=_build_manager_dispatch_queued_message(dispatch, issue_reference=issue_reference),
        )

    if dispatch_request.auto_dispatch:
        # Run the dispatch (and the follow-up notification) after the webhook
        # response has been sent back to Telegram.
        background_tasks.add_task(
            _execute_manager_dispatch_and_notify,
            manager_service=manager_service,
            approval_service=approval_service,
            openclaw_service=openclaw_service,
            notifier=notifier,
            chat_id=chat_id,
            session_id=session.session_id,
            approval_uid=session_identity.actor.user_id or chat_id,
            assistant_scope=session_identity.scope,
            dispatch_id=dispatch.dispatch_id,
            issue_reference=issue_reference,
            issue_url=issue_url,
            issue_title=issue.title if issue is not None else None,
        )

    return TelegramWebhookAck(
        accepted=True,
        update_id=_safe_int(update.get("update_id")),
        chat_id=chat_id,
        session_id=session.session_id,
        metadata={
            "source": "telegram_manager_task",
            "dispatch_id": dispatch.dispatch_id,
            "task_source": task_source,
            "issue_reference": issue_reference,
            "issue_url": issue_url,
            "scope": session_identity.scope.value,
        },
    )
+
+
def _handle_memory_command(
*,
chat_id: str,
@@ -979,6 +1264,7 @@ def _handle_approve_command(
extracted: dict[str, Any],
background_tasks: BackgroundTasks,
approval_service: ApprovalStoreService,
+ github_issue_service: GitHubIssueService,
notifier: TelegramNotifierService,
session_identity: TelegramSessionIdentityRead,
) -> TelegramWebhookAck:
@@ -1009,9 +1295,35 @@ def _handle_approve_command(
message_text = _build_approval_decision_message(approval)
message_source = "telegram_approve_decision"
approval_query = approval.approval_id
+ if decision == "approved" and approval.metadata.get("action_type") == "github_issue_comment":
+ comment_output = _post_github_issue_comment_for_approval(
+ approval=approval,
+ approval_service=approval_service,
+ github_issue_service=github_issue_service,
+ chat_id=chat_id,
+ scope=session_identity.scope.value,
+ )
+ message_text = "\n\n".join(
+ [
+ message_text,
+ _build_github_issue_comment_posted_message(
+ approval_id=approval.approval_id,
+ issue_reference=str(approval.metadata.get("issue_reference") or "unknown"),
+ output=comment_output or None,
+ ),
+ ]
+ ).strip()
except ValueError as exc:
message_text = str(exc)
message_source = "telegram_approve_decision"
+ except RuntimeError as exc:
+ message_text = "\n\n".join(
+ [
+ _build_approval_decision_message(approval),
+ f"[GitHub Reply Failed]\n{str(exc).strip()}",
+ ]
+ ).strip()
+ message_source = "telegram_approve_decision"
elif approval_id:
approval = approval_service.get_request(approval_id)
if approval is None or approval.telegram_uid != approval_uid:
@@ -1159,6 +1471,345 @@ def _build_agent_result_message(run: ClaudeAgentRunRead) -> str:
return text
+def _execute_manager_dispatch_and_notify(
+ *,
+ manager_service: ManagerAgentService,
+ approval_service: ApprovalStoreService,
+ openclaw_service: OpenClawCompatService,
+ notifier: TelegramNotifierService,
+ chat_id: str,
+ session_id: str,
+ approval_uid: str,
+ assistant_scope: AssistantScope,
+ dispatch_id: str,
+ issue_reference: str | None,
+ issue_url: str | None,
+ issue_title: str | None,
+) -> None:
+ dispatch: ManagerDispatchRead | None = None
+ try:
+ dispatch = manager_service.execute_dispatch(dispatch_id)
+ except Exception as exc:
+ dispatch = manager_service.get_dispatch(dispatch_id)
+ error_text = str(exc).strip() or "manager dispatch failed"
+ if dispatch is None:
+ if notifier.enabled:
+ notifier.send_message(
+ chat_id=chat_id,
+ text=_truncate_telegram_text(
+ "\n".join(
+ [
+ "[Manager Task]",
+ f"dispatch: {dispatch_id}",
+ "status: failed",
+ "",
+ error_text,
+ ]
+ )
+ ),
+ )
+ return
+ dispatch = dispatch.model_copy(update={"error": dispatch.error or error_text})
+
+ final_status = dispatch.status
+ openclaw_service.append_event(
+ session_id=session_id,
+ request=OpenClawSessionEventAppendRequest(
+ role="status",
+ content=f"manager dispatch finished: {dispatch.dispatch_id}",
+ metadata={
+ "source": "telegram_manager_task",
+ "dispatch_id": dispatch.dispatch_id,
+ "status": final_status.value,
+ "issue_reference": issue_reference,
+ },
+ ),
+ )
+ openclaw_service.set_status(
+ session_id=session_id,
+ status=JobStatus.COMPLETED if final_status == JobStatus.COMPLETED else JobStatus.FAILED,
+ metadata_updates={"latest_manager_dispatch_status": final_status.value},
+ )
+
+ if notifier.enabled:
+ notifier.send_message(
+ chat_id=chat_id,
+ text=_build_manager_dispatch_result_message(
+ dispatch,
+ issue_reference=issue_reference,
+ issue_url=issue_url,
+ ),
+ )
+
+ if not issue_reference:
+ return
+
+ approval = approval_service.create_request(
+ ApprovalRequestCreateRequest(
+ title=f"Reply to GitHub issue {issue_reference}",
+ summary=f"Review and post the automated execution update for {issue_reference}.",
+ risk=ApprovalRisk.EXTERNAL,
+ source="github_issue_task",
+ telegram_uid=approval_uid,
+ session_id=session_id,
+ assistant_scope=assistant_scope,
+ metadata={
+ "action_type": "github_issue_comment",
+ "issue_reference": issue_reference,
+ "issue_url": issue_url,
+ "issue_title": issue_title,
+ "dispatch_id": dispatch.dispatch_id,
+ "comment_body": _build_github_issue_comment_body(
+ dispatch,
+ issue_reference=issue_reference,
+ issue_url=issue_url,
+ ),
+ },
+ )
+ )
+ openclaw_service.append_event(
+ session_id=session_id,
+ request=OpenClawSessionEventAppendRequest(
+ role="status",
+ content=f"github issue reply approval queued: {approval.approval_id}",
+ metadata={
+ "source": "telegram_manager_task",
+ "approval_id": approval.approval_id,
+ "dispatch_id": dispatch.dispatch_id,
+ "issue_reference": issue_reference,
+ },
+ ),
+ )
+ if notifier.enabled:
+ notifier.send_message(
+ chat_id=chat_id,
+ text=_build_github_issue_reply_approval_message(
+ approval_id=approval.approval_id,
+ issue_reference=issue_reference,
+ issue_url=issue_url,
+ ),
+ )
+
+
+def _execute_media_job_and_notify(
+ *,
+ media_job_service: MediaJobService,
+ housekeeper_service: HousekeeperService,
+ openclaw_service: OpenClawCompatService,
+ notifier: TelegramNotifierService,
+ chat_id: str,
+ session_id: str,
+ job_id: str,
+) -> None:
+ job = media_job_service.execute(job_id)
+ housekeeper_service.record_media_job_outcome(
+ job=job,
+ notifier=notifier,
+ media_jobs=media_job_service.list(),
+ )
+ openclaw_service.append_event(
+ session_id=session_id,
+ request=OpenClawSessionEventAppendRequest(
+ role="status",
+ content=f"media job finished: {job.job_id}",
+ metadata={
+ "source": "telegram_media_task",
+ "job_id": job.job_id,
+ "status": job.status.value,
+ },
+ ),
+ )
+ openclaw_service.set_status(
+ session_id=session_id,
+ status=JobStatus.COMPLETED if job.status is MediaJobStatus.COMPLETED else JobStatus.FAILED,
+ metadata_updates={"latest_media_job_status": job.status.value},
+ )
+ if notifier.enabled:
+ notifier.send_message(chat_id=chat_id, text=_build_media_job_result_message(job))
+
+
+def _build_manager_dispatch_queued_message(
+ dispatch: ManagerDispatchRead,
+ *,
+ issue_reference: str | None,
+) -> str:
+ task_count = len(dispatch.execution_plan.tasks) if dispatch.execution_plan is not None else 0
+ lines = [
+ "[Manager Task]",
+ f"dispatch: {dispatch.dispatch_id}",
+ f"strategy: {dispatch.execution_plan.strategy.value if dispatch.execution_plan is not None else 'single_task'}",
+ f"tasks: {task_count}",
+ ]
+ if issue_reference:
+ lines.append(f"issue: {issue_reference}")
+ deferred_reason = str(dispatch.metadata.get("deferred_reason") or "").strip()
+ if deferred_reason:
+ lines.append(f"deferred: {deferred_reason}")
+ lines.append("已接收,当前不自动执行,等待夜间窗口或人工放行。")
+ else:
+ lines.append("已接收,开始拆解并执行。")
+ return _truncate_telegram_text("\n".join(lines))
+
+
+def _build_media_job_queued_message(job: MediaJobRead) -> str:
+ return _truncate_telegram_text(
+ "\n".join(
+ [
+ "[Media Job]",
+ f"job: {job.job_id}",
+ f"status: {job.status.value}",
+ f"target_bucket: {job.target_bucket.value}",
+ f"mode: {job.mode.value}",
+ ]
+ )
+ )
+
+
+def _build_media_job_result_message(job: MediaJobRead) -> str:
+ lines = [
+ "[Media Job]",
+ f"job: {job.job_id}",
+ f"status: {job.status.value}",
+ f"target_bucket: {job.target_bucket.value}",
+ f"mode: {job.mode.value}",
+ ]
+ if job.output_files:
+ lines.extend(["", "output_files:"])
+ lines.extend(f"- {path}" for path in job.output_files[:8])
+ if job.error:
+ lines.extend(["", "error:", job.error.strip()])
+ return _truncate_telegram_text("\n".join(lines))
+
+
+def _build_manager_dispatch_result_message(
+ dispatch: ManagerDispatchRead,
+ *,
+ issue_reference: str | None,
+ issue_url: str | None,
+) -> str:
+ task_count = len(dispatch.execution_plan.tasks) if dispatch.execution_plan is not None else 0
+ completed_count = (
+ sum(1 for item in dispatch.execution_plan.tasks if item.status == JobStatus.COMPLETED)
+ if dispatch.execution_plan is not None
+ else 0
+ )
+ lines = [
+ "[Manager Task]",
+ f"dispatch: {dispatch.dispatch_id}",
+ f"status: {dispatch.status.value}",
+ f"tasks: {completed_count}/{task_count}",
+ ]
+ if issue_reference:
+ lines.append(f"issue: {issue_reference}")
+ if issue_url:
+ lines.append(f"url: {issue_url}")
+ if dispatch.summary:
+ lines.extend(["", dispatch.summary])
+
+ promotion = dispatch.run_summary.promotion if dispatch.run_summary is not None else None
+ if promotion is not None and promotion.pr_url:
+ lines.append(f"draft_pr: {promotion.pr_url}")
+ elif dispatch.run_summary is not None and dispatch.run_summary.promotion_patch_uri:
+ lines.append(f"patch: {dispatch.run_summary.promotion_patch_uri}")
+
+ error_text = (
+ dispatch.error
+ or (
+ dispatch.run_summary.driver_result.error
+ if dispatch.run_summary is not None and dispatch.run_summary.driver_result.error
+ else None
+ )
+ )
+ if error_text:
+ lines.extend(["", "error:", error_text.strip()])
+ return _truncate_telegram_text("\n".join(lines))
+
+
+def _build_github_issue_comment_body(
+ dispatch: ManagerDispatchRead,
+ *,
+ issue_reference: str,
+ issue_url: str | None,
+) -> str:
+ lines = [
+ "Automated progress update from the local autonomous agent stack.",
+ "",
+ f"- Issue: {issue_reference}",
+ f"- Dispatch: {dispatch.dispatch_id}",
+ f"- Status: {dispatch.status.value}",
+ ]
+ if issue_url:
+ lines.append(f"- Issue URL: {issue_url}")
+ if dispatch.summary:
+ lines.append(f"- Summary: {dispatch.summary}")
+ promotion = dispatch.run_summary.promotion if dispatch.run_summary is not None else None
+ if promotion is not None and promotion.pr_url:
+ lines.append(f"- Draft PR: {promotion.pr_url}")
+ error_text = (
+ dispatch.error
+ or (
+ dispatch.run_summary.driver_result.error
+ if dispatch.run_summary is not None and dispatch.run_summary.driver_result.error
+ else None
+ )
+ )
+ if error_text:
+ lines.append(f"- Error: {error_text.strip()}")
+ lines.extend(
+ [
+ "",
+ "This update was prepared automatically from Telegram `/task issue` and still expects human review before merge.",
+ ]
+ )
+ return "\n".join(lines).strip()
+
+
+def _build_github_issue_reply_approval_message(
+ *,
+ approval_id: str,
+ issue_reference: str,
+ issue_url: str | None,
+) -> str:
+ lines = [
+ "[GitHub Reply Pending]",
+ f"approval: {approval_id}",
+ f"issue: {issue_reference}",
+ ]
+ if issue_url:
+ lines.append(f"url: {issue_url}")
+ lines.extend(
+ [
+ "",
+ f"/approve {approval_id} approve 发布执行结果到 GitHub issue",
+ f"/approve {approval_id} reject 保留结果,仅在 Telegram 查看",
+ ]
+ )
+ return _truncate_telegram_text("\n".join(lines))
+
+
+def _build_github_issue_comment_posted_message(
+ *,
+ approval_id: str,
+ issue_reference: str,
+ output: str | None,
+) -> str:
+ lines = [
+ "[GitHub Reply Posted]",
+ f"approval: {approval_id}",
+ f"issue: {issue_reference}",
+ ]
+ if output:
+ lines.extend(["", output.strip()])
+ return _truncate_telegram_text("\n".join(lines))
+
+
+def _truncate_telegram_text(text: str) -> str:
+ normalized = text.strip()
+ if len(normalized) > 3900:
+ return normalized[:3900] + "\n...[truncated]"
+ return normalized
+
+
def _handle_status_query(
*,
chat_id: str,
@@ -1288,7 +1939,12 @@ def _is_status_query(text: str) -> bool:
def _is_help_command(text: str) -> bool:
normalized = text.strip().lower()
- return normalized in {"/help", "help", "帮助"}
+ return normalized in {"/help", "/start", "help", "帮助"}
+
+
+def _is_task_command(text: str) -> bool:
+ normalized = text.strip().lower()
+ return normalized == "/task" or normalized.startswith("/task ")
def _is_approve_command(text: str) -> bool:
@@ -1366,6 +2022,43 @@ def _extract_approve_query(text: str) -> str:
return ""
+def _extract_task_query(text: str) -> str:
+ return _parse_task_command(text)[0]
+
+
+def _parse_task_command(text: str) -> tuple[str, bool]:
+ normalized = text.strip()
+ lowered = normalized.lower()
+ if lowered == "/task":
+ return "", False
+ if lowered.startswith("/task "):
+ payload = normalized.split(" ", 1)[1].strip()
+ approval_requested = False
+ if payload.startswith("--approve"):
+ approval_requested = True
+ payload = payload[len("--approve") :].strip()
+ return payload, approval_requested
+ return "", False
+
+
+def _can_telegram_task_self_approve(
+ *,
+ session_identity: TelegramSessionIdentityRead,
+) -> bool:
+ return session_identity.actor.role in {ActorRole.OWNER, ActorRole.PARTNER}
+
+
+def _extract_issue_task_parts(task_query: str) -> tuple[str, str]:
+ normalized = task_query.strip()
+ if not normalized.casefold().startswith("issue "):
+ raise ValueError("issue task must start with `issue `")
+ remainder = normalized[6:].strip()
+ if not remainder:
+ raise ValueError("missing GitHub issue reference")
+ issue_reference, _, operator_note = remainder.partition(" ")
+ return issue_reference.strip(), operator_note.strip()
+
+
def _parse_approve_query(query: str) -> tuple[str, str | None, str]:
normalized = query.strip()
if not normalized:
@@ -1570,7 +2263,11 @@ def _build_help_message(*, session_identity: TelegramSessionIdentityRead) -> str
chat_type = session_identity.chat_context.chat_type
lines = [
"[Telegram Commands]",
+ "/start 查看欢迎信息和命令列表",
"/status 查看当前会话、任务和能力摘要",
+ "/task <需求> 走 Manager Agent DAG 执行任务",
+ "/task --approve <需求> owner/partner 直通 Draft PR 审批上下文",
+ "/task issue <issue引用> [补充说明] 读取 GitHub issue 后派发修复",
"/approve 查看待审批列表",
"/approve <approval_id> 查看待审批详情",
"/approve <approval_id> approve [备注] 批准待审批事项",
@@ -1595,6 +2292,32 @@ def _build_help_message(*, session_identity: TelegramSessionIdentityRead) -> str
return "\n".join(lines)
+def _post_github_issue_comment_for_approval(
+ *,
+ approval: Any,
+ approval_service: ApprovalStoreService,
+ github_issue_service: GitHubIssueService,
+ chat_id: str,
+ scope: str,
+) -> str:
+ issue_reference = str(approval.metadata.get("issue_reference") or "").strip()
+ comment_body = str(approval.metadata.get("comment_body") or "").strip()
+ if not issue_reference or not comment_body:
+ raise RuntimeError("approval is missing GitHub issue comment payload")
+ output = github_issue_service.post_comment(issue_reference, comment_body)
+ approval_service.update_request_metadata(
+ approval.approval_id,
+ {
+ "comment_posted": True,
+ "comment_posted_at": _utc_now(),
+ "comment_post_result": output,
+ "resolved_via_chat_id": chat_id,
+ "resolved_scope": scope,
+ },
+ )
+ return output
+
+
def _build_approval_list_message(approvals: list[Any]) -> str:
if not approvals:
return "当前没有待审批事项。"
diff --git a/src/autoresearch/api/routers/housekeeper.py b/src/autoresearch/api/routers/housekeeper.py
new file mode 100644
index 00000000..b3973567
--- /dev/null
+++ b/src/autoresearch/api/routers/housekeeper.py
@@ -0,0 +1,76 @@
+from __future__ import annotations
+
+from fastapi import APIRouter, Depends, status
+
+from autoresearch.agents.manager_agent import ManagerAgentService
+from autoresearch.api.dependencies import (
+ get_approval_store_service,
+ get_autoresearch_planner_service,
+ get_housekeeper_service,
+ get_manager_agent_service,
+ get_media_job_service,
+ get_telegram_notifier_service,
+)
+from autoresearch.core.services.approval_store import ApprovalStoreService
+from autoresearch.core.services.autoresearch_planner import AutoResearchPlannerService
+from autoresearch.core.services.housekeeper import HousekeeperService
+from autoresearch.core.services.media_jobs import MediaJobService
+from autoresearch.core.services.telegram_notify import TelegramNotifierService
+from autoresearch.shared.housekeeper_contract import (
+ HousekeeperModeUpdateRequest,
+ HousekeeperMorningSummaryRead,
+ HousekeeperStateRead,
+ HousekeeperTickRead,
+)
+
+
+router = APIRouter(prefix="/api/v1/housekeeper", tags=["housekeeper"])
+
+
+@router.get("/state", response_model=HousekeeperStateRead)
+def get_housekeeper_state(
+ service: HousekeeperService = Depends(get_housekeeper_service),
+) -> HousekeeperStateRead:
+ return service.get_state()
+
+
+@router.post("/mode", response_model=HousekeeperStateRead)
+def update_housekeeper_mode(
+ payload: HousekeeperModeUpdateRequest,
+ service: HousekeeperService = Depends(get_housekeeper_service),
+) -> HousekeeperStateRead:
+ return service.update_mode(payload)
+
+
+@router.post("/night-explore/tick", response_model=HousekeeperTickRead, status_code=status.HTTP_200_OK)
+def execute_night_explore_tick(
+ service: HousekeeperService = Depends(get_housekeeper_service),
+ manager_service: ManagerAgentService = Depends(get_manager_agent_service),
+ planner_service: AutoResearchPlannerService = Depends(get_autoresearch_planner_service),
+ media_service: MediaJobService = Depends(get_media_job_service),
+ notifier: TelegramNotifierService = Depends(get_telegram_notifier_service),
+) -> HousekeeperTickRead:
+ return service.execute_night_explore_tick(
+ manager_service=manager_service,
+ planner_service=planner_service,
+ notifier=notifier,
+ media_jobs=media_service.list(),
+ )
+
+
+@router.post("/summaries/morning", response_model=HousekeeperMorningSummaryRead, status_code=status.HTTP_200_OK)
+def generate_morning_summary(
+ service: HousekeeperService = Depends(get_housekeeper_service),
+ manager_service: ManagerAgentService = Depends(get_manager_agent_service),
+ planner_service: AutoResearchPlannerService = Depends(get_autoresearch_planner_service),
+ approval_service: ApprovalStoreService = Depends(get_approval_store_service),
+ media_service: MediaJobService = Depends(get_media_job_service),
+ notifier: TelegramNotifierService = Depends(get_telegram_notifier_service),
+) -> HousekeeperMorningSummaryRead:
+ return service.create_morning_summary(
+ manager_service=manager_service,
+ planner_service=planner_service,
+ approval_service=approval_service,
+ notifier=notifier,
+ media_jobs=media_service.list(),
+ )
diff --git a/src/autoresearch/api/routers/manager_agent.py b/src/autoresearch/api/routers/manager_agent.py
new file mode 100644
index 00000000..e1f0a937
--- /dev/null
+++ b/src/autoresearch/api/routers/manager_agent.py
@@ -0,0 +1,51 @@
+from __future__ import annotations
+
+from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, status
+
+from autoresearch.api.dependencies import get_housekeeper_service, get_manager_agent_service
+from autoresearch.agents.manager_agent import ManagerAgentService
+from autoresearch.core.services.housekeeper import HousekeeperService
+from autoresearch.shared.manager_agent_contract import ManagerDispatchRead, ManagerDispatchRequest
+
+
+router = APIRouter(prefix="/api/v1/agents/manager", tags=["manager-agent"])
+
+
+@router.post(
+ "/dispatch",
+ response_model=ManagerDispatchRead,
+ status_code=status.HTTP_202_ACCEPTED,
+)
+def dispatch_manager_agent(
+ payload: ManagerDispatchRequest,
+ background_tasks: BackgroundTasks,
+ service: ManagerAgentService = Depends(get_manager_agent_service),
+ housekeeper_service: HousekeeperService = Depends(get_housekeeper_service),
+) -> ManagerDispatchRead:
+ prepared, _, _ = housekeeper_service.prepare_manager_request(
+ payload,
+ manager_service=service,
+ trigger_source="api",
+ )
+ dispatch = service.create_dispatch(prepared)
+ if prepared.auto_dispatch:
+ background_tasks.add_task(service.execute_dispatch, dispatch.dispatch_id)
+ return dispatch
+
+
+@router.get("/dispatches", response_model=list[ManagerDispatchRead])
+def list_manager_dispatches(
+ service: ManagerAgentService = Depends(get_manager_agent_service),
+) -> list[ManagerDispatchRead]:
+ return service.list_dispatches()
+
+
+@router.get("/dispatches/{dispatch_id}", response_model=ManagerDispatchRead)
+def get_manager_dispatch(
+ dispatch_id: str,
+ service: ManagerAgentService = Depends(get_manager_agent_service),
+) -> ManagerDispatchRead:
+ dispatch = service.get_dispatch(dispatch_id)
+ if dispatch is None:
+ raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Manager dispatch not found")
+ return dispatch
diff --git a/src/autoresearch/api/routers/media_jobs.py b/src/autoresearch/api/routers/media_jobs.py
new file mode 100644
index 00000000..6d469b92
--- /dev/null
+++ b/src/autoresearch/api/routers/media_jobs.py
@@ -0,0 +1,64 @@
+from __future__ import annotations
+
+from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, status
+
+from autoresearch.api.dependencies import get_housekeeper_service, get_media_job_service, get_telegram_notifier_service
+from autoresearch.core.services.housekeeper import HousekeeperService
+from autoresearch.core.services.media_jobs import MediaJobService
+from autoresearch.core.services.telegram_notify import TelegramNotifierService
+from autoresearch.shared.media_job_contract import MediaJobRead, MediaJobRequest
+
+
+router = APIRouter(prefix="/api/v1/media/jobs", tags=["media-jobs"])
+
+
+@router.post("", response_model=MediaJobRead, status_code=status.HTTP_202_ACCEPTED)
+def create_media_job(
+ payload: MediaJobRequest,
+ background_tasks: BackgroundTasks,
+ service: MediaJobService = Depends(get_media_job_service),
+ housekeeper_service: HousekeeperService = Depends(get_housekeeper_service),
+ notifier: TelegramNotifierService = Depends(get_telegram_notifier_service),
+) -> MediaJobRead:
+ job = service.create(payload)
+ background_tasks.add_task(
+ _execute_media_job,
+ service=service,
+ housekeeper_service=housekeeper_service,
+ notifier=notifier,
+ job_id=job.job_id,
+ )
+ return job
+
+
+@router.get("", response_model=list[MediaJobRead])
+def list_media_jobs(
+ service: MediaJobService = Depends(get_media_job_service),
+) -> list[MediaJobRead]:
+ return service.list()
+
+
+@router.get("/{job_id}", response_model=MediaJobRead)
+def get_media_job(
+ job_id: str,
+ service: MediaJobService = Depends(get_media_job_service),
+) -> MediaJobRead:
+ job = service.get(job_id)
+ if job is None:
+ raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Media job not found")
+ return job
+
+
+def _execute_media_job(
+ *,
+ service: MediaJobService,
+ housekeeper_service: HousekeeperService,
+ notifier: TelegramNotifierService,
+ job_id: str,
+) -> None:
+ completed = service.execute(job_id)
+ housekeeper_service.record_media_job_outcome(
+ job=completed,
+ notifier=notifier,
+ media_jobs=service.list(),
+ )
diff --git a/src/autoresearch/api/routers/panel.py b/src/autoresearch/api/routers/panel.py
index b62acfe0..30302fc5 100644
--- a/src/autoresearch/api/routers/panel.py
+++ b/src/autoresearch/api/routers/panel.py
@@ -7,6 +7,7 @@
from autoresearch.api.dependencies import (
get_approval_store_service,
+ get_autoresearch_planner_service,
get_capability_provider_registry,
get_claude_agent_service,
get_openclaw_compat_service,
@@ -16,11 +17,13 @@
)
from autoresearch.core.adapters import CapabilityProviderRegistry
from autoresearch.core.services.approval_store import ApprovalStoreService
+from autoresearch.core.services.autoresearch_planner import AutoResearchPlannerService
from autoresearch.core.services.claude_agents import ClaudeAgentService
from autoresearch.core.services.openclaw_compat import OpenClawCompatService
from autoresearch.core.services.panel_access import PanelAccessService
from autoresearch.core.services.panel_audit import PanelAuditService
from autoresearch.core.services.telegram_notify import TelegramNotifierService
+from autoresearch.shared.autoresearch_planner_contract import AutoResearchPlanRead
from autoresearch.shared.models import (
ApprovalDecisionRequest,
ApprovalNoteRequest,
@@ -126,6 +129,7 @@ def get_panel_state(
agent_service: ClaudeAgentService = Depends(get_claude_agent_service),
audit_service: PanelAuditService = Depends(get_panel_audit_service),
approval_service: ApprovalStoreService = Depends(get_approval_store_service),
+ planner_service: AutoResearchPlannerService = Depends(get_autoresearch_planner_service),
capability_registry: CapabilityProviderRegistry = Depends(get_capability_provider_registry),
) -> PanelStateRead:
sessions = _sessions_for_uid(openclaw_service=openclaw_service, telegram_uid=access.telegram_uid)
@@ -142,6 +146,10 @@ def get_panel_state(
CapabilityProviderSummaryRead(**descriptor.model_dump())
for descriptor in capability_registry.list_descriptors()
]
+ pending_autoresearch_plans = [
+ plan.model_dump(mode="json")
+ for plan in planner_service.list_pending(telegram_uid=access.telegram_uid, limit=20)
+ ]
return PanelStateRead(
telegram_uid=access.telegram_uid,
sessions=sessions,
@@ -149,6 +157,7 @@ def get_panel_state(
audit_logs=audit_logs,
capability_providers=capability_providers,
pending_approvals=pending_approvals,
+ pending_autoresearch_plans=pending_autoresearch_plans,
issued_at=utc_now(),
)
@@ -287,6 +296,54 @@ def reject_panel_approval(
return resolved
+@router.post("/autoresearch/plans/{plan_id}/dispatch", response_model=AutoResearchPlanRead)
+def dispatch_panel_autoresearch_plan(
+ plan_id: str,
+ payload: ApprovalNoteRequest,
+ request: Request,
+ background_tasks: BackgroundTasks,
+ access: PanelAccessContext = Depends(_require_panel_access),
+ planner_service: AutoResearchPlannerService = Depends(get_autoresearch_planner_service),
+ audit_service: PanelAuditService = Depends(get_panel_audit_service),
+ notifier: TelegramNotifierService = Depends(get_telegram_notifier_service),
+) -> AutoResearchPlanRead:
+ plan = _authorized_plan(
+ plan_id=plan_id,
+ telegram_uid=access.telegram_uid,
+ planner_service=planner_service,
+ )
+ try:
+ queued = planner_service.request_dispatch(plan.plan_id, requested_by=access.telegram_uid)
+ except ValueError as exc:
+ raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=str(exc)) from exc
+
+ entry = audit_service.log_action(
+ telegram_uid=access.telegram_uid,
+ action="dispatch",
+ target_id=plan_id,
+ target_type="autoresearch_plan",
+ status="accepted",
+ reason=payload.note,
+ request_ip=_request_ip(request),
+ user_agent=request.headers.get("user-agent"),
+ metadata={
+ "plan_title": plan.selected_candidate.title if plan.selected_candidate is not None else None,
+ "plan_source_path": plan.selected_candidate.source_path if plan.selected_candidate is not None else None,
+ "auth_method": access.auth_method,
+ "token_id": access.token_id,
+ },
+ )
+ background_tasks.add_task(
+ _execute_autoresearch_plan_and_notify,
+ planner_service=planner_service,
+ notifier=notifier,
+ plan_id=plan.plan_id,
+ telegram_uid=access.telegram_uid,
+ audit_entry_id=entry.audit_id,
+ )
+ return queued
+
+
@router.get("/agents/{agent_run_id}", response_model=ClaudeAgentRunRead)
def get_panel_agent(
agent_run_id: str,
@@ -443,6 +500,54 @@ def _authorized_approval(
return approval
+def _authorized_plan(
+ *,
+ plan_id: str,
+ telegram_uid: str,
+ planner_service: AutoResearchPlannerService,
+) -> AutoResearchPlanRead:
+ plan = planner_service.get(plan_id)
+ if plan is None:
+ raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="autoresearch plan not found")
+ if plan.telegram_uid not in {None, telegram_uid}:
+ raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="forbidden")
+ return plan
+
+
+def _execute_autoresearch_plan_and_notify(
+ *,
+ planner_service: AutoResearchPlannerService,
+ notifier: TelegramNotifierService,
+ plan_id: str,
+ telegram_uid: str,
+ audit_entry_id: str,
+) -> None:
+ result = planner_service.execute_dispatch(plan_id)
+ if not notifier.enabled:
+ return
+ candidate = result.selected_candidate
+ title = candidate.title if candidate is not None else plan_id
+ status_value = result.dispatch_status.value
+ lines = [
+ f"[AutoResearch Dispatch] {title}",
+ f"- status: {status_value}",
+ f"- plan: {result.plan_id}",
+ f"- audit: {audit_entry_id}",
+ ]
+ if result.dispatch_run is not None:
+ lines.append(f"- lane: {result.dispatch_run.lane.value}")
+ lines.append(f"- remote_status: {result.dispatch_run.status.value}")
+ if result.dispatch_run.failure_class is not None:
+ lines.append(f"- failure_class: {result.dispatch_run.failure_class.value}")
+ if result.run_summary is not None:
+ lines.append(f"- final_status: {result.run_summary.final_status}")
+ if result.run_summary.promotion_patch_uri:
+ lines.append(f"- patch: {result.run_summary.promotion_patch_uri}")
+ if result.dispatch_error:
+ lines.append(f"- error: {result.dispatch_error}")
+ notifier.send_message(chat_id=telegram_uid, text="\n".join(lines))
+
+
def _request_ip(request: Request) -> str | None:
if request.client is None:
return None
@@ -499,6 +604,10 @@ def _request_ip(request: Request) -> str | None:
待审批
+
审计日志
@@ -525,6 +634,7 @@ def _request_ip(request: Request) -> str | None:
const runsEl = document.getElementById("runs");
const capabilitiesEl = document.getElementById("capabilities");
const approvalsEl = document.getElementById("approvals");
+const autoresearchPlansEl = document.getElementById("autoresearch-plans");
const auditEl = document.getElementById("audit");
if (tgWebApp) {
@@ -592,6 +702,23 @@ def _request_ip(request: Request) -> str | None:
`;
}
+function autoresearchPlanRow(item) {
+ const candidate = item.selected_candidate || {};
+ const estimatedChanges = (candidate.allowed_paths || []).length || 0;
+ return `
+
+ | ${item.plan_id} |
+ ${candidate.source_path || "-"} |
+ ${candidate.category || "-"} |
+ ${estimatedChanges} |
+ ${item.dispatch_status || "-"} |
+
+
+ |
+
+ `;
+}
+
function renderPromotionCard() {
const isSkillPromotion = pendingPromotionAction.action === "managed-skill-promote"
&& pendingPromotionAction.installId
@@ -657,18 +784,21 @@ def _request_ip(request: Request) -> str | None:
try {
const state = await callApi("/api/v1/panel/state?limit_runs=60&limit_audit=40");
const mode = token ? "JWT" : "Telegram Mini App";
- summary.textContent = `UID: ${state.telegram_uid} | mode: ${mode} | sessions: ${state.sessions.length} | runs: ${state.agent_runs.length} | approvals: ${(state.pending_approvals || []).length} | providers: ${(state.capability_providers || []).length}`;
+ summary.textContent = `UID: ${state.telegram_uid} | mode: ${mode} | sessions: ${state.sessions.length} | runs: ${state.agent_runs.length} | approvals: ${(state.pending_approvals || []).length} | plans: ${(state.pending_autoresearch_plans || []).length} | providers: ${(state.capability_providers || []).length}`;
const rows = state.agent_runs.map(runRow).join("");
runsEl.innerHTML = `| Agent | Status | Task | Updated | Action | Hint |
${rows}
`;
const capabilityRows = (state.capability_providers || []).map(capabilityRow).join("");
capabilitiesEl.innerHTML = `| Provider | Domain | Status | Capabilities |
${capabilityRows || "| 暂无 |
"}
`;
const approvalRows = (state.pending_approvals || []).map(approvalRow).join("");
approvalsEl.innerHTML = `| ID | Risk | Title | Source | Expires | Decision |
${approvalRows || "| 暂无 |
"}
`;
+ const planRows = (state.pending_autoresearch_plans || []).map(autoresearchPlanRow).join("");
+ autoresearchPlansEl.innerHTML = `| Plan | Target | Hotspot | Estimated | Status | Action |
${planRows || "| 暂无 |
"}
`;
auditEl.textContent = JSON.stringify(state.audit_logs, null, 2);
} catch (err) {
summary.textContent = `加载失败: ${err.message}`;
capabilitiesEl.innerHTML = "加载失败
";
approvalsEl.innerHTML = "加载失败
";
+ autoresearchPlansEl.innerHTML = "加载失败
";
}
}
@@ -705,6 +835,19 @@ def _request_ip(request: Request) -> str | None:
}
});
+autoresearchPlansEl.addEventListener("click", async (event) => {
+ const target = event.target;
+ if (!target || !target.dataset || !target.dataset.planId) return;
+ const planId = target.dataset.planId;
+ const note = prompt("输入 dispatch 备注", "approved via panel") || "";
+ try {
+ await callApi(`/api/v1/panel/autoresearch/plans/${planId}/dispatch`, "POST", {note, metadata: {}});
+ await refresh();
+ } catch (err) {
+ alert(`派发失败: ${err.message}`);
+ }
+});
+
promotionApproveBtn.addEventListener("click", async () => {
try {
await approvePromotionAndExecute();
diff --git a/src/autoresearch/api/settings.py b/src/autoresearch/api/settings.py
index b1f27bd5..107fd9b2 100644
--- a/src/autoresearch/api/settings.py
+++ b/src/autoresearch/api/settings.py
@@ -331,6 +331,57 @@ def _normalize_allowed_roles(cls, value: Any) -> set[str]:
return roles or {"viewer", "editor", "admin", "owner"}
+class UpstreamWatcherSettings(_BaseApiSettings):
+ upstream_url: str = Field(
+ default="https://github.com/openclaw/openclaw.git",
+ validation_alias="AUTORESEARCH_UPSTREAM_WATCH_URL",
+ )
+ workspace_root: Path = Field(
+ default=Path("/Volumes/AI_LAB/ai_lab/workspace"),
+ validation_alias="AUTORESEARCH_UPSTREAM_WATCH_WORKSPACE_ROOT",
+ )
+ max_commits: int = Field(default=5, validation_alias="AUTORESEARCH_UPSTREAM_WATCH_MAX_COMMITS")
+
+ @field_validator("workspace_root", mode="before")
+ @classmethod
+ def _normalize_workspace_root(cls, value: Any) -> Path:
+ path = _parse_path(value)
+ return path or Path("/Volumes/AI_LAB/ai_lab/workspace")
+
+
+class HousekeeperSettings(_BaseApiSettings):
+ timezone_name: str = Field(default="Asia/Shanghai", validation_alias="AUTORESEARCH_HOUSEKEEPER_TIMEZONE")
+ summary_chat_id: str | None = Field(default=None, validation_alias="AUTORESEARCH_HOUSEKEEPER_SUMMARY_CHAT_ID")
+
+
+class MediaSettings(_BaseApiSettings):
+ media_root: Path = Field(default=Path("/home/lisa/media"), validation_alias="AUTORESEARCH_MEDIA_ROOT")
+ allowed_domains: set[str] = Field(
+ default_factory=lambda: {
+ "youtube.com",
+ "youtu.be",
+ "bilibili.com",
+ "vimeo.com",
+ "tiktok.com",
+ "douyin.com",
+ },
+ validation_alias="AUTORESEARCH_MEDIA_ALLOWED_DOMAINS",
+ )
+ yt_dlp_bin: str = Field(default="yt-dlp", validation_alias="AUTORESEARCH_MEDIA_YT_DLP_BIN")
+ ffmpeg_bin: str = Field(default="ffmpeg", validation_alias="AUTORESEARCH_MEDIA_FFMPEG_BIN")
+
+ @field_validator("media_root", mode="before")
+ @classmethod
+ def _normalize_media_root(cls, value: Any) -> Path:
+ path = _parse_path(value)
+ return path or Path("/home/lisa/media")
+
+ @field_validator("allowed_domains", mode="before")
+ @classmethod
+ def _normalize_allowed_domains(cls, value: Any) -> set[str]:
+ return _parse_csv_set(value)
+
+
def load_runtime_settings() -> RuntimeSettings:
    """Build a fresh (uncached) RuntimeSettings from the environment."""
    return RuntimeSettings()
@@ -353,6 +404,18 @@ def load_admin_settings() -> AdminSettings:
return AdminSettings()
def load_upstream_watcher_settings() -> UpstreamWatcherSettings:
    """Build a fresh (uncached) UpstreamWatcherSettings from the environment."""
    return UpstreamWatcherSettings()
+
+
def load_housekeeper_settings() -> HousekeeperSettings:
    """Build a fresh (uncached) HousekeeperSettings from the environment."""
    return HousekeeperSettings()
+
+
def load_media_settings() -> MediaSettings:
    """Build a fresh (uncached) MediaSettings from the environment."""
    return MediaSettings()
+
+
@lru_cache(maxsize=1)
def get_runtime_settings() -> RuntimeSettings:
    """Cached accessor; invalidated by clear_settings_caches()."""
    return load_runtime_settings()
@@ -378,10 +441,28 @@ def get_admin_settings() -> AdminSettings:
return load_admin_settings()
@lru_cache(maxsize=1)
def get_upstream_watcher_settings() -> UpstreamWatcherSettings:
    """Cached accessor; invalidated by clear_settings_caches()."""
    return load_upstream_watcher_settings()
+
+
@lru_cache(maxsize=1)
def get_housekeeper_settings() -> HousekeeperSettings:
    """Cached accessor; invalidated by clear_settings_caches()."""
    return load_housekeeper_settings()
+
+
@lru_cache(maxsize=1)
def get_media_settings() -> MediaSettings:
    """Cached accessor; invalidated by clear_settings_caches()."""
    return load_media_settings()
+
+
def clear_settings_caches() -> None:
    """Drop every cached settings object and the deprecated-alias warning memo."""
    cached_getters = (
        get_runtime_settings,
        get_telegram_settings,
        get_panel_settings,
        get_feature_settings,
        get_admin_settings,
        get_upstream_watcher_settings,
        get_housekeeper_settings,
        get_media_settings,
    )
    for getter in cached_getters:
        getter.cache_clear()
    _WARNED_DEPRECATED_ALIASES.clear()
diff --git a/src/autoresearch/core/dispatch/__init__.py b/src/autoresearch/core/dispatch/__init__.py
new file mode 100644
index 00000000..d3174a44
--- /dev/null
+++ b/src/autoresearch/core/dispatch/__init__.py
@@ -0,0 +1 @@
+"""Dispatch abstractions for local and future remote execution lanes."""
diff --git a/src/autoresearch/core/dispatch/failure_classifier.py b/src/autoresearch/core/dispatch/failure_classifier.py
new file mode 100644
index 00000000..aedf184a
--- /dev/null
+++ b/src/autoresearch/core/dispatch/failure_classifier.py
@@ -0,0 +1,132 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Literal
+
+from autoresearch.agent_protocol.models import RunSummary
+from autoresearch.shared.remote_run_contract import FailureClass, RecoveryAction, RemoteRunStatus
+
+
# Policy table: every FailureClass maps to exactly one RecoveryAction.
_FAILURE_ACTIONS: dict[FailureClass, RecoveryAction] = {
    FailureClass.PLANNER_STALLED: RecoveryAction.REQUIRE_HUMAN_REVIEW,
    FailureClass.EXECUTOR_STALLED: RecoveryAction.RETRY,
    FailureClass.TOOL_TIMEOUT: RecoveryAction.RETRY,
    FailureClass.MODEL_FALLBACK: RecoveryAction.DOWNGRADE_TO_DRAFT,
    FailureClass.ASSERTION_FAILED_AFTER_FALLBACK: RecoveryAction.REQUIRE_HUMAN_REVIEW,
    FailureClass.ENV_MISSING: RecoveryAction.ABORT,
    FailureClass.WORKSPACE_DIRTY: RecoveryAction.ABORT,
    FailureClass.TRANSIENT_NETWORK: RecoveryAction.RETRY,
    FailureClass.UNKNOWN: RecoveryAction.QUARANTINE,
}

# Lowercased substrings matched by infer_failure_class_from_error() against a
# stripped, lowercased error string. Keep markers lowercase.
_ENV_MISSING_MARKERS = (
    "environmentcheckfailed:",
    "launch_ai_lab.sh not found",
    "no such file or directory",
    "command not found",
    "docker socket is stale",
)
_WORKSPACE_DIRTY_MARKERS = (
    "repository worktree is not clean",
    "repo root has uncommitted changes",
    "clean git checkout",
    "clean base repository",
)
_TRANSIENT_NETWORK_MARKERS = (
    "connection reset",
    "connection refused",
    "temporary failure",
    "network is unreachable",
    "timed out while connecting",
    "ssh:",
)
+
+
@dataclass(frozen=True, slots=True)
class FailureDisposition:
    """Immutable pair of a classified failure and the action to take for it."""

    # None for both fields means "no failure detected".
    failure_class: FailureClass | None
    recovery_action: RecoveryAction | None
+
+
def recovery_action_for_failure_class(failure_class: FailureClass | None) -> RecoveryAction | None:
    """Look up the configured recovery action; None passes through unchanged."""
    return None if failure_class is None else _FAILURE_ACTIONS[failure_class]
+
+
def classify_failure_class(failure_class: FailureClass | None) -> FailureDisposition:
    """Bundle a failure class together with its mapped recovery action."""
    action = recovery_action_for_failure_class(failure_class)
    return FailureDisposition(failure_class=failure_class, recovery_action=action)
+
+
def infer_failure_class_from_error(error_text: str | None) -> FailureClass | None:
    """Best-effort mapping from raw error text to a failure class.

    The text is stripped and lowercased, then checked against marker groups
    in priority order: workspace-dirty, env-missing, transient-network.
    Returns None for blank input or when no marker matches.
    """
    haystack = (error_text or "").strip().lower()
    if not haystack:
        return None
    marker_groups = (
        (_WORKSPACE_DIRTY_MARKERS, FailureClass.WORKSPACE_DIRTY),
        (_ENV_MISSING_MARKERS, FailureClass.ENV_MISSING),
        (_TRANSIENT_NETWORK_MARKERS, FailureClass.TRANSIENT_NETWORK),
    )
    for markers, failure_class in marker_groups:
        if any(marker in haystack for marker in markers):
            return failure_class
    return None
+
+
def classify_remote_status(
    status: RemoteRunStatus,
    *,
    stage: Literal["planner", "executor"] = "executor",
    error_text: str | None = None,
) -> FailureDisposition:
    """Classify a coarse remote status, using error text only for FAILED runs."""
    if status is RemoteRunStatus.STALLED:
        # Attribute the stall to whichever pipeline stage was active.
        stalled = FailureClass.PLANNER_STALLED if stage == "planner" else FailureClass.EXECUTOR_STALLED
        return classify_failure_class(stalled)
    if status is RemoteRunStatus.TIMED_OUT:
        return classify_failure_class(FailureClass.TOOL_TIMEOUT)
    if status is RemoteRunStatus.FAILED:
        return classify_failure_class(infer_failure_class_from_error(error_text) or FailureClass.UNKNOWN)
    # Non-terminal or successful statuses carry no failure.
    return classify_failure_class(None)
+
+
def classify_run_summary(summary: RunSummary) -> FailureDisposition:
    """Derive a failure disposition from a completed local RunSummary.

    Checks are ordered by specificity: driver terminal status first, then
    error-text markers, then fallback/validation heuristics, and finally the
    coarse final_status.
    """
    driver_result = summary.driver_result
    error_text = str(driver_result.error or "").strip()

    # Driver-reported terminal states take precedence over everything else.
    if driver_result.status == "stalled_no_progress":
        return classify_failure_class(FailureClass.EXECUTOR_STALLED)
    if driver_result.status == "timed_out":
        return classify_failure_class(FailureClass.TOOL_TIMEOUT)

    inferred = infer_failure_class_from_error(error_text)
    if inferred is not None:
        return classify_failure_class(inferred)

    # agent_id == "mock" indicates a model fallback; the validation outcome
    # decides whether it was benign or a post-fallback assertion failure.
    if driver_result.agent_id == "mock" and summary.validation.passed:
        return classify_failure_class(FailureClass.MODEL_FALLBACK)
    if driver_result.agent_id == "mock" and not summary.validation.passed:
        return classify_failure_class(FailureClass.ASSERTION_FAILED_AFTER_FALLBACK)
    if driver_result.recommended_action == "fallback" and not summary.validation.passed:
        return classify_failure_class(FailureClass.ASSERTION_FAILED_AFTER_FALLBACK)

    if summary.final_status == "failed":
        return classify_failure_class(FailureClass.UNKNOWN)
    return classify_failure_class(None)
+
+
def classify_remote_terminal(
    *,
    status: RemoteRunStatus,
    stage: Literal["planner", "executor"] = "executor",
    error_text: str | None = None,
    run_summary: RunSummary | None = None,
) -> FailureDisposition:
    """Classify a terminal remote run, preferring the rich RunSummary when present.

    When ``run_summary`` is available its classification is authoritative;
    otherwise the coarse remote ``status``/``error_text`` pair is classified.
    """
    # Bug fix: the original guarded on disposition.failure_class/status but
    # returned the identical disposition from both branches, making the
    # condition dead code. A single return is equivalent and honest.
    if run_summary is not None:
        return classify_run_summary(run_summary)
    return classify_remote_status(status, stage=stage, error_text=error_text)
diff --git a/src/autoresearch/core/dispatch/fake_remote_adapter.py b/src/autoresearch/core/dispatch/fake_remote_adapter.py
new file mode 100644
index 00000000..be4dce6d
--- /dev/null
+++ b/src/autoresearch/core/dispatch/fake_remote_adapter.py
@@ -0,0 +1,433 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+import json
+from pathlib import Path
+from typing import Any, Callable
+
+from autoresearch.agent_protocol.models import RunSummary
+from autoresearch.core.dispatch.failure_classifier import (
+ FailureDisposition,
+ classify_remote_terminal,
+ classify_run_summary,
+)
+from autoresearch.core.dispatch.remote_adapter import RemoteDispatchAdapter
+from autoresearch.shared.models import utc_now
+from autoresearch.shared.remote_run_contract import (
+ DispatchLane,
+ FailureClass,
+ RemoteHeartbeat,
+ RemoteRunRecord,
+ RemoteRunStatus,
+ RemoteRunSummary,
+ RemoteTaskSpec,
+)
+
+
@dataclass(slots=True)
class _FakeRunState:
    """Mutable per-run state tracked by FakeRemoteAdapter."""

    # The originating task and the scenario driving the fake state machine.
    task_spec: RemoteTaskSpec
    scenario: str
    # Latest record returned to callers of dispatch()/poll().
    record: RemoteRunRecord
    # Number of poll() calls so far; drives scenario progression.
    poll_count: int = 0
    heartbeats: list[RemoteHeartbeat] = field(default_factory=list)
    # Append-only event log persisted as events.ndjson.
    events: list[dict[str, Any]] = field(default_factory=list)
    # Terminal summary once the run reaches a terminal status (if produced).
    summary: RemoteRunSummary | None = None
+
+
class FakeRemoteAdapter(RemoteDispatchAdapter):
    """In-process stand-in for a remote dispatch lane.

    A scenario string (from task metadata or the requested/actual lane pair)
    drives a small state machine that poll() advances one step per call.
    Control artifacts are persisted under
    ``<runtime_root>/<run_id>/remote_control/`` with repo-relative paths.
    """

    # Statuses after which poll() no longer advances the state machine.
    TERMINAL_STATUSES = {
        RemoteRunStatus.SUCCEEDED,
        RemoteRunStatus.FAILED,
        RemoteRunStatus.STALLED,
        RemoteRunStatus.TIMED_OUT,
    }

    def __init__(
        self,
        *,
        repo_root: Path,
        local_runner: Callable[[Any], RunSummary],
        runtime_root: Path | None = None,
    ) -> None:
        # repo_root anchors all artifact paths; local_runner executes jobs
        # for the local_execute / fallback_to_local scenarios.
        self._repo_root = repo_root.resolve()
        self._runtime_root = self._resolve_runtime_root(runtime_root)
        self._local_runner = local_runner
        self._states: dict[str, _FakeRunState] = {}

    def _resolve_runtime_root(self, runtime_root: Path | None) -> Path:
        """Resolve the runs directory, enforcing that it stays inside repo_root."""
        candidate = (runtime_root or (self._repo_root / ".masfactory_runtime" / "runs")).resolve()
        try:
            candidate.relative_to(self._repo_root)
        except ValueError as exc:
            raise ValueError(
                "fake remote runtime_root must live under repo_root so artifact_paths remain repo-relative"
            ) from exc
        return candidate

    def dispatch(self, spec: RemoteTaskSpec) -> RemoteRunRecord:
        """Register a new run in QUEUED state and persist its control artifacts."""
        now = utc_now()
        scenario = self._scenario_for(spec)
        record = RemoteRunRecord(
            run_id=spec.run_id,
            requested_lane=spec.requested_lane,
            lane=spec.lane,
            status=RemoteRunStatus.QUEUED,
            summary=f"dispatch queued for {spec.lane.value} lane",
            updated_at=now,
            fallback_reason=self._fallback_reason_for(spec),
            metadata={
                "runtime_mode": spec.runtime_mode,
                "scenario": scenario,
                "planner_plan_id": spec.planner_plan_id,
                "planner_candidate_id": spec.planner_candidate_id,
            },
        )
        state = _FakeRunState(task_spec=spec, scenario=scenario, record=record)
        state.events.append(
            {
                "type": "queued",
                "recorded_at": now.isoformat(),
                "requested_lane": spec.requested_lane.value,
                "lane": spec.lane.value,
                "scenario": scenario,
            }
        )
        self._states[spec.run_id] = state
        self._persist_state(state)
        return record

    def poll(self, run_id: str) -> RemoteRunRecord:
        """Advance the scenario state machine one step and return the record.

        First poll typically transitions QUEUED -> RUNNING (with a heartbeat);
        the second poll reaches a terminal status. Some scenarios
        (env_missing, transient_network) fail immediately on the first poll.
        """
        state = self._require_state(run_id)
        if state.record.status in self.TERMINAL_STATUSES:
            # Already terminal: re-persist (refreshes artifact paths) and return.
            self._persist_state(state)
            return state.record

        state.poll_count += 1
        scenario = state.scenario
        if scenario in {"local_execute", "fallback_to_local"}:
            if state.poll_count == 1:
                state.record = self._running_record(state, summary="local execution in progress")
                state.heartbeats.append(self._heartbeat(state, sequence=1, summary="local execution heartbeat"))
            else:
                # Run the job synchronously on the local lane and wrap its result.
                run_summary = self._local_runner(state.task_spec.job)
                state.summary = self._summary_from_run_summary(state, run_summary)
                state.record = self._record_from_summary(state.summary)
        elif scenario == "success":
            if state.poll_count == 1:
                state.record = self._running_record(state, summary="remote execution in progress")
                state.heartbeats.append(self._heartbeat(state, sequence=1, summary="remote execution heartbeat"))
            else:
                state.summary = self._terminal_summary(
                    state=state,
                    status=RemoteRunStatus.SUCCEEDED,
                    summary_text="remote execution completed successfully",
                )
                state.record = self._record_from_summary(state.summary)
        elif scenario == "stalled":
            # Note: deliberately emits no heartbeat, simulating a stalled run.
            if state.poll_count == 1:
                state.record = self._running_record(state, summary="remote execution started without progress heartbeat")
            else:
                state.summary = self._terminal_summary(
                    state=state,
                    status=RemoteRunStatus.STALLED,
                    summary_text="remote execution stalled without progress heartbeat",
                )
                state.record = self._record_from_summary(state.summary)
        elif scenario == "timed_out":
            if state.poll_count == 1:
                state.record = self._running_record(state, summary="remote execution running toward timeout")
                state.heartbeats.append(self._heartbeat(state, sequence=1, summary="remote execution heartbeat"))
            else:
                state.summary = self._terminal_summary(
                    state=state,
                    status=RemoteRunStatus.TIMED_OUT,
                    summary_text="remote execution timed out",
                )
                state.record = self._record_from_summary(state.summary)
        elif scenario == "env_missing":
            # Fails immediately on the first poll.
            state.summary = self._terminal_summary(
                state=state,
                status=RemoteRunStatus.FAILED,
                summary_text="remote environment is missing required runtime dependencies",
                error_text="EnvironmentCheckFailed: missing remote runtime dependencies",
            )
            state.record = self._record_from_summary(state.summary)
        elif scenario == "transient_network":
            # Fails immediately on the first poll.
            state.summary = self._terminal_summary(
                state=state,
                status=RemoteRunStatus.FAILED,
                summary_text="remote dispatch failed because the connection was interrupted",
                error_text="ssh: connection reset by peer",
            )
            state.record = self._record_from_summary(state.summary)
        elif scenario == "result_fetch_failure":
            if state.poll_count == 1:
                state.record = self._running_record(state, summary="remote execution in progress")
                state.heartbeats.append(self._heartbeat(state, sequence=1, summary="remote execution heartbeat"))
            else:
                # Succeed at the record level but never produce a summary,
                # simulating a lost summary artifact.
                state.record = RemoteRunRecord(
                    run_id=state.task_spec.run_id,
                    requested_lane=state.task_spec.requested_lane,
                    lane=state.task_spec.lane,
                    status=RemoteRunStatus.SUCCEEDED,
                    summary="remote execution completed but summary artifact was lost",
                    started_at=state.record.started_at or utc_now(),
                    updated_at=utc_now(),
                    finished_at=utc_now(),
                    fallback_reason=state.record.fallback_reason,
                    metadata=state.record.metadata,
                )
                state.events.append(
                    {
                        "type": "summary_missing",
                        "recorded_at": utc_now().isoformat(),
                    }
                )
        else:
            # Unknown scenario: fail deterministically instead of hanging.
            state.summary = self._terminal_summary(
                state=state,
                status=RemoteRunStatus.FAILED,
                summary_text=f"unsupported fake remote scenario: {scenario}",
                error_text=f"unsupported fake remote scenario: {scenario}",
            )
            state.record = self._record_from_summary(state.summary)

        self._persist_state(state)
        return state.record

    def heartbeat(self, run_id: str) -> RemoteHeartbeat | None:
        """Return the most recent heartbeat for the run, or None if none yet."""
        state = self._require_state(run_id)
        return state.heartbeats[-1] if state.heartbeats else None

    def fetch_summary(self, run_id: str) -> RemoteRunSummary:
        """Return the terminal summary; raises FileNotFoundError if not produced."""
        state = self._require_state(run_id)
        if state.summary is None:
            raise FileNotFoundError(f"remote summary is not available for run: {run_id}")
        return state.summary

    def _scenario_for(self, spec: RemoteTaskSpec) -> str:
        """Pick the scenario: explicit metadata wins, else lane combination."""
        explicit = str(spec.metadata.get("remote_scenario") or "").strip()
        if explicit:
            return explicit
        if spec.requested_lane is DispatchLane.REMOTE and spec.lane is DispatchLane.LOCAL:
            return "fallback_to_local"
        if spec.lane is DispatchLane.LOCAL:
            return "local_execute"
        return "success"

    @staticmethod
    def _fallback_reason_for(spec: RemoteTaskSpec) -> str | None:
        """Explicit metadata reason wins; else synthesize one for remote->local."""
        raw_reason = str(spec.metadata.get("fallback_reason") or "").strip()
        if raw_reason:
            return raw_reason
        if spec.requested_lane is DispatchLane.REMOTE and spec.lane is DispatchLane.LOCAL:
            return "remote lane unavailable; downgraded to local"
        return None

    def _running_record(self, state: _FakeRunState, *, summary: str) -> RemoteRunRecord:
        """Copy the current record into RUNNING state and log a 'running' event."""
        now = utc_now()
        record = state.record.model_copy(
            update={
                "status": RemoteRunStatus.RUNNING,
                "summary": summary,
                "started_at": state.record.started_at or now,
                "updated_at": now,
            }
        )
        state.events.append(
            {
                "type": "running",
                "recorded_at": now.isoformat(),
                "summary": summary,
            }
        )
        return record

    def _heartbeat(self, state: _FakeRunState, *, sequence: int, summary: str) -> RemoteHeartbeat:
        """Build a RUNNING heartbeat and append a matching event."""
        heartbeat = RemoteHeartbeat(
            run_id=state.task_spec.run_id,
            lane=state.task_spec.lane,
            status=RemoteRunStatus.RUNNING,
            sequence=sequence,
            summary=summary,
            artifact_paths=self._artifact_paths(state, include_summary=False),
        )
        state.events.append(
            {
                "type": "heartbeat",
                "recorded_at": heartbeat.recorded_at.isoformat(),
                "sequence": sequence,
                "summary": summary,
            }
        )
        return heartbeat

    def _summary_from_run_summary(self, state: _FakeRunState, run_summary: RunSummary) -> RemoteRunSummary:
        """Wrap a local RunSummary into a RemoteRunSummary with classification."""
        disposition = classify_run_summary(run_summary)
        # Only promotion-ready/promoted local runs count as remote success.
        final_status = (
            RemoteRunStatus.SUCCEEDED
            if run_summary.final_status in {"ready_for_promotion", "promoted"}
            else RemoteRunStatus.FAILED
        )
        now = utc_now()
        summary = RemoteRunSummary(
            run_id=state.task_spec.run_id,
            requested_lane=state.task_spec.requested_lane,
            lane=state.task_spec.lane,
            status=final_status,
            failure_class=disposition.failure_class,
            recovery_action=disposition.recovery_action,
            artifact_paths=self._artifact_paths(state),
            summary=f"local lane completed with final_status={run_summary.final_status}",
            started_at=state.record.started_at or now,
            updated_at=now,
            finished_at=now,
            fallback_reason=state.record.fallback_reason,
            metadata=state.record.metadata,
            run_summary=run_summary,
        )
        state.events.append(
            {
                "type": "completed",
                "recorded_at": now.isoformat(),
                "status": summary.status.value,
                "final_status": run_summary.final_status,
            }
        )
        return summary

    def _terminal_summary(
        self,
        *,
        state: _FakeRunState,
        status: RemoteRunStatus,
        summary_text: str,
        error_text: str | None = None,
    ) -> RemoteRunSummary:
        """Build a classified terminal summary and append a 'completed' event."""
        now = utc_now()
        disposition = classify_remote_terminal(status=status, error_text=error_text)
        summary = RemoteRunSummary(
            run_id=state.task_spec.run_id,
            requested_lane=state.task_spec.requested_lane,
            lane=state.task_spec.lane,
            status=status,
            failure_class=disposition.failure_class,
            recovery_action=disposition.recovery_action,
            artifact_paths=self._artifact_paths(state),
            summary=summary_text,
            started_at=state.record.started_at or now,
            updated_at=now,
            finished_at=now,
            fallback_reason=state.record.fallback_reason,
            metadata={
                **state.record.metadata,
                **({"error": error_text} if error_text else {}),
            },
        )
        state.events.append(
            {
                "type": "completed",
                "recorded_at": now.isoformat(),
                "status": status.value,
                "summary": summary_text,
                "error": error_text,
            }
        )
        return summary

    @staticmethod
    def _record_from_summary(summary: RemoteRunSummary) -> RemoteRunRecord:
        """Project a summary down to a record (drops the nested run_summary)."""
        return RemoteRunRecord.model_validate(summary.model_dump(mode="json", exclude={"run_summary"}))

    def _artifact_paths(self, state: _FakeRunState, *, include_summary: bool = True) -> dict[str, str]:
        """Compute the repo-relative artifact paths for the run's current state."""
        run_dir = self._runtime_root / state.task_spec.run_id
        control_dir = run_dir / "remote_control"
        paths = {
            "task_spec": self._relpath(control_dir / "task_spec.json"),
            "record": self._relpath(control_dir / "record.json"),
            "events": self._relpath(control_dir / "events.ndjson"),
        }
        if state.heartbeats:
            paths["heartbeat"] = self._relpath(control_dir / "heartbeat.json")
        if include_summary and state.summary is not None:
            paths["summary"] = self._relpath(control_dir / "summary.json")
        if state.summary is not None and state.summary.run_summary is not None:
            # Surface pre-existing local-lane artifacts when present.
            legacy_summary = run_dir / "summary.json"
            if legacy_summary.exists():
                paths["legacy_run_summary"] = self._relpath(legacy_summary)
            patch_uri = str(state.summary.run_summary.promotion_patch_uri or "").strip()
            if patch_uri:
                patch_path = Path(patch_uri)
                # Only relative patch URIs are exposed; absolute ones are dropped.
                if not patch_path.is_absolute():
                    paths["promotion_patch"] = patch_path.as_posix()
        return paths

    def _persist_state(self, state: _FakeRunState) -> None:
        """Write task spec, record, events, heartbeat, and summary to disk.

        Re-validates record/heartbeat/summary with refreshed artifact_paths so
        the in-memory objects match what is persisted. The summary file is
        intentionally withheld for the result_fetch_failure scenario.
        """
        run_dir = self._runtime_root / state.task_spec.run_id
        control_dir = run_dir / "remote_control"
        control_dir.mkdir(parents=True, exist_ok=True)

        task_spec_path = control_dir / "task_spec.json"
        task_spec_path.write_text(
            json.dumps(state.task_spec.model_dump(mode="json"), ensure_ascii=False, indent=2),
            encoding="utf-8",
        )

        record = RemoteRunRecord.model_validate(
            {
                **state.record.model_dump(mode="json"),
                "artifact_paths": self._artifact_paths(state),
            }
        )
        state.record = record
        (control_dir / "record.json").write_text(
            json.dumps(record.model_dump(mode="json"), ensure_ascii=False, indent=2),
            encoding="utf-8",
        )

        (control_dir / "events.ndjson").write_text(
            "".join(json.dumps(event, ensure_ascii=False) + "\n" for event in state.events),
            encoding="utf-8",
        )

        if state.heartbeats:
            heartbeat = RemoteHeartbeat.model_validate(
                {
                    **state.heartbeats[-1].model_dump(mode="json"),
                    "artifact_paths": self._artifact_paths(state, include_summary=False),
                }
            )
            state.heartbeats[-1] = heartbeat
            (control_dir / "heartbeat.json").write_text(
                json.dumps(heartbeat.model_dump(mode="json"), ensure_ascii=False, indent=2),
                encoding="utf-8",
            )

        if state.summary is not None:
            summary = RemoteRunSummary.model_validate(
                {
                    **state.summary.model_dump(mode="json"),
                    "artifact_paths": self._artifact_paths(state),
                }
            )
            state.summary = summary
            if state.scenario != "result_fetch_failure":
                (control_dir / "summary.json").write_text(
                    json.dumps(summary.model_dump(mode="json"), ensure_ascii=False, indent=2),
                    encoding="utf-8",
                )

    def _require_state(self, run_id: str) -> _FakeRunState:
        """Return the state for run_id (stripped) or raise KeyError."""
        normalized = run_id.strip()
        if normalized not in self._states:
            raise KeyError(f"unknown fake remote run: {normalized}")
        return self._states[normalized]

    def _relpath(self, path: Path) -> str:
        """Render a path repo-relative (POSIX); refuses paths escaping repo_root."""
        try:
            return path.resolve().relative_to(self._repo_root).as_posix()
        except ValueError as exc:
            raise ValueError(
                "fake remote artifact path escaped repo_root; refusing to emit an absolute artifact_paths value"
            ) from exc
diff --git a/src/autoresearch/core/dispatch/remote_adapter.py b/src/autoresearch/core/dispatch/remote_adapter.py
new file mode 100644
index 00000000..9f0cab62
--- /dev/null
+++ b/src/autoresearch/core/dispatch/remote_adapter.py
@@ -0,0 +1,28 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+
+from autoresearch.shared.remote_run_contract import (
+ RemoteHeartbeat,
+ RemoteRunRecord,
+ RemoteRunSummary,
+ RemoteTaskSpec,
+)
+
+
class RemoteDispatchAdapter(ABC):
    """Contract for dispatching tasks to an execution lane (local or remote)."""

    @abstractmethod
    def dispatch(self, spec: RemoteTaskSpec) -> RemoteRunRecord:
        """Queue the task described by ``spec`` and return its initial record."""
        raise NotImplementedError

    @abstractmethod
    def poll(self, run_id: str) -> RemoteRunRecord:
        """Return the latest record for ``run_id``, advancing state as needed."""
        raise NotImplementedError

    @abstractmethod
    def heartbeat(self, run_id: str) -> RemoteHeartbeat | None:
        """Return the most recent heartbeat for ``run_id``, or None if none yet."""
        raise NotImplementedError

    @abstractmethod
    def fetch_summary(self, run_id: str) -> RemoteRunSummary:
        """Return the terminal summary; implementations may raise if unavailable."""
        raise NotImplementedError
diff --git a/src/autoresearch/core/runtime/__init__.py b/src/autoresearch/core/runtime/__init__.py
new file mode 100644
index 00000000..20bc0177
--- /dev/null
+++ b/src/autoresearch/core/runtime/__init__.py
@@ -0,0 +1 @@
+"""Runtime mode selection and config helpers."""
diff --git a/src/autoresearch/core/runtime/select_mode.py b/src/autoresearch/core/runtime/select_mode.py
new file mode 100644
index 00000000..3c47d402
--- /dev/null
+++ b/src/autoresearch/core/runtime/select_mode.py
@@ -0,0 +1,102 @@
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from typing import Any
+
+import yaml
+from pydantic import Field, field_validator
+
+from autoresearch.shared.models import StrictModel
+from autoresearch.shared.remote_run_contract import DispatchLane
+
+
class RuntimeModePolicy(StrictModel):
    """Validated runtime-mode policy loaded from configs/runtime/<name>.yaml."""

    # Mode name; normalized to lowercase by the validator below.
    name: str = Field(..., min_length=1)
    # Lane this mode prefers; select_mode() may downgrade REMOTE to LOCAL.
    preferred_lane: DispatchLane = DispatchLane.LOCAL
    max_workers: int = Field(default=1, ge=1)
    max_concurrency: int = Field(default=1, ge=1)
    # Capability toggles for what runs under this mode may do.
    allow_exploration: bool = False
    allow_patch: bool = True
    allow_draft_pr: bool = False
    require_high_risk_approval: bool = True
    # Per-run resource budgets.
    step_budget: int = Field(default=8, ge=1)
    token_budget: int = Field(default=20_000, ge=1)
    timeout_sec: int = Field(default=900, ge=1)
    metadata: dict[str, Any] = Field(default_factory=dict)

    @field_validator("name")
    @classmethod
    def _normalize_name(cls, value: str) -> str:
        """Trim and lowercase the mode name; reject blank values."""
        normalized = value.strip().lower()
        if not normalized:
            raise ValueError("mode name is required")
        return normalized
+
+
class SelectedRuntimeMode(StrictModel):
    """Outcome of select_mode(): the chosen lane plus the underlying policy."""

    name: str = Field(..., min_length=1)
    # Lane the policy asked for vs the lane actually granted.
    requested_lane: DispatchLane
    lane: DispatchLane
    # Human-readable explanation when lane != requested_lane; else None.
    fallback_reason: str | None = None
    policy: RuntimeModePolicy

    @field_validator("name")
    @classmethod
    def _normalize_selected_name(cls, value: str) -> str:
        """Trim and lowercase the selected mode name; reject blank values."""
        normalized = value.strip().lower()
        if not normalized:
            raise ValueError("selected mode name is required")
        return normalized

    @field_validator("fallback_reason")
    @classmethod
    def _normalize_reason(cls, value: str | None) -> str | None:
        """Collapse blank fallback reasons to None."""
        if value is None:
            return None
        normalized = value.strip()
        return normalized or None
+
+
+def _repo_root(repo_root: Path | None) -> Path:
+ if repo_root is not None:
+ return repo_root.resolve()
+ return Path(__file__).resolve().parents[4]
+
+
+def _config_path(repo_root: Path, mode_name: str) -> Path:
+ return repo_root / "configs" / "runtime" / f"{mode_name}.yaml"
+
+
def load_mode_policy(repo_root: Path | None = None, mode_name: str = "day") -> RuntimeModePolicy:
    """Load and validate the runtime-mode policy YAML for ``mode_name``.

    Looks under ``repo_root`` first, then falls back to the package-derived
    repo root. Raises FileNotFoundError naming BOTH candidate paths when
    neither config exists (the original surfaced an error mentioning only
    the fallback path), and ValueError when the YAML is not a mapping.
    """
    root = _repo_root(repo_root)
    normalized_mode = mode_name.strip().lower()
    primary = _config_path(root, normalized_mode)
    config_path = primary
    if not config_path.exists():
        config_path = _config_path(_repo_root(None), normalized_mode)
        if not config_path.exists():
            # Robustness: fail with both locations instead of an opaque
            # read_text() error that only mentions the fallback path.
            raise FileNotFoundError(
                f"runtime mode config not found: tried {primary} and {config_path}"
            )
    payload = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {}
    if not isinstance(payload, dict):
        raise ValueError(f"runtime mode config must be a mapping: {config_path}")
    return RuntimeModePolicy.model_validate({"name": normalized_mode, **payload})
+
+
def select_mode(
    repo_root: Path | None = None,
    *,
    requested_mode: str | None = None,
    remote_available: bool | None = None,
) -> SelectedRuntimeMode:
    """Resolve the active runtime mode and its dispatch lane.

    Mode name precedence: explicit argument, then AUTORESEARCH_RUNTIME_MODE,
    then "day". A REMOTE-preferring policy is downgraded to LOCAL only when
    remote_available is explicitly False (None means "unknown": no downgrade).
    """
    raw_mode = requested_mode or os.environ.get("AUTORESEARCH_RUNTIME_MODE") or "day"
    policy = load_mode_policy(repo_root, raw_mode.strip().lower())

    selected_lane = policy.preferred_lane
    downgrade_reason: str | None = None
    if policy.preferred_lane is DispatchLane.REMOTE and remote_available is False:
        selected_lane = DispatchLane.LOCAL
        downgrade_reason = "remote lane unavailable; downgraded to local"

    return SelectedRuntimeMode(
        name=policy.name,
        requested_lane=policy.preferred_lane,
        lane=selected_lane,
        fallback_reason=downgrade_reason,
        policy=policy,
    )
diff --git a/src/autoresearch/core/services/agent_audit_trail.py b/src/autoresearch/core/services/agent_audit_trail.py
new file mode 100644
index 00000000..bf814730
--- /dev/null
+++ b/src/autoresearch/core/services/agent_audit_trail.py
@@ -0,0 +1,679 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from datetime import datetime
+import json
+from pathlib import Path
+from typing import Any
+
+from autoresearch.agents.manager_agent import ManagerAgentService
+from autoresearch.core.services.autoresearch_planner import AutoResearchPlannerService
+from autoresearch.core.services.claude_agents import ClaudeAgentService
+from autoresearch.shared.models import (
+ AdminAgentAuditRole,
+ AdminAgentAuditTrailDetailRead,
+ AdminAgentAuditTrailEntryRead,
+ AdminAgentAuditTrailSnapshotRead,
+ AdminAgentAuditTrailStatsRead,
+ JobStatus,
+ utc_now,
+)
+
+ # Cap on patch text returned to the admin UI; longer patches are truncated
+ # by _load_patch_text with an explicit "[patch truncated]" marker.
+ _MAX_PATCH_CHARS = 120_000
+ # Status vocabularies used to bucket heterogeneous run states into the
+ # coarse categories reported by _status_bucket / _build_stats.
+ _SUCCESS_STATUSES = {"completed", "ready_for_promotion", "promoted", "succeeded"}
+ _FAILED_STATUSES = {
+ "failed",
+ "blocked",
+ "interrupted",
+ "timed_out",
+ "stalled_no_progress",
+ "policy_blocked",
+ "contract_error",
+ "rejected",
+ }
+ _PENDING_STATUSES = {"queued", "created", "pending", "dispatching"}
+ _RUNNING_STATUSES = {"running"}
+ _REVIEW_STATUSES = {"human_review", "needs_human_review"}
+
+
+ # Internal carrier pairing a public audit entry with the detail payloads
+ # (prompt, specs, patch text, errors, raw record) needed by detail().
+ @dataclass(slots=True)
+ class _AuditEntryContext:
+ entry: AdminAgentAuditTrailEntryRead
+ input_prompt: str | None = None
+ job_spec: dict[str, Any] = field(default_factory=dict)
+ worker_spec: dict[str, Any] = field(default_factory=dict)
+ controlled_request: dict[str, Any] = field(default_factory=dict)
+ patch_text: str = ""
+ patch_truncated: bool = False
+ error_reason: str | None = None
+ traceback: str | None = None
+ raw_record: dict[str, Any] = field(default_factory=dict)
+
+
+ class AgentAuditTrailService:
+ """Aggregate worker execution footprints into an admin-friendly audit timeline."""
+
+ # Dependencies are injected; this service only reads from them and from
+ # runtime artifact files under repo_root.
+ def __init__(
+ self,
+ *,
+ repo_root: Path,
+ planner_service: AutoResearchPlannerService,
+ manager_service: ManagerAgentService,
+ agent_service: ClaudeAgentService,
+ ) -> None:
+ self._repo_root = repo_root.resolve()
+ self._planner_service = planner_service
+ self._manager_service = manager_service
+ self._agent_service = agent_service
+
+ # Return the newest matching entries (at least one, at most `limit`)
+ # plus aggregate stats.
+ # NOTE(review): stats are computed from the truncated page of items, not
+ # from the full filtered set — confirm this is the intended semantics.
+ def snapshot(
+ self,
+ *,
+ limit: int = 20,
+ status_filter: str | None = None,
+ agent_role: str | None = None,
+ ) -> AdminAgentAuditTrailSnapshotRead:
+ contexts = self._filter_contexts(
+ self._collect_entry_contexts(),
+ status_filter=status_filter,
+ agent_role=agent_role,
+ )
+ items = [context.entry for context in contexts[: max(1, limit)]]
+ return AdminAgentAuditTrailSnapshotRead(
+ items=items,
+ stats=self._build_stats(items),
+ issued_at=utc_now(),
+ )
+
+ # Look up one entry by id and expand it with its detail payloads.
+ # Raises KeyError for a blank id or when no entry matches.
+ def detail(self, entry_id: str) -> AdminAgentAuditTrailDetailRead:
+ normalized_entry_id = entry_id.strip()
+ if not normalized_entry_id:
+ raise KeyError("audit trail entry id is required")
+ for context in self._collect_entry_contexts():
+ if context.entry.entry_id != normalized_entry_id:
+ continue
+ return AdminAgentAuditTrailDetailRead(
+ entry=context.entry,
+ input_prompt=context.input_prompt,
+ job_spec=dict(context.job_spec),
+ worker_spec=dict(context.worker_spec),
+ controlled_request=dict(context.controlled_request),
+ patch_text=context.patch_text,
+ patch_truncated=context.patch_truncated,
+ error_reason=context.error_reason,
+ traceback=context.traceback,
+ raw_record=dict(context.raw_record),
+ )
+ raise KeyError(f"audit trail entry not found: {entry_id}")
+
+ # Merge all sources into one timeline keyed by run_id. Precedence:
+ # manager tasks win outright; planner and claude entries only fill gaps
+ # (setdefault); runtime artifacts are merged field-by-field into whatever
+ # entry already holds the run_id. Newest recorded_at first.
+ def _collect_entry_contexts(self) -> list[_AuditEntryContext]:
+ contexts_by_run: dict[str, _AuditEntryContext] = {}
+ for context in self._collect_manager_contexts():
+ contexts_by_run[context.entry.run_id] = context
+ for context in self._collect_planner_contexts():
+ contexts_by_run.setdefault(context.entry.run_id, context)
+ for context in self._collect_claude_contexts():
+ contexts_by_run.setdefault(context.entry.run_id, context)
+ for context in self._collect_runtime_contexts():
+ existing = contexts_by_run.get(context.entry.run_id)
+ if existing is None:
+ contexts_by_run[context.entry.run_id] = context
+ continue
+ contexts_by_run[context.entry.run_id] = self._merge_contexts(
+ existing=existing,
+ incoming=context,
+ )
+ return sorted(
+ contexts_by_run.values(),
+ key=lambda item: item.entry.recorded_at,
+ reverse=True,
+ )
+
+ # Build one audit context per task of every manager dispatch that has an
+ # execution plan. Error precedence: task error > run-summary driver error
+ # > dispatch-level error.
+ def _collect_manager_contexts(self) -> list[_AuditEntryContext]:
+ contexts: list[_AuditEntryContext] = []
+ for dispatch in self._manager_service.list_dispatches():
+ if dispatch.execution_plan is None:
+ continue
+ for task in dispatch.execution_plan.tasks:
+ run_summary = task.run_summary
+ patch_uri = run_summary.promotion_patch_uri if run_summary is not None else None
+ patch_text, patch_truncated = self._load_patch_text(patch_uri)
+ changed_paths = self._extract_changed_paths(run_summary)
+ error_reason = (
+ self._normalize_text(task.error)
+ or self._extract_run_summary_error(run_summary)
+ or self._normalize_text(dispatch.error)
+ )
+ contexts.append(
+ _AuditEntryContext(
+ entry=AdminAgentAuditTrailEntryRead(
+ entry_id=f"manager:{task.task_id}",
+ source="manager_task",
+ agent_role=AdminAgentAuditRole.MANAGER,
+ run_id=self._extract_task_run_id(task, fallback=task.task_id),
+ agent_id=run_summary.driver_result.agent_id if run_summary is not None else "openhands",
+ title=task.title,
+ status=task.status.value,
+ final_status=run_summary.final_status if run_summary is not None else None,
+ # recorded_at reflects the dispatch's last update, not the
+ # individual task's completion time.
+ recorded_at=dispatch.updated_at,
+ duration_ms=self._extract_duration_ms(run_summary),
+ first_progress_ms=self._extract_metric(run_summary, "first_progress_ms"),
+ first_scoped_write_ms=self._extract_metric(run_summary, "first_scoped_write_ms"),
+ first_state_heartbeat_ms=self._extract_metric(run_summary, "first_state_heartbeat_ms"),
+ files_changed=len(changed_paths),
+ changed_paths=changed_paths,
+ scope_paths=list(task.worker_spec.allowed_paths) if task.worker_spec is not None else [],
+ patch_uri=patch_uri,
+ summary=task.summary,
+ metadata={
+ "dispatch_id": dispatch.dispatch_id,
+ "intent": dispatch.selected_intent.label if dispatch.selected_intent is not None else None,
+ "stage": task.stage.value,
+ "depends_on": list(task.depends_on),
+ },
+ ),
+ input_prompt=dispatch.prompt,
+ job_spec=self._model_payload(task.agent_job),
+ worker_spec=self._model_payload(task.worker_spec),
+ controlled_request=self._model_payload(task.controlled_request),
+ patch_text=patch_text,
+ patch_truncated=patch_truncated,
+ error_reason=error_reason,
+ # traceback only kept when the task error is multi-line text.
+ traceback=self._multiline_or_none(task.error),
+ raw_record={"manager_dispatch": dispatch.model_dump(mode="json")},
+ )
+ )
+ return contexts
+
+ # Build one audit context per autoresearch plan, folding in dispatch-run
+ # lane/failure/recovery details as metadata. run_id falls back from the
+ # run summary to the agent job to the plan id.
+ def _collect_planner_contexts(self) -> list[_AuditEntryContext]:
+ contexts: list[_AuditEntryContext] = []
+ for plan in self._planner_service.list():
+ run_summary = plan.run_summary
+ dispatch_run = plan.dispatch_run
+ patch_uri = run_summary.promotion_patch_uri if run_summary is not None else None
+ patch_text, patch_truncated = self._load_patch_text(patch_uri)
+ changed_paths = self._extract_changed_paths(run_summary)
+ title = plan.selected_candidate.title if plan.selected_candidate is not None else plan.goal
+ contexts.append(
+ _AuditEntryContext(
+ entry=AdminAgentAuditTrailEntryRead(
+ entry_id=f"plan:{plan.plan_id}",
+ source="autoresearch_plan",
+ agent_role=AdminAgentAuditRole.PLANNER,
+ run_id=run_summary.run_id if run_summary is not None else (plan.agent_job.run_id if plan.agent_job else plan.plan_id),
+ agent_id=run_summary.driver_result.agent_id if run_summary is not None else "openhands",
+ title=title,
+ status=plan.dispatch_status.value,
+ final_status=run_summary.final_status if run_summary is not None else None,
+ recorded_at=plan.dispatch_completed_at or plan.updated_at,
+ duration_ms=self._extract_duration_ms(run_summary),
+ first_progress_ms=self._extract_metric(run_summary, "first_progress_ms"),
+ first_scoped_write_ms=self._extract_metric(run_summary, "first_scoped_write_ms"),
+ first_state_heartbeat_ms=self._extract_metric(run_summary, "first_state_heartbeat_ms"),
+ files_changed=len(changed_paths),
+ changed_paths=changed_paths,
+ scope_paths=list(plan.worker_spec.allowed_paths) if plan.worker_spec is not None else [],
+ patch_uri=patch_uri,
+ summary=plan.summary,
+ metadata={
+ "plan_id": plan.plan_id,
+ "candidate_category": (
+ plan.selected_candidate.category if plan.selected_candidate is not None else None
+ ),
+ "source_path": (
+ plan.selected_candidate.source_path if plan.selected_candidate is not None else None
+ ),
+ "dispatch_requested_lane": (
+ dispatch_run.requested_lane.value if dispatch_run is not None else None
+ ),
+ "dispatch_lane": dispatch_run.lane.value if dispatch_run is not None else None,
+ "dispatch_remote_status": (
+ dispatch_run.status.value if dispatch_run is not None else None
+ ),
+ "dispatch_failure_class": (
+ dispatch_run.failure_class.value
+ if dispatch_run is not None and dispatch_run.failure_class is not None
+ else None
+ ),
+ "dispatch_recovery_action": (
+ dispatch_run.recovery_action.value
+ if dispatch_run is not None and dispatch_run.recovery_action is not None
+ else None
+ ),
+ "dispatch_fallback_reason": (
+ dispatch_run.fallback_reason if dispatch_run is not None else None
+ ),
+ },
+ ),
+ input_prompt=plan.goal,
+ job_spec=self._model_payload(plan.agent_job),
+ worker_spec=self._model_payload(plan.worker_spec),
+ controlled_request=self._model_payload(plan.controlled_request),
+ patch_text=patch_text,
+ patch_truncated=patch_truncated,
+ # Error precedence: dispatch error > plan error > driver error.
+ error_reason=(
+ self._normalize_text(plan.dispatch_error)
+ or self._normalize_text(plan.error)
+ or self._extract_run_summary_error(run_summary)
+ ),
+ traceback=self._multiline_or_none(plan.error),
+ raw_record={"autoresearch_plan": plan.model_dump(mode="json")},
+ )
+ )
+ return contexts
+
+ # Build one audit context per Claude CLI agent run. Final status is only
+ # set when the run reached a terminal JobStatus.
+ # NOTE(review): _load_patch_text may be invoked twice for the same
+ # patch_uri (patch_text and patch_truncated branches), re-reading the
+ # file from disk — consider computing the tuple once.
+ def _collect_claude_contexts(self) -> list[_AuditEntryContext]:
+ contexts: list[_AuditEntryContext] = []
+ for run in self._agent_service.list():
+ contexts.append(
+ _AuditEntryContext(
+ entry=AdminAgentAuditTrailEntryRead(
+ entry_id=f"claude:{run.agent_run_id}",
+ source="claude_agent",
+ agent_role=AdminAgentAuditRole.WORKER,
+ run_id=run.agent_run_id,
+ agent_id=run.agent_name or "claude_cli",
+ title=run.task_name,
+ status=run.status.value,
+ final_status=(
+ run.status.value
+ if run.status in {JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.INTERRUPTED}
+ else None
+ ),
+ recorded_at=run.updated_at,
+ duration_ms=int(run.duration_seconds * 1000) if run.duration_seconds is not None else None,
+ first_progress_ms=None,
+ first_scoped_write_ms=None,
+ first_state_heartbeat_ms=None,
+ files_changed=0,
+ changed_paths=[],
+ scope_paths=[],
+ patch_uri=self._normalize_text(run.metadata.get("patch_uri")),
+ # First non-empty of stderr/stdout previews, else prompt head.
+ summary=run.stderr_preview or run.stdout_preview or run.prompt[:160],
+ metadata={"session_id": run.session_id, "parent_agent_id": run.parent_agent_id},
+ ),
+ input_prompt=run.prompt,
+ patch_text=self._load_patch_text(self._normalize_text(run.metadata.get("patch_uri")))[0]
+ if self._normalize_text(run.metadata.get("patch_uri"))
+ else "",
+ patch_truncated=self._load_patch_text(self._normalize_text(run.metadata.get("patch_uri")))[1]
+ if self._normalize_text(run.metadata.get("patch_uri"))
+ else False,
+ error_reason=self._normalize_text(run.error),
+ traceback=self._normalize_text(run.stderr_preview),
+ raw_record={"claude_agent": run.model_dump(mode="json")},
+ )
+ )
+ return contexts
+
+ # Build audit contexts from on-disk runtime summary JSON artifacts.
+ # Files without a parseable dict payload or a run_id are skipped.
+ def _collect_runtime_contexts(self) -> list[_AuditEntryContext]:
+ contexts: list[_AuditEntryContext] = []
+ for path in self._runtime_summary_files():
+ payload = self._load_json(path)
+ if not isinstance(payload, dict):
+ continue
+ run_id = str(payload.get("run_id", "")).strip()
+ if not run_id:
+ continue
+ changed_paths = self._runtime_changed_paths(payload)
+ patch_uri = self._runtime_patch_uri(payload)
+ patch_text, patch_truncated = self._load_patch_text(patch_uri)
+ contexts.append(
+ _AuditEntryContext(
+ entry=AdminAgentAuditTrailEntryRead(
+ entry_id=f"runtime:{run_id}",
+ source="runtime_artifact",
+ agent_role=AdminAgentAuditRole.WORKER,
+ run_id=run_id,
+ agent_id=self._runtime_agent_id(payload),
+ title=str(payload.get("task") or payload.get("run_id") or path.parent.name),
+ status=self._runtime_status(payload),
+ final_status=str(payload.get("final_status") or payload.get("status") or "").strip() or None,
+ # Timestamp comes from the artifact file's mtime, placed in
+ # the same tz utc_now() uses.
+ recorded_at=datetime.fromtimestamp(path.stat().st_mtime, tz=utc_now().tzinfo),
+ duration_ms=self._runtime_duration_ms(payload),
+ first_progress_ms=self._runtime_metric_ms(payload, "first_progress_ms"),
+ first_scoped_write_ms=self._runtime_metric_ms(payload, "first_scoped_write_ms"),
+ first_state_heartbeat_ms=self._runtime_metric_ms(payload, "first_state_heartbeat_ms"),
+ files_changed=self._runtime_files_changed(payload, changed_paths),
+ changed_paths=changed_paths,
+ scope_paths=[],
+ patch_uri=patch_uri,
+ isolated_workspace=str(payload.get("isolated_workspace", "")).strip() or None,
+ summary=self._runtime_summary_text(payload),
+ metadata={"artifact_path": str(path)},
+ ),
+ input_prompt=self._normalize_text(payload.get("task")),
+ job_spec=self._dict_payload(payload.get("job_spec")),
+ worker_spec=self._dict_payload(payload.get("worker_spec")),
+ controlled_request=self._dict_payload(payload.get("controlled_request")),
+ patch_text=patch_text,
+ patch_truncated=patch_truncated,
+ error_reason=self._runtime_error_reason(payload),
+ traceback=self._runtime_traceback(payload),
+ raw_record={"runtime_artifact": payload},
+ )
+ )
+ return contexts
+
+ # Apply normalized status/role filters; None or "all" means no filtering.
+ def _filter_contexts(
+ self,
+ contexts: list[_AuditEntryContext],
+ *,
+ status_filter: str | None,
+ agent_role: str | None,
+ ) -> list[_AuditEntryContext]:
+ normalized_status = self._normalize_filter(status_filter)
+ normalized_role = self._normalize_filter(agent_role)
+ filtered: list[_AuditEntryContext] = []
+ for context in contexts:
+ if not self._matches_status_filter(context.entry, normalized_status):
+ continue
+ if not self._matches_role_filter(context.entry, normalized_role):
+ continue
+ filtered.append(context)
+ return filtered
+
+ # Merge a runtime-artifact context into an existing entry. The existing
+ # entry's fields win; incoming values only fill gaps (None/empty), except
+ # files_changed which takes the max and metadata/raw_record which are
+ # dict-merged with existing keys taking precedence.
+ def _merge_contexts(
+ self,
+ *,
+ existing: _AuditEntryContext,
+ incoming: _AuditEntryContext,
+ ) -> _AuditEntryContext:
+ return _AuditEntryContext(
+ entry=existing.entry.model_copy(
+ update={
+ "duration_ms": self._prefer_metric(
+ existing.entry.duration_ms,
+ incoming.entry.duration_ms,
+ ),
+ "first_progress_ms": self._prefer_metric(
+ existing.entry.first_progress_ms,
+ incoming.entry.first_progress_ms,
+ ),
+ "first_scoped_write_ms": self._prefer_metric(
+ existing.entry.first_scoped_write_ms,
+ incoming.entry.first_scoped_write_ms,
+ ),
+ "first_state_heartbeat_ms": self._prefer_metric(
+ existing.entry.first_state_heartbeat_ms,
+ incoming.entry.first_state_heartbeat_ms,
+ ),
+ "files_changed": max(existing.entry.files_changed, incoming.entry.files_changed),
+ "changed_paths": existing.entry.changed_paths or incoming.entry.changed_paths,
+ "patch_uri": existing.entry.patch_uri or incoming.entry.patch_uri,
+ "isolated_workspace": existing.entry.isolated_workspace or incoming.entry.isolated_workspace,
+ "summary": existing.entry.summary or incoming.entry.summary,
+ "metadata": {**incoming.entry.metadata, **existing.entry.metadata},
+ }
+ ),
+ input_prompt=existing.input_prompt or incoming.input_prompt,
+ job_spec=existing.job_spec or incoming.job_spec,
+ worker_spec=existing.worker_spec or incoming.worker_spec,
+ controlled_request=existing.controlled_request or incoming.controlled_request,
+ patch_text=existing.patch_text or incoming.patch_text,
+ patch_truncated=existing.patch_truncated or incoming.patch_truncated,
+ error_reason=existing.error_reason or incoming.error_reason,
+ traceback=existing.traceback or incoming.traceback,
+ raw_record={**incoming.raw_record, **existing.raw_record},
+ )
+
+ # Tally entries into coarse status buckets for the snapshot stats panel.
+ # Entries whose bucket is none of the known five are counted only in total.
+ def _build_stats(self, items: list[AdminAgentAuditTrailEntryRead]) -> AdminAgentAuditTrailStatsRead:
+ stats = AdminAgentAuditTrailStatsRead(total=len(items))
+ for item in items:
+ normalized = self._status_bucket(item)
+ if normalized == "success":
+ stats.succeeded += 1
+ elif normalized == "failed":
+ stats.failed += 1
+ elif normalized == "running":
+ stats.running += 1
+ elif normalized == "pending":
+ stats.queued += 1
+ elif normalized == "review":
+ stats.review_required += 1
+ return stats
+
+ # Enumerate runtime summary artifacts under known locations, newest
+ # first, capped at 80 files.
+ def _runtime_summary_files(self) -> list[Path]:
+ files: list[Path] = []
+ for pattern in (
+ ".masfactory_runtime/runs/*/summary.json",
+ ".masfactory_runtime/smokes/*/artifacts/chain_summary.json",
+ "logs/audit/openhands/jobs/*/chain_summary.json",
+ ):
+ files.extend(self._repo_root.glob(pattern))
+ files.sort(key=lambda item: item.stat().st_mtime, reverse=True)
+ return files[:80]
+
+ # Read a patch file, best-effort: returns ("", False) on any miss or
+ # OSError; truncates oversized patches and flags truncation.
+ def _load_patch_text(self, patch_uri: str | None) -> tuple[str, bool]:
+ patch_path = self._resolve_repo_path(patch_uri)
+ if patch_path is None or not patch_path.exists() or not patch_path.is_file():
+ return "", False
+ try:
+ patch_text = patch_path.read_text(encoding="utf-8", errors="replace")
+ except OSError:
+ return "", False
+ if len(patch_text) <= _MAX_PATCH_CHARS:
+ return patch_text, False
+ truncated = patch_text[:_MAX_PATCH_CHARS].rstrip()
+ return f"{truncated}\n\n... [patch truncated]", True
+
+ # Resolve a (possibly relative) URI against the repo root; blank -> None.
+ def _resolve_repo_path(self, candidate: str | None) -> Path | None:
+ normalized = self._normalize_text(candidate)
+ if not normalized:
+ return None
+ path = Path(normalized)
+ if path.is_absolute():
+ return path
+ return (self._repo_root / path).resolve()
+
+ # Lower-case a filter value; "" and "all" mean "no filter" (None).
+ @staticmethod
+ def _normalize_filter(value: str | None) -> str | None:
+ normalized = str(value or "").strip().lower()
+ return None if normalized in {"", "all"} else normalized
+
+ # First non-None of two metric values.
+ @staticmethod
+ def _prefer_metric(primary: int | None, secondary: int | None) -> int | None:
+ return primary if primary is not None else secondary
+
+ @staticmethod
+ def _matches_role_filter(entry: AdminAgentAuditTrailEntryRead, agent_role: str | None) -> bool:
+ if agent_role is None:
+ return True
+ return entry.agent_role.value == agent_role
+
+ def _matches_status_filter(self, entry: AdminAgentAuditTrailEntryRead, status_filter: str | None) -> bool:
+ if status_filter is None:
+ return True
+ return self._status_bucket(entry) == status_filter
+
+ # Map an entry's final/current status onto the coarse buckets. Unknown
+ # statuses pass through verbatim (or "pending" when empty).
+ def _status_bucket(self, entry: AdminAgentAuditTrailEntryRead) -> str:
+ normalized = (entry.final_status or entry.status).strip().lower()
+ if normalized in _SUCCESS_STATUSES:
+ return "success"
+ if normalized in _FAILED_STATUSES:
+ return "failed"
+ if normalized in _RUNNING_STATUSES:
+ return "running"
+ if normalized in _PENDING_STATUSES:
+ return "pending"
+ if normalized in _REVIEW_STATUSES:
+ return "review"
+ return normalized or "pending"
+
+ # Prefer the run summary's run_id, then the agent job's, then fallback.
+ @staticmethod
+ def _extract_task_run_id(task: Any, *, fallback: str) -> str:
+ if getattr(task, "run_summary", None) is not None:
+ return task.run_summary.run_id
+ if getattr(task, "agent_job", None) is not None:
+ return task.agent_job.run_id
+ return fallback
+
+ @staticmethod
+ def _extract_changed_paths(run_summary: Any) -> list[str]:
+ if run_summary is None:
+ return []
+ return list(run_summary.driver_result.changed_paths)
+
+ @staticmethod
+ def _extract_duration_ms(run_summary: Any) -> int | None:
+ if run_summary is None:
+ return None
+ return run_summary.driver_result.metrics.duration_ms
+
+ # Read a named metric off the driver result; non-numeric values -> None.
+ @staticmethod
+ def _extract_metric(run_summary: Any, metric_name: str) -> int | None:
+ if run_summary is None:
+ return None
+ value = getattr(run_summary.driver_result.metrics, metric_name, None)
+ return int(value) if isinstance(value, (int, float)) else None
+
+ @staticmethod
+ def _extract_run_summary_error(run_summary: Any) -> str | None:
+ if run_summary is None:
+ return None
+ return str(run_summary.driver_result.error or "").strip() or None
+
+ # Coerce a pydantic model / dict / scalar into a plain dict payload.
+ @staticmethod
+ def _model_payload(model: Any) -> dict[str, Any]:
+ if model is None:
+ return {}
+ if hasattr(model, "model_dump"):
+ return model.model_dump(mode="json")
+ if isinstance(model, dict):
+ return dict(model)
+ return {"value": model}
+
+ # Defensive copy of a dict payload; anything else becomes {}.
+ @staticmethod
+ def _dict_payload(value: Any) -> dict[str, Any]:
+ if isinstance(value, dict):
+ return dict(value)
+ return {}
+
+ # Best-effort JSON load; any read/parse failure yields None.
+ @staticmethod
+ def _load_json(path: Path) -> Any:
+ try:
+ return json.loads(path.read_text(encoding="utf-8"))
+ except (OSError, json.JSONDecodeError, UnicodeDecodeError):
+ return None
+
+ # Stringify + strip; blank -> None.
+ @staticmethod
+ def _normalize_text(value: Any) -> str | None:
+ normalized = str(value or "").strip()
+ return normalized or None
+
+ # Keep only multi-line text (used to treat multi-line errors as tracebacks).
+ @staticmethod
+ def _multiline_or_none(value: Any) -> str | None:
+ normalized = str(value or "").strip()
+ if "\n" not in normalized:
+ return None
+ return normalized
+
+ # Derive a status string: promotion_ready dominates when present,
+ # otherwise fall back to final_status/status fields.
+ @staticmethod
+ def _runtime_status(payload: dict[str, Any]) -> str:
+ if "promotion_ready" in payload:
+ return "ready_for_promotion" if bool(payload.get("promotion_ready")) else "failed"
+ return str(payload.get("final_status") or payload.get("status") or "unknown")
+
+ # Agent id from the driver result, defaulting to "openhands".
+ @staticmethod
+ def _runtime_agent_id(payload: dict[str, Any]) -> str | None:
+ driver_result = payload.get("driver_result")
+ if isinstance(driver_result, dict):
+ agent_id = str(driver_result.get("agent_id", "")).strip()
+ if agent_id:
+ return agent_id
+ return "openhands"
+
+ @staticmethod
+ def _runtime_duration_ms(payload: dict[str, Any]) -> int | None:
+ return AgentAuditTrailService._runtime_metric_ms(payload, "duration_ms")
+
+ # Numeric metric from payload["driver_result"]["metrics"]; else None.
+ @staticmethod
+ def _runtime_metric_ms(payload: dict[str, Any], metric_name: str) -> int | None:
+ driver_result = payload.get("driver_result")
+ if not isinstance(driver_result, dict):
+ return None
+ metrics = driver_result.get("metrics")
+ if not isinstance(metrics, dict):
+ return None
+ value = metrics.get(metric_name)
+ return int(value) if isinstance(value, (int, float)) else None
+
+ # Changed paths: promotion.changed_files wins over driver_result.changed_paths.
+ @staticmethod
+ def _runtime_changed_paths(payload: dict[str, Any]) -> list[str]:
+ promotion = payload.get("promotion")
+ if isinstance(promotion, dict):
+ changed = promotion.get("changed_files")
+ if isinstance(changed, list):
+ return [str(item) for item in changed]
+ driver_result = payload.get("driver_result")
+ if isinstance(driver_result, dict):
+ changed = driver_result.get("changed_paths")
+ if isinstance(changed, list):
+ return [str(item) for item in changed]
+ return []
+
+ # files_changed: promotion diff stats win; else count the changed paths.
+ @staticmethod
+ def _runtime_files_changed(payload: dict[str, Any], changed_paths: list[str]) -> int:
+ promotion = payload.get("promotion")
+ if isinstance(promotion, dict):
+ diff_stats = promotion.get("diff_stats")
+ if isinstance(diff_stats, dict):
+ value = diff_stats.get("files_changed")
+ if isinstance(value, (int, float)):
+ return int(value)
+ return len(changed_paths)
+
+ # First non-blank patch URI among the three known payload locations.
+ @staticmethod
+ def _runtime_patch_uri(payload: dict[str, Any]) -> str | None:
+ for candidate in (
+ payload.get("promotion_patch_uri"),
+ (payload.get("artifacts") or {}).get("promotion_patch")
+ if isinstance(payload.get("artifacts"), dict)
+ else None,
+ (payload.get("promotion") or {}).get("patch_uri")
+ if isinstance(payload.get("promotion"), dict)
+ else None,
+ ):
+ normalized = str(candidate or "").strip()
+ if normalized:
+ return normalized
+ return None
+
+ @staticmethod
+ def _runtime_summary_text(payload: dict[str, Any]) -> str:
+ driver_result = payload.get("driver_result")
+ if isinstance(driver_result, dict):
+ summary = str(driver_result.get("summary", "")).strip()
+ if summary:
+ return summary
+ return str(payload.get("task") or payload.get("status") or "").strip()
+
+ # First non-blank error among explicit fields; as a last resort, a
+ # failed/review status string itself serves as the reason.
+ # NOTE(review): _runtime_status(payload) is evaluated twice in the last
+ # candidate expression — harmless but could be hoisted.
+ def _runtime_error_reason(self, payload: dict[str, Any]) -> str | None:
+ for candidate in (
+ payload.get("error"),
+ payload.get("detail"),
+ (payload.get("driver_result") or {}).get("error")
+ if isinstance(payload.get("driver_result"), dict)
+ else None,
+ self._runtime_status(payload) if self._runtime_status(payload) in _FAILED_STATUSES | _REVIEW_STATUSES else None,
+ ):
+ normalized = self._normalize_text(candidate)
+ if normalized:
+ return normalized
+ return None
+
+ # First non-blank traceback-like text among known payload locations.
+ def _runtime_traceback(self, payload: dict[str, Any]) -> str | None:
+ for candidate in (
+ payload.get("traceback"),
+ payload.get("stderr"),
+ (payload.get("driver_result") or {}).get("stderr")
+ if isinstance(payload.get("driver_result"), dict)
+ else None,
+ (payload.get("validation") or {}).get("detail")
+ if isinstance(payload.get("validation"), dict)
+ else None,
+ ):
+ normalized = self._normalize_text(candidate)
+ if normalized:
+ return normalized
+ return None
diff --git a/src/autoresearch/core/services/autoresearch_planner.py b/src/autoresearch/core/services/autoresearch_planner.py
new file mode 100644
index 00000000..abd038ab
--- /dev/null
+++ b/src/autoresearch/core/services/autoresearch_planner.py
@@ -0,0 +1,826 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+import os
+from pathlib import Path
+import re
+from typing import Callable
+
+from autoresearch.agent_protocol.models import JobSpec, RunSummary
+from autoresearch.core.dispatch.fake_remote_adapter import FakeRemoteAdapter
+from autoresearch.core.dispatch.failure_classifier import classify_remote_terminal
+from autoresearch.core.dispatch.remote_adapter import RemoteDispatchAdapter
+from autoresearch.core.runtime.select_mode import SelectedRuntimeMode, select_mode
+from autoresearch.core.services.writer_lease import WriterLeaseService
+from autoresearch.executions.runner import AgentExecutionRunner
+from autoresearch.core.services.openhands_worker import OpenHandsWorkerService
+from autoresearch.core.services.upstream_watcher import UpstreamWatcherService
+from autoresearch.shared.autoresearch_planner_contract import (
+ AutoResearchPlanRead,
+ AutoResearchPlanDispatchStatus,
+ AutoResearchPlannerCandidateRead,
+ AutoResearchPlannerEvidenceRead,
+ AutoResearchPlannerRequest,
+ UpstreamWatchDecision,
+ UpstreamWatchRead,
+)
+from autoresearch.shared.models import GitPromotionMode, JobStatus, utc_now
+from autoresearch.shared.openhands_worker_contract import OpenHandsWorkerJobSpec
+from autoresearch.shared.remote_run_contract import (
+ RemoteRunRecord,
+ RemoteRunStatus,
+ RemoteRunSummary,
+ RemoteTaskSpec,
+)
+from autoresearch.shared.store import Repository, create_resource_id
+
+
+ # Directory-name parts excluded from the planner's repository scan.
+ _IGNORED_PATH_PARTS = {
+ ".git",
+ ".venv",
+ "__pycache__",
+ ".mypy_cache",
+ ".pytest_cache",
+ ".ruff_cache",
+ ".masfactory_runtime",
+ "node_modules",
+ "panel",
+ "dashboard",
+ "memory",
+ "logs",
+ }
+ # Priority weight per comment marker; higher weight sorts earlier.
+ _MARKER_WEIGHTS = {
+ "BUG": 65.0,
+ "FIXME": 60.0,
+ "XXX": 50.0,
+ "HACK": 45.0,
+ "TODO": 35.0,
+ }
+ # Matches a marker keyword plus its trailing detail text on one line.
+ _MARKER_PATTERN = re.compile(r"\b(BUG|FIXME|XXX|HACK|TODO)\b[:\s-]*(.*)")
+ # Path prefixes treated as higher-impact areas of the codebase.
+ _CRITICAL_PREFIXES = (
+ "src/autoresearch/core/services/",
+ "src/autoresearch/executions/",
+ "src/autoresearch/api/",
+ "scripts/",
+ )
+
+
+ # One marker (BUG/FIXME/...) found in a scanned file: keyword, line
+ # number, trailing detail text, and its priority weight.
+ @dataclass(slots=True)
+ class _MarkerOccurrence:
+ marker: str
+ line: int
+ detail: str
+ weight: float
+
+
+ class AutoResearchPlannerService:
+ """Scan the repository for bounded patch-only work and emit worker-ready specs."""
+
+ # Remote run statuses considered terminal when polling a dispatch.
+ _TERMINAL_REMOTE_STATUSES = {
+ RemoteRunStatus.SUCCEEDED,
+ RemoteRunStatus.FAILED,
+ RemoteRunStatus.STALLED,
+ RemoteRunStatus.TIMED_OUT,
+ }
+ # Upper bound on dispatch status polls before giving up.
+ _MAX_DISPATCH_POLLS = 8
+
+ # All collaborators are injectable for testing; defaults construct real
+ # services, with a FakeRemoteAdapter wrapping the local dispatch runner.
+ def __init__(
+ self,
+ repository: Repository[AutoResearchPlanRead],
+ *,
+ repo_root: Path | None = None,
+ worker_service: OpenHandsWorkerService | None = None,
+ dispatch_runner: Callable[[JobSpec], RunSummary] | None = None,
+ remote_adapter: RemoteDispatchAdapter | None = None,
+ writer_lease: WriterLeaseService | None = None,
+ upstream_watcher: UpstreamWatcherService | None = None,
+ ) -> None:
+ self._repository = repository
+ self._repo_root = (repo_root or Path(__file__).resolve().parents[4]).resolve()
+ self._worker_service = worker_service or OpenHandsWorkerService()
+ self._dispatch_runner = dispatch_runner or self._default_dispatch_runner
+ self._remote_adapter = remote_adapter or FakeRemoteAdapter(
+ repo_root=self._repo_root,
+ local_runner=self._dispatch_runner,
+ )
+ self._writer_lease = writer_lease or WriterLeaseService()
+ self._upstream_watcher = upstream_watcher
+
+ # Scan for candidates, pick the top-scored one, and persist a plan.
+ # Any exception during scanning/spec-building is captured into a FAILED
+ # plan record rather than propagated, so create() always saves something.
+ def create(self, request: AutoResearchPlannerRequest) -> AutoResearchPlanRead:
+ now = utc_now()
+ plan_id = create_resource_id("plan")
+ upstream_watch = self._inspect_upstream(request)
+ try:
+ candidates = self._scan_candidates(limit=request.max_candidates)
+ # Candidates are assumed pre-sorted best-first — TODO confirm
+ # against _scan_candidates' ordering.
+ selected = candidates[0] if candidates else None
+
+ worker_spec = None
+ controlled_request = None
+ agent_job = None
+ summary = "Planner scanned the repo but did not find a safe patch-only candidate."
+ if selected is not None:
+ worker_spec = self._build_worker_spec(
+ plan_id=plan_id,
+ candidate=selected,
+ request=request,
+ )
+ controlled_request = self._worker_service.build_controlled_request(worker_spec)
+ agent_job = self._worker_service.build_agent_job_spec(worker_spec)
+ summary = (
+ f"Selected {selected.title} from {len(candidates)} candidate(s); "
+ f"score={selected.priority_score:.1f}."
+ )
+ summary = self._augment_summary_with_upstream_watch(summary, upstream_watch)
+
+ plan = AutoResearchPlanRead(
+ plan_id=plan_id,
+ goal=request.goal,
+ status=JobStatus.COMPLETED,
+ summary=summary,
+ created_at=now,
+ updated_at=now,
+ selected_candidate=selected,
+ candidates=candidates,
+ worker_spec=worker_spec,
+ controlled_request=controlled_request,
+ agent_job=agent_job,
+ upstream_watch=upstream_watch,
+ telegram_uid=request.telegram_uid,
+ panel_action_url=None,
+ notification_sent=False,
+ dispatch_status=AutoResearchPlanDispatchStatus.PENDING,
+ dispatch_requested_at=None,
+ dispatch_completed_at=None,
+ dispatch_requested_by=None,
+ dispatch_run=None,
+ run_summary=None,
+ dispatch_error=None,
+ metadata={
+ **request.metadata,
+ "repo_root": str(self._repo_root),
+ "pipeline_target": request.pipeline_target,
+ "target_base_branch": request.target_base_branch,
+ },
+ error=None,
+ )
+ except Exception as exc:
+ # Failure path: persist a FAILED plan with the exception message.
+ plan = AutoResearchPlanRead(
+ plan_id=plan_id,
+ goal=request.goal,
+ status=JobStatus.FAILED,
+ summary="Planner scan failed.",
+ created_at=now,
+ updated_at=now,
+ selected_candidate=None,
+ candidates=[],
+ worker_spec=None,
+ controlled_request=None,
+ agent_job=None,
+ upstream_watch=upstream_watch,
+ telegram_uid=request.telegram_uid,
+ panel_action_url=None,
+ notification_sent=False,
+ dispatch_status=AutoResearchPlanDispatchStatus.FAILED,
+ dispatch_requested_at=None,
+ dispatch_completed_at=None,
+ dispatch_requested_by=None,
+ dispatch_run=None,
+ run_summary=None,
+ dispatch_error=None,
+ metadata={
+ **request.metadata,
+ "repo_root": str(self._repo_root),
+ "pipeline_target": request.pipeline_target,
+ "target_base_branch": request.target_base_branch,
+ },
+ error=str(exc),
+ )
+
+ return self._repository.save(plan.plan_id, plan)
+
+ def list(self) -> list[AutoResearchPlanRead]:
+ return self._repository.list()
+
+ def get(self, plan_id: str) -> AutoResearchPlanRead | None:
+ return self._repository.get(plan_id)
+
+ # Return PENDING-dispatch plans, optionally restricted to plans owned by
+ # (or unassigned to) a telegram uid; newest first, at least one slot.
+ def list_pending(self, *, telegram_uid: str | None = None, limit: int = 20) -> list[AutoResearchPlanRead]:
+ normalized_uid = (telegram_uid or "").strip() or None
+ items: list[AutoResearchPlanRead] = []
+ for item in self._repository.list():
+ if item.dispatch_status is not AutoResearchPlanDispatchStatus.PENDING:
+ continue
+ if normalized_uid is not None and item.telegram_uid not in {None, normalized_uid}:
+ continue
+ items.append(item)
+ items.sort(key=lambda item: item.updated_at, reverse=True)
+ return items[: max(1, limit)]
+
+ # Record notification-delivery state on a plan under the writer lease.
+ # A blank telegram_uid keeps the existing value; panel_action_url and
+ # notification_sent overwrite unconditionally.
+ def update_delivery(
+ self,
+ plan_id: str,
+ *,
+ telegram_uid: str | None,
+ panel_action_url: str | None,
+ notification_sent: bool,
+ ) -> AutoResearchPlanRead:
+ with self._writer_lease.acquire(f"autoresearch-plan:{plan_id}"):
+ plan = self._require_plan(plan_id)
+ updated = plan.model_copy(
+ update={
+ "telegram_uid": (telegram_uid or "").strip() or plan.telegram_uid,
+ "panel_action_url": panel_action_url,
+ "notification_sent": notification_sent,
+ "updated_at": utc_now(),
+ }
+ )
+ return self._repository.save(updated.plan_id, updated)
+
+ # Transition a plan to DISPATCHING under the writer lease. Raises
+ # ValueError when the plan lacks a worker contract or is already
+ # dispatching/dispatched; actual execution happens in execute_dispatch.
+ def request_dispatch(
+ self,
+ plan_id: str,
+ *,
+ requested_by: str,
+ ) -> AutoResearchPlanRead:
+ with self._writer_lease.acquire(f"autoresearch-plan:{plan_id}"):
+ plan = self._require_plan(plan_id)
+ if plan.worker_spec is None or plan.agent_job is None:
+ raise ValueError("plan does not have a dispatchable worker contract")
+ if plan.dispatch_status is AutoResearchPlanDispatchStatus.DISPATCHING:
+ raise ValueError("plan is already dispatching")
+ if plan.dispatch_status is AutoResearchPlanDispatchStatus.DISPATCHED:
+ raise ValueError("plan has already been dispatched")
+
+ dispatch_job, selected_mode, task_spec = self._prepare_dispatch(plan)
+ now = utc_now()
+ updated = plan.model_copy(
+ update={
+ "agent_job": dispatch_job,
+ "dispatch_status": AutoResearchPlanDispatchStatus.DISPATCHING,
+ "dispatch_requested_at": now,
+ "dispatch_requested_by": requested_by.strip(),
+ "dispatch_completed_at": None,
+ "dispatch_run": self._queued_dispatch_run(
+ task_spec=task_spec,
+ selected_mode=selected_mode,
+ ),
+ "dispatch_error": None,
+ "updated_at": now,
+ "metadata": {
+ **plan.metadata,
+ "dispatch_requested_by": requested_by.strip(),
+ },
+ }
+ )
+ return self._repository.save(updated.plan_id, updated)
+
+ def execute_dispatch(self, plan_id: str) -> AutoResearchPlanRead:
+ plan = self._require_plan(plan_id)
+ if plan.worker_spec is None:
+ raise ValueError("plan does not have a worker spec")
+
+ dispatch_job, _, task_spec = self._prepare_dispatch(plan)
+ try:
+ self._remote_adapter.dispatch(task_spec)
+ remote_summary = self._await_remote_summary(task_spec)
+ except Exception as exc:
+ remote_summary = self._dispatch_exception_summary(task_spec, exc)
+ with self._writer_lease.acquire(f"autoresearch-plan:{plan_id}"):
+ current = self._require_plan(plan_id)
+ updated = current.model_copy(
+ update={
+ "agent_job": dispatch_job,
+ "dispatch_status": self._derive_dispatch_status(remote_summary),
+ "dispatch_completed_at": utc_now(),
+ "dispatch_run": self._record_from_summary(remote_summary),
+ "run_summary": remote_summary.run_summary,
+ "dispatch_error": self._dispatch_error_from_summary(remote_summary),
+ "updated_at": utc_now(),
+ }
+ )
+ return self._repository.save(updated.plan_id, updated)
+
+ with self._writer_lease.acquire(f"autoresearch-plan:{plan_id}"):
+ current = self._require_plan(plan_id)
+ updated = current.model_copy(
+ update={
+ "agent_job": dispatch_job,
+ "dispatch_status": self._derive_dispatch_status(remote_summary),
+ "dispatch_completed_at": utc_now(),
+ "dispatch_run": self._record_from_summary(remote_summary),
+ "run_summary": remote_summary.run_summary,
+ "dispatch_error": self._dispatch_error_from_summary(remote_summary),
+ "updated_at": utc_now(),
+ }
+ )
+ return self._repository.save(updated.plan_id, updated)
+
+ def _scan_candidates(self, *, limit: int) -> list[AutoResearchPlannerCandidateRead]:
+ candidates: list[AutoResearchPlannerCandidateRead] = []
+ marker_candidates = self._marker_candidates()
+ candidate_index = {
+ (candidate.category, candidate.source_path): candidate for candidate in marker_candidates
+ }
+ candidates.extend(marker_candidates)
+
+ for candidate in self._test_gap_candidates():
+ key = (candidate.category, candidate.source_path)
+ if key in candidate_index:
+ continue
+ candidates.append(candidate)
+ candidate_index[key] = candidate
+
+ candidates.sort(
+ key=lambda item: (
+ -item.priority_score,
+ item.category,
+ item.source_path,
+ )
+ )
+ return candidates[:limit]
+
+ def _marker_candidates(self) -> list[AutoResearchPlannerCandidateRead]:
+ candidates: list[AutoResearchPlannerCandidateRead] = []
+ for rel_path in self._iter_python_files():
+ occurrences = self._find_markers(rel_path)
+ if not occurrences:
+ continue
+ suggested_tests = self._infer_test_paths(rel_path)
+ score = max(item.weight for item in occurrences)
+ score += min(18.0, (len(occurrences) - 1) * 6.0)
+ score += self._criticality_bonus(rel_path)
+ if not self._has_existing_test(suggested_tests):
+ score += 15.0
+
+ first = occurrences[0]
+ marker_list = ", ".join(item.marker for item in occurrences[:3])
+ evidence = [
+ AutoResearchPlannerEvidenceRead(
+ kind="marker",
+ path=rel_path,
+ line=item.line,
+ detail=f"{item.marker}: {item.detail}".strip(),
+ weight=item.weight,
+ )
+ for item in occurrences[:5]
+ ]
+ if self._criticality_bonus(rel_path) > 0:
+ evidence.append(
+ AutoResearchPlannerEvidenceRead(
+ kind="hotspot",
+ path=rel_path,
+ detail="critical control-plane hotspot",
+ weight=self._criticality_bonus(rel_path),
+ )
+ )
+ if not self._has_existing_test(suggested_tests):
+ evidence.append(
+ AutoResearchPlannerEvidenceRead(
+ kind="test_gap",
+ path=suggested_tests[0],
+ detail="direct regression test is missing and should be added",
+ weight=15.0,
+ )
+ )
+
+ candidates.append(
+ AutoResearchPlannerCandidateRead(
+ candidate_id=create_resource_id("candidate"),
+ title=f"Resolve {first.marker} backlog in {rel_path}",
+ summary=(
+ f"Address {marker_list} markers in {rel_path} and keep the patch focused "
+ f"to the source file plus a targeted regression test."
+ ),
+ category="marker_backlog",
+ priority_score=round(score, 1),
+ source_path=rel_path,
+ allowed_paths=[rel_path, *suggested_tests],
+ suggested_test_paths=suggested_tests,
+ test_command=self._build_test_command(rel_path, suggested_tests),
+ evidence=evidence,
+ metadata={
+ "marker_count": len(occurrences),
+ "primary_marker": first.marker,
+ },
+ )
+ )
+ return candidates
+
+ def _test_gap_candidates(self) -> list[AutoResearchPlannerCandidateRead]:
+ candidates: list[AutoResearchPlannerCandidateRead] = []
+ for rel_path in self._iter_python_files():
+ if rel_path.startswith("tests/"):
+ continue
+ if Path(rel_path).name == "__init__.py":
+ continue
+ line_count = self._count_lines(rel_path)
+ if line_count < 120:
+ continue
+
+ suggested_tests = self._infer_test_paths(rel_path)
+ if self._has_existing_test(suggested_tests):
+ continue
+
+ score = 40.0 + min(20.0, line_count / 20.0)
+ score += self._criticality_bonus(rel_path)
+ candidates.append(
+ AutoResearchPlannerCandidateRead(
+ candidate_id=create_resource_id("candidate"),
+ title=f"Add focused regression coverage for {rel_path}",
+ summary=(
+ f"{rel_path} is relatively large and lacks a direct regression test. "
+ "Add a focused test while keeping source changes minimal."
+ ),
+ category="test_gap",
+ priority_score=round(score, 1),
+ source_path=rel_path,
+ allowed_paths=[rel_path, *suggested_tests],
+ suggested_test_paths=suggested_tests,
+ test_command=self._build_test_command(rel_path, suggested_tests),
+ evidence=[
+ AutoResearchPlannerEvidenceRead(
+ kind="test_gap",
+ path=suggested_tests[0],
+ detail="no direct test file found for this source hotspot",
+ weight=25.0,
+ ),
+ AutoResearchPlannerEvidenceRead(
+ kind="hotspot",
+ path=rel_path,
+ detail=f"file has {line_count} lines",
+ weight=min(20.0, line_count / 20.0),
+ ),
+ ],
+ metadata={
+ "line_count": line_count,
+ },
+ )
+ )
+ return candidates
+
+ def _build_worker_spec(
+ self,
+ *,
+ plan_id: str,
+ candidate: AutoResearchPlannerCandidateRead,
+ request: AutoResearchPlannerRequest,
+ ) -> OpenHandsWorkerJobSpec:
+ slug = self._slugify(candidate.source_path)
+ branch_suffix = candidate.candidate_id.split("_")[-1]
+ problem_statement = (
+ f"{candidate.summary}\n\n"
+ f"Goal: {request.goal}\n"
+ f"Selected source: {candidate.source_path}\n"
+ f"Primary evidence: {candidate.evidence[0].detail if candidate.evidence else 'n/a'}"
+ )
+ return OpenHandsWorkerJobSpec(
+ job_id=f"{plan_id}-{branch_suffix}",
+ problem_statement=problem_statement,
+ allowed_paths=list(candidate.allowed_paths),
+ test_command=candidate.test_command,
+ pipeline_target=request.pipeline_target,
+ target_base_branch=request.target_base_branch,
+ max_iterations=request.max_iterations,
+ metadata={
+ **request.metadata,
+ "planner_plan_id": plan_id,
+ "planner_candidate_id": candidate.candidate_id,
+ "planner_score": candidate.priority_score,
+ "planner_category": candidate.category,
+ "approval_granted": request.approval_granted,
+ "branch_name": f"codex/autoresearch/{slug}-{branch_suffix[:6]}",
+ "commit_message": f"AutoResearch: {candidate.title}",
+ "pr_title": f"AutoResearch: {candidate.title}",
+ "pr_body": candidate.summary,
+ "base_branch": request.target_base_branch,
+ },
+ )
+
+ def _default_dispatch_runner(self, job: JobSpec) -> RunSummary:
+ runner = AgentExecutionRunner(repo_root=self._repo_root)
+ return runner.run_job(job)
+
+ def _prepare_dispatch(
+ self,
+ plan: AutoResearchPlanRead,
+ ) -> tuple[JobSpec, SelectedRuntimeMode, RemoteTaskSpec]:
+ if plan.worker_spec is None:
+ raise ValueError("plan does not have a dispatchable worker contract")
+ base_job = plan.agent_job or self._worker_service.build_agent_job_spec(plan.worker_spec)
+ selected_mode = self._select_dispatch_mode(plan)
+ dispatch_job = self._apply_mode_policy(base_job, selected_mode)
+ task_spec = RemoteTaskSpec(
+ run_id=dispatch_job.run_id,
+ requested_lane=selected_mode.requested_lane,
+ lane=selected_mode.lane,
+ runtime_mode=selected_mode.name,
+ planner_plan_id=plan.plan_id,
+ planner_candidate_id=(
+ plan.selected_candidate.candidate_id if plan.selected_candidate is not None else None
+ ),
+ job=dispatch_job,
+ metadata={
+ **plan.metadata,
+ "runtime_mode": selected_mode.name,
+ "fallback_reason": selected_mode.fallback_reason,
+ },
+ )
+ return dispatch_job, selected_mode, task_spec
+
+ def _select_dispatch_mode(self, plan: AutoResearchPlanRead) -> SelectedRuntimeMode:
+ requested_mode = str(plan.metadata.get("runtime_mode") or "").strip() or None
+ remote_available = self._coerce_bool(
+ plan.metadata.get("remote_available", os.environ.get("AUTORESEARCH_REMOTE_AVAILABLE")),
+ default=False,
+ )
+ return select_mode(
+ self._repo_root,
+ requested_mode=requested_mode,
+ remote_available=remote_available,
+ )
+
+ def _apply_mode_policy(self, job: JobSpec, selected_mode: SelectedRuntimeMode) -> JobSpec:
+ metadata = {
+ **job.metadata,
+ "runtime_mode": selected_mode.name,
+ "dispatch_requested_lane": selected_mode.requested_lane.value,
+ "dispatch_lane": selected_mode.lane.value,
+ "dispatch_max_workers": selected_mode.policy.max_workers,
+ "dispatch_max_concurrency": selected_mode.policy.max_concurrency,
+ "dispatch_allow_exploration": selected_mode.policy.allow_exploration,
+ "dispatch_allow_draft_pr": selected_mode.policy.allow_draft_pr,
+ "dispatch_token_budget": selected_mode.policy.token_budget,
+ }
+ if selected_mode.fallback_reason:
+ metadata["dispatch_fallback_reason"] = selected_mode.fallback_reason
+
+ preferred_mode = str(metadata.get("pipeline_target") or GitPromotionMode.PATCH.value).strip().lower()
+ if not selected_mode.policy.allow_draft_pr and preferred_mode == GitPromotionMode.DRAFT_PR.value:
+ metadata["pipeline_target"] = GitPromotionMode.PATCH.value
+ metadata["dispatch_pipeline_target_downgraded"] = GitPromotionMode.PATCH.value
+
+ policy = job.policy.model_copy(
+ update={
+ "timeout_sec": min(job.policy.timeout_sec, selected_mode.policy.timeout_sec),
+ "max_steps": min(job.policy.max_steps, selected_mode.policy.step_budget),
+ }
+ )
+ return job.model_copy(update={"policy": policy, "metadata": metadata})
+
+ def _queued_dispatch_run(
+ self,
+ *,
+ task_spec: RemoteTaskSpec,
+ selected_mode: SelectedRuntimeMode,
+ ) -> RemoteRunRecord:
+ return RemoteRunRecord(
+ run_id=task_spec.run_id,
+ requested_lane=task_spec.requested_lane,
+ lane=task_spec.lane,
+ status=RemoteRunStatus.QUEUED,
+ summary=f"dispatch queued for {task_spec.lane.value} lane",
+ fallback_reason=selected_mode.fallback_reason,
+ metadata={
+ "runtime_mode": selected_mode.name,
+ "dispatch_max_concurrency": selected_mode.policy.max_concurrency,
+ "dispatch_token_budget": selected_mode.policy.token_budget,
+ },
+ )
+
+ def _await_remote_summary(self, task_spec: RemoteTaskSpec) -> RemoteRunSummary:
+ last_record: RemoteRunRecord | None = None
+ for _ in range(self._MAX_DISPATCH_POLLS):
+ last_record = self._remote_adapter.poll(task_spec.run_id)
+ if last_record.status in self._TERMINAL_REMOTE_STATUSES:
+ break
+ if last_record is None:
+ return self._planner_stalled_summary(task_spec, detail="remote adapter returned no dispatch record")
+ if last_record.status not in self._TERMINAL_REMOTE_STATUSES:
+ return self._planner_stalled_summary(
+ task_spec,
+ detail="dispatch polling exhausted before reaching a terminal state",
+ )
+ try:
+ return self._remote_adapter.fetch_summary(task_spec.run_id)
+ except FileNotFoundError as exc:
+ return self._missing_summary_summary(task_spec, last_record=last_record, exc=exc)
+
+ def _planner_stalled_summary(self, task_spec: RemoteTaskSpec, *, detail: str) -> RemoteRunSummary:
+ disposition = classify_remote_terminal(status=RemoteRunStatus.STALLED, stage="planner")
+ now = utc_now()
+ return RemoteRunSummary(
+ run_id=task_spec.run_id,
+ requested_lane=task_spec.requested_lane,
+ lane=task_spec.lane,
+ status=RemoteRunStatus.STALLED,
+ failure_class=disposition.failure_class,
+ recovery_action=disposition.recovery_action,
+ summary=detail,
+ started_at=now,
+ updated_at=now,
+ finished_at=now,
+ fallback_reason=str(task_spec.metadata.get("fallback_reason") or "").strip() or None,
+ metadata={"runtime_mode": task_spec.runtime_mode},
+ )
+
+ def _missing_summary_summary(
+ self,
+ task_spec: RemoteTaskSpec,
+ *,
+ last_record: RemoteRunRecord,
+ exc: Exception,
+ ) -> RemoteRunSummary:
+ disposition = classify_remote_terminal(
+ status=RemoteRunStatus.FAILED,
+ error_text=str(exc),
+ )
+ now = utc_now()
+ return RemoteRunSummary(
+ run_id=task_spec.run_id,
+ requested_lane=last_record.requested_lane,
+ lane=last_record.lane,
+ status=RemoteRunStatus.FAILED,
+ failure_class=disposition.failure_class,
+ recovery_action=disposition.recovery_action,
+ artifact_paths=last_record.artifact_paths,
+ summary=f"dispatch result fetch failed: {exc}",
+ started_at=last_record.started_at,
+ updated_at=now,
+ finished_at=now,
+ fallback_reason=last_record.fallback_reason,
+ metadata=last_record.metadata,
+ )
+
+ def _dispatch_exception_summary(
+ self,
+ task_spec: RemoteTaskSpec,
+ exc: Exception,
+ ) -> RemoteRunSummary:
+ disposition = classify_remote_terminal(status=RemoteRunStatus.FAILED, error_text=str(exc))
+ now = utc_now()
+ return RemoteRunSummary(
+ run_id=task_spec.run_id,
+ requested_lane=task_spec.requested_lane,
+ lane=task_spec.lane,
+ status=RemoteRunStatus.FAILED,
+ failure_class=disposition.failure_class,
+ recovery_action=disposition.recovery_action,
+ summary=f"dispatch failed before completion: {exc}",
+ started_at=now,
+ updated_at=now,
+ finished_at=now,
+ fallback_reason=str(task_spec.metadata.get("fallback_reason") or "").strip() or None,
+ metadata={"runtime_mode": task_spec.runtime_mode},
+ )
+
+ @staticmethod
+ def _record_from_summary(summary: RemoteRunSummary) -> RemoteRunRecord:
+ return RemoteRunRecord.model_validate(summary.model_dump(mode="json", exclude={"run_summary"}))
+
+ @classmethod
+ def _derive_dispatch_status(cls, summary: RemoteRunSummary) -> AutoResearchPlanDispatchStatus:
+ if summary.status is not RemoteRunStatus.SUCCEEDED:
+ return AutoResearchPlanDispatchStatus.FAILED
+ if summary.run_summary is None:
+ return AutoResearchPlanDispatchStatus.DISPATCHED
+ if summary.run_summary.final_status in {"ready_for_promotion", "promoted"}:
+ return AutoResearchPlanDispatchStatus.DISPATCHED
+ return AutoResearchPlanDispatchStatus.FAILED
+
+ @staticmethod
+ def _dispatch_error_from_summary(summary: RemoteRunSummary) -> str | None:
+ if summary.status is RemoteRunStatus.SUCCEEDED:
+ if summary.run_summary is None or summary.run_summary.final_status in {"ready_for_promotion", "promoted"}:
+ return None
+ if summary.run_summary is not None:
+ error_text = str(summary.run_summary.driver_result.error or "").strip()
+ if error_text:
+ return error_text
+ if summary.run_summary.final_status not in {"ready_for_promotion", "promoted"}:
+ return summary.run_summary.final_status
+ if summary.failure_class is not None:
+ return summary.failure_class.value
+ return summary.summary or None
+
+ @staticmethod
+ def _coerce_bool(value: object, *, default: bool) -> bool:
+ if value is None:
+ return default
+ if isinstance(value, bool):
+ return value
+ normalized = str(value).strip().lower()
+ if not normalized:
+ return default
+ if normalized in {"1", "true", "yes", "on"}:
+ return True
+ if normalized in {"0", "false", "no", "off"}:
+ return False
+ return default
+
+ def _inspect_upstream(self, request: AutoResearchPlannerRequest) -> UpstreamWatchRead | None:
+ if not request.include_upstream_watch or self._upstream_watcher is None:
+ return None
+ return self._upstream_watcher.inspect()
+
+ def _augment_summary_with_upstream_watch(
+ self,
+ summary: str,
+ upstream_watch: UpstreamWatchRead | None,
+ ) -> str:
+ if upstream_watch is None or not upstream_watch.summary:
+ return summary
+ if upstream_watch.decision is UpstreamWatchDecision.SKIP:
+ return f"{summary} Upstream watcher auto-skipped merge noise: {upstream_watch.summary}"
+ if upstream_watch.decision is UpstreamWatchDecision.REVIEW:
+ return f"{summary} Upstream watcher flagged review-required changes: {upstream_watch.summary}"
+ return f"{summary} Upstream watcher failed: {upstream_watch.error or upstream_watch.summary}"
+
+ def _require_plan(self, plan_id: str) -> AutoResearchPlanRead:
+ plan = self._repository.get(plan_id)
+ if plan is None:
+ raise KeyError(f"autoresearch plan not found: {plan_id}")
+ return plan
+
+ def _iter_python_files(self) -> list[str]:
+ files: list[str] = []
+ for root_name in ("src", "scripts", "tests"):
+ root = self._repo_root / root_name
+ if not root.exists():
+ continue
+ for path in root.rglob("*.py"):
+ rel_path = path.relative_to(self._repo_root).as_posix()
+ if self._is_ignored(rel_path):
+ continue
+ files.append(rel_path)
+ return sorted(set(files))
+
+ def _find_markers(self, rel_path: str) -> list[_MarkerOccurrence]:
+ path = self._repo_root / rel_path
+ try:
+ lines = path.read_text(encoding="utf-8").splitlines()
+ except UnicodeDecodeError:
+ return []
+ occurrences: list[_MarkerOccurrence] = []
+ for line_number, line in enumerate(lines, start=1):
+ match = _MARKER_PATTERN.search(line)
+ if match is None:
+ continue
+ marker = match.group(1)
+ detail = match.group(2).strip() or line.strip()
+ occurrences.append(
+ _MarkerOccurrence(
+ marker=marker,
+ line=line_number,
+ detail=detail[:180],
+ weight=_MARKER_WEIGHTS[marker],
+ )
+ )
+ return occurrences
+
+ def _count_lines(self, rel_path: str) -> int:
+ try:
+ return len((self._repo_root / rel_path).read_text(encoding="utf-8").splitlines())
+ except UnicodeDecodeError:
+ return 0
+
+ def _infer_test_paths(self, rel_path: str) -> list[str]:
+ if rel_path.startswith("tests/"):
+ return [rel_path]
+
+ tests_root = self._repo_root / "tests"
+ stem = Path(rel_path).stem
+ existing: list[str] = []
+ if tests_root.exists():
+ for path in tests_root.rglob(f"test_{stem}.py"):
+ candidate = path.relative_to(self._repo_root).as_posix()
+ if self._is_ignored(candidate):
+ continue
+ existing.append(candidate)
+ if existing:
+ return sorted(existing)[:2]
+ return [f"tests/test_{stem}.py"]
+
+ def _build_test_command(self, source_path: str, test_paths: list[str]) -> str:
+ if source_path.startswith("tests/"):
+ return "pytest -q " + " ".join(test_paths)
+ if test_paths:
+ return "pytest -q " + " ".join(test_paths)
+ return f"python -m py_compile {source_path}"
+
+ def _has_existing_test(self, test_paths: list[str]) -> bool:
+ return any((self._repo_root / test_path).exists() for test_path in test_paths)
+
+ def _criticality_bonus(self, rel_path: str) -> float:
+ for prefix in _CRITICAL_PREFIXES:
+ if rel_path.startswith(prefix):
+ return 18.0
+ return 0.0
+
+ @staticmethod
+ def _is_ignored(rel_path: str) -> bool:
+ return any(part in _IGNORED_PATH_PARTS for part in Path(rel_path).parts)
+
+ @staticmethod
+ def _slugify(value: str) -> str:
+ slug = re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-")
+ return slug or "plan"
diff --git a/src/autoresearch/core/services/git_promotion_gate.py b/src/autoresearch/core/services/git_promotion_gate.py
index 4bf882b9..6f0117d4 100644
--- a/src/autoresearch/core/services/git_promotion_gate.py
+++ b/src/autoresearch/core/services/git_promotion_gate.py
@@ -38,6 +38,19 @@
)
+def _is_benign_runtime_artifact(path: str) -> bool:
+ normalized = path.replace("\\", "/").strip("/")
+ if not normalized:
+ return False
+ if normalized.startswith(".pytest_cache/") or "/.pytest_cache/" in f"/{normalized}":
+ return True
+ if "/__pycache__/" in f"/{normalized}":
+ return True
+ if normalized.startswith("apps/") and normalized.endswith("/README.md"):
+ return True
+ return False
+
+
class GitPromotionProvider(Protocol):
def probe_remote_health(self, repo_root: Path, *, base_branch: str) -> GitRemoteProbe: ...
@@ -490,7 +503,11 @@ def _build_gate_checks(
remote_probe: GitRemoteProbe,
repo_dirty: list[str],
) -> list[PromotionGateCheck]:
- changed_files = [item.replace("\\", "/") for item in intent.changed_files]
+ changed_files = [
+ item.replace("\\", "/")
+ for item in intent.changed_files
+ if not _is_benign_runtime_artifact(item)
+ ]
forbidden_paths = self._metadata_list(intent.metadata, "forbidden_paths") or self._default_forbidden_paths
max_changed_files = int(intent.metadata.get("max_changed_files", self._default_max_changed_files))
max_patch_lines = int(intent.metadata.get("max_patch_lines", self._default_max_patch_lines))
diff --git a/src/autoresearch/core/services/github_issue_service.py b/src/autoresearch/core/services/github_issue_service.py
new file mode 100644
index 00000000..a7f0c567
--- /dev/null
+++ b/src/autoresearch/core/services/github_issue_service.py
@@ -0,0 +1,222 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+import json
+from pathlib import Path
+import re
+import shutil
+import subprocess
+
+
+_ISSUE_URL_RE = re.compile(
+ r"^https://github\.com/(?P[A-Za-z0-9_.-]+)/(?P[A-Za-z0-9_.-]+)/issues/(?P\d+)(?:[/?#].*)?$"
+)
+_ISSUE_REF_RE = re.compile(r"^(?P[A-Za-z0-9_.-]+)/(?P[A-Za-z0-9_.-]+)#(?P\d+)$")
+_ISSUE_NUMBER_RE = re.compile(r"^#?(?P\d+)$")
+_REMOTE_RE = re.compile(
+ r"^(?:https://github\.com/|git@github\.com:)(?P[A-Za-z0-9_.-]+)/(?P[A-Za-z0-9_.-]+?)(?:\.git)?$"
+)
+
+
@dataclass(frozen=True, slots=True)
class GitHubIssueCommentRead:
    """One issue comment as surfaced by `gh issue view --json comments`."""

    # GitHub login of the commenter ("unknown" is substituted upstream when absent).
    author: str
    # Raw comment body text.
    body: str
    # Creation timestamp string from gh, or None when not provided.
    created_at: str | None = None
+
+
@dataclass(frozen=True, slots=True)
class GitHubIssueReference:
    """Fully-resolved pointer to a single GitHub issue (owner/repo/number)."""

    owner: str
    repo: str
    number: int

    @property
    def repo_full_name(self) -> str:
        """'owner/repo' form used by `gh --repo`."""
        return f"{self.owner}/{self.repo}"

    @property
    def display(self) -> str:
        """Human-readable 'owner/repo#N' form."""
        return f"{self.repo_full_name}#{self.number}"

    @property
    def url(self) -> str:
        """Canonical https URL for the issue."""
        return f"https://github.com/{self.owner}/{self.repo}/issues/{self.number}"
+
+
@dataclass(frozen=True, slots=True)
class GitHubIssueRead:
    """Snapshot of a GitHub issue fetched via `gh issue view --json ...`."""

    reference: GitHubIssueReference
    title: str
    body: str
    url: str
    # Issue state string from gh ("UNKNOWN" is substituted upstream when absent).
    state: str
    author: str
    labels: tuple[str, ...] = ()
    comments: tuple[GitHubIssueCommentRead, ...] = ()
+
+
class GitHubIssueService:
    """Thin wrapper around `gh issue` for Telegram intake and safe comment back."""

    def __init__(
        self,
        *,
        repo_root: Path | None = None,
        gh_binary: str | None = None,
    ) -> None:
        """Bind the service to a repository root and a `gh` executable.

        The default repo root is three directories above this file; the gh
        binary falls back to whatever `gh` resolves to on PATH.
        """
        self._repo_root = (repo_root or Path(__file__).resolve().parents[3]).resolve()
        self._gh_binary = gh_binary or shutil.which("gh") or "gh"

    def resolve_issue_reference(self, raw_value: str) -> GitHubIssueReference:
        """Parse a user-supplied issue reference into owner/repo/number.

        Accepts a full https issue URL, an "owner/repo#123" reference, or a
        bare "#123" / "123" resolved against the current git origin.

        Raises:
            ValueError: when the value is empty or in an unsupported format.
        """
        value = raw_value.strip()
        if not value:
            raise ValueError("missing GitHub issue reference")

        matched = _ISSUE_URL_RE.match(value)
        if matched:
            return GitHubIssueReference(
                owner=matched.group("owner"),
                repo=matched.group("repo"),
                number=int(matched.group("number")),
            )

        matched = _ISSUE_REF_RE.match(value)
        if matched:
            return GitHubIssueReference(
                owner=matched.group("owner"),
                repo=matched.group("repo"),
                number=int(matched.group("number")),
            )

        matched = _ISSUE_NUMBER_RE.match(value)
        if matched:
            # Bare issue numbers are resolved against the local git origin.
            owner, repo = self._resolve_current_repo()
            return GitHubIssueReference(
                owner=owner,
                repo=repo,
                number=int(matched.group("number")),
            )

        raise ValueError("unsupported GitHub issue reference; use URL, owner/repo#123, or #123")

    def fetch_issue(self, raw_reference: str) -> GitHubIssueRead:
        """Fetch issue details (title, body, labels, comments) via `gh issue view`.

        Missing fields are defaulted defensively ("unknown" author, "UNKNOWN"
        state, empty strings) so a sparse gh payload never raises.

        Raises:
            ValueError: for an unparsable reference.
            RuntimeError: when gh fails or returns invalid JSON.
        """
        reference = self.resolve_issue_reference(raw_reference)
        payload = self._run_gh_json(
            [
                "issue",
                "view",
                str(reference.number),
                "--repo",
                reference.repo_full_name,
                "--json",
                "number,title,body,url,state,author,labels,comments",
            ]
        )
        comments = tuple(
            GitHubIssueCommentRead(
                author=str((item.get("author") or {}).get("login") or "unknown"),
                body=str(item.get("body") or ""),
                created_at=str(item.get("createdAt") or "") or None,
            )
            for item in payload.get("comments", [])
        )
        # Keep only non-empty label names.
        labels = tuple(
            str(item.get("name") or "").strip()
            for item in payload.get("labels", [])
            if str(item.get("name") or "").strip()
        )
        return GitHubIssueRead(
            reference=reference,
            title=str(payload.get("title") or "").strip(),
            body=str(payload.get("body") or ""),
            url=str(payload.get("url") or reference.url).strip(),
            state=str(payload.get("state") or "UNKNOWN").strip(),
            author=str((payload.get("author") or {}).get("login") or "unknown"),
            labels=labels,
            comments=comments,
        )

    def build_manager_prompt(self, issue: GitHubIssueRead, *, operator_note: str | None = None) -> str:
        """Render an issue (plus optional operator note) into a manager-pipeline prompt.

        Includes the issue header, labels when present, the body, and up to the
        three most recent non-empty comments.
        """
        lines = [
            "Resolve the following GitHub issue in the current repository through the existing patch-only manager pipeline.",
            "",
            f"Issue: {issue.reference.display}",
            f"URL: {issue.url}",
            f"Title: {issue.title or '(untitled)'}",
            f"State: {issue.state}",
            f"Author: {issue.author}",
        ]
        if issue.labels:
            lines.append(f"Labels: {', '.join(issue.labels)}")
        if operator_note:
            lines.extend(["", "Operator note:", operator_note.strip()])
        lines.extend(["", "Issue body:", issue.body.strip() or "(empty)"])

        # Last three comments that actually contain text.
        recent_comments = [item for item in issue.comments if item.body.strip()][-3:]
        if recent_comments:
            lines.extend(["", "Recent comments:"])
            for item in recent_comments:
                lines.append(f"- {item.author}: {item.body.strip()}")

        lines.extend(
            [
                "",
                "Deliver the smallest useful fix, stay within scoped files, update tests when needed, and prepare a draft PR when possible.",
            ]
        )
        return "\n".join(lines).strip()

    def post_comment(self, raw_reference: str, body: str) -> str:
        """Post a comment on the referenced issue via `gh issue comment`.

        Returns gh's stdout (stripped). Raises ValueError for a bad reference
        or RuntimeError when gh fails.
        """
        reference = self.resolve_issue_reference(raw_reference)
        completed = self._run_gh(
            [
                "issue",
                "comment",
                str(reference.number),
                "--repo",
                reference.repo_full_name,
                "--body",
                body.strip(),
            ]
        )
        return (completed.stdout or "").strip()

    def _resolve_current_repo(self) -> tuple[str, str]:
        """Derive (owner, repo) from the `origin` remote of the bound repo root.

        Raises:
            ValueError: when origin is missing or not a recognized GitHub remote.
        """
        completed = subprocess.run(
            ["git", "remote", "get-url", "origin"],
            cwd=self._repo_root,
            capture_output=True,
            text=True,
            check=False,
        )
        if completed.returncode != 0:
            raise ValueError("cannot resolve current GitHub repo from git origin")
        remote = (completed.stdout or "").strip()
        matched = _REMOTE_RE.match(remote)
        if not matched:
            raise ValueError("current git origin is not a supported GitHub remote")
        return matched.group("owner"), matched.group("repo")

    def _run_gh_json(self, args: list[str]) -> dict[str, object]:
        """Run gh and parse its stdout as a JSON object.

        Raises:
            RuntimeError: when gh fails, output is not valid JSON, or the
                decoded payload is not a dict.
        """
        completed = self._run_gh(args)
        try:
            # Empty stdout is treated as an empty object.
            payload = json.loads((completed.stdout or "").strip() or "{}")
        except json.JSONDecodeError as exc:
            raise RuntimeError("gh returned invalid JSON for issue query") from exc
        if not isinstance(payload, dict):
            raise RuntimeError("gh returned an unexpected payload for issue query")
        return payload

    def _run_gh(self, args: list[str]) -> subprocess.CompletedProcess[str]:
        """Invoke the gh binary in the repo root; raise RuntimeError on non-zero exit.

        The error detail prefers stderr, then stdout, then a generic message.
        """
        completed = subprocess.run(
            [self._gh_binary, *args],
            cwd=self._repo_root,
            capture_output=True,
            text=True,
            check=False,
        )
        if completed.returncode != 0:
            detail = (completed.stderr or completed.stdout or "gh command failed").strip()
            raise RuntimeError(detail)
        return completed
diff --git a/src/autoresearch/core/services/housekeeper.py b/src/autoresearch/core/services/housekeeper.py
new file mode 100644
index 00000000..e6735c0e
--- /dev/null
+++ b/src/autoresearch/core/services/housekeeper.py
@@ -0,0 +1,842 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import datetime, time, timedelta
+import hashlib
+import math
+from typing import Any
+from zoneinfo import ZoneInfo
+
+from autoresearch.agents.manager_agent import ManagerAgentService
+from autoresearch.core.services.approval_store import ApprovalStoreService
+from autoresearch.core.services.autoresearch_planner import AutoResearchPlannerService
+from autoresearch.core.services.telegram_notify import TelegramNotifierService
+from autoresearch.shared.autoresearch_planner_contract import AutoResearchPlannerRequest
+from autoresearch.shared.housekeeper_contract import (
+ AdmissionRiskLevel,
+ CircuitBreakerStateRead,
+ CircuitBreakerStatus,
+ DeferredReason,
+ ExecutionProfileRead,
+ ExplorationBlockerReason,
+ ExplorationDedupKeyRead,
+ ExplorationRecordRead,
+ HousekeeperChangeReason,
+ HousekeeperMode,
+ HousekeeperModeUpdateRequest,
+ HousekeeperMorningSummaryRead,
+ HousekeeperStateRead,
+ HousekeeperTickRead,
+ NightBudgetStateRead,
+ TaskAdmissionAssessmentRead,
+)
+from autoresearch.shared.manager_agent_contract import ManagerDispatchRead, ManagerDispatchRequest, ManagerPlanStrategy
+from autoresearch.shared.media_job_contract import MediaJobRead, MediaJobStatus
+from autoresearch.shared.models import ApprovalStatus, JobStatus, utc_now
+from autoresearch.shared.store import Repository, create_resource_id
+
+
+@dataclass(frozen=True, slots=True)
+class _NightWindow:
+ start: datetime
+ end: datetime
+
+
+class ExecutionProfileResolver:
+ _PROFILES = {
+ HousekeeperMode.DAY_SAFE: ExecutionProfileRead(
+ profile_name=HousekeeperMode.DAY_SAFE,
+ pipeline_target="patch",
+ max_iterations=1,
+ auto_dispatch_allowed=False,
+ parallelism=1,
+ allow_draft_pr=False,
+ allow_repo_write=True,
+ allow_network=False,
+ allow_long_task_minutes=15,
+ ),
+ HousekeeperMode.NIGHT_READONLY_EXPLORE: ExecutionProfileRead(
+ profile_name=HousekeeperMode.NIGHT_READONLY_EXPLORE,
+ pipeline_target="patch",
+ max_iterations=2,
+ auto_dispatch_allowed=True,
+ parallelism=2,
+ allow_draft_pr=False,
+ allow_repo_write=True,
+ allow_network=False,
+ allow_long_task_minutes=90,
+ ),
+ HousekeeperMode.NIGHT_EXPLORE: ExecutionProfileRead(
+ profile_name=HousekeeperMode.NIGHT_EXPLORE,
+ pipeline_target="draft_pr",
+ max_iterations=2,
+ auto_dispatch_allowed=True,
+ parallelism=3,
+ allow_draft_pr=True,
+ allow_repo_write=True,
+ allow_network=False,
+ allow_long_task_minutes=120,
+ ),
+ }
+
+ def resolve(self, mode: HousekeeperMode) -> ExecutionProfileRead:
+ return self._PROFILES[mode].model_copy(deep=True)
+
+
+class HousekeeperService:
+ def __init__(
+ self,
+ *,
+ state_repository: Repository[HousekeeperStateRead],
+ budget_repository: Repository[NightBudgetStateRead],
+ exploration_repository: Repository[ExplorationRecordRead],
+ timezone_name: str = "Asia/Shanghai",
+ summary_chat_id: str | None = None,
+ profile_resolver: ExecutionProfileResolver | None = None,
+ ) -> None:
+ self._state_repository = state_repository
+ self._budget_repository = budget_repository
+ self._exploration_repository = exploration_repository
+ self._timezone = ZoneInfo(timezone_name)
+ self._summary_chat_id = (summary_chat_id or "").strip() or None
+ self._profile_resolver = profile_resolver or ExecutionProfileResolver()
+
+ def get_state(self, *, now: datetime | None = None) -> HousekeeperStateRead:
+ current = self._state_repository.get("housekeeper")
+ resolved_now = self._normalize_now(now)
+ if current is None:
+ scheduled = self._scheduled_mode_for(resolved_now)
+ current = HousekeeperStateRead(
+ scheduled_mode=scheduled,
+ effective_mode=scheduled,
+ reason=HousekeeperChangeReason.SCHEDULE,
+ changed_by="system",
+ last_changed_at=resolved_now,
+ )
+ return self._state_repository.save(current.state_id, current)
+
+ updated = self._refresh_state(current=current, now=resolved_now)
+ if updated != current:
+ return self._state_repository.save(updated.state_id, updated)
+ return updated
+
+ def update_mode(self, request: HousekeeperModeUpdateRequest, *, now: datetime | None = None) -> HousekeeperStateRead:
+ current = self.get_state(now=now)
+ resolved_now = self._normalize_now(now)
+ if request.action == "set_manual_override":
+ if request.target_mode is None:
+ raise ValueError("target_mode is required for set_manual_override")
+ updated = current.model_copy(
+ update={
+ "manual_override_mode": request.target_mode,
+ "effective_until": request.effective_until or self._next_boundary(resolved_now),
+ "reason": request.reason,
+ "changed_by": request.changed_by.strip(),
+ "last_changed_at": resolved_now,
+ "metadata": {**current.metadata, **request.metadata},
+ }
+ )
+ elif request.action == "clear_manual_override":
+ updated = current.model_copy(
+ update={
+ "manual_override_mode": None,
+ "effective_until": None,
+ "reason": request.reason,
+ "changed_by": request.changed_by.strip(),
+ "last_changed_at": resolved_now,
+ "metadata": {**current.metadata, **request.metadata},
+ }
+ )
+ elif request.action == "ack_circuit_breaker":
+ breaker = current.circuit_breaker_state.model_copy(
+ update={
+ "status": CircuitBreakerStatus.CLOSED,
+ "acknowledged_at": resolved_now,
+ "reason": None,
+ "metadata": {**current.circuit_breaker_state.metadata, **request.metadata},
+ }
+ )
+ updated = current.model_copy(
+ update={
+ "circuit_breaker_state": breaker,
+ "reason": request.reason,
+ "changed_by": request.changed_by.strip(),
+ "last_changed_at": resolved_now,
+ }
+ )
+ else:
+ if request.target_mode is None:
+ raise ValueError("target_mode is required for apply_schedule")
+ updated = current.model_copy(
+ update={
+ "scheduled_mode": request.target_mode,
+ "reason": request.reason,
+ "changed_by": request.changed_by.strip(),
+ "last_changed_at": resolved_now,
+ "metadata": {**current.metadata, **request.metadata},
+ }
+ )
+
+ refreshed = self._refresh_state(current=updated, now=resolved_now)
+ return self._state_repository.save(refreshed.state_id, refreshed)
+
+ def prepare_manager_request(
+ self,
+ request: ManagerDispatchRequest,
+ *,
+ manager_service: ManagerAgentService,
+ trigger_source: str,
+ now: datetime | None = None,
+ ) -> tuple[ManagerDispatchRequest, TaskAdmissionAssessmentRead, HousekeeperStateRead]:
+ state = self.get_state(now=now)
+ profile = self._profile_resolver.resolve(state.effective_mode)
+ assessment = manager_service.assess_request(request)
+ auto_dispatch = bool(request.auto_dispatch and profile.auto_dispatch_allowed)
+ deferred_reason: DeferredReason | None = None
+
+ if state.circuit_breaker_state.status is CircuitBreakerStatus.OPEN:
+ auto_dispatch = False
+ deferred_reason = DeferredReason.CIRCUIT_BREAKER_OPEN
+ elif request.auto_dispatch and not profile.auto_dispatch_allowed:
+ auto_dispatch = False
+ deferred_reason = DeferredReason.DEFERRED_TO_NIGHT
+ elif auto_dispatch and not self._admission_allows(profile=profile, assessment=assessment):
+ auto_dispatch = False
+ deferred_reason = (
+ DeferredReason.APPROVAL_REQUIRED
+ if assessment.risk_level is AdmissionRiskLevel.HIGH
+ else DeferredReason.DEFERRED_TO_NIGHT
+ )
+
+ updated = request.model_copy(
+ update={
+ "pipeline_target": profile.pipeline_target,
+ "max_iterations": profile.max_iterations,
+ "auto_dispatch": auto_dispatch,
+ "metadata": {
+ **request.metadata,
+ "execution_profile": profile.model_dump(mode="json"),
+ "trigger_source": trigger_source,
+ "scheduled_window": state.effective_mode.value,
+ "admission_assessment": assessment.model_dump(mode="json"),
+ "deferred_reason": deferred_reason.value if deferred_reason is not None else None,
+ },
+ }
+ )
+ return updated, assessment, state
+
+ def prepare_planner_request(
+ self,
+ request: AutoResearchPlannerRequest,
+ *,
+ trigger_source: str,
+ now: datetime | None = None,
+ ) -> tuple[AutoResearchPlannerRequest, TaskAdmissionAssessmentRead, HousekeeperStateRead]:
+ state = self.get_state(now=now)
+ profile = self._profile_resolver.resolve(state.effective_mode)
+ assessment = self.assess_planner_request(request)
+ deferred_reason: DeferredReason | None = None
+ if state.circuit_breaker_state.status is CircuitBreakerStatus.OPEN:
+ deferred_reason = DeferredReason.CIRCUIT_BREAKER_OPEN
+
+ updated = request.model_copy(
+ update={
+ "pipeline_target": profile.pipeline_target,
+ "max_iterations": profile.max_iterations,
+ "metadata": {
+ **request.metadata,
+ "execution_profile": profile.model_dump(mode="json"),
+ "trigger_source": trigger_source,
+ "scheduled_window": state.effective_mode.value,
+ "admission_assessment": assessment.model_dump(mode="json"),
+ "deferred_reason": deferred_reason.value if deferred_reason is not None else None,
+ },
+ }
+ )
+ return updated, assessment, state
+
+ def assess_planner_request(self, request: AutoResearchPlannerRequest) -> TaskAdmissionAssessmentRead:
+ estimated_runtime = 12 if request.pipeline_target == "patch" else 20
+ if request.max_candidates > 5:
+ estimated_runtime += 10
+ if request.include_upstream_watch:
+ estimated_runtime += 10
+ risk = AdmissionRiskLevel.LOW if estimated_runtime <= 15 else AdmissionRiskLevel.MEDIUM
+ return TaskAdmissionAssessmentRead(
+ plan_shape="planner_candidate",
+ estimated_runtime_minutes=estimated_runtime,
+ requires_repo_write=True,
+ requires_network=False,
+ fanout_count=1,
+ risk_level=risk,
+ )
+
+ def create_morning_summary(
+ self,
+ *,
+ manager_service: ManagerAgentService,
+ planner_service: AutoResearchPlannerService,
+ approval_service: ApprovalStoreService,
+ notifier: TelegramNotifierService,
+ media_jobs: list[MediaJobRead] | None = None,
+ now: datetime | None = None,
+ ) -> HousekeeperMorningSummaryRead:
+ state = self.get_state(now=now)
+ resolved_now = self._normalize_now(now)
+ window = self._previous_night_window(resolved_now)
+
+ completed_items: list[str] = []
+ blocked_items: list[str] = []
+ decision_items: list[str] = []
+ queue_items: list[str] = []
+
+ for dispatch in manager_service.list_dispatches():
+ if not self._within_window(dispatch.updated_at, window):
+ continue
+ line = f"{dispatch.dispatch_id}: {dispatch.summary}"
+ if dispatch.status is JobStatus.COMPLETED:
+ completed_items.append(line)
+ elif dispatch.status is JobStatus.FAILED:
+ blocked_items.append(line)
+
+ for plan in planner_service.list():
+ if not self._within_window(plan.updated_at, window):
+ continue
+ line = f"{plan.plan_id}: {plan.summary}"
+ if plan.dispatch_status.value == "dispatched" and plan.run_summary is not None:
+ completed_items.append(line)
+ elif plan.dispatch_status.value == "failed":
+ blocked_items.append(line)
+
+ for job in media_jobs or []:
+ if not self._within_window(job.updated_at, window):
+ continue
+ line = f"{job.job_id}: {job.mode.value} -> {job.status.value}"
+ if job.status is MediaJobStatus.COMPLETED:
+ completed_items.append(line)
+ elif job.status is MediaJobStatus.FAILED:
+ blocked_items.append(line)
+
+ for approval in approval_service.list_requests(status=ApprovalStatus.PENDING, limit=20):
+ decision_items.append(f"{approval.approval_id}: {approval.title}")
+
+ for dispatch in manager_service.list_dispatches():
+ if dispatch.status in {JobStatus.CREATED, JobStatus.QUEUED}:
+ queue_items.append(f"manager {dispatch.dispatch_id}: {dispatch.summary}")
+ for plan in planner_service.list_pending(limit=20):
+ queue_items.append(f"plan {plan.plan_id}: {plan.summary}")
+
+ summary_text = "\n".join(
+ [
+ "昨夜完成了什么",
+ *(f"- {item}" for item in (completed_items or ["无"])),
+ "",
+ "失败/阻塞了什么",
+ *(f"- {item}" for item in (blocked_items or ["无"])),
+ "",
+ "今天需要你决定什么",
+ *(f"- {item}" for item in (decision_items or ["无"])),
+ "",
+ "系统当前模式与待执行队列",
+ f"- mode: {state.effective_mode.value}",
+ *(f"- {item}" for item in (queue_items or ["无"])),
+ ]
+ )
+
+ sent = bool(self._summary_chat_id and notifier.enabled and notifier.send_message(chat_id=self._summary_chat_id, text=summary_text))
+ updated_state = state.model_copy(update={"last_summary_at": resolved_now})
+ self._state_repository.save(updated_state.state_id, updated_state)
+ return HousekeeperMorningSummaryRead(
+ sent=sent,
+ summary_text=summary_text,
+ completed_items=completed_items,
+ blocked_items=blocked_items,
+ decision_items=decision_items,
+ queue_items=queue_items,
+ state=updated_state,
+ )
+
+ def execute_night_explore_tick(
+ self,
+ *,
+ manager_service: ManagerAgentService,
+ planner_service: AutoResearchPlannerService,
+ notifier: TelegramNotifierService,
+ media_jobs: list[MediaJobRead] | None = None,
+ now: datetime | None = None,
+ ) -> HousekeeperTickRead:
+ state = self.get_state(now=now)
+ resolved_now = self._normalize_now(now)
+ budget = self._get_or_create_budget(resolved_now)
+
+ if state.circuit_breaker_state.status is CircuitBreakerStatus.OPEN:
+ return HousekeeperTickRead(
+ executed=False,
+ skipped_reason="circuit_breaker_open",
+ blocker_reason=ExplorationBlockerReason.CIRCUIT_BREAKER_OPEN,
+ summary="Night explore skipped because the circuit breaker is open.",
+ state=state,
+ budget=budget,
+ )
+ if state.effective_mode not in {HousekeeperMode.NIGHT_READONLY_EXPLORE, HousekeeperMode.NIGHT_EXPLORE}:
+ return HousekeeperTickRead(
+ executed=False,
+ skipped_reason="not_in_night_mode",
+ summary="Night explore skipped because the effective mode is not a night mode.",
+ state=state,
+ budget=budget,
+ )
+ if self._budget_exhausted(budget):
+ self._record_budget_blocker(now=resolved_now)
+ return HousekeeperTickRead(
+ executed=False,
+ skipped_reason="budget_exhausted",
+ blocker_reason=ExplorationBlockerReason.BUDGET_EXHAUSTED,
+ summary="Night explore skipped because the nightly budget is exhausted.",
+ state=state,
+ budget=budget,
+ )
+
+ pending_dispatch = self._pick_pending_manager_dispatch(manager_service=manager_service)
+ if pending_dispatch is not None:
+ dedup_key = self._build_dispatch_dedup_key(pending_dispatch)
+ if self._is_dedup_blocked(dedup_key=dedup_key, blocker_reason=None, now=resolved_now):
+ return HousekeeperTickRead(
+ executed=False,
+ skipped_reason="dedup_blocked",
+ target_kind="manager_dispatch",
+ target_id=pending_dispatch.dispatch_id,
+ blocker_reason=ExplorationBlockerReason.UNKNOWN,
+ summary="Night explore skipped a deferred manager dispatch because an equivalent attempt ran recently.",
+ state=state,
+ budget=budget,
+ )
+ result = manager_service.execute_dispatch(pending_dispatch.dispatch_id)
+ blocker = self._blocker_from_dispatch(result)
+ budget = self._consume_budget(budget=budget, dispatch=result)
+ self._record_exploration_attempt(
+ dedup_key=dedup_key,
+ target_kind="manager_dispatch",
+ target_id=result.dispatch_id,
+ blocker_reason=blocker,
+ final_status=result.status.value,
+ metadata={"summary": result.summary},
+ now=resolved_now,
+ )
+ state = self._update_circuit_breaker_state(notifier=notifier, media_jobs=media_jobs, now=resolved_now)
+ return HousekeeperTickRead(
+ executed=True,
+ target_kind="manager_dispatch",
+ target_id=result.dispatch_id,
+ blocker_reason=blocker,
+ summary=result.summary,
+ state=state,
+ budget=budget,
+ )
+
+ profile = self._profile_resolver.resolve(state.effective_mode)
+ planner_request = AutoResearchPlannerRequest(
+ goal="Scan the repo for the next safe patch-only improvement.",
+ pipeline_target=profile.pipeline_target,
+ max_iterations=profile.max_iterations,
+ include_upstream_watch=True,
+ metadata={"trigger_source": "night_explore_tick"},
+ )
+ planner_request, _, _ = self.prepare_planner_request(
+ planner_request,
+ trigger_source="night_explore_tick",
+ now=resolved_now,
+ )
+ plan = planner_service.create(planner_request)
+ if plan.selected_candidate is None:
+ return HousekeeperTickRead(
+ executed=False,
+ skipped_reason="no_candidate",
+ summary="Night explore did not find a new planner candidate.",
+ state=state,
+ budget=budget,
+ )
+
+ dedup_key = self._build_plan_dedup_key(plan)
+ if self._is_dedup_blocked(dedup_key=dedup_key, blocker_reason=None, now=resolved_now):
+ return HousekeeperTickRead(
+ executed=False,
+ skipped_reason="dedup_blocked",
+ target_kind="planner_dispatch",
+ target_id=plan.plan_id,
+ blocker_reason=ExplorationBlockerReason.UNKNOWN,
+ summary="Night explore skipped a planner candidate because an equivalent attempt ran recently.",
+ state=state,
+ budget=budget,
+ )
+
+ queued = planner_service.request_dispatch(plan.plan_id, requested_by="housekeeper")
+ result = planner_service.execute_dispatch(queued.plan_id)
+ blocker = self._blocker_from_plan(result)
+ budget = self._consume_budget(budget=budget, plan=result)
+ self._record_exploration_attempt(
+ dedup_key=dedup_key,
+ target_kind="planner_dispatch",
+ target_id=result.plan_id,
+ blocker_reason=blocker,
+ final_status=result.dispatch_status.value,
+ metadata={"summary": result.summary},
+ now=resolved_now,
+ )
+ state = self._update_circuit_breaker_state(notifier=notifier, media_jobs=media_jobs, now=resolved_now)
+ return HousekeeperTickRead(
+ executed=True,
+ target_kind="planner_dispatch",
+ target_id=result.plan_id,
+ blocker_reason=blocker,
+ summary=result.summary,
+ state=state,
+ budget=budget,
+ )
+
+ def record_media_job_outcome(
+ self,
+ *,
+ job: MediaJobRead,
+ notifier: TelegramNotifierService,
+ media_jobs: list[MediaJobRead] | None = None,
+ now: datetime | None = None,
+ ) -> HousekeeperStateRead:
+ resolved_now = self._normalize_now(now)
+ blocker = None if job.status is MediaJobStatus.COMPLETED else ExplorationBlockerReason.UNKNOWN
+ dedup = ExplorationDedupKeyRead(
+ repo_id="media",
+ target_scope_hash=self._hash_text(job.target_bucket.value),
+ intent_id=job.mode.value,
+ normalized_goal_hash=self._hash_text(job.url),
+ )
+ self._record_exploration_attempt(
+ dedup_key=dedup,
+ target_kind="media_job",
+ target_id=job.job_id,
+ blocker_reason=blocker,
+ final_status=job.status.value,
+ metadata={"url": job.url},
+ now=resolved_now,
+ )
+ return self._update_circuit_breaker_state(notifier=notifier, media_jobs=media_jobs, now=resolved_now)
+
+ def _refresh_state(self, *, current: HousekeeperStateRead, now: datetime) -> HousekeeperStateRead:
+ scheduled_mode = self._scheduled_mode_for(now)
+ manual_mode = current.manual_override_mode
+ effective_until = current.effective_until
+ if manual_mode is not None and effective_until is not None and effective_until <= now:
+ manual_mode = None
+ effective_until = None
+
+ effective_mode = scheduled_mode
+ if current.circuit_breaker_state.status is CircuitBreakerStatus.OPEN:
+ effective_mode = HousekeeperMode.DAY_SAFE
+ elif manual_mode is not None and effective_until is not None and effective_until > now:
+ effective_mode = manual_mode
+
+ return current.model_copy(
+ update={
+ "scheduled_mode": scheduled_mode,
+ "manual_override_mode": manual_mode,
+ "effective_until": effective_until,
+ "effective_mode": effective_mode,
+ }
+ )
+
+ def _admission_allows(self, *, profile: ExecutionProfileRead, assessment: TaskAdmissionAssessmentRead) -> bool:
+ if not profile.auto_dispatch_allowed:
+ return False
+ if profile.profile_name is not HousekeeperMode.DAY_SAFE:
+ return True
+ return (
+ assessment.estimated_runtime_minutes <= 15
+ and assessment.fanout_count <= 1
+ and assessment.risk_level in {AdmissionRiskLevel.LOW, AdmissionRiskLevel.MEDIUM}
+ and not profile.allow_draft_pr
+ )
+
+ def _pick_pending_manager_dispatch(self, *, manager_service: ManagerAgentService) -> ManagerDispatchRead | None:
+ candidates: list[ManagerDispatchRead] = []
+ for dispatch in manager_service.list_dispatches():
+ deferred = str(dispatch.metadata.get("deferred_reason") or "").strip()
+ if dispatch.status in {JobStatus.CREATED, JobStatus.QUEUED} and deferred in {"", DeferredReason.DEFERRED_TO_NIGHT.value}:
+ candidates.append(dispatch)
+ candidates.sort(key=lambda item: item.updated_at)
+ return candidates[0] if candidates else None
+
+ def _build_dispatch_dedup_key(self, dispatch: ManagerDispatchRead) -> ExplorationDedupKeyRead:
+ scope = "|".join(dispatch.selected_intent.allowed_paths if dispatch.selected_intent is not None else [])
+ return ExplorationDedupKeyRead(
+ repo_id="repo",
+ target_scope_hash=self._hash_text(scope),
+ intent_id=dispatch.selected_intent.intent_id if dispatch.selected_intent is not None else "unknown",
+ normalized_goal_hash=self._hash_text(dispatch.normalized_goal),
+ )
+
+ def _build_plan_dedup_key(self, plan) -> ExplorationDedupKeyRead:
+ candidate = plan.selected_candidate
+ scope = "|".join(candidate.allowed_paths if candidate is not None else [])
+ return ExplorationDedupKeyRead(
+ repo_id="repo",
+ target_scope_hash=self._hash_text(scope),
+ intent_id=(candidate.category if candidate is not None else "planner"),
+ normalized_goal_hash=self._hash_text(plan.goal),
+ )
+
+ def _is_dedup_blocked(
+ self,
+ *,
+ dedup_key: ExplorationDedupKeyRead,
+ blocker_reason: ExplorationBlockerReason | None,
+ now: datetime,
+ ) -> bool:
+ cutoff = now - timedelta(hours=24)
+ for record in self._exploration_repository.list():
+ if record.created_at < cutoff:
+ continue
+ if record.dedup_key != dedup_key:
+ continue
+ if record.blocker_reason == blocker_reason:
+ return True
+ return False
+
+ def _record_exploration_attempt(
+ self,
+ *,
+ dedup_key: ExplorationDedupKeyRead,
+ target_kind: str,
+ target_id: str,
+ blocker_reason: ExplorationBlockerReason | None,
+ final_status: str | None,
+ metadata: dict[str, Any],
+ now: datetime,
+ ) -> None:
+ record = ExplorationRecordRead(
+ record_id=create_resource_id("explore"),
+ dedup_key=dedup_key,
+ target_kind=target_kind,
+ target_id=target_id,
+ blocker_reason=blocker_reason,
+ final_status=final_status,
+ created_at=now,
+ updated_at=now,
+ metadata=metadata,
+ )
+ self._exploration_repository.save(record.record_id, record)
+
+ def _update_circuit_breaker_state(
+ self,
+ *,
+ notifier: TelegramNotifierService,
+ media_jobs: list[MediaJobRead] | None,
+ now: datetime,
+ ) -> HousekeeperStateRead:
+ state = self.get_state(now=now)
+ recent_records = [
+ record
+ for record in self._exploration_repository.list()
+ if record.updated_at >= now - timedelta(hours=2)
+ ]
+ recent_records.sort(key=lambda item: item.updated_at, reverse=True)
+ failures = 0
+ consecutive_failures = 0
+ for index, record in enumerate(recent_records):
+ failed = record.blocker_reason is not None or str(record.final_status or "").lower() in {"failed", "human_review"}
+ if failed:
+ failures += 1
+ if index == consecutive_failures and failed:
+ consecutive_failures += 1
+ elif index == consecutive_failures:
+ continue
+ media_failures = 0
+ for job in sorted(media_jobs or [], key=lambda item: item.updated_at, reverse=True):
+ if job.updated_at < now - timedelta(hours=2):
+ continue
+ if job.status is MediaJobStatus.FAILED:
+ media_failures += 1
+ else:
+ break
+
+ total = len(recent_records)
+ failure_rate = failures / total if total else 0.0
+ should_open = consecutive_failures >= 3 or (total >= 3 and failure_rate >= 0.7) or media_failures >= 3
+ if not should_open:
+ return state
+ if state.circuit_breaker_state.status is CircuitBreakerStatus.OPEN:
+ return state
+
+ breaker = CircuitBreakerStateRead(
+ status=CircuitBreakerStatus.OPEN,
+ triggered_at=now,
+ reason="automatic failure threshold exceeded",
+ consecutive_failures=consecutive_failures,
+ recent_failure_rate=failure_rate,
+ metadata={"media_consecutive_failures": media_failures},
+ )
+ updated = state.model_copy(
+ update={
+ "circuit_breaker_state": breaker,
+ "effective_mode": HousekeeperMode.DAY_SAFE,
+ "reason": HousekeeperChangeReason.CIRCUIT_BREAKER,
+ "changed_by": "system",
+ "last_changed_at": now,
+ }
+ )
+ self._state_repository.save(updated.state_id, updated)
+ if self._summary_chat_id and notifier.enabled:
+ notifier.send_message(
+ chat_id=self._summary_chat_id,
+ text=(
+ "[housekeeper] circuit breaker opened\n"
+ f"- consecutive_failures: {consecutive_failures}\n"
+ f"- failure_rate_2h: {failure_rate:.2f}\n"
+ f"- media_consecutive_failures: {media_failures}"
+ ),
+ )
+ return updated
+
+ def _get_or_create_budget(self, now: datetime) -> NightBudgetStateRead:
+ current = self._budget_repository.get("night_budget")
+ window = self._current_night_window(now)
+ if current is not None and current.window_start == window.start and current.window_end == window.end:
+ return current
+ budget = NightBudgetStateRead(
+ window_start=window.start,
+ window_end=window.end,
+ updated_at=now,
+ )
+ return self._budget_repository.save(budget.budget_id, budget)
+
+ def _consume_budget(self, *, budget: NightBudgetStateRead, dispatch=None, plan=None) -> NightBudgetStateRead:
+ duration_ms = 0
+ draft_prs = budget.draft_prs_used
+ if dispatch is not None and dispatch.run_summary is not None:
+ duration_ms = dispatch.run_summary.driver_result.metrics.duration_ms or 0
+ if dispatch.run_summary.promotion is not None and dispatch.run_summary.promotion.pr_url:
+ draft_prs += 1
+ if plan is not None and plan.run_summary is not None:
+ duration_ms = plan.run_summary.driver_result.metrics.duration_ms or 0
+ if plan.run_summary.promotion is not None and plan.run_summary.promotion.pr_url:
+ draft_prs += 1
+ updated = budget.model_copy(
+ update={
+ "dispatches_used": budget.dispatches_used + 1,
+ "draft_prs_used": draft_prs,
+ "worker_minutes_used": budget.worker_minutes_used + (math.ceil(duration_ms / 60000) if duration_ms else 0),
+ "updated_at": utc_now(),
+ }
+ )
+ return self._budget_repository.save(updated.budget_id, updated)
+
+ def _budget_exhausted(self, budget: NightBudgetStateRead) -> bool:
+ return (
+ budget.dispatches_used >= budget.max_dispatches_per_night
+ or budget.draft_prs_used >= budget.max_draft_pr_per_night
+ or budget.worker_minutes_used >= budget.max_worker_minutes_per_night
+ )
+
+ def _record_budget_blocker(self, *, now: datetime) -> None:
+ dedup_key = ExplorationDedupKeyRead(
+ repo_id="repo",
+ target_scope_hash=self._hash_text("night_budget"),
+ intent_id="night_budget",
+ normalized_goal_hash=self._hash_text("night_budget"),
+ )
+ self._record_exploration_attempt(
+ dedup_key=dedup_key,
+ target_kind="planner_dispatch",
+ target_id="night_budget",
+ blocker_reason=ExplorationBlockerReason.BUDGET_EXHAUSTED,
+ final_status="skipped",
+ metadata={},
+ now=now,
+ )
+
+ def _blocker_from_dispatch(self, dispatch: ManagerDispatchRead) -> ExplorationBlockerReason | None:
+ if dispatch.run_summary is None:
+ return ExplorationBlockerReason.UNKNOWN
+ return self._blocker_from_run_summary(
+ final_status=dispatch.run_summary.final_status,
+ error=dispatch.run_summary.driver_result.error,
+ )
+
+ def _blocker_from_plan(self, plan) -> ExplorationBlockerReason | None:
+ if plan.run_summary is None:
+ if plan.dispatch_status.value == "failed":
+ return ExplorationBlockerReason.UNKNOWN
+ return None
+ return self._blocker_from_run_summary(
+ final_status=plan.run_summary.final_status,
+ error=plan.run_summary.driver_result.error,
+ )
+
+ def _blocker_from_run_summary(self, *, final_status: str, error: str | None) -> ExplorationBlockerReason | None:
+ message = str(error or "").lower()
+ if not message and final_status in {"ready_for_promotion", "promoted"}:
+ return None
+ if "permission" in message:
+ return ExplorationBlockerReason.PERMISSION_DENIED
+ if "environmentcheckfailed" in message or "missing" in message:
+ return ExplorationBlockerReason.ENV_MISSING
+ if "dirty" in message:
+ return ExplorationBlockerReason.DIRTY_REPO
+ if final_status == "human_review":
+ return ExplorationBlockerReason.APPROVAL_PENDING
+ if "stalled" in message or "stalled_no_progress" in message:
+ return ExplorationBlockerReason.STALLED_NO_PROGRESS
+ if "validation" in message:
+ return ExplorationBlockerReason.VALIDATION_FAILED
+ return ExplorationBlockerReason.UNKNOWN
+
+ def _scheduled_mode_for(self, now: datetime) -> HousekeeperMode:
+ local_now = now.astimezone(self._timezone)
+ local_time = local_now.timetz().replace(tzinfo=None)
+ if time(9, 0) <= local_time < time(23, 0):
+ return HousekeeperMode.DAY_SAFE
+ return HousekeeperMode.NIGHT_READONLY_EXPLORE
+
+ def _next_boundary(self, now: datetime) -> datetime:
+ local_now = now.astimezone(self._timezone)
+ day_boundary = datetime.combine(local_now.date(), time(9, 0), tzinfo=self._timezone)
+ night_boundary = datetime.combine(local_now.date(), time(23, 0), tzinfo=self._timezone)
+ if local_now < day_boundary:
+ return day_boundary.astimezone(now.tzinfo)
+ if local_now < night_boundary:
+ return night_boundary.astimezone(now.tzinfo)
+ next_day = local_now.date() + timedelta(days=1)
+ return datetime.combine(next_day, time(9, 0), tzinfo=self._timezone).astimezone(now.tzinfo)
+
+ def _current_night_window(self, now: datetime) -> _NightWindow:
+ local_now = now.astimezone(self._timezone)
+ today_23 = datetime.combine(local_now.date(), time(23, 0), tzinfo=self._timezone)
+ today_9 = datetime.combine(local_now.date(), time(9, 0), tzinfo=self._timezone)
+ if local_now < today_9:
+ start = today_23 - timedelta(days=1)
+ end = today_9
+ elif local_now >= today_23:
+ start = today_23
+ end = today_9 + timedelta(days=1)
+ else:
+ start = today_23
+ end = today_9 + timedelta(days=1)
+ return _NightWindow(start=start.astimezone(now.tzinfo), end=end.astimezone(now.tzinfo))
+
+ def _previous_night_window(self, now: datetime) -> _NightWindow:
+ local_now = now.astimezone(self._timezone)
+ today_9 = datetime.combine(local_now.date(), time(9, 0), tzinfo=self._timezone)
+ end = today_9 if local_now >= today_9 else today_9 - timedelta(days=1)
+ start = end - timedelta(hours=10)
+ return _NightWindow(start=start.astimezone(now.tzinfo), end=end.astimezone(now.tzinfo))
+
+ @staticmethod
+ def _within_window(value: datetime, window: _NightWindow) -> bool:
+ return window.start <= value <= window.end
+
+ @staticmethod
+ def _hash_text(value: str) -> str:
+ return hashlib.sha256(value.encode("utf-8")).hexdigest()
+
+ @staticmethod
+ def _normalize_now(now: datetime | None) -> datetime:
+ return now or utc_now()
diff --git a/src/autoresearch/core/services/media_jobs.py b/src/autoresearch/core/services/media_jobs.py
new file mode 100644
index 00000000..553c7446
--- /dev/null
+++ b/src/autoresearch/core/services/media_jobs.py
@@ -0,0 +1,243 @@
+from __future__ import annotations
+
+from datetime import datetime
+import json
+from pathlib import Path
+import re
+import subprocess
+from typing import Callable
+from urllib.parse import urlparse
+
+from autoresearch.shared.media_job_contract import (
+ MediaJobEventRead,
+ MediaJobMode,
+ MediaJobPostprocess,
+ MediaJobRead,
+ MediaJobRequest,
+ MediaJobStatus,
+ MediaTargetBucket,
+)
+from autoresearch.shared.models import utc_now
+from autoresearch.shared.store import Repository, create_resource_id
+
+_TOKEN_TO_YTDLP = {
+ "{title}": "%(title)s",
+ "{id}": "%(id)s",
+ "{uploader}": "%(uploader)s",
+ "{upload_date}": "%(upload_date)s",
+}
+_URL_RE = re.compile(r"^https?://\S+$", re.IGNORECASE)
+
+
+class MediaJobService:
+ def __init__(
+ self,
+ *,
+ repository: Repository[MediaJobRead],
+ event_repository: Repository[MediaJobEventRead],
+ media_root: Path,
+ allowed_domains: set[str],
+ yt_dlp_bin: str = "yt-dlp",
+ ffmpeg_bin: str = "ffmpeg",
+ command_runner: Callable[[list[str]], subprocess.CompletedProcess[str]] | None = None,
+ ) -> None:
+ self._repository = repository
+ self._event_repository = event_repository
+ self._media_root = media_root
+ self._allowed_domains = {item.lower().strip() for item in allowed_domains if item.strip()}
+ self._yt_dlp_bin = yt_dlp_bin
+ self._ffmpeg_bin = ffmpeg_bin
+ self._command_runner = command_runner or self._run_command
+
+ def create(self, request: MediaJobRequest) -> MediaJobRead:
+ now = utc_now()
+ job = MediaJobRead(
+ job_id=create_resource_id("mediajob"),
+ url=request.url,
+ mode=request.mode,
+ target_bucket=request.target_bucket,
+ filename_template=request.filename_template,
+ postprocess=request.postprocess,
+ status=MediaJobStatus.QUEUED,
+ created_at=now,
+ updated_at=now,
+ metadata=request.metadata,
+ )
+ self._ensure_directories()
+ self._record_event(job_id=job.job_id, stage="created", status=job.status.value, detail=request.url)
+ return self._repository.save(job.job_id, job)
+
+ def get(self, job_id: str) -> MediaJobRead | None:
+ return self._repository.get(job_id)
+
+ def list(self) -> list[MediaJobRead]:
+ return self._repository.list()
+
+ def execute(self, job_id: str) -> MediaJobRead:
+ job = self._require_job(job_id)
+ running = job.model_copy(update={"status": MediaJobStatus.RUNNING, "updated_at": utc_now(), "error": None})
+ self._repository.save(running.job_id, running)
+ self._record_event(job_id=job.job_id, stage="running", status="running", detail=job.mode.value)
+
+ try:
+ metadata = self._probe_metadata(running.url)
+ output_files = self._execute_job(running=running, metadata=metadata)
+ completed = running.model_copy(
+ update={
+ "status": MediaJobStatus.COMPLETED,
+ "updated_at": utc_now(),
+ "output_files": output_files,
+ "title": metadata.get("title"),
+ "duration_seconds": self._coerce_int(metadata.get("duration")),
+ "uploader": metadata.get("uploader"),
+ "subtitle_path": self._find_suffix(output_files, {".srt", ".vtt"}),
+ "metadata_path": self._metadata_path_for_job(running).as_posix(),
+ "error": None,
+ }
+ )
+ self._write_metadata_file(completed=completed, metadata=metadata)
+ self._record_event(job_id=job.job_id, stage="completed", status="completed", detail="ok")
+ return self._repository.save(completed.job_id, completed)
+ except Exception as exc:
+ failed = running.model_copy(
+ update={"status": MediaJobStatus.FAILED, "updated_at": utc_now(), "error": str(exc)}
+ )
+ self._record_event(job_id=job.job_id, stage="failed", status="failed", detail=str(exc))
+ return self._repository.save(failed.job_id, failed)
+
+ def parse_telegram_task(self, text: str) -> MediaJobRequest | None:
+ normalized = text.strip()
+ if not normalized:
+ return None
+ parts = normalized.split(maxsplit=1)
+ explicit_mode = None
+ url = normalized
+ if len(parts) == 2 and parts[0].lower() in {"video", "audio", "subtitle", "metadata"}:
+ explicit_mode = MediaJobMode(parts[0].lower())
+ url = parts[1].strip()
+ if not _URL_RE.match(url):
+ return None
+ if not self.is_supported_url(url):
+ return None
+ mode = explicit_mode or MediaJobMode.VIDEO
+ bucket = {
+ MediaJobMode.AUDIO: MediaTargetBucket.AUDIO,
+ MediaJobMode.VIDEO: MediaTargetBucket.VIDEO,
+ MediaJobMode.SUBTITLE: MediaTargetBucket.SUBTITLES,
+ MediaJobMode.METADATA: MediaTargetBucket.META,
+ }[mode]
+ postprocess = {
+ MediaJobMode.AUDIO: MediaJobPostprocess.MP3,
+ MediaJobMode.VIDEO: MediaJobPostprocess.MP4,
+ MediaJobMode.SUBTITLE: MediaJobPostprocess.NONE,
+ MediaJobMode.METADATA: MediaJobPostprocess.NONE,
+ }[mode]
+ return MediaJobRequest(
+ url=url,
+ mode=mode,
+ target_bucket=bucket,
+ filename_template="{title}-{id}",
+ postprocess=postprocess,
+ )
+
+ def is_supported_url(self, url: str) -> bool:
+ host = (urlparse(url).hostname or "").lower()
+ return any(host == domain or host.endswith(f".{domain}") for domain in self._allowed_domains)
+
+ def _execute_job(self, *, running: MediaJobRead, metadata: dict[str, object]) -> list[str]:
+ output_dir = self._job_bucket_dir(running)
+ output_template = output_dir / f"{self._translate_template(running.filename_template)}.%(ext)s"
+ commands = self._build_commands(running=running, output_template=output_template)
+ for command in commands:
+ result = self._command_runner(command)
+ if result.returncode != 0:
+ raise RuntimeError(result.stderr.strip() or result.stdout.strip() or "media command failed")
+
+ output_files = sorted(path.as_posix() for path in output_dir.rglob("*") if path.is_file())
+ if running.mode is MediaJobMode.METADATA:
+ output_files = []
+ return output_files
+
+ def _build_commands(self, *, running: MediaJobRead, output_template: Path) -> list[list[str]]:
+ url = running.url
+ template = output_template.as_posix()
+ if running.mode is MediaJobMode.AUDIO:
+ return [[self._yt_dlp_bin, "-x", "--audio-format", "mp3", "-o", template, url]]
+ if running.mode is MediaJobMode.VIDEO:
+ return [[self._yt_dlp_bin, "-f", "mp4/best", "-o", template, url]]
+ if running.mode is MediaJobMode.SUBTITLE:
+ return [[self._yt_dlp_bin, "--skip-download", "--write-auto-sub", "--write-sub", "--sub-langs", "all", "-o", template, url]]
+ if running.mode is MediaJobMode.METADATA:
+ return []
+ raise ValueError(f"unsupported media mode: {running.mode}")
+
+ def _probe_metadata(self, url: str) -> dict[str, object]:
+ result = self._command_runner([self._yt_dlp_bin, "--dump-single-json", "--skip-download", url])
+ if result.returncode != 0:
+ raise RuntimeError(result.stderr.strip() or "failed to probe media metadata")
+ try:
+ payload = json.loads(result.stdout or "{}")
+ except json.JSONDecodeError as exc:
+ raise RuntimeError("invalid media metadata payload") from exc
+ if not isinstance(payload, dict):
+ raise RuntimeError("invalid media metadata payload")
+ return payload
+
+ def _write_metadata_file(self, *, completed: MediaJobRead, metadata: dict[str, object]) -> None:
+ metadata_path = self._metadata_path_for_job(completed)
+ metadata_path.parent.mkdir(parents=True, exist_ok=True)
+ metadata_path.write_text(json.dumps(metadata, ensure_ascii=False, indent=2), encoding="utf-8")
+
+ def _metadata_path_for_job(self, job: MediaJobRead) -> Path:
+ return self._bucket_dir(MediaTargetBucket.META) / f"{job.job_id}.json"
+
+ def _job_bucket_dir(self, job: MediaJobRead) -> Path:
+ return self._bucket_dir(job.target_bucket) / job.job_id
+
+ def _record_event(self, *, job_id: str, stage: str, status: str, detail: str) -> None:
+ event = MediaJobEventRead(
+ event_id=create_resource_id("mediaevt"),
+ job_id=job_id,
+ stage=stage,
+ status=status,
+ detail=detail,
+ created_at=utc_now(),
+ )
+ self._event_repository.save(event.event_id, event)
+
+ def _bucket_dir(self, bucket: MediaTargetBucket) -> Path:
+ return self._media_root / bucket.value
+
+ def _ensure_directories(self) -> None:
+ for bucket in MediaTargetBucket:
+ (self._media_root / bucket.value).mkdir(parents=True, exist_ok=True)
+ (self._media_root / "jobs").mkdir(parents=True, exist_ok=True)
+
+ def _translate_template(self, template: str) -> str:
+ return "-".join(_TOKEN_TO_YTDLP[token] for token in template.split("-"))
+
+ @staticmethod
+ def _find_suffix(paths: list[str], suffixes: set[str]) -> str | None:
+ for path in paths:
+ if Path(path).suffix.lower() in suffixes:
+ return path
+ return None
+
+ @staticmethod
+ def _coerce_int(value: object) -> int | None:
+ try:
+ if value is None:
+ return None
+ return int(value)
+ except (TypeError, ValueError):
+ return None
+
+ def _require_job(self, job_id: str) -> MediaJobRead:
+ job = self.get(job_id)
+ if job is None:
+ raise KeyError(f"media job not found: {job_id}")
+ return job
+
+ @staticmethod
+ def _run_command(command: list[str]) -> subprocess.CompletedProcess[str]:
+ return subprocess.run(command, text=True, capture_output=True, check=False)
diff --git a/src/autoresearch/core/services/openhands_controlled_backend.py b/src/autoresearch/core/services/openhands_controlled_backend.py
index dd23f7f5..16e3c845 100644
--- a/src/autoresearch/core/services/openhands_controlled_backend.py
+++ b/src/autoresearch/core/services/openhands_controlled_backend.py
@@ -1,12 +1,15 @@
from __future__ import annotations
+import ast
from dataclasses import dataclass
import fnmatch
import json
import os
+import re
import shlex
import shutil
import subprocess
+import sys
from pathlib import Path
from autoresearch.core.services.git_promotion_gate import GitPromotionGateService
@@ -71,6 +74,12 @@ class OpenHandsControlledBackendService:
"SSH_ASKPASS",
}
_GIT_ENV_PREFIXES = ("GITHUB_", "GH_", "GIT_AUTHOR_", "GIT_COMMITTER_", "SSH_")
+ _FAST_FAIL_PATTERNS = (
+ re.compile(r"\bSyntaxError\b"),
+ re.compile(r"\bModuleNotFoundError\b"),
+ re.compile(r"\bImportError\b"),
+ re.compile(r"\bPermission denied\b", re.IGNORECASE),
+ )
def __init__(
self,
@@ -95,6 +104,7 @@ def run(self, request: ControlledExecutionRequest) -> ControlledExecutionRead:
log_file = artifacts_dir / "execution.log"
patch_file = artifacts_dir / "promotion.patch"
summary_file = artifacts_dir / "summary.json"
+ overlay = run_dir / "overlay"
artifacts_dir.mkdir(parents=True, exist_ok=True)
created_at = utc_now()
@@ -154,6 +164,11 @@ def run(self, request: ControlledExecutionRequest) -> ControlledExecutionRead:
current_backend_attempts += 1
total_attempts += 1
self._sync_directory(source=baseline, target=workspace, apply_excludes=False)
+ self._prepare_strict_workspace(
+ workspace=workspace,
+ overlay_root=overlay,
+ allowed_paths=request.allowed_paths,
+ )
self._append_log(
log_file,
f"\n=== attempt {total_attempts} backend={backend.value} iteration={current_backend_attempts}/{backend_limit} ===\n",
@@ -208,6 +223,20 @@ def run(self, request: ControlledExecutionRequest) -> ControlledExecutionRead:
else f"test_command failed with status={validation_status.value}"
)
+ fail_fast_reason = self._detect_fail_fast_reason(
+ execution_outcome=execution_outcome,
+ changed_files=changed_files,
+ workspace=workspace,
+ log_file=log_file,
+ )
+ if fail_fast_reason is not None:
+ status = ControlledRunStatus.FAILED
+ error = fail_fast_reason
+ validation_status = ValidationStatus.FAILED
+ if validation_exit_code is None:
+ validation_exit_code = 2
+ break
+
if exit_code == 0 and validation_status is ValidationStatus.PASSED and changed_files:
status = ControlledRunStatus.READY_FOR_PROMOTION
error = None
@@ -399,6 +428,7 @@ def _finalize_promotion(
def _sync_directory(self, *, source: Path, target: Path, apply_excludes: bool) -> None:
if target.exists():
+ self._make_tree_writable(target)
shutil.rmtree(target)
target.mkdir(parents=True, exist_ok=True)
@@ -429,21 +459,30 @@ def _execute_backend(
log_file: Path,
allowed_paths: list[str],
) -> _BackendExecutionOutcome:
- if backend is ControlledBackend.MOCK:
- return self._run_mock_backend(
+ try:
+ if backend is ControlledBackend.MOCK:
+ return self._run_mock_backend(
+ prompt=prompt,
+ workspace=workspace,
+ log_file=log_file,
+ allowed_paths=allowed_paths,
+ )
+
+ return self._run_openhands_cli(
prompt=prompt,
workspace=workspace,
+ artifacts_dir=artifacts_dir,
log_file=log_file,
allowed_paths=allowed_paths,
)
-
- return self._run_openhands_cli(
- prompt=prompt,
- workspace=workspace,
- artifacts_dir=artifacts_dir,
- log_file=log_file,
- allowed_paths=allowed_paths,
- )
+ except Exception as exc:
+ message = str(exc).strip() or exc.__class__.__name__
+ self._append_log(log_file, f"[backend-exception] {exc.__class__.__name__}: {message}\n")
+ return _BackendExecutionOutcome(
+ exit_code=1,
+ error=message,
+ stderr=f"{exc.__class__.__name__}: {message}\n",
+ )
def _run_mock_backend(
self,
@@ -484,7 +523,7 @@ def _run_openhands_cli(
"Execution contract:\n"
"- Single task execution only. Do not start autonomous loops.\n"
"- Do not commit, push, or edit git config.\n"
- "- Modify files only under /opt/workspace.\n"
+ "- Modify files only inside the provided workspace root.\n"
"- Return changed files and executed commands in final summary.\n"
)
@@ -537,6 +576,8 @@ def _build_openhands_env(self, *, workspace: Path, artifacts_dir: Path) -> dict[
env["OPENHANDS_WORKSPACE"] = str(workspace)
env["OPENHANDS_AUDIT_DIR"] = str(artifacts_dir)
env["OPENHANDS_AUDIT_FILE"] = str(artifacts_dir / "openhands_compliance.json")
+ env["PYTHONDONTWRITEBYTECODE"] = "1"
+ env["PYTHONPYCACHEPREFIX"] = str(artifacts_dir / "pycache")
if env.get("OPENHANDS_DRY_RUN") == "1" and "OPENHANDS_RUNTIME" not in env:
env["OPENHANDS_RUNTIME"] = "host"
return env
@@ -588,6 +629,11 @@ def _run_validation(
completed = subprocess.run(
command,
cwd=workspace,
+ env={
+ **os.environ,
+ "PYTHONDONTWRITEBYTECODE": "1",
+ "PYTHONPYCACHEPREFIX": str(validation_dir / "pycache"),
+ },
capture_output=True,
text=True,
check=False,
@@ -611,6 +657,171 @@ def _run_validation(
return 0, ValidationStatus.PASSED
return completed.returncode, ValidationStatus.FAILED
+ def _prepare_strict_workspace(
+ self,
+ *,
+ workspace: Path,
+ overlay_root: Path,
+ allowed_paths: list[str],
+ ) -> None:
+ if overlay_root.exists():
+ shutil.rmtree(overlay_root)
+ overlay_root.mkdir(parents=True, exist_ok=True)
+
+ file_paths: list[str] = []
+ directory_paths: list[str] = []
+ wildcard_paths: list[str] = []
+
+ for pattern in allowed_paths:
+ normalized = pattern.strip().replace("\\", "/").rstrip("/")
+ if not normalized:
+ continue
+ if any(char in normalized for char in "*?["):
+ wildcard_paths.append(normalized)
+ continue
+ target = workspace / normalized
+ if target.exists() and target.is_dir():
+ directory_paths.append(normalized)
+ continue
+ if "." in Path(normalized).name:
+ file_paths.append(normalized)
+ else:
+ directory_paths.append(normalized)
+
+ for rel_path in file_paths:
+ self._materialize_overlay_file(
+ workspace=workspace,
+ overlay_root=overlay_root,
+ relative_path=rel_path,
+ )
+
+ self._apply_readonly_tree(workspace=workspace)
+
+ for rel_path in directory_paths:
+ self._make_path_tree_writable(workspace / rel_path)
+
+ for pattern in wildcard_paths:
+ self._make_matching_paths_writable(workspace=workspace, pattern=pattern)
+
+ for rel_path in file_paths:
+ self._make_overlay_target_writable(overlay_root=overlay_root, relative_path=rel_path)
+
+ def _materialize_overlay_file(
+ self,
+ *,
+ workspace: Path,
+ overlay_root: Path,
+ relative_path: str,
+ ) -> None:
+ source = workspace / relative_path
+ overlay_target = overlay_root / relative_path
+ overlay_target.parent.mkdir(parents=True, exist_ok=True)
+ if source.exists():
+ if source.is_dir():
+ return
+ shutil.copy2(source, overlay_target)
+ source.unlink()
+ else:
+ overlay_target.touch()
+ source.parent.mkdir(parents=True, exist_ok=True)
+ source.symlink_to(overlay_target)
+
+ def _apply_readonly_tree(self, *, workspace: Path) -> None:
+ for root, dirnames, filenames in os.walk(workspace):
+ root_path = Path(root)
+ os.chmod(root_path, 0o555)
+ for dirname in dirnames:
+ os.chmod(root_path / dirname, 0o555)
+ for filename in filenames:
+ path = root_path / filename
+ if path.is_symlink():
+ continue
+ os.chmod(path, 0o444)
+
+ def _make_path_tree_writable(self, path: Path) -> None:
+ if not path.exists():
+ return
+ if path.is_file():
+ os.chmod(path, 0o644)
+ return
+ for root, dirnames, filenames in os.walk(path):
+ root_path = Path(root)
+ os.chmod(root_path, 0o755)
+ for dirname in dirnames:
+ os.chmod(root_path / dirname, 0o755)
+ for filename in filenames:
+ candidate = root_path / filename
+ if candidate.is_symlink():
+ continue
+ os.chmod(candidate, 0o644)
+
+ def _make_matching_paths_writable(self, *, workspace: Path, pattern: str) -> None:
+ for candidate in workspace.rglob("*"):
+ relative = candidate.relative_to(workspace).as_posix()
+ if fnmatch.fnmatch(relative, pattern):
+ self._make_path_tree_writable(candidate)
+
+ def _make_tree_writable(self, path: Path) -> None:
+ if not path.exists():
+ return
+ for root, dirnames, filenames in os.walk(path):
+ root_path = Path(root)
+ os.chmod(root_path, 0o755)
+ for dirname in dirnames:
+ os.chmod(root_path / dirname, 0o755)
+ for filename in filenames:
+ candidate = root_path / filename
+ if candidate.is_symlink():
+ continue
+ os.chmod(candidate, 0o644)
+
+ def _make_overlay_target_writable(self, *, overlay_root: Path, relative_path: str) -> None:
+ target = overlay_root / relative_path
+ parent = target.parent
+ while True:
+ os.chmod(parent, 0o755)
+ if parent == overlay_root:
+ break
+ parent = parent.parent
+ if target.exists():
+ os.chmod(target, 0o644)
+
+ def _detect_fail_fast_reason(
+ self,
+ *,
+ execution_outcome: _BackendExecutionOutcome,
+ changed_files: list[str],
+ workspace: Path,
+ log_file: Path,
+ ) -> str | None:
+ combined_output = "\n".join(
+ part for part in (execution_outcome.error, execution_outcome.stdout, execution_outcome.stderr) if part
+ )
+ for pattern in self._FAST_FAIL_PATTERNS:
+ if pattern.search(combined_output):
+ reason = f"fail-fast probe tripped on backend output: {pattern.pattern}"
+ self._append_log(log_file, f"[fail-fast] {reason}\n")
+ return reason
+
+ python_changes = [path for path in changed_files if path.endswith(".py")]
+ if not python_changes:
+ return None
+
+ for rel_path in python_changes:
+ target = workspace / rel_path
+ try:
+ source = target.read_text(encoding="utf-8")
+ ast.parse(source, filename=rel_path)
+ except SyntaxError as exc:
+ reason = f"fail-fast probe detected broken python artifacts: SyntaxError in {rel_path}:{exc.lineno}"
+ self._append_log(log_file, f"[fail-fast] {reason}\n")
+ return reason
+ except OSError as exc:
+ reason = f"fail-fast probe could not read changed file {rel_path}: {exc}"
+ self._append_log(log_file, f"[fail-fast] {reason}\n")
+ return reason
+ return None
+
def _detect_scope_violation(
self,
*,
diff --git a/src/autoresearch/core/services/openhands_worker.py b/src/autoresearch/core/services/openhands_worker.py
index ca21a9c5..005f17a0 100644
--- a/src/autoresearch/core/services/openhands_worker.py
+++ b/src/autoresearch/core/services/openhands_worker.py
@@ -1,6 +1,7 @@
from __future__ import annotations
import shlex
+import sys
from autoresearch.agent_protocol.models import ExecutionPolicy, FallbackStep, JobSpec, ValidatorSpec
from autoresearch.shared.models import GitPromotionMode
@@ -15,9 +16,25 @@
class OpenHandsWorkerService:
"""Translate a patch-only OpenHands worker contract into existing AEP/backends."""
+ DEFAULT_TIMEOUT_SEC = 420
+
+ def _execution_test_command_parts(self, raw_command: str) -> list[str]:
+ parts = shlex.split(raw_command)
+ if not parts:
+ return []
+ if parts[0] == "pytest":
+ return [sys.executable, "-m", "pytest", *parts[1:]]
+ if parts[0] in {"python", "python3"}:
+ return [sys.executable, *parts[1:]]
+ return parts
+
+ def _execution_test_command(self, raw_command: str) -> str:
+ return shlex.join(self._execution_test_command_parts(raw_command))
+
def build_prompt(self, spec: OpenHandsWorkerJobSpec) -> str:
allowed_paths = "\n".join(f"- {item}" for item in spec.allowed_paths)
forbidden_paths = "\n".join(f"- {item}" for item in spec.forbidden_paths)
+ test_command = self._execution_test_command(spec.test_command)
return (
"You are OpenHands operating as a constrained patch-only worker.\n\n"
"Problem statement:\n"
@@ -25,6 +42,8 @@ def build_prompt(self, spec: OpenHandsWorkerJobSpec) -> str:
"Hard rules:\n"
"- Only modify files that match allowed_paths.\n"
"- Never modify forbidden_paths.\n"
+ "- The workspace is physically permission-scoped; out-of-scope writes will fail at the filesystem layer.\n"
+ "- If an allowed business surface directory does not exist yet, you may create it inside allowed_paths.\n"
"- Do not run git add, git commit, git push, git merge, git rebase, git reset, or git checkout.\n"
"- Do not create product-facing entrypoints or change approval/promotion policy.\n"
"- Produce the smallest patch that can satisfy the validation command.\n"
@@ -34,10 +53,11 @@ def build_prompt(self, spec: OpenHandsWorkerJobSpec) -> str:
"forbidden_paths:\n"
f"{forbidden_paths}\n\n"
"test_command:\n"
- f"- {spec.test_command}\n"
+ f"- {test_command}\n"
)
def build_agent_job_spec(self, spec: OpenHandsWorkerJobSpec) -> JobSpec:
+ test_command = self._execution_test_command(spec.test_command)
fallback: list[FallbackStep] = []
retry_attempts = max(spec.max_iterations - 1, 0)
if retry_attempts > 0:
@@ -53,6 +73,7 @@ def build_agent_job_spec(self, spec: OpenHandsWorkerJobSpec) -> JobSpec:
mode="patch_only",
task=self.build_prompt(spec),
policy=ExecutionPolicy(
+ timeout_sec=self.DEFAULT_TIMEOUT_SEC,
allowed_paths=list(spec.allowed_paths),
forbidden_paths=list(spec.forbidden_paths),
cleanup_on_success=True,
@@ -62,7 +83,7 @@ def build_agent_job_spec(self, spec: OpenHandsWorkerJobSpec) -> JobSpec:
ValidatorSpec(
id="worker.test_command",
kind="command",
- command=spec.test_command,
+ command=test_command,
)
],
fallback=fallback,
@@ -77,6 +98,7 @@ def build_agent_job_spec(self, spec: OpenHandsWorkerJobSpec) -> JobSpec:
)
def build_controlled_request(self, spec: OpenHandsWorkerJobSpec) -> ControlledExecutionRequest:
+ test_command = self._execution_test_command_parts(spec.test_command)
fallback_backend = ControlledBackend.MOCK if spec.use_mock_fallback else None
failure_strategy = FailureStrategy.FALLBACK if fallback_backend is not None else FailureStrategy.HUMAN_IN_LOOP
return ControlledExecutionRequest(
@@ -84,7 +106,7 @@ def build_controlled_request(self, spec: OpenHandsWorkerJobSpec) -> ControlledEx
prompt=self.build_prompt(spec),
allowed_paths=list(spec.allowed_paths),
forbidden_paths=list(spec.forbidden_paths),
- test_command=shlex.split(spec.test_command),
+ test_command=test_command,
backend=ControlledBackend.OPENHANDS_CLI,
fallback_backend=fallback_backend,
worker_output_mode=spec.worker_output_mode,
diff --git a/src/autoresearch/core/services/panel_access.py b/src/autoresearch/core/services/panel_access.py
index 5283036a..53868e49 100644
--- a/src/autoresearch/core/services/panel_access.py
+++ b/src/autoresearch/core/services/panel_access.py
@@ -74,6 +74,7 @@ def __init__(
*,
secret: str | None,
base_url: str = "http://127.0.0.1:8000/api/v1/panel/view",
+ mini_app_url: str | None = None,
issuer: str = "autoresearch.telegram",
audience: str = "autoresearch.panel",
default_ttl_seconds: int = 300,
@@ -84,6 +85,7 @@ def __init__(
) -> None:
self._secret = (secret or "").strip()
self._base_url = base_url.strip() or "http://127.0.0.1:8000/api/v1/panel/view"
+ self._mini_app_url = (mini_app_url or "").strip() or None
self._issuer = issuer
self._audience = audience
self._default_ttl_seconds = max(30, default_ttl_seconds)
@@ -100,10 +102,43 @@ def enabled(self) -> bool:
def base_url(self) -> str:
return self._base_url
+ @property
+ def mini_app_url(self) -> str | None:
+ return self._mini_app_url
+
@property
def allowed_uids(self) -> tuple[str, ...]:
return tuple(sorted(self._allowed_uids))
+ def build_action_url(
+ self,
+ *,
+ query_params: dict[str, str],
+ telegram_uid: str | None = None,
+ prefer_mini_app: bool = True,
+ ) -> str:
+ base_candidate = self._base_url
+ if prefer_mini_app and self._mini_app_url:
+ base_candidate = self._mini_app_url
+
+ parsed = urlparse(base_candidate)
+ query_items = dict(parse_qsl(parsed.query, keep_blank_values=True))
+ for key, value in query_params.items():
+ if value is None:
+ continue
+ query_items[key] = value
+
+ normalized_uid = (telegram_uid or "").strip()
+ if normalized_uid and self.enabled:
+ magic_link = self.create_magic_link(normalized_uid)
+ magic_parsed = urlparse(magic_link.url)
+ magic_query = dict(parse_qsl(magic_parsed.query, keep_blank_values=True))
+ token = (magic_query.get("token") or "").strip()
+ if token:
+ query_items["token"] = token
+
+ return urlunparse(parsed._replace(query=urlencode(query_items)))
+
def create_magic_link(self, telegram_uid: str, ttl_seconds: int | None = None) -> PanelMagicLinkRead:
if not self.enabled:
raise RuntimeError("panel magic-link signing secret is not configured")
diff --git a/src/autoresearch/core/services/upstream_watcher.py b/src/autoresearch/core/services/upstream_watcher.py
new file mode 100644
index 00000000..71d56dd1
--- /dev/null
+++ b/src/autoresearch/core/services/upstream_watcher.py
@@ -0,0 +1,230 @@
+from __future__ import annotations
+
+from datetime import datetime
+from pathlib import Path
+import shutil
+import subprocess
+import tempfile
+
+from autoresearch.shared.autoresearch_planner_contract import (
+ UpstreamWatchCommitRead,
+ UpstreamWatchDecision,
+ UpstreamWatchRead,
+)
+
+
+_DEFAULT_UPSTREAM_URL = "https://github.com/openclaw/openclaw.git"
+_DEFAULT_WORKSPACE_ROOT = Path("/Volumes/AI_LAB/ai_lab/workspace")
+_NON_CORE_PATH_PREFIXES = (
+ "extensions/",
+ "test/helpers/extensions/",
+ "docs/",
+)
+_NON_CORE_PATHS = {
+ "CHANGELOG.md",
+ ".gitignore",
+ "README.md",
+ "CONTRIBUTING.md",
+ "SECURITY.md",
+}
+
+
+class UpstreamWatcherService:
+ """Inspect upstream changes in an isolated temp clone and decide whether to skip them."""
+
+ def __init__(
+ self,
+ *,
+ upstream_url: str = _DEFAULT_UPSTREAM_URL,
+ workspace_root: Path | None = None,
+ max_commits: int = 5,
+ ) -> None:
+ self._upstream_url = upstream_url.strip() or _DEFAULT_UPSTREAM_URL
+ self._workspace_root = (workspace_root or _DEFAULT_WORKSPACE_ROOT).expanduser().resolve()
+ self._max_commits = max(1, min(max_commits, 20))
+
+ def inspect(self) -> UpstreamWatchRead:
+ self._workspace_root.mkdir(parents=True, exist_ok=True)
+ sync_dir = Path(tempfile.mkdtemp(prefix="openclaw-upstream.", dir=str(self._workspace_root)))
+ default_branch = "main"
+ try:
+ default_branch = self._detect_default_branch()
+ self._git(
+ "clone",
+ "--depth=1",
+ "--single-branch",
+ "--no-tags",
+ "--branch",
+ default_branch,
+ self._upstream_url,
+ str(sync_dir),
+ )
+ self._git(
+ "fetch",
+ f"--depth={self._max_commits + 1}",
+ "origin",
+ default_branch,
+ cwd=sync_dir,
+ )
+ recent_commits = self._load_recent_commits(sync_dir=sync_dir, default_branch=default_branch)
+ changed_paths = self._collect_changed_paths(recent_commits)
+ focus_areas = self._derive_focus_areas(changed_paths)
+ relevant_paths = [path for path in changed_paths if not self._is_non_core_path(path)]
+ latest_commit = recent_commits[0] if recent_commits else None
+ decision = UpstreamWatchDecision.SKIP if not relevant_paths else UpstreamWatchDecision.REVIEW
+ result = UpstreamWatchRead(
+ upstream_url=self._upstream_url,
+ default_branch=default_branch,
+ latest_commit_sha=latest_commit.sha if latest_commit else None,
+ latest_commit_title=latest_commit.title if latest_commit else None,
+ latest_commit_at=latest_commit.committed_at if latest_commit else None,
+ recent_commits=recent_commits,
+ changed_paths=changed_paths,
+ relevant_paths=relevant_paths,
+ focus_areas=focus_areas,
+ decision=decision,
+ summary=self._build_summary(decision=decision, focus_areas=focus_areas, relevant_paths=relevant_paths),
+ )
+ except Exception as exc:
+ result = UpstreamWatchRead(
+ upstream_url=self._upstream_url,
+ default_branch=default_branch,
+ decision=UpstreamWatchDecision.FAILED,
+ summary="Upstream watch failed.",
+ error=str(exc),
+ )
+
+ cleanup_paths = self._cleanup_temp_dirs()
+ return result.model_copy(update={"cleaned_up": True, "cleanup_paths": cleanup_paths})
+
+ def _detect_default_branch(self) -> str:
+ output = self._git("ls-remote", "--symref", self._upstream_url, "HEAD")
+ for line in output.splitlines():
+ if not line.startswith("ref: "):
+ continue
+ parts = line.split()
+ if len(parts) < 2 or not parts[1].startswith("refs/heads/"):
+ continue
+ return parts[1].removeprefix("refs/heads/")
+ return "main"
+
+ def _load_recent_commits(self, *, sync_dir: Path, default_branch: str) -> list[UpstreamWatchCommitRead]:
+ raw = self._git(
+ "log",
+ f"origin/{default_branch}",
+ f"--max-count={self._max_commits}",
+ "--date=iso-strict",
+ "--pretty=format:%H%x1f%cI%x1f%s",
+ cwd=sync_dir,
+ )
+ commits: list[UpstreamWatchCommitRead] = []
+ for line in raw.splitlines():
+ parts = line.split("\x1f", 2)
+ sha = parts[0] if len(parts) > 0 else ""
+ committed_at_raw = parts[1] if len(parts) > 1 else ""
+ title = parts[2] if len(parts) > 2 else ""
+ committed_at = None
+ if committed_at_raw:
+ committed_at = datetime.fromisoformat(committed_at_raw.replace("Z", "+00:00"))
+ commits.append(
+ UpstreamWatchCommitRead(
+ sha=sha,
+ title=title,
+ committed_at=committed_at,
+ touched_paths=self._load_touched_paths(sync_dir=sync_dir, sha=sha),
+ )
+ )
+ return commits
+
+ def _load_touched_paths(self, *, sync_dir: Path, sha: str) -> list[str]:
+ raw = self._git(
+ "diff-tree",
+ "--no-commit-id",
+ "--name-only",
+ "-r",
+ "-m",
+ sha,
+ cwd=sync_dir,
+ )
+ return self._dedupe(line.strip() for line in raw.splitlines() if line.strip())
+
+ def _collect_changed_paths(self, commits: list[UpstreamWatchCommitRead]) -> list[str]:
+ return self._dedupe(path for commit in commits for path in commit.touched_paths)
+
+ def _derive_focus_areas(self, changed_paths: list[str]) -> list[str]:
+ return self._dedupe(self._classify_focus_area(path) for path in changed_paths if path)
+
+ def _classify_focus_area(self, path: str) -> str:
+ normalized = path.strip("/")
+ parts = normalized.split("/")
+ if len(parts) >= 2 and parts[0] == "extensions":
+ return f"extension:{parts[1]}"
+ if len(parts) >= 4 and parts[:3] == ["test", "helpers", "extensions"]:
+ helper_name = Path(parts[3]).stem.split("-", 1)[0]
+ return f"extension:{helper_name}"
+ if normalized in _NON_CORE_PATHS:
+ return "repo-meta"
+ return parts[0] if parts else normalized
+
+ def _build_summary(
+ self,
+ *,
+ decision: UpstreamWatchDecision,
+ focus_areas: list[str],
+ relevant_paths: list[str],
+ ) -> str:
+ if decision is UpstreamWatchDecision.SKIP:
+ focus_text = ", ".join(self._format_focus_area(item) for item in focus_areas[:3]) or "non-core extensions"
+ return f"Recent upstream changes remain in non-core areas ({focus_text}); auto-skipped."
+ if decision is UpstreamWatchDecision.REVIEW:
+ return f"Recent upstream changes touched review-required paths: {', '.join(relevant_paths[:5])}."
+ return "Upstream watch failed."
+
+ def _format_focus_area(self, focus_area: str) -> str:
+ if focus_area.startswith("extension:"):
+ name = focus_area.split(":", 1)[1]
+ if name.lower() == "line":
+ return "LINE"
+ return name.replace("-", " ").title()
+ if focus_area == "repo-meta":
+ return "repo meta"
+ return focus_area.replace("-", " ")
+
+ def _is_non_core_path(self, path: str) -> bool:
+ normalized = path.strip()
+ if normalized in _NON_CORE_PATHS:
+ return True
+ return normalized.startswith(_NON_CORE_PATH_PREFIXES)
+
+ def _cleanup_temp_dirs(self) -> list[str]:
+ cleanup_paths: list[str] = []
+ for path in sorted(self._workspace_root.glob("openclaw-upstream.*")):
+ if not path.is_dir() or path.parent != self._workspace_root:
+ continue
+ cleanup_paths.append(str(path))
+ shutil.rmtree(path, ignore_errors=True)
+ return cleanup_paths
+
+ def _git(self, *args: str, cwd: Path | None = None) -> str:
+ completed = subprocess.run(
+ ["git", *args],
+ cwd=str(cwd) if cwd is not None else None,
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+ if completed.returncode == 0:
+ return completed.stdout.strip()
+ detail = (completed.stderr or completed.stdout).strip()
+ raise RuntimeError(f"git {' '.join(args)} failed: {detail}")
+
+ def _dedupe(self, values: object) -> list[str]:
+ seen: set[str] = set()
+ ordered: list[str] = []
+ for item in values:
+ normalized = str(item).strip()
+ if not normalized or normalized in seen:
+ continue
+ seen.add(normalized)
+ ordered.append(normalized)
+ return ordered
diff --git a/src/autoresearch/executions/runner.py b/src/autoresearch/executions/runner.py
index 80b64ee5..5835d1ca 100644
--- a/src/autoresearch/executions/runner.py
+++ b/src/autoresearch/executions/runner.py
@@ -6,12 +6,15 @@
import os
import re
import shutil
+import signal
import subprocess
+import sys
import time
from pathlib import Path, PurePosixPath
from typing import Any
from autoresearch.agent_protocol.models import (
+ DriverMetrics,
DriverResult,
JobSpec,
RunSummary,
@@ -32,6 +35,40 @@
".git/",
)
+_AI_LAB_ENV_OVERRIDE_KEYS = (
+ "ENV_FILE",
+ "OPENHANDS_ENV_FILE",
+ "COMPOSE_DIR",
+ "COMPOSE_FILE",
+ "WORKSPACE_DIR",
+ "LOG_DIR",
+ "CACHE_DIR",
+ "LAB_USER",
+ "AUTO_OPEN_DOCKER",
+ "AUTO_START_COLIMA",
+ "AI_LAB_IMAGE_TAG",
+ "AI_LAB_FORCE_DOCKER_RUN",
+ "AI_LAB_HOST_MOUNT_ROOT",
+ "OPENHANDS_HOME_DIR",
+ "DOCKER_HOST_SOCKET_PATH",
+ "DOCKER_HOST_IN_CONTAINER",
+ "DOCKER_HOST_MOUNT_DIR",
+ "AI_LAB_COLIMA_HELPER",
+)
+
+
+def _is_benign_runtime_artifact(path: str) -> bool:
+ normalized = path.replace("\\", "/").strip("/")
+ if not normalized:
+ return False
+ if normalized.startswith(".pytest_cache/") or "/.pytest_cache/" in f"/{normalized}":
+ return True
+ if "/__pycache__/" in f"/{normalized}":
+ return True
+ if normalized.startswith("apps/") and normalized.endswith("/README.md"):
+ return True
+ return False
+
class AgentExecutionRunner:
def __init__(
@@ -52,6 +89,21 @@ def __init__(
writer_lease=WriterLeaseService(),
)
+ def _uses_openhands_ai_lab_runtime(self, manifest_entrypoint: str) -> bool:
+ if Path(manifest_entrypoint).name != "openhands_adapter.sh":
+ return False
+ runtime = str(os.environ.get("OPENHANDS_RUNTIME") or "ai-lab").strip().lower()
+ return runtime == "ai-lab"
+
+ def _build_openhands_ai_lab_env(self) -> dict[str, str]:
+ env = dict(os.environ)
+ for key in _AI_LAB_ENV_OVERRIDE_KEYS:
+ env.pop(key, None)
+ env_file = str(self._repo_root / "ai_lab.env")
+ env["ENV_FILE"] = env_file
+ env["OPENHANDS_ENV_FILE"] = env_file
+ return env
+
def run_job(self, job: JobSpec) -> RunSummary:
manifest = self._registry.load(job.agent_id)
effective_policy = build_effective_policy(manifest.policy_defaults, job.policy)
@@ -68,7 +120,7 @@ def run_job(self, job: JobSpec) -> RunSummary:
patch_path = artifacts_dir / "promotion.patch"
if run_dir.exists():
- shutil.rmtree(run_dir)
+ self._rmtree_force(run_dir)
artifacts_dir.mkdir(parents=True, exist_ok=True)
job_path.write_text(
@@ -91,6 +143,8 @@ def run_job(self, job: JobSpec) -> RunSummary:
pending_attempts = 1
attempt = 0
forced_final_status: str | None = None
+ cleanup_success = False
+ final_summary: RunSummary | None = None
last_result = self._contract_error_result(
run_id=job.run_id,
@@ -101,165 +155,269 @@ def run_job(self, job: JobSpec) -> RunSummary:
last_validation = ValidationReport(run_id=job.run_id, passed=False, checks=[])
last_patch_filtered_paths: list[str] = []
- while True:
- if pending_attempts <= 0:
- if fallback_index >= len(job.fallback):
- break
- step = job.fallback[fallback_index]
- fallback_index += 1
-
- if step.action == "retry":
- pending_attempts = step.max_attempts
- continue
- if step.action == "fallback_agent":
- if step.agent_id:
- current_agent = step.agent_id
- pending_attempts = step.max_attempts
+ try:
+ while True:
+ if pending_attempts <= 0:
+ if fallback_index >= len(job.fallback):
+ break
+ step = job.fallback[fallback_index]
+ fallback_index += 1
+
+ if step.action == "retry":
+ skip_retry_reason = self._retry_skip_reason(last_result)
+ if skip_retry_reason is not None:
+ self._append_event(
+ events_path,
+ {
+ "type": "fallback_skipped",
+ "attempt": attempt,
+ "agent_id": current_agent,
+ "action": "retry",
+ "reason": skip_retry_reason,
+ },
+ )
+ continue
+ pending_attempts = step.max_attempts
+ continue
+ if step.action == "fallback_agent":
+ if step.agent_id:
+ current_agent = step.agent_id
+ pending_attempts = step.max_attempts
+ continue
+ if step.action == "human_review":
+ forced_final_status = "human_review"
+ break
+ if step.action == "reject":
+ forced_final_status = "blocked"
+ break
+
+ attempt += 1
+ pending_attempts -= 1
+
+ active_manifest = self._registry.load(current_agent)
+ preflight_error = self._preflight_agent_environment(
+ agent_id=current_agent,
+ manifest_entrypoint=active_manifest.entrypoint,
+ )
+ if preflight_error is not None:
+ driver_result = self._contract_error_result(
+ run_id=job.run_id,
+ agent_id=current_agent,
+ attempt=attempt,
+ message=preflight_error,
+ recommended_action="fallback",
+ )
+ last_result = driver_result
+ last_validation = ValidationReport(run_id=job.run_id, passed=False, checks=[])
+ self._append_event(
+ events_path,
+ {
+ "type": "attempt_blocked",
+ "attempt": attempt,
+ "agent_id": current_agent,
+ "reason": "environment_preflight_failed",
+ "detail": preflight_error,
+ },
+ )
+ self._append_event(
+ events_path,
+ {
+ "type": "attempt_completed",
+ "attempt": attempt,
+ "agent_id": current_agent,
+ "driver_status": driver_result.status,
+ "validation_passed": False,
+ },
+ )
continue
- if step.action == "human_review":
- forced_final_status = "human_review"
- break
- if step.action == "reject":
- forced_final_status = "blocked"
- break
-
- attempt += 1
- pending_attempts -= 1
- self._snapshot_baseline_to_workspace(baseline_dir, workspace_dir)
- self._append_event(
- events_path,
- {
- "type": "attempt_started",
- "attempt": attempt,
- "agent_id": current_agent,
- },
- )
+ attempt_job = self._job_for_attempt(
+ job=job,
+ agent_id=current_agent,
+ attempt=attempt,
+ last_result=last_result,
+ last_validation=last_validation,
+ )
+ job_path.write_text(
+ json.dumps(attempt_job.model_dump(mode="json"), ensure_ascii=False, indent=2),
+ encoding="utf-8",
+ )
- active_manifest = self._registry.load(current_agent)
- driver_result = self._invoke_adapter(
- manifest_entrypoint=active_manifest.entrypoint,
- run_dir=run_dir,
- workspace_dir=workspace_dir,
- artifacts_dir=artifacts_dir,
- job_path=job_path,
- result_path=result_path,
- events_path=events_path,
- baseline_dir=baseline_dir,
- run_id=job.run_id,
- agent_id=current_agent,
- attempt=attempt,
- timeout_sec=effective_policy.merged.timeout_sec,
- )
- result_path.write_text(
- json.dumps(driver_result.model_dump(mode="json"), ensure_ascii=False, indent=2),
- encoding="utf-8",
- )
+ self._snapshot_baseline_to_workspace(baseline_dir, workspace_dir)
+ self._prepare_shadow_workspace(workspace_dir, effective_policy)
+ self._append_event(
+ events_path,
+ {
+ "type": "attempt_started",
+ "attempt": attempt,
+ "agent_id": current_agent,
+ },
+ )
- changed_paths = self._collect_changed_paths(baseline_dir, workspace_dir)
- patch_text, patch_filtered_paths, builtin_checks = self._build_filtered_patch(
- baseline_dir=baseline_dir,
- workspace_dir=workspace_dir,
- changed_paths=changed_paths,
- driver_result=driver_result,
- policy=effective_policy,
- )
- patch_path.write_text(patch_text, encoding="utf-8")
+ driver_result = self._invoke_adapter(
+ manifest_entrypoint=active_manifest.entrypoint,
+ run_dir=run_dir,
+ workspace_dir=workspace_dir,
+ artifacts_dir=artifacts_dir,
+ job_path=job_path,
+ result_path=result_path,
+ events_path=events_path,
+ baseline_dir=baseline_dir,
+ run_id=job.run_id,
+ agent_id=current_agent,
+ attempt=attempt,
+ timeout_sec=effective_policy.merged.timeout_sec,
+ policy=effective_policy,
+ )
+ result_path.write_text(
+ json.dumps(driver_result.model_dump(mode="json"), ensure_ascii=False, indent=2),
+ encoding="utf-8",
+ )
- validation = self._run_validators(
- run_id=job.run_id,
- workspace_dir=workspace_dir,
- patch_path=patch_path,
- builtin_checks=builtin_checks,
- validator_specs=job.validators,
- timeout_sec=effective_policy.merged.timeout_sec,
- )
+ changed_paths = self._collect_changed_paths(baseline_dir, workspace_dir)
+ patch_text, patch_filtered_paths, builtin_checks = self._build_filtered_patch(
+ baseline_dir=baseline_dir,
+ workspace_dir=workspace_dir,
+ changed_paths=changed_paths,
+ driver_result=driver_result,
+ policy=effective_policy,
+ )
+ patch_path.write_text(patch_text, encoding="utf-8")
- if not driver_result.changed_paths:
- driver_result = driver_result.model_copy(
- update={"changed_paths": patch_filtered_paths}
+ validation = self._run_validators(
+ run_id=job.run_id,
+ workspace_dir=workspace_dir,
+ patch_path=patch_path,
+ builtin_checks=builtin_checks,
+ validator_specs=job.validators,
+ timeout_sec=effective_policy.merged.timeout_sec,
)
- last_patch_filtered_paths = patch_filtered_paths
-
- if self._has_policy_violation(validation):
- driver_result = driver_result.model_copy(
- update={
- "status": "policy_blocked",
- "recommended_action": "reject",
- "error": "execution produced out-of-scope or forbidden changes",
- }
+
+ if not driver_result.changed_paths:
+ driver_result = driver_result.model_copy(
+ update={"changed_paths": patch_filtered_paths}
+ )
+ last_patch_filtered_paths = patch_filtered_paths
+
+ if self._has_policy_violation(validation):
+ driver_result = driver_result.model_copy(
+ update={
+ "status": "policy_blocked",
+ "recommended_action": "reject",
+ "error": "execution produced out-of-scope or forbidden changes",
+ }
+ )
+
+ last_result = driver_result
+ last_validation = validation
+
+ self._append_event(
+ events_path,
+ {
+ "type": "attempt_completed",
+ "attempt": attempt,
+ "agent_id": current_agent,
+ "driver_status": driver_result.status,
+ "validation_passed": validation.passed,
+ },
)
- last_result = driver_result
- last_validation = validation
+ if attempt_succeeded(driver_result=driver_result, validation=validation):
+ promotion_preflight, promotion = self._finalize_promotion(
+ job=job,
+ agent_id=current_agent,
+ patch_path=patch_path,
+ changed_files=patch_filtered_paths,
+ validation=validation,
+ policy=effective_policy,
+ artifacts_dir=artifacts_dir,
+ )
+ final_status = (
+ "promoted"
+ if promotion.mode is GitPromotionMode.DRAFT_PR
+ else "ready_for_promotion"
+ )
+ if not promotion.success:
+ final_status = "blocked"
+ final_summary = RunSummary(
+ run_id=job.run_id,
+ final_status=final_status,
+ driver_result=driver_result,
+ validation=validation,
+ promotion_patch_uri=str(patch_path),
+ promotion_preflight=promotion_preflight,
+ promotion=promotion,
+ )
+ cleanup_success = True
+ break
+
+ if driver_result.status == "policy_blocked":
+ break
+ if final_summary is None:
+ final_status = forced_final_status or derive_terminal_status(last_result, last_validation)
+ final_summary = RunSummary(
+ run_id=job.run_id,
+ final_status=final_status,
+ driver_result=last_result,
+ validation=last_validation,
+ promotion_patch_uri=str(patch_path) if patch_path.exists() else None,
+ promotion_preflight=None,
+ promotion=None,
+ )
+ except Exception as exc:
+ error_message = f"runner crashed: {exc.__class__.__name__}: {exc}"
+ last_result = self._contract_error_result(
+ run_id=job.run_id,
+ agent_id=current_agent,
+ attempt=attempt or 1,
+ message=error_message,
+ )
self._append_event(
events_path,
{
- "type": "attempt_completed",
+ "type": "runner_exception",
"attempt": attempt,
"agent_id": current_agent,
- "driver_status": driver_result.status,
- "validation_passed": validation.passed,
+ "detail": error_message,
},
)
-
- if attempt_succeeded(driver_result=driver_result, validation=validation):
- promotion_preflight, promotion = self._finalize_promotion(
- job=job,
- agent_id=current_agent,
- patch_path=patch_path,
- changed_files=patch_filtered_paths,
- validation=validation,
- policy=effective_policy,
- artifacts_dir=artifacts_dir,
- )
- final_status = "promoted" if promotion.mode is GitPromotionMode.DRAFT_PR else "ready_for_promotion"
- if not promotion.success:
- final_status = "blocked"
- summary = RunSummary(
+ final_summary = RunSummary(
+ run_id=job.run_id,
+ final_status="failed",
+ driver_result=last_result,
+ validation=last_validation,
+ promotion_patch_uri=str(patch_path) if patch_path.exists() else None,
+ promotion_preflight=None,
+ promotion=None,
+ )
+ finally:
+ if final_summary is None:
+ final_summary = RunSummary(
run_id=job.run_id,
- final_status=final_status,
- driver_result=driver_result,
- validation=validation,
- promotion_patch_uri=str(patch_path),
- promotion_preflight=promotion_preflight,
- promotion=promotion,
+ final_status=forced_final_status or derive_terminal_status(last_result, last_validation),
+ driver_result=last_result,
+ validation=last_validation,
+ promotion_patch_uri=str(patch_path) if patch_path.exists() else None,
+ promotion_preflight=None,
+ promotion=None,
)
- summary_path.write_text(
- json.dumps(summary.model_dump(mode="json"), ensure_ascii=False, indent=2),
- encoding="utf-8",
- )
- self._cleanup_workspace(
- workspace_dir=workspace_dir,
- success=True,
- policy=effective_policy,
- )
- return summary
+ self._write_summary(summary_path=summary_path, summary=final_summary)
+ self._cleanup_workspace(
+ workspace_dir=workspace_dir,
+ success=cleanup_success,
+ policy=effective_policy,
+ )
- if driver_result.status == "policy_blocked":
- break
+ return final_summary
- final_status = forced_final_status or derive_terminal_status(last_result, last_validation)
- summary = RunSummary(
- run_id=job.run_id,
- final_status=final_status,
- driver_result=last_result,
- validation=last_validation,
- promotion_patch_uri=str(patch_path) if patch_path.exists() else None,
- promotion_preflight=None,
- promotion=None,
- )
+ @staticmethod
+ def _write_summary(*, summary_path: Path, summary: RunSummary) -> None:
+ summary_path.parent.mkdir(parents=True, exist_ok=True)
summary_path.write_text(
json.dumps(summary.model_dump(mode="json"), ensure_ascii=False, indent=2),
encoding="utf-8",
)
- self._cleanup_workspace(
- workspace_dir=workspace_dir,
- success=False,
- policy=effective_policy,
- )
- return summary
@staticmethod
def _has_policy_violation(validation: ValidationReport) -> bool:
@@ -285,9 +443,152 @@ def _snapshot_repo_to_baseline(self, baseline_dir: Path) -> None:
def _snapshot_baseline_to_workspace(self, baseline_dir: Path, workspace_dir: Path) -> None:
if workspace_dir.exists():
- shutil.rmtree(workspace_dir)
+ self._rmtree_force(workspace_dir)
shutil.copytree(baseline_dir, workspace_dir, dirs_exist_ok=True)
+ def _prepare_shadow_workspace(self, workspace_dir: Path, policy: EffectivePolicy) -> None:
+ if not workspace_dir.exists():
+ return
+
+ self._apply_mode_tree(workspace_dir, file_mode=0o444, dir_mode=0o555)
+
+ writable_paths: set[Path] = set()
+ for pattern in policy.merged.allowed_paths:
+ writable_paths.update(self._resolve_writable_targets(workspace_dir, pattern))
+
+ for target in sorted(writable_paths, key=lambda item: (len(item.parts), str(item))):
+ self._make_target_writable(workspace_dir, target)
+
+ locked_paths: set[Path] = set()
+ for pattern in policy.merged.forbidden_paths:
+ locked_paths.update(self._resolve_matching_paths(workspace_dir, pattern))
+
+ for target in sorted(locked_paths, key=lambda item: len(item.parts), reverse=True):
+ self._make_target_read_only(target)
+
+ def _resolve_writable_targets(self, workspace_dir: Path, pattern: str) -> set[Path]:
+ normalized = pattern.replace("\\", "/").strip("/")
+ if not normalized:
+ return set()
+
+ targets = self._resolve_matching_paths(workspace_dir, normalized)
+ if targets:
+ return targets
+
+ prefix = self._glob_prefix(normalized)
+ if prefix:
+ candidate = workspace_dir / prefix
+ if candidate.exists():
+ return {candidate}
+ return {candidate}
+
+ candidate = workspace_dir / normalized
+ if candidate.exists():
+ return {candidate}
+ return {candidate}
+
+ def _resolve_matching_paths(self, workspace_dir: Path, pattern: str) -> set[Path]:
+ normalized = pattern.replace("\\", "/").strip("/")
+ if not normalized:
+ return set()
+
+ matched: set[Path] = set()
+ for path in [workspace_dir, *workspace_dir.rglob("*")]:
+ rel = path.relative_to(workspace_dir).as_posix() if path != workspace_dir else "."
+ if rel == ".":
+ continue
+ if self._matches_any(rel, [normalized]):
+ matched.add(path)
+ return matched
+
+ @staticmethod
+ def _nearest_existing_ancestor(workspace_dir: Path, candidate: Path) -> Path:
+ probe = candidate
+ while probe != workspace_dir and not probe.exists():
+ probe = probe.parent
+ if probe.exists():
+ return probe
+ return workspace_dir
+
+ def _make_target_writable(self, workspace_dir: Path, target: Path) -> None:
+ for ancestor in reversed(target.parents):
+ if ancestor == workspace_dir.parent or ancestor == target:
+ continue
+ if workspace_dir not in ancestor.parents and ancestor != workspace_dir:
+ continue
+ if ancestor.exists():
+ self._chmod_path(ancestor, 0o777)
+
+ if target.is_dir():
+ self._apply_mode_tree(target, file_mode=0o666, dir_mode=0o777)
+ return
+
+ if target.exists():
+ self._chmod_path(target, 0o666)
+ if target.parent.exists():
+ self._chmod_path(target.parent, 0o777)
+ return
+
+ self._ensure_writable_path_chain(workspace_dir=workspace_dir, target=target)
+ if not target.suffix:
+ try:
+ target.mkdir(parents=True, exist_ok=True)
+ except OSError:
+ pass
+ if target.exists() and target.is_dir():
+ self._apply_mode_tree(target, file_mode=0o666, dir_mode=0o777)
+ return
+
+ if target.parent.exists():
+ self._chmod_path(target.parent, 0o777)
+
+ def _ensure_writable_path_chain(self, *, workspace_dir: Path, target: Path) -> None:
+ self._chmod_path(workspace_dir, 0o777)
+ try:
+ relative_target = target.relative_to(workspace_dir)
+ except ValueError:
+ return
+
+ chain_parts = relative_target.parts if not target.suffix else relative_target.parts[:-1]
+ current = workspace_dir
+ for part in chain_parts:
+ current = current / part
+ if not current.exists():
+ try:
+ current.mkdir(exist_ok=True)
+ except OSError:
+ return
+ self._chmod_path(current, 0o777)
+
+ def _make_target_read_only(self, target: Path) -> None:
+ if target.is_dir():
+ self._apply_mode_tree(target, file_mode=0o444, dir_mode=0o555)
+ return
+ if target.exists():
+ self._chmod_path(target, 0o444)
+ if target.parent.exists():
+ self._chmod_path(target.parent, 0o555)
+
+ def _apply_mode_tree(self, root: Path, *, file_mode: int, dir_mode: int) -> None:
+ if not root.exists():
+ return
+ if root.is_dir():
+ self._chmod_path(root, dir_mode)
+ for path in root.rglob("*"):
+ if path.is_dir():
+ self._chmod_path(path, dir_mode)
+ else:
+ self._chmod_path(path, file_mode)
+ return
+ self._chmod_path(root, file_mode)
+
+ @staticmethod
+ def _chmod_path(path: Path, mode: int) -> None:
+ try:
+ path.chmod(mode)
+ except OSError:
+ return
+
def _invoke_adapter(
self,
*,
@@ -303,6 +604,7 @@ def _invoke_adapter(
agent_id: str,
attempt: int,
timeout_sec: int,
+ policy: EffectivePolicy,
) -> DriverResult:
if result_path.exists():
result_path.unlink()
@@ -316,7 +618,10 @@ def _invoke_adapter(
message=f"adapter entrypoint not found: {entrypoint}",
)
- env = dict(os.environ)
+ if self._uses_openhands_ai_lab_runtime(manifest_entrypoint):
+ env = self._build_openhands_ai_lab_env()
+ else:
+ env = dict(os.environ)
env.update(
{
"AEP_RUN_DIR": str(run_dir),
@@ -334,34 +639,191 @@ def _invoke_adapter(
stderr_log = artifacts_dir / "stderr.log"
started = time.perf_counter()
- try:
- completed = subprocess.run(
+ completed: subprocess.CompletedProcess[str] | None = None
+ probe_signature: tuple[tuple[str, int, int], ...] | None = None
+ last_probed_signature: tuple[tuple[str, int, int], ...] | None = None
+ stable_polls = 0
+ stall_timeout_sec = self._stall_progress_timeout_sec(timeout_sec)
+ last_scoped_progress_signature = self._scoped_progress_signature(
+ baseline_dir=baseline_dir,
+ workspace_dir=workspace_dir,
+ allowed_paths=policy.merged.allowed_paths,
+ )
+ last_state_progress_signature = self._runtime_heartbeat_signature(workspace_dir=workspace_dir)
+ last_progress_at = started
+ first_progress_ms: int | None = None
+ first_scoped_write_ms: int | None = None
+ first_state_heartbeat_ms: int | None = None
+ process_group_id: int | None = None
+
+ with stdout_log.open("a", encoding="utf-8") as stdout_handle, stderr_log.open(
+ "a", encoding="utf-8"
+ ) as stderr_handle:
+ stdout_handle.write(f"\n=== attempt {attempt} ({agent_id}) ===\n")
+ stderr_handle.write(f"\n=== attempt {attempt} ({agent_id}) ===\n")
+ stdout_handle.flush()
+ stderr_handle.flush()
+
+ process = subprocess.Popen(
[str(entrypoint)],
cwd=self._repo_root,
env=env,
- capture_output=True,
+ stdout=stdout_handle,
+ stderr=stderr_handle,
text=True,
- timeout=timeout_sec,
+ start_new_session=True,
)
- duration_ms = int((time.perf_counter() - started) * 1000)
- except subprocess.TimeoutExpired:
- return DriverResult(
+ if hasattr(os, "getpgid"):
+ try:
+ process_group_id = os.getpgid(process.pid)
+ except OSError:
+ process_group_id = None
+
+ while True:
+ returncode = process.poll()
+ now = time.perf_counter()
+ duration_ms = int((now - started) * 1000)
+ if returncode is not None:
+ completed = subprocess.CompletedProcess(
+ args=[str(entrypoint)],
+ returncode=returncode,
+ stdout="",
+ stderr="",
+ )
+ break
+
+ if duration_ms >= timeout_sec * 1000:
+ self._terminate_process(process, process_group_id=process_group_id)
+ return DriverResult(
+ run_id=run_id,
+ agent_id=agent_id,
+ attempt=attempt,
+ status="timed_out",
+ summary=f"adapter timed out after {timeout_sec}s",
+ metrics=DriverMetrics(
+ duration_ms=duration_ms,
+ first_progress_ms=first_progress_ms,
+ first_scoped_write_ms=first_scoped_write_ms,
+ first_state_heartbeat_ms=first_state_heartbeat_ms,
+ ),
+ recommended_action="fallback",
+ error=f"timeout after {timeout_sec}s",
+ )
+
+ current_scoped_progress_signature = self._scoped_progress_signature(
+ baseline_dir=baseline_dir,
+ workspace_dir=workspace_dir,
+ allowed_paths=policy.merged.allowed_paths,
+ )
+ current_state_progress_signature = self._runtime_heartbeat_signature(
+ workspace_dir=workspace_dir,
+ )
+ scoped_progress_changed = (
+ current_scoped_progress_signature != last_scoped_progress_signature
+ )
+ state_progress_changed = (
+ current_state_progress_signature != last_state_progress_signature
+ )
+ if scoped_progress_changed or state_progress_changed:
+ if (
+ scoped_progress_changed
+ and first_scoped_write_ms is None
+ and current_scoped_progress_signature
+ ):
+ first_scoped_write_ms = duration_ms
+ if (
+ state_progress_changed
+ and first_state_heartbeat_ms is None
+ and current_state_progress_signature
+ ):
+ first_state_heartbeat_ms = duration_ms
+ if first_progress_ms is None:
+ first_candidates = [
+ value
+ for value in (
+ first_scoped_write_ms,
+ first_state_heartbeat_ms,
+ )
+ if value is not None
+ ]
+ if first_candidates:
+ first_progress_ms = min(first_candidates)
+ last_scoped_progress_signature = current_scoped_progress_signature
+ last_state_progress_signature = current_state_progress_signature
+ last_progress_at = now
+ elif (now - last_progress_at) >= stall_timeout_sec:
+ self._terminate_process(process, process_group_id=process_group_id)
+ stall_error = f"no workspace progress for {stall_timeout_sec}s"
+ return DriverResult(
+ run_id=run_id,
+ agent_id=agent_id,
+ attempt=attempt,
+ status="stalled_no_progress",
+ summary=f"adapter stalled after {stall_timeout_sec}s without workspace progress",
+ metrics=DriverMetrics(
+ duration_ms=duration_ms,
+ first_progress_ms=first_progress_ms,
+ first_scoped_write_ms=first_scoped_write_ms,
+ first_state_heartbeat_ms=first_state_heartbeat_ms,
+ ),
+ recommended_action="fallback",
+ error=stall_error,
+ )
+
+ current_signature, current_paths = self._changed_python_signature(
+ baseline_dir=baseline_dir,
+ workspace_dir=workspace_dir,
+ allowed_paths=policy.merged.allowed_paths,
+ )
+ if current_signature and current_signature == probe_signature:
+ stable_polls += 1
+ elif current_signature:
+ probe_signature = current_signature
+ stable_polls = 1
+ else:
+ probe_signature = None
+ stable_polls = 0
+
+ if (
+ current_signature
+ and stable_polls >= 2
+ and current_signature != last_probed_signature
+ ):
+ probe_failure = self._run_fast_fail_probe(
+ workspace_dir=workspace_dir,
+ changed_python_paths=current_paths,
+ )
+ last_probed_signature = current_signature
+ if probe_failure is not None:
+ self._terminate_process(process, process_group_id=process_group_id)
+ return DriverResult(
+ run_id=run_id,
+ agent_id=agent_id,
+ attempt=attempt,
+ status="failed",
+ summary="adapter aborted by fast-fail probe",
+ changed_paths=self._collect_changed_paths(baseline_dir, workspace_dir),
+ metrics=DriverMetrics(
+ duration_ms=duration_ms,
+ first_progress_ms=first_progress_ms,
+ first_scoped_write_ms=first_scoped_write_ms,
+ first_state_heartbeat_ms=first_state_heartbeat_ms,
+ ),
+ recommended_action="fallback",
+ error=probe_failure,
+ )
+
+ time.sleep(2)
+
+ duration_ms = int((time.perf_counter() - started) * 1000)
+ if completed is None:
+ return self._contract_error_result(
run_id=run_id,
agent_id=agent_id,
attempt=attempt,
- status="timed_out",
- summary=f"adapter timed out after {timeout_sec}s",
- recommended_action="fallback",
- error=f"timeout after {timeout_sec}s",
+ message="adapter process exited without completion record",
)
- with stdout_log.open("a", encoding="utf-8") as handle:
- handle.write(f"\n=== attempt {attempt} ({agent_id}) ===\n")
- handle.write(completed.stdout or "")
- with stderr_log.open("a", encoding="utf-8") as handle:
- handle.write(f"\n=== attempt {attempt} ({agent_id}) ===\n")
- handle.write(completed.stderr or "")
-
if not result_path.exists():
return self._contract_error_result(
run_id=run_id,
@@ -381,7 +843,26 @@ def _invoke_adapter(
message=f"invalid driver_result.json: {exc}",
)
- merged_metrics = result.metrics.model_copy(update={"duration_ms": duration_ms})
+ merged_metrics = result.metrics.model_copy(
+ update={
+ "duration_ms": duration_ms,
+ "first_progress_ms": (
+ result.metrics.first_progress_ms
+ if result.metrics.first_progress_ms is not None
+ else first_progress_ms
+ ),
+ "first_scoped_write_ms": (
+ result.metrics.first_scoped_write_ms
+ if result.metrics.first_scoped_write_ms is not None
+ else first_scoped_write_ms
+ ),
+ "first_state_heartbeat_ms": (
+ result.metrics.first_state_heartbeat_ms
+ if result.metrics.first_state_heartbeat_ms is not None
+ else first_state_heartbeat_ms
+ ),
+ }
+ )
result = result.model_copy(
update={"metrics": merged_metrics, "attempt": attempt, "agent_id": agent_id}
)
@@ -430,16 +911,17 @@ def _build_filtered_patch(
)
)
+ relevant_changed = [path for path in changed_paths if not _is_benign_runtime_artifact(path)]
forbidden_changed = [
- path for path in changed_paths if self._matches_any(path, policy.merged.forbidden_paths)
+ path for path in relevant_changed if self._matches_any(path, policy.merged.forbidden_paths)
]
runtime_changed = [
- path for path in changed_paths if path.startswith(_RUNTIME_DENY_PREFIXES)
+ path for path in relevant_changed if path.startswith(_RUNTIME_DENY_PREFIXES)
]
allowed_changed = [
path
- for path in changed_paths
+ for path in relevant_changed
if self._matches_any(path, policy.merged.allowed_paths)
and not self._matches_any(path, policy.merged.forbidden_paths)
and not path.startswith(_RUNTIME_DENY_PREFIXES)
@@ -451,7 +933,7 @@ def _build_filtered_patch(
passed=len(
[
p
- for p in changed_paths
+ for p in relevant_changed
if p not in allowed_changed
and p not in forbidden_changed
and p not in runtime_changed
@@ -478,8 +960,8 @@ def _build_filtered_patch(
checks.append(
ValidationCheck(
id="builtin.max_changed_files",
- passed=len(changed_paths) <= policy.merged.max_changed_files,
- detail=f"changed={len(changed_paths)} limit={policy.merged.max_changed_files}",
+ passed=len(relevant_changed) <= policy.merged.max_changed_files,
+ detail=f"changed={len(relevant_changed)} limit={policy.merged.max_changed_files}",
)
)
@@ -533,6 +1015,275 @@ def _build_filtered_patch(
)
return patch_text, allowed_changed, checks
+ def _meaningful_progress_signature(
+ self,
+ *,
+ baseline_dir: Path,
+ workspace_dir: Path,
+ allowed_paths: list[str],
+ ) -> tuple[tuple[str, int, int], ...]:
+ return self._scoped_progress_signature(
+ baseline_dir=baseline_dir,
+ workspace_dir=workspace_dir,
+ allowed_paths=allowed_paths,
+ ) + self._state_heartbeat_signature(workspace_dir=workspace_dir)
+
+ def _scoped_progress_signature(
+ self,
+ *,
+ baseline_dir: Path,
+ workspace_dir: Path,
+ allowed_paths: list[str],
+ ) -> tuple[tuple[str, int, int], ...]:
+ items: list[tuple[str, int, int]] = []
+ changed_paths = self._collect_changed_paths(baseline_dir, workspace_dir)
+ for rel in sorted(changed_paths):
+ if not self._matches_any(rel, allowed_paths):
+ continue
+ path = workspace_dir / rel
+ if not path.exists():
+ items.append((f"delete:{rel}", 0, 0))
+ continue
+ stat = path.stat()
+ items.append((f"change:{rel}", stat.st_mtime_ns, stat.st_size))
+ return tuple(items)
+
+ def _state_heartbeat_signature(
+ self,
+ *,
+ workspace_dir: Path,
+ ) -> tuple[tuple[str, int, int], ...]:
+ items: list[tuple[str, int, int]] = []
+ state_root = workspace_dir / ".openhands-state"
+ if state_root.exists():
+ for path in sorted(candidate for candidate in state_root.rglob("*") if candidate.is_file()):
+ stat = path.stat()
+ items.append(
+ (
+ f"state:{path.relative_to(workspace_dir).as_posix()}",
+ stat.st_mtime_ns,
+ stat.st_size,
+ )
+ )
+ return tuple(items)
+
+ def _runtime_heartbeat_signature(
+ self,
+ *,
+ workspace_dir: Path,
+ ) -> tuple[tuple[str, int, int], ...]:
+ return self._state_heartbeat_signature(workspace_dir=workspace_dir)
+
+ def _changed_python_signature(
+ self,
+ *,
+ baseline_dir: Path,
+ workspace_dir: Path,
+ allowed_paths: list[str],
+ ) -> tuple[tuple[tuple[str, int, int], ...] | None, list[str]]:
+ changed_paths = self._collect_changed_paths(baseline_dir, workspace_dir)
+ python_paths = [
+ path
+ for path in changed_paths
+ if path.endswith(".py")
+ and path.startswith(("src/", "tests/"))
+ and self._matches_any(path, allowed_paths)
+ ]
+ if not python_paths:
+ return None, []
+
+ signature_items: list[tuple[str, int, int]] = []
+ for rel in sorted(python_paths):
+ path = workspace_dir / rel
+ if not path.exists():
+ continue
+ stat = path.stat()
+ signature_items.append((rel, stat.st_mtime_ns, stat.st_size))
+ if not signature_items:
+ return None, []
+ return tuple(signature_items), [item[0] for item in signature_items]
+
+ def _run_fast_fail_probe(
+ self,
+ *,
+ workspace_dir: Path,
+ changed_python_paths: list[str],
+ ) -> str | None:
+ if not changed_python_paths:
+ return None
+
+ compile_probe = subprocess.run(
+ [sys.executable, "-m", "py_compile", *changed_python_paths],
+ cwd=workspace_dir,
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+ compile_detail = (compile_probe.stderr or compile_probe.stdout or "").strip()
+ if compile_probe.returncode != 0 and "SyntaxError" in compile_detail:
+ return compile_detail[:2000]
+
+ importable_modules: list[str] = []
+ for rel in changed_python_paths:
+ if not rel.startswith("src/"):
+ continue
+ module_parts = list(Path(rel).with_suffix("").parts[1:])
+ if module_parts and module_parts[-1] == "__init__":
+ module_parts = module_parts[:-1]
+ if module_parts:
+ importable_modules.append(".".join(module_parts))
+
+ if not importable_modules:
+ return None
+
+ import_probe = subprocess.run(
+ [
+ sys.executable,
+ "-c",
+ (
+ "import importlib, sys; "
+ "mods = sys.argv[1:]; "
+ "[(importlib.import_module(name), None) for name in mods]"
+ ),
+ *importable_modules,
+ ],
+ cwd=workspace_dir,
+ env={
+ **os.environ,
+ "PYTHONPATH": str(workspace_dir / "src"),
+ "PYTHONDONTWRITEBYTECODE": "1",
+ },
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+ import_detail = (import_probe.stderr or import_probe.stdout or "").strip()
+ if import_probe.returncode != 0 and any(
+ token in import_detail for token in ("ModuleNotFoundError", "ImportError", "SyntaxError")
+ ):
+ return import_detail[:2000]
+ return None
+
+ @staticmethod
+ def _stall_progress_timeout_sec(timeout_sec: int) -> int:
+ return min(timeout_sec, min(180, max(60, max(1, timeout_sec // 4))))
+
+ def _preflight_agent_environment(self, *, agent_id: str, manifest_entrypoint: str) -> str | None:
+ if agent_id != "openhands":
+ return None
+ if Path(manifest_entrypoint).name != "openhands_adapter.sh":
+ return None
+ if str(os.environ.get("OPENHANDS_DRY_RUN") or "0").strip() == "1":
+ return None
+
+ if not self._uses_openhands_ai_lab_runtime(manifest_entrypoint):
+ return None
+ preflight_env = self._build_openhands_ai_lab_env()
+
+ override_command = str(os.environ.get("OPENHANDS_PREFLIGHT_CMD") or "").strip()
+ if override_command:
+ completed = subprocess.run(
+ override_command,
+ cwd=self._repo_root,
+ env=preflight_env,
+ shell=True,
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+ else:
+ script = self._repo_root / "scripts" / "launch_ai_lab.sh"
+ if not script.exists():
+ return f"EnvironmentCheckFailed: launch_ai_lab.sh not found at {script}"
+ completed = subprocess.run(
+ ["bash", str(script), "status"],
+ cwd=self._repo_root,
+ env=preflight_env,
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+
+ if completed.returncode == 0:
+ return None
+
+ detail = (completed.stderr or completed.stdout or "").strip()
+ if not detail:
+ detail = f"preflight exited with code {completed.returncode}"
+ collapsed = re.sub(r"\s+", " ", detail)[:400]
+ return f"EnvironmentCheckFailed: {collapsed}"
+
+ def _job_for_attempt(
+ self,
+ *,
+ job: JobSpec,
+ agent_id: str,
+ attempt: int,
+ last_result: DriverResult,
+ last_validation: ValidationReport,
+ ) -> JobSpec:
+ if attempt <= 1 or agent_id != job.agent_id:
+ return job
+
+ feedback = self._build_retry_feedback(last_result=last_result, last_validation=last_validation)
+ if feedback is None:
+ return job
+
+ metadata = dict(job.metadata)
+ metadata["retry_feedback"] = feedback
+ metadata["retry_attempt"] = attempt
+ metadata["retry_source_status"] = last_result.status
+ return job.model_copy(
+ update={
+ "task": (
+ f"{job.task.rstrip()}\n\n"
+ "Retry feedback from the previous attempt. Fix these exact failures before making any new changes:\n"
+ f"{feedback}\n"
+ ),
+ "metadata": metadata,
+ }
+ )
+
+ @staticmethod
+ def _build_retry_feedback(
+ *,
+ last_result: DriverResult,
+ last_validation: ValidationReport,
+ ) -> str | None:
+ if last_validation.passed:
+ return None
+
+ failed_checks = [check for check in last_validation.checks if not check.passed and check.detail.strip()]
+ error_text = str(last_result.error or "").strip()
+ if not failed_checks and not error_text:
+ return None
+
+ parts = [
+ f"Previous driver status: {last_result.status}",
+ f"Previous driver summary: {last_result.summary}",
+ ]
+ if last_result.changed_paths:
+ parts.append("Previous changed paths:")
+ parts.extend(f"- {path}" for path in last_result.changed_paths)
+ if failed_checks:
+ parts.append("Raw validator failures:")
+ for check in failed_checks:
+ parts.append(f"[{check.id}]")
+ parts.append(check.detail.strip())
+ if error_text:
+ parts.append("Driver error:")
+ parts.append(error_text)
+ return "\n".join(parts)
+
+ @staticmethod
+ def _retry_skip_reason(result: DriverResult) -> str | None:
+ if result.status == "stalled_no_progress":
+ return "stalled_no_progress"
+ error_text = str(result.error or result.summary or "").strip()
+ if result.status == "contract_error" and error_text.startswith("EnvironmentCheckFailed:"):
+ return "environment_preflight_failed"
+ return None
+
def _run_validators(
self,
*,
@@ -615,6 +1366,7 @@ def _contract_error_result(
agent_id: str,
attempt: int,
message: str,
+ recommended_action: str = "reject",
) -> DriverResult:
return DriverResult(
run_id=run_id,
@@ -622,7 +1374,7 @@ def _contract_error_result(
attempt=attempt,
status="contract_error",
summary=message,
- recommended_action="reject",
+ recommended_action=recommended_action,
error=message,
)
@@ -691,6 +1443,15 @@ def _matches_any(path: str, patterns: list[str]) -> bool:
return True
return False
+ @staticmethod
+ def _glob_prefix(value: str) -> str:
+ """Return the literal path prefix of a glob pattern.
+
+ Scans up to the first glob metacharacter (*, ?, [) and strips any
+ trailing slash, e.g. "src/app/**/*.py" -> "src/app".
+ """
+ prefix: list[str] = []
+ for char in value:
+ if char in "*?[":
+ break
+ prefix.append(char)
+ return "".join(prefix).rstrip("/")
+
@staticmethod
def _collect_files(root: Path) -> list[str]:
files: list[str] = []
@@ -743,6 +1504,74 @@ def _append_event(events_path: Path, payload: dict[str, Any]) -> None:
with events_path.open("a", encoding="utf-8") as handle:
handle.write(json.dumps(payload, ensure_ascii=False) + "\n")
+ @staticmethod
+ def _terminate_process(
+ process: subprocess.Popen[str],
+ *,
+ process_group_id: int | None = None,
+ ) -> None:
+ """Best-effort graceful shutdown: SIGTERM, wait, then escalate to SIGKILL.
+
+ When a process group id is given and the platform supports killpg,
+ signals target the whole group (child subprocesses included); otherwise
+ they fall back to the single process. All signal/wait errors are
+ swallowed — this helper must never raise during cleanup.
+ """
+ # Nothing to do if the process already exited.
+ if process.poll() is not None:
+ return
+
+ def _send(sig: signal.Signals) -> None:
+ # Prefer signalling the whole process group when possible.
+ delivered = False
+ if process_group_id is not None and hasattr(os, "killpg"):
+ try:
+ os.killpg(process_group_id, sig)
+ delivered = True
+ except (OSError, ProcessLookupError):
+ delivered = False
+
+ if delivered:
+ return
+
+ # Fallback: signal only the direct child.
+ if sig == signal.SIGTERM:
+ process.terminate()
+ else:
+ process.kill()
+
+ try:
+ _send(signal.SIGTERM)
+ except (OSError, ProcessLookupError):
+ return
+
+ try:
+ process.wait(timeout=5)
+ except subprocess.TimeoutExpired:
+ # Graceful shutdown timed out; escalate to SIGKILL.
+ try:
+ _send(signal.SIGKILL)
+ except (OSError, ProcessLookupError):
+ return
+ try:
+ process.wait(timeout=5)
+ except subprocess.TimeoutExpired:
+ # Last resort: kill the direct child only and give it one second.
+ try:
+ process.kill()
+ except OSError:
+ return
+ try:
+ process.wait(timeout=1)
+ except subprocess.TimeoutExpired:
+ return
+
+ @staticmethod
+ def _rmtree_force(path: Path) -> None:
+ """Delete a directory tree, retrying read-only entries with chmod 0o777.
+
+ Uses shutil.rmtree's onexc hook (Python 3.12+) so permission failures
+ are retried once after loosening the mode; residual failures are
+ ignored — cleanup is best-effort and must not raise.
+ """
+ if not path.exists():
+ return
+
+ def onexc(func, failed_path, excinfo) -> None:
+ _ = excinfo
+ # Loosen permissions (read-only files on e.g. .git objects), then retry.
+ try:
+ os.chmod(failed_path, 0o777)
+ except OSError:
+ pass
+ try:
+ func(failed_path)
+ except OSError:
+ pass
+
+ shutil.rmtree(path, onexc=onexc)
+
def _git_ref(self, args: list[str], *, default: str) -> str:
completed = subprocess.run(
["git", *args],
@@ -763,7 +1592,7 @@ def _sanitize_branch_name(value: str) -> str:
@staticmethod
def _cleanup_workspace(*, workspace_dir: Path, success: bool, policy: EffectivePolicy) -> None:
+ # Remove the workspace only when policy allows: on success when
+ # cleanup_on_success is set, or on failure unless the policy asks to
+ # retain the workspace for debugging. Uses the force variant so
+ # read-only files (e.g. .git objects) do not block deletion.
if success and policy.merged.cleanup_on_success:
- shutil.rmtree(workspace_dir, ignore_errors=True)
+ AgentExecutionRunner._rmtree_force(workspace_dir)
return
if not success and not policy.merged.retain_workspace_on_failure:
- shutil.rmtree(workspace_dir, ignore_errors=True)
+ AgentExecutionRunner._rmtree_force(workspace_dir)
diff --git a/src/autoresearch/shared/autoresearch_planner_contract.py b/src/autoresearch/shared/autoresearch_planner_contract.py
new file mode 100644
index 00000000..024f7f77
--- /dev/null
+++ b/src/autoresearch/shared/autoresearch_planner_contract.py
@@ -0,0 +1,138 @@
+from __future__ import annotations
+
+from datetime import datetime
+from enum import Enum
+from typing import Any, Literal
+
+from pydantic import Field, field_validator
+
+from autoresearch.agent_protocol.models import JobSpec, RunSummary
+from autoresearch.shared.models import JobStatus, StrictModel
+from autoresearch.shared.openhands_controlled_contract import ControlledExecutionRequest
+from autoresearch.shared.openhands_worker_contract import OpenHandsWorkerJobSpec
+from autoresearch.shared.remote_run_contract import RemoteRunRecord
+
+
+class AutoResearchPlannerRequest(StrictModel):
+ """Request payload for starting an autoresearch planning pass.
+
+ Text fields are whitespace-normalized by validators; telegram_uid is
+ coerced to None when blank so downstream code can test truthiness.
+ """
+
+ goal: str = Field(
+ default="Scan the repo for the next safe patch-only improvement.",
+ min_length=1,
+ )
+ max_candidates: int = Field(default=5, ge=1, le=20)
+ pipeline_target: Literal["patch", "draft_pr"] = "draft_pr"
+ target_base_branch: str = Field(default="main", min_length=1)
+ max_iterations: int = Field(default=2, ge=1, le=5)
+ approval_granted: bool = False
+ include_upstream_watch: bool = False
+ telegram_uid: str | None = None
+ metadata: dict[str, Any] = Field(default_factory=dict)
+
+ @field_validator("goal", "target_base_branch")
+ @classmethod
+ def _normalize_non_empty_text(cls, value: str) -> str:
+ # Strip surrounding whitespace and reject values that become empty.
+ normalized = value.strip()
+ if not normalized:
+ raise ValueError("value must not be empty")
+ return normalized
+
+ @field_validator("pipeline_target")
+ @classmethod
+ def _normalize_pipeline_target(cls, value: str) -> str:
+ # Accept case/whitespace variants of the two supported targets.
+ normalized = value.strip().lower()
+ if normalized not in {"patch", "draft_pr"}:
+ raise ValueError("pipeline_target must be patch or draft_pr")
+ return normalized
+
+ @field_validator("telegram_uid")
+ @classmethod
+ def _normalize_telegram_uid(cls, value: str | None) -> str | None:
+ # Blank strings collapse to None.
+ if value is None:
+ return None
+ normalized = value.strip()
+ return normalized or None
+
+
+class AutoResearchPlanDispatchStatus(str, Enum):
+ """Lifecycle of dispatching a plan's selected candidate for execution."""
+
+ PENDING = "pending"
+ DISPATCHING = "dispatching"
+ DISPATCHED = "dispatched"
+ FAILED = "failed"
+
+
+class UpstreamWatchDecision(str, Enum):
+ """Outcome of an upstream-repository watch pass."""
+
+ SKIP = "skip"
+ REVIEW = "review"
+ FAILED = "failed"
+
+
+class UpstreamWatchCommitRead(StrictModel):
+ """One upstream commit observed by the watch pass."""
+
+ sha: str
+ title: str
+ committed_at: datetime | None = None
+ touched_paths: list[str] = Field(default_factory=list)
+
+
+class UpstreamWatchRead(StrictModel):
+ """Snapshot of an upstream watch: recent commits, changed paths, and the decision."""
+
+ upstream_url: str
+ default_branch: str = "main"
+ latest_commit_sha: str | None = None
+ latest_commit_title: str | None = None
+ latest_commit_at: datetime | None = None
+ recent_commits: list[UpstreamWatchCommitRead] = Field(default_factory=list)
+ changed_paths: list[str] = Field(default_factory=list)
+ # changed_paths filtered down to ones relevant to this repo — presumably; confirm in producer.
+ relevant_paths: list[str] = Field(default_factory=list)
+ focus_areas: list[str] = Field(default_factory=list)
+ decision: UpstreamWatchDecision = UpstreamWatchDecision.SKIP
+ summary: str = ""
+ cleaned_up: bool = False
+ cleanup_paths: list[str] = Field(default_factory=list)
+ error: str | None = None
+
+
+class AutoResearchPlannerEvidenceRead(StrictModel):
+ """A single piece of evidence (marker, test gap, or hotspot) backing a candidate."""
+
+ kind: Literal["marker", "test_gap", "hotspot"]
+ path: str = Field(..., min_length=1)
+ line: int | None = None
+ detail: str = ""
+ weight: float = 0.0
+
+
+class AutoResearchPlannerCandidateRead(StrictModel):
+ """A planner-proposed improvement candidate with its scope and evidence."""
+
+ candidate_id: str
+ title: str
+ summary: str
+ category: Literal["marker_backlog", "test_gap"]
+ priority_score: float = 0.0
+ source_path: str
+ allowed_paths: list[str] = Field(default_factory=list)
+ suggested_test_paths: list[str] = Field(default_factory=list)
+ test_command: str
+ evidence: list[AutoResearchPlannerEvidenceRead] = Field(default_factory=list)
+ metadata: dict[str, Any] = Field(default_factory=dict)
+
+
+class AutoResearchPlanRead(StrictModel):
+ """Full read model of a plan: candidates, dispatch state, and run results."""
+
+ plan_id: str
+ goal: str
+ status: JobStatus
+ summary: str = ""
+ created_at: datetime
+ updated_at: datetime
+ selected_candidate: AutoResearchPlannerCandidateRead | None = None
+ candidates: list[AutoResearchPlannerCandidateRead] = Field(default_factory=list)
+ # Execution payloads derived from the selected candidate, if any.
+ worker_spec: OpenHandsWorkerJobSpec | None = None
+ controlled_request: ControlledExecutionRequest | None = None
+ agent_job: JobSpec | None = None
+ upstream_watch: UpstreamWatchRead | None = None
+ telegram_uid: str | None = None
+ panel_action_url: str | None = None
+ notification_sent: bool = False
+ # Dispatch bookkeeping (who requested it, when, and its outcome).
+ dispatch_status: AutoResearchPlanDispatchStatus = AutoResearchPlanDispatchStatus.PENDING
+ dispatch_requested_at: datetime | None = None
+ dispatch_completed_at: datetime | None = None
+ dispatch_requested_by: str | None = None
+ dispatch_run: RemoteRunRecord | None = None
+ run_summary: RunSummary | None = None
+ dispatch_error: str | None = None
+ metadata: dict[str, Any] = Field(default_factory=dict)
+ error: str | None = None
diff --git a/src/autoresearch/shared/housekeeper_contract.py b/src/autoresearch/shared/housekeeper_contract.py
new file mode 100644
index 00000000..c7030c6e
--- /dev/null
+++ b/src/autoresearch/shared/housekeeper_contract.py
@@ -0,0 +1,163 @@
+from __future__ import annotations
+
+from datetime import datetime
+from enum import Enum
+from typing import Any, Literal
+
+from pydantic import Field
+
+from autoresearch.shared.models import StrictModel
+
+
+class HousekeeperMode(str, Enum):
+ """Operating mode of the housekeeper scheduler (day-safe vs. night exploration)."""
+
+ DAY_SAFE = "day_safe"
+ NIGHT_READONLY_EXPLORE = "night_readonly_explore"
+ NIGHT_EXPLORE = "night_explore"
+
+
+class HousekeeperChangeReason(str, Enum):
+ """Why the effective housekeeper mode changed."""
+
+ SCHEDULE = "schedule"
+ MANUAL_PANEL = "manual_panel"
+ MANUAL_API = "manual_api"
+ CIRCUIT_BREAKER = "circuit_breaker"
+ RECOVERED_FROM_CIRCUIT_BREAKER = "recovered_from_circuit_breaker"
+
+
+class CircuitBreakerStatus(str, Enum):
+ """Circuit-breaker state: closed allows work, open blocks it."""
+
+ CLOSED = "closed"
+ OPEN = "open"
+
+
+class AdmissionRiskLevel(str, Enum):
+ """Coarse risk classification used when admitting a task."""
+
+ LOW = "low"
+ MEDIUM = "medium"
+ HIGH = "high"
+
+
+class DeferredReason(str, Enum):
+ """Why a task was deferred rather than executed immediately."""
+
+ DEFERRED_TO_NIGHT = "deferred_to_night"
+ APPROVAL_REQUIRED = "approval_required"
+ CIRCUIT_BREAKER_OPEN = "circuit_breaker_open"
+ BUDGET_EXHAUSTED = "budget_exhausted"
+ DEDUP_BLOCKED = "dedup_blocked"
+
+
+class ExplorationBlockerReason(str, Enum):
+ """Why an exploration run was blocked or did not complete."""
+
+ CIRCUIT_BREAKER_OPEN = "circuit_breaker_open"
+ ENV_MISSING = "env_missing"
+ PERMISSION_DENIED = "permission_denied"
+ EMPTY_SCOPE = "empty_scope"
+ STALLED_NO_PROGRESS = "stalled_no_progress"
+ VALIDATION_FAILED = "validation_failed"
+ APPROVAL_PENDING = "approval_pending"
+ DIRTY_REPO = "dirty_repo"
+ BUDGET_EXHAUSTED = "budget_exhausted"
+ UNKNOWN = "unknown"
+
+
+class CircuitBreakerStateRead(StrictModel):
+ """Read model of the housekeeper circuit breaker and its failure counters."""
+
+ status: CircuitBreakerStatus = CircuitBreakerStatus.CLOSED
+ triggered_at: datetime | None = None
+ reason: str | None = None
+ consecutive_failures: int = 0
+ recent_failure_rate: float = 0.0
+ acknowledged_at: datetime | None = None
+ metadata: dict[str, Any] = Field(default_factory=dict)
+
+
+class ExecutionProfileRead(StrictModel):
+ """Per-mode execution limits and capabilities (dispatch, parallelism, network)."""
+
+ profile_name: HousekeeperMode
+ pipeline_target: Literal["patch", "draft_pr"]
+ max_iterations: int = Field(default=1, ge=1, le=5)
+ auto_dispatch_allowed: bool = False
+ parallelism: int = Field(default=1, ge=1, le=32)
+ allow_draft_pr: bool = False
+ allow_repo_write: bool = True
+ allow_network: bool = False
+ allow_long_task_minutes: int = Field(default=15, ge=1, le=1440)
+
+
+class TaskAdmissionAssessmentRead(StrictModel):
+ """Admission-time estimate of a task's shape, cost, and risk."""
+
+ plan_shape: Literal["single_task", "task_dag", "planner_candidate", "media_job", "unknown"] = "unknown"
+ estimated_runtime_minutes: int = Field(default=15, ge=0, le=1440)
+ requires_repo_write: bool = True
+ requires_network: bool = False
+ fanout_count: int = Field(default=1, ge=0, le=100)
+ risk_level: AdmissionRiskLevel = AdmissionRiskLevel.MEDIUM
+
+
+class HousekeeperStateRead(StrictModel):
+ """Singleton housekeeper state: scheduled vs. manual-override vs. effective mode."""
+
+ state_id: str = "housekeeper"
+ scheduled_mode: HousekeeperMode = HousekeeperMode.DAY_SAFE
+ manual_override_mode: HousekeeperMode | None = None
+ # The mode actually in force (override wins over schedule — presumably; confirm in service logic).
+ effective_mode: HousekeeperMode = HousekeeperMode.DAY_SAFE
+ effective_until: datetime | None = None
+ reason: HousekeeperChangeReason = HousekeeperChangeReason.SCHEDULE
+ changed_by: str = "system"
+ last_changed_at: datetime
+ circuit_breaker_state: CircuitBreakerStateRead = Field(default_factory=CircuitBreakerStateRead)
+ last_summary_at: datetime | None = None
+ metadata: dict[str, Any] = Field(default_factory=dict)
+
+
+class HousekeeperModeUpdateRequest(StrictModel):
+ """Request to change housekeeper mode or acknowledge the circuit breaker."""
+
+ action: Literal["set_manual_override", "clear_manual_override", "ack_circuit_breaker", "apply_schedule"]
+ target_mode: HousekeeperMode | None = None
+ changed_by: str = Field(..., min_length=1)
+ reason: HousekeeperChangeReason
+ effective_until: datetime | None = None
+ metadata: dict[str, Any] = Field(default_factory=dict)
+
+
+class NightBudgetStateRead(StrictModel):
+ """Per-night budget window: usage counters against fixed nightly caps."""
+
+ budget_id: str = "night_budget"
+ window_start: datetime
+ window_end: datetime
+ dispatches_used: int = 0
+ draft_prs_used: int = 0
+ worker_minutes_used: int = 0
+ max_dispatches_per_night: int = 4
+ max_draft_pr_per_night: int = 2
+ max_worker_minutes_per_night: int = 180
+ updated_at: datetime
+ metadata: dict[str, Any] = Field(default_factory=dict)
+
+
+class HousekeeperTickRead(StrictModel):
+ """Result of one housekeeper tick: what (if anything) was dispatched and why."""
+
+ executed: bool = False
+ skipped_reason: str | None = None
+ target_kind: Literal["manager_dispatch", "planner_dispatch", "none"] = "none"
+ target_id: str | None = None
+ blocker_reason: ExplorationBlockerReason | None = None
+ summary: str = ""
+ state: HousekeeperStateRead
+ budget: NightBudgetStateRead | None = None
+
+
+class HousekeeperMorningSummaryRead(StrictModel):
+ """Morning digest of overnight activity grouped into actionable buckets."""
+
+ sent: bool = False
+ summary_text: str
+ completed_items: list[str] = Field(default_factory=list)
+ blocked_items: list[str] = Field(default_factory=list)
+ decision_items: list[str] = Field(default_factory=list)
+ queue_items: list[str] = Field(default_factory=list)
+ state: HousekeeperStateRead
+
+
+class ExplorationDedupKeyRead(StrictModel):
+ """Composite key used to deduplicate equivalent exploration attempts."""
+
+ repo_id: str
+ target_scope_hash: str
+ intent_id: str
+ normalized_goal_hash: str
+
+
+class ExplorationRecordRead(StrictModel):
+ """Persistent record of one exploration attempt keyed by its dedup key."""
+
+ record_id: str
+ dedup_key: ExplorationDedupKeyRead
+ target_kind: Literal["manager_dispatch", "planner_dispatch", "media_job"]
+ target_id: str
+ blocker_reason: ExplorationBlockerReason | None = None
+ final_status: str | None = None
+ created_at: datetime
+ updated_at: datetime
+ metadata: dict[str, Any] = Field(default_factory=dict)
diff --git a/src/autoresearch/shared/manager_agent_contract.py b/src/autoresearch/shared/manager_agent_contract.py
new file mode 100644
index 00000000..33310711
--- /dev/null
+++ b/src/autoresearch/shared/manager_agent_contract.py
@@ -0,0 +1,92 @@
+from __future__ import annotations
+
+from datetime import datetime
+from enum import Enum
+from typing import Any, Literal
+
+from pydantic import Field, field_validator
+
+from autoresearch.agent_protocol.models import JobSpec, RunSummary
+from autoresearch.shared.models import JobStatus, StrictModel
+from autoresearch.shared.openhands_controlled_contract import ControlledExecutionRequest
+from autoresearch.shared.openhands_worker_contract import OpenHandsWorkerJobSpec
+
+
+class ManagerDispatchRequest(StrictModel):
+ """Request to have the manager agent plan and (optionally) auto-dispatch a prompt."""
+
+ prompt: str = Field(..., min_length=1)
+ pipeline_target: Literal["patch", "draft_pr"] = "draft_pr"
+ target_base_branch: str = Field(default="main", min_length=1)
+ max_iterations: int = Field(default=2, ge=1, le=5)
+ approval_granted: bool = False
+ auto_dispatch: bool = True
+ metadata: dict[str, Any] = Field(default_factory=dict)
+
+ @field_validator("prompt", "target_base_branch")
+ @classmethod
+ def _normalize_text(cls, value: str) -> str:
+ # Strip surrounding whitespace and reject values that become empty.
+ normalized = value.strip()
+ if not normalized:
+ raise ValueError("value must not be empty")
+ return normalized
+
+
+class ManagerIntentRead(StrictModel):
+ """An intent matched from the user's prompt, with its path scope suggestions."""
+
+ intent_id: str
+ label: str
+ summary: str
+ matched_keywords: list[str] = Field(default_factory=list)
+ allowed_paths: list[str] = Field(default_factory=list)
+ suggested_test_paths: list[str] = Field(default_factory=list)
+ metadata: dict[str, Any] = Field(default_factory=dict)
+
+
+class ManagerPlanStrategy(str, Enum):
+ """Whether the manager plans one task or a dependency DAG of tasks."""
+
+ SINGLE_TASK = "single_task"
+ TASK_DAG = "task_dag"
+
+
+class ManagerTaskStage(str, Enum):
+ """Coarse stage label for a planned task (used for ordering/grouping)."""
+
+ BACKEND = "backend"
+ TESTS = "tests"
+ FRONTEND = "frontend"
+ GENERIC = "generic"
+
+
+class ManagerPlanTaskRead(StrictModel):
+ """One task in a manager execution plan, with its dependencies and run payloads."""
+
+ task_id: str
+ title: str
+ summary: str
+ stage: ManagerTaskStage = ManagerTaskStage.GENERIC
+ # task_ids that must complete before this task may run.
+ depends_on: list[str] = Field(default_factory=list)
+ status: JobStatus = JobStatus.CREATED
+ worker_spec: OpenHandsWorkerJobSpec | None = None
+ controlled_request: ControlledExecutionRequest | None = None
+ agent_job: JobSpec | None = None
+ run_summary: RunSummary | None = None
+ metadata: dict[str, Any] = Field(default_factory=dict)
+ error: str | None = None
+
+
+class ManagerExecutionPlanRead(StrictModel):
+ """The manager's execution plan: a strategy plus its ordered tasks."""
+
+ plan_id: str
+ strategy: ManagerPlanStrategy = ManagerPlanStrategy.SINGLE_TASK
+ summary: str = ""
+ tasks: list[ManagerPlanTaskRead] = Field(default_factory=list)
+
+
+class ManagerDispatchRead(StrictModel):
+ """Full read model of a manager dispatch: intent, plan, and execution results."""
+
+ dispatch_id: str
+ prompt: str
+ normalized_goal: str
+ status: JobStatus
+ summary: str = ""
+ created_at: datetime
+ updated_at: datetime
+ selected_intent: ManagerIntentRead | None = None
+ execution_plan: ManagerExecutionPlanRead | None = None
+ worker_spec: OpenHandsWorkerJobSpec | None = None
+ controlled_request: ControlledExecutionRequest | None = None
+ agent_job: JobSpec | None = None
+ run_summary: RunSummary | None = None
+ metadata: dict[str, Any] = Field(default_factory=dict)
+ error: str | None = None
diff --git a/src/autoresearch/shared/media_job_contract.py b/src/autoresearch/shared/media_job_contract.py
new file mode 100644
index 00000000..95ad444a
--- /dev/null
+++ b/src/autoresearch/shared/media_job_contract.py
@@ -0,0 +1,101 @@
+from __future__ import annotations
+
+from datetime import datetime
+from enum import Enum
+from typing import Any
+
+from pydantic import Field, field_validator
+
+from autoresearch.shared.models import StrictModel
+
+_ALLOWED_FILENAME_TOKENS = {"{title}", "{id}", "{uploader}", "{upload_date}"}
+
+
+class MediaJobMode(str, Enum):
+ """What kind of media artifact a job extracts."""
+
+ AUDIO = "audio"
+ VIDEO = "video"
+ SUBTITLE = "subtitle"
+ METADATA = "metadata"
+
+
+class MediaJobPostprocess(str, Enum):
+ """Optional re-encode step applied after download."""
+
+ NONE = "none"
+ MP3 = "mp3"
+ MP4 = "mp4"
+
+
+class MediaTargetBucket(str, Enum):
+ """Destination bucket/folder for a job's output files."""
+
+ INBOX = "inbox"
+ AUDIO = "audio"
+ VIDEO = "video"
+ SUBTITLES = "subtitles"
+ META = "meta"
+
+
+class MediaJobStatus(str, Enum):
+ """Lifecycle of a media job."""
+
+ CREATED = "created"
+ QUEUED = "queued"
+ RUNNING = "running"
+ COMPLETED = "completed"
+ FAILED = "failed"
+
+
+class MediaJobRequest(StrictModel):
+ """Request to create a media job.
+
+ The filename template is restricted to dash-joined tokens from
+ _ALLOWED_FILENAME_TOKENS ({title}, {id}, {uploader}, {upload_date}),
+ which rules out path separators and arbitrary user text in filenames.
+ """
+
+ url: str = Field(..., min_length=1)
+ mode: MediaJobMode
+ target_bucket: MediaTargetBucket
+ filename_template: str = "{title}-{id}"
+ postprocess: MediaJobPostprocess = MediaJobPostprocess.NONE
+ metadata: dict[str, Any] = Field(default_factory=dict)
+
+ @field_validator("url")
+ @classmethod
+ def _normalize_url(cls, value: str) -> str:
+ # Only http(s) URLs are accepted; everything else is rejected.
+ normalized = value.strip()
+ if not normalized.startswith(("http://", "https://")):
+ raise ValueError("media url must be http or https")
+ return normalized
+
+ @field_validator("filename_template")
+ @classmethod
+ def _validate_template(cls, value: str) -> str:
+ # Split on dashes and require every non-empty part to be a known token;
+ # re-join so consecutive/trailing dashes are normalized away.
+ normalized = value.strip()
+ if not normalized:
+ raise ValueError("filename_template is required")
+ parts = [part for part in normalized.split("-") if part]
+ if not parts:
+ raise ValueError("filename_template must include at least one token")
+ for part in parts:
+ if part not in _ALLOWED_FILENAME_TOKENS:
+ raise ValueError(f"unsupported filename token: {part}")
+ return "-".join(parts)
+
+
+class MediaJobRead(StrictModel):
+ """Read model of a media job, including its outputs and extracted metadata."""
+
+ job_id: str
+ url: str
+ mode: MediaJobMode
+ target_bucket: MediaTargetBucket
+ filename_template: str
+ postprocess: MediaJobPostprocess
+ status: MediaJobStatus = MediaJobStatus.CREATED
+ output_files: list[str] = Field(default_factory=list)
+ title: str | None = None
+ duration_seconds: int | None = None
+ uploader: str | None = None
+ subtitle_path: str | None = None
+ metadata_path: str | None = None
+ metadata: dict[str, Any] = Field(default_factory=dict)
+ created_at: datetime
+ updated_at: datetime
+ error: str | None = None
+
+
+class MediaJobEventRead(StrictModel):
+ """One progress/status event emitted while processing a media job."""
+
+ event_id: str
+ job_id: str
+ stage: str
+ status: str
+ detail: str = ""
+ created_at: datetime
+ metadata: dict[str, Any] = Field(default_factory=dict)
diff --git a/src/autoresearch/shared/models.py b/src/autoresearch/shared/models.py
index 3984e0d0..0f805c5f 100644
--- a/src/autoresearch/shared/models.py
+++ b/src/autoresearch/shared/models.py
@@ -893,8 +893,8 @@ class PanelMagicLinkRead(StrictModel):
class PanelAuditLogRead(StrictModel):
audit_id: str
telegram_uid: str
- action: Literal["cancel", "retry", "approve", "reject"]
- target_type: Literal["agent_run", "approval_request"] = "agent_run"
+ action: Literal["cancel", "retry", "approve", "reject", "dispatch"]
+ target_type: Literal["agent_run", "approval_request", "autoresearch_plan"] = "agent_run"
target_id: str
status: Literal["accepted", "rejected", "failed"] = "accepted"
reason: str | None = None
@@ -965,6 +965,63 @@ class AdminManagedSkillPromotionExecuteRequest(StrictModel):
metadata: dict[str, Any] = Field(default_factory=dict)
+class AdminAgentAuditRole(str, Enum):
+ """Which agent role produced an audit-trail entry."""
+
+ MANAGER = "manager"
+ PLANNER = "planner"
+ WORKER = "worker"
+
+
+class AdminAgentAuditTrailEntryRead(StrictModel):
+ """One row of the admin audit trail: run identity, timings, and change scope."""
+
+ entry_id: str
+ source: Literal["manager_task", "autoresearch_plan", "claude_agent", "runtime_artifact"]
+ agent_role: AdminAgentAuditRole = AdminAgentAuditRole.WORKER
+ run_id: str
+ agent_id: str | None = None
+ title: str
+ status: str
+ final_status: str | None = None
+ recorded_at: datetime
+ # Millisecond timings; None when the corresponding milestone never happened.
+ duration_ms: int | None = None
+ first_progress_ms: int | None = None
+ first_scoped_write_ms: int | None = None
+ first_state_heartbeat_ms: int | None = None
+ files_changed: int = 0
+ changed_paths: list[str] = Field(default_factory=list)
+ scope_paths: list[str] = Field(default_factory=list)
+ patch_uri: str | None = None
+ isolated_workspace: str | None = None
+ summary: str = ""
+ metadata: dict[str, Any] = Field(default_factory=dict)
+
+
+class AdminAgentAuditTrailStatsRead(StrictModel):
+ """Aggregate counts over the entries in an audit-trail snapshot."""
+
+ total: int = 0
+ succeeded: int = 0
+ failed: int = 0
+ running: int = 0
+ queued: int = 0
+ review_required: int = 0
+
+
+class AdminAgentAuditTrailSnapshotRead(StrictModel):
+ """A point-in-time audit-trail listing plus its aggregate stats."""
+
+ items: list[AdminAgentAuditTrailEntryRead] = Field(default_factory=list)
+ stats: AdminAgentAuditTrailStatsRead = Field(default_factory=AdminAgentAuditTrailStatsRead)
+ issued_at: datetime
+
+
+class AdminAgentAuditTrailDetailRead(StrictModel):
+ """Drill-down view of one audit entry: inputs, specs, patch text, and errors."""
+
+ entry: AdminAgentAuditTrailEntryRead
+ input_prompt: str | None = None
+ job_spec: dict[str, Any] = Field(default_factory=dict)
+ worker_spec: dict[str, Any] = Field(default_factory=dict)
+ controlled_request: dict[str, Any] = Field(default_factory=dict)
+ patch_text: str = ""
+ # True when patch_text was cut off to bound the payload size.
+ patch_truncated: bool = False
+ error_reason: str | None = None
+ traceback: str | None = None
+ raw_record: dict[str, Any] = Field(default_factory=dict)
+
+
class PanelStateRead(StrictModel):
telegram_uid: str
sessions: list[OpenClawSessionRead] = Field(default_factory=list)
@@ -972,6 +1029,7 @@ class PanelStateRead(StrictModel):
audit_logs: list[PanelAuditLogRead] = Field(default_factory=list)
capability_providers: list[CapabilityProviderSummaryRead] = Field(default_factory=list)
pending_approvals: list[ApprovalRequestRead] = Field(default_factory=list)
+ pending_autoresearch_plans: list[dict[str, Any]] = Field(default_factory=list)
issued_at: datetime
diff --git a/src/autoresearch/shared/remote_run_contract.py b/src/autoresearch/shared/remote_run_contract.py
new file mode 100644
index 00000000..f15c5705
--- /dev/null
+++ b/src/autoresearch/shared/remote_run_contract.py
@@ -0,0 +1,171 @@
+from __future__ import annotations
+
+from datetime import datetime
+from enum import Enum
+from pathlib import PurePosixPath
+from typing import Any, Literal
+
+from pydantic import Field, field_validator
+
+from autoresearch.agent_protocol.models import JobSpec, RunSummary
+from autoresearch.shared.models import StrictModel, utc_now
+
+
+def _normalize_non_empty_text(value: str) -> str:
+ """Strip surrounding whitespace; raise ValueError if the result is empty."""
+ normalized = value.strip()
+ if not normalized:
+ raise ValueError("value must not be empty")
+ return normalized
+
+
+def _normalize_relative_artifact_path(value: str) -> str:
+ """Normalize an artifact path and enforce that it stays repo/runtime relative.
+
+ Backslashes are converted to forward slashes; absolute paths and any
+ ".." traversal segments are rejected.
+ """
+ normalized = value.strip().replace("\\", "/")
+ if not normalized:
+ raise ValueError("artifact path must not be empty")
+ candidate = PurePosixPath(normalized)
+ if candidate.is_absolute():
+ raise ValueError("artifact paths must be repo-relative or runtime-relative")
+ parts = candidate.parts
+ if any(part == ".." for part in parts):
+ raise ValueError("artifact paths must stay inside the repo/runtime root")
+ return normalized
+
+
+def _normalize_artifact_paths(value: dict[str, str] | None) -> dict[str, str]:
+ """Validate every key/path in an artifact map; None/empty maps become {}."""
+ if not value:
+ return {}
+ normalized: dict[str, str] = {}
+ for raw_key, raw_path in value.items():
+ key = _normalize_non_empty_text(str(raw_key))
+ normalized[key] = _normalize_relative_artifact_path(str(raw_path))
+ return normalized
+
+
+class DispatchLane(str, Enum):
+ """Where a run executes: on the local host or a remote worker."""
+
+ LOCAL = "local"
+ REMOTE = "remote"
+
+
+class RemoteRunStatus(str, Enum):
+ """Lifecycle of a remote run, including stalled/timed-out terminal states."""
+
+ QUEUED = "queued"
+ RUNNING = "running"
+ SUCCEEDED = "succeeded"
+ FAILED = "failed"
+ STALLED = "stalled"
+ TIMED_OUT = "timed_out"
+
+
+class FailureClass(str, Enum):
+ """Classification of why a remote run failed, used to pick a recovery action."""
+
+ PLANNER_STALLED = "planner_stalled"
+ EXECUTOR_STALLED = "executor_stalled"
+ TOOL_TIMEOUT = "tool_timeout"
+ MODEL_FALLBACK = "model_fallback"
+ ASSERTION_FAILED_AFTER_FALLBACK = "assertion_failed_after_fallback"
+ ENV_MISSING = "env_missing"
+ WORKSPACE_DIRTY = "workspace_dirty"
+ TRANSIENT_NETWORK = "transient_network"
+ UNKNOWN = "unknown"
+
+
+class RecoveryAction(str, Enum):
+ """What the dispatcher should do after a failure."""
+
+ RETRY = "retry"
+ ABORT = "abort"
+ REQUIRE_HUMAN_REVIEW = "require_human_review"
+ DOWNGRADE_TO_DRAFT = "downgrade_to_draft"
+ QUARANTINE = "quarantine"
+
+
+class RemoteTaskSpec(StrictModel):
+ """Dispatch request under the remote-run/v1 protocol.
+
+ requested_lane records what the caller asked for; lane is where the run
+ was actually placed (they differ when the dispatcher falls back).
+ """
+
+ protocol_version: Literal["remote-run/v1"] = "remote-run/v1"
+ run_id: str = Field(..., min_length=1)
+ requested_lane: DispatchLane = DispatchLane.LOCAL
+ lane: DispatchLane = DispatchLane.LOCAL
+ runtime_mode: str = Field(default="day", min_length=1)
+ planner_plan_id: str | None = None
+ planner_candidate_id: str | None = None
+ job: JobSpec
+ metadata: dict[str, Any] = Field(default_factory=dict)
+
+ @field_validator("run_id", "runtime_mode")
+ @classmethod
+ def _normalize_required_text(cls, value: str) -> str:
+ # Required fields: strip whitespace, reject blanks.
+ return _normalize_non_empty_text(value)
+
+ @field_validator("planner_plan_id", "planner_candidate_id")
+ @classmethod
+ def _normalize_optional_text(cls, value: str | None) -> str | None:
+ # Optional fields: blank strings collapse to None.
+ if value is None:
+ return None
+ normalized = value.strip()
+ return normalized or None
+
+
+class RemoteRunRecord(StrictModel):
+ """Persistent state of a remote run: status, failure class, and artifacts.
+
+ artifact_paths maps a logical artifact name to a repo/runtime-relative
+ path; entries are validated against absolute paths and ".." traversal.
+ """
+
+ protocol_version: Literal["remote-run/v1"] = "remote-run/v1"
+ run_id: str = Field(..., min_length=1)
+ requested_lane: DispatchLane = DispatchLane.LOCAL
+ lane: DispatchLane = DispatchLane.LOCAL
+ status: RemoteRunStatus = RemoteRunStatus.QUEUED
+ failure_class: FailureClass | None = None
+ recovery_action: RecoveryAction | None = None
+ artifact_paths: dict[str, str] = Field(default_factory=dict)
+ summary: str = ""
+ started_at: datetime | None = None
+ updated_at: datetime = Field(default_factory=utc_now)
+ finished_at: datetime | None = None
+ fallback_reason: str | None = None
+ metadata: dict[str, Any] = Field(default_factory=dict)
+
+ @field_validator("run_id")
+ @classmethod
+ def _normalize_run_id(cls, value: str) -> str:
+ return _normalize_non_empty_text(value)
+
+ @field_validator("summary")
+ @classmethod
+ def _normalize_summary(cls, value: str) -> str:
+ # Summary may be empty; only trim surrounding whitespace.
+ return value.strip()
+
+ @field_validator("fallback_reason")
+ @classmethod
+ def _normalize_fallback_reason(cls, value: str | None) -> str | None:
+ # Blank strings collapse to None.
+ if value is None:
+ return None
+ normalized = value.strip()
+ return normalized or None
+
+ @field_validator("artifact_paths", mode="before")
+ @classmethod
+ def _validate_artifact_paths(cls, value: dict[str, str] | None) -> dict[str, str]:
+ # mode="before" so None and raw inputs are normalized prior to typing.
+ return _normalize_artifact_paths(value)
+
+
+class RemoteHeartbeat(StrictModel):
+ """Periodic liveness report from a remote run, ordered by sequence number."""
+
+ protocol_version: Literal["remote-run/v1"] = "remote-run/v1"
+ run_id: str = Field(..., min_length=1)
+ lane: DispatchLane = DispatchLane.LOCAL
+ status: RemoteRunStatus = RemoteRunStatus.RUNNING
+ # Monotonically increasing per run, starting at 1.
+ sequence: int = Field(default=1, ge=1)
+ summary: str = ""
+ recorded_at: datetime = Field(default_factory=utc_now)
+ artifact_paths: dict[str, str] = Field(default_factory=dict)
+ metadata: dict[str, Any] = Field(default_factory=dict)
+
+ @field_validator("run_id")
+ @classmethod
+ def _normalize_heartbeat_run_id(cls, value: str) -> str:
+ return _normalize_non_empty_text(value)
+
+ @field_validator("summary")
+ @classmethod
+ def _normalize_heartbeat_summary(cls, value: str) -> str:
+ return value.strip()
+
+ @field_validator("artifact_paths", mode="before")
+ @classmethod
+ def _validate_heartbeat_artifact_paths(cls, value: dict[str, str] | None) -> dict[str, str]:
+ return _normalize_artifact_paths(value)
+
+
+class RemoteRunSummary(RemoteRunRecord):
+ """A RemoteRunRecord extended with the final RunSummary, when available."""
+
+ run_summary: RunSummary | None = None
diff --git a/src/gateway/telegram_webhook.py b/src/gateway/telegram_webhook.py
index c1703ae1..a77e8c8c 100644
--- a/src/gateway/telegram_webhook.py
+++ b/src/gateway/telegram_webhook.py
@@ -1,5 +1,9 @@
"""
-Telegram Webhook Handler - 指令拦截与工作流触发
+Legacy Telegram webhook compatibility handler.
+
+This module exists only for backward compatibility with the old
+workflow-driven `/telegram/webhook` path. New Telegram integration work
+must target `autoresearch.api.routers.gateway_telegram`.
"""
import asyncio
@@ -20,7 +24,12 @@
@router.post("/telegram/webhook")
async def telegram_webhook(request: Request):
- """Telegram Webhook 处理器"""
+ """Legacy Telegram webhook handler.
+
+ Deprecated: keep this path stable for existing callers, but do not
+ extend it with new product behavior. The mainline Telegram entrypoint
+ lives under `/api/v1/gateway/telegram/webhook`.
+ """
try:
data = await request.json()
@@ -28,8 +37,10 @@ async def telegram_webhook(request: Request):
message = data.get("message", {})
chat_id = message.get("chat", {}).get("id")
text = message.get("text", "")
- user_id = message.get("from", {}).get("id")
+ logger.warning(
+ "[Legacy Telegram Webhook] compatibility path hit; prefer /api/v1/gateway/telegram/webhook"
+ )
logger.info(f"[Webhook] 收到消息: {text[:50]}...")
# 指令拦截:GitHub 深度审查
diff --git a/tests/remote_dispatch/test_fake_remote_adapter.py b/tests/remote_dispatch/test_fake_remote_adapter.py
new file mode 100644
index 00000000..88f9e9e0
--- /dev/null
+++ b/tests/remote_dispatch/test_fake_remote_adapter.py
@@ -0,0 +1,189 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from autoresearch.agent_protocol.models import DriverResult, JobSpec, RunSummary, ValidationReport
+from autoresearch.core.dispatch.fake_remote_adapter import FakeRemoteAdapter
+from autoresearch.shared.remote_run_contract import DispatchLane, FailureClass, RemoteRunStatus, RemoteTaskSpec
+
+
+def _success_summary(job: JobSpec) -> RunSummary:
+ return RunSummary(
+ run_id=job.run_id,
+ final_status="ready_for_promotion",
+ driver_result=DriverResult(
+ run_id=job.run_id,
+ agent_id=job.agent_id,
+ status="succeeded",
+ summary="local runner succeeded",
+ changed_paths=["src/demo.py"],
+ recommended_action="promote",
+ ),
+ validation=ValidationReport(run_id=job.run_id, passed=True),
+ promotion_patch_uri="artifacts/promotion.patch",
+ )
+
+
+def _task_spec(
+ *,
+ run_id: str,
+ lane: DispatchLane,
+ requested_lane: DispatchLane | None = None,
+ scenario: str | None = None,
+) -> RemoteTaskSpec:
+ metadata: dict[str, object] = {}
+ if scenario is not None:
+ metadata["remote_scenario"] = scenario
+ return RemoteTaskSpec(
+ run_id=run_id,
+ requested_lane=requested_lane or lane,
+ lane=lane,
+ runtime_mode="night",
+ planner_plan_id="plan_test",
+ planner_candidate_id="candidate_test",
+ job=JobSpec(run_id=run_id, agent_id="openhands", task="demo"),
+ metadata=metadata,
+ )
+
+
+def test_fake_remote_adapter_success_flow(tmp_path: Path) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir()
+ adapter = FakeRemoteAdapter(repo_root=repo_root, local_runner=_success_summary)
+ spec = _task_spec(run_id="run-success", lane=DispatchLane.REMOTE, scenario="success")
+
+ queued = adapter.dispatch(spec)
+ running = adapter.poll(spec.run_id)
+ heartbeat = adapter.heartbeat(spec.run_id)
+ terminal = adapter.poll(spec.run_id)
+ summary = adapter.fetch_summary(spec.run_id)
+
+ assert queued.status is RemoteRunStatus.QUEUED
+ assert running.status is RemoteRunStatus.RUNNING
+ assert heartbeat is not None
+ assert terminal.status is RemoteRunStatus.SUCCEEDED
+ assert summary.status is RemoteRunStatus.SUCCEEDED
+ assert summary.run_summary is None
+ assert all(not artifact_path.startswith("/") for artifact_path in summary.artifact_paths.values())
+ assert (
+ repo_root
+ / ".masfactory_runtime"
+ / "runs"
+ / spec.run_id
+ / "remote_control"
+ / "summary.json"
+ ).exists()
+
+
+def test_fake_remote_adapter_stalled_flow_uses_missing_heartbeat_signal(tmp_path: Path) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir()
+ adapter = FakeRemoteAdapter(repo_root=repo_root, local_runner=_success_summary)
+ spec = _task_spec(run_id="run-stalled", lane=DispatchLane.REMOTE, scenario="stalled")
+
+ adapter.dispatch(spec)
+ running = adapter.poll(spec.run_id)
+ stalled = adapter.poll(spec.run_id)
+ summary = adapter.fetch_summary(spec.run_id)
+
+ assert running.status is RemoteRunStatus.RUNNING
+ assert adapter.heartbeat(spec.run_id) is None
+ assert stalled.status is RemoteRunStatus.STALLED
+ assert summary.failure_class is FailureClass.EXECUTOR_STALLED
+
+
+def test_fake_remote_adapter_timeout_flow(tmp_path: Path) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir()
+ adapter = FakeRemoteAdapter(repo_root=repo_root, local_runner=_success_summary)
+ spec = _task_spec(run_id="run-timeout", lane=DispatchLane.REMOTE, scenario="timed_out")
+
+ adapter.dispatch(spec)
+ adapter.poll(spec.run_id)
+ timed_out = adapter.poll(spec.run_id)
+ summary = adapter.fetch_summary(spec.run_id)
+
+ assert timed_out.status is RemoteRunStatus.TIMED_OUT
+ assert summary.failure_class is FailureClass.TOOL_TIMEOUT
+
+
+@pytest.mark.parametrize(
+ ("scenario", "failure_class"),
+ [
+ ("env_missing", FailureClass.ENV_MISSING),
+ ("transient_network", FailureClass.TRANSIENT_NETWORK),
+ ],
+)
+def test_fake_remote_adapter_failure_mapping(
+ tmp_path: Path,
+ scenario: str,
+ failure_class: FailureClass,
+) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir()
+ adapter = FakeRemoteAdapter(repo_root=repo_root, local_runner=_success_summary)
+ spec = _task_spec(run_id=f"run-{scenario}", lane=DispatchLane.REMOTE, scenario=scenario)
+
+ adapter.dispatch(spec)
+ failed = adapter.poll(spec.run_id)
+ summary = adapter.fetch_summary(spec.run_id)
+
+ assert failed.status is RemoteRunStatus.FAILED
+ assert summary.failure_class is failure_class
+
+
+def test_fake_remote_adapter_records_remote_to_local_fallback(tmp_path: Path) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir()
+ adapter = FakeRemoteAdapter(repo_root=repo_root, local_runner=_success_summary)
+ spec = _task_spec(
+ run_id="run-fallback",
+ lane=DispatchLane.LOCAL,
+ requested_lane=DispatchLane.REMOTE,
+ )
+
+ queued = adapter.dispatch(spec)
+ adapter.poll(spec.run_id)
+ terminal = adapter.poll(spec.run_id)
+ summary = adapter.fetch_summary(spec.run_id)
+
+ assert queued.requested_lane is DispatchLane.REMOTE
+ assert queued.lane is DispatchLane.LOCAL
+ assert queued.fallback_reason is not None
+ assert terminal.status is RemoteRunStatus.SUCCEEDED
+ assert summary.run_summary is not None
+ assert summary.run_summary.final_status == "ready_for_promotion"
+
+
+def test_fake_remote_adapter_result_fetch_failure_raises(tmp_path: Path) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir()
+ adapter = FakeRemoteAdapter(repo_root=repo_root, local_runner=_success_summary)
+ spec = _task_spec(
+ run_id="run-fetch-failure",
+ lane=DispatchLane.REMOTE,
+ scenario="result_fetch_failure",
+ )
+
+ adapter.dispatch(spec)
+ adapter.poll(spec.run_id)
+ terminal = adapter.poll(spec.run_id)
+
+ assert terminal.status is RemoteRunStatus.SUCCEEDED
+ with pytest.raises(FileNotFoundError):
+ adapter.fetch_summary(spec.run_id)
+
+
+def test_fake_remote_adapter_rejects_runtime_root_outside_repo_root(tmp_path: Path) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir()
+ runtime_root = tmp_path / "external-runs"
+
+ with pytest.raises(ValueError, match="runtime_root must live under repo_root"):
+ FakeRemoteAdapter(
+ repo_root=repo_root,
+ local_runner=_success_summary,
+ runtime_root=runtime_root,
+ )
diff --git a/tests/test_admin_backend.py b/tests/test_admin_backend.py
index 866cc49a..0b4420fb 100644
--- a/tests/test_admin_backend.py
+++ b/tests/test_admin_backend.py
@@ -1,5 +1,6 @@
from __future__ import annotations
+import json
import sys
import time
from pathlib import Path
@@ -7,12 +8,16 @@
import pytest
from fastapi.testclient import TestClient
+from autoresearch.agent_protocol.models import DriverMetrics, DriverResult, JobSpec, RunSummary, ValidationReport
from autoresearch.api.dependencies import (
get_admin_auth_service,
get_admin_config_service,
+ get_agent_audit_trail_service,
get_approval_store_service,
+ get_autoresearch_planner_service,
get_capability_provider_registry,
get_claude_agent_service,
+ get_manager_agent_service,
get_openclaw_compat_service,
)
from autoresearch.api.main import app
@@ -21,9 +26,14 @@
from autoresearch.core.services.admin_auth import AdminAuthService
from autoresearch.core.services.admin_config import AdminConfigService
from autoresearch.core.services.admin_secrets import AdminSecretCipher
+from autoresearch.core.services.agent_audit_trail import AgentAuditTrailService
from autoresearch.core.services.approval_store import ApprovalStoreService
+from autoresearch.core.services.autoresearch_planner import AutoResearchPlannerService
from autoresearch.core.services.claude_agents import ClaudeAgentService
+from autoresearch.agents.manager_agent import ManagerAgentService
from autoresearch.core.services.openclaw_compat import OpenClawCompatService
+from autoresearch.shared.autoresearch_planner_contract import AutoResearchPlannerRequest
+from autoresearch.shared.manager_agent_contract import ManagerDispatchRequest
from autoresearch.shared.models import (
AdminAgentConfigRead,
AdminChannelConfigRead,
@@ -31,10 +41,11 @@
AdminSecretRecordRead,
ApprovalRequestCreateRequest,
ApprovalRequestRead,
+ ClaudeAgentCreateRequest,
ClaudeAgentRunRead,
OpenClawSessionRead,
)
-from autoresearch.shared.store import SQLiteModelRepository
+from autoresearch.shared.store import InMemoryRepository, SQLiteModelRepository
class _StubCapabilityProvider:
@@ -112,6 +123,50 @@ def query_events(self, query):
return {}
+def _write(path: Path, content: str) -> None:
+ path.parent.mkdir(parents=True, exist_ok=True)
+ path.write_text(content, encoding="utf-8")
+
+
+def _successful_run_summary(job: JobSpec) -> RunSummary:
+ patch_path = Path("/tmp") / f"{job.run_id}.patch"
+ patch_path.write_text(
+ "\n".join(
+ [
+ "diff --git a/src/demo.py b/src/demo.py",
+ "--- a/src/demo.py",
+ "+++ b/src/demo.py",
+ "@@ -1 +1 @@",
+ "+VALUE = 'READY_FOR_PROMOTION'",
+ "",
+ ]
+ ),
+ encoding="utf-8",
+ )
+ return RunSummary(
+ run_id=job.run_id,
+ final_status="ready_for_promotion",
+ driver_result=DriverResult(
+ run_id=job.run_id,
+ agent_id=job.agent_id,
+ status="succeeded",
+ summary="admin audit flow completed successfully",
+ changed_paths=list(job.policy.allowed_paths),
+ metrics=DriverMetrics(
+ duration_ms=1800,
+ steps=3,
+ commands=2,
+ first_progress_ms=300,
+ first_scoped_write_ms=900,
+ first_state_heartbeat_ms=300,
+ ),
+ recommended_action="promote",
+ ),
+ validation=ValidationReport(run_id=job.run_id, passed=True),
+ promotion_patch_uri=str(patch_path),
+ )
+
+
@pytest.fixture
def admin_client(tmp_path: Path) -> TestClient:
db_path = tmp_path / "admin.sqlite3"
@@ -133,6 +188,22 @@ def admin_client(tmp_path: Path) -> TestClient:
max_agents=10,
max_depth=3,
)
+ planner_service = AutoResearchPlannerService(
+ repository=InMemoryRepository(),
+ repo_root=tmp_path,
+ dispatch_runner=_successful_run_summary,
+ )
+ manager_service = ManagerAgentService(
+ repository=InMemoryRepository(),
+ repo_root=tmp_path,
+ dispatch_runner=_successful_run_summary,
+ )
+ audit_trail_service = AgentAuditTrailService(
+ repo_root=tmp_path,
+ planner_service=planner_service,
+ manager_service=manager_service,
+ agent_service=claude_service,
+ )
admin_service = AdminConfigService(
agent_repository=SQLiteModelRepository(
db_path=db_path,
@@ -174,6 +245,9 @@ def admin_client(tmp_path: Path) -> TestClient:
app.dependency_overrides[get_openclaw_compat_service] = lambda: openclaw_service
app.dependency_overrides[get_claude_agent_service] = lambda: claude_service
+ app.dependency_overrides[get_autoresearch_planner_service] = lambda: planner_service
+ app.dependency_overrides[get_manager_agent_service] = lambda: manager_service
+ app.dependency_overrides[get_agent_audit_trail_service] = lambda: audit_trail_service
app.dependency_overrides[get_admin_config_service] = lambda: admin_service
app.dependency_overrides[get_admin_auth_service] = lambda: auth_service
app.dependency_overrides[get_approval_store_service] = lambda: approval_store
@@ -190,6 +264,10 @@ def admin_client(tmp_path: Path) -> TestClient:
client.headers.update({"authorization": f"Bearer {token}"})
setattr(client, "_admin_service", admin_service)
setattr(client, "_approval_store", approval_store)
+ setattr(client, "_planner_service", planner_service)
+ setattr(client, "_manager_service", manager_service)
+ setattr(client, "_claude_service", claude_service)
+ setattr(client, "_repo_root", tmp_path)
yield client
app.dependency_overrides.clear()
@@ -214,6 +292,8 @@ def test_admin_requires_bearer_token(admin_client: TestClient) -> None:
assert denied.status_code == 401
denied_capabilities = admin_client.get("/api/v1/admin/capabilities")
assert denied_capabilities.status_code == 401
+ denied_audit = admin_client.get("/api/v1/admin/audit-trail")
+ assert denied_audit.status_code == 401
if existing is not None:
admin_client.headers["authorization"] = existing
@@ -223,8 +303,11 @@ def test_admin_view_contains_capability_inventory_section(admin_client: TestClie
assert response.status_code == 200
assert "Capability Inventory" in response.text
+ assert "Agent Audit Trail" in response.text
assert "Approval Queue" in response.text
assert "Managed Skill Queue" in response.text
+ assert "/api/v1/admin/audit-trail" in response.text
+ assert "loadAuditDetail" in response.text
assert "/api/v1/admin/capabilities" in response.text
assert "/api/v1/admin/approvals" in response.text
assert "/api/v1/admin/skills/status" in response.text
@@ -250,6 +333,216 @@ def test_admin_capability_snapshot_lists_provider_inventory(admin_client: TestCl
assert skill_provider["skills"][0]["skill_key"] == "daily_brief"
+def test_admin_audit_trail_lists_recent_worker_activity(admin_client: TestClient) -> None:
+ repo_root: Path = getattr(admin_client, "_repo_root")
+ planner_service: AutoResearchPlannerService = getattr(admin_client, "_planner_service")
+ manager_service: ManagerAgentService = getattr(admin_client, "_manager_service")
+ claude_service: ClaudeAgentService = getattr(admin_client, "_claude_service")
+
+ _write(
+ repo_root / "src" / "autoresearch" / "core" / "services" / "audit_target.py",
+ "def collect_audit_events() -> bool:\n # FIXME: add audit timeline regression coverage\n return True\n",
+ )
+ _write(repo_root / "tests" / "test_audit_target.py", "def test_collect_audit_events():\n assert True\n")
+ _write(repo_root / "panel" / "app.tsx", "export const App = () => null;\n")
+ _write(repo_root / "src" / "autoresearch" / "api" / "routers" / "admin.py", "router = object()\n")
+ _write(repo_root / "src" / "autoresearch" / "api" / "routers" / "panel.py", "router = object()\n")
+ _write(repo_root / "tests" / "test_panel_security.py", "def test_panel_ok():\n assert True\n")
+ _write(repo_root / "tests" / "test_admin_managed_skills.py", "def test_admin_ok():\n assert True\n")
+
+ plan = planner_service.create(
+ AutoResearchPlannerRequest(
+ goal="Find the next audit trail hardening task.",
+ metadata={"runtime_mode": "night", "remote_available": False},
+ )
+ )
+ planner_service.request_dispatch(plan.plan_id, requested_by="admin-ui")
+ planner_service.execute_dispatch(plan.plan_id)
+
+ dispatch = manager_service.create_dispatch(
+ ManagerDispatchRequest(
+ prompt="在 Admin Panel 里加一个带图表的智能体行为审计大屏。",
+ auto_dispatch=False,
+ )
+ )
+ manager_service.execute_dispatch(dispatch.dispatch_id)
+
+ claude_service.create(
+ ClaudeAgentCreateRequest(
+ task_name="audit reviewer",
+ prompt="Inspect the latest worker execution traces.",
+ command_override=[sys.executable, "-c", "print('audit-review')"],
+ append_prompt=False,
+ metadata={
+ "allowed_paths": ["src/autoresearch/api/routers/admin.py"],
+ "changed_paths": ["src/autoresearch/api/routers/admin.py"],
+ },
+ )
+ )
+
+ runtime_summary_path = (
+ repo_root / ".masfactory_runtime" / "smokes" / "audit-smoke" / "artifacts" / "chain_summary.json"
+ )
+ runtime_summary_path.parent.mkdir(parents=True, exist_ok=True)
+ runtime_patch_path = runtime_summary_path.parent / "promotion.patch"
+ runtime_patch_path.write_text(
+ "\n".join(
+ [
+ "diff --git a/src/autoresearch/api/routers/admin.py b/src/autoresearch/api/routers/admin.py",
+ "--- a/src/autoresearch/api/routers/admin.py",
+ "+++ b/src/autoresearch/api/routers/admin.py",
+ "@@ -1 +1 @@",
+ "+AUDIT_TRAIL = True",
+ "",
+ ]
+ ),
+ encoding="utf-8",
+ )
+ runtime_summary_path.write_text(
+ json.dumps(
+ {
+ "run_id": "runtime-audit-001",
+ "status": "ready_for_promotion",
+ "task": "Rebuild audit trail timeline",
+ "isolated_workspace": "/tmp/audit-runtime",
+ "driver_result": {
+ "agent_id": "openhands",
+ "summary": "runtime artifact captured successfully",
+ "changed_paths": ["src/autoresearch/api/routers/admin.py"],
+ "metrics": {
+ "duration_ms": 2400,
+ "first_progress_ms": 700,
+ "first_scoped_write_ms": 1100,
+ "first_state_heartbeat_ms": 700,
+ },
+ },
+ "promotion": {
+ "changed_files": ["src/autoresearch/api/routers/admin.py"],
+ "diff_stats": {"files_changed": 1},
+ "patch_uri": str(runtime_patch_path),
+ },
+ }
+ ),
+ encoding="utf-8",
+ )
+
+ failed_runtime_path = (
+ repo_root / "logs" / "audit" / "openhands" / "jobs" / "audit-failed-001" / "chain_summary.json"
+ )
+ failed_runtime_path.parent.mkdir(parents=True, exist_ok=True)
+ failed_patch_path = failed_runtime_path.parent / "promotion.patch"
+ failed_patch_path.write_text(
+ "\n".join(
+ [
+ "diff --git a/src/autoresearch/core/services/agent_audit_trail.py b/src/autoresearch/core/services/agent_audit_trail.py",
+ "--- a/src/autoresearch/core/services/agent_audit_trail.py",
+ "+++ b/src/autoresearch/core/services/agent_audit_trail.py",
+ "@@ -1 +1 @@",
+ "+BROKEN = True",
+ "",
+ ]
+ ),
+ encoding="utf-8",
+ )
+ failed_runtime_path.write_text(
+ json.dumps(
+ {
+ "run_id": "runtime-audit-failed-001",
+ "status": "failed",
+ "task": "Patch audit trail filters",
+ "error": "validation command failed",
+ "traceback": "Traceback (most recent call last):\\nValueError: missing token",
+ "artifacts": {"promotion_patch": str(failed_patch_path)},
+ "driver_result": {
+ "agent_id": "openhands",
+ "summary": "worker failed during validation",
+ "changed_paths": ["src/autoresearch/core/services/agent_audit_trail.py"],
+ "metrics": {
+ "duration_ms": 3200,
+ "first_progress_ms": 600,
+ "first_scoped_write_ms": 1600,
+ "first_state_heartbeat_ms": 600,
+ },
+ "error": "pytest exited with code 1",
+ },
+ }
+ ),
+ encoding="utf-8",
+ )
+
+ response = admin_client.get("/api/v1/admin/audit-trail?limit=20")
+
+ assert response.status_code == 200
+ payload = response.json()
+ assert payload["stats"]["total"] >= 5
+ assert payload["stats"]["queued"] >= 1
+ assert payload["stats"]["succeeded"] >= 2
+ assert payload["stats"]["failed"] >= 1
+ sources = {item["source"] for item in payload["items"]}
+ assert {"manager_task", "autoresearch_plan", "claude_agent", "runtime_artifact"} <= sources
+
+ planner_item = next(item for item in payload["items"] if item["source"] == "autoresearch_plan")
+ assert planner_item["final_status"] == "ready_for_promotion"
+ assert planner_item["status"] == "dispatched"
+ assert "tests/test_audit_target.py" in planner_item["scope_paths"]
+ assert planner_item["first_progress_ms"] == 300
+ assert planner_item["first_scoped_write_ms"] == 900
+ assert planner_item["first_state_heartbeat_ms"] == 300
+ assert planner_item["metadata"]["dispatch_requested_lane"] == "remote"
+ assert planner_item["metadata"]["dispatch_lane"] == "local"
+ assert planner_item["metadata"]["dispatch_remote_status"] == "succeeded"
+ assert planner_item["metadata"]["dispatch_failure_class"] is None
+ assert planner_item["metadata"]["dispatch_recovery_action"] is None
+ assert planner_item["metadata"]["dispatch_fallback_reason"] is not None
+
+ manager_backend = next(
+ item
+ for item in payload["items"]
+ if item["source"] == "manager_task" and item["metadata"]["stage"] == "backend"
+ )
+ assert manager_backend["final_status"] == "ready_for_promotion"
+ assert "src/autoresearch/api/routers/admin.py" in manager_backend["scope_paths"]
+
+ runtime_item = next(
+ item
+ for item in payload["items"]
+ if item["source"] == "runtime_artifact" and item["run_id"] == "runtime-audit-001"
+ )
+ assert runtime_item["isolated_workspace"] == "/tmp/audit-runtime"
+ assert runtime_item["patch_uri"] == str(runtime_patch_path)
+ assert runtime_item["agent_role"] == "worker"
+ assert runtime_item["first_progress_ms"] == 700
+ assert runtime_item["first_scoped_write_ms"] == 1100
+ assert runtime_item["first_state_heartbeat_ms"] == 700
+
+ detail_response = admin_client.get(f"/api/v1/admin/audit-trail/{planner_item['entry_id']}")
+ assert detail_response.status_code == 200
+ detail_payload = detail_response.json()
+ assert detail_payload["input_prompt"] == "Find the next audit trail hardening task."
+ assert detail_payload["job_spec"]["task"] != ""
+ assert "diff --git" in detail_payload["patch_text"]
+ assert detail_payload["patch_truncated"] is False
+ assert detail_payload["entry"]["first_progress_ms"] == 300
+ assert detail_payload["raw_record"]["autoresearch_plan"]["dispatch_run"]["requested_lane"] == "remote"
+ assert detail_payload["raw_record"]["autoresearch_plan"]["dispatch_run"]["lane"] == "local"
+ assert detail_payload["raw_record"]["autoresearch_plan"]["dispatch_run"]["status"] == "succeeded"
+ assert detail_payload["raw_record"]["autoresearch_plan"]["run_summary"]["final_status"] == "ready_for_promotion"
+
+ failed_response = admin_client.get("/api/v1/admin/audit-trail?limit=20&status_filter=failed&agent_role=worker")
+ assert failed_response.status_code == 200
+ failed_items = failed_response.json()["items"]
+ assert len(failed_items) == 1
+ assert failed_items[0]["run_id"] == "runtime-audit-failed-001"
+
+ failed_detail = admin_client.get(f"/api/v1/admin/audit-trail/{failed_items[0]['entry_id']}")
+ assert failed_detail.status_code == 200
+ failed_detail_payload = failed_detail.json()
+ assert failed_detail_payload["error_reason"] == "validation command failed"
+ assert "Traceback" in (failed_detail_payload["traceback"] or "")
+ assert "diff --git" in failed_detail_payload["patch_text"]
+ assert failed_detail_payload["entry"]["first_progress_ms"] == 600
+
+
def test_admin_approvals_list_and_resolve(admin_client: TestClient) -> None:
approval_store = getattr(admin_client, "_approval_store")
owned = approval_store.create_request(
diff --git a/tests/test_admin_managed_skills.py b/tests/test_admin_managed_skills.py
index d36c6ea9..f92fce16 100644
--- a/tests/test_admin_managed_skills.py
+++ b/tests/test_admin_managed_skills.py
@@ -34,8 +34,9 @@
class StubTelegramNotifier:
- def __init__(self) -> None:
+ def __init__(self, *, send_results: list[bool] | None = None) -> None:
self.messages: list[dict[str, object]] = []
+ self._send_results = list(send_results or [])
@property
def enabled(self) -> bool:
@@ -57,6 +58,8 @@ def send_message(
"reply_markup": reply_markup,
}
)
+ if self._send_results:
+ return self._send_results.pop(0)
return True
@@ -188,6 +191,7 @@ def admin_skill_client(tmp_path: Path) -> TestClient:
telegram_bot_token="123456:TEST_BOT_TOKEN",
telegram_init_data_max_age_seconds=900,
base_url="https://panel.example/api/v1/panel/view",
+ mini_app_url="https://panel.example/api/v1/panel/view",
allowed_uids={"10001"},
)
notifier = StubTelegramNotifier()
@@ -283,6 +287,7 @@ def test_admin_managed_skill_promote_creates_approval_and_mini_app_link(
assert "actionNonce=" in payload["mini_app_url"]
assert "actionHash=" in payload["mini_app_url"]
assert "actionIssuedAt=" in payload["mini_app_url"]
+ assert "token=" in payload["mini_app_url"]
assert payload["notification_sent"] is True
assert notifier.messages[0]["chat_id"] == "10001"
reply_markup = notifier.messages[0]["reply_markup"]
@@ -291,6 +296,79 @@ def test_admin_managed_skill_promote_creates_approval_and_mini_app_link(
assert button["web_app"]["url"] == payload["mini_app_url"]
+def test_admin_managed_skill_promote_falls_back_to_url_button_when_web_app_send_fails(
+ tmp_path: Path,
+) -> None:
+ db_path = tmp_path / "admin-managed-skills-fallback.sqlite3"
+ auth_service = AdminAuthService(
+ secret="test-admin-jwt-secret",
+ bootstrap_key="bootstrap-test-key",
+ )
+ approval_store = ApprovalStoreService(
+ repository=SQLiteModelRepository(
+ db_path=db_path,
+ table_name="approval_requests_admin_skill_fallback",
+ model_cls=ApprovalRequestRead,
+ )
+ )
+ private_key, public_key = _trusted_keys()
+ registry = ManagedSkillRegistryService(
+ repo_root=tmp_path,
+ repository=InMemoryRepository(),
+ quarantine_root=tmp_path / "artifacts" / "managed_skills" / "quarantine",
+ active_root=tmp_path / "artifacts" / "managed_skills" / "active",
+ trusted_signers={"test-signer": public_key},
+ allowed_capabilities={"prompt", "filesystem_read"},
+ )
+ panel_access = PanelAccessService(
+ secret="panel-secret",
+ telegram_bot_token="123456:TEST_BOT_TOKEN",
+ telegram_init_data_max_age_seconds=900,
+ base_url="https://panel.example/api/v1/panel/view",
+ mini_app_url="https://panel.example/api/v1/panel/view",
+ allowed_uids={"10001"},
+ )
+ notifier = StubTelegramNotifier(send_results=[False, True])
+
+ app.dependency_overrides[get_admin_auth_service] = lambda: auth_service
+ app.dependency_overrides[get_approval_store_service] = lambda: approval_store
+ app.dependency_overrides[get_managed_skill_registry_service] = lambda: registry
+ app.dependency_overrides[get_panel_access_service] = lambda: panel_access
+ app.dependency_overrides[get_telegram_notifier_service] = lambda: notifier
+
+ with TestClient(app) as client:
+ token_response = client.post(
+ "/api/v1/admin/auth/token",
+ json={"subject": "test-owner", "roles": ["owner"], "ttl_seconds": 3600},
+ headers={"x-admin-bootstrap-key": "bootstrap-test-key"},
+ )
+ assert token_response.status_code == 200
+ client.headers.update({"authorization": f"Bearer {token_response.json()['token']}"})
+
+ bundle_dir = _build_bundle(tmp_path / "bundle-promote-fallback", private_key=private_key)
+ validated = registry.run_cold_validation(
+ registry.install_to_quarantine(
+ ManagedSkillInstallRequest(bundle_dir=str(bundle_dir), requested_by="owner")
+ ).install_id
+ )
+
+ response = client.post(
+ f"/api/v1/admin/skills/{validated.install_id}/promote",
+ json={"note": "retry with url button"},
+ )
+
+ app.dependency_overrides.clear()
+
+ assert response.status_code == 200
+ payload = response.json()
+ assert payload["notification_sent"] is True
+ assert len(notifier.messages) == 2
+ assert notifier.messages[0]["reply_markup"]["inline_keyboard"][0][0]["web_app"]["url"] == payload["mini_app_url"]
+ assert notifier.messages[1]["reply_markup"] == {
+ "inline_keyboard": [[{"text": "打开 Panel 审批", "url": payload["mini_app_url"]}]]
+ }
+
+
def test_admin_managed_skill_promote_execute_requires_approved_telegram_flow(
admin_skill_client: TestClient,
tmp_path: Path,
diff --git a/tests/test_agent_fallbacks.py b/tests/test_agent_fallbacks.py
index dc75f7c8..897f818a 100644
--- a/tests/test_agent_fallbacks.py
+++ b/tests/test_agent_fallbacks.py
@@ -101,3 +101,90 @@ def test_retry_then_fallback_agent(tmp_path: Path) -> None:
assert summary.final_status == "ready_for_promotion"
assert summary.driver_result.agent_id == "secondary"
+
+
+def test_stalled_retry_is_skipped_before_fallback_agent(
+ tmp_path: Path,
+ monkeypatch,
+) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir()
+ (repo_root / "src").mkdir(parents=True)
+ (repo_root / "src" / "base.py").write_text("x = 1\n", encoding="utf-8")
+
+ stall_adapter = repo_root / "drivers" / "stall_adapter.sh"
+ stall_adapter.parent.mkdir(parents=True, exist_ok=True)
+ stall_adapter.write_text(
+ """#!/usr/bin/env bash
+set -euo pipefail
+sleep 30
+""",
+ encoding="utf-8",
+ )
+ stall_adapter.chmod(0o755)
+
+ success_adapter = repo_root / "drivers" / "fallback_success.sh"
+ success_adapter.write_text(
+ """#!/usr/bin/env bash
+set -euo pipefail
+mkdir -p "$AEP_WORKSPACE/src"
+echo 'print(9)' > "$AEP_WORKSPACE/src/fallback.py"
+cat > "$AEP_RESULT_PATH" <<'JSON'
+{
+ "protocol_version": "aep/v0",
+ "run_id": "run-stall-fallback",
+ "agent_id": "secondary",
+ "attempt": 1,
+ "status": "succeeded",
+ "summary": "secondary succeeded",
+ "changed_paths": [],
+ "output_artifacts": [],
+ "metrics": {"duration_ms": 0, "steps": 1, "commands": 1, "prompt_tokens": null, "completion_tokens": null},
+ "recommended_action": "promote",
+ "error": null
+}
+JSON
+exit 0
+""",
+ encoding="utf-8",
+ )
+ success_adapter.chmod(0o755)
+
+ _write_manifest(repo_root, "primary", "drivers/stall_adapter.sh")
+ _write_manifest(repo_root, "secondary", "drivers/fallback_success.sh")
+
+ runtime_root = tmp_path / "runtime"
+ runner = AgentExecutionRunner(
+ repo_root=repo_root,
+ runtime_root=runtime_root,
+ manifests_dir=repo_root / "configs" / "agents",
+ )
+ monkeypatch.setattr(runner, "_stall_progress_timeout_sec", lambda timeout_sec: 2)
+
+ summary = runner.run_job(
+ JobSpec(
+ run_id="run-stall-fallback",
+ agent_id="primary",
+ task="demo",
+ fallback=[
+ FallbackStep(action="retry", max_attempts=1),
+ FallbackStep(action="fallback_agent", agent_id="secondary", max_attempts=1),
+ ],
+ )
+ )
+
+ events_path = runtime_root / "run-stall-fallback" / "events.ndjson"
+ attempts = [
+ json.loads(line)
+ for line in events_path.read_text(encoding="utf-8").splitlines()
+ if line.strip()
+ ]
+ started_agents = [item["agent_id"] for item in attempts if item.get("type") == "attempt_started"]
+
+ assert summary.final_status == "ready_for_promotion"
+ assert summary.driver_result.agent_id == "secondary"
+ assert started_agents == ["primary", "secondary"]
+ assert any(
+ item.get("type") == "fallback_skipped" and item.get("reason") == "stalled_no_progress"
+ for item in attempts
+ )
diff --git a/tests/test_agent_policy_merge.py b/tests/test_agent_policy_merge.py
index 7aa5ef10..8cefb47b 100644
--- a/tests/test_agent_policy_merge.py
+++ b/tests/test_agent_policy_merge.py
@@ -2,6 +2,8 @@
from autoresearch.agent_protocol.models import ExecutionPolicy
from autoresearch.agent_protocol.policy import build_effective_policy
+import json
+from pathlib import Path
def test_policy_merge_deny_wins() -> None:
@@ -34,7 +36,7 @@ def test_policy_merge_deny_wins() -> None:
assert effective.allowed_paths == ["src/**"]
assert "src/secrets/**" in effective.forbidden_paths
assert effective.max_changed_files == 20
- assert effective.max_patch_lines == 500
+ assert effective.max_patch_lines == 900
assert effective.allow_binary_changes is False
@@ -45,3 +47,38 @@ def test_policy_merge_preserves_more_specific_file_scope() -> None:
effective = build_effective_policy(manifest_policy, job_policy).merged
assert effective.allowed_paths == ["src/generated_worker.py"]
+
+
+def test_policy_merge_allows_script_targets_when_manifest_and_job_both_allow_them() -> None:
+ manifest_policy = ExecutionPolicy(allowed_paths=["src/**", "tests/**", "scripts/**"])
+ job_policy = ExecutionPolicy(
+ allowed_paths=["scripts/check_prompt_hygiene.py", "tests/test_check_prompt_hygiene.py"]
+ )
+
+ effective = build_effective_policy(manifest_policy, job_policy).merged
+
+ assert effective.allowed_paths == [
+ "tests/test_check_prompt_hygiene.py",
+ "scripts/check_prompt_hygiene.py",
+ ]
+
+
+def test_policy_merge_allows_isolated_apps_targets_when_job_requests_business_surface() -> None:
+ manifest_policy = ExecutionPolicy(allowed_paths=["src/**", "tests/**", "apps/**"])
+ job_policy = ExecutionPolicy(
+ allowed_paths=["apps/malu/**", "tests/apps/test_malu_landing_page.py"]
+ )
+
+ effective = build_effective_policy(manifest_policy, job_policy).merged
+
+ assert effective.allowed_paths == [
+ "tests/apps/test_malu_landing_page.py",
+ "apps/malu/**",
+ ]
+
+
+def test_openhands_manifest_default_policy_includes_apps_surface() -> None:
+ manifest_path = Path(__file__).resolve().parents[1] / "configs" / "agents" / "openhands.yaml"
+ payload = json.loads(manifest_path.read_text(encoding="utf-8"))
+
+ assert "apps/**" in payload["policy_defaults"]["allowed_paths"]
diff --git a/tests/test_agent_runner_outcomes.py b/tests/test_agent_runner_outcomes.py
index b4ffaf38..e9e0fa30 100644
--- a/tests/test_agent_runner_outcomes.py
+++ b/tests/test_agent_runner_outcomes.py
@@ -2,8 +2,16 @@
import json
from pathlib import Path
+import sys
-from autoresearch.agent_protocol.models import JobSpec
+from autoresearch.agent_protocol.models import (
+ DriverResult,
+ ExecutionPolicy,
+ FallbackStep,
+ JobSpec,
+ ValidatorSpec,
+)
+from autoresearch.agent_protocol.policy import build_effective_policy
from autoresearch.executions.runner import AgentExecutionRunner
@@ -63,6 +71,52 @@ def test_failed_driver_is_terminal_failure_not_human_review(tmp_path: Path) -> N
assert checks["builtin.driver_success"].passed is False
+def test_runner_persists_summary_when_attempt_crashes_unexpectedly(
+ tmp_path: Path,
+ monkeypatch,
+) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir()
+ (repo_root / "src").mkdir(parents=True)
+ (repo_root / "src" / "base.py").write_text("x = 1\n", encoding="utf-8")
+
+ adapter = repo_root / "drivers" / "explosive_adapter.sh"
+ adapter.parent.mkdir(parents=True, exist_ok=True)
+ adapter.write_text("#!/usr/bin/env bash\nexit 0\n", encoding="utf-8")
+ adapter.chmod(0o755)
+ _write_manifest(repo_root, "explosive", "drivers/explosive_adapter.sh")
+
+ runner = AgentExecutionRunner(
+ repo_root=repo_root,
+ runtime_root=tmp_path / "runtime",
+ manifests_dir=repo_root / "configs" / "agents",
+ )
+
+ def _boom(**_: object) -> DriverResult:
+ raise RuntimeError("synthetic invoke failure")
+
+ monkeypatch.setattr(runner, "_invoke_adapter", _boom)
+
+ summary = runner.run_job(JobSpec(run_id="run-explosive", agent_id="explosive", task="demo"))
+
+ summary_path = tmp_path / "runtime" / "run-explosive" / "summary.json"
+ events_path = tmp_path / "runtime" / "run-explosive" / "events.ndjson"
+
+ assert summary.final_status == "failed"
+ assert summary.driver_result.status == "contract_error"
+ assert "RuntimeError" in (summary.driver_result.error or "")
+ assert summary_path.exists()
+ payload = json.loads(summary_path.read_text(encoding="utf-8"))
+ assert payload["final_status"] == "failed"
+ assert payload["driver_result"]["status"] == "contract_error"
+ events = [
+ json.loads(line)
+ for line in events_path.read_text(encoding="utf-8").splitlines()
+ if line.strip()
+ ]
+ assert any(item.get("type") == "runner_exception" for item in events)
+
+
def test_zero_change_success_is_blocked(tmp_path: Path) -> None:
repo_root = tmp_path / "repo"
repo_root.mkdir()
@@ -105,3 +159,332 @@ def test_zero_change_success_is_blocked(tmp_path: Path) -> None:
assert summary.final_status == "blocked"
checks = {item.id: item for item in summary.validation.checks}
assert checks["builtin.nonempty_change_for_promote"].passed is False
+
+
+def test_openhands_environment_preflight_blocks_dirty_runtime_before_attempt(
+ tmp_path: Path,
+ monkeypatch,
+) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir()
+ (repo_root / "src").mkdir(parents=True)
+ (repo_root / "src" / "base.py").write_text("x = 1\n", encoding="utf-8")
+
+ adapter = repo_root / "drivers" / "openhands_adapter.sh"
+ adapter.parent.mkdir(parents=True, exist_ok=True)
+ adapter.write_text(
+ """#!/usr/bin/env bash
+set -euo pipefail
+echo "adapter should not run" > "$AEP_WORKSPACE/should_not_exist.txt"
+cat > "$AEP_RESULT_PATH" <<'JSON'
+{
+ "protocol_version": "aep/v0",
+ "run_id": "run-preflight",
+ "agent_id": "openhands",
+ "attempt": 1,
+ "status": "succeeded",
+ "summary": "should not happen",
+ "changed_paths": ["should_not_exist.txt"],
+ "output_artifacts": [],
+ "metrics": {"duration_ms": 0, "steps": 0, "commands": 0, "prompt_tokens": null, "completion_tokens": null},
+ "recommended_action": "promote",
+ "error": null
+}
+JSON
+""",
+ encoding="utf-8",
+ )
+ adapter.chmod(0o755)
+ _write_manifest(repo_root, "openhands", "drivers/openhands_adapter.sh")
+
+ preflight = repo_root / "scripts" / "fake_preflight.sh"
+ preflight.parent.mkdir(parents=True, exist_ok=True)
+ preflight.write_text(
+ """#!/usr/bin/env bash
+set -euo pipefail
+echo "docker socket is stale for current user" >&2
+exit 1
+""",
+ encoding="utf-8",
+ )
+ preflight.chmod(0o755)
+
+ runner = AgentExecutionRunner(
+ repo_root=repo_root,
+ runtime_root=tmp_path / "runtime",
+ manifests_dir=repo_root / "configs" / "agents",
+ )
+ monkeypatch.setenv("OPENHANDS_RUNTIME", "ai-lab")
+ monkeypatch.setenv("OPENHANDS_PREFLIGHT_CMD", f"bash {preflight}")
+
+ summary = runner.run_job(
+ JobSpec(
+ run_id="run-preflight",
+ agent_id="openhands",
+ task="demo",
+ )
+ )
+
+ events_path = tmp_path / "runtime" / "run-preflight" / "events.ndjson"
+ events = [
+ json.loads(line)
+ for line in events_path.read_text(encoding="utf-8").splitlines()
+ if line.strip()
+ ]
+
+ assert summary.final_status == "failed"
+ assert summary.driver_result.status == "contract_error"
+ assert summary.driver_result.error is not None
+ assert summary.driver_result.error.startswith("EnvironmentCheckFailed:")
+ assert not any(item.get("type") == "attempt_started" for item in events)
+ assert any(
+ item.get("type") == "attempt_blocked"
+ and item.get("reason") == "environment_preflight_failed"
+ for item in events
+ )
+
+
+def test_openhands_ai_lab_env_strips_ambient_path_overrides(tmp_path: Path, monkeypatch) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir()
+ (repo_root / "ai_lab.env").write_text(
+ "WORKSPACE_DIR=/Volumes/AI_LAB/ai_lab/workspace\n"
+ "LOG_DIR=/Volumes/AI_LAB/ai_lab/logs\n",
+ encoding="utf-8",
+ )
+
+ runner = AgentExecutionRunner(
+ repo_root=repo_root,
+ runtime_root=tmp_path / "runtime",
+ manifests_dir=repo_root / "configs" / "agents",
+ )
+ monkeypatch.setenv("OPENHANDS_RUNTIME", "ai-lab")
+ monkeypatch.setenv("ENV_FILE", "/tmp/foreign.env")
+ monkeypatch.setenv("OPENHANDS_ENV_FILE", "/tmp/foreign-openhands.env")
+ monkeypatch.setenv("WORKSPACE_DIR", "/Users/ai_lab/workspace")
+ monkeypatch.setenv("LOG_DIR", "/Users/ai_lab/logs")
+ monkeypatch.setenv("OPENHANDS_HOME_DIR", "/Users/ai_lab/logs/openhands-home")
+
+ env = runner._build_openhands_ai_lab_env()
+
+ assert env["ENV_FILE"] == str(repo_root / "ai_lab.env")
+ assert env["OPENHANDS_ENV_FILE"] == str(repo_root / "ai_lab.env")
+ assert "WORKSPACE_DIR" not in env
+ assert "LOG_DIR" not in env
+ assert "OPENHANDS_HOME_DIR" not in env
+
+
+def test_openhands_environment_preflight_uses_repo_managed_ai_lab_env(
+ tmp_path: Path,
+ monkeypatch,
+) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir()
+ (repo_root / "ai_lab.env").write_text(
+ "WORKSPACE_DIR=/Volumes/AI_LAB/ai_lab/workspace\n"
+ "LOG_DIR=/Volumes/AI_LAB/ai_lab/logs\n",
+ encoding="utf-8",
+ )
+ snapshot_path = repo_root / "preflight-env.txt"
+ preflight = repo_root / "scripts" / "fake_preflight.sh"
+ preflight.parent.mkdir(parents=True, exist_ok=True)
+ preflight.write_text(
+ f"""#!/usr/bin/env bash
+set -euo pipefail
+cat > "{snapshot_path}" < None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir()
+
+ baseline_dir = tmp_path / "baseline"
+ workspace_dir = tmp_path / "workspace"
+ (baseline_dir / "apps" / "malu").mkdir(parents=True, exist_ok=True)
+ (workspace_dir / "apps" / "malu").mkdir(parents=True, exist_ok=True)
+ (baseline_dir / "tests" / "apps").mkdir(parents=True, exist_ok=True)
+ (workspace_dir / "tests" / "apps" / "__pycache__").mkdir(parents=True, exist_ok=True)
+ (workspace_dir / ".pytest_cache" / "v" / "cache").mkdir(parents=True, exist_ok=True)
+
+ (baseline_dir / "apps" / "malu" / "lead_capture.py").write_text(
+ "VALUE = 1\n",
+ encoding="utf-8",
+ )
+ (workspace_dir / "apps" / "malu" / "lead_capture.py").write_text(
+ "VALUE = 2\n",
+ encoding="utf-8",
+ )
+ (baseline_dir / "tests" / "apps" / "test_malu_landing_page.py").write_text(
+ "def test_placeholder():\n assert True\n",
+ encoding="utf-8",
+ )
+ (workspace_dir / "tests" / "apps" / "test_malu_landing_page.py").write_text(
+ "def test_placeholder():\n assert True\n",
+ encoding="utf-8",
+ )
+ (workspace_dir / "apps" / "malu" / "README.md").write_text("# draft\n", encoding="utf-8")
+ (workspace_dir / ".pytest_cache" / "README.md").write_text("cache\n", encoding="utf-8")
+ (workspace_dir / ".pytest_cache" / "v" / "cache" / "nodeids").write_text("[]\n", encoding="utf-8")
+ (workspace_dir / "tests" / "apps" / "__pycache__" / "test_malu_landing_page.cpython-314.pyc").write_bytes(
+ b"pyc"
+ )
+
+ runner = AgentExecutionRunner(
+ repo_root=repo_root,
+ runtime_root=tmp_path / "runtime",
+ manifests_dir=repo_root / "configs" / "agents",
+ )
+ effective_policy = build_effective_policy(
+ ExecutionPolicy(allowed_paths=["apps/**", "tests/**"]),
+ ExecutionPolicy(allowed_paths=["apps/malu/**", "tests/apps/test_malu_landing_page.py"]),
+ )
+ changed_paths = runner._collect_changed_paths(baseline_dir, workspace_dir)
+ patch_text, filtered_paths, checks = runner._build_filtered_patch(
+ baseline_dir=baseline_dir,
+ workspace_dir=workspace_dir,
+ changed_paths=changed_paths,
+ driver_result=DriverResult(
+ run_id="run-benign-artifacts",
+ agent_id="mock",
+ attempt=1,
+ status="succeeded",
+ summary="ok",
+ changed_paths=[],
+ output_artifacts=[],
+ recommended_action="promote",
+ ),
+ policy=effective_policy,
+ )
+
+ check_map = {item.id: item for item in checks}
+
+ assert filtered_paths == ["apps/malu/lead_capture.py"]
+ assert check_map["builtin.allowed_paths"].passed is True
+ assert check_map["builtin.no_runtime_artifacts"].passed is True
+ assert check_map["builtin.max_changed_files"].passed is True
+ assert "apps/malu/README.md" not in patch_text
+ assert ".pytest_cache" not in patch_text
+ assert "__pycache__" not in patch_text
+
+
+def test_retry_attempt_receives_raw_validator_feedback(tmp_path: Path) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir()
+ (repo_root / "src").mkdir(parents=True)
+    (repo_root / "src" / "retry_target.py").write_text("VALUE = 'seed'\n", encoding="utf-8")
+
+ adapter = repo_root / "drivers" / "retry_feedback_adapter.py"
+ adapter.parent.mkdir(parents=True, exist_ok=True)
+ adapter.write_text(
+ """#!/usr/bin/env python3
+import json
+import os
+from pathlib import Path
+
+workspace = Path(os.environ["AEP_WORKSPACE"])
+result_path = Path(os.environ["AEP_RESULT_PATH"])
+job = json.loads(Path(os.environ["AEP_JOB_SPEC"]).read_text(encoding="utf-8"))
+attempt = int(os.environ["AEP_ATTEMPT"])
+target = workspace / "src" / "retry_target.py"
+
+if attempt == 1:
+ target.write_text("raise TypeError('Invalid args for response field!')\\n", encoding="utf-8")
+else:
+ task = job["task"]
+ if "Invalid args for response field!" not in task:
+ raise SystemExit("retry feedback missing raw validator detail")
+ target.write_text("VALUE = 'fixed'\\n", encoding="utf-8")
+
+payload = {
+ "protocol_version": "aep/v0",
+ "run_id": "run-retry-feedback",
+ "agent_id": "openhands",
+ "attempt": attempt,
+ "status": "succeeded",
+ "summary": f"attempt {attempt} completed",
+ "changed_paths": ["src/retry_target.py"],
+ "output_artifacts": [],
+ "metrics": {"duration_ms": 0, "steps": 0, "commands": 0, "prompt_tokens": None, "completion_tokens": None},
+ "recommended_action": "promote",
+ "error": None,
+}
+result_path.write_text(json.dumps(payload), encoding="utf-8")
+""",
+ encoding="utf-8",
+ )
+ adapter.chmod(0o755)
+ _write_manifest(repo_root, "openhands", "drivers/retry_feedback_adapter.py")
+
+ runner = AgentExecutionRunner(
+ repo_root=repo_root,
+ runtime_root=tmp_path / "runtime",
+ manifests_dir=repo_root / "configs" / "agents",
+ )
+
+ summary = runner.run_job(
+ JobSpec(
+ run_id="run-retry-feedback",
+ agent_id="openhands",
+ task="Fix src/retry_target.py.",
+ validators=[
+ ValidatorSpec(
+ id="worker.test_command",
+ kind="command",
+ command=f"{sys.executable} src/retry_target.py",
+ )
+ ],
+ fallback=[FallbackStep(action="retry", max_attempts=1)],
+ policy=ExecutionPolicy(
+ allowed_paths=["src/retry_target.py"],
+ cleanup_on_success=False,
+ ),
+ metadata={"pipeline_target": "patch"},
+ )
+ )
+
+ job_payload = json.loads(
+ (tmp_path / "runtime" / "run-retry-feedback" / "job.json").read_text(encoding="utf-8")
+ )
+
+ assert summary.final_status == "ready_for_promotion"
+ assert summary.driver_result.attempt == 2
+ assert "Invalid args for response field!" in job_payload["task"]
diff --git a/tests/test_autoresearch_planner.py b/tests/test_autoresearch_planner.py
new file mode 100644
index 00000000..c861369a
--- /dev/null
+++ b/tests/test_autoresearch_planner.py
@@ -0,0 +1,564 @@
+from __future__ import annotations
+
+import os
+from pathlib import Path
+import subprocess
+from urllib.parse import parse_qs, urlparse
+
+import pytest
+from fastapi.testclient import TestClient
+
+from autoresearch.agent_protocol.models import DriverResult, JobSpec, RunSummary, ValidationReport
+from autoresearch.api.dependencies import (
+ get_autoresearch_planner_service,
+ get_panel_access_service,
+ get_telegram_notifier_service,
+)
+from autoresearch.api.main import app
+from autoresearch.core.services.autoresearch_planner import AutoResearchPlannerService
+from autoresearch.core.services.panel_access import PanelAccessService
+from autoresearch.shared.autoresearch_planner_contract import (
+ AutoResearchPlanDispatchStatus,
+ AutoResearchPlannerRequest,
+ UpstreamWatchDecision,
+ UpstreamWatchRead,
+)
+from autoresearch.shared.models import JobStatus
+from autoresearch.shared.store import InMemoryRepository
+
+
+def _write(path: Path, content: str) -> None:
+ path.parent.mkdir(parents=True, exist_ok=True)
+ path.write_text(content, encoding="utf-8")
+
+
+class StubTelegramNotifier:
+ def __init__(self, *, send_results: list[bool] | None = None) -> None:
+ self.messages: list[dict[str, object]] = []
+ self._send_results = list(send_results or [])
+
+ @property
+ def enabled(self) -> bool:
+ return True
+
+ def send_message(
+ self,
+ *,
+ chat_id: str,
+ text: str,
+ disable_web_page_preview: bool = True,
+ reply_markup: dict[str, object] | None = None,
+ ) -> bool:
+ self.messages.append(
+ {
+ "chat_id": chat_id,
+ "text": text,
+ "disable_web_page_preview": disable_web_page_preview,
+ "reply_markup": reply_markup,
+ }
+ )
+ if self._send_results:
+ return self._send_results.pop(0)
+ return True
+
+
+class StubUpstreamWatcher:
+ def __init__(self, result: UpstreamWatchRead) -> None:
+ self._result = result
+ self.calls = 0
+
+ def inspect(self) -> UpstreamWatchRead:
+ self.calls += 1
+ return self._result
+
+
+def _successful_run_summary(job: JobSpec) -> RunSummary:
+ return RunSummary(
+ run_id=job.run_id,
+ final_status="ready_for_promotion",
+ driver_result=DriverResult(
+ run_id=job.run_id,
+ agent_id=job.agent_id,
+ status="succeeded",
+ summary="worker completed successfully",
+ changed_paths=list(job.policy.allowed_paths),
+ recommended_action="promote",
+ ),
+ validation=ValidationReport(run_id=job.run_id, passed=True),
+ promotion_patch_uri="/tmp/autoresearch.patch",
+ )
+
+
+def test_planner_selects_high_signal_marker_and_emits_worker_specs(tmp_path: Path) -> None:
+ repo_root = tmp_path / "repo"
+ _write(
+ repo_root / "src" / "autoresearch" / "core" / "services" / "demo_service.py",
+ "\n".join(
+ [
+ "def handle() -> str:",
+ " # FIXME: normalize promotion preflight before returning",
+ " return 'ok'",
+ "",
+ ]
+ ),
+ )
+ _write(
+ repo_root / "src" / "misc.py",
+ "\n".join(
+ [
+ "def noop() -> None:",
+ " # TODO: clean this up later",
+ " return None",
+ "",
+ ]
+ ),
+ )
+
+ service = AutoResearchPlannerService(
+ repository=InMemoryRepository(),
+ repo_root=repo_root,
+ )
+
+ plan = service.create(AutoResearchPlannerRequest())
+
+ assert plan.status is JobStatus.COMPLETED
+ assert plan.selected_candidate is not None
+ assert plan.selected_candidate.source_path == "src/autoresearch/core/services/demo_service.py"
+ assert plan.worker_spec is not None
+ assert plan.controlled_request is not None
+ assert plan.agent_job is not None
+ assert plan.worker_spec.allowed_paths == [
+ "src/autoresearch/core/services/demo_service.py",
+ "tests/test_demo_service.py",
+ ]
+ assert plan.worker_spec.test_command == "pytest -q tests/test_demo_service.py"
+ assert plan.controlled_request.backend.value == "openhands_cli"
+ assert plan.controlled_request.pipeline_target.value == "draft_pr"
+ assert plan.agent_job.mode == "patch_only"
+ assert plan.agent_job.metadata["planner_candidate_id"] == plan.selected_candidate.candidate_id
+ assert "FIXME" in plan.selected_candidate.title
+
+
+def test_planner_falls_back_to_test_gap_when_repo_has_no_markers(tmp_path: Path) -> None:
+ repo_root = tmp_path / "repo"
+ large_body = "\n".join([f"def fn_{index}() -> int:\n return {index}\n" for index in range(60)])
+ _write(repo_root / "src" / "autoresearch" / "core" / "services" / "large_module.py", large_body)
+
+ service = AutoResearchPlannerService(
+ repository=InMemoryRepository(),
+ repo_root=repo_root,
+ )
+
+ plan = service.create(AutoResearchPlannerRequest(max_candidates=3, pipeline_target="patch"))
+
+ assert plan.status is JobStatus.COMPLETED
+ assert plan.selected_candidate is not None
+ assert plan.selected_candidate.category == "test_gap"
+ assert plan.selected_candidate.source_path == "src/autoresearch/core/services/large_module.py"
+ assert plan.worker_spec is not None
+ assert plan.worker_spec.pipeline_target == "patch"
+ assert plan.worker_spec.allowed_paths[-1] == "tests/test_large_module.py"
+
+
+def test_planner_records_optional_upstream_watch_result(tmp_path: Path) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir(parents=True)
+ watcher = StubUpstreamWatcher(
+ UpstreamWatchRead(
+ upstream_url="https://github.com/openclaw/openclaw.git",
+ decision=UpstreamWatchDecision.SKIP,
+ summary="Recent upstream changes remain in non-core areas (LINE, Zalo); auto-skipped.",
+ focus_areas=["extension:line", "extension:zalo"],
+ cleaned_up=True,
+ )
+ )
+
+ service = AutoResearchPlannerService(
+ repository=InMemoryRepository(),
+ repo_root=repo_root,
+ upstream_watcher=watcher,
+ )
+
+ plan = service.create(AutoResearchPlannerRequest(include_upstream_watch=True))
+
+ assert watcher.calls == 1
+ assert plan.upstream_watch is not None
+ assert plan.upstream_watch.decision is UpstreamWatchDecision.SKIP
+ assert "Upstream watcher auto-skipped merge noise" in plan.summary
+
+
+@pytest.fixture
+def autoresearch_plan_client(tmp_path: Path) -> TestClient:
+ repo_root = tmp_path / "repo"
+ _write(
+ repo_root / "src" / "autoresearch" / "core" / "services" / "planner_target.py",
+ "\n".join(
+ [
+ "def check() -> bool:",
+ " # FIXME: add strict regression coverage",
+ " return True",
+ "",
+ ]
+ ),
+ )
+ service = AutoResearchPlannerService(
+ repository=InMemoryRepository(),
+ repo_root=repo_root,
+ )
+ panel_access = PanelAccessService(
+ secret="panel-secret",
+ telegram_bot_token="123456:TEST_BOT_TOKEN",
+ telegram_init_data_max_age_seconds=900,
+ base_url="https://panel.example/api/v1/panel/view",
+ mini_app_url="https://panel.example/api/v1/panel/view",
+ allowed_uids={"10001"},
+ )
+ notifier = StubTelegramNotifier()
+
+ app.dependency_overrides[get_autoresearch_planner_service] = lambda: service
+ app.dependency_overrides[get_panel_access_service] = lambda: panel_access
+ app.dependency_overrides[get_telegram_notifier_service] = lambda: notifier
+ with TestClient(app) as client:
+ setattr(client, "_planner", service)
+ setattr(client, "_notifier", notifier)
+ yield client
+ app.dependency_overrides.clear()
+
+
+def test_autoresearch_plan_api_round_trip(autoresearch_plan_client: TestClient) -> None:
+ notifier = getattr(autoresearch_plan_client, "_notifier")
+ response = autoresearch_plan_client.post(
+ "/api/v1/autoresearch/plans",
+ json={
+ "goal": "Find the next safe promotion candidate.",
+ "max_candidates": 2,
+ "pipeline_target": "patch",
+ },
+ )
+
+ assert response.status_code == 202
+ payload = response.json()
+ assert payload["status"] == "completed"
+ assert payload["selected_candidate"]["source_path"] == "src/autoresearch/core/services/planner_target.py"
+ assert payload["worker_spec"]["pipeline_target"] == "patch"
+ assert payload["telegram_uid"] == "10001"
+ assert payload["notification_sent"] is True
+ parsed = urlparse(payload["panel_action_url"])
+ assert parsed.netloc == "panel.example"
+ query = parse_qs(parsed.query)
+ assert query["planId"] == [payload["plan_id"]]
+ assert "token" in query
+ assert notifier.messages[0]["chat_id"] == "10001"
+ assert "AutoResearch 发现新优化点" in str(notifier.messages[0]["text"])
+ assert notifier.messages[0]["reply_markup"] == {
+ "inline_keyboard": [[{"text": "打开 Mini App 审批", "web_app": {"url": payload["panel_action_url"]}}]]
+ }
+
+ list_response = autoresearch_plan_client.get("/api/v1/autoresearch/plans")
+ assert list_response.status_code == 200
+ items = list_response.json()
+ assert len(items) == 1
+ plan_id = items[0]["plan_id"]
+
+ get_response = autoresearch_plan_client.get(f"/api/v1/autoresearch/plans/{plan_id}")
+ assert get_response.status_code == 200
+ assert get_response.json()["plan_id"] == plan_id
+
+
+def test_autoresearch_plan_api_falls_back_to_text_only_notify_when_panel_url_is_not_https(
+ tmp_path: Path,
+) -> None:
+ repo_root = tmp_path / "repo"
+ _write(
+ repo_root / "src" / "autoresearch" / "core" / "services" / "planner_target.py",
+ "\n".join(
+ [
+ "def check() -> bool:",
+ " # FIXME: add strict regression coverage",
+ " return True",
+ "",
+ ]
+ ),
+ )
+ service = AutoResearchPlannerService(
+ repository=InMemoryRepository(),
+ repo_root=repo_root,
+ )
+ panel_access = PanelAccessService(
+ secret="panel-secret",
+ telegram_bot_token="123456:TEST_BOT_TOKEN",
+ telegram_init_data_max_age_seconds=900,
+ base_url="http://127.0.0.1:8000/api/v1/panel/view",
+ allowed_uids={"10001"},
+ )
+ notifier = StubTelegramNotifier()
+
+ app.dependency_overrides[get_autoresearch_planner_service] = lambda: service
+ app.dependency_overrides[get_panel_access_service] = lambda: panel_access
+ app.dependency_overrides[get_telegram_notifier_service] = lambda: notifier
+ with TestClient(app) as client:
+ response = client.post(
+ "/api/v1/autoresearch/plans",
+ json={
+ "goal": "Find the next safe promotion candidate.",
+ "max_candidates": 2,
+ "pipeline_target": "patch",
+ },
+ )
+ app.dependency_overrides.clear()
+
+ assert response.status_code == 202
+ payload = response.json()
+ assert payload["notification_sent"] is True
+ assert notifier.messages[0]["reply_markup"] is None
+
+
+def test_autoresearch_plan_api_falls_back_to_url_button_when_web_app_send_is_rejected(
+ tmp_path: Path,
+) -> None:
+ repo_root = tmp_path / "repo"
+ _write(
+ repo_root / "src" / "autoresearch" / "core" / "services" / "planner_target.py",
+ "\n".join(
+ [
+ "def check() -> bool:",
+ " # FIXME: add strict regression coverage",
+ " return True",
+ "",
+ ]
+ ),
+ )
+ service = AutoResearchPlannerService(
+ repository=InMemoryRepository(),
+ repo_root=repo_root,
+ )
+ panel_access = PanelAccessService(
+ secret="panel-secret",
+ telegram_bot_token="123456:TEST_BOT_TOKEN",
+ telegram_init_data_max_age_seconds=900,
+ base_url="https://panel.example/api/v1/panel/view",
+ mini_app_url="https://panel.example/api/v1/panel/view",
+ allowed_uids={"10001"},
+ )
+ notifier = StubTelegramNotifier(send_results=[False, True])
+
+ app.dependency_overrides[get_autoresearch_planner_service] = lambda: service
+ app.dependency_overrides[get_panel_access_service] = lambda: panel_access
+ app.dependency_overrides[get_telegram_notifier_service] = lambda: notifier
+ with TestClient(app) as client:
+ response = client.post(
+ "/api/v1/autoresearch/plans",
+ json={
+ "goal": "Find the next safe promotion candidate.",
+ "max_candidates": 2,
+ },
+ )
+ app.dependency_overrides.clear()
+
+ assert response.status_code == 202
+ payload = response.json()
+ assert payload["notification_sent"] is True
+ assert len(notifier.messages) == 2
+ assert notifier.messages[0]["reply_markup"] == {
+ "inline_keyboard": [[{"text": "打开 Mini App 审批", "web_app": {"url": payload["panel_action_url"]}}]]
+ }
+ assert notifier.messages[1]["reply_markup"] == {
+ "inline_keyboard": [[{"text": "打开 Panel 审批", "url": payload["panel_action_url"]}]]
+ }
+ assert "Mini App 审批执行" in str(notifier.messages[0]["text"])
+ assert "Panel 审批执行" in str(notifier.messages[1]["text"])
+
+
+def test_autoresearch_plan_api_uses_secure_url_button_when_only_https_panel_url_is_available(
+ tmp_path: Path,
+) -> None:
+ repo_root = tmp_path / "repo"
+ _write(
+ repo_root / "src" / "autoresearch" / "core" / "services" / "planner_target.py",
+ "\n".join(
+ [
+ "def check() -> bool:",
+ " # FIXME: add strict regression coverage",
+ " return True",
+ "",
+ ]
+ ),
+ )
+ service = AutoResearchPlannerService(
+ repository=InMemoryRepository(),
+ repo_root=repo_root,
+ )
+ panel_access = PanelAccessService(
+ secret="panel-secret",
+ telegram_bot_token="123456:TEST_BOT_TOKEN",
+ telegram_init_data_max_age_seconds=900,
+ base_url="https://panel.example/api/v1/panel/view",
+ allowed_uids={"10001"},
+ )
+ notifier = StubTelegramNotifier()
+
+ app.dependency_overrides[get_autoresearch_planner_service] = lambda: service
+ app.dependency_overrides[get_panel_access_service] = lambda: panel_access
+ app.dependency_overrides[get_telegram_notifier_service] = lambda: notifier
+ with TestClient(app) as client:
+ response = client.post(
+ "/api/v1/autoresearch/plans",
+ json={
+ "goal": "Find the next safe promotion candidate.",
+ "max_candidates": 2,
+ "pipeline_target": "patch",
+ },
+ )
+ app.dependency_overrides.clear()
+
+ assert response.status_code == 202
+ payload = response.json()
+ parsed = urlparse(payload["panel_action_url"])
+ query = parse_qs(parsed.query)
+ assert query["planId"] == [payload["plan_id"]]
+ assert "token" in query
+ assert notifier.messages[0]["reply_markup"] == {
+ "inline_keyboard": [[{"text": "打开 Panel 审批", "url": payload["panel_action_url"]}]]
+ }
+
+
+def test_planner_dispatch_lifecycle_records_run_summary(tmp_path: Path) -> None:
+ repo_root = tmp_path / "repo"
+ _write(
+ repo_root / "src" / "autoresearch" / "core" / "services" / "dispatch_target.py",
+ "\n".join(
+ [
+ "def check() -> bool:",
+ " # FIXME: dispatch this through the worker",
+ " return True",
+ "",
+ ]
+ ),
+ )
+ service = AutoResearchPlannerService(
+ repository=InMemoryRepository(),
+ repo_root=repo_root,
+ dispatch_runner=_successful_run_summary,
+ )
+
+ plan = service.create(AutoResearchPlannerRequest(telegram_uid="10001"))
+ queued = service.request_dispatch(plan.plan_id, requested_by="10001")
+ dispatched = service.execute_dispatch(plan.plan_id)
+
+ assert queued.dispatch_status is AutoResearchPlanDispatchStatus.DISPATCHING
+ assert queued.dispatch_run is not None
+ assert queued.dispatch_run.status.value == "queued"
+ assert queued.dispatch_run.lane.value == "local"
+ assert dispatched.dispatch_status is AutoResearchPlanDispatchStatus.DISPATCHED
+ assert dispatched.dispatch_requested_by == "10001"
+ assert dispatched.dispatch_completed_at is not None
+ assert dispatched.dispatch_run is not None
+ assert dispatched.dispatch_run.status.value == "succeeded"
+ assert dispatched.run_summary is not None
+ assert dispatched.run_summary.final_status == "ready_for_promotion"
+ assert dispatched.run_summary.promotion_patch_uri == "/tmp/autoresearch.patch"
+
+
+def test_request_dispatch_records_remote_fallback_preview(tmp_path: Path) -> None:
+ repo_root = tmp_path / "repo"
+ _write(
+ repo_root / "src" / "autoresearch" / "core" / "services" / "fallback_target.py",
+ "def check() -> bool:\n # FIXME: run through fallback preview\n return True\n",
+ )
+ service = AutoResearchPlannerService(
+ repository=InMemoryRepository(),
+ repo_root=repo_root,
+ dispatch_runner=_successful_run_summary,
+ )
+
+ plan = service.create(
+ AutoResearchPlannerRequest(
+ telegram_uid="10001",
+ metadata={"runtime_mode": "night", "remote_available": False},
+ )
+ )
+ queued = service.request_dispatch(plan.plan_id, requested_by="10001")
+
+ assert queued.dispatch_run is not None
+ assert queued.dispatch_run.requested_lane.value == "remote"
+ assert queued.dispatch_run.lane.value == "local"
+ assert queued.dispatch_run.fallback_reason is not None
+
+
+def test_autoresearch_plan_api_sends_low_noise_upstream_skip_report(tmp_path: Path) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir(parents=True)
+ service = AutoResearchPlannerService(
+ repository=InMemoryRepository(),
+ repo_root=repo_root,
+ upstream_watcher=StubUpstreamWatcher(
+ UpstreamWatchRead(
+ upstream_url="https://github.com/openclaw/openclaw.git",
+ decision=UpstreamWatchDecision.SKIP,
+ summary="Recent upstream changes remain in non-core areas (LINE, Zalo); auto-skipped.",
+ focus_areas=["extension:line", "extension:zalo"],
+ cleaned_up=True,
+ )
+ ),
+ )
+ panel_access = PanelAccessService(
+ secret="panel-secret",
+ telegram_bot_token="123456:TEST_BOT_TOKEN",
+ telegram_init_data_max_age_seconds=900,
+ base_url="https://panel.example/api/v1/panel/view",
+ allowed_uids={"10001"},
+ )
+ notifier = StubTelegramNotifier()
+
+ app.dependency_overrides[get_autoresearch_planner_service] = lambda: service
+ app.dependency_overrides[get_panel_access_service] = lambda: panel_access
+ app.dependency_overrides[get_telegram_notifier_service] = lambda: notifier
+ with TestClient(app) as client:
+ response = client.post(
+ "/api/v1/autoresearch/plans",
+ json={
+ "goal": "Scan planner backlog and upstream noise.",
+ "include_upstream_watch": True,
+ },
+ )
+ app.dependency_overrides.clear()
+
+ assert response.status_code == 202
+ payload = response.json()
+ assert payload["selected_candidate"] is None
+ assert payload["upstream_watch"]["decision"] == "skip"
+ assert payload["notification_sent"] is True
+ assert payload["panel_action_url"] is None
+ assert len(notifier.messages) == 1
+ assert "已完成上游巡检" in str(notifier.messages[0]["text"])
+ assert "LINE/Zalo" in str(notifier.messages[0]["text"])
+
+
+def _git(repo: Path, *args: str, cwd: Path | None = None) -> str:
+ env = os.environ.copy()
+ env.update(
+ {
+ "GIT_AUTHOR_NAME": "Codex Tests",
+ "GIT_AUTHOR_EMAIL": "codex-tests@example.com",
+ "GIT_COMMITTER_NAME": "Codex Tests",
+ "GIT_COMMITTER_EMAIL": "codex-tests@example.com",
+ }
+ )
+ completed = subprocess.run(
+ ["git", *args],
+ cwd=str(cwd or repo),
+ env=env,
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+ assert completed.returncode == 0, completed.stderr or completed.stdout
+ return completed.stdout.strip()
+
+
+def _commit(repo: Path, rel_path: str, content: str, message: str) -> None:
+ target = repo / rel_path
+ target.parent.mkdir(parents=True, exist_ok=True)
+ target.write_text(content, encoding="utf-8")
+ _git(repo, "add", rel_path)
+ _git(repo, "commit", "-m", message)
diff --git a/tests/test_doctor_linux.py b/tests/test_doctor_linux.py
new file mode 100644
index 00000000..27d8974b
--- /dev/null
+++ b/tests/test_doctor_linux.py
@@ -0,0 +1,48 @@
+from __future__ import annotations
+
+import importlib.util
+import sys
+from pathlib import Path
+
+
+def _load_doctor_module():
+ script_path = Path(__file__).resolve().parents[1] / "scripts" / "doctor.py"
+ spec = importlib.util.spec_from_file_location("doctor_script", script_path)
+ assert spec is not None
+ assert spec.loader is not None
+ module = importlib.util.module_from_spec(spec)
+ sys.modules[spec.name] = module
+ spec.loader.exec_module(module)
+ return module
+
+
+def test_linux_runtime_mode_warns_when_unset(tmp_path, monkeypatch):
+ doctor = _load_doctor_module()
+ monkeypatch.delenv("OPENHANDS_RUNTIME", raising=False)
+
+ result = doctor._check_linux_runtime_mode(tmp_path, "linux-remote")
+
+ assert result is not None
+ assert result.status == "WARN"
+ assert "OPENHANDS_RUNTIME is not set" in result.detail
+ assert "OPENHANDS_RUNTIME=host" in (result.hint or "")
+
+
+def test_linux_docker_host_warns_for_mac_colima(monkeypatch):
+ doctor = _load_doctor_module()
+ monkeypatch.setenv("DOCKER_HOST", "unix:///Users/demo/.colima/default/docker.sock")
+
+ result = doctor._check_linux_docker_host("linux-remote")
+
+ assert result is not None
+ assert result.status == "WARN"
+ assert "colima" in result.detail
+
+
+def test_linux_runtime_paths_pass_when_runtime_dirs_are_writable(tmp_path):
+ doctor = _load_doctor_module()
+
+ result = doctor._check_linux_runtime_paths(tmp_path, "linux-remote")
+
+ assert result is not None
+ assert result.status == "PASS"
diff --git a/tests/test_failure_classifier.py b/tests/test_failure_classifier.py
new file mode 100644
index 00000000..391b0e9c
--- /dev/null
+++ b/tests/test_failure_classifier.py
@@ -0,0 +1,114 @@
+from __future__ import annotations
+
+from autoresearch.agent_protocol.models import DriverResult, RunSummary, ValidationCheck, ValidationReport
+from autoresearch.core.dispatch.failure_classifier import classify_remote_status, classify_run_summary
+from autoresearch.shared.remote_run_contract import FailureClass, RecoveryAction, RemoteRunStatus
+
+
+def _summary(
+ *,
+ driver_status: str,
+ final_status: str,
+ agent_id: str = "openhands",
+ validation_passed: bool = True,
+ error: str | None = None,
+) -> RunSummary:
+ checks = []
+ if not validation_passed:
+ checks.append(ValidationCheck(id="worker.test_command", passed=False, detail="assertion failed"))
+ return RunSummary(
+ run_id="run-classifier",
+ final_status=final_status,
+ driver_result=DriverResult(
+ run_id="run-classifier",
+ agent_id=agent_id,
+ status=driver_status,
+ summary="classifier probe",
+ changed_paths=["src/demo.py"],
+ recommended_action="fallback" if agent_id == "mock" else "human_review",
+ error=error,
+ ),
+ validation=ValidationReport(
+ run_id="run-classifier",
+ passed=validation_passed,
+ checks=checks,
+ ),
+ )
+
+
+def test_classifier_maps_stalled_executor_to_retry() -> None:
+ disposition = classify_run_summary(
+ _summary(driver_status="stalled_no_progress", final_status="failed", error="no workspace progress")
+ )
+ assert disposition.failure_class is FailureClass.EXECUTOR_STALLED
+ assert disposition.recovery_action is RecoveryAction.RETRY
+
+
+def test_classifier_maps_timeout_to_retry() -> None:
+ disposition = classify_run_summary(_summary(driver_status="timed_out", final_status="failed"))
+ assert disposition.failure_class is FailureClass.TOOL_TIMEOUT
+ assert disposition.recovery_action is RecoveryAction.RETRY
+
+
+def test_classifier_maps_env_missing_to_abort() -> None:
+ disposition = classify_run_summary(
+ _summary(
+ driver_status="contract_error",
+ final_status="failed",
+ error="EnvironmentCheckFailed: launch_ai_lab.sh not found at scripts/launch_ai_lab.sh",
+ )
+ )
+ assert disposition.failure_class is FailureClass.ENV_MISSING
+ assert disposition.recovery_action is RecoveryAction.ABORT
+
+
+def test_classifier_maps_workspace_dirty_to_abort() -> None:
+ disposition = classify_run_summary(
+ _summary(
+ driver_status="contract_error",
+ final_status="failed",
+ error="repository worktree is not clean; promotion requires a clean base",
+ )
+ )
+ assert disposition.failure_class is FailureClass.WORKSPACE_DIRTY
+ assert disposition.recovery_action is RecoveryAction.ABORT
+
+
+def test_classifier_maps_mock_success_to_downgrade() -> None:
+ disposition = classify_run_summary(
+ _summary(
+ driver_status="succeeded",
+ final_status="ready_for_promotion",
+ agent_id="mock",
+ )
+ )
+ assert disposition.failure_class is FailureClass.MODEL_FALLBACK
+ assert disposition.recovery_action is RecoveryAction.DOWNGRADE_TO_DRAFT
+
+
+def test_classifier_maps_mock_validation_failure_to_human_review() -> None:
+ disposition = classify_run_summary(
+ _summary(
+ driver_status="failed",
+ final_status="human_review",
+ agent_id="mock",
+ validation_passed=False,
+ )
+ )
+ assert disposition.failure_class is FailureClass.ASSERTION_FAILED_AFTER_FALLBACK
+ assert disposition.recovery_action is RecoveryAction.REQUIRE_HUMAN_REVIEW
+
+
+def test_classifier_maps_planner_stall_to_human_review() -> None:
+ disposition = classify_remote_status(RemoteRunStatus.STALLED, stage="planner")
+ assert disposition.failure_class is FailureClass.PLANNER_STALLED
+ assert disposition.recovery_action is RecoveryAction.REQUIRE_HUMAN_REVIEW
+
+
+def test_classifier_maps_transient_network_to_retry() -> None:
+ disposition = classify_remote_status(
+ RemoteRunStatus.FAILED,
+ error_text="ssh: connection reset by peer",
+ )
+ assert disposition.failure_class is FailureClass.TRANSIENT_NETWORK
+ assert disposition.recovery_action is RecoveryAction.RETRY
diff --git a/tests/test_gateway_telegram.py b/tests/test_gateway_telegram.py
index 54c934df..e66310d4 100644
--- a/tests/test_gateway_telegram.py
+++ b/tests/test_gateway_telegram.py
@@ -13,6 +13,9 @@
get_approval_store_service,
get_capability_provider_registry,
get_claude_agent_service,
+ get_github_issue_service,
+ get_housekeeper_service,
+ get_manager_agent_service,
get_openclaw_memory_service,
get_openclaw_compat_service,
get_panel_access_service,
@@ -20,14 +23,20 @@
)
from autoresearch.api.main import app
from autoresearch.api.routers import gateway_telegram
+from autoresearch.agent_protocol.models import DriverResult, RunSummary, ValidationReport
+from autoresearch.agents.manager_agent import ManagerAgentService
from autoresearch.core.adapters import CapabilityProviderDescriptorRead, CapabilityProviderRegistry
from autoresearch.core.adapters.contracts import CapabilityDomain, SkillCatalogRead
from autoresearch.core.services.admin_config import AdminConfigService
from autoresearch.core.services.approval_store import ApprovalStoreService
from autoresearch.core.services.claude_agents import ClaudeAgentService
+from autoresearch.core.services.github_issue_service import GitHubIssueCommentRead, GitHubIssueRead, GitHubIssueReference
+from autoresearch.core.services.housekeeper import HousekeeperService
from autoresearch.core.services.openclaw_compat import OpenClawCompatService
from autoresearch.core.services.openclaw_memory import OpenClawMemoryService
from autoresearch.core.services.panel_access import PanelAccessService
+from autoresearch.shared.housekeeper_contract import HousekeeperChangeReason, HousekeeperMode, HousekeeperModeUpdateRequest
+from autoresearch.shared.manager_agent_contract import ManagerDispatchRead
from autoresearch.shared.models import (
AdminAgentConfigRead,
AdminChannelConfigRead,
@@ -37,8 +46,11 @@
ApprovalRequestCreateRequest,
OpenClawMemoryRecordRead,
OpenClawSessionRead,
+ PromotionDiffStats,
+ PromotionResult,
+ utc_now,
)
-from autoresearch.shared.store import SQLiteModelRepository
+from autoresearch.shared.store import InMemoryRepository, SQLiteModelRepository
class _StubSkillProvider:
@@ -81,6 +93,23 @@ def get_skill(self, skill_name: str):
return OpenClawSkillDetailRead(**self._skill, content="# Daily Brief\nUse this skill.\n")
+def _night_housekeeper_service() -> HousekeeperService:
+ service = HousekeeperService(
+ state_repository=InMemoryRepository(),
+ budget_repository=InMemoryRepository(),
+ exploration_repository=InMemoryRepository(),
+ )
+ service.update_mode(
+ HousekeeperModeUpdateRequest(
+ action="set_manual_override",
+ target_mode=HousekeeperMode.NIGHT_READONLY_EXPLORE,
+ changed_by="test",
+ reason=HousekeeperChangeReason.MANUAL_API,
+ )
+ )
+ return service
+
+
class _StubTelegramNotifier:
def __init__(self) -> None:
self.status_events: list[dict[str, str]] = []
@@ -127,6 +156,77 @@ def notify_manual_action(self, *, chat_id: str, entry: object, run_status: str)
return True
+class _StubGitHubIssueService:
+ def __init__(self) -> None:
+ self.comments: list[dict[str, str]] = []
+
+ def fetch_issue(self, raw_reference: str) -> GitHubIssueRead:
+ return GitHubIssueRead(
+ reference=GitHubIssueReference(owner="owner", repo="repo", number=123),
+ title="Audit trail crashes when comment body is empty",
+ body="Steps:\n1. Trigger the task.\n2. Observe the failure.\n\nExpected: dispatch succeeds.",
+ url="https://github.com/owner/repo/issues/123",
+ state="OPEN",
+ author="founder",
+ labels=("bug", "telegram"),
+ comments=(
+ GitHubIssueCommentRead(author="reviewer", body="Please keep the fix scoped and tested."),
+ ),
+ )
+
+ def build_manager_prompt(self, issue: GitHubIssueRead, *, operator_note: str | None = None) -> str:
+ note = operator_note or ""
+ return f"Fix GitHub issue {issue.reference.display}. {note}".strip()
+
+ def post_comment(self, raw_reference: str, body: str) -> str:
+ self.comments.append({"issue_reference": raw_reference, "body": body})
+ return f"commented on {raw_reference}"
+
+
+def _build_manager_service(db_path: Path) -> ManagerAgentService:
+ repository = SQLiteModelRepository(
+ db_path=db_path,
+ table_name="manager_agent_dispatches_gateway_it",
+ model_cls=ManagerDispatchRead,
+ )
+
+ def _dispatch_runner(job_spec) -> RunSummary:
+ return RunSummary(
+ run_id=job_spec.run_id,
+ final_status="ready_for_promotion",
+ driver_result=DriverResult(
+ run_id=job_spec.run_id,
+ agent_id=job_spec.agent_id,
+ status="succeeded",
+ summary="stub manager runner completed",
+ changed_paths=["src/autoresearch/api/routers/admin.py"],
+ recommended_action="promote",
+ ),
+ validation=ValidationReport(run_id=job_spec.run_id, passed=True),
+ promotion_patch_uri="artifacts/promotion.patch",
+ promotion=None,
+ ).model_copy(
+ update={
+ "promotion": PromotionResult(
+ run_id=job_spec.run_id,
+ success=True,
+ mode="draft_pr",
+ pr_url=f"https://github.com/owner/repo/pull/{job_spec.run_id[-3:]}",
+ changed_files=["src/autoresearch/api/routers/admin.py"],
+ diff_stats=PromotionDiffStats(files_changed=1, insertions=12, deletions=2),
+ created_at=utc_now(),
+ updated_at=utc_now(),
+ )
+ }
+ )
+
+ return ManagerAgentService(
+ repository=repository,
+ repo_root=Path(__file__).resolve().parents[1],
+ dispatch_runner=_dispatch_runner,
+ )
+
+
@pytest.fixture
def telegram_client(tmp_path: Path) -> TestClient:
db_path = tmp_path / "telegram-gateway.sqlite3"
@@ -716,6 +816,8 @@ def test_telegram_help_command_returns_available_commands(
help_text = notifier.messages[0]["text"]
assert "[Telegram Commands]" in help_text
assert "/status" in help_text
+ assert "/task <需求>" in help_text
+ assert "/task --approve <需求>" in help_text
assert "/approve approve" in help_text
assert "/memory <内容>" in help_text
assert "/mode shared" in help_text
@@ -724,6 +826,240 @@ def test_telegram_help_command_returns_available_commands(
app.dependency_overrides.pop(get_telegram_notifier_service, None)
+def test_telegram_start_command_returns_available_commands(
+ telegram_client: TestClient,
+) -> None:
+ notifier = _StubTelegramNotifier()
+ app.dependency_overrides[get_telegram_notifier_service] = lambda: notifier
+
+ try:
+ response = telegram_client.post(
+ "/api/v1/gateway/telegram/webhook",
+ json={
+ "update_id": 3152,
+ "message": {
+ "message_id": 151,
+ "text": "/start",
+ "chat": {"id": 9535, "type": "private"},
+ "from": {"id": 9535, "username": "start-user"},
+ },
+ },
+ )
+ assert response.status_code == 200
+ payload = response.json()
+ assert payload["accepted"] is True
+ assert payload["agent_run_id"] is None
+ assert payload["metadata"]["source"] == "telegram_help"
+
+ assert len(notifier.messages) == 1
+ help_text = notifier.messages[0]["text"]
+ assert "[Telegram Commands]" in help_text
+ assert "/start 查看欢迎信息和命令列表" in help_text
+ assert "/status" in help_text
+ assert "/help" in help_text
+ finally:
+ app.dependency_overrides.pop(get_telegram_notifier_service, None)
+
+
+def test_telegram_task_issue_dispatches_manager_and_queues_issue_reply_approval(
+ telegram_client: TestClient,
+ tmp_path: Path,
+) -> None:
+ notifier = _StubTelegramNotifier()
+ github_issue_service = _StubGitHubIssueService()
+ manager_service = _build_manager_service(tmp_path / "manager.sqlite3")
+ approval_service = getattr(telegram_client, "_approval_store")
+ app.dependency_overrides[get_telegram_notifier_service] = lambda: notifier
+ app.dependency_overrides[get_github_issue_service] = lambda: github_issue_service
+ app.dependency_overrides[get_housekeeper_service] = _night_housekeeper_service
+ app.dependency_overrides[get_manager_agent_service] = lambda: manager_service
+
+ try:
+ response = telegram_client.post(
+ "/api/v1/gateway/telegram/webhook",
+ json={
+ "update_id": 3161,
+ "message": {
+ "message_id": 154,
+ "text": "/task issue owner/repo#123 优先检查 Telegram 审批回路",
+ "chat": {"id": 9537, "type": "private"},
+ "from": {"id": 9537, "username": "task-user"},
+ },
+ },
+ )
+ assert response.status_code == 200
+ payload = response.json()
+ assert payload["accepted"] is True
+ assert payload["agent_run_id"] is None
+ assert payload["metadata"]["source"] == "telegram_manager_task"
+ assert payload["metadata"]["task_source"] == "issue"
+ assert payload["metadata"]["dispatch_id"]
+ assert payload["metadata"]["issue_reference"] == "owner/repo#123"
+
+ dispatch = manager_service.get_dispatch(payload["metadata"]["dispatch_id"])
+ assert dispatch is not None
+ assert dispatch.status.value == "completed"
+ assert dispatch.run_summary is not None
+ assert dispatch.run_summary.promotion is not None
+ assert dispatch.run_summary.promotion.pr_url
+
+ approvals = approval_service.list_requests(telegram_uid="9537", limit=10)
+ assert len(approvals) == 1
+ approval = approvals[0]
+ assert approval.metadata["action_type"] == "github_issue_comment"
+ assert approval.metadata["issue_reference"] == "owner/repo#123"
+ assert "Automated progress update" in approval.metadata["comment_body"]
+
+ assert len(notifier.messages) >= 3
+ assert any("已接收,开始拆解并执行" in item["text"] for item in notifier.messages)
+ assert any("draft_pr:" in item["text"] for item in notifier.messages)
+ assert any("[GitHub Reply Pending]" in item["text"] for item in notifier.messages)
+ finally:
+ app.dependency_overrides.pop(get_telegram_notifier_service, None)
+ app.dependency_overrides.pop(get_github_issue_service, None)
+ app.dependency_overrides.pop(get_housekeeper_service, None)
+ app.dependency_overrides.pop(get_manager_agent_service, None)
+
+
+def test_telegram_task_approve_flag_grants_owner_dispatch_context(
+ telegram_client: TestClient,
+ tmp_path: Path,
+ monkeypatch: pytest.MonkeyPatch,
+) -> None:
+ notifier = _StubTelegramNotifier()
+ manager_service = _build_manager_service(tmp_path / "manager-approve-flag.sqlite3")
+ app.dependency_overrides[get_telegram_notifier_service] = lambda: notifier
+ app.dependency_overrides[get_manager_agent_service] = lambda: manager_service
+ monkeypatch.setenv("AUTORESEARCH_TELEGRAM_OWNER_UIDS", "9541")
+
+ try:
+ response = telegram_client.post(
+ "/api/v1/gateway/telegram/webhook",
+ json={
+ "update_id": 3164,
+ "message": {
+ "message_id": 157,
+ "text": "/task --approve 为美妆品牌玛露开发 6g 遮瑕膏落地页",
+ "chat": {"id": 9541, "type": "private"},
+ "from": {"id": 9541, "username": "owner-user"},
+ },
+ },
+ )
+ assert response.status_code == 200
+ payload = response.json()
+ dispatch = manager_service.get_dispatch(payload["metadata"]["dispatch_id"])
+
+ assert dispatch is not None
+ backend_task = dispatch.execution_plan.tasks[0]
+ assert backend_task.worker_spec is not None
+ assert backend_task.agent_job is not None
+ assert backend_task.worker_spec.metadata["approval_granted"] is True
+ assert backend_task.agent_job.metadata["approval_granted"] is True
+ finally:
+ app.dependency_overrides.pop(get_telegram_notifier_service, None)
+ app.dependency_overrides.pop(get_manager_agent_service, None)
+
+
+def test_telegram_task_approve_flag_is_ignored_for_non_admin_user(
+ telegram_client: TestClient,
+ tmp_path: Path,
+ monkeypatch: pytest.MonkeyPatch,
+) -> None:
+ notifier = _StubTelegramNotifier()
+ manager_service = _build_manager_service(tmp_path / "manager-non-admin-approve.sqlite3")
+ app.dependency_overrides[get_telegram_notifier_service] = lambda: notifier
+ app.dependency_overrides[get_manager_agent_service] = lambda: manager_service
+ monkeypatch.setenv("AUTORESEARCH_TELEGRAM_ALLOWED_UIDS", "9542")
+
+ try:
+ response = telegram_client.post(
+ "/api/v1/gateway/telegram/webhook",
+ json={
+ "update_id": 3165,
+ "message": {
+ "message_id": 158,
+ "text": "/task --approve 为美妆品牌玛露开发 6g 遮瑕膏落地页",
+ "chat": {"id": 9542, "type": "private"},
+ "from": {"id": 9542, "username": "member-user"},
+ },
+ },
+ )
+ assert response.status_code == 200
+ payload = response.json()
+ dispatch = manager_service.get_dispatch(payload["metadata"]["dispatch_id"])
+
+ assert dispatch is not None
+ backend_task = dispatch.execution_plan.tasks[0]
+ assert backend_task.worker_spec is not None
+ assert backend_task.agent_job is not None
+ assert backend_task.worker_spec.metadata["approval_granted"] is False
+ assert backend_task.agent_job.metadata["approval_granted"] is False
+ assert any("仅对 owner/partner 生效" in item["text"] for item in notifier.messages)
+ finally:
+ app.dependency_overrides.pop(get_telegram_notifier_service, None)
+ app.dependency_overrides.pop(get_manager_agent_service, None)
+
+
+def test_telegram_approve_command_posts_github_issue_reply_for_issue_tasks(
+ telegram_client: TestClient,
+ tmp_path: Path,
+) -> None:
+ notifier = _StubTelegramNotifier()
+ github_issue_service = _StubGitHubIssueService()
+ manager_service = _build_manager_service(tmp_path / "manager-approve.sqlite3")
+ approval_service = getattr(telegram_client, "_approval_store")
+ app.dependency_overrides[get_telegram_notifier_service] = lambda: notifier
+ app.dependency_overrides[get_github_issue_service] = lambda: github_issue_service
+ app.dependency_overrides[get_housekeeper_service] = _night_housekeeper_service
+ app.dependency_overrides[get_manager_agent_service] = lambda: manager_service
+
+ try:
+ task_response = telegram_client.post(
+ "/api/v1/gateway/telegram/webhook",
+ json={
+ "update_id": 3162,
+ "message": {
+ "message_id": 155,
+ "text": "/task issue #123 带上修复摘要",
+ "chat": {"id": 9538, "type": "private"},
+ "from": {"id": 9538, "username": "task-user"},
+ },
+ },
+ )
+ assert task_response.status_code == 200
+ approvals = approval_service.list_requests(telegram_uid="9538", limit=10)
+ assert len(approvals) == 1
+ approval = approvals[0]
+
+ approve_response = telegram_client.post(
+ "/api/v1/gateway/telegram/webhook",
+ json={
+ "update_id": 3163,
+ "message": {
+ "message_id": 156,
+ "text": f"/approve {approval.approval_id} approve 发出去",
+ "chat": {"id": 9538, "type": "private"},
+ "from": {"id": 9538, "username": "task-user"},
+ },
+ },
+ )
+ assert approve_response.status_code == 200
+ assert approve_response.json()["accepted"] is True
+
+ resolved = approval_service.get_request(approval.approval_id)
+ assert resolved is not None
+ assert resolved.status.value == "approved"
+ assert resolved.metadata["comment_posted"] is True
+ assert github_issue_service.comments[0]["issue_reference"] == "owner/repo#123"
+ assert "Automated progress update" in github_issue_service.comments[0]["body"]
+ assert any("[GitHub Reply Posted]" in item["text"] for item in notifier.messages)
+ finally:
+ app.dependency_overrides.pop(get_telegram_notifier_service, None)
+ app.dependency_overrides.pop(get_github_issue_service, None)
+ app.dependency_overrides.pop(get_housekeeper_service, None)
+ app.dependency_overrides.pop(get_manager_agent_service, None)
+
+
def test_telegram_approve_command_lists_and_reads_pending_approvals(
telegram_client: TestClient,
) -> None:
diff --git a/tests/test_gateway_telegram_guards.py b/tests/test_gateway_telegram_guards.py
new file mode 100644
index 00000000..4a1028d3
--- /dev/null
+++ b/tests/test_gateway_telegram_guards.py
@@ -0,0 +1,182 @@
+from __future__ import annotations
+
+import sys
+
+import pytest
+from fastapi.testclient import TestClient
+
+from autoresearch.api.routers import gateway_telegram
+from tests.test_gateway_telegram import clear_gateway_guards, telegram_client # noqa: F401
+
+
+def test_mainline_webhook_happy_path_with_secret_header(
+ telegram_client: TestClient,
+ monkeypatch: pytest.MonkeyPatch,
+) -> None:
+ monkeypatch.setenv("AUTORESEARCH_TELEGRAM_SECRET_TOKEN", "mainline-secret")
+ monkeypatch.setenv(
+ "AUTORESEARCH_TELEGRAM_CLAUDE_COMMAND_OVERRIDE",
+ f"{sys.executable} -c \"print('guard-happy-ok')\"",
+ )
+ monkeypatch.setenv("AUTORESEARCH_TELEGRAM_APPEND_PROMPT", "false")
+
+ response = telegram_client.post(
+ "/api/v1/gateway/telegram/webhook",
+ headers={"x-telegram-bot-api-secret-token": "mainline-secret"},
+ json={
+ "update_id": 4101,
+ "message": {
+ "message_id": 501,
+ "text": "happy path",
+ "chat": {"id": 88001, "type": "private"},
+ },
+ },
+ )
+
+ assert response.status_code == 200
+ payload = response.json()
+ assert payload["accepted"] is True
+ assert payload["agent_run_id"] is not None
+
+
+def test_mainline_webhook_rejects_missing_secret_before_replay_guard(
+ telegram_client: TestClient,
+ monkeypatch: pytest.MonkeyPatch,
+) -> None:
+ monkeypatch.setenv("AUTORESEARCH_TELEGRAM_SECRET_TOKEN", "ordered-secret")
+
+ events: list[str] = []
+ original_validate = gateway_telegram._validate_secret_token
+ original_guard = gateway_telegram._guard_webhook_replay_and_rate
+
+ def wrapped_validate(raw_request) -> None:
+ events.append("secret")
+ return original_validate(raw_request)
+
+ def wrapped_guard(update) -> None:
+ events.append("guard")
+ return original_guard(update)
+
+ monkeypatch.setattr(gateway_telegram, "_validate_secret_token", wrapped_validate)
+ monkeypatch.setattr(gateway_telegram, "_guard_webhook_replay_and_rate", wrapped_guard)
+
+ response = telegram_client.post(
+ "/api/v1/gateway/telegram/webhook",
+ json={
+ "update_id": 4102,
+ "message": {
+ "message_id": 502,
+ "text": "missing secret",
+ "chat": {"id": 88002, "type": "private"},
+ },
+ },
+ )
+
+ assert response.status_code == 401
+ assert events == ["secret"]
+
+
+def test_mainline_webhook_runs_secret_check_before_replay_guard_on_success(
+ telegram_client: TestClient,
+ monkeypatch: pytest.MonkeyPatch,
+) -> None:
+ monkeypatch.setenv("AUTORESEARCH_TELEGRAM_SECRET_TOKEN", "ordered-secret")
+ monkeypatch.setenv(
+ "AUTORESEARCH_TELEGRAM_CLAUDE_COMMAND_OVERRIDE",
+ f"{sys.executable} -c \"print('ordered-ok')\"",
+ )
+ monkeypatch.setenv("AUTORESEARCH_TELEGRAM_APPEND_PROMPT", "false")
+
+ events: list[str] = []
+ original_validate = gateway_telegram._validate_secret_token
+ original_guard = gateway_telegram._guard_webhook_replay_and_rate
+
+ def wrapped_validate(raw_request) -> None:
+ events.append("secret")
+ return original_validate(raw_request)
+
+ def wrapped_guard(update) -> None:
+ events.append("guard")
+ return original_guard(update)
+
+ monkeypatch.setattr(gateway_telegram, "_validate_secret_token", wrapped_validate)
+ monkeypatch.setattr(gateway_telegram, "_guard_webhook_replay_and_rate", wrapped_guard)
+
+ response = telegram_client.post(
+ "/api/v1/gateway/telegram/webhook",
+ headers={"x-telegram-bot-api-secret-token": "ordered-secret"},
+ json={
+ "update_id": 4103,
+ "message": {
+ "message_id": 503,
+ "text": "ordered success",
+ "chat": {"id": 88003, "type": "private"},
+ },
+ },
+ )
+
+ assert response.status_code == 200
+ assert response.json()["accepted"] is True
+ assert events[:2] == ["secret", "guard"]
+
+
+def test_mainline_webhook_rejects_replayed_update_id(
+ telegram_client: TestClient,
+ monkeypatch: pytest.MonkeyPatch,
+) -> None:
+ monkeypatch.setenv("AUTORESEARCH_TELEGRAM_SECRET_TOKEN", "replay-secret")
+ monkeypatch.setenv(
+ "AUTORESEARCH_TELEGRAM_CLAUDE_COMMAND_OVERRIDE",
+ f"{sys.executable} -c \"print('replay-guard-ok')\"",
+ )
+ monkeypatch.setenv("AUTORESEARCH_TELEGRAM_APPEND_PROMPT", "false")
+
+ headers = {"x-telegram-bot-api-secret-token": "replay-secret"}
+ payload = {
+ "update_id": 4104,
+ "message": {
+ "message_id": 504,
+ "text": "replay check",
+ "chat": {"id": 88004, "type": "private"},
+ },
+ }
+
+ first = telegram_client.post("/api/v1/gateway/telegram/webhook", headers=headers, json=payload)
+ second = telegram_client.post("/api/v1/gateway/telegram/webhook", headers=headers, json=payload)
+
+ assert first.status_code == 200
+ assert second.status_code == 409
+
+
+def test_mainline_webhook_rejects_per_chat_rate_limit_overflow(
+ telegram_client: TestClient,
+ monkeypatch: pytest.MonkeyPatch,
+) -> None:
+ monkeypatch.setenv("AUTORESEARCH_TELEGRAM_SECRET_TOKEN", "rate-secret")
+ monkeypatch.setenv(
+ "AUTORESEARCH_TELEGRAM_CLAUDE_COMMAND_OVERRIDE",
+ f"{sys.executable} -c \"print('rate-guard-ok')\"",
+ )
+ monkeypatch.setenv("AUTORESEARCH_TELEGRAM_APPEND_PROMPT", "false")
+
+ headers = {"x-telegram-bot-api-secret-token": "rate-secret"}
+ chat_id = 88005
+
+ for i in range(1, 32):
+ response = telegram_client.post(
+ "/api/v1/gateway/telegram/webhook",
+ headers=headers,
+ json={
+ "update_id": 4200 + i,
+ "message": {
+ "message_id": 600 + i,
+ "text": f"rate-{i}",
+ "chat": {"id": chat_id, "type": "private"},
+ },
+ },
+ )
+
+ if i <= 30:
+ assert response.status_code == 200
+ else:
+ assert response.status_code == 429
diff --git a/tests/test_git_promotion_gate.py b/tests/test_git_promotion_gate.py
index d615050d..67baae64 100644
--- a/tests/test_git_promotion_gate.py
+++ b/tests/test_git_promotion_gate.py
@@ -126,6 +126,27 @@ def _write_patch(root: Path, *, filename: str = "src/demo.py") -> Path:
return patch_path
+def _write_large_patch(root: Path, *, filename: str, added_lines: int) -> Path:
+ patch_path = root / "promotion.patch"
+ additions = [f'+LINE_{index} = "{index}"' for index in range(added_lines)]
+ patch_path.write_text(
+ "\n".join(
+ [
+ f"diff --git a/{filename} b/{filename}",
+ "new file mode 100644",
+ "index 0000000..1111111",
+ "--- /dev/null",
+ f"+++ b/{filename}",
+ f"@@ -0,0 +1,{added_lines} @@",
+ *additions,
+ "",
+ ]
+ ),
+ encoding="utf-8",
+ )
+ return patch_path
+
+
def _intent(
patch_path: Path,
*,
@@ -233,6 +254,48 @@ def test_finalize_defaults_to_patch_when_patch_gates_pass(tmp_path: Path) -> Non
assert (artifacts_dir / "promotion_result.json").exists()
+def test_finalize_ignores_benign_runtime_artifacts_and_accepts_large_business_patch(
+ tmp_path: Path,
+) -> None:
+ patch_path = _write_large_patch(
+ tmp_path,
+ filename="apps/malu/lead_capture.py",
+ added_lines=600,
+ )
+ artifacts_dir = tmp_path / "artifacts"
+
+ service = GitPromotionGateService(repo_root=tmp_path)
+ preflight, result = service.finalize(
+ intent=PromotionIntent(
+ run_id="run-business-patch",
+ actor_role=PromotionActorRole.AGGREGATOR,
+ actor_id="aggregator-1",
+ writer_id="worker-1",
+ writer_lease_key="writer:business-patch",
+ patch_uri=str(patch_path),
+ changed_files=[
+ "apps/malu/lead_capture.py",
+ "apps/malu/README.md",
+ ".pytest_cache/README.md",
+ "tests/apps/__pycache__/test_malu_landing_page.cpython-314.pyc",
+ ],
+ base_ref="HEAD",
+ preferred_mode=GitPromotionMode.PATCH,
+ target_base_branch="main",
+ approval_granted=False,
+ metadata={},
+ ),
+ artifacts_dir=artifacts_dir,
+ )
+
+ checks = {item.id: item for item in preflight.checks}
+
+ assert result.success is True
+ assert result.mode is GitPromotionMode.PATCH
+ assert checks["gate.no_runtime_artifacts"].passed is True
+ assert checks["gate.max_patch_lines"].passed is True
+
+
def test_finalize_upgrades_patch_to_draft_pr_when_all_preconditions_pass(tmp_path: Path) -> None:
patch_path = _write_patch(tmp_path)
artifacts_dir = tmp_path / "artifacts"
diff --git a/tests/test_github_issue_service.py b/tests/test_github_issue_service.py
new file mode 100644
index 00000000..09059067
--- /dev/null
+++ b/tests/test_github_issue_service.py
@@ -0,0 +1,56 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+from autoresearch.core.services.github_issue_service import (
+ GitHubIssueCommentRead,
+ GitHubIssueRead,
+ GitHubIssueReference,
+ GitHubIssueService,
+)
+
+
+def test_resolve_issue_reference_supports_url_and_shorthand(tmp_path: Path) -> None:
+ service = GitHubIssueService(repo_root=tmp_path)
+
+ from_url = service.resolve_issue_reference("https://github.com/openai/example/issues/42")
+ assert from_url.owner == "openai"
+ assert from_url.repo == "example"
+ assert from_url.number == 42
+
+ from_ref = service.resolve_issue_reference("openai/example#43")
+ assert from_ref.owner == "openai"
+ assert from_ref.repo == "example"
+ assert from_ref.number == 43
+
+
+def test_resolve_issue_reference_supports_current_repo_issue_numbers(tmp_path: Path) -> None:
+ service = GitHubIssueService(repo_root=tmp_path)
+ service._resolve_current_repo = lambda: ("owner", "repo") # type: ignore[method-assign]
+
+ reference = service.resolve_issue_reference("#44")
+ assert reference.owner == "owner"
+ assert reference.repo == "repo"
+ assert reference.number == 44
+
+
+def test_build_manager_prompt_includes_issue_context_and_note(tmp_path: Path) -> None:
+ service = GitHubIssueService(repo_root=tmp_path)
+ issue = GitHubIssueRead(
+ reference=GitHubIssueReference(owner="owner", repo="repo", number=45),
+ title="Telegram task dispatch should create approvals",
+ body="Expected behavior: issue tasks should ask before replying externally.",
+ url="https://github.com/owner/repo/issues/45",
+ state="OPEN",
+ author="founder",
+ labels=("bug", "telegram"),
+ comments=(
+ GitHubIssueCommentRead(author="reviewer", body="Please keep the fix narrow."),
+ ),
+ )
+
+ prompt = service.build_manager_prompt(issue, operator_note="先保证审批链别断。")
+ assert "owner/repo#45" in prompt
+ assert "先保证审批链别断。" in prompt
+ assert "Please keep the fix narrow." in prompt
+ assert "prepare a draft PR when possible" in prompt
diff --git a/tests/test_housekeeper.py b/tests/test_housekeeper.py
new file mode 100644
index 00000000..e6158eaf
--- /dev/null
+++ b/tests/test_housekeeper.py
@@ -0,0 +1,198 @@
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from pathlib import Path
+
+from autoresearch.agent_protocol.models import DriverResult, JobSpec, RunSummary, ValidationReport
+from autoresearch.agents.manager_agent import ManagerAgentService
+from autoresearch.core.services.approval_store import ApprovalStoreService
+from autoresearch.core.services.housekeeper import HousekeeperService
+from autoresearch.core.services.telegram_notify import TelegramNotifierService
+from autoresearch.shared.housekeeper_contract import (
+ CircuitBreakerStatus,
+ ExplorationBlockerReason,
+ HousekeeperChangeReason,
+ HousekeeperMode,
+ HousekeeperModeUpdateRequest,
+)
+from autoresearch.shared.manager_agent_contract import ManagerDispatchRequest
+from autoresearch.shared.models import ApprovalRequestCreateRequest
+from autoresearch.shared.store import InMemoryRepository
+
+
+def _write(path: Path, content: str) -> None:
+ path.parent.mkdir(parents=True, exist_ok=True)
+ path.write_text(content, encoding="utf-8")
+
+
+def _seed_admin_dashboard_repo(repo_root: Path) -> None:
+ _write(repo_root / "panel" / "app.tsx", "export const App = () => null;\n")
+ _write(repo_root / "src" / "autoresearch" / "api" / "routers" / "panel.py", "router = object()\n")
+ _write(repo_root / "src" / "autoresearch" / "api" / "routers" / "admin.py", "router = object()\n")
+ _write(repo_root / "src" / "autoresearch" / "core" / "services" / "metrics_dashboard.py", "def ok():\n return True\n")
+ _write(repo_root / "tests" / "test_panel_security.py", "def test_ok():\n assert True\n")
+ _write(repo_root / "tests" / "test_admin_managed_skills.py", "def test_admin_ok():\n assert True\n")
+
+
+def _successful_run_summary(job: JobSpec) -> RunSummary:
+ return RunSummary(
+ run_id=job.run_id,
+ final_status="ready_for_promotion",
+ driver_result=DriverResult(
+ run_id=job.run_id,
+ agent_id=job.agent_id,
+ status="succeeded",
+ summary="ok",
+ changed_paths=list(job.policy.allowed_paths),
+ recommended_action="promote",
+ ),
+ validation=ValidationReport(run_id=job.run_id, passed=True),
+ promotion_patch_uri="/tmp/demo.patch",
+ )
+
+
+def _service() -> HousekeeperService:
+ return HousekeeperService(
+ state_repository=InMemoryRepository(),
+ budget_repository=InMemoryRepository(),
+ exploration_repository=InMemoryRepository(),
+ )
+
+
+def test_housekeeper_manual_override_replaces_prior_override() -> None:
+ service = _service()
+ base_now = datetime(2026, 3, 31, 12, 0, tzinfo=timezone.utc)
+ state = service.get_state(now=base_now)
+ assert state.scheduled_mode is HousekeeperMode.DAY_SAFE
+
+ first = service.update_mode(
+ HousekeeperModeUpdateRequest(
+ action="set_manual_override",
+ target_mode=HousekeeperMode.NIGHT_READONLY_EXPLORE,
+ changed_by="test",
+ reason=HousekeeperChangeReason.MANUAL_API,
+ ),
+ now=base_now,
+ )
+ assert first.manual_override_mode is HousekeeperMode.NIGHT_READONLY_EXPLORE
+ assert first.effective_mode is HousekeeperMode.NIGHT_READONLY_EXPLORE
+
+ replaced = service.update_mode(
+ HousekeeperModeUpdateRequest(
+ action="set_manual_override",
+ target_mode=HousekeeperMode.DAY_SAFE,
+ changed_by="test",
+ reason=HousekeeperChangeReason.MANUAL_API,
+ ),
+ now=base_now,
+ )
+ assert replaced.manual_override_mode is HousekeeperMode.DAY_SAFE
+ assert replaced.effective_mode is HousekeeperMode.DAY_SAFE
+
+
+def test_housekeeper_prepare_manager_request_defers_heavy_dispatch_in_day_mode(tmp_path: Path) -> None:
+ repo_root = tmp_path / "repo"
+ _seed_admin_dashboard_repo(repo_root)
+ manager_service = ManagerAgentService(repository=InMemoryRepository(), repo_root=repo_root)
+ housekeeper = _service()
+
+ prepared, assessment, state = housekeeper.prepare_manager_request(
+ ManagerDispatchRequest(
+ prompt="在 Admin Panel 里加一个带图表的实时服务器资源监控大屏。",
+ auto_dispatch=True,
+ ),
+ manager_service=manager_service,
+ trigger_source="test",
+ now=datetime(2026, 3, 31, 12, 0, tzinfo=timezone.utc),
+ )
+
+ assert state.effective_mode is HousekeeperMode.DAY_SAFE
+ assert assessment.plan_shape == "task_dag"
+ assert assessment.fanout_count == 3
+ assert prepared.auto_dispatch is False
+ assert prepared.pipeline_target == "patch"
+ assert prepared.metadata["deferred_reason"] == "deferred_to_night"
+ assert prepared.metadata["execution_profile"]["profile_name"] == "day_safe"
+
+
+def test_housekeeper_morning_summary_uses_four_sections(tmp_path: Path) -> None:
+ repo_root = tmp_path / "repo"
+ _seed_admin_dashboard_repo(repo_root)
+ manager_service = ManagerAgentService(
+ repository=InMemoryRepository(),
+ repo_root=repo_root,
+ dispatch_runner=_successful_run_summary,
+ )
+ dispatch = manager_service.create_dispatch(
+ ManagerDispatchRequest(
+ prompt="在 Admin Panel 里加一个带图表的实时服务器资源监控大屏。",
+ auto_dispatch=False,
+ )
+ )
+ manager_service.execute_dispatch(dispatch.dispatch_id)
+
+ approval_service = ApprovalStoreService(repository=InMemoryRepository())
+ approval_service.create_request(
+ ApprovalRequestCreateRequest(
+ title="Review manager result",
+ telegram_uid="10001",
+ )
+ )
+ housekeeper = _service()
+ summary = housekeeper.create_morning_summary(
+ manager_service=manager_service,
+ planner_service=type("PlannerStub", (), {"list": lambda self: [], "list_pending": lambda self, limit=20: []})(),
+ approval_service=approval_service,
+ notifier=TelegramNotifierService(bot_token=None),
+ media_jobs=[],
+ now=datetime(2026, 4, 1, 0, 30, tzinfo=timezone.utc),
+ )
+
+ assert "昨夜完成了什么" in summary.summary_text
+ assert "失败/阻塞了什么" in summary.summary_text
+ assert "今天需要你决定什么" in summary.summary_text
+ assert "系统当前模式与待执行队列" in summary.summary_text
+ assert summary.decision_items
+
+
+def test_housekeeper_night_tick_reports_circuit_breaker_blocker_reason() -> None:
+ housekeeper = _service()
+ housekeeper.update_mode(
+ HousekeeperModeUpdateRequest(
+ action="set_manual_override",
+ target_mode=HousekeeperMode.NIGHT_READONLY_EXPLORE,
+ changed_by="test",
+ reason=HousekeeperChangeReason.MANUAL_API,
+ ),
+ now=datetime(2026, 3, 31, 16, 0, tzinfo=timezone.utc),
+ )
+ housekeeper.update_mode(
+ HousekeeperModeUpdateRequest(
+ action="apply_schedule",
+ target_mode=HousekeeperMode.DAY_SAFE,
+ changed_by="system",
+ reason=HousekeeperChangeReason.CIRCUIT_BREAKER,
+ ),
+ now=datetime(2026, 3, 31, 16, 0, tzinfo=timezone.utc),
+ )
+ state = housekeeper.get_state(now=datetime(2026, 3, 31, 16, 0, tzinfo=timezone.utc))
+ tripped = state.model_copy(
+ update={
+ "circuit_breaker_state": state.circuit_breaker_state.model_copy(
+ update={"status": CircuitBreakerStatus.OPEN}
+ ),
+ }
+ )
+ housekeeper._state_repository.save(tripped.state_id, tripped)
+
+ tick = housekeeper.execute_night_explore_tick(
+ manager_service=type("ManagerStub", (), {})(),
+ planner_service=type("PlannerStub", (), {})(),
+ notifier=TelegramNotifierService(bot_token=None),
+ media_jobs=[],
+ now=datetime(2026, 3, 31, 16, 0, tzinfo=timezone.utc),
+ )
+
+ assert tick.executed is False
+ assert tick.skipped_reason == "circuit_breaker_open"
+ assert tick.blocker_reason is ExplorationBlockerReason.CIRCUIT_BREAKER_OPEN
diff --git a/tests/test_launch_ai_lab.py b/tests/test_launch_ai_lab.py
new file mode 100644
index 00000000..a78a5509
--- /dev/null
+++ b/tests/test_launch_ai_lab.py
@@ -0,0 +1,162 @@
+from __future__ import annotations
+
+import os
+import stat
+import subprocess
+from pathlib import Path
+
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+SCRIPT = REPO_ROOT / "scripts" / "launch_ai_lab.sh"
+
+
+def _write_executable(path: Path, content: str) -> None:
+ path.write_text(content, encoding="utf-8")
+ path.chmod(path.stat().st_mode | stat.S_IXUSR)
+
+
+def _base_env(tmp_path: Path) -> dict[str, str]:
+ workspace = tmp_path / "workspace"
+ cache_dir = tmp_path / "cache"
+ log_dir = tmp_path / "logs"
+ workspace.mkdir()
+ cache_dir.mkdir()
+ log_dir.mkdir()
+
+ fake_bin = tmp_path / "bin"
+ fake_bin.mkdir()
+ fake_docker = fake_bin / "docker"
+ _write_executable(
+ fake_docker,
+ """#!/usr/bin/env bash
+set -euo pipefail
+cmd="${1:-}"
+sub="${2:-}"
+case "${cmd} ${sub}" in
+ "context show")
+ printf '%s\n' "${FAKE_DOCKER_CONTEXT:-desktop-linux}"
+ ;;
+ "version --format")
+ if [[ "${FAKE_DOCKER_INFO_OK:-0}" == "1" ]] || [[ "${DOCKER_HOST:-}" == "${FAKE_REPO_DOCKER_HOST:-}" ]]; then
+ printf '%s\n' "27.0.0"
+ exit 0
+ fi
+ exit 1
+ ;;
+ "info ")
+ if [[ "${FAKE_DOCKER_INFO_OK:-0}" == "1" ]] || [[ "${DOCKER_HOST:-}" == "${FAKE_REPO_DOCKER_HOST:-}" ]]; then
+ exit 0
+ fi
+ exit 1
+ ;;
+ "image inspect")
+ exit 0
+ ;;
+ *)
+ exit 0
+ ;;
+esac
+""",
+ )
+
+ inaccessible_socket = tmp_path / "other-user.sock"
+ inaccessible_socket.write_text("", encoding="utf-8")
+ inaccessible_socket.chmod(0)
+
+ env = os.environ.copy()
+ env.update(
+ {
+ "PATH": f"{fake_bin}:{env['PATH']}",
+ "ENV_FILE": str(tmp_path / "missing.env"),
+ "WORKSPACE_DIR": str(workspace),
+ "CACHE_DIR": str(cache_dir),
+ "LOG_DIR": str(log_dir),
+ "OPENHANDS_HOME_DIR": str(log_dir / "openhands-home"),
+ "AUTO_OPEN_DOCKER": "0",
+ "AI_LAB_FORCE_DOCKER_RUN": "1",
+ "AI_LAB_GUARDRAIL_DOCKER_CONTEXT": "colima",
+ "DOCKER_CONTEXT": "colima",
+ "DOCKER_HOST": f"unix://{inaccessible_socket}",
+ }
+ )
+ return env
+
+
+def test_launch_ai_lab_rejects_inaccessible_configured_socket(tmp_path: Path) -> None:
+ env = _base_env(tmp_path)
+ env["AUTO_START_COLIMA"] = "0"
+
+ completed = subprocess.run(
+ ["bash", str(SCRIPT), "status"],
+ cwd=REPO_ROOT,
+ env=env,
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+
+ assert completed.returncode == 1
+ assert "Docker socket is configured but not accessible" in completed.stderr
+
+
+def test_launch_ai_lab_can_fallback_to_repo_managed_colima(tmp_path: Path) -> None:
+ env = _base_env(tmp_path)
+ env["AUTO_START_COLIMA"] = "1"
+ colima_home = tmp_path / "colima-home"
+ repo_socket = colima_home / "default" / "docker.sock"
+ helper = tmp_path / "fake_colima_helper.sh"
+ _write_executable(
+ helper,
+ """#!/usr/bin/env bash
+set -euo pipefail
+mkdir -p "${COLIMA_HOME_PATH}/${COLIMA_PROFILE:-default}"
+: > "${COLIMA_HOME_PATH}/${COLIMA_PROFILE:-default}/docker.sock"
+""",
+ )
+ env["COLIMA_HOME_PATH"] = str(colima_home)
+ env["AI_LAB_COLIMA_HELPER"] = str(helper)
+ env["FAKE_REPO_DOCKER_HOST"] = f"unix://{repo_socket}"
+
+ completed = subprocess.run(
+ ["bash", str(SCRIPT), "status"],
+ cwd=REPO_ROOT,
+ env=env,
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+
+ assert completed.returncode == 0
+ assert "repo-managed Colima is ready" in completed.stdout
+
+
+def test_launch_ai_lab_can_fallback_to_current_user_colima(tmp_path: Path) -> None:
+ env = _base_env(tmp_path)
+ env["AUTO_START_COLIMA"] = "1"
+
+ home_dir = tmp_path / "home"
+ home_dir.mkdir()
+ current_user_socket = home_dir / ".colima" / "default" / "docker.sock"
+ fake_colima = tmp_path / "bin" / "colima"
+ _write_executable(
+ fake_colima,
+ """#!/usr/bin/env bash
+set -euo pipefail
+mkdir -p "${HOME}/.colima/${COLIMA_PROFILE:-default}"
+: > "${HOME}/.colima/${COLIMA_PROFILE:-default}/docker.sock"
+""",
+ )
+ env["HOME"] = str(home_dir)
+ env["FAKE_REPO_DOCKER_HOST"] = f"unix://{current_user_socket}"
+
+ completed = subprocess.run(
+ ["bash", str(SCRIPT), "status"],
+ cwd=REPO_ROOT,
+ env=env,
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+
+ assert completed.returncode == 0
+ assert "current-user Colima is ready" in completed.stdout
diff --git a/tests/test_manager_agent.py b/tests/test_manager_agent.py
new file mode 100644
index 00000000..ff0f7513
--- /dev/null
+++ b/tests/test_manager_agent.py
@@ -0,0 +1,263 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+from fastapi.testclient import TestClient
+
+from autoresearch.agent_protocol.models import DriverResult, JobSpec, RunSummary, ValidationReport
+from autoresearch.agents.manager_agent import ManagerAgentService
+from autoresearch.api.dependencies import get_housekeeper_service, get_manager_agent_service
+from autoresearch.api.main import app
+from autoresearch.core.services.housekeeper import HousekeeperService
+from autoresearch.shared.housekeeper_contract import HousekeeperChangeReason, HousekeeperMode, HousekeeperModeUpdateRequest
+from autoresearch.shared.manager_agent_contract import (
+ ManagerDispatchRequest,
+ ManagerPlanStrategy,
+)
+from autoresearch.shared.models import JobStatus
+from autoresearch.shared.store import InMemoryRepository
+
+
+def _write(path: Path, content: str) -> None:
+ path.parent.mkdir(parents=True, exist_ok=True)
+ path.write_text(content, encoding="utf-8")
+
+
+def _successful_run_summary(job: JobSpec) -> RunSummary:
+ return RunSummary(
+ run_id=job.run_id,
+ final_status="ready_for_promotion",
+ driver_result=DriverResult(
+ run_id=job.run_id,
+ agent_id=job.agent_id,
+ status="succeeded",
+ summary="manager dispatch completed successfully",
+ changed_paths=list(job.policy.allowed_paths),
+ recommended_action="promote",
+ ),
+ validation=ValidationReport(run_id=job.run_id, passed=True),
+ promotion_patch_uri="/tmp/manager-dispatch.patch",
+ )
+
+
+def _night_housekeeper_service() -> HousekeeperService:
+ service = HousekeeperService(
+ state_repository=InMemoryRepository(),
+ budget_repository=InMemoryRepository(),
+ exploration_repository=InMemoryRepository(),
+ )
+ service.update_mode(
+ HousekeeperModeUpdateRequest(
+ action="set_manual_override",
+ target_mode=HousekeeperMode.NIGHT_READONLY_EXPLORE,
+ changed_by="test",
+ reason=HousekeeperChangeReason.MANUAL_API,
+ )
+ )
+ return service
+
+
+def _seed_basic_panel_repo(repo_root: Path) -> None:
+ _write(repo_root / "panel" / "app.tsx", "export const App = () => null;\n")
+ _write(repo_root / "src" / "autoresearch" / "api" / "routers" / "panel.py", "router = object()\n")
+ _write(repo_root / "src" / "autoresearch" / "api" / "routers" / "openclaw.py", "router = object()\n")
+ _write(repo_root / "tests" / "test_panel_security.py", "def test_ok():\n assert True\n")
+
+
+def _seed_admin_dashboard_repo(repo_root: Path) -> None:
+ _seed_basic_panel_repo(repo_root)
+ _write(repo_root / "src" / "autoresearch" / "api" / "routers" / "admin.py", "router = object()\n")
+ _write(
+ repo_root / "src" / "autoresearch" / "core" / "services" / "metrics_dashboard.py",
+ "def collect_metrics() -> dict[str, int]:\n return {'cpu': 1}\n",
+ )
+ _write(repo_root / "tests" / "test_admin_managed_skills.py", "def test_admin_ok():\n assert True\n")
+ _write(repo_root / "tests" / "test_admin_backend.py", "def test_admin_backend_ok():\n assert True\n")
+
+
+def test_manager_agent_translates_fuzzy_game_prompt_into_worker_contract(tmp_path: Path) -> None:
+ repo_root = tmp_path / "repo"
+ _seed_basic_panel_repo(repo_root)
+
+ service = ManagerAgentService(
+ repository=InMemoryRepository(),
+ repo_root=repo_root,
+ )
+
+ dispatch = service.create_dispatch(
+ ManagerDispatchRequest(
+ prompt="我想做个小游戏,先在现有 panel 里做一个最小可玩的版本。",
+ auto_dispatch=False,
+ )
+ )
+
+ assert dispatch.status is JobStatus.CREATED
+ assert dispatch.selected_intent is not None
+ assert dispatch.selected_intent.intent_id == "game_prototype"
+ assert dispatch.execution_plan is not None
+ assert dispatch.execution_plan.strategy is ManagerPlanStrategy.SINGLE_TASK
+ assert len(dispatch.execution_plan.tasks) == 1
+ assert "panel/**" in dispatch.worker_spec.allowed_paths
+ assert "tests/test_panel_security.py" in dispatch.worker_spec.allowed_paths
+ assert dispatch.worker_spec.test_command == "pytest -q tests/test_panel_security.py"
+ assert dispatch.agent_job is not None
+ assert dispatch.agent_job.metadata["manager_intent_label"] == "game_prototype"
+ assert "小游戏" in dispatch.worker_spec.metadata["manager_prompt"]
+
+
+def test_manager_agent_decomposes_complex_prompt_into_task_dag(tmp_path: Path) -> None:
+ repo_root = tmp_path / "repo"
+ _seed_admin_dashboard_repo(repo_root)
+
+ service = ManagerAgentService(
+ repository=InMemoryRepository(),
+ repo_root=repo_root,
+ )
+
+ dispatch = service.create_dispatch(
+ ManagerDispatchRequest(
+ prompt="在 Admin Panel 里加一个带图表的实时服务器资源监控大屏。",
+ auto_dispatch=False,
+ )
+ )
+
+ assert dispatch.execution_plan is not None
+ assert dispatch.execution_plan.strategy is ManagerPlanStrategy.TASK_DAG
+ assert len(dispatch.execution_plan.tasks) == 3
+
+ backend_task, tests_task, frontend_task = dispatch.execution_plan.tasks
+ assert backend_task.stage.value == "backend"
+ assert tests_task.stage.value == "tests"
+ assert frontend_task.stage.value == "frontend"
+ assert tests_task.depends_on == [backend_task.task_id]
+ assert frontend_task.depends_on == [backend_task.task_id, tests_task.task_id]
+ assert any(path.startswith("src/autoresearch/api/routers/admin.py") for path in backend_task.worker_spec.allowed_paths)
+ assert tests_task.worker_spec.allowed_paths == [
+ "tests/test_panel_security.py",
+ "tests/test_admin_managed_skills.py",
+ ]
+ assert "panel/**" in frontend_task.worker_spec.allowed_paths
+ assert frontend_task.worker_spec.metadata["manager_task_stage"] == "frontend"
+
+
+def test_manager_agent_routes_issue_style_landing_page_prompt_to_business_dag(tmp_path: Path) -> None:
+ repo_root = tmp_path / "repo"
+ _seed_admin_dashboard_repo(repo_root)
+
+ service = ManagerAgentService(
+ repository=InMemoryRepository(),
+ repo_root=repo_root,
+ )
+
+ dispatch = service.create_dispatch(
+ ManagerDispatchRequest(
+ prompt=(
+ "Resolve the following GitHub issue in the current repository through the existing patch-only "
+ "manager pipeline.\n\n"
+ "Title: Chaos Run: 玛露遮瑕膏落地页商业化压力测试\n"
+ "Issue body:\n"
+ "1. 为玛露 6g 罐装遮瑕膏设计一个最小可用的高端浅色风落地页。\n"
+ "2. 提供预约/留资后端接口。\n"
+ "3. 补齐至少一组边界测试。\n"
+ "Deliver the smallest useful fix, stay within scoped files, update tests when needed."
+ ),
+ auto_dispatch=False,
+ )
+ )
+
+ assert dispatch.selected_intent is not None
+ assert dispatch.selected_intent.intent_id == "product_landing_page"
+ assert dispatch.execution_plan is not None
+ assert dispatch.execution_plan.strategy is ManagerPlanStrategy.TASK_DAG
+ backend_task, tests_task, frontend_task = dispatch.execution_plan.tasks
+ assert dispatch.selected_intent.metadata["surface_slug"] == "malu"
+ assert dispatch.selected_intent.metadata["surface_root"] == "apps/malu"
+ assert backend_task.worker_spec.allowed_paths == [
+ "apps/malu/**",
+ "tests/apps/__init__.py",
+ "tests/apps/test_malu_landing_page.py",
+ ]
+ assert backend_task.worker_spec.test_command == "pytest -q tests/apps/test_malu_landing_page.py"
+ assert tests_task.worker_spec.allowed_paths == [
+ "tests/apps/__init__.py",
+ "tests/apps/test_malu_landing_page.py",
+ ]
+ assert tests_task.worker_spec.test_command == "pytest -q tests/apps/test_malu_landing_page.py"
+ assert frontend_task.worker_spec.allowed_paths == ["apps/malu/**"]
+ assert frontend_task.worker_spec.metadata["manager_intent_label"] == "product_landing_page"
+
+
+def test_manager_agent_routes_direct_malu_landing_page_prompt_to_product_intent(tmp_path: Path) -> None:
+ repo_root = tmp_path / "repo"
+ _seed_admin_dashboard_repo(repo_root)
+
+ service = ManagerAgentService(
+ repository=InMemoryRepository(),
+ repo_root=repo_root,
+ )
+
+ dispatch = service.create_dispatch(
+ ManagerDispatchRequest(
+ prompt="给我做一个玛露 6g 遮瑕膏落地页,带浅色品牌 UI、预约留资接口和基础测试。",
+ auto_dispatch=False,
+ )
+ )
+
+ assert dispatch.selected_intent is not None
+ assert dispatch.selected_intent.intent_id == "product_landing_page"
+ assert dispatch.execution_plan is not None
+ assert dispatch.execution_plan.strategy is ManagerPlanStrategy.TASK_DAG
+ assert dispatch.selected_intent.allowed_paths == [
+ "apps/malu/**",
+ "tests/apps/__init__.py",
+ "tests/apps/test_malu_landing_page.py",
+ ]
+ assert dispatch.execution_plan.tasks[0].worker_spec.metadata["manager_intent_label"] == "product_landing_page"
+ assert dispatch.selected_intent.metadata["surface_root"] == "apps/malu"
+ assert dispatch.execution_plan.tasks[0].worker_spec.allowed_paths == [
+ "apps/malu/**",
+ "tests/apps/__init__.py",
+ "tests/apps/test_malu_landing_page.py",
+ ]
+ assert dispatch.execution_plan.tasks[1].worker_spec.allowed_paths == [
+ "tests/apps/__init__.py",
+ "tests/apps/test_malu_landing_page.py",
+ ]
+ assert dispatch.execution_plan.tasks[2].worker_spec.allowed_paths == ["apps/malu/**"]
+
+
+def test_manager_agent_api_dispatch_executes_background_plan(tmp_path: Path) -> None:
+ repo_root = tmp_path / "repo"
+ _seed_admin_dashboard_repo(repo_root)
+
+ service = ManagerAgentService(
+ repository=InMemoryRepository(),
+ repo_root=repo_root,
+ dispatch_runner=_successful_run_summary,
+ )
+
+ app.dependency_overrides[get_manager_agent_service] = lambda: service
+ app.dependency_overrides[get_housekeeper_service] = _night_housekeeper_service
+ with TestClient(app) as client:
+ response = client.post(
+ "/api/v1/agents/manager/dispatch",
+ json={"prompt": "在 Admin Panel 里加一个带图表的实时服务器资源监控大屏。"},
+ )
+ assert response.status_code == 202
+ payload = response.json()
+ assert payload["status"] == "queued"
+ dispatch_id = payload["dispatch_id"]
+
+ get_response = client.get(f"/api/v1/agents/manager/dispatches/{dispatch_id}")
+ assert get_response.status_code == 200
+ current = get_response.json()
+
+ app.dependency_overrides.clear()
+
+ assert current["status"] == "completed"
+ assert current["run_summary"]["final_status"] == "ready_for_promotion"
+ assert current["execution_plan"]["strategy"] == "task_dag"
+ assert len(current["execution_plan"]["tasks"]) == 3
+ assert all(task["status"] == "completed" for task in current["execution_plan"]["tasks"])
+ assert current["execution_plan"]["tasks"][0]["run_summary"]["final_status"] == "ready_for_promotion"
+ assert current["execution_plan"]["tasks"][2]["metadata"]["manager_stage"] == "frontend"
diff --git a/tests/test_media_jobs.py b/tests/test_media_jobs.py
new file mode 100644
index 00000000..c354e767
--- /dev/null
+++ b/tests/test_media_jobs.py
@@ -0,0 +1,122 @@
+from __future__ import annotations
+
+from pathlib import Path
+import subprocess
+
+import pytest
+
+from autoresearch.core.services.media_jobs import MediaJobService
+from autoresearch.shared.media_job_contract import MediaJobMode, MediaJobRequest, MediaTargetBucket
+from autoresearch.shared.store import InMemoryRepository
+
+
+class _FakeRunner:
+ def __init__(self) -> None:
+ self.commands: list[list[str]] = []
+
+ def __call__(self, command: list[str]) -> subprocess.CompletedProcess[str]:
+ self.commands.append(command)
+ if "--dump-single-json" in command:
+ return subprocess.CompletedProcess(
+ command,
+ 0,
+ stdout='{"title":"Demo","id":"abc123","uploader":"alice","duration":12}',
+ stderr="",
+ )
+
+ output_template = command[command.index("-o") + 1]
+ output_path = (
+ output_template.replace("%(title)s", "Demo")
+ .replace("%(id)s", "abc123")
+ .replace("%(uploader)s", "alice")
+ .replace("%(upload_date)s", "20260331")
+ .replace("%(ext)s", "mp3" if "-x" in command else "mp4")
+ )
+ path = Path(output_path)
+ path.parent.mkdir(parents=True, exist_ok=True)
+ path.write_text("media", encoding="utf-8")
+ return subprocess.CompletedProcess(command, 0, stdout="", stderr="")
+
+
+def _service(tmp_path: Path, runner: _FakeRunner | None = None) -> MediaJobService:
+ return MediaJobService(
+ repository=InMemoryRepository(),
+ event_repository=InMemoryRepository(),
+ media_root=tmp_path / "media",
+ allowed_domains={"youtube.com", "youtu.be", "bilibili.com"},
+ command_runner=runner or _FakeRunner(),
+ )
+
+
+def test_media_job_service_parses_explicit_and_bare_urls(tmp_path: Path) -> None:
+ service = _service(tmp_path)
+
+ explicit = service.parse_telegram_task("audio https://youtu.be/demo")
+ assert explicit is not None
+ assert explicit.mode is MediaJobMode.AUDIO
+ assert explicit.target_bucket is MediaTargetBucket.AUDIO
+
+ bare = service.parse_telegram_task("https://www.youtube.com/watch?v=demo")
+ assert bare is not None
+ assert bare.mode is MediaJobMode.VIDEO
+
+ assert service.parse_telegram_task("https://example.com/article") is None
+ assert service.parse_telegram_task("audio https://example.com/file.mp3") is None
+
+
+def test_media_job_service_executes_with_whitelisted_template_and_writes_metadata(tmp_path: Path) -> None:
+ runner = _FakeRunner()
+ service = _service(tmp_path, runner=runner)
+ job = service.create(
+ MediaJobRequest(
+ url="https://youtu.be/demo",
+ mode=MediaJobMode.AUDIO,
+ target_bucket=MediaTargetBucket.AUDIO,
+ filename_template="{title}-{id}",
+ )
+ )
+
+ completed = service.execute(job.job_id)
+
+ assert completed.status.value == "completed"
+ assert completed.title == "Demo"
+ assert completed.uploader == "alice"
+ assert completed.metadata_path is not None
+ assert Path(completed.metadata_path).exists()
+ assert completed.output_files
+ assert Path(completed.output_files[0]).exists()
+ assert Path(completed.output_files[0]).parent.name == job.job_id
+ assert len(runner.commands) == 2
+
+
+def test_media_job_service_only_returns_files_for_current_job(tmp_path: Path) -> None:
+ runner = _FakeRunner()
+ service = _service(tmp_path, runner=runner)
+ bucket_dir = tmp_path / "media" / "audio"
+ bucket_dir.mkdir(parents=True, exist_ok=True)
+ (bucket_dir / "stale.mp3").write_text("stale", encoding="utf-8")
+
+ job = service.create(
+ MediaJobRequest(
+ url="https://youtu.be/demo",
+ mode=MediaJobMode.AUDIO,
+ target_bucket=MediaTargetBucket.AUDIO,
+ filename_template="{title}-{id}",
+ )
+ )
+
+ completed = service.execute(job.job_id)
+
+ assert completed.status.value == "completed"
+ assert all("stale.mp3" not in path for path in completed.output_files)
+ assert all(f"/audio/{job.job_id}/" in path for path in completed.output_files)
+
+
+def test_media_job_request_rejects_unapproved_template_tokens() -> None:
+ with pytest.raises(ValueError):
+ MediaJobRequest(
+ url="https://youtu.be/demo",
+ mode=MediaJobMode.VIDEO,
+ target_bucket=MediaTargetBucket.VIDEO,
+ filename_template="{title}-{badtoken}",
+ )
diff --git a/tests/test_mock_adapter.py b/tests/test_mock_adapter.py
new file mode 100644
index 00000000..b1a40ccc
--- /dev/null
+++ b/tests/test_mock_adapter.py
@@ -0,0 +1,108 @@
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+import subprocess
+import sys
+
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+MOCK_ADAPTER = REPO_ROOT / "drivers" / "mock_adapter.sh"
+
+
+def _run_mock_adapter(tmp_path: Path, *, allowed_paths: list[str], validator_command: str) -> tuple[dict[str, object], Path]:
+ workspace = tmp_path / "workspace"
+ workspace.mkdir(parents=True, exist_ok=True)
+ job_path = tmp_path / "job.json"
+ result_path = tmp_path / "driver_result.json"
+ job_path.write_text(
+ json.dumps(
+ {
+ "run_id": "run-mock",
+ "agent_id": "mock",
+ "task": "Create a bounded patch candidate.",
+ "policy": {"allowed_paths": allowed_paths},
+ "validators": [
+ {
+ "id": "worker.test_command",
+ "kind": "command",
+ "command": validator_command,
+ }
+ ],
+ },
+ ensure_ascii=False,
+ indent=2,
+ ),
+ encoding="utf-8",
+ )
+
+ completed = subprocess.run(
+ [str(MOCK_ADAPTER)],
+ env={
+ **os.environ,
+ "AEP_WORKSPACE": str(workspace),
+ "AEP_JOB_SPEC": str(job_path),
+ "AEP_RESULT_PATH": str(result_path),
+ "PY_BIN": sys.executable,
+ },
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+
+ assert completed.returncode == 0, completed.stderr
+ return json.loads(result_path.read_text(encoding="utf-8")), workspace
+
+
+def test_mock_adapter_prefers_validator_targeted_test_file(tmp_path: Path) -> None:
+ payload, workspace = _run_mock_adapter(
+ tmp_path,
+ allowed_paths=["scripts/check_prompt_hygiene.py", "tests/test_check_prompt_hygiene.py"],
+ validator_command=f"{sys.executable} -m pytest -q tests/test_check_prompt_hygiene.py",
+ )
+
+ target = workspace / "tests" / "test_check_prompt_hygiene.py"
+ assert payload["changed_paths"] == ["tests/test_check_prompt_hygiene.py"]
+ assert "def test_mock_autoresearch_candidate" in target.read_text(encoding="utf-8")
+
+
+def test_mock_adapter_falls_back_to_first_allowed_source_file(tmp_path: Path) -> None:
+ payload, workspace = _run_mock_adapter(
+ tmp_path,
+ allowed_paths=["scripts/check_prompt_hygiene.py"],
+ validator_command=f"{sys.executable} -m py_compile scripts/check_prompt_hygiene.py",
+ )
+
+ target = workspace / "scripts" / "check_prompt_hygiene.py"
+ assert payload["changed_paths"] == ["scripts/check_prompt_hygiene.py"]
+ assert "def run()" in target.read_text(encoding="utf-8")
+
+
+def test_mock_adapter_uses_validator_target_inside_globbed_apps_scope(tmp_path: Path) -> None:
+ payload, workspace = _run_mock_adapter(
+ tmp_path,
+ allowed_paths=["apps/malu/**"],
+ validator_command=f"{sys.executable} -m py_compile apps/malu/lead_capture.py",
+ )
+
+ target = workspace / "apps" / "malu" / "lead_capture.py"
+ assert payload["changed_paths"] == ["apps/malu/lead_capture.py"]
+ assert "class PhoneValidator" in target.read_text(encoding="utf-8")
+
+
+def test_mock_adapter_builds_source_and_test_for_pytest_validated_business_surface(tmp_path: Path) -> None:
+ payload, workspace = _run_mock_adapter(
+ tmp_path,
+ allowed_paths=["apps/malu/**", "tests/apps/test_malu_landing_page.py"],
+ validator_command="pytest -q tests/apps/test_malu_landing_page.py",
+ )
+
+ source_target = workspace / "apps" / "malu" / "lead_capture.py"
+ test_target = workspace / "tests" / "apps" / "test_malu_landing_page.py"
+ assert payload["changed_paths"] == [
+ "apps/malu/lead_capture.py",
+ "tests/apps/test_malu_landing_page.py",
+ ]
+ assert "class PhoneValidator" in source_target.read_text(encoding="utf-8")
+ assert "from apps.malu.lead_capture import PhoneValidator, capture_lead" in test_target.read_text(encoding="utf-8")
diff --git a/tests/test_openhands_controlled_backend.py b/tests/test_openhands_controlled_backend.py
index 2047080f..80e50ba1 100644
--- a/tests/test_openhands_controlled_backend.py
+++ b/tests/test_openhands_controlled_backend.py
@@ -80,6 +80,7 @@ def test_scope_violation_is_policy_blocked_and_never_promoted(
_create_min_repo(repo_root)
service = OpenHandsControlledBackendService(repo_root=repo_root, run_root=run_root)
+ monkeypatch.setattr(service, "_prepare_strict_workspace", lambda **_: None)
def _bad_backend(*, prompt: str, workspace: Path, log_file: Path, allowed_paths: list[str]):
_ = prompt, allowed_paths
@@ -210,3 +211,86 @@ def test_openhands_cli_can_fallback_to_mock_patch(tmp_path: Path) -> None:
assert result.iterations_used == 1
assert result.patch_result is not None
assert result.patch_result.changed_files == ["src/openhands_demo_task.py"]
+
+
+def test_strict_workspace_uses_overlay_for_allowed_file(tmp_path: Path) -> None:
+ repo_root = tmp_path / "repo"
+ run_root = tmp_path / "runs"
+ repo_root.mkdir()
+ _create_min_repo(repo_root)
+ blocked_file = repo_root / "src" / "models.py"
+ blocked_file.write_text("VALUE = 1\n", encoding="utf-8")
+ allowed_file = repo_root / "src" / "landing_pages.py"
+ allowed_file.write_text("VALUE = 1\n", encoding="utf-8")
+
+ service = OpenHandsControlledBackendService(repo_root=repo_root, run_root=run_root)
+
+ def _assert_permissions(*, prompt: str, workspace: Path, log_file: Path, allowed_paths: list[str]):
+ _ = prompt, allowed_paths
+ writable_target = workspace / "src" / "landing_pages.py"
+ blocked_target = workspace / "src" / "models.py"
+ assert writable_target.is_symlink()
+ assert blocked_target.is_symlink() is False
+ writable_target.write_text("VALUE = 2\n", encoding="utf-8")
+ with pytest.raises(PermissionError):
+ blocked_target.write_text("VALUE = 2\n", encoding="utf-8")
+ service._append_log(log_file, "[mock-backend] checked strict workspace\n")
+ return _BackendExecutionOutcome(exit_code=0, stdout="strict workspace ok\n")
+
+ service._run_mock_backend = _assert_permissions # type: ignore[method-assign]
+
+ request = ControlledExecutionRequest(
+ task_id="demo-strict-view",
+ prompt="Update landing page helper",
+ allowed_paths=["src/landing_pages.py"],
+ test_command=[sys.executable, "-m", "py_compile", "src/landing_pages.py"],
+ backend=ControlledBackend.MOCK,
+ )
+
+ result = service.run(request)
+
+ assert result.status is ControlledRunStatus.READY_FOR_PROMOTION
+ assert result.changed_files == ["src/landing_pages.py"]
+ assert "src/models.py" not in result.changed_files
+
+
+def test_fail_fast_probe_aborts_retry_loop_on_module_error(
+ tmp_path: Path,
+ monkeypatch: pytest.MonkeyPatch,
+) -> None:
+ repo_root = tmp_path / "repo"
+ run_root = tmp_path / "runs"
+ repo_root.mkdir()
+ _create_min_repo(repo_root)
+
+ service = OpenHandsControlledBackendService(repo_root=repo_root, run_root=run_root)
+ attempts = {"count": 0}
+
+ def _broken_backend(*, prompt: str, workspace: Path, log_file: Path, allowed_paths: list[str]):
+ _ = prompt, log_file, allowed_paths
+ attempts["count"] += 1
+ target = workspace / "src" / "broken.py"
+ target.write_text("import missing_module\n", encoding="utf-8")
+ return _BackendExecutionOutcome(
+ exit_code=1,
+ stderr="ModuleNotFoundError: No module named 'missing_module'\n",
+ )
+
+ monkeypatch.setattr(service, "_run_mock_backend", _broken_backend)
+
+ request = ControlledExecutionRequest(
+ task_id="demo-fail-fast",
+ prompt="Write python helper",
+ allowed_paths=["src/broken.py"],
+ test_command=[sys.executable, "-c", "import sys; sys.exit(7)"],
+ backend=ControlledBackend.MOCK,
+ max_iterations=3,
+ keep_workspace_on_failure=False,
+ )
+
+ result = service.run(request)
+
+ assert attempts["count"] == 1
+ assert result.status is ControlledRunStatus.FAILED
+ assert result.validation_status is ValidationStatus.FAILED
+ assert "fail-fast probe" in (result.error or "")
diff --git a/tests/test_openhands_launcher.py b/tests/test_openhands_launcher.py
index bb6f0914..59279e72 100644
--- a/tests/test_openhands_launcher.py
+++ b/tests/test_openhands_launcher.py
@@ -32,6 +32,12 @@ def test_openhands_start_dry_run_prints_ai_lab_command() -> None:
assert "launch_ai_lab.sh" in completed.stdout
assert "/opt/workspace" in completed.stdout
assert "EXTRA_VOLUME=" in completed.stdout
+ assert "--exp" in completed.stdout
+ assert "--headless" in completed.stdout
+ assert ' -t ' in completed.stdout
+ assert "runuser -u " in completed.stdout
+ assert "nobody" in completed.stdout
+ assert "/tmp/openhands-home/.openhands/agent_settings.json" in completed.stdout
def test_openhands_start_defaults_audit_path_to_workspace_for_ai_lab(tmp_path: Path) -> None:
@@ -58,3 +64,119 @@ def test_openhands_start_defaults_audit_path_to_workspace_for_ai_lab(tmp_path: P
assert completed.returncode == 0
assert "/opt/workspace/.openhands-audit" in completed.stdout
+ assert "OPENHANDS_PERSISTENCE_DIR=/tmp/openhands-home/state/" in completed.stdout
+
+
+def test_openhands_start_legacy_template_can_be_restored_explicitly(tmp_path: Path) -> None:
+ repo_root = Path(__file__).resolve().parents[1]
+ workspace = tmp_path / "worktree"
+ workspace.mkdir()
+ env = os.environ.copy()
+ env.update(
+ {
+ "OPENHANDS_DRY_RUN": "1",
+ "OPENHANDS_CMD": "openhands",
+ "OPENHANDS_HEADLESS": "0",
+ "OPENHANDS_WORKSPACE": str(workspace),
+ }
+ )
+
+ completed = subprocess.run(
+ ["bash", str(repo_root / "scripts" / "openhands_start.sh"), "Touch README.md."],
+ cwd=repo_root,
+ env=env,
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+
+ assert completed.returncode == 0
+ assert "--headless" not in completed.stdout
+ assert "OPENHANDS_HEADLESS=0" in completed.stdout
+ assert 'OPENHANDS_CMD_TEMPLATE="${OPENHANDS_CMD}" "${OPENHANDS_PROMPT}"' in completed.stdout
+
+
+def test_openhands_start_ai_lab_runtime_prefers_container_cli(tmp_path: Path) -> None:
+ repo_root = Path(__file__).resolve().parents[1]
+ workspace = tmp_path / "worktree"
+ workspace.mkdir()
+ fake_bin = tmp_path / "bin" / "openhands"
+ fake_bin.parent.mkdir()
+ fake_bin.write_text("#!/bin/sh\nexit 0\n", encoding="utf-8")
+ fake_bin.chmod(0o755)
+ env = os.environ.copy()
+ env.update(
+ {
+ "OPENHANDS_DRY_RUN": "1",
+ "OPENHANDS_WORKSPACE": str(workspace),
+ "OPENHANDS_LOCAL_BIN": str(fake_bin),
+ }
+ )
+
+ completed = subprocess.run(
+ ["bash", str(repo_root / "scripts" / "openhands_start.sh"), "Touch README.md."],
+ cwd=repo_root,
+ env=env,
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+
+ assert completed.returncode == 0
+ assert f"OPENHANDS_CMD={fake_bin}" not in completed.stdout
+ assert "OPENHANDS_CMD=openhands" in completed.stdout
+
+
+def test_openhands_start_host_runtime_changes_into_workspace(tmp_path: Path) -> None:
+ repo_root = Path(__file__).resolve().parents[1]
+ workspace = tmp_path / "worktree"
+ workspace.mkdir()
+ env = os.environ.copy()
+ env.update(
+ {
+ "OPENHANDS_DRY_RUN": "1",
+ "OPENHANDS_RUNTIME": "host",
+ "OPENHANDS_CMD": "openhands",
+ "OPENHANDS_WORKSPACE": str(workspace),
+ }
+ )
+
+ completed = subprocess.run(
+ ["bash", str(repo_root / "scripts" / "openhands_start.sh"), "Touch README.md."],
+ cwd=repo_root,
+ env=env,
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+
+ assert completed.returncode == 0
+ assert 'cd "${OPENHANDS_WORKSPACE}"' in completed.stdout
+
+
+def test_openhands_start_can_disable_experimental_mode(tmp_path: Path) -> None:
+ repo_root = Path(__file__).resolve().parents[1]
+ workspace = tmp_path / "worktree"
+ workspace.mkdir()
+ env = os.environ.copy()
+ env.update(
+ {
+ "OPENHANDS_DRY_RUN": "1",
+ "OPENHANDS_CMD": "openhands",
+ "OPENHANDS_EXPERIMENTAL": "0",
+ "OPENHANDS_WORKSPACE": str(workspace),
+ }
+ )
+
+ completed = subprocess.run(
+ ["bash", str(repo_root / "scripts" / "openhands_start.sh"), "Touch README.md."],
+ cwd=repo_root,
+ env=env,
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+
+ assert completed.returncode == 0
+ assert "--exp" not in completed.stdout
+ assert "--headless" in completed.stdout
diff --git a/tests/test_openhands_worker.py b/tests/test_openhands_worker.py
index bd81e25b..fd77e63c 100644
--- a/tests/test_openhands_worker.py
+++ b/tests/test_openhands_worker.py
@@ -1,5 +1,7 @@
from __future__ import annotations
+import sys
+
import pytest
from autoresearch.core.services.openhands_worker import OpenHandsWorkerService
@@ -21,14 +23,16 @@ def test_openhands_worker_builds_patch_only_agent_job_spec() -> None:
assert job.agent_id == "openhands"
assert job.mode == "patch_only"
+ assert job.policy.timeout_sec == 420
assert job.policy.allowed_paths == ["src/foo.py", "tests/test_foo.py"]
- assert job.validators[0].command == "pytest tests/test_foo.py -q"
+ assert job.validators[0].command == f"{sys.executable} -m pytest tests/test_foo.py -q"
assert job.metadata["worker_contract"] == "openhands-worker/v1"
assert job.metadata["worker_output_mode"] == "patch"
assert job.metadata["pipeline_target"] == "draft_pr"
assert "Do not run git add, git commit, git push" in job.task
assert "allowed_paths:" in job.task
assert "test_command:" in job.task
+ assert f"{sys.executable} -m pytest tests/test_foo.py -q" in job.task
def test_openhands_worker_builds_controlled_request_with_mock_fallback() -> None:
@@ -48,7 +52,7 @@ def test_openhands_worker_builds_controlled_request_with_mock_fallback() -> None
assert request.failure_strategy is FailureStrategy.FALLBACK
assert request.allowed_paths == ["src/autoresearch/core/services/openhands_worker.py"]
assert request.test_command == [
- "python",
+ sys.executable,
"-m",
"py_compile",
"src/autoresearch/core/services/openhands_worker.py",
@@ -82,3 +86,33 @@ def test_openhands_worker_can_target_patch_pipeline_explicitly() -> None:
assert request.worker_output_mode == "patch"
assert request.pipeline_target.value == "patch"
+
+
+def test_openhands_worker_builds_controlled_request_with_pytest_via_active_python() -> None:
+ service = OpenHandsWorkerService()
+ spec = OpenHandsWorkerJobSpec(
+ job_id="job-5",
+ problem_statement="Keep pytest bound to the active interpreter.",
+ allowed_paths=["tests/test_worker.py"],
+ test_command="pytest -q tests/test_worker.py",
+ )
+
+ request = service.build_controlled_request(spec)
+
+ assert request.test_command == [sys.executable, "-m", "pytest", "-q", "tests/test_worker.py"]
+
+
+def test_openhands_worker_normalizes_python_commands_to_active_interpreter() -> None:
+ service = OpenHandsWorkerService()
+ spec = OpenHandsWorkerJobSpec(
+ job_id="job-6",
+ problem_statement="Compile a scoped business module with the active interpreter.",
+ allowed_paths=["apps/malu/lead_capture.py"],
+ test_command="python -m py_compile apps/malu/lead_capture.py",
+ )
+
+ job = service.build_agent_job_spec(spec)
+ request = service.build_controlled_request(spec)
+
+ assert job.validators[0].command == f"{sys.executable} -m py_compile apps/malu/lead_capture.py"
+ assert request.test_command == [sys.executable, "-m", "py_compile", "apps/malu/lead_capture.py"]
diff --git a/tests/test_openhands_worker_strict_chain.py b/tests/test_openhands_worker_strict_chain.py
index 12a8f61b..e09b8f4a 100644
--- a/tests/test_openhands_worker_strict_chain.py
+++ b/tests/test_openhands_worker_strict_chain.py
@@ -1,9 +1,13 @@
from __future__ import annotations
import json
+import os
from pathlib import Path
import shutil
import sys
+import time
+
+import pytest
from autoresearch.agent_protocol.models import ExecutionPolicy, JobSpec, ValidatorSpec
from autoresearch.core.services.git_promotion_gate import GitPromotionGateService
@@ -100,6 +104,13 @@ def _copy_worker_scripts(repo_root: Path) -> None:
target.chmod(0o755)
+def _write_adapter(repo_root: Path, relative: str, source: str) -> None:
+ target = repo_root / relative
+ target.parent.mkdir(parents=True, exist_ok=True)
+ target.write_text(source, encoding="utf-8")
+ target.chmod(0o755)
+
+
def test_openhands_dry_run_emits_patch_candidate_and_reaches_draft_pr(
tmp_path: Path,
monkeypatch,
@@ -163,3 +174,680 @@ def test_openhands_dry_run_emits_patch_candidate_and_reaches_draft_pr(
patch_text = Path(summary.promotion_patch_uri or "").read_text(encoding="utf-8")
assert "src/generated_worker.py" in patch_text
+
+
+def test_runner_shadow_workspace_blocks_out_of_scope_write_with_permission_error(
+ tmp_path: Path,
+) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir()
+ (repo_root / "src").mkdir(parents=True, exist_ok=True)
+ (repo_root / "src" / "__init__.py").write_text("", encoding="utf-8")
+ (repo_root / "src" / "allowed.py").write_text("VALUE = 1\n", encoding="utf-8")
+ (repo_root / "src" / "forbidden.py").write_text("SECRET = 1\n", encoding="utf-8")
+ _write_adapter(
+ repo_root,
+ "drivers/shadow_probe.py",
+ """#!/usr/bin/env python3
+import json
+import os
+from pathlib import Path
+
+workspace = Path(os.environ["AEP_WORKSPACE"])
+result_path = Path(os.environ["AEP_RESULT_PATH"])
+allowed = workspace / "src" / "allowed.py"
+forbidden = workspace / "src" / "forbidden.py"
+
+error = "missing denial"
+try:
+ forbidden.write_text("SECRET = 2\\n", encoding="utf-8")
+except Exception as exc: # pragma: no cover - exercised via runner integration
+ error = f"{type(exc).__name__}: {exc}"
+
+allowed.write_text("VALUE = 2\\n", encoding="utf-8")
+payload = {
+ "protocol_version": "aep/v0",
+ "run_id": "run-shadow-probe",
+ "agent_id": "openhands",
+ "attempt": 1,
+ "status": "succeeded",
+ "summary": error,
+ "changed_paths": ["src/allowed.py"],
+ "output_artifacts": [],
+ "metrics": {"duration_ms": 0, "steps": 0, "commands": 0, "prompt_tokens": None, "completion_tokens": None},
+ "recommended_action": "promote",
+ "error": None,
+}
+result_path.write_text(json.dumps(payload), encoding="utf-8")
+""",
+ )
+ _write_manifest(repo_root, "drivers/shadow_probe.py")
+
+ runner = AgentExecutionRunner(
+ repo_root=repo_root,
+ runtime_root=tmp_path / "runtime",
+ manifests_dir=repo_root / "configs" / "agents",
+ )
+
+ summary = runner.run_job(
+ JobSpec(
+ run_id="run-shadow-probe",
+ agent_id="openhands",
+ task="Only update src/allowed.py.",
+ validators=[
+ ValidatorSpec(
+ id="worker.test_command",
+ kind="command",
+ command=f"{sys.executable} -m py_compile src/allowed.py",
+ )
+ ],
+ policy=ExecutionPolicy(
+ allowed_paths=["src/allowed.py"],
+ forbidden_paths=["src/forbidden.py", ".git/**", "logs/**", ".masfactory_runtime/**", "memory/**"],
+ cleanup_on_success=False,
+ ),
+ metadata={"pipeline_target": "patch"},
+ )
+ )
+
+ assert summary.driver_result.status == "succeeded"
+ assert "PermissionError" in summary.driver_result.summary
+ assert (repo_root / "src" / "forbidden.py").read_text(encoding="utf-8") == "SECRET = 1\n"
+ assert "src/forbidden.py" not in summary.driver_result.changed_paths
+
+ repeated = runner.run_job(
+ JobSpec(
+ run_id="run-shadow-probe",
+ agent_id="openhands",
+ task="Only update src/allowed.py.",
+ validators=[
+ ValidatorSpec(
+ id="worker.test_command",
+ kind="command",
+ command=f"{sys.executable} -m py_compile src/allowed.py",
+ )
+ ],
+ policy=ExecutionPolicy(
+ allowed_paths=["src/allowed.py"],
+ forbidden_paths=["src/forbidden.py", ".git/**", "logs/**", ".masfactory_runtime/**", "memory/**"],
+ cleanup_on_success=False,
+ ),
+ metadata={"pipeline_target": "patch"},
+ )
+ )
+
+ assert repeated.driver_result.status == "succeeded"
+
+
+def test_runner_shadow_workspace_allows_creating_new_scoped_app_directory_without_unlocking_repo(
+ tmp_path: Path,
+) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir()
+ (repo_root / "src").mkdir(parents=True, exist_ok=True)
+ (repo_root / "src" / "__init__.py").write_text("", encoding="utf-8")
+ (repo_root / "src" / "forbidden.py").write_text("SECRET = 1\n", encoding="utf-8")
+ _write_adapter(
+ repo_root,
+ "drivers/new_surface_probe.py",
+ """#!/usr/bin/env python3
+import json
+import os
+from pathlib import Path
+
+workspace = Path(os.environ["AEP_WORKSPACE"])
+result_path = Path(os.environ["AEP_RESULT_PATH"])
+allowed = workspace / "apps" / "malu" / "lead_capture.py"
+forbidden = workspace / "src" / "forbidden.py"
+
+allowed.parent.mkdir(parents=True, exist_ok=True)
+allowed.write_text("PHONE_PATTERN = r'^1[3-9]\\\\d{9}$'\\n", encoding="utf-8")
+
+error = "missing denial"
+try:
+ forbidden.write_text("SECRET = 2\\n", encoding="utf-8")
+except Exception as exc: # pragma: no cover - exercised via runner integration
+ error = f"{type(exc).__name__}: {exc}"
+
+payload = {
+ "protocol_version": "aep/v0",
+ "run_id": "run-new-surface-probe",
+ "agent_id": "openhands",
+ "attempt": 1,
+ "status": "succeeded",
+ "summary": error,
+ "changed_paths": ["apps/malu/lead_capture.py"],
+ "output_artifacts": [],
+ "metrics": {"duration_ms": 0, "steps": 0, "commands": 0, "prompt_tokens": None, "completion_tokens": None},
+ "recommended_action": "promote",
+ "error": None,
+}
+result_path.write_text(json.dumps(payload), encoding="utf-8")
+""",
+ )
+ _write_manifest(repo_root, "drivers/new_surface_probe.py")
+
+ runner = AgentExecutionRunner(
+ repo_root=repo_root,
+ runtime_root=tmp_path / "runtime",
+ manifests_dir=repo_root / "configs" / "agents",
+ )
+
+ summary = runner.run_job(
+ JobSpec(
+ run_id="run-new-surface-probe",
+ agent_id="openhands",
+ task="Create apps/malu/lead_capture.py without touching src/forbidden.py.",
+ validators=[
+ ValidatorSpec(
+ id="worker.test_command",
+ kind="command",
+ command=f"{sys.executable} -m py_compile apps/malu/lead_capture.py",
+ )
+ ],
+ policy=ExecutionPolicy(
+ allowed_paths=["apps/malu/**"],
+ forbidden_paths=["src/forbidden.py", ".git/**", "logs/**", ".masfactory_runtime/**", "memory/**"],
+ cleanup_on_success=False,
+ ),
+ metadata={"pipeline_target": "patch"},
+ )
+ )
+
+ assert summary.driver_result.status == "succeeded"
+ assert summary.driver_result.changed_paths == ["apps/malu/lead_capture.py"]
+ assert "PermissionError" in summary.driver_result.summary
+ assert (repo_root / "src" / "forbidden.py").read_text(encoding="utf-8") == "SECRET = 1\n"
+ patch_text = Path(summary.promotion_patch_uri or "").read_text(encoding="utf-8")
+ assert "apps/malu/lead_capture.py" in patch_text
+ assert "PHONE_PATTERN = r'^1[3-9]\\d{9}$'" in patch_text
+
+
+def test_runner_fast_fail_aborts_long_running_syntax_breakage(tmp_path: Path) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir()
+ (repo_root / "src").mkdir(parents=True, exist_ok=True)
+ (repo_root / "src" / "__init__.py").write_text("", encoding="utf-8")
+ (repo_root / "src" / "broken_worker.py").write_text("VALUE = 1\n", encoding="utf-8")
+ _write_adapter(
+ repo_root,
+ "drivers/slow_broken_probe.py",
+ """#!/usr/bin/env python3
+import os
+import time
+from pathlib import Path
+
+workspace = Path(os.environ["AEP_WORKSPACE"])
+target = workspace / "src" / "broken_worker.py"
+target.write_text("def broken(:\\n", encoding="utf-8")
+time.sleep(30)
+""",
+ )
+ _write_manifest(repo_root, "drivers/slow_broken_probe.py")
+
+ runner = AgentExecutionRunner(
+ repo_root=repo_root,
+ runtime_root=tmp_path / "runtime",
+ manifests_dir=repo_root / "configs" / "agents",
+ )
+
+ started = time.perf_counter()
+ summary = runner.run_job(
+ JobSpec(
+ run_id="run-fast-fail-probe",
+ agent_id="openhands",
+ task="Update src/broken_worker.py.",
+ validators=[
+ ValidatorSpec(
+ id="worker.test_command",
+ kind="command",
+ command=f"{sys.executable} -m py_compile src/broken_worker.py",
+ )
+ ],
+ policy=ExecutionPolicy(
+ timeout_sec=60,
+ allowed_paths=["src/broken_worker.py"],
+ cleanup_on_success=False,
+ ),
+ )
+ )
+ duration = time.perf_counter() - started
+
+ assert duration < 15
+ assert summary.final_status == "failed"
+ assert summary.driver_result.status == "failed"
+ assert summary.driver_result.summary == "adapter aborted by fast-fail probe"
+ assert "SyntaxError" in (summary.driver_result.error or "")
+
+
+def test_runner_stall_watchdog_aborts_no_progress_adapter(
+ tmp_path: Path,
+ monkeypatch,
+) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir()
+ (repo_root / "src").mkdir(parents=True, exist_ok=True)
+ (repo_root / "src" / "__init__.py").write_text("", encoding="utf-8")
+ (repo_root / "src" / "idle_worker.py").write_text("VALUE = 1\n", encoding="utf-8")
+ _write_adapter(
+ repo_root,
+ "drivers/idle_probe.py",
+ """#!/usr/bin/env python3
+import time
+
+time.sleep(30)
+""",
+ )
+ _write_manifest(repo_root, "drivers/idle_probe.py")
+
+ runner = AgentExecutionRunner(
+ repo_root=repo_root,
+ runtime_root=tmp_path / "runtime",
+ manifests_dir=repo_root / "configs" / "agents",
+ )
+ monkeypatch.setattr(runner, "_stall_progress_timeout_sec", lambda timeout_sec: 2)
+
+ started = time.perf_counter()
+ summary = runner.run_job(
+ JobSpec(
+ run_id="run-stall-probe",
+ agent_id="openhands",
+ task="Wait forever without writing any files.",
+ validators=[
+ ValidatorSpec(
+ id="worker.test_command",
+ kind="command",
+ command=f"{sys.executable} -m py_compile src/idle_worker.py",
+ )
+ ],
+ policy=ExecutionPolicy(
+ timeout_sec=60,
+ allowed_paths=["src/idle_worker.py"],
+ cleanup_on_success=False,
+ ),
+ )
+ )
+ duration = time.perf_counter() - started
+
+ assert duration < 10
+ assert summary.final_status == "failed"
+ assert summary.driver_result.status == "stalled_no_progress"
+ assert summary.driver_result.summary == "adapter stalled after 2s without workspace progress"
+ assert summary.driver_result.error == "no workspace progress for 2s"
+ assert summary.driver_result.metrics.first_scoped_write_ms is None
+
+
+def test_runner_records_first_progress_metrics_for_state_and_scoped_write(tmp_path: Path) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir()
+ (repo_root / "src").mkdir(parents=True, exist_ok=True)
+ (repo_root / "src" / "__init__.py").write_text("", encoding="utf-8")
+ (repo_root / "src" / "active_worker.py").write_text("VALUE = 1\n", encoding="utf-8")
+ _write_adapter(
+ repo_root,
+ "drivers/progress_probe.py",
+ """#!/usr/bin/env python3
+import json
+import os
+import time
+from pathlib import Path
+
+workspace = Path(os.environ["AEP_WORKSPACE"])
+result_path = Path(os.environ["AEP_RESULT_PATH"])
+state_dir = workspace / ".openhands-state"
+target = workspace / "src" / "active_worker.py"
+
+time.sleep(1)
+state_dir.mkdir(parents=True, exist_ok=True)
+(state_dir / "heartbeat.json").write_text("{\\"ok\\": true}\\n", encoding="utf-8")
+time.sleep(1.5)
+target.write_text("VALUE = 2\\n", encoding="utf-8")
+time.sleep(2.5)
+payload = {
+ "protocol_version": "aep/v0",
+ "run_id": "run-progress-probe",
+ "agent_id": "openhands",
+ "attempt": 1,
+ "status": "succeeded",
+ "summary": "progress recorded",
+ "changed_paths": ["src/active_worker.py"],
+ "output_artifacts": [],
+ "metrics": {"duration_ms": 0, "steps": 1, "commands": 1, "prompt_tokens": None, "completion_tokens": None},
+ "recommended_action": "promote",
+ "error": None,
+}
+result_path.write_text(json.dumps(payload), encoding="utf-8")
+""",
+ )
+ _write_manifest(repo_root, "drivers/progress_probe.py")
+
+ runner = AgentExecutionRunner(
+ repo_root=repo_root,
+ runtime_root=tmp_path / "runtime",
+ manifests_dir=repo_root / "configs" / "agents",
+ )
+
+ summary = runner.run_job(
+ JobSpec(
+ run_id="run-progress-probe",
+ agent_id="openhands",
+ task="Touch .openhands-state first, then update src/active_worker.py.",
+ validators=[
+ ValidatorSpec(
+ id="worker.test_command",
+ kind="command",
+ command=f"{sys.executable} -m py_compile src/active_worker.py",
+ )
+ ],
+ policy=ExecutionPolicy(
+ timeout_sec=60,
+ allowed_paths=["src/active_worker.py"],
+ cleanup_on_success=False,
+ ),
+ )
+ )
+
+ assert summary.final_status == "blocked"
+ assert summary.driver_result.status == "policy_blocked"
+ assert summary.driver_result.metrics.first_progress_ms is not None
+ assert summary.driver_result.metrics.first_scoped_write_ms is not None
+ assert summary.driver_result.metrics.first_state_heartbeat_ms is not None
+ assert summary.driver_result.metrics.first_progress_ms <= summary.driver_result.metrics.first_state_heartbeat_ms
+ assert (
+ summary.driver_result.metrics.first_state_heartbeat_ms
+ <= summary.driver_result.metrics.first_scoped_write_ms
+ )
+
+
+def test_runner_does_not_treat_stdout_as_runtime_heartbeat(
+ tmp_path: Path,
+ monkeypatch,
+) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir()
+ (repo_root / "src").mkdir(parents=True, exist_ok=True)
+ (repo_root / "src" / "__init__.py").write_text("", encoding="utf-8")
+ (repo_root / "src" / "chatty_worker.py").write_text("VALUE = 1\n", encoding="utf-8")
+ _write_adapter(
+ repo_root,
+ "drivers/output_heartbeat_probe.py",
+ """#!/usr/bin/env python3
+import json
+import os
+import sys
+import time
+from pathlib import Path
+
+workspace = Path(os.environ["AEP_WORKSPACE"])
+result_path = Path(os.environ["AEP_RESULT_PATH"])
+target = workspace / "src" / "chatty_worker.py"
+
+for step in range(4):
+ print(f"heartbeat {step}", flush=True)
+ time.sleep(1)
+
+target.write_text("VALUE = 2\\n", encoding="utf-8")
+time.sleep(2.5)
+payload = {
+ "protocol_version": "aep/v0",
+ "run_id": "run-output-heartbeat-probe",
+ "agent_id": "openhands",
+ "attempt": 1,
+ "status": "succeeded",
+ "summary": "stdout heartbeat should not keep adapter alive",
+ "changed_paths": ["src/chatty_worker.py"],
+ "output_artifacts": [],
+ "metrics": {"duration_ms": 0, "steps": 1, "commands": 1, "prompt_tokens": None, "completion_tokens": None},
+ "recommended_action": "promote",
+ "error": None,
+}
+result_path.write_text(json.dumps(payload), encoding="utf-8")
+""",
+ )
+ _write_manifest(repo_root, "drivers/output_heartbeat_probe.py")
+
+ runner = AgentExecutionRunner(
+ repo_root=repo_root,
+ runtime_root=tmp_path / "runtime",
+ manifests_dir=repo_root / "configs" / "agents",
+ )
+ monkeypatch.setattr(runner, "_stall_progress_timeout_sec", lambda timeout_sec: 2)
+
+ summary = runner.run_job(
+ JobSpec(
+ run_id="run-output-heartbeat-probe",
+ agent_id="openhands",
+ task="Emit stdout heartbeats before touching the workspace.",
+ validators=[
+ ValidatorSpec(
+ id="worker.test_command",
+ kind="command",
+ command=f"{sys.executable} -m py_compile src/chatty_worker.py",
+ )
+ ],
+ policy=ExecutionPolicy(
+ timeout_sec=60,
+ allowed_paths=["src/chatty_worker.py"],
+ cleanup_on_success=False,
+ ),
+ )
+ )
+
+ assert summary.final_status == "failed"
+ assert summary.driver_result.status == "stalled_no_progress"
+ assert summary.driver_result.metrics.first_state_heartbeat_ms is None
+ assert summary.driver_result.metrics.first_scoped_write_ms is None
+
+
+def test_runner_ignores_agent_is_working_spinner_noise(
+ tmp_path: Path,
+ monkeypatch,
+) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir()
+ (repo_root / "src").mkdir(parents=True, exist_ok=True)
+ (repo_root / "src" / "__init__.py").write_text("", encoding="utf-8")
+ (repo_root / "src" / "chatty_worker.py").write_text("VALUE = 1\n", encoding="utf-8")
+ _write_adapter(
+ repo_root,
+ "drivers/agent_working_noise_probe.py",
+ """#!/usr/bin/env python3
+import time
+
+for _ in range(10):
+ print("Agent is working", flush=True)
+ time.sleep(0.5)
+""",
+ )
+ _write_manifest(repo_root, "drivers/agent_working_noise_probe.py")
+
+ runner = AgentExecutionRunner(
+ repo_root=repo_root,
+ runtime_root=tmp_path / "runtime",
+ manifests_dir=repo_root / "configs" / "agents",
+ )
+ monkeypatch.setattr(runner, "_stall_progress_timeout_sec", lambda timeout_sec: 2)
+
+ started = time.perf_counter()
+ summary = runner.run_job(
+ JobSpec(
+ run_id="run-agent-working-noise-probe",
+ agent_id="openhands",
+ task="Emit only spinner-like Agent is working noise forever.",
+ validators=[
+ ValidatorSpec(
+ id="worker.test_command",
+ kind="command",
+ command=f"{sys.executable} -m py_compile src/chatty_worker.py",
+ )
+ ],
+ policy=ExecutionPolicy(
+ timeout_sec=60,
+ allowed_paths=["src/chatty_worker.py"],
+ cleanup_on_success=False,
+ ),
+ )
+ )
+ duration = time.perf_counter() - started
+
+ assert duration < 8
+ assert summary.final_status == "failed"
+ assert summary.driver_result.status == "stalled_no_progress"
+ assert summary.driver_result.metrics.first_state_heartbeat_ms is None
+ assert summary.driver_result.metrics.first_scoped_write_ms is None
+
+
+def test_runner_kills_process_group_and_persists_summary_for_invalid_log_hang(
+ tmp_path: Path,
+ monkeypatch,
+) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir()
+ (repo_root / "src").mkdir(parents=True, exist_ok=True)
+ (repo_root / "src" / "__init__.py").write_text("", encoding="utf-8")
+ (repo_root / "src" / "chatty_worker.py").write_text("VALUE = 1\n", encoding="utf-8")
+ _write_adapter(
+ repo_root,
+ "drivers/process_group_noise_probe.py",
+ """#!/usr/bin/env python3
+import os
+import subprocess
+import sys
+import time
+from pathlib import Path
+
+artifacts_dir = Path(os.environ["AEP_ARTIFACT_DIR"])
+pid_file = artifacts_dir / "child.pid"
+
+child = subprocess.Popen(
+ [
+ sys.executable,
+ "-c",
+ "import time\\nwhile True:\\n print('Agent is working', flush=True)\\n time.sleep(0.5)\\n",
+ ]
+)
+pid_file.write_text(str(child.pid), encoding="utf-8")
+
+while True:
+ print("Agent is working", flush=True)
+ time.sleep(0.5)
+""",
+ )
+ _write_manifest(repo_root, "drivers/process_group_noise_probe.py")
+
+ runner = AgentExecutionRunner(
+ repo_root=repo_root,
+ runtime_root=tmp_path / "runtime",
+ manifests_dir=repo_root / "configs" / "agents",
+ )
+ monkeypatch.setattr(runner, "_stall_progress_timeout_sec", lambda timeout_sec: 2)
+
+ summary = runner.run_job(
+ JobSpec(
+ run_id="run-process-group-noise-probe",
+ agent_id="openhands",
+ task="Spawn a child process that only emits invalid progress noise forever.",
+ validators=[
+ ValidatorSpec(
+ id="worker.test_command",
+ kind="command",
+ command=f"{sys.executable} -m py_compile src/chatty_worker.py",
+ )
+ ],
+ policy=ExecutionPolicy(
+ timeout_sec=60,
+ allowed_paths=["src/chatty_worker.py"],
+ cleanup_on_success=False,
+ ),
+ )
+ )
+
+ run_dir = tmp_path / "runtime" / "run-process-group-noise-probe"
+ summary_path = run_dir / "summary.json"
+ pid_path = run_dir / "artifacts" / "child.pid"
+
+ assert summary.final_status == "failed"
+ assert summary.driver_result.status == "stalled_no_progress"
+ assert summary_path.exists()
+ assert pid_path.exists()
+
+ child_pid = int(pid_path.read_text(encoding="utf-8").strip())
+ time.sleep(0.2)
+ with pytest.raises(ProcessLookupError):
+ os.kill(child_pid, 0)
+
+
+def test_runner_ignores_log_heartbeat_after_first_scoped_write(
+ tmp_path: Path,
+ monkeypatch,
+) -> None:
+ repo_root = tmp_path / "repo"
+ repo_root.mkdir()
+ (repo_root / "src").mkdir(parents=True, exist_ok=True)
+ (repo_root / "src" / "__init__.py").write_text("", encoding="utf-8")
+ (repo_root / "src" / "chatty_worker.py").write_text("VALUE = 1\n", encoding="utf-8")
+ _write_adapter(
+ repo_root,
+ "drivers/output_then_spin_probe.py",
+ """#!/usr/bin/env python3
+import os
+import shutil
+import time
+from pathlib import Path
+
+workspace = Path(os.environ["AEP_WORKSPACE"])
+target = workspace / "src" / "chatty_worker.py"
+state_dir = workspace / ".openhands-state"
+
+for step in range(3):
+ state_dir.mkdir(parents=True, exist_ok=True)
+ (state_dir / "heartbeat.json").write_text(f"{{\\"step\\": {step}}}\\n", encoding="utf-8")
+ print(f"warmup heartbeat {step}", flush=True)
+ time.sleep(1)
+
+target.write_text("VALUE = 2\\n", encoding="utf-8")
+shutil.rmtree(state_dir)
+
+step = 0
+while True:
+ print(f"post-write heartbeat {step}", flush=True)
+ step += 1
+ time.sleep(1)
+""",
+ )
+ _write_manifest(repo_root, "drivers/output_then_spin_probe.py")
+
+ runner = AgentExecutionRunner(
+ repo_root=repo_root,
+ runtime_root=tmp_path / "runtime",
+ manifests_dir=repo_root / "configs" / "agents",
+ )
+ monkeypatch.setattr(runner, "_stall_progress_timeout_sec", lambda timeout_sec: 2)
+
+ started = time.perf_counter()
+ summary = runner.run_job(
+ JobSpec(
+ run_id="run-output-then-spin-probe",
+ agent_id="openhands",
+ task="Emit stdout heartbeats, write once, then spin forever without state updates.",
+ validators=[
+ ValidatorSpec(
+ id="worker.test_command",
+ kind="command",
+ command=f"{sys.executable} -m py_compile src/chatty_worker.py",
+ )
+ ],
+ policy=ExecutionPolicy(
+ timeout_sec=60,
+ allowed_paths=["src/chatty_worker.py"],
+ cleanup_on_success=False,
+ ),
+ )
+ )
+ duration = time.perf_counter() - started
+
+ assert duration < 12
+ assert summary.final_status == "failed"
+ assert summary.driver_result.status == "stalled_no_progress"
+ assert summary.driver_result.metrics.first_state_heartbeat_ms is not None
+ assert summary.driver_result.metrics.first_scoped_write_ms is not None
diff --git a/tests/test_panel_security.py b/tests/test_panel_security.py
index 9fd950f7..81a5eb55 100644
--- a/tests/test_panel_security.py
+++ b/tests/test_panel_security.py
@@ -11,8 +11,10 @@
from fastapi.testclient import TestClient
import pytest
+from autoresearch.agent_protocol.models import DriverResult, JobSpec, RunSummary, ValidationReport
from autoresearch.api.dependencies import (
get_approval_store_service,
+ get_autoresearch_planner_service,
get_capability_provider_registry,
get_claude_agent_service,
get_openclaw_compat_service,
@@ -24,10 +26,12 @@
from autoresearch.core.adapters import CapabilityProviderDescriptorRead, CapabilityProviderRegistry
from autoresearch.core.adapters.contracts import CapabilityDomain
from autoresearch.core.services.approval_store import ApprovalStoreService
+from autoresearch.core.services.autoresearch_planner import AutoResearchPlannerService
from autoresearch.core.services.claude_agents import ClaudeAgentService
from autoresearch.core.services.openclaw_compat import OpenClawCompatService
from autoresearch.core.services.panel_access import PanelAccessService, assert_safe_bind_host
from autoresearch.core.services.panel_audit import PanelAuditService
+from autoresearch.shared.autoresearch_planner_contract import AutoResearchPlanRead, AutoResearchPlannerRequest
from autoresearch.shared.models import (
ApprovalRequestCreateRequest,
ApprovalRequestRead,
@@ -42,6 +46,7 @@
class StubTelegramNotifier:
def __init__(self) -> None:
+ self.messages: list[dict[str, object]] = []
self.manual_events: list[dict[str, str]] = []
self.status_events: list[dict[str, str]] = []
@@ -49,6 +54,24 @@ def __init__(self) -> None:
def enabled(self) -> bool:
return True
+ def send_message(
+ self,
+ *,
+ chat_id: str,
+ text: str,
+ disable_web_page_preview: bool = True,
+ reply_markup: dict[str, object] | None = None,
+ ) -> bool:
+ self.messages.append(
+ {
+ "chat_id": chat_id,
+ "text": text,
+ "disable_web_page_preview": disable_web_page_preview,
+ "reply_markup": reply_markup,
+ }
+ )
+ return True
+
def notify_manual_action(self, *, chat_id: str, entry: PanelAuditLogRead, run_status: str) -> bool:
self.manual_events.append(
{
@@ -97,9 +120,32 @@ def describe(self) -> CapabilityProviderDescriptorRead:
return self._descriptor
+def _write(path: Path, content: str) -> None:
+ path.parent.mkdir(parents=True, exist_ok=True)
+ path.write_text(content, encoding="utf-8")
+
+
+def _successful_run_summary(job: JobSpec) -> RunSummary:
+ return RunSummary(
+ run_id=job.run_id,
+ final_status="ready_for_promotion",
+ driver_result=DriverResult(
+ run_id=job.run_id,
+ agent_id=job.agent_id,
+ status="succeeded",
+ summary="panel dispatch completed",
+ changed_paths=list(job.policy.allowed_paths),
+ recommended_action="promote",
+ ),
+ validation=ValidationReport(run_id=job.run_id, passed=True),
+ promotion_patch_uri="/tmp/panel-dispatch.patch",
+ )
+
+
@pytest.fixture
def panel_client(tmp_path: Path) -> TestClient:
db_path = tmp_path / "panel-security.sqlite3"
+ planner_repo_root = tmp_path / "planner-repo"
openclaw_service = OpenClawCompatService(
repository=SQLiteModelRepository(
db_path=db_path,
@@ -141,6 +187,15 @@ def panel_client(tmp_path: Path) -> TestClient:
capability_registry = CapabilityProviderRegistry()
capability_registry.register(_StubCapabilityProvider("apple-calendar", CapabilityDomain.CALENDAR, "Apple Calendar"))
capability_registry.register(_StubCapabilityProvider("openclaw-skills", CapabilityDomain.SKILL, "OpenClaw Skills"))
+ planner_service = AutoResearchPlannerService(
+ repository=SQLiteModelRepository(
+ db_path=db_path,
+ table_name="autoresearch_plans_panel_it",
+ model_cls=AutoResearchPlanRead,
+ ),
+ repo_root=planner_repo_root,
+ dispatch_runner=_successful_run_summary,
+ )
notifier = StubTelegramNotifier()
app.dependency_overrides[get_openclaw_compat_service] = lambda: openclaw_service
@@ -148,6 +203,7 @@ def panel_client(tmp_path: Path) -> TestClient:
app.dependency_overrides[get_panel_access_service] = lambda: panel_access
app.dependency_overrides[get_panel_audit_service] = lambda: panel_audit
app.dependency_overrides[get_approval_store_service] = lambda: approval_service
+ app.dependency_overrides[get_autoresearch_planner_service] = lambda: planner_service
app.dependency_overrides[get_capability_provider_registry] = lambda: capability_registry
app.dependency_overrides[get_telegram_notifier_service] = lambda: notifier
@@ -156,6 +212,8 @@ def panel_client(tmp_path: Path) -> TestClient:
setattr(client, "_claude", claude_service)
setattr(client, "_panel_access", panel_access)
setattr(client, "_approval_store", approval_service)
+ setattr(client, "_planner", planner_service)
+ setattr(client, "_planner_repo_root", planner_repo_root)
setattr(client, "_capability_registry", capability_registry)
setattr(client, "_notifier", notifier)
yield client
@@ -185,6 +243,8 @@ def test_panel_view_contains_capability_section(panel_client: TestClient) -> Non
assert "capability_providers" in response.text
assert "待审批" in response.text
assert "pending_approvals" in response.text
+ assert "AutoResearch Plans" in response.text
+ assert "pending_autoresearch_plans" in response.text
def test_panel_state_is_scoped_by_telegram_uid(panel_client: TestClient) -> None:
@@ -409,6 +469,63 @@ def test_panel_rejects_tampered_telegram_init_data(panel_client: TestClient) ->
assert response.status_code == 401
+def test_panel_lists_and_dispatches_autoresearch_plans(panel_client: TestClient) -> None:
+ panel_access = getattr(panel_client, "_panel_access")
+ planner = getattr(panel_client, "_planner")
+ planner_repo_root = getattr(panel_client, "_planner_repo_root")
+ notifier = getattr(panel_client, "_notifier")
+
+ _write(
+ planner_repo_root / "src" / "autoresearch" / "core" / "services" / "panel_target.py",
+ "\n".join(
+ [
+ "def panel_target() -> bool:",
+ " # FIXME: add regression coverage for panel dispatch",
+ " return True",
+ "",
+ ]
+ ),
+ )
+ plan = planner.create(AutoResearchPlannerRequest(telegram_uid="9527"))
+
+ token = _token_from_magic_link(panel_access.create_magic_link("9527").url)
+ headers = {"x-autoresearch-panel-token": token}
+
+ state = panel_client.get("/api/v1/panel/state", headers=headers)
+ assert state.status_code == 200
+ pending_plans = state.json()["pending_autoresearch_plans"]
+ assert len(pending_plans) == 1
+ assert pending_plans[0]["plan_id"] == plan.plan_id
+ assert pending_plans[0]["selected_candidate"]["source_path"] == (
+ "src/autoresearch/core/services/panel_target.py"
+ )
+
+ dispatch = panel_client.post(
+ f"/api/v1/panel/autoresearch/plans/{plan.plan_id}/dispatch",
+ headers=headers,
+ json={"note": "ship it", "metadata": {"source": "panel-test"}},
+ )
+ assert dispatch.status_code == 200
+ assert dispatch.json()["dispatch_status"] == "dispatching"
+
+ stored = planner.get(plan.plan_id)
+ assert stored is not None
+ assert stored.dispatch_status.value == "dispatched"
+ assert stored.run_summary is not None
+ assert stored.run_summary.final_status == "ready_for_promotion"
+
+ refreshed = panel_client.get("/api/v1/panel/state", headers=headers)
+ assert refreshed.status_code == 200
+ assert refreshed.json()["pending_autoresearch_plans"] == []
+
+ audit = panel_client.get("/api/v1/panel/audit/logs?limit=20", headers=headers)
+ assert audit.status_code == 200
+ assert any(item["action"] == "dispatch" for item in audit.json())
+
+ assert any("[AutoResearch Dispatch]" in str(message["text"]) for message in notifier.messages)
+ assert any(message["chat_id"] == "9527" for message in notifier.messages)
+
+
def _token_from_magic_link(url: str) -> str:
parsed = urlparse(url)
return parse_qs(parsed.query)["token"][0]
diff --git a/tests/test_remote_run_contract.py b/tests/test_remote_run_contract.py
new file mode 100644
index 00000000..982d15fd
--- /dev/null
+++ b/tests/test_remote_run_contract.py
@@ -0,0 +1,70 @@
+from __future__ import annotations
+
+import importlib.util
+import json
+import sys
+from pathlib import Path
+
+import pytest
+from pydantic import ValidationError
+
+from autoresearch.agent_protocol.models import JobSpec
+from autoresearch.shared.remote_run_contract import DispatchLane, RemoteRunRecord, RemoteTaskSpec, RemoteRunSummary
+
+
+def _load_export_module():
+ script_path = Path(__file__).resolve().parents[1] / "scripts" / "export_remote_run_schemas.py"
+ spec = importlib.util.spec_from_file_location("export_remote_run_schemas", script_path)
+ assert spec is not None
+ assert spec.loader is not None
+ module = importlib.util.module_from_spec(spec)
+ sys.modules[spec.name] = module
+ spec.loader.exec_module(module)
+ return module
+
+
+def test_remote_contract_rejects_absolute_artifact_paths() -> None:
+ with pytest.raises(ValidationError):
+ RemoteRunRecord(
+ run_id="run-contract",
+ artifact_paths={"summary": "/tmp/summary.json"},
+ )
+
+
+def test_remote_contract_rejects_parent_traversal_artifact_paths() -> None:
+ with pytest.raises(ValidationError):
+ RemoteRunSummary(
+ run_id="run-contract",
+ artifact_paths={"summary": "../outside.json"},
+ )
+
+
+def test_remote_task_spec_rejects_invalid_lane() -> None:
+ with pytest.raises(ValidationError):
+ RemoteTaskSpec(
+ run_id="run-contract",
+ requested_lane=DispatchLane.LOCAL,
+ lane="bogus",
+ runtime_mode="day",
+ job=JobSpec(run_id="run-contract", agent_id="openhands", task="demo"),
+ )
+
+
+def test_remote_task_spec_requires_run_id() -> None:
+ with pytest.raises(ValidationError):
+ RemoteTaskSpec(
+ run_id="",
+ job=JobSpec(run_id="run-contract", agent_id="openhands", task="demo"),
+ )
+
+
+def test_exported_remote_schemas_match_model_json_schema(tmp_path: Path) -> None:
+ module = _load_export_module()
+ written = module.export_schemas(tmp_path)
+
+ task_schema = json.loads((tmp_path / "task_run.schema.json").read_text(encoding="utf-8"))
+ summary_schema = json.loads((tmp_path / "run_summary.schema.json").read_text(encoding="utf-8"))
+
+ assert {path.name for path in written} == {"task_run.schema.json", "run_summary.schema.json"}
+ assert task_schema == RemoteTaskSpec.model_json_schema()
+ assert summary_schema == RemoteRunSummary.model_json_schema()
diff --git a/tests/test_runtime_select_mode.py b/tests/test_runtime_select_mode.py
new file mode 100644
index 00000000..fa932875
--- /dev/null
+++ b/tests/test_runtime_select_mode.py
@@ -0,0 +1,22 @@
+from __future__ import annotations
+
+from autoresearch.core.runtime.select_mode import load_mode_policy, select_mode
+from autoresearch.shared.remote_run_contract import DispatchLane
+
+
+def test_load_runtime_mode_policies() -> None:
+ day = load_mode_policy(mode_name="day")
+ night = load_mode_policy(mode_name="night")
+
+ assert day.preferred_lane is DispatchLane.LOCAL
+ assert day.allow_draft_pr is False
+ assert night.preferred_lane is DispatchLane.REMOTE
+ assert night.allow_exploration is True
+
+
+def test_select_mode_falls_back_to_local_when_remote_is_unavailable() -> None:
+ selected = select_mode(requested_mode="night", remote_available=False)
+
+ assert selected.requested_lane is DispatchLane.REMOTE
+ assert selected.lane is DispatchLane.LOCAL
+ assert selected.fallback_reason is not None
diff --git a/tests/test_sync_openclaw_upstream.py b/tests/test_sync_openclaw_upstream.py
new file mode 100644
index 00000000..4ef8ca5b
--- /dev/null
+++ b/tests/test_sync_openclaw_upstream.py
@@ -0,0 +1,68 @@
+from __future__ import annotations
+
+import os
+from pathlib import Path
+import subprocess
+
+
+def _git(repo: Path, *args: str, cwd: Path | None = None) -> str:
+ env = os.environ.copy()
+ env.update(
+ {
+ "GIT_AUTHOR_NAME": "Codex Tests",
+ "GIT_AUTHOR_EMAIL": "codex-tests@example.com",
+ "GIT_COMMITTER_NAME": "Codex Tests",
+ "GIT_COMMITTER_EMAIL": "codex-tests@example.com",
+ }
+ )
+ completed = subprocess.run(
+ ["git", *args],
+ cwd=str(cwd or repo),
+ env=env,
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+ assert completed.returncode == 0, completed.stderr or completed.stdout
+ return completed.stdout.strip()
+
+
+def _commit(repo: Path, rel_path: str, content: str, message: str) -> None:
+ target = repo / rel_path
+ target.parent.mkdir(parents=True, exist_ok=True)
+ target.write_text(content, encoding="utf-8")
+ _git(repo, "add", rel_path)
+ _git(repo, "commit", "-m", message)
+
+
+def test_sync_openclaw_upstream_script_cleans_temp_worktree(tmp_path: Path) -> None:
+ repo_root = Path(__file__).resolve().parents[1]
+ upstream_repo = tmp_path / "upstream"
+ _git(tmp_path, "init", "-b", "main", str(upstream_repo), cwd=tmp_path)
+ _commit(upstream_repo, "README.md", "# upstream\n", "docs: bootstrap repo")
+ _commit(upstream_repo, "extensions/line/src/channel.ts", "export const ok = true;\n", "fix: line health check")
+
+ workspace_root = tmp_path / "workspace"
+ env = os.environ.copy()
+ env.update(
+ {
+ "OPENCLAW_UPSTREAM_URL": upstream_repo.resolve().as_uri(),
+ "OPENCLAW_SYNC_WORKSPACE_ROOT": str(workspace_root),
+ "OPENCLAW_SYNC_MAX_COMMITS": "2",
+ }
+ )
+
+ completed = subprocess.run(
+ ["bash", str(repo_root / "scripts" / "sync_openclaw_upstream.sh")],
+ cwd=repo_root,
+ env=env,
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+
+ assert completed.returncode == 0, completed.stderr or completed.stdout
+ assert "[sync] latest commit" in completed.stdout
+ assert "fix: line health check" in completed.stdout
+ assert "analysis complete; cleaning" in completed.stdout
+ assert not list(workspace_root.glob("openclaw-upstream.*"))
diff --git a/tests/test_upstream_watcher.py b/tests/test_upstream_watcher.py
new file mode 100644
index 00000000..b00efcfa
--- /dev/null
+++ b/tests/test_upstream_watcher.py
@@ -0,0 +1,94 @@
+from __future__ import annotations
+
+import os
+from pathlib import Path
+import subprocess
+
+from autoresearch.core.services.upstream_watcher import UpstreamWatcherService
+from autoresearch.shared.autoresearch_planner_contract import UpstreamWatchDecision
+
+
+def _git(repo: Path, *args: str, cwd: Path | None = None) -> str:
+ env = os.environ.copy()
+ env.update(
+ {
+ "GIT_AUTHOR_NAME": "Codex Tests",
+ "GIT_AUTHOR_EMAIL": "codex-tests@example.com",
+ "GIT_COMMITTER_NAME": "Codex Tests",
+ "GIT_COMMITTER_EMAIL": "codex-tests@example.com",
+ }
+ )
+ completed = subprocess.run(
+ ["git", *args],
+ cwd=str(cwd or repo),
+ env=env,
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+ assert completed.returncode == 0, completed.stderr or completed.stdout
+ return completed.stdout.strip()
+
+
+def _commit(repo: Path, rel_path: str, content: str, message: str) -> None:
+ target = repo / rel_path
+ target.parent.mkdir(parents=True, exist_ok=True)
+ target.write_text(content, encoding="utf-8")
+ _git(repo, "add", rel_path)
+ _git(repo, "commit", "-m", message)
+
+
+def _init_upstream_repo(tmp_path: Path) -> Path:
+ repo = tmp_path / "upstream"
+ _git(tmp_path, "init", "-b", "main", str(repo), cwd=tmp_path)
+ _commit(repo, "README.md", "# upstream\n", "docs: bootstrap repo")
+ return repo
+
+
+def test_upstream_watcher_skips_channel_only_updates_and_cleans_workspace(tmp_path: Path) -> None:
+ upstream_repo = _init_upstream_repo(tmp_path)
+ _commit(upstream_repo, "extensions/line/src/channel.ts", "export const ok = true;\n", "fix: line health check")
+ _commit(
+ upstream_repo,
+ "test/helpers/extensions/zalo-lifecycle.ts",
+ "export const helper = true;\n",
+ "test: harden zalo lifecycle",
+ )
+ _commit(upstream_repo, "CHANGELOG.md", "- line and zalo hardening\n", "docs: changelog refresh")
+
+ workspace_root = tmp_path / "workspace"
+ service = UpstreamWatcherService(
+ upstream_url=upstream_repo.resolve().as_uri(),
+ workspace_root=workspace_root,
+ max_commits=3,
+ )
+
+ result = service.inspect()
+
+ assert result.decision is UpstreamWatchDecision.SKIP
+ assert result.cleaned_up is True
+ assert result.latest_commit_title == "docs: changelog refresh"
+ assert "extension:line" in result.focus_areas
+ assert "extension:zalo-lifecycle.ts" not in result.focus_areas
+ assert result.relevant_paths == []
+ assert not list(workspace_root.glob("openclaw-upstream.*"))
+
+
+def test_upstream_watcher_flags_review_when_core_paths_change(tmp_path: Path) -> None:
+ upstream_repo = _init_upstream_repo(tmp_path)
+ _commit(upstream_repo, "src/runtime/core.py", "def boot() -> str:\n return 'ok'\n", "feat: tweak runtime core")
+ _commit(upstream_repo, "extensions/line/src/channel.ts", "export const ok = true;\n", "fix: line health check")
+
+ workspace_root = tmp_path / "workspace"
+ service = UpstreamWatcherService(
+ upstream_url=upstream_repo.resolve().as_uri(),
+ workspace_root=workspace_root,
+ max_commits=2,
+ )
+
+ result = service.inspect()
+
+ assert result.decision is UpstreamWatchDecision.REVIEW
+ assert "src/runtime/core.py" in result.relevant_paths
+ assert result.cleaned_up is True
+ assert not list(workspace_root.glob("openclaw-upstream.*"))