diff --git a/skills/pm/SKILL.md b/skills/pm/SKILL.md index 8a039ae4..0d053957 100644 --- a/skills/pm/SKILL.md +++ b/skills/pm/SKILL.md @@ -58,15 +58,8 @@ After every MCP response, do these three things: Print the MCP content text to the user first. -Then check: does `meta.ask_user_question` exist? - -- **YES** → Pass it directly to `AskUserQuestion`: - ``` - AskUserQuestion(questions=[meta.ask_user_question]) - ``` - Do NOT modify it. Do NOT add options. Do NOT rephrase the question. - -- **NO** → This is an interview question. Use `AskUserQuestion` with `meta.question` and generate 2-3 suggested answers. +Then use `AskUserQuestion` with `meta.question` and generate 2-3 suggested answers. +Do not wait for `meta.ask_user_question` — the PM backend does not emit that field. **C. Relay answer back:** @@ -94,10 +87,18 @@ Arguments: ### Step 5: Copy to Clipboard -After generation, read the pm.md file from `meta.pm_path` and copy its contents to the clipboard: +After generation, read the pm.md file from `meta.pm_path` and copy its contents to the clipboard when a local clipboard tool exists: ```bash -cat | pbcopy +if command -v pbcopy >/dev/null 2>&1; then + cat | pbcopy +elif command -v wl-copy >/dev/null 2>&1; then + cat | wl-copy +elif command -v xclip >/dev/null 2>&1; then + cat | xclip -selection clipboard +else + echo "No clipboard tool found; skipping clipboard copy." +fi ``` ### Step 6: Show Result & Next Step @@ -106,7 +107,8 @@ Show the following to the user: ``` PM document saved: -(Clipboard에 복사되었습니다) +If Step 5 copied successfully, also show: + (Copied to clipboard) Next step: ooo interview diff --git a/src/ouroboros/bigbang/pm_interview.py b/src/ouroboros/bigbang/pm_interview.py index ab8e9810..dea2b454 100644 --- a/src/ouroboros/bigbang/pm_interview.py +++ b/src/ouroboros/bigbang/pm_interview.py @@ -97,6 +97,7 @@ # Model for extraction (uses same as interview for consistency) _FALLBACK_MODEL = "claude-opus-4-6" +_MAX_AUTO_ADVANCE_QUESTIONS = 8 @dataclass @@ -458,93 +459,109 @@ async def ask_next_question( Returns: Result containing the (possibly reframed) question or error. """ - # Generate question via inner engine - question_result = await self.inner.ask_next_question(state) + auto_advanced_questions = 0 - if question_result.is_err: - return question_result + while True: + # Generate question via inner engine + question_result = await self.inner.ask_next_question(state) - question = question_result.value + if question_result.is_err: + return question_result - # Classify the question - context = self._build_interview_context(state) - classify_result = await self.classifier.classify( - question=question, - interview_context=context, - ) + question = question_result.value - if classify_result.is_err: - # Classification failed — return original question (safe fallback) - log.warning("pm.classification_failed", question=question[:100]) - return question_result - - classification = classify_result.value - self.classifications.append(classification) + # Classify the question + context = self._build_interview_context(state) + classify_result = await self.classifier.classify( + question=question, + interview_context=context, + ) - output_type = classification.output_type + if classify_result.is_err: + # Classification failed — return original question (safe fallback) + log.warning("pm.classification_failed", question=question[:100]) + return question_result - if output_type == ClassifierOutputType.DEFERRED: - # Track as deferred item and generate a new question - self.deferred_items.append(classification.original_question) - log.info( - "pm.question_deferred", - question=classification.original_question[:100], - reasoning=classification.reasoning, - output_type=output_type, - ) - # Feed an automatic response back to the inner InterviewEngine - # so the round is properly recorded and the engine advances. - # This prevents the inner engine from re-generating similar - # technical questions it doesn't know were already handled. - await self.record_response( - state, - user_response="[Deferred to development phase] " - "This technical decision will be addressed during the " - "development interview.", - question=classification.original_question, - ) - # Recursively ask for the next real question - return await self.ask_next_question(state) + classification = classify_result.value + self.classifications.append(classification) - if output_type == ClassifierOutputType.DECIDE_LATER: - # Auto-answer with placeholder — no PM interaction needed - placeholder = classification.placeholder_response - self.decide_later_items.append(classification.original_question) - log.info( - "pm.question_decide_later", - question=classification.original_question[:100], - placeholder=placeholder[:100], - reasoning=classification.reasoning, - ) - # Record the placeholder as the response so the interview - # engine advances its round count - await self.record_response( - state, - user_response=f"[Decide later] {placeholder}", - question=classification.original_question, - ) - # Recursively ask for the next real question - return await self.ask_next_question(state) + output_type = classification.output_type - if output_type == ClassifierOutputType.REFRAMED: - # Use the reframed version and track the mapping - reframed = classification.question_for_pm - self._reframe_map[reframed] = classification.original_question - log.info( - "pm.question_reframed", - original=classification.original_question[:100], - reframed=reframed[:100], - output_type=output_type, - ) - return Result.ok(reframed) + if output_type == ClassifierOutputType.DEFERRED: + self.deferred_items.append(classification.original_question) + log.info( + "pm.question_deferred", + question=classification.original_question[:100], + reasoning=classification.reasoning, + output_type=output_type, + ) + record_result = await self.record_response( + state, + user_response="[Deferred to development phase] " + "This technical decision will be addressed during the " + "development interview.", + question=classification.original_question, + ) + if record_result.is_err: + return Result.err(record_result.error) + state = record_result.value + elif output_type == ClassifierOutputType.DECIDE_LATER: + placeholder = classification.placeholder_response + self.decide_later_items.append(classification.original_question) + log.info( + "pm.question_decide_later", + question=classification.original_question[:100], + placeholder=placeholder[:100], + reasoning=classification.reasoning, + ) + record_result = await self.record_response( + state, + user_response=f"[Decide later] {placeholder}", + question=classification.original_question, + ) + if record_result.is_err: + return Result.err(record_result.error) + state = record_result.value + elif output_type == ClassifierOutputType.REFRAMED: + # Use the reframed version and track the mapping + reframed = classification.question_for_pm + self._reframe_map[reframed] = classification.original_question + log.info( + "pm.question_reframed", + original=classification.original_question[:100], + reframed=reframed[:100], + output_type=output_type, + ) + return Result.ok(reframed) + else: + # PASSTHROUGH — planning question forwarded unchanged to the PM + log.debug( + "pm.question_passthrough", + question=classification.original_question[:100], + output_type=output_type, + ) + return Result.ok(classification.question_for_pm) - # PASSTHROUGH — planning question forwarded unchanged to the PM - log.debug( - "pm.question_passthrough", - question=classification.original_question[:100], - output_type=output_type, - ) - return Result.ok(classification.question_for_pm) + auto_advanced_questions += 1 + if auto_advanced_questions >= _MAX_AUTO_ADVANCE_QUESTIONS: + log.warning( + "pm.question_auto_advance_limit_exceeded", + interview_id=state.interview_id, + auto_advanced_questions=auto_advanced_questions, + deferred_count=len(self.deferred_items), + decide_later_count=len(self.decide_later_items), + ) + return Result.err( + ProviderError( + "PM interview skipped too many non-PM questions without " + "finding a PM-answerable follow-up. Please retry.", + details={ + "auto_advanced_questions": auto_advanced_questions, + "deferred_count": len(self.deferred_items), + "decide_later_count": len(self.decide_later_items), + }, + ) + ) async def record_response( self, diff --git a/src/ouroboros/cli/commands/pm.py b/src/ouroboros/cli/commands/pm.py index 4d2f8ec2..06968ff5 100644 --- a/src/ouroboros/cli/commands/pm.py +++ b/src/ouroboros/cli/commands/pm.py @@ -316,7 +316,16 @@ async def _run_pm_interview( output_dir: Optional output directory for the generated PM document. """ from ouroboros.bigbang.pm_interview import PMInterviewEngine - from ouroboros.providers.litellm_adapter import LiteLLMAdapter + + try: + from ouroboros.providers.litellm_adapter import LiteLLMAdapter + except ImportError: + print_error( + "litellm is required for the PM command but is not installed.\n" + " Install with: uv tool install --with litellm ouroboros-ai\n" + " Or: pip install 'ouroboros-ai[litellm]'" + ) + raise typer.Exit(code=1) adapter = LiteLLMAdapter() engine = PMInterviewEngine.create(llm_adapter=adapter, model=model) diff --git a/src/ouroboros/mcp/tools/pm_handler.py b/src/ouroboros/mcp/tools/pm_handler.py index 9910fd60..6bde7a22 100644 --- a/src/ouroboros/mcp/tools/pm_handler.py +++ b/src/ouroboros/mcp/tools/pm_handler.py @@ -53,6 +53,8 @@ _DATA_DIR = Path.home() / ".ouroboros" / "data" +_INTERVIEW_LLM_MAX_TURNS = 5 +_INTERVIEW_LLM_TIMEOUT_SECONDS = 90.0 def _meta_path(session_id: str, data_dir: Path | None = None) -> Path: @@ -305,16 +307,24 @@ def definition(self) -> MCPToolDefinition: ), ) - def _get_engine(self) -> PMInterviewEngine: + def _get_engine(self, *, cwd: str | None = None) -> PMInterviewEngine: """Return the injected engine or create a new one using the server's configured backend.""" if self.pm_engine is not None: return self.pm_engine - adapter = self.llm_adapter or create_llm_adapter( - backend=self.llm_backend, - max_turns=1, - use_case="interview", - allowed_tools=[], - ) + adapter_kwargs: dict[str, Any] = { + "backend": self.llm_backend, + "max_turns": _INTERVIEW_LLM_MAX_TURNS, + "use_case": "interview", + "allowed_tools": [], + "timeout": _INTERVIEW_LLM_TIMEOUT_SECONDS, + } + if cwd is not None: + adapter_kwargs["cwd"] = cwd + if self.llm_backend is not None: + adapter_kwargs["backend"] = self.llm_backend + adapter = create_llm_adapter(**adapter_kwargs) + else: + adapter = self.llm_adapter or create_llm_adapter(**adapter_kwargs) model = get_clarification_model(self.llm_backend) return PMInterviewEngine.create( llm_adapter=adapter, @@ -344,7 +354,15 @@ async def handle( # Auto-detect action from parameter presence (AC 13) action = _detect_action(arguments) - engine = self._get_engine() + if isinstance(initial_context, str) and not initial_context.strip() and action == "start": + return Result.err( + MCPToolError( + "initial_context must be a non-empty string to start a PM interview", + tool_name="ouroboros_pm_interview", + ) + ) + + engine = self._get_engine(cwd=cwd) try: # ── Generate PM seed ────────────────────────────────── @@ -771,7 +789,6 @@ async def _handle_answer( ) state = load_result.value - # Restore PM meta into engine meta = _load_pm_meta(session_id, self.data_dir) if meta: engine.restore_meta(meta) diff --git a/src/ouroboros/providers/claude_code_adapter.py b/src/ouroboros/providers/claude_code_adapter.py index 8c21d198..5aeda15e 100644 --- a/src/ouroboros/providers/claude_code_adapter.py +++ b/src/ouroboros/providers/claude_code_adapter.py @@ -89,9 +89,11 @@ def __init__( self, permission_mode: str = "default", cli_path: str | Path | None = None, + cwd: str | Path | None = None, allowed_tools: list[str] | None = None, max_turns: int = 1, on_message: Callable[[str, str], None] | None = None, + timeout: float | None = None, ) -> None: """Initialize Claude Code adapter. @@ -102,23 +104,34 @@ def __init__( cli_path: Path to the Claude CLI binary. If not provided, checks OUROBOROS_CLI_PATH env var, then falls back to SDK's bundled CLI. - allowed_tools: List of tools to allow. None means no tools. - For interview mode with codebase access, use ["Read", "Glob", "Grep"]. + cwd: Working directory passed to the Claude Agent SDK. + allowed_tools: Explicit allow-list for Claude tools. ``None`` keeps + the default permissive mode while still blocking dangerous tools. + Use ``[]`` to forbid all Claude tools. max_turns: Maximum turns for the conversation. Default 1 for single-response completions (most MCP use cases). on_message: Callback for streaming messages. Called with (type, content): - ("thinking", "content") for agent reasoning - ("tool", "tool_name") for tool usage + timeout: Optional application-level timeout in seconds for a + single completion request. When set, aborts before outer + transport timeouts and returns a ProviderError. """ self._permission_mode: str = permission_mode self._cli_path: Path | None = self._resolve_cli_path(cli_path) - self._allowed_tools: list[str] = allowed_tools or [] + self._cwd: str = str(Path(cwd).expanduser()) if cwd is not None else os.getcwd() + self._allowed_tools: list[str] | None = ( + list(allowed_tools) if allowed_tools is not None else None + ) self._max_turns: int = max_turns self._on_message: Callable[[str, str], None] | None = on_message + self._timeout: float | None = timeout if timeout and timeout > 0 else None log.info( "claude_code_adapter.initialized", permission_mode=permission_mode, cli_path=str(self._cli_path) if self._cli_path else None, + cwd=self._cwd, + timeout_seconds=self._timeout, ) def _resolve_cli_path(self, cli_path: str | Path | None) -> Path | None: @@ -241,9 +254,15 @@ async def complete( for attempt in range(_MAX_RETRIES): try: - result = await self._execute_single_request( - prompt, config, system_prompt=system_prompt - ) + if self._timeout is None: + result = await self._execute_single_request( + prompt, config, system_prompt=system_prompt + ) + else: + async with asyncio.timeout(self._timeout): + result = await self._execute_single_request( + prompt, config, system_prompt=system_prompt + ) if result.is_ok: if attempt > 0: @@ -271,6 +290,22 @@ async def complete( # Non-retryable error return result + except TimeoutError: + log.warning( + "claude_code_adapter.request_timed_out", + timeout_seconds=self._timeout, + attempt=attempt + 1, + ) + return Result.err( + ProviderError( + message=f"Claude Code request timed out after {self._timeout:.1f}s", + details={ + "timed_out": True, + "timeout_seconds": self._timeout, + "attempt": attempt + 1, + }, + ) + ) except Exception as e: error_str = str(e) error_type = type(e).__name__ @@ -386,16 +421,17 @@ def _stderr_callback(line: str) -> None: log.debug("claude_code_adapter.stderr", line=line[:200]) options_kwargs: dict = { - "allowed_tools": self._allowed_tools if self._allowed_tools is not None else [], "disallowed_tools": disallowed, "max_turns": self._max_turns, # Allow MCP and other ~/.claude/ settings to be inherited "permission_mode": self._permission_mode, - "cwd": os.getcwd(), + "cwd": self._cwd, "cli_path": self._cli_path, "stderr": _stderr_callback, "env": env_overrides, } + if self._allowed_tools is not None: + options_kwargs["allowed_tools"] = self._allowed_tools # Pass model from CompletionConfig if specified # "default" is not a valid SDK model — treat it as None (use SDK default) diff --git a/src/ouroboros/providers/codex_cli_adapter.py b/src/ouroboros/providers/codex_cli_adapter.py index f67908d9..5bb438e6 100644 --- a/src/ouroboros/providers/codex_cli_adapter.py +++ b/src/ouroboros/providers/codex_cli_adapter.py @@ -81,7 +81,7 @@ def __init__( self._cli_path = self._resolve_cli_path(cli_path) self._cwd = str(Path(cwd).expanduser()) if cwd is not None else os.getcwd() self._permission_mode = self._resolve_permission_mode(permission_mode) - self._allowed_tools = allowed_tools or [] + self._allowed_tools = list(allowed_tools) if allowed_tools is not None else None self._max_turns = max_turns self._on_message = on_message self._max_retries = max_retries @@ -150,12 +150,22 @@ def _build_prompt(self, messages: list[Message]) -> str: "If you need tools, prefer using only the following tools:\n" + "\n".join(f"- {tool}" for tool in self._allowed_tools) ) + elif self._allowed_tools is not None: + # Explicit empty list means no tools allowed — text-only response + parts.append("## Tool Constraints") + parts.append("Do NOT use any tools or MCP calls. Respond with plain text only.") if self._max_turns > 0: parts.append("## Execution Budget") - parts.append( - f"Keep the work within at most {self._max_turns} tool-assisted turns if possible." - ) + if self._allowed_tools == []: + parts.append( + "Answer directly in plain text and avoid turning this into a " + "multi-step tool workflow." + ) + else: + parts.append( + f"Keep the work within at most {self._max_turns} tool-assisted turns if possible." + ) for message in messages: if message.role == MessageRole.SYSTEM: diff --git a/src/ouroboros/providers/factory.py b/src/ouroboros/providers/factory.py index 214ad474..d6b0ae6b 100644 --- a/src/ouroboros/providers/factory.py +++ b/src/ouroboros/providers/factory.py @@ -94,9 +94,11 @@ def create_llm_adapter( return ClaudeCodeAdapter( permission_mode=resolved_permission_mode, cli_path=cli_path, + cwd=cwd, allowed_tools=allowed_tools, max_turns=max_turns, on_message=on_message, + timeout=timeout, ) if resolved_backend == "codex": return CodexCliLLMAdapter( @@ -110,7 +112,14 @@ def create_llm_adapter( max_retries=max_retries, ) # opencode is rejected at resolve time; this is a defensive fallback - from ouroboros.providers.litellm_adapter import LiteLLMAdapter + try: + from ouroboros.providers.litellm_adapter import LiteLLMAdapter + except ImportError as exc: + msg = ( + "litellm backend requested but litellm is not installed. " + "Install with: pip install 'ouroboros-ai[litellm]'" + ) + raise RuntimeError(msg) from exc return LiteLLMAdapter( api_key=api_key, diff --git a/tests/unit/bigbang/test_decide_later_items.py b/tests/unit/bigbang/test_decide_later_items.py index 1c2920cf..d209b079 100644 --- a/tests/unit/bigbang/test_decide_later_items.py +++ b/tests/unit/bigbang/test_decide_later_items.py @@ -17,7 +17,7 @@ InterviewStatus, ) from ouroboros.bigbang.pm_document import generate_pm_markdown -from ouroboros.bigbang.pm_interview import PMInterviewEngine +from ouroboros.bigbang.pm_interview import _MAX_AUTO_ADVANCE_QUESTIONS, PMInterviewEngine from ouroboros.bigbang.pm_seed import PMSeed from ouroboros.bigbang.question_classifier import ( ClassificationResult, @@ -25,6 +25,7 @@ QuestionCategory, QuestionClassifier, ) +from ouroboros.core.errors import ValidationError from ouroboros.core.types import Result from ouroboros.providers.base import ( CompletionResponse, @@ -250,6 +251,64 @@ async def test_multiple_decide_later_items_accumulate(self): assert len(engine.decide_later_items) == 3 assert engine.decide_later_items == [q1, q2, q3] + @pytest.mark.asyncio + async def test_decide_later_auto_response_failure_returns_error(self): + """Auto-answer persistence failures are surfaced immediately.""" + engine = _make_engine() + state = _make_state() + + question = "How should we handle data retention?" + classification = ClassificationResult( + original_question=question, + category=QuestionCategory.DECIDE_LATER, + reframed_question=question, + reasoning="Needs later legal review.", + decide_later=True, + placeholder_response="To be decided after legal review.", + ) + + engine.inner.ask_next_question.return_value = Result.ok(question) + engine.classifier.classify.return_value = Result.ok(classification) + engine.inner.record_response.return_value = Result.err( + ValidationError("failed to store placeholder", field="user_response") + ) + + result = await engine.ask_next_question(state) + + assert result.is_err + assert "failed to store placeholder" in str(result.error) + + @pytest.mark.asyncio + async def test_auto_advance_limit_returns_error(self): + """Consecutive auto-advanced questions stop at the configured guard.""" + engine = _make_engine() + state = _make_state() + + questions = [f"Technical question {i}?" for i in range(_MAX_AUTO_ADVANCE_QUESTIONS)] + classifications = [ + ClassificationResult( + original_question=question, + category=QuestionCategory.DECIDE_LATER, + reframed_question=question, + reasoning="Premature technical detail.", + decide_later=True, + placeholder_response="TBD later.", + ) + for question in questions + ] + + engine.classifier.classify = AsyncMock(side_effect=[Result.ok(c) for c in classifications]) + engine.inner.ask_next_question = AsyncMock( + side_effect=[Result.ok(question) for question in questions] + ) + engine.inner.record_response = AsyncMock(return_value=Result.ok(state)) + + result = await engine.ask_next_question(state) + + assert result.is_err + assert "too many non-pm questions" in str(result.error).lower() + assert engine.decide_later_items == questions + def test_decide_later_items_empty_by_default(self): """decide_later_items starts as empty list.""" engine = _make_engine() diff --git a/tests/unit/cli/test_pm_overwrite.py b/tests/unit/cli/test_pm_overwrite.py index 0f493852..bd720219 100644 --- a/tests/unit/cli/test_pm_overwrite.py +++ b/tests/unit/cli/test_pm_overwrite.py @@ -1,6 +1,8 @@ """Tests for AC 15: Existing pm_seed triggers overwrite confirmation on re-run.""" +import builtins from pathlib import Path +import sys from unittest.mock import patch import pytest @@ -122,3 +124,34 @@ def test_resume_skips_overwrite_check(self, seeds_dir: Path, _patch_home): source = inspect.getsource(_run_pm_interview) assert "if not resume_id:" in source assert "_check_existing_pm_seeds" in source + + @pytest.mark.asyncio + async def test_run_pm_interview_missing_litellm_exits_cleanly( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + """The PM CLI prints a helpful error when litellm is unavailable.""" + from ouroboros.cli.commands import pm as pm_module + + module_name = "ouroboros.providers.litellm_adapter" + real_import = builtins.__import__ + + def fake_import(name, globals=None, locals=None, fromlist=(), level=0): # type: ignore[no-untyped-def] + if name == module_name: + raise ImportError("No module named 'litellm'") + return real_import(name, globals, locals, fromlist, level) + + monkeypatch.delitem(sys.modules, module_name, raising=False) + monkeypatch.setattr(builtins, "__import__", fake_import) + + with ( + patch.object(pm_module, "print_error") as mock_print_error, + pytest.raises(pm_module.typer.Exit) as exc_info, + ): + await pm_module._run_pm_interview( + resume_id=None, + model="anthropic/claude-sonnet-4-20250514", + debug=False, + ) + + assert exc_info.value.exit_code == 1 + assert "litellm is required for the PM command" in mock_print_error.call_args.args[0] diff --git a/tests/unit/mcp/tools/test_pm_handler.py b/tests/unit/mcp/tools/test_pm_handler.py index 639432b4..7235c3bf 100644 --- a/tests/unit/mcp/tools/test_pm_handler.py +++ b/tests/unit/mcp/tools/test_pm_handler.py @@ -13,6 +13,8 @@ from ouroboros.core.types import Result from ouroboros.mcp.tools.pm_handler import ( _DATA_DIR, + _INTERVIEW_LLM_MAX_TURNS, + _INTERVIEW_LLM_TIMEOUT_SECONDS, PMInterviewHandler, _check_completion, _compute_deferred_diff, @@ -756,15 +758,69 @@ def test_creates_engine_when_none_injected(self) -> None: mock_create.assert_called_once_with( backend=None, - max_turns=1, + max_turns=_INTERVIEW_LLM_MAX_TURNS, use_case="interview", allowed_tools=[], + timeout=_INTERVIEW_LLM_TIMEOUT_SECONDS, ) mock_engine_cls.create.assert_called_once() call_kwargs = mock_engine_cls.create.call_args assert call_kwargs.kwargs["llm_adapter"] is mock_adapter assert call_kwargs.kwargs["model"] == "claude-opus-4-6" + def test_get_engine_passes_cwd_to_adapter(self, tmp_path: Path) -> None: + """Factory-built adapters inherit the current working directory.""" + with ( + patch("ouroboros.mcp.tools.pm_handler.create_llm_adapter") as mock_create, + patch("ouroboros.mcp.tools.pm_handler.get_clarification_model", return_value="m"), + patch("ouroboros.mcp.tools.pm_handler.PMInterviewEngine") as mock_engine_cls, + ): + mock_create.return_value = MagicMock() + mock_engine_cls.create.return_value = MagicMock() + + handler = PMInterviewHandler() + handler._get_engine(cwd=str(tmp_path)) + + mock_create.assert_called_once_with( + backend=None, + max_turns=_INTERVIEW_LLM_MAX_TURNS, + use_case="interview", + allowed_tools=[], + timeout=_INTERVIEW_LLM_TIMEOUT_SECONDS, + cwd=str(tmp_path), + ) + + def test_get_engine_rebuilds_shared_adapter_when_backend_is_configured( + self, tmp_path: Path + ) -> None: + """Server-composed handlers rebuild a PM-specific adapter instead of reusing the shared one.""" + with ( + patch("ouroboros.mcp.tools.pm_handler.create_llm_adapter") as mock_create, + patch("ouroboros.mcp.tools.pm_handler.get_clarification_model", return_value="m"), + patch("ouroboros.mcp.tools.pm_handler.PMInterviewEngine") as mock_engine_cls, + ): + shared_adapter = MagicMock(name="shared_adapter") + dedicated_adapter = MagicMock(name="dedicated_adapter") + mock_create.return_value = dedicated_adapter + mock_engine_cls.create.return_value = MagicMock() + + handler = PMInterviewHandler( + llm_adapter=shared_adapter, + llm_backend="claude_code", + ) + handler._get_engine(cwd=str(tmp_path)) + + mock_create.assert_called_once_with( + backend="claude_code", + max_turns=_INTERVIEW_LLM_MAX_TURNS, + use_case="interview", + allowed_tools=[], + timeout=_INTERVIEW_LLM_TIMEOUT_SECONDS, + cwd=str(tmp_path), + ) + call_kwargs = mock_engine_cls.create.call_args + assert call_kwargs.kwargs["llm_adapter"] is dedicated_adapter + def test_uses_custom_data_dir(self, tmp_path: Path) -> None: """When data_dir is set, passes it to PMInterviewEngine.create.""" with ( @@ -784,6 +840,22 @@ def test_uses_custom_data_dir(self, tmp_path: Path) -> None: class TestHandleStartBrownfield: """Test brownfield detection in _handle_start — uses DB default repo.""" + @pytest.mark.asyncio + async def test_start_blank_initial_context_returns_specific_error(self, tmp_path: Path) -> None: + """Explicit start with blank initial_context gets a targeted validation error.""" + handler = PMInterviewHandler(pm_engine=make_pm_engine_mock(), data_dir=tmp_path) + + result = await handler.handle( + { + "action": "start", + "initial_context": " ", + "cwd": str(tmp_path), + } + ) + + assert result.is_err + assert "initial_context must be a non-empty string" in str(result.error) + @pytest.mark.asyncio async def test_start_directly_starts_interview_with_defaults(self, tmp_path: Path) -> None: """Start auto-loads default repos from DB and starts interview directly.""" @@ -1569,6 +1641,27 @@ async def test_resume_loads_state_and_meta(self, tmp_path: Path) -> None: # Verify meta was saved after assert _load_pm_meta("resume-load", data_dir=tmp_path) is not None + @pytest.mark.asyncio + async def test_resume_load_state_failure_does_not_restore_meta(self, tmp_path: Path) -> None: + """Load failures must leave the long-lived engine untouched.""" + engine = make_pm_engine_mock(deferred_items=[], decide_later_items=[]) + engine.restore_meta = MagicMock() + engine.load_state = AsyncMock(return_value=Result.err(Exception("state not found"))) + + _save_pm_meta("resume-load-fail", engine, cwd="/project", data_dir=tmp_path) + + handler = PMInterviewHandler(pm_engine=engine, data_dir=tmp_path) + result = await handler.handle( + { + "session_id": "resume-load-fail", + "answer": "My answer", + } + ) + + assert result.is_err + engine.restore_meta.assert_not_called() + assert "state not found" in str(result.error) + # ── Action auto-detection tests (AC 13) ─────────────────────── diff --git a/tests/unit/providers/test_claude_code_adapter.py b/tests/unit/providers/test_claude_code_adapter.py index a46ad6fa..9d68fc6a 100644 --- a/tests/unit/providers/test_claude_code_adapter.py +++ b/tests/unit/providers/test_claude_code_adapter.py @@ -7,6 +7,7 @@ from __future__ import annotations +import asyncio from unittest.mock import AsyncMock, MagicMock, patch import pytest @@ -143,6 +144,30 @@ async def test_non_system_messages_preserved_in_prompt(self) -> None: assert "User: Follow-up" in prompt_arg +class TestCompleteTimeout: + """Test application-level timeout handling.""" + + @pytest.mark.asyncio + async def test_complete_returns_timeout_error(self) -> None: + """Timeouts are surfaced as ProviderError before transport timeout.""" + adapter = ClaudeCodeAdapter(timeout=0.01) + messages = [Message(role=MessageRole.USER, content="Hello")] + config = CompletionConfig(model="claude-sonnet-4-6") + + async def slow_execute(*args, **kwargs): # type: ignore[no-untyped-def] + await asyncio.sleep(1) + return MagicMock(is_ok=True) + + adapter._execute_single_request = AsyncMock(side_effect=slow_execute) + + with patch.dict("sys.modules", {"claude_agent_sdk": MagicMock()}): + result = await adapter.complete(messages, config) + + assert result.is_err + assert "timed out" in result.error.message.lower() + assert result.error.details["timed_out"] is True + + def _make_sdk_mock(mock_options_cls: MagicMock, mock_query: MagicMock) -> MagicMock: """Build a fake claude_agent_sdk module with _errors submodule.""" sdk_module = MagicMock() @@ -229,3 +254,66 @@ async def fake_query(*args, **kwargs): options_call_kwargs = mock_options_cls.call_args.kwargs assert "system_prompt" not in options_call_kwargs + + @pytest.mark.asyncio + async def test_default_tool_policy_omits_allowed_tools_and_uses_configured_cwd(self) -> None: + """Default Claude adapters should not force a blanket no-tools policy.""" + adapter = ClaudeCodeAdapter(cwd="/tmp/project") + config = CompletionConfig(model="claude-sonnet-4-6") + + mock_options_cls = MagicMock() + + async def fake_query(*args, **kwargs): + msg = MagicMock() + type(msg).__name__ = "ResultMessage" + msg.structured_output = None + msg.result = "test response" + msg.is_error = False + yield msg + + sdk_module = _make_sdk_mock(mock_options_cls, MagicMock(side_effect=fake_query)) + + with patch.dict( + "sys.modules", + { + "claude_agent_sdk": sdk_module, + "claude_agent_sdk._errors": sdk_module._errors, + }, + ): + await adapter._execute_single_request("test prompt", config) + + options_call_kwargs = mock_options_cls.call_args.kwargs + assert "allowed_tools" not in options_call_kwargs + assert options_call_kwargs["cwd"] == "/tmp/project" + assert "Write" in options_call_kwargs["disallowed_tools"] + + @pytest.mark.asyncio + async def test_explicit_empty_allowed_tools_blocks_all_sdk_tools(self) -> None: + """An explicit empty list keeps the strict no-tools interview policy.""" + adapter = ClaudeCodeAdapter(allowed_tools=[]) + config = CompletionConfig(model="claude-sonnet-4-6") + + mock_options_cls = MagicMock() + + async def fake_query(*args, **kwargs): + msg = MagicMock() + type(msg).__name__ = "ResultMessage" + msg.structured_output = None + msg.result = "test response" + msg.is_error = False + yield msg + + sdk_module = _make_sdk_mock(mock_options_cls, MagicMock(side_effect=fake_query)) + + with patch.dict( + "sys.modules", + { + "claude_agent_sdk": sdk_module, + "claude_agent_sdk._errors": sdk_module._errors, + }, + ): + await adapter._execute_single_request("test prompt", config) + + options_call_kwargs = mock_options_cls.call_args.kwargs + assert options_call_kwargs["allowed_tools"] == [] + assert "Read" in options_call_kwargs["disallowed_tools"] diff --git a/tests/unit/providers/test_codex_cli_adapter.py b/tests/unit/providers/test_codex_cli_adapter.py index 00115bdf..7ba0c51d 100644 --- a/tests/unit/providers/test_codex_cli_adapter.py +++ b/tests/unit/providers/test_codex_cli_adapter.py @@ -132,6 +132,26 @@ def test_build_prompt_includes_tool_constraints_and_turn_budget(self) -> None: assert "## Execution Budget" in prompt assert "5 tool-assisted turns" in prompt + def test_build_prompt_omits_tool_constraints_when_tools_unspecified(self) -> None: + """Default adapters keep tool policy unspecified for non-interview flows.""" + adapter = CodexCliLLMAdapter(cli_path="codex") + + prompt = adapter._build_prompt([Message(role=MessageRole.USER, content="Summarize this.")]) + + assert "## Tool Constraints" not in prompt + assert "Do NOT use any tools or MCP calls" not in prompt + + def test_build_prompt_explicit_empty_tools_forbids_tool_use(self) -> None: + """An explicit empty tool list requests a text-only response.""" + adapter = CodexCliLLMAdapter(cli_path="codex", allowed_tools=[], max_turns=5) + + prompt = adapter._build_prompt([Message(role=MessageRole.USER, content="Summarize this.")]) + + assert "## Tool Constraints" in prompt + assert "Do NOT use any tools or MCP calls" in prompt + assert "tool-assisted turns" not in prompt + assert "avoid turning this into a multi-step tool workflow" in prompt + def test_normalize_model_omits_default_sentinel(self) -> None: """The backend-safe default sentinel is translated to no explicit model.""" adapter = CodexCliLLMAdapter(cli_path="codex") diff --git a/tests/unit/providers/test_factory.py b/tests/unit/providers/test_factory.py index 4f625890..2d778a13 100644 --- a/tests/unit/providers/test_factory.py +++ b/tests/unit/providers/test_factory.py @@ -1,5 +1,8 @@ """Unit tests for provider factory helpers.""" +import builtins +import sys + import pytest from ouroboros.providers.claude_code_adapter import ClaudeCodeAdapter @@ -63,11 +66,43 @@ def test_creates_claude_code_adapter(self) -> None: adapter = create_llm_adapter(backend="claude_code") assert isinstance(adapter, ClaudeCodeAdapter) + def test_passes_timeout_to_claude_code_adapter(self) -> None: + """Claude backend forwards application-level timeout to the adapter.""" + adapter = create_llm_adapter(backend="claude_code", timeout=42.0) + assert isinstance(adapter, ClaudeCodeAdapter) + assert adapter._timeout == 42.0 + + def test_passes_cwd_to_claude_code_adapter(self) -> None: + """Claude backend forwards cwd to the SDK adapter.""" + adapter = create_llm_adapter(backend="claude_code", cwd="/tmp/project") + assert isinstance(adapter, ClaudeCodeAdapter) + assert adapter._cwd == "/tmp/project" + def test_creates_litellm_adapter(self) -> None: """LiteLLM backend returns LiteLLMAdapter.""" adapter = create_llm_adapter(backend="litellm") assert isinstance(adapter, LiteLLMAdapter) + def test_litellm_import_error_raises_runtime_error( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Missing litellm dependency raises a helpful RuntimeError.""" + module_name = "ouroboros.providers.litellm_adapter" + real_import = builtins.__import__ + + def fake_import(name, globals=None, locals=None, fromlist=(), level=0): # type: ignore[no-untyped-def] + if name == module_name: + raise ImportError("No module named 'litellm'") + return real_import(name, globals, locals, fromlist, level) + + monkeypatch.delitem(sys.modules, module_name, raising=False) + monkeypatch.setattr(builtins, "__import__", fake_import) + + with pytest.raises( + RuntimeError, match="litellm backend requested but litellm is not installed" + ): + create_llm_adapter(backend="litellm") + def test_creates_codex_adapter(self) -> None: """Codex backend returns CodexCliLLMAdapter.""" adapter = create_llm_adapter(backend="codex", cwd="/tmp/project")