diff --git a/skills/pm/SKILL.md b/skills/pm/SKILL.md index 8a039ae4..b1de284e 100644 --- a/skills/pm/SKILL.md +++ b/skills/pm/SKILL.md @@ -66,41 +66,53 @@ Then check: does `meta.ask_user_question` exist? ``` Do NOT modify it. Do NOT add options. Do NOT rephrase the question. -- **NO** → This is an interview question. Use `AskUserQuestion` with `meta.question` and generate 2-3 suggested answers. +- **NO** → This is an interview question. Use `AskUserQuestion` with `meta.question`. + - If `meta.skip_eligible == true`: add a skip option based on `meta.classification`: + - `classification == "decide_later"` → add option `{"label": "Decide later", "description": "Skip — will be recorded as an open item in the PRD"}` + - `classification == "deferred"` → add option `{"label": "Defer to dev", "description": "Skip — this technical decision will be deferred to the development phase"}` + - Generate 2-3 suggested answers as the other options. **C. Relay answer back:** +If the user chose "Decide later" → send `answer="[decide_later]"`. +If the user chose "Defer to dev" → send `answer="[deferred]"`. +Otherwise → send the user's answer normally. + ``` Tool: ouroboros_pm_interview Arguments: session_id: - : + : ``` **D. Check completion:** -If `meta.is_complete == true` → go to Step 4. -Otherwise → repeat Step 3. +Completion is determined ONLY by `meta.is_complete` — NEVER by the response text. +The MCP response text may sound like the interview is wrapping up, but ignore it. -### Step 4: Generate +If `meta.is_complete == true`: +- If `meta.generation_failed == true` → retry generation: + ``` + Tool: ouroboros_pm_interview + Arguments: + session_id: + action: "generate" + cwd: + ``` +- Otherwise → go to Step 4. The MCP auto-generated the PM document. + `meta.pm_path` and `meta.seed_path` contain the file paths. -``` -Tool: ouroboros_pm_interview -Arguments: - session_id: - action: "generate" - cwd: -``` +Otherwise → repeat Step 3, regardless of what the response text says. -### Step 5: Copy to Clipboard +### Step 4: Copy to Clipboard -After generation, read the pm.md file from `meta.pm_path` and copy its contents to the clipboard: +Read the pm.md file from `meta.pm_path` and copy its contents to the clipboard: ```bash cat | pbcopy ``` -### Step 6: Show Result & Next Step +### Step 5: Show Result & Next Step Show the following to the user: diff --git a/src/ouroboros/bigbang/pm_interview.py b/src/ouroboros/bigbang/pm_interview.py index ab8e9810..1b2e1cc7 100644 --- a/src/ouroboros/bigbang/pm_interview.py +++ b/src/ouroboros/bigbang/pm_interview.py @@ -158,7 +158,8 @@ class PMInterviewEngine: """Original question text for questions classified as DECIDE_LATER. These are questions that are premature or unknowable at the PM stage. - They are auto-answered with a placeholder and stored here so the PMSeed + The main session presents the question to the user with a "decide later" + option; when chosen, the caller records the item here so the PMSeed and PM document can surface them as explicit "decide later" decisions. """ classifications: list[ClassificationResult] = field(default_factory=list) @@ -484,47 +485,39 @@ async def ask_next_question( output_type = classification.output_type if output_type == ClassifierOutputType.DEFERRED: - # Track as deferred item and generate a new question - self.deferred_items.append(classification.original_question) + # Return the question to the caller (main session) so the user + # can choose to defer it themselves. The main session detects + # classification == "deferred" via response_meta and offers + # a "skip / defer to dev" option. If the user picks it, the + # caller calls skip_as_deferred() which records the deferral + # and appends to deferred_items. + # + # Previously this branch auto-answered and recursed, which could + # trigger MCP 120s timeouts on consecutive DEFERRED runs. log.info( - "pm.question_deferred", + "pm.question_deferred_candidate", question=classification.original_question[:100], reasoning=classification.reasoning, output_type=output_type, ) - # Feed an automatic response back to the inner InterviewEngine - # so the round is properly recorded and the engine advances. - # This prevents the inner engine from re-generating similar - # technical questions it doesn't know were already handled. - await self.record_response( - state, - user_response="[Deferred to development phase] " - "This technical decision will be addressed during the " - "development interview.", - question=classification.original_question, - ) - # Recursively ask for the next real question - return await self.ask_next_question(state) + return Result.ok(classification.original_question) if output_type == ClassifierOutputType.DECIDE_LATER: - # Auto-answer with placeholder — no PM interaction needed - placeholder = classification.placeholder_response - self.decide_later_items.append(classification.original_question) + # Return the question to the caller (main session) so the user + # can choose "decide later" themselves. The main session detects + # classification == "decide_later" via response_meta and offers + # the option. If the user picks it, the caller calls + # skip_as_decide_later() which records the placeholder and + # appends to decide_later_items. + # + # Previously this branch auto-answered and recursed, which could + # trigger MCP 120s timeouts on consecutive DECIDE_LATER runs. log.info( "pm.question_decide_later", question=classification.original_question[:100], - placeholder=placeholder[:100], reasoning=classification.reasoning, ) - # Record the placeholder as the response so the interview - # engine advances its round count - await self.record_response( - state, - user_response=f"[Decide later] {placeholder}", - question=classification.original_question, - ) - # Recursively ask for the next real question - return await self.ask_next_question(state) + return Result.ok(classification.original_question) if output_type == ClassifierOutputType.REFRAMED: # Use the reframed version and track the mapping @@ -595,6 +588,76 @@ async def record_response( return await self.inner.record_response(state, user_response, question) + async def skip_as_decide_later( + self, + state: InterviewState, + question: str, + ) -> Result[InterviewState, ValidationError]: + """Skip a question as "decide later" at the user's explicit request. + + Records the question in ``decide_later_items`` and feeds a placeholder + response to the inner InterviewEngine so the round is properly recorded + and the engine advances. + + This is called when the main session detects that the user chose the + "decide later" option for a DECIDE_LATER-classified question, instead + of the old auto-skip behavior inside ``ask_next_question``. + + Args: + state: Current interview state. + question: The question the user chose to decide later. + + Returns: + Result containing updated state or ValidationError. + """ + if question not in self.decide_later_items: + self.decide_later_items.append(question) + + log.info( + "pm.question_decide_later_by_user", + question=question[:100], + ) + + return await self.record_response( + state, + user_response="[Decide later] To be determined — user chose to decide later.", + question=question, + ) + + async def skip_as_deferred( + self, + state: InterviewState, + question: str, + ) -> Result[InterviewState, ValidationError]: + """Skip a question as "deferred to dev" at the user's explicit request. + + Records the question in ``deferred_items`` and feeds a deferral + response to the inner InterviewEngine so the round is properly recorded + and the engine advances. + + Args: + state: Current interview state. + question: The question the user chose to defer. + + Returns: + Result containing updated state or ValidationError. + """ + if question not in self.deferred_items: + self.deferred_items.append(question) + + log.info( + "pm.question_deferred_by_user", + question=question[:100], + ) + + return await self.record_response( + state, + user_response="[Deferred to development phase] " + "This technical decision will be addressed during the " + "development interview.", + question=question, + ) + async def complete_interview( self, state: InterviewState, diff --git a/src/ouroboros/bigbang/question_classifier.py b/src/ouroboros/bigbang/question_classifier.py index 202ba7bf..83d22d30 100644 --- a/src/ouroboros/bigbang/question_classifier.py +++ b/src/ouroboros/bigbang/question_classifier.py @@ -51,8 +51,8 @@ class ClassifierOutputType(StrEnum): PASSTHROUGH: Planning question forwarded unchanged to the PM. REFRAMED: Development question rewritten in PM-friendly language. - DEFERRED: Deeply technical question deferred to the dev interview phase. - DECIDE_LATER: Premature question auto-answered with a placeholder. + DEFERRED: Deeply technical question returned to user with defer option. + DECIDE_LATER: Premature question returned to user with decide-later option. """ PASSTHROUGH = "passthrough" @@ -109,13 +109,20 @@ def question_for_pm(self) -> str: For PASSTHROUGH: returns original_question unchanged. For REFRAMED: returns the reframed_question. - For DEFERRED: returns empty string (should not be shown). - For DECIDE_LATER: returns empty string (auto-answered). + For DEFERRED: returns original_question (shown to user + so they can choose to answer or defer to dev). + For DECIDE_LATER: returns original_question (shown to user + so they can choose to answer or defer). """ if self.output_type == ClassifierOutputType.PASSTHROUGH: return self.original_question if self.output_type == ClassifierOutputType.REFRAMED: return self.reframed_question + if self.output_type in ( + ClassifierOutputType.DECIDE_LATER, + ClassifierOutputType.DEFERRED, + ): + return self.original_question return "" diff --git a/src/ouroboros/cli/commands/pm.py b/src/ouroboros/cli/commands/pm.py index 4d2f8ec2..2d6565b7 100644 --- a/src/ouroboros/cli/commands/pm.py +++ b/src/ouroboros/cli/commands/pm.py @@ -410,6 +410,20 @@ async def _run_pm_interview( console.print(f"\n[bold yellow]?[/] {question}\n") + # Check if this question was classified as skippable — + # if so, show a hint that the user can defer it. + classification = engine.get_last_classification() + if classification == "decide_later": + console.print( + "[dim] 💡 This question can be deferred. " + 'Type "decide later" or "skip" to defer it.[/]\n' + ) + elif classification == "deferred": + console.print( + "[dim] 💡 This is a technical question that can be deferred to the dev phase. " + 'Type "defer" or "skip" to defer it.[/]\n' + ) + # Persist state + meta AFTER displaying the question but BEFORE # waiting for input so that an interruption preserves the pending # question and --resume shows the same question. @@ -442,6 +456,39 @@ async def _run_pm_interview( if state.rounds and state.rounds[-1].user_response is None: state.rounds.pop() + # Handle user-initiated skip (decide later / defer to dev) + _lower = user_response.strip().lower() + if classification == "decide_later" and _lower in ( + "decide later", + "skip", + "[decide_later]", + ): + record_result = await engine.skip_as_decide_later(state, question) + if isinstance(record_result, Result) and record_result.is_err: + print_error(f"Failed to skip question: {record_result.error}") + break + save_result = await engine.save_state(state) + if isinstance(save_result, Result) and save_result.is_err: + print_error(f"Failed to save state: {save_result.error}") + break + _save_cli_pm_meta(state.interview_id, engine) + continue + if classification == "deferred" and _lower in ( + "defer", + "skip", + "[deferred]", + ): + record_result = await engine.skip_as_deferred(state, question) + if isinstance(record_result, Result) and record_result.is_err: + print_error(f"Failed to defer question: {record_result.error}") + break + save_result = await engine.save_state(state) + if isinstance(save_result, Result) and save_result.is_err: + print_error(f"Failed to save state: {save_result.error}") + break + _save_cli_pm_meta(state.interview_id, engine) + continue + record_result = await engine.record_response(state, user_response, question) if isinstance(record_result, Result) and record_result.is_err: print_error(f"Failed to record response: {record_result.error}") diff --git a/src/ouroboros/mcp/tools/pm_handler.py b/src/ouroboros/mcp/tools/pm_handler.py index 9910fd60..75b0d013 100644 --- a/src/ouroboros/mcp/tools/pm_handler.py +++ b/src/ouroboros/mcp/tools/pm_handler.py @@ -503,6 +503,12 @@ async def _handle_start( # Include pending_reframe in response meta if a reframe occurred pending_reframe = engine.get_pending_reframe() + # Check classification to signal skip eligibility + classification = _last_classification(engine) + is_decide_later = classification == "decide_later" + is_deferred = classification == "deferred" + skip_eligible = is_decide_later or is_deferred + meta = { "session_id": state.interview_id, "status": "interview_started", @@ -510,6 +516,8 @@ async def _handle_start( "response_param": "answer", "question": question, "is_brownfield": state.is_brownfield, + "classification": classification, + "skip_eligible": skip_eligible, "pending_reframe": pending_reframe, **diff, } @@ -518,18 +526,35 @@ async def _handle_start( "pm_handler.started", session_id=state.interview_id, is_brownfield=state.is_brownfield, + classification=classification, + skip_eligible=skip_eligible, has_pending_reframe=pending_reframe is not None, **diff, ) + # Build response text — include skip hint when applicable + start_text = f"PM interview started. Session ID: {state.interview_id}\n\n{question}" + if is_decide_later: + start_text += ( + "\n\n💡 This question can be deferred. " + 'The user may answer now, or choose "decide later" to skip it. ' + "If they choose to decide later, pass " + f'answer="[decide_later]" with session_id="{state.interview_id}".' + ) + elif is_deferred: + start_text += ( + "\n\n💡 This is a technical question that can be deferred to the dev phase. " + "The user may answer now, or choose to defer it. " + "If they choose to defer, pass " + f'answer="[deferred]" with session_id="{state.interview_id}".' + ) + return Result.ok( MCPToolResult( content=( MCPContentItem( type=ContentType.TEXT, - text=( - f"PM interview started. Session ID: {state.interview_id}\n\n{question}" - ), + text=start_text, ), ), is_error=False, @@ -780,15 +805,35 @@ async def _handle_answer( if not answer and state.rounds and state.rounds[-1].user_response is None: pending_question = state.rounds[-1].question classification = _last_classification(engine) + is_decide_later = classification == "decide_later" + is_deferred = classification == "deferred" + skip_eligible = is_decide_later or is_deferred pending_reframe = engine.get_pending_reframe() + # Include skip hint in re-displayed question + pending_text = f"Session {session_id}\n\n{pending_question}" + if is_decide_later: + pending_text += ( + "\n\n💡 This question can be deferred. " + 'The user may answer now, or choose "decide later" to skip it. ' + "If they choose to decide later, pass " + f'answer="[decide_later]" with session_id="{session_id}".' + ) + elif is_deferred: + pending_text += ( + "\n\n💡 This is a technical question that can be deferred to the dev phase. " + "The user may answer now, or choose to defer it. " + "If they choose to defer, pass " + f'answer="[deferred]" with session_id="{session_id}".' + ) + return Result.ok( MCPToolResult( content=( MCPContentItem( type=ContentType.TEXT, - text=f"Session {session_id}\n\n{pending_question}", + text=pending_text, ), ), is_error=False, @@ -799,6 +844,7 @@ async def _handle_answer( "question": pending_question, "is_complete": False, "classification": classification, + "skip_eligible": skip_eligible, "deferred_this_round": [], "decide_later_this_round": [], "interview_complete": False, @@ -824,20 +870,50 @@ async def _handle_answer( if state.rounds[-1].user_response is None: state.rounds.pop() - record_result = await engine.record_response(state, answer, last_question) - if record_result.is_err: - return Result.err( - MCPToolError( - str(record_result.error), - tool_name="ouroboros_pm_interview", + # ── User chose to skip (decide later / defer to dev) ─── + # The main session detects classification via response_meta + # and offers skip options. The user's choice arrives as: + # answer="[decide_later]" → skip_as_decide_later() + # answer="[deferred]" → skip_as_deferred() + stripped = answer.strip() + if stripped == "[decide_later]": + skip_result = await engine.skip_as_decide_later(state, last_question) + if skip_result.is_err: + return Result.err( + MCPToolError( + str(skip_result.error), + tool_name="ouroboros_pm_interview", + ) ) - ) - state = record_result.value - state.clear_stored_ambiguity() + state = skip_result.value + state.clear_stored_ambiguity() + elif stripped == "[deferred]": + skip_result = await engine.skip_as_deferred(state, last_question) + if skip_result.is_err: + return Result.err( + MCPToolError( + str(skip_result.error), + tool_name="ouroboros_pm_interview", + ) + ) + state = skip_result.value + state.clear_stored_ambiguity() + else: + record_result = await engine.record_response(state, answer, last_question) + if record_result.is_err: + return Result.err( + MCPToolError( + str(record_result.error), + tool_name="ouroboros_pm_interview", + ) + ) + state = record_result.value + state.clear_stored_ambiguity() # ── Completion check (AC 12) ───────────────────────────── # Completion is determined by engine ambiguity scoring. - # User controls when to stop. + # When complete, auto-generate the PM document immediately + # (no separate "generate" call needed from the skill). completion = await _check_completion(state, engine) if completion is not None: # Mark interview as complete @@ -852,21 +928,58 @@ async def _handle_answer( ) _save_pm_meta(session_id, engine, cwd=cwd, data_dir=self.data_dir) + log.info( + "pm_handler.interview_complete", + session_id=session_id, + **completion, + ) + + # Auto-generate PM document on completion + seed_result = await engine.generate_pm_seed(state) + if seed_result.is_err: + # Generation failed — still report completion but without document + summary_text = ( + f"Interview complete but PM generation failed: {seed_result.error}\n" + f"Session ID: {session_id}\n" + f'Retry with: action="generate", session_id="{session_id}"' + ) + return Result.ok( + MCPToolResult( + content=( + MCPContentItem(type=ContentType.TEXT, text=summary_text), + ), + is_error=False, + meta={ + "session_id": session_id, + "is_complete": True, + "generation_failed": True, + **completion, + }, + ) + ) + + seed = seed_result.value + seed_path = engine.save_pm_seed(seed) + pm_output_dir = Path(cwd) / ".ouroboros" + pm_path = save_pm_document(seed, output_dir=pm_output_dir) + decide_later_summary = engine.format_decide_later_summary() summary_text = ( - f"Interview complete. Session ID: {session_id}\n" + f"Interview complete. PM document generated.\n\n" + f"Session ID: {session_id}\n" f"Rounds completed: {completion['rounds_completed']}\n" f"Completion reason: {completion['completion_reason']}\n" ) if completion.get("ambiguity_score") is not None: summary_text += f"Ambiguity score: {completion['ambiguity_score']:.2f}\n" summary_text += ( - f"\nDeferred items: {len(engine.deferred_items)}\n" - f"Decide-later items: {len(engine.decide_later_items)}\n" + f"\nPM document: {pm_path}\n" + f"Seed: {seed_path}\n" + f"\nDeferred items: {len(seed.deferred_items)}\n" + f"Decide-later items: {len(seed.decide_later_items)}\n" ) if decide_later_summary: summary_text += f"\n{decide_later_summary}\n" - summary_text += f'\nGenerate PM with: action="generate", session_id="{session_id}"' response_meta = { "session_id": session_id, @@ -878,14 +991,10 @@ async def _handle_answer( **completion, "deferred_count": len(engine.deferred_items), "decide_later_count": len(engine.decide_later_items), + "seed_path": str(seed_path), + "pm_path": str(pm_path), } - log.info( - "pm_handler.interview_complete", - session_id=session_id, - **completion, - ) - return Result.ok( MCPToolResult( content=( @@ -958,6 +1067,11 @@ async def _handle_answer( # Extract classification from the last classify call classification = _last_classification(engine) + # Signal to the caller that the user can skip this question + is_decide_later = classification == "decide_later" + is_deferred = classification == "deferred" + skip_eligible = is_decide_later or is_deferred + response_meta = { "session_id": session_id, "input_type": "freeText", @@ -965,6 +1079,7 @@ async def _handle_answer( "question": question, "is_complete": False, "classification": classification, + "skip_eligible": skip_eligible, "deferred_this_round": diff["new_deferred"], "decide_later_this_round": diff["new_decide_later"], # Keep backward-compat fields from AC 8 @@ -977,16 +1092,34 @@ async def _handle_answer( "pm_handler.question_asked", session_id=session_id, classification=classification, + skip_eligible=skip_eligible, has_pending_reframe=pending_reframe is not None, **diff, ) + # Build response text — include skip hint when applicable + response_text = f"Session {session_id}\n\n{question}" + if is_decide_later: + response_text += ( + "\n\n💡 This question can be deferred. " + 'The user may answer now, or choose "decide later" to skip it. ' + "If they choose to decide later, pass " + f'answer="[decide_later]" with session_id="{session_id}".' + ) + elif is_deferred: + response_text += ( + "\n\n💡 This is a technical question that can be deferred to the dev phase. " + "The user may answer now, or choose to defer it. " + "If they choose to defer, pass " + f'answer="[deferred]" with session_id="{session_id}".' + ) + return Result.ok( MCPToolResult( content=( MCPContentItem( type=ContentType.TEXT, - text=f"Session {session_id}\n\n{question}", + text=response_text, ), ), is_error=False, @@ -1066,7 +1199,8 @@ async def _handle_generate( f"PM seed generated: {seed.product_name}\n" f"Seed: {seed_path}\n" f"PM document: {pm_path}\n\n" - f"Decide-later items: {len(seed.deferred_items) + len(seed.decide_later_items)}" + f"Deferred items: {len(seed.deferred_items)}\n" + f"Decide-later items: {len(seed.decide_later_items)}" ), ), ), diff --git a/tests/unit/bigbang/test_decide_later_items.py b/tests/unit/bigbang/test_decide_later_items.py index 1c2920cf..2f37776b 100644 --- a/tests/unit/bigbang/test_decide_later_items.py +++ b/tests/unit/bigbang/test_decide_later_items.py @@ -88,8 +88,12 @@ class TestDecideLaterItemsList: """Tests for decide-later items stored as original question text in list.""" @pytest.mark.asyncio - async def test_decide_later_stores_original_question_text(self): - """DECIDE_LATER stores original question text in decide_later_items list.""" + async def test_decide_later_returns_question_to_caller(self): + """DECIDE_LATER returns the question to the caller without auto-answering. + + The caller (main session) is responsible for presenting the decide-later + option and recording the item in decide_later_items when chosen. + """ engine = _make_engine() state = _make_state() @@ -97,7 +101,6 @@ async def test_decide_later_stores_original_question_text(self): # Inner engine generates the question engine.inner.ask_next_question.return_value = Result.ok(original_q) - engine.inner.record_response.return_value = Result.ok(state) # Classifier says DECIDE_LATER classification = ClassificationResult( @@ -110,40 +113,32 @@ async def test_decide_later_stores_original_question_text(self): ) assert classification.output_type == ClassifierOutputType.DECIDE_LATER - # First call returns DECIDE_LATER, second call returns a PASSTHROUGH - passthrough_q = "Who are the primary users?" - passthrough_classification = ClassificationResult( - original_question=passthrough_q, - category=QuestionCategory.PLANNING, - reframed_question=passthrough_q, - reasoning="Planning question.", - ) - - engine.classifier.classify = AsyncMock( - side_effect=[Result.ok(classification), Result.ok(passthrough_classification)] - ) - engine.inner.ask_next_question = AsyncMock( - side_effect=[Result.ok(original_q), Result.ok(passthrough_q)] - ) + engine.classifier.classify = AsyncMock(return_value=Result.ok(classification)) result = await engine.ask_next_question(state) assert result.is_ok - # The decide-later question is stored in decide_later_items - assert original_q in engine.decide_later_items - # It should NOT be in deferred_items - assert original_q not in engine.deferred_items + # The question is returned to the caller (not auto-answered) + assert result.value == original_q + # decide_later_items is NOT populated here — caller handles that + assert engine.decide_later_items == [] + # No recursive call — inner engine asked only once + assert engine.inner.ask_next_question.call_count == 1 + # No auto-response recorded + engine.inner.record_response.assert_not_called() @pytest.mark.asyncio - async def test_decide_later_items_list_is_separate_from_deferred(self): - """decide_later_items and deferred_items are independent lists.""" + async def test_decide_later_does_not_recurse(self): + """DECIDE_LATER returns immediately — no recursive ask_next_question call. + + Unlike DEFERRED (which auto-skips and recurses), DECIDE_LATER returns + the question to the caller so the user can choose decide-later. + """ engine = _make_engine() state = _make_state() decide_later_q = "How should we handle data migration?" - deferred_q = "Which ORM should we use for the database layer?" - # Set up two rounds: first DECIDE_LATER, then DEFERRED, then PASSTHROUGH decide_later_class = ClassificationResult( original_question=decide_later_q, category=QuestionCategory.DECIDE_LATER, @@ -152,103 +147,50 @@ async def test_decide_later_items_list_is_separate_from_deferred(self): decide_later=True, placeholder_response="TBD after schema design.", ) - deferred_class = ClassificationResult( - original_question=deferred_q, - category=QuestionCategory.DEVELOPMENT, - reframed_question=deferred_q, - reasoning="Deeply technical.", - defer_to_dev=True, - ) - passthrough_q = "What are the success metrics?" - passthrough_class = ClassificationResult( - original_question=passthrough_q, - category=QuestionCategory.PLANNING, - reframed_question=passthrough_q, - reasoning="Planning.", - ) - engine.classifier.classify = AsyncMock( - side_effect=[ - Result.ok(decide_later_class), - Result.ok(deferred_class), - Result.ok(passthrough_class), - ] - ) - engine.inner.ask_next_question = AsyncMock( - side_effect=[ - Result.ok(decide_later_q), - Result.ok(deferred_q), - Result.ok(passthrough_q), - ] - ) - engine.inner.record_response = AsyncMock(return_value=Result.ok(state)) + engine.classifier.classify = AsyncMock(return_value=Result.ok(decide_later_class)) + engine.inner.ask_next_question = AsyncMock(return_value=Result.ok(decide_later_q)) result = await engine.ask_next_question(state) assert result.is_ok - assert result.value == passthrough_q - - # Verify separation - assert decide_later_q in engine.decide_later_items - assert decide_later_q not in engine.deferred_items - assert deferred_q in engine.deferred_items - assert deferred_q not in engine.decide_later_items + # Returns the decide-later question directly + assert result.value == decide_later_q + # No auto-recording — caller handles decide-later flow + engine.inner.record_response.assert_not_called() + # Only one call to inner engine (no recursion) + assert engine.inner.ask_next_question.call_count == 1 + # decide_later_items not populated by engine (caller responsibility) + assert engine.decide_later_items == [] @pytest.mark.asyncio - async def test_multiple_decide_later_items_accumulate(self): - """Multiple DECIDE_LATER questions all accumulate in the list.""" + async def test_decide_later_classification_recorded(self): + """DECIDE_LATER classification is recorded so caller can detect it.""" engine = _make_engine() state = _make_state() q1 = "What rate limiting strategy should we use?" - q2 = "How should we handle GDPR data retention?" - q3 = "What's the target latency for API responses?" - passthrough_q = "What is the core problem?" - - classifications = [ - ClassificationResult( - original_question=q1, - category=QuestionCategory.DECIDE_LATER, - reframed_question=q1, - reasoning="Premature.", - decide_later=True, - placeholder_response="TBD.", - ), - ClassificationResult( - original_question=q2, - category=QuestionCategory.DECIDE_LATER, - reframed_question=q2, - reasoning="Needs legal review.", - decide_later=True, - placeholder_response="Needs legal input.", - ), - ClassificationResult( - original_question=q3, - category=QuestionCategory.DECIDE_LATER, - reframed_question=q3, - reasoning="Depends on infra.", - decide_later=True, - placeholder_response="After infra decision.", - ), - ClassificationResult( - original_question=passthrough_q, - category=QuestionCategory.PLANNING, - reframed_question=passthrough_q, - reasoning="Planning.", - ), - ] - engine.classifier.classify = AsyncMock(side_effect=[Result.ok(c) for c in classifications]) - engine.inner.ask_next_question = AsyncMock( - side_effect=[Result.ok(q1), Result.ok(q2), Result.ok(q3), Result.ok(passthrough_q)] + classification = ClassificationResult( + original_question=q1, + category=QuestionCategory.DECIDE_LATER, + reframed_question=q1, + reasoning="Premature.", + decide_later=True, + placeholder_response="TBD.", ) - engine.inner.record_response = AsyncMock(return_value=Result.ok(state)) + + engine.classifier.classify = AsyncMock(return_value=Result.ok(classification)) + engine.inner.ask_next_question = AsyncMock(return_value=Result.ok(q1)) result = await engine.ask_next_question(state) assert result.is_ok - assert len(engine.decide_later_items) == 3 - assert engine.decide_later_items == [q1, q2, q3] + assert result.value == q1 + # Classification is recorded so get_last_classification works + assert len(engine.classifications) == 1 + assert engine.classifications[0].output_type == ClassifierOutputType.DECIDE_LATER + assert engine.get_last_classification() == "decide_later" def test_decide_later_items_empty_by_default(self): """decide_later_items starts as empty list.""" @@ -531,3 +473,110 @@ def test_format_decide_later_summary_preserves_original_question_text(self): summary = engine.format_decide_later_summary() assert original_text in summary + + +class TestSkipAsDecideLater: + """Tests for user-initiated decide-later skip via skip_as_decide_later().""" + + @pytest.mark.asyncio + async def test_skip_records_question_in_decide_later_items(self): + """skip_as_decide_later records the question in decide_later_items.""" + engine = _make_engine() + state = _make_state() + engine.inner.record_response.return_value = Result.ok(state) + + question = "What caching strategy should we use?" + result = await engine.skip_as_decide_later(state, question) + + assert result.is_ok + assert question in engine.decide_later_items + + @pytest.mark.asyncio + async def test_skip_does_not_duplicate_existing_item(self): + """skip_as_decide_later does not duplicate if question already in list.""" + engine = _make_engine() + state = _make_state() + engine.inner.record_response.return_value = Result.ok(state) + + question = "What caching strategy should we use?" + engine.decide_later_items = [question] + + result = await engine.skip_as_decide_later(state, question) + + assert result.is_ok + assert engine.decide_later_items.count(question) == 1 + + @pytest.mark.asyncio + async def test_skip_records_placeholder_response_in_inner_engine(self): + """skip_as_decide_later feeds a placeholder response to inner engine.""" + engine = _make_engine() + state = _make_state() + engine.inner.record_response.return_value = Result.ok(state) + + question = "What deployment model should we use?" + await engine.skip_as_decide_later(state, question) + + engine.inner.record_response.assert_called_once() + call_args = engine.inner.record_response.call_args + recorded_response = call_args[0][1] + assert "[Decide later]" in recorded_response + + @pytest.mark.asyncio + async def test_skip_advances_interview_state(self): + """skip_as_decide_later returns the updated state from record_response.""" + engine = _make_engine() + state = _make_state(current_round=3) + updated_state = _make_state(current_round=4) + engine.inner.record_response.return_value = Result.ok(updated_state) + + question = "What rate limiting approach?" + result = await engine.skip_as_decide_later(state, question) + + assert result.is_ok + assert result.value == updated_state + + @pytest.mark.asyncio + async def test_skip_does_not_add_to_deferred_items(self): + """skip_as_decide_later does not affect deferred_items.""" + engine = _make_engine() + state = _make_state() + engine.inner.record_response.return_value = Result.ok(state) + + question = "What caching strategy?" + await engine.skip_as_decide_later(state, question) + + assert engine.deferred_items == [] + assert question in engine.decide_later_items + + @pytest.mark.asyncio + async def test_skip_multiple_questions_accumulate(self): + """Multiple skip_as_decide_later calls accumulate items.""" + engine = _make_engine() + state = _make_state() + engine.inner.record_response.return_value = Result.ok(state) + + q1 = "What caching strategy?" + q2 = "What rate limiting?" + q3 = "What deployment model?" + + await engine.skip_as_decide_later(state, q1) + await engine.skip_as_decide_later(state, q2) + await engine.skip_as_decide_later(state, q3) + + assert engine.decide_later_items == [q1, q2, q3] + + @pytest.mark.asyncio + async def test_skip_propagates_record_response_error(self): + """skip_as_decide_later propagates errors from record_response.""" + engine = _make_engine() + state = _make_state() + from ouroboros.core.errors import ValidationError as VE + + engine.inner.record_response.return_value = Result.err( + VE("State save failed", field="state") + ) + + question = "What caching strategy?" + result = await engine.skip_as_decide_later(state, question) + + assert result.is_err diff --git a/tests/unit/bigbang/test_pm_interview.py b/tests/unit/bigbang/test_pm_interview.py index 7565d596..bf6cea65 100644 --- a/tests/unit/bigbang/test_pm_interview.py +++ b/tests/unit/bigbang/test_pm_interview.py @@ -431,17 +431,16 @@ async def test_dev_question_gets_reframed(self, tmp_path: Path) -> None: assert engine.classifications[0].category == QuestionCategory.DEVELOPMENT @pytest.mark.asyncio - async def test_deferred_question_skipped(self, tmp_path: Path) -> None: - """DEV-only questions marked as defer_to_dev are skipped and tracked.""" + async def test_deferred_question_returned_to_user(self, tmp_path: Path) -> None: + """DEV-only questions marked as defer_to_dev are returned to the user.""" adapter = _make_adapter() engine = _make_engine(adapter, tmp_path) dev_q = "Should we use gRPC or REST for inter-service communication?" - planning_q = "Who are the end users?" adapter.complete = AsyncMock( side_effect=[ - # First question generation: dev question + # Question generation: dev question Result.ok(_mock_completion(dev_q)), # Classification: defer to dev Result.ok( @@ -456,21 +455,6 @@ async def test_deferred_question_skipped(self, tmp_path: Path) -> None: ) ) ), - # Second question generation (recursive call): planning question - Result.ok(_mock_completion(planning_q)), - # Classification: planning - Result.ok( - _mock_completion( - json.dumps( - { - "category": "planning", - "reframed_question": planning_q, - "reasoning": "User question", - "defer_to_dev": False, - } - ) - ) - ), ] ) @@ -482,85 +466,46 @@ async def test_deferred_question_skipped(self, tmp_path: Path) -> None: result = await engine.ask_next_question(state) assert result.is_ok - assert result.value == planning_q - assert dev_q in engine.deferred_items - # The deferred question gets an automatic response fed back to the - # inner InterviewEngine so it knows the question was handled - assert len(state.rounds) == 1 - assert "[Deferred to development phase]" in state.rounds[0].user_response - assert state.rounds[0].question == dev_q + # The user sees the deferred question directly + assert result.value == dev_q + # deferred_items NOT populated yet (user hasn't chosen to skip) + assert dev_q not in engine.deferred_items + # No rounds auto-recorded + assert len(state.rounds) == 0 @pytest.mark.asyncio - async def test_deferred_auto_response_content(self, tmp_path: Path) -> None: - """DEV-classified deferred questions get an automatic response fed back - to the InterviewEngine with appropriate marker text.""" + async def test_user_can_skip_as_deferred(self, tmp_path: Path) -> None: + """User can defer a technical question via skip_as_deferred().""" adapter = _make_adapter() engine = _make_engine(adapter, tmp_path) dev_q = "What container orchestration platform should we use — Kubernetes or ECS?" - planning_q = "What are the key business goals?" - - adapter.complete = AsyncMock( - side_effect=[ - Result.ok(_mock_completion(dev_q)), - Result.ok( - _mock_completion( - json.dumps( - { - "category": "development", - "reframed_question": dev_q, - "reasoning": "Infrastructure choice is purely technical", - "defer_to_dev": True, - } - ) - ) - ), - Result.ok(_mock_completion(planning_q)), - Result.ok( - _mock_completion( - json.dumps( - { - "category": "planning", - "reframed_question": planning_q, - "reasoning": "Business question", - "defer_to_dev": False, - } - ) - ) - ), - ] - ) state = InterviewState( interview_id="test_auto_response", initial_context="Build a SaaS platform", ) - result = await engine.ask_next_question(state) + # User chooses to defer + adapter.complete = AsyncMock(return_value=Result.ok(_mock_completion("ok"))) + result = await engine.skip_as_deferred(state, dev_q) assert result.is_ok - assert result.value == planning_q - - # Verify the auto-response was properly recorded - deferred_round = state.rounds[0] - assert deferred_round.question == dev_q - assert "[Deferred to development phase]" in deferred_round.user_response - assert "development interview" in deferred_round.user_response - assert deferred_round.round_number == 1 + assert dev_q in engine.deferred_items + # Verify the deferral response was properly recorded + assert len(state.rounds) == 1 + assert "[Deferred to development phase]" in state.rounds[0].user_response @pytest.mark.asyncio - async def test_multiple_deferred_questions_all_auto_responded(self, tmp_path: Path) -> None: - """Multiple consecutive deferred questions each get auto-responses.""" + async def test_deferred_question_returned_not_auto_skipped(self, tmp_path: Path) -> None: + """DEFERRED questions are returned to user, not auto-skipped.""" adapter = _make_adapter() engine = _make_engine(adapter, tmp_path) dev_q1 = "Should we use gRPC or REST?" - dev_q2 = "Which CI/CD pipeline tool should we use?" - planning_q = "Who are the target users?" adapter.complete = AsyncMock( side_effect=[ - # First dev question Result.ok(_mock_completion(dev_q1)), Result.ok( _mock_completion( @@ -574,34 +519,6 @@ async def test_multiple_deferred_questions_all_auto_responded(self, tmp_path: Pa ) ) ), - # Second dev question - Result.ok(_mock_completion(dev_q2)), - Result.ok( - _mock_completion( - json.dumps( - { - "category": "development", - "reframed_question": dev_q2, - "reasoning": "CI/CD is infra", - "defer_to_dev": True, - } - ) - ) - ), - # Finally a planning question - Result.ok(_mock_completion(planning_q)), - Result.ok( - _mock_completion( - json.dumps( - { - "category": "planning", - "reframed_question": planning_q, - "reasoning": "User question", - "defer_to_dev": False, - } - ) - ) - ), ] ) @@ -613,18 +530,11 @@ async def test_multiple_deferred_questions_all_auto_responded(self, tmp_path: Pa result = await engine.ask_next_question(state) assert result.is_ok - assert result.value == planning_q - - # Both dev questions should be tracked as deferred - assert dev_q1 in engine.deferred_items - assert dev_q2 in engine.deferred_items - - # Both should have auto-responses recorded in the inner engine - assert len(state.rounds) == 2 - assert "[Deferred to development phase]" in state.rounds[0].user_response - assert "[Deferred to development phase]" in state.rounds[1].user_response - assert state.rounds[0].question == dev_q1 - assert state.rounds[1].question == dev_q2 + # First deferred question returned directly — no recursion + assert result.value == dev_q1 + # Not yet in deferred_items (user hasn't chosen to skip) + assert dev_q1 not in engine.deferred_items + assert len(state.rounds) == 0 @pytest.mark.asyncio async def test_classification_failure_returns_original(self, tmp_path: Path) -> None: @@ -652,18 +562,24 @@ async def test_classification_failure_returns_original(self, tmp_path: Path) -> assert result.value == question @pytest.mark.asyncio - async def test_decide_later_auto_responds_with_placeholder(self, tmp_path: Path) -> None: - """Decide-later questions are auto-answered with a placeholder and skipped.""" + async def test_decide_later_returns_question_without_auto_answering( + self, tmp_path: Path + ) -> None: + """Decide-later questions are returned to the caller for user decision. + + The engine no longer auto-answers with a placeholder or recurses. + The caller (main session) detects classification == "decide_later" + and presents the user with a decide-later option. + """ adapter = _make_adapter() engine = _make_engine(adapter, tmp_path) decide_later_q = "How should we handle scaling when we reach 1M users?" placeholder = "This will be determined after MVP launch and initial user metrics. Marking as a decision point for later." - planning_q = "Who are the target users?" adapter.complete = AsyncMock( side_effect=[ - # First question generation: decide-later question + # Question generation: decide-later question Result.ok(_mock_completion(decide_later_q)), # Classification: decide_later Result.ok( @@ -680,22 +596,7 @@ async def test_decide_later_auto_responds_with_placeholder(self, tmp_path: Path) ) ) ), - # Second question generation (recursive call): planning question - Result.ok(_mock_completion(planning_q)), - # Classification: planning - Result.ok( - _mock_completion( - json.dumps( - { - "category": "planning", - "reframed_question": planning_q, - "reasoning": "User question", - "defer_to_dev": False, - "decide_later": False, - } - ) - ) - ), + # No second question generation — no recursion ] ) @@ -707,14 +608,14 @@ async def test_decide_later_auto_responds_with_placeholder(self, tmp_path: Path) result = await engine.ask_next_question(state) assert result.is_ok - # The PM sees the next real question, not the decide-later one - assert result.value == planning_q - # The decide-later question is tracked in decide_later_items - assert decide_later_q in engine.decide_later_items - # A placeholder response was auto-recorded in the interview state - assert len(state.rounds) == 1 - assert "[Decide later]" in state.rounds[0].user_response - assert placeholder in state.rounds[0].user_response + # The decide-later question is returned to the caller + assert result.value == decide_later_q + # decide_later_items is NOT populated — caller handles that + assert engine.decide_later_items == [] + # No auto-response recorded — state has no rounds + assert len(state.rounds) == 0 + # Classification is recorded for caller to detect + assert engine.get_last_classification() == "decide_later" @pytest.mark.asyncio async def test_decide_later_classification_result_properties(self) -> None: @@ -729,7 +630,8 @@ async def test_decide_later_classification_result_properties(self) -> None: ) assert result.output_type == ClassifierOutputType.DECIDE_LATER - assert result.question_for_pm == "" # Should not be shown to PM + # Returned to user so they can choose to answer or defer + assert result.question_for_pm == "How should we handle scaling?" class TestRecordResponse: diff --git a/tests/unit/bigbang/test_question_classifier.py b/tests/unit/bigbang/test_question_classifier.py index 0619ee3a..0c95e3a6 100644 --- a/tests/unit/bigbang/test_question_classifier.py +++ b/tests/unit/bigbang/test_question_classifier.py @@ -181,8 +181,8 @@ def test_reframed_returns_reframed_question(self) -> None: ) assert result.question_for_pm == "How do you expect users to interact with data?" - def test_deferred_returns_empty_string(self) -> None: - """DEFERRED returns empty string — question should not be shown to PM.""" + def test_deferred_returns_original_question(self) -> None: + """DEFERRED returns original question — shown to user for defer/answer choice.""" result = ClassificationResult( original_question="Should we use gRPC or REST?", category=QuestionCategory.DEVELOPMENT, @@ -190,10 +190,10 @@ def test_deferred_returns_empty_string(self) -> None: reasoning="Purely technical", defer_to_dev=True, ) - assert result.question_for_pm == "" + assert result.question_for_pm == "Should we use gRPC or REST?" - def test_decide_later_returns_empty_string(self) -> None: - """DECIDE_LATER returns empty string — auto-answered, not shown to PM.""" + def test_decide_later_returns_original_question(self) -> None: + """DECIDE_LATER returns original question — shown to user for decision.""" result = ClassificationResult( original_question="How will this scale to millions of users?", category=QuestionCategory.DECIDE_LATER, @@ -202,7 +202,7 @@ def test_decide_later_returns_empty_string(self) -> None: decide_later=True, placeholder_response="To be determined post-launch.", ) - assert result.question_for_pm == "" + assert result.question_for_pm == "How will this scale to millions of users?" # ────────────────────────────────────────────────────────────── @@ -418,5 +418,5 @@ async def test_classify_decide_later_question(self) -> None: assert cr.category == QuestionCategory.DECIDE_LATER assert cr.decide_later is True assert cr.output_type == ClassifierOutputType.DECIDE_LATER - assert cr.question_for_pm == "" + assert cr.question_for_pm == question # returned to user for decision assert "initial deployment" in cr.placeholder_response diff --git a/tests/unit/mcp/tools/test_pm_handler.py b/tests/unit/mcp/tools/test_pm_handler.py index 639432b4..e02889c0 100644 --- a/tests/unit/mcp/tools/test_pm_handler.py +++ b/tests/unit/mcp/tools/test_pm_handler.py @@ -452,6 +452,121 @@ async def test_handle_answer_includes_interview_complete_false(self, tmp_path: P assert result.is_ok assert result.value.meta["interview_complete"] is False + @pytest.mark.asyncio + async def test_handle_answer_decide_later_skips_and_records(self, tmp_path: Path) -> None: + """When answer='[decide_later]', skip_as_decide_later is called instead of record_response.""" + engine = make_pm_engine_mock(deferred_items=[], decide_later_items=[]) + + question = "What caching strategy should we use?" + state = _make_state( + rounds=[ + InterviewRound(round_number=1, question=question, user_response=None), + ], + ) + + engine.load_state = AsyncMock(return_value=Result.ok(state)) + + # skip_as_decide_later should be called (not record_response) + async def mock_skip(s: Any, q: str) -> Result: + engine.decide_later_items.append(q) + return Result.ok(s) + + engine.skip_as_decide_later = AsyncMock(side_effect=mock_skip) + engine.record_response = AsyncMock(return_value=Result.ok(state)) + + engine.ask_next_question = AsyncMock(return_value=Result.ok("Next question?")) + engine.save_state = AsyncMock(return_value=Result.ok(tmp_path / "state.json")) + + _save_pm_meta("sess-dl", engine, cwd="/tmp/proj", data_dir=tmp_path) + + handler = PMInterviewHandler(pm_engine=engine, data_dir=tmp_path) + + result = await handler.handle( + { + "session_id": "sess-dl", + "answer": "[decide_later]", + } + ) + + assert result.is_ok + # skip_as_decide_later was called + engine.skip_as_decide_later.assert_called_once_with(state, question) + # record_response was NOT called (skip_as_decide_later handles it internally) + engine.record_response.assert_not_called() + # The question was recorded in decide_later_items + assert question in engine.decide_later_items + + @pytest.mark.asyncio + async def test_handle_answer_decide_later_with_whitespace(self, tmp_path: Path) -> None: + """Whitespace-padded '[decide_later]' is still recognized.""" + engine = make_pm_engine_mock(deferred_items=[], decide_later_items=[]) + + question = "What rate limiting approach?" + state = _make_state( + rounds=[ + InterviewRound(round_number=1, question=question, user_response=None), + ], + ) + + engine.load_state = AsyncMock(return_value=Result.ok(state)) + engine.skip_as_decide_later = AsyncMock(return_value=Result.ok(state)) + engine.ask_next_question = AsyncMock(return_value=Result.ok("Next question?")) + engine.save_state = AsyncMock(return_value=Result.ok(tmp_path / "state.json")) + + _save_pm_meta("sess-dl2", engine, cwd="/tmp/proj", data_dir=tmp_path) + + handler = PMInterviewHandler(pm_engine=engine, data_dir=tmp_path) + + result = await handler.handle( + { + "session_id": "sess-dl2", + "answer": " [decide_later] ", + } + ) + + assert result.is_ok + engine.skip_as_decide_later.assert_called_once() + + @pytest.mark.asyncio + async def test_handle_answer_deferred_skips_and_records(self, tmp_path: Path) -> None: + """When answer='[deferred]', skip_as_deferred is called instead of record_response.""" + engine = make_pm_engine_mock(deferred_items=[], decide_later_items=[]) + + question = "Should we use gRPC or REST for inter-service communication?" + state = _make_state( + rounds=[ + InterviewRound(round_number=1, question=question, user_response=None), + ], + ) + + engine.load_state = AsyncMock(return_value=Result.ok(state)) + + async def mock_skip(s: Any, q: str) -> Result: + engine.deferred_items.append(q) + return Result.ok(s) + + engine.skip_as_deferred = AsyncMock(side_effect=mock_skip) + engine.record_response = AsyncMock(return_value=Result.ok(state)) + + engine.ask_next_question = AsyncMock(return_value=Result.ok("Next question?")) + engine.save_state = AsyncMock(return_value=Result.ok(tmp_path / "state.json")) + + _save_pm_meta("sess-def", engine, cwd="/tmp/proj", data_dir=tmp_path) + + handler = PMInterviewHandler(pm_engine=engine, data_dir=tmp_path) + + result = await handler.handle( + { + "session_id": "sess-def", + "answer": "[deferred]", + } + ) + + assert result.is_ok + engine.skip_as_deferred.assert_called_once_with(state, question) + engine.record_response.assert_not_called() + assert question in engine.deferred_items + # ── Unit tests for _check_completion (AC 12) ───────────────────── @@ -652,6 +767,14 @@ async def test_handle_answer_completion_via_ambiguity(self, tmp_path: Path) -> N _save_pm_meta("sess-complete", engine, cwd="/tmp/proj", data_dir=tmp_path) + # Mock auto-generate on completion + seed_mock = MagicMock() + seed_mock.product_name = "Test Product" + seed_mock.deferred_items = ["d1"] + seed_mock.decide_later_items = ["dl1"] + engine.generate_pm_seed = AsyncMock(return_value=Result.ok(seed_mock)) + engine.save_pm_seed = MagicMock(return_value=tmp_path / "seed.json") + handler = PMInterviewHandler(pm_engine=engine, data_dir=tmp_path) # Mock _check_completion to return completion @@ -666,6 +789,9 @@ async def test_handle_answer_completion_via_ambiguity(self, tmp_path: Path) -> N "ouroboros.mcp.tools.pm_handler._check_completion", new_callable=AsyncMock, return_value=completion_meta, + ), patch( + "ouroboros.mcp.tools.pm_handler.save_pm_document", + return_value=tmp_path / "pm.md", ): result = await handler.handle( { @@ -684,10 +810,10 @@ async def test_handle_answer_completion_via_ambiguity(self, tmp_path: Path) -> N # Verify complete_interview was called engine.complete_interview.assert_called_once() - # Verify response text includes generate instructions + # Verify response text includes completion and PM document info text = result.value.content[0].text assert "Interview complete" in text - assert "generate" in text + assert "PM document" in text @pytest.mark.asyncio async def test_no_done_signal_processing(self, tmp_path: Path) -> None: @@ -1504,17 +1630,28 @@ async def test_resume_completion_meta_has_required_fields(self, tmp_path: Path) "ambiguity_score": 0.15, } ) + # Mock auto-generate on completion + seed_mock = MagicMock() + seed_mock.product_name = "Test Product" + seed_mock.deferred_items = ["d1"] + seed_mock.decide_later_items = ["dl1"] + engine.generate_pm_seed = AsyncMock(return_value=Result.ok(seed_mock)) + engine.save_pm_seed = MagicMock(return_value=tmp_path / "seed.json") _save_pm_meta("resume-complete", engine, cwd="/tmp", data_dir=tmp_path) handler = PMInterviewHandler(pm_engine=engine, data_dir=tmp_path) - result = await handler.handle( - { - "session_id": "resume-complete", - "answer": "Final answer", - } - ) + with patch( + "ouroboros.mcp.tools.pm_handler.save_pm_document", + return_value=tmp_path / "pm.md", + ): + result = await handler.handle( + { + "session_id": "resume-complete", + "answer": "Final answer", + } + ) assert result.is_ok meta = result.value.meta @@ -1528,6 +1665,9 @@ async def test_resume_completion_meta_has_required_fields(self, tmp_path: Path) assert meta["decide_later_this_round"] == [] # Also has completion details assert meta["interview_complete"] is True + # Auto-generated PM document + assert "pm_path" in meta + assert "seed_path" in meta @pytest.mark.asyncio async def test_resume_loads_state_and_meta(self, tmp_path: Path) -> None: