From 81200ee7fb32b7095c7e0ff40117462b049074b9 Mon Sep 17 00:00:00 2001 From: heechul Date: Fri, 27 Mar 2026 23:38:18 +0900 Subject: [PATCH 1/5] feat(pm): return DECIDE_LATER questions to user instead of auto-skipping Remove auto-answer + recursion for DECIDE_LATER classified questions in PMInterviewEngine.ask_next_question(). Questions are now returned to the main session with classification metadata, allowing users to explicitly choose "decide later" via the UI. This eliminates infinite recursion that caused MCP 120s timeouts when consecutive technical questions were generated. DEFERRED questions remain auto-skipped (unchanged behavior). Co-Authored-By: Claude Opus 4.6 (1M context) --- src/ouroboros/bigbang/pm_interview.py | 62 ++++- src/ouroboros/bigbang/question_classifier.py | 5 +- src/ouroboros/cli/commands/pm.py | 9 + src/ouroboros/mcp/tools/pm_handler.py | 92 ++++++- tests/unit/bigbang/test_decide_later_items.py | 260 +++++++++++------- tests/unit/bigbang/test_pm_interview.py | 48 ++-- .../unit/bigbang/test_question_classifier.py | 8 +- tests/unit/mcp/tools/test_pm_handler.py | 75 +++++ 8 files changed, 391 insertions(+), 168 deletions(-) diff --git a/src/ouroboros/bigbang/pm_interview.py b/src/ouroboros/bigbang/pm_interview.py index ab8e9810..3db82833 100644 --- a/src/ouroboros/bigbang/pm_interview.py +++ b/src/ouroboros/bigbang/pm_interview.py @@ -158,7 +158,8 @@ class PMInterviewEngine: """Original question text for questions classified as DECIDE_LATER. These are questions that are premature or unknowable at the PM stage. - They are auto-answered with a placeholder and stored here so the PMSeed + The main session presents the question to the user with a "decide later" + option; when chosen, the caller records the item here so the PMSeed and PM document can surface them as explicit "decide later" decisions. """ classifications: list[ClassificationResult] = field(default_factory=list) @@ -507,24 +508,21 @@ async def ask_next_question( return await self.ask_next_question(state) if output_type == ClassifierOutputType.DECIDE_LATER: - # Auto-answer with placeholder — no PM interaction needed - placeholder = classification.placeholder_response - self.decide_later_items.append(classification.original_question) + # Return the question to the caller (main session) so the user + # can choose "decide later" themselves. The main session detects + # classification == "decide_later" via response_meta and offers + # the option. If the user picks it, the caller calls + # skip_as_decide_later() which records the placeholder and + # appends to decide_later_items. + # + # Previously this branch auto-answered and recursed, which could + # trigger MCP 120s timeouts on consecutive DECIDE_LATER runs. log.info( "pm.question_decide_later", question=classification.original_question[:100], - placeholder=placeholder[:100], reasoning=classification.reasoning, ) - # Record the placeholder as the response so the interview - # engine advances its round count - await self.record_response( - state, - user_response=f"[Decide later] {placeholder}", - question=classification.original_question, - ) - # Recursively ask for the next real question - return await self.ask_next_question(state) + return Result.ok(classification.original_question) if output_type == ClassifierOutputType.REFRAMED: # Use the reframed version and track the mapping @@ -595,6 +593,42 @@ async def record_response( return await self.inner.record_response(state, user_response, question) + async def skip_as_decide_later( + self, + state: InterviewState, + question: str, + ) -> Result[InterviewState, ValidationError]: + """Skip a question as "decide later" at the user's explicit request. + + Records the question in ``decide_later_items`` and feeds a placeholder + response to the inner InterviewEngine so the round is properly recorded + and the engine advances. + + This is called when the main session detects that the user chose the + "decide later" option for a DECIDE_LATER-classified question, instead + of the old auto-skip behavior inside ``ask_next_question``. + + Args: + state: Current interview state. + question: The question the user chose to decide later. + + Returns: + Result containing updated state or ValidationError. + """ + if question not in self.decide_later_items: + self.decide_later_items.append(question) + + log.info( + "pm.question_decide_later_by_user", + question=question[:100], + ) + + return await self.record_response( + state, + user_response="[Decide later] To be determined — user chose to decide later.", + question=question, + ) + async def complete_interview( self, state: InterviewState, diff --git a/src/ouroboros/bigbang/question_classifier.py b/src/ouroboros/bigbang/question_classifier.py index 202ba7bf..2b7bad37 100644 --- a/src/ouroboros/bigbang/question_classifier.py +++ b/src/ouroboros/bigbang/question_classifier.py @@ -110,12 +110,15 @@ def question_for_pm(self) -> str: For PASSTHROUGH: returns original_question unchanged. For REFRAMED: returns the reframed_question. For DEFERRED: returns empty string (should not be shown). - For DECIDE_LATER: returns empty string (auto-answered). + For DECIDE_LATER: returns original_question (shown to user + so they can choose to answer or defer). """ if self.output_type == ClassifierOutputType.PASSTHROUGH: return self.original_question if self.output_type == ClassifierOutputType.REFRAMED: return self.reframed_question + if self.output_type == ClassifierOutputType.DECIDE_LATER: + return self.original_question return "" diff --git a/src/ouroboros/cli/commands/pm.py b/src/ouroboros/cli/commands/pm.py index 4d2f8ec2..a3a5b731 100644 --- a/src/ouroboros/cli/commands/pm.py +++ b/src/ouroboros/cli/commands/pm.py @@ -410,6 +410,15 @@ async def _run_pm_interview( console.print(f"\n[bold yellow]?[/] {question}\n") + # Check if this question was classified as decide_later — + # if so, show a hint that the user can defer it. + classification = engine.get_last_classification() + if classification == "decide_later": + console.print( + '[dim] šŸ’” This question can be deferred. ' + 'Type "decide later" or "skip" to defer it.[/]\n' + ) + # Persist state + meta AFTER displaying the question but BEFORE # waiting for input so that an interruption preserves the pending # question and --resume shows the same question. diff --git a/src/ouroboros/mcp/tools/pm_handler.py b/src/ouroboros/mcp/tools/pm_handler.py index 9910fd60..328982ac 100644 --- a/src/ouroboros/mcp/tools/pm_handler.py +++ b/src/ouroboros/mcp/tools/pm_handler.py @@ -503,6 +503,10 @@ async def _handle_start( # Include pending_reframe in response meta if a reframe occurred pending_reframe = engine.get_pending_reframe() + # Check classification to signal decide-later eligibility + classification = _last_classification(engine) + is_decide_later = classification == "decide_later" + meta = { "session_id": state.interview_id, "status": "interview_started", @@ -510,6 +514,8 @@ async def _handle_start( "response_param": "answer", "question": question, "is_brownfield": state.is_brownfield, + "classification": classification, + "decide_later_eligible": is_decide_later, "pending_reframe": pending_reframe, **diff, } @@ -518,18 +524,28 @@ async def _handle_start( "pm_handler.started", session_id=state.interview_id, is_brownfield=state.is_brownfield, + classification=classification, + decide_later_eligible=is_decide_later, has_pending_reframe=pending_reframe is not None, **diff, ) + # Build response text — include decide-later hint when applicable + start_text = f"PM interview started. Session ID: {state.interview_id}\n\n{question}" + if is_decide_later: + start_text += ( + '\n\nšŸ’” This question can be deferred. ' + 'The user may answer now, or choose "decide later" to skip it. ' + "If they choose to decide later, pass " + f'answer="decide_later" with session_id="{state.interview_id}".' + ) + return Result.ok( MCPToolResult( content=( MCPContentItem( type=ContentType.TEXT, - text=( - f"PM interview started. Session ID: {state.interview_id}\n\n{question}" - ), + text=start_text, ), ), is_error=False, @@ -780,15 +796,26 @@ async def _handle_answer( if not answer and state.rounds and state.rounds[-1].user_response is None: pending_question = state.rounds[-1].question classification = _last_classification(engine) + is_decide_later = classification == "decide_later" pending_reframe = engine.get_pending_reframe() + # Include decide-later hint in re-displayed question + pending_text = f"Session {session_id}\n\n{pending_question}" + if is_decide_later: + pending_text += ( + '\n\nšŸ’” This question can be deferred. ' + 'The user may answer now, or choose "decide later" to skip it. ' + "If they choose to decide later, pass " + f'answer="decide_later" with session_id="{session_id}".' + ) + return Result.ok( MCPToolResult( content=( MCPContentItem( type=ContentType.TEXT, - text=f"Session {session_id}\n\n{pending_question}", + text=pending_text, ), ), is_error=False, @@ -799,6 +826,7 @@ async def _handle_answer( "question": pending_question, "is_complete": False, "classification": classification, + "decide_later_eligible": is_decide_later, "deferred_this_round": [], "decide_later_this_round": [], "interview_complete": False, @@ -824,16 +852,35 @@ async def _handle_answer( if state.rounds[-1].user_response is None: state.rounds.pop() - record_result = await engine.record_response(state, answer, last_question) - if record_result.is_err: - return Result.err( - MCPToolError( - str(record_result.error), - tool_name="ouroboros_pm_interview", + # ── User chose "decide later" ───────────────────────── + # When the main session detects classification == "decide_later" + # and the user selects the decide-later option, it sends + # answer="[decide_later]". We skip normal recording and + # delegate to the engine's skip_as_decide_later() method + # which records the question in decide_later_items and + # feeds a placeholder response to the inner engine. + if answer.strip() == "[decide_later]": + skip_result = await engine.skip_as_decide_later(state, last_question) + if skip_result.is_err: + return Result.err( + MCPToolError( + str(skip_result.error), + tool_name="ouroboros_pm_interview", + ) ) - ) - state = record_result.value - state.clear_stored_ambiguity() + state = skip_result.value + state.clear_stored_ambiguity() + else: + record_result = await engine.record_response(state, answer, last_question) + if record_result.is_err: + return Result.err( + MCPToolError( + str(record_result.error), + tool_name="ouroboros_pm_interview", + ) + ) + state = record_result.value + state.clear_stored_ambiguity() # ── Completion check (AC 12) ───────────────────────────── # Completion is determined by engine ambiguity scoring. @@ -958,6 +1005,11 @@ async def _handle_answer( # Extract classification from the last classify call classification = _last_classification(engine) + # When classification is decide_later, signal to the caller + # that the user should be offered a "decide later" option + # instead of being forced to answer now. + is_decide_later = classification == "decide_later" + response_meta = { "session_id": session_id, "input_type": "freeText", @@ -965,6 +1017,7 @@ async def _handle_answer( "question": question, "is_complete": False, "classification": classification, + "decide_later_eligible": is_decide_later, "deferred_this_round": diff["new_deferred"], "decide_later_this_round": diff["new_decide_later"], # Keep backward-compat fields from AC 8 @@ -977,16 +1030,27 @@ async def _handle_answer( "pm_handler.question_asked", session_id=session_id, classification=classification, + decide_later_eligible=is_decide_later, has_pending_reframe=pending_reframe is not None, **diff, ) + # Build response text — include decide-later hint when applicable + response_text = f"Session {session_id}\n\n{question}" + if is_decide_later: + response_text += ( + '\n\nšŸ’” This question can be deferred. ' + 'The user may answer now, or choose "decide later" to skip it. ' + "If they choose to decide later, pass " + f'answer="decide_later" with session_id="{session_id}".' + ) + return Result.ok( MCPToolResult( content=( MCPContentItem( type=ContentType.TEXT, - text=f"Session {session_id}\n\n{question}", + text=response_text, ), ), is_error=False, diff --git a/tests/unit/bigbang/test_decide_later_items.py b/tests/unit/bigbang/test_decide_later_items.py index 1c2920cf..22c7df1f 100644 --- a/tests/unit/bigbang/test_decide_later_items.py +++ b/tests/unit/bigbang/test_decide_later_items.py @@ -88,8 +88,12 @@ class TestDecideLaterItemsList: """Tests for decide-later items stored as original question text in list.""" @pytest.mark.asyncio - async def test_decide_later_stores_original_question_text(self): - """DECIDE_LATER stores original question text in decide_later_items list.""" + async def test_decide_later_returns_question_to_caller(self): + """DECIDE_LATER returns the question to the caller without auto-answering. + + The caller (main session) is responsible for presenting the decide-later + option and recording the item in decide_later_items when chosen. + """ engine = _make_engine() state = _make_state() @@ -97,7 +101,6 @@ async def test_decide_later_stores_original_question_text(self): # Inner engine generates the question engine.inner.ask_next_question.return_value = Result.ok(original_q) - engine.inner.record_response.return_value = Result.ok(state) # Classifier says DECIDE_LATER classification = ClassificationResult( @@ -110,40 +113,32 @@ async def test_decide_later_stores_original_question_text(self): ) assert classification.output_type == ClassifierOutputType.DECIDE_LATER - # First call returns DECIDE_LATER, second call returns a PASSTHROUGH - passthrough_q = "Who are the primary users?" - passthrough_classification = ClassificationResult( - original_question=passthrough_q, - category=QuestionCategory.PLANNING, - reframed_question=passthrough_q, - reasoning="Planning question.", - ) - - engine.classifier.classify = AsyncMock( - side_effect=[Result.ok(classification), Result.ok(passthrough_classification)] - ) - engine.inner.ask_next_question = AsyncMock( - side_effect=[Result.ok(original_q), Result.ok(passthrough_q)] - ) + engine.classifier.classify = AsyncMock(return_value=Result.ok(classification)) result = await engine.ask_next_question(state) assert result.is_ok - # The decide-later question is stored in decide_later_items - assert original_q in engine.decide_later_items - # It should NOT be in deferred_items - assert original_q not in engine.deferred_items + # The question is returned to the caller (not auto-answered) + assert result.value == original_q + # decide_later_items is NOT populated here — caller handles that + assert engine.decide_later_items == [] + # No recursive call — inner engine asked only once + assert engine.inner.ask_next_question.call_count == 1 + # No auto-response recorded + engine.inner.record_response.assert_not_called() @pytest.mark.asyncio - async def test_decide_later_items_list_is_separate_from_deferred(self): - """decide_later_items and deferred_items are independent lists.""" + async def test_decide_later_does_not_recurse(self): + """DECIDE_LATER returns immediately — no recursive ask_next_question call. + + Unlike DEFERRED (which auto-skips and recurses), DECIDE_LATER returns + the question to the caller so the user can choose decide-later. + """ engine = _make_engine() state = _make_state() decide_later_q = "How should we handle data migration?" - deferred_q = "Which ORM should we use for the database layer?" - # Set up two rounds: first DECIDE_LATER, then DEFERRED, then PASSTHROUGH decide_later_class = ClassificationResult( original_question=decide_later_q, category=QuestionCategory.DECIDE_LATER, @@ -152,103 +147,50 @@ async def test_decide_later_items_list_is_separate_from_deferred(self): decide_later=True, placeholder_response="TBD after schema design.", ) - deferred_class = ClassificationResult( - original_question=deferred_q, - category=QuestionCategory.DEVELOPMENT, - reframed_question=deferred_q, - reasoning="Deeply technical.", - defer_to_dev=True, - ) - passthrough_q = "What are the success metrics?" - passthrough_class = ClassificationResult( - original_question=passthrough_q, - category=QuestionCategory.PLANNING, - reframed_question=passthrough_q, - reasoning="Planning.", - ) - engine.classifier.classify = AsyncMock( - side_effect=[ - Result.ok(decide_later_class), - Result.ok(deferred_class), - Result.ok(passthrough_class), - ] - ) - engine.inner.ask_next_question = AsyncMock( - side_effect=[ - Result.ok(decide_later_q), - Result.ok(deferred_q), - Result.ok(passthrough_q), - ] - ) - engine.inner.record_response = AsyncMock(return_value=Result.ok(state)) + engine.classifier.classify = AsyncMock(return_value=Result.ok(decide_later_class)) + engine.inner.ask_next_question = AsyncMock(return_value=Result.ok(decide_later_q)) result = await engine.ask_next_question(state) assert result.is_ok - assert result.value == passthrough_q - - # Verify separation - assert decide_later_q in engine.decide_later_items - assert decide_later_q not in engine.deferred_items - assert deferred_q in engine.deferred_items - assert deferred_q not in engine.decide_later_items + # Returns the decide-later question directly + assert result.value == decide_later_q + # No auto-recording — caller handles decide-later flow + engine.inner.record_response.assert_not_called() + # Only one call to inner engine (no recursion) + assert engine.inner.ask_next_question.call_count == 1 + # decide_later_items not populated by engine (caller responsibility) + assert engine.decide_later_items == [] @pytest.mark.asyncio - async def test_multiple_decide_later_items_accumulate(self): - """Multiple DECIDE_LATER questions all accumulate in the list.""" + async def test_decide_later_classification_recorded(self): + """DECIDE_LATER classification is recorded so caller can detect it.""" engine = _make_engine() state = _make_state() q1 = "What rate limiting strategy should we use?" - q2 = "How should we handle GDPR data retention?" - q3 = "What's the target latency for API responses?" - passthrough_q = "What is the core problem?" - - classifications = [ - ClassificationResult( - original_question=q1, - category=QuestionCategory.DECIDE_LATER, - reframed_question=q1, - reasoning="Premature.", - decide_later=True, - placeholder_response="TBD.", - ), - ClassificationResult( - original_question=q2, - category=QuestionCategory.DECIDE_LATER, - reframed_question=q2, - reasoning="Needs legal review.", - decide_later=True, - placeholder_response="Needs legal input.", - ), - ClassificationResult( - original_question=q3, - category=QuestionCategory.DECIDE_LATER, - reframed_question=q3, - reasoning="Depends on infra.", - decide_later=True, - placeholder_response="After infra decision.", - ), - ClassificationResult( - original_question=passthrough_q, - category=QuestionCategory.PLANNING, - reframed_question=passthrough_q, - reasoning="Planning.", - ), - ] - engine.classifier.classify = AsyncMock(side_effect=[Result.ok(c) for c in classifications]) - engine.inner.ask_next_question = AsyncMock( - side_effect=[Result.ok(q1), Result.ok(q2), Result.ok(q3), Result.ok(passthrough_q)] + classification = ClassificationResult( + original_question=q1, + category=QuestionCategory.DECIDE_LATER, + reframed_question=q1, + reasoning="Premature.", + decide_later=True, + placeholder_response="TBD.", ) - engine.inner.record_response = AsyncMock(return_value=Result.ok(state)) + + engine.classifier.classify = AsyncMock(return_value=Result.ok(classification)) + engine.inner.ask_next_question = AsyncMock(return_value=Result.ok(q1)) result = await engine.ask_next_question(state) assert result.is_ok - assert len(engine.decide_later_items) == 3 - assert engine.decide_later_items == [q1, q2, q3] + assert result.value == q1 + # Classification is recorded so get_last_classification works + assert len(engine.classifications) == 1 + assert engine.classifications[0].output_type == ClassifierOutputType.DECIDE_LATER + assert engine.get_last_classification() == "decide_later" def test_decide_later_items_empty_by_default(self): """decide_later_items starts as empty list.""" @@ -531,3 +473,109 @@ def test_format_decide_later_summary_preserves_original_question_text(self): summary = engine.format_decide_later_summary() assert original_text in summary + + +class TestSkipAsDecideLater: + """Tests for user-initiated decide-later skip via skip_as_decide_later().""" + + @pytest.mark.asyncio + async def test_skip_records_question_in_decide_later_items(self): + """skip_as_decide_later records the question in decide_later_items.""" + engine = _make_engine() + state = _make_state() + engine.inner.record_response.return_value = Result.ok(state) + + question = "What caching strategy should we use?" + result = await engine.skip_as_decide_later(state, question) + + assert result.is_ok + assert question in engine.decide_later_items + + @pytest.mark.asyncio + async def test_skip_does_not_duplicate_existing_item(self): + """skip_as_decide_later does not duplicate if question already in list.""" + engine = _make_engine() + state = _make_state() + engine.inner.record_response.return_value = Result.ok(state) + + question = "What caching strategy should we use?" + engine.decide_later_items = [question] + + result = await engine.skip_as_decide_later(state, question) + + assert result.is_ok + assert engine.decide_later_items.count(question) == 1 + + @pytest.mark.asyncio + async def test_skip_records_placeholder_response_in_inner_engine(self): + """skip_as_decide_later feeds a placeholder response to inner engine.""" + engine = _make_engine() + state = _make_state() + engine.inner.record_response.return_value = Result.ok(state) + + question = "What deployment model should we use?" + await engine.skip_as_decide_later(state, question) + + engine.inner.record_response.assert_called_once() + call_args = engine.inner.record_response.call_args + recorded_response = call_args[0][1] + assert "[Decide later]" in recorded_response + + @pytest.mark.asyncio + async def test_skip_advances_interview_state(self): + """skip_as_decide_later returns the updated state from record_response.""" + engine = _make_engine() + state = _make_state(current_round=3) + updated_state = _make_state(current_round=4) + engine.inner.record_response.return_value = Result.ok(updated_state) + + question = "What rate limiting approach?" + result = await engine.skip_as_decide_later(state, question) + + assert result.is_ok + assert result.value == updated_state + + @pytest.mark.asyncio + async def test_skip_does_not_add_to_deferred_items(self): + """skip_as_decide_later does not affect deferred_items.""" + engine = _make_engine() + state = _make_state() + engine.inner.record_response.return_value = Result.ok(state) + + question = "What caching strategy?" + await engine.skip_as_decide_later(state, question) + + assert engine.deferred_items == [] + assert question in engine.decide_later_items + + @pytest.mark.asyncio + async def test_skip_multiple_questions_accumulate(self): + """Multiple skip_as_decide_later calls accumulate items.""" + engine = _make_engine() + state = _make_state() + engine.inner.record_response.return_value = Result.ok(state) + + q1 = "What caching strategy?" + q2 = "What rate limiting?" + q3 = "What deployment model?" + + await engine.skip_as_decide_later(state, q1) + await engine.skip_as_decide_later(state, q2) + await engine.skip_as_decide_later(state, q3) + + assert engine.decide_later_items == [q1, q2, q3] + + @pytest.mark.asyncio + async def test_skip_propagates_record_response_error(self): + """skip_as_decide_later propagates errors from record_response.""" + engine = _make_engine() + state = _make_state() + from ouroboros.core.errors import ValidationError as VE + engine.inner.record_response.return_value = Result.err( + VE("State save failed", field="state") + ) + + question = "What caching strategy?" + result = await engine.skip_as_decide_later(state, question) + + assert result.is_err diff --git a/tests/unit/bigbang/test_pm_interview.py b/tests/unit/bigbang/test_pm_interview.py index 7565d596..a9717aca 100644 --- a/tests/unit/bigbang/test_pm_interview.py +++ b/tests/unit/bigbang/test_pm_interview.py @@ -652,18 +652,22 @@ async def test_classification_failure_returns_original(self, tmp_path: Path) -> assert result.value == question @pytest.mark.asyncio - async def test_decide_later_auto_responds_with_placeholder(self, tmp_path: Path) -> None: - """Decide-later questions are auto-answered with a placeholder and skipped.""" + async def test_decide_later_returns_question_without_auto_answering(self, tmp_path: Path) -> None: + """Decide-later questions are returned to the caller for user decision. + + The engine no longer auto-answers with a placeholder or recurses. + The caller (main session) detects classification == "decide_later" + and presents the user with a decide-later option. + """ adapter = _make_adapter() engine = _make_engine(adapter, tmp_path) decide_later_q = "How should we handle scaling when we reach 1M users?" placeholder = "This will be determined after MVP launch and initial user metrics. Marking as a decision point for later." - planning_q = "Who are the target users?" adapter.complete = AsyncMock( side_effect=[ - # First question generation: decide-later question + # Question generation: decide-later question Result.ok(_mock_completion(decide_later_q)), # Classification: decide_later Result.ok( @@ -680,22 +684,7 @@ async def test_decide_later_auto_responds_with_placeholder(self, tmp_path: Path) ) ) ), - # Second question generation (recursive call): planning question - Result.ok(_mock_completion(planning_q)), - # Classification: planning - Result.ok( - _mock_completion( - json.dumps( - { - "category": "planning", - "reframed_question": planning_q, - "reasoning": "User question", - "defer_to_dev": False, - "decide_later": False, - } - ) - ) - ), + # No second question generation — no recursion ] ) @@ -707,14 +696,14 @@ async def test_decide_later_auto_responds_with_placeholder(self, tmp_path: Path) result = await engine.ask_next_question(state) assert result.is_ok - # The PM sees the next real question, not the decide-later one - assert result.value == planning_q - # The decide-later question is tracked in decide_later_items - assert decide_later_q in engine.decide_later_items - # A placeholder response was auto-recorded in the interview state - assert len(state.rounds) == 1 - assert "[Decide later]" in state.rounds[0].user_response - assert placeholder in state.rounds[0].user_response + # The decide-later question is returned to the caller + assert result.value == decide_later_q + # decide_later_items is NOT populated — caller handles that + assert engine.decide_later_items == [] + # No auto-response recorded — state has no rounds + assert len(state.rounds) == 0 + # Classification is recorded for caller to detect + assert engine.get_last_classification() == "decide_later" @pytest.mark.asyncio async def test_decide_later_classification_result_properties(self) -> None: @@ -729,7 +718,8 @@ async def test_decide_later_classification_result_properties(self) -> None: ) assert result.output_type == ClassifierOutputType.DECIDE_LATER - assert result.question_for_pm == "" # Should not be shown to PM + # Returned to user so they can choose to answer or defer + assert result.question_for_pm == "How should we handle scaling?" class TestRecordResponse: diff --git a/tests/unit/bigbang/test_question_classifier.py b/tests/unit/bigbang/test_question_classifier.py index 0619ee3a..0554bc36 100644 --- a/tests/unit/bigbang/test_question_classifier.py +++ b/tests/unit/bigbang/test_question_classifier.py @@ -192,8 +192,8 @@ def test_deferred_returns_empty_string(self) -> None: ) assert result.question_for_pm == "" - def test_decide_later_returns_empty_string(self) -> None: - """DECIDE_LATER returns empty string — auto-answered, not shown to PM.""" + def test_decide_later_returns_original_question(self) -> None: + """DECIDE_LATER returns original question — shown to user for decision.""" result = ClassificationResult( original_question="How will this scale to millions of users?", category=QuestionCategory.DECIDE_LATER, @@ -202,7 +202,7 @@ def test_decide_later_returns_empty_string(self) -> None: decide_later=True, placeholder_response="To be determined post-launch.", ) - assert result.question_for_pm == "" + assert result.question_for_pm == "How will this scale to millions of users?" # ────────────────────────────────────────────────────────────── @@ -418,5 +418,5 @@ async def test_classify_decide_later_question(self) -> None: assert cr.category == QuestionCategory.DECIDE_LATER assert cr.decide_later is True assert cr.output_type == ClassifierOutputType.DECIDE_LATER - assert cr.question_for_pm == "" + assert cr.question_for_pm == question # returned to user for decision assert "initial deployment" in cr.placeholder_response diff --git a/tests/unit/mcp/tools/test_pm_handler.py b/tests/unit/mcp/tools/test_pm_handler.py index 639432b4..cffb24da 100644 --- a/tests/unit/mcp/tools/test_pm_handler.py +++ b/tests/unit/mcp/tools/test_pm_handler.py @@ -452,6 +452,81 @@ async def test_handle_answer_includes_interview_complete_false(self, tmp_path: P assert result.is_ok assert result.value.meta["interview_complete"] is False + @pytest.mark.asyncio + async def test_handle_answer_decide_later_skips_and_records(self, tmp_path: Path) -> None: + """When answer='[decide_later]', skip_as_decide_later is called instead of record_response.""" + engine = make_pm_engine_mock(deferred_items=[], decide_later_items=[]) + + question = "What caching strategy should we use?" + state = _make_state( + rounds=[ + InterviewRound(round_number=1, question=question, user_response=None), + ], + ) + + engine.load_state = AsyncMock(return_value=Result.ok(state)) + + # skip_as_decide_later should be called (not record_response) + async def mock_skip(s: Any, q: str) -> Result: + engine.decide_later_items.append(q) + return Result.ok(s) + + engine.skip_as_decide_later = AsyncMock(side_effect=mock_skip) + engine.record_response = AsyncMock(return_value=Result.ok(state)) + + engine.ask_next_question = AsyncMock(return_value=Result.ok("Next question?")) + engine.save_state = AsyncMock(return_value=Result.ok(tmp_path / "state.json")) + + _save_pm_meta("sess-dl", engine, cwd="/tmp/proj", data_dir=tmp_path) + + handler = PMInterviewHandler(pm_engine=engine, data_dir=tmp_path) + + result = await handler.handle( + { + "session_id": "sess-dl", + "answer": "[decide_later]", + } + ) + + assert result.is_ok + # skip_as_decide_later was called + engine.skip_as_decide_later.assert_called_once_with(state, question) + # record_response was NOT called (skip_as_decide_later handles it internally) + engine.record_response.assert_not_called() + # The question was recorded in decide_later_items + assert question in engine.decide_later_items + + @pytest.mark.asyncio + async def test_handle_answer_decide_later_with_whitespace(self, tmp_path: Path) -> None: + """Whitespace-padded '[decide_later]' is still recognized.""" + engine = make_pm_engine_mock(deferred_items=[], decide_later_items=[]) + + question = "What rate limiting approach?" + state = _make_state( + rounds=[ + InterviewRound(round_number=1, question=question, user_response=None), + ], + ) + + engine.load_state = AsyncMock(return_value=Result.ok(state)) + engine.skip_as_decide_later = AsyncMock(return_value=Result.ok(state)) + engine.ask_next_question = AsyncMock(return_value=Result.ok("Next question?")) + engine.save_state = AsyncMock(return_value=Result.ok(tmp_path / "state.json")) + + _save_pm_meta("sess-dl2", engine, cwd="/tmp/proj", data_dir=tmp_path) + + handler = PMInterviewHandler(pm_engine=engine, data_dir=tmp_path) + + result = await handler.handle( + { + "session_id": "sess-dl2", + "answer": " [decide_later] ", + } + ) + + assert result.is_ok + engine.skip_as_decide_later.assert_called_once() + # ── Unit tests for _check_completion (AC 12) ───────────────────── From 0426caa4f3959c9ddb68e5e0a0b1b7a6de1cb356 Mon Sep 17 00:00:00 2001 From: heechul Date: Sat, 28 Mar 2026 00:23:13 +0900 Subject: [PATCH 2/5] feat(pm): also return DEFERRED questions to user instead of auto-skipping Apply the same pattern as DECIDE_LATER to DEFERRED questions: return them to the main session so the user can choose to defer or answer. Adds skip_as_deferred() method and [deferred] answer handling in pm_handler. This eliminates ALL recursion in ask_next_question(), fully resolving the MCP 120s timeout issue. Both DEFERRED and DECIDE_LATER are now user-driven, with skip_eligible metadata signaling the UI. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/ouroboros/bigbang/pm_interview.py | 61 ++++++++--- src/ouroboros/mcp/tools/pm_handler.py | 81 ++++++++++---- tests/unit/bigbang/test_pm_interview.py | 140 +++++------------------- 3 files changed, 128 insertions(+), 154 deletions(-) diff --git a/src/ouroboros/bigbang/pm_interview.py b/src/ouroboros/bigbang/pm_interview.py index 3db82833..1b2e1cc7 100644 --- a/src/ouroboros/bigbang/pm_interview.py +++ b/src/ouroboros/bigbang/pm_interview.py @@ -485,27 +485,22 @@ async def ask_next_question( output_type = classification.output_type if output_type == ClassifierOutputType.DEFERRED: - # Track as deferred item and generate a new question - self.deferred_items.append(classification.original_question) + # Return the question to the caller (main session) so the user + # can choose to defer it themselves. The main session detects + # classification == "deferred" via response_meta and offers + # a "skip / defer to dev" option. If the user picks it, the + # caller calls skip_as_deferred() which records the deferral + # and appends to deferred_items. + # + # Previously this branch auto-answered and recursed, which could + # trigger MCP 120s timeouts on consecutive DEFERRED runs. log.info( - "pm.question_deferred", + "pm.question_deferred_candidate", question=classification.original_question[:100], reasoning=classification.reasoning, output_type=output_type, ) - # Feed an automatic response back to the inner InterviewEngine - # so the round is properly recorded and the engine advances. - # This prevents the inner engine from re-generating similar - # technical questions it doesn't know were already handled. - await self.record_response( - state, - user_response="[Deferred to development phase] " - "This technical decision will be addressed during the " - "development interview.", - question=classification.original_question, - ) - # Recursively ask for the next real question - return await self.ask_next_question(state) + return Result.ok(classification.original_question) if output_type == ClassifierOutputType.DECIDE_LATER: # Return the question to the caller (main session) so the user @@ -629,6 +624,40 @@ async def skip_as_decide_later( question=question, ) + async def skip_as_deferred( + self, + state: InterviewState, + question: str, + ) -> Result[InterviewState, ValidationError]: + """Skip a question as "deferred to dev" at the user's explicit request. + + Records the question in ``deferred_items`` and feeds a deferral + response to the inner InterviewEngine so the round is properly recorded + and the engine advances. + + Args: + state: Current interview state. + question: The question the user chose to defer. + + Returns: + Result containing updated state or ValidationError. + """ + if question not in self.deferred_items: + self.deferred_items.append(question) + + log.info( + "pm.question_deferred_by_user", + question=question[:100], + ) + + return await self.record_response( + state, + user_response="[Deferred to development phase] " + "This technical decision will be addressed during the " + "development interview.", + question=question, + ) + async def complete_interview( self, state: InterviewState, diff --git a/src/ouroboros/mcp/tools/pm_handler.py b/src/ouroboros/mcp/tools/pm_handler.py index 328982ac..4beb0fe5 100644 --- a/src/ouroboros/mcp/tools/pm_handler.py +++ b/src/ouroboros/mcp/tools/pm_handler.py @@ -503,9 +503,11 @@ async def _handle_start( # Include pending_reframe in response meta if a reframe occurred pending_reframe = engine.get_pending_reframe() - # Check classification to signal decide-later eligibility + # Check classification to signal skip eligibility classification = _last_classification(engine) is_decide_later = classification == "decide_later" + is_deferred = classification == "deferred" + skip_eligible = is_decide_later or is_deferred meta = { "session_id": state.interview_id, @@ -515,7 +517,7 @@ async def _handle_start( "question": question, "is_brownfield": state.is_brownfield, "classification": classification, - "decide_later_eligible": is_decide_later, + "skip_eligible": skip_eligible, "pending_reframe": pending_reframe, **diff, } @@ -525,19 +527,26 @@ async def _handle_start( session_id=state.interview_id, is_brownfield=state.is_brownfield, classification=classification, - decide_later_eligible=is_decide_later, + skip_eligible=skip_eligible, has_pending_reframe=pending_reframe is not None, **diff, ) - # Build response text — include decide-later hint when applicable + # Build response text — include skip hint when applicable start_text = f"PM interview started. Session ID: {state.interview_id}\n\n{question}" if is_decide_later: start_text += ( '\n\nšŸ’” This question can be deferred. ' 'The user may answer now, or choose "decide later" to skip it. ' "If they choose to decide later, pass " - f'answer="decide_later" with session_id="{state.interview_id}".' + f'answer="[decide_later]" with session_id="{state.interview_id}".' + ) + elif is_deferred: + start_text += ( + '\n\nšŸ’” This is a technical question that can be deferred to the dev phase. ' + 'The user may answer now, or choose to defer it. ' + "If they choose to defer, pass " + f'answer="[deferred]" with session_id="{state.interview_id}".' ) return Result.ok( @@ -797,17 +806,26 @@ async def _handle_answer( pending_question = state.rounds[-1].question classification = _last_classification(engine) is_decide_later = classification == "decide_later" + is_deferred = classification == "deferred" + skip_eligible = is_decide_later or is_deferred pending_reframe = engine.get_pending_reframe() - # Include decide-later hint in re-displayed question + # Include skip hint in re-displayed question pending_text = f"Session {session_id}\n\n{pending_question}" if is_decide_later: pending_text += ( '\n\nšŸ’” This question can be deferred. ' 'The user may answer now, or choose "decide later" to skip it. ' "If they choose to decide later, pass " - f'answer="decide_later" with session_id="{session_id}".' + f'answer="[decide_later]" with session_id="{session_id}".' + ) + elif is_deferred: + pending_text += ( + '\n\nšŸ’” This is a technical question that can be deferred to the dev phase. ' + 'The user may answer now, or choose to defer it. ' + "If they choose to defer, pass " + f'answer="[deferred]" with session_id="{session_id}".' ) return Result.ok( @@ -826,7 +844,7 @@ async def _handle_answer( "question": pending_question, "is_complete": False, "classification": classification, - "decide_later_eligible": is_decide_later, + "skip_eligible": skip_eligible, "deferred_this_round": [], "decide_later_this_round": [], "interview_complete": False, @@ -852,14 +870,13 @@ async def _handle_answer( if state.rounds[-1].user_response is None: state.rounds.pop() - # ── User chose "decide later" ───────────────────────── - # When the main session detects classification == "decide_later" - # and the user selects the decide-later option, it sends - # answer="[decide_later]". We skip normal recording and - # delegate to the engine's skip_as_decide_later() method - # which records the question in decide_later_items and - # feeds a placeholder response to the inner engine. - if answer.strip() == "[decide_later]": + # ── User chose to skip (decide later / defer to dev) ─── + # The main session detects classification via response_meta + # and offers skip options. The user's choice arrives as: + # answer="[decide_later]" → skip_as_decide_later() + # answer="[deferred]" → skip_as_deferred() + stripped = answer.strip() + if stripped == "[decide_later]": skip_result = await engine.skip_as_decide_later(state, last_question) if skip_result.is_err: return Result.err( @@ -870,6 +887,17 @@ async def _handle_answer( ) state = skip_result.value state.clear_stored_ambiguity() + elif stripped == "[deferred]": + skip_result = await engine.skip_as_deferred(state, last_question) + if skip_result.is_err: + return Result.err( + MCPToolError( + str(skip_result.error), + tool_name="ouroboros_pm_interview", + ) + ) + state = skip_result.value + state.clear_stored_ambiguity() else: record_result = await engine.record_response(state, answer, last_question) if record_result.is_err: @@ -1005,10 +1033,10 @@ async def _handle_answer( # Extract classification from the last classify call classification = _last_classification(engine) - # When classification is decide_later, signal to the caller - # that the user should be offered a "decide later" option - # instead of being forced to answer now. + # Signal to the caller that the user can skip this question is_decide_later = classification == "decide_later" + is_deferred = classification == "deferred" + skip_eligible = is_decide_later or is_deferred response_meta = { "session_id": session_id, @@ -1017,7 +1045,7 @@ async def _handle_answer( "question": question, "is_complete": False, "classification": classification, - "decide_later_eligible": is_decide_later, + "skip_eligible": skip_eligible, "deferred_this_round": diff["new_deferred"], "decide_later_this_round": diff["new_decide_later"], # Keep backward-compat fields from AC 8 @@ -1030,19 +1058,26 @@ async def _handle_answer( "pm_handler.question_asked", session_id=session_id, classification=classification, - decide_later_eligible=is_decide_later, + skip_eligible=skip_eligible, has_pending_reframe=pending_reframe is not None, **diff, ) - # Build response text — include decide-later hint when applicable + # Build response text — include skip hint when applicable response_text = f"Session {session_id}\n\n{question}" if is_decide_later: response_text += ( '\n\nšŸ’” This question can be deferred. ' 'The user may answer now, or choose "decide later" to skip it. ' "If they choose to decide later, pass " - f'answer="decide_later" with session_id="{session_id}".' + f'answer="[decide_later]" with session_id="{session_id}".' + ) + elif is_deferred: + response_text += ( + '\n\nšŸ’” This is a technical question that can be deferred to the dev phase. ' + 'The user may answer now, or choose to defer it. ' + "If they choose to defer, pass " + f'answer="[deferred]" with session_id="{session_id}".' ) return Result.ok( diff --git a/tests/unit/bigbang/test_pm_interview.py b/tests/unit/bigbang/test_pm_interview.py index a9717aca..046a0de7 100644 --- a/tests/unit/bigbang/test_pm_interview.py +++ b/tests/unit/bigbang/test_pm_interview.py @@ -431,17 +431,16 @@ async def test_dev_question_gets_reframed(self, tmp_path: Path) -> None: assert engine.classifications[0].category == QuestionCategory.DEVELOPMENT @pytest.mark.asyncio - async def test_deferred_question_skipped(self, tmp_path: Path) -> None: - """DEV-only questions marked as defer_to_dev are skipped and tracked.""" + async def test_deferred_question_returned_to_user(self, tmp_path: Path) -> None: + """DEV-only questions marked as defer_to_dev are returned to the user.""" adapter = _make_adapter() engine = _make_engine(adapter, tmp_path) dev_q = "Should we use gRPC or REST for inter-service communication?" - planning_q = "Who are the end users?" adapter.complete = AsyncMock( side_effect=[ - # First question generation: dev question + # Question generation: dev question Result.ok(_mock_completion(dev_q)), # Classification: defer to dev Result.ok( @@ -456,21 +455,6 @@ async def test_deferred_question_skipped(self, tmp_path: Path) -> None: ) ) ), - # Second question generation (recursive call): planning question - Result.ok(_mock_completion(planning_q)), - # Classification: planning - Result.ok( - _mock_completion( - json.dumps( - { - "category": "planning", - "reframed_question": planning_q, - "reasoning": "User question", - "defer_to_dev": False, - } - ) - ) - ), ] ) @@ -482,85 +466,46 @@ async def test_deferred_question_skipped(self, tmp_path: Path) -> None: result = await engine.ask_next_question(state) assert result.is_ok - assert result.value == planning_q - assert dev_q in engine.deferred_items - # The deferred question gets an automatic response fed back to the - # inner InterviewEngine so it knows the question was handled - assert len(state.rounds) == 1 - assert "[Deferred to development phase]" in state.rounds[0].user_response - assert state.rounds[0].question == dev_q + # The user sees the deferred question directly + assert result.value == dev_q + # deferred_items NOT populated yet (user hasn't chosen to skip) + assert dev_q not in engine.deferred_items + # No rounds auto-recorded + assert len(state.rounds) == 0 @pytest.mark.asyncio - async def test_deferred_auto_response_content(self, tmp_path: Path) -> None: - """DEV-classified deferred questions get an automatic response fed back - to the InterviewEngine with appropriate marker text.""" + async def test_user_can_skip_as_deferred(self, tmp_path: Path) -> None: + """User can defer a technical question via skip_as_deferred().""" adapter = _make_adapter() engine = _make_engine(adapter, tmp_path) dev_q = "What container orchestration platform should we use — Kubernetes or ECS?" - planning_q = "What are the key business goals?" - - adapter.complete = AsyncMock( - side_effect=[ - Result.ok(_mock_completion(dev_q)), - Result.ok( - _mock_completion( - json.dumps( - { - "category": "development", - "reframed_question": dev_q, - "reasoning": "Infrastructure choice is purely technical", - "defer_to_dev": True, - } - ) - ) - ), - Result.ok(_mock_completion(planning_q)), - Result.ok( - _mock_completion( - json.dumps( - { - "category": "planning", - "reframed_question": planning_q, - "reasoning": "Business question", - "defer_to_dev": False, - } - ) - ) - ), - ] - ) state = InterviewState( interview_id="test_auto_response", initial_context="Build a SaaS platform", ) - result = await engine.ask_next_question(state) + # User chooses to defer + adapter.complete = AsyncMock(return_value=Result.ok(_mock_completion("ok"))) + result = await engine.skip_as_deferred(state, dev_q) assert result.is_ok - assert result.value == planning_q - - # Verify the auto-response was properly recorded - deferred_round = state.rounds[0] - assert deferred_round.question == dev_q - assert "[Deferred to development phase]" in deferred_round.user_response - assert "development interview" in deferred_round.user_response - assert deferred_round.round_number == 1 + assert dev_q in engine.deferred_items + # Verify the deferral response was properly recorded + assert len(state.rounds) == 1 + assert "[Deferred to development phase]" in state.rounds[0].user_response @pytest.mark.asyncio - async def test_multiple_deferred_questions_all_auto_responded(self, tmp_path: Path) -> None: - """Multiple consecutive deferred questions each get auto-responses.""" + async def test_deferred_question_returned_not_auto_skipped(self, tmp_path: Path) -> None: + """DEFERRED questions are returned to user, not auto-skipped.""" adapter = _make_adapter() engine = _make_engine(adapter, tmp_path) dev_q1 = "Should we use gRPC or REST?" - dev_q2 = "Which CI/CD pipeline tool should we use?" - planning_q = "Who are the target users?" adapter.complete = AsyncMock( side_effect=[ - # First dev question Result.ok(_mock_completion(dev_q1)), Result.ok( _mock_completion( @@ -574,34 +519,6 @@ async def test_multiple_deferred_questions_all_auto_responded(self, tmp_path: Pa ) ) ), - # Second dev question - Result.ok(_mock_completion(dev_q2)), - Result.ok( - _mock_completion( - json.dumps( - { - "category": "development", - "reframed_question": dev_q2, - "reasoning": "CI/CD is infra", - "defer_to_dev": True, - } - ) - ) - ), - # Finally a planning question - Result.ok(_mock_completion(planning_q)), - Result.ok( - _mock_completion( - json.dumps( - { - "category": "planning", - "reframed_question": planning_q, - "reasoning": "User question", - "defer_to_dev": False, - } - ) - ) - ), ] ) @@ -613,18 +530,11 @@ async def test_multiple_deferred_questions_all_auto_responded(self, tmp_path: Pa result = await engine.ask_next_question(state) assert result.is_ok - assert result.value == planning_q - - # Both dev questions should be tracked as deferred - assert dev_q1 in engine.deferred_items - assert dev_q2 in engine.deferred_items - - # Both should have auto-responses recorded in the inner engine - assert len(state.rounds) == 2 - assert "[Deferred to development phase]" in state.rounds[0].user_response - assert "[Deferred to development phase]" in state.rounds[1].user_response - assert state.rounds[0].question == dev_q1 - assert state.rounds[1].question == dev_q2 + # First deferred question returned directly — no recursion + assert result.value == dev_q1 + # Not yet in deferred_items (user hasn't chosen to skip) + assert dev_q1 not in engine.deferred_items + assert len(state.rounds) == 0 @pytest.mark.asyncio async def test_classification_failure_returns_original(self, tmp_path: Path) -> None: From f5bf4f1d0671807e8a6636ffc0cd04449756a56a Mon Sep 17 00:00:00 2001 From: heechul Date: Sat, 28 Mar 2026 00:29:28 +0900 Subject: [PATCH 3/5] =?UTF-8?q?fix(pm):=20address=20review=20findings=20?= =?UTF-8?q?=E2=80=94=20CLI=20intercept,=20docstring,=20test=20coverage?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - CLI now intercepts "decide later"/"skip"/"defer" input and calls skip_as_decide_later()/skip_as_deferred() instead of recording verbatim - CLI shows defer hint for DEFERRED questions (not just DECIDE_LATER) - question_for_pm returns original_question for DEFERRED (was empty string) - MCP hint text uses bracketed format [decide_later]/[deferred] consistently - Added MCP handler test for answer="[deferred]" path Co-Authored-By: Claude Opus 4.6 (1M context) --- src/ouroboros/bigbang/question_classifier.py | 8 +++- src/ouroboros/cli/commands/pm.py | 36 ++++++++++++++++- .../unit/bigbang/test_question_classifier.py | 6 +-- tests/unit/mcp/tools/test_pm_handler.py | 40 +++++++++++++++++++ 4 files changed, 84 insertions(+), 6 deletions(-) diff --git a/src/ouroboros/bigbang/question_classifier.py b/src/ouroboros/bigbang/question_classifier.py index 2b7bad37..de498d8b 100644 --- a/src/ouroboros/bigbang/question_classifier.py +++ b/src/ouroboros/bigbang/question_classifier.py @@ -109,7 +109,8 @@ def question_for_pm(self) -> str: For PASSTHROUGH: returns original_question unchanged. For REFRAMED: returns the reframed_question. - For DEFERRED: returns empty string (should not be shown). + For DEFERRED: returns original_question (shown to user + so they can choose to answer or defer to dev). For DECIDE_LATER: returns original_question (shown to user so they can choose to answer or defer). """ @@ -117,7 +118,10 @@ def question_for_pm(self) -> str: return self.original_question if self.output_type == ClassifierOutputType.REFRAMED: return self.reframed_question - if self.output_type == ClassifierOutputType.DECIDE_LATER: + if self.output_type in ( + ClassifierOutputType.DECIDE_LATER, + ClassifierOutputType.DEFERRED, + ): return self.original_question return "" diff --git a/src/ouroboros/cli/commands/pm.py b/src/ouroboros/cli/commands/pm.py index a3a5b731..64b08919 100644 --- a/src/ouroboros/cli/commands/pm.py +++ b/src/ouroboros/cli/commands/pm.py @@ -410,7 +410,7 @@ async def _run_pm_interview( console.print(f"\n[bold yellow]?[/] {question}\n") - # Check if this question was classified as decide_later — + # Check if this question was classified as skippable — # if so, show a hint that the user can defer it. classification = engine.get_last_classification() if classification == "decide_later": @@ -418,6 +418,11 @@ async def _run_pm_interview( '[dim] šŸ’” This question can be deferred. ' 'Type "decide later" or "skip" to defer it.[/]\n' ) + elif classification == "deferred": + console.print( + '[dim] šŸ’” This is a technical question that can be deferred to the dev phase. ' + 'Type "defer" or "skip" to defer it.[/]\n' + ) # Persist state + meta AFTER displaying the question but BEFORE # waiting for input so that an interruption preserves the pending @@ -451,6 +456,35 @@ async def _run_pm_interview( if state.rounds and state.rounds[-1].user_response is None: state.rounds.pop() + # Handle user-initiated skip (decide later / defer to dev) + _lower = user_response.strip().lower() + if classification == "decide_later" and _lower in ( + "decide later", "skip", "[decide_later]", + ): + record_result = await engine.skip_as_decide_later(state, question) + if isinstance(record_result, Result) and record_result.is_err: + print_error(f"Failed to skip question: {record_result.error}") + break + save_result = await engine.save_state(state) + if isinstance(save_result, Result) and save_result.is_err: + print_error(f"Failed to save state: {save_result.error}") + break + _save_cli_pm_meta(state.interview_id, engine) + continue + if classification == "deferred" and _lower in ( + "defer", "skip", "[deferred]", + ): + record_result = await engine.skip_as_deferred(state, question) + if isinstance(record_result, Result) and record_result.is_err: + print_error(f"Failed to defer question: {record_result.error}") + break + save_result = await engine.save_state(state) + if isinstance(save_result, Result) and save_result.is_err: + print_error(f"Failed to save state: {save_result.error}") + break + _save_cli_pm_meta(state.interview_id, engine) + continue + record_result = await engine.record_response(state, user_response, question) if isinstance(record_result, Result) and record_result.is_err: print_error(f"Failed to record response: {record_result.error}") diff --git a/tests/unit/bigbang/test_question_classifier.py b/tests/unit/bigbang/test_question_classifier.py index 0554bc36..0c95e3a6 100644 --- a/tests/unit/bigbang/test_question_classifier.py +++ b/tests/unit/bigbang/test_question_classifier.py @@ -181,8 +181,8 @@ def test_reframed_returns_reframed_question(self) -> None: ) assert result.question_for_pm == "How do you expect users to interact with data?" - def test_deferred_returns_empty_string(self) -> None: - """DEFERRED returns empty string — question should not be shown to PM.""" + def test_deferred_returns_original_question(self) -> None: + """DEFERRED returns original question — shown to user for defer/answer choice.""" result = ClassificationResult( original_question="Should we use gRPC or REST?", category=QuestionCategory.DEVELOPMENT, @@ -190,7 +190,7 @@ def test_deferred_returns_empty_string(self) -> None: reasoning="Purely technical", defer_to_dev=True, ) - assert result.question_for_pm == "" + assert result.question_for_pm == "Should we use gRPC or REST?" def test_decide_later_returns_original_question(self) -> None: """DECIDE_LATER returns original question — shown to user for decision.""" diff --git a/tests/unit/mcp/tools/test_pm_handler.py b/tests/unit/mcp/tools/test_pm_handler.py index cffb24da..776890a3 100644 --- a/tests/unit/mcp/tools/test_pm_handler.py +++ b/tests/unit/mcp/tools/test_pm_handler.py @@ -527,6 +527,46 @@ async def test_handle_answer_decide_later_with_whitespace(self, tmp_path: Path) assert result.is_ok engine.skip_as_decide_later.assert_called_once() + @pytest.mark.asyncio + async def test_handle_answer_deferred_skips_and_records(self, tmp_path: Path) -> None: + """When answer='[deferred]', skip_as_deferred is called instead of record_response.""" + engine = make_pm_engine_mock(deferred_items=[], decide_later_items=[]) + + question = "Should we use gRPC or REST for inter-service communication?" + state = _make_state( + rounds=[ + InterviewRound(round_number=1, question=question, user_response=None), + ], + ) + + engine.load_state = AsyncMock(return_value=Result.ok(state)) + + async def mock_skip(s: Any, q: str) -> Result: + engine.deferred_items.append(q) + return Result.ok(s) + + engine.skip_as_deferred = AsyncMock(side_effect=mock_skip) + engine.record_response = AsyncMock(return_value=Result.ok(state)) + + engine.ask_next_question = AsyncMock(return_value=Result.ok("Next question?")) + engine.save_state = AsyncMock(return_value=Result.ok(tmp_path / "state.json")) + + _save_pm_meta("sess-def", engine, cwd="/tmp/proj", data_dir=tmp_path) + + handler = PMInterviewHandler(pm_engine=engine, data_dir=tmp_path) + + result = await handler.handle( + { + "session_id": "sess-def", + "answer": "[deferred]", + } + ) + + assert result.is_ok + engine.skip_as_deferred.assert_called_once_with(state, question) + engine.record_response.assert_not_called() + assert question in engine.deferred_items + # ── Unit tests for _check_completion (AC 12) ───────────────────── From f08669f4d3eee645603be140d7f71cf6a03ddfe6 Mon Sep 17 00:00:00 2001 From: heechul Date: Sat, 28 Mar 2026 00:32:39 +0900 Subject: [PATCH 4/5] =?UTF-8?q?fix(pm):=20address=20ouroboros-agent=20revi?= =?UTF-8?q?ew=20=E2=80=94=20summary=20count,=20docstring,=20ruff=20format?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix PM generation summary: separate deferred/decide-later counts instead of combining them into one misleading number - Update ClassifierOutputType docstring: DEFERRED and DECIDE_LATER are now returned to user with skip option, not auto-answered - Run ruff format on all changed files to fix CI Co-Authored-By: Claude Opus 4.6 (1M context) --- src/ouroboros/bigbang/question_classifier.py | 4 ++-- src/ouroboros/cli/commands/pm.py | 12 +++++++---- src/ouroboros/mcp/tools/pm_handler.py | 21 ++++++++++--------- tests/unit/bigbang/test_decide_later_items.py | 1 + tests/unit/bigbang/test_pm_interview.py | 4 +++- 5 files changed, 25 insertions(+), 17 deletions(-) diff --git a/src/ouroboros/bigbang/question_classifier.py b/src/ouroboros/bigbang/question_classifier.py index de498d8b..83d22d30 100644 --- a/src/ouroboros/bigbang/question_classifier.py +++ b/src/ouroboros/bigbang/question_classifier.py @@ -51,8 +51,8 @@ class ClassifierOutputType(StrEnum): PASSTHROUGH: Planning question forwarded unchanged to the PM. REFRAMED: Development question rewritten in PM-friendly language. - DEFERRED: Deeply technical question deferred to the dev interview phase. - DECIDE_LATER: Premature question auto-answered with a placeholder. + DEFERRED: Deeply technical question returned to user with defer option. + DECIDE_LATER: Premature question returned to user with decide-later option. """ PASSTHROUGH = "passthrough" diff --git a/src/ouroboros/cli/commands/pm.py b/src/ouroboros/cli/commands/pm.py index 64b08919..2d6565b7 100644 --- a/src/ouroboros/cli/commands/pm.py +++ b/src/ouroboros/cli/commands/pm.py @@ -415,12 +415,12 @@ async def _run_pm_interview( classification = engine.get_last_classification() if classification == "decide_later": console.print( - '[dim] šŸ’” This question can be deferred. ' + "[dim] šŸ’” This question can be deferred. " 'Type "decide later" or "skip" to defer it.[/]\n' ) elif classification == "deferred": console.print( - '[dim] šŸ’” This is a technical question that can be deferred to the dev phase. ' + "[dim] šŸ’” This is a technical question that can be deferred to the dev phase. " 'Type "defer" or "skip" to defer it.[/]\n' ) @@ -459,7 +459,9 @@ async def _run_pm_interview( # Handle user-initiated skip (decide later / defer to dev) _lower = user_response.strip().lower() if classification == "decide_later" and _lower in ( - "decide later", "skip", "[decide_later]", + "decide later", + "skip", + "[decide_later]", ): record_result = await engine.skip_as_decide_later(state, question) if isinstance(record_result, Result) and record_result.is_err: @@ -472,7 +474,9 @@ async def _run_pm_interview( _save_cli_pm_meta(state.interview_id, engine) continue if classification == "deferred" and _lower in ( - "defer", "skip", "[deferred]", + "defer", + "skip", + "[deferred]", ): record_result = await engine.skip_as_deferred(state, question) if isinstance(record_result, Result) and record_result.is_err: diff --git a/src/ouroboros/mcp/tools/pm_handler.py b/src/ouroboros/mcp/tools/pm_handler.py index 4beb0fe5..4f20bec8 100644 --- a/src/ouroboros/mcp/tools/pm_handler.py +++ b/src/ouroboros/mcp/tools/pm_handler.py @@ -536,15 +536,15 @@ async def _handle_start( start_text = f"PM interview started. Session ID: {state.interview_id}\n\n{question}" if is_decide_later: start_text += ( - '\n\nšŸ’” This question can be deferred. ' + "\n\nšŸ’” This question can be deferred. " 'The user may answer now, or choose "decide later" to skip it. ' "If they choose to decide later, pass " f'answer="[decide_later]" with session_id="{state.interview_id}".' ) elif is_deferred: start_text += ( - '\n\nšŸ’” This is a technical question that can be deferred to the dev phase. ' - 'The user may answer now, or choose to defer it. ' + "\n\nšŸ’” This is a technical question that can be deferred to the dev phase. " + "The user may answer now, or choose to defer it. " "If they choose to defer, pass " f'answer="[deferred]" with session_id="{state.interview_id}".' ) @@ -815,15 +815,15 @@ async def _handle_answer( pending_text = f"Session {session_id}\n\n{pending_question}" if is_decide_later: pending_text += ( - '\n\nšŸ’” This question can be deferred. ' + "\n\nšŸ’” This question can be deferred. " 'The user may answer now, or choose "decide later" to skip it. ' "If they choose to decide later, pass " f'answer="[decide_later]" with session_id="{session_id}".' ) elif is_deferred: pending_text += ( - '\n\nšŸ’” This is a technical question that can be deferred to the dev phase. ' - 'The user may answer now, or choose to defer it. ' + "\n\nšŸ’” This is a technical question that can be deferred to the dev phase. " + "The user may answer now, or choose to defer it. " "If they choose to defer, pass " f'answer="[deferred]" with session_id="{session_id}".' ) @@ -1067,15 +1067,15 @@ async def _handle_answer( response_text = f"Session {session_id}\n\n{question}" if is_decide_later: response_text += ( - '\n\nšŸ’” This question can be deferred. ' + "\n\nšŸ’” This question can be deferred. " 'The user may answer now, or choose "decide later" to skip it. ' "If they choose to decide later, pass " f'answer="[decide_later]" with session_id="{session_id}".' ) elif is_deferred: response_text += ( - '\n\nšŸ’” This is a technical question that can be deferred to the dev phase. ' - 'The user may answer now, or choose to defer it. ' + "\n\nšŸ’” This is a technical question that can be deferred to the dev phase. " + "The user may answer now, or choose to defer it. " "If they choose to defer, pass " f'answer="[deferred]" with session_id="{session_id}".' ) @@ -1165,7 +1165,8 @@ async def _handle_generate( f"PM seed generated: {seed.product_name}\n" f"Seed: {seed_path}\n" f"PM document: {pm_path}\n\n" - f"Decide-later items: {len(seed.deferred_items) + len(seed.decide_later_items)}" + f"Deferred items: {len(seed.deferred_items)}\n" + f"Decide-later items: {len(seed.decide_later_items)}" ), ), ), diff --git a/tests/unit/bigbang/test_decide_later_items.py b/tests/unit/bigbang/test_decide_later_items.py index 22c7df1f..2f37776b 100644 --- a/tests/unit/bigbang/test_decide_later_items.py +++ b/tests/unit/bigbang/test_decide_later_items.py @@ -571,6 +571,7 @@ async def test_skip_propagates_record_response_error(self): engine = _make_engine() state = _make_state() from ouroboros.core.errors import ValidationError as VE + engine.inner.record_response.return_value = Result.err( VE("State save failed", field="state") ) diff --git a/tests/unit/bigbang/test_pm_interview.py b/tests/unit/bigbang/test_pm_interview.py index 046a0de7..bf6cea65 100644 --- a/tests/unit/bigbang/test_pm_interview.py +++ b/tests/unit/bigbang/test_pm_interview.py @@ -562,7 +562,9 @@ async def test_classification_failure_returns_original(self, tmp_path: Path) -> assert result.value == question @pytest.mark.asyncio - async def test_decide_later_returns_question_without_auto_answering(self, tmp_path: Path) -> None: + async def test_decide_later_returns_question_without_auto_answering( + self, tmp_path: Path + ) -> None: """Decide-later questions are returned to the caller for user decision. The engine no longer auto-answers with a placeholder or recurses. From edef8126719dcd53020feb02ac94ebc773b5ad41 Mon Sep 17 00:00:00 2001 From: heechul Date: Sat, 28 Mar 2026 12:16:01 +0900 Subject: [PATCH 5/5] fix(pm): auto-generate PM document on interview completion When the interview ambiguity score passes the threshold, the MCP now automatically generates the PM document instead of returning is_complete=true and waiting for a separate action="generate" call. This eliminates a race where the LLM response text sounded like completion, causing the skill to call generate before the engine had actually marked the interview complete. The action="generate" endpoint is kept as a fallback for retries when auto-generation fails (meta.generation_failed=true). Co-Authored-By: Claude Opus 4.6 (1M context) --- skills/pm/SKILL.md | 42 ++++++++++++------- src/ouroboros/mcp/tools/pm_handler.py | 56 ++++++++++++++++++++----- tests/unit/mcp/tools/test_pm_handler.py | 41 ++++++++++++++---- 3 files changed, 105 insertions(+), 34 deletions(-) diff --git a/skills/pm/SKILL.md b/skills/pm/SKILL.md index 8a039ae4..b1de284e 100644 --- a/skills/pm/SKILL.md +++ b/skills/pm/SKILL.md @@ -66,41 +66,53 @@ Then check: does `meta.ask_user_question` exist? ``` Do NOT modify it. Do NOT add options. Do NOT rephrase the question. -- **NO** → This is an interview question. Use `AskUserQuestion` with `meta.question` and generate 2-3 suggested answers. +- **NO** → This is an interview question. Use `AskUserQuestion` with `meta.question`. + - If `meta.skip_eligible == true`: add a skip option based on `meta.classification`: + - `classification == "decide_later"` → add option `{"label": "Decide later", "description": "Skip — will be recorded as an open item in the PRD"}` + - `classification == "deferred"` → add option `{"label": "Defer to dev", "description": "Skip — this technical decision will be deferred to the development phase"}` + - Generate 2-3 suggested answers as the other options. **C. Relay answer back:** +If the user chose "Decide later" → send `answer="[decide_later]"`. +If the user chose "Defer to dev" → send `answer="[deferred]"`. +Otherwise → send the user's answer normally. + ``` Tool: ouroboros_pm_interview Arguments: session_id: - : + : ``` **D. Check completion:** -If `meta.is_complete == true` → go to Step 4. -Otherwise → repeat Step 3. +Completion is determined ONLY by `meta.is_complete` — NEVER by the response text. +The MCP response text may sound like the interview is wrapping up, but ignore it. -### Step 4: Generate +If `meta.is_complete == true`: +- If `meta.generation_failed == true` → retry generation: + ``` + Tool: ouroboros_pm_interview + Arguments: + session_id: + action: "generate" + cwd: + ``` +- Otherwise → go to Step 4. The MCP auto-generated the PM document. + `meta.pm_path` and `meta.seed_path` contain the file paths. -``` -Tool: ouroboros_pm_interview -Arguments: - session_id: - action: "generate" - cwd: -``` +Otherwise → repeat Step 3, regardless of what the response text says. -### Step 5: Copy to Clipboard +### Step 4: Copy to Clipboard -After generation, read the pm.md file from `meta.pm_path` and copy its contents to the clipboard: +Read the pm.md file from `meta.pm_path` and copy its contents to the clipboard: ```bash cat | pbcopy ``` -### Step 6: Show Result & Next Step +### Step 5: Show Result & Next Step Show the following to the user: diff --git a/src/ouroboros/mcp/tools/pm_handler.py b/src/ouroboros/mcp/tools/pm_handler.py index 4f20bec8..75b0d013 100644 --- a/src/ouroboros/mcp/tools/pm_handler.py +++ b/src/ouroboros/mcp/tools/pm_handler.py @@ -912,7 +912,8 @@ async def _handle_answer( # ── Completion check (AC 12) ───────────────────────────── # Completion is determined by engine ambiguity scoring. - # User controls when to stop. + # When complete, auto-generate the PM document immediately + # (no separate "generate" call needed from the skill). completion = await _check_completion(state, engine) if completion is not None: # Mark interview as complete @@ -927,21 +928,58 @@ async def _handle_answer( ) _save_pm_meta(session_id, engine, cwd=cwd, data_dir=self.data_dir) + log.info( + "pm_handler.interview_complete", + session_id=session_id, + **completion, + ) + + # Auto-generate PM document on completion + seed_result = await engine.generate_pm_seed(state) + if seed_result.is_err: + # Generation failed — still report completion but without document + summary_text = ( + f"Interview complete but PM generation failed: {seed_result.error}\n" + f"Session ID: {session_id}\n" + f'Retry with: action="generate", session_id="{session_id}"' + ) + return Result.ok( + MCPToolResult( + content=( + MCPContentItem(type=ContentType.TEXT, text=summary_text), + ), + is_error=False, + meta={ + "session_id": session_id, + "is_complete": True, + "generation_failed": True, + **completion, + }, + ) + ) + + seed = seed_result.value + seed_path = engine.save_pm_seed(seed) + pm_output_dir = Path(cwd) / ".ouroboros" + pm_path = save_pm_document(seed, output_dir=pm_output_dir) + decide_later_summary = engine.format_decide_later_summary() summary_text = ( - f"Interview complete. Session ID: {session_id}\n" + f"Interview complete. PM document generated.\n\n" + f"Session ID: {session_id}\n" f"Rounds completed: {completion['rounds_completed']}\n" f"Completion reason: {completion['completion_reason']}\n" ) if completion.get("ambiguity_score") is not None: summary_text += f"Ambiguity score: {completion['ambiguity_score']:.2f}\n" summary_text += ( - f"\nDeferred items: {len(engine.deferred_items)}\n" - f"Decide-later items: {len(engine.decide_later_items)}\n" + f"\nPM document: {pm_path}\n" + f"Seed: {seed_path}\n" + f"\nDeferred items: {len(seed.deferred_items)}\n" + f"Decide-later items: {len(seed.decide_later_items)}\n" ) if decide_later_summary: summary_text += f"\n{decide_later_summary}\n" - summary_text += f'\nGenerate PM with: action="generate", session_id="{session_id}"' response_meta = { "session_id": session_id, @@ -953,14 +991,10 @@ async def _handle_answer( **completion, "deferred_count": len(engine.deferred_items), "decide_later_count": len(engine.decide_later_items), + "seed_path": str(seed_path), + "pm_path": str(pm_path), } - log.info( - "pm_handler.interview_complete", - session_id=session_id, - **completion, - ) - return Result.ok( MCPToolResult( content=( diff --git a/tests/unit/mcp/tools/test_pm_handler.py b/tests/unit/mcp/tools/test_pm_handler.py index 776890a3..e02889c0 100644 --- a/tests/unit/mcp/tools/test_pm_handler.py +++ b/tests/unit/mcp/tools/test_pm_handler.py @@ -767,6 +767,14 @@ async def test_handle_answer_completion_via_ambiguity(self, tmp_path: Path) -> N _save_pm_meta("sess-complete", engine, cwd="/tmp/proj", data_dir=tmp_path) + # Mock auto-generate on completion + seed_mock = MagicMock() + seed_mock.product_name = "Test Product" + seed_mock.deferred_items = ["d1"] + seed_mock.decide_later_items = ["dl1"] + engine.generate_pm_seed = AsyncMock(return_value=Result.ok(seed_mock)) + engine.save_pm_seed = MagicMock(return_value=tmp_path / "seed.json") + handler = PMInterviewHandler(pm_engine=engine, data_dir=tmp_path) # Mock _check_completion to return completion @@ -781,6 +789,9 @@ async def test_handle_answer_completion_via_ambiguity(self, tmp_path: Path) -> N "ouroboros.mcp.tools.pm_handler._check_completion", new_callable=AsyncMock, return_value=completion_meta, + ), patch( + "ouroboros.mcp.tools.pm_handler.save_pm_document", + return_value=tmp_path / "pm.md", ): result = await handler.handle( { @@ -799,10 +810,10 @@ async def test_handle_answer_completion_via_ambiguity(self, tmp_path: Path) -> N # Verify complete_interview was called engine.complete_interview.assert_called_once() - # Verify response text includes generate instructions + # Verify response text includes completion and PM document info text = result.value.content[0].text assert "Interview complete" in text - assert "generate" in text + assert "PM document" in text @pytest.mark.asyncio async def test_no_done_signal_processing(self, tmp_path: Path) -> None: @@ -1619,17 +1630,28 @@ async def test_resume_completion_meta_has_required_fields(self, tmp_path: Path) "ambiguity_score": 0.15, } ) + # Mock auto-generate on completion + seed_mock = MagicMock() + seed_mock.product_name = "Test Product" + seed_mock.deferred_items = ["d1"] + seed_mock.decide_later_items = ["dl1"] + engine.generate_pm_seed = AsyncMock(return_value=Result.ok(seed_mock)) + engine.save_pm_seed = MagicMock(return_value=tmp_path / "seed.json") _save_pm_meta("resume-complete", engine, cwd="/tmp", data_dir=tmp_path) handler = PMInterviewHandler(pm_engine=engine, data_dir=tmp_path) - result = await handler.handle( - { - "session_id": "resume-complete", - "answer": "Final answer", - } - ) + with patch( + "ouroboros.mcp.tools.pm_handler.save_pm_document", + return_value=tmp_path / "pm.md", + ): + result = await handler.handle( + { + "session_id": "resume-complete", + "answer": "Final answer", + } + ) assert result.is_ok meta = result.value.meta @@ -1643,6 +1665,9 @@ async def test_resume_completion_meta_has_required_fields(self, tmp_path: Path) assert meta["decide_later_this_round"] == [] # Also has completion details assert meta["interview_complete"] is True + # Auto-generated PM document + assert "pm_path" in meta + assert "seed_path" in meta @pytest.mark.asyncio async def test_resume_loads_state_and_meta(self, tmp_path: Path) -> None: