diff --git a/skills/pm/SKILL.md b/skills/pm/SKILL.md index 8a039ae4..b1de284e 100644 --- a/skills/pm/SKILL.md +++ b/skills/pm/SKILL.md @@ -66,41 +66,53 @@ Then check: does `meta.ask_user_question` exist? ``` Do NOT modify it. Do NOT add options. Do NOT rephrase the question. -- **NO** → This is an interview question. Use `AskUserQuestion` with `meta.question` and generate 2-3 suggested answers. +- **NO** → This is an interview question. Use `AskUserQuestion` with `meta.question`. + - If `meta.skip_eligible == true`: add a skip option based on `meta.classification`: + - `classification == "decide_later"` → add option `{"label": "Decide later", "description": "Skip — will be recorded as an open item in the PRD"}` + - `classification == "deferred"` → add option `{"label": "Defer to dev", "description": "Skip — this technical decision will be deferred to the development phase"}` + - Generate 2-3 suggested answers as the other options. **C. Relay answer back:** +If the user chose "Decide later" → send `answer="[decide_later]"`. +If the user chose "Defer to dev" → send `answer="[deferred]"`. +Otherwise → send the user's answer normally. + ``` Tool: ouroboros_pm_interview Arguments: session_id: - : + : ``` **D. Check completion:** -If `meta.is_complete == true` → go to Step 4. -Otherwise → repeat Step 3. +Completion is determined ONLY by `meta.is_complete` — NEVER by the response text. +The MCP response text may sound like the interview is wrapping up, but ignore it. -### Step 4: Generate +If `meta.is_complete == true`: +- If `meta.generation_failed == true` → retry generation: + ``` + Tool: ouroboros_pm_interview + Arguments: + session_id: + action: "generate" + cwd: + ``` +- Otherwise → go to Step 4. The MCP auto-generated the PM document. + `meta.pm_path` and `meta.seed_path` contain the file paths. -``` -Tool: ouroboros_pm_interview -Arguments: - session_id: - action: "generate" - cwd: -``` +Otherwise → repeat Step 3, regardless of what the response text says. 
-### Step 5: Copy to Clipboard +### Step 4: Copy to Clipboard -After generation, read the pm.md file from `meta.pm_path` and copy its contents to the clipboard: +Read the pm.md file from `meta.pm_path` and copy its contents to the clipboard: ```bash cat | pbcopy ``` -### Step 6: Show Result & Next Step +### Step 5: Show Result & Next Step Show the following to the user: diff --git a/src/ouroboros/bigbang/pm_interview.py b/src/ouroboros/bigbang/pm_interview.py index ab8e9810..470fa91a 100644 --- a/src/ouroboros/bigbang/pm_interview.py +++ b/src/ouroboros/bigbang/pm_interview.py @@ -55,18 +55,7 @@ _PM_SYSTEM_PROMPT_PREFIX = """\ You are a Product Requirements interviewer helping a PM define their product. -Focus on PRODUCT-LEVEL questions: -- What problem does this solve and for whom? -- What are the business goals and success metrics? -- What are the user stories and workflows? -- What constraints exist (timeline, budget, compliance)? -- What is in scope vs out of scope? -- What are the acceptance criteria? - -Do NOT ask about: -- Implementation details (databases, frameworks, APIs) -- Architecture decisions (microservices, deployment) -- Code-level patterns or testing strategies +Focus on: goal, user stories, constraints, success criteria, assumptions. """ @@ -158,7 +147,8 @@ class PMInterviewEngine: """Original question text for questions classified as DECIDE_LATER. These are questions that are premature or unknowable at the PM stage. - They are auto-answered with a placeholder and stored here so the PMSeed + The main session presents the question to the user with a "decide later" + option; when chosen, the caller records the item here so the PMSeed and PM document can surface them as explicit "decide later" decisions. 
""" classifications: list[ClassificationResult] = field(default_factory=list) @@ -484,47 +474,39 @@ async def ask_next_question( output_type = classification.output_type if output_type == ClassifierOutputType.DEFERRED: - # Track as deferred item and generate a new question - self.deferred_items.append(classification.original_question) + # Return the question to the caller (main session) so the user + # can choose to defer it themselves. The main session detects + # classification == "deferred" via response_meta and offers + # a "skip / defer to dev" option. If the user picks it, the + # caller calls skip_as_deferred() which records the deferral + # and appends to deferred_items. + # + # Previously this branch auto-answered and recursed, which could + # trigger MCP 120s timeouts on consecutive DEFERRED runs. log.info( - "pm.question_deferred", + "pm.question_deferred_candidate", question=classification.original_question[:100], reasoning=classification.reasoning, output_type=output_type, ) - # Feed an automatic response back to the inner InterviewEngine - # so the round is properly recorded and the engine advances. - # This prevents the inner engine from re-generating similar - # technical questions it doesn't know were already handled. - await self.record_response( - state, - user_response="[Deferred to development phase] " - "This technical decision will be addressed during the " - "development interview.", - question=classification.original_question, - ) - # Recursively ask for the next real question - return await self.ask_next_question(state) + return Result.ok(classification.original_question) if output_type == ClassifierOutputType.DECIDE_LATER: - # Auto-answer with placeholder — no PM interaction needed - placeholder = classification.placeholder_response - self.decide_later_items.append(classification.original_question) + # Return the question to the caller (main session) so the user + # can choose "decide later" themselves. 
The main session detects + # classification == "decide_later" via response_meta and offers + # the option. If the user picks it, the caller calls + # skip_as_decide_later() which records the placeholder and + # appends to decide_later_items. + # + # Previously this branch auto-answered and recursed, which could + # trigger MCP 120s timeouts on consecutive DECIDE_LATER runs. log.info( "pm.question_decide_later", question=classification.original_question[:100], - placeholder=placeholder[:100], reasoning=classification.reasoning, ) - # Record the placeholder as the response so the interview - # engine advances its round count - await self.record_response( - state, - user_response=f"[Decide later] {placeholder}", - question=classification.original_question, - ) - # Recursively ask for the next real question - return await self.ask_next_question(state) + return Result.ok(classification.original_question) if output_type == ClassifierOutputType.REFRAMED: # Use the reframed version and track the mapping @@ -595,6 +577,76 @@ async def record_response( return await self.inner.record_response(state, user_response, question) + async def skip_as_decide_later( + self, + state: InterviewState, + question: str, + ) -> Result[InterviewState, ValidationError]: + """Skip a question as "decide later" at the user's explicit request. + + Records the question in ``decide_later_items`` and feeds a placeholder + response to the inner InterviewEngine so the round is properly recorded + and the engine advances. + + This is called when the main session detects that the user chose the + "decide later" option for a DECIDE_LATER-classified question, instead + of the old auto-skip behavior inside ``ask_next_question``. + + Args: + state: Current interview state. + question: The question the user chose to decide later. + + Returns: + Result containing updated state or ValidationError. 
+ """ + if question not in self.decide_later_items: + self.decide_later_items.append(question) + + log.info( + "pm.question_decide_later_by_user", + question=question[:100], + ) + + return await self.record_response( + state, + user_response="[Decide later] To be determined — user chose to decide later.", + question=question, + ) + + async def skip_as_deferred( + self, + state: InterviewState, + question: str, + ) -> Result[InterviewState, ValidationError]: + """Skip a question as "deferred to dev" at the user's explicit request. + + Records the question in ``deferred_items`` and feeds a deferral + response to the inner InterviewEngine so the round is properly recorded + and the engine advances. + + Args: + state: Current interview state. + question: The question the user chose to defer. + + Returns: + Result containing updated state or ValidationError. + """ + if question not in self.deferred_items: + self.deferred_items.append(question) + + log.info( + "pm.question_deferred_by_user", + question=question[:100], + ) + + return await self.record_response( + state, + user_response="[Deferred to development phase] " + "This technical decision will be addressed during the " + "development interview.", + question=question, + ) + async def complete_interview( self, state: InterviewState, @@ -1129,13 +1181,14 @@ def _parse_pm_seed( for s in data.get("user_stories", []) ) - # Merge deferred items from classifier with extraction + # Merge LLM-extracted items with engine-tracked items, deduplicating. + # The extraction prompt includes raw items as context so the LLM may + # already emit them, but engine-tracked items are authoritative and + # must survive even if the extractor omits them. 
all_deferred = list(data.get("deferred_items", [])) for item in self.deferred_items: if item not in all_deferred: all_deferred.append(item) - - # Merge decide-later items — stored as original question text all_decide_later = list(data.get("decide_later_items", [])) for item in self.decide_later_items: if item not in all_decide_later: diff --git a/src/ouroboros/bigbang/question_classifier.py b/src/ouroboros/bigbang/question_classifier.py index 202ba7bf..1e43c428 100644 --- a/src/ouroboros/bigbang/question_classifier.py +++ b/src/ouroboros/bigbang/question_classifier.py @@ -3,9 +3,10 @@ Classifies generated questions as PM-answerable (planning), DEV-only (development/technical), or decide-later (premature/unknowable). -DEV-only questions are reframed into PM-friendly language or deferred to -the development interview phase. Decide-later questions get an automatic -placeholder response without PM interaction. +DEV-only questions are reframed into PM-friendly language or returned to +the user with the option to defer to the development interview phase. +Decide-later questions are returned to the user with the option to defer +rather than being automatically skipped. Uses a Sonnet-grade model for accurate judgment and reframing. """ @@ -51,8 +52,8 @@ class ClassifierOutputType(StrEnum): PASSTHROUGH: Planning question forwarded unchanged to the PM. REFRAMED: Development question rewritten in PM-friendly language. - DEFERRED: Deeply technical question deferred to the dev interview phase. - DECIDE_LATER: Premature question auto-answered with a placeholder. + DEFERRED: Deeply technical question returned to user with defer option. + DECIDE_LATER: Premature question returned to user with decide-later option. """ PASSTHROUGH = "passthrough" @@ -73,8 +74,9 @@ class ClassificationResult: reasoning: Why the classifier chose this category. defer_to_dev: Whether this question should be deferred entirely. 
decide_later: Whether this question is premature and should be - auto-answered with a placeholder. - placeholder_response: Automatic response for decide-later questions. + returned to the user with the option to defer. + placeholder_response: Placeholder response for decide-later questions + (used if the user chooses to defer). """ original_question: str @@ -90,8 +92,10 @@ def output_type(self) -> ClassifierOutputType: """Determine the classifier output type for this result. Returns: - DECIDE_LATER if premature/unknowable question (auto-answered), - DEFERRED if deeply technical (skipped entirely), + DECIDE_LATER if premature/unknowable question (returned to user + with option to defer), + DEFERRED if deeply technical (returned to user with option to + defer to dev phase), PASSTHROUGH if planning question (forwarded unchanged), REFRAMED if development question rewritten for PM. """ @@ -109,13 +113,20 @@ def question_for_pm(self) -> str: For PASSTHROUGH: returns original_question unchanged. For REFRAMED: returns the reframed_question. - For DEFERRED: returns empty string (should not be shown). - For DECIDE_LATER: returns empty string (auto-answered). + For DEFERRED: returns original_question (shown to user + so they can choose to answer or defer to dev). + For DECIDE_LATER: returns original_question (shown to user + so they can choose to answer or defer). """ if self.output_type == ClassifierOutputType.PASSTHROUGH: return self.original_question if self.output_type == ClassifierOutputType.REFRAMED: return self.reframed_question + if self.output_type in ( + ClassifierOutputType.DECIDE_LATER, + ClassifierOutputType.DEFERRED, + ): + return self.original_question return "" @@ -181,7 +192,8 @@ class QuestionClassifier: Uses a Sonnet-grade LLM to judge whether questions are appropriate for a PM audience, reframes technical questions when possible, and - identifies premature questions that should be auto-answered. 
+ identifies premature questions that should be returned to the user + with the option to defer. Attributes: llm_adapter: LLM adapter for classification calls. diff --git a/src/ouroboros/cli/commands/pm.py b/src/ouroboros/cli/commands/pm.py index 4d2f8ec2..2d6565b7 100644 --- a/src/ouroboros/cli/commands/pm.py +++ b/src/ouroboros/cli/commands/pm.py @@ -410,6 +410,20 @@ async def _run_pm_interview( console.print(f"\n[bold yellow]?[/] {question}\n") + # Check if this question was classified as skippable — + # if so, show a hint that the user can defer it. + classification = engine.get_last_classification() + if classification == "decide_later": + console.print( + "[dim] 💡 This question can be deferred. " + 'Type "decide later" or "skip" to defer it.[/]\n' + ) + elif classification == "deferred": + console.print( + "[dim] 💡 This is a technical question that can be deferred to the dev phase. " + 'Type "defer" or "skip" to defer it.[/]\n' + ) + # Persist state + meta AFTER displaying the question but BEFORE # waiting for input so that an interruption preserves the pending # question and --resume shows the same question. 
@@ -442,6 +456,39 @@ async def _run_pm_interview( if state.rounds and state.rounds[-1].user_response is None: state.rounds.pop() + # Handle user-initiated skip (decide later / defer to dev) + _lower = user_response.strip().lower() + if classification == "decide_later" and _lower in ( + "decide later", + "skip", + "[decide_later]", + ): + record_result = await engine.skip_as_decide_later(state, question) + if isinstance(record_result, Result) and record_result.is_err: + print_error(f"Failed to skip question: {record_result.error}") + break + save_result = await engine.save_state(state) + if isinstance(save_result, Result) and save_result.is_err: + print_error(f"Failed to save state: {save_result.error}") + break + _save_cli_pm_meta(state.interview_id, engine) + continue + if classification == "deferred" and _lower in ( + "defer", + "skip", + "[deferred]", + ): + record_result = await engine.skip_as_deferred(state, question) + if isinstance(record_result, Result) and record_result.is_err: + print_error(f"Failed to defer question: {record_result.error}") + break + save_result = await engine.save_state(state) + if isinstance(save_result, Result) and save_result.is_err: + print_error(f"Failed to save state: {save_result.error}") + break + _save_cli_pm_meta(state.interview_id, engine) + continue + record_result = await engine.record_response(state, user_response, question) if isinstance(record_result, Result) and record_result.is_err: print_error(f"Failed to record response: {record_result.error}") diff --git a/src/ouroboros/mcp/tools/pm_handler.py b/src/ouroboros/mcp/tools/pm_handler.py index 9910fd60..3d2683af 100644 --- a/src/ouroboros/mcp/tools/pm_handler.py +++ b/src/ouroboros/mcp/tools/pm_handler.py @@ -338,12 +338,22 @@ async def handle( initial_context = arguments.get("initial_context") session_id = arguments.get("session_id") answer = arguments.get("answer") - cwd = arguments.get("cwd") or os.getcwd() + cwd_arg = arguments.get("cwd") selected_repos: list[str] | 
None = arguments.get("selected_repos") # Auto-detect action from parameter presence (AC 13) action = _detect_action(arguments) + # For resume/generate, prefer persisted session cwd over os.getcwd() + # so artifacts land in the workspace where the interview started. + if cwd_arg: + cwd = cwd_arg + elif session_id and action in ("resume", "generate"): + meta = _load_pm_meta(session_id, self.data_dir) + cwd = (meta.get("cwd") if meta else None) or os.getcwd() + else: + cwd = os.getcwd() + engine = self._get_engine() try: @@ -503,6 +513,12 @@ async def _handle_start( # Include pending_reframe in response meta if a reframe occurred pending_reframe = engine.get_pending_reframe() + # Check classification to signal skip eligibility + classification = _last_classification(engine) + is_decide_later = classification == "decide_later" + is_deferred = classification == "deferred" + skip_eligible = is_decide_later or is_deferred + meta = { "session_id": state.interview_id, "status": "interview_started", @@ -510,6 +526,8 @@ async def _handle_start( "response_param": "answer", "question": question, "is_brownfield": state.is_brownfield, + "classification": classification, + "skip_eligible": skip_eligible, "pending_reframe": pending_reframe, **diff, } @@ -518,18 +536,35 @@ async def _handle_start( "pm_handler.started", session_id=state.interview_id, is_brownfield=state.is_brownfield, + classification=classification, + skip_eligible=skip_eligible, has_pending_reframe=pending_reframe is not None, **diff, ) + # Build response text — include skip hint when applicable + start_text = f"PM interview started. Session ID: {state.interview_id}\n\n{question}" + if is_decide_later: + start_text += ( + "\n\n💡 This question can be deferred. " + 'The user may answer now, or choose "decide later" to skip it. ' + "If they choose to decide later, pass " + f'answer="[decide_later]" with session_id="{state.interview_id}".' 
+ ) + elif is_deferred: + start_text += ( + "\n\n💡 This is a technical question that can be deferred to the dev phase. " + "The user may answer now, or choose to defer it. " + "If they choose to defer, pass " + f'answer="[deferred]" with session_id="{state.interview_id}".' + ) + return Result.ok( MCPToolResult( content=( MCPContentItem( type=ContentType.TEXT, - text=( - f"PM interview started. Session ID: {state.interview_id}\n\n{question}" - ), + text=start_text, ), ), is_error=False, @@ -725,6 +760,12 @@ async def _idempotent_select_repos( else (state.rounds[-1].question if state.rounds else "No question available.") ) + engine.restore_meta(meta) + classification = _last_classification(engine) + is_decide_later = classification == "decide_later" + is_deferred = classification == "deferred" + skip_eligible = is_decide_later or is_deferred + return Result.ok( MCPToolResult( content=( @@ -742,6 +783,8 @@ async def _idempotent_select_repos( "question": first_question, "is_brownfield": state.is_brownfield, "idempotent": True, + "classification": classification, + "skip_eligible": skip_eligible, }, ) ) @@ -780,15 +823,35 @@ async def _handle_answer( if not answer and state.rounds and state.rounds[-1].user_response is None: pending_question = state.rounds[-1].question classification = _last_classification(engine) + is_decide_later = classification == "decide_later" + is_deferred = classification == "deferred" + skip_eligible = is_decide_later or is_deferred pending_reframe = engine.get_pending_reframe() + # Include skip hint in re-displayed question + pending_text = f"Session {session_id}\n\n{pending_question}" + if is_decide_later: + pending_text += ( + "\n\n💡 This question can be deferred. " + 'The user may answer now, or choose "decide later" to skip it. ' + "If they choose to decide later, pass " + f'answer="[decide_later]" with session_id="{session_id}".' 
+ ) + elif is_deferred: + pending_text += ( + "\n\n💡 This is a technical question that can be deferred to the dev phase. " + "The user may answer now, or choose to defer it. " + "If they choose to defer, pass " + f'answer="[deferred]" with session_id="{session_id}".' + ) + return Result.ok( MCPToolResult( content=( MCPContentItem( type=ContentType.TEXT, - text=f"Session {session_id}\n\n{pending_question}", + text=pending_text, ), ), is_error=False, @@ -799,6 +862,7 @@ async def _handle_answer( "question": pending_question, "is_complete": False, "classification": classification, + "skip_eligible": skip_eligible, "deferred_this_round": [], "decide_later_this_round": [], "interview_complete": False, @@ -811,6 +875,13 @@ async def _handle_answer( ) ) + # ── Per-round diff snapshot — must be BEFORE any skip/record call ── + # Snapshot list lengths here so that items appended inside + # skip_as_decide_later() / skip_as_deferred() are captured in the + # per-round diff returned at the end of this call. + deferred_before = len(engine.deferred_items) + decide_later_before = len(engine.decide_later_items) + # Record answer if provided if answer and not state.rounds: return Result.err( @@ -824,20 +895,55 @@ async def _handle_answer( if state.rounds[-1].user_response is None: state.rounds.pop() - record_result = await engine.record_response(state, answer, last_question) - if record_result.is_err: - return Result.err( - MCPToolError( - str(record_result.error), - tool_name="ouroboros_pm_interview", + # ── User chose to skip (decide later / defer to dev) ─── + # The main session detects classification via response_meta + # and offers skip options. The user's choice arrives as: + # answer="[decide_later]" → skip_as_decide_later() + # answer="[deferred]" → skip_as_deferred() + # Guard: only honour the sentinel when the last question was + # actually classified as that type. 
If a client sends + # "[decide_later]" for a passthrough/reframed question, treat + # it as a normal answer so no data is silently discarded. + stripped = answer.strip() + last_classification = _last_classification(engine) + if stripped == "[decide_later]" and last_classification == "decide_later": + skip_result = await engine.skip_as_decide_later(state, last_question) + if skip_result.is_err: + return Result.err( + MCPToolError( + str(skip_result.error), + tool_name="ouroboros_pm_interview", + ) ) - ) - state = record_result.value - state.clear_stored_ambiguity() + state = skip_result.value + state.clear_stored_ambiguity() + elif stripped == "[deferred]" and last_classification == "deferred": + skip_result = await engine.skip_as_deferred(state, last_question) + if skip_result.is_err: + return Result.err( + MCPToolError( + str(skip_result.error), + tool_name="ouroboros_pm_interview", + ) + ) + state = skip_result.value + state.clear_stored_ambiguity() + else: + record_result = await engine.record_response(state, answer, last_question) + if record_result.is_err: + return Result.err( + MCPToolError( + str(record_result.error), + tool_name="ouroboros_pm_interview", + ) + ) + state = record_result.value + state.clear_stored_ambiguity() # ── Completion check (AC 12) ───────────────────────────── # Completion is determined by engine ambiguity scoring. - # User controls when to stop. + # When complete, auto-generate the PM document immediately + # (no separate "generate" call needed from the skill). 
completion = await _check_completion(state, engine) if completion is not None: # Mark interview as complete @@ -852,21 +958,76 @@ async def _handle_answer( ) _save_pm_meta(session_id, engine, cwd=cwd, data_dir=self.data_dir) + log.info( + "pm_handler.interview_complete", + session_id=session_id, + **completion, + ) + + # Auto-generate PM document on completion + seed_result = await engine.generate_pm_seed(state) + if seed_result.is_err: + # Generation failed — still report completion but without document + summary_text = ( + f"Interview complete but PM generation failed: {seed_result.error}\n" + f"Session ID: {session_id}\n" + f'Retry with: action="generate", session_id="{session_id}"' + ) + return Result.ok( + MCPToolResult( + content=(MCPContentItem(type=ContentType.TEXT, text=summary_text),), + is_error=False, + meta={ + "session_id": session_id, + "is_complete": True, + "generation_failed": True, + **completion, + }, + ) + ) + + seed = seed_result.value + try: + seed_path = engine.save_pm_seed(seed) + pm_output_dir = Path(cwd) / ".ouroboros" + pm_path = save_pm_document(seed, output_dir=pm_output_dir) + except Exception as e: + log.error("pm_handler.save_failed", error=str(e), session_id=session_id) + summary_text = ( + f"Interview complete but saving PM artifacts failed: {e}\n" + f"Session ID: {session_id}\n" + f'Retry with: action="generate", session_id="{session_id}"' + ) + return Result.ok( + MCPToolResult( + content=(MCPContentItem(type=ContentType.TEXT, text=summary_text),), + is_error=False, + meta={ + "session_id": session_id, + "is_complete": True, + "generation_failed": True, + **completion, + }, + ) + ) + decide_later_summary = engine.format_decide_later_summary() summary_text = ( - f"Interview complete. Session ID: {session_id}\n" + f"Interview complete. 
PM document generated.\n\n" + f"Session ID: {session_id}\n" f"Rounds completed: {completion['rounds_completed']}\n" f"Completion reason: {completion['completion_reason']}\n" ) if completion.get("ambiguity_score") is not None: summary_text += f"Ambiguity score: {completion['ambiguity_score']:.2f}\n" summary_text += ( - f"\nDeferred items: {len(engine.deferred_items)}\n" - f"Decide-later items: {len(engine.decide_later_items)}\n" + f"\nPM document: {pm_path}\n" + f"Seed: {seed_path}\n" + f"\nDeferred items: {len(seed.deferred_items)}\n" + f"Decide-later items: {len(seed.decide_later_items)}\n" ) if decide_later_summary: summary_text += f"\n{decide_later_summary}\n" - summary_text += f'\nGenerate PM with: action="generate", session_id="{session_id}"' response_meta = { "session_id": session_id, @@ -878,14 +1039,10 @@ async def _handle_answer( **completion, "deferred_count": len(engine.deferred_items), "decide_later_count": len(engine.decide_later_items), + "seed_path": str(seed_path), + "pm_path": str(pm_path), } - log.info( - "pm_handler.interview_complete", - session_id=session_id, - **completion, - ) - return Result.ok( MCPToolResult( content=( @@ -899,11 +1056,6 @@ async def _handle_answer( ) ) - # ── Core diff computation (AC 8) ────────────────────────── - # Snapshot list lengths BEFORE ask_next_question - deferred_before = len(engine.deferred_items) - decide_later_before = len(engine.decide_later_items) - question_result = await engine.ask_next_question(state) if question_result.is_err: error_msg = str(question_result.error) @@ -958,6 +1110,11 @@ async def _handle_answer( # Extract classification from the last classify call classification = _last_classification(engine) + # Signal to the caller that the user can skip this question + is_decide_later = classification == "decide_later" + is_deferred = classification == "deferred" + skip_eligible = is_decide_later or is_deferred + response_meta = { "session_id": session_id, "input_type": "freeText", @@ -965,6 +1122,7 
@@ async def _handle_answer( "question": question, "is_complete": False, "classification": classification, + "skip_eligible": skip_eligible, "deferred_this_round": diff["new_deferred"], "decide_later_this_round": diff["new_decide_later"], # Keep backward-compat fields from AC 8 @@ -977,16 +1135,34 @@ async def _handle_answer( "pm_handler.question_asked", session_id=session_id, classification=classification, + skip_eligible=skip_eligible, has_pending_reframe=pending_reframe is not None, **diff, ) + # Build response text — include skip hint when applicable + response_text = f"Session {session_id}\n\n{question}" + if is_decide_later: + response_text += ( + "\n\n💡 This question can be deferred. " + 'The user may answer now, or choose "decide later" to skip it. ' + "If they choose to decide later, pass " + f'answer="[decide_later]" with session_id="{session_id}".' + ) + elif is_deferred: + response_text += ( + "\n\n💡 This is a technical question that can be deferred to the dev phase. " + "The user may answer now, or choose to defer it. " + "If they choose to defer, pass " + f'answer="[deferred]" with session_id="{session_id}".' 
+ ) + return Result.ok( MCPToolResult( content=( MCPContentItem( type=ContentType.TEXT, - text=f"Session {session_id}\n\n{question}", + text=response_text, ), ), is_error=False, @@ -1050,12 +1226,33 @@ async def _handle_generate( seed = seed_result.value - # Save seed to ~/.ouroboros/seeds/ (idempotent — overwrites on retry) - seed_path = engine.save_pm_seed(seed) - - # Save human-readable pm.md to {cwd}/.ouroboros/ (consistent with CLI path) - pm_output_dir = Path(cwd) / ".ouroboros" - pm_path = save_pm_document(seed, output_dir=pm_output_dir) + # Save seed and PM document with recovery contract + try: + seed_path = engine.save_pm_seed(seed) + pm_output_dir = Path(cwd) / ".ouroboros" + pm_path = save_pm_document(seed, output_dir=pm_output_dir) + except Exception as e: + log.error("pm_handler.generate_save_failed", error=str(e), session_id=session_id) + return Result.ok( + MCPToolResult( + content=( + MCPContentItem( + type=ContentType.TEXT, + text=( + f"PM generation succeeded but saving artifacts failed: {e}\n" + f"Session ID: {session_id}\n" + f'Retry with: action="generate", session_id="{session_id}"' + ), + ), + ), + is_error=False, + meta={ + "session_id": session_id, + "is_complete": True, + "generation_failed": True, + }, + ) + ) return Result.ok( MCPToolResult( @@ -1066,7 +1263,8 @@ async def _handle_generate( f"PM seed generated: {seed.product_name}\n" f"Seed: {seed_path}\n" f"PM document: {pm_path}\n\n" - f"Decide-later items: {len(seed.deferred_items) + len(seed.decide_later_items)}" + f"Deferred items: {len(seed.deferred_items)}\n" + f"Decide-later items: {len(seed.decide_later_items)}" ), ), ), diff --git a/tests/unit/bigbang/test_decide_later_items.py b/tests/unit/bigbang/test_decide_later_items.py index 1c2920cf..2f37776b 100644 --- a/tests/unit/bigbang/test_decide_later_items.py +++ b/tests/unit/bigbang/test_decide_later_items.py @@ -88,8 +88,12 @@ class TestDecideLaterItemsList: """Tests for decide-later items stored as original question text in 
list.""" @pytest.mark.asyncio - async def test_decide_later_stores_original_question_text(self): - """DECIDE_LATER stores original question text in decide_later_items list.""" + async def test_decide_later_returns_question_to_caller(self): + """DECIDE_LATER returns the question to the caller without auto-answering. + + The caller (main session) is responsible for presenting the decide-later + option and recording the item in decide_later_items when chosen. + """ engine = _make_engine() state = _make_state() @@ -97,7 +101,6 @@ async def test_decide_later_stores_original_question_text(self): # Inner engine generates the question engine.inner.ask_next_question.return_value = Result.ok(original_q) - engine.inner.record_response.return_value = Result.ok(state) # Classifier says DECIDE_LATER classification = ClassificationResult( @@ -110,40 +113,32 @@ async def test_decide_later_stores_original_question_text(self): ) assert classification.output_type == ClassifierOutputType.DECIDE_LATER - # First call returns DECIDE_LATER, second call returns a PASSTHROUGH - passthrough_q = "Who are the primary users?" 
- passthrough_classification = ClassificationResult( - original_question=passthrough_q, - category=QuestionCategory.PLANNING, - reframed_question=passthrough_q, - reasoning="Planning question.", - ) - - engine.classifier.classify = AsyncMock( - side_effect=[Result.ok(classification), Result.ok(passthrough_classification)] - ) - engine.inner.ask_next_question = AsyncMock( - side_effect=[Result.ok(original_q), Result.ok(passthrough_q)] - ) + engine.classifier.classify = AsyncMock(return_value=Result.ok(classification)) result = await engine.ask_next_question(state) assert result.is_ok - # The decide-later question is stored in decide_later_items - assert original_q in engine.decide_later_items - # It should NOT be in deferred_items - assert original_q not in engine.deferred_items + # The question is returned to the caller (not auto-answered) + assert result.value == original_q + # decide_later_items is NOT populated here — caller handles that + assert engine.decide_later_items == [] + # No recursive call — inner engine asked only once + assert engine.inner.ask_next_question.call_count == 1 + # No auto-response recorded + engine.inner.record_response.assert_not_called() @pytest.mark.asyncio - async def test_decide_later_items_list_is_separate_from_deferred(self): - """decide_later_items and deferred_items are independent lists.""" + async def test_decide_later_does_not_recurse(self): + """DECIDE_LATER returns immediately — no recursive ask_next_question call. + + Unlike DEFERRED (which auto-skips and recurses), DECIDE_LATER returns + the question to the caller so the user can choose decide-later. + """ engine = _make_engine() state = _make_state() decide_later_q = "How should we handle data migration?" - deferred_q = "Which ORM should we use for the database layer?" 
- # Set up two rounds: first DECIDE_LATER, then DEFERRED, then PASSTHROUGH decide_later_class = ClassificationResult( original_question=decide_later_q, category=QuestionCategory.DECIDE_LATER, @@ -152,103 +147,50 @@ async def test_decide_later_items_list_is_separate_from_deferred(self): decide_later=True, placeholder_response="TBD after schema design.", ) - deferred_class = ClassificationResult( - original_question=deferred_q, - category=QuestionCategory.DEVELOPMENT, - reframed_question=deferred_q, - reasoning="Deeply technical.", - defer_to_dev=True, - ) - passthrough_q = "What are the success metrics?" - passthrough_class = ClassificationResult( - original_question=passthrough_q, - category=QuestionCategory.PLANNING, - reframed_question=passthrough_q, - reasoning="Planning.", - ) - engine.classifier.classify = AsyncMock( - side_effect=[ - Result.ok(decide_later_class), - Result.ok(deferred_class), - Result.ok(passthrough_class), - ] - ) - engine.inner.ask_next_question = AsyncMock( - side_effect=[ - Result.ok(decide_later_q), - Result.ok(deferred_q), - Result.ok(passthrough_q), - ] - ) - engine.inner.record_response = AsyncMock(return_value=Result.ok(state)) + engine.classifier.classify = AsyncMock(return_value=Result.ok(decide_later_class)) + engine.inner.ask_next_question = AsyncMock(return_value=Result.ok(decide_later_q)) result = await engine.ask_next_question(state) assert result.is_ok - assert result.value == passthrough_q - - # Verify separation - assert decide_later_q in engine.decide_later_items - assert decide_later_q not in engine.deferred_items - assert deferred_q in engine.deferred_items - assert deferred_q not in engine.decide_later_items + # Returns the decide-later question directly + assert result.value == decide_later_q + # No auto-recording — caller handles decide-later flow + engine.inner.record_response.assert_not_called() + # Only one call to inner engine (no recursion) + assert engine.inner.ask_next_question.call_count == 1 + # 
decide_later_items not populated by engine (caller responsibility) + assert engine.decide_later_items == [] @pytest.mark.asyncio - async def test_multiple_decide_later_items_accumulate(self): - """Multiple DECIDE_LATER questions all accumulate in the list.""" + async def test_decide_later_classification_recorded(self): + """DECIDE_LATER classification is recorded so caller can detect it.""" engine = _make_engine() state = _make_state() q1 = "What rate limiting strategy should we use?" - q2 = "How should we handle GDPR data retention?" - q3 = "What's the target latency for API responses?" - passthrough_q = "What is the core problem?" - - classifications = [ - ClassificationResult( - original_question=q1, - category=QuestionCategory.DECIDE_LATER, - reframed_question=q1, - reasoning="Premature.", - decide_later=True, - placeholder_response="TBD.", - ), - ClassificationResult( - original_question=q2, - category=QuestionCategory.DECIDE_LATER, - reframed_question=q2, - reasoning="Needs legal review.", - decide_later=True, - placeholder_response="Needs legal input.", - ), - ClassificationResult( - original_question=q3, - category=QuestionCategory.DECIDE_LATER, - reframed_question=q3, - reasoning="Depends on infra.", - decide_later=True, - placeholder_response="After infra decision.", - ), - ClassificationResult( - original_question=passthrough_q, - category=QuestionCategory.PLANNING, - reframed_question=passthrough_q, - reasoning="Planning.", - ), - ] - engine.classifier.classify = AsyncMock(side_effect=[Result.ok(c) for c in classifications]) - engine.inner.ask_next_question = AsyncMock( - side_effect=[Result.ok(q1), Result.ok(q2), Result.ok(q3), Result.ok(passthrough_q)] + classification = ClassificationResult( + original_question=q1, + category=QuestionCategory.DECIDE_LATER, + reframed_question=q1, + reasoning="Premature.", + decide_later=True, + placeholder_response="TBD.", ) - engine.inner.record_response = AsyncMock(return_value=Result.ok(state)) + + 
engine.classifier.classify = AsyncMock(return_value=Result.ok(classification)) + engine.inner.ask_next_question = AsyncMock(return_value=Result.ok(q1)) result = await engine.ask_next_question(state) assert result.is_ok - assert len(engine.decide_later_items) == 3 - assert engine.decide_later_items == [q1, q2, q3] + assert result.value == q1 + # Classification is recorded so get_last_classification works + assert len(engine.classifications) == 1 + assert engine.classifications[0].output_type == ClassifierOutputType.DECIDE_LATER + assert engine.get_last_classification() == "decide_later" def test_decide_later_items_empty_by_default(self): """decide_later_items starts as empty list.""" @@ -531,3 +473,110 @@ def test_format_decide_later_summary_preserves_original_question_text(self): summary = engine.format_decide_later_summary() assert original_text in summary + + +class TestSkipAsDecideLater: + """Tests for user-initiated decide-later skip via skip_as_decide_later().""" + + @pytest.mark.asyncio + async def test_skip_records_question_in_decide_later_items(self): + """skip_as_decide_later records the question in decide_later_items.""" + engine = _make_engine() + state = _make_state() + engine.inner.record_response.return_value = Result.ok(state) + + question = "What caching strategy should we use?" + result = await engine.skip_as_decide_later(state, question) + + assert result.is_ok + assert question in engine.decide_later_items + + @pytest.mark.asyncio + async def test_skip_does_not_duplicate_existing_item(self): + """skip_as_decide_later does not duplicate if question already in list.""" + engine = _make_engine() + state = _make_state() + engine.inner.record_response.return_value = Result.ok(state) + + question = "What caching strategy should we use?" 
+ engine.decide_later_items = [question] + + result = await engine.skip_as_decide_later(state, question) + + assert result.is_ok + assert engine.decide_later_items.count(question) == 1 + + @pytest.mark.asyncio + async def test_skip_records_placeholder_response_in_inner_engine(self): + """skip_as_decide_later feeds a placeholder response to inner engine.""" + engine = _make_engine() + state = _make_state() + engine.inner.record_response.return_value = Result.ok(state) + + question = "What deployment model should we use?" + await engine.skip_as_decide_later(state, question) + + engine.inner.record_response.assert_called_once() + call_args = engine.inner.record_response.call_args + recorded_response = call_args[0][1] + assert "[Decide later]" in recorded_response + + @pytest.mark.asyncio + async def test_skip_advances_interview_state(self): + """skip_as_decide_later returns the updated state from record_response.""" + engine = _make_engine() + state = _make_state(current_round=3) + updated_state = _make_state(current_round=4) + engine.inner.record_response.return_value = Result.ok(updated_state) + + question = "What rate limiting approach?" + result = await engine.skip_as_decide_later(state, question) + + assert result.is_ok + assert result.value == updated_state + + @pytest.mark.asyncio + async def test_skip_does_not_add_to_deferred_items(self): + """skip_as_decide_later does not affect deferred_items.""" + engine = _make_engine() + state = _make_state() + engine.inner.record_response.return_value = Result.ok(state) + + question = "What caching strategy?" 
+ await engine.skip_as_decide_later(state, question) + + assert engine.deferred_items == [] + assert question in engine.decide_later_items + + @pytest.mark.asyncio + async def test_skip_multiple_questions_accumulate(self): + """Multiple skip_as_decide_later calls accumulate items.""" + engine = _make_engine() + state = _make_state() + engine.inner.record_response.return_value = Result.ok(state) + + q1 = "What caching strategy?" + q2 = "What rate limiting?" + q3 = "What deployment model?" + + await engine.skip_as_decide_later(state, q1) + await engine.skip_as_decide_later(state, q2) + await engine.skip_as_decide_later(state, q3) + + assert engine.decide_later_items == [q1, q2, q3] + + @pytest.mark.asyncio + async def test_skip_propagates_record_response_error(self): + """skip_as_decide_later propagates errors from record_response.""" + engine = _make_engine() + state = _make_state() + from ouroboros.core.errors import ValidationError as VE + + engine.inner.record_response.return_value = Result.err( + VE("State save failed", field="state") + ) + + question = "What caching strategy?" 
+ result = await engine.skip_as_decide_later(state, question) + + assert result.is_err diff --git a/tests/unit/bigbang/test_pm_interview.py b/tests/unit/bigbang/test_pm_interview.py index 7565d596..b54bb2ac 100644 --- a/tests/unit/bigbang/test_pm_interview.py +++ b/tests/unit/bigbang/test_pm_interview.py @@ -431,17 +431,16 @@ async def test_dev_question_gets_reframed(self, tmp_path: Path) -> None: assert engine.classifications[0].category == QuestionCategory.DEVELOPMENT @pytest.mark.asyncio - async def test_deferred_question_skipped(self, tmp_path: Path) -> None: - """DEV-only questions marked as defer_to_dev are skipped and tracked.""" + async def test_deferred_question_returned_to_user(self, tmp_path: Path) -> None: + """DEV-only questions marked as defer_to_dev are returned to the user.""" adapter = _make_adapter() engine = _make_engine(adapter, tmp_path) dev_q = "Should we use gRPC or REST for inter-service communication?" - planning_q = "Who are the end users?" adapter.complete = AsyncMock( side_effect=[ - # First question generation: dev question + # Question generation: dev question Result.ok(_mock_completion(dev_q)), # Classification: defer to dev Result.ok( @@ -456,21 +455,6 @@ async def test_deferred_question_skipped(self, tmp_path: Path) -> None: ) ) ), - # Second question generation (recursive call): planning question - Result.ok(_mock_completion(planning_q)), - # Classification: planning - Result.ok( - _mock_completion( - json.dumps( - { - "category": "planning", - "reframed_question": planning_q, - "reasoning": "User question", - "defer_to_dev": False, - } - ) - ) - ), ] ) @@ -482,85 +466,46 @@ async def test_deferred_question_skipped(self, tmp_path: Path) -> None: result = await engine.ask_next_question(state) assert result.is_ok - assert result.value == planning_q - assert dev_q in engine.deferred_items - # The deferred question gets an automatic response fed back to the - # inner InterviewEngine so it knows the question was handled - assert 
len(state.rounds) == 1 - assert "[Deferred to development phase]" in state.rounds[0].user_response - assert state.rounds[0].question == dev_q + # The user sees the deferred question directly + assert result.value == dev_q + # deferred_items NOT populated yet (user hasn't chosen to skip) + assert dev_q not in engine.deferred_items + # No rounds auto-recorded + assert len(state.rounds) == 0 @pytest.mark.asyncio - async def test_deferred_auto_response_content(self, tmp_path: Path) -> None: - """DEV-classified deferred questions get an automatic response fed back - to the InterviewEngine with appropriate marker text.""" + async def test_user_can_skip_as_deferred(self, tmp_path: Path) -> None: + """User can defer a technical question via skip_as_deferred().""" adapter = _make_adapter() engine = _make_engine(adapter, tmp_path) dev_q = "What container orchestration platform should we use — Kubernetes or ECS?" - planning_q = "What are the key business goals?" - - adapter.complete = AsyncMock( - side_effect=[ - Result.ok(_mock_completion(dev_q)), - Result.ok( - _mock_completion( - json.dumps( - { - "category": "development", - "reframed_question": dev_q, - "reasoning": "Infrastructure choice is purely technical", - "defer_to_dev": True, - } - ) - ) - ), - Result.ok(_mock_completion(planning_q)), - Result.ok( - _mock_completion( - json.dumps( - { - "category": "planning", - "reframed_question": planning_q, - "reasoning": "Business question", - "defer_to_dev": False, - } - ) - ) - ), - ] - ) state = InterviewState( interview_id="test_auto_response", initial_context="Build a SaaS platform", ) - result = await engine.ask_next_question(state) + # User chooses to defer + adapter.complete = AsyncMock(return_value=Result.ok(_mock_completion("ok"))) + result = await engine.skip_as_deferred(state, dev_q) assert result.is_ok - assert result.value == planning_q - - # Verify the auto-response was properly recorded - deferred_round = state.rounds[0] - assert deferred_round.question == 
dev_q - assert "[Deferred to development phase]" in deferred_round.user_response - assert "development interview" in deferred_round.user_response - assert deferred_round.round_number == 1 + assert dev_q in engine.deferred_items + # Verify the deferral response was properly recorded + assert len(state.rounds) == 1 + assert "[Deferred to development phase]" in state.rounds[0].user_response @pytest.mark.asyncio - async def test_multiple_deferred_questions_all_auto_responded(self, tmp_path: Path) -> None: - """Multiple consecutive deferred questions each get auto-responses.""" + async def test_deferred_question_returned_not_auto_skipped(self, tmp_path: Path) -> None: + """DEFERRED questions are returned to user, not auto-skipped.""" adapter = _make_adapter() engine = _make_engine(adapter, tmp_path) dev_q1 = "Should we use gRPC or REST?" - dev_q2 = "Which CI/CD pipeline tool should we use?" - planning_q = "Who are the target users?" adapter.complete = AsyncMock( side_effect=[ - # First dev question Result.ok(_mock_completion(dev_q1)), Result.ok( _mock_completion( @@ -574,34 +519,6 @@ async def test_multiple_deferred_questions_all_auto_responded(self, tmp_path: Pa ) ) ), - # Second dev question - Result.ok(_mock_completion(dev_q2)), - Result.ok( - _mock_completion( - json.dumps( - { - "category": "development", - "reframed_question": dev_q2, - "reasoning": "CI/CD is infra", - "defer_to_dev": True, - } - ) - ) - ), - # Finally a planning question - Result.ok(_mock_completion(planning_q)), - Result.ok( - _mock_completion( - json.dumps( - { - "category": "planning", - "reframed_question": planning_q, - "reasoning": "User question", - "defer_to_dev": False, - } - ) - ) - ), ] ) @@ -613,18 +530,11 @@ async def test_multiple_deferred_questions_all_auto_responded(self, tmp_path: Pa result = await engine.ask_next_question(state) assert result.is_ok - assert result.value == planning_q - - # Both dev questions should be tracked as deferred - assert dev_q1 in engine.deferred_items 
- assert dev_q2 in engine.deferred_items - - # Both should have auto-responses recorded in the inner engine - assert len(state.rounds) == 2 - assert "[Deferred to development phase]" in state.rounds[0].user_response - assert "[Deferred to development phase]" in state.rounds[1].user_response - assert state.rounds[0].question == dev_q1 - assert state.rounds[1].question == dev_q2 + # First deferred question returned directly — no recursion + assert result.value == dev_q1 + # Not yet in deferred_items (user hasn't chosen to skip) + assert dev_q1 not in engine.deferred_items + assert len(state.rounds) == 0 @pytest.mark.asyncio async def test_classification_failure_returns_original(self, tmp_path: Path) -> None: @@ -652,18 +562,24 @@ async def test_classification_failure_returns_original(self, tmp_path: Path) -> assert result.value == question @pytest.mark.asyncio - async def test_decide_later_auto_responds_with_placeholder(self, tmp_path: Path) -> None: - """Decide-later questions are auto-answered with a placeholder and skipped.""" + async def test_decide_later_returns_question_without_auto_answering( + self, tmp_path: Path + ) -> None: + """Decide-later questions are returned to the caller for user decision. + + The engine no longer auto-answers with a placeholder or recurses. + The caller (main session) detects classification == "decide_later" + and presents the user with a decide-later option. + """ adapter = _make_adapter() engine = _make_engine(adapter, tmp_path) decide_later_q = "How should we handle scaling when we reach 1M users?" placeholder = "This will be determined after MVP launch and initial user metrics. Marking as a decision point for later." - planning_q = "Who are the target users?" 
adapter.complete = AsyncMock( side_effect=[ - # First question generation: decide-later question + # Question generation: decide-later question Result.ok(_mock_completion(decide_later_q)), # Classification: decide_later Result.ok( @@ -680,22 +596,7 @@ async def test_decide_later_auto_responds_with_placeholder(self, tmp_path: Path) ) ) ), - # Second question generation (recursive call): planning question - Result.ok(_mock_completion(planning_q)), - # Classification: planning - Result.ok( - _mock_completion( - json.dumps( - { - "category": "planning", - "reframed_question": planning_q, - "reasoning": "User question", - "defer_to_dev": False, - "decide_later": False, - } - ) - ) - ), + # No second question generation — no recursion ] ) @@ -707,14 +608,14 @@ async def test_decide_later_auto_responds_with_placeholder(self, tmp_path: Path) result = await engine.ask_next_question(state) assert result.is_ok - # The PM sees the next real question, not the decide-later one - assert result.value == planning_q - # The decide-later question is tracked in decide_later_items - assert decide_later_q in engine.decide_later_items - # A placeholder response was auto-recorded in the interview state - assert len(state.rounds) == 1 - assert "[Decide later]" in state.rounds[0].user_response - assert placeholder in state.rounds[0].user_response + # The decide-later question is returned to the caller + assert result.value == decide_later_q + # decide_later_items is NOT populated — caller handles that + assert engine.decide_later_items == [] + # No auto-response recorded — state has no rounds + assert len(state.rounds) == 0 + # Classification is recorded for caller to detect + assert engine.get_last_classification() == "decide_later" @pytest.mark.asyncio async def test_decide_later_classification_result_properties(self) -> None: @@ -729,7 +630,8 @@ async def test_decide_later_classification_result_properties(self) -> None: ) assert result.output_type == ClassifierOutputType.DECIDE_LATER - 
assert result.question_for_pm == "" # Should not be shown to PM + # Returned to user so they can choose to answer or defer + assert result.question_for_pm == "How should we handle scaling?" class TestRecordResponse: @@ -960,9 +862,13 @@ async def test_generates_seed_from_interview(self, tmp_path: Path) -> None: @pytest.mark.asyncio async def test_includes_deferred_items(self, tmp_path: Path) -> None: - """Deferred items from classifier are included in PMSeed.""" + """LLM-extracted deferred items are merged with engine-tracked items in PMSeed.""" adapter = _make_adapter() engine = _make_engine(adapter, tmp_path) + # Raw engine items are passed to the extraction prompt as context, + # so the LLM should summarise them. Engine-tracked items are then + # merged with the LLM-extracted output and deduplicated to ensure + # no user-selected skip is lost from the final seed. engine.deferred_items = ["Should we use gRPC or REST?"] extraction_response = json.dumps( @@ -993,6 +899,7 @@ async def test_includes_deferred_items(self, tmp_path: Path) -> None: assert result.is_ok seed = result.value assert "Database selection" in seed.deferred_items + # Engine-tracked item is merged back to prevent data loss assert "Should we use gRPC or REST?" in seed.deferred_items @pytest.mark.asyncio diff --git a/tests/unit/bigbang/test_question_classifier.py b/tests/unit/bigbang/test_question_classifier.py index 0619ee3a..0c95e3a6 100644 --- a/tests/unit/bigbang/test_question_classifier.py +++ b/tests/unit/bigbang/test_question_classifier.py @@ -181,8 +181,8 @@ def test_reframed_returns_reframed_question(self) -> None: ) assert result.question_for_pm == "How do you expect users to interact with data?" 
- def test_deferred_returns_empty_string(self) -> None: - """DEFERRED returns empty string — question should not be shown to PM.""" + def test_deferred_returns_original_question(self) -> None: + """DEFERRED returns original question — shown to user for defer/answer choice.""" result = ClassificationResult( original_question="Should we use gRPC or REST?", category=QuestionCategory.DEVELOPMENT, @@ -190,10 +190,10 @@ def test_deferred_returns_empty_string(self) -> None: reasoning="Purely technical", defer_to_dev=True, ) - assert result.question_for_pm == "" + assert result.question_for_pm == "Should we use gRPC or REST?" - def test_decide_later_returns_empty_string(self) -> None: - """DECIDE_LATER returns empty string — auto-answered, not shown to PM.""" + def test_decide_later_returns_original_question(self) -> None: + """DECIDE_LATER returns original question — shown to user for decision.""" result = ClassificationResult( original_question="How will this scale to millions of users?", category=QuestionCategory.DECIDE_LATER, @@ -202,7 +202,7 @@ def test_decide_later_returns_empty_string(self) -> None: decide_later=True, placeholder_response="To be determined post-launch.", ) - assert result.question_for_pm == "" + assert result.question_for_pm == "How will this scale to millions of users?" 
# ────────────────────────────────────────────────────────────── @@ -418,5 +418,5 @@ async def test_classify_decide_later_question(self) -> None: assert cr.category == QuestionCategory.DECIDE_LATER assert cr.decide_later is True assert cr.output_type == ClassifierOutputType.DECIDE_LATER - assert cr.question_for_pm == "" + assert cr.question_for_pm == question # returned to user for decision assert "initial deployment" in cr.placeholder_response diff --git a/tests/unit/mcp/tools/test_pm_handler.py b/tests/unit/mcp/tools/test_pm_handler.py index 639432b4..6a80bb4b 100644 --- a/tests/unit/mcp/tools/test_pm_handler.py +++ b/tests/unit/mcp/tools/test_pm_handler.py @@ -10,6 +10,11 @@ from ouroboros.bigbang.interview import InterviewRound, InterviewState from ouroboros.bigbang.pm_interview import PMInterviewEngine +from ouroboros.bigbang.question_classifier import ( + ClassificationResult, + ClassifierOutputType, + QuestionCategory, +) from ouroboros.core.types import Result from ouroboros.mcp.tools.pm_handler import ( _DATA_DIR, @@ -27,6 +32,24 @@ # ── Helpers ────────────────────────────────────────────────────── +def _make_classification(output_type: ClassifierOutputType) -> ClassificationResult: + """Create a minimal ClassificationResult stub for a given output type.""" + category_map = { + ClassifierOutputType.PASSTHROUGH: QuestionCategory.PLANNING, + ClassifierOutputType.REFRAMED: QuestionCategory.DEVELOPMENT, + ClassifierOutputType.DEFERRED: QuestionCategory.DEVELOPMENT, + ClassifierOutputType.DECIDE_LATER: QuestionCategory.DECIDE_LATER, + } + return ClassificationResult( + original_question="stub question", + category=category_map[output_type], + reframed_question="stub question", + reasoning="stub", + defer_to_dev=(output_type == ClassifierOutputType.DEFERRED), + decide_later=(output_type == ClassifierOutputType.DECIDE_LATER), + ) + + def _make_engine_stub( deferred: list[str] | None = None, decide_later: list[str] | None = None, @@ -452,6 +475,221 @@ async def 
test_handle_answer_includes_interview_complete_false(self, tmp_path: P assert result.is_ok assert result.value.meta["interview_complete"] is False + @pytest.mark.asyncio + async def test_handle_answer_decide_later_skips_and_records(self, tmp_path: Path) -> None: + """When answer='[decide_later]', skip_as_decide_later is called instead of record_response.""" + engine = make_pm_engine_mock(deferred_items=[], decide_later_items=[]) + # Set up classification so the guard accepts the sentinel + engine.classifications = [_make_classification(ClassifierOutputType.DECIDE_LATER)] + + question = "What caching strategy should we use?" + state = _make_state( + rounds=[ + InterviewRound(round_number=1, question=question, user_response=None), + ], + ) + + engine.load_state = AsyncMock(return_value=Result.ok(state)) + + # skip_as_decide_later should be called (not record_response) + async def mock_skip(s: Any, q: str) -> Result: + engine.decide_later_items.append(q) + return Result.ok(s) + + engine.skip_as_decide_later = AsyncMock(side_effect=mock_skip) + engine.record_response = AsyncMock(return_value=Result.ok(state)) + + engine.ask_next_question = AsyncMock(return_value=Result.ok("Next question?")) + engine.save_state = AsyncMock(return_value=Result.ok(tmp_path / "state.json")) + + _save_pm_meta("sess-dl", engine, cwd="/tmp/proj", data_dir=tmp_path) + + handler = PMInterviewHandler(pm_engine=engine, data_dir=tmp_path) + + result = await handler.handle( + { + "session_id": "sess-dl", + "answer": "[decide_later]", + } + ) + + assert result.is_ok + # skip_as_decide_later was called + engine.skip_as_decide_later.assert_called_once_with(state, question) + # record_response was NOT called (skip_as_decide_later handles it internally) + engine.record_response.assert_not_called() + # The question was recorded in decide_later_items + assert question in engine.decide_later_items + + @pytest.mark.asyncio + async def test_handle_answer_decide_later_with_whitespace(self, tmp_path: Path) -> 
None: + """Whitespace-padded '[decide_later]' is still recognized.""" + engine = make_pm_engine_mock(deferred_items=[], decide_later_items=[]) + # Set up classification so the guard accepts the sentinel + engine.classifications = [_make_classification(ClassifierOutputType.DECIDE_LATER)] + + question = "What rate limiting approach?" + state = _make_state( + rounds=[ + InterviewRound(round_number=1, question=question, user_response=None), + ], + ) + + engine.load_state = AsyncMock(return_value=Result.ok(state)) + engine.skip_as_decide_later = AsyncMock(return_value=Result.ok(state)) + engine.ask_next_question = AsyncMock(return_value=Result.ok("Next question?")) + engine.save_state = AsyncMock(return_value=Result.ok(tmp_path / "state.json")) + + _save_pm_meta("sess-dl2", engine, cwd="/tmp/proj", data_dir=tmp_path) + + handler = PMInterviewHandler(pm_engine=engine, data_dir=tmp_path) + + result = await handler.handle( + { + "session_id": "sess-dl2", + "answer": " [decide_later] ", + } + ) + + assert result.is_ok + engine.skip_as_decide_later.assert_called_once() + + @pytest.mark.asyncio + async def test_handle_answer_deferred_skips_and_records(self, tmp_path: Path) -> None: + """When answer='[deferred]', skip_as_deferred is called instead of record_response.""" + engine = make_pm_engine_mock(deferred_items=[], decide_later_items=[]) + # Set up classification so the guard accepts the sentinel + engine.classifications = [_make_classification(ClassifierOutputType.DEFERRED)] + + question = "Should we use gRPC or REST for inter-service communication?" 
+ state = _make_state( + rounds=[ + InterviewRound(round_number=1, question=question, user_response=None), + ], + ) + + engine.load_state = AsyncMock(return_value=Result.ok(state)) + + async def mock_skip(s: Any, q: str) -> Result: + engine.deferred_items.append(q) + return Result.ok(s) + + engine.skip_as_deferred = AsyncMock(side_effect=mock_skip) + engine.record_response = AsyncMock(return_value=Result.ok(state)) + + engine.ask_next_question = AsyncMock(return_value=Result.ok("Next question?")) + engine.save_state = AsyncMock(return_value=Result.ok(tmp_path / "state.json")) + + _save_pm_meta("sess-def", engine, cwd="/tmp/proj", data_dir=tmp_path) + + handler = PMInterviewHandler(pm_engine=engine, data_dir=tmp_path) + + result = await handler.handle( + { + "session_id": "sess-def", + "answer": "[deferred]", + } + ) + + assert result.is_ok + engine.skip_as_deferred.assert_called_once_with(state, question) + engine.record_response.assert_not_called() + assert question in engine.deferred_items + + + @pytest.mark.asyncio + async def test_decide_later_sentinel_for_passthrough_treated_as_normal_answer( + self, tmp_path: Path + ) -> None: + """[decide_later] sent for a passthrough question is recorded as a normal answer. + + Guards against clients blindly sending sentinels regardless of question type. + When the last classification is not 'decide_later', the sentinel must not + trigger skip_as_decide_later — it should fall through to record_response. + """ + engine = make_pm_engine_mock(deferred_items=[], decide_later_items=[]) + # Last classification is PASSTHROUGH, not DECIDE_LATER + engine.classifications = [_make_classification(ClassifierOutputType.PASSTHROUGH)] + + question = "What is the primary goal of the product?" 
+ state = _make_state( + rounds=[ + InterviewRound(round_number=1, question=question, user_response=None), + ], + ) + + engine.load_state = AsyncMock(return_value=Result.ok(state)) + engine.skip_as_decide_later = AsyncMock(return_value=Result.ok(state)) + engine.record_response = AsyncMock(return_value=Result.ok(state)) + engine.ask_next_question = AsyncMock(return_value=Result.ok("Next question?")) + engine.save_state = AsyncMock(return_value=Result.ok(tmp_path / "state.json")) + + _save_pm_meta("sess-passthrough-guard", engine, cwd="/tmp/proj", data_dir=tmp_path) + + handler = PMInterviewHandler(pm_engine=engine, data_dir=tmp_path) + + result = await handler.handle( + { + "session_id": "sess-passthrough-guard", + "answer": "[decide_later]", + } + ) + + assert result.is_ok + # Must NOT call skip_as_decide_later — the sentinel is treated as a normal answer + engine.skip_as_decide_later.assert_not_called() + # Must record the literal string as a normal response + engine.record_response.assert_called_once_with(state, "[decide_later]", question) + + @pytest.mark.asyncio + async def test_decide_later_skip_populates_per_round_diff(self, tmp_path: Path) -> None: + """Skipping via [decide_later] populates new_decide_later in response metadata. + + Verifies that the per-round diff snapshot is taken BEFORE the skip call + so that items added inside skip_as_decide_later() appear in the diff. + """ + engine = make_pm_engine_mock(deferred_items=[], decide_later_items=[]) + # Set up classification so the guard accepts the sentinel + engine.classifications = [_make_classification(ClassifierOutputType.DECIDE_LATER)] + + question = "What scaling approach suits future traffic?" 
+ state = _make_state( + rounds=[ + InterviewRound(round_number=1, question=question, user_response=None), + ], + ) + + engine.load_state = AsyncMock(return_value=Result.ok(state)) + + async def mock_skip(s: Any, q: str) -> Result: + engine.decide_later_items.append(q) + return Result.ok(s) + + engine.skip_as_decide_later = AsyncMock(side_effect=mock_skip) + engine.record_response = AsyncMock(return_value=Result.ok(state)) + engine.ask_next_question = AsyncMock(return_value=Result.ok("Next question?")) + engine.save_state = AsyncMock(return_value=Result.ok(tmp_path / "state.json")) + + _save_pm_meta("sess-diff-skip", engine, cwd="/tmp/proj", data_dir=tmp_path) + + handler = PMInterviewHandler(pm_engine=engine, data_dir=tmp_path) + + result = await handler.handle( + { + "session_id": "sess-diff-skip", + "answer": "[decide_later]", + } + ) + + assert result.is_ok + meta = result.value.meta + # The skipped question must appear in new_decide_later (per-round diff) + assert question in meta["new_decide_later"], ( + f"Expected '{question}' in new_decide_later, got {meta['new_decide_later']}" + ) + assert meta["decide_later_this_round"] == meta["new_decide_later"] + assert meta["decide_later_count"] == 1 + # ── Unit tests for _check_completion (AC 12) ───────────────────── @@ -652,6 +890,14 @@ async def test_handle_answer_completion_via_ambiguity(self, tmp_path: Path) -> N _save_pm_meta("sess-complete", engine, cwd="/tmp/proj", data_dir=tmp_path) + # Mock auto-generate on completion + seed_mock = MagicMock() + seed_mock.product_name = "Test Product" + seed_mock.deferred_items = ["d1"] + seed_mock.decide_later_items = ["dl1"] + engine.generate_pm_seed = AsyncMock(return_value=Result.ok(seed_mock)) + engine.save_pm_seed = MagicMock(return_value=tmp_path / "seed.json") + handler = PMInterviewHandler(pm_engine=engine, data_dir=tmp_path) # Mock _check_completion to return completion @@ -666,6 +912,9 @@ async def test_handle_answer_completion_via_ambiguity(self, tmp_path: Path) 
-> N "ouroboros.mcp.tools.pm_handler._check_completion", new_callable=AsyncMock, return_value=completion_meta, + ), patch( + "ouroboros.mcp.tools.pm_handler.save_pm_document", + return_value=tmp_path / "pm.md", ): result = await handler.handle( { @@ -684,10 +933,10 @@ async def test_handle_answer_completion_via_ambiguity(self, tmp_path: Path) -> N # Verify complete_interview was called engine.complete_interview.assert_called_once() - # Verify response text includes generate instructions + # Verify response text includes completion and PM document info text = result.value.content[0].text assert "Interview complete" in text - assert "generate" in text + assert "PM document" in text @pytest.mark.asyncio async def test_no_done_signal_processing(self, tmp_path: Path) -> None: @@ -1504,17 +1753,28 @@ async def test_resume_completion_meta_has_required_fields(self, tmp_path: Path) "ambiguity_score": 0.15, } ) + # Mock auto-generate on completion + seed_mock = MagicMock() + seed_mock.product_name = "Test Product" + seed_mock.deferred_items = ["d1"] + seed_mock.decide_later_items = ["dl1"] + engine.generate_pm_seed = AsyncMock(return_value=Result.ok(seed_mock)) + engine.save_pm_seed = MagicMock(return_value=tmp_path / "seed.json") _save_pm_meta("resume-complete", engine, cwd="/tmp", data_dir=tmp_path) handler = PMInterviewHandler(pm_engine=engine, data_dir=tmp_path) - result = await handler.handle( - { - "session_id": "resume-complete", - "answer": "Final answer", - } - ) + with patch( + "ouroboros.mcp.tools.pm_handler.save_pm_document", + return_value=tmp_path / "pm.md", + ): + result = await handler.handle( + { + "session_id": "resume-complete", + "answer": "Final answer", + } + ) assert result.is_ok meta = result.value.meta @@ -1528,6 +1788,9 @@ async def test_resume_completion_meta_has_required_fields(self, tmp_path: Path) assert meta["decide_later_this_round"] == [] # Also has completion details assert meta["interview_complete"] is True + # Auto-generated PM document + 
assert "pm_path" in meta + assert "seed_path" in meta @pytest.mark.asyncio async def test_resume_loads_state_and_meta(self, tmp_path: Path) -> None: