diff --git a/crates/loopal-error/src/helpers.rs b/crates/loopal-error/src/helpers.rs index 5058f2ea..5d41662c 100644 --- a/crates/loopal-error/src/helpers.rs +++ b/crates/loopal-error/src/helpers.rs @@ -13,10 +13,11 @@ impl ProviderError { // Network-level errors (connection reset, timeout, DNS) are transient. ProviderError::Http(_) => true, ProviderError::Api { status, message } => { - // 400 with context overflow keywords is deterministic — never retryable + // 400 with context overflow keywords is deterministic — never retryable. + // Generic "invalid_request_error" is excluded: it covers many + // non-overflow 400s (prefill rejection, malformed blocks, etc.). if *status == 400 - && (message.contains("invalid_request_error") - || message.contains("prompt is too long") + && (message.contains("prompt is too long") || message.contains("maximum context length")) { return false; diff --git a/crates/loopal-error/tests/suite/error_edge_test.rs b/crates/loopal-error/tests/suite/error_edge_test.rs index 2624b9e4..75540b7f 100644 --- a/crates/loopal-error/tests/suite/error_edge_test.rs +++ b/crates/loopal-error/tests/suite/error_edge_test.rs @@ -47,6 +47,29 @@ fn test_api_400_invalid_request_not_retryable() { assert!(!err.is_retryable()); } +#[test] +fn test_prefill_rejection_not_context_overflow() { + // Anthropic returns this when thinking mode is active but conversation + // ends with an assistant message (prefill not supported). + let body = concat!( + r#"{"error":{"message":"This model does not support assistant"#, + r#" message prefill. 
The conversation must end with a user"#, + r#" message.","type":"invalid_request_error"},"type":"error"}"#, + ); + let err = ProviderError::Api { + status: 400, + message: body.into(), + }; + assert!( + !err.is_context_overflow(), + "prefill rejection must not be classified as context overflow" + ); + assert!( + !err.is_retryable(), + "prefill rejection is deterministic, not retryable" + ); +} + #[test] fn test_api_500_not_context_overflow() { let err = ProviderError::Api { diff --git a/crates/loopal-provider-api/src/thinking.rs b/crates/loopal-provider-api/src/thinking.rs index d94ff4d8..6773dda1 100644 --- a/crates/loopal-provider-api/src/thinking.rs +++ b/crates/loopal-provider-api/src/thinking.rs @@ -15,6 +15,16 @@ pub enum ThinkingCapability { ThinkingBudget, } +impl ThinkingCapability { + /// Whether this capability forbids assistant-message prefill when active. + /// + /// Anthropic's API rejects conversations ending with an assistant message + /// when thinking is enabled. OpenAI and Google allow prefill regardless. 
+ pub fn forbids_prefill(&self) -> bool { + matches!(self, Self::BudgetRequired | Self::Adaptive) + } +} + #[derive(Debug, Clone, Default, Serialize, Deserialize)] #[serde(tag = "type", rename_all = "snake_case")] pub enum ThinkingConfig { diff --git a/crates/loopal-provider-api/tests/suite.rs b/crates/loopal-provider-api/tests/suite.rs index 5b76e9ad..086c66da 100644 --- a/crates/loopal-provider-api/tests/suite.rs +++ b/crates/loopal-provider-api/tests/suite.rs @@ -5,3 +5,5 @@ mod middleware_test; mod model_router_test; #[path = "suite/model_types_test.rs"] mod model_types_test; +#[path = "suite/thinking_capability_test.rs"] +mod thinking_capability_test; diff --git a/crates/loopal-provider-api/tests/suite/thinking_capability_test.rs b/crates/loopal-provider-api/tests/suite/thinking_capability_test.rs new file mode 100644 index 00000000..c0a11267 --- /dev/null +++ b/crates/loopal-provider-api/tests/suite/thinking_capability_test.rs @@ -0,0 +1,16 @@ +//! Tests for ThinkingCapability methods. 
+ +use loopal_provider_api::ThinkingCapability; + +#[test] +fn anthropic_capabilities_forbid_prefill() { + assert!(ThinkingCapability::BudgetRequired.forbids_prefill()); + assert!(ThinkingCapability::Adaptive.forbids_prefill()); +} + +#[test] +fn non_anthropic_capabilities_allow_prefill() { + assert!(!ThinkingCapability::None.forbids_prefill()); + assert!(!ThinkingCapability::ReasoningEffort.forbids_prefill()); + assert!(!ThinkingCapability::ThinkingBudget.forbids_prefill()); +} diff --git a/crates/loopal-provider/src/anthropic/mod.rs b/crates/loopal-provider/src/anthropic/mod.rs index 7d37a01f..af65590f 100644 --- a/crates/loopal-provider/src/anthropic/mod.rs +++ b/crates/loopal-provider/src/anthropic/mod.rs @@ -177,11 +177,12 @@ impl AnthropicProvider { .unwrap_or_else(|_| "failed to read body".into()); tracing::error!(status = status.as_u16(), body = %text, "API error"); - // Detect context overflow: 400 + known prompt-too-long patterns + // Detect context overflow: 400 + known prompt-too-long patterns. + // Intentionally excludes "invalid_request_error" — that type covers + // many 400 errors (prefill rejection, malformed blocks, etc.) and + // must not be conflated with context overflow. if status.as_u16() == 400 - && (text.contains("prompt is too long") - || text.contains("maximum context length") - || text.contains("invalid_request_error")) + && (text.contains("prompt is too long") || text.contains("maximum context length")) { return ProviderError::ContextOverflow { message: text }.into(); } diff --git a/crates/loopal-runtime/src/agent_loop/llm_params.rs b/crates/loopal-runtime/src/agent_loop/llm_params.rs index c9057eb5..feb17013 100644 --- a/crates/loopal-runtime/src/agent_loop/llm_params.rs +++ b/crates/loopal-runtime/src/agent_loop/llm_params.rs @@ -51,4 +51,23 @@ impl AgentLoopRunner { debug_dump_dir: Some(loopal_config::tmp_dir()), }) } + + /// Whether the current model requires a user-message suffix for continuation. 
+ /// + /// Returns true only when thinking is active AND the provider forbids + /// assistant-message prefill (currently Anthropic only). OpenAI and Google + /// reasoning models allow prefill regardless of thinking state, so we + /// preserve the higher-quality mid-sentence continuation for them. + pub(super) fn needs_continuation_injection(&self) -> bool { + let capability = get_thinking_capability(self.params.config.model()); + if !capability.forbids_prefill() { + return false; + } + resolve_thinking_config( + &self.model_config.thinking, + capability, + self.model_config.max_output_tokens, + ) + .is_some() + } } diff --git a/crates/loopal-runtime/src/agent_loop/mod.rs b/crates/loopal-runtime/src/agent_loop/mod.rs index 6892b6bf..c995e5a2 100644 --- a/crates/loopal-runtime/src/agent_loop/mod.rs +++ b/crates/loopal-runtime/src/agent_loop/mod.rs @@ -33,6 +33,7 @@ mod tools_inject; pub(crate) mod tools_plan; mod tools_resolve; pub mod turn_context; +mod turn_continue; mod turn_exec; pub(crate) mod turn_metrics; pub mod turn_observer; diff --git a/crates/loopal-runtime/src/agent_loop/turn_continue.rs b/crates/loopal-runtime/src/agent_loop/turn_continue.rs new file mode 100644 index 00000000..548e1c19 --- /dev/null +++ b/crates/loopal-runtime/src/agent_loop/turn_continue.rs @@ -0,0 +1,63 @@ +//! Synthetic message injection for auto-continuation compatibility. +//! +//! When thinking mode is active, the Anthropic API rejects "assistant message +//! prefill" — the conversation must end with a user message. The standard +//! auto-continuation flow records an assistant message and loops back, which +//! violates this constraint. +//! +//! This module provides helpers that inject a synthetic user message when +//! needed, preserving the normal prefill behavior for non-thinking providers. 
+ +use loopal_message::{ContentBlock, Message, MessageRole}; +use tracing::error; + +use super::runner::AgentLoopRunner; + +/// Synthetic prompt injected when the LLM must continue but thinking mode +/// forbids assistant-message prefill. +const CONTINUE_PROMPT: &str = "[Continue from where you left off]"; + +impl AgentLoopRunner { + /// Inject a synthetic user message if the provider forbids prefill with thinking. + /// + /// Called before `continue` in auto-continuation paths (MaxTokens, + /// PauseTurn, stream truncation). When the provider allows prefill, the + /// model resumes from the partial assistant message directly, so no + /// injection is needed. + pub(super) fn push_continuation_if_thinking(&mut self) { + if !self.needs_continuation_injection() { + return; + } + self.persist_and_push_user(CONTINUE_PROMPT); + } + + /// Push a new user message with stop-hook feedback. + /// + /// After `record_assistant_message`, the last message in the store is + /// Assistant. The old `append_warnings_to_last_user` would violate its + /// own `debug_assert!(role == User)`. This method correctly creates a + /// new User message regardless of thinking mode. + pub(super) fn push_stop_feedback(&mut self, feedback: String) { + self.persist_and_push_user(&feedback); + } + + /// Construct, persist, and push a User message with the given text. 
+ fn persist_and_push_user(&mut self, text: &str) { + let mut msg = Message { + id: None, + role: MessageRole::User, + content: vec![ContentBlock::Text { + text: text.to_string(), + }], + }; + if let Err(e) = self + .params + .deps + .session_manager + .save_message(&self.params.session.id, &mut msg) + { + error!(error = %e, "failed to persist continuation message"); + } + self.params.store.push_user(msg); + } +} diff --git a/crates/loopal-runtime/src/agent_loop/turn_exec.rs b/crates/loopal-runtime/src/agent_loop/turn_exec.rs index e03b0ad4..ae22108b 100644 --- a/crates/loopal-runtime/src/agent_loop/turn_exec.rs +++ b/crates/loopal-runtime/src/agent_loop/turn_exec.rs @@ -83,6 +83,7 @@ impl AgentLoopRunner { max_continuations: self.params.harness.max_auto_continuations, }) .await?; + self.push_continuation_if_thinking(); continue; } return Ok(TurnOutput { output: last_text }); @@ -125,6 +126,7 @@ impl AgentLoopRunner { max_continuations: self.params.harness.max_auto_continuations, }) .await?; + self.push_continuation_if_thinking(); continue; } return Ok(TurnOutput { output: last_text }); @@ -135,9 +137,7 @@ impl AgentLoopRunner { && let Some(feedback) = self.run_stop_hooks().await { stop_feedback_count += 1; - self.params - .store - .append_warnings_to_last_user(vec![feedback]); + self.push_stop_feedback(feedback); continue; } return Ok(TurnOutput { output: last_text }); diff --git a/crates/loopal-runtime/tests/agent_loop/mock_provider.rs b/crates/loopal-runtime/tests/agent_loop/mock_provider.rs index 1327f6ac..4d69020b 100644 --- a/crates/loopal-runtime/tests/agent_loop/mock_provider.rs +++ b/crates/loopal-runtime/tests/agent_loop/mock_provider.rs @@ -28,11 +28,28 @@ fn build_params( messages: Vec, permission_mode: PermissionMode, ) -> AgentLoopParams { - AgentLoopParams { - config: AgentConfig { + build_params_with_config( + kernel, + frontend, + fixture, + messages, + AgentConfig { permission_mode, ..Default::default() }, + ) +} + +/// Build AgentLoopParams with 
a fully custom `AgentConfig`. +fn build_params_with_config( + kernel: Arc, + frontend: Arc, + fixture: &TestFixture, + messages: Vec, + config: AgentConfig, +) -> AgentLoopParams { + AgentLoopParams { + config, deps: AgentDeps { kernel, frontend, @@ -96,6 +113,15 @@ pub fn make_runner_with_mock_provider( pub fn make_multi_runner( calls: Vec>>, +) -> (AgentLoopRunner, mpsc::Receiver) { + make_multi_runner_with_config(calls, AgentConfig::default()) +} + +/// Like `make_multi_runner` but accepts a custom `AgentConfig` (e.g. for +/// testing with `ThinkingConfig::Disabled`). +pub fn make_multi_runner_with_config( + calls: Vec>>, + config: AgentConfig, ) -> (AgentLoopRunner, mpsc::Receiver) { let fixture = TestFixture::new(); let (event_tx, event_rx) = mpsc::channel(64); @@ -112,12 +138,12 @@ pub fn make_multi_runner( )); let mut kernel = Kernel::new(Settings::default()).unwrap(); kernel.register_provider(Arc::new(MultiCallProvider::new(calls)) as Arc); - let params = build_params( + let params = build_params_with_config( Arc::new(kernel), frontend, &fixture, vec![loopal_message::Message::user("go")], - PermissionMode::Bypass, + config, ); (AgentLoopRunner::new(params), event_rx) } diff --git a/crates/loopal-runtime/tests/agent_loop/mod.rs b/crates/loopal-runtime/tests/agent_loop/mod.rs index 73a5b233..61562945 100644 --- a/crates/loopal-runtime/tests/agent_loop/mod.rs +++ b/crates/loopal-runtime/tests/agent_loop/mod.rs @@ -63,6 +63,7 @@ mod retry_cancel_test; mod run_test; mod stream_truncation_edge_test; mod stream_truncation_test; +mod thinking_continue_test; mod tools_test; mod turn_completion_edge_test; mod turn_completion_test; diff --git a/crates/loopal-runtime/tests/agent_loop/thinking_continue_test.rs b/crates/loopal-runtime/tests/agent_loop/thinking_continue_test.rs new file mode 100644 index 00000000..8365cdf7 --- /dev/null +++ b/crates/loopal-runtime/tests/agent_loop/thinking_continue_test.rs @@ -0,0 +1,185 @@ +//! 
Tests for thinking-mode auto-continuation and stop-feedback message injection. + +use loopal_message::MessageRole; +use loopal_provider_api::{StopReason, StreamChunk, ThinkingConfig}; +use loopal_runtime::AgentConfig; +use loopal_tool_api::PermissionMode; + +use super::mock_provider::{make_multi_runner, make_multi_runner_with_config}; + +/// Default model (`claude-sonnet-4-20250514`) has `BudgetRequired` thinking +/// capability, and default `ThinkingConfig::Auto` resolves to `Some(Budget{..})`. +/// After MaxTokens auto-continuation, a synthetic user message must be injected. +#[tokio::test] +async fn test_thinking_active_injects_continuation_on_max_tokens() { + let calls = vec![ + vec![ + Ok(StreamChunk::Text { + text: "part 1".into(), + }), + Ok(StreamChunk::Done { + stop_reason: StopReason::MaxTokens, + }), + ], + vec![ + Ok(StreamChunk::Text { + text: "part 2".into(), + }), + Ok(StreamChunk::Done { + stop_reason: StopReason::EndTurn, + }), + ], + ]; + let (mut runner, mut event_rx) = make_multi_runner(calls); + tokio::spawn(async move { while event_rx.recv().await.is_some() {} }); + + let _ = runner.run().await.unwrap(); + + // Verify: between the two assistant messages there should be a user + // message containing the continuation prompt. + let messages = runner.params.store.messages(); + let has_continuation = messages.iter().any(|m| { + m.role == MessageRole::User + && m.content.iter().any(|b| match b { + loopal_message::ContentBlock::Text { text } => text.contains("[Continue"), + _ => false, + }) + }); + assert!( + has_continuation, + "thinking-active auto-continuation must inject a user message" + ); +} + +/// With thinking explicitly disabled, no synthetic user message should be +/// injected — the model continues via assistant prefill. 
+#[tokio::test] +async fn test_thinking_disabled_no_continuation_message() { + let calls = vec![ + vec![ + Ok(StreamChunk::Text { + text: "part 1".into(), + }), + Ok(StreamChunk::Done { + stop_reason: StopReason::MaxTokens, + }), + ], + vec![ + Ok(StreamChunk::Text { + text: "part 2".into(), + }), + Ok(StreamChunk::Done { + stop_reason: StopReason::EndTurn, + }), + ], + ]; + let config = AgentConfig { + thinking_config: ThinkingConfig::Disabled, + permission_mode: PermissionMode::Bypass, + ..Default::default() + }; + let (mut runner, mut event_rx) = make_multi_runner_with_config(calls, config); + tokio::spawn(async move { while event_rx.recv().await.is_some() {} }); + + let _ = runner.run().await.unwrap(); + + let messages = runner.params.store.messages(); + let has_continuation = messages.iter().any(|m| { + m.role == MessageRole::User + && m.content.iter().any(|b| match b { + loopal_message::ContentBlock::Text { text } => text.contains("[Continue"), + _ => false, + }) + }); + assert!( + !has_continuation, + "thinking-disabled must NOT inject continuation messages" + ); +} + +/// MaxTokens with truncated tool calls + thinking active → continuation injected. 
+#[tokio::test]
+async fn test_thinking_truncated_tools_injects_continuation() {
+    let calls = vec![
+        vec![
+            Ok(StreamChunk::Text {
+                text: "Let me ".into(),
+            }),
+            Ok(StreamChunk::ToolUse {
+                id: "tc-1".into(),
+                name: "Read".into(),
+                input: serde_json::json!({"file_path": "/tmp/truncated"}),
+            }),
+            Ok(StreamChunk::Done {
+                stop_reason: StopReason::MaxTokens,
+            }),
+        ],
+        vec![
+            Ok(StreamChunk::Text {
+                text: "read the file.".into(),
+            }),
+            Ok(StreamChunk::Done {
+                stop_reason: StopReason::EndTurn,
+            }),
+        ],
+    ];
+    let (mut runner, mut event_rx) = make_multi_runner(calls);
+    tokio::spawn(async move { while event_rx.recv().await.is_some() {} });
+
+    let _ = runner.run().await.unwrap();
+
+    let messages = runner.params.store.messages();
+    let has_continuation = messages.iter().any(|m| {
+        m.role == MessageRole::User
+            && m.content.iter().any(|b| match b {
+                loopal_message::ContentBlock::Text { text } => text.contains("[Continue"),
+                _ => false,
+            })
+    });
+    assert!(
+        has_continuation,
+        "truncated tools + thinking must inject continuation"
+    );
+}
+
+/// After a MaxTokens auto-continuation, the injected synthetic user message
+/// must keep the conversation in strict Assistant/User role alternation.
+#[tokio::test]
+async fn test_messages_alternate_roles_after_continuation() {
+    let calls = vec![
+        vec![
+            Ok(StreamChunk::Text {
+                text: "part 1".into(),
+            }),
+            Ok(StreamChunk::Done {
+                stop_reason: StopReason::MaxTokens,
+            }),
+        ],
+        vec![
+            Ok(StreamChunk::Text {
+                text: "part 2".into(),
+            }),
+            Ok(StreamChunk::Done {
+                stop_reason: StopReason::EndTurn,
+            }),
+        ],
+    ];
+    let (mut runner, mut event_rx) = make_multi_runner(calls);
+    tokio::spawn(async move { while event_rx.recv().await.is_some() {} });
+
+    let _ = runner.run().await.unwrap();
+
+    // Verify strict role alternation (ignoring system messages at the front).
+ let messages = runner.params.store.messages(); + let non_system: Vec<_> = messages + .iter() + .filter(|m| m.role != MessageRole::System) + .collect(); + for pair in non_system.windows(2) { + assert_ne!( + pair[0].role, pair[1].role, + "consecutive messages must alternate roles: {:?} followed by {:?}", + pair[0].role, pair[1].role + ); + } +}