Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions crates/loopal-error/src/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@ impl ProviderError {
// Network-level errors (connection reset, timeout, DNS) are transient.
ProviderError::Http(_) => true,
ProviderError::Api { status, message } => {
// 400 with context overflow keywords is deterministic — never retryable
// 400 with context overflow keywords is deterministic — never retryable.
// Generic "invalid_request_error" is excluded: it covers many
// non-overflow 400s (prefill rejection, malformed blocks, etc.).
if *status == 400
&& (message.contains("invalid_request_error")
|| message.contains("prompt is too long")
&& (message.contains("prompt is too long")
|| message.contains("maximum context length"))
{
return false;
Expand Down
23 changes: 23 additions & 0 deletions crates/loopal-error/tests/suite/error_edge_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,29 @@ fn test_api_400_invalid_request_not_retryable() {
assert!(!err.is_retryable());
}

#[test]
fn test_prefill_rejection_not_context_overflow() {
    // Anthropic emits this body when thinking is enabled and the conversation
    // ends with an assistant message (prefill unsupported in thinking mode).
    let payload = r#"{"error":{"message":"This model does not support assistant message prefill. The conversation must end with a user message.","type":"invalid_request_error"},"type":"error"}"#;
    let err = ProviderError::Api {
        status: 400,
        message: payload.into(),
    };
    assert!(
        !err.is_context_overflow(),
        "prefill rejection must not be classified as context overflow"
    );
    assert!(
        !err.is_retryable(),
        "prefill rejection is deterministic, not retryable"
    );
}

#[test]
fn test_api_500_not_context_overflow() {
let err = ProviderError::Api {
Expand Down
10 changes: 10 additions & 0 deletions crates/loopal-provider-api/src/thinking.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,16 @@ pub enum ThinkingCapability {
ThinkingBudget,
}

impl ThinkingCapability {
/// Whether this capability forbids assistant-message prefill when active.
///
/// Anthropic's API rejects conversations ending with an assistant message
/// when thinking is enabled. OpenAI and Google allow prefill regardless.
pub fn forbids_prefill(&self) -> bool {
matches!(self, Self::BudgetRequired | Self::Adaptive)
}
}

#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ThinkingConfig {
Expand Down
2 changes: 2 additions & 0 deletions crates/loopal-provider-api/tests/suite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ mod middleware_test;
mod model_router_test;
#[path = "suite/model_types_test.rs"]
mod model_types_test;
#[path = "suite/thinking_capability_test.rs"]
mod thinking_capability_test;
16 changes: 16 additions & 0 deletions crates/loopal-provider-api/tests/suite/thinking_capability_test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
//! Tests for ThinkingCapability methods.

use loopal_provider_api::ThinkingCapability;

#[test]
fn anthropic_capabilities_forbid_prefill() {
    // Both Anthropic-style capabilities must report that prefill is illegal.
    for cap in [
        ThinkingCapability::BudgetRequired,
        ThinkingCapability::Adaptive,
    ] {
        assert!(cap.forbids_prefill());
    }
}

#[test]
fn non_anthropic_capabilities_allow_prefill() {
    // No-thinking and OpenAI/Google-style capabilities must permit prefill.
    for cap in [
        ThinkingCapability::None,
        ThinkingCapability::ReasoningEffort,
        ThinkingCapability::ThinkingBudget,
    ] {
        assert!(!cap.forbids_prefill());
    }
}
9 changes: 5 additions & 4 deletions crates/loopal-provider/src/anthropic/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,11 +177,12 @@ impl AnthropicProvider {
.unwrap_or_else(|_| "failed to read body".into());
tracing::error!(status = status.as_u16(), body = %text, "API error");

// Detect context overflow: 400 + known prompt-too-long patterns
// Detect context overflow: 400 + known prompt-too-long patterns.
// Intentionally excludes "invalid_request_error" — that type covers
// many 400 errors (prefill rejection, malformed blocks, etc.) and
// must not be conflated with context overflow.
if status.as_u16() == 400
&& (text.contains("prompt is too long")
|| text.contains("maximum context length")
|| text.contains("invalid_request_error"))
&& (text.contains("prompt is too long") || text.contains("maximum context length"))
{
return ProviderError::ContextOverflow { message: text }.into();
}
Expand Down
19 changes: 19 additions & 0 deletions crates/loopal-runtime/src/agent_loop/llm_params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,23 @@ impl AgentLoopRunner {
debug_dump_dir: Some(loopal_config::tmp_dir()),
})
}

/// Whether the current model requires a user-message suffix for continuation.
///
/// True only when the provider forbids assistant-message prefill (currently
/// Anthropic) AND a thinking config actually resolves for this model. OpenAI
/// and Google reasoning models allow prefill regardless of thinking state,
/// so we preserve the higher-quality mid-sentence continuation for them.
pub(super) fn needs_continuation_injection(&self) -> bool {
    let capability = get_thinking_capability(self.params.config.model());
    // Short-circuit: only consult the thinking config when prefill is banned.
    capability.forbids_prefill()
        && resolve_thinking_config(
            &self.model_config.thinking,
            capability,
            self.model_config.max_output_tokens,
        )
        .is_some()
}
}
1 change: 1 addition & 0 deletions crates/loopal-runtime/src/agent_loop/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ mod tools_inject;
pub(crate) mod tools_plan;
mod tools_resolve;
pub mod turn_context;
mod turn_continue;
mod turn_exec;
pub(crate) mod turn_metrics;
pub mod turn_observer;
Expand Down
63 changes: 63 additions & 0 deletions crates/loopal-runtime/src/agent_loop/turn_continue.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
//! Synthetic message injection for auto-continuation compatibility.
//!
//! When thinking mode is active, the Anthropic API rejects "assistant message
//! prefill" — the conversation must end with a user message. The standard
//! auto-continuation flow records an assistant message and loops back, which
//! violates this constraint.
//!
//! This module provides helpers that inject a synthetic user message when
//! needed, preserving the normal prefill behavior for non-thinking providers.

use loopal_message::{ContentBlock, Message, MessageRole};
use tracing::error;

use super::runner::AgentLoopRunner;

/// Synthetic prompt injected when the LLM must continue but thinking mode
/// forbids assistant-message prefill.
const CONTINUE_PROMPT: &str = "[Continue from where you left off]";

impl AgentLoopRunner {
    /// Inject a synthetic user message if the provider forbids prefill with thinking.
    ///
    /// Called before `continue` in auto-continuation paths (MaxTokens,
    /// PauseTurn, stream truncation). When the provider allows prefill, the
    /// model resumes from the partial assistant message directly, so no
    /// injection is needed.
    pub(super) fn push_continuation_if_thinking(&mut self) {
        if !self.needs_continuation_injection() {
            return;
        }
        self.persist_and_push_user(CONTINUE_PROMPT);
    }

    /// Push a new user message with stop-hook feedback.
    ///
    /// After `record_assistant_message`, the last message in the store is
    /// Assistant. The old `append_warnings_to_last_user` would violate its
    /// own `debug_assert!(role == User)`. This method correctly creates a
    /// new User message regardless of thinking mode.
    pub(super) fn push_stop_feedback(&mut self, feedback: String) {
        // Hand the owned String straight through; no re-allocation occurs.
        self.persist_and_push_user(feedback);
    }

    /// Construct, persist, and push a User message with the given text.
    ///
    /// Takes `impl Into<String>` so `&'static str` callers allocate once and
    /// `String` callers allocate not at all (previously a `&str` parameter
    /// forced `to_string()` — a redundant clone for owned feedback strings).
    fn persist_and_push_user(&mut self, text: impl Into<String>) {
        let mut msg = Message {
            id: None,
            role: MessageRole::User,
            content: vec![ContentBlock::Text { text: text.into() }],
        };
        // Persistence failure is logged but non-fatal: the message is still
        // pushed to the in-memory store so the turn can proceed.
        if let Err(e) = self
            .params
            .deps
            .session_manager
            .save_message(&self.params.session.id, &mut msg)
        {
            error!(error = %e, "failed to persist continuation message");
        }
        self.params.store.push_user(msg);
    }
}
6 changes: 3 additions & 3 deletions crates/loopal-runtime/src/agent_loop/turn_exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ impl AgentLoopRunner {
max_continuations: self.params.harness.max_auto_continuations,
})
.await?;
self.push_continuation_if_thinking();
continue;
}
return Ok(TurnOutput { output: last_text });
Expand Down Expand Up @@ -125,6 +126,7 @@ impl AgentLoopRunner {
max_continuations: self.params.harness.max_auto_continuations,
})
.await?;
self.push_continuation_if_thinking();
continue;
}
return Ok(TurnOutput { output: last_text });
Expand All @@ -135,9 +137,7 @@ impl AgentLoopRunner {
&& let Some(feedback) = self.run_stop_hooks().await
{
stop_feedback_count += 1;
self.params
.store
.append_warnings_to_last_user(vec![feedback]);
self.push_stop_feedback(feedback);
continue;
}
return Ok(TurnOutput { output: last_text });
Expand Down
34 changes: 30 additions & 4 deletions crates/loopal-runtime/tests/agent_loop/mock_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,28 @@ fn build_params(
messages: Vec<loopal_message::Message>,
permission_mode: PermissionMode,
) -> AgentLoopParams {
AgentLoopParams {
config: AgentConfig {
build_params_with_config(
kernel,
frontend,
fixture,
messages,
AgentConfig {
permission_mode,
..Default::default()
},
)
}

/// Build AgentLoopParams with a fully custom `AgentConfig`.
fn build_params_with_config(
kernel: Arc<Kernel>,
frontend: Arc<dyn loopal_runtime::AgentFrontend>,
fixture: &TestFixture,
messages: Vec<loopal_message::Message>,
config: AgentConfig,
) -> AgentLoopParams {
AgentLoopParams {
config,
deps: AgentDeps {
kernel,
frontend,
Expand Down Expand Up @@ -96,6 +113,15 @@ pub fn make_runner_with_mock_provider(

/// Build an `AgentLoopRunner` driven by `MultiCallProvider` with the default
/// `AgentConfig`, returning the runner and its event receiver.
///
/// Thin delegator to `make_multi_runner_with_config`; use that variant
/// directly when a non-default config (e.g. a thinking config) is needed.
pub fn make_multi_runner(
calls: Vec<Vec<Result<StreamChunk, LoopalError>>>,
) -> (AgentLoopRunner, mpsc::Receiver<AgentEvent>) {
make_multi_runner_with_config(calls, AgentConfig::default())
}

/// Like `make_multi_runner` but accepts a custom `AgentConfig` (e.g. for
/// testing with `ThinkingConfig::Disabled`).
pub fn make_multi_runner_with_config(
calls: Vec<Vec<Result<StreamChunk, LoopalError>>>,
config: AgentConfig,
) -> (AgentLoopRunner, mpsc::Receiver<AgentEvent>) {
let fixture = TestFixture::new();
let (event_tx, event_rx) = mpsc::channel(64);
Expand All @@ -112,12 +138,12 @@ pub fn make_multi_runner(
));
let mut kernel = Kernel::new(Settings::default()).unwrap();
kernel.register_provider(Arc::new(MultiCallProvider::new(calls)) as Arc<dyn Provider>);
let params = build_params(
let params = build_params_with_config(
Arc::new(kernel),
frontend,
&fixture,
vec![loopal_message::Message::user("go")],
PermissionMode::Bypass,
config,
);
(AgentLoopRunner::new(params), event_rx)
}
Expand Down
1 change: 1 addition & 0 deletions crates/loopal-runtime/tests/agent_loop/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ mod retry_cancel_test;
mod run_test;
mod stream_truncation_edge_test;
mod stream_truncation_test;
mod thinking_continue_test;
mod tools_test;
mod turn_completion_edge_test;
mod turn_completion_test;
Expand Down
Loading
Loading