diff --git a/crates/loopal-error/src/helpers.rs b/crates/loopal-error/src/helpers.rs index 5058f2ea..5d41662c 100644 --- a/crates/loopal-error/src/helpers.rs +++ b/crates/loopal-error/src/helpers.rs @@ -13,10 +13,11 @@ impl ProviderError { // Network-level errors (connection reset, timeout, DNS) are transient. ProviderError::Http(_) => true, ProviderError::Api { status, message } => { - // 400 with context overflow keywords is deterministic — never retryable + // 400 with context overflow keywords is deterministic — never retryable. + // Generic "invalid_request_error" is excluded: it covers many + // non-overflow 400s (prefill rejection, malformed blocks, etc.). if *status == 400 - && (message.contains("invalid_request_error") - || message.contains("prompt is too long") + && (message.contains("prompt is too long") || message.contains("maximum context length")) { return false; diff --git a/crates/loopal-error/tests/suite/error_edge_test.rs b/crates/loopal-error/tests/suite/error_edge_test.rs index 2624b9e4..75540b7f 100644 --- a/crates/loopal-error/tests/suite/error_edge_test.rs +++ b/crates/loopal-error/tests/suite/error_edge_test.rs @@ -47,6 +47,29 @@ fn test_api_400_invalid_request_not_retryable() { assert!(!err.is_retryable()); } +#[test] +fn test_prefill_rejection_not_context_overflow() { + // Anthropic returns this when thinking mode is active but conversation + // ends with an assistant message (prefill not supported). + let body = concat!( + r#"{"error":{"message":"This model does not support assistant"#, + r#" message prefill. 
The conversation must end with a user"#, + r#" message.","type":"invalid_request_error"},"type":"error"}"#, + ); + let err = ProviderError::Api { + status: 400, + message: body.into(), + }; + assert!( + !err.is_context_overflow(), + "prefill rejection must not be classified as context overflow" + ); + assert!( + !err.is_retryable(), + "prefill rejection is deterministic, not retryable" + ); +} + #[test] fn test_api_500_not_context_overflow() { let err = ProviderError::Api { diff --git a/crates/loopal-provider-api/src/thinking.rs b/crates/loopal-provider-api/src/thinking.rs index d94ff4d8..6773dda1 100644 --- a/crates/loopal-provider-api/src/thinking.rs +++ b/crates/loopal-provider-api/src/thinking.rs @@ -15,6 +15,16 @@ pub enum ThinkingCapability { ThinkingBudget, } +impl ThinkingCapability { + /// Whether this capability forbids assistant-message prefill when active. + /// + /// Anthropic's API rejects conversations ending with an assistant message + /// when thinking is enabled. OpenAI and Google allow prefill regardless. 
+ pub fn forbids_prefill(&self) -> bool { + matches!(self, Self::BudgetRequired | Self::Adaptive) + } +} + #[derive(Debug, Clone, Default, Serialize, Deserialize)] #[serde(tag = "type", rename_all = "snake_case")] pub enum ThinkingConfig { diff --git a/crates/loopal-provider-api/tests/suite.rs b/crates/loopal-provider-api/tests/suite.rs index 5b76e9ad..086c66da 100644 --- a/crates/loopal-provider-api/tests/suite.rs +++ b/crates/loopal-provider-api/tests/suite.rs @@ -5,3 +5,5 @@ mod middleware_test; mod model_router_test; #[path = "suite/model_types_test.rs"] mod model_types_test; +#[path = "suite/thinking_capability_test.rs"] +mod thinking_capability_test; diff --git a/crates/loopal-provider-api/tests/suite/thinking_capability_test.rs b/crates/loopal-provider-api/tests/suite/thinking_capability_test.rs new file mode 100644 index 00000000..c0a11267 --- /dev/null +++ b/crates/loopal-provider-api/tests/suite/thinking_capability_test.rs @@ -0,0 +1,16 @@ +//! Tests for ThinkingCapability methods. 
+ +use loopal_provider_api::ThinkingCapability; + +#[test] +fn anthropic_capabilities_forbid_prefill() { + assert!(ThinkingCapability::BudgetRequired.forbids_prefill()); + assert!(ThinkingCapability::Adaptive.forbids_prefill()); +} + +#[test] +fn non_anthropic_capabilities_allow_prefill() { + assert!(!ThinkingCapability::None.forbids_prefill()); + assert!(!ThinkingCapability::ReasoningEffort.forbids_prefill()); + assert!(!ThinkingCapability::ThinkingBudget.forbids_prefill()); +} diff --git a/crates/loopal-provider/src/anthropic/mod.rs b/crates/loopal-provider/src/anthropic/mod.rs index 7d37a01f..af65590f 100644 --- a/crates/loopal-provider/src/anthropic/mod.rs +++ b/crates/loopal-provider/src/anthropic/mod.rs @@ -177,11 +177,12 @@ impl AnthropicProvider { .unwrap_or_else(|_| "failed to read body".into()); tracing::error!(status = status.as_u16(), body = %text, "API error"); - // Detect context overflow: 400 + known prompt-too-long patterns + // Detect context overflow: 400 + known prompt-too-long patterns. + // Intentionally excludes "invalid_request_error" — that type covers + // many 400 errors (prefill rejection, malformed blocks, etc.) and + // must not be conflated with context overflow. if status.as_u16() == 400 - && (text.contains("prompt is too long") - || text.contains("maximum context length") - || text.contains("invalid_request_error")) + && (text.contains("prompt is too long") || text.contains("maximum context length")) { return ProviderError::ContextOverflow { message: text }.into(); } diff --git a/crates/loopal-runtime/src/agent_loop/llm_params.rs b/crates/loopal-runtime/src/agent_loop/llm_params.rs index c9057eb5..feb17013 100644 --- a/crates/loopal-runtime/src/agent_loop/llm_params.rs +++ b/crates/loopal-runtime/src/agent_loop/llm_params.rs @@ -51,4 +51,23 @@ impl AgentLoopRunner { debug_dump_dir: Some(loopal_config::tmp_dir()), }) } + + /// Whether the current model requires a user-message suffix for continuation. 
+ /// + /// Returns true only when thinking is active AND the provider forbids + /// assistant-message prefill (currently Anthropic only). OpenAI and Google + /// reasoning models allow prefill regardless of thinking state, so we + /// preserve the higher-quality mid-sentence continuation for them. + pub(super) fn needs_continuation_injection(&self) -> bool { + let capability = get_thinking_capability(self.params.config.model()); + if !capability.forbids_prefill() { + return false; + } + resolve_thinking_config( + &self.model_config.thinking, + capability, + self.model_config.max_output_tokens, + ) + .is_some() + } } diff --git a/crates/loopal-runtime/src/agent_loop/mod.rs b/crates/loopal-runtime/src/agent_loop/mod.rs index 6892b6bf..c995e5a2 100644 --- a/crates/loopal-runtime/src/agent_loop/mod.rs +++ b/crates/loopal-runtime/src/agent_loop/mod.rs @@ -33,6 +33,7 @@ mod tools_inject; pub(crate) mod tools_plan; mod tools_resolve; pub mod turn_context; +mod turn_continue; mod turn_exec; pub(crate) mod turn_metrics; pub mod turn_observer; diff --git a/crates/loopal-runtime/src/agent_loop/turn_continue.rs b/crates/loopal-runtime/src/agent_loop/turn_continue.rs new file mode 100644 index 00000000..548e1c19 --- /dev/null +++ b/crates/loopal-runtime/src/agent_loop/turn_continue.rs @@ -0,0 +1,63 @@ +//! Synthetic message injection for auto-continuation compatibility. +//! +//! When thinking mode is active, the Anthropic API rejects "assistant message +//! prefill" — the conversation must end with a user message. The standard +//! auto-continuation flow records an assistant message and loops back, which +//! violates this constraint. +//! +//! This module provides helpers that inject a synthetic user message when +//! needed, preserving the normal prefill behavior for non-thinking providers. 
+ +use loopal_message::{ContentBlock, Message, MessageRole}; +use tracing::error; + +use super::runner::AgentLoopRunner; + +/// Synthetic prompt injected when the LLM must continue but thinking mode +/// forbids assistant-message prefill. +const CONTINUE_PROMPT: &str = "[Continue from where you left off]"; + +impl AgentLoopRunner { + /// Inject a synthetic user message if the provider forbids prefill with thinking. + /// + /// Called before `continue` in auto-continuation paths (MaxTokens, + /// PauseTurn, stream truncation). When the provider allows prefill, the + /// model resumes from the partial assistant message directly, so no + /// injection is needed. + pub(super) fn push_continuation_if_thinking(&mut self) { + if !self.needs_continuation_injection() { + return; + } + self.persist_and_push_user(CONTINUE_PROMPT); + } + + /// Push a new user message with stop-hook feedback. + /// + /// After `record_assistant_message`, the last message in the store is + /// Assistant. The old `append_warnings_to_last_user` would violate its + /// own `debug_assert!(role == User)`. This method correctly creates a + /// new User message regardless of thinking mode. + pub(super) fn push_stop_feedback(&mut self, feedback: String) { + self.persist_and_push_user(&feedback); + } + + /// Construct, persist, and push a User message with the given text. 
+ fn persist_and_push_user(&mut self, text: &str) { + let mut msg = Message { + id: None, + role: MessageRole::User, + content: vec![ContentBlock::Text { + text: text.to_string(), + }], + }; + if let Err(e) = self + .params + .deps + .session_manager + .save_message(&self.params.session.id, &mut msg) + { + error!(error = %e, "failed to persist continuation message"); + } + self.params.store.push_user(msg); + } +} diff --git a/crates/loopal-runtime/src/agent_loop/turn_exec.rs b/crates/loopal-runtime/src/agent_loop/turn_exec.rs index e03b0ad4..ae22108b 100644 --- a/crates/loopal-runtime/src/agent_loop/turn_exec.rs +++ b/crates/loopal-runtime/src/agent_loop/turn_exec.rs @@ -83,6 +83,7 @@ impl AgentLoopRunner { max_continuations: self.params.harness.max_auto_continuations, }) .await?; + self.push_continuation_if_thinking(); continue; } return Ok(TurnOutput { output: last_text }); @@ -125,6 +126,7 @@ impl AgentLoopRunner { max_continuations: self.params.harness.max_auto_continuations, }) .await?; + self.push_continuation_if_thinking(); continue; } return Ok(TurnOutput { output: last_text }); @@ -135,9 +137,7 @@ impl AgentLoopRunner { && let Some(feedback) = self.run_stop_hooks().await { stop_feedback_count += 1; - self.params - .store - .append_warnings_to_last_user(vec![feedback]); + self.push_stop_feedback(feedback); continue; } return Ok(TurnOutput { output: last_text }); diff --git a/crates/loopal-runtime/tests/agent_loop/mock_provider.rs b/crates/loopal-runtime/tests/agent_loop/mock_provider.rs index 1327f6ac..4d69020b 100644 --- a/crates/loopal-runtime/tests/agent_loop/mock_provider.rs +++ b/crates/loopal-runtime/tests/agent_loop/mock_provider.rs @@ -28,11 +28,28 @@ fn build_params( messages: Vec, permission_mode: PermissionMode, ) -> AgentLoopParams { - AgentLoopParams { - config: AgentConfig { + build_params_with_config( + kernel, + frontend, + fixture, + messages, + AgentConfig { permission_mode, ..Default::default() }, + ) +} + +/// Build AgentLoopParams with 
a fully custom `AgentConfig`. +fn build_params_with_config( + kernel: Arc, + frontend: Arc, + fixture: &TestFixture, + messages: Vec, + config: AgentConfig, +) -> AgentLoopParams { + AgentLoopParams { + config, deps: AgentDeps { kernel, frontend, @@ -96,6 +113,15 @@ pub fn make_runner_with_mock_provider( pub fn make_multi_runner( calls: Vec>>, +) -> (AgentLoopRunner, mpsc::Receiver) { + make_multi_runner_with_config(calls, AgentConfig::default()) +} + +/// Like `make_multi_runner` but accepts a custom `AgentConfig` (e.g. for +/// testing with `ThinkingConfig::Disabled`). +pub fn make_multi_runner_with_config( + calls: Vec>>, + config: AgentConfig, ) -> (AgentLoopRunner, mpsc::Receiver) { let fixture = TestFixture::new(); let (event_tx, event_rx) = mpsc::channel(64); @@ -112,12 +138,12 @@ pub fn make_multi_runner( )); let mut kernel = Kernel::new(Settings::default()).unwrap(); kernel.register_provider(Arc::new(MultiCallProvider::new(calls)) as Arc); - let params = build_params( + let params = build_params_with_config( Arc::new(kernel), frontend, &fixture, vec![loopal_message::Message::user("go")], - PermissionMode::Bypass, + config, ); (AgentLoopRunner::new(params), event_rx) } diff --git a/crates/loopal-runtime/tests/agent_loop/mod.rs b/crates/loopal-runtime/tests/agent_loop/mod.rs index 73a5b233..61562945 100644 --- a/crates/loopal-runtime/tests/agent_loop/mod.rs +++ b/crates/loopal-runtime/tests/agent_loop/mod.rs @@ -63,6 +63,7 @@ mod retry_cancel_test; mod run_test; mod stream_truncation_edge_test; mod stream_truncation_test; +mod thinking_continue_test; mod tools_test; mod turn_completion_edge_test; mod turn_completion_test; diff --git a/crates/loopal-runtime/tests/agent_loop/thinking_continue_test.rs b/crates/loopal-runtime/tests/agent_loop/thinking_continue_test.rs new file mode 100644 index 00000000..8365cdf7 --- /dev/null +++ b/crates/loopal-runtime/tests/agent_loop/thinking_continue_test.rs @@ -0,0 +1,185 @@ +//! 
Tests for thinking-mode auto-continuation and stop-feedback message injection. + +use loopal_message::MessageRole; +use loopal_provider_api::{StopReason, StreamChunk, ThinkingConfig}; +use loopal_runtime::AgentConfig; +use loopal_tool_api::PermissionMode; + +use super::mock_provider::{make_multi_runner, make_multi_runner_with_config}; + +/// Default model (`claude-sonnet-4-20250514`) has `BudgetRequired` thinking +/// capability, and default `ThinkingConfig::Auto` resolves to `Some(Budget{..})`. +/// After MaxTokens auto-continuation, a synthetic user message must be injected. +#[tokio::test] +async fn test_thinking_active_injects_continuation_on_max_tokens() { + let calls = vec![ + vec![ + Ok(StreamChunk::Text { + text: "part 1".into(), + }), + Ok(StreamChunk::Done { + stop_reason: StopReason::MaxTokens, + }), + ], + vec![ + Ok(StreamChunk::Text { + text: "part 2".into(), + }), + Ok(StreamChunk::Done { + stop_reason: StopReason::EndTurn, + }), + ], + ]; + let (mut runner, mut event_rx) = make_multi_runner(calls); + tokio::spawn(async move { while event_rx.recv().await.is_some() {} }); + + let _ = runner.run().await.unwrap(); + + // Verify: between the two assistant messages there should be a user + // message containing the continuation prompt. + let messages = runner.params.store.messages(); + let has_continuation = messages.iter().any(|m| { + m.role == MessageRole::User + && m.content.iter().any(|b| match b { + loopal_message::ContentBlock::Text { text } => text.contains("[Continue"), + _ => false, + }) + }); + assert!( + has_continuation, + "thinking-active auto-continuation must inject a user message" + ); +} + +/// With thinking explicitly disabled, no synthetic user message should be +/// injected — the model continues via assistant prefill. 
+#[tokio::test] +async fn test_thinking_disabled_no_continuation_message() { + let calls = vec![ + vec![ + Ok(StreamChunk::Text { + text: "part 1".into(), + }), + Ok(StreamChunk::Done { + stop_reason: StopReason::MaxTokens, + }), + ], + vec![ + Ok(StreamChunk::Text { + text: "part 2".into(), + }), + Ok(StreamChunk::Done { + stop_reason: StopReason::EndTurn, + }), + ], + ]; + let config = AgentConfig { + thinking_config: ThinkingConfig::Disabled, + permission_mode: PermissionMode::Bypass, + ..Default::default() + }; + let (mut runner, mut event_rx) = make_multi_runner_with_config(calls, config); + tokio::spawn(async move { while event_rx.recv().await.is_some() {} }); + + let _ = runner.run().await.unwrap(); + + let messages = runner.params.store.messages(); + let has_continuation = messages.iter().any(|m| { + m.role == MessageRole::User + && m.content.iter().any(|b| match b { + loopal_message::ContentBlock::Text { text } => text.contains("[Continue"), + _ => false, + }) + }); + assert!( + !has_continuation, + "thinking-disabled must NOT inject continuation messages" + ); +} + +/// MaxTokens with truncated tool calls + thinking active → continuation injected. 
+#[tokio::test]
+async fn test_thinking_truncated_tools_injects_continuation() {
+    let calls = vec![
+        vec![
+            Ok(StreamChunk::Text {
+                text: "Let me ".into(),
+            }),
+            Ok(StreamChunk::ToolUse {
+                id: "tc-1".into(),
+                name: "Read".into(),
+                input: serde_json::json!({"file_path": "/tmp/truncated"}),
+            }),
+            Ok(StreamChunk::Done {
+                stop_reason: StopReason::MaxTokens,
+            }),
+        ],
+        vec![
+            Ok(StreamChunk::Text {
+                text: "read the file.".into(),
+            }),
+            Ok(StreamChunk::Done {
+                stop_reason: StopReason::EndTurn,
+            }),
+        ],
+    ];
+    let (mut runner, mut event_rx) = make_multi_runner(calls);
+    tokio::spawn(async move { while event_rx.recv().await.is_some() {} });
+
+    let _ = runner.run().await.unwrap();
+
+    let messages = runner.params.store.messages();
+    let has_continuation = messages.iter().any(|m| {
+        m.role == MessageRole::User
+            && m.content.iter().any(|b| match b {
+                loopal_message::ContentBlock::Text { text } => text.contains("[Continue"),
+                _ => false,
+            })
+    });
+    assert!(
+        has_continuation,
+        "truncated tools + thinking must inject continuation"
+    );
+}
+
+/// After a MaxTokens auto-continuation, the injected synthetic user message
+/// must keep the conversation in strict Assistant/User role alternation.
+#[tokio::test]
+async fn test_messages_alternate_roles_after_continuation() {
+    let calls = vec![
+        vec![
+            Ok(StreamChunk::Text {
+                text: "part 1".into(),
+            }),
+            Ok(StreamChunk::Done {
+                stop_reason: StopReason::MaxTokens,
+            }),
+        ],
+        vec![
+            Ok(StreamChunk::Text {
+                text: "part 2".into(),
+            }),
+            Ok(StreamChunk::Done {
+                stop_reason: StopReason::EndTurn,
+            }),
+        ],
+    ];
+    let (mut runner, mut event_rx) = make_multi_runner(calls);
+    tokio::spawn(async move { while event_rx.recv().await.is_some() {} });
+
+    let _ = runner.run().await.unwrap();
+
+    // Verify strict role alternation (ignoring system messages at the front).
+ let messages = runner.params.store.messages(); + let non_system: Vec<_> = messages + .iter() + .filter(|m| m.role != MessageRole::System) + .collect(); + for pair in non_system.windows(2) { + assert_ne!( + pair[0].role, pair[1].role, + "consecutive messages must alternate roles: {:?} followed by {:?}", + pair[0].role, pair[1].role + ); + } +}