Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions crates/loopal-error/src/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@ impl ProviderError {
// Network-level errors (connection reset, timeout, DNS) are transient.
ProviderError::Http(_) => true,
ProviderError::Api { status, message } => {
// 400 with context overflow keywords is deterministic — never retryable
// 400 with context overflow keywords is deterministic — never retryable.
// Generic "invalid_request_error" is excluded: it covers many
// non-overflow 400s (prefill rejection, malformed blocks, etc.).
if *status == 400
&& (message.contains("invalid_request_error")
|| message.contains("prompt is too long")
&& (message.contains("prompt is too long")
|| message.contains("maximum context length"))
{
return false;
Expand Down
23 changes: 23 additions & 0 deletions crates/loopal-error/tests/suite/error_edge_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,29 @@ fn test_api_400_invalid_request_not_retryable() {
assert!(!err.is_retryable());
}

#[test]
fn test_prefill_rejection_not_context_overflow() {
    // Anthropic emits this body when thinking is enabled and the conversation
    // ends with an assistant message (prefill unsupported in thinking mode).
    let payload = r#"{"error":{"message":"This model does not support assistant message prefill. The conversation must end with a user message.","type":"invalid_request_error"},"type":"error"}"#;
    let err = ProviderError::Api {
        status: 400,
        message: payload.into(),
    };
    assert!(
        !err.is_context_overflow(),
        "prefill rejection must not be classified as context overflow"
    );
    assert!(
        !err.is_retryable(),
        "prefill rejection is deterministic, not retryable"
    );
}

#[test]
fn test_api_500_not_context_overflow() {
let err = ProviderError::Api {
Expand Down
10 changes: 10 additions & 0 deletions crates/loopal-provider-api/src/thinking.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,16 @@ pub enum ThinkingCapability {
ThinkingBudget,
}

impl ThinkingCapability {
/// Whether this capability forbids assistant-message prefill when active.
///
/// Anthropic's API rejects conversations ending with an assistant message
/// when thinking is enabled. OpenAI and Google allow prefill regardless.
pub fn forbids_prefill(&self) -> bool {
matches!(self, Self::BudgetRequired | Self::Adaptive)
}
}

#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ThinkingConfig {
Expand Down
2 changes: 2 additions & 0 deletions crates/loopal-provider-api/tests/suite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ mod middleware_test;
mod model_router_test;
#[path = "suite/model_types_test.rs"]
mod model_types_test;
#[path = "suite/thinking_capability_test.rs"]
mod thinking_capability_test;
16 changes: 16 additions & 0 deletions crates/loopal-provider-api/tests/suite/thinking_capability_test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
//! Tests for ThinkingCapability methods.

use loopal_provider_api::ThinkingCapability;

#[test]
fn anthropic_capabilities_forbid_prefill() {
    // Both Anthropic-style capabilities must report that prefill is illegal.
    for cap in [
        ThinkingCapability::BudgetRequired,
        ThinkingCapability::Adaptive,
    ] {
        assert!(cap.forbids_prefill());
    }
}

#[test]
fn non_anthropic_capabilities_allow_prefill() {
    // No-thinking and OpenAI/Google-style capabilities must permit prefill.
    for cap in [
        ThinkingCapability::None,
        ThinkingCapability::ReasoningEffort,
        ThinkingCapability::ThinkingBudget,
    ] {
        assert!(!cap.forbids_prefill());
    }
}
9 changes: 5 additions & 4 deletions crates/loopal-provider/src/anthropic/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,11 +177,12 @@ impl AnthropicProvider {
.unwrap_or_else(|_| "failed to read body".into());
tracing::error!(status = status.as_u16(), body = %text, "API error");

// Detect context overflow: 400 + known prompt-too-long patterns
// Detect context overflow: 400 + known prompt-too-long patterns.
// Intentionally excludes "invalid_request_error" — that type covers
// many 400 errors (prefill rejection, malformed blocks, etc.) and
// must not be conflated with context overflow.
if status.as_u16() == 400
&& (text.contains("prompt is too long")
|| text.contains("maximum context length")
|| text.contains("invalid_request_error"))
&& (text.contains("prompt is too long") || text.contains("maximum context length"))
{
return ProviderError::ContextOverflow { message: text }.into();
}
Expand Down
19 changes: 19 additions & 0 deletions crates/loopal-runtime/src/agent_loop/llm_params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,23 @@ impl AgentLoopRunner {
debug_dump_dir: Some(loopal_config::tmp_dir()),
})
}

/// Whether the current model requires a user-message suffix for continuation.
///
/// True only when the provider forbids assistant-message prefill (currently
/// Anthropic) AND a thinking config actually resolves for this model. OpenAI
/// and Google reasoning models allow prefill regardless of thinking state,
/// so we preserve the higher-quality mid-sentence continuation for them.
pub(super) fn needs_continuation_injection(&self) -> bool {
    let capability = get_thinking_capability(self.params.config.model());
    // Short-circuit: only consult the thinking config when prefill is banned.
    capability.forbids_prefill()
        && resolve_thinking_config(
            &self.model_config.thinking,
            capability,
            self.model_config.max_output_tokens,
        )
        .is_some()
}
}
1 change: 1 addition & 0 deletions crates/loopal-runtime/src/agent_loop/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ mod tools_inject;
pub(crate) mod tools_plan;
mod tools_resolve;
pub mod turn_context;
mod turn_continue;
mod turn_exec;
pub(crate) mod turn_metrics;
pub mod turn_observer;
Expand Down
63 changes: 63 additions & 0 deletions crates/loopal-runtime/src/agent_loop/turn_continue.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
//! Synthetic message injection for auto-continuation compatibility.
//!
//! When thinking mode is active, the Anthropic API rejects "assistant message
//! prefill" — the conversation must end with a user message. The standard
//! auto-continuation flow records an assistant message and loops back, which
//! violates this constraint.
//!
//! This module provides helpers that inject a synthetic user message when
//! needed, preserving the normal prefill behavior for non-thinking providers.

use loopal_message::{ContentBlock, Message, MessageRole};
use tracing::error;

use super::runner::AgentLoopRunner;

/// Synthetic prompt injected when the LLM must continue but thinking mode
/// forbids assistant-message prefill.
const CONTINUE_PROMPT: &str = "[Continue from where you left off]";

impl AgentLoopRunner {
    /// Inject a synthetic user message if the provider forbids prefill with thinking.
    ///
    /// Called before `continue` in auto-continuation paths (MaxTokens,
    /// PauseTurn, stream truncation). When the provider allows prefill, the
    /// model resumes from the partial assistant message directly, so no
    /// injection is needed.
    pub(super) fn push_continuation_if_thinking(&mut self) {
        if !self.needs_continuation_injection() {
            return;
        }
        self.persist_and_push_user(CONTINUE_PROMPT);
    }

    /// Push a new user message with stop-hook feedback.
    ///
    /// After `record_assistant_message`, the last message in the store is
    /// Assistant. The old `append_warnings_to_last_user` would violate its
    /// own `debug_assert!(role == User)`. This method correctly creates a
    /// new User message regardless of thinking mode.
    pub(super) fn push_stop_feedback(&mut self, feedback: String) {
        // Hand the owned String straight through; no re-allocation occurs.
        self.persist_and_push_user(feedback);
    }

    /// Construct, persist, and push a User message with the given text.
    ///
    /// Takes `impl Into<String>` so `&'static str` callers allocate once and
    /// `String` callers allocate not at all (previously a `&str` parameter
    /// forced `to_string()` — a redundant clone for owned feedback strings).
    fn persist_and_push_user(&mut self, text: impl Into<String>) {
        let mut msg = Message {
            id: None,
            role: MessageRole::User,
            content: vec![ContentBlock::Text { text: text.into() }],
        };
        // Persistence failure is logged but non-fatal: the message is still
        // pushed to the in-memory store so the turn can proceed.
        if let Err(e) = self
            .params
            .deps
            .session_manager
            .save_message(&self.params.session.id, &mut msg)
        {
            error!(error = %e, "failed to persist continuation message");
        }
        self.params.store.push_user(msg);
    }
}
6 changes: 3 additions & 3 deletions crates/loopal-runtime/src/agent_loop/turn_exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ impl AgentLoopRunner {
max_continuations: self.params.harness.max_auto_continuations,
})
.await?;
self.push_continuation_if_thinking();
continue;
}
return Ok(TurnOutput { output: last_text });
Expand Down Expand Up @@ -125,6 +126,7 @@ impl AgentLoopRunner {
max_continuations: self.params.harness.max_auto_continuations,
})
.await?;
self.push_continuation_if_thinking();
continue;
}
return Ok(TurnOutput { output: last_text });
Expand All @@ -135,9 +137,7 @@ impl AgentLoopRunner {
&& let Some(feedback) = self.run_stop_hooks().await
{
stop_feedback_count += 1;
self.params
.store
.append_warnings_to_last_user(vec![feedback]);
self.push_stop_feedback(feedback);
continue;
}
return Ok(TurnOutput { output: last_text });
Expand Down
34 changes: 30 additions & 4 deletions crates/loopal-runtime/tests/agent_loop/mock_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,28 @@ fn build_params(
messages: Vec<loopal_message::Message>,
permission_mode: PermissionMode,
) -> AgentLoopParams {
AgentLoopParams {
config: AgentConfig {
build_params_with_config(
kernel,
frontend,
fixture,
messages,
AgentConfig {
permission_mode,
..Default::default()
},
)
}

/// Build AgentLoopParams with a fully custom `AgentConfig`.
fn build_params_with_config(
kernel: Arc<Kernel>,
frontend: Arc<dyn loopal_runtime::AgentFrontend>,
fixture: &TestFixture,
messages: Vec<loopal_message::Message>,
config: AgentConfig,
) -> AgentLoopParams {
AgentLoopParams {
config,
deps: AgentDeps {
kernel,
frontend,
Expand Down Expand Up @@ -96,6 +113,15 @@ pub fn make_runner_with_mock_provider(

/// Build an `AgentLoopRunner` driven by `MultiCallProvider` with the default
/// `AgentConfig`, returning the runner and its event receiver.
///
/// Thin delegator to `make_multi_runner_with_config`; use that variant
/// directly when a non-default config (e.g. a thinking config) is needed.
pub fn make_multi_runner(
calls: Vec<Vec<Result<StreamChunk, LoopalError>>>,
) -> (AgentLoopRunner, mpsc::Receiver<AgentEvent>) {
make_multi_runner_with_config(calls, AgentConfig::default())
}

/// Like `make_multi_runner` but accepts a custom `AgentConfig` (e.g. for
/// testing with `ThinkingConfig::Disabled`).
pub fn make_multi_runner_with_config(
calls: Vec<Vec<Result<StreamChunk, LoopalError>>>,
config: AgentConfig,
) -> (AgentLoopRunner, mpsc::Receiver<AgentEvent>) {
let fixture = TestFixture::new();
let (event_tx, event_rx) = mpsc::channel(64);
Expand All @@ -112,12 +138,12 @@ pub fn make_multi_runner(
));
let mut kernel = Kernel::new(Settings::default()).unwrap();
kernel.register_provider(Arc::new(MultiCallProvider::new(calls)) as Arc<dyn Provider>);
let params = build_params(
let params = build_params_with_config(
Arc::new(kernel),
frontend,
&fixture,
vec![loopal_message::Message::user("go")],
PermissionMode::Bypass,
config,
);
(AgentLoopRunner::new(params), event_rx)
}
Expand Down
1 change: 1 addition & 0 deletions crates/loopal-runtime/tests/agent_loop/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ mod retry_cancel_test;
mod run_test;
mod stream_truncation_edge_test;
mod stream_truncation_test;
mod thinking_continue_test;
mod tools_test;
mod turn_completion_edge_test;
mod turn_completion_test;
Expand Down
Loading
Loading