Skip to content
Merged
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
40512c7
Prevent fabricated follow-up after tool failures
SavitayopVBN Mar 8, 2026
e5a9562
Merge remote-tracking branch 'upstream/main' into ai-sync-424-1772935332
tsubasakong Mar 8, 2026
36547a8
Merge remote-tracking branch 'upstream/main' into HEAD
tsubasakong Mar 8, 2026
f7457be
Merge remote-tracking branch 'upstream/main' into fix/414-stop-after-…
tsubasakong Mar 8, 2026
9d284ce
Merge remote-tracking branch 'upstream/main' into HEAD
tsubasakong Mar 8, 2026
1b22c4a
Merge remote-tracking branch 'upstream/main' into HEAD
tsubasakong Mar 8, 2026
56b3306
Merge remote-tracking branch 'upstream/main' into HEAD
tsubasakong Mar 9, 2026
dadaf30
Merge remote-tracking branch 'upstream/main' into HEAD
tsubasakong Mar 9, 2026
9329369
Merge remote-tracking branch 'upstream/main' into sync/pr-424
tsubasakong Mar 10, 2026
6046b65
Merge remote-tracking branch 'upstream/main' into HEAD
tsubasakong Mar 10, 2026
51b75fb
Merge remote-tracking branch 'refs/remotes/upstream-sync/main' into HEAD
tsubasakong Mar 10, 2026
98247d5
Merge remote-tracking branch 'upstream/main' into HEAD
tsubasakong Mar 10, 2026
95f0c61
Merge remote-tracking branch 'upstream/main' into HEAD
tsubasakong Mar 11, 2026
b562f46
Merge remote-tracking branch 'upstream/main' into HEAD
tsubasakong Mar 11, 2026
27ec67d
Merge remote-tracking branch 'upstream/main' into HEAD
tsubasakong Mar 12, 2026
57190ac
Merge remote-tracking branch 'upstream/main' into HEAD
tsubasakong Mar 12, 2026
6964334
Merge remote-tracking branch 'upstream/main' into fix/414-stop-after-…
tsubasakong Mar 12, 2026
5d34995
Merge remote-tracking branch 'upstream/main' into HEAD
tsubasakong Mar 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions crates/openfang-runtime/src/agent_loop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,27 @@ const MAX_CONTINUATIONS: u32 = 5;
/// Maximum message history size before auto-trimming to prevent context overflow.
const MAX_HISTORY_MESSAGES: usize = 20;

/// Extra guidance injected after failed tool calls to prevent fabricated follow-up actions.
///
/// `append_tool_error_guidance` pushes this text as a trailing `ContentBlock::Text` whenever a
/// batch of tool results contains at least one `ContentBlock::ToolResult` with `is_error: true`.
/// The message tells the model not to invent results for failed tools and to either retry with
/// a materially different approach or explain the failure and stop.
const TOOL_ERROR_GUIDANCE: &str =
"[System: One or more tool calls failed. Failed tools did not produce usable data. Do NOT invent missing results, cite nonexistent search results, or pretend failed tools succeeded. If your next steps depend on a failed tool, either retry with a materially different approach or explain the failure to the user and stop. Do not write files, store memory, or take downstream actions based on failed tool outputs.]";

/// Appends [`TOOL_ERROR_GUIDANCE`] to a batch of tool results when any of them failed.
///
/// Scans `tool_result_blocks` for a `ContentBlock::ToolResult` whose `is_error` flag is `true`.
/// If one is found, a single `ContentBlock::Text` carrying the guidance is pushed onto the end
/// of the vector; otherwise the vector is left untouched.
fn append_tool_error_guidance(tool_result_blocks: &mut Vec<ContentBlock>) {
    // Predicate: is this block a tool result that was flagged as an error?
    let is_failed_result = |candidate: &ContentBlock| {
        matches!(candidate, ContentBlock::ToolResult { is_error: true, .. })
    };

    // Nothing failed in this batch, so no guidance is needed.
    if !tool_result_blocks.iter().any(is_failed_result) {
        return;
    }

    let guidance = ContentBlock::Text {
        text: TOOL_ERROR_GUIDANCE.to_string(),
    };
    tool_result_blocks.push(guidance);
}

/// Strip a provider prefix from a model ID before sending to the API.
///
/// Many models are stored as `provider/org/model` (e.g. `openrouter/google/gemini-2.5-flash`)
Expand Down Expand Up @@ -717,6 +738,8 @@ pub async fn run_agent_loop(
});
}

append_tool_error_guidance(&mut tool_result_blocks);

// Detect approval denials and inject guidance to prevent infinite retry loops
let denial_count = tool_result_blocks.iter().filter(|b| {
matches!(b, ContentBlock::ToolResult { content, is_error: true, .. }
Expand Down Expand Up @@ -1702,6 +1725,8 @@ pub async fn run_agent_loop_streaming(
});
}

append_tool_error_guidance(&mut tool_result_blocks);

// Detect approval denials and inject guidance to prevent infinite retry loops
let denial_count = tool_result_blocks.iter().filter(|b| {
matches!(b, ContentBlock::ToolResult { content, is_error: true, .. }
Expand Down Expand Up @@ -2827,6 +2852,58 @@ mod tests {
);
}

/// Runs the non-streaming agent loop with no tools registered and checks that
/// [`TOOL_ERROR_GUIDANCE`] ends up in the session history as a text block.
#[tokio::test]
async fn test_tool_error_injects_no_fabrication_guidance() {
    let substrate = openfang_memory::MemorySubstrate::open_in_memory(0.01).unwrap();
    let owner_id = openfang_types::agent::AgentId::new();
    let mut session = openfang_memory::session::Session {
        id: openfang_types::agent::SessionId::new(),
        agent_id: owner_id,
        messages: Vec::new(),
        context_window_tokens: 0,
        label: None,
    };
    let manifest = test_manifest();
    let llm: Arc<dyn LlmDriver> = Arc::new(EmptyAfterToolUseDriver::new());

    run_agent_loop(
        &manifest,
        "Do something with tools",
        &mut session,
        &substrate,
        llm,
        &[], // no tools registered — the tool call will fail, which is fine
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None, // on_phase
        None, // media_engine
        None, // tts_engine
        None, // docker_config
        None, // hooks
        None, // context_window_tokens
        None, // process_manager
    )
    .await
    .expect("Loop should complete without error");

    // Flatten every block-structured message and look for the exact guidance text.
    let guidance_seen = session
        .messages
        .iter()
        .filter_map(|msg| match &msg.content {
            MessageContent::Blocks(blocks) => Some(blocks),
            _ => None,
        })
        .flat_map(|blocks| blocks.iter())
        .any(|block| matches!(block, ContentBlock::Text { text } if text == TOOL_ERROR_GUIDANCE));

    assert!(
        guidance_seen,
        "Expected tool error guidance in session messages after failed tool call"
    );
}

#[tokio::test]
async fn test_empty_response_max_tokens_returns_fallback() {
let memory = openfang_memory::MemorySubstrate::open_in_memory(0.01).unwrap();
Expand Down