Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 53 additions & 15 deletions openhands-sdk/openhands/sdk/agent/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import re
import sys
from abc import ABC, abstractmethod
from collections.abc import Generator, Iterable
from collections.abc import Generator, Iterable, Sequence
from concurrent.futures import ThreadPoolExecutor
from typing import TYPE_CHECKING, Any

Expand Down Expand Up @@ -300,10 +300,19 @@ def step(
NOTE: state will be mutated in-place.
"""

def resolve_diff_from_deserialized(self, persisted: "AgentBase") -> "AgentBase":
def resolve_diff_from_deserialized(
self,
persisted: "AgentBase",
events: "Sequence[Any] | None" = None,
) -> "AgentBase":
"""
Return a new AgentBase instance equivalent to `persisted` but with
explicitly whitelisted fields (e.g. api_key) taken from `self`.

Args:
persisted: The persisted agent from the conversation state.
events: Optional event sequence to scan for used tools if tool
names don't match. Only scanned when needed (O(n) fallback).
"""
if persisted.__class__ is not self.__class__:
raise ValueError(
Expand Down Expand Up @@ -338,28 +347,57 @@ def resolve_diff_from_deserialized(self, persisted: "AgentBase") -> "AgentBase":
if self.agent_context is not None:
updates["agent_context"] = self.agent_context

# Create maps by tool name for easy lookup
runtime_tools_map = {tool.name: tool for tool in self.tools}
persisted_tools_map = {tool.name: tool for tool in persisted.tools}

# Check that tool names match
runtime_names = set(runtime_tools_map.keys())
persisted_names = set(persisted_tools_map.keys())
# Get tool names for comparison
runtime_names = {tool.name for tool in self.tools}
persisted_names = {tool.name for tool in persisted.tools}

# If tool names match exactly, no need to check event history
if runtime_names == persisted_names:
# Tools unchanged, proceed normally
pass
elif events is not None:
# Tool names differ - scan events to find which tools were actually used
# This is O(n) but only happens when tools change
from openhands.sdk.event import ActionEvent

used_tools = {
event.tool_name
for event in events
if isinstance(event, ActionEvent) and event.tool_name
}

if runtime_names != persisted_names:
# Only require tools that were actually used in history
missing_used_tools = used_tools - runtime_names
if missing_used_tools:
raise ValueError(
f"Cannot resume conversation: tools that were used in history "
f"are missing from runtime: {sorted(missing_used_tools)}. "
f"Available tools: {sorted(runtime_names)}"
)
# Update tools to match runtime (allows new tools to be added)
updates["tools"] = self.tools
else:
# No events provided - strict matching (legacy behavior)
missing_in_runtime = persisted_names - runtime_names
missing_in_persisted = runtime_names - persisted_names
error_msg = "Tools don't match between runtime and persisted agents."
if missing_in_runtime:
error_msg += f" Missing in runtime: {missing_in_runtime}."
error_msg += f" Missing in runtime: {sorted(missing_in_runtime)}."
if missing_in_persisted:
error_msg += f" Missing in persisted: {missing_in_persisted}."
error_msg += f" Missing in persisted: {sorted(missing_in_persisted)}."
raise ValueError(error_msg)

reconciled = persisted.model_copy(update=updates)
if self.model_dump(exclude_none=True) != reconciled.model_dump(
exclude_none=True
):

# Validate agent equality - exclude tools from comparison since we
# already validated tool requirements above
exclude_fields = {"tools"} if events is not None else set()
self_dump = self.model_dump(exclude_none=True, exclude=exclude_fields)
reconciled_dump = reconciled.model_dump(
exclude_none=True, exclude=exclude_fields
)

if self_dump != reconciled_dump:
raise ValueError(
"The Agent provided is different from the one in persisted state.\n"
f"Diff: {pretty_pydantic_diff(self, reconciled)}"
Expand Down
14 changes: 9 additions & 5 deletions openhands-sdk/openhands/sdk/conversation/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,6 @@ class ConversationState(OpenHandsModel):
default_factory=FIFOLock
) # FIFO lock for thread safety

# ===== Public "events" facade (Sequence[Event]) =====
@property
def events(self) -> EventLog:
return self._events
Expand Down Expand Up @@ -200,12 +199,17 @@ def create(
f"but persisted state has {state.id}"
)

# Reconcile agent config with deserialized one
resolved = agent.resolve_diff_from_deserialized(state.agent)

# Attach runtime handles and commit reconciled agent (may autosave)
# Attach event log early so we can read history
state._fs = file_store
state._events = EventLog(file_store, dir_path=EVENTS_DIR)

# Reconcile agent config with deserialized one
# Pass event log so tool usage can be checked on-the-fly if needed
resolved = agent.resolve_diff_from_deserialized(
state.agent, events=state._events
)

# Commit reconciled agent (may autosave)
state._autosave_enabled = True
state.agent = resolved

Expand Down
148 changes: 132 additions & 16 deletions tests/cross/test_agent_reconciliation.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,14 @@ def test_conversation_restarted_with_changed_working_directory(tmp_path_factory)


# Tests from test_local_conversation_tools_integration.py
def test_conversation_with_different_agent_tools_fails():
"""Test that using an agent with different tools fails (tools must match)."""
import pytest
def test_conversation_allows_removing_unused_tools():
"""Test that removing tools that weren't used in history is allowed.

With relaxed tool matching, only tools that were actually USED in the
conversation history need to be present. Unused tools can be removed.
"""
with tempfile.TemporaryDirectory() as temp_dir:
# Create and save conversation with original agent
# Create conversation with original agent having 2 tools
original_tools = [
Tool(name="TerminalTool"),
Tool(name="FileEditorTool"),
Expand All @@ -128,33 +130,147 @@ def test_conversation_with_different_agent_tools_fails():
visualizer=None,
)

# Send a message to create some state
# Send a message but NO tool is used (no ActionEvent in history)
conversation.send_message(
Message(role="user", content=[TextContent(text="test message")])
)

# Get the conversation ID for reuse
conversation_id = conversation.state.id
del conversation

# Delete conversation to simulate restart
# Resume with only one tool - should succeed since no tools were used
reduced_tools = [Tool(name="TerminalTool")] # Removed FileEditorTool
llm2 = LLM(
model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
)
reduced_agent = Agent(llm=llm2, tools=reduced_tools)

# This should succeed - FileEditorTool was never used
new_conversation = LocalConversation(
agent=reduced_agent,
workspace=temp_dir,
persistence_dir=temp_dir,
conversation_id=conversation_id,
visualizer=None,
)
assert new_conversation.id == conversation_id


def test_conversation_allows_adding_new_tools():
"""Test that adding new tools not in the original conversation is allowed.

With relaxed tool matching, new tools can be added when resuming a
conversation without causing failures.
"""
with tempfile.TemporaryDirectory() as temp_dir:
# Create conversation with only one tool
original_tools = [Tool(name="TerminalTool")]
llm = LLM(
model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
)
original_agent = Agent(llm=llm, tools=original_tools)
conversation = LocalConversation(
agent=original_agent,
workspace=temp_dir,
persistence_dir=temp_dir,
visualizer=None,
)

# Send a message (no tools used)
conversation.send_message(
Message(role="user", content=[TextContent(text="test message")])
)

conversation_id = conversation.state.id
del conversation

# Try to create new conversation with different tools (only bash tool)
different_tools = [Tool(name="TerminalTool")] # Missing FileEditorTool
# Resume with additional tools - should succeed
expanded_tools = [
Tool(name="TerminalTool"),
Tool(name="FileEditorTool"), # New tool added
]
llm2 = LLM(
model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
)
different_agent = Agent(llm=llm2, tools=different_tools)
expanded_agent = Agent(llm=llm2, tools=expanded_tools)

# This should succeed - adding new tools is allowed
new_conversation = LocalConversation(
agent=expanded_agent,
workspace=temp_dir,
persistence_dir=temp_dir,
conversation_id=conversation_id,
visualizer=None,
)
assert new_conversation.id == conversation_id
# Verify the new tools are available
assert len(new_conversation.agent.tools) == 2


def test_conversation_fails_when_used_tool_is_missing():
"""Test that removing a tool that WAS used in history fails.

This is the core correctness requirement: if a tool was actually used
in the conversation history, it MUST be present when resuming.
"""
from openhands.sdk.event import ActionEvent

with tempfile.TemporaryDirectory() as temp_dir:
# Create conversation with two tools
original_tools = [
Tool(name="TerminalTool"),
Tool(name="FileEditorTool"),
]
llm = LLM(
model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
)
original_agent = Agent(llm=llm, tools=original_tools)
conversation = LocalConversation(
agent=original_agent,
workspace=temp_dir,
persistence_dir=temp_dir,
visualizer=None,
)

# Initialize the agent to get actual tool definitions
conversation.agent.init_state(conversation.state, lambda e: None)

# Simulate that TerminalTool was used by adding an ActionEvent
from openhands.sdk.llm import MessageToolCall, TextContent

action_event = ActionEvent(
tool_name="TerminalTool",
tool_call_id="test-call-1",
thought=[TextContent(text="Running a command")],
tool_call=MessageToolCall(
id="test-call-1",
name="TerminalTool",
arguments="{}",
origin="completion",
),
llm_response_id="test-response-1",
)
conversation.state.events.append(action_event)

conversation_id = conversation.state.id
del conversation

# Try to resume WITHOUT TerminalTool - should fail
reduced_tools = [Tool(name="FileEditorTool")] # Missing TerminalTool
llm2 = LLM(
model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
)
reduced_agent = Agent(llm=llm2, tools=reduced_tools)

# This should raise - TerminalTool was used in history but is now missing
import pytest

# This should fail - tools must match during reconciliation
with pytest.raises(
ValueError, match="Tools don't match between runtime and persisted agents"
):
with pytest.raises(ValueError, match="tools that were used in history"):
LocalConversation(
agent=different_agent,
agent=reduced_agent,
workspace=temp_dir,
persistence_dir=temp_dir,
conversation_id=conversation_id, # Use same ID to avoid ID mismatch
conversation_id=conversation_id,
visualizer=None,
)

Expand Down
Loading