From f685409c7ccd743e843fb55dd4391c2c6ae51437 Mon Sep 17 00:00:00 2001 From: amankumarpandeyin Date: Mon, 29 Dec 2025 23:34:24 +0530 Subject: [PATCH 1/4] feat: relax tool matching on resume Only fail if a tool that was *used in history* is missing. Allow adding new tools freely when resuming conversations. Changes: - Add optional used_tools param to resolve_diff_from_deserialized() - Extract used tool names from event history in state.py - Add test_conversation_allows_removing_unused_tools - Add test_conversation_allows_adding_new_tools Fixes #1533 --- openhands-sdk/openhands/sdk/agent/base.py | 64 +++++++++---- .../openhands/sdk/conversation/state.py | 19 +++- tests/cross/test_agent_reconciliation.py | 92 ++++++++++++++----- 3 files changed, 131 insertions(+), 44 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/base.py b/openhands-sdk/openhands/sdk/agent/base.py index 6cc47ab197..3f78321f0e 100644 --- a/openhands-sdk/openhands/sdk/agent/base.py +++ b/openhands-sdk/openhands/sdk/agent/base.py @@ -297,10 +297,21 @@ def step( NOTE: state will be mutated in-place. """ - def resolve_diff_from_deserialized(self, persisted: "AgentBase") -> "AgentBase": + def resolve_diff_from_deserialized( + self, + persisted: "AgentBase", + used_tools: set[str] | None = None, + ) -> "AgentBase": """ Return a new AgentBase instance equivalent to `persisted` but with explicitly whitelisted fields (e.g. api_key) taken from `self`. + + Args: + persisted: The persisted agent from the conversation state. + used_tools: Optional set of tool names that were actually used in + the conversation history. If provided, only these tools are + required to be present in the runtime agent. New tools can be + added freely. """ if persisted.__class__ is not self.__class__: raise ValueError( @@ -339,28 +350,45 @@ def resolve_diff_from_deserialized(self, persisted: "AgentBase") -> "AgentBase": runtime_tools_map = {tool.name: tool for tool in self.tools} persisted_tools_map = {tool.name: tool for tool in persisted.tools} - # Check that tool names match runtime_names = set(runtime_tools_map.keys()) persisted_names = set(persisted_tools_map.keys()) - if runtime_names != persisted_names: - missing_in_runtime = persisted_names - runtime_names - missing_in_persisted = runtime_names - persisted_names - error_msg = "Tools don't match between runtime and persisted agents." - if missing_in_runtime: - error_msg += f" Missing in runtime: {missing_in_runtime}." - if missing_in_persisted: - error_msg += f" Missing in persisted: {missing_in_persisted}." - raise ValueError(error_msg) + # Relaxed tool matching: only fail if tools that were actually USED + # in history are missing. Allow adding new tools freely. + if used_tools is not None: + # Only check that used tools are present in runtime + missing_used_tools = used_tools - runtime_names + if missing_used_tools: + raise ValueError( + f"Cannot resume conversation: tools that were used in history " + f"are missing from runtime: {missing_used_tools}. " + f"Available tools: {runtime_names}" + ) + # Update tools to match runtime (allows new tools to be added) + updates["tools"] = self.tools + else: + # Legacy behavior: require exact match when used_tools not provided + if runtime_names != persisted_names: + missing_in_runtime = persisted_names - runtime_names + missing_in_persisted = runtime_names - persisted_names + error_msg = "Tools don't match between runtime and persisted agents." + if missing_in_runtime: + error_msg += f" Missing in runtime: {missing_in_runtime}." + if missing_in_persisted: + error_msg += f" Missing in persisted: {missing_in_persisted}." + raise ValueError(error_msg) reconciled = persisted.model_copy(update=updates) - if self.model_dump(exclude_none=True) != reconciled.model_dump( - exclude_none=True - ): - raise ValueError( - "The Agent provided is different from the one in persisted state.\n" - f"Diff: {pretty_pydantic_diff(self, reconciled)}" - ) + + # When used_tools is provided, we allow tool changes so skip strict comparison + if used_tools is None: + if self.model_dump(exclude_none=True) != reconciled.model_dump( + exclude_none=True + ): + raise ValueError( + "The Agent provided is different from the one in persisted state.\n" + f"Diff: {pretty_pydantic_diff(self, reconciled)}" + ) return reconciled def model_dump_succint(self, **kwargs): diff --git a/openhands-sdk/openhands/sdk/conversation/state.py b/openhands-sdk/openhands/sdk/conversation/state.py index 7fdf79aa7a..c1fb65f87e 100644 --- a/openhands-sdk/openhands/sdk/conversation/state.py +++ b/openhands-sdk/openhands/sdk/conversation/state.py @@ -188,12 +188,23 @@ def create( f"but persisted state has {state.id}" ) - # Reconcile agent config with deserialized one - resolved = agent.resolve_diff_from_deserialized(state.agent) - - # Attach runtime handles and commit reconciled agent (may autosave) + # Attach event log early so we can read history state._fs = file_store state._events = EventLog(file_store, dir_path=EVENTS_DIR) + + # Extract tool names that were actually used in history + # This allows adding new tools when resuming conversations + used_tools: set[str] = set() + for event in state._events: + if isinstance(event, ActionEvent): + used_tools.add(event.tool_name) + + # Reconcile agent config with deserialized one, passing used tools + resolved = agent.resolve_diff_from_deserialized( + state.agent, used_tools=used_tools + ) + + # Commit reconciled agent (may autosave) state._autosave_enabled = True state.agent = resolved diff --git a/tests/cross/test_agent_reconciliation.py b/tests/cross/test_agent_reconciliation.py index 7864ca729a..6f6f1e8142 100644 --- a/tests/cross/test_agent_reconciliation.py +++ b/tests/cross/test_agent_reconciliation.py @@ -107,12 +107,14 @@ def test_conversation_restarted_with_changed_working_directory(tmp_path_factory) # Tests from test_local_conversation_tools_integration.py -def test_conversation_with_different_agent_tools_fails(): - """Test that using an agent with different tools fails (tools must match).""" - import pytest +def test_conversation_allows_removing_unused_tools(): + """Test that removing tools that weren't used in history is allowed. + With relaxed tool matching, only tools that were actually USED in the + conversation history need to be present. Unused tools can be removed. + """ with tempfile.TemporaryDirectory() as temp_dir: - # Create and save conversation with original agent + # Create conversation with original agent having 2 tools original_tools = [ Tool(name="TerminalTool"), Tool(name="FileEditorTool"), @@ -128,35 +130,81 @@ def test_conversation_with_different_agent_tools_fails(): visualizer=None, ) - # Send a message to create some state + # Send a message but NO tool is used (no ActionEvent in history) conversation.send_message( Message(role="user", content=[TextContent(text="test message")]) ) - # Get the conversation ID for reuse conversation_id = conversation.state.id + del conversation - # Delete conversation to simulate restart + # Resume with only one tool - should succeed since no tools were used + reduced_tools = [Tool(name="TerminalTool")] # Removed FileEditorTool + llm2 = LLM( + model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm" + ) + reduced_agent = Agent(llm=llm2, tools=reduced_tools) + + # This should succeed - FileEditorTool was never used + new_conversation = LocalConversation( + agent=reduced_agent, + workspace=temp_dir, + persistence_dir=temp_dir, + conversation_id=conversation_id, + visualizer=None, + ) + assert new_conversation.id == conversation_id + + +def test_conversation_allows_adding_new_tools(): + """Test that adding new tools not in the original conversation is allowed. + + With relaxed tool matching, new tools can be added when resuming a + conversation without causing failures. + """ + with tempfile.TemporaryDirectory() as temp_dir: + # Create conversation with only one tool + original_tools = [Tool(name="TerminalTool")] + llm = LLM( + model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm" + ) + original_agent = Agent(llm=llm, tools=original_tools) + conversation = LocalConversation( + agent=original_agent, + workspace=temp_dir, + persistence_dir=temp_dir, + visualizer=None, + ) + + # Send a message (no tools used) + conversation.send_message( + Message(role="user", content=[TextContent(text="test message")]) + ) + + conversation_id = conversation.state.id del conversation - # Try to create new conversation with different tools (only bash tool) - different_tools = [Tool(name="TerminalTool")] # Missing FileEditorTool + # Resume with additional tools - should succeed + expanded_tools = [ + Tool(name="TerminalTool"), + Tool(name="FileEditorTool"), # New tool added + ] llm2 = LLM( model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm" ) - different_agent = Agent(llm=llm2, tools=different_tools) - - # This should fail - tools must match during reconciliation - with pytest.raises( - ValueError, match="Tools don't match between runtime and persisted agents" - ): - LocalConversation( - agent=different_agent, - workspace=temp_dir, - persistence_dir=temp_dir, - conversation_id=conversation_id, # Use same ID to avoid ID mismatch - visualizer=None, - ) + expanded_agent = Agent(llm=llm2, tools=expanded_tools) + + # This should succeed - adding new tools is allowed + new_conversation = LocalConversation( + agent=expanded_agent, + workspace=temp_dir, + persistence_dir=temp_dir, + conversation_id=conversation_id, + visualizer=None, + ) + assert new_conversation.id == conversation_id + # Verify the new tools are available + assert len(new_conversation.agent.tools) == 2 def test_conversation_with_same_agent_succeeds(): From 334fe33da39be7dfe5c03735e71f47ee775e3eec Mon Sep 17 00:00:00 2001 From: amankumarpandeyin Date: Thu, 1 Jan 2026 22:19:29 +0530 Subject: [PATCH 2/4] perf: add used_tool_names cache for O(1) lookup on resume Addresses performance concern raised in PR review: - Added used_tool_names field to ConversationState that persists with state - Added append_event() method that maintains the cache incrementally - On resume, reads from persisted cache instead of iterating all events - This provides O(1) lookup vs O(n) iteration through all events --- .../conversation/impl/local_conversation.py | 4 +- .../openhands/sdk/conversation/state.py | 38 +++++++++++++++---- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index 984f01f846..ba5d58d45b 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -136,9 +136,9 @@ def __init__( stuck_detection=stuck_detection, ) - # Default callback: persist every event to state + # Default callback: persist every event to state and update caches def _default_callback(e): - self._state.events.append(e) + self._state.append_event(e) self._hook_processor = None hook_callback = None diff --git a/openhands-sdk/openhands/sdk/conversation/state.py b/openhands-sdk/openhands/sdk/conversation/state.py index 825f940f9a..ecea585a3a 100644 --- a/openhands-sdk/openhands/sdk/conversation/state.py +++ b/openhands-sdk/openhands/sdk/conversation/state.py @@ -120,6 +120,14 @@ class ConversationState(OpenHandsModel): serialization_alias="secret_registry", ) + # Cache of tool names that have been used in this conversation + # This is incrementally updated when events are appended and persisted + # with the state, avoiding O(n) iteration on resume + used_tool_names: set[str] = Field( + default_factory=set, + description="Set of tool names that have been used in this conversation", + ) + # ===== Private attrs (NOT Fields) ===== _fs: FileStore = PrivateAttr() # filestore for persistence _events: EventLog = PrivateAttr() # now the storage for events @@ -138,6 +146,25 @@ class ConversationState(OpenHandsModel): def events(self) -> EventLog: return self._events + def append_event(self, event: Event) -> None: + """Append an event to the event log and update caches. + + This method should be preferred over direct events.append() calls + as it maintains the used_tool_names cache for efficient conversation + resume operations. + + Args: + event: The event to append to the log. + """ + self._events.append(event) + + # Update used_tool_names cache for ActionEvents + if isinstance(event, ActionEvent): + tool_name = event.tool_name + if tool_name not in self.used_tool_names: + # Use assignment to trigger autosave via __setattr__ + self.used_tool_names = self.used_tool_names | {tool_name} + @property def env_observation_persistence_dir(self) -> str | None: """Directory for persisting environment observation files.""" @@ -204,16 +231,13 @@ def create( state._fs = file_store state._events = EventLog(file_store, dir_path=EVENTS_DIR) - # Extract tool names that were actually used in history - # This allows adding new tools when resuming conversations - used_tools: set[str] = set() - for event in state._events: - if isinstance(event, ActionEvent): - used_tools.add(event.tool_name) + # Use persisted used_tool_names cache instead of iterating all events + # This provides O(1) lookup on resume vs O(n) iteration + # The cache is incrementally updated via append_event() # Reconcile agent config with deserialized one, passing used tools resolved = agent.resolve_diff_from_deserialized( - state.agent, used_tools=used_tools + state.agent, used_tools=state.used_tool_names ) # Commit reconciled agent (may autosave) From 374a5664d35dcdb04b0367672dbaaadef2fc5205 Mon Sep 17 00:00:00 2001 From: amankumarpandeyin Date: Sat, 3 Jan 2026 16:14:38 +0530 Subject: [PATCH 3/4] fix: address code review feedback on tool matching - Fix agent reconciliation: validate all config except tools when used_tools provided - Add cache backfill for old conversations without used_tool_names - Sort tool names in error messages for deterministic output - Remove unused runtime_tools_map/persisted_tools_map variables - Add test for core requirement: tools used in history must exist --- openhands-sdk/openhands/sdk/agent/base.py | 41 +++++++------- .../openhands/sdk/conversation/state.py | 24 ++++++--- tests/cross/test_agent_reconciliation.py | 53 +++++++++++++++++++ 3 files changed, 91 insertions(+), 27 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/base.py b/openhands-sdk/openhands/sdk/agent/base.py index 8caeb3b726..05c49a20e0 100644 --- a/openhands-sdk/openhands/sdk/agent/base.py +++ b/openhands-sdk/openhands/sdk/agent/base.py @@ -349,12 +349,9 @@ def resolve_diff_from_deserialized( if self.agent_context is not None: updates["agent_context"] = self.agent_context - # Create maps by tool name for easy lookup - runtime_tools_map = {tool.name: tool for tool in self.tools} - persisted_tools_map = {tool.name: tool for tool in persisted.tools} - - runtime_names = set(runtime_tools_map.keys()) - persisted_names = set(persisted_tools_map.keys()) + # Get tool names for comparison + runtime_names = {tool.name for tool in self.tools} + persisted_names = {tool.name for tool in persisted.tools} # Relaxed tool matching: only fail if tools that were actually USED # in history are missing. Allow adding new tools freely. @@ -364,8 +361,8 @@ def resolve_diff_from_deserialized( if missing_used_tools: raise ValueError( f"Cannot resume conversation: tools that were used in history " - f"are missing from runtime: {missing_used_tools}. " - f"Available tools: {runtime_names}" + f"are missing from runtime: {sorted(missing_used_tools)}. " + f"Available tools: {sorted(runtime_names)}" ) # Update tools to match runtime (allows new tools to be added) updates["tools"] = self.tools @@ -376,22 +373,28 @@ def resolve_diff_from_deserialized( missing_in_persisted = runtime_names - persisted_names error_msg = "Tools don't match between runtime and persisted agents." if missing_in_runtime: - error_msg += f" Missing in runtime: {missing_in_runtime}." + error_msg += f" Missing in runtime: {sorted(missing_in_runtime)}." if missing_in_persisted: - error_msg += f" Missing in persisted: {missing_in_persisted}." + error_msg += ( + f" Missing in persisted: {sorted(missing_in_persisted)}." + ) raise ValueError(error_msg) reconciled = persisted.model_copy(update=updates) - # When used_tools is provided, we allow tool changes so skip strict comparison - if used_tools is None: - if self.model_dump(exclude_none=True) != reconciled.model_dump( - exclude_none=True - ): - raise ValueError( - "The Agent provided is different from the one in persisted state.\n" - f"Diff: {pretty_pydantic_diff(self, reconciled)}" - ) + # Validate agent equality - when used_tools is provided, we exclude tools + # from comparison since we already validated tool requirements above + exclude_fields = {"tools"} if used_tools is not None else set() + self_dump = self.model_dump(exclude_none=True, exclude=exclude_fields) + reconciled_dump = reconciled.model_dump( + exclude_none=True, exclude=exclude_fields + ) + + if self_dump != reconciled_dump: + raise ValueError( + "The Agent provided is different from the one in persisted state.\n" + f"Diff: {pretty_pydantic_diff(self, reconciled)}" + ) return reconciled def model_dump_succint(self, **kwargs): diff --git a/openhands-sdk/openhands/sdk/conversation/state.py b/openhands-sdk/openhands/sdk/conversation/state.py index ecea585a3a..319dcf4f06 100644 --- a/openhands-sdk/openhands/sdk/conversation/state.py +++ b/openhands-sdk/openhands/sdk/conversation/state.py @@ -159,11 +159,10 @@ def append_event(self, event: Event) -> None: self._events.append(event) # Update used_tool_names cache for ActionEvents - if isinstance(event, ActionEvent): - tool_name = event.tool_name - if tool_name not in self.used_tool_names: + if isinstance(event, ActionEvent) and event.tool_name: + if event.tool_name not in self.used_tool_names: # Use assignment to trigger autosave via __setattr__ - self.used_tool_names = self.used_tool_names | {tool_name} + self.used_tool_names = self.used_tool_names | {event.tool_name} @property def env_observation_persistence_dir(self) -> str | None: @@ -231,13 +230,22 @@ def create( state._fs = file_store state._events = EventLog(file_store, dir_path=EVENTS_DIR) - # Use persisted used_tool_names cache instead of iterating all events - # This provides O(1) lookup on resume vs O(n) iteration - # The cache is incrementally updated via append_event() + # Use persisted used_tool_names cache for O(1) lookup on resume. + # If the cache is empty but there are events, we need to backfill + # for backward compatibility with old persisted conversations. + used_tools = state.used_tool_names + if not used_tools and len(state._events) > 0: + # Backfill cache from event history (O(n) but only once) + logger.info("Backfilling used_tool_names cache from event history...") + for event in state._events: + if isinstance(event, ActionEvent) and event.tool_name: + used_tools = used_tools | {event.tool_name} + # Persist backfilled cache + state.used_tool_names = used_tools # Reconcile agent config with deserialized one, passing used tools resolved = agent.resolve_diff_from_deserialized( - state.agent, used_tools=state.used_tool_names + state.agent, used_tools=used_tools ) # Commit reconciled agent (may autosave) diff --git a/tests/cross/test_agent_reconciliation.py b/tests/cross/test_agent_reconciliation.py index 6f6f1e8142..881c56179a 100644 --- a/tests/cross/test_agent_reconciliation.py +++ b/tests/cross/test_agent_reconciliation.py @@ -207,6 +207,59 @@ def test_conversation_allows_adding_new_tools(): assert len(new_conversation.agent.tools) == 2 +def test_conversation_fails_when_used_tool_is_missing(): + """Test that removing a tool that WAS used in history fails. + + This is the core correctness requirement: if a tool was actually used + in the conversation history, it MUST be present when resuming. + """ + with tempfile.TemporaryDirectory() as temp_dir: + # Create conversation with two tools + original_tools = [ + Tool(name="TerminalTool"), + Tool(name="FileEditorTool"), + ] + llm = LLM( + model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm" + ) + original_agent = Agent(llm=llm, tools=original_tools) + conversation = LocalConversation( + agent=original_agent, + workspace=temp_dir, + persistence_dir=temp_dir, + visualizer=None, + ) + + # Initialize the agent to get actual tool definitions + conversation.agent.init_state(conversation.state, lambda e: None) + + # Simulate that TerminalTool was used by recording it in used_tool_names + # In real usage this happens via state.append_event() with ActionEvents + conversation.state.used_tool_names = {"TerminalTool"} + + conversation_id = conversation.state.id + del conversation + + # Try to resume WITHOUT TerminalTool - should fail + reduced_tools = [Tool(name="FileEditorTool")] # Missing TerminalTool + llm2 = LLM( + model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm" + ) + reduced_agent = Agent(llm=llm2, tools=reduced_tools) + + # This should raise - TerminalTool was used in history but is now missing + import pytest + + with pytest.raises(ValueError, match="tools that were used in history"): + LocalConversation( + agent=reduced_agent, + workspace=temp_dir, + persistence_dir=temp_dir, + conversation_id=conversation_id, + visualizer=None, + ) + + def test_conversation_with_same_agent_succeeds(): """Test that using the same agent configuration succeeds.""" with tempfile.TemporaryDirectory() as temp_dir: From 3744de617f06fbfc2a9415b4dc5aadc56fe215ce Mon Sep 17 00:00:00 2001 From: amankumarpandeyin Date: Sat, 3 Jan 2026 21:55:58 +0530 Subject: [PATCH 4/4] refactor: simplify tool matching per reviewer feedback - Remove used_tool_names cache and append_event() method - EventLog behaves like regular list again (per @tofarr's feedback) - Scan events on-the-fly only when tool names don't match - O(1) for happy path (same tools), O(n) fallback when tools change --- openhands-sdk/openhands/sdk/agent/base.py | 57 +++++++++++-------- .../conversation/impl/local_conversation.py | 4 +- .../openhands/sdk/conversation/state.py | 45 +-------------- tests/cross/test_agent_reconciliation.py | 21 ++++++- 4 files changed, 55 insertions(+), 72 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/base.py b/openhands-sdk/openhands/sdk/agent/base.py index 05c49a20e0..1bf2bbb166 100644 --- a/openhands-sdk/openhands/sdk/agent/base.py +++ b/openhands-sdk/openhands/sdk/agent/base.py @@ -2,7 +2,7 @@ import re import sys from abc import ABC, abstractmethod -from collections.abc import Generator, Iterable +from collections.abc import Generator, Iterable, Sequence from concurrent.futures import ThreadPoolExecutor from typing import TYPE_CHECKING, Any @@ -303,7 +303,7 @@ def step( def resolve_diff_from_deserialized( self, persisted: "AgentBase", - used_tools: set[str] | None = None, + events: "Sequence[Any] | None" = None, ) -> "AgentBase": """ Return a new AgentBase instance equivalent to `persisted` but with @@ -311,10 +311,8 @@ def resolve_diff_from_deserialized( Args: persisted: The persisted agent from the conversation state. - used_tools: Optional set of tool names that were actually used in - the conversation history. If provided, only these tools are - required to be present in the runtime agent. New tools can be - added freely. + events: Optional event sequence to scan for used tools if tool + names don't match. Only scanned when needed (O(n) fallback). """ if persisted.__class__ is not self.__class__: raise ValueError( @@ -353,10 +351,22 @@ def resolve_diff_from_deserialized( runtime_names = {tool.name for tool in self.tools} persisted_names = {tool.name for tool in persisted.tools} - # Relaxed tool matching: only fail if tools that were actually USED - # in history are missing. Allow adding new tools freely. - if used_tools is not None: - # Only check that used tools are present in runtime + # If tool names match exactly, no need to check event history + if runtime_names == persisted_names: + # Tools unchanged, proceed normally + pass + elif events is not None: + # Tool names differ - scan events to find which tools were actually used + # This is O(n) but only happens when tools change + from openhands.sdk.event import ActionEvent + + used_tools = { + event.tool_name + for event in events + if isinstance(event, ActionEvent) and event.tool_name + } + + # Only require tools that were actually used in history missing_used_tools = used_tools - runtime_names if missing_used_tools: raise ValueError( @@ -367,24 +377,21 @@ def resolve_diff_from_deserialized( # Update tools to match runtime (allows new tools to be added) updates["tools"] = self.tools else: - # Legacy behavior: require exact match when used_tools not provided - if runtime_names != persisted_names: - missing_in_runtime = persisted_names - runtime_names - missing_in_persisted = runtime_names - persisted_names - error_msg = "Tools don't match between runtime and persisted agents." - if missing_in_runtime: - error_msg += f" Missing in runtime: {sorted(missing_in_runtime)}." - if missing_in_persisted: - error_msg += ( - f" Missing in persisted: {sorted(missing_in_persisted)}." - ) - raise ValueError(error_msg) + # No events provided - strict matching (legacy behavior) + missing_in_runtime = persisted_names - runtime_names + missing_in_persisted = runtime_names - persisted_names + error_msg = "Tools don't match between runtime and persisted agents." + if missing_in_runtime: + error_msg += f" Missing in runtime: {sorted(missing_in_runtime)}." + if missing_in_persisted: + error_msg += f" Missing in persisted: {sorted(missing_in_persisted)}." + raise ValueError(error_msg) reconciled = persisted.model_copy(update=updates) - # Validate agent equality - when used_tools is provided, we exclude tools - # from comparison since we already validated tool requirements above - exclude_fields = {"tools"} if used_tools is not None else set() + # Validate agent equality - exclude tools from comparison since we + # already validated tool requirements above + exclude_fields = {"tools"} if events is not None else set() self_dump = self.model_dump(exclude_none=True, exclude=exclude_fields) reconciled_dump = reconciled.model_dump( exclude_none=True, exclude=exclude_fields diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index ba5d58d45b..984f01f846 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -136,9 +136,9 @@ def __init__( stuck_detection=stuck_detection, ) - # Default callback: persist every event to state and update caches + # Default callback: persist every event to state def _default_callback(e): - self._state.append_event(e) + self._state.events.append(e) self._hook_processor = None hook_callback = None diff --git a/openhands-sdk/openhands/sdk/conversation/state.py b/openhands-sdk/openhands/sdk/conversation/state.py index 319dcf4f06..374b9873a7 100644 --- a/openhands-sdk/openhands/sdk/conversation/state.py +++ b/openhands-sdk/openhands/sdk/conversation/state.py @@ -120,14 +120,6 @@ class ConversationState(OpenHandsModel): serialization_alias="secret_registry", ) - # Cache of tool names that have been used in this conversation - # This is incrementally updated when events are appended and persisted - # with the state, avoiding O(n) iteration on resume - used_tool_names: set[str] = Field( - default_factory=set, - description="Set of tool names that have been used in this conversation", - ) - # ===== Private attrs (NOT Fields) ===== _fs: FileStore = PrivateAttr() # filestore for persistence _events: EventLog = PrivateAttr() # now the storage for events @@ -141,29 +133,10 @@ class ConversationState(OpenHandsModel): default_factory=FIFOLock ) # FIFO lock for thread safety - # ===== Public "events" facade (Sequence[Event]) ===== @property def events(self) -> EventLog: return self._events - def append_event(self, event: Event) -> None: - """Append an event to the event log and update caches. - - This method should be preferred over direct events.append() calls - as it maintains the used_tool_names cache for efficient conversation - resume operations. - - Args: - event: The event to append to the log. - """ - self._events.append(event) - - # Update used_tool_names cache for ActionEvents - if isinstance(event, ActionEvent) and event.tool_name: - if event.tool_name not in self.used_tool_names: - # Use assignment to trigger autosave via __setattr__ - self.used_tool_names = self.used_tool_names | {event.tool_name} - @property def env_observation_persistence_dir(self) -> str | None: """Directory for persisting environment observation files.""" @@ -230,22 +203,10 @@ def create( state._fs = file_store state._events = EventLog(file_store, dir_path=EVENTS_DIR) - # Use persisted used_tool_names cache for O(1) lookup on resume. - # If the cache is empty but there are events, we need to backfill - # for backward compatibility with old persisted conversations. - used_tools = state.used_tool_names - if not used_tools and len(state._events) > 0: - # Backfill cache from event history (O(n) but only once) - logger.info("Backfilling used_tool_names cache from event history...") - for event in state._events: - if isinstance(event, ActionEvent) and event.tool_name: - used_tools = used_tools | {event.tool_name} - # Persist backfilled cache - state.used_tool_names = used_tools - - # Reconcile agent config with deserialized one, passing used tools + # Reconcile agent config with deserialized one + # Pass event log so tool usage can be checked on-the-fly if needed resolved = agent.resolve_diff_from_deserialized( - state.agent, used_tools=used_tools + state.agent, events=state._events ) # Commit reconciled agent (may autosave) diff --git a/tests/cross/test_agent_reconciliation.py b/tests/cross/test_agent_reconciliation.py index 881c56179a..1696fffdb6 100644 --- a/tests/cross/test_agent_reconciliation.py +++ b/tests/cross/test_agent_reconciliation.py @@ -213,6 +213,8 @@ def test_conversation_fails_when_used_tool_is_missing(): This is the core correctness requirement: if a tool was actually used in the conversation history, it MUST be present when resuming. """ + from openhands.sdk.event import ActionEvent + with tempfile.TemporaryDirectory() as temp_dir: # Create conversation with two tools original_tools = [ @@ -233,9 +235,22 @@ def test_conversation_fails_when_used_tool_is_missing(): # Initialize the agent to get actual tool definitions conversation.agent.init_state(conversation.state, lambda e: None) - # Simulate that TerminalTool was used by recording it in used_tool_names - # In real usage this happens via state.append_event() with ActionEvents - conversation.state.used_tool_names = {"TerminalTool"} + # Simulate that TerminalTool was used by adding an ActionEvent + from openhands.sdk.llm import MessageToolCall, TextContent + + action_event = ActionEvent( + tool_name="TerminalTool", + tool_call_id="test-call-1", + thought=[TextContent(text="Running a command")], + tool_call=MessageToolCall( + id="test-call-1", + name="TerminalTool", + arguments="{}", + origin="completion", + ), + llm_response_id="test-response-1", + ) + conversation.state.events.append(action_event) conversation_id = conversation.state.id del conversation