OpenHands · tofarr · Jan 4, 2026 · Dec 29, 2025 · Dec 29, 2025 · Dec 30, 2025
diff --git a/openhands-sdk/openhands/sdk/agent/base.py b/openhands-sdk/openhands/sdk/agent/base.py
@@ -2,7 +2,7 @@
 import re
 import sys
 from abc import ABC, abstractmethod
-from collections.abc import Generator, Iterable
+from collections.abc import Generator, Iterable, Sequence
 from concurrent.futures import ThreadPoolExecutor
 from typing import TYPE_CHECKING, Any
 
@@ -300,10 +300,19 @@ def step(
         NOTE: state will be mutated in-place.
         """
 
-    def resolve_diff_from_deserialized(self, persisted: "AgentBase") -> "AgentBase":
+    def resolve_diff_from_deserialized(
+        self,
+        persisted: "AgentBase",
+        events: "Sequence[Any] | None" = None,
+    ) -> "AgentBase":
         """
         Return a new AgentBase instance equivalent to `persisted` but with
         explicitly whitelisted fields (e.g. api_key) taken from `self`.
+
+        Args:
+            persisted: The persisted agent from the conversation state.
+            events: Optional event sequence to scan for used tools if tool
+                names don't match. Only scanned when needed (O(n) fallback).
         """
         if persisted.__class__ is not self.__class__:
             raise ValueError(
@@ -338,28 +347,57 @@ def resolve_diff_from_deserialized(self, persisted: "AgentBase") -> "AgentBase":
         if self.agent_context is not None:
             updates["agent_context"] = self.agent_context
 
-        # Create maps by tool name for easy lookup
-        runtime_tools_map = {tool.name: tool for tool in self.tools}
-        persisted_tools_map = {tool.name: tool for tool in persisted.tools}
-
-        # Check that tool names match
-        runtime_names = set(runtime_tools_map.keys())
-        persisted_names = set(persisted_tools_map.keys())
+        # Get tool names for comparison
+        runtime_names = {tool.name for tool in self.tools}
+        persisted_names = {tool.name for tool in persisted.tools}
+
+        # If tool names match exactly, no need to check event history
+        if runtime_names == persisted_names:
+            # Tools unchanged, proceed normally
+            pass
+        elif events is not None:
+            # Tool names differ - scan events to find which tools were actually used
+            # This is O(n) but only happens when tools change
+            from openhands.sdk.event import ActionEvent
+
+            used_tools = {
+                event.tool_name
+                for event in events
+                if isinstance(event, ActionEvent) and event.tool_name
+            }
 
-        if runtime_names != persisted_names:
+            # Only require tools that were actually used in history
+            missing_used_tools = used_tools - runtime_names
+            if missing_used_tools:
+                raise ValueError(
+                    f"Cannot resume conversation: tools that were used in history "
+                    f"are missing from runtime: {sorted(missing_used_tools)}. "
+                    f"Available tools: {sorted(runtime_names)}"
+                )
+            # Update tools to match runtime (allows new tools to be added)
+            updates["tools"] = self.tools
+        else:
+            # No events provided - strict matching (legacy behavior)
             missing_in_runtime = persisted_names - runtime_names
             missing_in_persisted = runtime_names - persisted_names
             error_msg = "Tools don't match between runtime and persisted agents."
             if missing_in_runtime:
-                error_msg += f" Missing in runtime: {missing_in_runtime}."
+                error_msg += f" Missing in runtime: {sorted(missing_in_runtime)}."
             if missing_in_persisted:
-                error_msg += f" Missing in persisted: {missing_in_persisted}."
+                error_msg += f" Missing in persisted: {sorted(missing_in_persisted)}."
             raise ValueError(error_msg)
 
         reconciled = persisted.model_copy(update=updates)
-        if self.model_dump(exclude_none=True) != reconciled.model_dump(
-            exclude_none=True
-        ):
+
+        # Validate agent equality - exclude tools from comparison since we
+        # already validated tool requirements above
+        exclude_fields = {"tools"} if events is not None else set()
+        self_dump = self.model_dump(exclude_none=True, exclude=exclude_fields)
+        reconciled_dump = reconciled.model_dump(
+            exclude_none=True, exclude=exclude_fields
+        )
+
+        if self_dump != reconciled_dump:
             raise ValueError(
                 "The Agent provided is different from the one in persisted state.\n"
                 f"Diff: {pretty_pydantic_diff(self, reconciled)}"

diff --git a/openhands-sdk/openhands/sdk/conversation/state.py b/openhands-sdk/openhands/sdk/conversation/state.py
@@ -133,7 +133,6 @@ class ConversationState(OpenHandsModel):
         default_factory=FIFOLock
     )  # FIFO lock for thread safety
 
-    # ===== Public "events" facade (Sequence[Event]) =====
     @property
     def events(self) -> EventLog:
         return self._events
@@ -200,12 +199,17 @@ def create(
                     f"but persisted state has {state.id}"
                 )
 
-            # Reconcile agent config with deserialized one
-            resolved = agent.resolve_diff_from_deserialized(state.agent)
-
-            # Attach runtime handles and commit reconciled agent (may autosave)
+            # Attach event log early so we can read history
             state._fs = file_store
             state._events = EventLog(file_store, dir_path=EVENTS_DIR)
+
+            # Reconcile agent config with deserialized one
+            # Pass event log so tool usage can be checked on-the-fly if needed
+            resolved = agent.resolve_diff_from_deserialized(
+                state.agent, events=state._events
+            )
+
+            # Commit reconciled agent (may autosave)
             state._autosave_enabled = True
             state.agent = resolved
 

diff --git a/tests/cross/test_agent_reconciliation.py b/tests/cross/test_agent_reconciliation.py
@@ -107,12 +107,14 @@ def test_conversation_restarted_with_changed_working_directory(tmp_path_factory)
 
 
 # Tests from test_local_conversation_tools_integration.py
-def test_conversation_with_different_agent_tools_fails():
-    """Test that using an agent with different tools fails (tools must match)."""
-    import pytest
+def test_conversation_allows_removing_unused_tools():
+    """Test that removing tools that weren't used in history is allowed.
 
+    With relaxed tool matching, only tools that were actually USED in the
+    conversation history need to be present. Unused tools can be removed.
+    """
     with tempfile.TemporaryDirectory() as temp_dir:
-        # Create and save conversation with original agent
+        # Create conversation with original agent having 2 tools
         original_tools = [
             Tool(name="TerminalTool"),
             Tool(name="FileEditorTool"),
@@ -128,33 +130,147 @@ def test_conversation_with_different_agent_tools_fails():
             visualizer=None,
         )
 
-        # Send a message to create some state
+        # Send a message but NO tool is used (no ActionEvent in history)
         conversation.send_message(
             Message(role="user", content=[TextContent(text="test message")])
         )
 
-        # Get the conversation ID for reuse
         conversation_id = conversation.state.id
+        del conversation
 
-        # Delete conversation to simulate restart
+        # Resume with only one tool - should succeed since no tools were used
+        reduced_tools = [Tool(name="TerminalTool")]  # Removed FileEditorTool
+        llm2 = LLM(
+            model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
+        )
+        reduced_agent = Agent(llm=llm2, tools=reduced_tools)
+
+        # This should succeed - FileEditorTool was never used
+        new_conversation = LocalConversation(
+            agent=reduced_agent,
+            workspace=temp_dir,
+            persistence_dir=temp_dir,
+            conversation_id=conversation_id,
+            visualizer=None,
+        )
+        assert new_conversation.id == conversation_id
+
+
+def test_conversation_allows_adding_new_tools():
+    """Test that adding new tools not in the original conversation is allowed.
+
+    With relaxed tool matching, new tools can be added when resuming a
+    conversation without causing failures.
+    """
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Create conversation with only one tool
+        original_tools = [Tool(name="TerminalTool")]
+        llm = LLM(
+            model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
+        )
+        original_agent = Agent(llm=llm, tools=original_tools)
+        conversation = LocalConversation(
+            agent=original_agent,
+            workspace=temp_dir,
+            persistence_dir=temp_dir,
+            visualizer=None,
+        )
+
+        # Send a message (no tools used)
+        conversation.send_message(
+            Message(role="user", content=[TextContent(text="test message")])
+        )
+
+        conversation_id = conversation.state.id
         del conversation
 
-        # Try to create new conversation with different tools (only bash tool)
-        different_tools = [Tool(name="TerminalTool")]  # Missing FileEditorTool
+        # Resume with additional tools - should succeed
+        expanded_tools = [
+            Tool(name="TerminalTool"),
+            Tool(name="FileEditorTool"),  # New tool added
+        ]
         llm2 = LLM(
             model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
         )
-        different_agent = Agent(llm=llm2, tools=different_tools)
+        expanded_agent = Agent(llm=llm2, tools=expanded_tools)
+
+        # This should succeed - adding new tools is allowed
+        new_conversation = LocalConversation(
+            agent=expanded_agent,
+            workspace=temp_dir,
+            persistence_dir=temp_dir,
+            conversation_id=conversation_id,
+            visualizer=None,
+        )
+        assert new_conversation.id == conversation_id
+        # Verify the new tools are available
+        assert len(new_conversation.agent.tools) == 2
+
+
+def test_conversation_fails_when_used_tool_is_missing():
+    """Test that removing a tool that WAS used in history fails.
+
+    This is the core correctness requirement: if a tool was actually used
+    in the conversation history, it MUST be present when resuming.
+    """
+    from openhands.sdk.event import ActionEvent
+
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Create conversation with two tools
+        original_tools = [
+            Tool(name="TerminalTool"),
+            Tool(name="FileEditorTool"),
+        ]
+        llm = LLM(
+            model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
+        )
+        original_agent = Agent(llm=llm, tools=original_tools)
+        conversation = LocalConversation(
+            agent=original_agent,
+            workspace=temp_dir,
+            persistence_dir=temp_dir,
+            visualizer=None,
+        )
+
+        # Initialize the agent to get actual tool definitions
+        conversation.agent.init_state(conversation.state, lambda e: None)
+
+        # Simulate that TerminalTool was used by adding an ActionEvent
+        from openhands.sdk.llm import MessageToolCall, TextContent
+
+        action_event = ActionEvent(
+            tool_name="TerminalTool",
+            tool_call_id="test-call-1",
+            thought=[TextContent(text="Running a command")],
+            tool_call=MessageToolCall(
+                id="test-call-1",
+                name="TerminalTool",
+                arguments="{}",
+                origin="completion",
+            ),
+            llm_response_id="test-response-1",
+        )
+        conversation.state.events.append(action_event)
+
+        conversation_id = conversation.state.id
+        del conversation
+
+        # Try to resume WITHOUT TerminalTool - should fail
+        reduced_tools = [Tool(name="FileEditorTool")]  # Missing TerminalTool
+        llm2 = LLM(
+            model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
+        )
+        reduced_agent = Agent(llm=llm2, tools=reduced_tools)
+
+        # This should raise - TerminalTool was used in history but is now missing
+        import pytest
 
-        # This should fail - tools must match during reconciliation
-        with pytest.raises(
-            ValueError, match="Tools don't match between runtime and persisted agents"
-        ):
+        with pytest.raises(ValueError, match="tools that were used in history"):
             LocalConversation(
-                agent=different_agent,
+                agent=reduced_agent,
                 workspace=temp_dir,
                 persistence_dir=temp_dir,
-                conversation_id=conversation_id,  # Use same ID to avoid ID mismatch
+                conversation_id=conversation_id,
                 visualizer=None,
             )