From f685409c7ccd743e843fb55dd4391c2c6ae51437 Mon Sep 17 00:00:00 2001
From: amankumarpandeyin <priyankapandeykum@gmail.com>
Date: Mon, 29 Dec 2025 23:34:24 +0530
Subject: [PATCH 1/4] feat: relax tool matching on resume

Only fail if a tool that was *used in history* is missing.
Allow adding new tools freely when resuming conversations.

Changes:
- Add optional used_tools param to resolve_diff_from_deserialized()
- Extract used tool names from event history in state.py
- Add test_conversation_allows_removing_unused_tools
- Add test_conversation_allows_adding_new_tools

Fixes #1533
---
 openhands-sdk/openhands/sdk/agent/base.py     | 64 +++++++++----
 .../openhands/sdk/conversation/state.py       | 19 +++-
 tests/cross/test_agent_reconciliation.py      | 92 ++++++++++++++-----
 3 files changed, 131 insertions(+), 44 deletions(-)

diff --git a/openhands-sdk/openhands/sdk/agent/base.py b/openhands-sdk/openhands/sdk/agent/base.py
index 6cc47ab197..3f78321f0e 100644
--- a/openhands-sdk/openhands/sdk/agent/base.py
+++ b/openhands-sdk/openhands/sdk/agent/base.py
@@ -297,10 +297,21 @@ def step(
         NOTE: state will be mutated in-place.
         """
 
-    def resolve_diff_from_deserialized(self, persisted: "AgentBase") -> "AgentBase":
+    def resolve_diff_from_deserialized(
+        self,
+        persisted: "AgentBase",
+        used_tools: set[str] | None = None,
+    ) -> "AgentBase":
         """
         Return a new AgentBase instance equivalent to `persisted` but with
         explicitly whitelisted fields (e.g. api_key) taken from `self`.
+
+        Args:
+            persisted: The persisted agent from the conversation state.
+            used_tools: Optional set of tool names that were actually used in
+                the conversation history. If provided, only these tools are
+                required to be present in the runtime agent. New tools can be
+                added freely.
         """
         if persisted.__class__ is not self.__class__:
             raise ValueError(
@@ -339,28 +350,45 @@ def resolve_diff_from_deserialized(self, persisted: "AgentBase") -> "AgentBase":
         runtime_tools_map = {tool.name: tool for tool in self.tools}
         persisted_tools_map = {tool.name: tool for tool in persisted.tools}
 
-        # Check that tool names match
         runtime_names = set(runtime_tools_map.keys())
         persisted_names = set(persisted_tools_map.keys())
 
-        if runtime_names != persisted_names:
-            missing_in_runtime = persisted_names - runtime_names
-            missing_in_persisted = runtime_names - persisted_names
-            error_msg = "Tools don't match between runtime and persisted agents."
-            if missing_in_runtime:
-                error_msg += f" Missing in runtime: {missing_in_runtime}."
-            if missing_in_persisted:
-                error_msg += f" Missing in persisted: {missing_in_persisted}."
-            raise ValueError(error_msg)
+        # Relaxed tool matching: only fail if tools that were actually USED
+        # in history are missing. Allow adding new tools freely.
+        if used_tools is not None:
+            # Only check that used tools are present in runtime
+            missing_used_tools = used_tools - runtime_names
+            if missing_used_tools:
+                raise ValueError(
+                    f"Cannot resume conversation: tools that were used in history "
+                    f"are missing from runtime: {missing_used_tools}. "
+                    f"Available tools: {runtime_names}"
+                )
+            # Update tools to match runtime (allows new tools to be added)
+            updates["tools"] = self.tools
+        else:
+            # Legacy behavior: require exact match when used_tools not provided
+            if runtime_names != persisted_names:
+                missing_in_runtime = persisted_names - runtime_names
+                missing_in_persisted = runtime_names - persisted_names
+                error_msg = "Tools don't match between runtime and persisted agents."
+                if missing_in_runtime:
+                    error_msg += f" Missing in runtime: {missing_in_runtime}."
+                if missing_in_persisted:
+                    error_msg += f" Missing in persisted: {missing_in_persisted}."
+                raise ValueError(error_msg)
 
         reconciled = persisted.model_copy(update=updates)
-        if self.model_dump(exclude_none=True) != reconciled.model_dump(
-            exclude_none=True
-        ):
-            raise ValueError(
-                "The Agent provided is different from the one in persisted state.\n"
-                f"Diff: {pretty_pydantic_diff(self, reconciled)}"
-            )
+
+        # When used_tools is provided, we allow tool changes so skip strict comparison
+        if used_tools is None:
+            if self.model_dump(exclude_none=True) != reconciled.model_dump(
+                exclude_none=True
+            ):
+                raise ValueError(
+                    "The Agent provided is different from the one in persisted state.\n"
+                    f"Diff: {pretty_pydantic_diff(self, reconciled)}"
+                )
         return reconciled
 
     def model_dump_succint(self, **kwargs):
diff --git a/openhands-sdk/openhands/sdk/conversation/state.py b/openhands-sdk/openhands/sdk/conversation/state.py
index 7fdf79aa7a..c1fb65f87e 100644
--- a/openhands-sdk/openhands/sdk/conversation/state.py
+++ b/openhands-sdk/openhands/sdk/conversation/state.py
@@ -188,12 +188,23 @@ def create(
                     f"but persisted state has {state.id}"
                 )
 
-            # Reconcile agent config with deserialized one
-            resolved = agent.resolve_diff_from_deserialized(state.agent)
-
-            # Attach runtime handles and commit reconciled agent (may autosave)
+            # Attach event log early so we can read history
             state._fs = file_store
             state._events = EventLog(file_store, dir_path=EVENTS_DIR)
+
+            # Extract tool names that were actually used in history
+            # This allows adding new tools when resuming conversations
+            used_tools: set[str] = set()
+            for event in state._events:
+                if isinstance(event, ActionEvent):
+                    used_tools.add(event.tool_name)
+
+            # Reconcile agent config with deserialized one, passing used tools
+            resolved = agent.resolve_diff_from_deserialized(
+                state.agent, used_tools=used_tools
+            )
+
+            # Commit reconciled agent (may autosave)
             state._autosave_enabled = True
             state.agent = resolved
 
diff --git a/tests/cross/test_agent_reconciliation.py b/tests/cross/test_agent_reconciliation.py
index 7864ca729a..6f6f1e8142 100644
--- a/tests/cross/test_agent_reconciliation.py
+++ b/tests/cross/test_agent_reconciliation.py
@@ -107,12 +107,14 @@ def test_conversation_restarted_with_changed_working_directory(tmp_path_factory)
 
 
 # Tests from test_local_conversation_tools_integration.py
-def test_conversation_with_different_agent_tools_fails():
-    """Test that using an agent with different tools fails (tools must match)."""
-    import pytest
+def test_conversation_allows_removing_unused_tools():
+    """Test that removing tools that weren't used in history is allowed.
 
+    With relaxed tool matching, only tools that were actually USED in the
+    conversation history need to be present. Unused tools can be removed.
+    """
     with tempfile.TemporaryDirectory() as temp_dir:
-        # Create and save conversation with original agent
+        # Create conversation with original agent having 2 tools
         original_tools = [
             Tool(name="TerminalTool"),
             Tool(name="FileEditorTool"),
@@ -128,35 +130,81 @@ def test_conversation_with_different_agent_tools_fails():
             visualizer=None,
         )
 
-        # Send a message to create some state
+        # Send a message but NO tool is used (no ActionEvent in history)
         conversation.send_message(
             Message(role="user", content=[TextContent(text="test message")])
         )
 
-        # Get the conversation ID for reuse
         conversation_id = conversation.state.id
+        del conversation
 
-        # Delete conversation to simulate restart
+        # Resume with only one tool - should succeed since no tools were used
+        reduced_tools = [Tool(name="TerminalTool")]  # Removed FileEditorTool
+        llm2 = LLM(
+            model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
+        )
+        reduced_agent = Agent(llm=llm2, tools=reduced_tools)
+
+        # This should succeed - FileEditorTool was never used
+        new_conversation = LocalConversation(
+            agent=reduced_agent,
+            workspace=temp_dir,
+            persistence_dir=temp_dir,
+            conversation_id=conversation_id,
+            visualizer=None,
+        )
+        assert new_conversation.id == conversation_id
+
+
+def test_conversation_allows_adding_new_tools():
+    """Test that adding new tools not in the original conversation is allowed.
+
+    With relaxed tool matching, new tools can be added when resuming a
+    conversation without causing failures.
+    """
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Create conversation with only one tool
+        original_tools = [Tool(name="TerminalTool")]
+        llm = LLM(
+            model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
+        )
+        original_agent = Agent(llm=llm, tools=original_tools)
+        conversation = LocalConversation(
+            agent=original_agent,
+            workspace=temp_dir,
+            persistence_dir=temp_dir,
+            visualizer=None,
+        )
+
+        # Send a message (no tools used)
+        conversation.send_message(
+            Message(role="user", content=[TextContent(text="test message")])
+        )
+
+        conversation_id = conversation.state.id
         del conversation
 
-        # Try to create new conversation with different tools (only bash tool)
-        different_tools = [Tool(name="TerminalTool")]  # Missing FileEditorTool
+        # Resume with additional tools - should succeed
+        expanded_tools = [
+            Tool(name="TerminalTool"),
+            Tool(name="FileEditorTool"),  # New tool added
+        ]
         llm2 = LLM(
             model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
         )
-        different_agent = Agent(llm=llm2, tools=different_tools)
-
-        # This should fail - tools must match during reconciliation
-        with pytest.raises(
-            ValueError, match="Tools don't match between runtime and persisted agents"
-        ):
-            LocalConversation(
-                agent=different_agent,
-                workspace=temp_dir,
-                persistence_dir=temp_dir,
-                conversation_id=conversation_id,  # Use same ID to avoid ID mismatch
-                visualizer=None,
-            )
+        expanded_agent = Agent(llm=llm2, tools=expanded_tools)
+
+        # This should succeed - adding new tools is allowed
+        new_conversation = LocalConversation(
+            agent=expanded_agent,
+            workspace=temp_dir,
+            persistence_dir=temp_dir,
+            conversation_id=conversation_id,
+            visualizer=None,
+        )
+        assert new_conversation.id == conversation_id
+        # Verify the new tools are available
+        assert len(new_conversation.agent.tools) == 2
 
 
 def test_conversation_with_same_agent_succeeds():

From 334fe33da39be7dfe5c03735e71f47ee775e3eec Mon Sep 17 00:00:00 2001
From: amankumarpandeyin <priyankapandeykum@gmail.com>
Date: Thu, 1 Jan 2026 22:19:29 +0530
Subject: [PATCH 2/4] perf: add used_tool_names cache for O(1) lookup on resume

Addresses performance concern raised in PR review:
- Added used_tool_names field to ConversationState that persists with state
- Added append_event() method that maintains the cache incrementally
- On resume, reads from persisted cache instead of iterating all events
- This provides O(1) lookup vs O(n) iteration through all events
---
 .../conversation/impl/local_conversation.py   |  4 +-
 .../openhands/sdk/conversation/state.py       | 38 +++++++++++++++----
 2 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py
index 984f01f846..ba5d58d45b 100644
--- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py
+++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py
@@ -136,9 +136,9 @@ def __init__(
             stuck_detection=stuck_detection,
         )
 
-        # Default callback: persist every event to state
+        # Default callback: persist every event to state and update caches
         def _default_callback(e):
-            self._state.events.append(e)
+            self._state.append_event(e)
 
         self._hook_processor = None
         hook_callback = None
diff --git a/openhands-sdk/openhands/sdk/conversation/state.py b/openhands-sdk/openhands/sdk/conversation/state.py
index 825f940f9a..ecea585a3a 100644
--- a/openhands-sdk/openhands/sdk/conversation/state.py
+++ b/openhands-sdk/openhands/sdk/conversation/state.py
@@ -120,6 +120,14 @@ class ConversationState(OpenHandsModel):
         serialization_alias="secret_registry",
     )
 
+    # Cache of tool names that have been used in this conversation
+    # This is incrementally updated when events are appended and persisted
+    # with the state, avoiding O(n) iteration on resume
+    used_tool_names: set[str] = Field(
+        default_factory=set,
+        description="Set of tool names that have been used in this conversation",
+    )
+
     # ===== Private attrs (NOT Fields) =====
     _fs: FileStore = PrivateAttr()  # filestore for persistence
     _events: EventLog = PrivateAttr()  # now the storage for events
@@ -138,6 +146,25 @@ class ConversationState(OpenHandsModel):
     def events(self) -> EventLog:
         return self._events
 
+    def append_event(self, event: Event) -> None:
+        """Append an event to the event log and update caches.
+
+        This method should be preferred over direct events.append() calls
+        as it maintains the used_tool_names cache for efficient conversation
+        resume operations.
+
+        Args:
+            event: The event to append to the log.
+        """
+        self._events.append(event)
+
+        # Update used_tool_names cache for ActionEvents
+        if isinstance(event, ActionEvent):
+            tool_name = event.tool_name
+            if tool_name not in self.used_tool_names:
+                # Use assignment to trigger autosave via __setattr__
+                self.used_tool_names = self.used_tool_names | {tool_name}
+
     @property
     def env_observation_persistence_dir(self) -> str | None:
         """Directory for persisting environment observation files."""
@@ -204,16 +231,13 @@ def create(
             state._fs = file_store
             state._events = EventLog(file_store, dir_path=EVENTS_DIR)
 
-            # Extract tool names that were actually used in history
-            # This allows adding new tools when resuming conversations
-            used_tools: set[str] = set()
-            for event in state._events:
-                if isinstance(event, ActionEvent):
-                    used_tools.add(event.tool_name)
+            # Use persisted used_tool_names cache instead of iterating all events
+            # This provides O(1) lookup on resume vs O(n) iteration
+            # The cache is incrementally updated via append_event()
 
             # Reconcile agent config with deserialized one, passing used tools
             resolved = agent.resolve_diff_from_deserialized(
-                state.agent, used_tools=used_tools
+                state.agent, used_tools=state.used_tool_names
             )
 
             # Commit reconciled agent (may autosave)

From 374a5664d35dcdb04b0367672dbaaadef2fc5205 Mon Sep 17 00:00:00 2001
From: amankumarpandeyin <priyankapandeykum@gmail.com>
Date: Sat, 3 Jan 2026 16:14:38 +0530
Subject: [PATCH 3/4] fix: address code review feedback on tool matching

- Fix agent reconciliation: validate all config except tools when used_tools provided
- Add cache backfill for old conversations without used_tool_names
- Sort tool names in error messages for deterministic output
- Remove unused runtime_tools_map/persisted_tools_map variables
- Add test for core requirement: tools used in history must exist
---
 openhands-sdk/openhands/sdk/agent/base.py     | 41 +++++++-------
 .../openhands/sdk/conversation/state.py       | 24 ++++++---
 tests/cross/test_agent_reconciliation.py      | 53 +++++++++++++++++++
 3 files changed, 91 insertions(+), 27 deletions(-)

diff --git a/openhands-sdk/openhands/sdk/agent/base.py b/openhands-sdk/openhands/sdk/agent/base.py
index 8caeb3b726..05c49a20e0 100644
--- a/openhands-sdk/openhands/sdk/agent/base.py
+++ b/openhands-sdk/openhands/sdk/agent/base.py
@@ -349,12 +349,9 @@ def resolve_diff_from_deserialized(
         if self.agent_context is not None:
             updates["agent_context"] = self.agent_context
 
-        # Create maps by tool name for easy lookup
-        runtime_tools_map = {tool.name: tool for tool in self.tools}
-        persisted_tools_map = {tool.name: tool for tool in persisted.tools}
-
-        runtime_names = set(runtime_tools_map.keys())
-        persisted_names = set(persisted_tools_map.keys())
+        # Get tool names for comparison
+        runtime_names = {tool.name for tool in self.tools}
+        persisted_names = {tool.name for tool in persisted.tools}
 
         # Relaxed tool matching: only fail if tools that were actually USED
         # in history are missing. Allow adding new tools freely.
@@ -364,8 +361,8 @@ def resolve_diff_from_deserialized(
             if missing_used_tools:
                 raise ValueError(
                     f"Cannot resume conversation: tools that were used in history "
-                    f"are missing from runtime: {missing_used_tools}. "
-                    f"Available tools: {runtime_names}"
+                    f"are missing from runtime: {sorted(missing_used_tools)}. "
+                    f"Available tools: {sorted(runtime_names)}"
                 )
             # Update tools to match runtime (allows new tools to be added)
             updates["tools"] = self.tools
@@ -376,22 +373,28 @@ def resolve_diff_from_deserialized(
                 missing_in_persisted = runtime_names - persisted_names
                 error_msg = "Tools don't match between runtime and persisted agents."
                 if missing_in_runtime:
-                    error_msg += f" Missing in runtime: {missing_in_runtime}."
+                    error_msg += f" Missing in runtime: {sorted(missing_in_runtime)}."
                 if missing_in_persisted:
-                    error_msg += f" Missing in persisted: {missing_in_persisted}."
+                    error_msg += (
+                        f" Missing in persisted: {sorted(missing_in_persisted)}."
+                    )
                 raise ValueError(error_msg)
 
         reconciled = persisted.model_copy(update=updates)
 
-        # When used_tools is provided, we allow tool changes so skip strict comparison
-        if used_tools is None:
-            if self.model_dump(exclude_none=True) != reconciled.model_dump(
-                exclude_none=True
-            ):
-                raise ValueError(
-                    "The Agent provided is different from the one in persisted state.\n"
-                    f"Diff: {pretty_pydantic_diff(self, reconciled)}"
-                )
+        # Validate agent equality - when used_tools is provided, we exclude tools
+        # from comparison since we already validated tool requirements above
+        exclude_fields = {"tools"} if used_tools is not None else set()
+        self_dump = self.model_dump(exclude_none=True, exclude=exclude_fields)
+        reconciled_dump = reconciled.model_dump(
+            exclude_none=True, exclude=exclude_fields
+        )
+
+        if self_dump != reconciled_dump:
+            raise ValueError(
+                "The Agent provided is different from the one in persisted state.\n"
+                f"Diff: {pretty_pydantic_diff(self, reconciled)}"
+            )
         return reconciled
 
     def model_dump_succint(self, **kwargs):
diff --git a/openhands-sdk/openhands/sdk/conversation/state.py b/openhands-sdk/openhands/sdk/conversation/state.py
index ecea585a3a..319dcf4f06 100644
--- a/openhands-sdk/openhands/sdk/conversation/state.py
+++ b/openhands-sdk/openhands/sdk/conversation/state.py
@@ -159,11 +159,10 @@ def append_event(self, event: Event) -> None:
         self._events.append(event)
 
         # Update used_tool_names cache for ActionEvents
-        if isinstance(event, ActionEvent):
-            tool_name = event.tool_name
-            if tool_name not in self.used_tool_names:
+        if isinstance(event, ActionEvent) and event.tool_name:
+            if event.tool_name not in self.used_tool_names:
                 # Use assignment to trigger autosave via __setattr__
-                self.used_tool_names = self.used_tool_names | {tool_name}
+                self.used_tool_names = self.used_tool_names | {event.tool_name}
 
     @property
     def env_observation_persistence_dir(self) -> str | None:
@@ -231,13 +230,22 @@ def create(
             state._fs = file_store
             state._events = EventLog(file_store, dir_path=EVENTS_DIR)
 
-            # Use persisted used_tool_names cache instead of iterating all events
-            # This provides O(1) lookup on resume vs O(n) iteration
-            # The cache is incrementally updated via append_event()
+            # Use persisted used_tool_names cache for O(1) lookup on resume.
+            # If the cache is empty but there are events, we need to backfill
+            # for backward compatibility with old persisted conversations.
+            used_tools = state.used_tool_names
+            if not used_tools and len(state._events) > 0:
+                # Backfill cache from event history (O(n) but only once)
+                logger.info("Backfilling used_tool_names cache from event history...")
+                for event in state._events:
+                    if isinstance(event, ActionEvent) and event.tool_name:
+                        used_tools = used_tools | {event.tool_name}
+                # Persist backfilled cache
+                state.used_tool_names = used_tools
 
             # Reconcile agent config with deserialized one, passing used tools
             resolved = agent.resolve_diff_from_deserialized(
-                state.agent, used_tools=state.used_tool_names
+                state.agent, used_tools=used_tools
             )
 
             # Commit reconciled agent (may autosave)
diff --git a/tests/cross/test_agent_reconciliation.py b/tests/cross/test_agent_reconciliation.py
index 6f6f1e8142..881c56179a 100644
--- a/tests/cross/test_agent_reconciliation.py
+++ b/tests/cross/test_agent_reconciliation.py
@@ -207,6 +207,59 @@ def test_conversation_allows_adding_new_tools():
         assert len(new_conversation.agent.tools) == 2
 
 
+def test_conversation_fails_when_used_tool_is_missing():
+    """Test that removing a tool that WAS used in history fails.
+
+    This is the core correctness requirement: if a tool was actually used
+    in the conversation history, it MUST be present when resuming.
+    """
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Create conversation with two tools
+        original_tools = [
+            Tool(name="TerminalTool"),
+            Tool(name="FileEditorTool"),
+        ]
+        llm = LLM(
+            model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
+        )
+        original_agent = Agent(llm=llm, tools=original_tools)
+        conversation = LocalConversation(
+            agent=original_agent,
+            workspace=temp_dir,
+            persistence_dir=temp_dir,
+            visualizer=None,
+        )
+
+        # Initialize the agent to get actual tool definitions
+        conversation.agent.init_state(conversation.state, lambda e: None)
+
+        # Simulate that TerminalTool was used by recording it in used_tool_names
+        # In real usage this happens via state.append_event() with ActionEvents
+        conversation.state.used_tool_names = {"TerminalTool"}
+
+        conversation_id = conversation.state.id
+        del conversation
+
+        # Try to resume WITHOUT TerminalTool - should fail
+        reduced_tools = [Tool(name="FileEditorTool")]  # Missing TerminalTool
+        llm2 = LLM(
+            model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
+        )
+        reduced_agent = Agent(llm=llm2, tools=reduced_tools)
+
+        # This should raise - TerminalTool was used in history but is now missing
+        import pytest
+
+        with pytest.raises(ValueError, match="tools that were used in history"):
+            LocalConversation(
+                agent=reduced_agent,
+                workspace=temp_dir,
+                persistence_dir=temp_dir,
+                conversation_id=conversation_id,
+                visualizer=None,
+            )
+
+
 def test_conversation_with_same_agent_succeeds():
     """Test that using the same agent configuration succeeds."""
     with tempfile.TemporaryDirectory() as temp_dir:

From 3744de617f06fbfc2a9415b4dc5aadc56fe215ce Mon Sep 17 00:00:00 2001
From: amankumarpandeyin <priyankapandeykum@gmail.com>
Date: Sat, 3 Jan 2026 21:55:58 +0530
Subject: [PATCH 4/4] refactor: simplify tool matching per reviewer feedback

- Remove used_tool_names cache and append_event() method
- EventLog behaves like regular list again (per @tofarr's feedback)
- Scan events on-the-fly only when tool names don't match
- O(1) for happy path (same tools), O(n) fallback when tools change
---
 openhands-sdk/openhands/sdk/agent/base.py     | 57 +++++++++++--------
 .../conversation/impl/local_conversation.py   |  4 +-
 .../openhands/sdk/conversation/state.py       | 45 +--------------
 tests/cross/test_agent_reconciliation.py      | 21 ++++++-
 4 files changed, 55 insertions(+), 72 deletions(-)

diff --git a/openhands-sdk/openhands/sdk/agent/base.py b/openhands-sdk/openhands/sdk/agent/base.py
index 05c49a20e0..1bf2bbb166 100644
--- a/openhands-sdk/openhands/sdk/agent/base.py
+++ b/openhands-sdk/openhands/sdk/agent/base.py
@@ -2,7 +2,7 @@
 import re
 import sys
 from abc import ABC, abstractmethod
-from collections.abc import Generator, Iterable
+from collections.abc import Generator, Iterable, Sequence
 from concurrent.futures import ThreadPoolExecutor
 from typing import TYPE_CHECKING, Any
 
@@ -303,7 +303,7 @@ def step(
     def resolve_diff_from_deserialized(
         self,
         persisted: "AgentBase",
-        used_tools: set[str] | None = None,
+        events: "Sequence[Any] | None" = None,
     ) -> "AgentBase":
         """
         Return a new AgentBase instance equivalent to `persisted` but with
@@ -311,10 +311,8 @@ def resolve_diff_from_deserialized(
 
         Args:
             persisted: The persisted agent from the conversation state.
-            used_tools: Optional set of tool names that were actually used in
-                the conversation history. If provided, only these tools are
-                required to be present in the runtime agent. New tools can be
-                added freely.
+            events: Optional event sequence to scan for used tools if tool
+                names don't match. Only scanned when needed (O(n) fallback).
         """
         if persisted.__class__ is not self.__class__:
             raise ValueError(
@@ -353,10 +351,22 @@ def resolve_diff_from_deserialized(
         runtime_names = {tool.name for tool in self.tools}
         persisted_names = {tool.name for tool in persisted.tools}
 
-        # Relaxed tool matching: only fail if tools that were actually USED
-        # in history are missing. Allow adding new tools freely.
-        if used_tools is not None:
-            # Only check that used tools are present in runtime
+        # If tool names match exactly, no need to check event history
+        if runtime_names == persisted_names:
+            # Tools unchanged, proceed normally
+            pass
+        elif events is not None:
+            # Tool names differ - scan events to find which tools were actually used
+            # This is O(n) but only happens when tools change
+            from openhands.sdk.event import ActionEvent
+
+            used_tools = {
+                event.tool_name
+                for event in events
+                if isinstance(event, ActionEvent) and event.tool_name
+            }
+
+            # Only require tools that were actually used in history
             missing_used_tools = used_tools - runtime_names
             if missing_used_tools:
                 raise ValueError(
@@ -367,24 +377,21 @@ def resolve_diff_from_deserialized(
             # Update tools to match runtime (allows new tools to be added)
             updates["tools"] = self.tools
         else:
-            # Legacy behavior: require exact match when used_tools not provided
-            if runtime_names != persisted_names:
-                missing_in_runtime = persisted_names - runtime_names
-                missing_in_persisted = runtime_names - persisted_names
-                error_msg = "Tools don't match between runtime and persisted agents."
-                if missing_in_runtime:
-                    error_msg += f" Missing in runtime: {sorted(missing_in_runtime)}."
-                if missing_in_persisted:
-                    error_msg += (
-                        f" Missing in persisted: {sorted(missing_in_persisted)}."
-                    )
-                raise ValueError(error_msg)
+            # No events provided - strict matching (legacy behavior)
+            missing_in_runtime = persisted_names - runtime_names
+            missing_in_persisted = runtime_names - persisted_names
+            error_msg = "Tools don't match between runtime and persisted agents."
+            if missing_in_runtime:
+                error_msg += f" Missing in runtime: {sorted(missing_in_runtime)}."
+            if missing_in_persisted:
+                error_msg += f" Missing in persisted: {sorted(missing_in_persisted)}."
+            raise ValueError(error_msg)
 
         reconciled = persisted.model_copy(update=updates)
 
-        # Validate agent equality - when used_tools is provided, we exclude tools
-        # from comparison since we already validated tool requirements above
-        exclude_fields = {"tools"} if used_tools is not None else set()
+        # Validate agent equality - exclude tools from comparison since we
+        # already validated tool requirements above
+        exclude_fields = {"tools"} if events is not None else set()
         self_dump = self.model_dump(exclude_none=True, exclude=exclude_fields)
         reconciled_dump = reconciled.model_dump(
             exclude_none=True, exclude=exclude_fields
diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py
index ba5d58d45b..984f01f846 100644
--- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py
+++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py
@@ -136,9 +136,9 @@ def __init__(
             stuck_detection=stuck_detection,
         )
 
-        # Default callback: persist every event to state and update caches
+        # Default callback: persist every event to state
         def _default_callback(e):
-            self._state.append_event(e)
+            self._state.events.append(e)
 
         self._hook_processor = None
         hook_callback = None
diff --git a/openhands-sdk/openhands/sdk/conversation/state.py b/openhands-sdk/openhands/sdk/conversation/state.py
index 319dcf4f06..374b9873a7 100644
--- a/openhands-sdk/openhands/sdk/conversation/state.py
+++ b/openhands-sdk/openhands/sdk/conversation/state.py
@@ -120,14 +120,6 @@ class ConversationState(OpenHandsModel):
         serialization_alias="secret_registry",
     )
 
-    # Cache of tool names that have been used in this conversation
-    # This is incrementally updated when events are appended and persisted
-    # with the state, avoiding O(n) iteration on resume
-    used_tool_names: set[str] = Field(
-        default_factory=set,
-        description="Set of tool names that have been used in this conversation",
-    )
-
     # ===== Private attrs (NOT Fields) =====
     _fs: FileStore = PrivateAttr()  # filestore for persistence
     _events: EventLog = PrivateAttr()  # now the storage for events
@@ -141,29 +133,10 @@ class ConversationState(OpenHandsModel):
         default_factory=FIFOLock
     )  # FIFO lock for thread safety
 
-    # ===== Public "events" facade (Sequence[Event]) =====
     @property
     def events(self) -> EventLog:
         return self._events
 
-    def append_event(self, event: Event) -> None:
-        """Append an event to the event log and update caches.
-
-        This method should be preferred over direct events.append() calls
-        as it maintains the used_tool_names cache for efficient conversation
-        resume operations.
-
-        Args:
-            event: The event to append to the log.
-        """
-        self._events.append(event)
-
-        # Update used_tool_names cache for ActionEvents
-        if isinstance(event, ActionEvent) and event.tool_name:
-            if event.tool_name not in self.used_tool_names:
-                # Use assignment to trigger autosave via __setattr__
-                self.used_tool_names = self.used_tool_names | {event.tool_name}
-
     @property
     def env_observation_persistence_dir(self) -> str | None:
         """Directory for persisting environment observation files."""
@@ -230,22 +203,10 @@ def create(
             state._fs = file_store
             state._events = EventLog(file_store, dir_path=EVENTS_DIR)
 
-            # Use persisted used_tool_names cache for O(1) lookup on resume.
-            # If the cache is empty but there are events, we need to backfill
-            # for backward compatibility with old persisted conversations.
-            used_tools = state.used_tool_names
-            if not used_tools and len(state._events) > 0:
-                # Backfill cache from event history (O(n) but only once)
-                logger.info("Backfilling used_tool_names cache from event history...")
-                for event in state._events:
-                    if isinstance(event, ActionEvent) and event.tool_name:
-                        used_tools = used_tools | {event.tool_name}
-                # Persist backfilled cache
-                state.used_tool_names = used_tools
-
-            # Reconcile agent config with deserialized one, passing used tools
+            # Reconcile agent config with deserialized one
+            # Pass event log so tool usage can be checked on-the-fly if needed
             resolved = agent.resolve_diff_from_deserialized(
-                state.agent, used_tools=used_tools
+                state.agent, events=state._events
             )
 
             # Commit reconciled agent (may autosave)
diff --git a/tests/cross/test_agent_reconciliation.py b/tests/cross/test_agent_reconciliation.py
index 881c56179a..1696fffdb6 100644
--- a/tests/cross/test_agent_reconciliation.py
+++ b/tests/cross/test_agent_reconciliation.py
@@ -213,6 +213,8 @@ def test_conversation_fails_when_used_tool_is_missing():
     This is the core correctness requirement: if a tool was actually used
     in the conversation history, it MUST be present when resuming.
     """
+    from openhands.sdk.event import ActionEvent
+
     with tempfile.TemporaryDirectory() as temp_dir:
         # Create conversation with two tools
         original_tools = [
@@ -233,9 +235,22 @@ def test_conversation_fails_when_used_tool_is_missing():
         # Initialize the agent to get actual tool definitions
         conversation.agent.init_state(conversation.state, lambda e: None)
 
-        # Simulate that TerminalTool was used by recording it in used_tool_names
-        # In real usage this happens via state.append_event() with ActionEvents
-        conversation.state.used_tool_names = {"TerminalTool"}
+        # Simulate that TerminalTool was used by adding an ActionEvent
+        from openhands.sdk.llm import MessageToolCall, TextContent
+
+        action_event = ActionEvent(
+            tool_name="TerminalTool",
+            tool_call_id="test-call-1",
+            thought=[TextContent(text="Running a command")],
+            tool_call=MessageToolCall(
+                id="test-call-1",
+                name="TerminalTool",
+                arguments="{}",
+                origin="completion",
+            ),
+            llm_response_id="test-response-1",
+        )
+        conversation.state.events.append(action_event)
 
         conversation_id = conversation.state.id
         del conversation