1 change: 1 addition & 0 deletions .gitignore
@@ -203,6 +203,7 @@ cache
/workspace/
openapi.json
.client/
/.agent_server_llm_switch_demo/
# Local workspace files
.beads/*.db
.worktrees/
56 changes: 56 additions & 0 deletions docs/llm_switching.md
@@ -0,0 +1,56 @@
# Runtime LLM Switching (SDK + agent-server)

This repo supports **runtime LLM switching** without any “agent immutability” or
resume-time diff enforcement. The guiding principle is:

- An `Agent` is a *composition* of (effectively) immutable components (`LLM`,
`AgentContext`, etc.).
- The composition is **switchable**: at runtime the conversation can replace
components (currently the agent’s primary `LLM`) and persist the change.

## Persistence model (single rule)

Conversation snapshots (`base_state.json`) persist the agent’s LLM as:

- `{"profile_id": "<id>"}` when `LLM.profile_id` is present
- a full inline LLM payload when `LLM.profile_id` is absent

Snapshots are written with `context={"expose_secrets": True}` so inline LLMs can be
restored without “reconciliation” against a runtime agent.
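
For illustration, the persisted `agent.llm` value takes one of two shapes (the values
below are made up, not real configuration):

```python
# Sketch of the two shapes agent.llm can take inside base_state.json.
# Values are illustrative only.

# Shape 1: a profile reference -- only the profile id is stored.
llm_as_profile_reference = {"profile_id": "primary"}

# Shape 2: an inline payload -- the full LLM config is stored, including
# secrets, because snapshots are written with expose_secrets=True.
llm_as_inline_payload = {
    "usage_id": "agent",
    "model": "test-provider/original",
    "api_key": "test-key",
}
```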

## SDK API

Local conversations support two LLM update paths:

- `LocalConversation.switch_llm(profile_id: str)`:
loads `<profile_id>.json` from the registry’s profile dir and swaps the active
LLM for the agent’s `usage_id`.
- `LocalConversation.set_llm(llm: LLM)`:
replaces the active LLM instance for the agent’s `usage_id` (useful for remote
clients that can’t rely on server-side profile files).

Both are persisted immediately via the conversation’s base state snapshot.
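
A minimal sketch of both paths, assuming an existing `LocalConversation` named
`conversation` whose agent uses `usage_id="agent"` and a profile file `fallback.json`
in the registry's profile directory (the import path and constructor arguments are
assumptions based on the rest of this repo):

```python
# Illustrative sketch only; `conversation` is an existing LocalConversation.
from openhands.sdk import LLM  # import path is an assumption

# Path 1: switch by profile id (resolved from the registry's profile directory).
conversation.switch_llm("fallback")

# Path 2: replace the active LLM with a client-supplied instance.
conversation.set_llm(
    LLM(usage_id="agent", model="test-provider/alternate", api_key="test-key")
)

# Either call is snapshotted immediately into base_state.json.
```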

## agent-server API

For a running (or paused/idle) conversation:

- `POST /api/conversations/{conversation_id}/llm`
- `{"profile_id": "<id>"}`: switch via server-side profile loading
- `{"llm": {...}}`: set an inline LLM payload (client-supplied config)

There is also a convenience alias:

- `POST /api/conversations/{conversation_id}/llm/switch`
- `{"profile_id": "<id>"}`

## Remote clients (VS Code extension)

VS Code LLM Profiles are **local-only**. The recommended remote flow is:

1. Resolve `profileId` locally to an LLM configuration.
2. Start the conversation with an expanded `agent.llm` payload (no `profile_id`).
3. On profile changes, call `POST /api/conversations/{id}/llm` with
`{"llm": <expanded payload>}` so the server persists the new LLM.
4. On restore, the server’s persisted LLM is the source of truth; the client
can re-apply its selected profile before triggering a new run if desired.
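
A condensed sketch of steps 1-2, with placeholder profile contents, URL, and workspace
path; step 3 reuses the `/llm` endpoint shown above:

```python
import httpx

base_url = "http://127.0.0.1:8000"

# Step 1: resolve the locally selected profile to an expanded LLM config.
expanded_llm = {
    "usage_id": "agent",
    "model": "test-provider/original",
    "api_key": "test-key",
}

# Step 2: start the conversation with the expanded payload (no profile_id).
created = httpx.post(
    f"{base_url}/api/conversations",
    json={
        "agent": {"llm": expanded_llm, "tools": []},
        "workspace": {"working_dir": "/tmp/workspace"},
    },
    timeout=10.0,
)
created.raise_for_status()
conversation_id = created.json()["id"]

# Step 3: on later profile changes, POST the newly expanded payload to
# /api/conversations/{conversation_id}/llm as {"llm": {...}}.
```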
31 changes: 1 addition & 30 deletions examples/01_standalone_sdk/26_runtime_llm_switch.py
@@ -22,10 +22,7 @@
api_key = os.getenv("LLM_API_KEY")
assert api_key is not None, "LLM_API_KEY environment variable is not set."

# 2. Disable inline conversations so profile references are stored instead
os.environ.setdefault("OPENHANDS_INLINE_CONVERSATIONS", "false")

# 3. Profiles live under ~/.openhands/llm-profiles by default. We create two
# 2. Profiles live under ~/.openhands/llm-profiles by default. We create two
# variants that share the same usage_id so they can be swapped at runtime.
registry = LLMRegistry()
usage_id = "support-agent"
@@ -123,29 +120,3 @@
reloaded.run()

print("Reloaded run finished with profile:", reloaded.state.agent.llm.profile_id)

# ---------------------------------------------------------------------------
# Part 2: Inline persistence rejects runtime switching
# ---------------------------------------------------------------------------
# When OPENHANDS_INLINE_CONVERSATIONS is true the conversation persists full
# LLM payloads instead of profile references. Switching profiles would break
# the diff reconciliation step, so the SDK deliberately rejects it with a
# RuntimeError. We demonstrate that behaviour below.
os.environ["OPENHANDS_INLINE_CONVERSATIONS"] = "true"

inline_persistence_dir = Path("./.conversations_switch_demo_inline").resolve()
inline_agent = Agent(llm=registry.load_profile(base_profile_id), tools=[])
inline_conversation = Conversation(
agent=inline_agent,
workspace=str(workspace_dir),
persistence_dir=str(inline_persistence_dir),
conversation_id=uuid.uuid4(),
visualizer=None,
)

try:
inline_conversation.switch_llm(alt_profile_id)
except RuntimeError as exc:
print("Inline mode switch attempt rejected as expected:", exc)
else:
raise AssertionError("Inline mode should have rejected the LLM switch")
183 changes: 183 additions & 0 deletions examples/02_remote_agent_server/07_llm_switch_and_restore.py
@@ -0,0 +1,183 @@
"""Demonstrate agent-server LLM switching + persistence across restart.

This script:
1) Starts a local Python agent-server with a dedicated conversations directory.
2) Creates a conversation (without running it).
3) Switches the conversation's active LLM via `POST /api/conversations/{id}/llm`.
4) Restarts the agent-server and verifies the switched LLM persists on restore.

The switch uses an inline LLM payload, which is the recommended path for remote
clients whose "profiles" are local-only (e.g. the VS Code extension).
"""

from __future__ import annotations

import json
import os
import socket
import subprocess
import sys
import time
from pathlib import Path

import httpx


def _find_free_port() -> int:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
sock.bind(("127.0.0.1", 0))
sock.listen(1)
return int(sock.getsockname()[1])


def _wait_for_health(base_url: str, timeout_seconds: float = 30.0) -> None:
deadline = time.time() + timeout_seconds
while time.time() < deadline:
try:
response = httpx.get(f"{base_url}/health", timeout=1.0)
if response.status_code == 200:
return
except Exception:
pass
time.sleep(0.25)
raise RuntimeError(
f"Timed out waiting for agent-server health at {base_url}/health"
)


def _start_agent_server(
*, conversations_path: Path
) -> tuple[subprocess.Popen[str], str]:
port = _find_free_port()
base_url = f"http://127.0.0.1:{port}"

env = {
**os.environ,
"PYTHONUNBUFFERED": "1",
"OH_ENABLE_VSCODE": "0",
"OH_ENABLE_VNC": "0",
"OH_PRELOAD_TOOLS": "0",
"SESSION_API_KEY": "",
"OH_CONVERSATIONS_PATH": str(conversations_path),
}

proc = subprocess.Popen(
[
sys.executable,
"-m",
"openhands.agent_server",
"--host",
"127.0.0.1",
"--port",
str(port),
],
env=env,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
)
try:
_wait_for_health(base_url)
except Exception:
try:
output = (proc.stdout.read() if proc.stdout else "") or ""
except Exception:
output = ""
proc.terminate()
raise RuntimeError(f"agent-server failed to start.\n\n{output}") from None

return proc, base_url


def _stop_agent_server(proc: subprocess.Popen[str]) -> None:
proc.terminate()
try:
proc.wait(timeout=5)
except subprocess.TimeoutExpired:
proc.kill()
proc.wait(timeout=5)


def main() -> None:
root = Path(".agent_server_llm_switch_demo").resolve()
conversations_path = root / "conversations"
workspace_path = root / "workspace"
conversations_path.mkdir(parents=True, exist_ok=True)
workspace_path.mkdir(parents=True, exist_ok=True)

proc_1, base_1 = _start_agent_server(conversations_path=conversations_path)
conversation_id: str
try:
print("agent-server #1:", base_1)

create = httpx.post(
f"{base_1}/api/conversations",
json={
"agent": {
"llm": {
"usage_id": "agent",
"model": "test-provider/original",
"api_key": "test-key",
},
"tools": [],
},
"workspace": {"working_dir": str(workspace_path)},
},
timeout=10.0,
)
create.raise_for_status()
conversation_id = create.json()["id"]
print("conversation id:", conversation_id)

update = httpx.post(
f"{base_1}/api/conversations/{conversation_id}/llm",
json={
"llm": {
"usage_id": "ignored-by-server",
"model": "test-provider/alternate",
"api_key": "test-key-2",
}
},
timeout=10.0,
)
update.raise_for_status()

info = httpx.get(
f"{base_1}/api/conversations/{conversation_id}",
timeout=10.0,
)
info.raise_for_status()
current_model = info.json()["agent"]["llm"]["model"]
print("server #1 model:", current_model)
if current_model != "test-provider/alternate":
raise RuntimeError("LLM switch did not apply on server #1")
finally:
_stop_agent_server(proc_1)

proc_2, base_2 = _start_agent_server(conversations_path=conversations_path)
try:
print("agent-server #2:", base_2)
restored = httpx.get(
f"{base_2}/api/conversations/{conversation_id}",
timeout=10.0,
)
restored.raise_for_status()
restored_model = restored.json()["agent"]["llm"]["model"]
print("server #2 restored model:", restored_model)
if restored_model != "test-provider/alternate":
raise RuntimeError("LLM switch did not persist across restart")
finally:
_stop_agent_server(proc_2)

print("✓ LLM switch persisted across agent-server restart")

base_state = (
conversations_path / conversation_id.replace("-", "") / "base_state.json"
)
if base_state.exists():
payload = json.loads(base_state.read_text(encoding="utf-8"))
print("base_state.json agent.llm:", payload.get("agent", {}).get("llm"))


if __name__ == "__main__":
main()
@@ -21,6 +21,8 @@
SetSecurityAnalyzerRequest,
StartConversationRequest,
Success,
SwitchLLMProfileRequest,
UpdateConversationLLMRequest,
UpdateConversationRequest,
UpdateSecretsRequest,
)
@@ -254,6 +256,44 @@ async def set_conversation_security_analyzer(
return Success()


@conversation_router.post(
"/{conversation_id}/llm/switch",
responses={404: {"description": "Item not found"}},
)
async def switch_conversation_llm(
conversation_id: UUID,
request: SwitchLLMProfileRequest,
conversation_service: ConversationService = Depends(get_conversation_service),
) -> Success:
"""Switch the conversation's active agent LLM profile for future requests."""
event_service = await conversation_service.get_event_service(conversation_id)
if event_service is None:
raise HTTPException(status.HTTP_404_NOT_FOUND)
await event_service.switch_llm(request.profile_id)
return Success()


@conversation_router.post(
"/{conversation_id}/llm",
responses={404: {"description": "Item not found"}},
)
async def update_conversation_llm(
conversation_id: UUID,
request: UpdateConversationLLMRequest,
conversation_service: ConversationService = Depends(get_conversation_service),
) -> Success:
"""Update the conversation's active agent LLM for future requests."""
event_service = await conversation_service.get_event_service(conversation_id)
if event_service is None:
raise HTTPException(status.HTTP_404_NOT_FOUND)
if request.profile_id is not None:
await event_service.switch_llm(request.profile_id)
else:
assert request.llm is not None
await event_service.set_llm(request.llm)
return Success()
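
# --- Illustrative sketch (not part of this diff) ----------------------------
# The request models imported above are not shown in this hunk. Judging from
# how update_conversation_llm uses them, they are presumably Pydantic models
# along these lines (field names and types here are assumptions):
#
#     class SwitchLLMProfileRequest(BaseModel):
#         profile_id: str
#
#     class UpdateConversationLLMRequest(BaseModel):
#         profile_id: str | None = None
#         llm: LLM | None = None  # handler requires one; profile_id wins if both set
# -----------------------------------------------------------------------------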


@conversation_router.patch(
"/{conversation_id}", responses={404: {"description": "Item not found"}}
)
26 changes: 26 additions & 0 deletions openhands-agent-server/openhands/agent_server/event_service.py
@@ -543,6 +543,32 @@ async def set_security_analyzer(
None, self._conversation.set_security_analyzer, security_analyzer
)

async def _update_llm(self, update_fn, *args) -> None:
"""Apply an LLM update and re-wire telemetry for future completions."""
conversation = self._conversation
if conversation is None:
raise ValueError("inactive_service")
loop = asyncio.get_running_loop()
await loop.run_in_executor(None, update_fn, *args)
# The agent may now hold a new LLM instance; re-wire telemetry callbacks so
# clients continue receiving logs/stats for future completions.
self._setup_llm_log_streaming(conversation.agent)
self._setup_stats_streaming(conversation.agent)

async def switch_llm(self, profile_id: str) -> None:
"""Switch the conversation's active agent LLM to the given profile."""
conversation = self._conversation
if conversation is None:
raise ValueError("inactive_service")
await self._update_llm(conversation.switch_llm, profile_id)

async def set_llm(self, llm: LLM) -> None:
"""Replace the conversation's active agent LLM instance."""
conversation = self._conversation
if conversation is None:
raise ValueError("inactive_service")
await self._update_llm(conversation.set_llm, llm)

async def close(self):
await self._pub_sub.close()
if self._conversation: