From 8ae815378352e896d4e9a754b3ffd9d9fa08e472 Mon Sep 17 00:00:00 2001 From: gabmfranco-ds Date: Mon, 23 Mar 2026 09:58:04 -0300 Subject: [PATCH 1/2] fix: preserve session on timeout instead of destroying it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Timeouts are transient — the Claude session is still valid on the CLI side. Previously, any error during resume (including timeout) would delete the session and start fresh, causing the bot to lose all conversation context. Now ClaudeTimeoutError is caught separately and the session is kept intact so auto-resume works on the next message. Also bumps default timeout from 300s to 1200s for long-running operations. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/claude/facade.py | 16 ++++++++++++++++ src/utils/constants.py | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/claude/facade.py b/src/claude/facade.py index 5c7276eb..838d423f 100644 --- a/src/claude/facade.py +++ b/src/claude/facade.py @@ -4,12 +4,14 @@ """ import asyncio +from datetime import UTC, datetime from pathlib import Path from typing import Any, Callable, Dict, List, Optional import structlog from ..config.settings import Settings +from .exceptions import ClaudeTimeoutError from .sdk_integration import ClaudeResponse, ClaudeSDKManager, StreamUpdate from .session import SessionManager @@ -89,6 +91,20 @@ async def run_command( stream_callback=on_stream, interrupt_event=interrupt_event, ) + except ClaudeTimeoutError: + # Timeout is transient — the session is likely still valid on + # Claude's side. Preserve it so the next message can resume. + # Touch last_used so the session doesn't expire while the user + # is actively trying to use it. + if session.session_id: + session.last_used = datetime.now(UTC) + await self.session_manager.storage.save_session(session) + logger.warning( + "Claude timed out, preserving session for next attempt", + session_id=claude_session_id, + user_id=user_id, + ) + raise except Exception as resume_error: # If resume failed (e.g., session expired/missing on Claude's side), # retry as a fresh session. The CLI returns a generic exit-code-1 diff --git a/src/utils/constants.py b/src/utils/constants.py index 5ea9a4c3..0399e107 100644 --- a/src/utils/constants.py +++ b/src/utils/constants.py @@ -5,7 +5,7 @@ APP_DESCRIPTION = "Telegram bot for remote Claude Code access" # Default limits -DEFAULT_CLAUDE_TIMEOUT_SECONDS = 300 +DEFAULT_CLAUDE_TIMEOUT_SECONDS = 1200 DEFAULT_CLAUDE_MAX_TURNS = 10 DEFAULT_CLAUDE_MAX_COST_PER_USER = 10.0 DEFAULT_CLAUDE_MAX_COST_PER_REQUEST = 5.0 From 8f863b459edb1a4fdc3d834e41c4b396c2a94fc5 Mon Sep 17 00:00:00 2001 From: gabmfranco-ds Date: Mon, 23 Mar 2026 11:40:52 -0300 Subject: [PATCH 2/2] fix: only destroy session when CLI confirms it is gone Previously, any error during session resume (timeout, process crash, connection issue) would destroy the session and start fresh. Now the session is only destroyed when the error message explicitly says the session is gone ("session not found", "invalid session", etc.). All other errors preserve the session and touch last_used so auto-resume works on the next user message. Also updates .env CLAUDE_TIMEOUT_SECONDS from 600 to 1200 (the constants.py default is overridden by the env var). Co-Authored-By: Claude Opus 4.6 (1M context) --- src/claude/facade.py | 51 ++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/src/claude/facade.py b/src/claude/facade.py index 838d423f..3ccdea98 100644 --- a/src/claude/facade.py +++ b/src/claude/facade.py @@ -11,7 +11,6 @@ import structlog from ..config.settings import Settings -from .exceptions import ClaudeTimeoutError from .sdk_integration import ClaudeResponse, ClaudeSDKManager, StreamUpdate from .session import SessionManager @@ -91,34 +90,28 @@ async def run_command( stream_callback=on_stream, interrupt_event=interrupt_event, ) - except ClaudeTimeoutError: - # Timeout is transient — the session is likely still valid on - # Claude's side. Preserve it so the next message can resume. - # Touch last_used so the session doesn't expire while the user - # is actively trying to use it. - if session.session_id: - session.last_used = datetime.now(UTC) - await self.session_manager.storage.save_session(session) - logger.warning( - "Claude timed out, preserving session for next attempt", - session_id=claude_session_id, - user_id=user_id, + except Exception as exec_error: + # Decide whether to destroy the session or preserve it. + # Timeouts and most process errors are transient — the + # Claude session likely still exists. Only destroy when + # the error clearly indicates the session is gone. + _SESSION_GONE_HINTS = ( + "session not found", + "invalid session", + "session expired", + "no such session", ) - raise - except Exception as resume_error: - # If resume failed (e.g., session expired/missing on Claude's side), - # retry as a fresh session. The CLI returns a generic exit-code-1 - # when the session is gone, so we catch *any* error during resume. - if should_continue: + error_str = str(exec_error).lower() + session_is_gone = any(h in error_str for h in _SESSION_GONE_HINTS) + + if should_continue and session_is_gone: logger.warning( - "Session resume failed, starting fresh session", + "Session gone on Claude side, starting fresh", failed_session_id=claude_session_id, - error=str(resume_error), + error=str(exec_error), ) - # Clean up the stale session await self.session_manager.remove_session(session.session_id) - # Create a fresh session and retry session = await self.session_manager.get_or_create_session( user_id, working_directory ) @@ -131,6 +124,18 @@ async def run_command( interrupt_event=interrupt_event, ) else: + # Transient error — preserve the session so the next + # message can resume it. + if session.session_id: + session.last_used = datetime.now(UTC) + await self.session_manager.storage.save_session(session) + logger.warning( + "Claude command failed, preserving session", + session_id=claude_session_id, + user_id=user_id, + error=str(exec_error), + error_type=type(exec_error).__name__, + ) raise # Update session (assigns real session_id for new sessions)