From a3dfe703507494e6082f142276fb91a743375250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 21 Nov 2025 12:44:51 +0700 Subject: [PATCH 01/54] Remove the unused auto-refresh functionality and related imports. They are no longer needed since the underlying library issue has been resolved. --- app/services/client.py | 46 +----------------------------------------- 1 file changed, 1 insertion(+), 45 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 825564b..1554bdd 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -8,11 +8,8 @@ from gemini_webapi import GeminiClient, ModelOutput from gemini_webapi.client import ChatSession from gemini_webapi.constants import Model -from gemini_webapi.exceptions import AuthError, ModelInvalid +from gemini_webapi.exceptions import ModelInvalid from gemini_webapi.types import Gem -from gemini_webapi.utils import rotate_tasks -from gemini_webapi.utils.rotate_1psidts import rotate_1psidts -from loguru import logger from ..models import Message from ..utils import g_config @@ -76,47 +73,6 @@ async def init( verbose=verbose, ) - async def start_auto_refresh(self) -> None: - """ - Refresh the __Secure-1PSIDTS cookie periodically and keep the HTTP client in sync. - """ - while True: - new_1psidts: str | None = None - try: - new_1psidts = await rotate_1psidts(self.cookies, self.proxy) - except AuthError: - if task := rotate_tasks.get(self.cookies.get("__Secure-1PSID", "")): - task.cancel() - logger.warning( - "Failed to refresh Gemini cookies (AuthError). Auto refresh task canceled." - ) - return - except Exception as exc: - logger.warning(f"Unexpected error while refreshing Gemini cookies: {exc}") - - if new_1psidts: - self.cookies["__Secure-1PSIDTS"] = new_1psidts - self._sync_httpx_cookie("__Secure-1PSIDTS", new_1psidts) - logger.debug("Gemini cookies refreshed. New __Secure-1PSIDTS applied.") - await asyncio.sleep(self.refresh_interval) - - def _sync_httpx_cookie(self, name: str, value: str) -> None: - """ - Ensure the underlying httpx client uses the refreshed cookie value. - """ - if not self.client: - return - - jar = self.client.cookies.jar - matched = False - for cookie in jar: - if cookie.name == name: - cookie.value = value - matched = True - if not matched: - # Fall back to setting the cookie with default scope if we did not find an existing entry. - self.client.cookies.set(name, value) - async def generate_content( self, prompt: str, From 3a692ab014bf6d0cb98f38d499dc2760eb92c096 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 22 Nov 2025 14:54:53 +0700 Subject: [PATCH 02/54] Enhance error handling in client initialization and message sending --- app/server/chat.py | 12 ++++++++-- app/services/client.py | 52 +++++++++++------------------------------- app/services/pool.py | 26 ++++++++++++++------- 3 files changed, 41 insertions(+), 49 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 66fa6ce..e8752cf 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1129,7 +1129,11 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s """ if len(text) <= MAX_CHARS_PER_REQUEST: # No need to split - a single request is fine. 
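# Editor's sketch (hypothetical standalone helper, not part of this patch): the splitting
# strategy _send_with_split applies when the early return below does not fire. Every chunk
# except the last gets CONTINUATION_HINT appended, and only the final chunk carries files.
def split_with_hint(text: str, max_chars: int, hint: str) -> list[str]:
    # Assumes max_chars > len(hint); the short-text case was already handled above.
    size = max_chars - len(hint)
    chunks = [text[i : i + size] for i in range(0, len(text), size)]
    return [c + hint for c in chunks[:-1]] + [chunks[-1]]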
- return await session.send_message(text, files=files) + try: + return await session.send_message(text, files=files) + except Exception as e: + logger.exception(f"Error sending message to Gemini: {e}") + raise hint_len = len(CONTINUATION_HINT) chunk_size = MAX_CHARS_PER_REQUEST - hint_len @@ -1155,7 +1159,11 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s raise # The last chunk carries the files (if any) and we return its response. - return await session.send_message(chunks[-1], files=files) + try: + return await session.send_message(chunks[-1], files=files) + except Exception as e: + logger.exception(f"Error sending final chunk to Gemini: {e}") + raise def _iter_stream_segments(model_output: str, chunk_size: int = 64): diff --git a/app/services/client.py b/app/services/client.py index 1554bdd..26be26f 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -1,4 +1,3 @@ -import asyncio import html import json import re @@ -6,10 +5,7 @@ from typing import Any, cast from gemini_webapi import GeminiClient, ModelOutput -from gemini_webapi.client import ChatSession -from gemini_webapi.constants import Model -from gemini_webapi.exceptions import ModelInvalid -from gemini_webapi.types import Gem +from loguru import logger from ..models import Message from ..utils import g_config @@ -64,40 +60,18 @@ async def init( refresh_interval = cast(float, _resolve(refresh_interval, config.refresh_interval)) verbose = cast(bool, _resolve(verbose, config.verbose)) - await super().init( - timeout=timeout, - auto_close=auto_close, - close_delay=close_delay, - auto_refresh=auto_refresh, - refresh_interval=refresh_interval, - verbose=verbose, - ) - - async def generate_content( - self, - prompt: str, - files: list[str | Path] | None = None, - model: Model | str = Model.UNSPECIFIED, - gem: Gem | str | None = None, - chat: ChatSession | None = None, - **kwargs, - ) -> ModelOutput: - cnt = 2 # Try 2 times before giving up - last_exception: ModelInvalid | None = None - while cnt: - cnt -= 1 - try: - return await super().generate_content(prompt, files, model, gem, chat, **kwargs) - except ModelInvalid as e: - # This is not always caused by model selection. Instead, it can be solved by retrying. - # So we catch it and retry as a workaround. 
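# Editor's note (illustrative sketch, hypothetical helper): the override removed in this
# hunk boiled down to a bounded retry around transient ModelInvalid errors, equivalent to:
import asyncio

async def retry_transient(call, exc_type, attempts: int = 2, delay: float = 1.0):
    last_exc: Exception | None = None
    for _ in range(attempts):
        try:
            return await call()
        except exc_type as exc:
            last_exc = exc  # remember the failure, pause briefly, then try again
            await asyncio.sleep(delay)
    assert last_exc is not None  # attempts >= 1, so at least one exception was caught
    raise last_exc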
- await asyncio.sleep(1) - last_exception = e - - # If retrying failed, re-raise ModelInvalid - if last_exception is not None: - raise last_exception - raise RuntimeError("generate_content failed without receiving a ModelInvalid error.") + try: + await super().init( + timeout=timeout, + auto_close=auto_close, + close_delay=close_delay, + auto_refresh=auto_refresh, + refresh_interval=refresh_interval, + verbose=verbose, + ) + except Exception: + logger.exception(f"Failed to initialize GeminiClient {self.id}") + raise @staticmethod async def process_message( diff --git a/app/services/pool.py b/app/services/pool.py index abf1fa0..24a21dc 100644 --- a/app/services/pool.py +++ b/app/services/pool.py @@ -35,14 +35,24 @@ def __init__(self) -> None: async def init(self) -> None: """Initialize all clients in the pool.""" + success_count = 0 for client in self._clients: if not client.running: - await client.init( - timeout=g_config.gemini.timeout, - auto_refresh=g_config.gemini.auto_refresh, - verbose=g_config.gemini.verbose, - refresh_interval=g_config.gemini.refresh_interval, - ) + try: + await client.init( + timeout=g_config.gemini.timeout, + auto_refresh=g_config.gemini.auto_refresh, + verbose=g_config.gemini.verbose, + refresh_interval=g_config.gemini.refresh_interval, + ) + except Exception: + logger.exception(f"Failed to initialize client {client.id}") + + if client.running: + success_count += 1 + + if success_count == 0: + raise RuntimeError("Failed to initialize any Gemini clients") async def acquire(self, client_id: Optional[str] = None) -> GeminiClientWrapper: """Return a healthy client by id or using round-robin.""" @@ -89,8 +99,8 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: ) logger.info(f"Restarted Gemini client {client.id} after it stopped.") return True - except Exception as exc: - logger.warning(f"Failed to restart Gemini client {client.id}: {exc}") + except Exception: + logger.exception(f"Failed to restart Gemini client {client.id}") return False @property From d57e3676fed9fa03e1f51a5aed80d4b7f88e6a88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 22 Nov 2025 17:49:41 +0700 Subject: [PATCH 03/54] Refactor link handling to extract file paths and simplify Google search links --- app/services/client.py | 46 +++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 26be26f..f5a39dd 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -24,9 +24,20 @@ ) HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);") + MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=\s*[-\\`*_{}\[\]()#+.!<>])") + CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`]*`)", re.DOTALL) +FILE_PATH_PATTERN = re.compile( + r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|Gemfile|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", + re.IGNORECASE, +) + +GOOGLE_SEARCH_LINK_PATTERN = re.compile( + r"(?:`\s*)?`?\[`?([^`\]]+)`?`?]\((https://www\.google\.com/search\?q=)(.*?)(? 
str: text = _unescape_html(text) text = _unescape_markdown(text) - def simplify_link_target(text_content: str) -> str: - match_colon_num = re.match(r"([^:]+:\d+)", text_content) - if match_colon_num: - return match_colon_num.group(1) - return text_content + def extract_file_path_from_display_text(text_content: str) -> str | None: + match = re.match(FILE_PATH_PATTERN, text_content) + if match: + return match.group(1) + return None def replacer(match: re.Match) -> str: - outer_open_paren = match.group(1) - display_text = match.group(2) + display_text = str(match.group(1)).strip() + google_search_prefix = match.group(2) + query_part = match.group(3) - new_target_url = simplify_link_target(display_text) - new_link_segment = f"[`{display_text}`]({new_target_url})" + file_path = extract_file_path_from_display_text(display_text) - if outer_open_paren: - return f"{outer_open_paren}{new_link_segment})" + if file_path: + # If it's a file path, transform it into a self-referencing Markdown link + return f"[`{file_path}`]({file_path})" else: - return new_link_segment - - # Replace Google search links with simplified Markdown links - pattern = r"(\()?\[`([^`]+?)`\]\((https://www.google.com/search\?q=)(.*?)(? Date: Sat, 22 Nov 2025 18:29:41 +0700 Subject: [PATCH 04/54] Fix regex pattern for Google search link matching --- app/services/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/client.py b/app/services/client.py index f5a39dd..ffc559e 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -35,7 +35,7 @@ ) GOOGLE_SEARCH_LINK_PATTERN = re.compile( - r"(?:`\s*)?`?\[`?([^`\]]+)`?`?]\((https://www\.google\.com/search\?q=)(.*?)(? Date: Sat, 22 Nov 2025 21:44:09 +0700 Subject: [PATCH 05/54] Fix regex patterns for Markdown escaping, code fence and Google search link matching --- app/services/client.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index ffc559e..0088c74 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -25,17 +25,17 @@ HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);") -MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=\s*[-\\`*_{}\[\]()#+.!<>])") +MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=[-\\`*_{}\[\]()#+.!<>])") -CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`]*`)", re.DOTALL) +CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`\n]+?`)", re.DOTALL) FILE_PATH_PATTERN = re.compile( - r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|Gemfile|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", + r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", re.IGNORECASE, ) GOOGLE_SEARCH_LINK_PATTERN = re.compile( - r"(?:`\s*)?`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)(.*?)(? 
Date: Sat, 22 Nov 2025 22:52:27 +0700 Subject: [PATCH 06/54] Increase timeout value in configuration files from 60 to 120 seconds to better handle heavy tasks --- app/server/chat.py | 2 -- app/services/client.py | 8 -------- app/utils/config.py | 2 +- config/config.yaml | 6 +++--- 4 files changed, 4 insertions(+), 14 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index e8752cf..b4e88da 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -48,9 +48,7 @@ # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) - CONTINUATION_HINT = "\n(More messages to come, please reply with just 'ok.')" - TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)```", re.DOTALL | re.IGNORECASE) TOOL_CALL_RE = re.compile( r"(.*?)", re.DOTALL | re.IGNORECASE diff --git a/app/services/client.py b/app/services/client.py index 0088c74..166eb70 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -16,29 +16,21 @@ '```xml\n{"arg": "value"}\n```\n' "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n" ) - CODE_BLOCK_HINT = ( "\nWhenever you include code, markup, or shell snippets, wrap each snippet in a Markdown fenced " "block and supply the correct language label (for example, ```python ... ``` or ```html ... ```).\n" "Fence ONLY the actual code/markup; keep all narrative or explanatory text outside the fences.\n" ) - HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);") - MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=[-\\`*_{}\[\]()#+.!<>])") - CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`\n]+?`)", re.DOTALL) - FILE_PATH_PATTERN = re.compile( r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", re.IGNORECASE, ) - GOOGLE_SEARCH_LINK_PATTERN = re.compile( r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?" 
) - - _UNSET = object() diff --git a/app/utils/config.py b/app/utils/config.py index 48f0792..796ca75 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -56,7 +56,7 @@ class GeminiConfig(BaseModel): clients: list[GeminiClientSettings] = Field( ..., description="List of Gemini client credential pairs" ) - timeout: int = Field(default=60, ge=1, description="Init timeout") + timeout: int = Field(default=120, ge=1, description="Init timeout") auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies") refresh_interval: int = Field( default=540, ge=1, description="Interval in seconds to refresh Gemini cookies" diff --git a/config/config.yaml b/config/config.yaml index b0f8fbf..89c88b7 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -21,8 +21,8 @@ gemini: - id: "example-id-1" # Arbitrary client ID secure_1psid: "YOUR_SECURE_1PSID_HERE" secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE" - proxy: null # Optional proxy URL (null/empty means direct connection) - timeout: 60 # Init timeout in seconds + proxy: null # Optional proxy URL (null/empty means direct connection) + timeout: 120 # Init timeout in seconds auto_refresh: true # Auto-refresh session cookies refresh_interval: 540 # Refresh interval in seconds verbose: false # Enable verbose logging for Gemini requests @@ -34,4 +34,4 @@ storage: retention_days: 14 # Number of days to retain conversations before cleanup logging: - level: "INFO" # Log level: DEBUG, INFO, WARNING, ERROR + level: "INFO" # Log level: DEBUG, INFO, WARNING, ERROR From f00ebfcbd0424c7ab06d680f308349a04aff3be0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 2 Dec 2025 13:15:27 +0700 Subject: [PATCH 07/54] Fix Image generation --- .github/workflows/docker.yaml | 10 ++--- .github/workflows/track.yml | 12 +++--- app/models/models.py | 14 +++---- app/server/chat.py | 77 +++++++++++++++++++++-------------- app/services/client.py | 4 +- app/utils/helper.py | 10 ++++- 6 files changed, 75 insertions(+), 52 deletions(-) diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index 4527f3d..eef2a41 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -5,11 +5,11 @@ on: branches: - main tags: - - 'v*' + - "v*" paths-ignore: - - '**/*.md' - - '.github/workflows/ruff.yaml' - - '.github/workflows/track.yml' + - "**/*.md" + - ".github/workflows/ruff.yaml" + - ".github/workflows/track.yml" env: REGISTRY: ghcr.io @@ -57,4 +57,4 @@ jobs: labels: ${{ steps.meta.outputs.labels }} platforms: linux/amd64,linux/arm64 cache-from: type=gha - cache-to: type=gha,mode=max \ No newline at end of file + cache-to: type=gha,mode=max diff --git a/.github/workflows/track.yml b/.github/workflows/track.yml index 63afbec..838dcf8 100644 --- a/.github/workflows/track.yml +++ b/.github/workflows/track.yml @@ -2,7 +2,7 @@ name: Update gemini-webapi on: schedule: - - cron: '0 0 * * *' # Runs every day at midnight + - cron: "0 0 * * *" # Runs every day at midnight workflow_dispatch: jobs: @@ -24,7 +24,7 @@ jobs: run: | # Install dependencies first to enable uv pip show uv sync - + # Get current version of gemini-webapi before upgrade OLD_VERSION=$(uv pip show gemini-webapi 2>/dev/null | grep ^Version: | awk '{print $2}') if [ -z "$OLD_VERSION" ]; then @@ -32,10 +32,10 @@ jobs: exit 1 fi echo "Current gemini-webapi version: $OLD_VERSION" - + # Update the package using uv, which handles pyproject.toml and uv.lock uv add --upgrade gemini-webapi - + # Get new version of gemini-webapi after upgrade 
NEW_VERSION=$(uv pip show gemini-webapi | grep ^Version: | awk '{print $2}') if [ -z "$NEW_VERSION" ]; then @@ -43,7 +43,7 @@ jobs: exit 1 fi echo "New gemini-webapi version: $NEW_VERSION" - + # Only proceed if gemini-webapi version has changed if [ "$OLD_VERSION" != "$NEW_VERSION" ]; then echo "gemini-webapi has been updated from $OLD_VERSION to $NEW_VERSION" @@ -63,7 +63,7 @@ jobs: title: ":arrow_up: update gemini-webapi to ${{ steps.update.outputs.version }}" body: | Update `gemini-webapi` to version `${{ steps.update.outputs.version }}`. - + Auto-generated by GitHub Actions using `uv`. branch: update-gemini-webapi base: main diff --git a/app/models/models.py b/app/models/models.py index 3991f12..74d8cd5 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -154,11 +154,13 @@ class ConversationInStore(BaseModel): class ResponseInputContent(BaseModel): """Content item for Responses API input.""" - type: Literal["input_text", "input_image"] + type: Literal["input_text", "input_image", "input_file"] text: Optional[str] = None image_url: Optional[str] = None - image_base64: Optional[str] = None - mime_type: Optional[str] = None + detail: Optional[Literal["auto", "low", "high"]] = None + file_url: Optional[str] = None + file_data: Optional[str] = None + filename: Optional[str] = None class ResponseInputItem(BaseModel): @@ -212,12 +214,8 @@ class ResponseUsage(BaseModel): class ResponseOutputContent(BaseModel): """Content item for Responses API output.""" - type: Literal["output_text", "output_image"] + type: Literal["output_text"] text: Optional[str] = None - image_base64: Optional[str] = None - mime_type: Optional[str] = None - width: Optional[int] = None - height: Optional[int] = None class ResponseOutputMessage(BaseModel): diff --git a/app/server/chat.py b/app/server/chat.py index b4e88da..76dc632 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -381,14 +381,6 @@ def _strip_tagged_blocks(text: str) -> str: return "".join(result) -def _ensure_data_url(part: ResponseInputContent) -> str | None: - image_url = part.image_url - if not image_url and part.image_base64: - mime_type = part.mime_type or "image/png" - image_url = f"data:{mime_type};base64,{part.image_base64}" - return image_url - - def _response_items_to_messages( items: str | list[ResponseInputItem], ) -> tuple[list[Message], str | list[ResponseInputItem]]: @@ -422,14 +414,34 @@ def _response_items_to_messages( if text_value: converted.append(ContentItem(type="text", text=text_value)) elif part.type == "input_image": - image_url = _ensure_data_url(part) + image_url = part.image_url if image_url: normalized_contents.append( - ResponseInputContent(type="input_image", image_url=image_url) + ResponseInputContent( + type="input_image", + image_url=image_url, + detail=part.detail if part.detail else "auto", + ) ) converted.append( - ContentItem(type="image_url", image_url={"url": image_url}) + ContentItem( + type="image_url", + image_url={ + "url": image_url, + "detail": part.detail if part.detail else "auto", + }, + ) ) + elif part.type == "input_file": + if part.file_url or part.file_data: + normalized_contents.append(part) + file_info = {} + if part.file_data: + file_info["file_data"] = part.file_data + file_info["filename"] = part.filename + if part.file_url: + file_info["url"] = part.file_url + converted.append(ContentItem(type="file", file=file_info)) messages.append(Message(role=role, content=converted or None)) normalized_input.append( @@ -472,11 +484,26 @@ def _instructions_to_messages( if text_value: 
converted.append(ContentItem(type="text", text=text_value)) elif part.type == "input_image": - image_url = _ensure_data_url(part) + image_url = part.image_url if image_url: converted.append( - ContentItem(type="image_url", image_url={"url": image_url}) + ContentItem( + type="image_url", + image_url={ + "url": image_url, + "detail": part.detail if part.detail else "auto", + }, + ) ) + elif part.type == "input_file": + file_info = {} + if part.file_data: + file_info["file_data"] = part.file_data + file_info["filename"] = part.filename + if part.file_url: + file_info["url"] = part.file_url + if file_info: + converted.append(ContentItem(type="file", file=file_info)) instruction_messages.append(Message(role=role, content=converted or None)) return instruction_messages @@ -799,13 +826,13 @@ async def create_response( session, client, remaining_messages = await _find_reusable_session(db, pool, model, messages) async def _build_payload( - payload_messages: list[Message], reuse_session: bool + _payload_messages: list[Message], _reuse_session: bool ) -> tuple[str, list[Path | str]]: - if reuse_session and len(payload_messages) == 1: + if _reuse_session and len(_payload_messages) == 1: return await GeminiClientWrapper.process_message( - payload_messages[0], tmp_dir, tagged=False + _payload_messages[0], tmp_dir, tagged=False ) - return await GeminiClientWrapper.process_conversation(payload_messages, tmp_dir) + return await GeminiClientWrapper.process_conversation(_payload_messages, tmp_dir) reuse_session = session is not None if reuse_session: @@ -821,7 +848,7 @@ async def _build_payload( detail="No new messages to send for the existing session.", ) payload_messages = messages_to_send - model_input, files = await _build_payload(payload_messages, reuse_session=True) + model_input, files = await _build_payload(payload_messages, _reuse_session=True) logger.debug( f"Reused session {session.metadata} - sending {len(payload_messages)} prepared messages." 
) @@ -830,7 +857,7 @@ async def _build_payload( client = await pool.acquire() session = client.start_chat(model=model) payload_messages = messages - model_input, files = await _build_payload(payload_messages, reuse_session=False) + model_input, files = await _build_payload(payload_messages, _reuse_session=False) except ValueError as e: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) except RuntimeError as e: @@ -935,7 +962,6 @@ async def _build_payload( detail = f"{detail} Assistant response: {summary}" raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=detail) - image_contents: list[ResponseOutputContent] = [] image_call_items: list[ResponseImageGenerationCall] = [] for image in images: try: @@ -943,16 +969,6 @@ async def _build_payload( except Exception as exc: logger.warning(f"Failed to download generated image: {exc}") continue - mime_type = "image/png" if isinstance(image, GeneratedImage) else "image/jpeg" - image_contents.append( - ResponseOutputContent( - type="output_image", - image_base64=image_base64, - mime_type=mime_type, - width=width, - height=height, - ) - ) image_call_items.append( ResponseImageGenerationCall( id=f"img_{uuid.uuid4().hex}", @@ -977,7 +993,6 @@ async def _build_payload( response_contents: list[ResponseOutputContent] = [] if assistant_text: response_contents.append(ResponseOutputContent(type="output_text", text=assistant_text)) - response_contents.extend(image_contents) if not response_contents: response_contents.append(ResponseOutputContent(type="output_text", text="")) diff --git a/app/services/client.py b/app/services/client.py index 166eb70..0207114 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -113,8 +113,10 @@ async def process_message( if file_data := item.file.get("file_data", None): filename = item.file.get("filename", "") files.append(await save_file_to_tempfile(file_data, filename, tempdir)) + elif url := item.file.get("url", None): + files.append(await save_url_to_tempfile(url, tempdir)) else: - raise ValueError("File must contain 'file_data' key") + raise ValueError("File must contain 'file_data' or 'url' key") elif message.content is not None: raise ValueError("Unsupported message content type.") diff --git a/app/utils/helper.py b/app/utils/helper.py index 48fc99d..3bff469 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -1,4 +1,5 @@ import base64 +import mimetypes import tempfile from pathlib import Path @@ -40,9 +41,16 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None): suffix: str | None = None if url.startswith("data:image/"): # Base64 encoded image + metadata_part = url.split(",")[0] + mime_type = metadata_part.split(":")[1].split(";")[0] + base64_data = url.split(",")[1] data = base64.b64decode(base64_data) - suffix = ".png" + + # Guess extension from mime type, default to the subtype if not found + suffix = mimetypes.guess_extension(mime_type) + if not suffix: + suffix = f".{mime_type.split('/')[1]}" else: # http files async with httpx.AsyncClient() as client: From d911c33e81e83211ed53d77b300c4c203df7b53c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 2 Dec 2025 15:50:45 +0700 Subject: [PATCH 08/54] Refactor tool handling to support standard and image generation tools separately --- app/models/models.py | 7 ++++--- app/server/chat.py | 36 +++++++++++++++++++++++++++++++++--- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 74d8cd5..52dd414 
100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -174,7 +174,8 @@ class ResponseInputItem(BaseModel): class ResponseToolChoice(BaseModel): """Tool choice enforcing a specific tool in Responses API.""" - type: Literal["image_generation"] + type: Literal["function", "image_generation"] + function: Optional[ToolChoiceFunctionDetail] = None class ResponseImageTool(BaseModel): @@ -195,8 +196,8 @@ class ResponseCreateRequest(BaseModel): top_p: Optional[float] = 1.0 max_output_tokens: Optional[int] = None stream: Optional[bool] = False - tool_choice: Optional[ResponseToolChoice] = None - tools: Optional[List[ResponseImageTool]] = None + tool_choice: Optional[Union[str, ResponseToolChoice]] = None + tools: Optional[List[Union[Tool, ResponseImageTool]]] = None store: Optional[bool] = None user: Optional[str] = None response_format: Optional[Dict[str, Any]] = None diff --git a/app/server/chat.py b/app/server/chat.py index 76dc632..8277d0c 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -795,7 +795,28 @@ async def create_response( f"Structured response requested for /v1/responses (schema={structured_requirement.schema_name})." ) - image_instruction = _build_image_generation_instruction(request.tools, request.tool_choice) + # Separate standard tools from image generation tools + standard_tools: list[Tool] = [] + image_tools: list[ResponseImageTool] = [] + + if request.tools: + for t in request.tools: + if isinstance(t, Tool): + standard_tools.append(t) + elif isinstance(t, ResponseImageTool): + image_tools.append(t) + # Handle dicts if Pydantic didn't convert them fully (fallback) + elif isinstance(t, dict): + t_type = t.get("type") + if t_type == "function": + standard_tools.append(Tool.model_validate(t)) + elif t_type == "image_generation": + image_tools.append(ResponseImageTool.model_validate(t)) + + image_instruction = _build_image_generation_instruction( + image_tools, + request.tool_choice if isinstance(request.tool_choice, ResponseToolChoice) else None, + ) if image_instruction: extra_instructions.append(image_instruction) logger.debug("Image generation support enabled for /v1/responses request.") @@ -808,10 +829,19 @@ async def create_response( f"Injected {len(preface_messages)} instruction messages before sending to Gemini." ) + # Pass standard tools to the prompt builder + # Determine tool_choice for standard tools (ignore image_generation choice here as it is handled via instruction) + model_tool_choice = None + if isinstance(request.tool_choice, str): + model_tool_choice = request.tool_choice + elif isinstance(request.tool_choice, ToolChoiceFunction): + model_tool_choice = request.tool_choice + # If tool_choice is ResponseToolChoice (image_generation), we don't pass it as a function tool choice. 
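# Editor's sketch (hypothetical standalone helper): the tool partitioning above, reduced
# to its core; the real handler additionally validates raw dicts into Tool /
# ResponseImageTool models before appending.
def partition_tools(tools: list) -> tuple[list, list]:
    standard, image = [], []
    for t in tools or []:
        kind = t.get("type") if isinstance(t, dict) else getattr(t, "type", None)
        (image if kind == "image_generation" else standard).append(t)
    return standard, image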
+ messages = _prepare_messages_for_model( conversation_messages, - tools=None, - tool_choice=None, + tools=standard_tools or None, + tool_choice=model_tool_choice, extra_instructions=extra_instructions or None, ) From a8241ad78831b675d0321bbe5271c1bf10a6ce2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 2 Dec 2025 17:17:27 +0700 Subject: [PATCH 09/54] Fix: use "ascii" decoding for base64-encoded image data consistency --- app/server/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/server/chat.py b/app/server/chat.py index 8277d0c..67790ab 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1524,4 +1524,4 @@ async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | Non data = Path(saved_path).read_bytes() width, height = _extract_image_dimensions(data) - return base64.b64encode(data).decode("utf-8"), width, height + return base64.b64encode(data).decode("ascii"), width, height From fd2723d49b5929cb770a231aeb479f392f7a7d53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 3 Dec 2025 12:08:19 +0700 Subject: [PATCH 10/54] Fix: replace `running` with `_running` for internal client status checks --- app/services/pool.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/app/services/pool.py b/app/services/pool.py index 24a21dc..28a3435 100644 --- a/app/services/pool.py +++ b/app/services/pool.py @@ -37,7 +37,7 @@ async def init(self) -> None: """Initialize all clients in the pool.""" success_count = 0 for client in self._clients: - if not client.running: + if not client._running: try: await client.init( timeout=g_config.gemini.timeout, @@ -48,7 +48,7 @@ async def init(self) -> None: except Exception: logger.exception(f"Failed to initialize client {client.id}") - if client.running: + if client._running: success_count += 1 if success_count == 0: @@ -79,7 +79,7 @@ async def acquire(self, client_id: Optional[str] = None) -> GeminiClientWrapper: async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: """Make sure the client is running, attempting a restart if needed.""" - if client.running: + if client._running: return True lock = self._restart_locks.get(client.id) @@ -87,7 +87,7 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: return False # Should not happen async with lock: - if client.running: + if client._running: return True try: @@ -110,4 +110,4 @@ def clients(self) -> List[GeminiClientWrapper]: def status(self) -> Dict[str, bool]: """Return running status for each client.""" - return {client.id: client.running for client in self._clients} + return {client.id: client._running for client in self._clients} From 8ee6cc0335e4b63df2126a6bf69d6c9e42505485 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 3 Dec 2025 14:10:06 +0700 Subject: [PATCH 11/54] Refactor: replace direct `_running` access with `running()` method in client status checks --- app/services/client.py | 3 +++ app/services/pool.py | 10 +++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 0207114..09c52c1 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -76,6 +76,9 @@ async def init( logger.exception(f"Failed to initialize GeminiClient {self.id}") raise + def running(self) -> bool: + return self._running + @staticmethod async def process_message( message: Message, tempdir: Path | None = None, tagged: bool = True diff 
--git a/app/services/pool.py b/app/services/pool.py index 28a3435..a134dda 100644 --- a/app/services/pool.py +++ b/app/services/pool.py @@ -37,7 +37,7 @@ async def init(self) -> None: """Initialize all clients in the pool.""" success_count = 0 for client in self._clients: - if not client._running: + if not client.running(): try: await client.init( timeout=g_config.gemini.timeout, @@ -48,7 +48,7 @@ async def init(self) -> None: except Exception: logger.exception(f"Failed to initialize client {client.id}") - if client._running: + if client.running(): success_count += 1 if success_count == 0: @@ -79,7 +79,7 @@ async def acquire(self, client_id: Optional[str] = None) -> GeminiClientWrapper: async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: """Make sure the client is running, attempting a restart if needed.""" - if client._running: + if client.running(): return True lock = self._restart_locks.get(client.id) @@ -87,7 +87,7 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: return False # Should not happen async with lock: - if client._running: + if client.running(): return True try: @@ -110,4 +110,4 @@ def clients(self) -> List[GeminiClientWrapper]: def status(self) -> Dict[str, bool]: """Return running status for each client.""" - return {client.id: client._running for client in self._clients} + return {client.id: client.running() for client in self._clients} From 453700eba682cfdd4bfc2e061a8139129654d017 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 3 Dec 2025 22:11:11 +0700 Subject: [PATCH 12/54] Extend models with new fields for annotations, reasoning, audio, log probabilities, and token details; adjust response handling accordingly. --- app/models/models.py | 13 ++++++++++++- app/server/chat.py | 7 ++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 52dd414..1d7368c 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -12,7 +12,9 @@ class ContentItem(BaseModel): type: Literal["text", "image_url", "file", "input_audio"] text: Optional[str] = None image_url: Optional[Dict[str, str]] = None + input_audio: Optional[Dict[str, Any]] = None file: Optional[Dict[str, str]] = None + annotations: List[Dict[str, Any]] = Field(default_factory=list) class Message(BaseModel): @@ -22,6 +24,10 @@ class Message(BaseModel): content: Union[str, List[ContentItem], None] = None name: Optional[str] = None tool_calls: Optional[List["ToolCall"]] = None + refusal: Optional[str] = None + reasoning_content: Optional[str] = None + audio: Optional[Dict[str, Any]] = None + annotations: List[Dict[str, Any]] = Field(default_factory=list) class Choice(BaseModel): @@ -30,6 +36,7 @@ class Choice(BaseModel): index: int message: Message finish_reason: str + logprobs: Optional[Dict[str, Any]] = None class FunctionCall(BaseModel): @@ -81,6 +88,8 @@ class Usage(BaseModel): prompt_tokens: int completion_tokens: int total_tokens: int + prompt_tokens_details: Optional[Dict[str, int]] = None + completion_tokens_details: Optional[Dict[str, int]] = None class ModelData(BaseModel): @@ -118,6 +127,8 @@ class ChatCompletionResponse(BaseModel): model: str choices: List[Choice] usage: Usage + system_fingerprint: Optional[str] = None + service_tier: Optional[str] = None class ModelListResponse(BaseModel): @@ -217,6 +228,7 @@ class ResponseOutputContent(BaseModel): type: Literal["output_text"] text: Optional[str] = None + annotations: List[Dict[str, Any]] = 
Field(default_factory=list) class ResponseOutputMessage(BaseModel): @@ -257,7 +269,6 @@ class ResponseCreateResponse(BaseModel): created: int model: str output: List[Union[ResponseOutputMessage, ResponseImageGenerationCall, ResponseToolCall]] - output_text: Optional[str] = None status: Literal[ "in_progress", "completed", diff --git a/app/server/chat.py b/app/server/chat.py index 67790ab..5848a39 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1022,10 +1022,12 @@ async def _build_payload( response_contents: list[ResponseOutputContent] = [] if assistant_text: - response_contents.append(ResponseOutputContent(type="output_text", text=assistant_text)) + response_contents.append( + ResponseOutputContent(type="output_text", text=assistant_text, annotations=[]) + ) if not response_contents: - response_contents.append(ResponseOutputContent(type="output_text", text="")) + response_contents.append(ResponseOutputContent(type="output_text", text="", annotations=[])) created_time = int(datetime.now(tz=timezone.utc).timestamp()) response_id = f"resp_{uuid.uuid4().hex}" @@ -1059,7 +1061,6 @@ async def _build_payload( *tool_call_items, *image_call_items, ], - output_text=assistant_text or None, status="completed", usage=usage, input=normalized_input or None, From 9260f8b5cc37192716d4127ed6ab98a087e7e3ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 3 Dec 2025 22:51:54 +0700 Subject: [PATCH 13/54] Extend models with new fields (annotations, error), add `normalize_output_text` validator, rename `created` to `created_at`, and update response handling accordingly. --- app/models/models.py | 16 +++++++++++++--- app/server/chat.py | 8 ++++---- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 1d7368c..8d5102c 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -3,7 +3,7 @@ from datetime import datetime from typing import Any, Dict, List, Literal, Optional, Union -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, model_validator class ContentItem(BaseModel): @@ -127,7 +127,6 @@ class ChatCompletionResponse(BaseModel): model: str choices: List[Choice] usage: Usage - system_fingerprint: Optional[str] = None service_tier: Optional[str] = None @@ -172,6 +171,15 @@ class ResponseInputContent(BaseModel): file_url: Optional[str] = None file_data: Optional[str] = None filename: Optional[str] = None + annotations: List[Dict[str, Any]] = Field(default_factory=list) + + @model_validator(mode="before") + @classmethod + def normalize_output_text(cls, data: Any) -> Any: + """Allow output_text (from previous turns) to be treated as input_text.""" + if isinstance(data, dict) and data.get("type") == "output_text": + data["type"] = "input_text" + return data class ResponseInputItem(BaseModel): @@ -266,7 +274,7 @@ class ResponseCreateResponse(BaseModel): id: str object: Literal["response"] = "response" - created: int + created_at: int model: str output: List[Union[ResponseOutputMessage, ResponseImageGenerationCall, ResponseToolCall]] status: Literal[ @@ -274,9 +282,11 @@ class ResponseCreateResponse(BaseModel): "completed", "failed", "incomplete", + "cancelled", "requires_action", ] = "completed" usage: ResponseUsage + error: Optional[Dict[str, Any]] = None metadata: Optional[Dict[str, Any]] = None system_fingerprint: Optional[str] = None input: Optional[Union[str, List[ResponseInputItem]]] = None diff --git a/app/server/chat.py b/app/server/chat.py index 5848a39..ef508b9 100644 --- 
a/app/server/chat.py +++ b/app/server/chat.py @@ -1049,7 +1049,7 @@ async def _build_payload( response_payload = ResponseCreateResponse( id=response_id, - created=created_time, + created_at=created_time, model=request.model, output=[ ResponseOutputMessage( @@ -1334,7 +1334,7 @@ def _create_responses_streaming_response( response_dict = response_payload.model_dump(mode="json") response_id = response_payload.id - created_time = response_payload.created + created_time = response_payload.created_at model = response_payload.model logger.debug( @@ -1344,14 +1344,14 @@ def _create_responses_streaming_response( base_event = { "id": response_id, "object": "response", - "created": created_time, + "created_at": created_time, "model": model, } created_snapshot: dict[str, Any] = { "id": response_id, "object": "response", - "created": created_time, + "created_at": created_time, "model": model, "status": "in_progress", } From d6a8e6bdb786bb90dd653cd9aa3fc88469c2b505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 09:35:30 +0700 Subject: [PATCH 14/54] Extend response models to support tool choices, image output, and improved streaming of response items. Refactor image generation handling for consistency and add compatibility with output content. --- app/models/models.py | 7 ++-- app/server/chat.py | 83 ++++++++++++++++++++++++++++++++------------ 2 files changed, 65 insertions(+), 25 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 8d5102c..bbc2140 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -127,7 +127,6 @@ class ChatCompletionResponse(BaseModel): model: str choices: List[Choice] usage: Usage - service_tier: Optional[str] = None class ModelListResponse(BaseModel): @@ -234,8 +233,9 @@ class ResponseUsage(BaseModel): class ResponseOutputContent(BaseModel): """Content item for Responses API output.""" - type: Literal["output_text"] + type: Literal["output_text", "output_image"] text: Optional[str] = None + image_url: Optional[str] = None annotations: List[Dict[str, Any]] = Field(default_factory=list) @@ -285,10 +285,11 @@ class ResponseCreateResponse(BaseModel): "cancelled", "requires_action", ] = "completed" + tool_choice: Optional[Union[str, ResponseToolChoice]] = None + tools: Optional[List[Union[Tool, ResponseImageTool]]] = None usage: ResponseUsage error: Optional[Dict[str, Any]] = None metadata: Optional[Dict[str, Any]] = None - system_fingerprint: Optional[str] = None input: Optional[Union[str, List[ResponseInputItem]]] = None diff --git a/app/server/chat.py b/app/server/chat.py index ef508b9..cb498a5 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -992,6 +992,7 @@ async def _build_payload( detail = f"{detail} Assistant response: {summary}" raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=detail) + response_contents: list[ResponseOutputContent] = [] image_call_items: list[ResponseImageGenerationCall] = [] for image in images: try: @@ -999,15 +1000,25 @@ async def _build_payload( except Exception as exc: logger.warning(f"Failed to download generated image: {exc}") continue + + img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" image_call_items.append( ResponseImageGenerationCall( id=f"img_{uuid.uuid4().hex}", status="completed", result=image_base64, - output_format="png" if isinstance(image, GeneratedImage) else "jpeg", + output_format=img_format, size=f"{width}x{height}" if width and height else None, ) ) + # Add as output_image content for compatibility + 
response_contents.append( + ResponseOutputContent( + type="output_image", + image_url=f"data:image/{img_format};base64,{image_base64}", + annotations=[], + ) + ) tool_call_items: list[ResponseToolCall] = [] if detected_tool_calls: @@ -1020,7 +1031,6 @@ async def _build_payload( for call in detected_tool_calls ] - response_contents: list[ResponseOutputContent] = [] if assistant_text: response_contents.append( ResponseOutputContent(type="output_text", text=assistant_text, annotations=[]) @@ -1065,6 +1075,8 @@ async def _build_payload( usage=usage, input=normalized_input or None, metadata=request.metadata or None, + tools=request.tools, + tool_choice=request.tool_choice, ) try: @@ -1359,6 +1371,10 @@ def _create_responses_streaming_response( created_snapshot["metadata"] = response_dict["metadata"] if response_dict.get("input") is not None: created_snapshot["input"] = response_dict["input"] + if response_dict.get("tools") is not None: + created_snapshot["tools"] = response_dict["tools"] + if response_dict.get("tool_choice") is not None: + created_snapshot["tool_choice"] = response_dict["tool_choice"] async def generate_stream(): # Emit creation event @@ -1369,30 +1385,53 @@ async def generate_stream(): } yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" - # Stream textual content, if any - if assistant_text: - for chunk in _iter_stream_segments(assistant_text): - delta_event = { - **base_event, - "type": "response.output_text.delta", - "output_index": 0, - "delta": chunk, - } - yield f"data: {orjson.dumps(delta_event).decode('utf-8')}\n\n" - - done_event = { + # Stream output items (Message/Text, Tool Calls, Images) + for i, item in enumerate(response_payload.output): + item_json = item.model_dump(mode="json", exclude_none=True) + + added_event = { **base_event, - "type": "response.output_text.done", - "output_index": 0, + "type": "response.output_item.added", + "output_index": i, + "item": item_json, } - yield f"data: {orjson.dumps(done_event).decode('utf-8')}\n\n" - else: - done_event = { + yield f"data: {orjson.dumps(added_event).decode('utf-8')}\n\n" + + # 2. Stream content if it's a message (text) + if item.type == "message": + content_text = "" + # Aggregate text content to stream + for c in item.content: + if c.type == "output_text" and c.text: + content_text += c.text + + if content_text: + for chunk in _iter_stream_segments(content_text): + delta_event = { + **base_event, + "type": "response.output_text.delta", + "output_index": i, + "delta": chunk, + } + yield f"data: {orjson.dumps(delta_event).decode('utf-8')}\n\n" + + # Text done + done_event = { + **base_event, + "type": "response.output_text.done", + "output_index": i, + } + yield f"data: {orjson.dumps(done_event).decode('utf-8')}\n\n" + + # 3. Emit output_item.done for all types + # This confirms the item is fully transferred. + item_done_event = { **base_event, - "type": "response.output_text.done", - "output_index": 0, + "type": "response.output_item.done", + "output_index": i, + "item": item_json, } - yield f"data: {orjson.dumps(done_event).decode('utf-8')}\n\n" + yield f"data: {orjson.dumps(item_done_event).decode('utf-8')}\n\n" # Emit completed event with full payload completed_event = { From 16435a2ce12a4d37e9f3cfa758f384000aa41123 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 09:50:47 +0700 Subject: [PATCH 15/54] Set default `text` value to an empty string for `ResponseOutputContent` and ensure consistent initialization in image output handling. 
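For reference, the per-item event order emitted by the refactored generate_stream in
PATCH 14/54 can be reproduced with a small harness — an illustrative sketch with
abbreviated payloads (the real created/completed events also embed the response snapshot):

import json

def sse_events(output_items: list[dict], base: dict) -> list[str]:
    # Order mirrors the handler: created, then added/delta/done per item, then completed.
    events = [{**base, "type": "response.created"}]
    for i, item in enumerate(output_items):
        events.append({**base, "type": "response.output_item.added", "output_index": i, "item": item})
        if item.get("type") == "message":
            text = "".join(
                c.get("text") or "" for c in item.get("content", []) if c.get("type") == "output_text"
            )
            for j in range(0, len(text), 64):  # 64-char segments, as in _iter_stream_segments
                events.append(
                    {**base, "type": "response.output_text.delta", "output_index": i, "delta": text[j : j + 64]}
                )
            events.append({**base, "type": "response.output_text.done", "output_index": i})
        events.append({**base, "type": "response.output_item.done", "output_index": i, "item": item})
    events.append({**base, "type": "response.completed"})
    return [f"data: {json.dumps(e)}" for e in events]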
--- app/models/models.py | 2 +- app/server/chat.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/app/models/models.py b/app/models/models.py index bbc2140..2c987b8 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -234,7 +234,7 @@ class ResponseOutputContent(BaseModel): """Content item for Responses API output.""" type: Literal["output_text", "output_image"] - text: Optional[str] = None + text: Optional[str] = "" image_url: Optional[str] = None annotations: List[Dict[str, Any]] = Field(default_factory=list) diff --git a/app/server/chat.py b/app/server/chat.py index cb498a5..7745a26 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1015,6 +1015,7 @@ async def _build_payload( response_contents.append( ResponseOutputContent( type="output_image", + text="", image_url=f"data:image/{img_format};base64,{image_base64}", annotations=[], ) From fc99c2d60193f346006f5cf17af4e849d8ea2669 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:03:50 +0700 Subject: [PATCH 16/54] feat: Add /images endpoint with dedicated router and improved image management Add dedicated router for /images endpoint and refactor image handling logic for better modularity. Enhance temporary image management with secure naming, token verification, and cleanup functionality. --- app/main.py | 9 +++++- app/server/chat.py | 35 ++++++++++++++++-------- app/server/images.py | 15 ++++++++++ app/server/middleware.py | 59 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 105 insertions(+), 13 deletions(-) create mode 100644 app/server/images.py diff --git a/app/main.py b/app/main.py index 95458d3..c215e2a 100644 --- a/app/main.py +++ b/app/main.py @@ -6,7 +6,12 @@ from .server.chat import router as chat_router from .server.health import router as health_router -from .server.middleware import add_cors_middleware, add_exception_handler +from .server.images import router as images_router +from .server.middleware import ( + add_cors_middleware, + add_exception_handler, + cleanup_expired_images, +) from .services import GeminiClientPool, LMDBConversationStore RETENTION_CLEANUP_INTERVAL_SECONDS = 6 * 60 * 60 # 6 hours @@ -28,6 +33,7 @@ async def _run_retention_cleanup(stop_event: asyncio.Event) -> None: while not stop_event.is_set(): try: store.cleanup_expired() + cleanup_expired_images(store.retention_days) except Exception: logger.exception("LMDB retention cleanup task failed.") @@ -93,5 +99,6 @@ def create_app() -> FastAPI: app.include_router(health_router, tags=["Health"]) app.include_router(chat_router, tags=["Chat"]) + app.include_router(images_router, tags=["Images"]) return app diff --git a/app/server/chat.py b/app/server/chat.py index 7745a26..db92dbc 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -44,7 +44,7 @@ from ..services.client import CODE_BLOCK_HINT, XML_WRAP_HINT from ..utils import g_config from ..utils.helper import estimate_tokens -from .middleware import get_temp_dir, verify_api_key +from .middleware import get_image_store_dir, get_temp_dir, verify_api_key # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) @@ -588,6 +588,7 @@ async def create_chat_completion( request: ChatCompletionRequest, api_key: str = Depends(verify_api_key), tmp_dir: Path = Depends(get_temp_dir), + image_store: Path = Depends(get_image_store_dir), ): pool = GeminiClientPool() db = LMDBConversationStore() @@ -775,6 +776,7 @@ async def 
create_response( request: ResponseCreateRequest, api_key: str = Depends(verify_api_key), tmp_dir: Path = Depends(get_temp_dir), + image_store: Path = Depends(get_image_store_dir), ): base_messages, normalized_input = _response_items_to_messages(request.input) if not base_messages: @@ -996,12 +998,16 @@ async def _build_payload( image_call_items: list[ResponseImageGenerationCall] = [] for image in images: try: - image_base64, width, height = await _image_to_base64(image, tmp_dir) + image_base64, width, height, filename = await _image_to_base64(image, tmp_dir) except Exception as exc: logger.warning(f"Failed to download generated image: {exc}") continue img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" + + # Use static URL for compatibility + image_url = f"{request.base_url}images/{filename}" + image_call_items.append( ResponseImageGenerationCall( id=f"img_{uuid.uuid4().hex}", @@ -1013,12 +1019,10 @@ async def _build_payload( ) # Add as output_image content for compatibility response_contents.append( - ResponseOutputContent( - type="output_image", - text="", - image_url=f"data:image/{img_format};base64,{image_base64}", - annotations=[], - ) + ResponseOutputContent(type="output_text", text=image_url, annotations=[]) + ) + response_contents.append( + ResponseOutputContent(type="output_image", text="", image_url=image_url, annotations=[]) ) tool_call_items: list[ResponseToolCall] = [] @@ -1553,8 +1557,8 @@ def _extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: return None, None -async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None]: - """Persist an image provided by gemini_webapi and return base64 plus dimensions.""" +async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]: + """Persist an image provided by gemini_webapi and return base64 plus dimensions and filename.""" if isinstance(image, GeneratedImage): saved_path = await image.save(path=str(temp_dir), full_size=True) else: @@ -1563,6 +1567,13 @@ async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | Non if not saved_path: raise ValueError("Failed to save generated image") - data = Path(saved_path).read_bytes() + # Rename file to a random UUID to ensure uniqueness and unpredictability + original_path = Path(saved_path) + random_name = f"img_{uuid.uuid4().hex}{original_path.suffix}" + new_path = temp_dir / random_name + original_path.rename(new_path) + + data = new_path.read_bytes() width, height = _extract_image_dimensions(data) - return base64.b64encode(data).decode("ascii"), width, height + filename = random_name + return base64.b64encode(data).decode("ascii"), width, height, filename diff --git a/app/server/images.py b/app/server/images.py new file mode 100644 index 0000000..2867239 --- /dev/null +++ b/app/server/images.py @@ -0,0 +1,15 @@ +from fastapi import APIRouter, HTTPException +from fastapi.responses import FileResponse + +from ..server.middleware import get_image_store_dir + +router = APIRouter() + + +@router.get("/images/{filename}", tags=["Images"]) +async def get_image(filename: str): + image_store = get_image_store_dir() + file_path = image_store / filename + if not file_path.exists(): + raise HTTPException(status_code=404, detail="Image not found") + return FileResponse(file_path) diff --git a/app/server/middleware.py b/app/server/middleware.py index b12024f..60e4c8d 100644 --- a/app/server/middleware.py +++ b/app/server/middleware.py @@ -1,13 +1,72 @@ +import hashlib +import 
hmac import tempfile +import time from pathlib import Path from fastapi import Depends, FastAPI, HTTPException, Request, status from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import ORJSONResponse from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer +from loguru import logger from ..utils import g_config +# Persistent directory for storing generated images +IMAGE_STORE_DIR = Path(tempfile.gettempdir()) / "gemini_fastapi_images" +IMAGE_STORE_DIR.mkdir(parents=True, exist_ok=True) + + +def get_image_store_dir() -> Path: + """Returns a persistent directory for storing images.""" + return IMAGE_STORE_DIR + + +def get_image_token(filename: str) -> str: + """Generate a HMAC-SHA256 token for a filename using the API key.""" + secret = g_config.server.api_key + if not secret: + return "" + + msg = filename.encode("utf-8") + secret_bytes = secret.encode("utf-8") + return hmac.new(secret_bytes, msg, hashlib.sha256).hexdigest() + + +def verify_image_token(filename: str, token: str | None) -> bool: + """Verify the provided token against the filename.""" + expected = get_image_token(filename) + if not expected: + return True # No auth required + if not token: + return False + return hmac.compare_digest(token, expected) + + +def cleanup_expired_images(retention_days: int) -> int: + """Delete images in IMAGE_STORE_DIR older than retention_days.""" + if retention_days <= 0: + return 0 + + now = time.time() + retention_seconds = retention_days * 24 * 60 * 60 + cutoff = now - retention_seconds + + count = 0 + for file_path in IMAGE_STORE_DIR.iterdir(): + if not file_path.is_file(): + continue + try: + if file_path.stat().st_mtime < cutoff: + file_path.unlink() + count += 1 + except Exception as e: + logger.warning(f"Failed to delete expired image {file_path}: {e}") + + if count > 0: + logger.info(f"Cleaned up {count} expired images.") + return count + def global_exception_handler(request: Request, exc: Exception): if isinstance(exc, HTTPException): From 28441765f3fa47787027620cdc4a6d9e7ddbdd94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:10:29 +0700 Subject: [PATCH 17/54] feat: Add token-based verification for image access --- app/server/chat.py | 4 ++-- app/server/images.py | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index db92dbc..9371137 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -44,7 +44,7 @@ from ..services.client import CODE_BLOCK_HINT, XML_WRAP_HINT from ..utils import g_config from ..utils.helper import estimate_tokens -from .middleware import get_image_store_dir, get_temp_dir, verify_api_key +from .middleware import get_image_store_dir, get_image_token, get_temp_dir, verify_api_key # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) @@ -1006,7 +1006,7 @@ async def _build_payload( img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" # Use static URL for compatibility - image_url = f"{request.base_url}images/{filename}" + image_url = f"{request.base_url}images/{filename}?token={get_image_token(filename)}" image_call_items.append( ResponseImageGenerationCall( diff --git a/app/server/images.py b/app/server/images.py index 2867239..fe078f7 100644 --- a/app/server/images.py +++ b/app/server/images.py @@ -1,13 +1,16 @@ -from fastapi import APIRouter, HTTPException +from fastapi import APIRouter, HTTPException, 
Query from fastapi.responses import FileResponse -from ..server.middleware import get_image_store_dir +from ..server.middleware import get_image_store_dir, verify_image_token router = APIRouter() @router.get("/images/{filename}", tags=["Images"]) -async def get_image(filename: str): +async def get_image(filename: str, token: str | None = Query(default=None)): + if not verify_image_token(filename, token): + raise HTTPException(status_code=403, detail="Invalid token") + image_store = get_image_store_dir() file_path = image_store / filename if not file_path.exists(): From 4509c14dfd5a38dfa6b989b3e9ac308e3bc8c982 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:22:07 +0700 Subject: [PATCH 18/54] Refactor: rename image store directory to `ai_generated_images` for clarity --- app/server/middleware.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/server/middleware.py b/app/server/middleware.py index 60e4c8d..630e1f5 100644 --- a/app/server/middleware.py +++ b/app/server/middleware.py @@ -13,7 +13,7 @@ from ..utils import g_config # Persistent directory for storing generated images -IMAGE_STORE_DIR = Path(tempfile.gettempdir()) / "gemini_fastapi_images" +IMAGE_STORE_DIR = Path(tempfile.gettempdir()) / "ai_generated_images" IMAGE_STORE_DIR.mkdir(parents=True, exist_ok=True) From 75e2f61d3a6b1d12269af2ee82344ab643f34e83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:31:15 +0700 Subject: [PATCH 19/54] fix: Update create_response to use FastAPI Request object for base_url and refactor variable handling --- app/server/chat.py | 44 +++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 9371137..0010f4a 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -9,7 +9,7 @@ from typing import Any, Iterator import orjson -from fastapi import APIRouter, Depends, HTTPException, status +from fastapi import APIRouter, Depends, HTTPException, Request, status from fastapi.responses import StreamingResponse from gemini_webapi.client import ChatSession from gemini_webapi.constants import Model @@ -773,19 +773,15 @@ async def create_chat_completion( @router.post("/v1/responses") async def create_response( - request: ResponseCreateRequest, + request_data: ResponseCreateRequest, + request: Request, api_key: str = Depends(verify_api_key), tmp_dir: Path = Depends(get_temp_dir), image_store: Path = Depends(get_image_store_dir), ): - base_messages, normalized_input = _response_items_to_messages(request.input) - if not base_messages: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, detail="No message input provided." - ) - - structured_requirement = _build_structured_requirement(request.response_format) - if structured_requirement and request.stream: + base_messages, normalized_input = _response_items_to_messages(request_data.input) + structured_requirement = _build_structured_requirement(request_data.response_format) + if structured_requirement and request_data.stream: logger.debug( "Structured response requested with streaming enabled; streaming not supported for Responses." 
) @@ -801,7 +797,7 @@ async def create_response( standard_tools: list[Tool] = [] image_tools: list[ResponseImageTool] = [] - if request.tools: + if request_data.tools: for t in request.tools: if isinstance(t, Tool): standard_tools.append(t) @@ -817,13 +813,15 @@ async def create_response( image_instruction = _build_image_generation_instruction( image_tools, - request.tool_choice if isinstance(request.tool_choice, ResponseToolChoice) else None, + request_data.tool_choice + if isinstance(request_data.tool_choice, ResponseToolChoice) + else None, ) if image_instruction: extra_instructions.append(image_instruction) logger.debug("Image generation support enabled for /v1/responses request.") - preface_messages = _instructions_to_messages(request.instructions) + preface_messages = _instructions_to_messages(request_data.instructions) conversation_messages = base_messages if preface_messages: conversation_messages = [*preface_messages, *base_messages] @@ -834,10 +832,10 @@ async def create_response( # Pass standard tools to the prompt builder # Determine tool_choice for standard tools (ignore image_generation choice here as it is handled via instruction) model_tool_choice = None - if isinstance(request.tool_choice, str): - model_tool_choice = request.tool_choice - elif isinstance(request.tool_choice, ToolChoiceFunction): - model_tool_choice = request.tool_choice + if isinstance(request_data.tool_choice, str): + model_tool_choice = request_data.tool_choice + elif isinstance(request_data.tool_choice, ToolChoiceFunction): + model_tool_choice = request_data.tool_choice # If tool_choice is ResponseToolChoice (image_generation), we don't pass it as a function tool choice. messages = _prepare_messages_for_model( @@ -851,7 +849,7 @@ async def create_response( db = LMDBConversationStore() try: - model = Model.from_name(request.model) + model = Model.from_name(request_data.model) except ValueError as exc: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc @@ -971,7 +969,7 @@ async def _build_payload( ) expects_image = ( - request.tool_choice is not None and request.tool_choice.type == "image_generation" + request_data.tool_choice is not None and request_data.tool_choice.type == "image_generation" ) images = model_output.images or [] logger.debug( @@ -1065,7 +1063,7 @@ async def _build_payload( response_payload = ResponseCreateResponse( id=response_id, created_at=created_time, - model=request.model, + model=request_data.model, output=[ ResponseOutputMessage( id=message_id, @@ -1079,9 +1077,9 @@ async def _build_payload( status="completed", usage=usage, input=normalized_input or None, - metadata=request.metadata or None, - tools=request.tools, - tool_choice=request.tool_choice, + metadata=request_data.metadata or None, + tools=request_data.tools, + tool_choice=request_data.tool_choice, ) try: From bde6d0d146fc9088df947cfc0958dc88963e93ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:35:44 +0700 Subject: [PATCH 20/54] fix: Correct attribute access in request_data handling within `chat.py` for tools, tool_choice, and streaming settings --- app/server/chat.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 0010f4a..9a3f19f 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -798,7 +798,7 @@ async def create_response( image_tools: list[ResponseImageTool] = [] if request_data.tools: - for t in request.tools: + for t in request_data.tools: if isinstance(t, 
Tool): standard_tools.append(t) elif isinstance(t, ResponseImageTool): @@ -984,7 +984,7 @@ async def _build_payload( summary = f"{summary[:197]}..." logger.warning( "Image generation requested but Gemini produced no images. " - f"client_id={client_id}, forced_tool_choice={request.tool_choice is not None}, " + f"client_id={client_id}, forced_tool_choice={request_data.tool_choice is not None}, " f"instruction_applied={bool(image_instruction)}, assistant_preview='{summary}'" ) detail = "LLM returned no images for the requested image_generation tool." @@ -1100,7 +1100,7 @@ async def _build_payload( except Exception as exc: logger.warning(f"Failed to save Responses conversation to LMDB: {exc}") - if request.stream: + if request_data.stream: logger.debug( f"Streaming Responses API payload (response_id={response_payload.id}, text_chunks={bool(assistant_text)})." ) From 601451a8dbf8cf689a482fd75cda399b5e815cd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:45:49 +0700 Subject: [PATCH 21/54] fix: Save generated images to persistent storage --- app/server/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/server/chat.py b/app/server/chat.py index 9a3f19f..4246c53 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -996,7 +996,7 @@ async def _build_payload( image_call_items: list[ResponseImageGenerationCall] = [] for image in images: try: - image_base64, width, height, filename = await _image_to_base64(image, tmp_dir) + image_base64, width, height, filename = await _image_to_base64(image, image_store) except Exception as exc: logger.warning(f"Failed to download generated image: {exc}") continue From 893eb6d47305f60c4b13896bfc48beb89909dd88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:51:42 +0700 Subject: [PATCH 22/54] fix: Remove unused `output_image` type from `ResponseOutputContent` and update response handling for consistency --- app/models/models.py | 3 +-- app/server/chat.py | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 2c987b8..c27e024 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -233,9 +233,8 @@ class ResponseUsage(BaseModel): class ResponseOutputContent(BaseModel): """Content item for Responses API output.""" - type: Literal["output_text", "output_image"] + type: Literal["output_text"] text: Optional[str] = "" - image_url: Optional[str] = None annotations: List[Dict[str, Any]] = Field(default_factory=list) diff --git a/app/server/chat.py b/app/server/chat.py index 4246c53..3396df0 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1015,13 +1015,10 @@ async def _build_payload( size=f"{width}x{height}" if width and height else None, ) ) - # Add as output_image content for compatibility + # Add as output_text content for compatibility response_contents.append( ResponseOutputContent(type="output_text", text=image_url, annotations=[]) ) - response_contents.append( - ResponseOutputContent(type="output_image", text="", image_url=image_url, annotations=[]) - ) tool_call_items: list[ResponseToolCall] = [] if detected_tool_calls: From 80462b586a110cad7e5b5cc259424e405ecbafc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 13:24:14 +0700 Subject: [PATCH 23/54] fix: Update image URL generation in chat response to use Markdown format for compatibility --- app/server/chat.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) 
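Note: all values in the sketch below are illustrative; it only shows the shape of the Markdown-wrapped URL this change emits.

```python
# A minimal sketch of the Markdown image reference built by this patch.
# Every value here is hypothetical; get_image_token() is the helper
# introduced earlier in this series.
filename = "img_0123abcdef.png"      # hypothetical stored filename
base_url = "http://localhost:8000/"  # hypothetical request.base_url
token = "3f2a9c"                     # hypothetical get_image_token(filename) digest
image_url = f"![{filename}]({base_url}images/{filename}?token={token})"
# -> ![img_0123abcdef.png](http://localhost:8000/images/img_0123abcdef.png?token=3f2a9c)
```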
diff --git a/app/server/chat.py b/app/server/chat.py index 3396df0..c2a60ab 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1004,7 +1004,9 @@ async def _build_payload( img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" # Use static URL for compatibility - image_url = f"{request.base_url}images/{filename}?token={get_image_token(filename)}" + image_url = ( + f"![{filename}]({request.base_url}images/{filename}?token={get_image_token(filename)})" + ) image_call_items.append( ResponseImageGenerationCall( From 8d49a72e0b5c605e2439d6dcbf149925cb670ded Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Mon, 8 Dec 2025 09:45:58 +0700 Subject: [PATCH 24/54] fix: Enhance error handling for full-size image saving and add fallback to default size --- app/server/chat.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/app/server/chat.py b/app/server/chat.py index c2a60ab..d14e9ce 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1557,7 +1557,11 @@ def _extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]: """Persist an image provided by gemini_webapi and return base64 plus dimensions and filename.""" if isinstance(image, GeneratedImage): - saved_path = await image.save(path=str(temp_dir), full_size=True) + try: + saved_path = await image.save(path=str(temp_dir), full_size=True) + except Exception as e: + logger.warning(f"Failed to download full-size image, retrying with default size: {e}") + saved_path = await image.save(path=str(temp_dir), full_size=False) else: saved_path = await image.save(path=str(temp_dir)) From d37eae0ab8c4590b3301dc8853ef22a512ab0d98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 9 Dec 2025 20:46:03 +0700 Subject: [PATCH 25/54] fix: Use filename as image ID to ensure consistency in generated image handling --- app/server/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/server/chat.py b/app/server/chat.py index d14e9ce..fc69293 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1010,7 +1010,7 @@ async def _build_payload( image_call_items.append( ResponseImageGenerationCall( - id=f"img_{uuid.uuid4().hex}", + id=filename.split(".")[0], status="completed", result=image_base64, output_format=img_format, From b9f776dfbb9d251ee016e05a1f6001907c3f8b84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 16 Dec 2025 19:50:07 +0700 Subject: [PATCH 26/54] fix: Enhance tempfile saving by adding custom headers, content-type handling, and improved extension determination --- app/utils/helper.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 3bff469..89fc31e 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -2,12 +2,17 @@ import mimetypes import tempfile from pathlib import Path +from urllib.parse import urlparse import httpx from loguru import logger VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} +HEADERS = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36" +} + def add_tag(role: str, content: str, unclose: bool = False) -> str: """Surround content with role tags""" @@ -36,7 +41,7 @@ async def save_file_to_tempfile( return path -async def save_url_to_tempfile(url: str, tempdir: Path | None = None): 
+async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path: data: bytes | None = None suffix: str | None = None if url.startswith("data:image/"): @@ -47,17 +52,26 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None): base64_data = url.split(",")[1] data = base64.b64decode(base64_data) - # Guess extension from mime type, default to the subtype if not found suffix = mimetypes.guess_extension(mime_type) if not suffix: suffix = f".{mime_type.split('/')[1]}" else: - # http files - async with httpx.AsyncClient() as client: + async with httpx.AsyncClient(headers=HEADERS, follow_redirects=True) as client: resp = await client.get(url) resp.raise_for_status() data = resp.content - suffix = Path(url).suffix or ".bin" + content_type = resp.headers.get("content-type") + + if content_type: + mime_type = content_type.split(";")[0].strip() + suffix = mimetypes.guess_extension(mime_type) + + if not suffix: + path_url = urlparse(url).path + suffix = Path(path_url).suffix + + if not suffix: + suffix = ".bin" with tempfile.NamedTemporaryFile(delete=False, suffix=suffix, dir=tempdir) as tmp: tmp.write(data) From 4b5fe078250ce0496ca93b1861f9622fc5171746 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 30 Dec 2025 22:39:05 +0700 Subject: [PATCH 27/54] feat: Add support for custom Gemini models and model loading strategies - Introduced `model_strategy` configuration for "append" (default + custom models) or "overwrite" (custom models only). - Enhanced `/v1/models` endpoint to return models based on the configured strategy. - Improved model loading with environment variable overrides and validation. - Refactored model handling logic for improved modularity and error handling. --- app/server/chat.py | 70 ++++++++++++++++++++++++++++++++++-------- app/utils/config.py | 75 ++++++++++++++++++++++++++++++++++++++++++++- config/config.yaml | 5 +++ 3 files changed, 136 insertions(+), 14 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index fc69293..0a4c16c 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -562,24 +562,64 @@ def _replace(match: re.Match[str]) -> str: return cleaned, tool_calls -@router.get("/v1/models", response_model=ModelListResponse) -async def list_models(api_key: str = Depends(verify_api_key)): - now = int(datetime.now(tz=timezone.utc).timestamp()) +def _get_model_by_name(name: str) -> Model: + """ + Retrieve a Model instance by name, considering custom models from config + and the update strategy (append or overwrite). + """ + strategy = g_config.gemini.model_strategy + custom_models = {m.model_name: m for m in g_config.gemini.models if m.model_name} - models = [] - for model in Model: - m_name = model.model_name - if not m_name or m_name == "unspecified": - continue + if name in custom_models: + return Model.from_dict(custom_models[name].model_dump()) + + if strategy == "overwrite": + raise ValueError(f"Model '{name}' not found in custom models (strategy='overwrite').") - models.append( + return Model.from_name(name) + + +def _get_available_models() -> list[ModelData]: + """ + Return a list of available models based on configuration strategy. 
+ """ + now = int(datetime.now(tz=timezone.utc).timestamp()) + strategy = g_config.gemini.model_strategy + models_data = [] + + custom_models = [m for m in g_config.gemini.models if m.model_name] + for m in custom_models: + models_data.append( ModelData( - id=m_name, + id=m.model_name, created=now, - owned_by="gemini-web", + owned_by="custom", ) ) + if strategy == "append": + custom_ids = {m.model_name for m in custom_models} + for model in Model: + m_name = model.model_name + if not m_name or m_name == "unspecified": + continue + if m_name in custom_ids: + continue + + models_data.append( + ModelData( + id=m_name, + created=now, + owned_by="gemini-web", + ) + ) + + return models_data + + +@router.get("/v1/models", response_model=ModelListResponse) +async def list_models(api_key: str = Depends(verify_api_key)): + models = _get_available_models() return ModelListResponse(data=models) @@ -592,7 +632,11 @@ async def create_chat_completion( ): pool = GeminiClientPool() db = LMDBConversationStore() - model = Model.from_name(request.model) + + try: + model = _get_model_by_name(request.model) + except ValueError as exc: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc if len(request.messages) == 0: raise HTTPException( @@ -849,7 +893,7 @@ async def create_response( db = LMDBConversationStore() try: - model = Model.from_name(request_data.model) + model = _get_model_by_name(request_data.model) except ValueError as exc: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc diff --git a/app/utils/config.py b/app/utils/config.py index 796ca75..a5c924a 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -50,12 +50,26 @@ def _blank_proxy_to_none(cls, value: Optional[str]) -> Optional[str]: return stripped or None +class GeminiModelConfig(BaseModel): + """Configuration for a custom Gemini model.""" + + model_name: Optional[str] = Field(default=None, description="Name of the model") + model_header: Optional[dict[str, Optional[str]]] = Field( + default=None, description="Header for the model" + ) + + class GeminiConfig(BaseModel): """Gemini API configuration""" clients: list[GeminiClientSettings] = Field( ..., description="List of Gemini client credential pairs" ) + models: list[GeminiModelConfig] = Field(default=[], description="List of custom Gemini models") + model_strategy: Literal["append", "overwrite"] = Field( + default="append", + description="Strategy for loading models: 'append' merges custom with default, 'overwrite' uses only custom", + ) timeout: int = Field(default=120, ge=1, description="Init timeout") auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies") refresh_interval: int = Field( @@ -68,6 +82,13 @@ class GeminiConfig(BaseModel): description="Maximum characters Gemini Web can accept per request", ) + @field_validator("models") + @classmethod + def _filter_valid_models(cls, v: list[GeminiModelConfig]) -> list[GeminiModelConfig]: + """Filter out models that don't have a name set (placeholders).""" + + return [model for model in v if model.model_name] + class CORSConfig(BaseModel): """CORS configuration""" @@ -211,6 +232,53 @@ def _merge_clients_with_env( return result_clients if result_clients else base_clients +def extract_gemini_models_env() -> dict[int, dict[str, str]]: + """Extract and remove all Gemini models related environment variables, return a mapping from index to field dict.""" + prefix = "CONFIG_GEMINI__MODELS__" + env_overrides: dict[int, dict[str, str]] = {} + 
to_delete = [] + for k, v in os.environ.items(): + if k.startswith(prefix): + parts = k.split("__") + if len(parts) < 4: + continue + index_str, field = parts[2], parts[3].lower() + if not index_str.isdigit(): + continue + idx = int(index_str) + env_overrides.setdefault(idx, {})[field] = v + to_delete.append(k) + # Remove these environment variables to avoid Pydantic parsing errors + for k in to_delete: + del os.environ[k] + return env_overrides + + +def _merge_models_with_env( + base_models: list[GeminiModelConfig] | None, + env_overrides: dict[int, dict[str, str]], +): + """Override base_models with env_overrides, return the new models list.""" + if not env_overrides: + return base_models or [] + result_models: list[GeminiModelConfig] = [] + if base_models: + result_models = [model.model_copy() for model in base_models] + + for idx in sorted(env_overrides): + overrides = env_overrides[idx] + if idx < len(result_models): + model_dict = result_models[idx].model_dump() + model_dict.update(overrides) + result_models[idx] = GeminiModelConfig(**model_dict) + elif idx == len(result_models): + new_model = GeminiModelConfig(**overrides) + result_models.append(new_model) + else: + raise IndexError(f"Model index {idx} in env is out of range (must be contiguous).") + return result_models + + def initialize_config() -> Config: """ Initialize the configuration. @@ -221,6 +289,8 @@ def initialize_config() -> Config: try: # First, extract and remove Gemini clients related environment variables env_clients_overrides = extract_gemini_clients_env() + # Extract and remove Gemini models related environment variables + env_models_overrides = extract_gemini_models_env() # Then, initialize Config with pydantic_settings config = Config() # type: ignore @@ -228,7 +298,10 @@ def initialize_config() -> Config: # Synthesize clients config.gemini.clients = _merge_clients_with_env( config.gemini.clients, env_clients_overrides - ) # type: ignore + ) + + # Synthesize models + config.gemini.models = _merge_models_with_env(config.gemini.models, env_models_overrides) return config except ValidationError as e: diff --git a/config/config.yaml b/config/config.yaml index 89c88b7..84c4602 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -27,6 +27,11 @@ gemini: refresh_interval: 540 # Refresh interval in seconds verbose: false # Enable verbose logging for Gemini requests max_chars_per_request: 1000000 # Maximum characters Gemini Web accepts per request. Non-pro users might have a lower limit + models: + - model_name: null + model_header: + x-goog-ext-xxxxxxxxx-jspb: null + model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only) storage: path: "data/lmdb" # Database storage path From 5cb29e8ea7333fd3c207f60a75b5269105bae8b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 30 Dec 2025 23:19:49 +0700 Subject: [PATCH 28/54] feat: Improve Gemini model environment variable parsing and nested field support - Enhanced `extract_gemini_models_env` to handle nested fields within environment variables. - Updated type hints for more flexibility in model overrides. - Improved `_merge_models_with_env` to better support field-level updates and appending new models. 
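As a quick sketch of the nested form this enables (the model name and header key below are illustrative placeholders, not real values):

```python
import os

from app.utils.config import extract_gemini_models_env

# Hypothetical nested overrides; keys after MODEL_HEADER keep their casing.
# Such variable names cannot be exported from a POSIX shell, but they can be
# set via Python or a container runtime.
os.environ["CONFIG_GEMINI__MODELS__0__MODEL_NAME"] = "gemini-3.0-pro"
os.environ["CONFIG_GEMINI__MODELS__0__MODEL_HEADER__x-goog-ext-525001261-jspb"] = "[1,null]"

overrides = extract_gemini_models_env()
# Expected shape:
# {0: {"model_name": "gemini-3.0-pro",
#      "model_header": {"x-goog-ext-525001261-jspb": "[1,null]"}}}
```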
--- app/utils/config.py | 31 +++++++++++++++++++++++-------- config/config.yaml | 2 +- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/app/utils/config.py b/app/utils/config.py index a5c924a..5782c66 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -1,6 +1,6 @@ import os import sys -from typing import Literal, Optional +from typing import Any, Literal, Optional from loguru import logger from pydantic import BaseModel, Field, ValidationError, field_validator @@ -232,21 +232,34 @@ def _merge_clients_with_env( return result_clients if result_clients else base_clients -def extract_gemini_models_env() -> dict[int, dict[str, str]]: - """Extract and remove all Gemini models related environment variables, return a mapping from index to field dict.""" +def extract_gemini_models_env() -> dict[int, dict[str, Any]]: + """Extract and remove all Gemini models related environment variables, supporting nested fields.""" prefix = "CONFIG_GEMINI__MODELS__" - env_overrides: dict[int, dict[str, str]] = {} + env_overrides: dict[int, dict[str, Any]] = {} to_delete = [] for k, v in os.environ.items(): if k.startswith(prefix): parts = k.split("__") if len(parts) < 4: continue - index_str, field = parts[2], parts[3].lower() + index_str = parts[2] if not index_str.isdigit(): continue idx = int(index_str) - env_overrides.setdefault(idx, {})[field] = v + + # Navigate to the correct nested dict + current = env_overrides.setdefault(idx, {}) + for i in range(3, len(parts) - 1): + field_name = parts[i].lower() + current = current.setdefault(field_name, {}) + + # Set the value (lowercase root field names, preserve sub-key casing) + last_part = parts[-1] + if len(parts) == 4: + current[last_part.lower()] = v + else: + current[last_part] = v + to_delete.append(k) # Remove these environment variables to avoid Pydantic parsing errors for k in to_delete: @@ -256,9 +269,9 @@ def extract_gemini_models_env() -> dict[int, dict[str, str]]: def _merge_models_with_env( base_models: list[GeminiModelConfig] | None, - env_overrides: dict[int, dict[str, str]], + env_overrides: dict[int, dict[str, Any]], ): - """Override base_models with env_overrides, return the new models list.""" + """Override base_models with env_overrides using standard update (replace whole fields).""" if not env_overrides: return base_models or [] result_models: list[GeminiModelConfig] = [] @@ -268,10 +281,12 @@ def _merge_models_with_env( for idx in sorted(env_overrides): overrides = env_overrides[idx] if idx < len(result_models): + # Update existing model: overwrite fields found in env model_dict = result_models[idx].model_dump() model_dict.update(overrides) result_models[idx] = GeminiModelConfig(**model_dict) elif idx == len(result_models): + # Append new model new_model = GeminiModelConfig(**overrides) result_models.append(new_model) else: diff --git a/config/config.yaml b/config/config.yaml index 84c4602..2fbc061 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -27,11 +27,11 @@ gemini: refresh_interval: 540 # Refresh interval in seconds verbose: false # Enable verbose logging for Gemini requests max_chars_per_request: 1000000 # Maximum characters Gemini Web accepts per request. 
Non-pro users might have a lower limit + model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only) models: - model_name: null model_header: x-goog-ext-xxxxxxxxx-jspb: null - model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only) storage: path: "data/lmdb" # Database storage path From f25f16d00118ebeea7936cea34797270d5137b5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 31 Dec 2025 09:52:49 +0700 Subject: [PATCH 29/54] refactor: Consolidate utility functions and clean up unused code - Moved utility functions like `strip_code_fence`, `extract_tool_calls`, and `iter_stream_segments` to a centralized helper module. - Removed unused and redundant private methods from `chat.py`, including `_strip_code_fence`, `_strip_tagged_blocks`, and `_strip_system_hints`. - Updated imports and references across modules for consistency. - Simplified tool call and streaming logic by replacing inline implementations with shared helper functions. --- app/server/chat.py | 306 ++++------------------------------------ app/services/client.py | 16 +-- app/utils/helper.py | 312 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 342 insertions(+), 292 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 0a4c16c..9485f7a 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1,12 +1,11 @@ import base64 import json import re -import struct import uuid from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path -from typing import Any, Iterator +from typing import Any import orjson from fastapi import APIRouter, Depends, HTTPException, Request, status @@ -21,7 +20,6 @@ ChatCompletionRequest, ContentItem, ConversationInStore, - FunctionCall, Message, ModelData, ModelListResponse, @@ -37,26 +35,28 @@ ResponseToolChoice, ResponseUsage, Tool, - ToolCall, ToolChoiceFunction, ) from ..services import GeminiClientPool, GeminiClientWrapper, LMDBConversationStore -from ..services.client import CODE_BLOCK_HINT, XML_WRAP_HINT from ..utils import g_config -from ..utils.helper import estimate_tokens +from ..utils.helper import ( + CODE_BLOCK_HINT, + CODE_HINT_STRIPPED, + XML_HINT_STRIPPED, + XML_WRAP_HINT, + estimate_tokens, + extract_image_dimensions, + extract_tool_calls, + iter_stream_segments, + remove_tool_call_blocks, + strip_code_fence, + text_from_message, +) from .middleware import get_image_store_dir, get_image_token, get_temp_dir, verify_api_key # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) CONTINUATION_HINT = "\n(More messages to come, please reply with just 'ok.')" -TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)```", re.DOTALL | re.IGNORECASE) -TOOL_CALL_RE = re.compile( - r"(.*?)", re.DOTALL | re.IGNORECASE -) -JSON_FENCE_RE = re.compile(r"^```(?:json)?\s*(.*?)\s*```$", re.DOTALL | re.IGNORECASE) -CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") -XML_HINT_STRIPPED = XML_WRAP_HINT.strip() -CODE_HINT_STRIPPED = CODE_BLOCK_HINT.strip() router = APIRouter() @@ -118,14 +118,6 @@ def _build_structured_requirement( ) -def _strip_code_fence(text: str) -> str: - """Remove surrounding ```json fences if present.""" - match = JSON_FENCE_RE.match(text.strip()) - if match: - return match.group(1).strip() - return text.strip() - - def _build_tool_prompt( tools: list[Tool], tool_choice: str | ToolChoiceFunction | None, @@ -312,75 +304,6 @@ def 
_prepare_messages_for_model( return prepared -def _strip_system_hints(text: str) -> str: - """Remove system-level hint text from a given string.""" - if not text: - return text - cleaned = _strip_tagged_blocks(text) - cleaned = cleaned.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "") - cleaned = cleaned.replace(CODE_BLOCK_HINT, "").replace(CODE_HINT_STRIPPED, "") - cleaned = CONTROL_TOKEN_RE.sub("", cleaned) - return cleaned.strip() - - -def _strip_tagged_blocks(text: str) -> str: - """Remove <|im_start|>role ... <|im_end|> sections, dropping tool blocks entirely. - - tool blocks are removed entirely (if missing end marker, drop to EOF). - - other roles: remove markers and role, keep inner content (if missing end marker, keep to EOF). - """ - if not text: - return text - - result: list[str] = [] - idx = 0 - length = len(text) - start_marker = "<|im_start|>" - end_marker = "<|im_end|>" - - while idx < length: - start = text.find(start_marker, idx) - if start == -1: - result.append(text[idx:]) - break - - # append any content before this block - result.append(text[idx:start]) - - role_start = start + len(start_marker) - newline = text.find("\n", role_start) - if newline == -1: - # malformed block; keep remainder as-is (safe behavior) - result.append(text[start:]) - break - - role = text[role_start:newline].strip().lower() - - end = text.find(end_marker, newline + 1) - if end == -1: - # missing end marker - if role == "tool": - # drop from start marker to EOF (skip remainder) - break - else: - # keep inner content from after the role newline to EOF - result.append(text[newline + 1 :]) - break - - block_end = end + len(end_marker) - - if role == "tool": - # drop whole block - idx = block_end - continue - - # keep the content without role markers - content = text[newline + 1 : end] - result.append(content) - idx = block_end - - return "".join(result) - - def _response_items_to_messages( items: str | list[ResponseInputItem], ) -> tuple[list[Message], str | list[ResponseInputItem]]: @@ -509,59 +432,6 @@ def _instructions_to_messages( return instruction_messages -def _remove_tool_call_blocks(text: str) -> str: - """Strip tool call code blocks from text.""" - if not text: - return text - cleaned = TOOL_BLOCK_RE.sub("", text) - return _strip_system_hints(cleaned) - - -def _extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: - """Extract tool call definitions and return cleaned text.""" - if not text: - return text, [] - - tool_calls: list[ToolCall] = [] - - def _replace(match: re.Match[str]) -> str: - block_content = match.group(1) - if not block_content: - return "" - - for call_match in TOOL_CALL_RE.finditer(block_content): - name = (call_match.group(1) or "").strip() - raw_args = (call_match.group(2) or "").strip() - if not name: - logger.warning( - f"Encountered tool_call block without a function name: {block_content}" - ) - continue - - arguments = raw_args - try: - parsed_args = json.loads(raw_args) - arguments = json.dumps(parsed_args, ensure_ascii=False) - except json.JSONDecodeError: - logger.warning( - f"Failed to parse tool call arguments for '{name}'. Passing raw string." 
- ) - - tool_calls.append( - ToolCall( - id=f"call_{uuid.uuid4().hex}", - type="function", - function=FunctionCall(name=name, arguments=arguments), - ) - ) - - return "" - - cleaned = TOOL_BLOCK_RE.sub(_replace, text) - cleaned = _strip_system_hints(cleaned) - return cleaned, tool_calls - - def _get_model_by_name(name: str) -> Model: """ Retrieve a Model instance by name, considering custom models from config @@ -742,12 +612,12 @@ async def create_chat_completion( detail="Gemini output parsing failed unexpectedly.", ) from exc - visible_output, tool_calls = _extract_tool_calls(raw_output_with_think) - storage_output = _remove_tool_call_blocks(raw_output_clean).strip() + visible_output, tool_calls = extract_tool_calls(raw_output_with_think) + storage_output = remove_tool_call_blocks(raw_output_clean).strip() tool_calls_payload = [call.model_dump(mode="json") for call in tool_calls] if structured_requirement: - cleaned_visible = _strip_code_fence(visible_output or "") + cleaned_visible = strip_code_fence(visible_output or "") if not cleaned_visible: raise HTTPException( status_code=status.HTTP_502_BAD_GATEWAY, @@ -982,12 +852,12 @@ async def _build_payload( detail="Gemini output parsing failed unexpectedly.", ) from exc - visible_text, detected_tool_calls = _extract_tool_calls(text_with_think) - storage_output = _remove_tool_call_blocks(text_without_think).strip() + visible_text, detected_tool_calls = extract_tool_calls(text_with_think) + storage_output = remove_tool_call_blocks(text_without_think).strip() assistant_text = LMDBConversationStore.remove_think_tags(visible_text.strip()) if structured_requirement: - cleaned_visible = _strip_code_fence(assistant_text or "") + cleaned_visible = strip_code_fence(assistant_text or "") if not cleaned_visible: raise HTTPException( status_code=status.HTTP_502_BAD_GATEWAY, @@ -1089,7 +959,7 @@ async def _build_payload( response_id = f"resp_{uuid.uuid4().hex}" message_id = f"msg_{uuid.uuid4().hex}" - input_tokens = sum(estimate_tokens(_text_from_message(msg)) for msg in messages) + input_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) tool_arg_text = "".join(call.function.arguments or "" for call in detected_tool_calls) completion_basis = assistant_text or "" if tool_arg_text: @@ -1152,25 +1022,6 @@ async def _build_payload( return response_payload -def _text_from_message(message: Message) -> str: - """Return text content from a message for token estimation.""" - base_text = "" - if isinstance(message.content, str): - base_text = message.content - elif isinstance(message.content, list): - base_text = "\n".join( - item.text or "" for item in message.content if getattr(item, "type", "") == "text" - ) - elif message.content is None: - base_text = "" - - if message.tool_calls: - tool_arg_text = "".join(call.function.arguments or "" for call in message.tool_calls) - base_text = f"{base_text}\n{tool_arg_text}" if base_text else tool_arg_text - - return base_text - - async def _find_reusable_session( db: LMDBConversationStore, pool: GeminiClientPool, @@ -1268,47 +1119,6 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s raise -def _iter_stream_segments(model_output: str, chunk_size: int = 64): - """Yield stream segments while keeping markers and words intact.""" - if not model_output: - return - - token_pattern = re.compile(r"\s+|\S+\s*") - pending = "" - - def _flush_pending() -> Iterator[str]: - nonlocal pending - if pending: - yield pending - pending = "" - - # Split on boundaries so the markers are 
never fragmented. - parts = re.split(r"()", model_output) - for part in parts: - if not part: - continue - if part in {"", ""}: - yield from _flush_pending() - yield part - continue - - for match in token_pattern.finditer(part): - token = match.group(0) - - if len(token) > chunk_size: - yield from _flush_pending() - for idx in range(0, len(token), chunk_size): - yield token[idx : idx + chunk_size] - continue - - if pending and len(pending) + len(token) > chunk_size: - yield from _flush_pending() - - pending += token - - yield from _flush_pending() - - def _create_streaming_response( model_output: str, tool_calls: list[dict], @@ -1320,7 +1130,7 @@ def _create_streaming_response( """Create streaming response with `usage` calculation included in the final chunk.""" # Calculate token usage - prompt_tokens = sum(estimate_tokens(_text_from_message(msg)) for msg in messages) + prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) tool_args = "".join(call.get("function", {}).get("arguments", "") for call in tool_calls or []) completion_tokens = estimate_tokens(model_output + tool_args) total_tokens = prompt_tokens + completion_tokens @@ -1338,7 +1148,7 @@ async def generate_stream(): yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" # Stream output text in chunks for efficiency - for chunk in _iter_stream_segments(model_output): + for chunk in iter_stream_segments(model_output): data = { "id": completion_id, "object": "chat.completion.chunk", @@ -1452,7 +1262,7 @@ async def generate_stream(): content_text += c.text if content_text: - for chunk in _iter_stream_segments(content_text): + for chunk in iter_stream_segments(content_text): delta_event = { **base_event, "type": "response.output_text.delta", @@ -1501,7 +1311,7 @@ def _create_standard_response( ) -> dict: """Create standard response""" # Calculate token usage - prompt_tokens = sum(estimate_tokens(_text_from_message(msg)) for msg in messages) + prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) tool_args = "".join(call.get("function", {}).get("arguments", "") for call in tool_calls or []) completion_tokens = estimate_tokens(model_output + tool_args) total_tokens = prompt_tokens + completion_tokens @@ -1534,70 +1344,6 @@ def _create_standard_response( return result -def _extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: - """Return image dimensions (width, height) if PNG or JPEG headers are present.""" - # PNG: dimensions stored in bytes 16..24 of the IHDR chunk - if len(data) >= 24 and data.startswith(b"\x89PNG\r\n\x1a\n"): - try: - width, height = struct.unpack(">II", data[16:24]) - return int(width), int(height) - except struct.error: - return None, None - - # JPEG: dimensions stored in SOF segment; iterate through markers to locate it - if len(data) >= 4 and data[0:2] == b"\xff\xd8": - idx = 2 - length = len(data) - sof_markers = { - 0xC0, - 0xC1, - 0xC2, - 0xC3, - 0xC5, - 0xC6, - 0xC7, - 0xC9, - 0xCA, - 0xCB, - 0xCD, - 0xCE, - 0xCF, - } - while idx < length: - # Find marker alignment (markers are prefixed with 0xFF bytes) - if data[idx] != 0xFF: - idx += 1 - continue - while idx < length and data[idx] == 0xFF: - idx += 1 - if idx >= length: - break - marker = data[idx] - idx += 1 - - if marker in (0xD8, 0xD9, 0x01) or 0xD0 <= marker <= 0xD7: - continue - - if idx + 1 >= length: - break - segment_length = (data[idx] << 8) + data[idx + 1] - idx += 2 - if segment_length < 2: - break - - if marker in sof_markers: - if idx + 4 < length: - # Skip precision 
byte at idx, then read height/width (big-endian)
-                    height = (data[idx + 1] << 8) + data[idx + 2]
-                    width = (data[idx + 3] << 8) + data[idx + 4]
-                    return int(width), int(height)
-                break
-
-            idx += segment_length - 2
-
-    return None, None
-
-
 async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]:
     """Persist an image provided by gemini_webapi and return base64 plus dimensions and filename."""
     if isinstance(image, GeneratedImage):
@@ -1619,6 +1365,6 @@ async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | Non
         original_path.rename(new_path)
 
     data = new_path.read_bytes()
-    width, height = _extract_image_dimensions(data)
+    width, height = extract_image_dimensions(data)
     filename = random_name
     return base64.b64encode(data).decode("ascii"), width, height, filename
diff --git a/app/services/client.py b/app/services/client.py
index 09c52c1..87c0ca7 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -9,18 +9,12 @@
 
 from ..models import Message
 from ..utils import g_config
-from ..utils.helper import add_tag, save_file_to_tempfile, save_url_to_tempfile
-
-XML_WRAP_HINT = (
-    "\nYou MUST wrap every tool call response inside a single fenced block exactly like:\n"
-    '```xml\n<tool_call name="tool_name">{"arg": "value"}</tool_call>\n```\n'
-    "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n"
-)
-CODE_BLOCK_HINT = (
-    "\nWhenever you include code, markup, or shell snippets, wrap each snippet in a Markdown fenced "
-    "block and supply the correct language label (for example, ```python ... ``` or ```html ... ```).\n"
-    "Fence ONLY the actual code/markup; keep all narrative or explanatory text outside the fences.\n"
+from ..utils.helper import (
+    add_tag,
+    save_file_to_tempfile,
+    save_url_to_tempfile,
 )
+
 HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);")
 MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=[-\\`*_{}\[\]()#+.!<>])")
 CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`\n]+?`)", re.DOTALL)
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 89fc31e..2627faa 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -1,17 +1,41 @@
 import base64
+import json
 import mimetypes
+import re
+import struct
 import tempfile
+import uuid
 from pathlib import Path
+from typing import Iterator
 from urllib.parse import urlparse
 
 import httpx
 from loguru import logger
 
-VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
+from ..models import FunctionCall, Message, ToolCall
 
 HEADERS = {
     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36"
 }
+VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
+XML_WRAP_HINT = (
+    "\nYou MUST wrap every tool call response inside a single fenced block exactly like:\n"
+    '```xml\n<tool_call name="tool_name">{"arg": "value"}</tool_call>\n```\n'
+    "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n"
+)
+CODE_BLOCK_HINT = (
+    "\nWhenever you include code, markup, or shell snippets, wrap each snippet in a Markdown fenced "
+    "block and supply the correct language label (for example, ```python ... ``` or ```html ... ```).\n"
+    "Fence ONLY the actual code/markup; keep all narrative or explanatory text outside the fences.\n"
+)
+TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE)
+TOOL_CALL_RE = re.compile(
+    r"<tool_call name=\"(.*?)\">(.*?)</tool_call>", re.DOTALL | re.IGNORECASE
+)
+JSON_FENCE_RE = re.compile(r"^```(?:json)?\s*(.*?)\s*```$", re.DOTALL | re.IGNORECASE)
+CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>")
+XML_HINT_STRIPPED = XML_WRAP_HINT.strip()
+CODE_HINT_STRIPPED = CODE_BLOCK_HINT.strip()
 
 
 def add_tag(role: str, content: str, unclose: bool = False) -> str:
@@ -78,3 +102,289 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path:
         path = Path(tmp.name)
 
     return path
+
+
+def strip_code_fence(text: str) -> str:
+    """Remove surrounding ```json fences if present."""
+    match = JSON_FENCE_RE.match(text.strip())
+    if match:
+        return match.group(1).strip()
+    return text.strip()
+
+
+def strip_tagged_blocks(text: str) -> str:
+    """Remove <|im_start|>role ... <|im_end|> sections, dropping tool blocks entirely.
+    - tool blocks are removed entirely (if missing end marker, drop to EOF).
+    - other roles: remove markers and role, keep inner content (if missing end marker, keep to EOF).
+    """
+    if not text:
+        return text
+
+    result: list[str] = []
+    idx = 0
+    length = len(text)
+    start_marker = "<|im_start|>"
+    end_marker = "<|im_end|>"
+
+    while idx < length:
+        start = text.find(start_marker, idx)
+        if start == -1:
+            result.append(text[idx:])
+            break
+
+        # append any content before this block
+        result.append(text[idx:start])
+
+        role_start = start + len(start_marker)
+        newline = text.find("\n", role_start)
+        if newline == -1:
+            # malformed block; keep remainder as-is (safe behavior)
+            result.append(text[start:])
+            break
+
+        role = text[role_start:newline].strip().lower()
+
+        end = text.find(end_marker, newline + 1)
+        if end == -1:
+            # missing end marker
+            if role == "tool":
+                # drop from start marker to EOF (skip remainder)
+                break
+            else:
+                # keep inner content from after the role newline to EOF
+                result.append(text[newline + 1 :])
+                break
+
+        block_end = end + len(end_marker)
+
+        if role == "tool":
+            # drop whole block
+            idx = block_end
+            continue
+
+        # keep the content without role markers
+        content = text[newline + 1 : end]
+        result.append(content)
+        idx = block_end
+
+    return "".join(result)
+
+
+def strip_system_hints(text: str) -> str:
+    """Remove system-level hint text from a given string."""
+    if not text:
+        return text
+    cleaned = strip_tagged_blocks(text)
+    cleaned = cleaned.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "")
+    cleaned = cleaned.replace(CODE_BLOCK_HINT, "").replace(CODE_HINT_STRIPPED, "")
+    cleaned = CONTROL_TOKEN_RE.sub("", cleaned)
+    return cleaned.strip()
+
+
+def remove_tool_call_blocks(text: str) -> str:
+    """Strip tool call code blocks from text."""
+    if not text:
+        return text
+
+    # 1. Remove fenced blocks ONLY if they contain tool calls
+    def _replace_block(match: re.Match[str]) -> str:
+        block_content = match.group(1)
+        if not block_content:
+            return match.group(0)
+
+        # Check if the block contains any tool call tag
+        if TOOL_CALL_RE.search(block_content):
+            return ""
+
+        # Preserve the block if no tool call found
+        return match.group(0)
+
+    cleaned = TOOL_BLOCK_RE.sub(_replace_block, text)
+
+    # 2. Remove orphaned tool calls
+    cleaned = TOOL_CALL_RE.sub("", cleaned)
+
+    return strip_system_hints(cleaned)
+
+
+def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]:
+    """Extract tool call definitions and return cleaned text."""
+    if not text:
+        return text, []
+
+    tool_calls: list[ToolCall] = []
+
+    def _create_tool_call(name: str, raw_args: str) -> None:
+        """Helper to parse args and append to tool_calls list."""
+        if not name:
+            logger.warning("Encountered tool_call without a function name.")
+            return
+
+        arguments = raw_args
+        try:
+            parsed_args = json.loads(raw_args)
+            arguments = json.dumps(parsed_args, ensure_ascii=False)
+        except json.JSONDecodeError:
+            logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.")
+
+        tool_calls.append(
+            ToolCall(
+                id=f"call_{uuid.uuid4().hex}",
+                type="function",
+                function=FunctionCall(name=name, arguments=arguments),
+            )
+        )
+
+    def _replace_block(match: re.Match[str]) -> str:
+        block_content = match.group(1)
+        if not block_content:
+            return match.group(0)
+
+        found_in_block = False
+        for call_match in TOOL_CALL_RE.finditer(block_content):
+            found_in_block = True
+            name = (call_match.group(1) or "").strip()
+            raw_args = (call_match.group(2) or "").strip()
+            _create_tool_call(name, raw_args)
+
+        if found_in_block:
+            return ""
+        else:
+            return match.group(0)
+
+    cleaned = TOOL_BLOCK_RE.sub(_replace_block, text)
+
+    def _replace_orphan(match: re.Match[str]) -> str:
+        name = (match.group(1) or "").strip()
+        raw_args = (match.group(2) or "").strip()
+        _create_tool_call(name, raw_args)
+        return ""
+
+    cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned)
+
+    cleaned = strip_system_hints(cleaned)
+    return cleaned, tool_calls
+
+
+def iter_stream_segments(model_output: str, chunk_size: int = 64) -> Iterator[str]:
+    """Yield stream segments while keeping markers and words intact."""
+    if not model_output:
+        return
+
+    token_pattern = re.compile(r"\s+|\S+\s*")
+    pending = ""
+
+    def _flush_pending() -> Iterator[str]:
+        nonlocal pending
+        if pending:
+            yield pending
+            pending = ""
+
+    # Split on <think> boundaries so the markers are never fragmented.
+    parts = re.split(r"(</?think>)", model_output)
+    for part in parts:
+        if not part:
+            continue
+        if part in {"<think>", "</think>"}:
+            yield from _flush_pending()
+            yield part
+            continue
+
+        for match in token_pattern.finditer(part):
+            token = match.group(0)
+
+            if len(token) > chunk_size:
+                yield from _flush_pending()
+                for idx in range(0, len(token), chunk_size):
+                    yield token[idx : idx + chunk_size]
+                continue
+
+            if pending and len(pending) + len(token) > chunk_size:
+                yield from _flush_pending()
+
+            pending += token
+
+    yield from _flush_pending()
+
+
+def text_from_message(message: Message) -> str:
+    """Return text content from a message for token estimation."""
+    base_text = ""
+    if isinstance(message.content, str):
+        base_text = message.content
+    elif isinstance(message.content, list):
+        base_text = "\n".join(
+            item.text or "" for item in message.content if getattr(item, "type", "") == "text"
+        )
+    elif message.content is None:
+        base_text = ""
+
+    if message.tool_calls:
+        tool_arg_text = "".join(call.function.arguments or "" for call in message.tool_calls)
+        base_text = f"{base_text}\n{tool_arg_text}" if base_text else tool_arg_text
+
+    return base_text
+
+
+def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]:
+    """Return image dimensions (width, height) if PNG or JPEG headers are present."""
+    # PNG: dimensions stored in bytes 16..24 of the IHDR chunk
+    if len(data) >= 24 and data.startswith(b"\x89PNG\r\n\x1a\n"):
+        try:
+            width, height = struct.unpack(">II", data[16:24])
+            return int(width), int(height)
+        except struct.error:
+            return None, None
+
+    # JPEG: dimensions stored in SOF segment; iterate through markers to locate it
+    if len(data) >= 4 and data[0:2] == b"\xff\xd8":
+        idx = 2
+        length = len(data)
+        sof_markers = {
+            0xC0,
+            0xC1,
+            0xC2,
+            0xC3,
+            0xC5,
+            0xC6,
+            0xC7,
+            0xC9,
+            0xCA,
+            0xCB,
+            0xCD,
+            0xCE,
+            0xCF,
+        }
+        while idx < length:
+            # Find marker alignment (markers are prefixed with 0xFF bytes)
+            if data[idx] != 0xFF:
+                idx += 1
+                continue
+            while idx < length and data[idx] == 0xFF:
+                idx += 1
+            if idx >= length:
+                break
+            marker = data[idx]
+            idx += 1
+
+            if marker in (0xD8, 0xD9, 0x01) or 0xD0 <= marker <= 0xD7:
+                continue
+
+            if idx + 1 >= length:
+                break
+            segment_length = (data[idx] << 8) + data[idx + 1]
+            idx += 2
+            if segment_length < 2:
+                break
+
+            if marker in sof_markers:
+                if idx + 4 < length:
+                    # Skip precision byte at idx, then read height/width (big-endian)
+                    height = (data[idx + 1] << 8) + data[idx + 2]
+                    width = (data[idx + 3] << 8) + data[idx + 4]
+                    return int(width), int(height)
+                break
+
+            idx += segment_length - 2
+
+    return None, None

From a1bc8e289ee797a761eb506dc4d01e486c919aef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Wed, 31 Dec 2025 10:01:17 +0700
Subject: [PATCH 30/54] fix: Handle None input in `estimate_tokens` and return 0 for empty text

---
 app/utils/helper.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 2627faa..28be240 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -47,8 +47,10 @@ def add_tag(role: str, content: str, unclose: bool = False) -> str:
     return f"<|im_start|>{role}\n{content}" + ("\n<|im_end|>" if not unclose else "")
 
 
-def estimate_tokens(text: str) -> int:
+def estimate_tokens(text: str | None) -> int:
     """Estimate the number of tokens heuristically based on character count"""
+    if not text:
+        return 0
     return int(len(text) / 3)
 
 
From a7e15d96bd2a4f62094bea02be7e86c8d305e59e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 31 Dec 2025 13:32:49 +0700 Subject: [PATCH 31/54] refactor: Simplify model configuration and add JSON parsing validators - Replaced unused model placeholder in `config.yaml` with an empty list. - Added JSON parsing validators for `model_header` and `models` to enhance flexibility and error handling. - Improved validation to filter out incomplete model configurations. --- app/utils/config.py | 24 +++++++++++++++++++++++- config/config.yaml | 5 +---- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/app/utils/config.py b/app/utils/config.py index 5782c66..69a4fac 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -1,3 +1,4 @@ +import json import os import sys from typing import Any, Literal, Optional @@ -58,6 +59,17 @@ class GeminiModelConfig(BaseModel): default=None, description="Header for the model" ) + @field_validator("model_header", mode="before") + @classmethod + def _parse_json_string(cls, v: Any) -> Any: + if isinstance(v, str) and v.strip().startswith("{"): + try: + return json.loads(v) + except json.JSONDecodeError: + # Return the original value to let Pydantic handle the error or type mismatch + return v + return v + class GeminiConfig(BaseModel): """Gemini API configuration""" @@ -82,11 +94,21 @@ class GeminiConfig(BaseModel): description="Maximum characters Gemini Web can accept per request", ) + @field_validator("models", mode="before") + @classmethod + def _parse_models_json(cls, v: Any) -> Any: + if isinstance(v, str) and v.strip().startswith("["): + try: + return json.loads(v) + except json.JSONDecodeError as e: + logger.warning(f"Failed to parse models JSON string: {e}") + return v + return v + @field_validator("models") @classmethod def _filter_valid_models(cls, v: list[GeminiModelConfig]) -> list[GeminiModelConfig]: """Filter out models that don't have a name set (placeholders).""" - return [model for model in v if model.model_name] diff --git a/config/config.yaml b/config/config.yaml index 2fbc061..f2b17fb 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -28,10 +28,7 @@ gemini: verbose: false # Enable verbose logging for Gemini requests max_chars_per_request: 1000000 # Maximum characters Gemini Web accepts per request. Non-pro users might have a lower limit model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only) - models: - - model_name: null - model_header: - x-goog-ext-xxxxxxxxx-jspb: null + models: [] storage: path: "data/lmdb" # Database storage path From 61c5f3b7af4ef6b78d5dc7e3d5ba9e6009b7d3cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 31 Dec 2025 13:46:58 +0700 Subject: [PATCH 32/54] refactor: Simplify Gemini model environment variable parsing with JSON support - Replaced prefix-based parsing with a root key approach. - Added JSON parsing to handle list-based model configurations. - Improved handling of errors and cleanup of environment variables. 
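For reference, a minimal sketch of the new single-variable form (all values illustrative):

```python
import json
import os

from app.utils.config import extract_gemini_models_env

# Illustrative only: the whole model list travels in one JSON-encoded env var.
os.environ["CONFIG_GEMINI__MODELS"] = json.dumps(
    [{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null]"}}]
)

overrides = extract_gemini_models_env()
# Expected: {0: {"model_name": "gemini-3.0-pro", "model_header": {...}}};
# the env var is removed afterwards so pydantic-settings does not re-parse it.
```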
--- app/utils/config.py | 44 +++++++++++++++++--------------------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/app/utils/config.py b/app/utils/config.py index 69a4fac..6cb5664 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -256,36 +256,26 @@ def _merge_clients_with_env( def extract_gemini_models_env() -> dict[int, dict[str, Any]]: """Extract and remove all Gemini models related environment variables, supporting nested fields.""" - prefix = "CONFIG_GEMINI__MODELS__" - env_overrides: dict[int, dict[str, Any]] = {} - to_delete = [] - for k, v in os.environ.items(): - if k.startswith(prefix): - parts = k.split("__") - if len(parts) < 4: - continue - index_str = parts[2] - if not index_str.isdigit(): - continue - idx = int(index_str) + import json - # Navigate to the correct nested dict - current = env_overrides.setdefault(idx, {}) - for i in range(3, len(parts) - 1): - field_name = parts[i].lower() - current = current.setdefault(field_name, {}) + root_key = "CONFIG_GEMINI__MODELS" + env_overrides: dict[int, dict[str, Any]] = {} - # Set the value (lowercase root field names, preserve sub-key casing) - last_part = parts[-1] - if len(parts) == 4: - current[last_part.lower()] = v - else: - current[last_part] = v + if root_key in os.environ: + try: + val = os.environ[root_key] + if val.strip().startswith("["): + models_list = json.loads(val) + if isinstance(models_list, list): + for idx, model_data in enumerate(models_list): + if isinstance(model_data, dict): + env_overrides[idx] = model_data + + # Remove the environment variable to avoid Pydantic parsing errors + del os.environ[root_key] + except Exception as e: + logger.warning(f"Failed to parse {root_key} as JSON: {e}") - to_delete.append(k) - # Remove these environment variables to avoid Pydantic parsing errors - for k in to_delete: - del os.environ[k] return env_overrides From efd056c270db5130c59b4e66c2543be7f5e8c6e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 31 Dec 2025 14:09:41 +0700 Subject: [PATCH 33/54] fix: Enhance Gemini model environment variable parsing with fallback to Python literals - Added `ast.literal_eval` as a fallback for parsing environment variables when JSON decoding fails. - Improved error handling and logging for invalid configurations. - Ensured proper cleanup of environment variables post-parsing. 
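A small sketch of the fallback path (value illustrative): Python-literal syntax with single quotes fails `json.loads` but is accepted by `ast.literal_eval`.

```python
import ast
import json

val = "[{'model_name': 'gemini-3.0-pro'}]"  # Python literal, not valid JSON
try:
    models = json.loads(val)
except json.JSONDecodeError:
    models = ast.literal_eval(val)  # -> [{'model_name': 'gemini-3.0-pro'}]
```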
--- app/utils/config.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/app/utils/config.py b/app/utils/config.py index 6cb5664..74a5294 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -1,3 +1,4 @@ +import ast import json import os import sys @@ -256,25 +257,31 @@ def _merge_clients_with_env( def extract_gemini_models_env() -> dict[int, dict[str, Any]]: """Extract and remove all Gemini models related environment variables, supporting nested fields.""" - import json - root_key = "CONFIG_GEMINI__MODELS" env_overrides: dict[int, dict[str, Any]] = {} if root_key in os.environ: + val = os.environ[root_key] + models_list = None + parsed_successfully = False + try: - val = os.environ[root_key] - if val.strip().startswith("["): - models_list = json.loads(val) - if isinstance(models_list, list): - for idx, model_data in enumerate(models_list): - if isinstance(model_data, dict): - env_overrides[idx] = model_data + models_list = json.loads(val) + parsed_successfully = True + except json.JSONDecodeError: + try: + models_list = ast.literal_eval(val) + parsed_successfully = True + except (ValueError, SyntaxError) as e: + logger.warning(f"Failed to parse {root_key} as JSON or Python literal: {e}") + + if parsed_successfully and isinstance(models_list, list): + for idx, model_data in enumerate(models_list): + if isinstance(model_data, dict): + env_overrides[idx] = model_data # Remove the environment variable to avoid Pydantic parsing errors del os.environ[root_key] - except Exception as e: - logger.warning(f"Failed to parse {root_key} as JSON: {e}") return env_overrides @@ -298,7 +305,7 @@ def _merge_models_with_env( model_dict.update(overrides) result_models[idx] = GeminiModelConfig(**model_dict) elif idx == len(result_models): - # Append new model + # Append new models new_model = GeminiModelConfig(**overrides) result_models.append(new_model) else: From 476b9dd228aa99501638987d1f44fe3c5eb23067 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 31 Dec 2025 17:53:38 +0700 Subject: [PATCH 34/54] fix: Improve regex patterns in helper module - Adjusted `TOOL_CALL_RE` regex pattern for better accuracy. 
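The concrete pattern is not legible in the hunk below; a rough sketch of the kind of matching `TOOL_CALL_RE` performs, assuming `<tool_call>...</tool_call>` markers inside the fenced block that `TOOL_BLOCK_RE` already extracts — the tag spelling and the exact accuracy tweak are assumptions:

```python
import re

# Assumed tags; the real pattern in helper.py may differ.
TOOL_CALL_RE = re.compile(r"<tool_call>\s*(.*?)\s*</tool_call>", re.DOTALL | re.IGNORECASE)

sample = '<TOOL_CALL>{"name": "get_weather", "arguments": {"city": "Hanoi"}}</TOOL_CALL>'
for m in TOOL_CALL_RE.finditer(sample):
    print(m.group(1))  # raw JSON payload between the markers
```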
--- app/utils/helper.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 28be240..99e6d7a 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -30,7 +30,7 @@ ) TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE) TOOL_CALL_RE = re.compile( - r"(.*?)", re.DOTALL | re.IGNORECASE + r"(.*?)", re.DOTALL | re.IGNORECASE ) JSON_FENCE_RE = re.compile(r"^```(?:json)?\s*(.*?)\s*```$", re.DOTALL | re.IGNORECASE) CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") @@ -140,7 +140,7 @@ def strip_tagged_blocks(text: str) -> str: role_start = start + len(start_marker) newline = text.find("\n", role_start) if newline == -1: - # malformed block; keep remainder as-is (safe behavior) + # malformed block; keep the remainder as-is (safe behavior) result.append(text[start:]) break @@ -150,7 +150,7 @@ def strip_tagged_blocks(text: str) -> str: if end == -1: # missing end marker if role == "tool": - # drop from start marker to EOF (skip remainder) + # drop from the start marker to EOF (skip the remainder) break else: # keep inner content from after the role newline to EOF @@ -160,7 +160,7 @@ def strip_tagged_blocks(text: str) -> str: block_end = end + len(end_marker) if role == "tool": - # drop whole block + # drop the whole block idx = block_end continue @@ -217,7 +217,7 @@ def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: tool_calls: list[ToolCall] = [] def _create_tool_call(name: str, raw_args: str) -> None: - """Helper to parse args and append to tool_calls list.""" + """Helper to parse args and append to the tool_calls list.""" if not name: logger.warning("Encountered tool_call without a function name.") return From 35c1e99993d11033ae9047e85f645ce5def7f09b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 13 Jan 2026 09:02:10 +0700 Subject: [PATCH 35/54] docs: Update README files to include custom model configuration and environment variable setup --- README.md | 47 ++++++++++++++++++++++++++++++++++++++++++++++- README.zh.md | 51 ++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 94 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 2df3a73..5d6de40 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,7 @@ services: - CONFIG_GEMINI__CLIENTS__0__SECURE_1PSID=${SECURE_1PSID} - CONFIG_GEMINI__CLIENTS__0__SECURE_1PSIDTS=${SECURE_1PSIDTS} - GEMINI_COOKIE_PATH=/app/cache # must match the cache volume mount above - restart: on-failure:3 # Avoid retrying too many times + restart: on-failure:3 # Avoid retrying too many times ``` Then run: @@ -187,6 +187,51 @@ To use Gemini-FastAPI, you need to extract your Gemini session cookies: Each client entry can be configured with a different proxy to work around rate limits. Omit the `proxy` field or set it to `null` or an empty string to keep a direct connection. +### Custom Models + +You can define custom models in `config/config.yaml` or via environment variables. + +#### YAML Configuration + +```yaml +gemini: + model_strategy: "append" # "append" (default + custom) or "overwrite" (custom only) + models: + - model_name: "gemini-3.0-pro" + model_header: + x-goog-ext-525001261-jspb: '[1,null,null,null,"9d8ca3786ebdfbea",null,null,0,[4],null,null,1]' +``` + +#### Environment Variables + +You can supply models as a JSON string or list structure via `CONFIG_GEMINI__MODELS`. 
+ +##### Bash + +```bash +export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" +export CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' +``` + +##### Docker Compose + +```yaml +services: + gemini-fastapi: + environment: + - CONFIG_GEMINI__MODEL_STRATEGY=overwrite + - CONFIG_GEMINI__MODELS=[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}] +``` + +##### Docker CLI + +```bash +docker run -d \ + -e CONFIG_GEMINI__MODEL_STRATEGY="overwrite" \ + -e CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' \ + ghcr.io/nativu5/gemini-fastapi +``` + ## Acknowledgments - [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) - The underlying Gemini web API client diff --git a/README.zh.md b/README.zh.md index 6b7dd74..791afd8 100644 --- a/README.zh.md +++ b/README.zh.md @@ -4,7 +4,6 @@ [![FastAPI](https://img.shields.io/badge/FastAPI-0.115+-green.svg)](https://fastapi.tiangolo.com/) [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) - [ [English](README.md) | 中文 ] 将 Gemini 网页端模型封装为兼容 OpenAI API 的 API Server。基于 [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) 实现。 @@ -50,6 +49,7 @@ pip install -e . ### 配置 编辑 `config/config.yaml` 并提供至少一组凭证: + ```yaml gemini: clients: @@ -118,7 +118,7 @@ services: - CONFIG_GEMINI__CLIENTS__0__SECURE_1PSID=${SECURE_1PSID} - CONFIG_GEMINI__CLIENTS__0__SECURE_1PSIDTS=${SECURE_1PSIDTS} - GEMINI_COOKIE_PATH=/app/cache # must match the cache volume mount above - restart: on-failure:3 # Avoid retrying too many times + restart: on-failure:3 # Avoid retrying too many times ``` 然后运行: @@ -186,6 +186,51 @@ export CONFIG_STORAGE__MAX_SIZE=268435456 # 256 MB 每个客户端条目可以配置不同的代理,从而规避速率限制。省略 `proxy` 字段或将其设置为 `null` 或空字符串以保持直连。 +### 自定义模型 + +你可以在 `config/config.yaml` 中或通过环境变量定义自定义模型。 + +#### YAML 配置 + +```yaml +gemini: + model_strategy: "append" # "append" (默认 + 自定义) 或 "overwrite" (仅限自定义) + models: + - model_name: "gemini-3.0-pro" + model_header: + x-goog-ext-525001261-jspb: '[1,null,null,null,"9d8ca3786ebdfbea",null,null,0,[4],null,null,1]' +``` + +#### 环境变量 + +你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串或列表结构的形式提供模型。 + +##### Bash + +```bash +export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" +export CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' +``` + +##### Docker Compose + +```yaml +services: + gemini-fastapi: + environment: + - CONFIG_GEMINI__MODEL_STRATEGY=overwrite + - CONFIG_GEMINI__MODELS=[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}] +``` + +##### Docker CLI + +```bash +docker run -d \ + -e CONFIG_GEMINI__MODEL_STRATEGY="overwrite" \ + -e CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' \ + ghcr.io/nativu5/gemini-fastapi +``` + ## 鸣谢 - [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) - 底层 Gemini Web API 客户端 @@ -193,4 +238,4 @@ export CONFIG_STORAGE__MAX_SIZE=268435456 # 256 MB ## 免责声明 -本项目与 Google 或 OpenAI 
无关,仅供学习和研究使用。本项目使用了逆向工程 API,可能不符合 Google 服务条款。使用风险自负。 \ No newline at end of file +本项目与 Google 或 OpenAI 无关,仅供学习和研究使用。本项目使用了逆向工程 API,可能不符合 Google 服务条款。使用风险自负。 From 9b8162133e86a323400e7e2fb36ed651b31c795f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 13 Jan 2026 09:23:28 +0700 Subject: [PATCH 36/54] fix: Remove unused headers from HTTP client in helper module --- app/utils/helper.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 99e6d7a..51a6ccf 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -14,9 +14,6 @@ from ..models import FunctionCall, Message, ToolCall -HEADERS = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36" -} VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} XML_WRAP_HINT = ( "\nYou MUST wrap every tool call response inside a single fenced block exactly like:\n" @@ -82,7 +79,7 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path: if not suffix: suffix = f".{mime_type.split('/')[1]}" else: - async with httpx.AsyncClient(headers=HEADERS, follow_redirects=True) as client: + async with httpx.AsyncClient(follow_redirects=True) as client: resp = await client.get(url) resp.raise_for_status() data = resp.content From 32a48dcdc98d9e96e791ae6f914e6b3f12804c97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 15 Jan 2026 10:18:58 +0700 Subject: [PATCH 37/54] fix: Update README and README.zh to clarify model configuration via environment variables; enhance error logging in config validation --- README.md | 23 +---------------------- README.zh.md | 23 +---------------------- app/server/chat.py | 6 ++++-- app/utils/config.py | 27 +++++++++++++++++++++++---- 4 files changed, 29 insertions(+), 50 deletions(-) diff --git a/README.md b/README.md index 5d6de40..d7a7214 100644 --- a/README.md +++ b/README.md @@ -204,34 +204,13 @@ gemini: #### Environment Variables -You can supply models as a JSON string or list structure via `CONFIG_GEMINI__MODELS`. - -##### Bash +You can supply models as a JSON string via `CONFIG_GEMINI__MODELS`. This provides a flexible way to override settings via the shell or in automated environments without modifying the configuration file. 
```bash export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" export CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' ``` -##### Docker Compose - -```yaml -services: - gemini-fastapi: - environment: - - CONFIG_GEMINI__MODEL_STRATEGY=overwrite - - CONFIG_GEMINI__MODELS=[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}] -``` - -##### Docker CLI - -```bash -docker run -d \ - -e CONFIG_GEMINI__MODEL_STRATEGY="overwrite" \ - -e CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' \ - ghcr.io/nativu5/gemini-fastapi -``` - ## Acknowledgments - [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) - The underlying Gemini web API client diff --git a/README.zh.md b/README.zh.md index 791afd8..09d80a4 100644 --- a/README.zh.md +++ b/README.zh.md @@ -203,34 +203,13 @@ gemini: #### 环境变量 -你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串或列表结构的形式提供模型。 - -##### Bash +你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串的形式提供模型。这为通过 shell 或在自动化环境中覆盖设置提供了一种灵活的方式,无需修改配置文件。 ```bash export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" export CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' ``` -##### Docker Compose - -```yaml -services: - gemini-fastapi: - environment: - - CONFIG_GEMINI__MODEL_STRATEGY=overwrite - - CONFIG_GEMINI__MODELS=[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}] -``` - -##### Docker CLI - -```bash -docker run -d \ - -e CONFIG_GEMINI__MODEL_STRATEGY="overwrite" \ - -e CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' \ - ghcr.io/nativu5/gemini-fastapi -``` - ## 鸣谢 - [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) - 底层 Gemini Web API 客户端 diff --git a/app/server/chat.py b/app/server/chat.py index 9485f7a..6e517ea 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -924,7 +924,7 @@ async def _build_payload( image_call_items.append( ResponseImageGenerationCall( - id=filename.split(".")[0], + id=filename.rsplit(".", 1)[0], status="completed", result=image_base64, output_format=img_format, @@ -1350,7 +1350,9 @@ async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | Non try: saved_path = await image.save(path=str(temp_dir), full_size=True) except Exception as e: - logger.warning(f"Failed to download full-size image, retrying with default size: {e}") + logger.warning( + f"Failed to download full-size GeneratedImage, retrying with default size: {e}" + ) saved_path = await image.save(path=str(temp_dir), full_size=False) else: saved_path = await image.save(path=str(temp_dir)) diff --git a/app/utils/config.py b/app/utils/config.py index 74a5294..a9c5d44 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -109,8 +109,21 @@ def _parse_models_json(cls, v: Any) -> Any: @field_validator("models") @classmethod def _filter_valid_models(cls, v: list[GeminiModelConfig]) -> list[GeminiModelConfig]: - """Filter out models that don't have a name set (placeholders).""" - 
return [model for model in v if model.model_name] + """Filter out models that don't have all required fields set.""" + valid_models = [] + for model in v: + if model.model_name and model.model_header: + valid_models.append(model) + else: + missing = [] + if not model.model_name: + missing.append("model_name") + if not model.model_header: + missing.append("model_header") + logger.warning( + f"Discarding custom model due to missing {', '.join(missing)}: {model}" + ) + return valid_models class CORSConfig(BaseModel): @@ -251,7 +264,10 @@ def _merge_clients_with_env( new_client = GeminiClientSettings(**overrides) result_clients.append(new_client) else: - raise IndexError(f"Client index {idx} in env is out of range.") + raise IndexError( + f"Client index {idx} in env is out of range (current count: {len(result_clients)}). " + "Client indices must be contiguous starting from 0." + ) return result_clients if result_clients else base_clients @@ -309,7 +325,10 @@ def _merge_models_with_env( new_model = GeminiModelConfig(**overrides) result_models.append(new_model) else: - raise IndexError(f"Model index {idx} in env is out of range (must be contiguous).") + raise IndexError( + f"Model index {idx} in env is out of range (current count: {len(result_models)}). " + "Model indices must be contiguous starting from 0." + ) return result_models From 0c00b089d5b33e394abaac6a1d36ae08cede166c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 15 Jan 2026 11:24:08 +0700 Subject: [PATCH 38/54] Update README and README.zh to clarify model configuration via JSON string or list structure for enhanced flexibility in automated environments --- README.md | 2 +- README.zh.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d7a7214..330e9c8 100644 --- a/README.md +++ b/README.md @@ -204,7 +204,7 @@ gemini: #### Environment Variables -You can supply models as a JSON string via `CONFIG_GEMINI__MODELS`. This provides a flexible way to override settings via the shell or in automated environments without modifying the configuration file. +You can supply models as a JSON string or list structure via `CONFIG_GEMINI__MODELS`. This provides a flexible way to override settings via the shell or in automated environments (e.g. Docker) without modifying the configuration file. 
```bash export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" diff --git a/README.zh.md b/README.zh.md index 09d80a4..2f9e1b5 100644 --- a/README.zh.md +++ b/README.zh.md @@ -203,7 +203,7 @@ gemini: #### 环境变量 -你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串的形式提供模型。这为通过 shell 或在自动化环境中覆盖设置提供了一种灵活的方式,无需修改配置文件。 +你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串或列表结构的形式提供模型。这为通过 shell 或在自动化环境(例如 Docker)中覆盖设置提供了一种灵活的方式,而无需修改配置文件。 ```bash export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" From b599d99f9967188bb8a277fd09951ddf32006f20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 23 Jan 2026 12:14:40 +0700 Subject: [PATCH 39/54] Refactor: compress JSON content to save tokens and streamline sending multiple chunks --- app/server/chat.py | 50 +++++++++++++++++++++++++++++------------- app/services/client.py | 4 ++-- app/utils/helper.py | 2 +- 3 files changed, 38 insertions(+), 18 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 6e517ea..1e7d786 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1,5 +1,7 @@ +import asyncio import base64 import json +import random import re import uuid from dataclasses import dataclass @@ -95,7 +97,7 @@ def _build_structured_requirement( schema_name = json_schema.get("name") or "response" strict = json_schema.get("strict", True) - pretty_schema = json.dumps(schema, ensure_ascii=False, indent=2, sort_keys=True) + pretty_schema = json.dumps(schema, ensure_ascii=False, separators=(",", ":"), sort_keys=True) instruction_parts = [ "You must respond with a single valid JSON document that conforms to the schema shown below.", "Do not include explanations, comments, or any text before or after the JSON.", @@ -135,7 +137,7 @@ def _build_tool_prompt( description = function.description or "No description provided." lines.append(f"Tool `{function.name}`: {description}") if function.parameters: - schema_text = json.dumps(function.parameters, ensure_ascii=False, indent=2) + schema_text = json.dumps(function.parameters, ensure_ascii=False, separators=(",", ":")) lines.append("Arguments JSON schema:") lines.append(schema_text) else: @@ -635,7 +637,7 @@ async def create_chat_completion( detail="LLM returned invalid JSON for the requested response_format.", ) from exc - canonical_output = json.dumps(structured_payload, ensure_ascii=False) + canonical_output = json.dumps(structured_payload, ensure_ascii=False, separators=(",", ":")) visible_output = canonical_output storage_output = canonical_output @@ -875,7 +877,7 @@ async def _build_payload( detail="LLM returned invalid JSON for the requested response_format.", ) from exc - canonical_output = json.dumps(structured_payload, ensure_ascii=False) + canonical_output = json.dumps(structured_payload, ensure_ascii=False, separators=(",", ":")) assistant_text = canonical_output storage_output = canonical_output logger.debug( @@ -1081,38 +1083,56 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s that Gemini can produce the actual answer. """ if len(text) <= MAX_CHARS_PER_REQUEST: - # No need to split - a single request is fine. 
try: return await session.send_message(text, files=files) except Exception as e: logger.exception(f"Error sending message to Gemini: {e}") raise + hint_len = len(CONTINUATION_HINT) - chunk_size = MAX_CHARS_PER_REQUEST - hint_len + safe_chunk_size = MAX_CHARS_PER_REQUEST - hint_len chunks: list[str] = [] pos = 0 total = len(text) + while pos < total: - end = min(pos + chunk_size, total) - chunk = text[pos:end] - pos = end + remaining = total - pos + if remaining <= MAX_CHARS_PER_REQUEST: + chunks.append(text[pos:]) + break + + end = pos + safe_chunk_size + slice_candidate = text[pos:end] + # Try to find a safe split point + split_idx = -1 + idx = slice_candidate.rfind("\n") + if idx != -1: + split_idx = idx + + if split_idx != -1: + split_at = pos + split_idx + 1 + else: + split_at = end - # If this is NOT the last chunk, add the continuation hint. - if end < total: - chunk += CONTINUATION_HINT + chunk = text[pos:split_at] + CONTINUATION_HINT chunks.append(chunk) + pos = split_at - # Fire off all but the last chunk, discarding the interim "ok" replies. - for chk in chunks[:-1]: + chunks_size = len(chunks) + for i, chk in enumerate(chunks[:-1]): try: + logger.debug(f"Sending chunk {i + 1}/{chunks_size}...") await session.send_message(chk) + delay = random.uniform(1.0, 3.0) + logger.debug(f"Sleeping for {delay:.2f}s...") + await asyncio.sleep(delay) except Exception as e: logger.exception(f"Error sending chunk to Gemini: {e}") raise - # The last chunk carries the files (if any) and we return its response. try: + logger.debug(f"Sending final chunk {chunks_size}/{chunks_size}...") return await session.send_message(chunks[-1], files=files) except Exception as e: logger.exception(f"Error sending final chunk to Gemini: {e}") diff --git a/app/services/client.py b/app/services/client.py index 87c0ca7..1f23271 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -123,7 +123,7 @@ async def process_message( args_text = call.function.arguments.strip() try: parsed_args = json.loads(args_text) - args_text = json.dumps(parsed_args, ensure_ascii=False) + args_text = json.dumps(parsed_args, ensure_ascii=False, separators=(",", ":")) except (json.JSONDecodeError, TypeError): # Leave args_text as is if it is not valid JSON pass @@ -132,7 +132,7 @@ async def process_message( ) if tool_blocks: - tool_section = "```xml\n" + "\n".join(tool_blocks) + "\n```" + tool_section = "```xml\n" + "".join(tool_blocks) + "\n```" text_fragments.append(tool_section) model_input = "\n".join(fragment for fragment in text_fragments if fragment) diff --git a/app/utils/helper.py b/app/utils/helper.py index 51a6ccf..578b666 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -222,7 +222,7 @@ def _create_tool_call(name: str, raw_args: str) -> None: arguments = raw_args try: parsed_args = json.loads(raw_args) - arguments = json.dumps(parsed_args, ensure_ascii=False) + arguments = json.dumps(parsed_args, ensure_ascii=False, separators=(",", ":")) except json.JSONDecodeError: logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.") From 186b8448d7f088df621b627ca7b28c5a7acaf341 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 23 Jan 2026 23:08:32 +0700 Subject: [PATCH 40/54] Refactor: Modify the LMDB store to fix issues where no conversation is found in either the raw or cleaned history. 
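A minimal sketch of the normalization idea, assuming plain dict messages rather than the Pydantic `Message` model; it shows why a string body and an equivalent single-part text list must hash identically for history lookups to succeed:

```python
import hashlib

import orjson


def hash_message(message: dict) -> str:
    """Hash a message after flattening pure-text list content to a plain string."""
    content = message.get("content")
    if isinstance(content, list) and all(
        isinstance(item, dict) and item.get("type") == "text" for item in content
    ):
        message = {**message, "content": "".join(item.get("text") or "" for item in content)}
    return hashlib.sha256(orjson.dumps(message, option=orjson.OPT_SORT_KEYS)).hexdigest()


a = {"role": "user", "content": "hi"}
b = {"role": "user", "content": [{"type": "text", "text": "hi"}]}
print(hash_message(a) == hash_message(b))  # True: both forms hash identically
```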
--- app/services/lmdb.py | 46 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 8ccb0d4..d671663 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -9,7 +9,7 @@ import orjson from loguru import logger -from ..models import ConversationInStore, Message +from ..models import ContentItem, ConversationInStore, Message from ..utils import g_config from ..utils.singleton import Singleton @@ -18,6 +18,19 @@ def _hash_message(message: Message) -> str: """Generate a hash for a single message.""" # Convert message to dict and sort keys for consistent hashing message_dict = message.model_dump(mode="json") + content = message_dict.get("content") + if isinstance(content, list): + is_pure_text = True + text_parts = [] + for item in content: + if not isinstance(item, dict) or item.get("type") != "text": + is_pure_text = False + break + text_parts.append(item.get("text") or "") + + if is_pure_text: + message_dict["content"] = "".join(text_parts) + message_bytes = orjson.dumps(message_dict, option=orjson.OPT_SORT_KEYS) return hashlib.sha256(message_bytes).hexdigest() @@ -435,12 +448,31 @@ def sanitize_assistant_messages(messages: list[Message]) -> list[Message]: """ cleaned_messages = [] for msg in messages: - if msg.role == "assistant" and isinstance(msg.content, str): - normalized_content = LMDBConversationStore.remove_think_tags(msg.content) - # Only create a new object if content actually changed - if normalized_content != msg.content: - cleaned_msg = Message(role=msg.role, content=normalized_content, name=msg.name) - cleaned_messages.append(cleaned_msg) + if msg.role == "assistant": + if isinstance(msg.content, str): + normalized_content = LMDBConversationStore.remove_think_tags(msg.content) + if normalized_content != msg.content: + cleaned_msg = Message( + role=msg.role, content=normalized_content, name=msg.name + ) + cleaned_messages.append(cleaned_msg) + else: + cleaned_messages.append(msg) + elif isinstance(msg.content, list): + new_content = [] + changed = False + for item in msg.content: + if isinstance(item, ContentItem) and item.type == "text" and item.text: + cleaned_text = LMDBConversationStore.remove_think_tags(item.text) + if cleaned_text != item.text: + changed = True + item = item.model_copy(update={"text": cleaned_text}) + new_content.append(item) + + if changed: + cleaned_messages.append(msg.model_copy(update={"content": new_content})) + else: + cleaned_messages.append(msg) else: cleaned_messages.append(msg) else: From 6dd1fecdced932c537f579a3c5dd3db87847d475 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 10:03:24 +0700 Subject: [PATCH 41/54] Refactor: Modify the LMDB store to fix issues where no conversation is found. 
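A minimal sketch of argument canonicalization, assuming tool-call arguments arrive as JSON text; whitespace and key-order differences collapse to one form before hashing:

```python
import orjson


def canonicalize_arguments(args: str) -> str:
    """Re-serialize a JSON argument string with sorted keys and no extra whitespace."""
    try:
        return orjson.dumps(orjson.loads(args), option=orjson.OPT_SORT_KEYS).decode("utf-8")
    except orjson.JSONDecodeError:
        return args  # not valid JSON; hash the raw string as-is


# Both spellings collapse to the same canonical form:
print(canonicalize_arguments('{"b": 1,  "a": 2}'))  # {"a":2,"b":1}
print(canonicalize_arguments('{"a": 2, "b": 1}'))   # {"a":2,"b":1}
```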
--- app/services/lmdb.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index d671663..93c7723 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -18,8 +18,12 @@ def _hash_message(message: Message) -> str: """Generate a hash for a single message.""" # Convert message to dict and sort keys for consistent hashing message_dict = message.model_dump(mode="json") + + # Normalize content: empty string -> None content = message_dict.get("content") - if isinstance(content, list): + if content == "": + message_dict["content"] = None + elif isinstance(content, list): is_pure_text = True text_parts = [] for item in content: @@ -29,7 +33,27 @@ def _hash_message(message: Message) -> str: text_parts.append(item.get("text") or "") if is_pure_text: - message_dict["content"] = "".join(text_parts) + text_content = "".join(text_parts) + message_dict["content"] = text_content if text_content else None + + # Normalize tool_calls: empty list -> None, and canonicalize arguments + tool_calls = message_dict.get("tool_calls") + if not tool_calls: + message_dict["tool_calls"] = None + elif isinstance(tool_calls, list): + for tool_call in tool_calls: + if isinstance(tool_call, dict) and "function" in tool_call: + func = tool_call["function"] + args = func.get("arguments") + if isinstance(args, str): + try: + # Parse and re-dump to canonicalize (remove extra whitespace, sort keys) + parsed = orjson.loads(args) + func["arguments"] = orjson.dumps( + parsed, option=orjson.OPT_SORT_KEYS + ).decode("utf-8") + except Exception: + pass message_bytes = orjson.dumps(message_dict, option=orjson.OPT_SORT_KEYS) return hashlib.sha256(message_bytes).hexdigest() From 20ed2456d2324501bbe4ba6392870cd612c9083c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 10:46:27 +0700 Subject: [PATCH 42/54] Refactor: Update all functions to use orjson for better performance --- app/main.py | 2 ++ app/server/chat.py | 17 ++++++++--------- app/services/client.py | 8 ++++---- app/utils/config.py | 14 +++++++------- app/utils/helper.py | 8 ++++---- 5 files changed, 25 insertions(+), 24 deletions(-) diff --git a/app/main.py b/app/main.py index c215e2a..307eb36 100644 --- a/app/main.py +++ b/app/main.py @@ -2,6 +2,7 @@ from contextlib import asynccontextmanager from fastapi import FastAPI +from fastapi.responses import ORJSONResponse from loguru import logger from .server.chat import router as chat_router @@ -92,6 +93,7 @@ def create_app() -> FastAPI: description="OpenAI-compatible API for Gemini Web", version="1.0.0", lifespan=lifespan, + default_response_class=ORJSONResponse, ) add_cors_middleware(app) diff --git a/app/server/chat.py b/app/server/chat.py index 1e7d786..a9d9dec 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1,6 +1,5 @@ import asyncio import base64 -import json import random import re import uuid @@ -97,7 +96,7 @@ def _build_structured_requirement( schema_name = json_schema.get("name") or "response" strict = json_schema.get("strict", True) - pretty_schema = json.dumps(schema, ensure_ascii=False, separators=(",", ":"), sort_keys=True) + pretty_schema = orjson.dumps(schema, option=orjson.OPT_SORT_KEYS).decode("utf-8") instruction_parts = [ "You must respond with a single valid JSON document that conforms to the schema shown below.", "Do not include explanations, comments, or any text before or after the JSON.", @@ -137,7 +136,7 @@ def _build_tool_prompt( description = 
function.description or "No description provided." lines.append(f"Tool `{function.name}`: {description}") if function.parameters: - schema_text = json.dumps(function.parameters, ensure_ascii=False, separators=(",", ":")) + schema_text = orjson.dumps(function.parameters).decode("utf-8") lines.append("Arguments JSON schema:") lines.append(schema_text) else: @@ -626,8 +625,8 @@ async def create_chat_completion( detail="LLM returned an empty response while JSON schema output was requested.", ) try: - structured_payload = json.loads(cleaned_visible) - except json.JSONDecodeError as exc: + structured_payload = orjson.loads(cleaned_visible) + except orjson.JSONDecodeError as exc: logger.warning( f"Failed to decode JSON for structured response (schema={structured_requirement.schema_name}): " f"{cleaned_visible}" @@ -637,7 +636,7 @@ async def create_chat_completion( detail="LLM returned invalid JSON for the requested response_format.", ) from exc - canonical_output = json.dumps(structured_payload, ensure_ascii=False, separators=(",", ":")) + canonical_output = orjson.dumps(structured_payload).decode("utf-8") visible_output = canonical_output storage_output = canonical_output @@ -866,8 +865,8 @@ async def _build_payload( detail="LLM returned an empty response while JSON schema output was requested.", ) try: - structured_payload = json.loads(cleaned_visible) - except json.JSONDecodeError as exc: + structured_payload = orjson.loads(cleaned_visible) + except orjson.JSONDecodeError as exc: logger.warning( f"Failed to decode JSON for structured response (schema={structured_requirement.schema_name}): " f"{cleaned_visible}" @@ -877,7 +876,7 @@ async def _build_payload( detail="LLM returned invalid JSON for the requested response_format.", ) from exc - canonical_output = json.dumps(structured_payload, ensure_ascii=False, separators=(",", ":")) + canonical_output = orjson.dumps(structured_payload).decode("utf-8") assistant_text = canonical_output storage_output = canonical_output logger.debug( diff --git a/app/services/client.py b/app/services/client.py index 1f23271..55be11a 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -1,9 +1,9 @@ import html -import json import re from pathlib import Path from typing import Any, cast +import orjson from gemini_webapi import GeminiClient, ModelOutput from loguru import logger @@ -122,9 +122,9 @@ async def process_message( for call in message.tool_calls: args_text = call.function.arguments.strip() try: - parsed_args = json.loads(args_text) - args_text = json.dumps(parsed_args, ensure_ascii=False, separators=(",", ":")) - except (json.JSONDecodeError, TypeError): + parsed_args = orjson.loads(args_text) + args_text = orjson.dumps(parsed_args).decode("utf-8") + except orjson.JSONDecodeError: # Leave args_text as is if it is not valid JSON pass tool_blocks.append( diff --git a/app/utils/config.py b/app/utils/config.py index a9c5d44..708462d 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -1,9 +1,9 @@ import ast -import json import os import sys from typing import Any, Literal, Optional +import orjson from loguru import logger from pydantic import BaseModel, Field, ValidationError, field_validator from pydantic_settings import ( @@ -65,8 +65,8 @@ class GeminiModelConfig(BaseModel): def _parse_json_string(cls, v: Any) -> Any: if isinstance(v, str) and v.strip().startswith("{"): try: - return json.loads(v) - except json.JSONDecodeError: + return orjson.loads(v) + except orjson.JSONDecodeError: # Return the original value to let Pydantic handle 
the error or type mismatch return v return v @@ -100,8 +100,8 @@ class GeminiConfig(BaseModel): def _parse_models_json(cls, v: Any) -> Any: if isinstance(v, str) and v.strip().startswith("["): try: - return json.loads(v) - except json.JSONDecodeError as e: + return orjson.loads(v) + except orjson.JSONDecodeError as e: logger.warning(f"Failed to parse models JSON string: {e}") return v return v @@ -282,9 +282,9 @@ def extract_gemini_models_env() -> dict[int, dict[str, Any]]: parsed_successfully = False try: - models_list = json.loads(val) + models_list = orjson.loads(val) parsed_successfully = True - except json.JSONDecodeError: + except orjson.JSONDecodeError: try: models_list = ast.literal_eval(val) parsed_successfully = True diff --git a/app/utils/helper.py b/app/utils/helper.py index 578b666..1dc518f 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -1,5 +1,4 @@ import base64 -import json import mimetypes import re import struct @@ -10,6 +9,7 @@ from urllib.parse import urlparse import httpx +import orjson from loguru import logger from ..models import FunctionCall, Message, ToolCall @@ -221,9 +221,9 @@ def _create_tool_call(name: str, raw_args: str) -> None: arguments = raw_args try: - parsed_args = json.loads(raw_args) - arguments = json.dumps(parsed_args, ensure_ascii=False, separators=(",", ":")) - except json.JSONDecodeError: + parsed_args = orjson.loads(raw_args) + arguments = orjson.dumps(parsed_args).decode("utf-8") + except orjson.JSONDecodeError: logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.") tool_calls.append( From f67fe63b3b654d3a28cc5ca0363a4ad894831d84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 10:47:26 +0700 Subject: [PATCH 43/54] Update project dependencies --- pyproject.toml | 21 ++++----- uv.lock | 118 +++++++++++++++++++++++++------------------------ 2 files changed, 71 insertions(+), 68 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 32a42b4..1c30f8e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,24 +5,25 @@ description = "FastAPI Server built on Gemini Web API" readme = "README.md" requires-python = "==3.12.*" dependencies = [ - "fastapi>=0.115.12", - "gemini-webapi>=1.17.0", - "lmdb>=1.6.2", - "loguru>=0.7.0", - "pydantic-settings[yaml]>=2.9.1", - "uvicorn>=0.34.1", - "uvloop>=0.21.0; sys_platform != 'win32'", + "fastapi>=0.128.0", + "gemini-webapi>=1.17.3", + "lmdb>=1.7.5", + "loguru>=0.7.3", + "orjson>=3.11.5", + "pydantic-settings[yaml]>=2.12.0", + "uvicorn>=0.40.0", + "uvloop>=0.22.1; sys_platform != 'win32'", ] [project.optional-dependencies] dev = [ - "ruff>=0.11.7", + "ruff>=0.14.14", ] [tool.ruff] line-length = 100 lint.select = ["E", "F", "W", "I", "RUF"] -lint.ignore = ["E501"] +lint.ignore = ["E501"] [tool.ruff.format] quote-style = "double" @@ -30,5 +31,5 @@ indent-style = "space" [dependency-groups] dev = [ - "ruff>=0.11.13", + "ruff>=0.14.14", ] diff --git a/uv.lock b/uv.lock index 923e6d3..50a73be 100644 --- a/uv.lock +++ b/uv.lock @@ -22,24 +22,24 @@ wheels = [ [[package]] name = "anyio" -version = "4.12.0" +version = "4.12.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "idna" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/16/ce/8a777047513153587e5434fd752e89334ac33e379aa3497db860eeb60377/anyio-4.12.0.tar.gz", hash = "sha256:73c693b567b0c55130c104d0b43a9baf3aa6a31fc6110116509f27bf75e21ec0", size = 228266, upload-time = "2025-11-28T23:37:38.911Z" 
} +sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7f/9c/36c5c37947ebfb8c7f22e0eb6e4d188ee2d53aa3880f3f2744fb894f0cb1/anyio-4.12.0-py3-none-any.whl", hash = "sha256:dad2376a628f98eeca4881fc56cd06affd18f659b17a747d3ff0307ced94b1bb", size = 113362, upload-time = "2025-11-28T23:36:57.897Z" }, + { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" }, ] [[package]] name = "certifi" -version = "2025.11.12" +version = "2026.1.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/8c/58f469717fa48465e4a50c014a0400602d3c437d7c0c468e17ada824da3a/certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316", size = 160538, upload-time = "2025-11-12T02:54:51.517Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload-time = "2026-01-04T02:42:41.825Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", size = 159438, upload-time = "2025-11-12T02:54:49.735Z" }, + { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" }, ] [[package]] @@ -65,7 +65,7 @@ wheels = [ [[package]] name = "fastapi" -version = "0.123.10" +version = "0.128.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-doc" }, @@ -73,9 +73,9 @@ dependencies = [ { name = "starlette" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/22/ff/e01087de891010089f1620c916c0c13130f3898177955c13e2b02d22ec4a/fastapi-0.123.10.tar.gz", hash = "sha256:624d384d7cda7c096449c889fc776a0571948ba14c3c929fa8e9a78cd0b0a6a8", size = 356360, upload-time = "2025-12-05T21:27:46.237Z" } +sdist = { url = "https://files.pythonhosted.org/packages/52/08/8c8508db6c7b9aae8f7175046af41baad690771c9bcde676419965e338c7/fastapi-0.128.0.tar.gz", hash = "sha256:1cc179e1cef10a6be60ffe429f79b829dce99d8de32d7acb7e6c8dfdf7f2645a", size = 365682, upload-time = "2025-12-27T15:21:13.714Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d7/f0/7cb92c4a720def85240fd63fbbcf147ce19e7a731c8e1032376bb5a486ac/fastapi-0.123.10-py3-none-any.whl", hash = "sha256:0503b7b7bc71bc98f7c90c9117d21fdf6147c0d74703011b87936becc86985c1", size = 111774, upload-time = "2025-12-05T21:27:44.78Z" }, + { url = "https://files.pythonhosted.org/packages/5c/05/5cbb59154b093548acd0f4c7c474a118eda06da25aa75c616b72d8fcd92a/fastapi-0.128.0-py3-none-any.whl", hash = 
"sha256:aebd93f9716ee3b4f4fcfe13ffb7cf308d99c9f3ab5622d8877441072561582d", size = 103094, upload-time = "2025-12-27T15:21:12.154Z" }, ] [[package]] @@ -87,6 +87,7 @@ dependencies = [ { name = "gemini-webapi" }, { name = "lmdb" }, { name = "loguru" }, + { name = "orjson" }, { name = "pydantic-settings", extra = ["yaml"] }, { name = "uvicorn" }, { name = "uvloop", marker = "sys_platform != 'win32'" }, @@ -104,19 +105,20 @@ dev = [ [package.metadata] requires-dist = [ - { name = "fastapi", specifier = ">=0.115.12" }, - { name = "gemini-webapi", specifier = ">=1.17.0" }, - { name = "lmdb", specifier = ">=1.6.2" }, - { name = "loguru", specifier = ">=0.7.0" }, - { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.9.1" }, - { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.11.7" }, - { name = "uvicorn", specifier = ">=0.34.1" }, - { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.21.0" }, + { name = "fastapi", specifier = ">=0.128.0" }, + { name = "gemini-webapi", specifier = ">=1.17.3" }, + { name = "lmdb", specifier = ">=1.7.5" }, + { name = "loguru", specifier = ">=0.7.3" }, + { name = "orjson", specifier = ">=3.11.5" }, + { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.12.0" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.14.14" }, + { name = "uvicorn", specifier = ">=0.40.0" }, + { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.22.1" }, ] provides-extras = ["dev"] [package.metadata.requires-dev] -dev = [{ name = "ruff", specifier = ">=0.11.13" }] +dev = [{ name = "ruff", specifier = ">=0.14.14" }] [[package]] name = "gemini-webapi" @@ -209,25 +211,25 @@ wheels = [ [[package]] name = "orjson" -version = "3.11.4" +version = "3.11.5" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c6/fe/ed708782d6709cc60eb4c2d8a361a440661f74134675c72990f2c48c785f/orjson-3.11.4.tar.gz", hash = "sha256:39485f4ab4c9b30a3943cfe99e1a213c4776fb69e8abd68f66b83d5a0b0fdc6d", size = 5945188, upload-time = "2025-10-24T15:50:38.027Z" } +sdist = { url = "https://files.pythonhosted.org/packages/04/b8/333fdb27840f3bf04022d21b654a35f58e15407183aeb16f3b41aa053446/orjson-3.11.5.tar.gz", hash = "sha256:82393ab47b4fe44ffd0a7659fa9cfaacc717eb617c93cde83795f14af5c2e9d5", size = 5972347, upload-time = "2025-12-06T15:55:39.458Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/63/51/6b556192a04595b93e277a9ff71cd0cc06c21a7df98bcce5963fa0f5e36f/orjson-3.11.4-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:d4371de39319d05d3f482f372720b841c841b52f5385bd99c61ed69d55d9ab50", size = 243571, upload-time = "2025-10-24T15:49:10.008Z" }, - { url = "https://files.pythonhosted.org/packages/1c/2c/2602392ddf2601d538ff11848b98621cd465d1a1ceb9db9e8043181f2f7b/orjson-3.11.4-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:e41fd3b3cac850eaae78232f37325ed7d7436e11c471246b87b2cd294ec94853", size = 128891, upload-time = "2025-10-24T15:49:11.297Z" }, - { url = "https://files.pythonhosted.org/packages/4e/47/bf85dcf95f7a3a12bf223394a4f849430acd82633848d52def09fa3f46ad/orjson-3.11.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:600e0e9ca042878c7fdf189cf1b028fe2c1418cc9195f6cb9824eb6ed99cb938", size = 130137, upload-time = "2025-10-24T15:49:12.544Z" }, - { url = 
"https://files.pythonhosted.org/packages/b4/4d/a0cb31007f3ab6f1fd2a1b17057c7c349bc2baf8921a85c0180cc7be8011/orjson-3.11.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7bbf9b333f1568ef5da42bc96e18bf30fd7f8d54e9ae066d711056add508e415", size = 129152, upload-time = "2025-10-24T15:49:13.754Z" }, - { url = "https://files.pythonhosted.org/packages/f7/ef/2811def7ce3d8576b19e3929fff8f8f0d44bc5eb2e0fdecb2e6e6cc6c720/orjson-3.11.4-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4806363144bb6e7297b8e95870e78d30a649fdc4e23fc84daa80c8ebd366ce44", size = 136834, upload-time = "2025-10-24T15:49:15.307Z" }, - { url = "https://files.pythonhosted.org/packages/00/d4/9aee9e54f1809cec8ed5abd9bc31e8a9631d19460e3b8470145d25140106/orjson-3.11.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad355e8308493f527d41154e9053b86a5be892b3b359a5c6d5d95cda23601cb2", size = 137519, upload-time = "2025-10-24T15:49:16.557Z" }, - { url = "https://files.pythonhosted.org/packages/db/ea/67bfdb5465d5679e8ae8d68c11753aaf4f47e3e7264bad66dc2f2249e643/orjson-3.11.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c8a7517482667fb9f0ff1b2f16fe5829296ed7a655d04d68cd9711a4d8a4e708", size = 136749, upload-time = "2025-10-24T15:49:17.796Z" }, - { url = "https://files.pythonhosted.org/packages/01/7e/62517dddcfce6d53a39543cd74d0dccfcbdf53967017c58af68822100272/orjson-3.11.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97eb5942c7395a171cbfecc4ef6701fc3c403e762194683772df4c54cfbb2210", size = 136325, upload-time = "2025-10-24T15:49:19.347Z" }, - { url = "https://files.pythonhosted.org/packages/18/ae/40516739f99ab4c7ec3aaa5cc242d341fcb03a45d89edeeaabc5f69cb2cf/orjson-3.11.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:149d95d5e018bdd822e3f38c103b1a7c91f88d38a88aada5c4e9b3a73a244241", size = 140204, upload-time = "2025-10-24T15:49:20.545Z" }, - { url = "https://files.pythonhosted.org/packages/82/18/ff5734365623a8916e3a4037fcef1cd1782bfc14cf0992afe7940c5320bf/orjson-3.11.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:624f3951181eb46fc47dea3d221554e98784c823e7069edb5dbd0dc826ac909b", size = 406242, upload-time = "2025-10-24T15:49:21.884Z" }, - { url = "https://files.pythonhosted.org/packages/e1/43/96436041f0a0c8c8deca6a05ebeaf529bf1de04839f93ac5e7c479807aec/orjson-3.11.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:03bfa548cf35e3f8b3a96c4e8e41f753c686ff3d8e182ce275b1751deddab58c", size = 150013, upload-time = "2025-10-24T15:49:23.185Z" }, - { url = "https://files.pythonhosted.org/packages/1b/48/78302d98423ed8780479a1e682b9aecb869e8404545d999d34fa486e573e/orjson-3.11.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:525021896afef44a68148f6ed8a8bf8375553d6066c7f48537657f64823565b9", size = 139951, upload-time = "2025-10-24T15:49:24.428Z" }, - { url = "https://files.pythonhosted.org/packages/4a/7b/ad613fdcdaa812f075ec0875143c3d37f8654457d2af17703905425981bf/orjson-3.11.4-cp312-cp312-win32.whl", hash = "sha256:b58430396687ce0f7d9eeb3dd47761ca7d8fda8e9eb92b3077a7a353a75efefa", size = 136049, upload-time = "2025-10-24T15:49:25.973Z" }, - { url = "https://files.pythonhosted.org/packages/b9/3c/9cf47c3ff5f39b8350fb21ba65d789b6a1129d4cbb3033ba36c8a9023520/orjson-3.11.4-cp312-cp312-win_amd64.whl", hash = "sha256:c6dbf422894e1e3c80a177133c0dda260f81428f9de16d61041949f6a2e5c140", size = 131461, upload-time = "2025-10-24T15:49:27.259Z" }, - { url = 
"https://files.pythonhosted.org/packages/c6/3b/e2425f61e5825dc5b08c2a5a2b3af387eaaca22a12b9c8c01504f8614c36/orjson-3.11.4-cp312-cp312-win_arm64.whl", hash = "sha256:d38d2bc06d6415852224fcc9c0bfa834c25431e466dc319f0edd56cca81aa96e", size = 126167, upload-time = "2025-10-24T15:49:28.511Z" }, + { url = "https://files.pythonhosted.org/packages/ef/a4/8052a029029b096a78955eadd68ab594ce2197e24ec50e6b6d2ab3f4e33b/orjson-3.11.5-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:334e5b4bff9ad101237c2d799d9fd45737752929753bf4faf4b207335a416b7d", size = 245347, upload-time = "2025-12-06T15:54:22.061Z" }, + { url = "https://files.pythonhosted.org/packages/64/67/574a7732bd9d9d79ac620c8790b4cfe0717a3d5a6eb2b539e6e8995e24a0/orjson-3.11.5-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:ff770589960a86eae279f5d8aa536196ebda8273a2a07db2a54e82b93bc86626", size = 129435, upload-time = "2025-12-06T15:54:23.615Z" }, + { url = "https://files.pythonhosted.org/packages/52/8d/544e77d7a29d90cf4d9eecd0ae801c688e7f3d1adfa2ebae5e1e94d38ab9/orjson-3.11.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed24250e55efbcb0b35bed7caaec8cedf858ab2f9f2201f17b8938c618c8ca6f", size = 132074, upload-time = "2025-12-06T15:54:24.694Z" }, + { url = "https://files.pythonhosted.org/packages/6e/57/b9f5b5b6fbff9c26f77e785baf56ae8460ef74acdb3eae4931c25b8f5ba9/orjson-3.11.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a66d7769e98a08a12a139049aac2f0ca3adae989817f8c43337455fbc7669b85", size = 130520, upload-time = "2025-12-06T15:54:26.185Z" }, + { url = "https://files.pythonhosted.org/packages/f6/6d/d34970bf9eb33f9ec7c979a262cad86076814859e54eb9a059a52f6dc13d/orjson-3.11.5-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:86cfc555bfd5794d24c6a1903e558b50644e5e68e6471d66502ce5cb5fdef3f9", size = 136209, upload-time = "2025-12-06T15:54:27.264Z" }, + { url = "https://files.pythonhosted.org/packages/e7/39/bc373b63cc0e117a105ea12e57280f83ae52fdee426890d57412432d63b3/orjson-3.11.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a230065027bc2a025e944f9d4714976a81e7ecfa940923283bca7bbc1f10f626", size = 139837, upload-time = "2025-12-06T15:54:28.75Z" }, + { url = "https://files.pythonhosted.org/packages/cb/aa/7c4818c8d7d324da220f4f1af55c343956003aa4d1ce1857bdc1d396ba69/orjson-3.11.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b29d36b60e606df01959c4b982729c8845c69d1963f88686608be9ced96dbfaa", size = 137307, upload-time = "2025-12-06T15:54:29.856Z" }, + { url = "https://files.pythonhosted.org/packages/46/bf/0993b5a056759ba65145effe3a79dd5a939d4a070eaa5da2ee3180fbb13f/orjson-3.11.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c74099c6b230d4261fdc3169d50efc09abf38ace1a42ea2f9994b1d79153d477", size = 139020, upload-time = "2025-12-06T15:54:31.024Z" }, + { url = "https://files.pythonhosted.org/packages/65/e8/83a6c95db3039e504eda60fc388f9faedbb4f6472f5aba7084e06552d9aa/orjson-3.11.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e697d06ad57dd0c7a737771d470eedc18e68dfdefcdd3b7de7f33dfda5b6212e", size = 141099, upload-time = "2025-12-06T15:54:32.196Z" }, + { url = "https://files.pythonhosted.org/packages/b9/b4/24fdc024abfce31c2f6812973b0a693688037ece5dc64b7a60c1ce69e2f2/orjson-3.11.5-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:e08ca8a6c851e95aaecc32bc44a5aa75d0ad26af8cdac7c77e4ed93acf3d5b69", size = 413540, upload-time = 
"2025-12-06T15:54:33.361Z" }, + { url = "https://files.pythonhosted.org/packages/d9/37/01c0ec95d55ed0c11e4cae3e10427e479bba40c77312b63e1f9665e0737d/orjson-3.11.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e8b5f96c05fce7d0218df3fdfeb962d6b8cfff7e3e20264306b46dd8b217c0f3", size = 151530, upload-time = "2025-12-06T15:54:34.6Z" }, + { url = "https://files.pythonhosted.org/packages/f9/d4/f9ebc57182705bb4bbe63f5bbe14af43722a2533135e1d2fb7affa0c355d/orjson-3.11.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ddbfdb5099b3e6ba6d6ea818f61997bb66de14b411357d24c4612cf1ebad08ca", size = 141863, upload-time = "2025-12-06T15:54:35.801Z" }, + { url = "https://files.pythonhosted.org/packages/0d/04/02102b8d19fdcb009d72d622bb5781e8f3fae1646bf3e18c53d1bc8115b5/orjson-3.11.5-cp312-cp312-win32.whl", hash = "sha256:9172578c4eb09dbfcf1657d43198de59b6cef4054de385365060ed50c458ac98", size = 135255, upload-time = "2025-12-06T15:54:37.209Z" }, + { url = "https://files.pythonhosted.org/packages/d4/fb/f05646c43d5450492cb387de5549f6de90a71001682c17882d9f66476af5/orjson-3.11.5-cp312-cp312-win_amd64.whl", hash = "sha256:2b91126e7b470ff2e75746f6f6ee32b9ab67b7a93c8ba1d15d3a0caaf16ec875", size = 133252, upload-time = "2025-12-06T15:54:38.401Z" }, + { url = "https://files.pythonhosted.org/packages/dc/a6/7b8c0b26ba18c793533ac1cd145e131e46fcf43952aa94c109b5b913c1f0/orjson-3.11.5-cp312-cp312-win_arm64.whl", hash = "sha256:acbc5fac7e06777555b0722b8ad5f574739e99ffe99467ed63da98f97f9ca0fe", size = 126777, upload-time = "2025-12-06T15:54:39.515Z" }, ] [[package]] @@ -322,28 +324,28 @@ wheels = [ [[package]] name = "ruff" -version = "0.14.8" +version = "0.14.14" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ed/d9/f7a0c4b3a2bf2556cd5d99b05372c29980249ef71e8e32669ba77428c82c/ruff-0.14.8.tar.gz", hash = "sha256:774ed0dd87d6ce925e3b8496feb3a00ac564bea52b9feb551ecd17e0a23d1eed", size = 5765385, upload-time = "2025-12-04T15:06:17.669Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2e/06/f71e3a86b2df0dfa2d2f72195941cd09b44f87711cb7fa5193732cb9a5fc/ruff-0.14.14.tar.gz", hash = "sha256:2d0f819c9a90205f3a867dbbd0be083bee9912e170fd7d9704cc8ae45824896b", size = 4515732, upload-time = "2026-01-22T22:30:17.527Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/48/b8/9537b52010134b1d2b72870cc3f92d5fb759394094741b09ceccae183fbe/ruff-0.14.8-py3-none-linux_armv6l.whl", hash = "sha256:ec071e9c82eca417f6111fd39f7043acb53cd3fde9b1f95bbed745962e345afb", size = 13441540, upload-time = "2025-12-04T15:06:14.896Z" }, - { url = "https://files.pythonhosted.org/packages/24/00/99031684efb025829713682012b6dd37279b1f695ed1b01725f85fd94b38/ruff-0.14.8-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:8cdb162a7159f4ca36ce980a18c43d8f036966e7f73f866ac8f493b75e0c27e9", size = 13669384, upload-time = "2025-12-04T15:06:51.809Z" }, - { url = "https://files.pythonhosted.org/packages/72/64/3eb5949169fc19c50c04f28ece2c189d3b6edd57e5b533649dae6ca484fe/ruff-0.14.8-py3-none-macosx_11_0_arm64.whl", hash = "sha256:2e2fcbefe91f9fad0916850edf0854530c15bd1926b6b779de47e9ab619ea38f", size = 12806917, upload-time = "2025-12-04T15:06:08.925Z" }, - { url = "https://files.pythonhosted.org/packages/c4/08/5250babb0b1b11910f470370ec0cbc67470231f7cdc033cee57d4976f941/ruff-0.14.8-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9d70721066a296f45786ec31916dc287b44040f553da21564de0ab4d45a869b", size = 13256112, upload-time = "2025-12-04T15:06:23.498Z" }, - { url 
= "https://files.pythonhosted.org/packages/78/4c/6c588e97a8e8c2d4b522c31a579e1df2b4d003eddfbe23d1f262b1a431ff/ruff-0.14.8-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2c87e09b3cd9d126fc67a9ecd3b5b1d3ded2b9c7fce3f16e315346b9d05cfb52", size = 13227559, upload-time = "2025-12-04T15:06:33.432Z" }, - { url = "https://files.pythonhosted.org/packages/23/ce/5f78cea13eda8eceac71b5f6fa6e9223df9b87bb2c1891c166d1f0dce9f1/ruff-0.14.8-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d62cb310c4fbcb9ee4ac023fe17f984ae1e12b8a4a02e3d21489f9a2a5f730c", size = 13896379, upload-time = "2025-12-04T15:06:02.687Z" }, - { url = "https://files.pythonhosted.org/packages/cf/79/13de4517c4dadce9218a20035b21212a4c180e009507731f0d3b3f5df85a/ruff-0.14.8-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:1af35c2d62633d4da0521178e8a2641c636d2a7153da0bac1b30cfd4ccd91344", size = 15372786, upload-time = "2025-12-04T15:06:29.828Z" }, - { url = "https://files.pythonhosted.org/packages/00/06/33df72b3bb42be8a1c3815fd4fae83fa2945fc725a25d87ba3e42d1cc108/ruff-0.14.8-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:25add4575ffecc53d60eed3f24b1e934493631b48ebbc6ebaf9d8517924aca4b", size = 14990029, upload-time = "2025-12-04T15:06:36.812Z" }, - { url = "https://files.pythonhosted.org/packages/64/61/0f34927bd90925880394de0e081ce1afab66d7b3525336f5771dcf0cb46c/ruff-0.14.8-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4c943d847b7f02f7db4201a0600ea7d244d8a404fbb639b439e987edcf2baf9a", size = 14407037, upload-time = "2025-12-04T15:06:39.979Z" }, - { url = "https://files.pythonhosted.org/packages/96/bc/058fe0aefc0fbf0d19614cb6d1a3e2c048f7dc77ca64957f33b12cfdc5ef/ruff-0.14.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb6e8bf7b4f627548daa1b69283dac5a296bfe9ce856703b03130732e20ddfe2", size = 14102390, upload-time = "2025-12-04T15:06:46.372Z" }, - { url = "https://files.pythonhosted.org/packages/af/a4/e4f77b02b804546f4c17e8b37a524c27012dd6ff05855d2243b49a7d3cb9/ruff-0.14.8-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:7aaf2974f378e6b01d1e257c6948207aec6a9b5ba53fab23d0182efb887a0e4a", size = 14230793, upload-time = "2025-12-04T15:06:20.497Z" }, - { url = "https://files.pythonhosted.org/packages/3f/52/bb8c02373f79552e8d087cedaffad76b8892033d2876c2498a2582f09dcf/ruff-0.14.8-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e5758ca513c43ad8a4ef13f0f081f80f08008f410790f3611a21a92421ab045b", size = 13160039, upload-time = "2025-12-04T15:06:49.06Z" }, - { url = "https://files.pythonhosted.org/packages/1f/ad/b69d6962e477842e25c0b11622548df746290cc6d76f9e0f4ed7456c2c31/ruff-0.14.8-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:f74f7ba163b6e85a8d81a590363bf71618847e5078d90827749bfda1d88c9cdf", size = 13205158, upload-time = "2025-12-04T15:06:54.574Z" }, - { url = "https://files.pythonhosted.org/packages/06/63/54f23da1315c0b3dfc1bc03fbc34e10378918a20c0b0f086418734e57e74/ruff-0.14.8-py3-none-musllinux_1_2_i686.whl", hash = "sha256:eed28f6fafcc9591994c42254f5a5c5ca40e69a30721d2ab18bb0bb3baac3ab6", size = 13469550, upload-time = "2025-12-04T15:05:59.209Z" }, - { url = "https://files.pythonhosted.org/packages/70/7d/a4d7b1961e4903bc37fffb7ddcfaa7beb250f67d97cfd1ee1d5cddb1ec90/ruff-0.14.8-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:21d48fa744c9d1cb8d71eb0a740c4dd02751a5de9db9a730a8ef75ca34cf138e", size = 14211332, upload-time = "2025-12-04T15:06:06.027Z" }, - { url = 
"https://files.pythonhosted.org/packages/5d/93/2a5063341fa17054e5c86582136e9895db773e3c2ffb770dde50a09f35f0/ruff-0.14.8-py3-none-win32.whl", hash = "sha256:15f04cb45c051159baebb0f0037f404f1dc2f15a927418f29730f411a79bc4e7", size = 13151890, upload-time = "2025-12-04T15:06:11.668Z" }, - { url = "https://files.pythonhosted.org/packages/02/1c/65c61a0859c0add13a3e1cbb6024b42de587456a43006ca2d4fd3d1618fe/ruff-0.14.8-py3-none-win_amd64.whl", hash = "sha256:9eeb0b24242b5bbff3011409a739929f497f3fb5fe3b5698aba5e77e8c833097", size = 14537826, upload-time = "2025-12-04T15:06:26.409Z" }, - { url = "https://files.pythonhosted.org/packages/6d/63/8b41cea3afd7f58eb64ac9251668ee0073789a3bc9ac6f816c8c6fef986d/ruff-0.14.8-py3-none-win_arm64.whl", hash = "sha256:965a582c93c63fe715fd3e3f8aa37c4b776777203d8e1d8aa3cc0c14424a4b99", size = 13634522, upload-time = "2025-12-04T15:06:43.212Z" }, + { url = "https://files.pythonhosted.org/packages/d2/89/20a12e97bc6b9f9f68343952da08a8099c57237aef953a56b82711d55edd/ruff-0.14.14-py3-none-linux_armv6l.whl", hash = "sha256:7cfe36b56e8489dee8fbc777c61959f60ec0f1f11817e8f2415f429552846aed", size = 10467650, upload-time = "2026-01-22T22:30:08.578Z" }, + { url = "https://files.pythonhosted.org/packages/a3/b1/c5de3fd2d5a831fcae21beda5e3589c0ba67eec8202e992388e4b17a6040/ruff-0.14.14-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6006a0082336e7920b9573ef8a7f52eec837add1265cc74e04ea8a4368cd704c", size = 10883245, upload-time = "2026-01-22T22:30:04.155Z" }, + { url = "https://files.pythonhosted.org/packages/b8/7c/3c1db59a10e7490f8f6f8559d1db8636cbb13dccebf18686f4e3c9d7c772/ruff-0.14.14-py3-none-macosx_11_0_arm64.whl", hash = "sha256:026c1d25996818f0bf498636686199d9bd0d9d6341c9c2c3b62e2a0198b758de", size = 10231273, upload-time = "2026-01-22T22:30:34.642Z" }, + { url = "https://files.pythonhosted.org/packages/a1/6e/5e0e0d9674be0f8581d1f5e0f0a04761203affce3232c1a1189d0e3b4dad/ruff-0.14.14-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f666445819d31210b71e0a6d1c01e24447a20b85458eea25a25fe8142210ae0e", size = 10585753, upload-time = "2026-01-22T22:30:31.781Z" }, + { url = "https://files.pythonhosted.org/packages/23/09/754ab09f46ff1884d422dc26d59ba18b4e5d355be147721bb2518aa2a014/ruff-0.14.14-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3c0f18b922c6d2ff9a5e6c3ee16259adc513ca775bcf82c67ebab7cbd9da5bc8", size = 10286052, upload-time = "2026-01-22T22:30:24.827Z" }, + { url = "https://files.pythonhosted.org/packages/c8/cc/e71f88dd2a12afb5f50733851729d6b571a7c3a35bfdb16c3035132675a0/ruff-0.14.14-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1629e67489c2dea43e8658c3dba659edbfd87361624b4040d1df04c9740ae906", size = 11043637, upload-time = "2026-01-22T22:30:13.239Z" }, + { url = "https://files.pythonhosted.org/packages/67/b2/397245026352494497dac935d7f00f1468c03a23a0c5db6ad8fc49ca3fb2/ruff-0.14.14-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:27493a2131ea0f899057d49d303e4292b2cae2bb57253c1ed1f256fbcd1da480", size = 12194761, upload-time = "2026-01-22T22:30:22.542Z" }, + { url = "https://files.pythonhosted.org/packages/5b/06/06ef271459f778323112c51b7587ce85230785cd64e91772034ddb88f200/ruff-0.14.14-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:01ff589aab3f5b539e35db38425da31a57521efd1e4ad1ae08fc34dbe30bd7df", size = 12005701, upload-time = "2026-01-22T22:30:20.499Z" }, + { url = 
"https://files.pythonhosted.org/packages/41/d6/99364514541cf811ccc5ac44362f88df66373e9fec1b9d1c4cc830593fe7/ruff-0.14.14-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc12d74eef0f29f51775f5b755913eb523546b88e2d733e1d701fe65144e89b", size = 11282455, upload-time = "2026-01-22T22:29:59.679Z" }, + { url = "https://files.pythonhosted.org/packages/ca/71/37daa46f89475f8582b7762ecd2722492df26421714a33e72ccc9a84d7a5/ruff-0.14.14-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb8481604b7a9e75eff53772496201690ce2687067e038b3cc31aaf16aa0b974", size = 11215882, upload-time = "2026-01-22T22:29:57.032Z" }, + { url = "https://files.pythonhosted.org/packages/2c/10/a31f86169ec91c0705e618443ee74ede0bdd94da0a57b28e72db68b2dbac/ruff-0.14.14-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:14649acb1cf7b5d2d283ebd2f58d56b75836ed8c6f329664fa91cdea19e76e66", size = 11180549, upload-time = "2026-01-22T22:30:27.175Z" }, + { url = "https://files.pythonhosted.org/packages/fd/1e/c723f20536b5163adf79bdd10c5f093414293cdf567eed9bdb7b83940f3f/ruff-0.14.14-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e8058d2145566510790eab4e2fad186002e288dec5e0d343a92fe7b0bc1b3e13", size = 10543416, upload-time = "2026-01-22T22:30:01.964Z" }, + { url = "https://files.pythonhosted.org/packages/3e/34/8a84cea7e42c2d94ba5bde1d7a4fae164d6318f13f933d92da6d7c2041ff/ruff-0.14.14-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e651e977a79e4c758eb807f0481d673a67ffe53cfa92209781dfa3a996cf8412", size = 10285491, upload-time = "2026-01-22T22:30:29.51Z" }, + { url = "https://files.pythonhosted.org/packages/55/ef/b7c5ea0be82518906c978e365e56a77f8de7678c8bb6651ccfbdc178c29f/ruff-0.14.14-py3-none-musllinux_1_2_i686.whl", hash = "sha256:cc8b22da8d9d6fdd844a68ae937e2a0adf9b16514e9a97cc60355e2d4b219fc3", size = 10733525, upload-time = "2026-01-22T22:30:06.499Z" }, + { url = "https://files.pythonhosted.org/packages/6a/5b/aaf1dfbcc53a2811f6cc0a1759de24e4b03e02ba8762daabd9b6bd8c59e3/ruff-0.14.14-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:16bc890fb4cc9781bb05beb5ab4cd51be9e7cb376bf1dd3580512b24eb3fda2b", size = 11315626, upload-time = "2026-01-22T22:30:36.848Z" }, + { url = "https://files.pythonhosted.org/packages/2c/aa/9f89c719c467dfaf8ad799b9bae0df494513fb21d31a6059cb5870e57e74/ruff-0.14.14-py3-none-win32.whl", hash = "sha256:b530c191970b143375b6a68e6f743800b2b786bbcf03a7965b06c4bf04568167", size = 10502442, upload-time = "2026-01-22T22:30:38.93Z" }, + { url = "https://files.pythonhosted.org/packages/87/44/90fa543014c45560cae1fffc63ea059fb3575ee6e1cb654562197e5d16fb/ruff-0.14.14-py3-none-win_amd64.whl", hash = "sha256:3dde1435e6b6fe5b66506c1dff67a421d0b7f6488d466f651c07f4cab3bf20fd", size = 11630486, upload-time = "2026-01-22T22:30:10.852Z" }, + { url = "https://files.pythonhosted.org/packages/9e/6a/40fee331a52339926a92e17ae748827270b288a35ef4a15c9c8f2ec54715/ruff-0.14.14-py3-none-win_arm64.whl", hash = "sha256:56e6981a98b13a32236a72a8da421d7839221fa308b223b9283312312e5ac76c", size = 10920448, upload-time = "2026-01-22T22:30:15.417Z" }, ] [[package]] @@ -382,15 +384,15 @@ wheels = [ [[package]] name = "uvicorn" -version = "0.38.0" +version = "0.40.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "h11" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/cb/ce/f06b84e2697fef4688ca63bdb2fdf113ca0a3be33f94488f2cadb690b0cf/uvicorn-0.38.0.tar.gz", hash = "sha256:fd97093bdd120a2609fc0d3afe931d4d4ad688b6e75f0f929fde1bc36fe0e91d", size = 
80605, upload-time = "2025-10-18T13:46:44.63Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/d1/8f3c683c9561a4e6689dd3b1d345c815f10f86acd044ee1fb9a4dcd0b8c5/uvicorn-0.40.0.tar.gz", hash = "sha256:839676675e87e73694518b5574fd0f24c9d97b46bea16df7b8c05ea1a51071ea", size = 81761, upload-time = "2025-12-21T14:16:22.45Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ee/d9/d88e73ca598f4f6ff671fb5fde8a32925c2e08a637303a1d12883c7305fa/uvicorn-0.38.0-py3-none-any.whl", hash = "sha256:48c0afd214ceb59340075b4a052ea1ee91c16fbc2a9b1469cca0e54566977b02", size = 68109, upload-time = "2025-10-18T13:46:42.958Z" }, + { url = "https://files.pythonhosted.org/packages/3d/d8/2083a1daa7439a66f3a48589a57d576aa117726762618f6bb09fe3798796/uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee", size = 68502, upload-time = "2025-12-21T14:16:21.041Z" }, ] [[package]] From 889f2d257ba15a61339de924fb6a67a6fefe6516 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 11:15:41 +0700 Subject: [PATCH 44/54] Fix IDE warnings --- app/services/lmdb.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 93c7723..dec148b 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -52,7 +52,7 @@ def _hash_message(message: Message) -> str: func["arguments"] = orjson.dumps( parsed, option=orjson.OPT_SORT_KEYS ).decode("utf-8") - except Exception: + except orjson.JSONDecodeError: pass message_bytes = orjson.dumps(message_dict, option=orjson.OPT_SORT_KEYS) @@ -175,7 +175,7 @@ def store( value = orjson.dumps(conv.model_dump(mode="json")) try: - with self._get_transaction(write=True) as txn: + with self._get_transaction(self, write=True) as txn: # Store main data txn.put(storage_key.encode("utf-8"), value, overwrite=True) @@ -203,7 +203,7 @@ def get(self, key: str) -> Optional[ConversationInStore]: Conversation or None if not found """ try: - with self._get_transaction(write=False) as txn: + with self._get_transaction(self, write=False) as txn: data = txn.get(key.encode("utf-8"), default=None) if not data: return None @@ -255,7 +255,7 @@ def _find_by_message_list( key = f"{self.HASH_LOOKUP_PREFIX}{message_hash}" try: - with self._get_transaction(write=False) as txn: + with self._get_transaction(self, write=False) as txn: if mapped := txn.get(key.encode("utf-8")): # type: ignore return self.get(mapped.decode("utf-8")) # type: ignore except Exception as e: @@ -279,7 +279,7 @@ def exists(self, key: str) -> bool: bool: True if key exists, False otherwise """ try: - with self._get_transaction(write=False) as txn: + with self._get_transaction(self, write=False) as txn: return txn.get(key.encode("utf-8")) is not None except Exception as e: logger.error(f"Failed to check existence of key {key}: {e}") @@ -296,7 +296,7 @@ def delete(self, key: str) -> Optional[ConversationInStore]: ConversationInStore: The deleted conversation data, or None if not found """ try: - with self._get_transaction(write=True) as txn: + with self._get_transaction(self, write=True) as txn: # Get data first to clean up hash mapping data = txn.get(key.encode("utf-8")) if not data: @@ -333,7 +333,7 @@ def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]: """ keys = [] try: - with self._get_transaction(write=False) as txn: + with self._get_transaction(self, write=False) as txn: cursor = txn.cursor() cursor.first() @@ -377,7 +377,7 @@ 
def cleanup_expired(self, retention_days: Optional[int] = None) -> int: expired_entries: list[tuple[str, ConversationInStore]] = [] try: - with self._get_transaction(write=False) as txn: + with self._get_transaction(self, write=False) as txn: cursor = txn.cursor() for key_bytes, value_bytes in cursor: @@ -407,7 +407,7 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: removed = 0 try: - with self._get_transaction(write=True) as txn: + with self._get_transaction(self, write=True) as txn: for key_str, conv in expired_entries: key_bytes = key_str.encode("utf-8") if not txn.delete(key_bytes): From 66b62020330e690499ef386e81cee52dc0f97cce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 11:26:16 +0700 Subject: [PATCH 45/54] Incorrect IDE warnings --- app/services/lmdb.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index dec148b..c8e78a9 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -175,7 +175,7 @@ def store( value = orjson.dumps(conv.model_dump(mode="json")) try: - with self._get_transaction(self, write=True) as txn: + with self._get_transaction(write=True) as txn: # Store main data txn.put(storage_key.encode("utf-8"), value, overwrite=True) @@ -203,7 +203,7 @@ def get(self, key: str) -> Optional[ConversationInStore]: Conversation or None if not found """ try: - with self._get_transaction(self, write=False) as txn: + with self._get_transaction(write=False) as txn: data = txn.get(key.encode("utf-8"), default=None) if not data: return None @@ -255,7 +255,7 @@ def _find_by_message_list( key = f"{self.HASH_LOOKUP_PREFIX}{message_hash}" try: - with self._get_transaction(self, write=False) as txn: + with self._get_transaction(write=False) as txn: if mapped := txn.get(key.encode("utf-8")): # type: ignore return self.get(mapped.decode("utf-8")) # type: ignore except Exception as e: @@ -279,7 +279,7 @@ def exists(self, key: str) -> bool: bool: True if key exists, False otherwise """ try: - with self._get_transaction(self, write=False) as txn: + with self._get_transaction(write=False) as txn: return txn.get(key.encode("utf-8")) is not None except Exception as e: logger.error(f"Failed to check existence of key {key}: {e}") @@ -296,7 +296,7 @@ def delete(self, key: str) -> Optional[ConversationInStore]: ConversationInStore: The deleted conversation data, or None if not found """ try: - with self._get_transaction(self, write=True) as txn: + with self._get_transaction(write=True) as txn: # Get data first to clean up hash mapping data = txn.get(key.encode("utf-8")) if not data: @@ -333,7 +333,7 @@ def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]: """ keys = [] try: - with self._get_transaction(self, write=False) as txn: + with self._get_transaction(write=False) as txn: cursor = txn.cursor() cursor.first() @@ -377,7 +377,7 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: expired_entries: list[tuple[str, ConversationInStore]] = [] try: - with self._get_transaction(self, write=False) as txn: + with self._get_transaction(write=False) as txn: cursor = txn.cursor() for key_bytes, value_bytes in cursor: @@ -407,7 +407,7 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: removed = 0 try: - with self._get_transaction(self, write=True) as txn: + with self._get_transaction(write=True) as txn: for key_str, conv in expired_entries: key_bytes = key_str.encode("utf-8") if not 
txn.delete(key_bytes): From 3297f534f035f869bd7e4a867618b39bc7256f06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 12:05:26 +0700 Subject: [PATCH 46/54] Refactor: Modify the LMDB store to fix issues where no conversation is found. --- app/services/lmdb.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index c8e78a9..a55d3a9 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -476,9 +476,7 @@ def sanitize_assistant_messages(messages: list[Message]) -> list[Message]: if isinstance(msg.content, str): normalized_content = LMDBConversationStore.remove_think_tags(msg.content) if normalized_content != msg.content: - cleaned_msg = Message( - role=msg.role, content=normalized_content, name=msg.name - ) + cleaned_msg = msg.model_copy(update={"content": normalized_content}) cleaned_messages.append(cleaned_msg) else: cleaned_messages.append(msg) From 5399b260595e77d6c1f0a8d24a880c59d165a57b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 12:06:52 +0700 Subject: [PATCH 47/54] Refactor: Centralized the mapping of the 'developer' role to 'system' for better Gemini compatibility. --- app/models/models.py | 7 +++++++ app/server/chat.py | 6 +----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index c27e024..63ddb94 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -29,6 +29,13 @@ class Message(BaseModel): audio: Optional[Dict[str, Any]] = None annotations: List[Dict[str, Any]] = Field(default_factory=list) + @model_validator(mode="after") + def normalize_role(self) -> "Message": + """Normalize 'developer' role to 'system' for Gemini compatibility.""" + if self.role == "developer": + self.role = "system" + return self + class Choice(BaseModel): """Choice model""" diff --git a/app/server/chat.py b/app/server/chat.py index a9d9dec..66a2720 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -319,8 +319,6 @@ def _response_items_to_messages( normalized_input: list[ResponseInputItem] = [] for item in items: role = item.role - if role == "developer": - role = "system" content = item.content normalized_contents: list[ResponseInputContent] = [] @@ -394,8 +392,6 @@ def _instructions_to_messages( continue role = item.role - if role == "developer": - role = "system" content = item.content if isinstance(content, str): @@ -1054,7 +1050,7 @@ async def _find_reusable_session( while search_end >= 2: search_history = messages[:search_end] - # Only try to match if the last stored message would be assistant/system. + # Only try to match if the last stored message would be assistant/system before querying LMDB. if search_history[-1].role in {"assistant", "system"}: try: if conv := db.find(model.model_name, search_history): From de01c7850fa44f4dcbd8f31c47bccaf301861a56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 13:04:31 +0700 Subject: [PATCH 48/54] Refactor: Modify the LMDB store to fix issues where no conversation is found. 
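A rough standalone sketch of the canonicalization idea behind this change
(not code from this repo): hashing a key-sorted serialization makes the
digest independent of key order and whitespace, which is what lets a
re-sent history match its stored counterpart.

    import hashlib

    import orjson

    def canonical_hash(payload: dict) -> str:
        # OPT_SORT_KEYS makes the serialization byte-stable, so two
        # semantically equal dicts always produce the same digest.
        blob = orjson.dumps(payload, option=orjson.OPT_SORT_KEYS)
        return hashlib.sha256(blob).hexdigest()

    assert canonical_hash({"b": 1, "a": 2}) == canonical_hash({"a": 2, "b": 1})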
--- app/models/models.py | 1 + app/services/lmdb.py | 95 +++++++++++++++++++++++++------------------- app/utils/helper.py | 10 +++-- 3 files changed, 63 insertions(+), 43 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 63ddb94..4072b29 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -24,6 +24,7 @@ class Message(BaseModel): content: Union[str, List[ContentItem], None] = None name: Optional[str] = None tool_calls: Optional[List["ToolCall"]] = None + tool_call_id: Optional[str] = None refusal: Optional[str] = None reasoning_content: Optional[str] = None audio: Optional[Dict[str, Any]] = None diff --git a/app/services/lmdb.py b/app/services/lmdb.py index a55d3a9..594acf0 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -15,53 +15,69 @@ def _hash_message(message: Message) -> str: - """Generate a hash for a single message.""" - # Convert message to dict and sort keys for consistent hashing - message_dict = message.model_dump(mode="json") - - # Normalize content: empty string -> None - content = message_dict.get("content") - if content == "": - message_dict["content"] = None + """Generate a consistent hash for a single message focusing only on core identity fields.""" + # Pick only fields that define the message in a conversation history + core_data = { + "role": message.role, + "name": message.name, + "tool_call_id": message.tool_call_id, + } + + # Normalize content: strip, handle empty/None, and list-of-text items + content = message.content + if not content: + core_data["content"] = None + elif isinstance(content, str): + stripped = content.strip() + core_data["content"] = stripped if stripped else None elif isinstance(content, list): - is_pure_text = True text_parts = [] for item in content: - if not isinstance(item, dict) or item.get("type") != "text": - is_pure_text = False + if isinstance(item, ContentItem) and item.type == "text": + text_parts.append(item.text or "") + elif isinstance(item, dict) and item.get("type") == "text": + text_parts.append(item.get("text") or "") + else: + # If it contains non-text (images/files), keep the full list for hashing + text_parts = None break - text_parts.append(item.get("text") or "") - - if is_pure_text: - text_content = "".join(text_parts) - message_dict["content"] = text_content if text_content else None - - # Normalize tool_calls: empty list -> None, and canonicalize arguments - tool_calls = message_dict.get("tool_calls") - if not tool_calls: - message_dict["tool_calls"] = None - elif isinstance(tool_calls, list): - for tool_call in tool_calls: - if isinstance(tool_call, dict) and "function" in tool_call: - func = tool_call["function"] - args = func.get("arguments") - if isinstance(args, str): - try: - # Parse and re-dump to canonicalize (remove extra whitespace, sort keys) - parsed = orjson.loads(args) - func["arguments"] = orjson.dumps( - parsed, option=orjson.OPT_SORT_KEYS - ).decode("utf-8") - except orjson.JSONDecodeError: - pass - - message_bytes = orjson.dumps(message_dict, option=orjson.OPT_SORT_KEYS) + + if text_parts is not None: + text_content = "".join(text_parts).strip() + core_data["content"] = text_content if text_content else None + else: + core_data["content"] = message.model_dump(mode="json")["content"] + + # Normalize tool_calls: canonicalize arguments and sort by name if multiple calls exist + if message.tool_calls: + calls_data = [] + for tc in message.tool_calls: + args = tc.function.arguments or "{}" + try: + parsed = orjson.loads(args) + canon_args = 
orjson.dumps(parsed, option=orjson.OPT_SORT_KEYS).decode("utf-8") + except orjson.JSONDecodeError: + canon_args = args + + calls_data.append( + { + "id": tc.id, # Deterministic IDs ensure this is stable + "name": tc.function.name, + "arguments": canon_args, + } + ) + # Sort calls to be order-independent + calls_data.sort(key=lambda x: (x["name"], x["arguments"])) + core_data["tool_calls"] = calls_data + else: + core_data["tool_calls"] = None + + message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) return hashlib.sha256(message_bytes).hexdigest() def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str: - """Generate a hash for a list of messages and client id.""" - # Create a combined hash from all individual message hashes + """Generate a hash for a list of messages and model name, tied to a specific client_id.""" combined_hash = hashlib.sha256() combined_hash.update(client_id.encode("utf-8")) combined_hash.update(model.encode("utf-8")) @@ -252,7 +268,6 @@ def _find_by_message_list( """Internal find implementation based on a message list.""" for c in g_config.gemini.clients: message_hash = _hash_conversation(c.id, model, messages) - key = f"{self.HASH_LOOKUP_PREFIX}{message_hash}" try: with self._get_transaction(write=False) as txn: diff --git a/app/utils/helper.py b/app/utils/helper.py index 1dc518f..239b7f4 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -1,9 +1,9 @@ import base64 +import hashlib import mimetypes import re import struct import tempfile -import uuid from pathlib import Path from typing import Iterator from urllib.parse import urlparse @@ -222,13 +222,17 @@ def _create_tool_call(name: str, raw_args: str) -> None: arguments = raw_args try: parsed_args = orjson.loads(raw_args) - arguments = orjson.dumps(parsed_args).decode("utf-8") + arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8") except orjson.JSONDecodeError: logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.") + # Generate a deterministic ID based on name and arguments to avoid hash mismatch in LMDB + seed = f"{name}:{arguments}".encode("utf-8") + call_id = f"call_{hashlib.sha256(seed).hexdigest()[:24]}" + tool_calls.append( ToolCall( - id=f"call_{uuid.uuid4().hex}", + id=call_id, type="function", function=FunctionCall(name=name, arguments=arguments), ) From 196414755e860f1f6d9c840954eb45c53225a864 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 13:26:58 +0700 Subject: [PATCH 49/54] Refactor: Modify the LMDB store to fix issues where no conversation is found. --- app/server/chat.py | 10 +++++++++- app/services/lmdb.py | 7 ++----- app/utils/helper.py | 13 +++++++------ 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 66a2720..7c683cd 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1047,6 +1047,8 @@ async def _find_reusable_session( # Start with the full history and iteratively trim from the end. search_end = len(messages) + logger.debug(f"Searching for reusable session in history of length {search_end}...") + while search_end >= 2: search_history = messages[:search_end] @@ -1057,14 +1059,20 @@ async def _find_reusable_session( client = await pool.acquire(conv.client_id) session = client.start_chat(metadata=conv.metadata, model=model) remain = messages[search_end:] + logger.debug( + f"Match found at prefix length {search_end}. 
Client: {conv.client_id}" + ) return session, client, remain except Exception as e: - logger.warning(f"Error checking LMDB for reusable session: {e}") + logger.warning( + f"Error checking LMDB for reusable session at length {search_end}: {e}" + ) break # Trim one message and try again. search_end -= 1 + logger.debug("No reusable session found after checking all possible prefixes.") return None, None, messages diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 594acf0..5aefa4b 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -15,12 +15,10 @@ def _hash_message(message: Message) -> str: - """Generate a consistent hash for a single message focusing only on core identity fields.""" - # Pick only fields that define the message in a conversation history + """Generate a consistent hash for a single message focusing ONLY on logic/content, ignoring technical IDs.""" core_data = { "role": message.role, "name": message.name, - "tool_call_id": message.tool_call_id, } # Normalize content: strip, handle empty/None, and list-of-text items @@ -48,7 +46,7 @@ def _hash_message(message: Message) -> str: else: core_data["content"] = message.model_dump(mode="json")["content"] - # Normalize tool_calls: canonicalize arguments and sort by name if multiple calls exist + # Normalize tool_calls: Focus ONLY on function name and arguments if message.tool_calls: calls_data = [] for tc in message.tool_calls: @@ -61,7 +59,6 @@ def _hash_message(message: Message) -> str: calls_data.append( { - "id": tc.id, # Deterministic IDs ensure this is stable "name": tc.function.name, "arguments": canon_args, } diff --git a/app/utils/helper.py b/app/utils/helper.py index 239b7f4..ecf4a47 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -213,7 +213,7 @@ def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: tool_calls: list[ToolCall] = [] - def _create_tool_call(name: str, raw_args: str) -> None: + def _create_tool_call(name: str, raw_args: str, index: int) -> None: """Helper to parse args and append to the tool_calls list.""" if not name: logger.warning("Encountered tool_call without a function name.") @@ -226,8 +226,8 @@ def _create_tool_call(name: str, raw_args: str) -> None: except orjson.JSONDecodeError: logger.warning(f"Failed to parse tool call arguments for '{name}'. 
Passing raw string.") - # Generate a deterministic ID based on name and arguments to avoid hash mismatch in LMDB - seed = f"{name}:{arguments}".encode("utf-8") + # Generate a deterministic ID based on name, arguments, and index to avoid collisions + seed = f"{name}:{arguments}:{index}".encode("utf-8") call_id = f"call_{hashlib.sha256(seed).hexdigest()[:24]}" tool_calls.append( @@ -244,11 +244,11 @@ def _replace_block(match: re.Match[str]) -> str: return match.group(0) found_in_block = False - for call_match in TOOL_CALL_RE.finditer(block_content): + for i, call_match in enumerate(TOOL_CALL_RE.finditer(block_content)): found_in_block = True name = (call_match.group(1) or "").strip() raw_args = (call_match.group(2) or "").strip() - _create_tool_call(name, raw_args) + _create_tool_call(name, raw_args, i) if found_in_block: return "" @@ -258,9 +258,10 @@ def _replace_block(match: re.Match[str]) -> str: cleaned = TOOL_BLOCK_RE.sub(_replace_block, text) def _replace_orphan(match: re.Match[str]) -> str: + # Note: orphan calls are handled with a fallback index if they appear outside blocks name = (match.group(1) or "").strip() raw_args = (match.group(2) or "").strip() - _create_tool_call(name, raw_args) + _create_tool_call(name, raw_args, len(tool_calls)) return "" cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned) From 8c5c7498230bc680bf50464dacf0b6f001888981 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 13:42:09 +0700 Subject: [PATCH 50/54] Refactor: Modify the LMDB store to fix issues where no conversation is found. --- app/server/chat.py | 4 ++-- app/services/lmdb.py | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 7c683cd..0d64b71 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1052,8 +1052,8 @@ async def _find_reusable_session( while search_end >= 2: search_history = messages[:search_end] - # Only try to match if the last stored message would be assistant/system before querying LMDB. - if search_history[-1].role in {"assistant", "system"}: + # Only try to match if the last stored message would be assistant/system/tool before querying LMDB. + if search_history[-1].role in {"assistant", "system", "tool"}: try: if conv := db.find(model.model_name, search_history): client = await pool.acquire(conv.client_id) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 5aefa4b..c612d9e 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -70,7 +70,11 @@ def _hash_message(message: Message) -> str: core_data["tool_calls"] = None message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) - return hashlib.sha256(message_bytes).hexdigest() + msg_hash = hashlib.sha256(message_bytes).hexdigest() + logger.debug( + f"Hashing message (role={message.role}): {message_bytes.decode('utf-8')} -> {msg_hash}" + ) + return msg_hash def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str: From ce67d664b5443726fe518aee1cc9ef550ae640fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 14:41:55 +0700 Subject: [PATCH 51/54] Refactor: Avoid reusing an existing chat session if its idle time exceeds METADATA_TTL_MINUTES. 
--- app/server/chat.py | 14 ++++++++++++-- app/services/lmdb.py | 9 ++------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 0d64b71..6fbb818 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -58,6 +58,7 @@ # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) CONTINUATION_HINT = "\n(More messages to come, please reply with just 'ok.')" +METADATA_TTL_MINUTES = 20 router = APIRouter() @@ -1047,7 +1048,6 @@ async def _find_reusable_session( # Start with the full history and iteratively trim from the end. search_end = len(messages) - logger.debug(f"Searching for reusable session in history of length {search_end}...") while search_end >= 2: search_history = messages[:search_end] @@ -1056,6 +1056,17 @@ async def _find_reusable_session( if search_history[-1].role in {"assistant", "system", "tool"}: try: if conv := db.find(model.model_name, search_history): + # Check if metadata is too old + now = datetime.now() + updated_at = conv.updated_at or conv.created_at or now + age_minutes = (now - updated_at).total_seconds() / 60 + + if age_minutes > METADATA_TTL_MINUTES: + logger.debug( + f"Matched conversation is too old ({age_minutes:.1f}m), skipping reuse." + ) + break + client = await pool.acquire(conv.client_id) session = client.start_chat(metadata=conv.metadata, model=model) remain = messages[search_end:] @@ -1072,7 +1083,6 @@ async def _find_reusable_session( # Trim one message and try again. search_end -= 1 - logger.debug("No reusable session found after checking all possible prefixes.") return None, None, messages diff --git a/app/services/lmdb.py b/app/services/lmdb.py index c612d9e..424b357 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -68,13 +68,8 @@ def _hash_message(message: Message) -> str: core_data["tool_calls"] = calls_data else: core_data["tool_calls"] = None - - message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) - msg_hash = hashlib.sha256(message_bytes).hexdigest() - logger.debug( - f"Hashing message (role={message.role}): {message_bytes.decode('utf-8')} -> {msg_hash}" - ) - return msg_hash + message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) + return hashlib.sha256(message_bytes).hexdigest() def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str: From 3d32d1226b1399f4286aadd95b2c4a52228fac45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 14:58:58 +0700 Subject: [PATCH 52/54] Refactor: Update the LMDB store to resolve issues preventing conversation from being saved --- app/services/lmdb.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 424b357..2dbe7b2 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -68,15 +68,16 @@ def _hash_message(message: Message) -> str: core_data["tool_calls"] = calls_data else: core_data["tool_calls"] = None - message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) - return hashlib.sha256(message_bytes).hexdigest() + + message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) + return hashlib.sha256(message_bytes).hexdigest() def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str: """Generate a hash for a list of messages and model name, tied to a specific client_id.""" combined_hash = hashlib.sha256() - 
combined_hash.update(client_id.encode("utf-8")) - combined_hash.update(model.encode("utf-8")) + combined_hash.update((client_id or "").encode("utf-8")) + combined_hash.update((model or "").encode("utf-8")) for message in messages: message_hash = _hash_message(message) combined_hash.update(message_hash.encode("utf-8")) From 2eb9f05142ddfa1cb665b248f3faf2e278b619c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 17:57:04 +0700 Subject: [PATCH 53/54] Refactor: Update the _prepare_messages_for_model helper to omit the system instruction when reusing a session to save tokens. --- app/server/chat.py | 66 +++++++++++++++++++++++++---------------- app/services/lmdb.py | 70 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 96 insertions(+), 40 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 6fbb818..646f4fa 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -58,7 +58,7 @@ # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) CONTINUATION_HINT = "\n(More messages to come, please reply with just 'ok.')" -METADATA_TTL_MINUTES = 20 +METADATA_TTL_MINUTES = 15 router = APIRouter() @@ -268,31 +268,35 @@ def _prepare_messages_for_model( tools: list[Tool] | None, tool_choice: str | ToolChoiceFunction | None, extra_instructions: list[str] | None = None, + inject_system_defaults: bool = True, ) -> list[Message]: """Return a copy of messages enriched with tool instructions when needed.""" prepared = [msg.model_copy(deep=True) for msg in source_messages] instructions: list[str] = [] - if tools: - tool_prompt = _build_tool_prompt(tools, tool_choice) - if tool_prompt: - instructions.append(tool_prompt) - - if extra_instructions: - instructions.extend(instr for instr in extra_instructions if instr) - logger.debug( - f"Applied {len(extra_instructions)} extra instructions for tool/structured output." - ) + if inject_system_defaults: + if tools: + tool_prompt = _build_tool_prompt(tools, tool_choice) + if tool_prompt: + instructions.append(tool_prompt) + + if extra_instructions: + instructions.extend(instr for instr in extra_instructions if instr) + logger.debug( + f"Applied {len(extra_instructions)} extra instructions for tool/structured output." + ) - if not _conversation_has_code_hint(prepared): - instructions.append(CODE_BLOCK_HINT) - logger.debug("Injected default code block hint for Gemini conversation.") + if not _conversation_has_code_hint(prepared): + instructions.append(CODE_BLOCK_HINT) + logger.debug("Injected default code block hint for Gemini conversation.") if not instructions: + # Still need to ensure XML hint for the last user message if tools are present + if tools and tool_choice != "none": + _append_xml_hint_to_last_user_message(prepared) return prepared combined_instructions = "\n\n".join(instructions) - if prepared and prepared[0].role == "system" and isinstance(prepared[0].content, str): existing = prepared[0].content or "" separator = "\n\n" if existing else "" @@ -530,8 +534,14 @@ async def create_chat_completion( ) if session: + # Optimization: When reusing a session, we don't need to resend the heavy tool definitions + # or structured output instructions as they are already in the Gemini session history. 
messages_to_send = _prepare_messages_for_model( - remaining_messages, request.tools, request.tool_choice, extra_instructions + remaining_messages, + request.tools, + request.tool_choice, + extra_instructions, + inject_system_defaults=False, ) if not messages_to_send: raise HTTPException( @@ -642,17 +652,20 @@ async def create_chat_completion( # After formatting, persist the conversation to LMDB try: - last_message = Message( + current_assistant_message = Message( role="assistant", content=storage_output or None, tool_calls=tool_calls or None, ) - cleaned_history = db.sanitize_assistant_messages(request.messages) + # Sanitize the entire history including the new message to ensure consistency + full_history = [*request.messages, current_assistant_message] + cleaned_history = db.sanitize_assistant_messages(full_history) + conv = ConversationInStore( model=model.model_name, client_id=client.id, metadata=session.metadata, - messages=[*cleaned_history, last_message], + messages=cleaned_history, ) key = db.store(conv) logger.debug(f"Conversation saved to LMDB with key: {key}") @@ -780,9 +793,10 @@ async def _build_payload( if reuse_session: messages_to_send = _prepare_messages_for_model( remaining_messages, - tools=None, - tool_choice=None, - extra_instructions=extra_instructions or None, + tools=request_data.tools, # Keep for XML hint logic + tool_choice=request_data.tool_choice, + extra_instructions=None, # Already in session history + inject_system_defaults=False, ) if not messages_to_send: raise HTTPException( @@ -994,17 +1008,19 @@ async def _build_payload( ) try: - last_message = Message( + current_assistant_message = Message( role="assistant", content=storage_output or None, tool_calls=detected_tool_calls or None, ) - cleaned_history = db.sanitize_assistant_messages(messages) + full_history = [*messages, current_assistant_message] + cleaned_history = db.sanitize_assistant_messages(full_history) + conv = ConversationInStore( model=model.model_name, client_id=client.id, metadata=session.metadata, - messages=[*cleaned_history, last_message], + messages=cleaned_history, ) key = db.store(conv) logger.debug(f"Conversation saved to LMDB with key: {key}") diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 2dbe7b2..f4c9938 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -11,6 +11,7 @@ from ..models import ContentItem, ConversationInStore, Message from ..utils import g_config +from ..utils.helper import extract_tool_calls, remove_tool_call_blocks from ..utils.singleton import Singleton @@ -26,8 +27,9 @@ def _hash_message(message: Message) -> str: if not content: core_data["content"] = None elif isinstance(content, str): - stripped = content.strip() - core_data["content"] = stripped if stripped else None + # Normalize line endings and strip whitespace + normalized = content.replace("\r\n", "\n").strip() + core_data["content"] = normalized if normalized else None elif isinstance(content, list): text_parts = [] for item in content: @@ -41,7 +43,7 @@ def _hash_message(message: Message) -> str: break if text_parts is not None: - text_content = "".join(text_parts).strip() + text_content = "".join(text_parts).replace("\r\n", "\n").strip() core_data["content"] = text_content if text_content else None else: core_data["content"] = message.model_dump(mode="json")["content"] @@ -260,7 +262,9 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt return None def _find_by_message_list( - self, model: str, messages: List[Message] + self, + model: str, + 
messages: List[Message],
     ) -> Optional[ConversationInStore]:
         """Internal find implementation based on a message list."""
         for c in g_config.gemini.clients:
@@ -471,40 +475,76 @@ def __del__(self):
     @staticmethod
     def remove_think_tags(text: str) -> str:
         """
-        Remove <think>...</think> tags at the start of text and strip whitespace.
+        Remove all <think>...</think> tags and strip whitespace.
         """
-        cleaned_content = re.sub(r"^(\s*<think>.*?</think>\n?)", "", text, flags=re.DOTALL)
+        # Remove all think blocks anywhere in the text
+        cleaned_content = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
         return cleaned_content.strip()
 
     @staticmethod
     def sanitize_assistant_messages(messages: list[Message]) -> list[Message]:
         """
-        Create a new list of messages with assistant content cleaned of <think> tags.
-        This is useful for store the chat history.
+        Create a new list of messages with assistant content cleaned of <think> tags
+        and system hints/tool call blocks. This is used for both storing and
+        searching chat history to ensure consistency.
+
+        If a message has no tool_calls but contains tool call XML blocks in its
+        content, they will be extracted and moved to the tool_calls field.
         """
         cleaned_messages = []
         for msg in messages:
             if msg.role == "assistant":
                 if isinstance(msg.content, str):
-                    normalized_content = LMDBConversationStore.remove_think_tags(msg.content)
-                    if normalized_content != msg.content:
-                        cleaned_msg = msg.model_copy(update={"content": normalized_content})
+                    text = LMDBConversationStore.remove_think_tags(msg.content)
+                    tool_calls = msg.tool_calls
+                    if not tool_calls:
+                        text, tool_calls = extract_tool_calls(text)
+                    else:
+                        text = remove_tool_call_blocks(text).strip()
+
+                    normalized_content = text.strip()
+
+                    if normalized_content != msg.content or tool_calls != msg.tool_calls:
+                        cleaned_msg = msg.model_copy(
+                            update={
+                                "content": normalized_content or None,
+                                "tool_calls": tool_calls or None,
+                            }
+                        )
                         cleaned_messages.append(cleaned_msg)
                     else:
                         cleaned_messages.append(msg)
                 elif isinstance(msg.content, list):
                     new_content = []
+                    all_extracted_calls = list(msg.tool_calls or [])
                     changed = False
+
                     for item in msg.content:
                         if isinstance(item, ContentItem) and item.type == "text" and item.text:
-                            cleaned_text = LMDBConversationStore.remove_think_tags(item.text)
-                            if cleaned_text != item.text:
+                            text = LMDBConversationStore.remove_think_tags(item.text)
+
+                            if not msg.tool_calls:
+                                text, extracted = extract_tool_calls(text)
+                                if extracted:
+                                    all_extracted_calls.extend(extracted)
+                                    changed = True
+                            else:
+                                text = remove_tool_call_blocks(text).strip()
+
+                            if text != item.text:
                                 changed = True
-                                item = item.model_copy(update={"text": cleaned_text})
+                                item = item.model_copy(update={"text": text.strip() or None})
                             new_content.append(item)
                     if changed:
-                        cleaned_messages.append(msg.model_copy(update={"content": new_content}))
+                        cleaned_messages.append(
+                            msg.model_copy(
+                                update={
+                                    "content": new_content,
+                                    "tool_calls": all_extracted_calls or None,
+                                }
+                            )
+                        )
                     else:
                         cleaned_messages.append(msg)
                 else:

From ade61d6826af1f256e7141ab6c1815b047cf8744 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Mon, 26 Jan 2026 11:01:41 +0700
Subject: [PATCH 54/54] Refactor: Modify the logic to convert a large prompt
 into a temporary text file attachment

- When multiple chunks are sent simultaneously, Google will immediately
  invalidate the access token and reject the request
- When a prompt contains a structured format like JSON, splitting it can
  break the format and may cause the model to misunderstand the context
- Another minor tweak as Copilot suggested
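The new behavior is roughly the following shape (sketch only;
`send_large_prompt` is a hypothetical wrapper around the same
`session.send_message` API used in the diff):

    import tempfile
    from pathlib import Path

    async def send_large_prompt(session, text: str, limit: int):
        # Small prompts go through unchanged; oversized ones are attached
        # as message.txt in a single request instead of being split.
        if len(text) <= limit:
            return await session.send_message(text)
        with tempfile.TemporaryDirectory() as tmp:
            path = Path(tmp) / "message.txt"
            path.write_text(text, encoding="utf-8")
            hint = "The user prompt is in the attached message.txt; treat it as this turn's input."
            return await session.send_message(hint, files=[path])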
--- app/server/chat.py | 104 ++++++++++++++++--------------------------- app/services/lmdb.py | 5 ++- app/utils/helper.py | 13 +++--- 3 files changed, 49 insertions(+), 73 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 646f4fa..063d4d4 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1,7 +1,6 @@ -import asyncio import base64 -import random import re +import tempfile import uuid from dataclasses import dataclass from datetime import datetime, timezone @@ -375,9 +374,7 @@ def _response_items_to_messages( ResponseInputItem(type="message", role=item.role, content=normalized_contents or []) ) - logger.debug( - f"Normalized Responses input: {len(normalized_input)} message items (developer roles mapped to system)." - ) + logger.debug(f"Normalized Responses input: {len(normalized_input)} message items.") return messages, normalized_input @@ -1077,19 +1074,18 @@ async def _find_reusable_session( updated_at = conv.updated_at or conv.created_at or now age_minutes = (now - updated_at).total_seconds() / 60 - if age_minutes > METADATA_TTL_MINUTES: + if age_minutes <= METADATA_TTL_MINUTES: + client = await pool.acquire(conv.client_id) + session = client.start_chat(metadata=conv.metadata, model=model) + remain = messages[search_end:] + logger.debug( + f"Match found at prefix length {search_end}. Client: {conv.client_id}" + ) + return session, client, remain + else: logger.debug( f"Matched conversation is too old ({age_minutes:.1f}m), skipping reuse." ) - break - - client = await pool.acquire(conv.client_id) - session = client.start_chat(metadata=conv.metadata, model=model) - remain = messages[search_end:] - logger.debug( - f"Match found at prefix length {search_end}. Client: {conv.client_id}" - ) - return session, client, remain except Exception as e: logger.warning( f"Error checking LMDB for reusable session at length {search_end}: {e}" @@ -1103,13 +1099,9 @@ async def _find_reusable_session( async def _send_with_split(session: ChatSession, text: str, files: list[Path | str] | None = None): - """Send text to Gemini, automatically splitting into multiple batches if it is - longer than ``MAX_CHARS_PER_REQUEST``. - - Every intermediate batch (that is **not** the last one) is suffixed with a hint - telling Gemini that more content will come, and it should simply reply with - "ok". The final batch carries any file uploads and the real user prompt so - that Gemini can produce the actual answer. + """ + Send text to Gemini. If text is longer than ``MAX_CHARS_PER_REQUEST``, + it is converted into a temporary text file attachment to avoid splitting issues. """ if len(text) <= MAX_CHARS_PER_REQUEST: try: @@ -1118,55 +1110,37 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s logger.exception(f"Error sending message to Gemini: {e}") raise - hint_len = len(CONTINUATION_HINT) - safe_chunk_size = MAX_CHARS_PER_REQUEST - hint_len - - chunks: list[str] = [] - pos = 0 - total = len(text) - - while pos < total: - remaining = total - pos - if remaining <= MAX_CHARS_PER_REQUEST: - chunks.append(text[pos:]) - break - - end = pos + safe_chunk_size - slice_candidate = text[pos:end] - # Try to find a safe split point - split_idx = -1 - idx = slice_candidate.rfind("\n") - if idx != -1: - split_idx = idx - - if split_idx != -1: - split_at = pos + split_idx + 1 - else: - split_at = end + logger.info( + f"Message length ({len(text)}) exceeds limit ({MAX_CHARS_PER_REQUEST}). Converting text to file attachment." 
+ ) - chunk = text[pos:split_at] + CONTINUATION_HINT - chunks.append(chunk) - pos = split_at + # Create a temporary directory to hold the message.txt file + # This ensures the filename is exactly 'message.txt' as expected by the instruction. + with tempfile.TemporaryDirectory() as tmpdirname: + temp_file_path = Path(tmpdirname) / "message.txt" + temp_file_path.write_text(text, encoding="utf-8") - chunks_size = len(chunks) - for i, chk in enumerate(chunks[:-1]): try: - logger.debug(f"Sending chunk {i + 1}/{chunks_size}...") - await session.send_message(chk) - delay = random.uniform(1.0, 3.0) - logger.debug(f"Sleeping for {delay:.2f}s...") - await asyncio.sleep(delay) + # Prepare the files list + final_files = list(files) if files else [] + final_files.append(temp_file_path) + + instruction = ( + "The user's input exceeds the character limit and is provided in the attached file `message.txt`.\n\n" + "**System Instruction:**\n" + "1. Read the content of `message.txt`.\n" + "2. Treat that content as the **primary** user prompt for this turn.\n" + "3. Execute the instructions or answer the questions found *inside* that file immediately.\n" + ) + + logger.debug(f"Sending prompt as temporary file: {temp_file_path}") + + return await session.send_message(instruction, files=final_files) + except Exception as e: - logger.exception(f"Error sending chunk to Gemini: {e}") + logger.exception(f"Error sending large text as file to Gemini: {e}") raise - try: - logger.debug(f"Sending final chunk {chunks_size}/{chunks_size}...") - return await session.send_message(chunks[-1], files=files) - except Exception as e: - logger.exception(f"Error sending final chunk to Gemini: {e}") - raise - def _create_streaming_response( model_output: str, diff --git a/app/services/lmdb.py b/app/services/lmdb.py index f4c9938..c9d42cd 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -43,8 +43,9 @@ def _hash_message(message: Message) -> str: break if text_parts is not None: - text_content = "".join(text_parts).replace("\r\n", "\n").strip() - core_data["content"] = text_content if text_content else None + # Normalize each part but keep them as a list to preserve boundaries and avoid collisions + normalized_parts = [p.replace("\r\n", "\n") for p in text_parts] + core_data["content"] = normalized_parts if normalized_parts else None else: core_data["content"] = message.model_dump(mode="json")["content"] diff --git a/app/utils/helper.py b/app/utils/helper.py index ecf4a47..190b5ce 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -213,7 +213,7 @@ def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: tool_calls: list[ToolCall] = [] - def _create_tool_call(name: str, raw_args: str, index: int) -> None: + def _create_tool_call(name: str, raw_args: str) -> None: """Helper to parse args and append to the tool_calls list.""" if not name: logger.warning("Encountered tool_call without a function name.") @@ -226,7 +226,9 @@ def _create_tool_call(name: str, raw_args: str, index: int) -> None: except orjson.JSONDecodeError: logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.") - # Generate a deterministic ID based on name, arguments, and index to avoid collisions + # Generate a deterministic ID based on name, arguments, and its global sequence index + # to ensure uniqueness across multiple fenced blocks while remaining stable for storage. 
+ index = len(tool_calls) seed = f"{name}:{arguments}:{index}".encode("utf-8") call_id = f"call_{hashlib.sha256(seed).hexdigest()[:24]}" @@ -244,11 +246,11 @@ def _replace_block(match: re.Match[str]) -> str: return match.group(0) found_in_block = False - for i, call_match in enumerate(TOOL_CALL_RE.finditer(block_content)): + for call_match in TOOL_CALL_RE.finditer(block_content): found_in_block = True name = (call_match.group(1) or "").strip() raw_args = (call_match.group(2) or "").strip() - _create_tool_call(name, raw_args, i) + _create_tool_call(name, raw_args) if found_in_block: return "" @@ -258,10 +260,9 @@ def _replace_block(match: re.Match[str]) -> str: cleaned = TOOL_BLOCK_RE.sub(_replace_block, text) def _replace_orphan(match: re.Match[str]) -> str: - # Note: orphan calls are handled with a fallback index if they appear outside blocks name = (match.group(1) or "").strip() raw_args = (match.group(2) or "").strip() - _create_tool_call(name, raw_args, len(tool_calls)) + _create_tool_call(name, raw_args) return "" cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned)
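A note on the helper.py change above: deriving the ID from the function
name, its canonical arguments, and the call's position keeps IDs stable
across re-parses while still distinguishing duplicate calls. A minimal
standalone sketch of the scheme:

    import hashlib

    def deterministic_call_id(name: str, arguments: str, index: int) -> str:
        # Same (name, arguments, position) always yields the same ID, so a
        # re-parsed history hashes identically in the LMDB lookup.
        seed = f"{name}:{arguments}:{index}".encode("utf-8")
        return f"call_{hashlib.sha256(seed).hexdigest()[:24]}"

    a = deterministic_call_id("search", '{"q": "x"}', 0)
    b = deterministic_call_id("search", '{"q": "x"}', 1)
    assert a != b  # duplicate calls remain distinguishable
    assert a == deterministic_call_id("search", '{"q": "x"}', 0)  # and stable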