From a3dfe703507494e6082f142276fb91a743375250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 21 Nov 2025 12:44:51 +0700 Subject: [PATCH 01/54] Remove the unused auto-refresh functionality and related imports. They are no longer needed since the underlying library issue has been resolved. --- app/services/client.py | 46 +----------------------------------------- 1 file changed, 1 insertion(+), 45 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 825564b..1554bdd 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -8,11 +8,8 @@ from gemini_webapi import GeminiClient, ModelOutput from gemini_webapi.client import ChatSession from gemini_webapi.constants import Model -from gemini_webapi.exceptions import AuthError, ModelInvalid +from gemini_webapi.exceptions import ModelInvalid from gemini_webapi.types import Gem -from gemini_webapi.utils import rotate_tasks -from gemini_webapi.utils.rotate_1psidts import rotate_1psidts -from loguru import logger from ..models import Message from ..utils import g_config @@ -76,47 +73,6 @@ async def init( verbose=verbose, ) - async def start_auto_refresh(self) -> None: - """ - Refresh the __Secure-1PSIDTS cookie periodically and keep the HTTP client in sync. - """ - while True: - new_1psidts: str | None = None - try: - new_1psidts = await rotate_1psidts(self.cookies, self.proxy) - except AuthError: - if task := rotate_tasks.get(self.cookies.get("__Secure-1PSID", "")): - task.cancel() - logger.warning( - "Failed to refresh Gemini cookies (AuthError). Auto refresh task canceled." - ) - return - except Exception as exc: - logger.warning(f"Unexpected error while refreshing Gemini cookies: {exc}") - - if new_1psidts: - self.cookies["__Secure-1PSIDTS"] = new_1psidts - self._sync_httpx_cookie("__Secure-1PSIDTS", new_1psidts) - logger.debug("Gemini cookies refreshed. New __Secure-1PSIDTS applied.") - await asyncio.sleep(self.refresh_interval) - - def _sync_httpx_cookie(self, name: str, value: str) -> None: - """ - Ensure the underlying httpx client uses the refreshed cookie value. - """ - if not self.client: - return - - jar = self.client.cookies.jar - matched = False - for cookie in jar: - if cookie.name == name: - cookie.value = value - matched = True - if not matched: - # Fall back to setting the cookie with default scope if we did not find an existing entry. - self.client.cookies.set(name, value) - async def generate_content( self, prompt: str, From 3a692ab014bf6d0cb98f38d499dc2760eb92c096 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 22 Nov 2025 14:54:53 +0700 Subject: [PATCH 02/54] Enhance error handling in client initialization and message sending --- app/server/chat.py | 12 ++++++++-- app/services/client.py | 52 +++++++++++------------------------------- app/services/pool.py | 26 ++++++++++++++------- 3 files changed, 41 insertions(+), 49 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 66fa6ce..e8752cf 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1129,7 +1129,11 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s """ if len(text) <= MAX_CHARS_PER_REQUEST: # No need to split - a single request is fine. 
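# Editor's sketch (hypothetical standalone helper, not part of this patch): the splitting
# strategy _send_with_split applies when the early return below does not fire. Every chunk
# except the last gets CONTINUATION_HINT appended, and only the final chunk carries files.
def split_with_hint(text: str, max_chars: int, hint: str) -> list[str]:
    # Assumes max_chars > len(hint); the short-text case was already handled above.
    size = max_chars - len(hint)
    chunks = [text[i : i + size] for i in range(0, len(text), size)]
    return [c + hint for c in chunks[:-1]] + [chunks[-1]]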
- return await session.send_message(text, files=files) + try: + return await session.send_message(text, files=files) + except Exception as e: + logger.exception(f"Error sending message to Gemini: {e}") + raise hint_len = len(CONTINUATION_HINT) chunk_size = MAX_CHARS_PER_REQUEST - hint_len @@ -1155,7 +1159,11 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s raise # The last chunk carries the files (if any) and we return its response. - return await session.send_message(chunks[-1], files=files) + try: + return await session.send_message(chunks[-1], files=files) + except Exception as e: + logger.exception(f"Error sending final chunk to Gemini: {e}") + raise def _iter_stream_segments(model_output: str, chunk_size: int = 64): diff --git a/app/services/client.py b/app/services/client.py index 1554bdd..26be26f 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -1,4 +1,3 @@ -import asyncio import html import json import re @@ -6,10 +5,7 @@ from typing import Any, cast from gemini_webapi import GeminiClient, ModelOutput -from gemini_webapi.client import ChatSession -from gemini_webapi.constants import Model -from gemini_webapi.exceptions import ModelInvalid -from gemini_webapi.types import Gem +from loguru import logger from ..models import Message from ..utils import g_config @@ -64,40 +60,18 @@ async def init( refresh_interval = cast(float, _resolve(refresh_interval, config.refresh_interval)) verbose = cast(bool, _resolve(verbose, config.verbose)) - await super().init( - timeout=timeout, - auto_close=auto_close, - close_delay=close_delay, - auto_refresh=auto_refresh, - refresh_interval=refresh_interval, - verbose=verbose, - ) - - async def generate_content( - self, - prompt: str, - files: list[str | Path] | None = None, - model: Model | str = Model.UNSPECIFIED, - gem: Gem | str | None = None, - chat: ChatSession | None = None, - **kwargs, - ) -> ModelOutput: - cnt = 2 # Try 2 times before giving up - last_exception: ModelInvalid | None = None - while cnt: - cnt -= 1 - try: - return await super().generate_content(prompt, files, model, gem, chat, **kwargs) - except ModelInvalid as e: - # This is not always caused by model selection. Instead, it can be solved by retrying. - # So we catch it and retry as a workaround. 
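# Editor's note (illustrative sketch, hypothetical helper): the override removed in this
# hunk boiled down to a bounded retry around transient ModelInvalid errors, equivalent to:
import asyncio

async def retry_transient(call, exc_type, attempts: int = 2, delay: float = 1.0):
    last_exc: Exception | None = None
    for _ in range(attempts):
        try:
            return await call()
        except exc_type as exc:
            last_exc = exc  # remember the failure, pause briefly, then try again
            await asyncio.sleep(delay)
    assert last_exc is not None  # attempts >= 1, so at least one exception was caught
    raise last_exc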
- await asyncio.sleep(1) - last_exception = e - - # If retrying failed, re-raise ModelInvalid - if last_exception is not None: - raise last_exception - raise RuntimeError("generate_content failed without receiving a ModelInvalid error.") + try: + await super().init( + timeout=timeout, + auto_close=auto_close, + close_delay=close_delay, + auto_refresh=auto_refresh, + refresh_interval=refresh_interval, + verbose=verbose, + ) + except Exception: + logger.exception(f"Failed to initialize GeminiClient {self.id}") + raise @staticmethod async def process_message( diff --git a/app/services/pool.py b/app/services/pool.py index abf1fa0..24a21dc 100644 --- a/app/services/pool.py +++ b/app/services/pool.py @@ -35,14 +35,24 @@ def __init__(self) -> None: async def init(self) -> None: """Initialize all clients in the pool.""" + success_count = 0 for client in self._clients: if not client.running: - await client.init( - timeout=g_config.gemini.timeout, - auto_refresh=g_config.gemini.auto_refresh, - verbose=g_config.gemini.verbose, - refresh_interval=g_config.gemini.refresh_interval, - ) + try: + await client.init( + timeout=g_config.gemini.timeout, + auto_refresh=g_config.gemini.auto_refresh, + verbose=g_config.gemini.verbose, + refresh_interval=g_config.gemini.refresh_interval, + ) + except Exception: + logger.exception(f"Failed to initialize client {client.id}") + + if client.running: + success_count += 1 + + if success_count == 0: + raise RuntimeError("Failed to initialize any Gemini clients") async def acquire(self, client_id: Optional[str] = None) -> GeminiClientWrapper: """Return a healthy client by id or using round-robin.""" @@ -89,8 +99,8 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: ) logger.info(f"Restarted Gemini client {client.id} after it stopped.") return True - except Exception as exc: - logger.warning(f"Failed to restart Gemini client {client.id}: {exc}") + except Exception: + logger.exception(f"Failed to restart Gemini client {client.id}") return False @property From d57e3676fed9fa03e1f51a5aed80d4b7f88e6a88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 22 Nov 2025 17:49:41 +0700 Subject: [PATCH 03/54] Refactor link handling to extract file paths and simplify Google search links --- app/services/client.py | 46 +++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 26be26f..f5a39dd 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -24,9 +24,20 @@ ) HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);") + MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=\s*[-\\`*_{}\[\]()#+.!<>])") + CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`]*`)", re.DOTALL) +FILE_PATH_PATTERN = re.compile( + r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|Gemfile|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", + re.IGNORECASE, +) + +GOOGLE_SEARCH_LINK_PATTERN = re.compile( + r"(?:`\s*)?`?\[`?([^`\]]+)`?`?]\((https://www\.google\.com/search\?q=)(.*?)(? 
str: text = _unescape_html(text) text = _unescape_markdown(text) - def simplify_link_target(text_content: str) -> str: - match_colon_num = re.match(r"([^:]+:\d+)", text_content) - if match_colon_num: - return match_colon_num.group(1) - return text_content + def extract_file_path_from_display_text(text_content: str) -> str | None: + match = re.match(FILE_PATH_PATTERN, text_content) + if match: + return match.group(1) + return None def replacer(match: re.Match) -> str: - outer_open_paren = match.group(1) - display_text = match.group(2) + display_text = str(match.group(1)).strip() + google_search_prefix = match.group(2) + query_part = match.group(3) - new_target_url = simplify_link_target(display_text) - new_link_segment = f"[`{display_text}`]({new_target_url})" + file_path = extract_file_path_from_display_text(display_text) - if outer_open_paren: - return f"{outer_open_paren}{new_link_segment})" + if file_path: + # If it's a file path, transform it into a self-referencing Markdown link + return f"[`{file_path}`]({file_path})" else: - return new_link_segment - - # Replace Google search links with simplified Markdown links - pattern = r"(\()?\[`([^`]+?)`\]\((https://www.google.com/search\?q=)(.*?)(? Date: Sat, 22 Nov 2025 18:29:41 +0700 Subject: [PATCH 04/54] Fix regex pattern for Google search link matching --- app/services/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/client.py b/app/services/client.py index f5a39dd..ffc559e 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -35,7 +35,7 @@ ) GOOGLE_SEARCH_LINK_PATTERN = re.compile( - r"(?:`\s*)?`?\[`?([^`\]]+)`?`?]\((https://www\.google\.com/search\?q=)(.*?)(? Date: Sat, 22 Nov 2025 21:44:09 +0700 Subject: [PATCH 05/54] Fix regex patterns for Markdown escaping, code fence and Google search link matching --- app/services/client.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index ffc559e..0088c74 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -25,17 +25,17 @@ HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);") -MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=\s*[-\\`*_{}\[\]()#+.!<>])") +MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=[-\\`*_{}\[\]()#+.!<>])") -CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`]*`)", re.DOTALL) +CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`\n]+?`)", re.DOTALL) FILE_PATH_PATTERN = re.compile( - r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|Gemfile|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", + r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", re.IGNORECASE, ) GOOGLE_SEARCH_LINK_PATTERN = re.compile( - r"(?:`\s*)?`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)(.*?)(? 
Date: Sat, 22 Nov 2025 22:52:27 +0700 Subject: [PATCH 06/54] Increase timeout value in configuration files from 60 to 120 seconds to better handle heavy tasks --- app/server/chat.py | 2 -- app/services/client.py | 8 -------- app/utils/config.py | 2 +- config/config.yaml | 6 +++--- 4 files changed, 4 insertions(+), 14 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index e8752cf..b4e88da 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -48,9 +48,7 @@ # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) - CONTINUATION_HINT = "\n(More messages to come, please reply with just 'ok.')" - TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)```", re.DOTALL | re.IGNORECASE) TOOL_CALL_RE = re.compile( r"(.*?)", re.DOTALL | re.IGNORECASE diff --git a/app/services/client.py b/app/services/client.py index 0088c74..166eb70 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -16,29 +16,21 @@ '```xml\n{"arg": "value"}\n```\n' "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n" ) - CODE_BLOCK_HINT = ( "\nWhenever you include code, markup, or shell snippets, wrap each snippet in a Markdown fenced " "block and supply the correct language label (for example, ```python ... ``` or ```html ... ```).\n" "Fence ONLY the actual code/markup; keep all narrative or explanatory text outside the fences.\n" ) - HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);") - MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=[-\\`*_{}\[\]()#+.!<>])") - CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`\n]+?`)", re.DOTALL) - FILE_PATH_PATTERN = re.compile( r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", re.IGNORECASE, ) - GOOGLE_SEARCH_LINK_PATTERN = re.compile( r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?" 
) - - _UNSET = object() diff --git a/app/utils/config.py b/app/utils/config.py index 48f0792..796ca75 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -56,7 +56,7 @@ class GeminiConfig(BaseModel): clients: list[GeminiClientSettings] = Field( ..., description="List of Gemini client credential pairs" ) - timeout: int = Field(default=60, ge=1, description="Init timeout") + timeout: int = Field(default=120, ge=1, description="Init timeout") auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies") refresh_interval: int = Field( default=540, ge=1, description="Interval in seconds to refresh Gemini cookies" diff --git a/config/config.yaml b/config/config.yaml index b0f8fbf..89c88b7 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -21,8 +21,8 @@ gemini: - id: "example-id-1" # Arbitrary client ID secure_1psid: "YOUR_SECURE_1PSID_HERE" secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE" - proxy: null # Optional proxy URL (null/empty means direct connection) - timeout: 60 # Init timeout in seconds + proxy: null # Optional proxy URL (null/empty means direct connection) + timeout: 120 # Init timeout in seconds auto_refresh: true # Auto-refresh session cookies refresh_interval: 540 # Refresh interval in seconds verbose: false # Enable verbose logging for Gemini requests @@ -34,4 +34,4 @@ storage: retention_days: 14 # Number of days to retain conversations before cleanup logging: - level: "INFO" # Log level: DEBUG, INFO, WARNING, ERROR + level: "INFO" # Log level: DEBUG, INFO, WARNING, ERROR From f00ebfcbd0424c7ab06d680f308349a04aff3be0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 2 Dec 2025 13:15:27 +0700 Subject: [PATCH 07/54] Fix Image generation --- .github/workflows/docker.yaml | 10 ++--- .github/workflows/track.yml | 12 +++--- app/models/models.py | 14 +++---- app/server/chat.py | 77 +++++++++++++++++++++-------------- app/services/client.py | 4 +- app/utils/helper.py | 10 ++++- 6 files changed, 75 insertions(+), 52 deletions(-) diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index 4527f3d..eef2a41 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -5,11 +5,11 @@ on: branches: - main tags: - - 'v*' + - "v*" paths-ignore: - - '**/*.md' - - '.github/workflows/ruff.yaml' - - '.github/workflows/track.yml' + - "**/*.md" + - ".github/workflows/ruff.yaml" + - ".github/workflows/track.yml" env: REGISTRY: ghcr.io @@ -57,4 +57,4 @@ jobs: labels: ${{ steps.meta.outputs.labels }} platforms: linux/amd64,linux/arm64 cache-from: type=gha - cache-to: type=gha,mode=max \ No newline at end of file + cache-to: type=gha,mode=max diff --git a/.github/workflows/track.yml b/.github/workflows/track.yml index 63afbec..838dcf8 100644 --- a/.github/workflows/track.yml +++ b/.github/workflows/track.yml @@ -2,7 +2,7 @@ name: Update gemini-webapi on: schedule: - - cron: '0 0 * * *' # Runs every day at midnight + - cron: "0 0 * * *" # Runs every day at midnight workflow_dispatch: jobs: @@ -24,7 +24,7 @@ jobs: run: | # Install dependencies first to enable uv pip show uv sync - + # Get current version of gemini-webapi before upgrade OLD_VERSION=$(uv pip show gemini-webapi 2>/dev/null | grep ^Version: | awk '{print $2}') if [ -z "$OLD_VERSION" ]; then @@ -32,10 +32,10 @@ jobs: exit 1 fi echo "Current gemini-webapi version: $OLD_VERSION" - + # Update the package using uv, which handles pyproject.toml and uv.lock uv add --upgrade gemini-webapi - + # Get new version of gemini-webapi after upgrade 
NEW_VERSION=$(uv pip show gemini-webapi | grep ^Version: | awk '{print $2}') if [ -z "$NEW_VERSION" ]; then @@ -43,7 +43,7 @@ jobs: exit 1 fi echo "New gemini-webapi version: $NEW_VERSION" - + # Only proceed if gemini-webapi version has changed if [ "$OLD_VERSION" != "$NEW_VERSION" ]; then echo "gemini-webapi has been updated from $OLD_VERSION to $NEW_VERSION" @@ -63,7 +63,7 @@ jobs: title: ":arrow_up: update gemini-webapi to ${{ steps.update.outputs.version }}" body: | Update `gemini-webapi` to version `${{ steps.update.outputs.version }}`. - + Auto-generated by GitHub Actions using `uv`. branch: update-gemini-webapi base: main diff --git a/app/models/models.py b/app/models/models.py index 3991f12..74d8cd5 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -154,11 +154,13 @@ class ConversationInStore(BaseModel): class ResponseInputContent(BaseModel): """Content item for Responses API input.""" - type: Literal["input_text", "input_image"] + type: Literal["input_text", "input_image", "input_file"] text: Optional[str] = None image_url: Optional[str] = None - image_base64: Optional[str] = None - mime_type: Optional[str] = None + detail: Optional[Literal["auto", "low", "high"]] = None + file_url: Optional[str] = None + file_data: Optional[str] = None + filename: Optional[str] = None class ResponseInputItem(BaseModel): @@ -212,12 +214,8 @@ class ResponseUsage(BaseModel): class ResponseOutputContent(BaseModel): """Content item for Responses API output.""" - type: Literal["output_text", "output_image"] + type: Literal["output_text"] text: Optional[str] = None - image_base64: Optional[str] = None - mime_type: Optional[str] = None - width: Optional[int] = None - height: Optional[int] = None class ResponseOutputMessage(BaseModel): diff --git a/app/server/chat.py b/app/server/chat.py index b4e88da..76dc632 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -381,14 +381,6 @@ def _strip_tagged_blocks(text: str) -> str: return "".join(result) -def _ensure_data_url(part: ResponseInputContent) -> str | None: - image_url = part.image_url - if not image_url and part.image_base64: - mime_type = part.mime_type or "image/png" - image_url = f"data:{mime_type};base64,{part.image_base64}" - return image_url - - def _response_items_to_messages( items: str | list[ResponseInputItem], ) -> tuple[list[Message], str | list[ResponseInputItem]]: @@ -422,14 +414,34 @@ def _response_items_to_messages( if text_value: converted.append(ContentItem(type="text", text=text_value)) elif part.type == "input_image": - image_url = _ensure_data_url(part) + image_url = part.image_url if image_url: normalized_contents.append( - ResponseInputContent(type="input_image", image_url=image_url) + ResponseInputContent( + type="input_image", + image_url=image_url, + detail=part.detail if part.detail else "auto", + ) ) converted.append( - ContentItem(type="image_url", image_url={"url": image_url}) + ContentItem( + type="image_url", + image_url={ + "url": image_url, + "detail": part.detail if part.detail else "auto", + }, + ) ) + elif part.type == "input_file": + if part.file_url or part.file_data: + normalized_contents.append(part) + file_info = {} + if part.file_data: + file_info["file_data"] = part.file_data + file_info["filename"] = part.filename + if part.file_url: + file_info["url"] = part.file_url + converted.append(ContentItem(type="file", file=file_info)) messages.append(Message(role=role, content=converted or None)) normalized_input.append( @@ -472,11 +484,26 @@ def _instructions_to_messages( if text_value: 
converted.append(ContentItem(type="text", text=text_value)) elif part.type == "input_image": - image_url = _ensure_data_url(part) + image_url = part.image_url if image_url: converted.append( - ContentItem(type="image_url", image_url={"url": image_url}) + ContentItem( + type="image_url", + image_url={ + "url": image_url, + "detail": part.detail if part.detail else "auto", + }, + ) ) + elif part.type == "input_file": + file_info = {} + if part.file_data: + file_info["file_data"] = part.file_data + file_info["filename"] = part.filename + if part.file_url: + file_info["url"] = part.file_url + if file_info: + converted.append(ContentItem(type="file", file=file_info)) instruction_messages.append(Message(role=role, content=converted or None)) return instruction_messages @@ -799,13 +826,13 @@ async def create_response( session, client, remaining_messages = await _find_reusable_session(db, pool, model, messages) async def _build_payload( - payload_messages: list[Message], reuse_session: bool + _payload_messages: list[Message], _reuse_session: bool ) -> tuple[str, list[Path | str]]: - if reuse_session and len(payload_messages) == 1: + if _reuse_session and len(_payload_messages) == 1: return await GeminiClientWrapper.process_message( - payload_messages[0], tmp_dir, tagged=False + _payload_messages[0], tmp_dir, tagged=False ) - return await GeminiClientWrapper.process_conversation(payload_messages, tmp_dir) + return await GeminiClientWrapper.process_conversation(_payload_messages, tmp_dir) reuse_session = session is not None if reuse_session: @@ -821,7 +848,7 @@ async def _build_payload( detail="No new messages to send for the existing session.", ) payload_messages = messages_to_send - model_input, files = await _build_payload(payload_messages, reuse_session=True) + model_input, files = await _build_payload(payload_messages, _reuse_session=True) logger.debug( f"Reused session {session.metadata} - sending {len(payload_messages)} prepared messages." 
) @@ -830,7 +857,7 @@ async def _build_payload( client = await pool.acquire() session = client.start_chat(model=model) payload_messages = messages - model_input, files = await _build_payload(payload_messages, reuse_session=False) + model_input, files = await _build_payload(payload_messages, _reuse_session=False) except ValueError as e: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) except RuntimeError as e: @@ -935,7 +962,6 @@ async def _build_payload( detail = f"{detail} Assistant response: {summary}" raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=detail) - image_contents: list[ResponseOutputContent] = [] image_call_items: list[ResponseImageGenerationCall] = [] for image in images: try: @@ -943,16 +969,6 @@ async def _build_payload( except Exception as exc: logger.warning(f"Failed to download generated image: {exc}") continue - mime_type = "image/png" if isinstance(image, GeneratedImage) else "image/jpeg" - image_contents.append( - ResponseOutputContent( - type="output_image", - image_base64=image_base64, - mime_type=mime_type, - width=width, - height=height, - ) - ) image_call_items.append( ResponseImageGenerationCall( id=f"img_{uuid.uuid4().hex}", @@ -977,7 +993,6 @@ async def _build_payload( response_contents: list[ResponseOutputContent] = [] if assistant_text: response_contents.append(ResponseOutputContent(type="output_text", text=assistant_text)) - response_contents.extend(image_contents) if not response_contents: response_contents.append(ResponseOutputContent(type="output_text", text="")) diff --git a/app/services/client.py b/app/services/client.py index 166eb70..0207114 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -113,8 +113,10 @@ async def process_message( if file_data := item.file.get("file_data", None): filename = item.file.get("filename", "") files.append(await save_file_to_tempfile(file_data, filename, tempdir)) + elif url := item.file.get("url", None): + files.append(await save_url_to_tempfile(url, tempdir)) else: - raise ValueError("File must contain 'file_data' key") + raise ValueError("File must contain 'file_data' or 'url' key") elif message.content is not None: raise ValueError("Unsupported message content type.") diff --git a/app/utils/helper.py b/app/utils/helper.py index 48fc99d..3bff469 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -1,4 +1,5 @@ import base64 +import mimetypes import tempfile from pathlib import Path @@ -40,9 +41,16 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None): suffix: str | None = None if url.startswith("data:image/"): # Base64 encoded image + metadata_part = url.split(",")[0] + mime_type = metadata_part.split(":")[1].split(";")[0] + base64_data = url.split(",")[1] data = base64.b64decode(base64_data) - suffix = ".png" + + # Guess extension from mime type, default to the subtype if not found + suffix = mimetypes.guess_extension(mime_type) + if not suffix: + suffix = f".{mime_type.split('/')[1]}" else: # http files async with httpx.AsyncClient() as client: From d911c33e81e83211ed53d77b300c4c203df7b53c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 2 Dec 2025 15:50:45 +0700 Subject: [PATCH 08/54] Refactor tool handling to support standard and image generation tools separately --- app/models/models.py | 7 ++++--- app/server/chat.py | 36 +++++++++++++++++++++++++++++++++--- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 74d8cd5..52dd414 
100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -174,7 +174,8 @@ class ResponseInputItem(BaseModel): class ResponseToolChoice(BaseModel): """Tool choice enforcing a specific tool in Responses API.""" - type: Literal["image_generation"] + type: Literal["function", "image_generation"] + function: Optional[ToolChoiceFunctionDetail] = None class ResponseImageTool(BaseModel): @@ -195,8 +196,8 @@ class ResponseCreateRequest(BaseModel): top_p: Optional[float] = 1.0 max_output_tokens: Optional[int] = None stream: Optional[bool] = False - tool_choice: Optional[ResponseToolChoice] = None - tools: Optional[List[ResponseImageTool]] = None + tool_choice: Optional[Union[str, ResponseToolChoice]] = None + tools: Optional[List[Union[Tool, ResponseImageTool]]] = None store: Optional[bool] = None user: Optional[str] = None response_format: Optional[Dict[str, Any]] = None diff --git a/app/server/chat.py b/app/server/chat.py index 76dc632..8277d0c 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -795,7 +795,28 @@ async def create_response( f"Structured response requested for /v1/responses (schema={structured_requirement.schema_name})." ) - image_instruction = _build_image_generation_instruction(request.tools, request.tool_choice) + # Separate standard tools from image generation tools + standard_tools: list[Tool] = [] + image_tools: list[ResponseImageTool] = [] + + if request.tools: + for t in request.tools: + if isinstance(t, Tool): + standard_tools.append(t) + elif isinstance(t, ResponseImageTool): + image_tools.append(t) + # Handle dicts if Pydantic didn't convert them fully (fallback) + elif isinstance(t, dict): + t_type = t.get("type") + if t_type == "function": + standard_tools.append(Tool.model_validate(t)) + elif t_type == "image_generation": + image_tools.append(ResponseImageTool.model_validate(t)) + + image_instruction = _build_image_generation_instruction( + image_tools, + request.tool_choice if isinstance(request.tool_choice, ResponseToolChoice) else None, + ) if image_instruction: extra_instructions.append(image_instruction) logger.debug("Image generation support enabled for /v1/responses request.") @@ -808,10 +829,19 @@ async def create_response( f"Injected {len(preface_messages)} instruction messages before sending to Gemini." ) + # Pass standard tools to the prompt builder + # Determine tool_choice for standard tools (ignore image_generation choice here as it is handled via instruction) + model_tool_choice = None + if isinstance(request.tool_choice, str): + model_tool_choice = request.tool_choice + elif isinstance(request.tool_choice, ToolChoiceFunction): + model_tool_choice = request.tool_choice + # If tool_choice is ResponseToolChoice (image_generation), we don't pass it as a function tool choice. 
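# Editor's sketch (hypothetical standalone helper): the tool partitioning above, reduced
# to its core; the real handler additionally validates raw dicts into Tool /
# ResponseImageTool models before appending.
def partition_tools(tools: list) -> tuple[list, list]:
    standard, image = [], []
    for t in tools or []:
        kind = t.get("type") if isinstance(t, dict) else getattr(t, "type", None)
        (image if kind == "image_generation" else standard).append(t)
    return standard, image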
+ messages = _prepare_messages_for_model( conversation_messages, - tools=None, - tool_choice=None, + tools=standard_tools or None, + tool_choice=model_tool_choice, extra_instructions=extra_instructions or None, ) From a8241ad78831b675d0321bbe5271c1bf10a6ce2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 2 Dec 2025 17:17:27 +0700 Subject: [PATCH 09/54] Fix: use "ascii" decoding for base64-encoded image data consistency --- app/server/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/server/chat.py b/app/server/chat.py index 8277d0c..67790ab 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1524,4 +1524,4 @@ async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | Non data = Path(saved_path).read_bytes() width, height = _extract_image_dimensions(data) - return base64.b64encode(data).decode("utf-8"), width, height + return base64.b64encode(data).decode("ascii"), width, height From fd2723d49b5929cb770a231aeb479f392f7a7d53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 3 Dec 2025 12:08:19 +0700 Subject: [PATCH 10/54] Fix: replace `running` with `_running` for internal client status checks --- app/services/pool.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/app/services/pool.py b/app/services/pool.py index 24a21dc..28a3435 100644 --- a/app/services/pool.py +++ b/app/services/pool.py @@ -37,7 +37,7 @@ async def init(self) -> None: """Initialize all clients in the pool.""" success_count = 0 for client in self._clients: - if not client.running: + if not client._running: try: await client.init( timeout=g_config.gemini.timeout, @@ -48,7 +48,7 @@ async def init(self) -> None: except Exception: logger.exception(f"Failed to initialize client {client.id}") - if client.running: + if client._running: success_count += 1 if success_count == 0: @@ -79,7 +79,7 @@ async def acquire(self, client_id: Optional[str] = None) -> GeminiClientWrapper: async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: """Make sure the client is running, attempting a restart if needed.""" - if client.running: + if client._running: return True lock = self._restart_locks.get(client.id) @@ -87,7 +87,7 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: return False # Should not happen async with lock: - if client.running: + if client._running: return True try: @@ -110,4 +110,4 @@ def clients(self) -> List[GeminiClientWrapper]: def status(self) -> Dict[str, bool]: """Return running status for each client.""" - return {client.id: client.running for client in self._clients} + return {client.id: client._running for client in self._clients} From 8ee6cc0335e4b63df2126a6bf69d6c9e42505485 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 3 Dec 2025 14:10:06 +0700 Subject: [PATCH 11/54] Refactor: replace direct `_running` access with `running()` method in client status checks --- app/services/client.py | 3 +++ app/services/pool.py | 10 +++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 0207114..09c52c1 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -76,6 +76,9 @@ async def init( logger.exception(f"Failed to initialize GeminiClient {self.id}") raise + def running(self) -> bool: + return self._running + @staticmethod async def process_message( message: Message, tempdir: Path | None = None, tagged: bool = True diff 
--git a/app/services/pool.py b/app/services/pool.py index 28a3435..a134dda 100644 --- a/app/services/pool.py +++ b/app/services/pool.py @@ -37,7 +37,7 @@ async def init(self) -> None: """Initialize all clients in the pool.""" success_count = 0 for client in self._clients: - if not client._running: + if not client.running(): try: await client.init( timeout=g_config.gemini.timeout, @@ -48,7 +48,7 @@ async def init(self) -> None: except Exception: logger.exception(f"Failed to initialize client {client.id}") - if client._running: + if client.running(): success_count += 1 if success_count == 0: @@ -79,7 +79,7 @@ async def acquire(self, client_id: Optional[str] = None) -> GeminiClientWrapper: async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: """Make sure the client is running, attempting a restart if needed.""" - if client._running: + if client.running(): return True lock = self._restart_locks.get(client.id) @@ -87,7 +87,7 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: return False # Should not happen async with lock: - if client._running: + if client.running(): return True try: @@ -110,4 +110,4 @@ def clients(self) -> List[GeminiClientWrapper]: def status(self) -> Dict[str, bool]: """Return running status for each client.""" - return {client.id: client._running for client in self._clients} + return {client.id: client.running() for client in self._clients} From 453700eba682cfdd4bfc2e061a8139129654d017 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 3 Dec 2025 22:11:11 +0700 Subject: [PATCH 12/54] Extend models with new fields for annotations, reasoning, audio, log probabilities, and token details; adjust response handling accordingly. --- app/models/models.py | 13 ++++++++++++- app/server/chat.py | 7 ++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 52dd414..1d7368c 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -12,7 +12,9 @@ class ContentItem(BaseModel): type: Literal["text", "image_url", "file", "input_audio"] text: Optional[str] = None image_url: Optional[Dict[str, str]] = None + input_audio: Optional[Dict[str, Any]] = None file: Optional[Dict[str, str]] = None + annotations: List[Dict[str, Any]] = Field(default_factory=list) class Message(BaseModel): @@ -22,6 +24,10 @@ class Message(BaseModel): content: Union[str, List[ContentItem], None] = None name: Optional[str] = None tool_calls: Optional[List["ToolCall"]] = None + refusal: Optional[str] = None + reasoning_content: Optional[str] = None + audio: Optional[Dict[str, Any]] = None + annotations: List[Dict[str, Any]] = Field(default_factory=list) class Choice(BaseModel): @@ -30,6 +36,7 @@ class Choice(BaseModel): index: int message: Message finish_reason: str + logprobs: Optional[Dict[str, Any]] = None class FunctionCall(BaseModel): @@ -81,6 +88,8 @@ class Usage(BaseModel): prompt_tokens: int completion_tokens: int total_tokens: int + prompt_tokens_details: Optional[Dict[str, int]] = None + completion_tokens_details: Optional[Dict[str, int]] = None class ModelData(BaseModel): @@ -118,6 +127,8 @@ class ChatCompletionResponse(BaseModel): model: str choices: List[Choice] usage: Usage + system_fingerprint: Optional[str] = None + service_tier: Optional[str] = None class ModelListResponse(BaseModel): @@ -217,6 +228,7 @@ class ResponseOutputContent(BaseModel): type: Literal["output_text"] text: Optional[str] = None + annotations: List[Dict[str, Any]] = 
Field(default_factory=list) class ResponseOutputMessage(BaseModel): @@ -257,7 +269,6 @@ class ResponseCreateResponse(BaseModel): created: int model: str output: List[Union[ResponseOutputMessage, ResponseImageGenerationCall, ResponseToolCall]] - output_text: Optional[str] = None status: Literal[ "in_progress", "completed", diff --git a/app/server/chat.py b/app/server/chat.py index 67790ab..5848a39 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1022,10 +1022,12 @@ async def _build_payload( response_contents: list[ResponseOutputContent] = [] if assistant_text: - response_contents.append(ResponseOutputContent(type="output_text", text=assistant_text)) + response_contents.append( + ResponseOutputContent(type="output_text", text=assistant_text, annotations=[]) + ) if not response_contents: - response_contents.append(ResponseOutputContent(type="output_text", text="")) + response_contents.append(ResponseOutputContent(type="output_text", text="", annotations=[])) created_time = int(datetime.now(tz=timezone.utc).timestamp()) response_id = f"resp_{uuid.uuid4().hex}" @@ -1059,7 +1061,6 @@ async def _build_payload( *tool_call_items, *image_call_items, ], - output_text=assistant_text or None, status="completed", usage=usage, input=normalized_input or None, From 9260f8b5cc37192716d4127ed6ab98a087e7e3ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 3 Dec 2025 22:51:54 +0700 Subject: [PATCH 13/54] Extend models with new fields (annotations, error), add `normalize_output_text` validator, rename `created` to `created_at`, and update response handling accordingly. --- app/models/models.py | 16 +++++++++++++--- app/server/chat.py | 8 ++++---- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 1d7368c..8d5102c 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -3,7 +3,7 @@ from datetime import datetime from typing import Any, Dict, List, Literal, Optional, Union -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, model_validator class ContentItem(BaseModel): @@ -127,7 +127,6 @@ class ChatCompletionResponse(BaseModel): model: str choices: List[Choice] usage: Usage - system_fingerprint: Optional[str] = None service_tier: Optional[str] = None @@ -172,6 +171,15 @@ class ResponseInputContent(BaseModel): file_url: Optional[str] = None file_data: Optional[str] = None filename: Optional[str] = None + annotations: List[Dict[str, Any]] = Field(default_factory=list) + + @model_validator(mode="before") + @classmethod + def normalize_output_text(cls, data: Any) -> Any: + """Allow output_text (from previous turns) to be treated as input_text.""" + if isinstance(data, dict) and data.get("type") == "output_text": + data["type"] = "input_text" + return data class ResponseInputItem(BaseModel): @@ -266,7 +274,7 @@ class ResponseCreateResponse(BaseModel): id: str object: Literal["response"] = "response" - created: int + created_at: int model: str output: List[Union[ResponseOutputMessage, ResponseImageGenerationCall, ResponseToolCall]] status: Literal[ @@ -274,9 +282,11 @@ class ResponseCreateResponse(BaseModel): "completed", "failed", "incomplete", + "cancelled", "requires_action", ] = "completed" usage: ResponseUsage + error: Optional[Dict[str, Any]] = None metadata: Optional[Dict[str, Any]] = None system_fingerprint: Optional[str] = None input: Optional[Union[str, List[ResponseInputItem]]] = None diff --git a/app/server/chat.py b/app/server/chat.py index 5848a39..ef508b9 100644 --- 
a/app/server/chat.py +++ b/app/server/chat.py @@ -1049,7 +1049,7 @@ async def _build_payload( response_payload = ResponseCreateResponse( id=response_id, - created=created_time, + created_at=created_time, model=request.model, output=[ ResponseOutputMessage( @@ -1334,7 +1334,7 @@ def _create_responses_streaming_response( response_dict = response_payload.model_dump(mode="json") response_id = response_payload.id - created_time = response_payload.created + created_time = response_payload.created_at model = response_payload.model logger.debug( @@ -1344,14 +1344,14 @@ def _create_responses_streaming_response( base_event = { "id": response_id, "object": "response", - "created": created_time, + "created_at": created_time, "model": model, } created_snapshot: dict[str, Any] = { "id": response_id, "object": "response", - "created": created_time, + "created_at": created_time, "model": model, "status": "in_progress", } From d6a8e6bdb786bb90dd653cd9aa3fc88469c2b505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 09:35:30 +0700 Subject: [PATCH 14/54] Extend response models to support tool choices, image output, and improved streaming of response items. Refactor image generation handling for consistency and add compatibility with output content. --- app/models/models.py | 7 ++-- app/server/chat.py | 83 ++++++++++++++++++++++++++++++++------------ 2 files changed, 65 insertions(+), 25 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 8d5102c..bbc2140 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -127,7 +127,6 @@ class ChatCompletionResponse(BaseModel): model: str choices: List[Choice] usage: Usage - service_tier: Optional[str] = None class ModelListResponse(BaseModel): @@ -234,8 +233,9 @@ class ResponseUsage(BaseModel): class ResponseOutputContent(BaseModel): """Content item for Responses API output.""" - type: Literal["output_text"] + type: Literal["output_text", "output_image"] text: Optional[str] = None + image_url: Optional[str] = None annotations: List[Dict[str, Any]] = Field(default_factory=list) @@ -285,10 +285,11 @@ class ResponseCreateResponse(BaseModel): "cancelled", "requires_action", ] = "completed" + tool_choice: Optional[Union[str, ResponseToolChoice]] = None + tools: Optional[List[Union[Tool, ResponseImageTool]]] = None usage: ResponseUsage error: Optional[Dict[str, Any]] = None metadata: Optional[Dict[str, Any]] = None - system_fingerprint: Optional[str] = None input: Optional[Union[str, List[ResponseInputItem]]] = None diff --git a/app/server/chat.py b/app/server/chat.py index ef508b9..cb498a5 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -992,6 +992,7 @@ async def _build_payload( detail = f"{detail} Assistant response: {summary}" raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=detail) + response_contents: list[ResponseOutputContent] = [] image_call_items: list[ResponseImageGenerationCall] = [] for image in images: try: @@ -999,15 +1000,25 @@ async def _build_payload( except Exception as exc: logger.warning(f"Failed to download generated image: {exc}") continue + + img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" image_call_items.append( ResponseImageGenerationCall( id=f"img_{uuid.uuid4().hex}", status="completed", result=image_base64, - output_format="png" if isinstance(image, GeneratedImage) else "jpeg", + output_format=img_format, size=f"{width}x{height}" if width and height else None, ) ) + # Add as output_image content for compatibility + 
response_contents.append( + ResponseOutputContent( + type="output_image", + image_url=f"data:image/{img_format};base64,{image_base64}", + annotations=[], + ) + ) tool_call_items: list[ResponseToolCall] = [] if detected_tool_calls: @@ -1020,7 +1031,6 @@ async def _build_payload( for call in detected_tool_calls ] - response_contents: list[ResponseOutputContent] = [] if assistant_text: response_contents.append( ResponseOutputContent(type="output_text", text=assistant_text, annotations=[]) @@ -1065,6 +1075,8 @@ async def _build_payload( usage=usage, input=normalized_input or None, metadata=request.metadata or None, + tools=request.tools, + tool_choice=request.tool_choice, ) try: @@ -1359,6 +1371,10 @@ def _create_responses_streaming_response( created_snapshot["metadata"] = response_dict["metadata"] if response_dict.get("input") is not None: created_snapshot["input"] = response_dict["input"] + if response_dict.get("tools") is not None: + created_snapshot["tools"] = response_dict["tools"] + if response_dict.get("tool_choice") is not None: + created_snapshot["tool_choice"] = response_dict["tool_choice"] async def generate_stream(): # Emit creation event @@ -1369,30 +1385,53 @@ async def generate_stream(): } yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" - # Stream textual content, if any - if assistant_text: - for chunk in _iter_stream_segments(assistant_text): - delta_event = { - **base_event, - "type": "response.output_text.delta", - "output_index": 0, - "delta": chunk, - } - yield f"data: {orjson.dumps(delta_event).decode('utf-8')}\n\n" - - done_event = { + # Stream output items (Message/Text, Tool Calls, Images) + for i, item in enumerate(response_payload.output): + item_json = item.model_dump(mode="json", exclude_none=True) + + added_event = { **base_event, - "type": "response.output_text.done", - "output_index": 0, + "type": "response.output_item.added", + "output_index": i, + "item": item_json, } - yield f"data: {orjson.dumps(done_event).decode('utf-8')}\n\n" - else: - done_event = { + yield f"data: {orjson.dumps(added_event).decode('utf-8')}\n\n" + + # 2. Stream content if it's a message (text) + if item.type == "message": + content_text = "" + # Aggregate text content to stream + for c in item.content: + if c.type == "output_text" and c.text: + content_text += c.text + + if content_text: + for chunk in _iter_stream_segments(content_text): + delta_event = { + **base_event, + "type": "response.output_text.delta", + "output_index": i, + "delta": chunk, + } + yield f"data: {orjson.dumps(delta_event).decode('utf-8')}\n\n" + + # Text done + done_event = { + **base_event, + "type": "response.output_text.done", + "output_index": i, + } + yield f"data: {orjson.dumps(done_event).decode('utf-8')}\n\n" + + # 3. Emit output_item.done for all types + # This confirms the item is fully transferred. + item_done_event = { **base_event, - "type": "response.output_text.done", - "output_index": 0, + "type": "response.output_item.done", + "output_index": i, + "item": item_json, } - yield f"data: {orjson.dumps(done_event).decode('utf-8')}\n\n" + yield f"data: {orjson.dumps(item_done_event).decode('utf-8')}\n\n" # Emit completed event with full payload completed_event = { From 16435a2ce12a4d37e9f3cfa758f384000aa41123 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 09:50:47 +0700 Subject: [PATCH 15/54] Set default `text` value to an empty string for `ResponseOutputContent` and ensure consistent initialization in image output handling. 
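For reference, the per-item event order emitted by the refactored generate_stream in
PATCH 14/54 can be reproduced with a small harness — an illustrative sketch with
abbreviated payloads (the real created/completed events also embed the response snapshot):

import json

def sse_events(output_items: list[dict], base: dict) -> list[str]:
    # Order mirrors the handler: created, then added/delta/done per item, then completed.
    events = [{**base, "type": "response.created"}]
    for i, item in enumerate(output_items):
        events.append({**base, "type": "response.output_item.added", "output_index": i, "item": item})
        if item.get("type") == "message":
            text = "".join(
                c.get("text") or "" for c in item.get("content", []) if c.get("type") == "output_text"
            )
            for j in range(0, len(text), 64):  # 64-char segments, as in _iter_stream_segments
                events.append(
                    {**base, "type": "response.output_text.delta", "output_index": i, "delta": text[j : j + 64]}
                )
            events.append({**base, "type": "response.output_text.done", "output_index": i})
        events.append({**base, "type": "response.output_item.done", "output_index": i, "item": item})
    events.append({**base, "type": "response.completed"})
    return [f"data: {json.dumps(e)}" for e in events]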
--- app/models/models.py | 2 +- app/server/chat.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/app/models/models.py b/app/models/models.py index bbc2140..2c987b8 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -234,7 +234,7 @@ class ResponseOutputContent(BaseModel): """Content item for Responses API output.""" type: Literal["output_text", "output_image"] - text: Optional[str] = None + text: Optional[str] = "" image_url: Optional[str] = None annotations: List[Dict[str, Any]] = Field(default_factory=list) diff --git a/app/server/chat.py b/app/server/chat.py index cb498a5..7745a26 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1015,6 +1015,7 @@ async def _build_payload( response_contents.append( ResponseOutputContent( type="output_image", + text="", image_url=f"data:image/{img_format};base64,{image_base64}", annotations=[], ) From fc99c2d60193f346006f5cf17af4e849d8ea2669 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:03:50 +0700 Subject: [PATCH 16/54] feat: Add /images endpoint with dedicated router and improved image management Add dedicated router for /images endpoint and refactor image handling logic for better modularity. Enhance temporary image management with secure naming, token verification, and cleanup functionality. --- app/main.py | 9 +++++- app/server/chat.py | 35 ++++++++++++++++-------- app/server/images.py | 15 ++++++++++ app/server/middleware.py | 59 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 105 insertions(+), 13 deletions(-) create mode 100644 app/server/images.py diff --git a/app/main.py b/app/main.py index 95458d3..c215e2a 100644 --- a/app/main.py +++ b/app/main.py @@ -6,7 +6,12 @@ from .server.chat import router as chat_router from .server.health import router as health_router -from .server.middleware import add_cors_middleware, add_exception_handler +from .server.images import router as images_router +from .server.middleware import ( + add_cors_middleware, + add_exception_handler, + cleanup_expired_images, +) from .services import GeminiClientPool, LMDBConversationStore RETENTION_CLEANUP_INTERVAL_SECONDS = 6 * 60 * 60 # 6 hours @@ -28,6 +33,7 @@ async def _run_retention_cleanup(stop_event: asyncio.Event) -> None: while not stop_event.is_set(): try: store.cleanup_expired() + cleanup_expired_images(store.retention_days) except Exception: logger.exception("LMDB retention cleanup task failed.") @@ -93,5 +99,6 @@ def create_app() -> FastAPI: app.include_router(health_router, tags=["Health"]) app.include_router(chat_router, tags=["Chat"]) + app.include_router(images_router, tags=["Images"]) return app diff --git a/app/server/chat.py b/app/server/chat.py index 7745a26..db92dbc 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -44,7 +44,7 @@ from ..services.client import CODE_BLOCK_HINT, XML_WRAP_HINT from ..utils import g_config from ..utils.helper import estimate_tokens -from .middleware import get_temp_dir, verify_api_key +from .middleware import get_image_store_dir, get_temp_dir, verify_api_key # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) @@ -588,6 +588,7 @@ async def create_chat_completion( request: ChatCompletionRequest, api_key: str = Depends(verify_api_key), tmp_dir: Path = Depends(get_temp_dir), + image_store: Path = Depends(get_image_store_dir), ): pool = GeminiClientPool() db = LMDBConversationStore() @@ -775,6 +776,7 @@ async def 
create_response( request: ResponseCreateRequest, api_key: str = Depends(verify_api_key), tmp_dir: Path = Depends(get_temp_dir), + image_store: Path = Depends(get_image_store_dir), ): base_messages, normalized_input = _response_items_to_messages(request.input) if not base_messages: @@ -996,12 +998,16 @@ async def _build_payload( image_call_items: list[ResponseImageGenerationCall] = [] for image in images: try: - image_base64, width, height = await _image_to_base64(image, tmp_dir) + image_base64, width, height, filename = await _image_to_base64(image, tmp_dir) except Exception as exc: logger.warning(f"Failed to download generated image: {exc}") continue img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" + + # Use static URL for compatibility + image_url = f"{request.base_url}images/{filename}" + image_call_items.append( ResponseImageGenerationCall( id=f"img_{uuid.uuid4().hex}", @@ -1013,12 +1019,10 @@ async def _build_payload( ) # Add as output_image content for compatibility response_contents.append( - ResponseOutputContent( - type="output_image", - text="", - image_url=f"data:image/{img_format};base64,{image_base64}", - annotations=[], - ) + ResponseOutputContent(type="output_text", text=image_url, annotations=[]) + ) + response_contents.append( + ResponseOutputContent(type="output_image", text="", image_url=image_url, annotations=[]) ) tool_call_items: list[ResponseToolCall] = [] @@ -1553,8 +1557,8 @@ def _extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: return None, None -async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None]: - """Persist an image provided by gemini_webapi and return base64 plus dimensions.""" +async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]: + """Persist an image provided by gemini_webapi and return base64 plus dimensions and filename.""" if isinstance(image, GeneratedImage): saved_path = await image.save(path=str(temp_dir), full_size=True) else: @@ -1563,6 +1567,13 @@ async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | Non if not saved_path: raise ValueError("Failed to save generated image") - data = Path(saved_path).read_bytes() + # Rename file to a random UUID to ensure uniqueness and unpredictability + original_path = Path(saved_path) + random_name = f"img_{uuid.uuid4().hex}{original_path.suffix}" + new_path = temp_dir / random_name + original_path.rename(new_path) + + data = new_path.read_bytes() width, height = _extract_image_dimensions(data) - return base64.b64encode(data).decode("ascii"), width, height + filename = random_name + return base64.b64encode(data).decode("ascii"), width, height, filename diff --git a/app/server/images.py b/app/server/images.py new file mode 100644 index 0000000..2867239 --- /dev/null +++ b/app/server/images.py @@ -0,0 +1,15 @@ +from fastapi import APIRouter, HTTPException +from fastapi.responses import FileResponse + +from ..server.middleware import get_image_store_dir + +router = APIRouter() + + +@router.get("/images/{filename}", tags=["Images"]) +async def get_image(filename: str): + image_store = get_image_store_dir() + file_path = image_store / filename + if not file_path.exists(): + raise HTTPException(status_code=404, detail="Image not found") + return FileResponse(file_path) diff --git a/app/server/middleware.py b/app/server/middleware.py index b12024f..60e4c8d 100644 --- a/app/server/middleware.py +++ b/app/server/middleware.py @@ -1,13 +1,72 @@ +import hashlib +import 
hmac import tempfile +import time from pathlib import Path from fastapi import Depends, FastAPI, HTTPException, Request, status from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import ORJSONResponse from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer +from loguru import logger from ..utils import g_config +# Persistent directory for storing generated images +IMAGE_STORE_DIR = Path(tempfile.gettempdir()) / "gemini_fastapi_images" +IMAGE_STORE_DIR.mkdir(parents=True, exist_ok=True) + + +def get_image_store_dir() -> Path: + """Returns a persistent directory for storing images.""" + return IMAGE_STORE_DIR + + +def get_image_token(filename: str) -> str: + """Generate a HMAC-SHA256 token for a filename using the API key.""" + secret = g_config.server.api_key + if not secret: + return "" + + msg = filename.encode("utf-8") + secret_bytes = secret.encode("utf-8") + return hmac.new(secret_bytes, msg, hashlib.sha256).hexdigest() + + +def verify_image_token(filename: str, token: str | None) -> bool: + """Verify the provided token against the filename.""" + expected = get_image_token(filename) + if not expected: + return True # No auth required + if not token: + return False + return hmac.compare_digest(token, expected) + + +def cleanup_expired_images(retention_days: int) -> int: + """Delete images in IMAGE_STORE_DIR older than retention_days.""" + if retention_days <= 0: + return 0 + + now = time.time() + retention_seconds = retention_days * 24 * 60 * 60 + cutoff = now - retention_seconds + + count = 0 + for file_path in IMAGE_STORE_DIR.iterdir(): + if not file_path.is_file(): + continue + try: + if file_path.stat().st_mtime < cutoff: + file_path.unlink() + count += 1 + except Exception as e: + logger.warning(f"Failed to delete expired image {file_path}: {e}") + + if count > 0: + logger.info(f"Cleaned up {count} expired images.") + return count + def global_exception_handler(request: Request, exc: Exception): if isinstance(exc, HTTPException): From 28441765f3fa47787027620cdc4a6d9e7ddbdd94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:10:29 +0700 Subject: [PATCH 17/54] feat: Add token-based verification for image access --- app/server/chat.py | 4 ++-- app/server/images.py | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index db92dbc..9371137 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -44,7 +44,7 @@ from ..services.client import CODE_BLOCK_HINT, XML_WRAP_HINT from ..utils import g_config from ..utils.helper import estimate_tokens -from .middleware import get_image_store_dir, get_temp_dir, verify_api_key +from .middleware import get_image_store_dir, get_image_token, get_temp_dir, verify_api_key # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) @@ -1006,7 +1006,7 @@ async def _build_payload( img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" # Use static URL for compatibility - image_url = f"{request.base_url}images/{filename}" + image_url = f"{request.base_url}images/{filename}?token={get_image_token(filename)}" image_call_items.append( ResponseImageGenerationCall( diff --git a/app/server/images.py b/app/server/images.py index 2867239..fe078f7 100644 --- a/app/server/images.py +++ b/app/server/images.py @@ -1,13 +1,16 @@ -from fastapi import APIRouter, HTTPException +from fastapi import APIRouter, HTTPException, 
Query from fastapi.responses import FileResponse -from ..server.middleware import get_image_store_dir +from ..server.middleware import get_image_store_dir, verify_image_token router = APIRouter() @router.get("/images/{filename}", tags=["Images"]) -async def get_image(filename: str): +async def get_image(filename: str, token: str | None = Query(default=None)): + if not verify_image_token(filename, token): + raise HTTPException(status_code=403, detail="Invalid token") + image_store = get_image_store_dir() file_path = image_store / filename if not file_path.exists(): From 4509c14dfd5a38dfa6b989b3e9ac308e3bc8c982 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:22:07 +0700 Subject: [PATCH 18/54] Refactor: rename image store directory to `ai_generated_images` for clarity --- app/server/middleware.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/server/middleware.py b/app/server/middleware.py index 60e4c8d..630e1f5 100644 --- a/app/server/middleware.py +++ b/app/server/middleware.py @@ -13,7 +13,7 @@ from ..utils import g_config # Persistent directory for storing generated images -IMAGE_STORE_DIR = Path(tempfile.gettempdir()) / "gemini_fastapi_images" +IMAGE_STORE_DIR = Path(tempfile.gettempdir()) / "ai_generated_images" IMAGE_STORE_DIR.mkdir(parents=True, exist_ok=True) From 75e2f61d3a6b1d12269af2ee82344ab643f34e83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:31:15 +0700 Subject: [PATCH 19/54] fix: Update create_response to use FastAPI Request object for base_url and refactor variable handling --- app/server/chat.py | 44 +++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 9371137..0010f4a 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -9,7 +9,7 @@ from typing import Any, Iterator import orjson -from fastapi import APIRouter, Depends, HTTPException, status +from fastapi import APIRouter, Depends, HTTPException, Request, status from fastapi.responses import StreamingResponse from gemini_webapi.client import ChatSession from gemini_webapi.constants import Model @@ -773,19 +773,15 @@ async def create_chat_completion( @router.post("/v1/responses") async def create_response( - request: ResponseCreateRequest, + request_data: ResponseCreateRequest, + request: Request, api_key: str = Depends(verify_api_key), tmp_dir: Path = Depends(get_temp_dir), image_store: Path = Depends(get_image_store_dir), ): - base_messages, normalized_input = _response_items_to_messages(request.input) - if not base_messages: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, detail="No message input provided." - ) - - structured_requirement = _build_structured_requirement(request.response_format) - if structured_requirement and request.stream: + base_messages, normalized_input = _response_items_to_messages(request_data.input) + structured_requirement = _build_structured_requirement(request_data.response_format) + if structured_requirement and request_data.stream: logger.debug( "Structured response requested with streaming enabled; streaming not supported for Responses." 
) @@ -801,7 +797,7 @@ async def create_response( standard_tools: list[Tool] = [] image_tools: list[ResponseImageTool] = [] - if request.tools: + if request_data.tools: for t in request.tools: if isinstance(t, Tool): standard_tools.append(t) @@ -817,13 +813,15 @@ async def create_response( image_instruction = _build_image_generation_instruction( image_tools, - request.tool_choice if isinstance(request.tool_choice, ResponseToolChoice) else None, + request_data.tool_choice + if isinstance(request_data.tool_choice, ResponseToolChoice) + else None, ) if image_instruction: extra_instructions.append(image_instruction) logger.debug("Image generation support enabled for /v1/responses request.") - preface_messages = _instructions_to_messages(request.instructions) + preface_messages = _instructions_to_messages(request_data.instructions) conversation_messages = base_messages if preface_messages: conversation_messages = [*preface_messages, *base_messages] @@ -834,10 +832,10 @@ async def create_response( # Pass standard tools to the prompt builder # Determine tool_choice for standard tools (ignore image_generation choice here as it is handled via instruction) model_tool_choice = None - if isinstance(request.tool_choice, str): - model_tool_choice = request.tool_choice - elif isinstance(request.tool_choice, ToolChoiceFunction): - model_tool_choice = request.tool_choice + if isinstance(request_data.tool_choice, str): + model_tool_choice = request_data.tool_choice + elif isinstance(request_data.tool_choice, ToolChoiceFunction): + model_tool_choice = request_data.tool_choice # If tool_choice is ResponseToolChoice (image_generation), we don't pass it as a function tool choice. messages = _prepare_messages_for_model( @@ -851,7 +849,7 @@ async def create_response( db = LMDBConversationStore() try: - model = Model.from_name(request.model) + model = Model.from_name(request_data.model) except ValueError as exc: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc @@ -971,7 +969,7 @@ async def _build_payload( ) expects_image = ( - request.tool_choice is not None and request.tool_choice.type == "image_generation" + request_data.tool_choice is not None and request_data.tool_choice.type == "image_generation" ) images = model_output.images or [] logger.debug( @@ -1065,7 +1063,7 @@ async def _build_payload( response_payload = ResponseCreateResponse( id=response_id, created_at=created_time, - model=request.model, + model=request_data.model, output=[ ResponseOutputMessage( id=message_id, @@ -1079,9 +1077,9 @@ async def _build_payload( status="completed", usage=usage, input=normalized_input or None, - metadata=request.metadata or None, - tools=request.tools, - tool_choice=request.tool_choice, + metadata=request_data.metadata or None, + tools=request_data.tools, + tool_choice=request_data.tool_choice, ) try: From bde6d0d146fc9088df947cfc0958dc88963e93ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:35:44 +0700 Subject: [PATCH 20/54] fix: Correct attribute access in request_data handling within `chat.py` for tools, tool_choice, and streaming settings --- app/server/chat.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 0010f4a..9a3f19f 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -798,7 +798,7 @@ async def create_response( image_tools: list[ResponseImageTool] = [] if request_data.tools: - for t in request.tools: + for t in request_data.tools: if isinstance(t, 
Tool): standard_tools.append(t) elif isinstance(t, ResponseImageTool): @@ -984,7 +984,7 @@ async def _build_payload( summary = f"{summary[:197]}..." logger.warning( "Image generation requested but Gemini produced no images. " - f"client_id={client_id}, forced_tool_choice={request.tool_choice is not None}, " + f"client_id={client_id}, forced_tool_choice={request_data.tool_choice is not None}, " f"instruction_applied={bool(image_instruction)}, assistant_preview='{summary}'" ) detail = "LLM returned no images for the requested image_generation tool." @@ -1100,7 +1100,7 @@ async def _build_payload( except Exception as exc: logger.warning(f"Failed to save Responses conversation to LMDB: {exc}") - if request.stream: + if request_data.stream: logger.debug( f"Streaming Responses API payload (response_id={response_payload.id}, text_chunks={bool(assistant_text)})." ) From 601451a8dbf8cf689a482fd75cda399b5e815cd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:45:49 +0700 Subject: [PATCH 21/54] fix: Save generated images to persistent storage --- app/server/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/server/chat.py b/app/server/chat.py index 9a3f19f..4246c53 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -996,7 +996,7 @@ async def _build_payload( image_call_items: list[ResponseImageGenerationCall] = [] for image in images: try: - image_base64, width, height, filename = await _image_to_base64(image, tmp_dir) + image_base64, width, height, filename = await _image_to_base64(image, image_store) except Exception as exc: logger.warning(f"Failed to download generated image: {exc}") continue From 893eb6d47305f60c4b13896bfc48beb89909dd88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:51:42 +0700 Subject: [PATCH 22/54] fix: Remove unused `output_image` type from `ResponseOutputContent` and update response handling for consistency --- app/models/models.py | 3 +-- app/server/chat.py | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 2c987b8..c27e024 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -233,9 +233,8 @@ class ResponseUsage(BaseModel): class ResponseOutputContent(BaseModel): """Content item for Responses API output.""" - type: Literal["output_text", "output_image"] + type: Literal["output_text"] text: Optional[str] = "" - image_url: Optional[str] = None annotations: List[Dict[str, Any]] = Field(default_factory=list) diff --git a/app/server/chat.py b/app/server/chat.py index 4246c53..3396df0 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1015,13 +1015,10 @@ async def _build_payload( size=f"{width}x{height}" if width and height else None, ) ) - # Add as output_image content for compatibility + # Add as output_text content for compatibility response_contents.append( ResponseOutputContent(type="output_text", text=image_url, annotations=[]) ) - response_contents.append( - ResponseOutputContent(type="output_image", text="", image_url=image_url, annotations=[]) - ) tool_call_items: list[ResponseToolCall] = [] if detected_tool_calls: From 80462b586a110cad7e5b5cc259424e405ecbafc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 13:24:14 +0700 Subject: [PATCH 23/54] fix: Update image URL generation in chat response to use Markdown format for compatibility --- app/server/chat.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) 
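Note: all values in the sketch below are illustrative; it only shows the shape of the Markdown-wrapped URL this change emits.

```python
# A minimal sketch of the Markdown image reference built by this patch.
# Every value here is hypothetical; get_image_token() is the helper
# introduced earlier in this series.
filename = "img_0123abcdef.png"      # hypothetical stored filename
base_url = "http://localhost:8000/"  # hypothetical request.base_url
token = "3f2a9c"                     # hypothetical get_image_token(filename) digest
image_url = f"![{filename}]({base_url}images/{filename}?token={token})"
# -> ![img_0123abcdef.png](http://localhost:8000/images/img_0123abcdef.png?token=3f2a9c)
```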
diff --git a/app/server/chat.py b/app/server/chat.py index 3396df0..c2a60ab 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1004,7 +1004,9 @@ async def _build_payload( img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" # Use static URL for compatibility - image_url = f"{request.base_url}images/{filename}?token={get_image_token(filename)}" + image_url = ( + f"![{filename}]({request.base_url}images/{filename}?token={get_image_token(filename)})" + ) image_call_items.append( ResponseImageGenerationCall( From 8d49a72e0b5c605e2439d6dcbf149925cb670ded Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Mon, 8 Dec 2025 09:45:58 +0700 Subject: [PATCH 24/54] fix: Enhance error handling for full-size image saving and add fallback to default size --- app/server/chat.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/app/server/chat.py b/app/server/chat.py index c2a60ab..d14e9ce 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1557,7 +1557,11 @@ def _extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]: """Persist an image provided by gemini_webapi and return base64 plus dimensions and filename.""" if isinstance(image, GeneratedImage): - saved_path = await image.save(path=str(temp_dir), full_size=True) + try: + saved_path = await image.save(path=str(temp_dir), full_size=True) + except Exception as e: + logger.warning(f"Failed to download full-size image, retrying with default size: {e}") + saved_path = await image.save(path=str(temp_dir), full_size=False) else: saved_path = await image.save(path=str(temp_dir)) From d37eae0ab8c4590b3301dc8853ef22a512ab0d98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 9 Dec 2025 20:46:03 +0700 Subject: [PATCH 25/54] fix: Use filename as image ID to ensure consistency in generated image handling --- app/server/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/server/chat.py b/app/server/chat.py index d14e9ce..fc69293 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1010,7 +1010,7 @@ async def _build_payload( image_call_items.append( ResponseImageGenerationCall( - id=f"img_{uuid.uuid4().hex}", + id=filename.split(".")[0], status="completed", result=image_base64, output_format=img_format, From b9f776dfbb9d251ee016e05a1f6001907c3f8b84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 16 Dec 2025 19:50:07 +0700 Subject: [PATCH 26/54] fix: Enhance tempfile saving by adding custom headers, content-type handling, and improved extension determination --- app/utils/helper.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 3bff469..89fc31e 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -2,12 +2,17 @@ import mimetypes import tempfile from pathlib import Path +from urllib.parse import urlparse import httpx from loguru import logger VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} +HEADERS = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36" +} + def add_tag(role: str, content: str, unclose: bool = False) -> str: """Surround content with role tags""" @@ -36,7 +41,7 @@ async def save_file_to_tempfile( return path -async def save_url_to_tempfile(url: str, tempdir: Path | None = None): 
+async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path: data: bytes | None = None suffix: str | None = None if url.startswith("data:image/"): @@ -47,17 +52,26 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None): base64_data = url.split(",")[1] data = base64.b64decode(base64_data) - # Guess extension from mime type, default to the subtype if not found suffix = mimetypes.guess_extension(mime_type) if not suffix: suffix = f".{mime_type.split('/')[1]}" else: - # http files - async with httpx.AsyncClient() as client: + async with httpx.AsyncClient(headers=HEADERS, follow_redirects=True) as client: resp = await client.get(url) resp.raise_for_status() data = resp.content - suffix = Path(url).suffix or ".bin" + content_type = resp.headers.get("content-type") + + if content_type: + mime_type = content_type.split(";")[0].strip() + suffix = mimetypes.guess_extension(mime_type) + + if not suffix: + path_url = urlparse(url).path + suffix = Path(path_url).suffix + + if not suffix: + suffix = ".bin" with tempfile.NamedTemporaryFile(delete=False, suffix=suffix, dir=tempdir) as tmp: tmp.write(data) From 4b5fe078250ce0496ca93b1861f9622fc5171746 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 30 Dec 2025 22:39:05 +0700 Subject: [PATCH 27/54] feat: Add support for custom Gemini models and model loading strategies - Introduced `model_strategy` configuration for "append" (default + custom models) or "overwrite" (custom models only). - Enhanced `/v1/models` endpoint to return models based on the configured strategy. - Improved model loading with environment variable overrides and validation. - Refactored model handling logic for improved modularity and error handling. --- app/server/chat.py | 70 ++++++++++++++++++++++++++++++++++-------- app/utils/config.py | 75 ++++++++++++++++++++++++++++++++++++++++++++- config/config.yaml | 5 +++ 3 files changed, 136 insertions(+), 14 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index fc69293..0a4c16c 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -562,24 +562,64 @@ def _replace(match: re.Match[str]) -> str: return cleaned, tool_calls -@router.get("/v1/models", response_model=ModelListResponse) -async def list_models(api_key: str = Depends(verify_api_key)): - now = int(datetime.now(tz=timezone.utc).timestamp()) +def _get_model_by_name(name: str) -> Model: + """ + Retrieve a Model instance by name, considering custom models from config + and the update strategy (append or overwrite). + """ + strategy = g_config.gemini.model_strategy + custom_models = {m.model_name: m for m in g_config.gemini.models if m.model_name} - models = [] - for model in Model: - m_name = model.model_name - if not m_name or m_name == "unspecified": - continue + if name in custom_models: + return Model.from_dict(custom_models[name].model_dump()) + + if strategy == "overwrite": + raise ValueError(f"Model '{name}' not found in custom models (strategy='overwrite').") - models.append( + return Model.from_name(name) + + +def _get_available_models() -> list[ModelData]: + """ + Return a list of available models based on configuration strategy. 
+ """ + now = int(datetime.now(tz=timezone.utc).timestamp()) + strategy = g_config.gemini.model_strategy + models_data = [] + + custom_models = [m for m in g_config.gemini.models if m.model_name] + for m in custom_models: + models_data.append( ModelData( - id=m_name, + id=m.model_name, created=now, - owned_by="gemini-web", + owned_by="custom", ) ) + if strategy == "append": + custom_ids = {m.model_name for m in custom_models} + for model in Model: + m_name = model.model_name + if not m_name or m_name == "unspecified": + continue + if m_name in custom_ids: + continue + + models_data.append( + ModelData( + id=m_name, + created=now, + owned_by="gemini-web", + ) + ) + + return models_data + + +@router.get("/v1/models", response_model=ModelListResponse) +async def list_models(api_key: str = Depends(verify_api_key)): + models = _get_available_models() return ModelListResponse(data=models) @@ -592,7 +632,11 @@ async def create_chat_completion( ): pool = GeminiClientPool() db = LMDBConversationStore() - model = Model.from_name(request.model) + + try: + model = _get_model_by_name(request.model) + except ValueError as exc: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc if len(request.messages) == 0: raise HTTPException( @@ -849,7 +893,7 @@ async def create_response( db = LMDBConversationStore() try: - model = Model.from_name(request_data.model) + model = _get_model_by_name(request_data.model) except ValueError as exc: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc diff --git a/app/utils/config.py b/app/utils/config.py index 796ca75..a5c924a 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -50,12 +50,26 @@ def _blank_proxy_to_none(cls, value: Optional[str]) -> Optional[str]: return stripped or None +class GeminiModelConfig(BaseModel): + """Configuration for a custom Gemini model.""" + + model_name: Optional[str] = Field(default=None, description="Name of the model") + model_header: Optional[dict[str, Optional[str]]] = Field( + default=None, description="Header for the model" + ) + + class GeminiConfig(BaseModel): """Gemini API configuration""" clients: list[GeminiClientSettings] = Field( ..., description="List of Gemini client credential pairs" ) + models: list[GeminiModelConfig] = Field(default=[], description="List of custom Gemini models") + model_strategy: Literal["append", "overwrite"] = Field( + default="append", + description="Strategy for loading models: 'append' merges custom with default, 'overwrite' uses only custom", + ) timeout: int = Field(default=120, ge=1, description="Init timeout") auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies") refresh_interval: int = Field( @@ -68,6 +82,13 @@ class GeminiConfig(BaseModel): description="Maximum characters Gemini Web can accept per request", ) + @field_validator("models") + @classmethod + def _filter_valid_models(cls, v: list[GeminiModelConfig]) -> list[GeminiModelConfig]: + """Filter out models that don't have a name set (placeholders).""" + + return [model for model in v if model.model_name] + class CORSConfig(BaseModel): """CORS configuration""" @@ -211,6 +232,53 @@ def _merge_clients_with_env( return result_clients if result_clients else base_clients +def extract_gemini_models_env() -> dict[int, dict[str, str]]: + """Extract and remove all Gemini models related environment variables, return a mapping from index to field dict.""" + prefix = "CONFIG_GEMINI__MODELS__" + env_overrides: dict[int, dict[str, str]] = {} + 
to_delete = [] + for k, v in os.environ.items(): + if k.startswith(prefix): + parts = k.split("__") + if len(parts) < 4: + continue + index_str, field = parts[2], parts[3].lower() + if not index_str.isdigit(): + continue + idx = int(index_str) + env_overrides.setdefault(idx, {})[field] = v + to_delete.append(k) + # Remove these environment variables to avoid Pydantic parsing errors + for k in to_delete: + del os.environ[k] + return env_overrides + + +def _merge_models_with_env( + base_models: list[GeminiModelConfig] | None, + env_overrides: dict[int, dict[str, str]], +): + """Override base_models with env_overrides, return the new models list.""" + if not env_overrides: + return base_models or [] + result_models: list[GeminiModelConfig] = [] + if base_models: + result_models = [model.model_copy() for model in base_models] + + for idx in sorted(env_overrides): + overrides = env_overrides[idx] + if idx < len(result_models): + model_dict = result_models[idx].model_dump() + model_dict.update(overrides) + result_models[idx] = GeminiModelConfig(**model_dict) + elif idx == len(result_models): + new_model = GeminiModelConfig(**overrides) + result_models.append(new_model) + else: + raise IndexError(f"Model index {idx} in env is out of range (must be contiguous).") + return result_models + + def initialize_config() -> Config: """ Initialize the configuration. @@ -221,6 +289,8 @@ def initialize_config() -> Config: try: # First, extract and remove Gemini clients related environment variables env_clients_overrides = extract_gemini_clients_env() + # Extract and remove Gemini models related environment variables + env_models_overrides = extract_gemini_models_env() # Then, initialize Config with pydantic_settings config = Config() # type: ignore @@ -228,7 +298,10 @@ def initialize_config() -> Config: # Synthesize clients config.gemini.clients = _merge_clients_with_env( config.gemini.clients, env_clients_overrides - ) # type: ignore + ) + + # Synthesize models + config.gemini.models = _merge_models_with_env(config.gemini.models, env_models_overrides) return config except ValidationError as e: diff --git a/config/config.yaml b/config/config.yaml index 89c88b7..84c4602 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -27,6 +27,11 @@ gemini: refresh_interval: 540 # Refresh interval in seconds verbose: false # Enable verbose logging for Gemini requests max_chars_per_request: 1000000 # Maximum characters Gemini Web accepts per request. Non-pro users might have a lower limit + models: + - model_name: null + model_header: + x-goog-ext-xxxxxxxxx-jspb: null + model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only) storage: path: "data/lmdb" # Database storage path From 5cb29e8ea7333fd3c207f60a75b5269105bae8b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 30 Dec 2025 23:19:49 +0700 Subject: [PATCH 28/54] feat: Improve Gemini model environment variable parsing and nested field support - Enhanced `extract_gemini_models_env` to handle nested fields within environment variables. - Updated type hints for more flexibility in model overrides. - Improved `_merge_models_with_env` to better support field-level updates and appending new models. 
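As a quick sketch of the nested form this enables (the model name and header key below are illustrative placeholders, not real values):

```python
import os

from app.utils.config import extract_gemini_models_env

# Hypothetical nested overrides; keys after MODEL_HEADER keep their casing.
# Such variable names cannot be exported from a POSIX shell, but they can be
# set via Python or a container runtime.
os.environ["CONFIG_GEMINI__MODELS__0__MODEL_NAME"] = "gemini-3.0-pro"
os.environ["CONFIG_GEMINI__MODELS__0__MODEL_HEADER__x-goog-ext-525001261-jspb"] = "[1,null]"

overrides = extract_gemini_models_env()
# Expected shape:
# {0: {"model_name": "gemini-3.0-pro",
#      "model_header": {"x-goog-ext-525001261-jspb": "[1,null]"}}}
```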
--- app/utils/config.py | 31 +++++++++++++++++++++++-------- config/config.yaml | 2 +- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/app/utils/config.py b/app/utils/config.py index a5c924a..5782c66 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -1,6 +1,6 @@ import os import sys -from typing import Literal, Optional +from typing import Any, Literal, Optional from loguru import logger from pydantic import BaseModel, Field, ValidationError, field_validator @@ -232,21 +232,34 @@ def _merge_clients_with_env( return result_clients if result_clients else base_clients -def extract_gemini_models_env() -> dict[int, dict[str, str]]: - """Extract and remove all Gemini models related environment variables, return a mapping from index to field dict.""" +def extract_gemini_models_env() -> dict[int, dict[str, Any]]: + """Extract and remove all Gemini models related environment variables, supporting nested fields.""" prefix = "CONFIG_GEMINI__MODELS__" - env_overrides: dict[int, dict[str, str]] = {} + env_overrides: dict[int, dict[str, Any]] = {} to_delete = [] for k, v in os.environ.items(): if k.startswith(prefix): parts = k.split("__") if len(parts) < 4: continue - index_str, field = parts[2], parts[3].lower() + index_str = parts[2] if not index_str.isdigit(): continue idx = int(index_str) - env_overrides.setdefault(idx, {})[field] = v + + # Navigate to the correct nested dict + current = env_overrides.setdefault(idx, {}) + for i in range(3, len(parts) - 1): + field_name = parts[i].lower() + current = current.setdefault(field_name, {}) + + # Set the value (lowercase root field names, preserve sub-key casing) + last_part = parts[-1] + if len(parts) == 4: + current[last_part.lower()] = v + else: + current[last_part] = v + to_delete.append(k) # Remove these environment variables to avoid Pydantic parsing errors for k in to_delete: @@ -256,9 +269,9 @@ def extract_gemini_models_env() -> dict[int, dict[str, str]]: def _merge_models_with_env( base_models: list[GeminiModelConfig] | None, - env_overrides: dict[int, dict[str, str]], + env_overrides: dict[int, dict[str, Any]], ): - """Override base_models with env_overrides, return the new models list.""" + """Override base_models with env_overrides using standard update (replace whole fields).""" if not env_overrides: return base_models or [] result_models: list[GeminiModelConfig] = [] @@ -268,10 +281,12 @@ def _merge_models_with_env( for idx in sorted(env_overrides): overrides = env_overrides[idx] if idx < len(result_models): + # Update existing model: overwrite fields found in env model_dict = result_models[idx].model_dump() model_dict.update(overrides) result_models[idx] = GeminiModelConfig(**model_dict) elif idx == len(result_models): + # Append new model new_model = GeminiModelConfig(**overrides) result_models.append(new_model) else: diff --git a/config/config.yaml b/config/config.yaml index 84c4602..2fbc061 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -27,11 +27,11 @@ gemini: refresh_interval: 540 # Refresh interval in seconds verbose: false # Enable verbose logging for Gemini requests max_chars_per_request: 1000000 # Maximum characters Gemini Web accepts per request. 
Non-pro users might have a lower limit + model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only) models: - model_name: null model_header: x-goog-ext-xxxxxxxxx-jspb: null - model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only) storage: path: "data/lmdb" # Database storage path From f25f16d00118ebeea7936cea34797270d5137b5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 31 Dec 2025 09:52:49 +0700 Subject: [PATCH 29/54] refactor: Consolidate utility functions and clean up unused code - Moved utility functions like `strip_code_fence`, `extract_tool_calls`, and `iter_stream_segments` to a centralized helper module. - Removed unused and redundant private methods from `chat.py`, including `_strip_code_fence`, `_strip_tagged_blocks`, and `_strip_system_hints`. - Updated imports and references across modules for consistency. - Simplified tool call and streaming logic by replacing inline implementations with shared helper functions. --- app/server/chat.py | 306 ++++------------------------------------ app/services/client.py | 16 +-- app/utils/helper.py | 312 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 342 insertions(+), 292 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 0a4c16c..9485f7a 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1,12 +1,11 @@ import base64 import json import re -import struct import uuid from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path -from typing import Any, Iterator +from typing import Any import orjson from fastapi import APIRouter, Depends, HTTPException, Request, status @@ -21,7 +20,6 @@ ChatCompletionRequest, ContentItem, ConversationInStore, - FunctionCall, Message, ModelData, ModelListResponse, @@ -37,26 +35,28 @@ ResponseToolChoice, ResponseUsage, Tool, - ToolCall, ToolChoiceFunction, ) from ..services import GeminiClientPool, GeminiClientWrapper, LMDBConversationStore -from ..services.client import CODE_BLOCK_HINT, XML_WRAP_HINT from ..utils import g_config -from ..utils.helper import estimate_tokens +from ..utils.helper import ( + CODE_BLOCK_HINT, + CODE_HINT_STRIPPED, + XML_HINT_STRIPPED, + XML_WRAP_HINT, + estimate_tokens, + extract_image_dimensions, + extract_tool_calls, + iter_stream_segments, + remove_tool_call_blocks, + strip_code_fence, + text_from_message, +) from .middleware import get_image_store_dir, get_image_token, get_temp_dir, verify_api_key # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) CONTINUATION_HINT = "\n(More messages to come, please reply with just 'ok.')" -TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)```", re.DOTALL | re.IGNORECASE) -TOOL_CALL_RE = re.compile( - r"(.*?)", re.DOTALL | re.IGNORECASE -) -JSON_FENCE_RE = re.compile(r"^```(?:json)?\s*(.*?)\s*```$", re.DOTALL | re.IGNORECASE) -CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") -XML_HINT_STRIPPED = XML_WRAP_HINT.strip() -CODE_HINT_STRIPPED = CODE_BLOCK_HINT.strip() router = APIRouter() @@ -118,14 +118,6 @@ def _build_structured_requirement( ) -def _strip_code_fence(text: str) -> str: - """Remove surrounding ```json fences if present.""" - match = JSON_FENCE_RE.match(text.strip()) - if match: - return match.group(1).strip() - return text.strip() - - def _build_tool_prompt( tools: list[Tool], tool_choice: str | ToolChoiceFunction | None, @@ -312,75 +304,6 @@ def 
_prepare_messages_for_model( return prepared -def _strip_system_hints(text: str) -> str: - """Remove system-level hint text from a given string.""" - if not text: - return text - cleaned = _strip_tagged_blocks(text) - cleaned = cleaned.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "") - cleaned = cleaned.replace(CODE_BLOCK_HINT, "").replace(CODE_HINT_STRIPPED, "") - cleaned = CONTROL_TOKEN_RE.sub("", cleaned) - return cleaned.strip() - - -def _strip_tagged_blocks(text: str) -> str: - """Remove <|im_start|>role ... <|im_end|> sections, dropping tool blocks entirely. - - tool blocks are removed entirely (if missing end marker, drop to EOF). - - other roles: remove markers and role, keep inner content (if missing end marker, keep to EOF). - """ - if not text: - return text - - result: list[str] = [] - idx = 0 - length = len(text) - start_marker = "<|im_start|>" - end_marker = "<|im_end|>" - - while idx < length: - start = text.find(start_marker, idx) - if start == -1: - result.append(text[idx:]) - break - - # append any content before this block - result.append(text[idx:start]) - - role_start = start + len(start_marker) - newline = text.find("\n", role_start) - if newline == -1: - # malformed block; keep remainder as-is (safe behavior) - result.append(text[start:]) - break - - role = text[role_start:newline].strip().lower() - - end = text.find(end_marker, newline + 1) - if end == -1: - # missing end marker - if role == "tool": - # drop from start marker to EOF (skip remainder) - break - else: - # keep inner content from after the role newline to EOF - result.append(text[newline + 1 :]) - break - - block_end = end + len(end_marker) - - if role == "tool": - # drop whole block - idx = block_end - continue - - # keep the content without role markers - content = text[newline + 1 : end] - result.append(content) - idx = block_end - - return "".join(result) - - def _response_items_to_messages( items: str | list[ResponseInputItem], ) -> tuple[list[Message], str | list[ResponseInputItem]]: @@ -509,59 +432,6 @@ def _instructions_to_messages( return instruction_messages -def _remove_tool_call_blocks(text: str) -> str: - """Strip tool call code blocks from text.""" - if not text: - return text - cleaned = TOOL_BLOCK_RE.sub("", text) - return _strip_system_hints(cleaned) - - -def _extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: - """Extract tool call definitions and return cleaned text.""" - if not text: - return text, [] - - tool_calls: list[ToolCall] = [] - - def _replace(match: re.Match[str]) -> str: - block_content = match.group(1) - if not block_content: - return "" - - for call_match in TOOL_CALL_RE.finditer(block_content): - name = (call_match.group(1) or "").strip() - raw_args = (call_match.group(2) or "").strip() - if not name: - logger.warning( - f"Encountered tool_call block without a function name: {block_content}" - ) - continue - - arguments = raw_args - try: - parsed_args = json.loads(raw_args) - arguments = json.dumps(parsed_args, ensure_ascii=False) - except json.JSONDecodeError: - logger.warning( - f"Failed to parse tool call arguments for '{name}'. Passing raw string." 
- ) - - tool_calls.append( - ToolCall( - id=f"call_{uuid.uuid4().hex}", - type="function", - function=FunctionCall(name=name, arguments=arguments), - ) - ) - - return "" - - cleaned = TOOL_BLOCK_RE.sub(_replace, text) - cleaned = _strip_system_hints(cleaned) - return cleaned, tool_calls - - def _get_model_by_name(name: str) -> Model: """ Retrieve a Model instance by name, considering custom models from config @@ -742,12 +612,12 @@ async def create_chat_completion( detail="Gemini output parsing failed unexpectedly.", ) from exc - visible_output, tool_calls = _extract_tool_calls(raw_output_with_think) - storage_output = _remove_tool_call_blocks(raw_output_clean).strip() + visible_output, tool_calls = extract_tool_calls(raw_output_with_think) + storage_output = remove_tool_call_blocks(raw_output_clean).strip() tool_calls_payload = [call.model_dump(mode="json") for call in tool_calls] if structured_requirement: - cleaned_visible = _strip_code_fence(visible_output or "") + cleaned_visible = strip_code_fence(visible_output or "") if not cleaned_visible: raise HTTPException( status_code=status.HTTP_502_BAD_GATEWAY, @@ -982,12 +852,12 @@ async def _build_payload( detail="Gemini output parsing failed unexpectedly.", ) from exc - visible_text, detected_tool_calls = _extract_tool_calls(text_with_think) - storage_output = _remove_tool_call_blocks(text_without_think).strip() + visible_text, detected_tool_calls = extract_tool_calls(text_with_think) + storage_output = remove_tool_call_blocks(text_without_think).strip() assistant_text = LMDBConversationStore.remove_think_tags(visible_text.strip()) if structured_requirement: - cleaned_visible = _strip_code_fence(assistant_text or "") + cleaned_visible = strip_code_fence(assistant_text or "") if not cleaned_visible: raise HTTPException( status_code=status.HTTP_502_BAD_GATEWAY, @@ -1089,7 +959,7 @@ async def _build_payload( response_id = f"resp_{uuid.uuid4().hex}" message_id = f"msg_{uuid.uuid4().hex}" - input_tokens = sum(estimate_tokens(_text_from_message(msg)) for msg in messages) + input_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) tool_arg_text = "".join(call.function.arguments or "" for call in detected_tool_calls) completion_basis = assistant_text or "" if tool_arg_text: @@ -1152,25 +1022,6 @@ async def _build_payload( return response_payload -def _text_from_message(message: Message) -> str: - """Return text content from a message for token estimation.""" - base_text = "" - if isinstance(message.content, str): - base_text = message.content - elif isinstance(message.content, list): - base_text = "\n".join( - item.text or "" for item in message.content if getattr(item, "type", "") == "text" - ) - elif message.content is None: - base_text = "" - - if message.tool_calls: - tool_arg_text = "".join(call.function.arguments or "" for call in message.tool_calls) - base_text = f"{base_text}\n{tool_arg_text}" if base_text else tool_arg_text - - return base_text - - async def _find_reusable_session( db: LMDBConversationStore, pool: GeminiClientPool, @@ -1268,47 +1119,6 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s raise -def _iter_stream_segments(model_output: str, chunk_size: int = 64): - """Yield stream segments while keeping markers and words intact.""" - if not model_output: - return - - token_pattern = re.compile(r"\s+|\S+\s*") - pending = "" - - def _flush_pending() -> Iterator[str]: - nonlocal pending - if pending: - yield pending - pending = "" - - # Split on boundaries so the markers are 
never fragmented. - parts = re.split(r"()", model_output) - for part in parts: - if not part: - continue - if part in {"", ""}: - yield from _flush_pending() - yield part - continue - - for match in token_pattern.finditer(part): - token = match.group(0) - - if len(token) > chunk_size: - yield from _flush_pending() - for idx in range(0, len(token), chunk_size): - yield token[idx : idx + chunk_size] - continue - - if pending and len(pending) + len(token) > chunk_size: - yield from _flush_pending() - - pending += token - - yield from _flush_pending() - - def _create_streaming_response( model_output: str, tool_calls: list[dict], @@ -1320,7 +1130,7 @@ def _create_streaming_response( """Create streaming response with `usage` calculation included in the final chunk.""" # Calculate token usage - prompt_tokens = sum(estimate_tokens(_text_from_message(msg)) for msg in messages) + prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) tool_args = "".join(call.get("function", {}).get("arguments", "") for call in tool_calls or []) completion_tokens = estimate_tokens(model_output + tool_args) total_tokens = prompt_tokens + completion_tokens @@ -1338,7 +1148,7 @@ async def generate_stream(): yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" # Stream output text in chunks for efficiency - for chunk in _iter_stream_segments(model_output): + for chunk in iter_stream_segments(model_output): data = { "id": completion_id, "object": "chat.completion.chunk", @@ -1452,7 +1262,7 @@ async def generate_stream(): content_text += c.text if content_text: - for chunk in _iter_stream_segments(content_text): + for chunk in iter_stream_segments(content_text): delta_event = { **base_event, "type": "response.output_text.delta", @@ -1501,7 +1311,7 @@ def _create_standard_response( ) -> dict: """Create standard response""" # Calculate token usage - prompt_tokens = sum(estimate_tokens(_text_from_message(msg)) for msg in messages) + prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) tool_args = "".join(call.get("function", {}).get("arguments", "") for call in tool_calls or []) completion_tokens = estimate_tokens(model_output + tool_args) total_tokens = prompt_tokens + completion_tokens @@ -1534,70 +1344,6 @@ def _create_standard_response( return result -def _extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: - """Return image dimensions (width, height) if PNG or JPEG headers are present.""" - # PNG: dimensions stored in bytes 16..24 of the IHDR chunk - if len(data) >= 24 and data.startswith(b"\x89PNG\r\n\x1a\n"): - try: - width, height = struct.unpack(">II", data[16:24]) - return int(width), int(height) - except struct.error: - return None, None - - # JPEG: dimensions stored in SOF segment; iterate through markers to locate it - if len(data) >= 4 and data[0:2] == b"\xff\xd8": - idx = 2 - length = len(data) - sof_markers = { - 0xC0, - 0xC1, - 0xC2, - 0xC3, - 0xC5, - 0xC6, - 0xC7, - 0xC9, - 0xCA, - 0xCB, - 0xCD, - 0xCE, - 0xCF, - } - while idx < length: - # Find marker alignment (markers are prefixed with 0xFF bytes) - if data[idx] != 0xFF: - idx += 1 - continue - while idx < length and data[idx] == 0xFF: - idx += 1 - if idx >= length: - break - marker = data[idx] - idx += 1 - - if marker in (0xD8, 0xD9, 0x01) or 0xD0 <= marker <= 0xD7: - continue - - if idx + 1 >= length: - break - segment_length = (data[idx] << 8) + data[idx + 1] - idx += 2 - if segment_length < 2: - break - - if marker in sof_markers: - if idx + 4 < length: - # Skip precision 
byte at idx, then read height/width (big-endian)
-                    height = (data[idx + 1] << 8) + data[idx + 2]
-                    width = (data[idx + 3] << 8) + data[idx + 4]
-                    return int(width), int(height)
-                break
-
-            idx += segment_length - 2
-
-    return None, None
-
-
 async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]:
     """Persist an image provided by gemini_webapi and return base64 plus dimensions and filename."""
     if isinstance(image, GeneratedImage):
@@ -1619,6 +1365,6 @@ async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | Non
         original_path.rename(new_path)
 
     data = new_path.read_bytes()
-    width, height = _extract_image_dimensions(data)
+    width, height = extract_image_dimensions(data)
     filename = random_name
     return base64.b64encode(data).decode("ascii"), width, height, filename
diff --git a/app/services/client.py b/app/services/client.py
index 09c52c1..87c0ca7 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -9,18 +9,12 @@
 
 from ..models import Message
 from ..utils import g_config
-from ..utils.helper import add_tag, save_file_to_tempfile, save_url_to_tempfile
-
-XML_WRAP_HINT = (
-    "\nYou MUST wrap every tool call response inside a single fenced block exactly like:\n"
-    '```xml\n<tool_call name="tool_name">{"arg": "value"}</tool_call>\n```\n'
-    "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n"
-)
-CODE_BLOCK_HINT = (
-    "\nWhenever you include code, markup, or shell snippets, wrap each snippet in a Markdown fenced "
-    "block and supply the correct language label (for example, ```python ... ``` or ```html ... ```).\n"
-    "Fence ONLY the actual code/markup; keep all narrative or explanatory text outside the fences.\n"
+from ..utils.helper import (
+    add_tag,
+    save_file_to_tempfile,
+    save_url_to_tempfile,
 )
+
 HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);")
 MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=[-\\`*_{}\[\]()#+.!<>])")
 CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`\n]+?`)", re.DOTALL)
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 89fc31e..2627faa 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -1,17 +1,41 @@
 import base64
+import json
 import mimetypes
+import re
+import struct
 import tempfile
+import uuid
 from pathlib import Path
+from typing import Iterator
 from urllib.parse import urlparse
 
 import httpx
 from loguru import logger
 
-VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
+from ..models import FunctionCall, Message, ToolCall
 
 HEADERS = {
     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36"
 }
+VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
+XML_WRAP_HINT = (
+    "\nYou MUST wrap every tool call response inside a single fenced block exactly like:\n"
+    '```xml\n<tool_call name="tool_name">{"arg": "value"}</tool_call>\n```\n'
+    "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n"
+)
+CODE_BLOCK_HINT = (
+    "\nWhenever you include code, markup, or shell snippets, wrap each snippet in a Markdown fenced "
+    "block and supply the correct language label (for example, ```python ... ``` or ```html ... ```).\n"
+    "Fence ONLY the actual code/markup; keep all narrative or explanatory text outside the fences.\n"
+)
+TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE)
+TOOL_CALL_RE = re.compile(
+    r"<tool_call name=\"(.*?)\">(.*?)</tool_call>", re.DOTALL | re.IGNORECASE
+)
+JSON_FENCE_RE = re.compile(r"^```(?:json)?\s*(.*?)\s*```$", re.DOTALL | re.IGNORECASE)
+CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>")
+XML_HINT_STRIPPED = XML_WRAP_HINT.strip()
+CODE_HINT_STRIPPED = CODE_BLOCK_HINT.strip()
 
 
 def add_tag(role: str, content: str, unclose: bool = False) -> str:
@@ -78,3 +102,289 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path:
         path = Path(tmp.name)
 
     return path
+
+
+def strip_code_fence(text: str) -> str:
+    """Remove surrounding ```json fences if present."""
+    match = JSON_FENCE_RE.match(text.strip())
+    if match:
+        return match.group(1).strip()
+    return text.strip()
+
+
+def strip_tagged_blocks(text: str) -> str:
+    """Remove <|im_start|>role ... <|im_end|> sections, dropping tool blocks entirely.
+    - tool blocks are removed entirely (if missing end marker, drop to EOF).
+    - other roles: remove markers and role, keep inner content (if missing end marker, keep to EOF).
+    """
+    if not text:
+        return text
+
+    result: list[str] = []
+    idx = 0
+    length = len(text)
+    start_marker = "<|im_start|>"
+    end_marker = "<|im_end|>"
+
+    while idx < length:
+        start = text.find(start_marker, idx)
+        if start == -1:
+            result.append(text[idx:])
+            break
+
+        # append any content before this block
+        result.append(text[idx:start])
+
+        role_start = start + len(start_marker)
+        newline = text.find("\n", role_start)
+        if newline == -1:
+            # malformed block; keep remainder as-is (safe behavior)
+            result.append(text[start:])
+            break
+
+        role = text[role_start:newline].strip().lower()
+
+        end = text.find(end_marker, newline + 1)
+        if end == -1:
+            # missing end marker
+            if role == "tool":
+                # drop from start marker to EOF (skip remainder)
+                break
+            else:
+                # keep inner content from after the role newline to EOF
+                result.append(text[newline + 1 :])
+                break
+
+        block_end = end + len(end_marker)
+
+        if role == "tool":
+            # drop whole block
+            idx = block_end
+            continue
+
+        # keep the content without role markers
+        content = text[newline + 1 : end]
+        result.append(content)
+        idx = block_end
+
+    return "".join(result)
+
+
+def strip_system_hints(text: str) -> str:
+    """Remove system-level hint text from a given string."""
+    if not text:
+        return text
+    cleaned = strip_tagged_blocks(text)
+    cleaned = cleaned.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "")
+    cleaned = cleaned.replace(CODE_BLOCK_HINT, "").replace(CODE_HINT_STRIPPED, "")
+    cleaned = CONTROL_TOKEN_RE.sub("", cleaned)
+    return cleaned.strip()
+
+
+def remove_tool_call_blocks(text: str) -> str:
+    """Strip tool call code blocks from text."""
+    if not text:
+        return text
+
+    # 1. Remove fenced blocks ONLY if they contain tool calls
+    def _replace_block(match: re.Match[str]) -> str:
+        block_content = match.group(1)
+        if not block_content:
+            return match.group(0)
+
+        # Check if the block contains any tool call tag
+        if TOOL_CALL_RE.search(block_content):
+            return ""
+
+        # Preserve the block if no tool call found
+        return match.group(0)
+
+    cleaned = TOOL_BLOCK_RE.sub(_replace_block, text)
+
+    # 2. Remove orphaned tool calls
+    cleaned = TOOL_CALL_RE.sub("", cleaned)
+
+    return strip_system_hints(cleaned)
+
+
+def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]:
+    """Extract tool call definitions and return cleaned text."""
+    if not text:
+        return text, []
+
+    tool_calls: list[ToolCall] = []
+
+    def _create_tool_call(name: str, raw_args: str) -> None:
+        """Helper to parse args and append to tool_calls list."""
+        if not name:
+            logger.warning("Encountered tool_call without a function name.")
+            return
+
+        arguments = raw_args
+        try:
+            parsed_args = json.loads(raw_args)
+            arguments = json.dumps(parsed_args, ensure_ascii=False)
+        except json.JSONDecodeError:
+            logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.")
+
+        tool_calls.append(
+            ToolCall(
+                id=f"call_{uuid.uuid4().hex}",
+                type="function",
+                function=FunctionCall(name=name, arguments=arguments),
+            )
+        )
+
+    def _replace_block(match: re.Match[str]) -> str:
+        block_content = match.group(1)
+        if not block_content:
+            return match.group(0)
+
+        found_in_block = False
+        for call_match in TOOL_CALL_RE.finditer(block_content):
+            found_in_block = True
+            name = (call_match.group(1) or "").strip()
+            raw_args = (call_match.group(2) or "").strip()
+            _create_tool_call(name, raw_args)
+
+        if found_in_block:
+            return ""
+        else:
+            return match.group(0)
+
+    cleaned = TOOL_BLOCK_RE.sub(_replace_block, text)
+
+    def _replace_orphan(match: re.Match[str]) -> str:
+        name = (match.group(1) or "").strip()
+        raw_args = (match.group(2) or "").strip()
+        _create_tool_call(name, raw_args)
+        return ""
+
+    cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned)
+
+    cleaned = strip_system_hints(cleaned)
+    return cleaned, tool_calls
+
+
+def iter_stream_segments(model_output: str, chunk_size: int = 64) -> Iterator[str]:
+    """Yield stream segments while keeping markers and words intact."""
+    if not model_output:
+        return
+
+    token_pattern = re.compile(r"\s+|\S+\s*")
+    pending = ""
+
+    def _flush_pending() -> Iterator[str]:
+        nonlocal pending
+        if pending:
+            yield pending
+            pending = ""
+
+    # Split on <think> boundaries so the markers are never fragmented.
+    parts = re.split(r"(</?think>)", model_output)
+    for part in parts:
+        if not part:
+            continue
+        if part in {"<think>", "</think>"}:
+            yield from _flush_pending()
+            yield part
+            continue
+
+        for match in token_pattern.finditer(part):
+            token = match.group(0)
+
+            if len(token) > chunk_size:
+                yield from _flush_pending()
+                for idx in range(0, len(token), chunk_size):
+                    yield token[idx : idx + chunk_size]
+                continue
+
+            if pending and len(pending) + len(token) > chunk_size:
+                yield from _flush_pending()
+
+            pending += token
+
+    yield from _flush_pending()
+
+
+def text_from_message(message: Message) -> str:
+    """Return text content from a message for token estimation."""
+    base_text = ""
+    if isinstance(message.content, str):
+        base_text = message.content
+    elif isinstance(message.content, list):
+        base_text = "\n".join(
+            item.text or "" for item in message.content if getattr(item, "type", "") == "text"
+        )
+    elif message.content is None:
+        base_text = ""
+
+    if message.tool_calls:
+        tool_arg_text = "".join(call.function.arguments or "" for call in message.tool_calls)
+        base_text = f"{base_text}\n{tool_arg_text}" if base_text else tool_arg_text
+
+    return base_text
+
+
+def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]:
+    """Return image dimensions (width, height) if PNG or JPEG headers are present."""
+    # PNG: dimensions stored in bytes 16..24 of the IHDR chunk
+    if len(data) >= 24 and data.startswith(b"\x89PNG\r\n\x1a\n"):
+        try:
+            width, height = struct.unpack(">II", data[16:24])
+            return int(width), int(height)
+        except struct.error:
+            return None, None
+
+    # JPEG: dimensions stored in SOF segment; iterate through markers to locate it
+    if len(data) >= 4 and data[0:2] == b"\xff\xd8":
+        idx = 2
+        length = len(data)
+        sof_markers = {
+            0xC0,
+            0xC1,
+            0xC2,
+            0xC3,
+            0xC5,
+            0xC6,
+            0xC7,
+            0xC9,
+            0xCA,
+            0xCB,
+            0xCD,
+            0xCE,
+            0xCF,
+        }
+        while idx < length:
+            # Find marker alignment (markers are prefixed with 0xFF bytes)
+            if data[idx] != 0xFF:
+                idx += 1
+                continue
+            while idx < length and data[idx] == 0xFF:
+                idx += 1
+            if idx >= length:
+                break
+            marker = data[idx]
+            idx += 1
+
+            if marker in (0xD8, 0xD9, 0x01) or 0xD0 <= marker <= 0xD7:
+                continue
+
+            if idx + 1 >= length:
+                break
+            segment_length = (data[idx] << 8) + data[idx + 1]
+            idx += 2
+            if segment_length < 2:
+                break
+
+            if marker in sof_markers:
+                if idx + 4 < length:
+                    # Skip precision byte at idx, then read height/width (big-endian)
+                    height = (data[idx + 1] << 8) + data[idx + 2]
+                    width = (data[idx + 3] << 8) + data[idx + 4]
+                    return int(width), int(height)
+                break
+
+            idx += segment_length - 2
+
+    return None, None

From a1bc8e289ee797a761eb506dc4d01e486c919aef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Wed, 31 Dec 2025 10:01:17 +0700
Subject: [PATCH 30/54] fix: Handle None input in `estimate_tokens` and return 0 for empty text

---
 app/utils/helper.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 2627faa..28be240 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -47,8 +47,10 @@ def add_tag(role: str, content: str, unclose: bool = False) -> str:
     return f"<|im_start|>{role}\n{content}" + ("\n<|im_end|>" if not unclose else "")
 
 
-def estimate_tokens(text: str) -> int:
+def estimate_tokens(text: str | None) -> int:
     """Estimate the number of tokens heuristically based on character count"""
+    if not text:
+        return 0
     return int(len(text) / 3)
 
 
From a7e15d96bd2a4f62094bea02be7e86c8d305e59e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 31 Dec 2025 13:32:49 +0700 Subject: [PATCH 31/54] refactor: Simplify model configuration and add JSON parsing validators - Replaced unused model placeholder in `config.yaml` with an empty list. - Added JSON parsing validators for `model_header` and `models` to enhance flexibility and error handling. - Improved validation to filter out incomplete model configurations. --- app/utils/config.py | 24 +++++++++++++++++++++++- config/config.yaml | 5 +---- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/app/utils/config.py b/app/utils/config.py index 5782c66..69a4fac 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -1,3 +1,4 @@ +import json import os import sys from typing import Any, Literal, Optional @@ -58,6 +59,17 @@ class GeminiModelConfig(BaseModel): default=None, description="Header for the model" ) + @field_validator("model_header", mode="before") + @classmethod + def _parse_json_string(cls, v: Any) -> Any: + if isinstance(v, str) and v.strip().startswith("{"): + try: + return json.loads(v) + except json.JSONDecodeError: + # Return the original value to let Pydantic handle the error or type mismatch + return v + return v + class GeminiConfig(BaseModel): """Gemini API configuration""" @@ -82,11 +94,21 @@ class GeminiConfig(BaseModel): description="Maximum characters Gemini Web can accept per request", ) + @field_validator("models", mode="before") + @classmethod + def _parse_models_json(cls, v: Any) -> Any: + if isinstance(v, str) and v.strip().startswith("["): + try: + return json.loads(v) + except json.JSONDecodeError as e: + logger.warning(f"Failed to parse models JSON string: {e}") + return v + return v + @field_validator("models") @classmethod def _filter_valid_models(cls, v: list[GeminiModelConfig]) -> list[GeminiModelConfig]: """Filter out models that don't have a name set (placeholders).""" - return [model for model in v if model.model_name] diff --git a/config/config.yaml b/config/config.yaml index 2fbc061..f2b17fb 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -28,10 +28,7 @@ gemini: verbose: false # Enable verbose logging for Gemini requests max_chars_per_request: 1000000 # Maximum characters Gemini Web accepts per request. Non-pro users might have a lower limit model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only) - models: - - model_name: null - model_header: - x-goog-ext-xxxxxxxxx-jspb: null + models: [] storage: path: "data/lmdb" # Database storage path From 61c5f3b7af4ef6b78d5dc7e3d5ba9e6009b7d3cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 31 Dec 2025 13:46:58 +0700 Subject: [PATCH 32/54] refactor: Simplify Gemini model environment variable parsing with JSON support - Replaced prefix-based parsing with a root key approach. - Added JSON parsing to handle list-based model configurations. - Improved handling of errors and cleanup of environment variables. 
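For reference, a minimal sketch of the new single-variable form (all values illustrative):

```python
import json
import os

from app.utils.config import extract_gemini_models_env

# Illustrative only: the whole model list travels in one JSON-encoded env var.
os.environ["CONFIG_GEMINI__MODELS"] = json.dumps(
    [{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null]"}}]
)

overrides = extract_gemini_models_env()
# Expected: {0: {"model_name": "gemini-3.0-pro", "model_header": {...}}};
# the env var is removed afterwards so pydantic-settings does not re-parse it.
```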
--- app/utils/config.py | 44 +++++++++++++++++--------------------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/app/utils/config.py b/app/utils/config.py index 69a4fac..6cb5664 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -256,36 +256,26 @@ def _merge_clients_with_env( def extract_gemini_models_env() -> dict[int, dict[str, Any]]: """Extract and remove all Gemini models related environment variables, supporting nested fields.""" - prefix = "CONFIG_GEMINI__MODELS__" - env_overrides: dict[int, dict[str, Any]] = {} - to_delete = [] - for k, v in os.environ.items(): - if k.startswith(prefix): - parts = k.split("__") - if len(parts) < 4: - continue - index_str = parts[2] - if not index_str.isdigit(): - continue - idx = int(index_str) + import json - # Navigate to the correct nested dict - current = env_overrides.setdefault(idx, {}) - for i in range(3, len(parts) - 1): - field_name = parts[i].lower() - current = current.setdefault(field_name, {}) + root_key = "CONFIG_GEMINI__MODELS" + env_overrides: dict[int, dict[str, Any]] = {} - # Set the value (lowercase root field names, preserve sub-key casing) - last_part = parts[-1] - if len(parts) == 4: - current[last_part.lower()] = v - else: - current[last_part] = v + if root_key in os.environ: + try: + val = os.environ[root_key] + if val.strip().startswith("["): + models_list = json.loads(val) + if isinstance(models_list, list): + for idx, model_data in enumerate(models_list): + if isinstance(model_data, dict): + env_overrides[idx] = model_data + + # Remove the environment variable to avoid Pydantic parsing errors + del os.environ[root_key] + except Exception as e: + logger.warning(f"Failed to parse {root_key} as JSON: {e}") - to_delete.append(k) - # Remove these environment variables to avoid Pydantic parsing errors - for k in to_delete: - del os.environ[k] return env_overrides From efd056c270db5130c59b4e66c2543be7f5e8c6e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 31 Dec 2025 14:09:41 +0700 Subject: [PATCH 33/54] fix: Enhance Gemini model environment variable parsing with fallback to Python literals - Added `ast.literal_eval` as a fallback for parsing environment variables when JSON decoding fails. - Improved error handling and logging for invalid configurations. - Ensured proper cleanup of environment variables post-parsing. 
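A small sketch of the fallback path (value illustrative): Python-literal syntax with single quotes fails `json.loads` but is accepted by `ast.literal_eval`.

```python
import ast
import json

val = "[{'model_name': 'gemini-3.0-pro'}]"  # Python literal, not valid JSON
try:
    models = json.loads(val)
except json.JSONDecodeError:
    models = ast.literal_eval(val)  # -> [{'model_name': 'gemini-3.0-pro'}]
```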
--- app/utils/config.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/app/utils/config.py b/app/utils/config.py index 6cb5664..74a5294 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -1,3 +1,4 @@ +import ast import json import os import sys @@ -256,25 +257,31 @@ def _merge_clients_with_env( def extract_gemini_models_env() -> dict[int, dict[str, Any]]: """Extract and remove all Gemini models related environment variables, supporting nested fields.""" - import json - root_key = "CONFIG_GEMINI__MODELS" env_overrides: dict[int, dict[str, Any]] = {} if root_key in os.environ: + val = os.environ[root_key] + models_list = None + parsed_successfully = False + try: - val = os.environ[root_key] - if val.strip().startswith("["): - models_list = json.loads(val) - if isinstance(models_list, list): - for idx, model_data in enumerate(models_list): - if isinstance(model_data, dict): - env_overrides[idx] = model_data + models_list = json.loads(val) + parsed_successfully = True + except json.JSONDecodeError: + try: + models_list = ast.literal_eval(val) + parsed_successfully = True + except (ValueError, SyntaxError) as e: + logger.warning(f"Failed to parse {root_key} as JSON or Python literal: {e}") + + if parsed_successfully and isinstance(models_list, list): + for idx, model_data in enumerate(models_list): + if isinstance(model_data, dict): + env_overrides[idx] = model_data # Remove the environment variable to avoid Pydantic parsing errors del os.environ[root_key] - except Exception as e: - logger.warning(f"Failed to parse {root_key} as JSON: {e}") return env_overrides @@ -298,7 +305,7 @@ def _merge_models_with_env( model_dict.update(overrides) result_models[idx] = GeminiModelConfig(**model_dict) elif idx == len(result_models): - # Append new model + # Append new models new_model = GeminiModelConfig(**overrides) result_models.append(new_model) else: From 476b9dd228aa99501638987d1f44fe3c5eb23067 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 31 Dec 2025 17:53:38 +0700 Subject: [PATCH 34/54] fix: Improve regex patterns in helper module - Adjusted `TOOL_CALL_RE` regex pattern for better accuracy. 
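The concrete pattern is not legible in the hunk below; a rough sketch of the kind of matching `TOOL_CALL_RE` performs, assuming `<tool_call>...</tool_call>` markers inside the fenced block that `TOOL_BLOCK_RE` already extracts — the tag spelling and the exact accuracy tweak are assumptions:

```python
import re

# Assumed tags; the real pattern in helper.py may differ.
TOOL_CALL_RE = re.compile(r"<tool_call>\s*(.*?)\s*</tool_call>", re.DOTALL | re.IGNORECASE)

sample = '<TOOL_CALL>{"name": "get_weather", "arguments": {"city": "Hanoi"}}</TOOL_CALL>'
for m in TOOL_CALL_RE.finditer(sample):
    print(m.group(1))  # raw JSON payload between the markers
```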
--- app/utils/helper.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 28be240..99e6d7a 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -30,7 +30,7 @@ ) TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE) TOOL_CALL_RE = re.compile( - r"(.*?)", re.DOTALL | re.IGNORECASE + r"(.*?)", re.DOTALL | re.IGNORECASE ) JSON_FENCE_RE = re.compile(r"^```(?:json)?\s*(.*?)\s*```$", re.DOTALL | re.IGNORECASE) CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") @@ -140,7 +140,7 @@ def strip_tagged_blocks(text: str) -> str: role_start = start + len(start_marker) newline = text.find("\n", role_start) if newline == -1: - # malformed block; keep remainder as-is (safe behavior) + # malformed block; keep the remainder as-is (safe behavior) result.append(text[start:]) break @@ -150,7 +150,7 @@ def strip_tagged_blocks(text: str) -> str: if end == -1: # missing end marker if role == "tool": - # drop from start marker to EOF (skip remainder) + # drop from the start marker to EOF (skip the remainder) break else: # keep inner content from after the role newline to EOF @@ -160,7 +160,7 @@ def strip_tagged_blocks(text: str) -> str: block_end = end + len(end_marker) if role == "tool": - # drop whole block + # drop the whole block idx = block_end continue @@ -217,7 +217,7 @@ def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: tool_calls: list[ToolCall] = [] def _create_tool_call(name: str, raw_args: str) -> None: - """Helper to parse args and append to tool_calls list.""" + """Helper to parse args and append to the tool_calls list.""" if not name: logger.warning("Encountered tool_call without a function name.") return From 35c1e99993d11033ae9047e85f645ce5def7f09b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 13 Jan 2026 09:02:10 +0700 Subject: [PATCH 35/54] docs: Update README files to include custom model configuration and environment variable setup --- README.md | 47 ++++++++++++++++++++++++++++++++++++++++++++++- README.zh.md | 51 ++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 94 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 2df3a73..5d6de40 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,7 @@ services: - CONFIG_GEMINI__CLIENTS__0__SECURE_1PSID=${SECURE_1PSID} - CONFIG_GEMINI__CLIENTS__0__SECURE_1PSIDTS=${SECURE_1PSIDTS} - GEMINI_COOKIE_PATH=/app/cache # must match the cache volume mount above - restart: on-failure:3 # Avoid retrying too many times + restart: on-failure:3 # Avoid retrying too many times ``` Then run: @@ -187,6 +187,51 @@ To use Gemini-FastAPI, you need to extract your Gemini session cookies: Each client entry can be configured with a different proxy to work around rate limits. Omit the `proxy` field or set it to `null` or an empty string to keep a direct connection. +### Custom Models + +You can define custom models in `config/config.yaml` or via environment variables. + +#### YAML Configuration + +```yaml +gemini: + model_strategy: "append" # "append" (default + custom) or "overwrite" (custom only) + models: + - model_name: "gemini-3.0-pro" + model_header: + x-goog-ext-525001261-jspb: '[1,null,null,null,"9d8ca3786ebdfbea",null,null,0,[4],null,null,1]' +``` + +#### Environment Variables + +You can supply models as a JSON string or list structure via `CONFIG_GEMINI__MODELS`. 
+ +##### Bash + +```bash +export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" +export CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' +``` + +##### Docker Compose + +```yaml +services: + gemini-fastapi: + environment: + - CONFIG_GEMINI__MODEL_STRATEGY=overwrite + - CONFIG_GEMINI__MODELS=[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}] +``` + +##### Docker CLI + +```bash +docker run -d \ + -e CONFIG_GEMINI__MODEL_STRATEGY="overwrite" \ + -e CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' \ + ghcr.io/nativu5/gemini-fastapi +``` + ## Acknowledgments - [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) - The underlying Gemini web API client diff --git a/README.zh.md b/README.zh.md index 6b7dd74..791afd8 100644 --- a/README.zh.md +++ b/README.zh.md @@ -4,7 +4,6 @@ [![FastAPI](https://img.shields.io/badge/FastAPI-0.115+-green.svg)](https://fastapi.tiangolo.com/) [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) - [ [English](README.md) | 中文 ] 将 Gemini 网页端模型封装为兼容 OpenAI API 的 API Server。基于 [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) 实现。 @@ -50,6 +49,7 @@ pip install -e . ### 配置 编辑 `config/config.yaml` 并提供至少一组凭证: + ```yaml gemini: clients: @@ -118,7 +118,7 @@ services: - CONFIG_GEMINI__CLIENTS__0__SECURE_1PSID=${SECURE_1PSID} - CONFIG_GEMINI__CLIENTS__0__SECURE_1PSIDTS=${SECURE_1PSIDTS} - GEMINI_COOKIE_PATH=/app/cache # must match the cache volume mount above - restart: on-failure:3 # Avoid retrying too many times + restart: on-failure:3 # Avoid retrying too many times ``` 然后运行: @@ -186,6 +186,51 @@ export CONFIG_STORAGE__MAX_SIZE=268435456 # 256 MB 每个客户端条目可以配置不同的代理,从而规避速率限制。省略 `proxy` 字段或将其设置为 `null` 或空字符串以保持直连。 +### 自定义模型 + +你可以在 `config/config.yaml` 中或通过环境变量定义自定义模型。 + +#### YAML 配置 + +```yaml +gemini: + model_strategy: "append" # "append" (默认 + 自定义) 或 "overwrite" (仅限自定义) + models: + - model_name: "gemini-3.0-pro" + model_header: + x-goog-ext-525001261-jspb: '[1,null,null,null,"9d8ca3786ebdfbea",null,null,0,[4],null,null,1]' +``` + +#### 环境变量 + +你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串或列表结构的形式提供模型。 + +##### Bash + +```bash +export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" +export CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' +``` + +##### Docker Compose + +```yaml +services: + gemini-fastapi: + environment: + - CONFIG_GEMINI__MODEL_STRATEGY=overwrite + - CONFIG_GEMINI__MODELS=[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}] +``` + +##### Docker CLI + +```bash +docker run -d \ + -e CONFIG_GEMINI__MODEL_STRATEGY="overwrite" \ + -e CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' \ + ghcr.io/nativu5/gemini-fastapi +``` + ## 鸣谢 - [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) - 底层 Gemini Web API 客户端 @@ -193,4 +238,4 @@ export CONFIG_STORAGE__MAX_SIZE=268435456 # 256 MB ## 免责声明 -本项目与 Google 或 OpenAI 
无关,仅供学习和研究使用。本项目使用了逆向工程 API,可能不符合 Google 服务条款。使用风险自负。 \ No newline at end of file +本项目与 Google 或 OpenAI 无关,仅供学习和研究使用。本项目使用了逆向工程 API,可能不符合 Google 服务条款。使用风险自负。 From 9b8162133e86a323400e7e2fb36ed651b31c795f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 13 Jan 2026 09:23:28 +0700 Subject: [PATCH 36/54] fix: Remove unused headers from HTTP client in helper module --- app/utils/helper.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 99e6d7a..51a6ccf 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -14,9 +14,6 @@ from ..models import FunctionCall, Message, ToolCall -HEADERS = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36" -} VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} XML_WRAP_HINT = ( "\nYou MUST wrap every tool call response inside a single fenced block exactly like:\n" @@ -82,7 +79,7 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path: if not suffix: suffix = f".{mime_type.split('/')[1]}" else: - async with httpx.AsyncClient(headers=HEADERS, follow_redirects=True) as client: + async with httpx.AsyncClient(follow_redirects=True) as client: resp = await client.get(url) resp.raise_for_status() data = resp.content From 32a48dcdc98d9e96e791ae6f914e6b3f12804c97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 15 Jan 2026 10:18:58 +0700 Subject: [PATCH 37/54] fix: Update README and README.zh to clarify model configuration via environment variables; enhance error logging in config validation --- README.md | 23 +---------------------- README.zh.md | 23 +---------------------- app/server/chat.py | 6 ++++-- app/utils/config.py | 27 +++++++++++++++++++++++---- 4 files changed, 29 insertions(+), 50 deletions(-) diff --git a/README.md b/README.md index 5d6de40..d7a7214 100644 --- a/README.md +++ b/README.md @@ -204,34 +204,13 @@ gemini: #### Environment Variables -You can supply models as a JSON string or list structure via `CONFIG_GEMINI__MODELS`. - -##### Bash +You can supply models as a JSON string via `CONFIG_GEMINI__MODELS`. This provides a flexible way to override settings via the shell or in automated environments without modifying the configuration file. 
```bash export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" export CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' ``` -##### Docker Compose - -```yaml -services: - gemini-fastapi: - environment: - - CONFIG_GEMINI__MODEL_STRATEGY=overwrite - - CONFIG_GEMINI__MODELS=[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}] -``` - -##### Docker CLI - -```bash -docker run -d \ - -e CONFIG_GEMINI__MODEL_STRATEGY="overwrite" \ - -e CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' \ - ghcr.io/nativu5/gemini-fastapi -``` - ## Acknowledgments - [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) - The underlying Gemini web API client diff --git a/README.zh.md b/README.zh.md index 791afd8..09d80a4 100644 --- a/README.zh.md +++ b/README.zh.md @@ -203,34 +203,13 @@ gemini: #### 环境变量 -你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串或列表结构的形式提供模型。 - -##### Bash +你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串的形式提供模型。这为通过 shell 或在自动化环境中覆盖设置提供了一种灵活的方式,无需修改配置文件。 ```bash export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" export CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' ``` -##### Docker Compose - -```yaml -services: - gemini-fastapi: - environment: - - CONFIG_GEMINI__MODEL_STRATEGY=overwrite - - CONFIG_GEMINI__MODELS=[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}] -``` - -##### Docker CLI - -```bash -docker run -d \ - -e CONFIG_GEMINI__MODEL_STRATEGY="overwrite" \ - -e CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' \ - ghcr.io/nativu5/gemini-fastapi -``` - ## 鸣谢 - [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) - 底层 Gemini Web API 客户端 diff --git a/app/server/chat.py b/app/server/chat.py index 9485f7a..6e517ea 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -924,7 +924,7 @@ async def _build_payload( image_call_items.append( ResponseImageGenerationCall( - id=filename.split(".")[0], + id=filename.rsplit(".", 1)[0], status="completed", result=image_base64, output_format=img_format, @@ -1350,7 +1350,9 @@ async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | Non try: saved_path = await image.save(path=str(temp_dir), full_size=True) except Exception as e: - logger.warning(f"Failed to download full-size image, retrying with default size: {e}") + logger.warning( + f"Failed to download full-size GeneratedImage, retrying with default size: {e}" + ) saved_path = await image.save(path=str(temp_dir), full_size=False) else: saved_path = await image.save(path=str(temp_dir)) diff --git a/app/utils/config.py b/app/utils/config.py index 74a5294..a9c5d44 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -109,8 +109,21 @@ def _parse_models_json(cls, v: Any) -> Any: @field_validator("models") @classmethod def _filter_valid_models(cls, v: list[GeminiModelConfig]) -> list[GeminiModelConfig]: - """Filter out models that don't have a name set (placeholders).""" - 
return [model for model in v if model.model_name] + """Filter out models that don't have all required fields set.""" + valid_models = [] + for model in v: + if model.model_name and model.model_header: + valid_models.append(model) + else: + missing = [] + if not model.model_name: + missing.append("model_name") + if not model.model_header: + missing.append("model_header") + logger.warning( + f"Discarding custom model due to missing {', '.join(missing)}: {model}" + ) + return valid_models class CORSConfig(BaseModel): @@ -251,7 +264,10 @@ def _merge_clients_with_env( new_client = GeminiClientSettings(**overrides) result_clients.append(new_client) else: - raise IndexError(f"Client index {idx} in env is out of range.") + raise IndexError( + f"Client index {idx} in env is out of range (current count: {len(result_clients)}). " + "Client indices must be contiguous starting from 0." + ) return result_clients if result_clients else base_clients @@ -309,7 +325,10 @@ def _merge_models_with_env( new_model = GeminiModelConfig(**overrides) result_models.append(new_model) else: - raise IndexError(f"Model index {idx} in env is out of range (must be contiguous).") + raise IndexError( + f"Model index {idx} in env is out of range (current count: {len(result_models)}). " + "Model indices must be contiguous starting from 0." + ) return result_models From 0c00b089d5b33e394abaac6a1d36ae08cede166c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 15 Jan 2026 11:24:08 +0700 Subject: [PATCH 38/54] Update README and README.zh to clarify model configuration via JSON string or list structure for enhanced flexibility in automated environments --- README.md | 2 +- README.zh.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d7a7214..330e9c8 100644 --- a/README.md +++ b/README.md @@ -204,7 +204,7 @@ gemini: #### Environment Variables -You can supply models as a JSON string via `CONFIG_GEMINI__MODELS`. This provides a flexible way to override settings via the shell or in automated environments without modifying the configuration file. +You can supply models as a JSON string or list structure via `CONFIG_GEMINI__MODELS`. This provides a flexible way to override settings via the shell or in automated environments (e.g. Docker) without modifying the configuration file. 
```bash export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" diff --git a/README.zh.md b/README.zh.md index 09d80a4..2f9e1b5 100644 --- a/README.zh.md +++ b/README.zh.md @@ -203,7 +203,7 @@ gemini: #### 环境变量 -你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串的形式提供模型。这为通过 shell 或在自动化环境中覆盖设置提供了一种灵活的方式,无需修改配置文件。 +你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串或列表结构的形式提供模型。这为通过 shell 或在自动化环境(例如 Docker)中覆盖设置提供了一种灵活的方式,而无需修改配置文件。 ```bash export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" From b599d99f9967188bb8a277fd09951ddf32006f20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 23 Jan 2026 12:14:40 +0700 Subject: [PATCH 39/54] Refactor: compress JSON content to save tokens and streamline sending multiple chunks --- app/server/chat.py | 50 +++++++++++++++++++++++++++++------------- app/services/client.py | 4 ++-- app/utils/helper.py | 2 +- 3 files changed, 38 insertions(+), 18 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 6e517ea..1e7d786 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1,5 +1,7 @@ +import asyncio import base64 import json +import random import re import uuid from dataclasses import dataclass @@ -95,7 +97,7 @@ def _build_structured_requirement( schema_name = json_schema.get("name") or "response" strict = json_schema.get("strict", True) - pretty_schema = json.dumps(schema, ensure_ascii=False, indent=2, sort_keys=True) + pretty_schema = json.dumps(schema, ensure_ascii=False, separators=(",", ":"), sort_keys=True) instruction_parts = [ "You must respond with a single valid JSON document that conforms to the schema shown below.", "Do not include explanations, comments, or any text before or after the JSON.", @@ -135,7 +137,7 @@ def _build_tool_prompt( description = function.description or "No description provided." lines.append(f"Tool `{function.name}`: {description}") if function.parameters: - schema_text = json.dumps(function.parameters, ensure_ascii=False, indent=2) + schema_text = json.dumps(function.parameters, ensure_ascii=False, separators=(",", ":")) lines.append("Arguments JSON schema:") lines.append(schema_text) else: @@ -635,7 +637,7 @@ async def create_chat_completion( detail="LLM returned invalid JSON for the requested response_format.", ) from exc - canonical_output = json.dumps(structured_payload, ensure_ascii=False) + canonical_output = json.dumps(structured_payload, ensure_ascii=False, separators=(",", ":")) visible_output = canonical_output storage_output = canonical_output @@ -875,7 +877,7 @@ async def _build_payload( detail="LLM returned invalid JSON for the requested response_format.", ) from exc - canonical_output = json.dumps(structured_payload, ensure_ascii=False) + canonical_output = json.dumps(structured_payload, ensure_ascii=False, separators=(",", ":")) assistant_text = canonical_output storage_output = canonical_output logger.debug( @@ -1081,38 +1083,56 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s that Gemini can produce the actual answer. """ if len(text) <= MAX_CHARS_PER_REQUEST: - # No need to split - a single request is fine. 
try: return await session.send_message(text, files=files) except Exception as e: logger.exception(f"Error sending message to Gemini: {e}") raise + hint_len = len(CONTINUATION_HINT) - chunk_size = MAX_CHARS_PER_REQUEST - hint_len + safe_chunk_size = MAX_CHARS_PER_REQUEST - hint_len chunks: list[str] = [] pos = 0 total = len(text) + while pos < total: - end = min(pos + chunk_size, total) - chunk = text[pos:end] - pos = end + remaining = total - pos + if remaining <= MAX_CHARS_PER_REQUEST: + chunks.append(text[pos:]) + break + + end = pos + safe_chunk_size + slice_candidate = text[pos:end] + # Try to find a safe split point + split_idx = -1 + idx = slice_candidate.rfind("\n") + if idx != -1: + split_idx = idx + + if split_idx != -1: + split_at = pos + split_idx + 1 + else: + split_at = end - # If this is NOT the last chunk, add the continuation hint. - if end < total: - chunk += CONTINUATION_HINT + chunk = text[pos:split_at] + CONTINUATION_HINT chunks.append(chunk) + pos = split_at - # Fire off all but the last chunk, discarding the interim "ok" replies. - for chk in chunks[:-1]: + chunks_size = len(chunks) + for i, chk in enumerate(chunks[:-1]): try: + logger.debug(f"Sending chunk {i + 1}/{chunks_size}...") await session.send_message(chk) + delay = random.uniform(1.0, 3.0) + logger.debug(f"Sleeping for {delay:.2f}s...") + await asyncio.sleep(delay) except Exception as e: logger.exception(f"Error sending chunk to Gemini: {e}") raise - # The last chunk carries the files (if any) and we return its response. try: + logger.debug(f"Sending final chunk {chunks_size}/{chunks_size}...") return await session.send_message(chunks[-1], files=files) except Exception as e: logger.exception(f"Error sending final chunk to Gemini: {e}") diff --git a/app/services/client.py b/app/services/client.py index 87c0ca7..1f23271 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -123,7 +123,7 @@ async def process_message( args_text = call.function.arguments.strip() try: parsed_args = json.loads(args_text) - args_text = json.dumps(parsed_args, ensure_ascii=False) + args_text = json.dumps(parsed_args, ensure_ascii=False, separators=(",", ":")) except (json.JSONDecodeError, TypeError): # Leave args_text as is if it is not valid JSON pass @@ -132,7 +132,7 @@ async def process_message( ) if tool_blocks: - tool_section = "```xml\n" + "\n".join(tool_blocks) + "\n```" + tool_section = "```xml\n" + "".join(tool_blocks) + "\n```" text_fragments.append(tool_section) model_input = "\n".join(fragment for fragment in text_fragments if fragment) diff --git a/app/utils/helper.py b/app/utils/helper.py index 51a6ccf..578b666 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -222,7 +222,7 @@ def _create_tool_call(name: str, raw_args: str) -> None: arguments = raw_args try: parsed_args = json.loads(raw_args) - arguments = json.dumps(parsed_args, ensure_ascii=False) + arguments = json.dumps(parsed_args, ensure_ascii=False, separators=(",", ":")) except json.JSONDecodeError: logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.") From 186b8448d7f088df621b627ca7b28c5a7acaf341 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 23 Jan 2026 23:08:32 +0700 Subject: [PATCH 40/54] Refactor: Modify the LMDB store to fix issues where no conversation is found in either the raw or cleaned history. 
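A minimal sketch of the normalization idea, assuming plain dict messages rather than the Pydantic `Message` model; it shows why a string body and an equivalent single-part text list must hash identically for history lookups to succeed:

```python
import hashlib

import orjson


def hash_message(message: dict) -> str:
    """Hash a message after flattening pure-text list content to a plain string."""
    content = message.get("content")
    if isinstance(content, list) and all(
        isinstance(item, dict) and item.get("type") == "text" for item in content
    ):
        message = {**message, "content": "".join(item.get("text") or "" for item in content)}
    return hashlib.sha256(orjson.dumps(message, option=orjson.OPT_SORT_KEYS)).hexdigest()


a = {"role": "user", "content": "hi"}
b = {"role": "user", "content": [{"type": "text", "text": "hi"}]}
print(hash_message(a) == hash_message(b))  # True: both forms hash identically
```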
--- app/services/lmdb.py | 46 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 8ccb0d4..d671663 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -9,7 +9,7 @@ import orjson from loguru import logger -from ..models import ConversationInStore, Message +from ..models import ContentItem, ConversationInStore, Message from ..utils import g_config from ..utils.singleton import Singleton @@ -18,6 +18,19 @@ def _hash_message(message: Message) -> str: """Generate a hash for a single message.""" # Convert message to dict and sort keys for consistent hashing message_dict = message.model_dump(mode="json") + content = message_dict.get("content") + if isinstance(content, list): + is_pure_text = True + text_parts = [] + for item in content: + if not isinstance(item, dict) or item.get("type") != "text": + is_pure_text = False + break + text_parts.append(item.get("text") or "") + + if is_pure_text: + message_dict["content"] = "".join(text_parts) + message_bytes = orjson.dumps(message_dict, option=orjson.OPT_SORT_KEYS) return hashlib.sha256(message_bytes).hexdigest() @@ -435,12 +448,31 @@ def sanitize_assistant_messages(messages: list[Message]) -> list[Message]: """ cleaned_messages = [] for msg in messages: - if msg.role == "assistant" and isinstance(msg.content, str): - normalized_content = LMDBConversationStore.remove_think_tags(msg.content) - # Only create a new object if content actually changed - if normalized_content != msg.content: - cleaned_msg = Message(role=msg.role, content=normalized_content, name=msg.name) - cleaned_messages.append(cleaned_msg) + if msg.role == "assistant": + if isinstance(msg.content, str): + normalized_content = LMDBConversationStore.remove_think_tags(msg.content) + if normalized_content != msg.content: + cleaned_msg = Message( + role=msg.role, content=normalized_content, name=msg.name + ) + cleaned_messages.append(cleaned_msg) + else: + cleaned_messages.append(msg) + elif isinstance(msg.content, list): + new_content = [] + changed = False + for item in msg.content: + if isinstance(item, ContentItem) and item.type == "text" and item.text: + cleaned_text = LMDBConversationStore.remove_think_tags(item.text) + if cleaned_text != item.text: + changed = True + item = item.model_copy(update={"text": cleaned_text}) + new_content.append(item) + + if changed: + cleaned_messages.append(msg.model_copy(update={"content": new_content})) + else: + cleaned_messages.append(msg) else: cleaned_messages.append(msg) else: From 6dd1fecdced932c537f579a3c5dd3db87847d475 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 10:03:24 +0700 Subject: [PATCH 41/54] Refactor: Modify the LMDB store to fix issues where no conversation is found. 
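A minimal sketch of argument canonicalization, assuming tool-call arguments arrive as JSON text; whitespace and key-order differences collapse to one form before hashing:

```python
import orjson


def canonicalize_arguments(args: str) -> str:
    """Re-serialize a JSON argument string with sorted keys and no extra whitespace."""
    try:
        return orjson.dumps(orjson.loads(args), option=orjson.OPT_SORT_KEYS).decode("utf-8")
    except orjson.JSONDecodeError:
        return args  # not valid JSON; hash the raw string as-is


# Both spellings collapse to the same canonical form:
print(canonicalize_arguments('{"b": 1,  "a": 2}'))  # {"a":2,"b":1}
print(canonicalize_arguments('{"a": 2, "b": 1}'))   # {"a":2,"b":1}
```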
--- app/services/lmdb.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index d671663..93c7723 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -18,8 +18,12 @@ def _hash_message(message: Message) -> str: """Generate a hash for a single message.""" # Convert message to dict and sort keys for consistent hashing message_dict = message.model_dump(mode="json") + + # Normalize content: empty string -> None content = message_dict.get("content") - if isinstance(content, list): + if content == "": + message_dict["content"] = None + elif isinstance(content, list): is_pure_text = True text_parts = [] for item in content: @@ -29,7 +33,27 @@ def _hash_message(message: Message) -> str: text_parts.append(item.get("text") or "") if is_pure_text: - message_dict["content"] = "".join(text_parts) + text_content = "".join(text_parts) + message_dict["content"] = text_content if text_content else None + + # Normalize tool_calls: empty list -> None, and canonicalize arguments + tool_calls = message_dict.get("tool_calls") + if not tool_calls: + message_dict["tool_calls"] = None + elif isinstance(tool_calls, list): + for tool_call in tool_calls: + if isinstance(tool_call, dict) and "function" in tool_call: + func = tool_call["function"] + args = func.get("arguments") + if isinstance(args, str): + try: + # Parse and re-dump to canonicalize (remove extra whitespace, sort keys) + parsed = orjson.loads(args) + func["arguments"] = orjson.dumps( + parsed, option=orjson.OPT_SORT_KEYS + ).decode("utf-8") + except Exception: + pass message_bytes = orjson.dumps(message_dict, option=orjson.OPT_SORT_KEYS) return hashlib.sha256(message_bytes).hexdigest() From 20ed2456d2324501bbe4ba6392870cd612c9083c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 10:46:27 +0700 Subject: [PATCH 42/54] Refactor: Update all functions to use orjson for better performance --- app/main.py | 2 ++ app/server/chat.py | 17 ++++++++--------- app/services/client.py | 8 ++++---- app/utils/config.py | 14 +++++++------- app/utils/helper.py | 8 ++++---- 5 files changed, 25 insertions(+), 24 deletions(-) diff --git a/app/main.py b/app/main.py index c215e2a..307eb36 100644 --- a/app/main.py +++ b/app/main.py @@ -2,6 +2,7 @@ from contextlib import asynccontextmanager from fastapi import FastAPI +from fastapi.responses import ORJSONResponse from loguru import logger from .server.chat import router as chat_router @@ -92,6 +93,7 @@ def create_app() -> FastAPI: description="OpenAI-compatible API for Gemini Web", version="1.0.0", lifespan=lifespan, + default_response_class=ORJSONResponse, ) add_cors_middleware(app) diff --git a/app/server/chat.py b/app/server/chat.py index 1e7d786..a9d9dec 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1,6 +1,5 @@ import asyncio import base64 -import json import random import re import uuid @@ -97,7 +96,7 @@ def _build_structured_requirement( schema_name = json_schema.get("name") or "response" strict = json_schema.get("strict", True) - pretty_schema = json.dumps(schema, ensure_ascii=False, separators=(",", ":"), sort_keys=True) + pretty_schema = orjson.dumps(schema, option=orjson.OPT_SORT_KEYS).decode("utf-8") instruction_parts = [ "You must respond with a single valid JSON document that conforms to the schema shown below.", "Do not include explanations, comments, or any text before or after the JSON.", @@ -137,7 +136,7 @@ def _build_tool_prompt( description = 
function.description or "No description provided." lines.append(f"Tool `{function.name}`: {description}") if function.parameters: - schema_text = json.dumps(function.parameters, ensure_ascii=False, separators=(",", ":")) + schema_text = orjson.dumps(function.parameters).decode("utf-8") lines.append("Arguments JSON schema:") lines.append(schema_text) else: @@ -626,8 +625,8 @@ async def create_chat_completion( detail="LLM returned an empty response while JSON schema output was requested.", ) try: - structured_payload = json.loads(cleaned_visible) - except json.JSONDecodeError as exc: + structured_payload = orjson.loads(cleaned_visible) + except orjson.JSONDecodeError as exc: logger.warning( f"Failed to decode JSON for structured response (schema={structured_requirement.schema_name}): " f"{cleaned_visible}" @@ -637,7 +636,7 @@ async def create_chat_completion( detail="LLM returned invalid JSON for the requested response_format.", ) from exc - canonical_output = json.dumps(structured_payload, ensure_ascii=False, separators=(",", ":")) + canonical_output = orjson.dumps(structured_payload).decode("utf-8") visible_output = canonical_output storage_output = canonical_output @@ -866,8 +865,8 @@ async def _build_payload( detail="LLM returned an empty response while JSON schema output was requested.", ) try: - structured_payload = json.loads(cleaned_visible) - except json.JSONDecodeError as exc: + structured_payload = orjson.loads(cleaned_visible) + except orjson.JSONDecodeError as exc: logger.warning( f"Failed to decode JSON for structured response (schema={structured_requirement.schema_name}): " f"{cleaned_visible}" @@ -877,7 +876,7 @@ async def _build_payload( detail="LLM returned invalid JSON for the requested response_format.", ) from exc - canonical_output = json.dumps(structured_payload, ensure_ascii=False, separators=(",", ":")) + canonical_output = orjson.dumps(structured_payload).decode("utf-8") assistant_text = canonical_output storage_output = canonical_output logger.debug( diff --git a/app/services/client.py b/app/services/client.py index 1f23271..55be11a 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -1,9 +1,9 @@ import html -import json import re from pathlib import Path from typing import Any, cast +import orjson from gemini_webapi import GeminiClient, ModelOutput from loguru import logger @@ -122,9 +122,9 @@ async def process_message( for call in message.tool_calls: args_text = call.function.arguments.strip() try: - parsed_args = json.loads(args_text) - args_text = json.dumps(parsed_args, ensure_ascii=False, separators=(",", ":")) - except (json.JSONDecodeError, TypeError): + parsed_args = orjson.loads(args_text) + args_text = orjson.dumps(parsed_args).decode("utf-8") + except orjson.JSONDecodeError: # Leave args_text as is if it is not valid JSON pass tool_blocks.append( diff --git a/app/utils/config.py b/app/utils/config.py index a9c5d44..708462d 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -1,9 +1,9 @@ import ast -import json import os import sys from typing import Any, Literal, Optional +import orjson from loguru import logger from pydantic import BaseModel, Field, ValidationError, field_validator from pydantic_settings import ( @@ -65,8 +65,8 @@ class GeminiModelConfig(BaseModel): def _parse_json_string(cls, v: Any) -> Any: if isinstance(v, str) and v.strip().startswith("{"): try: - return json.loads(v) - except json.JSONDecodeError: + return orjson.loads(v) + except orjson.JSONDecodeError: # Return the original value to let Pydantic handle 
the error or type mismatch return v return v @@ -100,8 +100,8 @@ class GeminiConfig(BaseModel): def _parse_models_json(cls, v: Any) -> Any: if isinstance(v, str) and v.strip().startswith("["): try: - return json.loads(v) - except json.JSONDecodeError as e: + return orjson.loads(v) + except orjson.JSONDecodeError as e: logger.warning(f"Failed to parse models JSON string: {e}") return v return v @@ -282,9 +282,9 @@ def extract_gemini_models_env() -> dict[int, dict[str, Any]]: parsed_successfully = False try: - models_list = json.loads(val) + models_list = orjson.loads(val) parsed_successfully = True - except json.JSONDecodeError: + except orjson.JSONDecodeError: try: models_list = ast.literal_eval(val) parsed_successfully = True diff --git a/app/utils/helper.py b/app/utils/helper.py index 578b666..1dc518f 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -1,5 +1,4 @@ import base64 -import json import mimetypes import re import struct @@ -10,6 +9,7 @@ from urllib.parse import urlparse import httpx +import orjson from loguru import logger from ..models import FunctionCall, Message, ToolCall @@ -221,9 +221,9 @@ def _create_tool_call(name: str, raw_args: str) -> None: arguments = raw_args try: - parsed_args = json.loads(raw_args) - arguments = json.dumps(parsed_args, ensure_ascii=False, separators=(",", ":")) - except json.JSONDecodeError: + parsed_args = orjson.loads(raw_args) + arguments = orjson.dumps(parsed_args).decode("utf-8") + except orjson.JSONDecodeError: logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.") tool_calls.append( From f67fe63b3b654d3a28cc5ca0363a4ad894831d84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 10:47:26 +0700 Subject: [PATCH 43/54] Update project dependencies --- pyproject.toml | 21 ++++----- uv.lock | 118 +++++++++++++++++++++++++------------------------ 2 files changed, 71 insertions(+), 68 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 32a42b4..1c30f8e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,24 +5,25 @@ description = "FastAPI Server built on Gemini Web API" readme = "README.md" requires-python = "==3.12.*" dependencies = [ - "fastapi>=0.115.12", - "gemini-webapi>=1.17.0", - "lmdb>=1.6.2", - "loguru>=0.7.0", - "pydantic-settings[yaml]>=2.9.1", - "uvicorn>=0.34.1", - "uvloop>=0.21.0; sys_platform != 'win32'", + "fastapi>=0.128.0", + "gemini-webapi>=1.17.3", + "lmdb>=1.7.5", + "loguru>=0.7.3", + "orjson>=3.11.5", + "pydantic-settings[yaml]>=2.12.0", + "uvicorn>=0.40.0", + "uvloop>=0.22.1; sys_platform != 'win32'", ] [project.optional-dependencies] dev = [ - "ruff>=0.11.7", + "ruff>=0.14.14", ] [tool.ruff] line-length = 100 lint.select = ["E", "F", "W", "I", "RUF"] -lint.ignore = ["E501"] +lint.ignore = ["E501"] [tool.ruff.format] quote-style = "double" @@ -30,5 +31,5 @@ indent-style = "space" [dependency-groups] dev = [ - "ruff>=0.11.13", + "ruff>=0.14.14", ] diff --git a/uv.lock b/uv.lock index 923e6d3..50a73be 100644 --- a/uv.lock +++ b/uv.lock @@ -22,24 +22,24 @@ wheels = [ [[package]] name = "anyio" -version = "4.12.0" +version = "4.12.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "idna" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/16/ce/8a777047513153587e5434fd752e89334ac33e379aa3497db860eeb60377/anyio-4.12.0.tar.gz", hash = "sha256:73c693b567b0c55130c104d0b43a9baf3aa6a31fc6110116509f27bf75e21ec0", size = 228266, upload-time = "2025-11-28T23:37:38.911Z" 
} +sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7f/9c/36c5c37947ebfb8c7f22e0eb6e4d188ee2d53aa3880f3f2744fb894f0cb1/anyio-4.12.0-py3-none-any.whl", hash = "sha256:dad2376a628f98eeca4881fc56cd06affd18f659b17a747d3ff0307ced94b1bb", size = 113362, upload-time = "2025-11-28T23:36:57.897Z" }, + { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" }, ] [[package]] name = "certifi" -version = "2025.11.12" +version = "2026.1.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/8c/58f469717fa48465e4a50c014a0400602d3c437d7c0c468e17ada824da3a/certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316", size = 160538, upload-time = "2025-11-12T02:54:51.517Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload-time = "2026-01-04T02:42:41.825Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", size = 159438, upload-time = "2025-11-12T02:54:49.735Z" }, + { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" }, ] [[package]] @@ -65,7 +65,7 @@ wheels = [ [[package]] name = "fastapi" -version = "0.123.10" +version = "0.128.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-doc" }, @@ -73,9 +73,9 @@ dependencies = [ { name = "starlette" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/22/ff/e01087de891010089f1620c916c0c13130f3898177955c13e2b02d22ec4a/fastapi-0.123.10.tar.gz", hash = "sha256:624d384d7cda7c096449c889fc776a0571948ba14c3c929fa8e9a78cd0b0a6a8", size = 356360, upload-time = "2025-12-05T21:27:46.237Z" } +sdist = { url = "https://files.pythonhosted.org/packages/52/08/8c8508db6c7b9aae8f7175046af41baad690771c9bcde676419965e338c7/fastapi-0.128.0.tar.gz", hash = "sha256:1cc179e1cef10a6be60ffe429f79b829dce99d8de32d7acb7e6c8dfdf7f2645a", size = 365682, upload-time = "2025-12-27T15:21:13.714Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d7/f0/7cb92c4a720def85240fd63fbbcf147ce19e7a731c8e1032376bb5a486ac/fastapi-0.123.10-py3-none-any.whl", hash = "sha256:0503b7b7bc71bc98f7c90c9117d21fdf6147c0d74703011b87936becc86985c1", size = 111774, upload-time = "2025-12-05T21:27:44.78Z" }, + { url = "https://files.pythonhosted.org/packages/5c/05/5cbb59154b093548acd0f4c7c474a118eda06da25aa75c616b72d8fcd92a/fastapi-0.128.0-py3-none-any.whl", hash = 
"sha256:aebd93f9716ee3b4f4fcfe13ffb7cf308d99c9f3ab5622d8877441072561582d", size = 103094, upload-time = "2025-12-27T15:21:12.154Z" }, ] [[package]] @@ -87,6 +87,7 @@ dependencies = [ { name = "gemini-webapi" }, { name = "lmdb" }, { name = "loguru" }, + { name = "orjson" }, { name = "pydantic-settings", extra = ["yaml"] }, { name = "uvicorn" }, { name = "uvloop", marker = "sys_platform != 'win32'" }, @@ -104,19 +105,20 @@ dev = [ [package.metadata] requires-dist = [ - { name = "fastapi", specifier = ">=0.115.12" }, - { name = "gemini-webapi", specifier = ">=1.17.0" }, - { name = "lmdb", specifier = ">=1.6.2" }, - { name = "loguru", specifier = ">=0.7.0" }, - { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.9.1" }, - { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.11.7" }, - { name = "uvicorn", specifier = ">=0.34.1" }, - { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.21.0" }, + { name = "fastapi", specifier = ">=0.128.0" }, + { name = "gemini-webapi", specifier = ">=1.17.3" }, + { name = "lmdb", specifier = ">=1.7.5" }, + { name = "loguru", specifier = ">=0.7.3" }, + { name = "orjson", specifier = ">=3.11.5" }, + { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.12.0" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.14.14" }, + { name = "uvicorn", specifier = ">=0.40.0" }, + { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.22.1" }, ] provides-extras = ["dev"] [package.metadata.requires-dev] -dev = [{ name = "ruff", specifier = ">=0.11.13" }] +dev = [{ name = "ruff", specifier = ">=0.14.14" }] [[package]] name = "gemini-webapi" @@ -209,25 +211,25 @@ wheels = [ [[package]] name = "orjson" -version = "3.11.4" +version = "3.11.5" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c6/fe/ed708782d6709cc60eb4c2d8a361a440661f74134675c72990f2c48c785f/orjson-3.11.4.tar.gz", hash = "sha256:39485f4ab4c9b30a3943cfe99e1a213c4776fb69e8abd68f66b83d5a0b0fdc6d", size = 5945188, upload-time = "2025-10-24T15:50:38.027Z" } +sdist = { url = "https://files.pythonhosted.org/packages/04/b8/333fdb27840f3bf04022d21b654a35f58e15407183aeb16f3b41aa053446/orjson-3.11.5.tar.gz", hash = "sha256:82393ab47b4fe44ffd0a7659fa9cfaacc717eb617c93cde83795f14af5c2e9d5", size = 5972347, upload-time = "2025-12-06T15:55:39.458Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/63/51/6b556192a04595b93e277a9ff71cd0cc06c21a7df98bcce5963fa0f5e36f/orjson-3.11.4-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:d4371de39319d05d3f482f372720b841c841b52f5385bd99c61ed69d55d9ab50", size = 243571, upload-time = "2025-10-24T15:49:10.008Z" }, - { url = "https://files.pythonhosted.org/packages/1c/2c/2602392ddf2601d538ff11848b98621cd465d1a1ceb9db9e8043181f2f7b/orjson-3.11.4-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:e41fd3b3cac850eaae78232f37325ed7d7436e11c471246b87b2cd294ec94853", size = 128891, upload-time = "2025-10-24T15:49:11.297Z" }, - { url = "https://files.pythonhosted.org/packages/4e/47/bf85dcf95f7a3a12bf223394a4f849430acd82633848d52def09fa3f46ad/orjson-3.11.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:600e0e9ca042878c7fdf189cf1b028fe2c1418cc9195f6cb9824eb6ed99cb938", size = 130137, upload-time = "2025-10-24T15:49:12.544Z" }, - { url = 
"https://files.pythonhosted.org/packages/b4/4d/a0cb31007f3ab6f1fd2a1b17057c7c349bc2baf8921a85c0180cc7be8011/orjson-3.11.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7bbf9b333f1568ef5da42bc96e18bf30fd7f8d54e9ae066d711056add508e415", size = 129152, upload-time = "2025-10-24T15:49:13.754Z" }, - { url = "https://files.pythonhosted.org/packages/f7/ef/2811def7ce3d8576b19e3929fff8f8f0d44bc5eb2e0fdecb2e6e6cc6c720/orjson-3.11.4-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4806363144bb6e7297b8e95870e78d30a649fdc4e23fc84daa80c8ebd366ce44", size = 136834, upload-time = "2025-10-24T15:49:15.307Z" }, - { url = "https://files.pythonhosted.org/packages/00/d4/9aee9e54f1809cec8ed5abd9bc31e8a9631d19460e3b8470145d25140106/orjson-3.11.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad355e8308493f527d41154e9053b86a5be892b3b359a5c6d5d95cda23601cb2", size = 137519, upload-time = "2025-10-24T15:49:16.557Z" }, - { url = "https://files.pythonhosted.org/packages/db/ea/67bfdb5465d5679e8ae8d68c11753aaf4f47e3e7264bad66dc2f2249e643/orjson-3.11.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c8a7517482667fb9f0ff1b2f16fe5829296ed7a655d04d68cd9711a4d8a4e708", size = 136749, upload-time = "2025-10-24T15:49:17.796Z" }, - { url = "https://files.pythonhosted.org/packages/01/7e/62517dddcfce6d53a39543cd74d0dccfcbdf53967017c58af68822100272/orjson-3.11.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97eb5942c7395a171cbfecc4ef6701fc3c403e762194683772df4c54cfbb2210", size = 136325, upload-time = "2025-10-24T15:49:19.347Z" }, - { url = "https://files.pythonhosted.org/packages/18/ae/40516739f99ab4c7ec3aaa5cc242d341fcb03a45d89edeeaabc5f69cb2cf/orjson-3.11.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:149d95d5e018bdd822e3f38c103b1a7c91f88d38a88aada5c4e9b3a73a244241", size = 140204, upload-time = "2025-10-24T15:49:20.545Z" }, - { url = "https://files.pythonhosted.org/packages/82/18/ff5734365623a8916e3a4037fcef1cd1782bfc14cf0992afe7940c5320bf/orjson-3.11.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:624f3951181eb46fc47dea3d221554e98784c823e7069edb5dbd0dc826ac909b", size = 406242, upload-time = "2025-10-24T15:49:21.884Z" }, - { url = "https://files.pythonhosted.org/packages/e1/43/96436041f0a0c8c8deca6a05ebeaf529bf1de04839f93ac5e7c479807aec/orjson-3.11.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:03bfa548cf35e3f8b3a96c4e8e41f753c686ff3d8e182ce275b1751deddab58c", size = 150013, upload-time = "2025-10-24T15:49:23.185Z" }, - { url = "https://files.pythonhosted.org/packages/1b/48/78302d98423ed8780479a1e682b9aecb869e8404545d999d34fa486e573e/orjson-3.11.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:525021896afef44a68148f6ed8a8bf8375553d6066c7f48537657f64823565b9", size = 139951, upload-time = "2025-10-24T15:49:24.428Z" }, - { url = "https://files.pythonhosted.org/packages/4a/7b/ad613fdcdaa812f075ec0875143c3d37f8654457d2af17703905425981bf/orjson-3.11.4-cp312-cp312-win32.whl", hash = "sha256:b58430396687ce0f7d9eeb3dd47761ca7d8fda8e9eb92b3077a7a353a75efefa", size = 136049, upload-time = "2025-10-24T15:49:25.973Z" }, - { url = "https://files.pythonhosted.org/packages/b9/3c/9cf47c3ff5f39b8350fb21ba65d789b6a1129d4cbb3033ba36c8a9023520/orjson-3.11.4-cp312-cp312-win_amd64.whl", hash = "sha256:c6dbf422894e1e3c80a177133c0dda260f81428f9de16d61041949f6a2e5c140", size = 131461, upload-time = "2025-10-24T15:49:27.259Z" }, - { url = 
"https://files.pythonhosted.org/packages/c6/3b/e2425f61e5825dc5b08c2a5a2b3af387eaaca22a12b9c8c01504f8614c36/orjson-3.11.4-cp312-cp312-win_arm64.whl", hash = "sha256:d38d2bc06d6415852224fcc9c0bfa834c25431e466dc319f0edd56cca81aa96e", size = 126167, upload-time = "2025-10-24T15:49:28.511Z" }, + { url = "https://files.pythonhosted.org/packages/ef/a4/8052a029029b096a78955eadd68ab594ce2197e24ec50e6b6d2ab3f4e33b/orjson-3.11.5-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:334e5b4bff9ad101237c2d799d9fd45737752929753bf4faf4b207335a416b7d", size = 245347, upload-time = "2025-12-06T15:54:22.061Z" }, + { url = "https://files.pythonhosted.org/packages/64/67/574a7732bd9d9d79ac620c8790b4cfe0717a3d5a6eb2b539e6e8995e24a0/orjson-3.11.5-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:ff770589960a86eae279f5d8aa536196ebda8273a2a07db2a54e82b93bc86626", size = 129435, upload-time = "2025-12-06T15:54:23.615Z" }, + { url = "https://files.pythonhosted.org/packages/52/8d/544e77d7a29d90cf4d9eecd0ae801c688e7f3d1adfa2ebae5e1e94d38ab9/orjson-3.11.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed24250e55efbcb0b35bed7caaec8cedf858ab2f9f2201f17b8938c618c8ca6f", size = 132074, upload-time = "2025-12-06T15:54:24.694Z" }, + { url = "https://files.pythonhosted.org/packages/6e/57/b9f5b5b6fbff9c26f77e785baf56ae8460ef74acdb3eae4931c25b8f5ba9/orjson-3.11.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a66d7769e98a08a12a139049aac2f0ca3adae989817f8c43337455fbc7669b85", size = 130520, upload-time = "2025-12-06T15:54:26.185Z" }, + { url = "https://files.pythonhosted.org/packages/f6/6d/d34970bf9eb33f9ec7c979a262cad86076814859e54eb9a059a52f6dc13d/orjson-3.11.5-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:86cfc555bfd5794d24c6a1903e558b50644e5e68e6471d66502ce5cb5fdef3f9", size = 136209, upload-time = "2025-12-06T15:54:27.264Z" }, + { url = "https://files.pythonhosted.org/packages/e7/39/bc373b63cc0e117a105ea12e57280f83ae52fdee426890d57412432d63b3/orjson-3.11.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a230065027bc2a025e944f9d4714976a81e7ecfa940923283bca7bbc1f10f626", size = 139837, upload-time = "2025-12-06T15:54:28.75Z" }, + { url = "https://files.pythonhosted.org/packages/cb/aa/7c4818c8d7d324da220f4f1af55c343956003aa4d1ce1857bdc1d396ba69/orjson-3.11.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b29d36b60e606df01959c4b982729c8845c69d1963f88686608be9ced96dbfaa", size = 137307, upload-time = "2025-12-06T15:54:29.856Z" }, + { url = "https://files.pythonhosted.org/packages/46/bf/0993b5a056759ba65145effe3a79dd5a939d4a070eaa5da2ee3180fbb13f/orjson-3.11.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c74099c6b230d4261fdc3169d50efc09abf38ace1a42ea2f9994b1d79153d477", size = 139020, upload-time = "2025-12-06T15:54:31.024Z" }, + { url = "https://files.pythonhosted.org/packages/65/e8/83a6c95db3039e504eda60fc388f9faedbb4f6472f5aba7084e06552d9aa/orjson-3.11.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e697d06ad57dd0c7a737771d470eedc18e68dfdefcdd3b7de7f33dfda5b6212e", size = 141099, upload-time = "2025-12-06T15:54:32.196Z" }, + { url = "https://files.pythonhosted.org/packages/b9/b4/24fdc024abfce31c2f6812973b0a693688037ece5dc64b7a60c1ce69e2f2/orjson-3.11.5-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:e08ca8a6c851e95aaecc32bc44a5aa75d0ad26af8cdac7c77e4ed93acf3d5b69", size = 413540, upload-time = 
"2025-12-06T15:54:33.361Z" }, + { url = "https://files.pythonhosted.org/packages/d9/37/01c0ec95d55ed0c11e4cae3e10427e479bba40c77312b63e1f9665e0737d/orjson-3.11.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e8b5f96c05fce7d0218df3fdfeb962d6b8cfff7e3e20264306b46dd8b217c0f3", size = 151530, upload-time = "2025-12-06T15:54:34.6Z" }, + { url = "https://files.pythonhosted.org/packages/f9/d4/f9ebc57182705bb4bbe63f5bbe14af43722a2533135e1d2fb7affa0c355d/orjson-3.11.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ddbfdb5099b3e6ba6d6ea818f61997bb66de14b411357d24c4612cf1ebad08ca", size = 141863, upload-time = "2025-12-06T15:54:35.801Z" }, + { url = "https://files.pythonhosted.org/packages/0d/04/02102b8d19fdcb009d72d622bb5781e8f3fae1646bf3e18c53d1bc8115b5/orjson-3.11.5-cp312-cp312-win32.whl", hash = "sha256:9172578c4eb09dbfcf1657d43198de59b6cef4054de385365060ed50c458ac98", size = 135255, upload-time = "2025-12-06T15:54:37.209Z" }, + { url = "https://files.pythonhosted.org/packages/d4/fb/f05646c43d5450492cb387de5549f6de90a71001682c17882d9f66476af5/orjson-3.11.5-cp312-cp312-win_amd64.whl", hash = "sha256:2b91126e7b470ff2e75746f6f6ee32b9ab67b7a93c8ba1d15d3a0caaf16ec875", size = 133252, upload-time = "2025-12-06T15:54:38.401Z" }, + { url = "https://files.pythonhosted.org/packages/dc/a6/7b8c0b26ba18c793533ac1cd145e131e46fcf43952aa94c109b5b913c1f0/orjson-3.11.5-cp312-cp312-win_arm64.whl", hash = "sha256:acbc5fac7e06777555b0722b8ad5f574739e99ffe99467ed63da98f97f9ca0fe", size = 126777, upload-time = "2025-12-06T15:54:39.515Z" }, ] [[package]] @@ -322,28 +324,28 @@ wheels = [ [[package]] name = "ruff" -version = "0.14.8" +version = "0.14.14" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ed/d9/f7a0c4b3a2bf2556cd5d99b05372c29980249ef71e8e32669ba77428c82c/ruff-0.14.8.tar.gz", hash = "sha256:774ed0dd87d6ce925e3b8496feb3a00ac564bea52b9feb551ecd17e0a23d1eed", size = 5765385, upload-time = "2025-12-04T15:06:17.669Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2e/06/f71e3a86b2df0dfa2d2f72195941cd09b44f87711cb7fa5193732cb9a5fc/ruff-0.14.14.tar.gz", hash = "sha256:2d0f819c9a90205f3a867dbbd0be083bee9912e170fd7d9704cc8ae45824896b", size = 4515732, upload-time = "2026-01-22T22:30:17.527Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/48/b8/9537b52010134b1d2b72870cc3f92d5fb759394094741b09ceccae183fbe/ruff-0.14.8-py3-none-linux_armv6l.whl", hash = "sha256:ec071e9c82eca417f6111fd39f7043acb53cd3fde9b1f95bbed745962e345afb", size = 13441540, upload-time = "2025-12-04T15:06:14.896Z" }, - { url = "https://files.pythonhosted.org/packages/24/00/99031684efb025829713682012b6dd37279b1f695ed1b01725f85fd94b38/ruff-0.14.8-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:8cdb162a7159f4ca36ce980a18c43d8f036966e7f73f866ac8f493b75e0c27e9", size = 13669384, upload-time = "2025-12-04T15:06:51.809Z" }, - { url = "https://files.pythonhosted.org/packages/72/64/3eb5949169fc19c50c04f28ece2c189d3b6edd57e5b533649dae6ca484fe/ruff-0.14.8-py3-none-macosx_11_0_arm64.whl", hash = "sha256:2e2fcbefe91f9fad0916850edf0854530c15bd1926b6b779de47e9ab619ea38f", size = 12806917, upload-time = "2025-12-04T15:06:08.925Z" }, - { url = "https://files.pythonhosted.org/packages/c4/08/5250babb0b1b11910f470370ec0cbc67470231f7cdc033cee57d4976f941/ruff-0.14.8-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9d70721066a296f45786ec31916dc287b44040f553da21564de0ab4d45a869b", size = 13256112, upload-time = "2025-12-04T15:06:23.498Z" }, - { url 
= "https://files.pythonhosted.org/packages/78/4c/6c588e97a8e8c2d4b522c31a579e1df2b4d003eddfbe23d1f262b1a431ff/ruff-0.14.8-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2c87e09b3cd9d126fc67a9ecd3b5b1d3ded2b9c7fce3f16e315346b9d05cfb52", size = 13227559, upload-time = "2025-12-04T15:06:33.432Z" }, - { url = "https://files.pythonhosted.org/packages/23/ce/5f78cea13eda8eceac71b5f6fa6e9223df9b87bb2c1891c166d1f0dce9f1/ruff-0.14.8-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d62cb310c4fbcb9ee4ac023fe17f984ae1e12b8a4a02e3d21489f9a2a5f730c", size = 13896379, upload-time = "2025-12-04T15:06:02.687Z" }, - { url = "https://files.pythonhosted.org/packages/cf/79/13de4517c4dadce9218a20035b21212a4c180e009507731f0d3b3f5df85a/ruff-0.14.8-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:1af35c2d62633d4da0521178e8a2641c636d2a7153da0bac1b30cfd4ccd91344", size = 15372786, upload-time = "2025-12-04T15:06:29.828Z" }, - { url = "https://files.pythonhosted.org/packages/00/06/33df72b3bb42be8a1c3815fd4fae83fa2945fc725a25d87ba3e42d1cc108/ruff-0.14.8-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:25add4575ffecc53d60eed3f24b1e934493631b48ebbc6ebaf9d8517924aca4b", size = 14990029, upload-time = "2025-12-04T15:06:36.812Z" }, - { url = "https://files.pythonhosted.org/packages/64/61/0f34927bd90925880394de0e081ce1afab66d7b3525336f5771dcf0cb46c/ruff-0.14.8-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4c943d847b7f02f7db4201a0600ea7d244d8a404fbb639b439e987edcf2baf9a", size = 14407037, upload-time = "2025-12-04T15:06:39.979Z" }, - { url = "https://files.pythonhosted.org/packages/96/bc/058fe0aefc0fbf0d19614cb6d1a3e2c048f7dc77ca64957f33b12cfdc5ef/ruff-0.14.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb6e8bf7b4f627548daa1b69283dac5a296bfe9ce856703b03130732e20ddfe2", size = 14102390, upload-time = "2025-12-04T15:06:46.372Z" }, - { url = "https://files.pythonhosted.org/packages/af/a4/e4f77b02b804546f4c17e8b37a524c27012dd6ff05855d2243b49a7d3cb9/ruff-0.14.8-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:7aaf2974f378e6b01d1e257c6948207aec6a9b5ba53fab23d0182efb887a0e4a", size = 14230793, upload-time = "2025-12-04T15:06:20.497Z" }, - { url = "https://files.pythonhosted.org/packages/3f/52/bb8c02373f79552e8d087cedaffad76b8892033d2876c2498a2582f09dcf/ruff-0.14.8-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e5758ca513c43ad8a4ef13f0f081f80f08008f410790f3611a21a92421ab045b", size = 13160039, upload-time = "2025-12-04T15:06:49.06Z" }, - { url = "https://files.pythonhosted.org/packages/1f/ad/b69d6962e477842e25c0b11622548df746290cc6d76f9e0f4ed7456c2c31/ruff-0.14.8-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:f74f7ba163b6e85a8d81a590363bf71618847e5078d90827749bfda1d88c9cdf", size = 13205158, upload-time = "2025-12-04T15:06:54.574Z" }, - { url = "https://files.pythonhosted.org/packages/06/63/54f23da1315c0b3dfc1bc03fbc34e10378918a20c0b0f086418734e57e74/ruff-0.14.8-py3-none-musllinux_1_2_i686.whl", hash = "sha256:eed28f6fafcc9591994c42254f5a5c5ca40e69a30721d2ab18bb0bb3baac3ab6", size = 13469550, upload-time = "2025-12-04T15:05:59.209Z" }, - { url = "https://files.pythonhosted.org/packages/70/7d/a4d7b1961e4903bc37fffb7ddcfaa7beb250f67d97cfd1ee1d5cddb1ec90/ruff-0.14.8-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:21d48fa744c9d1cb8d71eb0a740c4dd02751a5de9db9a730a8ef75ca34cf138e", size = 14211332, upload-time = "2025-12-04T15:06:06.027Z" }, - { url = 
"https://files.pythonhosted.org/packages/5d/93/2a5063341fa17054e5c86582136e9895db773e3c2ffb770dde50a09f35f0/ruff-0.14.8-py3-none-win32.whl", hash = "sha256:15f04cb45c051159baebb0f0037f404f1dc2f15a927418f29730f411a79bc4e7", size = 13151890, upload-time = "2025-12-04T15:06:11.668Z" }, - { url = "https://files.pythonhosted.org/packages/02/1c/65c61a0859c0add13a3e1cbb6024b42de587456a43006ca2d4fd3d1618fe/ruff-0.14.8-py3-none-win_amd64.whl", hash = "sha256:9eeb0b24242b5bbff3011409a739929f497f3fb5fe3b5698aba5e77e8c833097", size = 14537826, upload-time = "2025-12-04T15:06:26.409Z" }, - { url = "https://files.pythonhosted.org/packages/6d/63/8b41cea3afd7f58eb64ac9251668ee0073789a3bc9ac6f816c8c6fef986d/ruff-0.14.8-py3-none-win_arm64.whl", hash = "sha256:965a582c93c63fe715fd3e3f8aa37c4b776777203d8e1d8aa3cc0c14424a4b99", size = 13634522, upload-time = "2025-12-04T15:06:43.212Z" }, + { url = "https://files.pythonhosted.org/packages/d2/89/20a12e97bc6b9f9f68343952da08a8099c57237aef953a56b82711d55edd/ruff-0.14.14-py3-none-linux_armv6l.whl", hash = "sha256:7cfe36b56e8489dee8fbc777c61959f60ec0f1f11817e8f2415f429552846aed", size = 10467650, upload-time = "2026-01-22T22:30:08.578Z" }, + { url = "https://files.pythonhosted.org/packages/a3/b1/c5de3fd2d5a831fcae21beda5e3589c0ba67eec8202e992388e4b17a6040/ruff-0.14.14-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6006a0082336e7920b9573ef8a7f52eec837add1265cc74e04ea8a4368cd704c", size = 10883245, upload-time = "2026-01-22T22:30:04.155Z" }, + { url = "https://files.pythonhosted.org/packages/b8/7c/3c1db59a10e7490f8f6f8559d1db8636cbb13dccebf18686f4e3c9d7c772/ruff-0.14.14-py3-none-macosx_11_0_arm64.whl", hash = "sha256:026c1d25996818f0bf498636686199d9bd0d9d6341c9c2c3b62e2a0198b758de", size = 10231273, upload-time = "2026-01-22T22:30:34.642Z" }, + { url = "https://files.pythonhosted.org/packages/a1/6e/5e0e0d9674be0f8581d1f5e0f0a04761203affce3232c1a1189d0e3b4dad/ruff-0.14.14-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f666445819d31210b71e0a6d1c01e24447a20b85458eea25a25fe8142210ae0e", size = 10585753, upload-time = "2026-01-22T22:30:31.781Z" }, + { url = "https://files.pythonhosted.org/packages/23/09/754ab09f46ff1884d422dc26d59ba18b4e5d355be147721bb2518aa2a014/ruff-0.14.14-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3c0f18b922c6d2ff9a5e6c3ee16259adc513ca775bcf82c67ebab7cbd9da5bc8", size = 10286052, upload-time = "2026-01-22T22:30:24.827Z" }, + { url = "https://files.pythonhosted.org/packages/c8/cc/e71f88dd2a12afb5f50733851729d6b571a7c3a35bfdb16c3035132675a0/ruff-0.14.14-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1629e67489c2dea43e8658c3dba659edbfd87361624b4040d1df04c9740ae906", size = 11043637, upload-time = "2026-01-22T22:30:13.239Z" }, + { url = "https://files.pythonhosted.org/packages/67/b2/397245026352494497dac935d7f00f1468c03a23a0c5db6ad8fc49ca3fb2/ruff-0.14.14-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:27493a2131ea0f899057d49d303e4292b2cae2bb57253c1ed1f256fbcd1da480", size = 12194761, upload-time = "2026-01-22T22:30:22.542Z" }, + { url = "https://files.pythonhosted.org/packages/5b/06/06ef271459f778323112c51b7587ce85230785cd64e91772034ddb88f200/ruff-0.14.14-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:01ff589aab3f5b539e35db38425da31a57521efd1e4ad1ae08fc34dbe30bd7df", size = 12005701, upload-time = "2026-01-22T22:30:20.499Z" }, + { url = 
"https://files.pythonhosted.org/packages/41/d6/99364514541cf811ccc5ac44362f88df66373e9fec1b9d1c4cc830593fe7/ruff-0.14.14-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc12d74eef0f29f51775f5b755913eb523546b88e2d733e1d701fe65144e89b", size = 11282455, upload-time = "2026-01-22T22:29:59.679Z" }, + { url = "https://files.pythonhosted.org/packages/ca/71/37daa46f89475f8582b7762ecd2722492df26421714a33e72ccc9a84d7a5/ruff-0.14.14-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb8481604b7a9e75eff53772496201690ce2687067e038b3cc31aaf16aa0b974", size = 11215882, upload-time = "2026-01-22T22:29:57.032Z" }, + { url = "https://files.pythonhosted.org/packages/2c/10/a31f86169ec91c0705e618443ee74ede0bdd94da0a57b28e72db68b2dbac/ruff-0.14.14-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:14649acb1cf7b5d2d283ebd2f58d56b75836ed8c6f329664fa91cdea19e76e66", size = 11180549, upload-time = "2026-01-22T22:30:27.175Z" }, + { url = "https://files.pythonhosted.org/packages/fd/1e/c723f20536b5163adf79bdd10c5f093414293cdf567eed9bdb7b83940f3f/ruff-0.14.14-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e8058d2145566510790eab4e2fad186002e288dec5e0d343a92fe7b0bc1b3e13", size = 10543416, upload-time = "2026-01-22T22:30:01.964Z" }, + { url = "https://files.pythonhosted.org/packages/3e/34/8a84cea7e42c2d94ba5bde1d7a4fae164d6318f13f933d92da6d7c2041ff/ruff-0.14.14-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e651e977a79e4c758eb807f0481d673a67ffe53cfa92209781dfa3a996cf8412", size = 10285491, upload-time = "2026-01-22T22:30:29.51Z" }, + { url = "https://files.pythonhosted.org/packages/55/ef/b7c5ea0be82518906c978e365e56a77f8de7678c8bb6651ccfbdc178c29f/ruff-0.14.14-py3-none-musllinux_1_2_i686.whl", hash = "sha256:cc8b22da8d9d6fdd844a68ae937e2a0adf9b16514e9a97cc60355e2d4b219fc3", size = 10733525, upload-time = "2026-01-22T22:30:06.499Z" }, + { url = "https://files.pythonhosted.org/packages/6a/5b/aaf1dfbcc53a2811f6cc0a1759de24e4b03e02ba8762daabd9b6bd8c59e3/ruff-0.14.14-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:16bc890fb4cc9781bb05beb5ab4cd51be9e7cb376bf1dd3580512b24eb3fda2b", size = 11315626, upload-time = "2026-01-22T22:30:36.848Z" }, + { url = "https://files.pythonhosted.org/packages/2c/aa/9f89c719c467dfaf8ad799b9bae0df494513fb21d31a6059cb5870e57e74/ruff-0.14.14-py3-none-win32.whl", hash = "sha256:b530c191970b143375b6a68e6f743800b2b786bbcf03a7965b06c4bf04568167", size = 10502442, upload-time = "2026-01-22T22:30:38.93Z" }, + { url = "https://files.pythonhosted.org/packages/87/44/90fa543014c45560cae1fffc63ea059fb3575ee6e1cb654562197e5d16fb/ruff-0.14.14-py3-none-win_amd64.whl", hash = "sha256:3dde1435e6b6fe5b66506c1dff67a421d0b7f6488d466f651c07f4cab3bf20fd", size = 11630486, upload-time = "2026-01-22T22:30:10.852Z" }, + { url = "https://files.pythonhosted.org/packages/9e/6a/40fee331a52339926a92e17ae748827270b288a35ef4a15c9c8f2ec54715/ruff-0.14.14-py3-none-win_arm64.whl", hash = "sha256:56e6981a98b13a32236a72a8da421d7839221fa308b223b9283312312e5ac76c", size = 10920448, upload-time = "2026-01-22T22:30:15.417Z" }, ] [[package]] @@ -382,15 +384,15 @@ wheels = [ [[package]] name = "uvicorn" -version = "0.38.0" +version = "0.40.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "h11" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/cb/ce/f06b84e2697fef4688ca63bdb2fdf113ca0a3be33f94488f2cadb690b0cf/uvicorn-0.38.0.tar.gz", hash = "sha256:fd97093bdd120a2609fc0d3afe931d4d4ad688b6e75f0f929fde1bc36fe0e91d", size = 
80605, upload-time = "2025-10-18T13:46:44.63Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/d1/8f3c683c9561a4e6689dd3b1d345c815f10f86acd044ee1fb9a4dcd0b8c5/uvicorn-0.40.0.tar.gz", hash = "sha256:839676675e87e73694518b5574fd0f24c9d97b46bea16df7b8c05ea1a51071ea", size = 81761, upload-time = "2025-12-21T14:16:22.45Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ee/d9/d88e73ca598f4f6ff671fb5fde8a32925c2e08a637303a1d12883c7305fa/uvicorn-0.38.0-py3-none-any.whl", hash = "sha256:48c0afd214ceb59340075b4a052ea1ee91c16fbc2a9b1469cca0e54566977b02", size = 68109, upload-time = "2025-10-18T13:46:42.958Z" }, + { url = "https://files.pythonhosted.org/packages/3d/d8/2083a1daa7439a66f3a48589a57d576aa117726762618f6bb09fe3798796/uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee", size = 68502, upload-time = "2025-12-21T14:16:21.041Z" }, ] [[package]] From 889f2d257ba15a61339de924fb6a67a6fefe6516 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 11:15:41 +0700 Subject: [PATCH 44/54] Fix IDE warnings --- app/services/lmdb.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 93c7723..dec148b 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -52,7 +52,7 @@ def _hash_message(message: Message) -> str: func["arguments"] = orjson.dumps( parsed, option=orjson.OPT_SORT_KEYS ).decode("utf-8") - except Exception: + except orjson.JSONDecodeError: pass message_bytes = orjson.dumps(message_dict, option=orjson.OPT_SORT_KEYS) @@ -175,7 +175,7 @@ def store( value = orjson.dumps(conv.model_dump(mode="json")) try: - with self._get_transaction(write=True) as txn: + with self._get_transaction(self, write=True) as txn: # Store main data txn.put(storage_key.encode("utf-8"), value, overwrite=True) @@ -203,7 +203,7 @@ def get(self, key: str) -> Optional[ConversationInStore]: Conversation or None if not found """ try: - with self._get_transaction(write=False) as txn: + with self._get_transaction(self, write=False) as txn: data = txn.get(key.encode("utf-8"), default=None) if not data: return None @@ -255,7 +255,7 @@ def _find_by_message_list( key = f"{self.HASH_LOOKUP_PREFIX}{message_hash}" try: - with self._get_transaction(write=False) as txn: + with self._get_transaction(self, write=False) as txn: if mapped := txn.get(key.encode("utf-8")): # type: ignore return self.get(mapped.decode("utf-8")) # type: ignore except Exception as e: @@ -279,7 +279,7 @@ def exists(self, key: str) -> bool: bool: True if key exists, False otherwise """ try: - with self._get_transaction(write=False) as txn: + with self._get_transaction(self, write=False) as txn: return txn.get(key.encode("utf-8")) is not None except Exception as e: logger.error(f"Failed to check existence of key {key}: {e}") @@ -296,7 +296,7 @@ def delete(self, key: str) -> Optional[ConversationInStore]: ConversationInStore: The deleted conversation data, or None if not found """ try: - with self._get_transaction(write=True) as txn: + with self._get_transaction(self, write=True) as txn: # Get data first to clean up hash mapping data = txn.get(key.encode("utf-8")) if not data: @@ -333,7 +333,7 @@ def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]: """ keys = [] try: - with self._get_transaction(write=False) as txn: + with self._get_transaction(self, write=False) as txn: cursor = txn.cursor() cursor.first() @@ -377,7 +377,7 @@ 
def cleanup_expired(self, retention_days: Optional[int] = None) -> int: expired_entries: list[tuple[str, ConversationInStore]] = [] try: - with self._get_transaction(write=False) as txn: + with self._get_transaction(self, write=False) as txn: cursor = txn.cursor() for key_bytes, value_bytes in cursor: @@ -407,7 +407,7 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: removed = 0 try: - with self._get_transaction(write=True) as txn: + with self._get_transaction(self, write=True) as txn: for key_str, conv in expired_entries: key_bytes = key_str.encode("utf-8") if not txn.delete(key_bytes): From 66b62020330e690499ef386e81cee52dc0f97cce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 11:26:16 +0700 Subject: [PATCH 45/54] Incorrect IDE warnings --- app/services/lmdb.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index dec148b..c8e78a9 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -175,7 +175,7 @@ def store( value = orjson.dumps(conv.model_dump(mode="json")) try: - with self._get_transaction(self, write=True) as txn: + with self._get_transaction(write=True) as txn: # Store main data txn.put(storage_key.encode("utf-8"), value, overwrite=True) @@ -203,7 +203,7 @@ def get(self, key: str) -> Optional[ConversationInStore]: Conversation or None if not found """ try: - with self._get_transaction(self, write=False) as txn: + with self._get_transaction(write=False) as txn: data = txn.get(key.encode("utf-8"), default=None) if not data: return None @@ -255,7 +255,7 @@ def _find_by_message_list( key = f"{self.HASH_LOOKUP_PREFIX}{message_hash}" try: - with self._get_transaction(self, write=False) as txn: + with self._get_transaction(write=False) as txn: if mapped := txn.get(key.encode("utf-8")): # type: ignore return self.get(mapped.decode("utf-8")) # type: ignore except Exception as e: @@ -279,7 +279,7 @@ def exists(self, key: str) -> bool: bool: True if key exists, False otherwise """ try: - with self._get_transaction(self, write=False) as txn: + with self._get_transaction(write=False) as txn: return txn.get(key.encode("utf-8")) is not None except Exception as e: logger.error(f"Failed to check existence of key {key}: {e}") @@ -296,7 +296,7 @@ def delete(self, key: str) -> Optional[ConversationInStore]: ConversationInStore: The deleted conversation data, or None if not found """ try: - with self._get_transaction(self, write=True) as txn: + with self._get_transaction(write=True) as txn: # Get data first to clean up hash mapping data = txn.get(key.encode("utf-8")) if not data: @@ -333,7 +333,7 @@ def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]: """ keys = [] try: - with self._get_transaction(self, write=False) as txn: + with self._get_transaction(write=False) as txn: cursor = txn.cursor() cursor.first() @@ -377,7 +377,7 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: expired_entries: list[tuple[str, ConversationInStore]] = [] try: - with self._get_transaction(self, write=False) as txn: + with self._get_transaction(write=False) as txn: cursor = txn.cursor() for key_bytes, value_bytes in cursor: @@ -407,7 +407,7 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: removed = 0 try: - with self._get_transaction(self, write=True) as txn: + with self._get_transaction(write=True) as txn: for key_str, conv in expired_entries: key_bytes = key_str.encode("utf-8") if not 
txn.delete(key_bytes): From 3297f534f035f869bd7e4a867618b39bc7256f06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 12:05:26 +0700 Subject: [PATCH 46/54] Refactor: Modify the LMDB store to fix issues where no conversation is found. --- app/services/lmdb.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index c8e78a9..a55d3a9 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -476,9 +476,7 @@ def sanitize_assistant_messages(messages: list[Message]) -> list[Message]: if isinstance(msg.content, str): normalized_content = LMDBConversationStore.remove_think_tags(msg.content) if normalized_content != msg.content: - cleaned_msg = Message( - role=msg.role, content=normalized_content, name=msg.name - ) + cleaned_msg = msg.model_copy(update={"content": normalized_content}) cleaned_messages.append(cleaned_msg) else: cleaned_messages.append(msg) From 5399b260595e77d6c1f0a8d24a880c59d165a57b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 12:06:52 +0700 Subject: [PATCH 47/54] Refactor: Centralized the mapping of the 'developer' role to 'system' for better Gemini compatibility. --- app/models/models.py | 7 +++++++ app/server/chat.py | 6 +----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index c27e024..63ddb94 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -29,6 +29,13 @@ class Message(BaseModel): audio: Optional[Dict[str, Any]] = None annotations: List[Dict[str, Any]] = Field(default_factory=list) + @model_validator(mode="after") + def normalize_role(self) -> "Message": + """Normalize 'developer' role to 'system' for Gemini compatibility.""" + if self.role == "developer": + self.role = "system" + return self + class Choice(BaseModel): """Choice model""" diff --git a/app/server/chat.py b/app/server/chat.py index a9d9dec..66a2720 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -319,8 +319,6 @@ def _response_items_to_messages( normalized_input: list[ResponseInputItem] = [] for item in items: role = item.role - if role == "developer": - role = "system" content = item.content normalized_contents: list[ResponseInputContent] = [] @@ -394,8 +392,6 @@ def _instructions_to_messages( continue role = item.role - if role == "developer": - role = "system" content = item.content if isinstance(content, str): @@ -1054,7 +1050,7 @@ async def _find_reusable_session( while search_end >= 2: search_history = messages[:search_end] - # Only try to match if the last stored message would be assistant/system. + # Only try to match if the last stored message would be assistant/system before querying LMDB. if search_history[-1].role in {"assistant", "system"}: try: if conv := db.find(model.model_name, search_history): From de01c7850fa44f4dcbd8f31c47bccaf301861a56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 13:04:31 +0700 Subject: [PATCH 48/54] Refactor: Modify the LMDB store to fix issues where no conversation is found. 
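A rough standalone sketch of the canonicalization idea behind this change
(not code from this repo): hashing a key-sorted serialization makes the
digest independent of key order and whitespace, which is what lets a
re-sent history match its stored counterpart.

    import hashlib

    import orjson

    def canonical_hash(payload: dict) -> str:
        # OPT_SORT_KEYS makes the serialization byte-stable, so two
        # semantically equal dicts always produce the same digest.
        blob = orjson.dumps(payload, option=orjson.OPT_SORT_KEYS)
        return hashlib.sha256(blob).hexdigest()

    assert canonical_hash({"b": 1, "a": 2}) == canonical_hash({"a": 2, "b": 1})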
--- app/models/models.py | 1 + app/services/lmdb.py | 95 +++++++++++++++++++++++++------------------- app/utils/helper.py | 10 +++-- 3 files changed, 63 insertions(+), 43 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 63ddb94..4072b29 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -24,6 +24,7 @@ class Message(BaseModel): content: Union[str, List[ContentItem], None] = None name: Optional[str] = None tool_calls: Optional[List["ToolCall"]] = None + tool_call_id: Optional[str] = None refusal: Optional[str] = None reasoning_content: Optional[str] = None audio: Optional[Dict[str, Any]] = None diff --git a/app/services/lmdb.py b/app/services/lmdb.py index a55d3a9..594acf0 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -15,53 +15,69 @@ def _hash_message(message: Message) -> str: - """Generate a hash for a single message.""" - # Convert message to dict and sort keys for consistent hashing - message_dict = message.model_dump(mode="json") - - # Normalize content: empty string -> None - content = message_dict.get("content") - if content == "": - message_dict["content"] = None + """Generate a consistent hash for a single message focusing only on core identity fields.""" + # Pick only fields that define the message in a conversation history + core_data = { + "role": message.role, + "name": message.name, + "tool_call_id": message.tool_call_id, + } + + # Normalize content: strip, handle empty/None, and list-of-text items + content = message.content + if not content: + core_data["content"] = None + elif isinstance(content, str): + stripped = content.strip() + core_data["content"] = stripped if stripped else None elif isinstance(content, list): - is_pure_text = True text_parts = [] for item in content: - if not isinstance(item, dict) or item.get("type") != "text": - is_pure_text = False + if isinstance(item, ContentItem) and item.type == "text": + text_parts.append(item.text or "") + elif isinstance(item, dict) and item.get("type") == "text": + text_parts.append(item.get("text") or "") + else: + # If it contains non-text (images/files), keep the full list for hashing + text_parts = None break - text_parts.append(item.get("text") or "") - - if is_pure_text: - text_content = "".join(text_parts) - message_dict["content"] = text_content if text_content else None - - # Normalize tool_calls: empty list -> None, and canonicalize arguments - tool_calls = message_dict.get("tool_calls") - if not tool_calls: - message_dict["tool_calls"] = None - elif isinstance(tool_calls, list): - for tool_call in tool_calls: - if isinstance(tool_call, dict) and "function" in tool_call: - func = tool_call["function"] - args = func.get("arguments") - if isinstance(args, str): - try: - # Parse and re-dump to canonicalize (remove extra whitespace, sort keys) - parsed = orjson.loads(args) - func["arguments"] = orjson.dumps( - parsed, option=orjson.OPT_SORT_KEYS - ).decode("utf-8") - except orjson.JSONDecodeError: - pass - - message_bytes = orjson.dumps(message_dict, option=orjson.OPT_SORT_KEYS) + + if text_parts is not None: + text_content = "".join(text_parts).strip() + core_data["content"] = text_content if text_content else None + else: + core_data["content"] = message.model_dump(mode="json")["content"] + + # Normalize tool_calls: canonicalize arguments and sort by name if multiple calls exist + if message.tool_calls: + calls_data = [] + for tc in message.tool_calls: + args = tc.function.arguments or "{}" + try: + parsed = orjson.loads(args) + canon_args = 
orjson.dumps(parsed, option=orjson.OPT_SORT_KEYS).decode("utf-8") + except orjson.JSONDecodeError: + canon_args = args + + calls_data.append( + { + "id": tc.id, # Deterministic IDs ensure this is stable + "name": tc.function.name, + "arguments": canon_args, + } + ) + # Sort calls to be order-independent + calls_data.sort(key=lambda x: (x["name"], x["arguments"])) + core_data["tool_calls"] = calls_data + else: + core_data["tool_calls"] = None + + message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) return hashlib.sha256(message_bytes).hexdigest() def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str: - """Generate a hash for a list of messages and client id.""" - # Create a combined hash from all individual message hashes + """Generate a hash for a list of messages and model name, tied to a specific client_id.""" combined_hash = hashlib.sha256() combined_hash.update(client_id.encode("utf-8")) combined_hash.update(model.encode("utf-8")) @@ -252,7 +268,6 @@ def _find_by_message_list( """Internal find implementation based on a message list.""" for c in g_config.gemini.clients: message_hash = _hash_conversation(c.id, model, messages) - key = f"{self.HASH_LOOKUP_PREFIX}{message_hash}" try: with self._get_transaction(write=False) as txn: diff --git a/app/utils/helper.py b/app/utils/helper.py index 1dc518f..239b7f4 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -1,9 +1,9 @@ import base64 +import hashlib import mimetypes import re import struct import tempfile -import uuid from pathlib import Path from typing import Iterator from urllib.parse import urlparse @@ -222,13 +222,17 @@ def _create_tool_call(name: str, raw_args: str) -> None: arguments = raw_args try: parsed_args = orjson.loads(raw_args) - arguments = orjson.dumps(parsed_args).decode("utf-8") + arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8") except orjson.JSONDecodeError: logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.") + # Generate a deterministic ID based on name and arguments to avoid hash mismatch in LMDB + seed = f"{name}:{arguments}".encode("utf-8") + call_id = f"call_{hashlib.sha256(seed).hexdigest()[:24]}" + tool_calls.append( ToolCall( - id=f"call_{uuid.uuid4().hex}", + id=call_id, type="function", function=FunctionCall(name=name, arguments=arguments), ) From 196414755e860f1f6d9c840954eb45c53225a864 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 13:26:58 +0700 Subject: [PATCH 49/54] Refactor: Modify the LMDB store to fix issues where no conversation is found. --- app/server/chat.py | 10 +++++++++- app/services/lmdb.py | 7 ++----- app/utils/helper.py | 13 +++++++------ 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 66a2720..7c683cd 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1047,6 +1047,8 @@ async def _find_reusable_session( # Start with the full history and iteratively trim from the end. search_end = len(messages) + logger.debug(f"Searching for reusable session in history of length {search_end}...") + while search_end >= 2: search_history = messages[:search_end] @@ -1057,14 +1059,20 @@ async def _find_reusable_session( client = await pool.acquire(conv.client_id) session = client.start_chat(metadata=conv.metadata, model=model) remain = messages[search_end:] + logger.debug( + f"Match found at prefix length {search_end}. 
Client: {conv.client_id}" + ) return session, client, remain except Exception as e: - logger.warning(f"Error checking LMDB for reusable session: {e}") + logger.warning( + f"Error checking LMDB for reusable session at length {search_end}: {e}" + ) break # Trim one message and try again. search_end -= 1 + logger.debug("No reusable session found after checking all possible prefixes.") return None, None, messages diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 594acf0..5aefa4b 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -15,12 +15,10 @@ def _hash_message(message: Message) -> str: - """Generate a consistent hash for a single message focusing only on core identity fields.""" - # Pick only fields that define the message in a conversation history + """Generate a consistent hash for a single message focusing ONLY on logic/content, ignoring technical IDs.""" core_data = { "role": message.role, "name": message.name, - "tool_call_id": message.tool_call_id, } # Normalize content: strip, handle empty/None, and list-of-text items @@ -48,7 +46,7 @@ def _hash_message(message: Message) -> str: else: core_data["content"] = message.model_dump(mode="json")["content"] - # Normalize tool_calls: canonicalize arguments and sort by name if multiple calls exist + # Normalize tool_calls: Focus ONLY on function name and arguments if message.tool_calls: calls_data = [] for tc in message.tool_calls: @@ -61,7 +59,6 @@ def _hash_message(message: Message) -> str: calls_data.append( { - "id": tc.id, # Deterministic IDs ensure this is stable "name": tc.function.name, "arguments": canon_args, } diff --git a/app/utils/helper.py b/app/utils/helper.py index 239b7f4..ecf4a47 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -213,7 +213,7 @@ def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: tool_calls: list[ToolCall] = [] - def _create_tool_call(name: str, raw_args: str) -> None: + def _create_tool_call(name: str, raw_args: str, index: int) -> None: """Helper to parse args and append to the tool_calls list.""" if not name: logger.warning("Encountered tool_call without a function name.") @@ -226,8 +226,8 @@ def _create_tool_call(name: str, raw_args: str) -> None: except orjson.JSONDecodeError: logger.warning(f"Failed to parse tool call arguments for '{name}'. 
Passing raw string.") - # Generate a deterministic ID based on name and arguments to avoid hash mismatch in LMDB - seed = f"{name}:{arguments}".encode("utf-8") + # Generate a deterministic ID based on name, arguments, and index to avoid collisions + seed = f"{name}:{arguments}:{index}".encode("utf-8") call_id = f"call_{hashlib.sha256(seed).hexdigest()[:24]}" tool_calls.append( @@ -244,11 +244,11 @@ def _replace_block(match: re.Match[str]) -> str: return match.group(0) found_in_block = False - for call_match in TOOL_CALL_RE.finditer(block_content): + for i, call_match in enumerate(TOOL_CALL_RE.finditer(block_content)): found_in_block = True name = (call_match.group(1) or "").strip() raw_args = (call_match.group(2) or "").strip() - _create_tool_call(name, raw_args) + _create_tool_call(name, raw_args, i) if found_in_block: return "" @@ -258,9 +258,10 @@ def _replace_block(match: re.Match[str]) -> str: cleaned = TOOL_BLOCK_RE.sub(_replace_block, text) def _replace_orphan(match: re.Match[str]) -> str: + # Note: orphan calls are handled with a fallback index if they appear outside blocks name = (match.group(1) or "").strip() raw_args = (match.group(2) or "").strip() - _create_tool_call(name, raw_args) + _create_tool_call(name, raw_args, len(tool_calls)) return "" cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned) From 8c5c7498230bc680bf50464dacf0b6f001888981 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 13:42:09 +0700 Subject: [PATCH 50/54] Refactor: Modify the LMDB store to fix issues where no conversation is found. --- app/server/chat.py | 4 ++-- app/services/lmdb.py | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 7c683cd..0d64b71 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1052,8 +1052,8 @@ async def _find_reusable_session( while search_end >= 2: search_history = messages[:search_end] - # Only try to match if the last stored message would be assistant/system before querying LMDB. - if search_history[-1].role in {"assistant", "system"}: + # Only try to match if the last stored message would be assistant/system/tool before querying LMDB. + if search_history[-1].role in {"assistant", "system", "tool"}: try: if conv := db.find(model.model_name, search_history): client = await pool.acquire(conv.client_id) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 5aefa4b..c612d9e 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -70,7 +70,11 @@ def _hash_message(message: Message) -> str: core_data["tool_calls"] = None message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) - return hashlib.sha256(message_bytes).hexdigest() + msg_hash = hashlib.sha256(message_bytes).hexdigest() + logger.debug( + f"Hashing message (role={message.role}): {message_bytes.decode('utf-8')} -> {msg_hash}" + ) + return msg_hash def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str: From ce67d664b5443726fe518aee1cc9ef550ae640fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 14:41:55 +0700 Subject: [PATCH 51/54] Refactor: Avoid reusing an existing chat session if its idle time exceeds METADATA_TTL_MINUTES. 
--- app/server/chat.py | 14 ++++++++++++-- app/services/lmdb.py | 9 ++------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 0d64b71..6fbb818 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -58,6 +58,7 @@ # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) CONTINUATION_HINT = "\n(More messages to come, please reply with just 'ok.')" +METADATA_TTL_MINUTES = 20 router = APIRouter() @@ -1047,7 +1048,6 @@ async def _find_reusable_session( # Start with the full history and iteratively trim from the end. search_end = len(messages) - logger.debug(f"Searching for reusable session in history of length {search_end}...") while search_end >= 2: search_history = messages[:search_end] @@ -1056,6 +1056,17 @@ async def _find_reusable_session( if search_history[-1].role in {"assistant", "system", "tool"}: try: if conv := db.find(model.model_name, search_history): + # Check if metadata is too old + now = datetime.now() + updated_at = conv.updated_at or conv.created_at or now + age_minutes = (now - updated_at).total_seconds() / 60 + + if age_minutes > METADATA_TTL_MINUTES: + logger.debug( + f"Matched conversation is too old ({age_minutes:.1f}m), skipping reuse." + ) + break + client = await pool.acquire(conv.client_id) session = client.start_chat(metadata=conv.metadata, model=model) remain = messages[search_end:] @@ -1072,7 +1083,6 @@ async def _find_reusable_session( # Trim one message and try again. search_end -= 1 - logger.debug("No reusable session found after checking all possible prefixes.") return None, None, messages diff --git a/app/services/lmdb.py b/app/services/lmdb.py index c612d9e..424b357 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -68,13 +68,8 @@ def _hash_message(message: Message) -> str: core_data["tool_calls"] = calls_data else: core_data["tool_calls"] = None - - message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) - msg_hash = hashlib.sha256(message_bytes).hexdigest() - logger.debug( - f"Hashing message (role={message.role}): {message_bytes.decode('utf-8')} -> {msg_hash}" - ) - return msg_hash + message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) + return hashlib.sha256(message_bytes).hexdigest() def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str: From 3d32d1226b1399f4286aadd95b2c4a52228fac45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 14:58:58 +0700 Subject: [PATCH 52/54] Refactor: Update the LMDB store to resolve issues preventing conversation from being saved --- app/services/lmdb.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 424b357..2dbe7b2 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -68,15 +68,16 @@ def _hash_message(message: Message) -> str: core_data["tool_calls"] = calls_data else: core_data["tool_calls"] = None - message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) - return hashlib.sha256(message_bytes).hexdigest() + + message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) + return hashlib.sha256(message_bytes).hexdigest() def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str: """Generate a hash for a list of messages and model name, tied to a specific client_id.""" combined_hash = hashlib.sha256() - 
combined_hash.update(client_id.encode("utf-8")) - combined_hash.update(model.encode("utf-8")) + combined_hash.update((client_id or "").encode("utf-8")) + combined_hash.update((model or "").encode("utf-8")) for message in messages: message_hash = _hash_message(message) combined_hash.update(message_hash.encode("utf-8")) From 2eb9f05142ddfa1cb665b248f3faf2e278b619c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 17:57:04 +0700 Subject: [PATCH 53/54] Refactor: Update the _prepare_messages_for_model helper to omit the system instruction when reusing a session to save tokens. --- app/server/chat.py | 66 +++++++++++++++++++++++++---------------- app/services/lmdb.py | 70 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 96 insertions(+), 40 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 6fbb818..646f4fa 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -58,7 +58,7 @@ # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) CONTINUATION_HINT = "\n(More messages to come, please reply with just 'ok.')" -METADATA_TTL_MINUTES = 20 +METADATA_TTL_MINUTES = 15 router = APIRouter() @@ -268,31 +268,35 @@ def _prepare_messages_for_model( tools: list[Tool] | None, tool_choice: str | ToolChoiceFunction | None, extra_instructions: list[str] | None = None, + inject_system_defaults: bool = True, ) -> list[Message]: """Return a copy of messages enriched with tool instructions when needed.""" prepared = [msg.model_copy(deep=True) for msg in source_messages] instructions: list[str] = [] - if tools: - tool_prompt = _build_tool_prompt(tools, tool_choice) - if tool_prompt: - instructions.append(tool_prompt) - - if extra_instructions: - instructions.extend(instr for instr in extra_instructions if instr) - logger.debug( - f"Applied {len(extra_instructions)} extra instructions for tool/structured output." - ) + if inject_system_defaults: + if tools: + tool_prompt = _build_tool_prompt(tools, tool_choice) + if tool_prompt: + instructions.append(tool_prompt) + + if extra_instructions: + instructions.extend(instr for instr in extra_instructions if instr) + logger.debug( + f"Applied {len(extra_instructions)} extra instructions for tool/structured output." + ) - if not _conversation_has_code_hint(prepared): - instructions.append(CODE_BLOCK_HINT) - logger.debug("Injected default code block hint for Gemini conversation.") + if not _conversation_has_code_hint(prepared): + instructions.append(CODE_BLOCK_HINT) + logger.debug("Injected default code block hint for Gemini conversation.") if not instructions: + # Still need to ensure XML hint for the last user message if tools are present + if tools and tool_choice != "none": + _append_xml_hint_to_last_user_message(prepared) return prepared combined_instructions = "\n\n".join(instructions) - if prepared and prepared[0].role == "system" and isinstance(prepared[0].content, str): existing = prepared[0].content or "" separator = "\n\n" if existing else "" @@ -530,8 +534,14 @@ async def create_chat_completion( ) if session: + # Optimization: When reusing a session, we don't need to resend the heavy tool definitions + # or structured output instructions as they are already in the Gemini session history. 
messages_to_send = _prepare_messages_for_model( - remaining_messages, request.tools, request.tool_choice, extra_instructions + remaining_messages, + request.tools, + request.tool_choice, + extra_instructions, + inject_system_defaults=False, ) if not messages_to_send: raise HTTPException( @@ -642,17 +652,20 @@ async def create_chat_completion( # After formatting, persist the conversation to LMDB try: - last_message = Message( + current_assistant_message = Message( role="assistant", content=storage_output or None, tool_calls=tool_calls or None, ) - cleaned_history = db.sanitize_assistant_messages(request.messages) + # Sanitize the entire history including the new message to ensure consistency + full_history = [*request.messages, current_assistant_message] + cleaned_history = db.sanitize_assistant_messages(full_history) + conv = ConversationInStore( model=model.model_name, client_id=client.id, metadata=session.metadata, - messages=[*cleaned_history, last_message], + messages=cleaned_history, ) key = db.store(conv) logger.debug(f"Conversation saved to LMDB with key: {key}") @@ -780,9 +793,10 @@ async def _build_payload( if reuse_session: messages_to_send = _prepare_messages_for_model( remaining_messages, - tools=None, - tool_choice=None, - extra_instructions=extra_instructions or None, + tools=request_data.tools, # Keep for XML hint logic + tool_choice=request_data.tool_choice, + extra_instructions=None, # Already in session history + inject_system_defaults=False, ) if not messages_to_send: raise HTTPException( @@ -994,17 +1008,19 @@ async def _build_payload( ) try: - last_message = Message( + current_assistant_message = Message( role="assistant", content=storage_output or None, tool_calls=detected_tool_calls or None, ) - cleaned_history = db.sanitize_assistant_messages(messages) + full_history = [*messages, current_assistant_message] + cleaned_history = db.sanitize_assistant_messages(full_history) + conv = ConversationInStore( model=model.model_name, client_id=client.id, metadata=session.metadata, - messages=[*cleaned_history, last_message], + messages=cleaned_history, ) key = db.store(conv) logger.debug(f"Conversation saved to LMDB with key: {key}") diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 2dbe7b2..f4c9938 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -11,6 +11,7 @@ from ..models import ContentItem, ConversationInStore, Message from ..utils import g_config +from ..utils.helper import extract_tool_calls, remove_tool_call_blocks from ..utils.singleton import Singleton @@ -26,8 +27,9 @@ def _hash_message(message: Message) -> str: if not content: core_data["content"] = None elif isinstance(content, str): - stripped = content.strip() - core_data["content"] = stripped if stripped else None + # Normalize line endings and strip whitespace + normalized = content.replace("\r\n", "\n").strip() + core_data["content"] = normalized if normalized else None elif isinstance(content, list): text_parts = [] for item in content: @@ -41,7 +43,7 @@ def _hash_message(message: Message) -> str: break if text_parts is not None: - text_content = "".join(text_parts).strip() + text_content = "".join(text_parts).replace("\r\n", "\n").strip() core_data["content"] = text_content if text_content else None else: core_data["content"] = message.model_dump(mode="json")["content"] @@ -260,7 +262,9 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt return None def _find_by_message_list( - self, model: str, messages: List[Message] + self, + model: str, + 
messages: List[Message],
     ) -> Optional[ConversationInStore]:
         """Internal find implementation based on a message list."""
         for c in g_config.gemini.clients:
@@ -471,40 +475,76 @@ def __del__(self):
     @staticmethod
     def remove_think_tags(text: str) -> str:
         """
-        Remove <think>...</think> tags at the start of text and strip whitespace.
+        Remove all <think>...</think> tags and strip whitespace.
         """
-        cleaned_content = re.sub(r"^(\s*<think>.*?</think>\n?)", "", text, flags=re.DOTALL)
+        # Remove all think blocks anywhere in the text
+        cleaned_content = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
         return cleaned_content.strip()
 
     @staticmethod
     def sanitize_assistant_messages(messages: list[Message]) -> list[Message]:
         """
-        Create a new list of messages with assistant content cleaned of <think> tags.
-        This is useful for store the chat history.
+        Create a new list of messages with assistant content cleaned of <think> tags
+        and system hints/tool call blocks. This is used for both storing and
+        searching chat history to ensure consistency.
+
+        If a message has no tool_calls but contains tool call XML blocks in its
+        content, they will be extracted and moved to the tool_calls field.
         """
         cleaned_messages = []
         for msg in messages:
             if msg.role == "assistant":
                 if isinstance(msg.content, str):
-                    normalized_content = LMDBConversationStore.remove_think_tags(msg.content)
-                    if normalized_content != msg.content:
-                        cleaned_msg = msg.model_copy(update={"content": normalized_content})
+                    text = LMDBConversationStore.remove_think_tags(msg.content)
+                    tool_calls = msg.tool_calls
+                    if not tool_calls:
+                        text, tool_calls = extract_tool_calls(text)
+                    else:
+                        text = remove_tool_call_blocks(text).strip()
+
+                    normalized_content = text.strip()
+
+                    if normalized_content != msg.content or tool_calls != msg.tool_calls:
+                        cleaned_msg = msg.model_copy(
+                            update={
+                                "content": normalized_content or None,
+                                "tool_calls": tool_calls or None,
+                            }
+                        )
                         cleaned_messages.append(cleaned_msg)
                     else:
                         cleaned_messages.append(msg)
                 elif isinstance(msg.content, list):
                     new_content = []
+                    all_extracted_calls = list(msg.tool_calls or [])
                     changed = False
+
                     for item in msg.content:
                         if isinstance(item, ContentItem) and item.type == "text" and item.text:
-                            cleaned_text = LMDBConversationStore.remove_think_tags(item.text)
-                            if cleaned_text != item.text:
+                            text = LMDBConversationStore.remove_think_tags(item.text)
+
+                            if not msg.tool_calls:
+                                text, extracted = extract_tool_calls(text)
+                                if extracted:
+                                    all_extracted_calls.extend(extracted)
+                                    changed = True
+                            else:
+                                text = remove_tool_call_blocks(text).strip()
+
+                            if text != item.text:
                                 changed = True
-                                item = item.model_copy(update={"text": cleaned_text})
+                                item = item.model_copy(update={"text": text.strip() or None})
                             new_content.append(item)
                     if changed:
-                        cleaned_messages.append(msg.model_copy(update={"content": new_content}))
+                        cleaned_messages.append(
+                            msg.model_copy(
+                                update={
+                                    "content": new_content,
+                                    "tool_calls": all_extracted_calls or None,
+                                }
+                            )
+                        )
                     else:
                         cleaned_messages.append(msg)
                 else:

From ade61d6826af1f256e7141ab6c1815b047cf8744 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Mon, 26 Jan 2026 11:01:41 +0700
Subject: [PATCH 54/54] Refactor: Modify the logic to convert a large prompt
 into a temporary text file attachment

- When multiple chunks are sent simultaneously, Google will immediately
  invalidate the access token and reject the request
- When a prompt contains a structured format like JSON, splitting it can
  break the format and may cause the model to misunderstand the context
- Another minor tweak as Copilot suggested
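The new behavior is roughly the following shape (sketch only;
`send_large_prompt` is a hypothetical wrapper around the same
`session.send_message` API used in the diff):

    import tempfile
    from pathlib import Path

    async def send_large_prompt(session, text: str, limit: int):
        # Small prompts go through unchanged; oversized ones are attached
        # as message.txt in a single request instead of being split.
        if len(text) <= limit:
            return await session.send_message(text)
        with tempfile.TemporaryDirectory() as tmp:
            path = Path(tmp) / "message.txt"
            path.write_text(text, encoding="utf-8")
            hint = "The user prompt is in the attached message.txt; treat it as this turn's input."
            return await session.send_message(hint, files=[path])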
--- app/server/chat.py | 104 ++++++++++++++++--------------------------- app/services/lmdb.py | 5 ++- app/utils/helper.py | 13 +++--- 3 files changed, 49 insertions(+), 73 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 646f4fa..063d4d4 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1,7 +1,6 @@ -import asyncio import base64 -import random import re +import tempfile import uuid from dataclasses import dataclass from datetime import datetime, timezone @@ -375,9 +374,7 @@ def _response_items_to_messages( ResponseInputItem(type="message", role=item.role, content=normalized_contents or []) ) - logger.debug( - f"Normalized Responses input: {len(normalized_input)} message items (developer roles mapped to system)." - ) + logger.debug(f"Normalized Responses input: {len(normalized_input)} message items.") return messages, normalized_input @@ -1077,19 +1074,18 @@ async def _find_reusable_session( updated_at = conv.updated_at or conv.created_at or now age_minutes = (now - updated_at).total_seconds() / 60 - if age_minutes > METADATA_TTL_MINUTES: + if age_minutes <= METADATA_TTL_MINUTES: + client = await pool.acquire(conv.client_id) + session = client.start_chat(metadata=conv.metadata, model=model) + remain = messages[search_end:] + logger.debug( + f"Match found at prefix length {search_end}. Client: {conv.client_id}" + ) + return session, client, remain + else: logger.debug( f"Matched conversation is too old ({age_minutes:.1f}m), skipping reuse." ) - break - - client = await pool.acquire(conv.client_id) - session = client.start_chat(metadata=conv.metadata, model=model) - remain = messages[search_end:] - logger.debug( - f"Match found at prefix length {search_end}. Client: {conv.client_id}" - ) - return session, client, remain except Exception as e: logger.warning( f"Error checking LMDB for reusable session at length {search_end}: {e}" @@ -1103,13 +1099,9 @@ async def _find_reusable_session( async def _send_with_split(session: ChatSession, text: str, files: list[Path | str] | None = None): - """Send text to Gemini, automatically splitting into multiple batches if it is - longer than ``MAX_CHARS_PER_REQUEST``. - - Every intermediate batch (that is **not** the last one) is suffixed with a hint - telling Gemini that more content will come, and it should simply reply with - "ok". The final batch carries any file uploads and the real user prompt so - that Gemini can produce the actual answer. + """ + Send text to Gemini. If text is longer than ``MAX_CHARS_PER_REQUEST``, + it is converted into a temporary text file attachment to avoid splitting issues. """ if len(text) <= MAX_CHARS_PER_REQUEST: try: @@ -1118,55 +1110,37 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s logger.exception(f"Error sending message to Gemini: {e}") raise - hint_len = len(CONTINUATION_HINT) - safe_chunk_size = MAX_CHARS_PER_REQUEST - hint_len - - chunks: list[str] = [] - pos = 0 - total = len(text) - - while pos < total: - remaining = total - pos - if remaining <= MAX_CHARS_PER_REQUEST: - chunks.append(text[pos:]) - break - - end = pos + safe_chunk_size - slice_candidate = text[pos:end] - # Try to find a safe split point - split_idx = -1 - idx = slice_candidate.rfind("\n") - if idx != -1: - split_idx = idx - - if split_idx != -1: - split_at = pos + split_idx + 1 - else: - split_at = end + logger.info( + f"Message length ({len(text)}) exceeds limit ({MAX_CHARS_PER_REQUEST}). Converting text to file attachment." 
+ ) - chunk = text[pos:split_at] + CONTINUATION_HINT - chunks.append(chunk) - pos = split_at + # Create a temporary directory to hold the message.txt file + # This ensures the filename is exactly 'message.txt' as expected by the instruction. + with tempfile.TemporaryDirectory() as tmpdirname: + temp_file_path = Path(tmpdirname) / "message.txt" + temp_file_path.write_text(text, encoding="utf-8") - chunks_size = len(chunks) - for i, chk in enumerate(chunks[:-1]): try: - logger.debug(f"Sending chunk {i + 1}/{chunks_size}...") - await session.send_message(chk) - delay = random.uniform(1.0, 3.0) - logger.debug(f"Sleeping for {delay:.2f}s...") - await asyncio.sleep(delay) + # Prepare the files list + final_files = list(files) if files else [] + final_files.append(temp_file_path) + + instruction = ( + "The user's input exceeds the character limit and is provided in the attached file `message.txt`.\n\n" + "**System Instruction:**\n" + "1. Read the content of `message.txt`.\n" + "2. Treat that content as the **primary** user prompt for this turn.\n" + "3. Execute the instructions or answer the questions found *inside* that file immediately.\n" + ) + + logger.debug(f"Sending prompt as temporary file: {temp_file_path}") + + return await session.send_message(instruction, files=final_files) + except Exception as e: - logger.exception(f"Error sending chunk to Gemini: {e}") + logger.exception(f"Error sending large text as file to Gemini: {e}") raise - try: - logger.debug(f"Sending final chunk {chunks_size}/{chunks_size}...") - return await session.send_message(chunks[-1], files=files) - except Exception as e: - logger.exception(f"Error sending final chunk to Gemini: {e}") - raise - def _create_streaming_response( model_output: str, diff --git a/app/services/lmdb.py b/app/services/lmdb.py index f4c9938..c9d42cd 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -43,8 +43,9 @@ def _hash_message(message: Message) -> str: break if text_parts is not None: - text_content = "".join(text_parts).replace("\r\n", "\n").strip() - core_data["content"] = text_content if text_content else None + # Normalize each part but keep them as a list to preserve boundaries and avoid collisions + normalized_parts = [p.replace("\r\n", "\n") for p in text_parts] + core_data["content"] = normalized_parts if normalized_parts else None else: core_data["content"] = message.model_dump(mode="json")["content"] diff --git a/app/utils/helper.py b/app/utils/helper.py index ecf4a47..190b5ce 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -213,7 +213,7 @@ def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: tool_calls: list[ToolCall] = [] - def _create_tool_call(name: str, raw_args: str, index: int) -> None: + def _create_tool_call(name: str, raw_args: str) -> None: """Helper to parse args and append to the tool_calls list.""" if not name: logger.warning("Encountered tool_call without a function name.") @@ -226,7 +226,9 @@ def _create_tool_call(name: str, raw_args: str, index: int) -> None: except orjson.JSONDecodeError: logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.") - # Generate a deterministic ID based on name, arguments, and index to avoid collisions + # Generate a deterministic ID based on name, arguments, and its global sequence index + # to ensure uniqueness across multiple fenced blocks while remaining stable for storage. 
+ index = len(tool_calls) seed = f"{name}:{arguments}:{index}".encode("utf-8") call_id = f"call_{hashlib.sha256(seed).hexdigest()[:24]}" @@ -244,11 +246,11 @@ def _replace_block(match: re.Match[str]) -> str: return match.group(0) found_in_block = False - for i, call_match in enumerate(TOOL_CALL_RE.finditer(block_content)): + for call_match in TOOL_CALL_RE.finditer(block_content): found_in_block = True name = (call_match.group(1) or "").strip() raw_args = (call_match.group(2) or "").strip() - _create_tool_call(name, raw_args, i) + _create_tool_call(name, raw_args) if found_in_block: return "" @@ -258,10 +260,9 @@ def _replace_block(match: re.Match[str]) -> str: cleaned = TOOL_BLOCK_RE.sub(_replace_block, text) def _replace_orphan(match: re.Match[str]) -> str: - # Note: orphan calls are handled with a fallback index if they appear outside blocks name = (match.group(1) or "").strip() raw_args = (match.group(2) or "").strip() - _create_tool_call(name, raw_args, len(tool_calls)) + _create_tool_call(name, raw_args) return "" cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned)
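A note on the helper.py change above: deriving the ID from the function
name, its canonical arguments, and the call's position keeps IDs stable
across re-parses while still distinguishing duplicate calls. A minimal
standalone sketch of the scheme:

    import hashlib

    def deterministic_call_id(name: str, arguments: str, index: int) -> str:
        # Same (name, arguments, position) always yields the same ID, so a
        # re-parsed history hashes identically in the LMDB lookup.
        seed = f"{name}:{arguments}:{index}".encode("utf-8")
        return f"call_{hashlib.sha256(seed).hexdigest()[:24]}"

    a = deterministic_call_id("search", '{"q": "x"}', 0)
    b = deterministic_call_id("search", '{"q": "x"}', 1)
    assert a != b  # duplicate calls remain distinguishable
    assert a == deterministic_call_id("search", '{"q": "x"}', 0)  # and stable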