def _table_exists(connection: Connection, table_name: str) -> bool:
    """Return True when ``table_name`` is present on ``connection``."""
    return sa.inspect(connection).has_table(table_name)


def _index_exists(connection: Connection, index_name: str, table_name: str) -> bool:
    """Return True when ``table_name`` exists and has an index named ``index_name``."""
    inspector = sa.inspect(connection)
    if inspector.has_table(table_name):
        known_names = {entry["name"] for entry in inspector.get_indexes(table_name)}
        return index_name in known_names
    return False


def upgrade() -> None:
    """Create ``http_bridge_leases`` and its two lookup indexes.

    Each object is created only when the inspector does not already report
    it, so re-running the migration against a partially migrated database
    skips whatever is already in place.
    """
    bind = op.get_bind()
    table = "http_bridge_leases"
    if not _table_exists(bind, table):
        op.create_table(
            table,
            sa.Column("session_id", sa.String(), primary_key=True),
            sa.Column("affinity_kind", sa.String(), nullable=False),
            sa.Column("affinity_key", sa.String(), nullable=False),
            sa.Column("api_key_scope", sa.String(), nullable=False, server_default=sa.text("''")),
            sa.Column("owner_instance_id", sa.String(), nullable=False),
            sa.Column("lease_expires_at", sa.DateTime(), nullable=False),
            sa.Column("account_id", sa.String(), nullable=True),
            sa.Column("request_model", sa.String(), nullable=True),
            sa.Column("codex_session", sa.Boolean(), nullable=False, server_default=sa.false()),
            sa.Column("idle_ttl_seconds", sa.Float(), nullable=False),
            sa.Column("upstream_turn_state", sa.String(), nullable=True),
            sa.Column("downstream_turn_state", sa.String(), nullable=True),
            sa.Column("created_at", sa.DateTime(), server_default=sa.func.now(), nullable=False),
            sa.Column("updated_at", sa.DateTime(), server_default=sa.func.now(), nullable=False),
        )
    # (index name, indexed columns), created in this order.
    lookup_indexes = (
        ("ix_http_bridge_leases_owner_expires", ["owner_instance_id", "lease_expires_at"]),
        ("ix_http_bridge_leases_expires", ["lease_expires_at"]),
    )
    for index_name, columns in lookup_indexes:
        if not _index_exists(bind, index_name, table):
            op.create_index(index_name, table, columns)


def downgrade() -> None:
    """Drop the lookup indexes and then the ``http_bridge_leases`` table, if present."""
    bind = op.get_bind()
    table = "http_bridge_leases"
    if not _table_exists(bind, table):
        return
    for index_name in ("ix_http_bridge_leases_expires", "ix_http_bridge_leases_owner_expires"):
        if _index_exists(bind, index_name, table):
            op.drop_index(index_name, table_name=table)
    op.drop_table(table)
def upgrade() -> None:
    """Do nothing: this revision only joins two Alembic heads."""


def downgrade() -> None:
    """Do nothing: un-merging the heads involves no schema change."""
def _table_exists(connection: Connection, table_name: str) -> bool:
    """Return True when ``table_name`` is present on ``connection``."""
    return sa.inspect(connection).has_table(table_name)


def _index_exists(connection: Connection, index_name: str, table_name: str) -> bool:
    """Return True when ``table_name`` exists and has an index named ``index_name``."""
    inspector = sa.inspect(connection)
    if inspector.has_table(table_name):
        known_names = {entry["name"] for entry in inspector.get_indexes(table_name)}
        return index_name in known_names
    return False


def upgrade() -> None:
    """Deduplicate leases per affinity scope, then add the unique index.

    Rows are ranked within each (affinity_kind, affinity_key, api_key_scope)
    group by the ORDER BY clause below; every row ranked after the first is
    deleted so the unique index can be created. Does nothing when the table
    is absent.
    """
    bind = op.get_bind()
    table = "http_bridge_leases"
    unique_index = "ux_http_bridge_leases_affinity_scope"
    if not _table_exists(bind, table):
        return
    op.execute(
        sa.text(
            """
            DELETE FROM http_bridge_leases
            WHERE session_id IN (
                SELECT session_id
                FROM (
                    SELECT
                        session_id,
                        ROW_NUMBER() OVER (
                            PARTITION BY affinity_kind, affinity_key, api_key_scope
                            ORDER BY lease_expires_at DESC, updated_at DESC, created_at DESC, session_id DESC
                        ) AS row_num
                    FROM http_bridge_leases
                ) ranked_leases
                WHERE ranked_leases.row_num > 1
            )
            """
        )
    )
    if _index_exists(bind, unique_index, table):
        return
    op.create_index(
        unique_index,
        table,
        ["affinity_kind", "affinity_key", "api_key_scope"],
        unique=True,
    )


def downgrade() -> None:
    """Drop the unique affinity index when both the table and the index exist.

    Rows deleted by ``upgrade`` are not restored.
    """
    bind = op.get_bind()
    table = "http_bridge_leases"
    unique_index = "ux_http_bridge_leases_affinity_scope"
    if not _table_exists(bind, table):
        return
    if not _index_exists(bind, unique_index, table):
        return
    op.drop_index(unique_index, table_name=table)
"affinity_kind", + "affinity_key", + "api_key_scope", + unique=True, + ), + Index("ix_http_bridge_leases_owner_expires", "owner_instance_id", "lease_expires_at"), + Index("ix_http_bridge_leases_expires", "lease_expires_at"), + ) + + session_id: Mapped[str] = mapped_column(String, primary_key=True) + affinity_kind: Mapped[str] = mapped_column(String, nullable=False) + affinity_key: Mapped[str] = mapped_column(String, nullable=False) + api_key_scope: Mapped[str] = mapped_column(String, nullable=False, default="", server_default=text("''")) + owner_instance_id: Mapped[str] = mapped_column(String, nullable=False) + lease_expires_at: Mapped[datetime] = mapped_column(DateTime, nullable=False) + account_id: Mapped[str | None] = mapped_column(String, nullable=True) + request_model: Mapped[str | None] = mapped_column(String, nullable=True) + codex_session: Mapped[bool] = mapped_column(Boolean, default=False, server_default=false(), nullable=False) + idle_ttl_seconds: Mapped[float] = mapped_column(Float, nullable=False) + upstream_turn_state: Mapped[str | None] = mapped_column(String, nullable=True) + downstream_turn_state: Mapped[str | None] = mapped_column(String, nullable=True) + created_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now(), nullable=False) + updated_at: Mapped[datetime] = mapped_column( + DateTime, + server_default=func.now(), + onupdate=func.now(), + nullable=False, + ) + + class DashboardSettings(Base): __tablename__ = "dashboard_settings" diff --git a/app/dependencies.py b/app/dependencies.py index c25ef49c..985f180a 100644 --- a/app/dependencies.py +++ b/app/dependencies.py @@ -19,6 +19,7 @@ from app.modules.firewall.repository import FirewallRepository from app.modules.firewall.service import FirewallService from app.modules.oauth.service import OauthService +from app.modules.proxy.bridge_repository import HttpBridgeLeasesRepository from app.modules.proxy.repo_bundle import ProxyRepositories from app.modules.proxy.service import 
ProxyService from app.modules.proxy.sticky_repository import StickySessionsRepository @@ -151,6 +152,7 @@ async def _proxy_repo_context() -> AsyncIterator[ProxyRepositories]: usage=UsageRepository(session), request_logs=RequestLogsRepository(session), sticky_sessions=StickySessionsRepository(session), + http_bridge_leases=HttpBridgeLeasesRepository(session), api_keys=ApiKeysRepository(session), additional_usage=AdditionalUsageRepository(session), ) diff --git a/app/modules/proxy/api.py b/app/modules/proxy/api.py index 34b018d4..5a059e96 100644 --- a/app/modules/proxy/api.py +++ b/app/modules/proxy/api.py @@ -445,13 +445,9 @@ async def _stream_responses( rate_limit_headers = await context.service.rate_limit_headers() bridge_active = prefer_http_bridge and proxy_service_module.get_settings().http_responses_session_bridge_enabled downstream_turn_state = ( - proxy_service_module.ensure_http_downstream_turn_state(request.headers) if bridge_active else None - ) - turn_state_headers = ( - proxy_service_module.build_downstream_turn_state_response_headers(downstream_turn_state) - if downstream_turn_state is not None - else {} + proxy_service_module.requested_http_downstream_turn_state(request.headers) if bridge_active else None ) + turn_state_headers: dict[str, str] = {} payload.stream = True if prefer_http_bridge: stream = context.service.stream_http_responses( @@ -464,6 +460,7 @@ async def _stream_responses( api_key_reservation=reservation, suppress_text_done_events=suppress_text_done_events, downstream_turn_state=downstream_turn_state, + response_headers_out=turn_state_headers, ) else: stream = context.service.stream_responses( @@ -482,7 +479,7 @@ async def _stream_responses( return StreamingResponse( _prepend_first(None, stream), media_type="text/event-stream", - headers={"Cache-Control": "no-cache", **rate_limit_headers}, + headers={"Cache-Control": "no-cache", **turn_state_headers, **rate_limit_headers}, ) except ProxyResponseError as exc: await 
_release_reservation(reservation) @@ -490,7 +487,7 @@ async def _stream_responses( request, exc.status_code, exc.payload, - headers=rate_limit_headers, + headers={**turn_state_headers, **rate_limit_headers}, ) return StreamingResponse( _prepend_first(first, stream), @@ -521,13 +518,9 @@ async def _collect_responses( rate_limit_headers = await context.service.rate_limit_headers() bridge_active = prefer_http_bridge and proxy_service_module.get_settings().http_responses_session_bridge_enabled downstream_turn_state = ( - proxy_service_module.ensure_http_downstream_turn_state(request.headers) if bridge_active else None - ) - turn_state_headers = ( - proxy_service_module.build_downstream_turn_state_response_headers(downstream_turn_state) - if downstream_turn_state is not None - else {} + proxy_service_module.requested_http_downstream_turn_state(request.headers) if bridge_active else None ) + turn_state_headers: dict[str, str] = {} payload.stream = True if prefer_http_bridge: stream = context.service.stream_http_responses( @@ -540,6 +533,7 @@ async def _collect_responses( api_key_reservation=reservation, suppress_text_done_events=suppress_text_done_events, downstream_turn_state=downstream_turn_state, + response_headers_out=turn_state_headers, ) else: stream = context.service.stream_responses( @@ -561,7 +555,7 @@ async def _collect_responses( request, exc.status_code, error.model_dump(mode="json", exclude_none=True), - headers=rate_limit_headers, + headers={**turn_state_headers, **rate_limit_headers}, ) if isinstance(response_payload, OpenAIResponsePayload): if response_payload.status == "failed": diff --git a/app/modules/proxy/bridge_repository.py b/app/modules/proxy/bridge_repository.py new file mode 100644 index 00000000..4c67b5e7 --- /dev/null +++ b/app/modules/proxy/bridge_repository.py @@ -0,0 +1,292 @@ +from __future__ import annotations + +from datetime import datetime + +from sqlalchemy import delete, or_, select, update +from sqlalchemy.dialects.postgresql import 
class HttpBridgeLeasesRepository:
    """Read/write access to ``HttpBridgeLease`` rows through one ``AsyncSession``.

    Every mutating method executes a single statement and commits the session
    before returning. Statements that use RETURNING have their rows consumed
    before the commit.
    """

    # Conflict target of ``claim``; a claim matches on these columns and
    # therefore never rewrites them.
    _AFFINITY_COLUMNS = ("affinity_kind", "affinity_key", "api_key_scope")

    def __init__(self, session: AsyncSession) -> None:
        self._session = session

    async def get_by_session_id(self, session_id: str) -> HttpBridgeLease | None:
        """Return the lease stored under ``session_id``; ``None`` if absent or the id is empty."""
        if not session_id:
            return None
        statement = select(HttpBridgeLease).where(HttpBridgeLease.session_id == session_id)
        result = await self._session.execute(statement)
        return result.scalar_one_or_none()

    async def upsert(
        self,
        *,
        session_id: str,
        affinity_kind: str,
        affinity_key: str,
        api_key_scope: str,
        owner_instance_id: str,
        lease_expires_at: datetime,
        account_id: str | None,
        request_model: str | None,
        codex_session: bool,
        idle_ttl_seconds: float,
        upstream_turn_state: str | None,
        downstream_turn_state: str | None,
    ) -> HttpBridgeLease:
        """Insert the lease, or overwrite the row that already has ``session_id``.

        Returns:
            The freshly reloaded row.

        Raises:
            RuntimeError: If the bound dialect is unsupported, or the row
                cannot be read back after the commit.
        """
        statement = self._build_upsert_statement(
            session_id=session_id,
            affinity_kind=affinity_kind,
            affinity_key=affinity_key,
            api_key_scope=api_key_scope,
            owner_instance_id=owner_instance_id,
            lease_expires_at=lease_expires_at,
            account_id=account_id,
            request_model=request_model,
            codex_session=codex_session,
            idle_ttl_seconds=idle_ttl_seconds,
            upstream_turn_state=upstream_turn_state,
            downstream_turn_state=downstream_turn_state,
        )
        await self._session.execute(statement)
        await self._session.commit()
        return await self._reload(session_id, operation="upsert")

    async def claim(
        self,
        *,
        session_id: str,
        affinity_kind: str,
        affinity_key: str,
        api_key_scope: str,
        owner_instance_id: str,
        lease_expires_at: datetime,
        account_id: str | None,
        request_model: str | None,
        codex_session: bool,
        idle_ttl_seconds: float,
        upstream_turn_state: str | None,
        downstream_turn_state: str | None,
        replace_session_id: str | None,
        expires_before: datetime,
    ) -> HttpBridgeLease | None:
        """Try to take the lease for an affinity scope.

        The row is inserted; when a row for the same
        (affinity_kind, affinity_key, api_key_scope) already exists it is
        taken over only if its lease expired before ``expires_before`` or its
        session id equals ``replace_session_id``.

        Returns:
            The reloaded row when the statement reported ``session_id`` as the
            written row, otherwise ``None`` (the existing lease was kept).

        Raises:
            RuntimeError: If the bound dialect is unsupported, or the claimed
                row cannot be read back after the commit.
        """
        statement = self._build_claim_statement(
            session_id=session_id,
            affinity_kind=affinity_kind,
            affinity_key=affinity_key,
            api_key_scope=api_key_scope,
            owner_instance_id=owner_instance_id,
            lease_expires_at=lease_expires_at,
            account_id=account_id,
            request_model=request_model,
            codex_session=codex_session,
            idle_ttl_seconds=idle_ttl_seconds,
            upstream_turn_state=upstream_turn_state,
            downstream_turn_state=downstream_turn_state,
            replace_session_id=replace_session_id,
            expires_before=expires_before,
        )
        result = await self._session.execute(statement.returning(HttpBridgeLease.session_id))
        claimed_session_id = result.scalar_one_or_none()
        await self._session.commit()
        if claimed_session_id != session_id:
            return None
        return await self._reload(session_id, operation="claim")

    async def delete(self, session_id: str) -> bool:
        """Delete the lease with ``session_id``; return whether a row was removed."""
        if not session_id:
            return False
        statement = delete(HttpBridgeLease).where(HttpBridgeLease.session_id == session_id)
        result = await self._session.execute(statement.returning(HttpBridgeLease.session_id))
        removed = result.scalar_one_or_none() is not None
        await self._session.commit()
        return removed

    async def delete_if_expires_at(self, session_id: str, *, lease_expires_at: datetime) -> bool:
        """Delete the lease only if its expiry still equals ``lease_expires_at``.

        The extra predicate makes the delete a compare-and-delete: a row whose
        expiry was changed since the caller read it is left untouched.

        Returns:
            True when a row was removed.
        """
        if not session_id:
            return False
        statement = delete(HttpBridgeLease).where(
            HttpBridgeLease.session_id == session_id,
            HttpBridgeLease.lease_expires_at == to_utc_naive(lease_expires_at),
        )
        result = await self._session.execute(statement.returning(HttpBridgeLease.session_id))
        removed = result.scalar_one_or_none() is not None
        await self._session.commit()
        return removed

    async def touch(
        self,
        session_id: str,
        *,
        affinity_kind: str,
        affinity_key: str,
        api_key_scope: str,
        owner_instance_id: str,
        lease_expires_at: datetime,
        account_id: str | None,
        request_model: str | None,
        codex_session: bool,
        idle_ttl_seconds: float,
        upstream_turn_state: str | None,
        downstream_turn_state: str | None,
    ) -> bool:
        """Rewrite every mutable column of an existing lease and bump ``updated_at``.

        Returns:
            True when a row with ``session_id`` existed and was updated;
            False when the id is empty or no such row exists.
        """
        if not session_id:
            return False
        statement = (
            update(HttpBridgeLease)
            .where(HttpBridgeLease.session_id == session_id)
            .values(
                affinity_kind=affinity_kind,
                affinity_key=affinity_key,
                api_key_scope=api_key_scope,
                owner_instance_id=owner_instance_id,
                lease_expires_at=to_utc_naive(lease_expires_at),
                account_id=account_id,
                request_model=request_model,
                codex_session=codex_session,
                idle_ttl_seconds=idle_ttl_seconds,
                upstream_turn_state=upstream_turn_state,
                downstream_turn_state=downstream_turn_state,
                updated_at=func.now(),
            )
            .returning(HttpBridgeLease.session_id)
        )
        result = await self._session.execute(statement)
        touched = result.scalar_one_or_none() is not None
        await self._session.commit()
        return touched

    async def purge_expired(self, *, expires_before: datetime) -> int:
        """Delete every lease that expired before ``expires_before``; return the count."""
        statement = delete(HttpBridgeLease).where(HttpBridgeLease.lease_expires_at < to_utc_naive(expires_before))
        result = await self._session.execute(statement.returning(HttpBridgeLease.session_id))
        deleted = len(result.scalars().all())
        await self._session.commit()
        return deleted

    async def _reload(self, session_id: str, *, operation: str) -> HttpBridgeLease:
        """Fetch and refresh the row just written by ``operation``.

        Raises:
            RuntimeError: If no row with ``session_id`` exists.
        """
        row = await self.get_by_session_id(session_id)
        if row is None:
            raise RuntimeError(f"HttpBridgeLease {operation} failed for session_id={session_id!r}")
        await self._session.refresh(row)
        return row

    def _insert_for_dialect(self, operation: str):
        """Return the dialect-specific ``insert`` constructor for the bound engine.

        Only the PostgreSQL and SQLite constructors are imported here, so any
        other dialect is rejected.

        Raises:
            RuntimeError: If the dialect is neither ``postgresql`` nor ``sqlite``.
        """
        dialect = self._session.get_bind().dialect.name
        if dialect == "postgresql":
            return pg_insert
        if dialect == "sqlite":
            return sqlite_insert
        raise RuntimeError(f"HttpBridgeLease {operation} unsupported for dialect={dialect!r}")

    def _build_upsert_statement(
        self,
        *,
        session_id: str,
        affinity_kind: str,
        affinity_key: str,
        api_key_scope: str,
        owner_instance_id: str,
        lease_expires_at: datetime,
        account_id: str | None,
        request_model: str | None,
        codex_session: bool,
        idle_ttl_seconds: float,
        upstream_turn_state: str | None,
        downstream_turn_state: str | None,
    ) -> Insert:
        """Build INSERT ... ON CONFLICT (session_id) DO UPDATE for a lease."""
        insert_fn = self._insert_for_dialect("upsert")
        values = {
            "session_id": session_id,
            "affinity_kind": affinity_kind,
            "affinity_key": affinity_key,
            "api_key_scope": api_key_scope,
            "owner_instance_id": owner_instance_id,
            "lease_expires_at": to_utc_naive(lease_expires_at),
            "account_id": account_id,
            "request_model": request_model,
            "codex_session": codex_session,
            "idle_ttl_seconds": idle_ttl_seconds,
            "upstream_turn_state": upstream_turn_state,
            "downstream_turn_state": downstream_turn_state,
        }
        # On conflict, rewrite everything except the conflict key itself;
        # deriving the set from ``values`` keeps the two from drifting apart.
        updates = {column: value for column, value in values.items() if column != "session_id"}
        updates["updated_at"] = func.now()
        return (
            insert_fn(HttpBridgeLease)
            .values(**values)
            .on_conflict_do_update(
                index_elements=[HttpBridgeLease.session_id],
                set_=updates,
            )
        )

    def _build_claim_statement(
        self,
        *,
        session_id: str,
        affinity_kind: str,
        affinity_key: str,
        api_key_scope: str,
        owner_instance_id: str,
        lease_expires_at: datetime,
        account_id: str | None,
        request_model: str | None,
        codex_session: bool,
        idle_ttl_seconds: float,
        upstream_turn_state: str | None,
        downstream_turn_state: str | None,
        replace_session_id: str | None,
        expires_before: datetime,
    ) -> Insert:
        """Build the conditional take-over INSERT used by ``claim``.

        The conflict target is the affinity scope; the DO UPDATE branch is
        guarded so it only fires for an expired row or for the row named by
        ``replace_session_id``.
        """
        insert_fn = self._insert_for_dialect("claim")
        values = {
            "session_id": session_id,
            "affinity_kind": affinity_kind,
            "affinity_key": affinity_key,
            "api_key_scope": api_key_scope,
            "owner_instance_id": owner_instance_id,
            "lease_expires_at": to_utc_naive(lease_expires_at),
            "account_id": account_id,
            "request_model": request_model,
            "codex_session": codex_session,
            "idle_ttl_seconds": idle_ttl_seconds,
            "upstream_turn_state": upstream_turn_state,
            "downstream_turn_state": downstream_turn_state,
        }
        # The affinity columns are the conflict key, so a take-over rewrites
        # everything else, including ``session_id``.
        updates = {column: value for column, value in values.items() if column not in self._AFFINITY_COLUMNS}
        updates["updated_at"] = func.now()
        replace_condition = HttpBridgeLease.lease_expires_at < to_utc_naive(expires_before)
        if replace_session_id is not None:
            replace_condition = or_(replace_condition, HttpBridgeLease.session_id == replace_session_id)
        return (
            insert_fn(HttpBridgeLease)
            .values(**values)
            .on_conflict_do_update(
                index_elements=[
                    HttpBridgeLease.affinity_kind,
                    HttpBridgeLease.affinity_key,
                    HttpBridgeLease.api_key_scope,
                ],
                set_=updates,
                where=replace_condition,
            )
        )
class _HTTPBridgeLeaseClaimLost(RuntimeError):
    """Raised when a lease claim comes back without a row for this session."""
str] | None = None, ) -> AsyncIterator[str]: _maybe_log_proxy_request_payload("stream_http", payload, headers) filtered = filter_inbound_headers(headers) @@ -200,6 +212,7 @@ def stream_http_responses( api_key_reservation=api_key_reservation, suppress_text_done_events=suppress_text_done_events, downstream_turn_state=downstream_turn_state, + response_headers_out=response_headers_out, ) async def _stream_http_bridge_or_retry( @@ -214,6 +227,7 @@ async def _stream_http_bridge_or_retry( api_key_reservation: ApiKeyUsageReservationData | None, suppress_text_done_events: bool, downstream_turn_state: str | None = None, + response_headers_out: dict[str, str] | None = None, ) -> AsyncIterator[str]: settings = await get_settings_cache().get() if not _http_responses_session_bridge_enabled(settings): @@ -245,6 +259,7 @@ async def _stream_http_bridge_or_retry( max_sessions=getattr(settings, "http_responses_session_bridge_max_sessions", 256), queue_limit=getattr(settings, "http_responses_session_bridge_queue_limit", 8), downstream_turn_state=downstream_turn_state, + response_headers_out=response_headers_out, ): yield line @@ -264,6 +279,7 @@ async def _stream_via_http_bridge( max_sessions: int, queue_limit: int, downstream_turn_state: str | None = None, + response_headers_out: dict[str, str] | None = None, ) -> AsyncIterator[str]: del propagate_http_errors, suppress_text_done_events request_id = ensure_request_id() @@ -329,8 +345,14 @@ async def _stream_via_http_bridge( text_data=text_data, queue_limit=queue_limit, ) - if downstream_turn_state is not None: - await self._register_http_bridge_turn_state(session, downstream_turn_state) + resolved_downstream_turn_state = self._resolve_http_bridge_downstream_turn_state( + session, + requested_turn_state=downstream_turn_state, + api_key_id=api_key.id if api_key is not None else None, + ) + await self._register_http_bridge_turn_state(session, resolved_downstream_turn_state) + if response_headers_out is not None: + 
def _decode_http_bridge_turn_state(
    self,
    turn_state: str | None,
    *,
    api_key_id: str | None,
) -> "_HTTPBridgeTurnStateToken | None":
    """Decrypt and validate a downstream HTTP bridge turn-state token.

    Args:
        turn_state: Raw token value supplied by the client, if any.
        api_key_id: API key of the current request; the token's scope must
            match the scope derived from it.

    Returns:
        The decoded token, or ``None`` when ``turn_state`` is empty or does
        not start with the HTTP bridge prefix (i.e. it is not one of ours).

    Raises:
        ProxyResponseError: 409 ``bridge_token_invalid`` when the token
            carries the prefix but cannot be decrypted, is not a JSON object,
            has missing or mistyped fields, has the wrong version, or is
            scoped to a different API key.
    """
    if not turn_state or not turn_state.startswith(_HTTP_BRIDGE_TURN_STATE_PREFIX):
        return None
    encrypted = turn_state.removeprefix(_HTTP_BRIDGE_TURN_STATE_PREFIX).strip()
    if not encrypted:
        raise self._invalid_http_bridge_turn_state()
    try:
        raw = self._encryptor.decrypt(encrypted.encode("ascii"))
        payload = json.loads(raw)
    except Exception as exc:
        # Deliberately broad: any decode/decrypt/parse failure on a
        # client-supplied value is reported uniformly as an invalid token.
        raise self._invalid_http_bridge_turn_state() from exc
    # Valid JSON is not necessarily an object; without this guard the
    # ``.get`` calls below would raise AttributeError instead of the 409.
    if not isinstance(payload, dict):
        raise self._invalid_http_bridge_turn_state()
    version = payload.get("v")
    session_id = payload.get("sid")
    owner_instance_id = payload.get("own")
    api_key_scope = payload.get("api")
    issued_at = payload.get("iat")
    if (
        version != _HTTP_BRIDGE_TURN_STATE_VERSION
        or not isinstance(session_id, str)
        or not session_id.strip()
        or not isinstance(owner_instance_id, str)
        or not owner_instance_id.strip()
        or not isinstance(api_key_scope, str)
        # bool is a subclass of int; a JSON true/false is not a timestamp.
        or isinstance(issued_at, bool)
        or not isinstance(issued_at, int)
    ):
        raise self._invalid_http_bridge_turn_state()
    if api_key_scope != _http_bridge_api_key_scope(api_key_id):
        raise self._invalid_http_bridge_turn_state()
    return _HTTPBridgeTurnStateToken(
        session_id=session_id,
        owner_instance_id=owner_instance_id,
        api_key_scope=api_key_scope,
        issued_at=issued_at,
    )
async def _get_live_http_bridge_lease(
    self,
    session_id: str | None,
) -> "_HTTPBridgeLeaseSnapshot | None":
    """Return a snapshot of the unexpired lease for ``session_id``, if any.

    An expired row is removed with a compare-and-delete on the expiry that
    was just read. If that delete removes nothing, the row is read once more,
    since its expiry no longer matches or it is already gone; it is returned
    only when it now exists and is unexpired.

    Returns:
        A snapshot of the live lease, or ``None`` when ``session_id`` is
        empty, no row exists, or the row is expired.
    """
    if not session_id:
        return None
    async with self._repo_factory() as repos:
        leases = repos.http_bridge_leases
        row = await leases.get_by_session_id(session_id)
        if row is None:
            return None
        if to_utc_naive(row.lease_expires_at) < utcnow():
            removed = await leases.delete_if_expires_at(
                session_id,
                lease_expires_at=row.lease_expires_at,
            )
            if removed:
                return None
            # Delete matched nothing: re-read to see the row's current state.
            row = await leases.get_by_session_id(session_id)
            if row is None:
                return None
            if to_utc_naive(row.lease_expires_at) < utcnow():
                return None
        return _HTTPBridgeLeaseSnapshot(
            session_id=row.session_id,
            affinity_kind=row.affinity_kind,
            affinity_key=row.affinity_key,
            owner_instance_id=row.owner_instance_id,
            api_key_scope=row.api_key_scope,
            account_id=row.account_id,
            lease_expires_at=row.lease_expires_at,
        )
replace_session_id: str | None, + ) -> None: + async with self._repo_factory() as repos: + claimed = await repos.http_bridge_leases.claim( + session_id=session.bridge_session_id, + affinity_kind=session.key.affinity_kind, + affinity_key=session.key.affinity_key, + api_key_scope=_http_bridge_api_key_scope(session.key.api_key_id), + owner_instance_id=session.owner_instance_id, + lease_expires_at=_http_bridge_lease_expires_at(session.idle_ttl_seconds), + account_id=session.account.id, + request_model=session.request_model, + codex_session=session.codex_session, + idle_ttl_seconds=session.idle_ttl_seconds, + upstream_turn_state=session.upstream_turn_state, + downstream_turn_state=session.downstream_turn_state, + replace_session_id=replace_session_id, + expires_before=utcnow(), + ) + if claimed is None: + raise _HTTPBridgeLeaseClaimLost( + f"HTTP bridge lease claim lost for affinity={session.key.affinity_kind}:{session.key.affinity_key}" + ) + + async def _touch_http_bridge_lease(self, session: "_HTTPBridgeSession") -> None: + async with session.lease_lock: + if session.closed: + return + async with self._repo_factory() as repos: + touched = await repos.http_bridge_leases.touch( + session.bridge_session_id, + affinity_kind=session.key.affinity_kind, + affinity_key=session.key.affinity_key, + api_key_scope=_http_bridge_api_key_scope(session.key.api_key_id), + owner_instance_id=session.owner_instance_id, + lease_expires_at=_http_bridge_lease_expires_at(session.idle_ttl_seconds), + account_id=session.account.id, + request_model=session.request_model, + codex_session=session.codex_session, + idle_ttl_seconds=session.idle_ttl_seconds, + upstream_turn_state=session.upstream_turn_state, + downstream_turn_state=session.downstream_turn_state, + ) + if not touched: + if session.closed: + return + claimed = await repos.http_bridge_leases.claim( + session_id=session.bridge_session_id, + affinity_kind=session.key.affinity_kind, + affinity_key=session.key.affinity_key, + 
api_key_scope=_http_bridge_api_key_scope(session.key.api_key_id), + owner_instance_id=session.owner_instance_id, + lease_expires_at=_http_bridge_lease_expires_at(session.idle_ttl_seconds), + account_id=session.account.id, + request_model=session.request_model, + codex_session=session.codex_session, + idle_ttl_seconds=session.idle_ttl_seconds, + upstream_turn_state=session.upstream_turn_state, + downstream_turn_state=session.downstream_turn_state, + replace_session_id=session.bridge_session_id, + expires_before=utcnow(), + ) + if claimed is None: + raise _HTTPBridgeLeaseClaimLost( + "HTTP bridge lease claim lost while recreating missing lease row" + ) + + async def _invalidate_http_bridge_session_after_lease_failure( + self, + session: "_HTTPBridgeSession", + *, + failure_message: str, + ) -> None: + logger.warning( + failure_message, + session.bridge_session_id, + exc_info=True, + ) + try: + await self._close_http_bridge_session( + session, + fail_pending_requests=True, + error_code="upstream_unavailable", + error_message="HTTP bridge session became unavailable", + ) + except Exception: + logger.warning( + "Failed to invalidate HTTP bridge session after lease persistence failure session_id=%s", + session.bridge_session_id, + exc_info=True, + ) + + async def _fail_pending_http_bridge_requests( + self, + session: "_HTTPBridgeSession", + *, + error_code: str, + error_message: str, + error_type: str = "server_error", + ) -> None: + async with session.pending_lock: + remaining = list(session.pending_requests) + session.pending_requests.clear() + session.queued_request_count = 0 + + for request_state in remaining: + _release_websocket_response_create_gate(request_state, session.response_create_gate) + if request_state.event_queue is not None: + await request_state.event_queue.put( + format_sse_event( + response_failed_event( + request_state.error_code_override or error_code, + request_state.error_message_override or error_message, + 
error_type=request_state.error_type_override or error_type, + response_id=request_state.response_id or request_state.request_id, + error_param=request_state.error_param_override, + ) + ) + ) + await request_state.event_queue.put(None) + await self._release_websocket_reservation(request_state.api_key_reservation) + request_state.api_key_reservation = None + if session.account.id and not request_state.skip_request_log: + await self._write_request_log( + account_id=session.account.id, + api_key=request_state.api_key, + request_id=request_state.response_id or request_state.request_log_id or request_state.request_id, + model=request_state.model or "", + latency_ms=int((time.monotonic() - request_state.started_at) * 1000), + status="error", + error_code=request_state.error_code_override or error_code, + error_message=request_state.error_message_override or error_message, + reasoning_effort=request_state.reasoning_effort, + transport=request_state.transport, + service_tier=request_state.service_tier, + requested_service_tier=request_state.requested_service_tier, + actual_service_tier=request_state.actual_service_tier, + ) + + async def _ensure_http_bridge_lease_keepalive(self, session: "_HTTPBridgeSession") -> None: + task = getattr(session, "lease_keepalive_task", None) + if task is not None and not task.done(): + return + + async def _keepalive() -> None: + interval_seconds = min(session.idle_ttl_seconds / 2.0, 60.0) + if interval_seconds <= 0: + interval_seconds = 0.001 + try: + while True: + await asyncio.sleep(interval_seconds) + if session.closed: + return + pending_count = await self._http_bridge_pending_count(session) + if pending_count <= 0: + return + try: + await self._touch_http_bridge_lease(session) + except Exception: + await self._invalidate_http_bridge_session_after_lease_failure( + session, + failure_message="Failed to refresh HTTP bridge lease during active stream session_id=%s", + ) + return + except asyncio.CancelledError: + raise + + 
session.lease_keepalive_task = asyncio.create_task(_keepalive()) + + async def _stop_http_bridge_lease_keepalive(self, session: "_HTTPBridgeSession") -> None: + task = getattr(session, "lease_keepalive_task", None) + if task is None: + return + session.lease_keepalive_task = None + if task.done(): + return + if task is asyncio.current_task(): + return + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + + async def _delete_http_bridge_lease_after_reader_exit(self, session: "_HTTPBridgeSession") -> None: + if session.preserve_lease_during_reconnect or session.lease_cleanup_owned_by_close: + return + lease_lock = getattr(session, "lease_lock", None) + if lease_lock is None: + await self._delete_http_bridge_lease(session.bridge_session_id) + return + while True: + if session.preserve_lease_during_reconnect or session.lease_cleanup_owned_by_close: + return + try: + lease_lock.acquire_nowait() + except anyio.WouldBlock: + await asyncio.sleep(0) + continue + try: + if session.preserve_lease_during_reconnect or session.lease_cleanup_owned_by_close: + return + await self._delete_http_bridge_lease(session.bridge_session_id) + return + finally: + lease_lock.release() + async def _get_or_create_http_bridge_session( self, key: "_HTTPBridgeSessionKey", @@ -1428,6 +1833,8 @@ async def _get_or_create_http_bridge_session( ) -> "_HTTPBridgeSession": settings = get_settings() api_key_id = api_key.id if api_key is not None else None + current_instance, ring = _normalized_http_bridge_instance_ring(settings) + current_owner = _http_bridge_current_owner_id(settings) effective_idle_ttl_seconds = _effective_http_bridge_idle_ttl_seconds( affinity=affinity, idle_ttl_seconds=idle_ttl_seconds, @@ -1438,76 +1845,148 @@ async def _get_or_create_http_bridge_session( ), ) incoming_turn_state = _sticky_key_from_turn_state_header(headers) + turn_state_token = self._decode_http_bridge_turn_state(incoming_turn_state, api_key_id=api_key_id) + is_bridge_turn_state_replay = 
bool(incoming_turn_state and incoming_turn_state.startswith("http_turn_")) + create_affinity = affinity + active_turn_state_lease = await self._get_live_http_bridge_lease( + turn_state_token.session_id if turn_state_token is not None else None + ) + created_session_id = self._new_http_bridge_session_id() while True: sessions_to_close: list[_HTTPBridgeSession] = [] inflight_future: asyncio.Future[_HTTPBridgeSession] | None = None capacity_wait_future: asyncio.Future[_HTTPBridgeSession] | None = None owns_creation = False continuity_error: ProxyResponseError | None = None + recovered_turn_state_replay = False + rekey_recovered_turn_state = False + stale_turn_state_lease_session_id: str | None = None + matched_turn_state_alias = False + lookup_key = key + session_key = key async with self._http_bridge_lock: if incoming_turn_state is not None: alias_index_key = _http_bridge_turn_state_alias_key(incoming_turn_state, api_key_id) alias_key = self._http_bridge_turn_state_index.get(alias_index_key) - if alias_key is not None: + alias_session = self._http_bridge_sessions.get(alias_key) if alias_key is not None else None + if alias_session is None and turn_state_token is not None: + for candidate_key, candidate_session in self._http_bridge_sessions.items(): + if candidate_session.bridge_session_id != turn_state_token.session_id: + continue + if candidate_session.closed or candidate_session.account.status != AccountStatus.ACTIVE: + continue + alias_key = candidate_key + alias_session = candidate_session + self._http_bridge_turn_state_index[alias_index_key] = candidate_key + break + if alias_session is not None: + matched_turn_state_alias = True + assert alias_key is not None key = alias_key - alias_session = self._http_bridge_sessions.get(alias_key) - if ( - alias_session is None - or alias_session.closed - or alias_session.account.status != AccountStatus.ACTIVE + if incoming_turn_state is not None and ( + turn_state_token is None + or 
self._http_bridge_turn_state_matches_session( + incoming_turn_state, + session=alias_session, + api_key_id=alias_session.key.api_key_id, + ) ): - self._http_bridge_turn_state_index.pop(alias_index_key, None) - key = _HTTPBridgeSessionKey("turn_state_header", incoming_turn_state, api_key_id) - else: self._promote_http_bridge_session_to_codex_affinity( alias_session, turn_state=incoming_turn_state, settings=settings, ) - for alias in alias_session.downstream_turn_state_aliases: - self._http_bridge_turn_state_index[ - _http_bridge_turn_state_alias_key(alias, alias_session.key.api_key_id) - ] = alias_session.key - key = alias_session.key - elif incoming_turn_state.startswith("http_turn_"): - key = _HTTPBridgeSessionKey("turn_state_header", incoming_turn_state, api_key_id) - if self._http_bridge_inflight_sessions.get(key) is not None: - pass - elif previous_response_id is not None: - raise ProxyResponseError( - 400, - _http_bridge_previous_response_error_envelope( - previous_response_id, - ( - "HTTP bridge continuity was lost. Replay x-codex-turn-state " - "or retry with a stable prompt_cache_key." 
- ), - ), + else: + alias_session.downstream_turn_state_aliases.add(incoming_turn_state) + for alias in alias_session.downstream_turn_state_aliases: + self._http_bridge_turn_state_index[ + _http_bridge_turn_state_alias_key(alias, alias_session.key.api_key_id) + ] = alias_session.key + key = alias_session.key + lookup_key = key + session_key = key + elif ( + active_turn_state_lease is not None + and not _http_bridge_owner_matches_current( + active_turn_state_lease.owner_instance_id, + current_owner_id=current_owner, + current_instance_id=current_instance, + ) + and _http_bridge_owner_instance_group(active_turn_state_lease.owner_instance_id) in ring + ): + _log_http_bridge_event( + "owner_mismatch", + key, + account_id=active_turn_state_lease.account_id, + model=request_model, + detail=( + f"lease_session_id={active_turn_state_lease.session_id}, " + f"expected_instance={active_turn_state_lease.owner_instance_id}, " + f"current_instance={current_instance}" + ), + ) + raise ProxyResponseError( + 409, + openai_error( + "bridge_wrong_instance", + "HTTP responses session bridge turn-state is owned by another live instance", + error_type="server_error", + ), + ) + elif turn_state_token is not None: + recovered_turn_state_replay = True + stale_turn_state_lease_session_id = ( + active_turn_state_lease.session_id if active_turn_state_lease is not None else None + ) + if active_turn_state_lease is not None: + key = _HTTPBridgeSessionKey( + active_turn_state_lease.affinity_kind, + active_turn_state_lease.affinity_key, + api_key_id, + ) + lookup_key = key + session_key = key + create_affinity = _affinity_policy_from_http_bridge_session_key( + key, + openai_cache_affinity_max_age_seconds=settings.openai_cache_affinity_max_age_seconds, ) + rekey_recovered_turn_state = True else: - raise ProxyResponseError( - 409, - openai_error( - "bridge_instance_mismatch", - "HTTP bridge turn-state reached an instance that does not own the live session", - error_type="server_error", - ), + 
lookup_key = _HTTPBridgeSessionKey( + "turn_state_header", + turn_state_token.session_id, + api_key_id, ) + rekey_recovered_turn_state = True + key = lookup_key + session_key = key + else: + key = _HTTPBridgeSessionKey( + "turn_state_header", + incoming_turn_state, + api_key_id, + ) + lookup_key = key + session_key = key await self._prune_http_bridge_sessions_locked() - owner_instance = _http_bridge_owner_instance(key, settings) current_instance, ring = _normalized_http_bridge_instance_ring(settings) + current_owner = _http_bridge_current_owner_id(settings) + owner_instance = _http_bridge_owner_instance(lookup_key, settings) if ( - key.affinity_kind != "request" + not matched_turn_state_alias + and active_turn_state_lease is None + and turn_state_token is None + and lookup_key.affinity_kind != "request" and owner_instance is not None and len(ring) > 1 and owner_instance != current_instance ): _log_http_bridge_event( "owner_mismatch", - key, + lookup_key, account_id=None, model=request_model, detail=f"expected_instance={owner_instance}, current_instance={current_instance}", @@ -1515,7 +1994,7 @@ async def _get_or_create_http_bridge_session( raise ProxyResponseError( 409, openai_error( - "bridge_instance_mismatch", + "bridge_wrong_instance", ( "HTTP responses session bridge request reached the wrong instance " f"(expected {owner_instance}, got {current_instance})" @@ -1524,13 +2003,32 @@ async def _get_or_create_http_bridge_session( ), ) - existing = self._http_bridge_sessions.get(key) + existing = self._http_bridge_sessions.get(lookup_key) if existing is not None and not existing.closed and existing.account.status == AccountStatus.ACTIVE: + if ( + incoming_turn_state is not None + and self._http_bridge_turn_state_index.get( + _http_bridge_turn_state_alias_key(incoming_turn_state, api_key_id) + ) + == existing.key + ): + if turn_state_token is None or self._http_bridge_turn_state_matches_session( + incoming_turn_state, + session=existing, + 
api_key_id=existing.key.api_key_id, + ): + self._promote_http_bridge_session_to_codex_affinity( + existing, + turn_state=incoming_turn_state, + settings=settings, + ) + else: + existing.downstream_turn_state_aliases.add(incoming_turn_state) existing.request_model = request_model existing.last_used_at = time.monotonic() _log_http_bridge_event( "reuse", - key, + existing.key, account_id=existing.account.id, model=existing.request_model, pending_count=await self._http_bridge_pending_count(existing), @@ -1540,15 +2038,22 @@ async def _get_or_create_http_bridge_session( if existing is not None: _log_http_bridge_event( "discard_stale", - key, + existing.key, account_id=existing.account.id, model=existing.request_model, ) - self._http_bridge_sessions.pop(key, None) + self._http_bridge_sessions.pop(lookup_key, None) sessions_to_close.append(existing) + if turn_state_token is not None: + recovered_turn_state_replay = True + stale_turn_state_lease_session_id = ( + active_turn_state_lease.session_id if active_turn_state_lease is not None else None + ) - inflight_future = self._http_bridge_inflight_sessions.get(key) - if previous_response_id is not None: + if recovered_turn_state_replay and previous_response_id is not None: + continuity_error = self._expired_http_bridge_turn_state() + + if continuity_error is None and previous_response_id is not None: continuity_error = ProxyResponseError( 400, _http_bridge_previous_response_error_envelope( @@ -1559,7 +2064,9 @@ async def _get_or_create_http_bridge_session( ), ), ) - else: + + if continuity_error is None: + inflight_future = self._http_bridge_inflight_sessions.get(lookup_key) if inflight_future is None: while ( len(self._http_bridge_sessions) + len(self._http_bridge_inflight_sessions) >= max_sessions @@ -1591,7 +2098,7 @@ async def _get_or_create_http_bridge_session( else: _log_http_bridge_event( "capacity_exhausted_active_sessions", - key, + lookup_key, account_id=None, model=request_model, pending_count=( @@ -1608,7 +2115,7 
@@ async def _get_or_create_http_bridge_session( ) else: inflight_future = asyncio.get_running_loop().create_future() - self._http_bridge_inflight_sessions[key] = inflight_future + self._http_bridge_inflight_sessions[lookup_key] = inflight_future owns_creation = True for stale_session in sessions_to_close: @@ -1646,26 +2153,60 @@ async def _get_or_create_http_bridge_session( session: _HTTPBridgeSession | None = None session_registered = False try: - session = await self._create_http_bridge_session( - key, - headers=headers, - affinity=affinity, - request_model=request_model, - idle_ttl_seconds=effective_idle_ttl_seconds, + if rekey_recovered_turn_state: + session_key = _HTTPBridgeSessionKey( + "turn_state_header", + self._encode_http_bridge_turn_state( + session_id=created_session_id, + owner_instance_id=current_owner, + api_key_id=api_key_id, + ), + api_key_id, + ) + create_affinity = _AffinityPolicy( + key=session_key.affinity_key, + kind=StickySessionKind.CODEX_SESSION, + ) + create_headers = ( + _headers_without_local_http_bridge_turn_state(headers) + if is_bridge_turn_state_replay or turn_state_token is not None + else headers + ) + create_session = self._create_http_bridge_session + create_kwargs: _HTTPBridgeCreateSessionKwargs = { + "headers": create_headers, + "affinity": create_affinity, + "request_model": request_model, + "idle_ttl_seconds": effective_idle_ttl_seconds, + } + create_signature = inspect.signature(create_session) + accepts_extra_create_kwargs = any( + parameter.kind == inspect.Parameter.VAR_KEYWORD + for parameter in create_signature.parameters.values() + ) + if accepts_extra_create_kwargs or "bridge_session_id" in create_signature.parameters: + create_kwargs["bridge_session_id"] = created_session_id + if accepts_extra_create_kwargs or "owner_instance_id" in create_signature.parameters: + create_kwargs["owner_instance_id"] = current_owner + if accepts_extra_create_kwargs or "replaced_bridge_session_id" in create_signature.parameters: + 
create_kwargs["replaced_bridge_session_id"] = stale_turn_state_lease_session_id + session = await create_session( + session_key, + **create_kwargs, ) async with self._http_bridge_lock: - current_future = self._http_bridge_inflight_sessions.get(key) + current_future = self._http_bridge_inflight_sessions.get(lookup_key) if current_future is inflight_future: - self._http_bridge_inflight_sessions.pop(key, None) - self._http_bridge_sessions[key] = session + self._http_bridge_inflight_sessions.pop(lookup_key, None) + self._http_bridge_sessions[session_key] = session session_registered = True if inflight_future is not None and not inflight_future.done(): inflight_future.set_result(session) except BaseException as exc: async with self._http_bridge_lock: - current_future = self._http_bridge_inflight_sessions.get(key) + current_future = self._http_bridge_inflight_sessions.get(lookup_key) if current_future is inflight_future: - self._http_bridge_inflight_sessions.pop(key, None) + self._http_bridge_inflight_sessions.pop(lookup_key, None) if inflight_future is not None and not inflight_future.done(): if isinstance(exc, asyncio.CancelledError): inflight_future.cancel() @@ -1674,6 +2215,17 @@ async def _get_or_create_http_bridge_session( inflight_future.exception() if session is not None and not session_registered: await self._close_http_bridge_session(session) + if isinstance(exc, _HTTPBridgeLeaseClaimLost): + return await self._get_or_create_http_bridge_session( + key, + headers=headers, + affinity=affinity, + api_key=api_key, + request_model=request_model, + idle_ttl_seconds=idle_ttl_seconds, + max_sessions=max_sessions, + previous_response_id=previous_response_id, + ) raise _log_http_bridge_event( "create", @@ -1681,6 +2233,21 @@ async def _get_or_create_http_bridge_session( account_id=session.account.id, model=session.request_model, ) + if ( + stale_turn_state_lease_session_id is not None + and stale_turn_state_lease_session_id != session.bridge_session_id + ): + try: + 
await self._delete_http_bridge_lease(stale_turn_state_lease_session_id) + except Exception: + logger.warning( + "Failed to delete stale HTTP bridge lease after replacement registration", + extra={ + "stale_bridge_session_id": stale_turn_state_lease_session_id, + "replacement_bridge_session_id": session.bridge_session_id, + }, + exc_info=True, + ) return session async def _prune_http_bridge_sessions_locked(self) -> None: @@ -1712,22 +2279,49 @@ async def _close_http_bridge_session( session: "_HTTPBridgeSession", *, turn_state_lock_held: bool = False, + fail_pending_requests: bool = False, + error_code: str = "upstream_unavailable", + error_message: str = "HTTP bridge session became unavailable", ) -> None: - session.closed = True + lease_lock = getattr(session, "lease_lock", None) + + async def _claim_close_cleanup() -> None: + session.closed = True + session.lease_cleanup_owned_by_close = True + + if lease_lock is not None: + async with lease_lock: + await _claim_close_cleanup() + else: + await _claim_close_cleanup() + + await self._stop_http_bridge_lease_keepalive(session) + if fail_pending_requests: + await self._fail_pending_http_bridge_requests( + session, + error_code=error_code, + error_message=error_message, + ) if turn_state_lock_held: self._unregister_http_bridge_turn_states_locked(session) else: await self._unregister_http_bridge_turn_states(session) if session.upstream_reader is not None: session.upstream_reader.cancel() - try: - await session.upstream_reader - except asyncio.CancelledError: - pass + if session.upstream_reader is not asyncio.current_task(): + try: + await session.upstream_reader + except asyncio.CancelledError: + pass try: await session.upstream.close() except Exception: logger.debug("Failed to close HTTP bridge upstream websocket", exc_info=True) + if lease_lock is not None: + async with lease_lock: + await self._delete_http_bridge_lease(getattr(session, "bridge_session_id", None)) + else: + await 
self._delete_http_bridge_lease(getattr(session, "bridge_session_id", None)) _log_http_bridge_event( "close", session.key, @@ -1739,13 +2333,40 @@ async def _register_http_bridge_turn_state(self, session: "_HTTPBridgeSession", async with self._http_bridge_lock: if session.closed: return + session.reconnect_turn_state = turn_state session.downstream_turn_state_aliases.add(turn_state) - if session.downstream_turn_state is None: - session.downstream_turn_state = turn_state + if self._http_bridge_turn_state_matches_session( + turn_state, + session=session, + api_key_id=session.key.api_key_id, + ): + if session.downstream_turn_state is None or not self._http_bridge_turn_state_matches_session( + session.downstream_turn_state, + session=session, + api_key_id=session.key.api_key_id, + ): + session.downstream_turn_state = turn_state + else: + self._promote_http_bridge_session_to_codex_affinity( + session, + turn_state=turn_state, + settings=get_settings(), + ) for alias in session.downstream_turn_state_aliases: self._http_bridge_turn_state_index[_http_bridge_turn_state_alias_key(alias, session.key.api_key_id)] = ( session.key ) + try: + await self._touch_http_bridge_lease(session) + except Exception: + await self._invalidate_http_bridge_session_after_lease_failure( + session, + failure_message="Failed to persist HTTP bridge lease after turn-state registration session_id=%s", + ) + raise ProxyResponseError( + 502, + openai_error("upstream_unavailable", "HTTP bridge session became unavailable"), + ) async def _unregister_http_bridge_turn_states(self, session: "_HTTPBridgeSession") -> None: async with self._http_bridge_lock: @@ -1767,15 +2388,27 @@ def _promote_http_bridge_session_to_codex_affinity( turn_state: str, settings: object, ) -> None: + promoted_key = _HTTPBridgeSessionKey( + affinity_kind="turn_state_header", + affinity_key=turn_state, + api_key_id=session.key.api_key_id, + ) + current_key = session.key + if current_key != promoted_key: + current_session = 
self._http_bridge_sessions.get(current_key) + if current_session is session: + self._http_bridge_sessions.pop(current_key, None) + session.key = promoted_key + self._http_bridge_sessions[promoted_key] = session session.affinity = _AffinityPolicy(key=turn_state, kind=StickySessionKind.CODEX_SESSION) session.codex_session = True session.downstream_turn_state = turn_state + session.reconnect_turn_state = turn_state session.downstream_turn_state_aliases.add(turn_state) session.idle_ttl_seconds = max( session.idle_ttl_seconds, float(getattr(settings, "http_responses_session_bridge_codex_idle_ttl_seconds", 900.0)), ) - session.headers = _headers_with_turn_state(session.headers, turn_state) async def _create_http_bridge_session( self, @@ -1785,6 +2418,9 @@ async def _create_http_bridge_session( affinity: _AffinityPolicy, request_model: str | None, idle_ttl_seconds: float, + bridge_session_id: str, + owner_instance_id: str, + replaced_bridge_session_id: str | None = None, ) -> "_HTTPBridgeSession": request_state = _WebSocketRequestState( request_id=f"http_bridge_connect_{uuid4().hex}", @@ -1841,6 +2477,8 @@ async def _create_http_bridge_session( _raise_proxy_unavailable(exc.message or "Temporary upstream refresh failure") except (aiohttp.ClientError, asyncio.TimeoutError) as exc: _raise_proxy_unavailable(str(exc) or "Request to upstream timed out") + echoed_turn_state = _upstream_turn_state_from_socket(upstream) + reconnect_turn_state = echoed_turn_state or _sticky_key_from_turn_state_header(connect_headers) session = _HTTPBridgeSession( key=key, headers=connect_headers, @@ -1851,15 +2489,31 @@ async def _create_http_bridge_session( upstream_control=_WebSocketUpstreamControl(), pending_requests=deque(), pending_lock=anyio.Lock(), + lease_lock=anyio.Lock(), response_create_gate=asyncio.Semaphore(1), queued_request_count=0, last_used_at=time.monotonic(), idle_ttl_seconds=idle_ttl_seconds, + bridge_session_id=bridge_session_id, + owner_instance_id=owner_instance_id, 
codex_session=affinity.kind == StickySessionKind.CODEX_SESSION, prewarm_lock=anyio.Lock(), - upstream_turn_state=_upstream_turn_state_from_socket(upstream), + upstream_turn_state=echoed_turn_state, + reconnect_turn_state=reconnect_turn_state, downstream_turn_state=None, + pending_replaced_bridge_session_id=replaced_bridge_session_id, ) + try: + await self._persist_http_bridge_lease(session) + except BaseException: + session.closed = True + try: + await upstream.close() + except Exception: + logger.debug( + "Failed to close HTTP bridge upstream websocket after lease persistence error", exc_info=True + ) + raise session.upstream_reader = asyncio.create_task(self._relay_http_bridge_upstream_messages(session)) return session @@ -1913,6 +2567,7 @@ async def _submit_http_bridge_request( async with session.pending_lock: session.pending_requests.append(request_state) request_enqueued = True + await self._ensure_http_bridge_lease_keepalive(session) await session.upstream.send_text(text_data) session.last_used_at = time.monotonic() except asyncio.CancelledError: @@ -2027,6 +2682,7 @@ async def _maybe_prewarm_http_bridge_session( async with session.pending_lock: session.pending_requests.append(warmup_state) request_enqueued = True + await self._ensure_http_bridge_lease_keepalive(session) await session.upstream.send_text(warmup_text) while True: event_block = await event_queue.get() @@ -2066,6 +2722,9 @@ async def _cleanup_http_bridge_submit_interruption( if request_enqueued and request_state in session.pending_requests: session.pending_requests.remove(request_state) session.queued_request_count = max(0, session.queued_request_count - 1) + has_pending_requests = bool(session.pending_requests) + if not has_pending_requests: + await self._stop_http_bridge_lease_keepalive(session) if gate_acquired: _release_websocket_response_create_gate(request_state, session.response_create_gate) @@ -2081,9 +2740,12 @@ async def _detach_http_bridge_request( 
session.pending_requests.remove(request_state) session.queued_request_count = max(0, session.queued_request_count - 1) removed = True + has_pending_requests = bool(session.pending_requests) request_state.event_queue = None if not removed: return False + if not has_pending_requests: + await self._stop_http_bridge_lease_keepalive(session) _release_websocket_response_create_gate(request_state, session.response_create_gate) await self._release_websocket_reservation(request_state.api_key_reservation) request_state.api_key_reservation = None @@ -2154,6 +2816,8 @@ async def _relay_http_bridge_upstream_messages( break finally: session.closed = True + await self._stop_http_bridge_lease_keepalive(session) + await self._delete_http_bridge_lease_after_reader_exit(session) async def _retry_http_bridge_request_on_fresh_upstream( self, @@ -2245,6 +2909,9 @@ async def _reconnect_http_bridge_session( old_account_id = session.account.id old_upstream = session.upstream old_reader = session.upstream_reader if restart_reader else None + new_upstream: UpstreamResponsesWebSocket | None = None + preserve_lease_during_reconnect = True + session.preserve_lease_during_reconnect = True if old_reader is not None: old_reader.cancel() if old_reader is not asyncio.current_task(): @@ -2257,48 +2924,78 @@ async def _reconnect_http_bridge_session( except Exception: logger.debug("Failed to close HTTP bridge upstream websocket before reconnect", exc_info=True) - deadline = _websocket_connect_deadline(request_state, get_settings().proxy_request_budget_seconds) - settings = await get_settings_cache().get() - selection = await self._select_account_with_budget( - deadline, - request_id=request_state.request_log_id or request_state.request_id, - kind="http_bridge", - sticky_key=session.affinity.key, - sticky_kind=session.affinity.kind, - reallocate_sticky=session.affinity.reallocate_sticky, - sticky_max_age_seconds=session.affinity.max_age_seconds, - 
prefer_earlier_reset_accounts=settings.prefer_earlier_reset_accounts, - routing_strategy=_routing_strategy(settings), - model=session.request_model, - ) - account = selection.account - if account is None: - raise ProxyResponseError( - 503, - openai_error( - selection.error_code or "no_accounts", - selection.error_message or "No active accounts available", - error_type="server_error", - ), + try: + deadline = _websocket_connect_deadline(request_state, get_settings().proxy_request_budget_seconds) + settings = await get_settings_cache().get() + selection = await self._select_account_with_budget( + deadline, + request_id=request_state.request_log_id or request_state.request_id, + kind="http_bridge", + sticky_key=session.affinity.key, + sticky_kind=session.affinity.kind, + reallocate_sticky=session.affinity.reallocate_sticky, + sticky_max_age_seconds=session.affinity.max_age_seconds, + prefer_earlier_reset_accounts=settings.prefer_earlier_reset_accounts, + routing_strategy=_routing_strategy(settings), + model=session.request_model, ) - account = await self._ensure_fresh_with_budget(account, timeout_seconds=_remaining_budget_seconds(deadline)) - connect_headers = _headers_with_turn_state( - session.headers, - _preferred_http_bridge_reconnect_turn_state(session), - ) - upstream = await self._open_upstream_websocket_with_budget( - account, - connect_headers, - timeout_seconds=_remaining_budget_seconds(deadline), - ) - session.account = account - session.headers = connect_headers - session.upstream = upstream - session.upstream_control = _WebSocketUpstreamControl() - session.closed = False - session.upstream_turn_state = _upstream_turn_state_from_socket(upstream) or session.upstream_turn_state - if restart_reader: - session.upstream_reader = asyncio.create_task(self._relay_http_bridge_upstream_messages(session)) + account = selection.account + if account is None: + raise ProxyResponseError( + 503, + openai_error( + selection.error_code or "no_accounts", + 
selection.error_message or "No active accounts available", + error_type="server_error", + ), + ) + account = await self._ensure_fresh_with_budget(account, timeout_seconds=_remaining_budget_seconds(deadline)) + preferred_turn_state = _preferred_http_bridge_reconnect_turn_state(session) + connect_headers = _headers_with_turn_state( + session.headers, + preferred_turn_state, + ) + new_upstream = await self._open_upstream_websocket_with_budget( + account, + connect_headers, + timeout_seconds=_remaining_budget_seconds(deadline), + ) + session.account = account + session.headers = connect_headers + session.upstream = new_upstream + session.upstream_control = _WebSocketUpstreamControl() + session.closed = False + echoed_turn_state = _upstream_turn_state_from_socket(new_upstream) + session.upstream_turn_state = echoed_turn_state or session.upstream_turn_state + session.reconnect_turn_state = echoed_turn_state or preferred_turn_state + try: + await self._touch_http_bridge_lease(session) + except Exception: + await self._invalidate_http_bridge_session_after_lease_failure( + session, + failure_message="Failed to persist HTTP bridge lease after reconnect session_id=%s", + ) + raise ProxyResponseError( + 502, + openai_error("upstream_unavailable", "HTTP bridge session became unavailable"), + ) + if restart_reader: + session.upstream_reader = asyncio.create_task(self._relay_http_bridge_upstream_messages(session)) + except BaseException: + session.closed = True + if new_upstream is not None: + try: + await new_upstream.close() + except Exception: + logger.debug( + "Failed to close replacement HTTP bridge websocket after reconnect error", exc_info=True + ) + if preserve_lease_during_reconnect: + session.preserve_lease_during_reconnect = False + await self._delete_http_bridge_lease(session.bridge_session_id) + raise + if preserve_lease_during_reconnect: + session.preserve_lease_during_reconnect = False _log_http_bridge_event( "reconnect", session.key, @@ -2362,6 +3059,9 @@ async 
def _process_http_bridge_upstream_text( if terminal_request_state is None: return + if await self._http_bridge_pending_count(session) <= 0: + await self._stop_http_bridge_lease_keepalive(session) + if terminal_request_state is not matched_request_state and terminal_request_state.event_queue is not None: await terminal_request_state.event_queue.put(event_block) if terminal_request_state.event_queue is not None: @@ -4278,6 +4978,25 @@ class _HTTPBridgeSessionKey: api_key_id: str | None +@dataclass(frozen=True, slots=True) +class _HTTPBridgeTurnStateToken: + session_id: str + owner_instance_id: str + api_key_scope: str + issued_at: int + + +@dataclass(frozen=True, slots=True) +class _HTTPBridgeLeaseSnapshot: + session_id: str + affinity_kind: str + affinity_key: str + owner_instance_id: str + api_key_scope: str + account_id: str | None + lease_expires_at: datetime + + @dataclass(slots=True) class _HTTPBridgeSession: key: _HTTPBridgeSessionKey @@ -4289,17 +5008,25 @@ class _HTTPBridgeSession: upstream_control: _WebSocketUpstreamControl pending_requests: deque[_WebSocketRequestState] pending_lock: anyio.Lock + lease_lock: anyio.Lock response_create_gate: asyncio.Semaphore queued_request_count: int last_used_at: float idle_ttl_seconds: float + bridge_session_id: str + owner_instance_id: str codex_session: bool = False prewarmed: bool = False prewarm_lock: anyio.Lock | None = None upstream_turn_state: str | None = None + reconnect_turn_state: str | None = None downstream_turn_state: str | None = None downstream_turn_state_aliases: set[str] = field(default_factory=set) + pending_replaced_bridge_session_id: str | None = None upstream_reader: asyncio.Task[None] | None = None + lease_keepalive_task: asyncio.Task[None] | None = None + preserve_lease_during_reconnect: bool = False + lease_cleanup_owned_by_close: bool = False closed: bool = False @@ -4323,6 +5050,16 @@ class _WebSocketReceiveTimeout: fail_all_pending: bool = False +class _HTTPBridgeCreateSessionKwargs(TypedDict, 
total=False): + headers: dict[str, str] + affinity: "_AffinityPolicy" + request_model: str | None + idle_ttl_seconds: float + bridge_session_id: str + owner_instance_id: str + replaced_bridge_session_id: str | None + + def _event_type_from_payload(event: OpenAIEvent | None, payload: dict[str, JsonValue] | None) -> str | None: if event is not None: return event.type @@ -4834,6 +5571,10 @@ def ensure_http_downstream_turn_state(headers: Mapping[str, str]) -> str: return f"http_turn_{uuid4().hex}" +def requested_http_downstream_turn_state(headers: Mapping[str, str]) -> str | None: + return _sticky_key_from_turn_state_header(headers) + + def build_downstream_turn_state_accept_headers(turn_state: str) -> list[tuple[bytes, bytes]]: return [(b"x-codex-turn-state", turn_state.encode("utf-8"))] @@ -4882,21 +5623,37 @@ def _headers_with_turn_state(headers: Mapping[str, str], turn_state: str | None) return forwarded +def _headers_without_local_http_bridge_turn_state(headers: Mapping[str, str]) -> dict[str, str]: + forwarded = dict(headers) + for key, value in list(forwarded.items()): + if key.lower() != "x-codex-turn-state": + continue + if isinstance(value, str): + stripped = value.strip() + if stripped.startswith(_HTTP_BRIDGE_TURN_STATE_PREFIX): + forwarded.pop(key, None) + break + return forwarded + + def _preferred_http_bridge_reconnect_turn_state(session: "_HTTPBridgeSession") -> str | None: - if ( - session.codex_session - and session.downstream_turn_state is not None - and session.affinity.kind == StickySessionKind.CODEX_SESSION - and session.affinity.key == session.downstream_turn_state - ): - return session.downstream_turn_state - return session.upstream_turn_state + if session.upstream_turn_state is not None: + return session.upstream_turn_state + return session.reconnect_turn_state def _http_bridge_turn_state_alias_key(turn_state: str, api_key_id: str | None) -> tuple[str, str | None]: return (turn_state, api_key_id) +def _http_bridge_api_key_scope(api_key_id: str | 
None) -> str: + return api_key_id or "" + + +def _http_bridge_lease_expires_at(idle_ttl_seconds: float) -> datetime: + return utcnow() + timedelta(seconds=max(0.0, idle_ttl_seconds)) + + def _resolve_prompt_cache_key( payload: ResponsesRequest | ResponsesCompactRequest, *, @@ -4990,6 +5747,31 @@ def _make_http_bridge_session_key( ) +def _affinity_policy_from_http_bridge_session_key( + key: _HTTPBridgeSessionKey, + *, + openai_cache_affinity_max_age_seconds: int, +) -> _AffinityPolicy: + if key.affinity_kind in {"turn_state_header", "session_header"}: + return _AffinityPolicy( + key=key.affinity_key, + kind=StickySessionKind.CODEX_SESSION, + ) + if key.affinity_kind == StickySessionKind.PROMPT_CACHE.value: + return _AffinityPolicy( + key=key.affinity_key, + kind=StickySessionKind.PROMPT_CACHE, + max_age_seconds=openai_cache_affinity_max_age_seconds, + ) + if key.affinity_kind == StickySessionKind.STICKY_THREAD.value: + return _AffinityPolicy( + key=key.affinity_key, + kind=StickySessionKind.STICKY_THREAD, + reallocate_sticky=True, + ) + return _AffinityPolicy() + + def _effective_http_bridge_idle_ttl_seconds( *, affinity: _AffinityPolicy, @@ -5068,6 +5850,100 @@ def _normalized_http_bridge_instance_ring(settings: object) -> tuple[str, tuple[ return instance_id, tuple(sorted(set(ring_entries))) +def _http_bridge_current_owner_id(settings: object) -> str: + instance_id, _ = _normalized_http_bridge_instance_ring(settings) + pid = os.getpid() + process_marker = _http_bridge_process_start_marker(pid) + if process_marker is None: + return f"{instance_id}@{pid}" + return f"{instance_id}@{pid}:{process_marker}" + + +def _http_bridge_owner_instance_group(owner_id: str) -> str: + return owner_id.split("@", 1)[0] + + +def _http_bridge_owner_pid(owner_id: str) -> int | None: + owner_parts = owner_id.split("@", 1) + if len(owner_parts) != 2: + return None + pid_text, _, _ = owner_parts[1].partition(":") + try: + pid = int(pid_text) + except ValueError: + return None + return pid 
if pid > 0 else None + + +def _http_bridge_owner_process_marker(owner_id: str) -> str | None: + owner_parts = owner_id.split("@", 1) + if len(owner_parts) != 2: + return None + _, separator, process_marker = owner_parts[1].partition(":") + if not separator or not process_marker: + return None + return process_marker + + +def _http_bridge_process_start_marker(pid: int) -> str | None: + try: + with open(f"/proc/{pid}/stat", encoding="utf-8") as process_stat: + payload = process_stat.read().strip() + except OSError: + return None + try: + _, stat_tail = payload.rsplit(") ", 1) + except ValueError: + return None + stat_fields = stat_tail.split() + if len(stat_fields) <= 19: + return None + process_marker = stat_fields[19].strip() + return process_marker or None + + +def _http_bridge_process_exists(pid: int) -> bool: + try: + os.kill(pid, 0) + except ProcessLookupError: + return False + except PermissionError: + return True + except OSError as exc: + if exc.errno == errno.ESRCH: + return False + if exc.errno == errno.EPERM: + return True + return True + return True + + +def _http_bridge_owner_matches_current( + owner_id: str, + *, + current_owner_id: str, + current_instance_id: str, +) -> bool: + if owner_id == current_owner_id or owner_id == current_instance_id: + return True + if _http_bridge_owner_instance_group(owner_id) != current_instance_id: + return False + owner_pid = _http_bridge_owner_pid(owner_id) + if owner_pid is None: + return False + if owner_pid == os.getpid(): + return True + owner_process_marker = _http_bridge_owner_process_marker(owner_id) + if owner_process_marker is None: + return not _http_bridge_process_exists(owner_pid) + live_process_marker = _http_bridge_process_start_marker(owner_pid) + if live_process_marker is None: + return not _http_bridge_process_exists(owner_pid) + if live_process_marker != owner_process_marker: + return True + return False + + def _http_bridge_owner_instance(key: _HTTPBridgeSessionKey, settings: object) -> str | None: 
instance_id, ring = _normalized_http_bridge_instance_ring(settings) if len(ring) <= 1: diff --git a/openspec/changes/durable-http-bridge-ownership/.openspec.yaml b/openspec/changes/durable-http-bridge-ownership/.openspec.yaml new file mode 100644 index 00000000..5376059c --- /dev/null +++ b/openspec/changes/durable-http-bridge-ownership/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-03-21 diff --git a/openspec/changes/durable-http-bridge-ownership/proposal.md b/openspec/changes/durable-http-bridge-ownership/proposal.md new file mode 100644 index 00000000..fff4c8dd --- /dev/null +++ b/openspec/changes/durable-http-bridge-ownership/proposal.md @@ -0,0 +1,17 @@ +## Why + +HTTP bridge turn-state continuity currently depends on an in-memory alias map. When a process restarts, evicts a session, or receives a replayed request on another replica, the proxy can only tell that the local alias is missing. It currently reports that as `bridge_instance_mismatch`, which conflates stale local state, expired bridge sessions, invalid turn-state tokens, and true live-owner conflicts. + +## What Changes + +- Replace opaque local-only HTTP bridge turn-state aliases with signed, versioned turn-state tokens. +- Add a durable HTTP bridge lease registry so replicas can distinguish a live owner mismatch from expired or stale bridge state. +- Recover stale or expired bridge turn-state on requests that do not require prior-response continuity. +- Fail with specific bridge error codes for invalid tokens, expired continuity, and true wrong-instance conflicts. 
+ +## Impact + +- Code: `app/modules/proxy/service.py`, `app/modules/proxy/api.py`, `app/modules/proxy/repo_bundle.py`, `app/dependencies.py`, `app/db/models.py` +- Data: new `http_bridge_leases` table and migration +- Tests: HTTP bridge integration coverage and proxy repository factory call sites +- Specs: `openspec/specs/responses-api-compat/spec.md`, `context.md`, and `ops.md` diff --git a/openspec/changes/durable-http-bridge-ownership/specs/responses-api-compat/spec.md b/openspec/changes/durable-http-bridge-ownership/specs/responses-api-compat/spec.md new file mode 100644 index 00000000..3ee58a22 --- /dev/null +++ b/openspec/changes/durable-http-bridge-ownership/specs/responses-api-compat/spec.md @@ -0,0 +1,26 @@ +## MODIFIED Requirements + +### Requirement: HTTP Responses routes preserve upstream websocket session continuity +HTTP `/v1/responses` and HTTP `/backend-api/codex/responses` MUST preserve upstream websocket session continuity within one live bridge session and MUST distinguish replayed turn-state failure modes cleanly. The service MUST issue signed, versioned `x-codex-turn-state` headers for HTTP bridge continuity, MUST track live bridge ownership durably enough to detect true wrong-instance conflicts across replicas, and MUST recover stale local bridge state by creating a fresh bridge session when the replayed request does not require `previous_response_id` continuity. 
+ +#### Scenario: replayed turn-state with live owner on another instance fails closed +- **WHEN** a client replays a valid HTTP bridge turn-state token +- **AND** the durable bridge lease shows a different live owner instance +- **THEN** the service fails the request fast with `bridge_wrong_instance` +- **AND** it MUST NOT create a fresh local bridge session for that token on the wrong instance + +#### Scenario: replayed turn-state with expired bridge and no prior-response dependency recovers +- **WHEN** a client replays a valid HTTP bridge turn-state token +- **AND** no live bridge lease exists for that token +- **AND** the request does not include `previous_response_id` +- **THEN** the service creates a fresh bridge session instead of failing with `bridge_instance_mismatch` + +#### Scenario: replayed turn-state with expired bridge and prior-response dependency fails clearly +- **WHEN** a client replays a valid HTTP bridge turn-state token +- **AND** no live bridge lease exists for that token +- **AND** the request includes `previous_response_id` +- **THEN** the service fails the request with `bridge_session_expired` + +#### Scenario: malformed or forged turn-state token is rejected +- **WHEN** a client sends a replayed HTTP bridge turn-state token that cannot be validated +- **THEN** the service fails the request with `bridge_token_invalid` diff --git a/openspec/changes/durable-http-bridge-ownership/tasks.md b/openspec/changes/durable-http-bridge-ownership/tasks.md new file mode 100644 index 00000000..3bd10a80 --- /dev/null +++ b/openspec/changes/durable-http-bridge-ownership/tasks.md @@ -0,0 +1,18 @@ +## 1. Spec + +- [x] 1.1 Add Responses HTTP bridge requirements for signed turn-state tokens and durable live-owner tracking +- [x] 1.2 Update bridge context and ops notes for stale recovery and new error codes +- [x] 1.3 Validate OpenSpec changes + +## 2. 
Tests + +- [x] 2.1 Update HTTP bridge tests that currently expect missing local aliases to fail with `bridge_instance_mismatch` +- [x] 2.2 Add regression coverage for `bridge_wrong_instance`, `bridge_session_expired`, and `bridge_token_invalid` +- [x] 2.3 Add coverage for stale-session recovery without `previous_response_id` + +## 3. Implementation + +- [x] 3.1 Add durable HTTP bridge lease storage and repository wiring +- [x] 3.2 Issue signed HTTP bridge turn-state tokens and validate replayed tokens +- [x] 3.3 Recover stale bridge sessions when continuity is not required +- [x] 3.4 Preserve fail-closed behavior only for true live owner mismatches diff --git a/openspec/specs/responses-api-compat/context.md b/openspec/specs/responses-api-compat/context.md index 58ad7e3f..11508cd5 100644 --- a/openspec/specs/responses-api-compat/context.md +++ b/openspec/specs/responses-api-compat/context.md @@ -22,7 +22,8 @@ See `openspec/specs/responses-api-compat/spec.md` for normative requirements. - `previous_response_id` is forwarded when `conversation` is absent, but the `conversation + previous_response_id` conflict remains rejected. - HTTP `/v1/responses` and HTTP `/backend-api/codex/responses` now use a server-side upstream websocket session bridge by default so repeated compatible requests can keep upstream response/session continuity without forcing clients onto the public websocket route. - Codex-affinity HTTP bridge sessions can optionally use a conservative first-request prewarm (`generate=false`), but that behavior now stays behind an explicit flag so production defaults do not pay an extra upstream request unless operators opt in. -- When operators configure a multi-instance bridge ring, each stable bridge key now has a deterministic owner replica; non-owner replicas fail closed with `bridge_instance_mismatch` instead of silently creating fragmented continuity on the wrong host. 
Unstable per-request bridge keys remain local and are allowed on any replica because there is no continuity to preserve. +- When operators configure a multi-instance bridge ring, stable non-turn-state bridge keys still have a deterministic owner replica and non-owner replicas fail closed with `bridge_wrong_instance`. +- HTTP bridge turn-state continuity now uses signed turn-state tokens plus a durable live-owner lease. A replayed token can recover on stale or expired local state when `previous_response_id` continuity is not required, but true live-owner conflicts still fail closed with `bridge_wrong_instance`. - Codex-facing websocket routes now advertise `x-codex-turn-state` during websocket accept and honor client-provided turn-state on reconnect so routing can stay sticky at turn granularity even when the public websocket reconnects. - HTTP responses routes now also return `x-codex-turn-state` headers so clients that persist response headers can promote later HTTP requests from prompt-cache affinity to stronger Codex-session continuity. - `/v1/responses/compact` keeps a final-JSON contract and preserves the raw upstream `/codex/responses/compact` payload shape as the canonical next context window instead of rewriting it through buffered `/codex/responses` streaming. @@ -46,8 +47,9 @@ See `openspec/specs/responses-api-compat/spec.md` for normative requirements. - **Stream ends without terminal event:** Emit `response.failed` with `stream_incomplete`. - **Upstream error / no accounts:** Non-streaming responses return an OpenAI error envelope with 5xx status. - **Compact upstream transport/client failure:** Retry only inside `/codex/responses/compact` when the failure is safely retryable; otherwise return an explicit upstream error without surrogate fallback. 
-- **HTTP bridge session closes or expires:** The next compatible HTTP `/v1/responses` or `/backend-api/codex/responses` request recreates a fresh upstream websocket bridge session; continuity is guaranteed only within the lifetime of one active bridged session. -- **Multi-instance routing without bridge owner policy:** if operators do not configure a bridge ring or front-door affinity, continuity can still fragment across replicas; with a configured bridge ring, wrong-replica requests now fail closed instead of silently forking bridge state. +- **HTTP bridge session closes or expires:** The next compatible HTTP `/v1/responses` or `/backend-api/codex/responses` request recreates a fresh upstream websocket bridge session when continuity is optional. Requests that still depend on `previous_response_id` fail early with `bridge_session_expired`; continuity is guaranteed only within the lifetime of one active bridged session. +- **Replayed turn-state token is invalid or cross-scoped:** the proxy fails with `bridge_token_invalid`. +- **Multi-instance routing without bridge owner policy:** if operators do not configure a bridge ring or front-door affinity, continuity can still fragment across replicas; with a configured bridge ring and durable bridge leases, true wrong-replica requests fail closed instead of silently forking bridge state. - **Codex websocket reconnects:** Reconnect continuity now depends on the client replaying the accepted `x-codex-turn-state`; generated turn-state is emitted on accept for backend Codex routes and echoed back when the client already supplies one. - **Websocket handshake forbidden/not-found:** Auto transport now fails loud on `403` / `404` instead of silently hiding the websocket regression behind HTTP fallback. - **Invalid request payloads:** Return 4xx with `invalid_request_error`. 
diff --git a/openspec/specs/responses-api-compat/ops.md b/openspec/specs/responses-api-compat/ops.md index 87eb1eb2..60b382a9 100644 --- a/openspec/specs/responses-api-compat/ops.md +++ b/openspec/specs/responses-api-compat/ops.md @@ -284,13 +284,15 @@ If you deploy multiple replicas behind a load balancer, configure front-door aff Without front-door affinity, each replica will maintain its own in-memory bridge pool and HTTP continuity can fragment across instances. -If you cannot guarantee front-door affinity, configure the deterministic bridge instance ring so the proxy can fail closed with `bridge_instance_mismatch` rather than silently creating a second bridge on the wrong replica. +If you cannot guarantee front-door affinity, configure the deterministic bridge instance ring and keep the durable bridge-lease table healthy so the proxy can fail closed with `bridge_wrong_instance` on true live-owner conflicts rather than silently creating a second bridge on the wrong replica. ### Failure interpretation - `queue_full`: one bridge key is overloaded; increase bridge capacity carefully or reduce per-session concurrency upstream. - `capacity_exhausted_active_sessions`: the bridge pool hit `max_sessions` while every existing session still had pending work. The proxy intentionally refused the new request with `429` instead of evicting an active session. Mitigate by increasing pool size carefully, reducing concurrent bridge fan-out, or improving front-door affinity so related calls land on the same replica. -- `owner_mismatch` / `bridge_instance_mismatch`: deterministic replica ownership is enabled for a stable bridge key and the request landed on the wrong instance. Fix ingress affinity or route the stable bridge key to the logged owner instance. Requests that only have an unstable per-request bridge key are intentionally exempt from owner enforcement. 
+- `owner_mismatch` / `bridge_wrong_instance`: either deterministic replica ownership rejected a stable non-turn-state key on the wrong instance, or a replayed signed turn-state still has a live lease on another instance. Fix ingress affinity or route the bridge key to the logged owner instance. +- `bridge_session_expired`: the replayed turn-state no longer has a live bridge session and the request still required `previous_response_id` continuity. Drop the stale `x-codex-turn-state` and start a fresh turn. +- `bridge_token_invalid`: the replayed signed turn-state could not be validated or belonged to another API key scope. - `reconnect`: the bridge recreated an upstream websocket before response creation and retried once. - `terminal_error` with `previous_response_not_found`: continuity was already broken upstream; inspect replica affinity, bridge eviction timing, or upstream resets. - plain `transport = "http"` request logs are still expected for bridged HTTP requests; the internal upstream websocket does not change external transport accounting. 
diff --git a/openspec/specs/responses-api-compat/spec.md b/openspec/specs/responses-api-compat/spec.md index 8d4750de..13c25d72 100644 --- a/openspec/specs/responses-api-compat/spec.md +++ b/openspec/specs/responses-api-compat/spec.md @@ -254,13 +254,34 @@ When serving HTTP `/v1/responses` or HTTP `/backend-api/codex/responses`, the se - **THEN** the service sends one internal `response.create` prewarm with `generate=false` before the client-visible request - **AND** the client-visible response contract remains unchanged -#### Scenario: bridge enforces deterministic owner instance only for stable bridge keys +#### Scenario: bridge enforces deterministic owner instance only for stable non-turn-state keys - **WHEN** operators configure multiple eligible bridge instance ids -- **AND** a request uses a stable bridge key derived from turn-state, session header, or prompt-cache key +- **AND** a request uses a stable bridge key derived from a session header or prompt-cache key - **AND** that request lands on a non-owner instance -- **THEN** the service fails the request fast with `bridge_instance_mismatch` +- **THEN** the service fails the request fast with `bridge_wrong_instance` - **AND** it MUST NOT create a fresh local bridge session for that key on the wrong instance +#### Scenario: replayed signed turn-state fails only for a true live-owner mismatch +- **WHEN** a client replays a signed HTTP bridge `x-codex-turn-state` +- **AND** the durable bridge lease shows another live owner instance +- **THEN** the service fails the request fast with `bridge_wrong_instance` + +#### Scenario: replayed signed turn-state without a live lease recovers when continuity is optional +- **WHEN** a client replays a signed HTTP bridge `x-codex-turn-state` +- **AND** no live bridge lease exists for that turn-state +- **AND** the request does not include `previous_response_id` +- **THEN** the service creates a fresh bridge session and returns a fresh signed `x-codex-turn-state` + +#### 
Scenario: replayed turn-state without a live lease fails clearly when continuity is required +- **WHEN** a client replays an HTTP bridge `x-codex-turn-state` +- **AND** no live bridge lease exists for that turn-state +- **AND** the request includes `previous_response_id` +- **THEN** the service fails the request with `bridge_session_expired` + +#### Scenario: malformed or cross-scope signed turn-state is rejected +- **WHEN** a client sends a signed HTTP bridge `x-codex-turn-state` that cannot be validated or belongs to another API key scope +- **THEN** the service fails the request with `bridge_token_invalid` + ### Requirement: Websocket responses advertise and honor Codex turn-state affinity When serving websocket Responses endpoints, the service MUST advertise an `x-codex-turn-state` header during websocket accept. If the client reconnects and presents that same `x-codex-turn-state`, the service MUST treat it as the highest-priority Codex-affinity key for upstream routing on that websocket turn. On `/v1/responses`, a proxy-generated turn-state MUST NOT override the first request's prompt-cache routing unless the client explicitly sends the turn-state back. 
diff --git a/tests/integration/test_http_responses_bridge.py b/tests/integration/test_http_responses_bridge.py index b8f00635..4e080073 100644 --- a/tests/integration/test_http_responses_bridge.py +++ b/tests/integration/test_http_responses_bridge.py @@ -6,19 +6,21 @@ import time from collections import deque from collections.abc import AsyncGenerator +from datetime import timedelta from types import SimpleNamespace from typing import cast import anyio import pytest import pytest_asyncio -from sqlalchemy import select +from sqlalchemy import delete, select import app.modules.proxy.service as proxy_module from app.core.utils.request_id import reset_request_id, set_request_id -from app.db.models import Account, AccountStatus +from app.db.models import Account, AccountStatus, HttpBridgeLease from app.db.session import SessionLocal from app.dependencies import get_proxy_service_for_app +from app.modules.proxy.bridge_repository import HttpBridgeLeasesRepository from app.modules.proxy.load_balancer import AccountSelection pytestmark = pytest.mark.integration @@ -26,19 +28,21 @@ @pytest_asyncio.fixture(autouse=True) async def _cleanup_http_bridge_sessions(app_instance): + async with SessionLocal() as session: + await session.execute(delete(HttpBridgeLease)) + await session.commit() yield service = get_proxy_service_for_app(app_instance) async with service._http_bridge_lock: sessions = list(service._http_bridge_sessions.values()) - inflight_sessions = list(service._http_bridge_inflight_sessions.values()) service._http_bridge_sessions.clear() - service._http_bridge_inflight_sessions.clear() service._http_bridge_turn_state_index.clear() for session in sessions: + session.bridge_session_id = "" await service._close_http_bridge_session(session) - for inflight_future in inflight_sessions: - if not inflight_future.done(): - inflight_future.cancel() + async with SessionLocal() as session: + await session.execute(delete(HttpBridgeLease)) + await session.commit() def 
_encode_jwt(payload: dict) -> str: @@ -848,11 +852,11 @@ async def fake_ensure_fresh_with_budget(self, target, *, force=False, timeout_se exc = exc_info.value assert exc.status_code == 409 - assert exc.payload["error"].get("code") == "bridge_instance_mismatch" + assert exc.payload["error"].get("code") == "bridge_wrong_instance" @pytest.mark.asyncio -async def test_v1_responses_http_bridge_missing_turn_state_alias_with_previous_response_id_fails_closed( +async def test_v1_responses_http_bridge_unsigned_legacy_turn_state_preserves_previous_response_compatibility( app_instance, monkeypatch, ): @@ -890,6 +894,95 @@ async def test_v1_responses_http_bridge_missing_turn_state_alias_with_previous_r } +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_unsigned_legacy_turn_state_recovery_forwards_upstream_token( + async_client, + app_instance, + monkeypatch, +): + _install_bridge_settings_with_limits(monkeypatch, enabled=True) + account_id = await _import_account( + async_client, + "acc_http_bridge_legacy_rebuild_forwarding", + "http-bridge-legacy-rebuild-forwarding@example.com", + ) + account = await _get_account(account_id) + service = get_proxy_service_for_app(app_instance) + fake_upstream = _FakeBridgeUpstreamWebSocket() + connect_headers_seen: list[dict[str, str]] = [] + + async def fake_select_account_with_budget( + self, + deadline, + *, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids=None, + additional_limit_name=None, + ): + del ( + self, + deadline, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids, + additional_limit_name, + ) + return AccountSelection(account=account, error_message=None, error_code=None) + + async def fake_ensure_fresh_with_budget(self, target, *, force=False, 
timeout_seconds): + del self, force, timeout_seconds + return target + + async def fake_connect_responses_websocket( + headers, + access_token, + account_id_header, + *, + base_url=None, + session=None, + ): + del access_token, account_id_header, base_url, session + connect_headers_seen.append(dict(headers)) + return fake_upstream + + monkeypatch.setattr(proxy_module.ProxyService, "_select_account_with_budget", fake_select_account_with_budget) + monkeypatch.setattr(proxy_module.ProxyService, "_ensure_fresh_with_budget", fake_ensure_fresh_with_budget) + monkeypatch.setattr(proxy_module, "connect_responses_websocket", fake_connect_responses_websocket) + + legacy_turn_state = "http_turn_legacy_rebuild_forwarding" + session = await service._get_or_create_http_bridge_session( + proxy_module._HTTPBridgeSessionKey("turn_state_header", legacy_turn_state, None), + headers={"x-codex-turn-state": legacy_turn_state}, + affinity=proxy_module._AffinityPolicy( + key=legacy_turn_state, + kind=proxy_module.StickySessionKind.CODEX_SESSION, + ), + api_key=None, + request_model="gpt-5.1", + idle_ttl_seconds=120.0, + max_sessions=128, + ) + + assert session.key.affinity_kind == "turn_state_header" + assert session.key.affinity_key == legacy_turn_state + assert connect_headers_seen[-1]["x-codex-turn-state"] == legacy_turn_state + + @pytest.mark.asyncio async def test_v1_responses_http_bridge_replayed_turn_state_alias_preserves_owner_and_promotes_session( async_client, @@ -1015,11 +1108,19 @@ async def fake_connect_responses_websocket( ) await service._register_http_bridge_turn_state(session, replay_turn_state) replay_key = proxy_module._HTTPBridgeSessionKey("turn_state_header", replay_turn_state, None) + async with SessionLocal() as db_session: + lease = ( + await db_session.execute( + select(HttpBridgeLease).where(HttpBridgeLease.session_id == session.bridge_session_id) + ) + ).scalar_one() + assert lease.affinity_kind == "turn_state_header" + assert lease.affinity_key == 
replay_turn_state assert ( service._http_bridge_turn_state_index[ proxy_module._http_bridge_turn_state_alias_key(replay_turn_state, session.key.api_key_id) ] - == key + == replay_key ) replayed = await service._get_or_create_http_bridge_session( @@ -1033,14 +1134,14 @@ async def fake_connect_responses_websocket( ) assert replayed is session - assert replayed.key == key - assert service._http_bridge_sessions[key] is session - assert replay_key not in service._http_bridge_sessions + assert replayed.key == replay_key + assert service._http_bridge_sessions[replay_key] is session + assert key not in service._http_bridge_sessions assert ( service._http_bridge_turn_state_index[ proxy_module._http_bridge_turn_state_alias_key(replay_turn_state, session.key.api_key_id) ] - == key + == replay_key ) assert replayed.codex_session is True assert replayed.affinity.kind == proxy_module.StickySessionKind.CODEX_SESSION @@ -1056,48 +1157,12 @@ async def fake_connect_responses_websocket( started_at=time.monotonic(), ) await service._reconnect_http_bridge_session(replayed, request_state=request_state) - assert connect_headers_seen[-1]["x-codex-turn-state"] == replay_turn_state + assert connect_headers_seen[-1]["x-codex-turn-state"] == "upstream_turn_state_stale" await service._close_http_bridge_session(session) @pytest.mark.asyncio -async def test_v1_responses_http_bridge_waits_for_inflight_recreation_on_missing_turn_state_alias(app_instance): - service = get_proxy_service_for_app(app_instance) - service._http_bridge_sessions.clear() - service._http_bridge_turn_state_index.clear() - service._http_bridge_inflight_sessions.clear() - - replay_turn_state = "http_turn_inflight_replay" - replay_key = proxy_module._HTTPBridgeSessionKey("turn_state_header", replay_turn_state, None) - expected_session = _make_dummy_bridge_session(replay_key) - inflight_future: asyncio.Future[SimpleNamespace] = asyncio.get_running_loop().create_future() - service._http_bridge_inflight_sessions[replay_key] = 
inflight_future - - request_key = proxy_module._HTTPBridgeSessionKey("request", "derived-key", None) - try: - waiter = asyncio.create_task( - service._get_or_create_http_bridge_session( - request_key, - headers={"x-codex-turn-state": replay_turn_state}, - affinity=proxy_module._AffinityPolicy(key="derived-key"), - api_key=None, - request_model="gpt-5.4", - idle_ttl_seconds=120.0, - max_sessions=8, - ) - ) - await asyncio.sleep(0) - assert not waiter.done() - inflight_future.set_result(expected_session) - returned = await waiter - finally: - service._http_bridge_inflight_sessions.clear() - - assert returned is expected_session - - -@pytest.mark.asyncio -async def test_v1_responses_http_bridge_generated_turn_state_fails_closed_without_local_alias( +async def test_v1_responses_http_bridge_unsigned_legacy_turn_state_uses_owner_routing_without_local_alias( async_client, app_instance, monkeypatch, @@ -1113,8 +1178,8 @@ async def test_v1_responses_http_bridge_generated_turn_state_fails_closed_withou "acc_http_bridge_missing_alias", "http-bridge-missing-alias@example.com", ) - account = await _get_account(account_id) service = get_proxy_service_for_app(app_instance) + account = await _get_account(account_id) async def fake_select_account_with_budget( self, @@ -1151,12 +1216,27 @@ async def fake_select_account_with_budget( monkeypatch.setattr(proxy_module.ProxyService, "_select_account_with_budget", fake_select_account_with_budget) + legacy_turn_state = next( + candidate + for candidate in ( + "http_turn_missing_alias_a", + "http_turn_missing_alias_b", + "http_turn_missing_alias_c", + "http_turn_missing_alias_d", + ) + if proxy_module._http_bridge_owner_instance( + proxy_module._HTTPBridgeSessionKey("turn_state_header", candidate, None), + proxy_module.get_settings(), + ) + == "instance-b" + ) + with pytest.raises(proxy_module.ProxyResponseError) as exc_info: await service._get_or_create_http_bridge_session( - proxy_module._HTTPBridgeSessionKey("turn_state_header", 
"http_turn_missing_alias", None), - headers={"x-codex-turn-state": "http_turn_missing_alias"}, + proxy_module._HTTPBridgeSessionKey("turn_state_header", legacy_turn_state, None), + headers={"x-codex-turn-state": legacy_turn_state}, affinity=proxy_module._AffinityPolicy( - key="http_turn_missing_alias", + key=legacy_turn_state, kind=proxy_module.StickySessionKind.CODEX_SESSION, ), api_key=None, @@ -1167,24 +1247,36 @@ async def fake_select_account_with_budget( exc = exc_info.value assert exc.status_code == 409 - assert exc.payload["error"].get("code") == "bridge_instance_mismatch" + assert exc.payload["error"].get("code") == "bridge_wrong_instance" @pytest.mark.asyncio -async def test_v1_responses_http_bridge_turn_state_alias_respects_api_key_isolation( +async def test_v1_responses_http_bridge_signed_turn_state_missing_local_alias_recovers_fresh_session( async_client, app_instance, monkeypatch, ): - _install_bridge_settings_with_limits(monkeypatch, enabled=True) + _install_bridge_settings_with_limits( + monkeypatch, + enabled=True, + instance_id="instance-a", + instance_ring=["instance-a", "instance-b"], + ) account_id = await _import_account( async_client, - "acc_http_bridge_api_key_alias", - "http-bridge-api-key-alias@example.com", + "acc_http_bridge_missing_signed_alias", + "http-bridge-missing-signed-alias@example.com", ) account = await _get_account(account_id) service = get_proxy_service_for_app(app_instance) fake_upstream = _FakeBridgeUpstreamWebSocket() + connect_headers_seen: list[dict[str, str]] = [] + session_id = "hbs_signed_missing_alias" + signed_turn_state = service._encode_http_bridge_turn_state( + session_id=session_id, + owner_instance_id="instance-a", + api_key_id=None, + ) async def fake_select_account_with_budget( self, @@ -1231,82 +1323,115 @@ async def fake_connect_responses_websocket( base_url=None, session=None, ): - del headers, access_token, account_id_header, base_url, session + del access_token, account_id_header, base_url, session + 
connect_headers_seen.append(dict(headers)) return fake_upstream monkeypatch.setattr(proxy_module.ProxyService, "_select_account_with_budget", fake_select_account_with_budget) monkeypatch.setattr(proxy_module.ProxyService, "_ensure_fresh_with_budget", fake_ensure_fresh_with_budget) monkeypatch.setattr(proxy_module, "connect_responses_websocket", fake_connect_responses_websocket) + monkeypatch.setattr(proxy_module, "_http_bridge_current_owner_id", lambda settings: "instance-a@222") + monkeypatch.setattr(proxy_module, "_http_bridge_process_exists", lambda pid: False) + + async with SessionLocal() as db_session: + await db_session.execute(delete(HttpBridgeLease).where(HttpBridgeLease.session_id == session_id)) + await db_session.commit() + + async with service._repo_factory() as repos: + await repos.http_bridge_leases.upsert( + session_id=session_id, + affinity_kind="turn_state_header", + affinity_key=signed_turn_state, + api_key_scope="", + owner_instance_id="instance-a", + lease_expires_at=proxy_module._http_bridge_lease_expires_at(120.0), + account_id=account.id, + request_model="gpt-5.1", + codex_session=True, + idle_ttl_seconds=120.0, + upstream_turn_state=None, + downstream_turn_state=signed_turn_state, + ) - payload = proxy_module.ResponsesRequest( - model="gpt-5.1", - instructions="Return exactly OK.", - input="hello", - prompt_cache_key="api-key-alias-thread", - ) - affinity = proxy_module._sticky_key_for_responses_request( - payload, - {}, - codex_session_affinity=False, - openai_cache_affinity=True, - openai_cache_affinity_max_age_seconds=300, - sticky_threads_enabled=False, + session = await service._get_or_create_http_bridge_session( + proxy_module._HTTPBridgeSessionKey("turn_state_header", signed_turn_state, None), + headers={"x-codex-turn-state": signed_turn_state}, + affinity=proxy_module._AffinityPolicy( + key=signed_turn_state, + kind=proxy_module.StickySessionKind.CODEX_SESSION, + ), api_key=None, + request_model="gpt-5.1", + idle_ttl_seconds=120.0, 
+ max_sessions=128, ) - api_key_a = cast(proxy_module.ApiKeyData, SimpleNamespace(id="api-key-a")) - session = await service._get_or_create_http_bridge_session( - proxy_module._make_http_bridge_session_key( - payload, - headers={}, - affinity=affinity, - api_key=api_key_a, - request_id="req_api_key_alias", + + assert session.key.affinity_kind == "turn_state_header" + assert session.bridge_session_id != session_id + assert session.key.affinity_key != signed_turn_state + recovered_token = service._decode_http_bridge_turn_state(session.key.affinity_key, api_key_id=None) + assert recovered_token is not None + assert recovered_token.session_id == session.bridge_session_id + assert proxy_module._http_bridge_owner_instance_group(recovered_token.owner_instance_id) == "instance-a" + assert connect_headers_seen[-1].get("x-codex-turn-state") is None + + replayed = await service._get_or_create_http_bridge_session( + proxy_module._HTTPBridgeSessionKey("turn_state_header", session.key.affinity_key, None), + headers={"x-codex-turn-state": session.key.affinity_key}, + affinity=proxy_module._AffinityPolicy( + key=session.key.affinity_key, + kind=proxy_module.StickySessionKind.CODEX_SESSION, ), - headers={}, - affinity=affinity, - api_key=api_key_a, - request_model=payload.model, + api_key=None, + request_model="gpt-5.1", idle_ttl_seconds=120.0, max_sessions=128, ) - await service._register_http_bridge_turn_state(session, "http_turn_api_key_alias") - with pytest.raises(proxy_module.ProxyResponseError) as exc_info: - await service._get_or_create_http_bridge_session( - proxy_module._HTTPBridgeSessionKey("turn_state_header", "http_turn_api_key_alias", "api-key-b"), - headers={"x-codex-turn-state": "http_turn_api_key_alias"}, - affinity=proxy_module._AffinityPolicy( - key="http_turn_api_key_alias", - kind=proxy_module.StickySessionKind.CODEX_SESSION, - ), - api_key=cast(proxy_module.ApiKeyData, SimpleNamespace(id="api-key-b")), - request_model=payload.model, - idle_ttl_seconds=120.0, - 
max_sessions=128, - ) + async with SessionLocal() as db_session: + stale_lease = ( + await db_session.execute(select(HttpBridgeLease).where(HttpBridgeLease.session_id == session_id)) + ).scalar_one_or_none() + new_lease = ( + await db_session.execute( + select(HttpBridgeLease).where(HttpBridgeLease.session_id == session.bridge_session_id) + ) + ).scalar_one() - assert isinstance(exc_info.value, proxy_module.ProxyResponseError) - exc = exc_info.value - assert exc.status_code == 409 - assert exc.payload["error"].get("code") == "bridge_instance_mismatch" - await service._close_http_bridge_session(session) + assert replayed is session + assert connect_headers_seen and len(connect_headers_seen) == 1 + assert stale_lease is None + assert new_lease.affinity_kind == "turn_state_header" + assert new_lease.affinity_key == session.key.affinity_key @pytest.mark.asyncio -async def test_v1_responses_http_bridge_preserves_prior_turn_state_aliases( +async def test_v1_responses_http_bridge_signed_turn_state_recovery_does_not_alias_stale_token_when_delete_fails( async_client, app_instance, monkeypatch, ): - _install_bridge_settings_with_limits(monkeypatch, enabled=True) + _install_bridge_settings_with_limits( + monkeypatch, + enabled=True, + instance_id="instance-a", + instance_ring=["instance-a", "instance-b"], + ) account_id = await _import_account( async_client, - "acc_http_bridge_alias_preserve", - "http-bridge-alias-preserve@example.com", + "acc_http_bridge_stale_alias_preferred", + "http-bridge-stale-alias-preferred@example.com", ) account = await _get_account(account_id) service = get_proxy_service_for_app(app_instance) fake_upstream = _FakeBridgeUpstreamWebSocket() + connect_headers_seen: list[dict[str, str]] = [] + session_id = "hbs_signed_stale_alias_preferred" + signed_turn_state = service._encode_http_bridge_turn_state( + session_id=session_id, + owner_instance_id="instance-a@111", + api_key_id=None, + ) async def fake_select_account_with_budget( self, @@ -1353,81 
+1478,2512 @@ async def fake_connect_responses_websocket( base_url=None, session=None, ): - del headers, access_token, account_id_header, base_url, session + del access_token, account_id_header, base_url, session + connect_headers_seen.append(dict(headers)) return fake_upstream + async def flaky_delete_http_bridge_lease(self, stale_session_id): + del self + if stale_session_id == session_id: + raise RuntimeError("stale delete failed") + return None + monkeypatch.setattr(proxy_module.ProxyService, "_select_account_with_budget", fake_select_account_with_budget) monkeypatch.setattr(proxy_module.ProxyService, "_ensure_fresh_with_budget", fake_ensure_fresh_with_budget) monkeypatch.setattr(proxy_module, "connect_responses_websocket", fake_connect_responses_websocket) + monkeypatch.setattr(proxy_module.ProxyService, "_delete_http_bridge_lease", flaky_delete_http_bridge_lease) + monkeypatch.setattr(proxy_module, "_http_bridge_current_owner_id", lambda settings: "instance-a@222") + monkeypatch.setattr(proxy_module, "_http_bridge_process_exists", lambda pid: False) + + async with SessionLocal() as db_session: + await db_session.execute(delete(HttpBridgeLease).where(HttpBridgeLease.session_id == session_id)) + await db_session.commit() + + async with service._repo_factory() as repos: + await repos.http_bridge_leases.upsert( + session_id=session_id, + affinity_kind="turn_state_header", + affinity_key=signed_turn_state, + api_key_scope="", + owner_instance_id="instance-a@111", + lease_expires_at=proxy_module._http_bridge_lease_expires_at(120.0), + account_id=account.id, + request_model="gpt-5.1", + codex_session=True, + idle_ttl_seconds=120.0, + upstream_turn_state=None, + downstream_turn_state=signed_turn_state, + ) - payload = proxy_module.ResponsesRequest( - model="gpt-5.1", - instructions="Return exactly OK.", - input="hello", - prompt_cache_key="alias-preserve-thread", - ) - affinity = proxy_module._sticky_key_for_responses_request( - payload, - {}, - 
codex_session_affinity=False, - openai_cache_affinity=True, - openai_cache_affinity_max_age_seconds=300, - sticky_threads_enabled=False, - api_key=None, - ) session = await service._get_or_create_http_bridge_session( - proxy_module._make_http_bridge_session_key( - payload, - headers={}, - affinity=affinity, - api_key=None, - request_id="req_alias_preserve", + proxy_module._HTTPBridgeSessionKey("turn_state_header", signed_turn_state, None), + headers={"x-codex-turn-state": signed_turn_state}, + affinity=proxy_module._AffinityPolicy( + key=signed_turn_state, + kind=proxy_module.StickySessionKind.CODEX_SESSION, ), - headers={}, - affinity=affinity, api_key=None, - request_model=payload.model, + request_model="gpt-5.1", idle_ttl_seconds=120.0, max_sessions=128, ) - - await service._register_http_bridge_turn_state(session, "http_turn_alias_a") - await service._register_http_bridge_turn_state(session, "http_turn_alias_b") - replayed = await service._get_or_create_http_bridge_session( - proxy_module._HTTPBridgeSessionKey("turn_state_header", "http_turn_alias_a", None), - headers={"x-codex-turn-state": "http_turn_alias_a"}, + proxy_module._HTTPBridgeSessionKey("turn_state_header", signed_turn_state, None), + headers={"x-codex-turn-state": signed_turn_state}, affinity=proxy_module._AffinityPolicy( - key="http_turn_alias_a", + key=signed_turn_state, kind=proxy_module.StickySessionKind.CODEX_SESSION, ), api_key=None, - request_model=payload.model, + request_model="gpt-5.1", idle_ttl_seconds=120.0, max_sessions=128, ) - assert replayed is session - assert "http_turn_alias_a" in replayed.downstream_turn_state_aliases - assert "http_turn_alias_b" in replayed.downstream_turn_state_aliases + async with SessionLocal() as db_session: + stale_lease = ( + await db_session.execute(select(HttpBridgeLease).where(HttpBridgeLease.session_id == session_id)) + ).scalar_one_or_none() + + assert replayed is not session + assert stale_lease is not None + assert len(connect_headers_seen) == 2 
await service._close_http_bridge_session(session) + await service._close_http_bridge_session(replayed) @pytest.mark.asyncio -async def test_v1_responses_http_bridge_close_waits_for_turn_state_index_lock( +async def test_v1_responses_http_bridge_signed_turn_state_live_lease_from_restarted_worker_recovers_on_same_instance( async_client, app_instance, monkeypatch, ): - _install_bridge_settings_with_limits(monkeypatch, enabled=True) + _install_bridge_settings_with_limits( + monkeypatch, + enabled=True, + instance_id="instance-a", + instance_ring=["instance-a", "instance-b"], + ) + account_id = await _import_account( + async_client, + "acc_http_bridge_restarted_worker_recovery", + "http-bridge-restarted-worker-recovery@example.com", + ) + account = await _get_account(account_id) + service = get_proxy_service_for_app(app_instance) + fake_upstream = _FakeBridgeUpstreamWebSocket() + session_id = "hbs_signed_restarted_worker" + signed_turn_state = service._encode_http_bridge_turn_state( + session_id=session_id, + owner_instance_id="instance-a@111", + api_key_id=None, + ) + + async def fake_select_account_with_budget( + self, + deadline, + *, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids=None, + additional_limit_name=None, + ): + del ( + self, + deadline, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids, + additional_limit_name, + ) + return AccountSelection(account=account, error_message=None, error_code=None) + + async def fake_ensure_fresh_with_budget(self, target, *, force=False, timeout_seconds): + del self, force, timeout_seconds + return target + + async def fake_connect_responses_websocket( + headers, + access_token, + account_id_header, + *, + base_url=None, + session=None, + ): + del headers, 
access_token, account_id_header, base_url, session + return fake_upstream + + monkeypatch.setattr(proxy_module.ProxyService, "_select_account_with_budget", fake_select_account_with_budget) + monkeypatch.setattr(proxy_module.ProxyService, "_ensure_fresh_with_budget", fake_ensure_fresh_with_budget) + monkeypatch.setattr(proxy_module, "connect_responses_websocket", fake_connect_responses_websocket) + monkeypatch.setattr(proxy_module, "_http_bridge_current_owner_id", lambda settings: "instance-a@222") + monkeypatch.setattr(proxy_module, "_http_bridge_process_exists", lambda pid: False) + + async with SessionLocal() as db_session: + await db_session.execute(delete(HttpBridgeLease).where(HttpBridgeLease.session_id == session_id)) + await db_session.commit() + + async with service._repo_factory() as repos: + await repos.http_bridge_leases.upsert( + session_id=session_id, + affinity_kind="turn_state_header", + affinity_key=signed_turn_state, + api_key_scope="", + owner_instance_id="instance-a@111", + lease_expires_at=proxy_module._http_bridge_lease_expires_at(120.0), + account_id=account.id, + request_model="gpt-5.1", + codex_session=True, + idle_ttl_seconds=120.0, + upstream_turn_state=None, + downstream_turn_state=signed_turn_state, + ) + + recovered = await service._get_or_create_http_bridge_session( + proxy_module._HTTPBridgeSessionKey("turn_state_header", signed_turn_state, None), + headers={"x-codex-turn-state": signed_turn_state}, + affinity=proxy_module._AffinityPolicy( + key=signed_turn_state, + kind=proxy_module.StickySessionKind.CODEX_SESSION, + ), + api_key=None, + request_model="gpt-5.1", + idle_ttl_seconds=120.0, + max_sessions=128, + ) + + assert recovered.bridge_session_id != session_id + assert proxy_module._http_bridge_owner_instance_group(recovered.owner_instance_id) == "instance-a" + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_signed_turn_state_live_lease_from_reused_pid_recovers_on_same_instance( + async_client, + app_instance, + 
monkeypatch, +): + _install_bridge_settings_with_limits( + monkeypatch, + enabled=True, + instance_id="instance-a", + instance_ring=["instance-a", "instance-b"], + ) + account_id = await _import_account( + async_client, + "acc_http_bridge_reused_pid_recovery", + "http-bridge-reused-pid-recovery@example.com", + ) + account = await _get_account(account_id) + service = get_proxy_service_for_app(app_instance) + fake_upstream = _FakeBridgeUpstreamWebSocket() + session_id = "hbs_signed_reused_pid_worker" + signed_turn_state = service._encode_http_bridge_turn_state( + session_id=session_id, + owner_instance_id="instance-a@111:old-start", + api_key_id=None, + ) + + async def fake_select_account_with_budget( + self, + deadline, + *, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids=None, + additional_limit_name=None, + ): + del ( + self, + deadline, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids, + additional_limit_name, + ) + return AccountSelection(account=account, error_message=None, error_code=None) + + async def fake_ensure_fresh_with_budget(self, target, *, force=False, timeout_seconds): + del self, force, timeout_seconds + return target + + async def fake_connect_responses_websocket( + headers, + access_token, + account_id_header, + *, + base_url=None, + session=None, + ): + del headers, access_token, account_id_header, base_url, session + return fake_upstream + + monkeypatch.setattr(proxy_module.ProxyService, "_select_account_with_budget", fake_select_account_with_budget) + monkeypatch.setattr(proxy_module.ProxyService, "_ensure_fresh_with_budget", fake_ensure_fresh_with_budget) + monkeypatch.setattr(proxy_module, "connect_responses_websocket", fake_connect_responses_websocket) + 
monkeypatch.setattr(proxy_module, "_http_bridge_current_owner_id", lambda settings: "instance-a@222:current-start") + monkeypatch.setattr( + proxy_module, + "_http_bridge_process_start_marker", + lambda pid: "reused-start" if pid == 111 else None, + ) + + async with SessionLocal() as db_session: + await db_session.execute(delete(HttpBridgeLease).where(HttpBridgeLease.session_id == session_id)) + await db_session.commit() + + async with service._repo_factory() as repos: + await repos.http_bridge_leases.upsert( + session_id=session_id, + affinity_kind="turn_state_header", + affinity_key=signed_turn_state, + api_key_scope="", + owner_instance_id="instance-a@111:old-start", + lease_expires_at=proxy_module._http_bridge_lease_expires_at(120.0), + account_id=account.id, + request_model="gpt-5.1", + codex_session=True, + idle_ttl_seconds=120.0, + upstream_turn_state=None, + downstream_turn_state=signed_turn_state, + ) + + recovered = await service._get_or_create_http_bridge_session( + proxy_module._HTTPBridgeSessionKey("turn_state_header", signed_turn_state, None), + headers={"x-codex-turn-state": signed_turn_state}, + affinity=proxy_module._AffinityPolicy( + key=signed_turn_state, + kind=proxy_module.StickySessionKind.CODEX_SESSION, + ), + api_key=None, + request_model="gpt-5.1", + idle_ttl_seconds=120.0, + max_sessions=128, + ) + + assert recovered.bridge_session_id != session_id + assert proxy_module._http_bridge_owner_instance_group(recovered.owner_instance_id) == "instance-a" + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_live_lease_lookup_does_not_delete_concurrently_refreshed_row( + async_client, + app_instance, + monkeypatch, +): + _install_bridge_settings_with_limits(monkeypatch, enabled=True) + account_id = await _import_account( + async_client, + "acc_http_bridge_live_lease_race", + "http-bridge-live-lease-race@example.com", + ) + account = await _get_account(account_id) + service = get_proxy_service_for_app(app_instance) + session_id = 
"hbs_bridge_live_lease_race" + original_expiry = proxy_module.utcnow() - timedelta(seconds=1) + refreshed_expiry = proxy_module.utcnow() + timedelta(seconds=120) + + async with service._repo_factory() as repos: + await repos.http_bridge_leases.upsert( + session_id=session_id, + affinity_kind="turn_state_header", + affinity_key="signed-state", + api_key_scope="", + owner_instance_id="instance-a", + lease_expires_at=original_expiry, + account_id=account.id, + request_model="gpt-5.1", + codex_session=True, + idle_ttl_seconds=120.0, + upstream_turn_state=None, + downstream_turn_state="signed-state", + ) + lease = await repos.http_bridge_leases.get_by_session_id(session_id) + assert lease is not None + stale_expiry = lease.lease_expires_at + await repos.http_bridge_leases.touch( + session_id, + affinity_kind="turn_state_header", + affinity_key="signed-state", + api_key_scope="", + owner_instance_id="instance-a", + lease_expires_at=refreshed_expiry, + account_id=account.id, + request_model="gpt-5.1", + codex_session=True, + idle_ttl_seconds=120.0, + upstream_turn_state=None, + downstream_turn_state="signed-state", + ) + deleted = await repos.http_bridge_leases.delete_if_expires_at( + session_id, + lease_expires_at=stale_expiry, + ) + async with SessionLocal() as db_session: + remaining = ( + await db_session.execute(select(HttpBridgeLease).where(HttpBridgeLease.session_id == session_id)) + ).scalar_one_or_none() + + assert deleted is False + assert remaining is not None + assert proxy_module.to_utc_naive(remaining.lease_expires_at) == proxy_module.to_utc_naive(refreshed_expiry) + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_live_lease_lookup_rereads_after_refresh_wins_delete_race( + async_client, + app_instance, + monkeypatch, +): + _install_bridge_settings_with_limits(monkeypatch, enabled=True) + account_id = await _import_account( + async_client, + "acc_http_bridge_live_lease_reread", + "http-bridge-live-lease-reread@example.com", + ) + account = 
await _get_account(account_id) + service = get_proxy_service_for_app(app_instance) + session_id = "hbs_bridge_live_lease_reread" + original_expiry = proxy_module.utcnow() - timedelta(seconds=1) + refreshed_expiry = proxy_module.utcnow() + timedelta(seconds=120) + original_delete_if_expires_at = HttpBridgeLeasesRepository.delete_if_expires_at + + async def fake_delete_if_expires_at(self, session_id_arg, *, lease_expires_at): + row = await self.get_by_session_id(session_id_arg) + assert row is not None + await self.touch( + session_id_arg, + affinity_kind=row.affinity_kind, + affinity_key=row.affinity_key, + api_key_scope=row.api_key_scope, + owner_instance_id=row.owner_instance_id, + lease_expires_at=refreshed_expiry, + account_id=row.account_id, + request_model=row.request_model, + codex_session=row.codex_session, + idle_ttl_seconds=row.idle_ttl_seconds, + upstream_turn_state=row.upstream_turn_state, + downstream_turn_state=row.downstream_turn_state, + ) + return False + + monkeypatch.setattr(HttpBridgeLeasesRepository, "delete_if_expires_at", fake_delete_if_expires_at) + + async with service._repo_factory() as repos: + await repos.http_bridge_leases.upsert( + session_id=session_id, + affinity_kind="turn_state_header", + affinity_key="signed-state", + api_key_scope="", + owner_instance_id="instance-a", + lease_expires_at=original_expiry, + account_id=account.id, + request_model="gpt-5.1", + codex_session=True, + idle_ttl_seconds=120.0, + upstream_turn_state=None, + downstream_turn_state="signed-state", + ) + + snapshot = await service._get_live_http_bridge_lease(session_id) + + monkeypatch.setattr(HttpBridgeLeasesRepository, "delete_if_expires_at", original_delete_if_expires_at) + + assert snapshot is not None + assert snapshot.session_id == session_id + assert proxy_module.to_utc_naive(snapshot.lease_expires_at) == proxy_module.to_utc_naive(refreshed_expiry) + + +@pytest.mark.asyncio +async def test_http_bridge_leases_claim_allows_only_one_stale_replacement(): + 
stale_expiry = proxy_module.utcnow() + timedelta(seconds=120) + + async with SessionLocal() as session: + repo = HttpBridgeLeasesRepository(session) + await repo.upsert( + session_id="hbs_stale_original", + affinity_kind="prompt_cache", + affinity_key="stable-claim-key", + api_key_scope="", + owner_instance_id="instance-a", + lease_expires_at=stale_expiry, + account_id=None, + request_model="gpt-5.1", + codex_session=False, + idle_ttl_seconds=120.0, + upstream_turn_state=None, + downstream_turn_state=None, + ) + + async with SessionLocal() as session_one: + repo_one = HttpBridgeLeasesRepository(session_one) + claimed_one = await repo_one.claim( + session_id="hbs_claim_one", + affinity_kind="prompt_cache", + affinity_key="stable-claim-key", + api_key_scope="", + owner_instance_id="instance-a@worker-1", + lease_expires_at=stale_expiry, + account_id=None, + request_model="gpt-5.1", + codex_session=False, + idle_ttl_seconds=120.0, + upstream_turn_state=None, + downstream_turn_state=None, + replace_session_id="hbs_stale_original", + expires_before=proxy_module.utcnow(), + ) + + async with SessionLocal() as session_two: + repo_two = HttpBridgeLeasesRepository(session_two) + claimed_two = await repo_two.claim( + session_id="hbs_claim_two", + affinity_kind="prompt_cache", + affinity_key="stable-claim-key", + api_key_scope="", + owner_instance_id="instance-a@worker-2", + lease_expires_at=stale_expiry, + account_id=None, + request_model="gpt-5.1", + codex_session=False, + idle_ttl_seconds=120.0, + upstream_turn_state=None, + downstream_turn_state=None, + replace_session_id="hbs_stale_original", + expires_before=proxy_module.utcnow(), + ) + + assert (claimed_one is None) != (claimed_two is None) + + async with SessionLocal() as session: + lease = ( + await session.execute( + select(HttpBridgeLease).where( + HttpBridgeLease.affinity_kind == "prompt_cache", + HttpBridgeLease.affinity_key == "stable-claim-key", + HttpBridgeLease.api_key_scope == "", + ) + ) + ).scalar_one() + + 
assert lease.session_id in {"hbs_claim_one", "hbs_claim_two"} + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_signed_turn_state_live_lease_on_other_worker_is_wrong_instance( + async_client, + app_instance, + monkeypatch, +): + _install_bridge_settings_with_limits( + monkeypatch, + enabled=True, + instance_id="instance-a", + instance_ring=["instance-a", "instance-b"], + ) + account_id = await _import_account( + async_client, + "acc_http_bridge_worker_owner_mismatch", + "http-bridge-worker-owner-mismatch@example.com", + ) + account = await _get_account(account_id) + service = get_proxy_service_for_app(app_instance) + session_id = "hbs_signed_worker_owner_mismatch" + signed_turn_state = service._encode_http_bridge_turn_state( + session_id=session_id, + owner_instance_id="instance-a@worker-1", + api_key_id=None, + ) + monkeypatch.setattr(proxy_module, "_http_bridge_current_owner_id", lambda settings: "instance-a@worker-2") + monkeypatch.setattr(proxy_module, "_http_bridge_process_exists", lambda pid: True) + + async with SessionLocal() as db_session: + await db_session.execute(delete(HttpBridgeLease).where(HttpBridgeLease.session_id == session_id)) + await db_session.commit() + + async with service._repo_factory() as repos: + await repos.http_bridge_leases.upsert( + session_id=session_id, + affinity_kind="turn_state_header", + affinity_key=signed_turn_state, + api_key_scope="", + owner_instance_id="instance-a@worker-1", + lease_expires_at=proxy_module._http_bridge_lease_expires_at(120.0), + account_id=account.id, + request_model="gpt-5.1", + codex_session=True, + idle_ttl_seconds=120.0, + upstream_turn_state=None, + downstream_turn_state=signed_turn_state, + ) + + with pytest.raises(proxy_module.ProxyResponseError) as exc_info: + await service._get_or_create_http_bridge_session( + proxy_module._HTTPBridgeSessionKey("turn_state_header", signed_turn_state, None), + headers={"x-codex-turn-state": signed_turn_state}, + 
affinity=proxy_module._AffinityPolicy( + key=signed_turn_state, + kind=proxy_module.StickySessionKind.CODEX_SESSION, + ), + api_key=None, + request_model="gpt-5.1", + idle_ttl_seconds=120.0, + max_sessions=128, + ) + + exc = exc_info.value + assert exc.status_code == 409 + assert exc.payload["error"].get("code") == "bridge_wrong_instance" + assert ( + exc.payload["error"].get("message") + == "HTTP responses session bridge turn-state is owned by another live instance" + ) + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_signed_turn_state_live_peer_with_unreadable_marker_is_wrong_instance( + async_client, + app_instance, + monkeypatch, +): + _install_bridge_settings_with_limits( + monkeypatch, + enabled=True, + instance_id="instance-a", + instance_ring=["instance-a", "instance-b"], + ) + account_id = await _import_account( + async_client, + "acc_http_bridge_worker_owner_unreadable_marker", + "http-bridge-worker-owner-unreadable-marker@example.com", + ) + account = await _get_account(account_id) + service = get_proxy_service_for_app(app_instance) + session_id = "hbs_signed_worker_owner_unreadable_marker" + signed_turn_state = service._encode_http_bridge_turn_state( + session_id=session_id, + owner_instance_id="instance-a@111:old-start", + api_key_id=None, + ) + monkeypatch.setattr(proxy_module, "_http_bridge_current_owner_id", lambda settings: "instance-a@222:current-start") + monkeypatch.setattr(proxy_module, "_http_bridge_process_exists", lambda pid: True) + monkeypatch.setattr(proxy_module, "_http_bridge_process_start_marker", lambda pid: None) + + async with SessionLocal() as db_session: + await db_session.execute(delete(HttpBridgeLease).where(HttpBridgeLease.session_id == session_id)) + await db_session.commit() + + async with service._repo_factory() as repos: + await repos.http_bridge_leases.upsert( + session_id=session_id, + affinity_kind="turn_state_header", + affinity_key=signed_turn_state, + api_key_scope="", + 
owner_instance_id="instance-a@111:old-start", + lease_expires_at=proxy_module._http_bridge_lease_expires_at(120.0), + account_id=account.id, + request_model="gpt-5.1", + codex_session=True, + idle_ttl_seconds=120.0, + upstream_turn_state=None, + downstream_turn_state=signed_turn_state, + ) + + with pytest.raises(proxy_module.ProxyResponseError) as exc_info: + await service._get_or_create_http_bridge_session( + proxy_module._HTTPBridgeSessionKey("turn_state_header", signed_turn_state, None), + headers={"x-codex-turn-state": signed_turn_state}, + affinity=proxy_module._AffinityPolicy( + key=signed_turn_state, + kind=proxy_module.StickySessionKind.CODEX_SESSION, + ), + api_key=None, + request_model="gpt-5.1", + idle_ttl_seconds=120.0, + max_sessions=128, + ) + + exc = exc_info.value + assert exc.status_code == 409 + assert exc.payload["error"].get("code") == "bridge_wrong_instance" + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_signed_turn_state_owner_mismatch_rekeys_recovered_session( + async_client, + app_instance, + monkeypatch, +): + _install_bridge_settings_with_limits( + monkeypatch, + enabled=True, + instance_id="instance-a", + instance_ring=["instance-a", "instance-b"], + ) + account_id = await _import_account( + async_client, + "acc_http_bridge_rekey_recovered_signed_alias", + "http-bridge-rekey-recovered-signed-alias@example.com", + ) + account = await _get_account(account_id) + service = get_proxy_service_for_app(app_instance) + fake_upstream = _FakeBridgeUpstreamWebSocket() + stale_session_id = "hbs_signed_missing_alias_other_owner" + signed_turn_state = service._encode_http_bridge_turn_state( + session_id=stale_session_id, + owner_instance_id="instance-b", + api_key_id=None, + ) + + async def fake_select_account_with_budget( + self, + deadline, + *, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids=None, + 
additional_limit_name=None, + ): + del ( + self, + deadline, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids, + additional_limit_name, + ) + return AccountSelection(account=account, error_message=None, error_code=None) + + async def fake_ensure_fresh_with_budget(self, target, *, force=False, timeout_seconds): + del self, force, timeout_seconds + return target + + async def fake_connect_responses_websocket( + headers, + access_token, + account_id_header, + *, + base_url=None, + session=None, + ): + del headers, access_token, account_id_header, base_url, session + return fake_upstream + + monkeypatch.setattr(proxy_module.ProxyService, "_select_account_with_budget", fake_select_account_with_budget) + monkeypatch.setattr(proxy_module.ProxyService, "_ensure_fresh_with_budget", fake_ensure_fresh_with_budget) + monkeypatch.setattr(proxy_module, "connect_responses_websocket", fake_connect_responses_websocket) + + async with SessionLocal() as db_session: + await db_session.execute(delete(HttpBridgeLease).where(HttpBridgeLease.session_id == stale_session_id)) + await db_session.commit() + + session = await service._get_or_create_http_bridge_session( + proxy_module._HTTPBridgeSessionKey("turn_state_header", signed_turn_state, None), + headers={"x-codex-turn-state": signed_turn_state}, + affinity=proxy_module._AffinityPolicy( + key=signed_turn_state, + kind=proxy_module.StickySessionKind.CODEX_SESSION, + ), + api_key=None, + request_model="gpt-5.1", + idle_ttl_seconds=120.0, + max_sessions=128, + ) + + assert session.bridge_session_id != stale_session_id + assert session.key.affinity_kind == "turn_state_header" + assert session.key.affinity_key != signed_turn_state + recovered_token = service._decode_http_bridge_turn_state(session.key.affinity_key, api_key_id=None) + assert recovered_token is not None + assert recovered_token.session_id == 
session.bridge_session_id + assert proxy_module._http_bridge_owner_instance_group(recovered_token.owner_instance_id) == "instance-a" + + async with SessionLocal() as db_session: + lease = ( + await db_session.execute( + select(HttpBridgeLease).where(HttpBridgeLease.session_id == session.bridge_session_id) + ) + ).scalar_one() + assert proxy_module._http_bridge_owner_instance_group(lease.owner_instance_id) == "instance-a" + assert lease.affinity_kind == "turn_state_header" + assert lease.affinity_key == session.key.affinity_key + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_signed_turn_state_stale_owner_outside_ring_recovers( + async_client, + app_instance, + monkeypatch, +): + _install_bridge_settings_with_limits( + monkeypatch, + enabled=True, + instance_id="instance-new", + instance_ring=[], + ) + account_id = await _import_account( + async_client, + "acc_http_bridge_missing_signed_alias_stale_owner", + "http-bridge-missing-signed-alias-stale-owner@example.com", + ) + account = await _get_account(account_id) + service = get_proxy_service_for_app(app_instance) + fake_upstream = _FakeBridgeUpstreamWebSocket() + stale_session_id = "hbs_signed_missing_alias_stale_owner" + signed_turn_state = service._encode_http_bridge_turn_state( + session_id=stale_session_id, + owner_instance_id="instance-old", + api_key_id=None, + ) + + async def fake_select_account_with_budget( + self, + deadline, + *, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids=None, + additional_limit_name=None, + ): + del ( + self, + deadline, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids, + additional_limit_name, + ) + return AccountSelection(account=account, error_message=None, error_code=None) + + async def 
fake_ensure_fresh_with_budget(self, target, *, force=False, timeout_seconds): + del self, force, timeout_seconds + return target + + async def fake_connect_responses_websocket( + headers, + access_token, + account_id_header, + *, + base_url=None, + session=None, + ): + del headers, access_token, account_id_header, base_url, session + return fake_upstream + + monkeypatch.setattr(proxy_module.ProxyService, "_select_account_with_budget", fake_select_account_with_budget) + monkeypatch.setattr(proxy_module.ProxyService, "_ensure_fresh_with_budget", fake_ensure_fresh_with_budget) + monkeypatch.setattr(proxy_module, "connect_responses_websocket", fake_connect_responses_websocket) + + async with SessionLocal() as db_session: + await db_session.execute(delete(HttpBridgeLease).where(HttpBridgeLease.session_id == stale_session_id)) + await db_session.commit() + + async with service._repo_factory() as repos: + await repos.http_bridge_leases.upsert( + session_id=stale_session_id, + affinity_kind="prompt_cache", + affinity_key="stale-owner-thread", + api_key_scope="", + owner_instance_id="instance-old", + lease_expires_at=proxy_module._http_bridge_lease_expires_at(120.0), + account_id=account.id, + request_model="gpt-5.1", + codex_session=True, + idle_ttl_seconds=120.0, + upstream_turn_state=None, + downstream_turn_state=signed_turn_state, + ) + + recovered = await service._get_or_create_http_bridge_session( + proxy_module._HTTPBridgeSessionKey("turn_state_header", signed_turn_state, None), + headers={"x-codex-turn-state": signed_turn_state}, + affinity=proxy_module._AffinityPolicy( + key=signed_turn_state, + kind=proxy_module.StickySessionKind.CODEX_SESSION, + ), + api_key=None, + request_model="gpt-5.1", + idle_ttl_seconds=120.0, + max_sessions=128, + ) + + assert recovered.key.affinity_kind == "turn_state_header" + assert recovered.key.affinity_key != signed_turn_state + assert recovered.bridge_session_id != stale_session_id + assert recovered.affinity == 
proxy_module._AffinityPolicy( + key=recovered.key.affinity_key, + kind=proxy_module.StickySessionKind.CODEX_SESSION, + ) + assert recovered.codex_session is True + assert proxy_module._http_bridge_owner_instance_group(recovered.owner_instance_id) == "instance-new" + + async with SessionLocal() as db_session: + stale_lease = ( + await db_session.execute(select(HttpBridgeLease).where(HttpBridgeLease.session_id == stale_session_id)) + ).scalar_one_or_none() + new_lease = ( + await db_session.execute( + select(HttpBridgeLease).where(HttpBridgeLease.session_id == recovered.bridge_session_id) + ) + ).scalar_one() + + assert stale_lease is None + assert proxy_module._http_bridge_owner_instance_group(new_lease.owner_instance_id) == "instance-new" + assert new_lease.affinity_kind == "turn_state_header" + assert new_lease.affinity_key == recovered.key.affinity_key + await service._close_http_bridge_session(recovered) + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_signed_turn_state_recovery_rekeys_to_codex_affinity( + async_client, + app_instance, + monkeypatch, +): + _install_bridge_settings_with_limits( + monkeypatch, + enabled=True, + instance_id="instance-a", + instance_ring=["instance-a", "instance-b"], + ) + account_id = await _import_account( + async_client, + "acc_http_bridge_missing_signed_alias_stable_affinity", + "http-bridge-missing-signed-alias-stable-affinity@example.com", + ) + account = await _get_account(account_id) + service = get_proxy_service_for_app(app_instance) + upstreams = [ + _FakeBridgeUpstreamWebSocket(), + _FakeBridgeUpstreamWebSocket(), + _FakeBridgeUpstreamWebSocket(), + ] + connect_count = 0 + sticky_selections: list[tuple[str | None, object | None, bool, int | None]] = [] + session_id = "hbs_signed_missing_alias_stable_affinity" + signed_turn_state = service._encode_http_bridge_turn_state( + session_id=session_id, + owner_instance_id="instance-a", + api_key_id=None, + ) + + async def fake_select_account_with_budget( + self, 
+ deadline, + *, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids=None, + additional_limit_name=None, + ): + del ( + self, + deadline, + request_id, + kind, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids, + additional_limit_name, + ) + sticky_selections.append((sticky_key, sticky_kind, reallocate_sticky, sticky_max_age_seconds)) + return AccountSelection(account=account, error_message=None, error_code=None) + + async def fake_ensure_fresh_with_budget(self, target, *, force=False, timeout_seconds): + del self, force, timeout_seconds + return target + + async def fake_connect_responses_websocket( + headers, + access_token, + account_id_header, + *, + base_url=None, + session=None, + ): + del headers, access_token, account_id_header, base_url, session + nonlocal connect_count + connect_count += 1 + return upstreams.pop(0) + + monkeypatch.setattr(proxy_module.ProxyService, "_select_account_with_budget", fake_select_account_with_budget) + monkeypatch.setattr(proxy_module.ProxyService, "_ensure_fresh_with_budget", fake_ensure_fresh_with_budget) + monkeypatch.setattr(proxy_module, "connect_responses_websocket", fake_connect_responses_websocket) + + async with SessionLocal() as db_session: + await db_session.execute(delete(HttpBridgeLease).where(HttpBridgeLease.session_id == session_id)) + await db_session.commit() + + async with service._repo_factory() as repos: + await repos.http_bridge_leases.upsert( + session_id=session_id, + affinity_kind="prompt_cache", + affinity_key="stable-affinity-thread", + api_key_scope="", + owner_instance_id="instance-a", + lease_expires_at=proxy_module._http_bridge_lease_expires_at(120.0), + account_id=account.id, + request_model="gpt-5.1", + codex_session=True, + idle_ttl_seconds=120.0, + upstream_turn_state=None, + downstream_turn_state=signed_turn_state, + ) + + recovered 
= await service._get_or_create_http_bridge_session( + proxy_module._HTTPBridgeSessionKey("turn_state_header", signed_turn_state, None), + headers={"x-codex-turn-state": signed_turn_state}, + affinity=proxy_module._AffinityPolicy( + key=signed_turn_state, + kind=proxy_module.StickySessionKind.CODEX_SESSION, + ), + api_key=None, + request_model="gpt-5.1", + idle_ttl_seconds=120.0, + max_sessions=128, + ) + + assert recovered.key.affinity_kind == "turn_state_header" + assert recovered.key.affinity_key != signed_turn_state + assert recovered.affinity == proxy_module._AffinityPolicy( + key=recovered.key.affinity_key, + kind=proxy_module.StickySessionKind.CODEX_SESSION, + ) + assert recovered.codex_session is True + assert recovered.idle_ttl_seconds == pytest.approx(900.0) + + replayed = await service._get_or_create_http_bridge_session( + proxy_module._HTTPBridgeSessionKey("turn_state_header", recovered.key.affinity_key, None), + headers={"x-codex-turn-state": recovered.key.affinity_key}, + affinity=proxy_module._AffinityPolicy( + key=recovered.key.affinity_key, + kind=proxy_module.StickySessionKind.CODEX_SESSION, + ), + api_key=None, + request_model="gpt-5.1", + idle_ttl_seconds=120.0, + max_sessions=128, + ) + + reused = await service._get_or_create_http_bridge_session( + proxy_module._HTTPBridgeSessionKey("prompt_cache", "stable-affinity-thread", None), + headers={}, + affinity=proxy_module._AffinityPolicy( + key="stable-affinity-thread", + kind=proxy_module.StickySessionKind.PROMPT_CACHE, + ), + api_key=None, + request_model="gpt-5.1", + idle_ttl_seconds=120.0, + max_sessions=128, + ) + + request_state = proxy_module._WebSocketRequestState( + request_id="req-stable-affinity-reconnect", + model="gpt-5.1", + service_tier=None, + reasoning_effort=None, + api_key_reservation=None, + started_at=time.monotonic(), + awaiting_response_created=True, + request_text=json.dumps({"type": "response.create", "model": "gpt-5.1", "input": []}), + ) + await 
service._reconnect_http_bridge_session(recovered, request_state=request_state) + + assert replayed is recovered + assert reused is not recovered + assert reused.key == proxy_module._HTTPBridgeSessionKey("prompt_cache", "stable-affinity-thread", None) + assert connect_count == 3 + assert sticky_selections == [ + (recovered.key.affinity_key, proxy_module.StickySessionKind.CODEX_SESSION, False, None), + ("stable-affinity-thread", proxy_module.StickySessionKind.PROMPT_CACHE, False, None), + (recovered.key.affinity_key, proxy_module.StickySessionKind.CODEX_SESSION, False, None), + ] + await service._close_http_bridge_session(recovered) + await service._close_http_bridge_session(reused) + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_signed_turn_state_missing_local_alias_with_previous_response_expires( + async_client, + app_instance, + monkeypatch, +): + _install_bridge_settings_with_limits( + monkeypatch, + enabled=True, + instance_id="instance-a", + instance_ring=["instance-a", "instance-b"], + ) + account_id = await _import_account( + async_client, + "acc_http_bridge_missing_signed_alias_previous", + "http-bridge-missing-signed-alias-previous@example.com", + ) + account = await _get_account(account_id) + service = get_proxy_service_for_app(app_instance) + session_id = "hbs_signed_missing_alias_previous" + signed_turn_state = service._encode_http_bridge_turn_state( + session_id=session_id, + owner_instance_id="instance-a", + api_key_id=None, + ) + + async with SessionLocal() as db_session: + await db_session.execute(delete(HttpBridgeLease).where(HttpBridgeLease.session_id == session_id)) + await db_session.commit() + + async with service._repo_factory() as repos: + await repos.http_bridge_leases.upsert( + session_id=session_id, + affinity_kind="turn_state_header", + affinity_key=signed_turn_state, + api_key_scope="", + owner_instance_id="instance-a", + lease_expires_at=proxy_module._http_bridge_lease_expires_at(120.0), + account_id=account.id, + 
request_model="gpt-5.1", + codex_session=True, + idle_ttl_seconds=120.0, + upstream_turn_state=None, + downstream_turn_state=signed_turn_state, + ) + + with pytest.raises(proxy_module.ProxyResponseError) as exc_info: + await service._get_or_create_http_bridge_session( + proxy_module._HTTPBridgeSessionKey("turn_state_header", signed_turn_state, None), + headers={"x-codex-turn-state": signed_turn_state}, + affinity=proxy_module._AffinityPolicy( + key=signed_turn_state, + kind=proxy_module.StickySessionKind.CODEX_SESSION, + ), + api_key=None, + request_model="gpt-5.1", + idle_ttl_seconds=120.0, + max_sessions=128, + previous_response_id="resp_previous", + ) + + exc = exc_info.value + assert exc.status_code == 409 + assert exc.payload["error"]["code"] == "bridge_session_expired" + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_recovered_stale_turn_state_with_previous_response_expires( + async_client, + app_instance, + monkeypatch, +): + _install_bridge_settings_with_limits( + monkeypatch, + enabled=True, + instance_id="instance-a", + instance_ring=["instance-a", "instance-b"], + ) + account_id = await _import_account( + async_client, + "acc_http_bridge_recovered_signed_alias_previous", + "http-bridge-recovered-signed-alias-previous@example.com", + ) + account = await _get_account(account_id) + service = get_proxy_service_for_app(app_instance) + fake_upstream = _FakeBridgeUpstreamWebSocket() + connect_headers_seen: list[dict[str, str]] = [] + session_id = "hbs_recovered_signed_alias_previous" + signed_turn_state = service._encode_http_bridge_turn_state( + session_id=session_id, + owner_instance_id="instance-a", + api_key_id=None, + ) + + async def fake_select_account_with_budget( + self, + deadline, + *, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids=None, + additional_limit_name=None, + ): + del ( + self, + deadline, + 
request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids, + additional_limit_name, + ) + return AccountSelection(account=account, error_message=None, error_code=None) + + async def fake_ensure_fresh_with_budget(self, target, *, force=False, timeout_seconds): + del self, force, timeout_seconds + return target + + async def fake_connect_responses_websocket( + headers, + access_token, + account_id_header, + *, + base_url=None, + session=None, + ): + del access_token, account_id_header, base_url, session + connect_headers_seen.append(dict(headers)) + return fake_upstream + + monkeypatch.setattr(proxy_module.ProxyService, "_select_account_with_budget", fake_select_account_with_budget) + monkeypatch.setattr(proxy_module.ProxyService, "_ensure_fresh_with_budget", fake_ensure_fresh_with_budget) + monkeypatch.setattr(proxy_module, "connect_responses_websocket", fake_connect_responses_websocket) + monkeypatch.setattr(proxy_module, "_http_bridge_current_owner_id", lambda settings: "instance-a@222") + monkeypatch.setattr(proxy_module, "_http_bridge_process_exists", lambda pid: False) + + async with SessionLocal() as db_session: + await db_session.execute(delete(HttpBridgeLease).where(HttpBridgeLease.session_id == session_id)) + await db_session.commit() + + async with service._repo_factory() as repos: + await repos.http_bridge_leases.upsert( + session_id=session_id, + affinity_kind="turn_state_header", + affinity_key=signed_turn_state, + api_key_scope="", + owner_instance_id="instance-a", + lease_expires_at=proxy_module._http_bridge_lease_expires_at(120.0), + account_id=account.id, + request_model="gpt-5.1", + codex_session=True, + idle_ttl_seconds=120.0, + upstream_turn_state=None, + downstream_turn_state=signed_turn_state, + ) + + recovered = await service._get_or_create_http_bridge_session( + proxy_module._HTTPBridgeSessionKey("turn_state_header", 
signed_turn_state, None), + headers={"x-codex-turn-state": signed_turn_state}, + affinity=proxy_module._AffinityPolicy( + key=signed_turn_state, + kind=proxy_module.StickySessionKind.CODEX_SESSION, + ), + api_key=None, + request_model="gpt-5.1", + idle_ttl_seconds=120.0, + max_sessions=128, + ) + + with pytest.raises(proxy_module.ProxyResponseError) as exc_info: + await service._get_or_create_http_bridge_session( + proxy_module._HTTPBridgeSessionKey("turn_state_header", signed_turn_state, None), + headers={"x-codex-turn-state": signed_turn_state}, + affinity=proxy_module._AffinityPolicy( + key=signed_turn_state, + kind=proxy_module.StickySessionKind.CODEX_SESSION, + ), + api_key=None, + request_model="gpt-5.1", + idle_ttl_seconds=120.0, + max_sessions=128, + previous_response_id="resp_previous", + ) + + exc = exc_info.value + assert exc.status_code == 409 + assert exc.payload["error"]["code"] == "bridge_session_expired" + assert len(connect_headers_seen) == 1 + await service._close_http_bridge_session(recovered) + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_turn_state_alias_respects_api_key_isolation( + async_client, + app_instance, + monkeypatch, +): + _install_bridge_settings_with_limits(monkeypatch, enabled=True) + account_id = await _import_account( + async_client, + "acc_http_bridge_api_key_alias", + "http-bridge-api-key-alias@example.com", + ) + account = await _get_account(account_id) + service = get_proxy_service_for_app(app_instance) + fake_upstream = _FakeBridgeUpstreamWebSocket() + + async def fake_select_account_with_budget( + self, + deadline, + *, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids=None, + additional_limit_name=None, + ): + del ( + self, + deadline, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + 
routing_strategy, + model, + exclude_account_ids, + additional_limit_name, + ) + return AccountSelection(account=account, error_message=None, error_code=None) + + async def fake_ensure_fresh_with_budget(self, target, *, force=False, timeout_seconds): + del self, force, timeout_seconds + return target + + async def fake_connect_responses_websocket( + headers, + access_token, + account_id_header, + *, + base_url=None, + session=None, + ): + del headers, access_token, account_id_header, base_url, session + return fake_upstream + + monkeypatch.setattr(proxy_module.ProxyService, "_select_account_with_budget", fake_select_account_with_budget) + monkeypatch.setattr(proxy_module.ProxyService, "_ensure_fresh_with_budget", fake_ensure_fresh_with_budget) + monkeypatch.setattr(proxy_module, "connect_responses_websocket", fake_connect_responses_websocket) + + payload = proxy_module.ResponsesRequest( + model="gpt-5.1", + instructions="Return exactly OK.", + input="hello", + prompt_cache_key="api-key-alias-thread", + ) + affinity = proxy_module._sticky_key_for_responses_request( + payload, + {}, + codex_session_affinity=False, + openai_cache_affinity=True, + openai_cache_affinity_max_age_seconds=300, + sticky_threads_enabled=False, + api_key=None, + ) + api_key_a = cast(proxy_module.ApiKeyData, SimpleNamespace(id="api-key-a")) + session = await service._get_or_create_http_bridge_session( + proxy_module._make_http_bridge_session_key( + payload, + headers={}, + affinity=affinity, + api_key=api_key_a, + request_id="req_api_key_alias", + ), + headers={}, + affinity=affinity, + api_key=api_key_a, + request_model=payload.model, + idle_ttl_seconds=120.0, + max_sessions=128, + ) + signed_turn_state = service._encode_http_bridge_turn_state( + session_id=session.bridge_session_id, + owner_instance_id=session.owner_instance_id, + api_key_id="api-key-a", + ) + await service._register_http_bridge_turn_state(session, signed_turn_state) + + with pytest.raises(proxy_module.ProxyResponseError) as 
exc_info: + await service._get_or_create_http_bridge_session( + proxy_module._HTTPBridgeSessionKey("turn_state_header", signed_turn_state, "api-key-b"), + headers={"x-codex-turn-state": signed_turn_state}, + affinity=proxy_module._AffinityPolicy( + key=signed_turn_state, + kind=proxy_module.StickySessionKind.CODEX_SESSION, + ), + api_key=cast(proxy_module.ApiKeyData, SimpleNamespace(id="api-key-b")), + request_model=payload.model, + idle_ttl_seconds=120.0, + max_sessions=128, + ) + + assert isinstance(exc_info.value, proxy_module.ProxyResponseError) + exc = exc_info.value + assert exc.status_code == 409 + assert exc.payload["error"].get("code") == "bridge_token_invalid" + await service._close_http_bridge_session(session) + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_preserves_prior_turn_state_aliases( + async_client, + app_instance, + monkeypatch, +): + _install_bridge_settings_with_limits(monkeypatch, enabled=True) + account_id = await _import_account( + async_client, + "acc_http_bridge_alias_preserve", + "http-bridge-alias-preserve@example.com", + ) + account = await _get_account(account_id) + service = get_proxy_service_for_app(app_instance) + fake_upstream = _FakeBridgeUpstreamWebSocket() + + async def fake_select_account_with_budget( + self, + deadline, + *, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids=None, + additional_limit_name=None, + ): + del ( + self, + deadline, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids, + additional_limit_name, + ) + return AccountSelection(account=account, error_message=None, error_code=None) + + async def fake_ensure_fresh_with_budget(self, target, *, force=False, timeout_seconds): + del self, force, timeout_seconds + return target + + 
async def fake_connect_responses_websocket( + headers, + access_token, + account_id_header, + *, + base_url=None, + session=None, + ): + del headers, access_token, account_id_header, base_url, session + return fake_upstream + + monkeypatch.setattr(proxy_module.ProxyService, "_select_account_with_budget", fake_select_account_with_budget) + monkeypatch.setattr(proxy_module.ProxyService, "_ensure_fresh_with_budget", fake_ensure_fresh_with_budget) + monkeypatch.setattr(proxy_module, "connect_responses_websocket", fake_connect_responses_websocket) + + payload = proxy_module.ResponsesRequest( + model="gpt-5.1", + instructions="Return exactly OK.", + input="hello", + prompt_cache_key="alias-preserve-thread", + ) + affinity = proxy_module._sticky_key_for_responses_request( + payload, + {}, + codex_session_affinity=False, + openai_cache_affinity=True, + openai_cache_affinity_max_age_seconds=300, + sticky_threads_enabled=False, + api_key=None, + ) + session = await service._get_or_create_http_bridge_session( + proxy_module._make_http_bridge_session_key( + payload, + headers={}, + affinity=affinity, + api_key=None, + request_id="req_alias_preserve", + ), + headers={}, + affinity=affinity, + api_key=None, + request_model=payload.model, + idle_ttl_seconds=120.0, + max_sessions=128, + ) + + await service._register_http_bridge_turn_state(session, "http_turn_alias_a") + await service._register_http_bridge_turn_state(session, "http_turn_alias_b") + + replayed = await service._get_or_create_http_bridge_session( + proxy_module._HTTPBridgeSessionKey("turn_state_header", "http_turn_alias_a", None), + headers={"x-codex-turn-state": "http_turn_alias_a"}, + affinity=proxy_module._AffinityPolicy( + key="http_turn_alias_a", + kind=proxy_module.StickySessionKind.CODEX_SESSION, + ), + api_key=None, + request_model=payload.model, + idle_ttl_seconds=120.0, + max_sessions=128, + ) + + assert replayed is session + assert "http_turn_alias_a" in replayed.downstream_turn_state_aliases + assert 
"http_turn_alias_b" in replayed.downstream_turn_state_aliases + await service._close_http_bridge_session(session) + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_legacy_replay_converges_to_signed_canonical_turn_state( + app_instance, + monkeypatch, +): + _install_bridge_settings_with_limits(monkeypatch, enabled=True) + service = get_proxy_service_for_app(app_instance) + session = cast( + proxy_module._HTTPBridgeSession, + _make_dummy_bridge_session(proxy_module._HTTPBridgeSessionKey("request", "legacy-canonical-convergence", None)), + ) + session.bridge_session_id = "hbs_legacy_canonical_convergence" + session.owner_instance_id = "instance-a" + + async def fake_touch_http_bridge_lease(self, session_arg): + del self, session_arg + return None + + monkeypatch.setattr(proxy_module.ProxyService, "_touch_http_bridge_lease", fake_touch_http_bridge_lease) + + await service._register_http_bridge_turn_state(session, "http_turn_legacy_client") + + signed_turn_state = service._resolve_http_bridge_downstream_turn_state( + session, + requested_turn_state="http_turn_legacy_client", + api_key_id=None, + ) + await service._register_http_bridge_turn_state(session, signed_turn_state) + + signed_turn_state_repeat = service._resolve_http_bridge_downstream_turn_state( + session, + requested_turn_state="http_turn_legacy_client", + api_key_id=None, + ) + await service._register_http_bridge_turn_state(session, signed_turn_state_repeat) + + assert signed_turn_state_repeat == signed_turn_state + assert session.downstream_turn_state == signed_turn_state + assert session.downstream_turn_state_aliases == {"http_turn_legacy_client", signed_turn_state} + assert ( + service._http_bridge_turn_state_index[ + proxy_module._http_bridge_turn_state_alias_key("http_turn_legacy_client", session.key.api_key_id) + ] + == session.key + ) + assert ( + service._http_bridge_turn_state_index[ + proxy_module._http_bridge_turn_state_alias_key(signed_turn_state, session.key.api_key_id) + ] + == 
session.key + ) + + await service._close_http_bridge_session(session) + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_close_waits_for_turn_state_index_lock( + async_client, + app_instance, + monkeypatch, +): + _install_bridge_settings_with_limits(monkeypatch, enabled=True) + account_id = await _import_account( + async_client, + "acc_http_bridge_close_lock", + "http-bridge-close-lock@example.com", + ) + account = await _get_account(account_id) + service = get_proxy_service_for_app(app_instance) + fake_upstream = _FakeBridgeUpstreamWebSocket() + + async def fake_select_account_with_budget( + self, + deadline, + *, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids=None, + additional_limit_name=None, + ): + del ( + self, + deadline, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids, + additional_limit_name, + ) + return AccountSelection(account=account, error_message=None, error_code=None) + + async def fake_ensure_fresh_with_budget(self, target, *, force=False, timeout_seconds): + del self, force, timeout_seconds + return target + + async def fake_connect_responses_websocket( + headers, + access_token, + account_id_header, + *, + base_url=None, + session=None, + ): + del headers, access_token, account_id_header, base_url, session + return fake_upstream + + monkeypatch.setattr(proxy_module.ProxyService, "_select_account_with_budget", fake_select_account_with_budget) + monkeypatch.setattr(proxy_module.ProxyService, "_ensure_fresh_with_budget", fake_ensure_fresh_with_budget) + monkeypatch.setattr(proxy_module, "connect_responses_websocket", fake_connect_responses_websocket) + + payload = proxy_module.ResponsesRequest.model_validate({"model": "gpt-5.1", "instructions": "hi", "input": []}) + 
affinity = proxy_module._AffinityPolicy(key="turn-close-lock", kind=proxy_module.StickySessionKind.CODEX_SESSION) + + session = await service._get_or_create_http_bridge_session( + proxy_module._make_http_bridge_session_key( + payload, + headers={}, + affinity=affinity, + api_key=None, + request_id="req_close_lock", + ), + headers={}, + affinity=affinity, + api_key=None, + request_model=payload.model, + idle_ttl_seconds=120.0, + max_sessions=128, + ) + await service._register_http_bridge_turn_state(session, "http_turn_close_lock") + + alias_key = proxy_module._http_bridge_turn_state_alias_key("http_turn_close_lock", session.key.api_key_id) + + async with service._http_bridge_lock: + close_task = asyncio.create_task(service._close_http_bridge_session(session)) + await asyncio.sleep(0) + assert not close_task.done() + assert service._http_bridge_turn_state_index[alias_key] == session.key + + await close_task + + assert alias_key not in service._http_bridge_turn_state_index + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_refreshes_lease_after_request_detach(app_instance, monkeypatch): + _install_bridge_settings_with_limits(monkeypatch, enabled=True) + service = get_proxy_service_for_app(app_instance) + + payload = proxy_module.ResponsesRequest.model_validate({"model": "gpt-5.1", "instructions": "hi", "input": []}) + session = cast( + proxy_module._HTTPBridgeSession, + _make_dummy_bridge_session(proxy_module._HTTPBridgeSessionKey("request", "bridge-lease-refresh", None)), + ) + session.bridge_session_id = "hbs_bridge_lease_refresh" + session.response_create_gate = asyncio.Semaphore(1) + + event_queue: asyncio.Queue[str | None] = asyncio.Queue() + await event_queue.put('data: {"type":"response.completed"}\n\n') + await event_queue.put(None) + request_state = proxy_module._WebSocketRequestState( + request_id="req_bridge_lease_refresh", + model=payload.model, + service_tier=None, + reasoning_effort=None, + api_key_reservation=None, + 
started_at=time.monotonic(), + ) + request_state.event_queue = event_queue + session.pending_requests.append(request_state) + session.queued_request_count = 1 + + touch_points: list[float] = [] + + def fake_prepare_http_bridge_request(self, payload, headers, *, api_key, api_key_reservation, request_id): + del self, payload, headers, api_key, api_key_reservation, request_id + return request_state, json.dumps({"type": "response.create", "model": "gpt-5.1", "input": []}) + + async def fake_get_or_create_http_bridge_session( + self, + key, + *, + headers, + affinity, + api_key, + request_model, + idle_ttl_seconds, + max_sessions, + previous_response_id=None, + ): + del self, key, headers, affinity, api_key, request_model, idle_ttl_seconds, max_sessions, previous_response_id + return session + + async def fake_submit_http_bridge_request(self, session, *, request_state, text_data, queue_limit): + del self, session, request_state, text_data, queue_limit + return None + + def fake_resolve_http_bridge_downstream_turn_state(self, session, *, requested_turn_state, api_key_id): + del self, session, requested_turn_state, api_key_id + return "http_turn_refresh_finished" + + async def fake_register_http_bridge_turn_state(self, session, turn_state): + del turn_state + await self._touch_http_bridge_lease(session) + + async def fake_touch_http_bridge_lease(self, session): + del self + touch_points.append(session.last_used_at) + + monkeypatch.setattr(proxy_module.ProxyService, "_prepare_http_bridge_request", fake_prepare_http_bridge_request) + monkeypatch.setattr( + proxy_module.ProxyService, "_get_or_create_http_bridge_session", fake_get_or_create_http_bridge_session + ) + monkeypatch.setattr(proxy_module.ProxyService, "_submit_http_bridge_request", fake_submit_http_bridge_request) + monkeypatch.setattr( + proxy_module.ProxyService, + "_resolve_http_bridge_downstream_turn_state", + fake_resolve_http_bridge_downstream_turn_state, + ) + monkeypatch.setattr( + 
proxy_module.ProxyService, "_register_http_bridge_turn_state", fake_register_http_bridge_turn_state + ) + monkeypatch.setattr(proxy_module.ProxyService, "_touch_http_bridge_lease", fake_touch_http_bridge_lease) + + events = [ + event + async for event in service._stream_via_http_bridge( + payload, + {}, + codex_session_affinity=False, + propagate_http_errors=False, + openai_cache_affinity=False, + api_key=None, + api_key_reservation=None, + suppress_text_done_events=False, + idle_ttl_seconds=120.0, + codex_idle_ttl_seconds=120.0, + max_sessions=8, + queue_limit=8, + ) + ] + + assert events == ['data: {"type":"response.completed"}\n\n'] + assert len(touch_points) == 2 + assert touch_points[1] >= touch_points[0] + assert session.last_used_at == touch_points[1] + assert not session.pending_requests + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_turn_state_registration_failure_does_not_emit_dead_header( + app_instance, + monkeypatch, +): + _install_bridge_settings_with_limits(monkeypatch, enabled=True) + service = get_proxy_service_for_app(app_instance) + payload = proxy_module.ResponsesRequest.model_validate({"model": "gpt-5.1", "instructions": "hi", "input": []}) + session = cast( + proxy_module._HTTPBridgeSession, + _make_dummy_bridge_session(proxy_module._HTTPBridgeSessionKey("request", "register-turn-state-failure", None)), + ) + session.bridge_session_id = "hbs_register_turn_state_failure" + session.response_create_gate = asyncio.Semaphore(1) + session.account = SimpleNamespace(id="acc_register_turn_state_failure", status=AccountStatus.ACTIVE) # type: ignore[assignment] + response_headers_out: dict[str, str] = {} + + def fake_prepare_http_bridge_request(self, *args, **kwargs): + del self, args, kwargs + return ( + proxy_module._WebSocketRequestState( + request_id="req_register_turn_state_failure", + model="gpt-5.1", + service_tier=None, + reasoning_effort=None, + api_key_reservation=None, + started_at=time.monotonic(), + 
event_queue=asyncio.Queue(), + ), + json.dumps({"type": "response.create", "model": "gpt-5.1", "input": []}), + ) + + async def fake_get_or_create_http_bridge_session(self, *args, **kwargs): + del self, args, kwargs + return session + + async def fake_submit_http_bridge_request(self, session_arg, *, request_state, text_data, queue_limit): + del session_arg, text_data, queue_limit + await request_state.event_queue.put('data: {"type":"response.completed"}\n\n') + await request_state.event_queue.put(None) + + def fake_resolve_http_bridge_downstream_turn_state(self, session_arg, *, requested_turn_state, api_key_id): + del self, session_arg, requested_turn_state, api_key_id + return "http_turn_dead_header" + + async def failing_touch_http_bridge_lease(self, session_arg): + del self, session_arg + raise RuntimeError("lease touch failed") + + monkeypatch.setattr(proxy_module.ProxyService, "_prepare_http_bridge_request", fake_prepare_http_bridge_request) + monkeypatch.setattr( + proxy_module.ProxyService, "_get_or_create_http_bridge_session", fake_get_or_create_http_bridge_session + ) + monkeypatch.setattr(proxy_module.ProxyService, "_submit_http_bridge_request", fake_submit_http_bridge_request) + monkeypatch.setattr( + proxy_module.ProxyService, + "_resolve_http_bridge_downstream_turn_state", + fake_resolve_http_bridge_downstream_turn_state, + ) + monkeypatch.setattr(proxy_module.ProxyService, "_touch_http_bridge_lease", failing_touch_http_bridge_lease) + + with pytest.raises(proxy_module.ProxyResponseError) as exc_info: + async for _ in service._stream_via_http_bridge( + payload, + {}, + codex_session_affinity=False, + propagate_http_errors=False, + openai_cache_affinity=False, + api_key=None, + api_key_reservation=None, + suppress_text_done_events=False, + idle_ttl_seconds=120.0, + codex_idle_ttl_seconds=120.0, + max_sessions=8, + queue_limit=8, + response_headers_out=response_headers_out, + ): + pass + + exc = exc_info.value + assert exc.status_code == 502 + assert 
exc.payload["error"].get("code") == "upstream_unavailable" + assert "x-codex-turn-state" not in response_headers_out + assert session.closed is True + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_keeps_lease_alive_while_request_is_active(app_instance, monkeypatch): + _install_bridge_settings_with_limits(monkeypatch, enabled=True) + service = get_proxy_service_for_app(app_instance) + session = cast( + proxy_module._HTTPBridgeSession, + _make_dummy_bridge_session(proxy_module._HTTPBridgeSessionKey("request", "bridge-lease-keepalive", None)), + ) + session.bridge_session_id = "hbs_bridge_lease_keepalive" + session.idle_ttl_seconds = 0.5 + session.response_create_gate = asyncio.Semaphore(1) + request_state = proxy_module._WebSocketRequestState( + request_id="req_bridge_lease_keepalive", + model="gpt-5.1", + service_tier=None, + reasoning_effort=None, + api_key_reservation=None, + started_at=time.monotonic(), + ) + session.pending_requests.append(request_state) + session.queued_request_count = 1 + + touch_points: list[float] = [] + touched = asyncio.Event() + + async def fake_touch_http_bridge_lease(self, session): + del self + touch_points.append(session.last_used_at) + touched.set() + + monkeypatch.setattr(proxy_module.ProxyService, "_touch_http_bridge_lease", fake_touch_http_bridge_lease) + + await service._ensure_http_bridge_lease_keepalive(session) + await asyncio.wait_for(touched.wait(), timeout=0.4) + await service._stop_http_bridge_lease_keepalive(session) + + assert touch_points + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_keepalive_refresh_failure_closes_session(app_instance, monkeypatch): + _install_bridge_settings_with_limits(monkeypatch, enabled=True) + service = get_proxy_service_for_app(app_instance) + session = cast( + proxy_module._HTTPBridgeSession, + _make_dummy_bridge_session( + proxy_module._HTTPBridgeSessionKey("request", "bridge-lease-keepalive-failure", None) + ), + ) + session.bridge_session_id = 
"hbs_bridge_lease_keepalive_failure" + session.idle_ttl_seconds = 0.5 + session.response_create_gate = asyncio.Semaphore(1) + request_state = proxy_module._WebSocketRequestState( + request_id="req_bridge_lease_keepalive_failure", + model="gpt-5.1", + service_tier=None, + reasoning_effort=None, + api_key_reservation=None, + started_at=time.monotonic(), + event_queue=asyncio.Queue(), + ) + session.pending_requests.append(request_state) + session.queued_request_count = 1 + + async def fake_touch_http_bridge_lease(self, session): + del self, session + raise RuntimeError("lease touch failed") + + async def fake_write_request_log(self, **kwargs): + del self, kwargs + return None + + async def fake_release_websocket_reservation(self, reservation): + del self, reservation + return None + + monkeypatch.setattr(proxy_module.ProxyService, "_touch_http_bridge_lease", fake_touch_http_bridge_lease) + monkeypatch.setattr(proxy_module.ProxyService, "_write_request_log", fake_write_request_log) + monkeypatch.setattr(proxy_module.ProxyService, "_release_websocket_reservation", fake_release_websocket_reservation) + + assert request_state.event_queue is not None + await service._ensure_http_bridge_lease_keepalive(session) + failed_event = await asyncio.wait_for(request_state.event_queue.get(), timeout=1.0) + assert failed_event is not None + failed_payload = proxy_module.parse_sse_data_json(failed_event) + assert failed_payload is not None + assert failed_payload["type"] == "response.failed" + assert await asyncio.wait_for(request_state.event_queue.get(), timeout=1.0) is None + await asyncio.sleep(0) + assert session.closed is True + assert not session.pending_requests + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_creation_closes_upstream_when_lease_persist_fails( + async_client, + app_instance, + monkeypatch, +): + _install_bridge_settings_with_limits(monkeypatch, enabled=True) + account_id = await _import_account( + async_client, + 
"acc_http_bridge_lease_persist_failure", + "http-bridge-lease-persist-failure@example.com", + ) + account = await _get_account(account_id) + service = get_proxy_service_for_app(app_instance) + fake_upstream = _FakeBridgeUpstreamWebSocket() + + async def fake_select_account_with_budget( + self, + deadline, + *, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids=None, + additional_limit_name=None, + ): + del ( + self, + deadline, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids, + additional_limit_name, + ) + return AccountSelection(account=account, error_message=None, error_code=None) + + async def fake_ensure_fresh_with_budget(self, target, *, force=False, timeout_seconds): + del self, force, timeout_seconds + return target + + async def fake_connect_responses_websocket( + headers, + access_token, + account_id_header, + *, + base_url=None, + session=None, + ): + del headers, access_token, account_id_header, base_url, session + return fake_upstream + + async def fake_persist_http_bridge_lease(self, session): + del self, session + raise RuntimeError("lease persistence failed") + + monkeypatch.setattr(proxy_module.ProxyService, "_select_account_with_budget", fake_select_account_with_budget) + monkeypatch.setattr(proxy_module.ProxyService, "_ensure_fresh_with_budget", fake_ensure_fresh_with_budget) + monkeypatch.setattr(proxy_module, "connect_responses_websocket", fake_connect_responses_websocket) + monkeypatch.setattr(proxy_module.ProxyService, "_persist_http_bridge_lease", fake_persist_http_bridge_lease) + + with pytest.raises(RuntimeError, match="lease persistence failed"): + await service._create_http_bridge_session( + proxy_module._HTTPBridgeSessionKey("request", "lease-persist-failure", None), + 
headers={}, + affinity=proxy_module._AffinityPolicy(), + request_model="gpt-5.1", + idle_ttl_seconds=120.0, + bridge_session_id="hbs_lease_persist_failure", + owner_instance_id="instance-a", + ) + + assert fake_upstream.closed is True + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_creation_with_replacement_uses_persist_hook( + async_client, + app_instance, + monkeypatch, +): + _install_bridge_settings_with_limits(monkeypatch, enabled=True) + account_id = await _import_account( + async_client, + "acc_http_bridge_lease_persist_failure_replace", + "http-bridge-lease-persist-failure-replace@example.com", + ) + account = await _get_account(account_id) + service = get_proxy_service_for_app(app_instance) + fake_upstream = _FakeBridgeUpstreamWebSocket() + persisted_replace_session_ids: list[str | None] = [] + + async def fake_select_account_with_budget( + self, + deadline, + *, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids=None, + additional_limit_name=None, + ): + del ( + self, + deadline, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids, + additional_limit_name, + ) + return AccountSelection(account=account, error_message=None, error_code=None) + + async def fake_ensure_fresh_with_budget(self, target, *, force=False, timeout_seconds): + del self, force, timeout_seconds + return target + + async def fake_connect_responses_websocket( + headers, + access_token, + account_id_header, + *, + base_url=None, + session=None, + ): + del headers, access_token, account_id_header, base_url, session + return fake_upstream + + async def fake_persist_http_bridge_lease(self, session): + del self + persisted_replace_session_ids.append(session.pending_replaced_bridge_session_id) + raise RuntimeError("lease 
persistence failed") + + monkeypatch.setattr(proxy_module.ProxyService, "_select_account_with_budget", fake_select_account_with_budget) + monkeypatch.setattr(proxy_module.ProxyService, "_ensure_fresh_with_budget", fake_ensure_fresh_with_budget) + monkeypatch.setattr(proxy_module, "connect_responses_websocket", fake_connect_responses_websocket) + monkeypatch.setattr(proxy_module.ProxyService, "_persist_http_bridge_lease", fake_persist_http_bridge_lease) + + with pytest.raises(RuntimeError, match="lease persistence failed"): + await service._create_http_bridge_session( + proxy_module._HTTPBridgeSessionKey("turn_state_header", "replacement-hook", None), + headers={}, + affinity=proxy_module._AffinityPolicy( + key="replacement-hook", + kind=proxy_module.StickySessionKind.CODEX_SESSION, + ), + request_model="gpt-5.1", + idle_ttl_seconds=120.0, + bridge_session_id="hbs_lease_persist_failure_replace", + owner_instance_id="instance-a", + replaced_bridge_session_id="hbs_stale_replaced", + ) + + assert persisted_replace_session_ids == ["hbs_stale_replaced"] + assert fake_upstream.closed is True + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_allows_unstable_request_key_even_on_non_owner_instance( + async_client, + app_instance, + monkeypatch, +): + _install_bridge_settings_with_limits( + monkeypatch, + enabled=True, + instance_id="instance-b", + instance_ring=["instance-a", "instance-b"], + ) + account_id = await _import_account(async_client, "acc_http_bridge_unstable", "http-bridge-unstable@example.com") + account = await _get_account(account_id) + service = get_proxy_service_for_app(app_instance) + fake_upstream = _FakeBridgeUpstreamWebSocket() + + async def fake_select_account_with_budget( + self, + deadline, + *, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids=None, + additional_limit_name=None, + ): + del ( + self, + 
deadline, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids, + additional_limit_name, + ) + return AccountSelection(account=account, error_message=None, error_code=None) + + async def fake_ensure_fresh_with_budget(self, target, *, force=False, timeout_seconds): + del self, force, timeout_seconds + return target + + async def fake_connect_responses_websocket( + headers, + access_token, + account_id_header, + *, + base_url=None, + session=None, + ): + del headers, access_token, account_id_header, base_url, session + return fake_upstream + + monkeypatch.setattr(proxy_module.ProxyService, "_select_account_with_budget", fake_select_account_with_budget) + monkeypatch.setattr(proxy_module.ProxyService, "_ensure_fresh_with_budget", fake_ensure_fresh_with_budget) + monkeypatch.setattr(proxy_module, "connect_responses_websocket", fake_connect_responses_websocket) + + payload = proxy_module.ResponsesRequest.model_validate( + {"model": "gpt-5.4", "instructions": "hi", "input": [{"role": "user", "content": "hi"}]} + ) + affinity = proxy_module._sticky_key_for_responses_request( + payload, + {}, + codex_session_affinity=False, + openai_cache_affinity=False, + openai_cache_affinity_max_age_seconds=300, + sticky_threads_enabled=False, + api_key=None, + ) + key = proxy_module._make_http_bridge_session_key( + payload, + headers={}, + affinity=affinity, + api_key=None, + request_id="req_owner_unstable", + ) + + session = await service._get_or_create_http_bridge_session( + key, + headers={}, + affinity=affinity, + api_key=None, + request_model=payload.model, + idle_ttl_seconds=120.0, + max_sessions=8, + ) + + assert session.key.affinity_kind == "request" + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_reconnect_uses_last_upstream_turn_state( + async_client, + app_instance, + monkeypatch, +): + 
_install_bridge_settings_with_limits(monkeypatch, enabled=True) account_id = await _import_account( async_client, - "acc_http_bridge_close_lock", - "http-bridge-close-lock@example.com", + "acc_http_bridge_upstream_turn", + "http-bridge-upstream-turn@example.com", ) account = await _get_account(account_id) service = get_proxy_service_for_app(app_instance) - fake_upstream = _FakeBridgeUpstreamWebSocket() + connect_headers_seen: list[dict[str, str]] = [] + upstreams = [ + _TurnStateBridgeUpstreamWebSocket("upstream_turn_state_1"), + _TurnStateBridgeUpstreamWebSocket("upstream_turn_state_2"), + ] + + async def fake_select_account_with_budget( + self, + deadline, + *, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids=None, + additional_limit_name=None, + ): + del ( + self, + deadline, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids, + additional_limit_name, + ) + return AccountSelection(account=account, error_message=None, error_code=None) + + async def fake_ensure_fresh_with_budget(self, target, *, force=False, timeout_seconds): + del self, force, timeout_seconds + return target + + async def fake_connect_responses_websocket( + headers, + access_token, + account_id_header, + *, + base_url=None, + session=None, + ): + del access_token, account_id_header, base_url, session + connect_headers_seen.append(dict(headers)) + return upstreams.pop(0) + + monkeypatch.setattr(proxy_module.ProxyService, "_select_account_with_budget", fake_select_account_with_budget) + monkeypatch.setattr(proxy_module.ProxyService, "_ensure_fresh_with_budget", fake_ensure_fresh_with_budget) + monkeypatch.setattr(proxy_module, "connect_responses_websocket", fake_connect_responses_websocket) + + payload = 
proxy_module.ResponsesRequest.model_validate( + {"model": "gpt-5.4", "instructions": "hi", "input": [{"role": "user", "content": "hi"}]} + ) + affinity = proxy_module._sticky_key_for_responses_request( + payload, + {"x-codex-turn-state": "local_turn_state"}, + codex_session_affinity=True, + openai_cache_affinity=True, + openai_cache_affinity_max_age_seconds=300, + sticky_threads_enabled=False, + api_key=None, + ) + key = proxy_module._make_http_bridge_session_key( + payload, + headers={"x-codex-turn-state": "local_turn_state"}, + affinity=affinity, + api_key=None, + request_id="req_turn_state", + ) + bridge_session = await service._get_or_create_http_bridge_session( + key, + headers={"x-codex-turn-state": "local_turn_state"}, + affinity=affinity, + api_key=None, + request_model=payload.model, + idle_ttl_seconds=120.0, + max_sessions=8, + ) + + request_state = proxy_module._WebSocketRequestState( + request_id="req-turn-state-reconnect", + model=payload.model, + service_tier=None, + reasoning_effort=None, + api_key_reservation=None, + started_at=time.monotonic(), + awaiting_response_created=True, + request_text=json.dumps({"type": "response.create", "model": "gpt-5.4", "input": []}), + ) + await service._reconnect_http_bridge_session(bridge_session, request_state=request_state) + + assert connect_headers_seen[0]["x-codex-turn-state"] == "local_turn_state" + assert connect_headers_seen[1]["x-codex-turn-state"] == "upstream_turn_state_1" + assert bridge_session.upstream_turn_state == "upstream_turn_state_2" + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_reconnect_preserves_signed_turn_state_when_handshake_is_silent( + async_client, + app_instance, + monkeypatch, +): + _install_bridge_settings_with_limits(monkeypatch, enabled=True) + account_id = await _import_account( + async_client, + "acc_http_bridge_signed_reconnect_fallback", + "http-bridge-signed-reconnect-fallback@example.com", + ) + account = await _get_account(account_id) + service = 
get_proxy_service_for_app(app_instance) + connect_headers_seen: list[dict[str, str]] = [] + upstreams = [_FakeBridgeUpstreamWebSocket(), _FakeBridgeUpstreamWebSocket()] + stale_signed_turn_state = service._encode_http_bridge_turn_state( + session_id="hbs_signed_reconnect_fallback_stale", + owner_instance_id="instance-a", + api_key_id=None, + ) + + async def fake_select_account_with_budget( + self, + deadline, + *, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids=None, + additional_limit_name=None, + ): + del ( + self, + deadline, + request_id, + kind, + sticky_key, + sticky_kind, + reallocate_sticky, + sticky_max_age_seconds, + prefer_earlier_reset_accounts, + routing_strategy, + model, + exclude_account_ids, + additional_limit_name, + ) + return AccountSelection(account=account, error_message=None, error_code=None) + + async def fake_ensure_fresh_with_budget(self, target, *, force=False, timeout_seconds): + del self, force, timeout_seconds + return target + + async def fake_connect_responses_websocket( + headers, + access_token, + account_id_header, + *, + base_url=None, + session=None, + ): + del access_token, account_id_header, base_url, session + connect_headers_seen.append(dict(headers)) + return upstreams.pop(0) + + monkeypatch.setattr(proxy_module.ProxyService, "_select_account_with_budget", fake_select_account_with_budget) + monkeypatch.setattr(proxy_module.ProxyService, "_ensure_fresh_with_budget", fake_ensure_fresh_with_budget) + monkeypatch.setattr(proxy_module, "connect_responses_websocket", fake_connect_responses_websocket) + + bridge_session = await service._get_or_create_http_bridge_session( + proxy_module._HTTPBridgeSessionKey("turn_state_header", stale_signed_turn_state, None), + headers={"x-codex-turn-state": stale_signed_turn_state}, + affinity=proxy_module._AffinityPolicy( + key=stale_signed_turn_state, + 
kind=proxy_module.StickySessionKind.CODEX_SESSION, + ), + api_key=None, + request_model="gpt-5.1", + idle_ttl_seconds=120.0, + max_sessions=8, + ) + await service._register_http_bridge_turn_state(bridge_session, bridge_session.key.affinity_key) + + request_state = proxy_module._WebSocketRequestState( + request_id="req-signed-turn-state-reconnect", + model="gpt-5.1", + service_tier=None, + reasoning_effort=None, + api_key_reservation=None, + started_at=time.monotonic(), + awaiting_response_created=True, + request_text=json.dumps({"type": "response.create", "model": "gpt-5.1", "input": []}), + ) + await service._reconnect_http_bridge_session(bridge_session, request_state=request_state) + + assert "x-codex-turn-state" not in connect_headers_seen[0] + assert connect_headers_seen[1]["x-codex-turn-state"] == bridge_session.key.affinity_key + assert bridge_session.upstream_turn_state is None + assert bridge_session.reconnect_turn_state == bridge_session.key.affinity_key + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_reconnect_restart_reader_preserves_lease_until_touch( + async_client, + app_instance, + monkeypatch, +): + _install_bridge_settings_with_limits(monkeypatch, enabled=True) + account_id = await _import_account( + async_client, + "acc_http_bridge_reconnect_lease_handoff", + "http-bridge-reconnect-lease-handoff@example.com", + ) + account = await _get_account(account_id) + service = get_proxy_service_for_app(app_instance) + first_upstream = _FakeBridgeUpstreamWebSocket() + second_upstream = _FakeBridgeUpstreamWebSocket() + upstreams = [first_upstream, second_upstream] async def fake_select_account_with_budget( self, @@ -1475,22 +4031,24 @@ async def fake_connect_responses_websocket( session=None, ): del headers, access_token, account_id_header, base_url, session - return fake_upstream + return upstreams.pop(0) monkeypatch.setattr(proxy_module.ProxyService, "_select_account_with_budget", fake_select_account_with_budget) 
monkeypatch.setattr(proxy_module.ProxyService, "_ensure_fresh_with_budget", fake_ensure_fresh_with_budget) monkeypatch.setattr(proxy_module, "connect_responses_websocket", fake_connect_responses_websocket) payload = proxy_module.ResponsesRequest.model_validate({"model": "gpt-5.1", "instructions": "hi", "input": []}) - affinity = proxy_module._AffinityPolicy(key="turn-close-lock", kind=proxy_module.StickySessionKind.CODEX_SESSION) - + affinity = proxy_module._AffinityPolicy( + key="reconnect-lease-handoff", + kind=proxy_module.StickySessionKind.PROMPT_CACHE, + ) session = await service._get_or_create_http_bridge_session( proxy_module._make_http_bridge_session_key( payload, headers={}, affinity=affinity, api_key=None, - request_id="req_close_lock", + request_id="req_reconnect_lease_handoff", ), headers={}, affinity=affinity, @@ -1499,37 +4057,51 @@ async def fake_connect_responses_websocket( idle_ttl_seconds=120.0, max_sessions=128, ) - await service._register_http_bridge_turn_state(session, "http_turn_close_lock") - alias_key = proxy_module._http_bridge_turn_state_alias_key("http_turn_close_lock", session.key.api_key_id) + call_order: list[str] = [] - async with service._http_bridge_lock: - close_task = asyncio.create_task(service._close_http_bridge_session(session)) - await asyncio.sleep(0) - assert not close_task.done() - assert service._http_bridge_turn_state_index[alias_key] == session.key + async def fake_delete_http_bridge_lease(self, session_id): + del self, session_id + call_order.append("delete") - await close_task + async def fake_touch_http_bridge_lease(self, session): + del self, session + call_order.append("touch") - assert alias_key not in service._http_bridge_turn_state_index + monkeypatch.setattr(proxy_module.ProxyService, "_delete_http_bridge_lease", fake_delete_http_bridge_lease) + monkeypatch.setattr(proxy_module.ProxyService, "_touch_http_bridge_lease", fake_touch_http_bridge_lease) + + request_state = proxy_module._WebSocketRequestState( + 
request_id="req_reconnect_lease_restart", + model=payload.model, + service_tier=None, + reasoning_effort=None, + api_key_reservation=None, + started_at=time.monotonic(), + ) + await service._reconnect_http_bridge_session(session, request_state=request_state, restart_reader=True) + + assert call_order == ["touch"] + await service._close_http_bridge_session(session) @pytest.mark.asyncio -async def test_v1_responses_http_bridge_allows_unstable_request_key_even_on_non_owner_instance( +async def test_v1_responses_http_bridge_reconnect_without_reader_restart_preserves_lease_until_touch( async_client, app_instance, monkeypatch, ): - _install_bridge_settings_with_limits( - monkeypatch, - enabled=True, - instance_id="instance-b", - instance_ring=["instance-a", "instance-b"], + _install_bridge_settings_with_limits(monkeypatch, enabled=True) + account_id = await _import_account( + async_client, + "acc_http_bridge_reconnect_lease_handoff_no_restart", + "http-bridge-reconnect-lease-handoff-no-restart@example.com", ) - account_id = await _import_account(async_client, "acc_http_bridge_unstable", "http-bridge-unstable@example.com") account = await _get_account(account_id) service = get_proxy_service_for_app(app_instance) - fake_upstream = _FakeBridgeUpstreamWebSocket() + first_upstream = _FakeBridgeUpstreamWebSocket() + second_upstream = _FakeBridgeUpstreamWebSocket() + upstreams = [first_upstream, second_upstream] async def fake_select_account_with_budget( self, @@ -1577,47 +4149,63 @@ async def fake_connect_responses_websocket( session=None, ): del headers, access_token, account_id_header, base_url, session - return fake_upstream + return upstreams.pop(0) monkeypatch.setattr(proxy_module.ProxyService, "_select_account_with_budget", fake_select_account_with_budget) monkeypatch.setattr(proxy_module.ProxyService, "_ensure_fresh_with_budget", fake_ensure_fresh_with_budget) monkeypatch.setattr(proxy_module, "connect_responses_websocket", fake_connect_responses_websocket) - payload = 
proxy_module.ResponsesRequest.model_validate( - {"model": "gpt-5.4", "instructions": "hi", "input": [{"role": "user", "content": "hi"}]} - ) - affinity = proxy_module._sticky_key_for_responses_request( - payload, - {}, - codex_session_affinity=False, - openai_cache_affinity=False, - openai_cache_affinity_max_age_seconds=300, - sticky_threads_enabled=False, - api_key=None, - ) - key = proxy_module._make_http_bridge_session_key( - payload, - headers={}, - affinity=affinity, - api_key=None, - request_id="req_owner_unstable", + payload = proxy_module.ResponsesRequest.model_validate({"model": "gpt-5.1", "instructions": "hi", "input": []}) + affinity = proxy_module._AffinityPolicy( + key="reconnect-lease-handoff-no-restart", + kind=proxy_module.StickySessionKind.PROMPT_CACHE, ) - session = await service._get_or_create_http_bridge_session( - key, + proxy_module._make_http_bridge_session_key( + payload, + headers={}, + affinity=affinity, + api_key=None, + request_id="req_reconnect_lease_handoff_no_restart", + ), headers={}, affinity=affinity, api_key=None, request_model=payload.model, idle_ttl_seconds=120.0, - max_sessions=8, + max_sessions=128, ) - assert session.key.affinity_kind == "request" + call_order: list[str] = [] + + async def fake_delete_http_bridge_lease(self, session_id): + del self, session_id + call_order.append("delete") + + async def fake_touch_http_bridge_lease(self, session): + del self, session + call_order.append("touch") + + monkeypatch.setattr(proxy_module.ProxyService, "_delete_http_bridge_lease", fake_delete_http_bridge_lease) + monkeypatch.setattr(proxy_module.ProxyService, "_touch_http_bridge_lease", fake_touch_http_bridge_lease) + + request_state = proxy_module._WebSocketRequestState( + request_id="req_reconnect_lease_no_restart", + model=payload.model, + service_tier=None, + reasoning_effort=None, + api_key_reservation=None, + started_at=time.monotonic(), + ) + await service._reconnect_http_bridge_session(session, request_state=request_state) + 
await asyncio.sleep(0) + + assert call_order == ["touch"] + await service._close_http_bridge_session(session) @pytest.mark.asyncio -async def test_v1_responses_http_bridge_reconnect_uses_last_upstream_turn_state( +async def test_v1_responses_http_bridge_reconnect_aborts_after_lease_refresh_failure( async_client, app_instance, monkeypatch, @@ -1625,16 +4213,14 @@ async def test_v1_responses_http_bridge_reconnect_uses_last_upstream_turn_state( _install_bridge_settings_with_limits(monkeypatch, enabled=True) account_id = await _import_account( async_client, - "acc_http_bridge_upstream_turn", - "http-bridge-upstream-turn@example.com", + "acc_http_bridge_reconnect_lease_failure", + "http-bridge-reconnect-lease-failure@example.com", ) account = await _get_account(account_id) service = get_proxy_service_for_app(app_instance) - connect_headers_seen: list[dict[str, str]] = [] - upstreams = [ - _TurnStateBridgeUpstreamWebSocket("upstream_turn_state_1"), - _TurnStateBridgeUpstreamWebSocket("upstream_turn_state_2"), - ] + first_upstream = _FakeBridgeUpstreamWebSocket() + second_upstream = _FakeBridgeUpstreamWebSocket() + upstreams = [first_upstream, second_upstream] async def fake_select_account_with_budget( self, @@ -1681,58 +4267,56 @@ async def fake_connect_responses_websocket( base_url=None, session=None, ): - del access_token, account_id_header, base_url, session - connect_headers_seen.append(dict(headers)) + del headers, access_token, account_id_header, base_url, session return upstreams.pop(0) + async def failing_touch_http_bridge_lease(self, session): + del self, session + raise RuntimeError("lease touch failed") + monkeypatch.setattr(proxy_module.ProxyService, "_select_account_with_budget", fake_select_account_with_budget) monkeypatch.setattr(proxy_module.ProxyService, "_ensure_fresh_with_budget", fake_ensure_fresh_with_budget) monkeypatch.setattr(proxy_module, "connect_responses_websocket", fake_connect_responses_websocket) + 
monkeypatch.setattr(proxy_module.ProxyService, "_touch_http_bridge_lease", failing_touch_http_bridge_lease) - payload = proxy_module.ResponsesRequest.model_validate( - {"model": "gpt-5.4", "instructions": "hi", "input": [{"role": "user", "content": "hi"}]} - ) - affinity = proxy_module._sticky_key_for_responses_request( - payload, - {"x-codex-turn-state": "local_turn_state"}, - codex_session_affinity=True, - openai_cache_affinity=True, - openai_cache_affinity_max_age_seconds=300, - sticky_threads_enabled=False, - api_key=None, - ) - key = proxy_module._make_http_bridge_session_key( - payload, - headers={"x-codex-turn-state": "local_turn_state"}, - affinity=affinity, - api_key=None, - request_id="req_turn_state", + payload = proxy_module.ResponsesRequest.model_validate({"model": "gpt-5.1", "instructions": "hi", "input": []}) + affinity = proxy_module._AffinityPolicy( + key="reconnect-lease-failure", + kind=proxy_module.StickySessionKind.PROMPT_CACHE, ) - bridge_session = await service._get_or_create_http_bridge_session( - key, - headers={"x-codex-turn-state": "local_turn_state"}, + session = await service._get_or_create_http_bridge_session( + proxy_module._make_http_bridge_session_key( + payload, + headers={}, + affinity=affinity, + api_key=None, + request_id="req_reconnect_lease_failure", + ), + headers={}, affinity=affinity, api_key=None, request_model=payload.model, idle_ttl_seconds=120.0, - max_sessions=8, + max_sessions=128, ) request_state = proxy_module._WebSocketRequestState( - request_id="req-turn-state-reconnect", + request_id="req_reconnect_lease_failure_retry", model=payload.model, service_tier=None, reasoning_effort=None, api_key_reservation=None, started_at=time.monotonic(), - awaiting_response_created=True, - request_text=json.dumps({"type": "response.create", "model": "gpt-5.4", "input": []}), ) - await service._reconnect_http_bridge_session(bridge_session, request_state=request_state) - assert connect_headers_seen[0]["x-codex-turn-state"] == 
"local_turn_state" - assert connect_headers_seen[1]["x-codex-turn-state"] == "upstream_turn_state_1" - assert bridge_session.upstream_turn_state == "upstream_turn_state_2" + with pytest.raises(proxy_module.ProxyResponseError) as exc_info: + await service._reconnect_http_bridge_session(session, request_state=request_state, restart_reader=True) + + exc = exc_info.value + assert exc.status_code == 502 + assert exc.payload["error"].get("code") == "upstream_unavailable" + assert session.closed is True + assert session.upstream_reader is None or session.upstream_reader.done() @pytest.mark.asyncio @@ -3110,6 +5694,118 @@ async def fail_refresh(self, target, *, force=False, timeout_seconds): assert "x-codex-turn-state" not in response.headers +@pytest.mark.asyncio +async def test_backend_responses_http_bridge_early_error_preserves_turn_state_header(async_client, monkeypatch): + _install_bridge_settings(monkeypatch, enabled=True) + + async def fake_stream_http_responses( + self, + payload, + headers, + *, + response_headers_out=None, + **kwargs, + ): + del self, payload, headers, kwargs + assert response_headers_out is not None + response_headers_out["x-codex-turn-state"] = "http_turn_test_backend_error" + raise proxy_module.ProxyResponseError( + 502, + {"error": {"message": "upstream unavailable", "type": "server_error", "code": "upstream_unavailable"}}, + ) + yield "" + + monkeypatch.setattr(proxy_module.ProxyService, "stream_http_responses", fake_stream_http_responses) + + response = await async_client.post( + "/backend-api/codex/responses", + json={ + "model": "gpt-5.1", + "instructions": "Return exactly OK.", + "input": "hello", + "stream": True, + }, + ) + + assert response.status_code == 502 + assert response.json()["error"]["code"] == "upstream_unavailable" + assert response.headers["x-codex-turn-state"] == "http_turn_test_backend_error" + + +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_early_error_preserves_turn_state_header(async_client, 
monkeypatch): + _install_bridge_settings(monkeypatch, enabled=True) + + async def fake_stream_http_responses( + self, + payload, + headers, + *, + response_headers_out=None, + **kwargs, + ): + del self, payload, headers, kwargs + assert response_headers_out is not None + response_headers_out["x-codex-turn-state"] = "http_turn_test_v1_error" + raise proxy_module.ProxyResponseError( + 502, + {"error": {"message": "upstream unavailable", "type": "server_error", "code": "upstream_unavailable"}}, + ) + yield "" + + monkeypatch.setattr(proxy_module.ProxyService, "stream_http_responses", fake_stream_http_responses) + + response = await async_client.post( + "/v1/responses", + json={ + "model": "gpt-5.1", + "instructions": "Return exactly OK.", + "input": "hello", + }, + ) + + assert response.status_code == 502 + assert response.json()["error"]["code"] == "upstream_unavailable" + assert response.headers["x-codex-turn-state"] == "http_turn_test_v1_error" + + +@pytest.mark.asyncio +async def test_backend_responses_http_bridge_empty_stream_preserves_turn_state_header(async_client, monkeypatch): + _install_bridge_settings(monkeypatch, enabled=True) + + async def fake_stream_http_responses( + self, + payload, + headers, + *, + response_headers_out=None, + **kwargs, + ): + del self, payload, headers, kwargs + assert response_headers_out is not None + response_headers_out["x-codex-turn-state"] = "http_turn_test_backend_empty" + if False: + yield "" + + monkeypatch.setattr(proxy_module.ProxyService, "stream_http_responses", fake_stream_http_responses) + + async with async_client.stream( + "POST", + "/backend-api/codex/responses", + json={ + "model": "gpt-5.1", + "instructions": "Return exactly OK.", + "input": "hello", + "stream": True, + }, + ) as response: + assert response.status_code == 200 + lines = [line async for line in response.aiter_lines()] + + assert lines == [] + assert response.headers["x-codex-turn-state"] == "http_turn_test_backend_empty" + + @pytest.mark.asyncio 
async def test_v1_responses_http_bridge_does_not_register_turn_state_alias_before_request_admission( async_client, @@ -3404,7 +6100,8 @@ async def fake_connect_responses_websocket( "error": { "message": ( f"Previous response with id '{first_body['id']}' not found. " - "HTTP bridge continuity was lost. Replay x-codex-turn-state or retry with a stable prompt_cache_key." + "HTTP bridge continuity was lost before upstream created the next response. " + "Replay x-codex-turn-state or retry with a stable prompt_cache_key." ), "type": "invalid_request_error", "code": "previous_response_not_found", @@ -4474,6 +7171,114 @@ async def fake_create_http_bridge_session( service._http_bridge_turn_state_index.clear() +@pytest.mark.asyncio +async def test_v1_responses_http_bridge_singleflights_stale_signed_turn_state_recovery(app_instance, monkeypatch): + service = get_proxy_service_for_app(app_instance) + service._http_bridge_sessions.clear() + service._http_bridge_inflight_sessions.clear() + service._http_bridge_turn_state_index.clear() + + settings = SimpleNamespace( + http_responses_session_bridge_enabled=True, + http_responses_session_bridge_idle_ttl_seconds=120.0, + http_responses_session_bridge_codex_idle_ttl_seconds=120.0, + http_responses_session_bridge_max_sessions=8, + http_responses_session_bridge_instance_id="instance-a", + http_responses_session_bridge_instance_ring=["instance-a", "instance-b"], + ) + monkeypatch.setattr(proxy_module, "get_settings_cache", lambda: _SettingsCache(settings)) + monkeypatch.setattr(proxy_module, "get_settings", lambda: settings) + + create_started: list[str] = [] + session_id = next( + candidate + for candidate in ( + "hbs_signed_stale_singleflight_a", + "hbs_signed_stale_singleflight_b", + "hbs_signed_stale_singleflight_c", + "hbs_signed_stale_singleflight_d", + ) + if proxy_module._http_bridge_owner_instance( + proxy_module._HTTPBridgeSessionKey("turn_state_header", candidate, None), + settings, + ) + == "instance-b" + ) + 
signed_turn_state = service._encode_http_bridge_turn_state( + session_id=session_id, + owner_instance_id="instance-b", + api_key_id=None, + ) + + async def fake_create_http_bridge_session( + self, + key, + *, + headers, + affinity, + request_model, + idle_ttl_seconds, + bridge_session_id=None, + owner_instance_id=None, + ): + del self, headers, affinity, request_model, idle_ttl_seconds + create_started.append(key.affinity_key) + await asyncio.sleep(0.2) + session = _make_dummy_bridge_session(key) + session.bridge_session_id = bridge_session_id or "" + session.owner_instance_id = owner_instance_id or "instance-a" + return session + + monkeypatch.setattr(proxy_module.ProxyService, "_create_http_bridge_session", fake_create_http_bridge_session) + + try: + first = asyncio.create_task( + service._get_or_create_http_bridge_session( + proxy_module._HTTPBridgeSessionKey("turn_state_header", signed_turn_state, None), + headers={"x-codex-turn-state": signed_turn_state}, + affinity=proxy_module._AffinityPolicy( + key=signed_turn_state, + kind=proxy_module.StickySessionKind.CODEX_SESSION, + ), + api_key=None, + request_model="gpt-5.1", + idle_ttl_seconds=120.0, + max_sessions=8, + ) + ) + second = asyncio.create_task( + service._get_or_create_http_bridge_session( + proxy_module._HTTPBridgeSessionKey("turn_state_header", signed_turn_state, None), + headers={"x-codex-turn-state": signed_turn_state}, + affinity=proxy_module._AffinityPolicy( + key=signed_turn_state, + kind=proxy_module.StickySessionKind.CODEX_SESSION, + ), + api_key=None, + request_model="gpt-5.1", + idle_ttl_seconds=120.0, + max_sessions=8, + ) + ) + session_one, session_two = await asyncio.gather(first, second) + + assert len(create_started) == 1 + assert session_one is session_two + assert ( + proxy_module._http_bridge_owner_instance( + proxy_module._HTTPBridgeSessionKey("turn_state_header", session_id, None), + settings, + ) + == "instance-b" + ) + assert session_one.key.affinity_kind == "turn_state_header" + 
assert session_one.key.affinity_key != signed_turn_state + finally: + service._http_bridge_sessions.clear() + service._http_bridge_inflight_sessions.clear() + service._http_bridge_turn_state_index.clear() + + @pytest.mark.asyncio async def test_v1_responses_http_bridge_cleans_up_cancelled_singleflight_creator(app_instance, monkeypatch): service = get_proxy_service_for_app(app_instance) diff --git a/tests/integration/test_load_balancer_integration.py b/tests/integration/test_load_balancer_integration.py index 19bb5c07..429e5034 100644 --- a/tests/integration/test_load_balancer_integration.py +++ b/tests/integration/test_load_balancer_integration.py @@ -12,6 +12,7 @@ from app.db.session import SessionLocal from app.modules.accounts.repository import AccountsRepository from app.modules.api_keys.repository import ApiKeysRepository +from app.modules.proxy.bridge_repository import HttpBridgeLeasesRepository from app.modules.proxy.load_balancer import LoadBalancer from app.modules.proxy.repo_bundle import ProxyRepositories from app.modules.proxy.sticky_repository import StickySessionsRepository @@ -29,6 +30,7 @@ async def _repo_factory() -> AsyncIterator[ProxyRepositories]: usage=UsageRepository(session), request_logs=RequestLogsRepository(session), sticky_sessions=StickySessionsRepository(session), + http_bridge_leases=HttpBridgeLeasesRepository(session), api_keys=ApiKeysRepository(session), additional_usage=AdditionalUsageRepository(session), ) diff --git a/tests/unit/test_pricing.py b/tests/unit/test_pricing.py index 2871060f..8816ee79 100644 --- a/tests/unit/test_pricing.py +++ b/tests/unit/test_pricing.py @@ -143,11 +143,13 @@ def test_calculate_cost_from_usage_legacy_gpt_5_service_tiers() -> None: usage = UsageTokens(input_tokens=1_000_000.0, output_tokens=1_000_000.0) gpt_5_priority = calculate_cost_from_usage(usage, DEFAULT_PRICING_MODELS["gpt-5"], service_tier="priority") + gpt_5_1_priority = calculate_cost_from_usage(usage, DEFAULT_PRICING_MODELS["gpt-5.1"], 
service_tier="priority") gpt_5_1_flex = calculate_cost_from_usage(usage, DEFAULT_PRICING_MODELS["gpt-5.1"], service_tier="flex") gpt_5_2_priority = calculate_cost_from_usage(usage, DEFAULT_PRICING_MODELS["gpt-5.2"], service_tier="priority") gpt_5_2_flex = calculate_cost_from_usage(usage, DEFAULT_PRICING_MODELS["gpt-5.2"], service_tier="flex") assert gpt_5_priority == pytest.approx(22.5) + assert gpt_5_1_priority == pytest.approx(22.5) assert gpt_5_1_flex == pytest.approx(5.625) assert gpt_5_2_priority == pytest.approx(31.5) assert gpt_5_2_flex == pytest.approx(7.875) diff --git a/tests/unit/test_proxy_load_balancer_refresh.py b/tests/unit/test_proxy_load_balancer_refresh.py index 1d208693..d182898b 100644 --- a/tests/unit/test_proxy_load_balancer_refresh.py +++ b/tests/unit/test_proxy_load_balancer_refresh.py @@ -260,6 +260,7 @@ async def _repo_factory( usage=usage_repo, request_logs=StubRequestLogsRepository(), sticky_sessions=sticky_repo, + http_bridge_leases=object(), # type: ignore[arg-type] api_keys=StubApiKeysRepository(), additional_usage=additional_usage_repo or StubAdditionalUsageRepository(), ) @@ -961,6 +962,7 @@ async def repo_factory() -> AsyncIterator[ProxyRepositories]: additional_usage=StubAdditionalUsageRepository(), request_logs=object(), # type: ignore[arg-type] sticky_sessions=sticky_repo, + http_bridge_leases=object(), # type: ignore[arg-type] api_keys=object(), # type: ignore[arg-type] ) diff --git a/tests/unit/test_proxy_service_additional_limits.py b/tests/unit/test_proxy_service_additional_limits.py index ac8efa84..a999afc9 100644 --- a/tests/unit/test_proxy_service_additional_limits.py +++ b/tests/unit/test_proxy_service_additional_limits.py @@ -156,6 +156,7 @@ async def repo_factory() -> Any: usage=object(), # type: ignore[arg-type] request_logs=object(), # type: ignore[arg-type] sticky_sessions=object(), # type: ignore[arg-type] + http_bridge_leases=object(), # type: ignore[arg-type] api_keys=object(), # type: ignore[arg-type] 
additional_usage=additional_usage, # type: ignore[arg-type] ) @@ -168,6 +169,7 @@ async def repo_factory() -> Any: usage=object(), # type: ignore[arg-type] request_logs=object(), # type: ignore[arg-type] sticky_sessions=object(), # type: ignore[arg-type] + http_bridge_leases=object(), # type: ignore[arg-type] api_keys=object(), # type: ignore[arg-type] additional_usage=additional_usage, # type: ignore[arg-type] ), diff --git a/tests/unit/test_proxy_utils.py b/tests/unit/test_proxy_utils.py index e7c214a2..bad7be48 100644 --- a/tests/unit/test_proxy_utils.py +++ b/tests/unit/test_proxy_utils.py @@ -148,6 +148,7 @@ def test_has_native_codex_transport_headers_requires_allowlisted_originator(): assert proxy_module._has_native_codex_transport_headers({"originator": "codex_chatgpt_desktop"}) is True assert proxy_module._has_native_codex_transport_headers({"originator": "Codex Chat"}) is False assert proxy_module._has_native_codex_transport_headers({"originator": "Codex QA"}) is False + assert proxy_module._has_native_codex_transport_headers({"originator": "Codex Foo"}) is False assert proxy_module._has_native_codex_transport_headers({"originator": "other-client"}) is False