def upgrade() -> None:
    """Add the credit-based values to the ``limit_type`` / ``limit_window`` enums.

    Adds ``'credits'`` to ``limit_type`` and ``'5h'`` / ``'7d'`` to
    ``limit_window``.  The migration is a no-op when the ``api_key_limits``
    table does not exist or when the bound dialect is not PostgreSQL.

    Every statement uses ``ADD VALUE IF NOT EXISTS`` so re-running the
    migration against a database that already has the values is harmless.
    """
    bind = op.get_bind()
    if not _table_exists(bind, "api_key_limits"):
        return
    if bind.dialect.name != "postgresql":
        return

    statements = (
        "ALTER TYPE limit_type ADD VALUE IF NOT EXISTS 'credits'",
        "ALTER TYPE limit_window ADD VALUE IF NOT EXISTS '5h'",
        "ALTER TYPE limit_window ADD VALUE IF NOT EXISTS '7d'",
    )

    # ALTER TYPE ... ADD VALUE is rejected inside a transaction block on
    # PostgreSQL < 12, and on newer servers the added value cannot be used
    # until the transaction that added it has committed.  Alembic runs
    # migrations inside a transaction, so step out of it for these statements.
    with op.get_context().autocommit_block():
        for statement in statements:
            op.execute(sa.text(statement))
+ return diff --git a/app/db/models.py b/app/db/models.py index 98eecb1c..3130d5b3 100644 --- a/app/db/models.py +++ b/app/db/models.py @@ -280,12 +280,15 @@ class LimitType(str, Enum): INPUT_TOKENS = "input_tokens" OUTPUT_TOKENS = "output_tokens" COST_USD = "cost_usd" + CREDITS = "credits" class LimitWindow(str, Enum): DAILY = "daily" WEEKLY = "weekly" MONTHLY = "monthly" + FIVE_HOURS = "5h" + SEVEN_DAYS = "7d" class ApiKeyLimit(Base): diff --git a/app/modules/api_keys/schemas.py b/app/modules/api_keys/schemas.py index a8f46d78..22fa9918 100644 --- a/app/modules/api_keys/schemas.py +++ b/app/modules/api_keys/schemas.py @@ -8,8 +8,8 @@ class LimitRuleCreate(DashboardModel): - limit_type: str = Field(pattern=r"^(total_tokens|input_tokens|output_tokens|cost_usd)$") - limit_window: str = Field(pattern=r"^(daily|weekly|monthly)$") + limit_type: str = Field(pattern=r"^(total_tokens|input_tokens|output_tokens|cost_usd|credits)$") + limit_window: str = Field(pattern=r"^(daily|weekly|monthly|5h|7d)$") max_value: int = Field(ge=1) model_filter: str | None = None diff --git a/app/modules/api_keys/service.py b/app/modules/api_keys/service.py index 2ecb59ad..d9d7cc88 100644 --- a/app/modules/api_keys/service.py +++ b/app/modules/api_keys/service.py @@ -707,10 +707,11 @@ async def get_key_usage_summary_for_self(self, key_id: str) -> ApiKeySelfUsageDa limit_type=limit.limit_type.value, limit_window=limit.limit_window.value, max_value=limit.max_value, - current_value=limit.current_value, - remaining_value=max(0, limit.max_value - limit.current_value), + current_value=max(0, min(limit.current_value, limit.max_value)), + remaining_value=max(0, limit.max_value - max(0, min(limit.current_value, limit.max_value))), model_filter=limit.model_filter, reset_at=limit.reset_at, + source="api_key_override" if limit.limit_type == LimitType.CREDITS else "api_key_limit", ) for limit in refreshed.limits ] @@ -769,6 +770,7 @@ class ApiKeySelfLimitData: remaining_value: int model_filter: str | 
None reset_at: datetime + source: str = "api_key_limit" @dataclass(frozen=True, slots=True) @@ -975,6 +977,8 @@ def _reserve_budget_for_limit_type( return 8_192 if limit_type == LimitType.COST_USD: return _reserve_cost_budget_microdollars(request_model, request_service_tier) + if limit_type == LimitType.CREDITS: + return 0 return 1 @@ -1006,6 +1010,8 @@ def _compute_increment_for_limit_type( return output_tokens if limit_type == LimitType.COST_USD: return cost_microdollars + if limit_type == LimitType.CREDITS: + return 0 return 0 @@ -1071,6 +1077,8 @@ def _limit_input_to_row( reset_at: datetime | None = None, ) -> ApiKeyLimit: window = LimitWindow(li.limit_window) + if li.limit_type == LimitType.CREDITS.value and li.model_filter is not None: + raise ValueError("credits limits do not support model_filter") return ApiKeyLimit( api_key_id=key_id, limit_type=LimitType(li.limit_type), @@ -1145,6 +1153,10 @@ def _limit_identity_from_row(limit: ApiKeyLimit) -> tuple[str, str, str | None]: def _next_reset(now: datetime, window: LimitWindow) -> datetime: + if window == LimitWindow.FIVE_HOURS: + return now + timedelta(hours=5) + if window == LimitWindow.SEVEN_DAYS: + return now + timedelta(days=7) if window == LimitWindow.DAILY: return now + timedelta(days=1) if window == LimitWindow.WEEKLY: @@ -1163,6 +1175,10 @@ def _advance_reset(reset_at: datetime, now: datetime, window: LimitWindow) -> da def _window_delta(window: LimitWindow) -> timedelta: + if window == LimitWindow.FIVE_HOURS: + return timedelta(hours=5) + if window == LimitWindow.SEVEN_DAYS: + return timedelta(days=7) if window == LimitWindow.DAILY: return timedelta(days=1) if window == LimitWindow.WEEKLY: diff --git a/app/modules/proxy/api.py b/app/modules/proxy/api.py index 4b51b00a..41d114d9 100644 --- a/app/modules/proxy/api.py +++ b/app/modules/proxy/api.py @@ -3,12 +3,16 @@ import logging import time from collections.abc import AsyncIterator, Mapping +from datetime import datetime, timezone from typing import 
cast from fastapi import APIRouter, Body, Depends, File, Form, Request, Response, Security, UploadFile, WebSocket from fastapi.responses import JSONResponse, StreamingResponse from pydantic import ValidationError +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession +from app.core import usage as usage_core from app.core.auth.dependencies import ( set_openai_error_format, validate_codex_usage_identity, @@ -39,8 +43,10 @@ from app.core.openai.v1_requests import V1ResponsesCompactRequest, V1ResponsesRequest from app.core.runtime_logging import log_error_response from app.core.types import JsonValue +from app.core.usage.types import UsageWindowRow from app.core.utils.json_guards import is_json_mapping from app.core.utils.sse import parse_sse_data_json +from app.db.models import Account, UsageHistory from app.db.session import get_background_session from app.dependencies import ProxyContext, get_proxy_context, get_proxy_websocket_context from app.modules.api_keys.repository import ApiKeysRepository @@ -48,6 +54,8 @@ ApiKeyData, ApiKeyInvalidError, ApiKeyRateLimitExceededError, + ApiKeySelfLimitData, + ApiKeySelfUsageData, ApiKeysService, ApiKeyUsageReservationData, ) @@ -69,6 +77,7 @@ V1UsageLimitResponse, V1UsageResponse, ) +from app.modules.usage.repository import UsageRepository logger = logging.getLogger(__name__) @@ -251,6 +260,7 @@ async def v1_usage( async with get_background_session() as session: service = ApiKeysService(ApiKeysRepository(session)) usage = await service.get_key_usage_summary_for_self(api_key.id) + aggregate_limits = await _build_aggregate_credit_limits(session) if usage is None: raise ProxyAuthError("Invalid API key") @@ -260,18 +270,124 @@ async def v1_usage( total_tokens=usage.total_tokens, cached_input_tokens=usage.cached_input_tokens, total_cost_usd=usage.total_cost_usd, - limits=[ - V1UsageLimitResponse( - limit_type=limit.limit_type, - limit_window=limit.limit_window, - max_value=limit.max_value, - 
def _build_v1_usage_limits(
    usage: ApiKeySelfUsageData,
    aggregate_limits: dict[str, V1UsageLimitResponse],
) -> list[V1UsageLimitResponse]:
    """Choose the limit entries reported by ``/v1/usage``.

    When ``aggregate_limits`` provides both the ``"5h"`` and the ``"7d"``
    credit window, exactly those two entries are returned (5h first).  Each one
    is narrowed by the key's own ``credits`` rule for the same window when the
    key has such a rule without a ``model_filter``.

    In every other case (no aggregate data, or only one of the two windows)
    the key's own limits are returned, in their original order.

    NOTE(review): when both aggregate windows are present, the key's
    non-credit limits (tokens, cost) are not part of the response at all.
    Confirm this is the intended contract.
    """
    credit_windows = ("5h", "7d")

    # Only unfiltered credit rules are eligible to override an aggregate window.
    credit_overrides = {
        limit.limit_window: limit
        for limit in usage.limits
        if limit.limit_type == "credits" and limit.model_filter is None
    }

    merged = [
        _apply_credit_override(aggregate, credit_overrides.get(window))
        for window in credit_windows
        if (aggregate := aggregate_limits.get(window)) is not None
    ]
    if {item.limit_window for item in merged} == set(credit_windows):
        return merged

    # Build the per-key view only when it is actually the one being returned.
    return [_to_v1_usage_limit_response(limit) for limit in usage.limits]


def _to_v1_usage_limit_response(limit: ApiKeySelfLimitData) -> V1UsageLimitResponse:
    """Convert one per-key limit into its ``/v1/usage`` response entry.

    ``current_value`` is clamped into ``[0, max_value]`` and
    ``remaining_value`` is derived from the clamped value, so the two always
    add up to ``max_value`` (for a non-negative ``max_value``).

    ``reset_at`` is rendered as an ISO-8601 string with a trailing ``Z``.
    """
    current_value = max(0, min(limit.current_value, limit.max_value))

    reset_at = limit.reset_at
    if reset_at.tzinfo is not None:
        # An aware datetime would otherwise serialise as "...+00:00Z" (or carry
        # a non-UTC offset followed by "Z").  Convert to naive UTC first so the
        # "Z" suffix is both well-formed and truthful.
        reset_at = reset_at.astimezone(timezone.utc).replace(tzinfo=None)

    return V1UsageLimitResponse(
        limit_type=limit.limit_type,
        limit_window=limit.limit_window,
        max_value=limit.max_value,
        current_value=current_value,
        remaining_value=max(0, limit.max_value - current_value),
        model_filter=limit.model_filter,
        reset_at=reset_at.isoformat() + "Z",
        source=limit.source,
    )


def _apply_credit_override(
    aggregate_limit: V1UsageLimitResponse,
    override_limit: ApiKeySelfLimitData | None,
) -> V1UsageLimitResponse:
    """Cap an aggregate credit window with the key's own credit rule.

    Without an override the aggregate entry is returned untouched.  With one,
    the result keeps the aggregate's window, usage and reset time but uses the
    override's ``max_value`` as the ceiling: usage is clamped to that ceiling
    and the entry is tagged ``source="api_key_override"``.  The override's own
    ``current_value`` is not consulted.
    """
    if override_limit is None:
        return aggregate_limit

    override_max = max(0, override_limit.max_value)
    current_value = max(0, min(aggregate_limit.current_value, override_max))
    return V1UsageLimitResponse(
        limit_type="credits",
        limit_window=aggregate_limit.limit_window,
        max_value=override_max,
        current_value=current_value,
        remaining_value=max(0, override_max - current_value),
        model_filter=None,
        reset_at=aggregate_limit.reset_at,
        source="api_key_override",
    )


async def _build_aggregate_credit_limits(session: AsyncSession) -> dict[str, V1UsageLimitResponse]:
    """Summarise the latest upstream usage into ``"5h"`` / ``"7d"`` credit limits.

    Reads the latest ``primary`` and ``secondary`` usage entry per account,
    passes them through ``usage_core.normalize_weekly_only_rows`` and
    summarises each window with ``usage_core.summarize_usage_window``.
    ``primary`` is reported under the ``"5h"`` key and ``secondary`` under
    ``"7d"``.

    A window is left out of the result when it has no rows or when its rounded
    capacity is not positive.  An empty dict is returned when no account has
    any usage row.
    """
    usage_repository = UsageRepository(session)
    primary_latest = await usage_repository.latest_by_account(window="primary")
    secondary_latest = await usage_repository.latest_by_account(window="secondary")

    primary_rows = [_usage_entry_to_window_row(entry) for entry in primary_latest.values()]
    secondary_rows = [_usage_entry_to_window_row(entry) for entry in secondary_latest.values()]
    primary_rows, secondary_rows = usage_core.normalize_weekly_only_rows(primary_rows, secondary_rows)

    account_ids = {row.account_id for row in primary_rows} | {row.account_id for row in secondary_rows}
    if not account_ids:
        return {}

    account_map = {account.id: account for account in await _load_accounts_by_id(session, account_ids)}
    limits: dict[str, V1UsageLimitResponse] = {}

    for window_key, rows, label in (("primary", primary_rows, "5h"), ("secondary", secondary_rows, "7d")):
        if not rows:
            continue
        summary = usage_core.summarize_usage_window(rows, account_map, window_key)
        max_value = max(0, int(round(summary.capacity_credits or 0.0)))
        if max_value <= 0:
            # A window without capacity cannot be expressed as a limit.
            continue
        current_value = max(0, min(int(round(summary.used_credits or 0.0)), max_value))
        # The response schema requires a string, so a summary without a reset
        # time is reported as an empty string.
        reset_at = ""
        if summary.reset_at is not None:
            reset_at = datetime.fromtimestamp(summary.reset_at, tz=timezone.utc).isoformat().replace("+00:00", "Z")
        limits[label] = V1UsageLimitResponse(
            limit_type="credits",
            limit_window=label,
            max_value=max_value,
            current_value=current_value,
            remaining_value=max(0, max_value - current_value),
            model_filter=None,
            reset_at=reset_at,
            source="aggregate",
        )

    return limits


async def _load_accounts_by_id(session: AsyncSession, account_ids: set[str]) -> list[Account]:
    """Load the ``Account`` rows whose id is in ``account_ids``.

    Returns an empty list without querying when ``account_ids`` is empty.
    """
    if not account_ids:
        return []
    result = await session.execute(select(Account).where(Account.id.in_(account_ids)))
    return list(result.scalars().all())


def _usage_entry_to_window_row(entry: UsageHistory) -> UsageWindowRow:
    """Copy the fields of a ``UsageHistory`` row into a ``UsageWindowRow``."""
    return UsageWindowRow(
        account_id=entry.account_id,
        used_percent=entry.used_percent,
        reset_at=entry.reset_at,
        window_minutes=entry.window_minutes,
        recorded_at=entry.recorded_at,
    )
= isCost ? `$${(l.currentValue / 1_000_000).toFixed(2)}` : formatCompactNumber(l.currentValue); const max = isCost ? `$${(l.maxValue / 1_000_000).toFixed(2)}` : formatCompactNumber(l.maxValue); - return `${type}: ${current}/${max} ${l.limitWindow}`; + const suffix = isCost ? l.limitWindow : isCredits ? `${l.limitWindow}` : l.limitWindow; + return `${type}: ${current}/${max} ${suffix}`; }) .join(" | "); } diff --git a/frontend/src/features/api-keys/components/limit-rule-card.tsx b/frontend/src/features/api-keys/components/limit-rule-card.tsx index c6d85397..075bd8cc 100644 --- a/frontend/src/features/api-keys/components/limit-rule-card.tsx +++ b/frontend/src/features/api-keys/components/limit-rule-card.tsx @@ -23,12 +23,15 @@ const LIMIT_TYPE_LABELS: Record = { input_tokens: "Input Tokens", output_tokens: "Output Tokens", cost_usd: "Cost ($)", + credits: "Credits", }; const WINDOW_LABELS: Record = { daily: "Daily", weekly: "Weekly", monthly: "Monthly", + "5h": "5h", + "7d": "7d", }; const LIMIT_TYPE_SET: ReadonlySet = new Set(LIMIT_TYPES); @@ -50,6 +53,7 @@ export type LimitRuleCardProps = { export function LimitRuleCard({ rule, onChange, onRemove }: LimitRuleCardProps) { const isCost = rule.limitType === "cost_usd"; + const isCredits = rule.limitType === "credits"; const displayValue = isCost && rule.maxValue > 0 ? String(rule.maxValue / 1_000_000) : rule.maxValue > 0 @@ -76,7 +80,11 @@ export function LimitRuleCard({ rule, onChange, onRemove }: LimitRuleCardProps) const handleLimitTypeChange = (v: string) => { if (isLimitType(v)) { - onChange({ ...rule, limitType: v }); + onChange({ + ...rule, + limitType: v, + modelFilter: v === "credits" ? null : rule.modelFilter, + }); } }; @@ -133,7 +141,7 @@ export function LimitRuleCard({ rule, onChange, onRemove }: LimitRuleCardProps)
Model filter onChange({ ...rule, modelFilter: models[0] || null })} - placeholder="All models" + onChange={(models) => { + if (isCredits) return; + onChange({ ...rule, modelFilter: models[0] || null }); + }} + placeholder={isCredits ? "Credits limits apply globally" : "All models"} />
# Per-window shape of the seeded usage rows: (time until reset, window length in minutes).
_SEEDED_WINDOW_SHAPE = {
    "primary": (timedelta(hours=4), 300),
    "secondary": (timedelta(days=6), 10080),
}


def _plus_account(*, tag: str, suffix: str, now, tokens: tuple[bytes, bytes, bytes]) -> Account:
    """Build an active ``plus`` account whose identifiers embed ``tag`` and ``suffix``."""
    access_token, refresh_token, id_token = tokens
    return Account(
        id=f"acc-plus-{tag}-{suffix}",
        chatgpt_account_id=f"chatgpt-plus-{tag}-{suffix}",
        email=f"plus-{tag}-{suffix}@example.com",
        plan_type="plus",
        access_token_encrypted=access_token,
        refresh_token_encrypted=refresh_token,
        id_token_encrypted=id_token,
        last_refresh=now,
        status=AccountStatus.ACTIVE,
        reset_at=None,
    )


def _usage_sample(*, account_id: str, now, window: str, used_percent: float) -> UsageHistory:
    """Build one usage row recorded at ``now`` for the given window."""
    until_reset, window_minutes = _SEEDED_WINDOW_SHAPE[window]
    return UsageHistory(
        account_id=account_id,
        recorded_at=now,
        window=window,
        used_percent=used_percent,
        reset_at=int((now + until_reset).timestamp()),
        window_minutes=window_minutes,
    )


async def _seed_upstream_usage(*, now) -> None:
    """Insert two ``plus`` accounts with usage rows for both windows.

    Account *a* is seeded at 10% (primary) / 20% (secondary) and account *b* at
    20% / 30%.  Identifiers are suffixed with ``now`` in microseconds.
    """
    suffix = str(int(now.timestamp() * 1_000_000))
    account_a = _plus_account(tag="a", suffix=suffix, now=now, tokens=(b"a", b"b", b"c"))
    account_b = _plus_account(tag="b", suffix=suffix, now=now, tokens=(b"d", b"e", b"f"))

    rows = [
        account_a,
        account_b,
        _usage_sample(account_id=account_a.id, now=now, window="primary", used_percent=10.0),
        _usage_sample(account_id=account_b.id, now=now, window="primary", used_percent=20.0),
        _usage_sample(account_id=account_a.id, now=now, window="secondary", used_percent=20.0),
        _usage_sample(account_id=account_b.id, now=now, window="secondary", used_percent=30.0),
    ]

    async with SessionLocal() as session:
        session.add_all(rows)
        await session.commit()


async def _seed_upstream_usage_partial(*, now, windows: tuple[str, ...]) -> None:
    """Insert one ``plus`` account with usage rows only for the requested windows.

    ``windows`` may contain ``"primary"`` (seeded at 10%) and/or
    ``"secondary"`` (seeded at 20%); any other value is ignored.
    """
    suffix = str(int(now.timestamp() * 1_000_000))
    account = _plus_account(tag="partial", suffix=suffix, now=now, tokens=(b"a", b"b", b"c"))

    entries = [account]
    for window, used_percent in (("primary", 10.0), ("secondary", 20.0)):
        if window in windows:
            entries.append(
                _usage_sample(account_id=account.id, now=now, window=window, used_percent=used_percent)
            )

    async with SessionLocal() as session:
        session.add_all(entries)
        await session.commit()
450, + "current_value": 68, + "remaining_value": 382, + "model_filter": None, + "reset_at": payload["limits"][0]["reset_at"], + "source": "aggregate", + } + assert payload["limits"][1] == { + "limit_type": "credits", + "limit_window": "7d", + "max_value": 15120, + "current_value": 3780, + "remaining_value": 11340, + "model_filter": None, + "reset_at": payload["limits"][1]["reset_at"], + "source": "aggregate", + } + assert payload["limits"][0]["reset_at"].endswith("Z") + assert payload["limits"][1]["reset_at"].endswith("Z") + + +@pytest.mark.asyncio +async def test_v1_usage_overrides_aggregate_credit_windows_with_api_key_credit_limits(async_client): + key_id, plain_key = await _create_api_key( + name="credit-override", + limits=[ + LimitRuleInput(limit_type="credits", limit_window="5h", max_value=60), + LimitRuleInput(limit_type="credits", limit_window="7d", max_value=1000), + ], + ) + now = utcnow() + await _seed_upstream_usage(now=now) + + async with SessionLocal() as session: + repo = ApiKeysRepository(session) + await repo.replace_limits( + key_id, + [ + ApiKeyLimit( + api_key_id=key_id, + limit_type=LimitType.CREDITS, + limit_window=LimitWindow.FIVE_HOURS, + max_value=60, + current_value=999, + model_filter=None, + reset_at=now + timedelta(hours=5), + ), + ApiKeyLimit( + api_key_id=key_id, + limit_type=LimitType.CREDITS, + limit_window=LimitWindow.SEVEN_DAYS, + max_value=1000, + current_value=10, + model_filter=None, + reset_at=now + timedelta(days=7), + ), + ], + ) + + response = await async_client.get("/v1/usage", headers={"Authorization": f"Bearer {plain_key}"}) + + assert response.status_code == 200 + payload = response.json() + assert payload["limits"] == [ + { + "limit_type": "credits", + "limit_window": "5h", + "max_value": 60, + "current_value": 60, + "remaining_value": 0, + "model_filter": None, + "reset_at": payload["limits"][0]["reset_at"], + "source": "api_key_override", + }, + { + "limit_type": "credits", + "limit_window": "7d", + "max_value": 
1000, + "current_value": 1000, + "remaining_value": 0, + "model_filter": None, + "reset_at": payload["limits"][1]["reset_at"], + "source": "api_key_override", + }, + ] + + +@pytest.mark.asyncio +async def test_v1_usage_prefers_raw_limits_when_aggregate_credit_pair_is_partial(async_client): + key_id, plain_key = await _create_api_key( + name="fallback-partial-raw", + limits=[ + LimitRuleInput(limit_type="total_tokens", limit_window="daily", max_value=300), + LimitRuleInput(limit_type="total_tokens", limit_window="weekly", max_value=1000), + ], + ) + now = utcnow() + await _seed_upstream_usage_partial(now=now, windows=("primary",)) + + daily_reset = now + timedelta(hours=2) + weekly_reset = now + timedelta(days=4) + + async with SessionLocal() as session: + repo = ApiKeysRepository(session) + await repo.replace_limits( + key_id, + [ + ApiKeyLimit( + api_key_id=key_id, + limit_type=LimitType.TOTAL_TOKENS, + limit_window=LimitWindow.DAILY, + max_value=300, + current_value=50, + model_filter=None, + reset_at=daily_reset, + ), + ApiKeyLimit( + api_key_id=key_id, + limit_type=LimitType.TOTAL_TOKENS, + limit_window=LimitWindow.WEEKLY, + max_value=1000, + current_value=200, + model_filter=None, + reset_at=weekly_reset, + ), + ], + ) + + response = await async_client.get("/v1/usage", headers={"Authorization": f"Bearer {plain_key}"}) + + assert response.status_code == 200 + payload = response.json() + assert payload["limits"] == [ + { + "limit_type": "total_tokens", + "limit_window": "daily", + "max_value": 300, + "current_value": 50, + "remaining_value": 250, + "model_filter": None, + "reset_at": daily_reset.isoformat() + "Z", + "source": "api_key_limit", + }, + { + "limit_type": "total_tokens", + "limit_window": "weekly", + "max_value": 1000, + "current_value": 200, + "remaining_value": 800, + "model_filter": None, + "reset_at": weekly_reset.isoformat() + "Z", + "source": "api_key_limit", + }, + ]