From cc812733b8bc22c57285725aa0abf0aaa83fba23 Mon Sep 17 00:00:00 2001
From: charles
Date: Tue, 8 Jul 2025 20:42:48 -0700
Subject: [PATCH] Update the openai completion request body

---
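Notes (not applied by git am): every adapter below now resolves the token
limit with the same fallback chain -- the newer OpenAI parameter
max_completion_tokens wins when present, then the deprecated max_tokens, then
the provider default. A minimal sketch of that precedence (illustration only;
resolve_max_tokens and the default value are hypothetical, and it assumes
None-valued fields were already dropped from the payload, e.g. via
model_dump(exclude_none=True), since dict.get() returns an explicit None
rather than falling back):

    DEFAULT_MAX_TOKENS = 4096  # hypothetical provider default

    def resolve_max_tokens(payload: dict) -> int:
        # Prefer max_completion_tokens, then max_tokens, then the default.
        return payload.get(
            "max_completion_tokens",
            payload.get("max_tokens", DEFAULT_MAX_TOKENS),
        )

    assert resolve_max_tokens({"max_completion_tokens": 1024, "max_tokens": 256}) == 1024
    assert resolve_max_tokens({"max_tokens": 256}) == 256
    assert resolve_max_tokens({}) == 4096
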
 app/api/schemas/openai.py                   | 62 +++++++++++----------
 app/services/providers/anthropic_adapter.py |  2 +-
 app/services/providers/bedrock_adapter.py   |  4 ++--
 app/services/providers/google_adapter.py    |  2 +-
 app/services/providers/mock_adapter.py      |  2 +-
 5 files changed, 38 insertions(+), 34 deletions(-)

diff --git a/app/api/schemas/openai.py b/app/api/schemas/openai.py
index 1559f50..a9061a9 100644
--- a/app/api/schemas/openai.py
+++ b/app/api/schemas/openai.py
@@ -41,33 +41,36 @@ class ChatMessage(BaseModel):
 
 
 class ChatCompletionRequest(BaseModel):
-    model: str
     messages: list[ChatMessage]
-    temperature: float | None = 1.0
-    top_p: float | None = 1.0
-    n: int | None = 1
-    stream: bool | None = False
-    stream_options: dict[str, Any] | None = None
-    stop: str | list[str] | None = None
-    max_tokens: int | None = None
-    presence_penalty: float | None = 0.0
+    model: str
+    audio: object | None = None
     frequency_penalty: float | None = 0.0
-    logit_bias: dict[str, float] | None = None
-    user: str | None = None
-    web_search_options: Any | None = None
-    response_format: dict[str, Any] | None = None
-    tools: list[Any, Any] | None = None
-    tool_choice: str | None = None
-    seed: int | None = None
+    logit_bias: dict[Any, Any] | None = None
     logprobs: bool | None = None
-    top_logprobs: int | None = None
     max_completion_tokens: int | None = None
+    max_tokens: int | None = None  # deprecated
     metadata: dict[Any, Any] | None = None
-    modalities: list[str] | None = None
+    modalities: list[Any] | None = None
+    n: int | None = 1
     parallel_tool_calls: bool | None = True
-    prediction: dict[Any, Any] | None = None
+    prediction: object | None = None
+    presence_penalty: float | None = 0.0
     reasoning_effort: str | None = "medium"
+    response_format: Any | None = None
+    seed: int | None = None
     service_tier: str | None = "auto"
+    stop: str | list[Any] | None = None
+    store: bool | None = False
+    stream: bool | None = False
+    stream_options: object | None = None
+    temperature: float | None = 1.0
+    tool_choice: str | None = None
+    tools: list[Any] | None = None
+    top_logprobs: int | None = None
+    top_p: float | None = 1.0
+    user: str | None = None
+    web_search_options: object | None = None
+
 
 class ChatCompletionResponseChoice(BaseModel):
     index: int
@@ -93,19 +96,20 @@ class ChatCompletionResponse(BaseModel):
 class CompletionRequest(BaseModel):
     model: str
     prompt: str | list[str]
-    suffix: str | None = None
+    best_of: int | None = 1
+    echo: bool | None = False
+    frequency_penalty: float | None = 0.0
+    logit_bias: dict[Any, Any] | None = None
+    logprobs: int | None = None
     max_tokens: int | None = 16
-    temperature: float | None = 1.0
-    top_p: float | None = 1.0
     n: int | None = 1
-    stream: bool | None = False
-    logprobs: int | None = None
-    echo: bool | None = False
-    stop: str | list[str] | None = None
     presence_penalty: float | None = 0.0
-    frequency_penalty: float | None = 0.0
-    best_of: int | None = 1
-    logit_bias: dict[str, float] | None = None
+    seed: int | None = None
+    stop: str | list[str] | None = None
+    stream: bool | None = False
+    stream_options: object | None = None
+    suffix: str | None = None
+    temperature: float | None = 1.0
     user: str | None = None
 
 
diff --git a/app/services/providers/anthropic_adapter.py b/app/services/providers/anthropic_adapter.py
index fe11606..9cb2c7e 100644
--- a/app/services/providers/anthropic_adapter.py
+++ b/app/services/providers/anthropic_adapter.py
@@ -124,7 +124,7 @@ async def process_completion(
     streaming = payload.get("stream", False)
     anthropic_payload = {
         "model": payload["model"],
-        "max_tokens": payload.get("max_tokens", ANTHROPIC_DEFAULT_MAX_TOKENS),
+        "max_tokens": payload.get("max_completion_tokens", payload.get("max_tokens", ANTHROPIC_DEFAULT_MAX_TOKENS)),
         "temperature": payload.get("temperature", 1.0),
         "stop_sequences": payload.get("stop", []),
     }
diff --git a/app/services/providers/bedrock_adapter.py b/app/services/providers/bedrock_adapter.py
index e83f6c9..e38edf7 100644
--- a/app/services/providers/bedrock_adapter.py
+++ b/app/services/providers/bedrock_adapter.py
@@ -184,8 +184,8 @@ async def convert_openai_payload_to_bedrock(payload: dict[str, Any]) -> dict[str
     inferenceConfig = {}
     if "temperature" in payload:
         inferenceConfig["temperature"] = payload["temperature"]
-    if "max_tokens" in payload:
-        inferenceConfig["maxTokens"] = payload["max_tokens"]
+    if "max_completion_tokens" in payload or "max_tokens" in payload:
+        inferenceConfig["maxTokens"] = payload.get("max_completion_tokens", payload.get("max_tokens"))
     if "top_p" in payload:
         inferenceConfig["topP"] = payload["top_p"]
     if "stop" in payload:
diff --git a/app/services/providers/google_adapter.py b/app/services/providers/google_adapter.py
index 07b5ad8..74228ed 100644
--- a/app/services/providers/google_adapter.py
+++ b/app/services/providers/google_adapter.py
@@ -392,7 +392,7 @@ async def convert_openai_completion_payload_to_google(
         "stopSequences": payload.get("stop", []),
         "temperature": payload.get("temperature", 0.7),
         "topP": payload.get("top_p", 0.95),
-        "maxOutputTokens": payload.get("max_tokens", 2048),
+        "maxOutputTokens": payload.get("max_completion_tokens", payload.get("max_tokens", 2048)),
     },
 }
 
diff --git a/app/services/providers/mock_adapter.py b/app/services/providers/mock_adapter.py
index 5f19ed0..2d3d994 100644
--- a/app/services/providers/mock_adapter.py
+++ b/app/services/providers/mock_adapter.py
@@ -52,7 +52,7 @@ async def process_completion(
     messages = payload.get("messages", [])
     temperature = payload.get("temperature", 0.7)
     stream = payload.get("stream", False)
-    max_tokens = payload.get("max_tokens")
+    max_tokens = payload.get("max_completion_tokens", payload.get("max_tokens"))
 
     if stream:
         # For streaming, return a generator