62 changes: 33 additions & 29 deletions app/api/schemas/openai.py
@@ -41,33 +41,36 @@ class ChatMessage(BaseModel):


class ChatCompletionRequest(BaseModel):
model: str
messages: list[ChatMessage]
temperature: float | None = 1.0
top_p: float | None = 1.0
n: int | None = 1
stream: bool | None = False
stream_options: dict[str, Any] | None = None
stop: str | list[str] | None = None
max_tokens: int | None = None
presence_penalty: float | None = 0.0
model: str
audio: object | None = None
frequency_penalty: float | None = 0.0
logit_bias: dict[str, float] | None = None
user: str | None = None
web_search_options: Any | None = None
response_format: dict[str, Any] | None = None
tools: list[Any, Any] | None = None
tool_choice: str | None = None
seed: int | None = None
logit_bias: dict[Any, Any] | None = None
logprobs: bool | None = None
top_logprobs: int | None = None
max_completion_tokens: int | None = None
max_tokens: int | None = None # deprecated
metadata: dict[Any, Any] | None = None
modalities: list[str] | None = None
modalities: list[Any] | None = None
n: int | None = 1
parallel_tool_calls: bool | None = True
prediction: dict[Any, Any] | None = None
prediction: object | None = None
presence_penalty: float | None = 0.0
reasoning_effort: str | None = "medium"
response_format: Any | None = None
seed: int | None = None
service_tier: str | None = "auto"
stop: str | list[Any] | None = None
store: bool | None = False
stream: bool | None = False
stream_options: object | None = None
temperature: float | None = 1.0
tool_choice: str | None = None
tools: list[Any] | None = None
top_logprobs: int | None = None
top_p: float | None = 1.0
user: str | None = None
web_search_options: object | None = None


class ChatCompletionResponseChoice(BaseModel):
index: int
@@ -93,19 +96,20 @@ class ChatCompletionResponse(BaseModel):
class CompletionRequest(BaseModel):
model: str
prompt: str | list[str]
suffix: str | None = None
best_of: int | None = 1
echo: bool | None = False
frequency_penalty: float | None = 0.0
logit_bias: dict[Any, Any] | None = None
logprobs: int | None = None
max_tokens: int | None = 16
temperature: float | None = 1.0
top_p: float | None = 1.0
n: int | None = 1
stream: bool | None = False
logprobs: int | None = None
echo: bool | None = False
stop: str | list[str] | None = None
presence_penalty: float | None = 0.0
frequency_penalty: float | None = 0.0
best_of: int | None = 1
logit_bias: dict[str, float] | None = None
seed: int | None = None
stop: str | list[str] | None = None
stream: bool | None = False
stream_options: object | None = None
suffix: str | None = None
temperature: float | None = 1.0
user: str | None = None


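The schema diff above alphabetizes the request fields and adds `max_completion_tokens` to `ChatCompletionRequest` next to the now-deprecated `max_tokens`. As a rough illustration only (a trimmed-down stand-in with a handful of fields, not the project's actual schema module), the sketch below shows a client payload carrying the new field validating and being dumped with None values stripped before it would reach a provider adapter:

```python
# Sketch only: a trimmed-down stand-in for the fields shown in the diff, not the real schema module.
from pydantic import BaseModel


class ChatMessage(BaseModel):
    role: str
    content: str


class ChatCompletionRequest(BaseModel):
    model: str
    messages: list[ChatMessage]
    max_completion_tokens: int | None = None
    max_tokens: int | None = None  # deprecated
    temperature: float | None = 1.0


request = ChatCompletionRequest(
    model="example-model",  # illustrative value
    messages=[ChatMessage(role="user", content="Hello")],
    max_completion_tokens=256,
)
payload = request.model_dump(exclude_none=True)
print(payload)
# {'model': 'example-model', 'messages': [{'role': 'user', 'content': 'Hello'}],
#  'max_completion_tokens': 256, 'temperature': 1.0}
```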
2 changes: 1 addition & 1 deletion app/services/providers/anthropic_adapter.py
@@ -124,7 +124,7 @@ async def process_completion(
streaming = payload.get("stream", False)
anthropic_payload = {
"model": payload["model"],
"max_tokens": payload.get("max_tokens", ANTHROPIC_DEFAULT_MAX_TOKENS),
"max_tokens": payload.get("max_completion_tokens", payload.get("max_tokens", ANTHROPIC_DEFAULT_MAX_TOKENS)),
"temperature": payload.get("temperature", 1.0),
"stop_sequences": payload.get("stop", []),
}
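The Anthropic change, like the other adapters touched by this PR, reads `max_completion_tokens` first and falls back to the deprecated `max_tokens`, then to a provider default. A standalone sketch of that precedence is below; the helper name and the 4096 default are illustrative, and it assumes None-valued keys were already stripped from the payload (e.g. via `model_dump(exclude_none=True)`), since an explicit None would otherwise short-circuit the fallback:

```python
from typing import Any

ANTHROPIC_DEFAULT_MAX_TOKENS = 4096  # illustrative; the real constant lives in the adapter module


def resolve_max_tokens(payload: dict[str, Any], default: int = ANTHROPIC_DEFAULT_MAX_TOKENS) -> int:
    # Mirrors payload.get("max_completion_tokens", payload.get("max_tokens", default)) from the diff.
    return payload.get("max_completion_tokens", payload.get("max_tokens", default))


print(resolve_max_tokens({"max_completion_tokens": 256}))  # 256
print(resolve_max_tokens({"max_tokens": 512}))             # 512
print(resolve_max_tokens({}))                              # 4096
```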
4 changes: 2 additions & 2 deletions app/services/providers/bedrock_adapter.py
@@ -184,8 +184,8 @@ async def convert_openai_payload_to_bedrock(payload: dict[str, Any]) -> dict[str
inferenceConfig = {}
if "temperature" in payload:
inferenceConfig["temperature"] = payload["temperature"]
if "max_tokens" in payload:
inferenceConfig["maxTokens"] = payload["max_tokens"]
if "max_completion_tokens" in payload or "max_tokens" in payload:
inferenceConfig["maxTokens"] = payload.get("max_completion_tokens", payload.get("max_tokens"))
if "top_p" in payload:
inferenceConfig["topP"] = payload["top_p"]
if "stop" in payload:
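For Bedrock, the hunk keys the write on either field being present before setting `maxTokens` in `inferenceConfig`. A self-contained sketch of that mapping follows; the function name is illustrative, and the `stop` handling, which falls outside the shown hunk, is omitted:

```python
from typing import Any


def build_inference_config(payload: dict[str, Any]) -> dict[str, Any]:
    # Maps OpenAI-style sampling fields onto Bedrock's inferenceConfig keys, following the diff.
    config: dict[str, Any] = {}
    if "temperature" in payload:
        config["temperature"] = payload["temperature"]
    if "max_completion_tokens" in payload or "max_tokens" in payload:
        config["maxTokens"] = payload.get("max_completion_tokens", payload.get("max_tokens"))
    if "top_p" in payload:
        config["topP"] = payload["top_p"]
    return config


print(build_inference_config({"temperature": 0.2, "max_completion_tokens": 300}))
# {'temperature': 0.2, 'maxTokens': 300}
```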
2 changes: 1 addition & 1 deletion app/services/providers/google_adapter.py
@@ -392,7 +392,7 @@ async def convert_openai_completion_payload_to_google(
"stopSequences": payload.get("stop", []),
"temperature": payload.get("temperature", 0.7),
"topP": payload.get("top_p", 0.95),
"maxOutputTokens": payload.get("max_tokens", 2048),
"maxOutputTokens": payload.get("max_completion_tokens", payload.get("max_tokens", 2048)),
},
}

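The Google path differs in that it writes defaults unconditionally rather than checking for key presence; only the `maxOutputTokens` line changes in this PR. An illustrative stand-in for that generationConfig block, using the defaults visible in the diff:

```python
from typing import Any


def build_generation_config(payload: dict[str, Any]) -> dict[str, Any]:
    # The defaults below ([], 0.7, 0.95, 2048) are the ones visible in the diff.
    return {
        "stopSequences": payload.get("stop", []),
        "temperature": payload.get("temperature", 0.7),
        "topP": payload.get("top_p", 0.95),
        "maxOutputTokens": payload.get("max_completion_tokens", payload.get("max_tokens", 2048)),
    }


print(build_generation_config({"max_tokens": 100}))
# {'stopSequences': [], 'temperature': 0.7, 'topP': 0.95, 'maxOutputTokens': 100}
```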
2 changes: 1 addition & 1 deletion app/services/providers/mock_adapter.py
@@ -52,7 +52,7 @@ async def process_completion(
messages = payload.get("messages", [])
temperature = payload.get("temperature", 0.7)
stream = payload.get("stream", False)
max_tokens = payload.get("max_tokens")
max_tokens = payload.get("max_completion_tokens", payload.get("max_tokens"))

if stream:
# For streaming, return a generator