-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain_v2.py
More file actions
36 lines (28 loc) · 1.35 KB
/
main_v2.py
File metadata and controls
36 lines (28 loc) · 1.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import os, random
from openai import AzureOpenAI
app = FastAPI(title="GraTech AI Hub")
# Multi-model load balancing: each key maps a short model name to the Azure
# endpoint / API key / deployment used to serve it. Endpoints and keys come
# from the environment; os.getenv returns None when a variable is unset, so a
# missing variable only surfaces when a request for that model is made —
# NOTE(review): consider validating these at startup.
MODELS = {
"gpt4": {"endpoint": os.getenv("AZURE_OPENAI_ENDPOINT"), "key": os.getenv("AZURE_OPENAI_API_KEY"), "deployment": "gpt-4o"},
"deepseek": {"endpoint": os.getenv("AZURE_FOUNDRY_ENDPOINT"), "key": os.getenv("AZURE_FOUNDRY_API_KEY"), "deployment": "DeepSeek-V3.1"},
"llama": {"endpoint": os.getenv("AZURE_FOUNDRY_ENDPOINT"), "key": os.getenv("AZURE_FOUNDRY_API_KEY"), "deployment": "llama-3-dot1"}
}
class ChatRequest(BaseModel):
    """Request body for POST /api/chat."""

    # The user's chat message, forwarded verbatim to the selected model.
    message: str
    # Which backend to use: "auto" picks one at random; otherwise one of the
    # keys in MODELS ("gpt4", "deepseek", "llama").
    model: str = "auto"  # auto, gpt4, deepseek, llama
@app.post("/api/chat")
async def chat(req: ChatRequest):
    """Forward a single user message to one of the configured Azure models.

    Model selection: `req.model == "auto"` picks a random key from MODELS
    (simple load balancing); any other value must be an exact MODELS key.

    Returns a dict with the model's reply text and the key actually used.

    Raises:
        HTTPException(400): if `req.model` is neither "auto" nor a known key.
          (Previously an unknown name hit `MODELS[model_key]` and raised an
          unhandled KeyError -> HTTP 500; HTTPException was imported for
          exactly this validation.)
    """
    if req.model == "auto":
        model_key = random.choice(list(MODELS.keys()))
    elif req.model in MODELS:
        model_key = req.model
    else:
        raise HTTPException(
            status_code=400,
            detail=f"Unknown model '{req.model}'. Valid options: auto, {', '.join(MODELS)}",
        )
    config = MODELS[model_key]
    # A fresh client per request keeps the handler stateless; the SDK call
    # itself is synchronous — NOTE(review): it blocks the event loop, consider
    # an async client or a threadpool if throughput matters.
    client = AzureOpenAI(azure_endpoint=config["endpoint"], api_key=config["key"], api_version="2024-06-01")
    response = client.chat.completions.create(
        model=config["deployment"],
        messages=[{"role": "user", "content": req.message}],
        temperature=0.3
    )
    return {"reply": response.choices[0].message.content, "model_used": model_key}
@app.get("/api/health")
async def health():
    """Liveness probe: report service status and the configured model keys."""
    return {"status": "healthy", "models": [*MODELS]}