-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlitellm_config.yaml
More file actions
72 lines (65 loc) · 1.93 KB
/
litellm_config.yaml
File metadata and controls
72 lines (65 loc) · 1.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# LiteLLM Proxy Configuration
# Model groups match tier names used by get_chat_model() — no translation needed.
# Provider strategy: Groq Qwen3-32B for all operational tiers,
# OpenAI gpt-oss-120b for heavy reasoning. No AWS Bedrock.
model_list:
# --- heavy: complex reasoning (fund-manager, trade-debater, quant-researcher) ---
# Two deployments share the "heavy" group. `order` (lower = preferred) is what
# the router uses to rank deployments inside a group; it is applied during
# pre-call checks, so it relies on `enable_pre_call_checks: true` in
# router_settings. Without it, "simple-shuffle" picks between the two at random.
# `model_info.priority` is kept as descriptive metadata for existing readers.
# NOTE(review): `openai/gpt-oss-120b` has no api_base, so it targets the default
# OpenAI endpoint; confirm that endpoint actually serves this model.
- model_name: heavy
  litellm_params:
    model: openai/gpt-oss-120b
    api_key: os.environ/OPENAI_API_KEY
    order: 1
  model_info:
    priority: 1
- model_name: heavy
  litellm_params:
    model: groq/qwen/qwen3-32b
    api_key: os.environ/GROQ_API_KEY
    order: 2
  model_info:
    priority: 2
# --- medium: structured extraction (earnings-analyst, position-monitor, daily-planner) ---
# Groq Qwen3-32B is the preferred deployment; gpt-oss-120b is the fallback.
# `order` (lower = preferred) makes the router honor that ranking; it is applied
# during pre-call checks, so it relies on `enable_pre_call_checks: true` in
# router_settings. `model_info.priority` alone is metadata and does not steer
# deployment selection.
- model_name: medium
  litellm_params:
    model: groq/qwen/qwen3-32b
    api_key: os.environ/GROQ_API_KEY
    order: 1
  model_info:
    priority: 1
- model_name: medium
  litellm_params:
    model: openai/gpt-oss-120b
    api_key: os.environ/OPENAI_API_KEY
    order: 2
  model_info:
    priority: 2
# --- light: simple coordination (supervisor, health-monitor) ---
# Single deployment: every "light" request goes to Groq Qwen3-32B.
- model_name: light
  model_info:
    priority: 1
  litellm_params:
    api_key: os.environ/GROQ_API_KEY
    model: groq/qwen/qwen3-32b
# --- bulk: OPRO/TextGrad loops, cost-sensitive ---
# Single deployment: every "bulk" request goes to Groq Qwen3-32B.
- model_name: bulk
  model_info:
    priority: 1
  litellm_params:
    api_key: os.environ/GROQ_API_KEY
    model: groq/qwen/qwen3-32b
# --- embedding: local ---
# Served through Ollama; the endpoint URL is read from OLLAMA_API_BASE.
- model_name: embedding
  litellm_params:
    api_base: os.environ/OLLAMA_API_BASE
    model: ollama/mxbai-embed-large
router_settings:
  # Deployment selection within a model group.
  routing_strategy: simple-shuffle
  enable_pre_call_checks: true

  # Failure handling: failures tolerated before a deployment is cooled down,
  # and the cooldown length (presumably seconds; confirm against LiteLLM docs).
  allowed_fails: 3
  cooldown_time: 60

  # Default retry budget and wait between attempts.
  num_retries: 2
  retry_after: 5

  # Per-error-type retry counts; authentication errors are never retried.
  retry_policy:
    AuthenticationErrorRetries: 0
    TimeoutErrorRetries: 2
    RateLimitErrorRetries: 3
litellm_settings:
  # Observability: send request outcomes to Langfuse.
  # NOTE(review): langfuse is registered in all three lists below; confirm this
  # does not produce duplicate traces before trimming any of them.
  success_callback:
  - langfuse
  failure_callback:
  - langfuse
  callbacks:
  - langfuse