-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
80 lines (74 loc) · 2.47 KB
/
docker-compose.yml
File metadata and controls
80 lines (74 loc) · 2.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# Compose file format declaration.
# NOTE(review): recent Docker Compose releases reportedly treat the top-level
# `version` key as obsolete and ignore it — confirm against the Compose
# version in use before removing.
version: "3.9"
# Every key nested under `services` defines one container of the stack.
services:
# ─── AI Gateway API ────────────────────────────────────────────────────────
  # Image is built from ./Dockerfile. Container port 8000 is published on the
  # host at API_PORT (default 8000).
  api:
    build:
      dockerfile: Dockerfile
      context: .
    restart: unless-stopped
    # Short-form dependency: redis is started before this service, but
    # Compose does not wait for it to become ready.
    depends_on:
      - redis
    env_file:
      - .env
    # Each value falls back to the default after `:-` when the variable is
    # unset or empty in the shell / .env file.
    environment:
      WORKERS: "${WORKERS:-2}"
      LOG_LEVEL: "${LOG_LEVEL:-info}"
      CHROMA_PERSIST_DIR: "/data/chroma_db"
    ports:
      - "${API_PORT:-8000}:8000"
    volumes:
      # Host directory backing CHROMA_PERSIST_DIR.
      - "./chroma_db:/data/chroma_db"
      - "./logs:/app/logs"
    # Probes the /health endpoint from inside the container every 30s.
    # NOTE(review): requires `curl` to exist in the built image — confirm.
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:8000/health"
      interval: 30s
      timeout: 10s
      retries: 3
# ─── Redis (session store, rate limiting, token budget cache) ───────────────
  redis:
    image: redis:7-alpine
    # NOTE(review): this publishes Redis on every host interface and the
    # command below sets no password — consider "127.0.0.1:6379:6379" if only
    # local access is needed.
    ports:
      - "6379:6379"
    # Caps memory use at 256mb; once the cap is reached, least-recently-used
    # keys are evicted regardless of whether they carry a TTL.
    command: redis-server --maxmemory 256mb --maxmemory-policy allkeys-lru
    volumes:
      - redis_data:/data
    restart: unless-stopped
    # Health probe so `docker compose ps` / `up --wait` reflect real readiness
    # and dependants can opt into `condition: service_healthy`.
    # `redis-cli ping` exits non-zero when the server is unreachable.
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 5
# ─── Prometheus ────────────────────────────────────────────────────────────
  # Container port 9090 is published on host port 9090.
  # NOTE(review): only the config file is mounted; no named volume is
  # declared for Prometheus data in this service.
  prometheus:
    image: prom/prometheus:latest
    restart: unless-stopped
    volumes:
      # Configuration file from the repository, mounted read-only.
      - "./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro"
    ports:
      - "9090:9090"
# ─── Grafana ───────────────────────────────────────────────────────────────
  # Container port 3000 is published on host port 3000; started after
  # prometheus.
  grafana:
    image: grafana/grafana:latest
    restart: unless-stopped
    depends_on:
      - prometheus
    ports:
      - "3000:3000"
    environment:
      # Falls back to "admin" when GRAFANA_PASSWORD is unset or empty.
      # NOTE(review): set GRAFANA_PASSWORD outside local development.
      GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_PASSWORD:-admin}"
      # Quoted so the value stays the string "false" rather than a YAML bool.
      GF_USERS_ALLOW_SIGN_UP: "false"
    volumes:
      - "grafana_data:/var/lib/grafana"
# ─── Ollama (local models, no API key needed) ───────────────────────────────
  # Container port 11434 is published on host port 11434.
  ollama:
    image: ollama/ollama:latest
    restart: unless-stopped
    ports:
      - "11434:11434"
    volumes:
      # Named volume mounted at /root/.ollama (presumably where pulled models
      # are stored — verify against the image documentation).
      - "ollama_models:/root/.ollama"
    # GPU support (uncomment if NVIDIA GPU available):
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - capabilities: [gpu]
# Named volumes referenced by the redis, grafana and ollama services.
# An empty mapping means "use the default volume configuration".
volumes:
  redis_data: {}
  grafana_data: {}
  ollama_models: {}