-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocker-compose.coolify.yml
More file actions
88 lines (85 loc) · 2.93 KB
/
docker-compose.coolify.yml
File metadata and controls
88 lines (85 loc) · 2.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
---
# Compose deployment for Coolify: one FastAPI web service plus one
# Procrastinate background worker, built from the same Dockerfile with
# different targets and sharing model-cache volumes.
# NOTE: the obsolete top-level `version:` key was removed — the Compose
# Specification ignores it and Compose v2 warns on its presence.
services:
  # --------------------------------------------------------------------------
  # 1. API Server (FastAPI)
  #    Handles HTTP requests, Chat, and Query Embeddings
  # --------------------------------------------------------------------------
  api:
    # NOTE(review): both services tag the same image name with different build
    # targets; the explicit `command:` overrides below are what keep each
    # container running the right process regardless of which stage's CMD
    # ends up baked into the tag.
    image: notebookllm-backend:latest
    build:
      context: .
      dockerfile: Dockerfile
      target: runtime
    restart: always
    # CRITICAL: Explicit command override - Coolify builds last stage (worker)
    # so the image CMD defaults to the worker. This forces API mode.
    command: ["uvicorn", "src.app:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "1"]
    environment:
      # CRITICAL: Disable embedded worker — background jobs are handled by
      # the dedicated `worker` service instead.
      - ENABLE_EMBEDDED_WORKER=false
      - QDRANT_HOST=${QDRANT_HOST}
      - QDRANT_API_KEY=${QDRANT_API_KEY}
      - SUPABASE_URL=${SUPABASE_URL}
      - SUPABASE_KEY=${SUPABASE_KEY}
      - DATABASE_URL=${DATABASE_URL}
      - GEMINI_API_KEY=${GEMINI_API_KEY}
      # Optimization: Use shared cache
      - HF_HOME=/app/.cache/huggingface
    expose:
      - "8000"
    deploy:
      resources:
        limits:
          cpus: '2.5'    # Reserve 2.5 cores for API (user-facing priority)
          memory: 4096M  # 4 GiB limit for API (was 4000M; 4096M matches the stated "4GB" and the worker's limit)
    volumes:
      - hf_cache:/app/.cache/huggingface
      - fastembed_cache:/app/.cache/fastembed
    healthcheck:
      test: ["CMD", "python", "-c", "import httpx; httpx.get('http://localhost:8000/api/v1/health/liveness', timeout=5)"]
      interval: 15s
      timeout: 10s
      retries: 5
      start_period: 60s

  # --------------------------------------------------------------------------
  # 2. Background Worker (Procrastinate)
  #    Handles PDF Ingestion, Indexing, and Podcast Generation
  # --------------------------------------------------------------------------
  worker:
    image: notebookllm-backend:latest
    build:
      context: .
      dockerfile: Dockerfile
      target: worker
    restart: always
    # Explicit command override to ensure worker mode
    command: ["python", "-m", "src.services.queue.worker"]
    environment:
      - ENABLE_EMBEDDED_WORKER=false
      - QDRANT_HOST=${QDRANT_HOST}
      - QDRANT_API_KEY=${QDRANT_API_KEY}
      - SUPABASE_URL=${SUPABASE_URL}
      - SUPABASE_KEY=${SUPABASE_KEY}
      - DATABASE_URL=${DATABASE_URL}
      - GEMINI_API_KEY=${GEMINI_API_KEY}
      - HF_HOME=/app/.cache/huggingface
    deploy:
      resources:
        limits:
          cpus: '2.0'    # 2 cores for worker (TTS needs CPU)
          memory: 4096M  # 4 GiB limit for worker (TTS needs RAM)
    volumes:
      - hf_cache:/app/.cache/huggingface
      - fastembed_cache:/app/.cache/fastembed
    depends_on:
      - api
    # Give long-running jobs (e.g. podcast generation) time to finish on stop.
    stop_grace_period: 120s
    # FIX: the previous healthcheck probed http://localhost:8000 inside THIS
    # container, but only the `api` service listens on 8000 — the worker runs
    # no HTTP server, so the check could never succeed and the container was
    # permanently reported unhealthy. Disabled until a worker-native liveness
    # probe (e.g. a Procrastinate queue/DB ping) is available — TODO confirm
    # what the worker module exposes for liveness.
    healthcheck:
      disable: true

volumes:
  # Shared named volumes so api and worker download HF/fastembed models once.
  hf_cache:
  fastembed_cache: