-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
76 lines (65 loc) · 2.02 KB
/
docker-compose.yml
File metadata and controls
76 lines (65 loc) · 2.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# Compose stack for the Svara TTS API: a single GPU-backed service plus a
# named volume that persists the Hugging Face model cache between runs.
# Every ${VAR:-default} below is resolved by Compose from the shell
# environment or a .env file next to this file; the default applies when the
# variable is unset or empty.
services:
  svara-tts-api:
    build:
      context: .
      dockerfile: Dockerfile
    image: svara-tts-api:latest
    container_name: svara-tts-api

    # GPU configuration: reserve every NVIDIA device visible to the host.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

    # Port mappings
    # The published port follows API_PORT so that overriding it in .env keeps
    # the mapping, the API_PORT variable passed to the container, and the
    # healthcheck in agreement (defaults to 8080:8080).
    # NOTE(review): assumes the server binds to API_PORT inside the
    # container - confirm against the image entrypoint.
    ports:
      - "${API_PORT:-8080}:${API_PORT:-8080}"  # FastAPI API server (vLLM embedded)

    # Environment variables (override in .env file)
    environment:
      # vLLM Configuration
      - VLLM_MODEL=${VLLM_MODEL:-kenpath/svara-tts-v1}
      - VLLM_GPU_MEMORY_UTILIZATION=${VLLM_GPU_MEMORY_UTILIZATION:-0.9}
      - VLLM_MAX_MODEL_LEN=${VLLM_MAX_MODEL_LEN:-4096}
      - VLLM_TENSOR_PARALLEL_SIZE=${VLLM_TENSOR_PARALLEL_SIZE:-1}
      # An empty default passes the variable through as an empty string.
      - VLLM_QUANTIZATION=${VLLM_QUANTIZATION:-}
      - VLLM_ENFORCE_EAGER=${VLLM_ENFORCE_EAGER:-false}
      - VLLM_ATTENTION_BACKEND=${VLLM_ATTENTION_BACKEND:-}
      - VLLM_KV_CACHE_DTYPE=${VLLM_KV_CACHE_DTYPE:-auto}
      - VLLM_DTYPE=${VLLM_DTYPE:-auto}
      # API Configuration
      - API_PORT=${API_PORT:-8080}
      - API_HOST=${API_HOST:-0.0.0.0}
      - SNAC_DEVICE=${SNAC_DEVICE:-cpu}
      - SNAC_WINDOW_SIZE=${SNAC_WINDOW_SIZE:-28}
      - SNAC_COMPILE=${SNAC_COMPILE:-true}
      # Logging
      - LOG_LEVEL=${LOG_LEVEL:-INFO}
      # Hugging Face Token (optional, for gated models)
      - HF_TOKEN=${HF_TOKEN:-}

    # Volume mounts
    volumes:
      # Cache Hugging Face models to avoid re-downloading
      - huggingface_cache:/root/.cache/huggingface
      # Optional: Mount local code for development
      # - ./tts_engine:/app/tts_engine
      # - ./api:/app/api

    # Restart policy
    restart: unless-stopped

    # Health check
    # Probes the same port the API is configured with. start_period gives the
    # service 120s before failed probes count against the retry budget.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:${API_PORT:-8080}/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 120s

    # Logging configuration: rotate json-file logs at 10 MB, keep 3 files.
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

# Named volume backing the Hugging Face cache mount above.
volumes:
  huggingface_cache:
    driver: local