docker-compose.yml
services:
  # Backend service
  backend:
    build:
      context: ./backend
      dockerfile: Dockerfile
    container_name: ragenius-backend
    restart: unless-stopped
    ports:
      - "8000:8000"
    environment:
      # ============================================
      # Basic settings
      # ============================================
      - DEVICE=${DEVICE:-cpu}
      - FLASK_ENV=${FLASK_ENV:-production}
      # ============================================
      # LLM settings
      # ============================================
      - LLM_USE_OPENAI=${LLM_USE_OPENAI:-true}
      - LLM_OPENAI_API_KEY=${LLM_OPENAI_API_KEY:-}
      - LLM_OPENAI_MODEL=${LLM_OPENAI_MODEL:-gpt-4o}
      - LLM_OPENAI_API_BASE=${LLM_OPENAI_API_BASE:-}
      - LLM_LOCAL_MODEL=${LLM_LOCAL_MODEL:-deepseek-r1:14b}
      # Ollama base URL - use host.docker.internal for Docker on Mac/Windows
      - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-http://host.docker.internal:11434}
      - LLM_TEMPERATURE=${LLM_TEMPERATURE:-0.1}
      - LLM_NUM_THREAD=${LLM_NUM_THREAD:-12}
      # Ollama-specific parameters
      - LLM_NUM_CTX=${LLM_NUM_CTX:-8192}
      - LLM_NUM_PREDICT=${LLM_NUM_PREDICT:-2048}
      # ============================================
      # Embedding settings
      # ============================================
      - EMBEDDING_MODEL=${EMBEDDING_MODEL:-BAAI/bge-base-zh-v1.5}
      # ============================================
      # Chunking settings
      # ============================================
      - CHUNK_SIZE=${CHUNK_SIZE:-600}
      - CHUNK_OVERLAP=${CHUNK_OVERLAP:-150}
      # ============================================
      # Retrieval pipeline - Query Expansion
      # ============================================
      - QUERY_EXPANSION_ENABLED=${QUERY_EXPANSION_ENABLED:-true}
      - QUERY_EXPANSION_N_SUBQUERIES=${QUERY_EXPANSION_N_SUBQUERIES:-2}
      - QUERY_EXPANSION_MODEL=${QUERY_EXPANSION_MODEL:-gpt-4o-mini}
      - QUERY_EXPANSION_TEMPERATURE=${QUERY_EXPANSION_TEMPERATURE:-0.7}
      - QUERY_EXPANSION_INCLUDE_ORIGINAL=${QUERY_EXPANSION_INCLUDE_ORIGINAL:-true}
      # ============================================
      # Retrieval pipeline - Hybrid Retrieval
      # ============================================
      - HYBRID_TOP_K_PER_QUERY=${HYBRID_TOP_K_PER_QUERY:-15}
      # ============================================
      # Retrieval pipeline - RRF Fusion
      # ============================================
      - RRF_K=${RRF_K:-60}
      - RRF_TOP_K=${RRF_TOP_K:-12}
      # ============================================
      # Retrieval pipeline - Reranking
      # ============================================
      - RERANKING_ENABLED=${RERANKING_ENABLED:-true}
      - RERANKING_MODEL=${RERANKING_MODEL:-cross-encoder/ms-marco-MiniLM-L-2-v2}
      - RERANKING_TOP_K=${RERANKING_TOP_K:-8}
      - RERANKING_BATCH_SIZE=${RERANKING_BATCH_SIZE:-32}
      # ============================================
      # Retrieval pipeline - MMR Post-processing
      # ============================================
      - MMR_MODE=${MMR_MODE:-auto}
      - MMR_SIMILARITY_THRESHOLD=${MMR_SIMILARITY_THRESHOLD:-0.85}
      - MMR_LAMBDA=${MMR_LAMBDA:-0.5}
      - MMR_FINAL_K=${MMR_FINAL_K:-5}
      # ============================================
      # Global retrieval settings
      # ============================================
      - SEARCH_K=${SEARCH_K:-8}
      # ============================================
      # Timing monitoring
      # ============================================
      - TIMING_ENABLED=${TIMING_ENABLED:-true}
      - TIMING_SHOW_IN_TERMINAL=${TIMING_SHOW_IN_TERMINAL:-true}
      # ============================================
      # HuggingFace cache directory
      # ============================================
      - HF_HOME=/app/models_cache
      # ============================================
      # ChromaDB persistence settings
      # ============================================
      # Leave empty for pure in-memory mode; set a path to enable persistence
      - CHROMA_PERSIST_DIR=${CHROMA_PERSIST_DIR:-/app/chroma_data}
    volumes:
      # Cache embedding and reranking models
      - models_cache:/app/models_cache
      # Persist vector database (comment out for in-memory mode)
      - chroma_data:/app/chroma_data
    networks:
      - ragenius-network
    # For connecting to Ollama on the host machine (if using local models)
    extra_hosts:
      - "host.docker.internal:host-gateway"

  # Frontend service
  frontend:
    build:
      context: ./frontend
      dockerfile: Dockerfile
    container_name: ragenius-frontend
    restart: unless-stopped
    ports:
      - "3000:80"
    depends_on:
      - backend
    networks:
      - ragenius-network

networks:
  ragenius-network:
    driver: bridge

volumes:
  models_cache:
    driver: local
  chroma_data:
    driver: local
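All of the ${VAR:-default} values above are resolved by Docker Compose variable interpolation, so they can be overridden from the shell environment or from a .env file placed next to docker-compose.yml. A minimal sketch of such a .env, assuming the default OpenAI-backed setup (the variable names come from this file; the API key value is a placeholder):

    # .env: example overrides for the backend service
    LLM_USE_OPENAI=true
    LLM_OPENAI_API_KEY=sk-your-key-here
    LLM_OPENAI_MODEL=gpt-4o
    DEVICE=cpu
    # To use a local Ollama model served on the host instead:
    # LLM_USE_OPENAI=false
    # LLM_LOCAL_MODEL=deepseek-r1:14b
    # OLLAMA_BASE_URL=http://host.docker.internal:11434

With that in place, docker compose up -d --build builds and starts both services, exposing the backend on port 8000 and the frontend on port 3000. The extra_hosts entry maps host.docker.internal to the host gateway so the default Ollama URL also resolves on Linux, and CHROMA_PERSIST_DIR controls whether ChromaDB persists to disk or runs purely in memory, as described in the comments above.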