# Zen MCP Server Environment Configuration
# Copy this file to .env and fill in your values
# API Keys - At least one is required
#
# IMPORTANT: Choose ONE approach:
# - Native APIs (Gemini/OpenAI/XAI) for direct access
# - DIAL for unified enterprise access
# - OpenRouter for unified cloud access
# Having multiple unified providers creates ambiguity about which serves each model.
#
# Option 1: Use native APIs (recommended for direct access)
# Get your Gemini API key from: https://aistudio.google.com/app/apikey
GEMINI_API_KEY=your_gemini_api_key_here
# GEMINI_BASE_URL= # Optional: Custom Gemini endpoint (defaults to Google's API)
# Get your OpenAI API key from: https://platform.openai.com/api-keys
OPENAI_API_KEY=your_openai_api_key_here
# Azure OpenAI mirrors OpenAI models through Azure-hosted deployments
# Set the endpoint from Azure Portal. Models are defined in conf/azure_models.json
# (or the file referenced by AZURE_MODELS_CONFIG_PATH).
AZURE_OPENAI_API_KEY=your_azure_openai_key_here
AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
# AZURE_OPENAI_API_VERSION=2024-02-15-preview
# AZURE_OPENAI_ALLOWED_MODELS=gpt-4o,gpt-4o-mini
# AZURE_MODELS_CONFIG_PATH=/absolute/path/to/custom_azure_models.json
# Get your X.AI API key from: https://console.x.ai/
XAI_API_KEY=your_xai_api_key_here
# Get your DIAL API key and configure host URL
# DIAL provides unified access to multiple AI models through a single API
DIAL_API_KEY=your_dial_api_key_here
# DIAL_API_HOST=https://core.dialx.ai # Optional: Base URL without /openai suffix (auto-appended)
# DIAL_API_VERSION=2025-01-01-preview # Optional: API version header for DIAL requests
# Option 2: Use OpenRouter for access to multiple models through one API
# Get your OpenRouter API key from: https://openrouter.ai/
# If using OpenRouter, comment out the native API keys above
OPENROUTER_API_KEY=your_openrouter_api_key_here
# Option 3: Use custom API endpoints for local models (Ollama, vLLM, LM Studio, etc.)
# CUSTOM_API_URL=http://localhost:11434/v1 # Ollama example
# CUSTOM_API_KEY= # Empty for Ollama (no auth needed)
# CUSTOM_MODEL_NAME=llama3.2 # Default model name
# Optional: HTTP timeout tuning for OpenAI-compatible endpoints (OpenRouter/custom/local)
# Values are seconds; defaults are 45s connect / 900s read/write/pool for remote URLs
# and 60s/1800s when pointing at localhost. Raise these if long-running models time out.
# CUSTOM_CONNECT_TIMEOUT=45.0
# CUSTOM_READ_TIMEOUT=900.0
# CUSTOM_WRITE_TIMEOUT=900.0
# CUSTOM_POOL_TIMEOUT=900.0
# Optional: Default model to use
# Options: 'auto' (Claude picks best model), 'pro', 'flash', 'o3', 'o3-mini', 'o4-mini', 'o4-mini-high',
# 'gpt-5', 'gpt-5-mini', 'grok', 'opus-4.1', 'sonnet-4.1', or any DIAL model if DIAL is configured
# When set to 'auto', Claude will select the best model for each task
# Defaults to 'auto' if not specified
DEFAULT_MODEL=auto
# Optional: Default thinking mode for ThinkDeep tool
# NOTE: Only applies to models that support extended thinking (e.g., Gemini 2.5 Pro, GPT-5 models)
# Flash models (2.0) will use system prompt engineering instead
# Token consumption per mode:
# minimal: 128 tokens - Quick analysis, fastest response
# low: 2,048 tokens - Light reasoning tasks
# medium: 8,192 tokens - Balanced reasoning (good for most cases)
# high: 16,384 tokens - Complex analysis (recommended for thinkdeep)
# max: 32,768 tokens - Maximum reasoning depth, slowest but most thorough
# Defaults to 'high' if not specified
DEFAULT_THINKING_MODE_THINKDEEP=high
# Optional: Model usage restrictions
# Limit which models can be used from each provider for cost control, compliance, or standardization
# Format: Comma-separated list of allowed model names (case-insensitive, whitespace tolerant)
# Empty or unset = all models allowed (default behavior)
# If you want to disable a provider entirely, don't set its API key
#
# Supported OpenAI models:
# - o3 (200K context, high reasoning)
# - o3-mini (200K context, balanced)
# - o4-mini (200K context, latest balanced, temperature=1.0 only)
# - o4-mini-high (200K context, enhanced reasoning, temperature=1.0 only)
# - gpt-5 (400K context, 128K output, reasoning tokens)
# - gpt-5-mini (400K context, 128K output, reasoning tokens)
# - mini (shorthand for o4-mini)
#
# Supported Google/Gemini models:
# - gemini-2.5-flash (1M context, fast, supports thinking)
# - gemini-2.5-pro (1M context, powerful, supports thinking)
# - flash (shorthand for gemini-2.5-flash)
# - pro (shorthand for gemini-2.5-pro)
#
# Supported X.AI GROK models:
# - grok-3 (131K context, advanced reasoning)
# - grok-3-fast (131K context, higher performance but more expensive)
# - grok (shorthand for grok-3)
# - grok3 (shorthand for grok-3)
# - grokfast (shorthand for grok-3-fast)
#
# Supported DIAL models (when available in your DIAL deployment):
# - o3-2025-04-16 (200K context, latest O3 release)
# - o4-mini-2025-04-16 (200K context, latest O4 mini)
# - o3 (shorthand for o3-2025-04-16)
# - o4-mini (shorthand for o4-mini-2025-04-16)
# - anthropic.claude-sonnet-4.1-20250805-v1:0 (200K context, Claude 4.1 Sonnet)
# - anthropic.claude-sonnet-4.1-20250805-v1:0-with-thinking (200K context, Claude 4.1 Sonnet with thinking mode)
# - anthropic.claude-opus-4.1-20250805-v1:0 (200K context, Claude 4.1 Opus)
# - anthropic.claude-opus-4.1-20250805-v1:0-with-thinking (200K context, Claude 4.1 Opus with thinking mode)
# - sonnet-4.1 (shorthand for Claude 4.1 Sonnet)
# - sonnet-4.1-thinking (shorthand for Claude 4.1 Sonnet with thinking)
# - opus-4.1 (shorthand for Claude 4.1 Opus)
# - opus-4.1-thinking (shorthand for Claude 4.1 Opus with thinking)
# - gemini-2.5-pro-preview-03-25-google-search (1M context, with Google Search)
# - gemini-2.5-pro-preview-05-06 (1M context, latest preview)
# - gemini-2.5-flash-preview-05-20 (1M context, latest flash preview)
# - gemini-2.5-pro (shorthand for gemini-2.5-pro-preview-05-06)
# - gemini-2.5-pro-search (shorthand for gemini-2.5-pro-preview-03-25-google-search)
# - gemini-2.5-flash (shorthand for gemini-2.5-flash-preview-05-20)
#
# Examples:
# OPENAI_ALLOWED_MODELS=o3-mini,o4-mini,mini # Only allow mini models (cost control)
# GOOGLE_ALLOWED_MODELS=flash # Only allow Flash (fast responses)
# XAI_ALLOWED_MODELS=grok-3 # Only allow standard GROK (not fast variant)
# OPENAI_ALLOWED_MODELS=o4-mini # Single model standardization
# GOOGLE_ALLOWED_MODELS=flash,pro # Allow both Gemini models
# XAI_ALLOWED_MODELS=grok,grok-3-fast # Allow both GROK variants
# DIAL_ALLOWED_MODELS=o3,o4-mini # Only allow O3/O4 models via DIAL
# DIAL_ALLOWED_MODELS=opus-4.1,sonnet-4.1 # Only Claude 4.1 models (without thinking)
# DIAL_ALLOWED_MODELS=opus-4.1-thinking,sonnet-4.1-thinking # Only Claude 4.1 with thinking mode
# DIAL_ALLOWED_MODELS=gemini-2.5-pro,gemini-2.5-flash # Only Gemini 2.5 models via DIAL
#
# Note: These restrictions apply even in 'auto' mode - Claude will only pick from allowed models
# OPENAI_ALLOWED_MODELS=
# GOOGLE_ALLOWED_MODELS=
# XAI_ALLOWED_MODELS=
# DIAL_ALLOWED_MODELS=
# Optional: Custom model configuration file path
# Override the default location of custom_models.json
# CUSTOM_MODELS_CONFIG_PATH=/path/to/your/custom_models.json
# Note: Conversations are stored in memory during the session
# Optional: Conversation timeout (hours)
# How long AI-to-AI conversation threads persist before expiring
# Longer timeouts use more memory but allow resuming conversations later
# Defaults to 24 hours if not specified
CONVERSATION_TIMEOUT_HOURS=24
# Optional: Max conversation turns
# Maximum number of turns allowed in an AI-to-AI conversation thread
# Each exchange (Claude asks, Gemini responds) counts as 2 turns
# So the default of 40 turns = 20 exchanges. Defaults to 40 if not specified
MAX_CONVERSATION_TURNS=40
# Optional: Logging level (DEBUG, INFO, WARNING, ERROR)
# DEBUG: Shows detailed operational messages for troubleshooting (default)
# INFO: Shows general operational messages
# WARNING: Shows only warnings and errors
# ERROR: Shows only errors
LOG_LEVEL=DEBUG
# Optional: Tool Selection
# Comma-separated list of tools to disable. If not set, all tools are enabled.
# Essential tools (version, listmodels) cannot be disabled.
# Available tools: chat, thinkdeep, planner, consensus, codereview, precommit,
# debug, docgen, analyze, refactor, tracer, testgen, challenge, secaudit
#
# DEFAULT CONFIGURATION: To optimize context window usage, non-essential tools
# are disabled by default. Only the essential tools remain enabled:
# - chat, thinkdeep, planner, consensus (collaboration tools)
# - codereview, precommit, debug (code quality tools)
# - challenge (critical thinking utility)
#
# To enable additional tools, remove them from the DISABLED_TOOLS list below.
DISABLED_TOOLS=analyze,refactor,testgen,secaudit,docgen,tracer
# Optional: Language/Locale for AI responses
# When set, all AI tools will respond in the specified language
# while maintaining their analytical capabilities
# Examples: "fr-FR", "en-US", "zh-CN", "zh-TW", "ja-JP", "ko-KR", "es-ES"
# Leave empty for default language (English)
# LOCALE=fr-FR
# ===========================================
# Zen MCP Server Configuration
# ===========================================
# Force .env file values to override system environment variables
# This prevents issues where different AI tools (Claude Code, etc.) pass
# conflicting or cached environment variables that override each other
#
# When enabled (true):
# - .env file values take absolute precedence
# - Prevents MCP clients from passing outdated/cached API keys
# - Ensures consistent configuration across different AI tool integrations
# - Solves environment variable conflicts between multiple AI applications
#
# When disabled (false):
# - System environment variables take precedence (standard behavior)
# - Suitable for production deployments with secure environment injection
# - Respects container orchestrator and CI/CD pipeline configurations
#
# Recommended settings:
# Development with multiple AI tools: true (prevents tool conflicts)
# Production/Container deployments: false (preserves security practices)
# CI/CD environments: false (respects pipeline secrets)
ZEN_MCP_FORCE_ENV_OVERRIDE=false
# ===========================================
# Docker Configuration
# ===========================================
# Container name for Docker Compose
# Used when running with docker-compose.yml
COMPOSE_PROJECT_NAME=zen-mcp
# Timezone for Docker containers
# Ensures consistent time handling in containerized environments
TZ=UTC
# Maximum log file size (default: 10MB)
# Applicable when using file-based logging
LOG_MAX_SIZE=10MB