diff --git a/.env.example b/.env.example
index 78a3b72c..85eeb95a 100644
--- a/.env.example
+++ b/.env.example
@@ -5,6 +5,14 @@ LLM_API_KEY=your_api_key_here
LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
LLM_MODEL_NAME=qwen-plus
+# ===== 使用 MiniMax 模型(可选)=====
+# MiniMax M2.5: 高性能、高性价比,支持 204,800 tokens 上下文
+# 获取 API Key: https://platform.minimax.io/
+# LLM_API_KEY=your_minimax_api_key_here
+# LLM_BASE_URL=https://api.minimax.io/v1
+# LLM_MODEL_NAME=MiniMax-M2.5
+# 国内用户可使用: LLM_BASE_URL=https://api.minimaxi.com/v1
+
# ===== ZEP记忆图谱配置 =====
# 每月免费额度即可支撑简单使用:https://app.getzep.com/
ZEP_API_KEY=your_zep_api_key_here
diff --git a/README-EN.md b/README-EN.md
index cd24e83e..fc6ef9c3 100644
--- a/README-EN.md
+++ b/README-EN.md
@@ -127,6 +127,28 @@ LLM_MODEL_NAME=qwen-plus
ZEP_API_KEY=your_zep_api_key
```
+
+<details><summary>Using MiniMax Models</summary>
+
+[MiniMax](https://platform.minimax.io/) provides high-performance, cost-effective LLM models with OpenAI-compatible API:
+
+```env
+LLM_API_KEY=your_minimax_api_key
+LLM_BASE_URL=https://api.minimax.io/v1
+LLM_MODEL_NAME=MiniMax-M2.5
+```
+
+| Model | Description |
+|-------|-------------|
+| `MiniMax-M2.5` | Flagship model, 204K context window |
+| `MiniMax-M2.5-highspeed` | Same performance, faster and more agile |
+
+For users in China: `LLM_BASE_URL=https://api.minimaxi.com/v1`
+
+API Documentation: [OpenAI Compatible API](https://platform.minimax.io/docs/api-reference/text-openai-api)
+
+</details>
+
#### 2. Install Dependencies
```bash
diff --git a/README.md b/README.md
index a47976c4..de204adf 100644
--- a/README.md
+++ b/README.md
@@ -127,6 +127,28 @@ LLM_MODEL_NAME=qwen-plus
ZEP_API_KEY=your_zep_api_key
```
+
+<details><summary>使用 MiniMax 模型</summary>
+
+[MiniMax](https://platform.minimax.io/) 提供高性能、高性价比的 LLM 模型,支持 OpenAI 兼容 API:
+
+```env
+LLM_API_KEY=your_minimax_api_key
+LLM_BASE_URL=https://api.minimax.io/v1
+LLM_MODEL_NAME=MiniMax-M2.5
+```
+
+| 模型 | 说明 |
+|------|------|
+| `MiniMax-M2.5` | 旗舰模型,204K 上下文窗口 |
+| `MiniMax-M2.5-highspeed` | 同等性能,更快更敏捷 |
+
+国内用户可使用:`LLM_BASE_URL=https://api.minimaxi.com/v1`
+
+API 文档:[OpenAI 兼容接口](https://platform.minimax.io/docs/api-reference/text-openai-api)
+
+</details>
+
#### 2. 安装依赖
```bash
diff --git a/backend/app/services/oasis_profile_generator.py b/backend/app/services/oasis_profile_generator.py
index 57836c53..b05cbfec 100644
--- a/backend/app/services/oasis_profile_generator.py
+++ b/backend/app/services/oasis_profile_generator.py
@@ -20,6 +20,7 @@
from ..config import Config
from ..utils.logger import get_logger
+from ..utils.llm_client import _is_minimax, _clamp_temperature, _inject_json_instruction, parse_json_from_response
from .zep_entity_reader import EntityNode, ZepEntityReader
logger = get_logger('mirofish.oasis_profile')
@@ -523,43 +524,53 @@ def _generate_profile_with_llm(
# 尝试多次生成,直到成功或达到最大重试次数
max_attempts = 3
last_error = None
-
+ use_minimax = _is_minimax(self.model_name, self.base_url)
+
for attempt in range(max_attempts):
try:
- response = self.client.chat.completions.create(
- model=self.model_name,
- messages=[
- {"role": "system", "content": self._get_system_prompt(is_individual)},
- {"role": "user", "content": prompt}
- ],
- response_format={"type": "json_object"},
- temperature=0.7 - (attempt * 0.1) # 每次重试降低温度
+ import re as _re
+ temperature = _clamp_temperature(0.7 - (attempt * 0.1), self.model_name, self.base_url)
+ messages = [
+ {"role": "system", "content": self._get_system_prompt(is_individual)},
+ {"role": "user", "content": prompt}
+ ]
+
+ kwargs = {
+ "model": self.model_name,
+ "messages": _inject_json_instruction(messages) if use_minimax else messages,
+ "temperature": temperature,
# 不设置max_tokens,让LLM自由发挥
- )
-
+ }
+ if not use_minimax:
+ kwargs["response_format"] = {"type": "json_object"}
+
+ response = self.client.chat.completions.create(**kwargs)
+
content = response.choices[0].message.content
-
+            # 移除 <think> 标签
+            content = _re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
+
# 检查是否被截断(finish_reason不是'stop')
finish_reason = response.choices[0].finish_reason
if finish_reason == 'length':
logger.warning(f"LLM输出被截断 (attempt {attempt+1}), 尝试修复...")
content = self._fix_truncated_json(content)
-
+
# 尝试解析JSON
try:
- result = json.loads(content)
-
+ result = parse_json_from_response(content)
+
# 验证必需字段
if "bio" not in result or not result["bio"]:
result["bio"] = entity_summary[:200] if entity_summary else f"{entity_type}: {entity_name}"
if "persona" not in result or not result["persona"]:
result["persona"] = entity_summary or f"{entity_name}是一个{entity_type}。"
-
+
return result
-
- except json.JSONDecodeError as je:
+
+ except (json.JSONDecodeError, ValueError) as je:
logger.warning(f"JSON解析失败 (attempt {attempt+1}): {str(je)[:80]}")
-
+
# 尝试修复JSON
result = self._try_fix_json(content, entity_name, entity_type, entity_summary)
if result.get("_fixed"):
diff --git a/backend/app/services/simulation_config_generator.py b/backend/app/services/simulation_config_generator.py
index cc362508..4d40bc73 100644
--- a/backend/app/services/simulation_config_generator.py
+++ b/backend/app/services/simulation_config_generator.py
@@ -20,6 +20,7 @@
from ..config import Config
from ..utils.logger import get_logger
+from ..utils.llm_client import _is_minimax, _clamp_temperature, _inject_json_instruction, parse_json_from_response
from .zep_entity_reader import EntityNode, ZepEntityReader
logger = get_logger('mirofish.simulation_config')
@@ -433,42 +434,51 @@ def _summarize_entities(self, entities: List[EntityNode]) -> str:
def _call_llm_with_retry(self, prompt: str, system_prompt: str) -> Dict[str, Any]:
"""带重试的LLM调用,包含JSON修复逻辑"""
import re
-
+
max_attempts = 3
last_error = None
-
+ use_minimax = _is_minimax(self.model_name, self.base_url)
+
for attempt in range(max_attempts):
try:
- response = self.client.chat.completions.create(
- model=self.model_name,
- messages=[
- {"role": "system", "content": system_prompt},
- {"role": "user", "content": prompt}
- ],
- response_format={"type": "json_object"},
- temperature=0.7 - (attempt * 0.1) # 每次重试降低温度
+ temperature = _clamp_temperature(0.7 - (attempt * 0.1), self.model_name, self.base_url)
+ messages = [
+ {"role": "system", "content": system_prompt},
+ {"role": "user", "content": prompt}
+ ]
+
+ kwargs = {
+ "model": self.model_name,
+ "messages": _inject_json_instruction(messages) if use_minimax else messages,
+ "temperature": temperature,
# 不设置max_tokens,让LLM自由发挥
- )
-
+ }
+ if not use_minimax:
+ kwargs["response_format"] = {"type": "json_object"}
+
+ response = self.client.chat.completions.create(**kwargs)
+
content = response.choices[0].message.content
+            # 移除 <think> 标签
+            content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
finish_reason = response.choices[0].finish_reason
-
+
# 检查是否被截断
if finish_reason == 'length':
logger.warning(f"LLM输出被截断 (attempt {attempt+1})")
content = self._fix_truncated_json(content)
-
+
# 尝试解析JSON
try:
- return json.loads(content)
- except json.JSONDecodeError as e:
+ return parse_json_from_response(content)
+ except (json.JSONDecodeError, ValueError) as e:
logger.warning(f"JSON解析失败 (attempt {attempt+1}): {str(e)[:80]}")
-
+
# 尝试修复JSON
fixed = self._try_fix_config_json(content)
if fixed:
return fixed
-
+
last_error = e
except Exception as e:
diff --git a/backend/app/utils/llm_client.py b/backend/app/utils/llm_client.py
index 6c1a81f4..fb614652 100644
--- a/backend/app/utils/llm_client.py
+++ b/backend/app/utils/llm_client.py
@@ -1,6 +1,6 @@
"""
LLM客户端封装
-统一使用OpenAI格式调用
+统一使用OpenAI格式调用,兼容 MiniMax 等 OpenAI 兼容 API
"""
import json
@@ -11,9 +11,52 @@
from ..config import Config
+def _is_minimax(model: str, base_url: str) -> bool:
+ """检测当前是否使用 MiniMax 模型"""
+ model_lower = (model or "").lower()
+ url_lower = (base_url or "").lower()
+ return "minimax" in model_lower or "minimax" in url_lower
+
+
+def _clamp_temperature(temperature: float, model: str, base_url: str) -> float:
+ """MiniMax 要求 temperature 在 (0.0, 1.0] 之间,不能为 0"""
+ if _is_minimax(model, base_url) and temperature <= 0:
+ return 0.01
+ return temperature
+
+
+def parse_json_from_response(content: str) -> Any:
+ """从 LLM 响应中解析 JSON,支持多种格式"""
+ trimmed = content.strip()
+
+ # 1. 直接解析
+ try:
+ return json.loads(trimmed)
+ except json.JSONDecodeError:
+ pass
+
+ # 2. 提取 markdown code block
+ code_block_match = re.search(r'```(?:json)?\s*\n?([\s\S]*?)```', trimmed)
+ if code_block_match:
+ try:
+ return json.loads(code_block_match.group(1).strip())
+ except json.JSONDecodeError:
+ pass
+
+ # 3. 提取 { } 或 [ ]
+ json_match = re.search(r'(\{[\s\S]*\}|\[[\s\S]*\])', trimmed)
+ if json_match:
+ try:
+ return json.loads(json_match.group(1))
+ except json.JSONDecodeError:
+ pass
+
+ raise ValueError(f"LLM返回的JSON格式无效: {trimmed}")
+
+
class LLMClient:
"""LLM客户端"""
-
+
def __init__(
self,
api_key: Optional[str] = None,
@@ -23,15 +66,20 @@ def __init__(
self.api_key = api_key or Config.LLM_API_KEY
self.base_url = base_url or Config.LLM_BASE_URL
self.model = model or Config.LLM_MODEL_NAME
-
+
if not self.api_key:
raise ValueError("LLM_API_KEY 未配置")
-
+
self.client = OpenAI(
api_key=self.api_key,
base_url=self.base_url
)
-
+
+ @property
+ def is_minimax(self) -> bool:
+ """检测当前是否使用 MiniMax 模型"""
+ return _is_minimax(self.model, self.base_url)
+
def chat(
self,
messages: List[Dict[str, str]],
@@ -41,32 +89,37 @@ def chat(
) -> str:
"""
发送聊天请求
-
+
Args:
messages: 消息列表
temperature: 温度参数
max_tokens: 最大token数
response_format: 响应格式(如JSON模式)
-
+
Returns:
模型响应文本
"""
+ temperature = _clamp_temperature(temperature, self.model, self.base_url)
+
kwargs = {
"model": self.model,
"messages": messages,
"temperature": temperature,
"max_tokens": max_tokens,
}
-
- if response_format:
+
+ # MiniMax 不支持 response_format,改用 prompt 引导 JSON 输出
+ if response_format and self.is_minimax:
+ messages = _inject_json_instruction(messages)
+ elif response_format:
kwargs["response_format"] = response_format
-
+
response = self.client.chat.completions.create(**kwargs)
content = response.choices[0].message.content
# 部分模型(如MiniMax M2.5)会在content中包含思考内容,需要移除
        content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
return content
-
+
def chat_json(
self,
messages: List[Dict[str, str]],
@@ -75,12 +128,12 @@ def chat_json(
) -> Dict[str, Any]:
"""
发送聊天请求并返回JSON
-
+
Args:
messages: 消息列表
temperature: 温度参数
max_tokens: 最大token数
-
+
Returns:
解析后的JSON对象
"""
@@ -90,14 +143,19 @@ def chat_json(
max_tokens=max_tokens,
response_format={"type": "json_object"}
)
- # 清理markdown代码块标记
- cleaned_response = response.strip()
- cleaned_response = re.sub(r'^```(?:json)?\s*\n?', '', cleaned_response, flags=re.IGNORECASE)
- cleaned_response = re.sub(r'\n?```\s*$', '', cleaned_response)
- cleaned_response = cleaned_response.strip()
+ return parse_json_from_response(response)
- try:
- return json.loads(cleaned_response)
- except json.JSONDecodeError:
- raise ValueError(f"LLM返回的JSON格式无效: {cleaned_response}")
+
+def _inject_json_instruction(messages: List[Dict[str, str]]) -> List[Dict[str, str]]:
+ """在消息列表中注入 JSON 输出指令(用于不支持 response_format 的模型)"""
+ json_hint = "\n\nYou must respond with valid JSON only. No markdown, no explanation, no extra text."
+ messages = [msg.copy() for msg in messages]
+ # 优先追加到 system 消息
+ for msg in messages:
+ if msg.get("role") == "system":
+ msg["content"] = msg["content"] + json_hint
+ return messages
+ # 如果没有 system 消息,在开头插入一条
+ messages.insert(0, {"role": "system", "content": json_hint.strip()})
+ return messages
diff --git a/backend/tests/test_minimax_compat.py b/backend/tests/test_minimax_compat.py
new file mode 100644
index 00000000..7f41443b
--- /dev/null
+++ b/backend/tests/test_minimax_compat.py
@@ -0,0 +1,176 @@
+"""
+MiniMax 兼容性测试
+验证 LLMClient 对 MiniMax 模型的兼容处理
+"""
+
+import json
+import re
+import pytest
+import sys
+import os
+
+# 直接导入 llm_client 模块中的独立函数,绕过 Flask 依赖
+# 通过模拟 Config 来避免导入整个 app 模块
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+
+# 直接从源文件提取独立函数进行测试
+def _is_minimax(model, base_url):
+ model_lower = (model or "").lower()
+ url_lower = (base_url or "").lower()
+ return "minimax" in model_lower or "minimax" in url_lower
+
+
+def _clamp_temperature(temperature, model, base_url):
+ if _is_minimax(model, base_url) and temperature <= 0:
+ return 0.01
+ return temperature
+
+
+def parse_json_from_response(content):
+ trimmed = content.strip()
+ try:
+ return json.loads(trimmed)
+ except json.JSONDecodeError:
+ pass
+ code_block_match = re.search(r'```(?:json)?\s*\n?([\s\S]*?)```', trimmed)
+ if code_block_match:
+ try:
+ return json.loads(code_block_match.group(1).strip())
+ except json.JSONDecodeError:
+ pass
+ json_match = re.search(r'(\{[\s\S]*\}|\[[\s\S]*\])', trimmed)
+ if json_match:
+ try:
+ return json.loads(json_match.group(1))
+ except json.JSONDecodeError:
+ pass
+ raise ValueError(f"LLM返回的JSON格式无效: {trimmed}")
+
+
+def _inject_json_instruction(messages):
+ json_hint = "\n\nYou must respond with valid JSON only. No markdown, no explanation, no extra text."
+ messages = [msg.copy() for msg in messages]
+ for msg in messages:
+ if msg.get("role") == "system":
+ msg["content"] = msg["content"] + json_hint
+ return messages
+ messages.insert(0, {"role": "system", "content": json_hint.strip()})
+ return messages
+
+
+class TestIsMinimax:
+ def test_minimax_model_name(self):
+ assert _is_minimax("MiniMax-M2.5", "https://api.openai.com/v1") is True
+
+ def test_minimax_model_name_lowercase(self):
+ assert _is_minimax("minimax-m2.5", "https://api.openai.com/v1") is True
+
+ def test_minimax_base_url(self):
+ assert _is_minimax("some-model", "https://api.minimax.io/v1") is True
+
+ def test_minimax_base_url_cn(self):
+ assert _is_minimax("some-model", "https://api.minimaxi.com/v1") is True
+
+ def test_not_minimax_openai(self):
+ assert _is_minimax("gpt-4o", "https://api.openai.com/v1") is False
+
+ def test_not_minimax_dashscope(self):
+ assert _is_minimax("qwen-plus", "https://dashscope.aliyuncs.com/compatible-mode/v1") is False
+
+ def test_none_values(self):
+ assert _is_minimax(None, None) is False
+
+ def test_minimax_highspeed(self):
+ assert _is_minimax("MiniMax-M2.5-highspeed", "https://api.minimax.io/v1") is True
+
+
+class TestClampTemperature:
+ def test_zero_temperature_minimax(self):
+ result = _clamp_temperature(0.0, "MiniMax-M2.5", "https://api.minimax.io/v1")
+ assert result == 0.01
+
+ def test_negative_temperature_minimax(self):
+ result = _clamp_temperature(-0.1, "MiniMax-M2.5", "https://api.minimax.io/v1")
+ assert result == 0.01
+
+ def test_valid_temperature_minimax(self):
+ result = _clamp_temperature(0.7, "MiniMax-M2.5", "https://api.minimax.io/v1")
+ assert result == 0.7
+
+ def test_zero_temperature_non_minimax(self):
+ result = _clamp_temperature(0.0, "gpt-4o", "https://api.openai.com/v1")
+ assert result == 0.0
+
+ def test_max_temperature_minimax(self):
+ result = _clamp_temperature(1.0, "MiniMax-M2.5", "https://api.minimax.io/v1")
+ assert result == 1.0
+
+
+class TestInjectJsonInstruction:
+ def test_inject_to_existing_system_message(self):
+ messages = [
+ {"role": "system", "content": "You are a helper."},
+ {"role": "user", "content": "Generate JSON."}
+ ]
+ result = _inject_json_instruction(messages)
+ assert "valid JSON only" in result[0]["content"]
+ assert result[0]["content"].startswith("You are a helper.")
+ # Original should not be mutated
+ assert "valid JSON only" not in messages[0]["content"]
+
+ def test_inject_without_system_message(self):
+ messages = [
+ {"role": "user", "content": "Generate JSON."}
+ ]
+ result = _inject_json_instruction(messages)
+ assert len(result) == 2
+ assert result[0]["role"] == "system"
+ assert "valid JSON only" in result[0]["content"]
+
+ def test_does_not_mutate_original(self):
+ messages = [
+ {"role": "system", "content": "Hello"},
+ {"role": "user", "content": "Test"}
+ ]
+ original_content = messages[0]["content"]
+ _inject_json_instruction(messages)
+ assert messages[0]["content"] == original_content
+
+
+class TestParseJsonFromResponse:
+ def test_direct_json(self):
+ result = parse_json_from_response('{"key": "value"}')
+ assert result == {"key": "value"}
+
+ def test_json_with_markdown_block(self):
+ text = '```json\n{"key": "value"}\n```'
+ result = parse_json_from_response(text)
+ assert result == {"key": "value"}
+
+ def test_json_with_surrounding_text(self):
+ text = 'Here is the result:\n{"key": "value"}\nDone.'
+ result = parse_json_from_response(text)
+ assert result == {"key": "value"}
+
+ def test_json_array(self):
+ result = parse_json_from_response('[1, 2, 3]')
+ assert result == [1, 2, 3]
+
+ def test_invalid_json_raises(self):
+ with pytest.raises(ValueError, match="JSON格式无效"):
+ parse_json_from_response("not json at all")
+
+ def test_nested_json(self):
+ text = '{"agents": [{"name": "Alice"}, {"name": "Bob"}]}'
+ result = parse_json_from_response(text)
+ assert len(result["agents"]) == 2
+
+ def test_markdown_block_without_json_label(self):
+ text = '```\n{"key": "value"}\n```'
+ result = parse_json_from_response(text)
+ assert result == {"key": "value"}
+
+
+if __name__ == "__main__":
+ pytest.main([__file__, "-v"])