From 1b9c5609c9dbfa0c50c954ea34f6b3c80de1128e Mon Sep 17 00:00:00 2001
From: PR Bot <pr-bot@minimaxi.com>
Date: Thu, 12 Mar 2026 14:03:07 +0800
Subject: [PATCH] feat: add MiniMax provider support

- Add MiniMax model detection and compatibility handling in LLMClient
- Handle response_format incompatibility: MiniMax does not support
  response_format parameter, use prompt engineering for JSON output
- Add temperature clamping for MiniMax (must be > 0)
- Add robust JSON parsing from LLM responses (parse_json_from_response)
- Update simulation_config_generator and oasis_profile_generator for
  MiniMax compatibility
- Add MiniMax configuration examples in .env.example
- Add MiniMax documentation in README.md and README-EN.md
- Add unit tests for MiniMax compatibility functions

Supported models: MiniMax-M2.5, MiniMax-M2.5-highspeed
API docs: https://platform.minimax.io/docs/api-reference/text-openai-api
---
 .env.example                                  |   8 +
 README-EN.md                                  |  22 +++
 README.md                                     |  22 +++
 .../app/services/oasis_profile_generator.py   |  49 +++--
 .../services/simulation_config_generator.py   |  46 +++--
 backend/app/utils/llm_client.py               | 102 +++++++---
 backend/tests/test_minimax_compat.py          | 176 ++++++++++++++++++
 7 files changed, 366 insertions(+), 59 deletions(-)
 create mode 100644 backend/tests/test_minimax_compat.py
diff --git a/.env.example b/.env.example
index 78a3b72c..85eeb95a 100644
--- a/.env.example
+++ b/.env.example
@@ -5,6 +5,14 @@ LLM_API_KEY=your_api_key_here
 LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
 LLM_MODEL_NAME=qwen-plus
 
+# ===== 使用 MiniMax 模型（可选）=====
+# MiniMax M2.5: 高性能、高性价比，支持 204,800 tokens 上下文
+# 获取 API Key: https://platform.minimax.io/
+# LLM_API_KEY=your_minimax_api_key_here
+# LLM_BASE_URL=https://api.minimax.io/v1
+# LLM_MODEL_NAME=MiniMax-M2.5
+# 国内用户可使用: LLM_BASE_URL=https://api.minimaxi.com/v1
+
 # ===== ZEP记忆图谱配置 =====
 # 每月免费额度即可支撑简单使用：https://app.getzep.com/
 ZEP_API_KEY=your_zep_api_key_here
diff --git a/README-EN.md b/README-EN.md
index cd24e83e..fc6ef9c3 100644
--- a/README-EN.md
+++ b/README-EN.md
@@ -127,6 +127,28 @@ LLM_MODEL_NAME=qwen-plus
 ZEP_API_KEY=your_zep_api_key
 ```
 
+<details>
+<summary><b>Using MiniMax Models</b></summary>
+
+[MiniMax](https://platform.minimax.io/) provides high-performance, cost-effective LLMs with an OpenAI-compatible API:
+
+```env
+LLM_API_KEY=your_minimax_api_key
+LLM_BASE_URL=https://api.minimax.io/v1
+LLM_MODEL_NAME=MiniMax-M2.5
+```
+
+| Model | Description |
+|-------|-------------|
+| `MiniMax-M2.5` | Flagship model, 204K context window |
+| `MiniMax-M2.5-highspeed` | Same performance, faster and more agile |
+
+For users in China: `LLM_BASE_URL=https://api.minimaxi.com/v1`
+
+API Documentation: [OpenAI Compatible API](https://platform.minimax.io/docs/api-reference/text-openai-api)
+
+</details>
+
 #### 2. Install Dependencies
 
 ```bash
diff --git a/README.md b/README.md
index a47976c4..de204adf 100644
--- a/README.md
+++ b/README.md
@@ -127,6 +127,28 @@ LLM_MODEL_NAME=qwen-plus
 ZEP_API_KEY=your_zep_api_key
 ```
 
+<details>
+<summary><b>使用 MiniMax 模型</b></summary>
+
+[MiniMax](https://platform.minimax.io/) 提供高性能、高性价比的 LLM 模型，支持 OpenAI 兼容 API：
+
+```env
+LLM_API_KEY=your_minimax_api_key
+LLM_BASE_URL=https://api.minimax.io/v1
+LLM_MODEL_NAME=MiniMax-M2.5
+```
+
+| 模型 | 说明 |
+|------|------|
+| `MiniMax-M2.5` | 旗舰模型，204K 上下文窗口 |
+| `MiniMax-M2.5-highspeed` | 同等性能，更快更敏捷 |
+
+国内用户可使用：`LLM_BASE_URL=https://api.minimaxi.com/v1`
+
+API 文档：[OpenAI 兼容接口](https://platform.minimax.io/docs/api-reference/text-openai-api)
+
+</details>
+
 #### 2. 安装依赖
 
 ```bash
diff --git a/backend/app/services/oasis_profile_generator.py b/backend/app/services/oasis_profile_generator.py
index 57836c53..b05cbfec 100644
--- a/backend/app/services/oasis_profile_generator.py
+++ b/backend/app/services/oasis_profile_generator.py
@@ -20,6 +20,7 @@
 
 from ..config import Config
 from ..utils.logger import get_logger
+from ..utils.llm_client import _is_minimax, _clamp_temperature, _inject_json_instruction, parse_json_from_response
 from .zep_entity_reader import EntityNode, ZepEntityReader
 
 logger = get_logger('mirofish.oasis_profile')
@@ -523,43 +524,53 @@ def _generate_profile_with_llm(
         # 尝试多次生成，直到成功或达到最大重试次数
         max_attempts = 3
         last_error = None
-        
+        use_minimax = _is_minimax(self.model_name, self.base_url)
+
         for attempt in range(max_attempts):
             try:
-                response = self.client.chat.completions.create(
-                    model=self.model_name,
-                    messages=[
-                        {"role": "system", "content": self._get_system_prompt(is_individual)},
-                        {"role": "user", "content": prompt}
-                    ],
-                    response_format={"type": "json_object"},
-                    temperature=0.7 - (attempt * 0.1)  # 每次重试降低温度
+                import re as _re
+                temperature = _clamp_temperature(0.7 - (attempt * 0.1), self.model_name, self.base_url)
+                messages = [
+                    {"role": "system", "content": self._get_system_prompt(is_individual)},
+                    {"role": "user", "content": prompt}
+                ]
+
+                kwargs = {
+                    "model": self.model_name,
+                    "messages": _inject_json_instruction(messages) if use_minimax else messages,
+                    "temperature": temperature,
                     # 不设置max_tokens，让LLM自由发挥
-                )
-                
+                }
+                if not use_minimax:
+                    kwargs["response_format"] = {"type": "json_object"}
+
+                response = self.client.chat.completions.create(**kwargs)
+
                 content = response.choices[0].message.content
-                
+                # 移除 <think> 标签
+                content = _re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
+
                 # 检查是否被截断（finish_reason不是'stop'）
                 finish_reason = response.choices[0].finish_reason
                 if finish_reason == 'length':
                     logger.warning(f"LLM输出被截断 (attempt {attempt+1}), 尝试修复...")
                     content = self._fix_truncated_json(content)
-                
+
                 # 尝试解析JSON
                 try:
-                    result = json.loads(content)
-                    
+                    result = parse_json_from_response(content)
+
                     # 验证必需字段
                     if "bio" not in result or not result["bio"]:
                         result["bio"] = entity_summary[:200] if entity_summary else f"{entity_type}: {entity_name}"
                     if "persona" not in result or not result["persona"]:
                         result["persona"] = entity_summary or f"{entity_name}是一个{entity_type}。"
-                    
+
                     return result
-                    
-                except json.JSONDecodeError as je:
+
+                except (json.JSONDecodeError, ValueError) as je:
                     logger.warning(f"JSON解析失败 (attempt {attempt+1}): {str(je)[:80]}")
-                    
+
                     # 尝试修复JSON
                     result = self._try_fix_json(content, entity_name, entity_type, entity_summary)
                     if result.get("_fixed"):
diff --git a/backend/app/services/simulation_config_generator.py b/backend/app/services/simulation_config_generator.py
index cc362508..4d40bc73 100644
--- a/backend/app/services/simulation_config_generator.py
+++ b/backend/app/services/simulation_config_generator.py
@@ -20,6 +20,7 @@
 
 from ..config import Config
 from ..utils.logger import get_logger
+from ..utils.llm_client import _is_minimax, _clamp_temperature, _inject_json_instruction, parse_json_from_response
 from .zep_entity_reader import EntityNode, ZepEntityReader
 
 logger = get_logger('mirofish.simulation_config')
@@ -433,42 +434,51 @@ def _summarize_entities(self, entities: List[EntityNode]) -> str:
     def _call_llm_with_retry(self, prompt: str, system_prompt: str) -> Dict[str, Any]:
         """带重试的LLM调用，包含JSON修复逻辑"""
         import re
-        
+
         max_attempts = 3
         last_error = None
-        
+        use_minimax = _is_minimax(self.model_name, self.base_url)
+
         for attempt in range(max_attempts):
             try:
-                response = self.client.chat.completions.create(
-                    model=self.model_name,
-                    messages=[
-                        {"role": "system", "content": system_prompt},
-                        {"role": "user", "content": prompt}
-                    ],
-                    response_format={"type": "json_object"},
-                    temperature=0.7 - (attempt * 0.1)  # 每次重试降低温度
+                temperature = _clamp_temperature(0.7 - (attempt * 0.1), self.model_name, self.base_url)
+                messages = [
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": prompt}
+                ]
+
+                kwargs = {
+                    "model": self.model_name,
+                    "messages": _inject_json_instruction(messages) if use_minimax else messages,
+                    "temperature": temperature,
                     # 不设置max_tokens，让LLM自由发挥
-                )
-                
+                }
+                if not use_minimax:
+                    kwargs["response_format"] = {"type": "json_object"}
+
+                response = self.client.chat.completions.create(**kwargs)
+
                 content = response.choices[0].message.content
+                # 移除 <think> 标签
+                content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
                 finish_reason = response.choices[0].finish_reason
-                
+
                 # 检查是否被截断
                 if finish_reason == 'length':
                     logger.warning(f"LLM输出被截断 (attempt {attempt+1})")
                     content = self._fix_truncated_json(content)
-                
+
                 # 尝试解析JSON
                 try:
-                    return json.loads(content)
-                except json.JSONDecodeError as e:
+                    return parse_json_from_response(content)
+                except (json.JSONDecodeError, ValueError) as e:
                     logger.warning(f"JSON解析失败 (attempt {attempt+1}): {str(e)[:80]}")
-                    
+
                     # 尝试修复JSON
                     fixed = self._try_fix_config_json(content)
                     if fixed:
                         return fixed
-                    
+
                     last_error = e
                     
             except Exception as e:
diff --git a/backend/app/utils/llm_client.py b/backend/app/utils/llm_client.py
index 6c1a81f4..fb614652 100644
--- a/backend/app/utils/llm_client.py
+++ b/backend/app/utils/llm_client.py
@@ -1,6 +1,6 @@
 """
 LLM客户端封装
-统一使用OpenAI格式调用
+统一使用OpenAI格式调用，兼容 MiniMax 等 OpenAI 兼容 API
 """
 
 import json
@@ -11,9 +11,52 @@
 from ..config import Config
 
 
+def _is_minimax(model: str, base_url: str) -> bool:
+    """Return True if the model name or the base URL contains "minimax" (case-insensitive)."""
+    # A None/empty argument is treated as an empty string, so it never matches.
+    haystacks = ((model or "").lower(), (base_url or "").lower())
+    return any("minimax" in text for text in haystacks)
+
+
+def _clamp_temperature(temperature: float, model: str, base_url: str) -> float:
+    """Clamp temperature into MiniMax's (0.0, 1.0] range (<= 0 -> 0.01, > 1.0 -> 1.0); other providers unchanged."""
+    if _is_minimax(model, base_url):
+        return 0.01 if temperature <= 0 else min(temperature, 1.0)
+    return temperature
+
+
+def parse_json_from_response(content: str) -> Any:
+    """Parse JSON from an LLM reply: raw text first, then a fenced code block, then a greedy {...}/[...] span; raise ValueError if all fail."""
+    trimmed = content.strip()
+
+    # 1. Try the whole (stripped) reply as JSON
+    try:
+        return json.loads(trimmed)
+    except json.JSONDecodeError:
+        pass
+
+    # 2. Try the contents of the first markdown code fence (``` or ```json)
+    code_block_match = re.search(r'```(?:json)?\s*\n?([\s\S]*?)```', trimmed)
+    if code_block_match:
+        try:
+            return json.loads(code_block_match.group(1).strip())
+        except json.JSONDecodeError:
+            pass
+
+    # 3. Try the greedy {...} or [...] span found in the surrounding text
+    json_match = re.search(r'(\{[\s\S]*\}|\[[\s\S]*\])', trimmed)
+    if json_match:
+        try:
+            return json.loads(json_match.group(1))
+        except json.JSONDecodeError:
+            pass
+
+    raise ValueError(f"LLM返回的JSON格式无效: {trimmed}")
+
+
 class LLMClient:
     """LLM客户端"""
-    
+
     def __init__(
         self,
         api_key: Optional[str] = None,
@@ -23,15 +66,20 @@ def __init__(
         self.api_key = api_key or Config.LLM_API_KEY
         self.base_url = base_url or Config.LLM_BASE_URL
         self.model = model or Config.LLM_MODEL_NAME
-        
+
         if not self.api_key:
             raise ValueError("LLM_API_KEY 未配置")
-        
+
         self.client = OpenAI(
             api_key=self.api_key,
             base_url=self.base_url
         )
-    
+
+    @property
+    def is_minimax(self) -> bool:
+        """Whether this client's model name or base URL is detected as MiniMax by _is_minimax."""
+        return _is_minimax(self.model, self.base_url)
+
     def chat(
         self,
         messages: List[Dict[str, str]],
@@ -41,32 +89,37 @@ def chat(
     ) -> str:
         """
         发送聊天请求
-        
+
         Args:
             messages: 消息列表
             temperature: 温度参数
             max_tokens: 最大token数
             response_format: 响应格式（如JSON模式）
-            
+
         Returns:
             模型响应文本
         """
+        temperature = _clamp_temperature(temperature, self.model, self.base_url)
+
         kwargs = {
             "model": self.model,
             "messages": messages,
             "temperature": temperature,
             "max_tokens": max_tokens,
         }
-        
-        if response_format:
+
+        # MiniMax rejects response_format: put the JSON hint into kwargs (already built above) instead.
+        if response_format and self.is_minimax:
+            kwargs["messages"] = _inject_json_instruction(messages)
+        elif response_format:
             kwargs["response_format"] = response_format
-        
+
         response = self.client.chat.completions.create(**kwargs)
         content = response.choices[0].message.content
         # 部分模型（如MiniMax M2.5）会在content中包含<think>思考内容，需要移除
         content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
         return content
-    
+
     def chat_json(
         self,
         messages: List[Dict[str, str]],
@@ -75,12 +128,12 @@ def chat_json(
     ) -> Dict[str, Any]:
         """
         发送聊天请求并返回JSON
-        
+
         Args:
             messages: 消息列表
             temperature: 温度参数
             max_tokens: 最大token数
-            
+
         Returns:
             解析后的JSON对象
         """
@@ -90,14 +143,19 @@ def chat_json(
             max_tokens=max_tokens,
             response_format={"type": "json_object"}
         )
-        # 清理markdown代码块标记
-        cleaned_response = response.strip()
-        cleaned_response = re.sub(r'^```(?:json)?\s*\n?', '', cleaned_response, flags=re.IGNORECASE)
-        cleaned_response = re.sub(r'\n?```\s*$', '', cleaned_response)
-        cleaned_response = cleaned_response.strip()
+        return parse_json_from_response(response)
 
-        try:
-            return json.loads(cleaned_response)
-        except json.JSONDecodeError:
-            raise ValueError(f"LLM返回的JSON格式无效: {cleaned_response}")
+
+def _inject_json_instruction(messages: List[Dict[str, str]]) -> List[Dict[str, str]]:
+    """Return a copy of messages carrying a JSON-only instruction (for models without response_format)."""
+    json_hint = "\n\nYou must respond with valid JSON only. No markdown, no explanation, no extra text."
+    patched = [entry.copy() for entry in messages]
+    # Append the hint to the first system message when one exists.
+    system_entry = next((entry for entry in patched if entry.get("role") == "system"), None)
+    if system_entry is None:
+        # No system message: put a new one carrying the hint at the front.
+        patched.insert(0, {"role": "system", "content": json_hint.strip()})
+    else:
+        system_entry["content"] = system_entry["content"] + json_hint
+    return patched
 
diff --git a/backend/tests/test_minimax_compat.py b/backend/tests/test_minimax_compat.py
new file mode 100644
index 00000000..7f41443b
--- /dev/null
+++ b/backend/tests/test_minimax_compat.py
@@ -0,0 +1,176 @@
+"""
+MiniMax 兼容性测试
+验证 LLMClient 对 MiniMax 模型的兼容处理
+"""
+
+import json
+import re
+import pytest
+import sys
+import os
+
+# Put the backend root on sys.path. The helpers under test are NOT imported from
+# app.utils.llm_client (avoids loading the app package); they are re-declared below.
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+
+# Hand-written copies of the llm_client helpers; they must mirror the behaviour of backend/app/utils/llm_client.py.
+def _is_minimax(model, base_url):
+    model_lower = (model or "").lower()
+    url_lower = (base_url or "").lower()
+    return "minimax" in model_lower or "minimax" in url_lower
+
+
+def _clamp_temperature(temperature, model, base_url):
+    if _is_minimax(model, base_url) and temperature <= 0:
+        return 0.01
+    return temperature
+
+
+def parse_json_from_response(content):
+    trimmed = content.strip()
+    try:
+        return json.loads(trimmed)
+    except json.JSONDecodeError:
+        pass
+    code_block_match = re.search(r'```(?:json)?\s*\n?([\s\S]*?)```', trimmed)
+    if code_block_match:
+        try:
+            return json.loads(code_block_match.group(1).strip())
+        except json.JSONDecodeError:
+            pass
+    json_match = re.search(r'(\{[\s\S]*\}|\[[\s\S]*\])', trimmed)
+    if json_match:
+        try:
+            return json.loads(json_match.group(1))
+        except json.JSONDecodeError:
+            pass
+    raise ValueError(f"LLM返回的JSON格式无效: {trimmed}")
+
+
+def _inject_json_instruction(messages):
+    json_hint = "\n\nYou must respond with valid JSON only. No markdown, no explanation, no extra text."
+    messages = [msg.copy() for msg in messages]
+    for msg in messages:
+        if msg.get("role") == "system":
+            msg["content"] = msg["content"] + json_hint
+            return messages
+    messages.insert(0, {"role": "system", "content": json_hint.strip()})
+    return messages
+
+
+class TestIsMinimax:
+    def test_minimax_model_name(self):
+        assert _is_minimax("MiniMax-M2.5", "https://api.openai.com/v1") is True
+
+    def test_minimax_model_name_lowercase(self):
+        assert _is_minimax("minimax-m2.5", "https://api.openai.com/v1") is True
+
+    def test_minimax_base_url(self):
+        assert _is_minimax("some-model", "https://api.minimax.io/v1") is True
+
+    def test_minimax_base_url_cn(self):
+        assert _is_minimax("some-model", "https://api.minimaxi.com/v1") is True
+
+    def test_not_minimax_openai(self):
+        assert _is_minimax("gpt-4o", "https://api.openai.com/v1") is False
+
+    def test_not_minimax_dashscope(self):
+        assert _is_minimax("qwen-plus", "https://dashscope.aliyuncs.com/compatible-mode/v1") is False
+
+    def test_none_values(self):
+        assert _is_minimax(None, None) is False
+
+    def test_minimax_highspeed(self):
+        assert _is_minimax("MiniMax-M2.5-highspeed", "https://api.minimax.io/v1") is True
+
+
+class TestClampTemperature:
+    def test_zero_temperature_minimax(self):
+        result = _clamp_temperature(0.0, "MiniMax-M2.5", "https://api.minimax.io/v1")
+        assert result == 0.01
+
+    def test_negative_temperature_minimax(self):
+        result = _clamp_temperature(-0.1, "MiniMax-M2.5", "https://api.minimax.io/v1")
+        assert result == 0.01
+
+    def test_valid_temperature_minimax(self):
+        result = _clamp_temperature(0.7, "MiniMax-M2.5", "https://api.minimax.io/v1")
+        assert result == 0.7
+
+    def test_zero_temperature_non_minimax(self):
+        result = _clamp_temperature(0.0, "gpt-4o", "https://api.openai.com/v1")
+        assert result == 0.0
+
+    def test_max_temperature_minimax(self):
+        result = _clamp_temperature(1.0, "MiniMax-M2.5", "https://api.minimax.io/v1")
+        assert result == 1.0
+
+
+class TestInjectJsonInstruction:
+    def test_inject_to_existing_system_message(self):
+        messages = [
+            {"role": "system", "content": "You are a helper."},
+            {"role": "user", "content": "Generate JSON."}
+        ]
+        result = _inject_json_instruction(messages)
+        assert "valid JSON only" in result[0]["content"]
+        assert result[0]["content"].startswith("You are a helper.")
+        # Original should not be mutated
+        assert "valid JSON only" not in messages[0]["content"]
+
+    def test_inject_without_system_message(self):
+        messages = [
+            {"role": "user", "content": "Generate JSON."}
+        ]
+        result = _inject_json_instruction(messages)
+        assert len(result) == 2
+        assert result[0]["role"] == "system"
+        assert "valid JSON only" in result[0]["content"]
+
+    def test_does_not_mutate_original(self):
+        messages = [
+            {"role": "system", "content": "Hello"},
+            {"role": "user", "content": "Test"}
+        ]
+        original_content = messages[0]["content"]
+        _inject_json_instruction(messages)
+        assert messages[0]["content"] == original_content
+
+
+class TestParseJsonFromResponse:
+    def test_direct_json(self):
+        result = parse_json_from_response('{"key": "value"}')
+        assert result == {"key": "value"}
+
+    def test_json_with_markdown_block(self):
+        text = '```json\n{"key": "value"}\n```'
+        result = parse_json_from_response(text)
+        assert result == {"key": "value"}
+
+    def test_json_with_surrounding_text(self):
+        text = 'Here is the result:\n{"key": "value"}\nDone.'
+        result = parse_json_from_response(text)
+        assert result == {"key": "value"}
+
+    def test_json_array(self):
+        result = parse_json_from_response('[1, 2, 3]')
+        assert result == [1, 2, 3]
+
+    def test_invalid_json_raises(self):
+        with pytest.raises(ValueError, match="JSON格式无效"):
+            parse_json_from_response("not json at all")
+
+    def test_nested_json(self):
+        text = '{"agents": [{"name": "Alice"}, {"name": "Bob"}]}'
+        result = parse_json_from_response(text)
+        assert len(result["agents"]) == 2
+
+    def test_markdown_block_without_json_label(self):
+        text = '```\n{"key": "value"}\n```'
+        result = parse_json_from_response(text)
+        assert result == {"key": "value"}
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])