From 1b9c5609c9dbfa0c50c954ea34f6b3c80de1128e Mon Sep 17 00:00:00 2001 From: PR Bot Date: Thu, 12 Mar 2026 14:03:07 +0800 Subject: [PATCH] feat: add MiniMax provider support - Add MiniMax model detection and compatibility handling in LLMClient - Handle response_format incompatibility: MiniMax does not support response_format parameter, use prompt engineering for JSON output - Add temperature clamping for MiniMax (must be > 0) - Add robust JSON parsing from LLM responses (parse_json_from_response) - Update simulation_config_generator and oasis_profile_generator for MiniMax compatibility - Add MiniMax configuration examples in .env.example - Add MiniMax documentation in README.md and README-EN.md - Add unit tests for MiniMax compatibility functions Supported models: MiniMax-M2.5, MiniMax-M2.5-highspeed API docs: https://platform.minimax.io/docs/api-reference/text-openai-api --- .env.example | 8 + README-EN.md | 22 +++ README.md | 22 +++ .../app/services/oasis_profile_generator.py | 49 +++-- .../services/simulation_config_generator.py | 46 +++-- backend/app/utils/llm_client.py | 102 +++++++--- backend/tests/test_minimax_compat.py | 176 ++++++++++++++++++ 7 files changed, 366 insertions(+), 59 deletions(-) create mode 100644 backend/tests/test_minimax_compat.py diff --git a/.env.example b/.env.example index 78a3b72c..85eeb95a 100644 --- a/.env.example +++ b/.env.example @@ -5,6 +5,14 @@ LLM_API_KEY=your_api_key_here LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1 LLM_MODEL_NAME=qwen-plus +# ===== 使用 MiniMax 模型(可选)===== +# MiniMax M2.5: 高性能、高性价比,支持 204,800 tokens 上下文 +# 获取 API Key: https://platform.minimax.io/ +# LLM_API_KEY=your_minimax_api_key_here +# LLM_BASE_URL=https://api.minimax.io/v1 +# LLM_MODEL_NAME=MiniMax-M2.5 +# 国内用户可使用: LLM_BASE_URL=https://api.minimaxi.com/v1 + # ===== ZEP记忆图谱配置 ===== # 每月免费额度即可支撑简单使用:https://app.getzep.com/ ZEP_API_KEY=your_zep_api_key_here diff --git a/README-EN.md b/README-EN.md index cd24e83e..fc6ef9c3 100644 --- 
a/README-EN.md +++ b/README-EN.md @@ -127,6 +127,28 @@ LLM_MODEL_NAME=qwen-plus ZEP_API_KEY=your_zep_api_key ``` +<details>
+<summary>Using MiniMax Models</summary> + +[MiniMax](https://platform.minimax.io/) provides high-performance, cost-effective LLM models with an OpenAI-compatible API: + +```env +LLM_API_KEY=your_minimax_api_key +LLM_BASE_URL=https://api.minimax.io/v1 +LLM_MODEL_NAME=MiniMax-M2.5 +``` + +| Model | Description | +|-------|-------------| +| `MiniMax-M2.5` | Flagship model, 204K context window | +| `MiniMax-M2.5-highspeed` | Same performance, faster and more agile | + +For users in China: `LLM_BASE_URL=https://api.minimaxi.com/v1` + +API Documentation: [OpenAI Compatible API](https://platform.minimax.io/docs/api-reference/text-openai-api) + +</details>
+ #### 2. Install Dependencies ```bash diff --git a/README.md b/README.md index a47976c4..de204adf 100644 --- a/README.md +++ b/README.md @@ -127,6 +127,28 @@ LLM_MODEL_NAME=qwen-plus ZEP_API_KEY=your_zep_api_key ``` +<details>
+<summary>使用 MiniMax 模型</summary> + +[MiniMax](https://platform.minimax.io/) 提供高性能、高性价比的 LLM 模型,支持 OpenAI 兼容 API: + +```env +LLM_API_KEY=your_minimax_api_key +LLM_BASE_URL=https://api.minimax.io/v1 +LLM_MODEL_NAME=MiniMax-M2.5 +``` + +| 模型 | 说明 | +|------|------| +| `MiniMax-M2.5` | 旗舰模型,204K 上下文窗口 | +| `MiniMax-M2.5-highspeed` | 同等性能,更快更敏捷 | + +国内用户可使用:`LLM_BASE_URL=https://api.minimaxi.com/v1` + +API 文档:[OpenAI 兼容接口](https://platform.minimax.io/docs/api-reference/text-openai-api) + +</details>
+ #### 2. 安装依赖 ```bash diff --git a/backend/app/services/oasis_profile_generator.py b/backend/app/services/oasis_profile_generator.py index 57836c53..b05cbfec 100644 --- a/backend/app/services/oasis_profile_generator.py +++ b/backend/app/services/oasis_profile_generator.py @@ -20,6 +20,7 @@ from ..config import Config from ..utils.logger import get_logger +from ..utils.llm_client import _is_minimax, _clamp_temperature, _inject_json_instruction, parse_json_from_response from .zep_entity_reader import EntityNode, ZepEntityReader logger = get_logger('mirofish.oasis_profile') @@ -523,43 +524,53 @@ def _generate_profile_with_llm( # 尝试多次生成,直到成功或达到最大重试次数 max_attempts = 3 last_error = None - + use_minimax = _is_minimax(self.model_name, self.base_url) + for attempt in range(max_attempts): try: - response = self.client.chat.completions.create( - model=self.model_name, - messages=[ - {"role": "system", "content": self._get_system_prompt(is_individual)}, - {"role": "user", "content": prompt} - ], - response_format={"type": "json_object"}, - temperature=0.7 - (attempt * 0.1) # 每次重试降低温度 + import re as _re + temperature = _clamp_temperature(0.7 - (attempt * 0.1), self.model_name, self.base_url) + messages = [ + {"role": "system", "content": self._get_system_prompt(is_individual)}, + {"role": "user", "content": prompt} + ] + + kwargs = { + "model": self.model_name, + "messages": _inject_json_instruction(messages) if use_minimax else messages, + "temperature": temperature, # 不设置max_tokens,让LLM自由发挥 - ) - + } + if not use_minimax: + kwargs["response_format"] = {"type": "json_object"} + + response = self.client.chat.completions.create(**kwargs) + content = response.choices[0].message.content - + # 移除 <think> 标签 + content = _re.sub(r'<think>[\s\S]*?</think>', '', content).strip() + # 检查是否被截断(finish_reason不是'stop') finish_reason = response.choices[0].finish_reason if finish_reason == 'length': logger.warning(f"LLM输出被截断 (attempt {attempt+1}), 尝试修复...") content = self._fix_truncated_json(content) - + # 尝试解析JSON 
try: - result = json.loads(content) - + result = parse_json_from_response(content) + # 验证必需字段 if "bio" not in result or not result["bio"]: result["bio"] = entity_summary[:200] if entity_summary else f"{entity_type}: {entity_name}" if "persona" not in result or not result["persona"]: result["persona"] = entity_summary or f"{entity_name}是一个{entity_type}。" - + return result - - except json.JSONDecodeError as je: + + except (json.JSONDecodeError, ValueError) as je: logger.warning(f"JSON解析失败 (attempt {attempt+1}): {str(je)[:80]}") - + # 尝试修复JSON result = self._try_fix_json(content, entity_name, entity_type, entity_summary) if result.get("_fixed"): diff --git a/backend/app/services/simulation_config_generator.py b/backend/app/services/simulation_config_generator.py index cc362508..4d40bc73 100644 --- a/backend/app/services/simulation_config_generator.py +++ b/backend/app/services/simulation_config_generator.py @@ -20,6 +20,7 @@ from ..config import Config from ..utils.logger import get_logger +from ..utils.llm_client import _is_minimax, _clamp_temperature, _inject_json_instruction, parse_json_from_response from .zep_entity_reader import EntityNode, ZepEntityReader logger = get_logger('mirofish.simulation_config') @@ -433,42 +434,51 @@ def _summarize_entities(self, entities: List[EntityNode]) -> str: def _call_llm_with_retry(self, prompt: str, system_prompt: str) -> Dict[str, Any]: """带重试的LLM调用,包含JSON修复逻辑""" import re - + max_attempts = 3 last_error = None - + use_minimax = _is_minimax(self.model_name, self.base_url) + for attempt in range(max_attempts): try: - response = self.client.chat.completions.create( - model=self.model_name, - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": prompt} - ], - response_format={"type": "json_object"}, - temperature=0.7 - (attempt * 0.1) # 每次重试降低温度 + temperature = _clamp_temperature(0.7 - (attempt * 0.1), self.model_name, self.base_url) + messages = [ + {"role": "system", "content": 
system_prompt}, + {"role": "user", "content": prompt} + ] + + kwargs = { + "model": self.model_name, + "messages": _inject_json_instruction(messages) if use_minimax else messages, + "temperature": temperature, # 不设置max_tokens,让LLM自由发挥 - ) - + } + if not use_minimax: + kwargs["response_format"] = {"type": "json_object"} + + response = self.client.chat.completions.create(**kwargs) + content = response.choices[0].message.content + # 移除 <think> 标签 + content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip() finish_reason = response.choices[0].finish_reason - + # 检查是否被截断 if finish_reason == 'length': logger.warning(f"LLM输出被截断 (attempt {attempt+1})") content = self._fix_truncated_json(content) - + # 尝试解析JSON try: - return json.loads(content) - except json.JSONDecodeError as e: + return parse_json_from_response(content) + except (json.JSONDecodeError, ValueError) as e: logger.warning(f"JSON解析失败 (attempt {attempt+1}): {str(e)[:80]}") - + # 尝试修复JSON fixed = self._try_fix_config_json(content) if fixed: return fixed - + last_error = e except Exception as e: diff --git a/backend/app/utils/llm_client.py b/backend/app/utils/llm_client.py index 6c1a81f4..fb614652 100644 --- a/backend/app/utils/llm_client.py +++ b/backend/app/utils/llm_client.py @@ -1,6 +1,6 @@ """ LLM客户端封装 -统一使用OpenAI格式调用 +统一使用OpenAI格式调用,兼容 MiniMax 等 OpenAI 兼容 API """ import json @@ -11,9 +11,52 @@ from ..config import Config +def _is_minimax(model: str, base_url: str) -> bool: + """检测当前是否使用 MiniMax 模型""" + model_lower = (model or "").lower() + url_lower = (base_url or "").lower() + return "minimax" in model_lower or "minimax" in url_lower + + +def _clamp_temperature(temperature: float, model: str, base_url: str) -> float: + """MiniMax 要求 temperature 在 (0.0, 1.0] 之间,不能为 0""" + if _is_minimax(model, base_url) and temperature <= 0: + return 0.01 + return temperature + + +def parse_json_from_response(content: str) -> Any: + """从 LLM 响应中解析 JSON,支持多种格式""" + trimmed = content.strip() + + # 1. 
直接解析 + try: + return json.loads(trimmed) + except json.JSONDecodeError: + pass + + # 2. 提取 markdown code block + code_block_match = re.search(r'```(?:json)?\s*\n?([\s\S]*?)```', trimmed) + if code_block_match: + try: + return json.loads(code_block_match.group(1).strip()) + except json.JSONDecodeError: + pass + + # 3. 提取 { } 或 [ ] + json_match = re.search(r'(\{[\s\S]*\}|\[[\s\S]*\])', trimmed) + if json_match: + try: + return json.loads(json_match.group(1)) + except json.JSONDecodeError: + pass + + raise ValueError(f"LLM返回的JSON格式无效: {trimmed}") + + class LLMClient: """LLM客户端""" - + def __init__( self, api_key: Optional[str] = None, @@ -23,15 +66,20 @@ def __init__( self.api_key = api_key or Config.LLM_API_KEY self.base_url = base_url or Config.LLM_BASE_URL self.model = model or Config.LLM_MODEL_NAME - + if not self.api_key: raise ValueError("LLM_API_KEY 未配置") - + self.client = OpenAI( api_key=self.api_key, base_url=self.base_url ) - + + @property + def is_minimax(self) -> bool: + """检测当前是否使用 MiniMax 模型""" + return _is_minimax(self.model, self.base_url) + def chat( self, messages: List[Dict[str, str]], @@ -41,32 +89,37 @@ def chat( ) -> str: """ 发送聊天请求 - + Args: messages: 消息列表 temperature: 温度参数 max_tokens: 最大token数 response_format: 响应格式(如JSON模式) - + Returns: 模型响应文本 """ + temperature = _clamp_temperature(temperature, self.model, self.base_url) + kwargs = { "model": self.model, "messages": messages, "temperature": temperature, "max_tokens": max_tokens, } - - if response_format: + + # MiniMax 不支持 response_format,改用 prompt 引导 JSON 输出 + if response_format and self.is_minimax: + messages = _inject_json_instruction(messages) + elif response_format: kwargs["response_format"] = response_format - + response = self.client.chat.completions.create(**kwargs) content = response.choices[0].message.content # 部分模型(如MiniMax M2.5)会在content中包含思考内容,需要移除 content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip() return content - + def chat_json( self, messages: List[Dict[str, str]], @@ -75,12 
+128,12 @@ def chat_json( ) -> Dict[str, Any]: """ 发送聊天请求并返回JSON - + Args: messages: 消息列表 temperature: 温度参数 max_tokens: 最大token数 - + Returns: 解析后的JSON对象 """ @@ -90,14 +143,19 @@ def chat_json( max_tokens=max_tokens, response_format={"type": "json_object"} ) - # 清理markdown代码块标记 - cleaned_response = response.strip() - cleaned_response = re.sub(r'^```(?:json)?\s*\n?', '', cleaned_response, flags=re.IGNORECASE) - cleaned_response = re.sub(r'\n?```\s*$', '', cleaned_response) - cleaned_response = cleaned_response.strip() + return parse_json_from_response(response) - try: - return json.loads(cleaned_response) - except json.JSONDecodeError: - raise ValueError(f"LLM返回的JSON格式无效: {cleaned_response}") + +def _inject_json_instruction(messages: List[Dict[str, str]]) -> List[Dict[str, str]]: + """在消息列表中注入 JSON 输出指令(用于不支持 response_format 的模型)""" + json_hint = "\n\nYou must respond with valid JSON only. No markdown, no explanation, no extra text." + messages = [msg.copy() for msg in messages] + # 优先追加到 system 消息 + for msg in messages: + if msg.get("role") == "system": + msg["content"] = msg["content"] + json_hint + return messages + # 如果没有 system 消息,在开头插入一条 + messages.insert(0, {"role": "system", "content": json_hint.strip()}) + return messages diff --git a/backend/tests/test_minimax_compat.py b/backend/tests/test_minimax_compat.py new file mode 100644 index 00000000..7f41443b --- /dev/null +++ b/backend/tests/test_minimax_compat.py @@ -0,0 +1,176 @@ +""" +MiniMax 兼容性测试 +验证 LLMClient 对 MiniMax 模型的兼容处理 +""" + +import json +import re +import pytest +import sys +import os + +# 直接导入 llm_client 模块中的独立函数,绕过 Flask 依赖 +# 通过模拟 Config 来避免导入整个 app 模块 +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + + +# 直接从源文件提取独立函数进行测试 +def _is_minimax(model, base_url): + model_lower = (model or "").lower() + url_lower = (base_url or "").lower() + return "minimax" in model_lower or "minimax" in url_lower + + +def _clamp_temperature(temperature, model, base_url): + if _is_minimax(model, 
base_url) and temperature <= 0: + return 0.01 + return temperature + + +def parse_json_from_response(content): + trimmed = content.strip() + try: + return json.loads(trimmed) + except json.JSONDecodeError: + pass + code_block_match = re.search(r'```(?:json)?\s*\n?([\s\S]*?)```', trimmed) + if code_block_match: + try: + return json.loads(code_block_match.group(1).strip()) + except json.JSONDecodeError: + pass + json_match = re.search(r'(\{[\s\S]*\}|\[[\s\S]*\])', trimmed) + if json_match: + try: + return json.loads(json_match.group(1)) + except json.JSONDecodeError: + pass + raise ValueError(f"LLM返回的JSON格式无效: {trimmed}") + + +def _inject_json_instruction(messages): + json_hint = "\n\nYou must respond with valid JSON only. No markdown, no explanation, no extra text." + messages = [msg.copy() for msg in messages] + for msg in messages: + if msg.get("role") == "system": + msg["content"] = msg["content"] + json_hint + return messages + messages.insert(0, {"role": "system", "content": json_hint.strip()}) + return messages + + +class TestIsMinimax: + def test_minimax_model_name(self): + assert _is_minimax("MiniMax-M2.5", "https://api.openai.com/v1") is True + + def test_minimax_model_name_lowercase(self): + assert _is_minimax("minimax-m2.5", "https://api.openai.com/v1") is True + + def test_minimax_base_url(self): + assert _is_minimax("some-model", "https://api.minimax.io/v1") is True + + def test_minimax_base_url_cn(self): + assert _is_minimax("some-model", "https://api.minimaxi.com/v1") is True + + def test_not_minimax_openai(self): + assert _is_minimax("gpt-4o", "https://api.openai.com/v1") is False + + def test_not_minimax_dashscope(self): + assert _is_minimax("qwen-plus", "https://dashscope.aliyuncs.com/compatible-mode/v1") is False + + def test_none_values(self): + assert _is_minimax(None, None) is False + + def test_minimax_highspeed(self): + assert _is_minimax("MiniMax-M2.5-highspeed", "https://api.minimax.io/v1") is True + + +class TestClampTemperature: + def 
test_zero_temperature_minimax(self): + result = _clamp_temperature(0.0, "MiniMax-M2.5", "https://api.minimax.io/v1") + assert result == 0.01 + + def test_negative_temperature_minimax(self): + result = _clamp_temperature(-0.1, "MiniMax-M2.5", "https://api.minimax.io/v1") + assert result == 0.01 + + def test_valid_temperature_minimax(self): + result = _clamp_temperature(0.7, "MiniMax-M2.5", "https://api.minimax.io/v1") + assert result == 0.7 + + def test_zero_temperature_non_minimax(self): + result = _clamp_temperature(0.0, "gpt-4o", "https://api.openai.com/v1") + assert result == 0.0 + + def test_max_temperature_minimax(self): + result = _clamp_temperature(1.0, "MiniMax-M2.5", "https://api.minimax.io/v1") + assert result == 1.0 + + +class TestInjectJsonInstruction: + def test_inject_to_existing_system_message(self): + messages = [ + {"role": "system", "content": "You are a helper."}, + {"role": "user", "content": "Generate JSON."} + ] + result = _inject_json_instruction(messages) + assert "valid JSON only" in result[0]["content"] + assert result[0]["content"].startswith("You are a helper.") + # Original should not be mutated + assert "valid JSON only" not in messages[0]["content"] + + def test_inject_without_system_message(self): + messages = [ + {"role": "user", "content": "Generate JSON."} + ] + result = _inject_json_instruction(messages) + assert len(result) == 2 + assert result[0]["role"] == "system" + assert "valid JSON only" in result[0]["content"] + + def test_does_not_mutate_original(self): + messages = [ + {"role": "system", "content": "Hello"}, + {"role": "user", "content": "Test"} + ] + original_content = messages[0]["content"] + _inject_json_instruction(messages) + assert messages[0]["content"] == original_content + + +class TestParseJsonFromResponse: + def test_direct_json(self): + result = parse_json_from_response('{"key": "value"}') + assert result == {"key": "value"} + + def test_json_with_markdown_block(self): + text = '```json\n{"key": 
"value"}\n```' + result = parse_json_from_response(text) + assert result == {"key": "value"} + + def test_json_with_surrounding_text(self): + text = 'Here is the result:\n{"key": "value"}\nDone.' + result = parse_json_from_response(text) + assert result == {"key": "value"} + + def test_json_array(self): + result = parse_json_from_response('[1, 2, 3]') + assert result == [1, 2, 3] + + def test_invalid_json_raises(self): + with pytest.raises(ValueError, match="JSON格式无效"): + parse_json_from_response("not json at all") + + def test_nested_json(self): + text = '{"agents": [{"name": "Alice"}, {"name": "Bob"}]}' + result = parse_json_from_response(text) + assert len(result["agents"]) == 2 + + def test_markdown_block_without_json_label(self): + text = '```\n{"key": "value"}\n```' + result = parse_json_from_response(text) + assert result == {"key": "value"} + + +if __name__ == "__main__": + pytest.main([__file__, "-v"])