Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ LLM_API_KEY=your_api_key_here
LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
LLM_MODEL_NAME=qwen-plus

# ===== 使用 MiniMax 模型(可选)=====
# MiniMax M2.5: 高性能、高性价比,支持 204,800 tokens 上下文
# 获取 API Key: https://platform.minimax.io/
# LLM_API_KEY=your_minimax_api_key_here
# LLM_BASE_URL=https://api.minimax.io/v1
# LLM_MODEL_NAME=MiniMax-M2.5
# 国内用户可使用: LLM_BASE_URL=https://api.minimaxi.com/v1

# ===== ZEP记忆图谱配置 =====
# 每月免费额度即可支撑简单使用:https://app.getzep.com/
ZEP_API_KEY=your_zep_api_key_here
Expand Down
22 changes: 22 additions & 0 deletions README-EN.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,28 @@ LLM_MODEL_NAME=qwen-plus
ZEP_API_KEY=your_zep_api_key
```

<details>
<summary><b>Using MiniMax Models</b></summary>

[MiniMax](https://platform.minimax.io/) provides high-performance, cost-effective LLM models with an OpenAI-compatible API:

```env
LLM_API_KEY=your_minimax_api_key
LLM_BASE_URL=https://api.minimax.io/v1
LLM_MODEL_NAME=MiniMax-M2.5
```

| Model | Description |
|-------|-------------|
| `MiniMax-M2.5` | Flagship model, 204K context window |
| `MiniMax-M2.5-highspeed` | Same performance, with faster, more responsive inference |

For users in China: `LLM_BASE_URL=https://api.minimaxi.com/v1`

API Documentation: [OpenAI Compatible API](https://platform.minimax.io/docs/api-reference/text-openai-api)

</details>

#### 2. Install Dependencies

```bash
Expand Down
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,28 @@ LLM_MODEL_NAME=qwen-plus
ZEP_API_KEY=your_zep_api_key
```

<details>
<summary><b>使用 MiniMax 模型</b></summary>

[MiniMax](https://platform.minimax.io/) 提供高性能、高性价比的 LLM 模型,支持 OpenAI 兼容 API:

```env
LLM_API_KEY=your_minimax_api_key
LLM_BASE_URL=https://api.minimax.io/v1
LLM_MODEL_NAME=MiniMax-M2.5
```

| 模型 | 说明 |
|------|------|
| `MiniMax-M2.5` | 旗舰模型,204K 上下文窗口 |
| `MiniMax-M2.5-highspeed` | 同等性能,更快更敏捷 |

国内用户可使用:`LLM_BASE_URL=https://api.minimaxi.com/v1`

API 文档:[OpenAI 兼容接口](https://platform.minimax.io/docs/api-reference/text-openai-api)

</details>

#### 2. 安装依赖

```bash
Expand Down
49 changes: 30 additions & 19 deletions backend/app/services/oasis_profile_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from ..config import Config
from ..utils.logger import get_logger
from ..utils.llm_client import _is_minimax, _clamp_temperature, _inject_json_instruction, parse_json_from_response
from .zep_entity_reader import EntityNode, ZepEntityReader

logger = get_logger('mirofish.oasis_profile')
Expand Down Expand Up @@ -523,43 +524,53 @@ def _generate_profile_with_llm(
# 尝试多次生成,直到成功或达到最大重试次数
max_attempts = 3
last_error = None

use_minimax = _is_minimax(self.model_name, self.base_url)

for attempt in range(max_attempts):
try:
response = self.client.chat.completions.create(
model=self.model_name,
messages=[
{"role": "system", "content": self._get_system_prompt(is_individual)},
{"role": "user", "content": prompt}
],
response_format={"type": "json_object"},
temperature=0.7 - (attempt * 0.1) # 每次重试降低温度
import re as _re
temperature = _clamp_temperature(0.7 - (attempt * 0.1), self.model_name, self.base_url)
messages = [
{"role": "system", "content": self._get_system_prompt(is_individual)},
{"role": "user", "content": prompt}
]

kwargs = {
"model": self.model_name,
"messages": _inject_json_instruction(messages) if use_minimax else messages,
"temperature": temperature,
# 不设置max_tokens,让LLM自由发挥
)

}
if not use_minimax:
kwargs["response_format"] = {"type": "json_object"}

response = self.client.chat.completions.create(**kwargs)

content = response.choices[0].message.content

# 移除 <think> 标签
content = _re.sub(r'<think>[\s\S]*?</think>', '', content).strip()

# 检查是否被截断(finish_reason不是'stop')
finish_reason = response.choices[0].finish_reason
if finish_reason == 'length':
logger.warning(f"LLM输出被截断 (attempt {attempt+1}), 尝试修复...")
content = self._fix_truncated_json(content)

# 尝试解析JSON
try:
result = json.loads(content)
result = parse_json_from_response(content)

# 验证必需字段
if "bio" not in result or not result["bio"]:
result["bio"] = entity_summary[:200] if entity_summary else f"{entity_type}: {entity_name}"
if "persona" not in result or not result["persona"]:
result["persona"] = entity_summary or f"{entity_name}是一个{entity_type}。"

return result
except json.JSONDecodeError as je:

except (json.JSONDecodeError, ValueError) as je:
logger.warning(f"JSON解析失败 (attempt {attempt+1}): {str(je)[:80]}")

# 尝试修复JSON
result = self._try_fix_json(content, entity_name, entity_type, entity_summary)
if result.get("_fixed"):
Expand Down
46 changes: 28 additions & 18 deletions backend/app/services/simulation_config_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from ..config import Config
from ..utils.logger import get_logger
from ..utils.llm_client import _is_minimax, _clamp_temperature, _inject_json_instruction, parse_json_from_response
from .zep_entity_reader import EntityNode, ZepEntityReader

logger = get_logger('mirofish.simulation_config')
Expand Down Expand Up @@ -433,42 +434,51 @@ def _summarize_entities(self, entities: List[EntityNode]) -> str:
def _call_llm_with_retry(self, prompt: str, system_prompt: str) -> Dict[str, Any]:
"""带重试的LLM调用,包含JSON修复逻辑"""
import re

max_attempts = 3
last_error = None

use_minimax = _is_minimax(self.model_name, self.base_url)

for attempt in range(max_attempts):
try:
response = self.client.chat.completions.create(
model=self.model_name,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt}
],
response_format={"type": "json_object"},
temperature=0.7 - (attempt * 0.1) # 每次重试降低温度
temperature = _clamp_temperature(0.7 - (attempt * 0.1), self.model_name, self.base_url)
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt}
]

kwargs = {
"model": self.model_name,
"messages": _inject_json_instruction(messages) if use_minimax else messages,
"temperature": temperature,
# 不设置max_tokens,让LLM自由发挥
)

}
if not use_minimax:
kwargs["response_format"] = {"type": "json_object"}

response = self.client.chat.completions.create(**kwargs)

content = response.choices[0].message.content
# 移除 <think> 标签
content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
finish_reason = response.choices[0].finish_reason

# 检查是否被截断
if finish_reason == 'length':
logger.warning(f"LLM输出被截断 (attempt {attempt+1})")
content = self._fix_truncated_json(content)

# 尝试解析JSON
try:
return json.loads(content)
except json.JSONDecodeError as e:
return parse_json_from_response(content)
except (json.JSONDecodeError, ValueError) as e:
logger.warning(f"JSON解析失败 (attempt {attempt+1}): {str(e)[:80]}")

# 尝试修复JSON
fixed = self._try_fix_config_json(content)
if fixed:
return fixed

last_error = e

except Exception as e:
Expand Down
Loading