diff --git a/backend/app/services/oasis_profile_generator.py b/backend/app/services/oasis_profile_generator.py index 57836c53..ee16f7e6 100644 --- a/backend/app/services/oasis_profile_generator.py +++ b/backend/app/services/oasis_profile_generator.py @@ -25,6 +25,47 @@ logger = get_logger('mirofish.oasis_profile') +def _coerce_to_str(value: Any) -> str: + """Coerce a value to a plain string. + + Handles dict, list, and other non-string types that may be returned + by LLM JSON parsing. + """ + if isinstance(value, str): + return value + if isinstance(value, dict): + for key in ('text', 'value', 'description', 'content', 'summary', 'name'): + if key in value and isinstance(value[key], str): + return value[key] + return json.dumps(value, ensure_ascii=False) + if isinstance(value, list): + str_items = [_coerce_to_str(item) for item in value] + return ', '.join(str_items) + return str(value) + + +def _coerce_to_str_list(value: Any) -> List[str]: + """Coerce a value to a list of strings. + + Handles nested structures that may be returned by LLM JSON parsing. + """ + if isinstance(value, list): + result = [] + for item in value: + if isinstance(item, str): + result.append(item) + elif isinstance(item, dict): + result.append(_coerce_to_str(item)) + else: + result.append(str(item)) + return result + if isinstance(value, str): + return [value] + if isinstance(value, dict): + return [_coerce_to_str(value)] + return [] + + @dataclass class OasisAgentProfile: """OASIS Agent Profile数据结构""" @@ -57,6 +98,16 @@ class OasisAgentProfile: created_at: str = field(default_factory=lambda: datetime.now().strftime("%Y-%m-%d")) + def __post_init__(self): + """Normalize field types to guard against structured LLM outputs + (e.g. dict/list instead of plain strings).""" + self.bio = _coerce_to_str(self.bio) + self.persona = _coerce_to_str(self.persona) + self.country = _coerce_to_str(self.country) if self.country is not None else None + self.profession = _coerce_to_str(self.profession) if self.profession is not None else None + self.gender = _coerce_to_str(self.gender) if self.gender is not None else None + self.interested_topics = _coerce_to_str_list(self.interested_topics) + def to_reddit_format(self) -> Dict[str, Any]: """转换为Reddit平台格式""" profile = { @@ -549,6 +600,15 @@ def _generate_profile_with_llm( try: result = json.loads(content) + # Normalize types from LLM output + for str_field in ('bio', 'persona', 'country', 'profession'): + if str_field in result and result[str_field] is not None: + result[str_field] = _coerce_to_str(result[str_field]) + if 'interested_topics' in result: + result['interested_topics'] = _coerce_to_str_list( + result['interested_topics'] + ) + # 验证必需字段 if "bio" not in result or not result["bio"]: result["bio"] = entity_summary[:200] if entity_summary else f"{entity_type}: {entity_name}" @@ -1092,15 +1152,19 @@ def _save_twitter_csv(self, profiles: List[OasisAgentProfile], file_path: str): # 写入数据行 for idx, profile in enumerate(profiles): + # Defensive coercion in case __post_init__ was bypassed + bio = _coerce_to_str(profile.bio) if profile.bio else profile.name + persona = _coerce_to_str(profile.persona) if profile.persona else '' + # user_char: 完整人设(bio + persona),用于LLM系统提示 - user_char = profile.bio - if profile.persona and profile.persona != profile.bio: - user_char = f"{profile.bio} {profile.persona}" + user_char = bio + if persona and persona != bio: + user_char = f"{bio} {persona}" # 处理换行符(CSV中用空格替代) user_char = user_char.replace('\n', ' ').replace('\r', ' ') # description: 简短简介,用于外部显示 - description = profile.bio.replace('\n', ' ').replace('\r', ' ') + description = bio.replace('\n', ' ').replace('\r', ' ') row = [ idx, # user_id: 从0开始的顺序ID @@ -1158,27 +1222,40 @@ def _save_reddit_json(self, profiles: List[OasisAgentProfile], file_path: str): """ data = [] for idx, profile in enumerate(profiles): + # Defensive coercion in case __post_init__ was bypassed + bio = _coerce_to_str(profile.bio) if profile.bio else f"{profile.name}" + persona = _coerce_to_str(profile.persona) if profile.persona else ( + f"{profile.name} is a participant in social discussions." + ) + country = _coerce_to_str(profile.country) if profile.country else "中国" + profession = _coerce_to_str(profile.profession) if profile.profession else None + interested_topics = ( + _coerce_to_str_list(profile.interested_topics) + if profile.interested_topics + else None + ) + # 使用与 to_reddit_format() 一致的格式 item = { "user_id": profile.user_id if profile.user_id is not None else idx, # 关键:必须包含 user_id "username": profile.user_name, "name": profile.name, - "bio": profile.bio[:150] if profile.bio else f"{profile.name}", - "persona": profile.persona or f"{profile.name} is a participant in social discussions.", + "bio": bio[:150], + "persona": persona, "karma": profile.karma if profile.karma else 1000, "created_at": profile.created_at, # OASIS必需字段 - 确保都有默认值 "age": profile.age if profile.age else 30, "gender": self._normalize_gender(profile.gender), "mbti": profile.mbti if profile.mbti else "ISTJ", - "country": profile.country if profile.country else "中国", + "country": country, } # 可选字段 - if profile.profession: - item["profession"] = profile.profession - if profile.interested_topics: - item["interested_topics"] = profile.interested_topics + if profession: + item["profession"] = profession + if interested_topics: + item["interested_topics"] = interested_topics data.append(item)