diff --git a/astrbot/core/message/components.py b/astrbot/core/message/components.py
index 050e36521..181a71650 100644
--- a/astrbot/core/message/components.py
+++ b/astrbot/core/message/components.py
@@ -528,6 +528,14 @@ class Reply(BaseMessageComponent):
     def __init__(self, **_):
         super().__init__(**_)
 
+    async def to_dict(self) -> dict:
+        """Serialize the reply id and every component of its chain into a dict."""
+        chain = self.chain if self.chain is not None else []
+        return {
+            "type": "reply",
+            "data": {"id": self.id, "chain": [await comp.to_dict() for comp in chain]},
+        }
+
 
 class Poke(BaseMessageComponent):
     type: str = ComponentType.Poke
@@ -630,12 +638,31 @@ async def to_dict(self) -> dict:
 class Json(BaseMessageComponent):
     type = ComponentType.Json
     data: dict
+    raw_data: str | None = None
 
     def __init__(self, data: str | dict, **_):
         if isinstance(data, str):
-            data = json.loads(data)
+            # Keep the original text (even when it is not valid JSON) and hand it to
+            # the base initializer as a field, the same way `data` is passed below.
+            # NOTE(review): assumes the base initializer accepts declared fields as
+            # keyword arguments; assigning self.raw_data before super().__init__()
+            # would run before the instance is initialized - confirm.
+            _.setdefault("raw_data", data)
+            try:
+                data = json.loads(data)
+            except json.JSONDecodeError:
+                data = {"raw": data}
         super().__init__(data=data, **_)
 
+    async def to_dict(self) -> dict:
+        """Serialize as the original JSON text, or a dump of `data` if none was kept."""
+        # raw_data always exists (class default None), so a getattr() default would
+        # never be used; fall back explicitly when no raw text was stored.
+        content = self.raw_data
+        if content is None:
+            content = json.dumps(self.data)
+        return {
+            "type": "json",
+            "data": {"content": content},
+        }
+
 
 class Unknown(BaseMessageComponent):
     type = ComponentType.Unknown
diff --git a/astrbot/core/provider/sources/openai_source.py b/astrbot/core/provider/sources/openai_source.py
index 2544782f4..5e11107e8 100644
--- a/astrbot/core/provider/sources/openai_source.py
+++ b/astrbot/core/provider/sources/openai_source.py
@@ -247,18 +247,63 @@ async def _parse_openai_completion(
 
         # parse the text completion
         if choice.message.content is not None:
-            # text completion
-            completion_text = str(choice.message.content).strip()
-            # specially, some providers may set <think> tags around reasoning content in the completion text,
-            # we use regex to remove them, and store then in reasoning_content field
-            reasoning_pattern = re.compile(r"<think>(.*?)</think>", re.DOTALL)
-            matches = reasoning_pattern.findall(completion_text)
-            if matches:
-                llm_response.reasoning_content = "\n".join(
-                    [match.strip() for match in matches],
-                )
-                completion_text = reasoning_pattern.sub("", completion_text).strip()
-            llm_response.result_chain = MessageChain().message(completion_text)
+            # content can be either a plain string or a multimodal list
+            content = choice.message.content
+            # handle multimodal content returned as a list of parts
+            if isinstance(content, list):
+                reasoning_parts = []
+                mc = MessageChain()
+                for part in content:
+                    if not isinstance(part, dict):
+                        # fallback: append as plain text
+                        mc.message(str(part))
+                        continue
+                    ptype = part.get("type")
+                    if ptype == "text":
+                        mc.message(part.get("text", ""))
+                    elif ptype == "image_url":
+                        image_field = part.get("image_url")
+                        url = None
+                        if isinstance(image_field, dict):
+                            url = image_field.get("url")
+                        else:
+                            url = image_field
+                        if url:
+                            # data:image/...;base64,xxx
+                            if isinstance(url, str) and "base64," in url:
+                                base64_data = url.split("base64,", 1)[1]
+                                mc.base64_image(base64_data)
+                            elif isinstance(url, str) and url.startswith("base64://"):
+                                mc.base64_image(url.removeprefix("base64://"))
+                            else:
+                                mc.url_image(url)
+                    elif ptype == "think":
+                        # collect reasoning parts for later extraction
+                        think_val = part.get("think")
+                        if think_val:
+                            reasoning_parts.append(str(think_val))
+                    else:
+                        # unknown part type, append its textual representation
+                        mc.message(json.dumps(part, ensure_ascii=False))
+
+                if reasoning_parts:
+                    llm_response.reasoning_content = "\n".join(
+                        [rp.strip() for rp in reasoning_parts]
+                    )
+                llm_response.result_chain = mc
+            else:
+                # text completion (string)
+                completion_text = str(content).strip()
+                # specially, some providers may set <think> tags around reasoning content in the completion text,
+                # we use regex to remove them, and store them in reasoning_content field
+                reasoning_pattern = re.compile(r"<think>(.*?)</think>", re.DOTALL)
+                matches = reasoning_pattern.findall(completion_text)
+                if matches:
+                    llm_response.reasoning_content = "\n".join(
+                        [match.strip() for match in matches],
+                    )
+                    completion_text = reasoning_pattern.sub("", completion_text).strip()
+                llm_response.result_chain = MessageChain().message(completion_text)
 
         # parse the reasoning content if any
         # the priority is higher than the tag extraction