diff --git a/common/telegram_markdown.py b/common/telegram_markdown.py new file mode 100644 index 0000000..1d90761 --- /dev/null +++ b/common/telegram_markdown.py @@ -0,0 +1,39 @@ +"""Shared Telegram markdown/code formatting helpers. + +This module lives outside `plugins/` so individual plugins can remain +self-contained and not depend on other plugins being installed. +""" + +from __future__ import annotations + +import re + + +def preserve_telegram_markdown(text: str) -> str: + """Preserve common markdown constructs while keeping Telegram compatibility. + + Notes: + - We intentionally keep this conservative to avoid over-escaping and + flattening newlines (which breaks fenced code blocks). + - The behavior here is expected to be shared by both Telegram integrations. + """ + + if not text: + return text + + # Convert _italic_ to __italic__ (Telegram-style italics) + text = re.sub(r"_([^_]+)_", r"__\1__", text) + + # Convert *italic* to __italic__ while preserving **bold** + text = re.sub(r"\*\*([^*]+)\*\*", r"\1", text) + text = re.sub(r"\*([^*]+)\*", r"__\1__", text) + text = re.sub(r"([^<]+)", r"**\1**", text) + + # Convert non-standard ..code.. delimiter into fenced blocks + text = re.sub(r"\.\.\n(.*?)\.\.", r"```\n\1\n```", text, flags=re.DOTALL) + + # Quote blocks: render as a subtle label (Telegram markdown support varies) + text = re.sub(r"^>\s*(.*?)$", r"*Quote:* \1", text, flags=re.MULTILINE) + + return text.strip() + diff --git a/core b/core new file mode 100644 index 0000000..b65922f Binary files /dev/null and b/core differ diff --git a/plugins/telegram/message_handler.py b/plugins/telegram/message_handler.py index d2beee6..f07ea17 100644 --- a/plugins/telegram/message_handler.py +++ b/plugins/telegram/message_handler.py @@ -1,10 +1,10 @@ """Telegram message handler implementation.""" import asyncio -import re from datetime import datetime from typing import Any +from common.telegram_markdown import preserve_telegram_markdown from database.operations.messages import insert_message from database.operations.queue import add_to_queue from database.operations.users import get_or_create_platform_profile @@ -44,35 +44,7 @@ def preserve_markdown(self, text: str) -> str: Returns: str: Text with preserved markdown formatting """ - if not text: - return text - - # Simple, direct approach - just preserve the original formatting - # Don't over-process or convert unnecessarily - - # Only handle the specific cases that Telegram has trouble with - # Convert _italic_ to __italic__ (Telegram uses double underscores) - # Remove word boundary requirement - it's too restrictive - text = re.sub(r"_([^_]+)_", r"__\1__", text) - - # Convert *italic* to __italic__ (standardize to Telegram format) - # But be careful not to break **bold** - only convert single asterisks - # First protect bold patterns - text = re.sub(r"\*\*([^*]+)\*\*", r"\1", text) - # Then convert remaining single asterisks - text = re.sub(r"\*([^*]+)\*", r"__\1__", text) - # Finally restore bold patterns - text = re.sub(r"([^<]+)", r"**\1**", text) - - # Handle non-standard code block delimiters - # Convert ..code.. to ```code``` - text = re.sub(r"\.\.\n(.*?)\.\.", r"```\n\1\n```", text, flags=re.DOTALL) - - # Handle quote blocks - convert to italic prefix (more subtle) - text = re.sub(r"^>\s*(.*?)$", r"*Quote:* \1", text, flags=re.MULTILINE) - - # That's it - preserve everything else as-is - return text.strip() + return preserve_telegram_markdown(text) class MessageBuffer: diff --git a/plugins/telegram_bot/message_handler.py b/plugins/telegram_bot/message_handler.py index 97a7543..ca520a7 100644 --- a/plugins/telegram_bot/message_handler.py +++ b/plugins/telegram_bot/message_handler.py @@ -3,14 +3,26 @@ import logging from typing import Any +from common.telegram_markdown import preserve_telegram_markdown from database.operations.messages import insert_message from database.operations.queue import add_to_queue from database.operations.users import get_or_create_platform_profile -from runtime.core.message import MessageFormatter +from runtime.core.message import MessageFormatter as BaseMessageFormatter logger = logging.getLogger(__name__) +class MessageFormatter(BaseMessageFormatter): + """Telegram-bot-specific formatter (kept self-contained). + + We keep Telegram markdown behavior aligned with the Telethon-based Telegram + plugin by delegating to shared helpers in `common/`. + """ + + def format_response(self, response: str) -> str: + return preserve_telegram_markdown(response) + + class TelegramMessageHandler: """Handles incoming and outgoing messages for the Telegram bot.""" @@ -86,8 +98,12 @@ async def process_outgoing_message(self, message, response: str) -> None: response: The response to send """ try: - # Send response - await message.answer(response) + # Format response for Telegram (preserve markdown/code blocks) + formatted = self.formatter.format_response(response) + + # Send response with markdown enabled (match Telegram plugin behavior) + # Use Telegram "Markdown" to align with the Telethon plugin's parse_mode="markdown". + await message.answer(formatted, parse_mode="Markdown") # Update message status await self.update_message_status(message, "sent") diff --git a/plugins/telegram_bot/tests/test_markdown_fix.py b/plugins/telegram_bot/tests/test_markdown_fix.py index 1fce544..0fcea17 100644 --- a/plugins/telegram_bot/tests/test_markdown_fix.py +++ b/plugins/telegram_bot/tests/test_markdown_fix.py @@ -11,14 +11,14 @@ # Add the broca2 directory to the path so we can import modules sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -from plugins.telegram.message_handler import MessageFormatter +from common.telegram_markdown import preserve_telegram_markdown def test_markdown_preservation(): """Test that markdown formatting is preserved correctly.""" - # Create a formatter instance - formatter = MessageFormatter() + def format_response(text: str) -> str: + return preserve_telegram_markdown(text) # Test cases: typical Letta/Broca responses test_cases = [ @@ -74,7 +74,7 @@ def hello(): print("-" * 30) # Format the response using the new method - formatted = formatter.format_response(test_case["input"]) + formatted = format_response(test_case["input"]) print("Input:") print(test_case["input"]) @@ -111,6 +111,8 @@ def hello(): def test_old_vs_new_behavior(): """Compare old sanitize_text behavior vs new preserve_markdown behavior.""" + from runtime.core.message import MessageFormatter + formatter = MessageFormatter() test_input = """# Header @@ -136,7 +138,7 @@ def test_old_vs_new_behavior(): print("\n" + "-" * 30) # New behavior (what should happen now) - new_result = formatter.format_response(test_input) + new_result = preserve_telegram_markdown(test_input) print("NEW behavior (format_response):") print(new_result) diff --git a/tests/unit/plugins/test_telegram_bot_message_handler_comprehensive.py b/tests/unit/plugins/test_telegram_bot_message_handler_comprehensive.py index c7880b4..8944d43 100644 --- a/tests/unit/plugins/test_telegram_bot_message_handler_comprehensive.py +++ b/tests/unit/plugins/test_telegram_bot_message_handler_comprehensive.py @@ -210,7 +210,10 @@ async def test_process_outgoing_message_success(self): ) as mock_update: await handler.process_outgoing_message(mock_message, "Test response") - mock_message.answer.assert_called_once_with("Test response") + mock_message.answer.assert_called_once() + args, kwargs = mock_message.answer.call_args + assert args == ("Test response",) + assert kwargs.get("parse_mode") == "Markdown" mock_update.assert_called_once_with(mock_message, "sent") @pytest.mark.asyncio