From 3bae8a7972aff951beb26417cb24d04a6d69f591 Mon Sep 17 00:00:00 2001 From: user1303836 Date: Sat, 7 Feb 2026 14:13:18 -0500 Subject: [PATCH 1/5] Performance improvements and content poster embed migration Pipeline performance: - Concurrent source fetching with asyncio.gather and Semaphore(5) - Batch content_items_exist query replaces per-item existence checks - Batch get_sources_by_ids replaces per-item source lookups in summarize_pending and backfill Content poster: - Switch from plain text messages to Discord embeds with structured formatting - Add summary truncation that preserves bullet point boundaries Substack adapter: - Deduplicate by extending RSSAdapter directly RSS discovery: - Concurrent path probing for faster RSS feed detection --- .../adapters/strategies/rss_discovery.py | 5 +- src/intelstream/adapters/substack.py | 141 +----------- src/intelstream/config.py | 7 - src/intelstream/database/repository.py | 9 + .../discord/cogs/content_posting.py | 5 +- src/intelstream/services/content_poster.py | 76 +++---- src/intelstream/services/pipeline.py | 214 ++++++++++-------- .../test_strategies/test_rss_discovery.py | 12 + tests/test_database.py | 31 +++ tests/test_discord/test_content_posting.py | 3 +- tests/test_services/test_content_poster.py | 180 ++++++++++----- tests/test_services/test_pipeline.py | 60 +++-- 12 files changed, 370 insertions(+), 373 deletions(-) diff --git a/src/intelstream/adapters/strategies/rss_discovery.py b/src/intelstream/adapters/strategies/rss_discovery.py index 68283a1..6d94857 100644 --- a/src/intelstream/adapters/strategies/rss_discovery.py +++ b/src/intelstream/adapters/strategies/rss_discovery.py @@ -1,3 +1,4 @@ +import asyncio import re from urllib.parse import urljoin, urlparse @@ -142,8 +143,8 @@ async def check_path(path: str) -> str | None: pass return None - for path in RSS_PATHS: - result = await check_path(path) + results = await asyncio.gather(*(check_path(path) for path in RSS_PATHS)) + for result in results: if result: return result diff --git a/src/intelstream/adapters/substack.py b/src/intelstream/adapters/substack.py index 81cc150..347db75 100644 --- a/src/intelstream/adapters/substack.py +++ b/src/intelstream/adapters/substack.py @@ -1,18 +1,12 @@ -from typing import Any - -import feedparser import httpx -import structlog - -from intelstream.adapters.base import BaseAdapter, ContentData -from intelstream.utils.feed_utils import parse_feed_date -logger = structlog.get_logger() +from intelstream.adapters.base import ContentData +from intelstream.adapters.rss import RSSAdapter -class SubstackAdapter(BaseAdapter): +class SubstackAdapter(RSSAdapter): def __init__(self, http_client: httpx.AsyncClient | None = None) -> None: - self._client = http_client + super().__init__(http_client=http_client) @property def source_type(self) -> str: @@ -30,128 +24,9 @@ async def fetch_latest( self, identifier: str, feed_url: str | None = None, - skip_content: bool = False, # noqa: ARG002 + skip_content: bool = False, ) -> list[ContentData]: - url = feed_url or await self.get_feed_url(identifier) - - logger.debug("Fetching Substack feed", identifier=identifier, url=url) - - try: - if self._client: - response = await self._client.get(url, follow_redirects=True) - response.raise_for_status() - content = response.text - else: - async with httpx.AsyncClient() as client: - response = await client.get(url, follow_redirects=True) - response.raise_for_status() - content = response.text - - feed = feedparser.parse(content) - - if feed.bozo and not feed.entries: - 
logger.warning( - "Failed to parse Substack feed", - identifier=identifier, - error=str(feed.bozo_exception), - ) - return [] - - items: list[ContentData] = [] - for entry in feed.entries: - try: - item = self._parse_entry(entry, feed) - items.append(item) - except Exception as e: - logger.warning( - "Failed to parse feed entry", - identifier=identifier, - entry_id=getattr(entry, "id", "unknown"), - error=str(e), - ) - continue - - logger.info("Fetched Substack content", identifier=identifier, count=len(items)) - return items - - except httpx.HTTPStatusError as e: - logger.error( - "HTTP error fetching Substack feed", - identifier=identifier, - status_code=e.response.status_code, - ) - raise - except httpx.RequestError as e: - logger.error( - "Request error fetching Substack feed", - identifier=identifier, - error=str(e), - ) - raise - - def _parse_entry( - self, entry: feedparser.FeedParserDict, feed: feedparser.FeedParserDict - ) -> ContentData: - external_id: str = str(entry.get("id") or entry.get("link") or "") - title: str = str(entry.get("title", "Untitled")) - original_url: str = str(entry.get("link", "")) - - author: str = str(entry.get("author", "")) - if not author: - feed_data: Any = feed.feed - if feed_data: - author = str(feed_data.get("title", "Unknown Author")) - else: - author = "Unknown Author" - - published_at = parse_feed_date(entry) - raw_content = self._extract_content(entry) - thumbnail_url = self._extract_thumbnail(entry) - - return ContentData( - external_id=external_id, - title=title, - original_url=original_url, - author=author, - published_at=published_at, - raw_content=raw_content, - thumbnail_url=thumbnail_url, + resolved_feed_url = feed_url or await self.get_feed_url(identifier) + return await super().fetch_latest( + identifier, feed_url=resolved_feed_url, skip_content=skip_content ) - - def _extract_content(self, entry: feedparser.FeedParserDict) -> str | None: - if entry.get("content"): - for content in entry.content: - if content.get("type") in ("text/html", "text/plain"): - value = content.get("value") - return str(value) if value else None - - if entry.get("summary"): - return str(entry.summary) - - if entry.get("description"): - return str(entry.description) - - return None - - def _extract_thumbnail(self, entry: feedparser.FeedParserDict) -> str | None: - if entry.get("media_content"): - for media in entry.media_content: - if media.get("medium") == "image" or str(media.get("type", "")).startswith( - "image/" - ): - url = media.get("url") - return str(url) if url else None - - if entry.get("media_thumbnail"): - for thumb in entry.media_thumbnail: - url = thumb.get("url") - if url: - return str(url) - - if entry.get("enclosures"): - for enclosure in entry.enclosures: - if str(enclosure.get("type", "")).startswith("image/"): - url = enclosure.get("href") or enclosure.get("url") - return str(url) if url else None - - return None diff --git a/src/intelstream/config.py b/src/intelstream/config.py index 9ba3452..63ad76a 100644 --- a/src/intelstream/config.py +++ b/src/intelstream/config.py @@ -96,13 +96,6 @@ class Settings(BaseSettings): description="Model to use for interactive /summarize command", ) - discord_max_message_length: int = Field( - default=2000, - ge=500, - le=2000, - description="Maximum Discord message length (Discord limit is 2000)", - ) - http_timeout_seconds: float = Field( default=30.0, ge=5.0, diff --git a/src/intelstream/database/repository.py b/src/intelstream/database/repository.py index 82beb06..291b1c5 100644 --- 
a/src/intelstream/database/repository.py +++ b/src/intelstream/database/repository.py @@ -302,6 +302,15 @@ async def content_item_exists(self, external_id: str) -> bool: ) return result.scalar_one() + async def content_items_exist(self, external_ids: list[str]) -> set[str]: + if not external_ids: + return set() + async with self.session() as session: + result = await session.execute( + select(ContentItem.external_id).where(ContentItem.external_id.in_(external_ids)) + ) + return {row[0] for row in result.all()} + async def get_unposted_content_items(self, limit: int = 10) -> list[ContentItem]: async with self.session() as session: result = await session.execute( diff --git a/src/intelstream/discord/cogs/content_posting.py b/src/intelstream/discord/cogs/content_posting.py index 3302d7b..5235bde 100644 --- a/src/intelstream/discord/cogs/content_posting.py +++ b/src/intelstream/discord/cogs/content_posting.py @@ -43,10 +43,7 @@ async def cog_load(self) -> None: ) await self._pipeline.initialize() - self._poster = ContentPoster( - self.bot, - max_message_length=self.bot.settings.discord_max_message_length, - ) + self._poster = ContentPoster(self.bot) self._initialized = True self._base_interval = self.bot.settings.content_poll_interval_minutes diff --git a/src/intelstream/services/content_poster.py b/src/intelstream/services/content_poster.py index 4a52f9d..4077ccb 100644 --- a/src/intelstream/services/content_poster.py +++ b/src/intelstream/services/content_poster.py @@ -10,6 +10,9 @@ logger = structlog.get_logger() +MAX_EMBED_TITLE = 256 +MAX_EMBED_DESCRIPTION = 4096 + SOURCE_TYPE_LABELS: dict[SourceType, str] = { SourceType.SUBSTACK: "Substack", SourceType.YOUTUBE: "YouTube", @@ -20,6 +23,16 @@ SourceType.TWITTER: "Twitter", } +SOURCE_TYPE_COLORS: dict[SourceType, discord.Color] = { + SourceType.SUBSTACK: discord.Color.from_rgb(255, 103, 25), + SourceType.YOUTUBE: discord.Color.red(), + SourceType.RSS: discord.Color.from_rgb(255, 165, 0), + SourceType.PAGE: discord.Color.blue(), + SourceType.ARXIV: discord.Color.from_rgb(179, 27, 27), + SourceType.BLOG: discord.Color.teal(), + SourceType.TWITTER: discord.Color.from_rgb(29, 161, 242), +} + TRUNCATION_NOTICE = "\n\n*[Summary truncated]*" @@ -73,56 +86,43 @@ def truncate_summary_at_bullet(summary: str, max_length: int) -> str: class ContentPoster: - def __init__(self, bot: "IntelStreamBot", max_message_length: int = 2000) -> None: + def __init__(self, bot: "IntelStreamBot") -> None: self._bot = bot - self._max_message_length = max_message_length - def format_message( + def format_embed( self, content_item: ContentItem, source_type: SourceType, source_name: str, - ) -> str: - header_parts: list[str] = [] - - if content_item.author: - header_parts.append(f"**{content_item.author}**") - + ) -> discord.Embed: title = content_item.title - if content_item.original_url: - header_parts.append(f"[{title}]({content_item.original_url})") - else: - header_parts.append(f"**{title}**") + if len(title) > MAX_EMBED_TITLE: + title = title[: MAX_EMBED_TITLE - 3] + "..." - header_parts.append("") + summary = content_item.summary or "No summary available." 
+ if len(summary) > MAX_EMBED_DESCRIPTION: + summary = truncate_summary_at_bullet(summary, MAX_EMBED_DESCRIPTION) + color = SOURCE_TYPE_COLORS.get(source_type, discord.Color.greyple()) source_label = SOURCE_TYPE_LABELS.get(source_type, "Unknown") - footer = f"\n*{source_label} | {source_name}*" - - header = "\n".join(header_parts) - overhead = len(header) + len(footer) - summary = content_item.summary or "No summary available." + embed = discord.Embed( + title=title, + url=content_item.original_url or None, + description=summary, + color=color, + timestamp=content_item.published_at, + ) - available_for_summary = self._max_message_length - overhead - if len(summary) > available_for_summary: - summary = truncate_summary_at_bullet(summary, available_for_summary) + if content_item.author: + embed.set_author(name=content_item.author) - message = header + summary + footer + if content_item.thumbnail_url: + embed.set_image(url=content_item.thumbnail_url) - if len(message) > self._max_message_length: - logger.warning( - "Message still exceeds limit after truncation", - length=len(message), - max_length=self._max_message_length, - ) - if summary.endswith(TRUNCATION_NOTICE): - summary = summary[: -len(TRUNCATION_NOTICE)] - excess = len(message) - self._max_message_length + len(TRUNCATION_NOTICE) - summary = summary[:-excess] + TRUNCATION_NOTICE - message = header + summary + footer + embed.set_footer(text=f"{source_label} | {source_name}") - return message + return embed async def post_content( self, @@ -142,10 +142,10 @@ async def post_content( raise ValueError( f"No URL available for skip-summary content item {content_item.id}" ) - content = content_item.original_url + message = await channel.send(content=content_item.original_url) else: - content = self.format_message(content_item, source_type, source_name) - message = await channel.send(content=content) + embed = self.format_embed(content_item, source_type, source_name) + message = await channel.send(embed=embed) logger.info( "Posted content to Discord", diff --git a/src/intelstream/services/pipeline.py b/src/intelstream/services/pipeline.py index 4180208..329a82e 100644 --- a/src/intelstream/services/pipeline.py +++ b/src/intelstream/services/pipeline.py @@ -75,111 +75,54 @@ def _create_adapters(self) -> dict[SourceType, BaseAdapter]: return adapters + MAX_CONCURRENT_FETCHES = 5 + + def _is_due_for_poll(self, source: Source) -> bool: + if source.last_polled_at is None: + return True + interval = self._settings.get_poll_interval(source.type) + last_polled = source.last_polled_at + if last_polled.tzinfo is None: + last_polled = last_polled.replace(tzinfo=UTC) + next_poll_at = last_polled + timedelta(minutes=interval) + return datetime.now(UTC) >= next_poll_at + async def fetch_all_sources(self) -> int: sources = await self._repository.get_all_sources(active_only=True) logger.info("Fetching content from sources", count=len(sources)) fetch_start = time.monotonic() - total_new_items = 0 - sources_polled = 0 - sources_skipped = 0 - sources_failed = 0 - fetch_delay = self._settings.fetch_delay_seconds - - for i, source in enumerate(sources): - if source.last_polled_at is not None: - interval = self._settings.get_poll_interval(source.type) - last_polled = source.last_polled_at - if last_polled.tzinfo is None: - last_polled = last_polled.replace(tzinfo=UTC) - next_poll_at = last_polled + timedelta(minutes=interval) - if datetime.now(UTC) < next_poll_at: - logger.debug( - "Skipping source, not due yet", - source_name=source.name, - 
source_type=source.type.value, - next_poll_at=next_poll_at.isoformat(), - ) - sources_skipped += 1 - continue - fetch_succeeded = False - try: - new_items = await self._fetch_source(source) - total_new_items += new_items - fetch_succeeded = True - sources_polled += 1 - except httpx.TimeoutException: - logger.warning( - "Source fetch timed out", - source_name=source.name, - source_type=source.type.value, - ) - await self._repository.increment_failure_count(source.id) - sources_failed += 1 - except httpx.HTTPStatusError as e: - status = e.response.status_code - if status == 404: - logger.error( - "Source not found (404), consider removing", - source_name=source.name, - source_type=source.type.value, - ) - await self._repository.increment_failure_count(source.id) - elif status == 429: - logger.warning( - "Rate limited by source", - source_name=source.name, - source_type=source.type.value, - ) - await self._repository.increment_failure_count(source.id) - elif status in (401, 403): - logger.error( - "Auth error fetching source, check credentials", - source_name=source.name, - source_type=source.type.value, - status=status, - ) - await self._repository.increment_failure_count(source.id) - elif status >= 500: - logger.warning( - "Server error fetching source", - source_name=source.name, - source_type=source.type.value, - status=status, - ) - await self._repository.increment_failure_count(source.id) - else: - logger.error( - "HTTP error fetching source", - source_name=source.name, - source_type=source.type.value, - status=status, - ) - sources_failed += 1 - except httpx.RequestError as e: - logger.warning( - "Network error fetching source", - source_name=source.name, - source_type=source.type.value, - error=type(e).__name__, - ) - await self._repository.increment_failure_count(source.id) - sources_failed += 1 - except Exception as e: - logger.exception( - "Unexpected error fetching source", + due_sources: list[Source] = [] + sources_skipped = 0 + for source in sources: + if self._is_due_for_poll(source): + due_sources.append(source) + else: + logger.debug( + "Skipping source, not due yet", source_name=source.name, source_type=source.type.value, - error=str(e), ) - sources_failed += 1 + sources_skipped += 1 - if fetch_succeeded: - await self._repository.reset_failure_count(source.id) + semaphore = asyncio.Semaphore(self.MAX_CONCURRENT_FETCHES) - if fetch_delay > 0 and i < len(sources) - 1: - await asyncio.sleep(fetch_delay) + async def fetch_with_limit(source: Source) -> tuple[int, bool]: + async with semaphore: + return await self._fetch_source_safe(source) + + results = await asyncio.gather(*[fetch_with_limit(s) for s in due_sources]) + + total_new_items = 0 + sources_polled = 0 + sources_failed = 0 + for new_items, succeeded in results: + total_new_items += new_items + if succeeded: + sources_polled += 1 + else: + sources_failed += 1 await self._repository.cleanup_extraction_cache() @@ -194,6 +137,74 @@ async def fetch_all_sources(self) -> int: ) return total_new_items + async def _fetch_source_safe(self, source: Source) -> tuple[int, bool]: + try: + new_items = await self._fetch_source(source) + await self._repository.reset_failure_count(source.id) + return new_items, True + except httpx.TimeoutException: + logger.warning( + "Source fetch timed out", + source_name=source.name, + source_type=source.type.value, + ) + await self._repository.increment_failure_count(source.id) + except httpx.HTTPStatusError as e: + status = e.response.status_code + if status == 404: + logger.error( + "Source not 
found (404), consider removing", + source_name=source.name, + source_type=source.type.value, + ) + await self._repository.increment_failure_count(source.id) + elif status == 429: + logger.warning( + "Rate limited by source", + source_name=source.name, + source_type=source.type.value, + ) + await self._repository.increment_failure_count(source.id) + elif status in (401, 403): + logger.error( + "Auth error fetching source, check credentials", + source_name=source.name, + source_type=source.type.value, + status=status, + ) + await self._repository.increment_failure_count(source.id) + elif status >= 500: + logger.warning( + "Server error fetching source", + source_name=source.name, + source_type=source.type.value, + status=status, + ) + await self._repository.increment_failure_count(source.id) + else: + logger.error( + "HTTP error fetching source", + source_name=source.name, + source_type=source.type.value, + status=status, + ) + except httpx.RequestError as e: + logger.warning( + "Network error fetching source", + source_name=source.name, + source_type=source.type.value, + error=type(e).__name__, + ) + await self._repository.increment_failure_count(source.id) + except Exception as e: + logger.exception( + "Unexpected error fetching source", + source_name=source.name, + source_type=source.type.value, + error=str(e), + ) + return 0, False + async def _fetch_source(self, source: Source) -> int: adapter: BaseAdapter | None = None @@ -232,9 +243,13 @@ async def _fetch_source(self, source: Source) -> int: is_first_poll = source.last_polled_at is None + existing_ids = await self._repository.content_items_exist( + [item.external_id for item in items] + ) + new_count = 0 for item in items: - if not await self._repository.content_item_exists(item.external_id): + if item.external_id not in existing_ids: try: await self._store_content_item(source, item) new_count += 1 @@ -299,8 +314,11 @@ async def summarize_pending(self, max_items: int = 10) -> int: summarize_start = time.monotonic() summarized_count = 0 + source_ids = {item.source_id for item in items} + sources_map = await self._repository.get_sources_by_ids(source_ids) + for item in items: - source = await self._repository.get_source_by_id(item.source_id) + source = sources_map.get(item.source_id) source_name = source.name if source else "unknown" source_type = source.type.value if source else "unknown" @@ -364,6 +382,8 @@ async def summarize_pending(self, max_items: int = 10) -> int: return summarized_count async def _handle_first_posting_backfill(self, items: list[ContentItem]) -> None: + source_ids = {item.source_id for item in items} + sources_map = await self._repository.get_sources_by_ids(source_ids) processed_sources: set[str] = set() for item in items: @@ -382,7 +402,7 @@ async def _handle_first_posting_backfill(self, items: list[ContentItem]) -> None ) if backfilled_count > 0: - source = await self._repository.get_source_by_id(item.source_id) + source = sources_map.get(item.source_id) source_name = source.name if source else "unknown" logger.info( "First posting for source - backfilled old items", diff --git a/tests/test_adapters/test_strategies/test_rss_discovery.py b/tests/test_adapters/test_strategies/test_rss_discovery.py index d0911ea..ede5ff5 100644 --- a/tests/test_adapters/test_strategies/test_rss_discovery.py +++ b/tests/test_adapters/test_strategies/test_rss_discovery.py @@ -82,6 +82,18 @@ async def test_discover_probes_common_paths(self, rss_strategy: RSSDiscoveryStra headers={"content-type": "application/rss+xml"}, ) ) + for path 
in [ + "/feed.xml", + "/rss", + "/rss.xml", + "/atom.xml", + "/blog/feed", + "/blog/rss", + "/research/feed", + "/index.xml", + "/feeds/posts/default", + ]: + respx.head(f"https://example.com{path}").mock(return_value=httpx.Response(404)) result = await rss_strategy.discover("https://example.com/blog") diff --git a/tests/test_database.py b/tests/test_database.py index b79215c..a9b3cf2 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -255,6 +255,37 @@ async def test_content_item_exists(self, repository: Repository) -> None: assert await repository.content_item_exists("video123") is True assert await repository.content_item_exists("nonexistent") is False + async def test_content_items_exist_batch(self, repository: Repository) -> None: + source = await repository.add_source( + source_type=SourceType.RSS, + name="Batch Test", + identifier="batch-test", + ) + + await repository.add_content_item( + source_id=source.id, + external_id="item-a", + title="Item A", + original_url="https://example.com/a", + author="Author", + published_at=datetime.now(UTC), + ) + await repository.add_content_item( + source_id=source.id, + external_id="item-b", + title="Item B", + original_url="https://example.com/b", + author="Author", + published_at=datetime.now(UTC), + ) + + result = await repository.content_items_exist(["item-a", "item-b", "item-c"]) + assert result == {"item-a", "item-b"} + + async def test_content_items_exist_empty_list(self, repository: Repository) -> None: + result = await repository.content_items_exist([]) + assert result == set() + async def test_add_duplicate_content_raises_error(self, repository: Repository) -> None: source = await repository.add_source( source_type=SourceType.RSS, diff --git a/tests/test_discord/test_content_posting.py b/tests/test_discord/test_content_posting.py index cecf591..3cf778c 100644 --- a/tests/test_discord/test_content_posting.py +++ b/tests/test_discord/test_content_posting.py @@ -16,7 +16,6 @@ def mock_bot(): bot.settings.summary_model = "claude-sonnet-4-20250514" bot.settings.summary_max_tokens = 2048 bot.settings.summary_max_input_length = 100000 - bot.settings.discord_max_message_length = 2000 bot.guilds = [] bot.wait_until_ready = AsyncMock() bot.notify_owner = AsyncMock() @@ -52,7 +51,7 @@ async def test_cog_load_initializes_components( ) mock_pipeline_cls.assert_called_once() mock_pipeline.initialize.assert_called_once() - mock_poster_cls.assert_called_once_with(mock_bot, max_message_length=2000) + mock_poster_cls.assert_called_once_with(mock_bot) assert cog._initialized is True @patch("intelstream.discord.cogs.content_posting.SummarizationService") diff --git a/tests/test_services/test_content_poster.py b/tests/test_services/test_content_poster.py index 32c46b3..12b8a83 100644 --- a/tests/test_services/test_content_poster.py +++ b/tests/test_services/test_content_poster.py @@ -6,14 +6,15 @@ from intelstream.database.models import ContentItem, SourceType from intelstream.services.content_poster import ( + MAX_EMBED_DESCRIPTION, + MAX_EMBED_TITLE, + SOURCE_TYPE_COLORS, SOURCE_TYPE_LABELS, TRUNCATION_NOTICE, ContentPoster, truncate_summary_at_bullet, ) -DEFAULT_MAX_MESSAGE_LENGTH = 2000 - @pytest.fixture def mock_bot(): @@ -41,118 +42,181 @@ def sample_content_item(): return item -class TestContentPosterFormatMessage: - def test_format_message_basic(self, content_poster, sample_content_item): - message = content_poster.format_message( +class TestContentPosterFormatEmbed: + def test_format_embed_returns_embed(self, content_poster, 
sample_content_item): + embed = content_poster.format_embed( content_item=sample_content_item, source_type=SourceType.SUBSTACK, source_name="Test Substack", ) - assert isinstance(message, str) - assert sample_content_item.title in message - assert sample_content_item.summary in message - assert sample_content_item.author in message + assert isinstance(embed, discord.Embed) - def test_format_message_includes_author_bold(self, content_poster, sample_content_item): - message = content_poster.format_message( + def test_format_embed_has_title(self, content_poster, sample_content_item): + embed = content_poster.format_embed( content_item=sample_content_item, source_type=SourceType.SUBSTACK, source_name="Test Substack", ) - assert f"**{sample_content_item.author}**" in message + assert embed.title == sample_content_item.title - def test_format_message_includes_title_as_link(self, content_poster, sample_content_item): - message = content_poster.format_message( + def test_format_embed_title_is_clickable_link(self, content_poster, sample_content_item): + embed = content_poster.format_embed( content_item=sample_content_item, source_type=SourceType.SUBSTACK, source_name="Test Substack", ) - expected_link = f"[{sample_content_item.title}]({sample_content_item.original_url})" - assert expected_link in message - - def test_format_message_title_bold_when_no_url(self, content_poster, sample_content_item): - sample_content_item.original_url = None + assert embed.url == sample_content_item.original_url - message = content_poster.format_message( + def test_format_embed_has_summary_in_description(self, content_poster, sample_content_item): + embed = content_poster.format_embed( content_item=sample_content_item, source_type=SourceType.SUBSTACK, source_name="Test Substack", ) - assert f"**{sample_content_item.title}**" in message - - def test_format_message_without_summary(self, content_poster, sample_content_item): - sample_content_item.summary = None + assert embed.description == sample_content_item.summary - message = content_poster.format_message( + def test_format_embed_has_author(self, content_poster, sample_content_item): + embed = content_poster.format_embed( content_item=sample_content_item, source_type=SourceType.SUBSTACK, source_name="Test Substack", ) - assert "No summary available." 
in message + assert embed.author.name == sample_content_item.author - def test_format_message_without_author(self, content_poster, sample_content_item): + def test_format_embed_without_author(self, content_poster, sample_content_item): sample_content_item.author = None - message = content_poster.format_message( + embed = content_poster.format_embed( content_item=sample_content_item, source_type=SourceType.SUBSTACK, source_name="Test Substack", ) - assert "**None**" not in message - assert sample_content_item.title in message + assert embed.author.name is None - def test_format_message_includes_source_footer(self, content_poster, sample_content_item): - message = content_poster.format_message( + def test_format_embed_has_source_footer(self, content_poster, sample_content_item): + embed = content_poster.format_embed( content_item=sample_content_item, source_type=SourceType.SUBSTACK, source_name="My Newsletter", ) - expected_footer = f"*{SOURCE_TYPE_LABELS[SourceType.SUBSTACK]} | My Newsletter*" - assert expected_footer in message + assert embed.footer.text == "Substack | My Newsletter" - def test_format_message_source_labels_by_type(self, content_poster, sample_content_item): - for source_type, expected_label in SOURCE_TYPE_LABELS.items(): - message = content_poster.format_message( + def test_format_embed_has_color_per_source_type(self, content_poster, sample_content_item): + for source_type, expected_color in SOURCE_TYPE_COLORS.items(): + embed = content_poster.format_embed( content_item=sample_content_item, source_type=source_type, source_name="Test", ) - assert f"*{expected_label} | Test*" in message + assert embed.color == expected_color - def test_format_message_truncates_long_content(self, content_poster, sample_content_item): - sample_content_item.summary = "A" * (DEFAULT_MAX_MESSAGE_LENGTH + 500) + def test_format_embed_has_thumbnail(self, content_poster, sample_content_item): + embed = content_poster.format_embed( + content_item=sample_content_item, + source_type=SourceType.YOUTUBE, + source_name="Test", + ) + + assert embed.image.url == sample_content_item.thumbnail_url + + def test_format_embed_no_thumbnail_when_none(self, content_poster, sample_content_item): + sample_content_item.thumbnail_url = None - message = content_poster.format_message( + embed = content_poster.format_embed( + content_item=sample_content_item, + source_type=SourceType.SUBSTACK, + source_name="Test", + ) + + assert embed.image.url is None + + def test_format_embed_has_timestamp(self, content_poster, sample_content_item): + embed = content_poster.format_embed( + content_item=sample_content_item, + source_type=SourceType.SUBSTACK, + source_name="Test", + ) + + assert embed.timestamp == sample_content_item.published_at + + def test_format_embed_without_summary(self, content_poster, sample_content_item): + sample_content_item.summary = None + + embed = content_poster.format_embed( + content_item=sample_content_item, + source_type=SourceType.SUBSTACK, + source_name="Test Substack", + ) + + assert embed.description == "No summary available." 
+ + def test_format_embed_truncates_long_title(self, content_poster, sample_content_item): + sample_content_item.title = "A" * 300 + + embed = content_poster.format_embed( + content_item=sample_content_item, + source_type=SourceType.RSS, + source_name="Test", + ) + + assert len(embed.title) <= MAX_EMBED_TITLE + assert embed.title.endswith("...") + + def test_format_embed_truncates_long_description(self, content_poster, sample_content_item): + sample_content_item.summary = "A" * (MAX_EMBED_DESCRIPTION + 500) + + embed = content_poster.format_embed( content_item=sample_content_item, source_type=SourceType.RSS, - source_name="Test RSS", + source_name="Test", + ) + + assert len(embed.description) <= MAX_EMBED_DESCRIPTION + assert TRUNCATION_NOTICE.strip() in embed.description + + def test_format_embed_url_none_when_no_url(self, content_poster, sample_content_item): + sample_content_item.original_url = None + + embed = content_poster.format_embed( + content_item=sample_content_item, + source_type=SourceType.SUBSTACK, + source_name="Test", ) - assert len(message) <= DEFAULT_MAX_MESSAGE_LENGTH - assert TRUNCATION_NOTICE.strip() in message + assert embed.url is None + + def test_format_embed_source_labels_in_footer(self, content_poster, sample_content_item): + for source_type, expected_label in SOURCE_TYPE_LABELS.items(): + embed = content_poster.format_embed( + content_item=sample_content_item, + source_type=source_type, + source_name="Test", + ) + assert embed.footer.text == f"{expected_label} | Test" - def test_format_message_unknown_source_type(self, content_poster, sample_content_item): + def test_format_embed_unknown_source_type(self, content_poster, sample_content_item): unknown_type = MagicMock() unknown_type.value = "unknown" - message = content_poster.format_message( + embed = content_poster.format_embed( content_item=sample_content_item, source_type=unknown_type, source_name="Test", ) - assert "*Unknown | Test*" in message + assert embed.footer.text == "Unknown | Test" + assert embed.color == discord.Color.greyple() class TestContentPosterPostContent: - async def test_post_content_sends_message(self, content_poster, sample_content_item): + async def test_post_content_sends_embed(self, content_poster, sample_content_item): mock_channel = MagicMock(spec=discord.TextChannel) mock_message = MagicMock(spec=discord.Message) mock_message.id = 12345 @@ -167,13 +231,11 @@ async def test_post_content_sends_message(self, content_poster, sample_content_i mock_channel.send.assert_called_once() call_kwargs = mock_channel.send.call_args.kwargs - assert "content" in call_kwargs - assert isinstance(call_kwargs["content"], str) + assert "embed" in call_kwargs + assert isinstance(call_kwargs["embed"], discord.Embed) assert result == mock_message - async def test_post_content_message_contains_item_info( - self, content_poster, sample_content_item - ): + async def test_post_content_embed_contains_item_info(self, content_poster, sample_content_item): mock_channel = MagicMock(spec=discord.TextChannel) mock_message = MagicMock(spec=discord.Message) mock_message.id = 12345 @@ -187,9 +249,9 @@ async def test_post_content_message_contains_item_info( ) call_kwargs = mock_channel.send.call_args.kwargs - content = call_kwargs["content"] - assert sample_content_item.title in content - assert sample_content_item.summary in content + embed = call_kwargs["embed"] + assert embed.title == sample_content_item.title + assert embed.description == sample_content_item.summary async def 
test_post_content_skip_summary_sends_bare_url( self, content_poster, sample_content_item @@ -227,7 +289,7 @@ async def test_post_content_skip_summary_no_url_raises( mock_channel.send.assert_not_called() - async def test_post_content_skip_summary_false_sends_formatted( + async def test_post_content_skip_summary_false_sends_embed( self, content_poster, sample_content_item ): mock_channel = MagicMock(spec=discord.TextChannel) @@ -244,8 +306,8 @@ async def test_post_content_skip_summary_false_sends_formatted( ) call_kwargs = mock_channel.send.call_args.kwargs - assert sample_content_item.title in call_kwargs["content"] - assert sample_content_item.summary in call_kwargs["content"] + assert "embed" in call_kwargs + assert isinstance(call_kwargs["embed"], discord.Embed) class TestContentPosterPostUnpostedItems: diff --git a/tests/test_services/test_pipeline.py b/tests/test_services/test_pipeline.py index c4dee40..59c69dc 100644 --- a/tests/test_services/test_pipeline.py +++ b/tests/test_services/test_pipeline.py @@ -175,7 +175,7 @@ async def test_fetch_all_sources_success( await pipeline.initialize() mock_repository.get_all_sources.return_value = [sample_source] - mock_repository.content_item_exists.return_value = False + mock_repository.content_items_exist.return_value = set() mock_repository.get_most_recent_item_for_source.return_value = MagicMock( spec=ContentItem, id="item-1", title="Test" ) @@ -205,7 +205,7 @@ async def test_fetch_all_sources_skips_existing( await pipeline.initialize() mock_repository.get_all_sources.return_value = [sample_source] - mock_repository.content_item_exists.return_value = True + mock_repository.content_items_exist.return_value = {sample_content_data.external_id} with patch.object( pipeline._adapters[SourceType.SUBSTACK], @@ -286,7 +286,7 @@ async def test_first_poll_stores_all_items_and_backfills( ] mock_repository.get_all_sources.return_value = [sample_source] - mock_repository.content_item_exists.return_value = False + mock_repository.content_items_exist.return_value = set() mock_repository.get_most_recent_item_for_source.return_value = most_recent mock_repository.mark_items_as_backfilled.return_value = 4 @@ -325,7 +325,7 @@ async def test_first_poll_no_backfill_when_single_item( most_recent.title = "Test Article" mock_repository.get_all_sources.return_value = [sample_source] - mock_repository.content_item_exists.return_value = False + mock_repository.content_items_exist.return_value = set() mock_repository.get_most_recent_item_for_source.return_value = most_recent mock_repository.mark_items_as_backfilled.return_value = 0 @@ -368,7 +368,7 @@ async def test_subsequent_poll_stores_all_new_items( ] mock_repository.get_all_sources.return_value = [sample_source] - mock_repository.content_item_exists.return_value = False + mock_repository.content_items_exist.return_value = set() with patch.object( pipeline._adapters[SourceType.SUBSTACK], @@ -383,19 +383,19 @@ async def test_subsequent_poll_stores_all_new_items( await pipeline.close() - async def test_fetch_all_sources_applies_rate_limiting( + async def test_fetch_all_sources_concurrent( self, mock_repository: AsyncMock, mock_summarizer: AsyncMock, ): - """Verify that fetch_delay_seconds is applied between source fetches.""" + """Verify that multiple sources are fetched concurrently.""" settings = MagicMock(spec=Settings) settings.youtube_api_key = "test-key" settings.anthropic_api_key = "test-key" settings.twitter_bearer_token = None settings.http_timeout_seconds = 30.0 settings.summarization_delay_seconds = 0.5 - 
settings.fetch_delay_seconds = 0.1 + settings.fetch_delay_seconds = 0.0 pipeline = ContentPipeline( settings=settings, repository=mock_repository, summarizer=mock_summarizer @@ -421,21 +421,18 @@ async def test_fetch_all_sources_applies_rate_limiting( source2.skip_summary = False mock_repository.get_all_sources.return_value = [source1, source2] - mock_repository.content_item_exists.return_value = True - - with ( - patch.object( - pipeline._adapters[SourceType.SUBSTACK], - "fetch_latest", - new_callable=AsyncMock, - return_value=[], - ), - patch( - "intelstream.services.pipeline.asyncio.sleep", new_callable=AsyncMock - ) as mock_sleep, + mock_repository.content_items_exist.return_value = set() + + with patch.object( + pipeline._adapters[SourceType.SUBSTACK], + "fetch_latest", + new_callable=AsyncMock, + return_value=[], ): - await pipeline.fetch_all_sources() - mock_sleep.assert_called_once_with(0.1) + result = await pipeline.fetch_all_sources() + + assert result == 0 + assert mock_repository.reset_failure_count.call_count == 2 await pipeline.close() @@ -567,7 +564,7 @@ async def test_fetch_success_resets_failure_count( await pipeline.initialize() mock_repository.get_all_sources.return_value = [sample_source] - mock_repository.content_item_exists.return_value = False + mock_repository.content_items_exist.return_value = set() mock_repository.get_most_recent_item_for_source.return_value = MagicMock( spec=ContentItem, id="item-1", title="Test" ) @@ -598,7 +595,7 @@ async def test_fetch_passes_skip_content_to_adapter( sample_source.skip_summary = True mock_repository.get_all_sources.return_value = [sample_source] - mock_repository.content_item_exists.return_value = False + mock_repository.content_items_exist.return_value = set() mock_repository.get_most_recent_item_for_source.return_value = MagicMock( spec=ContentItem, id="item-1", title="Test" ) @@ -658,7 +655,7 @@ async def test_fetches_source_when_interval_elapsed( sample_source.last_polled_at = datetime(2000, 1, 1, 0, 0, 0, tzinfo=UTC) mock_settings.get_poll_interval.return_value = 5 mock_repository.get_all_sources.return_value = [sample_source] - mock_repository.content_item_exists.return_value = False + mock_repository.content_items_exist.return_value = set() with patch.object( pipeline._adapters[SourceType.SUBSTACK], @@ -684,7 +681,7 @@ async def test_never_skips_first_poll( sample_source.last_polled_at = None mock_repository.get_all_sources.return_value = [sample_source] - mock_repository.content_item_exists.return_value = False + mock_repository.content_items_exist.return_value = set() mock_repository.get_most_recent_item_for_source.return_value = MagicMock( spec=ContentItem, id="item-1", title="Test" ) @@ -716,7 +713,7 @@ async def test_summarize_pending_success( await pipeline.initialize() mock_repository.get_unsummarized_content_items.return_value = [sample_content_item] - mock_repository.get_source_by_id.return_value = sample_source + mock_repository.get_sources_by_ids.return_value = {sample_source.id: sample_source} mock_repository.has_source_posted_content.return_value = True mock_summarizer.summarize.return_value = "This is the summary." 
@@ -759,6 +756,7 @@ async def test_summarize_pending_marks_items_without_content_ready_for_posting( item_without_content.raw_content = None mock_repository.get_unsummarized_content_items.return_value = [item_without_content] + mock_repository.get_sources_by_ids.return_value = {} mock_repository.has_source_posted_content.return_value = True result = await pipeline.summarize_pending() @@ -780,7 +778,7 @@ async def test_summarize_pending_handles_summarization_error( await pipeline.initialize() mock_repository.get_unsummarized_content_items.return_value = [sample_content_item] - mock_repository.get_source_by_id.return_value = sample_source + mock_repository.get_sources_by_ids.return_value = {sample_source.id: sample_source} mock_repository.has_source_posted_content.return_value = True mock_summarizer.summarize.side_effect = SummarizationError("API error") @@ -802,7 +800,7 @@ async def test_summarize_pending_handles_unexpected_error( await pipeline.initialize() mock_repository.get_unsummarized_content_items.return_value = [sample_content_item] - mock_repository.get_source_by_id.return_value = sample_source + mock_repository.get_sources_by_ids.return_value = {sample_source.id: sample_source} mock_repository.has_source_posted_content.return_value = True mock_summarizer.summarize.side_effect = RuntimeError("Unexpected") @@ -822,7 +820,7 @@ async def test_summarize_pending_unknown_source_type( await pipeline.initialize() mock_repository.get_unsummarized_content_items.return_value = [sample_content_item] - mock_repository.get_source_by_id.return_value = None + mock_repository.get_sources_by_ids.return_value = {} mock_repository.has_source_posted_content.return_value = True mock_summarizer.summarize.return_value = "Summary" @@ -866,7 +864,7 @@ async def test_summarize_pending_backfills_first_posting_items( mock_repository.has_source_posted_content.return_value = False mock_repository.get_most_recent_item_for_source.return_value = new_item mock_repository.mark_items_as_backfilled.return_value = 1 - mock_repository.get_source_by_id.return_value = sample_source + mock_repository.get_sources_by_ids.return_value = {sample_source.id: sample_source} mock_summarizer.summarize.return_value = "Summary" result = await pipeline.summarize_pending() From dd137558d397b565f4e2706121f50716e974e548 Mon Sep 17 00:00:00 2001 From: user1303836 Date: Sat, 7 Feb 2026 14:14:30 -0500 Subject: [PATCH 2/5] Add autocomplete for source and GitHub repo slash commands Add Discord autocomplete handlers for /source remove, /source info, /source toggle, /github remove, and /github toggle commands. Filters suggestions by guild and matches on partial name input. 
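For illustration, the new handlers can be driven directly against a
mocked bot and repository. A minimal sketch of the /github autocomplete
path, using hypothetical repo names (the mock setup mirrors the tests
added later in this series):

    import asyncio
    from unittest.mock import AsyncMock, MagicMock

    from intelstream.discord.cogs.github import GitHubCommands

    async def demo() -> None:
        backend = MagicMock(owner="acme", repo="backend", guild_id="123")
        frontend = MagicMock(owner="acme", repo="frontend", guild_id="123")
        other = MagicMock(owner="acme", repo="api", guild_id="999")

        bot = MagicMock()
        bot.repository.get_all_github_repos = AsyncMock(
            return_value=[backend, frontend, other]
        )

        cog = GitHubCommands(bot)
        interaction = MagicMock(guild_id=123)

        # Partial, case-insensitive match on "owner/repo", scoped to the
        # caller's guild and capped at 25 choices.
        choices = await cog._repo_autocomplete(interaction, "back")
        assert [c.name for c in choices] == ["acme/backend"]

    asyncio.run(demo())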
--- src/intelstream/discord/cogs/github.py | 17 +++++++++++++++++ .../discord/cogs/source_management.py | 13 +++++++++++++ 2 files changed, 30 insertions(+) diff --git a/src/intelstream/discord/cogs/github.py b/src/intelstream/discord/cogs/github.py index 4cc97cf..ad7ae6b 100644 --- a/src/intelstream/discord/cogs/github.py +++ b/src/intelstream/discord/cogs/github.py @@ -38,6 +38,21 @@ def __init__(self, bot: "IntelStreamBot") -> None: self.bot = bot self._github_service: GitHubService | None = None + async def _repo_autocomplete( + self, interaction: discord.Interaction, current: str + ) -> list[app_commands.Choice[str]]: + repos = await self.bot.repository.get_all_github_repos(active_only=False) + guild_id = str(interaction.guild_id) if interaction.guild_id else None + if guild_id: + repos = [r for r in repos if r.guild_id == guild_id] + matches = [ + r for r in repos if current.lower() in f"{r.owner}/{r.repo}".lower() + ] + return [ + app_commands.Choice(name=f"{r.owner}/{r.repo}", value=f"{r.owner}/{r.repo}") + for r in matches[:25] + ] + def _get_github_service(self) -> GitHubService | None: if not self.bot.settings.github_token: return None @@ -228,6 +243,7 @@ async def github_list( @github_group.command(name="remove", description="Stop monitoring a GitHub repository") @app_commands.default_permissions(manage_guild=True) @app_commands.describe(repo="Repository name (owner/repo format)") + @app_commands.autocomplete(repo=_repo_autocomplete) async def github_remove( self, interaction: discord.Interaction, @@ -273,6 +289,7 @@ async def github_remove( @github_group.command(name="toggle", description="Enable or disable GitHub repo monitoring") @app_commands.default_permissions(manage_guild=True) @app_commands.describe(repo="Repository name (owner/repo format)") + @app_commands.autocomplete(repo=_repo_autocomplete) async def github_toggle( self, interaction: discord.Interaction, diff --git a/src/intelstream/discord/cogs/source_management.py b/src/intelstream/discord/cogs/source_management.py index d6f8bb5..ebdf212 100644 --- a/src/intelstream/discord/cogs/source_management.py +++ b/src/intelstream/discord/cogs/source_management.py @@ -120,6 +120,16 @@ def __init__(self, bot: "IntelStreamBot") -> None: self.bot = bot self._anthropic_client: anthropic.AsyncAnthropic | None = None + async def _source_name_autocomplete( + self, interaction: discord.Interaction, current: str + ) -> list[app_commands.Choice[str]]: + sources = await self.bot.repository.get_all_sources(active_only=False) + guild_id = str(interaction.guild_id) if interaction.guild_id else None + if guild_id: + sources = [s for s in sources if s.guild_id == guild_id] + matches = [s for s in sources if current.lower() in s.name.lower()] + return [app_commands.Choice(name=s.name, value=s.name) for s in matches[:25]] + def _get_anthropic_client(self) -> anthropic.AsyncAnthropic: if self._anthropic_client is None: self._anthropic_client = anthropic.AsyncAnthropic( @@ -367,6 +377,7 @@ async def source_list(self, interaction: discord.Interaction) -> None: @source_group.command(name="remove", description="Remove a content source") @app_commands.default_permissions(manage_guild=True) @app_commands.describe(name="Name of the source to remove") + @app_commands.autocomplete(name=_source_name_autocomplete) async def source_remove( self, interaction: discord.Interaction, @@ -412,6 +423,7 @@ async def source_remove( @source_group.command(name="info", description="Show detailed info about a source") @app_commands.describe(name="Name of the source 
to inspect") + @app_commands.autocomplete(name=_source_name_autocomplete) async def source_info( self, interaction: discord.Interaction, @@ -462,6 +474,7 @@ async def source_info( @source_group.command(name="toggle", description="Enable or disable a content source") @app_commands.default_permissions(manage_guild=True) @app_commands.describe(name="Name of the source to toggle") + @app_commands.autocomplete(name=_source_name_autocomplete) async def source_toggle( self, interaction: discord.Interaction, From 6600cca3cce0078b4d169abe15199cc55d565ed8 Mon Sep 17 00:00:00 2001 From: user1303836 Date: Sat, 7 Feb 2026 14:14:47 -0500 Subject: [PATCH 3/5] Add tests for source name autocomplete --- tests/test_discord/test_source_management.py | 91 ++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/tests/test_discord/test_source_management.py b/tests/test_discord/test_source_management.py index 7a9cb67..448bace 100644 --- a/tests/test_discord/test_source_management.py +++ b/tests/test_discord/test_source_management.py @@ -683,3 +683,94 @@ async def test_toggle_source_not_found(self, source_management, mock_bot): mock_bot.repository.set_source_active.assert_not_called() call_args = interaction.followup.send.call_args assert "No source found" in call_args[0][0] + + +class TestSourceNameAutocomplete: + async def test_returns_matching_sources(self, source_management, mock_bot): + interaction = MagicMock(spec=discord.Interaction) + interaction.guild_id = 123 + + source1 = MagicMock() + source1.name = "My Newsletter" + source1.guild_id = "123" + + source2 = MagicMock() + source2.name = "Tech Blog" + source2.guild_id = "123" + + mock_bot.repository.get_all_sources = AsyncMock(return_value=[source1, source2]) + + choices = await source_management._source_name_autocomplete(interaction, "news") + + assert len(choices) == 1 + assert choices[0].name == "My Newsletter" + assert choices[0].value == "My Newsletter" + + async def test_returns_all_when_empty_query(self, source_management, mock_bot): + interaction = MagicMock(spec=discord.Interaction) + interaction.guild_id = 123 + + source1 = MagicMock() + source1.name = "Source A" + source1.guild_id = "123" + + source2 = MagicMock() + source2.name = "Source B" + source2.guild_id = "123" + + mock_bot.repository.get_all_sources = AsyncMock(return_value=[source1, source2]) + + choices = await source_management._source_name_autocomplete(interaction, "") + + assert len(choices) == 2 + + async def test_filters_by_guild(self, source_management, mock_bot): + interaction = MagicMock(spec=discord.Interaction) + interaction.guild_id = 123 + + source1 = MagicMock() + source1.name = "My Source" + source1.guild_id = "123" + + source2 = MagicMock() + source2.name = "Other Guild Source" + source2.guild_id = "999" + + mock_bot.repository.get_all_sources = AsyncMock(return_value=[source1, source2]) + + choices = await source_management._source_name_autocomplete(interaction, "") + + assert len(choices) == 1 + assert choices[0].name == "My Source" + + async def test_limits_to_25_choices(self, source_management, mock_bot): + interaction = MagicMock(spec=discord.Interaction) + interaction.guild_id = 123 + + sources = [] + for i in range(30): + s = MagicMock() + s.name = f"Source {i}" + s.guild_id = "123" + sources.append(s) + + mock_bot.repository.get_all_sources = AsyncMock(return_value=sources) + + choices = await source_management._source_name_autocomplete(interaction, "") + + assert len(choices) == 25 + + async def test_case_insensitive_matching(self, source_management, 
mock_bot): + interaction = MagicMock(spec=discord.Interaction) + interaction.guild_id = 123 + + source1 = MagicMock() + source1.name = "Tech Newsletter" + source1.guild_id = "123" + + mock_bot.repository.get_all_sources = AsyncMock(return_value=[source1]) + + choices = await source_management._source_name_autocomplete(interaction, "TECH") + + assert len(choices) == 1 + assert choices[0].name == "Tech Newsletter" From e37e1917d78ff0d930ec808c6151434defe5e17e Mon Sep 17 00:00:00 2001 From: user1303836 Date: Sat, 7 Feb 2026 14:15:10 -0500 Subject: [PATCH 4/5] Add tests for GitHub repo autocomplete --- tests/test_discord/test_github.py | 95 ++++++++++++++++++++++++++++++- 1 file changed, 94 insertions(+), 1 deletion(-) diff --git a/tests/test_discord/test_github.py b/tests/test_discord/test_github.py index 95be507..3bffb32 100644 --- a/tests/test_discord/test_github.py +++ b/tests/test_discord/test_github.py @@ -1,4 +1,6 @@ -from intelstream.discord.cogs.github import parse_github_url +from unittest.mock import AsyncMock, MagicMock + +from intelstream.discord.cogs.github import GitHubCommands, parse_github_url class TestParseGitHubUrl: @@ -65,3 +67,94 @@ def test_parse_normalizes_to_lowercase(self) -> None: def test_parse_owner_repo_normalizes_to_lowercase(self) -> None: result = parse_github_url("Owner/Repo") assert result == ("owner", "repo") + + +class TestRepoAutocomplete: + async def test_returns_matching_repos(self) -> None: + mock_bot = MagicMock() + repo1 = MagicMock() + repo1.owner = "acme" + repo1.repo = "backend" + repo1.guild_id = "123" + + repo2 = MagicMock() + repo2.owner = "acme" + repo2.repo = "frontend" + repo2.guild_id = "123" + + mock_bot.repository.get_all_github_repos = AsyncMock(return_value=[repo1, repo2]) + + cog = GitHubCommands(mock_bot) + interaction = MagicMock() + interaction.guild_id = 123 + + choices = await cog._repo_autocomplete(interaction, "back") + + assert len(choices) == 1 + assert choices[0].name == "acme/backend" + assert choices[0].value == "acme/backend" + + async def test_filters_by_guild(self) -> None: + mock_bot = MagicMock() + repo1 = MagicMock() + repo1.owner = "acme" + repo1.repo = "api" + repo1.guild_id = "123" + + repo2 = MagicMock() + repo2.owner = "other" + repo2.repo = "api" + repo2.guild_id = "999" + + mock_bot.repository.get_all_github_repos = AsyncMock(return_value=[repo1, repo2]) + + cog = GitHubCommands(mock_bot) + interaction = MagicMock() + interaction.guild_id = 123 + + choices = await cog._repo_autocomplete(interaction, "") + + assert len(choices) == 1 + assert choices[0].name == "acme/api" + + async def test_returns_all_when_empty_query(self) -> None: + mock_bot = MagicMock() + repo1 = MagicMock() + repo1.owner = "acme" + repo1.repo = "api" + repo1.guild_id = "123" + + repo2 = MagicMock() + repo2.owner = "acme" + repo2.repo = "web" + repo2.guild_id = "123" + + mock_bot.repository.get_all_github_repos = AsyncMock(return_value=[repo1, repo2]) + + cog = GitHubCommands(mock_bot) + interaction = MagicMock() + interaction.guild_id = 123 + + choices = await cog._repo_autocomplete(interaction, "") + + assert len(choices) == 2 + + async def test_limits_to_25_choices(self) -> None: + mock_bot = MagicMock() + repos = [] + for i in range(30): + r = MagicMock() + r.owner = "acme" + r.repo = f"repo-{i}" + r.guild_id = "123" + repos.append(r) + + mock_bot.repository.get_all_github_repos = AsyncMock(return_value=repos) + + cog = GitHubCommands(mock_bot) + interaction = MagicMock() + interaction.guild_id = 123 + + choices = await 
cog._repo_autocomplete(interaction, "") + + assert len(choices) == 25 From 527092ea98848e63c5d3e094e07a1ed4f86e2ab5 Mon Sep 17 00:00:00 2001 From: user1303836 Date: Sat, 7 Feb 2026 14:15:30 -0500 Subject: [PATCH 5/5] Format github autocomplete list comprehension --- src/intelstream/discord/cogs/github.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/intelstream/discord/cogs/github.py b/src/intelstream/discord/cogs/github.py index ad7ae6b..907c80d 100644 --- a/src/intelstream/discord/cogs/github.py +++ b/src/intelstream/discord/cogs/github.py @@ -45,9 +45,7 @@ async def _repo_autocomplete( guild_id = str(interaction.guild_id) if interaction.guild_id else None if guild_id: repos = [r for r in repos if r.guild_id == guild_id] - matches = [ - r for r in repos if current.lower() in f"{r.owner}/{r.repo}".lower() - ] + matches = [r for r in repos if current.lower() in f"{r.owner}/{r.repo}".lower()] return [ app_commands.Choice(name=f"{r.owner}/{r.repo}", value=f"{r.owner}/{r.repo}") for r in matches[:25]