From c477ad113e3833964ddcf453ad1608b135c4194a Mon Sep 17 00:00:00 2001 From: user1303836 Date: Fri, 6 Feb 2026 17:29:42 -0500 Subject: [PATCH 1/2] Migrate Twitter adapter from twitterapi.io to official X API v2 Replace the third-party twitterapi.io proxy with direct X API v2 integration using Bearer Token authentication. Key changes: - Rewrite TwitterAdapter to use GET /2/users/by/username and GET /2/users/{id}/tweets endpoints with field expansions - Add in-memory user ID caching to minimize API credit usage - Use server-side retweet and reply filtering via exclude=retweets,replies param - Parse X API v2 response format (data/includes/meta structure) - Extract quoted tweet text, media thumbnails, and author info from the includes expansion objects - Rename config field twitter_api_key -> twitter_bearer_token - Update pipeline, source management cog, and .env.example - Rewrite test suite for v2 API format (21 tests, up from 17) - Add tests for user ID caching, media thumbnails, empty timelines, and tweets without user expansion --- .env.example | 4 + README.md | 18 +- src/intelstream/adapters/__init__.py | 2 + src/intelstream/adapters/twitter.py | 225 ++++++++--- src/intelstream/config.py | 6 +- .../discord/cogs/source_management.py | 4 +- src/intelstream/services/pipeline.py | 4 +- tests/test_adapters/test_twitter.py | 349 ++++++++++++------ tests/test_discord/test_source_management.py | 6 +- tests/test_services/test_pipeline.py | 10 +- 10 files changed, 448 insertions(+), 180 deletions(-) diff --git a/.env.example b/.env.example index 86ceb33..825211f 100644 --- a/.env.example +++ b/.env.example @@ -10,6 +10,10 @@ ANTHROPIC_API_KEY=your_anthropic_api_key_here # YouTube API (optional - for YouTube channel monitoring) YOUTUBE_API_KEY=your_youtube_api_key_here +# Twitter/X API (optional - for Twitter account monitoring) +# Get a Bearer Token from https://developer.x.com/en/portal/dashboard +# TWITTER_BEARER_TOKEN=your_x_api_bearer_token_here + # Database Configuration 
DATABASE_URL=sqlite+aiosqlite:///./data/intelstream.db diff --git a/README.md b/README.md index f5fe9ac..95b9934 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ A Discord bot that monitors content sources and posts AI-generated summaries to - **RSS/Atom feeds** - Support for any standard RSS or Atom feed - **Arxiv papers** - Monitor research paper categories (cs.AI, cs.LG, cs.CL, etc.) - **Blogs** - Smart extraction from any blog using cascading discovery strategies (RSS, Sitemap, LLM extraction) -- **Twitter/X accounts** - Monitor Twitter accounts for new tweets via twitterapi.io +- **Twitter/X accounts** - Monitor Twitter accounts for new tweets via official X API v2 - **Web pages** - Monitor any web page URL with automatic content detection - **GitHub repositories** - Track commits, pull requests, and issues with Discord embeds - **Manual summarization** - Summarize any URL on-demand with `/summarize` @@ -23,7 +23,7 @@ A Discord bot that monitors content sources and posts AI-generated summaries to - Discord Bot Token - Anthropic API Key (for Claude) - YouTube API Key (optional, for YouTube monitoring) -- twitterapi.io API Key (optional, for Twitter monitoring) +- X/Twitter API v2 Bearer Token (optional, for Twitter monitoring) ## Setup @@ -48,8 +48,8 @@ A Discord bot that monitors content sources and posts AI-generated summaries to # Optional: YouTube monitoring # YOUTUBE_API_KEY=your_youtube_api_key - # Optional: Twitter monitoring - # TWITTER_API_KEY=your_twitterapi_io_api_key + # Optional: Twitter monitoring (X API v2) + # TWITTER_BEARER_TOKEN=your_x_api_bearer_token ``` 4. 
Run the bot: @@ -73,7 +73,7 @@ A Discord bot that monitors content sources and posts AI-generated summaries to | Variable | Default | Description | |----------|---------|-------------| | `YOUTUBE_API_KEY` | - | YouTube Data API key (required for YouTube monitoring) | -| `TWITTER_API_KEY` | - | twitterapi.io API key (required for Twitter monitoring) | +| `TWITTER_BEARER_TOKEN` | - | X API v2 Bearer Token (required for Twitter monitoring) | | `GITHUB_TOKEN` | - | GitHub Personal Access Token (required for GitHub monitoring) | | `GITHUB_POLL_INTERVAL_MINUTES` | `5` | Polling interval for GitHub repositories (1-60) | | `DATABASE_URL` | `sqlite+aiosqlite:///./data/intelstream.db` | Database connection string | @@ -164,7 +164,7 @@ The optional `summarize` parameter controls whether content is summarized by AI. - `RSS` - Any RSS/Atom feed URL - `Arxiv` - Arxiv category code (e.g., `cs.AI`, `cs.LG`, `cs.CL`, `cs.CV`, `stat.ML`) - `Blog` - Any blog URL (uses cascading discovery: RSS, Sitemap, LLM extraction) -- `Twitter` - Twitter/X account URL (requires twitterapi.io API key) +- `Twitter` - Twitter/X account URL (requires X API v2 Bearer Token) - `Page` - Any web page URL (uses AI to detect content structure) #### Configuration @@ -265,7 +265,9 @@ Both full GitHub URLs and `owner/repo` format are supported. The optional `chann Results are cached to avoid repeated extraction on subsequent polls. -**Twitter**: Monitors Twitter/X accounts for new original tweets using the twitterapi.io API. Retweets are skipped; only original tweets and quote tweets are included. Quoted tweet text is included in the content for richer summarization. When added with `summarize:False`, the bot posts bare tweet URLs (Discord auto-embeds the tweet preview). Requires a twitterapi.io API key (`TWITTER_API_KEY`). +**Twitter**: Monitors Twitter/X accounts for new original tweets using the official X API v2. Retweets and replies are filtered server-side for cost efficiency. 
Quote tweets are included with the quoted text appended for context. Long tweets (over 280 characters) are fully captured. Media attachments (images, videos) are detected and the first image URL is stored as the thumbnail. When added with `summarize:False`, the bot posts bare tweet URLs (Discord auto-embeds the tweet preview). Requires an X API v2 Bearer Token (`TWITTER_BEARER_TOKEN`). + +**Twitter cost considerations**: The X API v2 uses either a tiered subscription (Basic: $200/month, 15,000 reads) or a pay-per-use credit system. IntelStream fetches up to 5 tweets per poll and caches user ID lookups in memory to minimize API usage. With the default 15-minute poll interval, each source is polled 96 times per day, so 10 Twitter sources can consume up to roughly 4,800 reads/day (about 144,000 reads/month if every poll returns 5 tweets), which exceeds the Basic tier limit. Set `TWITTER_POLL_INTERVAL_MINUTES` to a higher value (e.g., 30 or 60) or monitor fewer accounts to lower consumption. **Page**: When you add a Page source, the bot uses Claude to analyze the page structure and automatically determine CSS selectors for extracting posts. 
@@ -330,7 +332,7 @@ src/intelstream/ │ ├── rss.py # Generic RSS/Atom adapter │ ├── arxiv.py # Arxiv RSS adapter │ ├── smart_blog.py # Blog adapter with cascading strategies -│ ├── twitter.py # Twitter/X adapter via twitterapi.io +│ ├── twitter.py # Twitter/X adapter via official X API v2 │ ├── page.py # Web page adapter │ └── strategies/ # Discovery strategies for Blog adapter │ ├── rss_discovery.py diff --git a/src/intelstream/adapters/__init__.py b/src/intelstream/adapters/__init__.py index 8f90f31..4538594 100644 --- a/src/intelstream/adapters/__init__.py +++ b/src/intelstream/adapters/__init__.py @@ -3,6 +3,7 @@ from intelstream.adapters.page import PageAdapter from intelstream.adapters.rss import RSSAdapter from intelstream.adapters.substack import SubstackAdapter +from intelstream.adapters.twitter import TwitterAdapter from intelstream.adapters.youtube import YouTubeAdapter __all__ = [ @@ -12,5 +13,6 @@ "PageAdapter", "RSSAdapter", "SubstackAdapter", + "TwitterAdapter", "YouTubeAdapter", ] diff --git a/src/intelstream/adapters/twitter.py b/src/intelstream/adapters/twitter.py index 5d8cbe3..4dfb10a 100644 --- a/src/intelstream/adapters/twitter.py +++ b/src/intelstream/adapters/twitter.py @@ -1,4 +1,5 @@ from datetime import UTC, datetime +from typing import Any import httpx import structlog @@ -7,15 +8,22 @@ logger = structlog.get_logger() -TWITTER_API_BASE = "https://api.twitterapi.io" -TWITTER_DATE_FORMAT = "%a %b %d %H:%M:%S %z %Y" +X_API_BASE = "https://api.x.com/2" TITLE_MAX_LENGTH = 100 +TWEET_FIELDS = ( + "created_at,author_id,referenced_tweets,entities,attachments,public_metrics,note_tweet" +) +USER_FIELDS = "name,username,profile_image_url" +MEDIA_FIELDS = "url,preview_image_url,type" +EXPANSIONS = "author_id,attachments.media_keys,referenced_tweets.id" + class TwitterAdapter(BaseAdapter): - def __init__(self, api_key: str, http_client: httpx.AsyncClient | None = None) -> None: - self._api_key = api_key + def __init__(self, bearer_token: str, 
http_client: httpx.AsyncClient | None = None) -> None: + self._bearer_token = bearer_token self._client = http_client + self._user_id_cache: dict[str, str] = {} @property def source_type(self) -> str: @@ -32,50 +40,59 @@ async def fetch_latest( ) -> list[ContentData]: logger.debug("Fetching Twitter timeline", identifier=identifier, skip_content=skip_content) - headers = {"X-API-Key": self._api_key} - params: dict[str, str] = {"userName": identifier, "includeReplies": "false"} + user_id = await self._resolve_user_id(identifier) + if not user_id: + return [] - if self._client: - response = await self._client.get( - f"{TWITTER_API_BASE}/twitter/user/last_tweets", - headers=headers, - params=params, - ) - else: - async with httpx.AsyncClient(timeout=30.0) as client: - response = await client.get( - f"{TWITTER_API_BASE}/twitter/user/last_tweets", - headers=headers, - params=params, - ) + params: dict[str, str] = { + "max_results": "5", + "exclude": "retweets,replies", + "tweet.fields": TWEET_FIELDS, + "user.fields": USER_FIELDS, + "expansions": EXPANSIONS, + } - response.raise_for_status() + if not skip_content: + params["media.fields"] = MEDIA_FIELDS + + response = await self._request(f"{X_API_BASE}/users/{user_id}/tweets", params=params) data = response.json() - tweets_raw = data.get("tweets", []) + if "errors" in data and "data" not in data: + for error in data["errors"]: + logger.error( + "X API error", + identifier=identifier, + error_title=error.get("title"), + error_detail=error.get("detail"), + ) + return [] + + tweets_raw: list[dict[str, Any]] = data.get("data", []) + includes: dict[str, Any] = data.get("includes", {}) + meta: dict[str, Any] = data.get("meta", {}) + logger.debug( - "Twitter API response", + "X API response", identifier=identifier, - status=data.get("status"), tweet_count=len(tweets_raw), - has_next_page=data.get("has_next_page"), + result_count=meta.get("result_count"), ) - if data.get("status") != "success": - logger.error( - "Twitter API 
returned error", - identifier=identifier, - message=data.get("message"), - ) - return [] + users_map = self._build_users_map(includes) + media_map = self._build_media_map(includes) + referenced_tweets_map = self._build_referenced_tweets_map(includes) items: list[ContentData] = [] for tweet in tweets_raw: - if tweet.get("retweeted_tweet"): - continue - try: - item = self._parse_tweet(tweet, skip_content=skip_content) + item = self._parse_tweet( + tweet, + users_map=users_map, + media_map=media_map, + referenced_tweets_map=referenced_tweets_map, + skip_content=skip_content, + ) items.append(item) except Exception as e: logger.warning( @@ -88,28 +105,100 @@ async def fetch_latest( logger.info("Fetched Twitter content", identifier=identifier, count=len(items)) return items - def _parse_tweet(self, tweet: dict[str, object], skip_content: bool = False) -> ContentData: + async def _resolve_user_id(self, username: str) -> str | None: + if username in self._user_id_cache: + return self._user_id_cache[username] + + response = await self._request( + f"{X_API_BASE}/users/by/username/{username}", + params={"user.fields": "id"}, + ) + + data = response.json() + + if "errors" in data and "data" not in data: + for error in data["errors"]: + logger.error( + "X API user lookup error", + username=username, + error_title=error.get("title"), + error_detail=error.get("detail"), + ) + return None + + user_data: dict[str, Any] | None = data.get("data") + if not user_data: + logger.error("X API user not found", username=username) + return None + + user_id = str(user_data["id"]) + self._user_id_cache[username] = user_id + return user_id + + async def _request(self, url: str, params: dict[str, str] | None = None) -> httpx.Response: + headers = {"Authorization": f"Bearer {self._bearer_token}"} + + if self._client: + response = await self._client.get(url, headers=headers, params=params) + else: + async with httpx.AsyncClient(timeout=30.0) as client: + response = await client.get(url, 
headers=headers, params=params) + + response.raise_for_status() + return response + + def _build_users_map(self, includes: dict[str, Any]) -> dict[str, dict[str, Any]]: + users: list[dict[str, Any]] = includes.get("users", []) + return {str(u["id"]): u for u in users} + + def _build_media_map(self, includes: dict[str, Any]) -> dict[str, dict[str, Any]]: + media: list[dict[str, Any]] = includes.get("media", []) + return {str(m["media_key"]): m for m in media} + + def _build_referenced_tweets_map(self, includes: dict[str, Any]) -> dict[str, dict[str, Any]]: + tweets: list[dict[str, Any]] = includes.get("tweets", []) + return {str(t["id"]): t for t in tweets} + + def _parse_tweet( + self, + tweet: dict[str, Any], + users_map: dict[str, dict[str, Any]], + media_map: dict[str, dict[str, Any]], + referenced_tweets_map: dict[str, dict[str, Any]], + skip_content: bool = False, + ) -> ContentData: tweet_id = str(tweet["id"]) - text = str(tweet.get("text", "")) - url = str(tweet.get("url", f"https://x.com/i/status/{tweet_id}")) + note_tweet = tweet.get("note_tweet") + text = ( + str(note_tweet["text"]) + if isinstance(note_tweet, dict) and "text" in note_tweet + else str(tweet.get("text", "")) + ) + author_id = str(tweet.get("author_id", "")) - author_data = tweet.get("author") or {} - if not isinstance(author_data, dict): - author_data = {} - author_name = str(author_data.get("name") or author_data.get("userName", "Unknown")) - profile_pic = author_data.get("profilePicture") + author_info = users_map.get(author_id, {}) + author_name = str(author_info.get("name") or author_info.get("username", "Unknown")) + username = str(author_info.get("username", "")) + profile_pic = author_info.get("profile_image_url") - title = self._make_title(text) - published_at = self._parse_twitter_date( - str(tweet["createdAt"]) if tweet.get("createdAt") else None + url = ( + f"https://x.com/{username}/status/{tweet_id}" + if username + else f"https://x.com/i/status/{tweet_id}" ) + title = 
self._make_title(text) + published_at = self._parse_iso_date(tweet.get("created_at")) + raw_content = None if not skip_content: raw_content = text - quoted = tweet.get("quoted_tweet") - if isinstance(quoted, dict) and quoted.get("text"): - raw_content += f"\n\n[Quoted: {quoted['text']}]" + + quoted_text = self._get_quoted_tweet_text(tweet, referenced_tweets_map) + if quoted_text: + raw_content += f"\n\n[Quoted: {quoted_text}]" + + thumbnail_url = self._get_thumbnail_url(tweet, media_map, profile_pic) return ContentData( external_id=tweet_id, @@ -118,19 +207,51 @@ def _parse_tweet(self, tweet: dict[str, object], skip_content: bool = False) -> author=author_name, published_at=published_at, raw_content=raw_content, - thumbnail_url=str(profile_pic) if profile_pic else None, + thumbnail_url=thumbnail_url, ) + def _get_quoted_tweet_text( + self, + tweet: dict[str, Any], + referenced_tweets_map: dict[str, dict[str, Any]], + ) -> str | None: + refs: list[dict[str, Any]] = tweet.get("referenced_tweets", []) + for ref in refs: + if ref.get("type") == "quoted": + ref_id = str(ref["id"]) + quoted = referenced_tweets_map.get(ref_id) + if quoted: + return str(quoted.get("text", "")) + return None + + def _get_thumbnail_url( + self, + tweet: dict[str, Any], + media_map: dict[str, dict[str, Any]], + profile_pic: str | None, + ) -> str | None: + attachments: dict[str, Any] = tweet.get("attachments", {}) + media_keys: list[str] = attachments.get("media_keys", []) + + for key in media_keys: + media = media_map.get(key, {}) + media_url = media.get("url") or media.get("preview_image_url") + if media_url: + return str(media_url) + + return str(profile_pic) if profile_pic else None + def _make_title(self, text: str) -> str: first_line = text.split("\n")[0] if len(first_line) <= TITLE_MAX_LENGTH: return first_line return first_line[: TITLE_MAX_LENGTH - 3] + "..." 
- def _parse_twitter_date(self, date_str: str | None) -> datetime: + def _parse_iso_date(self, date_str: Any) -> datetime: if not date_str: return datetime.now(UTC) try: - return datetime.strptime(date_str, TWITTER_DATE_FORMAT) + s = str(date_str).replace("Z", "+00:00") + return datetime.fromisoformat(s) except (ValueError, TypeError): return datetime.now(UTC) diff --git a/src/intelstream/config.py b/src/intelstream/config.py index b1b9531..9ba3452 100644 --- a/src/intelstream/config.py +++ b/src/intelstream/config.py @@ -33,8 +33,8 @@ class Settings(BaseSettings): youtube_api_key: str | None = Field(default=None, description="YouTube Data API key (optional)") - twitter_api_key: str | None = Field( - default=None, description="twitterapi.io API key (optional)" + twitter_bearer_token: str | None = Field( + default=None, description="X API v2 Bearer Token for Twitter monitoring (optional)" ) github_token: str | None = Field( @@ -226,7 +226,7 @@ def __repr__(self) -> str: f"discord_owner_id={self.discord_owner_id}, " f"anthropic_api_key='*****', " f"youtube_api_key={'*****' if self.youtube_api_key else None}, " - f"twitter_api_key={'*****' if self.twitter_api_key else None}, " + f"twitter_bearer_token={'*****' if self.twitter_bearer_token else None}, " f"github_token={'*****' if self.github_token else None}, " f"database_url={self.database_url!r}, " f"log_level={self.log_level!r}" diff --git a/src/intelstream/discord/cogs/source_management.py b/src/intelstream/discord/cogs/source_management.py index 5b8ff30..d6f8bb5 100644 --- a/src/intelstream/discord/cogs/source_management.py +++ b/src/intelstream/discord/cogs/source_management.py @@ -196,9 +196,9 @@ async def source_add( ) return - if stype == SourceType.TWITTER and not self.bot.settings.twitter_api_key: + if stype == SourceType.TWITTER and not self.bot.settings.twitter_bearer_token: await interaction.followup.send( - "Twitter sources are not available. 
No Twitter API key configured.", + "Twitter sources are not available. No Twitter Bearer Token configured.", ephemeral=True, ) return diff --git a/src/intelstream/services/pipeline.py b/src/intelstream/services/pipeline.py index 37cbcca..32abfee 100644 --- a/src/intelstream/services/pipeline.py +++ b/src/intelstream/services/pipeline.py @@ -59,9 +59,9 @@ def _create_adapters(self) -> dict[SourceType, BaseAdapter]: http_client=self._http_client, ) - if self._settings.twitter_api_key: + if self._settings.twitter_bearer_token: adapters[SourceType.TWITTER] = TwitterAdapter( - api_key=self._settings.twitter_api_key, + bearer_token=self._settings.twitter_bearer_token, http_client=self._http_client, ) diff --git a/tests/test_adapters/test_twitter.py b/tests/test_adapters/test_twitter.py index 06ba5b9..b9bee0a 100644 --- a/tests/test_adapters/test_twitter.py +++ b/tests/test_adapters/test_twitter.py @@ -6,70 +6,82 @@ from intelstream.adapters.twitter import TwitterAdapter +SAMPLE_USER_RESPONSE = { + "data": { + "id": "2244994945", + "name": "Test User", + "username": "testuser", + } +} + SAMPLE_TWEETS_RESPONSE = { - "tweets": [ + "data": [ { - "type": "tweet", "id": "12345", - "url": "https://x.com/testuser/status/12345", "text": "This is a test tweet with some content", - "createdAt": "Tue Dec 10 07:00:30 +0000 2024", - "author": { - "userName": "testuser", - "id": "user123", - "name": "Test User", - "profilePicture": "https://pbs.twimg.com/profile.jpg", + "created_at": "2024-12-10T07:00:30.000Z", + "author_id": "2244994945", + "public_metrics": { + "retweet_count": 5, + "like_count": 10, + "reply_count": 2, }, - "retweetCount": 5, - "likeCount": 10, - "quoted_tweet": None, - "retweeted_tweet": None, }, { - "type": "tweet", "id": "12346", - "url": "https://x.com/testuser/status/12346", "text": "Another tweet with a quote", - "createdAt": "Wed Dec 11 08:00:00 +0000 2024", - "author": { - "userName": "testuser", - "id": "user123", + "created_at": "2024-12-11T08:00:00.000Z", 
+ "author_id": "2244994945", + "referenced_tweets": [ + {"type": "quoted", "id": "99999"}, + ], + }, + ], + "includes": { + "users": [ + { + "id": "2244994945", "name": "Test User", - "profilePicture": "https://pbs.twimg.com/profile.jpg", + "username": "testuser", + "profile_image_url": "https://pbs.twimg.com/profile.jpg", }, - "retweetCount": 0, - "likeCount": 3, - "quoted_tweet": { + ], + "tweets": [ + { + "id": "99999", "text": "Original quoted content here", }, - "retweeted_tweet": None, - }, - ], - "has_next_page": False, - "next_cursor": "", - "status": "success", - "message": "", + ], + }, + "meta": { + "result_count": 2, + "newest_id": "12346", + "oldest_id": "12345", + }, } class TestTwitterAdapter: async def test_source_type(self) -> None: - adapter = TwitterAdapter(api_key="test-key") + adapter = TwitterAdapter(bearer_token="test-token") assert adapter.source_type == "twitter" async def test_get_feed_url(self) -> None: - adapter = TwitterAdapter(api_key="test-key") + adapter = TwitterAdapter(bearer_token="test-token") url = await adapter.get_feed_url("testuser") assert url == "https://x.com/testuser" @respx.mock async def test_fetch_latest_success(self) -> None: - respx.get("https://api.twitterapi.io/twitter/user/last_tweets").mock( + respx.get("https://api.x.com/2/users/by/username/testuser").mock( + return_value=httpx.Response(200, json=SAMPLE_USER_RESPONSE) + ) + respx.get("https://api.x.com/2/users/2244994945/tweets").mock( return_value=httpx.Response(200, json=SAMPLE_TWEETS_RESPONSE) ) async with httpx.AsyncClient() as client: - adapter = TwitterAdapter(api_key="test-key", http_client=client) + adapter = TwitterAdapter(bearer_token="test-token", http_client=client) items = await adapter.fetch_latest("testuser") assert len(items) == 2 @@ -85,54 +97,32 @@ async def test_fetch_latest_success(self) -> None: @respx.mock async def test_fetch_latest_includes_quoted_text(self) -> None: - respx.get("https://api.twitterapi.io/twitter/user/last_tweets").mock( 
+ respx.get("https://api.x.com/2/users/by/username/testuser").mock( + return_value=httpx.Response(200, json=SAMPLE_USER_RESPONSE) + ) + respx.get("https://api.x.com/2/users/2244994945/tweets").mock( return_value=httpx.Response(200, json=SAMPLE_TWEETS_RESPONSE) ) async with httpx.AsyncClient() as client: - adapter = TwitterAdapter(api_key="test-key", http_client=client) + adapter = TwitterAdapter(bearer_token="test-token", http_client=client) items = await adapter.fetch_latest("testuser") second = items[1] assert second.raw_content is not None assert "[Quoted: Original quoted content here]" in second.raw_content - @respx.mock - async def test_fetch_latest_skips_retweets(self) -> None: - response_with_rt = { - "tweets": [ - { - "type": "tweet", - "id": "rt1", - "url": "https://x.com/testuser/status/rt1", - "text": "RT content", - "createdAt": "Tue Dec 10 07:00:30 +0000 2024", - "author": {"userName": "testuser", "name": "Test"}, - "retweeted_tweet": {"id": "original", "text": "Original"}, - "quoted_tweet": None, - }, - ], - "status": "success", - "message": "", - } - respx.get("https://api.twitterapi.io/twitter/user/last_tweets").mock( - return_value=httpx.Response(200, json=response_with_rt) - ) - - async with httpx.AsyncClient() as client: - adapter = TwitterAdapter(api_key="test-key", http_client=client) - items = await adapter.fetch_latest("testuser") - - assert len(items) == 0 - @respx.mock async def test_fetch_latest_skip_content(self) -> None: - respx.get("https://api.twitterapi.io/twitter/user/last_tweets").mock( + respx.get("https://api.x.com/2/users/by/username/testuser").mock( + return_value=httpx.Response(200, json=SAMPLE_USER_RESPONSE) + ) + respx.get("https://api.x.com/2/users/2244994945/tweets").mock( return_value=httpx.Response(200, json=SAMPLE_TWEETS_RESPONSE) ) async with httpx.AsyncClient() as client: - adapter = TwitterAdapter(api_key="test-key", http_client=client) + adapter = TwitterAdapter(bearer_token="test-token", http_client=client) items = 
await adapter.fetch_latest("testuser", skip_content=True) assert len(items) == 2 @@ -141,98 +131,247 @@ async def test_fetch_latest_skip_content(self) -> None: @respx.mock async def test_fetch_latest_http_error(self) -> None: - respx.get("https://api.twitterapi.io/twitter/user/last_tweets").mock( + respx.get("https://api.x.com/2/users/by/username/testuser").mock( return_value=httpx.Response(401) ) async with httpx.AsyncClient() as client: - adapter = TwitterAdapter(api_key="test-key", http_client=client) + adapter = TwitterAdapter(bearer_token="bad-token", http_client=client) with pytest.raises(httpx.HTTPStatusError): await adapter.fetch_latest("testuser") @respx.mock - async def test_fetch_latest_api_error_status(self) -> None: - error_response = {"status": "error", "message": "User not found"} - respx.get("https://api.twitterapi.io/twitter/user/last_tweets").mock( - return_value=httpx.Response(200, json=error_response) + async def test_fetch_latest_user_not_found(self) -> None: + respx.get("https://api.x.com/2/users/by/username/nonexistent").mock( + return_value=httpx.Response( + 200, + json={ + "errors": [ + { + "title": "Not Found Error", + "detail": "Could not find user with username: [nonexistent].", + } + ] + }, + ) ) async with httpx.AsyncClient() as client: - adapter = TwitterAdapter(api_key="test-key", http_client=client) + adapter = TwitterAdapter(bearer_token="test-token", http_client=client) items = await adapter.fetch_latest("nonexistent") assert len(items) == 0 + @respx.mock + async def test_fetch_latest_api_error(self) -> None: + respx.get("https://api.x.com/2/users/by/username/testuser").mock( + return_value=httpx.Response(200, json=SAMPLE_USER_RESPONSE) + ) + respx.get("https://api.x.com/2/users/2244994945/tweets").mock( + return_value=httpx.Response( + 200, + json={ + "errors": [ + { + "title": "Authorization Error", + "detail": "Not authorized to view this resource.", + } + ] + }, + ) + ) + + async with httpx.AsyncClient() as client: + adapter = 
TwitterAdapter(bearer_token="test-token", http_client=client) + items = await adapter.fetch_latest("testuser") + + assert len(items) == 0 + def test_make_title_short_text(self) -> None: - adapter = TwitterAdapter(api_key="test-key") + adapter = TwitterAdapter(bearer_token="test-token") assert adapter._make_title("Short tweet") == "Short tweet" def test_make_title_long_text(self) -> None: - adapter = TwitterAdapter(api_key="test-key") + adapter = TwitterAdapter(bearer_token="test-token") long_text = "A" * 200 title = adapter._make_title(long_text) assert len(title) == 100 assert title.endswith("...") def test_make_title_multiline(self) -> None: - adapter = TwitterAdapter(api_key="test-key") + adapter = TwitterAdapter(bearer_token="test-token") title = adapter._make_title("First line\nSecond line") assert title == "First line" - def test_parse_twitter_date_valid(self) -> None: - adapter = TwitterAdapter(api_key="test-key") - result = adapter._parse_twitter_date("Tue Dec 10 07:00:30 +0000 2024") + def test_parse_iso_date_valid(self) -> None: + adapter = TwitterAdapter(bearer_token="test-token") + result = adapter._parse_iso_date("2024-12-10T07:00:30.000Z") assert result == datetime(2024, 12, 10, 7, 0, 30, tzinfo=UTC) - def test_parse_twitter_date_none(self) -> None: - adapter = TwitterAdapter(api_key="test-key") - result = adapter._parse_twitter_date(None) + def test_parse_iso_date_none(self) -> None: + adapter = TwitterAdapter(bearer_token="test-token") + result = adapter._parse_iso_date(None) assert result.tzinfo is not None - def test_parse_twitter_date_invalid(self) -> None: - adapter = TwitterAdapter(api_key="test-key") - result = adapter._parse_twitter_date("not-a-date") + def test_parse_iso_date_invalid(self) -> None: + adapter = TwitterAdapter(bearer_token="test-token") + result = adapter._parse_iso_date("not-a-date") assert result.tzinfo is not None @respx.mock - async def test_fetch_sends_correct_headers(self) -> None: - route = 
respx.get("https://api.twitterapi.io/twitter/user/last_tweets").mock( - return_value=httpx.Response( - 200, json={"tweets": [], "status": "success", "message": ""} - ) + async def test_fetch_sends_correct_auth_header(self) -> None: + user_route = respx.get("https://api.x.com/2/users/by/username/testuser").mock( + return_value=httpx.Response(200, json=SAMPLE_USER_RESPONSE) + ) + respx.get("https://api.x.com/2/users/2244994945/tweets").mock( + return_value=httpx.Response(200, json={"data": [], "meta": {"result_count": 0}}) ) async with httpx.AsyncClient() as client: - adapter = TwitterAdapter(api_key="my-secret-key", http_client=client) + adapter = TwitterAdapter(bearer_token="my-secret-token", http_client=client) await adapter.fetch_latest("testuser") - assert route.called - request = route.calls[0].request - assert request.headers["X-API-Key"] == "my-secret-key" + assert user_route.called + request = user_route.calls[0].request + assert request.headers["Authorization"] == "Bearer my-secret-token" @respx.mock async def test_fetch_sends_correct_params(self) -> None: - route = respx.get("https://api.twitterapi.io/twitter/user/last_tweets").mock( - return_value=httpx.Response( - 200, json={"tweets": [], "status": "success", "message": ""} - ) + respx.get("https://api.x.com/2/users/by/username/testuser").mock( + return_value=httpx.Response(200, json=SAMPLE_USER_RESPONSE) + ) + tweets_route = respx.get("https://api.x.com/2/users/2244994945/tweets").mock( + return_value=httpx.Response(200, json={"data": [], "meta": {"result_count": 0}}) ) async with httpx.AsyncClient() as client: - adapter = TwitterAdapter(api_key="test-key", http_client=client) - await adapter.fetch_latest("elonmusk") + adapter = TwitterAdapter(bearer_token="test-token", http_client=client) + await adapter.fetch_latest("testuser") - request = route.calls[0].request - assert "userName=elonmusk" in str(request.url) - assert "includeReplies=false" in str(request.url) + request = 
tweets_route.calls[0].request + url_str = str(request.url) + assert "max_results=5" in url_str + assert "exclude=retweets" in url_str @respx.mock async def test_fetch_without_http_client(self) -> None: - respx.get("https://api.twitterapi.io/twitter/user/last_tweets").mock( + respx.get("https://api.x.com/2/users/by/username/testuser").mock( + return_value=httpx.Response(200, json=SAMPLE_USER_RESPONSE) + ) + respx.get("https://api.x.com/2/users/2244994945/tweets").mock( return_value=httpx.Response(200, json=SAMPLE_TWEETS_RESPONSE) ) - adapter = TwitterAdapter(api_key="test-key") + adapter = TwitterAdapter(bearer_token="test-token") items = await adapter.fetch_latest("testuser") assert len(items) == 2 + + @respx.mock + async def test_user_id_caching(self) -> None: + user_route = respx.get("https://api.x.com/2/users/by/username/testuser").mock( + return_value=httpx.Response(200, json=SAMPLE_USER_RESPONSE) + ) + respx.get("https://api.x.com/2/users/2244994945/tweets").mock( + return_value=httpx.Response(200, json={"data": [], "meta": {"result_count": 0}}) + ) + + async with httpx.AsyncClient() as client: + adapter = TwitterAdapter(bearer_token="test-token", http_client=client) + await adapter.fetch_latest("testuser") + await adapter.fetch_latest("testuser") + + assert user_route.call_count == 1 + + @respx.mock + async def test_media_thumbnail_in_tweet(self) -> None: + response_with_media = { + "data": [ + { + "id": "55555", + "text": "Check out this image", + "created_at": "2024-12-10T07:00:30.000Z", + "author_id": "2244994945", + "attachments": { + "media_keys": ["media_1"], + }, + }, + ], + "includes": { + "users": [ + { + "id": "2244994945", + "name": "Test User", + "username": "testuser", + "profile_image_url": "https://pbs.twimg.com/profile.jpg", + }, + ], + "media": [ + { + "media_key": "media_1", + "type": "photo", + "url": "https://pbs.twimg.com/media/photo123.jpg", + }, + ], + }, + "meta": {"result_count": 1}, + } + + 
respx.get("https://api.x.com/2/users/by/username/testuser").mock( + return_value=httpx.Response(200, json=SAMPLE_USER_RESPONSE) + ) + respx.get("https://api.x.com/2/users/2244994945/tweets").mock( + return_value=httpx.Response(200, json=response_with_media) + ) + + async with httpx.AsyncClient() as client: + adapter = TwitterAdapter(bearer_token="test-token", http_client=client) + items = await adapter.fetch_latest("testuser") + + assert len(items) == 1 + assert items[0].thumbnail_url == "https://pbs.twimg.com/media/photo123.jpg" + + @respx.mock + async def test_empty_timeline(self) -> None: + respx.get("https://api.x.com/2/users/by/username/testuser").mock( + return_value=httpx.Response(200, json=SAMPLE_USER_RESPONSE) + ) + respx.get("https://api.x.com/2/users/2244994945/tweets").mock( + return_value=httpx.Response(200, json={"meta": {"result_count": 0}}) + ) + + async with httpx.AsyncClient() as client: + adapter = TwitterAdapter(bearer_token="test-token", http_client=client) + items = await adapter.fetch_latest("testuser") + + assert len(items) == 0 + + @respx.mock + async def test_tweet_url_without_username(self) -> None: + response_no_user = { + "data": [ + { + "id": "77777", + "text": "Tweet without user expansion", + "created_at": "2024-12-10T07:00:30.000Z", + "author_id": "9999", + }, + ], + "includes": {}, + "meta": {"result_count": 1}, + } + + respx.get("https://api.x.com/2/users/by/username/testuser").mock( + return_value=httpx.Response(200, json=SAMPLE_USER_RESPONSE) + ) + respx.get("https://api.x.com/2/users/2244994945/tweets").mock( + return_value=httpx.Response(200, json=response_no_user) + ) + + async with httpx.AsyncClient() as client: + adapter = TwitterAdapter(bearer_token="test-token", http_client=client) + items = await adapter.fetch_latest("testuser") + + assert len(items) == 1 + assert items[0].original_url == "https://x.com/i/status/77777" + assert items[0].author == "Unknown" diff --git a/tests/test_discord/test_source_management.py 
b/tests/test_discord/test_source_management.py index 0dafe06..7a9cb67 100644 --- a/tests/test_discord/test_source_management.py +++ b/tests/test_discord/test_source_management.py @@ -18,7 +18,7 @@ def mock_bot(): bot.settings = MagicMock() bot.settings.default_poll_interval_minutes = 5 bot.settings.youtube_api_key = "test-api-key" - bot.settings.twitter_api_key = "test-twitter-key" + bot.settings.twitter_bearer_token = "test-twitter-token" return bot @@ -271,8 +271,8 @@ async def test_add_youtube_without_api_key(self, source_management, mock_bot): call_args = interaction.followup.send.call_args assert "not available" in call_args[0][0] - async def test_add_twitter_without_api_key(self, source_management, mock_bot): - mock_bot.settings.twitter_api_key = None + async def test_add_twitter_without_bearer_token(self, source_management, mock_bot): + mock_bot.settings.twitter_bearer_token = None interaction = MagicMock(spec=discord.Interaction) interaction.response = MagicMock() diff --git a/tests/test_services/test_pipeline.py b/tests/test_services/test_pipeline.py index 8d25ec6..4e90f32 100644 --- a/tests/test_services/test_pipeline.py +++ b/tests/test_services/test_pipeline.py @@ -16,7 +16,7 @@ def mock_settings(): settings = MagicMock(spec=Settings) settings.youtube_api_key = "test-youtube-key" settings.anthropic_api_key = "test-anthropic-key" - settings.twitter_api_key = None + settings.twitter_bearer_token = None settings.http_timeout_seconds = 30.0 settings.summarization_delay_seconds = 0.5 settings.fetch_delay_seconds = 0.0 @@ -102,7 +102,7 @@ async def test_initialize_without_youtube_key(self, mock_repository, mock_summar settings = MagicMock(spec=Settings) settings.youtube_api_key = None settings.anthropic_api_key = "test-anthropic-key" - settings.twitter_api_key = None + settings.twitter_bearer_token = None settings.http_timeout_seconds = 30.0 settings.summarization_delay_seconds = 0.5 settings.fetch_delay_seconds = 0.0 @@ -123,7 +123,7 @@ async def 
test_initialize_creates_twitter_adapter(self, mock_repository, mock_su settings = MagicMock(spec=Settings) settings.youtube_api_key = "test-key" settings.anthropic_api_key = "test-key" - settings.twitter_api_key = "test-twitter-key" + settings.twitter_bearer_token = "test-twitter-key" settings.http_timeout_seconds = 30.0 settings.summarization_delay_seconds = 0.5 settings.fetch_delay_seconds = 0.0 @@ -141,7 +141,7 @@ async def test_initialize_without_twitter_key(self, mock_repository, mock_summar settings = MagicMock(spec=Settings) settings.youtube_api_key = "test-key" settings.anthropic_api_key = "test-key" - settings.twitter_api_key = None + settings.twitter_bearer_token = None settings.http_timeout_seconds = 30.0 settings.summarization_delay_seconds = 0.5 settings.fetch_delay_seconds = 0.0 @@ -342,7 +342,7 @@ async def test_fetch_all_sources_applies_rate_limiting( settings = MagicMock(spec=Settings) settings.youtube_api_key = "test-key" settings.anthropic_api_key = "test-key" - settings.twitter_api_key = None + settings.twitter_bearer_token = None settings.http_timeout_seconds = 30.0 settings.summarization_delay_seconds = 0.5 settings.fetch_delay_seconds = 0.1 From 8dbb747363a7c79893d457b7bbb0536d5698b26b Mon Sep 17 00:00:00 2001 From: user1303836 Date: Fri, 6 Feb 2026 17:36:54 -0500 Subject: [PATCH 2/2] Fix review findings: correct README math, remove unused field, add note_tweet test - Fix README cost calculation (4,800 -> 28,800 reads/month for 10 sources at 15-min interval) - Remove unused entities field from TWEET_FIELDS to reduce response payload - Add explicit test for note_tweet long-form text preference over truncated text --- README.md | 2 +- src/intelstream/adapters/twitter.py | 4 +-- tests/test_adapters/test_twitter.py | 43 +++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 95b9934..c16f36d 100644 --- a/README.md +++ b/README.md @@ -267,7 +267,7 @@ Results are cached to avoid 
repeated extraction on subsequent polls. **Twitter**: Monitors Twitter/X accounts for new original tweets using the official X API v2. Retweets and replies are filtered server-side for cost efficiency. Quote tweets are included with the quoted text appended for context. Long tweets (over 280 characters) are fully captured. Media attachments (images, videos) are detected and the first image URL is stored as the thumbnail. When added with `summarize:False`, the bot posts bare tweet URLs (Discord auto-embeds the tweet preview). Requires an X API v2 Bearer Token (`TWITTER_BEARER_TOKEN`). -**Twitter cost considerations**: The X API v2 uses either a tiered subscription (Basic: $200/month, 15,000 reads) or a pay-per-use credit system. IntelStream fetches 5 tweets per poll and caches user ID lookups in memory to minimize API usage. With the default 15-minute poll interval, 10 Twitter sources consume roughly 4,800 reads/month (well within Basic tier limits). Set `TWITTER_POLL_INTERVAL_MINUTES` to a higher value (e.g., 30 or 60) for even lower consumption. +**Twitter cost considerations**: The X API v2 uses either a tiered subscription (Basic: $200/month, 15,000 reads) or a pay-per-use credit system. IntelStream fetches 5 tweets per poll and caches user ID lookups in memory to minimize API usage. With the default 15-minute poll interval, 10 Twitter sources consume roughly 28,800 reads/month (10 sources x 4 polls/hour x 24h x 30d). Set `TWITTER_POLL_INTERVAL_MINUTES` to a higher value (e.g., 30 or 60) for even lower consumption. **Page**: When you add a Page source, the bot uses Claude to analyze the page structure and automatically determine CSS selectors for extracting posts. 
diff --git a/src/intelstream/adapters/twitter.py b/src/intelstream/adapters/twitter.py index 4dfb10a..4826703 100644 --- a/src/intelstream/adapters/twitter.py +++ b/src/intelstream/adapters/twitter.py @@ -11,9 +11,7 @@ X_API_BASE = "https://api.x.com/2" TITLE_MAX_LENGTH = 100 -TWEET_FIELDS = ( - "created_at,author_id,referenced_tweets,entities,attachments,public_metrics,note_tweet" -) +TWEET_FIELDS = "created_at,author_id,referenced_tweets,attachments,public_metrics,note_tweet" USER_FIELDS = "name,username,profile_image_url" MEDIA_FIELDS = "url,preview_image_url,type" EXPANSIONS = "author_id,attachments.media_keys,referenced_tweets.id" diff --git a/tests/test_adapters/test_twitter.py b/tests/test_adapters/test_twitter.py index b9bee0a..b025fd1 100644 --- a/tests/test_adapters/test_twitter.py +++ b/tests/test_adapters/test_twitter.py @@ -375,3 +375,46 @@ async def test_tweet_url_without_username(self) -> None: assert len(items) == 1 assert items[0].original_url == "https://x.com/i/status/77777" assert items[0].author == "Unknown" + + @respx.mock + async def test_note_tweet_uses_long_form_text(self) -> None: + response_with_note = { + "data": [ + { + "id": "88888", + "text": "This is the truncated version...", + "note_tweet": { + "text": "This is the full long-form tweet text that exceeds 280 characters and would normally be truncated in the regular text field.", + }, + "created_at": "2024-12-10T07:00:30.000Z", + "author_id": "2244994945", + }, + ], + "includes": { + "users": [ + { + "id": "2244994945", + "name": "Test User", + "username": "testuser", + "profile_image_url": "https://pbs.twimg.com/profile.jpg", + }, + ], + }, + "meta": {"result_count": 1}, + } + + respx.get("https://api.x.com/2/users/by/username/testuser").mock( + return_value=httpx.Response(200, json=SAMPLE_USER_RESPONSE) + ) + respx.get("https://api.x.com/2/users/2244994945/tweets").mock( + return_value=httpx.Response(200, json=response_with_note) + ) + + async with httpx.AsyncClient() as client: + 
adapter = TwitterAdapter(bearer_token="test-token", http_client=client) + items = await adapter.fetch_latest("testuser") + + assert len(items) == 1 + assert items[0].raw_content is not None + assert "full long-form tweet text" in items[0].raw_content + assert "truncated version" not in items[0].raw_content