-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfetch_old_twitter_posts.py
More file actions
62 lines (42 loc) · 1.77 KB
/
fetch_old_twitter_posts.py
File metadata and controls
62 lines (42 loc) · 1.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from datetime import datetime, timezone

from desearch_py import Desearch
# Module-level Desearch client; fetch_tweets() issues its searches through it.
# NOTE(review): "API_KEY" looks like a placeholder literal — confirm a real key
# is supplied (ideally from the environment) before running.
desearch = Desearch("API_KEY")
# Default value for the `total` parameter of fetch_tweets().
TOTAL_TWEETS_TO_FETCH = 100
def parse_twitter_date(date_str: str) -> datetime:
    """Convert a Twitter timestamp string into a ``datetime``.

    The expected layout is ``'Tue Feb 03 09:38:44 +0000 2026'``. The numeric
    offset is consumed by ``%z``, so the returned value is timezone-aware.

    Raises:
        ValueError: if *date_str* does not match the expected layout.
    """
    twitter_layout = "%a %b %d %H:%M:%S %z %Y"
    parsed = datetime.strptime(date_str, twitter_layout)
    return parsed
def format_end_date(dt: datetime) -> str:
    """Format *dt* as the API end_date string, e.g. '2021-12-31_23:59:59_UTC'.

    Timezone-aware datetimes are converted to UTC before formatting so the
    ``_UTC`` suffix is always accurate. Naive datetimes carry no offset and
    are formatted unchanged (i.e. they are taken to already be in UTC).

    Args:
        dt: The moment to format.

    Returns:
        The timestamp rendered as ``YYYY-MM-DD_HH:MM:SS_UTC``.
    """
    # Only aware values can be converted; astimezone() on a naive value would
    # silently assume the machine's local timezone.
    if dt.utcoffset() is not None:
        dt = dt.astimezone(timezone.utc)
    return dt.strftime("%Y-%m-%d_%H:%M:%S_UTC")
def _tweet_key(tweet):
    """Return a hashable key used to recognise the same tweet across batches."""
    try:
        # NOTE(review): assumes each result carries an "id" field — confirm
        # against the Desearch response schema.
        return ("id", tweet["id"])
    except (KeyError, TypeError):
        # Fall back to the full textual representation when no id is available.
        return ("repr", repr(tweet))


def fetch_tweets(query: str, total: int = TOTAL_TWEETS_TO_FETCH) -> list:
    """Fetch tweets with pagination using an end_date cursor.

    Each request asks for the latest 20 tweets matching *query*. After every
    batch, the timestamp of the oldest tweet in that batch is appended to the
    next request as an ``until:`` filter so the search walks backwards in time.

    Tweets already collected are skipped, so an overlapping page boundary does
    not produce duplicates, and the loop stops as soon as a batch contributes
    nothing new (the cursor can no longer advance).

    Args:
        query: Search query passed to the Desearch Twitter search.
        total: Maximum number of tweets to return.

    Returns:
        A list of at most *total* unique tweets, in the order received.
    """
    all_tweets = []
    seen_keys = set()
    end_date = None

    while len(all_tweets) < total:
        query_with_filter = f"{query} until:{end_date}" if end_date else query

        result = desearch.basic_twitter_search(
            query=query_with_filter,
            sort="Latest",
            count=20,
        )

        if not result:
            print("No more results")
            break

        # Parse every timestamp once; reused for logging and for the cursor.
        batch_dates = [parse_twitter_date(tweet["created_at"]) for tweet in result]

        print("\n--- Batch dates (UTC) ---")
        for dt in batch_dates:
            print(dt.strftime("%Y-%m-%d %H:%M:%S UTC"))

        # Keep only tweets not seen in an earlier batch.
        new_tweets = []
        for tweet in result:
            key = _tweet_key(tweet)
            if key not in seen_keys:
                seen_keys.add(key)
                new_tweets.append(tweet)

        all_tweets.extend(new_tweets)
        print(f"Fetched {len(result)} tweets, total: {len(all_tweets)}")

        if not new_tweets:
            # The page held nothing new, so repeating the query would only
            # return the same tweets again.
            print("No new tweets in batch, stopping")
            break

        # Use the oldest date in this batch as the next end_date.
        end_date = format_end_date(min(batch_dates))
        print(f"Next end_date: {end_date}")

    # The final batch may overshoot the requested amount.
    return all_tweets[:total]
if __name__ == "__main__":
    # Script entry point: run a sample search with the default tweet budget
    # and report how many tweets were collected.
    tweets = fetch_tweets(query="bittensor")
    print(f"\nFetched {len(tweets)} tweets total")