-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathjsonreddit.py
More file actions
49 lines (39 loc) · 1.61 KB
/
jsonreddit.py
File metadata and controls
49 lines (39 loc) · 1.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import json
import urllib.error
import urllib.request

# Maximum number of characters of post text shown before truncating.
TEXT_PREVIEW_LIMIT = 200


def summarize_text(text, limit=TEXT_PREVIEW_LIMIT):
    """Return a display-ready preview of a post's selftext.

    Empty/missing text becomes a placeholder; text longer than *limit*
    characters is cut to *limit* characters with "..." appended.
    (The original sliced to 1000 chars while testing ``len > 200`` — an
    inconsistency; one limit is now used for both the test and the slice.)
    """
    if not text:
        return "[No text content]"
    return text[:limit] + "..." if len(text) > limit else text


def fetch_top_posts(subreddit, limit=10):
    """Fetch the all-time top *limit* posts from *subreddit*.

    Uses Reddit's public JSON endpoint and returns the list found at
    ``data['data']['children']``.  May raise ``urllib.error.URLError`` or
    ``json.JSONDecodeError``.
    """
    json_url = f"https://www.reddit.com/r/{subreddit}/top.json?limit={limit}&t=all"
    # Reddit requires a User-Agent header, otherwise you may get blocked or empty results
    headers = {'User-Agent': 'Mozilla/5.0 (compatible; Python WebScraper 1.0)'}
    req = urllib.request.Request(json_url, headers=headers)
    with urllib.request.urlopen(req) as url:
        return json.loads(url.read())['data']['children']


def main():
    """Fetch and print the top 10 posts from r/webscraping."""
    subreddit = "webscraping"
    try:
        posts = fetch_top_posts(subreddit)
        print(f"\nTop 10 posts from r/{subreddit}:\n")
        for i, post in enumerate(posts, start=1):
            post_data = post['data']
            link = "https://www.reddit.com" + post_data['permalink']
            short_text = summarize_text(post_data.get('selftext', ''))
            print(f"{i}. {post_data['title']}")
            print(f" Author: {post_data['author']}")
            print(f" Upvotes: {post_data['ups']}")
            print(f" Comments: {post_data['num_comments']}")
            print(f" URL: {link}\n")
            print(f" Post Text: {short_text}\n")
    except urllib.error.URLError as e:
        print(f"Error accessing URL: {e.reason}")
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
    except Exception as e:
        # Top-level script boundary: report anything unexpected rather than traceback.
        print(f"Unexpected error: {e}")


if __name__ == "__main__":
    main()