-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathWeb_crawler.py
More file actions
32 lines (23 loc) · 856 Bytes
/
Web_crawler.py
File metadata and controls
32 lines (23 loc) · 856 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import requests
from bs4 import BeautifulSoup
def crawler(max_pages):
    """Crawl the forum's recent-activity listing pages and print thread info.

    Visits listing pages 1..max_pages (inclusive). For each page, extracts
    every thread link (anchor with class "title text-semibold"), prints the
    thread title, and follows the link to print the post dates inside.

    Args:
        max_pages: number of listing pages to visit (>= 1).
    """
    for page in range(1, max_pages + 1):
        url = 'https://thenewboston.com/forum/recent_activity.php?page=' + str(page)
        try:
            # Bound the request so one stalled server cannot hang the crawl,
            # and fail on HTTP error statuses instead of parsing error pages.
            source_code = requests.get(url, timeout=10)
            source_code.raise_for_status()
        except requests.RequestException as exc:
            # Best-effort crawl: report the failure and move to the next page.
            print('failed to fetch %s: %s' % (url, exc))
            continue
        soup = BeautifulSoup(source_code.text, "html.parser")
        for link in soup.findAll('a', {'class': 'title text-semibold'}):
            href = link.get('href')
            title = link.string
            print(title)
            get_info_inside(href)
def get_info_inside(url):
    """Fetch a single thread page and print every post date found on it.

    Args:
        url: absolute URL of the thread page to fetch.
    """
    try:
        # Timeout + status check: don't hang and don't parse error pages.
        source_code = requests.get(url, timeout=10)
        source_code.raise_for_status()
    except requests.RequestException as exc:
        # Best-effort: skip this thread rather than aborting the caller's loop.
        print('failed to fetch %s: %s' % (url, exc))
        return
    soup = BeautifulSoup(source_code.text, "html.parser")
    for link in soup.findAll('span', {'class': 'date'}):
        print(link.string)
if __name__ == "__main__":
    # Only start the crawl when run as a script, not when imported.
    crawler(1)