-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathWebScraper.py
More file actions
32 lines (28 loc) · 1.19 KB
/
WebScraper.py
File metadata and controls
32 lines (28 loc) · 1.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import re
import requests
from bs4 import BeautifulSoup
def connectToAndParseGoogleNews(symbol, timeout=10):
    """Fetch the Google news-search results page for a symbol and parse it.

    Args:
        symbol: Search term (e.g. a ticker); the URL template appends
            "+stock" to it inside the ``q=`` parameter.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Returns:
        A ``BeautifulSoup`` document built from the response body with the
        "html.parser" backend, or ``None`` if anything went wrong (an error
        message is printed in that case).
    """
    # Function-scope import so this block does not depend on file-level edits.
    from urllib.parse import quote_plus

    try:
        # Escape the symbol so characters such as "&", "#" or spaces cannot
        # terminate the q= parameter and corrupt the rest of the query string.
        query = quote_plus(symbol)
        url = (
            "https://www.google.com/search?q=" + query
            + "+stock&rlz=1C1RXQR_enCA1008CA1008&tbm=nws"
            "&sxsrf=ALiCzsbE0o2Uo1nomFMjUeMyv2zghrvn3w:1663463316421"
            "&source=lnt&tbs=sbd:1&sa=X"
            "&ved=2ahUKEwjgtsbgk536AhWbrYkEHZX1CDwQpwV6BAgBECE"
            "&biw=1280&bih=649&dpr=3"
        )
        response = requests.get(url, timeout=timeout)
        return BeautifulSoup(response.content, "html.parser")
    except Exception:
        # Best-effort by design: report and return None rather than raise.
        # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
        print("An error has occured connecting to Google News")
        return None
def getNewsLinksFromGoogleNews(result):
    """Collect non-Google article links from a parsed search results page.

    Args:
        result: Parsed document exposing ``find_all("a")`` (e.g. the
            ``BeautifulSoup`` object returned by
            ``connectToAndParseGoogleNews``).

    Returns:
        A list of link strings in document order, or ``None`` if extraction
        failed (an error message is printed in that case). Only hrefs that
        contain "https" and do not contain "google" are kept; for each one
        the "/url?q=" prefix is removed and everything from "&sa=U" onward
        is dropped.
    """
    try:
        urls = []
        for link in result.find_all("a"):
            href = link.get("href")
            # Anchors without an href yield None; skip them instead of
            # letting a TypeError abort the whole extraction.
            if not href:
                continue
            if "https" in href and "google" not in href:
                urls.append(href.replace("/url?q=", "").split("&sa=U")[0])
        return urls
    except Exception:
        # Best-effort by design (e.g. ``result`` is None after a failed
        # fetch): report and return None rather than raise.
        print("An error has occured extracting links from Google News")
        return None
def getArticleText(url, timeout=10):
    """Download a page and return the text of its headings and paragraphs.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Returns:
        The text of every ``h1``, ``h2``, ``h3`` and ``p`` element in
        document order, each stripped of surrounding whitespace and followed
        by a single space (so a non-empty result ends with a space). Returns
        ``None`` if anything went wrong; the exception is printed.
    """
    try:
        response = requests.get(url, timeout=timeout)
        elements = BeautifulSoup(response.content, "html.parser").select(
            "h1, h2, h3, p"
        )
        # join() builds the string in one pass instead of repeated +=.
        return "".join(
            element.get_text().strip() + " " for element in elements
        )
    except Exception as e:
        # Best-effort by design: report and return None rather than raise.
        print(e)
        return None