# Patch overview (review notes; the diff text below is kept verbatim).
#
# NOTE(review): the unified diff below has been collapsed onto one physical
# line, so its original line breaks (including blank context lines inside the
# hunks) are not visible here. It presumably needs to be regenerated from the
# repository before it can be applied -- confirm.
#
# Files touched, as shown by the "diff --git" headers and hunks:
#   * .gitignore       - hunk -1,9 +1,4: keeps geckodriver.log, ranking.json
#                        and venv/; removes the two comment lines, .vscode/
#                        and geckodriver.exe; adds geckodriver.
#   * requeriments.txt - new file with 14 pinned packages.
#                        NOTE(review): the name looks like a misspelling of
#                        the requirements.txt deleted by this same patch --
#                        confirm the intended filename.
#   * requirements.txt - deleted (it held 5 pinned packages).
#   * webscraping.py   - adds "import time" in place of a removed line;
#                        adds acceptTerms(), which looks up the element with
#                        id 'onetrust-accept-btn-handler' via
#                        driver.find_element_by_id and clicks it;
#                        replaces the driver.implicitly_wait(10) line
#                        (presumably a whitespace-only change -- the two
#                        versions read the same here); then calls
#                        acceptTerms() followed by time.sleep(10) before the
#                        "for k in rankings" loop.
#
# NOTE(review): acceptTerms() takes no parameters and reads the name `driver`
# from the enclosing module; in this patch it is called only after
# driver = webdriver.Firefox(...) and driver.get(url).
# NOTE(review): time.sleep(10) is an unconditional fixed pause; whether an
# explicit wait on a page element would serve instead depends on code outside
# this patch -- confirm.
diff --git a/.gitignore b/.gitignore index 2ebbcf5..6f501f4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,4 @@ geckodriver.log ranking.json - -# ignore directory .vscode/ and venv/ -.vscode/ venv/ - -# Ignore driver -geckodriver.exe +geckodriver diff --git a/requeriments.txt b/requeriments.txt new file mode 100644 index 0000000..4710c88 --- /dev/null +++ b/requeriments.txt @@ -0,0 +1,14 @@ +beautifulsoup4==4.9.3 +certifi==2020.12.5 +chardet==3.0.4 +idna==2.5 +lxml==4.6.2 +numpy==1.20.1 +pandas==1.2.2 +python-dateutil==2.8.1 +pytz==2021.1 +requests2==2.16.0 +selenium==3.141.0 +six==1.15.0 +soupsieve==2.2 +urllib3==1.21.1 diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 1cf46af..0000000 --- a/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -requests2==2.16.0 -pandas==1.0.1 -lxml==4.5.0 -beautifulsoup4==4.8.2 -selenium==3.141.0 diff --git a/webscraping.py b/webscraping.py index 3cdbba9..1ff935c 100644 --- a/webscraping.py +++ b/webscraping.py @@ -1,5 +1,5 @@ # -*- encoding: utf-8 -*- - +import time import requests import pandas as pd from bs4 import BeautifulSoup @@ -21,6 +21,12 @@ } +def acceptTerms(): + acceptBt = driver.find_element_by_id('onetrust-accept-btn-handler') + acceptBt.click() + return + + def buildrank(type): field = rankings[type]['field'] @@ -51,7 +57,11 @@ def buildrank(type): driver = webdriver.Firefox(options=option) driver.get(url) -driver.implicitly_wait(10) # in seconds +driver.implicitly_wait(10) # in seconds + +acceptTerms() +time.sleep(10) #in seconds + for k in rankings: top10ranking[k] = buildrank(k)