From de62e48f9b74055f3408d35ed17bcf2ebb7f7b02 Mon Sep 17 00:00:00 2001 From: jonathan-mothe Date: Fri, 26 Feb 2021 15:37:13 -0300 Subject: [PATCH 1/6] Update .gitignore --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 16e2d88..6f501f4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ geckodriver.log -ranking.json \ No newline at end of file +ranking.json +venv/ +geckodriver From a37ab051f40d40e1c8f2eff3e7246ba9312a5d19 Mon Sep 17 00:00:00 2001 From: jonathan-mothe Date: Fri, 26 Feb 2021 15:40:48 -0300 Subject: [PATCH 2/6] Add requeriments --- requeriments.txt | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 requeriments.txt diff --git a/requeriments.txt b/requeriments.txt new file mode 100644 index 0000000..4710c88 --- /dev/null +++ b/requeriments.txt @@ -0,0 +1,14 @@ +beautifulsoup4==4.9.3 +certifi==2020.12.5 +chardet==3.0.4 +idna==2.5 +lxml==4.6.2 +numpy==1.20.1 +pandas==1.2.2 +python-dateutil==2.8.1 +pytz==2021.1 +requests2==2.16.0 +selenium==3.141.0 +six==1.15.0 +soupsieve==2.2 +urllib3==1.21.1 From 1f06dc951ce03155072cd2f79890b8dcb712081e Mon Sep 17 00:00:00 2001 From: jonathan-mothe Date: Fri, 26 Feb 2021 16:03:18 -0300 Subject: [PATCH 3/6] Accept terms of cookies --- webscraping.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/webscraping.py b/webscraping.py index 6b942e0..17ddd76 100644 --- a/webscraping.py +++ b/webscraping.py @@ -20,6 +20,12 @@ } +def acceptTerms(): + acceptBt = driver.find_element_by_id('onetrust-accept-btn-handler') + acceptBt.click() + return + + def buildrank(type): field = rankings[type]['field'] @@ -50,7 +56,11 @@ def buildrank(type): driver = webdriver.Firefox(options=option) driver.get(url) -time.sleep(10) # in seconds +driver.implicitly_wait(10) # in seconds + +acceptTerms() +time.sleep(10) #in seconds + for k in rankings: top10ranking[k] = buildrank(k) @@ -58,7 +68,6 @@ def buildrank(type): driver.quit() # Dump and Save to JSON file (Converter e salvar em um arquivo JSON) -js = json.dumps(top10ranking) -fp = open('ranking.json', 'w') -fp.write(js) -fp.close() +with open('ranking.json', 'w', encoding='utf-8') as jp: + js = json.dumps(top10ranking, indent=4) + jp.write(js) From 17cf4ace9f2d4771d3e9ab30f21c57f85aa08efe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Moth=C3=A9?= <35958354+jonathan-mothe@users.noreply.github.com> Date: Fri, 26 Feb 2021 16:19:21 -0300 Subject: [PATCH 4/6] Delete requirements.txt --- requirements.txt | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 1cf46af..0000000 --- a/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -requests2==2.16.0 -pandas==1.0.1 -lxml==4.5.0 -beautifulsoup4==4.8.2 -selenium==3.141.0 From d7842abf0bbd1e6202aa031f0f22bd1f5db878a4 Mon Sep 17 00:00:00 2001 From: jonathan-mothe Date: Fri, 26 Feb 2021 16:21:10 -0300 Subject: [PATCH 5/6] Update .gitignore --- .gitignore | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/.gitignore b/.gitignore index aeac379..6f501f4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,14 +1,4 @@ geckodriver.log ranking.json -<<<<<<< HEAD venv/ geckodriver -======= - -# ignore directory .vscode/ and venv/ -.vscode/ -venv/ - -# Ignore driver -geckodriver.exe ->>>>>>> cb44b416d10cc08a9b4929444fa07fc84541c90b From 1cddb6b9eca53331b95c0755b36357f6726909d7 Mon Sep 17 00:00:00 2001 From: jonathan-mothe Date: Fri, 26 Feb 2021 16:29:33 -0300 Subject: [PATCH 6/6] Update webscraping --- webscraping.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/webscraping.py b/webscraping.py index c81e37c..1ff935c 100644 --- a/webscraping.py +++ b/webscraping.py @@ -1,5 +1,5 @@ # -*- encoding: utf-8 -*- - +import time import requests import pandas as pd from bs4 import BeautifulSoup @@ -57,15 +57,11 @@ def buildrank(type): driver = webdriver.Firefox(options=option) driver.get(url) -<<<<<<< HEAD driver.implicitly_wait(10) # in seconds acceptTerms() time.sleep(10) #in seconds -======= -driver.implicitly_wait(10) # in seconds ->>>>>>> cb44b416d10cc08a9b4929444fa07fc84541c90b for k in rankings: top10ranking[k] = buildrank(k)