diff --git a/.gitpod.Dockerfile b/.gitpod.Dockerfile
new file mode 100644
index 0000000..f8e49f2
--- /dev/null
+++ b/.gitpod.Dockerfile
@@ -0,0 +1,7 @@
+FROM gitpod/workspace-full
+
+# Install custom tools, runtimes, etc.
+# For example "bastet", a command-line tetris clone:
+# RUN brew install bastet
+#
+# More information: https://www.gitpod.io/docs/config-docker/
diff --git a/.gitpod.yml b/.gitpod.yml
new file mode 100644
index 0000000..44bd2e7
--- /dev/null
+++ b/.gitpod.yml
@@ -0,0 +1,5 @@
+image:
+  file: .gitpod.Dockerfile
+
+tasks:
+  - init: pip install -r ./requirements.txt
diff --git a/README.md b/README.md
index 24efb47..4f0ec54 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,5 @@
+[![Gitpod ready-to-code](https://img.shields.io/badge/Gitpod-ready--to--code-blue?logo=gitpod)](https://gitpod.io/#https://github.com/gabrielfroes/webscraping_python_selenium)
+
 # Web Scraping JavaScript Generated Pages with Python
 
 This project was created just for educational proposes.
diff --git a/webscraping.py b/webscraping.py
index 3cdbba9..97e8969 100644
--- a/webscraping.py
+++ b/webscraping.py
@@ -1,5 +1,5 @@
 # -*- encoding: utf-8 -*-
-
+import time
 import requests
 import pandas as pd
 from bs4 import BeautifulSoup
@@ -20,6 +20,11 @@
     'blocks': {'field': 'BLK', 'label': 'BLK'},
 }
 
+def acceptTerms():
+    acceptBt = driver.find_element_by_id('onetrust-accept-btn-handler')
+    acceptBt.click()
+    return
+
 
 def buildrank(type):
 
@@ -53,6 +58,9 @@ def buildrank(type):
 driver.get(url)
 driver.implicitly_wait(10) # in seconds
 
+acceptTerms()
+time.sleep(10) #in seconds
+
 for k in rankings:
     top10ranking[k] = buildrank(k)
 