-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathGetDataProvinces.py
More file actions
80 lines (61 loc) · 2.89 KB
/
GetDataProvinces.py
File metadata and controls
80 lines (61 loc) · 2.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#Importing packages
import pandas as pd
import gspread as gs
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
# Wall-clock start; used for the elapsed-time report printed after the upload.
start_time = time.time()

# Header labels for the scraped table, in the order the cells appear in the
# element text (one cell per line, five cells per table row).
COLUMN_NAMES = ["Province", "Confirmed Cases", "Active Cases", "Deaths", "Recoveries"]
# Number of table rows kept from the scraped text.
MAX_ROWS = 7


# Convert string to list
def Convert(string):
    """Split *string* on newline characters and return the pieces as a list."""
    return string.split("\n")


def _rows_from_cells(cells, width=len(COLUMN_NAMES), max_rows=MAX_ROWS, fill=0):
    """Group a flat list of cells into exactly ``max_rows`` rows of ``width`` cells.

    Cells beyond ``width * max_rows`` are ignored. Rows that run past the end
    of ``cells`` are padded with ``fill`` so every row has the same length.
    """
    rows = []
    for start in range(0, width * max_rows, width):
        row = cells[start:start + width]
        rows.append(row + [fill] * (width - len(row)))
    return rows


options = Options()
# NOTE: headless mode is currently switched off (the line below is disabled),
# even though the progress message further down still says "Headless".
# options.headless = True
options.add_argument("disable-gpu")
options.add_argument("--disable-extensions")
options.add_argument('--disable-logging')
options.add_argument("log-level=3")

path='//*[@id="body"]/div/div/div[1]/div[2]/div/div[1]/div[1]/div[1]/div/lego-report/lego-canvas-container/div/file-drop-zone/span/content-section/div[71]/canvas-component/div/div/div[1]/div/div/lego-table'
timeout = 20  # seconds to wait for the table element to appear

driver = webdriver.Chrome('chromedriver.exe', options=options) #Google Chrome Version 92.0.4515.107

# Stays None when the wait times out, which skips the upload step below.
elemText = None
try:
    print ("2) Headless Chrome Initialized")
    driver.set_window_position(975, 5)
    driver.get('https://datastudio.google.com/embed/reporting/1PLVi5amcc_R5Gh928gTE8-8r8-fLXJQF/page/R24IB')
    try:
        element_present = EC.presence_of_element_located((By.XPATH, path))
        WebDriverWait(driver, timeout).until(element_present)
        # The second element with class "tableBody" is the one that is read.
        # find_elements(By.CLASS_NAME, ...) replaces the removed
        # find_elements_by_class_name helper and works on Selenium 3 and 4.
        element = driver.find_elements(By.CLASS_NAME, 'tableBody')[1]
        elemText = element.text
    except TimeoutException:
        print ("Timed out waiting for page to load")
finally:
    # Always shut the browser down, including when an unexpected error
    # (e.g. IndexError from the [1] lookup above) is propagating.
    driver.quit()

if elemText is not None:
    lsData = Convert(elemText)
    finalData = pd.DataFrame(_rows_from_cells(lsData), columns=COLUMN_NAMES)
    # Header row followed by the data rows, as a list of lists for the sheet update.
    products_list = [finalData.columns.values.tolist()] + finalData.values.tolist()

    print ("3) Data to Google Sheets Initialized")
    gc = gs.service_account(filename='keys.json') #Google credentials from google service
    sh = gc.open("countryProvince") #Google sheet file name
    worksheet = sh.worksheet("Sheet1") #Sheet tab name
    req=worksheet.update('A1', products_list) #Send data to google spread sheet

    print("\n")
    print(finalData)
    print("\n")
    seconds = time.time() - start_time
    print('Time Taken to Complete this Job:', time.strftime("%H:%M:%S",time.gmtime(seconds)))