-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
108 lines (83 loc) · 3.16 KB
/
main.py
File metadata and controls
108 lines (83 loc) · 3.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import base64
import json
import os
import time
import urllib
import urllib.request

import requests
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.keys import Keys
# Scrape Google Images for each configured search term: download up to
# `no_of_images` pictures per term into ./assets and record each one
# (alt text, source URL, filename) in meta.json.

# Creating a webdriver instance
# NOTE(review): passing the driver path positionally is only accepted by some
# Selenium releases; newer ones expect a Service object - confirm against the
# installed version.
driver = webdriver.Chrome('./chromedriver.exe')

# Configs
query = ["dogs", "cat"]  # Change me to the list you have.
no_of_images = 50  # Number of images you need to scrap from google images.

ASSETS_DIR = './assets'  # Downloaded pictures are written here.
META_FILE = 'meta.json'  # JSON list with one object per downloaded picture.


def scroll_to_bottom():
    """Scroll the current page down until its height stops growing.

    After every scroll the element matched by ``.YstHxe input`` (presumably
    the "show more results" button - verify against the live page) is
    clicked when possible, so that further results get a chance to load.
    """
    last_height = driver.execute_script('return document.body.scrollHeight')
    while True:
        driver.execute_script('window.scrollTo(0,document.body.scrollHeight)')
        time.sleep(2)
        try:
            # The locator strategy is spelled "css selector" (with a space);
            # "css_selector" is rejected by the driver, so the click never ran.
            driver.find_element("css selector", ".YstHxe input").click()
        except WebDriverException:
            # Best effort: the element is absent or not clickable right now.
            pass
        else:
            # Give the page time to react to the click.
            time.sleep(2)
        # Measure only after the click attempt so that content loaded by the
        # click keeps the loop going instead of being missed.
        new_height = driver.execute_script('return document.body.scrollHeight')
        # checking if we have reached the bottom of the page
        if new_height == last_height:
            break
        last_height = new_height


def _append_meta(entry):
    """Append ``entry`` to the JSON list stored in META_FILE.

    A missing file is treated as an empty list so the first run works.

    Raises:
        ValueError: if the file holds invalid JSON or something other than
            a list.
    """
    try:
        with open(META_FILE, "r") as file:
            data = json.load(file)
    except FileNotFoundError:
        data = []
    if not isinstance(data, list):
        raise ValueError(f"{META_FILE} must contain a JSON list")
    data.append(entry)
    with open(META_FILE, "w") as file:
        json.dump(data, file)


def _save_images(term):
    """Download up to ``no_of_images`` pictures for ``term``.

    Each thumbnail in the results grid is clicked in turn; the picture shown
    in the preview pane is downloaded when its ``src`` is an https URL.

    Returns:
        The number of pictures that were downloaded.
    """
    # Bound the walk by the number of grid cells actually present; probing
    # ever-larger indices would otherwise never end when the page has fewer
    # usable thumbnails than requested.
    cells = driver.find_elements("xpath", '//*[@id="islrg"]/div[1]/div')
    saved = 0
    for i in range(1, len(cells) + 1):
        if saved >= no_of_images:
            break
        try:
            # XPath of each image
            driver.find_element(
                "xpath",
                '//*[@id="islrg"]/div[1]/div[' + str(i) + ']/a[1]/div[1]/img',
            ).click()
            # Just to avoid unwanted errors
            time.sleep(1)
            tot = driver.find_element(
                "xpath", '//*[@id="Sva75c"]/div/div/div[2]/div[2]/div[2]/c-wiz/div[2]/div[1]/div[1]/div[2]/div/a/img')
            src = tot.get_attribute('src')
            # Skip previews without a downloadable https source.
            if not src or not src.startswith("https:"):
                continue
            filename = f'{term}{saved + 1}.png'
            # save image
            urllib.request.urlretrieve(src, f'{ASSETS_DIR}/{filename}')
            saved += 1
            print(src)
            # Update meta
            _append_meta(
                {"alt": tot.get_attribute('alt'), "url": src, "filename": filename})
        except (WebDriverException, OSError, ValueError) as e:
            # Best effort per thumbnail: report the problem and move on.
            print(e)
    return saved


try:
    os.makedirs(ASSETS_DIR, exist_ok=True)
    for term in query:
        # Open Google Images in the browser
        driver.get('https://images.google.com/')
        # Finding the search box
        box = driver.find_element(
            "xpath", '/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
        # Type the search query in the search box
        box.send_keys(term)
        # Pressing enter
        box.send_keys(Keys.ENTER)
        scroll_to_bottom()
        # Capture and save each image
        saved_count = _save_images(term)
        if saved_count >= no_of_images:
            print("Completed")
        else:
            print(f"Only {saved_count} of {no_of_images} images saved for {term!r}")
finally:
    # Driver closure: always end the browser session, even after an error.
    driver.quit()