-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfinishline_review.py
More file actions
135 lines (104 loc) · 4.47 KB
/
finishline_review.py
File metadata and controls
135 lines (104 loc) · 4.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import csv
import re
# Product page whose reviews are scraped.
PRODUCT_URL = "https://www.finishline.com/store/product/mens-nike-air-zoom-mariah-flyknit-racer-running-shoes/prod902579?styleId=918264&colorId=401"
# CSV file the review rows are written to (overwritten on every run).
OUTPUT_CSV = 'finishline_review.csv'
# Maximum number of seconds to wait for the reviews / the next-page button.
WAIT_SECONDS = 10
# Fixed pause after clicking the next-page button, before re-reading the page.
PAGE_PAUSE_SECONDS = 2


def _text_or_blank(parent, xpath):
    """Return the text of the first element matching *xpath*, or "" if none.

    *parent* is anything exposing ``find_element`` (the driver or an element).
    """
    try:
        return parent.find_element(By.XPATH, xpath).text
    except NoSuchElementException:
        return ""


def _attribute_or_blank(parent, xpath, attribute):
    """Return *attribute* of the first element matching *xpath*, or "" if none."""
    try:
        return parent.find_element(By.XPATH, xpath).get_attribute(attribute)
    except NoSuchElementException:
        return ""


def _scrape_summary(driver):
    """Return the product-level review summary as an ordered list.

    Order: ave_stars, total_review, recom, ave_size, ave_width, ave_comfort.
    Raises NoSuchElementException if any of the summary elements is missing.
    """
    summary = driver.find_element(
        By.XPATH, '//div[@class="BVDIBody BVDI_QTBodySummaryBox"]')
    # NOTE(review): these XPaths start with '//', so they are evaluated against
    # the whole document rather than only inside the summary box. They are kept
    # exactly as they were; confirm against the page markup before scoping
    # them with './/'.
    ave_stars = summary.find_element(
        By.XPATH, '//div[@class="BVRRRatingNormalImage"]/img').get_attribute('title')
    total_review = summary.find_element(
        By.XPATH, '//span[@class="BVRRCustomReviewCountNumber"]').text
    recom = summary.find_element(
        By.XPATH, '//span[@class="BVRRBuyAgainPercentage"]').text
    ave_size = summary.find_element(
        By.XPATH, '(//div[@class="BVRRRatingSliderImage"]/img)').get_attribute('title')
    ave_width = summary.find_element(
        By.XPATH, '(//div[@class="BVRRRatingSliderImage"]/img)[2]').get_attribute('title')
    ave_comfort = summary.find_element(
        By.XPATH, '(//div[@class="BVRRRatingSliderImage"]/img)[3]').get_attribute('title')
    return [ave_stars, total_review, recom, ave_size, ave_width, ave_comfort]


def _scrape_review(review):
    """Return the fields of one review element as an ordered list.

    Order: stars, title, user, review_text, reason, helpful, not_helpful, date.
    A field whose element is not found under *review* is reported as "".
    """
    return [
        _attribute_or_blank(
            review, './/div[@class="BVRRRatingNormalImage"]/img', 'title'),
        _text_or_blank(review, './/span[@itemprop="name"]'),
        _text_or_blank(review, './/span[@itemprop="author"]'),
        _text_or_blank(review, './/span[@class="BVRRReviewText"]'),
        _text_or_blank(
            review,
            './/span[@class="BVRRValue BVRRContextDataValue BVRRContextDataValuePurchaseReason"]'),
        _text_or_blank(review, './/span[@class="BVDINumber"]'),
        _text_or_blank(review, '(.//span[@class="BVDINumber"])[2]'),
        _attribute_or_blank(
            review, './/meta[@itemprop="datePublished"]', 'content'),
    ]


def main():
    """Scrape every review page of PRODUCT_URL into OUTPUT_CSV.

    One CSV row is written per review, with the columns in this order:
    item, price, ave_stars, total_review, recom, ave_size, ave_width,
    ave_comfort, stars, title, user, review_text, reason, helpful,
    not_helpful, date. No header row is written.
    """
    # open new chrome browser
    driver = webdriver.Chrome()
    try:
        # newline='' is what the csv module expects (prevents blank rows on
        # Windows); an explicit encoding keeps non-ASCII review text writable
        # regardless of the platform default.
        with open(OUTPUT_CSV, 'w', newline='', encoding='utf-8') as csv_file:
            writer = csv.writer(csv_file)

            # go to the page to scrape
            driver.get(PRODUCT_URL)

            # get item info
            item = driver.find_element(By.XPATH, '//h1[@id="title"]').text
            price = driver.find_element(
                By.XPATH, '//div[@class="productPrice"]/span').text

            # Click review button to go to the review section
            driver.find_element(By.XPATH, '//span[@class="BVRRNumber"]').click()

            # Page index used to keep track of where we are.
            page = 1
            while True:
                try:
                    print("Scraping Page number " + str(page))
                    page += 1

                    # Find all the reviews on the page
                    reviews = WebDriverWait(driver, WAIT_SECONDS).until(
                        EC.presence_of_all_elements_located(
                            (By.XPATH, '//span[@itemprop="review"]')))
                    summary = _scrape_summary(driver)

                    for review in reviews:
                        # Explicit list so the column order never depends on
                        # dict ordering.
                        writer.writerow(
                            [item, price] + summary + _scrape_review(review))

                    # Locate the next button on the page
                    next_button = WebDriverWait(driver, WAIT_SECONDS).until(
                        EC.element_to_be_clickable(
                            (By.XPATH,
                             '//button[@class="button pag-button light-gray ml-1"]')))
                    next_button.click()
                    time.sleep(PAGE_PAUSE_SECONDS)
                except Exception as e:
                    # Best-effort stop condition: any failure while handling a
                    # page ends the scrape (presumably the next-page button
                    # timing out on the last page). Rows already written are
                    # kept.
                    print(e)
                    break
    finally:
        # quit() (rather than close()) also ends the driver session, and the
        # finally clause guarantees it even if setup fails before the loop.
        driver.quit()


if __name__ == "__main__":
    main()