-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapex_webscrape.py
More file actions
34 lines (26 loc) · 1.2 KB
/
apex_webscrape.py
File metadata and controls
34 lines (26 loc) · 1.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# The 'lxml' parser passed to BeautifulSoup below must be installed separately: pip install lxml
# testLink = 'https://www.apexkeyboards.ca/collections/switches/products/alpacas'
# Site root; serves as the base for the hrefs scraped from collection pages.
baseurl = "https://www.apexkeyboards.ca/"

# Module-level containers, all starting out empty. Each name is bound to its
# own distinct list object.
productLink, switchList, output = [], [], []

# Module-level default for the collection tag (empty string).
tag = ""
def _fetch_soup(url, timeout):
    """Download ``url`` and return its HTML parsed with the lxml parser."""
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of scraping an error page.
    response.raise_for_status()
    return BeautifulSoup(response.content, 'lxml')


def find_product(tag, timeout=10):
    """Return a one-line promo message for a product in the ``tag`` collection.

    Fetches ``https://www.apexkeyboards.ca/collections/<tag>``, collects the
    de-duplicated product links found inside ``div.grid-product__wrapper``
    elements, then reads the title and price from the page of the last link
    collected.

    Args:
        tag: Collection slug substituted into the collection URL; it is also
            echoed verbatim in the returned message.
        timeout: Passed to ``requests.get`` for every request so a stalled
            server cannot block the caller forever. Defaults to 10.

    Returns:
        A string of the form ``"<name> <price> is one of many <tag> we have!
        Checkout ApexKeyboards for the full list."``.

    Raises:
        requests.RequestException: If a request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the collection page contains no product links, or the
            product page has no title or price element.
    """
    listing = _fetch_soup(
        'https://www.apexkeyboards.ca/collections/{}'.format(tag), timeout)

    # Links are gathered in local containers (not module-level state) so a
    # call that raises part-way cannot leak its links into the next call.
    links = []
    seen = set()
    for wrapper in listing.find_all('div', class_='grid-product__wrapper'):
        for anchor in wrapper.find_all('a', href=True):
            # urljoin avoids the '//' produced by plain concatenation of a
            # base ending in '/' with an href starting with '/', and leaves
            # absolute hrefs intact.
            url = urljoin(baseurl, anchor['href'])
            if url not in seen:
                seen.add(url)
                links.append(url)

    if not links:
        raise ValueError(
            "no product links found for collection {!r}".format(tag))

    # NOTE(review): the previous implementation is read as requesting every
    # product page and keeping only the values from the last one; only that
    # last page is requested here. Confirm the last link is the intended pick.
    product_url = links[-1]
    page = _fetch_soup(product_url, timeout)

    title_node = page.find('h1', class_='product-single__title')
    price_node = page.find('span', class_='product-single__price')
    if title_node is None or price_node is None:
        raise ValueError(
            "product page {} has no title or price element".format(product_url))

    name = title_node.text.strip()
    price = price_node.text.strip()
    return "{} {} is one of many {} we have! Checkout ApexKeyboards for the full list.".format(name, price, tag)