# KeebsChat/apex_webscrape.py at main · DuckyZero/KeebsChat · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Requires the lxml parser (`pip install lxml`): BeautifulSoup is asked to use it below.
# testLink = 'https://www.apexkeyboards.ca/collections/switches/products/alpacas'

# Site root (note the trailing slash); find_product prefixes scraped hrefs with it.
baseurl = 'https://www.apexkeyboards.ca/'
# Shared scratch list of product-page URLs; find_product fills it while
# scraping a collection page and clears it again before returning.
productLink = []
# NOTE(review): not referenced by the code visible here - presumably used
# elsewhere in the project; confirm before removing.
switchList = []
# NOTE(review): module-level default only; find_product takes its own `tag`
# parameter, which shadows this name inside the function.
tag = ''
# NOTE(review): not referenced by the code visible here - confirm usage.
output = []

def find_product(tag, timeout=10):
    """Return a short blurb about the first product in a collection.

    Fetches ``<baseurl>collections/<tag>``, gathers the links found inside
    every ``grid-product__wrapper`` element, then visits those product pages
    in order until one exposes both a title and a price.

    Args:
        tag: Collection slug appended to the collections URL
            (e.g. ``'switches'``). It is also echoed in the returned text.
        timeout: Seconds to wait on each HTTP request before ``requests``
            gives up. Defaults to 10.

    Returns:
        A sentence of the form ``"<name> <price> is one of many <tag> we
        have! ..."`` for the first usable product page, or ``None`` when the
        collection page yields no links or no linked page has both a title
        and a price.

    Raises:
        requests.exceptions.RequestException: If a request fails or times
            out.

    Side effects:
        Uses the module-level ``productLink`` list as scratch space; it is
        always empty again when this function exits, including on error.
    """
    # Start clean: an earlier call that raised part-way through must not leak
    # its links into this lookup.
    productLink.clear()
    try:
        r = requests.get('{}collections/{}'.format(baseurl, tag), timeout=timeout)
        soup = BeautifulSoup(r.content, 'lxml')

        for item in soup.find_all('div', class_='grid-product__wrapper'):
            for link in item.find_all('a', href=True):
                # urljoin avoids the doubled slash that plain concatenation
                # produces (baseurl ends with '/', hrefs start with '/') and
                # leaves already-absolute hrefs untouched.
                url = urljoin(baseurl, link['href'])
                # Each product card links to the same page more than once.
                if url not in productLink:
                    productLink.append(url)

        for link in productLink:
            r = requests.get(link, timeout=timeout)
            soup = BeautifulSoup(r.content, 'lxml')

            name = soup.find('h1', class_='product-single__title')
            price = soup.find('span', class_='product-single__price')
            # find() returns None when the element is absent; skip pages that
            # are not a regular product page instead of crashing on `.text`.
            if name is None or price is None:
                continue

            return "{} {} is one of many {} we have! Checkout ApexKeyboards for the full list.".format(
                name.text.strip(), price.text.strip(), tag)

        return None
    finally:
        # Leave the shared list empty on every exit path (return or exception).
        productLink.clear()