-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpid.py
More file actions
63 lines (56 loc) · 1.72 KB
/
pid.py
File metadata and controls
63 lines (56 loc) · 1.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Jarman Liu
import requests
import re
from multiprocessing.dummy import Pool as ThreadPool
def htscraping(pid):
    """Probe one Hot Topic product ID and record its URL if it matches.

    Requests ``https://www.hottopic.com/product/<pid>.html`` with redirects
    enabled and inspects the final URL of the response. When that URL
    contains ``funko`` followed later by ``hot-topic-exclusive``, or contains
    ``street-date``, the URL is printed and appended as one line to the
    module-level ``file`` handle, which the caller must have opened for
    writing beforehand.

    Network failures (including timeouts) are reported on stderr and the
    product ID is skipped, so one bad request cannot abort a scan that is
    mapping this function over many IDs.

    Args:
        pid: Product ID to probe; converted with ``str()`` to build the URL.

    Returns:
        None.
    """
    import sys  # local import: only needed on the error-reporting path

    headers = {'User-Agent': 'user_agent', }
    url = 'https://www.hottopic.com/product/' + str(pid) + '.html'
    try:
        # The context manager closes the session's connection pool; without
        # it every call would leave sockets behind.
        with requests.Session() as session:
            # requests never times out by default, so a stalled connection
            # would block this worker forever. 30 seconds is a generous bound.
            response = session.get(
                url, headers=headers, allow_redirects=True, timeout=30)
    except requests.RequestException as exc:
        print("Request failed for " + url + ": " + str(exc), file=sys.stderr)
        return

    final_url = str(response.url)
    # Only match/no-match matters, so re.search replaces re.findall.
    is_exclusive = re.search(r"funko.*hot-topic-exclusive", final_url)
    has_street_date = re.search(r"street-date", final_url)
    if is_exclusive or has_street_date:
        print(final_url)
        # A single write() call keeps the URL and its newline together.
        file.write(final_url + "\n")
def blscraping(pid):
    """Probe one BoxLunch product ID and record its URL if it matches.

    Requests ``https://www.boxlunch.com/product/<pid>.html`` with redirects
    enabled and inspects the final URL of the response. When that URL
    contains ``funko`` followed later by ``boxlunch-exclusive``, or contains
    ``street-date``, the URL is printed and appended as one line to the
    module-level ``file`` handle, which the caller must have opened for
    writing beforehand.

    Network failures (including timeouts) are reported on stderr and the
    product ID is skipped, so one bad request cannot abort a scan that is
    mapping this function over many IDs.

    Args:
        pid: Product ID to probe; converted with ``str()`` to build the URL.

    Returns:
        None.
    """
    import sys  # local import: only needed on the error-reporting path

    headers = {'User-Agent': 'user_agent', }
    url = 'https://www.boxlunch.com/product/' + str(pid) + '.html'
    try:
        # The context manager closes the session's connection pool; without
        # it every call would leave sockets behind.
        with requests.Session() as session:
            # requests never times out by default, so a stalled connection
            # would block this worker forever. 30 seconds is a generous bound.
            response = session.get(
                url, headers=headers, allow_redirects=True, timeout=30)
    except requests.RequestException as exc:
        print("Request failed for " + url + ": " + str(exc), file=sys.stderr)
        return

    final_url = str(response.url)
    # Only match/no-match matters, so re.search replaces re.findall.
    is_exclusive = re.search(r"funko.*boxlunch-exclusive", final_url)
    has_street_date = re.search(r"street-date", final_url)
    if is_exclusive or has_street_date:
        print(final_url)
        # A single write() call keeps the URL and its newline together.
        file.write(final_url + "\n")
# Interactive driver: ask which store to scan, then probe a fixed window of
# consecutive product IDs with 8 worker threads. Matching URLs are written to
# pids.txt by the per-store worker functions.
print("Stores:")
print("1. Hot Topic")
print("2. Box Lunch")
try:
    choice = int(input("Which store would you like to scrape? "))
except ValueError:
    # Non-numeric input is treated like any other unknown menu entry instead
    # of aborting with a traceback.
    choice = None

# Menu number -> worker that probes a single product ID.
scrapers = {1: htscraping, 2: blscraping}
scraper = scrapers.get(choice)

if scraper is None:
    # Both messages are kept so the output for an unknown menu number is
    # unchanged.
    print("Invalid choice")
    print("Please input a valid choice")
else:
    # Fixed scan window: first product ID and how many consecutive IDs follow.
    pid = 11539160
    ranges = 1400000
    # The workers write through the module-level name `file`, so the handle
    # has to be bound under exactly that name. The `with` statement closes the
    # file even if the scan is interrupted. The pool is only created once a
    # valid store is chosen, and its context manager calls terminate() on
    # exit, which is safe here because map() has already returned (or raised).
    with open("pids.txt", "w") as file, ThreadPool(8) as pool:
        # A range has a length, so map() can chunk it directly; there is no
        # need to materialise 1.4 million IDs in a list first.
        pool.map(scraper, range(pid, pid + ranges))