Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions intelmotherboards.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
import requests
from neweggpy.nefuncs import IterPages,BoolToInt,getPIDS,getData,insertData

baseurl = 'http://m.newegg.com/ProductList?description=Intel+Motherboards' + \
'&categoryId=280&storeId=1&nodeId=7627&parentCategoryId=20' + \
'&isSubCategory=true&categoryType=1'
baseurl = 'https://m.newegg.com/ProductList?description=FHLEsA70dKzKoDqR2lBeblLSvjuwGkdtB%252fdjJTC%252f8VU%253d&storeid=1&categoryid=-1&nodeid=7627&storetype=2&subcategoryid=280&brandid=-1&nvalue=100007627&showseealldeals=False&itemcount=0&issubcategory=true&level=3'

pg1 = requests.get(baseurl).content
headers = {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.0.3; ko-kr; LG-L160L Build/IML74K) AppleWebkit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'}

pg1 = requests.get(baseurl, headers=headers).content
root1 = lxml.html.fromstring(pg1)
page_count = IterPages(root1)
URLs = ['%s&Page=%s' % (baseurl, pgnum) for pgnum in range(1, page_count + 1)]
Expand Down
8 changes: 4 additions & 4 deletions laptop.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
import requests
from neweggpy.nefuncs import IterPages,BoolToInt,getPIDS,getData,insertData

baseurl = 'http://m.newegg.com/ProductList?description=Laptops+%2f+' + \
'Notebooks&categoryId=32&storeId=3&nodeId=6740&' + \
'parentCategoryId=223&isSubCategory=true&categoryType=1'
baseurl = 'https://m.newegg.com/productlist?description=nVjkbn88TPpxbYCO44j7AJ7lKvYiECglej4lhl3FXQM%253d&storeid=3&categoryid=-1&nodeid=6740&storetype=2&subcategoryid=32&brandid=-1&nvalue=100006740&showseealldeals=False&itemcount=0&issubcategory=true&level=3'

headers = {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.0.3; ko-kr; LG-L160L Build/IML74K) AppleWebkit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'}
pg1 = requests.get(baseurl, headers=headers).content

pg1 = requests.get(baseurl).content
root1 = lxml.html.fromstring(pg1)
page_count = IterPages(root1)
URLs = ['%s&Page=%s' % (baseurl, pgnum) for pgnum in range(1, page_count + 1)]
Expand Down
65 changes: 39 additions & 26 deletions neweggpy/nefuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@
import os
import requests
import sqlite3
import traceback
import sys

dtn = datetime.now().strftime('%Y-%m-%d %H:%M:%S')


def IterPages(rootobj):
t = rootobj.cssselect('span.colorGrey')[0].text
t = filter(lambda x: x.isdigit(), t)
return int(ceil(int(t)/20))
t = rootobj.cssselect('#pagesNum > option:nth-child(1)')[0].text
return int(ceil(int(t[2:])/20))


def BoolToInt(boolobj):
Expand All @@ -31,65 +32,77 @@ def getPIDS(urlList, pg1root):
ProductList = []
for k, url in enumerate(urlList):
if k is 0: # Reuse the root object for the first page
for el in pg1root.cssselect('a.listCell'):
for el in pg1root.cssselect('a.item-cell'):
ProductList.append(el.attrib['href'])
else:
r = requests.get(url).content
for el in fromstring(r).cssselect('a.listCell'):
headers = {
'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.0.3; ko-kr; LG-L160L Build/IML74K) AppleWebkit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'}
r = requests.get(url, headers=headers).content
for el in fromstring(r).cssselect('a.item-cell'):
ProductList.append(el.attrib['href'])
pids = [i.split('=')[1] for i in ProductList if i.count('itemNumber=') == 1]
pids = [i.replace('https://m.newegg.com/products/', '') for i in ProductList]
return pids


def getData(pidList):
apiurl = 'http://www.ows.newegg.com/Products.egg'
OutData = []
for pid in pidList:
print pid
sleep(1)
try:
r = requests.get('%s/%s' % (apiurl, pid)).content
url = '%s/%s' % (apiurl, pid)
headers = {
'User-Agent': 'Mozilla/5.0 (Android; Mobile; rv:14.0) Gecko/14.0 Firefox/14.0'}
r = requests.get(url,headers=headers).content
js = loads(r)
basic = js['Basic']
additional = js['Additional']
g = {}
g['Title'] = js['Title']
final_price = js['FinalPrice'].replace(',', '')
g['Title'] = basic['Title']
final_price = basic['FinalPrice'].replace(',', '')
if final_price.count('Checkout') == 1:
g['FinalPrice'] = float('NaN')
elif final_price == 'See price in cart':
g['FinalPrice'] = float(js['MappingFinalPrice'].replace(',', '').replace('$', ''))
g['FinalPrice'] = float(basic['MappingFinalPrice'].replace(',', '').replace('$', ''))
else:
g['FinalPrice'] = float(final_price.replace('$', ''))
g['OriginalPrice'] = float(js['OriginalPrice'].replace(',', '').replace('$', ''))
g['Instock'] = BoolToInt(js['Instock'])
g['Rating'] = js['ReviewSummary']['Rating']
if (basic['OriginalPrice'] != ''):
g['OriginalPrice'] = float(basic['OriginalPrice'].replace(',', '').replace('$', ''))
else:
g['OriginalPrice'] = 0.0
g['Instock'] = BoolToInt(basic['Instock'])
g['Rating'] = basic['ReviewSummary']['Rating']
try:
g['TotalReviews'] = le(js['ReviewSummary']['TotalReviews'])[0]
g['TotalReviews'] = le(basic['ReviewSummary']['TotalReviews'])[0]
except:
g['TotalReviews'] = 0
g['IsHot'] = BoolToInt(js['IsHot'])
ShippingPrice = js['ShippingInfo']['NormalShippingText'].split(' ')[0]
if ShippingPrice.count('Free') == 1:
g['IsHot'] = BoolToInt(basic['IsHot'])
ShippingPrice = basic['ShippingText'].split(' ')[0]
if ShippingPrice.count('FREE') == 1:
g['ShippingPrice'] = 0.0
elif ShippingPrice.count('Special') == 1:
g['ShippingPrice'] = 2.99 # "Special shipping => $2.99 Egg Saver Shipping"
else:
g['ShippingPrice'] = float(ShippingPrice.replace('$', ''))
g['IsShipByNewegg'] = BoolToInt(js['IsShipByNewegg'])
g['IsShipByNewegg'] = BoolToInt(additional['ShippingInfo']['IsShipByNewegg'])

if len(js['PromotionText']) > 0:
g['Promotion'] = js['PromotionText']
if len(basic['PromotionText']) > 0:
g['Promotion'] = basic['PromotionText']
else:
g['Promotion'] = 'NaN'
MIR = js['MailInRebateInfo']
MIR = additional['MailInRebates']
if MIR is None:
g['MailInRebateInfo'] = 'NaN'
else:
g['MailInRebateInfo'] = js['MailInRebateInfo'][0]
g['MailInRebateInfo'] = additional['MailInRebates'][0]
g['PID'] = pid
g['Brand'] = js['CoremetricsInfo']['Brand']
g['Brand'] = basic['ItemBrand']['Description']
g['Date'] = dtn
OutData.append(g)
except:
print 'FAILED: %s' % pid
except Exception, e:
print 'FAILED: %s %s' % (pid, e)
traceback.print_exc()
pass
dframe = DataFrame(OutData)
dframe['FinalPriceShipped'] = dframe['FinalPrice'] + dframe['ShippingPrice']
Expand Down