diff --git a/hypeme.py b/hypeme.py index b23ee36..36ab346 100644 --- a/hypeme.py +++ b/hypeme.py @@ -28,19 +28,26 @@ import string import os + +from mutagen.easyid3 import EasyID3 +import re + + ##############AREA_TO_SCRAPE################ # This is the general area that you'd # like to parse and scrape. # ex. 'popular', 'latest', '' or -# 'track/' -############################################ -AREA_TO_SCRAPE = 'popular' -NUMBER_OF_PAGES = 3 +=############################################ + +# AREA_TO_SCRAPE = raw_input("Area URL to scrape?\ndefault is 'popular': ") or 'http://hypem.com/popular' +# HYPEM_URL = 'http://hypem.com/{}'.format(AREA_TO_SCRAPE) + +HYPEM_URL = raw_input("Area URL to scrape?\ndefault is 'popular': ") or 'http://hypem.com/popular' +DIRECTORY = raw_input("Where do you want to put the files?\ndefault is '/media/Data/Music/Unsorted': ") or '/media/Data/Music/Unsorted' +NUMBER_OF_PAGES = int((raw_input("Number of pages to scrape?\ndefault is 1: ") or 1)) ###DO NOT MODIFY THESE UNLES YOU KNOW WHAT YOU ARE DOING#### DEBUG = False -HYPEM_URL = 'http://hypem.com/{}'.format(AREA_TO_SCRAPE) - validFilenameChars = "-_.() %s%s" % (string.ascii_letters, string.digits) @@ -54,6 +61,7 @@ class HypeScraper: def __init__(self): pass + def start(self): print "--------STARTING DOWNLOAD--------" print "\tURL : {} ".format(HYPEM_URL) @@ -120,7 +128,6 @@ def download_songs(self, tracks, cookie): title = removeDisallowedFilenameChars(track[u"song"]) type = track[u"type"] - print "\tFETCHING SONG...." print u"\t{} by {}".format(title, artist) @@ -129,29 +136,46 @@ def download_songs(self, tracks, cookie): continue try: - serve_url = "http://hypem.com/serve/source/{}/{}".format(id, key) - request = urllib2.Request(serve_url, "" , {'Content-Type': 'application/json'}) - request.add_header('cookie', cookie) - response = urllib2.urlopen(request) - song_data_json = response.read() - response.close() - song_data = json.loads(song_data_json) - url = song_data[u"url"] - - download_response = urllib2.urlopen(url) - filename = "{} - {}.mp3".format(artist, title) - if os.path.exists(filename): + # filename = "{} - {}.mp3".format(artist, title) + filename = "{}.mp3".format(title) + if os.path.exists(os.path.join(DIRECTORY, filename)): print("File already exists , skipping") else: - mp3_song_file = open(filename, "wb") - mp3_song_file.write(download_response.read() ) + # don't waste the time to download the song if it already exists + print "\tFETCHING SONG...." + serve_url = "http://hypem.com/serve/source/{}/{}".format(id, key) + request = urllib2.Request(serve_url, "" , {'Content-Type': 'application/json'}) + request.add_header('cookie', cookie) + response = urllib2.urlopen(request) + song_data_json = response.read() + response.close() + song_data = json.loads(song_data_json) + url = song_data[u"url"] + + + try: + download_response = urllib2.urlopen(url, timeout=5) + except urllib2.URLError, e: + print '\tFUNCTION TIMEOUT. REDOWNLOAD AFTER.' + continue + + mp3_song_file = open(os.path.join(DIRECTORY, filename), "wb") + mp3_song_file.write(download_response.read()) mp3_song_file.close() + + # save tags to same file + id3_tag = EasyID3() + id3_tag['artist'] = artist + id3_tag['title'] = title + id3_tag.save(os.path.join(DIRECTORY, filename)) + except urllib2.HTTPError, e: print 'HTTPError = ' + str(e.code) + " trying hypem download url." except urllib2.URLError, e: print 'URLError = ' + str(e.reason) + " trying hypem download url." except Exception, e: print 'generic exception: ' + str(e) + @@ -162,4 +186,4 @@ def main(): if __name__ == "__main__": main() - \ No newline at end of file + diff --git a/requirements.txt b/requirements.txt index 27be06a..75b9921 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ -BeautifulSoup4 \ No newline at end of file +BeautifulSoup4 +mutagen