From 5d06b8af929f21d7b267e101deb19c1fb5690b44 Mon Sep 17 00:00:00 2001
From: Sachin Rudraraju
Date: Wed, 20 Aug 2014 12:26:46 -0400
Subject: [PATCH 1/3] id3 tags and other small improvements

---
 hypeme.py | 47 +++++++++++++++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/hypeme.py b/hypeme.py
index b23ee36..c473a0d 100644
--- a/hypeme.py
+++ b/hypeme.py
@@ -28,20 +28,24 @@
 import string
 import os
 
+
+from mutagen.easyid3 import EasyID3
+import re
+
+
 ##############AREA_TO_SCRAPE################
 # This is the general area that you'd
 # like to parse and scrape.
 # ex. 'popular', 'latest', '' or
 # 'track/'
 ############################################
-AREA_TO_SCRAPE = 'popular'
-NUMBER_OF_PAGES = 3
+AREA_TO_SCRAPE = raw_input("Area to scrape?\ndefault is 'popular': ") or 'popular'
+NUMBER_OF_PAGES = int((raw_input("Number of pages to scrape?\ndefault is 1: ") or 1))
 
 ###DO NOT MODIFY THESE UNLES YOU KNOW WHAT YOU ARE DOING####
 DEBUG = False
 HYPEM_URL = 'http://hypem.com/{}'.format(AREA_TO_SCRAPE)
-
 validFilenameChars = "-_.() %s%s" % (string.ascii_letters, string.digits)
 
 def removeDisallowedFilenameChars(filename):
@@ -54,6 +58,7 @@ class HypeScraper:
     def __init__(self):
         pass
 
+
     def start(self):
         print "--------STARTING DOWNLOAD--------"
         print "\tURL : {} ".format(HYPEM_URL)
@@ -120,7 +125,6 @@ def download_songs(self, tracks, cookie):
             title = removeDisallowedFilenameChars(track[u"song"])
             type = track[u"type"]
 
-            print "\tFETCHING SONG...."
             print u"\t{} by {}".format(title, artist)
 
@@ -129,29 +133,40 @@ def download_songs(self, tracks, cookie):
                 continue
 
             try:
-                serve_url = "http://hypem.com/serve/source/{}/{}".format(id, key)
-                request = urllib2.Request(serve_url, "" , {'Content-Type': 'application/json'})
-                request.add_header('cookie', cookie)
-                response = urllib2.urlopen(request)
-                song_data_json = response.read()
-                response.close()
-                song_data = json.loads(song_data_json)
-                url = song_data[u"url"]
-
-                download_response = urllib2.urlopen(url)
-                filename = "{} - {}.mp3".format(artist, title)
+                # filename = "{} - {}.mp3".format(artist, title)
+                filename = "{}.mp3".format(title)
                 if os.path.exists(filename):
                     print("File already exists , skipping")
                 else:
+                    # don't waste the time to download the song if it already exists
+                    print "\tFETCHING SONG...."
+                    serve_url = "http://hypem.com/serve/source/{}/{}".format(id, key)
+                    request = urllib2.Request(serve_url, "" , {'Content-Type': 'application/json'})
+                    request.add_header('cookie', cookie)
+                    response = urllib2.urlopen(request)
+                    song_data_json = response.read()
+                    response.close()
+                    song_data = json.loads(song_data_json)
+                    url = song_data[u"url"]
+
+                    download_response = urllib2.urlopen(url)
                     mp3_song_file = open(filename, "wb")
-                    mp3_song_file.write(download_response.read() )
+                    mp3_song_file.write(download_response.read())
                     mp3_song_file.close()
+
+                    # save tags to same file
+                    id3_tag = EasyID3()
+                    id3_tag['artist'] = artist
+                    id3_tag['title'] = title
+                    id3_tag.save(filename)
+
             except urllib2.HTTPError, e:
                 print 'HTTPError = ' + str(e.code) + " trying hypem download url."
             except urllib2.URLError, e:
                 print 'URLError = ' + str(e.reason) + " trying hypem download url."
             except Exception, e:
                 print 'generic exception: ' + str(e)
+

From ce336b3a2e6140326b0c56ea8daeb8d7caa4a582 Mon Sep 17 00:00:00 2001
From: Sachin Rudraraju
Date: Wed, 20 Aug 2014 12:28:08 -0400
Subject: [PATCH 2/3] updated requirements.txt

---
 requirements.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 27be06a..75b9921 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,2 @@
-BeautifulSoup4
\ No newline at end of file
+BeautifulSoup4
+mutagen

From 43b1e4307af412ad8ee2f09468fb59d57de82ca0 Mon Sep 17 00:00:00 2001
From: Sachin Rudraraju
Date: Wed, 15 Oct 2014 22:32:17 -0400
Subject: [PATCH 3/3] url directly from raw_input

---
 hypeme.py | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/hypeme.py b/hypeme.py
index c473a0d..36ab346 100644
--- a/hypeme.py
+++ b/hypeme.py
@@ -37,14 +37,17 @@
 # This is the general area that you'd
 # like to parse and scrape.
 # ex. 'popular', 'latest', '' or
-# 'track/'
-############################################
-AREA_TO_SCRAPE = raw_input("Area to scrape?\ndefault is 'popular': ") or 'popular'
+############################################
+
+# AREA_TO_SCRAPE = raw_input("Area URL to scrape?\ndefault is 'popular': ") or 'http://hypem.com/popular'
+# HYPEM_URL = 'http://hypem.com/{}'.format(AREA_TO_SCRAPE)
+
+HYPEM_URL = raw_input("Area URL to scrape?\ndefault is 'popular': ") or 'http://hypem.com/popular'
+DIRECTORY = raw_input("Where do you want to put the files?\ndefault is '/media/Data/Music/Unsorted': ") or '/media/Data/Music/Unsorted'
 NUMBER_OF_PAGES = int((raw_input("Number of pages to scrape?\ndefault is 1: ") or 1))
 
 ###DO NOT MODIFY THESE UNLES YOU KNOW WHAT YOU ARE DOING####
 DEBUG = False
-HYPEM_URL = 'http://hypem.com/{}'.format(AREA_TO_SCRAPE)
 validFilenameChars = "-_.() %s%s" % (string.ascii_letters, string.digits)
 
 def removeDisallowedFilenameChars(filename):
@@ -135,7 +138,7 @@ def download_songs(self, tracks, cookie):
             try:
                 # filename = "{} - {}.mp3".format(artist, title)
                 filename = "{}.mp3".format(title)
-                if os.path.exists(filename):
+                if os.path.exists(os.path.join(DIRECTORY, filename)):
                     print("File already exists , skipping")
                 else:
                     # don't waste the time to download the song if it already exists
@@ -149,8 +152,14 @@ def download_songs(self, tracks, cookie):
                     song_data = json.loads(song_data_json)
                     url = song_data[u"url"]
 
-                    download_response = urllib2.urlopen(url)
-                    mp3_song_file = open(filename, "wb")
+
+                    try:
+                        download_response = urllib2.urlopen(url, timeout=5)
+                    except urllib2.URLError, e:
+                        print '\tFUNCTION TIMEOUT. REDOWNLOAD AFTER.'
+                        continue
+
+                    mp3_song_file = open(os.path.join(DIRECTORY, filename), "wb")
                     mp3_song_file.write(download_response.read())
                     mp3_song_file.close()
 
@@ -158,7 +167,7 @@ def download_songs(self, tracks, cookie):
                     id3_tag = EasyID3()
                     id3_tag['artist'] = artist
                     id3_tag['title'] = title
-                    id3_tag.save(filename)
+                    id3_tag.save(os.path.join(DIRECTORY, filename))
 
             except urllib2.HTTPError, e:
                 print 'HTTPError = ' + str(e.code) + " trying hypem download url."
@@ -177,4 +186,4 @@ def main():
 
 if __name__ == "__main__":
     main()
-
\ No newline at end of file
+
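
Note (not part of the patches): below is a minimal standalone sketch of the download-and-tag flow these three commits converge on, assuming mutagen is installed, `url` points at a reachable MP3, and `directory` is writable. The helper name `fetch_and_tag` and the sample messages are illustrative only and do not exist in hypeme.py.

# Sketch only: mirrors the skip-if-exists, timeout, and EasyID3 steps from
# PATCH 1/3 and PATCH 3/3; fetch_and_tag is a hypothetical helper, not hypeme.py code.
import os
import urllib2

from mutagen.easyid3 import EasyID3


def fetch_and_tag(url, directory, artist, title):
    filename = "{}.mp3".format(title)
    path = os.path.join(directory, filename)

    # Skip the network round trip entirely if the file is already on disk.
    if os.path.exists(path):
        print "File already exists, skipping"
        return

    # Bound the request so a stalled host cannot hang the whole run.
    try:
        download_response = urllib2.urlopen(url, timeout=5)
    except urllib2.URLError, e:
        print "Download failed or timed out: {}".format(e)
        return

    with open(path, "wb") as mp3_song_file:
        mp3_song_file.write(download_response.read())

    # Write ID3 tags onto the freshly saved file, as PATCH 1/3 does.
    id3_tag = EasyID3()
    id3_tag['artist'] = artist
    id3_tag['title'] = title
    id3_tag.save(path)

In the script itself, the equivalent logic runs inside download_songs() once the serve-URL JSON lookup has produced the track's download url.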