68 changes: 46 additions & 22 deletions hypeme.py
@@ -28,19 +28,26 @@
import string
import os


from mutagen.easyid3 import EasyID3
import re


##############AREA_TO_SCRAPE################
# This is the general area that you'd
# like to parse and scrape.
# ex. 'popular', 'latest', '<username>' or
# 'track/<id>'
############################################
AREA_TO_SCRAPE = 'popular'
NUMBER_OF_PAGES = 3
############################################

# AREA_TO_SCRAPE = raw_input("Area URL to scrape?\ndefault is 'popular': ") or 'http://hypem.com/popular'
# HYPEM_URL = 'http://hypem.com/{}'.format(AREA_TO_SCRAPE)

HYPEM_URL = raw_input("Area URL to scrape?\ndefault is 'popular': ") or 'http://hypem.com/popular'
DIRECTORY = raw_input("Where do you want to put the files?\ndefault is '/media/Data/Music/Unsorted': ") or '/media/Data/Music/Unsorted'
NUMBER_OF_PAGES = int((raw_input("Number of pages to scrape?\ndefault is 1: ") or 1))

###DO NOT MODIFY THESE UNLESS YOU KNOW WHAT YOU ARE DOING####
DEBUG = False
HYPEM_URL = 'http://hypem.com/{}'.format(AREA_TO_SCRAPE)


validFilenameChars = "-_.() %s%s" % (string.ascii_letters, string.digits)
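`removeDisallowedFilenameChars` is called later in the diff but is not itself touched by this change, so its body does not appear here. A minimal sketch of what such a sanitizer typically looks like against the `validFilenameChars` whitelist above (an illustration, not the file's actual implementation):

```python
# Hypothetical sketch -- the real removeDisallowedFilenameChars is not shown in this diff.
import string
import unicodedata

validFilenameChars = "-_.() %s%s" % (string.ascii_letters, string.digits)

def removeDisallowedFilenameChars(name):
    # Fold accented characters to ASCII, then keep only whitelisted characters.
    ascii_name = unicodedata.normalize('NFKD', unicode(name)).encode('ascii', 'ignore')
    return ''.join(c for c in ascii_name if c in validFilenameChars)
```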

@@ -54,6 +61,7 @@ class HypeScraper:
def __init__(self):
pass


def start(self):
print "--------STARTING DOWNLOAD--------"
print "\tURL : {} ".format(HYPEM_URL)
@@ -120,7 +128,6 @@ def download_songs(self, tracks, cookie):
title = removeDisallowedFilenameChars(track[u"song"])
type = track[u"type"]

print "\tFETCHING SONG...."

print u"\t{} by {}".format(title, artist)

@@ -129,29 +136,46 @@ def download_songs(self, tracks, cookie):
continue

try:
serve_url = "http://hypem.com/serve/source/{}/{}".format(id, key)
request = urllib2.Request(serve_url, "" , {'Content-Type': 'application/json'})
request.add_header('cookie', cookie)
response = urllib2.urlopen(request)
song_data_json = response.read()
response.close()
song_data = json.loads(song_data_json)
url = song_data[u"url"]

download_response = urllib2.urlopen(url)
filename = "{} - {}.mp3".format(artist, title)
if os.path.exists(filename):
# filename = "{} - {}.mp3".format(artist, title)
filename = "{}.mp3".format(title)
if os.path.exists(os.path.join(DIRECTORY, filename)):
print("File already exists , skipping")
else:
mp3_song_file = open(filename, "wb")
mp3_song_file.write(download_response.read() )
# don't waste the time to download the song if it already exists
print "\tFETCHING SONG...."
serve_url = "http://hypem.com/serve/source/{}/{}".format(id, key)
request = urllib2.Request(serve_url, "" , {'Content-Type': 'application/json'})
request.add_header('cookie', cookie)
response = urllib2.urlopen(request)
song_data_json = response.read()
response.close()
song_data = json.loads(song_data_json)
url = song_data[u"url"]


try:
download_response = urllib2.urlopen(url, timeout=5)
except urllib2.URLError, e:
print '\tDOWNLOAD TIMED OUT, SKIPPING. RETRY LATER.'
continue

mp3_song_file = open(os.path.join(DIRECTORY, filename), "wb")
mp3_song_file.write(download_response.read())
mp3_song_file.close()

# save tags to same file
id3_tag = EasyID3()
id3_tag['artist'] = artist
id3_tag['title'] = title
id3_tag.save(os.path.join(DIRECTORY, filename))

except urllib2.HTTPError, e:
print 'HTTPError = ' + str(e.code) + " trying hypem download url."
except urllib2.URLError, e:
print 'URLError = ' + str(e.reason) + " trying hypem download url."
except Exception, e:
print 'generic exception: ' + str(e)




@@ -162,4 +186,4 @@ def main():

if __name__ == "__main__":
main()
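To follow the reworked download path in `download_songs` without the old and new lines interleaved, the flow is: request the serve endpoint with the session cookie, read the real MP3 location from the returned JSON, then stream it to disk. A minimal Python 2 / urllib2 sketch of that flow, mirroring the code above; the cookie value, id/key pair, and paths are placeholders:

```python
# Sketch of the serve-endpoint flow used in download_songs (placeholder cookie/id/key).
import json
import os
import urllib2

def fetch_track(track_id, key, cookie, directory, filename):
    serve_url = "http://hypem.com/serve/source/{}/{}".format(track_id, key)
    request = urllib2.Request(serve_url, "", {'Content-Type': 'application/json'})
    request.add_header('cookie', cookie)
    response = urllib2.urlopen(request)
    song_data = json.loads(response.read())
    response.close()

    # The JSON response carries the actual MP3 location under "url".
    download = urllib2.urlopen(song_data[u"url"], timeout=5)
    path = os.path.join(directory, filename)
    with open(path, "wb") as mp3_file:
        mp3_file.write(download.read())
    return path
```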

3 changes: 2 additions & 1 deletion requirements.txt
@@ -1 +1,2 @@
BeautifulSoup4
BeautifulSoup4
mutagen
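requirements.txt now pulls in mutagen for the ID3 tagging step added above. A small standalone sketch of that usage for testing outside the scraper; the path and tag values are placeholders, and the `ID3NoHeaderError` branch covers freshly downloaded files that have no tag yet (the scraper's bare `EasyID3()` handles the same case):

```python
# Standalone sketch of the EasyID3 tagging used in hypeme.py (placeholder path/values).
from mutagen.easyid3 import EasyID3
from mutagen.id3 import ID3NoHeaderError

path = "Artist - Title.mp3"
try:
    tag = EasyID3(path)          # reuse an existing ID3 tag if present
except ID3NoHeaderError:
    tag = EasyID3()              # downloaded MP3s often have no ID3 header yet
tag['artist'] = "Artist"
tag['title'] = "Title"
tag.save(path)
```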