Use an explicit lxml parser and make the album/lyrics lookups robust.

* Pass "lxml" to every BeautifulSoup() call and add lxml to install_requires.
* getTracks: start from currentAlbum = None and raise ValueError when the
  requested album is not in the API response (it used to be an unbound local).
* getLyrics: read the lyric markup with Tag.decode_contents() so the same
  line works on Python 2 and Python 3 without str()/encode()/decode() juggling.

NOTE(review): build/lib.linux-x86_64-2.7/ looks like generated build output;
its files are reproduced as submitted and are not hand-edited here. Consider
leaving that directory out of the commit.
NOTE(review): indentation was reconstructed with four spaces and the index
hashes are carried over unchanged; confirm both against the working tree
before applying.

diff --git a/PyLyrics/functions.py b/PyLyrics/functions.py
index e4c7e19..e23a906 100644
--- a/PyLyrics/functions.py
+++ b/PyLyrics/functions.py
@@ -41,7 +41,7 @@ class PyLyrics:
     @staticmethod
     def getAlbums(singer):
         singer = singer.replace(' ', '_')
-        s = BeautifulSoup(requests.get('http://lyrics.wikia.com/{0}'.format(singer)).text)
+        s = BeautifulSoup(requests.get('http://lyrics.wikia.com/{0}'.format(singer)).text,"lxml")
         spans = s.findAll('span',{'class':'mw-headline'})
 
         als = []
@@ -60,13 +60,17 @@ def getAlbums(singer):
     @staticmethod
     def getTracks(album):
         url = "http://lyrics.wikia.com/api.php?action=lyrics&artist={0}&fmt=xml".format(album.artist())
-        soup = BeautifulSoup(requests.get(url).text)
+        soup = BeautifulSoup(requests.get(url).text,"lxml")
 
+        #Stays None when no <album> entry matches, so the miss can be reported below
+        currentAlbum = None
         for al in soup.find_all('album'):
             if al.text.lower().strip() == album.name.strip().lower():
                 currentAlbum = al
                 break
+        if currentAlbum is None:
+            raise ValueError("Unknown Album Name given")
         songs =[Track(song.text,album,album.artist()) for song in currentAlbum.findNext('songs').findAll('item')]
         return songs
 
     @staticmethod
@@ -75,7 +79,7 @@ def getLyrics(singer, song):
         singer = singer.replace(' ', '_')
         song = song.replace(' ', '_')
         r = requests.get('http://lyrics.wikia.com/{0}:{1}'.format(singer,song))
-        s = BeautifulSoup(r.text)
+        s = BeautifulSoup(r.text,"lxml")
         #Get main lyrics holder
         lyrics = s.find("div",{'class':'lyricbox'})
         if lyrics is None:
@@ -93,7 +97,8 @@ def getLyrics(singer, song):
             for match in lyrics.findAll(tag):
                 match.replaceWithChildren()
         #Get output as a string and remove non unicode characters and replace <br> with newlines
-        output = str(lyrics).encode('utf-8', errors='replace')[22:-6:].decode("utf-8").replace('\n','').replace('<br/>','\n')
+        #decode_contents() returns the inner markup as text on Python 2 and 3, so no fixed-width slicing of the wrapper tag is needed
+        output = lyrics.decode_contents().replace('\n','').replace('<br/>','\n')
         try:
             return output
         except:
diff --git a/build/lib.linux-x86_64-2.7/PyLyrics/__init__.py b/build/lib.linux-x86_64-2.7/PyLyrics/__init__.py
new file mode 100644
index 0000000..6e85576
--- /dev/null
+++ b/build/lib.linux-x86_64-2.7/PyLyrics/__init__.py
@@ -0,0 +1,14 @@
+__author__ = "Pradipta"
+__version__ = '1.0.0'
+
+try:
+    #Python 3 Imports
+    from .classes import *
+    from .functions import *
+except:
+    #Python 2 imports
+    from classes import *
+    from functions import *
+
+
+    
\ No newline at end of file
diff --git a/build/lib.linux-x86_64-2.7/PyLyrics/classes.py b/build/lib.linux-x86_64-2.7/PyLyrics/classes.py
new file mode 100644
index 0000000..a19e17f
--- /dev/null
+++ b/build/lib.linux-x86_64-2.7/PyLyrics/classes.py
@@ -0,0 +1,5 @@
+#Classes for Scrapers
+try:
+    from .functions import *
+except:
+    from functions import *
diff --git a/build/lib.linux-x86_64-2.7/PyLyrics/functions.py b/build/lib.linux-x86_64-2.7/PyLyrics/functions.py
new file mode 100644
index 0000000..5bd1336
--- /dev/null
+++ b/build/lib.linux-x86_64-2.7/PyLyrics/functions.py
@@ -0,0 +1,110 @@
+import requests
+from bs4 import BeautifulSoup, Comment, NavigableString
+import sys, codecs, json
+
+class Track(object):
+    def __init__(self,trackName,album,artist):
+        self.name = trackName
+        self.album = album
+        self.artist = artist
+    def __repr__(self):
+        return self.name
+    def link(self):
+        return 'http://lyrics.wikia.com/{0}:{1}'.format(self.artist.replace(' ', '-'),self.name.replace(' ','-'))
+    def getLyrics(self):
+        return PyLyrics.getLyrics(self.artist,self.name)
+class Artist(object):
+    def __init__(self, name):
+        self.name = name
+    def getAlbums(self):
+        return PyLyrics.getAlbums(self.name)
+    def __repr__(self):
+        return self.name.encode('utf-8')
+class Album(object):
+    def __init__(self, name, link,singer):
+        self.year = name.split(' ')[-1]
+        self.name = name.replace(self.year,' ').rstrip()
+        self.url = link
+        self.singer = singer
+    def link(self):
+        return self.url
+    def __repr__(self):
+        if sys.version_info[0] == 2:
+            return self.name.encode('utf-8','replace')
+        return self.name
+    def artist(self):
+        return self.singer
+    def tracks(self):
+        return PyLyrics.getTracks(self)
+
+class PyLyrics:
+    @staticmethod
+    def getAlbums(singer):
+        singer = singer.replace(' ', '_')
+        s = BeautifulSoup(requests.get('http://lyrics.wikia.com/{0}'.format(singer)).text,"lxml")
+        spans = s.findAll('span',{'class':'mw-headline'})
+
+        als = []
+
+        for tag in spans:
+            try:
+                a = tag.findAll('a')[0]
+                als.append(Album(a.text,'http://lyrics.wikia.com' + a['href'],singer))
+            except:
+                pass
+
+        if als == []:
+            raise ValueError("Unknown Artist Name given")
+            return None
+        return als
+    @staticmethod
+    def getTracks(album):
+        url = "http://lyrics.wikia.com/api.php?action=lyrics&artist={0}&fmt=xml".format(album.artist())
+        soup = BeautifulSoup(requests.get(url).text,"lxml")
+
+        for al in soup.find_all('album'):
+            if al.text.lower().strip() == album.name.strip().lower():
+                currentAlbum = al
+                break
+        songs =[Track(song.text,album,album.artist()) for song in currentAlbum.findNext('songs').findAll('item')]
+        return songs
+
+    @staticmethod
+    def getLyrics(singer, song):
+        #Replace spaces with _
+        singer = singer.replace(' ', '_')
+        song = song.replace(' ', '_')
+        r = requests.get('http://lyrics.wikia.com/{0}:{1}'.format(singer,song))
+        s = BeautifulSoup(r.text,"lxml")
+        #Get main lyrics holder
+        lyrics = s.find("div",{'class':'lyricbox'})
+        if lyrics is None:
+            raise ValueError("Song or Singer does not exist or the API does not have Lyrics")
+            return None
+        #Remove Scripts
+        [s.extract() for s in lyrics('script')]
+
+        #Remove Comments
+        comments = lyrics.findAll(text=lambda text:isinstance(text, Comment))
+        [comment.extract() for comment in comments]
+
+        #Remove unecessary tags
+        for tag in ['div','i','b','a']:
+            for match in lyrics.findAll(tag):
+                match.replaceWithChildren()
+        #Get output as a string and remove non unicode characters and replace <br> with newlines
+        output = str(lyrics).encode('utf-8', errors='replace')[22:-6:].decode("utf-8").replace('\n','').replace('<br/>','\n')
+        try:
+            return output
+        except:
+            return output.encode('utf-8')
+
+def main():
+    albums = PyLyrics.getAlbums('OneRepublic')
+    print (albums)
+    tracks = PyLyrics.getTracks(albums[-1])
+    print (tracks[7].getLyrics())
+
+
+if __name__=='__main__':
+    main()
\ No newline at end of file
diff --git a/build/lib.linux-x86_64-2.7/PyLyrics/tests.py b/build/lib.linux-x86_64-2.7/PyLyrics/tests.py
new file mode 100644
index 0000000..0ee31d7
--- /dev/null
+++ b/build/lib.linux-x86_64-2.7/PyLyrics/tests.py
@@ -0,0 +1,22 @@
+import unittest
+try:
+    from .__init__ import * #Python 3
+except:
+    from __init__ import *
+
+try:
+    basestring = basestring
+except NameError:
+    basestring = (str, bytes)
+
+albums = PyLyrics.getAlbums('Taylor Swift')
+class PyLyricsTest(unittest.TestCase):
+    def testAlbums(self):
+        self.assertIsInstance(albums,list)
+    def testTracks(self):
+        self.assertIsInstance(albums[0].tracks(),list)
+    def testLyrics(self):
+        self.assertIsInstance(PyLyrics.getLyrics('Eminem','The Monster'),basestring)
+
+if __name__=='__main__':
+    unittest.main()
\ No newline at end of file
diff --git a/setup.py b/setup.py
index d47ab33..b70bb14 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@
     packages=['PyLyrics'],
     url="http://github.com/geekpradd/PyLyrics",
     install_requires=[
-        'beautifulsoup4','requests',],
+        'beautifulsoup4','requests','lxml'],
     classifiers=[
         "Development Status :: 5 - Production/Stable",
         "Topic :: Internet",