diff --git a/PyLyrics/functions.py b/PyLyrics/functions.py
index e4c7e19..e23a906 100644
--- a/PyLyrics/functions.py
+++ b/PyLyrics/functions.py
@@ -41,7 +41,7 @@ class PyLyrics:
@staticmethod
def getAlbums(singer):
singer = singer.replace(' ', '_')
- s = BeautifulSoup(requests.get('http://lyrics.wikia.com/{0}'.format(singer)).text)
+ s = BeautifulSoup(requests.get('http://lyrics.wikia.com/{0}'.format(singer)).text,"lxml")
spans = s.findAll('span',{'class':'mw-headline'})
als = []
@@ -60,13 +60,17 @@ def getAlbums(singer):
@staticmethod
def getTracks(album):
url = "http://lyrics.wikia.com/api.php?action=lyrics&artist={0}&fmt=xml".format(album.artist())
- soup = BeautifulSoup(requests.get(url).text)
+ soup = BeautifulSoup(requests.get(url).text,"lxml")
+ currentAlbum = None #stays None when no <album> entry matches album.name
for al in soup.find_all('album'):
if al.text.lower().strip() == album.name.strip().lower():
currentAlbum = al
break
- songs =[Track(song.text,album,album.artist()) for song in currentAlbum.findNext('songs').findAll('item')]
+ #Unknown album: raise a clear ValueError instead of crashing on an unbound currentAlbum
+ if currentAlbum is None:
+     raise ValueError("Unknown Album Name given")
+ songs =[Track(song.text,album,album.artist()) for song in currentAlbum.findNext('songs').findAll('item')]
return songs
@staticmethod
@@ -75,7 +79,7 @@ def getLyrics(singer, song):
singer = singer.replace(' ', '_')
song = song.replace(' ', '_')
r = requests.get('http://lyrics.wikia.com/{0}:{1}'.format(singer,song))
- s = BeautifulSoup(r.text)
+ s = BeautifulSoup(r.text,"lxml")
#Get main lyrics holder
lyrics = s.find("div",{'class':'lyricbox'})
if lyrics is None:
@@ -93,7 +97,7 @@ def getLyrics(singer, song):
for match in lyrics.findAll(tag):
match.replaceWithChildren()
#Get output as a string and remove non unicode characters and replace <br> with newlines
- output = str(lyrics).encode('utf-8', errors='replace')[22:-6:].decode("utf-8").replace('\n','').replace('<br/>','\n')
+ output = lyrics.decode()[22:-6].replace('\n','').replace('<br/>','\n') #Tag.decode() gives unicode text on Python 2 and 3 (str has no .decode on Python 3)
try:
return output
except:
diff --git a/build/lib.linux-x86_64-2.7/PyLyrics/__init__.py b/build/lib.linux-x86_64-2.7/PyLyrics/__init__.py
new file mode 100644
index 0000000..6e85576
--- /dev/null
+++ b/build/lib.linux-x86_64-2.7/PyLyrics/__init__.py
@@ -0,0 +1,14 @@
+__author__ = "Pradipta"
+__version__ = '1.0.0'
+
+try:
+ #Python 3 Imports
+ from .classes import *
+ from .functions import *
+except:
+ #Python 2 imports
+ from classes import *
+ from functions import *
+
+
+
\ No newline at end of file
diff --git a/build/lib.linux-x86_64-2.7/PyLyrics/classes.py b/build/lib.linux-x86_64-2.7/PyLyrics/classes.py
new file mode 100644
index 0000000..a19e17f
--- /dev/null
+++ b/build/lib.linux-x86_64-2.7/PyLyrics/classes.py
@@ -0,0 +1,5 @@
+#Classes for Scrapers
+try:
+ from .functions import *
+except:
+ from functions import *
diff --git a/build/lib.linux-x86_64-2.7/PyLyrics/functions.py b/build/lib.linux-x86_64-2.7/PyLyrics/functions.py
new file mode 100644
index 0000000..5bd1336
--- /dev/null
+++ b/build/lib.linux-x86_64-2.7/PyLyrics/functions.py
@@ -0,0 +1,110 @@
+import requests
+from bs4 import BeautifulSoup, Comment, NavigableString
+import sys, codecs, json
+
+class Track(object):
+ def __init__(self,trackName,album,artist):
+ self.name = trackName
+ self.album = album
+ self.artist = artist
+ def __repr__(self):
+ return self.name
+ def link(self):
+ return 'http://lyrics.wikia.com/{0}:{1}'.format(self.artist.replace(' ', '-'),self.name.replace(' ','-'))
+ def getLyrics(self):
+ return PyLyrics.getLyrics(self.artist,self.name)
+class Artist(object):
+ def __init__(self, name):
+ self.name = name
+ def getAlbums(self):
+ return PyLyrics.getAlbums(self.name)
+ def __repr__(self):
+ return self.name.encode('utf-8')
+class Album(object):
+ def __init__(self, name, link,singer):
+ self.year = name.split(' ')[-1]
+ self.name = name.replace(self.year,' ').rstrip()
+ self.url = link
+ self.singer = singer
+ def link(self):
+ return self.url
+ def __repr__(self):
+ if sys.version_info[0] == 2:
+ return self.name.encode('utf-8','replace')
+ return self.name
+ def artist(self):
+ return self.singer
+ def tracks(self):
+ return PyLyrics.getTracks(self)
+
+class PyLyrics:
+ @staticmethod
+ def getAlbums(singer):
+ singer = singer.replace(' ', '_')
+ s = BeautifulSoup(requests.get('http://lyrics.wikia.com/{0}'.format(singer)).text,"lxml")
+ spans = s.findAll('span',{'class':'mw-headline'})
+
+ als = []
+
+ for tag in spans:
+ try:
+ a = tag.findAll('a')[0]
+ als.append(Album(a.text,'http://lyrics.wikia.com' + a['href'],singer))
+ except:
+ pass
+
+ if als == []:
+ raise ValueError("Unknown Artist Name given")
+ return None
+ return als
+ @staticmethod
+ def getTracks(album):
+ url = "http://lyrics.wikia.com/api.php?action=lyrics&artist={0}&fmt=xml".format(album.artist())
+ soup = BeautifulSoup(requests.get(url).text,"lxml")
+
+ for al in soup.find_all('album'):
+ if al.text.lower().strip() == album.name.strip().lower():
+ currentAlbum = al
+ break
+ songs =[Track(song.text,album,album.artist()) for song in currentAlbum.findNext('songs').findAll('item')]
+ return songs
+
+ @staticmethod
+ def getLyrics(singer, song):
+ #Replace spaces with _
+ singer = singer.replace(' ', '_')
+ song = song.replace(' ', '_')
+ r = requests.get('http://lyrics.wikia.com/{0}:{1}'.format(singer,song))
+ s = BeautifulSoup(r.text,"lxml")
+ #Get main lyrics holder
+ lyrics = s.find("div",{'class':'lyricbox'})
+ if lyrics is None:
+ raise ValueError("Song or Singer does not exist or the API does not have Lyrics")
+ return None
+ #Remove Scripts
+ [s.extract() for s in lyrics('script')]
+
+ #Remove Comments
+ comments = lyrics.findAll(text=lambda text:isinstance(text, Comment))
+ [comment.extract() for comment in comments]
+
+ #Remove unecessary tags
+ for tag in ['div','i','b','a']:
+ for match in lyrics.findAll(tag):
+ match.replaceWithChildren()
+ #Get output as a string and remove non unicode characters and replace <br> with newlines
+ output = str(lyrics).encode('utf-8', errors='replace')[22:-6:].decode("utf-8").replace('\n','').replace('<br/>','\n')
+ try:
+ return output
+ except:
+ return output.encode('utf-8')
+
+def main():
+ albums = PyLyrics.getAlbums('OneRepublic')
+ print (albums)
+ tracks = PyLyrics.getTracks(albums[-1])
+ print (tracks[7].getLyrics())
+
+
+if __name__=='__main__':
+ main()
\ No newline at end of file
diff --git a/build/lib.linux-x86_64-2.7/PyLyrics/tests.py b/build/lib.linux-x86_64-2.7/PyLyrics/tests.py
new file mode 100644
index 0000000..0ee31d7
--- /dev/null
+++ b/build/lib.linux-x86_64-2.7/PyLyrics/tests.py
@@ -0,0 +1,22 @@
+import unittest
+try:
+ from .__init__ import * #Python 3
+except:
+ from __init__ import *
+
+try:
+ basestring = basestring
+except NameError:
+ basestring = (str, bytes)
+
+albums = PyLyrics.getAlbums('Taylor Swift')
+class PyLyricsTest(unittest.TestCase):
+ def testAlbums(self):
+ self.assertIsInstance(albums,list)
+ def testTracks(self):
+ self.assertIsInstance(albums[0].tracks(),list)
+ def testLyrics(self):
+ self.assertIsInstance(PyLyrics.getLyrics('Eminem','The Monster'),basestring)
+
+if __name__=='__main__':
+ unittest.main()
\ No newline at end of file
diff --git a/setup.py b/setup.py
index d47ab33..b70bb14 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@
packages=['PyLyrics'],
url="http://github.com/geekpradd/PyLyrics",
install_requires=[
- 'beautifulsoup4','requests',],
+ 'beautifulsoup4','requests','lxml'],
classifiers=[
"Development Status :: 5 - Production/Stable",
"Topic :: Internet",