-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathXiamiParser.py
More file actions
72 lines (61 loc) · 2.4 KB
/
XiamiParser.py
File metadata and controls
72 lines (61 loc) · 2.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import urllib2
import urllib
import logging
from opener import Opener
from bs4 import BeautifulSoup
from itertools import izip
# Root address of the Xiami site.
URL = "http://www.xiami.com"

# Search endpoint for each supported category.  Every value ends in "?" so
# that a urlencoded query string can be appended directly.
url_pattern = dict(
    song="http://www.xiami.com/search/song?",
    album="http://www.xiami.com/search/album?",
    artist="http://www.xiami.com/search/artist?",
)
def url_open(category, name):
    """Fetch the Xiami search results page for *name* and parse it.

    Args:
        category: key into ``url_pattern`` ("song", "album" or "artist").
        name: search keyword; a ``unicode`` string is encoded as UTF-8,
            a byte ``str`` is passed through unchanged.

    Returns:
        A ``BeautifulSoup`` tree built with the "html5lib" parser.

    Raises:
        KeyError: if *category* is not a key of ``url_pattern``.
    """
    # Only unicode needs encoding.  Calling .encode() on a Python 2 byte str
    # first decodes it as ASCII, which raises UnicodeDecodeError for any
    # non-ASCII keyword that is already encoded.
    if isinstance(name, unicode):
        name = name.encode('utf-8')
    query = urllib.urlencode({'key': name})
    url = url_pattern[category] + query
    # NOTE(review): Opener is project-local; presumably open() returns the
    # page markup (or a file-like object) -- confirm.
    html = Opener.Instance().open(url)
    return BeautifulSoup(html, "html5lib")
def find_song_link(soup):
    """Yield the ``href`` of the song link in every ``td.song_name`` cell.

    Exactly one value is produced per cell, in the order ``find_all``
    returns them, so the output stays aligned with other per-cell columns
    (``search_song`` zips it with ``find_song_info`` results).  A cell without
    an ``<a target="_blank">`` yields ``None`` instead of raising
    ``AttributeError``.
    """
    for cell in soup.find_all('td', "song_name"):
        anchor = cell.find('a', target="_blank")
        # find() returns None when nothing matches; keep the slot rather
        # than skipping it so zipped columns do not shift.
        yield anchor.get('href') if anchor is not None else None
# category = "song_name", "song_artist", "song_album"
def find_song_info(soup, category):
    """Yield the concatenated text of every ``<td>`` whose class is *category*.

    Args:
        soup: parsed results page (anything exposing ``find_all``).
        category: CSS class of the cells to read, e.g. "song_name",
            "song_artist" or "song_album".

    Yields:
        One string per matching cell: all of the cell's text nodes joined
        together with no separator.  A cell with no text yields an empty
        string.
    """
    for cell in soup.find_all('td', class_=category):
        # join() handles an empty list of text nodes (a bare reduce() with no
        # initial value raises TypeError) and avoids repeated concatenation.
        yield u"".join(cell.find_all(text=True))
def search_song(name):
    """Search Xiami for songs matching *name*, yielding one dict per hit.

    Each dict has the keys "title", "artist", "album" and "url".  The four
    values come from four column generators over the same results page that
    are zipped together, so iteration stops at the shortest column.
    """
    page = url_open("song", name)
    columns = (
        find_song_info(page, "song_name"),
        find_song_info(page, "song_artist"),
        find_song_info(page, "song_album"),
        find_song_link(page),
    )
    fields = ("title", "artist", "album", "url")
    for row in izip(*columns):
        yield dict(izip(fields, row))
def search_album(name):
    """Search Xiami for albums matching *name*, yielding one dict per hit.

    Each dict has the keys "title", "artist" and "url", all read from the
    ``p.name`` paragraph of a ``div.album_item100_block`` element: the artist
    is the ``title`` attribute of its ``a.singer`` link, while title and url
    are the ``title`` and ``href`` attributes of its first link.
    """
    page = url_open("album", name)
    for block in page.find_all('div', class_='album_item100_block'):
        name_par = block.find('p', class_='name')
        singer_anchor = name_par.find('a', class_="singer")
        artist = singer_anchor.get('title')
        first_anchor = name_par.find('a')
        url = first_anchor.get('href')
        title = first_anchor.get('title')
        yield {
            "title": title,
            "artist": artist,
            "url": url,
        }
def search_artist(name):
    """Search Xiami for artists matching *name*.

    Returns an ``izip`` iterator of ``((artist_name,), artist_url)`` pairs.
    The first pair is the header ``(("artist_name",), "artist_url")``; one
    pair follows for each ``div.artist_item100_block`` on the results page.

    NOTE(review): the inner ``izip`` wraps every name in a 1-tuple, and the
    header row differs from the dicts yielded by the song/album searches.
    This looks unintentional, but the shape is kept as-is -- confirm with
    callers before changing it.
    """
    page = url_open("artist", name)
    names = ["artist_name"]
    links = ["artist_url"]
    for block in page.find_all('div', class_='artist_item100_block'):
        buddy_par = block.find('p', class_='buddy')
        links.append(buddy_par.find('a').get('href'))
        name_par = block.find('p', class_='name')
        names.append(name_par.find('span').get_text())
    wrapped_names = izip(names)
    return izip(wrapped_names, links)
def main():
    """Demo entry point: search for a fixed song title and print each hit."""
    keyword = u'小时候'
    print(keyword)
    for hit in search_song(keyword):
        print(hit)


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()