-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPullGenreData.py
More file actions
executable file
·104 lines (84 loc) · 3.59 KB
/
PullGenreData.py
File metadata and controls
executable file
·104 lines (84 loc) · 3.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import pylast
# import numpy as np
import datetime
import json
import pymongo
# Load the Last.fm API keys and account login from the JSON credentials file
# in the current working directory.
with open('./credentials.json') as credentials_file:
    creds = json.load(credentials_file)

API_KEY = creds['API_KEY']
API_SECRET = creds['API_SECRET']

# Write operations against the API require an authenticated session, so the
# account name and password hash are passed to the network object as well.
username = creds['username']
password_hash = creds['password_hash']

network = pylast.LastFMNetwork(
    api_key=API_KEY,
    api_secret=API_SECRET,
    username=username,
    password_hash=password_hash,
)

# Handle on the authenticated account, plus the Unix time it was registered.
user = network.get_user(username)
user_registered_time = user.get_unixtime_registered()
# Connect to the MongoDB server named in the credentials file and grab the
# two collections this script reads from and writes to.
client = pymongo.MongoClient(creds['mongo_server'])
db = client['musicdb']
artists = db['artists']
tracks = db['tracks']
# Time window of scrobbles to request from Last.fm, as Unix-epoch seconds
# rendered as strings. It opens at the newest listen already stored in the
# `tracks` collection and closes at the current time.
latest_stored = tracks.find_one(sort=[('listen_date', -1)])
if latest_stored is None:
    # Empty collection (e.g. first run): there is no newest listen to resume
    # from, so start from the moment the account was registered.
    start_date = str(user_registered_time)
else:
    # NOTE(review): stored listen_date values are naive datetimes that
    # PyMongo hands back in UTC, so they are converted as UTC here rather
    # than as local time; otherwise the window can start too late on a
    # machine whose clock is behind UTC -- confirm against stored data.
    latest_listen = latest_stored['listen_date'].replace(
        tzinfo=datetime.timezone.utc)
    start_date = str(int(latest_listen.timestamp()))

# datetime.timestamp() is used instead of strftime('%s'), which is a
# platform-specific extension and not available everywhere.
end_date = str(int(datetime.datetime.now().timestamp()))
# Collect every scrobble inside the [start_date, end_date] window. Each
# request is capped at 1000 tracks; the next request reuses the timestamp of
# the last track just received as its upper bound, and the loop stops once a
# request comes back empty.
total_tracks = []
recent_tracks = []
upper_bound = end_date
while True:
    recent_tracks = user.get_recent_tracks(
        limit=1000, time_from=start_date, time_to=upper_bound)
    if not recent_tracks:
        break

    total_tracks.extend(recent_tracks)
    upper_bound = recent_tracks[-1].timestamp

    # Progress output: UTC time of the last track in this batch.
    batch_tail = datetime.datetime.utcfromtimestamp(int(upper_bound))
    print(batch_tail.strftime('%D %H:%M:%S'))

print(len(total_tracks))
# Store the fetched scrobbles in MongoDB, skipping anything already present.
#
# Two in-memory sets mirror what the database already holds so that each
# scrobble is checked in constant time instead of scanning a list:
#   known_artists -- artist names present in the `artists` collection
#   known_plays   -- (track title, listen date) pairs present in `tracks`
known_artists = set(artists.distinct('name'))
known_plays = {
    (doc['_id'].get('track'), doc['_id'].get('listen_date'))
    for doc in tracks.aggregate([
        {"$group": {"_id": {'track': "$track", 'listen_date': "$listen_date"}}}
    ])
}

for t in total_tracks:
    artist_name = t.track.artist.name
    track_title = t.track.title
    # playback_date is a string such as "26 Jul 2019, 14:05".
    track_playback_date = datetime.datetime.strptime(
        t.playback_date, '%d %b %Y, %H:%M')

    # First sighting of an artist: look up its top tags once and store them
    # as the artist's genres.
    if artist_name not in known_artists:
        known_artists.add(artist_name)
        genre_list = t.track.artist.get_top_tags(limit=10)
        genres = [g.item.get_name() for g in genre_list]
        artists.insert_one({
            'name': artist_name,
            'genres': genres,
        })
        print('Added %s to database.' % artist_name)
    else:
        print('%s already present in the database.' % artist_name)

    # A play is identified by its title together with its listen date; the
    # set is updated as we go so duplicates within this run are skipped too.
    play_key = (track_title, track_playback_date)
    if play_key not in known_plays:
        known_plays.add(play_key)
        print('Adding %s, listened to on %s.' % (track_title, t.playback_date))
        tracks.insert_one({
            'artist': artist_name,
            'album': t.album,
            'track': track_title,
            'listen_date': track_playback_date,
        })
    else:
        print('Artist %s play at %s already recorded in database.' % (artist_name, t.playback_date))
# if artist_name not in added_artists:
# added_artists.append(artist_name)
# artist_list.append({
# 'name': artist_name,
# 'genres': genres
# })
# to_csv.loc[index] = {
# 'artist': artist_name,
# 'album': t.album,
# 'track': t.track.title,
# 'listen_date': t.playback_date
# }
# to_csv.to_csv('/Users/nick/Dropbox (Personal)/oxfordcomma.github.io/{0}.csv'.format(datetime.datetime.now().strftime('%d%b%Y_%H%M%S')), header=True, index=False)