-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSubiSearch.py
More file actions
159 lines (141 loc) · 6.1 KB
/
SubiSearch.py
File metadata and controls
159 lines (141 loc) · 6.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!/usr/bin/env python
import os, sys, re, time, random
import sqlite3
#import pandas as pd
from collections import Counter
database_file = 'dbTools/subibox.sqlite'
conn = sqlite3.connect(database_file)
conn.row_factory = sqlite3.Row
def timing(f):
def wrap(*args):
time1 = time.time()
ret = f(*args)
time2 = time.time()
print('{} function took {:0.3f} ms'.format(f.__name__, (time2-time1)*1000.0))
return ret
return wrap
class Search():
def __init__(self, verbose=True):
self.verbose = verbose
def get_artist_albums(self, artist_id):
try:
artist_id = int(artist_id)
except ValueError:
print("SubiSearch.Search.get_artist_albums() [Warning] Value: {} doesn't look like an artist id.".format(artist_id))
cursor = conn.execute("""\
SELECT id, full_album_name, album_year, album_path, album_art
FROM albums
WHERE artist_id = ?
ORDER BY album_year
""", (artist_id,))
return cursor.fetchall()
#@timing
def artist_search(self,query_list):
print("Searching artists, query_list has {} items in it".format(len(query_list)))
result = []
new_query_list = query_list
for query in list(query_list):
"""
(1) Look for any match (%string%)
(2) Look for leading match (string%)
(3) Look for exact match (string)
If there are no matches found at any step, the string is removed from the
query list (and the new truncated query_list is returned along with the
query results)
"""
time1 = time.time()
# -(1)- Look for a like match (10 pts)
cursor = conn.execute("""\
SELECT a.id, a.dial_compatible_artist_name, a.full_artist_name
FROM artist_search_strings as ass
LEFT JOIN artists a
ON ass.artist_id = a.id
WHERE ass.search_string
LIKE ?
--GROUP BY a.full_artist_name
""", ('%'+query+'%',))
row = cursor.fetchall()
n_rows = len(row)
if n_rows == 0:
new_query_list.remove(query)
continue
for i in range(n_rows):
artist_id = row[i][0]
artist_full_name = row[i][2]
result.append([artist_full_name, 10, artist_id])
time2 = time.time()
#print('Any (*) Matches took {:0.3f} ms'.format((time2-time1)*1000.0))
time1 = time.time()
# -(2)- Look for an close match (20 pts)...
cursor = conn.execute("""\
SELECT id, dial_compatible_artist_name, full_artist_name
FROM artists
WHERE dial_compatible_artist_name LIKE ?
--GROUP BY full_artist_name
""", (query+'%',))
row = cursor.fetchall()
n_rows = len(row)
for i in range(n_rows):
artist_id = row[i][0]
artist_full_name = row[i][2]
result.append([artist_full_name, 20, artist_id])
time2 = time.time()
#print('Close (^) Matches took {:0.3f} ms'.format((time2-time1)*1000.0))
# -(3)- Look for an exact match (100 pts)...
time1 = time.time()
cursor = conn.execute("""\
SELECT id, dial_compatible_artist_name, full_artist_name
FROM artists
WHERE dial_compatible_artist_name = ?
--GROUP BY full_artist_name
""", (query,))
row = cursor.fetchall()
n_rows = len(row)
for i in range(n_rows):
artist_id = row[i][0]
artist_full_name = row[i][2]
result.append([artist_full_name, 100, artist_id])
time2 = time.time()
#print('Exact Matches took {:0.3f} ms'.format((time2-time1)*1000.0))
if len(result) > 0:
time1 = time.time()
# result looks like this...
#['name', 'score', 'id'])
# ...but we need to group duplicate scores. We use a Counter() do to this
# and make a dict to connect names and ids so we can remake result as
# new_result, which has the same columns, but no duplicates.
c = Counter()
id_map = {}
for artist_name, score, artist_id in result:
id_map[artist_name] = artist_id
c.update({artist_name: score})
sorted_grouped_results = []
for artist_name, score in sorted(c.items(), key=lambda x: x[1], reverse=True):
d = {
'name' : artist_name,
'score' : score,
'id' : id_map[artist_name]
}
sorted_grouped_results.append(d)
# Sort list by second column (score)...
time2 = time.time()
#print('Sorting took {:0.3f} ms'.format((time2-time1)*1000.0))
sorted_grouped_results.sort(key=lambda x:(x['score'], random.random()), reverse=True)
#print("Possible Matches:\n{}".format(sorted_grouped_results))
#print("Old Query List:\n{}".format(query_list))
#print("Query List:\n{}".format(new_query_list))
return sorted_grouped_results, new_query_list
else:
return None, new_query_list
def get_album_colors(self, album_id):
try:
album_id = int(album_id)
except ValueError:
print("SubiSearch.Search.get_album_colors() [Warning] Value: {} doesn't look like an artist id.".format(album_id))
cursor = conn.execute("""\
SELECT color, color_sum
FROM album_colors
WHERE album_id = ?
ORDER BY color_sum
""", (album_id,))
return cursor.fetchall()