-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
70 lines (64 loc) · 2.29 KB
/
utils.py
File metadata and controls
70 lines (64 loc) · 2.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import csv
import itertools
import random
import numpy as np
def read_user_list_file(file_):
    """Read a CSV file and return all of its rows.

    The file is parsed with the ``csv`` module using the ``unix`` dialect.

    Args:
        file_: Path of the CSV file to read (anything ``open`` accepts).

    Returns:
        A list with one entry per CSV row, in file order; each entry is the
        list of that row's fields as strings.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline='' hands line-ending handling over to the csv module, so that
    # line breaks embedded in quoted fields reach the caller unmodified
    # instead of being rewritten by universal-newline translation.
    with open(file_, 'r', newline='') as csvfile:
        return list(csv.reader(csvfile, dialect='unix'))
def group_ratings_by_key(ratings, key):
    """Group rows by the field at position ``key``.

    Rows are first sorted by that field (so its values must be mutually
    orderable), then bucketed. Inside each bucket the grouping field is
    removed from every row and the two remaining fields are stored as one
    ``first -> second`` dictionary entry; a later row with the same first
    field overwrites an earlier one.

    Args:
        ratings: Iterable of rows (lists or tuples). Each row must have
            exactly two fields left once position ``key`` is removed.
        key: Position of the field to group by.

    Returns:
        A dict mapping each distinct grouping value, in ascending order, to
        the dict built from that group's remaining field pairs.
    """
    def pick(row):
        return row[key]

    ordered = sorted(ratings, key=pick)
    result = {}
    for group_key, members in itertools.groupby(ordered, pick):
        # Each row minus its grouping field is a 2-item sequence, which
        # dict() consumes as a (key, value) pair.
        result[group_key] = dict(row[:key] + row[key + 1:] for row in members)
    return result
def index(dict_):
    """Number the items produced by iterating ``dict_``.

    Args:
        dict_: A dict (iterated by key) or any other iterable of hashable
            items.

    Returns:
        A dict mapping each item to its zero-based position in iteration
        order. If an item occurs more than once, its last position wins.
    """
    return {item: position for position, item in enumerate(dict_)}
def ratings_indices(ratings):
    """Split rating triples into coordinate pairs and values.

    Args:
        ratings: Iterable of indexable rows; positions 0 and 1 of each row
            are taken as the coordinates and position 2 as the value.

    Returns:
        A tuple ``(coords, values)``: ``coords`` is a list of
        ``[row[0], row[1]]`` lists and ``values`` the parallel list of
        ``row[2]`` entries, both in input order.
    """
    # Materialise once so that a one-shot iterable can feed both lists.
    rows = list(ratings)
    pairs = [[row[0], row[1]] for row in rows]
    payload = [row[2] for row in rows]
    return pairs, payload
def _setup_for_interactive(userfile):
    """Load a ratings CSV and build the lookup structures derived from it.

    Every usable row contributes a game id (column 0, parsed with
    ``int(float(...))``), a user (column 1, kept as a string) and a rating
    (column 2, parsed with ``float``). Rows whose rating column is missing
    or empty are skipped.

    Args:
        userfile: Path of the ratings CSV, handed to
            ``read_user_list_file``.

    Returns:
        A 13-tuple, in this order:

        * ``ratings`` - list of ``[game, user, rating]`` rows.
        * ``ratings_by_game``, ``table_games``, ``list_games``,
          ``means_game`` - ratings grouped by game, the game -> position
          table, the games in table order, and an array holding one
          ``[mean rating - global_mean]`` entry per game.
        * ``ratings_by_user``, ``table_users``, ``list_users``,
          ``means_user`` - the same four structures keyed by user.
        * ``indices``, ``values`` - ``[game position, user position]``
          pairs and the parallel list of ratings.
        * ``shape`` - array ``[number of games, number of users]``.
        * ``global_mean`` - mean of ``values``.

    Raises:
        ValueError: If a non-empty game or rating field is not numeric.
    """
    def _set_of_variables(key):
        # Group the parsed rows by column ``key`` and number the groups.
        grouped = group_ratings_by_key(ratings, key)
        table = index(grouped)
        return grouped, table, list(table)

    def calculate_mean_ratings(grouped_ratings):
        # One single-element row per group: the group's mean rating,
        # expressed as an offset from the global mean.
        return np.array([
            [np.mean(list(by_other.values())) - global_mean]
            for by_other in grouped_ratings.values()
        ])

    # Ignore any rows without a rating. The length check also covers blank
    # lines and short rows, which would otherwise raise IndexError.
    ratings = [
        [int(float(row[0])), row[1], float(row[2])]
        for row in read_user_list_file(userfile)
        if len(row) > 2 and row[2]
    ]

    ratings_by_game, table_games, list_games = _set_of_variables(0)
    ratings_by_user, table_users, list_users = _set_of_variables(1)

    # Replace game and user identifiers by their table positions.
    ratings_indexed = [
        [table_games[game], table_users[user], rating]
        for game, user, rating in ratings
    ]
    indices, values = ratings_indices(ratings_indexed)
    shape = np.array([len(list_games), len(list_users)])

    # Must be bound before calculate_mean_ratings runs: the helper reads it
    # through its closure.
    global_mean = np.mean(values)
    means_game = calculate_mean_ratings(ratings_by_game)
    means_user = calculate_mean_ratings(ratings_by_user)

    return (
        ratings,
        ratings_by_game, table_games, list_games, means_game,
        ratings_by_user, table_users, list_users, means_user,
        indices, values, shape,
        global_mean,
    )