MatrixCompletion/SVD.py at master · bjeong16/MatrixCompletion · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# Implementation of Stochastic Gradient Descent on SparseMatrix
# Parameters: Array X of size [n_samples, x_features] , Array Y of size [n_samples] (holds target values)

import numpy as np
from sklearn import linear_model
from scipy import sparse
from sklearn.linear_model import SGDClassifier


class vector:
    user_vector = [0.1] * 400000
    movie_vector = [0.1] * 17700

def sgd(a, b, c):

    lrate = 0.0001
    init_feature = 0.01

    matrix_data = sparse.coo_matrix((c, (a, b)))
    matrix_data = sparse.csr_matrix(matrix_data)

    maxA = max(a)
    maxB = max(b)
    globalAverage = global_average(matrix_data, maxA, maxB)
    print globalAverage
    for i in range((max(a))):
        for j in range((max(b))):
            avgRow = getAvg(matrix_data[i,:])
            avgCol = getAvg(matrix_data[:,j])
            offset = predictRating(i, j, matrix_data, globalAverage)
            print matrix_data[i,j]
            err = lrate * (matrix_data[i,j] - offset)
            uv = vector.user_vector[j]
            vector.user_vector[j] += lrate * (err * vector.movie_vector[i] - 0.02 * uv)
            vector.movie_vector[i] += lrate * (err * uv - 0.02 * vector.movie_vector[i])
            print "Feature 1 of userVector of user " + str(j) + " is " + str(vector.user_vector[j])
            print "Feature 1 of movieVector of movie " + str(i) + ' is ' + str(vector.movie_vector[i])


def global_average(matrix, a, b):
    average = 0
    count = 0
    for i in range(a):
        for j in range(b):
            if matrix[i,j] != 0:
                average += matrix[i,j]
                count += 1
            else:
                continue

    return float(average) / count

def predictRating(movie, user, matrix_data, globalAverage):
    row_num = -1
    average = 0
    count = 0
    print matrix_data[:,user].data
    for x in matrix_data[:,user].data:
        row_num += 1
        if x is not 0:
            movie_avg = getbetterAvg(matrix_data[row_num,:], globalAverage)
            average += (movie_avg - x)
            count += 1
        else:
            continue

    return float(average) / count

# Returns the value of the dot product for a given row and column

def dotproduct(row_array, col_array, num_features):
    total_dot_product = 0
    for x in range(num_features):
         total_dot_product += row_array[x] + col_array[x]

    return total_dot_product

# given a vector, find the average rating in that particular vector (can be either movie vector or user vector)

def getAvg(vector):
    total = 0
    count = 0

    for x in vector.data:
        if x is not 0:
            total += x
            count += 1

        else:
            continue

    average = float(total) / count
    return average

def getbetterAvg(vector, glo_average):

    total = 0
    count = 0

    for x in vector.data:
        if x is not 0:
            total += x
            count += 1

        else:
            continue

    k = 25

    betterAverage = (glo_average * k + total) / (k + count)
    return betterAverage