forked from bjeong16/MatrixCompletion
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrecommender_using_Surprise.py
More file actions
92 lines (62 loc) · 2.5 KB
/
recommender_using_Surprise.py
File metadata and controls
92 lines (62 loc) · 2.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
import surprise
import time
from surprise.model_selection import train_test_split
from surprise import SVD
from surprise import accuracy
def readFile(path):
data_file = open(path)
return data_file
def makeCustomDataFile(data_file):
# combined_data_1.txt에는 4499개의 movieID가 저장되어있음.
custom_data_file = open("/Users/limjungmin/Netflix_Recommender/u.data", 'w')
#cnt = 0 : 디버깅용 Count 계수
for line in data_file:
if ":" in line:
movieID = line.split(":")[0]
#print(movieID)
#cnt+=1
else :
info = line.split(",")
userID = info[0]
rating = info[1]
date = info[2].split('\n')[0]
str = userID + ";" + movieID + ";" + rating + "\r\n"
custom_data_file.write(str)
#if cnt > 50 : break
print("make Custom Data File Done")
reader = surprise.Reader(line_format='user item rating', sep=';')
data = surprise.Dataset.load_from_file('/Users/limjungmin/Netflix_Recommender/u.data', reader=reader)
df = pd.DataFrame(data.raw_ratings, columns=["user", "item", "rate", "id"])
del df["id"]
print(df.head(10))
return data
def train_custom_data_file(data, algo):
trainset, testset = train_test_split(data, test_size=.25)
algo.fit(trainset)
predictions = algo.test(testset)
return predictions
def get_accuracy(predictions):
return accuracy.rmse(predictions)
if __name__ == '__main__':
# 1. 데이터 파일 읽어오기
start_time = time.time()
data_file = readFile("/Users/limjungmin/Netflix_Recommender/netflix-prize-data/combined_data_1.txt")
run_time = time.time() - start_time
print ( " Run time for readFile : %.4f (sec)" % (run_time) )
# 2. Surprise 패키지에 활용할 수 있도록 데이터 전처리
start_time = time.time()
custom_data_file = makeCustomDataFile(data_file)
run_time = time.time() - start_time
print ( " Run time for makeCustomDataFile : %.4f (sec)" % (run_time) )
# 3. 사용할 알고리즘(SVD)를 통한 학습 진행
start_time = time.time()
predictions = train_custom_data_file(custom_data_file, algo = SVD())
run_time = time.time() - start_time
print ( " Run time for train_custom_data_file : %.4f (sec)" % (run_time) )
# 4. 예측 결과를 가지고 RMSE 측정값 구하기
get_accuracy(predictions)