Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added Assignment-3.pdf
Binary file not shown.
Binary file added Report 12111620 Ding Yixuan.pdf
Binary file not shown.
20 changes: 20 additions & 0 deletions config/gmf_factor8neg4 .yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
experiment_type: gmf
alias: gmf_factor8neg4
num_epoch: 100
batch_size: 1024
optimizer: adam
adam_lr: 0.001
num_users: 6040
num_items: 3706
latent_dim_mf: 8
latent_dim_mlp: 8
num_negative: 4
layers: [16, 64, 32, 16, 8] # layers[0] is the concat of latent user vector & latent item vector
l2_regularization: 0.0000001
weight_init_gaussian: true
use_cuda: true
device_id: 0
pretrain: false
pretrain_mf: checkpoints/gmf_factor8neg4_Epoch100_HR0.6391_NDCG0.2852.model
pretrain_mlp: checkpoints/mlp_factor8neg4_Epoch100_HR0.5606_NDCG0.2463.model
model_dir: checkpoints/{}_Epoch{}_HR{:.4f}_NDCG{:.4f}.model
18 changes: 18 additions & 0 deletions config/mlp_factor8neg4.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
experiment_type: mlp
alias: mlp_factor8neg4
num_epoch: 100
batch_size: 256 # 1024
optimizer: adam
adam_lr: 0.001
num_users: 6040
num_items: 3706
latent_dim_mlp: 8
num_negative: 4
layers: [16, 64, 32, 16, 8] # layers[0] is the concat of latent user vector & latent item vector
l2_regularization: 0.0000001 # MLP model is sensitive to hyper params
weight_init_gaussian: True
use_cuda: True
device_id: 0
pretrain: False
pretrain_mf: checkpoints/gmf_factor8neg4_Epoch100_HR0.6391_NDCG0.2852.model
model_dir: checkpoints/{}_Epoch{}_HR{:.4f}_NDCG{:.4f}.model
18 changes: 18 additions & 0 deletions config/mlp_factor8neg4_layer0.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
experiment_type: mlp
alias: mlp_factor8neg4_layer0
num_epoch: 200
batch_size: 256 # 1024,
optimizer: adam
adam_lr: 0.001
num_users: 6040
num_items: 3706
latent_dim_mlp: 4
num_negative: 4
layers: [8] # layers[0] is the concat of latent user vector & latent item vector
l2_regularization: 0.0000001 # MLP model is sensitive to hyper params
weight_init_gaussian: True
use_cuda: True
device_id: 0
pretrain: False
pretrain_mf: checkpoints/gmf_factor8neg4_Epoch100_HR0.6391_NDCG0.2852.model
model_dir: checkpoints/{}_Epoch{}_HR{:.4f}_NDCG{:.4f}.model
18 changes: 18 additions & 0 deletions config/mlp_factor8neg4_layer1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
experiment_type: mlp
alias: mlp_factor8neg4_layer1
num_epoch: 100
batch_size: 256 # 1024,
optimizer: adam
adam_lr: 0.001
num_users: 6040
num_items: 3706
latent_dim_mlp: 4
num_negative: 4
layers: [8, 8] # layers[0] is the concat of latent user vector & latent item vector
l2_regularization: 0.0000001 # MLP model is sensitive to hyper params
weight_init_gaussian: True
use_cuda: True
device_id: 0
pretrain: False
pretrain_mf: checkpoints/gmf_factor8neg4_Epoch100_HR0.6391_NDCG0.2852.model
model_dir: checkpoints/{}_Epoch{}_HR{:.4f}_NDCG{:.4f}.model
18 changes: 18 additions & 0 deletions config/mlp_factor8neg4_layer2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
experiment_type: mlp
alias: mlp_factor8neg4_layer2
num_epoch: 100
batch_size: 256 # 1024,
optimizer: adam
adam_lr: 0.001
num_users: 6040
num_items: 3706
latent_dim_mlp: 4
num_negative: 4
layers: [8, 16, 8] # layers[0] is the concat of latent user vector & latent item vector
l2_regularization: 0.0000001 # MLP model is sensitive to hyper params
weight_init_gaussian: True
use_cuda: True
device_id: 0
pretrain: False
pretrain_mf: checkpoints/gmf_factor8neg4_Epoch100_HR0.6391_NDCG0.2852.model
model_dir: checkpoints/{}_Epoch{}_HR{:.4f}_NDCG{:.4f}.model
18 changes: 18 additions & 0 deletions config/mlp_factor8neg4_layer3.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
experiment_type: mlp
alias: mlp_factor8neg4_layer3
num_epoch: 100
batch_size: 256 # 1024,
optimizer: adam
adam_lr: 0.001
num_users: 6040
num_items: 3706
latent_dim_mlp: 4
num_negative: 4
layers: [8, 32, 16, 8] # layers[0] is the concat of latent user vector & latent item vector
l2_regularization: 0.0000001 # MLP model is sensitive to hyper params
weight_init_gaussian: True
use_cuda: True
device_id: 0
pretrain: False
pretrain_mf: checkpoints/gmf_factor8neg4_Epoch100_HR0.6391_NDCG0.2852.model
model_dir: checkpoints/{}_Epoch{}_HR{:.4f}_NDCG{:.4f}.model
18 changes: 18 additions & 0 deletions config/mlp_factor8neg4_layer4.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
experiment_type: mlp
alias: mlp_factor8neg4_layer4
num_epoch: 100
batch_size: 256 # 1024,
optimizer: adam
adam_lr: 0.001
num_users: 6040
num_items: 3706
latent_dim_mlp: 4
num_negative: 4
layers: [8, 64, 32, 16, 8] # layers[0] is the concat of latent user vector & latent item vector
l2_regularization: 0.0000001 # MLP model is sensitive to hyper params
weight_init_gaussian: True
use_cuda: True
device_id: 0
pretrain: False
pretrain_mf: checkpoints/gmf_factor8neg4_Epoch100_HR0.6391_NDCG0.2852.model
model_dir: checkpoints/{}_Epoch{}_HR{:.4f}_NDCG{:.4f}.model
20 changes: 20 additions & 0 deletions config/neumf_factor8neg4.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
experiment_type: neumf
alias: neumf_factor8neg4
num_epoch: 100
batch_size: 1024
optimizer: adam
adam_lr: 0.001
num_users: 6040
num_items: 3706
latent_dim_mf: 8
latent_dim_mlp: 8
num_negative: 4
layers: [16, 64, 32, 16, 8] # layers[0] is the concat of latent user vector & latent item vector
l2_regularization: 0.0000001
weight_init_gaussian: true
use_cuda: true
device_id: 0
pretrain: false
pretrain_mf: checkpoints/gmf_factor8neg4_Epoch100_HR0.6391_NDCG0.2852.model
pretrain_mlp: checkpoints/mlp_factor8neg4_Epoch100_HR0.5606_NDCG0.2463.model
model_dir: checkpoints/{}_Epoch{}_HR{:.4f}_NDCG{:.4f}.model
80 changes: 80 additions & 0 deletions data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import torch
import random
import pandas as pd
from copy import deepcopy
from torch.utils.data import DataLoader, Dataset

class UserItemRatingDataset(Dataset):
    """Indexable torch Dataset over parallel user/item/target tensors.

    Each example is the triple ``(user_id, item_id, label)`` drawn from the
    same position of the three pre-built tensors.
    """

    def __init__(self, user, item, target) -> None:
        super().__init__()
        # Parallel 1-D tensors; position k across all three forms one example.
        self.user, self.item, self.target = user, item, target

    def __getitem__(self, index):
        columns = (self.user, self.item, self.target)
        return tuple(column[index] for column in columns)

    def __len__(self):
        # All three tensors share the same length; the user tensor is canonical.
        return len(self.user)

class Generator:
    """Prepares NCF-style training/evaluation data from a ratings DataFrame.

    Expects ``ratings`` with columns ``userId``, ``itemId``, ``rating``,
    ``timestamp``. Produces a leave-one-out train/test split, per-user
    negative-item pools, and torch DataLoaders/tensors for training and
    evaluation.
    """

    def __init__(self, ratings):
        self.ratings = ratings
        self.preprocess_ratings = self._binarize(ratings)
        self.user_pool = set(self.ratings['userId'].unique())
        self.item_pool = set(self.ratings['itemId'].unique())
        self.negatives = self._negative(self.ratings)
        self.train_ratings, self.test_ratings = self._split(self.preprocess_ratings)

    # For explicit feedback
    def _binarize(self, ratings):
        """Convert explicit ratings to implicit feedback: any rating > 0 becomes 1.0."""
        ratings = deepcopy(ratings)
        # Use .loc instead of chained indexing: ratings['rating'][mask] = 1.0 is a
        # SettingWithCopy hazard and a silent no-op under pandas copy-on-write.
        ratings.loc[ratings['rating'] > 0, 'rating'] = 1.0
        return ratings

    def _negative(self, ratings):
        """Per user: the pool of never-interacted items plus 99 fixed eval negatives."""
        interact_status = ratings.groupby('userId')['itemId'].apply(set).reset_index().rename(
            columns={'itemId': 'interacted_items'}
        )
        interact_status['negative_items'] = interact_status['interacted_items'].apply(
            lambda x: self.item_pool - x
        )
        # random.sample() rejects sets on Python >= 3.11, so materialize a list first.
        # Requires every user to have at least 99 non-interacted items.
        interact_status['negative_samples'] = interact_status['negative_items'].apply(
            lambda x: random.sample(list(x), 99)
        )
        return interact_status[['userId', 'negative_items', 'negative_samples']]

    def _split(self, ratings):
        """Leave-one-out split: each user's most recent interaction is the test item."""
        # Work on a copy so the helper 'rank_latest' column never leaks into the
        # caller's frame.
        ratings = ratings.copy()
        ratings['rank_latest'] = ratings.groupby('userId')['timestamp'].rank(method='first', ascending=False)
        test = ratings[ratings['rank_latest'] == 1]
        train = ratings[ratings['rank_latest'] > 1]
        assert train['userId'].nunique() == test['userId'].nunique()
        return train[['userId', 'itemId', 'rating']], test[['userId', 'itemId', 'rating']]

    def get_train_loader(self, num_negatives, batch_size):
        """Build a shuffled DataLoader of (user, item, label) pairs.

        Each positive training interaction is paired with ``num_negatives``
        freshly sampled negative items labelled 0.0.
        """
        users, items, ratings = [], [], []
        train_ratings = pd.merge(self.train_ratings, self.negatives[['userId', 'negative_items']], on='userId')
        # list(...) for the same Python >= 3.11 set-sampling reason as in _negative.
        train_ratings['negatives'] = train_ratings['negative_items'].apply(
            lambda x: random.sample(list(x), num_negatives)
        )
        for row in train_ratings.itertuples():
            users.append(int(row.userId))
            items.append(int(row.itemId))
            ratings.append(float(row.rating))
            for i in range(num_negatives):
                users.append(int(row.userId))
                items.append(int(row.negatives[i]))
                ratings.append(0.0)  # negative samples get 0 rating

        dataset = UserItemRatingDataset(user=torch.LongTensor(users),
                                        item=torch.LongTensor(items),
                                        target=torch.FloatTensor(ratings))

        return DataLoader(dataset, batch_size=batch_size, shuffle=True)

    @property
    def get_evaluate_data(self):
        """Evaluation tensors: [test_users, test_items, negative_users, negative_items].

        Each test user contributes one held-out positive and the 99 fixed
        negatives sampled in _negative().
        """
        test_ratings = pd.merge(self.test_ratings, self.negatives[['userId', 'negative_samples']], on='userId')
        test_users, test_items, negative_users, negative_items = [], [], [], []
        for row in test_ratings.itertuples():
            test_users.append(int(row.userId))
            test_items.append(int(row.itemId))
            for i in range(len(row.negative_samples)):
                negative_users.append(int(row.userId))
                negative_items.append(int(row.negative_samples[i]))
        return [torch.LongTensor(test_users), torch.LongTensor(test_items),
                torch.LongTensor(negative_users), torch.LongTensor(negative_items)]

Loading