# RL_Sampling.py
# -*- coding: utf-8 -*-
import numpy as np
import torch
import csv
import time as clock
import torch.optim as optim
import MyNet
import glob
import read_dataset
import MyFunction
import meta_learner
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# hyper params
action_number = 4060  # size of the discrete action space (each action selects a subset of source cities)
state_num = 30        # number of candidate cities / training states
# input data
path = 'members'
name_dict, type_dict, data_dict, poi_density_list = read_dataset.import_data(path)
eps = np.finfo(np.float32).eps.item()  # small constant to avoid division by zero
# define reward function:
# decode the action into a city-selection mask, meta-train (FOMAML) on the
# selected cities, and use the test MAPE on the target city as the raw reward
def reward_fn(target, action_index):
    target_data = target
    select_dict = dict()
    temp = 0
    action = MyFunction.action_transform_fn(action_index)  # action index -> binary selection vector
    for i in range(len(action)):
        if action[i] == 1:
            select_dict[temp] = data_dict[i]
            temp += 1
    ori_ta = meta_learner.FOMAML_training(select_dict, 20)  # pretraining epochs = 20
    MAPE = meta_learner.Testing(target_data, ori_ta, 10)    # adaptation epochs = 10
    reward = MAPE
    return reward
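# For reference only: a minimal sketch of what MyFunction.action_transform_fn is
# assumed to do here, i.e. map a flat action index to a binary selection mask
# over the 30 candidate cities. The 3-city-subset encoding below is an
# assumption for illustration (note C(30, 3) = 4060 = action_number); the real
# mapping lives in MyFunction and may differ.
from itertools import combinations

_EXAMPLE_SUBSETS = list(combinations(range(30), 3))  # hypothetical subset encoding

def _example_action_transform(action_index):
    mask = np.zeros(30, dtype=np.int64)  # 0/1 mask over candidate cities
    mask[list(_EXAMPLE_SUBSETS[action_index])] = 1
    return mask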
# build state: for a newcomer (target) city, the state is a [30, 2] tensor whose
# first column compares POI densities and whose second column flags cities of
# the same type as the target
def build_state(Newcomer, type_dict, poi_density_list):
    target_type = type_dict[Newcomer]
    target_poi_density = poi_density_list[Newcomer]
    # state_poi_density: shape [30, 1]; inverse squared-error similarity to the target density
    SE = (poi_density_list - target_poi_density) * (poi_density_list - target_poi_density) + eps
    state_poi_density = target_poi_density / SE
    state_poi_density = torch.reshape(state_poi_density, [len(state_poi_density), 1])
    # state_type: shape [30, 1]; indicator mask of cities sharing the target's type
    state_type = np.zeros([30, 1])
    for i in range(len(state_type)):
        if target_type == type_dict[i]:
            state_type[i, :] = 1
    # match dtype and device of the density column so torch.cat does not fail
    state_type = torch.tensor(state_type, dtype=state_poi_density.dtype, device=state_poi_density.device)
    state = torch.cat((state_poi_density, state_type), dim=1)
    return state
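# e.g. build_state(0, type_dict, poi_density_list) -> [30, 2] tensor:
# column 0 = inverse-squared-error density similarity, column 1 = same-type flag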
# instantiation
actor_net = MyNet.Actor(output_size=action_number).to(device)
actor_optimizer = optim.Adam(actor_net.parameters(), lr=1e-2)

# exhaustive sampling: for every target city (state), evaluate every action and
# record (state, action, reward) triples on the actor network's buffers
for training_comer in range(state_num):
    state = build_state(training_comer, type_dict, poi_density_list)
    state = state.to(device)
    target_data = data_dict[training_comer]
    state = torch.reshape(state, [len(state), 2])  # ensure shape [30, 2]
    R_list = torch.zeros([action_number, 1]).to(device)  # per-action reward table
    for action_index in range(action_number):
        start_time = clock.time()
        print('state:', training_comer, 'action:', action_index)
        # compute reward
        temp_reward = reward_fn(target_data, action_index)
        actor_net.action_index_list.append(action_index)
        reward = 15 - temp_reward  # bias = 15, so lower MAPE -> higher reward
        R_list[action_index] = reward
        actor_net.reward_list.append(reward)
        actor_net.state_list.append(training_comer)
        end_time = clock.time()
        elapsed = end_time - start_time
        print("sample time =", elapsed, 's')
# output sample: one CSV row per (state, action_index, reward) triple
sample_state = np.reshape(actor_net.state_list, (-1, 1))
sample_action_index = np.reshape(actor_net.action_index_list, (-1, 1))
sample_reward = np.reshape(actor_net.reward_list, (-1, 1))
sample = np.concatenate((sample_state, sample_action_index, sample_reward), axis=1)
with open('sample.csv', 'w', newline='') as f:
    csv_writer = csv.writer(f)
    for row in sample:
        csv_writer.writerow(row)
print('writing done')