from __future__ import division
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch import mean
from torch.autograd import Variable
import yaml
from lib import model, utils
from utils import uneye, test
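# NOTE: `utils` above refers to lib.utils (load_data/load_minibatch below), while
# uneye/test come from a separate top-level utils module; this assumes the repo
# provides both modules.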
with open("MD_GAN.yaml") as stream:
    try:
        config = yaml.safe_load(stream)  # yaml.load without an explicit Loader is deprecated/unsafe
    except yaml.YAMLError as exc:
        print(exc)
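# Illustrative MD_GAN.yaml layout (keys taken from the config lookups below;
# the values shown are placeholders, not the repo's actual settings):
#   GPU: "0"
#   LEARNING_RATE: 1e-4
#   BATCH_SIZE: 64
#   EPOCHS: 100
#   D_STEPS: 1
#   G_STEPS: 1
#   epsilon: 1e-8
#   LOG_EVERY: 10
#   NUM_FOLD: 5
#   CUDA: true
#   G_INPUT_SIZE: 100
#   G_HIDDEN_SIZE: 128
#   G_OUTPUT_SIZE: 64
#   D_INPUT_SIZE: 64
#   D_HIDDEN_SIZE: 128
#   D_OUTPUT_SIZE: 1
#   AE_INPUT_SIZE: 64
#   AE_HIDDEN_SIZE: 32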
# helper: pull the raw float values out of a Variable for logging
def extract(v):
    return v.data.storage().tolist()
# ##### Hyper-parameters
## Set GPU ID
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = config['GPU']
## Learning params
lr = float(config['LEARNING_RATE'])
batch_size = config['BATCH_SIZE']
num_epoch = config['EPOCHS']
d_steps = config['D_STEPS']
g_steps = config['G_STEPS']
eps = float(config['epsilon'])
log_every = config['LOG_EVERY']
num_fold = config['NUM_FOLD']
## Model params
# generator
g_input_size = config['G_INPUT_SIZE'] # random noise (z)
g_hidden_size = config['G_HIDDEN_SIZE'] # generator complexity
g_output_size = config['G_OUTPUT_SIZE'] # size of generated output vector
# discriminator
d_input_size = config['D_INPUT_SIZE']
d_hidden_size = config['D_HIDDEN_SIZE'] # discriminator complexity
d_output_size = config['D_OUTPUT_SIZE']
# autoencoder
ae_input_size = config['AE_INPUT_SIZE']
ae_hidden_size = config['AE_HIDDEN_SIZE']
# ##### define the MDGAN model
G = model.Generator(g_input_size, g_hidden_size, g_output_size)
D = model.Discriminator(d_input_size, d_hidden_size, d_output_size)
AE = model.AutoEncoder(ae_input_size, ae_hidden_size)
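# G maps (noise z, class label) to a synthetic feature vector; D returns a pair
# (real/fake score from its out_gan head, class logits from its out_aux head);
# AE is an autoencoder over the same feature space (see lib/model.py for the
# actual architectures).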
# explicit parameter lists (the same objects the modules register internally)
theta_D_gan = [D.fc1.weight, D.fc1.bias,
               D.out_gan.weight, D.out_gan.bias]
theta_D_aux = [D.out_aux.weight, D.out_aux.bias]
theta_G = [G.fc1.weight, G.fc1.bias,
           G.fc2.weight, G.fc2.bias]
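# Explicit lists let the GAN head and the auxiliary head share one optimizer
# below while G gets its own; D.parameters() and G.parameters() cover the same
# tensors and would also work here.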
# loss functions (moved to the GPU below when CUDA is enabled)
loss_bce = torch.nn.BCELoss()
loss_nll = torch.nn.NLLLoss()
# GPU mode
if config['CUDA']:
    G.cuda(), D.cuda(), AE.cuda()
    loss_bce.cuda(), loss_nll.cuda()
# define optimizers
G_solver = optim.RMSprop(theta_G, lr=lr)
D_solver = optim.RMSprop(theta_D_gan + theta_D_aux, lr=lr)
AE_solver = optim.Adam(AE.parameters(), lr=lr)
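# NOTE: AE_solver is created but never stepped in this script, and the loop below
# calls .cuda() on its Variables unconditionally, so a GPU is effectively required
# regardless of the CUDA flag.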
# ##### Load dataset
# define dataloader
load_data = utils.load_data()
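# load_data is assumed to be a generator: each next() call yields the
# (train, valid, test) arrays for one fold of the nested cross-validation.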
# ##### train loop
for ex_fold in range(num_fold):
    for in_fold in range(num_fold):
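        # Nested CV: the outer fold fixes the test split and the inner fold the
        # validation split, giving num_fold * num_fold runs in total.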
        X_train, y_train, X_valid, y_valid, X_test, y_test = next(load_data)
        load_minibatch = utils.load_minibatch(X_train, y_train)
        num_batch = int(np.ceil(np.shape(X_train)[0] / batch_size))
        for epoch in range(num_epoch):
            for batch in range(num_batch):
                # load data batch
                x_mb, y_mb, z_mb, zy_mb = next(load_minibatch)
                X_real = Variable(x_mb).cuda()  # input features of real data
                y = Variable(y_mb).cuda()       # class targets of real data
                z = Variable(z_mb, volatile=True).cuda()     # noise, volatile for the D step (inference mode)
                z_y = Variable(zy_mb, volatile=True).cuda()  # sampled class labels for conditioning G
                # real & fake labels
                y_real = Variable(torch.ones(y.size()[0]).unsqueeze(1)).cuda()
                y_fake = Variable(torch.zeros(y.size()[0]).unsqueeze(1)).cuda()
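                # (targets for the adversarial BCE loss: D should output 1 for real, 0 for fake)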
                ## Discriminator
                for d_step in range(d_steps):
                    for p in D.parameters():    # reset requires_grad
                        p.requires_grad = True  # set to False below during the G update
                    D.train(True)
                    D.zero_grad()
                    # generate fake data
                    G_z = G(z, z_y)
                    X_fake = Variable(G_z.data).cuda()  # re-wrap .data: non-volatile and detached from G
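                    # (because X_fake is detached, the D update below cannot backprop into G's weights)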
                    # forward
                    D_real, C_real = D(X_real)  # model output
                    D_fake, C_fake = D(X_fake)
                    # calculate accuracy
                    _, pred_real = torch.max(C_real.data, 1)
                    _, pred_fake = torch.max(C_fake.data, 1)
                    total = y.size(0)  # *2  # number of examples
                    y_c = uneye(y, 'pred')
                    correct = torch.sum(pred_real == y_c.data)
                    #correct += torch.sum(pred_fake == y_c.data)
                    train_acc = correct / total * 100
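                    # train_acc measures the aux classifier on real samples only (the
                    # commented-out lines would also count fakes); uneye presumably
                    # converts one-hot targets back to class indices for NLLLoss.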
                    # loss
                    D_real_loss = loss_bce(D_real, y_real)
                    D_fake_loss = loss_bce(D_fake, y_fake)
                    #D_real_loss = -mean(D_real + eps)  # WGAN loss
                    #D_fake_loss = mean(D_fake + eps)
                    C_real_loss = loss_nll(C_real, y_c)
                    C_fake_loss = loss_nll(C_fake, y_c)
                    DC_real_loss = D_real_loss + C_real_loss
                    DC_fake_loss = D_fake_loss + C_fake_loss
                    # backprop & update params: split real and fake losses into two
                    # updates (GAN hack); zero grads between steps so the fake-batch
                    # update does not re-apply the real-batch gradients
                    DC_real_loss.backward()
                    D_solver.step()
                    D.zero_grad()
                    DC_fake_loss.backward()
                    D_solver.step()
                    # weight clipping (only needed with the commented-out WGAN loss)
                    #for p in theta_D_gan:
                    #    p.data.clamp_(-.01, .01)
                ## Generator
                for g_step in range(g_steps):
                    for p in D.parameters():
                        p.requires_grad = False  # freeze D to avoid unneeded gradient computation
                    G.zero_grad()
                    # generate fake data; re-wrap z so it is no longer volatile and a graph is built
                    z = Variable(z.data)
                    X_fake = G(z, y)
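                    # NOTE: here G is conditioned on the real labels y (consistent with
                    # the aux losses below targeting y_c), whereas the D step conditioned
                    # on the sampled labels z_y; if that asymmetry is unintended, pass
                    # z_y here instead.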
                    # forward
                    D_real, C_real = D(X_real)  # model output
                    D_fake, C_fake = D(X_fake)
                    # loss
                    C_real_loss = loss_nll(C_real, y_c)  # cross-entropy aux loss
                    C_fake_loss = loss_nll(C_fake, y_c)
                    #G_loss = -mean(D_fake + eps)  # WGAN loss
                    G_loss = loss_bce(D_fake, y_real)
                    GC_loss = G_loss + C_real_loss + C_fake_loss
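                    # (non-saturating GAN objective, pushing D(X_fake) toward "real",
                    # combined with both auxiliary classification terms, AC-GAN style)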
                    # backprop & update params
                    GC_loss.backward()
                    G_solver.step()
            if epoch % log_every == 0:
                # calc test accuracy
                test_acc = test(X_test, y_test, D)
                D_loss = D_real_loss + D_fake_loss
                C_loss = C_real_loss + C_fake_loss
                print('epoch: %s; D: %s; G: %s; C: %s; train_acc: %.1f; test_acc: %.1f'
                      % (epoch, extract(D_loss)[0], extract(G_loss)[0], extract(C_loss)[0],
                         train_acc, test_acc))
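# NOTE: X_valid/y_valid and the autoencoder AE are loaded/constructed but never
# used in this loop; presumably they serve AE pretraining or model selection
# elsewhere in the repo, or are left for a later stage.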