-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathregression_plotter.py
More file actions
114 lines (92 loc) · 4.95 KB
/
regression_plotter.py
File metadata and controls
114 lines (92 loc) · 4.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
from pylab import *
import os
# Search grid and experiment settings; every value below is substituted into
# the result-file names that are looked up further down.
taus = 10.0 ** np.arange(-3, 6)    # 1e-3, 1e-2, ..., 1e5 (nine values)
lbdas = 100.0 ** np.arange(-3, 2)  # 1e-6, 1e-4, ..., 1e2 (five values)
splits = range(20)
lrs = ['.01', '.001']  # strings on purpose: inserted verbatim into file names

# Model identifiers as spelled in the result-file names.
# (previous value, overridden: ['BHN', 'MCD'])
models = ['BHN_flow=IAF_coupling=4', 'MCD_drop_prob=.01']  # TODO

# Datasets to report on.
# (previous value, overridden: ['airfoil', 'parkinsons'])
dats = ['naval', 'kin8nm', 'power']  # , 'parkinsons']
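"""
Algorithm:
    for each (dataset, model):
        for each split:
            pick the (tau, lbda, lr) setting with the best validation LL
            and record the test LL obtained with that same setting
        average the selected validation / test LLs over the splits
"""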
# Directory whose entry names encode each run's settings and its final scores.
# Result locations that were assigned here before and then overridden:
#   /home/capybara/BayesianHypernetCW/launchers/
#   /data/lisatmp4/kruegerd/CLUSTER_RESULTS/mp2/OLD/launch_dev_regression.py/
dd = '/data/lisatmp4/kruegerd/launch_test_regression_large.py'

# List the directory once and reuse the listing for both filters below.
_entries = os.listdir(dd)

# Entry names carrying a final validation ("va") / test ("te") LL, together
# with a map from each name to the number parsed from the text after 'LL='.
# NOTE(review): `var` rebinds a name that `from pylab import *` presumably
# also provides -- confirm nothing below relies on the pylab version.
vap = [p for p in _entries if 'FINAL_va_LL' in p]
var = {p: float(p.split('LL=')[1]) for p in vap}
tep = [p for p in _entries if 'FINAL_te_LL' in p]
ter = {p: float(p.split('LL=')[1]) for p in tep}

# Combined list that the per-setting prefix lookup scans.
pp = vap + tep
print("done loading")
import time

t0 = time.time()

bests = {}      # (dataset, model) -> {split: (va_best, te_best, va_best_ss)}
avg_bests = {}  # (dataset, model) -> (mean va_best, mean te_best) over splits

# Hyperparameters are selected for each (dataset, model) INDEPENDENTLY on each
# split, so that nothing is tuned on the test set. The test score reported
# for a split is the one obtained with the setting that scored best on THAT
# split's validation set; these test scores are then averaged over splits.
# This gives a fair estimate of the combined (model, hyperoptimization)
# procedure. The hyperoptimization used here is a plain grid search.


def _best_for_split(dat, model, split):
    """Grid-search one (dataset, model, split) over (tau, lbda, lr).

    Returns a tuple ``(va_best, te_best, va_best_ss)``: the highest
    validation LL found, the test LL of that same setting, and the file-name
    prefix identifying the setting. When no setting has both result files the
    tuple is ``(-inf, -inf, '')``.
    """
    va_best = -np.inf
    te_best = -np.inf
    va_best_ss = ''
    for tau in taus:
        for lbda in lbdas:
            for lr in lrs:
                # File-name prefix of this setting, e.g.
                # slurm_script___lr0=.01_lbda=1e-06_dataset=power_split=9_epochs=400_model=MCD_drop_prob=.01_tau=100.0
                ss = ('slurm_script___lr0=' + str(lr)
                      + '_lbda=' + str(lbda)
                      + '_dataset=' + dat
                      + '_split=' + str(split)
                      + '_epochs=400'
                      + '_model=' + model
                      + '_tau=' + str(tau))
                strs = sorted(p for p in pp if p.startswith(ss))

                # Pick the two files by their marker instead of by position
                # in the sorted list, so a setting with only one of them is
                # reported as missing rather than raising IndexError.
                va_files = [p for p in strs if 'FINAL_va_LL=' in p]
                te_files = [p for p in strs if 'FINAL_te_LL=' in p]
                if not (va_files and te_files):
                    print(ss)  # missing or incomplete result for this setting
                    continue

                print(strs)
                va_LL = float(va_files[0].split('FINAL_va_LL=')[1])
                if va_LL > va_best:
                    va_best = va_LL
                    va_best_ss = ss
                    te_best = float(te_files[0].split('FINAL_te_LL=')[1])
    return va_best, te_best, va_best_ss


for dat in dats:
    for model in models:
        bests[(dat, model)] = {}
        va_bests = []
        te_bests = []
        for split in splits:
            # Best setting for this model on this dataset, FOR THIS SPLIT.
            va_best, te_best, va_best_ss = _best_for_split(dat, model, split)
            # A split without any complete result contributes -inf here,
            # which drags the corresponding average to -inf.
            va_bests.append(va_best)
            te_bests.append(te_best)
            bests[(dat, model)][split] = (va_best, te_best, va_best_ss)
        avg_bests[(dat, model)] = (mean(va_bests), mean(te_bests))
# Disabled legacy lookup, kept inside a bare string literal so it never runs.
# It built prefixes with the older 'regression.py___dataset=..._FINAL_va_LL='
# naming scheme and read both scores from strs[0].
"""
ss = 'regression.py___dataset=' + dat + '_split=' + str(split)+ '_lr0=' + str(lr) + '_epochs=400_lbda=' + str(lbda) + '_model=' + model + '_tau=' + str(tau) + '_FINAL_va_LL='
#/data/lisatmp4/kruegerd/CLUSTER_RESULTS/mp2/OLD/launch_dev_regression.py/regression.py___dataset=airfoil_split=0_lr0=.01_epochs=400_lbda=0.01_model=MCD_drop_prob=.01_tau=1000.0_FINAL_va_LL=-5468571.749
#/data/lisatmp4/kruegerd/CLUSTER_RESULTS/mp2/OLD/launch_dev_regression.py/regression.py___dataset=airfoil_split=0_lr0=.01_epochs=400_lbda=0.01_model=MCD_drop_prob=.01_FINAL_va_RMSE=124.214
strs = [p for p in pp if p.startswith(ss)]
if len(strs) > 0:
print strs
else:
print ss
va_LL = float(strs[0].split('FINAL_va_LL=')[1])
if va_LL > va_best:
va_best_ss = ss
va_best = va_LL
te_best = float(strs[0].split('FINAL_te_LL=')[1])
#print '\t\t\t' + ss
#import ipdb; ipdb.set_trace()#os.
"""
# Report the split-averaged (validation, test) scores for every
# (dataset, model) pair, then the seconds elapsed since t0 was taken.
print(avg_bests)
print(time.time() - t0)

# TODO: plotting (I'll want to load results above and average across splits... *sigh*)
if 0:  # plotting stub, intentionally disabled
    # `dat` here is whatever value the dataset loop left behind.
    figure()
    suptitle('RMSE ' + dat)
    figure()
    suptitle('LL ' + dat)