-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcompute_dft_cca.py
More file actions
86 lines (56 loc) · 2.62 KB
/
compute_dft_cca.py
File metadata and controls
86 lines (56 loc) · 2.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
''' Generalised version of run_dft_cca: compares arbitrary layer pairs across
two checkpoints - e.g. layer 0 and layer 34 for epoch 10 and epoch 150.

Works for activations saved from warmup runs too. '''
import numpy as np
import time
import cca_core
import dft_ccas
import os
import os.path
import ipdb
'''
'results/15420174015569954LB_stepdecay_largelr/epoch_20.t7',
'results/15419858381338964SB_stepdcy_small_lr/iter_199.t7',
'results/15419858030298254LB_warmup_largelr/epoch_init.t7',
'''
# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------
# Root directory of the saved activations. The working directory is switched
# to it so that relative checkpoint paths resolve against this folder.
path = '/mlodata1/gotmare/sgdr/results/15419858030298254LB_warmup_largelr/activations_newest'
mode = 'diagonal'  # alternative noted by the author: 'custom col'
save_flag = False
os.chdir(path)

# The two checkpoints whose activations are compared.
epochA = 'epoch_init.t7'
epochB = 'epoch_20.t7'  # other values tried by the author: str(150), 'iter_199.t7'

# Output directory for this checkpoint pair; created only if it is missing.
store_results = '{}/resnet18_comparing_{}_and_{}'.format(path, epochA, epochB)
if not os.path.isdir(store_results):
    os.mkdir(store_results)

n_bigdata = 10000  # appears in the names of the result files
N_layers = 61

# Layer ids (as strings) that are left out of layer_set.
# Original author note: "numbering starts from 1 for this set".
completed_set = ['0']
layer_set = [name for name in map(str, range(N_layers))
             if name not in completed_set]
def stack_minibatches(epoch, layer, n_batches=20, first_flat_layer=60):
    ''' Load the per-minibatch activation files of one layer and stack them.

    Reads ``<epoch>/layer_<layer>_batch_<i>.npy`` for ``i`` in
    ``range(n_batches)`` and stacks the loaded arrays along the first axis
    with ``np.vstack``.

    Args:
        epoch: directory containing the ``.npy`` files (e.g. 'epoch_20.t7');
            a relative path is resolved against the current working directory.
        layer: layer id as a string (it is concatenated into the file name
            and converted with ``int`` for the threshold test).
        n_batches: number of minibatch files to read. Defaults to 20, the
            value that used to be hard-coded.
        first_flat_layer: layers whose id is >= this value get two extra
            axes of length 1 inserted at positions 2 and 3. The default of
            60 reproduces the former ``int(layer) > 59`` test.
            NOTE(review): presumably these are the non-convolutional layers
            whose activations are 2-D -- confirm against the dump script.

    Returns:
        The stacked numpy array, with the two singleton axes added when
        ``int(layer) >= first_flat_layer``.

    Raises:
        ValueError: if ``n_batches`` is smaller than 1.
    '''
    if n_batches < 1:
        # np.vstack would fail on an empty list with a less helpful message.
        raise ValueError('n_batches must be at least 1, got ' + str(n_batches))

    act_path_list = [
        os.path.join(epoch, 'layer_' + layer + '_batch_' + str(batch_id) + '.npy')
        for batch_id in range(n_batches)
    ]
    L = np.vstack([np.load(act_path) for act_path in act_path_list])

    if int(layer) >= first_flat_layer:
        # Append two length-1 axes so the result has the same rank as the
        # arrays of the other layers.
        L = np.expand_dims(L, axis=2)
        L = np.expand_dims(L, axis=3)
    return L
# ---------------------------------------------------------------------------
# Main loop: compare one base layer of checkpoint A (epochA) with every layer
# from base_layer up to N_layers - 1 of checkpoint B (epochB), and pickle one
# result per layer pair.
# ---------------------------------------------------------------------------
# layer_set[2] is a single layer id stored as a string. It is wrapped in a
# list so the loop visits that id once; iterating the bare string would walk
# over its characters and split a multi-digit id such as '12' into '1', '2'.
for base_layer in [layer_set[2]]:
    print('On Layer: ' + base_layer)

    # One sub-directory per base layer, created only if it is missing.
    pair_dir = store_results + '/layer_' + base_layer + '_and_X'
    if not os.path.isdir(pair_dir):
        os.mkdir(pair_dir)

    # Partner layers, visited from the highest layer id down to base_layer.
    second_cand_list = [str(x) for x in reversed(range(int(base_layer), N_layers))]
    layers_to_compare = [(base_layer, x) for x in second_cand_list]

    for pair_ind, (layerA, layerB) in enumerate(layers_to_compare):
        save_path = pair_dir + \
            '/approximate_' + str(n_bigdata) + '_dftcca_models_' + \
            layerA + '_and_' + layerB + '.df'

        if os.path.isfile(save_path):
            print('cca o/p file for this pair already exists, skipping ahead')
            # Actually skip: without this the pair was recomputed and the
            # existing file overwritten despite the message above.
            continue

        L1 = stack_minibatches(epochA, layerA)
        L2 = stack_minibatches(epochB, layerB)

        # Move axis 1 to the last position.
        # NOTE(review): presumably converts (N, C, H, W) activations to the
        # (N, H, W, C) layout expected by dft_ccas.fourier_ccas -- confirm.
        L1 = np.transpose(L1, (0, 2, 3, 1))
        L2 = np.transpose(L2, (0, 2, 3, 1))

        df_output = dft_ccas.fourier_ccas(L1, L2)
        df_output.to_pickle(save_path)

        # Report the layer ids of the pair (the message used to print the
        # checkpoint names under the label "layer" and lacked a space).
        print('Done computing CCA for pair: ' + str(pair_ind) +
              ' consisting of: layer ' + layerA + ' and layer ' + layerB)