-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_experiments.py
More file actions
230 lines (190 loc) · 7.92 KB
/
run_experiments.py
File metadata and controls
230 lines (190 loc) · 7.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
import time
import subprocess
import os
import json
import argparse
import itertools
import datetime
from shutil import copyfile
from collections import OrderedDict
default_params = {
"mode": "sequential",
"-n": 1,
}
def no_job_running(dry_run=False):
if dry_run:
return True
proc = subprocess.Popen(["bjobs"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
output = proc.stdout.read()
return output == "No unfinished job found\n"
def submit_job(log_name, filename="Distributed_Genetic_Algorithm", n=1, W="00:30", mem=256, program_params="", dry_run=False):
command = "bsub -n {} -W {} -o {}.log -R \"rusage[mem={}]\" mpirun ./{} {}".format(
n, W, log_name, mem, filename, program_params
)
if dry_run:
print(command)
else:
os.system(command)
def param_to_list(param):
if param["type"] == "range":
start = param["min"]
end = param["max"]
step = 1 if "stride" not in param else param["stride"]
return list(range(start, end, step))
elif param["type"] == "list":
return param["list"]
elif param["type"] == "tuple":
return_list = list()
for value in param["values"]:
return_list.append(list(zip(param["names"], value["value"])))
return return_list
def find_param(name, var_names, var_vals, fixed_params, default_params):
# Try in variable params
if name in var_names:
i = var_names.index(name)
return var_vals[i]
# Search in tuples
for tuple_param in filter(lambda x: not len(x) == 0 and x[0] == "tuple", list(zip(var_names, var_vals))):
for name_, value in tuple_param[1]:
if name == name_:
return value
# Try fixed params
if name in fixed_params:
return fixed_params[name]
# Try default params
if name in default_params:
return default_params[name]
print("Could not find parameter {} anywhere!".format(name))
exit(1)
def create_filename(name):
# Replace spaces by underscores
file_name = name.replace(" ", "_")
# Append data and time
timestamp = datetime.datetime.now().strftime("%b_%d_%H%M%S")
return file_name + "_" + timestamp
def generate_job_string(job):
if len(job) == 0:
return "novar"
job_list = list()
for el in job:
# Handle tuples
if isinstance(el, list):
for name, value in el:
job_list.append(str(value))
else:
job_list.append(str(el))
job_str = "_".join(job_list)
return job_str.replace(".", "").replace("-", "")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Running some experiments")
parser.add_argument('--dry_run', default=False, dest="dry_run", action="store_true",
help="Whether to just print the jobs for debugging")
parser.add_argument('--dir', type=str, default="logs", dest="dir",
help="Directory to log to")
parser.add_argument('-e', type=str, default=['experiment.json'], nargs="+", dest="experiment",
help="Which experiment file to run")
args = parser.parse_args()
if not args.dry_run:
print("Compiling... ")
os.system("cmake .")
os.system("make")
print("Done!")
print("Running {} consecutive experiment(s)".format(len(args.experiment)))
job_count = 0
for e, experiment in enumerate(args.experiment):
print("*"*50)
try:
experiment_file = open(experiment, mode="r")
except OSError:
print("Failed to open file {}".format(experiment))
continue
experiments = json.load(experiment_file, object_pairs_hook=OrderedDict)
experiment_file.close()
experiment_name = experiments["name"]
repetitions = experiments["repetitions"]
print("Running Experiment {} from file {} with {} repetitions".format(experiment_name, experiment, repetitions))
# Create folder
experiment_name = create_filename(experiment_name)
experiment_dir = os.path.join(args.dir, experiment_name, "")
if args.dry_run:
print("This will create the folder {}".format(experiment_dir))
else:
if os.path.isdir(experiment_dir):
print("Folder {} already exists... exiting".format(experiment_dir))
exit(1)
else:
print("Logging into folder {}".format(experiment_dir))
os.mkdir(experiment_dir)
copyfile(experiment, experiment_dir + experiment)
# Finished setup
print("*"*50)
# Variable parameters
grid = list()
grid_param = list()
for param_name, param in experiments["variable_params"].items():
param_list = param_to_list(param)
# Handle tuples
if len(param_list) > 0 and isinstance(param_list[0], list):
grid.append(param_list)
grid_param.append("tuple")
else:
grid.append(param_list)
grid_param.append(param_name)
for job_num, job in enumerate(itertools.product(*grid)):
n = find_param("-n", grid_param, job, experiments["fixed_params"], default_params)
mode = find_param("mode", grid_param, job, experiments["fixed_params"], default_params)
program_params = mode + " "
# Use all variable params
for i, el in enumerate(grid_param):
# Skip already handeled parameters
if el == "-n" or el == "mode":
continue
# Handle tuples
if el == "tuple":
for tuple_name, tuple_val in job[i]:
program_params += tuple_name
program_params += " "
program_params += str(tuple_val)
program_params += " "
else:
program_params += el
program_params += " "
program_params += str(job[i])
program_params += " "
# Use all fixed params
for name, param in experiments["fixed_params"].items():
if name == "-n" or name == "mode":
continue
program_params += name
program_params += " "
program_params += str(param)
program_params += " "
for repetition in range(repetitions):
job_count += 1
job_string = generate_job_string(job)
job_string += "_" + str(repetition)
repetition_logging_location = os.path.join(experiment_dir, job_string, "")
if not args.dry_run:
if os.path.isdir(repetition_logging_location):
print("Folder {} already exists... exiting".format(repetition_logging_location))
exit(1)
else:
os.mkdir(repetition_logging_location)
rep_program_params = program_params
rep_program_params += "--log_dir"
rep_program_params += " "
rep_program_params += repetition_logging_location
success = False
while not success:
if no_job_running(args.dry_run):
success = True
log_dir = os.path.join(repetition_logging_location, "leonhard")
submit_job(log_name=log_dir, n=n, program_params=rep_program_params, dry_run=args.dry_run)
# Wait to make sure submitted job is visible
if not args.dry_run:
time.sleep(1)
else:
# Do not retry too much
time.sleep(5)
if args.dry_run:
print("You are about to schedule {} jobs".format(job_count))