-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscript_simulation.py
More file actions
105 lines (82 loc) · 3.84 KB
/
script_simulation.py
File metadata and controls
105 lines (82 loc) · 3.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import pandas as pd
import itertools as itt
import ppinetsim
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import wasserstein_distance
import networkx as nx
import seaborn as sns
from os.path import join
import time
import os
from joblib import Parallel, delayed
import json
import codecs
##### create json input files #######
# Sweep over false-positive rates, false-negative rates and acceptance
# thresholds; one parameter file is written per combination and test method.
seq_FP = [0.0, 0.003125, 0.00625, 0.0125, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4]
seq_FN = [0.0, 0.1, 0.2, 0.3, 0.4]
par = ['params_AP-MS_FPR00.json', 'params_Y2H_FPR00.json']
for p in par:
    # Load the template parameter file; the context manager guarantees the
    # handle is closed even if the JSON is malformed.
    with open('parameter_settings/' + p) as fp:
        data = json.load(fp)
    for FP in seq_FP:
        for FN in seq_FN:
            for a in [0.0, 0.5]:
                # Only these three keys are overridden; every other key of
                # the template is written out unchanged.
                data['false_negative_rate'] = FN
                data['false_positive_rate'] = FP
                data['acceptance_threshold'] = a
                # create folder (including missing parents; no error if it
                # already exists)
                d = 'parameter_settings/all_param_combinations/' + data['test_method']
                os.makedirs(d, exist_ok=True)
                # The decimal point is stripped from each value so that it
                # can be embedded in the file name (e.g. 0.05 -> "005").
                out_name = (
                    d + '/params_' + data['test_method']
                    + '_accTh' + str(data['acceptance_threshold']).replace('.', '')
                    + '_FPR' + str(data['false_positive_rate']).replace('.', '')
                    + '_FNR' + str(data['false_negative_rate']).replace('.', '')
                    + '.json'
                )
                with open(out_name, "w") as outfile:
                    json.dump(data, outfile, indent=4)
print('parameters files done!')
###### functions that runs the simulation with Parallel #####
def simulation_forParallel(m, f, nsg):
    """Run the simulation for one parameter file and save its outputs.

    Parameters
    ----------
    m : str
        Method name; used as the sub-folder of
        ``parameter_settings/all_param_combinations/`` that contains ``f``.
    f : str
        File name of the JSON parameter file inside that sub-folder.
    nsg : int
        Number of simulations per generator, forwarded to
        ``ppinetsim.estimate_posteriors`` as ``num_simulations_per_generator``.

    Returns
    -------
    None
        The two outputs of ``ppinetsim.estimate_posteriors`` are written to
        ``output_results/<test_method>/accTh<..>_FPR<..>_FNR<..>/`` as a CSV
        file (``likelihood_*.csv``) and a JSON file (``all_results_*.json``).
    """
    parameters = ppinetsim.Parameters('parameter_settings/all_param_combinations/' + m + '/' + f)
    print(f)

    # Tag shared by the output folder and both output file names; the decimal
    # point is stripped from each value (e.g. 0.05 -> "005").
    tag = (
        'accTh' + str(parameters.acceptance_threshold).replace('.', '')
        + '_FPR' + str(parameters.false_positive_rate).replace('.', '')
        + '_FNR' + str(parameters.false_negative_rate).replace('.', '')
    )
    d = 'output_results/' + str(parameters.test_method) + '/' + tag
    # This function runs concurrently in several workers, so a separate
    # "exists?" check followed by mkdir can race and raise FileExistsError.
    # makedirs(exist_ok=True) creates the method folder and the run folder
    # in one call and tolerates another worker having created them first.
    os.makedirs(d, exist_ok=True)

    likelihood_at_k, all_results = ppinetsim.estimate_posteriors(parameters, num_simulations_per_generator=nsg)
    likelihood_at_k.to_csv(d + '/likelihood_' + parameters.test_method + '_' + tag + '.csv', index=False)

    # save results in json
    # Element 2 of every result is a pair of iterables that is converted to
    # plain lists before dumping (presumably arrays that json cannot
    # serialise directly - confirm against ppinetsim).
    results4json = []
    for result in all_results:
        temp = list(result)
        temp[2] = [list(temp[2][0]), list(temp[2][1])]
        results4json.append(tuple(temp))

    json_path = d + '/all_results_' + parameters.test_method + '_' + tag + '.json'
    # The context manager flushes and closes the file deterministically.
    with codecs.open(json_path, 'w', encoding='utf-8') as outfile:
        json.dump(results4json, outfile, separators=(',', ':'), sort_keys=True, indent=4)
#--------------------------------------------------------------------------------------------
# run simulations for different methods (AP-MS and Y2H), FPR, FNR and acceptance_threshold
# create the output_results folder (no error if it already exists)
os.makedirs('output_results', exist_ok=True)
method = ['AP-MS', 'Y2H']
# set the number of simulations for generator
nsg = 50
# set the number of cores to use for the parallel processing
jobs = 8
start_time = time.time()
for m in method:
    dir_parameters = 'parameter_settings/all_param_combinations/' + m + '/'
    print(m)
    # Every listed entry is handed to simulation_forParallel as a parameter
    # file, so skip anything that is not a .json file (e.g. OS metadata files
    # or sub-folders). Sorting makes the processing order reproducible,
    # because os.listdir returns entries in arbitrary order.
    files = sorted(
        entry for entry in os.listdir(dir_parameters) if entry.endswith('.json')
    )
    print(files)
    # One job per parameter file, spread over `jobs` workers.
    Parallel(n_jobs=jobs)(delayed(simulation_forParallel)(m, f, nsg) for f in files)
# Total wall-clock time in seconds for all methods.
print(time.time() - start_time)
print('Done!')