-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathparallel_sim.py
More file actions
76 lines (67 loc) · 2.46 KB
/
parallel_sim.py
File metadata and controls
76 lines (67 loc) · 2.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import multiprocessing as mp
import numpy as np
import random
import copy
import time
#TODO: create different copy of env.
#TODO: fix one set of strategies.
#TODO: check [nn]*num_episodes
#TODO: nn should change to mixed strategy
#TODO: API has been changed.
def parallel_sim(env, nn_att, nn_def, num_episodes):
G_list, att_list, def_list = copy_env(env, num_episodes)
arg = list(zip(G_list,att_list,[nn_att]*num_episodes,def_list,[nn_def]*num_episodes,[env.T]*num_episodes))
with mp.Pool() as pool:
r = pool.map_async(single_sim, arg)
a =r.get()
return np.sum(np.array(a),0)/num_episodes
def single_sim(param): #single for single episode.
# TODO: APIs have been changed.
aReward = 0
dReward = 0
G, attacker, nn_att, defender, nn_def, T = param
for t in range(T):
timeleft = T - t
attacker.att_greedy_action_builder_single(G, timeleft, nn_att)
att_action_set = attacker.attact
defender.def_greedy_action_builder_single(G, timeleft, nn_def)
def_action_set = defender.defact
for attack in att_action_set:
if isinstance(attack, tuple):
# check OR node
aReward += G.edges[attack]['cost']
if random.uniform(0, 1) <= G.edges[attack]['actProb']:
G.nodes[attack[-1]]['state'] = 1
else:
# check AND node
aReward += G.nodes[attack]['aCost']
if random.uniform(0, 1) <= G.nodes[attack]['actProb']:
G.nodes[attack]['state'] = 1
# defender's action
for node in def_action_set:
G.nodes[node]['state'] = 0
dReward += G.nodes[node]['dCost']
_, targetset = get_Targets(G)
for node in targetset:
if G.nodes[node]['state'] == 1:
aReward += G.nodes[node]['aReward']
dReward += G.nodes[node]['dPenalty']
return aReward, dReward
def get_Targets(G):
count = 0
targetset = set()
for node in G.nodes:
if G.nodes[node]['type'] == 1:
count += 1
targetset.add(node)
return count,targetset
def copy_env(env, num_episodes):
G_list = []
att_list = []
def_list = []
env.reset_everything()
for _ in np.arange(num_episodes):
G_list.append(env.G_reserved.copy())
att_list.append(copy.deepcopy(env.attacker))
def_list.append(copy.deepcopy(env.defender))
return G_list, att_list, def_list