-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathavailable_solvers.py
More file actions
107 lines (70 loc) · 4.16 KB
/
available_solvers.py
File metadata and controls
107 lines (70 loc) · 4.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
from functools import partial
from solvers import (ValueIterationPolicy,
PrioritizedValueIterationPolicy,
PolicyIterationPolicy,
RtdpPolicy,
MultiagentRtdpPolicy,
IdPolicy,
OnlineReplanPolicy)
from solvers.ma_rtdp import ma_rtdp_merge
from solvers.rtdp import (local_views_prioritized_value_iteration_min_heuristic,
local_views_prioritized_value_iteration_sum_heuristic,
dijkstra_min_heuristic,
dijkstra_sum_heuristic,
solution_heuristic_min,
solution_heuristic_sum,
rtdp_dijkstra_sum_heuristic,
rtdp_dijkstra_min_heuristic,
stop_when_no_improvement_between_batches_rtdp_merge, )
# Heuristics ##########################################################################################################
# Short module-level aliases for the heuristic callables imported from solvers.rtdp; the creator functions and
# mergers in this module refer to heuristics through these names.
# TODO: pre-binding gamma here via partial() is a hack; perhaps the heuristic functions should receive gamma and
# env from their caller instead.
# PVI-based heuristics: 1.0 is pre-bound as the first positional argument (gamma, per the TODO above).
pvi_min_h = partial(local_views_prioritized_value_iteration_min_heuristic, 1.0)
pvi_sum_h = partial(local_views_prioritized_value_iteration_sum_heuristic, 1.0)
# Dijkstra heuristics are used as imported, with nothing pre-bound.
dijkstra_min_h = dijkstra_min_heuristic
dijkstra_sum_h = dijkstra_sum_heuristic
# RTDP-over-Dijkstra heuristics: 1.0 and 500 are pre-bound as the first two positional arguments.
# NOTE(review): 1.0 is presumably gamma again and 500 an iteration budget -- confirm against solvers.rtdp.
rtdp_dijkstra_min_h = partial(rtdp_dijkstra_min_heuristic, 1.0, 500)
rtdp_dijkstra_sum_h = partial(rtdp_dijkstra_sum_heuristic, 1.0, 500)
# Solution-based heuristics are used as imported, with nothing pre-bound.
solution_heuristic_min_h = solution_heuristic_min
solution_heuristic_sum_h = solution_heuristic_sum
# ID Low Level Mergers ################################################################################################
# Merger callables that the id_* creator functions in this module pass to IdPolicy.
# Not referenced in the part of the file visible here (id_vi_creator passes a literal None instead).
DEFAULT_LOW_LEVEL_MERGER = None
# Both mergers pre-bind solution_heuristic_sum_h, 100 and 10000 as their leading positional arguments.
# NOTE(review): 100 and 10000 are the same two numbers the RTDP creators pass to their policies (presumably a
# batch size and an iteration cap) -- confirm against the merge functions' signatures.
rtdp_sum_merger = partial(stop_when_no_improvement_between_batches_rtdp_merge,
solution_heuristic_sum_h,
100,
10000)
# Multi-agent RTDP counterpart of rtdp_sum_merger, built on ma_rtdp_merge with the same pre-bound values.
ma_rtdp_sum_merger = partial(ma_rtdp_merge,
solution_heuristic_sum_h,
100,
10000)
def vi_creator(env, gamma):
    """Create a ``ValueIterationPolicy`` for ``env`` and ``gamma``.

    Both arguments are forwarded unchanged; ``'vi'`` is passed as the
    trailing argument (presumably the policy's name).
    """
    label = 'vi'
    return ValueIterationPolicy(env, gamma, label)
def pvi_creator(env, gamma):
    """Create a ``PrioritizedValueIterationPolicy`` for ``env`` and ``gamma``.

    Both arguments are forwarded unchanged; ``'pvi'`` is passed as the
    trailing argument (presumably the policy's name).
    """
    label = 'pvi'
    return PrioritizedValueIterationPolicy(env, gamma, label)
def pi_creator(env, gamma):
    """Create a ``PolicyIterationPolicy`` for ``env`` and ``gamma``.

    Both arguments are forwarded unchanged; ``'pi'`` is passed as the
    trailing argument (presumably the policy's name).
    """
    label = 'pi'
    return PolicyIterationPolicy(env, gamma, label)
def rtdp_pvi_sum_creator(env, gamma):
    """Create an ``RtdpPolicy`` for ``env`` and ``gamma`` using ``pvi_sum_h``.

    The remaining positional arguments are fixed to 100, 10000 and the
    label ``'rtdp_pvi_sum'``.
    """
    heuristic = pvi_sum_h
    label = 'rtdp_pvi_sum'
    # NOTE(review): 100 and 10000 are presumably RTDP batch/iteration
    # limits -- confirm against RtdpPolicy's signature.
    return RtdpPolicy(env, gamma, heuristic, 100, 10000, label)
def rtdp_dijkstra_sum_creator(env, gamma):
    """Create an ``RtdpPolicy`` for ``env`` and ``gamma`` using ``dijkstra_sum_h``.

    The remaining positional arguments are fixed to 100, 10000 and the
    label ``'rtdp_dijkstra_sum'``.
    """
    heuristic = dijkstra_sum_h
    label = 'rtdp_dijkstra_sum'
    # NOTE(review): 100 and 10000 are presumably RTDP batch/iteration
    # limits -- confirm against RtdpPolicy's signature.
    return RtdpPolicy(env, gamma, heuristic, 100, 10000, label)
def rtdp_rtdp_dijkstra_sum_creator(env, gamma):
    """Create an ``RtdpPolicy`` for ``env`` and ``gamma`` using ``rtdp_dijkstra_sum_h``.

    The remaining positional arguments are fixed to 100, 10000 and the
    label ``'rtdp_rtdp_dijkstra_sum'``.
    """
    heuristic = rtdp_dijkstra_sum_h
    label = 'rtdp_rtdp_dijkstra_sum'
    # NOTE(review): 100 and 10000 are presumably RTDP batch/iteration
    # limits -- confirm against RtdpPolicy's signature.
    return RtdpPolicy(env, gamma, heuristic, 100, 10000, label)
def ma_rtdp_pvi_sum_creator(env, gamma):
    """Create a ``MultiagentRtdpPolicy`` for ``env`` and ``gamma`` using ``pvi_sum_h``.

    The remaining positional arguments are fixed to 100, 10000 and the
    label ``'ma_rtdp_pvi_sum'``.
    """
    heuristic = pvi_sum_h
    label = 'ma_rtdp_pvi_sum'
    # NOTE(review): 100 and 10000 are presumably RTDP batch/iteration
    # limits -- confirm against MultiagentRtdpPolicy's signature.
    return MultiagentRtdpPolicy(env, gamma, heuristic, 100, 10000, label)
def ma_rtdp_dijkstra_sum_creator(env, gamma):
    """Create a ``MultiagentRtdpPolicy`` for ``env`` and ``gamma`` using ``dijkstra_sum_h``.

    The remaining positional arguments are fixed to 100, 10000 and the
    label ``'ma_rtdp_dijkstra_sum'``.
    """
    heuristic = dijkstra_sum_h
    label = 'ma_rtdp_dijkstra_sum'
    # NOTE(review): 100 and 10000 are presumably RTDP batch/iteration
    # limits -- confirm against MultiagentRtdpPolicy's signature.
    return MultiagentRtdpPolicy(env, gamma, heuristic, 100, 10000, label)
def ma_rtdp_rtdp_dijkstra_sum_creator(env, gamma):
    """Create a ``MultiagentRtdpPolicy`` for ``env`` and ``gamma`` using ``rtdp_dijkstra_sum_h``.

    The remaining positional arguments are fixed to 100, 10000 and the
    label ``'ma_rtdp_rtdp_dijkstra_sum'``.
    """
    heuristic = rtdp_dijkstra_sum_h
    label = 'ma_rtdp_rtdp_dijkstra_sum'
    # NOTE(review): 100 and 10000 are presumably RTDP batch/iteration
    # limits -- confirm against MultiagentRtdpPolicy's signature.
    return MultiagentRtdpPolicy(env, gamma, heuristic, 100, 10000, label)
def id_vi_creator(env, gamma):
    """Create an ``IdPolicy`` for ``env`` and ``gamma`` built on ``vi_creator``.

    ``vi_creator`` is passed as the creator callable, ``None`` is passed
    in the merger position, and the label is ``'id_vi'``.
    """
    low_level_creator = vi_creator
    low_level_merger = None
    label = 'id_vi'
    return IdPolicy(env, gamma, low_level_creator, low_level_merger, label)
def id_rtdp_dijkstra_sum_creator(env, gamma):
    """Create an ``IdPolicy`` for ``env`` and ``gamma`` built on RTDP with the Dijkstra sum heuristic.

    ``rtdp_dijkstra_sum_creator`` is passed as the creator callable and
    ``rtdp_sum_merger`` as the merger.
    """
    low_level_creator = rtdp_dijkstra_sum_creator
    low_level_merger = rtdp_sum_merger
    # NOTE(review): the label spells "dijsktra" (sic), unlike the other
    # labels in this module. It is kept verbatim because the string may be
    # used as a key elsewhere -- confirm before correcting it.
    label = 'id_rtdp_dijsktra_sum'
    return IdPolicy(env, gamma, low_level_creator, low_level_merger, label)
def id_ma_rtdp_dijkstra_sum_creator(env, gamma):
    """Create an ``IdPolicy`` for ``env`` and ``gamma`` built on multi-agent RTDP with the Dijkstra sum heuristic.

    ``ma_rtdp_dijkstra_sum_creator`` is passed as the creator callable and
    ``ma_rtdp_sum_merger`` as the merger.
    """
    low_level_creator = ma_rtdp_dijkstra_sum_creator
    low_level_merger = ma_rtdp_sum_merger
    # NOTE(review): the label spells "dijsktra" (sic), unlike the other
    # labels in this module. It is kept verbatim because the string may be
    # used as a key elsewhere -- confirm before correcting it.
    label = 'id_ma_rtdp_dijsktra_sum'
    return IdPolicy(env, gamma, low_level_creator, low_level_merger, label)
def id_rtdp_pvi_sum_creator(env, gamma):
    """Create an ``IdPolicy`` for ``env`` and ``gamma`` built on RTDP with the PVI sum heuristic.

    ``rtdp_pvi_sum_creator`` is passed as the creator callable,
    ``rtdp_sum_merger`` as the merger, and the label is
    ``'id_rtdp_pvi_sum'``.
    """
    low_level_creator = rtdp_pvi_sum_creator
    low_level_merger = rtdp_sum_merger
    label = 'id_rtdp_pvi_sum'
    return IdPolicy(env, gamma, low_level_creator, low_level_merger, label)
def id_ma_rtdp_pvi_sum_creator(env, gamma):
    """Create an ``IdPolicy`` for ``env`` and ``gamma`` built on multi-agent RTDP with the PVI sum heuristic.

    ``ma_rtdp_pvi_sum_creator`` is passed as the creator callable,
    ``ma_rtdp_sum_merger`` as the merger, and the label is
    ``'id_ma_rtdp_pvi_sum'``.
    """
    low_level_creator = ma_rtdp_pvi_sum_creator
    low_level_merger = ma_rtdp_sum_merger
    label = 'id_ma_rtdp_pvi_sum'
    return IdPolicy(env, gamma, low_level_creator, low_level_merger, label)
def online_replan_ma_rtdp_rtdp_dijkstra_sum_creator(env, gamma):
    """Create an ``OnlineReplanPolicy`` for ``env`` and ``gamma`` built on multi-agent RTDP.

    ``ma_rtdp_rtdp_dijkstra_sum_creator`` is passed as the creator
    callable and the label is ``'online_ma_rtdp_rtdp_dijkstra_sum'``.
    """
    inner_creator = ma_rtdp_rtdp_dijkstra_sum_creator
    label = 'online_ma_rtdp_rtdp_dijkstra_sum'
    # NOTE(review): the meaning of the literal 3 is not visible from this
    # module -- confirm against OnlineReplanPolicy's signature.
    return OnlineReplanPolicy(env, gamma, 3, inner_creator, label)
def online_replan_rtdp_rtdp_dijkstra_sum_creator(env, gamma):
    """Create an ``OnlineReplanPolicy`` for ``env`` and ``gamma`` built on single-policy RTDP.

    ``rtdp_rtdp_dijkstra_sum_creator`` is passed as the creator callable
    and the label is ``'online_rtdp_rtdp_dijkstra_sum'``.
    """
    inner_creator = rtdp_rtdp_dijkstra_sum_creator
    label = 'online_rtdp_rtdp_dijkstra_sum'
    # NOTE(review): the meaning of the literal 3 is not visible from this
    # module -- confirm against OnlineReplanPolicy's signature.
    return OnlineReplanPolicy(env, gamma, 3, inner_creator, label)