Skip to content

Commit 7b56c7f

Browse files
committed
Return just a sample equilibrium for any set of realisation-equivalent equilibria.
1 parent 018f33d commit 7b56c7f

3 files changed

Lines changed: 49 additions & 91 deletions

File tree

doc/tools.enumpoly.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,19 @@ supports which have the fewest strategies in total. For many classes
2424
of games, this will tend to lower the average time until finding one equilibrium,
2525
as well as finding the second equilibrium (if one exists).
2626

27+
For extensive games, a support of actions equates to allowing positive
28+
probabilities over a subset of terminal nodes. The indifference conditions
29+
used are those for the sequence form defined on the projection of the game
30+
to that support of actions. A solution to these equations implies a probability
31+
distribution over terminal nodes. The algorithm then searches for
32+
a Nash equilibrium profile which implements that probability
33+
distribution. If there exists such a profile, a sample one is returned.
34+
Note that for probability distributions which assign zero probability to some terminal
35+
nodes, it is generally the case that there are (infinitely) many such profiles.
36+
Subsequent analysis of unreached information sets can yield alternative
37+
profiles which specify different choices at unreached information sets
38+
while satisfying the Nash equilibrium conditions.
39+
2740
When the verbose switch `-v` is used, the program outputs each support
2841
as it is considered. The supports are presented as a comma-separated
2942
list of binary strings, where each entry represents one player. The

src/solvers/enumpoly/efgpoly.cc

Lines changed: 27 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -151,79 +151,37 @@ std::map<GameSequence, double> ToSequenceProbs(const ProblemData &p_data, const
151151
return x;
152152
}
153153

154-
/// Compute the set of information sets which are not reachable given the actions in
155-
/// @p p_support, but are reachable via a *single* deviation to an action at a
156-
/// reachable information set.
157-
std::set<GameInfoset> FindDeviationInfosets(const BehaviorSupportProfile &p_support)
154+
std::optional<MixedBehaviorProfile<double>>
155+
FindNashExtension(const MixedBehaviorProfile<double> &p_baseProfile, double p_maxRegret)
158156
{
159-
struct SingleDeviationReachableVisitor {
160-
const BehaviorSupportProfile &m_support;
161-
std::set<GameInfoset> m_deviationReachable;
162-
163-
explicit SingleDeviationReachableVisitor(const BehaviorSupportProfile &p_support)
164-
: m_support(p_support)
165-
{
166-
}
167-
GameRep::DFSCallbackResult OnEnter(const GameNode &p_node, int)
168-
{
169-
const auto infoset = p_node->GetInfoset();
170-
if (!infoset) {
171-
return GameRep::DFSCallbackResult::Continue;
172-
}
173-
if (p_node->GetPlayer()->IsChance()) {
174-
return GameRep::DFSCallbackResult::Continue;
175-
}
176-
if (m_support.IsReachable(infoset)) {
177-
return GameRep::DFSCallbackResult::Continue;
178-
}
179-
m_deviationReachable.insert(infoset);
180-
return GameRep::DFSCallbackResult::Prune;
181-
}
182-
GameRep::DFSCallbackResult OnAction(const GameNode &, const GameNode &, int)
183-
{
184-
return GameRep::DFSCallbackResult::Continue;
157+
const Game &game = p_baseProfile.GetGame();
158+
std::list<GameInfoset> extensionInfosets;
159+
for (const auto &infoset : game->GetInfosets()) {
160+
if (!p_baseProfile.IsDefinedAt(infoset)) {
161+
extensionInfosets.push_back(infoset);
185162
}
186-
GameRep::DFSCallbackResult OnExit(const GameNode &, int)
187-
{
188-
return GameRep::DFSCallbackResult::Continue;
189-
}
190-
void OnVisit(const GameNode &, int) {}
191-
};
192-
193-
SingleDeviationReachableVisitor visitor(p_support);
194-
const Game game = p_support.GetGame();
195-
GameRep::WalkDFS(game, game->GetRoot(), TraversalOrder::Preorder, visitor);
196-
return visitor.m_deviationReachable;
197-
}
198-
199-
/// Produce the set of mixed behavior profiles which extend @param p_baseProfile
200-
/// to complete profiles by specifying a pure action at each information set which
201-
/// is reachable by a single deviation from the profile, and the centroid at all
202-
/// information sets which are reachable only by two deviations.
203-
std::list<MixedBehaviorProfile<double>>
204-
ExtendWithDeviations(const MixedBehaviorProfile<double> &p_baseProfile)
205-
{
206-
const auto deviationInfosets = FindDeviationInfosets(p_baseProfile.GetSupport());
207-
std::list<MixedBehaviorProfile<double>> result;
208-
Array<int> firstIndex(deviationInfosets.size());
163+
}
164+
Array<int> firstIndex(extensionInfosets.size());
209165
std::fill(firstIndex.begin(), firstIndex.end(), 1);
210-
Array<int> lastIndex(deviationInfosets.size());
211-
std::transform(deviationInfosets.begin(), deviationInfosets.end(), lastIndex.begin(),
166+
Array<int> lastIndex(extensionInfosets.size());
167+
std::transform(extensionInfosets.begin(), extensionInfosets.end(), lastIndex.begin(),
212168
[](const auto &infoset) { return infoset->GetActions().size(); });
213169
CartesianIndexProduct indices(firstIndex, lastIndex);
214170
for (const auto &index : indices) {
215171
auto extension = p_baseProfile.ToFullSupport();
216-
for (auto [i, infoset] : enumerate(deviationInfosets)) {
172+
for (auto [i, infoset] : enumerate(extensionInfosets)) {
217173
extension[infoset->GetAction(index[i + 1])] = 1.0;
218174
}
219-
extension.UndefinedToCentroid();
220-
result.push_back(extension);
175+
if (extension.GetMaxRegret() < p_maxRegret) {
176+
return extension;
177+
}
221178
}
222-
return result;
179+
return std::nullopt;
223180
}
224181

225182
std::list<MixedBehaviorProfile<double>> SolveSupport(const BehaviorSupportProfile &p_support,
226-
bool &p_isSingular, int p_stopAfter)
183+
bool &p_isSingular, int p_stopAfter,
184+
double p_maxRegret)
227185
{
228186
ProblemData data(p_support);
229187
PolynomialSystem<double> equations(data.space);
@@ -252,7 +210,10 @@ std::list<MixedBehaviorProfile<double>> SolveSupport(const BehaviorSupportProfil
252210
for (const auto &root : roots) {
253211
const MixedBehaviorProfile<double> sol(
254212
data.m_support.ToMixedBehaviorProfile(ToSequenceProbs(data, root)));
255-
solutions.splice(solutions.end(), ExtendWithDeviations(sol));
213+
auto extended = FindNashExtension(sol, p_maxRegret);
214+
if (extended.has_value()) {
215+
solutions.push_back(extended.value());
216+
}
256217
}
257218
return solutions;
258219
}
@@ -277,12 +238,11 @@ EnumPolyBehaviorSolve(const Game &p_game, int p_stopAfter, double p_maxregret,
277238
for (auto support : possible_supports->m_supports) {
278239
p_onSupport("candidate", support);
279240
bool isSingular = false;
280-
for (const auto &solution : SolveSupport(
281-
support, isSingular, std::max(p_stopAfter - static_cast<int>(ret.size()), 0))) {
282-
if (solution.GetMaxRegret() < p_maxregret) {
283-
p_onEquilibrium(solution);
284-
ret.push_back(solution);
285-
}
241+
for (const auto &solution :
242+
SolveSupport(support, isSingular, std::max(p_stopAfter - static_cast<int>(ret.size()), 0),
243+
p_maxregret)) {
244+
p_onEquilibrium(solution);
245+
ret.push_back(solution);
286246
}
287247
if (isSingular) {
288248
p_onSupport("singular", support);

tests/test_nash.py

Lines changed: 9 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2122,8 +2122,7 @@ def test_nash_strategy_solver_w_start(test_case: EquilibriumTestCaseWithStart, s
21222122
factory=functools.partial(games.read_from_file, "3_player.efg"),
21232123
solver=functools.partial(gbt.nash.enumpoly_solve, stop_after=None),
21242124
expected=[
2125-
[[d(1, 0), d(1, 0)], [d(1, 0), d("1/2", "1/2")], [d(1, 0), d(1, 0)]],
2126-
[[d(1, 0), d(1, 0)], [d(1, 0), d("1/2", "1/2")], [d(1, 0), d(0, 1)]],
2125+
[[d(1, 0), d(1, 0)], [d(1, 0), d(1, 0)], [d(1, 0), d(1, 0)]],
21272126
],
21282127
regret_tol=TOL,
21292128
prob_tol=TOL,
@@ -2136,8 +2135,7 @@ def test_nash_strategy_solver_w_start(test_case: EquilibriumTestCaseWithStart, s
21362135
factory=functools.partial(games.read_from_file, "3_player_with_nonterm_outcomes.efg"),
21372136
solver=functools.partial(gbt.nash.enumpoly_solve, stop_after=None),
21382137
expected=[
2139-
[[d(1, 0), d(1, 0)], [d(1, 0), d("1/2", "1/2")], [d(1, 0), d(1, 0)]],
2140-
[[d(1, 0), d(1, 0)], [d(1, 0), d("1/2", "1/2")], [d(1, 0), d(0, 1)]],
2138+
[[d(1, 0), d(1, 0)], [d(1, 0), d(1, 0)], [d(1, 0), d(1, 0)]],
21412139
],
21422140
regret_tol=TOL,
21432141
prob_tol=TOL,
@@ -2170,10 +2168,8 @@ def test_nash_strategy_solver_w_start(test_case: EquilibriumTestCaseWithStart, s
21702168
# candidate,10,10,1000,10000
21712169
[[d(1, 0)], [d(1, 0), d(1, 0, 0, 0)], [d(1, 0, 0, 0, 0)]],
21722170
# candidate,01,00,0000,00000
2173-
[[d(0, 1)], [d(1, 0), d("1/4", "1/4", "1/4", "1/4")],
2174-
[d("1/5", "1/5", "1/5", "1/5", "1/5")]], # only 1 off path
2175-
[[d(0, 1)], [d(0, 1), d("1/4", "1/4", "1/4", "1/4")],
2176-
[d("1/5", "1/5", "1/5", "1/5", "1/5")]],
2171+
[[d(0, 1)], [d(1, 0), d(1, 0, 0, 0)],
2172+
[d(1, 0, 0, 0, 0)]],
21772173
],
21782174
regret_tol=TOL,
21792175
prob_tol=TOL,
@@ -2191,13 +2187,8 @@ def test_nash_strategy_solver_w_start(test_case: EquilibriumTestCaseWithStart, s
21912187
[[d(1, 0)], [d(1, 0), d(1, 0, 0, 0)], [d(1, 0, 0, 0, 0)]],
21922188
[
21932189
[d(0, 1)],
2194-
[d(1, 0), d("1/4", "1/4", "1/4", "1/4")],
2195-
[d("1/5", "1/5", "1/5", "1/5", "1/5")],
2196-
],
2197-
[
2198-
[d(0, 1)],
2199-
[d(0, 1), d("1/4", "1/4", "1/4", "1/4")],
2200-
[d("1/5", "1/5", "1/5", "1/5", "1/5")],
2190+
[d(1, 0), d(1, 0, 0, 0)],
2191+
[d(1, 0, 0, 0, 0)],
22012192
],
22022193
],
22032194
regret_tol=TOL,
@@ -2216,13 +2207,8 @@ def test_nash_strategy_solver_w_start(test_case: EquilibriumTestCaseWithStart, s
22162207
expected=[
22172208
[
22182209
[d(0, 1)],
2219-
[d(1, 0), d("1/4", "1/4", "1/4", "1/4")],
2220-
[d("1/5", "1/5", "1/5", "1/5", "1/5")],
2221-
],
2222-
[
2223-
[d(0, 1)],
2224-
[d(0, 1), d("1/4", "1/4", "1/4", "1/4")],
2225-
[d("1/5", "1/5", "1/5", "1/5", "1/5")],
2210+
[d(1, 0), d(1, 0, 0, 0)],
2211+
[d(1, 0, 0, 0, 0)],
22262212
],
22272213
],
22282214
regret_tol=TOL,
@@ -2239,8 +2225,7 @@ def test_nash_strategy_solver_w_start(test_case: EquilibriumTestCaseWithStart, s
22392225
),
22402226
solver=functools.partial(gbt.nash.enumpoly_solve, stop_after=None),
22412227
expected=[
2242-
[[d(0, 1), d("1/5", "1/5", "1/5", "1/5", "1/5")], [d(1, 0)]],
2243-
[[d(0, 1), d("1/5", "1/5", "1/5", "1/5", "1/5")], [d(0, 1)]],
2228+
[[d(0, 1), d(1, 0, 0, 0, 0)], [d(1, 0)]],
22442229
],
22452230
regret_tol=TOL,
22462231
prob_tol=TOL,

0 commit comments

Comments
 (0)