Skip to content

Commit d1df82a

Browse files
committed
Clarify definitions of behaviour profile values for unreached information sets.
This clarifies how quantities related to mixed behaviour profiles are defined for information sets which are not reached.

* Beliefs, action values, infoset values, and node values are not well-defined in this case. Therefore, these functions now return a `std::optional` in C++, with `std::nullopt` when not defined. The Python equivalents now return `None`.
* Agent regret concepts (action regret, infoset regret, max agent regret) are well-defined even when information sets are not reachable: from the definition of the (multi)agent form of a game, these are zero.
* Along similar lines to the above, in the agent Lyapunov value, the contribution to the value of unreached information sets is set to zero.

Closes #446.
1 parent adbe5c6 commit d1df82a

8 files changed

Lines changed: 159 additions & 50 deletions

File tree

src/games/behavmixed.cc

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -260,8 +260,11 @@ template <class T> T MixedBehaviorProfile<T>::GetAgentLiapValue() const
260260
{
261261
CheckVersion();
262262
EnsureRegrets();
263-
auto value = static_cast<T>(0);
263+
T value{0};
264264
for (auto infoset : m_support.GetGame()->GetInfosets()) {
265+
if (GetInfosetProb(infoset) == T{0}) {
266+
continue;
267+
}
265268
for (auto action : m_support.GetActions(infoset)) {
266269
value += sqr(std::max(m_cache.m_actionValues[action] - m_cache.m_infosetValues[infoset],
267270
static_cast<T>(0)));
@@ -285,10 +288,14 @@ template <class T> T MixedBehaviorProfile<T>::GetInfosetProb(const GameInfoset &
285288
[&](const auto &node) -> T { return m_cache.m_realizProbs[node]; });
286289
}
287290

288-
template <class T> const T &MixedBehaviorProfile<T>::GetBeliefProb(const GameNode &node) const
291+
template <class T>
292+
std::optional<T> MixedBehaviorProfile<T>::GetBeliefProb(const GameNode &node) const
289293
{
290294
CheckVersion();
291295
EnsureBeliefs();
296+
if (!node->GetInfoset() || GetInfosetProb(node->GetInfoset()) == T{0}) {
297+
return std::nullopt;
298+
}
292299
return m_cache.m_beliefs[node];
293300
}
294301

@@ -304,18 +311,25 @@ template <class T> Vector<T> MixedBehaviorProfile<T>::GetPayoff(const GameNode &
304311
}
305312

306313
template <class T>
307-
const T &MixedBehaviorProfile<T>::GetPayoff(const GamePlayer &p_player,
308-
const GameNode &p_node) const
314+
std::optional<T> MixedBehaviorProfile<T>::GetPayoff(const GamePlayer &p_player,
315+
const GameNode &p_node) const
309316
{
310317
CheckVersion();
311318
EnsureNodeValues();
319+
if (p_node->GetInfoset() && GetInfosetProb(p_node->GetInfoset()) == T{0}) {
320+
return std::nullopt;
321+
}
312322
return m_cache.m_nodeValues[p_node][p_player];
313323
}
314324

315-
template <class T> const T &MixedBehaviorProfile<T>::GetPayoff(const GameInfoset &p_infoset) const
325+
template <class T>
326+
std::optional<T> MixedBehaviorProfile<T>::GetPayoff(const GameInfoset &p_infoset) const
316327
{
317328
CheckVersion();
318329
EnsureRegrets();
330+
if (GetInfosetProb(p_infoset) == T{0}) {
331+
return std::nullopt;
332+
}
319333
return m_cache.m_infosetValues[p_infoset];
320334
}
321335

@@ -331,24 +345,33 @@ template <class T> T MixedBehaviorProfile<T>::GetActionProb(const GameAction &ac
331345
return m_probs[m_profileIndex.at(action)];
332346
}
333347

334-
template <class T> const T &MixedBehaviorProfile<T>::GetPayoff(const GameAction &act) const
348+
template <class T> std::optional<T> MixedBehaviorProfile<T>::GetPayoff(const GameAction &act) const
335349
{
336350
CheckVersion();
337351
EnsureActionValues();
352+
if (GetInfosetProb(act->GetInfoset()) == T{0}) {
353+
return std::nullopt;
354+
}
338355
return m_cache.m_actionValues[act];
339356
}
340357

341-
template <class T> const T &MixedBehaviorProfile<T>::GetRegret(const GameAction &act) const
358+
template <class T> T MixedBehaviorProfile<T>::GetRegret(const GameAction &act) const
342359
{
343360
CheckVersion();
344361
EnsureRegrets();
362+
if (GetInfosetProb(act->GetInfoset()) == T{0}) {
363+
return T{0};
364+
}
345365
return m_cache.m_regret.at(act);
346366
}
347367

348368
template <class T> T MixedBehaviorProfile<T>::GetRegret(const GameInfoset &p_infoset) const
349369
{
350370
CheckVersion();
351371
EnsureRegrets();
372+
if (GetInfosetProb(p_infoset) == T{0}) {
373+
return T{0};
374+
}
352375
T br_payoff = maximize_function(p_infoset->GetActions(), [this](const auto &action) -> T {
353376
return m_cache.m_actionValues.at(action);
354377
});

src/games/behavmixed.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -241,11 +241,11 @@ template <class T> class MixedBehaviorProfile {
241241

242242
const T &GetRealizProb(const GameNode &node) const;
243243
T GetInfosetProb(const GameInfoset &p_infoset) const;
244-
const T &GetBeliefProb(const GameNode &node) const;
244+
std::optional<T> GetBeliefProb(const GameNode &node) const;
245245
Vector<T> GetPayoff(const GameNode &node) const;
246-
const T &GetPayoff(const GamePlayer &player, const GameNode &node) const;
247-
const T &GetPayoff(const GameInfoset &p_infoset) const;
248-
const T &GetPayoff(const GameAction &act) const;
246+
std::optional<T> GetPayoff(const GamePlayer &player, const GameNode &node) const;
247+
std::optional<T> GetPayoff(const GameInfoset &p_infoset) const;
248+
std::optional<T> GetPayoff(const GameAction &act) const;
249249
T GetActionProb(const GameAction &act) const;
250250

251251
/// @brief Computes the regret to playing \p p_action
@@ -256,7 +256,7 @@ template <class T> class MixedBehaviorProfile {
256256
/// @param[in] p_action The action to compute the regret for.
257257
/// @sa GetRegret(const GameInfoset &) const
258258
/// GetAgentMaxRegret() const
259-
const T &GetRegret(const GameAction &p_action) const;
259+
T GetRegret(const GameAction &p_action) const;
260260

261261
/// @brief Computes the regret at information set \p p_infoset
262262
/// @details Computes the regret at the information set to the player of playing

src/gui/analysis.cc

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -240,9 +240,9 @@ std::string AnalysisProfileList<T>::GetBeliefProb(const GameNode &p_node, int p_
240240
}
241241

242242
try {
243-
if (m_behavProfiles[index]->GetInfosetProb(p_node->GetInfoset()) > Rational(0)) {
244-
return lexical_cast<std::string>(m_behavProfiles[index]->GetBeliefProb(p_node),
245-
m_doc->GetStyle().NumDecimals());
243+
auto belief = m_behavProfiles[index]->GetBeliefProb(p_node);
244+
if (belief.has_value()) {
245+
return lexical_cast<std::string>(belief.value(), m_doc->GetStyle().NumDecimals());
246246
}
247247
// We don't compute assessments yet!
248248
return "*";
@@ -295,9 +295,9 @@ std::string AnalysisProfileList<T>::GetInfosetValue(const GameNode &p_node, int
295295
}
296296

297297
try {
298-
if (m_behavProfiles[index]->GetInfosetProb(p_node->GetInfoset()) > Rational(0)) {
299-
return lexical_cast<std::string>(m_behavProfiles[index]->GetPayoff(p_node->GetInfoset()),
300-
m_doc->GetStyle().NumDecimals());
298+
auto payoff = m_behavProfiles[index]->GetPayoff(p_node->GetInfoset());
299+
if (payoff.has_value()) {
300+
return lexical_cast<std::string>(payoff.value(), m_doc->GetStyle().NumDecimals());
301301
}
302302
// In the absence of beliefs, this is not well-defined in general
303303
return "*";
@@ -367,10 +367,10 @@ std::string AnalysisProfileList<T>::GetActionValue(const GameNode &p_node, int p
367367
}
368368

369369
try {
370-
if (m_behavProfiles[index]->GetInfosetProb(p_node->GetInfoset()) > Rational(0)) {
371-
return lexical_cast<std::string>(
372-
m_behavProfiles[index]->GetPayoff(p_node->GetInfoset()->GetAction(p_act)),
373-
m_doc->GetStyle().NumDecimals());
370+
std::optional<T> actionValue =
371+
m_behavProfiles[index]->GetPayoff(p_node->GetInfoset()->GetAction(p_act));
372+
if (actionValue.has_value()) {
373+
return lexical_cast<std::string>(actionValue.value(), m_doc->GetStyle().NumDecimals());
374374
}
375375
// In the absence of beliefs, this is not well-defined
376376
return "*";

src/pygambit/behavmixed.pxi

Lines changed: 60 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -587,10 +587,13 @@ class MixedBehaviorProfile:
587587
self._check_validity()
588588
return self._is_defined_at(self.game._resolve_infoset(infoset, "is_defined_at"))
589589

590-
def belief(self, node: NodeReference) -> ProfileDType:
590+
def belief(self, node: NodeReference) -> ProfileDType | None:
591591
"""Returns the conditional probability that a node is reached, given that
592592
its information set is reached.
593593

594+
If the information set is not reachable, the belief is not well-defined.
595+
In this case, the function returns `None`.
596+
594597
Parameters
595598
----------
596599
node
@@ -630,10 +633,13 @@ class MixedBehaviorProfile:
630633
return self._payoff(resolved_player)
631634

632635
def node_value(self, player: PlayerReference,
633-
node: NodeReference) -> ProfileDType:
636+
node: NodeReference) -> ProfileDType | None:
634637
"""Returns the expected payoff to `player` conditional on play reaching `node`,
635638
if all players play according to the profile.
636639

640+
If the node's information set is not reachable, in general the node value
641+
is not well-defined. In this case, the function returns `None`.
642+
637643
Parameters
638644
----------
639645
player : Player or str
@@ -661,10 +667,13 @@ class MixedBehaviorProfile:
661667
raise ValueError("node_value() is not defined for the chance player")
662668
return self._node_value(resolved_player, resolved_node)
663669

664-
def infoset_value(self, infoset: InfosetReference) -> ProfileDType:
670+
def infoset_value(self, infoset: InfosetReference) -> ProfileDType | None:
665671
"""Returns the expected payoff to the player conditional on reaching an information set,
666672
if all players play according to the profile.
667673

674+
If the information set is not reachable, the expected payoff is not well-defined.
675+
In this case, the function returns `None`.
676+
668677
Parameters
669678
----------
670679
infoset : Infoset or str
@@ -686,10 +695,13 @@ class MixedBehaviorProfile:
686695
raise ValueError("infoset_value() is not defined for the chance player")
687696
return self._infoset_value(resolved_infoset)
688697

689-
def action_value(self, action: ActionReference) -> ProfileDType:
698+
def action_value(self, action: ActionReference) -> ProfileDType | None:
690699
"""Returns the expected payoff to the player of playing an action conditional on reaching
691700
its information set, if all players play according to the profile.
692701

702+
If the information set is not reachable, the expected payoff is not well-defined.
703+
In this case, the function returns `None`.
704+
693705
Parameters
694706
----------
695707
action : Action or str
@@ -704,6 +716,10 @@ class MixedBehaviorProfile:
704716
If `action` is a string and no action in the game has that label.
705717
ValueError
706718
If `action` resolves to an action that belongs to the chance player
719+
720+
See also
721+
--------
722+
MixedBehaviorProfile.infoset_prob
707723
"""
708724
self._check_validity()
709725
resolved_action = self.game._resolve_action(action, "action_value")
@@ -945,22 +961,34 @@ class MixedBehaviorProfileDouble(MixedBehaviorProfile):
945961
return deref(self.profile).GetPayoff(player.player)
946962

947963
def _belief(self, node: Node) -> float:
948-
return deref(self.profile).GetBeliefProb(node.node)
964+
cdef optional[double] value = deref(self.profile).GetBeliefProb(node.node)
965+
if value.has_value():
966+
return value.value()
967+
return None
949968

950969
def _realiz_prob(self, node: Node) -> float:
951970
return deref(self.profile).GetRealizProb(node.node)
952971

953972
def _infoset_prob(self, infoset: Infoset) -> float:
954973
return deref(self.profile).GetInfosetProb(infoset.infoset)
955974

956-
def _infoset_value(self, infoset: Infoset) -> float:
957-
return deref(self.profile).GetPayoff(infoset.infoset)
975+
def _infoset_value(self, infoset: Infoset) -> float | None:
976+
cdef optional[double] value = deref(self.profile).GetPayoff(infoset.infoset)
977+
if value.has_value():
978+
return value.value()
979+
return None
958980

959-
def _node_value(self, player: Player, node: Node) -> float:
960-
return deref(self.profile).GetPayoff(player.player, node.node)
981+
def _node_value(self, player: Player, node: Node) -> float | None:
982+
cdef optional[double] value = deref(self.profile).GetPayoff(player.player, node.node)
983+
if value.has_value():
984+
return value.value()
985+
return None
961986

962-
def _action_value(self, action: Action) -> float:
963-
return deref(self.profile).GetPayoff(action.action)
987+
def _action_value(self, action: Action) -> float | None:
988+
cdef optional[double] value = deref(self.profile).GetPayoff(action.action)
989+
if value.has_value():
990+
return value.value()
991+
return None
964992

965993
def _action_regret(self, action: Action) -> float:
966994
return deref(self.profile).GetRegret(action.action)
@@ -1047,22 +1075,34 @@ class MixedBehaviorProfileRational(MixedBehaviorProfile):
10471075
return rat_to_py(deref(self.profile).GetPayoff(player.player))
10481076

10491077
def _belief(self, node: Node) -> Rational:
1050-
return rat_to_py(deref(self.profile).GetBeliefProb(node.node))
1078+
cdef optional[c_Rational] value = deref(self.profile).GetBeliefProb(node.node)
1079+
if value.has_value():
1080+
return rat_to_py(value.value())
1081+
return None
10511082

10521083
def _realiz_prob(self, node: Node) -> Rational:
10531084
return rat_to_py(deref(self.profile).GetRealizProb(node.node))
10541085

10551086
def _infoset_prob(self, infoset: Infoset) -> Rational:
10561087
return rat_to_py(deref(self.profile).GetInfosetProb(infoset.infoset))
10571088

1058-
def _infoset_value(self, infoset: Infoset) -> Rational:
1059-
return rat_to_py(deref(self.profile).GetPayoff(infoset.infoset))
1060-
1061-
def _node_value(self, player: Player, node: Node) -> Rational:
1062-
return rat_to_py(deref(self.profile).GetPayoff(player.player, node.node))
1063-
1064-
def _action_value(self, action: Action) -> Rational:
1065-
return rat_to_py(deref(self.profile).GetPayoff(action.action))
1089+
def _infoset_value(self, infoset: Infoset) -> Rational | None:
1090+
cdef optional[c_Rational] value = deref(self.profile).GetPayoff(infoset.infoset)
1091+
if value.has_value():
1092+
return rat_to_py(value.value())
1093+
return None
1094+
1095+
def _node_value(self, player: Player, node: Node) -> Rational | None:
1096+
cdef optional[c_Rational] value = deref(self.profile).GetPayoff(player.player, node.node)
1097+
if value.has_value():
1098+
return rat_to_py(value.value())
1099+
return None
1100+
1101+
def _action_value(self, action: Action) -> Rational | None:
1102+
cdef optional[c_Rational] value = deref(self.profile).GetPayoff(action.action)
1103+
if value.has_value():
1104+
return rat_to_py(value.value())
1105+
return None
10661106

10671107
def _action_regret(self, action: Action) -> Rational:
10681108
return rat_to_py(deref(self.profile).GetRegret(action.action))

src/pygambit/gambit.pxd

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ from libcpp.memory cimport shared_ptr, unique_ptr
44
from libcpp.list cimport list as stdlist
55
from libcpp.vector cimport vector as stdvector
66
from libcpp.set cimport set as stdset
7+
from libcpp.optional cimport optional
78

89

910
cdef extern from "gambit.h":
@@ -362,12 +363,12 @@ cdef extern from "games/behavmixed.h" namespace "Gambit":
362363
T getitem "operator[]"(int) except +IndexError
363364
T getaction "operator[]"(c_GameAction) except +IndexError
364365
T GetPayoff(c_GamePlayer) except +
365-
T GetBeliefProb(c_GameNode) except +
366+
optional[T] GetBeliefProb(c_GameNode) except +
366367
T GetRealizProb(c_GameNode) except +
367368
T GetInfosetProb(c_GameInfoset) except +
368-
T GetPayoff(c_GameInfoset) except +
369-
T GetPayoff(c_GamePlayer, c_GameNode) except +
370-
T GetPayoff(c_GameAction) except +
369+
optional[T] GetPayoff(c_GameInfoset) except +
370+
optional[T] GetPayoff(c_GamePlayer, c_GameNode) except +
371+
optional[T] GetPayoff(c_GameAction) except +
371372
T GetRegret(c_GameAction) except +
372373
T GetRegret(c_GameInfoset) except +
373374
T GetAgentMaxRegret() except +

src/solvers/liap/efgliap.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,9 @@ AgentLyapunovFunction::PenalizedLiapValue(const MixedBehaviorProfile<double> &p_
7474
double value = 0.0;
7575
// Liapunov function proper.
7676
for (const auto &infoset : p_profile.GetGame()->GetInfosets()) {
77-
double infosetValue = p_profile.GetPayoff(infoset);
77+
double infosetValue = p_profile.GetPayoff(infoset).value();
7878
value += sum_function(infoset->GetActions(), [&](const auto &action) -> double {
79-
return sqr(std::max(m_scale * (p_profile.GetPayoff(action) - infosetValue), 0.0));
79+
return sqr(std::max(m_scale * (p_profile.GetPayoff(action).value() - infosetValue), 0.0));
8080
});
8181
}
8282
// Penalty function for non-negativity constraint for each action

src/tools/util.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,10 @@ void MixedBehaviorProfileDetailRenderer<T>::Render(const MixedBehaviorProfile<T>
229229
m_stream << lexical_cast<std::string>(p_profile[action], m_numDecimals);
230230
m_stream << " ";
231231
m_stream << std::setw(11);
232-
m_stream << lexical_cast<std::string>(p_profile.GetPayoff(action), m_numDecimals);
232+
std::optional<T> actionValue = p_profile.GetPayoff(action);
233+
if (actionValue.has_value()) {
234+
m_stream << lexical_cast<std::string>(actionValue.value(), m_numDecimals);
235+
}
233236
m_stream << std::endl;
234237
}
235238
}
@@ -253,7 +256,13 @@ void MixedBehaviorProfileDetailRenderer<T>::Render(const MixedBehaviorProfile<T>
253256
m_stream << std::setw(7) << node->GetNumber() << " ";
254257
}
255258
m_stream << std::setw(11);
256-
m_stream << lexical_cast<std::string>(p_profile.GetBeliefProb(node), m_numDecimals);
259+
auto belief = p_profile.GetBeliefProb(node);
260+
if (belief.has_value()) {
261+
m_stream << lexical_cast<std::string>(belief.value(), m_numDecimals);
262+
}
263+
else {
264+
m_stream << "";
265+
}
257266
m_stream << " ";
258267
m_stream << std::setw(11);
259268
m_stream << lexical_cast<std::string>(p_profile.GetRealizProb(node), m_numDecimals);

0 commit comments

Comments
 (0)