Clarify definitions of behaviour profile values for unreached information sets.

tturocy · tturocy · commit d1df82ab61a9 · 2026-03-19T16:27:15.000Z
This clarifies how quantities related to mixed behaviour profiles are defined for information sets which are not reached. * Beliefs, action values, infoset values, and node values are not well-defined in this case. Therefore, these functions now return a `std::optional` in C++, with `std::nullopt` when not defined. The Python equivalents now return `None`. * Agent regret concepts (action regret, infoset regret, max agent regret) are well-defined despite information sets not being reachable: from the definition of the (multi)agent form of a game, these are zero. * Along similar lines to the above, in the agent Lyapunov value, the contribution to the value of unreached information sets is set to zero. Closes #446.
diff --git a/src/games/behavmixed.cc b/src/games/behavmixed.cc
@@ -260,8 +260,11 @@ template <class T> T MixedBehaviorProfile<T>::GetAgentLiapValue() const
 {
   CheckVersion();
   EnsureRegrets();
-  auto value = static_cast<T>(0);
+  T value{0};
   for (auto infoset : m_support.GetGame()->GetInfosets()) {
+    if (GetInfosetProb(infoset) == T{0}) {
+      continue;
+    }
     for (auto action : m_support.GetActions(infoset)) {
       value += sqr(std::max(m_cache.m_actionValues[action] - m_cache.m_infosetValues[infoset],
                             static_cast<T>(0)));
@@ -285,10 +288,14 @@ template <class T> T MixedBehaviorProfile<T>::GetInfosetProb(const GameInfoset &
                       [&](const auto &node) -> T { return m_cache.m_realizProbs[node]; });
 }
 
-template <class T> const T &MixedBehaviorProfile<T>::GetBeliefProb(const GameNode &node) const
+template <class T>
+std::optional<T> MixedBehaviorProfile<T>::GetBeliefProb(const GameNode &node) const
 {
   CheckVersion();
   EnsureBeliefs();
+  if (!node->GetInfoset() || GetInfosetProb(node->GetInfoset()) == T{0}) {
+    return std::nullopt;
+  }
   return m_cache.m_beliefs[node];
 }
 
@@ -304,18 +311,25 @@ template <class T> Vector<T> MixedBehaviorProfile<T>::GetPayoff(const GameNode &
 }
 
 template <class T>
-const T &MixedBehaviorProfile<T>::GetPayoff(const GamePlayer &p_player,
-                                            const GameNode &p_node) const
+std::optional<T> MixedBehaviorProfile<T>::GetPayoff(const GamePlayer &p_player,
+                                                    const GameNode &p_node) const
 {
   CheckVersion();
   EnsureNodeValues();
+  if (p_node->GetInfoset() && GetInfosetProb(p_node->GetInfoset()) == T{0}) {
+    return std::nullopt;
+  }
   return m_cache.m_nodeValues[p_node][p_player];
 }
 
-template <class T> const T &MixedBehaviorProfile<T>::GetPayoff(const GameInfoset &p_infoset) const
+template <class T>
+std::optional<T> MixedBehaviorProfile<T>::GetPayoff(const GameInfoset &p_infoset) const
 {
   CheckVersion();
   EnsureRegrets();
+  if (GetInfosetProb(p_infoset) == T{0}) {
+    return std::nullopt;
+  }
   return m_cache.m_infosetValues[p_infoset];
 }
 
@@ -331,24 +345,33 @@ template <class T> T MixedBehaviorProfile<T>::GetActionProb(const GameAction &ac
   return m_probs[m_profileIndex.at(action)];
 }
 
-template <class T> const T &MixedBehaviorProfile<T>::GetPayoff(const GameAction &act) const
+template <class T> std::optional<T> MixedBehaviorProfile<T>::GetPayoff(const GameAction &act) const
 {
   CheckVersion();
   EnsureActionValues();
+  if (GetInfosetProb(act->GetInfoset()) == T{0}) {
+    return std::nullopt;
+  }
   return m_cache.m_actionValues[act];
 }
 
-template <class T> const T &MixedBehaviorProfile<T>::GetRegret(const GameAction &act) const
+template <class T> T MixedBehaviorProfile<T>::GetRegret(const GameAction &act) const
 {
   CheckVersion();
   EnsureRegrets();
+  if (GetInfosetProb(act->GetInfoset()) == T{0}) {
+    return T{0};
+  }
   return m_cache.m_regret.at(act);
 }
 
 template <class T> T MixedBehaviorProfile<T>::GetRegret(const GameInfoset &p_infoset) const
 {
   CheckVersion();
   EnsureRegrets();
+  if (GetInfosetProb(p_infoset) == T{0}) {
+    return T{0};
+  }
   T br_payoff = maximize_function(p_infoset->GetActions(), [this](const auto &action) -> T {
     return m_cache.m_actionValues.at(action);
   });
diff --git a/src/games/behavmixed.h b/src/games/behavmixed.h
@@ -241,11 +241,11 @@ template <class T> class MixedBehaviorProfile {
 
   const T &GetRealizProb(const GameNode &node) const;
   T GetInfosetProb(const GameInfoset &p_infoset) const;
-  const T &GetBeliefProb(const GameNode &node) const;
+  std::optional<T> GetBeliefProb(const GameNode &node) const;
   Vector<T> GetPayoff(const GameNode &node) const;
-  const T &GetPayoff(const GamePlayer &player, const GameNode &node) const;
-  const T &GetPayoff(const GameInfoset &p_infoset) const;
-  const T &GetPayoff(const GameAction &act) const;
+  std::optional<T> GetPayoff(const GamePlayer &player, const GameNode &node) const;
+  std::optional<T> GetPayoff(const GameInfoset &p_infoset) const;
+  std::optional<T> GetPayoff(const GameAction &act) const;
   T GetActionProb(const GameAction &act) const;
 
   /// @brief Computes the regret to playing \p p_action
@@ -256,7 +256,7 @@ template <class T> class MixedBehaviorProfile {
   /// @param[in] p_action  The action to compute the regret for.
   /// @sa GetRegret(const GameInfoset &) const
   ///     GetAgentMaxRegret() const
-  const T &GetRegret(const GameAction &p_action) const;
+  T GetRegret(const GameAction &p_action) const;
 
   /// @brief Computes the regret at information set \p p_infoset
   /// @details Computes the regret at the information set to the player of playing
diff --git a/src/gui/analysis.cc b/src/gui/analysis.cc
@@ -240,9 +240,9 @@ std::string AnalysisProfileList<T>::GetBeliefProb(const GameNode &p_node, int p_
   }
 
   try {
-    if (m_behavProfiles[index]->GetInfosetProb(p_node->GetInfoset()) > Rational(0)) {
-      return lexical_cast<std::string>(m_behavProfiles[index]->GetBeliefProb(p_node),
-                                       m_doc->GetStyle().NumDecimals());
+    auto belief = m_behavProfiles[index]->GetBeliefProb(p_node);
+    if (belief.has_value()) {
+      return lexical_cast<std::string>(belief.value(), m_doc->GetStyle().NumDecimals());
     }
     // We don't compute assessments yet!
     return "*";
@@ -295,9 +295,9 @@ std::string AnalysisProfileList<T>::GetInfosetValue(const GameNode &p_node, int
   }
 
   try {
-    if (m_behavProfiles[index]->GetInfosetProb(p_node->GetInfoset()) > Rational(0)) {
-      return lexical_cast<std::string>(m_behavProfiles[index]->GetPayoff(p_node->GetInfoset()),
-                                       m_doc->GetStyle().NumDecimals());
+    auto payoff = m_behavProfiles[index]->GetPayoff(p_node->GetInfoset());
+    if (payoff.has_value()) {
+      return lexical_cast<std::string>(payoff.value(), m_doc->GetStyle().NumDecimals());
     }
     // In the absence of beliefs, this is not well-defined in general
     return "*";
@@ -367,10 +367,10 @@ std::string AnalysisProfileList<T>::GetActionValue(const GameNode &p_node, int p
   }
 
   try {
-    if (m_behavProfiles[index]->GetInfosetProb(p_node->GetInfoset()) > Rational(0)) {
-      return lexical_cast<std::string>(
-          m_behavProfiles[index]->GetPayoff(p_node->GetInfoset()->GetAction(p_act)),
-          m_doc->GetStyle().NumDecimals());
+    std::optional<T> actionValue =
+        m_behavProfiles[index]->GetPayoff(p_node->GetInfoset()->GetAction(p_act));
+    if (actionValue.has_value()) {
+      return lexical_cast<std::string>(actionValue.value(), m_doc->GetStyle().NumDecimals());
     }
     // In the absence of beliefs, this is not well-defined
     return "*";
diff --git a/src/pygambit/behavmixed.pxi b/src/pygambit/behavmixed.pxi
@@ -587,10 +587,13 @@ class MixedBehaviorProfile:
         self._check_validity()
         return self._is_defined_at(self.game._resolve_infoset(infoset, "is_defined_at"))
 
-    def belief(self, node: NodeReference) -> ProfileDType:
+    def belief(self, node: NodeReference) -> ProfileDType | None:
         """Returns the conditional probability that a node is reached, given that
         its information set is reached.
 
+        If the information set is not reachable, the belief is not well-defined.
+        In this case, the function returns `None`.
+
         Parameters
         ----------
         node
@@ -630,10 +633,13 @@ class MixedBehaviorProfile:
         return self._payoff(resolved_player)
 
     def node_value(self, player: PlayerReference,
-                   node: NodeReference) -> ProfileDType:
+                   node: NodeReference) -> ProfileDType | None:
         """Returns the expected payoff to `player` conditional on play reaching `node`,
         if all players play according to the profile.
 
+        If the node's information set is not reachable, in general the node value
+        is not well-defined.  In this case, the function returns `None`.
+
         Parameters
         ----------
         player : Player or str
@@ -661,10 +667,13 @@ class MixedBehaviorProfile:
             raise ValueError("node_value() is not defined for the chance player")
         return self._node_value(resolved_player, resolved_node)
 
-    def infoset_value(self, infoset: InfosetReference) -> ProfileDType:
+    def infoset_value(self, infoset: InfosetReference) -> ProfileDType | None:
         """Returns the expected payoff to the player conditional on reaching an information set,
         if all players play according to the profile.
 
+        If the information set is not reachable, the expected payoff is not well-defined.
+        In this case, the function returns `None`.
+
         Parameters
         ----------
         infoset : Infoset or str
@@ -686,10 +695,13 @@ class MixedBehaviorProfile:
             raise ValueError("infoset_value() is not defined for the chance player")
         return self._infoset_value(resolved_infoset)
 
-    def action_value(self, action: ActionReference) -> ProfileDType:
+    def action_value(self, action: ActionReference) -> ProfileDType | None:
         """Returns the expected payoff to the player of playing an action conditional on reaching
         its information set, if all players play according to the profile.
 
+        If the information set is not reachable, the expected payoff is not well-defined.
+        In this case, the function returns `None`.
+
         Parameters
         ----------
         action : Action or str
@@ -704,6 +716,10 @@ class MixedBehaviorProfile:
             If `action` is a string and no action in the game has that label.
         ValueError
             If `action` resolves to an action that belongs to the chance player
+
+        See also
+        --------
+        MixedBehaviorProfile.infoset_prob
         """
         self._check_validity()
         resolved_action = self.game._resolve_action(action, "action_value")
@@ -945,22 +961,34 @@ class MixedBehaviorProfileDouble(MixedBehaviorProfile):
         return deref(self.profile).GetPayoff(player.player)
 
     def _belief(self, node: Node) -> float:
-        return deref(self.profile).GetBeliefProb(node.node)
+        cdef optional[double] value = deref(self.profile).GetBeliefProb(node.node)
+        if value.has_value():
+            return value.value()
+        return None
 
     def _realiz_prob(self, node: Node) -> float:
         return deref(self.profile).GetRealizProb(node.node)
 
     def _infoset_prob(self, infoset: Infoset) -> float:
         return deref(self.profile).GetInfosetProb(infoset.infoset)
 
-    def _infoset_value(self, infoset: Infoset) -> float:
-        return deref(self.profile).GetPayoff(infoset.infoset)
+    def _infoset_value(self, infoset: Infoset) -> float | None:
+        cdef optional[double] value = deref(self.profile).GetPayoff(infoset.infoset)
+        if value.has_value():
+            return value.value()
+        return None
 
-    def _node_value(self, player: Player, node: Node) -> float:
-        return deref(self.profile).GetPayoff(player.player, node.node)
+    def _node_value(self, player: Player, node: Node) -> float | None:
+        cdef optional[double] value = deref(self.profile).GetPayoff(player.player, node.node)
+        if value.has_value():
+            return value.value()
+        return None
 
-    def _action_value(self, action: Action) -> float:
-        return deref(self.profile).GetPayoff(action.action)
+    def _action_value(self, action: Action) -> float | None:
+        cdef optional[double] value = deref(self.profile).GetPayoff(action.action)
+        if value.has_value():
+            return value.value()
+        return None
 
     def _action_regret(self, action: Action) -> float:
         return deref(self.profile).GetRegret(action.action)
@@ -1047,22 +1075,34 @@ class MixedBehaviorProfileRational(MixedBehaviorProfile):
         return rat_to_py(deref(self.profile).GetPayoff(player.player))
 
     def _belief(self, node: Node) -> Rational:
-        return rat_to_py(deref(self.profile).GetBeliefProb(node.node))
+        cdef optional[c_Rational] value = deref(self.profile).GetBeliefProb(node.node)
+        if value.has_value():
+            return rat_to_py(value.value())
+        return None
 
     def _realiz_prob(self, node: Node) -> Rational:
         return rat_to_py(deref(self.profile).GetRealizProb(node.node))
 
     def _infoset_prob(self, infoset: Infoset) -> Rational:
         return rat_to_py(deref(self.profile).GetInfosetProb(infoset.infoset))
 
-    def _infoset_value(self, infoset: Infoset) -> Rational:
-        return rat_to_py(deref(self.profile).GetPayoff(infoset.infoset))
-
-    def _node_value(self, player: Player, node: Node) -> Rational:
-        return rat_to_py(deref(self.profile).GetPayoff(player.player, node.node))
-
-    def _action_value(self, action: Action) -> Rational:
-        return rat_to_py(deref(self.profile).GetPayoff(action.action))
+    def _infoset_value(self, infoset: Infoset) -> Rational | None:
+        cdef optional[c_Rational] value = deref(self.profile).GetPayoff(infoset.infoset)
+        if value.has_value():
+            return rat_to_py(value.value())
+        return None
+
+    def _node_value(self, player: Player, node: Node) -> Rational | None:
+        cdef optional[c_Rational] value = deref(self.profile).GetPayoff(player.player, node.node)
+        if value.has_value():
+            return rat_to_py(value.value())
+        return None
+
+    def _action_value(self, action: Action) -> Rational | None:
+        cdef optional[c_Rational] value = deref(self.profile).GetPayoff(action.action)
+        if value.has_value():
+            return rat_to_py(value.value())
+        return None
 
     def _action_regret(self, action: Action) -> Rational:
         return rat_to_py(deref(self.profile).GetRegret(action.action))
diff --git a/src/pygambit/gambit.pxd b/src/pygambit/gambit.pxd
@@ -4,6 +4,7 @@ from libcpp.memory cimport shared_ptr, unique_ptr
 from libcpp.list cimport list as stdlist
 from libcpp.vector cimport vector as stdvector
 from libcpp.set cimport set as stdset
+from libcpp.optional cimport optional
 
 
 cdef extern from "gambit.h":
@@ -362,12 +363,12 @@ cdef extern from "games/behavmixed.h" namespace "Gambit":
         T getitem "operator[]"(int) except +IndexError
         T getaction "operator[]"(c_GameAction) except +IndexError
         T GetPayoff(c_GamePlayer) except +
-        T GetBeliefProb(c_GameNode) except +
+        optional[T] GetBeliefProb(c_GameNode) except +
         T GetRealizProb(c_GameNode) except +
         T GetInfosetProb(c_GameInfoset) except +
-        T GetPayoff(c_GameInfoset) except +
-        T GetPayoff(c_GamePlayer, c_GameNode) except +
-        T GetPayoff(c_GameAction) except +
+        optional[T] GetPayoff(c_GameInfoset) except +
+        optional[T] GetPayoff(c_GamePlayer, c_GameNode) except +
+        optional[T] GetPayoff(c_GameAction) except +
         T GetRegret(c_GameAction) except +
         T GetRegret(c_GameInfoset) except +
         T GetAgentMaxRegret() except +
diff --git a/src/solvers/liap/efgliap.cc b/src/solvers/liap/efgliap.cc
@@ -74,9 +74,15 @@ AgentLyapunovFunction::PenalizedLiapValue(const MixedBehaviorProfile<double> &p_
   double value = 0.0;
   // Liapunov function proper.
   for (const auto &infoset : p_profile.GetGame()->GetInfosets()) {
-    double infosetValue = p_profile.GetPayoff(infoset);
+    // GetPayoff() yields std::nullopt at an information set that is not reached.
+    // Such an information set contributes zero to the Lyapunov value, so skip it
+    // instead of calling .value() on an empty optional (which would throw).
+    const auto infosetValue = p_profile.GetPayoff(infoset);
+    if (!infosetValue.has_value()) {
+      continue;
+    }
     value += sum_function(infoset->GetActions(), [&](const auto &action) -> double {
-      return sqr(std::max(m_scale * (p_profile.GetPayoff(action) - infosetValue), 0.0));
+      return sqr(std::max(m_scale * (p_profile.GetPayoff(action).value() - *infosetValue), 0.0));
     });
   }
   // Penalty function for non-negativity constraint for each action
diff --git a/src/tools/util.h b/src/tools/util.h
@@ -229,7 +229,10 @@ void MixedBehaviorProfileDetailRenderer<T>::Render(const MixedBehaviorProfile<T>
         m_stream << lexical_cast<std::string>(p_profile[action], m_numDecimals);
         m_stream << "   ";
         m_stream << std::setw(11);
-        m_stream << lexical_cast<std::string>(p_profile.GetPayoff(action), m_numDecimals);
+        std::optional<T> actionValue = p_profile.GetPayoff(action);
+        if (actionValue.has_value()) {
+          m_stream << lexical_cast<std::string>(actionValue.value(), m_numDecimals);
+        }
         m_stream << std::endl;
       }
     }
@@ -253,7 +256,13 @@ void MixedBehaviorProfileDetailRenderer<T>::Render(const MixedBehaviorProfile<T>
           m_stream << std::setw(7) << node->GetNumber() << "   ";
         }
         m_stream << std::setw(11);
-        m_stream << lexical_cast<std::string>(p_profile.GetBeliefProb(node), m_numDecimals);
+        auto belief = p_profile.GetBeliefProb(node);
+        if (belief.has_value()) {
+          m_stream << lexical_cast<std::string>(belief.value(), m_numDecimals);
+        }
+        else {
+          m_stream << "";
+        }
         m_stream << "   ";
         m_stream << std::setw(11);
         m_stream << lexical_cast<std::string>(p_profile.GetRealizProb(node), m_numDecimals);
diff --git a/tests/test_behav.py b/tests/test_behav.py