Skip to content

Commit 1dec220

Browse files
committed
Refactor MixedBehaviorProfile caching
This rewrites the cached quantity calculations for MixedBehaviorProfile:

* Uses the preorder and postorder traversals of nodes provided by the game class, and thereby avoids recursion to descend the tree.
* Cleanly separates the computation of each vector of quantities (which will help with further optimisation in future, when we look at the data structures used to represent them).
* Because there is a dependency order among the cached quantities, implements a slightly more sophisticated cache that computes only what is truly needed.

Squashed commits:

* Introduce cache object.
* Remove separate cache invalidation.
* Ensure only as much information as is required is computed.
1 parent c9c00d5 commit 1dec220

2 files changed

Lines changed: 193 additions & 148 deletions

File tree

src/games/behavmixed.cc

Lines changed: 102 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525

2626
#include "gambit.h"
2727
#include "behavmixed.h"
28-
#include "gametree.h"
2928

3029
namespace Gambit {
3130

@@ -171,15 +170,9 @@ MixedBehaviorProfile<T>::operator=(const MixedBehaviorProfile<T> &p_profile)
171170
if (m_support != p_profile.m_support) {
172171
throw MismatchException();
173172
}
174-
InvalidateCache();
175173
m_probs = p_profile.m_probs;
176174
m_gameversion = p_profile.m_gameversion;
177-
map_realizProbs = p_profile.map_realizProbs;
178-
map_beliefs = p_profile.map_beliefs;
179-
map_nodeValues = p_profile.map_nodeValues;
180-
map_infosetValues = p_profile.map_infosetValues;
181-
map_actionValues = p_profile.map_actionValues;
182-
map_regret = p_profile.map_regret;
175+
m_cache = p_profile.m_cache;
183176
return *this;
184177
}
185178

@@ -264,13 +257,12 @@ template <class T> MixedBehaviorProfile<T> MixedBehaviorProfile<T>::ToFullSuppor
264257
template <class T> T MixedBehaviorProfile<T>::GetLiapValue() const
265258
{
266259
CheckVersion();
267-
ComputeSolutionData();
268-
260+
EnsureRegrets();
269261
auto value = static_cast<T>(0);
270262
for (auto infoset : m_support.GetGame()->GetInfosets()) {
271263
for (auto action : m_support.GetActions(infoset)) {
272-
value +=
273-
sqr(std::max(map_actionValues[action] - map_infosetValues[infoset], static_cast<T>(0)));
264+
value += sqr(std::max(m_cache.m_actionValues[action] - m_cache.m_infosetValues[infoset],
265+
static_cast<T>(0)));
274266
}
275267
}
276268
return value;
@@ -279,36 +271,33 @@ template <class T> T MixedBehaviorProfile<T>::GetLiapValue() const
279271
template <class T> const T &MixedBehaviorProfile<T>::GetRealizProb(const GameNode &node) const
280272
{
281273
CheckVersion();
282-
ComputeSolutionData();
283-
return map_realizProbs[node];
274+
EnsureRealizations();
275+
return m_cache.m_realizProbs[node];
284276
}
285277

286-
template <class T> T MixedBehaviorProfile<T>::GetInfosetProb(const GameInfoset &iset) const
278+
template <class T> T MixedBehaviorProfile<T>::GetInfosetProb(const GameInfoset &p_infoset) const
287279
{
288280
CheckVersion();
289-
ComputeSolutionData();
290-
T prob = T(0);
291-
for (auto member : iset->GetMembers()) {
292-
prob += map_realizProbs[member];
293-
}
294-
return prob;
281+
EnsureRealizations();
282+
return sum_function(p_infoset->GetMembers(),
283+
[&](const auto &node) -> T { return m_cache.m_realizProbs[node]; });
295284
}
296285

297286
template <class T> const T &MixedBehaviorProfile<T>::GetBeliefProb(const GameNode &node) const
298287
{
299288
CheckVersion();
300-
ComputeSolutionData();
301-
return map_beliefs[node];
289+
EnsureBeliefs();
290+
return m_cache.m_beliefs[node];
302291
}
303292

304293
template <class T> Vector<T> MixedBehaviorProfile<T>::GetPayoff(const GameNode &node) const
305294
{
306295
CheckVersion();
307-
ComputeSolutionData();
296+
EnsureNodeValues();
308297
Vector<T> ret(node->GetGame()->NumPlayers());
309298
auto players = node->GetGame()->GetPlayers();
310299
std::transform(players.begin(), players.end(), ret.begin(),
311-
[this, node](GamePlayer player) { return map_nodeValues[node][player]; });
300+
[this, node](GamePlayer player) { return m_cache.m_nodeValues[node][player]; });
312301
return ret;
313302
}
314303

@@ -317,15 +306,15 @@ const T &MixedBehaviorProfile<T>::GetPayoff(const GamePlayer &p_player,
317306
const GameNode &p_node) const
318307
{
319308
CheckVersion();
320-
ComputeSolutionData();
321-
return map_nodeValues[p_node][p_player];
309+
EnsureNodeValues();
310+
return m_cache.m_nodeValues[p_node][p_player];
322311
}
323312

324-
template <class T> const T &MixedBehaviorProfile<T>::GetPayoff(const GameInfoset &iset) const
313+
template <class T> const T &MixedBehaviorProfile<T>::GetPayoff(const GameInfoset &p_infoset) const
325314
{
326315
CheckVersion();
327-
ComputeSolutionData();
328-
return map_infosetValues[iset];
316+
EnsureRegrets();
317+
return m_cache.m_infosetValues[p_infoset];
329318
}
330319

331320
template <class T> T MixedBehaviorProfile<T>::GetActionProb(const GameAction &action) const
@@ -343,25 +332,25 @@ template <class T> T MixedBehaviorProfile<T>::GetActionProb(const GameAction &ac
343332
template <class T> const T &MixedBehaviorProfile<T>::GetPayoff(const GameAction &act) const
344333
{
345334
CheckVersion();
346-
ComputeSolutionData();
347-
return map_actionValues[act];
335+
EnsureActionValues();
336+
return m_cache.m_actionValues[act];
348337
}
349338

350339
template <class T> const T &MixedBehaviorProfile<T>::GetRegret(const GameAction &act) const
351340
{
352341
CheckVersion();
353-
ComputeSolutionData();
354-
return map_regret.at(act);
342+
EnsureRegrets();
343+
return m_cache.m_regret.at(act);
355344
}
356345

357346
template <class T> T MixedBehaviorProfile<T>::GetRegret(const GameInfoset &p_infoset) const
358347
{
359348
CheckVersion();
360-
ComputeSolutionData();
349+
EnsureRegrets();
361350
T br_payoff = maximize_function(p_infoset->GetActions(), [this](const auto &action) -> T {
362-
return map_actionValues.at(action);
351+
return m_cache.m_actionValues.at(action);
363352
});
364-
return br_payoff - map_infosetValues[p_infoset];
353+
return br_payoff - m_cache.m_infosetValues[p_infoset];
365354
}
366355

367356
template <class T> T MixedBehaviorProfile<T>::GetMaxRegret() const
@@ -418,7 +407,7 @@ T MixedBehaviorProfile<T>::DiffActionValue(const GameAction &p_action,
418407
const GameAction &p_oppAction) const
419408
{
420409
CheckVersion();
421-
ComputeSolutionData();
410+
EnsureActionValues();
422411
T deriv = T(0);
423412
const GameInfoset infoset = p_action->GetInfoset();
424413
const GamePlayer player = p_action->GetInfoset()->GetPlayer();
@@ -427,9 +416,9 @@ T MixedBehaviorProfile<T>::DiffActionValue(const GameAction &p_action,
427416
const GameNode child = member->GetChild(p_action);
428417

429418
deriv += DiffRealizProb(member, p_oppAction) *
430-
(map_nodeValues[child][player] - map_actionValues[p_action]);
431-
deriv +=
432-
map_realizProbs[member] * DiffNodeValue(member->GetChild(p_action), player, p_oppAction);
419+
(m_cache.m_nodeValues[child][player] - m_cache.m_actionValues[p_action]);
420+
deriv += m_cache.m_realizProbs[member] *
421+
DiffNodeValue(member->GetChild(p_action), player, p_oppAction);
433422
}
434423

435424
return deriv / GetInfosetProb(p_action->GetInfoset());
@@ -440,7 +429,7 @@ T MixedBehaviorProfile<T>::DiffRealizProb(const GameNode &p_node,
440429
const GameAction &p_oppAction) const
441430
{
442431
CheckVersion();
443-
ComputeSolutionData();
432+
EnsureActionValues();
444433
T deriv = T(1);
445434
bool isPrec = false;
446435
GameNode node = p_node;
@@ -463,7 +452,7 @@ T MixedBehaviorProfile<T>::DiffNodeValue(const GameNode &p_node, const GamePlaye
463452
const GameAction &p_oppAction) const
464453
{
465454
CheckVersion();
466-
ComputeSolutionData();
455+
EnsureActionValues();
467456

468457
if (p_node->IsTerminal()) {
469458
// If we reach a terminal node and haven't encountered p_oppAction,
@@ -474,7 +463,7 @@ T MixedBehaviorProfile<T>::DiffNodeValue(const GameNode &p_node, const GamePlaye
474463
// We've encountered the action; since we assume perfect recall,
475464
// we won't encounter it again, and the downtree value must
476465
// be the same.
477-
return map_nodeValues[p_node->GetChild(p_oppAction)][p_player];
466+
return m_cache.m_nodeValues[p_node->GetChild(p_oppAction)][p_player];
478467
}
479468
else {
480469
T deriv = T(0);
@@ -490,99 +479,97 @@ T MixedBehaviorProfile<T>::DiffNodeValue(const GameNode &p_node, const GamePlaye
490479
// MixedBehaviorProfile<T>: Cached profile information
491480
//========================================================================
492481

493-
// compute realization probabilities for nodes and isets.
494-
template <class T>
495-
void MixedBehaviorProfile<T>::ComputePass1_realizProbs(const GameNode &node) const
482+
template <class T> void MixedBehaviorProfile<T>::ComputeRealizationProbs() const
496483
{
497-
map_realizProbs[node] = (node->GetParent()) ? map_realizProbs[node->GetParent()] *
498-
GetActionProb(node->GetPriorAction())
499-
: T(1);
484+
m_cache.m_realizProbs.clear();
500485

501-
for (auto childNode : node->GetChildren()) {
502-
ComputePass1_realizProbs(childNode);
486+
const auto &game = m_support.GetGame();
487+
m_cache.m_realizProbs[game->GetRoot()] = static_cast<T>(1);
488+
for (const auto &node : game->GetNodes()) {
489+
const T incomingProb = m_cache.m_realizProbs[node];
490+
for (auto [action, child] : node->GetActions()) {
491+
m_cache.m_realizProbs[child] = incomingProb * GetActionProb(action);
492+
}
503493
}
504494
}
505495

506-
template <class T>
507-
void MixedBehaviorProfile<T>::ComputePass2_beliefs_nodeValues_actionValues(
508-
const GameNode &node) const
496+
template <class T> void MixedBehaviorProfile<T>::ComputeBeliefs() const
509497
{
510-
if (node->GetOutcome()) {
511-
const GameOutcome outcome = node->GetOutcome();
512-
for (auto player : m_support.GetGame()->GetPlayers()) {
513-
map_nodeValues[node][player] += outcome->GetPayoff<T>(player);
514-
}
515-
}
516-
517-
if (node->IsTerminal()) {
518-
return;
519-
}
498+
m_cache.m_beliefs.clear();
520499

521-
const GameInfoset iset = node->GetInfoset();
522-
auto nodes = iset->GetMembers();
523-
T infosetProb =
524-
std::accumulate(nodes.begin(), nodes.end(), T(0),
525-
[this](T total, GameNode node) { return total + map_realizProbs[node]; });
526-
527-
if (infosetProb != T(0)) {
528-
map_beliefs[node] = map_realizProbs[node] / infosetProb;
529-
}
530-
531-
// push down payoffs from outcomes attached to non-terminal nodes
532-
for (auto child : node->GetChildren()) {
533-
map_nodeValues[child] = map_nodeValues[node];
534-
}
535-
536-
for (auto player : m_support.GetGame()->GetPlayers()) {
537-
map_nodeValues[node][player] = T(0);
500+
for (const auto &infoset : m_support.GetGame()->GetInfosets()) {
501+
const T infosetProb = sum_function(
502+
infoset->GetMembers(), [&](const auto &node) -> T { return m_cache.m_realizProbs[node]; });
503+
if (infosetProb == static_cast<T>(0)) {
504+
continue;
505+
}
506+
for (const auto &node : infoset->GetMembers()) {
507+
m_cache.m_beliefs[node] = m_cache.m_realizProbs[node] / infosetProb;
508+
}
538509
}
510+
}
539511

540-
for (auto child : node->GetChildren()) {
541-
ComputePass2_beliefs_nodeValues_actionValues(child);
542-
543-
const GameAction act = child->GetPriorAction();
512+
template <class T> void MixedBehaviorProfile<T>::ComputeNodeValues() const
513+
{
514+
const auto &game = m_support.GetGame();
515+
m_cache.m_nodeValues.clear();
544516

545-
for (auto player : m_support.GetGame()->GetPlayers()) {
546-
map_nodeValues[node][player] += GetActionProb(act) * map_nodeValues[child][player];
517+
for (const auto &node : game->GetNodes(TraversalOrder::Postorder)) {
518+
auto &vals = m_cache.m_nodeValues[node];
519+
for (const auto &player : game->GetPlayers()) {
520+
vals[player] = static_cast<T>(0);
547521
}
548-
549-
if (!iset->IsChanceInfoset()) {
550-
map_actionValues[act] += (infosetProb != T(0))
551-
? map_beliefs[node] * map_nodeValues[child][iset->GetPlayer()]
552-
: T(0);
522+
if (node->GetOutcome()) {
523+
const GameOutcome &outcome = node->GetOutcome();
524+
for (const auto &player : game->GetPlayers()) {
525+
vals[player] += outcome->GetPayoff<T>(player);
526+
}
527+
}
528+
for (auto [action, child] : node->GetActions()) {
529+
const T p = GetActionProb(action);
530+
for (const auto &player : game->GetPlayers()) {
531+
vals[player] += p * m_cache.m_nodeValues[child][player];
532+
}
553533
}
554534
}
555535
}
556536

557-
template <class T> void MixedBehaviorProfile<T>::ComputePass3_infosetValues_regret() const
537+
template <class T> void MixedBehaviorProfile<T>::ComputeActionValues() const
558538
{
559-
// Populate
560-
for (auto infoset : m_support.GetGame()->GetInfosets()) {
561-
map_infosetValues[infoset] = T(0);
562-
for (auto action : infoset->GetActions()) {
563-
map_infosetValues[infoset] += GetActionProb(action) * map_actionValues[action];
564-
}
565-
auto actions = infoset->GetActions();
566-
T brpayoff = map_actionValues[actions.front()];
567-
for (auto action : infoset->GetActions()) {
568-
brpayoff = std::max(brpayoff, map_actionValues[action]);
569-
}
570-
for (auto action : infoset->GetActions()) {
571-
map_regret[action] = brpayoff - map_actionValues[action];
539+
const auto &game = m_support.GetGame();
540+
m_cache.m_actionValues.clear();
541+
542+
for (const auto &infoset : game->GetInfosets()) {
543+
const auto &player = infoset->GetPlayer();
544+
for (const auto &node : infoset->GetMembers()) {
545+
T belief = m_cache.m_beliefs[node];
546+
if (belief == static_cast<T>(0)) {
547+
continue;
548+
}
549+
for (auto [action, child] : node->GetActions()) {
550+
m_cache.m_actionValues[action] += belief * m_cache.m_nodeValues[child][player];
551+
}
572552
}
573553
}
574554
}
575555

576-
template <class T> void MixedBehaviorProfile<T>::ComputeSolutionData() const
556+
template <class T> void MixedBehaviorProfile<T>::ComputeActionRegrets() const
577557
{
578-
auto rootNode = m_support.GetGame()->GetRoot();
579-
if (contains(map_realizProbs, rootNode)) {
580-
// cache is valid, don't compute anything, simply return
581-
return;
558+
for (const auto &infoset : m_support.GetGame()->GetInfosets()) {
559+
m_cache.m_infosetValues[infoset] =
560+
sum_function(infoset->GetActions(), [&](const auto &action) -> T {
561+
return GetActionProb(action) * m_cache.m_actionValues[action];
562+
});
563+
564+
auto actions = infoset->GetActions();
565+
const T brpayoff = maximize_function(infoset->GetActions(), [&](const auto &action) -> T {
566+
return m_cache.m_actionValues[action];
567+
});
568+
for (const auto &action : infoset->GetActions()) {
569+
m_cache.m_regret[action] =
570+
std::max(brpayoff - m_cache.m_actionValues[action], static_cast<T>(0));
571+
}
582572
}
583-
ComputePass1_realizProbs(rootNode);
584-
ComputePass2_beliefs_nodeValues_actionValues(rootNode);
585-
ComputePass3_infosetValues_regret();
586573
}
587574

588575
template <class T> bool MixedBehaviorProfile<T>::IsDefinedAt(GameInfoset p_infoset) const

0 commit comments

Comments
 (0)