diff --git a/ChangeLog b/ChangeLog index 7b2151654..13b4e88ad 100644 --- a/ChangeLog +++ b/ChangeLog @@ -7,6 +7,10 @@ to detect if an information is absent-minded. ### Changed +- Terminology for agent-form calculations on extensive games has been clarified. Mixed behavior profiles + distinguish "agent" regret and liap values from their strategy-based analogs. Methods which compute + using the agent-form - specifically `enumpure_solve` and `liap_solve`, now clarify this by being named + differently in `pygambit`. (#617) - In the graphical interface, removed option to configure information set link drawing; information sets are always drawn and indicators are always drawn if an information set spans multiple levels. - In `pygambit`, indexing the children of a node by a string inteprets the string as an action label, diff --git a/contrib/games/myerson_fig_4_2.efg b/contrib/games/myerson_fig_4_2.efg new file mode 100644 index 000000000..713c4a34f --- /dev/null +++ b/contrib/games/myerson_fig_4_2.efg @@ -0,0 +1,15 @@ +EFG 2 R "Myerson (1991) Fig 4.2" { "Player 1" "Player 2" } +"An example from Myerson (1991) Fig 4.2 which has an agent Nash equilibrium that is +not a Nash equilibrium" + +p "" 1 1 "" { "A1" "B1" } 0 +p "" 2 1 "" { "W2" "X2" } 0 +p "" 1 2 "" { "Y1" "Z1" } 0 +t "" 1 "" { 3, 0 } +t "" 2 "" { 0, 0 } +p "" 1 2 "" { "Y1" "Z1" } 0 +t "" 3 "" { 2, 3 } +t "" 4 "" { 4, 1 } +p "" 2 1 "" { "W2" "X2" } 0 +t "" 5 "" { 2, 3 } +t "" 6 "" { 3, 2 } diff --git a/doc/pygambit.api.rst b/doc/pygambit.api.rst index b432a6a24..ae029bae7 100644 --- a/doc/pygambit.api.rst +++ b/doc/pygambit.api.rst @@ -258,6 +258,8 @@ Probability distributions over behavior MixedBehaviorProfile.infoset_prob MixedBehaviorProfile.belief MixedBehaviorProfile.is_defined_at + MixedBehaviorProfile.agent_max_regret + MixedBehaviorProfile.agent_liap_value MixedBehaviorProfile.max_regret MixedBehaviorProfile.liap_value MixedBehaviorProfile.as_strategy @@ -297,11 +299,13 @@ Computation of Nash equilibria 
NashComputationResult enumpure_solve + enumpure_agent_solve enummixed_solve enumpoly_solve lp_solve lcp_solve liap_solve + liap_agent_solve logit_solve simpdiv_solve ipa_solve diff --git a/doc/tools.enumpure.rst b/doc/tools.enumpure.rst index 3dea415e3..32ba3a664 100644 --- a/doc/tools.enumpure.rst +++ b/doc/tools.enumpure.rst @@ -38,10 +38,10 @@ pure-strategy Nash equilibria. .. versionadded:: 14.0.2 - Report agent form equilibria, that is, equilibria which consider - only deviations at one information set. Only has an effect for - extensive games, as strategic games have only one information set - per player. + Report agent Nash equilibria, that is, equilibria which consider + only deviations at a single information set at a time. Only has + an effect for extensive games, as strategic games have only + one information set per player. .. cmdoption:: -h diff --git a/doc/tools.liap.rst b/doc/tools.liap.rst index 0203d7d01..a26c872ce 100644 --- a/doc/tools.liap.rst +++ b/doc/tools.liap.rst @@ -26,8 +26,24 @@ not guaranteed to find all, or even any, Nash equilibria. in terms of the maximum regret. This regret is interpreted as a fraction of the difference between the maximum and minimum payoffs in the game. +.. versionchanged:: 16.5.0 + + The `-A` switch has been introduced to be explicit in choosing to compute + agent Nash equilibria. The default is now to compute using the strategic + form even for extensive games. + + .. program:: gambit-liap +.. cmdoption:: -A + + .. versionadded:: 16.5.0 + + Report agent Nash equilibria, that is, equilibria which consider + only deviations at a single information set at a time. Only has + an effect for extensive games, as strategic games have only + one information set per player. + .. 
cmdoption:: -d Express all output using decimal representations with the diff --git a/doc/tutorials/03_stripped_down_poker.ipynb b/doc/tutorials/03_stripped_down_poker.ipynb index 7937f092e..32413f7e1 100644 --- a/doc/tutorials/03_stripped_down_poker.ipynb +++ b/doc/tutorials/03_stripped_down_poker.ipynb @@ -39,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 1, "id": "69cbfe81", "metadata": {}, "outputs": [], @@ -59,7 +59,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 2, "id": "ad6a1119", "metadata": {}, "outputs": [], @@ -80,7 +80,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 3, "id": "841f9f74", "metadata": {}, "outputs": [ @@ -116,7 +116,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 4, "id": "fe80c64c", "metadata": {}, "outputs": [], @@ -130,7 +130,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 5, "id": "867cb1d8-7a5d-45d1-9349-9bbc2a4e2344", "metadata": {}, "outputs": [ @@ -230,7 +230,7 @@ "\n", "\n", "\n", - "\n", + "\n", "\n", "\n", "\n", @@ -253,7 +253,7 @@ "\n", "\n", "\n", - "\n", + "\n", "\n", "\n", "\n", @@ -284,7 +284,7 @@ "" ] }, - "execution_count": 41, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -309,7 +309,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 6, "id": "0e3bb5ef", "metadata": {}, "outputs": [], @@ -324,7 +324,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 7, "id": "0c522c2d-992e-48b6-a1f8-0696d33cdbe0", "metadata": {}, "outputs": [ @@ -488,7 +488,7 @@ "\n", "\n", "\n", - "\n", + "\n", "\n", "\n", "\n", @@ -541,7 +541,7 @@ "\n", "\n", "\n", - "\n", + "\n", "\n", "\n", "\n", @@ -609,7 +609,7 @@ "" ] }, - "execution_count": 43, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -638,7 +638,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 8, 
"id": "dbfa7035", "metadata": {}, "outputs": [], @@ -652,7 +652,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 9, "id": "e85b3346-2fea-4a73-aa72-9efb436c68c1", "metadata": {}, "outputs": [ @@ -820,7 +820,7 @@ "\n", "\n", "\n", - "\n", + "\n", "\n", "\n", "\n", @@ -895,7 +895,7 @@ "\n", "\n", "\n", - "\n", + "\n", "\n", "\n", "\n", @@ -986,7 +986,7 @@ "" ] }, - "execution_count": 45, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -1010,7 +1010,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 10, "id": "87c988be", "metadata": {}, "outputs": [], @@ -1031,7 +1031,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 11, "id": "29aa60a0", "metadata": {}, "outputs": [], @@ -1053,7 +1053,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 12, "id": "fdee7b53-7820-44df-9d17-d15d0b9667aa", "metadata": {}, "outputs": [ @@ -1198,7 +1198,7 @@ "\n", "\n", "\n", - "\n", + "\n", "\n", "\n", "\n", @@ -1226,7 +1226,7 @@ "\n", "\n", "\n", - "\n", + "\n", "\n", "\n", "\n", @@ -1246,7 +1246,7 @@ "\n", "\n", "\n", - "\n", + "\n", "\n", "\n", "\n", @@ -1263,7 +1263,7 @@ "\n", "\n", "\n", - "\n", + "\n", "\n", "\n", "\n", @@ -1294,7 +1294,7 @@ "\n", "\n", "\n", - "\n", + "\n", "\n", "\n", "\n", @@ -1319,7 +1319,7 @@ "\n", "\n", "\n", - "\n", + "\n", "\n", "\n", "\n", @@ -1344,7 +1344,7 @@ "\n", "\n", "\n", - "\n", + "\n", "\n", "\n", "\n", @@ -1361,7 +1361,7 @@ "\n", "\n", "\n", - "\n", + "\n", "\n", "\n", "\n", @@ -1392,7 +1392,7 @@ "" ] }, - "execution_count": 48, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -1414,7 +1414,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 13, "id": "4d92c8d9", "metadata": {}, "outputs": [ @@ -1424,7 +1424,7 @@ "NashComputationResult(method='lcp', rational=True, use_strategic=False, equilibria=[[[[Rational(1, 1), Rational(0, 1)], [Rational(1, 3), Rational(2, 
3)]], [[Rational(2, 3), Rational(1, 3)]]]], parameters={'stop_after': 0, 'max_depth': 0})" ] }, - "execution_count": 49, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -1448,7 +1448,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 14, "id": "9967d6f7", "metadata": {}, "outputs": [ @@ -1475,7 +1475,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 15, "id": "3293e818", "metadata": {}, "outputs": [ @@ -1485,7 +1485,7 @@ "pygambit.gambit.MixedBehaviorProfileRational" ] }, - "execution_count": 51, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1506,7 +1506,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 16, "id": "4cf38264", "metadata": {}, "outputs": [ @@ -1516,7 +1516,7 @@ "pygambit.gambit.MixedBehavior" ] }, - "execution_count": 52, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1527,7 +1527,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 17, "id": "85e7fdda", "metadata": {}, "outputs": [ @@ -1540,7 +1540,7 @@ "[[Rational(1, 1), Rational(0, 1)], [Rational(1, 3), Rational(2, 3)]]" ] }, - "execution_count": 53, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -1565,7 +1565,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 18, "id": "f45a82b6", "metadata": {}, "outputs": [ @@ -1597,7 +1597,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 19, "id": "83bbd3e5", "metadata": {}, "outputs": [ @@ -1630,7 +1630,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 20, "id": "6bf51b38", "metadata": {}, "outputs": [ @@ -1643,7 +1643,7 @@ "[[Rational(2, 3), Rational(1, 3)]]" ] }, - "execution_count": 56, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1666,7 +1666,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 
21, "id": "2966e700", "metadata": {}, "outputs": [ @@ -1679,7 +1679,7 @@ "Rational(2, 3)" ] }, - "execution_count": 57, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -1698,7 +1698,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 22, "id": "f5a7f110", "metadata": {}, "outputs": [ @@ -1711,7 +1711,7 @@ "Rational(2, 3)" ] }, - "execution_count": 58, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -1732,7 +1732,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 23, "id": "a7d3816d", "metadata": {}, "outputs": [ @@ -1767,7 +1767,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 24, "id": "4a54b20c", "metadata": {}, "outputs": [ @@ -1800,7 +1800,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 25, "id": "b250c1cd", "metadata": {}, "outputs": [ @@ -1813,7 +1813,7 @@ "Rational(2, 3)" ] }, - "execution_count": 61, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -1832,7 +1832,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 26, "id": "6f01846b", "metadata": {}, "outputs": [ @@ -1864,7 +1864,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 27, "id": "5079d231", "metadata": {}, "outputs": [ @@ -1877,7 +1877,7 @@ "Rational(1, 3)" ] }, - "execution_count": 63, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -1888,7 +1888,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 28, "id": "c55f2c7a", "metadata": {}, "outputs": [ @@ -1901,7 +1901,7 @@ "Rational(-1, 3)" ] }, - "execution_count": 64, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -1928,7 +1928,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 29, "id": "d4ecff88", "metadata": {}, "outputs": [ @@ -1938,7 +1938,7 @@ "['11', '12', '21', '22']" ] }, - 
"execution_count": 65, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -1962,7 +1962,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 30, "id": "24e4b6e8", "metadata": {}, "outputs": [ @@ -1972,7 +1972,7 @@ "NashComputationResult(method='gnm', rational=False, use_strategic=True, equilibria=[[[0.33333333333866677, 0.6666666666613335, 0.0, 0.0], [0.6666666666559997, 0.3333333333440004]]], parameters={'perturbation': [[1.0, 0.0, 0.0, 0.0], [1.0, 0.0]], 'end_lambda': -10.0, 'steps': 100, 'local_newton_interval': 3, 'local_newton_maxits': 10})" ] }, - "execution_count": 66, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -1994,7 +1994,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 31, "id": "d9ffb4b8", "metadata": {}, "outputs": [ @@ -2004,7 +2004,7 @@ "pygambit.gambit.MixedStrategyProfileDouble" ] }, - "execution_count": 67, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -2026,7 +2026,7 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 32, "id": "56e2f847", "metadata": {}, "outputs": [ @@ -2079,7 +2079,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 33, "id": "d18a91f0", "metadata": {}, "outputs": [ @@ -2145,7 +2145,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 34, "id": "0c55f745", "metadata": {}, "outputs": [ @@ -2155,7 +2155,7 @@ "(Rational(2, 1), Rational(-2, 1))" ] }, - "execution_count": 70, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -2170,14 +2170,14 @@ "metadata": {}, "source": [ "`logit_solve` is a globally-convergent method, in that it computes a sequence of profiles which is guaranteed to have a subsequence that converges to a\n", - "Nash equilibrium.\n", + "Nash equilibrium. 
\n", "\n", - "The default value of `maxregret` for this method is set at $10^{-8}$:" + "The default value of `maxregret` for this method is set at `1e-8`:" ] }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 53, "id": "101598c6", "metadata": {}, "outputs": [ @@ -2187,7 +2187,7 @@ "1" ] }, - "execution_count": 71, + "execution_count": 53, "metadata": {}, "output_type": "execute_result" } @@ -2199,17 +2199,17 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 54, "id": "9b142728", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "3.987411578698641e-08" + "5.0647885885268806e-08" ] }, - "execution_count": 72, + "execution_count": 54, "metadata": {}, "output_type": "execute_result" } @@ -2224,23 +2224,23 @@ "id": "a2ba06c4", "metadata": {}, "source": [ - "The value of `MixedBehaviorProfile.max_regret` of the computed profile exceeds $10^{-8}$ measured in payoffs of the game.\n", - "However, when considered relative to the scale of the game's payoffs, we see it is less than $10^{-8}$ of the payoff range, as requested:" + "The value of `MixedBehaviorProfile.max_regret` of the computed profile exceeds `1e-8` measured in terms of payoffs of the game.\n", + "However, when considered relative to the scale of the game's payoffs, we see it is less than `1e-8` of the payoff range, as requested:" ] }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 55, "id": "ff405409", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "9.968528946746602e-09" + "6.330985735658601e-09" ] }, - "execution_count": 73, + "execution_count": 55, "metadata": {}, "output_type": "execute_result" } @@ -2256,28 +2256,31 @@ "source": [ "In general, for globally-convergent methods especially, there is a tradeoff between precision and running time.\n", "\n", - "We could instead ask only for an $\\varepsilon$-equilibrium with a (scaled) $\\varepsilon$ of no more than $10^{-4}$:" + "We could instead ask only for an 
$\\varepsilon$-equilibrium with a (scaled) $\\varepsilon$ of no more than `1e-4`:" ] }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 57, "id": "31b0143c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "9.395259956013202e-05" + "6.566536354296604e-05" ] }, - "execution_count": 74, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "gbt.nash.logit_solve(g, maxregret=1e-4).equilibria[0].max_regret() / (g.max_payoff - g.min_payoff)" + "(\n", + " gbt.nash.logit_solve(g, maxregret=1e-4).equilibria[0]\n", + " .max_regret() / (g.max_payoff - g.min_payoff)\n", + ")" ] }, { @@ -2290,7 +2293,7 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 58, "id": "7cfba34a", "metadata": {}, "outputs": [ @@ -2298,17 +2301,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 9.86 ms, sys: 91 μs, total: 9.95 ms\n", - "Wall time: 9.96 ms\n" + "CPU times: user 15.8 ms, sys: 429 μs, total: 16.3 ms\n", + "Wall time: 16.3 ms\n" ] }, { "data": { "text/plain": [ - "NashComputationResult(method='logit', rational=False, use_strategic=False, equilibria=[[[[1.0, 0.0], [0.3338351656285655, 0.666164834417892]], [[0.6670407651644307, 0.3329592348608147]]]], parameters={'first_step': 0.03, 'max_accel': 1.1})" + "NashComputationResult(method='logit', rational=False, use_strategic=False, equilibria=[[[[1.0, 0.0], [0.333859274697877, 0.6661407253531737]], [[0.6670586236711866, 0.3329413763565089]]]], parameters={'first_step': 0.03, 'max_accel': 1.1})" ] }, - "execution_count": 75, + "execution_count": 58, "metadata": {}, "output_type": "execute_result" } @@ -2320,7 +2323,7 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 59, "id": "6f1809a7", "metadata": {}, "outputs": [ @@ -2328,17 +2331,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 18.8 ms, sys: 148 μs, total: 19 ms\n", - "Wall time: 19 ms\n" + "CPU times: user 28.7 ms, 
sys: 883 μs, total: 29.6 ms\n", + "Wall time: 29.7 ms\n" ] }, { "data": { "text/plain": [ - "NashComputationResult(method='logit', rational=False, use_strategic=False, equilibria=[[[[1.0, 0.0], [0.33333338649882943, 0.6666666135011706]], [[0.6666667065407631, 0.3333332934592369]]]], parameters={'first_step': 0.03, 'max_accel': 1.1})" + "NashComputationResult(method='logit', rational=False, use_strategic=False, equilibria=[[[[1.0, 0.0], [0.3333333839812253, 0.6666666160187746]], [[0.6666667046525624, 0.33333329534743755]]]], parameters={'first_step': 0.03, 'max_accel': 1.1})" ] }, - "execution_count": 76, + "execution_count": 59, "metadata": {}, "output_type": "execute_result" } @@ -2355,29 +2358,29 @@ "source": [ "The convention of expressing `maxregret` scaled by the game's payoffs standardises the behavior of methods across games.\n", "\n", - "For example, consider solving the poker game instead using `liap_solve()`:" + "For example, consider solving the poker game instead using `liap_solve()`." 
] }, { "cell_type": "code", - "execution_count": 77, - "id": "414b6f65", + "execution_count": 60, + "id": "5998d609-8037-4814-ac78-1f5a288f4bdd", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "5.509949805110326e-05" + "0.03211100728219732" ] }, - "execution_count": 77, + "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(\n", - " gbt.nash.liap_solve(g.mixed_behavior_profile(), maxregret=1.0e-4)\n", + " gbt.nash.liap_solve(g.mixed_strategy_profile(), maxregret=1e-1)\n", " .equilibria[0].max_regret() / (g.max_payoff - g.min_payoff)\n", ")" ] @@ -2392,17 +2395,17 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 61, "id": "a892dc2b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "5.509949805110326e-05" + "0.03211100728219732" ] }, - "execution_count": 78, + "execution_count": 61, "metadata": {}, "output_type": "execute_result" } @@ -2413,7 +2416,7 @@ " outcome[\"Bob\"] = outcome[\"Bob\"] * 2\n", "\n", "(\n", - " gbt.nash.liap_solve(g.mixed_behavior_profile(), maxregret=1.0e-4)\n", + " gbt.nash.liap_solve(g.mixed_strategy_profile(), maxregret=1e-1)\n", " .equilibria[0].max_regret() / (g.max_payoff - g.min_payoff)\n", ")" ] @@ -2433,7 +2436,7 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 43, "id": "2f79695a", "metadata": {}, "outputs": [ @@ -2443,7 +2446,7 @@ "[Rational(1, 3), Rational(1, 3), Rational(1, 3)]" ] }, - "execution_count": 79, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -2467,7 +2470,7 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 44, "id": "5de6acb2", "metadata": {}, "outputs": [ @@ -2477,7 +2480,7 @@ "[Rational(1, 4), Rational(1, 2), Rational(1, 4)]" ] }, - "execution_count": 80, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -2500,7 +2503,7 @@ }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 45, "id": "c47d2ab6", 
"metadata": {}, "outputs": [ @@ -2510,7 +2513,7 @@ "[Decimal('0.25'), Decimal('0.50'), Decimal('0.25')]" ] }, - "execution_count": 81, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -2537,7 +2540,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 46, "id": "04329084", "metadata": {}, "outputs": [ @@ -2547,7 +2550,7 @@ "[Rational(1, 4), Rational(1, 2), Rational(1, 4)]" ] }, - "execution_count": 82, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } @@ -2559,7 +2562,7 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 47, "id": "9015e129", "metadata": {}, "outputs": [ @@ -2569,7 +2572,7 @@ "[Decimal('0.25'), Decimal('0.50'), Decimal('0.25')]" ] }, - "execution_count": 83, + "execution_count": 47, "metadata": {}, "output_type": "execute_result" } @@ -2594,7 +2597,7 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 48, "id": "0a019aa5", "metadata": {}, "outputs": [ @@ -2604,7 +2607,7 @@ "[Decimal('0.25'), Decimal('0.5'), Decimal('0.25')]" ] }, - "execution_count": 84, + "execution_count": 48, "metadata": {}, "output_type": "execute_result" } @@ -2624,7 +2627,7 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 49, "id": "1991d288", "metadata": {}, "outputs": [ @@ -2654,7 +2657,7 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 50, "id": "b1dc37fd", "metadata": {}, "outputs": [ @@ -2664,7 +2667,7 @@ "1.0" ] }, - "execution_count": 86, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" } @@ -2683,7 +2686,7 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 51, "id": "dc1edea2", "metadata": {}, "outputs": [ @@ -2693,7 +2696,7 @@ "Decimal('0.3333333333333333')" ] }, - "execution_count": 87, + "execution_count": 51, "metadata": {}, "output_type": "execute_result" } @@ -2712,7 +2715,7 @@ }, { "cell_type": "code", - "execution_count": 88, + 
"execution_count": 52, "id": "1edd90d6", "metadata": {}, "outputs": [ @@ -2722,7 +2725,7 @@ "Decimal('0.9999999999999999')" ] }, - "execution_count": 88, + "execution_count": 52, "metadata": {}, "output_type": "execute_result" } @@ -2756,7 +2759,7 @@ ], "metadata": { "kernelspec": { - "display_name": "gambit310", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -2770,7 +2773,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.19" + "version": "3.12.9" } }, "nbformat": 4, diff --git a/doc/tutorials/advanced_tutorials/agent_versus_non_agent_regret.ipynb b/doc/tutorials/advanced_tutorials/agent_versus_non_agent_regret.ipynb new file mode 100644 index 000000000..68b78ee11 --- /dev/null +++ b/doc/tutorials/advanced_tutorials/agent_versus_non_agent_regret.ipynb @@ -0,0 +1,707 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "96dfe427-942a-47e9-8f1f-91854989b8c8", + "metadata": {}, + "source": [ + "# Agent and standard notions of extensive form games\n", + "\n", + "The purpose of this tutorial is to explain the notions of `MixedBehaviorProfile.agent_max_regret` and `MixedBehaviorProfile.agent_liap_value`, and the corresponding solvers `Gambit.nash.enumpure_agent_solve` and `Gambit.nash.liap_agent_solve`. These notions are only relevant for *extensive-form games*, and so `agent_max_regret` and \n", + "`agent_liap_value` are only available for `MixedBehaviorProfile`s and not for `MixedStrategyProfile`s." 
+ ] + }, + { + "cell_type": "markdown", + "id": "b87ebb4e-7080-4aa1-9920-67fba5a36114", + "metadata": {}, + "source": [ + "# Nash equilibria are profiles with maximum regret 0\n", + "\n", + "For either a `MixedBehaviorProfile` and a `MixedStrategyProfile`, the profile is a Nash equilibrium if and only if its maximum regret is zero.\n", + "The profiles maximum regret is the maximum over the players of the individual player regrets.\n", + "A player's regret is 0 if they are playing a mixed (including pure) best response; otherwise it is positive and \n", + "is the different between the best response payoff (achievable via a pure strategy) against the other players' and what the player actually gets as payoff in this profile.\n", + "\n", + "Let's see an example taken from [Myerson (1991)](#references)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "5142d6ba-da13-4500-bca6-e68b608bfae9", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + 
"\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from draw_tree import draw_tree\n", + "\n", + "import pygambit as gbt\n", + "\n", + "g = gbt.read_efg(\"../../../contrib/games/myerson_fig_4_2.efg\")\n", + "draw_tree(g)" + ] + }, + { + "cell_type": "markdown", + "id": "dabe6e40-509e-4454-b3ef-f7f0737cc9d8", + "metadata": {}, + "source": [ + "Let's use `enumpure_solve` to find all pure Nash equilibria of this game." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "7882d327-ce04-43d3-bb5a-36cff6da6e96", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of pure equilibria: 1\n", + "Max regret: 0\n" + ] + } + ], + "source": [ + "pure_Nash_equilibria = gbt.nash.enumpure_solve(g).equilibria\n", + "print(\"Number of pure equilibria:\", len(pure_Nash_equilibria))\n", + "pure_eq = pure_Nash_equilibria[0]\n", + "print(\"Max regret:\", pure_eq.max_regret())" + ] + }, + { + "cell_type": "markdown", + "id": "1b95e67d-a44e-4622-acb5-37bab18a30f4", + "metadata": {}, + "source": [ + "We see that the game has only one pure Nash equilibrium and, its maximum regret is 0, which is what defines a Nash equilibrium." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "6e3e9303-453a-4bac-a449-fa8fda2ba5ec", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Player 1 infoset: 0 behavior probabilities: [Rational(1, 1), Rational(0, 1)]\n", + "Player 1 infoset: 1 behavior probabilities: [Rational(0, 1), Rational(1, 1)]\n", + "Player 2 infoset: 0 behavior probabilities: [Rational(0, 1), Rational(1, 1)]\n" + ] + } + ], + "source": [ + "eq = pure_Nash_equilibria[0]\n", + "for infoset, probs in eq.as_behavior().mixed_actions():\n", + " print(infoset.player.label, \"infoset:\", infoset.number, \"behavior probabilities:\", probs)" + ] + }, + { + "cell_type": "markdown", + "id": "98eb65d8", + "metadata": {}, + "source": [ + "The `liap_value` which stands for \"Liapunov value\" is a related notion that sums the squared regrets of each pure strategy in the game. 
As with the maximum regret, the `liap_value` of a profile is 0 if and only if the profile is a Nash equilibrium, which we confirm now in our example:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "804345b9-d32b-4f60-b4a0-f9d69dca10a8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Liap value: 0\n" + ] + } + ], + "source": [ + "print(\"Liap value:\", pure_eq.liap_value())" + ] + }, + { + "cell_type": "markdown", + "id": "c88d08e2-33bf-48ad-b71f-4a0c19929fdc", + "metadata": {}, + "source": [ + "As we have seen, both the maximum regret and Liapunov value of a profile are non-negative and zero if and only if the profile is a Nash equilibrium. When positive, one can think of both notions as describing how close one is to an equilibrium.\n", + "\n", + "Based on this idea, the method `Gambit.nash.liap_solve` looks for *local* minima of the function from profiles to the Liapunov value. The set of Nash equilibria are exactly the *global* minima of this function, where the value is 0, but `liap_solve` may terminate at a non-global, local minimum, which is not a Nash equilibrium." + ] + }, + { + "cell_type": "markdown", + "id": "4afdea13-0cbb-4430-9689-ecf9b6a4b18d", + "metadata": {}, + "source": [ + "Let's use the method which requires us to specify a starting profile. The method works only with floating point profiles. We will create two profiles, one in floating point and one in rationals, using the former as the starting point for the method and the latter to check the maximum regret and Liapunov value of the profile exactly." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9d18768b-db9b-41ef-aee7-5fe5f524a59e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Max regret of starting profile: 3\n", + "Liapunov value of starting profile: 14\n" + ] + } + ], + "source": [ + "starting_profile_double = g.mixed_strategy_profile(data=[[0,1,0],[1,0]], rational=False)\n", + "starting_profile_rational = g.mixed_strategy_profile(data=[[0,1,0],[1,0]], rational=True)\n", + "print(\"Max regret of starting profile:\", starting_profile_rational.max_regret())\n", + "print(\"Liapunov value of starting profile:\", starting_profile_rational.liap_value())" + ] + }, + { + "cell_type": "markdown", + "id": "e67d9926-d19d-4745-a406-a3c1198a8484", + "metadata": {}, + "source": [ + "It could be a useful exercise to make sure that you can compute these values of the maximum regret and Liapunov value. For that, the starting point would be computing the reduced strategic form. " + ] + }, + { + "cell_type": "markdown", + "id": "e799eded-c6e1-4a3e-80cb-953c52627762", + "metadata": {}, + "source": [ + "Returning to `liap_solve`, since the maximum regret and therefore Liapunov value are both positive, the starting profile is not a Nash equilibrium and we expect `liap_solve` to return a different profile, which will hopefully, but not necessarily by a Nash equilibrium, depending on whether the solver finding a global minimum, or non-global local minimum, or nothing at all." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b885271f-7279-4d87-a0b9-bc28449b00ba", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[4.2517925671604327e-07, 0.49999911111761514, 0.5000004637031282], [0.3333333517938241, 0.6666666482061759]]\n" + ] + } + ], + "source": [ + "candidate_eq = gbt.nash.liap_solve(start=starting_profile_double).equilibria[0]\n", + "print(candidate_eq)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "f8a90a9c-393e-4812-9418-76e705880f6f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Liap value: 4.43446520109796e-14\n", + "Max regret: 1.694170896904268e-07\n" + ] + } + ], + "source": [ + "print(\"Liap value:\", candidate_eq.liap_value())\n", + "print(\"Max regret:\", candidate_eq.max_regret())" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "567e6a6a-fc8d-4142-806c-6510b2a4c624", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Liap value: 0\n", + "Max regret: 0\n" + ] + } + ], + "source": [ + "candidate_eq_rat = g.mixed_strategy_profile(data=[[0,\"1/2\",\"1/2\"],[\"1/3\",\"2/3\"]], rational=True)\n", + "print(\"Liap value:\", candidate_eq_rat.liap_value())\n", + "print(\"Max regret:\", candidate_eq_rat.max_regret())" + ] + }, + { + "cell_type": "markdown", + "id": "3b61364c-3e7f-4094-8ddf-a557863632e5", + "metadata": {}, + "source": [ + "Finally, before looking beyond Nash equilibria to \"agent Nash equilibria\", we will use Gambit's `enummixed_solve` to find all extreme mixed (including pure) Nash equilibria of this game." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "87a62c9e-b109-4f88-ac25-d0e0db3f27ea", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[Rational(0, 1), Rational(1, 1), Rational(0, 1)], [Rational(0, 1), Rational(1, 1)]]\n", + "[[Rational(1, 4), Rational(0, 1), Rational(3, 4)], [Rational(1, 2), Rational(1, 2)]]\n", + "[[Rational(0, 1), Rational(1, 2), Rational(1, 2)], [Rational(1, 3), Rational(2, 3)]]\n" + ] + } + ], + "source": [ + "all_extreme_Nash_equilibria = gbt.nash.enummixed_solve(g).equilibria\n", + "for eq in all_extreme_Nash_equilibria:\n", + " print(eq)" + ] + }, + { + "cell_type": "markdown", + "id": "e2e2e129-e7c7-41fe-8bf9-26e3ab889839", + "metadata": {}, + "source": [ + "The first of these is the pure equilibrium we found above with `enumpure_solve`. The last of these is the mixed equilibrium we just found with `liap_solve`. The middle of these is a new mixed equilibrium we haven't seen yet. Let's just confirm that it too, like the first and last, has Liapunov value and maximum regret zero, as required for a Nash equilibrium:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "2c8ed3df-958e-4ee9-aed6-a106547fbd37", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[Rational(0, 1), Rational(1, 2), Rational(1, 2)], [Rational(1, 3), Rational(2, 3)]]\n", + "Liap value: 0\n", + "Max regret: 0\n" + ] + } + ], + "source": [ + "print(all_extreme_Nash_equilibria[2])\n", + "print(\"Liap value:\", all_extreme_Nash_equilibria[2].liap_value())\n", + "print(\"Max regret:\", all_extreme_Nash_equilibria[2].max_regret())" + ] + }, + { + "cell_type": "markdown", + "id": "141a6c1f-7f3c-450b-8b2f-1d47671595de", + "metadata": {}, + "source": [ + "# Agent maximum regret versus standard maximum regret\n", + "\n", + "Now we can introduce the \"agent\" versions of both of the notions, maximum regret and the Liapunov value. 
The \"agent\" versions relate to what [Myerson (1991)](#references) called the \"multi-agent representation\" of an extensive form game, in which each information set is treated as an individual \"agent\". The \"agent maximum regret\" is then either 0 (if every information set has regret 0, i.e. `infoset_regret` 0), or it is the largest of the information set regrets, which is then necessarily positive.\n", + "\n", + "The maximum regret of a profile is at least as large as the agent maximum regret. \n", + "In short, the reason it cannot be smaller is that all possible deviations of a given player -- even those that require changing behavior at multiple information sets -- are considered.\n", + "In particular, that includes deviations at a single information set, or at more than one.\n", + "On the other hand, the agent maximum regret only considers deviations at a single information set at a time, by considering each such information set as an \"agent\".\n", + "\n", + "Thus, **if the maximum regret is 0, then we have a Nash equilibrium, and the agent maximum regret will be 0 too**.\n", + "However, **there are examples where a profile has agent maximum regret of 0 but positive maximum regret**, so the profile is \n", + "not a Nash equilibrium.\n", + "\n", + "There is also an analogous distinction between `agent_liap_value` and `liap_value`, where the `liap_value` is at least as large as the `agent_liap_value` and there are examples where the former is positive (so we do not have a Nash equilibrium) but the latter is 0 (so we have an \"agent Nash equilibrium\").\n", + "\n", + "The game given above is such an example. It is taken from [Myerson (1991)](#references) figure 4.2. \n", + "\n", + "Gambit implements versions of `enumpure_solve` and `liap_solve` called `enumpure_agent_solve` and `liap_agent_solve` that work only for the extensive form and use `agent_max_regret` and `agent_liap_value` respectively. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f46ce825-d2b7-492f-b0cf-6f213607e121", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n", + "[[[Rational(1, 1), Rational(0, 1)], [Rational(0, 1), Rational(1, 1)]], [[Rational(0, 1), Rational(1, 1)]]]\n", + "[[[Rational(0, 1), Rational(1, 1)], [Rational(0, 1), Rational(1, 1)]], [[Rational(1, 1), Rational(0, 1)]]]\n" + ] + } + ], + "source": [ + "pure_agent_equilibria = gbt.nash.enumpure_agent_solve(g).equilibria\n", + "print(len(pure_agent_equilibria))\n", + "for agent_eq in pure_agent_equilibria:\n", + " print(agent_eq)" + ] + }, + { + "cell_type": "markdown", + "id": "912b9af6-a2e4-4bae-9594-41c8861a4d9d", + "metadata": {}, + "source": [ + "The first of the pure agent equilibria is the Nash equilibrium we found above, which we can check if we convert the agent equilibrium from a `MixedBehaviorProfile` to a `MixedStrategyProfile`:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "dbfa7035", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pure_Nash_equilibria[0] == pure_agent_equilibria[0].as_strategy()" + ] + }, + { + "cell_type": "markdown", + "id": "ec2a8564-5102-4847-8110-a26ee1f4f400", + "metadata": {}, + "source": [ + "The second agent equilibrium is not a Nash equilibrium, which we can confirm by showing that its `max_regret` and `liap_value` are both positive, while the agent versions of these are 0 (which is why this profile was returned by `enumpure_agent_solve`):" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "85760cec-5760-4f9d-8ca2-99fba79c7c3c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Max regret: 1\n", + "Liapunov value: 1\n", + "Agent max regret 0\n", + "Agent Liapunov value: 0\n" + ] + } 
+ ], + "source": [ + "aeq = pure_agent_equilibria[1]\n", + "print(\"Max regret:\", aeq.max_regret())\n", + "print(\"Liapunov value:\", aeq.liap_value())\n", + "print(\"Agent max regret\", aeq.agent_max_regret())\n", + "print(\"Agent Liapunov value:\", aeq.agent_liap_value())" + ] + }, + { + "cell_type": "markdown", + "id": "a42f18d7-5fb4-4a45-9afd-76a63477ef1d", + "metadata": {}, + "source": [ + "It is a useful exercise to make sure you can confirm that the pure profile `pure_agent_equilibria[1]` indeed has these values of agent and standard maximum regret and Liapunov value." + ] + }, + { + "cell_type": "markdown", + "id": "c4eeb65f", + "metadata": {}, + "source": [ + "To conclude, we note that, for most use cases, the standard non-agent versions are probably what a user wants. The agent versions have applications in the area of \"equilibrium refinements\", in particular for \"sequential equilibria\"; for more details see Chapter 4, \"Sequential Equilibria of Extensive-Form Games\", in [Myerson (1991)](#references)." + ] + }, + { + "cell_type": "markdown", + "id": "65def67e", + "metadata": {}, + "source": [ + "#### References\n", + "\n", + "Roger Myerson (1991) \"Game Theory: Analysis of Conflict.\" Harvard University Press. 
" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/games/behavmixed.cc b/src/games/behavmixed.cc index f5749c9cc..c5b10e462 100644 --- a/src/games/behavmixed.cc +++ b/src/games/behavmixed.cc @@ -251,6 +251,11 @@ template MixedBehaviorProfile MixedBehaviorProfile::ToFullSuppor //======================================================================== template T MixedBehaviorProfile::GetLiapValue() const +{ + return MixedStrategyProfile(*this).GetLiapValue(); +} + +template T MixedBehaviorProfile::GetAgentLiapValue() const { CheckVersion(); EnsureRegrets(); @@ -350,6 +355,11 @@ template T MixedBehaviorProfile::GetRegret(const GameInfoset &p_inf } template T MixedBehaviorProfile::GetMaxRegret() const +{ + return MixedStrategyProfile(*this).GetMaxRegret(); +} + +template T MixedBehaviorProfile::GetAgentMaxRegret() const { return maximize_function(m_support.GetGame()->GetInfosets(), [this](const auto &infoset) -> T { return this->GetRegret(infoset); }); diff --git a/src/games/behavmixed.h b/src/games/behavmixed.h index 6cf265455..2128f4a0a 100644 --- a/src/games/behavmixed.h +++ b/src/games/behavmixed.h @@ -237,6 +237,7 @@ template class MixedBehaviorProfile { return m_cache.m_nodeValues[m_support.GetGame()->GetRoot()][p_player]; } T GetLiapValue() const; + T GetAgentLiapValue() const; const T &GetRealizProb(const GameNode &node) const; T GetInfosetProb(const GameInfoset &p_infoset) const; @@ -253,8 +254,8 @@ template class MixedBehaviorProfile { /// between the best-response payoff and the payoff to playing /// \p p_action. 
/// @param[in] p_action The action to compute the regret for. - /// @sa GetRegret(const GameInfoset &) const; - /// GetMaxRegret() const + /// @sa GetRegret(const GameInfoset &) const + /// GetAgentMaxRegret() const const T &GetRegret(const GameAction &p_action) const; /// @brief Computes the regret at information set \p p_infoset @@ -263,14 +264,20 @@ template class MixedBehaviorProfile { /// as the difference between the payoff of the best response action and /// the payoff to playing their specified mixed action. /// @param[in] p_infoset The information set to compute the regret at. - /// @sa GetRegret(const GameAction &) const; - /// GetMaxRegret() const + /// @sa GetRegret(const GameAction &) const + /// GetAgentMaxRegret() const T GetRegret(const GameInfoset &p_infoset) const; /// @brief Computes the maximum regret at any information set in the profile /// @details Computes the maximum of the regrets of the information sets in the profile. - /// @sa GetRegret(const GameInfoset &) const; + /// @sa GetRegret(const GameInfoset &) const /// GetRegret(const GameAction &) const + /// GetMaxRegret() const + T GetAgentMaxRegret() const; + + /// @brief Computes the maximum regret for any player in the profile + /// @sa GetAgentMaxRegret() const + /// T GetMaxRegret() const; T DiffActionValue(const GameAction &action, const GameAction &oppAction) const; diff --git a/src/gui/dlnash.cc b/src/gui/dlnash.cc index f38ca22a6..0bb2d9d65 100644 --- a/src/gui/dlnash.cc +++ b/src/gui/dlnash.cc @@ -104,7 +104,7 @@ NashChoiceDialog::NashChoiceDialog(wxWindow *p_parent, GameDocument *p_doc) SetSizer(topSizer); topSizer->Fit(this); topSizer->SetSizeHints(this); - Layout(); + wxTopLevelWindowBase::Layout(); CenterOnParent(); } @@ -140,9 +140,10 @@ void NashChoiceDialog::OnCount(wxCommandEvent &p_event) void NashChoiceDialog::OnMethod(wxCommandEvent &p_event) { - const wxString method = m_methodChoice->GetString(p_event.GetSelection()); - if (method == s_simpdiv || method == 
s_enummixed || method == s_gnm || method == s_ipa) { + if (const wxString method = m_methodChoice->GetString(p_event.GetSelection()); + method == s_enumpure || method == s_simpdiv || method == s_enummixed || method == s_liap || + method == s_gnm || method == s_ipa) { m_repChoice->SetSelection(1); m_repChoice->Enable(false); } @@ -233,9 +234,9 @@ std::shared_ptr NashChoiceDialog::GetCommand() const } } else if (method == s_enumpure) { - cmd = std::make_shared>(m_doc, useEfg); + cmd = std::make_shared>(m_doc, false); cmd->SetCommand(prefix + wxT("enumpure") + options); - cmd->SetDescription(count + wxT(" in pure strategies ") + game); + cmd->SetDescription(count + wxT(" in pure strategies in strategic game")); } else if (method == s_enummixed) { cmd = std::make_shared>(m_doc, false); @@ -268,9 +269,9 @@ std::shared_ptr NashChoiceDialog::GetCommand() const cmd->SetDescription(count + wxT(" by solving a linear complementarity program ") + game); } else if (method == s_liap) { - cmd = std::make_shared>(m_doc, useEfg); + cmd = std::make_shared>(m_doc, false); cmd->SetCommand(prefix + wxT("liap -d 10") + options); - cmd->SetDescription(count + wxT(" by function minimization ") + game); + cmd->SetDescription(count + wxT(" by function minimization in strategic game")); } else if (method == s_logit) { cmd = std::make_shared>(m_doc, useEfg); diff --git a/src/gui/dlnash.h b/src/gui/dlnash.h index 7939346aa..ba18110ad 100644 --- a/src/gui/dlnash.h +++ b/src/gui/dlnash.h @@ -43,4 +43,4 @@ class NashChoiceDialog final : public wxDialog { }; } // namespace Gambit::GUI -#endif // DLNFGNASH_H +#endif // GAMBIT_GUI_DLNASH_H diff --git a/src/pygambit/behavmixed.pxi b/src/pygambit/behavmixed.pxi index 8d7d0699b..950b65ac5 100644 --- a/src/pygambit/behavmixed.pxi +++ b/src/pygambit/behavmixed.pxi @@ -811,23 +811,64 @@ class MixedBehaviorProfile: See Also -------- action_regret - max_regret + agent_max_regret """ self._check_validity() return 
self._infoset_regret(self.game._resolve_infoset(infoset, "infoset_regret")) - def max_regret(self) -> ProfileDType: - """Returns the maximum regret of any player. + def agent_max_regret(self) -> ProfileDType: + """Returns the maximum regret at any information set. - A profile is an agent Nash equilibrium if and only if `max_regret()` is 0. + A profile is an agent Nash equilibrium if and only if `agent_max_regret()` is 0. - .. versionadded:: 16.2.0 + .. versionchanged:: 16.5.0 + + Renamed from `max_regret` to `agent_max_regret` to clarify the distinction between + per-player and per-agent concepts. See Also -------- action_regret infoset_regret + max_regret + agent_liap_value + """ + self._check_validity() + return self._agent_max_regret() + + def agent_liap_value(self) -> ProfileDType: + """Returns the Lyapunov value (see [McK91]_) of the strategy profile. + + The agent Lyapunov value is a non-negative number which is zero exactly at + agent Nash equilibria. + + .. versionchanged:: 16.5.0 + + Renamed from `liap_value` to `agent_liap_value` to clarify the distinction between + per-player and per-agent concepts. + + See Also + -------- + agent_max_regret + liap_value + """ + self._check_validity() + return self._agent_liap_value() + + def max_regret(self) -> ProfileDType: + """Returns the maximum regret of any player. + + A profile is a Nash equilibrium if and only if `max_regret()` is 0. + + .. versionchanged:: 16.5.0 + + New implementation of `max_regret` to clarify the distinction between + per-player and per-agent concepts. + + See Also + -------- liap_value + agent_max_regret """ self._check_validity() return self._max_regret() @@ -836,11 +877,17 @@ class MixedBehaviorProfile: """Returns the Lyapunov value (see [McK91]_) of the strategy profile. The Lyapunov value is a non-negative number which is zero exactly at - agent Nash equilibria. + Nash equilibria. + + .. 
versionchanged:: 16.5.0 + + New implementation of `liap_value` to clarify the distinction between + per-player and per-agent concepts. See Also -------- max_regret + agent_liap_value """ self._check_validity() return self._liap_value() @@ -921,6 +968,9 @@ class MixedBehaviorProfileDouble(MixedBehaviorProfile): def _infoset_regret(self, infoset: Infoset) -> float: return deref(self.profile).GetRegret(infoset.infoset) + def _agent_max_regret(self) -> float: + return deref(self.profile).GetAgentMaxRegret() + def _max_regret(self) -> float: return deref(self.profile).GetMaxRegret() @@ -940,6 +990,9 @@ class MixedBehaviorProfileDouble(MixedBehaviorProfile): deref(self.profile).ToMixedProfile() )) + def _agent_liap_value(self) -> float: + return deref(self.profile).GetAgentLiapValue() + def _liap_value(self) -> float: return deref(self.profile).GetLiapValue() @@ -1017,6 +1070,9 @@ class MixedBehaviorProfileRational(MixedBehaviorProfile): def _infoset_regret(self, infoset: Infoset) -> Rational: return rat_to_py(deref(self.profile).GetRegret(infoset.infoset)) + def _agent_max_regret(self) -> Rational: + return rat_to_py(deref(self.profile).GetAgentMaxRegret()) + def _max_regret(self) -> Rational: return rat_to_py(deref(self.profile).GetMaxRegret()) @@ -1036,6 +1092,9 @@ class MixedBehaviorProfileRational(MixedBehaviorProfile): deref(self.profile).ToMixedProfile() )) + def _agent_liap_value(self) -> Rational: + return rat_to_py(deref(self.profile).GetAgentLiapValue()) + def _liap_value(self) -> Rational: return rat_to_py(deref(self.profile).GetLiapValue()) diff --git a/src/pygambit/gambit.pxd b/src/pygambit/gambit.pxd index 1c9750265..664d3eb3e 100644 --- a/src/pygambit/gambit.pxd +++ b/src/pygambit/gambit.pxd @@ -370,6 +370,8 @@ cdef extern from "games/behavmixed.h" namespace "Gambit": T GetPayoff(c_GameAction) except + T GetRegret(c_GameAction) except + T GetRegret(c_GameInfoset) except + + T GetAgentMaxRegret() except + + T GetAgentLiapValue() except + T GetMaxRegret() 
except + T GetLiapValue() except + c_MixedStrategyProfile[T] ToMixedProfile() # except + doesn't compile @@ -498,7 +500,7 @@ cdef extern from "solvers/liap/liap.h": stdlist[c_MixedStrategyProfile[double]] LiapStrategySolve( c_MixedStrategyProfile[double], double p_maxregret, int p_maxitsN ) except +RuntimeError - stdlist[c_MixedBehaviorProfile[double]] LiapBehaviorSolve( + stdlist[c_MixedBehaviorProfile[double]] LiapAgentSolve( c_MixedBehaviorProfile[double], double p_maxregret, int p_maxitsN ) except +RuntimeError diff --git a/src/pygambit/nash.pxi b/src/pygambit/nash.pxi index 1f39db668..340e4beb4 100644 --- a/src/pygambit/nash.pxi +++ b/src/pygambit/nash.pxi @@ -122,7 +122,7 @@ def _liap_strategy_solve(start: MixedStrategyProfileDouble, def _liap_behavior_solve(start: MixedBehaviorProfileDouble, maxregret: float, maxiter: int) -> list[MixedBehaviorProfileDouble]: - return _convert_mbpd(LiapBehaviorSolve(deref(start.profile), maxregret, maxiter)) + return _convert_mbpd(LiapAgentSolve(deref(start.profile), maxregret, maxiter)) def _simpdiv_strategy_solve( diff --git a/src/pygambit/nash.py b/src/pygambit/nash.py index 6579d11db..f3298302e 100644 --- a/src/pygambit/nash.py +++ b/src/pygambit/nash.py @@ -65,39 +65,65 @@ class NashComputationResult: parameters: dict = dataclasses.field(default_factory=dict) -def enumpure_solve(game: libgbt.Game, use_strategic: bool = True) -> NashComputationResult: +def enumpure_solve(game: libgbt.Game) -> NashComputationResult: """Compute all :ref:`pure-strategy Nash equilibria ` of game. + .. versionchanged:: 16.5.0 + + `use_strategic` parameter removed. The old behavior in the case + of `use_strategic=False` is now available as `enumpure_agent_solve`. + Parameters ---------- game : Game The game to compute equilibria in. - use_strategic : bool, default True - Whether to use the strategic form. 
If False, computes all agent-form - pure-strategy equilibria, which consider only unilateral deviations at each - individual information set. Returns ------- res : NashComputationResult The result represented as a ``NashComputationResult`` object. + + See also + -------- + enumpure_agent_solve """ - if not game.is_tree or use_strategic: - return NashComputationResult( - game=game, - method="enumpure", - rational=True, - use_strategic=True, - equilibria=libgbt._enumpure_strategy_solve(game) - ) - else: - return NashComputationResult( - game=game, - method="enumpure", - rational=True, - use_strategic=False, - equilibria=libgbt._enumpure_agent_solve(game) - ) + return NashComputationResult( + game=game, + method="enumpure", + rational=True, + use_strategic=True, + equilibria=libgbt._enumpure_strategy_solve(game) + ) + + +def enumpure_agent_solve(game: libgbt.Game) -> NashComputationResult: + """Compute all :ref:`pure-strategy agent Nash equilibria ` of game. + + .. versionadded:: 16.5.0 + + Formerly implemented as `enumpure_solve` with `use_strategic=False`. + + Parameters + ---------- + game : Game + The game to compute agent-Nash equilibria in. + + Returns + ------- + res : NashComputationResult + The result represented as a ``NashComputationResult`` object. + + See also + -------- + enumpure_solve + """ + return NashComputationResult( + game=game, + method="enumpure-agent", + rational=True, + use_strategic=False, + equilibria=libgbt._enumpure_agent_solve(game) + ) def enummixed_solve( @@ -278,7 +304,7 @@ def lp_solve( def liap_solve( - start: libgbt.MixedStrategyProfileDouble | libgbt.MixedBehaviorProfileDouble, + start: libgbt.MixedStrategyProfileDouble, maxregret: float = 1.0e-4, maxiter: int = 1000 ) -> NashComputationResult: @@ -291,9 +317,14 @@ instead of a game. Implemented `maxregret` to specify acceptance criterion for approximation. + .. 
versionchanged:: 16.5.0 + + Computing agent Nash equilibria in the extensive game moved to + `liap_agent_solve` for clarity. + Parameters ---------- - start : MixedStrategyProfileDouble or MixedBehaviorProfileDouble + start : MixedStrategyProfileDouble The starting profile for function minimization. Up to one equilibrium will be found from any starting profile, and the equilibrium found may (and generally will) depend on the initial profile chosen. @@ -317,22 +348,60 @@ def liap_solve( """ if maxregret <= 0.0: raise ValueError("liap_solve(): maxregret argument must be positive") - if isinstance(start, libgbt.MixedStrategyProfileDouble): - equilibria = libgbt._liap_strategy_solve(start, - maxregret=maxregret, maxiter=maxiter) - elif isinstance(start, libgbt.MixedBehaviorProfileDouble): - equilibria = libgbt._liap_behavior_solve(start, - maxregret=maxregret, maxiter=maxiter) - else: - raise TypeError( - f"liap_solve(): start must be a MixedStrategyProfile or MixedBehaviorProfile, " - f"not {start.__class__.__name__}" - ) + equilibria = libgbt._liap_strategy_solve(start, + maxregret=maxregret, maxiter=maxiter) return NashComputationResult( game=start.game, method="liap", rational=False, - use_strategic=isinstance(start, libgbt.MixedStrategyProfileDouble), + use_strategic=True, + equilibria=equilibria, + parameters={"start": start, "maxregret": maxregret, "maxiter": maxiter} + ) + + +def liap_agent_solve( + start: libgbt.MixedBehaviorProfileDouble, + maxregret: float = 1.0e-4, + maxiter: int = 1000 +) -> NashComputationResult: + """Compute approximate agent Nash equilibria of a game using + :ref:`Lyapunov function minimization `. + + .. versionadded:: 16.5.0 + + Moved from `liap_solve` passing a `MixedBehaviorProfileDouble` for additional + clarity in the solution concept computed. + + Parameters + ---------- + start : MixedBehaviorProfileDouble + The starting profile for function minimization. 
Up to one equilibrium will be found + from any starting profile, and the equilibrium found may (and generally will) + depend on the initial profile chosen. + + maxregret : float, default 1e-4 + The acceptance criterion for approximate Nash equilibrium; the maximum + regret of any player must be no more than `maxregret` times the + difference of the maximum and minimum payoffs of the game + + maxiter : int, default 1000 + Maximum number of iterations in function minimization. + + Returns + ------- + res : NashComputationResult + The result represented as a ``NashComputationResult`` object. + """ + if maxregret <= 0.0: + raise ValueError("liap_agent_solve(): maxregret argument must be positive") + equilibria = libgbt._liap_behavior_solve(start, + maxregret=maxregret, maxiter=maxiter) + return NashComputationResult( + game=start.game, + method="liap-agent", + rational=False, + use_strategic=False, equilibria=equilibria, parameters={"start": start, "maxregret": maxregret, "maxiter": maxiter} ) diff --git a/src/solvers/enumpoly/efgpoly.cc b/src/solvers/enumpoly/efgpoly.cc index 4b642e7aa..1dfbaaf26 100644 --- a/src/solvers/enumpoly/efgpoly.cc +++ b/src/solvers/enumpoly/efgpoly.cc @@ -213,7 +213,7 @@ EnumPolyBehaviorSolve(const Game &p_game, int p_stopAfter, double p_maxregret, for (auto solution : SolveSupport(support, isSingular, std::max(p_stopAfter - int(ret.size()), 0))) { const MixedBehaviorProfile fullProfile = solution.ToFullSupport(); - if (fullProfile.GetMaxRegret() < p_maxregret) { + if (fullProfile.GetAgentMaxRegret() < p_maxregret) { p_onEquilibrium(fullProfile); ret.push_back(fullProfile); } diff --git a/src/solvers/liap/efgliap.cc b/src/solvers/liap/efgliap.cc index a7ce04cd2..8a6a5d40f 100644 --- a/src/solvers/liap/efgliap.cc +++ b/src/solvers/liap/efgliap.cc @@ -127,9 +127,9 @@ MixedBehaviorProfile EnforceNonnegativity(const MixedBehaviorProfile> -LiapBehaviorSolve(const MixedBehaviorProfile &p_start, double p_maxregret, int p_maxitsN, - BehaviorCallbackType 
p_callback) +std::list> LiapAgentSolve(const MixedBehaviorProfile &p_start, + double p_maxregret, int p_maxitsN, + BehaviorCallbackType p_callback) { if (!p_start.GetGame()->IsPerfectRecall()) { throw UndefinedException( @@ -160,7 +160,7 @@ LiapBehaviorSolve(const MixedBehaviorProfile &p_start, double p_maxregre } auto p2 = EnforceNonnegativity(p); - if (p2.GetMaxRegret() * F.GetScale() < p_maxregret) { + if (p2.GetAgentMaxRegret() * F.GetScale() < p_maxregret) { p_callback(p2, "NE"); solutions.push_back(p2); } diff --git a/src/solvers/liap/liap.h b/src/solvers/liap/liap.h index 7760a89ed..51ef13957 100644 --- a/src/solvers/liap/liap.h +++ b/src/solvers/liap/liap.h @@ -28,8 +28,8 @@ namespace Gambit::Nash { std::list> -LiapBehaviorSolve(const MixedBehaviorProfile &p_start, double p_maxregret, int p_maxitsN, - BehaviorCallbackType p_callback = NullBehaviorCallback); +LiapAgentSolve(const MixedBehaviorProfile &p_start, double p_maxregret, int p_maxitsN, + BehaviorCallbackType p_callback = NullBehaviorCallback); std::list> LiapStrategySolve(const MixedStrategyProfile &p_start, double p_maxregret, int p_maxitsN, diff --git a/src/solvers/logit/efglogit.cc b/src/solvers/logit/efglogit.cc index 4b6c0d715..f81bd38ca 100644 --- a/src/solvers/logit/efglogit.cc +++ b/src/solvers/logit/efglogit.cc @@ -72,7 +72,7 @@ Vector ProfileToPoint(const LogitQREMixedBehaviorProfile &p_profile) bool RegretTerminationFunction(const Game &p_game, const Vector &p_point, double p_regret) { - return (p_point.back() < 0.0 || PointToProfile(p_game, p_point).GetMaxRegret() < p_regret); + return (p_point.back() < 0.0 || PointToProfile(p_game, p_point).GetAgentMaxRegret() < p_regret); } class EquationSystem { diff --git a/src/tools/liap/liap.cc b/src/tools/liap/liap.cc index 121007ddf..e0c5fe2be 100644 --- a/src/tools/liap/liap.cc +++ b/src/tools/liap/liap.cc @@ -45,6 +45,7 @@ void PrintHelp(char *progname) std::cerr << "With no options, attempts to compute one equilibrium starting at centroid.\n"; 
std::cerr << "Options:\n"; + std::cerr << " -A compute agent form equilibria\n"; std::cerr << " -d DECIMALS print probabilities with DECIMALS digits\n"; std::cerr << " -h, --help print this help message\n"; std::cerr << " -n COUNT number of starting points to generate\n"; @@ -130,7 +131,7 @@ Array> RandomBehaviorProfiles(const Game &p_game, i int main(int argc, char *argv[]) { opterr = 0; - bool quiet = false, useStrategic = false, verbose = false; + bool quiet = false, reportStrategic = false, solveAgent = false, verbose = false; const int numTries = 10; int maxitsN = 1000; int numDecimals = 6; @@ -143,7 +144,7 @@ int main(int argc, char *argv[]) {"verbose", 0, nullptr, 'V'}, {nullptr, 0, nullptr, 0}}; int c; - while ((c = getopt_long(argc, argv, "d:n:i:s:m:hqVvS", long_options, &long_opt_index)) != -1) { + while ((c = getopt_long(argc, argv, "d:n:i:s:m:hqVvAS", long_options, &long_opt_index)) != -1) { switch (c) { case 'v': PrintBanner(std::cerr); @@ -164,7 +165,10 @@ int main(int argc, char *argv[]) PrintHelp(argv[0]); break; case 'S': - useStrategic = true; + reportStrategic = true; + break; + case 'A': + solveAgent = true; break; case 'q': quiet = true; @@ -204,7 +208,7 @@ int main(int argc, char *argv[]) try { const Game game = ReadGame(*input_stream); - if (!game->IsTree() || useStrategic) { + if (!game->IsTree() || !solveAgent) { Array> starts; if (!startFile.empty()) { std::ifstream startPoints(startFile.c_str()); @@ -239,13 +243,13 @@ int main(int argc, char *argv[]) for (size_t i = 1; i <= starts.size(); i++) { auto renderer = MakeMixedBehaviorProfileRenderer(std::cout, numDecimals, false); - LiapBehaviorSolve(starts[i], maxregret, maxitsN, - [renderer, verbose](const MixedBehaviorProfile &p_profile, - const std::string &p_label) { - if (p_label == "NE" || verbose) { - renderer->Render(p_profile, p_label); - } - }); + LiapAgentSolve(starts[i], maxregret, maxitsN, + [renderer, verbose](const MixedBehaviorProfile &p_profile, + const std::string &p_label) { 
+ if (p_label == "NE" || verbose) { + renderer->Render(p_profile, p_label); + } + }); } } return 0; diff --git a/tests/games.py b/tests/games.py index 32acea8e2..5fccf5e38 100644 --- a/tests/games.py +++ b/tests/games.py @@ -5,7 +5,6 @@ from abc import ABC, abstractmethod import numpy as np -import pytest import pygambit as gbt @@ -650,10 +649,6 @@ def create_kuhn_poker_efg(nonterm_outcomes: bool = False) -> gbt.Game: g = _create_kuhn_poker_efg_nonterm_outcomes() else: g = _create_kuhn_poker_efg_only_term_outcomes() - - # Ensure infosets are in the same order as if game was written to efg and read back in - with pytest.warns(FutureWarning): - g.sort_infosets() return g @@ -851,6 +846,105 @@ def create_reduction_both_players_payoff_ties_efg() -> gbt.Game: return g +def create_problem_example_efg() -> gbt.Game: + g = gbt.Game.new_tree(players=["1", "2"], title="") + g.append_move(g.root, player="1", actions=["L", "R"]) + # do the second child first on purpose to diverge from sort infosets order + g.append_move(g.root.children[1], "2", actions=["l2", "r2"]) + g.append_move(g.root.children[0], "2", actions=["l1", "r1"]) + g.set_outcome(g.root.children[0].children[0], outcome=g.add_outcome(payoffs=[5, -5])) + g.set_outcome(g.root.children[0].children[1], outcome=g.add_outcome(payoffs=[2, -2])) + g.set_outcome(g.root.children[1].children[0], outcome=g.add_outcome(payoffs=[-5, 5])) + g.set_outcome(g.root.children[1].children[1], outcome=g.add_outcome(payoffs=[-2, 2])) + return g + + +def create_STOC_simplified() -> gbt.Game: + """ + """ + g = gbt.Game.new_tree(players=["1", "2"], title="") + g.append_move(g.root, g.players.chance, actions=["1", "2"]) + g.set_chance_probs(g.root.infoset, [0.2, 0.8]) + g.append_move(g.root.children[0], player="1", actions=["l", "r"]) + g.append_move(g.root.children[1], player="1", actions=["c", "d"]) + g.append_move(g.root.children[0].children[1], player="2", actions=["p", "q"]) + g.append_move( + g.root.children[0].children[1].children[0], 
player="1", actions=["L", "R"] + ) + g.append_infoset( + g.root.children[0].children[1].children[1], + g.root.children[0].children[1].children[0].infoset, + ) + g.set_outcome( + g.root.children[0].children[0], + outcome=g.add_outcome(payoffs=[5, -5], label="l"), + ) + g.set_outcome( + g.root.children[0].children[1].children[0].children[0], + outcome=g.add_outcome(payoffs=[10, -10], label="rpL"), + ) + g.set_outcome( + g.root.children[0].children[1].children[0].children[1], + outcome=g.add_outcome(payoffs=[15, -15], label="rpR"), + ) + g.set_outcome( + g.root.children[0].children[1].children[1].children[0], + outcome=g.add_outcome(payoffs=[20, -20], label="rqL"), + ) + g.set_outcome( + g.root.children[0].children[1].children[1].children[1], + outcome=g.add_outcome(payoffs=[-5, 5], label="rqR"), + ) + g.set_outcome( + g.root.children[1].children[0], + outcome=g.add_outcome(payoffs=[10, -10], label="c"), + ) + g.set_outcome( + g.root.children[1].children[1], + outcome=g.add_outcome(payoffs=[20, -20], label="d"), + ) + return g + + +def create_STOC_simplified2() -> gbt.Game: + """ + """ + g = gbt.Game.new_tree(players=["1", "2"], title="") + g.append_move(g.root, g.players.chance, actions=["1", "2"]) + g.set_chance_probs(g.root.infoset, [0.2, 0.8]) + g.append_move(g.root.children[0], player="1", actions=["r"]) + g.append_move(g.root.children[1], player="1", actions=["c"]) + g.append_move(g.root.children[0].children[0], player="2", actions=["p", "q"]) + g.append_move( + g.root.children[0].children[0].children[0], player="1", actions=["L", "R"] + ) + g.append_infoset( + g.root.children[0].children[0].children[1], + g.root.children[0].children[0].children[0].infoset, + ) + g.set_outcome( + g.root.children[0].children[0].children[0].children[0], + outcome=g.add_outcome(payoffs=[10, -10], label="rpL"), + ) + g.set_outcome( + g.root.children[0].children[0].children[0].children[1], + outcome=g.add_outcome(payoffs=[15, -15], label="rpR"), + ) + g.set_outcome( + 
g.root.children[0].children[0].children[1].children[0], + outcome=g.add_outcome(payoffs=[20, -20], label="rqL"), + ) + g.set_outcome( + g.root.children[0].children[0].children[1].children[1], + outcome=g.add_outcome(payoffs=[-5, 5], label="rqR"), + ) + g.set_outcome( + g.root.children[1].children[0], + outcome=g.add_outcome(payoffs=[10, -10], label="c"), + ) + return g + + def create_seq_form_STOC_paper_zero_sum_2_player_efg() -> gbt.Game: """ Example from @@ -929,7 +1023,6 @@ def create_seq_form_STOC_paper_zero_sum_2_player_efg() -> gbt.Game: g.root.children[0].children[1].infoset.label = "01" g.root.children[2].children[0].infoset.label = "20" g.root.children[0].children[1].children[0].infoset.label = "010" - return g diff --git a/tests/test_behav.py b/tests/test_behav.py index fc7a0c9c2..c3468ef0c 100644 --- a/tests/test_behav.py +++ b/tests/test_behav.py @@ -798,9 +798,11 @@ def test_action_value_by_label_reference(game: gbt.Game, label: str, (games.create_mixed_behav_game_efg(), True), (games.create_stripped_down_poker_efg(), False), (games.create_stripped_down_poker_efg(), True), + (games.create_kuhn_poker_efg(), False), + (games.create_kuhn_poker_efg(), True), ] ) -def test_regret_consistency(game: gbt.Game, rational_flag: bool): +def test_action_regret_consistency(game: gbt.Game, rational_flag: bool): profile = game.mixed_behavior_profile(rational=rational_flag) for player in game.players: for infoset in player.infosets: @@ -812,6 +814,62 @@ def test_regret_consistency(game: gbt.Game, rational_flag: bool): ) +@pytest.mark.parametrize( + "game,rational_flag", + [(games.create_mixed_behav_game_efg(), False), + (games.create_mixed_behav_game_efg(), True), + (games.create_stripped_down_poker_efg(), False), + (games.create_stripped_down_poker_efg(), True), + (games.create_kuhn_poker_efg(), False), + (games.create_kuhn_poker_efg(), True), + ] +) +def test_infoset_regret_consistency(game: gbt.Game, rational_flag: bool): + profile = 
game.mixed_behavior_profile(rational=rational_flag) + for player in game.players: + for infoset in player.infosets: + assert ( + profile.infoset_regret(infoset) == + max(profile.action_value(a) for a in infoset.actions) - + profile.infoset_value(infoset) + ) + + +@pytest.mark.parametrize( + "game,rational_flag", + [(games.create_mixed_behav_game_efg(), False), + (games.create_mixed_behav_game_efg(), True), + (games.create_stripped_down_poker_efg(), False), + (games.create_stripped_down_poker_efg(), True), + (games.create_kuhn_poker_efg(), False), + (games.create_kuhn_poker_efg(), True), + (games.create_3_player_with_internal_outcomes_efg(), False), + (games.create_3_player_with_internal_outcomes_efg(), True) + ] +) +def test_max_regret_consistency(game: gbt.Game, rational_flag: bool): + profile = game.mixed_behavior_profile(rational=rational_flag) + assert profile.max_regret() == profile.as_strategy().max_regret() + + +@pytest.mark.parametrize( + "game,rational_flag", + [(games.create_mixed_behav_game_efg(), False), + (games.create_mixed_behav_game_efg(), True), + (games.create_stripped_down_poker_efg(), False), + (games.create_stripped_down_poker_efg(), True), + (games.create_kuhn_poker_efg(), False), + (games.create_kuhn_poker_efg(), True), + ] +) +def test_agent_max_regret_consistency(game: gbt.Game, rational_flag: bool): + profile = game.mixed_behavior_profile(rational=rational_flag) + assert ( + profile.agent_max_regret() == + max([profile.infoset_regret(infoset) for infoset in game.infosets]) + ) + + @pytest.mark.parametrize( "game,player_idx,infoset_idx,action_idx,action_probs,rational_flag,tol,value", [ @@ -823,14 +881,14 @@ def test_regret_consistency(game: gbt.Game, rational_flag: bool): (games.create_mixed_behav_game_efg(), 2, 0, 0, None, False, TOL, 0), (games.create_mixed_behav_game_efg(), 2, 0, 1, None, False, TOL, 0.5), # 3.5 - 3 # U1 U2 U3 - (games.create_mixed_behav_game_efg(), 0, 0, 0, [1.0, 0.0, 1.0, 0.0, 1.0, 0.0], False, TOL, 0), - 
(games.create_mixed_behav_game_efg(), 0, 0, 0, ["1", "0", "1", "0", "1", "0"], True, ZERO, 0), - (games.create_mixed_behav_game_efg(), 0, 0, 1, [1.0, 0.0, 1.0, 0.0, 1.0, 0.0], False, TOL, 9), - (games.create_mixed_behav_game_efg(), 0, 0, 1, ["1", "0", "1", "0", "1", "0"], True, ZERO, 9), - (games.create_mixed_behav_game_efg(), 1, 0, 0, [1.0, 0.0, 1.0, 0.0, 1.0, 0.0], False, TOL, 0), - (games.create_mixed_behav_game_efg(), 1, 0, 0, ["1", "0", "1", "0", "1", "0"], True, ZERO, 0), - (games.create_mixed_behav_game_efg(), 1, 0, 1, [1.0, 0.0, 1.0, 0.0, 1.0, 0.0], False, TOL, 8), - (games.create_mixed_behav_game_efg(), 1, 0, 1, ["1", "0", "1", "0", "1", "0"], True, ZERO, 8), + (games.create_mixed_behav_game_efg(), 0, 0, 0, [1, 0, 1, 0, 1, 0], False, TOL, 0), + (games.create_mixed_behav_game_efg(), 0, 0, 0, [1, 0, 1, 0, 1, 0], True, ZERO, 0), + (games.create_mixed_behav_game_efg(), 0, 0, 1, [1, 0, 1, 0, 1, 0], False, TOL, 9), + (games.create_mixed_behav_game_efg(), 0, 0, 1, [1, 0, 1, 0, 1, 0], True, ZERO, 9), + (games.create_mixed_behav_game_efg(), 1, 0, 0, [1, 0, 1, 0, 1, 0], False, TOL, 0), + (games.create_mixed_behav_game_efg(), 1, 0, 0, [1, 0, 1, 0, 1, 0], True, ZERO, 0), + (games.create_mixed_behav_game_efg(), 1, 0, 1, [1, 0, 1, 0, 1, 0], False, TOL, 8), + (games.create_mixed_behav_game_efg(), 1, 0, 1, [1, 0, 1, 0, 1, 0], True, ZERO, 8), # Mixed Nash equilibrium (games.create_mixed_behav_game_efg(), 0, 0, 0, ["2/5", "3/5", "1/2", "1/2", "1/3", "2/3"], True, ZERO, 0), @@ -858,9 +916,9 @@ def test_regret_consistency(game: gbt.Game, rational_flag: bool): True, ZERO, "8/3"), # (2/3*2 + 1/3*1) - (-1) ] ) -def test_regret_reference(game: gbt.Game, player_idx: int, infoset_idx: int, action_idx: int, - action_probs: None | list, rational_flag: bool, - tol: gbt.Rational | float, value: str | float): +def test_action_regret_reference(game: gbt.Game, player_idx: int, infoset_idx: int, + action_idx: int, action_probs: None | list, rational_flag: bool, + tol: gbt.Rational | float, 
value: str | float): action = game.players[player_idx].infosets[infoset_idx].actions[action_idx] profile = game.mixed_behavior_profile(rational=rational_flag) if action_probs: @@ -941,16 +999,49 @@ def test_node_value_consistency(game: gbt.Game, rational_flag: bool): (games.create_stripped_down_poker_efg(), [1.0, 0.0, 1.0, 0.0, 1.0, 0.0], False, 1.0), ] ) -def test_liap_value_reference(game: gbt.Game, action_probs: None | list, - rational_flag: bool, expected_value: str | float): - """Tests liap_value under profile given by action_probs +def test_agent_liap_value_reference(game: gbt.Game, action_probs: None | list, + rational_flag: bool, expected_value: str | float): + """Tests agent_liap_value under profile given by action_probs (which will be uniform if action_probs is None) """ profile = game.mixed_behavior_profile(rational=rational_flag) if action_probs: _set_action_probs(profile, action_probs, rational_flag) assert ( - profile.liap_value() == (gbt.Rational(expected_value) if rational_flag else expected_value) + profile.agent_liap_value() == (gbt.Rational(expected_value) + if rational_flag else expected_value) + ) + + +@pytest.mark.parametrize( + "game,action_probs,rational_flag,max_regret,agent_max_regret,liap_value,agent_liap_value", + [ + # uniform (non-Nash): + (games.create_mixed_behav_game_efg(), None, True, "1/4", "1/4", "1/16", "1/16"), + (games.create_mixed_behav_game_efg(), None, False, 0.25, 0.25, 0.0625, 0.0625), + # Myerson fig 4.2 + (games.read_from_file("myerson_fig_4_2.efg"), [0, 1, 0, 1, 1, 0], True, 1, 0, 1, 0), + ] +) +def test_agent_max_regret_versus_non_agent(game: gbt.Game, action_probs: None | list, + rational_flag: bool, + max_regret: str | float, + agent_max_regret: str | float, + agent_liap_value: str | float, + liap_value: str | float, + ): + profile = game.mixed_behavior_profile(rational=rational_flag) + if action_probs: + _set_action_probs(profile, action_probs, rational_flag) + assert (profile.max_regret() == 
(gbt.Rational(max_regret) if rational_flag else max_regret)) + assert ( + profile.agent_max_regret() == (gbt.Rational(agent_max_regret) + if rational_flag else agent_max_regret) + ) + assert (profile.liap_value() == (gbt.Rational(liap_value) if rational_flag else liap_value)) + assert ( + profile.agent_liap_value() == (gbt.Rational(agent_liap_value) + if rational_flag else agent_liap_value) ) @@ -1146,7 +1237,19 @@ def _get_and_check_answers(game: gbt.Game, action_probs1: tuple, action_probs2: lambda x, y: x.node_value(player=y[0], node=y[1]), lambda x: list(product(x.players, x.nodes))), ###################################################################################### - # liap_value (of profile, hence [1] for objects_to_test, any singleton collection would do) + # agent_liap_value (of profile, hence [1] for objects_to_test, + # any singleton collection would do) + (games.create_mixed_behav_game_efg(), PROBS_1A_doub, PROBS_2A_doub, False, + lambda x, y: x.agent_liap_value(), lambda x: [1]), + (games.create_mixed_behav_game_efg(), PROBS_1A_rat, PROBS_2A_rat, True, + lambda x, y: x.agent_liap_value(), lambda x: [1]), + (games.create_stripped_down_poker_efg(), PROBS_1B_doub, PROBS_2B_doub, False, + lambda x, y: x.agent_liap_value(), lambda x: [1]), + (games.create_stripped_down_poker_efg(), PROBS_1A_rat, PROBS_2A_rat, True, + lambda x, y: x.agent_liap_value(), lambda x: [1]), + ###################################################################################### + # liap_value (of profile, hence [1] for objects_to_test, + # any singleton collection would do) (games.create_mixed_behav_game_efg(), PROBS_1A_doub, PROBS_2A_doub, False, lambda x, y: x.liap_value(), lambda x: [1]), (games.create_mixed_behav_game_efg(), PROBS_1A_rat, PROBS_2A_rat, True, @@ -1155,6 +1258,28 @@ def _get_and_check_answers(game: gbt.Game, action_probs1: tuple, action_probs2: lambda x, y: x.liap_value(), lambda x: [1]), (games.create_stripped_down_poker_efg(), PROBS_1A_rat, PROBS_2A_rat, 
True, lambda x, y: x.liap_value(), lambda x: [1]), + ###################################################################################### + # agent_max_regret (of profile, hence [1] for objects_to_test, + # any singleton collection would do) + (games.create_mixed_behav_game_efg(), PROBS_1A_doub, PROBS_2A_doub, False, + lambda x, y: x.agent_max_regret(), lambda x: [1]), + (games.create_mixed_behav_game_efg(), PROBS_1A_rat, PROBS_2A_rat, True, + lambda x, y: x.agent_max_regret(), lambda x: [1]), + (games.create_stripped_down_poker_efg(), PROBS_1B_doub, PROBS_2B_doub, False, + lambda x, y: x.agent_max_regret(), lambda x: [1]), + (games.create_stripped_down_poker_efg(), PROBS_1A_rat, PROBS_2A_rat, True, + lambda x, y: x.agent_max_regret(), lambda x: [1]), + ###################################################################################### + # max_regret (of profile, hence [1] for objects_to_test, + # any singleton collection would do) + (games.create_mixed_behav_game_efg(), PROBS_1A_doub, PROBS_2A_doub, False, + lambda x, y: x.max_regret(), lambda x: [1]), + (games.create_mixed_behav_game_efg(), PROBS_1A_rat, PROBS_2A_rat, True, + lambda x, y: x.max_regret(), lambda x: [1]), + (games.create_stripped_down_poker_efg(), PROBS_1B_doub, PROBS_2B_doub, False, + lambda x, y: x.max_regret(), lambda x: [1]), + (games.create_stripped_down_poker_efg(), PROBS_1A_rat, PROBS_2A_rat, True, + lambda x, y: x.max_regret(), lambda x: [1]), ] ) def test_profile_order_consistency(game: gbt.Game, diff --git a/tests/test_game.py b/tests/test_game.py index cebfa1b9f..1a7e7c3b5 100644 --- a/tests/test_game.py +++ b/tests/test_game.py @@ -164,6 +164,17 @@ def test_mixed_strategy_profile_game_structure_changed_no_tree(): profiles = [g.mixed_strategy_profile(rational=b) for b in [False, True]] g.outcomes[0][g.players[0]] = 3 for profile in profiles: + with pytest.raises(gbt.GameStructureChangedError): + profile.copy() + with pytest.raises(gbt.GameStructureChangedError): + 
profile.liap_value() + with pytest.raises(gbt.GameStructureChangedError): + profile.max_regret() + with pytest.raises(gbt.GameStructureChangedError): + # triggers error via __getitem__ + next(profile.mixed_strategies()) + with pytest.raises(gbt.GameStructureChangedError): + profile.normalize() with pytest.raises(gbt.GameStructureChangedError): profile.copy() with pytest.raises(gbt.GameStructureChangedError): @@ -254,6 +265,26 @@ def test_mixed_behavior_profile_game_structure_changed(): profile.infoset_value(g.infosets[0]) with pytest.raises(gbt.GameStructureChangedError): profile.is_defined_at(g.infosets[0]) + with pytest.raises(gbt.GameStructureChangedError): + profile.agent_liap_value() + with pytest.raises(gbt.GameStructureChangedError): + profile.liap_value() + with pytest.raises(gbt.GameStructureChangedError): + profile.agent_max_regret() + with pytest.raises(gbt.GameStructureChangedError): + profile.max_regret() + with pytest.raises(gbt.GameStructureChangedError): + # triggers error via __getitem__ + next(profile.mixed_actions()) + with pytest.raises(gbt.GameStructureChangedError): + # triggers error via __getitem__ + next(profile.mixed_behaviors()) + with pytest.raises(gbt.GameStructureChangedError): + profile.node_value(g.players[0], g.root) + with pytest.raises(gbt.GameStructureChangedError): + profile.normalize() + with pytest.raises(gbt.GameStructureChangedError): + profile.payoff(g.players[0]) with pytest.raises(gbt.GameStructureChangedError): profile.liap_value() with pytest.raises(gbt.GameStructureChangedError): diff --git a/tests/test_games/myerson_fig_4_2.efg b/tests/test_games/myerson_fig_4_2.efg new file mode 100644 index 000000000..713c4a34f --- /dev/null +++ b/tests/test_games/myerson_fig_4_2.efg @@ -0,0 +1,15 @@ +EFG 2 R "Myerson (1991) Fig 4.2" { "Player 1" "Player 2" } +"An example from Myerson (1991) Fig 4.2 which has an agent Nash equilibrium that is +not a Nash equilibrium" + +p "" 1 1 "" { "A1" "B1" } 0 +p "" 2 1 "" { "W2" "X2" } 0 +p "" 
1 2 "" { "Y1" "Z1" } 0 +t "" 1 "" { 3, 0 } +t "" 2 "" { 0, 0 } +p "" 1 2 "" { "Y1" "Z1" } 0 +t "" 3 "" { 2, 3 } +t "" 4 "" { 4, 1 } +p "" 2 1 "" { "W2" "X2" } 0 +t "" 5 "" { 2, 3 } +t "" 6 "" { 3, 2 } diff --git a/tests/test_mixed.py b/tests/test_mixed.py index 7ef840fcd..be6935634 100644 --- a/tests/test_mixed.py +++ b/tests/test_mixed.py @@ -910,7 +910,7 @@ def _get_and_check_answers(game: gbt.Game, action_probs1: tuple, action_probs2: PROBS_2A_doub = (0.5, 0, 0.5, 0) PROBS_1A_rat = ("1/4", "1/4", "1/4", "1/4") PROBS_2A_rat = ("1/2", "0", "1/2", "0") -# For 2x2x2 nfg and Myserson 2-card poker efg (both have 6 strategies in total): +# For 2x2x2 nfg and stripped_down_poker efg (both have 6 strategies in total): PROBS_1B_doub = (0.5, 0.5, 0.5, 0.5, 0.5, 0.5) PROBS_2B_doub = (1.0, 0.0, 1.0, 0.0, 1.0, 0.0) PROBS_1B_rat = ("1/2", "1/2", "1/2", "1/2", "1/2", "1/2") diff --git a/tests/test_nash.py b/tests/test_nash.py index 5f9b6d7b0..4683aa7bd 100644 --- a/tests/test_nash.py +++ b/tests/test_nash.py @@ -19,13 +19,13 @@ def test_enumpure_strategy(): """Test calls of enumeration of pure strategies.""" game = games.read_from_file("stripped_down_poker.efg") - assert len(gbt.nash.enumpure_solve(game, use_strategic=True).equilibria) == 0 + assert len(gbt.nash.enumpure_solve(game).equilibria) == 0 def test_enumpure_agent(): """Test calls of enumeration of pure agent strategies.""" game = games.read_from_file("stripped_down_poker.efg") - assert len(gbt.nash.enumpure_solve(game, use_strategic=False).equilibria) == 0 + assert len(gbt.nash.enumpure_agent_solve(game).equilibria) == 0 def test_enummixed_double(): @@ -135,6 +135,8 @@ def test_enummixed_rational(game: gbt.Game, mixed_strategy_prof_data: list): # ], # 2, # 9 in total found by enumpoly (see unordered test) # ), + ############################################################################## + ############################################################################## ( 
games.create_3_player_with_internal_outcomes_efg(), [ @@ -151,28 +153,8 @@ def test_enummixed_rational(game: gbt.Game, mixed_strategy_prof_data: list): [[1, 0], ["1/3", "2/3"]]]], 2, ), - ( - games.create_entry_accomodation_efg(), - [ - [[["2/3", "1/3"], [1, 0], [1, 0]], - [["2/3", "1/3"]]], - [[[0, 1], [0, 0], ["1/3", "2/3"]], - [[0, 1]]], - [[[0, 1], [0, 0], [1, 0]], [[1, 0]]], - [[[0, 1], [0, 0], [0, 0]], [[0, 1]]]], - 4, - ), - # ( - # games.create_entry_accomodation_efg(nonterm_outcomes=True), - # [ - # [[["2/3", "1/3"], [1, 0], [1, 0]], - # [["2/3", "1/3"]]], - # [[[0, 1], [0, 0], ["1/3", "2/3"]], - # [[0, 1]]], - # [[[0, 1], [0, 0], [1, 0]], [[1, 0]]], - # [[[0, 1], [0, 0], [0, 0]], [[0, 1]]]], - # 4, - # ), + ############################################################################## + ############################################################################## ( games.create_non_zero_sum_lacking_outcome_efg(), [[[["1/3", "2/3"]], [["1/2", "1/2"]]]], @@ -183,27 +165,21 @@ def test_enummixed_rational(game: gbt.Game, mixed_strategy_prof_data: list): [[[["1/3", "2/3"]], [["1/2", "1/2"]]]], 1, ), + ############################################################################## + ############################################################################## ( games.create_chance_in_middle_efg(), - [[[["3/11", "8/11"], - [1, 0], [1, 0], [1, 0], [1, 0]], - [[1, 0], ["6/11", "5/11"]]], - [[[1, 0], [1, 0], [1, 0], [0, 0], [0, 0]], - [[0, 1], [1, 0]]], - [[[0, 1], [0, 0], [0, 0], [1, 0], [1, 0]], - [[1, 0], [0, 1]]]], - 3, + [[[["3/11", "8/11"], [1, 0], [1, 0], [1, 0], [1, 0]], [[1, 0], ["6/11", "5/11"]]], + ], # [[[1, 0], [1, 0], [1, 0], [0, 0], [0, 0]], [[0, 1], [1, 0]]], + # [[[0, 1], [0, 0], [0, 0], [1, 0], [1, 0]], [[1, 0], [0, 1]]], + 1, # subsequent eqs have undefined infosets; include after #issue 660 ), ( games.create_chance_in_middle_efg(nonterm_outcomes=True), - [[[["3/11", "8/11"], - [1, 0], [1, 0], [1, 0], [1, 0]], - [[1, 0], ["6/11", "5/11"]]], - 
[[[1, 0], [1, 0], [1, 0], [0, 0], [0, 0]], - [[0, 1], [1, 0]]], - [[[0, 1], [0, 0], [0, 0], [1, 0], [1, 0]], - [[1, 0], [0, 1]]]], - 3, + [[[["3/11", "8/11"], [1, 0], [1, 0], [1, 0], [1, 0]], [[1, 0], ["6/11", "5/11"]]], + ], # [[[1, 0], [1, 0], [1, 0], [0, 0], [0, 0]], [[0, 1], [1, 0]]], + # [[[0, 1], [0, 0], [0, 0], [1, 0], [1, 0]], [[1, 0], [0, 1]]], + 1, ), ], ) @@ -211,8 +187,73 @@ def test_enumpoly_ordered_behavior( game: gbt.Game, mixed_behav_prof_data: list, stop_after: None | int ): """Test calls of enumpoly for mixed behavior equilibria, - using max_regret (internal consistency); and comparison to a set of previously - computed equilibria using this function (regression test). + using max_regret and agent_max_regret (internal consistency); and + comparison to a set of previously computed equilibria with this function (regression test). This set will be the full set of all computed equilibria if stop_after is None, else the first stop_after-many equilibria. + + This is the "ordered" version where we test for the outputs coming in a specific + order; there is also an "unordered" version. The game 2x2x2.nfg, for example, + has a point at which the Jacobian is singular. As a result, the order in which it + returns the two totally-mixed equilibria is system-dependent due, essentially, + to inherent numerical instability near that point. 
+ """ + if stop_after: + result = gbt.nash.enumpoly_solve( + game, use_strategic=False, stop_after=stop_after, maxregret=0.00001 + ) + assert len(result.equilibria) == stop_after + else: + # compute all + result = gbt.nash.enumpoly_solve(game, use_strategic=False) + assert len(result.equilibria) == len(mixed_behav_prof_data) + for eq, exp in zip(result.equilibria, mixed_behav_prof_data, strict=True): + print("FOUND EQ:", eq) + print(eq.max_regret()) + print(eq.agent_max_regret()) + assert abs(eq.max_regret()) <= TOL + assert abs(eq.agent_max_regret()) <= TOL + expected = game.mixed_behavior_profile(rational=True, data=exp) + # print(expected) + # print(eq) + for p in game.players: + for i in p.infosets: + for a in i.actions: + assert abs(eq[p][i][a] - expected[p][i][a]) <= TOL + + +@pytest.mark.nash +@pytest.mark.nash_enumpoly_behavior +@pytest.mark.parametrize( + "game,mixed_behav_prof_data,stop_after", + [ + ############################################################################## + ############################################################################## + ( + games.create_3_player_with_internal_outcomes_efg(), + [ + [[[1, 0], [1, 0]], [[1, 0], ["1/2", "1/2"]], [[1, 0], [0, 1]]], + [[[1, 0], [1, 0]], [[1, 0], [0, 1]], [[1, 0], ["1/3", "2/3"]]], + ], + 2, + ), + ( + games.create_3_player_with_internal_outcomes_efg(nonterm_outcomes=True), + [ + [[[1, 0], [1, 0]], [[1, 0], ["1/2", "1/2"]], [[1, 0], [0, 1]]], + [[[1, 0], [1, 0]], [[1, 0], [0, 1]], [[1, 0], ["1/3", "2/3"]]]], + 2, + ), + ############################################################################## + ############################################################################## + ], +) +def test_enumpoly_ordered_behavior_PROBLEM_CASE( + game: gbt.Game, mixed_behav_prof_data: list, stop_after: None | int +): + """Test calls of enumpoly for mixed behavior equilibria, + using max_regret and agent_max_regret (internal consistency); and + comparison to a set of previously computed equilibria 
with this function (regression test). This set will be the full set of all computed equilibria if stop_after is None, else the first stop_after-many equilibria. @@ -232,8 +273,14 @@ def test_enumpoly_ordered_behavior( result = gbt.nash.enumpoly_solve(game, use_strategic=False) assert len(result.equilibria) == len(mixed_behav_prof_data) for eq, exp in zip(result.equilibria, mixed_behav_prof_data, strict=True): + print("FOUND EQ:", eq) + print("found max regret:", eq.max_regret()) + print("found agent max regret:", eq.agent_max_regret()) assert abs(eq.max_regret()) <= TOL + assert abs(eq.agent_max_regret()) <= TOL expected = game.mixed_behavior_profile(rational=True, data=exp) + print("exp max regret:", eq.max_regret()) + print("exp agent max regret:", eq.agent_max_regret()) for p in game.players: for i in p.infosets: for a in i.actions: @@ -267,8 +314,8 @@ def test_enumpoly_unordered_behavior( game: gbt.Game, mixed_behav_prof_data: list, stop_after: None | int ): """Test calls of enumpoly for mixed behavior equilibria, - using max_regret (internal consistency); and comparison to a set of previously - computed equilibria using this function (regression test). + using max_regret and agent_max_regret (internal consistency); and + comparison to a set of previously computed equilibria using this function (regression test). This set will be the full set of all computed equilibria if stop_after is None, else the first stop_after-many equilibria. @@ -300,6 +347,7 @@ def are_the_same(game, found, candidate): for eq in result.equilibria: assert abs(eq.max_regret()) <= TOL + assert abs(eq.agent_max_regret()) <= TOL found = False for exp in mixed_behav_prof_data[:]: expected = game.mixed_behavior_profile(rational=True, data=exp) @@ -557,13 +605,14 @@ def test_lcp_behavior_double(): def test_lcp_behavior_rational(game: gbt.Game, mixed_behav_prof_data: list): """Test calls of LCP for mixed behavior equilibria, rational precision. 
- using max_regret (internal consistency); and comparison to a previously - computed equilibrium using this function (regression test) + using max_regret and agent_max_regret (internal consistency); and + comparison to a previously computed equilibrium using this function (regression test). """ result = gbt.nash.lcp_solve(game, use_strategic=False, rational=True) assert len(result.equilibria) == 1 eq = result.equilibria[0] assert eq.max_regret() == 0 + assert eq.agent_max_regret() == 0 expected = game.mixed_behavior_profile(rational=True, data=mixed_behav_prof_data) assert eq == expected @@ -733,13 +782,14 @@ def test_lp_behavior_double(): ) def test_lp_behavior_rational(game: gbt.Game, mixed_behav_prof_data: list): """Test calls of LP for mixed behavior equilibria, rational precision, - using max_regret (internal consistency); and comparison to a previously - computed equilibrium using this function (regression test) + using max_regret and agent_max_regret (internal consistency); and + comparison to a previously computed equilibrium using this function (regression test). 
""" result = gbt.nash.lp_solve(game, use_strategic=False, rational=True) assert len(result.equilibria) == 1 eq = result.equilibria[0] assert eq.max_regret() == 0 + assert eq.agent_max_regret() == 0 expected = game.mixed_behavior_profile(rational=True, data=mixed_behav_prof_data) assert eq == expected @@ -750,10 +800,10 @@ def test_liap_strategy(): _ = gbt.nash.liap_solve(game.mixed_strategy_profile()) -def test_liap_behavior(): - """Test calls of liap for mixed behavior equilibria.""" +def test_liap_agent(): + """Test calls of agent liap for mixed behavior equilibria.""" game = games.read_from_file("stripped_down_poker.efg") - _ = gbt.nash.liap_solve(game.mixed_behavior_profile()) + _ = gbt.nash.liap_agent_solve(game.mixed_behavior_profile()) def test_simpdiv_strategy(): @@ -841,3 +891,88 @@ def test_logit_solve_lambda(): game = games.read_from_file("const_sum_game.nfg") assert len(gbt.qre.logit_solve_lambda( game=game, lam=[1, 2, 3], first_step=0.2, max_accel=1)) > 0 + + +def test_regrets_tmp(): + + prof_data_doub = [] + prof_data_doub.append([[[1, 0], [1, 0]], [[1, 0], [0.5, 0.5]], [[1, 0], [0, 1]]]) + # prof_data_doub.append([[[1, 0], [1, 0]], [[1, 0], [0, 1]], [[1, 0], [0.33333, 0.6666]]]) + # prof_data_doub.append([[[1, 0], [1, 0]], [[1, 0], [0.5, 0.5]], [[0, 1], [1, 0]]]) + # prof_data_doub.append([[[1, 0], [1, 0]], [[1, 0], [0, 1]], [[0.33333, 0.6666], [1, 0]]]) + + prof_data_rat = [] + prof_data_rat.append([[[1, 0], [1, 0]], [[1, 0], ["1/2", "1/2"]], [[1, 0], [0, 1]]]) + # prof_data_rat.append([[[1, 0], [1, 0]], [[1, 0], [0, 1]], [[1, 0], ["1/3", "2/3"]]]) + # prof_data_rat.append([[[1, 0], [1, 0]], [[1, 0], ["1/2", "1/2"]], [[0, 1], [1, 0]]]) + # prof_data_rat.append([[[1, 0], [1, 0]], [[1, 0], [0, 1]], [["1/3", "2/3"], [1, 0]]]) + + g = games.create_3_player_with_internal_outcomes_efg() + + print() + print("==================") + for p in prof_data_doub: + prof = g.mixed_behavior_profile(rational=False, data=p) + print(prof.max_regret()) + 
print(prof.agent_max_regret()) + print("==================") + for p in prof_data_rat: + prof = g.mixed_behavior_profile(rational=True, data=p) + print(prof.max_regret()) + print(prof.agent_max_regret()) + print("==================") + for p in prof_data_doub: + prof = g.mixed_behavior_profile(rational=False, data=p) + print(prof.max_regret()) + print(prof.agent_max_regret()) + + +def test_regrets_tmp2(): + g = games.create_3_player_with_internal_outcomes_efg() + prof_data_rat = [[[1, 0], [1, 0]], [[1, 0], ["1/2", "1/2"]], [[1, 0], [0, 1]]] + profile_rat = g.mixed_behavior_profile(rational=True, data=prof_data_rat) + print() + print(profile_rat.max_regret()) # 3/2 + profile_rat = g.mixed_behavior_profile(rational=True, data=prof_data_rat) + print(profile_rat.max_regret()) # now different! 0 + + +@pytest.mark.parametrize( + "game,mixed_behav_prof_data", + [ + ( + games.create_seq_form_STOC_paper_zero_sum_2_player_efg(), + [ + [[0, 1], ["1/3", "2/3"], ["2/3", "1/3"]], + [["5/6", "1/6"], ["5/9", "4/9"]], + ], + ), + ( + games.create_3_player_with_internal_outcomes_efg(), + [ + [[1, 0], [1, 0]], [[1, 0], ["1/2", "1/2"]], + [[1, 0], [0, 1]] + ], + ), + ( + games.create_STOC_simplified(), + [ + [[0, 1], ["1/3", "2/3"], ["2/3", "1/3"]], + [["5/6", "1/6"]], + ], + ), + # ( + # games.create_STOC_simplified2(), + # [ + # [[1], [1], ["1/3", "2/3"]], + # [["5/6", "1/6"]], + # ], + # ), + ], +) +def test_repeat_max_regret(game: gbt.Game, mixed_behav_prof_data: list): + profile1 = game.mixed_behavior_profile(rational=True, data=mixed_behav_prof_data) + mr1 = profile1.max_regret() + profile2 = game.mixed_behavior_profile(rational=True, data=mixed_behav_prof_data) + mr2 = profile2.max_regret() + assert mr1 == mr2