GeneralGameEngine/NN_MCTS.cpp at master · ItachiEU/GeneralGameEngine · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#include "NN_MCTS.hpp"

// Constructs the search on top of MCTS(game), stores the network runner and
// the game<->network adapter, and evaluates the root position once so that the
// root node already carries a score for each of its possible moves.
NN_MCTS::NN_MCTS(std::shared_ptr<Game> game, std::shared_ptr<NetRunner> runner, std::shared_ptr<NN_Interface> interface) : MCTS(game)
{
   this->runner = runner;
   this->nn_interface = interface;

   // Encode the root position from the point of view of the player to move.
   auto rootGame = this->root->getGame();
   auto encodedRoot = this->nn_interface->getNNInput(rootGame, rootGame->getCurrentPlayer());

   // Run the network on the encoded position.
   auto networkOutput = this->runner->request_run(encodedRoot);

   // Turn the raw network output into one score per possible root move.
   auto rootMoves = this->root->getPossibleMoves();
   auto rootMoveScores = this->nn_interface->moveScores(networkOutput, rootMoves);

   getRoot()->setMoveScores(rootMoveScores);
}

// Evaluates a node without a random playout.
//
// Terminal nodes return the result stored on the node (getTrueGameResult()).
// For any other node the position is passed through the network; the
// resulting per-move scores are stored on the node and the network's board
// value for the player to move is returned.
double NN_MCTS::simulate(std::shared_ptr<Node> root)
{
   auto position = root->getGame();
   auto playerToMove = position->getCurrentPlayer();

   // A finished game needs no network evaluation.
   if (root->getTerminal())
      return root->getTrueGameResult();

   auto encoded = this->nn_interface->getNNInput(position, playerToMove);
   auto networkOutput = this->runner->request_run(encoded);

   // Cache the move scores on the node; bestChild() reads them later.
   auto legalMoves = root->getPossibleMoves();
   auto scoresPerMove = this->nn_interface->moveScores(networkOutput, legalMoves);
   root->setMoveScores(scoresPerMove);

   double score = this->nn_interface->boardValue(networkOutput, playerToMove);

   // The value must be non-negative; a small tolerance absorbs rounding.
   assert (score >= -0.01 && "board score < 0");

   // The value is handed back to the caller; it is not written to the node here.
   return score;
}

// Walks down the tree from `node`, repeatedly stepping to bestChild(), and
// returns the node at which the descent stops: either a terminal node or the
// first node that has no simulations yet. The player index alternates
// (p -> 1 - p) at every step and is passed to bestChild().
std::shared_ptr<Node> NN_MCTS::treePolicy(std::shared_ptr<Node> node)
{
   assert (node != nullptr && "tree policy called with nullptr node");

   int sideToMove = node->getGame()->getCurrentPlayer();

   // Keep descending while the node is non-terminal and already simulated.
   while (!node->getTerminal() && node->getSimulations() != 0)
   {
      node = this->bestChild(node, sideToMove);
      assert (node != nullptr && "bestChild returned nullptr");
      sideToMove = 1 - sideToMove;
   }

   return node;
}

// Since expansions are also guided by the neural network, bestChild is now responsible for expanding
std::shared_ptr<Node> NN_MCTS::bestChild(std::shared_ptr<Node> node, int currentPlayer){
   double bestScore = -1.1;
   int chosenIndex = -1;
   bool best_expand = false;

   int s = node->getPossibleMoves().size();
   assert((int)node->getMoveScores().size() == s);
   double N_P = node->getSimulations();

   // std::cout << "picking best node as player " << currentPlayer << std::endl;

   double c = log((N_P + 1 + 18000.0)/ 18000.0) + 1.25; // from alphazero paper

   for (int i = 0; i<s; i++)
   {
      double W = 0;
      double N = 0;
      bool expand = true;
      // std::cout << "checking node " << i << std::endl;
      if(node->getChildren().find(i) != node->getChildren().end())
      {
         W = node->getChildren()[i]->getScore(currentPlayer);
         N = node->getChildren()[i]->getSimulations();
         expand = false;
      }

      double score = ((2*W-N)/(N + 0.00001)) + c*node->getMoveScores()[i]*sqrt(N_P)/(1+N);

      // std::cout << "win rate = " << W/N << " score = " << score << std::endl;

      if(((2*W-N)/(N + 0.00001))  <= -1.01){
         std::cerr << "W = " << W << " N = " << N << " N_P = " << N_P << " score = " << score << std::endl;
      }

      if(score > bestScore)
      {
         bestScore = score;
         chosenIndex = i;
         best_expand = expand;
      }
   }

   // std::cout << "bestScore = " << bestScore << std::endl;
   // auto move = node->getPossibleMoves()[chosenIndex];
   // auto cmove = std::static_pointer_cast<ChessMove>(move);
   // std::cout << cmove->getFromRow() << " " << cmove->getFromCol() << " " << cmove->getToRow() << " " << cmove->getToCol() << std::endl;
   // auto game = node->getGame()->clone();
   // game->simulateMove(move);
   // std::cout << game->printBoard() << std::endl;

   assert (chosenIndex != -1);

   if(best_expand)
   {
      // std::cerr << "expanding" << std::endl;
      return this->expand(node, chosenIndex);
   }
   else
   {
      // std::cerr << "not expanding? " << bestScore << std::endl;
      return node->getChildren()[chosenIndex];
   }
}

std::pair<std::shared_ptr<Move>, std::shared_ptr<Node>> NN_MCTS::getBestMove(){
   if(!this->randomness){
      int best_cnt = -1;
      std::shared_ptr<Node> chosenChild = nullptr;
      std::shared_ptr<Move> chosenMove = nullptr;
      for (auto son : root->getChildren())
      {
         if(son.second->getSimulations() > best_cnt)
         {
            chosenChild = son.second;
            chosenMove = root->getPossibleMoves()[son.first];
            best_cnt = son.second->getSimulations();
         }
      }
      return make_pair(chosenMove, chosenChild);
   } else {
      int acc = 0;
      int random_pos = rand() % (root->getSimulations() - 1);
      for (auto son : root->getChildren())
      {
         acc += son.second->getSimulations();
         if(acc >= random_pos)
         {
            return make_pair(root->getPossibleMoves()[son.first], son.second);
         }
      }
      assert (false && "No child found");
   }
}

// Creates the child of `node` reached by playing the move at `move_index`,
// registers it in the parent's children under that index and returns it.
//
// The move is applied to a clone of the parent's game, after which the current
// player of the clone is flipped (p -> 1 - p). If gameStatus() reports
// anything other than -1 the child is marked terminal and its true result is
// stored: 0.5 for status 1 or 2, and 0 for every other status.
std::shared_ptr<Node> NN_MCTS::expand(std::shared_ptr<Node> node, int move_index)
{
   // Work on a copy so the parent's position stays untouched.
   auto nextState = node->getGame()->clone();
   nextState->simulateMove(node->getPossibleMoves()[move_index]);
   nextState->setCurrentPlayer(1 - nextState->getCurrentPlayer());

   auto replies = nextState->getPossibleMoves();
   auto child = std::make_shared<Node>(replies, nextState, node);
   node->getChildren()[move_index] = child;

   const int outcome = child->getGame()->gameStatus(replies);
   if (outcome == -1)
   {
      // Status -1: the game continues, nothing more to record.
      return child;
   }

   child->setTerminal(true);
   if (outcome != 1 && outcome != 2)
   {
      child->setTrueGameResult(0); // we can only lose as a current player
   }
   else
   {
      child->setTrueGameResult(0.5);
   }

   return child;
}


// Sets the flag read by getBestMove(): when true the move is picked at random
// based on the children's simulation counts, when false the most simulated
// child is picked.
void NN_MCTS::setRandomness(bool randomness) {
   // The parameter shadows the member, hence the explicit this->.
   this->randomness = randomness;
}