From 54a4fef8f55a303241dc1bac4f0f7f823f739d32 Mon Sep 17 00:00:00 2001
From: lambert_wu <lambertine9527@gmail.com>
Date: Sun, 23 Nov 2025 19:34:03 +0800
Subject: [PATCH] Fix UB by checking for invalid moves in RL and play_game

- In play_rl(), skip the write to table[move] when
  get_action_exploit() returns -1, since indexing the
  table with -1 is an out-of-bounds write.
- Add the same check in play_game() so that a -1 move
  from negamax/MCTS/RL is never used as a board index.
---
 agents/reinforcement_learning.c | 3 ++-
 elo.c                           | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/agents/reinforcement_learning.c b/agents/reinforcement_learning.c
index 5466519..ce009a1 100644
--- a/agents/reinforcement_learning.c
+++ b/agents/reinforcement_learning.c
@@ -96,7 +96,8 @@ int get_action_exploit(char *table, rl_agent_t *agent)
 int play_rl(char *table, rl_agent_t *agent)
 {
     int move = get_action_exploit(table, agent);
-    table[move] = agent->player;
+    if (move != -1)
+        table[move] = agent->player;
     return move;
 }
 
diff --git a/elo.c b/elo.c
index f4110e1..a28406d 100644
--- a/elo.c
+++ b/elo.c
@@ -63,7 +63,8 @@ static int play_game(int player1, int player2)
         else
             move = play_rl(table, &agent);
 
-        table[move] = player == player1 ? 'X' : 'O';
+        if (move != -1)
+            table[move] = player == player1 ? 'X' : 'O';
         player ^= player1 ^ player2;
     }