From bef740b5d968c3113816dcdd9e8d09948176a91c Mon Sep 17 00:00:00 2001 From: Jakob Date: Fri, 18 Apr 2025 21:13:54 +0200 Subject: [PATCH 1/5] Removed debug prints. --- src/TicTacToe/DeepQAgent.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/TicTacToe/DeepQAgent.py b/src/TicTacToe/DeepQAgent.py index a50283d..0101b04 100644 --- a/src/TicTacToe/DeepQAgent.py +++ b/src/TicTacToe/DeepQAgent.py @@ -403,7 +403,6 @@ def compute_standard_loss( The computed loss. """ states, actions, rewards, next_states, dones = samples - # print(f"states.shape = {states.shape}") q_values = self.q_network(states).gather(1, actions.unsqueeze(1)).squeeze(1) next_q_values = self.target_network(next_states).max(1, keepdim=True)[0].squeeze(1) targets = rewards + (~dones) * self.gamma * next_q_values @@ -585,7 +584,6 @@ def get_best_action(self, board: Board, q_network: nn.Module) -> Action: """ state = self.board_to_state(board) state_tensor = torch.FloatTensor(state).to(self.device) - # print(f"state_tensor.shape = {state_tensor.shape}") with torch.no_grad(): q_values = q_network(state_tensor).squeeze() max_q, _ = torch.max(q_values, dim=0) From e3e8744cdc90d3321497f97441440fb665495196 Mon Sep 17 00:00:00 2001 From: Jakob Date: Fri, 18 Apr 2025 21:40:43 +0200 Subject: [PATCH 2/5] Use symmetrized loss together with prioritized experience replay. Fixes #110 --- src/TicTacToe/DeepQAgent.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/TicTacToe/DeepQAgent.py b/src/TicTacToe/DeepQAgent.py index 0101b04..d78cfc4 100644 --- a/src/TicTacToe/DeepQAgent.py +++ b/src/TicTacToe/DeepQAgent.py @@ -303,10 +303,14 @@ def _init_symmetrized_loss(self, params: dict[str, Any]) -> None: lambda x: np.flipud(np.transpose(x)), lambda x: np.flipud(np.fliplr(np.transpose(x))), ] - if params.get("symmetrized_loss", True): + if params.get("symmetrized_loss", True) and params.get("replay_buffer_type", "uniform") == "uniform": self.compute_loss = self.create_symmetrized_loss( self.compute_standard_loss, self.transformations, self.rows ) + elif params.get("symmetrized_loss", True) and params.get("replay_buffer_type", "uniform") == "prioritized": + self.compute_loss = self.create_symmetrized_loss( + self.compute_prioritized_loss, self.transformations, self.rows + ) elif params.get("replay_buffer_type", "uniform") == "prioritized": self.compute_loss = self.compute_prioritized_loss else: From 17f834e60b5f81b8545d908bba1c4053eba65264 Mon Sep 17 00:00:00 2001 From: Jakob Date: Fri, 18 Apr 2025 21:41:08 +0200 Subject: [PATCH 3/5] Use symmetrized loss together with prioritized experience replay. 
--- train_and_play/train_dqn_sweep.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/train_and_play/train_dqn_sweep.py b/train_and_play/train_dqn_sweep.py index 9c948e9..8c695f4 100644 --- a/train_and_play/train_dqn_sweep.py +++ b/train_and_play/train_dqn_sweep.py @@ -74,7 +74,7 @@ "shared_replay_buffer": False, # Share replay buffer between agents # Q Network settings - "network_type": "CNN", # Network architecture: 'Equivariant', 'FullyCNN', 'FCN', 'CNN' + "network_type": "FullyCNN", # Network architecture: 'Equivariant', 'FullyCNN', 'FCN', 'CNN' "periodic": False, # Periodic boundary conditions "load_network": False, # Whether to load pretrained weights "project_name": "TicTacToe", # Weights & Biases project name @@ -88,7 +88,7 @@ # --- Sweep Setup --- # param_sweep = {"replay_buffer_type": ["prioritized", "uniform"], "periodic": [True, False], "state_shape": ["one-hot", "flat"]} -param_sweep = {"periodic": [True, False], "state_shape": ["one-hot", "2D", "flat"], "network_type": ["CNN", "FullyCNN"]} +param_sweep = {"replay_buffer_type": ["prioritized", "uniform"], "symmetrized_loss": [True, False], "state_shape": ["one-hot", "flat"]} sweep_combinations, param_keys = get_param_sweep_combinations(param_sweep) # --- Shared Replay Buffer Setup --- From dfca259add97ba8a6b11d6c46a934761265a5909 Mon Sep 17 00:00:00 2001 From: Jakob Date: Fri, 18 Apr 2025 21:52:41 +0200 Subject: [PATCH 4/5] Removed 'DisplayTest.py'. --- README.md | 1 - pyproject.toml | 1 - src/TicTacToe/DisplayTest.py | 17 ----------------- 3 files changed, 19 deletions(-) delete mode 100644 src/TicTacToe/DisplayTest.py diff --git a/README.md b/README.md index e5697e0..3784000 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,6 @@ Here is a list of all files in the `src` folder and their purposes: - **`TicTacToe/Agent.py`**: Defines the base agent class for the game. - **`TicTacToe/DeepQAgent.py`**: Implements a deep Q-learning agent. - **`TicTacToe/Display.py`**: Handles the display of the game board. -- **`TicTacToe/DisplayTest.py`**: Contains tests for the display module. - **`TicTacToe/EquivariantNN.py`**: Implements equivariant neural networks for symmetry-aware learning. - **`TicTacToe/Evaluation.py`**: Provides evaluation metrics for agents. - **`TicTacToe/game_types.py`**: Defines types and constants used in the game. 
diff --git a/pyproject.toml b/pyproject.toml index fd565e6..09ae2ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,5 +81,4 @@ exclude = ["**/*.ipynb"] omit = [ # omit this single file "src/TicTacToe/Evaluation.py", - "src/TicTacToe/DisplayTest.py", ] diff --git a/src/TicTacToe/DisplayTest.py b/src/TicTacToe/DisplayTest.py deleted file mode 100644 index bb58b4a..0000000 --- a/src/TicTacToe/DisplayTest.py +++ /dev/null @@ -1,17 +0,0 @@ -# %% -from TicTacToe.Agent import MouseAgent -from TicTacToe.DeepQAgent import DeepQPlayingAgent -from TicTacToe.Display import ScreenDisplay -from TicTacToe.TicTacToe import TicTacToe - -rows = 4 -win_length = 4 -# agent1 = RandomAgent(player='X', switching=False) -# agent1 = HumanAgent(player='X') -agent1 = MouseAgent(player="O") -# agent2 = RandomAgent(player='O', switching=False) -# agent1 = DeepQPlayingAgent(player='X', q_network='models/q_network_4x4x4.pth') -agent2 = DeepQPlayingAgent(player="X", q_network="models/q_network_4x4x4.pth") -# display = ConsoleDisplay(rows=rows, cols=rows, waiting_time=0.5) -display = ScreenDisplay(rows=rows, cols=rows, waiting_time=0.5) -game = TicTacToe(agent1, agent2, display=display, rows=rows, cols=rows, win_length=win_length) From 8f6ebdbdb48903b4aecdcfc99154cc55dd93963d Mon Sep 17 00:00:00 2001 From: Jakob Date: Fri, 18 Apr 2025 22:13:58 +0200 Subject: [PATCH 5/5] Pass params to 'DeepQPlayingAgent'. --- src/TicTacToe/DeepQAgent.py | 16 ++++++++++------ src/TicTacToe/Evaluation.py | 11 +++++++---- tests/test_deep_q_agent.py | 20 ++++++++++---------- train_and_play/play_O_against_model.py | 19 ++++++++++++++----- train_and_play/play_X_against_model.py | 19 ++++++++++++++----- 5 files changed, 55 insertions(+), 30 deletions(-) diff --git a/src/TicTacToe/DeepQAgent.py b/src/TicTacToe/DeepQAgent.py index d78cfc4..4870aa4 100644 --- a/src/TicTacToe/DeepQAgent.py +++ b/src/TicTacToe/DeepQAgent.py @@ -609,10 +609,8 @@ class DeepQPlayingAgent(Agent): def __init__(self, q_network: nn.Module | str, - player: Player = "X", - switching: bool = False, - device : str = "cpu", - state_shape: str = "flat") -> None: + params: dict + ) -> None: """ Initialize the DeepQPlayingAgent. @@ -621,6 +619,12 @@ def __init__(self, player: The player symbol ("X" or "O"). switching: Whether to switch players after each game. """ + player = params["player"] + switching = params["switching"] + device = params["device"] + state_shape = params["state_shape"] + rows = params["rows"] + super().__init__(player=player, switching=switching) self.device = torch.device(device) @@ -633,9 +637,9 @@ def __init__(self, if state_shape == "flat": self.state_converter = FlatStateConverter() elif state_shape == "2D": - self.state_converter = GridStateConverter(shape=(3, 3)) # Assuming a 3x3 grid + self.state_converter = GridStateConverter(shape=(rows, rows)) elif state_shape == "one-hot": - self.state_converter = OneHotStateConverter(rows=3) # Assuming a 3x3 grid + self.state_converter = OneHotStateConverter(rows=rows) else: raise ValueError(f"Unsupported state shape: {state_shape}") diff --git a/src/TicTacToe/Evaluation.py b/src/TicTacToe/Evaluation.py index 6c834ca..d11a1c7 100644 --- a/src/TicTacToe/Evaluation.py +++ b/src/TicTacToe/Evaluation.py @@ -1,4 +1,5 @@ import wandb +import copy from typing import Any, Optional @@ -267,12 +268,13 @@ def evaluate_performance( dict[str, float]: A dictionary containing evaluation metrics. 
""" wandb_logging = params["wandb_logging"] - device = params["device"] - state_shape = params["state_shape"] evaluation_batch_size = params["evaluation_batch_size"] + playing_params = copy.deepcopy(params) + q_network1 = learning_agent1.q_network - playing_agent1 = DeepQPlayingAgent(q_network1, player="X", switching=False, device=device, state_shape=state_shape) + playing_params["player"] = "X" + playing_agent1 = DeepQPlayingAgent(q_network1, params=playing_params) random_agent2 = RandomAgent(player="O", switching=False) all_data = {} @@ -294,7 +296,8 @@ def evaluate_performance( wandb.log(data) q_network2 = learning_agent2.q_network - playing_agent2 = DeepQPlayingAgent(q_network2, player="O", switching=False, device=device, state_shape=state_shape) + playing_params["player"] = "O" + playing_agent2 = DeepQPlayingAgent(q_network2, params=playing_params) random_agent1 = RandomAgent(player="X", switching=False) game = TicTacToe(random_agent1, playing_agent2, display=None, params=params) diff --git a/tests/test_deep_q_agent.py b/tests/test_deep_q_agent.py index 2a1a3f7..5c5f5d4 100644 --- a/tests/test_deep_q_agent.py +++ b/tests/test_deep_q_agent.py @@ -469,7 +469,14 @@ def get_board(self): class TestDeepQPlayingAgent(unittest.TestCase): def setUp(self): self.q_network = MockQNetwork() - self.agent = DeepQPlayingAgent(q_network=self.q_network, player="X", switching=True) + self.params = { + "player": "X", + "switching": False, + "rows": 3, + "device": "cpu", + "state_shape": "flat", + } + self.agent = DeepQPlayingAgent(q_network=self.q_network, params=self.params) def test_board_to_state(self): board = ["X", " ", "O", " ", " ", "X", "O", " ", " "] @@ -493,7 +500,7 @@ def test_choose_action(self): @patch("torch.load", return_value=MockQNetwork()) def test_q_network_loading(self, mock_load): - agent = DeepQPlayingAgent(q_network="mock_path.pth", player="X", switching=False) + agent = DeepQPlayingAgent(q_network="mock_path.pth", params=self.params) self.assertIsInstance(agent.q_network, MockQNetwork) mock_load.assert_called_once_with("mock_path.pth", weights_only=False) @@ -507,11 +514,4 @@ def test_get_action_done(self): mock_game = MockTicTacToe() state_transition = (None, None, True) # Done flag is True. action = self.agent.get_action(state_transition, mock_game) - self.assertEqual(action, -1) # Game is over, no action taken. - - def test_on_game_end(self): - initial_player = self.agent.player - initial_opponent = self.agent.opponent - self.agent.on_game_end(None) # Pass None for game, not used. - self.assertEqual(self.agent.player, initial_opponent) - self.assertEqual(self.agent.opponent, initial_player) \ No newline at end of file + self.assertEqual(action, -1) # Game is over, no action taken. 
\ No newline at end of file diff --git a/train_and_play/play_O_against_model.py b/train_and_play/play_O_against_model.py index 5536d92..324bf67 100644 --- a/train_and_play/play_O_against_model.py +++ b/train_and_play/play_O_against_model.py @@ -23,14 +23,23 @@ relative_folder = (script_dir / '../models/all_models').resolve() model_path = f"{relative_folder}/q_network_3x3x3_O.pth" +params = { + "player": "X", # Player symbol for the agent + "rows": 3, # Board size (rows x rows) + "win_length": 3, # Number of in-a-row needed to win + "rewards": { + "W": 1.0, # Reward for a win + "L": -1.0, # Reward for a loss + "D": 0.5, # Reward for a draw + }, +} + # Set up the game -rows = 3 -win_length = 3 agent1 = MouseAgent(player="O") -agent2 = DeepQPlayingAgent(q_network=model_path, player="X") -display = ScreenDisplay(rows=rows, cols=rows, waiting_time=0.5) +agent2 = DeepQPlayingAgent(q_network=model_path, params=params) +display = ScreenDisplay(rows=params["rows"], cols=params["rows"], waiting_time=0.5) -game = TicTacToe(agent1, agent2, display=display, rows=rows, cols=rows, win_length=win_length) +game = TicTacToe(agent1, agent2, display=display, params=params) # Play the game game.play() \ No newline at end of file diff --git a/train_and_play/play_X_against_model.py b/train_and_play/play_X_against_model.py index 3f89b31..8aea075 100644 --- a/train_and_play/play_X_against_model.py +++ b/train_and_play/play_X_against_model.py @@ -23,14 +23,23 @@ relative_folder = (script_dir / '../models/all_models').resolve() model_path = f"{relative_folder}/q_network_3x3x3_X.pth" # Change this path to the desired model +params = { + "player": "O", # Player symbol for the agent + "rows": 3, # Board size (rows x rows) + "win_length": 3, # Number of in-a-row needed to win + "rewards": { + "W": 1.0, # Reward for a win + "L": -1.0, # Reward for a loss + "D": 0.5, # Reward for a draw + }, +} + # Set up the game -rows = 3 -win_length = 3 -agent1 = DeepQPlayingAgent(q_network=model_path, player="O") +agent1 = DeepQPlayingAgent(q_network=model_path, params=params) agent2 = MouseAgent(player="X") -display = ScreenDisplay(rows=rows, cols=rows, waiting_time=0.5) +display = ScreenDisplay(rows=params["rows"], cols=params["rows"], waiting_time=0.5) -game = TicTacToe(agent1, agent2, display=display, rows=rows, cols=rows, win_length=win_length, periodic=True) +game = TicTacToe(agent1, agent2, display=display, params=params) # Play the game game.play() \ No newline at end of file