From 7420ca04535798186b15304f98add9f898ee1fef Mon Sep 17 00:00:00 2001 From: aryansid Date: Sun, 21 Dec 2025 12:44:30 +0400 Subject: [PATCH 1/3] Add Chess arena --- codeclash/arenas/chess/Chess.Dockerfile | 28 ++ codeclash/arenas/chess/__init__.py | 0 codeclash/arenas/chess/chess.py | 522 ++++++++++++++++++++++++ 3 files changed, 550 insertions(+) create mode 100644 codeclash/arenas/chess/Chess.Dockerfile create mode 100644 codeclash/arenas/chess/__init__.py create mode 100644 codeclash/arenas/chess/chess.py diff --git a/codeclash/arenas/chess/Chess.Dockerfile b/codeclash/arenas/chess/Chess.Dockerfile new file mode 100644 index 00000000..fb52e1aa --- /dev/null +++ b/codeclash/arenas/chess/Chess.Dockerfile @@ -0,0 +1,28 @@ +FROM ubuntu:22.04 + +ENV DEBIAN_FRONTEND=noninteractive + +# Install Python 3.10 (and alias python→python3.10), pip, and prerequisites +# Also install C++ compiler and make for building Kojiro +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + curl ca-certificates python3.10 python3.10-venv \ + python3-pip python-is-python3 wget git build-essential \ + g++ make jq curl locales \ + && rm -rf /var/lib/apt/lists/* + +# Clone Kojiro repository +RUN git clone https://github.com/Babak-SSH/Kojiro.git /workspace \ + && cd /workspace \ + && git remote set-url origin https://github.com/Babak-SSH/Kojiro.git + +# Clone and build Fastchess +RUN git clone https://github.com/Disservin/fastchess.git /tmp/fastchess \ + && cd /tmp/fastchess \ + && make -j \ + && install -d /usr/local/bin \ + && install fastchess /usr/local/bin/fastchess \ + && rm -rf /tmp/fastchess + +WORKDIR /workspace + diff --git a/codeclash/arenas/chess/__init__.py b/codeclash/arenas/chess/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/codeclash/arenas/chess/chess.py b/codeclash/arenas/chess/chess.py new file mode 100644 index 00000000..92a6e87c --- /dev/null +++ b/codeclash/arenas/chess/chess.py @@ -0,0 +1,522 @@ +import json +import random 
+import re +import subprocess +from collections import defaultdict +from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path + +from tqdm.auto import tqdm + +from codeclash.agents.player import Player +from codeclash.arenas.arena import CodeArena, RoundStats +from codeclash.constants import RESULT_TIE +from codeclash.utils.environment import assert_zero_exit_code, create_file_in_container + + +class ChessArena(CodeArena): + name: str = "Chess" + description: str = """Chess is a strategic board game where you improve a chess engine (Kojiro) to compete against other engines. +Your engine is written in C++ and uses the UCI (Universal Chess Interface) protocol. +You can modify the evaluation function, search algorithms, move ordering, and other aspects of the engine to improve its strength. +The engine source code is located in the `src/` directory, and you compile it using `make native`. +IMPORTANT: Do not modify the executable name in the Makefile (keep `EXE = kojiro`). The executable must be named `kojiro`.""" + submission: str = "src/" + default_args: dict = { + "time_control": "1+0.01", + } + + def __init__(self, config, **kwargs): + super().__init__(config, **kwargs) + + # Get time control from config + time_control = self.game_config.get("args", self.default_args).get("time_control", self.default_args["time_control"]) + + # Build base Fastchess command + self.run_cmd_base = f"fastchess -each tc={time_control}" + + # Store time control for reference + self.time_control = time_control + + self.logger.debug(f"Initialized ChessArena with time control: {time_control}") + + def validate_code(self, agent: Player) -> tuple[bool, str | None]: + """ + Validate that agent's Kojiro codebase compiles successfully. 
+ """ + # Check that src/ directory exists + ls_result = agent.environment.execute("ls") + if "src" not in ls_result["output"]: + return False, "There should be a `src/` directory in the workspace" + + # Compile the engine + self.logger.debug(f"Compiling Kojiro for agent {agent.name}") + compile_result = agent.environment.execute( + "cd src && make native", + timeout=120, # 2 minute timeout for compilation + ) + + if compile_result["returncode"] != 0: + error_output = compile_result.get("output", "Unknown compilation error") + # Truncate very long error messages + if len(error_output) > 1000: + error_output = error_output[:1000] + "\n... (truncated)" + return False, f"Compilation failed:\n{error_output}" + + # Verify executable was created + kojiro_check = agent.environment.execute("ls src/kojiro") + if kojiro_check["returncode"] != 0 or "kojiro" not in kojiro_check["output"]: + return False, "Compilation succeeded but executable 'kojiro' not found in src/" + + self.logger.info(f"Agent {agent.name} passed validation: Kojiro compiles successfully") + return True, None + + def _compile_engines_in_game_container(self, agents: list[Player]) -> dict[str, str]: + """ + Recompile each agent's engine in the game container and return engine paths. + + Returns: + dict mapping agent name to engine executable path (only successfully compiled agents) + """ + engine_paths = {} + failed_agents = [] + + for agent in agents: + src_dir = f"/{agent.name}/src" + self.logger.debug(f"Compiling Kojiro for {agent.name} in game container") + + compile_result = self.environment.execute( + f"cd {src_dir} && make native", + timeout=120, # 2 minute timeout for compilation + ) + + if compile_result["returncode"] != 0: + error_output = compile_result.get("output", "Unknown compilation error") + if len(error_output) > 1000: + error_output = error_output[:1000] + "\n... 
(truncated)" + self.logger.warning(f"Failed to compile {agent.name} in game container, skipping:\n{error_output}") + failed_agents.append(agent.name) + continue + + # Verify executable exists (executable name is fixed as 'kojiro' per Makefile and prompt constraints) + engine_path = f"{src_dir}/kojiro" + check_result = self.environment.execute(f"test -f {engine_path} && echo 'exists'") + if "exists" not in check_result["output"]: + self.logger.warning( + f"Compilation succeeded but executable 'kojiro' not found at {engine_path} for {agent.name}, skipping" + ) + failed_agents.append(agent.name) + continue + + engine_paths[agent.name] = engine_path + self.logger.debug(f"Successfully compiled {agent.name}, engine at {engine_path}") + + if failed_agents: + self.logger.warning(f"Failed to compile {len(failed_agents)} agent(s): {failed_agents}") + + return engine_paths + + def _build_match_pairings(self, agents: list[Player]) -> list[tuple[Player, Player]]: + """ + Build match pairings for sims_per_round simulations. + + Strategy: Random sampling - pairings are not a balanced round-robin schedule. + For each simulation, randomly select two different agents. + + Returns: + List of (agent1, agent2) tuples + """ + sims = self.game_config["sims_per_round"] + pairings = [] + + # Generate pairings: for each simulation, pick two random agents + for _ in range(sims): + agent1, agent2 = random.sample(agents, 2) + pairings.append((agent1, agent2)) + + return pairings + + def _run_single_match(self, agent1: Player, agent2: Player, engine1_path: str, engine2_path: str, idx: int): + """ + Run a single Fastchess match between two engines. 
+ + Args: + agent1: First agent + agent2: Second agent + engine1_path: Path to first engine executable in game container + engine2_path: Path to second engine executable in game container + idx: Simulation index for output file naming + """ + + output_file = self.log_env / f"match_{idx}.pgn" + + # Ensure log directory exists + self.environment.execute(f"mkdir -p {self.log_env}") + + cmd = ( + f"{self.run_cmd_base} " + f"-engine cmd={engine1_path} name={agent1.name} " + f"-engine cmd={engine2_path} name={agent2.name} " + f"-rounds 1 " + f"-pgnout file={str(output_file)}" + ) + + self.logger.debug(f"Running match {idx}: {agent1.name} vs {agent2.name}") + self.logger.debug(f"Fastchess command: {cmd}") + self.logger.debug(f"Output file path: {output_file}") + + try: + response = self.environment.execute(cmd, timeout=300) # 5 minute timeout per match + if response["returncode"] != 0: + error_output = response.get('output', '')[:1000] + self.logger.warning( + f"Match {idx} ({agent1.name} vs {agent2.name}) failed with exit code {response['returncode']}:\n{error_output}" + ) + else: + # Verify PGN file was created + check_result = self.environment.execute(f"test -f {str(output_file)} && echo 'exists'") + if "exists" not in check_result["output"]: + self.logger.warning(f"Match {idx} completed but PGN file not found at {output_file}") + # Debug: list files in log directory + ls_result = self.environment.execute(f"ls -la {self.log_env}") + self.logger.debug(f"Files in {self.log_env}: {ls_result.get('output', '')[:500]}") + else: + self.logger.debug(f"Match {idx} PGN file verified at {output_file}") + except subprocess.TimeoutExpired: + self.logger.warning(f"Match {idx} ({agent1.name} vs {agent2.name}) timed out after 5 minutes") + + def execute_round(self, agents: list[Player]): + """ + Execute competition phase - run Fastchess matches between agents. 
+ """ + assert len(agents) >= 2, "Chess requires at least two players" + + # Recompile engines in game container + self.logger.info("Recompiling engines in game container...") + engine_paths = self._compile_engines_in_game_container(agents) + + if len(engine_paths) < 2: + self.logger.warning(f"Only {len(engine_paths)} agent(s) compiled successfully, need at least 2. Skipping round.") + return + + # Build match pairings using only successfully compiled agents + compiled_agents = [agent for agent in agents if agent.name in engine_paths] + self.logger.info(f"Building match pairings for {self.game_config['sims_per_round']} simulations...") + pairings = self._build_match_pairings(compiled_agents) + + # Store pairings to file for retrieval in get_results() + pairings_file = self.log_env / "pairings.json" + pairings_data = [ + {"match_idx": idx, "agent1": agent1.name, "agent2": agent2.name} + for idx, (agent1, agent2) in enumerate(pairings) + ] + # Write to container's log directory + pairings_json = json.dumps(pairings_data, indent=2) + create_file_in_container( + container=self.environment, + content=pairings_json, + dest_path=str(pairings_file), + ) + self.logger.debug(f"Stored pairings to {pairings_file}") + + # Run matches in parallel + self.logger.info(f"Running {len(pairings)} matches in parallel...") + with ThreadPoolExecutor(max_workers=min(20, len(pairings))) as executor: + futures = [ + executor.submit( + self._run_single_match, + agent1, + agent2, + engine_paths[agent1.name], + engine_paths[agent2.name], + idx, + ) + for idx, (agent1, agent2) in enumerate(pairings) + ] + + # Collect results with progress bar + for future in tqdm(as_completed(futures), total=len(futures), desc="Chess matches"): + try: + future.result() + except Exception as e: + self.logger.error(f"Match execution failed: {e}", exc_info=True) + + self.logger.info("All matches completed") + + def _parse_all_games_in_pgn(self, pgn_content: str) -> list[tuple[str | None, str, str]]: + """ + Parse 
all games from a PGN file. + + Args: + pgn_content: Content of the PGN file (may contain multiple games) + + Returns: + List of (result, white_agent, black_agent) tuples + result is agent name if that agent won, RESULT_TIE for draws, or None if incomplete + """ + games = [] + + # Split PGN into individual games (games are separated by blank lines) + # Look for [Event ...] tags which mark the start of each game + game_blocks = re.split(r'(?=\[Event\s+")', pgn_content) + + for game_block in game_blocks: + game_block = game_block.strip() + if not game_block: + continue + + # Skip if this block doesn't look like a game (no [White] or [Black] tags) + if '[White' not in game_block or '[Black' not in game_block: + continue + + # Extract White and Black agent names + white_match = re.search(r'\[White\s+"([^"]+)"\]', game_block) + black_match = re.search(r'\[Black\s+"([^"]+)"\]', game_block) + result_match = re.search(r'\[Result\s+"([^"]+)"\]', game_block) + + if not white_match or not black_match: + continue # Skip incomplete game headers + + white_agent = white_match.group(1) + black_agent = black_match.group(1) + + if not result_match: + games.append((None, white_agent, black_agent)) + continue + + result = result_match.group(1) + + # Parse result: "1-0" = White wins, "0-1" = Black wins, "1/2-1/2" = draw, "*" = incomplete + if result == "1-0": + games.append((white_agent, white_agent, black_agent)) + elif result == "0-1": + games.append((black_agent, white_agent, black_agent)) + elif result == "1/2-1/2": + games.append((RESULT_TIE, white_agent, black_agent)) + elif result == "*": + games.append((None, white_agent, black_agent)) + else: + self.logger.warning(f"Unknown result format: {result}") + games.append((None, white_agent, black_agent)) + + return games + + def _aggregate_match_result(self, game_results: list[tuple[str | None, str, str]], agent1_name: str, agent2_name: str) -> str | None: + """ + Aggregate results from multiple games into a single match result. 
+ + Args: + game_results: List of (result, white_agent, black_agent) tuples from _parse_all_games_in_pgn + agent1_name: Name of first agent (for reference) + agent2_name: Name of second agent (for reference) + + Returns: + Match winner (agent name), RESULT_TIE for draw, or None if match incomplete + """ + if not game_results: + return None + + if len(game_results) == 1: + self.logger.warning(f"Match has only 1 game, expected 2. Using single game result.") + return game_results[0][0] + + if len(game_results) > 2: + self.logger.warning(f"Match has {len(game_results)} games, expected 2. Using first 2 games.") + game_results = game_results[:2] + + # Count wins for each agent + agent1_wins = 0 + agent2_wins = 0 + draws = 0 + incomplete = 0 + + for result, white_agent, black_agent in game_results: + if result is None: + incomplete += 1 + elif result == RESULT_TIE: + draws += 1 + elif result == agent1_name: + agent1_wins += 1 + elif result == agent2_name: + agent2_wins += 1 + else: + # Result is for an agent not in this match (shouldn't happen, but handle gracefully) + self.logger.warning(f"Unexpected result agent '{result}' in match between {agent1_name} and {agent2_name}") + + # If both games incomplete, match is incomplete + if incomplete == 2: + return None + + # If one game incomplete, use the other game's result + if incomplete == 1: + for result, _, _ in game_results: + if result is not None: + return result + return None + + # Determine match winner based on wins + if agent1_wins > agent2_wins: + return agent1_name + elif agent2_wins > agent1_wins: + return agent2_name + else: + # Equal wins (could be 1-1, 0-0 with draws, etc.) = match draw + return RESULT_TIE + + def _load_pairings(self, round_num: int) -> dict[int, tuple[str, str]]: + """ + Load match pairings from stored JSON file. 
+ + Returns: + Dict mapping match_idx to (agent1_name, agent2_name) tuple + """ + pairings_file = self.log_round(round_num) / "pairings.json" + + try: + with open(pairings_file) as f: + pairings_data = json.load(f) + + pairings = { + item["match_idx"]: (item["agent1"], item["agent2"]) + for item in pairings_data + } + return pairings + except FileNotFoundError: + self.logger.error(f"Pairings file not found: {pairings_file}") + return {} + except json.JSONDecodeError as e: + self.logger.error(f"Failed to parse pairings file: {e}") + return {} + + def _read_all_match_results(self, round_num: int, agents: list[Player]) -> list[tuple[str | None, str, str]]: + """ + Read all match result files and parse them. + + Returns: + List of (winner, agent1_name, agent2_name) tuples + winner is None if match failed or incomplete + """ + match_results = [] + + # Load pairings from stored file + pairings = self._load_pairings(round_num) + if not pairings: + self.logger.warning("No pairings found, cannot parse match results") + return [] + + # Build set of valid agent names for validation + valid_agent_names = {agent.name for agent in agents} + + sims = self.game_config["sims_per_round"] + + for idx in range(sims): + # Get agent names from stored pairings + if idx not in pairings: + self.logger.warning(f"Match {idx} pairing not found in pairings file, skipping") + continue + + agent1_name, agent2_name = pairings[idx] + + # Validate agent names exist in agents list + if agent1_name not in valid_agent_names or agent2_name not in valid_agent_names: + self.logger.warning( + f"Match {idx}: Invalid agent names ({agent1_name}, {agent2_name}) not in agents list, skipping" + ) + continue + + pgn_file = self.log_round(round_num) / f"match_{idx}.pgn" + + self.logger.debug(f"Looking for PGN file at: {pgn_file}") + + try: + if not pgn_file.exists(): + self.logger.warning(f"PGN file does not exist: {pgn_file}") + # List files in the directory for debugging + if self.log_round(round_num).exists(): 
+ files = list(self.log_round(round_num).iterdir()) + self.logger.debug(f"Files in {self.log_round(round_num)}: {[f.name for f in files]}") + else: + self.logger.warning(f"Round directory does not exist: {self.log_round(round_num)}") + continue + + with open(pgn_file) as f: + pgn_content = f.read() + + # Parse all games from PGN file + game_results = self._parse_all_games_in_pgn(pgn_content) + + # Aggregate game results into match result + winner = self._aggregate_match_result(game_results, agent1_name, agent2_name) + match_results.append((winner, agent1_name, agent2_name)) + + except FileNotFoundError: + self.logger.warning(f"Match {idx} result file not found, skipping") + continue + except Exception as e: + self.logger.warning(f"Error parsing match {idx} result: {e}") + continue + + return match_results + + def get_results(self, agents: list[Player], round_num: int, stats: RoundStats): + """ + Parse Fastchess results and determine winners. + """ + # Debug: Check if round directory exists + round_dir = self.log_round(round_num) + self.logger.debug(f"get_results: Looking for round directory at {round_dir}") + if round_dir.exists(): + files = list(round_dir.iterdir()) + self.logger.debug(f"get_results: Files in round directory: {[f.name for f in files]}") + else: + self.logger.warning(f"get_results: Round directory does not exist: {round_dir}") + + # Read and parse all match results + match_results = self._read_all_match_results(round_num, agents) + + # Count wins per agent + scores = defaultdict(int) + valid_matches = 0 + for winner, agent1_name, agent2_name in match_results: + if winner is None: + # Incomplete or failed match - skip it + continue + elif winner == RESULT_TIE: + # Draws count as 0 points for both, but still a valid match + valid_matches += 1 + continue + else: + # Winner exists - give 1 point + scores[winner] += 1 + valid_matches += 1 + + # Determine overall winner + if valid_matches == 0: + self.logger.warning("No valid match results found (all 
matches failed or incomplete)") + stats.winner = RESULT_TIE + stats.scores = {agent.name: 0 for agent in agents} + elif not scores: + # All valid matches were draws + self.logger.info(f"All {valid_matches} matches were draws") + stats.winner = RESULT_TIE + stats.scores = {agent.name: 0 for agent in agents} + else: + # Find agent(s) with maximum score + max_score = max(scores.values()) + winners = [name for name, score in scores.items() if score == max_score] + + if len(winners) > 1: + stats.winner = RESULT_TIE + else: + stats.winner = winners[0] + + # Update stats object + stats.scores = dict(scores) + + # Ensure all agents have scores (even if 0) + for agent in agents: + stats.scores[agent.name] = scores.get(agent.name, 0) + stats.player_stats[agent.name].score = scores.get(agent.name, 0) + + self.logger.info(f"Round {round_num} results: winner={stats.winner}, scores={stats.scores}") + From 733c485f82309e412c34f28a401e6529cb490337 Mon Sep 17 00:00:00 2001 From: John Yang Date: Fri, 2 Jan 2026 06:46:34 +0000 Subject: [PATCH 2/3] Add ref, config --- codeclash/arenas/__init__.py | 2 ++ configs/test/chess.yaml | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 configs/test/chess.yaml diff --git a/codeclash/arenas/__init__.py b/codeclash/arenas/__init__.py index a955c7e1..8f111332 100644 --- a/codeclash/arenas/__init__.py +++ b/codeclash/arenas/__init__.py @@ -2,6 +2,7 @@ from codeclash.arenas.battlecode25.battlecode25 import BattleCode25Arena from codeclash.arenas.battlesnake.battlesnake import BattleSnakeArena from codeclash.arenas.bridge.bridge import BridgeArena +from codeclash.arenas.chess.chess import ChessArena from codeclash.arenas.corewar.corewar import CoreWarArena from codeclash.arenas.dummy.dummy import DummyArena from codeclash.arenas.figgie.figgie import FiggieArena @@ -17,6 +18,7 @@ BattleCode25Arena, BattleSnakeArena, BridgeArena, + ChessArena, CoreWarArena, DummyArena, FiggieArena, diff --git a/configs/test/chess.yaml 
b/configs/test/chess.yaml new file mode 100644 index 00000000..65dbf6e5 --- /dev/null +++ b/configs/test/chess.yaml @@ -0,0 +1,24 @@ +tournament: + rounds: 3 +game: + name: Chess + sims_per_round: 20 +players: +- agent: dummy + name: p1 +- agent: dummy + name: p2 +prompts: + game_description: | + You are a software developer ({{player_id}}) competing in a coding game. + You will be writing and improving an engine (Kojiro) to play chess against another competitor's engine. + Your engine is written in C++ and uses the UCI (Universal Chess Interface) protocol. + You can modify the evaluation function, search algorithms, move ordering, and other aspects of the engine to improve its strength. + The engine source code is located in the `src/` directory, and you compile it using `make native`. + IMPORTANT: Do not modify the executable name in the Makefile (keep `EXE = kojiro`). The executable must be named `kojiro`. + + The game is played in {{rounds}} rounds. For every round, you (and your competitor) edit program code that controls your bot. This is round {{round}}. + After you and your competitor finish editing your codebases, the game is run automatically. + + Your task: improve the chess engine in `src/`, located in {{working_dir}}. + {{working_dir}} is your codebase, which contains both your bot and supporting assets. From e7cc43dbe983a2bcac7f3c6a8ff983e43c72f9ba Mon Sep 17 00:00:00 2001 From: John Yang Date: Wed, 7 Jan 2026 08:55:01 -0800 Subject: [PATCH 3/3] Add tests --- tests/arenas/test_chess.py | 246 +++++++++++++++++++++++++++++++++++++ 1 file changed, 246 insertions(+) create mode 100644 tests/arenas/test_chess.py diff --git a/tests/arenas/test_chess.py b/tests/arenas/test_chess.py new file mode 100644 index 00000000..da30a10e --- /dev/null +++ b/tests/arenas/test_chess.py @@ -0,0 +1,246 @@ +""" +Unit tests for ChessArena. + +Tests validate_code() and get_results() methods without requiring Docker. 
+""" + +import json +import pytest + +from codeclash.arenas.arena import RoundStats +from codeclash.arenas.chess.chess import ChessArena +from codeclash.constants import RESULT_TIE + +from .conftest import MockPlayer + + +class TestChessValidation: + """Tests for ChessArena.validate_code()""" + + @pytest.fixture + def arena(self, tmp_log_dir, minimal_config): + """Create ChessArena instance with mocked environment.""" + arena = ChessArena.__new__(ChessArena) + arena.submission = "src/" + arena.log_local = tmp_log_dir + # Minimal attributes used in validate_code + arena.logger = type("Logger", (), {"debug": lambda self, msg: None, "info": lambda self, msg: None})() + return arena + + def test_valid_submission(self, arena, mock_player_factory): + """Valid C++ engine compiles and produces `src/kojiro` executable.""" + player = mock_player_factory( + name="test_player", + files={ + # Not strictly used by validate_code, but helpful if commands fall back to defaults + "src/kojiro": "", + }, + command_outputs={ + "ls": {"output": "src\n", "returncode": 0}, + "cd src && make native": {"output": "Compile OK", "returncode": 0}, + "ls src/kojiro": {"output": "kojiro\n", "returncode": 0}, + }, + ) + + is_valid, error = arena.validate_code(player) + assert is_valid is True + assert error is None + + def test_missing_src_directory(self, arena, mock_player_factory): + """Missing `src/` directory fails validation.""" + player = mock_player_factory( + name="test_player", + files={}, + command_outputs={ + "ls": {"output": "README.md\n", "returncode": 0}, + }, + ) + + is_valid, error = arena.validate_code(player) + assert is_valid is False + assert "src/" in error + + def test_compilation_failure(self, arena, mock_player_factory): + """Compilation errors are surfaced and fail validation.""" + player = mock_player_factory( + name="test_player", + files={}, + command_outputs={ + "ls": {"output": "src\n", "returncode": 0}, + "cd src && make native": {"output": "error: failed to 
compile", "returncode": 1}, + }, + ) + + is_valid, error = arena.validate_code(player) + assert is_valid is False + assert "Compilation failed" in error + + def test_missing_executable_after_compilation(self, arena, mock_player_factory): + """Compilation succeeds but missing `kojiro` executable fails validation.""" + player = mock_player_factory( + name="test_player", + files={}, + command_outputs={ + "ls": {"output": "src\n", "returncode": 0}, + "cd src && make native": {"output": "Compile OK", "returncode": 0}, + "ls src/kojiro": {"output": "", "returncode": 1}, + }, + ) + + is_valid, error = arena.validate_code(player) + assert is_valid is False + assert "executable 'kojiro' not found" in error + + +class TestChessResults: + """Tests for ChessArena.get_results()""" + + @pytest.fixture + def arena(self, tmp_log_dir, minimal_config): + """Create ChessArena-like instance with local logging directory.""" + config = minimal_config.copy() + config["game"]["name"] = "Chess" + config["game"]["sims_per_round"] = 2 + + arena = ChessArena.__new__(ChessArena) + arena.submission = "src/" + arena.log_local = tmp_log_dir + arena.config = config + # Lightweight logger stub + arena.logger = type( + "Logger", + (), + { + "debug": lambda self, msg: None, + "info": lambda self, msg: None, + "warning": lambda self, msg: None, + "error": lambda self, msg, **kwargs: None, + }, + )() + return arena + + def _write_pairings(self, round_dir, pairings): + pairings_file = round_dir / "pairings.json" + pairings_file.write_text(json.dumps(pairings, indent=2)) + + def _write_pgn(self, file_path, white: str, black: str, result: str): + content = ( + """ +[Event "FastChess Match"] +[Site "-"] +[Date "2026.01.07"] +[Round "1"] +""".strip() + + f"\n[White \"{white}\"]\n[Black \"{black}\"]\n[Result \"{result}\"]\n\n" + ) + file_path.write_text(content) + + def test_player1_wins(self, arena, tmp_log_dir): + """Alice wins one match; overall winner is Alice.""" + round_dir = tmp_log_dir / "rounds" / 
"1" + round_dir.mkdir(parents=True) + + # sims_per_round = 2 but only first match is valid; second missing -> ignored + pairings = [ + {"match_idx": 0, "agent1": "Alice", "agent2": "Bob"}, + {"match_idx": 1, "agent1": "Alice", "agent2": "Bob"}, + ] + self._write_pairings(round_dir, pairings) + + # Match 0: Alice (White) wins + self._write_pgn(round_dir / "match_0.pgn", white="Alice", black="Bob", result="1-0") + # Match 1: no file -> ignored + + agents = [MockPlayer("Alice"), MockPlayer("Bob")] + stats = RoundStats(round_num=1, agents=agents) + + arena.get_results(agents, round_num=1, stats=stats) + + assert stats.winner == "Alice" + assert stats.scores["Alice"] == 1 + assert stats.scores["Bob"] == 0 + + def test_player2_wins(self, arena, tmp_log_dir): + """Bob wins one match; overall winner is Bob.""" + round_dir = tmp_log_dir / "rounds" / "1" + round_dir.mkdir(parents=True) + + pairings = [ + {"match_idx": 0, "agent1": "Alice", "agent2": "Bob"}, + {"match_idx": 1, "agent1": "Alice", "agent2": "Bob"}, + ] + self._write_pairings(round_dir, pairings) + + # Match 0: Bob (Black) wins + self._write_pgn(round_dir / "match_0.pgn", white="Alice", black="Bob", result="0-1") + + agents = [MockPlayer("Alice"), MockPlayer("Bob")] + stats = RoundStats(round_num=1, agents=agents) + + arena.get_results(agents, round_num=1, stats=stats) + + assert stats.winner == "Bob" + assert stats.scores["Alice"] == 0 + assert stats.scores["Bob"] == 1 + + def test_all_draws(self, arena, tmp_log_dir): + """All matches draw -> overall tie with zero scores.""" + round_dir = tmp_log_dir / "rounds" / "1" + round_dir.mkdir(parents=True) + + pairings = [ + {"match_idx": 0, "agent1": "Alice", "agent2": "Bob"}, + {"match_idx": 1, "agent1": "Alice", "agent2": "Bob"}, + ] + self._write_pairings(round_dir, pairings) + + # Two draws + self._write_pgn(round_dir / "match_0.pgn", white="Alice", black="Bob", result="1/2-1/2") + self._write_pgn(round_dir / "match_1.pgn", white="Bob", black="Alice", 
result="1/2-1/2") + + agents = [MockPlayer("Alice"), MockPlayer("Bob")] + stats = RoundStats(round_num=1, agents=agents) + + arena.get_results(agents, round_num=1, stats=stats) + + assert stats.winner == RESULT_TIE + assert stats.scores["Alice"] == 0 + assert stats.scores["Bob"] == 0 + + def test_split_wins_results_in_tie(self, arena, tmp_log_dir): + """Each player wins one match -> tie overall.""" + round_dir = tmp_log_dir / "rounds" / "1" + round_dir.mkdir(parents=True) + + pairings = [ + {"match_idx": 0, "agent1": "Alice", "agent2": "Bob"}, + {"match_idx": 1, "agent1": "Alice", "agent2": "Bob"}, + ] + self._write_pairings(round_dir, pairings) + + # Alice wins match 0, Bob wins match 1 + self._write_pgn(round_dir / "match_0.pgn", white="Alice", black="Bob", result="1-0") + self._write_pgn(round_dir / "match_1.pgn", white="Alice", black="Bob", result="0-1") + + agents = [MockPlayer("Alice"), MockPlayer("Bob")] + stats = RoundStats(round_num=1, agents=agents) + + arena.get_results(agents, round_num=1, stats=stats) + + assert stats.winner == RESULT_TIE + assert stats.scores["Alice"] == 1 + assert stats.scores["Bob"] == 1 + + +class TestChessConfig: + """Tests for ChessArena configuration and properties.""" + + def test_arena_name(self): + assert ChessArena.name == "Chess" + + def test_submission_folder(self): + assert ChessArena.submission == "src/" + + def test_default_args_contains_time_control(self): + assert "time_control" in ChessArena.default_args +