SakanaAI · Yuki-Imajuku · Jun 17, 2025 · May 28, 2025 · Jun 2, 2025 · Jun 4, 2025
diff --git a/README.md b/README.md
@@ -16,6 +16,8 @@ Drawing on real-world tasks from the AtCoder Heuristic Contest (AHC), ALE-Bench
 
 *Note: This repository is not an official product of SakanaAI or AtCoder and is therefore not officially supported.*
 
+***Important: Please do not use this repository to participate in AHCs ([AtCoder Heuristic Contest Generative AI Usage Rules - Version 20250616](https://info.atcoder.jp/entry/ahc-llm-rules-en)).***
+
 https://github.com/user-attachments/assets/50a8de5a-b519-4aef-8e54-c60ac9dcbb90
 
 ## Setup
@@ -485,6 +487,55 @@ Set `num_workers` to at most the number of **physical cores** of your instance,
     # Confirm with 'yes' or use -auto-approve
     ```
 
+## MCP (Model Context Protocol) Server
+The MCP server is a lightweight HTTP server that provides a simple interface for interacting with the ALE-Bench toolkit. It allows you to run evaluations and manage sessions without needing to write Python code directly.
+
+### Setup
+1. Install Node.js and npm
+    ```sh
+    # Install nvm (Node Version Manager) for easy Node.js management
+    curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.3/install.sh | bash
+    export NVM_DIR="$HOME/.nvm"
+    [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"  # This loads nvm
+    [ -s "$NVM_DIR/bash_completion" ] && \. "$NVM_DIR/bash_completion"  # This loads nvm bash_completion
+    # Install the latest LTS version of Node.js
+    nvm install --lts
+    # Install the Model Context Protocol Inspector
+    npm install -g @modelcontextprotocol/inspector
+    ```
+2. Install the MCP server dependencies using pip or uv:
+    ```sh
+    cd mcp
+    uv sync
+    uv sync --extra dev  # For development dependencies
+    ```
+
+### Running the MCP Server
+```sh
+# Ensure you are in the mcp directory (e.g., cd mcp from the project root)
+uv run mcp run server.py
+uv run mcp dev server.py --with-editable .  # For development
+```
+
+### Use with Claude Desktop
+1. Open the `claude_desktop_config.json` file that configures Claude Desktop. Add the following configuration to connect to the MCP server, ensuring you replace `/path/to/ALE-Bench` in the `args` with the actual absolute path to your cloned `ALE-Bench` repository directory:
+    ```json
+    {
+        "mcpServers": {
+            "ALE-Bench MCP Server": {
+                "command": "/bin/bash",
+                "args": [
+                    "-c",
+                    "cd /path/to/ALE-Bench/mcp && uv run --with ale_bench --with mcp[cli] mcp run /path/to/ALE-Bench/mcp/server.py"
+                ]
+            }
+        }
+    }
+    ```
+2. Restart the Claude Desktop application to apply the changes.
+
+<img width="680" alt="MCP_Claude_Desktop" src="https://github.com/user-attachments/assets/d9f22719-5686-406d-aa94-44406c700d6f" />
+
 ## Development
 
 -   **Environment Setup:**
@@ -516,13 +567,13 @@ Set `num_workers` to at most the number of **physical cores** of your instance,
 -   **Python Library Development:**
     ```sh
     # Linting
-    ruff check src tests
+    ruff check src mcp tests
 
     # Formatting
-    ruff format src tests
+    ruff format src mcp tests
 
     # Static Type Checking
-    mypy src tests
+    mypy src mcp tests
 
     # Running Tests
     pytest
@@ -534,10 +585,10 @@ Set `num_workers` to at most the number of **physical cores** of your instance,
 Please cite ALE-Bench as follows:
 
 ```bibtex
-@misc{imajuku2025ale-bench,
-    title = {{ALE-Bench}: A Benchmark for Long-Horizon Objective-Driven Algorithm Engineering},
-    author = {Imajuku, Yuki and Horie, Kohki and Iwata, Yoichi and Aoki, Kensho and Takahashi, Naohiro and Akiba, Takuya},
-    url = {https://github.com/SakanaAI/ALE-Bench},
-    year = {2025}
+@article{imajuku2025ale-bench,
+    title={ALE-Bench: A Benchmark for Long-Horizon Objective-Driven Algorithm Engineering},
+    author={Imajuku, Yuki and Horie, Kohki and Iwata, Yoichi and Aoki, Kensho and Takahashi, Naohiro and Akiba, Takuya},
+    journal={arXiv preprint arXiv:2506.09050},
+    year={2025}
 }
 ```
diff --git a/cloud/main.tf b/cloud/main.tf
@@ -104,7 +104,7 @@ resource "aws_instance" "ale_bench_instance" {
     disable_api_termination = false
     instance_initiated_shutdown_behavior = "stop"
 
-    user_data = file("setup.sh")
+    user_data = file(var.setup_file_name)
 
     tags = {
         Name = "ale-bench-instance-${count.index}"

diff --git a/cloud/setup.sh b/cloud/setup.sh
@@ -48,7 +48,7 @@ su - ubuntu -c "cd /home/ubuntu/ && rm awscliv2.zip && rm -rf aws"
 
 
 # Install uv
-su - ubuntu -c "curl -fsSL https://astral.sh/uv/0.6.6/install.sh | sh"
+su - ubuntu -c "curl -fsSL https://astral.sh/uv/install.sh | sh"
 su - ubuntu -c "source /home/ubuntu/.local/bin/env"
 
 

diff --git a/cloud/setup_mcp.sh b/cloud/setup_mcp.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/bash
+set -Eeuo pipefail  # abort on any error, unset variable, or failed pipeline stage
+
+# Install Docker Engine from Docker's apt repository (https://docs.docker.com/engine/install/ubuntu/)
+## Add Docker's official GPG key:
+sudo apt-get -qq update
+sudo apt-get -qq install -y ca-certificates curl wget
+sudo install -m 0755 -d /etc/apt/keyrings
+sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
+sudo chmod a+r /etc/apt/keyrings/docker.asc
+
+## Add the repository to Apt sources:
+echo \
+  "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
+  $(. /etc/os-release && echo "${UBUNTU_CODENAME:-$VERSION_CODENAME}") stable" | \
+  sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
+sudo apt-get -qq update
+
+## Install the Docker packages.
+sudo apt-get -qq install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
+
+## Let the non-root "ubuntu" user run Docker
+# sudo groupadd docker  # Already created
+sudo usermod -aG docker ubuntu
+su - ubuntu -c "newgrp docker"  # NOTE(review): likely only affects this short-lived su shell — confirm it is needed
+
+
+# Install Google Chrome (amd64 .deb downloaded directly from Google)
+wget -q -O /tmp/google-chrome-stable_current_amd64.deb https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
+sudo apt-get -qq update
+sudo apt-get -qq install -y /tmp/google-chrome-stable_current_amd64.deb
+rm /tmp/google-chrome-stable_current_amd64.deb
+
+
+# Install other dependencies (including the dependencies for the experiments), then clean up apt caches
+sudo apt-get -qq update
+sudo apt-get -qq install -y build-essential make unzip libcairo2-dev libffi-dev
+sudo apt-get -qq autoremove --purge -y
+sudo apt-get -qq clean
+rm -rf /var/lib/apt/lists/*
+
+
+# Install AWS CLI version 2 (https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html)
+su - ubuntu -c "cd /home/ubuntu/ && curl -fsSL https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip -o awscliv2.zip"
+su - ubuntu -c "cd /home/ubuntu/ && unzip -qq awscliv2.zip"
+su - ubuntu -c "cd /home/ubuntu/ && sudo ./aws/install"
+su - ubuntu -c "cd /home/ubuntu/ && rm awscliv2.zip && rm -rf aws"
+
+
+# Install uv for the ubuntu user (installer URL is unversioned, so no specific release is pinned)
+su - ubuntu -c "curl -fsSL https://astral.sh/uv/install.sh | sh"
+su - ubuntu -c "source /home/ubuntu/.local/bin/env"  # NOTE(review): sourced in a throwaway su shell; likely no lasting effect — confirm
+
+
+# Install Node Version Manager (NVM), the LTS Node.js release, and the MCP Inspector
+su - ubuntu -c "curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.3/install.sh | bash"
+su - ubuntu -c "export NVM_DIR=\"\${HOME}/.nvm\" && [ -s \"\${NVM_DIR}/nvm.sh\" ] && \\. \"\${NVM_DIR}/nvm.sh\" && nvm install --lts && nvm use --lts && npm install -g @modelcontextprotocol/inspector"
+
+
+# Clone the ALE-Bench repository, set up the root and mcp/ environments, and build the Docker images
+su - ubuntu -c "cd /home/ubuntu/ && git clone https://github.com/SakanaAI/ALE-Bench.git"
+su - ubuntu -c "cd /home/ubuntu/ALE-Bench && uv -q venv --python 3.12.9 && uv -q sync"
+su - ubuntu -c "cd /home/ubuntu/ALE-Bench/mcp && uv -q venv --python 3.12.9 && uv -q sync"
+su - ubuntu -c "cd /home/ubuntu/ALE-Bench && bash ./scripts/docker_build_all.sh \$(id -u) \$(id -g)"
+
+
+# Finish: print a completion message wrapped in ANSI styling escape codes
+echo "$(printf '\033')[1;4;5;32mALE-Bench setup completed! $(printf '\033')[0m"
diff --git a/cloud/variables.tf b/cloud/variables.tf
@@ -39,3 +39,9 @@ variable "allowed_ssh_cidr" {
     type        = string
     default     = "0.0.0.0/0"
 }
+
+variable "setup_file_name" {  # read by main.tf via file(var.setup_file_name) and passed as the instance's user_data
+    description = "Name of the setup file to be copied to the instance"
+    type        = string
+    default     = "setup.sh"  # cloud/setup_mcp.sh is the alternative script that also provisions the MCP server
+}
diff --git a/mcp/pyproject.toml b/mcp/pyproject.toml
@@ -0,0 +1,100 @@
+[project]
+name = "ale_bench_mcp_server"
+version = "1.0.0"
+description = "The MCP server for the ALE-Bench"
+authors = [
+    { name = "Yuki-Imajuku", email = "yuki.imjk@gmail.com" }
+]
+readme = "README.md"
+requires-python = ">=3.10,<3.14"
+license = { file = "LICENSE" }
+keywords = ["benchmark", "algorithmic programming", "atcoder", "AHC", "AI evaluation", "heuristic", "optimization", "estimation", "MCP", "server"]
+classifiers = [
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Development Status :: 5 - Production/Stable",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: Apache Software License",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+]
+dependencies = [
+    "ahocorapy",
+    "ale-bench",  # resolved from git, see [tool.uv.sources]
+    "cairosvg",
+    "docker",
+    "huggingface_hub",
+    "mcp[cli]",
+    "pillow",
+    "polars>=1",
+    "pydantic>=2",
+]
+
+[project.urls]
+Repository = "https://github.com/SakanaAI/ALE-Bench"
+
+[project.optional-dependencies]
+dev = [  # installed with `uv sync --extra dev`; versions are pinned exactly
+    "mypy==1.15.0",
+    "pytest==8.3.4",
+    "pytest-mock==3.14.0",
+    "ruff==0.9.7",
+    "types-requests==2.32.0.20250301",
+]
+
+[tool.uv.sources]
+ale-bench = { git = "https://github.com/SakanaAI/ALE-Bench.git", tag = "v1.0.0" }  # fetch ale-bench from the v1.0.0 git tag
+
+[tool.mypy]
+warn_return_any = true
+warn_unused_configs = true
+disallow_untyped_defs = true
+disallow_incomplete_defs = true
+check_untyped_defs = true
+disallow_untyped_decorators = true
+no_implicit_optional = true
+warn_redundant_casts = true
+warn_unused_ignores = true
+warn_no_return = true
+warn_unreachable = true
+ignore_missing_imports = true  # do not report imports whose modules or stubs cannot be found
+
+[tool.pytest.ini_options]
+markers = [
+    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
+    "docker: marks tests as docker (deselect with '-m \"not docker\"')",
+]
+
+[tool.ruff]
+fix = true  # apply available autofixes when running `ruff check`
+target-version = "py312"  # NOTE(review): requires-python allows 3.10+; confirm py312 is the intended target
+line-length = 120
+
+[tool.ruff.format]
+quote-style = "double"
+
+[tool.ruff.lint]
+select = [
+    "C9",
+    "E",
+    "F",
+    "W",
+    "I",
+]
+ignore = ["C901"]  # C901 (mccabe complexity) matches the "C9" selector above and is switched off here
+
+[tool.ruff.lint.isort]
+case-sensitive = true
+combine-as-imports = true
+default-section = "first-party"  # imports that match no other section are grouped as first-party
+known-first-party = ["ale_bench"]
+section-order = [
+    "future",
+    "standard-library",
+    "third-party",
+    "first-party",
+    "local-folder"
+]
+split-on-trailing-comma = true