From 15999e8b597852d46770f4e7fdd85a81902e7130 Mon Sep 17 00:00:00 2001
From: Justin Reese <justaddcoffee+github@gmail.com>
Date: Mon, 15 Dec 2025 16:51:16 -0500
Subject: [PATCH] Add disable_shell_tool option to CodexCoder for MCP-only
 evaluation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

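When disable_shell_tool is set in the coder config, Codex runs with the
`--disable shell_tool` flag, preventing filesystem access via bash commands.
In this mode the command also uses --full-auto and --skip-git-repo-check
instead of --dangerously-bypass-approvals-and-sandbox.
This enables fair MCP-only evaluations where Codex can only use MCP tools.

The eval runner forwards coder-specific options through create_coder(),
which sets each option that matches an existing attribute on the coder
and logs a warning for unknown keys.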

Usage in eval config:
  coders:
    codex:
      disable_shell_tool: true

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/metacoder/coders/codex.py | 23 ++++++++++++++++++++++-
 src/metacoder/evals/runner.py | 28 ++++++++++++++++++++++++++--
 2 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/src/metacoder/coders/codex.py b/src/metacoder/coders/codex.py
index ad45c0c..c0a5727 100644
--- a/src/metacoder/coders/codex.py
+++ b/src/metacoder/coders/codex.py
@@ -42,9 +42,18 @@ class CodexCoder(BaseCoder):
         args = ["mcp-server-name"]
         env = { "API_KEY" = "value" }
 
+    Coder Options (passed via coders config in YAML):
+
+        coders:
+          codex:
+            disable_shell_tool: true  # Disable shell/bash access, MCP-only mode
+
     Note: Requires codex CLI to be installed.
     """
 
+    # Coder-specific options (set from YAML config)
+    disable_shell_tool: bool = False
+
     @classmethod
     def is_available(cls) -> bool:
         """Check if codex command is available."""
@@ -142,7 +151,19 @@ def run(self, input_text: str) -> CoderOutput:
             # Codex reads .codex/config.toml from current directory automatically.
             # Do NOT set HOME=. as this breaks authentication (401 Unauthorized).
             text = self.expand_prompt(input_text)
-            command = ["codex", "exec", "--json", "--dangerously-bypass-approvals-and-sandbox", text]
+
+            # Build command with appropriate flags
+            if self.disable_shell_tool:
+                # MCP-only mode: disable shell tool to prevent filesystem access
+                # This forces Codex to use only MCP tools for retrieving information
+                command = [
+                    "codex", "exec", "--json", "--full-auto",
+                    "--skip-git-repo-check", "--disable", "shell_tool", text
+                ]
+                logger.info("Running Codex in MCP-only mode (shell_tool disabled)")
+            else:
+                # Default mode: full access (for general use cases)
+                command = ["codex", "exec", "--json", "--dangerously-bypass-approvals-and-sandbox", text]
 
             print(f"📝 Running command: {' '.join(command)}")
             # time the command
diff --git a/src/metacoder/evals/runner.py b/src/metacoder/evals/runner.py
index 46d4142..08b8c0e 100644
--- a/src/metacoder/evals/runner.py
+++ b/src/metacoder/evals/runner.py
@@ -157,8 +157,20 @@ def get_default_metrics(
     }
 
 
-def create_coder(coder_name: str, workdir: str, config=None) -> BaseCoder:
-    """Create a coder instance."""
+def create_coder(
+    coder_name: str,
+    workdir: str,
+    config=None,
+    coder_options: Optional[Dict[str, Any]] = None,
+) -> BaseCoder:
+    """Create a coder instance.
+
+    Args:
+        coder_name: Name of the coder (e.g., 'codex', 'claude', 'goose')
+        workdir: Working directory for the coder
+        config: CoderConfig with model and extensions
+        coder_options: Coder-specific options from YAML config (e.g., disable_shell_tool)
+    """
     if coder_name not in AVAILABLE_CODERS:
         available = ", ".join(AVAILABLE_CODERS.keys())
         raise ValueError(f"Unknown coder: {coder_name}. Available: {available}")
@@ -172,6 +184,15 @@ def create_coder(coder_name: str, workdir: str, config=None) -> BaseCoder:
     if config:
         coder.config = config
 
+    # Apply coder-specific options (e.g., disable_shell_tool for Codex)
+    if coder_options:
+        for key, value in coder_options.items():
+            if hasattr(coder, key):
+                setattr(coder, key, value)
+                logger.info(f"Set coder option: {key}={value}")
+            else:
+                logger.warning(f"Unknown coder option for {coder_name}: {key}")
+
     return coder
 
 
@@ -311,6 +332,7 @@ def run_single_eval(
         case: EvalCase,
         workdir: Path,
         coder_config: CoderConfig | None = None,
+        coder_options: Optional[Dict[str, Any]] = None,
     ) -> List[EvalResult]:
         """Run evaluation for a single model x coder x case combination."""
         results = []
@@ -320,6 +342,7 @@ def run_single_eval(
             coder_name,
             workdir=str(workdir),
             config=coder_config,
+            coder_options=coder_options,
         )
 
         # Set environment variables for the model
@@ -589,6 +612,7 @@ def run_all_evals(
                             case,
                             combo_workdir,
                             coder_config,
+                            coder_options=coder_config_base,  # Pass coder-specific options
                         )
 
                         # Add server info to results