From 15999e8b597852d46770f4e7fdd85a81902e7130 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 15 Dec 2025 16:51:16 -0500 Subject: [PATCH] Add disable_shell_tool option to CodexCoder for MCP-only evaluation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When disable_shell_tool is set in the coder config, Codex runs with the --disable shell_tool flag, preventing filesystem access via bash commands. This enables fair MCP-only evaluations where Codex can only use MCP tools. Usage in eval config: coders: codex: disable_shell_tool: true 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/metacoder/coders/codex.py | 23 ++++++++++++++++++++++- src/metacoder/evals/runner.py | 28 ++++++++++++++++++++++++++-- 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/src/metacoder/coders/codex.py b/src/metacoder/coders/codex.py index ad45c0c..c0a5727 100644 --- a/src/metacoder/coders/codex.py +++ b/src/metacoder/coders/codex.py @@ -42,9 +42,18 @@ class CodexCoder(BaseCoder): args = ["mcp-server-name"] env = { "API_KEY" = "value" } + Coder Options (passed via coders config in YAML): + + coders: + codex: + disable_shell_tool: true # Disable shell/bash access, MCP-only mode + Note: Requires codex CLI to be installed. """ + # Coder-specific options (set from YAML config) + disable_shell_tool: bool = False + @classmethod def is_available(cls) -> bool: """Check if codex command is available.""" @@ -142,7 +151,19 @@ def run(self, input_text: str) -> CoderOutput: # Codex reads .codex/config.toml from current directory automatically. # Do NOT set HOME=. as this breaks authentication (401 Unauthorized).
text = self.expand_prompt(input_text) - command = ["codex", "exec", "--json", "--dangerously-bypass-approvals-and-sandbox", text] + + # Build command with appropriate flags + if self.disable_shell_tool: + # MCP-only mode: disable shell tool to prevent filesystem access + # This forces Codex to use only MCP tools for retrieving information + command = [ + "codex", "exec", "--json", "--full-auto", + "--skip-git-repo-check", "--disable", "shell_tool", text + ] + logger.info("Running Codex in MCP-only mode (shell_tool disabled)") + else: + # Default mode: full access (for general use cases) + command = ["codex", "exec", "--json", "--dangerously-bypass-approvals-and-sandbox", text] print(f"📝 Running command: {' '.join(command)}") # time the command diff --git a/src/metacoder/evals/runner.py b/src/metacoder/evals/runner.py index 46d4142..08b8c0e 100644 --- a/src/metacoder/evals/runner.py +++ b/src/metacoder/evals/runner.py @@ -157,8 +157,20 @@ def get_default_metrics( } -def create_coder(coder_name: str, workdir: str, config=None) -> BaseCoder: - """Create a coder instance.""" +def create_coder( + coder_name: str, + workdir: str, + config=None, + coder_options: Optional[Dict[str, Any]] = None, +) -> BaseCoder: + """Create a coder instance. + + Args: + coder_name: Name of the coder (e.g., 'codex', 'claude', 'goose') + workdir: Working directory for the coder + config: CoderConfig with model and extensions + coder_options: Coder-specific options from YAML config (e.g., disable_shell_tool) + """ if coder_name not in AVAILABLE_CODERS: available = ", ".join(AVAILABLE_CODERS.keys()) raise ValueError(f"Unknown coder: {coder_name}. 
Available: {available}") @@ -172,6 +184,15 @@ def create_coder(coder_name: str, workdir: str, config=None) -> BaseCoder: if config: coder.config = config + # Apply coder-specific options (e.g., disable_shell_tool for Codex) + if coder_options: + for key, value in coder_options.items(): + if hasattr(coder, key): + setattr(coder, key, value) + logger.info(f"Set coder option: {key}={value}") + else: + logger.warning(f"Unknown coder option for {coder_name}: {key}") + return coder @@ -311,6 +332,7 @@ def run_single_eval( case: EvalCase, workdir: Path, coder_config: CoderConfig | None = None, + coder_options: Optional[Dict[str, Any]] = None, ) -> List[EvalResult]: """Run evaluation for a single model x coder x case combination.""" results = [] @@ -320,6 +342,7 @@ def run_single_eval( coder_name, workdir=str(workdir), config=coder_config, + coder_options=coder_options, ) # Set environment variables for the model @@ -589,6 +612,7 @@ def run_all_evals( case, combo_workdir, coder_config, + coder_options=coder_config_base, # Pass coder-specific options ) # Add server info to results