Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion src/metacoder/coders/codex.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,18 @@ class CodexCoder(BaseCoder):
args = ["mcp-server-name"]
env = { "API_KEY" = "value" }

Coder Options (passed via coders config in YAML):

coders:
codex:
disable_shell_tool: true # Disable shell/bash access, MCP-only mode

Note: Requires codex CLI to be installed.
"""

# Coder-specific options (set from YAML config)
disable_shell_tool: bool = False

@classmethod
def is_available(cls) -> bool:
"""Check if codex command is available."""
Expand Down Expand Up @@ -142,7 +151,19 @@ def run(self, input_text: str) -> CoderOutput:
# Codex reads .codex/config.toml from current directory automatically.
# Do NOT set HOME=. as this breaks authentication (401 Unauthorized).
text = self.expand_prompt(input_text)
command = ["codex", "exec", "--json", "--dangerously-bypass-approvals-and-sandbox", text]

# Build command with appropriate flags
if self.disable_shell_tool:
# MCP-only mode: disable the shell tool so Codex cannot reach the filesystem via shell commands,
# which forces it to rely solely on MCP tools for retrieving information
command = [
"codex", "exec", "--json", "--full-auto",
"--skip-git-repo-check", "--disable", "shell_tool", text
]
logger.info("Running Codex in MCP-only mode (shell_tool disabled)")
else:
# Default mode: full access (for general use cases)
command = ["codex", "exec", "--json", "--dangerously-bypass-approvals-and-sandbox", text]

print(f"📝 Running command: {' '.join(command)}")
# time the command
Expand Down
28 changes: 26 additions & 2 deletions src/metacoder/evals/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,20 @@ def get_default_metrics(
}


def create_coder(coder_name: str, workdir: str, config=None) -> BaseCoder:
"""Create a coder instance."""
def create_coder(
coder_name: str,
workdir: str,
config=None,
coder_options: Optional[Dict[str, Any]] = None,
) -> BaseCoder:
"""Create a coder instance.

Args:
coder_name: Name of the coder (e.g., 'codex', 'claude', 'goose')
workdir: Working directory for the coder
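config: Optional CoderConfig with model and extensions; assigned to the coder only when provided
coder_options: Optional coder-specific options from YAML config (e.g., disable_shell_tool); each key matching an existing attribute on the coder is set, and any other key is logged as a warning and skipped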
"""
if coder_name not in AVAILABLE_CODERS:
available = ", ".join(AVAILABLE_CODERS.keys())
raise ValueError(f"Unknown coder: {coder_name}. Available: {available}")
Expand All @@ -172,6 +184,15 @@ def create_coder(coder_name: str, workdir: str, config=None) -> BaseCoder:
if config:
coder.config = config

# Apply coder-specific options (e.g., disable_shell_tool for Codex)
if coder_options:
for key, value in coder_options.items():
if hasattr(coder, key):
setattr(coder, key, value)
logger.info(f"Set coder option: {key}={value}")
else:
logger.warning(f"Unknown coder option for {coder_name}: {key}")

return coder


Expand Down Expand Up @@ -311,6 +332,7 @@ def run_single_eval(
case: EvalCase,
workdir: Path,
coder_config: CoderConfig | None = None,
coder_options: Optional[Dict[str, Any]] = None,
) -> List[EvalResult]:
"""Run evaluation for a single model x coder x case combination."""
results = []
Expand All @@ -320,6 +342,7 @@ def run_single_eval(
coder_name,
workdir=str(workdir),
config=coder_config,
coder_options=coder_options,
)

# Set environment variables for the model
Expand Down Expand Up @@ -589,6 +612,7 @@ def run_all_evals(
case,
combo_workdir,
coder_config,
coder_options=coder_config_base, # Pass coder-specific options
)

# Add server info to results
Expand Down
Loading