From 44c7cc46a9edb7d47686d60613ee6ab8cd905638 Mon Sep 17 00:00:00 2001 From: CKwin26 <156837805+CKwin26@users.noreply.github.com> Date: Mon, 30 Mar 2026 18:24:43 -0400 Subject: [PATCH 1/2] Allow sandboxes to pass optional entrypoint args and env --- researchclaw/docker/entrypoint.sh | 7 ++- researchclaw/experiment/colab_sandbox.py | 2 + researchclaw/experiment/docker_sandbox.py | 30 ++++++++++- researchclaw/experiment/sandbox.py | 20 +++++-- researchclaw/experiment/ssh_sandbox.py | 64 ++++++++++++++++++++--- tests/test_entry_point_validation.py | 35 ++++++++++++- tests/test_rc_docker_sandbox.py | 16 ++++++ tests/test_ssh_and_colab_sandbox.py | 30 +++++++++++ 8 files changed, 188 insertions(+), 16 deletions(-) diff --git a/researchclaw/docker/entrypoint.sh b/researchclaw/docker/entrypoint.sh index 316039c0..5104bd9c 100755 --- a/researchclaw/docker/entrypoint.sh +++ b/researchclaw/docker/entrypoint.sh @@ -11,7 +11,10 @@ set -e WORKSPACE="/workspace" -ENTRY_POINT="${1:-main.py}" +ENTRY_POINT="${RC_ENTRY_POINT:-${1:-main.py}}" +if [ "$#" -gt 0 ]; then + shift +fi # ---------------------------------------------------------------- # Phase 0: Install additional pip packages @@ -51,4 +54,4 @@ fi # Phase 2: Run experiment # ---------------------------------------------------------------- echo "[RC] Phase 2: Running experiment ($ENTRY_POINT)..." -exec python3 -u "$WORKSPACE/$ENTRY_POINT" +exec python3 -u "$WORKSPACE/$ENTRY_POINT" "$@" diff --git a/researchclaw/experiment/colab_sandbox.py b/researchclaw/experiment/colab_sandbox.py index b6a46542..eec6ad7e 100644 --- a/researchclaw/experiment/colab_sandbox.py +++ b/researchclaw/experiment/colab_sandbox.py @@ -158,6 +158,8 @@ def run_project( *, entry_point: str = "main.py", timeout_sec: int = 300, + args: list[str] | None = None, + env_overrides: dict[str, str] | None = None, ) -> SandboxResult: # BUG-DA8-07: Validate entry_point (path traversal, etc.) like other backends from researchclaw.experiment.sandbox import validate_entry_point diff --git a/researchclaw/experiment/docker_sandbox.py b/researchclaw/experiment/docker_sandbox.py index 3eda27c9..b45f21cd 100644 --- a/researchclaw/experiment/docker_sandbox.py +++ b/researchclaw/experiment/docker_sandbox.py @@ -138,6 +138,8 @@ def run_project( *, entry_point: str = "main.py", timeout_sec: int = 300, + args: list[str] | None = None, + env_overrides: dict[str, str] | None = None, ) -> SandboxResult: """Run a multi-file experiment project inside a container.""" self._run_counter += 1 @@ -189,7 +191,13 @@ def run_project( metrics={}, ) - return self._execute(staging, entry_point=entry_point, timeout_sec=timeout_sec) + return self._execute( + staging, + entry_point=entry_point, + timeout_sec=timeout_sec, + entry_args=args, + env_overrides=env_overrides, + ) # ------------------------------------------------------------------ # Static helpers @@ -254,7 +262,13 @@ def _inject_harness(target_dir: Path) -> None: # ------------------------------------------------------------------ def _execute( - self, staging_dir: Path, *, entry_point: str, timeout_sec: int + self, + staging_dir: Path, + *, + entry_point: str, + timeout_sec: int, + entry_args: list[str] | None = None, + env_overrides: dict[str, str] | None = None, ) -> SandboxResult: """Core execution: single container, three-phase via entrypoint.sh.""" cfg = self.config @@ -269,6 +283,8 @@ def _execute( staging_dir, entry_point=entry_point, container_name=container_name, + entry_args=entry_args, + env_overrides=env_overrides, ) start = time.monotonic() @@ -349,6 +365,8 @@ def _build_run_command( *, entry_point: str, container_name: str, + entry_args: list[str] | None = None, + env_overrides: dict[str, str] | None = None, ) -> list[str]: """Build the ``docker run`` command list. @@ -453,9 +471,17 @@ def _user_flag() -> list[str]: else: cmd.extend(["--gpus", "all"]) + if env_overrides: + for name, value in sorted(env_overrides.items()): + if not value: + continue + cmd.extend(["-e", f"{name}={value}"]) + # Image + entry point (passed as CMD arg to entrypoint.sh) cmd.append(cfg.image) cmd.append(entry_point) + if entry_args: + cmd.extend(entry_args) return cmd diff --git a/researchclaw/experiment/sandbox.py b/researchclaw/experiment/sandbox.py index d54e0fcf..6b66dc3c 100644 --- a/researchclaw/experiment/sandbox.py +++ b/researchclaw/experiment/sandbox.py @@ -297,6 +297,8 @@ def run_project( *, entry_point: str = "main.py", timeout_sec: int = 300, + args: list[str] | None = None, + env_overrides: dict[str, str] | None = None, ) -> SandboxResult: ... @@ -350,6 +352,8 @@ def run_project( *, entry_point: str = "main.py", timeout_sec: int = 300, + args: list[str] | None = None, + env_overrides: dict[str, str] | None = None, ) -> SandboxResult: """Run a multi-file experiment project in the sandbox. @@ -409,12 +413,14 @@ def run_project( ) start = time.monotonic() - command = self._build_command(entry) + command = self._build_command(entry, args=args) logger.debug("Running project sandbox command: %s (cwd=%s)", command, sandbox_project) result: SandboxResult try: env = {**os.environ, "PYTHONUNBUFFERED": "1"} + if env_overrides: + env.update(env_overrides) completed = subprocess.run( command, capture_output=True, @@ -457,7 +463,12 @@ def _next_script_path(self) -> Path: def _write_script(script_path: Path, code: str) -> None: _ = script_path.write_text(code, encoding="utf-8") - def _build_command(self, script_path: Path) -> list[str]: + def _build_command( + self, + script_path: Path, + *, + args: list[str] | None = None, + ) -> list[str]: # Convert relative python_path to absolute WITHOUT resolving symlinks. # Using .resolve() would follow venv symlinks to the system Python binary, # which loses the venv context (site-packages like numpy become unavailable). @@ -466,7 +477,10 @@ def _build_command(self, script_path: Path) -> list[str]: if not python_path.is_absolute() and python != "python": python_path = Path.cwd() / python_path # -u: unbuffered stdout/stderr so subprocess.run captures all output - return [str(python_path), "-u", str(script_path)] + command = [str(python_path), "-u", str(script_path)] + if args: + command.extend(args) + return command @staticmethod def _result_from_completed( diff --git a/researchclaw/experiment/ssh_sandbox.py b/researchclaw/experiment/ssh_sandbox.py index aad97fca..ec5026da 100644 --- a/researchclaw/experiment/ssh_sandbox.py +++ b/researchclaw/experiment/ssh_sandbox.py @@ -71,6 +71,8 @@ def run_project( *, entry_point: str = "main.py", timeout_sec: int = 300, + args: list[str] | None = None, + env_overrides: dict[str, str] | None = None, ) -> SandboxResult: """Run a multi-file experiment project on the remote host.""" self._run_counter += 1 @@ -119,7 +121,13 @@ def run_project( metrics={}, ) - return self._execute(staging, entry_point=entry_point, timeout_sec=timeout_sec) + return self._execute( + staging, + entry_point=entry_point, + timeout_sec=timeout_sec, + entry_args=args, + env_overrides=env_overrides, + ) # ------------------------------------------------------------------ # Static helpers @@ -158,7 +166,13 @@ def _inject_harness(target_dir: Path) -> None: # ------------------------------------------------------------------ def _execute( - self, staging_dir: Path, *, entry_point: str, timeout_sec: int + self, + staging_dir: Path, + *, + entry_point: str, + timeout_sec: int, + entry_args: list[str] | None = None, + env_overrides: dict[str, str] | None = None, ) -> SandboxResult: """Core execution flow for remote experiments. @@ -213,11 +227,17 @@ def _execute( # 4. Execute experiment if cfg.use_docker: exec_cmd = self._build_docker_exec_cmd( - remote_dir, entry_point=entry_point, + remote_dir, + entry_point=entry_point, + args=entry_args, + env_overrides=env_overrides, ) else: exec_cmd = self._build_bare_exec_cmd( - remote_dir, entry_point=entry_point, + remote_dir, + entry_point=entry_point, + args=entry_args, + env_overrides=env_overrides, ) start = time.monotonic() @@ -242,13 +262,26 @@ def _execute( ) def _build_bare_exec_cmd( - self, remote_dir: str, *, entry_point: str, + self, + remote_dir: str, + *, + entry_point: str, + args: list[str] | None = None, + env_overrides: dict[str, str] | None = None, ) -> str: """Build command to run Python directly on remote host (with basic sandboxing).""" cfg = self.config rd = shlex.quote(remote_dir) ep = shlex.quote(entry_point) py = shlex.quote(cfg.remote_python) + arg_text = " ".join(shlex.quote(arg) for arg in (args or [])) + arg_suffix = f" {arg_text}" if arg_text else "" + env_parts = [ + f"{name}={shlex.quote(value)}" + for name, value in sorted((env_overrides or {}).items()) + if value + ] + env_prefix = (" ".join(env_parts) + " ") if env_parts else "" gpu_env = "" if cfg.gpu_ids: @@ -264,17 +297,24 @@ def _build_bare_exec_cmd( f"if command -v unshare >/dev/null 2>&1; then " f"HOME={rd} " f"{gpu_env}" - f"unshare --net {py} -u {ep}; " + f"{env_prefix}" + f"unshare --net {py} -u {ep}{arg_suffix}; " f"else " f"echo 'WARNING: unshare not available, running without network isolation' >&2; " f"HOME={rd} " f"{gpu_env}" - f"{py} -u {ep}; " + f"{env_prefix}" + f"{py} -u {ep}{arg_suffix}; " f"fi" ) def _build_docker_exec_cmd( - self, remote_dir: str, *, entry_point: str, + self, + remote_dir: str, + *, + entry_point: str, + args: list[str] | None = None, + env_overrides: dict[str, str] | None = None, ) -> str: """Build command to run inside a Docker container on the remote host. @@ -307,8 +347,16 @@ def _build_docker_exec_cmd( # Try to pass all GPUs; fails gracefully if none available parts.extend(["--gpus", "all"]) + if env_overrides: + for name, value in sorted(env_overrides.items()): + if not value: + continue + parts.extend(["-e", shlex.quote(f"{name}={value}")]) + parts.append(shlex.quote(cfg.docker_image)) parts.extend(["python3", "-u", shlex.quote(entry_point)]) + if args: + parts.extend(shlex.quote(arg) for arg in args) return " ".join(parts) diff --git a/tests/test_entry_point_validation.py b/tests/test_entry_point_validation.py index ca51d3b4..6e9b117e 100644 --- a/tests/test_entry_point_validation.py +++ b/tests/test_entry_point_validation.py @@ -2,6 +2,7 @@ from __future__ import annotations +import sys from pathlib import Path from unittest.mock import patch @@ -99,7 +100,7 @@ class TestExperimentSandboxEntryPointValidation: def _make_sandbox(self, tmp_path: Path) -> ExperimentSandbox: from researchclaw.config import SandboxConfig - cfg = SandboxConfig() + cfg = SandboxConfig(python_path=sys.executable) return ExperimentSandbox(cfg, tmp_path / "work") def test_rejects_path_traversal(self, tmp_path: Path) -> None: @@ -140,3 +141,35 @@ def test_rejects_absolute_path(self, tmp_path: Path) -> None: # for future copy mechanism changes; see # TestValidateEntryPointResolved.test_symlink_escape_rejected for # the unit-level proof that the function catches symlink escapes. + + def test_run_project_passes_args_and_env_overrides(self, tmp_path: Path) -> None: + project = tmp_path / "proj" + project.mkdir() + (project / "main.py").write_text( + "\n".join( + [ + "from __future__ import annotations", + "import argparse", + "import os", + "", + "parser = argparse.ArgumentParser()", + "parser.add_argument('--value', required=True)", + "args = parser.parse_args()", + "if os.environ.get('RC_TEST_FLAG') != 'ok':", + " raise SystemExit('missing env override')", + "print(f'metric: {float(args.value):.1f}')", + ] + ), + encoding="utf-8", + ) + + sandbox = self._make_sandbox(tmp_path) + result = sandbox.run_project( + project, + args=["--value", "1.0"], + env_overrides={"RC_TEST_FLAG": "ok"}, + timeout_sec=10, + ) + + assert result.returncode == 0 + assert result.metrics.get("metric") == 1.0 diff --git a/tests/test_rc_docker_sandbox.py b/tests/test_rc_docker_sandbox.py index fc177988..3ac05dd7 100644 --- a/tests/test_rc_docker_sandbox.py +++ b/tests/test_rc_docker_sandbox.py @@ -118,6 +118,22 @@ def test_build_run_command_specific_gpus(tmp_path: Path): assert "0,2" in cmd[gpu_idx + 1] +def test_build_run_command_forwards_entry_args_and_env(tmp_path: Path): + cfg = DockerSandboxConfig(network_policy="none") + sandbox = DockerSandbox(cfg, tmp_path / "work") + cmd = sandbox._build_run_command( + tmp_path / "staging", + entry_point="main.py", + container_name="rc-test-args", + entry_args=["--foo", "bar"], + env_overrides={"B_ENV": "2", "A_ENV": "1"}, + ) + env_values = [cmd[i + 1] for i, token in enumerate(cmd) if token == "-e"] + assert "A_ENV=1" in env_values + assert "B_ENV=2" in env_values + assert cmd[-3:] == ["main.py", "--foo", "bar"] + + # ── Harness injection ───────────────────────────────────────────────── diff --git a/tests/test_ssh_and_colab_sandbox.py b/tests/test_ssh_and_colab_sandbox.py index d3436888..21d7c8ee 100644 --- a/tests/test_ssh_and_colab_sandbox.py +++ b/tests/test_ssh_and_colab_sandbox.py @@ -104,6 +104,19 @@ def test_bare_exec_no_gpu(self, tmp_path: Path): cmd = sb._build_bare_exec_cmd("/tmp/rc-test", entry_point="main.py") assert "CUDA_VISIBLE_DEVICES" not in cmd + def test_bare_exec_cmd_forwards_args_and_env(self, tmp_path: Path): + cfg = SshRemoteConfig(host="server", user="test", remote_python="python3") + sb = SshRemoteSandbox(cfg, tmp_path) + cmd = sb._build_bare_exec_cmd( + "/tmp/rc-test", + entry_point="main.py", + args=["--foo", "bar baz"], + env_overrides={"A_ENV": "1", "B_ENV": "two words"}, + ) + assert "A_ENV=1" in cmd + assert "B_ENV='two words'" in cmd + assert "python3 -u main.py --foo 'bar baz'" in cmd + def test_docker_exec_cmd(self, tmp_path: Path): cfg = SshRemoteConfig( host="server", user="test", @@ -125,6 +138,23 @@ def test_docker_exec_cmd(self, tmp_path: Path): assert "myimage:latest" in cmd assert cmd.endswith("main.py") + def test_docker_exec_cmd_forwards_args_and_env(self, tmp_path: Path): + cfg = SshRemoteConfig( + host="server", + user="test", + use_docker=True, + docker_image="myimage:latest", + ) + sb = SshRemoteSandbox(cfg, tmp_path) + cmd = sb._build_docker_exec_cmd( + "/tmp/rc-test", + entry_point="main.py", + args=["--foo", "bar"], + env_overrides={"A_ENV": "1"}, + ) + assert "-e A_ENV=1" in cmd + assert cmd.endswith("main.py --foo bar") + def test_docker_exec_full_network(self, tmp_path: Path): cfg = SshRemoteConfig( host="server", use_docker=True, From 0326b82b752f4ad0f421f120c31ade2b651494d8 Mon Sep 17 00:00:00 2001 From: CKwin26 <156837805+CKwin26@users.noreply.github.com> Date: Tue, 31 Mar 2026 01:58:29 -0400 Subject: [PATCH 2/2] fix cross-platform absolute entrypoint checks --- researchclaw/experiment/sandbox.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/researchclaw/experiment/sandbox.py b/researchclaw/experiment/sandbox.py index 6b66dc3c..7bb5edd3 100644 --- a/researchclaw/experiment/sandbox.py +++ b/researchclaw/experiment/sandbox.py @@ -9,7 +9,7 @@ import subprocess import time from dataclasses import dataclass -from pathlib import Path +from pathlib import Path, PurePosixPath, PureWindowsPath from typing import Protocol from researchclaw.config import SandboxConfig @@ -27,7 +27,9 @@ def validate_entry_point(entry_point: str) -> str | None: if not entry_point or not entry_point.strip(): return "Entry point is empty" ep = Path(entry_point) - if ep.is_absolute(): + posix_ep = PurePosixPath(entry_point) + windows_ep = PureWindowsPath(entry_point) + if ep.is_absolute() or posix_ep.is_absolute() or windows_ep.is_absolute(): return f"Entry point must be a relative path, got: {entry_point}" if ".." in ep.parts: return f"Entry point must not contain '..': {entry_point}"