aiming-lab · CKwin26 · Mar 30, 2026 · Mar 31, 2026 · Mar 31, 2026
diff --git a/researchclaw/docker/entrypoint.sh b/researchclaw/docker/entrypoint.sh
@@ -11,7 +11,10 @@
 set -e
 
 WORKSPACE="/workspace"
-ENTRY_POINT="${1:-main.py}"
+ENTRY_POINT="${RC_ENTRY_POINT:-${1:-main.py}}"
+if [ "$#" -gt 0 ]; then
+    shift
+fi
 
 # ----------------------------------------------------------------
 # Phase 0: Install additional pip packages
@@ -51,4 +54,4 @@ fi
 # Phase 2: Run experiment
 # ----------------------------------------------------------------
 echo "[RC] Phase 2: Running experiment ($ENTRY_POINT)..."
-exec python3 -u "$WORKSPACE/$ENTRY_POINT"
+exec python3 -u "$WORKSPACE/$ENTRY_POINT" "$@"
diff --git a/researchclaw/experiment/colab_sandbox.py b/researchclaw/experiment/colab_sandbox.py
@@ -158,6 +158,8 @@ def run_project(
         *,
         entry_point: str = "main.py",
         timeout_sec: int = 300,
+        args: list[str] | None = None,
+        env_overrides: dict[str, str] | None = None,
     ) -> SandboxResult:
         # BUG-DA8-07: Validate entry_point (path traversal, etc.) like other backends
         from researchclaw.experiment.sandbox import validate_entry_point

diff --git a/researchclaw/experiment/docker_sandbox.py b/researchclaw/experiment/docker_sandbox.py
@@ -138,6 +138,8 @@ def run_project(
         *,
         entry_point: str = "main.py",
         timeout_sec: int = 300,
+        args: list[str] | None = None,
+        env_overrides: dict[str, str] | None = None,
     ) -> SandboxResult:
         """Run a multi-file experiment project inside a container."""
         self._run_counter += 1
@@ -189,7 +191,13 @@ def run_project(
                 metrics={},
             )
 
-        return self._execute(staging, entry_point=entry_point, timeout_sec=timeout_sec)
+        return self._execute(
+            staging,
+            entry_point=entry_point,
+            timeout_sec=timeout_sec,
+            entry_args=args,
+            env_overrides=env_overrides,
+        )
 
     # ------------------------------------------------------------------
     # Static helpers
@@ -254,7 +262,13 @@ def _inject_harness(target_dir: Path) -> None:
     # ------------------------------------------------------------------
 
     def _execute(
-        self, staging_dir: Path, *, entry_point: str, timeout_sec: int
+        self,
+        staging_dir: Path,
+        *,
+        entry_point: str,
+        timeout_sec: int,
+        entry_args: list[str] | None = None,
+        env_overrides: dict[str, str] | None = None,
     ) -> SandboxResult:
         """Core execution: single container, three-phase via entrypoint.sh."""
         cfg = self.config
@@ -269,6 +283,8 @@ def _execute(
             staging_dir,
             entry_point=entry_point,
             container_name=container_name,
+            entry_args=entry_args,
+            env_overrides=env_overrides,
         )
 
         start = time.monotonic()
@@ -349,6 +365,8 @@ def _build_run_command(
         *,
         entry_point: str,
         container_name: str,
+        entry_args: list[str] | None = None,
+        env_overrides: dict[str, str] | None = None,
     ) -> list[str]:
         """Build the ``docker run`` command list.
 
@@ -453,9 +471,17 @@ def _user_flag() -> list[str]:
             else:
                 cmd.extend(["--gpus", "all"])
 
+        if env_overrides:
+            for name, value in sorted(env_overrides.items()):
+                if not value:
+                    continue
+                cmd.extend(["-e", f"{name}={value}"])
+
         # Image + entry point (passed as CMD arg to entrypoint.sh)
         cmd.append(cfg.image)
         cmd.append(entry_point)
+        if entry_args:
+            cmd.extend(entry_args)
 
         return cmd
 

diff --git a/researchclaw/experiment/sandbox.py b/researchclaw/experiment/sandbox.py
@@ -9,7 +9,7 @@
 import subprocess
 import time
 from dataclasses import dataclass
-from pathlib import Path
+from pathlib import Path, PurePosixPath, PureWindowsPath
 from typing import Protocol
 
 from researchclaw.config import SandboxConfig
@@ -27,8 +27,9 @@ def validate_entry_point(entry_point: str) -> str | None:
     if not entry_point or not entry_point.strip():
         return "Entry point is empty"
     ep = Path(entry_point)
-    # Check both native absolute and Unix-style absolute (for cross-platform safety)
-    if ep.is_absolute() or entry_point.startswith("/"):
+    posix_ep = PurePosixPath(entry_point)
+    windows_ep = PureWindowsPath(entry_point)
+    if ep.is_absolute() or posix_ep.is_absolute() or windows_ep.is_absolute():
         return f"Entry point must be a relative path, got: {entry_point}"
     if ".." in ep.parts:
         return f"Entry point must not contain '..': {entry_point}"
@@ -298,6 +299,8 @@ def run_project(
         *,
         entry_point: str = "main.py",
         timeout_sec: int = 300,
+        args: list[str] | None = None,
+        env_overrides: dict[str, str] | None = None,
     ) -> SandboxResult: ...
 
 
@@ -351,6 +354,8 @@ def run_project(
         *,
         entry_point: str = "main.py",
         timeout_sec: int = 300,
+        args: list[str] | None = None,
+        env_overrides: dict[str, str] | None = None,
     ) -> SandboxResult:
         """Run a multi-file experiment project in the sandbox.
 
@@ -410,12 +415,14 @@ def run_project(
             )
 
         start = time.monotonic()
-        command = self._build_command(entry)
+        command = self._build_command(entry, args=args)
         logger.debug("Running project sandbox command: %s (cwd=%s)", command, sandbox_project)
 
         result: SandboxResult
         try:
             env = {**os.environ, "PYTHONUNBUFFERED": "1"}
+            if env_overrides:
+                env.update(env_overrides)
             completed = subprocess.run(
                 command,
                 capture_output=True,
@@ -458,7 +465,12 @@ def _next_script_path(self) -> Path:
     def _write_script(script_path: Path, code: str) -> None:
         _ = script_path.write_text(code, encoding="utf-8")
 
-    def _build_command(self, script_path: Path) -> list[str]:
+    def _build_command(
+        self,
+        script_path: Path,
+        *,
+        args: list[str] | None = None,
+    ) -> list[str]:
         # Convert relative python_path to absolute WITHOUT resolving symlinks.
         # Using .resolve() would follow venv symlinks to the system Python binary,
         # which loses the venv context (site-packages like numpy become unavailable).
@@ -467,7 +479,10 @@ def _build_command(self, script_path: Path) -> list[str]:
         if not python_path.is_absolute() and python != "python":
             python_path = Path.cwd() / python_path
         # -u: unbuffered stdout/stderr so subprocess.run captures all output
-        return [str(python_path), "-u", str(script_path)]
+        command = [str(python_path), "-u", str(script_path)]
+        if args:
+            command.extend(args)
+        return command
 
     @staticmethod
     def _result_from_completed(

diff --git a/researchclaw/experiment/ssh_sandbox.py b/researchclaw/experiment/ssh_sandbox.py
@@ -71,6 +71,8 @@ def run_project(
         *,
         entry_point: str = "main.py",
         timeout_sec: int = 300,
+        args: list[str] | None = None,
+        env_overrides: dict[str, str] | None = None,
     ) -> SandboxResult:
         """Run a multi-file experiment project on the remote host."""
         self._run_counter += 1
@@ -119,7 +121,13 @@ def run_project(
                 metrics={},
             )
 
-        return self._execute(staging, entry_point=entry_point, timeout_sec=timeout_sec)
+        return self._execute(
+            staging,
+            entry_point=entry_point,
+            timeout_sec=timeout_sec,
+            entry_args=args,
+            env_overrides=env_overrides,
+        )
 
     # ------------------------------------------------------------------
     # Static helpers
@@ -158,7 +166,13 @@ def _inject_harness(target_dir: Path) -> None:
     # ------------------------------------------------------------------
 
     def _execute(
-        self, staging_dir: Path, *, entry_point: str, timeout_sec: int
+        self,
+        staging_dir: Path,
+        *,
+        entry_point: str,
+        timeout_sec: int,
+        entry_args: list[str] | None = None,
+        env_overrides: dict[str, str] | None = None,
     ) -> SandboxResult:
         """Core execution flow for remote experiments.
 
@@ -213,11 +227,17 @@ def _execute(
         # 4. Execute experiment
         if cfg.use_docker:
             exec_cmd = self._build_docker_exec_cmd(
-                remote_dir, entry_point=entry_point,
+                remote_dir,
+                entry_point=entry_point,
+                args=entry_args,
+                env_overrides=env_overrides,
             )
         else:
             exec_cmd = self._build_bare_exec_cmd(
-                remote_dir, entry_point=entry_point,
+                remote_dir,
+                entry_point=entry_point,
+                args=entry_args,
+                env_overrides=env_overrides,
             )
 
         start = time.monotonic()
@@ -242,13 +262,26 @@ def _execute(
         )
 
     def _build_bare_exec_cmd(
-        self, remote_dir: str, *, entry_point: str,
+        self,
+        remote_dir: str,
+        *,
+        entry_point: str,
+        args: list[str] | None = None,
+        env_overrides: dict[str, str] | None = None,
     ) -> str:
         """Build command to run Python directly on remote host (with basic sandboxing)."""
         cfg = self.config
         rd = shlex.quote(remote_dir)
         ep = shlex.quote(entry_point)
         py = shlex.quote(cfg.remote_python)
+        arg_text = " ".join(shlex.quote(arg) for arg in (args or []))
+        arg_suffix = f" {arg_text}" if arg_text else ""
+        env_parts = [
+            f"{name}={shlex.quote(value)}"
+            for name, value in sorted((env_overrides or {}).items())
+            if value
+        ]
+        env_prefix = (" ".join(env_parts) + " ") if env_parts else ""
 
         gpu_env = ""
         if cfg.gpu_ids:
@@ -264,17 +297,24 @@ def _build_bare_exec_cmd(
             f"if command -v unshare >/dev/null 2>&1; then "
             f"HOME={rd} "
             f"{gpu_env}"
-            f"unshare --net {py} -u {ep}; "
+            f"{env_prefix}"
+            f"unshare --net {py} -u {ep}{arg_suffix}; "
             f"else "
             f"echo 'WARNING: unshare not available, running without network isolation' >&2; "
             f"HOME={rd} "
             f"{gpu_env}"
-            f"{py} -u {ep}; "
+            f"{env_prefix}"
+            f"{py} -u {ep}{arg_suffix}; "
             f"fi"
         )
 
     def _build_docker_exec_cmd(
-        self, remote_dir: str, *, entry_point: str,
+        self,
+        remote_dir: str,
+        *,
+        entry_point: str,
+        args: list[str] | None = None,
+        env_overrides: dict[str, str] | None = None,
     ) -> str:
         """Build command to run inside a Docker container on the remote host.
 
@@ -307,8 +347,16 @@ def _build_docker_exec_cmd(
             # Try to pass all GPUs; fails gracefully if none available
             parts.extend(["--gpus", "all"])
 
+        if env_overrides:
+            for name, value in sorted(env_overrides.items()):
+                if not value:
+                    continue
+                parts.extend(["-e", shlex.quote(f"{name}={value}")])
+
         parts.append(shlex.quote(cfg.docker_image))
         parts.extend(["python3", "-u", shlex.quote(entry_point)])
+        if args:
+            parts.extend(shlex.quote(arg) for arg in args)
 
         return " ".join(parts)
 

diff --git a/tests/test_entry_point_validation.py b/tests/test_entry_point_validation.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import sys
 from pathlib import Path
 from unittest.mock import patch
 
@@ -105,7 +106,7 @@ class TestExperimentSandboxEntryPointValidation:
     def _make_sandbox(self, tmp_path: Path) -> ExperimentSandbox:
         from researchclaw.config import SandboxConfig
 
-        cfg = SandboxConfig()
+        cfg = SandboxConfig(python_path=sys.executable)
         return ExperimentSandbox(cfg, tmp_path / "work")
 
     def test_rejects_path_traversal(self, tmp_path: Path) -> None:
@@ -148,3 +149,35 @@ def test_rejects_absolute_path(self, tmp_path: Path) -> None:
     # for future copy mechanism changes; see
     # TestValidateEntryPointResolved.test_symlink_escape_rejected for
     # the unit-level proof that the function catches symlink escapes.
+
+    def test_run_project_passes_args_and_env_overrides(self, tmp_path: Path) -> None:  # run_project must forward both `args` and `env_overrides` to the entry script
+        project = tmp_path / "proj"
+        project.mkdir()
+        (project / "main.py").write_text(  # generated entry script; it fails unless the arg and the env var both arrive
+            "\n".join(
+                [
+                    "from __future__ import annotations",
+                    "import argparse",
+                    "import os",
+                    "",
+                    "parser = argparse.ArgumentParser()",
+                    "parser.add_argument('--value', required=True)",  # required: argparse exits with an error if args are not forwarded
+                    "args = parser.parse_args()",
+                    "if os.environ.get('RC_TEST_FLAG') != 'ok':",  # guards against the env override being dropped
+                    "    raise SystemExit('missing env override')",  # SystemExit with a message gives a non-zero exit status
+                    "print(f'metric: {float(args.value):.1f}')",  # prints "metric: 1.0" for --value 1.0
+                ]
+            ),
+            encoding="utf-8",
+        )
+
+        sandbox = self._make_sandbox(tmp_path)
+        result = sandbox.run_project(
+            project,
+            args=["--value", "1.0"],
+            env_overrides={"RC_TEST_FLAG": "ok"},
+            timeout_sec=10,
+        )
+
+        assert result.returncode == 0  # zero only if argparse got --value and the env check passed
+        assert result.metrics.get("metric") == 1.0  # presumably parsed from the "metric: 1.0" stdout line; confirm against the sandbox's metric parser
diff --git a/tests/test_rc_docker_sandbox.py b/tests/test_rc_docker_sandbox.py
@@ -118,6 +118,22 @@ def test_build_run_command_specific_gpus(tmp_path: Path):
     assert "0,2" in cmd[gpu_idx + 1]
 
 
+def test_build_run_command_forwards_entry_args_and_env(tmp_path: Path):  # _build_run_command must emit env overrides as -e flags and put entry args after the entry point
+    cfg = DockerSandboxConfig(network_policy="none")
+    sandbox = DockerSandbox(cfg, tmp_path / "work")
+    cmd = sandbox._build_run_command(  # only builds the argument list; the test never runs it
+        tmp_path / "staging",
+        entry_point="main.py",
+        container_name="rc-test-args",
+        entry_args=["--foo", "bar"],
+        env_overrides={"B_ENV": "2", "A_ENV": "1"},  # keys given out of alphabetical order; assertions below check membership only
+    )
+    env_values = [cmd[i + 1] for i, token in enumerate(cmd) if token == "-e"]  # the token following each "-e" flag
+    assert "A_ENV=1" in env_values
+    assert "B_ENV=2" in env_values
+    assert cmd[-3:] == ["main.py", "--foo", "bar"]  # entry point followed by its args must be the final tokens
+
+
 # ── Harness injection ─────────────────────────────────────────────────