Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions researchclaw/docker/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
set -e

WORKSPACE="/workspace"
ENTRY_POINT="${1:-main.py}"
ENTRY_POINT="${RC_ENTRY_POINT:-${1:-main.py}}"
if [ "$#" -gt 0 ]; then
shift
fi

# ----------------------------------------------------------------
# Phase 0: Install additional pip packages
Expand Down Expand Up @@ -51,4 +54,4 @@ fi
# Phase 2: Run experiment
# ----------------------------------------------------------------
echo "[RC] Phase 2: Running experiment ($ENTRY_POINT)..."
exec python3 -u "$WORKSPACE/$ENTRY_POINT"
exec python3 -u "$WORKSPACE/$ENTRY_POINT" "$@"
2 changes: 2 additions & 0 deletions researchclaw/experiment/colab_sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,8 @@ def run_project(
*,
entry_point: str = "main.py",
timeout_sec: int = 300,
args: list[str] | None = None,
env_overrides: dict[str, str] | None = None,
) -> SandboxResult:
# BUG-DA8-07: Validate entry_point (path traversal, etc.) like other backends
from researchclaw.experiment.sandbox import validate_entry_point
Expand Down
30 changes: 28 additions & 2 deletions researchclaw/experiment/docker_sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ def run_project(
*,
entry_point: str = "main.py",
timeout_sec: int = 300,
args: list[str] | None = None,
env_overrides: dict[str, str] | None = None,
) -> SandboxResult:
"""Run a multi-file experiment project inside a container."""
self._run_counter += 1
Expand Down Expand Up @@ -189,7 +191,13 @@ def run_project(
metrics={},
)

return self._execute(staging, entry_point=entry_point, timeout_sec=timeout_sec)
return self._execute(
staging,
entry_point=entry_point,
timeout_sec=timeout_sec,
entry_args=args,
env_overrides=env_overrides,
)

# ------------------------------------------------------------------
# Static helpers
Expand Down Expand Up @@ -254,7 +262,13 @@ def _inject_harness(target_dir: Path) -> None:
# ------------------------------------------------------------------

def _execute(
self, staging_dir: Path, *, entry_point: str, timeout_sec: int
self,
staging_dir: Path,
*,
entry_point: str,
timeout_sec: int,
entry_args: list[str] | None = None,
env_overrides: dict[str, str] | None = None,
) -> SandboxResult:
"""Core execution: single container, three-phase via entrypoint.sh."""
cfg = self.config
Expand All @@ -269,6 +283,8 @@ def _execute(
staging_dir,
entry_point=entry_point,
container_name=container_name,
entry_args=entry_args,
env_overrides=env_overrides,
)

start = time.monotonic()
Expand Down Expand Up @@ -349,6 +365,8 @@ def _build_run_command(
*,
entry_point: str,
container_name: str,
entry_args: list[str] | None = None,
env_overrides: dict[str, str] | None = None,
) -> list[str]:
"""Build the ``docker run`` command list.

Expand Down Expand Up @@ -453,9 +471,17 @@ def _user_flag() -> list[str]:
else:
cmd.extend(["--gpus", "all"])

if env_overrides:
for name, value in sorted(env_overrides.items()):
if not value:
continue
cmd.extend(["-e", f"{name}={value}"])

# Image + entry point (passed as CMD arg to entrypoint.sh)
cmd.append(cfg.image)
cmd.append(entry_point)
if entry_args:
cmd.extend(entry_args)

return cmd

Expand Down
27 changes: 21 additions & 6 deletions researchclaw/experiment/sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import subprocess
import time
from dataclasses import dataclass
from pathlib import Path
from pathlib import Path, PurePosixPath, PureWindowsPath
from typing import Protocol

from researchclaw.config import SandboxConfig
Expand All @@ -27,8 +27,9 @@ def validate_entry_point(entry_point: str) -> str | None:
if not entry_point or not entry_point.strip():
return "Entry point is empty"
ep = Path(entry_point)
# Check both native absolute and Unix-style absolute (for cross-platform safety)
if ep.is_absolute() or entry_point.startswith("/"):
posix_ep = PurePosixPath(entry_point)
windows_ep = PureWindowsPath(entry_point)
if ep.is_absolute() or posix_ep.is_absolute() or windows_ep.is_absolute():
return f"Entry point must be a relative path, got: {entry_point}"
if ".." in ep.parts:
return f"Entry point must not contain '..': {entry_point}"
Expand Down Expand Up @@ -298,6 +299,8 @@ def run_project(
*,
entry_point: str = "main.py",
timeout_sec: int = 300,
args: list[str] | None = None,
env_overrides: dict[str, str] | None = None,
) -> SandboxResult: ...


Expand Down Expand Up @@ -351,6 +354,8 @@ def run_project(
*,
entry_point: str = "main.py",
timeout_sec: int = 300,
args: list[str] | None = None,
env_overrides: dict[str, str] | None = None,
) -> SandboxResult:
"""Run a multi-file experiment project in the sandbox.

Expand Down Expand Up @@ -410,12 +415,14 @@ def run_project(
)

start = time.monotonic()
command = self._build_command(entry)
command = self._build_command(entry, args=args)
logger.debug("Running project sandbox command: %s (cwd=%s)", command, sandbox_project)

result: SandboxResult
try:
env = {**os.environ, "PYTHONUNBUFFERED": "1"}
if env_overrides:
env.update(env_overrides)
completed = subprocess.run(
command,
capture_output=True,
Expand Down Expand Up @@ -458,7 +465,12 @@ def _next_script_path(self) -> Path:
def _write_script(script_path: Path, code: str) -> None:
_ = script_path.write_text(code, encoding="utf-8")

def _build_command(self, script_path: Path) -> list[str]:
def _build_command(
self,
script_path: Path,
*,
args: list[str] | None = None,
) -> list[str]:
# Convert relative python_path to absolute WITHOUT resolving symlinks.
# Using .resolve() would follow venv symlinks to the system Python binary,
# which loses the venv context (site-packages like numpy become unavailable).
Expand All @@ -467,7 +479,10 @@ def _build_command(self, script_path: Path) -> list[str]:
if not python_path.is_absolute() and python != "python":
python_path = Path.cwd() / python_path
# -u: unbuffered stdout/stderr so subprocess.run captures all output
return [str(python_path), "-u", str(script_path)]
command = [str(python_path), "-u", str(script_path)]
if args:
command.extend(args)
return command

@staticmethod
def _result_from_completed(
Expand Down
64 changes: 56 additions & 8 deletions researchclaw/experiment/ssh_sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ def run_project(
*,
entry_point: str = "main.py",
timeout_sec: int = 300,
args: list[str] | None = None,
env_overrides: dict[str, str] | None = None,
) -> SandboxResult:
"""Run a multi-file experiment project on the remote host."""
self._run_counter += 1
Expand Down Expand Up @@ -119,7 +121,13 @@ def run_project(
metrics={},
)

return self._execute(staging, entry_point=entry_point, timeout_sec=timeout_sec)
return self._execute(
staging,
entry_point=entry_point,
timeout_sec=timeout_sec,
entry_args=args,
env_overrides=env_overrides,
)

# ------------------------------------------------------------------
# Static helpers
Expand Down Expand Up @@ -158,7 +166,13 @@ def _inject_harness(target_dir: Path) -> None:
# ------------------------------------------------------------------

def _execute(
self, staging_dir: Path, *, entry_point: str, timeout_sec: int
self,
staging_dir: Path,
*,
entry_point: str,
timeout_sec: int,
entry_args: list[str] | None = None,
env_overrides: dict[str, str] | None = None,
) -> SandboxResult:
"""Core execution flow for remote experiments.

Expand Down Expand Up @@ -213,11 +227,17 @@ def _execute(
# 4. Execute experiment
if cfg.use_docker:
exec_cmd = self._build_docker_exec_cmd(
remote_dir, entry_point=entry_point,
remote_dir,
entry_point=entry_point,
args=entry_args,
env_overrides=env_overrides,
)
else:
exec_cmd = self._build_bare_exec_cmd(
remote_dir, entry_point=entry_point,
remote_dir,
entry_point=entry_point,
args=entry_args,
env_overrides=env_overrides,
)

start = time.monotonic()
Expand All @@ -242,13 +262,26 @@ def _execute(
)

def _build_bare_exec_cmd(
self, remote_dir: str, *, entry_point: str,
self,
remote_dir: str,
*,
entry_point: str,
args: list[str] | None = None,
env_overrides: dict[str, str] | None = None,
) -> str:
"""Build command to run Python directly on remote host (with basic sandboxing)."""
cfg = self.config
rd = shlex.quote(remote_dir)
ep = shlex.quote(entry_point)
py = shlex.quote(cfg.remote_python)
arg_text = " ".join(shlex.quote(arg) for arg in (args or []))
arg_suffix = f" {arg_text}" if arg_text else ""
env_parts = [
f"{name}={shlex.quote(value)}"
for name, value in sorted((env_overrides or {}).items())
if value
]
env_prefix = (" ".join(env_parts) + " ") if env_parts else ""

gpu_env = ""
if cfg.gpu_ids:
Expand All @@ -264,17 +297,24 @@ def _build_bare_exec_cmd(
f"if command -v unshare >/dev/null 2>&1; then "
f"HOME={rd} "
f"{gpu_env}"
f"unshare --net {py} -u {ep}; "
f"{env_prefix}"
f"unshare --net {py} -u {ep}{arg_suffix}; "
f"else "
f"echo 'WARNING: unshare not available, running without network isolation' >&2; "
f"HOME={rd} "
f"{gpu_env}"
f"{py} -u {ep}; "
f"{env_prefix}"
f"{py} -u {ep}{arg_suffix}; "
f"fi"
)

def _build_docker_exec_cmd(
self, remote_dir: str, *, entry_point: str,
self,
remote_dir: str,
*,
entry_point: str,
args: list[str] | None = None,
env_overrides: dict[str, str] | None = None,
) -> str:
"""Build command to run inside a Docker container on the remote host.

Expand Down Expand Up @@ -307,8 +347,16 @@ def _build_docker_exec_cmd(
# Try to pass all GPUs; fails gracefully if none available
parts.extend(["--gpus", "all"])

if env_overrides:
for name, value in sorted(env_overrides.items()):
if not value:
continue
parts.extend(["-e", shlex.quote(f"{name}={value}")])

parts.append(shlex.quote(cfg.docker_image))
parts.extend(["python3", "-u", shlex.quote(entry_point)])
if args:
parts.extend(shlex.quote(arg) for arg in args)

return " ".join(parts)

Expand Down
35 changes: 34 additions & 1 deletion tests/test_entry_point_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import sys
from pathlib import Path
from unittest.mock import patch

Expand Down Expand Up @@ -105,7 +106,7 @@ class TestExperimentSandboxEntryPointValidation:
def _make_sandbox(self, tmp_path: Path) -> ExperimentSandbox:
from researchclaw.config import SandboxConfig

cfg = SandboxConfig()
cfg = SandboxConfig(python_path=sys.executable)
return ExperimentSandbox(cfg, tmp_path / "work")

def test_rejects_path_traversal(self, tmp_path: Path) -> None:
Expand Down Expand Up @@ -148,3 +149,35 @@ def test_rejects_absolute_path(self, tmp_path: Path) -> None:
# for future copy mechanism changes; see
# TestValidateEntryPointResolved.test_symlink_escape_rejected for
# the unit-level proof that the function catches symlink escapes.

def test_run_project_passes_args_and_env_overrides(self, tmp_path: Path) -> None:
    """Check that ``run_project`` hands ``args`` and ``env_overrides`` to the script.

    The generated ``main.py`` requires a ``--value`` option and aborts unless
    ``RC_TEST_FLAG`` equals ``ok`` in its environment, so a zero return code
    plus the echoed metric shows that both were delivered.
    """
    # Script body, one source line per element; joined without a trailing newline.
    script_lines = (
        "from __future__ import annotations",
        "import argparse",
        "import os",
        "",
        "parser = argparse.ArgumentParser()",
        "parser.add_argument('--value', required=True)",
        "args = parser.parse_args()",
        "if os.environ.get('RC_TEST_FLAG') != 'ok':",
        "    raise SystemExit('missing env override')",
        "print(f'metric: {float(args.value):.1f}')",
    )

    project_dir = tmp_path / "proj"
    project_dir.mkdir()
    entry_file = project_dir / "main.py"
    entry_file.write_text("\n".join(script_lines), encoding="utf-8")

    outcome = self._make_sandbox(tmp_path).run_project(
        project_dir,
        timeout_sec=10,
        env_overrides={"RC_TEST_FLAG": "ok"},
        args=["--value", "1.0"],
    )

    # The script only prints the metric line after both checks have passed.
    assert outcome.returncode == 0
    assert outcome.metrics.get("metric") == 1.0
16 changes: 16 additions & 0 deletions tests/test_rc_docker_sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,22 @@ def test_build_run_command_specific_gpus(tmp_path: Path):
assert "0,2" in cmd[gpu_idx + 1]


def test_build_run_command_forwards_entry_args_and_env(tmp_path: Path):
    """``_build_run_command`` forwards env overrides and entry-point arguments.

    Verifies that:
    * every non-empty override is emitted as a ``-e NAME=value`` pair,
    * the pairs appear in name-sorted order regardless of dict order,
    * overrides with an empty value are dropped,
    * the command ends with the entry point followed by its arguments.
    """
    cfg = DockerSandboxConfig(network_policy="none")
    sandbox = DockerSandbox(cfg, tmp_path / "work")
    cmd = sandbox._build_run_command(
        tmp_path / "staging",
        entry_point="main.py",
        container_name="rc-test-args",
        entry_args=["--foo", "bar"],
        # Deliberately unsorted, plus one empty value that must be skipped.
        env_overrides={"B_ENV": "2", "A_ENV": "1", "EMPTY_ENV": ""},
    )
    # Collect the value that follows each "-e" flag in the command list.
    env_values = [cmd[i + 1] for i, token in enumerate(cmd) if token == "-e"]
    assert "A_ENV=1" in env_values
    assert "B_ENV=2" in env_values
    # Overrides are emitted sorted by name so the command is deterministic.
    assert env_values.index("A_ENV=1") < env_values.index("B_ENV=2")
    # Empty values are not forwarded to the container.
    assert not any(value.startswith("EMPTY_ENV=") for value in env_values)
    assert cmd[-3:] == ["main.py", "--foo", "bar"]


# ── Harness injection ─────────────────────────────────────────────────


Expand Down
Loading