diff --git a/.env.base.example b/.env.base.example deleted file mode 100644 index 37b6c36..0000000 --- a/.env.base.example +++ /dev/null @@ -1,6 +0,0 @@ -# Optional shared defaults. -# Copy to .env.base (ignored by git) if you want common vars loaded for every profile. - -OAS_MAX_TURNS=12 -SWEBENCH_TIMEOUT=300 - diff --git a/.env.profiles/daytona.env.example b/.env.profiles/daytona.env.example deleted file mode 100644 index 56fca19..0000000 --- a/.env.profiles/daytona.env.example +++ /dev/null @@ -1,5 +0,0 @@ -# Copy to .env.profiles/daytona.env and fill real values (do not commit). - -DAYTONA_API_KEY=your_daytona_api_key -DAYTONA_API_URL=https://app.daytona.io/api - diff --git a/.env.profiles/minimax-cn.env.example b/.env.profiles/minimax-cn.env.example deleted file mode 100644 index 60cbcbc..0000000 --- a/.env.profiles/minimax-cn.env.example +++ /dev/null @@ -1,6 +0,0 @@ -# Copy to .env.profiles/minimax-cn.env and fill real values (do not commit). - -OPENAI_API_KEY=your_minimax_cn_key -OPENAI_BASE_URL=https://api.minimaxi.com/v1 -OPENAI_MODEL=MiniMax-M2.5 - diff --git a/.env.profiles/minimax-global.env.example b/.env.profiles/minimax-global.env.example deleted file mode 100644 index ab94855..0000000 --- a/.env.profiles/minimax-global.env.example +++ /dev/null @@ -1,6 +0,0 @@ -# Copy to .env.profiles/minimax-global.env and fill real values (do not commit). - -OPENAI_API_KEY=your_minimax_global_key -OPENAI_BASE_URL=https://api.minimax.chat/v1 -OPENAI_MODEL=MiniMax-M2.5 - diff --git a/.gitignore b/.gitignore index ad39276..6b9bb37 100644 --- a/.gitignore +++ b/.gitignore @@ -10,9 +10,6 @@ dist/ .env .env.local .env.*.local -.env.base -.env.profiles/*.env -!.env.profiles/*.env.example # IDE .vscode/ diff --git a/benchmark/terminalbench/open_agent_sdk_harbor/agent.py b/benchmark/terminalbench/open_agent_sdk_harbor/agent.py index 62052e7..a37fd50 100644 --- a/benchmark/terminalbench/open_agent_sdk_harbor/agent.py +++ b/benchmark/terminalbench/open_agent_sdk_harbor/agent.py @@ -96,7 +96,7 @@ def create_run_agent_commands(self, instruction: str) -> list[ExecInput]: # - Keep --no-persist only when both transcript and trajectory export are off. # - While the CLI canary may lag behind local code, keep best-effort sync # from /root/.open-agent/sessions to /logs/agent/open-agent-transcript. - cli_flags = f"--model {model} --output-format json" + cli_flags = f"--model {model} --output-format json --cleanup-background never" save_trajectory = os.environ.get("OAS_HARBOR_SAVE_TRAJECTORY") == "1" save_transcript = os.environ.get("OAS_HARBOR_SAVE_TRANSCRIPT", "1") == "1" if save_trajectory: diff --git a/docs/workflows/terminal-bench-harbor-runbook.md b/docs/workflows/terminal-bench-harbor-runbook.md index 66899ce..8d948e8 100644 --- a/docs/workflows/terminal-bench-harbor-runbook.md +++ b/docs/workflows/terminal-bench-harbor-runbook.md @@ -18,7 +18,7 @@ ln -sf "$(pwd)/benchmark/terminalbench/open_agent_sdk_harbor/agent.py" \ ## 2. Load Environment Variables -Use the repository `.env` as the source of truth: +Use the repository `.env` as the single source of truth: ```bash set -a @@ -33,6 +33,13 @@ Required for MiniMax Anthropic-compatible endpoint: If these are empty, `command-0` fails quickly with invalid URL/provider errors. +Quick sanity check: + +```bash +echo "ANTHROPIC_API_KEY length=${#ANTHROPIC_API_KEY}" +echo "ANTHROPIC_BASE_URL=$ANTHROPIC_BASE_URL" +``` + ## 3. Proxy Handling (Important) Use proxy settings for host tooling only if needed, but run Harbor process with proxy vars removed to avoid container networking issues with local `127.0.0.1` proxies. diff --git a/packages/core/src/tools/bash-output.ts b/packages/core/src/tools/bash-output.ts index 10954d5..32a5190 100644 --- a/packages/core/src/tools/bash-output.ts +++ b/packages/core/src/tools/bash-output.ts @@ -3,10 +3,17 @@ */ import type { Tool, ToolContext, JSONSchema } from '../types/tools'; +import { readFileSync } from 'fs'; import { backgroundProcesses } from './bash'; +const MAX_CAPTURE_CHARS = 200_000; const TRUNCATED_NOTICE = '\n[Output truncated to avoid excessive memory usage]'; +function truncateOutput(value: string): { value: string; truncated: boolean } { + if (value.length <= MAX_CAPTURE_CHARS) return { value, truncated: false }; + return { value: value.slice(0, MAX_CAPTURE_CHARS), truncated: true }; +} + export interface BashOutputInput { shellId: string; } @@ -57,6 +64,28 @@ export class BashOutputTool implements Tool { // Check if process is still running const running = process.exitCode === null; + // For detached background processes, refresh output from redirected log files. + if (process.stdoutPath) { + try { + const content = readFileSync(process.stdoutPath, 'utf8'); + const next = truncateOutput(content); + process.stdout = next.value; + process.stdoutTruncated = next.truncated; + } catch { + // Ignore missing/temporary read failures. + } + } + if (process.stderrPath) { + try { + const content = readFileSync(process.stderrPath, 'utf8'); + const next = truncateOutput(content); + process.stderr = next.value; + process.stderrTruncated = next.truncated; + } catch { + // Ignore missing/temporary read failures. + } + } + return { stdout: process.stdout + (process.stdoutTruncated ? TRUNCATED_NOTICE : ''), stderr: process.stderr + (process.stderrTruncated ? TRUNCATED_NOTICE : ''), diff --git a/packages/core/src/tools/bash.ts b/packages/core/src/tools/bash.ts index fa73a2e..6aac422 100644 --- a/packages/core/src/tools/bash.ts +++ b/packages/core/src/tools/bash.ts @@ -3,6 +3,9 @@ */ import { spawn, ChildProcess } from 'child_process'; +import { mkdirSync, readFileSync } from 'fs'; +import { join } from 'path'; +import { tmpdir } from 'os'; import type { Tool, ToolContext, JSONSchema } from '../types/tools'; export interface BackgroundProcess { @@ -10,10 +13,13 @@ export interface BackgroundProcess { startTime: number; stdout: string; stderr: string; + stdoutPath?: string; + stderrPath?: string; stdoutTruncated: boolean; stderrTruncated: boolean; exitCode: number | null; process: ChildProcess; + detached: boolean; } export interface BashInput { @@ -61,6 +67,7 @@ export const backgroundProcesses = new Map(); // Cap captured output to avoid OOM when commands print large streams. const MAX_CAPTURE_CHARS = 200_000; const TRUNCATED_NOTICE = '\n[Output truncated to avoid excessive memory usage]'; +const BG_LOG_DIR = join(tmpdir(), 'open-agent-sdk-bg'); function appendCapped( current: string, @@ -107,11 +114,33 @@ export class BashTool implements Tool { return new Promise((resolve) => { const shell = process.platform === 'win32' ? 'cmd.exe' : '/bin/sh'; const shellFlag = process.platform === 'win32' ? '/c' : '-c'; + let shellId: string | undefined; + let stdoutPath: string | undefined; + let stderrPath: string | undefined; + let detachedBackground = false; - const child = spawn(shell, [shellFlag, command], { + if (run_in_background) { + shellId = `shell_${++backgroundProcessId}`; + if (process.platform !== 'win32') { + detachedBackground = true; + mkdirSync(BG_LOG_DIR, { recursive: true }); + stdoutPath = join(BG_LOG_DIR, `${shellId}.stdout.log`); + stderrPath = join(BG_LOG_DIR, `${shellId}.stderr.log`); + } + } + + const normalizedCommand = run_in_background + ? command.replace(/\s*&\s*$/, '').trim() + : command; + const commandToRun = run_in_background && stdoutPath && stderrPath + ? `( ${normalizedCommand} ) >>"${stdoutPath}" 2>>"${stderrPath}"` + : normalizedCommand; + + const child = spawn(shell, [shellFlag, commandToRun], { cwd: context.cwd, env: { ...process.env, ...context.env }, - stdio: ['ignore', 'pipe', 'pipe'], + stdio: run_in_background && detachedBackground ? ['ignore', 'ignore', 'ignore'] : ['ignore', 'pipe', 'pipe'], + detached: run_in_background ? detachedBackground : false, }); // Set up abort handler @@ -128,35 +157,59 @@ export class BashTool implements Tool { // Handle background execution if (run_in_background) { - const shellId = `shell_${++backgroundProcessId}`; const bgProcess: BackgroundProcess = { pid: child.pid!, startTime: Date.now(), stdout: '', stderr: '', + stdoutPath, + stderrPath, stdoutTruncated: false, stderrTruncated: false, exitCode: null, process: child, + detached: detachedBackground, }; - backgroundProcesses.set(shellId, bgProcess); - - // Capture stdout/stderr - child.stdout?.on('data', (data) => { - const next = appendCapped(bgProcess.stdout, data.toString(), MAX_CAPTURE_CHARS); - bgProcess.stdout = next.value; - if (next.truncated) bgProcess.stdoutTruncated = true; - }); + backgroundProcesses.set(shellId!, bgProcess); + + if (!detachedBackground) { + // Capture stdout/stderr in-process when not detached. + child.stdout?.on('data', (data) => { + const next = appendCapped(bgProcess.stdout, data.toString(), MAX_CAPTURE_CHARS); + bgProcess.stdout = next.value; + if (next.truncated) bgProcess.stdoutTruncated = true; + }); - child.stderr?.on('data', (data) => { - const next = appendCapped(bgProcess.stderr, data.toString(), MAX_CAPTURE_CHARS); - bgProcess.stderr = next.value; - if (next.truncated) bgProcess.stderrTruncated = true; - }); + child.stderr?.on('data', (data) => { + const next = appendCapped(bgProcess.stderr, data.toString(), MAX_CAPTURE_CHARS); + bgProcess.stderr = next.value; + if (next.truncated) bgProcess.stderrTruncated = true; + }); + } // Set exit code when process exits (don't delete from map) child.on('exit', (code) => { bgProcess.exitCode = code ?? -1; + if (bgProcess.stdoutPath) { + try { + const content = readFileSync(bgProcess.stdoutPath, 'utf8'); + const next = appendCapped('', content, MAX_CAPTURE_CHARS); + bgProcess.stdout = next.value; + bgProcess.stdoutTruncated = next.truncated; + } catch { + // Ignore read errors for best-effort output capture. + } + } + if (bgProcess.stderrPath) { + try { + const content = readFileSync(bgProcess.stderrPath, 'utf8'); + const next = appendCapped('', content, MAX_CAPTURE_CHARS); + bgProcess.stderr = next.value; + bgProcess.stderrTruncated = next.truncated; + } catch { + // Ignore read errors for best-effort output capture. + } + } }); // Prevent background child handles from keeping the process alive. @@ -166,9 +219,9 @@ export class BashTool implements Tool { // Don't wait for completion resolve({ - output: `Command running in background with ID: ${shellId}`, + output: `Command running in background with ID: ${shellId!}`, exitCode: 0, - shellId, + shellId: shellId!, }); return; @@ -295,11 +348,27 @@ export async function cleanupBackgroundProcesses( done(); }); - bgProcess.process.kill('SIGTERM'); + try { + if (bgProcess.detached && process.platform !== 'win32') { + process.kill(-bgProcess.pid, 'SIGTERM'); + } else { + bgProcess.process.kill('SIGTERM'); + } + } catch { + // Ignore errors if process already exited. + } const forceKillTimer = setTimeout(() => { if (bgProcess.exitCode === null) { - bgProcess.process.kill('SIGKILL'); + try { + if (bgProcess.detached && process.platform !== 'win32') { + process.kill(-bgProcess.pid, 'SIGKILL'); + } else { + bgProcess.process.kill('SIGKILL'); + } + } catch { + // Ignore errors if process already exited. + } } done(); }, Math.max(1, forceKillAfterMs)); diff --git a/packages/core/src/tools/kill-bash.ts b/packages/core/src/tools/kill-bash.ts index a7f1512..f2ce168 100644 --- a/packages/core/src/tools/kill-bash.ts +++ b/packages/core/src/tools/kill-bash.ts @@ -55,8 +55,16 @@ export class KillBashTool implements Tool { }; } - // Send SIGTERM - bgProcess.process.kill('SIGTERM'); + // Send SIGTERM (target process group for detached background jobs) + try { + if (bgProcess.detached && process.platform !== 'win32') { + process.kill(-bgProcess.pid, 'SIGTERM'); + } else { + bgProcess.process.kill('SIGTERM'); + } + } catch { + // Ignore if already exited between checks. + } // Wait up to 5 seconds for graceful exit, then SIGKILL return new Promise((resolve) => { @@ -74,7 +82,15 @@ export class KillBashTool implements Tool { const forceKillTimeout = setTimeout(() => { clearInterval(checkInterval); if (bgProcess.exitCode === null) { - bgProcess.process.kill('SIGKILL'); + try { + if (bgProcess.detached && process.platform !== 'win32') { + process.kill(-bgProcess.pid, 'SIGKILL'); + } else { + bgProcess.process.kill('SIGKILL'); + } + } catch { + // Ignore if already exited. + } resolve({ success: true, message: `Process ${shellId} force-killed with SIGKILL`, diff --git a/packages/core/tests/tools/bash-enhanced.test.ts b/packages/core/tests/tools/bash-enhanced.test.ts index 7fd7d24..96a99dd 100644 --- a/packages/core/tests/tools/bash-enhanced.test.ts +++ b/packages/core/tests/tools/bash-enhanced.test.ts @@ -167,4 +167,19 @@ describe('BashTool - Enhanced Background Process Tracking', () => { const processAfterCleanup = getBackgroundProcess(shellId); expect(processAfterCleanup?.exitCode).not.toBeNull(); }); + + test('should handle commands that already include trailing & in background mode', async () => { + const result = await tool.handler( + { + command: 'sleep 1 &', + run_in_background: true, + }, + context + ); + + const shellId = result.shellId!; + const process = getBackgroundProcess(shellId); + expect(process).toBeDefined(); + expect(process?.exitCode).toBeNull(); + }); }); diff --git a/scripts/env/use-profile.sh b/scripts/env/use-profile.sh deleted file mode 100755 index 5f8843f..0000000 --- a/scripts/env/use-profile.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env bash -# Usage: -# source scripts/env/use-profile.sh minimax-global -# -# This script is intended to be sourced, not executed. -# It loads .env.base (if present) and .env.profiles/.env. - -set -euo pipefail - -if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then - echo "Use this script with 'source', e.g.:" - echo " source scripts/env/use-profile.sh minimax-global" - exit 1 -fi - -if [[ $# -ne 1 ]]; then - echo "Usage: source scripts/env/use-profile.sh " - return 1 -fi - -PROFILE_NAME="$1" -REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" -BASE_FILE="${REPO_ROOT}/.env.base" -PROFILE_FILE="${REPO_ROOT}/.env.profiles/${PROFILE_NAME}.env" - -if [[ ! -f "${PROFILE_FILE}" ]]; then - echo "Profile file not found: ${PROFILE_FILE}" - echo "Available examples:" - ls -1 "${REPO_ROOT}/.env.profiles/"*.env.example 2>/dev/null || true - return 1 -fi - -set -a -if [[ -f "${BASE_FILE}" ]]; then - # shellcheck disable=SC1090 - source "${BASE_FILE}" -fi -# shellcheck disable=SC1090 -source "${PROFILE_FILE}" -set +a - -echo "Loaded profile: ${PROFILE_NAME}"