diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index edb92f0..6e29010 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -16,8 +16,8 @@ jobs: contents: read env: CARGO_NET_GIT_FETCH_WITH_CLI: true - BT_EVAL_REQUIRED_RUNTIMES: node,bun - BT_EVAL_FIXTURE_RUNTIMES: node,bun + BT_EVAL_REQUIRED_RUNTIMES: node,bun,deno + BT_EVAL_FIXTURE_RUNTIMES: node,bun,deno steps: - uses: actions/checkout@v4 - name: Configure git auth for private dependencies @@ -35,6 +35,9 @@ jobs: with: node-version: ${{ matrix.node-version }} - uses: oven-sh/setup-bun@v2 + - uses: denoland/setup-deno@v2 + with: + deno-version: v2.x - name: Enable pnpm run: | corepack enable diff --git a/.gitignore b/.gitignore index 5378d73..cb42a9b 100644 --- a/.gitignore +++ b/.gitignore @@ -18,5 +18,6 @@ Thumbs.db # Node node_modules/ tests/evals/js/eval-bun/test-data.txt +.bt/ __pycache__ diff --git a/Cargo.lock b/Cargo.lock index 1c00b26..8cd98a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -968,9 +968,9 @@ checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "memchr" -version = "2.7.6" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "mio" @@ -1367,9 +1367,9 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" [[package]] name = "scopeguard" @@ -1815,9 +1815,9 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "unicode-ident" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +checksum = "537dd038a89878be9b64dd4bd1b260315c1bb94f4d784956b81e27a088d9a09e" [[package]] name = "unicode-segmentation" @@ -2393,6 +2393,6 @@ dependencies = [ [[package]] name = "zmij" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ff05f8caa9038894637571ae6b9e29466c1f4f829d26c9b28f869a29cbe3445" +checksum = "4de98dfa5d5b7fef4ee834d0073d560c9ca7b6c46a71d058c48db7960f8cfaf7" diff --git a/eval-todo.md b/eval-todo.md index edcd9e3..e464ba2 100644 --- a/eval-todo.md +++ b/eval-todo.md @@ -27,26 +27,26 @@ Legend: - `partial`: implemented but behavior differs - `todo`: missing -| Feature / Flag | JS CLI | PY CLI | `bt` | Notes | -| --------------------------- | ---------------: | ---------------: | -----------------------------------------: | ----------------------------------------------------------------------------------- | -| Run eval files | yes | yes | `done` | Single-language per invocation currently enforced. | -| Local/no-upload mode | `--no-send-logs` | `--no-send-logs` | `done` (`--local`, alias `--no-send-logs`) | | -| Global auth/env passthrough | yes | yes | `done` | Via base args/env (`BRAINTRUST_API_KEY`, `BRAINTRUST_API_URL`, project). | -| Progress rendering | yes | yes | `partial` | `bt` consumes local SSE and renders Rust TUI/progress, but not full SDK parity yet. 
| -| `--list` (discover only) | yes | yes | `todo` | | -| `--filter` | yes | yes | `todo` | | -| `--jsonl` summaries | yes | yes | `todo` | | -| `--terminate-on-failure` | yes | yes | `todo` | | -| `--watch` | yes | yes | `todo` | | -| `--verbose` | yes | parent flag | `todo` | | -| `--env-file` | yes | yes | `todo` | | -| `--dev` remote eval server | yes | yes | `todo` | Important for `test_remote_evals.py` parity. | -| `--dev-host` | yes | yes | `todo` | | -| `--dev-port` | yes | yes | `todo` | | -| `--dev-org-name` | yes | yes | `todo` | | -| `--num-workers` | n/a | yes | `todo` | Python-specific concurrency control. | -| Directory input expansion | yes | yes | `todo` | Today `bt` expects explicit files/extensions. | -| Mixed runtime selection | n/a | n/a | `partial` | Current `--runner` plus env vars; per-language runner matrix deferred. | +| Feature / Flag | JS CLI | PY CLI | `bt` | Notes | +| --------------------------- | ---------------: | ---------------: | -----------------------------------------: | -------------------------------------------------------------------------------------------------------- | +| Run eval files | yes | yes | `done` | Single-language per invocation currently enforced. | +| Local/no-upload mode | `--no-send-logs` | `--no-send-logs` | `done` (`--local`, alias `--no-send-logs`) | | +| Global auth/env passthrough | yes | yes | `done` | Via base args/env (`BRAINTRUST_API_KEY`, `BRAINTRUST_API_URL`, project). | +| Progress rendering | yes | yes | `partial` | `bt` consumes local SSE and renders Rust TUI/progress, but not full SDK parity yet. | +| `--list` (discover only) | yes | yes | `todo` | | +| `--filter` | yes | yes | `todo` | | +| `--jsonl` summaries | yes | yes | `todo` | | +| `--terminate-on-failure` | yes | yes | `todo` | | +| `--watch` | yes | yes | `partial` | Poll-based watcher with Node/Bun dependency hooks, Deno graph collection, and static JS import fallback. | +| `--verbose` | yes | parent flag | `todo` | | +| `--env-file` | yes | yes | `todo` | | +| `--dev` remote eval server | yes | yes | `todo` | Important for `test_remote_evals.py` parity. | +| `--dev-host` | yes | yes | `todo` | | +| `--dev-port` | yes | yes | `todo` | | +| `--dev-org-name` | yes | yes | `todo` | | +| `--num-workers` | n/a | yes | `todo` | Python-specific concurrency control. | +| Directory input expansion | yes | yes | `todo` | Today `bt` expects explicit files/extensions. | +| Mixed runtime selection | n/a | n/a | `partial` | Current `--runner` plus env vars; per-language runner matrix deferred. 
| ## Braintrust Test Callsite Inventory diff --git a/scripts/eval-runner.py b/scripts/eval-runner.py index 97d41e4..4d42214 100755 --- a/scripts/eval-runner.py +++ b/scripts/eval-runner.py @@ -33,6 +33,14 @@ INCLUDE = ["**/eval_*.py", "**/*.eval.py"] EXCLUDE = ["**/site-packages/**", "**/__pycache__/**"] +WATCHABLE_PYTHON_EXTENSIONS = {".py"} +WATCH_EXCLUDE_SEGMENTS = ( + "/site-packages/", + "/dist-packages/", + "/__pycache__/", + "/.venv/", + "/venv/", +) @dataclass @@ -155,6 +163,41 @@ def collect_files(input_path: str) -> list[str]: return [input_path] +def is_watchable_dependency(path_input: str, cwd: str) -> bool: + path = os.path.abspath(path_input) + normalized = path.replace("\\", "/") + if not os.path.isfile(path): + return False + if os.path.splitext(path)[1].lower() not in WATCHABLE_PYTHON_EXTENSIONS: + return False + if any(segment in normalized for segment in WATCH_EXCLUDE_SEGMENTS): + return False + + try: + common = os.path.commonpath([path, cwd]) + except ValueError: + return False + return common == cwd + + +def collect_dependency_files(cwd: str, input_files: list[str]) -> list[str]: + dependencies: set[str] = set() + for module in list(sys.modules.values()): + module_file = getattr(module, "__file__", None) + if not module_file: + continue + candidate = module_file[:-1] if module_file.endswith(".pyc") else module_file + if is_watchable_dependency(candidate, cwd): + dependencies.add(os.path.abspath(candidate)) + + for file_path in input_files: + path = os.path.abspath(file_path) + if is_watchable_dependency(path, cwd): + dependencies.add(path) + + return sorted(dependencies) + + def resolve_module_info(in_file: str) -> tuple[str, list[str]]: in_file = os.path.abspath(in_file) module_dir = os.path.dirname(in_file) @@ -356,9 +399,11 @@ def main(argv: list[str] | None = None) -> int: login(api_key=args.api_key, org_name=args.org_name, app_url=args.app_url) sse = create_sse_writer() + cwd = os.path.abspath(os.getcwd()) try: success = asyncio.run(run_once(files, local, sse)) if sse: + sse.send("dependencies", {"files": collect_dependency_files(cwd, files)}) sse.send("done", {"success": success}) return 0 if success else 1 finally: diff --git a/scripts/eval-runner.ts b/scripts/eval-runner.ts index 4c382c0..294e1d8 100644 --- a/scripts/eval-runner.ts +++ b/scripts/eval-runner.ts @@ -1,7 +1,6 @@ -import { createRequire } from "module"; -import net from "net"; -import path from "path"; -import { pathToFileURL } from "url"; +import { createRequire } from "node:module"; +import path from "node:path"; +import { fileURLToPath, pathToFileURL } from "node:url"; type EvaluatorEntry = { evaluator: { @@ -105,6 +104,408 @@ function normalizeFiles(files: string[]): string[] { return files.map((file) => path.resolve(process.cwd(), file)); } +const runtimeRequire = createRequire( + process.argv[1] ?? 
path.join(process.cwd(), "package.json"), +); +const fsMutable = runtimeRequire("node:fs") as typeof import("node:fs"); +const moduleMutable = (() => { + try { + return runtimeRequire("node:module") as Record; + } catch { + return {}; + } +})(); + +type NetModule = { + createConnection: (options: Record) => { + writable: boolean; + end: () => void; + setNoDelay: (value?: boolean) => void; + on: (event: string, listener: (...args: unknown[]) => void) => void; + write: (data: string) => void; + }; +}; + +const dependencyFiles = new Set(); +const DEPENDENCY_EXTENSIONS = new Set([ + ".ts", + ".tsx", + ".js", + ".jsx", + ".mjs", + ".cjs", + ".mts", + ".cts", + ".json", +]); +const IGNORED_DEPENDENCY_SEGMENTS = [ + "/node_modules/", + "/.git/", + "/.venv/", + "/__pycache__/", + "/site-packages/", + "/dist-packages/", +]; +const STATIC_IMPORT_PATTERN = + /(?:import|export)\s+(?:[^"'`]*?\sfrom\s*)?["'`]([^"'`]+)["'`]|import\s*\(\s*["'`]([^"'`]+)["'`]\s*\)|require\s*\(\s*["'`]([^"'`]+)["'`]\s*\)/g; + +function toDependencyPath(input: unknown): string | null { + try { + if (input instanceof URL) { + return fileURLToPath(input); + } + if (Buffer.isBuffer(input)) { + return path.resolve(process.cwd(), input.toString()); + } + if (typeof input !== "string") { + return null; + } + if (input.startsWith("file://")) { + return fileURLToPath(input); + } + return path.isAbsolute(input) + ? path.normalize(input) + : path.resolve(process.cwd(), input); + } catch { + return null; + } +} + +function shouldIgnoreDependencyPath(filePath: string): boolean { + const normalized = filePath.replaceAll("\\", "/"); + return IGNORED_DEPENDENCY_SEGMENTS.some((segment) => + normalized.includes(segment), + ); +} + +function maybeRecordDependency(input: unknown) { + const filePath = toDependencyPath(input); + if (!filePath || shouldIgnoreDependencyPath(filePath)) { + return; + } + + const extension = path.extname(filePath).toLowerCase(); + if (!DEPENDENCY_EXTENSIONS.has(extension)) { + return; + } + + try { + if (fsMutable.statSync(filePath).isFile()) { + dependencyFiles.add(filePath); + } + } catch { + // Ignore inaccessible or non-file inputs. + } +} + +function maybeRecordDependencyFromSpecifier( + specifier: string, + resolveDir?: string, +) { + if ( + specifier.startsWith("node:") || + specifier.startsWith("bun:") || + specifier.startsWith("npm:") + ) { + return; + } + + if ( + specifier.startsWith("./") || + specifier.startsWith("../") || + specifier.startsWith("/") || + specifier.startsWith("file://") + ) { + const baseDir = resolveDir ?? process.cwd(); + const candidate = specifier.startsWith("file://") + ? specifier + : path.resolve(baseDir, specifier); + maybeRecordDependency(candidate); + } +} + +function collectStaticLocalDependencies(entryFiles: string[]) { + const queue = [...entryFiles]; + const visited = new Set(); + + while (queue.length > 0) { + const file = queue.pop(); + if (!file) { + continue; + } + const absolute = path.resolve(file); + if (visited.has(absolute)) { + continue; + } + visited.add(absolute); + maybeRecordDependency(absolute); + + let source = ""; + try { + source = fsMutable.readFileSync(absolute, "utf8"); + } catch { + continue; + } + + STATIC_IMPORT_PATTERN.lastIndex = 0; + let match: RegExpExecArray | null; + while ((match = STATIC_IMPORT_PATTERN.exec(source)) !== null) { + const specifier = match[1] ?? match[2] ?? 
match[3]; + if (!specifier) { + continue; + } + const resolved = resolveLocalSpecifier(absolute, specifier); + if (!resolved) { + continue; + } + maybeRecordDependency(resolved); + if (!visited.has(resolved)) { + queue.push(resolved); + } + } + } +} + +function resolveLocalSpecifier( + fromFile: string, + specifier: string, +): string | null { + if ( + !specifier.startsWith("./") && + !specifier.startsWith("../") && + !specifier.startsWith("/") && + !specifier.startsWith("file://") + ) { + return null; + } + + const fromDir = path.dirname(fromFile); + const base = specifier.startsWith("file://") + ? fileURLToPath(specifier) + : specifier.startsWith("/") + ? path.normalize(specifier) + : path.resolve(fromDir, specifier); + + const candidates = [base]; + if (!path.extname(base)) { + for (const ext of DEPENDENCY_EXTENSIONS) { + candidates.push(`${base}${ext}`); + } + for (const ext of DEPENDENCY_EXTENSIONS) { + candidates.push(path.join(base, `index${ext}`)); + } + } + + for (const candidate of candidates) { + try { + if (fsMutable.statSync(candidate).isFile()) { + return path.normalize(candidate); + } + } catch { + continue; + } + } + + return null; +} + +function installNodeModuleHooks() { + const registerHooks = moduleMutable.registerHooks as + | ((hooks: Record unknown>) => void) + | undefined; + if (typeof registerHooks !== "function") { + return; + } + + registerHooks({ + resolve: (specifier, context, nextResolve) => { + const next = nextResolve as ( + specifier: unknown, + context: Record, + ) => { url?: string } & Record; + const ctx = (context ?? {}) as Record; + const result = next(specifier, ctx); + const resolvedUrl = result?.url; + if (typeof resolvedUrl === "string") { + maybeRecordDependency(resolvedUrl); + } else if (typeof specifier === "string") { + const resolveDir = + typeof ctx.parentURL === "string" && + ctx.parentURL.startsWith("file://") + ? path.dirname(fileURLToPath(ctx.parentURL)) + : undefined; + maybeRecordDependencyFromSpecifier(specifier, resolveDir); + } + return result; + }, + }); +} + +function installBunModuleHooks() { + const bun = (globalThis as { Bun?: Record }).Bun as + | { + plugin?: (plugin: { + name: string; + setup: (build: Record) => void; + }) => void; + } + | undefined; + if (!bun || typeof bun.plugin !== "function") { + return; + } + + bun.plugin({ + name: "bt-eval-dependency-tracker", + setup: (build: Record) => { + const onResolve = build.onResolve as + | (( + options: { filter: RegExp }, + callback: (args: Record) => unknown, + ) => void) + | undefined; + if (typeof onResolve === "function") { + onResolve({ filter: /.*/ }, (args) => { + const specifier = args.path; + const resolveDir = + typeof args.resolveDir === "string" + ? 
args.resolveDir + : process.cwd(); + if (typeof specifier === "string") { + maybeRecordDependencyFromSpecifier(specifier, resolveDir); + } + return null; + }); + } + }, + }); +} + +function installDependencyTracking() { + installNodeModuleHooks(); + installBunModuleHooks(); + + const fsPatched = fsMutable as unknown as Record; + const originalReadFileSync = fsMutable.readFileSync.bind(fsMutable); + Reflect.set( + fsPatched, + "readFileSync", + (file: unknown, ...args: unknown[]) => { + maybeRecordDependency(file); + const callArgs = [file, ...args] as unknown[]; + return Reflect.apply( + originalReadFileSync as (...params: unknown[]) => unknown, + fsMutable, + callArgs, + ); + }, + ); + + const originalReadFile = fsMutable.readFile.bind(fsMutable); + Reflect.set(fsPatched, "readFile", (file: unknown, ...args: unknown[]) => { + maybeRecordDependency(file); + const callArgs = [file, ...args] as unknown[]; + return Reflect.apply( + originalReadFile as (...params: unknown[]) => unknown, + fsMutable, + callArgs, + ); + }); + + const originalPromisesReadFile = fsMutable.promises.readFile.bind( + fsMutable.promises, + ); + const fsPromisesPatched = fsMutable.promises as unknown as Record< + string, + unknown + >; + Reflect.set( + fsPromisesPatched, + "readFile", + async (file: unknown, ...args: unknown[]) => { + maybeRecordDependency(file); + const callArgs = [file, ...args] as unknown[]; + return Reflect.apply( + originalPromisesReadFile as (...params: unknown[]) => Promise, + fsMutable.promises, + callArgs, + ); + }, + ); +} + +function collectRequireCacheDependencies() { + const cache = runtimeRequire.cache as Record< + string, + { filename?: string } | undefined + >; + if (!cache) { + return; + } + for (const [cacheKey, moduleValue] of Object.entries(cache)) { + maybeRecordDependency(moduleValue?.filename ?? 
cacheKey); + } +} + +async function collectDenoInfoDependencies(files: string[]) { + const deno = (globalThis as Record).Deno as + | { + Command?: new ( + command: string, + options: Record, + ) => { + output: () => Promise<{ + success: boolean; + stdout: Uint8Array; + }>; + }; + } + | undefined; + if (!deno || typeof deno.Command !== "function") { + return; + } + + for (const file of files) { + try { + const cmd = new deno.Command("deno", { + args: ["info", "--json", file], + stdout: "piped", + stderr: "null", + }); + const output = await cmd.output(); + if (!output.success) { + continue; + } + const parsed = JSON.parse(new TextDecoder().decode(output.stdout)); + collectFileUrlsFromJson(parsed); + } catch { + continue; + } + } +} + +function collectFileUrlsFromJson(value: unknown) { + if (typeof value === "string") { + maybeRecordDependency(value); + return; + } + if (Array.isArray(value)) { + for (const item of value) { + collectFileUrlsFromJson(item); + } + return; + } + if (!value || typeof value !== "object") { + return; + } + for (const child of Object.values(value)) { + collectFileUrlsFromJson(child); + } +} + +function collectDependencyFiles(): string[] { + return Array.from(dependencyFiles).sort(); +} + function serializeSseEvent(event: { event?: string; data: string }): string { return ( Object.entries(event) @@ -115,12 +516,33 @@ function serializeSseEvent(event: { event?: string; data: string }): string { } function createSseWriter(): SseWriter | null { + const netModule = (() => { + try { + return runtimeRequire("node:net") as NetModule; + } catch { + return null; + } + })(); + const sock = process.env.BT_EVAL_SSE_SOCK; if (sock) { - const socket = net.createConnection({ path: sock }); + if (!netModule) { + return null; + } + let socket: ReturnType; + try { + socket = netModule.createConnection({ path: sock }); + } catch (err) { + console.error( + `Failed to connect to SSE socket: ${ + err instanceof Error ? err.message : String(err) + }`, + ); + return null; + } socket.on("error", (err) => { - console.error(`Failed to connect to SSE socket: ${err.message}`); - process.exitCode = 1; + const message = err instanceof Error ? err.message : String(err); + console.error(`Failed to connect to SSE socket: ${message}`); }); const send = (event: string, payload: unknown) => { if (!socket.writable) { @@ -147,8 +569,26 @@ function createSseWriter(): SseWriter | null { throw new Error(`Invalid BT_EVAL_SSE_ADDR: ${addr}`); } - const socket = net.createConnection({ host, port }); + if (!netModule) { + return null; + } + + let socket: ReturnType; + try { + socket = netModule.createConnection({ host, port }); + } catch (err) { + console.error( + `Failed to connect to SSE address ${addr}: ${ + err instanceof Error ? err.message : String(err) + }`, + ); + return null; + } socket.setNoDelay(true); + socket.on("error", (err) => { + const message = err instanceof Error ? 
err.message : String(err); + console.error(`Failed to connect to SSE address ${addr}: ${message}`); + }); const send = (event: string, payload: unknown) => { if (!socket.writable) { @@ -585,6 +1025,7 @@ async function createEvalRunner() { const finish = (ok: boolean) => { if (sse) { + sse.send("dependencies", { files: collectDependencyFiles() }); sse.send("done", ""); sse.close(); } @@ -612,7 +1053,12 @@ async function main() { process.exit(1); } + installDependencyTracking(); const normalized = normalizeFiles(files); + for (const file of normalized) { + maybeRecordDependency(file); + } + collectStaticLocalDependencies(normalized); ensureBraintrustAvailable(); await loadBraintrust(); initRegistry(); @@ -673,6 +1119,8 @@ async function main() { ok = await runner.runRegisteredEvals(evaluators); } } finally { + collectRequireCacheDependencies(); + await collectDenoInfoDependencies(normalized); runner.finish(ok); } } diff --git a/src/eval.rs b/src/eval.rs index 774dda7..998cce7 100644 --- a/src/eval.rs +++ b/src/eval.rs @@ -1,9 +1,10 @@ -use std::collections::HashMap; +use std::collections::{BTreeSet, HashMap, VecDeque}; +use std::ffi::{OsStr, OsString}; use std::path::{Path, PathBuf}; -use std::process::Stdio; +use std::process::{ExitStatus, Stdio}; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; -use std::time::{SystemTime, UNIX_EPOCH}; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; use anyhow::{Context, Result}; use clap::{Args, ValueEnum}; @@ -31,6 +32,12 @@ use ratatui::Terminal; use crate::args::BaseArgs; const MAX_NAME_LENGTH: usize = 40; +const WATCH_POLL_INTERVAL: Duration = Duration::from_millis(500); + +struct EvalRunOutput { + status: ExitStatus, + dependencies: Vec, +} const JS_RUNNER_FILE: &str = "eval-runner.ts"; const PY_RUNNER_FILE: &str = "eval-runner.py"; const JS_RUNNER_SOURCE: &str = include_str!("../scripts/eval-runner.ts"); @@ -66,7 +73,7 @@ pub struct EvalArgs { #[arg(required = true, value_name = "FILE")] pub files: Vec, - /// Eval runner binary (e.g. tsx, bun, ts-node, python). Defaults to tsx for JS files. + /// Eval runner binary (e.g. tsx, bun, ts-node, deno, python). Defaults to tsx for JS files. #[arg(long, short = 'r', env = "BT_EVAL_RUNNER", value_name = "RUNNER")] pub runner: Option, @@ -88,26 +95,104 @@ pub struct EvalArgs { value_parser = clap::builder::BoolishValueParser::new() )] pub no_send_logs: bool, + + /// Re-run evals when input files change. + #[arg(long, short = 'w')] + pub watch: bool, } pub async fn run(base: BaseArgs, args: EvalArgs) -> Result<()> { - run_eval_files( - &base, - args.language, - args.runner.clone(), - args.files.clone(), - args.no_send_logs, - ) - .await + if args.watch { + run_eval_files_watch( + &base, + args.language, + args.runner.clone(), + args.files.clone(), + args.no_send_logs, + ) + .await + } else { + let output = run_eval_files_once( + &base, + args.language, + args.runner.clone(), + args.files.clone(), + args.no_send_logs, + ) + .await?; + if !output.status.success() { + anyhow::bail!("eval runner exited with status {}", output.status); + } + Ok(()) + } } -async fn run_eval_files( +async fn run_eval_files_watch( base: &BaseArgs, language_override: Option, runner_override: Option, files: Vec, no_send_logs: bool, ) -> Result<()> { + let input_watch_paths = resolve_watch_paths(&files)?; + let mut active_watch_paths = input_watch_paths.clone(); + let mut watch_state = snapshot_watch_state(&active_watch_paths)?; + + eprintln!( + "Watch mode enabled for {} path(s). 
Press Ctrl-C to stop.", + active_watch_paths.len() + ); + + loop { + match run_eval_files_once( + base, + language_override, + runner_override.clone(), + files.clone(), + no_send_logs, + ) + .await + { + Ok(output) => { + let merged_paths = merge_watch_paths(&input_watch_paths, &output.dependencies); + update_watch_targets(&mut active_watch_paths, &mut watch_state, merged_paths)?; + if output.status.success() { + eprintln!( + "Eval run completed. Watching {} path(s). Waiting for changes...", + active_watch_paths.len() + ); + } else { + eprintln!( + "Eval run failed: eval runner exited with status {}", + output.status + ); + eprintln!( + "Watching {} path(s). Waiting for changes...", + active_watch_paths.len() + ); + } + } + Err(err) => { + eprintln!("Eval run failed: {err:#}"); + eprintln!("Waiting for changes..."); + } + } + + let changed = wait_for_watch_changes(&active_watch_paths, &mut watch_state).await?; + eprintln!( + "Detected changes in {}. Re-running evals.\n", + format_watch_paths(&changed) + ); + } +} + +async fn run_eval_files_once( + base: &BaseArgs, + language_override: Option, + runner_override: Option, + files: Vec, + no_send_logs: bool, +) -> Result { let language = detect_eval_language(&files, language_override)?; let show_js_runner_hint_on_failure = language == EvalLanguage::JavaScript && runner_override.is_none(); @@ -184,6 +269,7 @@ async fn run_eval_files( let mut ui = EvalUi::new(); let mut status = None; + let mut dependency_files: Vec = Vec::new(); drop(tx); @@ -191,6 +277,9 @@ async fn run_eval_files( tokio::select! { event = rx.recv() => { match event { + Some(EvalEvent::Dependencies { files }) => { + dependency_files.extend(files); + } Some(event) => ui.handle(event), None => { if status.is_none() { @@ -220,20 +309,236 @@ async fn run_eval_files( ui.finish(); - if let Some(status) = status { - if !status.success() { - if show_js_runner_hint_on_failure { - anyhow::bail!( - "eval runner exited with status {status}\nHint: If this eval uses ESM features (like top-level await), try `--runner vite-node`." - ); + let status = status.context("eval runner process exited without a status")?; + if !status.success() && show_js_runner_hint_on_failure { + eprintln!( + "Hint: If this eval uses ESM features (like top-level await), try `--runner vite-node`." 
+        ); +    } +    let mut dependencies = normalize_watch_paths(dependency_files.into_iter().map(PathBuf::from))?; +    if language == EvalLanguage::JavaScript { +        let static_dependencies = collect_js_static_dependencies(&files)?; +        dependencies = merge_watch_paths(&dependencies, &static_dependencies); +    } + +    Ok(EvalRunOutput { +        status, +        dependencies, +    }) +} + +#[derive(Debug, Clone, Eq, PartialEq)] +struct WatchEntry { +    modified: Option<SystemTime>, +    len: u64, +} + +type WatchState = HashMap<PathBuf, Option<WatchEntry>>; + +fn resolve_watch_paths(files: &[String]) -> Result<Vec<PathBuf>> { +    normalize_watch_paths(files.iter().map(PathBuf::from)) +} + +fn normalize_watch_paths(paths: impl IntoIterator<Item = PathBuf>) -> Result<Vec<PathBuf>> { +    let cwd = std::env::current_dir().context("failed to read current directory")?; +    let mut deduped = BTreeSet::new(); + +    for path in paths { +        let absolute = if path.is_absolute() { +            path +        } else { +            cwd.join(path) +        }; +        deduped.insert(absolute); +    } + +    Ok(deduped.into_iter().collect()) +} + +fn merge_watch_paths(inputs: &[PathBuf], dependencies: &[PathBuf]) -> Vec<PathBuf> { +    let mut deduped = BTreeSet::new(); +    deduped.extend(inputs.iter().cloned()); +    deduped.extend(dependencies.iter().cloned()); +    deduped.into_iter().collect() +} + +fn collect_js_static_dependencies(files: &[String]) -> Result<Vec<PathBuf>> { +    let roots = resolve_watch_paths(files)?; +    let mut queue: VecDeque<PathBuf> = roots.into_iter().collect(); +    let mut visited = BTreeSet::new(); +    let mut discovered = BTreeSet::new(); + +    while let Some(file) = queue.pop_front() { +        if !visited.insert(file.clone()) { +            continue; +        } +        discovered.insert(file.clone()); + +        let content = match std::fs::read_to_string(&file) { +            Ok(content) => content, +            Err(err) if err.kind() == std::io::ErrorKind::NotFound => continue, +            Err(err) => { +                return Err(err).with_context(|| format!("failed to read {}", file.display())); }             } -            anyhow::bail!("eval runner exited with status {status}"); +        }; + +        for specifier in extract_js_local_specifiers(&content) { +            if let Some(resolved) = resolve_js_local_specifier(&file, &specifier) { +                if !visited.contains(&resolved) { +                    queue.push_back(resolved.clone()); +                } +                discovered.insert(resolved); +            } +        } +    } + +    Ok(discovered.into_iter().collect()) +} + +fn extract_js_local_specifiers(content: &str) -> Vec<String> { +    const PATTERNS: &[(&str, char)] = &[ +        ("from \"", '"'), +        ("from '", '\''), +        ("import(\"", '"'), +        ("import('", '\''), +        ("require(\"", '"'), +        ("require('", '\''), +    ]; + +    let mut specifiers = Vec::new(); +    for (prefix, quote) in PATTERNS { +        let mut offset = 0usize; +        while let Some(start) = content[offset..].find(prefix) { +            let specifier_start = offset + start + prefix.len(); +            if let Some(end_rel) = content[specifier_start..].find(*quote) { +                let specifier = &content[specifier_start..specifier_start + end_rel]; +                if specifier.starts_with("./") +                    || specifier.starts_with("../") +                    || specifier.starts_with("/") +                    || specifier.starts_with("file://") +                { +                    specifiers.push(specifier.to_string()); +                } +                offset = specifier_start + end_rel + 1; +            } else { +                break; +            } +        } +    } +    specifiers +} + +fn resolve_js_local_specifier(base_file: &Path, specifier: &str) -> Option<PathBuf> { +    let base_dir = base_file.parent()?; +    let candidate = if specifier.starts_with("file://") { +        PathBuf::from(specifier.trim_start_matches("file://")) +    } else if specifier.starts_with('/') { +        PathBuf::from(specifier) +    } else { +        base_dir.join(specifier) +    }; + +    let mut candidates = vec![candidate.clone()]; +    if candidate.extension().is_none() { +        for ext in ["ts", "tsx", "js", "jsx", "mjs", "cjs", "mts",
"cts", "json"] { + candidates.push(candidate.with_extension(ext)); + } + for ext in ["ts", "tsx", "js", "jsx", "mjs", "cjs", "mts", "cts", "json"] { + candidates.push(candidate.join(format!("index.{ext}"))); + } + } + + candidates.into_iter().find(|path| path.is_file()) +} + +fn read_watch_entry(path: &Path) -> Result> { + match std::fs::metadata(path) { + Ok(metadata) => Ok(Some(WatchEntry { + modified: metadata.modified().ok(), + len: metadata.len(), + })), + Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None), + Err(err) => { + Err(err).with_context(|| format!("failed to read metadata for {}", path.display())) } } +} + +fn snapshot_watch_state(paths: &[PathBuf]) -> Result { + let mut state = HashMap::with_capacity(paths.len()); + for path in paths { + state.insert(path.clone(), read_watch_entry(path)?); + } + Ok(state) +} +fn update_watch_targets( + active_paths: &mut Vec, + state: &mut WatchState, + next_paths: Vec, +) -> Result<()> { + let next_set: BTreeSet = next_paths.into_iter().collect(); + let current_set: BTreeSet = active_paths.iter().cloned().collect(); + if next_set == current_set { + return Ok(()); + } + + state.retain(|path, _| next_set.contains(path)); + for path in &next_set { + if !state.contains_key(path) { + state.insert(path.clone(), read_watch_entry(path)?); + } + } + + *active_paths = next_set.into_iter().collect(); Ok(()) } +fn detect_watch_changes(paths: &[PathBuf], state: &mut WatchState) -> Result> { + let mut changed = Vec::new(); + + for path in paths { + let current = read_watch_entry(path)?; + let previous = state.get(path).cloned().unwrap_or(None); + if current != previous { + changed.push(path.clone()); + state.insert(path.clone(), current); + } + } + + Ok(changed) +} + +async fn wait_for_watch_changes(paths: &[PathBuf], state: &mut WatchState) -> Result> { + loop { + let changed = detect_watch_changes(paths, state)?; + if !changed.is_empty() { + return Ok(changed); + } + tokio::time::sleep(WATCH_POLL_INTERVAL).await; + } +} + +fn format_watch_paths(paths: &[PathBuf]) -> String { + const MAX_DISPLAYED: usize = 3; + + let rendered = paths + .iter() + .take(MAX_DISPLAYED) + .map(|path| path.display().to_string()) + .collect::>(); + + if paths.len() > MAX_DISPLAYED { + format!( + "{} and {} more path(s)", + rendered.join(", "), + paths.len() - MAX_DISPLAYED + ) + } else { + rendered.join(", ") + } +} + fn build_env(base: &BaseArgs) -> Vec<(String, String)> { let mut envs = Vec::new(); if let Some(api_key) = base.api_key.as_ref() { @@ -293,15 +598,25 @@ fn build_js_command( ) -> Result { let command = if let Some(explicit) = runner_override.as_deref() { let resolved_runner = resolve_js_runner_command(explicit, files); - let runner_script = select_js_runner_entrypoint(runner, resolved_runner.as_ref())?; - let mut command = Command::new(resolved_runner); - command.arg(runner_script).args(files); - command + if is_deno_runner(explicit) || is_deno_runner_path(resolved_runner.as_ref()) { + let runner_script = prepare_js_runner_in_cwd()?; + build_deno_js_command(resolved_runner.as_os_str(), &runner_script, files) + } else { + let runner_script = select_js_runner_entrypoint(runner, resolved_runner.as_ref())?; + let mut command = Command::new(resolved_runner); + command.arg(runner_script).args(files); + command + } } else if let Some(auto_runner) = find_js_runner_binary(files) { - let runner_script = select_js_runner_entrypoint(runner, auto_runner.as_ref())?; - let mut command = Command::new(auto_runner); - command.arg(runner_script).args(files); - 
command +        if is_deno_runner_path(&auto_runner) { +            let runner_script = prepare_js_runner_in_cwd()?; +            build_deno_js_command(auto_runner.as_os_str(), &runner_script, files) +        } else { +            let runner_script = select_js_runner_entrypoint(runner, auto_runner.as_ref())?; +            let mut command = Command::new(auto_runner); +            command.arg(runner_script).args(files); +            command +        }     } else {         let mut command = Command::new("npx");         command.arg("--yes").arg("tsx").arg(runner).args(files); @@ -311,6 +626,28 @@ fn build_js_command(     Ok(command) } +fn build_deno_js_command( +    deno_runner: impl AsRef<OsStr>, +    runner: &Path, +    files: &[String], +) -> Command { +    let mut command = Command::new(deno_runner); +    command.args(deno_js_command_args(runner, files)); +    command +} + +fn deno_js_command_args(runner: &Path, files: &[String]) -> Vec<OsString> { +    let mut args = vec![ +        OsString::from("run"), +        OsString::from("-A"), +        OsString::from("--node-modules-dir=auto"), +        OsString::from("--unstable-detect-cjs"), +        runner.as_os_str().to_os_string(), +    ]; +    args.extend(files.iter().map(OsString::from)); +    args +} + fn build_python_command(     runner_override: Option<String>,     runner: &PathBuf, @@ -338,9 +675,9 @@ fn build_python_command( } fn find_js_runner_binary(files: &[String]) -> Option<PathBuf> { -    // Prefer local project bins first, then PATH. `tsx` and `vite-node` are -    // preferred, with ts-node variants as lower-priority fallback. -    const RUNNER_CANDIDATES: &[&str] = &["tsx", "vite-node", "ts-node", "ts-node-esm"]; +    // Prefer local project bins first, then PATH. `tsx` remains the preferred +    // default, with other common TS runners as fallback. +    const RUNNER_CANDIDATES: &[&str] = &["tsx", "vite-node", "ts-node", "ts-node-esm", "deno"];     for candidate in RUNNER_CANDIDATES {         if let Some(path) = find_node_module_bin_for_files(candidate, files) { @@ -395,6 +732,22 @@ fn js_runner_search_roots(files: &[String]) -> Vec<PathBuf> {     search_roots } +fn is_deno_runner(runner: &str) -> bool { +    let file_name = Path::new(runner) +        .file_name() +        .and_then(|value| value.to_str()) +        .unwrap_or(runner); +    file_name.eq_ignore_ascii_case("deno") || file_name.eq_ignore_ascii_case("deno.exe") +} + +fn is_deno_runner_path(runner: &Path) -> bool { +    runner +        .file_name() +        .and_then(|value| value.to_str()) +        .map(|name| name.eq_ignore_ascii_case("deno") || name.eq_ignore_ascii_case("deno.exe")) +        .unwrap_or(false) +} + fn select_js_runner_entrypoint(default_runner: &Path, runner_command: &Path) -> Result<PathBuf> {     if is_ts_node_runner(runner_command) {         return prepare_js_runner_in_cwd(); @@ -520,6 +873,9 @@ enum EvalEvent {     Start(ExperimentSummary),     Summary(ExperimentSummary),     Progress(SseProgressEventData), +    Dependencies { +        files: Vec<String>, +    },     Done,     Error {         message: String, @@ -598,6 +954,11 @@ struct SseConsoleEventData {     message: String, } +#[derive(Debug, Deserialize)] +struct SseDependenciesEventData { +    files: Vec<String>, +} + async fn forward_stream(     stream: T,     name: &'static str, @@ -688,6 +1049,13 @@ fn handle_sse_event(event: Option<String>, data: String, tx: &mpsc::UnboundedSender<EvalEvent>         "dependencies" => { +            if let Ok(payload) = serde_json::from_str::<SseDependenciesEventData>(&data) { +                let _ = tx.send(EvalEvent::Dependencies { +                    files: payload.files, +                }); +            } +        }         "done" => {             let _ = tx.send(EvalEvent::Done);         } @@ -738,6 +1106,7 @@ impl EvalUi {             EvalEvent::Progress(progress) => {                 self.handle_progress(progress);             } +            EvalEvent::Dependencies { .. } => {}             EvalEvent::Console { message, ..
} => { let _ = self.progress.println(message); } @@ -1217,63 +1586,66 @@ fn convert_color(color: Color) -> CtColor { #[cfg(test)] mod tests { use super::*; + use std::fs; + use std::path::PathBuf; + use std::time::{SystemTime, UNIX_EPOCH}; - fn unique_test_dir(label: &str) -> PathBuf { + fn make_temp_dir(prefix: &str) -> PathBuf { let now = SystemTime::now() .duration_since(UNIX_EPOCH) - .expect("clock should be monotonic") + .expect("system clock before unix epoch") .as_nanos(); - std::env::temp_dir().join(format!( - "bt-eval-tests-{label}-{}-{now}", + let path = std::env::temp_dir().join(format!( + "bt-eval-tests-{prefix}-{}-{now}", std::process::id() - )) + )); + fs::create_dir_all(&path).expect("create temp dir"); + path } #[test] fn materialize_runner_script_writes_file() { - let dir = unique_test_dir("write"); - std::fs::create_dir_all(&dir).expect("test dir should be created"); + let dir = make_temp_dir("write"); let path = materialize_runner_script(&dir, "runner.ts", "console.log('ok');") .expect("runner script should be materialized"); - let contents = std::fs::read_to_string(path).expect("runner script should be readable"); + let contents = fs::read_to_string(path).expect("runner script should be readable"); assert_eq!(contents, "console.log('ok');"); - let _ = std::fs::remove_dir_all(&dir); + let _ = fs::remove_dir_all(&dir); } #[test] fn materialize_runner_script_overwrites_stale_content() { - let dir = unique_test_dir("overwrite"); - std::fs::create_dir_all(&dir).expect("test dir should be created"); + let dir = make_temp_dir("overwrite"); let path = dir.join("runner.py"); - std::fs::write(&path, "stale").expect("stale file should be written"); + fs::write(&path, "stale").expect("stale file should be written"); materialize_runner_script(&dir, "runner.py", "fresh") .expect("runner script should be updated"); - let contents = std::fs::read_to_string(path).expect("runner script should be readable"); + let contents = fs::read_to_string(path).expect("runner script should be readable"); assert_eq!(contents, "fresh"); - let _ = std::fs::remove_dir_all(&dir); + let _ = fs::remove_dir_all(&dir); } #[test] fn prepare_eval_runners_writes_embedded_scripts() { - let dir = unique_test_dir("embedded"); + let dir = make_temp_dir("embedded"); let (js_runner, py_runner) = prepare_eval_runners_in_dir(&dir).expect("embedded runners should be materialized"); - let js = std::fs::read_to_string(js_runner).expect("js runner should be readable"); - let py = std::fs::read_to_string(py_runner).expect("python runner should be readable"); + let js = fs::read_to_string(js_runner).expect("js runner should be readable"); + let py = fs::read_to_string(py_runner).expect("python runner should be readable"); assert_eq!(js, JS_RUNNER_SOURCE); assert_eq!(py, PY_RUNNER_SOURCE); - let _ = std::fs::remove_dir_all(&dir); + let _ = fs::remove_dir_all(&dir); } #[test] fn resolve_js_runner_command_finds_local_node_module_bin() { - let dir = unique_test_dir("resolve-runner"); + let dir = make_temp_dir("resolve-runner"); let eval_dir = dir.join("evals"); let bin_dir = dir.join("node_modules").join(".bin"); std::fs::create_dir_all(&eval_dir).expect("eval dir should be created"); @@ -1298,4 +1670,105 @@ mod tests { assert!(boxed.contains("plain line")); assert!(boxed.contains("red text")); } + + #[test] + fn detect_watch_changes_detects_file_create() { + let dir = make_temp_dir("create"); + let file = dir.join("watch.eval.ts"); + let paths = vec![file.clone()]; + + let mut state = 
snapshot_watch_state(&paths).expect("snapshot watch state"); + assert!(detect_watch_changes(&paths, &mut state) + .expect("check changes") + .is_empty()); + + fs::write(&file, "export {}").expect("write test file"); + let changed = detect_watch_changes(&paths, &mut state).expect("check changes"); + assert_eq!(changed, vec![file.clone()]); + + let _ = fs::remove_dir_all(&dir); + } + + #[test] + fn detect_watch_changes_detects_file_update() { + let dir = make_temp_dir("update"); + let file = dir.join("watch.eval.ts"); + fs::write(&file, "export const v = 1;").expect("write initial file"); + let paths = vec![file.clone()]; + + let mut state = snapshot_watch_state(&paths).expect("snapshot watch state"); + assert!(detect_watch_changes(&paths, &mut state) + .expect("check changes") + .is_empty()); + + fs::write(&file, "export const value = 2;").expect("write updated file"); + let changed = detect_watch_changes(&paths, &mut state).expect("check changes"); + assert_eq!(changed, vec![file.clone()]); + + let _ = fs::remove_dir_all(&dir); + } + + #[test] + fn merge_watch_paths_dedupes_and_includes_dependencies() { + let input = vec![ + PathBuf::from("/tmp/a.eval.ts"), + PathBuf::from("/tmp/b.eval.ts"), + ]; + let deps = vec![ + PathBuf::from("/tmp/b.eval.ts"), + PathBuf::from("/tmp/helper.ts"), + ]; + + let merged = merge_watch_paths(&input, &deps); + assert_eq!( + merged, + vec![ + PathBuf::from("/tmp/a.eval.ts"), + PathBuf::from("/tmp/b.eval.ts"), + PathBuf::from("/tmp/helper.ts") + ] + ); + } + + #[test] + fn collect_js_static_dependencies_follows_local_imports() { + let dir = make_temp_dir("js-static"); + let entry = dir.join("entry.eval.ts"); + let helper = dir.join("helper.js"); + + fs::write( + &entry, + "import { helper } from './helper.js';\nexport default helper;", + ) + .expect("write entry file"); + fs::write(&helper, "export const helper = 'ok';").expect("write helper file"); + + let files = vec![entry.to_string_lossy().to_string()]; + let dependencies = collect_js_static_dependencies(&files).expect("collect js dependencies"); + + assert!(dependencies.contains(&entry)); + assert!(dependencies.contains(&helper)); + let _ = fs::remove_dir_all(&dir); + } + + #[test] + fn build_deno_js_command_includes_detect_cjs_flag() { + let runner = PathBuf::from("/tmp/eval-runner.ts"); + let files = vec!["tests/basic.eval.ts".to_string()]; + let args: Vec = deno_js_command_args(&runner, &files) + .into_iter() + .map(|arg| arg.to_string_lossy().to_string()) + .collect(); + assert_eq!( + args, + vec![ + "run", + "-A", + "--node-modules-dir=auto", + "--unstable-detect-cjs", + "/tmp/eval-runner.ts", + "tests/basic.eval.ts", + ] + ); + } } diff --git a/tests/eval_fixtures.rs b/tests/eval_fixtures.rs index 847671d..1df990c 100644 --- a/tests/eval_fixtures.rs +++ b/tests/eval_fixtures.rs @@ -1,7 +1,11 @@ use std::collections::{BTreeMap, BTreeSet}; use std::fs; +use std::io::{BufRead, BufReader, Read}; use std::path::{Path, PathBuf}; -use std::process::{Command, Stdio}; +use std::process::{Child, Command, Stdio}; +use std::sync::{Arc, Mutex, MutexGuard, OnceLock}; +use std::thread; +use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; use serde::Deserialize; @@ -16,8 +20,16 @@ struct FixtureConfig { expect_success: Option, } +fn test_lock() -> MutexGuard<'static, ()> { + static LOCK: OnceLock> = OnceLock::new(); + LOCK.get_or_init(|| Mutex::new(())) + .lock() + .unwrap_or_else(|err| err.into_inner()) +} + #[test] fn eval_fixtures() { + let _guard = test_lock(); let root = 
PathBuf::from(env!("CARGO_MANIFEST_DIR"));     let fixtures_root = root.join("tests").join("evals");     if !fixtures_root.exists() { @@ -79,6 +91,7 @@ fn eval_fixtures()         match runtime {             "node" => ensure_dependencies(&dir),             "bun" => ensure_dependencies(&dir), +            "deno" => ensure_dependencies(&dir),             "python" => {}             other => panic!("Unsupported runtime for fixture {fixture_name}: {other}"),         } @@ -111,15 +124,22 @@ fn eval_fixtures()             eprintln!("Skipping {fixture_name} [{label}] (bun not installed).");             continue;         } +        if needs_deno(runtime, runner.as_deref()) && !command_exists("deno") { +            if required_runtimes().contains("deno") { +                panic!("Deno runtime is required but unavailable for fixture {fixture_name}"); +            } +            let label = runner.as_deref().unwrap_or("default"); +            eprintln!("Skipping {fixture_name} [{label}] (deno not installed)."); +            continue; +        }         let mut cmd = Command::new(&bt_path);         cmd.arg("eval");         if let Some(args) = config.args.as_ref() {             cmd.args(args);         } -        if let Some(runner_cmd) = -            resolve_runner(&dir, runner.as_deref(), python_runner.as_ref()) -        { +        let resolved_runner = resolve_runner(&dir, runner.as_deref(), python_runner.as_ref()); +        if let Some(runner_cmd) = resolved_runner.as_ref() {             cmd.arg("--runner").arg(runner_cmd);         }         cmd.args(&config.files).current_dir(&dir); @@ -149,8 +169,15 @@ fn eval_fixtures()         if status.success() != expect_success {             let stdout = String::from_utf8_lossy(&output.stdout);             let stderr = String::from_utf8_lossy(&output.stderr); +            let deno_diagnostics = +                if needs_deno(runtime, resolved_runner.as_deref()) && expect_success { +                    collect_deno_eval_diagnostics(&dir, &config.files) +                } else { +                    None +                };             panic!( -                "Fixture {fixture_name} [{}] had status {status} (expected success={expect_success})\nstdout:\n{stdout}\nstderr:\n{stderr}", +                "Fixture {fixture_name} [{}] had status {status} (expected success={expect_success})\nstdout:\n{stdout}\nstderr:\n{stderr}{}", -                runner.as_deref().unwrap_or("default") +                runner.as_deref().unwrap_or("default"), +                deno_diagnostics.unwrap_or_default()             );         } @@ -167,11 +194,326 @@ fn eval_fixtures()     } } +#[test] +fn eval_watch_js_dependency_retriggers() { +    let _guard = test_lock(); +    if !command_exists("node") { +        if required_runtimes().contains("node") { +            panic!("node runtime is required but unavailable for watch test"); +        } +        eprintln!("Skipping eval_watch_js_dependency_retriggers (node not installed)."); +        return; +    } + +    let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")); +    let fixtures_root = root.join("tests").join("evals"); +    let fixture_dir = fixtures_root.join("js").join("eval-ts-cjs"); +    ensure_dependencies(&fixture_dir); + +    let bt_path = bt_binary_path(&root); +    let runner = resolve_runner(&fixture_dir, Some("tsx"), None).expect("resolve js runner"); + +    assert_watch_detects_dependency_change( +        &bt_path, +        &fixture_dir, +        &runner, +        "tests/async-import.eval.ts", +        "tests/helper.js", +    ); +} + +#[test] +fn eval_watch_bun_dependency_retriggers() { +    let _guard = test_lock(); +    if !command_exists("bun") { +        if required_runtimes().contains("bun") { +            panic!("bun runtime is required but unavailable for watch test"); +        } +        eprintln!("Skipping eval_watch_bun_dependency_retriggers (bun not installed)."); +        return; +    } + +    let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")); +    let fixtures_root = root.join("tests").join("evals"); +    let fixture_dir = fixtures_root.join("js").join("eval-ts-cjs"); +    ensure_dependencies(&fixture_dir); + +    let bt_path = bt_binary_path(&root); + +    assert_watch_detects_dependency_change( +        &bt_path, +        &fixture_dir, +        "bun",
"tests/async-import.eval.ts", + "tests/helper.js", + ); +} + +#[test] +fn eval_watch_deno_dependency_retriggers() { + let _guard = test_lock(); + if !command_exists("deno") { + if required_runtimes().contains("deno") { + panic!("deno runtime is required but unavailable for watch test"); + } + eprintln!("Skipping eval_watch_deno_dependency_retriggers (deno not installed)."); + return; + } + + let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let fixtures_root = root.join("tests").join("evals"); + let fixture_dir = fixtures_root.join("js").join("eval-deno"); + ensure_dependencies(&fixture_dir); + + let bt_path = bt_binary_path(&root); + + assert_watch_detects_dependency_change( + &bt_path, + &fixture_dir, + "deno", + "tests/basic.eval.ts", + "tests/helper.ts", + ); +} + +#[test] +fn eval_watch_python_dependency_retriggers() { + let _guard = test_lock(); + let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let fixtures_root = root.join("tests").join("evals"); + let fixture_dir = fixtures_root.join("py").join("local_import"); + + let python = match ensure_python_env(&fixtures_root.join("py")) { + Some(python) => python, + None => { + if required_runtimes().contains("python") { + panic!("python runtime unavailable for watch dependency test"); + } + eprintln!( + "Skipping eval_watch_python_dependency_retriggers (python runtime unavailable)." + ); + return; + } + }; + let bt_path = bt_binary_path(&root); + + assert_watch_detects_dependency_change( + &bt_path, + &fixture_dir, + python.to_string_lossy().as_ref(), + "eval_local_import.py", + "helper.py", + ); +} + fn read_fixture_config(path: &Path) -> FixtureConfig { let raw = fs::read_to_string(path).expect("read fixture.json"); serde_json::from_str(&raw).expect("parse fixture.json") } +fn collect_deno_eval_diagnostics(dir: &Path, files: &[String]) -> Option { + if !command_exists("deno") { + return None; + } + + let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let runner_script = root.join("scripts").join("eval-runner.ts"); + let local_runner_dir = dir.join(".bt").join("eval-runners"); + fs::create_dir_all(&local_runner_dir).ok()?; + let local_runner = local_runner_dir.join("diag-eval-runner.ts"); + fs::copy(&runner_script, &local_runner).ok()?; + let runner_script_str = local_runner.to_string_lossy().to_string(); + + let mut cmd = Command::new("deno"); + cmd.args([ + "run", + "-A", + "--node-modules-dir=auto", + "--unstable-detect-cjs", + ]); + cmd.arg(runner_script_str); + cmd.args(files); + cmd.current_dir(dir); + cmd.env("BT_EVAL_LOCAL", "1"); + cmd.env( + "BRAINTRUST_API_KEY", + std::env::var("BRAINTRUST_API_KEY").unwrap_or_else(|_| "local".to_string()), + ); + + let output = cmd.output().ok()?; + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + Some(format!( + "\n[deno-direct] status: {}\n[deno-direct] stdout:\n{}\n[deno-direct] stderr:\n{}\n", + output.status, stdout, stderr + )) +} + +fn bt_binary_path(root: &Path) -> PathBuf { + match std::env::var("CARGO_BIN_EXE_bt") { + Ok(path) => PathBuf::from(path), + Err(_) => { + let candidate = root.join("target").join("debug").join("bt"); + if !candidate.is_file() { + build_bt_binary(root); + } + candidate + } + } +} + +struct FileRestoreGuard { + path: PathBuf, + original: Vec, +} + +impl FileRestoreGuard { + fn new(path: PathBuf) -> Self { + let original = fs::read(&path).expect("read original file bytes"); + Self { path, original } + } +} + +impl Drop for FileRestoreGuard { + fn drop(&mut self) { + let _ = 
fs::write(&self.path, &self.original); +    } +} + +fn assert_watch_detects_dependency_change( +    bt_path: &Path, +    fixture_dir: &Path, +    runner: &str, +    entry_file: &str, +    dependency_file: &str, +) { +    let dep_path = fixture_dir.join(dependency_file); +    let _restore_guard = FileRestoreGuard::new(dep_path.clone()); + +    let mut cmd = Command::new(bt_path); +    cmd.arg("eval") +        .arg("--watch") +        .arg("--no-send-logs") +        .arg("--runner") +        .arg(runner) +        .arg(entry_file) +        .current_dir(fixture_dir) +        .env("BT_EVAL_LOCAL", "1") +        .env( +            "BRAINTRUST_API_KEY", +            std::env::var("BRAINTRUST_API_KEY").unwrap_or_else(|_| "local".to_string()), +        ) +        .stdout(Stdio::piped()) +        .stderr(Stdio::piped()); + +    let mut child = cmd.spawn().expect("spawn bt eval --watch"); +    let output = Arc::new(Mutex::new(String::new())); +    let mut threads = Vec::new(); + +    if let Some(stdout) = child.stdout.take() { +        threads.push(spawn_output_collector(stdout, Arc::clone(&output))); +    } +    if let Some(stderr) = child.stderr.take() { +        threads.push(spawn_output_collector(stderr, Arc::clone(&output))); +    } + +    wait_for_output( +        &mut child, +        &output, +        "Waiting for changes...", +        Duration::from_secs(45), +    ); + +    let marker_prefix = if dep_path.extension().and_then(|ext| ext.to_str()) == Some("py") { +        "#" +    } else { +        "//" +    }; +    let marker = format!( +        "\n{marker_prefix} bt-watch-test-{}\n", +        SystemTime::now() +            .duration_since(UNIX_EPOCH) +            .expect("system clock before epoch") +            .as_nanos() +    ); +    let mut updated = fs::read_to_string(&dep_path).expect("read dependency file"); +    updated.push_str(&marker); +    fs::write(&dep_path, updated).expect("modify dependency file"); + +    wait_for_output( +        &mut child, +        &output, +        "Detected changes in", +        Duration::from_secs(45), +    ); +    let dep_name = dep_path +        .file_name() +        .and_then(|value| value.to_str()) +        .expect("dependency file name"); +    wait_for_output(&mut child, &output, dep_name, Duration::from_secs(45)); +    wait_for_output( +        &mut child, +        &output, +        "Re-running evals.", +        Duration::from_secs(45), +    ); + +    let _ = child.kill(); +    let _ = child.wait(); +    for handle in threads { +        let _ = handle.join(); +    } +} + +fn spawn_output_collector<R>(reader: R, output: Arc<Mutex<String>>) -> thread::JoinHandle<()> +where +    R: Read + Send + 'static, +{ +    thread::spawn(move || { +        let mut buffered = BufReader::new(reader); +        let mut line = String::new(); +        loop { +            line.clear(); +            match buffered.read_line(&mut line) { +                Ok(0) => break, +                Ok(_) => { +                    let mut guard = output.lock().expect("output lock"); +                    guard.push_str(&line); +                } +                Err(_) => break, +            } +        } +    }) } + +fn wait_for_output( +    child: &mut Child, +    output: &Arc<Mutex<String>>, +    needle: &str, +    timeout: Duration, +) { +    let started = Instant::now(); +    loop { +        if output.lock().expect("output lock").contains(needle) { +            return; +        } + +        if let Some(status) = child.try_wait().expect("try_wait") { +            let captured = output.lock().expect("output lock").clone(); +            panic!( +                "watch process exited early with status {status} while waiting for '{needle}'.\n{captured}" +            ); +        } + +        if started.elapsed() > timeout { +            let captured = output.lock().expect("output lock").clone(); +            panic!("timed out waiting for '{needle}'.\n{captured}"); +        } + +        thread::sleep(Duration::from_millis(100)); +    } } + fn collect_runners(config: &FixtureConfig) -> Vec<Option<String>> {     if let Some(runners) = config.runners.as_ref() {         return runners             .iter() @@ -214,6 +556,10 @@ fn needs_bun(runtime: &str, runner: Option<&str>) -> bool {     runtime == "bun" || runner == Some("bun") } +fn needs_deno(runtime: &str, runner:
Option<&str>) -> bool { + runtime == "deno" || runner == Some("deno") +} + fn required_runtimes() -> BTreeSet { parse_runtime_list("BT_EVAL_REQUIRED_RUNTIMES") } @@ -244,6 +590,10 @@ fn enforce_required_runtimes(fixtures_root: &Path) { panic!("bun runtime is required but not installed"); } + if required.contains("deno") && !command_exists("deno") { + panic!("deno runtime is required but not installed"); + } + if required.contains("python") { let python = ensure_python_env(&fixtures_root.join("py")) .expect("python runtime is required but uv/python is unavailable"); diff --git a/tests/evals/js/eval-deno/fixture.json b/tests/evals/js/eval-deno/fixture.json new file mode 100644 index 0000000..65802f3 --- /dev/null +++ b/tests/evals/js/eval-deno/fixture.json @@ -0,0 +1,5 @@ +{ + "runtime": "deno", + "runner": "deno", + "files": ["tests/basic.eval.ts"] +} diff --git a/tests/evals/js/eval-deno/package.json b/tests/evals/js/eval-deno/package.json new file mode 100644 index 0000000..5694110 --- /dev/null +++ b/tests/evals/js/eval-deno/package.json @@ -0,0 +1,8 @@ +{ + "name": "bt-eval-deno", + "private": true, + "type": "module", + "dependencies": { + "braintrust": "^2.2.0" + } +} diff --git a/tests/evals/js/eval-deno/tests/basic.eval.ts b/tests/evals/js/eval-deno/tests/basic.eval.ts new file mode 100644 index 0000000..8926b3a --- /dev/null +++ b/tests/evals/js/eval-deno/tests/basic.eval.ts @@ -0,0 +1,20 @@ +import { Eval } from "braintrust"; +import { cases } from "./helper.ts"; + +const exactMatch = ({ + output, + expected, +}: { + output: string; + expected?: string; +}) => ({ + name: "exact_match", + score: output === expected ? 1 : 0, +}); + +Eval("test-cli-eval-deno", { + experimentName: "Deno basic eval", + data: cases, + task: async (input: string) => `Hello ${input}`, + scores: [exactMatch], +}); diff --git a/tests/evals/js/eval-deno/tests/helper.ts b/tests/evals/js/eval-deno/tests/helper.ts new file mode 100644 index 0000000..1bf7dcb --- /dev/null +++ b/tests/evals/js/eval-deno/tests/helper.ts @@ -0,0 +1,6 @@ +export function cases() { + return [ + { input: "Ada", expected: "Hello Ada" }, + { input: "Ben", expected: "Hello Ben" }, + ]; +}
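Editor's note, not part of the patch: a minimal TypeScript sketch of the "dependencies" SSE event that the updated runners emit before "done" and that `bt eval --watch` parses into `SseDependenciesEventData` to grow its poll-based watch set. The file paths are hypothetical; the framing mirrors `serializeSseEvent` in scripts/eval-runner.ts.

// Sketch only: build the "dependencies" event frame a runner would write to the
// BT_EVAL_SSE_SOCK / BT_EVAL_SSE_ADDR socket. Paths below are placeholders.
const payload = { files: ["/repo/tests/basic.eval.ts", "/repo/tests/helper.ts"] };
const frame = `event: dependencies\ndata: ${JSON.stringify(payload)}\n\n`;
console.log(frame); // bt merges payload.files with the CLI inputs before the next poll cycle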