braintrustdata · ankrgyl · Feb 9, 2026 · Feb 8, 2026 · Feb 8, 2026 · Feb 8, 2026
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -16,8 +16,8 @@ jobs:
       contents: read
     env:
       CARGO_NET_GIT_FETCH_WITH_CLI: true
-      BT_EVAL_REQUIRED_RUNTIMES: node,bun
-      BT_EVAL_FIXTURE_RUNTIMES: node,bun
+      BT_EVAL_REQUIRED_RUNTIMES: node,bun,deno
+      BT_EVAL_FIXTURE_RUNTIMES: node,bun,deno
     steps:
       - uses: actions/checkout@v4
       - name: Configure git auth for private dependencies
@@ -35,6 +35,9 @@ jobs:
         with:
           node-version: ${{ matrix.node-version }}
       - uses: oven-sh/setup-bun@v2
+      - uses: denoland/setup-deno@v2
+        with:
+          deno-version: v2.x
       - name: Enable pnpm
         run: |
           corepack enable

diff --git a/.gitignore b/.gitignore
@@ -18,5 +18,6 @@ Thumbs.db
 # Node
 node_modules/
 tests/evals/js/eval-bun/test-data.txt
+.bt/
 
 __pycache__
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/eval-todo.md b/eval-todo.md
@@ -27,26 +27,26 @@ Legend:
 - `partial`: implemented but behavior differs
 - `todo`: missing
 
-| Feature / Flag              |           JS CLI |           PY CLI |                                       `bt` | Notes                                                                               |
-| --------------------------- | ---------------: | ---------------: | -----------------------------------------: | ----------------------------------------------------------------------------------- |
-| Run eval files              |              yes |              yes |                                     `done` | Single-language per invocation currently enforced.                                  |
-| Local/no-upload mode        | `--no-send-logs` | `--no-send-logs` | `done` (`--local`, alias `--no-send-logs`) |                                                                                     |
-| Global auth/env passthrough |              yes |              yes |                                     `done` | Via base args/env (`BRAINTRUST_API_KEY`, `BRAINTRUST_API_URL`, project).            |
-| Progress rendering          |              yes |              yes |                                  `partial` | `bt` consumes local SSE and renders Rust TUI/progress, but not full SDK parity yet. |
-| `--list` (discover only)    |              yes |              yes |                                     `todo` |                                                                                     |
-| `--filter`                  |              yes |              yes |                                     `todo` |                                                                                     |
-| `--jsonl` summaries         |              yes |              yes |                                     `todo` |                                                                                     |
-| `--terminate-on-failure`    |              yes |              yes |                                     `todo` |                                                                                     |
-| `--watch`                   |              yes |              yes |                                     `todo` |                                                                                     |
-| `--verbose`                 |              yes |      parent flag |                                     `todo` |                                                                                     |
-| `--env-file`                |              yes |              yes |                                     `todo` |                                                                                     |
-| `--dev` remote eval server  |              yes |              yes |                                     `todo` | Important for `test_remote_evals.py` parity.                                        |
-| `--dev-host`                |              yes |              yes |                                     `todo` |                                                                                     |
-| `--dev-port`                |              yes |              yes |                                     `todo` |                                                                                     |
-| `--dev-org-name`            |              yes |              yes |                                     `todo` |                                                                                     |
-| `--num-workers`             |              n/a |              yes |                                     `todo` | Python-specific concurrency control.                                                |
-| Directory input expansion   |              yes |              yes |                                     `todo` | Today `bt` expects explicit files/extensions.                                       |
-| Mixed runtime selection     |              n/a |              n/a |                                  `partial` | Current `--runner` plus env vars; per-language runner matrix deferred.              |
+| Feature / Flag              |           JS CLI |           PY CLI |                                       `bt` | Notes                                                                                                    |
+| --------------------------- | ---------------: | ---------------: | -----------------------------------------: | -------------------------------------------------------------------------------------------------------- |
+| Run eval files              |              yes |              yes |                                     `done` | Single-language per invocation currently enforced.                                                       |
+| Local/no-upload mode        | `--no-send-logs` | `--no-send-logs` | `done` (`--local`, alias `--no-send-logs`) |                                                                                                          |
+| Global auth/env passthrough |              yes |              yes |                                     `done` | Via base args/env (`BRAINTRUST_API_KEY`, `BRAINTRUST_API_URL`, project).                                 |
+| Progress rendering          |              yes |              yes |                                  `partial` | `bt` consumes local SSE and renders Rust TUI/progress, but not full SDK parity yet.                      |
+| `--list` (discover only)    |              yes |              yes |                                     `todo` |                                                                                                          |
+| `--filter`                  |              yes |              yes |                                     `todo` |                                                                                                          |
+| `--jsonl` summaries         |              yes |              yes |                                     `todo` |                                                                                                          |
+| `--terminate-on-failure`    |              yes |              yes |                                     `todo` |                                                                                                          |
+| `--watch`                   |              yes |              yes |                                  `partial` | Poll-based watcher with Node/Bun dependency hooks, Deno graph collection, and static JS import fallback. |
+| `--verbose`                 |              yes |      parent flag |                                     `todo` |                                                                                                          |
+| `--env-file`                |              yes |              yes |                                     `todo` |                                                                                                          |
+| `--dev` remote eval server  |              yes |              yes |                                     `todo` | Important for `test_remote_evals.py` parity.                                                             |
+| `--dev-host`                |              yes |              yes |                                     `todo` |                                                                                                          |
+| `--dev-port`                |              yes |              yes |                                     `todo` |                                                                                                          |
+| `--dev-org-name`            |              yes |              yes |                                     `todo` |                                                                                                          |
+| `--num-workers`             |              n/a |              yes |                                     `todo` | Python-specific concurrency control.                                                                     |
+| Directory input expansion   |              yes |              yes |                                     `todo` | Today `bt` expects explicit files/extensions.                                                            |
+| Mixed runtime selection     |              n/a |              n/a |                                  `partial` | Current `--runner` plus env vars; per-language runner matrix deferred.                                   |
 
 ## Braintrust Test Callsite Inventory
 

diff --git a/scripts/eval-runner.py b/scripts/eval-runner.py
@@ -33,6 +33,14 @@
 
 INCLUDE = ["**/eval_*.py", "**/*.eval.py"]
 EXCLUDE = ["**/site-packages/**", "**/__pycache__/**"]
+WATCHABLE_PYTHON_EXTENSIONS = {".py"}  # matched against the lowercased file suffix
+WATCH_EXCLUDE_SEGMENTS = (  # substrings looked up in the "/"-normalized absolute path
+    "/site-packages/",
+    "/dist-packages/",
+    "/__pycache__/",
+    "/.venv/",
+    "/venv/",
+)
 
 
 @dataclass
@@ -155,6 +163,41 @@ def collect_files(input_path: str) -> list[str]:
     return [input_path]
 
 
+def is_watchable_dependency(path_input: str, cwd: str) -> bool:
+    """Return True if path_input is an existing watchable file located under cwd."""
+    path = os.path.abspath(path_input)
+    normalized = path.replace("\\", "/")  # exclude segments use "/" separators
+    if not os.path.isfile(path):
+        return False
+    if os.path.splitext(path)[1].lower() not in WATCHABLE_PYTHON_EXTENSIONS:
+        return False
+    if any(segment in normalized for segment in WATCH_EXCLUDE_SEGMENTS):
+        return False
+    root = os.path.normcase(os.path.abspath(cwd))  # accept relative/unnormalized cwd
+    try:
+        return os.path.commonpath([os.path.normcase(path), root]) == root
+    except ValueError:  # e.g. paths on different Windows drives
+        return False
+
+
+def collect_dependency_files(cwd: str, input_files: list[str]) -> list[str]:
+    """Sorted absolute paths of watchable loaded-module files plus the input files."""
+    candidates: list[str] = list(input_files)
+    # Iterate over a snapshot of sys.modules rather than the live dict.
+    for loaded in tuple(sys.modules.values()):
+        source = getattr(loaded, "__file__", None)
+        if source:
+            # A path ending in ".pyc" is mapped to ".py" by dropping the last character.
+            candidates.append(source[:-1] if source.endswith(".pyc") else source)
+
+    watched = {
+        os.path.abspath(candidate)
+        for candidate in candidates
+        if is_watchable_dependency(candidate, cwd)
+    }
+    return sorted(watched)
+
+
 def resolve_module_info(in_file: str) -> tuple[str, list[str]]:
     in_file = os.path.abspath(in_file)
     module_dir = os.path.dirname(in_file)
@@ -356,9 +399,11 @@ def main(argv: list[str] | None = None) -> int:
         login(api_key=args.api_key, org_name=args.org_name, app_url=args.app_url)
 
     sse = create_sse_writer()
+    cwd = os.path.abspath(os.getcwd())
     try:
         success = asyncio.run(run_once(files, local, sse))
         if sse:
+            sse.send("dependencies", {"files": collect_dependency_files(cwd, files)})
             sse.send("done", {"success": success})
         return 0 if success else 1
     finally: