diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6300b5151..5b4678725 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,14 @@ since = "849762245925cce325c04da1d604088370ec3723"
 
 ## Unreleased (v0.8.4)
 
-- TBD
+- feat(gambit): add `createDefaultedRuntime` and defaulted `runDeck` wrapper
+  with CLI-equivalent provider/model routing for library callers
+- refactor(gambit): route CLI runtime/provider setup through shared
+  `default_runtime` construction path
+- feat(demo-runner): migrate demo test-deck prompt generation to Gambit default
+  runtime wrapper (no hardwired OpenRouter provider)
+- docs(gambit): add migration guidance for `runDeck` wrapper and `runDeckCore`
+  replacement mapping
 
 ## v0.8.3
 
diff --git a/README.md b/README.md
index c17a015a7..97a64b201 100644
--- a/README.md
+++ b/README.md
@@ -100,10 +100,10 @@ Drop into a REPL (streams by default):
 npx @bolt-foundry/gambit repl <deck>
 ```
 
-Run a persona against a root deck (test bot):
+Run a persona against a root deck (scenario):
 
 ```
-npx @bolt-foundry/gambit test-bot <root-deck> --test-deck <persona-deck>
+npx @bolt-foundry/gambit scenario <root-deck> --test-deck <persona-deck>
 ```
 
 Grade a saved session:
@@ -124,6 +124,23 @@ Tracing and state: 
 `--verbose` to print events\
 `--state <file>` to persist a session.
 
+### Worker sandbox defaults
+
+- Deck-executing CLI surfaces default to worker sandbox execution.
+- Use `--no-worker-sandbox` (or `--legacy-exec`) to force legacy in-process
+  execution.
+- `--worker-sandbox` explicitly forces worker execution on.
+- `--sandbox` / `--no-sandbox` are deprecated aliases for the worker flags.
+- `gambit.toml` equivalent:
+  ```toml
+  [execution]
+  worker_sandbox = false # same as --no-worker-sandbox
+  # legacy_exec = true    # equivalent rollback toggle
+  ```
+
+The npm launcher (`npx @bolt-foundry/gambit ...`) runs the Gambit CLI binary for
+your platform, so these defaults and flags apply there as well.
+
 ## Using the Simulator
 
 The simulator is the local Debug UI that streams runs and renders traces.
@@ -173,6 +190,59 @@ Define `contextSchema`/`responseSchema` with Zod to validate IO, and implement\
 `ctx.spawnAndWait({ path, input })`. Emit structured trace events with\
 `ctx.log(...)`.
 
+### Runtime defaults for programmatic `runDeck`
+
+`runDeck` from `@bolt-foundry/gambit` now uses CLI-equivalent provider/model
+defaults (alias expansion, provider routing, fallback behavior).
+
+Before (direct-provider setup in each caller):
+
+```ts
+import { createOpenRouterProvider, runDeck } from "jsr:@bolt-foundry/gambit";
+
+const provider = createOpenRouterProvider({
+  apiKey: Deno.env.get("OPENROUTER_API_KEY")!,
+});
+await runDeck({
+  path: "./root.deck.md",
+  input: { message: "hi" },
+  modelProvider: provider,
+});
+```
+
+After (defaulted wrapper):
+
+```ts
+import { runDeck } from "jsr:@bolt-foundry/gambit";
+
+await runDeck({
+  path: "./root.deck.md",
+  input: { message: "hi" },
+});
+```
+
+Per-runtime override (shared runtime object):
+
+```ts
+import { createDefaultedRuntime, runDeck } from "jsr:@bolt-foundry/gambit";
+
+const runtime = await createDefaultedRuntime({
+  fallbackProvider: "codex-cli",
+});
+
+await runDeck({
+  runtime,
+  path: "./root.deck.md",
+  input: { message: "hi" },
+});
+```
+
+Replacement mapping:
+
+- Legacy direct core passthrough export: `runDeck` -> `runDeckCore`
+- Defaulted wrapper export: `runDeck`
+- Runtime builder: `createDefaultedRuntime`
+
 ---
 
 ## Author your first deck
@@ -271,8 +341,8 @@ npx @bolt-foundry/gambit serve ./examples/respond_flow/decks/root.deck.ts --port
 Then:
 
 1. Open `http://localhost:8000/test`, pick the **Escalation persona**, and run
-   it. Leave the “Use test deck input for init” toggle on to see persona data
-   seed the init form automatically.
+   it. Leave the “Use scenario deck input for init” toggle on to see persona
+   data seed the init form automatically.
 2. Switch to the Debug tab to inspect the session—the child deck emits a
    `gambit_respond` payload that now shows up as a structured assistant turn.
 3. Head to the Calibrate tab and run the **Respond payload grader** to exercise
diff --git a/deno.jsonc b/deno.jsonc
index c33e43626..2a222e9eb 100644
--- a/deno.jsonc
+++ b/deno.jsonc
@@ -25,7 +25,7 @@
     "bundle:sim:sourcemap": "deno run -A scripts/bundle_simulator_ui.ts --sourcemap=external",
     "bundle:sim:web": "deno run -A scripts/bundle_simulator_ui.ts --platform=browser",
     "bundle:sim:web:sourcemap": "deno run -A scripts/bundle_simulator_ui.ts --platform=browser --sourcemap=external",
-    "serve:bot": "mkdir -p /tmp/gambit-bot-root && GAMBIT_BOT_ROOT=/tmp/gambit-bot-root deno run -A src/cli.ts serve src/decks/gambit-bot/PROMPT.md --bundle --port 8000",
+    "serve:bot": "mkdir -p /tmp/gambit-bot-root && GAMBIT_SIMULATOR_BUILD_BOT_ROOT=/tmp/gambit-bot-root GAMBIT_BOT_ROOT=/tmp/gambit-bot-root deno run -A src/cli.ts serve src/decks/gambit-bot/PROMPT.md --bundle --port 8000",
     "serve:bot:sandbox": "deno run -A scripts/serve_bot_sandbox.ts",
     "build_npm": "deno run -A scripts/build_npm.ts"
   },
diff --git a/docs/external/concepts/runtime.md b/docs/external/concepts/runtime.md
index e16251fb2..613f6e614 100644
--- a/docs/external/concepts/runtime.md
+++ b/docs/external/concepts/runtime.md
@@ -31,7 +31,7 @@ safe/observable.
 - `gambit_end`: enable with `![end](gambit://cards/end.card.md)` in Markdown (or
   `allowEnd: true` in TypeScript decks). Calling it returns a sentinel
   `{ __gambitEnd: true, payload?, status?, message?, code?, meta? }` so
-  CLI/test-bot loops stop reinjecting the closing assistant turn.
+  CLI/scenario loops stop reinjecting the closing assistant turn.
 
 ## State and turn order
 
diff --git a/docs/external/guides/authoring.md b/docs/external/guides/authoring.md
index 9f1790c11..18c6d0bab 100644
--- a/docs/external/guides/authoring.md
+++ b/docs/external/guides/authoring.md
@@ -12,10 +12,10 @@ verification.
   references (action/test/grader) and schema fragments into the parent deck.
 - Action decks are child decks exposed as model tools. Names must match
   `^[A-Za-z_][A-Za-z0-9_]*$` and avoid the `gambit_` prefix (reserved).
-- Persona/test decks may accept free-form user turns. Use the `acceptsUserTurns`
-  flag to control this behavior: root decks default to `true`, while action
-  decks default to `false`. Set it explicitly to `true` for persona/bot decks or
-  to `false` for workflow-only decks.
+- Persona/scenario decks may accept free-form user turns. Use the
+  `acceptsUserTurns` flag to control this behavior: root decks default to
+  `true`, while action decks default to `false`. Set it explicitly to `true` for
+  persona/bot decks or to `false` for workflow-only decks.
 
 ## Pick a format
 
@@ -77,7 +77,7 @@ migrate a repository, run:
 deno run -A packages/gambit/scripts/migrate-schema-terms.ts <repo-root>
 ```
 
-## Action decks, test decks, grader decks
+## Action decks, scenario decks, grader decks
 
 - Add action decks in front matter or TS definitions:
   `actionDecks = [{ name = "get_time", path = "./get_time.deck.ts" }]`.
@@ -101,10 +101,10 @@ deno run -A packages/gambit/scripts/migrate-schema-terms.ts <repo-root>
   should set `acceptsUserTurns = true` and may declare its own `contextSchema`
   (for example `contextSchema = "../schemas/my_persona_test.zod.ts"`) so the
   Test tab renders a schema-driven “Scenario” form for that persona.
-- For persona/test decks, you can embed
+- For persona/scenario decks, you can embed
   `![generate-test-input](gambit://cards/generate-test-input.card.md)` to
-  include the Test Bot init-fill contract instructions.
-- Test Bot init fill: when a Test Bot run is missing required init fields, the
+  include the scenario init-fill contract instructions.
+- Scenario init fill: when a scenario run is missing required init fields, the
   selected persona deck is asked to supply only the missing values before the
   run begins. The persona receives a single user message containing a JSON
   payload like:
@@ -133,8 +133,8 @@ deno run -A packages/gambit/scripts/migrate-schema-terms.ts <repo-root>
   - Markdown roots default to `true`; TypeScript decks default to `false`
     everywhere. Set it to `false` for any workflow deck that should never accept
     user turns (regardless of how it's run).
-  - Persona/test decks should set `acceptsUserTurns = true` so they can receive
-    messages even when invoked as non-root bots.
+  - Persona/scenario decks should set `acceptsUserTurns = true` so they can
+    receive messages even when invoked as non-root bots.
 
 ## Synthetic tools and handlers
 
@@ -170,7 +170,7 @@ deno run -A packages/gambit/scripts/migrate-schema-terms.ts <repo-root>
   http://localhost:8000/debug.
 - Tracing: add `--verbose` for console traces or `--trace out.jsonl` to persist
   events; use `--state state.json` with `run` to persist conversation state
-  between turns. When `--state` is omitted, test-bot/serve sessions default to
+  between turns. When `--state` is omitted, scenario/serve sessions default to
   `<project-root>/.gambit/sessions/...` where each session includes `state.json`
   (materialized snapshot) plus append-only `events.jsonl`, `feedback.jsonl`, and
   `grading.jsonl` for downstream ingestion. The project root is the nearest
diff --git a/docs/external/reference/cli.md b/docs/external/reference/cli.md
index 5223c9c16..023e75261 100644
--- a/docs/external/reference/cli.md
+++ b/docs/external/reference/cli.md
@@ -11,22 +11,22 @@ How to run Gambit, the agent harness framework, locally and observe runs.
   - Command help: `deno run -A src/cli.ts help <command>` (or
     `deno run -A src/cli.ts <command> -h`).
 - Run once:
-  `deno run -A src/cli.ts run <deck> [--context <json|string>] [--message <json|string>] [--model <id>] [--model-force <id>] [--trace <file>] [--state <file>] [--stream] [--responses] [--verbose]`
+  `deno run -A src/cli.ts run <deck> [--context <json|string>] [--message <json|string>] [--model <id>] [--model-force <id>] [--trace <file>] [--state <file>] [--stream] [--responses] [--verbose] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]`
 - Check models: `deno run -A src/cli.ts check <deck>`
 - REPL: `deno run -A src/cli.ts repl <deck>` (defaults to
   `src/decks/gambit-assistant.deck.md` in a local checkout). Streams by default
   and keeps state in memory for the session.
-- Test bot (CLI):
-  `deno run -A src/cli.ts test-bot <root-deck> --test-deck <persona-deck> [--context <json|string>] [--bot-input <json|string>] [--message <json|string>] [--max-turns <n>] [--state <file>] [--grade <grader-deck> ...] [--trace <file>] [--responses] [--verbose]`
+- Scenario (CLI):
+  `deno run -A src/cli.ts scenario <root-deck> --test-deck <persona-deck> [--context <json|string>] [--bot-input <json|string>] [--message <json|string>] [--max-turns <n>] [--state <file>] [--grade <grader-deck> ...] [--trace <file>] [--responses] [--verbose] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]`
 - Grade (CLI):
-  `deno run -A src/cli.ts grade <grader-deck> --state <file> [--model <id>] [--model-force <id>] [--trace <file>] [--responses] [--verbose]`
+  `deno run -A src/cli.ts grade <grader-deck> --state <file> [--model <id>] [--model-force <id>] [--trace <file>] [--responses] [--verbose] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]`
 - Export bundle (CLI):
   `deno run -A src/cli.ts export [<deck>] --state <file> --out <bundle.tar.gz>`
 - Debug UI: `deno run -A src/cli.ts serve <deck> --port 8000` then open
   http://localhost:8000/. This serves a multi-page UI:
 
   - Debug (default): `http://localhost:8000/debug`
-  - Test: `http://localhost:8000/test-bot`
+  - Test: `http://localhost:8000/test`
   - Calibrate: `http://localhost:8000/calibrate`
 
   The WebSocket server streams turns, traces, and status updates.
@@ -46,15 +46,24 @@ How to run Gambit, the agent harness framework, locally and observe runs.
 - `GAMBIT_RESPONSES_MODE=1`: env alternative to `--responses` for runtime/state.
 - `GAMBIT_OPENROUTER_RESPONSES=1`: route OpenRouter calls through the Responses
   API (experimental; chat remains the default path).
+- Worker execution is on by default for deck-executing surfaces. Use
+  `--no-worker-sandbox` (or `--legacy-exec`) to roll back to legacy in-process
+  execution. `--sandbox/--no-sandbox` still work as deprecated aliases.
+- `gambit.toml` config equivalent:
+  ```toml
+  [execution]
+  worker_sandbox = false # same as --no-worker-sandbox
+  # legacy_exec = true    # equivalent rollback toggle
+  ```
 
 ## State and tracing
 
-- `--state <file>` (run/test-bot/grade/export): load/persist messages so you can
+- `--state <file>` (run/scenario/grade/export): load/persist messages so you can
   continue a conversation; skips `gambit_context` on resume. `grade` writes
   `meta.gradingRuns` back into the session state, while `export` reads the state
   file to build the bundle.
 - `--out <file>` (export): bundle output path (tar.gz).
-- `--grade <grader-deck>` (test-bot): can be repeated; graders run in the order
+- `--grade <grader-deck>` (scenario): can be repeated; graders run in the order
   provided and append results to `meta.gradingRuns` in the same session state
   file.
 - `--trace <file>` writes JSONL trace events; `--verbose` prints trace to
@@ -91,17 +100,17 @@ How to run Gambit, the agent harness framework, locally and observe runs.
   `window.gambitFormatTrace` hook in the page; return a string or
   `{role?, summary?, details?, depth?}` to override the entry that appears in
   the Traces & Tools pane.
-- The Test page reuses the same simulator runtime but drives persona/test-bot
+- The Test page reuses the same simulator runtime but drives persona/scenario
   decks so you can batch synthetic conversations, inspect per-turn scoring, and
   export JSONL artifacts for later ingestion. List personas by declaring
   `[[testDecks]]` entries in your root deck (for example
   `gambit/examples/advanced/voice_front_desk/decks/root.deck.md`). Each entry’s
   `path` should point to a persona deck (Markdown or TS) that includes
   `acceptsUserTurns = true`; the persona deck’s own `contextSchema` and defaults
-  power the Scenario/Test Bot form (see
+  power the Scenario form (see
   `gambit/examples/advanced/voice_front_desk/tests/new_patient_intake.deck.md`).
   Editing those deck files is how you add/remove personas now—there is no
-  `.gambit/test-bot.md` override.
+  `.gambit/scenario.md` override.
 - The Calibrate page is the regroup/diagnostics view for graders that run
   against saved Debug/Test sessions; it currently serves as a placeholder until
   the grading transport lands.
diff --git a/docs/external/reference/cli/commands/bot.md b/docs/external/reference/cli/commands/bot.md
index 61afa71aa..83ba442ec 100644
--- a/docs/external/reference/cli/commands/bot.md
+++ b/docs/external/reference/cli/commands/bot.md
@@ -1,12 +1,17 @@
 +++
 command = "bot"
 summary = "Run the Gambit bot assistant"
-usage = "gambit bot [<dir>] [--bot-root <dir>] [--model <id>] [--model-force <id>] [--responses] [--verbose]"
+usage = "gambit bot [<dir>] [--bot-root <dir>] [--model <id>] [--model-force <id>] [--responses] [--verbose] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]"
 flags = [
   "--bot-root <dir>        Allowed folder for bot file writes (defaults to workspace.decks if set; overrides <dir>)",
   "--model <id>            Default model id",
   "--model-force <id>      Override model id",
   "--responses             Run runtime/state in Responses mode",
+  "--worker-sandbox        Force worker execution on",
+  "--no-worker-sandbox     Force worker execution off",
+  "--legacy-exec           Alias for --no-worker-sandbox",
+  "--sandbox               Deprecated alias for --worker-sandbox",
+  "--no-sandbox            Deprecated alias for --no-worker-sandbox",
   "--verbose               Print trace events to console",
 ]
 +++
diff --git a/docs/external/reference/cli/commands/grade.md b/docs/external/reference/cli/commands/grade.md
index 531bb12c7..6dab256d1 100644
--- a/docs/external/reference/cli/commands/grade.md
+++ b/docs/external/reference/cli/commands/grade.md
@@ -1,7 +1,7 @@
 +++
 command = "grade"
 summary = "Grade a saved state file"
-usage = "gambit grade <grader-deck.(ts|md)> --state <file> [--model <id>] [--model-force <id>] [--trace <file>] [--responses] [--verbose]"
+usage = "gambit grade <grader-deck.(ts|md)> --state <file> [--model <id>] [--model-force <id>] [--trace <file>] [--responses] [--verbose] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]"
 flags = [
   "--grader <path>         Grader deck path (overrides positional)",
   "--state <file>          Load/persist state",
@@ -9,6 +9,11 @@ flags = [
   "--model-force <id>      Override model id",
   "--trace <file>          Write trace events to file (JSONL)",
   "--responses             Run runtime/state in Responses mode",
+  "--worker-sandbox        Force worker execution on",
+  "--no-worker-sandbox     Force worker execution off",
+  "--legacy-exec           Alias for --no-worker-sandbox",
+  "--sandbox               Deprecated alias for --worker-sandbox",
+  "--no-sandbox            Deprecated alias for --no-worker-sandbox",
   "--verbose               Print trace events to console",
 ]
 +++
diff --git a/docs/external/reference/cli/commands/repl.md b/docs/external/reference/cli/commands/repl.md
index 37426f039..fd557d713 100644
--- a/docs/external/reference/cli/commands/repl.md
+++ b/docs/external/reference/cli/commands/repl.md
@@ -1,7 +1,7 @@
 +++
 command = "repl"
 summary = "Start an interactive REPL"
-usage = "gambit repl <deck.(ts|md)> [--context <json|string>] [--message <json|string>] [--model <id>] [--model-force <id>] [--responses] [--verbose]"
+usage = "gambit repl <deck.(ts|md)> [--context <json|string>] [--message <json|string>] [--model <id>] [--model-force <id>] [--responses] [--verbose] [-A|--allow-all|--allow-<kind>] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]"
 flags = [
   "--context <json|string> Context payload (seeds gambit_context; legacy --init still works)",
   "--message <json|string> Initial user message (sent before assistant speaks)",
@@ -9,6 +9,17 @@ flags = [
   "--model-force <id>      Override model id",
   "--responses             Run runtime/state in Responses mode",
   "--verbose               Print trace events to console",
+  "-A, --allow-all         Allow all session permissions (read/write/run/net/env)",
+  "--allow-read[=<paths>]  Session read override (all when value omitted)",
+  "--allow-write[=<paths>] Session write override (all when value omitted)",
+  "--allow-run[=<entries>] Session run override (all when value omitted)",
+  "--allow-net[=<hosts>]   Session net override (all when value omitted)",
+  "--allow-env[=<names>]   Session env override (all when value omitted)",
+  "--worker-sandbox        Force worker execution on",
+  "--no-worker-sandbox     Force worker execution off",
+  "--legacy-exec           Alias for --no-worker-sandbox",
+  "--sandbox               Deprecated alias for --worker-sandbox",
+  "--no-sandbox            Deprecated alias for --no-worker-sandbox",
 ]
 +++
 
diff --git a/docs/external/reference/cli/commands/run.md b/docs/external/reference/cli/commands/run.md
index 1ce8e84e9..e148ed428 100644
--- a/docs/external/reference/cli/commands/run.md
+++ b/docs/external/reference/cli/commands/run.md
@@ -1,7 +1,7 @@
 +++
 command = "run"
 summary = "Run a deck once"
-usage = "gambit run [<deck.(ts|md)>] [--context <json|string>] [--message <json|string>] [--model <id>] [--model-force <id>] [--trace <file>] [--state <file>] [--stream] [--responses] [--verbose]"
+usage = "gambit run [<deck.(ts|md)>] [--context <json|string>] [--message <json|string>] [--model <id>] [--model-force <id>] [--trace <file>] [--state <file>] [--stream] [--responses] [--verbose] [-A|--allow-all|--allow-<kind>] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]"
 flags = [
   "--context <json|string> Context payload (seeds gambit_context; legacy --init still works)",
   "--message <json|string> Initial user message (sent before assistant speaks)",
@@ -12,6 +12,17 @@ flags = [
   "--stream                Enable streaming responses",
   "--responses             Run runtime/state in Responses mode",
   "--verbose               Print trace events to console",
+  "-A, --allow-all         Allow all session permissions (read/write/run/net/env)",
+  "--allow-read[=<paths>]  Session read override (all when value omitted)",
+  "--allow-write[=<paths>] Session write override (all when value omitted)",
+  "--allow-run[=<entries>] Session run override (all when value omitted)",
+  "--allow-net[=<hosts>]   Session net override (all when value omitted)",
+  "--allow-env[=<names>]   Session env override (all when value omitted)",
+  "--worker-sandbox        Force worker execution on",
+  "--no-worker-sandbox     Force worker execution off",
+  "--legacy-exec           Alias for --no-worker-sandbox",
+  "--sandbox               Deprecated alias for --worker-sandbox",
+  "--no-sandbox            Deprecated alias for --no-worker-sandbox",
 ]
 +++
 
diff --git a/docs/external/reference/cli/commands/test-bot.md b/docs/external/reference/cli/commands/scenario.md
similarity index 55%
rename from docs/external/reference/cli/commands/test-bot.md
rename to docs/external/reference/cli/commands/scenario.md
index 7b54dd238..1e8a224ba 100644
--- a/docs/external/reference/cli/commands/test-bot.md
+++ b/docs/external/reference/cli/commands/scenario.md
@@ -1,25 +1,30 @@
 +++
-command = "test-bot"
-summary = "Run a persona/test-bot loop"
-usage = "gambit test-bot <root-deck.(ts|md)> --test-deck <persona-deck.(ts|md)> [--context <json|string>] [--bot-input <json|string>] [--message <json|string>] [--max-turns <n>] [--state <file>] [--grade <grader-deck.(ts|md)> ...] [--trace <file>] [--responses] [--verbose]"
+command = "scenario"
+summary = "Run a scenario loop with a persona deck"
+usage = "gambit scenario <root-deck.(ts|md)> --test-deck <persona-deck.(ts|md)> [--context <json|string>] [--bot-input <json|string>] [--message <json|string>] [--max-turns <n>] [--state <file>] [--grade <grader-deck.(ts|md)> ...] [--trace <file>] [--responses] [--verbose] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]"
 flags = [
-  "--test-deck <path>      Persona/test deck path",
+  "--test-deck <path>      Persona/scenario deck path",
   "--grade <path>          Grader deck path (repeatable)",
   "--context <json|string> Context payload (seeds gambit_context; legacy --init still works)",
-  "--bot-input <json|string> Input payload for the persona deck",
+  "--bot-input <json|string> Input payload for the persona/scenario deck",
   "--message <json|string> Initial user message (sent before assistant speaks)",
-  "--max-turns <n>         Max turns for test-bot (default: 12)",
+  "--max-turns <n>         Max turns for scenario loop (default: 12)",
   "--state <file>          Load/persist state",
   "--model <id>            Default model id",
   "--model-force <id>      Override model id",
   "--trace <file>          Write trace events to file (JSONL)",
   "--responses             Run runtime/state in Responses mode",
+  "--worker-sandbox        Force worker execution on",
+  "--no-worker-sandbox     Force worker execution off",
+  "--legacy-exec           Alias for --no-worker-sandbox",
+  "--sandbox               Deprecated alias for --worker-sandbox",
+  "--no-sandbox            Deprecated alias for --no-worker-sandbox",
   "--verbose               Print trace events to console",
 ]
 +++
 
-Runs a persona deck against a root deck to simulate conversations. Repeat
-`--grade` to apply multiple graders.
+Runs a persona deck against a root deck to execute a scenario. Repeat `--grade`
+to apply multiple graders.
 
 If the root deck has required init fields that are missing, the persona deck is
 asked to return JSON for only the missing fields before the run starts. The
diff --git a/docs/external/reference/cli/commands/serve.md b/docs/external/reference/cli/commands/serve.md
index ce605d6d7..1d3165106 100644
--- a/docs/external/reference/cli/commands/serve.md
+++ b/docs/external/reference/cli/commands/serve.md
@@ -1,7 +1,7 @@
 +++
 command = "serve"
 summary = "Run the debug UI server"
-usage = "gambit serve [<deck.(ts|md)>] [--model <id>] [--model-force <id>] [--port <n>] [--responses] [--verbose] [--watch] [--no-bundle] [--no-sourcemap]"
+usage = "gambit serve [<deck.(ts|md)>] [--model <id>] [--model-force <id>] [--port <n>] [--responses] [--verbose] [--watch] [--no-bundle] [--no-sourcemap] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]"
 flags = [
   "--model <id>            Default model id",
   "--model-force <id>      Override model id",
@@ -13,12 +13,16 @@ flags = [
   "--sourcemap             Generate external source maps (serve; default in dev)",
   "--no-sourcemap          Disable source map generation (serve)",
   "--platform <platform>   Bundle target platform: deno (default) or web (browser)",
+  "--worker-sandbox        Force worker execution on",
+  "--no-worker-sandbox     Force worker execution off",
+  "--legacy-exec           Alias for --no-worker-sandbox",
+  "--sandbox               Deprecated alias for --worker-sandbox",
+  "--no-sandbox            Deprecated alias for --no-worker-sandbox",
   "--verbose               Print trace events to console",
 ]
 +++
 
 Starts the debug UI server (default at `http://localhost:8000/`).
 
-If no deck path is provided, Gambit creates a new workspace scaffold (root
-`PROMPT.md`, `INTENT.md`, plus default scenario/grader decks) and opens the
-simulator UI in workspace onboarding mode.
+If no deck path is provided, Gambit uses `./PROMPT.md`. If `./PROMPT.md` does
+not exist, Gambit creates a minimal `PROMPT.md` and serves it.
diff --git a/examples/dev/simpsons_explainer_notest/README.md b/examples/dev/simpsons_explainer_notest/README.md
index 471f07d56..171c94556 100644
--- a/examples/dev/simpsons_explainer_notest/README.md
+++ b/examples/dev/simpsons_explainer_notest/README.md
@@ -1,6 +1,7 @@
 # simpsons_explainer_notest
 
-Local dev example for testing the Test tab when no test decks are configured.
+Local dev example for testing the Test tab when no scenario decks are
+configured.
 
 ## Prereqs
 
diff --git a/examples/dev/simpsons_explainer_notest/schemas/test_bot_input.zod.ts b/examples/dev/simpsons_explainer_notest/schemas/test_bot_input.zod.ts
index 0bef4deae..38a9efa9c 100644
--- a/examples/dev/simpsons_explainer_notest/schemas/test_bot_input.zod.ts
+++ b/examples/dev/simpsons_explainer_notest/schemas/test_bot_input.zod.ts
@@ -2,6 +2,6 @@ import { z } from "npm:zod";
 
 export default z.object({
   initialQuestion: z.string().describe(
-    "Optional override for the test bot's first user question.",
+    "Optional override for the scenario's first user question.",
   ).optional(),
 });
diff --git a/examples/dev/simpsons_explainer_user/README.md b/examples/dev/simpsons_explainer_user/README.md
index 33415b5e2..4b2ac7ceb 100644
--- a/examples/dev/simpsons_explainer_user/README.md
+++ b/examples/dev/simpsons_explainer_user/README.md
@@ -1,6 +1,6 @@
 # simpsons_explainer_user
 
-Local dev example with test decks enabled and `startMode = "user"`.
+Local dev example with scenario decks enabled and `startMode = "user"`.
 
 ## Prereqs
 
@@ -21,15 +21,15 @@ From this folder:
 deno run -A jsr:@bolt-foundry/gambit@^0.8.3/cli serve root.deck.md
 ```
 
-## Run a test bot (UI)
+## Run a scenario (UI)
 
 1. Open the simulator UI and go to the "Test" tab.
-2. Click "Run test bot".
+2. Click "Run scenario".
 3. Use the composer to send the first user message (start mode is user).
 4. Optionally switch to the "Grade" tab and click "Run grader".
 
 ## Notes
 
-- Test decks are registered in `cards/test_decks.card.md`.
+- Scenario decks are registered in `cards/test_decks.card.md`.
 - Graders are registered in `cards/grader_decks.card.md`.
 - Instruction cards live in `cards/` and are included by `root.deck.md`.
diff --git a/examples/dev/simpsons_explainer_user/cards/test_decks.card.md b/examples/dev/simpsons_explainer_user/cards/test_decks.card.md
index 6b5bc8065..8c9cc4976 100644
--- a/examples/dev/simpsons_explainer_user/cards/test_decks.card.md
+++ b/examples/dev/simpsons_explainer_user/cards/test_decks.card.md
@@ -4,10 +4,10 @@ label = "demo_test_decks"
 [[testDecks]]
 label = "Planets question"
 path = "../test_bots/planets.deck.md"
-description = "Test bot that asks a basic planets-orbit question, with optional initialQuestion override."
+description = "Scenario that asks a basic planets-orbit question, with optional initialQuestion override."
 
 [[testDecks]]
 label = "Quantum entanglement question"
 path = "../test_bots/quantum_entanglement.deck.md"
-description = "Test bot that asks about quantum entanglement, with optional initialQuestion override."
+description = "Scenario that asks about quantum entanglement, with optional initialQuestion override."
 +++
diff --git a/examples/dev/simpsons_explainer_user/schemas/test_bot_input.zod.ts b/examples/dev/simpsons_explainer_user/schemas/test_bot_input.zod.ts
index 0bef4deae..38a9efa9c 100644
--- a/examples/dev/simpsons_explainer_user/schemas/test_bot_input.zod.ts
+++ b/examples/dev/simpsons_explainer_user/schemas/test_bot_input.zod.ts
@@ -2,6 +2,6 @@ import { z } from "npm:zod";
 
 export default z.object({
   initialQuestion: z.string().describe(
-    "Optional override for the test bot's first user question.",
+    "Optional override for the scenario's first user question.",
   ).optional(),
 });
diff --git a/examples/dev/simpsons_explainer_user_notest/README.md b/examples/dev/simpsons_explainer_user_notest/README.md
index 26bf3588d..b00002640 100644
--- a/examples/dev/simpsons_explainer_user_notest/README.md
+++ b/examples/dev/simpsons_explainer_user_notest/README.md
@@ -1,6 +1,6 @@
 # simpsons_explainer_user_notest
 
-Local dev example for testing the Test tab with no test decks configured and
+Local dev example for testing the Test tab with no scenario decks configured and
 `startMode = "user"`.
 
 ## Prereqs
diff --git a/examples/dev/simpsons_explainer_user_notest/schemas/test_bot_input.zod.ts b/examples/dev/simpsons_explainer_user_notest/schemas/test_bot_input.zod.ts
index 0bef4deae..38a9efa9c 100644
--- a/examples/dev/simpsons_explainer_user_notest/schemas/test_bot_input.zod.ts
+++ b/examples/dev/simpsons_explainer_user_notest/schemas/test_bot_input.zod.ts
@@ -2,6 +2,6 @@ import { z } from "npm:zod";
 
 export default z.object({
   initialQuestion: z.string().describe(
-    "Optional override for the test bot's first user question.",
+    "Optional override for the scenario's first user question.",
   ).optional(),
 });
diff --git a/mod.ts b/mod.ts
index bfea422c7..fa0663bbd 100644
--- a/mod.ts
+++ b/mod.ts
@@ -25,12 +25,27 @@ export type { Guardrails } from "@bolt-foundry/gambit-core";
 export type { JSONValue } from "@bolt-foundry/gambit-core";
 /** Model provider interface for LLM backends. */
 export type { ModelProvider } from "@bolt-foundry/gambit-core";
-/** Test deck definition shape. */
+/** Scenario deck definition shape. */
 export type { TestDeckDefinition } from "@bolt-foundry/gambit-core";
 /** Check if a value is an explicit end-of-run signal. */
 export { isGambitEndSignal } from "@bolt-foundry/gambit-core";
-/** Run a deck and return its execution result. */
-export { runDeck } from "@bolt-foundry/gambit-core";
+/** Check whether an error represents runtime cancellation. */
+export { isRunCanceledError } from "@bolt-foundry/gambit-core";
+/** Build a runtime with CLI-equivalent provider defaults and routing. */
+export { createDefaultedRuntime } from "./src/default_runtime.ts";
+/** Runtime defaults/options for the `runDeck` wrapper. */
+export type {
+  CreateDefaultedRuntimeOptions,
+  DefaultedRuntime,
+  DefaultedRuntimeRunOptions,
+  RunDeckWithDefaultsOptions,
+} from "./src/default_runtime.ts";
+/** Session artifact persistence config for default runtime runs. */
+export type { SessionArtifactsConfig } from "./src/session_artifacts.ts";
+/** Run a deck with default provider/model/runtime behavior. */
+export { runDeck } from "./src/default_runtime.ts";
+/** Run a deck directly through gambit-core without gambit defaults. */
+export { runDeck as runDeckCore } from "@bolt-foundry/gambit-core";
 /** Signal for explicitly ending a Gambit run. */
 export type { GambitEndSignal } from "@bolt-foundry/gambit-core";
 /** OpenAI Chat Completions compatibility helper for a deck. */
diff --git a/packages/gambit-core/README.md b/packages/gambit-core/README.md
index f149ff342..ae5e26840 100644
--- a/packages/gambit-core/README.md
+++ b/packages/gambit-core/README.md
@@ -112,6 +112,14 @@ export default defineCard({
 });
 ```
 
+For built-in Gambit schemas in TypeScript/compute decks, use canonical module
+subpaths:
+
+```ts
+import contextSchema from "@bolt-foundry/gambit-core/schemas/scenarios/plain_chat_input_optional.zod.ts";
+import responseSchema from "@bolt-foundry/gambit-core/schemas/scenarios/plain_chat_output.zod.ts";
+```
+
 ## Running decks programmatically
 
 The runtime loads the deck (Markdown or TS) and steps through each pass. Provide
@@ -145,7 +153,13 @@ When the deck defines `run`/`execute`, the runtime hands you an
 [`ExecutionContext`](src/types.ts) with:
 
 - `ctx.input`: validated input (narrowable when you type the schema).
-- `ctx.spawnAndWait({ path, input })`: call another deck and await the result.
+- `ctx.initialUserMessage`: current turn user message when provided by caller.
+- `ctx.getSessionMeta(key)`: read persisted run/session metadata.
+- `ctx.setSessionMeta(key, value)`: persist metadata for later turns.
+- `ctx.appendMessage({ role, content })`: append chat transcript messages from
+  execute decks.
+- `ctx.spawnAndWait({ path, input, initialUserMessage? })`: call another deck
+  and await the result; user message is inherited by default unless overridden.
 - `ctx.return(payload)`: respond early without running guards again.
 - `ctx.fail({ message, code?, details? })`: aborts the run (throws).
 - `ctx.log(...)`: emit structured trace entries for observability.
@@ -153,6 +167,19 @@ When the deck defines `run`/`execute`, the runtime hands you an
 Pass `guardrails`, `initialUserMessage`, `modelOverride`, and
 `allowRootStringInput` to `runDeck` when scripting custom runtimes.
 
+### Worker sandbox behavior in `runDeck`
+
+`gambit-core` keeps worker sandboxing opt-in:
+
+- `runDeck` enables worker sandboxing when `workerSandbox: true` is passed.
+- You can also opt in via `GAMBIT_DECK_WORKER_SANDBOX=1` (or `true` / `yes`).
+- If neither is set, `runDeck` executes without worker sandboxing by default.
+
+Why this is opt-in: `@bolt-foundry/gambit-core` is intended to run in multiple
+hosts (Node, Bun, Deno). Worker sandboxing relies on Deno-specific worker
+permission controls, so host apps must opt in when they run in an environment
+that supports it.
+
 ## Loading Markdown decks and cards
 
 Markdown files use front matter for metadata, with the body becoming the prompt.
diff --git a/packages/gambit-core/decks/anthropic/agent-sdk/PROMPT.md b/packages/gambit-core/decks/anthropic/agent-sdk/PROMPT.md
index cf5a9f35a..f88e199c5 100644
--- a/packages/gambit-core/decks/anthropic/agent-sdk/PROMPT.md
+++ b/packages/gambit-core/decks/anthropic/agent-sdk/PROMPT.md
@@ -1,5 +1,7 @@
 +++
 label = "Anthropic agent SDK bridge"
+contextSchema = "gambit://schemas/scenarios/plain_chat_input_optional.zod.ts"
+responseSchema = "gambit://schemas/scenarios/plain_chat_output.zod.ts"
 +++
 
 This stdlib deck provides the default bridge between the Anthropic agent SDK
diff --git a/packages/gambit-core/decks/openai/codex-sdk/PROMPT.md b/packages/gambit-core/decks/openai/codex-sdk/PROMPT.md
index 1a3ef6e8f..b7091d60e 100644
--- a/packages/gambit-core/decks/openai/codex-sdk/PROMPT.md
+++ b/packages/gambit-core/decks/openai/codex-sdk/PROMPT.md
@@ -1,5 +1,7 @@
 +++
 label = "Codex SDK bridge"
+contextSchema = "gambit://schemas/scenarios/plain_chat_input_optional.zod.ts"
+responseSchema = "gambit://schemas/scenarios/plain_chat_output.zod.ts"
 +++
 
 This stdlib deck provides the default bridge between the Codex SDK runtime and
diff --git a/packages/gambit-core/decks/openai/codex-sdk/codex_client.ts b/packages/gambit-core/decks/openai/codex-sdk/codex_client.ts
new file mode 100644
index 000000000..39e4c49aa
--- /dev/null
+++ b/packages/gambit-core/decks/openai/codex-sdk/codex_client.ts
@@ -0,0 +1,109 @@
+/** One user turn to send through the codex CLI. */
+export type CodexTurnInput = {
+  userText: string;
+  /** Existing codex thread to resume; omit to start a new one. */
+  threadId?: string;
+  /** Optional text placed ahead of `userText` in the prompt. */
+  systemPrompt?: string;
+};
+
+/** Thread id and final agent message text produced by a turn. */
+export type CodexTurnOutput = { threadId: string; assistantText: string };
+
+/** Event shapes read from the JSON lines that codex writes to stdout. */
+type CodexEvent =
+  | { type: "thread.started"; thread_id?: unknown }
+  | { type: "item.completed"; item?: { type?: unknown; text?: unknown } }
+  // Any other event type is accepted here and ignored by the parser.
+  | { type: string; [key: string]: unknown };
+
+/** Working directory for codex: non-blank `GAMBIT_BOT_ROOT`, else the cwd. */
+function runCwd(): string {
+  const configuredRoot = Deno.env.get("GAMBIT_BOT_ROOT")?.trim();
+  // An unset or whitespace-only value falls back to the process directory.
+  if (configuredRoot) return configuredRoot;
+  return Deno.cwd();
+}
+
+/**
+ * Scan newline-delimited codex JSON output for the thread id and the final
+ * agent reply. Lines that are not JSON objects are skipped; a later
+ * non-blank `agent_message` replaces an earlier one.
+ */
+function parseCodexEvents(stdout: string): {
+  threadId?: string;
+  assistantText?: string;
+} {
+  let threadId: string | undefined;
+  let assistantText: string | undefined;
+  for (const rawLine of stdout.split(/\r?\n/)) {
+    const candidate = rawLine.trim();
+    if (!candidate.startsWith("{")) continue;
+    let event: CodexEvent | null = null;
+    try {
+      event = JSON.parse(candidate) as CodexEvent;
+    } catch {
+      continue; // looked like JSON but was not parseable
+    }
+    if (!event || typeof event !== "object") continue;
+    if (event.type === "thread.started") {
+      const rawId = event.thread_id;
+      const id = typeof rawId === "string" ? rawId.trim() : "";
+      if (id) threadId = id;
+    } else if (event.type === "item.completed") {
+      const item = event.item;
+      if (!item || typeof item !== "object") continue;
+      const { type, text } = item as Record<string, unknown>;
+      if (type !== "agent_message" || typeof text !== "string") continue;
+      const reply = text.trim();
+      if (reply) assistantText = reply;
+    }
+  }
+  return { threadId, assistantText };
+}
+
+/**
+ * Run one turn through the `codex` CLI and return its reply. A non-blank
+ * `systemPrompt` is prepended to the user text; a `threadId` resumes that
+ * thread via `codex exec resume`, otherwise a new `codex exec` run starts.
+ *
+ * @throws Error if codex exits unsuccessfully or no thread id can be found.
+ */
+export async function sendCodexTurn(
+  input: CodexTurnInput,
+): Promise<CodexTurnOutput> {
+  const systemPrompt = input.systemPrompt?.trim();
+  const prompt = systemPrompt
+    ? `${systemPrompt}\n\n${input.userText}`
+    : input.userText;
+  const sharedFlags = ["--skip-git-repo-check", "--json"];
+  const args = input.threadId
+    ? ["exec", "resume", ...sharedFlags, input.threadId, prompt]
+    : ["exec", ...sharedFlags, prompt];
+
+  const command = new Deno.Command("codex", {
+    args,
+    cwd: runCwd(),
+    stdout: "piped",
+    stderr: "piped",
+  });
+  const result = await command.output();
+  // Decode both streams up front; either may feed the failure message.
+  const decoder = new TextDecoder();
+  const stdout = decoder.decode(result.stdout);
+  const stderr = decoder.decode(result.stderr);
+  if (!result.success) {
+    const detail = stderr.trim() || stdout.trim();
+    throw new Error(`codex exec failed (exit ${result.code}): ${detail}`);
+  }
+
+  const events = parseCodexEvents(stdout);
+  // Prefer the id codex reported; otherwise keep the id that was resumed.
+  const threadId = events.threadId ?? input.threadId;
+  if (!threadId) {
+    throw new Error(
+      `codex exec succeeded but no thread id found in output: ${stdout.trim()}`,
+    );
+  }
+  return { threadId, assistantText: events.assistantText ?? "" };
+}
diff --git a/packages/gambit-core/decks/openai/codex-sdk/codex_sdk_bridge.deck.ts b/packages/gambit-core/decks/openai/codex-sdk/codex_sdk_bridge.deck.ts
new file mode 100644
index 000000000..81da6f0e7
--- /dev/null
+++ b/packages/gambit-core/decks/openai/codex-sdk/codex_sdk_bridge.deck.ts
@@ -0,0 +1,36 @@
+import { defineDeck } from "../../../src/definitions.ts";
+import { z } from "zod";
+import { sendCodexTurn } from "./codex_client.ts";
+
+const CODEX_THREAD_META_KEY = "codex.threadId";
+
+/**
+ * Deck that forwards the current user turn to the codex CLI and stores the
+ * codex thread id in session metadata for later turns.
+ */
+export default defineDeck({
+  label: "codex_sdk_bridge",
+  contextSchema: z.string().optional(),
+  responseSchema: z.string(),
+  async run(ctx) {
+    // Prefer the caller's user message; fall back to a string deck input.
+    const fromMessage = typeof ctx.initialUserMessage === "string"
+      ? ctx.initialUserMessage.trim()
+      : "";
+    const userText = fromMessage ||
+      (typeof ctx.input === "string" ? ctx.input.trim() : "");
+    // Nothing to send: answer with an empty string without calling codex.
+    if (!userText) return "";
+
+    const threadId = ctx.getSessionMeta<string>(CODEX_THREAD_META_KEY);
+    const systemPrompt = ctx.getSessionMeta<string>("codex.systemPrompt");
+    // Record the user turn in the transcript before calling codex.
+    ctx.appendMessage({ role: "user", content: userText });
+    const turn = await sendCodexTurn({ userText, threadId, systemPrompt });
+
+    // Remember the thread so the next turn resumes this conversation.
+    ctx.setSessionMeta(CODEX_THREAD_META_KEY, turn.threadId);
+    ctx.appendMessage({ role: "assistant", content: turn.assistantText });
+    return turn.assistantText;
+  },
+});
diff --git a/packages/gambit-core/deno.json b/packages/gambit-core/deno.json
index 595f255ad..38715ceca 100644
--- a/packages/gambit-core/deno.json
+++ b/packages/gambit-core/deno.json
@@ -8,7 +8,39 @@
     "url": "git+https://github.com/bolt-foundry/gambit.git"
   },
   "exports": {
-    ".": "./mod.ts"
+    ".": "./mod.ts",
+    "./schemas/graders/respond.ts": "./schemas/graders/respond.ts",
+    "./schemas/graders/respond.zod.ts": "./schemas/graders/respond.zod.ts",
+    "./schemas/graders/grader_output.ts": "./schemas/graders/grader_output.ts",
+    "./schemas/graders/grader_output.zod.ts":
+      "./schemas/graders/grader_output.zod.ts",
+    "./schemas/graders/contexts/turn.ts": "./schemas/graders/contexts/turn.ts",
+    "./schemas/graders/contexts/turn.zod.ts":
+      "./schemas/graders/contexts/turn.zod.ts",
+    "./schemas/graders/contexts/turn_tools.ts":
+      "./schemas/graders/contexts/turn_tools.ts",
+    "./schemas/graders/contexts/turn_tools.zod.ts":
+      "./schemas/graders/contexts/turn_tools.zod.ts",
+    "./schemas/graders/contexts/conversation.ts":
+      "./schemas/graders/contexts/conversation.ts",
+    "./schemas/graders/contexts/conversation.zod.ts":
+      "./schemas/graders/contexts/conversation.zod.ts",
+    "./schemas/graders/contexts/conversation_tools.ts":
+      "./schemas/graders/contexts/conversation_tools.ts",
+    "./schemas/graders/contexts/conversation_tools.zod.ts":
+      "./schemas/graders/contexts/conversation_tools.zod.ts",
+    "./schemas/graders/contexts/tools.ts":
+      "./schemas/graders/contexts/tools.ts",
+    "./schemas/graders/contexts/tools.zod.ts":
+      "./schemas/graders/contexts/tools.zod.ts",
+    "./schemas/scenarios/plain_chat_input_optional.ts":
+      "./schemas/scenarios/plain_chat_input_optional.ts",
+    "./schemas/scenarios/plain_chat_input_optional.zod.ts":
+      "./schemas/scenarios/plain_chat_input_optional.zod.ts",
+    "./schemas/scenarios/plain_chat_output.ts":
+      "./schemas/scenarios/plain_chat_output.ts",
+    "./schemas/scenarios/plain_chat_output.zod.ts":
+      "./schemas/scenarios/plain_chat_output.zod.ts"
   },
   "tasks": {
     "fmt": "deno fmt",
diff --git a/packages/gambit-core/mod.ts b/packages/gambit-core/mod.ts
index e0d0f8e7e..0b05c1e8d 100644
--- a/packages/gambit-core/mod.ts
+++ b/packages/gambit-core/mod.ts
@@ -59,8 +59,12 @@ export {
 export { isGambitEndSignal } from "./src/runtime.ts";
 /** Run a deck and return its execution result. */
 export { runDeck } from "./src/runtime.ts";
+/** Cancellation error type surfaced when a run is aborted. */
+export { isRunCanceledError, RunCanceledError } from "./src/runtime.ts";
 /** Signal for explicitly ending a Gambit run. */
 export type { GambitEndSignal } from "./src/runtime.ts";
+/** Runtime run options accepted by `runDeck`. */
+export type { RunOptions } from "./src/runtime.ts";
 /** Default guardrail settings applied to deck runs. */
 export { DEFAULT_GUARDRAILS } from "./src/constants.ts";
 /** Reserved tool name prefix for Gambit tools. */
@@ -92,4 +96,4 @@ export type { ModelProvider } from "./src/types.ts";
 /** Tool definition passed to model providers. */
 export type { ToolDefinition } from "./src/types.ts";
 /** Trace events emitted during execution. */
-export type { TraceEvent } from "./src/types.ts";
+export type { ProviderTraceEvent, TraceEvent } from "./src/types.ts";
diff --git a/packages/gambit-core/schemas/graders/contexts/conversation.ts b/packages/gambit-core/schemas/graders/contexts/conversation.ts
index de05621ae..a8d5defbc 100644
--- a/packages/gambit-core/schemas/graders/contexts/conversation.ts
+++ b/packages/gambit-core/schemas/graders/contexts/conversation.ts
@@ -1,17 +1,40 @@
 import { z } from "zod";
 
-export const graderMessageSchema = z.object({
+type GraderMessage = {
+  role: string;
+  content?: unknown;
+  name?: string;
+};
+
+type GraderConversation = {
+  messages?: Array<GraderMessage>;
+  meta?: Record<string, unknown>;
+  notes?: {
+    text?: string;
+  };
+};
+
+type GraderConversationContext = {
+  session: GraderConversation;
+};
+
+export const graderMessageSchema: z.ZodType<GraderMessage> = z.object({
   role: z.string(),
   content: z.any().optional(),
   name: z.string().optional(),
 });
 
-export const graderConversationSchema = z.object({
-  messages: z.array(graderMessageSchema).optional(),
-  meta: z.record(z.any()).optional(),
-  notes: z.object({ text: z.string().optional() }).optional(),
-});
+export const graderConversationSchema: z.ZodType<GraderConversation> = z.object(
+  {
+    messages: z.array(graderMessageSchema).optional(),
+    meta: z.record(z.any()).optional(),
+    notes: z.object({ text: z.string().optional() }).optional(),
+  },
+);
 
-export default z.object({
-  session: graderConversationSchema,
-});
+const graderConversationContextSchema: z.ZodType<GraderConversationContext> = z
+  .object({
+    session: graderConversationSchema,
+  });
+
+export default graderConversationContextSchema;
diff --git a/packages/gambit-core/schemas/graders/contexts/conversation_tools.ts b/packages/gambit-core/schemas/graders/contexts/conversation_tools.ts
index a525e4248..7ef3479aa 100644
--- a/packages/gambit-core/schemas/graders/contexts/conversation_tools.ts
+++ b/packages/gambit-core/schemas/graders/contexts/conversation_tools.ts
@@ -1,6 +1,34 @@
 import { z } from "zod";
 
-const graderToolCallSchema = z.object({
+type GraderToolCall = {
+  id?: string;
+  type?: string;
+  function: {
+    name: string;
+    arguments?: string;
+  };
+};
+
+type GraderConversationMessageWithTools = {
+  role: string;
+  content?: unknown;
+  name?: string;
+  tool_calls?: Array<GraderToolCall>;
+};
+
+type GraderConversationWithTools = {
+  messages?: Array<GraderConversationMessageWithTools>;
+  meta?: Record<string, unknown>;
+  notes?: {
+    text?: string;
+  };
+};
+
+type GraderConversationToolsContext = {
+  session: GraderConversationWithTools;
+};
+
+const graderToolCallSchema: z.ZodType<GraderToolCall> = z.object({
   id: z.string().optional(),
   type: z.string().optional(),
   function: z.object({
@@ -9,19 +37,27 @@ const graderToolCallSchema = z.object({
   }),
 });
 
-export const graderConversationMessageWithToolsSchema = z.object({
+export const graderConversationMessageWithToolsSchema: z.ZodType<
+  GraderConversationMessageWithTools
+> = z.object({
   role: z.string(),
   content: z.any().optional(),
   name: z.string().optional(),
   tool_calls: z.array(graderToolCallSchema).optional(),
 });
 
-export const graderConversationWithToolsSchema = z.object({
+export const graderConversationWithToolsSchema: z.ZodType<
+  GraderConversationWithTools
+> = z.object({
   messages: z.array(graderConversationMessageWithToolsSchema).optional(),
   meta: z.record(z.any()).optional(),
   notes: z.object({ text: z.string().optional() }).optional(),
 });
 
-export default z.object({
+const graderConversationToolsContextSchema: z.ZodType<
+  GraderConversationToolsContext
+> = z.object({
   session: graderConversationWithToolsSchema,
 });
+
+export default graderConversationToolsContextSchema;
diff --git a/packages/gambit-core/schemas/graders/contexts/turn.ts b/packages/gambit-core/schemas/graders/contexts/turn.ts
index cf5d523e8..55646564f 100644
--- a/packages/gambit-core/schemas/graders/contexts/turn.ts
+++ b/packages/gambit-core/schemas/graders/contexts/turn.ts
@@ -4,7 +4,14 @@ import {
   graderMessageSchema,
 } from "./conversation.ts";
 
-export default z.object({
+type GraderTurnContext = {
+  session: z.infer<typeof graderConversationSchema>;
+  messageToGrade: z.infer<typeof graderMessageSchema>;
+};
+
+const graderTurnContextSchema: z.ZodType<GraderTurnContext> = z.object({
   session: graderConversationSchema,
   messageToGrade: graderMessageSchema,
 });
+
+export default graderTurnContextSchema;
diff --git a/packages/gambit-core/schemas/graders/contexts/turn_tools.ts b/packages/gambit-core/schemas/graders/contexts/turn_tools.ts
index 50b0e8f34..f555ab31b 100644
--- a/packages/gambit-core/schemas/graders/contexts/turn_tools.ts
+++ b/packages/gambit-core/schemas/graders/contexts/turn_tools.ts
@@ -1,6 +1,35 @@
 import { z } from "zod";
 
-const graderToolCallSchema = z.object({
+type GraderToolCall = {
+  id?: string;
+  type?: string;
+  function: {
+    name: string;
+    arguments?: string;
+  };
+};
+
+type GraderMessageWithTools = {
+  role: string;
+  content?: unknown;
+  name?: string;
+  tool_calls?: Array<GraderToolCall>;
+};
+
+type GraderConversationWithTools = {
+  messages?: Array<GraderMessageWithTools>;
+  meta?: Record<string, unknown>;
+  notes?: {
+    text?: string;
+  };
+};
+
+type GraderTurnToolsContext = {
+  session: GraderConversationWithTools;
+  messageToGrade: GraderMessageWithTools;
+};
+
+const graderToolCallSchema: z.ZodType<GraderToolCall> = z.object({
   id: z.string().optional(),
   type: z.string().optional(),
   function: z.object({
@@ -9,20 +38,26 @@ const graderToolCallSchema = z.object({
   }),
 });
 
-export const graderMessageWithToolsSchema = z.object({
-  role: z.string(),
-  content: z.any().optional(),
-  name: z.string().optional(),
-  tool_calls: z.array(graderToolCallSchema).optional(),
-});
+export const graderMessageWithToolsSchema: z.ZodType<GraderMessageWithTools> = z
+  .object({
+    role: z.string(),
+    content: z.any().optional(),
+    name: z.string().optional(),
+    tool_calls: z.array(graderToolCallSchema).optional(),
+  });
 
-export const graderConversationWithToolsSchema = z.object({
+export const graderConversationWithToolsSchema: z.ZodType<
+  GraderConversationWithTools
+> = z.object({
   messages: z.array(graderMessageWithToolsSchema).optional(),
   meta: z.record(z.any()).optional(),
   notes: z.object({ text: z.string().optional() }).optional(),
 });
 
-export default z.object({
-  session: graderConversationWithToolsSchema,
-  messageToGrade: graderMessageWithToolsSchema,
-});
+const graderTurnToolsContextSchema: z.ZodType<GraderTurnToolsContext> = z
+  .object({
+    session: graderConversationWithToolsSchema,
+    messageToGrade: graderMessageWithToolsSchema,
+  });
+
+export default graderTurnToolsContextSchema;
diff --git a/packages/gambit-core/schemas/graders/grader_output.ts b/packages/gambit-core/schemas/graders/grader_output.ts
index 0b9f38a89..bb9306687 100644
--- a/packages/gambit-core/schemas/graders/grader_output.ts
+++ b/packages/gambit-core/schemas/graders/grader_output.ts
@@ -1,7 +1,15 @@
 import { z } from "zod";
 
-export default z.object({
+type GraderOutput = {
+  score: number;
+  reason: string;
+  evidence?: Array<string>;
+};
+
+const graderOutputSchema: z.ZodType<GraderOutput> = z.object({
   score: z.number().int().min(-3).max(3),
   reason: z.string(),
   evidence: z.array(z.string()).optional(),
 });
+
+export default graderOutputSchema;
diff --git a/packages/gambit-core/schemas/graders/respond.ts b/packages/gambit-core/schemas/graders/respond.ts
index 4d690e7f4..8d2359a1d 100644
--- a/packages/gambit-core/schemas/graders/respond.ts
+++ b/packages/gambit-core/schemas/graders/respond.ts
@@ -1,9 +1,19 @@
 import { z } from "zod";
 
-export default z.object({
+type RespondEnvelope = {
+  payload?: unknown;
+  status?: number;
+  message?: string;
+  code?: string;
+  meta?: Record<string, unknown>;
+};
+
+const respondSchema: z.ZodType<RespondEnvelope> = z.object({
   payload: z.any().optional(),
   status: z.number().int().optional(),
   message: z.string().optional(),
   code: z.string().optional(),
   meta: z.record(z.any()).optional(),
 });
+
+export default respondSchema;
diff --git a/packages/gambit-core/schemas/scenarios/plain_chat_input_optional.ts b/packages/gambit-core/schemas/scenarios/plain_chat_input_optional.ts
index 3807c1c29..7f8731a15 100644
--- a/packages/gambit-core/schemas/scenarios/plain_chat_input_optional.ts
+++ b/packages/gambit-core/schemas/scenarios/plain_chat_input_optional.ts
@@ -1,3 +1,6 @@
 import { z } from "zod";
 
-export default z.string().optional();
+const plainChatInputOptionalSchema: z.ZodType<string | undefined> = z.string()
+  .optional();
+
+export default plainChatInputOptionalSchema;
diff --git a/packages/gambit-core/schemas/scenarios/plain_chat_output.ts b/packages/gambit-core/schemas/scenarios/plain_chat_output.ts
index 91d0a8d3f..221a1e087 100644
--- a/packages/gambit-core/schemas/scenarios/plain_chat_output.ts
+++ b/packages/gambit-core/schemas/scenarios/plain_chat_output.ts
@@ -1,3 +1,5 @@
 import { z } from "zod";
 
-export default z.string();
+const plainChatOutputSchema: z.ZodType<string> = z.string();
+
+export default plainChatOutputSchema;
diff --git a/packages/gambit-core/src/loader.ts b/packages/gambit-core/src/loader.ts
index 63b4727f4..326ff949d 100644
--- a/packages/gambit-core/src/loader.ts
+++ b/packages/gambit-core/src/loader.ts
@@ -25,6 +25,7 @@ import type {
   ActionDeckDefinition,
   CardDefinition,
   DeckDefinition,
+  ExternalToolDefinition,
   GraderDeckDefinition,
   LoadedCard,
   LoadedDeck,
@@ -174,6 +175,39 @@ function checkReserved(action: ActionDeckDefinition) {
   }
 }
 
+/**
+ * Validate and normalize a deck's external tool declarations.
+ * @param tools Tool entries from the deck definition; may be undefined.
+ * @param resolvedPath Deck path, appended to every validation error.
+ * @returns Tools with trimmed names; non-string descriptions become undefined.
+ * @throws Error for a missing, reserved, malformed, or over-long tool name.
+ */
+function normalizeExternalTools(
+  tools: DeckDefinition["tools"],
+  resolvedPath: string,
+): Array<ExternalToolDefinition> {
+  if (!tools) return [];
+  // Every message ends with the deck path so the offending file is obvious.
+  const where = `(${resolvedPath})`;
+  return tools.map((tool) => {
+    const name = String(tool.name ?? "").trim();
+    if (!name) throw new Error(`External tool must include a name ${where}`);
+    if (name.startsWith(RESERVED_TOOL_PREFIX)) {
+      throw new Error(
+        `External tool name ${name} is reserved (prefix ${RESERVED_TOOL_PREFIX}) ${where}`,
+      );
+    }
+    if (!TOOL_NAME_PATTERN.test(name) || name.length > MAX_TOOL_NAME_LENGTH) {
+      throw new Error(
+        `External tool name ${name} must match ${TOOL_NAME_PATTERN} and be <= ${MAX_TOOL_NAME_LENGTH} characters ${where}`,
+      );
+    }
+    const { description, inputSchema } = tool;
+    const text = typeof description === "string" ? description : undefined;
+    return { name, description: text, inputSchema };
+  });
+}
+
 async function loadCardInternal(
   cardPath: string,
   parentPath?: string,
@@ -272,6 +306,11 @@ export async function loadDeck(
       `Deck at ${resolved} did not export a valid deck definition`,
     );
   }
+  if ((deck as { mcpServers?: unknown }).mcpServers !== undefined) {
+    throw new Error(
+      `Deck-level [[mcpServers]] is unsupported in this phase (${resolved})`,
+    );
+  }
 
   const deckLabel = deck.label;
 
@@ -297,6 +336,15 @@ export async function loadDeck(
   }
 
   const actionDecks = Object.values(mergedActions);
+  const tools = normalizeExternalTools(deck.tools, resolved);
+  const actionNames = new Set(actionDecks.map((action) => action.name));
+  for (const tool of tools) {
+    if (actionNames.has(tool.name)) {
+      logger.warn(
+        `[gambit] tool ${tool.name} is shadowed by an action in ${resolved}`,
+      );
+    }
+  }
 
   const schemaAliases = normalizeDeckSchemas(deck, resolved);
   let inputSchema = schemaAliases.inputSchema;
@@ -383,6 +431,7 @@ export async function loadDeck(
       deck.graderDecks,
       resolved,
     ),
+    tools,
     contextSchema,
     responseSchema,
     inputSchema,
diff --git a/packages/gambit-core/src/markdown.test.ts b/packages/gambit-core/src/markdown.test.ts
index d813430ba..deca50193 100644
--- a/packages/gambit-core/src/markdown.test.ts
+++ b/packages/gambit-core/src/markdown.test.ts
@@ -343,6 +343,38 @@ Root deck.
   assert(deck.graderDecks[0].path.endsWith("graders/qa/PROMPT.md"));
 });
 
+// A file with no front matter delimiters loads as a body-only deck.
+Deno.test("markdown deck loads without front matter", async () => {
+  const tempDir = await Deno.makeTempDir();
+  const promptBody = `You are a plain markdown deck with no front matter.`;
+  const promptPath = await writeTempDeck(tempDir, "PROMPT.md", promptBody);
+
+  const loaded = await loadMarkdownDeck(promptPath);
+  // No front matter means no label; the raw text becomes the body.
+  assertEquals(loaded.label, undefined);
+  const body = loaded.body ?? "";
+  assertStringIncludes(body, "plain markdown deck");
+});
+
+// An opening `+++` with no closing delimiter must be rejected.
+Deno.test("markdown deck rejects malformed explicit front matter", async () => {
+  const tempDir = await Deno.makeTempDir();
+  const unterminated = `+++
+label = "broken"
+
+This file is missing a closing delimiter.
+`;
+  const promptPath = await writeTempDeck(tempDir, "PROMPT.md", unterminated);
+  const load = () => loadMarkdownDeck(promptPath);
+
+  // The loader reports the failure as a front matter parse error.
+  await assertRejects(
+    load,
+    Error,
+    "Failed to parse front matter",
+  );
+});
+
 Deno.test("markdown deck requires action descriptions in 1.0 actions", async () => {
   const dir = await Deno.makeTempDir();
   const deckPath = await writeTempDeck(
@@ -406,31 +438,93 @@ Root deck.
   ]);
 });
 
-Deno.test("markdown execute deck loads module and PROMPT overrides schemas", async () => {
+Deno.test("markdown deck rejects top-level execute", async () => {
   const dir = await Deno.makeTempDir();
-  const execPath = path.join(dir, "exec.ts");
-  const definitionsUrl = path.toFileUrl(
-    path.resolve("packages/gambit-core/src/definitions.ts"),
-  ).href;
-  await Deno.writeTextFile(
-    execPath,
-    `import { defineDeck } from "${definitionsUrl}";
-import { z } from "zod";
-
-export default defineDeck({
-  label: "exec",
-  contextSchema: z.object({ fromExec: z.string() }),
-  responseSchema: z.object({ out: z.string() }),
-  run: (_ctx) => ({ out: "ok" }),
+  const deckPath = await writeTempDeck(
+    dir,
+    "PROMPT.md",
+    `+++
+label = "root"
+execute = "./compute.deck.ts"
++++
+
+Root deck.
+`,
+  );
+
+  await assertRejects(
+    () => loadMarkdownDeck(deckPath),
+    Error,
+    "Top-level execute",
+  );
 });
+
+Deno.test("markdown deck rejects action target with both path and execute", async () => {
+  const dir = await Deno.makeTempDir();
+  const deckPath = await writeTempDeck(
+    dir,
+    "PROMPT.md",
+    `+++
+label = "root"
+
+[[actions]]
+name = "do_thing"
+path = "./actions/do/PROMPT.md"
+execute = "./actions/do.deck.ts"
+description = "Run do thing."
+contextSchema = "./schemas/in.zod.ts"
+responseSchema = "./schemas/out.zod.ts"
++++
+
+Root deck.
 `,
   );
 
-  const schemaPath = path.join(dir, "context.zod.ts");
+  await assertRejects(
+    () => loadMarkdownDeck(deckPath),
+    Error,
+    "exactly one of path or execute",
+  );
+});
+
+Deno.test("markdown deck rejects action target with neither path nor execute", async () => {
+  const dir = await Deno.makeTempDir();
+  const deckPath = await writeTempDeck(
+    dir,
+    "PROMPT.md",
+    `+++
+label = "root"
+
+[[actions]]
+name = "do_thing"
+description = "Run do thing."
++++
+
+Root deck.
+`,
+  );
+
+  await assertRejects(
+    () => loadMarkdownDeck(deckPath),
+    Error,
+    "exactly one of path or execute",
+  );
+});
+
+Deno.test("markdown deck normalizes actions execute targets with schemas", async () => {
+  const dir = await Deno.makeTempDir();
+  const inputSchemaPath = path.join(dir, "input.zod.ts");
+  const outputSchemaPath = path.join(dir, "output.zod.ts");
   await Deno.writeTextFile(
-    schemaPath,
+    inputSchemaPath,
     `import { z } from "zod";
-export default z.object({ fromPrompt: z.number() });
+export default z.object({ count: z.number() });
+`,
+  );
+  await Deno.writeTextFile(
+    outputSchemaPath,
+    `import { z } from "zod";
+export default z.object({ total: z.number() });
 `,
   );
 
@@ -438,19 +532,31 @@ export default z.object({ fromPrompt: z.number() });
     dir,
     "PROMPT.md",
     `+++
-label = "exec-root"
-execute = "./exec.ts"
-contextSchema = "./context.zod.ts"
+label = "root"
+
+[[actions]]
+name = "compute_rollup"
+execute = "./actions/compute_rollup.deck.ts"
+description = "Compute rollup totals."
+contextSchema = "./input.zod.ts"
+responseSchema = "./output.zod.ts"
 +++
 
-Execute deck.
+Root deck.
 `,
   );
 
   const deck = await loadMarkdownDeck(deckPath);
-  assert(deck.executor, "expected executor to be set");
-  assert(deck.contextSchema, "expected context schema to resolve");
-  deck.contextSchema.parse({ fromPrompt: 123 });
+  assertEquals(deck.actionDecks.length, 1);
+  assert(deck.actionDecks[0].path.endsWith("actions/compute_rollup.deck.ts"));
+  assertEquals(
+    deck.actionDecks[0].execute,
+    deck.actionDecks[0].path,
+  );
+  const parsedInput = deck.actionDecks[0].contextSchema?.parse({ count: 2 });
+  const parsedOutput = deck.actionDecks[0].responseSchema?.parse({ total: 3 });
+  assertEquals(parsedInput, { count: 2 });
+  assertEquals(parsedOutput, { total: 3 });
 });
 
 Deno.test("loadDeck resolves gambit://decks PROMPT.md", async () => {
@@ -459,3 +565,101 @@ Deno.test("loadDeck resolves gambit://decks PROMPT.md", async () => {
   );
   assertEquals(deck.label, "Codex SDK bridge");
 });
+
+// Declaring `[[mcpServers]]` in deck front matter is rejected at load time.
+Deno.test("markdown deck rejects unsupported mcpServers declarations", async () => {
+  const tempDir = await Deno.makeTempDir();
+  const withMcpServers = `+++
+label = "root"
+
+[[mcpServers]]
+name = "local"
+command = "node"
++++
+Root deck.
+`;
+  const promptPath = await writeTempDeck(tempDir, "PROMPT.md", withMcpServers);
+  const load = () => loadMarkdownDeck(promptPath);
+
+  // The error message names the offending table.
+  await assertRejects(
+    load,
+    Error,
+    "[[mcpServers]]",
+  );
+});
+
+// Tools load in declaration order; a tool sharing its name with an action
+// produces a "shadowed" warning instead of an error.
+Deno.test("markdown deck parses tools and warns when action shadows a tool", async () => {
+  const rootDir = await Deno.makeTempDir();
+  const actionDir = path.join(rootDir, "actions", "do");
+  await Deno.mkdir(actionDir, { recursive: true });
+  const actionDeck = `+++
+label = "do"
+contextSchema = "gambit://schemas/graders/respond.zod.ts"
+responseSchema = "gambit://schemas/graders/respond.zod.ts"
++++
+Action deck.
+`;
+  await writeTempDeck(actionDir, "PROMPT.md", actionDeck);
+
+  // Input schema module referenced by both tool declarations.
+  const toolSchema = `import { z } from "zod";
+export default z.object({ query: z.string() });
+`;
+  const schemaPath = path.join(rootDir, "tool_input.zod.ts");
+  await Deno.writeTextFile(schemaPath, toolSchema);
+
+  // The root deck declares the action plus two tools, one reusing its name.
+  const rootDeck = `+++
+label = "root"
+
+[[actions]]
+name = "search_docs"
+path = "./actions/do/PROMPT.md"
+description = "Run action."
+
+[[tools]]
+name = "search_docs"
+description = "External search."
+inputSchema = "./tool_input.zod.ts"
+
+[[tools]]
+name = "external_lookup"
+description = "External lookup."
+inputSchema = "./tool_input.zod.ts"
++++
+Root deck.
+`;
+  const rootPath = await writeTempDeck(rootDir, "PROMPT.md", rootDeck);
+
+  // Capture console.warn output while the deck loads.
+  const captured: Array<string> = [];
+  // deno-lint-ignore no-console
+  const previousWarn = console.warn;
+  // deno-lint-ignore no-console
+  console.warn = (message?: unknown, ...rest: Array<unknown>) => {
+    captured.push([message, ...rest].map(String).join(" "));
+  };
+  try {
+    const { tools } = await loadMarkdownDeck(rootPath);
+    assertEquals(tools.length, 2);
+    assertEquals(tools[0].name, "search_docs");
+    assertEquals(tools[1].name, "external_lookup");
+    const lookupSchema = tools[1].inputSchema;
+    assert(lookupSchema, "expected tool input schema");
+    assertEquals(lookupSchema?.parse({ query: "q" }), { query: "q" });
+  } finally {
+    // deno-lint-ignore no-console
+    console.warn = previousWarn;
+  }
+
+  const sawShadowWarning = captured.some((line) =>
+    line.includes("shadowed") && line.includes("search_docs")
+  );
+  assert(
+    sawShadowWarning,
+    "expected action-shadow warning for tool name collision",
+  );
+});
diff --git a/packages/gambit-core/src/markdown.ts b/packages/gambit-core/src/markdown.ts
index 080ae7e36..8842baa83 100644
--- a/packages/gambit-core/src/markdown.ts
+++ b/packages/gambit-core/src/markdown.ts
@@ -9,17 +9,18 @@ import {
   RESERVED_TOOL_PREFIX,
   TOOL_NAME_PATTERN,
 } from "./constants.ts";
-import { isCardDefinition, isDeckDefinition } from "./definitions.ts";
+import { isCardDefinition } from "./definitions.ts";
 import { loadCard } from "./loader.ts";
 import {
   normalizePermissionDeclaration,
   type PermissionDeclarationInput,
 } from "./permissions.ts";
-import { mergeZodObjects, toJsonSchema } from "./schema.ts";
+import { mergeZodObjects } from "./schema.ts";
 import { resolveBuiltinSchemaPath } from "./builtins.ts";
 import type {
   ActionDeckDefinition,
   DeckDefinition,
+  ExternalToolDefinition,
   GraderDeckDefinition,
   LoadedCard,
   LoadedDeck,
@@ -53,27 +54,6 @@ const END_TEXT = `
 If the entire workflow is finished and no further user turns should be sent, call the \`${GAMBIT_TOOL_END}\` tool with optional \`message\` and \`payload\` fields to explicitly end the session.
 `.trim();
 
-function normalizeJsonSchema(value: unknown): unknown {
-  if (Array.isArray(value)) {
-    return value.map((entry) => normalizeJsonSchema(entry));
-  }
-  if (value && typeof value === "object") {
-    const record = value as Record<string, unknown>;
-    const out: Record<string, unknown> = {};
-    for (const key of Object.keys(record).sort()) {
-      out[key] = normalizeJsonSchema(record[key]);
-    }
-    return out;
-  }
-  return value;
-}
-
-function schemasMatchDeep(a: ZodTypeAny, b: ZodTypeAny): boolean {
-  const aJson = normalizeJsonSchema(toJsonSchema(a as never));
-  const bJson = normalizeJsonSchema(toJsonSchema(b as never));
-  return JSON.stringify(aJson) === JSON.stringify(bJson);
-}
-
 function warnLegacyMarker(
   marker: keyof typeof LEGACY_MARKER_WARNINGS,
   replacement: string,
@@ -116,6 +96,29 @@ function toFileUrl(p: string): string {
   return path.toFileUrl(abs).href;
 }
 
+function startsWithFrontMatterDelimiter(raw: string): boolean {
+  const normalized = raw.startsWith("\uFEFF") ? raw.slice(1) : raw;
+  const trimmed = normalized.trimStart();
+  return /^(\+\+\+|---)\s*(\r?\n|$)/.test(trimmed);
+}
+
+function parseFrontMatterOrRaw(
+  raw: string,
+  resolvedPath: string,
+): { attrs: ParsedFrontmatter; body: string } {
+  try {
+    return extract(raw) as { attrs: ParsedFrontmatter; body: string };
+  } catch (err) {
+    if (!startsWithFrontMatterDelimiter(raw)) {
+      return { attrs: {}, body: raw };
+    }
+    const message = err instanceof Error ? err.message : String(err);
+    throw new Error(
+      `Failed to parse front matter in ${resolvedPath}: ${message}`,
+    );
+  }
+}
+
 async function maybeLoadSchema(
   schemaPath: unknown,
   basePath: string,
@@ -198,20 +201,122 @@ function mergeDeckRefs<T extends DeckRef>(
   return Array.from(merged.values());
 }
 
-function normalizeActionDecks(
+async function normalizeActionDecks(
   entries: unknown,
   basePath: string,
   opts?: { requirePrompt?: boolean; requireDescription?: boolean },
-): Array<ActionDeckDefinition> {
-  return normalizeDeckRefs<ActionDeckDefinition>(entries, basePath, opts).map(
-    (entry) => {
-      const name = "name" in entry ? String(entry.name ?? "").trim() : "";
-      if (!name) {
-        throw new Error(`Action deck must include a name (${basePath})`);
-      }
-      return { ...entry, name };
-    },
-  );
+): Promise<Array<ActionDeckDefinition>> {
+  if (!Array.isArray(entries)) return [];
+  const out: Array<ActionDeckDefinition> = [];
+  for (const rawEntry of entries) {
+    if (!rawEntry || typeof rawEntry !== "object") continue;
+    const rec = rawEntry as Record<string, unknown>;
+    const name = String(rec.name ?? "").trim();
+    if (!name) {
+      throw new Error(`Action deck must include a name (${basePath})`);
+    }
+
+    const desc = typeof rec.description === "string"
+      ? rec.description.trim()
+      : "";
+    if (opts?.requireDescription && !desc) {
+      throw new Error(
+        `Action deck must include a description (${basePath})`,
+      );
+    }
+
+    const rawPath = typeof rec.path === "string" ? rec.path.trim() : "";
+    const rawExecute = typeof rec.execute === "string"
+      ? rec.execute.trim()
+      : "";
+    const hasPath = rawPath.length > 0;
+    const hasExecute = rawExecute.length > 0;
+    if (hasPath === hasExecute) {
+      throw new Error(
+        `Action deck must include exactly one of path or execute (${basePath})`,
+      );
+    }
+    if (hasPath && opts?.requirePrompt && !rawPath.endsWith("PROMPT.md")) {
+      throw new Error(
+        `Deck reference must point to PROMPT.md (${basePath})`,
+      );
+    }
+
+    const actionContextSchema = await maybeLoadSchema(
+      rec.contextSchema,
+      basePath,
+    );
+    const actionResponseSchema = await maybeLoadSchema(
+      rec.responseSchema,
+      basePath,
+    );
+    if (hasExecute && (!actionContextSchema || !actionResponseSchema)) {
+      throw new Error(
+        `Action execute target must include contextSchema and responseSchema (${basePath})`,
+      );
+    }
+
+    const selectedTarget = hasPath ? rawPath : rawExecute;
+    const normalizedPath = selectedTarget.startsWith("gambit://")
+      ? selectedTarget
+      : path.resolve(path.dirname(basePath), selectedTarget);
+    const normalized: ActionDeckDefinition = {
+      name,
+      path: normalizedPath,
+      description: desc || undefined,
+      label: typeof rec.label === "string" ? rec.label : undefined,
+      id: typeof rec.id === "string" ? rec.id : undefined,
+      execute: hasExecute ? normalizedPath : undefined,
+      contextSchema: actionContextSchema,
+      responseSchema: actionResponseSchema,
+    };
+    if (rec.permissions !== undefined) {
+      const parsed = normalizePermissionDeclaration(
+        rec.permissions as PermissionDeclarationInput,
+        path.dirname(basePath),
+      );
+      if (parsed) normalized.permissions = parsed;
+    }
+    out.push(normalized);
+  }
+  return out;
+}
+
+async function normalizeExternalTools(
+  refs: unknown,
+  basePath: string,
+): Promise<Array<ExternalToolDefinition>> {
+  if (!Array.isArray(refs)) return [];
+  const out: Array<ExternalToolDefinition> = [];
+  for (const entry of refs) {
+    if (!entry || typeof entry !== "object") continue;
+    const rec = entry as Record<string, unknown>;
+    const name = String(rec.name ?? "").trim();
+    if (!name) {
+      throw new Error(`External tool must include a name (${basePath})`);
+    }
+    if (name.startsWith(RESERVED_TOOL_PREFIX)) {
+      throw new Error(
+        `External tool name ${name} is reserved (prefix ${RESERVED_TOOL_PREFIX})`,
+      );
+    }
+    if (
+      !TOOL_NAME_PATTERN.test(name) || name.length > MAX_TOOL_NAME_LENGTH
+    ) {
+      throw new Error(
+        `External tool name ${name} must match ${TOOL_NAME_PATTERN} and be <= ${MAX_TOOL_NAME_LENGTH} characters`,
+      );
+    }
+    const inputSchema = await maybeLoadSchema(rec.inputSchema, basePath);
+    out.push({
+      name,
+      description: typeof rec.description === "string"
+        ? rec.description
+        : undefined,
+      inputSchema,
+    });
+  }
+  return out;
 }
 
 async function expandEmbedsInBody(args: {
@@ -278,16 +383,7 @@ export async function loadMarkdownCard(
   }
   const nextStack = [...stack, resolved];
   const raw = await Deno.readTextFile(resolved);
-  let attrs: ParsedFrontmatter;
-  let body: string;
-  try {
-    const parsed = extract(raw) as { attrs: ParsedFrontmatter; body: string };
-    attrs = parsed.attrs;
-    body = parsed.body;
-  } catch (err) {
-    const message = err instanceof Error ? err.message : String(err);
-    throw new Error(`Failed to parse front matter in ${resolved}: ${message}`);
-  }
+  const { attrs, body } = parseFrontMatterOrRaw(raw, resolved);
   const candidate = attrs as unknown;
   if (isCardDefinition(candidate)) {
     // treat attrs as ts-shaped card
@@ -299,7 +395,7 @@ export async function loadMarkdownCard(
   }
   const hasNewActionField = (attrs as { actionDecks?: unknown }).actionDecks;
   const legacyActions = (attrs as { actions?: unknown }).actions;
-  const actionDecks = normalizeActionDecks(
+  const actionDecks = await normalizeActionDecks(
     hasNewActionField ?? legacyActions,
     resolved,
   );
@@ -388,29 +484,33 @@ export async function loadMarkdownDeck(
     ? path.resolve(path.dirname(parentPath), filePath)
     : path.resolve(filePath);
   const raw = await Deno.readTextFile(resolved);
-  let attrs: ParsedFrontmatter;
-  let body: string;
-  try {
-    const parsed = extract(raw) as { attrs: ParsedFrontmatter; body: string };
-    attrs = parsed.attrs;
-    body = parsed.body;
-  } catch (err) {
-    const message = err instanceof Error ? err.message : String(err);
-    throw new Error(`Failed to parse front matter in ${resolved}: ${message}`);
-  }
+  const { attrs, body } = parseFrontMatterOrRaw(raw, resolved);
   const deckAttrs = attrs as { deck?: DeckDefinition } & DeckDefinition;
   const deckMeta: Partial<DeckDefinition> =
     (deckAttrs.deck ?? deckAttrs) as DeckDefinition;
+  if ((deckMeta as { mcpServers?: unknown }).mcpServers !== undefined) {
+    throw new Error(
+      `Deck-level [[mcpServers]] is unsupported in this phase (${resolved})`,
+    );
+  }
+  if ((deckMeta as { execute?: unknown }).execute !== undefined) {
+    throw new Error(
+      `Top-level execute in PROMPT.md is unsupported (${resolved})`,
+    );
+  }
 
   const hasNewActionDecks = (deckMeta as {
     actionDecks?: unknown;
   }).actionDecks;
   const canonicalActions = (deckMeta as { actions?: unknown }).actions;
-  const actionDecks = normalizeActionDecks(canonicalActions, resolved, {
+  const actionDecks = await normalizeActionDecks(canonicalActions, resolved, {
     requirePrompt: true,
     requireDescription: true,
   });
-  const legacyActionDecks = normalizeActionDecks(hasNewActionDecks, resolved);
+  const legacyActionDecks = await normalizeActionDecks(
+    hasNewActionDecks,
+    resolved,
+  );
   if (hasNewActionDecks) {
     logger.warn(
       `[gambit] deck at ${resolved} uses deprecated "actionDecks"; use "[[actions]]" instead.`,
@@ -480,56 +580,6 @@ export async function loadMarkdownDeck(
     warnLegacySchema(resolved, "outputSchema", "responseSchema");
   }
 
-  const executePath = (deckMeta as { execute?: unknown }).execute;
-  let executor: DeckDefinition["run"] | DeckDefinition["execute"] | undefined;
-  let executeContextSchema: ZodTypeAny | undefined;
-  let executeResponseSchema: ZodTypeAny | undefined;
-  if (typeof executePath === "string" && executePath.trim()) {
-    const execResolved = path.resolve(path.dirname(resolved), executePath);
-    const mod = await import(toFileUrl(execResolved));
-    const executeDeck = mod.default;
-    if (!isDeckDefinition(executeDeck)) {
-      throw new Error(
-        `Execute module at ${execResolved} did not export a valid deck definition`,
-      );
-    }
-    executor = typeof executeDeck.run === "function"
-      ? executeDeck.run
-      : typeof executeDeck.execute === "function"
-      ? executeDeck.execute
-      : undefined;
-    if (!executor) {
-      throw new Error(
-        `Execute module at ${execResolved} must export a deck with run(ctx)`,
-      );
-    }
-    executeContextSchema = executeDeck.contextSchema ?? executeDeck.inputSchema;
-    executeResponseSchema = executeDeck.responseSchema ??
-      executeDeck.outputSchema;
-  }
-  if (executor && deckMeta.modelParams) {
-    logger.warn(
-      `[gambit] deck at ${resolved} sets execute + modelParams; modelParams will be ignored.`,
-    );
-  }
-
-  if (
-    contextSchema && executeContextSchema &&
-    !schemasMatchDeep(contextSchema, executeContextSchema)
-  ) {
-    logger.warn(
-      `[gambit] deck at ${resolved} has mismatched contextSchema between PROMPT.md and execute module (pre-1.0: warn; 1.0+: error)`,
-    );
-  }
-  if (
-    responseSchema && executeResponseSchema &&
-    !schemasMatchDeep(responseSchema, executeResponseSchema)
-  ) {
-    logger.warn(
-      `[gambit] deck at ${resolved} has mismatched responseSchema between PROMPT.md and execute module (pre-1.0: warn; 1.0+: error)`,
-    );
-  }
-
   const allCards = flattenCards(cards);
   const cleanedBody = replaced.body;
   const allowEnd = Boolean(deckMeta.allowEnd) ||
@@ -546,8 +596,8 @@ export async function loadMarkdownDeck(
     mergedActions[action.name] = action;
   }
 
-  let mergedContextSchema = contextSchema ?? executeContextSchema;
-  let mergedResponseSchema = responseSchema ?? executeResponseSchema;
+  let mergedContextSchema = contextSchema;
+  let mergedResponseSchema = responseSchema;
   for (const card of allCards) {
     mergedContextSchema = mergeZodObjects(
       mergedContextSchema,
@@ -610,6 +660,18 @@ export async function loadMarkdownDeck(
     : undefined;
 
   const mergedActionDecks = Object.values(mergedActions);
+  const tools = await normalizeExternalTools(
+    (deckMeta as { tools?: unknown }).tools,
+    resolved,
+  );
+  const actionNameSet = new Set(mergedActionDecks.map((action) => action.name));
+  for (const tool of tools) {
+    if (actionNameSet.has(tool.name)) {
+      logger.warn(
+        `[gambit] tool ${tool.name} is shadowed by an action in ${resolved}`,
+      );
+    }
+  }
   const rootTestDecks = normalizeDeckRefs<TestDeckDefinition>(
     (deckMeta as { testDecks?: unknown }).testDecks,
     resolved,
@@ -634,6 +696,7 @@ export async function loadMarkdownDeck(
     allowEnd,
     actionDecks: mergedActionDecks,
     actions: mergedActionDecks,
+    tools,
     testDecks: mergeDeckRefs(
       scenarioDecks,
       rootTestDecks,
@@ -647,13 +710,13 @@ export async function loadMarkdownDeck(
     cards: allCards,
     label: deckMeta.label,
     startMode: deckMeta.startMode,
-    modelParams: executor ? undefined : deckMeta.modelParams,
+    modelParams: deckMeta.modelParams,
     guardrails: deckMeta.guardrails,
     contextSchema: mergedContextSchema,
     responseSchema: mergedResponseSchema,
     inputSchema: mergedInputSchema,
     outputSchema: mergedOutputSchema,
-    executor,
+    executor: undefined,
     handlers,
     respond: Boolean(deckMeta.respond) ||
       replaced.respond ||
diff --git a/packages/gambit-core/src/permissions.test.ts b/packages/gambit-core/src/permissions.test.ts
index ea7cfcf5f..dc20f9cdc 100644
--- a/packages/gambit-core/src/permissions.test.ts
+++ b/packages/gambit-core/src/permissions.test.ts
@@ -1,11 +1,13 @@
-import { assert, assertEquals } from "@std/assert";
+import { assert, assertEquals, assertThrows } from "@std/assert";
 import * as path from "@std/path";
 import {
   canReadPath,
   canRunCommand,
   canRunPath,
+  canWritePath,
   normalizePermissionDeclaration,
   normalizePermissionDeclarationToSet,
+  type PermissionDeclarationInput,
   resolveEffectivePermissions,
 } from "./permissions.ts";
 
@@ -129,6 +131,102 @@ Deno.test("child-only inherited permissions use child baseDir for relative check
   );
 });
 
+Deno.test("path grants cover descendant files within the directory tree", () => {
+  const set = normalizePermissionDeclarationToSet(
+    {
+      read: ["./shared"],
+      write: ["./shared", "./local.txt"],
+    },
+    "/workspace/decks/root",
+  );
+  assert(set, "expected normalized permission set");
+
+  assertEquals(
+    canReadPath(set, "./shared/prompts/prompt.txt"),
+    true,
+    "read grants must apply to files beneath a declared directory",
+  );
+  assertEquals(
+    canReadPath(set, "./shared"),
+    true,
+    "read grants must apply to the directory itself",
+  );
+  assertEquals(
+    canReadPath(set, "./other/path.txt"),
+    false,
+    "read grants must not leak into sibling directories",
+  );
+  assertEquals(
+    canWritePath(set, "./shared/prompts/prompt.txt"),
+    true,
+    "write grants must apply to files beneath a declared directory",
+  );
+  assertEquals(
+    canWritePath(set, "./local.txt"),
+    true,
+    "write grants must still allow file-specific declarations",
+  );
+  assertEquals(
+    canWritePath(set, "./local.txt.bak"),
+    false,
+    "write grants must not allow unrelated files",
+  );
+});
+
+Deno.test("canonical read checks deny symlink escapes outside granted roots", async () => {
+  const dir = await Deno.makeTempDir();
+  const allowedDir = path.join(dir, "allowed");
+  const outsideDir = path.join(dir, "outside");
+  await Deno.mkdir(allowedDir, { recursive: true });
+  await Deno.mkdir(outsideDir, { recursive: true });
+
+  const outsideFile = path.join(outsideDir, "secret.txt");
+  await Deno.writeTextFile(outsideFile, "secret");
+
+  const symlinkPath = path.join(allowedDir, "secret-link.txt");
+  await Deno.symlink(outsideFile, symlinkPath);
+
+  const set = normalizePermissionDeclarationToSet(
+    { read: ["./allowed"] },
+    dir,
+  );
+  assert(set, "expected normalized permission set");
+
+  assertEquals(
+    canReadPath(set, symlinkPath),
+    false,
+    "symlink traversal must not bypass read root",
+  );
+});
+
+Deno.test("canonical write checks deny symlink parent escapes", async () => {
+  const dir = await Deno.makeTempDir();
+  const allowedDir = path.join(dir, "allowed");
+  const outsideDir = path.join(dir, "outside");
+  await Deno.mkdir(allowedDir, { recursive: true });
+  await Deno.mkdir(outsideDir, { recursive: true });
+
+  const symlinkDir = path.join(allowedDir, "linked");
+  await Deno.symlink(outsideDir, symlinkDir);
+
+  const set = normalizePermissionDeclarationToSet(
+    { write: ["./allowed"] },
+    dir,
+  );
+  assert(set, "expected normalized permission set");
+
+  assertEquals(
+    canWritePath(set, path.join(symlinkDir, "escaped.txt")),
+    false,
+    "symlink traversal must not bypass write root",
+  );
+  assertEquals(
+    canWritePath(set, path.join(allowedDir, "safe.txt")),
+    true,
+    "writes inside granted root should remain allowed",
+  );
+});
+
 Deno.test("run grants keep path vs command semantics separate", () => {
   const set = normalizePermissionDeclarationToSet(
     {
@@ -147,22 +245,33 @@ Deno.test("run grants keep path vs command semantics separate", () => {
   assertEquals(canRunCommand(set, "bin/tool"), false);
 });
 
-Deno.test("run object-form booleans honor all-access semantics", () => {
-  const pathsTrue = normalizePermissionDeclarationToSet(
-    { run: { paths: true } },
+Deno.test("run=true grants all run access", () => {
+  const runAll = normalizePermissionDeclarationToSet(
+    { run: true },
     "/workspace",
   );
-  assert(pathsTrue, "expected normalized permission set for paths=true");
-  assertEquals(canRunPath(pathsTrue, "/workspace/bin/anything"), true);
-  assertEquals(canRunCommand(pathsTrue, "anything"), true);
+  assert(runAll, "expected normalized permission set for run=true");
+  assertEquals(canRunPath(runAll, "/workspace/bin/anything"), true);
+  assertEquals(canRunCommand(runAll, "anything"), true);
+});
 
-  const commandsTrue = normalizePermissionDeclarationToSet(
-    { run: { commands: true } },
-    "/workspace",
+Deno.test("run object-form booleans are rejected", () => {
+  const invalidPaths = {
+    run: { paths: true },
+  } as unknown as PermissionDeclarationInput;
+  const invalidCommands = {
+    run: { commands: false },
+  } as unknown as PermissionDeclarationInput;
+  assertThrows(
+    () => normalizePermissionDeclarationToSet(invalidPaths, "/workspace"),
+    Error,
+    "permissions.run.paths must be an array in object form",
+  );
+  assertThrows(
+    () => normalizePermissionDeclarationToSet(invalidCommands, "/workspace"),
+    Error,
+    "permissions.run.commands must be an array in object form",
   );
-  assert(commandsTrue, "expected normalized permission set for commands=true");
-  assertEquals(canRunPath(commandsTrue, "/workspace/bin/anything"), true);
-  assertEquals(canRunCommand(commandsTrue, "anything"), true);
 });
 
 Deno.test("unspecified kinds deny by default when a layer is provided", () => {
diff --git a/packages/gambit-core/src/permissions.ts b/packages/gambit-core/src/permissions.ts
index f7debdcb6..32d450e3c 100644
--- a/packages/gambit-core/src/permissions.ts
+++ b/packages/gambit-core/src/permissions.ts
@@ -11,8 +11,8 @@ export type RunPermissionInput =
   | boolean
   | Array<string>
   | {
-    paths?: boolean | Array<string>;
-    commands?: boolean | Array<string>;
+    paths?: Array<string>;
+    commands?: Array<string>;
   };
 
 export type PermissionDeclarationInput = Partial<{
@@ -175,19 +175,22 @@ function normalizeRun(
     paths?: unknown;
     commands?: unknown;
   };
+  if (typeof record.paths === "boolean") {
+    throw new Error(
+      "permissions.run.paths must be an array in object form; use permissions.run=true for full run access",
+    );
+  }
+  if (typeof record.commands === "boolean") {
+    throw new Error(
+      "permissions.run.commands must be an array in object form; use permissions.run=true for full run access",
+    );
+  }
   const pathsScope = normalizeList(record.paths, "run", baseDir, {
     resolvePaths: true,
   });
   const commandsScope = normalizeList(record.commands, "run", baseDir, {
     resolvePaths: false,
   });
-  if (pathsScope.all || commandsScope.all) {
-    return {
-      all: true,
-      paths: new Set<string>(),
-      commands: new Set<string>(),
-    };
-  }
   return {
     all: false,
     paths: pathsScope.values,
@@ -424,9 +427,61 @@ export function resolveEffectivePermissions(args: {
   };
 }
 
+/**
+ * Checks whether `target` is covered by `scope`. Target and grants are both
+ * canonicalized first; a grant matches as an exact path or a directory root.
+ */
 function matchScope(scope: NormalizedScope, target: string): boolean {
   if (scope.all) return true;
-  return scope.values.has(target);
+  const canonicalTarget = canonicalizePath(target);
+  if (!canonicalTarget) return false; // fail closed: unresolvable target
+
+  for (const root of scope.values) {
+    const canonicalRoot = canonicalizePath(root);
+    if (!canonicalRoot) continue; // an unresolvable grant matches nothing
+    if (pathWithinRoot(canonicalRoot, canonicalTarget)) return true;
+  }
+  return false;
+}
+
+function pathWithinRoot(root: string, target: string): boolean {
+  if (root === target) return true; // exact grant: the root itself
+  const rel = path.relative(root, target); // ".." or "../x" escapes; "..x" is a child
+  return rel.length > 0 && !/^\.\.(?:[\\/]|$)/.test(rel) && !path.isAbsolute(rel);
+}
+
+function canonicalizePath(target: string): string | undefined {
+  const resolved = path.resolve(target);
+  try {
+    return path.resolve(Deno.realPathSync(resolved)); // real path of an existing entry
+  } catch (err) {
+    if (err instanceof Deno.errors.NotFound) {
+      return canonicalizeMissingPath(resolved); // path does not exist (yet)
+    }
+    return undefined; // any other failure: callers treat undefined as "no match"
+  }
+}
+
+function canonicalizeMissingPath(target: string): string | undefined {
+  const suffix: Array<string> = []; // missing trailing segments, deepest first
+  let probe = target;
+
+  while (true) {
+    try {
+      const canonicalBase = path.resolve(Deno.realPathSync(probe));
+      if (suffix.length === 0) return canonicalBase;
+      return path.resolve(canonicalBase, ...suffix.reverse()); // re-append in order
+    } catch (err) {
+      if (err instanceof Deno.errors.NotFound) {
+        const parent = path.dirname(probe);
+        if (parent === probe) return undefined; // no further parent to try
+        suffix.push(path.basename(probe));
+        probe = parent; // retry with the next ancestor up
+        continue;
+      }
+      return undefined; // non-NotFound failure: give up on this path
+    }
+  }
+}
 
 /**
@@ -463,8 +518,22 @@ export function canRunPath(
   targetPath: string,
 ): boolean {
   if (set.run.all) return true;
-  const resolved = path.resolve(set.baseDir, targetPath);
-  return set.run.paths.has(resolved);
+  const resolvedTarget = path.resolve(set.baseDir, targetPath);
+  const canonicalTarget = canonicalizePath(resolvedTarget);
+  if (!canonicalTarget) return false;
+  // Run-path grants are exact binary grants; deny symlink-mediated execution.
+  if (canonicalTarget !== resolvedTarget) return false;
+  for (const allowedPath of set.run.paths) {
+    const resolvedAllowed = path.resolve(set.baseDir, allowedPath);
+    if (resolvedAllowed !== resolvedTarget) continue;
+    const canonicalAllowed = canonicalizePath(
+      resolvedAllowed,
+    );
+    if (!canonicalAllowed) continue;
+    if (canonicalAllowed !== resolvedAllowed) continue;
+    if (canonicalAllowed === canonicalTarget) return true;
+  }
+  return false;
 }
 
 /**
diff --git a/packages/gambit-core/src/runtime.test.ts b/packages/gambit-core/src/runtime.test.ts
index 24c4e8dc8..af6bdaea4 100644
--- a/packages/gambit-core/src/runtime.test.ts
+++ b/packages/gambit-core/src/runtime.test.ts
@@ -2,8 +2,9 @@ import { assert, assertEquals, assertRejects } from "@std/assert";
 import * as path from "@std/path";
 import { loadDeck } from "./loader.ts";
 import { loadState } from "./state.ts";
-import { runDeck } from "./runtime.ts";
+import { isRunCanceledError, runDeck } from "./runtime.ts";
 import type {
+  JSONValue,
   ModelMessage,
   ModelProvider,
   ResponseItem,
@@ -58,6 +59,69 @@ Deno.test("deck loads contextSchema/responseSchema aliases", async () => {
   assert(deck.outputSchema, "expected legacy outputSchema alias to be set");
 });
 
+Deno.test("compute deck supports canonical schema module imports", async () => {
+  const dir = await Deno.makeTempDir();
+  const modHref = modImportPath();
+  const deckPath = await writeTempDeck(
+    dir,
+    "canonical-schema-import.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    import contextSchema from "@bolt-foundry/gambit-core/schemas/scenarios/plain_chat_input_optional.zod.ts";
+    import responseSchema from "@bolt-foundry/gambit-core/schemas/scenarios/plain_chat_output.zod.ts";
+    export default defineDeck({
+      contextSchema,
+      responseSchema,
+      run: () => "ok",
+    });
+    `,
+  );
+
+  const result = await runDeck({
+    path: deckPath,
+    input: "hello",
+    modelProvider: dummyProvider,
+    isRoot: true,
+  });
+
+  assertEquals(result, "ok");
+});
+
+Deno.test("workspace import map cannot remap trusted schema namespaces", async () => {
+  const dir = await Deno.makeTempDir();
+  await Deno.writeTextFile(
+    path.join(dir, "deno.json"),
+    JSON.stringify({
+      imports: {
+        "@bolt-foundry/gambit-core/schemas/": "./shadow/",
+      },
+    }),
+  );
+  const modHref = modImportPath();
+  const deckPath = await writeTempDeck(
+    dir,
+    "trusted-schema-remap.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    export default defineDeck({
+      run: () => "ok",
+    });
+    `,
+  );
+
+  await assertRejects(
+    () =>
+      runDeck({
+        path: deckPath,
+        input: {},
+        modelProvider: dummyProvider,
+        isRoot: true,
+      }),
+    Error,
+    "trust-boundary violation",
+  );
+});
+
 Deno.test("compute deck returns validated output", async () => {
   const dir = await Deno.makeTempDir();
   const modHref = modImportPath();
@@ -716,6 +780,91 @@ Deno.test("isRoot inferred when omitted", async () => {
   assertEquals(result, "child:hi");
 });
 
+Deno.test("child deck timeout override tightens inherited deadline", async () => {
+  const origNow = performance.now;
+  let now = 0;
+  const dir = await Deno.makeTempDir();
+  const modHref = modImportPath();
+  const deckPath = await writeTempDeck(
+    dir,
+    "child-timeout.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.any(),
+      outputSchema: z.string(),
+      guardrails: { timeoutMs: 5 },
+      run() {
+        (globalThis).__advanceNow?.(20);
+        return "late";
+      }
+    });
+    `,
+  );
+
+  try {
+    // Patch the clock only where `finally` is guaranteed to restore it.
+    (performance as { now: () => number }).now = () => now;
+    (globalThis as { __advanceNow?: (delta: number) => void }).__advanceNow = (
+      delta,
+    ) => {
+      now += delta;
+    };
+    await assertRejects(
+      () =>
+        runDeck({
+          path: deckPath,
+          input: {},
+          modelProvider: dummyProvider,
+          isRoot: true,
+          guardrails: { timeoutMs: 1_000 },
+          runDeadlineMs: 1_000,
+        }),
+      Error,
+      "Timeout exceeded",
+    );
+  } finally {
+    delete (globalThis as { __advanceNow?: (delta: number) => void })
+      .__advanceNow;
+    (performance as { now: () => number }).now = origNow;
+  }
+});
+
+Deno.test("worker sandbox flag defaults false when env access is denied", async () => {
+  const dir = await Deno.makeTempDir();
+  const modHref = modImportPath();
+  const deckPath = await writeTempDeck(
+    dir,
+    "env-perm.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string(),
+      outputSchema: z.string(),
+      run(ctx) { return ctx.input; }
+    });
+    `,
+  );
+
+  const origGet = Deno.env.get;
+  try {
+    Deno.env.get = (() => {
+      throw new Deno.errors.PermissionDenied("env access denied");
+    }) as typeof Deno.env.get;
+    const result = await runDeck({
+      path: deckPath,
+      input: "ok",
+      modelProvider: dummyProvider,
+      isRoot: true,
+    });
+    assertEquals(result, "ok");
+  } finally {
+    Deno.env.get = origGet;
+  }
+});
+
 Deno.test("LLM deck streams via onStreamText", async () => {
   const dir = await Deno.makeTempDir();
   const modHref = modImportPath();
@@ -974,6 +1123,92 @@ Deno.test("responses mode stores response items and calls responses()", async ()
   assert((updatedState?.messages?.length ?? 0) > 0);
 });
 
+Deno.test("responses mode projects tool stream events into tool traces", async () => {
+  const dir = await Deno.makeTempDir();
+  const modHref = modImportPath();
+
+  const deckPath = await writeTempDeck(
+    dir,
+    "responses_tool_stream_events.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string(),
+      outputSchema: z.string(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
+  );
+
+  const traces: Array<TraceEvent> = [];
+  const provider: ModelProvider = {
+    responses({ onStreamEvent }) {
+      onStreamEvent?.(
+        {
+          type: "tool.call",
+          actionCallId: "tool_1",
+          name: "external_lookup",
+          args: { query: "hello" },
+        } as unknown as import("./types.ts").ResponseEvent,
+      );
+      onStreamEvent?.(
+        {
+          type: "tool.result",
+          actionCallId: "tool_1",
+          name: "external_lookup",
+          result: { ok: true },
+        } as unknown as import("./types.ts").ResponseEvent,
+      );
+      return Promise.resolve({
+        id: "resp_1",
+        object: "response",
+        output: [{
+          type: "message",
+          role: "assistant",
+          content: [{ type: "output_text", text: "done" }],
+        }],
+      });
+    },
+    chat() {
+      throw new Error("chat should not be called in responses mode");
+    },
+  };
+
+  const result = await runDeck({
+    path: deckPath,
+    input: undefined,
+    inputProvided: false,
+    initialUserMessage: "hello",
+    modelProvider: provider,
+    isRoot: true,
+    responsesMode: true,
+    stream: true,
+    trace: (event) => traces.push(event),
+  });
+
+  assertEquals(result, "done");
+  const modelCall = traces.find((event) => event.type === "model.call") as
+    | Extract<TraceEvent, { type: "model.call" }>
+    | undefined;
+  const toolCalls = traces.filter((event) =>
+    event.type === "tool.call" && event.name === "external_lookup"
+  ) as Array<Extract<TraceEvent, { type: "tool.call" }>>;
+  const toolResults = traces.filter((event) =>
+    event.type === "tool.result" && event.name === "external_lookup"
+  ) as Array<Extract<TraceEvent, { type: "tool.result" }>>;
+
+  assert(modelCall);
+  assertEquals(toolCalls.length, 1);
+  assertEquals(toolResults.length, 1);
+  assertEquals(toolCalls[0].actionCallId, "tool_1");
+  assertEquals(toolResults[0].actionCallId, "tool_1");
+  assertEquals(toolCalls[0].args, { query: "hello" });
+  assertEquals(toolCalls[0].parentActionCallId, modelCall.actionCallId);
+  assertEquals(toolResults[0].parentActionCallId, modelCall.actionCallId);
+  assertEquals(toolResults[0].result, { ok: true });
+});
+
 Deno.test("responses mode treats empty output as empty string", async () => {
   const dir = await Deno.makeTempDir();
   const modHref = modImportPath();
@@ -1366,6 +1601,55 @@ Deno.test("trace includes parentActionCallId hierarchy", async () => {
   assertEquals(childDeck.parentActionCallId, actionStart.actionCallId);
 });
 
+Deno.test("model.result trace includes model usage fields", async () => { // Checks that usage counts returned by the provider's chat() show up on the model.result trace event.
+  const dir = await Deno.makeTempDir();
+  const deckPath = await writeTempDeck(
+    dir,
+    "usage-trace.deck.md",
+    `
++++
+modelParams = { model = "dummy-model" }
++++
+
+Deck.
+`.trim(),
+  );
+
+  const traces: Array<TraceEvent> = []; // Every event handed to the trace callback, in arrival order.
+  const provider: ModelProvider = { // Stub provider: chat() always answers "ok" with fixed usage numbers.
+    chat() {
+      return Promise.resolve({
+        message: { role: "assistant", content: "ok" },
+        finishReason: "stop",
+        usage: {
+          promptTokens: 11,
+          completionTokens: 7,
+          totalTokens: 18,
+          reasoningTokens: 4,
+        },
+      });
+    },
+  };
+
+  await runDeck({
+    path: deckPath,
+    input: "hello",
+    modelProvider: provider,
+    isRoot: true,
+    trace: (ev) => traces.push(ev),
+  });
+
+  const modelResult = traces.find((event): event is Extract<
+    TraceEvent,
+    { type: "model.result" }
+  > => event.type === "model.result");
+  assert(modelResult, "expected model.result trace"); // Fail with a readable message if no such event was traced.
+  assertEquals(modelResult.usage?.promptTokens, 11); // Each usage field must equal what the stub reported above.
+  assertEquals(modelResult.usage?.completionTokens, 7);
+  assertEquals(modelResult.usage?.totalTokens, 18);
+  assertEquals(modelResult.usage?.reasoningTokens, 4);
+});
+
 Deno.test("non-root assistant text emits monolog trace", async () => {
   const dir = await Deno.makeTempDir();
   const modHref = modImportPath();
@@ -1534,337 +1818,3599 @@ Deck body.
   assertEquals(deck.cards.length, 2);
 });
 
-Deno.test("markdown deck strips inline embed markers from system prompt", async () => {
+Deno.test("markdown action execute target runs compute module and returns envelope", async () => { // A markdown deck's [[actions]] entry executes a TS compute deck; checks the tool schema offered to the model and the envelope sent back to it.
   const dir = await Deno.makeTempDir();
-
+  const modHref = modImportPath();
+  const inputSchemaPath = path.join(dir, "action_input.zod.ts"); // Named by contextSchema in the PROMPT.md front matter below.
+  const outputSchemaPath = path.join(dir, "action_output.zod.ts"); // Named by responseSchema in the PROMPT.md front matter below.
   await Deno.writeTextFile(
-    path.join(dir, "persona.card.md"),
+    inputSchemaPath,
     `
-+++
-+++
-
-Persona content.
-`.trim(),
+    import { z } from "zod";
+    export default z.object({ count: z.number() });
+    `,
   );
-
-  const deckPath = path.join(dir, "root.deck.md");
   await Deno.writeTextFile(
-    deckPath,
+    outputSchemaPath,
     `
-+++
-modelParams = { model = "dummy-model" }
-+++
-
-Deck intro before embed.
+    import { z } from "zod";
+    export default z.object({ total: z.number() });
+    `,
+  );
+  await Deno.writeTextFile(
+    path.join(dir, "compute_rollup.deck.ts"),
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      contextSchema: z.object({ count: z.number() }),
+      responseSchema: z.object({
+        status: z.number().optional(),
+        message: z.string().optional(),
+        payload: z.object({ total: z.number() }),
+      }),
+      run(ctx) {
+        return {
+          status: 201,
+          message: "computed",
+          payload: { total: ctx.input.count + 1 },
+        };
+      },
+    });
+    `,
+  );
 
-![Persona](./persona.card.md)
+  const rootPath = await writeTempDeck(
+    dir,
+    "PROMPT.md",
+    `+++
+label = "root"
+[modelParams]
+model = "dummy-model"
+
+[[actions]]
+name = "compute_rollup"
+execute = "./compute_rollup.deck.ts"
+description = "Compute totals."
+contextSchema = "./action_input.zod.ts"
+responseSchema = "./action_output.zod.ts"
++++
 
-Deck outro after embed.
-`.trim(),
+Root deck.
+`,
   );
 
-  const seen: Array<Array<ModelMessage>> = [];
+  let pass = 0; // Counts chat() calls so the stub can script each turn.
+  let seenToolParams: Record<string, unknown> | undefined; // Parameters offered for compute_rollup on the first chat() call.
+  let seenToolContent = ""; // Content of the tool message answering call-1, captured on the second chat() call.
   const provider: ModelProvider = {
-    chat({ messages }) {
-      seen.push(messages);
+    chat({ tools, messages }) {
+      if (pass === 0) { // First turn: record the tool definition, then ask for compute_rollup with count 2.
+        pass += 1;
+        const toolDef = tools?.find((entry) =>
+          entry.function.name === "compute_rollup"
+        );
+        seenToolParams = toolDef?.function.parameters;
+        return Promise.resolve({
+          message: { role: "assistant", content: null },
+          finishReason: "tool_calls",
+          toolCalls: [{
+            id: "call-1",
+            name: "compute_rollup",
+            args: { count: 2 },
+          }],
+        });
+      }
+      if (pass === 1) { // Second turn: walk messages from the end to find the reply to call-1.
+        pass += 1;
+        for (let i = messages.length - 1; i >= 0; i--) {
+          const message = messages[i];
+          if (message.role === "tool" && message.tool_call_id === "call-1") {
+            seenToolContent = String(message.content ?? "");
+            break;
+          }
+        }
+        return Promise.resolve({
+          message: { role: "assistant", content: "done" },
+          finishReason: "stop",
+        });
+      }
       return Promise.resolve({
-        message: { role: "assistant", content: "ok" },
+        message: { role: "assistant", content: "done" },
         finishReason: "stop",
       });
     },
   };
 
-  await runDeck({
-    path: deckPath,
-    input: "hi",
+  const result = await runDeck({
+    path: rootPath,
+    input: {},
     modelProvider: provider,
     isRoot: true,
+    inputProvided: true,
   });
 
-  const last = seen.at(-1);
-  const system = last?.find((m) => m.role === "system");
-  if (!system || typeof system.content !== "string") {
-    throw new Error("missing system message");
-  }
-
-  const content = system.content;
-  assertEquals(content.includes("![Persona](./persona.card.md)"), false);
-  assertEquals(content.includes("Deck intro before embed."), true);
-  assertEquals(content.includes("Deck outro after embed."), true);
-  assertEquals(content.includes("Persona content."), true);
+  assertEquals(result, "done");
+  const params = seenToolParams as {
+    required?: Array<string>;
+    properties?: Record<string, { type?: string }>;
+  };
+  assertEquals(params.required, ["count"]); // The offered parameters should match action_input.zod.ts: a required numeric count.
+  assertEquals(params.properties?.count?.type, "number");
+  const toolEnvelope = JSON.parse(seenToolContent) as {
+    status?: number;
+    message?: string;
+    payload?: { total?: number };
+  };
+  assertEquals(toolEnvelope.status, 201); // status/message are the values returned by the compute deck's run().
+  assertEquals(toolEnvelope.message, "computed");
+  assertEquals(toolEnvelope.payload?.total, 3); // count 2 from the tool call, plus 1 in run().
 });
 
-Deno.test("markdown card embed cycles are rejected", async () => {
+Deno.test("markdown action execute target rejects invalid args with action schema", async () => { // The stub model calls compute_rollup with count: "bad" (schema wants a number); expects a 400 / invalid_input envelope in the tool reply.
   const dir = await Deno.makeTempDir();
-
+  const modHref = modImportPath();
   await Deno.writeTextFile(
-    path.join(dir, "a.card.md"),
+    path.join(dir, "action_input.zod.ts"),
     `
-+++
-+++
-
-A card body.
-
-![B card](./b.card.md)
-`.trim(),
+    import { z } from "zod";
+    export default z.object({ count: z.number() });
+    `,
   );
-
   await Deno.writeTextFile(
-    path.join(dir, "b.card.md"),
+    path.join(dir, "action_output.zod.ts"),
     `
+    import { z } from "zod";
+    export default z.object({ total: z.number() });
+    `,
+  );
+  await Deno.writeTextFile(
+    path.join(dir, "compute_rollup.deck.ts"),
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      contextSchema: z.object({ count: z.number() }),
+      responseSchema: z.object({ total: z.number() }),
+      run(ctx) {
+        return { total: ctx.input.count + 1 };
+      },
+    });
+    `,
+  );
+  const rootPath = await writeTempDeck(
+    dir,
+    "PROMPT.md",
+    `+++
+label = "root"
+[modelParams]
+model = "dummy-model"
+
+[[actions]]
+name = "compute_rollup"
+execute = "./compute_rollup.deck.ts"
+description = "Compute totals."
+contextSchema = "./action_input.zod.ts"
+responseSchema = "./action_output.zod.ts"
 +++
-+++
+Root deck.
+`,
+  );
 
-B card body.
+  let pass = 0;
+  let seenToolContent = "";
+  const provider: ModelProvider = {
+    chat({ messages }) {
+      if (pass === 0) {
+        pass += 1;
+        return Promise.resolve({
+          message: { role: "assistant", content: null },
+          finishReason: "tool_calls",
+          toolCalls: [{
+            id: "call-1",
+            name: "compute_rollup",
+            args: { count: "bad" },
+          }],
+        });
+      }
+      if (pass === 1) {
+        pass += 1;
+        for (let i = messages.length - 1; i >= 0; i--) {
+          const message = messages[i];
+          if (message.role === "tool" && message.tool_call_id === "call-1") {
+            seenToolContent = String(message.content ?? "");
+            break;
+          }
+        }
+        return Promise.resolve({
+          message: { role: "assistant", content: "done" },
+          finishReason: "stop",
+        });
+      }
+      return Promise.resolve({
+        message: { role: "assistant", content: "done" },
+        finishReason: "stop",
+      });
+    },
+  };
 
-![A card](./a.card.md)
-`.trim(),
-  );
+  const result = await runDeck({
+    path: rootPath,
+    input: {},
+    modelProvider: provider,
+    isRoot: true,
+    inputProvided: true,
+  });
 
-  const deckPath = path.join(dir, "root.deck.md");
+  assertEquals(result, "done"); // The run still completes; the bad-args failure is checked only through the tool message below.
+  const toolEnvelope = JSON.parse(seenToolContent) as { // seenToolContent is the tool reply to call-1 that the stub captured on its second turn.
+    status?: number;
+    code?: string;
+  };
+  assertEquals(toolEnvelope.status, 400);
+  assertEquals(toolEnvelope.code, "invalid_input");
+});
+
+Deno.test("markdown external tools dispatch through onTool", async () => { // A [[tools]] entry in a markdown deck is offered to the model, the call is routed to the onTool hook, and the hook's envelope comes back as the tool reply.
+  const dir = await Deno.makeTempDir();
   await Deno.writeTextFile(
-    deckPath,
+    path.join(dir, "tool_input.zod.ts"),
     `
+    import { z } from "zod";
+    export default z.object({ query: z.string() });
+    `,
+  );
+  const rootPath = await writeTempDeck(
+    dir,
+    "PROMPT.md",
+    `+++
+label = "root"
+[modelParams]
+model = "dummy-model"
+
+[[tools]]
+name = "external_lookup"
+description = "External lookup."
+inputSchema = "./tool_input.zod.ts"
 +++
-modelParams = { model = "dummy-model" }
-+++
+Root deck.
+`,
+  );
 
-Deck with cyclic cards.
+  let pass = 0; // Counts chat() calls so the stub can script each turn.
+  let seenToolContent = ""; // Content of the tool message answering call-1.
+  let seenTools: Array<string> = []; // Tool names offered to the model on the first chat() call.
+  let seenOnToolInput: // The argument object the onTool hook received; stays undefined if the hook never runs.
+    | {
+      name: string;
+      args: Record<string, unknown>;
+      runId: string;
+      actionCallId: string;
+      parentActionCallId?: string;
+      deckPath: string;
+    }
+    | undefined;
+  const provider: ModelProvider = {
+    chat({ messages, tools }) {
+      if (pass === 0) { // First turn: note the offered tools, then request external_lookup.
+        pass += 1;
+        seenTools = (tools ?? []).map((entry) => entry.function.name);
+        return Promise.resolve({
+          message: { role: "assistant", content: null },
+          finishReason: "tool_calls",
+          toolCalls: [{
+            id: "call-1",
+            name: "external_lookup",
+            args: { query: "hello" },
+          }],
+        });
+      }
+      if (pass === 1) { // Second turn: walk messages from the end to find the reply to call-1.
+        pass += 1;
+        for (let i = messages.length - 1; i >= 0; i--) {
+          const message = messages[i];
+          if (message.role === "tool" && message.tool_call_id === "call-1") {
+            seenToolContent = String(message.content ?? "");
+            break;
+          }
+        }
+        return Promise.resolve({
+          message: { role: "assistant", content: "done" },
+          finishReason: "stop",
+        });
+      }
+      return Promise.resolve({
+        message: { role: "assistant", content: "done" },
+        finishReason: "stop",
+      });
+    },
+  };
 
-![A card](./a.card.md)
-`.trim(),
-  );
+  const result = await runDeck({
+    path: rootPath,
+    input: {},
+    modelProvider: provider,
+    isRoot: true,
+    inputProvided: true,
+    onTool: (input) => {
+      seenOnToolInput = input;
+      return {
+        status: 207,
+        message: "handled",
+        payload: { echo: String(input.args.query ?? "") },
+        meta: { source: "hook" },
+      };
+    },
+  });
 
-  await assertRejects(
-    () =>
-      runDeck({
-        path: deckPath,
-        input: "hi",
-        modelProvider: dummyProvider,
-        isRoot: true,
-      }),
-    Error,
-    "cycle",
+  assertEquals(result, "done");
+  assert(
+    seenTools.includes("external_lookup"),
+    "expected external tool in defs",
   );
+  assertEquals(seenOnToolInput?.name, "external_lookup");
+  assertEquals(seenOnToolInput?.args, { query: "hello" });
+  assertEquals(seenOnToolInput?.actionCallId, "call-1");
+  assertEquals(seenOnToolInput?.deckPath, rootPath);
+  assert(
+    typeof seenOnToolInput?.runId === "string" &&
+      seenOnToolInput.runId.length > 0,
+    "expected onTool runId",
+  );
+  const parsed = JSON.parse(seenToolContent) as {
+    status?: number;
+    message?: string;
+    payload?: { echo?: string };
+    meta?: { source?: string };
+  };
+  assertEquals(parsed.status, 207);
+  assertEquals(parsed.message, "handled");
+  assertEquals(parsed.payload?.echo, "hello");
+  assertEquals(parsed.meta?.source, "hook");
 });
 
-Deno.test("markdown card schema fragments merge into deck schemas", async () => {
+Deno.test("markdown external tools return explicit error when onTool is missing", async () => { // runDeck is called without an onTool option; the stub model calls external_lookup and the tool reply must parse to status 500 / code "missing_on_tool" while the run still returns "done".
   const dir = await Deno.makeTempDir();
-
   await Deno.writeTextFile(
-    path.join(dir, "fragments.card.md"),
+    path.join(dir, "tool_input.zod.ts"),
     `
+    import { z } from "zod";
+    export default z.object({ query: z.string() });
+    `,
+  );
+  const rootPath = await writeTempDeck(
+    dir,
+    "PROMPT.md",
+    `+++
+label = "root"
+[modelParams]
+model = "dummy-model"
+
+[[tools]]
+name = "external_lookup"
+description = "External lookup."
+inputSchema = "./tool_input.zod.ts"
 +++
-inputSchema = "./input_fragment.zod.ts"
-outputSchema = "./output_fragment.zod.ts"
-+++
-
-Fragments card body.
-`.trim(),
+Root deck.
+`,
   );
 
+  let pass = 0;
+  let seenToolContent = "";
+  const provider: ModelProvider = {
+    chat({ messages }) {
+      if (pass === 0) {
+        pass += 1;
+        return Promise.resolve({
+          message: { role: "assistant", content: null },
+          finishReason: "tool_calls",
+          toolCalls: [{
+            id: "call-1",
+            name: "external_lookup",
+            args: { query: "hello" },
+          }],
+        });
+      }
+      if (pass === 1) {
+        pass += 1;
+        for (let i = messages.length - 1; i >= 0; i--) {
+          const message = messages[i];
+          if (message.role === "tool" && message.tool_call_id === "call-1") {
+            seenToolContent = String(message.content ?? "");
+            break;
+          }
+        }
+        return Promise.resolve({
+          message: { role: "assistant", content: "done" },
+          finishReason: "stop",
+        });
+      }
+      return Promise.resolve({
+        message: { role: "assistant", content: "done" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  const result = await runDeck({
+    path: rootPath,
+    input: {},
+    modelProvider: provider,
+    isRoot: true,
+    inputProvided: true,
+  });
+
+  assertEquals(result, "done");
+  const parsed = JSON.parse(seenToolContent) as {
+    status?: number;
+    code?: string;
+  };
+  assertEquals(parsed.status, 500);
+  assertEquals(parsed.code, "missing_on_tool");
+});
+
+Deno.test("markdown external tools return explicit error when onTool throws", async () => { // The onTool hook throws Error("boom"); the tool reply must parse to status 500 / code "tool_handler_error" / message "boom" while the run still returns "done".
+  const dir = await Deno.makeTempDir();
   await Deno.writeTextFile(
-    path.join(dir, "input_fragment.zod.ts"),
+    path.join(dir, "tool_input.zod.ts"),
     `
     import { z } from "zod";
-    export default z.object({ extra: z.string() });
-    `.trim(),
+    export default z.object({ query: z.string() });
+    `,
+  );
+  const rootPath = await writeTempDeck(
+    dir,
+    "PROMPT.md",
+    `+++
+label = "root"
+[modelParams]
+model = "dummy-model"
+
+[[tools]]
+name = "external_lookup"
+description = "External lookup."
+inputSchema = "./tool_input.zod.ts"
++++
+Root deck.
+`,
   );
 
+  let pass = 0;
+  let seenToolContent = "";
+  const provider: ModelProvider = {
+    chat({ messages }) {
+      if (pass === 0) {
+        pass += 1;
+        return Promise.resolve({
+          message: { role: "assistant", content: null },
+          finishReason: "tool_calls",
+          toolCalls: [{
+            id: "call-1",
+            name: "external_lookup",
+            args: { query: "hello" },
+          }],
+        });
+      }
+      if (pass === 1) {
+        pass += 1;
+        for (let i = messages.length - 1; i >= 0; i--) {
+          const message = messages[i];
+          if (message.role === "tool" && message.tool_call_id === "call-1") {
+            seenToolContent = String(message.content ?? "");
+            break;
+          }
+        }
+        return Promise.resolve({
+          message: { role: "assistant", content: "done" },
+          finishReason: "stop",
+        });
+      }
+      return Promise.resolve({
+        message: { role: "assistant", content: "done" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  const result = await runDeck({
+    path: rootPath,
+    input: {},
+    modelProvider: provider,
+    isRoot: true,
+    inputProvided: true,
+    onTool: () => {
+      throw new Error("boom");
+    },
+  });
+
+  assertEquals(result, "done");
+  const parsed = JSON.parse(seenToolContent) as {
+    status?: number;
+    code?: string;
+    message?: string;
+  };
+  assertEquals(parsed.status, 500);
+  assertEquals(parsed.code, "tool_handler_error");
+  assertEquals(parsed.message, "boom");
+});
+
+Deno.test("actions shadow external tools during runtime dispatch", async () => { // The deck declares both an action and an external tool named "lookup"; the call must be served by the action deck (payload.value "action") and the onTool hook must never run.
+  const dir = await Deno.makeTempDir();
+  const modHref = modImportPath();
   await Deno.writeTextFile(
-    path.join(dir, "output_fragment.zod.ts"),
+    path.join(dir, "action_input.zod.ts"),
     `
     import { z } from "zod";
-    export default z.object({ note: z.number() });
-    `.trim(),
+    export default z.object({ query: z.string() });
+    `,
+  );
+  await Deno.writeTextFile(
+    path.join(dir, "action_output.zod.ts"),
+    `
+    import { z } from "zod";
+    export default z.object({ value: z.string() });
+    `,
+  );
+  await Deno.writeTextFile(
+    path.join(dir, "lookup.deck.ts"),
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      contextSchema: z.object({ query: z.string() }),
+      responseSchema: z.object({ value: z.string() }),
+      run: () => ({ value: "action" }),
+    });
+    `,
+  );
+  const rootPath = await writeTempDeck(
+    dir,
+    "PROMPT.md",
+    `+++
+label = "root"
+[modelParams]
+model = "dummy-model"
+
+[[actions]]
+name = "lookup"
+execute = "./lookup.deck.ts"
+description = "Action lookup."
+contextSchema = "./action_input.zod.ts"
+responseSchema = "./action_output.zod.ts"
+
+[[tools]]
+name = "lookup"
+description = "External lookup."
+inputSchema = "./action_input.zod.ts"
++++
+Root deck.
+`,
   );
 
-  const deckPath = path.join(dir, "root.deck.md");
+  let pass = 0;
+  let seenToolContent = "";
+  let onToolCalled = false;
+  const provider: ModelProvider = {
+    chat({ messages }) {
+      if (pass === 0) {
+        pass += 1;
+        return Promise.resolve({
+          message: { role: "assistant", content: null },
+          finishReason: "tool_calls",
+          toolCalls: [{ id: "call-1", name: "lookup", args: { query: "x" } }],
+        });
+      }
+      if (pass === 1) {
+        pass += 1;
+        for (let i = messages.length - 1; i >= 0; i--) {
+          const message = messages[i];
+          if (message.role === "tool" && message.tool_call_id === "call-1") {
+            seenToolContent = String(message.content ?? "");
+            break;
+          }
+        }
+        return Promise.resolve({
+          message: { role: "assistant", content: "done" },
+          finishReason: "stop",
+        });
+      }
+      return Promise.resolve({
+        message: { role: "assistant", content: "done" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  const result = await runDeck({
+    path: rootPath,
+    input: {},
+    modelProvider: provider,
+    isRoot: true,
+    inputProvided: true,
+    onTool: () => {
+      onToolCalled = true;
+      return { value: "external" };
+    },
+  });
+
+  assertEquals(result, "done");
+  assertEquals(onToolCalled, false);
+  const parsed = JSON.parse(seenToolContent) as {
+    payload?: { value?: string };
+  };
+  assertEquals(parsed.payload?.value, "action");
+});
+
+Deno.test("markdown deck strips inline embed markers from system prompt", async () => { // Embeds a card with ![...](...) and checks the system message contains the card and deck text but not the embed marker.
+  const dir = await Deno.makeTempDir();
+
   await Deno.writeTextFile(
-    deckPath,
+    path.join(dir, "persona.card.md"),
     `
 +++
-inputSchema = "./base_input.zod.ts"
-outputSchema = "./base_output.zod.ts"
 +++
 
-Deck body.
+Persona content.
+`.trim(),
+  );
+
+  const deckPath = path.join(dir, "root.deck.md");
+  await Deno.writeTextFile(
+    deckPath,
+    `
++++
+modelParams = { model = "dummy-model" }
++++
+
+Deck intro before embed.
+
+![Persona](./persona.card.md)
+
+Deck outro after embed.
+`.trim(),
+  );
+
+  const seen: Array<Array<ModelMessage>> = []; // One entry per chat() call: the full message list the provider received.
+  const provider: ModelProvider = {
+    chat({ messages }) {
+      seen.push(messages);
+      return Promise.resolve({
+        message: { role: "assistant", content: "ok" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  await runDeck({
+    path: deckPath,
+    input: "hi",
+    modelProvider: provider,
+    isRoot: true,
+  });
+
+  const last = seen.at(-1); // Inspect the most recent chat() call.
+  const system = last?.find((m) => m.role === "system");
+  if (!system || typeof system.content !== "string") { // Also narrows system.content to string for the checks below.
+    throw new Error("missing system message");
+  }
+
+  const content = system.content;
+  assertEquals(content.includes("![Persona](./persona.card.md)"), false); // The raw embed syntax must not appear in the prompt.
+  assertEquals(content.includes("Deck intro before embed."), true); // Text on both sides of the embed must be kept.
+  assertEquals(content.includes("Deck outro after embed."), true);
+  assertEquals(content.includes("Persona content."), true); // The embedded card's body must be present.
+});
+
+Deno.test("markdown card embed cycles are rejected", async () => { // a.card.md embeds b.card.md and b.card.md embeds a.card.md; running a deck that embeds a.card.md must reject.
+  const dir = await Deno.makeTempDir();
+
+  await Deno.writeTextFile(
+    path.join(dir, "a.card.md"),
+    `
++++
++++
+
+A card body.
+
+![B card](./b.card.md)
+`.trim(),
+  );
+
+  await Deno.writeTextFile(
+    path.join(dir, "b.card.md"),
+    `
++++
++++
+
+B card body.
+
+![A card](./a.card.md)
+`.trim(),
+  );
+
+  const deckPath = path.join(dir, "root.deck.md");
+  await Deno.writeTextFile(
+    deckPath,
+    `
++++
+modelParams = { model = "dummy-model" }
++++
+
+Deck with cyclic cards.
+
+![A card](./a.card.md)
+`.trim(),
+  );
+
+  await assertRejects(
+    () =>
+      runDeck({
+        path: deckPath,
+        input: "hi",
+        modelProvider: dummyProvider, // dummyProvider is defined elsewhere in this file.
+        isRoot: true,
+      }),
+    Error,
+    "cycle", // Substring the rejection message is expected to contain.
+  );
+});
+
+Deno.test("markdown card schema fragments merge into deck schemas", async () => { // Loads a deck whose embedded card declares its own input/output schemas and checks the deck's schemas contain the keys from both.
+  const dir = await Deno.makeTempDir();
+
+  await Deno.writeTextFile(
+    path.join(dir, "fragments.card.md"),
+    `
++++
+inputSchema = "./input_fragment.zod.ts"
+outputSchema = "./output_fragment.zod.ts"
++++
+
+Fragments card body.
+`.trim(),
+  );
+
+  await Deno.writeTextFile(
+    path.join(dir, "input_fragment.zod.ts"),
+    `
+    import { z } from "zod";
+    export default z.object({ extra: z.string() });
+    `.trim(),
+  );
+
+  await Deno.writeTextFile(
+    path.join(dir, "output_fragment.zod.ts"),
+    `
+    import { z } from "zod";
+    export default z.object({ note: z.number() });
+    `.trim(),
+  );
+
+  const deckPath = path.join(dir, "root.deck.md");
+  await Deno.writeTextFile(
+    deckPath,
+    `
++++
+inputSchema = "./base_input.zod.ts"
+outputSchema = "./base_output.zod.ts"
++++
+
+Deck body.
+
+![Fragments card](./fragments.card.md)
+`.trim(),
+  );
+
+  await Deno.writeTextFile(
+    path.join(dir, "base_input.zod.ts"),
+    `
+    import { z } from "zod";
+    export default z.object({ text: z.string() });
+    `.trim(),
+  );
+
+  await Deno.writeTextFile(
+    path.join(dir, "base_output.zod.ts"),
+    `
+    import { z } from "zod";
+    export default z.object({ result: z.string() });
+    `.trim(),
+  );
+
+  const deck = await loadDeck(deckPath); // The deck is only loaded here; runDeck is not called in this test.
+  const inputShape = (deck.inputSchema as unknown as { // Cast so the test can read the schema's `shape` key map.
+    shape: Record<string, unknown>;
+  }).shape;
+  const outputShape = (deck.outputSchema as unknown as {
+    shape: Record<string, unknown>;
+  }).shape;
+
+  assertEquals(Object.keys(inputShape).sort(), ["extra", "text"]); // "text" is from base_input, "extra" from the card's input fragment.
+  assertEquals(Object.keys(outputShape).sort(), ["note", "result"]); // "result" is from base_output, "note" from the card's output fragment.
+});
+
+Deno.test("cards cannot declare handlers (ts card)", async () => { // A TypeScript card passes `handlers` to defineCard; running a deck that embeds it must reject.
+  const dir = await Deno.makeTempDir();
+  const modHref = modImportPath();
+
+  await writeTempDeck(
+    dir,
+    "bad_handlers.card.ts",
+    `
+    import { defineCard } from "${modHref}";
+    export default defineCard({
+      handlers: { onBusy: { path: "./noop.deck.ts" } }
+    });
+    `,
+  );
+
+  const deckPath = await writeTempDeck(
+    dir,
+    "root.deck.md",
+    `
++++
+modelParams = { model = "dummy-model" }
++++
+
+Deck.
+
+![Bad handlers](./bad_handlers.card.ts)
+`.trim(),
+  );
+
+  await assertRejects(
+    () =>
+      runDeck({
+        path: deckPath,
+        input: "hi",
+        modelProvider: dummyProvider, // dummyProvider is defined elsewhere in this file.
+        isRoot: true,
+      }),
+    Error,
+    "handlers", // Substring the rejection message is expected to contain.
+  );
+});
+
+Deno.test("cards cannot declare handlers (markdown card)", async () => { // A markdown card sets `handlers` in its front matter; running a deck that embeds it must reject.
+  const dir = await Deno.makeTempDir();
+
+  await Deno.writeTextFile(
+    path.join(dir, "bad.card.md"),
+    `
++++
+handlers = { onBusy = { path = "./noop.deck.ts" } }
++++
+
+Body.
+`.trim(),
+  );
+
+  const deckPath = path.join(dir, "root.deck.md");
+  await Deno.writeTextFile(
+    deckPath,
+    `
++++
+modelParams = { model = "dummy-model" }
++++
+
+Deck.
+
+![Bad card](./bad.card.md)
+`.trim(),
+  );
+
+  await assertRejects(
+    () =>
+      runDeck({
+        path: deckPath,
+        input: "hi",
+        modelProvider: dummyProvider, // dummyProvider is defined elsewhere in this file.
+        isRoot: true,
+      }),
+    Error,
+    "handlers", // Substring the rejection message is expected to contain.
+  );
+});
+
+Deno.test("runDeck resolves model arrays via modelProvider", async () => { // The deck lists two candidate models; checks resolveModel receives the array and chat() receives the model and params it returned.
+  const dir = await Deno.makeTempDir();
+  const deckPath = await writeTempDeck(
+    dir,
+    "root.deck.md",
+    `
++++
+modelParams = { model = ["ollama/llama3.1", "openrouter/openai/gpt-4o-mini"] }
++++
+
+Deck.
+`.trim(),
+  );
+  let resolvedInput: { model?: string | Array<string> } = {}; // Records the `model` value resolveModel was called with.
+  const provider: ModelProvider = {
+    resolveModel: (input) => {
+      resolvedInput = { model: input.model };
+      return Promise.resolve({ // Picks the second candidate and attaches a params object.
+        model: "openrouter/openai/gpt-4o-mini",
+        params: { temp: 1 },
+      });
+    },
+    chat: (input) => {
+      assertEquals(input.model, "openrouter/openai/gpt-4o-mini"); // chat() must see the resolved model, not the array.
+      assertEquals(input.params?.temp, 1); // ...and the params returned by resolveModel.
+      return Promise.resolve({
+        message: { role: "assistant", content: "ok" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  await runDeck({
+    path: deckPath,
+    input: "hi",
+    modelProvider: provider,
+    isRoot: true,
+  });
+
+  assert(Array.isArray(resolvedInput.model)); // Fails if resolveModel was never called or was given a single string.
+});
+
+Deno.test("modelParams.reasoning passes through to provider params", async () => { // The deck sets temperature and a reasoning table in modelParams; both must arrive in chat()'s params.
+  const dir = await Deno.makeTempDir();
+  const deckPath = await writeTempDeck(
+    dir,
+    "root.deck.md",
+    `
++++
+modelParams = { model = "dummy-model", temperature = 0.2, reasoning = { effort = "high", summary = "detailed" } }
++++
+
+Deck.
+`.trim(),
+  );
+
+  let seenParams: Record<string, unknown> | undefined; // params from the most recent chat() call.
+  const provider: ModelProvider = {
+    chat: (input) => {
+      seenParams = input.params;
+      return Promise.resolve({
+        message: { role: "assistant", content: "ok" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  await runDeck({
+    path: deckPath,
+    input: "hi",
+    modelProvider: provider,
+    isRoot: true,
+  });
+
+  assertEquals(seenParams?.temperature, 0.2);
+  assertEquals(seenParams?.reasoning, { // Compared as a whole object, so extra or missing keys fail the test.
+    effort: "high",
+    summary: "detailed",
+  });
+});
+
+Deno.test("modelParams.verbosity passes through to provider params", async () => { // The deck sets verbosity = "high" in modelParams; it must arrive unchanged in chat()'s params.
+  const dir = await Deno.makeTempDir();
+  const deckPath = await writeTempDeck(
+    dir,
+    "root.deck.md",
+    `
++++
+modelParams = { model = "dummy-model", verbosity = "high" }
++++
+
+Deck.
+`.trim(),
+  );
+
+  let seenParams: Record<string, unknown> | undefined; // params from the most recent chat() call.
+  const provider: ModelProvider = {
+    chat: (input) => {
+      seenParams = input.params;
+      return Promise.resolve({
+        message: { role: "assistant", content: "ok" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  await runDeck({
+    path: deckPath,
+    input: "hi",
+    modelProvider: provider,
+    isRoot: true,
+  });
+
+  assertEquals(seenParams?.verbosity, "high");
+});
+
+Deno.test("worker sandbox denies write when write permission is absent", async () => { // A compute deck calls Deno.writeTextFile while running with workerSandbox: true and write: false; runDeck must reject.
+  const dir = await Deno.makeTempDir();
+  const modHref = modImportPath();
+  const targetPath = path.join(dir, "denied-write.txt"); // File the deck will try to create.
+  const deckPath = await writeTempDeck(
+    dir,
+    "write-denied.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.object({}),
+      outputSchema: z.string(),
+      run: async () => {
+        await Deno.writeTextFile(${JSON.stringify(targetPath)}, "nope");
+        return "ok";
+      }
+    });
+    `,
+  );
+
+  await assertRejects(
+    () =>
+      runDeck({
+        path: deckPath,
+        input: {},
+        modelProvider: dummyProvider, // dummyProvider is defined elsewhere in this file.
+        isRoot: true,
+        workerSandbox: true,
+        workspacePermissions: { read: true, write: false, run: false }, // Reads allowed; writes and subprocesses are not.
+        workspacePermissionsBaseDir: dir,
+      }),
+    Error,
+    "write", // Substring the rejection message is expected to contain.
+  );
+});
+
+Deno.test("worker sandbox denies run when run permission is absent", async () => { // A compute deck spawns `sh` via Deno.Command while running with workerSandbox: true and run: false; runDeck must reject.
+  const dir = await Deno.makeTempDir();
+  const modHref = modImportPath();
+  const deckPath = await writeTempDeck(
+    dir,
+    "run-denied.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.object({}),
+      outputSchema: z.string(),
+      run: async () => {
+        const cmd = new Deno.Command("sh", { args: ["-c", "echo hi"] });
+        await cmd.output();
+        return "ok";
+      }
+    });
+    `,
+  );
+
+  await assertRejects(
+    () =>
+      runDeck({
+        path: deckPath,
+        input: {},
+        modelProvider: dummyProvider, // dummyProvider is defined elsewhere in this file.
+        isRoot: true,
+        workerSandbox: true,
+        workspacePermissions: { read: true, write: false, run: false }, // Reads allowed; writes and subprocesses are not.
+        workspacePermissionsBaseDir: dir,
+      }),
+    Error,
+    "run", // Substring the rejection message is expected to contain.
+  );
+});
+
+Deno.test("worker sandbox denies writes outside allowed roots", async () => { // Write permission is granted for one directory only; a deck writing to a file outside it must make runDeck reject.
+  const dir = await Deno.makeTempDir();
+  const modHref = modImportPath();
+  const allowedDir = path.join(dir, "allowed"); // The only path listed in the write permission below.
+  const blockedPath = path.join(dir, "blocked.txt"); // Sits directly in dir, next to allowedDir rather than inside it.
+  await Deno.mkdir(allowedDir, { recursive: true });
+  const deckPath = await writeTempDeck(
+    dir,
+    "write-outside.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.object({}),
+      outputSchema: z.string(),
+      run: async () => {
+        await Deno.writeTextFile(${JSON.stringify(blockedPath)}, "nope");
+        return "ok";
+      }
+    });
+    `,
+  );
+
+  await assertRejects(
+    () =>
+      runDeck({
+        path: deckPath,
+        input: {},
+        modelProvider: dummyProvider, // dummyProvider is defined elsewhere in this file.
+        isRoot: true,
+        workerSandbox: true,
+        workspacePermissions: {
+          read: true,
+          write: [allowedDir], // A path list instead of a boolean, unlike the write: false tests.
+          run: false,
+        },
+        workspacePermissionsBaseDir: dir,
+      }),
+    Error,
+    "write", // Substring the rejection message is expected to contain.
+  );
+});
+
+// With every workspace permission set to false, a trivial compute deck is
+// still expected to load and return its value.
+Deno.test("worker sandbox restrictive profile still boots compute deck", async () => {
+  const tmpDir = await Deno.makeTempDir();
+  const gambitHref = modImportPath();
+  const deckSource = `
+    import { defineDeck } from "${gambitHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.object({}),
+      outputSchema: z.string(),
+      run: () => "ok"
+    });
+    `;
+  const deckPath = await writeTempDeck(
+    tmpDir,
+    "restrictive-start.deck.ts",
+    deckSource,
+  );
+
+  const output = await runDeck({
+    path: deckPath,
+    input: {},
+    modelProvider: dummyProvider,
+    isRoot: true,
+    workerSandbox: true,
+    workspacePermissions: {
+      read: false,
+      write: false,
+      run: false,
+      net: false,
+      env: false,
+    },
+    workspacePermissionsBaseDir: tmpDir,
+  });
+  assertEquals(output, "ok");
+});
+
+// The deck imports defineDeck from the sibling definitions.ts and then tries
+// to read a card file under ../cards; with read: false the run is expected to
+// be rejected.
+Deno.test("worker sandbox bootstrap does not grant package-root reads", async () => {
+  const tmpDir = await Deno.makeTempDir();
+  const testDir = path.dirname(path.fromFileUrl(import.meta.url));
+  const definitionsUrl = path.toFileUrl(
+    path.resolve(testDir, "definitions.ts"),
+  );
+  const cardInPackage = path.resolve(
+    testDir,
+    "..",
+    "cards",
+    "generate-test-input.card.md",
+  );
+  const deckSource = `
+    import { defineDeck } from "${definitionsUrl.href}";
+    export default defineDeck({
+      run: async () => {
+        await Deno.readTextFile(${JSON.stringify(cardInPackage)});
+        return "ok";
+      }
+    });
+    `;
+  const deckPath = await writeTempDeck(
+    tmpDir,
+    "bootstrap-read-bypass.deck.ts",
+    deckSource,
+  );
+
+  // Expect an Error whose message includes "read".
+  const attempt = () =>
+    runDeck({
+      path: deckPath,
+      input: {},
+      modelProvider: dummyProvider,
+      isRoot: true,
+      workerSandbox: true,
+      workspacePermissions: {
+        read: false,
+        write: false,
+        run: false,
+        net: false,
+        env: false,
+      },
+      workspacePermissionsBaseDir: tmpDir,
+    });
+  await assertRejects(attempt, Error, "read");
+});
+
+// The deck source mentions the secret file only inside a line comment that
+// looks like an import; reading that file at run time is still expected to be
+// rejected under read: false.
+Deno.test("worker sandbox bootstrap ignores fake imports in comments", async () => {
+  const tmpDir = await Deno.makeTempDir();
+  const testDir = path.dirname(path.fromFileUrl(import.meta.url));
+  const definitionsUrl = path.toFileUrl(
+    path.resolve(testDir, "definitions.ts"),
+  );
+  const secretPath = path.join(tmpDir, "secret.txt");
+  await Deno.writeTextFile(secretPath, "top-secret");
+  const deckSource = `
+    import { defineDeck } from "${definitionsUrl.href}";
+    // import "${secretPath}"
+    export default defineDeck({
+      run: async () => {
+        await Deno.readTextFile(${JSON.stringify(secretPath)});
+        return "ok";
+      }
+    });
+    `;
+  const deckPath = await writeTempDeck(
+    tmpDir,
+    "comment-import-escalation.deck.ts",
+    deckSource,
+  );
+
+  // Expect an Error whose message includes "read".
+  const attempt = () =>
+    runDeck({
+      path: deckPath,
+      input: {},
+      modelProvider: dummyProvider,
+      isRoot: true,
+      workerSandbox: true,
+      workspacePermissions: {
+        read: false,
+        write: false,
+        run: false,
+        net: false,
+        env: false,
+      },
+      workspacePermissionsBaseDir: tmpDir,
+    });
+  await assertRejects(attempt, Error, "read");
+});
+
+// The deck lives in decks/ and statically imports a module from the sibling
+// secrets/ directory, then reads that module's file at run time. Under
+// read: false the run is expected to be rejected.
+Deno.test("worker sandbox bootstrap does not widen reads via imported modules", async () => {
+  const tmpDir = await Deno.makeTempDir();
+  const deckRoot = path.join(tmpDir, "decks");
+  const secretRoot = path.join(tmpDir, "secrets");
+  await Deno.mkdir(deckRoot, { recursive: true });
+  await Deno.mkdir(secretRoot, { recursive: true });
+  const testDir = path.dirname(path.fromFileUrl(import.meta.url));
+  const definitionsUrl = path.toFileUrl(
+    path.resolve(testDir, "definitions.ts"),
+  );
+  const secretModule = path.join(secretRoot, "secret-module.ts");
+  await Deno.writeTextFile(secretModule, 'export const secret = "nope";\n');
+  const deckSource = `
+    import { defineDeck } from "${definitionsUrl.href}";
+    import "../secrets/secret-module.ts";
+    export default defineDeck({
+      run: async () => {
+        await Deno.readTextFile(${JSON.stringify(secretModule)});
+        return "ok";
+      }
+    });
+    `;
+  const deckPath = await writeTempDeck(
+    deckRoot,
+    "import-read-escalation.deck.ts",
+    deckSource,
+  );
+
+  // Expect an Error whose message includes "read".
+  const attempt = () =>
+    runDeck({
+      path: deckPath,
+      input: {},
+      modelProvider: dummyProvider,
+      isRoot: true,
+      workerSandbox: true,
+      workspacePermissions: {
+        read: false,
+        write: false,
+        run: false,
+        net: false,
+        env: false,
+      },
+      workspacePermissionsBaseDir: tmpDir,
+    });
+  await assertRejects(attempt, Error, "read");
+});
+
+// The imported secrets/ module throws "inspect-secret-module-loaded" when it
+// is evaluated. The test instead expects a rejection mentioning "read access",
+// so that marker error must not be what surfaces.
+Deno.test("worker sandbox inspect does not execute out-of-root imports", async () => {
+  const tmpDir = await Deno.makeTempDir();
+  const deckRoot = path.join(tmpDir, "decks");
+  const secretRoot = path.join(tmpDir, "secrets");
+  await Deno.mkdir(deckRoot, { recursive: true });
+  await Deno.mkdir(secretRoot, { recursive: true });
+  const testDir = path.dirname(path.fromFileUrl(import.meta.url));
+  const definitionsUrl = path.toFileUrl(
+    path.resolve(testDir, "definitions.ts"),
+  );
+  const secretModule = path.join(secretRoot, "secret-module.ts");
+  await Deno.writeTextFile(
+    secretModule,
+    'throw new Error("inspect-secret-module-loaded");\n',
+  );
+  const deckSource = `
+    import { defineDeck } from "${definitionsUrl.href}";
+    import "../secrets/secret-module.ts";
+    export default defineDeck({
+      run: () => "ok",
+    });
+    `;
+  const deckPath = await writeTempDeck(
+    deckRoot,
+    "inspect-import-escalation.deck.ts",
+    deckSource,
+  );
+
+  const attempt = () =>
+    runDeck({
+      path: deckPath,
+      input: {},
+      modelProvider: dummyProvider,
+      isRoot: true,
+      workerSandbox: true,
+      workspacePermissions: {
+        read: false,
+        write: false,
+        run: false,
+        net: false,
+        env: false,
+      },
+      workspacePermissionsBaseDir: tmpDir,
+    });
+  await assertRejects(attempt, Error, "read access");
+});
+
+// The deck module awaits a promise that never settles at top level, so it
+// never finishes loading; runDeck is expected to reject with
+// "Deck inspection timed out".
+Deno.test("worker sandbox inspect enforces hard timeout", async () => {
+  const tmpDir = await Deno.makeTempDir();
+  const gambitHref = modImportPath();
+  const deckSource = `
+    import { defineDeck } from "${gambitHref}";
+    await new Promise(() => {});
+    export default defineDeck({
+      run: () => "ok",
+    });
+    `;
+  const deckPath = await writeTempDeck(
+    tmpDir,
+    "inspect-timeout.deck.ts",
+    deckSource,
+  );
+
+  const attempt = () =>
+    runDeck({
+      path: deckPath,
+      input: {},
+      modelProvider: dummyProvider,
+      isRoot: true,
+      workerSandbox: true,
+      workspacePermissions: {
+        read: true,
+        write: false,
+        run: false,
+        net: false,
+        env: false,
+      },
+      workspacePermissionsBaseDir: tmpDir,
+    });
+  await assertRejects(attempt, Error, "Deck inspection timed out");
+});
+
+// The deck statically imports an https:// module; with net: false the run is
+// expected to reject with a message mentioning "import access".
+Deno.test("worker sandbox blocks remote imports when net is false", async () => {
+  const tmpDir = await Deno.makeTempDir();
+  const gambitHref = modImportPath();
+  const deckSource = `
+    import { defineDeck } from "${gambitHref}";
+    import "https://example.com/gambit-runtime-net-blocked.ts";
+    export default defineDeck({
+      run: () => "ok",
+    });
+    `;
+  const deckPath = await writeTempDeck(
+    tmpDir,
+    "remote-import-denied.deck.ts",
+    deckSource,
+  );
+
+  const attempt = () =>
+    runDeck({
+      path: deckPath,
+      input: {},
+      modelProvider: dummyProvider,
+      isRoot: true,
+      workerSandbox: true,
+      workspacePermissions: {
+        read: true,
+        write: false,
+        run: false,
+        net: false,
+        env: false,
+      },
+      workspacePermissionsBaseDir: tmpDir,
+    });
+  await assertRejects(attempt, Error, "import access");
+});
+
+// Positive counterpart to the import-escalation tests: a helper module that
+// sits next to the deck is expected to load even with all permissions false.
+Deno.test("worker sandbox restrictive profile loads local deck imports", async () => {
+  const tmpDir = await Deno.makeTempDir();
+  const testDir = path.dirname(path.fromFileUrl(import.meta.url));
+  const definitionsUrl = path.toFileUrl(
+    path.resolve(testDir, "definitions.ts"),
+  );
+
+  const helperSource = `
+    export const helperValue = "ok-from-helper";
+    `;
+  await writeTempDeck(tmpDir, "helper.ts", helperSource);
+
+  const deckSource = `
+    import { defineDeck } from "${definitionsUrl.href}";
+    import { helperValue } from "./helper.ts";
+    export default defineDeck({
+      run: () => helperValue,
+    });
+    `;
+  const deckPath = await writeTempDeck(
+    tmpDir,
+    "restrictive-import.deck.ts",
+    deckSource,
+  );
+
+  const output = await runDeck({
+    path: deckPath,
+    input: {},
+    modelProvider: dummyProvider,
+    isRoot: true,
+    workerSandbox: true,
+    workspacePermissions: {
+      read: false,
+      write: false,
+      run: false,
+      net: false,
+      env: false,
+    },
+    workspacePermissionsBaseDir: tmpDir,
+  });
+  assertEquals(output, "ok-from-helper");
+});
+
+// A card that embeds gambit://snippets/respond.md is attached to a compute
+// deck; the deck is expected to load and run with all permissions false.
+Deno.test("worker sandbox restrictive profile loads markdown decks with builtin snippet embeds", async () => {
+  const tmpDir = await Deno.makeTempDir();
+  const gambitHref = modImportPath();
+  const cardSource = `![respond](gambit://snippets/respond.md)
+`;
+  await writeTempDeck(tmpDir, "snippet-embed.card.md", cardSource);
+
+  const deckSource = `
+    import { defineDeck } from "${gambitHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      cards: ["./snippet-embed.card.md"],
+      contextSchema: z.object({}),
+      responseSchema: z.string(),
+      run: () => "ok",
+    });
+    `;
+  const deckPath = await writeTempDeck(
+    tmpDir,
+    "builtin-snippet.deck.ts",
+    deckSource,
+  );
+
+  const output = await runDeck({
+    path: deckPath,
+    input: {},
+    modelProvider: dummyProvider,
+    isRoot: true,
+    workerSandbox: true,
+    workspacePermissions: {
+      read: false,
+      write: false,
+      run: false,
+      net: false,
+      env: false,
+    },
+    workspacePermissionsBaseDir: tmpDir,
+  });
+
+  assertEquals(output, "ok");
+});
+
+// A markdown deck (PROMPT.md) embeds a sibling faq.md; the stub provider
+// always answers "ok", so the run succeeds only if the deck loads.
+// NOTE(review): unlike the other "restrictive profile" tests, no
+// workspacePermissions are passed here - confirm the defaults are the
+// intended restrictive profile.
+Deno.test("worker sandbox restrictive profile loads markdown decks with local embeds", async () => {
+  const tmpDir = await Deno.makeTempDir();
+  const promptSource = `
++++
+[modelParams]
+model = "dummy-model"
++++
+
+Ground answers using this FAQ:
+![](./faq.md)
+`.trim();
+  const deckPath = await writeTempDeck(tmpDir, "PROMPT.md", promptSource);
+
+  const faqSource = `
+# FAQ
+
+- Q: Reset password?
+- A: Use the reset flow.
+`.trim();
+  await writeTempDeck(tmpDir, "faq.md", faqSource);
+
+  const output = await runDeck({
+    path: deckPath,
+    input: "hello",
+    modelProvider: {
+      chat() {
+        return Promise.resolve({
+          message: { role: "assistant", content: "ok" },
+          finishReason: "stop",
+        });
+      },
+    },
+    isRoot: true,
+    workerSandbox: true,
+  });
+
+  assertEquals(output, "ok");
+});
+
+// The deck module writes a file at top level (outside `run`). With
+// write: false the run is expected to reject, and the file must not exist
+// afterwards.
+Deno.test("worker sandbox blocks top-level deck side effects", async () => {
+  const tmpDir = await Deno.makeTempDir();
+  const gambitHref = modImportPath();
+  const markerPath = path.join(tmpDir, "top-level-side-effect.txt");
+  const deckSource = `
+    import { defineDeck } from "${gambitHref}";
+    import { z } from "zod";
+    await Deno.writeTextFile(${JSON.stringify(markerPath)}, "leak");
+    export default defineDeck({
+      inputSchema: z.object({}),
+      outputSchema: z.string(),
+      run: () => "ok",
+    });
+    `;
+  const deckPath = await writeTempDeck(
+    tmpDir,
+    "top-level-side-effect.deck.ts",
+    deckSource,
+  );
+
+  // Expect an Error whose message includes "write".
+  const attempt = () =>
+    runDeck({
+      path: deckPath,
+      input: {},
+      modelProvider: dummyProvider,
+      isRoot: true,
+      workerSandbox: true,
+      workspacePermissions: {
+        read: true,
+        write: false,
+        run: false,
+        net: false,
+        env: false,
+      },
+      workspacePermissionsBaseDir: tmpDir,
+    });
+  await assertRejects(attempt, Error, "write");
+
+  // Any stat failure is treated as "file was not created".
+  let leaked = true;
+  try {
+    await Deno.stat(markerPath);
+  } catch {
+    leaked = false;
+  }
+  assertEquals(leaked, false);
+});
+
+// Model-backed variant of the top-level side-effect test. The deck swallows
+// the failed write itself, so the run is expected to succeed with the stub
+// provider's "ok" while the marker file is never created.
+Deno.test(
+  "worker sandbox blocks top-level model deck side effects during host orchestration",
+  async () => {
+    const tmpDir = await Deno.makeTempDir();
+    const gambitHref = modImportPath();
+    const markerPath = path.join(tmpDir, "llm-top-level-side-effect.txt");
+    const deckSource = `
+      import { defineDeck } from "${gambitHref}";
+      import { z } from "zod";
+      try {
+        await Deno.writeTextFile(${JSON.stringify(markerPath)}, "leak");
+      } catch {
+        // no-op: sandboxed deck import should deny this write
+      }
+      export default defineDeck({
+        inputSchema: z.string(),
+        outputSchema: z.string(),
+        modelParams: { model: "dummy-model" },
+      });
+      `;
+    const deckPath = await writeTempDeck(
+      tmpDir,
+      "llm-top-level-side-effect.deck.ts",
+      deckSource,
+    );
+
+    // Stub model that always finishes with the assistant message "ok".
+    const stubModel: ModelProvider = {
+      chat: () =>
+        Promise.resolve({
+          message: { role: "assistant", content: "ok" },
+          finishReason: "stop",
+        }),
+    };
+
+    const output = await runDeck({
+      path: deckPath,
+      input: "hi",
+      modelProvider: stubModel,
+      isRoot: true,
+      workerSandbox: true,
+      workspacePermissions: {
+        read: true,
+        write: false,
+        run: false,
+        net: false,
+        env: false,
+      },
+      workspacePermissionsBaseDir: tmpDir,
+    });
+    assertEquals(output, "ok");
+
+    // Any stat failure is treated as "file was not created".
+    let leaked = true;
+    try {
+      await Deno.stat(markerPath);
+    } catch {
+      leaked = false;
+    }
+    assertEquals(leaked, false);
+  },
+);
+
+// The child deck declares `permissions: { write: true }` and writes a file,
+// but the root run is started with write: false. Spawning the child from the
+// parent via ctx.spawnAndWait is expected to reject with a "write" error.
+Deno.test("worker spawn bridge preserves parent permission ceiling for child", async () => {
+  const tmpDir = await Deno.makeTempDir();
+  const gambitHref = modImportPath();
+  const childTarget = path.join(tmpDir, "bridge-child-write.txt");
+
+  const childSource = `
+    import { defineDeck } from "${gambitHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.object({}),
+      outputSchema: z.string(),
+      permissions: { write: true },
+      run: async () => {
+        await Deno.writeTextFile(${JSON.stringify(childTarget)}, "nope");
+        return "child-ok";
+      }
+    });
+    `;
+  const childPath = await writeTempDeck(
+    tmpDir,
+    "bridge-child.deck.ts",
+    childSource,
+  );
+
+  // Embed the child path as a quoted, escaped string literal.
+  const childPathLiteral = JSON.stringify(childPath);
+  const parentSource = `
+    import { defineDeck } from "${gambitHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.object({}),
+      outputSchema: z.string(),
+      run: async (ctx) => {
+        return await ctx.spawnAndWait({ path: ${childPathLiteral}, input: {} });
+      }
+    });
+    `;
+  const parentPath = await writeTempDeck(
+    tmpDir,
+    "bridge-parent.deck.ts",
+    parentSource,
+  );
+
+  const attempt = () =>
+    runDeck({
+      path: parentPath,
+      input: {},
+      modelProvider: dummyProvider,
+      isRoot: true,
+      workerSandbox: true,
+      workspacePermissions: { read: true, write: false, run: false },
+      workspacePermissionsBaseDir: tmpDir,
+    });
+  await assertRejects(attempt, Error, "write");
+});
+
+// The child deck sleeps 300ms and then writes a file; the root run has an 80ms
+// timeout guardrail. After the expected "Timeout exceeded" rejection the test
+// waits 350ms and checks that the child's file was never written.
+Deno.test("worker timeout cancels spawned children before side effects", async () => {
+  const tmpDir = await Deno.makeTempDir();
+  const gambitHref = modImportPath();
+  const lateFile = path.join(tmpDir, "late-side-effect.txt");
+
+  const childSource = `
+    import { defineDeck } from "${gambitHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.object({}),
+      outputSchema: z.string(),
+      run: async () => {
+        await new Promise((resolve) => setTimeout(resolve, 300));
+        await Deno.writeTextFile(${JSON.stringify(lateFile)}, "late");
+        return "late";
+      }
+    });
+    `;
+  const childPath = await writeTempDeck(
+    tmpDir,
+    "timeout-child.deck.ts",
+    childSource,
+  );
+
+  // Embed the child path as a quoted, escaped string literal.
+  const childPathLiteral = JSON.stringify(childPath);
+  const parentSource = `
+    import { defineDeck } from "${gambitHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.object({}),
+      outputSchema: z.string(),
+      run: async (ctx) => {
+        return await ctx.spawnAndWait({ path: ${childPathLiteral}, input: {} });
+      }
+    });
+    `;
+  const parentPath = await writeTempDeck(
+    tmpDir,
+    "timeout-parent.deck.ts",
+    parentSource,
+  );
+
+  // Writes are allowed here, so only cancellation can prevent the late file.
+  const attempt = () =>
+    runDeck({
+      path: parentPath,
+      input: {},
+      modelProvider: dummyProvider,
+      isRoot: true,
+      workerSandbox: true,
+      guardrails: { timeoutMs: 80 },
+      workspacePermissions: { read: true, write: [tmpDir], run: false },
+      workspacePermissionsBaseDir: tmpDir,
+    });
+  await assertRejects(attempt, Error, "Timeout exceeded");
+
+  // Wait past the child's 300ms sleep before looking for the file.
+  await new Promise((resolve) => setTimeout(resolve, 350));
+  let sideEffectExists = true;
+  try {
+    await Deno.stat(lateFile);
+  } catch (err) {
+    // Only "not found" counts as absence; anything else is a real failure.
+    if (!(err instanceof Deno.errors.NotFound)) throw err;
+    sideEffectExists = false;
+  }
+  assertEquals(sideEffectExists, false);
+});
+
+// Runs once in-process and once in the worker sandbox. A compute parent
+// spawns a model-backed child without passing a message; the stub provider
+// echoes the last user message it sees, so the result shows whether the
+// root's initialUserMessage reached the child.
+Deno.test(
+  "compute spawnAndWait inherits initialUserMessage by default",
+  async () => {
+    for (const sandboxed of [false, true]) {
+      const mode = sandboxed ? "worker" : "inproc";
+      const tmpDir = await Deno.makeTempDir();
+      const gambitHref = modImportPath();
+      const childSource = `
+        import { defineDeck } from "${gambitHref}";
+        import { z } from "zod";
+        export default defineDeck({
+          contextSchema: z.object({}),
+          responseSchema: z.string(),
+          body: "echo latest user message",
+          modelParams: { model: "dummy-model" },
+        });
+        `;
+      const childPath = await writeTempDeck(
+        tmpDir,
+        `initial-user-child-${mode}.deck.ts`,
+        childSource,
+      );
+
+      // Embed the child path as a quoted, escaped string literal.
+      const childPathLiteral = JSON.stringify(childPath);
+      const parentSource = `
+        import { defineDeck } from "${gambitHref}";
+        import { z } from "zod";
+        export default defineDeck({
+          contextSchema: z.object({}),
+          responseSchema: z.string(),
+          run: async (ctx) => {
+            return await ctx.spawnAndWait({ path: ${childPathLiteral}, input: {} });
+          }
+        });
+        `;
+      const parentPath = await writeTempDeck(
+        tmpDir,
+        `initial-user-parent-${mode}.deck.ts`,
+        parentSource,
+      );
+
+      // Replies with the content of the last string-valued user message, or a
+      // sentinel when there is none.
+      const echoProvider: ModelProvider = {
+        chat({ messages }) {
+          let latest = "missing-user-message";
+          for (const msg of messages) {
+            if (msg.role === "user" && typeof msg.content === "string") {
+              latest = msg.content;
+            }
+          }
+          return Promise.resolve({
+            message: { role: "assistant", content: latest },
+            finishReason: "stop",
+          });
+        },
+      };
+
+      const output = await runDeck({
+        path: parentPath,
+        input: {},
+        modelProvider: echoProvider,
+        isRoot: true,
+        initialUserMessage: "forward-this-message",
+        workerSandbox: sandboxed,
+        workspacePermissions: {
+          read: true,
+          write: false,
+          run: false,
+          net: false,
+          env: false,
+        },
+        workspacePermissionsBaseDir: tmpDir,
+      });
+
+      assertEquals(output, "forward-this-message");
+    }
+  },
+);
+
+// Runs once in-process and once in the worker sandbox. The deck stores a
+// thread id in session meta (reusing an existing one when present) and
+// appends a user and an assistant message per turn. Two consecutive turns
+// share state through onStateUpdate, so the second turn must see the first
+// turn's thread id and grow the transcript from 2 to 4 messages.
+Deno.test(
+  "execute deck helpers persist session meta and transcript across turns",
+  async () => {
+    for (const sandboxed of [false, true]) {
+      const mode = sandboxed ? "worker" : "inproc";
+      const tmpDir = await Deno.makeTempDir();
+      const gambitHref = modImportPath();
+      const deckSource = `
+        import { defineDeck } from "${gambitHref}";
+        export default defineDeck({
+          run: (ctx) => {
+            const existing = ctx.getSessionMeta("codex.threadId");
+            const threadId = typeof existing === "string" && existing
+              ? existing
+              : "thread-" + crypto.randomUUID();
+            ctx.setSessionMeta("codex.threadId", threadId);
+            if (typeof ctx.initialUserMessage === "string" && ctx.initialUserMessage.trim()) {
+              ctx.appendMessage({ role: "user", content: ctx.initialUserMessage.trim() });
+            }
+            const assistant = "thread=" + threadId;
+            ctx.appendMessage({ role: "assistant", content: assistant });
+            return assistant;
+          },
+        });
+        `;
+      const deckPath = await writeTempDeck(
+        tmpDir,
+        `execute-session-meta-${mode}.deck.ts`,
+        deckSource,
+      );
+
+      // Latest state reported by runDeck; fed back in as `state` on each turn.
+      let savedState: import("./state.ts").SavedState | undefined;
+      const onStateUpdate = (next: import("./state.ts").SavedState) => {
+        savedState = next;
+      };
+
+      // Runs a single turn, resuming from whatever state was saved so far.
+      const runTurn = (initialUserMessage: string) =>
+        runDeck({
+          path: deckPath,
+          input: "",
+          modelProvider: dummyProvider,
+          isRoot: true,
+          initialUserMessage,
+          state: savedState,
+          onStateUpdate,
+          workerSandbox: sandboxed,
+          workspacePermissions: {
+            read: true,
+            write: false,
+            run: false,
+            net: false,
+            env: false,
+          },
+          workspacePermissionsBaseDir: tmpDir,
+        });
+
+      const first = await runTurn("first turn");
+      const firstThread = String(first).replace(/^thread=/, "");
+      assert(firstThread.length > 0);
+      assertEquals(savedState?.meta?.["codex.threadId"], firstThread);
+      assertEquals(savedState?.messages?.length, 2);
+      assertEquals(savedState?.messages?.[0]?.role, "user");
+      assertEquals(savedState?.messages?.[1]?.role, "assistant");
+
+      const second = await runTurn("second turn");
+      const secondThread = String(second).replace(/^thread=/, "");
+      assertEquals(secondThread, firstThread);
+      assertEquals(savedState?.meta?.["codex.threadId"], firstThread);
+      assertEquals(savedState?.messages?.length, 4);
+      assertEquals(savedState?.messages?.[2]?.role, "user");
+      assertEquals(savedState?.messages?.[3]?.role, "assistant");
+    }
+  },
+);
+
+// A model-backed parent exposes a compute child as the action "child". The
+// stub provider requests that action once (tool call id "call-child") and
+// then finishes with "done". The same scenario runs in-process and in the
+// worker sandbox; results must match, and the worker's trace must contain
+// action.start -> tool.call -> tool.result -> action.end for "child", in that
+// order, all tagged with actionCallId "call-child".
+Deno.test("orchestration worker preserves serial LLM trace ordering and correlation ids", async () => {
+  const dir = await Deno.makeTempDir();
+  const modHref = modImportPath();
+  const childPath = await writeTempDeck(
+    dir,
+    "serial-child.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.object({ value: z.string() }),
+      outputSchema: z.string(),
+      run: (ctx) => "child:" + ctx.input.value,
+    });
+    `,
+  );
+  // The child path is embedded via JSON.stringify so backslashes or quotes in
+  // the temp path cannot corrupt the generated deck source.
+  const parentPath = await writeTempDeck(
+    dir,
+    "serial-parent.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string(),
+      outputSchema: z.string(),
+      modelParams: { model: "dummy-model" },
+      actions: [{ name: "child", path: ${JSON.stringify(childPath)} }],
+    });
+    `,
+  );
+
+  // Each run gets a fresh provider because the provider counts its own calls:
+  // the first call requests the child action, later calls finish with "done".
+  const makeProvider = (): ModelProvider => {
+    let pass = 0;
+    return {
+      chat() {
+        pass += 1;
+        if (pass === 1) {
+          return Promise.resolve({
+            message: { role: "assistant", content: null },
+            finishReason: "tool_calls",
+            toolCalls: [{
+              id: "call-child",
+              name: "child",
+              args: { value: "x" },
+            }],
+          });
+        }
+        return Promise.resolve({
+          message: { role: "assistant", content: "done" },
+          finishReason: "stop",
+        });
+      },
+    };
+  };
+
+  // legacyTraces is collected but only the worker trace is asserted on below.
+  const legacyTraces: Array<TraceEvent> = [];
+  const workerTraces: Array<TraceEvent> = [];
+  const legacy = await runDeck({
+    path: parentPath,
+    input: "hi",
+    modelProvider: makeProvider(),
+    isRoot: true,
+    workerSandbox: false,
+    trace: (ev) => legacyTraces.push(ev),
+  });
+  const worker = await runDeck({
+    path: parentPath,
+    input: "hi",
+    modelProvider: makeProvider(),
+    isRoot: true,
+    workerSandbox: true,
+    trace: (ev) => workerTraces.push(ev),
+    workspacePermissions: { read: true, write: false, run: false },
+    workspacePermissionsBaseDir: dir,
+  });
+
+  assertEquals(legacy, "done");
+  assertEquals(worker, legacy);
+
+  // Correlation: every event for the child action carries the tool call id.
+  const workerActionStart = workerTraces.find((event) =>
+    event.type === "action.start" && event.name === "child"
+  ) as Extract<TraceEvent, { type: "action.start" }> | undefined;
+  const workerToolCall = workerTraces.find((event) =>
+    event.type === "tool.call" && event.name === "child"
+  ) as Extract<TraceEvent, { type: "tool.call" }> | undefined;
+  const workerToolResult = workerTraces.find((event) =>
+    event.type === "tool.result" && event.name === "child"
+  ) as Extract<TraceEvent, { type: "tool.result" }> | undefined;
+  const workerActionEnd = workerTraces.find((event) =>
+    event.type === "action.end" && event.name === "child"
+  ) as Extract<TraceEvent, { type: "action.end" }> | undefined;
+
+  assert(workerActionStart);
+  assert(workerToolCall);
+  assert(workerToolResult);
+  assert(workerActionEnd);
+  assertEquals(workerActionStart.actionCallId, "call-child");
+  assertEquals(workerToolCall.actionCallId, "call-child");
+  assertEquals(workerToolResult.actionCallId, "call-child");
+  assertEquals(workerActionEnd.actionCallId, "call-child");
+
+  // Ordering: the four events must appear strictly in sequence.
+  const startIdx = workerTraces.findIndex((event) =>
+    event.type === "action.start" && event.name === "child"
+  );
+  const callIdx = workerTraces.findIndex((event) =>
+    event.type === "tool.call" && event.name === "child"
+  );
+  const resultIdx = workerTraces.findIndex((event) =>
+    event.type === "tool.result" && event.name === "child"
+  );
+  const endIdx = workerTraces.findIndex((event) =>
+    event.type === "action.end" && event.name === "child"
+  );
+  assert(
+    startIdx >= 0 && callIdx > startIdx && resultIdx > callIdx &&
+      endIdx > resultIdx,
+  );
+});
+
+// The child deck declares `permissions: { write: true }` and writes to the
+// path it receives as input. The model-backed parent is run with
+// write: false, so when the stub provider triggers the child action the run
+// is expected to reject with a "write" error.
+Deno.test("orchestration worker enforces parent permission ceiling for LLM child actions", async () => {
+  const dir = await Deno.makeTempDir();
+  const modHref = modImportPath();
+  const deniedPath = path.join(dir, "llm-child-denied.txt");
+  const childPath = await writeTempDeck(
+    dir,
+    "llm-child-write.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.object({ path: z.string() }),
+      outputSchema: z.string(),
+      permissions: { write: true },
+      run: async (ctx) => {
+        await Deno.writeTextFile(ctx.input.path, "nope");
+        return "ok";
+      },
+    });
+    `,
+  );
+  // The child path is embedded via JSON.stringify so backslashes or quotes in
+  // the temp path cannot corrupt the generated deck source.
+  const parentPath = await writeTempDeck(
+    dir,
+    "llm-parent-write.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string(),
+      outputSchema: z.string(),
+      modelParams: { model: "dummy-model" },
+      actions: [{ name: "child", path: ${JSON.stringify(childPath)} }],
+    });
+    `,
+  );
+
+  // First call asks for the child action with the denied path; any later call
+  // finishes with "done".
+  let pass = 0;
+  const provider: ModelProvider = {
+    chat() {
+      pass += 1;
+      if (pass === 1) {
+        return Promise.resolve({
+          message: { role: "assistant", content: null },
+          finishReason: "tool_calls",
+          toolCalls: [{
+            id: "call-write",
+            name: "child",
+            args: { path: deniedPath },
+          }],
+        });
+      }
+      return Promise.resolve({
+        message: { role: "assistant", content: "done" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  // Expect an Error whose message includes "write".
+  await assertRejects(
+    () =>
+      runDeck({
+        path: parentPath,
+        input: "hi",
+        modelProvider: provider,
+        isRoot: true,
+        workerSandbox: true,
+        workspacePermissions: { read: true, write: false, run: false },
+        workspacePermissionsBaseDir: dir,
+      }),
+    Error,
+    "write",
+  );
+});
+
+// The root run allows writes under the temp dir, but the parent's action
+// reference to the child sets `permissions: { write: false }`. When the stub
+// provider triggers the child, its write is expected to be rejected with a
+// "write" error.
+Deno.test("orchestration worker enforces action reference narrowing for child compute deck writes", async () => {
+  const dir = await Deno.makeTempDir();
+  const modHref = modImportPath();
+  const writePath = path.join(dir, "child-write-should-be-denied.txt");
+  const childPath = await writeTempDeck(
+    dir,
+    "llm-child-reference-deny.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.object({ path: z.string() }),
+      outputSchema: z.string(),
+      run: async (ctx) => {
+        await Deno.writeTextFile(ctx.input.path, "should-fail");
+        return "ok";
+      },
+    });
+    `,
+  );
+  // The child path is embedded via JSON.stringify so backslashes or quotes in
+  // the temp path cannot corrupt the generated deck source.
+  const parentPath = await writeTempDeck(
+    dir,
+    "llm-parent-reference-deny.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string(),
+      outputSchema: z.string(),
+      modelParams: { model: "dummy-model" },
+      actions: [{
+        name: "child",
+        path: ${JSON.stringify(childPath)},
+        permissions: { write: false },
+      }],
+    });
+    `,
+  );
+
+  // First call asks for the child action with the target path; any later call
+  // finishes with "done".
+  let pass = 0;
+  const provider: ModelProvider = {
+    chat() {
+      pass += 1;
+      if (pass === 1) {
+        return Promise.resolve({
+          message: { role: "assistant", content: null },
+          finishReason: "tool_calls",
+          toolCalls: [{
+            id: "call-ref-deny",
+            name: "child",
+            args: { path: writePath },
+          }],
+        });
+      }
+      return Promise.resolve({
+        message: { role: "assistant", content: "done" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  await assertRejects(
+    () =>
+      runDeck({
+        path: parentPath,
+        input: "hi",
+        modelProvider: provider,
+        isRoot: true,
+        workerSandbox: true,
+        // Root allows writes; action reference must still narrow child writes.
+        workspacePermissions: { read: true, write: [dir], run: false },
+        workspacePermissionsBaseDir: dir,
+      }),
+    Error,
+    "write",
+  );
+});
+
+// The child deck sleeps 300ms and then writes a file; the model-backed parent
+// runs with an 80ms timeout guardrail. After the expected "Timeout exceeded"
+// rejection the test waits 350ms and checks that the file was never written.
+Deno.test("orchestration worker timeout cancels nested LLM child actions before side effects", async () => {
+  const dir = await Deno.makeTempDir();
+  const modHref = modImportPath();
+  const sideEffectPath = path.join(dir, "llm-late-side-effect.txt");
+  const childPath = await writeTempDeck(
+    dir,
+    "llm-timeout-child.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.object({}),
+      outputSchema: z.string(),
+      run: async () => {
+        await new Promise((resolve) => setTimeout(resolve, 300));
+        await Deno.writeTextFile(${JSON.stringify(sideEffectPath)}, "late");
+        return "late";
+      },
+    });
+    `,
+  );
+  // The child path is embedded via JSON.stringify so backslashes or quotes in
+  // the temp path cannot corrupt the generated deck source.
+  const parentPath = await writeTempDeck(
+    dir,
+    "llm-timeout-parent.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string(),
+      outputSchema: z.string(),
+      modelParams: { model: "dummy-model" },
+      actions: [{ name: "child", path: ${JSON.stringify(childPath)} }],
+    });
+    `,
+  );
+
+  // First call asks for the slow child action; any later call finishes with
+  // "done".
+  let pass = 0;
+  const provider: ModelProvider = {
+    chat() {
+      pass += 1;
+      if (pass === 1) {
+        return Promise.resolve({
+          message: { role: "assistant", content: null },
+          finishReason: "tool_calls",
+          toolCalls: [{ id: "call-timeout", name: "child", args: {} }],
+        });
+      }
+      return Promise.resolve({
+        message: { role: "assistant", content: "done" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  // Writes are allowed here, so only cancellation can prevent the late file.
+  await assertRejects(
+    () =>
+      runDeck({
+        path: parentPath,
+        input: "hi",
+        modelProvider: provider,
+        isRoot: true,
+        workerSandbox: true,
+        guardrails: { timeoutMs: 80 },
+        workspacePermissions: { read: true, write: [dir], run: false },
+        workspacePermissionsBaseDir: dir,
+      }),
+    Error,
+    "Timeout exceeded",
+  );
+
+  // Wait past the child's 300ms sleep before looking for the file.
+  await new Promise((resolve) => setTimeout(resolve, 350));
+  let sideEffectExists = true;
+  try {
+    await Deno.stat(sideEffectPath);
+  } catch (err) {
+    // Only "not found" counts as absence; anything else is a real failure.
+    if (err instanceof Deno.errors.NotFound) {
+      sideEffectExists = false;
+    } else {
+      throw err;
+    }
+  }
+  assertEquals(sideEffectExists, false);
+});
+
+Deno.test("orchestration worker clamps forged child deadlines to parent timeout", async () => {
+  const dir = await Deno.makeTempDir(); // scratch directory for both generated decks and the marker file
+  const modHref = modImportPath();
+  const sideEffectPath = path.join(dir, "forged-deadline-side-effect.txt"); // marker the child writes after its 200ms delay; must never appear
+  const childPath = await writeTempDeck(
+    dir,
+    "forged-deadline-child.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.object({}),
+      outputSchema: z.string(),
+      run: async () => {
+        await new Promise((resolve) => setTimeout(resolve, 200));
+        await Deno.writeTextFile(${JSON.stringify(sideEffectPath)}, "late");
+        return "late";
+      },
+    });
+    `,
+  );
+  const parentPath = await writeTempDeck( // parent posts a hand-built spawn.request with runDeadlineMs 10s in the future, then never resolves
+    dir,
+    "forged-deadline-parent.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    export default defineDeck({
+      run: async () => {
+        globalThis.postMessage({
+          type: "spawn.request",
+          requestId: "forged-request",
+          payload: {
+            path: ${JSON.stringify(childPath)},
+            input: {},
+            parentActionCallId: "forged-action",
+            parentPermissionsBaseDir: ${JSON.stringify(dir)},
+            parentPermissions: {
+              baseDir: ${JSON.stringify(dir)},
+              read: true,
+              write: true,
+              run: false,
+              net: false,
+              env: false,
+            },
+            workspacePermissions: {
+              read: true,
+              write: [${JSON.stringify(dir)}],
+              run: false,
+              net: false,
+              env: false,
+            },
+            workspacePermissionsBaseDir: ${JSON.stringify(dir)},
+            runDeadlineMs: performance.now() + 10_000,
+          },
+        });
+        await new Promise(() => {});
+      },
+    });
+    `,
+  );
+
+  await assertRejects(
+    () =>
+      runDeck({
+        path: parentPath,
+        input: {},
+        modelProvider: dummyProvider,
+        isRoot: true,
+        workerSandbox: true,
+        guardrails: { timeoutMs: 80 }, // far below both the forged 10_000ms deadline and the child's 200ms delay
+        workspacePermissions: { read: true, write: [dir], run: false },
+        workspacePermissionsBaseDir: dir,
+      }),
+    Error,
+    "Timeout exceeded", // the run must end on the parent's own timeout, not the forged deadline
+  );
+
+  await new Promise((resolve) => setTimeout(resolve, 260)); // wait past the child's 200ms delay so a surviving child would have written the marker
+  let sideEffectExists = true; // flipped to false only when stat reports NotFound
+  try {
+    await Deno.stat(sideEffectPath);
+  } catch (err) {
+    if (err instanceof Deno.errors.NotFound) {
+      sideEffectExists = false;
+    } else {
+      throw err; // any other stat failure is a real error, not proof of absence
+    }
+  }
+  assertEquals(sideEffectExists, false);
+});
+
+Deno.test("compute worker rejects forged run.result messages", async () => {
+  const dir = await Deno.makeTempDir(); // scratch directory for the generated deck
+  const modHref = modImportPath();
+  const deckPath = await writeTempDeck( // deck posts a bare run.result message itself and then hangs forever
+    dir,
+    "forged-run-result.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    export default defineDeck({
+      run: async () => {
+        globalThis.postMessage({ type: "run.result", result: "forged" });
+        await new Promise(() => {});
+      },
+    });
+    `,
+  );
+
+  await assertRejects(
+    () =>
+      runDeck({
+        path: deckPath,
+        input: {},
+        modelProvider: dummyProvider,
+        isRoot: true,
+        workerSandbox: true,
+        guardrails: { timeoutMs: 80 }, // the deck never returns, so the run can only end by timeout
+        workspacePermissions: {
+          read: true,
+          write: false,
+          run: false,
+          net: false,
+          env: false,
+        },
+        workspacePermissionsBaseDir: dir,
+      }),
+    Error,
+    "Timeout exceeded", // resolving with "forged" instead would fail this assertion
+  );
+});
+
+Deno.test(
+  "compute worker does not leak bridge session back to untrusted deck messages",
+  async () => {
+    const dir = await Deno.makeTempDir(); // scratch directory for both generated decks
+    const modHref = modImportPath();
+    const childPath = await writeTempDeck( // trivial child that returns "child-ok" immediately
+      dir,
+      "bridge-leak-child.deck.ts",
+      `
+      import { defineDeck } from "${modHref}";
+      import { z } from "zod";
+      export default defineDeck({
+        inputSchema: z.object({}),
+        outputSchema: z.string(),
+        run: () => "child-ok",
+      });
+      `,
+    );
+    const parentPath = await writeTempDeck( // parent watches for spawn.result messages and, if one exposes a string bridgeSession, replays it in a forged run.result
+      dir,
+      "bridge-leak-parent.deck.ts",
+      `
+      import { defineDeck } from "${modHref}";
+      import { z } from "zod";
+      export default defineDeck({
+        inputSchema: z.object({}),
+        outputSchema: z.string(),
+        run: async (ctx) => {
+          globalThis.addEventListener("message", (event) => {
+            const data = event.data;
+            if (data?.type !== "spawn.result") return;
+            if (typeof data?.bridgeSession !== "string") return;
+            globalThis.postMessage({
+              type: "run.result",
+              bridgeSession: data.bridgeSession,
+              completionNonce: data.completionNonce,
+              result: "forged",
+            });
+          });
+          await ctx.spawnAndWait({ path: ${
+        JSON.stringify(childPath)
+      }, input: {} });
+          await new Promise(() => {});
+        },
+      });
+      `,
+    );
+
+    await assertRejects(
+      () =>
+        runDeck({
+          path: parentPath,
+          input: {},
+          modelProvider: dummyProvider,
+          isRoot: true,
+          workerSandbox: true,
+          guardrails: { timeoutMs: 80 }, // the parent never returns on its own, so the run can only end by timeout
+          workspacePermissions: {
+            read: true,
+            write: false,
+            run: false,
+            net: false,
+            env: false,
+          },
+          workspacePermissionsBaseDir: dir,
+        }),
+      Error,
+      "Timeout exceeded", // resolving with "forged" instead would fail this assertion
+    );
+  },
+);
+
+Deno.test("compute worker rejects forged spawn.request messages", async () => {
+  const dir = await Deno.makeTempDir(); // scratch directory for both generated decks and the marker file
+  const modHref = modImportPath();
+  const sideEffectPath = path.join(dir, "forged-spawn-side-effect.txt"); // marker the child writes as soon as it runs; must never appear
+  const childPath = await writeTempDeck(
+    dir,
+    "forged-spawn-child.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    export default defineDeck({
+      run: async () => {
+        await Deno.writeTextFile(${JSON.stringify(sideEffectPath)}, "forged");
+        return "ok";
+      },
+    });
+    `,
+  );
+  const parentPath = await writeTempDeck( // parent posts a hand-built spawn.request for the child, then never resolves
+    dir,
+    "forged-spawn-parent.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    export default defineDeck({
+      run: async () => {
+        globalThis.postMessage({
+          type: "spawn.request",
+          requestId: "forged-request",
+          payload: {
+            path: ${JSON.stringify(childPath)},
+            input: {},
+            parentActionCallId: "forged-action",
+            parentPermissionsBaseDir: ${JSON.stringify(dir)},
+            parentPermissions: {
+              baseDir: ${JSON.stringify(dir)},
+              read: true,
+              write: true,
+              run: false,
+              net: false,
+              env: false,
+            },
+            workspacePermissions: {
+              read: true,
+              write: [${JSON.stringify(dir)}],
+              run: false,
+              net: false,
+              env: false,
+            },
+            workspacePermissionsBaseDir: ${JSON.stringify(dir)},
+            runDeadlineMs: performance.now() + 10_000,
+          },
+        });
+        await new Promise(() => {});
+      },
+    });
+    `,
+  );
+
+  await assertRejects(
+    () =>
+      runDeck({
+        path: parentPath,
+        input: {},
+        modelProvider: dummyProvider,
+        isRoot: true,
+        workerSandbox: true,
+        guardrails: { timeoutMs: 80 }, // the parent never returns on its own, so the run can only end by timeout
+        workspacePermissions: { read: true, write: [dir], run: false }, // the write would be permitted, so the marker's absence shows the child never ran
+        workspacePermissionsBaseDir: dir,
+      }),
+    Error,
+    "Timeout exceeded",
+  );
+
+  await new Promise((resolve) => setTimeout(resolve, 120)); // grace period for a wrongly spawned child to write the marker
+  let sideEffectExists = true; // flipped to false only when stat reports NotFound
+  try {
+    await Deno.stat(sideEffectPath);
+  } catch (err) {
+    if (err instanceof Deno.errors.NotFound) {
+      sideEffectExists = false;
+    } else {
+      throw err; // any other stat failure is a real error, not proof of absence
+    }
+  }
+  assertEquals(sideEffectExists, false);
+});
+
+Deno.test("orchestration worker serial scheduler runs one child tool invocation at a time", async () => {
+  const dir = await Deno.makeTempDir(); // scratch directory for both generated decks and the order log
+  const modHref = modImportPath();
+  const orderPath = path.join(dir, "serial-order.txt"); // log file the children append "start<id>" / "end<id>" lines to
+  await Deno.writeTextFile(orderPath, ""); // start from an empty log
+  const childPath = await writeTempDeck(
+    dir,
+    "serial-child-work.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.object({ id: z.number(), path: z.string() }),
+      outputSchema: z.string(),
+      run: async (ctx) => {
+        await Deno.writeTextFile(ctx.input.path, "start" + ctx.input.id + "\\n", { append: true });
+        await new Promise((resolve) => setTimeout(resolve, 60));
+        await Deno.writeTextFile(ctx.input.path, "end" + ctx.input.id + "\\n", { append: true });
+        return "ok-" + ctx.input.id;
+      },
+    });
+    `,
+  );
+  const parentPath = await writeTempDeck( // child path is embedded via JSON.stringify so it stays a valid string literal for any path
+    dir,
+    "serial-parent-llm.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string(),
+      outputSchema: z.string(),
+      modelParams: { model: "dummy-model" },
+      actions: [{ name: "child", path: ${JSON.stringify(childPath)} }],
+    });
+    `,
+  );
+
+  let pass = 0; // number of provider.chat() calls so far
+  const provider: ModelProvider = {
+    chat() {
+      pass += 1;
+      if (pass === 1) { // first model turn: request two child invocations at once
+        return Promise.resolve({
+          message: { role: "assistant", content: null },
+          finishReason: "tool_calls",
+          toolCalls: [
+            { id: "call-1", name: "child", args: { id: 1, path: orderPath } },
+            { id: "call-2", name: "child", args: { id: 2, path: orderPath } },
+          ],
+        });
+      }
+      return Promise.resolve({ // later turns finish the run
+        message: { role: "assistant", content: "done" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  const result = await runDeck({
+    path: parentPath,
+    input: "hi",
+    modelProvider: provider,
+    isRoot: true,
+    workerSandbox: true,
+    workspacePermissions: { read: true, write: [dir], run: false },
+    workspacePermissionsBaseDir: dir,
+  });
+  assertEquals(result, "done");
+
+  const order = (await Deno.readTextFile(orderPath))
+    .split("\n")
+    .map((line) => line.trim())
+    .filter(Boolean); // drop the empty entry after the final newline
+  assertEquals(order, ["start1", "end1", "start2", "end2"]); // no interleaving: child 1 ends before child 2 starts
+});
+
+Deno.test("LLM built-in tools are gated by effective permissions", async () => {
+  const dir = await Deno.makeTempDir(); // scratch directory for the generated deck
+  const modHref = modImportPath();
+  const deckPath = await writeTempDeck( // model-backed deck that declares no actions of its own
+    dir,
+    "tool-gating.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string(),
+      outputSchema: z.string(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
+  );
+
+  let toolNames: Array<string> = []; // names of the tools offered to the model on the latest chat() call
+  const provider: ModelProvider = {
+    chat(input) {
+      toolNames = (input.tools ?? []).map((tool) => tool.function.name);
+      return Promise.resolve({ // finish immediately; only the offered tool list matters here
+        message: { role: "assistant", content: "ok" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  const result = await runDeck({
+    path: deckPath,
+    input: "hi",
+    modelProvider: provider,
+    isRoot: true,
+    workspacePermissions: { // read-only grant: write, run, net and env are all off
+      read: true,
+      write: false,
+      run: false,
+      net: false,
+      env: false,
+    },
+    workspacePermissionsBaseDir: dir,
+  });
+
+  assertEquals(result, "ok");
+  assert(toolNames.includes("read_file")); // read tools are expected to be offered
+  assert(toolNames.includes("list_dir"));
+  assert(toolNames.includes("grep_files"));
+  assertEquals(toolNames.includes("apply_patch"), false); // expected to be hidden while write is false
+  assertEquals(toolNames.includes("exec"), false); // expected to be hidden while run is false
+});
+
+Deno.test(
+  "LLM file tools enforce directory-scoped read permissions",
+  async () => {
+    const dir = await Deno.makeTempDir(); // scratch directory; also the permissions base dir
+    const modHref = modImportPath();
+    const deckPath = await writeTempDeck(
+      dir,
+      "file-tools.deck.ts",
+      `
+      import { defineDeck } from "${modHref}";
+      import { z } from "zod";
+      export default defineDeck({
+        inputSchema: z.string(),
+        outputSchema: z.string(),
+        modelParams: { model: "dummy-model" },
+      });
+      `,
+    );
+
+    const allowedDir = path.join(dir, "allowed"); // the only directory granted read/write in the runDeck call
+    const nestedDir = path.join(allowedDir, "nested");
+    await Deno.mkdir(nestedDir, { recursive: true });
+    const allowedFile = path.join(nestedDir, "note.txt");
+    await Deno.writeTextFile(allowedFile, "line-one\nline-two\nmatch-line"); // real newlines, so the fixture is three lines rather than literal "\n" text
+
+    const otherDir = path.join(dir, "other"); // sibling directory outside the grant
+    await Deno.mkdir(otherDir, { recursive: true });
+    const deniedFile = path.join(otherDir, "secret.txt");
+    await Deno.writeTextFile(deniedFile, "top-secret");
+    const allowedPatchPath = path.join(allowedDir, "editable.txt");
+    await Deno.writeTextFile(allowedPatchPath, "before-allowed");
+    const deniedPatchPath = path.join(otherDir, "blocked.txt");
+    await Deno.writeTextFile(deniedPatchPath, "before-denied");
+
+    type ToolPlanEntry = {
+      id: string;
+      name: "read_file" | "list_dir" | "grep_files" | "apply_patch";
+      args: Record<string, JSONValue>;
+      expectStatus: 200 | 403;
+    };
+    const toolPlan: Array<ToolPlanEntry> = [ // one tool call per model turn, alternating allowed (200) and denied (403) targets
+      {
+        id: "call-1",
+        name: "read_file",
+        args: { path: allowedFile },
+        expectStatus: 200,
+      },
+      {
+        id: "call-2",
+        name: "read_file",
+        args: { path: deniedFile },
+        expectStatus: 403,
+      },
+      {
+        id: "call-3",
+        name: "list_dir",
+        args: { path: allowedDir, recursive: true },
+        expectStatus: 200,
+      },
+      {
+        id: "call-4",
+        name: "list_dir",
+        args: { path: otherDir, recursive: true },
+        expectStatus: 403,
+      },
+      {
+        id: "call-5",
+        name: "grep_files",
+        args: { path: allowedDir, query: "match" },
+        expectStatus: 200,
+      },
+      {
+        id: "call-6",
+        name: "grep_files",
+        args: { path: otherDir, query: "match" },
+        expectStatus: 403,
+      },
+      {
+        id: "call-7",
+        name: "apply_patch",
+        args: {
+          path: allowedPatchPath,
+          edits: [{
+            old_text: "before-allowed",
+            new_text: "after-allowed",
+          }],
+        },
+        expectStatus: 200,
+      },
+      {
+        id: "call-8",
+        name: "apply_patch",
+        args: {
+          path: deniedPatchPath,
+          edits: [{
+            old_text: "before-denied",
+            new_text: "after-denied",
+          }],
+        },
+        expectStatus: 403,
+      },
+    ];
+
+    type ToolResponseBody = {
+      status?: number;
+      code?: string;
+      message?: string;
+      payload?: Record<string, unknown>;
+    };
+    const toolResults: Array<{ plan: ToolPlanEntry; body: ToolResponseBody }> =
+      [];
+    const seenToolIds = new Set<string>(); // tool_call_ids already processed, so each result is captured once
+    const captureToolMessages = (input: { messages: Array<ModelMessage> }) => { // records the parsed body of every new tool message that matches a plan entry
+      for (const message of input.messages) {
+        if (message.role !== "tool" || !message.tool_call_id) continue;
+        if (seenToolIds.has(message.tool_call_id)) continue;
+        seenToolIds.add(message.tool_call_id);
+        if (message.content === null) continue;
+        const plan = toolPlan.find((entry) =>
+          entry.id === message.tool_call_id
+        );
+        if (!plan) continue;
+        const body = JSON.parse(String(message.content)) as ToolResponseBody;
+        toolResults.push({ plan, body });
+      }
+    };
+
+    let pass = 0; // index of the next planned tool call
+    const provider: ModelProvider = {
+      chat(input) {
+        captureToolMessages(input);
+        const plan = toolPlan[pass]; // undefined once every planned call has been issued
+        pass += 1;
+        if (!plan) {
+          return Promise.resolve({
+            message: { role: "assistant", content: "done" },
+            finishReason: "stop",
+          });
+        }
+        return Promise.resolve({
+          message: {
+            role: "assistant",
+            content: null,
+            tool_calls: [{
+              id: plan.id,
+              type: "function",
+              function: {
+                name: plan.name,
+                arguments: JSON.stringify(plan.args),
+              },
+            }],
+          },
+          finishReason: "tool_calls",
+          toolCalls: [{
+            id: plan.id,
+            name: plan.name,
+            args: plan.args,
+          }],
+        });
+      },
+    };
+
+    const result = await runDeck({
+      path: deckPath,
+      input: "hi",
+      modelProvider: provider,
+      isRoot: true,
+      workspacePermissions: {
+        read: ["./allowed"],
+        write: ["./allowed"],
+        run: false,
+        net: false,
+        env: false,
+      },
+      workspacePermissionsBaseDir: dir,
+    });
+
+    assertEquals(result, "done");
+    assertEquals(toolResults.length, toolPlan.length); // every planned call produced exactly one captured result
+
+    const listDirEntries = (payload: Record<string, unknown> | undefined) => { // payload.entries as an array, or [] when it is missing or not an array
+      return Array.isArray(payload?.entries)
+        ? payload.entries as Array<Record<string, string>>
+        : [];
+    };
+
+    for (const { plan, body } of toolResults) {
+      assertEquals(body.status, plan.expectStatus);
+      if (plan.expectStatus === 200) {
+        assertEquals(body.code ?? null, null); // successful responses carry no error code
+        if (plan.name === "read_file") {
+          const payload = body.payload as Record<string, unknown>;
+          assert(payload, "expected read_file payload");
+          assertEquals(payload.path, plan.args.path);
+          assert(
+            typeof payload.content === "string" &&
+              payload.content.includes("match-line"),
+          );
+        }
+        if (plan.name === "list_dir") {
+          const payload = body.payload as Record<string, unknown>;
+          assert(payload, "expected list_dir payload");
+          const entries = listDirEntries(payload);
+          assert(
+            entries.some((entry) => entry.path === allowedFile),
+            "expected list_dir entries to include allowed file",
+          );
+        }
+        if (plan.name === "grep_files") {
+          const payload = body.payload as Record<string, unknown>;
+          assert(payload, "expected grep_files payload");
+          const matches = Array.isArray(payload.matches)
+            ? payload.matches as Array<Record<string, unknown>>
+            : [];
+          assert(
+            matches.some((match) => match.path === allowedFile),
+            "expected grep_files to return match from allowed file",
+          );
+        }
+        if (plan.name === "apply_patch") {
+          const payload = body.payload as Record<string, unknown>;
+          assert(payload, "expected apply_patch payload");
+          assertEquals(payload.path, plan.args.path);
+          assertEquals(payload.applied, 1);
+        }
+      } else {
+        assertEquals(body.code, "permission_denied");
+        assert(
+          typeof body.message === "string" &&
+            body.message.includes(plan.name),
+          "expected permission denial message to mention the tool",
+        );
+      }
+    }
+
+    assertEquals(
+      await Deno.readTextFile(allowedPatchPath),
+      "after-allowed",
+      "apply_patch should modify files in allowed directories",
+    );
+    assertEquals(
+      await Deno.readTextFile(deniedPatchPath),
+      "before-denied",
+      "apply_patch must not modify files outside allowed directories",
+    );
+  },
+);
+
+Deno.test("LLM file tools deny symlink escapes outside granted roots", async () => {
+  const dir = await Deno.makeTempDir(); // scratch directory; also the permissions base dir
+  const modHref = modImportPath();
+  const deckPath = await writeTempDeck(
+    dir,
+    "file-tools-symlink.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string(),
+      outputSchema: z.string(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
+  );
+
+  const allowedDir = path.join(dir, "allowed"); // the only directory granted read/write in the runDeck call
+  const outsideDir = path.join(dir, "outside"); // sibling directory outside the grant
+  await Deno.mkdir(allowedDir, { recursive: true });
+  await Deno.mkdir(outsideDir, { recursive: true });
+
+  const outsideReadTarget = path.join(outsideDir, "secret.txt");
+  await Deno.writeTextFile(outsideReadTarget, "secret");
+  const outsideWriteTarget = path.join(outsideDir, "edit.txt");
+  await Deno.writeTextFile(outsideWriteTarget, "before");
+
+  const readSymlink = path.join(allowedDir, "secret-link.txt"); // file symlink inside allowed/ that points at outside/secret.txt
+  await Deno.symlink(outsideReadTarget, readSymlink);
+  const writeSymlinkDir = path.join(allowedDir, "linked"); // directory symlink inside allowed/ that points at outside/
+  await Deno.symlink(outsideDir, writeSymlinkDir);
+
+  type ToolResponseBody = {
+    status?: number;
+    code?: string;
+    message?: string;
+  };
+  const toolResults: Array<ToolResponseBody> = []; // parsed tool responses in the order they were first seen
+  const seenToolIds = new Set<string>(); // tool_call_ids already recorded, so each result is captured once
+
+  let pass = 0; // number of provider.chat() calls completed so far
+  const provider: ModelProvider = {
+    chat(input) {
+      if (pass > 0) { // from the second call on, collect any new read_file/apply_patch tool results
+        const toolMessages = input.messages.filter((message) =>
+          message.role === "tool" &&
+          (message.name === "read_file" || message.name === "apply_patch")
+        );
+        for (const message of toolMessages) {
+          if (!message.tool_call_id || seenToolIds.has(message.tool_call_id)) {
+            continue;
+          }
+          seenToolIds.add(message.tool_call_id);
+          toolResults.push(
+            JSON.parse(String(message.content)) as ToolResponseBody,
+          );
+        }
+      }
+      pass += 1;
+      if (pass === 1) { // turn 1: read through the file symlink
+        return Promise.resolve({
+          message: { role: "assistant", content: null },
+          finishReason: "tool_calls",
+          toolCalls: [{
+            id: "tool-read",
+            name: "read_file",
+            args: { path: readSymlink },
+          }],
+        });
+      }
+      if (pass === 2) { // turn 2: patch a file reached through the directory symlink
+        return Promise.resolve({
+          message: { role: "assistant", content: null },
+          finishReason: "tool_calls",
+          toolCalls: [{
+            id: "tool-write",
+            name: "apply_patch",
+            args: {
+              path: path.join(writeSymlinkDir, "edit.txt"),
+              edits: [{ old_text: "before", new_text: "after" }],
+            },
+          }],
+        });
+      }
+      return Promise.resolve({ // turn 3 and later: finish the run
+        message: { role: "assistant", content: "done" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  const result = await runDeck({
+    path: deckPath,
+    input: "hi",
+    modelProvider: provider,
+    isRoot: true,
+    workspacePermissions: {
+      read: ["./allowed"],
+      write: ["./allowed"],
+      run: false,
+      net: false,
+      env: false,
+    },
+    workspacePermissionsBaseDir: dir,
+  });
+
+  assertEquals(result, "done");
+  assertEquals(toolResults.length, 2);
+  assertEquals(toolResults[0].status, 403); // read via symlink denied
+  assertEquals(toolResults[0].code, "permission_denied");
+  assertEquals(toolResults[1].status, 403); // patch via symlinked directory denied
+  assertEquals(toolResults[1].code, "permission_denied");
+  assertEquals(await Deno.readTextFile(outsideWriteTarget), "before"); // the outside file was left untouched
+});
+
+Deno.test(
+  "LLM built-in exec denies symlink targets outside allowed run.paths",
+  async () => {
+    const dir = await Deno.makeTempDir(); // scratch directory; also the permissions base dir
+    const modHref = modImportPath();
+    const deckPath = await writeTempDeck(
+      dir,
+      "exec-symlink-path-deny.deck.ts",
+      `
+      import { defineDeck } from "${modHref}";
+      import { z } from "zod";
+      export default defineDeck({
+        inputSchema: z.string(),
+        outputSchema: z.string(),
+        modelParams: { model: "dummy-model" },
+      });
+      `,
+    );
+    const allowedDir = path.join(dir, "allowed");
+    await Deno.mkdir(allowedDir, { recursive: true });
+    const symlinkCommand = path.join(allowedDir, "tool"); // the path listed in run.paths below
+    await Deno.symlink("/bin/sh", symlinkCommand); // allowed/tool is a symlink to /bin/sh; NOTE(review): assumes a POSIX host that has /bin/sh
+    const sideEffectPath = path.join(dir, "exec-symlink-side-effect.txt"); // would be created if the command actually ran
+
+    let pass = 0; // number of provider.chat() calls so far
+    let toolPayload = ""; // raw content of the exec tool message, captured on a later turn
+    const provider: ModelProvider = {
+      chat(input) {
+        pass++;
+        if (pass === 1) { // turn 1: ask to run the symlinked command
+          return Promise.resolve({
+            message: { role: "assistant", content: null },
+            finishReason: "tool_calls",
+            toolCalls: [{
+              id: "tool-exec",
+              name: "exec",
+              args: {
+                command: symlinkCommand,
+                args: ["-c", `echo escaped > ${sideEffectPath}`],
+              },
+            }],
+          });
+        }
+        toolPayload = String(
+          input.messages.find((message) =>
+            message.role === "tool" && message.name === "exec"
+          )?.content ?? "",
+        );
+        return Promise.resolve({
+          message: { role: "assistant", content: "done" },
+          finishReason: "stop",
+        });
+      },
+    };
+
+    const result = await runDeck({
+      path: deckPath,
+      input: "hi",
+      modelProvider: provider,
+      isRoot: true,
+      workerSandbox: true,
+      workspacePermissions: {
+        read: true,
+        write: false,
+        run: { paths: ["./allowed/tool"] }, // only the symlink path itself is granted, not its /bin/sh target
+        net: false,
+        env: false,
+      },
+      workspacePermissionsBaseDir: dir,
+    });
+    assertEquals(result, "done");
+
+    const parsed = JSON.parse(toolPayload) as {
+      status?: number;
+      code?: string;
+      message?: string;
+    };
+    assertEquals(parsed.status, 403);
+    assertEquals(parsed.code, "permission_denied");
+    assert(
+      typeof parsed.message === "string" && parsed.message.includes("exec"),
+      "expected permission denial to mention exec",
+    );
+
+    const leaked = await Deno.stat(sideEffectPath).then( // true when stat succeeds, i.e. the file exists
+      () => true,
+      () => false,
+    );
+    assertEquals(leaked, false);
+  },
+);
+
+Deno.test("LLM built-in apply_patch returns stable permission denial", async () => {
+  const dir = await Deno.makeTempDir(); // scratch directory; also the permissions base dir
+  const modHref = modImportPath();
+  const targetPath = path.join(dir, "target.txt"); // file the model tries to patch; must keep its "before" content
+  await Deno.writeTextFile(targetPath, "before");
+  const deckPath = await writeTempDeck(
+    dir,
+    "tool-deny.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string(),
+      outputSchema: z.string(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
+  );
+
+  let pass = 0; // number of provider.chat() calls so far
+  let toolPayload = ""; // raw content of the apply_patch tool message, captured on a later turn
+  const provider: ModelProvider = {
+    chat(input) {
+      pass++;
+      if (pass === 1) { // turn 1: request an apply_patch call against targetPath
+        return Promise.resolve({
+          message: {
+            role: "assistant",
+            content: null,
+            tool_calls: [{
+              id: "tool-1",
+              type: "function",
+              function: {
+                name: "apply_patch",
+                arguments: JSON.stringify({
+                  path: targetPath,
+                  edits: [{ old_text: "before", new_text: "after" }],
+                }),
+              },
+            }],
+          },
+          finishReason: "tool_calls",
+          toolCalls: [{
+            id: "tool-1",
+            name: "apply_patch",
+            args: {
+              path: targetPath,
+              edits: [{ old_text: "before", new_text: "after" }],
+            },
+          }],
+        });
+      }
+      toolPayload = String(
+        input.messages.find((message) =>
+          message.role === "tool" && message.name === "apply_patch"
+        )?.content ?? "",
+      );
+      return Promise.resolve({
+        message: { role: "assistant", content: "done" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  const result = await runDeck({
+    path: deckPath,
+    input: "hi",
+    modelProvider: provider,
+    isRoot: true,
+    workspacePermissions: {
+      read: true,
+      write: false, // no write grant, yet the model still asks for apply_patch
+      run: false,
+      net: false,
+      env: false,
+    },
+    workspacePermissionsBaseDir: dir,
+  });
+
+  assertEquals(result, "done");
+  assert(toolPayload.includes('"code":"permission_denied"')); // matched against the raw payload text, so the exact serialization matters
+  assert(toolPayload.includes("apply_patch denied"));
+  assertEquals(await Deno.readTextFile(targetPath), "before"); // the file was not modified
+});
+
+Deno.test(
+  "LLM built-in apply_patch create_if_missing creates nested parent directories",
+  async () => {
+    const dir = await Deno.makeTempDir();
+    const modHref = modImportPath();
+    const deckPath = await writeTempDeck(
+      dir,
+      "tool-create-missing.deck.ts",
+      `
+      import { defineDeck } from "${modHref}";
+      import { z } from "zod";
+      export default defineDeck({
+        inputSchema: z.string(),
+        outputSchema: z.string(),
+        modelParams: { model: "dummy-model" },
+      });
+      `,
+    );
+
+    const nestedTarget = path.join(dir, "faq", "faq.txt");
+    let pass = 0;
+    let toolPayload = "";
+    const provider: ModelProvider = {
+      chat(input) {
+        pass += 1;
+        if (pass === 1) {
+          return Promise.resolve({
+            message: {
+              role: "assistant",
+              content: null,
+              tool_calls: [{
+                id: "tool-create",
+                type: "function",
+                function: {
+                  name: "apply_patch",
+                  arguments: JSON.stringify({
+                    path: nestedTarget,
+                    create_if_missing: true,
+                    edits: [{ old_text: "placeholder", new_text: "hello faq" }],
+                  }),
+                },
+              }],
+            },
+            finishReason: "tool_calls",
+            toolCalls: [{
+              id: "tool-create",
+              name: "apply_patch",
+              args: {
+                path: nestedTarget,
+                create_if_missing: true,
+                edits: [{ old_text: "placeholder", new_text: "hello faq" }],
+              },
+            }],
+          });
+        }
+        toolPayload = String(
+          input.messages.find((message) =>
+            message.role === "tool" && message.name === "apply_patch"
+          )?.content ?? "",
+        );
+        return Promise.resolve({
+          message: { role: "assistant", content: "done" },
+          finishReason: "stop",
+        });
+      },
+    };
 
-![Fragments card](./fragments.card.md)
-`.trim(),
-  );
+    const result = await runDeck({
+      path: deckPath,
+      input: "hi",
+      modelProvider: provider,
+      isRoot: true,
+      workspacePermissions: {
+        read: true,
+        write: true,
+        run: false,
+        net: false,
+        env: false,
+      },
+      workspacePermissionsBaseDir: dir,
+    });
 
-  await Deno.writeTextFile(
-    path.join(dir, "base_input.zod.ts"),
-    `
-    import { z } from "zod";
-    export default z.object({ text: z.string() });
-    `.trim(),
-  );
+    assertEquals(result, "done");
+    assert(toolPayload.includes('"status":200'));
+    assert(toolPayload.includes('"created":true'));
+    assertEquals(await Deno.readTextFile(nestedTarget), "");
+  },
+);
 
-  await Deno.writeTextFile(
-    path.join(dir, "base_output.zod.ts"),
+Deno.test("runDeck abort signal cancels in-flight model call and fires onCancel once", async () => {
+  const dir = await Deno.makeTempDir();
+  const modHref = modImportPath();
+  const deckPath = await writeTempDeck(
+    dir,
+    "abort.deck.ts",
     `
+    import { defineDeck } from "${modHref}";
     import { z } from "zod";
-    export default z.object({ result: z.string() });
-    `.trim(),
+    export default defineDeck({
+      inputSchema: z.string(),
+      outputSchema: z.string(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
   );
 
-  const deck = await loadDeck(deckPath);
-  const inputShape = (deck.inputSchema as unknown as {
-    shape: Record<string, unknown>;
-  }).shape;
-  const outputShape = (deck.outputSchema as unknown as {
-    shape: Record<string, unknown>;
-  }).shape;
+  const controller = new AbortController();
+  let onCancelCalls = 0;
+  let providerSawSignal = false;
+  const provider: ModelProvider = {
+    chat(input) {
+      providerSawSignal = Boolean(input.signal);
+      if (input.signal && !controller.signal.aborted) {
+        setTimeout(() => controller.abort("stop"), 0);
+      }
+      return new Promise((_, reject) => {
+        input.signal?.addEventListener(
+          "abort",
+          () => reject(new DOMException("Run canceled", "AbortError")),
+          { once: true },
+        );
+      });
+    },
+  };
 
-  assertEquals(Object.keys(inputShape).sort(), ["extra", "text"]);
-  assertEquals(Object.keys(outputShape).sort(), ["note", "result"]);
+  const runPromise = runDeck({
+    path: deckPath,
+    input: "hello",
+    modelProvider: provider,
+    isRoot: true,
+    signal: controller.signal,
+    onCancel: () => {
+      onCancelCalls += 1;
+    },
+  });
+
+  await assertRejects(() => runPromise);
+  await runPromise.catch((err) => {
+    assert(isRunCanceledError(err));
+  });
+  assertEquals(providerSawSignal, true);
+  assertEquals(onCancelCalls, 1);
 });
 
-Deno.test("cards cannot declare handlers (ts card)", async () => {
+Deno.test("runDeck ignores post-abort stream chunks", async () => {
   const dir = await Deno.makeTempDir();
   const modHref = modImportPath();
-
-  await writeTempDeck(
+  const deckPath = await writeTempDeck(
     dir,
-    "bad_handlers.card.ts",
+    "abort-stream.deck.ts",
     `
-    import { defineCard } from "${modHref}";
-    export default defineCard({
-      handlers: { onBusy: { path: "./noop.deck.ts" } }
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string(),
+      outputSchema: z.string(),
+      modelParams: { model: "dummy-model" },
     });
     `,
   );
 
-  const deckPath = await writeTempDeck(
-    dir,
-    "root.deck.md",
-    `
-+++
-modelParams = { model = "dummy-model" }
-+++
-
-Deck.
-
-![Bad handlers](./bad_handlers.card.ts)
-`.trim(),
-  );
+  const controller = new AbortController();
+  const chunks: Array<string> = [];
+  const provider: ModelProvider = {
+    chat(input) {
+      input.onStreamText?.("a");
+      return new Promise((_, reject) => {
+        input.signal?.addEventListener(
+          "abort",
+          () => {
+            input.onStreamText?.("b");
+            reject(new DOMException("Run canceled", "AbortError"));
+          },
+          { once: true },
+        );
+      });
+    },
+  };
 
-  await assertRejects(
-    () =>
-      runDeck({
-        path: deckPath,
-        input: "hi",
-        modelProvider: dummyProvider,
-        isRoot: true,
-      }),
-    Error,
-    "handlers",
-  );
+  const runPromise = runDeck({
+    path: deckPath,
+    input: "hello",
+    modelProvider: provider,
+    isRoot: true,
+    stream: true,
+    signal: controller.signal,
+    onStreamText: (chunk) => chunks.push(chunk),
+  });
+  setTimeout(() => controller.abort(), 0);
+  await assertRejects(() => runPromise);
+  assertEquals(chunks.includes("b"), false);
 });
 
-Deno.test("cards cannot declare handlers (markdown card)", async () => {
+Deno.test("runDeck propagates cancellation through nested action runs", async () => {
   const dir = await Deno.makeTempDir();
-
-  await Deno.writeTextFile(
-    path.join(dir, "bad.card.md"),
+  const modHref = modImportPath();
+  const childPath = await writeTempDeck(
+    dir,
+    "abort-child.deck.ts",
     `
-+++
-handlers = { onBusy = { path = "./noop.deck.ts" } }
-+++
-
-Body.
-`.trim(),
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.object({}),
+      outputSchema: z.string(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
   );
-
-  const deckPath = path.join(dir, "root.deck.md");
-  await Deno.writeTextFile(
-    deckPath,
+  const parentPath = await writeTempDeck(
+    dir,
+    "abort-parent.deck.ts",
     `
-+++
-modelParams = { model = "dummy-model" }
-+++
-
-Deck.
-
-![Bad card](./bad.card.md)
-`.trim(),
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string(),
+      outputSchema: z.string(),
+      modelParams: { model: "dummy-model" },
+      actions: [{ name: "child", path: "${childPath}" }],
+    });
+    `,
   );
 
-  await assertRejects(
-    () =>
-      runDeck({
-        path: deckPath,
-        input: "hi",
-        modelProvider: dummyProvider,
-        isRoot: true,
-      }),
-    Error,
-    "handlers",
-  );
+  let parentCalls = 0;
+  let childCalls = 0;
+  const controller = new AbortController();
+  const provider: ModelProvider = {
+    chat(input) {
+      if (input.deckPath?.endsWith("abort-parent.deck.ts")) {
+        parentCalls += 1;
+        return Promise.resolve({
+          message: { role: "assistant", content: null },
+          finishReason: "tool_calls",
+          toolCalls: [{ id: "child-1", name: "child", args: {} }],
+        });
+      }
+      childCalls += 1;
+      if (!controller.signal.aborted) {
+        controller.abort("stop-child");
+      }
+      return Promise.reject(new DOMException("Run canceled", "AbortError"));
+    },
+  };
+
+  const runPromise = runDeck({
+    path: parentPath,
+    input: "start",
+    modelProvider: provider,
+    isRoot: true,
+    signal: controller.signal,
+  });
+  await assertRejects(() => runPromise);
+  assertEquals(parentCalls, 1);
+  assertEquals(childCalls, 1);
 });
 
-Deno.test("runDeck resolves model arrays via modelProvider", async () => {
+Deno.test("runDeck keeps cancellation distinct from normal errors", async () => {
   const dir = await Deno.makeTempDir();
+  const modHref = modImportPath();
   const deckPath = await writeTempDeck(
     dir,
-    "root.deck.md",
+    "cancel-vs-error.deck.ts",
     `
-+++
-modelParams = { model = ["ollama/llama3.1", "openrouter/openai/gpt-4o-mini"] }
-+++
-
-Deck.
-`.trim(),
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string(),
+      outputSchema: z.string(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
   );
-  let resolvedInput: { model?: string | Array<string> } = {};
-  const provider: ModelProvider = {
-    resolveModel: (input) => {
-      resolvedInput = { model: input.model };
-      return Promise.resolve({
-        model: "openrouter/openai/gpt-4o-mini",
-        params: { temp: 1 },
-      });
-    },
-    chat: (input) => {
-      assertEquals(input.model, "openrouter/openai/gpt-4o-mini");
-      assertEquals(input.params?.temp, 1);
-      return Promise.resolve({
-        message: { role: "assistant", content: "ok" },
-        finishReason: "stop",
+
+  const canceledProvider: ModelProvider = {
+    chat(input) {
+      return new Promise((_, reject) => {
+        input.signal?.addEventListener(
+          "abort",
+          () => reject(new DOMException("Run canceled", "AbortError")),
+          { once: true },
+        );
       });
     },
   };
-
-  await runDeck({
+  const canceledController = new AbortController();
+  const canceled = runDeck({
     path: deckPath,
-    input: "hi",
-    modelProvider: provider,
+    input: "hello",
+    modelProvider: canceledProvider,
     isRoot: true,
-  });
+    signal: canceledController.signal,
+  }).catch((err) => err);
+  canceledController.abort();
+  const canceledErr = await canceled;
+  assert(isRunCanceledError(canceledErr));
 
-  assert(Array.isArray(resolvedInput.model));
+  const failingProvider: ModelProvider = {
+    chat() {
+      throw new Error("normal failure");
+    },
+  };
+  const failingErr = await runDeck({
+    path: deckPath,
+    input: "hello",
+    modelProvider: failingProvider,
+    isRoot: true,
+  }).catch((err) => err);
+  assertEquals(isRunCanceledError(failingErr), false);
 });
 
-Deno.test("modelParams.additionalParams pass through and top-level wins", async () => {
+Deno.test("runDeck rejects as canceled when signal aborts before final output without onStateUpdate", async () => {
   const dir = await Deno.makeTempDir();
+  const modHref = modImportPath();
   const deckPath = await writeTempDeck(
     dir,
-    "root.deck.md",
+    "abort-before-final-output.deck.ts",
     `
-+++
-modelParams = { model = "dummy-model", temperature = 0.2, additionalParams = { temperature = 0.9, seed = 42, my_param = "x" } }
-+++
-
-Deck.
-`.trim(),
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string(),
+      outputSchema: z.string(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
   );
 
-  let seenParams: Record<string, unknown> | undefined;
+  const controller = new AbortController();
   const provider: ModelProvider = {
-    chat: (input) => {
-      seenParams = input.params;
+    chat() {
+      controller.abort("stop-before-final");
       return Promise.resolve({
         message: { role: "assistant", content: "ok" },
         finishReason: "stop",
@@ -1872,14 +5418,13 @@ Deck.
     },
   };
 
-  await runDeck({
+  const err = await runDeck({
     path: deckPath,
-    input: "hi",
+    input: "hello",
     modelProvider: provider,
     isRoot: true,
-  });
+    signal: controller.signal,
+  }).catch((caught) => caught);
 
-  assertEquals(seenParams?.temperature, 0.2);
-  assertEquals(seenParams?.seed, 42);
-  assertEquals(seenParams?.my_param, "x");
+  assert(isRunCanceledError(err));
 });
diff --git a/packages/gambit-core/src/runtime.ts b/packages/gambit-core/src/runtime.ts
index a9004dab4..68b534f1d 100644
--- a/packages/gambit-core/src/runtime.ts
+++ b/packages/gambit-core/src/runtime.ts
@@ -9,19 +9,30 @@ import {
   GAMBIT_TOOL_RESPOND,
 } from "./constants.ts";
 import { loadDeck } from "./loader.ts";
-import { resolveEffectivePermissions } from "./permissions.ts";
+import {
+  canReadPath,
+  canRunCommand,
+  canRunPath,
+  canWritePath,
+  intersectPermissions,
+  resolveEffectivePermissions,
+} from "./permissions.ts";
 import { assertZodSchema, toJsonSchema, validateWithSchema } from "./schema.ts";
 import type {
+  CreateResponseRequest,
+  CreateResponseResponse,
   ExecutionContext,
   Guardrails,
   JSONValue,
   LoadedDeck,
   ModelMessage,
   ModelProvider,
+  ResponseEvent,
   ResponseItem,
   ResponseToolDefinition,
   ToolCallResult,
   ToolDefinition,
+  ToolKind,
 } from "./types.ts";
 import type { MessageRef, SavedState } from "./state.ts";
 import type {
@@ -62,7 +73,7 @@ type IdleController = {
   stop: () => void;
 };
 
-type RunOptions = {
+export type RunOptions = {
   path: string;
   input: unknown;
   inputProvided?: boolean;
@@ -90,8 +101,351 @@ type RunOptions = {
   parentPermissions?: NormalizedPermissionSet;
   referencePermissions?: PermissionDeclarationInput;
   referencePermissionsBaseDir?: string;
+  runDeadlineMs?: number;
+  workerSandbox?: boolean;
+  inOrchestrationWorker?: boolean;
+  signal?: AbortSignal;
+  onCancel?: () => unknown | Promise<unknown>;
+  onTool?: (input: {
+    name: string;
+    args: Record<string, unknown>;
+    runId: string;
+    actionCallId: string;
+    parentActionCallId?: string;
+    deckPath: string;
+  }) => unknown | Promise<unknown>;
+};
+
+const WORKER_SANDBOX_ENV = "GAMBIT_DECK_WORKER_SANDBOX"; // env toggle read by shouldUseWorkerSandbox
+const WORKER_TIMEOUT_MESSAGE = "Timeout exceeded"; // message thrown by ensureNotExpired
+const RUN_CANCELED_MESSAGE = "Run canceled"; // default RunCanceledError message
+const INSPECT_WORKER_TIMEOUT_MS = 1_500; // NOTE(review): presumably the deck-inspection budget in ms — confirm
+const INSPECT_WORKER_TIMEOUT_MESSAGE = "Deck inspection timed out";
+const BUILTIN_TOOL_READ_FILE = "read_file";
+const BUILTIN_TOOL_LIST_DIR = "list_dir";
+const BUILTIN_TOOL_GREP_FILES = "grep_files";
+const BUILTIN_TOOL_APPLY_PATCH = "apply_patch";
+const BUILTIN_TOOL_EXEC = "exec";
+const BUILTIN_TOOL_NAMES = new Set<string>([ // the five built-in tool names declared above
+  BUILTIN_TOOL_READ_FILE,
+  BUILTIN_TOOL_LIST_DIR,
+  BUILTIN_TOOL_GREP_FILES,
+  BUILTIN_TOOL_APPLY_PATCH,
+  BUILTIN_TOOL_EXEC,
+]);
+const TRUSTED_SCHEMA_IMPORT_PREFIXES = [ // import-map keys matched by isTrustedSchemaImportKey
+  "@bolt-foundry/gambit-core/schemas",
+  "gambit://schemas",
+];
+
+type WireScope = true | false | Array<string>; // true = all, false = nothing, array = listed values
+type WireRunScope = true | false | { // true = all, false = nothing, object = listed entries
+  paths: Array<string>;
+  commands: Array<string>;
+};
+type WirePermissionSet = { // produced by toWirePermissionSet, consumed by fromWirePermissionSet
+  baseDir: string;
+  read: WireScope;
+  write: WireScope;
+  run: WireRunScope;
+  net: WireScope;
+  env: WireScope;
 };
 
+type WorkerDeckInspection = { // NOTE(review): presumably the result shape of inspectDeckInWorker — confirm
+  deckPath: string;
+  hasModelParams: boolean;
+  permissions?: PermissionDeclarationInput;
+  guardrails?: Partial<Guardrails>;
+};
+
+/** Error for a canceled run; carries `code` "run_canceled" for detection. */
+export class RunCanceledError extends Error {
+  code = "run_canceled";
+  override name = "RunCanceledError";
+  constructor(message = RUN_CANCELED_MESSAGE) {
+    super(message);
+  }
+}
+
+// True for objects named "RunCanceledError" or "AbortError", or whose `code`
+// is "run_canceled"; a non-object value is never treated as a cancellation.
+export function isRunCanceledError(err: unknown): boolean {
+  if (typeof err !== "object" || err === null) return false;
+  const { name, code } = err as { name?: unknown; code?: unknown };
+  return name === "RunCanceledError" || name === "AbortError" ||
+    code === "run_canceled";
+}
+
+/** Reads the worker-sandbox env toggle; an unreadable or unset value is off. */
+function shouldUseWorkerSandbox(): boolean {
+  let flag: string | undefined;
+  try {
+    flag = Deno.env.get(WORKER_SANDBOX_ENV)?.trim().toLowerCase();
+  } catch {
+    return false;
+  }
+  return flag !== undefined && ["1", "true", "yes"].includes(flag);
+}
+
+/** Encodes a scope as `true` (all), `false` (empty) or its sorted values. */
+function normalizedScopeToWire(
+  scope: { all: boolean; values: Set<string> },
+): WireScope {
+  if (scope.all) return true;
+  const entries = [...scope.values].sort();
+  return entries.length > 0 ? entries : false;
+}
+
+/** Encodes a run scope as `true`, `false`, or sorted `paths`/`commands`. */
+function normalizedRunToWire(scope: {
+  all: boolean;
+  paths: Set<string>;
+  commands: Set<string>;
+}): WireRunScope {
+  if (scope.all) return true;
+  const paths = [...scope.paths].sort();
+  const commands = [...scope.commands].sort();
+  const isEmpty = paths.length === 0 && commands.length === 0;
+  return isEmpty ? false : { paths, commands };
+}
+
+/**
+ * Converts a normalized permission set to its wire form: `baseDir` is kept
+ * and every scope is encoded by the matching `normalized*ToWire` helper.
+ */
+function toWirePermissionSet(set: NormalizedPermissionSet): WirePermissionSet {
+  const [read, write, net, env] = [set.read, set.write, set.net, set.env]
+    .map((scope) => normalizedScopeToWire(scope));
+  const run = normalizedRunToWire(set.run);
+  return { baseDir: set.baseDir, read, write, run, net, env };
+}
+
+/** Decodes a wire scope: `true` is all, `false` is none, an array is a list. */
+function wireScopeToNormalized(
+  scope: WireScope,
+): { all: boolean; values: Set<string> } {
+  const listed = typeof scope === "boolean" ? [] : scope;
+  return { all: scope === true, values: new Set<string>(listed) };
+}
+
+/**
+ * Decodes a wire run scope back into its normalized form.
+ *
+ * `true` means everything is allowed and `false` means nothing is; both
+ * yield empty `paths` and `commands` sets. The object form restores the
+ * listed paths and commands with `all` set to `false`.
+ *
+ * @param scope Wire-encoded run scope.
+ * @returns Normalized run scope backed by newly created sets.
+ */
+function wireRunToNormalized(
+  scope: WireRunScope,
+): { all: boolean; paths: Set<string>; commands: Set<string> } {
+  if (typeof scope === "boolean") {
+    return {
+      all: scope,
+      paths: new Set<string>(),
+      commands: new Set<string>(),
+    };
+  }
+  const { paths, commands } = scope;
+  return { all: false, paths: new Set(paths), commands: new Set(commands) };
+}
+
+/**
+ * Rebuilds a normalized permission set from its wire form: `baseDir` is
+ * kept and every scope is decoded by the matching `wire*ToNormalized` helper.
+ */
+function fromWirePermissionSet(
+  set: WirePermissionSet,
+): NormalizedPermissionSet {
+  const [read, write, net, env] = [set.read, set.write, set.net, set.env]
+    .map((scope) => wireScopeToNormalized(scope));
+  const run = wireRunToNormalized(set.run);
+  return { baseDir: set.baseDir, read, write, run, net, env };
+}
+
+/**
+ * Copies a permission set under a different `baseDir`, giving every scope
+ * its own `Set` instances so the copy shares none of them with `set`.
+ */
+function normalizePermissionBaseDir(
+  set: NormalizedPermissionSet,
+  baseDir: string,
+): NormalizedPermissionSet {
+  const copyScope = (scope: { all: boolean; values: Set<string> }) => ({
+    all: scope.all,
+    values: new Set(scope.values),
+  });
+  const { all, paths, commands } = set.run;
+  const run = { all, paths: new Set(paths), commands: new Set(commands) };
+  const [read, write, net, env] = [set.read, set.write, set.net, set.env]
+    .map(copyScope);
+  return { ...set, baseDir, read, write, run, net, env };
+}
+
+/**
+ * Computes the run deadline on the `performance.now()` clock: the current
+ * time plus `guardrails.timeoutMs`, never later than `existing` when that
+ * is a finite number.
+ */
+function deadlineForRun(guardrails: Guardrails, existing?: number): number {
+  const fresh = performance.now() + guardrails.timeoutMs;
+  if (typeof existing !== "number" || !Number.isFinite(existing)) return fresh;
+  return Math.min(existing, fresh);
+}
+
+/** Throws the timeout error once `performance.now()` has passed `deadlineMs`. */
+function ensureNotExpired(deadlineMs: number) {
+  const expired = performance.now() > deadlineMs;
+  if (expired) throw new Error(WORKER_TIMEOUT_MESSAGE);
+}
+
+/**
+ * Throws `RunCanceledError` when `signal` is aborted, reusing a non-blank
+ * string reason (or `Error` reason message) as the error message.
+ */
+function throwIfCanceled(signal?: AbortSignal) {
+  if (!signal?.aborted) return;
+  const { reason } = signal;
+  const fromError = reason instanceof Error ? reason.message : "";
+  const text: string = typeof reason === "string" ? reason : fromError;
+  throw text.trim() ? new RunCanceledError(text) : new RunCanceledError();
+}
+
+function ensureRunActive(deadlineMs: number, signal?: AbortSignal) {
+  throwIfCanceled(signal); // cancellation is checked before the deadline
+  ensureNotExpired(deadlineMs); // then the run deadline is enforced
+}
+
+/** True when the trimmed `key` equals a trusted prefix or lies beneath one. */
+function isTrustedSchemaImportKey(key: string): boolean {
+  const candidate = key.trim();
+  if (candidate.length === 0) return false;
+  const hit = (p: string) => candidate === p || candidate.startsWith(`${p}/`);
+  return TRUSTED_SCHEMA_IMPORT_PREFIXES.some(hit);
+}
+
+/**
+ * Walks upward from the deck's directory and returns the first `deno.json`
+ * or `deno.jsonc` file found (`deno.json` is tried first), else `undefined`.
+ */
+function tryReadWorkspaceConfigPath(deckPath: string): string | undefined {
+  const isFile = (candidate: string): boolean => {
+    try {
+      return Deno.statSync(candidate).isFile;
+    } catch {
+      return false;
+    }
+  };
+  let dir = path.dirname(path.resolve(deckPath));
+  for (;;) {
+    const hit = ["deno.json", "deno.jsonc"]
+      .map((name) => path.join(dir, name)).find(isFile);
+    if (hit) return hit;
+    const parent = path.dirname(dir);
+    if (parent === dir) return undefined;
+    dir = parent;
+  }
+}
+
+/**
+ * Reads and parses the config file at `configPath` and returns the keys of
+ * its top-level `imports` object, or `[]` when the parsed value or its
+ * `imports` entry is not a non-array object. Read/parse errors propagate.
+ */
+function readWorkspaceImportMapKeys(configPath: string): Array<string> {
+  const isPlainObject = (value: unknown): value is Record<string, unknown> =>
+    Boolean(value) && typeof value === "object" && !Array.isArray(value);
+  const config = parseWorkspaceConfig(Deno.readTextFileSync(configPath));
+  if (!isPlainObject(config) || !isPlainObject(config.imports)) return [];
+  return Object.keys(config.imports);
+}
+
+/** Parses `text` as JSON, retrying with comments stripped if that fails. */
+function parseWorkspaceConfig(text: string): unknown {
+  try {
+    return JSON.parse(text);
+  } catch {
+    return JSON.parse(stripJsonComments(text));
+  }
+}
+
+/**
+ * Removes JSONC-only syntax from `text` so that `JSON.parse` accepts it.
+ *
+ * Line comments are dropped up to (not including) their newline, block
+ * comments are dropped entirely, and a comma that directly precedes a
+ * closing `}` or `]` is dropped. String literals are copied verbatim.
+ *
+ * @param text JSON or JSONC source text.
+ * @returns The text without comments or trailing commas.
+ */
+function stripJsonComments(text: string): string {
+  let out = "";
+  let inString = false;
+  let escapeNext = false;
+  let inLineComment = false;
+  let inBlockComment = false;
+
+  for (let i = 0; i < text.length; i++) {
+    const ch = text[i];
+    const next = text[i + 1];
+    if (inLineComment) {
+      if (ch === "\n") {
+        inLineComment = false;
+        out += ch;
+      }
+      continue;
+    }
+    if (inBlockComment) {
+      if (ch === "*" && next === "/") {
+        inBlockComment = false;
+        i++;
+      }
+      continue;
+    }
+    // Inside a string: copy everything and track backslash escapes.
+    if (inString) {
+      out += ch;
+      if (escapeNext) escapeNext = false;
+      else if (ch === "\\") escapeNext = true;
+      else if (ch === '"') inString = false;
+      continue;
+    }
+    if (ch === "/" && (next === "/" || next === "*")) {
+      inLineComment = next === "/";
+      inBlockComment = next === "*";
+      i++;
+      continue;
+    }
+    if (ch === '"') inString = true;
+    if (ch === "}" || ch === "]") {
+      // A comma right before a closer (ignoring whitespace) is trailing.
+      const kept = out.trimEnd();
+      if (kept.endsWith(",")) out = kept.slice(0, -1) + out.slice(kept.length);
+    }
+    out += ch;
+  }
+  return out;
+}
+
+/** Throws if the nearest workspace import map remaps a trusted schema key. */
+function enforceTrustedSchemaImportMapPolicy(deckPath: string) {
+  const configPath = deckPath.startsWith("gambit://")
+    ? undefined
+    : tryReadWorkspaceConfigPath(deckPath);
+  if (!configPath) return;
+  const remapped = readWorkspaceImportMapKeys(configPath)
+    .filter((key) => isTrustedSchemaImportKey(key));
+  if (remapped.length === 0) return;
+  const listed = remapped.join(", ");
+  throw new Error(
+    `[gambit] trust-boundary violation: workspace import map at ${configPath} remaps trusted schema namespace (${listed})`,
+  );
+}
+
 export async function runDeck(opts: RunOptions): Promise<unknown> {
   const guardrails: Guardrails = {
     ...DEFAULT_GUARDRAILS,
@@ -104,68 +458,263 @@ export async function runDeck(opts: RunOptions): Promise<unknown> {
     throw new Error(`Max depth ${guardrails.maxDepth} exceeded`);
   }
   const runId = opts.runId ?? opts.state?.runId ?? randomId("run");
-
-  const deck = await loadDeck(opts.path);
-  const permissions = resolveEffectivePermissions({
-    baseDir: path.dirname(deck.path),
-    parent: opts.parentPermissions,
-    workspace: opts.workspacePermissions
-      ? {
-        baseDir: opts.workspacePermissionsBaseDir ?? path.dirname(deck.path),
-        permissions: opts.workspacePermissions,
-      }
-      : undefined,
-    declaration: deck.permissions
-      ? { baseDir: path.dirname(deck.path), permissions: deck.permissions }
-      : undefined,
-    reference: opts.referencePermissions
-      ? {
-        baseDir: opts.referencePermissionsBaseDir ?? path.dirname(deck.path),
-        permissions: opts.referencePermissions,
-      }
-      : undefined,
-    session: opts.sessionPermissions
-      ? {
-        baseDir: opts.sessionPermissionsBaseDir ?? Deno.cwd(),
-        permissions: opts.sessionPermissions,
-      }
-      : undefined,
-  });
-  const deckGuardrails = deck.guardrails ?? {};
-  const effectiveGuardrails: Guardrails = {
-    ...guardrails,
-    ...deckGuardrails,
-  };
+  enforceTrustedSchemaImportMapPolicy(opts.path);
+  // AbortSignal is not bridged into worker runtimes yet, so preserve
+  // cancellation semantics by keeping signal-bound runs in-process.
+  const workerSandbox = (opts.workerSandbox ?? shouldUseWorkerSandbox()) &&
+    !opts.signal;
   const isRoot = Boolean(inferredRoot);
+  const shouldEmitRun = opts.depth === undefined || opts.depth === 0;
+  let canceled = false;
+  let cancelHandled = false;
+  const handleCancel = async () => {
+    if (cancelHandled) return;
+    cancelHandled = true;
+    if (!opts.onCancel) return;
+    try {
+      await opts.onCancel();
+    } catch (err) {
+      logger.warn(
+        `[gambit] runDeck onCancel callback failed: ${
+          err instanceof Error ? err.message : String(err)
+        }`,
+      );
+    }
+  };
+  try {
+    throwIfCanceled(opts.signal);
+    if (workerSandbox) {
+      const preInspectRunDeadlineMs = deadlineForRun(
+        guardrails,
+        opts.runDeadlineMs,
+      );
+      ensureRunActive(preInspectRunDeadlineMs, opts.signal);
+      const inspectedDeck = await inspectDeckInWorker(
+        opts.path,
+        preInspectRunDeadlineMs,
+      );
+      const deckDir = path.dirname(inspectedDeck.deckPath);
+      const permissions = resolveEffectivePermissions({
+        baseDir: deckDir,
+        parent: opts.parentPermissions,
+        workspace: opts.workspacePermissions
+          ? {
+            baseDir: opts.workspacePermissionsBaseDir ?? deckDir,
+            permissions: opts.workspacePermissions,
+          }
+          : undefined,
+        declaration: inspectedDeck.permissions
+          ? { baseDir: deckDir, permissions: inspectedDeck.permissions }
+          : undefined,
+        reference: opts.referencePermissions
+          ? {
+            baseDir: opts.referencePermissionsBaseDir ?? deckDir,
+            permissions: opts.referencePermissions,
+          }
+          : undefined,
+        session: opts.sessionPermissions
+          ? {
+            baseDir: opts.sessionPermissionsBaseDir ?? Deno.cwd(),
+            permissions: opts.sessionPermissions,
+          }
+          : undefined,
+      });
+      const effectiveGuardrails: Guardrails = {
+        ...guardrails,
+        ...(inspectedDeck.guardrails ?? {}),
+      };
+      const runDeadlineMs = deadlineForRun(
+        effectiveGuardrails,
+        opts.runDeadlineMs,
+      );
+      ensureRunActive(runDeadlineMs, opts.signal);
+      const resolvedInput = resolveInputWithoutDeck({
+        input: opts.input,
+        state: opts.state,
+        isRoot,
+        initialUserMessage: opts.initialUserMessage,
+      });
+
+      if (!inspectedDeck.hasModelParams) {
+        if (shouldEmitRun) {
+          opts.trace?.({
+            type: "run.start",
+            runId,
+            deckPath: inspectedDeck.deckPath,
+            input: resolvedInput as unknown as import("./types.ts").JSONValue,
+            initialUserMessage: opts
+              .initialUserMessage as unknown as import("./types.ts").JSONValue,
+            permissions: permissions.trace,
+          });
+        }
 
-  ensureSchemaPresence(deck, isRoot);
+        return await runComputeDeckInWorker({
+          deckPath: inspectedDeck.deckPath,
+          guardrails: effectiveGuardrails,
+          depth,
+          runId,
+          initialUserMessage: opts.initialUserMessage,
+          parentActionCallId: opts.parentActionCallId,
+          modelProvider: opts.modelProvider,
+          input: resolvedInput,
+          defaultModel: opts.defaultModel,
+          modelOverride: opts.modelOverride,
+          trace: opts.trace,
+          stream: opts.stream,
+          state: opts.state,
+          onStateUpdate: opts.onStateUpdate,
+          onStreamText: opts.onStreamText,
+          responsesMode: opts.responsesMode,
+          permissions: permissions.effective,
+          permissionsTrace: permissions.trace,
+          workspacePermissions: opts.workspacePermissions,
+          workspacePermissionsBaseDir: opts.workspacePermissionsBaseDir,
+          sessionPermissions: opts.sessionPermissions,
+          sessionPermissionsBaseDir: opts.sessionPermissionsBaseDir,
+          runDeadlineMs,
+          isRoot,
+          allowRootStringInput: opts.allowRootStringInput ?? false,
+          signal: opts.signal,
+        });
+      }
 
-  const resolvedInput = resolveInput({
-    deck,
-    input: opts.input,
-    state: opts.state,
-    isRoot,
-    initialUserMessage: opts.initialUserMessage,
-  });
-  const validatedInput = validateInput(
-    deck,
-    resolvedInput,
-    isRoot,
-    opts.allowRootStringInput ?? false,
-  );
-  const shouldEmitRun = opts.depth === undefined || opts.depth === 0;
-  if (shouldEmitRun) {
-    opts.trace?.({
-      type: "run.start",
-      runId,
-      deckPath: deck.path,
-      input: validatedInput as unknown as import("./types.ts").JSONValue,
-      initialUserMessage: opts
-        .initialUserMessage as unknown as import("./types.ts").JSONValue,
-      permissions: permissions.trace,
+      if (!opts.inOrchestrationWorker) {
+        return await runLlmDeckInWorker({
+          deckPath: inspectedDeck.deckPath,
+          guardrails: effectiveGuardrails,
+          depth,
+          runId,
+          parentActionCallId: opts.parentActionCallId,
+          modelProvider: opts.modelProvider,
+          input: resolvedInput,
+          inputProvided: opts.inputProvided ?? true,
+          initialUserMessage: opts.initialUserMessage,
+          defaultModel: opts.defaultModel,
+          modelOverride: opts.modelOverride,
+          trace: opts.trace,
+          stream: opts.stream,
+          state: opts.state,
+          onStateUpdate: opts.onStateUpdate,
+          onStreamText: opts.onStreamText,
+          responsesMode: opts.responsesMode,
+          permissions: permissions.effective,
+          permissionsTrace: permissions.trace,
+          workspacePermissions: opts.workspacePermissions,
+          workspacePermissionsBaseDir: opts.workspacePermissionsBaseDir,
+          sessionPermissions: opts.sessionPermissions,
+          sessionPermissionsBaseDir: opts.sessionPermissionsBaseDir,
+          runDeadlineMs,
+          workerSandbox,
+          allowRootStringInput: opts.allowRootStringInput,
+          isRoot,
+          signal: opts.signal,
+        });
+      }
+    }
+
+    const deck = await loadDeck(opts.path);
+    const permissions = resolveEffectivePermissions({
+      baseDir: path.dirname(deck.path),
+      parent: opts.parentPermissions,
+      workspace: opts.workspacePermissions
+        ? {
+          baseDir: opts.workspacePermissionsBaseDir ?? path.dirname(deck.path),
+          permissions: opts.workspacePermissions,
+        }
+        : undefined,
+      declaration: deck.permissions
+        ? { baseDir: path.dirname(deck.path), permissions: deck.permissions }
+        : undefined,
+      reference: opts.referencePermissions
+        ? {
+          baseDir: opts.referencePermissionsBaseDir ?? path.dirname(deck.path),
+          permissions: opts.referencePermissions,
+        }
+        : undefined,
+      session: opts.sessionPermissions
+        ? {
+          baseDir: opts.sessionPermissionsBaseDir ?? Deno.cwd(),
+          permissions: opts.sessionPermissions,
+        }
+        : undefined,
     });
-  }
-  try {
+    const deckGuardrails = deck.guardrails ?? {};
+    const effectiveGuardrails: Guardrails = {
+      ...guardrails,
+      ...deckGuardrails,
+    };
+    const runDeadlineMs = deadlineForRun(
+      effectiveGuardrails,
+      opts.runDeadlineMs,
+    );
+    ensureRunActive(runDeadlineMs, opts.signal);
+
+    ensureSchemaPresence(deck, isRoot);
+
+    const resolvedInput = resolveInput({
+      deck,
+      input: opts.input,
+      state: opts.state,
+      isRoot,
+      initialUserMessage: opts.initialUserMessage,
+    });
+    const validatedInput = validateInput(
+      deck,
+      resolvedInput,
+      isRoot,
+      opts.allowRootStringInput ?? false,
+    );
+    const useOrchestrationWorker = workerSandbox &&
+      !opts.inOrchestrationWorker &&
+      isRoot &&
+      !opts.onTool &&
+      Boolean(
+        deck.modelParams?.model || deck.modelParams?.temperature !== undefined,
+      );
+    if (useOrchestrationWorker) {
+      return await runLlmDeckInWorker({
+        deckPath: deck.path,
+        guardrails: effectiveGuardrails,
+        depth,
+        runId,
+        parentActionCallId: opts.parentActionCallId,
+        modelProvider: opts.modelProvider,
+        input: validatedInput,
+        inputProvided: opts.inputProvided ?? true,
+        initialUserMessage: opts.initialUserMessage,
+        defaultModel: opts.defaultModel,
+        modelOverride: opts.modelOverride,
+        trace: opts.trace,
+        stream: opts.stream,
+        state: opts.state,
+        onStateUpdate: opts.onStateUpdate,
+        onStreamText: opts.onStreamText,
+        responsesMode: opts.responsesMode,
+        permissions: permissions.effective,
+        permissionsTrace: permissions.trace,
+        workspacePermissions: opts.workspacePermissions,
+        workspacePermissionsBaseDir: opts.workspacePermissionsBaseDir,
+        sessionPermissions: opts.sessionPermissions,
+        sessionPermissionsBaseDir: opts.sessionPermissionsBaseDir,
+        runDeadlineMs,
+        workerSandbox,
+        allowRootStringInput: opts.allowRootStringInput,
+        isRoot,
+        signal: opts.signal,
+      });
+    }
+    if (shouldEmitRun) {
+      opts.trace?.({
+        type: "run.start",
+        runId,
+        deckPath: deck.path,
+        input: validatedInput as unknown as import("./types.ts").JSONValue,
+        initialUserMessage: opts
+          .initialUserMessage as unknown as import("./types.ts").JSONValue,
+        permissions: permissions.trace,
+      });
+    }
+
     if (
       deck.modelParams?.model || deck.modelParams?.temperature !== undefined
     ) {
@@ -193,6 +742,10 @@ export async function runDeck(opts: RunOptions): Promise<unknown> {
         workspacePermissionsBaseDir: opts.workspacePermissionsBaseDir,
         sessionPermissions: opts.sessionPermissions,
         sessionPermissionsBaseDir: opts.sessionPermissionsBaseDir,
+        runDeadlineMs,
+        workerSandbox,
+        onTool: opts.onTool,
+        signal: opts.signal,
       });
     }
 
@@ -207,6 +760,7 @@ export async function runDeck(opts: RunOptions): Promise<unknown> {
       guardrails: effectiveGuardrails,
       depth,
       runId,
+      initialUserMessage: opts.initialUserMessage,
       parentActionCallId: opts.parentActionCallId,
       modelProvider: opts.modelProvider,
       input: validatedInput,
@@ -214,6 +768,8 @@ export async function runDeck(opts: RunOptions): Promise<unknown> {
       modelOverride: opts.modelOverride,
       trace: opts.trace,
       stream: opts.stream,
+      state: opts.state,
+      onStateUpdate: opts.onStateUpdate,
       onStreamText: opts.onStreamText,
       responsesMode: opts.responsesMode,
       permissions: permissions.effective,
@@ -222,11 +778,24 @@ export async function runDeck(opts: RunOptions): Promise<unknown> {
       workspacePermissionsBaseDir: opts.workspacePermissionsBaseDir,
       sessionPermissions: opts.sessionPermissions,
       sessionPermissionsBaseDir: opts.sessionPermissionsBaseDir,
+      runDeadlineMs,
+      workerSandbox,
+      onTool: opts.onTool,
+      signal: opts.signal,
     });
+  } catch (err) {
+    if (isRunCanceledError(err)) {
+      canceled = true;
+      await handleCancel();
+    }
+    throw err;
   } finally {
     if (shouldEmitRun) {
       opts.trace?.({ type: "run.end", runId });
     }
+    if (opts.signal?.aborted && !canceled) {
+      await handleCancel();
+    }
   }
 }
 
@@ -236,24 +805,15 @@ function toProviderParams(
   if (!params) return undefined;
   const {
     model: _model,
-    additionalParams,
     temperature,
     top_p,
     frequency_penalty,
     presence_penalty,
     max_tokens,
+    verbosity,
+    reasoning,
   } = params;
   const out: Record<string, unknown> = {};
-  if (
-    additionalParams &&
-    typeof additionalParams === "object" &&
-    !Array.isArray(additionalParams)
-  ) {
-    for (const [key, value] of Object.entries(additionalParams)) {
-      if (value === undefined) continue;
-      out[key] = value;
-    }
-  }
   if (temperature !== undefined) out.temperature = temperature;
   if (top_p !== undefined) out.top_p = top_p;
   if (frequency_penalty !== undefined) {
@@ -261,6 +821,8 @@ function toProviderParams(
   }
   if (presence_penalty !== undefined) out.presence_penalty = presence_penalty;
   if (max_tokens !== undefined) out.max_tokens = max_tokens;
+  if (verbosity !== undefined) out.verbosity = verbosity;
+  if (reasoning !== undefined) out.reasoning = reasoning;
   return Object.keys(out).length ? out : undefined;
 }
 
@@ -356,6 +918,25 @@ function resolveInput(args: {
   return args.input;
 }
 
+/**
+ * Resolves the effective run input when no deck is available to consult.
+ *
+ * An explicitly supplied input always wins, and non-root runs never fall
+ * back. For a root run without input, the value recovered from saved state by
+ * `extractContextInput` is preferred; failing that, an empty string is used
+ * when an initial user message exists, otherwise the input stays `undefined`.
+ */
+function resolveInputWithoutDeck(args: {
+  input: unknown;
+  state?: SavedState;
+  isRoot: boolean;
+  initialUserMessage?: unknown;
+}) {
+  const { input, state, isRoot, initialUserMessage } = args;
+  if (input !== undefined || !isRoot) return input;
+
+  const fromState = extractContextInput(state);
+  if (fromState !== undefined) return fromState;
+
+  return initialUserMessage !== undefined ? "" : input;
+}
+
 function extractContextInput(state?: SavedState): unknown {
   if (!state) return undefined;
   if (state.format === "responses" && Array.isArray(state.items)) {
@@ -490,6 +1071,75 @@ function safeJsonArgs(value: string): Record<string, JSONValue> {
   return {};
 }
 
+/**
+ * Returns `value` when it is one of the recognised tool-kind literals
+ * (`"action"`, `"external"`, `"mcp_bridge"`, `"internal"`); otherwise returns
+ * `fallback`.
+ */
+function asToolKind(value: unknown, fallback: ToolKind): ToolKind {
+  return value === "action" || value === "external" ||
+      value === "mcp_bridge" || value === "internal"
+    ? value
+    : fallback;
+}
+
+/**
+ * Mirrors a streamed `tool.call` / `tool.result` event onto the trace sink.
+ *
+ * Events of any other type, events without a string `actionCallId`, and
+ * events whose tool name cannot be determined are ignored. Each call id is
+ * emitted at most once per kind, tracked through `emittedCalls` and
+ * `emittedResults`. The name seen on a call is remembered in `toolNames` so a
+ * later result that omits `name` can still be attributed.
+ */
+function projectStreamToolTraceEvents(input: {
+  streamEvent: Record<string, JSONValue>;
+  runId: string;
+  parentActionCallId: string;
+  trace?: (event: import("./types.ts").TraceEvent) => void;
+  emittedCalls: Set<string>;
+  emittedResults: Set<string>;
+  toolNames: Map<string, string>;
+}): void {
+  const { streamEvent, trace, toolNames } = input;
+  if (!trace) return;
+
+  const eventType = typeof streamEvent.type === "string"
+    ? streamEvent.type
+    : "";
+  const isCall = eventType === "tool.call";
+  if (!isCall && eventType !== "tool.result") return;
+
+  const actionCallId = typeof streamEvent.actionCallId === "string"
+    ? streamEvent.actionCallId
+    : "";
+  let name: string;
+  if (typeof streamEvent.name === "string") {
+    name = streamEvent.name;
+  } else {
+    // Fall back to the name recorded when the matching call was seen.
+    name = toolNames.get(actionCallId) ?? "";
+  }
+  if (!actionCallId || !name) return;
+
+  if (isCall) {
+    if (input.emittedCalls.has(actionCallId)) return;
+    input.emittedCalls.add(actionCallId);
+    toolNames.set(actionCallId, name);
+    trace({
+      type: "tool.call",
+      runId: input.runId,
+      actionCallId,
+      name,
+      args: "args" in streamEvent
+        ? (streamEvent.args ?? {}) as JSONValue
+        : {},
+      toolKind: asToolKind(streamEvent.toolKind, "mcp_bridge"),
+      parentActionCallId: input.parentActionCallId,
+    });
+    return;
+  }
+
+  if (input.emittedResults.has(actionCallId)) return;
+  input.emittedResults.add(actionCallId);
+  trace({
+    type: "tool.result",
+    runId: input.runId,
+    actionCallId,
+    name,
+    result: "result" in streamEvent
+      ? (streamEvent.result ?? null) as JSONValue
+      : null,
+    toolKind: asToolKind(streamEvent.toolKind, "mcp_bridge"),
+    parentActionCallId: input.parentActionCallId,
+  });
+}
+
 function mapResponseOutput(
   output: Array<ResponseItem>,
 ): {
@@ -575,6 +1225,7 @@ type RuntimeCtxBase = {
   depth: number;
   runId: string;
   inputProvided?: boolean;
+  initialUserMessage?: unknown;
   parentActionCallId?: string;
   modelProvider: ModelProvider;
   input: unknown;
@@ -592,11 +1243,1480 @@ type RuntimeCtxBase = {
   workspacePermissionsBaseDir?: string;
   sessionPermissions?: PermissionDeclarationInput;
   sessionPermissionsBaseDir?: string;
+  runDeadlineMs: number;
+  workerSandbox: boolean;
+  signal?: AbortSignal;
+  onTool?: RunOptions["onTool"];
+};
+
+/**
+ * Runtime context variant keyed by deck path: `RuntimeCtxBase` without the
+ * `deck` and `workerSandbox` fields, plus the deck path and the root/input
+ * flags. NOTE(review): presumably the parameter shape of
+ * `runComputeDeckInWorker` — confirm against its declaration.
+ */
+type WorkerRuntimeCtx = Omit<RuntimeCtxBase, "deck" | "workerSandbox"> & {
+  deckPath: string;
+  isRoot: boolean;
+  allowRootStringInput: boolean;
 };
 
-async function runComputeDeck(ctx: RuntimeCtxBase): Promise<unknown> {
+/**
+ * Runs a compute deck: in a worker when `ctx.workerSandbox` is set, otherwise
+ * in-process. The worker path receives the deck by path rather than the
+ * loaded deck object, and never allows root string input.
+ */
+async function runComputeDeck(ctx: RuntimeCtxBase): Promise<unknown> {
+  if (!ctx.workerSandbox) {
+    return await runComputeDeckInProcess(ctx);
+  }
+
+  // A run counts as root only at depth 0 with no parent action call.
+  const isRootRun = ctx.depth === 0 && !ctx.parentActionCallId;
+  return await runComputeDeckInWorker({
+    guardrails: ctx.guardrails,
+    depth: ctx.depth,
+    runId: ctx.runId,
+    inputProvided: ctx.inputProvided,
+    initialUserMessage: ctx.initialUserMessage,
+    parentActionCallId: ctx.parentActionCallId,
+    modelProvider: ctx.modelProvider,
+    input: ctx.input,
+    defaultModel: ctx.defaultModel,
+    modelOverride: ctx.modelOverride,
+    trace: ctx.trace,
+    stream: ctx.stream,
+    state: ctx.state,
+    onStateUpdate: ctx.onStateUpdate,
+    onStreamText: ctx.onStreamText,
+    responsesMode: ctx.responsesMode,
+    permissions: ctx.permissions,
+    permissionsTrace: ctx.permissionsTrace,
+    workspacePermissions: ctx.workspacePermissions,
+    workspacePermissionsBaseDir: ctx.workspacePermissionsBaseDir,
+    sessionPermissions: ctx.sessionPermissions,
+    sessionPermissionsBaseDir: ctx.sessionPermissionsBaseDir,
+    runDeadlineMs: ctx.runDeadlineMs,
+    deckPath: ctx.deck.path,
+    isRoot: isRootRun,
+    allowRootStringInput: false,
+    signal: ctx.signal,
+  });
+}
+
+/**
+ * Converts a permission scope into the value shape used for Deno worker
+ * permissions: `true` for "all", `false` for an empty scope, otherwise the
+ * scope's values as a sorted array.
+ */
+function toDenoPermissionList(scope: {
+  all: boolean;
+  values: Set<string>;
+}): true | false | Array<string> {
+  if (scope.all) return true;
+  return scope.values.size > 0 ? [...scope.values].sort() : false;
+}
+
+/**
+ * Converts a run-permission scope into a Deno permission value: `true` for
+ * "all", `false` when neither paths nor commands are listed, otherwise the
+ * de-duplicated union of both as a sorted array.
+ */
+function toDenoRunPermission(scope: {
+  all: boolean;
+  paths: Set<string>;
+  commands: Set<string>;
+}): true | false | Array<string> {
+  if (scope.all) return true;
+  const merged = new Set<string>(scope.paths);
+  for (const command of scope.commands) merged.add(command);
+  return merged.size === 0 ? false : [...merged].sort();
+}
+
+// File extensions whose contents are scanned for import/export specifiers
+// when walking the local import graph (see `collectLocalImportGraph`).
+const IMPORT_SOURCE_EXTENSIONS = new Set([
+  ".ts",
+  ".tsx",
+  ".mts",
+  ".cts",
+  ".js",
+  ".jsx",
+  ".mjs",
+  ".cjs",
+]);
+// Extensions tried, in this order, when a specifier has no extension of its
+// own; used both as `<path><ext>` and `<path>/index<ext>` candidates in
+// `resolveExistingModulePath`.
+const RESOLVABLE_MODULE_EXTENSIONS = [
+  ".ts",
+  ".tsx",
+  ".mts",
+  ".cts",
+  ".js",
+  ".jsx",
+  ".mjs",
+  ".cjs",
+  ".json",
+];
+
+/**
+ * Drops everything from the first `?` or `#` onward (query string or
+ * fragment) and trims surrounding whitespace from what remains.
+ */
+function stripSpecifierSuffix(specifier: string): string {
+  const cut = specifier.search(/[?#]/);
+  const bare = cut === -1 ? specifier : specifier.slice(0, cut);
+  return bare.trim();
+}
+
+/** True when `ch` contains an ASCII letter, `_`, or `$`. */
+function isIdentifierStart(ch: string): boolean {
+  const match = /[A-Za-z_$]/.exec(ch);
+  return match !== null;
+}
+
+/** True when `ch` contains an ASCII letter, digit, `_`, or `$`. */
+function isIdentifierContinue(ch: string): boolean {
+  const match = /[A-Za-z0-9_$]/.exec(ch);
+  return match !== null;
+}
+
+/**
+ * Returns the index of the first character at or after `start` that is
+ * neither whitespace nor part of a `//` line comment or a block comment.
+ * An unterminated block comment consumes the rest of the source.
+ */
+function skipWhitespaceAndComments(source: string, start: number): number {
+  const total = source.length;
+  let pos = start;
+  for (;;) {
+    if (pos >= total) return pos;
+    const ch = source[pos];
+    if (/\s/.test(ch)) {
+      pos += 1;
+      continue;
+    }
+    if (ch !== "/") return pos;
+
+    const next = source[pos + 1];
+    if (next === "/") {
+      // Line comment: stop at (but do not consume) the line terminator.
+      pos += 2;
+      while (pos < total && source[pos] !== "\n" && source[pos] !== "\r") {
+        pos += 1;
+      }
+      continue;
+    }
+    if (next === "*") {
+      // Block comment: jump past the closing marker, or to the end if none.
+      const close = source.indexOf("*/", pos + 2);
+      pos = close === -1 ? total : close + 2;
+      continue;
+    }
+    return pos;
+  }
+}
+
+/**
+ * Reads an identifier beginning exactly at `start`.
+ *
+ * @returns the identifier text and the index just past it, or `undefined`
+ *   when `start` is out of range or does not begin an identifier.
+ */
+function readIdentifier(
+  source: string,
+  start: number,
+): { value: string; end: number } | undefined {
+  if (start >= source.length || !isIdentifierStart(source[start])) {
+    return undefined;
+  }
+  let end = start + 1;
+  for (; end < source.length; end++) {
+    if (!isIdentifierContinue(source[end])) break;
+  }
+  return { value: source.slice(start, end), end };
+}
+
+/**
+ * Reads a single- or double-quoted string literal whose opening quote sits at
+ * `start`.
+ *
+ * A backslash makes the following character part of the value verbatim;
+ * escape sequences are not decoded. A raw line break or the end of input
+ * before the closing quote makes the literal invalid.
+ *
+ * @returns the literal's contents and the index just past the closing quote,
+ *   or `undefined` when no valid literal starts at `start`.
+ */
+function readStringLiteral(
+  source: string,
+  start: number,
+): { value: string; end: number } | undefined {
+  const quote = source[start];
+  if (quote !== "'" && quote !== '"') return undefined;
+
+  const chars: Array<string> = [];
+  for (let pos = start + 1; pos < source.length; pos++) {
+    const ch = source[pos];
+    if (ch === quote) return { value: chars.join(""), end: pos + 1 };
+    if (ch === "\n" || ch === "\r") return undefined;
+    if (ch === "\\") {
+      pos += 1;
+      if (pos >= source.length) return undefined;
+      chars.push(source[pos]);
+      continue;
+    }
+    chars.push(ch);
+  }
+  return undefined;
+}
+
+/**
+ * Skips the body of a template-literal `${ ... }` substitution.
+ *
+ * `start` is the index just after the opening `${`. Nested braces, quoted
+ * strings, comments and nested template literals are stepped over so that
+ * braces inside them are not counted.
+ *
+ * @returns the index just past the matching `}`, or the end of the scanned
+ *   input when the substitution is never closed.
+ */
+function skipTemplateExpression(source: string, start: number): number {
+  let pos = start;
+  let open = 1;
+  while (pos < source.length && open > 0) {
+    pos = skipWhitespaceAndComments(source, pos);
+    if (pos >= source.length) break;
+    switch (source[pos]) {
+      case "'":
+      case '"': {
+        const literal = readStringLiteral(source, pos);
+        pos = literal ? literal.end : pos + 1;
+        break;
+      }
+      case "`":
+        pos = skipTemplateLiteral(source, pos);
+        break;
+      case "{":
+        open++;
+        pos++;
+        break;
+      case "}":
+        open--;
+        pos++;
+        break;
+      default:
+        pos++;
+    }
+  }
+  return pos;
+}
+
+/**
+ * Skips a template literal whose opening backtick is at `start`.
+ *
+ * Backslash escapes skip the following character and `${ ... }`
+ * substitutions are delegated to `skipTemplateExpression`.
+ *
+ * @returns the index just past the closing backtick, or the position where
+ *   scanning ran out of input.
+ */
+function skipTemplateLiteral(source: string, start: number): number {
+  let pos = start + 1;
+  while (pos < source.length) {
+    const ch = source[pos];
+    if (ch === "`") return pos + 1;
+    if (ch === "\\") {
+      pos += 2;
+    } else if (ch === "$" && source[pos + 1] === "{") {
+      pos = skipTemplateExpression(source, pos + 2);
+    } else {
+      pos += 1;
+    }
+  }
+  return pos;
+}
+
+/**
+ * Reads the quoted module specifier that follows a `from` keyword.
+ *
+ * `start` is the index just after `from`. When no string literal follows
+ * (after whitespace and comments), only the resume position is returned.
+ */
+function readSpecifierAfterFrom(
+  source: string,
+  start: number,
+): { specifier?: string; end: number } {
+  const literalStart = skipWhitespaceAndComments(source, start);
+  const literal = readStringLiteral(source, literalStart);
+  return literal
+    ? { specifier: literal.value, end: literal.end }
+    : { end: literalStart };
+}
+
+/**
+ * Reads the specifier of a dynamic `import("...")` call.
+ *
+ * `start` is at or before the opening parenthesis. Only a plain string
+ * literal as the first argument yields a specifier; anything else returns
+ * just the position where scanning stopped.
+ */
+function readImportCallSpecifier(
+  source: string,
+  start: number,
+): { specifier?: string; end: number } {
+  const parenAt = skipWhitespaceAndComments(source, start);
+  if (source[parenAt] !== "(") return { end: parenAt };
+
+  const argAt = skipWhitespaceAndComments(source, parenAt + 1);
+  const literal = readStringLiteral(source, argAt);
+  if (!literal) return { end: argAt };
+
+  // Consume the closing parenthesis only when it directly follows.
+  const afterArg = skipWhitespaceAndComments(source, literal.end);
+  const end = source[afterArg] === ")" ? afterArg + 1 : afterArg;
+  return { specifier: literal.value, end };
+}
+
+/**
+ * Scans a static `import` or `export` statement and returns its module
+ * specifier, if it has one.
+ *
+ * `start` is the index just after the `import` / `export` keyword.
+ *
+ * - `import.meta` and side-effect imports (`import "./x.ts"`) are handled up
+ *   front.
+ * - For `export`, only re-export forms (`export * ...`, `export { ... } ...`,
+ *   optionally prefixed with `type`) can carry a specifier. Declaration
+ *   exports (`export default ...`, `export function ...`, `export const ...`)
+ *   return immediately so the caller keeps scanning the declaration body.
+ *   Otherwise dynamic `import()` calls inside an exported body, and any
+ *   statement following a semicolon-less export, would be swallowed by the
+ *   `from` search below and never collected.
+ * - Everything else is scanned at bracket depth 0 for a `from` keyword
+ *   (specifier follows) or a terminating `;` (no specifier).
+ *
+ * @returns the specifier when found, plus the index at which the caller
+ *   should resume scanning.
+ */
+function readImportOrExportStatementSpecifier(
+  source: string,
+  start: number,
+  keyword: "import" | "export",
+): { specifier?: string; end: number } {
+  let i = skipWhitespaceAndComments(source, start);
+
+  if (keyword === "import") {
+    if (source[i] === ".") return { end: i + 1 }; // import.meta
+    const sideEffectImport = readStringLiteral(source, i);
+    if (sideEffectImport) {
+      return { specifier: sideEffectImport.value, end: sideEffectImport.end };
+    }
+  } else {
+    // Look past an optional `type` modifier to see which export form this is.
+    const typeModifier = readIdentifier(source, i);
+    const clauseStart = typeModifier?.value === "type"
+      ? skipWhitespaceAndComments(source, typeModifier.end)
+      : i;
+    const clauseChar = source[clauseStart];
+    // Not a re-export: hand control back so the declaration itself is scanned.
+    if (clauseChar !== "*" && clauseChar !== "{") return { end: i };
+  }
+
+  let depth = 0;
+  while (i < source.length) {
+    i = skipWhitespaceAndComments(source, i);
+    if (i >= source.length) break;
+    const ch = source[i];
+
+    if (ch === "'" || ch === '"') {
+      const stringLiteral = readStringLiteral(source, i);
+      i = stringLiteral ? stringLiteral.end : i + 1;
+      continue;
+    }
+    if (ch === "`") {
+      i = skipTemplateLiteral(source, i);
+      continue;
+    }
+    if (ch === "(" || ch === "[" || ch === "{") {
+      depth++;
+      i++;
+      continue;
+    }
+    if (ch === ")" || ch === "]" || ch === "}") {
+      // Tolerate stray closers instead of letting depth go negative.
+      if (depth > 0) depth--;
+      i++;
+      continue;
+    }
+    if (depth === 0) {
+      if (ch === ";") return { end: i + 1 };
+      const identifier = readIdentifier(source, i);
+      if (identifier?.value === "from") {
+        return readSpecifierAfterFrom(source, identifier.end);
+      }
+      if (identifier) {
+        i = identifier.end;
+        continue;
+      }
+    }
+    i++;
+  }
+  return { end: i };
+}
+
+/**
+ * Collects the module specifiers referenced by `source` using a lightweight
+ * hand-written scan rather than a full parse.
+ *
+ * Comments, quoted strings and template literals are skipped. Each `import`
+ * identifier is treated as either a dynamic `import(...)` call or a static
+ * import statement, and each `export` identifier as a possible re-export.
+ * Only string-literal specifiers are reported.
+ */
+function extractModuleSpecifiers(source: string): Set<string> {
+  const out = new Set<string>();
+  let i = 0;
+  while (i < source.length) {
+    i = skipWhitespaceAndComments(source, i);
+    if (i >= source.length) break;
+
+    const ch = source[i];
+    // Step over literals so keywords inside them are not misread.
+    if (ch === "'" || ch === '"') {
+      const stringLiteral = readStringLiteral(source, i);
+      i = stringLiteral ? stringLiteral.end : i + 1;
+      continue;
+    }
+    if (ch === "`") {
+      i = skipTemplateLiteral(source, i);
+      continue;
+    }
+
+    const identifier = readIdentifier(source, i);
+    if (!identifier) {
+      i++;
+      continue;
+    }
+
+    if (identifier.value === "import") {
+      const afterImport = skipWhitespaceAndComments(source, identifier.end);
+      if (source[afterImport] === "(") {
+        // Dynamic import call.
+        const result = readImportCallSpecifier(source, afterImport);
+        if (result.specifier) out.add(result.specifier);
+        // Math.max guarantees forward progress past the parenthesis.
+        i = Math.max(result.end, afterImport + 1);
+        continue;
+      }
+      const result = readImportOrExportStatementSpecifier(
+        source,
+        identifier.end,
+        "import",
+      );
+      if (result.specifier) out.add(result.specifier);
+      i = Math.max(result.end, identifier.end);
+      continue;
+    }
+
+    if (identifier.value === "export") {
+      const result = readImportOrExportStatementSpecifier(
+        source,
+        identifier.end,
+        "export",
+      );
+      if (result.specifier) out.add(result.specifier);
+      i = Math.max(result.end, identifier.end);
+      continue;
+    }
+
+    // Any other identifier is irrelevant; resume after it.
+    i = identifier.end;
+  }
+  return out;
+}
+
+/**
+ * Resolves `candidate` to an existing file on disk.
+ *
+ * The resolved path itself is tried first. When it has no extension, each
+ * entry of `RESOLVABLE_MODULE_EXTENSIONS` is also tried, both appended to the
+ * path and as an `index<ext>` file inside it.
+ *
+ * @returns the absolute path of the first candidate that is a regular file,
+ *   or `undefined` when none is.
+ */
+function resolveExistingModulePath(candidate: string): string | undefined {
+  const base = path.resolve(candidate);
+  const probes = new Set<string>([base]);
+  if (!path.extname(base)) {
+    RESOLVABLE_MODULE_EXTENSIONS.forEach((ext) => {
+      probes.add(`${base}${ext}`);
+      probes.add(path.join(base, `index${ext}`));
+    });
+  }
+  for (const probe of probes) {
+    try {
+      const info = Deno.statSync(probe);
+      if (info.isFile) return path.resolve(probe);
+    } catch {
+      // ignore unresolved module candidates
+    }
+  }
+  return undefined;
+}
+
+/**
+ * Resolves an import specifier to a local file, if it refers to one.
+ *
+ * Handles `file://` URLs, absolute paths, and `./` / `../` paths relative to
+ * the importing file. Any other specifier yields `undefined`, as does a local
+ * specifier that does not resolve to an existing file.
+ */
+function resolveLocalImportPath(
+  importerPath: string,
+  specifier: string,
+): string | undefined {
+  const spec = stripSpecifierSuffix(specifier);
+  if (!spec) return undefined;
+
+  if (spec.startsWith("file://")) {
+    try {
+      return resolveExistingModulePath(path.fromFileUrl(spec));
+    } catch {
+      return undefined;
+    }
+  }
+
+  const isRelative = spec.startsWith("./") || spec.startsWith("../");
+  const isAbsolute = path.isAbsolute(spec);
+  if (!isRelative && !isAbsolute) return undefined;
+
+  return resolveExistingModulePath(
+    isAbsolute ? spec : path.resolve(path.dirname(importerPath), spec),
+  );
+}
+
+/**
+ * Walks the transitive local import graph starting at `entryPath`.
+ *
+ * Every file reached is included in the result. Only files whose extension
+ * is in `IMPORT_SOURCE_EXTENSIONS` are read and scanned for further imports,
+ * and unreadable files are kept in the result but not expanded.
+ *
+ * @returns the set of absolute paths visited, including the entry itself.
+ */
+function collectLocalImportGraph(entryPath: string): Set<string> {
+  const seen = new Set<string>();
+  const pending: Array<string> = [path.resolve(entryPath)];
+
+  for (
+    let current = pending.pop();
+    current !== undefined;
+    current = pending.pop()
+  ) {
+    if (seen.has(current)) continue;
+    seen.add(current);
+
+    const isScannable = IMPORT_SOURCE_EXTENSIONS.has(
+      path.extname(current).toLowerCase(),
+    );
+    if (!isScannable) continue;
+
+    let text: string;
+    try {
+      text = Deno.readTextFileSync(current);
+    } catch {
+      continue;
+    }
+
+    for (const specifier of extractModuleSpecifiers(text)) {
+      const target = resolveLocalImportPath(current, specifier);
+      if (target && !seen.has(target)) pending.push(target);
+    }
+  }
+
+  return seen;
+}
+
+// Filesystem paths of the two worker entry modules, resolved relative to this
+// module's URL.
+const WORKER_ENTRY_PATHS = [
+  "./runtime_worker.ts",
+  "./runtime_orchestration_worker.ts",
+].map((relative) => path.fromFileUrl(new URL(relative, import.meta.url)));
+// `../schemas` relative to this module's directory; walked for `.ts` files by
+// `builtinSchemaBootstrapReads`.
+const BUILTIN_SCHEMAS_DIR = path.resolve(
+  path.dirname(path.fromFileUrl(import.meta.url)),
+  "../schemas",
+);
+// `../snippets` relative to this module's directory; walked for `.md` files
+// by `builtinSnippetBootstrapReads`.
+const BUILTIN_SNIPPETS_DIR = path.resolve(
+  path.dirname(path.fromFileUrl(import.meta.url)),
+  "../snippets",
+);
+
+// Memoised result of `builtinSchemaBootstrapReads`.
+let builtinSchemaBootstrapCache: Array<string> | undefined;
+/**
+ * Lists every file reachable from the `.ts` modules under
+ * `BUILTIN_SCHEMAS_DIR`, following their local imports.
+ *
+ * Directories that cannot be read are skipped. The sorted, de-duplicated
+ * result is computed once and cached for later calls.
+ */
+function builtinSchemaBootstrapReads(): Array<string> {
+  if (builtinSchemaBootstrapCache) return builtinSchemaBootstrapCache;
+
+  const schemaModules: Array<string> = [];
+  const pendingDirs: Array<string> = [BUILTIN_SCHEMAS_DIR];
+  for (
+    let dir = pendingDirs.pop();
+    dir !== undefined;
+    dir = pendingDirs.pop()
+  ) {
+    let entries: Array<Deno.DirEntry> = [];
+    try {
+      entries = [...Deno.readDirSync(dir)];
+    } catch {
+      continue;
+    }
+    for (const entry of entries) {
+      const fullPath = path.join(dir, entry.name);
+      if (entry.isDirectory) {
+        pendingDirs.push(fullPath);
+      } else if (
+        entry.isFile && path.extname(entry.name).toLowerCase() === ".ts"
+      ) {
+        schemaModules.push(fullPath);
+      }
+    }
+  }
+
+  const reads = new Set<string>();
+  for (const modulePath of schemaModules) {
+    for (const file of collectLocalImportGraph(modulePath)) reads.add(file);
+  }
+  builtinSchemaBootstrapCache = [...reads].sort();
+  return builtinSchemaBootstrapCache;
+}
+
+// Memoised result of `builtinSnippetBootstrapReads`.
+let builtinSnippetBootstrapCache: Array<string> | undefined;
+/**
+ * Lists every `.md` file found under `BUILTIN_SNIPPETS_DIR`, recursively.
+ *
+ * Directories that cannot be read are skipped. The sorted, de-duplicated
+ * result is computed once and cached for later calls.
+ */
+function builtinSnippetBootstrapReads(): Array<string> {
+  if (builtinSnippetBootstrapCache) return builtinSnippetBootstrapCache;
+
+  const snippetFiles = new Set<string>();
+  const pendingDirs: Array<string> = [BUILTIN_SNIPPETS_DIR];
+  for (
+    let dir = pendingDirs.pop();
+    dir !== undefined;
+    dir = pendingDirs.pop()
+  ) {
+    let entries: Array<Deno.DirEntry> = [];
+    try {
+      entries = [...Deno.readDirSync(dir)];
+    } catch {
+      continue;
+    }
+    for (const entry of entries) {
+      const fullPath = path.join(dir, entry.name);
+      if (entry.isDirectory) {
+        pendingDirs.push(fullPath);
+      } else if (
+        entry.isFile && path.extname(entry.name).toLowerCase() === ".md"
+      ) {
+        snippetFiles.add(fullPath);
+      }
+    }
+  }
+
+  builtinSnippetBootstrapCache = [...snippetFiles].sort();
+  return builtinSnippetBootstrapCache;
+}
+
+/**
+ * Builds the sorted, de-duplicated list of files a worker may need to read
+ * while loading: the deck's local import graph, the worker entry modules'
+ * import graphs, and the built-in schema and snippet files.
+ */
+function workerBootstrapReadAllowlist(deckPath: string): Array<string> {
+  const reads = new Set<string>(collectLocalImportGraph(deckPath));
+  const addAll = (files: Iterable<string>) => {
+    for (const file of files) reads.add(file);
+  };
+  for (const entry of WORKER_ENTRY_PATHS) {
+    addAll(collectLocalImportGraph(entry));
+  }
+  addAll(builtinSchemaBootstrapReads());
+  addAll(builtinSnippetBootstrapReads());
+  return [...reads].sort();
+}
+
+// Memoised result of `trustedWorkerBootstrapReads`.
+let trustedWorkerBootstrapCache: Array<string> | undefined;
+/**
+ * Lists the files that are always accepted as bootstrap reads: the import
+ * graphs of the worker entry modules, `./definitions.ts` and `../mod.ts`,
+ * plus the built-in schema and snippet files. Computed once and cached.
+ */
+function trustedWorkerBootstrapReads(): Array<string> {
+  if (trustedWorkerBootstrapCache) return trustedWorkerBootstrapCache;
+
+  const definitionsPath = path.fromFileUrl(
+    new URL("./definitions.ts", import.meta.url),
+  );
+  const modPath = path.fromFileUrl(new URL("../mod.ts", import.meta.url));
+
+  const trusted = new Set<string>();
+  const addAll = (files: Iterable<string>) => {
+    for (const file of files) trusted.add(file);
+  };
+  for (const entry of WORKER_ENTRY_PATHS) {
+    addAll(collectLocalImportGraph(entry));
+  }
+  addAll(collectLocalImportGraph(definitionsPath));
+  addAll(collectLocalImportGraph(modPath));
+  addAll(builtinSchemaBootstrapReads());
+  addAll(builtinSnippetBootstrapReads());
+
+  trustedWorkerBootstrapCache = [...trusted].sort();
+  return trustedWorkerBootstrapCache;
+}
+
+/**
+ * Reports whether `target` is `root` itself or lies somewhere beneath it.
+ *
+ * Both arguments are compared as given; callers are expected to pass
+ * already-resolved paths.
+ */
+function pathMatchesPermissionRoot(root: string, target: string): boolean {
+  if (root === target) return true;
+  const rel = path.relative(root, target);
+  if (rel.length === 0 || path.isAbsolute(rel)) return false;
+  // Only a leading ".." path segment escapes the root. A plain
+  // `startsWith("..")` would also reject in-root entries whose name merely
+  // begins with two dots (for example "..cache/file.ts").
+  const escapesRoot = rel === ".." || rel.startsWith("../") ||
+    rel.startsWith("..\\");
+  return !escapesRoot;
+}
+
+/**
+ * Filters candidate bootstrap reads down to those that are permitted.
+ *
+ * With read-all permission every candidate is returned (de-duplicated and
+ * sorted). Otherwise a candidate is kept when it is one of `trustedReads` or
+ * falls under one of the allowed roots: the supplied `roots` plus the
+ * explicit read grants resolved against `permissions.baseDir`. When there are
+ * no allowed roots at all, nothing is kept.
+ */
+function constrainBootstrapReads(
+  permissions: NormalizedPermissionSet,
+  roots: Array<string>,
+  trustedReads: Set<string>,
+  reads: Array<string>,
+): Array<string> {
+  const allowedRoots = roots.map((entry) => path.resolve(entry));
+  for (const entry of permissions.read.values) {
+    allowedRoots.push(path.resolve(permissions.baseDir, entry));
+  }
+
+  if (permissions.read.all) return [...new Set(reads)].sort();
+  if (allowedRoots.length === 0) return [];
+
+  const kept: Array<string> = [];
+  for (const entry of reads) {
+    const target = path.resolve(permissions.baseDir, entry);
+    const permitted = trustedReads.has(target) ||
+      allowedRoots.some((root) => pathMatchesPermissionRoot(root, target));
+    if (permitted) kept.push(entry);
+  }
+  return kept;
+}
+
+/**
+ * Translates a normalized permission set into Deno worker options for
+ * running the deck at `deckPath`.
+ *
+ * Read access is the deck's own read grants plus the bootstrap files the
+ * worker needs in order to load, constrained by `constrainBootstrapReads`.
+ * The write, run, net and env scopes are converted one-to-one.
+ */
+function buildWorkerPermissions(
+  permissions: NormalizedPermissionSet,
+  deckPath: string,
+): WorkerOptions["deno"] {
+  const workerDirs = WORKER_ENTRY_PATHS.map((entry) => path.dirname(entry));
+  const bootstrapReads = constrainBootstrapReads(
+    permissions,
+    [path.dirname(deckPath), ...workerDirs],
+    new Set(trustedWorkerBootstrapReads()),
+    workerBootstrapReadAllowlist(deckPath),
+  );
+
+  let read: true | false | Array<string> = true;
+  if (!permissions.read.all) {
+    const merged = new Set<string>(permissions.read.values);
+    for (const entry of bootstrapReads) merged.add(entry);
+    const sorted = [...merged].sort();
+    read = sorted.length > 0 ? sorted : false;
+  }
+
+  return {
+    permissions: {
+      read,
+      write: toDenoPermissionList(permissions.write),
+      run: toDenoRunPermission(permissions.run),
+      net: toDenoPermissionList(permissions.net),
+      env: toDenoPermissionList(permissions.env),
+      // Worker module graphs include JSR dependencies (e.g. @std/*). Allow
+      // manifest resolution without widening deck runtime file/run permissions.
+      import: ["jsr.io:443"],
+    },
+  };
+}
+
+/**
+ * Builds the Deno worker options used when a deck is only being inspected.
+ *
+ * The worker may read the deck's directory and the constrained bootstrap
+ * files; write, run, net and env access are all denied.
+ */
+function buildDeckInspectWorkerPermissions(
+  deckPath: string,
+): WorkerOptions["deno"] {
+  const deckDir = path.dirname(deckPath);
+  const workerDirs = WORKER_ENTRY_PATHS.map((entry) => path.dirname(entry));
+
+  // Seed with no grants so only the roots and trusted files pass the filter.
+  const noValues = () => ({ all: false, values: new Set<string>() });
+  const inspectSeedPermissions: NormalizedPermissionSet = {
+    baseDir: deckDir,
+    read: noValues(),
+    write: noValues(),
+    run: { all: false, paths: new Set<string>(), commands: new Set<string>() },
+    net: noValues(),
+    env: noValues(),
+  };
+
+  const bootstrapReads = constrainBootstrapReads(
+    inspectSeedPermissions,
+    [deckDir, ...workerDirs],
+    new Set(trustedWorkerBootstrapReads()),
+    workerBootstrapReadAllowlist(deckPath),
+  );
+  const inspectReads = [...new Set<string>([deckDir, ...bootstrapReads])]
+    .sort();
+
+  return {
+    permissions: {
+      read: inspectReads.length > 0 ? inspectReads : false,
+      write: false,
+      run: false,
+      net: false,
+      env: false,
+    },
+  };
+}
+
+/**
+ * Inspects a deck inside a dedicated worker.
+ *
+ * Spawns `./runtime_worker.ts` as a module worker with the permissions from
+ * `buildDeckInspectWorkerPermissions`, posts a `deck.inspect` request, and
+ * resolves with the `result` of the matching `deck.inspect.result` message.
+ *
+ * Rejects when the timeout elapses, when the worker raises an `error` or
+ * `messageerror` event, or when it replies with `deck.inspect.error` or
+ * `run.error`. The worker is terminated in all cases.
+ *
+ * @param deckPath Path of the deck to inspect.
+ * @param runDeadlineMs Optional deadline, compared against
+ *   `performance.now()`; when finite it can shorten the inspect timeout.
+ */
+async function inspectDeckInWorker(
+  deckPath: string,
+  runDeadlineMs?: number,
+): Promise<WorkerDeckInspection> {
+  if (typeof runDeadlineMs === "number" && Number.isFinite(runDeadlineMs)) {
+    ensureNotExpired(runDeadlineMs);
+  }
+  // Session token echoed by the worker; messages without it are ignored.
+  const bridgeSession = randomId("bridge");
+  const worker = new Worker(
+    new URL("./runtime_worker.ts", import.meta.url).href,
+    {
+      type: "module",
+      deno: buildDeckInspectWorkerPermissions(deckPath),
+    },
+  );
+  // Guards against settling the outcome promise more than once.
+  let settled = false;
+  const clearAndTerminate = () => {
+    try {
+      worker.terminate();
+    } catch {
+      // ignore
+    }
+  };
+  let timeoutId: number | undefined;
+
+  const outcome = new Promise<WorkerDeckInspection>((resolve, reject) => {
+    const finishResolve = (value: WorkerDeckInspection) => {
+      if (settled) return;
+      settled = true;
+      if (timeoutId !== undefined) clearTimeout(timeoutId);
+      resolve(value);
+    };
+    const finishReject = (err: unknown) => {
+      if (settled) return;
+      settled = true;
+      if (timeoutId !== undefined) clearTimeout(timeoutId);
+      reject(err);
+    };
+
+    // The timeout is the inspect limit, capped by the time left until the run
+    // deadline when a finite deadline was supplied.
+    const deadlineConstrained = typeof runDeadlineMs === "number" &&
+      Number.isFinite(runDeadlineMs);
+    const timeoutMs = deadlineConstrained
+      ? Math.max(
+        0,
+        Math.min(
+          INSPECT_WORKER_TIMEOUT_MS,
+          Math.floor(runDeadlineMs - performance.now()),
+        ),
+      )
+      : INSPECT_WORKER_TIMEOUT_MS;
+    // Use the run-level timeout message when the run deadline, not the
+    // inspect limit, is what shortened the timeout.
+    const timeoutMessage = deadlineConstrained &&
+        timeoutMs < INSPECT_WORKER_TIMEOUT_MS
+      ? WORKER_TIMEOUT_MESSAGE
+      : INSPECT_WORKER_TIMEOUT_MESSAGE;
+    timeoutId = setTimeout(() => {
+      finishReject(new Error(timeoutMessage));
+      clearAndTerminate();
+    }, timeoutMs) as unknown as number;
+
+    worker.addEventListener("error", (event) => {
+      event.preventDefault();
+      finishReject(event.error ?? new Error(event.message));
+    });
+
+    worker.addEventListener("messageerror", () => {
+      finishReject(new Error("Worker bridge message serialization failed"));
+    });
+
+    worker.addEventListener("message", (event: MessageEvent) => {
+      const msg = event.data as Record<string, unknown>;
+      const receivedSession = typeof msg.bridgeSession === "string"
+        ? msg.bridgeSession
+        : "";
+      // Drop messages from another session, warning only if they are typed.
+      if (receivedSession !== bridgeSession) {
+        if (typeof msg.type === "string") {
+          logger.warn(
+            `[gambit] rejected inspect-worker message with mismatched bridge session (type=${msg.type})`,
+          );
+        }
+        return;
+      }
+      const type = typeof msg.type === "string" ? msg.type : "";
+      if (type === "deck.inspect.result") {
+        finishResolve((msg as { result: WorkerDeckInspection }).result);
+        return;
+      }
+      if (type === "deck.inspect.error" || type === "run.error") {
+        finishReject(normalizeWorkerError((msg as { error?: unknown }).error));
+      }
+    });
+  });
+
+  try {
+    worker.postMessage({ type: "deck.inspect", bridgeSession, deckPath });
+    return await outcome;
+  } finally {
+    // Always release the timer and the worker, whatever the outcome.
+    if (timeoutId !== undefined) clearTimeout(timeoutId);
+    clearAndTerminate();
+  }
+}
+
+/**
+ * Converts an error payload received from a worker into an `Error`.
+ *
+ * Non-object payloads are stringified. For objects, a non-blank string
+ * `message` is used (otherwise "Worker execution failed"). When a `source`
+ * is present the message is prefixed with `[source]` and suffixed with
+ * `(code)` if a `code` is present too. A `name` is copied onto the result.
+ */
+function normalizeWorkerError(err: unknown): Error {
+  if (err === null || typeof err !== "object") {
+    return new Error(String(err));
+  }
+  const fields = err as Record<string, unknown>;
+  const stringField = (key: string): string | undefined => {
+    const value = fields[key];
+    return typeof value === "string" ? value : undefined;
+  };
+
+  const rawMessage = stringField("message");
+  const message = rawMessage !== undefined && rawMessage.trim().length > 0
+    ? rawMessage
+    : "Worker execution failed";
+  const code = stringField("code");
+  const name = stringField("name");
+  const source = stringField("source");
+
+  let text = message;
+  if (source) {
+    text = `[${source}] ${message}`;
+    if (code) text += ` (${code})`;
+  }
+  const out = new Error(text);
+  if (name) out.name = name;
+  return out;
+}
+
+/**
+ * `run.start` message of the orchestration-worker protocol: the run options
+ * for one deck plus the permission ceiling, tagged with the bridge session
+ * and a completion nonce. NOTE(review): presumably posted by the parent to
+ * start a run — the sending side is outside this excerpt.
+ */
+type OrchestrationRunStartMessage = {
+  type: "run.start";
+  bridgeSession: string;
+  completionNonce: string;
+  options: {
+    path: string;
+    input: unknown;
+    inputProvided?: boolean;
+    initialUserMessage?: unknown;
+    isRoot?: boolean;
+    guardrails?: Partial<Guardrails>;
+    depth?: number;
+    parentActionCallId?: string;
+    runId: string;
+    defaultModel?: string;
+    modelOverride?: string;
+    stream?: boolean;
+    state?: SavedState;
+    responsesMode?: boolean;
+    allowRootStringInput?: boolean;
+    runDeadlineMs: number;
+  };
+  permissionCeiling: WirePermissionSet;
+};
+
+/**
+ * Worker-to-parent request for a chat-style model call. `requestId` is the
+ * correlation id carried by the matching `model.chat.*` reply messages.
+ */
+type OrchestrationModelChatRequest = {
+  type: "model.chat.request";
+  bridgeSession: string;
+  requestId: string;
+  input: {
+    model: string;
+    messages: Array<ModelMessage>;
+    tools?: Array<ToolDefinition>;
+    stream?: boolean;
+    state?: SavedState;
+    deckPath?: string;
+    params?: Record<string, unknown>;
+  };
+};
+
+/**
+ * Worker-to-parent request for a responses-style model call. `requestId` is
+ * the correlation id carried by the matching `model.responses.*` replies.
+ */
+type OrchestrationModelResponsesRequest = {
+  type: "model.responses.request";
+  bridgeSession: string;
+  requestId: string;
+  input: {
+    request: CreateResponseRequest;
+    state?: SavedState;
+    deckPath?: string;
+  };
+};
+
+/**
+ * Worker-to-parent request to resolve a model name, or a list of candidate
+ * names, with optional params. The matching reply type is
+ * `model.resolveModel.result` / `model.resolveModel.error`.
+ */
+type OrchestrationModelResolveRequest = {
+  type: "model.resolveModel.request";
+  bridgeSession: string;
+  requestId: string;
+  input: {
+    model: string | Array<string>;
+    params?: Record<string, unknown>;
+    deckPath?: string;
+  };
+};
+
+/**
+ * Every message the orchestration worker can post to the parent: trace
+ * events, state updates, streamed text chunks, model requests, and the final
+ * `run.result` / `run.error`. All variants carry `bridgeSession`; the two
+ * terminal variants may also carry a `completionNonce`.
+ */
+type OrchestrationWorkerMessageToParent =
+  | {
+    type: "trace.event";
+    bridgeSession: string;
+    event: import("./types.ts").TraceEvent;
+  }
+  | { type: "state.update"; bridgeSession: string; state: SavedState }
+  | { type: "stream.text"; bridgeSession: string; chunk: string }
+  | OrchestrationModelChatRequest
+  | OrchestrationModelResponsesRequest
+  | OrchestrationModelResolveRequest
+  | {
+    type: "run.result";
+    bridgeSession: string;
+    completionNonce?: string;
+    result: unknown;
+  }
+  | {
+    type: "run.error";
+    bridgeSession: string;
+    completionNonce?: string;
+    error: unknown;
+  };
+
+/**
+ * Messages in the parent's half of the orchestration protocol: the initial
+ * `run.start`, plus results, streamed chunks/events and errors for model
+ * requests, each keyed by the originating `requestId`.
+ * NOTE(review): presumably these are posted from parent to worker — the
+ * sending code is outside this excerpt.
+ */
+type OrchestrationParentMessage =
+  | OrchestrationRunStartMessage
+  | {
+    type: "model.chat.result";
+    requestId: string;
+    result: Awaited<ReturnType<ModelProvider["chat"]>>;
+  }
+  | {
+    type: "model.responses.result";
+    requestId: string;
+    result: CreateResponseResponse;
+  }
+  | {
+    type: "model.resolveModel.result";
+    requestId: string;
+    result: {
+      model: string;
+      params?: Record<string, unknown>;
+    };
+  }
+  | {
+    type: "model.chat.stream";
+    requestId: string;
+    chunk: string;
+  }
+  | {
+    type: "model.responses.event";
+    requestId: string;
+    event: ResponseEvent;
+  }
+  | {
+    type:
+      | "model.chat.error"
+      | "model.responses.error"
+      | "model.resolveModel.error";
+    requestId: string;
+    error: {
+      source?: string;
+      name?: string;
+      message: string;
+      code?: unknown;
+    };
+  };
+
+async function runLlmDeckInWorker( // Starts runtime_orchestration_worker.ts for ctx.deckPath and services its model requests from the parent.
+  ctx: Omit<RuntimeCtxBase, "deck"> & {
+    deckPath: string;
+    initialUserMessage?: unknown;
+    inputProvided?: boolean;
+    allowRootStringInput?: boolean;
+    isRoot: boolean;
+  },
+): Promise<unknown> { // resolves with the worker's run.result payload; rejects on run.error, worker error, or deadline
+  throwIfCanceled(ctx.signal); // NOTE(review): presumably throws when the signal is already aborted — confirm
+  const bridgeSession = randomId("bridge"); // per-run tag; worker messages carrying a different value are rejected
+  const completionNonce = randomId("done"); // must be echoed on run.result / run.error for them to be accepted
+  const worker = new Worker(
+    new URL("./runtime_orchestration_worker.ts", import.meta.url).href,
+    {
+      type: "module",
+      deno: buildWorkerPermissions(ctx.permissions, ctx.deckPath), // worker options built from ctx.permissions and the deck path
+    },
+  );
+
+  let settled = false; // first finishResolve/finishReject wins; later calls are no-ops
+  const clearAndTerminate = () => { // best-effort terminate; an exception from terminate() is swallowed
+    try {
+      worker.terminate();
+    } catch {
+      // ignore
+    }
+  };
+  let timeoutId: number | undefined; // deadline watchdog handle, cleared on settle and in the finally below
+
+  const outcome = new Promise<unknown>((resolve, reject) => {
+    const finishResolve = (value: unknown) => {
+      if (settled) return;
+      settled = true;
+      if (timeoutId !== undefined) clearTimeout(timeoutId);
+      resolve(value);
+    };
+    const finishReject = (err: unknown) => {
+      if (settled) return;
+      settled = true;
+      if (timeoutId !== undefined) clearTimeout(timeoutId);
+      reject(err);
+    };
+
+    const remainingMs = Math.max( // ms left until ctx.runDeadlineMs on the performance.now() clock, clamped at 0
+      0,
+      Math.floor(ctx.runDeadlineMs - performance.now()),
+    );
+    timeoutId = setTimeout(() => { // watchdog: reject with the timeout message, then kill the worker
+      finishReject(new Error(WORKER_TIMEOUT_MESSAGE));
+      clearAndTerminate();
+    }, remainingMs) as unknown as number;
+
+    worker.addEventListener("error", (event) => {
+      event.preventDefault(); // NOTE(review): presumably keeps the worker error from also surfacing as an uncaught parent error — confirm
+      finishReject(event.error ?? new Error(event.message));
+    });
+
+    worker.addEventListener("messageerror", () => {
+      finishReject(new Error("Worker bridge message serialization failed"));
+    });
+
+    worker.addEventListener("message", (event: MessageEvent) => {
+      const msg = event.data as OrchestrationWorkerMessageToParent;
+      if (!msg || typeof msg !== "object") return; // ignore null and primitive payloads
+      if (msg.bridgeSession !== bridgeSession) { // only accept messages tagged with this run's bridge session
+        logger.warn(
+          `[gambit] rejected orchestration-worker message with mismatched bridge session (type=${msg.type})`,
+        );
+        return;
+      }
+
+      if (msg.type === "trace.event") { // the next three branches forward worker events to the caller's callbacks
+        ctx.trace?.(msg.event);
+        return;
+      }
+      if (msg.type === "state.update") {
+        ctx.onStateUpdate?.(msg.state);
+        return;
+      }
+      if (msg.type === "stream.text") {
+        ctx.onStreamText?.(msg.chunk);
+        return;
+      }
+
+      if (msg.type === "model.chat.request") { // chat is executed by the parent's modelProvider; replies are keyed by requestId
+        (async () => { // not awaited by the listener; the outcome goes back to the worker as a message
+          try {
+            const result = await ctx.modelProvider.chat({
+              ...msg.input,
+              signal: ctx.signal,
+              onStreamText: (chunk) => { // relay each streamed chunk to the worker for this request
+                worker.postMessage(
+                  {
+                    type: "model.chat.stream",
+                    requestId: msg.requestId,
+                    chunk,
+                  } satisfies OrchestrationParentMessage,
+                );
+              },
+            });
+            worker.postMessage(
+              {
+                type: "model.chat.result",
+                requestId: msg.requestId,
+                result,
+              } satisfies OrchestrationParentMessage,
+            );
+          } catch (err) { // report the failure to the worker as a plain object
+            worker.postMessage(
+              {
+                type: "model.chat.error",
+                requestId: msg.requestId,
+                error: {
+                  source: "model",
+                  name: err instanceof Error ? err.name : undefined,
+                  message: err instanceof Error ? err.message : String(err),
+                  code: (err as { code?: unknown })?.code,
+                },
+              } satisfies OrchestrationParentMessage,
+            );
+          }
+        })();
+        return;
+      }
+
+      if (msg.type === "model.responses.request") { // same proxy pattern for the responses API
+        (async () => {
+          try {
+            if (!ctx.modelProvider.responses) { // responses() is optional on the provider; a missing one becomes an error reply
+              throw new Error(
+                "Responses API unavailable for current model provider",
+              );
+            }
+            const result = await ctx.modelProvider.responses({
+              ...msg.input,
+              signal: ctx.signal,
+              onStreamEvent: (streamEvent) => { // relay each stream event to the worker for this request
+                worker.postMessage(
+                  {
+                    type: "model.responses.event",
+                    requestId: msg.requestId,
+                    event: streamEvent,
+                  } satisfies OrchestrationParentMessage,
+                );
+              },
+            });
+            worker.postMessage(
+              {
+                type: "model.responses.result",
+                requestId: msg.requestId,
+                result,
+              } satisfies OrchestrationParentMessage,
+            );
+          } catch (err) {
+            worker.postMessage(
+              {
+                type: "model.responses.error",
+                requestId: msg.requestId,
+                error: {
+                  source: "model",
+                  name: err instanceof Error ? err.name : undefined,
+                  message: err instanceof Error ? err.message : String(err),
+                  code: (err as { code?: unknown })?.code,
+                },
+              } satisfies OrchestrationParentMessage,
+            );
+          }
+        })();
+        return;
+      }
+
+      if (msg.type === "model.resolveModel.request") {
+        (async () => {
+          try {
+            const result = ctx.modelProvider.resolveModel // use the provider's resolver when it has one
+              ? await ctx.modelProvider.resolveModel(msg.input)
+              : { // otherwise: first listed model (or the single string) with params unchanged
+                model: Array.isArray(msg.input.model)
+                  ? msg.input.model[0]
+                  : msg.input.model,
+                params: msg.input.params,
+              };
+            worker.postMessage(
+              {
+                type: "model.resolveModel.result",
+                requestId: msg.requestId,
+                result,
+              } satisfies OrchestrationParentMessage,
+            );
+          } catch (err) {
+            worker.postMessage(
+              {
+                type: "model.resolveModel.error",
+                requestId: msg.requestId,
+                error: {
+                  source: "model",
+                  name: err instanceof Error ? err.name : undefined,
+                  message: err instanceof Error ? err.message : String(err),
+                  code: (err as { code?: unknown })?.code,
+                },
+              } satisfies OrchestrationParentMessage,
+            );
+          }
+        })();
+        return;
+      }
+
+      if (msg.type === "run.result") {
+        if (msg.completionNonce !== completionNonce) { // completion is honoured only with this run's nonce
+          logger.warn(
+            `[gambit] rejected orchestration-worker run.result with invalid completion nonce`,
+          );
+          return;
+        }
+        finishResolve(msg.result);
+        return;
+      }
+      if (msg.type === "run.error") {
+        if (msg.completionNonce !== completionNonce) { // same nonce check for failures
+          logger.warn(
+            `[gambit] rejected orchestration-worker run.error with invalid completion nonce`,
+          );
+          return;
+        }
+        finishReject(normalizeWorkerError(msg.error));
+      }
+    });
+  });
+
+  try {
+    worker.postMessage( // kick off the run; listeners above are already attached
+      {
+        type: "run.start",
+        bridgeSession,
+        completionNonce,
+        options: {
+          path: ctx.deckPath,
+          input: ctx.input,
+          inputProvided: ctx.inputProvided,
+          initialUserMessage: ctx.initialUserMessage,
+          isRoot: ctx.isRoot,
+          guardrails: ctx.guardrails,
+          depth: ctx.depth,
+          parentActionCallId: ctx.parentActionCallId,
+          runId: ctx.runId,
+          defaultModel: ctx.defaultModel,
+          modelOverride: ctx.modelOverride,
+          stream: ctx.stream,
+          state: ctx.state,
+          responsesMode: ctx.responsesMode,
+          allowRootStringInput: ctx.allowRootStringInput,
+          runDeadlineMs: ctx.runDeadlineMs,
+        },
+        permissionCeiling: toWirePermissionSet(ctx.permissions), // ctx.permissions converted to its wire form
+      } satisfies OrchestrationRunStartMessage,
+    );
+    ensureRunActive(ctx.runDeadlineMs, ctx.signal); // NOTE(review): presumably throws if the deadline passed or the signal aborted — confirm
+    return await outcome;
+  } finally { // on every exit path: stop the watchdog and terminate the worker
+    if (timeoutId !== undefined) clearTimeout(timeoutId);
+    clearAndTerminate();
+  }
+}
+
+type WorkerSpawnRequest = { // shape the parent assumes for a compute worker's "spawn.request" message
+  bridgeSession: string;
+  requestId: string; // echoed on spawn.result / spawn.error; also used to reject duplicate in-flight requests
+  payload: {
+    path: string; // deck path handed to runDeck for the child run
+    input: unknown;
+    initialUserMessage?: unknown;
+    parentActionCallId?: string;
+    parentPermissionsBaseDir: string; // base dir used when normalizing and intersecting parentPermissions
+    parentPermissions: WirePermissionSet; // worker-reported permissions; intersected with ctx.permissions before use
+    workspacePermissions?: PermissionDeclarationInput;
+    workspacePermissionsBaseDir?: string;
+    sessionPermissions?: PermissionDeclarationInput;
+    sessionPermissionsBaseDir?: string;
+    runDeadlineMs: number; // the child's deadline is capped at the parent's ctx.runDeadlineMs
+  };
+};
+
+async function runComputeDeckInWorker(ctx: WorkerRuntimeCtx): Promise<unknown> { // Starts runtime_worker.ts for ctx.deckPath; resolves with its run.result, rejects on run.error, worker error, or deadline.
+  throwIfCanceled(ctx.signal); // NOTE(review): presumably throws when the signal is already aborted — confirm
+  const { runId } = ctx;
+  const actionCallId = randomId("action"); // attached to log trace events and sent to the worker in run.start
+  const bridgeSession = randomId("bridge"); // per-run tag; worker messages carrying a different value are rejected
+  const completionNonce = randomId("done"); // must be echoed on run.result / run.error for them to be accepted
+  const worker = new Worker(
+    new URL("./runtime_worker.ts", import.meta.url).href,
+    {
+      type: "module",
+      deno: buildWorkerPermissions(ctx.permissions, ctx.deckPath), // worker options built from ctx.permissions and the deck path
+    },
+  );
+
+  let settled = false; // first finishResolve/finishReject wins; later calls and late messages are ignored
+  const clearAndTerminate = () => { // best-effort terminate; an exception from terminate() is swallowed
+    try {
+      worker.terminate();
+    } catch {
+      // ignore
+    }
+  };
+  let timeoutId: number | undefined; // deadline watchdog handle, cleared on settle and in the finally below
+  const activeSpawnRequests = new Set<string>(); // requestIds of in-flight spawn.requests, used to reject duplicates
+  let currentState = ctx.state; // latest state reported by the worker or a child run; handed to later child runs
+
+  const outcome = new Promise<unknown>((resolve, reject) => {
+    const finishResolve = (value: unknown) => {
+      if (settled) return;
+      settled = true;
+      if (timeoutId !== undefined) clearTimeout(timeoutId);
+      resolve(value);
+    };
+    const finishReject = (err: unknown) => {
+      if (settled) return;
+      settled = true;
+      if (timeoutId !== undefined) clearTimeout(timeoutId);
+      reject(err);
+    };
+    const remainingMs = Math.max( // ms left until ctx.runDeadlineMs on the performance.now() clock, clamped at 0
+      0,
+      Math.floor(ctx.runDeadlineMs - performance.now()),
+    );
+    timeoutId = setTimeout(() => { // watchdog: reject with the timeout message, then kill the worker
+      finishReject(new Error(WORKER_TIMEOUT_MESSAGE));
+      clearAndTerminate();
+    }, remainingMs) as unknown as number;
+
+    worker.addEventListener("error", (event) => {
+      event.preventDefault(); // NOTE(review): presumably keeps the worker error from also surfacing as an uncaught parent error — confirm
+      finishReject(event.error ?? new Error(event.message));
+    });
+
+    worker.addEventListener("messageerror", () => {
+      finishReject(new Error("Worker bridge message serialization failed"));
+    });
+
+    worker.addEventListener("message", (event: MessageEvent) => {
+      const msg = event.data as Record<string, unknown> | null;
+      if (!msg || typeof msg !== "object") return; // a null/undefined payload would otherwise throw on the property reads below
+      const receivedBridgeSession =
+        typeof msg.bridgeSession === "string" ? msg.bridgeSession : "";
+      if (receivedBridgeSession !== bridgeSession) { // only accept messages tagged with this run's bridge session
+        const type = typeof msg.type === "string" ? msg.type : "unknown";
+        logger.warn(
+          `[gambit] rejected compute-worker message with mismatched bridge session (type=${type})`,
+        );
+        return;
+      }
+      // Ignore any late worker messages once this run has already settled.
+      if (settled) return;
+      const type = typeof msg.type === "string" ? msg.type : "";
+      if (type === "log.entry") { // convert a worker log entry into a "log" trace event
+        if (!ctx.trace) return;
+        const entry = msg.entry;
+        const raw = typeof entry === "string" // accept a bare string, an object, or fall back to an empty message
+          ? { message: entry }
+          : entry && typeof entry === "object"
+          ? entry as Record<string, unknown>
+          : { message: "" };
+        const message = typeof raw.message === "string"
+          ? raw.message
+          : raw.message !== undefined
+          ? String(raw.message)
+          : "";
+        const title = typeof raw.title === "string" ? raw.title : undefined;
+        const body = raw.body ?? raw.message ?? message; // prefer an explicit body, then the raw message value
+        ctx.trace({
+          type: "log",
+          runId,
+          deckPath: ctx.deckPath,
+          actionCallId,
+          parentActionCallId: ctx.parentActionCallId,
+          level: (raw.level as "debug" | "info" | "warn" | "error") ?? "info",
+          title: title ?? (message || undefined),
+          message,
+          body,
+          meta: raw.meta,
+        });
+        return;
+      }
+
+      if (type === "spawn.request") { // the worker asks the parent to run a child deck on its behalf
+        const req = msg as unknown as WorkerSpawnRequest;
+        const requestId = req.requestId;
+        if (!requestId) return;
+        if (activeSpawnRequests.has(requestId)) {
+          logger.warn(
+            `[gambit] rejected duplicate compute-worker spawn.request (${requestId})`,
+          );
+          return;
+        }
+        activeSpawnRequests.add(requestId);
+        (async () => { // not awaited by the listener; the outcome goes back to the worker as spawn.result / spawn.error
+          try {
+            const parentFromWorker = normalizePermissionBaseDir(
+              fromWirePermissionSet(req.payload.parentPermissions),
+              req.payload.parentPermissionsBaseDir,
+            );
+            // Enforce monotonicity against the parent effective ceiling.
+            const bridgedParent = intersectPermissions(
+              ctx.permissions,
+              parentFromWorker,
+              req.payload.parentPermissionsBaseDir,
+            );
+            const childResult = await runDeck({
+              path: req.payload.path,
+              input: req.payload.input,
+              modelProvider: ctx.modelProvider,
+              isRoot: false,
+              guardrails: ctx.guardrails,
+              depth: ctx.depth + 1,
+              parentActionCallId: req.payload.parentActionCallId,
+              runId,
+              defaultModel: ctx.defaultModel,
+              modelOverride: ctx.modelOverride,
+              trace: ctx.trace,
+              stream: ctx.stream,
+              state: currentState,
+              onStateUpdate: (state) => { // remember the child's state locally, then forward it to the caller
+                currentState = state;
+                ctx.onStateUpdate?.(state);
+              },
+              onStreamText: ctx.onStreamText,
+              responsesMode: ctx.responsesMode,
+              initialUserMessage: req.payload.initialUserMessage,
+              inputProvided: true,
+              parentPermissions: bridgedParent,
+              workspacePermissions: req.payload.workspacePermissions,
+              workspacePermissionsBaseDir:
+                req.payload.workspacePermissionsBaseDir,
+              sessionPermissions: req.payload.sessionPermissions,
+              sessionPermissionsBaseDir: req.payload.sessionPermissionsBaseDir,
+              runDeadlineMs: Math.min( // the child never gets a later deadline than this run; non-finite requests fall back to ours
+                ctx.runDeadlineMs,
+                Number.isFinite(req.payload.runDeadlineMs)
+                  ? req.payload.runDeadlineMs
+                  : ctx.runDeadlineMs,
+              ),
+              workerSandbox: true,
+              signal: ctx.signal,
+              onTool: ctx.onTool,
+            });
+            worker.postMessage({
+              type: "spawn.result",
+              requestId,
+              result: childResult,
+            });
+          } catch (err) { // report the child failure to the worker as a plain object
+            worker.postMessage({
+              type: "spawn.error",
+              requestId,
+              error: {
+                source: "child",
+                name: err instanceof Error ? err.name : undefined,
+                message: err instanceof Error ? err.message : String(err),
+                code: (err as { code?: unknown })?.code,
+              },
+            });
+          } finally {
+            activeSpawnRequests.delete(requestId); // the id may be reused once this request has finished
+          }
+        })();
+        return;
+      }
+
+      if (type === "state.update") {
+        const nextState = (msg as { state?: SavedState }).state;
+        if (!nextState || typeof nextState !== "object") return; // ignore updates without an object state
+        currentState = nextState;
+        ctx.onStateUpdate?.(nextState);
+        return;
+      }
+
+      if (type === "run.result") {
+        if ( // completion is honoured only with this run's nonce
+          (msg as { completionNonce?: unknown }).completionNonce !==
+            completionNonce
+        ) {
+          logger.warn(
+            `[gambit] rejected compute-worker run.result with invalid completion nonce`,
+          );
+          return;
+        }
+        finishResolve((msg as { result?: unknown }).result);
+        return;
+      }
+
+      if (type === "run.error") {
+        if ( // same nonce check for failures
+          (msg as { completionNonce?: unknown }).completionNonce !==
+            completionNonce
+        ) {
+          logger.warn(
+            `[gambit] rejected compute-worker run.error with invalid completion nonce`,
+          );
+          return;
+        }
+        finishReject(normalizeWorkerError((msg as { error?: unknown }).error));
+      }
+    });
+  });
+
+  try {
+    worker.postMessage({ // kick off the run; listeners above are already attached
+      type: "run.start",
+      bridgeSession,
+      completionNonce,
+      runId,
+      actionCallId,
+      deckPath: ctx.deckPath,
+      input: ctx.input,
+      state: ctx.state,
+      initialUserMessage: ctx.initialUserMessage,
+      depth: ctx.depth,
+      parentActionCallId: ctx.parentActionCallId,
+      permissions: toWirePermissionSet(ctx.permissions), // ctx.permissions converted to its wire form
+      workspacePermissions: ctx.workspacePermissions,
+      workspacePermissionsBaseDir: ctx.workspacePermissionsBaseDir,
+      sessionPermissions: ctx.sessionPermissions,
+      sessionPermissionsBaseDir: ctx.sessionPermissionsBaseDir,
+      runDeadlineMs: ctx.runDeadlineMs,
+      isRoot: ctx.isRoot,
+      allowRootStringInput: ctx.allowRootStringInput,
+    });
+    const raw = await outcome;
+    ensureRunActive(ctx.runDeadlineMs, ctx.signal); // NOTE(review): presumably throws if the deadline passed or the signal aborted while the worker ran — confirm
+    return raw;
+  } finally { // on every exit path: stop the watchdog and terminate the worker
+    if (timeoutId !== undefined) clearTimeout(timeoutId);
+    clearAndTerminate();
+  }
+}
+
+async function runComputeDeckInProcess(ctx: RuntimeCtxBase): Promise<unknown> {
   const { deck, runId } = ctx;
   const actionCallId = randomId("action");
+  let computeState = ctx.state
+    ? {
+      ...ctx.state,
+      messages: Array.isArray(ctx.state.messages)
+        ? ctx.state.messages.map(sanitizeMessage)
+        : [],
+      meta: ctx.state.meta ? { ...ctx.state.meta } : undefined,
+      messageRefs: Array.isArray(ctx.state.messageRefs)
+        ? [...ctx.state.messageRefs]
+        : undefined,
+    }
+    : undefined;
+
+  const ensureComputeState = (): SavedState => {
+    if (computeState) return computeState;
+    computeState = {
+      runId,
+      messages: [],
+      meta: {},
+      messageRefs: [],
+    };
+    return computeState;
+  };
+
+  const publishComputeState = () => {
+    if (!computeState) return;
+    ctx.onStateUpdate?.({
+      ...computeState,
+      messages: computeState.messages.map(sanitizeMessage),
+      meta: computeState.meta ? { ...computeState.meta } : undefined,
+      messageRefs: Array.isArray(computeState.messageRefs)
+        ? [...computeState.messageRefs]
+        : undefined,
+    });
+  };
 
   const execContext: ExecutionContext = {
     runId,
@@ -604,6 +2724,39 @@ async function runComputeDeck(ctx: RuntimeCtxBase): Promise<unknown> {
     parentActionCallId: ctx.parentActionCallId,
     depth: ctx.depth,
     input: ctx.input,
+    initialUserMessage: ctx.initialUserMessage,
+    getSessionMeta: <T = unknown>(key: string): T | undefined => {
+      if (!key) return undefined;
+      return computeState?.meta?.[key] as T | undefined;
+    },
+    setSessionMeta: (key, value) => {
+      if (!key) return;
+      const state = ensureComputeState();
+      const nextMeta = { ...(state.meta ?? {}) };
+      if (value === undefined) {
+        delete nextMeta[key];
+      } else {
+        nextMeta[key] = value;
+      }
+      state.meta = nextMeta;
+      publishComputeState();
+    },
+    appendMessage: (message) => {
+      const role = message.role;
+      const content = String(message.content ?? "");
+      if ((role !== "user" && role !== "assistant") || !content.trim()) {
+        return;
+      }
+      const state = ensureComputeState();
+      const sanitized = sanitizeMessage({ role, content: content.trim() });
+      state.messages = [...(state.messages ?? []), sanitized];
+      const refs = Array.isArray(state.messageRefs)
+        ? [...state.messageRefs]
+        : [];
+      refs.push({ id: randomId("msg"), role: sanitized.role });
+      state.messageRefs = refs;
+      publishComputeState();
+    },
     label: deck.label,
     log: (entry) => {
       if (!ctx.trace) return;
@@ -635,9 +2788,13 @@ async function runComputeDeck(ctx: RuntimeCtxBase): Promise<unknown> {
       });
     },
     spawnAndWait: async (opts) => {
+      ensureRunActive(ctx.runDeadlineMs, ctx.signal);
       const childPath = path.isAbsolute(opts.path)
         ? opts.path
         : path.resolve(path.dirname(deck.path), opts.path);
+      const childInitialUserMessage = Object.hasOwn(opts, "initialUserMessage")
+        ? opts.initialUserMessage
+        : ctx.initialUserMessage;
       return await runDeck({
         path: childPath,
         input: opts.input,
@@ -651,17 +2808,33 @@ async function runComputeDeck(ctx: RuntimeCtxBase): Promise<unknown> {
         modelOverride: ctx.modelOverride,
         trace: ctx.trace,
         stream: ctx.stream,
-        state: ctx.state,
-        onStateUpdate: ctx.onStateUpdate,
+        state: computeState,
+        onStateUpdate: (state) => {
+          computeState = {
+            ...state,
+            messages: Array.isArray(state.messages)
+              ? state.messages.map(sanitizeMessage)
+              : [],
+            meta: state.meta ? { ...state.meta } : undefined,
+            messageRefs: Array.isArray(state.messageRefs)
+              ? [...state.messageRefs]
+              : undefined,
+          };
+          ctx.onStateUpdate?.(state);
+        },
         onStreamText: ctx.onStreamText,
         responsesMode: ctx.responsesMode,
-        initialUserMessage: undefined,
+        initialUserMessage: childInitialUserMessage,
         inputProvided: true,
         parentPermissions: ctx.permissions,
         workspacePermissions: ctx.workspacePermissions,
         workspacePermissionsBaseDir: ctx.workspacePermissionsBaseDir,
         sessionPermissions: ctx.sessionPermissions,
         sessionPermissionsBaseDir: ctx.sessionPermissionsBaseDir,
+        runDeadlineMs: ctx.runDeadlineMs,
+        workerSandbox: ctx.workerSandbox,
+        signal: ctx.signal,
+        onTool: ctx.onTool,
       });
     },
     fail: (opts) => {
@@ -670,7 +2843,9 @@ async function runComputeDeck(ctx: RuntimeCtxBase): Promise<unknown> {
     return: (payload) => Promise.resolve(payload),
   };
 
+  ensureRunActive(ctx.runDeadlineMs, ctx.signal);
   const raw = await deck.executor!(execContext);
+  ensureRunActive(ctx.runDeadlineMs, ctx.signal);
   return validateOutput(deck, raw, ctx.depth === 0);
 }
 
@@ -725,11 +2900,15 @@ async function runLlmDeck(
     workspacePermissionsBaseDir: ctx.workspacePermissionsBaseDir,
     sessionPermissions: ctx.sessionPermissions,
     sessionPermissionsBaseDir: ctx.sessionPermissionsBaseDir,
+    runDeadlineMs: ctx.runDeadlineMs,
+    workerSandbox: ctx.workerSandbox,
+    signal: ctx.signal,
+    onTool: ctx.onTool,
   });
   let streamingBuffer = "";
   let streamingCommitted = false;
   const wrappedOnStreamText = (chunk: string) => {
-    if (!chunk) return;
+    if (!chunk || ctx.signal?.aborted) return;
     idleController.touch();
     streamingBuffer += chunk;
     ctx.onStreamText?.(chunk);
@@ -743,6 +2922,7 @@ async function runLlmDeck(
         actionCallId: refToolCallId,
         name: GAMBIT_TOOL_CONTEXT,
         args: {},
+        toolKind: "internal",
         parentActionCallId: actionCallId,
       });
       messages.push(
@@ -771,6 +2951,7 @@ async function runLlmDeck(
         actionCallId: refToolCallId,
         name: GAMBIT_TOOL_CONTEXT,
         result: input as unknown as import("./types.ts").JSONValue,
+        toolKind: "internal",
         parentActionCallId: actionCallId,
       });
     }
@@ -793,7 +2974,7 @@ async function runLlmDeck(
   }
   idleController.touch();
 
-  const tools = await buildToolDefs(deck);
+  const tools = await buildToolDefs(deck, ctx.permissions);
   ctx.trace?.({
     type: "deck.start",
     runId,
@@ -806,9 +2987,7 @@ async function runLlmDeck(
   try {
     while (passes < guardrails.maxPasses) {
       passes++;
-      if (performance.now() - start > guardrails.timeoutMs) {
-        throw new Error("Timeout exceeded");
-      }
+      ensureRunActive(ctx.runDeadlineMs, ctx.signal);
       streamingBuffer = "";
       streamingCommitted = false;
       const modelCandidate = ctx.modelOverride ??
@@ -850,6 +3029,9 @@ async function runLlmDeck(
 
       let responseOutputItems: Array<ResponseItem> | undefined;
       const responses = modelProvider.responses;
+      const projectedToolCalls = new Set<string>();
+      const projectedToolResults = new Set<string>();
+      const projectedToolNames = new Map<string, string>();
       type ModelCallResult = Awaited<ReturnType<ModelProvider["chat"]>>;
       const result: ModelCallResult = (useResponses && responses)
         ? await (async () => {
@@ -864,18 +3046,47 @@ async function runLlmDeck(
               params: providerParams,
             },
             state: ctx.state,
-            onStreamEvent: (ctx.onStreamText || deck.handlers?.onIdle)
-              ? (event) => {
-                if (event.type === "response.output_text.delta") {
-                  sawDelta = true;
-                  wrappedOnStreamText(event.delta);
-                } else if (
-                  event.type === "response.output_text.done" && !sawDelta
-                ) {
-                  wrappedOnStreamText(event.text);
+            deckPath: deck.path,
+            signal: ctx.signal,
+            onStreamEvent:
+              (ctx.trace || ctx.onStreamText || deck.handlers?.onIdle)
+                ? (event) => {
+                  if (ctx.trace) {
+                    ctx.trace({
+                      type: "model.stream.event",
+                      runId,
+                      actionCallId,
+                      deckPath: deck.path,
+                      model,
+                      event: event as unknown as Record<
+                        string,
+                        import("./types.ts").JSONValue
+                      >,
+                      parentActionCallId: ctx.parentActionCallId,
+                    });
+                    projectStreamToolTraceEvents({
+                      streamEvent: event as unknown as Record<
+                        string,
+                        JSONValue
+                      >,
+                      runId,
+                      parentActionCallId: actionCallId,
+                      trace: ctx.trace,
+                      emittedCalls: projectedToolCalls,
+                      emittedResults: projectedToolResults,
+                      toolNames: projectedToolNames,
+                    });
+                  }
+                  if (event.type === "response.output_text.delta") {
+                    sawDelta = true;
+                    wrappedOnStreamText(event.delta);
+                  } else if (
+                    event.type === "response.output_text.done" && !sawDelta
+                  ) {
+                    wrappedOnStreamText(event.text);
+                  }
                 }
-              }
-              : undefined,
+                : undefined,
           });
           responseOutputItems = response.output ?? [];
           const mapped = mapResponseOutput(responseOutputItems);
@@ -883,7 +3094,8 @@ async function runLlmDeck(
             message: mapped.message,
             finishReason: mapped.toolCalls?.length ? "tool_calls" : "stop",
             toolCalls: mapped.toolCalls,
-            updatedState: undefined,
+            usage: response.usage,
+            updatedState: response.updatedState,
           };
         })()
         : await modelProvider.chat({
@@ -892,10 +3104,34 @@ async function runLlmDeck(
           tools,
           stream: ctx.stream,
           state: ctx.state,
+          deckPath: deck.path,
+          signal: ctx.signal,
           params: providerParams,
           onStreamText: (ctx.onStreamText || deck.handlers?.onIdle)
             ? wrappedOnStreamText
             : undefined,
+          onStreamEvent: ctx.trace
+            ? (event) => {
+              ctx.trace?.({
+                type: "model.stream.event",
+                runId,
+                actionCallId,
+                deckPath: deck.path,
+                model,
+                event,
+                parentActionCallId: ctx.parentActionCallId,
+              });
+              projectStreamToolTraceEvents({
+                streamEvent: event,
+                runId,
+                parentActionCallId: actionCallId,
+                trace: ctx.trace,
+                emittedCalls: projectedToolCalls,
+                emittedResults: projectedToolResults,
+                toolNames: projectedToolNames,
+              });
+            }
+            : undefined,
         });
       idleController.touch();
       let message = result.message;
@@ -909,6 +3145,7 @@ async function runLlmDeck(
         message: sanitizeMessage(message),
         toolCalls: result.toolCalls,
         stateMessages: result.updatedState?.messages?.length,
+        usage: result.usage,
         mode: useResponses ? "responses" : "chat",
         responseItems: responseOutputItems,
         parentActionCallId: ctx.parentActionCallId,
@@ -928,6 +3165,10 @@ async function runLlmDeck(
         );
         const feedback = updated?.feedback ?? ctx.state?.feedback;
         const traces = updated?.traces ?? ctx.state?.traces;
+        const meta = updated?.meta ?? ctx.state?.meta;
+        const notes = updated?.notes ?? ctx.state?.notes;
+        const conversationScore = updated?.conversationScore ??
+          ctx.state?.conversationScore;
         return {
           ...base,
           runId,
@@ -939,6 +3180,9 @@ async function runLlmDeck(
           messageRefs,
           feedback,
           traces,
+          meta,
+          notes,
+          conversationScore,
         };
       };
 
@@ -997,6 +3241,7 @@ async function runLlmDeck(
               actionCallId: call.id,
               name: call.name,
               args: call.args,
+              toolKind: "internal",
               parentActionCallId: actionCallId,
             });
             const toolContent = JSON.stringify(call.args ?? {});
@@ -1027,6 +3272,7 @@ async function runLlmDeck(
               name: call.name,
               result:
                 respondEnvelope as unknown as import("./types.ts").JSONValue,
+              toolKind: "internal",
               parentActionCallId: actionCallId,
             });
             continue;
@@ -1054,6 +3300,7 @@ async function runLlmDeck(
               actionCallId: call.id,
               name: call.name,
               args: call.args,
+              toolKind: "internal",
               parentActionCallId: actionCallId,
             });
             const toolContent = JSON.stringify(call.args ?? {});
@@ -1088,12 +3335,14 @@ async function runLlmDeck(
               actionCallId: call.id,
               name: call.name,
               result: signal as unknown as import("./types.ts").JSONValue,
+              toolKind: "internal",
               parentActionCallId: actionCallId,
             });
             continue;
           }
 
           const actionRef = deck.actionDecks.find((a) => a.name === call.name);
+          const toolKind: ToolKind = actionRef ? "action" : "external";
           const actionPermissions = resolveEffectivePermissions({
             baseDir: path.dirname(deck.path),
             parent: ctx.permissions,
@@ -1119,6 +3368,7 @@ async function runLlmDeck(
             actionCallId: call.id,
             name: call.name,
             args: call.args,
+            toolKind,
             parentActionCallId: actionCallId,
           });
           const toolResult = await handleToolCall(call, {
@@ -1143,6 +3393,10 @@ async function runLlmDeck(
             workspacePermissionsBaseDir: ctx.workspacePermissionsBaseDir,
             sessionPermissions: ctx.sessionPermissions,
             sessionPermissionsBaseDir: ctx.sessionPermissionsBaseDir,
+            runDeadlineMs: ctx.runDeadlineMs,
+            workerSandbox: ctx.workerSandbox,
+            signal: ctx.signal,
+            onTool: ctx.onTool,
           });
           ctx.trace?.({
             type: "tool.result",
@@ -1150,6 +3404,7 @@ async function runLlmDeck(
             actionCallId: call.id,
             name: call.name,
             result: toolResult.toolContent,
+            toolKind,
             parentActionCallId: actionCallId,
           });
           appendedMessages.push({
@@ -1188,6 +3443,7 @@ async function runLlmDeck(
           idleController.touch();
         }
         if (ctx.onStateUpdate) {
+          ensureRunActive(ctx.runDeadlineMs, ctx.signal);
           const state = computeState(result.updatedState);
           ctx.onStateUpdate(state);
         }
@@ -1236,6 +3492,7 @@ async function runLlmDeck(
 
       if (message.content !== null && message.content !== undefined) {
         messages.push(sanitizeMessage(message));
+        ensureRunActive(ctx.runDeadlineMs, ctx.signal);
         if (ctx.onStateUpdate) {
           const state = computeState(result.updatedState);
           ctx.onStateUpdate(state);
@@ -1305,25 +3562,17 @@ async function handleToolCall(
     workspacePermissionsBaseDir?: string;
     sessionPermissions?: PermissionDeclarationInput;
     sessionPermissionsBaseDir?: string;
+    runDeadlineMs: number;
+    workerSandbox: boolean;
+    signal?: AbortSignal;
+    onTool?: RunOptions["onTool"];
   },
 ): Promise<ToolCallResult> {
-  const action = ctx.parentDeck.actionDecks.find((a) => a.name === call.name);
+  ensureRunActive(ctx.runDeadlineMs, ctx.signal);
   const source = {
     deckPath: ctx.parentDeck.path,
-    actionName: action?.name ?? call.name,
+    actionName: call.name,
   };
-  if (!action) {
-    return {
-      toolContent: JSON.stringify({
-        runId: ctx.runId,
-        actionCallId: call.id,
-        parentActionCallId: ctx.parentActionCallId,
-        source,
-        status: 404,
-        message: "unknown action",
-      }),
-    };
-  }
 
   const baseComplete = (payload: {
     status?: number;
@@ -1346,6 +3595,474 @@ async function handleToolCall(
   const extraMessages: Array<ModelMessage> = [];
   const started = performance.now();
 
+  const runBuiltinTool = async (): Promise<ToolCallResult | null> => {
+    if (!isBuiltinTool(call.name)) return null;
+    const deny = (message: string): ToolCallResult => ({
+      toolContent: baseComplete({
+        status: 403,
+        code: "permission_denied",
+        message,
+      }),
+    });
+
+    if (call.name === BUILTIN_TOOL_READ_FILE) {
+      let targetPath: string;
+      try {
+        targetPath = resolveToolPath(ctx.permissions.baseDir, call.args.path);
+      } catch (err) {
+        return {
+          toolContent: baseComplete({
+            status: 400,
+            code: "invalid_input",
+            message: err instanceof Error ? err.message : String(err),
+          }),
+        };
+      }
+      if (!canReadPath(ctx.permissions, targetPath)) {
+        return deny(`read_file denied for ${targetPath}`);
+      }
+      const text = await Deno.readTextFile(targetPath);
+      const lines = text.split(/\r?\n/);
+      const { startLine, endLine } = parseLineRange(call.args);
+      const sliced = lines.slice(startLine - 1, endLine).join("\n");
+      return {
+        toolContent: baseComplete({
+          status: 200,
+          payload: {
+            path: targetPath,
+            start_line: startLine,
+            end_line: endLine,
+            total_lines: lines.length,
+            content: sliced,
+          },
+        }),
+      };
+    }
+
+    if (call.name === BUILTIN_TOOL_LIST_DIR) {
+      let targetPath: string;
+      try {
+        targetPath = resolveToolPath(ctx.permissions.baseDir, call.args.path);
+      } catch (err) {
+        return {
+          toolContent: baseComplete({
+            status: 400,
+            code: "invalid_input",
+            message: err instanceof Error ? err.message : String(err),
+          }),
+        };
+      }
+      if (!canReadPath(ctx.permissions, targetPath)) {
+        return deny(`list_dir denied for ${targetPath}`);
+      }
+      const recursive = Boolean(call.args.recursive);
+      const maxEntries = parseToolLimit(call.args.max_entries, 200, 2000);
+      const out: Array<{ path: string; type: "file" | "dir" | "symlink" }> = [];
+      const pending: Array<string> = [targetPath];
+      while (pending.length > 0 && out.length < maxEntries) {
+        const current = pending.pop()!;
+        for await (const entry of Deno.readDir(current)) {
+          if (out.length >= maxEntries) break;
+          const entryPath = path.join(current, entry.name);
+          if (!canReadPath(ctx.permissions, entryPath)) continue;
+          const type = entry.isDirectory
+            ? "dir"
+            : entry.isSymlink
+            ? "symlink"
+            : "file";
+          out.push({ path: entryPath, type });
+          if (recursive && entry.isDirectory) {
+            pending.push(entryPath);
+          }
+        }
+      }
+      return {
+        toolContent: baseComplete({
+          status: 200,
+          payload: {
+            path: targetPath,
+            recursive,
+            entries: out,
+            truncated: out.length >= maxEntries,
+          },
+        }),
+      };
+    }
+
+    if (call.name === BUILTIN_TOOL_GREP_FILES) {
+      let targetPath: string;
+      try {
+        targetPath = resolveToolPath(ctx.permissions.baseDir, call.args.path);
+      } catch (err) {
+        return {
+          toolContent: baseComplete({
+            status: 400,
+            code: "invalid_input",
+            message: err instanceof Error ? err.message : String(err),
+          }),
+        };
+      }
+      if (!canReadPath(ctx.permissions, targetPath)) {
+        return deny(`grep_files denied for ${targetPath}`);
+      }
+      const query = typeof call.args.query === "string" ? call.args.query : "";
+      if (!query) {
+        return {
+          toolContent: baseComplete({
+            status: 400,
+            code: "invalid_input",
+            message: "query is required",
+          }),
+        };
+      }
+      let re: RegExp;
+      try {
+        re = new RegExp(query, "g");
+      } catch (err) {
+        return {
+          toolContent: baseComplete({
+            status: 400,
+            code: "invalid_regex",
+            message: err instanceof Error ? err.message : String(err),
+          }),
+        };
+      }
+      const maxMatches = parseToolLimit(call.args.max_matches, 200, 2000);
+      const matches: Array<{
+        path: string;
+        line: number;
+        text: string;
+      }> = [];
+      const pending: Array<string> = [targetPath];
+      while (pending.length > 0 && matches.length < maxMatches) {
+        const current = pending.pop()!;
+        const stat = await Deno.stat(current);
+        if (stat.isDirectory) {
+          for await (const entry of Deno.readDir(current)) {
+            const entryPath = path.join(current, entry.name);
+            if (!canReadPath(ctx.permissions, entryPath)) continue;
+            if (entry.isDirectory) {
+              pending.push(entryPath);
+              continue;
+            }
+            if (!entry.isFile) continue;
+            const text = await Deno.readTextFile(entryPath).catch(() => null);
+            if (text === null) continue;
+            const lines = text.split(/\r?\n/);
+            for (let i = 0; i < lines.length; i++) {
+              re.lastIndex = 0;
+              if (!re.test(lines[i])) continue;
+              matches.push({ path: entryPath, line: i + 1, text: lines[i] });
+              if (matches.length >= maxMatches) break;
+            }
+            if (matches.length >= maxMatches) break;
+          }
+          continue;
+        }
+        if (!stat.isFile) continue;
+        const text = await Deno.readTextFile(current).catch(() => null);
+        if (text === null) continue;
+        const lines = text.split(/\r?\n/);
+        for (let i = 0; i < lines.length; i++) {
+          re.lastIndex = 0;
+          if (!re.test(lines[i])) continue;
+          matches.push({ path: current, line: i + 1, text: lines[i] });
+          if (matches.length >= maxMatches) break;
+        }
+      }
+      return {
+        toolContent: baseComplete({
+          status: 200,
+          payload: {
+            path: targetPath,
+            query,
+            matches,
+            truncated: matches.length >= maxMatches,
+          },
+        }),
+      };
+    }
+
+    if (call.name === BUILTIN_TOOL_APPLY_PATCH) {
+      let targetPath: string;
+      try {
+        targetPath = resolveToolPath(ctx.permissions.baseDir, call.args.path);
+      } catch (err) {
+        return {
+          toolContent: baseComplete({
+            status: 400,
+            code: "invalid_input",
+            message: err instanceof Error ? err.message : String(err),
+          }),
+        };
+      }
+      if (!canWritePath(ctx.permissions, targetPath)) {
+        return deny(`apply_patch denied for ${targetPath}`);
+      }
+
+      const rawEdits = Array.isArray(call.args.edits) ? call.args.edits : [];
+      const edits = rawEdits.flatMap((entry) => {
+        if (!entry || typeof entry !== "object") return [];
+        const rec = entry as Record<string, unknown>;
+        if (
+          typeof rec.old_text !== "string" || typeof rec.new_text !== "string"
+        ) {
+          return [];
+        }
+        return [{
+          oldText: rec.old_text,
+          newText: rec.new_text,
+          replaceAll: Boolean(rec.replace_all),
+        }];
+      });
+      if (edits.length === 0) {
+        return {
+          toolContent: baseComplete({
+            status: 400,
+            code: "invalid_input",
+            message: "edits must include at least one old_text/new_text pair",
+          }),
+        };
+      }
+
+      const createIfMissing = Boolean(call.args.create_if_missing);
+      let existing = "";
+      let created = false;
+      try {
+        if (!canReadPath(ctx.permissions, targetPath)) {
+          return deny(`apply_patch read denied for ${targetPath}`);
+        }
+        existing = await Deno.readTextFile(targetPath);
+      } catch (err) {
+        if (err instanceof Deno.errors.NotFound) {
+          if (!createIfMissing) {
+            return {
+              toolContent: baseComplete({
+                status: 404,
+                code: "not_found",
+                message: `file not found: ${targetPath}`,
+              }),
+            };
+          }
+          created = true;
+          existing = "";
+        } else {
+          throw err;
+        }
+      }
+
+      const patched = applySimplePatch(existing, edits);
+      if (!created && patched.applied === 0) {
+        return {
+          toolContent: baseComplete({
+            status: 409,
+            code: "no_changes",
+            message: `No edit targets were found in ${targetPath}`,
+          }),
+        };
+      }
+      if (created) {
+        const parentDir = path.dirname(targetPath);
+        if (parentDir && parentDir !== "." && parentDir !== targetPath) {
+          await Deno.mkdir(parentDir, { recursive: true });
+        }
+      }
+      try {
+        await Deno.writeTextFile(targetPath, patched.next);
+      } catch (err) {
+        if (err instanceof Deno.errors.NotFound) {
+          return {
+            toolContent: baseComplete({
+              status: 404,
+              code: "not_found",
+              message: `path not found: ${targetPath}`,
+            }),
+          };
+        }
+        return {
+          toolContent: baseComplete({
+            status: 500,
+            code: "write_failed",
+            message: err instanceof Error ? err.message : String(err),
+          }),
+        };
+      }
+      return {
+        toolContent: baseComplete({
+          status: 200,
+          payload: {
+            path: targetPath,
+            applied: patched.applied,
+            created,
+          },
+        }),
+      };
+    }
+
+    if (call.name === BUILTIN_TOOL_EXEC) {
+      const command = typeof call.args.command === "string"
+        ? call.args.command
+        : "";
+      if (!command) {
+        return {
+          toolContent: baseComplete({
+            status: 400,
+            code: "invalid_input",
+            message: "command is required",
+          }),
+        };
+      }
+      if (
+        !canRunCommand(ctx.permissions, command) &&
+        !canRunPath(ctx.permissions, command)
+      ) {
+        return deny(`exec denied for command ${command}`);
+      }
+      const args = toStringArray(call.args.args);
+      const cwd = typeof call.args.cwd === "string"
+        ? path.resolve(ctx.permissions.baseDir, call.args.cwd)
+        : ctx.permissions.baseDir;
+      const timeoutMs = parseToolLimit(call.args.timeout_ms, 5000, 30000);
+      const remainingMs = Math.max(
+        1,
+        Math.min(timeoutMs, Math.floor(ctx.runDeadlineMs - performance.now())),
+      );
+      const controller = new AbortController();
+      const onAbort = () => controller.abort();
+      if (ctx.signal?.aborted) {
+        controller.abort();
+      } else if (ctx.signal) {
+        ctx.signal.addEventListener("abort", onAbort, { once: true });
+      }
+      const timeoutId = setTimeout(() => controller.abort(), remainingMs);
+      try {
+        const output = await new Deno.Command(command, {
+          args,
+          cwd,
+          stdout: "piped",
+          stderr: "piped",
+          signal: controller.signal,
+        }).output();
+        const stdout = new TextDecoder().decode(output.stdout).slice(0, 65536);
+        const stderr = new TextDecoder().decode(output.stderr).slice(0, 65536);
+        return {
+          toolContent: baseComplete({
+            status: 200,
+            payload: {
+              command,
+              args,
+              cwd,
+              code: output.code,
+              success: output.success,
+              stdout,
+              stderr,
+            },
+          }),
+        };
+      } catch (err) {
+        return {
+          toolContent: baseComplete({
+            status: 500,
+            code: "exec_failed",
+            message: err instanceof Error ? err.message : String(err),
+          }),
+        };
+      } finally {
+        clearTimeout(timeoutId);
+        if (ctx.signal) {
+          ctx.signal.removeEventListener("abort", onAbort);
+        }
+      }
+    }
+
+    return null;
+  };
+
+  const builtinResult = await runBuiltinTool();
+  if (builtinResult) {
+    return builtinResult;
+  }
+  const action = ctx.parentDeck.actionDecks.find((a) => a.name === call.name);
+  if (!action) {
+    const externalTool = ctx.parentDeck.tools.find((tool) =>
+      tool.name === call.name
+    );
+    if (!externalTool) {
+      return {
+        toolContent: JSON.stringify({
+          runId: ctx.runId,
+          actionCallId: call.id,
+          parentActionCallId: ctx.parentActionCallId,
+          source,
+          status: 404,
+          message: "unknown action",
+        }),
+      };
+    }
+    let externalInput: Record<string, unknown> = call.args;
+    if (externalTool.inputSchema) {
+      try {
+        externalInput = validateWithSchema(
+          externalTool.inputSchema as never,
+          call.args,
+        ) as Record<string, unknown>;
+      } catch (err) {
+        return {
+          toolContent: baseComplete({
+            status: 400,
+            code: "invalid_input",
+            message: err instanceof Error ? err.message : String(err),
+          }),
+        };
+      }
+    }
+    if (!ctx.onTool) {
+      return {
+        toolContent: baseComplete({
+          status: 500,
+          code: "missing_on_tool",
+          message: `External tool ${call.name} requires runtime onTool handler`,
+        }),
+      };
+    }
+    try {
+      const result = await ctx.onTool({
+        name: call.name,
+        args: externalInput,
+        runId: ctx.runId,
+        actionCallId: call.id,
+        parentActionCallId: ctx.parentActionCallId,
+        deckPath: ctx.parentDeck.path,
+      });
+      return { toolContent: baseComplete(normalizeChildResult(result)) };
+    } catch (err) {
+      return {
+        toolContent: baseComplete({
+          status: 500,
+          code: "tool_handler_error",
+          message: err instanceof Error ? err.message : String(err),
+        }),
+      };
+    }
+  }
+  let actionInput: unknown = call.args;
+  if (action.contextSchema) {
+    try {
+      actionInput = validateWithSchema(
+        action.contextSchema as never,
+        call.args,
+      );
+    } catch (err) {
+      return {
+        toolContent: baseComplete({
+          status: 400,
+          code: "invalid_input",
+          message: err instanceof Error ? err.message : String(err),
+        }),
+      };
+    }
+  }
+
   const busyCfg = ctx.parentDeck.handlers?.onBusy ??
     ctx.parentDeck.handlers?.onInterval;
   const busyDelay = busyCfg?.delayMs ?? DEFAULT_STATUS_DELAY_MS;
@@ -1362,7 +4079,7 @@ async function handleToolCall(
     try {
       const result = await runDeck({
         path: action.path,
-        input: call.args,
+        input: actionInput,
         modelProvider: ctx.modelProvider,
         isRoot: false,
         guardrails: ctx.guardrails,
@@ -1383,6 +4100,10 @@ async function handleToolCall(
         workspacePermissionsBaseDir: ctx.workspacePermissionsBaseDir,
         sessionPermissions: ctx.sessionPermissions,
         sessionPermissionsBaseDir: ctx.sessionPermissionsBaseDir,
+        runDeadlineMs: ctx.runDeadlineMs,
+        workerSandbox: ctx.workerSandbox,
+        signal: ctx.signal,
+        onTool: ctx.onTool,
       });
       return { ok: true, result };
     } catch (err) {
@@ -1419,6 +4140,10 @@ async function handleToolCall(
         workspacePermissionsBaseDir: ctx.workspacePermissionsBaseDir,
         sessionPermissions: ctx.sessionPermissions,
         sessionPermissionsBaseDir: ctx.sessionPermissionsBaseDir,
+        runDeadlineMs: ctx.runDeadlineMs,
+        workerSandbox: ctx.workerSandbox,
+        signal: ctx.signal,
+        onTool: ctx.onTool,
       });
       if (envelope.length) {
         extraMessages.push(...envelope.map(sanitizeMessage));
@@ -1480,6 +4205,12 @@ async function handleToolCall(
   }
 
   const normalized = normalizeChildResult(childResult.result);
+  if (action.responseSchema) {
+    normalized.payload = validateWithSchema(
+      action.responseSchema as never,
+      normalized.payload,
+    );
+  }
   const toolContent = baseComplete(normalized);
 
   if (busyCfg?.path) {
@@ -1509,6 +4240,10 @@ async function handleToolCall(
           workspacePermissionsBaseDir: ctx.workspacePermissionsBaseDir,
           sessionPermissions: ctx.sessionPermissions,
           sessionPermissionsBaseDir: ctx.sessionPermissionsBaseDir,
+          runDeadlineMs: ctx.runDeadlineMs,
+          workerSandbox: ctx.workerSandbox,
+          signal: ctx.signal,
+          onTool: ctx.onTool,
         });
         if (envelope.length) {
           extraMessages.push(...envelope.map(sanitizeMessage));
@@ -1594,8 +4329,13 @@ async function runBusyHandler(args: {
   workspacePermissionsBaseDir?: string;
   sessionPermissions?: PermissionDeclarationInput;
   sessionPermissionsBaseDir?: string;
+  runDeadlineMs: number;
+  workerSandbox: boolean;
+  signal?: AbortSignal;
+  onTool?: RunOptions["onTool"];
 }): Promise<Array<ModelMessage>> {
   try {
+    ensureRunActive(args.runDeadlineMs, args.signal);
     const input = {
       kind: "busy",
       label: args.action.label ?? args.parentDeck.label,
@@ -1628,6 +4368,10 @@ async function runBusyHandler(args: {
       workspacePermissionsBaseDir: args.workspacePermissionsBaseDir,
       sessionPermissions: args.sessionPermissions,
       sessionPermissionsBaseDir: args.sessionPermissionsBaseDir,
+      runDeadlineMs: args.runDeadlineMs,
+      workerSandbox: args.workerSandbox,
+      signal: args.signal,
+      onTool: args.onTool,
     });
     const elapsedMs = Math.floor(args.elapsedMs);
     let message: string | undefined;
@@ -1643,7 +4387,7 @@ async function runBusyHandler(args: {
       }
     }
     if (!message) return [];
-    if (args.onStreamText) {
+    if (args.onStreamText && !args.signal?.aborted) {
       args.onStreamText(`${message}\n`);
     } else {
       logger.log(message);
@@ -1677,6 +4421,10 @@ function createIdleController(args: {
   workspacePermissionsBaseDir?: string;
   sessionPermissions?: PermissionDeclarationInput;
   sessionPermissionsBaseDir?: string;
+  runDeadlineMs: number;
+  workerSandbox: boolean;
+  signal?: AbortSignal;
+  onTool?: RunOptions["onTool"];
 }): IdleController {
   if (!args.cfg?.path) {
     return {
@@ -1728,6 +4476,10 @@ function createIdleController(args: {
           workspacePermissionsBaseDir: args.workspacePermissionsBaseDir,
           sessionPermissions: args.sessionPermissions,
           sessionPermissionsBaseDir: args.sessionPermissionsBaseDir,
+          runDeadlineMs: args.runDeadlineMs,
+          workerSandbox: args.workerSandbox,
+          signal: args.signal,
+          onTool: args.onTool,
         });
         if (envelope.length) args.pushMessages(envelope.map(sanitizeMessage));
       } catch {
@@ -1783,8 +4535,13 @@ async function runIdleHandler(args: {
   workspacePermissionsBaseDir?: string;
   sessionPermissions?: PermissionDeclarationInput;
   sessionPermissionsBaseDir?: string;
+  runDeadlineMs: number;
+  workerSandbox: boolean;
+  signal?: AbortSignal;
+  onTool?: RunOptions["onTool"];
 }): Promise<Array<ModelMessage>> {
   try {
+    ensureRunActive(args.runDeadlineMs, args.signal);
     const input = {
       kind: "idle",
       label: args.deck.label,
@@ -1816,6 +4573,10 @@ async function runIdleHandler(args: {
       workspacePermissionsBaseDir: args.workspacePermissionsBaseDir,
       sessionPermissions: args.sessionPermissions,
       sessionPermissionsBaseDir: args.sessionPermissionsBaseDir,
+      runDeadlineMs: args.runDeadlineMs,
+      workerSandbox: args.workerSandbox,
+      signal: args.signal,
+      onTool: args.onTool,
     });
     const elapsedMs = Math.floor(args.elapsedMs);
     let message: string | undefined;
@@ -1831,7 +4592,7 @@ async function runIdleHandler(args: {
       }
     }
     if (!message) return [];
-    if (args.onStreamText) {
+    if (args.onStreamText && !args.signal?.aborted) {
       args.onStreamText(`${message}\n`);
     } else {
       logger.log(message);
@@ -1866,11 +4627,16 @@ async function maybeHandleError(args: {
     workspacePermissionsBaseDir?: string;
     sessionPermissions?: PermissionDeclarationInput;
     sessionPermissionsBaseDir?: string;
+    runDeadlineMs: number;
+    workerSandbox: boolean;
+    signal?: AbortSignal;
+    onTool?: RunOptions["onTool"];
   };
   action: { name: string; path: string; label?: string; description?: string };
 }): Promise<ToolCallResult | undefined> {
   const handlerPath = args.ctx.parentDeck.handlers?.onError?.path;
   if (!handlerPath) return undefined;
+  ensureRunActive(args.ctx.runDeadlineMs, args.ctx.signal);
 
   const message = args.err instanceof Error
     ? args.err.message
@@ -1909,6 +4675,10 @@ async function maybeHandleError(args: {
       workspacePermissionsBaseDir: args.ctx.workspacePermissionsBaseDir,
       sessionPermissions: args.ctx.sessionPermissions,
       sessionPermissionsBaseDir: args.ctx.sessionPermissionsBaseDir,
+      runDeadlineMs: args.ctx.runDeadlineMs,
+      workerSandbox: args.ctx.workerSandbox,
+      signal: args.ctx.signal,
+      onTool: args.ctx.onTool,
     });
 
     const parsed = typeof handlerOutput === "object" && handlerOutput !== null
@@ -2037,8 +4807,195 @@ function sanitizeMessage(msg: ModelMessage): ModelMessage {
   return { ...msg, tool_calls: toolCalls };
 }
 
-async function buildToolDefs(deck: LoadedDeck): Promise<Array<ToolDefinition>> {
+function toStringArray(value: unknown): Array<string> { // strings only
+  const items: Array<unknown> = Array.isArray(value) ? value : [];
+  return items.filter((item): item is string => typeof item === "string");
+}
+
+/** Resolves a tool path against `baseDir`; blank or non-string input throws. */
+function resolveToolPath(baseDir: string, rawPath: unknown): string {
+  const candidate = typeof rawPath === "string" ? rawPath : "";
+  if (candidate.trim() === "") throw new Error("path is required");
+  return path.resolve(baseDir, candidate);
+}
+
+// Reads `start_line`/`end_line`: a non-integer start becomes 1, a non-integer
+// end becomes start + 399; start is at least 1 and end is at least start.
+function parseLineRange(
+  args: Record<string, unknown>,
+): { startLine: number; endLine: number } {
+  const { start_line: from, end_line: to } = args;
+  const startLine = Number.isInteger(from) ? Math.max(1, Number(from)) : 1;
+  const endLine = Number.isInteger(to)
+    ? Math.max(startLine, Number(to))
+    : startLine + 399;
+  return { startLine, endLine };
+}
+
+function parseToolLimit(value: unknown, fallback: number, max: number): number {
+  const clamp = (n: number) => Math.min(max, Math.max(1, n)); // >= 1, <= max
+  return Number.isInteger(value) ? clamp(Number(value)) : fallback;
+}
+
+function hasAnyScope(scope: { all: boolean; values: Set<string> }): boolean {
+  return scope.all ? true : scope.values.size > 0; // `all` set, or any value
+}
+
+function hasAnyRunScope(
+  scope: { all: boolean; paths: Set<string>; commands: Set<string> },
+): boolean { // `all` set, or at least one listed path or command
+  return scope.all ? true : scope.paths.size > 0 || scope.commands.size > 0;
+}
+
+function isBuiltinTool(name: string): boolean { // exact-name membership test
+  return BUILTIN_TOOL_NAMES.has(name);
+}
+
+/**
+ * Applies literal find/replace edits to `content` in order: the first match, or
+ * every match when `replaceAll` is set. Edits with empty search text or no match
+ * are skipped; `applied` counts the edits that matched, not each replacement.
+ */
+function applySimplePatch(
+  content: string,
+  edits: Array<{ oldText: string; newText: string; replaceAll?: boolean }>,
+): { next: string; applied: number } {
+  let next = content;
+  let applied = 0;
+  for (const edit of edits) {
+    const needle = edit.oldText ?? "";
+    const insert = edit.newText ?? "";
+    const at = needle ? next.indexOf(needle) : -1;
+    if (at === -1) continue; // empty search text, or nothing to replace
+    next = edit.replaceAll
+      ? next.split(needle).join(insert)
+      : `${next.slice(0, at)}${insert}${next.slice(at + needle.length)}`;
+    applied++;
+  }
+  return { next, applied };
+}
+
+async function buildToolDefs(
+  deck: LoadedDeck,
+  permissions: NormalizedPermissionSet,
+): Promise<Array<ToolDefinition>> {
   const defs: Array<ToolDefinition> = [];
+  const addBuiltinTools = () => {
+    if (hasAnyScope(permissions.read)) {
+      defs.push(
+        {
+          type: "function",
+          function: {
+            name: BUILTIN_TOOL_READ_FILE,
+            description: "Read a UTF-8 text file.",
+            parameters: {
+              type: "object",
+              properties: {
+                path: { type: "string" },
+                start_line: { type: "number" },
+                end_line: { type: "number" },
+              },
+              required: ["path"],
+              additionalProperties: false,
+            },
+          },
+        },
+        {
+          type: "function",
+          function: {
+            name: BUILTIN_TOOL_LIST_DIR,
+            description: "List directory entries.",
+            parameters: {
+              type: "object",
+              properties: {
+                path: { type: "string" },
+                recursive: { type: "boolean" },
+                max_entries: { type: "number" },
+              },
+              required: ["path"],
+              additionalProperties: false,
+            },
+          },
+        },
+        {
+          type: "function",
+          function: {
+            name: BUILTIN_TOOL_GREP_FILES,
+            description: "Search text files using a regular expression.",
+            parameters: {
+              type: "object",
+              properties: {
+                path: { type: "string" },
+                query: { type: "string" },
+                max_matches: { type: "number" },
+              },
+              required: ["path", "query"],
+              additionalProperties: false,
+            },
+          },
+        },
+      );
+    }
+
+    if (hasAnyScope(permissions.write)) {
+      defs.push({
+        type: "function",
+        function: {
+          name: BUILTIN_TOOL_APPLY_PATCH,
+          description:
+            "Apply text replacements to a file using old/new edit pairs.",
+          parameters: {
+            type: "object",
+            properties: {
+              path: { type: "string" },
+              create_if_missing: { type: "boolean" },
+              edits: {
+                type: "array",
+                items: {
+                  type: "object",
+                  properties: {
+                    old_text: { type: "string" },
+                    new_text: { type: "string" },
+                    replace_all: { type: "boolean" },
+                  },
+                  required: ["old_text", "new_text"],
+                  additionalProperties: false,
+                },
+              },
+            },
+            required: ["path", "edits"],
+            additionalProperties: false,
+          },
+        },
+      });
+    }
+
+    if (hasAnyRunScope(permissions.run)) {
+      defs.push({
+        type: "function",
+        function: {
+          name: BUILTIN_TOOL_EXEC,
+          description: "Run an allowed command with optional args.",
+          parameters: {
+            type: "object",
+            properties: {
+              command: { type: "string" },
+              args: {
+                type: "array",
+                items: { type: "string" },
+              },
+              cwd: { type: "string" },
+              timeout_ms: { type: "number" },
+            },
+            required: ["command"],
+            additionalProperties: false,
+          },
+        },
+      });
+    }
+  };
+
+  addBuiltinTools();
   if (deck.allowEnd) {
     defs.push({
       type: "function",
@@ -2080,9 +5037,17 @@ async function buildToolDefs(deck: LoadedDeck): Promise<Array<ToolDefinition>> {
     });
   }
   for (const action of deck.actionDecks) {
-    const child = await loadDeck(action.path, deck.path);
-    ensureSchemaPresence(child, false);
-    const schema = resolveContextSchema(child)!;
+    if (isBuiltinTool(action.name)) {
+      throw new Error(
+        `Action name ${action.name} conflicts with a built-in tool name`,
+      );
+    }
+    let schema = action.contextSchema;
+    if (!schema) {
+      const child = await loadDeck(action.path, deck.path);
+      ensureSchemaPresence(child, false);
+      schema = resolveContextSchema(child)!;
+    }
     const params = toJsonSchema(schema as never);
     defs.push({
       type: "function",
@@ -2093,5 +5058,24 @@ async function buildToolDefs(deck: LoadedDeck): Promise<Array<ToolDefinition>> {
       },
     });
   }
+  const actionNames = new Set(deck.actionDecks.map((action) => action.name));
+  for (const external of deck.tools) {
+    if (actionNames.has(external.name)) continue;
+    if (isBuiltinTool(external.name)) {
+      throw new Error(
+        `External tool name ${external.name} conflicts with a built-in tool name`,
+      );
+    }
+    defs.push({
+      type: "function",
+      function: {
+        name: external.name,
+        description: external.description,
+        parameters: external.inputSchema
+          ? toJsonSchema(external.inputSchema as never)
+          : { type: "object", additionalProperties: true },
+      },
+    });
+  }
   return defs;
 }
diff --git a/packages/gambit-core/src/runtime_orchestration_worker.ts b/packages/gambit-core/src/runtime_orchestration_worker.ts
new file mode 100644
index 000000000..b79d60b17
--- /dev/null
+++ b/packages/gambit-core/src/runtime_orchestration_worker.ts
@@ -0,0 +1,416 @@
+import { runDeck } from "./runtime.ts";
+import type { SavedState } from "./state.ts";
+import type { NormalizedPermissionSet } from "./permissions.ts";
+import type {
+  CreateResponseResponse,
+  Guardrails,
+  ModelMessage,
+  ModelProvider,
+  ProviderTraceEvent,
+  ResponseEvent,
+  TraceEvent,
+} from "./types.ts";
+
+type WireScope = true | false | Array<string>;
+type WireRunScope = true | false | {
+  paths: Array<string>;
+  commands: Array<string>;
+};
+type WirePermissionSet = {
+  baseDir: string;
+  read: WireScope;
+  write: WireScope;
+  run: WireRunScope;
+  net: WireScope;
+  env: WireScope;
+};
+
+type RunStartMessage = {
+  type: "run.start";
+  bridgeSession: string;
+  completionNonce: string;
+  options: {
+    path: string;
+    input: unknown;
+    inputProvided?: boolean;
+    initialUserMessage?: unknown;
+    isRoot?: boolean;
+    guardrails?: Partial<Guardrails>;
+    depth?: number;
+    parentActionCallId?: string;
+    runId: string;
+    defaultModel?: string;
+    modelOverride?: string;
+    stream?: boolean;
+    state?: SavedState;
+    responsesMode?: boolean;
+    allowRootStringInput?: boolean;
+    runDeadlineMs: number;
+  };
+  permissionCeiling: WirePermissionSet;
+};
+
+type ModelChatResultMessage = {
+  type: "model.chat.result";
+  requestId: string;
+  result: {
+    message: ModelMessage;
+    finishReason: "stop" | "tool_calls" | "length";
+    toolCalls?: Array<{
+      id: string;
+      name: string;
+      args: Record<string, unknown>;
+    }>;
+    updatedState?: SavedState;
+    usage?: {
+      promptTokens: number;
+      completionTokens: number;
+      totalTokens: number;
+    };
+  };
+};
+
+type ModelResponsesResultMessage = {
+  type: "model.responses.result";
+  requestId: string;
+  result: CreateResponseResponse;
+};
+
+type ModelResolveResultMessage = {
+  type: "model.resolveModel.result";
+  requestId: string;
+  result: {
+    model: string;
+    params?: Record<string, unknown>;
+  };
+};
+
+type ModelStreamMessage = {
+  type: "model.chat.stream";
+  requestId: string;
+  chunk: string;
+};
+
+type ModelResponsesEventMessage = {
+  type: "model.responses.event";
+  requestId: string;
+  event: ResponseEvent;
+};
+
+type ModelTraceMessage =
+  | {
+    type: "model.chat.trace";
+    requestId: string;
+    event: ProviderTraceEvent;
+  }
+  | {
+    type: "model.responses.trace";
+    requestId: string;
+    event: ProviderTraceEvent;
+  };
+
+type ModelErrorMessage = {
+  type:
+    | "model.chat.error"
+    | "model.responses.error"
+    | "model.resolveModel.error";
+  requestId: string;
+  error: {
+    source?: string;
+    name?: string;
+    message: string;
+    code?: unknown;
+  };
+};
+
+type ParentMessage =
+  | RunStartMessage
+  | ModelChatResultMessage
+  | ModelResponsesResultMessage
+  | ModelResolveResultMessage
+  | ModelStreamMessage
+  | ModelResponsesEventMessage
+  | ModelTraceMessage
+  | ModelErrorMessage;
+
+type PendingRequest = {
+  kind: "chat" | "responses" | "resolveModel";
+  resolve: (value: unknown) => void;
+  reject: (error: unknown) => void;
+  onStreamText?: (chunk: string) => void;
+  onStreamEvent?: (event: ResponseEvent) => void;
+  onTraceEvent?: (event: ProviderTraceEvent) => void;
+};
+
+const pending = new Map<string, PendingRequest>();
+let activeBridgeSession: string | undefined;
+let activeCompletionNonce: string | undefined;
+let runInFlight = false;
+const bridgePostMessage = self.postMessage.bind(self);
+
+function postBridgeMessage(message: Record<string, unknown>) { // Posts `message` through the bridge, tagged with the active bridge session.
+  const session = activeBridgeSession;
+  const missing = "Orchestration bridge session not established";
+  if (!session) throw new Error(missing); // no session has been recorded yet
+  bridgePostMessage({ ...message, bridgeSession: session });
+}
+
+function randomId(prefix: string) { // Returns `<prefix>-` followed by the first 24 hex digits of a fresh random UUID.
+  const hex = crypto.randomUUID().split("-").join("");
+  return `${prefix}-${hex.substring(0, 24)}`;
+}
+
+function wireScopeToNormalized(
+  scope: WireScope,
+): { all: boolean; values: Set<string> } {
+  // Only `true` sets `all`; an array becomes `values`, a boolean leaves it empty.
+  const entries = typeof scope === "boolean" ? [] : scope;
+  return { all: scope === true, values: new Set<string>(entries) };
+}
+
+/**
+ * Converts the wire form of a `run` permission scope into set-based form.
+ * A boolean scope becomes `all: <that boolean>` with empty `paths` and
+ * `commands`; an object scope becomes `all: false` with both lists copied
+ * into fresh sets.
+ */
+function wireRunToNormalized(
+  scope: WireRunScope,
+): { all: boolean; paths: Set<string>; commands: Set<string> } {
+  if (typeof scope === "boolean") {
+    return {
+      all: scope,
+      paths: new Set<string>(),
+      commands: new Set<string>(),
+    };
+  }
+  const { paths, commands } = scope;
+  return {
+    all: false,
+    paths: new Set(paths),
+    commands: new Set(commands),
+  };
+}
+
+function fromWirePermissionSet(
+  set: WirePermissionSet,
+): NormalizedPermissionSet {
+  // Decode each scope on its own; `run` carries paths and commands, so it
+  // goes through its own decoder. `baseDir` is copied unchanged.
+  const read = wireScopeToNormalized(set.read);
+  const write = wireScopeToNormalized(set.write);
+  const run = wireRunToNormalized(set.run);
+  const net = wireScopeToNormalized(set.net);
+  const env = wireScopeToNormalized(set.env);
+  return { baseDir: set.baseDir, read, write, run, net, env };
+}
+
+function workerErrorPayload(err: unknown) {
+  // Flattens any thrown value into a plain `{ source, name, message, code }`.
+  const known = err instanceof Error ? err : undefined;
+  const name = known?.name;
+  const message = known ? known.message : String(err);
+  const code = (err as { code?: unknown })?.code;
+  return { source: "worker", name, message, code };
+}
+
+const requestModelProvider: ModelProvider = { // ModelProvider whose calls are posted over the bridge; each promise settles when the matching reply message arrives.
+  chat(input) {
+    const requestId = randomId("model-chat");
+    const {
+      onStreamText,
+      onStreamEvent: _onStreamEvent, // dropped: neither posted nor registered for this request
+      onTraceEvent,
+      ...wireInput // NOTE(review): still carries `signal` when the caller sets one; an AbortSignal is presumably not cloneable by postMessage — confirm callers omit it
+    } = input;
+    return new Promise<Awaited<ReturnType<ModelProvider["chat"]>>>(
+      (resolve, reject) => {
+        pending.set(requestId, { // registered before posting so a reply always finds its entry
+          kind: "chat",
+          resolve: (value) =>
+            resolve(value as Awaited<ReturnType<ModelProvider["chat"]>>),
+          reject: (error) => reject(error),
+          onStreamText, // callbacks stay in the worker; the message listener invokes them by requestId
+          onTraceEvent,
+        });
+        postBridgeMessage({
+          type: "model.chat.request",
+          requestId,
+          input: wireInput,
+        });
+      },
+    );
+  },
+  responses(input) {
+    const requestId = randomId("model-responses");
+    const { onStreamEvent, onTraceEvent, ...wireInput } = input; // callbacks are kept locally; the rest of the input is posted
+    return new Promise<CreateResponseResponse>((resolve, reject) => {
+      pending.set(requestId, {
+        kind: "responses",
+        resolve: (value) => resolve(value as CreateResponseResponse),
+        reject: (error) => reject(error),
+        onStreamEvent,
+        onTraceEvent,
+      });
+      postBridgeMessage({
+        type: "model.responses.request",
+        requestId,
+        input: wireInput,
+      });
+    });
+  },
+  resolveModel(input) {
+    const requestId = randomId("model-resolve");
+    return new Promise<{ model: string; params?: Record<string, unknown> }>(
+      (resolve, reject) => {
+        pending.set(requestId, { // no stream or trace callbacks for model resolution
+          kind: "resolveModel",
+          resolve: (value) =>
+            resolve(
+              value as { model: string; params?: Record<string, unknown> },
+            ),
+          reject: (error) => reject(error),
+        });
+        postBridgeMessage({
+          type: "model.resolveModel.request",
+          requestId,
+          input, // posted as-is
+        });
+      },
+    );
+  },
+};
+
+async function runOrchestration(msg: RunStartMessage): Promise<unknown> { // Runs the deck described by `msg` through runDeck, relaying model calls, traces, state and stream text over the bridge.
+  return await runDeck({
+    path: msg.options.path,
+    input: msg.options.input,
+    inputProvided: msg.options.inputProvided,
+    initialUserMessage: msg.options.initialUserMessage,
+    modelProvider: requestModelProvider, // model calls are posted over the bridge rather than made here
+    isRoot: msg.options.isRoot,
+    guardrails: msg.options.guardrails,
+    depth: msg.options.depth,
+    parentActionCallId: msg.options.parentActionCallId,
+    runId: msg.options.runId,
+    defaultModel: msg.options.defaultModel,
+    modelOverride: msg.options.modelOverride,
+    trace: (event: TraceEvent) => {
+      postBridgeMessage({ type: "trace.event", event });
+    },
+    stream: msg.options.stream,
+    state: msg.options.state,
+    onStateUpdate: (state: SavedState) => {
+      postBridgeMessage({ type: "state.update", state });
+    },
+    onStreamText: (chunk: string) => {
+      postBridgeMessage({ type: "stream.text", chunk });
+    },
+    allowRootStringInput: msg.options.allowRootStringInput,
+    responsesMode: msg.options.responsesMode,
+    parentPermissions: fromWirePermissionSet(msg.permissionCeiling), // wire-form ceiling decoded back into set form
+    runDeadlineMs: msg.options.runDeadlineMs,
+    // Keep sandboxing enabled for nested runs so child compute decks are
+    // executed with narrowed OS permissions derived from effective ceilings.
+    workerSandbox: true,
+    inOrchestrationWorker: true,
+  });
+}
+
+self.addEventListener("message", (event: MessageEvent<ParentMessage>) => { // Routes bridge messages: starts the run, or feeds/settles a pending model request.
+  const data = event.data;
+  if (!data || typeof data !== "object") return;
+  // run.start is accepted only while idle and with a non-empty session + nonce.
+  if (data.type === "run.start") {
+    if (runInFlight) return;
+    if (typeof data.bridgeSession !== "string" || !data.bridgeSession) return;
+    if (typeof data.completionNonce !== "string" || !data.completionNonce) {
+      return;
+    }
+    activeBridgeSession = data.bridgeSession;
+    activeCompletionNonce = data.completionNonce;
+    runInFlight = true;
+    runOrchestration(data)
+      .then((result) => {
+        postBridgeMessage({
+          type: "run.result",
+          result,
+          completionNonce: activeCompletionNonce,
+        });
+      })
+      .catch((err) => { // also reached when posting the result itself throws
+        postBridgeMessage({
+          type: "run.error",
+          error: workerErrorPayload(err),
+          completionNonce: activeCompletionNonce,
+        });
+      })
+      .finally(() => {
+        runInFlight = false; // always release the slot, even if a post failed
+      });
+    return;
+  }
+  // Stream and trace messages reach the request's callbacks; it stays pending.
+  if (data.type === "model.chat.stream") {
+    const req = pending.get(data.requestId);
+    if (!req || req.kind !== "chat") return;
+    req.onStreamText?.(data.chunk);
+    return;
+  }
+
+  if (data.type === "model.responses.event") {
+    const req = pending.get(data.requestId);
+    if (!req || req.kind !== "responses") return;
+    req.onStreamEvent?.(data.event);
+    return;
+  }
+
+  if (data.type === "model.chat.trace") {
+    const req = pending.get(data.requestId);
+    if (!req || req.kind !== "chat") return;
+    req.onTraceEvent?.(data.event);
+    return;
+  }
+
+  if (data.type === "model.responses.trace") {
+    const req = pending.get(data.requestId);
+    if (!req || req.kind !== "responses") return;
+    req.onTraceEvent?.(data.event);
+    return;
+  }
+  // Result messages settle the request once its kind matches, then forget it.
+  if (data.type === "model.chat.result") {
+    const req = pending.get(data.requestId);
+    if (!req || req.kind !== "chat") return;
+    pending.delete(data.requestId);
+    req.resolve(data.result);
+    return;
+  }
+
+  if (data.type === "model.responses.result") {
+    const req = pending.get(data.requestId);
+    if (!req || req.kind !== "responses") return;
+    pending.delete(data.requestId);
+    req.resolve(data.result);
+    return;
+  }
+
+  if (data.type === "model.resolveModel.result") {
+    const req = pending.get(data.requestId);
+    if (!req || req.kind !== "resolveModel") return;
+    pending.delete(data.requestId);
+    req.resolve(data.result);
+    return;
+  }
+  // Error messages reject the pending request whatever its kind.
+  if (
+    data.type === "model.chat.error" || data.type === "model.responses.error" ||
+    data.type === "model.resolveModel.error"
+  ) {
+    const req = pending.get(data.requestId);
+    if (!req) return;
+    pending.delete(data.requestId);
+    req.reject(new Error(data.error.message)); // NOTE(review): only `message` survives; the payload's name/code/source are dropped
+    return;
+  }
+});
diff --git a/packages/gambit-core/src/runtime_worker.ts b/packages/gambit-core/src/runtime_worker.ts
new file mode 100644
index 000000000..4a4f8e24b
--- /dev/null
+++ b/packages/gambit-core/src/runtime_worker.ts
@@ -0,0 +1,397 @@
+import * as path from "@std/path";
+import { loadDeck } from "./loader.ts";
+import type { SavedState } from "./state.ts";
+import type { PermissionDeclarationInput } from "./permissions.ts";
+import { assertZodSchema, validateWithSchema } from "./schema.ts";
+import type { ExecutionContext, Guardrails, LoadedDeck } from "./types.ts";
+
+type WireScope = true | false | Array<string>;
+type WireRunScope = true | false | {
+  paths: Array<string>;
+  commands: Array<string>;
+};
+type WirePermissionSet = {
+  baseDir: string;
+  read: WireScope;
+  write: WireScope;
+  run: WireRunScope;
+  net: WireScope;
+  env: WireScope;
+};
+
+type RunStartMessage = {
+  type: "run.start";
+  bridgeSession: string;
+  completionNonce: string;
+  runId: string;
+  actionCallId: string;
+  deckPath: string;
+  input: unknown;
+  state?: SavedState;
+  initialUserMessage?: unknown;
+  depth: number;
+  parentActionCallId?: string;
+  permissions: WirePermissionSet;
+  workspacePermissions?: PermissionDeclarationInput;
+  workspacePermissionsBaseDir?: string;
+  sessionPermissions?: PermissionDeclarationInput;
+  sessionPermissionsBaseDir?: string;
+  runDeadlineMs: number;
+  isRoot: boolean;
+  allowRootStringInput: boolean;
+};
+
+type DeckInspectStartMessage = {
+  type: "deck.inspect";
+  bridgeSession: string;
+  deckPath: string;
+};
+
+type WorkerDeckInspection = {
+  deckPath: string;
+  hasModelParams: boolean;
+  permissions?: PermissionDeclarationInput;
+  guardrails?: Partial<Guardrails>;
+};
+
+type SpawnResultMessage = {
+  type: "spawn.result";
+  requestId: string;
+  result: unknown;
+};
+
+type SpawnErrorMessage = {
+  type: "spawn.error";
+  requestId: string;
+  error: {
+    source?: string;
+    name?: string;
+    message: string;
+    code?: unknown;
+  };
+};
+
+type ParentMessage =
+  | RunStartMessage
+  | DeckInspectStartMessage
+  | SpawnResultMessage
+  | SpawnErrorMessage;
+
+const logger = console;
+
+function randomId(prefix: string) { // Builds `<prefix>-` plus the first 24 hex digits of a fresh random UUID.
+  const digits = crypto.randomUUID().split("-").join("");
+  return `${prefix}-${digits.substring(0, 24)}`;
+}
+
+function ensureNotExpired(deadlineMs: number) {
+  // Throws once `performance.now()` has moved past `deadlineMs`.
+  const expired = performance.now() > deadlineMs;
+  if (expired) throw new Error("Timeout exceeded");
+}
+
+function workerErrorPayload(err: unknown) {
+  // Reduces any thrown value to the plain fields sent in error messages.
+  const asError = err instanceof Error ? err : undefined;
+  const name = asError?.name;
+  const message = asError ? asError.message : String(err);
+  const code = (err as { code?: unknown })?.code;
+  return { source: "worker", name, message, code };
+}
+
+function resolveContextSchema(deck: LoadedDeck) { // Input-side schema of a deck.
+  return deck.contextSchema ?? deck.inputSchema; // `inputSchema` is used only when `contextSchema` is null/undefined
+}
+
+function resolveResponseSchema(deck: LoadedDeck) { // Output-side schema of a deck.
+  return deck.responseSchema ?? deck.outputSchema; // `outputSchema` is used only when `responseSchema` is null/undefined
+}
+
+function ensureSchemaPresence(deck: LoadedDeck, isRoot: boolean) {
+  if (isRoot) return; // root decks are not checked here
+  const context = resolveContextSchema(deck);
+  const response = resolveResponseSchema(deck);
+  if (context && response) {
+    assertZodSchema(context, "contextSchema");
+    assertZodSchema(response, "responseSchema");
+    return;
+  }
+  throw new Error(
+    `Deck ${deck.path} must declare contextSchema and responseSchema (non-root)`,
+  );
+}
+
+function validateInput(
+  deck: LoadedDeck,
+  input: unknown,
+  isRoot: boolean,
+  allowRootStringInput: boolean,
+) {
+  const schema = resolveContextSchema(deck);
+  if (!schema) {
+    if (!isRoot) {
+      throw new Error(`Deck ${deck.path} requires contextSchema (non-root)`);
+    }
+    // A schema-less root deck takes its input as-is; a missing one becomes "".
+    return input === undefined ? "" : input;
+  }
+  // Only a root deck given a raw string (and opted in) may fall back to the
+  // unvalidated input; every other validation failure propagates.
+  const lenient = isRoot && allowRootStringInput && typeof input === "string";
+  try {
+    return validateWithSchema(schema as never, input);
+  } catch (err) {
+    if (lenient) return input;
+    throw err;
+  }
+}
+
+function validateOutput(
+  deck: LoadedDeck,
+  output: unknown,
+  isRoot: boolean,
+): unknown {
+  // A declared response schema always decides the result.
+  const schema = resolveResponseSchema(deck);
+  if (schema) return validateWithSchema(schema as never, output);
+  if (!isRoot) {
+    throw new Error(`Deck ${deck.path} requires responseSchema (non-root)`);
+  }
+  // Schema-less root output: strings pass through untouched, any other
+  // value is JSON-encoded.
+  return typeof output === "string" ? output : JSON.stringify(output);
+}
+
+type PendingRequest = {
+  resolve: (value: unknown) => void;
+  reject: (error: unknown) => void;
+};
+
+const pending = new Map<string, PendingRequest>();
+let activeBridgeSession: string | undefined;
+let activeCompletionNonce: string | undefined;
+let runInFlight = false;
+let inspectInFlight = false;
+const bridgePostMessage = self.postMessage.bind(self);
+
+function postBridgeMessage(message: Record<string, unknown>) { // Posts `message` through the bridge, tagged with the active bridge session.
+  const session = activeBridgeSession;
+  const missing = "Worker bridge session not established";
+  if (!session) throw new Error(missing); // no session has been recorded yet
+  bridgePostMessage({ ...message, bridgeSession: session });
+}
+
+async function inspectDeck(deckPath: string): Promise<WorkerDeckInspection> {
+  // Loads the deck and reports its path, declared permissions and guardrails,
+  // and whether it sets a model or an explicit temperature.
+  const deck = await loadDeck(deckPath);
+  const params = deck.modelParams;
+  const hasModelParams = Boolean(
+    params?.model || params?.temperature !== undefined,
+  );
+  const { path: loadedPath, permissions, guardrails } = deck;
+  return { deckPath: loadedPath, hasModelParams, permissions, guardrails };
+}
+
+async function runCompute(msg: RunStartMessage) { // Loads the deck at `msg.deckPath`, runs its executor with a bridge-backed ExecutionContext, and returns the validated output.
+  ensureNotExpired(msg.runDeadlineMs); // NOTE(review): compared against this worker's performance.now() — confirm the sender's deadline uses a comparable clock
+  const deck = await loadDeck(msg.deckPath);
+  ensureSchemaPresence(deck, msg.isRoot);
+  const validatedInput = validateInput(
+    deck,
+    msg.input,
+    msg.isRoot,
+    msg.allowRootStringInput,
+  );
+  // Only decks that define an executor can be run here.
+  if (!deck.executor) {
+    throw new Error(
+      `Deck ${deck.path} has no model and no executor (add run or execute to the deck definition)`,
+    );
+  }
+  // Copy the incoming state (missing message content becomes null) so later edits never touch `msg.state`.
+  let computeState = msg.state
+    ? {
+      ...msg.state,
+      messages: Array.isArray(msg.state.messages)
+        ? msg.state.messages.map((entry) => ({
+          ...entry,
+          content: entry.content ?? null,
+        }))
+        : [],
+      meta: msg.state.meta ? { ...msg.state.meta } : undefined,
+      messageRefs: Array.isArray(msg.state.messageRefs)
+        ? [...msg.state.messageRefs]
+        : undefined,
+    }
+    : undefined;
+  // Creates an empty state for this run the first time something needs to write to it.
+  const ensureComputeState = (): SavedState => {
+    if (computeState) return computeState;
+    computeState = {
+      runId: msg.runId,
+      messages: [],
+      meta: {},
+      messageRefs: [],
+    };
+    return computeState;
+  };
+  // Posts the current state as a `state.update` message; a no-op while no state exists.
+  const publishComputeState = () => {
+    if (!computeState) return;
+    postBridgeMessage({ type: "state.update", state: computeState });
+  };
+  // Context handed to the deck executor; state changes, logs and spawns are posted over the bridge.
+  const execContext: ExecutionContext = {
+    runId: msg.runId,
+    actionCallId: msg.actionCallId,
+    parentActionCallId: msg.parentActionCallId,
+    depth: msg.depth,
+    input: validatedInput,
+    initialUserMessage: msg.initialUserMessage,
+    getSessionMeta: <T = unknown>(key: string): T | undefined => {
+      if (!key) return undefined;
+      return computeState?.meta?.[key] as T | undefined;
+    },
+    setSessionMeta: (key, value) => {
+      if (!key) return;
+      const state = ensureComputeState();
+      const nextMeta = { ...(state.meta ?? {}) };
+      if (value === undefined) { // undefined removes the key instead of storing it
+        delete nextMeta[key];
+      } else {
+        nextMeta[key] = value;
+      }
+      state.meta = nextMeta;
+      publishComputeState();
+    },
+    appendMessage: (message) => {
+      const role = message.role;
+      const content = String(message.content ?? "").trim();
+      if ((role !== "user" && role !== "assistant") || !content) return; // other roles and blank content are ignored
+      const state = ensureComputeState();
+      state.messages = [...(state.messages ?? []), { role, content }];
+      const refs = Array.isArray(state.messageRefs)
+        ? [...state.messageRefs]
+        : [];
+      refs.push({ id: randomId("msg"), role }); // one ref is appended per appended message
+      state.messageRefs = refs;
+      publishComputeState();
+    },
+    label: deck.label,
+    log: (entry) => {
+      postBridgeMessage({ type: "log.entry", entry });
+    },
+    spawnAndWait: async (opts) => {
+      ensureNotExpired(msg.runDeadlineMs);
+      const childPath = path.isAbsolute(opts.path)
+        ? opts.path
+        : path.resolve(path.dirname(deck.path), opts.path); // relative paths resolve against this deck's directory
+      const requestId = randomId("spawn");
+      const childPromise = new Promise<unknown>((resolve, reject) => {
+        pending.set(requestId, { resolve, reject }); // settled by the spawn.result / spawn.error message handlers
+      });
+      postBridgeMessage({
+        type: "spawn.request",
+        requestId,
+        payload: {
+          path: childPath,
+          input: opts.input,
+          initialUserMessage: Object.hasOwn(opts, "initialUserMessage") // an own property wins even when its value is undefined
+            ? opts.initialUserMessage
+            : msg.initialUserMessage,
+          parentActionCallId: msg.actionCallId,
+          parentPermissionsBaseDir: msg.permissions.baseDir,
+          parentPermissions: msg.permissions,
+          workspacePermissions: msg.workspacePermissions,
+          workspacePermissionsBaseDir: msg.workspacePermissionsBaseDir,
+          sessionPermissions: msg.sessionPermissions,
+          sessionPermissionsBaseDir: msg.sessionPermissionsBaseDir,
+          runDeadlineMs: msg.runDeadlineMs,
+        },
+      });
+      const result = await childPromise;
+      ensureNotExpired(msg.runDeadlineMs); // a child that finishes after the deadline still fails this run
+      return result;
+    },
+    fail: (opts) => {
+      throw new Error(opts.message); // only `message` is carried on the thrown error
+    },
+    return: (payload) => Promise.resolve(payload),
+  };
+  // Run the executor, then re-check the deadline before validating what it returned.
+  const raw = await deck.executor(execContext);
+  ensureNotExpired(msg.runDeadlineMs);
+  return validateOutput(deck, raw, msg.isRoot);
+}
+
+self.addEventListener("message", (event: MessageEvent<ParentMessage>) => { // Routes bridge messages: spawn replies, deck inspection and run.start.
+  const data = event.data;
+  if (!data || typeof data !== "object") return;
+  // spawn.result / spawn.error settle the pending request registered under requestId.
+  if (data.type === "spawn.result") {
+    const pendingRequest = pending.get(data.requestId);
+    if (!pendingRequest) return;
+    pending.delete(data.requestId);
+    pendingRequest.resolve(data.result);
+    return;
+  }
+
+  if (data.type === "spawn.error") {
+    const pendingRequest = pending.get(data.requestId);
+    if (!pendingRequest) return;
+    pending.delete(data.requestId);
+    pendingRequest.reject(new Error(data.error.message)); // NOTE(review): only `message` survives; the payload's name/code/source are dropped
+    return;
+  }
+  // deck.inspect: one inspection at a time, answered with deck.inspect.result or deck.inspect.error.
+  if (data.type === "deck.inspect") {
+    if (inspectInFlight) return;
+    if (typeof data.bridgeSession !== "string" || !data.bridgeSession) return;
+    activeBridgeSession = data.bridgeSession; // NOTE(review): replaces the session even while a run is in flight — confirm that cannot happen
+    inspectInFlight = true;
+    inspectDeck(data.deckPath)
+      .then((result) => {
+        postBridgeMessage({ type: "deck.inspect.result", result });
+      })
+      .catch((err) => { // also reached when posting the result itself throws
+        logger.error("[gambit-worker] deck inspection failed", err);
+        postBridgeMessage({
+          type: "deck.inspect.error",
+          error: workerErrorPayload(err),
+        });
+      })
+      .finally(() => {
+        inspectInFlight = false; // always release the slot, even if a post failed
+      });
+    return;
+  }
+  // Anything else must be run.start, accepted only while idle and with a non-empty session + nonce.
+  if (data.type !== "run.start") return;
+  if (runInFlight) return;
+  if (typeof data.bridgeSession !== "string" || !data.bridgeSession) return;
+  if (typeof data.completionNonce !== "string" || !data.completionNonce) return;
+  activeBridgeSession = data.bridgeSession;
+  activeCompletionNonce = data.completionNonce;
+  runInFlight = true;
+  // Report the outcome with the completion nonce; failures while posting the result are reported as run.error too.
+  runCompute(data)
+    .then((result) => {
+      postBridgeMessage({
+        type: "run.result",
+        result,
+        completionNonce: activeCompletionNonce,
+      });
+    })
+    .catch((err) => {
+      logger.error("[gambit-worker] compute execution failed", err);
+      postBridgeMessage({
+        type: "run.error",
+        error: workerErrorPayload(err),
+        completionNonce: activeCompletionNonce,
+      });
+    })
+    .finally(() => {
+      runInFlight = false; // always release the slot, even if a post failed
+    });
+});
diff --git a/packages/gambit-core/src/state.ts b/packages/gambit-core/src/state.ts
index f3a2dbbff..522a58aeb 100644
--- a/packages/gambit-core/src/state.ts
+++ b/packages/gambit-core/src/state.ts
@@ -17,6 +17,7 @@ export type SavedState = {
 export type MessageRef = {
   id: string;
   role: ModelMessage["role"];
+  source?: "scenario" | "manual";
 };
 
 export type FeedbackEntry = {
diff --git a/packages/gambit-core/src/types.ts b/packages/gambit-core/src/types.ts
index 614060666..f085c9576 100644
--- a/packages/gambit-core/src/types.ts
+++ b/packages/gambit-core/src/types.ts
@@ -31,11 +31,11 @@ export type ModelParams = {
   frequency_penalty?: number;
   presence_penalty?: number;
   max_tokens?: number;
-  /**
-   * Provider-specific pass-through parameters. Values must be JSON-serializable.
-   * Top-level supported fields take precedence when keys overlap.
-   */
-  additionalParams?: Record<string, JSONValue>;
+  verbosity?: "low" | "medium" | "high";
+  reasoning?: {
+    effort?: "none" | "low" | "medium" | "high" | "xhigh";
+    summary?: "concise" | "detailed" | "auto";
+  };
 };
 
 export type Guardrails = {
@@ -56,6 +56,15 @@ export type DeckReferenceDefinition = {
 
 export type ActionDeckDefinition = DeckReferenceDefinition & {
   name: string;
+  execute?: string;
+  contextSchema?: ZodTypeAny;
+  responseSchema?: ZodTypeAny;
+};
+
+export type ExternalToolDefinition = {
+  name: string;
+  description?: string;
+  inputSchema?: ZodTypeAny;
 };
 
 export type TestDeckDefinition = DeckReferenceDefinition;
@@ -116,6 +125,7 @@ export type BaseDefinition = {
 export type DeckDefinition<Input = unknown> = BaseDefinition & {
   kind: "gambit.deck";
   modelParams?: ModelParams;
+  tools?: ReadonlyArray<ExternalToolDefinition>;
   handlers?: HandlersConfig;
   prompt?: string; // deprecated; prefer body
   body?: string;
@@ -162,8 +172,18 @@ export type ExecutionContext<Input = unknown> = {
   depth: number;
   label?: Label;
   input: Input;
+  initialUserMessage?: unknown;
+  getSessionMeta: <T = unknown>(key: string) => T | undefined;
+  setSessionMeta: (key: string, value: unknown) => void;
+  appendMessage: (
+    message: { role: "user" | "assistant"; content: string },
+  ) => void;
   log: (entry: LogEntry | string) => void;
-  spawnAndWait: (opts: { path: string; input: unknown }) => Promise<unknown>;
+  spawnAndWait: (opts: {
+    path: string;
+    input: unknown;
+    initialUserMessage?: unknown;
+  }) => Promise<unknown>;
   fail: (
     opts: { message: string; code?: string; details?: JSONValue },
   ) => never;
@@ -199,7 +219,9 @@ export type ToolDefinition = {
 
 export type ResponseTextContent =
   | { type: "input_text"; text: string }
-  | { type: "output_text"; text: string };
+  | { type: "output_text"; text: string }
+  | { type: "summary_text"; text: string }
+  | { type: "reasoning_text"; text: string };
 
 export type ResponseMessageItem = {
   type: "message";
@@ -223,10 +245,19 @@ export type ResponseFunctionCallOutputItem = {
   id?: string;
 };
 
+export type ResponseReasoningItem = {
+  type: "reasoning";
+  id?: string;
+  content?: Array<ResponseTextContent>;
+  summary: Array<ResponseTextContent>;
+  encrypted_content?: string | null;
+};
+
 export type ResponseItem =
   | ResponseMessageItem
   | ResponseFunctionCallItem
-  | ResponseFunctionCallOutputItem;
+  | ResponseFunctionCallOutputItem
+  | ResponseReasoningItem;
 
 export type ResponseToolDefinition = {
   type: "function";
@@ -238,9 +269,37 @@ export type ResponseToolDefinition = {
 };
 
 export type ResponseToolChoice =
+  | "none"
   | "auto"
   | "required"
-  | { type: "function"; function: { name: string } };
+  | { type: "function"; function: { name: string } }
+  | {
+    type: "allowed_tools";
+    tools: Array<{ type: "function"; name: string }>;
+    mode?: "none" | "auto" | "required";
+  };
+
+export type ResponseReasoningConfig = {
+  effort?: "none" | "low" | "medium" | "high" | "xhigh" | null;
+  summary?: "auto" | "concise" | "detailed" | null;
+};
+
+export type ResponseTextConfig = {
+  format?:
+    | { type: "text" }
+    | { type: "json_object" }
+    | {
+      type: "json_schema";
+      name?: string;
+      description?: string | null;
+      schema?: JSONValue | null;
+      strict?: boolean;
+    }
+    | null;
+  verbosity?: "low" | "medium" | "high";
+};
+
+export type ResponseAllowedTool = { type: "function"; name: string };
 
 export type CreateResponseRequest = {
   model: string;
@@ -248,9 +307,30 @@ export type CreateResponseRequest = {
   instructions?: string;
   tools?: Array<ResponseToolDefinition>;
   tool_choice?: ResponseToolChoice;
+  allowed_tools?: Array<ResponseAllowedTool>;
+  previous_response_id?: string;
+  store?: boolean;
+  reasoning?: ResponseReasoningConfig;
+  parallel_tool_calls?: boolean;
+  max_tool_calls?: number;
+  temperature?: number;
+  top_p?: number;
+  frequency_penalty?: number;
+  presence_penalty?: number;
   stream?: boolean;
+  stream_options?: {
+    include_obfuscation?: boolean;
+  };
+  background?: boolean;
   max_output_tokens?: number;
+  top_logprobs?: number;
+  truncation?: "auto" | "disabled";
+  text?: ResponseTextConfig;
+  service_tier?: "auto" | "default" | "flex" | "priority";
+  include?: Array<string>;
   metadata?: Record<string, JSONValue>;
+  safety_identifier?: string;
+  prompt_cache_key?: string;
   params?: Record<string, unknown>;
 };
 
@@ -258,51 +338,163 @@ export type ResponseUsage = {
   promptTokens: number;
   completionTokens: number;
   totalTokens: number;
+  reasoningTokens?: number;
 };
 
 export type CreateResponseResponse = {
   id: string;
   object: "response";
   model?: string;
+  created_at?: number;
+  completed_at?: number | null;
+  previous_response_id?: string | null;
+  instructions?: string | null;
+  reasoning?: ResponseReasoningConfig | null;
   created?: number;
   status?: "completed" | "in_progress" | "failed";
   output: Array<ResponseItem>;
+  tools?: Array<ResponseToolDefinition>;
+  tool_choice?: ResponseToolChoice;
+  parallel_tool_calls?: boolean;
+  truncation?: "auto" | "disabled";
+  text?: ResponseTextConfig;
+  top_p?: number;
+  presence_penalty?: number;
+  frequency_penalty?: number;
+  top_logprobs?: number;
+  temperature?: number;
+  max_output_tokens?: number | null;
+  max_tool_calls?: number | null;
+  store?: boolean;
+  background?: boolean;
+  service_tier?: "auto" | "default" | "flex" | "priority";
+  metadata?: Record<string, JSONValue>;
+  safety_identifier?: string | null;
+  prompt_cache_key?: string | null;
   usage?: ResponseUsage;
-  error?: { code?: string; message?: string };
+  error?: { code?: string; message?: string } | null;
+  updatedState?: SavedState;
 };
 
 export type ResponseEvent =
-  | { type: "response.created"; response: CreateResponseResponse }
+  | {
+    type: "response.created";
+    response: CreateResponseResponse;
+    sequence_number?: number;
+  }
+  | {
+    type: "tool.call";
+    actionCallId: string;
+    name: string;
+    args?: JSONValue;
+  }
+  | {
+    type: "tool.result";
+    actionCallId: string;
+    name: string;
+    result?: JSONValue;
+  }
   | {
     type: "response.output_text.delta";
     output_index: number;
     delta: string;
     item_id?: string;
+    content_index?: number;
+    sequence_number?: number;
+    logprobs?: Array<{
+      token?: string;
+      logprob?: number;
+    }>;
   }
   | {
     type: "response.output_text.done";
     output_index: number;
     text: string;
     item_id?: string;
+    content_index?: number;
+    sequence_number?: number;
   }
   | {
     type: "response.output_item.added";
     output_index: number;
     item: ResponseItem;
+    sequence_number?: number;
   }
   | {
     type: "response.output_item.done";
     output_index: number;
     item: ResponseItem;
+    sequence_number?: number;
+  }
+  | {
+    type: "response.reasoning.delta";
+    output_index: number;
+    item_id: string;
+    content_index: number;
+    delta: string;
+    sequence_number?: number;
+    obfuscation?: string;
+  }
+  | {
+    type: "response.reasoning.done";
+    output_index: number;
+    item_id: string;
+    content_index: number;
+    text: string;
+    sequence_number?: number;
+  }
+  | {
+    type: "response.reasoning_summary_text.delta";
+    output_index: number;
+    item_id: string;
+    summary_index: number;
+    delta: string;
+    sequence_number?: number;
+    obfuscation?: string;
+  }
+  | {
+    type: "response.reasoning_summary_text.done";
+    output_index: number;
+    item_id: string;
+    summary_index: number;
+    text: string;
+    sequence_number?: number;
+  }
+  | {
+    type: "response.reasoning_summary_part.added";
+    output_index: number;
+    item_id: string;
+    summary_index: number;
+    part: ResponseTextContent;
+    sequence_number?: number;
+  }
+  | {
+    type: "response.reasoning_summary_part.done";
+    output_index: number;
+    item_id: string;
+    summary_index: number;
+    part: ResponseTextContent;
+    sequence_number?: number;
+  }
+  | {
+    type: "response.completed";
+    response: CreateResponseResponse;
+    sequence_number?: number;
   }
-  | { type: "response.completed"; response: CreateResponseResponse }
-  | { type: "response.failed"; error: { code?: string; message?: string } };
+  | {
+    type: "response.failed";
+    error: { code?: string; message?: string };
+    sequence_number?: number;
+  };
 
 export type ModelProvider = {
   responses?: (input: {
     request: CreateResponseRequest;
     state?: SavedState;
+    deckPath?: string;
+    signal?: AbortSignal;
     onStreamEvent?: (event: ResponseEvent) => void;
+    onTraceEvent?: (event: ProviderTraceEvent) => void;
   }) => Promise<CreateResponseResponse>;
   resolveModel?: (input: {
     model: string | Array<string>;
@@ -318,7 +510,11 @@ export type ModelProvider = {
     tools?: Array<ToolDefinition>;
     stream?: boolean;
     state?: SavedState;
+    deckPath?: string;
+    signal?: AbortSignal;
     onStreamText?: (chunk: string) => void;
+    onStreamEvent?: (event: Record<string, JSONValue>) => void;
+    onTraceEvent?: (event: ProviderTraceEvent) => void;
     /**
      * Provider-specific pass-through parameters (e.g. OpenAI chat completion
      * fields like temperature/max_tokens).
@@ -337,10 +533,34 @@ export type ModelProvider = {
       promptTokens: number;
       completionTokens: number;
       totalTokens: number;
+      reasoningTokens?: number;
     };
   }>;
 };
 
+export type ProviderTraceEvent =
+  | TraceEvent
+  | (
+    & Omit<
+      Extract<TraceEvent, { type: "tool.call" }>,
+      "runId" | "parentActionCallId"
+    >
+    & {
+      runId?: string;
+      parentActionCallId?: string;
+    }
+  )
+  | (
+    & Omit<
+      Extract<TraceEvent, { type: "tool.result" }>,
+      "runId" | "parentActionCallId"
+    >
+    & {
+      runId?: string;
+      parentActionCallId?: string;
+    }
+  );
+
 type WithDeckRefs<T> = Omit<
   T,
   "actions" | "actionDecks" | "testDecks" | "graderDecks"
@@ -369,6 +589,7 @@ export type LoadedDeck = WithDeckRefs<DeckDefinition> & {
   actions: Array<ActionDeckDefinition>;
   testDecks: Array<TestDeckDefinition>;
   graderDecks: Array<GraderDeckDefinition>;
+  tools: Array<ExternalToolDefinition>;
   executor?: DeckExecutor;
   guardrails?: Partial<Guardrails>;
   inlineEmbeds?: boolean;
@@ -380,6 +601,8 @@ export type ToolCallResult = {
   extraMessages?: Array<ModelMessage>;
 };
 
+export type ToolKind = "action" | "external" | "mcp_bridge" | "internal";
+
 export type TraceEvent =
   & {
     ts?: number;
@@ -440,6 +663,7 @@ export type TraceEvent =
       actionCallId: string;
       name: string;
       args: JSONValue;
+      toolKind: ToolKind;
       parentActionCallId?: string;
     }
     | {
@@ -448,6 +672,7 @@ export type TraceEvent =
       actionCallId: string;
       name: string;
       result: JSONValue;
+      toolKind: ToolKind;
       parentActionCallId?: string;
     }
     | {
@@ -480,10 +705,25 @@ export type TraceEvent =
         args: JSONValue;
       }>;
       stateMessages?: number;
+      usage?: {
+        promptTokens: number;
+        completionTokens: number;
+        totalTokens: number;
+        reasoningTokens?: number;
+      };
       mode?: "chat" | "responses";
       responseItems?: Array<ResponseItem>;
       parentActionCallId?: string;
     }
+    | {
+      type: "model.stream.event";
+      runId: string;
+      actionCallId: string;
+      deckPath?: string;
+      model: string;
+      event: Record<string, JSONValue>;
+      parentActionCallId?: string;
+    }
     | {
       type: "log";
       runId: string;
diff --git a/scaffolds/demo/examples/advanced/agent_with_multi_actions/README.md b/scaffolds/demo/examples/advanced/agent_with_multi_actions/README.md
index 071dc429f..13bc8ae57 100644
--- a/scaffolds/demo/examples/advanced/agent_with_multi_actions/README.md
+++ b/scaffolds/demo/examples/advanced/agent_with_multi_actions/README.md
@@ -30,7 +30,7 @@ deno run -A jsr:@bolt-foundry/gambit/cli run ./agent_with_multi_actions.deck.md
   --message '"translate bonjour to English"' --stream
 ```
 
-## Run a test bot (UI)
+## Run a scenario (UI)
 
 1. Open the simulator UI and go to the "Test" tab.
 2. Choose the "Multi-actions test" persona.
@@ -40,5 +40,5 @@ deno run -A jsr:@bolt-foundry/gambit/cli run ./agent_with_multi_actions.deck.md
 
 - Action decks live in `actions/decks/` and their cards live in
   `actions/cards/`.
-- The test bot deck is `tests/agent_with_multi_actions_test.deck.md`.
-- The test bot hangup card is `cards/test_bot_hangup.card.md`.
+- The scenario deck is `tests/agent_with_multi_actions_test.deck.md`.
+- The scenario hangup card is `cards/test_bot_hangup.card.md`.
diff --git a/scaffolds/demo/examples/advanced/agent_with_multi_actions/agent_with_multi_actions.deck.md b/scaffolds/demo/examples/advanced/agent_with_multi_actions/agent_with_multi_actions.deck.md
index d09f68bfe..425451941 100644
--- a/scaffolds/demo/examples/advanced/agent_with_multi_actions/agent_with_multi_actions.deck.md
+++ b/scaffolds/demo/examples/advanced/agent_with_multi_actions/agent_with_multi_actions.deck.md
@@ -5,7 +5,7 @@ label = "agent_with_multi_actions"
 model = "openai/gpt-4o-mini"
 temperature = 0
 [[testDecks]]
-label = "Multi-actions test bot"
+label = "Multi-actions scenario"
 path = "./tests/agent_with_multi_actions_test.deck.md"
 description = "Synthetic user that requests a simple translation."
 [[graderDecks]]
diff --git a/scaffolds/demo/examples/advanced/agent_with_typescript/README.md b/scaffolds/demo/examples/advanced/agent_with_typescript/README.md
index c4d6be110..2624db33d 100644
--- a/scaffolds/demo/examples/advanced/agent_with_typescript/README.md
+++ b/scaffolds/demo/examples/advanced/agent_with_typescript/README.md
@@ -30,7 +30,7 @@ deno run -A jsr:@bolt-foundry/gambit/cli run ./agent_with_typescript.deck.md \
   --message '"hi"' --stream
 ```
 
-## Run a test bot (UI)
+## Run a scenario (UI)
 
 1. Open the simulator UI and go to the "Test" tab.
 2. Choose the "Typescript agent test" persona.
@@ -39,5 +39,5 @@ deno run -A jsr:@bolt-foundry/gambit/cli run ./agent_with_typescript.deck.md \
 ## Notes
 
 - The TypeScript action deck is `get_time.deck.ts`.
-- The test bot deck is `tests/agent_with_typescript_test.deck.md`.
-- The test bot hangup card is `cards/test_bot_hangup.card.md`.
+- The scenario deck is `tests/agent_with_typescript_test.deck.md`.
+- The scenario hangup card is `cards/test_bot_hangup.card.md`.
diff --git a/scaffolds/demo/examples/advanced/agent_with_typescript/agent_with_typescript.deck.md b/scaffolds/demo/examples/advanced/agent_with_typescript/agent_with_typescript.deck.md
index 0967cc588..a32465dd6 100644
--- a/scaffolds/demo/examples/advanced/agent_with_typescript/agent_with_typescript.deck.md
+++ b/scaffolds/demo/examples/advanced/agent_with_typescript/agent_with_typescript.deck.md
@@ -6,7 +6,7 @@ name = "get_time"
 path = "./get_time.deck.ts"
 description = "Return the current ISO timestamp."
 [[testDecks]]
-label = "Typescript agent test bot"
+label = "Typescript agent scenario"
 path = "./tests/agent_with_typescript_test.deck.md"
 description = "Synthetic user that asks for the current time."
 [[graderDecks]]
diff --git a/scaffolds/demo/examples/advanced/arena_chatbot/README.md b/scaffolds/demo/examples/advanced/arena_chatbot/README.md
index 7aac8bbcc..4ae1045e9 100644
--- a/scaffolds/demo/examples/advanced/arena_chatbot/README.md
+++ b/scaffolds/demo/examples/advanced/arena_chatbot/README.md
@@ -30,14 +30,14 @@ deno run -A jsr:@bolt-foundry/gambit/cli run ./active.deck.md \
   --message '"Tell me one tip about Pikachu"' --stream
 ```
 
-## Run a test bot (UI)
+## Run a scenario (UI)
 
 1. Open the simulator UI and go to the "Test" tab.
 2. Choose the "Arena challenger" persona.
-3. Click "Run test bot".
+3. Click "Run scenario".
 
 ## Notes
 
 - Bot variants live in `bots/`.
-- The test bot deck is `tests/arena_challenger.deck.md`.
-- The test bot hangup card is `cards/test_bot_hangup.card.md`.
+- The scenario deck is `tests/arena_challenger.deck.md`.
+- The scenario hangup card is `cards/test_bot_hangup.card.md`.
diff --git a/scaffolds/demo/examples/advanced/cli_cold_emailer/README.md b/scaffolds/demo/examples/advanced/cli_cold_emailer/README.md
index 9fcd302ec..8dfda1db8 100644
--- a/scaffolds/demo/examples/advanced/cli_cold_emailer/README.md
+++ b/scaffolds/demo/examples/advanced/cli_cold_emailer/README.md
@@ -25,5 +25,5 @@ deno run -A jsr:@bolt-foundry/gambit/cli run ./cold_emailer.deck.md \
 ## Notes
 
 - Cards live in `cards/` and schemas live in `schemas/`.
-- The test bot deck is `tests/buyer_feedback.deck.md`.
-- The test bot hangup card is `cards/test_bot_hangup.card.md`.
+- The scenario deck is `tests/buyer_feedback.deck.md`.
+- The scenario hangup card is `cards/test_bot_hangup.card.md`.
diff --git a/scaffolds/demo/examples/advanced/cli_handlers_md/README.md b/scaffolds/demo/examples/advanced/cli_handlers_md/README.md
index bd7032deb..64a6f4ff3 100644
--- a/scaffolds/demo/examples/advanced/cli_handlers_md/README.md
+++ b/scaffolds/demo/examples/advanced/cli_handlers_md/README.md
@@ -25,5 +25,5 @@ deno run -A jsr:@bolt-foundry/gambit/cli run ./handlers_md.deck.md \
 ## Notes
 
 - Action decks live in `actions/decks/` and handler decks live in `handlers/`.
-- The test bot deck is `tests/handlers_md_test.deck.md`.
-- The test bot hangup card is `cards/test_bot_hangup.card.md`.
+- The scenario deck is `tests/handlers_md_test.deck.md`.
+- The scenario hangup card is `cards/test_bot_hangup.card.md`.
diff --git a/scaffolds/demo/examples/advanced/cli_handlers_ts/README.md b/scaffolds/demo/examples/advanced/cli_handlers_ts/README.md
index 724bdff85..e6abcd0a4 100644
--- a/scaffolds/demo/examples/advanced/cli_handlers_ts/README.md
+++ b/scaffolds/demo/examples/advanced/cli_handlers_ts/README.md
@@ -25,5 +25,5 @@ deno run -A jsr:@bolt-foundry/gambit/cli run ./handlers_ts.deck.md \
 ## Notes
 
 - Action decks live in `actions/decks/` and handler decks live in `handlers/`.
-- The test bot deck is `tests/handlers_ts_test.deck.md`.
-- The test bot hangup card is `cards/test_bot_hangup.card.md`.
+- The scenario deck is `tests/handlers_ts_test.deck.md`.
+- The scenario hangup card is `cards/test_bot_hangup.card.md`.
diff --git a/scaffolds/demo/examples/advanced/cli_internal_monolog/README.md b/scaffolds/demo/examples/advanced/cli_internal_monolog/README.md
index 6f23ec3bb..8e889a986 100644
--- a/scaffolds/demo/examples/advanced/cli_internal_monolog/README.md
+++ b/scaffolds/demo/examples/advanced/cli_internal_monolog/README.md
@@ -25,5 +25,5 @@ deno run -A jsr:@bolt-foundry/gambit/cli run ./internal_monolog_parent.deck.md \
 ## Notes
 
 - The child deck is `monolog_child.deck.md` and schemas live in `schemas/`.
-- The test bot deck is `tests/internal_monolog_test.deck.md`.
-- The test bot hangup card is `cards/test_bot_hangup.card.md`.
+- The scenario deck is `tests/internal_monolog_test.deck.md`.
+- The scenario hangup card is `cards/test_bot_hangup.card.md`.
diff --git a/scaffolds/demo/examples/advanced/patient_swapper/README.md b/scaffolds/demo/examples/advanced/patient_swapper/README.md
index e4253dce2..8953a51ee 100644
--- a/scaffolds/demo/examples/advanced/patient_swapper/README.md
+++ b/scaffolds/demo/examples/advanced/patient_swapper/README.md
@@ -25,5 +25,5 @@ deno run -A jsr:@bolt-foundry/gambit/cli run ./patient_swapper.deck.md \
 ## Notes
 
 - Cards live in `cards/` and schemas live in `schemas/`.
-- The test bot deck is `tests/patient_swapper_test.deck.md`.
-- The test bot hangup card is `cards/test_bot_hangup.card.md`.
+- The scenario deck is `tests/patient_swapper_test.deck.md`.
+- The scenario hangup card is `cards/test_bot_hangup.card.md`.
diff --git a/scaffolds/demo/examples/advanced/pokemon_advice/root.deck.ts b/scaffolds/demo/examples/advanced/pokemon_advice/root.deck.ts
index 1440d121d..d71cd6ec8 100644
--- a/scaffolds/demo/examples/advanced/pokemon_advice/root.deck.ts
+++ b/scaffolds/demo/examples/advanced/pokemon_advice/root.deck.ts
@@ -9,7 +9,7 @@ export default defineDeck({
   modelParams: { model: "openai/gpt-4o-mini", temperature: 0.4 },
   testDecks: [
     {
-      label: "Schema form test bot",
+      label: "Schema form scenario",
       path: "./tests/schema_form_test.deck.md",
       description: "Synthetic caller that asks for Pokemon advice.",
     },
diff --git a/scaffolds/demo/examples/advanced/pokemon_advice/schema_form.deck.ts b/scaffolds/demo/examples/advanced/pokemon_advice/schema_form.deck.ts
index 5d8e76b44..6c6237a44 100644
--- a/scaffolds/demo/examples/advanced/pokemon_advice/schema_form.deck.ts
+++ b/scaffolds/demo/examples/advanced/pokemon_advice/schema_form.deck.ts
@@ -9,7 +9,7 @@ export default defineDeck({
   modelParams: { model: "openai/gpt-4o-mini", temperature: 0.4 },
   testDecks: [
     {
-      label: "Schema form test bot",
+      label: "Schema form scenario",
       path: "./tests/schema_form_test.deck.md",
       description: "Synthetic caller that asks for Pokemon advice.",
     },
diff --git a/scaffolds/demo/examples/advanced/policy_support_bot/README.md b/scaffolds/demo/examples/advanced/policy_support_bot/README.md
index ca3d08a95..1caa9219f 100644
--- a/scaffolds/demo/examples/advanced/policy_support_bot/README.md
+++ b/scaffolds/demo/examples/advanced/policy_support_bot/README.md
@@ -21,10 +21,10 @@ includes:
 | `cards/*.card.md`                   | Persona, user persona, and behavior cards reused by the root deck.                                               |
 | `schemas/*.zod.ts`                  | Zod schemas for bot outputs and search inputs.                                                                   |
 | `tests/faq_dataset.test.ts`         | Deno unit tests that ensure the FAQ knowledge base stays intact.                                                 |
-| `tests/new_account_persona.deck.md` | Synthetic persona deck for the Test Bot tab.                                                                     |
+| `tests/new_account_persona.deck.md` | Synthetic persona deck for the Scenario tab.                                                                     |
 | `demo-script.md`                    | Suggested prompts (answers + refusals) for the Gambit Debug UI.                                                  |
 
-To wire synthetic QA personas into the Test Bot tab, add `[[testDecks]]` entries
+To wire synthetic QA personas into the Scenario tab, add `[[testDecks]]` entries
 to `policy_support_bot.deck.md` that point at persona decks (for example
 `./tests/new_account_persona.deck.md`). Those persona decks should set
 `acceptsUserTurns = true` and can declare an `contextSchema` so the Scenario
diff --git a/scaffolds/demo/examples/advanced/simpsons_explainer/README.md b/scaffolds/demo/examples/advanced/simpsons_explainer/README.md
index b357a1a33..9d865d9e8 100644
--- a/scaffolds/demo/examples/advanced/simpsons_explainer/README.md
+++ b/scaffolds/demo/examples/advanced/simpsons_explainer/README.md
@@ -23,10 +23,10 @@ From this folder:
 deno run -A jsr:@bolt-foundry/gambit@^0.5.3-dev/cli serve demo.deck.md
 ```
 
-## Run a test bot (UI)
+## Run a scenario (UI)
 
 1. Open the simulator UI and go to the "Test" tab.
-2. Click "Run test bot".
+2. Click "Run scenario".
 3. Switch to the "Grade" tab and click "Run grader".
 4. After the grader completes, review run 2 (expect a -3 score).
 5. Share the failing run details with Codex and ask it to fix the prompt.
@@ -34,6 +34,6 @@ deno run -A jsr:@bolt-foundry/gambit@^0.5.3-dev/cli serve demo.deck.md
 
 ## Notes
 
-- Test decks are registered in `cards/test_decks.card.md`.
+- Scenario decks are registered in `cards/test_decks.card.md`.
 - Graders are registered in `cards/grader_decks.card.md`.
 - Instruction cards live in `cards/` and are included by `demo.deck.md`.
diff --git a/scaffolds/demo/examples/advanced/simpsons_explainer/cards/test_decks.card.md b/scaffolds/demo/examples/advanced/simpsons_explainer/cards/test_decks.card.md
index 6b5bc8065..8c9cc4976 100644
--- a/scaffolds/demo/examples/advanced/simpsons_explainer/cards/test_decks.card.md
+++ b/scaffolds/demo/examples/advanced/simpsons_explainer/cards/test_decks.card.md
@@ -4,10 +4,10 @@ label = "demo_test_decks"
 [[testDecks]]
 label = "Planets question"
 path = "../test_bots/planets.deck.md"
-description = "Test bot that asks a basic planets-orbit question, with optional initialQuestion override."
+description = "Scenario that asks a basic planets-orbit question, with optional initialQuestion override."
 
 [[testDecks]]
 label = "Quantum entanglement question"
 path = "../test_bots/quantum_entanglement.deck.md"
-description = "Test bot that asks about quantum entanglement, with optional initialQuestion override."
+description = "Scenario that asks about quantum entanglement, with optional initialQuestion override."
 +++
diff --git a/scaffolds/demo/examples/advanced/simpsons_explainer/schemas/test_bot_input.zod.ts b/scaffolds/demo/examples/advanced/simpsons_explainer/schemas/test_bot_input.zod.ts
index 0bef4deae..38a9efa9c 100644
--- a/scaffolds/demo/examples/advanced/simpsons_explainer/schemas/test_bot_input.zod.ts
+++ b/scaffolds/demo/examples/advanced/simpsons_explainer/schemas/test_bot_input.zod.ts
@@ -2,6 +2,6 @@ import { z } from "npm:zod";
 
 export default z.object({
   initialQuestion: z.string().describe(
-    "Optional override for the test bot's first user question.",
+    "Optional override for the scenario's first user question.",
   ).optional(),
 });
diff --git a/scaffolds/demo/examples/advanced/voice_front_desk/README.md b/scaffolds/demo/examples/advanced/voice_front_desk/README.md
index b3a072c92..1b1646119 100644
--- a/scaffolds/demo/examples/advanced/voice_front_desk/README.md
+++ b/scaffolds/demo/examples/advanced/voice_front_desk/README.md
@@ -31,14 +31,14 @@ deno run -A jsr:@bolt-foundry/gambit/cli run ./decks/root.deck.md \
   --message '"Hi, this is Nina. I need to move my physical."' --stream
 ```
 
-## Run a test bot (UI)
+## Run a scenario (UI)
 
 1. Open the simulator UI and go to the "Test" tab.
 2. Pick a persona from the list (for example, "New patient intake").
-3. Click "Run test bot".
+3. Click "Run scenario".
 
 ## Notes
 
 - Root decks live in `decks/` and action decks live in `actions/`.
 - Test personas are listed in `cards/test_decks.card.md`.
-- The test bot hangup card is `tests/cards/test_bot_hangup.card.md`.
+- The scenario hangup card is `tests/cards/test_bot_hangup.card.md`.
diff --git a/scaffolds/demo/examples/advanced/voice_front_desk/tests/faq_first_caller_input.zod.ts b/scaffolds/demo/examples/advanced/voice_front_desk/tests/faq_first_caller_input.zod.ts
index b907d1a32..8bb2a36cf 100644
--- a/scaffolds/demo/examples/advanced/voice_front_desk/tests/faq_first_caller_input.zod.ts
+++ b/scaffolds/demo/examples/advanced/voice_front_desk/tests/faq_first_caller_input.zod.ts
@@ -2,6 +2,6 @@ import { z } from "npm:zod";
 
 export default z.object({
   scenarioDescription: z.string().describe(
-    "Optional instructions that describe the FAQ scenario the test bot should play out",
+    "Optional instructions that describe the FAQ scenario the persona should play out",
   ).optional(),
 });
diff --git a/scaffolds/demo/examples/advanced/voice_front_desk/tests/privacy_sensitive_patient_input.zod.ts b/scaffolds/demo/examples/advanced/voice_front_desk/tests/privacy_sensitive_patient_input.zod.ts
index e68b2f277..8f66e92e8 100644
--- a/scaffolds/demo/examples/advanced/voice_front_desk/tests/privacy_sensitive_patient_input.zod.ts
+++ b/scaffolds/demo/examples/advanced/voice_front_desk/tests/privacy_sensitive_patient_input.zod.ts
@@ -2,6 +2,6 @@ import { z } from "npm:zod";
 
 export default z.object({
   scenarioDescription: z.string().describe(
-    "Optional instructions that describe the scenario the test bot should play out",
+    "Optional instructions that describe the scenario the persona should play out",
   ).optional(),
 });
diff --git a/scaffolds/demo/examples/faq-bot-example/faq_dataset.md b/scaffolds/demo/examples/faq-bot-example/faq_dataset.md
index a01206802..1b4450d1c 100644
--- a/scaffolds/demo/examples/faq-bot-example/faq_dataset.md
+++ b/scaffolds/demo/examples/faq-bot-example/faq_dataset.md
@@ -20,10 +20,10 @@ criteria so you can see what passes and fails.
 An action deck is a callable tool that performs a task or fetches data and
 returns structured output.
 
-## What is a test deck?
+## What is a scenario deck?
 
-A test deck simulates a user or scenario so you can run repeatable tests against
-a deck.
+A scenario deck simulates a user or scenario so you can run repeatable tests
+against a deck.
 
 ## What is a card?
 
@@ -40,8 +40,8 @@ Use the Gambit CLI to run a deck from the command line.
 
 ## What modules ship in Gambit?
 
-- Deck editor for building root/action/test decks.
+- Deck editor for building root/action/scenario decks.
 - Debug UI for running conversations and inspecting traces.
-- Test bot panel for scripted personas and graders.
+- Scenario panel for scripted personas and graders.
 - Coverage dashboard for reviewing grader outcomes.
 - Bundle/export tooling to package decks for deployment.
diff --git a/scaffolds/demo/hello.deck.md b/scaffolds/demo/hello.deck.md
index 700f69eb0..166603614 100644
--- a/scaffolds/demo/hello.deck.md
+++ b/scaffolds/demo/hello.deck.md
@@ -2,7 +2,7 @@
 label = "hello"
 modelParams = { model = "openai/gpt-4o-mini", temperature = 0 }
 [[testDecks]]
-label = "Hello test bot"
+label = "Hello scenario"
 path = "./hello.test.deck.md"
 description = "Synthetic user that sends a single greeting input."
 [[graderDecks]]
diff --git a/scaffolds/init/README.md b/scaffolds/init/README.md
index fc66ea749..56a924d41 100644
--- a/scaffolds/init/README.md
+++ b/scaffolds/init/README.md
@@ -31,7 +31,7 @@ workspace with opinionated folders, ready for your own decks/actions/graders.
 - `decks/` – root decks (a starter `root.deck.md` is included for you to edit).
 - `actions/` – reusable tool/action decks or cards.
 - `graders/` – guard rails and grading decks.
-- `tests/` – synthetic personas/test bots.
+- `tests/` – synthetic personas/scenarios.
 - `schemas/` – Zod schemas shared across decks/tests.
 - `.gambit/` – local sessions/traces (safe to clear, usually ignored by git).
 - `gambit.toml` – workspace configuration (folders + model aliases).
diff --git a/scaffolds/init/package.json b/scaffolds/init/package.json
index f86685dd8..b22e8d315 100644
--- a/scaffolds/init/package.json
+++ b/scaffolds/init/package.json
@@ -4,7 +4,7 @@
   "scripts": {
     "repl": "npx @bolt-foundry/gambit repl",
     "serve": "npx @bolt-foundry/gambit serve",
-    "test": "npx @bolt-foundry/gambit test-bot"
+    "test": "npx @bolt-foundry/gambit scenario"
   },
   "dependencies": {}
 }
diff --git a/scaffolds/init/schemas/README.md b/scaffolds/init/schemas/README.md
index ce51eec94..1780b702c 100644
--- a/scaffolds/init/schemas/README.md
+++ b/scaffolds/init/schemas/README.md
@@ -1,7 +1,7 @@
 # Schemas
 
 Keep shared Zod/TypeScript schemas here. Schemas provide typed inputs/outputs
-for decks, actions, graders, and test bots so runs remain predictable.
+for decks, actions, graders, and scenarios so runs remain predictable.
 
 Ideas:
 
diff --git a/scaffolds/init/tests/README.md b/scaffolds/init/tests/README.md
index 2f8ba5b57..e4d296430 100644
--- a/scaffolds/init/tests/README.md
+++ b/scaffolds/init/tests/README.md
@@ -1,7 +1,7 @@
 # Tests
 
-Drop synthetic personas, test bots, or scripted scenarios here. Use them with
-`gambit test-bot` to simulate user conversations and verify decks without real
+Drop synthetic personas or scripted scenarios here. Use them with
+`gambit scenario` to simulate user conversations and verify decks without real
 users.
 
 Typical flow:
@@ -9,5 +9,5 @@ Typical flow:
 1. Write a persona file (e.g. `tests/new_patient.deck.md`) that exercises a
    deck.
 2. Run
-   `npx @bolt-foundry/gambit test-bot decks/<deck> --test-deck tests/<persona>`.
+   `npx @bolt-foundry/gambit scenario decks/<deck> --test-deck tests/<persona>`.
 3. Capture regressions before shipping changes.
diff --git a/simulator-ui/demo/gambit-build-tab-demo-timeline.ts b/simulator-ui/demo/gambit-build-tab-demo-timeline.ts
index 192b8555c..2e8f0f8d2 100644
--- a/simulator-ui/demo/gambit-build-tab-demo-timeline.ts
+++ b/simulator-ui/demo/gambit-build-tab-demo-timeline.ts
@@ -98,22 +98,13 @@ export function buildTabDemoTimeline(opts: {
     { type: "screenshot", label: "03-build-file-policy" },
     {
       type: "wait-for",
-      selector: '[data-testid="build-changes-count"]',
-      text: /[1-9]/,
+      selector: '[data-testid="build-chat-input"]:not([disabled])',
       timeoutMs: 120_000,
     },
     { type: "wait", ms: 500 },
     { type: "screenshot", label: "02-build-start" },
   );
 
-  const beatReviewChanges: DemoTimelineStep[] = [
-    { type: "click", selector: ".build-recent-changes-trigger" },
-    { type: "wait-for", selector: '[data-testid="build-changes-panel"]' },
-    { type: "scroll", selector: '[data-testid="build-changes-panel"]' },
-    { type: "wait", ms: 500 },
-    { type: "screenshot", label: "04-build-recent-changes" },
-  ];
-
   const beatCheckTabs: DemoTimelineStep[] = [];
   beatCheckTabs.push(
     { type: "click", selector: '[data-testid="nav-test"]' },
@@ -157,7 +148,6 @@ export function buildTabDemoTimeline(opts: {
   return [
     ...beatOpenBuild,
     ...beatPrompt,
-    ...beatReviewChanges,
     ...beatCheckTabs,
   ];
 }
diff --git a/simulator-ui/demo/gambit-ui-demo-script.md b/simulator-ui/demo/gambit-ui-demo-script.md
index 41275dcd7..74f48916e 100644
--- a/simulator-ui/demo/gambit-ui-demo-script.md
+++ b/simulator-ui/demo/gambit-ui-demo-script.md
@@ -29,11 +29,11 @@ If you can run it, you can inspect it. That is the core promise.
 
 Start from a local checkout. In `packages/gambit`, run
 `deno run -A src/cli.ts serve init/examples/advanced/voice_front_desk/decks/root.deck.md --port 8000`,
-then open `http://localhost:8000/test-bot`.
+then open `http://localhost:8000/test`.
 
-In Test Bot, select the New patient intake persona. Fill the scenario
-description, caller name, and date of birth. The init form comes from the deck
-input schema, so this run stays reproducible.
+In Test, select the New patient intake persona. Fill the scenario description,
+caller name, and date of birth. The init form comes from the deck input schema,
+so this run stays reproducible.
 
 Click Run and let a few turns stream. We now have a session id that ties
 together the transcript, traces, and feedback.
@@ -45,5 +45,5 @@ reason, and the exact turn context that drove the result.
 Go to Debug and inspect the run. The transcript shows every message, the trace
 pane shows every deck and tool event, and timing is captured along the way.
 
-From here the loop is simple. Edit the deck in code, rerun Test Bot, and regrade
-until the behavior is correct.
+From here the loop is simple. Edit the deck in code, rerun the scenario, and
+regrade until the behavior is correct.
diff --git a/simulator-ui/demo/gambit-ui-demo-timeline.ts b/simulator-ui/demo/gambit-ui-demo-timeline.ts
index 58ba8e0a0..1392d6877 100644
--- a/simulator-ui/demo/gambit-ui-demo-timeline.ts
+++ b/simulator-ui/demo/gambit-ui-demo-timeline.ts
@@ -116,19 +116,19 @@ const beatScenario: DemoTimelineStep[] = [
   {
     type: "voiceover",
     text:
-      "The bot takes a typed input schema, so we’ll fill in a concrete FAQ-first scenario about Sunday hours and let the test bot play it out.",
+      "The bot takes a typed input schema, so we’ll fill in a concrete FAQ-first scenario about Sunday hours and let the persona play it out.",
     showSubtitles: true,
   },
 ];
 
-/** Beat 4: Run + inspect (Test Bot simulates and produces a reproducible session). */
+/** Beat 4: Run + inspect (Scenario simulates and produces a reproducible session). */
 const beatRunInspect: DemoTimelineStep[] = [
   // Demo note: temporarily drop the tool-backed response guardrail and add a "hallucinate wildly" card
   // to force a FAQ hallucination. Then add a grader to flag it, and restore the guardrail to fix.
   {
     type: "voiceover",
     text:
-      "We’ll simulate a patient call with a test bot and watch the workflow run end to end as it streams.",
+      "We’ll simulate a patient call with a scenario and watch the workflow run end to end as it streams.",
     showSubtitles: true,
   },
   {
@@ -184,7 +184,7 @@ const beatDebug: DemoTimelineStep[] = [
   {
     type: "voiceover",
     text:
-      "Then we’ll run the test bot again and rerun the grader to confirm the fix.",
+      "Then we’ll run the scenario again and rerun the grader to confirm the fix.",
     showSubtitles: true,
   },
   {
diff --git a/simulator-ui/src/BuildChatContext.tsx b/simulator-ui/src/BuildChatContext.tsx
index 694e191e5..838ec4897 100644
--- a/simulator-ui/src/BuildChatContext.tsx
+++ b/simulator-ui/src/BuildChatContext.tsx
@@ -1,70 +1,5 @@
-import React, {
-  createContext,
-  useCallback,
-  useContext,
-  useEffect,
-  useMemo,
-  useRef,
-  useState,
-} from "react";
-import {
-  BUILD_STREAM_ID,
-  type BuildBotSocketMessage,
-  buildDurableStreamUrl,
-  getDurableStreamOffset,
-  setDurableStreamOffset,
-  summarizeToolCalls,
-  type ToolCallSummary,
-  type TraceEvent,
-} from "./utils.ts";
-
-type BuildRun = {
-  id: string;
-  status: "idle" | "running" | "completed" | "error" | "canceled";
-  error?: string;
-  startedAt?: string;
-  finishedAt?: string;
-  messages: Array<{
-    role: string;
-    content: string;
-  }>;
-  traces?: Array<TraceEvent>;
-  toolInserts?: Array<{
-    actionCallId?: string;
-    parentActionCallId?: string;
-    name?: string;
-    index: number;
-  }>;
-};
-
-type BuildChatContextValue = {
-  run: BuildRun;
-  toolCalls: ToolCallSummary[];
-  chatDraft: string;
-  setChatDraft: React.Dispatch<React.SetStateAction<string>>;
-  chatSending: boolean;
-  chatError: string | null;
-  setChatError: React.Dispatch<React.SetStateAction<string | null>>;
-  toolCallsOpen: Record<number, boolean>;
-  setToolCallsOpen: React.Dispatch<
-    React.SetStateAction<Record<number, boolean>>
-  >;
-  optimisticUser: { id: string; text: string } | null;
-  setOptimisticUser: React.Dispatch<
-    React.SetStateAction<{ id: string; text: string } | null>
-  >;
-  streamingAssistant: { runId: string; turn: number; text: string } | null;
-  setStreamingAssistant: React.Dispatch<
-    React.SetStateAction<
-      { runId: string; turn: number; text: string } | null
-    >
-  >;
-  resetChat: () => Promise<void>;
-  sendMessage: (message: string) => Promise<void>;
-  loadChat: (runId: string) => Promise<void>;
-};
-
-const BuildChatContext = createContext<BuildChatContextValue | null>(null);
+import React from "react";
+import { useWorkspaceBuild, WorkspaceProvider } from "./WorkspaceContext.tsx";
 
 export function BuildChatProvider(
   props: {
@@ -73,327 +8,9 @@ export function BuildChatProvider(
     onWorkspaceChange?: (workspaceId: string) => void;
   },
 ) {
-  const { children, workspaceId, onWorkspaceChange } = props;
-  const [run, setRun] = useState<BuildRun>({
-    id: "",
-    status: "idle",
-    messages: [],
-    traces: [],
-    toolInserts: [],
-  });
-  const runIdRef = useRef<string>("");
-
-  const [chatDraft, setChatDraft] = useState("");
-  const [chatSending, setChatSending] = useState(false);
-  const [chatError, setChatError] = useState<string | null>(null);
-  const [toolCallsOpen, setToolCallsOpen] = useState<Record<number, boolean>>(
-    {},
-  );
-  const [optimisticUser, setOptimisticUser] = useState<
-    { id: string; text: string } | null
-  >(null);
-  const [streamingAssistant, setStreamingAssistant] = useState<
-    { runId: string; turn: number; text: string } | null
-  >(null);
-
-  const refreshStatus = useCallback(async (opts?: { workspaceId?: string }) => {
-    const query = opts?.workspaceId
-      ? `?workspaceId=${encodeURIComponent(opts.workspaceId)}`
-      : "";
-    const res = await fetch(`/api/build/status${query}`);
-    const data = await res.json().catch(() => ({})) as { run?: BuildRun };
-    if (data.run) {
-      setRun({
-        ...data.run,
-        messages: data.run.messages ?? [],
-        traces: data.run.traces ?? [],
-        toolInserts: data.run.toolInserts ?? [],
-      });
-      if (typeof data.run.id === "string" && data.run.id) {
-        runIdRef.current = data.run.id;
-      }
-    }
-  }, [onWorkspaceChange]);
-
-  useEffect(() => {
-    if (workspaceId) {
-      runIdRef.current = workspaceId;
-      refreshStatus({ workspaceId }).catch(() => {});
-      return;
-    }
-    refreshStatus().catch(() => {});
-  }, [refreshStatus, workspaceId]);
-
-  useEffect(() => {
-    if (!workspaceId) return;
-    if (runIdRef.current === workspaceId) return;
-    runIdRef.current = workspaceId;
-    setRun((prev) => ({
-      ...prev,
-      id: workspaceId,
-    }));
-    setChatError(null);
-    setStreamingAssistant(null);
-    setOptimisticUser(null);
-    setToolCallsOpen({});
-    refreshStatus({ workspaceId }).catch(() => {});
-  }, [refreshStatus, workspaceId]);
-
-  useEffect(() => {
-    const streamId = BUILD_STREAM_ID;
-    const streamUrl = buildDurableStreamUrl(
-      streamId,
-      getDurableStreamOffset(streamId),
-    );
-    const source = new EventSource(streamUrl);
-
-    source.onmessage = (event) => {
-      let envelope: { offset?: unknown; data?: unknown } | null = null;
-      try {
-        envelope = JSON.parse(event.data) as {
-          offset?: unknown;
-          data?: unknown;
-        };
-      } catch {
-        return;
-      }
-      if (
-        envelope &&
-        typeof envelope.offset === "number" &&
-        Number.isFinite(envelope.offset)
-      ) {
-        setDurableStreamOffset(streamId, envelope.offset + 1);
-      }
-      const msg = envelope?.data as BuildBotSocketMessage | undefined;
-      if (!msg) return;
-      const activeRunId = runIdRef.current;
-      if (msg.type === "buildBotStatus" && msg.run) {
-        if (activeRunId && msg.run.id !== activeRunId) return;
-        setRun({
-          ...msg.run,
-          messages: msg.run.messages ?? [],
-          traces: msg.run.traces ?? [],
-          toolInserts: msg.run.toolInserts ?? [],
-        } as BuildRun);
-        return;
-      }
-      if (msg.type === "buildBotStream") {
-        if (!msg.runId || (activeRunId && msg.runId !== activeRunId)) return;
-        const streamRunId = msg.runId;
-        const turn = typeof msg.turn === "number" ? msg.turn : 0;
-        if (msg.role !== "assistant") return;
-        setStreamingAssistant((prev) =>
-          prev && prev.runId === streamRunId && prev.turn === turn
-            ? { ...prev, text: prev.text + msg.chunk }
-            : { runId: streamRunId, turn, text: msg.chunk }
-        );
-        return;
-      }
-      if (msg.type === "buildBotStreamEnd") {
-        if (!msg.runId || (activeRunId && msg.runId !== activeRunId)) return;
-        const turn = typeof msg.turn === "number" ? msg.turn : 0;
-        setStreamingAssistant((prev) =>
-          prev && prev.runId === msg.runId && prev.turn === turn ? null : prev
-        );
-      }
-    };
-
-    return () => {
-      source.close();
-    };
-  }, []);
-
-  const toolCalls = useMemo(
-    () => summarizeToolCalls(run.traces ?? []),
-    [run.traces],
-  );
-
-  const ensureWorkspaceId = useCallback(async () => {
-    if (workspaceId) return workspaceId;
-    if (runIdRef.current) return runIdRef.current;
-    try {
-      const res = await fetch("/api/workspace/new", {
-        method: "POST",
-      });
-      const data = await res.json().catch(() => ({})) as {
-        workspaceId?: string;
-      };
-      if (res.ok && typeof data.workspaceId === "string") {
-        const nextWorkspaceId = data.workspaceId;
-        runIdRef.current = nextWorkspaceId;
-        setRun((prev) => ({ ...prev, id: nextWorkspaceId }));
-        onWorkspaceChange?.(nextWorkspaceId);
-        return nextWorkspaceId;
-      }
-    } catch {
-      // ignore
-    }
-    const fallback = `workspace-${crypto.randomUUID()}`;
-    runIdRef.current = fallback;
-    setRun((prev) => ({ ...prev, id: fallback }));
-    return fallback;
-  }, [onWorkspaceChange, workspaceId]);
-
-  const resetChat = useCallback(async () => {
-    const res = await fetch("/api/workspace/new", { method: "POST" }).catch(
-      () => null,
-    );
-    const data = res
-      ? await res.json().catch(() => ({})) as { workspaceId?: string }
-      : {};
-    if (res && res.ok && typeof data.workspaceId === "string") {
-      runIdRef.current = data.workspaceId;
-      setRun({
-        id: data.workspaceId,
-        status: "idle",
-        messages: [],
-        traces: [],
-        toolInserts: [],
-      });
-      onWorkspaceChange?.(data.workspaceId);
-    } else {
-      runIdRef.current = "";
-      setRun({
-        id: "",
-        status: "idle",
-        messages: [],
-        traces: [],
-        toolInserts: [],
-      });
-    }
-    setChatDraft("");
-    setChatError(null);
-    setStreamingAssistant(null);
-    setOptimisticUser(null);
-    setToolCallsOpen({});
-  }, [onWorkspaceChange]);
-
-  const sendMessage = useCallback(async (message: string) => {
-    const runId = await ensureWorkspaceId();
-    setChatSending(true);
-    setChatError(null);
-    try {
-      const res = await fetch("/api/build/message", {
-        method: "POST",
-        headers: { "content-type": "application/json" },
-        body: JSON.stringify({ workspaceId: runId, message }),
-      });
-      const data = await res.json().catch(() => ({})) as {
-        run?: BuildRun;
-        error?: string;
-      };
-      if (!res.ok) {
-        throw new Error(
-          typeof data.error === "string" ? data.error : res.statusText,
-        );
-      }
-      if (data.run) {
-        setRun({
-          ...data.run,
-          messages: data.run.messages ?? [],
-          traces: data.run.traces ?? [],
-          toolInserts: data.run.toolInserts ?? [],
-        });
-        if (typeof data.run.id === "string" && data.run.id) {
-          runIdRef.current = data.run.id;
-        }
-      }
-    } finally {
-      setChatSending(false);
-    }
-  }, [ensureWorkspaceId]);
-
-  const loadChat = useCallback(async (runId: string) => {
-    setChatSending(true);
-    setChatError(null);
-    try {
-      const res = await fetch("/api/build/load", {
-        method: "POST",
-        headers: { "content-type": "application/json" },
-        body: JSON.stringify({ workspaceId: runId }),
-      });
-      const data = await res.json().catch(() => ({})) as {
-        run?: BuildRun;
-        error?: string;
-      };
-      if (!res.ok) {
-        throw new Error(
-          typeof data.error === "string" ? data.error : res.statusText,
-        );
-      }
-      if (data.run) {
-        setRun({
-          ...data.run,
-          messages: data.run.messages ?? [],
-          traces: data.run.traces ?? [],
-          toolInserts: data.run.toolInserts ?? [],
-        });
-        if (typeof data.run.id === "string" && data.run.id) {
-          runIdRef.current = data.run.id;
-        }
-        if (typeof data.run.id === "string" && data.run.id) {
-          onWorkspaceChange?.(data.run.id);
-        }
-        setChatDraft("");
-        setOptimisticUser(null);
-        setStreamingAssistant(null);
-        setToolCallsOpen({});
-      }
-    } finally {
-      setChatSending(false);
-    }
-  }, []);
-
-  const value = useMemo(
-    () => ({
-      run,
-      toolCalls,
-      chatDraft,
-      setChatDraft,
-      chatSending,
-      chatError,
-      setChatError,
-      toolCallsOpen,
-      setToolCallsOpen,
-      optimisticUser,
-      setOptimisticUser,
-      streamingAssistant,
-      setStreamingAssistant,
-      resetChat,
-      sendMessage,
-      loadChat,
-    }),
-    [
-      run,
-      toolCalls,
-      chatDraft,
-      setChatDraft,
-      chatSending,
-      chatError,
-      setChatError,
-      toolCallsOpen,
-      setToolCallsOpen,
-      optimisticUser,
-      setOptimisticUser,
-      streamingAssistant,
-      setStreamingAssistant,
-      resetChat,
-      sendMessage,
-      loadChat,
-    ],
-  );
-
-  return (
-    <BuildChatContext.Provider value={value}>
-      {children}
-    </BuildChatContext.Provider>
-  );
+  return <WorkspaceProvider {...props} />;
 }
 
 export function useBuildChat() {
-  const context = useContext(BuildChatContext);
-  if (!context) {
-    throw new Error("useBuildChat must be used within BuildChatProvider");
-  }
-  return context;
+  return useWorkspaceBuild();
 }
diff --git a/simulator-ui/src/BuildPage.tsx b/simulator-ui/src/BuildPage.tsx
index 2a27df9e2..24ce2a473 100644
--- a/simulator-ui/src/BuildPage.tsx
+++ b/simulator-ui/src/BuildPage.tsx
@@ -1,19 +1,16 @@
 import React, {
   useCallback,
   useEffect,
-  useLayoutEffect,
   useMemo,
   useRef,
   useState,
 } from "react";
-import { createPortal } from "react-dom";
 import { type ToolCallSummary, workspaceOnboardingEnabled } from "./utils.ts";
 import PageShell from "./gds/PageShell.tsx";
 import PageGrid from "./gds/PageGrid.tsx";
 import Panel from "./gds/Panel.tsx";
-import Badge from "./gds/Badge.tsx";
 import Listbox, { type ListboxOption } from "./gds/Listbox.tsx";
-import { useBuildChat } from "./BuildChatContext.tsx";
+import { useWorkspaceBuild } from "./WorkspaceContext.tsx";
 
 type BuildFileEntry = {
   path: string;
@@ -90,7 +87,7 @@ export default function BuildPage(props: {
 }) {
   const { setNavActions } = props;
 
-  const { run, toolCalls } = useBuildChat();
+  const { run, toolCalls } = useWorkspaceBuild();
   const [fileEntries, setFileEntries] = useState<BuildFileEntry[]>([]);
   const [fileListLoading, setFileListLoading] = useState(false);
   const [fileListError, setFileListError] = useState<string | null>(null);
@@ -98,14 +95,10 @@ export default function BuildPage(props: {
   const [filePreview, setFilePreview] = useState<BuildFilePreview>({
     status: "idle",
   });
-  const [recentChangesOpen, setRecentChangesOpen] = useState(false);
-  const [recentChangesReadCount, setRecentChangesReadCount] = useState(0);
-  const recentChangesTriggerRef = useRef<HTMLButtonElement | null>(null);
-  const recentChangesPopoverRef = useRef<HTMLDivElement | null>(null);
-  const [recentChangesPopoverStyle, setRecentChangesPopoverStyle] = useState<
-    React.CSSProperties | null
-  >(null);
   const lastTraceCountRef = useRef<number>(0);
+  const traceRefreshTimerRef = useRef<number | null>(null);
+  const fileListRefreshInFlightRef = useRef(false);
+  const fileListRefreshQueuedRef = useRef(false);
 
   useEffect(() => {
     if (!setNavActions) return;
@@ -114,39 +107,68 @@ export default function BuildPage(props: {
   }, [setNavActions]);
 
   const refreshFileList = useCallback(async () => {
+    if (fileListRefreshInFlightRef.current) {
+      fileListRefreshQueuedRef.current = true;
+      return;
+    }
+    fileListRefreshInFlightRef.current = true;
     setFileListLoading(true);
     setFileListError(null);
     try {
-      const query = run.id ? `?workspaceId=${encodeURIComponent(run.id)}` : "";
-      const res = await fetch(`/api/build/files${query}`);
-      const data = await res.json().catch(() => ({})) as {
-        entries?: BuildFileEntry[];
-        error?: string;
-      };
-      if (!res.ok) {
-        throw new Error(
-          typeof data.error === "string" ? data.error : res.statusText,
-        );
+      let shouldRun = true;
+      while (shouldRun) {
+        fileListRefreshQueuedRef.current = false;
+        const query = run.id
+          ? `?workspaceId=${encodeURIComponent(run.id)}`
+          : "";
+        const res = await fetch(`/api/build/files${query}`);
+        const data = await res.json().catch(() => ({})) as {
+          entries?: BuildFileEntry[];
+          error?: string;
+        };
+        if (!res.ok) {
+          throw new Error(
+            typeof data.error === "string" ? data.error : res.statusText,
+          );
+        }
+        setFileEntries(Array.isArray(data.entries) ? data.entries : []);
+        shouldRun = fileListRefreshQueuedRef.current;
       }
-      setFileEntries(Array.isArray(data.entries) ? data.entries : []);
     } catch (err) {
       setFileListError(err instanceof Error ? err.message : String(err));
       setFileEntries([]);
     } finally {
+      fileListRefreshInFlightRef.current = false;
       setFileListLoading(false);
     }
   }, [run.id]);
 
+  const clearTraceRefreshTimer = useCallback(() => {
+    if (traceRefreshTimerRef.current === null) return;
+    clearTimeout(traceRefreshTimerRef.current);
+    traceRefreshTimerRef.current = null;
+  }, []);
+
   useEffect(() => {
     refreshFileList().catch(() => {});
   }, [refreshFileList]);
 
   useEffect(() => {
     const traceCount = run.traces?.length ?? 0;
+    clearTraceRefreshTimer();
     if (traceCount === lastTraceCountRef.current) return;
     lastTraceCountRef.current = traceCount;
-    refreshFileList().catch(() => {});
-  }, [run.traces?.length, refreshFileList]);
+    traceRefreshTimerRef.current = setTimeout(() => {
+      traceRefreshTimerRef.current = null;
+      refreshFileList().catch(() => {});
+    }, 250);
+  }, [clearTraceRefreshTimer, run.traces?.length, refreshFileList]);
+
+  useEffect(() => {
+    return () => {
+      clearTraceRefreshTimer();
+    };
+  }, [clearTraceRefreshTimer]);
 
   const fileEntriesByPath = useMemo(() => {
     const map = new Map<string, BuildFileEntry>();
@@ -196,6 +218,17 @@ export default function BuildPage(props: {
     ? fileEntriesByPath.get(selectedPath)
     : undefined;
 
+  const selectedPathChangeToken = useMemo(() => {
+    if (!selectedPath) return "";
+    for (let i = toolCalls.length - 1; i >= 0; i -= 1) {
+      const change = extractBotWriteChange(toolCalls[i]);
+      if (change?.path === selectedPath) {
+        return `${change.id}:${change.action ?? ""}`;
+      }
+    }
+    return "";
+  }, [selectedPath, toolCalls]);
+
   useEffect(() => {
     const filePaths = Array.from(fileEntriesByPath.keys());
     const hasSelected = selectedPath && fileEntriesByPath.has(selectedPath);
@@ -278,102 +311,13 @@ export default function BuildPage(props: {
     return () => {
       canceled = true;
     };
-  }, [selectedPath]);
-
-  const toolInsertIndexByCall = useMemo(() => {
-    const map = new Map<string, number>();
-    for (const insert of run.toolInserts ?? []) {
-      if (!insert.actionCallId) continue;
-      map.set(
-        insert.actionCallId,
-        typeof insert.index === "number" ? insert.index : 0,
-      );
-    }
-    return map;
-  }, [run.toolInserts]);
-
-  const changes = useMemo(() => {
-    return toolCalls
-      .map(extractBotWriteChange)
-      .filter((entry): entry is NonNullable<typeof entry> => Boolean(entry));
-  }, [toolCalls]);
-
-  const auditTrail = useMemo(() => {
-    const limited = changes.slice(-50);
-    return limited.map((change) => ({
-      ...change,
-      turn: toolInsertIndexByCall.get(change.id),
-    }));
-  }, [changes, toolInsertIndexByCall]);
-
-  const unreadRecentChangesCount = Math.max(
-    0,
-    changes.length - recentChangesReadCount,
-  );
-
-  const updateRecentChangesPopover = useCallback(() => {
-    const trigger = recentChangesTriggerRef.current;
-    if (!trigger) return;
-    const rect = trigger.getBoundingClientRect();
-    const width = Math.min(380, Math.max(260, window.innerWidth - 24));
-    const left = Math.max(
-      12,
-      Math.min(rect.right - width, window.innerWidth - width - 12),
-    );
-    setRecentChangesPopoverStyle({
-      position: "fixed",
-      top: rect.bottom + 6,
-      left,
-      width,
-    });
-  }, []);
-
-  useLayoutEffect(() => {
-    if (!recentChangesOpen) return;
-    updateRecentChangesPopover();
-  }, [recentChangesOpen, updateRecentChangesPopover]);
-
-  useEffect(() => {
-    if (!recentChangesOpen) {
-      setRecentChangesPopoverStyle(null);
-      return;
-    }
-    const handleOutside = (event: MouseEvent) => {
-      const target = event.target as Node | null;
-      const isInTrigger = recentChangesTriggerRef.current &&
-        target &&
-        recentChangesTriggerRef.current.contains(target);
-      const isInPopover = recentChangesPopoverRef.current &&
-        target &&
-        recentChangesPopoverRef.current.contains(target);
-      if (!isInTrigger && !isInPopover) {
-        setRecentChangesOpen(false);
-      }
-    };
-    const handleKey = (event: KeyboardEvent) => {
-      if (event.key === "Escape") {
-        event.preventDefault();
-        setRecentChangesOpen(false);
-      }
-    };
-    const handleReposition = () => updateRecentChangesPopover();
-    document.addEventListener("mousedown", handleOutside);
-    document.addEventListener("keydown", handleKey);
-    window.addEventListener("resize", handleReposition);
-    window.addEventListener("scroll", handleReposition, true);
-    return () => {
-      document.removeEventListener("mousedown", handleOutside);
-      document.removeEventListener("keydown", handleKey);
-      window.removeEventListener("resize", handleReposition);
-      window.removeEventListener("scroll", handleReposition, true);
-    };
-  }, [recentChangesOpen, updateRecentChangesPopover]);
-
-  useEffect(() => {
-    setRecentChangesOpen(false);
-    setRecentChangesReadCount(0);
-    setRecentChangesPopoverStyle(null);
-  }, [run.id]);
+  }, [
+    run.id,
+    selectedPath,
+    selectedEntry?.modifiedAt,
+    selectedEntry?.size,
+    selectedPathChangeToken,
+  ]);
 
   return (
     <PageShell>
@@ -410,35 +354,6 @@ export default function BuildPage(props: {
                       {formatBytes(selectedEntry.size)}
                     </span>
                   )}
-                  <button
-                    type="button"
-                    className="build-recent-changes-trigger"
-                    onClick={() => {
-                      if (recentChangesOpen) {
-                        setRecentChangesOpen(false);
-                        return;
-                      }
-                      setRecentChangesReadCount(changes.length);
-                      updateRecentChangesPopover();
-                      setRecentChangesOpen(true);
-                    }}
-                    aria-haspopup="dialog"
-                    aria-expanded={recentChangesOpen}
-                    ref={recentChangesTriggerRef}
-                  >
-                    <span className="build-recent-changes-label">
-                      Recent changes
-                    </span>
-                    <Badge
-                      variant={unreadRecentChangesCount > 0
-                        ? "running"
-                        : "ghost"}
-                      data-testid="build-changes-count"
-                      className="build-recent-changes-badge"
-                    >
-                      {unreadRecentChangesCount}
-                    </Badge>
-                  </button>
                 </div>
               </div>
             </div>
@@ -477,48 +392,6 @@ export default function BuildPage(props: {
               )}
             </div>
           </div>
-          {recentChangesOpen && recentChangesPopoverStyle &&
-            createPortal(
-              <div
-                className="build-recent-changes-popover"
-                style={recentChangesPopoverStyle}
-                ref={recentChangesPopoverRef}
-                data-testid="build-changes-panel"
-              >
-                {auditTrail.length === 0
-                  ? <div className="placeholder">No recent changes yet.</div>
-                  : (
-                    <div className="build-recent-changes-list">
-                      {[...auditTrail].reverse().map((change, idx) => (
-                        <button
-                          key={`${change.path}-${idx}`}
-                          type="button"
-                          className="build-recent-change-row"
-                          onClick={() => {
-                            setSelectedPath(change.path);
-                            setRecentChangesOpen(false);
-                          }}
-                        >
-                          <div className="build-recent-change-summary">
-                            {change.action ?? "updated"}:{" "}
-                            <code>{change.path}</code>
-                          </div>
-                          <div className="build-recent-change-meta">
-                            {change.before === null
-                              ? "Created file."
-                              : change.before === undefined
-                              ? "No before snapshot."
-                              : "Updated file."} {change.turn !== undefined
-                              ? `· Turn ${change.turn + 1}`
-                              : ""}
-                          </div>
-                        </button>
-                      ))}
-                    </div>
-                  )}
-              </div>,
-              document.body,
-            )}
         </Panel>
       </PageGrid>
     </PageShell>
diff --git a/simulator-ui/src/Chat.test.tsx b/simulator-ui/src/Chat.test.tsx
new file mode 100644
index 000000000..54f8f819d
--- /dev/null
+++ b/simulator-ui/src/Chat.test.tsx
@@ -0,0 +1,748 @@
+import { assert, assertEquals } from "@std/assert";
+import { FakeTime } from "@std/testing/time";
+import React from "react";
+import TestRenderer, { act } from "npm:react-test-renderer@19.2.0";
+import type { ReactTestInstance } from "npm:react-test-renderer@19.2.0";
+
+const globals = globalThis as unknown as {
+  window?: Record<string, unknown>;
+  EventSource?: unknown;
+  fetch?: typeof fetch;
+  localStorage?: Storage;
+};
+if (!globals.window) globals.window = {};
+(globalThis as { IS_REACT_ACT_ENVIRONMENT?: boolean })
+  .IS_REACT_ACT_ENVIRONMENT = true;
+
+class MemoryStorage implements Storage {
+  #data = new Map<string, string>();
+
+  get length(): number {
+    return this.#data.size;
+  }
+
+  clear(): void {
+    this.#data.clear();
+  }
+
+  getItem(key: string): string | null {
+    return this.#data.has(key) ? this.#data.get(key)! : null;
+  }
+
+  key(index: number): string | null {
+    return Array.from(this.#data.keys())[index] ?? null;
+  }
+
+  removeItem(key: string): void {
+    this.#data.delete(key);
+  }
+
+  setItem(key: string, value: string): void {
+    this.#data.set(key, value);
+  }
+}
+
+if (!globals.localStorage) {
+  globals.localStorage = new MemoryStorage();
+}
+const windowObj = globals.window as {
+  localStorage?: Storage;
+  location?: { pathname: string; search: string };
+};
+windowObj.localStorage = globals.localStorage;
+if (!windowObj.location) {
+  windowObj.location = { pathname: "/workspaces/ws-1/build", search: "" };
+}
+
+const {
+  default: Chat,
+  BuildChatRows,
+  ChatView,
+  bucketBuildChatDisplay,
+  deriveBuildChatActivityState,
+  formatElapsedDuration,
+} = await import("./Chat.tsx");
+const { WorkspaceProvider } = await import("./WorkspaceContext.tsx");
+const { globalStyles } = await import("./styles.ts");
+type BuildDisplayMessage = import("./utils.ts").BuildDisplayMessage;
+type WorkspaceSocketMessage = import("./utils.ts").WorkspaceSocketMessage;
+type BuildChatViewState = import("./Chat.tsx").BuildChatViewState;
+
+type ToolCallSummary = import("./utils.ts").ToolCallSummary;
+
+class FakeEventSource {
+  static instances: FakeEventSource[] = [];
+  onmessage: ((event: MessageEvent<string>) => void) | null = null;
+  url: string;
+  closed = false;
+
+  constructor(url: string) {
+    this.url = url;
+    FakeEventSource.instances.push(this);
+  }
+
+  close() {
+    this.closed = true;
+  }
+
+  emit(message: WorkspaceSocketMessage, offset = 1) {
+    this.onmessage?.(
+      new MessageEvent("message", {
+        data: JSON.stringify({ offset, data: message }),
+      }),
+    );
+  }
+}
+
+function makeTool(id: string, name = "tool_name"): ToolCallSummary {
+  return {
+    key: id,
+    id,
+    actionCallId: id,
+    name,
+    status: "completed",
+  };
+}
+
+function makeChatState(
+  overrides: Partial<BuildChatViewState> = {},
+): BuildChatViewState {
+  const baseRun = {
+    id: "run-1",
+    status: "idle" as const,
+    messages: [] as Array<{ role: string; content: string }>,
+    traces: [],
+    toolInserts: [],
+    displayMessages: [] as BuildDisplayMessage[],
+  };
+  const mergedRun = {
+    ...baseRun,
+    ...(overrides.run ?? {}),
+  };
+  return {
+    toolCalls: [],
+    chatDraft: "",
+    setChatDraft: () => {},
+    chatSending: false,
+    chatError: null,
+    setChatError: () => {},
+    toolCallsOpen: {},
+    setToolCallsOpen: () => {},
+    optimisticUser: null,
+    setOptimisticUser: () => {},
+    streamingAssistant: null,
+    setStreamingAssistant: () => {},
+    resetChat: async () => {},
+    sendMessage: async () => {},
+    stopChat: async () => {},
+    loadChat: async () => {},
+    ...overrides,
+    run: mergedRun,
+  };
+}
+
+Deno.test("bucketBuildChatDisplay collapses adjacent non-message rows into one activity block", () => {
+  const display: BuildDisplayMessage[] = [
+    { kind: "message", role: "user", content: "start" },
+    { kind: "tool", toolCallId: "tool-1", toolSummary: makeTool("tool-1") },
+    { kind: "tool", toolCallId: "tool-2", toolSummary: makeTool("tool-2") },
+    { kind: "reasoning", reasoningId: "r-1", content: "old" },
+    { kind: "reasoning", reasoningId: "r-2", content: "latest" },
+    { kind: "message", role: "assistant", content: "done" },
+  ];
+
+  const buckets = bucketBuildChatDisplay(display);
+  assertEquals(buckets.map((bucket) => bucket.kind), [
+    "message",
+    "activity",
+    "message",
+  ]);
+
+  const activityBucket = buckets[1];
+  assert(activityBucket && activityBucket.kind === "activity");
+  assertEquals(activityBucket.entries.length, 4);
+  assertEquals(activityBucket.latestContent, "latest");
+  assertEquals(activityBucket.reasoningCount, 2);
+  assertEquals(activityBucket.toolCount, 2);
+  assertEquals(activityBucket.latestToolLabel, null);
+});
+
+Deno.test("bucketBuildChatDisplay clears stale tool preview when new reasoning starts", () => {
+  const display: BuildDisplayMessage[] = [
+    { kind: "reasoning", reasoningId: "r-1", content: "step 1" },
+    { kind: "tool", toolCallId: "tool-1", toolSummary: makeTool("tool-1") },
+    { kind: "reasoning", reasoningId: "r-2", content: "step 2" },
+  ];
+
+  const buckets = bucketBuildChatDisplay(display);
+  assertEquals(buckets.map((bucket) => bucket.kind), ["activity"]);
+  const activityBucket = buckets[0];
+  assert(activityBucket && activityBucket.kind === "activity");
+  assertEquals(activityBucket.latestContent, "step 2");
+  assertEquals(activityBucket.latestToolLabel, null);
+});
+
+Deno.test("bucketBuildChatDisplay preserves non-adjacent boundaries", () => {
+  const display: BuildDisplayMessage[] = [
+    { kind: "tool", toolCallId: "tool-1", toolSummary: makeTool("tool-1") },
+    { kind: "message", role: "assistant", content: "mid" },
+    { kind: "tool", toolCallId: "tool-2", toolSummary: makeTool("tool-2") },
+  ];
+
+  const buckets = bucketBuildChatDisplay(display);
+  assertEquals(buckets.map((bucket) => bucket.kind), [
+    "activity",
+    "message",
+    "activity",
+  ]);
+});
+
+Deno.test("BuildChatRows renders latest activity preview and toggles full details", async () => {
+  const display: BuildDisplayMessage[] = [
+    { kind: "tool", toolCallId: "tool-1", toolSummary: makeTool("tool-1") },
+    { kind: "tool", toolCallId: "tool-2", toolSummary: makeTool("tool-2") },
+    {
+      kind: "reasoning",
+      reasoningId: "r-1",
+      content: "first reasoning",
+      reasoningRaw: { step: 1 },
+    },
+    {
+      kind: "reasoning",
+      reasoningId: "r-2",
+      content: "latest reasoning",
+      reasoningRaw: { step: 2 },
+    },
+  ];
+
+  let renderer: TestRenderer.ReactTestRenderer | null = null;
+  try {
+    await act(async () => {
+      renderer = TestRenderer.create(<BuildChatRows display={display} />);
+    });
+    assert(renderer);
+
+    const titles = renderer.root.findAll((node: ReactTestInstance) =>
+      node.props.className === "activity-toggle-title"
+    );
+    assertEquals(titles.length, 1);
+    assertEquals(String(titles[0].children.join("")), "Activity");
+
+    const reasoningBadges = renderer.root.findAll((node: ReactTestInstance) =>
+      node.type === "span" &&
+      typeof node.props.className === "string" &&
+      node.props.className.includes("activity-count-badge-reasoning")
+    );
+    assertEquals(reasoningBadges.length, 1);
+    assertEquals(String(reasoningBadges[0].children.join("")), "Reasoning: 2");
+
+    const toolBadges = renderer.root.findAll((node: ReactTestInstance) =>
+      node.type === "span" &&
+      typeof node.props.className === "string" &&
+      node.props.className.includes("activity-count-badge-tool")
+    );
+    assertEquals(toolBadges.length, 1);
+    assertEquals(String(toolBadges[0].children.join("")), "Tool calls: 2");
+
+    const actions = renderer.root.findAll((node: ReactTestInstance) =>
+      node.props.className === "activity-toggle-action"
+    );
+    assertEquals(actions.length, 1);
+    assertEquals(String(actions[0].children.join("")), "• Show");
+
+    const toggles = renderer.root.findAll((node: ReactTestInstance) =>
+      node.props.className === "tool-calls-toggle activity-toggle"
+    );
+    assertEquals(toggles.length, 1);
+
+    const previewToolText = renderer.root.findAll((node: ReactTestInstance) =>
+      node.props.className === "activity-preview-tool"
+    ).map((node: ReactTestInstance) => String(node.children.join(" "))).join(
+      "\n",
+    );
+    assert(!previewToolText.includes("Tool call"));
+
+    const previewToolRows = renderer.root.findAll((node: ReactTestInstance) =>
+      node.props.className === "activity-preview-tool"
+    );
+    assertEquals(previewToolRows.length, 0);
+
+    const toolToggle = toggles[0];
+    await act(async () => {
+      toolToggle.props.onClick();
+    });
+
+    const toolCallTitles = renderer.root.findAll((node: ReactTestInstance) =>
+      node.props.className === "tool-call-title" &&
+      String(node.children.join(" ")).includes("Tool call:")
+    );
+    assertEquals(toolCallTitles.length, 2);
+
+    const reasoningRows = renderer.root.findAll((node: ReactTestInstance) =>
+      typeof node.props.className === "string" &&
+      node.props.className.includes("reasoning-row")
+    );
+    assertEquals(reasoningRows.length, 2);
+  } finally {
+    if (renderer) {
+      await act(async () => {
+        renderer?.unmount();
+      });
+    }
+  }
+});
+
+Deno.test("BuildChatRows exposes latest tool label on tool-count badge tooltip", async () => {
+  // One activity bucket: a reasoning step followed by a single tool call.
+  const reasoningEntry: BuildDisplayMessage = {
+    kind: "reasoning",
+    reasoningId: "r-1",
+    content: "thinking",
+    reasoningRaw: { step: 1 },
+  };
+  const toolEntry: BuildDisplayMessage = {
+    kind: "tool",
+    toolCallId: "tool-1",
+    toolSummary: makeTool("tool-1"),
+  };
+  const display: BuildDisplayMessage[] = [reasoningEntry, toolEntry];
+
+  let renderer: TestRenderer.ReactTestRenderer | null = null;
+  try {
+    await act(async () => {
+      renderer = TestRenderer.create(<BuildChatRows display={display} />);
+    });
+    assert(renderer);
+    const root = renderer.root;
+    // The collapsed preview must not contain a dedicated tool row.
+    const isPreviewToolRow = (node: ReactTestInstance) =>
+      node.props.className === "activity-preview-tool";
+    assertEquals(root.findAll(isPreviewToolRow).length, 0);
+
+    // The tool-count badge is expected to reference a tooltip element.
+    const isToolBadge = (node: ReactTestInstance) =>
+      node.type === "span" && typeof node.props.className === "string" &&
+      node.props.className.includes("activity-count-badge-tool");
+    const toolBadges = root.findAll(isToolBadge);
+    assertEquals(toolBadges.length, 1);
+    assert(typeof toolBadges[0].props["aria-describedby"] === "string");
+  } finally {
+    if (renderer) await act(async () => renderer?.unmount());
+  }
+});
+
+Deno.test("BuildChatRows highlights activity badge when count increases", async () => {
+  const initialDisplay: BuildDisplayMessage[] = [
+    {
+      kind: "reasoning",
+      reasoningId: "r-1",
+      content: "thinking",
+      reasoningRaw: { step: 1 },
+    },
+    { kind: "tool", toolCallId: "tool-1", toolSummary: makeTool("tool-1") },
+  ];
+  const increasedDisplay: BuildDisplayMessage[] = [
+    ...initialDisplay,
+    { kind: "tool", toolCallId: "tool-2", toolSummary: makeTool("tool-2") },
+  ];
+
+  // Drive the component's 900ms highlight timeout with a fake clock instead
+  // of sleeping for 950ms of real time. The sleep cost the suite almost a
+  // second per run and left only a 50ms margin, so on a busy machine the
+  // final assertion could run before the timeout callback did. FakeTime is
+  // installed before the first render so that the timeout is scheduled on
+  // the fake clock.
+  const time = new FakeTime();
+  let renderer: TestRenderer.ReactTestRenderer | null = null;
+  try {
+    await act(async () => {
+      renderer = TestRenderer.create(
+        <BuildChatRows display={initialDisplay} />,
+      );
+    });
+    assert(renderer);
+
+    // Tool-count badges that currently carry the `is-highlight` class.
+    const findHighlightedToolBadges = () =>
+      renderer!.root.findAll((node: ReactTestInstance) =>
+        node.type === "span" &&
+        typeof node.props.className === "string" &&
+        node.props.className.includes("activity-count-badge-tool") &&
+        node.props.className.includes("is-highlight")
+      );
+
+    // Nothing is highlighted on the first render.
+    assertEquals(findHighlightedToolBadges().length, 0);
+
+    // Growing the tool count from 1 to 2 highlights the tool badge.
+    await act(async () => {
+      renderer?.update(<BuildChatRows display={increasedDisplay} />);
+    });
+    assertEquals(findHighlightedToolBadges().length, 1);
+
+    // Advance past the timeout inside act() so the state update it triggers
+    // is flushed before the final assertion.
+    await act(async () => {
+      time.tick(950);
+    });
+    assertEquals(findHighlightedToolBadges().length, 0);
+  } finally {
+    if (renderer) {
+      await act(async () => {
+        renderer?.unmount();
+      });
+    }
+    // Always give the real timers back, even when an assertion threw.
+    time.restore();
+  }
+});
+
+Deno.test("Build chat stop button appears only while running and dispatches stop without clearing transcript", async () => {
+  // Globals are swapped for fakes below and put back in `finally`.
+  const originalFetch = globalThis.fetch;
+  const originalEventSource = globalThis.EventSource;
+  const requests: Array<{ url: string; body?: Record<string, unknown> }> = [];
+
+  // Transcript that must still be rendered after the run has been stopped.
+  const preservedMessages = [
+    { role: "user", content: "keep this" },
+    { role: "assistant", content: "assistant stays" },
+  ];
+
+  // Build-run payload shared by both mocked endpoints; only the status varies.
+  const buildRun = (status: string) => ({
+    id: "ws-1",
+    status,
+    messages: preservedMessages,
+    traces: [],
+    toolInserts: [],
+  });
+  const jsonOk = (payload: unknown) =>
+    new Response(JSON.stringify(payload), { status: 200 });
+
+  globalThis.EventSource = FakeEventSource as unknown as typeof EventSource;
+  // Records every request (with its parsed JSON body, if any) and serves the
+  // two endpoints this test expects; any other URL fails the test loudly.
+  globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => {
+    const url = String(input);
+    const rawBody = init?.body;
+    const body = typeof rawBody === "string" && rawBody.length > 0
+      ? JSON.parse(rawBody) as Record<string, unknown>
+      : undefined;
+    requests.push({ url, body });
+
+    if (url.endsWith("/api/workspaces/ws-1")) {
+      // Initial workspace load: the build run is reported as still running.
+      return jsonOk({
+        workspaceId: "ws-1",
+        build: { run: buildRun("running") },
+        test: {
+          run: { status: "idle", messages: [], traces: [], toolInserts: [] },
+        },
+        grade: { graderDecks: [], sessions: [] },
+        session: { messages: [], traces: [] },
+      });
+    }
+    if (url.endsWith("/api/build/stop")) {
+      // Stop acknowledgement: the run is canceled, the transcript unchanged.
+      return jsonOk({ stopped: true, run: buildRun("canceled") });
+    }
+    throw new Error(`Unexpected fetch: ${url}`);
+  }) as typeof fetch;
+
+  let renderer: TestRenderer.ReactTestRenderer | null = null;
+  try {
+    await act(async () => {
+      renderer = TestRenderer.create(
+        <WorkspaceProvider workspaceId="ws-1">
+          <Chat />
+        </WorkspaceProvider>,
+      );
+    });
+    assert(renderer);
+
+    const buttonsWithTestId = (testId: string) =>
+      renderer!.root.findAll((node: ReactTestInstance) =>
+        node.type === "button" && node.props["data-testid"] === testId
+      );
+    const countButtons = (testId: string) => buttonsWithTestId(testId).length;
+
+    // While the run is active only the stop control is offered.
+    assertEquals(countButtons("build-stop"), 1);
+    assertEquals(countButtons("build-send"), 0);
+    assertEquals(countButtons("build-start"), 0);
+
+    const [stopButton] = buttonsWithTestId("build-stop");
+    await act(async () => {
+      stopButton.props.onClick();
+    });
+
+    // The click must hit the stop endpoint for this workspace.
+    const stopReq = requests.find((req) => req.url.endsWith("/api/build/stop"));
+    assert(stopReq);
+    assertEquals(stopReq.body?.workspaceId, "ws-1");
+
+    // After the stop, the composer switches back from "stop" to "send".
+    assertEquals(countButtons("build-stop"), 0);
+    assertEquals(countButtons("build-send"), 1);
+
+    // Both preserved messages are still present in the rendered bubbles.
+    const renderedTranscript = renderer.root
+      .findAll((node: ReactTestInstance) =>
+        node.props.className === "bubble-text" &&
+        typeof node.props.dangerouslySetInnerHTML?.__html === "string"
+      )
+      .map((node: ReactTestInstance) =>
+        String(node.props.dangerouslySetInnerHTML.__html)
+      )
+      .join("\n");
+    assert(renderedTranscript.includes("keep this"));
+    assert(renderedTranscript.includes("assistant stays"));
+  } finally {
+    if (renderer) {
+      await act(async () => {
+        renderer?.unmount();
+      });
+    }
+    globalThis.fetch = originalFetch;
+    globalThis.EventSource = originalEventSource;
+    FakeEventSource.instances = [];
+  }
+});
+
+Deno.test("deriveBuildChatActivityState maps to finite activity taxonomy", () => {
+  // Table-driven: each taxonomy value ("Idle", "Thinking", "Responding",
+  // "Stopped") is reached by exactly one row below.
+  type DeriveArgs = Parameters<typeof deriveBuildChatActivityState>[0];
+  // One table row: the two inputs that vary, and the label they must map to.
+  type ActivityCase = {
+    runStatus: DeriveArgs["runStatus"];
+    streamingAssistant: DeriveArgs["streamingAssistant"];
+    expected: ReturnType<typeof deriveBuildChatActivityState>;
+  };
+
+  // Inputs shared by every row: an empty transcript, `chatSending: false`
+  // and run id "run-1".
+  const display: BuildDisplayMessage[] = [];
+
+  const cases: ActivityCase[] = [
+    // No run in flight.
+    { runStatus: "idle", streamingAssistant: null, expected: "Idle" },
+    // Running, nothing streamed yet.
+    { runStatus: "running", streamingAssistant: null, expected: "Thinking" },
+    // Running, with streamed text that belongs to the same run id.
+    {
+      runStatus: "running",
+      streamingAssistant: { runId: "run-1", turn: 0, text: "partial" },
+      expected: "Responding",
+    },
+    // A completed run is reported as stopped.
+    { runStatus: "completed", streamingAssistant: null, expected: "Stopped" },
+  ];
+
+  // Rows are checked in order; assertEquals throws on the first mismatch, so
+  // a failure points at the earliest state that is mapped incorrectly.
+  for (const { runStatus, streamingAssistant, expected } of cases) {
+    const actual = deriveBuildChatActivityState({
+      runStatus,
+      chatSending: false,
+      display,
+      streamingAssistant,
+      runId: "run-1",
+    });
+    assertEquals(actual, expected);
+  }
+});
+
+Deno.test("ChatView shows active indicator for thinking/responding and clears on stop", async () => {
+  let renderer: TestRenderer.ReactTestRenderer | null = null;
+  try {
+    await act(async () => {
+      renderer = TestRenderer.create(<ChatView state={makeChatState()} />);
+    });
+    assert(renderer);
+
+    // Activity indicator nodes in the current render output.
+    const findIndicators = () =>
+      renderer!.root.findAll((node: ReactTestInstance) =>
+        node.props["data-testid"] === "build-chat-activity-indicator"
+      );
+    // Step 1: the default state shows no indicator.
+    assertEquals(findIndicators().length, 0);
+
+    // Step 2: sending while the run is still idle shows "Thinking".
+    await act(async () => {
+      renderer?.update(
+        <ChatView
+          state={makeChatState({
+            chatSending: true,
+            run: {
+              id: "run-1",
+              status: "idle",
+              messages: [],
+              traces: [],
+              toolInserts: [],
+              displayMessages: [],
+            },
+          })}
+        />,
+      );
+    });
+    const thinking = findIndicators();
+    assertEquals(thinking.length, 1);
+    assertEquals(thinking[0].props["data-activity-state"], "Thinking");
+
+    // Step 3: streamed text for the running run shows "Responding".
+    await act(async () => {
+      renderer?.update(
+        <ChatView
+          state={makeChatState({
+            run: {
+              id: "run-1",
+              status: "running",
+              messages: [],
+              traces: [],
+              toolInserts: [],
+              displayMessages: [],
+            },
+            streamingAssistant: {
+              runId: "run-1",
+              turn: 0,
+              text: "stream chunk",
+            },
+          })}
+        />,
+      );
+    });
+    const responding = findIndicators();
+    assertEquals(responding.length, 1);
+    assertEquals(responding[0].props["data-activity-state"], "Responding");
+
+    // At least one left-aligned row is rendered at this point.
+    const streamingRows = renderer.root.findAll((node: ReactTestInstance) =>
+      node.props.className === "imessage-row left"
+    );
+    assert(streamingRows.length > 0);
+
+    // Step 4: a completed run removes the indicator again.
+    await act(async () => {
+      renderer?.update(
+        <ChatView
+          state={makeChatState({
+            run: {
+              id: "run-1",
+              status: "completed",
+              messages: [],
+              traces: [],
+              toolInserts: [],
+              displayMessages: [],
+            },
+          })}
+        />,
+      );
+    });
+    assertEquals(findIndicators().length, 0);
+  } finally {
+    if (renderer) {
+      await act(async () => {
+        renderer?.unmount();
+      });
+    }
+  }
+});
+
+Deno.test("ChatView elapsed timer starts, stops, and resets per active cycle", async () => {
+  const time = new FakeTime(new Date("2026-01-01T00:00:00Z"));
+  let renderer: TestRenderer.ReactTestRenderer | null = null;
+  try {
+    await act(async () => {
+      renderer = TestRenderer.create(
+        <ChatView state={makeChatState({ chatSending: true })} />,
+      );
+    });
+    assert(renderer);
+
+    // Text currently shown by the elapsed-time element.
+    const timerText = () => {
+      const [timer] = renderer!.root.findAll((node: ReactTestInstance) =>
+        node.props["data-testid"] === "build-chat-activity-timer"
+      );
+      return String(timer.children.join(""));
+    };
+    // A fresh active cycle starts at zero and only shows whole seconds.
+    assertEquals(timerText(), "00:00");
+    await act(async () => {
+      time.tick(2300);
+    });
+    assertEquals(timerText(), "00:02");
+
+    // Completing the run ends the cycle and removes the indicator.
+    await act(async () => {
+      renderer?.update(
+        <ChatView
+          state={makeChatState({
+            run: {
+              id: "run-1",
+              status: "completed",
+              messages: [],
+              traces: [],
+              toolInserts: [],
+              displayMessages: [],
+            },
+          })}
+        />,
+      );
+    });
+    const isIndicator = (node: ReactTestInstance) =>
+      node.props["data-testid"] === "build-chat-activity-indicator";
+    assertEquals(renderer.root.findAll(isIndicator).length, 0);
+
+    // The next active cycle counts from zero again.
+    await act(async () => {
+      renderer?.update(
+        <ChatView state={makeChatState({ chatSending: true })} />,
+      );
+    });
+    assertEquals(timerText(), "00:00");
+  } finally {
+    if (renderer) await act(async () => renderer?.unmount());
+    time.restore();
+  }
+});
+
+Deno.test("reduced-motion fallback disables shimmer while status and timer remain visible", async () => {
+  // The stylesheet is only checked for the presence of these three fragments.
+  const requiredCss = [
+    "@media (prefers-reduced-motion: reduce)",
+    ".build-chat-activity-glimmer",
+    "opacity: 0;",
+  ];
+  for (const fragment of requiredCss) assert(globalStyles.includes(fragment));
+
+  let renderer: TestRenderer.ReactTestRenderer | null = null;
+  try {
+    await act(async () => {
+      renderer = TestRenderer.create(
+        <ChatView state={makeChatState({ chatSending: true })} />,
+      );
+    });
+    assert(renderer);
+    // While active, exactly one status label and one timer are rendered.
+    const labels = renderer.root.findAll((node: ReactTestInstance) =>
+      node.props.className === "build-chat-activity-label"
+    );
+    const timers = renderer.root.findAll((node: ReactTestInstance) =>
+      node.props["data-testid"] === "build-chat-activity-timer"
+    );
+    assertEquals(labels.length, 1);
+    assertEquals(timers.length, 1);
+  } finally {
+    if (renderer) await act(async () => renderer?.unmount());
+  }
+});
+
+Deno.test("formatElapsedDuration renders mm:ss", () => {
+  // Minutes are not wrapped at 60: one hour and nine seconds is "60:09".
+  const cases = [[0, "00:00"], [61, "01:01"], [3609, "60:09"]] as const;
+  for (const [s, text] of cases) assertEquals(formatElapsedDuration(s), text);
+});
diff --git a/simulator-ui/src/Chat.tsx b/simulator-ui/src/Chat.tsx
index d7d4da089..e6f027e4d 100644
--- a/simulator-ui/src/Chat.tsx
+++ b/simulator-ui/src/Chat.tsx
@@ -1,26 +1,556 @@
-import React, { useCallback, useEffect, useMemo, useRef } from "react";
-import { classNames, type ToolCallSummary } from "./utils.ts";
+import React, { useCallback, useEffect, useRef, useState } from "react";
+import {
+  type BuildDisplayMessage,
+  classNames,
+  renderMarkdown,
+} from "./utils.ts";
 import Button from "./gds/Button.tsx";
+import Badge from "./gds/Badge.tsx";
+import Icon from "./gds/Icon.tsx";
 import { ToolCallBubble } from "./shared.tsx";
 import { useBuildChat } from "./BuildChatContext.tsx";
 
-export default function Chat() {
+type BuildChatTranscriptBucket = // One renderable unit produced by bucketBuildChatDisplay.
+  | {
+    kind: "message"; // Wraps a single display entry of kind "message".
+    key: string; // Used as the React key of the rendered row.
+    entry: BuildDisplayMessage;
+  }
+  | {
+    kind: "activity"; // A run of consecutive non-message entries shown as one collapsible row.
+    key: string; // Used as the React key and as the key for open/flash state.
+    entries: BuildDisplayMessage[]; // The grouped entries, in display order.
+    latestContent: string; // Content of the last reasoning entry with non-blank text; "" if none.
+    latestToolLabel: string | null; // Label of the last summarized tool after the last reasoning entry; null if none.
+    reasoningCount: number; // Number of "reasoning" entries in `entries`.
+    toolCount: number; // Number of "tool" entries in `entries`.
+  };
+
+type ActivityBadgeKey = "reasoning" | "tool"; // The two count badges on an activity row.
+type ActivityBadgeFlashState = Record<ActivityBadgeKey, boolean>; // Per badge: whether the highlight is currently on.
+type ActivityCountSnapshot = Record<ActivityBadgeKey, number>; // Per badge: the count recorded by the previous effect run.
+
+/**
+ * Groups a flat build-chat display list into the buckets rendered as rows.
+ *
+ * - Each `message` entry becomes its own `message` bucket.
+ * - A `tool` or `reasoning` entry opens an `activity` bucket that absorbs every
+ *   following entry up to (not including) the next `message`, whatever its
+ *   kind; only `reasoning` and `tool` entries are counted.
+ * - An entry of any other kind outside such a run is skipped.
+ *
+ * For an activity bucket, `latestContent` is the content of the last reasoning
+ * entry whose content is a non-blank string ("" when there is none), and
+ * `latestToolLabel` describes the last tool entry with a `toolSummary` that
+ * comes after the last reasoning entry of the run (or anywhere in the run when
+ * it holds no reasoning entry); it is `null` when no such tool exists.
+ *
+ * @example
+ * // [reasoning, tool "a", message, tool "b"] produces three buckets:
+ * //   activity (reasoningCount 1, toolCount 1, label "Tool call: a"),
+ * //   message,
+ * //   activity (reasoningCount 0, toolCount 1, label "Tool call: b").
+ *
+ * @param display Transcript entries in display order; the array is not mutated.
+ * @returns Buckets in display order. Each key embeds the index of the bucket's
+ *   first entry in `display`.
+ */
+export function bucketBuildChatDisplay(
+  display: BuildDisplayMessage[],
+): BuildChatTranscriptBucket[] {
+  const buckets: BuildChatTranscriptBucket[] = [];
+  let index = 0;
+  while (index < display.length) {
+    const entry = display[index];
+    if (entry.kind === "message") {
+      buckets.push({
+        kind: "message",
+        key: `message-${index}-${entry.role ?? "assistant"}`,
+        entry,
+      });
+      index += 1;
+      continue;
+    }
+    // Entries of any other kind that are not part of an activity run are dropped.
+    if (entry.kind !== "tool" && entry.kind !== "reasoning") {
+      index += 1;
+      continue;
+    }
+
+    // Find the end of the run. It always contains `entry` itself, so `end`
+    // finishes past `index` and the outer loop is guaranteed to advance.
+    let end = index;
+    while (end < display.length && display[end].kind !== "message") {
+      end += 1;
+    }
+    const entries = display.slice(index, end);
+
+    // Single pass over the run for the counts and both "latest" values.
+    let reasoningCount = 0;
+    let toolCount = 0;
+    let latestContent = "";
+    let latestToolLabel: string | null = null;
+    for (const item of entries) {
+      if (item.kind === "reasoning") {
+        reasoningCount += 1;
+        if (typeof item.content === "string" && item.content.trim()) {
+          latestContent = item.content;
+        }
+        // A newer reasoning step discards any tool label seen before it, even
+        // when that reasoning entry has no usable content of its own.
+        latestToolLabel = null;
+      } else if (item.kind === "tool") {
+        toolCount += 1;
+        // Tools without a summary are counted but never label the badge.
+        if (item.toolSummary) {
+          const rawName = item.toolSummary.name;
+          latestToolLabel = typeof rawName === "string" && rawName.length > 0
+            ? `Tool call: ${rawName}`
+            : "Tool call";
+        }
+      }
+    }
+
+    buckets.push({
+      kind: "activity",
+      key: `activity-${index}`,
+      entries,
+      latestContent,
+      latestToolLabel,
+      reasoningCount,
+      toolCount,
+    });
+    index = end;
+  }
+  return buckets;
+}
+
+/**
+ * Renders the build-chat transcript: one bubble per message bucket and one
+ * collapsible "Activity" row per activity bucket. A count badge gets the
+ * `is-highlight` class for 900ms whenever its bucket's count has grown.
+ */
+export function BuildChatRows(props: { display: BuildDisplayMessage[] }) {
+  const { display } = props;
+  const [activityBucketsOpen, setActivityBucketsOpen] = useState<
+    Record<string, boolean>
+  >({});
+  const [activityBadgeFlash, setActivityBadgeFlash] = useState<
+    Record<string, ActivityBadgeFlashState>
+  >({});
+  const activityBadgeTimersRef = useRef<
+    Record<
+      string,
+      Partial<Record<ActivityBadgeKey, ReturnType<typeof setTimeout>>>
+    >
+  >({});
+  const previousActivityCountsRef = useRef<
+    Record<string, ActivityCountSnapshot>
+  >({});
+  const rows: React.ReactNode[] = [];
+  // Memoized on `display`: the count-diffing effect below lists `buckets` as a
+  // dependency, and a fresh array per render made it re-run after every render.
+  const buckets = React.useMemo(
+    () => bucketBuildChatDisplay(display),
+    [display],
+  );
+
+  const clearActivityBadgeFlashTimer = useCallback(
+    (bucketKey: string, badgeKey: ActivityBadgeKey) => {
+      const bucketTimers = activityBadgeTimersRef.current[bucketKey];
+      if (!bucketTimers) return;
+      const timerId = bucketTimers[badgeKey];
+      if (timerId !== undefined) {
+        clearTimeout(timerId);
+      }
+      delete bucketTimers[badgeKey];
+      if (
+        bucketTimers.reasoning === undefined && bucketTimers.tool === undefined
+      ) {
+        delete activityBadgeTimersRef.current[bucketKey];
+      }
+    },
+    [],
+  );
+
+  const triggerActivityBadgeFlash = useCallback(
+    (bucketKey: string, badgeKey: ActivityBadgeKey) => {
+      setActivityBadgeFlash((prev) => ({
+        ...prev,
+        [bucketKey]: {
+          reasoning: prev[bucketKey]?.reasoning ?? false,
+          tool: prev[bucketKey]?.tool ?? false,
+          [badgeKey]: true,
+        },
+      }));
+
+      clearActivityBadgeFlashTimer(bucketKey, badgeKey);
+
+      const bucketTimers = activityBadgeTimersRef.current[bucketKey] ?? {};
+      bucketTimers[badgeKey] = setTimeout(() => {
+        clearActivityBadgeFlashTimer(bucketKey, badgeKey);
+        setActivityBadgeFlash((prev) => {
+          const existing = prev[bucketKey];
+          if (!existing) return prev;
+          const nextBucketState: ActivityBadgeFlashState = {
+            reasoning: badgeKey === "reasoning" ? false : existing.reasoning,
+            tool: badgeKey === "tool" ? false : existing.tool,
+          };
+          if (!nextBucketState.reasoning && !nextBucketState.tool) {
+            const { [bucketKey]: _, ...rest } = prev;
+            return rest;
+          }
+          return { ...prev, [bucketKey]: nextBucketState };
+        });
+      }, 900);
+      activityBadgeTimersRef.current[bucketKey] = bucketTimers;
+    },
+    [clearActivityBadgeFlashTimer],
+  );
+
+  // Compare each activity bucket's counts with the previous snapshot, flash the
+  // badges that grew, and drop timers/flash state of buckets that disappeared.
+  useEffect(() => {
+    const nextCounts: Record<string, ActivityCountSnapshot> = {};
+    const activeBucketKeys = new Set<string>();
+
+    buckets.forEach((bucket) => {
+      if (bucket.kind !== "activity") return;
+      activeBucketKeys.add(bucket.key);
+      nextCounts[bucket.key] = {
+        reasoning: bucket.reasoningCount,
+        tool: bucket.toolCount,
+      };
+      const previousCounts = previousActivityCountsRef.current[bucket.key];
+      if (!previousCounts) return;
+      if (bucket.reasoningCount > previousCounts.reasoning) {
+        triggerActivityBadgeFlash(bucket.key, "reasoning");
+      }
+      if (bucket.toolCount > previousCounts.tool) {
+        triggerActivityBadgeFlash(bucket.key, "tool");
+      }
+    });
+
+    Object.keys(previousActivityCountsRef.current).forEach((bucketKey) => {
+      if (activeBucketKeys.has(bucketKey)) return;
+      clearActivityBadgeFlashTimer(bucketKey, "reasoning");
+      clearActivityBadgeFlashTimer(bucketKey, "tool");
+    });
+
+    previousActivityCountsRef.current = nextCounts;
+
+    setActivityBadgeFlash((prev) => {
+      let changed = false;
+      const next: Record<string, ActivityBadgeFlashState> = {};
+      Object.entries(prev).forEach(([bucketKey, value]) => {
+        if (!activeBucketKeys.has(bucketKey)) {
+          changed = true;
+          return;
+        }
+        if (!value.reasoning && !value.tool) {
+          changed = true;
+          return;
+        }
+        next[bucketKey] = value;
+      });
+      return changed ? next : prev;
+    });
+  }, [buckets, clearActivityBadgeFlashTimer, triggerActivityBadgeFlash]);
+
+  // On unmount, cancel every pending highlight timeout.
+  useEffect(() => {
+    return () => {
+      for (const timers of Object.values(activityBadgeTimersRef.current)) {
+        clearTimeout(timers.reasoning);
+        clearTimeout(timers.tool);
+      }
+      activityBadgeTimersRef.current = {};
+    };
+  }, []);
+
+  buckets.forEach((bucket) => {
+    if (bucket.kind === "message") {
+      const role = bucket.entry.role ?? "assistant";
+      rows.push(
+        <div
+          key={bucket.key}
+          className={classNames(
+            "imessage-row",
+            role === "user" ? "right" : "left",
+          )}
+        >
+          <div
+            className={classNames(
+              "imessage-bubble",
+              role === "user" ? "right" : "left",
+            )}
+            title={role}
+          >
+            <div
+              className="bubble-text"
+              dangerouslySetInnerHTML={{
+                __html: renderMarkdown(bucket.entry.content ?? ""),
+              }}
+            />
+          </div>
+        </div>,
+      );
+      return;
+    }
+    if (bucket.kind === "activity") {
+      const isOpen = Boolean(activityBucketsOpen[bucket.key]);
+      const latestReasoning = bucket.latestContent.trim().length > 0
+        ? bucket.latestContent
+        : bucket.reasoningCount > 0
+        ? "Reasoning in progress"
+        : "";
+      rows.push(
+        <div
+          key={bucket.key}
+          className="tool-calls-collapsible activity-collapsible"
+        >
+          <button
+            type="button"
+            className={classNames(
+              "tool-calls-toggle activity-toggle",
+              isOpen && "is-open",
+            )}
+            onClick={() =>
+              setActivityBucketsOpen((prev) => ({
+                ...prev,
+                [bucket.key]: !prev[bucket.key],
+              }))}
+          >
+            <span className="tool-calls-toggle-label">
+              <span className="activity-toggle-title">Activity</span>
+              <Badge
+                variant="ghost"
+                className={classNames(
+                  "activity-count-badge activity-count-badge-reasoning",
+                  activityBadgeFlash[bucket.key]?.reasoning && "is-highlight",
+                )}
+                tooltip={bucket.latestContent}
+              >
+                Reasoning: {bucket.reasoningCount}
+              </Badge>
+              <Badge
+                variant="ghost"
+                className={classNames(
+                  "activity-count-badge activity-count-badge-tool",
+                  activityBadgeFlash[bucket.key]?.tool && "is-highlight",
+                )}
+                tooltip={bucket.latestToolLabel}
+              >
+                Tool calls: {bucket.toolCount}
+              </Badge>
+              <span className="activity-toggle-action">
+                • {isOpen ? "Hide" : "Show"}
+              </span>
+            </span>
+            <span className="activity-toggle-chevron" aria-hidden="true">
+              <Icon name="chevronDown" size={10} />
+            </span>
+          </button>
+          {!isOpen && (
+            <div className="activity-preview">
+              {latestReasoning && (
+                <div
+                  className="activity-preview-reasoning"
+                  dangerouslySetInnerHTML={{
+                    __html: renderMarkdown(latestReasoning),
+                  }}
+                />
+              )}
+            </div>
+          )}
+          {isOpen && (
+            <div className="tool-calls-list activity-details">
+              {bucket.entries.map((entry, activityIdx) => {
+                if (entry.kind === "tool") {
+                  const tool = entry.toolSummary;
+                  if (!tool) return null;
+                  const toolId = tool.id ?? entry.toolCallId ??
+                    `tool-${bucket.key}-${activityIdx}`;
+                  return (
+                    <div key={`tool-${toolId}-${activityIdx}`}>
+                      <ToolCallBubble call={tool} />
+                    </div>
+                  );
+                }
+                if (entry.kind !== "reasoning") return null;
+                return (
+                  <div
+                    key={`reasoning-${bucket.key}-${activityIdx}-${
+                      entry.reasoningId ?? "r"
+                    }`}
+                    className="imessage-row tool-call-row reasoning-row"
+                  >
+                    <div className="imessage-bubble tool-call-bubble reasoning-bubble">
+                      <div className="tool-call-header">
+                        <div className="tool-call-title">Reasoning</div>
+                      </div>
+                      <div className="tool-call-detail">
+                        <div
+                          className="trace-json reasoning-json"
+                          dangerouslySetInnerHTML={{
+                            __html: renderMarkdown(entry.content ?? ""),
+                          }}
+                        />
+                        {entry.reasoningRaw && (
+                          <details className="reasoning-details">
+                            <summary>Details</summary>
+                            <pre className="trace-json reasoning-json">
+                              {JSON.stringify(entry.reasoningRaw, null, 2)}
+                            </pre>
+                          </details>
+                        )}
+                      </div>
+                    </div>
+                  </div>
+                );
+              })}
+            </div>
+          )}
+        </div>,
+      );
+    }
+  });
+  return <>{rows}</>;
+}
+
+export type BuildChatActivityState = // Label returned by deriveBuildChatActivityState.
+  | "Idle" // Not active, and the run status is none of completed/error/canceled.
+  | "Thinking" // Active, no assistant text visible.
+  | "Responding" // Active, assistant text visible.
+  | "Stopped"; // Not active; the run is completed, errored or canceled.
+
+/**
+ * Maps run and send state onto the build-chat activity label.
+ *
+ * While active (`chatSending`, or a "running" run) the result is "Responding"
+ * once assistant text is visible and "Thinking" before that. Text is visible
+ * when the streaming buffer for `runId` is non-blank, or when any assistant
+ * message in `display` (role defaults to assistant) is non-blank. Otherwise:
+ * "Stopped" for completed/error/canceled runs, "Idle" for the rest.
+ */
+export function deriveBuildChatActivityState(
+  args: {
+    runStatus: "idle" | "running" | "completed" | "error" | "canceled";
+    chatSending: boolean;
+    display: BuildDisplayMessage[];
+    streamingAssistant: { runId: string; turn: number; text: string } | null;
+    runId: string;
+  },
+): BuildChatActivityState {
+  const { runStatus, chatSending, display, streamingAssistant, runId } = args;
+  if (!chatSending && runStatus !== "running") {
+    const finished = ["completed", "error", "canceled"].includes(runStatus);
+    return finished ? "Stopped" : "Idle";
+  }
+  const streamedText = streamingAssistant && streamingAssistant.runId === runId
+    ? streamingAssistant.text.trim()
+    : "";
+  if (streamedText.length > 0) return "Responding";
+  for (const entry of display) {
+    if (entry.kind !== "message") continue;
+    if ((entry.role ?? "assistant") !== "assistant") continue;
+    if (typeof entry.content !== "string") continue;
+    if (entry.content.trim().length > 0) return "Responding";
+  }
+  return "Thinking";
+}
+
+/** Formats seconds as zero-padded `mm:ss`; minutes are not wrapped (3609 -> "60:09"). */
+export function formatElapsedDuration(totalSeconds: number): string {
+  // Negative and non-finite input (NaN, Infinity) renders as "00:00", never "NaN:NaN".
+  const safeSeconds = Number.isFinite(totalSeconds) ? totalSeconds : 0;
+  const clamped = Math.max(0, Math.floor(safeSeconds));
+  const pad = (value: number) => String(value).padStart(2, "0");
+  return `${pad(Math.floor(clamped / 60))}:${pad(clamped % 60)}`;
+}
+
+export type BuildChatViewState = ReturnType<typeof useBuildChat>; // State object ChatView receives via props: whatever useBuildChat returns.
+
+/**
+ * Status strip for the "Thinking"/"Responding" states (null otherwise): a label
+ * and an mm:ss timer counted from when the indicator last became active.
+ */
+function BuildChatActivityIndicator(
+  props: { state: BuildChatActivityState },
+) {
+  const { state } = props;
+  const isThinking = state === "Thinking";
+  const active = isThinking || state === "Responding";
+  const [startedAtMs, setStartedAtMs] = useState<number | null>(null);
+  // `tick` is bumped once per second while active, which re-renders the timer.
+  const [tick, setTick] = useState(0);
+
+  useEffect(() => {
+    if (active) {
+      setStartedAtMs((previous) => previous ?? Date.now());
+      const timer = globalThis.setInterval(() => setTick((n) => n + 1), 1000);
+      return () => globalThis.clearInterval(timer);
+    }
+    setStartedAtMs(null);
+    setTick(0);
+    return undefined;
+  }, [active]);
+
+  if (!active) return null;
+
+  // Elapsed time is derived from the wall clock at render time.
+  const elapsedMs = startedAtMs === null ? 0 : Date.now() - startedAtMs;
+  const modifierClass = isThinking
+    ? "build-chat-activity-indicator-thinking"
+    : "build-chat-activity-indicator-responding";
+  return (
+    <div
+      className={classNames("build-chat-activity-indicator", modifierClass)}
+      role="status"
+      aria-live="polite"
+      data-testid="build-chat-activity-indicator"
+      data-activity-state={state}
+      data-activity-tick={tick}
+    >
+      <span className="build-chat-activity-glimmer" aria-hidden="true" />
+      <span className="build-chat-activity-spinner" aria-hidden="true" />
+      <span className="build-chat-activity-label">
+        {isThinking ? "Assistant is thinking" : "Assistant is responding"}
+      </span>
+      <span
+        className="build-chat-activity-timer"
+        data-testid="build-chat-activity-timer"
+      >
+        {formatElapsedDuration(Math.floor(elapsedMs / 1000))}
+      </span>
+    </div>
+  );
+}
+
+export function ChatView(props: { state: BuildChatViewState }) {
   const {
     run,
-    toolCalls,
     chatDraft,
     setChatDraft,
     chatSending,
     chatError,
     setChatError,
-    toolCallsOpen,
-    setToolCallsOpen,
     optimisticUser,
     setOptimisticUser,
     streamingAssistant,
+    stopChat,
     sendMessage,
-  } = useBuildChat();
+  } = props.state;
   const transcriptRef = useRef<HTMLDivElement | null>(null);
+  const composerInputRef = useRef<HTMLTextAreaElement | null>(null);
+  const display = run.displayMessages ?? [];
+  const activityState = deriveBuildChatActivityState({
+    runStatus: run.status,
+    chatSending,
+    display,
+    streamingAssistant,
+    runId: run.id,
+  });
 
   useEffect(() => {
     const el = transcriptRef.current;
@@ -29,26 +559,28 @@ export default function Chat() {
       el.scrollTop = el.scrollHeight;
     });
     return () => cancelAnimationFrame(frame);
-  }, [run.messages.length, streamingAssistant?.text, optimisticUser?.id]);
-
-  const toolBuckets = useMemo(() => {
-    const inserts = run.toolInserts ?? [];
-    const byCall = new Map(toolCalls.map((call) => [call.id, call]));
-    const buckets = new Map<number, ToolCallSummary[]>();
-    for (const insert of inserts) {
-      if (!insert.actionCallId) continue;
-      const call = byCall.get(insert.actionCallId);
-      if (!call) continue;
-      const idx = typeof insert.index === "number" ? insert.index : 0;
-      const bucket = buckets.get(idx) ?? [];
-      bucket.push(call);
-      buckets.set(idx, bucket);
+  }, [
+    run.displayMessages?.length ?? run.messages.length,
+    optimisticUser?.id,
+    optimisticUser?.text,
+    streamingAssistant?.runId,
+    streamingAssistant?.turn,
+    streamingAssistant?.text,
+  ]);
+
+  useEffect(() => {
+    if (run.status === "error" && run.error) {
+      console.error("[build-bot] run error (state)", run.error);
     }
-    return buckets;
-  }, [run.toolInserts, toolCalls]);
+  }, [run.status, run.error]);
+
+  useEffect(() => {
+    if (chatSending || run.status === "running") return;
+    composerInputRef.current?.focus();
+  }, [chatSending, run.status]);
 
   const canStartAssistant = run.status !== "running" && !chatSending &&
-    run.messages.length === 0 && !streamingAssistant?.text;
+    run.messages.length === 0;
 
   const handleSendChat = useCallback(async () => {
     const message = chatDraft.trim();
@@ -72,38 +604,13 @@ export default function Chat() {
     await sendMessage("");
   }, [chatDraft, handleSendChat, sendMessage]);
 
-  const renderToolBucket = useCallback(
-    (index: number, rows: React.ReactNode[]) => {
-      const bucket = toolBuckets.get(index);
-      if (!bucket || bucket.length === 0) return;
-      const isOpen = Boolean(toolCallsOpen[index]);
-      rows.push(
-        <div key={`tool-bucket-${index}`} className="tool-calls-collapsible">
-          <button
-            type="button"
-            className="tool-calls-toggle"
-            onClick={() =>
-              setToolCallsOpen((prev) => ({ ...prev, [index]: !prev[index] }))}
-          >
-            <span className="tool-calls-toggle-label">
-              Tool calls ({bucket.length}) · {isOpen ? "Hide" : "Show"}
-            </span>
-          </button>
-          {isOpen && (
-            <div className="tool-calls-list">
-              {bucket.map((call, callIdx) => (
-                <ToolCallBubble
-                  key={`tool-${call.id}-${index}-${callIdx}`}
-                  call={call}
-                />
-              ))}
-            </div>
-          )}
-        </div>,
-      );
-    },
-    [toolBuckets, toolCallsOpen, setToolCallsOpen],
-  );
+  const handleStopChat = useCallback(async () => {
+    try {
+      await stopChat();
+    } catch (err) {
+      setChatError(err instanceof Error ? err.message : String(err));
+    }
+  }, [setChatError, stopChat]);
 
   return (
     <div className="test-bot-sidebar flex-column gap-8 flex-1 build-chat-panel">
@@ -113,51 +620,41 @@ export default function Chat() {
             Use this chat to update deck files via Gambit Bot. Tool calls show
             file writes and why they happened.
           </div>
-          {run.messages.length === 0 && (
-            <div className="placeholder">No messages yet.</div>
-          )}
-          {(() => {
-            const rows: React.ReactNode[] = [];
-            renderToolBucket(0, rows);
-            run.messages.forEach((m, idx) => {
-              rows.push(
-                <div
-                  key={`${m.role}-${idx}`}
-                  className={classNames(
-                    "imessage-row",
-                    m.role === "user" ? "right" : "left",
-                  )}
-                >
-                  <div
-                    className={classNames(
-                      "imessage-bubble",
-                      m.role === "user" ? "right" : "left",
-                    )}
-                    title={m.role}
-                  >
-                    {m.content}
-                  </div>
-                </div>,
-              );
-              renderToolBucket(idx + 1, rows);
-            });
-            return rows;
-          })()}
+          {(run.displayMessages?.length ?? 0) === 0 &&
+            !optimisticUser &&
+            !(streamingAssistant?.runId === run.id &&
+              streamingAssistant.text.length > 0) &&
+            <div className="placeholder">No messages yet.</div>}
+          <BuildChatRows display={display} />
           {optimisticUser && (
-            <div className="imessage-row right">
+            <div
+              key={`optimistic-${optimisticUser.id}`}
+              className="imessage-row right"
+            >
               <div className="imessage-bubble right" title="user">
-                {optimisticUser.text}
+                <div
+                  className="bubble-text"
+                  dangerouslySetInnerHTML={{
+                    __html: renderMarkdown(optimisticUser.text),
+                  }}
+                />
               </div>
             </div>
           )}
-          {streamingAssistant?.text &&
-            streamingAssistant.runId === run.id && (
-            <div className="imessage-row left">
-              <div
-                className="imessage-bubble left imessage-bubble-muted"
-                title="assistant"
-              >
-                {streamingAssistant.text}
+          {streamingAssistant &&
+            streamingAssistant.runId === run.id &&
+            streamingAssistant.text.length > 0 && (
+            <div
+              key={`stream-${streamingAssistant.runId}-${streamingAssistant.turn}`}
+              className="imessage-row left"
+            >
+              <div className="imessage-bubble left" title="assistant">
+                <div
+                  className="bubble-text"
+                  dangerouslySetInnerHTML={{
+                    __html: renderMarkdown(streamingAssistant.text),
+                  }}
+                />
               </div>
             </div>
           )}
@@ -165,6 +662,9 @@ export default function Chat() {
       </div>
       <div className="composer">
         <div className="composer-inputs">
+          <div className="build-chat-activity-sticky">
+            <BuildChatActivityIndicator state={activityState} />
+          </div>
           {canStartAssistant && (
             <div className="placeholder emphasis">
               Start the assistant to begin editing.
@@ -172,6 +672,7 @@ export default function Chat() {
           )}
           <div className="flex-row gap-4 mb-2">
             <textarea
+              ref={composerInputRef}
               className="message-input flex-1"
               rows={1}
               placeholder={canStartAssistant
@@ -191,7 +692,18 @@ export default function Chat() {
               }}
             />
             <div className="composer-actions">
-              {canStartAssistant
+              {run.status === "running"
+                ? (
+                  <Button
+                    variant="ghost"
+                    onClick={handleStopChat}
+                    disabled={chatSending}
+                    data-testid="build-stop"
+                  >
+                    Stop
+                  </Button>
+                )
+                : canStartAssistant
                 ? (
                   <Button
                     variant="primary"
@@ -206,8 +718,7 @@ export default function Chat() {
                   <Button
                     variant="primary"
                     onClick={handleSendChat}
-                    disabled={chatSending || run.status === "running" ||
-                      chatDraft.trim().length === 0}
+                    disabled={chatSending || chatDraft.trim().length === 0}
                     data-testid="build-send"
                   >
                     Send
@@ -224,3 +735,7 @@ export default function Chat() {
     </div>
   );
 }
+
+export default function Chat() {
+  return <ChatView state={useBuildChat()} />;
+}
diff --git a/simulator-ui/src/DocsPage.tsx b/simulator-ui/src/DocsPage.tsx
index ff60d2266..505d3526e 100644
--- a/simulator-ui/src/DocsPage.tsx
+++ b/simulator-ui/src/DocsPage.tsx
@@ -1,13 +1,16 @@
 import Button from "./gds/Button.tsx";
 import { GambitLogo } from "./GambitLogo.tsx";
+import {
+  DEFAULT_GRADE_PATH,
+  DEFAULT_TEST_PATH,
+  DEFAULT_WORKSPACE_DEBUG_PATH,
+} from "./utils.ts";
 
 const GAMBIT_PACKAGE_README =
   "https://github.com/bolt-foundry/gambit/blob/main/README.md";
 const GAMBIT_CLI_DOC =
   "https://github.com/bolt-foundry/gambit/blob/main/docs/cli.md";
-const DEFAULT_TEST_PATH = "/sessions/new/test";
-const DEFAULT_DEBUG_PATH = "/sessions/new/debug";
-const DEFAULT_GRADE_PATH = "/grade";
+const DEFAULT_DEBUG_PATH = DEFAULT_WORKSPACE_DEBUG_PATH;
 
 export default function DocsPage() {
   return (
@@ -50,7 +53,7 @@ export default function DocsPage() {
             <li>
               Some examples have a Test input, these are usually optional.
             </li>
-            <li>Click "Run test bot" to start the conversation.</li>
+            <li>Click "Run scenario" to start the conversation.</li>
             <li>Review the agent's response.</li>
           </ul>
           <h3>If something looks wrong</h3>
@@ -60,7 +63,7 @@ export default function DocsPage() {
           </ul>
           <h3>Grade the agent</h3>
           <ul>
-            <li>Click the "Grade" tab to see all test bot runs.</li>
+            <li>Click the "Grade" tab to see all scenario runs.</li>
             <li>Run graders to measure quality and identify issues.</li>
             <li>Flag grader results you want to keep track of.</li>
           </ul>
diff --git a/simulator-ui/src/GradePage.tsx b/simulator-ui/src/GradePage.tsx
index 635994f62..d167006e1 100644
--- a/simulator-ui/src/GradePage.tsx
+++ b/simulator-ui/src/GradePage.tsx
@@ -21,16 +21,13 @@ import {
   extractTurnContext,
   formatTimestampShort,
   getDurableStreamOffset,
-  getGradeRefFromLocation,
-  getGradeSessionIdFromLocation,
+  getGradeWorkspaceIdFromLocation,
   getScoreClass,
   GRADE_STREAM_ID,
   isTurnsResult,
-  parseGradingRef,
   setDurableStreamOffset,
 } from "./utils.ts";
 import type {
-  CalibrateRef,
   CalibrateResponse,
   CalibrateSession,
   CalibrateStreamMessage,
@@ -42,19 +39,21 @@ import type {
 import PageGrid from "./gds/PageGrid.tsx";
 import PageShell from "./gds/PageShell.tsx";
 import Panel from "./gds/Panel.tsx";
+import { useWorkspaceGrade, useWorkspaceRouting } from "./WorkspaceContext.tsx";
 
 function GradePage(
   {
     setNavActions,
     onAppPathChange,
-    activeSessionId,
+    activeWorkspaceId,
     onFlagsUpdate,
     onOptimisticToggleFlag,
     onOptimisticFlagReason,
+    requestedGradeRunId,
   }: {
     setNavActions?: (actions: React.ReactNode | null) => void;
     onAppPathChange?: (path: string) => void;
-    activeSessionId?: string | null;
+    activeWorkspaceId?: string | null;
     onFlagsUpdate?: (flags: GradingFlag[]) => void;
     onOptimisticToggleFlag?: (item: {
       refId: string;
@@ -62,189 +61,97 @@ function GradePage(
       turnIndex?: number;
     }) => void;
     onOptimisticFlagReason?: (refId: string, reason: string) => void;
+    requestedGradeRunId?: string | null;
   },
 ) {
-  const [loading, setLoading] = useState(true);
-  const [error, setError] = useState<string | null>(null);
-  const [graders, setGraders] = useState<GraderDeckMeta[]>([]);
-  const [sessions, setSessions] = useState<CalibrateSession[]>([]);
-  const [selectedSessionId, setSelectedSessionId] = useState<string | null>(
-    null,
-  );
-  const [selectedGraderId, setSelectedGraderId] = useState<string | null>(null);
-  const [running, setRunning] = useState(false);
-  const [sessionDetail, setSessionDetail] = useState<
-    SessionDetailResponse | null
-  >(null);
+  const workspaceGrade = useWorkspaceGrade();
+  const {
+    loading,
+    error,
+    running,
+    graders,
+    sessions,
+    sessionDetail,
+    loadData,
+    loadSessionDetail,
+    runGrader: runGrade,
+    toggleFlag: toggleGradeFlag,
+    updateFlagReason: updateGradeFlagReason,
+  } = workspaceGrade;
+  const workspaceRouting = useWorkspaceRouting();
   const initialCalibrateSessionRef = useRef<string | null>(
-    getGradeSessionIdFromLocation(),
+    getGradeWorkspaceIdFromLocation(),
   );
-  const initialCalibrateRef = useRef<CalibrateRef>(
-    (() => {
-      const ref = getGradeRefFromLocation();
-      return ref ? parseGradingRef(ref) : {};
-    })(),
+  const [routeGradeRunId, setRouteGradeRunId] = useState<string | null>(
+    requestedGradeRunId ?? null,
+  );
+  const [selectedSessionId, setSelectedSessionId] = useState<string | null>(
+    initialCalibrateSessionRef.current ?? activeWorkspaceId ?? null,
   );
+  const [selectedGraderId, setSelectedGraderId] = useState<string | null>(null);
+  useEffect(() => {
+    setSelectedSessionId((prev) => {
+      if (activeWorkspaceId) return activeWorkspaceId;
+      const requested = initialCalibrateSessionRef.current;
+      if (requested && sessions.some((session) => session.id === requested)) {
+        initialCalibrateSessionRef.current = null;
+        return requested;
+      }
+      if (prev && sessions.some((session) => session.id === prev)) return prev;
+      return sessions[0]?.id ?? null;
+    });
+  }, [activeWorkspaceId, sessions]);
+  useEffect(() => {
+    setSelectedGraderId((prev) => {
+      if (prev && graders.some((grader) => grader.id === prev)) return prev;
+      return graders[0]?.id ?? null;
+    });
+  }, [graders]);
 
   const updateCalibratePath = useCallback((
     sessionId: string | null,
-    opts?: { ref?: string | null },
+    opts?: { gradeRunId?: string | null },
   ) => {
-    const targetPath = sessionId ? buildGradePath(sessionId) : "/grade";
+    const targetPath = sessionId
+      ? buildGradePath(sessionId, opts?.gradeRunId ?? undefined)
+      : "/grade";
     if (window.location.pathname === targetPath) return;
-    const url = new URL(window.location.href);
-    url.pathname = targetPath;
-    if (!sessionId) {
-      url.searchParams.delete("sessionId");
-    }
-    if (opts?.ref) {
-      url.searchParams.set("ref", opts.ref);
-    } else {
-      url.searchParams.delete("ref");
-    }
-    window.history.replaceState({}, "", url.toString());
+    window.history.replaceState({}, "", targetPath);
     onAppPathChange?.(targetPath);
   }, [onAppPathChange]);
 
   const loadCalibrateData = useCallback(async () => {
-    try {
-      setLoading(true);
-      const params = new URLSearchParams();
-      if (activeSessionId) params.set("sessionId", activeSessionId);
-      const query = params.toString() ? `?${params.toString()}` : "";
-      const res = await fetch(`/api/calibrate${query}`);
-      if (!res.ok) throw new Error(res.statusText);
-      const data = await res.json() as CalibrateResponse;
-      const nextGraders = Array.isArray(data.graderDecks)
-        ? data.graderDecks
-        : [];
-      const nextSessions = Array.isArray(data.sessions) ? data.sessions : [];
-      setGraders(nextGraders);
-      setSessions(nextSessions);
-      setSelectedSessionId((prev) => {
-        const requested = initialCalibrateSessionRef.current;
-        if (
-          requested && nextSessions.some((session) => session.id === requested)
-        ) {
-          initialCalibrateSessionRef.current = null;
-          return requested;
-        }
-        if (prev && nextSessions.some((session) => session.id === prev)) {
-          return prev;
-        }
-        return nextSessions[0]?.id ?? null;
-      });
-      setSelectedGraderId((prev) => {
-        if (prev && nextGraders.some((grader) => grader.id === prev)) {
-          return prev;
-        }
-        return nextGraders[0]?.id ?? null;
-      });
-      setError(null);
-    } catch (err) {
-      setError(
-        err instanceof Error ? err.message : "Failed to load calibration data",
-      );
-    } finally {
-      setLoading(false);
-    }
-  }, [activeSessionId]);
+    await loadData({
+      workspaceId: activeWorkspaceId,
+      gradeRunId: requestedGradeRunId ?? null,
+    });
+  }, [activeWorkspaceId, loadData, requestedGradeRunId]);
 
   useEffect(() => {
     loadCalibrateData();
   }, [loadCalibrateData]);
 
   useEffect(() => {
-    if (!activeSessionId) return;
-    if (activeSessionId === selectedSessionId) return;
-    setSelectedSessionId(activeSessionId);
-  }, [activeSessionId, selectedSessionId]);
+    if (!activeWorkspaceId) return;
+    if (activeWorkspaceId === selectedSessionId) return;
+    setSelectedSessionId(activeWorkspaceId);
+  }, [activeWorkspaceId, selectedSessionId]);
 
   useEffect(() => {
-    const streamId = GRADE_STREAM_ID;
-    const streamUrl = buildDurableStreamUrl(
-      streamId,
-      getDurableStreamOffset(streamId),
-    );
-    const source = new EventSource(streamUrl);
-
-    source.onmessage = (event) => {
-      let envelope: { offset?: unknown; data?: unknown } | null = null;
-      try {
-        envelope = JSON.parse(event.data) as {
-          offset?: unknown;
-          data?: unknown;
-        };
-      } catch {
-        return;
-      }
-      if (
-        envelope &&
-        typeof envelope.offset === "number" &&
-        Number.isFinite(envelope.offset)
-      ) {
-        setDurableStreamOffset(streamId, envelope.offset + 1);
-      }
-      const msg = envelope?.data as CalibrateStreamMessage | undefined;
-      if (!msg || msg.type !== "calibrateSession") return;
-      setSessions((prev) => {
-        const next = [...prev];
-        const index = next.findIndex((sess) => sess.id === msg.session.id);
-        if (index >= 0) {
-          next[index] = msg.session;
-          return next;
-        }
-        return [msg.session, ...next];
-      });
-    };
-
-    return () => {
-      source.close();
-    };
-  }, []);
+    if (!selectedSessionId) return;
+    if (routeGradeRunId) return;
+    updateCalibratePath(selectedSessionId);
+  }, [routeGradeRunId, selectedSessionId, updateCalibratePath]);
 
   useEffect(() => {
-    if (selectedSessionId) {
-      updateCalibratePath(selectedSessionId);
-    } else {
-      updateCalibratePath(null);
-    }
-  }, [selectedSessionId, updateCalibratePath]);
+    setRouteGradeRunId(requestedGradeRunId ?? null);
+  }, [requestedGradeRunId]);
 
   useEffect(() => {
-    if (!selectedSessionId) {
-      setSessionDetail(null);
-      return;
-    }
-    let active = true;
-    const loadSessionDetail = async () => {
-      try {
-        setSessionDetail(null);
-        const res = await fetch(
-          `/api/session?sessionId=${encodeURIComponent(selectedSessionId)}`,
-        );
-        if (!res.ok) {
-          const text = await res.text();
-          throw new Error(text || res.statusText);
-        }
-        const data = await res.json() as SessionDetailResponse;
-        if (!active) return;
-        setSessionDetail(data);
-      } catch (err) {
-        if (!active) return;
-        setSessionDetail(null);
-        console.error(
-          err instanceof Error ? err.message : "Failed to load session details",
-        );
-      } finally {
-        if (!active) return;
-      }
-    };
-    loadSessionDetail();
-    return () => {
-      active = false;
-    };
-  }, [onFlagsUpdate, onOptimisticToggleFlag, selectedSessionId]);
+    loadSessionDetail(selectedSessionId).catch((err) => {
+      console.error(err);
+    });
+  }, [loadSessionDetail, selectedSessionId]);
 
   const selectedSession = useMemo(
     () => sessions.find((session) => session.id === selectedSessionId) ?? null,
@@ -382,49 +289,30 @@ function GradePage(
     return new Set(gradingFlags.map((flag) => flag.refId));
   }, [gradingFlags]);
   const [expandedRunId, setExpandedRunId] = useState<string | null>(null);
-  const prevRunIdsRef = useRef<string[]>([]);
   const [expandedResults, setExpandedResults] = useState<
     Record<string, boolean>
   >({});
-  const [highlightedResult, setHighlightedResult] = useState<string | null>(
-    null,
-  );
   const [flagReasonDrafts, setFlagReasonDrafts] = useState<
     Record<string, string>
   >({});
   const flagReasonTimeoutsRef = useRef<Record<string, number>>({});
   useEffect(() => {
-    const ref = initialCalibrateRef.current;
-    if (!ref.runId) return;
-    const match = runItems.find((item) =>
-      item.runId === ref.runId &&
-      (ref.turnIndex === undefined || item.turnIndex === ref.turnIndex)
-    );
-    if (!match) return;
-    setExpandedResults((prev) => ({ ...prev, [match.key]: true }));
-    setHighlightedResult(match.key);
-    setExpandedRunId(ref.runId);
-  }, [runItems]);
-  useEffect(() => {
-    const latestRunId = runSections[0]?.run.id ?? null;
-    const nextRunIds = runSections.map((section) => section.run.id);
-    const prevRunIds = prevRunIdsRef.current;
-    const hasNewLatest = latestRunId
-      ? !prevRunIds.includes(latestRunId)
-      : false;
-
-    if (!latestRunId) {
+    if (!routeGradeRunId) {
       setExpandedRunId(null);
-    } else if (
-      hasNewLatest ||
-      (expandedRunId && !nextRunIds.includes(expandedRunId)) ||
-      (!expandedRunId && prevRunIds.length === 0)
-    ) {
-      setExpandedRunId(latestRunId);
+      workspaceRouting.setGradeRunId(null);
+      return;
     }
-
-    prevRunIdsRef.current = nextRunIds;
-  }, [expandedRunId, runSections]);
+    setExpandedRunId(routeGradeRunId);
+    workspaceRouting.setGradeRunId(routeGradeRunId);
+  }, [routeGradeRunId, workspaceRouting]);
+  const routeRunNotFound = useMemo(
+    () =>
+      Boolean(
+        routeGradeRunId &&
+          !runSections.some((section) => section.run.id === routeGradeRunId),
+      ),
+    [routeGradeRunId, runSections],
+  );
 
   useEffect(() => {
     return () => {
@@ -442,34 +330,15 @@ function GradePage(
     if (!selectedSessionId) return;
     onOptimisticToggleFlag?.(item);
     try {
-      const res = await fetch("/api/calibrate/flag", {
-        method: "POST",
-        headers: { "content-type": "application/json" },
-        body: JSON.stringify({
-          sessionId: selectedSessionId,
-          refId: item.refId,
-          runId: item.runId,
-          turnIndex: item.turnIndex,
-        }),
-      });
-      if (!res.ok) {
-        const text = await res.text();
-        throw new Error(text || res.statusText);
-      }
-      const data = await res.json() as {
+      const data = await toggleGradeFlag({
+        workspaceId: selectedSessionId,
+        refId: item.refId,
+        runId: item.runId,
+        turnIndex: item.turnIndex,
+      }) as {
         flags?: GradingFlag[];
       };
       if (!data.flags) return;
-      setSessionDetail((prev) => {
-        if (!prev) return prev;
-        return {
-          ...prev,
-          meta: {
-            ...(prev.meta ?? {}),
-            gradingFlags: data.flags,
-          },
-        };
-      });
       onFlagsUpdate?.(data.flags);
       setFlagReasonDrafts((prev) => {
         const next = { ...prev };
@@ -494,46 +363,37 @@ function GradePage(
         return next;
       });
     } catch (err) {
-      setError(err instanceof Error ? err.message : "Failed to flag grader");
+      console.error(err);
     }
-  }, [selectedSessionId]);
+  }, [
+    onFlagsUpdate,
+    onOptimisticToggleFlag,
+    selectedSessionId,
+    toggleGradeFlag,
+  ]);
 
   const updateFlagReason = useCallback(
     async (refId: string, reason: string) => {
       if (!selectedSessionId) return;
       onOptimisticFlagReason?.(refId, reason);
       try {
-        const res = await fetch("/api/calibrate/flag/reason", {
-          method: "POST",
-          headers: { "content-type": "application/json" },
-          body: JSON.stringify({
-            sessionId: selectedSessionId,
-            refId,
-            reason,
-          }),
-        });
-        if (!res.ok) {
-          const text = await res.text();
-          throw new Error(text || res.statusText);
-        }
-        const data = await res.json() as { flags?: GradingFlag[] };
+        const data = await updateGradeFlagReason({
+          workspaceId: selectedSessionId,
+          refId,
+          reason,
+        }) as { flags?: GradingFlag[] };
         if (!data.flags) return;
-        setSessionDetail((prev) => {
-          if (!prev) return prev;
-          return {
-            ...prev,
-            meta: {
-              ...(prev.meta ?? {}),
-              gradingFlags: data.flags,
-            },
-          };
-        });
         onFlagsUpdate?.(data.flags);
       } catch (err) {
-        setError(err instanceof Error ? err.message : "Failed to save reason");
+        console.error(err);
       }
     },
-    [onFlagsUpdate, onOptimisticFlagReason, selectedSessionId],
+    [
+      onFlagsUpdate,
+      onOptimisticFlagReason,
+      selectedSessionId,
+      updateGradeFlagReason,
+    ],
   );
 
   const scheduleFlagReasonSave = useCallback((
@@ -553,40 +413,30 @@ function GradePage(
   const runGrader = useCallback(async () => {
     if (!selectedSessionId || !selectedGraderId) return;
     try {
-      setRunning(true);
-      const res = await fetch("/api/calibrate/run", {
-        method: "POST",
-        headers: { "content-type": "application/json" },
-        body: JSON.stringify({
-          sessionId: selectedSessionId,
-          graderId: selectedGraderId,
-        }),
+      const data = await runGrade({
+        workspaceId: selectedSessionId,
+        graderId: selectedGraderId,
       });
-      if (!res.ok) {
-        const text = await res.text();
-        throw new Error(text || res.statusText);
-      }
-      const data = await res.json() as {
-        session?: CalibrateSession;
-      };
-      if (data.session) {
-        setSessions((prev) => {
-          const index = prev.findIndex((sess) => sess.id === data.session!.id);
-          if (index >= 0) {
-            const next = [...prev];
-            next[index] = data.session!;
-            return next;
-          }
-          return [data.session!, ...prev];
-        });
+      // Read gradingRuns once so narrowing applies without a non-null `!`.
+      const gradingRuns = data.session?.gradingRuns;
+      const runs = Array.isArray(gradingRuns) ? gradingRuns : [];
+      const latestRun = runs.length > 0 ? runs[runs.length - 1] : null;
+      if (latestRun?.id) {
+        setExpandedRunId(latestRun.id);
+        setRouteGradeRunId(latestRun.id);
+        workspaceRouting.setGradeRunId(latestRun.id);
+        updateCalibratePath(selectedSessionId, { gradeRunId: latestRun.id });
       }
-      setError(null);
     } catch (err) {
-      setError(err instanceof Error ? err.message : "Failed to run grader");
-    } finally {
-      setRunning(false);
+      console.error(err);
     }
-  }, [selectedSessionId, selectedGraderId]);
+  }, [
+    runGrade,
+    selectedGraderId,
+    selectedSessionId,
+    updateCalibratePath,
+    workspaceRouting,
+  ]);
 
   const canRun = Boolean(selectedSessionId && selectedGraderId && !running);
 
@@ -678,6 +528,14 @@ function GradePage(
                   No grader runs for this session yet.
                 </div>
               )}
+              {routeRunNotFound && selectedSessionId && (
+                <div className="placeholder">
+                  Grade run not found for this workspace.{" "}
+                  <a href={buildGradePath(selectedSessionId)}>
+                    Back to grade runs
+                  </a>
+                </div>
+              )}
               {runSections.map((section) => {
                 const isExpanded = expandedRunId === section.run.id;
                 const runModeTurns = isTurnsResult(section.run.result);
@@ -811,15 +669,31 @@ function GradePage(
                       aria-expanded={isExpanded}
                       aria-controls={`calibrate-run-body-${section.run.id}`}
-                      onClick={() =>
-                        setExpandedRunId((prev) =>
-                          prev === section.run.id ? null : section.run.id
-                        )}
+                      onClick={() => {
+                        // Resolve the target from the rendered state rather
+                        // than inside a setExpandedRunId updater: updaters
+                        // must be pure (StrictMode invokes them twice), so
+                        // routing side effects cannot live in one.
+                        const next = isExpanded ? null : section.run.id;
+                        updateCalibratePath(selectedSessionId, {
+                          gradeRunId: next,
+                        });
+                        setExpandedRunId(next);
+                        setRouteGradeRunId(next);
+                        workspaceRouting.setGradeRunId(next);
+                      }}
                       onKeyDown={(event) => {
                         if (event.key === "Enter" || event.key === " ") {
                           event.preventDefault();
-                          setExpandedRunId((prev) =>
-                            prev === section.run.id ? null : section.run.id
-                          );
+                          // Keyboard activation mirrors onClick: toggle the
+                          // run and sync the route, keeping side effects
+                          // outside any state updater so it stays pure.
+                          const next = isExpanded ? null : section.run.id;
+                          updateCalibratePath(selectedSessionId, {
+                            gradeRunId: next,
+                          });
+                          setExpandedRunId(next);
+                          setRouteGradeRunId(next);
+                          workspaceRouting.setGradeRunId(next);
                         }
                       }}
                     >
@@ -877,11 +751,7 @@ function GradePage(
                           return (
                             <div
                               key={item.key}
-                              className={`calibrate-run-section${
-                                highlightedResult === item.key
-                                  ? " trace-row-highlight"
-                                  : ""
-                              }`}
+                              className="calibrate-run-section"
                             >
                               <div className="calibrate-result-header">
                                 <div className="calibrate-result-main">
diff --git a/simulator-ui/src/SessionsDrawer.tsx b/simulator-ui/src/SessionsDrawer.tsx
index c47af9f80..0c8bf39b0 100644
--- a/simulator-ui/src/SessionsDrawer.tsx
+++ b/simulator-ui/src/SessionsDrawer.tsx
@@ -15,7 +15,7 @@ export default function SessionsDrawer(props: {
   onDelete: (sessionId: string) => void;
   onDeleteAll: () => void;
   onClose: () => void;
-  activeSessionId?: string | null;
+  activeWorkspaceId?: string | null;
   bundleStamp: string | null;
 }) {
   const {
@@ -28,7 +28,7 @@ export default function SessionsDrawer(props: {
     onDelete,
     onDeleteAll,
     onClose,
-    activeSessionId,
+    activeWorkspaceId,
     bundleStamp,
   } = props;
   useEffect(() => {
@@ -75,7 +75,7 @@ export default function SessionsDrawer(props: {
             {error && <p className="error">{error}</p>}
             <ul className="sessions-list">
               {sessions.map((session) => {
-                const isActive = activeSessionId === session.id;
+                const isActive = activeWorkspaceId === session.id;
                 return (
                   <li key={session.id}>
                     <button
diff --git a/simulator-ui/src/TestBotChatPanel.tsx b/simulator-ui/src/TestBotChatPanel.tsx
new file mode 100644
index 000000000..61d96901a
--- /dev/null
+++ b/simulator-ui/src/TestBotChatPanel.tsx
@@ -0,0 +1,601 @@
+import React, { useEffect, useMemo, useRef, useState } from "react";
+import {
+  buildTestPath,
+  countUserMessages,
+  deriveReasoningByAssistant,
+  formatJson,
+  summarizeToolCalls,
+  type TestBotRun,
+  type TraceEvent,
+} from "./utils.ts";
+import type { FeedbackEntry } from "./utils.ts";
+import {
+  FeedbackControls,
+  ReasoningBubble,
+  ToolCallBubble,
+} from "./shared.tsx";
+import Panel from "./gds/Panel.tsx";
+import Button from "./gds/Button.tsx";
+import Badge from "./gds/Badge.tsx";
+
+type Props = { // Inputs of TestBotChatPanel; the panel keeps only view state itself.
+  run: TestBotRun; // run whose status, messages, traces and initFill are rendered
+  runWorkspaceId?: string; // feedback controls are rendered only when this is set
+  runStatusLabel: string; // text shown inside the status badge
+  activeWorkspaceId: string | null; // used to build the "Back to test runs" link
+  requestedRunNotFound: boolean; // shows the "not found" notice (needs activeWorkspaceId)
+  canStart: boolean; // enables "Run scenario"; when false a blocking reason may show
+  canRunPersona: boolean; // the blocking reason is shown only when this is true
+  hasPersonaSelection: boolean; // false -> "Select a persona deck to run."
+  botJsonErrorCount: number; // > 0 -> "Fix invalid JSON fields to run."
+  deckJsonErrorCount: number; // > 0 -> "Fix invalid JSON fields to run."
+  missingBotInput: string[]; // first 6 names are listed in the blocking reason
+  missingDeckInit: string[]; // first 6 names are listed in the reason / info notice
+  lastInitFill: TestBotRun["initFill"] | null; // used when run.initFill is nullish
+  isUserStart: boolean; // switches empty-thread hint and placeholder wording
+  showStartOverlay: boolean; // shows the start overlay and disables the textarea
+  canStartAssistant: boolean; // enables the "Start assistant" button
+  canSendChat: boolean; // enables the Send button and Enter-to-send
+  chatDraft: string; // controlled value of the message textarea
+  setChatDraft: React.Dispatch<React.SetStateAction<string>>; // called on each edit
+  chatError: string | null; // rendered under the composer and inside the overlay
+  optimisticUser: { id: string; text: string } | null; // text shown as a user bubble
+  streamingUser: { // in-progress user text; shown only when runId === run.id
+    runId: string;
+    turn: number;
+    text: string;
+    expectedUserCount?: number; // bubble hides once countUserMessages reaches this
+  } | null;
+  streamingAssistant: { runId: string; turn: number; text: string } | null; // in-progress assistant text; shown only when runId === run.id
+  startRun: () => Promise<void>; // "Run scenario" button handler
+  stopRun: () => Promise<void>; // "Stop" button handler
+  handleNewChat: () => Promise<void>; // "New chat" button handler
+  handleSendChat: () => Promise<void>; // Send button / Enter key handler
+  handleStartAssistant: () => Promise<void>; // "Start assistant" button handler
+  onScore: ( // forwarded unchanged to FeedbackControls
+    messageRefId: string,
+    score: number | null,
+  ) => void | Promise<void>;
+  onReasonChange: ( // forwarded unchanged to FeedbackControls
+    messageRefId: string,
+    score: number,
+    reason: string,
+  ) => void | Promise<void>;
+};
+
+export default function TestBotChatPanel(props: Props) { // Renders one test run: status header, notices, transcript, composer, start overlay.
+  const {
+    run,
+    runWorkspaceId,
+    runStatusLabel,
+    activeWorkspaceId,
+    requestedRunNotFound,
+    canStart,
+    canRunPersona,
+    hasPersonaSelection,
+    botJsonErrorCount,
+    deckJsonErrorCount,
+    missingBotInput,
+    missingDeckInit,
+    lastInitFill,
+    isUserStart,
+    showStartOverlay,
+    canStartAssistant,
+    canSendChat,
+    chatDraft,
+    setChatDraft,
+    chatError,
+    optimisticUser,
+    streamingUser,
+    streamingAssistant,
+    startRun,
+    stopRun,
+    handleNewChat,
+    handleSendChat,
+    handleStartAssistant,
+    onScore,
+    onReasonChange,
+  } = props;
+  const [toolCallsOpen, setToolCallsOpen] = useState<Record<number, boolean>>(
+    {}, // keyed by tool-bucket index; true = expanded
+  );
+  const transcriptRef = useRef<HTMLDivElement | null>(null);
+  const lastRunMessageCountRef = useRef(0); // message count seen by the last scroll-effect run
+  // Reset per-run view state (scroll baseline, expanded buckets) when the run changes.
+  useEffect(() => {
+    lastRunMessageCountRef.current = 0;
+    setToolCallsOpen({});
+  }, [run.id]);
+  // Trace-derived views; recomputed only when run.traces changes identity.
+  const toolCallSummaries = useMemo(
+    () => summarizeToolCalls(run.traces ?? []),
+    [run.traces],
+  );
+  const reasoningByAssistant = useMemo(
+    () => deriveReasoningByAssistant(run.traces),
+    [run.traces],
+  );
+  // Map of transcript position -> tool calls to render at that position.
+  const toolBuckets = useMemo(() => {
+    const deriveInsertsFromTraces = (
+      traces: TraceEvent[],
+      messageCount: number,
+    ) => {
+      const inserts: Array<{
+        runId?: string;
+        actionCallId?: string;
+        parentActionCallId?: string;
+        name?: string;
+        index: number;
+      }> = [];
+      let messageIndex = 0; // transcript position reached so far while walking the traces
+      for (const trace of traces) {
+        if (!trace || typeof trace !== "object") continue;
+        const traceRecord = trace as Record<string, unknown>;
+        const type = typeof traceRecord.type === "string"
+          ? traceRecord.type
+          : "";
+        if (type === "message.user") { // a user message advances the position
+          messageIndex++;
+          continue;
+        }
+        if (type === "model.result") {
+          const finishReason = typeof traceRecord.finishReason === "string"
+            ? traceRecord.finishReason
+            : "";
+          if (finishReason !== "tool_calls") { // tool-call results do not advance the position
+            messageIndex++;
+          }
+          continue;
+        }
+        if (type === "tool.call") {
+          const runId = typeof traceRecord.runId === "string"
+            ? traceRecord.runId
+            : undefined;
+          const actionCallId = typeof traceRecord.actionCallId === "string"
+            ? traceRecord.actionCallId
+            : undefined;
+          const parentActionCallId =
+            typeof traceRecord.parentActionCallId === "string"
+              ? traceRecord.parentActionCallId
+              : undefined;
+          const name = typeof traceRecord.name === "string"
+            ? traceRecord.name
+            : undefined;
+          inserts.push({
+            runId,
+            actionCallId,
+            parentActionCallId,
+            name,
+            index: Math.min(messageIndex, messageCount), // never point past the last message
+          });
+        }
+      }
+      return inserts;
+    };
+    const map = new Map<number, ReturnType<typeof summarizeToolCalls>>();
+    if (!toolCallSummaries.length) return map;
+    const traceInserts = Array.isArray(run.traces) && run.traces.length > 0
+      ? deriveInsertsFromTraces(run.traces, run.messages.length)
+      : [];
+    const insertMap = new Map<
+      string,
+      { index: number; name?: string; parentActionCallId?: string }
+    >();
+    const callKey = (runId: string | undefined, actionCallId: string) =>
+      `${runId ?? ""}:${actionCallId}`;
+    const inserts = traceInserts.length > 0 ? traceInserts : run.toolInserts ??
+      []; // trace-derived positions win; run.toolInserts is the fallback
+    inserts.forEach((insert) => {
+      if (
+        typeof insert?.index === "number" &&
+        insert.index >= 0 &&
+        insert.actionCallId
+      ) {
+        const insertRunId = typeof (insert as { runId?: unknown }).runId ===
+            "string"
+          ? (insert as { runId?: string }).runId
+          : undefined;
+        insertMap.set(callKey(insertRunId, insert.actionCallId), {
+          index: Math.min(insert.index, run.messages.length), // clamp: buckets past the end are never rendered
+          name: insert.name ?? undefined,
+          parentActionCallId: insert.parentActionCallId ?? undefined,
+        });
+      }
+    });
+    for (const call of toolCallSummaries) {
+      const insert = call.actionCallId
+        ? insertMap.get(callKey(call.runId, call.actionCallId))
+        : undefined;
+      const index = insert?.index ?? run.messages.length; // unmatched calls go after the last message
+      const enriched = insert
+        ? {
+          ...call,
+          name: call.name ?? insert.name,
+          parentActionCallId: call.parentActionCallId ??
+            insert.parentActionCallId,
+        }
+        : call;
+      const bucket = map.get(index);
+      if (bucket) {
+        bucket.push(enriched);
+      } else {
+        map.set(index, [enriched]);
+      }
+    }
+    return map;
+  }, [toolCallSummaries, run.toolInserts, run.traces, run.messages.length]);
+  // Scroll the transcript to the bottom when messages are added or text is streaming.
+  useEffect(() => {
+    const el = transcriptRef.current;
+    if (!el) return;
+    const shouldScroll = run.messages.length > lastRunMessageCountRef.current ||
+      Boolean(streamingUser?.text || streamingAssistant?.text);
+    lastRunMessageCountRef.current = run.messages.length; // update the baseline even when not scrolling
+    if (!shouldScroll) return;
+    const frame = requestAnimationFrame(() => {
+      el.scrollTop = el.scrollHeight;
+    });
+    return () => cancelAnimationFrame(frame);
+  }, [
+    run.id,
+    run.messages.length,
+    streamingUser,
+    streamingAssistant?.text,
+  ]);
+
+  return (
+    <Panel className="flex-column gap-8">
+      <div className="flex-row gap-8 items-center">
+        <div className="flex-column flex-1 gap-4">
+          <div className="flex-row items-center gap-8">
+            <strong>Test run</strong>
+            <Badge variant={run.status} data-testid="testbot-status">
+              {runStatusLabel}
+            </Badge>
+          </div>
+        </div>
+        <div className="flex-row row-reverse gap-8 wrap">
+          <Button
+            variant="ghost"
+            onClick={stopRun}
+            disabled={run.status !== "running"}
+            data-testid="testbot-stop"
+          >
+            Stop
+          </Button>
+          <Button variant="secondary" onClick={handleNewChat}>
+            New chat
+          </Button>
+        </div>
+      </div>
+      {requestedRunNotFound && activeWorkspaceId && (
+        <div className="placeholder">
+          Test run not found for this workspace.{" "}
+          <a href={buildTestPath(activeWorkspaceId)}>Back to test runs</a>
+        </div>
+      )}
+      {run.error && <div className="error">{run.error}</div>}
+      {(run.initFill ?? lastInitFill) && (
+        <div className="patch-card">
+          <div className="patch-summary">Init fill</div>
+          {(run.initFill ?? lastInitFill)?.error && (
+            <div className="error">
+              {(run.initFill ?? lastInitFill)?.error}
+            </div>
+          )}
+          <div className="patch-meta">
+            Requested: {(run.initFill ?? lastInitFill)?.requested?.length
+              ? (run.initFill ?? lastInitFill)!.requested.join(", ")
+              : "none"}
+          </div>
+          {(run.initFill ?? lastInitFill)?.applied !== undefined && (
+            <pre className="trace-json">
+              {formatJson((run.initFill ?? lastInitFill)?.applied)}
+            </pre>
+          )}
+          {(run.initFill ?? lastInitFill)?.applied === undefined && (
+            <div className="patch-meta">No fills applied.</div>
+          )}
+        </div>
+      )}
+      {!canStart && canRunPersona && (
+        <div className="error">
+          {!hasPersonaSelection
+            ? "Select a persona deck to run."
+            : botJsonErrorCount > 0 || deckJsonErrorCount > 0
+            ? "Fix invalid JSON fields to run."
+            : missingBotInput.length > 0
+            ? `Missing required bot inputs: ${
+              missingBotInput.slice(0, 6).join(", ")
+            }${missingBotInput.length > 6 ? "…" : ""}`
+            : missingDeckInit.length > 0
+            ? `Missing required init fields: ${
+              missingDeckInit.slice(0, 6).join(", ")
+            }${missingDeckInit.length > 6 ? "…" : ""}`
+            : ""}
+        </div>
+      )}
+      {canStart && missingDeckInit.length > 0 && (
+        <div className="placeholder">
+          Missing required init fields will be requested from the persona:{" "}
+          {missingDeckInit.slice(0, 6).join(", ")}
+          {missingDeckInit.length > 6 ? "…" : ""}
+        </div>
+      )}
+      <div className="test-bot-thread">
+        <div
+          className="imessage-thread"
+          ref={transcriptRef}
+        >
+          {run.messages.length === 0 && (
+            <div className="placeholder">No messages yet.</div>
+          )}
+          {(() => {
+            const rows: React.ReactNode[] = [];
+            const renderToolBucket = (index: number) => { // appends the collapsible tool-call group for this position, if any
+              const bucket = toolBuckets.get(index);
+              if (!bucket || bucket.length === 0) return;
+              const isOpen = Boolean(toolCallsOpen[index]);
+              rows.push(
+                <div
+                  key={`tool-bucket-${index}`}
+                  className="tool-calls-collapsible"
+                >
+                  <button
+                    type="button"
+                    className="tool-calls-toggle"
+                    onClick={() =>
+                      setToolCallsOpen((prev) => ({
+                        ...prev,
+                        [index]: !prev[index],
+                      }))}
+                  >
+                    <span className="tool-calls-toggle-label">
+                      Tool calls ({bucket.length}) · {isOpen ? "Hide" : "Show"}
+                    </span>
+                  </button>
+                  {isOpen && (
+                    <div className="tool-calls-list">
+                      {bucket.map((call, callIdx) => (
+                        <ToolCallBubble
+                          key={`tool-${call.key}-${index}-${callIdx}`}
+                          call={call}
+                        />
+                      ))}
+                    </div>
+                  )}
+                </div>,
+              );
+            };
+            const renderReasoningBucket = (assistantIndex: number) => { // appends reasoning bubbles for the n-th assistant message, if any
+              const bucket = reasoningByAssistant.get(assistantIndex);
+              if (!bucket || bucket.length === 0) return;
+              bucket.forEach((detail, detailIdx) => {
+                rows.push(
+                  <div
+                    key={`reasoning-${assistantIndex}-${detailIdx}`}
+                    className="imessage-row left"
+                  >
+                    <div
+                      className="imessage-bubble left reasoning-bubble"
+                      title="assistant reasoning"
+                    >
+                      <ReasoningBubble detail={detail} />
+                    </div>
+                  </div>,
+                );
+              });
+            };
+            renderToolBucket(0); // tool calls positioned before the first message
+            let assistantIndex = -1; // 0-based index of the latest assistant message seen
+            run.messages.forEach((m, idx) => {
+              const messageKey = m.messageRefId ?? `${m.role}-${idx}`;
+              if (m.role === "assistant") {
+                assistantIndex += 1;
+                renderReasoningBucket(assistantIndex);
+              }
+              rows.push(
+                <div
+                  key={messageKey}
+                  className={`imessage-row ${
+                    m.role === "user" ? "right" : "left"
+                  }`}
+                >
+                  <div
+                    className={`imessage-bubble ${
+                      m.role === "user" ? "right" : "left"
+                    }`}
+                    title={m.role}
+                  >
+                    {(
+                        m.respondPayload !== undefined ||
+                        m.respondMeta !== undefined ||
+                        typeof m.respondStatus === "number" ||
+                        typeof m.respondMessage === "string" ||
+                        typeof m.respondCode === "string"
+                      )
+                      ? (
+                        <div className="respond-summary">
+                          <div className="respond-meta">
+                            <Badge>gambit_respond</Badge>
+                            {typeof m.respondStatus === "number" && (
+                              <Badge variant="ghost">
+                                status {m.respondStatus}
+                              </Badge>
+                            )}
+                            {m.respondCode && (
+                              <Badge variant="ghost">
+                                code {m.respondCode}
+                              </Badge>
+                            )}
+                          </div>
+                          {m.respondMessage && (
+                            <div className="respond-message">
+                              {m.respondMessage}
+                            </div>
+                          )}
+                          {m.respondPayload !== undefined && (
+                            <pre className="bubble-json">
+                              {formatJson(m.respondPayload)}
+                            </pre>
+                          )}
+                          {m.respondMeta && (
+                            <details className="respond-meta-details">
+                              <summary>Meta</summary>
+                              <pre className="bubble-json">
+                                {formatJson(m.respondMeta)}
+                              </pre>
+                            </details>
+                          )}
+                        </div>
+                      )
+                      : m.content}
+                    {m.messageRefId && runWorkspaceId && (
+                      <FeedbackControls
+                        messageRefId={m.messageRefId}
+                        feedback={m.feedback as FeedbackEntry | undefined}
+                        onScore={onScore}
+                        onReasonChange={onReasonChange}
+                      />
+                    )}
+                  </div>
+                </div>,
+              );
+              renderToolBucket(idx + 1); // tool calls positioned after this message
+            });
+            return rows;
+          })()}
+          {streamingUser?.text && streamingUser.runId === run.id &&
+            (streamingUser.expectedUserCount === undefined ||
+              countUserMessages(run.messages) <
+                streamingUser.expectedUserCount) &&
+            (
+              <div className="imessage-row right">
+                <div
+                  className="imessage-bubble right imessage-bubble-muted"
+                  title="user"
+                >
+                  {streamingUser.text}
+                </div>
+              </div>
+            )}
+          {optimisticUser && (
+            <div className="imessage-row right">
+              <div
+                className="imessage-bubble right"
+                title="user"
+              >
+                {optimisticUser.text}
+              </div>
+            </div>
+          )}
+          {streamingAssistant?.text &&
+            streamingAssistant.runId === run.id &&
+            (
+              <div className="imessage-row left">
+                <div
+                  className="imessage-bubble left imessage-bubble-muted"
+                  title="assistant"
+                >
+                  {streamingAssistant.text}
+                </div>
+              </div>
+            )}
+        </div>
+        <div className="composer">
+          <div className="composer-inputs">
+            {isUserStart && run.messages.length === 0 &&
+              !streamingAssistant?.text && !streamingUser?.text && (
+              <div className="placeholder emphasis">
+                This deck expects a user message to kick things off.
+              </div>
+            )}
+            <div className="flex-row gap-4 mb-2">
+              <textarea
+                className="message-input flex-1"
+                rows={1}
+                placeholder={showStartOverlay
+                  ? "Start the assistant to begin..."
+                  : isUserStart && run.messages.length === 0
+                  ? "Send the first message to begin..."
+                  : "Message the assistant..."}
+                value={chatDraft}
+                onChange={(e) => setChatDraft(e.target.value)}
+                disabled={showStartOverlay}
+                onKeyDown={(e) => {
+                  if (e.key === "Enter" && !e.shiftKey) {
+                    e.preventDefault();
+                    if (canSendChat) {
+                      handleSendChat();
+                    }
+                  }
+                }}
+              />
+              <div className="composer-actions">
+                <Button
+                  variant="primary"
+                  onClick={handleSendChat}
+                  disabled={!canSendChat}
+                  data-testid="testbot-chat-send"
+                >
+                  Send
+                </Button>
+              </div>
+            </div>
+          </div>
+          {chatError && <div className="error">{chatError}</div>}
+        </div>
+        {showStartOverlay && (
+          <div className="test-bot-thread-overlay">
+            <div className="test-bot-thread-card">
+              <strong className="test-bot-thread-title">
+                Choose how to start
+              </strong>
+              <div className="placeholder test-bot-thread-subtitle">
+                Pick the flow you want: manual conversation or a full scenario
+                run.
+              </div>
+              <div className="test-bot-thread-sections">
+                <div className="test-bot-thread-section">
+                  <div className="test-bot-thread-section-title">
+                    Start the assistant
+                  </div>
+                  <div className="test-bot-thread-section-body">
+                    Use this when you want to explore the chat manually.
+                  </div>
+                  <Button
+                    variant="secondary"
+                    onClick={handleStartAssistant}
+                    disabled={!canStartAssistant}
+                    data-testid="testbot-start-assistant"
+                  >
+                    Start assistant
+                  </Button>
+                </div>
+                <div className="test-bot-thread-section">
+                  <div className="test-bot-thread-section-title">
+                    Run scenario
+                  </div>
+                  <div className="test-bot-thread-section-body">
+                    Run the configured scenario to execute end-to-end
+                    validations.
+                  </div>
+                  <Button
+                    variant="primary"
+                    onClick={startRun}
+                    disabled={!canStart}
+                    data-testid="testbot-run-overlay"
+                  >
+                    Run scenario
+                  </Button>
+                </div>
+              </div>
+              {chatError && <div className="error">{chatError}</div>}
+            </div>
+          </div>
+        )}
+      </div>
+    </Panel>
+  );
+}
diff --git a/simulator-ui/src/TestBotPage.tsx b/simulator-ui/src/TestBotPage.tsx
index babd66df3..37a54939d 100644
--- a/simulator-ui/src/TestBotPage.tsx
+++ b/simulator-ui/src/TestBotPage.tsx
@@ -7,69 +7,48 @@ import React, {
 } from "react";
 import {
   botFilename,
-  buildDurableStreamUrl,
   cloneValue,
-  countUserMessages,
-  deckDisplayPath,
-  deckPath,
-  DEFAULT_TEST_PATH,
   deriveInitialFromSchema,
   fileNameFromPath,
   findMissingRequiredFields,
   formatJson,
-  getDurableStreamOffset,
-  normalizedDeckPath,
-  normalizeFsPath,
-  repoRootPath,
-  setDurableStreamOffset,
-  summarizeToolCalls,
-  TEST_STREAM_ID,
-  toRelativePath,
 } from "./utils.ts";
 import type {
-  FeedbackEntry,
   NormalizedSchema,
   TestBotConfigResponse,
   TestBotRun,
-  TestBotSocketMessage,
   TestDeckMeta,
-  TraceEvent,
 } from "./utils.ts";
-import {
-  FeedbackControls,
-  InitForm,
-  ToolCallBubble,
-  useHttpSchema,
-} from "./shared.tsx";
+import { InitForm, useHttpSchema } from "./shared.tsx";
 import PageGrid from "./gds/PageGrid.tsx";
 import PageShell from "./gds/PageShell.tsx";
 import Panel from "./gds/Panel.tsx";
 import Button from "./gds/Button.tsx";
 import Tabs from "./gds/Tabs.tsx";
-import Badge from "./gds/Badge.tsx";
 import List from "./gds/List.tsx";
 import ListItem from "./gds/ListItem.tsx";
 import Listbox from "./gds/Listbox.tsx";
 import ScrollingText from "./gds/ScrollingText.tsx";
+import { useWorkspaceTest } from "./WorkspaceContext.tsx";
+import TestBotChatPanel from "./TestBotChatPanel.tsx";
 
 export default function TestBotPage(props: {
-  onReplaceTestBotSession: (sessionId: string) => void;
+  onReplaceTestBotSession: (workspaceId: string, runId?: string) => void;
   onResetTestBotSession: () => void;
-  activeSessionId: string | null;
+  activeWorkspaceId: string | null;
+  requestedRunId?: string | null;
   resetToken?: number;
   setNavActions?: (actions: React.ReactNode | null) => void;
-  onFeedbackUpdate?: (
-    messageRefId: string,
-    feedback: FeedbackEntry | null,
-  ) => void;
+  onFeedbackPersisted?: (workspaceId: string) => void;
 }) {
   const {
     onReplaceTestBotSession,
     onResetTestBotSession,
-    activeSessionId,
+    activeWorkspaceId,
+    requestedRunId,
     resetToken,
     setNavActions,
-    onFeedbackUpdate,
+    onFeedbackPersisted,
   } = props;
   const deckStorageKey = "gambit:test:selected-deck";
   const [testDecks, setTestDecks] = useState<TestDeckMeta[]>([]);
@@ -88,57 +67,35 @@ export default function TestBotPage(props: {
   >({});
   const [botInputDefaults, setBotInputDefaults] = useState<unknown>(undefined);
   const [initialUserMessage] = useState("");
-  const [run, setRun] = useState<TestBotRun>({
-    status: "idle",
-    messages: [],
-    traces: [],
-    toolInserts: [],
-  });
+  const workspaceTest = useWorkspaceTest();
+  const {
+    run,
+    setRun,
+    streamingUser,
+    streamingAssistant,
+    chatDraft,
+    setChatDraft,
+    chatSending,
+    chatError,
+    optimisticUser,
+  } = workspaceTest;
+  const refreshTestStatusApi = workspaceTest.refreshStatus;
+  const startTestRunApi = workspaceTest.startRun;
+  const startTestAssistantApi = workspaceTest.startAssistant;
+  const sendTestMessageApi = workspaceTest.sendMessage;
+  const stopTestRunApi = workspaceTest.stopRun;
+  const resetTestRunApi = workspaceTest.resetRun;
+  const saveTestFeedbackApi = workspaceTest.saveFeedback;
   const [lastInitFill, setLastInitFill] = useState<
     TestBotRun["initFill"] | null
   >(null);
-  const runRef = useRef<TestBotRun>({
-    status: "idle",
-    messages: [],
-    traces: [],
-    toolInserts: [],
-  });
-  const lastRunMessageCountRef = useRef(0);
-  const [toolCallsOpen, setToolCallsOpen] = useState<
-    Record<number, boolean>
-  >({});
-  const [latencyByTurn, setLatencyByTurn] = useState<
-    Record<number, number>
-  >({});
-  const lastUserEndByTurnRef = useRef<Record<number, number>>({});
-  const firstAssistantTokenByTurnRef = useRef<Record<number, boolean>>({});
+  const [requestedRunNotFound, setRequestedRunNotFound] = useState(false);
+  const runIdRef = useRef<string | undefined>(run.id);
   const [assistantDeckTab, setAssistantDeckTab] = useState<
     "input" | "tools" | "schema"
   >("input");
-
-  useEffect(() => {
-    lastRunMessageCountRef.current = 0;
-    setToolCallsOpen({});
-    setLatencyByTurn({});
-    lastUserEndByTurnRef.current = {};
-    firstAssistantTokenByTurnRef.current = {};
-  }, [run.id]);
-  const [streamingUser, setStreamingUser] = useState<
-    {
-      runId: string;
-      turn: number;
-      text: string;
-      expectedUserCount?: number;
-    } | null
-  >(null);
-  const [streamingAssistant, setStreamingAssistant] = useState<
-    {
-      runId: string;
-      turn: number;
-      text: string;
-    } | null
-  >(null);
-  const deckSchema = useHttpSchema({ sessionId: activeSessionId });
+  const runWorkspaceId = run.workspaceId ?? run.sessionId;
+  const deckSchema = useHttpSchema({ workspaceId: activeWorkspaceId });
   const deckInputSchema = deckSchema.schemaResponse?.schema;
   const deckSchemaDefaults = deckSchema.schemaResponse?.defaults;
   const deckSchemaError = deckSchema.schemaResponse?.error ??
@@ -150,16 +107,8 @@ export default function TestBotPage(props: {
   const [deckJsonErrors, setDeckJsonErrors] = useState<
     Record<string, string | null>
   >({});
-  const [chatDraft, setChatDraft] = useState("");
-  const [chatSending, setChatSending] = useState(false);
-  const [chatError, setChatError] = useState<string | null>(null);
-  const [optimisticUser, setOptimisticUser] = useState<
-    { id: string; text: string } | null
-  >(null);
   const pollRef = useRef<number | null>(null);
-  const transcriptRef = useRef<HTMLDivElement | null>(null);
-  const runIdRef = useRef<string | undefined>(undefined);
-  const resetSkipRef = useRef(false);
+  const lastResetTokenRef = useRef<number | undefined>(resetToken);
   const handleNewChatRef = useRef<() => void>(() => {});
   const allowRunSessionNavRef = useRef(false);
 
@@ -174,7 +123,7 @@ export default function TestBotPage(props: {
     const fetchTestBotConfig = async (deckId?: string) => {
       const params = new URLSearchParams();
       if (deckId) params.set("deckPath", deckId);
-      if (activeSessionId) params.set("sessionId", activeSessionId);
+      if (activeWorkspaceId) params.set("workspaceId", activeWorkspaceId);
       const query = params.toString() ? `?${params.toString()}` : "";
       return fetch(`/api/test${query}`);
     };
@@ -239,30 +188,22 @@ export default function TestBotPage(props: {
     } catch (err) {
       console.error(err);
     }
-  }, [activeSessionId, deckStorageKey]);
+  }, [activeWorkspaceId, deckStorageKey]);
 
   useEffect(() => {
     loadTestBot();
   }, [loadTestBot]);
 
   useEffect(() => {
-    runIdRef.current = run.id;
-    runRef.current = run;
-    setStreamingUser(null);
-    setStreamingAssistant(null);
-  }, [run.id]);
-
-  useEffect(() => {
-    if (!run.sessionId) return;
-    if (
-      activeSessionId && run.sessionId !== activeSessionId &&
-      !allowRunSessionNavRef.current
-    ) {
+    if (!runWorkspaceId) return;
+    if (runWorkspaceId === "new") return;
+    if (!allowRunSessionNavRef.current) return;
+    if (activeWorkspaceId && runWorkspaceId !== activeWorkspaceId) {
       return;
     }
-    onReplaceTestBotSession(run.sessionId);
+    onReplaceTestBotSession(runWorkspaceId, run.id);
     allowRunSessionNavRef.current = false;
-  }, [activeSessionId, onReplaceTestBotSession, run.sessionId]);
+  }, [activeWorkspaceId, onReplaceTestBotSession, run.id, runWorkspaceId]);
 
   useEffect(() => {
     if (!selectedDeckId) return;
@@ -274,174 +215,77 @@ export default function TestBotPage(props: {
   }, [deckStorageKey, selectedDeckId]);
 
   useEffect(() => {
-    runRef.current = run;
-  }, [run]);
-
-  useEffect(() => {
-    const streamId = TEST_STREAM_ID;
-    const streamUrl = buildDurableStreamUrl(
-      streamId,
-      getDurableStreamOffset(streamId),
-    );
-    const source = new EventSource(streamUrl);
-
-    source.onopen = () => {
-      console.info("[test] stream open", streamUrl);
-    };
-
-    source.onmessage = (event) => {
-      let envelope: { offset?: unknown; data?: unknown } | null = null;
-      try {
-        envelope = JSON.parse(event.data) as {
-          offset?: unknown;
-          data?: unknown;
-        };
-      } catch {
-        return;
-      }
-      if (
-        envelope &&
-        typeof envelope.offset === "number" &&
-        Number.isFinite(envelope.offset)
-      ) {
-        setDurableStreamOffset(streamId, envelope.offset + 1);
-      }
-      const msg = envelope?.data as TestBotSocketMessage | undefined;
-      if (!msg) return;
-      const activeRunId = runIdRef.current;
-      if (msg.type === "testBotStatus" && msg.run) {
-        if (activeRunId && msg.run.id === activeRunId) {
-          setRun({
-            ...msg.run,
-            messages: msg.run.messages ?? [],
-            traces: msg.run.traces ?? [],
-            toolInserts: msg.run.toolInserts ?? [],
-          });
-        }
-        return;
-      }
-      if (msg.type === "testBotStream") {
-        if (!msg.runId || (activeRunId && msg.runId !== activeRunId)) return;
-        const streamRunId = msg.runId;
-        const turn = typeof msg.turn === "number" ? msg.turn : 0;
-        if (msg.role === "assistant") {
-          if (!firstAssistantTokenByTurnRef.current[turn]) {
-            firstAssistantTokenByTurnRef.current[turn] = true;
-            const userEnd = lastUserEndByTurnRef.current[turn];
-            if (typeof userEnd === "number" && typeof msg.ts === "number") {
-              const delta = msg.ts - userEnd;
-              setLatencyByTurn((prev) => ({
-                ...prev,
-                [turn]: delta,
-              }));
-            }
-          }
-        }
-        if (msg.role === "user") {
-          const expectedUserCount = countUserMessages(runRef.current.messages) +
-            1;
-          setStreamingUser((prev) =>
-            prev && prev.runId === streamRunId && prev.turn === turn
-              ? { ...prev, text: prev.text + msg.chunk }
-              : {
-                runId: streamRunId,
-                turn,
-                text: msg.chunk,
-                expectedUserCount,
-              }
-          );
-        } else {
-          setStreamingAssistant((prev) =>
-            prev && prev.runId === streamRunId && prev.turn === turn
-              ? { ...prev, text: prev.text + msg.chunk }
-              : { runId: streamRunId, turn, text: msg.chunk }
-          );
-        }
-        return;
-      }
-      if (msg.type === "testBotStreamEnd") {
-        if (!msg.runId || (activeRunId && msg.runId !== activeRunId)) return;
-        const streamRunId = msg.runId;
-        const turn = typeof msg.turn === "number" ? msg.turn : 0;
-        if (msg.role === "user") {
-          lastUserEndByTurnRef.current[turn] = typeof msg.ts === "number"
-            ? msg.ts
-            : Date.now();
-          delete firstAssistantTokenByTurnRef.current[turn];
-        }
-        if (msg.role === "user") {
-          setStreamingUser((prev) => {
-            if (!prev || prev.runId !== streamRunId || prev.turn !== turn) {
-              return prev;
-            }
-            return prev.expectedUserCount ? prev : {
-              ...prev,
-              expectedUserCount: countUserMessages(runRef.current.messages) +
-                1,
-            };
-          });
-        } else {
-          setStreamingAssistant((prev) =>
-            prev && prev.runId === streamRunId && prev.turn === turn
-              ? null
-              : prev
-          );
-        }
-      }
-    };
-
-    source.onerror = (err) => {
-      console.warn("[test] stream error", err);
-    };
-
-    return () => {
-      console.info("[test] stream cleanup");
-      source.close();
-    };
-  }, [deckPath]);
+    runIdRef.current = run.id;
+  }, [run.id]);
 
   const refreshStatus = useCallback(async (
-    opts?: { runId?: string; sessionId?: string },
+    opts?: { runId?: string; workspaceId?: string },
   ) => {
     try {
       const runId = opts?.runId ??
-        (opts?.sessionId ? undefined : run.id);
-      const sessionId = opts?.sessionId;
-      const params = new URLSearchParams();
-      if (runId) params.set("runId", runId);
-      if (sessionId) params.set("sessionId", sessionId);
+        (opts?.workspaceId ? undefined : runIdRef.current);
       const deckParam = testDecks.length
         ? (selectedDeckId || testDecks[0]?.id || "")
         : "";
-      if (deckParam) params.set("deckPath", deckParam);
-      const query = params.toString() ? `?${params.toString()}` : "";
-      const res = await fetch(`/api/test/status${query}`);
-      if (!res.ok) throw new Error(res.statusText);
-      const data = await res.json() as TestBotConfigResponse & {
-        run?: TestBotRun;
-      };
-      const nextRun = data.run ?? { status: "idle", messages: [] };
-      setRun({
-        ...nextRun,
-        messages: nextRun.messages ?? [],
-        traces: nextRun.traces ?? [],
-        toolInserts: nextRun.toolInserts ?? [],
+      return await refreshTestStatusApi({
+        runId,
+        workspaceId: opts?.workspaceId ?? activeWorkspaceId ??
+          runWorkspaceId ??
+          undefined,
+        deckPath: deckParam || undefined,
       });
     } catch (err) {
       console.error(err);
+      return {
+        status: "idle",
+        messages: [],
+        traces: [],
+        toolInserts: [],
+      } as TestBotRun;
     }
-  }, [run.id, selectedDeckId, testDecks]);
+  }, [
+    activeWorkspaceId,
+    refreshTestStatusApi,
+    runWorkspaceId,
+    selectedDeckId,
+    testDecks,
+  ]);
 
   useEffect(() => {
-    if (activeSessionId) return;
+    if (activeWorkspaceId) return;
+    setRequestedRunNotFound(false);
     refreshStatus();
-  }, [activeSessionId, refreshStatus]);
+  }, [activeWorkspaceId, refreshStatus]);
 
   useEffect(() => {
-    if (!activeSessionId) return;
+    if (!activeWorkspaceId) return;
     if (allowRunSessionNavRef.current) return;
-    refreshStatus({ sessionId: activeSessionId });
-  }, [activeSessionId, refreshStatus]);
+    const hydrate = async () => {
+      const hydrated = await refreshStatus({
+        workspaceId: activeWorkspaceId,
+        runId: requestedRunId ?? undefined,
+      });
+      if (
+        requestedRunId &&
+        (!hydrated.id || hydrated.id !== requestedRunId)
+      ) {
+        setRequestedRunNotFound(true);
+        setRun({
+          id: requestedRunId,
+          status: "error",
+          workspaceId: activeWorkspaceId,
+          sessionId: activeWorkspaceId,
+          error: `Run "${requestedRunId}" was not found.`,
+          messages: [],
+          traces: [],
+          toolInserts: [],
+        });
+        return;
+      }
+      setRequestedRunNotFound(false);
+    };
+    hydrate().catch((err) => console.error(err));
+  }, [activeWorkspaceId, refreshStatus, requestedRunId, setRun]);
 
   useEffect(() => {
     if (!deckInputSchema) return;
@@ -468,7 +312,7 @@ export default function TestBotPage(props: {
 
   useEffect(() => {
     if (run.status === "error" && run.error) {
-      console.error("[test-bot] run error (state)", run.error);
+      console.error("[scenario] run error (state)", run.error);
     }
   }, [run.error, run.status]);
 
@@ -525,120 +369,6 @@ export default function TestBotPage(props: {
       );
   }, [deckSchema.schemaResponse?.tools]);
 
-  const toolCallSummaries = useMemo(
-    () => summarizeToolCalls(run.traces ?? []),
-    [run.traces],
-  );
-
-  const toolBuckets = useMemo(() => {
-    const deriveInsertsFromTraces = (
-      traces: TraceEvent[],
-      messageCount: number,
-    ) => {
-      const inserts: Array<{
-        actionCallId?: string;
-        parentActionCallId?: string;
-        name?: string;
-        index: number;
-      }> = [];
-      let messageIndex = 0;
-      for (const trace of traces) {
-        if (!trace || typeof trace !== "object") continue;
-        const traceRecord = trace as Record<string, unknown>;
-        const type = typeof traceRecord.type === "string"
-          ? traceRecord.type
-          : "";
-        if (type === "message.user") {
-          messageIndex++;
-          continue;
-        }
-        if (type === "model.result") {
-          const finishReason = typeof traceRecord.finishReason === "string"
-            ? traceRecord.finishReason
-            : "";
-          if (finishReason !== "tool_calls") {
-            messageIndex++;
-          }
-          continue;
-        }
-        if (type === "tool.call") {
-          const actionCallId = typeof traceRecord.actionCallId === "string"
-            ? traceRecord.actionCallId
-            : undefined;
-          const parentActionCallId =
-            typeof traceRecord.parentActionCallId === "string"
-              ? traceRecord.parentActionCallId
-              : undefined;
-          const name = typeof traceRecord.name === "string"
-            ? traceRecord.name
-            : undefined;
-          inserts.push({
-            actionCallId,
-            parentActionCallId,
-            name,
-            index: Math.min(messageIndex, messageCount),
-          });
-        }
-      }
-      return inserts;
-    };
-    const map = new Map<number, ReturnType<typeof summarizeToolCalls>>();
-    if (!toolCallSummaries.length) return map;
-    const traceInserts = Array.isArray(run.traces) && run.traces.length > 0
-      ? deriveInsertsFromTraces(run.traces, run.messages.length)
-      : [];
-    const insertMap = new Map<
-      string,
-      { index: number; name?: string; parentActionCallId?: string }
-    >();
-    const inserts = traceInserts.length > 0 ? traceInserts : run.toolInserts ??
-      [];
-    inserts.forEach((insert) => {
-      if (
-        typeof insert?.index === "number" &&
-        insert.index >= 0 &&
-        insert.actionCallId
-      ) {
-        insertMap.set(insert.actionCallId, {
-          index: insert.index,
-          name: insert.name ?? undefined,
-          parentActionCallId: insert.parentActionCallId ?? undefined,
-        });
-      }
-    });
-    for (const call of toolCallSummaries) {
-      const insert = call.id ? insertMap.get(call.id) : undefined;
-      const index = insert?.index ?? run.messages.length;
-      const enriched = insert
-        ? {
-          ...call,
-          name: call.name ?? insert.name,
-          parentActionCallId: call.parentActionCallId ??
-            insert.parentActionCallId,
-        }
-        : call;
-      const bucket = map.get(index);
-      if (bucket) {
-        bucket.push(enriched);
-      } else {
-        map.set(index, [enriched]);
-      }
-    }
-    return map;
-  }, [toolCallSummaries, run.toolInserts, run.traces, run.messages.length]);
-  const assistantLatencyByMessageIndex = useMemo(() => {
-    const map: Record<number, number> = {};
-    let assistantTurn = 0;
-    run.messages.forEach((msg, index) => {
-      if (msg.role !== "assistant") return;
-      const latency = latencyByTurn[assistantTurn];
-      if (typeof latency === "number") {
-        map[index] = latency;
-      }
-      assistantTurn += 1;
-    });
-    return map;
-  }, [run.messages, latencyByTurn]);
   const canRunPersona = testDecks.length > 0;
   const hasPersonaSelection = Boolean(selectedDeckId);
   const hasDeckSelection = !canRunPersona || hasPersonaSelection;
@@ -660,56 +390,6 @@ export default function TestBotPage(props: {
     };
   }, [run.status, refreshStatus]);
 
-  useEffect(() => {
-    if (
-      streamingUser?.expectedUserCount !== undefined &&
-      streamingUser.runId === run.id &&
-      countUserMessages(run.messages) >= streamingUser.expectedUserCount
-    ) {
-      setStreamingUser(null);
-    }
-    if (run.status !== "running" && streamingUser) {
-      setStreamingUser(null);
-    }
-    if (optimisticUser) {
-      const lastUser = [...run.messages].reverse().find((msg) =>
-        msg.role === "user"
-      );
-      if (lastUser?.content === optimisticUser.text) {
-        setOptimisticUser(null);
-      }
-    }
-    if (run.status !== "running" && optimisticUser) {
-      setOptimisticUser(null);
-    }
-    if (
-      streamingAssistant &&
-      run.messages.some((msg) =>
-        msg.role === "assistant" &&
-        typeof msg.content === "string" &&
-        msg.content.includes(streamingAssistant.text)
-      )
-    ) {
-      setStreamingAssistant(null);
-    }
-    const el = transcriptRef.current;
-    if (!el) return;
-    const shouldScroll = run.messages.length > lastRunMessageCountRef.current ||
-      Boolean(streamingUser?.text || streamingAssistant?.text);
-    lastRunMessageCountRef.current = run.messages.length;
-    if (!shouldScroll) return;
-    const frame = requestAnimationFrame(() => {
-      el.scrollTop = el.scrollHeight;
-    });
-    return () => cancelAnimationFrame(frame);
-  }, [
-    run.id,
-    run.messages.length,
-    run.status,
-    streamingUser,
-    streamingAssistant?.text,
-  ]);
-
   const startRun = useCallback(async () => {
     try {
       allowRunSessionNavRef.current = true;
@@ -718,7 +398,7 @@ export default function TestBotPage(props: {
         : null;
       if (initFillRequest) {
         setLastInitFill(initFillRequest);
-        console.info("[test-bot] init fill requested", initFillRequest);
+        console.info("[scenario] init fill requested", initFillRequest);
       }
       const payload: Record<string, unknown> = {
         botInput: botInputValue,
@@ -728,35 +408,30 @@ export default function TestBotPage(props: {
         initFill: missingDeckInit.length > 0
           ? { missing: missingDeckInit }
           : undefined,
-        sessionId: activeSessionId ?? undefined,
+        workspaceId: activeWorkspaceId ?? undefined,
       };
-      const res = await fetch("/api/test/run", {
-        method: "POST",
-        headers: { "content-type": "application/json" },
-        body: JSON.stringify(payload),
-      });
-      const data = await res.json().catch(() => ({})) as {
+      const data = await startTestRunApi(payload) as {
         run?: TestBotRun;
         error?: string;
         initFill?: TestBotRun["initFill"];
         sessionPath?: string;
       };
-      if (!res.ok) {
+      if (!data.run) {
         allowRunSessionNavRef.current = false;
         if (data.initFill) {
           setLastInitFill(data.initFill);
-          console.info("[test-bot] init fill error", data.initFill);
+          console.info("[scenario] init fill error", data.initFill);
         }
         if (data.sessionPath) {
           console.info(
-            "[test-bot] init fill session saved",
+            "[scenario] init fill session saved",
             data.sessionPath,
           );
         }
         const errorMessage = typeof data.error === "string"
           ? data.error
-          : res.statusText;
-        console.error("[test-bot] run error", errorMessage);
+          : "Failed to start scenario run";
+        console.error("[scenario] run error", errorMessage);
         setRun({
           status: "error",
           error: errorMessage,
@@ -767,28 +442,13 @@ export default function TestBotPage(props: {
         });
         return;
       }
-      if (data.run) {
-        if (data.run.initFill) {
-          setLastInitFill(data.run.initFill);
-          console.info("[test-bot] init fill applied", data.run.initFill);
-        }
-        setRun({
-          ...data.run,
-          messages: data.run.messages ?? [],
-          traces: data.run.traces ?? [],
-          toolInserts: data.run.toolInserts ?? [],
-        });
-      } else {
-        setRun({
-          status: "running",
-          messages: [],
-          traces: [],
-          toolInserts: [],
-        });
+      if (data.run.initFill) {
+        setLastInitFill(data.run.initFill);
+        console.info("[scenario] init fill applied", data.run.initFill);
       }
       refreshStatus({
         runId: data.run?.id,
-        sessionId: activeSessionId ?? undefined,
+        workspaceId: activeWorkspaceId ?? undefined,
       });
     } catch (err) {
       allowRunSessionNavRef.current = false;
@@ -799,51 +459,41 @@ export default function TestBotPage(props: {
     botInputValue,
     initialUserMessage,
     refreshStatus,
+    startTestRunApi,
     selectedDeckId,
     missingDeckInit,
-    activeSessionId,
+    activeWorkspaceId,
   ]);
 
   const stopRun = useCallback(async () => {
     if (!run.id) return;
     try {
-      await fetch("/api/test/stop", {
-        method: "POST",
-        headers: { "content-type": "application/json" },
-        body: JSON.stringify({ runId: run.id }),
-      });
+      await stopTestRunApi(run.id);
     } catch (err) {
       console.error(err);
     } finally {
       refreshStatus({ runId: run.id });
     }
-  }, [refreshStatus, run.id]);
+  }, [refreshStatus, run.id, stopTestRunApi]);
 
   const handleNewChat = useCallback(async () => {
     if (run.status === "running") {
       await stopRun();
     }
-    setRun({
-      id: "",
-      status: "idle",
-      messages: [],
-      traces: [],
-      toolInserts: [],
-      sessionId: undefined,
-    });
+    resetTestRunApi();
+    setRequestedRunNotFound(false);
     onResetTestBotSession();
-  }, [onResetTestBotSession, run.status, stopRun]);
+  }, [onResetTestBotSession, run.status, stopRun, resetTestRunApi]);
 
   useEffect(() => {
     handleNewChatRef.current = handleNewChat;
   }, [handleNewChat]);
 
   useEffect(() => {
-    if (!resetSkipRef.current) {
-      resetSkipRef.current = true;
-      return;
-    }
     if (resetToken === undefined) return;
+    const previous = lastResetTokenRef.current;
+    lastResetTokenRef.current = resetToken;
+    if (previous === undefined || previous === resetToken) return;
     handleNewChatRef.current();
   }, [resetToken]);
 
@@ -853,79 +503,49 @@ export default function TestBotPage(props: {
     return () => setNavActions(null);
   }, [handleNewChat, setNavActions]);
 
-  const saveTestBotFeedback = useCallback(
-    async (messageRefId: string, score: number | null, reason?: string) => {
-      if (!run.sessionId) return;
-      if (onFeedbackUpdate) {
-        if (score === null) {
-          onFeedbackUpdate(messageRefId, null);
-        } else {
-          onFeedbackUpdate(messageRefId, {
-            id: `optimistic:${messageRefId}:${Date.now()}`,
-            runId: run.id || "optimistic",
-            messageRefId,
-            score,
-            reason,
-            createdAt: new Date().toISOString(),
-          });
-        }
-      }
-      try {
-        const res = await fetch("/api/session/feedback", {
-          method: "POST",
-          headers: { "content-type": "application/json" },
-          body: JSON.stringify({
-            sessionId: run.sessionId,
-            messageRefId,
-            score,
-            reason,
-          }),
-        });
-        if (!res.ok) throw new Error(res.statusText);
-        const data = await res.json() as {
-          feedback?: FeedbackEntry;
-          deleted?: boolean;
-        };
-        if (data.deleted) {
-          setRun((prev) => ({
-            ...prev,
-            messages: prev.messages.map((msg) =>
-              msg.messageRefId === messageRefId
-                ? { ...msg, feedback: undefined }
-                : msg
-            ),
-          }));
-          onFeedbackUpdate?.(messageRefId, null);
-          return;
-        }
-        if (data.feedback) {
-          setRun((prev) => ({
-            ...prev,
-            messages: prev.messages.map((msg) =>
-              msg.messageRefId === messageRefId
-                ? { ...msg, feedback: data.feedback }
-                : msg
-            ),
-          }));
-          onFeedbackUpdate?.(messageRefId, data.feedback);
-        }
-      } catch (err) {
-        console.error(err);
-      }
-    },
-    [onFeedbackUpdate, run.id, run.sessionId],
-  );
+  // Saves feedback for one message, then invokes the optional
+  // onFeedbackPersisted callback. Throws when no workspace id is available or
+  // when the active workspace id differs from the run's workspace id.
+  const saveTestBotFeedback = useCallback(async (
+    messageRefId: string,
+    score: number | null,
+    reason?: string,
+  ) => {
+    const feedbackWorkspaceId = activeWorkspaceId ?? runWorkspaceId;
+    if (!feedbackWorkspaceId) {
+      throw new Error("Missing workspace context for feedback save");
+    }
+    if (runWorkspaceId && feedbackWorkspaceId !== runWorkspaceId) {
+      throw new Error(
+        "Active workspace does not match the current test run workspace",
+      );
+    }
+    await saveTestFeedbackApi({
+      workspaceId: feedbackWorkspaceId,
+      runId: run.id || undefined,
+      messageRefId,
+      score,
+      reason,
+    });
+    onFeedbackPersisted?.(feedbackWorkspaceId);
+  }, [
+    activeWorkspaceId,
+    onFeedbackPersisted,
+    run.id,
+    runWorkspaceId,
+    saveTestFeedbackApi,
+  ]);
 
   const handleTestBotScore = useCallback(
-    (messageRefId: string, score: number | null) => {
-      saveTestBotFeedback(messageRefId, score);
+    async (messageRefId: string, score: number | null) => {
+      await saveTestBotFeedback(messageRefId, score);
     },
     [saveTestBotFeedback],
   );
 
   const handleTestBotReason = useCallback(
-    (messageRefId: string, score: number, reason: string) => {
-      saveTestBotFeedback(messageRefId, score, reason);
+    async (messageRefId: string, score: number, reason: string) => {
+      await saveTestBotFeedback(messageRefId, score, reason);
     },
     [saveTestBotFeedback],
   );
@@ -959,7 +579,7 @@ export default function TestBotPage(props: {
   const canStartAssistant = showStartOverlay &&
     !chatSending &&
     run.status !== "running" &&
-    (run.sessionId ||
+    (Boolean(runWorkspaceId) ||
       (deckJsonErrorCount === 0 && missingDeckInit.length === 0));
 
   const canSendChat = hasDeckSelection &&
@@ -967,139 +587,57 @@ export default function TestBotPage(props: {
     !chatSending &&
     chatDraft.trim().length > 0 &&
     !showStartOverlay &&
-    (run.sessionId ||
+    (Boolean(runWorkspaceId) ||
       (deckJsonErrorCount === 0 && missingDeckInit.length === 0));
 
   const handleStartAssistant = useCallback(async () => {
     if (!hasDeckSelection || chatSending) return;
-    setChatSending(true);
-    setChatError(null);
-    let nextRunId = run.id;
-    if (!nextRunId) {
-      nextRunId = `testbot-ui-${crypto.randomUUID()}`;
-      setRun((prev) => ({
-        ...prev,
-        id: nextRunId,
-        status: "running",
-        error: undefined,
-        messages: prev.messages ?? [],
-        traces: prev.traces ?? [],
-        toolInserts: prev.toolInserts ?? [],
-      }));
-    }
+    allowRunSessionNavRef.current = true;
     try {
-      const payload: Record<string, unknown> = {
-        message: "",
-        runId: nextRunId,
-        sessionId: run.sessionId ?? activeSessionId ?? undefined,
+      await startTestAssistantApi({
+        runId: run.id,
+        workspaceId: activeWorkspaceId ?? undefined,
+        runWorkspaceId: runWorkspaceId ?? undefined,
         botDeckPath: selectedDeckId ?? undefined,
-      };
-      if (!run.sessionId) {
-        payload.context = deckInitValue;
-      }
-      const res = await fetch("/api/test/message", {
-        method: "POST",
-        headers: { "content-type": "application/json" },
-        body: JSON.stringify(payload),
+        context: !runWorkspaceId ? deckInitValue : undefined,
       });
-      const data = await res.json().catch(() => ({})) as {
-        run?: TestBotRun;
-        error?: string;
-      };
-      if (!res.ok) {
-        throw new Error(
-          typeof data.error === "string" ? data.error : res.statusText,
-        );
-      }
-      if (data.run) {
-        setRun({
-          ...data.run,
-          messages: data.run.messages ?? [],
-          traces: data.run.traces ?? [],
-          toolInserts: data.run.toolInserts ?? [],
-        });
-      }
-    } catch (err) {
-      setChatError(err instanceof Error ? err.message : String(err));
-    } finally {
-      setChatSending(false);
+    } catch {
+      // Error state is set in context.
     }
   }, [
     chatSending,
     deckInitValue,
     hasDeckSelection,
     run.id,
-    run.sessionId,
+    runWorkspaceId,
     selectedDeckId,
-    activeSessionId,
+    activeWorkspaceId,
+    startTestAssistantApi,
   ]);
 
   const handleSendChat = useCallback(async () => {
     const message = chatDraft.trim();
     if (!message) return;
-    setChatSending(true);
-    setChatError(null);
-    let nextRunId = run.id;
-    const optimisticId = crypto.randomUUID();
-    if (!nextRunId) {
-      nextRunId = `testbot-ui-${crypto.randomUUID()}`;
-      setRun((prev) => ({
-        ...prev,
-        id: nextRunId,
-        status: "running",
-        error: undefined,
-        messages: prev.messages ?? [],
-        traces: prev.traces ?? [],
-        toolInserts: prev.toolInserts ?? [],
-      }));
-    }
-    setOptimisticUser({ id: optimisticId, text: message });
-    setChatDraft("");
+    allowRunSessionNavRef.current = true;
     try {
-      const payload: Record<string, unknown> = {
-        message,
-        runId: nextRunId,
-        sessionId: run.sessionId ?? activeSessionId ?? undefined,
+      await sendTestMessageApi(message, {
+        runId: run.id,
+        workspaceId: activeWorkspaceId ?? undefined,
+        runWorkspaceId: runWorkspaceId ?? undefined,
         botDeckPath: selectedDeckId ?? undefined,
-      };
-      if (!run.sessionId) {
-        payload.context = deckInitValue;
-      }
-      const res = await fetch("/api/test/message", {
-        method: "POST",
-        headers: { "content-type": "application/json" },
-        body: JSON.stringify(payload),
+        context: !runWorkspaceId ? deckInitValue : undefined,
       });
-      const data = await res.json().catch(() => ({})) as {
-        run?: TestBotRun;
-        error?: string;
-      };
-      if (!res.ok) {
-        throw new Error(
-          typeof data.error === "string" ? data.error : res.statusText,
-        );
-      }
-      if (data.run) {
-        setRun({
-          ...data.run,
-          messages: data.run.messages ?? [],
-          traces: data.run.traces ?? [],
-          toolInserts: data.run.toolInserts ?? [],
-        });
-      }
-    } catch (err) {
-      setChatError(err instanceof Error ? err.message : String(err));
-    } finally {
-      setChatSending(false);
+    } catch {
+      // Error state is set in context.
     }
   }, [
     chatDraft,
     deckInitValue,
     run.id,
-    run.sessionId,
-    run.status,
+    runWorkspaceId,
     selectedDeckId,
-    activeSessionId,
+    activeWorkspaceId,
+    sendTestMessageApi,
   ]);
 
   return (
@@ -1115,7 +653,7 @@ export default function TestBotPage(props: {
           <Panel className="test-bot-sidebar flex-column gap-8 flex-1">
             <div className="flex-row gap-8 items-center">
               <div className="flex-1">
-                <strong>Test deck</strong>
+                <strong>Scenario deck</strong>
               </div>
               <Button
                 variant="primary"
@@ -1123,7 +661,7 @@ export default function TestBotPage(props: {
                 disabled={!canStart}
                 data-testid="testbot-run"
               >
-                Run test bot
+                Run scenario
               </Button>
             </div>
             {testDecks.length > 0 && (
@@ -1147,7 +685,7 @@ export default function TestBotPage(props: {
             {botDescription && (
               <div className="placeholder">{botDescription}</div>
             )}
-            <strong>Test deck input</strong>
+            <strong>Scenario deck input</strong>
             <div style={{ flex: 1 }}>
               {botInputSchemaError && (
                 <div className="error">{botInputSchemaError}</div>
@@ -1168,7 +706,7 @@ export default function TestBotPage(props: {
               )}
               {!botInputSchema && (
                 <div className="placeholder">
-                  No test bot input schema configured.
+                  No scenario input schema configured.
                 </div>
               )}
             </div>
@@ -1331,340 +869,38 @@ export default function TestBotPage(props: {
           </Panel>
         </div>
 
-        <Panel className="flex-column gap-8">
-          <div className="flex-row gap-8 items-center">
-            <div className="flex-column flex-1 gap-4">
-              <div className="flex-row items-center gap-8">
-                <strong>Test run</strong>
-                <Badge variant={run.status} data-testid="testbot-status">
-                  {runStatusLabel}
-                </Badge>
-              </div>
-            </div>
-            <div className="flex-row row-reverse gap-8 wrap">
-              <Button
-                variant="ghost"
-                onClick={stopRun}
-                disabled={run.status !== "running"}
-                data-testid="testbot-stop"
-              >
-                Stop
-              </Button>
-              <Button variant="secondary" onClick={handleNewChat}>
-                New chat
-              </Button>
-            </div>
-          </div>
-          {run.error && <div className="error">{run.error}</div>}
-          {(run.initFill ?? lastInitFill) && (
-            <div className="patch-card">
-              <div className="patch-summary">Init fill</div>
-              {(run.initFill ?? lastInitFill)?.error && (
-                <div className="error">
-                  {(run.initFill ?? lastInitFill)?.error}
-                </div>
-              )}
-              <div className="patch-meta">
-                Requested: {(run.initFill ?? lastInitFill)?.requested?.length
-                  ? (run.initFill ?? lastInitFill)!.requested.join(", ")
-                  : "none"}
-              </div>
-              {(run.initFill ?? lastInitFill)?.applied !== undefined && (
-                <pre className="trace-json">
-                  {formatJson((run.initFill ?? lastInitFill)?.applied)}
-                </pre>
-              )}
-              {(run.initFill ?? lastInitFill)?.applied === undefined && (
-                <div className="patch-meta">No fills applied.</div>
-              )}
-            </div>
-          )}
-          {!canStart && canRunPersona && (
-            <div className="error">
-              {!hasPersonaSelection
-                ? "Select a persona deck to run."
-                : botJsonErrorCount > 0 || deckJsonErrorCount > 0
-                ? "Fix invalid JSON fields to run."
-                : missingBotInput.length > 0
-                ? `Missing required bot inputs: ${
-                  missingBotInput.slice(0, 6).join(", ")
-                }${missingBotInput.length > 6 ? "…" : ""}`
-                : missingDeckInit.length > 0
-                ? `Missing required init fields: ${
-                  missingDeckInit.slice(0, 6).join(", ")
-                }${missingDeckInit.length > 6 ? "…" : ""}`
-                : ""}
-            </div>
-          )}
-          {canStart && missingDeckInit.length > 0 && (
-            <div className="placeholder">
-              Missing required init fields will be requested from the persona:
-              {" "}
-              {missingDeckInit.slice(0, 6).join(", ")}
-              {missingDeckInit.length > 6 ? "…" : ""}
-            </div>
-          )}
-          <div className="test-bot-thread">
-            <div
-              className="imessage-thread"
-              ref={transcriptRef}
-            >
-              {run.messages.length === 0 && (
-                <div className="placeholder">No messages yet.</div>
-              )}
-              {(() => {
-                const rows: React.ReactNode[] = [];
-                const renderToolBucket = (index: number) => {
-                  const bucket = toolBuckets.get(index);
-                  if (!bucket || bucket.length === 0) return;
-                  const isOpen = Boolean(toolCallsOpen[index]);
-                  let latencyLabel: string | null = null;
-                  for (let i = index; i < run.messages.length; i += 1) {
-                    if (run.messages[i]?.role === "assistant") {
-                      const latency = assistantLatencyByMessageIndex[i];
-                      if (typeof latency === "number") {
-                        latencyLabel = `${Math.max(0, Math.round(latency))}ms`;
-                      }
-                      break;
-                    }
-                  }
-                  rows.push(
-                    <div
-                      key={`tool-bucket-${index}`}
-                      className="tool-calls-collapsible"
-                    >
-                      <button
-                        type="button"
-                        className="tool-calls-toggle"
-                        onClick={() => setToolCallsOpen((prev) => ({
-                          ...prev,
-                          [index]: !prev[index],
-                        }))}
-                      >
-                        <span className="tool-calls-toggle-label">
-                          Tool calls ({bucket.length})
-                          {latencyLabel ? ` · ${latencyLabel}` : ""} ·{" "}
-                          {isOpen ? "Hide" : "Show"}
-                        </span>
-                      </button>
-                      {isOpen && (
-                        <div className="tool-calls-list">
-                          {bucket.map((call, callIdx) => (
-                            <ToolCallBubble
-                              key={`tool-${call.id}-${index}-${callIdx}`}
-                              call={call}
-                            />
-                          ))}
-                        </div>
-                      )}
-                    </div>,
-                  );
-                };
-                renderToolBucket(0);
-                run.messages.forEach((m, idx) => {
-                  const messageKey = m.messageRefId ?? `${m.role}-${idx}`;
-                  rows.push(
-                    <div
-                      key={messageKey}
-                      className={`imessage-row ${
-                        m.role === "user" ? "right" : "left"
-                      }`}
-                    >
-                      <div
-                        className={`imessage-bubble ${
-                          m.role === "user" ? "right" : "left"
-                        }`}
-                        title={m.role}
-                      >
-                        {(
-                            m.respondPayload !== undefined ||
-                            m.respondMeta !== undefined ||
-                            typeof m.respondStatus === "number" ||
-                            typeof m.respondMessage === "string" ||
-                            typeof m.respondCode === "string"
-                          )
-                          ? (
-                            <div className="respond-summary">
-                              <div className="respond-meta">
-                                <Badge>gambit_respond</Badge>
-                                {typeof m.respondStatus === "number" && (
-                                  <Badge variant="ghost">
-                                    status {m.respondStatus}
-                                  </Badge>
-                                )}
-                                {m.respondCode && (
-                                  <Badge variant="ghost">
-                                    code {m.respondCode}
-                                  </Badge>
-                                )}
-                              </div>
-                              {m.respondMessage && (
-                                <div className="respond-message">
-                                  {m.respondMessage}
-                                </div>
-                              )}
-                              {m.respondPayload !== undefined && (
-                                <pre className="bubble-json">
-                                  {formatJson(m.respondPayload)}
-                                </pre>
-                              )}
-                              {m.respondMeta && (
-                                <details className="respond-meta-details">
-                                  <summary>Meta</summary>
-                                  <pre className="bubble-json">
-                                    {formatJson(m.respondMeta)}
-                                  </pre>
-                                </details>
-                              )}
-                            </div>
-                          )
-                          : m.content}
-                        {m.messageRefId && run.sessionId && (
-                          <FeedbackControls
-                            messageRefId={m.messageRefId}
-                            feedback={m.feedback}
-                            onScore={handleTestBotScore}
-                            onReasonChange={handleTestBotReason}
-                          />
-                        )}
-                      </div>
-                    </div>,
-                  );
-                  renderToolBucket(idx + 1);
-                });
-                return rows;
-              })()}
-              {streamingUser?.text && streamingUser.runId === run.id &&
-                (streamingUser.expectedUserCount === undefined ||
-                  countUserMessages(run.messages) <
-                    streamingUser.expectedUserCount) &&
-                (
-                  <div className="imessage-row right">
-                    <div
-                      className="imessage-bubble right imessage-bubble-muted"
-                      title="user"
-                    >
-                      {streamingUser.text}
-                    </div>
-                  </div>
-                )}
-              {optimisticUser && (
-                <div className="imessage-row right">
-                  <div
-                    className="imessage-bubble right"
-                    title="user"
-                  >
-                    {optimisticUser.text}
-                  </div>
-                </div>
-              )}
-              {streamingAssistant?.text &&
-                streamingAssistant.runId === run.id &&
-                (
-                  <div className="imessage-row left">
-                    <div
-                      className="imessage-bubble left imessage-bubble-muted"
-                      title="assistant"
-                    >
-                      {streamingAssistant.text}
-                    </div>
-                  </div>
-                )}
-            </div>
-            <div className="composer">
-              <div className="composer-inputs">
-                {isUserStart && run.messages.length === 0 &&
-                  !streamingAssistant?.text && !streamingUser?.text && (
-                  <div className="placeholder emphasis">
-                    This deck expects a user message to kick things off.
-                  </div>
-                )}
-                <div className="flex-row gap-4 mb-2">
-                  <textarea
-                    className="message-input flex-1"
-                    rows={1}
-                    placeholder={showStartOverlay
-                      ? "Start the assistant to begin..."
-                      : isUserStart && run.messages.length === 0
-                      ? "Send the first message to begin..."
-                      : "Message the assistant..."}
-                    value={chatDraft}
-                    onChange={(e) => setChatDraft(e.target.value)}
-                    disabled={showStartOverlay}
-                    onKeyDown={(e) => {
-                      if (e.key === "Enter" && !e.shiftKey) {
-                        e.preventDefault();
-                        if (canSendChat) {
-                          handleSendChat();
-                        }
-                      }
-                    }}
-                  />
-                  <div className="composer-actions">
-                    <Button
-                      variant="primary"
-                      onClick={handleSendChat}
-                      disabled={!canSendChat}
-                      data-testid="testbot-chat-send"
-                    >
-                      Send
-                    </Button>
-                  </div>
-                </div>
-              </div>
-              {chatError && <div className="error">{chatError}</div>}
-            </div>
-            {showStartOverlay && (
-              <div className="test-bot-thread-overlay">
-                <div className="test-bot-thread-card">
-                  <strong className="test-bot-thread-title">
-                    Choose how to start
-                  </strong>
-                  <div className="placeholder test-bot-thread-subtitle">
-                    Pick the flow you want: manual conversation or a full test
-                    bot run.
-                  </div>
-                  <div className="test-bot-thread-sections">
-                    <div className="test-bot-thread-section">
-                      <div className="test-bot-thread-section-title">
-                        Start the assistant
-                      </div>
-                      <div className="test-bot-thread-section-body">
-                        Use this when you want to explore the chat manually.
-                      </div>
-                      <Button
-                        variant="secondary"
-                        onClick={handleStartAssistant}
-                        disabled={!canStartAssistant}
-                        data-testid="testbot-start-assistant"
-                      >
-                        Start assistant
-                      </Button>
-                    </div>
-                    <div className="test-bot-thread-section">
-                      <div className="test-bot-thread-section-title">
-                        Run test bot
-                      </div>
-                      <div className="test-bot-thread-section-body">
-                        Run the configured test bot to execute the scenario
-                        end-to-end.
-                      </div>
-                      <Button
-                        variant="primary"
-                        onClick={startRun}
-                        disabled={!canStart}
-                        data-testid="testbot-run-overlay"
-                      >
-                        Run test bot
-                      </Button>
-                    </div>
-                  </div>
-                  {chatError && <div className="error">{chatError}</div>}
-                </div>
-              </div>
-            )}
-          </div>
-        </Panel>
+        <TestBotChatPanel
+          run={run}
+          runWorkspaceId={runWorkspaceId}
+          runStatusLabel={runStatusLabel}
+          activeWorkspaceId={activeWorkspaceId}
+          requestedRunNotFound={requestedRunNotFound}
+          canStart={canStart}
+          canRunPersona={canRunPersona}
+          hasPersonaSelection={hasPersonaSelection}
+          botJsonErrorCount={botJsonErrorCount}
+          deckJsonErrorCount={deckJsonErrorCount}
+          missingBotInput={missingBotInput}
+          missingDeckInit={missingDeckInit}
+          lastInitFill={lastInitFill}
+          isUserStart={isUserStart}
+          showStartOverlay={showStartOverlay}
+          canStartAssistant={canStartAssistant}
+          canSendChat={canSendChat}
+          chatDraft={chatDraft}
+          setChatDraft={setChatDraft}
+          chatError={chatError}
+          optimisticUser={optimisticUser}
+          streamingUser={streamingUser}
+          streamingAssistant={streamingAssistant}
+          startRun={startRun}
+          stopRun={stopRun}
+          handleNewChat={handleNewChat}
+          handleSendChat={handleSendChat}
+          handleStartAssistant={handleStartAssistant}
+          onScore={handleTestBotScore}
+          onReasonChange={handleTestBotReason}
+        />
       </PageGrid>
     </PageShell>
   );
diff --git a/simulator-ui/src/WorkbenchDrawer.tsx b/simulator-ui/src/WorkbenchDrawer.tsx
index 2b2f3a11e..37985eece 100644
--- a/simulator-ui/src/WorkbenchDrawer.tsx
+++ b/simulator-ui/src/WorkbenchDrawer.tsx
@@ -223,17 +223,17 @@ export default function WorkbenchDrawer(props: WorkbenchDrawerProps) {
     setChatHistoryLoading(true);
     setChatHistoryError(null);
     try {
-      const res = await fetch("/api/build/runs");
+      const res = await fetch("/workspaces");
       if (!res.ok) throw new Error(res.statusText);
       const data = await res.json() as {
-        runs?: Array<{ id?: string; updatedAt?: string; startedAt?: string }>;
+        sessions?: Array<{ id?: string; createdAt?: string }>;
       };
-      const runs = Array.isArray(data.runs)
-        ? data.runs.filter((entry) => typeof entry?.id === "string").map(
+      const runs = Array.isArray(data.sessions)
+        ? data.sessions.filter((entry) => typeof entry?.id === "string").map(
           (entry) => ({
             id: entry.id as string,
-            updatedAt: entry.updatedAt,
-            startedAt: entry.startedAt,
+            updatedAt: entry.createdAt,
+            startedAt: entry.createdAt,
           }),
         )
         : [];
@@ -284,14 +284,6 @@ export default function WorkbenchDrawer(props: WorkbenchDrawerProps) {
   if (!open) return null;
   return (
     <aside className="workbench-drawer-docked" role="dialog">
-      <header className="workbench-drawer-header">
-        <strong>Workbench</strong>
-        {onClose && (
-          <Button variant="ghost" onClick={onClose} aria-label="Close">
-            <Icon name="close" size={14} />
-          </Button>
-        )}
-      </header>
       <Accordion
         allowMultiple
         className="workbench-accordion equal-open"
@@ -411,7 +403,7 @@ export default function WorkbenchDrawer(props: WorkbenchDrawerProps) {
           {
             id: "workbench-ratings",
             title: "Ratings & flags",
-            defaultOpen: true,
+            defaultOpen: false,
             content: (
               <div className="workbench-ratings">
                 {showCopyStatePath && handleCopyStatePath && (
@@ -445,7 +437,7 @@ export default function WorkbenchDrawer(props: WorkbenchDrawerProps) {
                     {resolvedFeedbackItems.map(({ entry, message, role }) => {
                       const roleLabel = role === "assistant"
                         ? "Assistant message"
-                        : "Test bot message";
+                        : "Scenario message";
                       const displayScore = entry.score;
                       const scoreLabel = displayScore > 0
                         ? `+${displayScore}`
diff --git a/simulator-ui/src/WorkspaceContext.test.tsx b/simulator-ui/src/WorkspaceContext.test.tsx
new file mode 100644
index 000000000..29649ec0b
--- /dev/null
+++ b/simulator-ui/src/WorkspaceContext.test.tsx
@@ -0,0 +1,908 @@
+import { assert, assertEquals, assertRejects } from "@std/assert";
+import React from "react";
+import TestRenderer, { act } from "npm:react-test-renderer@19.2.0";
+
+const globals = globalThis as unknown as { // loosely typed view of globalThis
+  window?: Record<string, unknown>;
+  EventSource?: unknown;
+  fetch?: typeof fetch;
+  localStorage?: Storage;
+};
+if (!globals.window) globals.window = {}; // make sure a window object exists
+(globalThis as { IS_REACT_ACT_ENVIRONMENT?: boolean })
+  .IS_REACT_ACT_ENVIRONMENT = true; // global flag consulted by React's act()
+
+class MemoryStorage implements Storage { // in-memory Storage backed by a Map
+  #data = new Map<string, string>();
+
+  get length(): number {
+    return this.#data.size;
+  }
+
+  clear(): void {
+    this.#data.clear();
+  }
+  // Returns the stored string, or null when the key was never set.
+  getItem(key: string): string | null {
+    return this.#data.has(key) ? this.#data.get(key)! : null;
+  }
+  // Returns the key at `index` in the Map's iteration order, or null if none.
+  key(index: number): string | null {
+    return Array.from(this.#data.keys())[index] ?? null;
+  }
+
+  removeItem(key: string): void {
+    this.#data.delete(key);
+  }
+
+  setItem(key: string, value: string): void {
+    this.#data.set(key, value);
+  }
+}
+
+if (!globals.localStorage) { // install the stub only when none is present
+  globals.localStorage = new MemoryStorage();
+}
+const windowObj = globals.window as {
+  localStorage?: Storage;
+  location?: { pathname: string; search: string };
+};
+windowObj.localStorage = globals.localStorage; // same store on window
+if (!windowObj.location) { // default to a path under workspace ws-1
+  windowObj.location = { pathname: "/workspaces/ws-1/test", search: "" };
+}
+
+type WorkspaceSocketMessage = import("./utils.ts").WorkspaceSocketMessage;
+const { WorkspaceProvider, useWorkspaceBuild, useWorkspaceTest } = await import(
+  "./WorkspaceContext.tsx"
+); // dynamic import: evaluated only after the global setup above has run
+
+class FakeEventSource { // EventSource stand-in that the tests drive by hand
+  static instances: FakeEventSource[] = []; // appended to by the constructor
+  onmessage: ((event: MessageEvent<string>) => void) | null = null;
+  url: string;
+  closed = false;
+  // Stores the url and registers the new instance in `instances`.
+  constructor(url: string) {
+    this.url = url;
+    FakeEventSource.instances.push(this);
+  }
+  // Only flips the `closed` flag.
+  close() {
+    this.closed = true;
+  }
+  // Calls onmessage (if set) with a MessageEvent carrying JSON {offset, data}.
+  emit(message: WorkspaceSocketMessage, offset = 1) {
+    this.onmessage?.(
+      new MessageEvent("message", {
+        data: JSON.stringify({ offset, data: message }),
+      }),
+    );
+  }
+}
+
+function createSnapshot( // builds the snapshot object the fetch stubs return
+  run?: import("./utils.ts").TestBotRun, // test run; idle and empty if omitted
+  buildRun?: { // partial build run; missing fields take the defaults below
+    id?: string;
+    status?: "idle" | "running" | "completed" | "error" | "canceled";
+    messages?: Array<{ role: string; content: string }>;
+    traces?: Array<unknown>;
+    toolInserts?: Array<unknown>;
+  },
+): Record<string, unknown> {
+  return {
+    workspaceId: "ws-1", // same id the tests pass to WorkspaceProvider
+    build: {
+      run: {
+        id: buildRun?.id ?? "ws-1", // build run id defaults to the workspace id
+        status: buildRun?.status ?? "idle",
+        messages: buildRun?.messages ?? [],
+        traces: buildRun?.traces ?? [],
+        toolInserts: buildRun?.toolInserts ?? [],
+      },
+    },
+    test: {
+      run: run ?? { status: "idle", messages: [], traces: [], toolInserts: [] },
+    },
+    grade: { graderDecks: [], sessions: [] }, // always empty in this helper
+    session: { messages: [], traces: [] }, // always empty in this helper
+  };
+}
+
+Deno.test("WorkspaceContext test chat start/send/stream/reset transitions", async () => {
+  const originalFetch = globalThis.fetch;
+  const originalEventSource = globalThis.EventSource;
+  // Latest hook value (reassigned on every Harness render) and a fetch log.
+  let hook: any = null;
+  const requests: Array<{ url: string; body?: unknown }> = [];
+  // Swap in the fakes; the finally block restores the originals.
+  globalThis.EventSource = FakeEventSource as unknown as typeof EventSource;
+  globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => {
+    const url = String(input);
+    let parsedBody: unknown;
+    if (typeof init?.body === "string" && init.body.length > 0) {
+      parsedBody = JSON.parse(init.body);
+    }
+    requests.push({ url, body: parsedBody });
+    // Responses are chosen by URL suffix; anything unmatched throws below.
+    if (url.endsWith("/api/workspaces/ws-1")) {
+      return new Response(JSON.stringify(createSnapshot()), { status: 200 });
+    }
+    if (url.endsWith("/api/workspaces/ws-1/test/run-hydrated")) {
+      return new Response(
+        JSON.stringify(
+          createSnapshot({
+            id: "run-hydrated",
+            status: "completed",
+            workspaceId: "ws-1",
+            messages: [{ role: "assistant", content: "hydrated" }],
+            traces: [],
+            toolInserts: [],
+          }),
+        ),
+        { status: 200 },
+      );
+    }
+    if (url.endsWith("/api/test/message")) {
+      const body = (parsedBody ?? {}) as Record<string, unknown>;
+      if (body.message === "") { // NOTE(review): same payload as the fallthrough
+        return new Response(
+          JSON.stringify({
+            run: {
+              id: "run-1",
+              status: "running",
+              workspaceId: "ws-1",
+              messages: [],
+              traces: [],
+              toolInserts: [],
+            },
+          }),
+          { status: 200 },
+        );
+      }
+      return new Response(
+        JSON.stringify({
+          run: {
+            id: "run-1",
+            status: "running",
+            workspaceId: "ws-1",
+            messages: [],
+            traces: [],
+            toolInserts: [],
+          },
+        }),
+        { status: 200 },
+      );
+    }
+    throw new Error(`Unexpected fetch: ${url}`);
+  }) as typeof fetch;
+  // Renders nothing; exists only to copy the hook value into `hook`.
+  function Harness() {
+    hook = useWorkspaceTest();
+    return null;
+  }
+
+  let renderer: TestRenderer.ReactTestRenderer | null = null;
+  try {
+    await act(async () => {
+      renderer = TestRenderer.create(
+        <WorkspaceProvider workspaceId="ws-1">
+          <Harness />
+        </WorkspaceProvider>,
+      );
+    });
+    assert(hook);
+    // Start: expect a POST with an empty message and run-1 to become current.
+    await act(async () => {
+      await hook.startAssistant({
+        workspaceId: "ws-1",
+        runWorkspaceId: undefined,
+        botDeckPath: "deck.md",
+        context: { foo: "bar" },
+      });
+    });
+    assertEquals(hook.chatSending, false);
+    assertEquals(hook.run.id, "run-1");
+    const startReq = requests.find((req) =>
+      req.url.endsWith("/api/test/message") &&
+      (req.body as { message?: unknown })?.message === ""
+    );
+    assert(startReq);
+    // Send: expect the draft to be cleared and an optimistic user entry.
+    await act(async () => {
+      hook.setChatDraft("hello");
+    });
+    await act(async () => {
+      await hook.sendMessage("hello", {
+        runId: hook.run.id,
+        workspaceId: "ws-1",
+        runWorkspaceId: "ws-1",
+        botDeckPath: "deck.md",
+      });
+    });
+    assertEquals(hook.chatDraft, "");
+    assertEquals(hook.optimisticUser?.text, "hello");
+    // Stream: a chunk for another runId must leave streaming state untouched.
+    const stream = FakeEventSource.instances.at(-1);
+    assert(stream);
+    await act(async () => {
+      stream.emit({
+        type: "testBotStream",
+        runId: "run-other",
+        role: "assistant",
+        chunk: "ignored",
+        turn: 0,
+      });
+    });
+    assertEquals(hook.streamingAssistant, null);
+    // A chunk for the current run shows up as streamingAssistant text.
+    await act(async () => {
+      stream.emit({
+        type: "testBotStream",
+        runId: "run-1",
+        role: "assistant",
+        chunk: "partial",
+        turn: 0,
+      });
+    });
+    assertEquals(hook.streamingAssistant?.text, "partial");
+    // The stream-end event clears streamingAssistant again.
+    await act(async () => {
+      stream.emit({
+        type: "testBotStreamEnd",
+        runId: "run-1",
+        role: "assistant",
+        turn: 0,
+      });
+    });
+    assertEquals(hook.streamingAssistant, null);
+    // A status event that includes the user message clears optimisticUser.
+    await act(async () => {
+      stream.emit({
+        type: "testBotStatus",
+        run: {
+          id: "run-1",
+          status: "running",
+          workspaceId: "ws-1",
+          messages: [{ role: "user", content: "hello" }],
+          traces: [],
+          toolInserts: [],
+        },
+      });
+    });
+    assertEquals(hook.optimisticUser, null);
+    // Refresh: refreshStatus returns the hydrated run and makes it current.
+    let hydrated: any = null;
+    await act(async () => {
+      hydrated = await hook.refreshStatus({
+        workspaceId: "ws-1",
+        runId: "run-hydrated",
+      });
+    });
+    assert(hydrated);
+    assertEquals(hydrated.id, "run-hydrated");
+    assertEquals(hook.run.id, "run-hydrated");
+    // Reset: resetRun should leave an idle run and empty chat/stream state.
+    await act(async () => {
+      hook.resetRun();
+    });
+    assertEquals(hook.run.status, "idle");
+    assertEquals(hook.chatDraft, "");
+    assertEquals(hook.optimisticUser, null);
+    assertEquals(hook.streamingAssistant, null);
+    assertEquals(hook.streamingUser, null);
+  } finally {
+    if (renderer) {
+      await act(async () => {
+        renderer?.unmount();
+      });
+    }
+    globalThis.fetch = originalFetch;
+    globalThis.EventSource = originalEventSource;
+    FakeEventSource.instances = [];
+  }
+});
+
+Deno.test("WorkspaceContext build chat stop cancels run and ignores post-stop stream chunks", async () => {
+  const originalFetch = globalThis.fetch;
+  const originalEventSource = globalThis.EventSource;
+  // Latest hook value (reassigned on every Harness render) and a fetch log.
+  let hook: any = null;
+  const requests: Array<{ url: string; body?: Record<string, unknown> }> = [];
+  const preservedMessages = [
+    { role: "user", content: "keep me" },
+    { role: "assistant", content: "still here" },
+  ];
+  // Swap in the fakes; the finally block restores the originals.
+  globalThis.EventSource = FakeEventSource as unknown as typeof EventSource;
+  globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => {
+    const url = String(input);
+    let parsedBody: Record<string, unknown> | undefined;
+    if (typeof init?.body === "string" && init.body.length > 0) {
+      parsedBody = JSON.parse(init.body) as Record<string, unknown>;
+    }
+    requests.push({ url, body: parsedBody });
+    // The snapshot reports a running build that already holds two messages.
+    if (url.endsWith("/api/workspaces/ws-1")) {
+      return new Response(
+        JSON.stringify(
+          createSnapshot(undefined, {
+            id: "ws-1",
+            status: "running",
+            messages: preservedMessages,
+          }),
+        ),
+        { status: 200 },
+      );
+    }
+    if (url.endsWith("/api/build/stop")) { // stop succeeds with a canceled run
+      return new Response(
+        JSON.stringify({
+          stopped: true,
+          run: {
+            id: "ws-1",
+            status: "canceled",
+            messages: preservedMessages,
+            traces: [],
+            toolInserts: [],
+          },
+        }),
+        { status: 200 },
+      );
+    }
+    throw new Error(`Unexpected fetch: ${url}`);
+  }) as typeof fetch;
+  // Renders nothing; exists only to copy the hook value into `hook`.
+  function Harness() {
+    hook = useWorkspaceBuild();
+    return null;
+  }
+
+  let renderer: TestRenderer.ReactTestRenderer | null = null;
+  try {
+    await act(async () => {
+      renderer = TestRenderer.create(
+        <WorkspaceProvider workspaceId="ws-1">
+          <Harness />
+        </WorkspaceProvider>,
+      );
+    });
+    assert(hook);
+    assertEquals(hook.run.status, "running");
+    assertEquals(
+      hook.run.messages.map((msg: { content: string }) => msg.content),
+      [
+        "keep me",
+        "still here",
+      ],
+    );
+    // A chunk emitted before the stop is shown as streaming assistant text.
+    const stream = FakeEventSource.instances.at(-1);
+    assert(stream);
+    await act(async () => {
+      stream.emit({
+        type: "buildBotStream",
+        runId: "ws-1",
+        role: "assistant",
+        chunk: "before stop",
+        turn: 0,
+      });
+    });
+    assertEquals(hook.streamingAssistant?.text, "before stop");
+    // After stopChat: status canceled, streaming text gone, messages intact.
+    await act(async () => {
+      await hook.stopChat();
+    });
+    assertEquals(hook.run.status, "canceled");
+    assertEquals(hook.streamingAssistant, null);
+    assertEquals(
+      hook.run.messages.map((msg: { content: string }) => msg.content),
+      [
+        "keep me",
+        "still here",
+      ],
+    );
+    // The stop request body must carry the workspace id.
+    const stopReq = requests.find((req) => req.url.endsWith("/api/build/stop"));
+    assert(stopReq);
+    assertEquals(stopReq.body?.workspaceId, "ws-1");
+    // A chunk that arrives after the stop must not revive streaming state.
+    await act(async () => {
+      stream.emit({
+        type: "buildBotStream",
+        runId: "ws-1",
+        role: "assistant",
+        chunk: "after stop",
+        turn: 0,
+      });
+    });
+    assertEquals(hook.streamingAssistant, null);
+  } finally {
+    if (renderer) {
+      await act(async () => {
+        renderer?.unmount();
+      });
+    }
+    globalThis.fetch = originalFetch;
+    globalThis.EventSource = originalEventSource;
+    FakeEventSource.instances = [];
+  }
+});
+
+Deno.test("WorkspaceContext build chat stop failure restores live build updates", async () => {
+  const originalFetch = globalThis.fetch;
+  const originalEventSource = globalThis.EventSource;
+  // workspaceFetchCount counts snapshot fetches so a refresh can be detected.
+  let hook: any = null;
+  let workspaceFetchCount = 0;
+  // Swap in the fakes; the finally block restores the originals.
+  globalThis.EventSource = FakeEventSource as unknown as typeof EventSource;
+  globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => {
+    const url = String(input);
+    // First snapshot carries "initial"; later ones carry "still running".
+    if (url.endsWith("/api/workspaces/ws-1")) {
+      workspaceFetchCount += 1;
+      return new Response(
+        JSON.stringify(
+          createSnapshot(
+            undefined,
+            workspaceFetchCount > 1
+              ? {
+                id: "ws-1",
+                status: "running",
+                messages: [{ role: "assistant", content: "still running" }],
+              }
+              : {
+                id: "ws-1",
+                status: "running",
+                messages: [{ role: "assistant", content: "initial" }],
+              },
+          ),
+        ),
+        { status: 200 },
+      );
+    }
+    // The stop endpoint answers HTTP 500 whenever a string body was sent.
+    if (url.endsWith("/api/build/stop")) {
+      if (typeof init?.body !== "string") {
+        throw new Error("Expected JSON body");
+      }
+      return new Response(
+        JSON.stringify({ error: "stop failed" }),
+        { status: 500 },
+      );
+    }
+
+    throw new Error(`Unexpected fetch: ${url}`);
+  }) as typeof fetch;
+  // Renders nothing; exists only to copy the hook value into `hook`.
+  function Harness() {
+    hook = useWorkspaceBuild();
+    return null;
+  }
+
+  let renderer: TestRenderer.ReactTestRenderer | null = null;
+  try {
+    await act(async () => {
+      renderer = TestRenderer.create(
+        <WorkspaceProvider workspaceId="ws-1">
+          <Harness />
+        </WorkspaceProvider>,
+      );
+    });
+    assert(hook);
+    assertEquals(hook.run.status, "running");
+    assertEquals(workspaceFetchCount, 1);
+
+    const stream = FakeEventSource.instances.at(-1);
+    assert(stream);
+    // stopChat must reject with an Error whose message contains "stop failed".
+    await assertRejects(
+      async () => {
+        await act(async () => {
+          await hook.stopChat();
+        });
+      },
+      Error,
+      "stop failed",
+    );
+
+    // Stop failure should refresh workspace status and clear ignore-list gating.
+    assertEquals(workspaceFetchCount, 2);
+    assertEquals(hook.run.status, "running");
+    // Stream chunks for the run are applied again after the failed stop.
+    await act(async () => {
+      stream.emit({
+        type: "buildBotStream",
+        runId: "ws-1",
+        role: "assistant",
+        chunk: "resumed",
+        turn: 0,
+      });
+    });
+    assertEquals(hook.streamingAssistant?.text, "resumed");
+  } finally {
+    if (renderer) {
+      await act(async () => {
+        renderer?.unmount();
+      });
+    }
+    globalThis.fetch = originalFetch;
+    globalThis.EventSource = originalEventSource;
+    FakeEventSource.instances = [];
+  }
+});
+
+Deno.test("WorkspaceContext build stop failure does not refresh stale workspace after navigation", async () => { // Races a pending stop request against a workspace switch.
+  const originalFetch = globalThis.fetch;
+  const originalEventSource = globalThis.EventSource;
+
+  let hook: any = null;
+  let setWorkspaceId: ((value: string) => void) | null = null; // set by RootHarness so the test can navigate
+  const stopControl: { resolve?: (response: Response) => void } = {}; // lets the test decide when /api/build/stop answers
+  let ws1Fetches = 0; // GET /api/workspaces/ws-1 calls
+  let ws2Fetches = 0; // GET /api/workspaces/ws-2 calls
+
+  globalThis.EventSource = FakeEventSource as unknown as typeof EventSource;
+  globalThis.fetch = (async (input: RequestInfo | URL) => {
+    const url = String(input);
+
+    if (url.endsWith("/api/workspaces/ws-1")) {
+      ws1Fetches += 1;
+      return new Response(
+        JSON.stringify(
+          createSnapshot(
+            undefined,
+            {
+              id: "ws-1",
+              status: "running",
+              messages: [{ role: "assistant", content: "workspace one" }],
+            },
+          ),
+        ),
+        { status: 200 },
+      );
+    }
+
+    if (url.endsWith("/api/workspaces/ws-2")) {
+      ws2Fetches += 1;
+      return new Response(
+        JSON.stringify(
+          createSnapshot(
+            undefined,
+            {
+              id: "ws-2",
+              status: "idle",
+              messages: [{ role: "assistant", content: "workspace two" }],
+            },
+          ),
+        ),
+        { status: 200 },
+      );
+    }
+
+    if (url.endsWith("/api/build/stop")) {
+      return await new Promise<Response>((resolve) => { // keep the stop request pending until the test resolves it
+        stopControl.resolve = resolve;
+      });
+    }
+
+    throw new Error(`Unexpected fetch: ${url}`);
+  }) as typeof fetch;
+
+  function RootHarness() { // owns the workspace id so the provider can be re-targeted without remounting
+    const [workspaceId, setWorkspace] = React.useState("ws-1");
+    setWorkspaceId = setWorkspace;
+    return (
+      <WorkspaceProvider workspaceId={workspaceId}>
+        <InnerHarness />
+      </WorkspaceProvider>
+    );
+  }
+
+  function InnerHarness() { // captures the latest hook value on every render
+    hook = useWorkspaceBuild();
+    return null;
+  }
+
+  let renderer: TestRenderer.ReactTestRenderer | null = null;
+  try {
+    await act(async () => {
+      renderer = TestRenderer.create(
+        <RootHarness />,
+      );
+    });
+    assert(hook);
+    assert(setWorkspaceId);
+    assertEquals(hook.run.id, "ws-1");
+    assertEquals(ws1Fetches, 1);
+
+    let stopPromise!: Promise<{ ok: boolean; err?: unknown }>; // settles to a result object, so the rejection is observed without throwing
+    await act(async () => {
+      stopPromise = hook.stopChat().then( // started but deliberately not awaited: the stub has not answered yet
+        () => ({ ok: true }),
+        (err: unknown) => ({ ok: false, err }),
+      );
+    });
+
+    await act(async () => {
+      setWorkspaceId!("ws-2"); // navigate away while the stop request is still in flight
+    });
+    assertEquals(ws2Fetches, 1);
+    assertEquals(hook.run.id, "ws-2");
+
+    const resolveStopResponse = stopControl.resolve;
+    if (!resolveStopResponse) {
+      throw new Error("Expected deferred stop response resolver");
+    }
+    await act(async () => {
+      resolveStopResponse( // now let the stale stop request fail
+        new Response(JSON.stringify({ error: "stop failed" }), { status: 500 }),
+      );
+    });
+    const stopResult = await stopPromise;
+    assertEquals(stopResult.ok, false);
+
+    // Must not re-fetch stale workspace ws-1 during stop failure recovery.
+    assertEquals(ws1Fetches, 1);
+    assertEquals(hook.run.id, "ws-2");
+  } finally {
+    if (renderer) {
+      await act(async () => {
+        renderer?.unmount();
+      });
+    }
+    globalThis.fetch = originalFetch; // restore the globals patched above
+    globalThis.EventSource = originalEventSource;
+    FakeEventSource.instances = [];
+  }
+});
+
+Deno.test("WorkspaceContext feedback save lifecycle updates run messages", async () => { // Walks saveFeedback through save, delete and error responses.
+  const originalFetch = globalThis.fetch;
+  const originalEventSource = globalThis.EventSource;
+
+  let hook: any = null;
+  const feedbackRequests: Array<Record<string, unknown>> = []; // parsed bodies sent to /api/session/feedback
+  let requestMode: "save" | "delete" | "error" = "save"; // selects which response the feedback stub returns
+
+  globalThis.EventSource = FakeEventSource as unknown as typeof EventSource;
+  globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => {
+    const url = String(input);
+    let parsedBody: Record<string, unknown> = {};
+    if (typeof init?.body === "string" && init.body.length > 0) {
+      parsedBody = JSON.parse(init.body) as Record<string, unknown>;
+    }
+
+    if (url.endsWith("/api/workspaces/ws-1")) {
+      return new Response(JSON.stringify(createSnapshot()), { status: 200 });
+    }
+    if (url.endsWith("/api/session/feedback")) {
+      feedbackRequests.push(parsedBody);
+      if (requestMode === "error") {
+        return new Response("write failed", { status: 500 });
+      }
+      if (requestMode === "delete") {
+        return new Response(
+          JSON.stringify({
+            workspaceId: "ws-1",
+            deleted: true,
+          }),
+          { status: 200 },
+        );
+      }
+      return new Response( // default ("save") response echoes a stored feedback entry
+        JSON.stringify({
+          workspaceId: "ws-1",
+          deleted: false,
+          feedback: {
+            id: "fb-1",
+            runId: "run-1",
+            messageRefId: "assistant-1",
+            score: 2,
+            reason: "solid answer",
+            createdAt: new Date().toISOString(),
+          },
+        }),
+        { status: 200 },
+      );
+    }
+    throw new Error(`Unexpected fetch: ${url}`);
+  }) as typeof fetch;
+
+  function Harness() { // captures the latest test-slice hook value on every render
+    hook = useWorkspaceTest();
+    return null;
+  }
+
+  let renderer: TestRenderer.ReactTestRenderer | null = null;
+  try {
+    await act(async () => {
+      renderer = TestRenderer.create(
+        <WorkspaceProvider workspaceId="ws-1">
+          <Harness />
+        </WorkspaceProvider>,
+      );
+    });
+    assert(hook);
+
+    await act(async () => {
+      hook.setRun({ // seed a completed run whose assistant message can receive feedback
+        id: "run-1",
+        status: "completed",
+        workspaceId: "ws-1",
+        sessionId: "ws-1",
+        messages: [{
+          role: "assistant",
+          content: "baseline",
+          messageRefId: "assistant-1",
+        }],
+        traces: [],
+        toolInserts: [],
+      });
+    });
+
+    requestMode = "save";
+    await act(async () => {
+      await hook.saveFeedback({
+        workspaceId: "ws-1",
+        messageRefId: "assistant-1",
+        score: 2,
+        reason: "solid answer",
+      });
+    });
+    assertEquals(feedbackRequests.at(-1)?.workspaceId, "ws-1"); // request body carries the ids passed in
+    assertEquals(feedbackRequests.at(-1)?.messageRefId, "assistant-1");
+    assertEquals(hook.run.messages[0]?.feedback?.score, 2); // returned feedback is attached to the matching message
+    assertEquals(hook.run.messages[0]?.feedback?.reason, "solid answer");
+
+    requestMode = "delete";
+    await act(async () => {
+      await hook.saveFeedback({
+        workspaceId: "ws-1",
+        messageRefId: "assistant-1",
+        score: null, // paired here with the stub's `deleted: true` response
+      });
+    });
+    assertEquals(hook.run.messages[0]?.feedback, undefined); // feedback removed from the message
+
+    requestMode = "error";
+    await assertRejects(async () => { // an HTTP 500 from the endpoint must reject
+      await act(async () => {
+        await hook.saveFeedback({
+          workspaceId: "ws-1",
+          messageRefId: "assistant-1",
+          score: 1,
+          reason: "retry",
+        });
+      });
+    });
+  } finally {
+    if (renderer) {
+      await act(async () => {
+        renderer?.unmount();
+      });
+    }
+    globalThis.fetch = originalFetch; // restore the globals patched above
+    globalThis.EventSource = originalEventSource;
+    FakeEventSource.instances = [];
+  }
+});
+
+Deno.test("WorkspaceContext build chat stream/status lifecycle", async () => { // Happy path: send, stream a partial reply, end the stream, receive a completed status.
+  const originalFetch = globalThis.fetch;
+  const originalEventSource = globalThis.EventSource;
+
+  let hook: any = null;
+  const requests: Array<{ url: string; body?: unknown }> = []; // every request the provider issues
+
+  globalThis.EventSource = FakeEventSource as unknown as typeof EventSource;
+  globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => {
+    const url = String(input);
+    let parsedBody: unknown;
+    if (typeof init?.body === "string" && init.body.length > 0) {
+      parsedBody = JSON.parse(init.body);
+    }
+    requests.push({ url, body: parsedBody }); // recorded before routing so unexpected URLs are captured too
+
+    if (url.endsWith("/api/workspaces/ws-1")) {
+      return new Response(JSON.stringify(createSnapshot()), { status: 200 });
+    }
+    if (url.endsWith("/api/build/message")) {
+      return new Response( // the send endpoint answers with a run that is already "running"
+        JSON.stringify({
+          run: {
+            id: "ws-1",
+            status: "running",
+            workspaceId: "ws-1",
+            messages: [{ role: "user", content: "hello" }],
+            traces: [],
+            toolInserts: [],
+          },
+        }),
+        { status: 200 },
+      );
+    }
+    throw new Error(`Unexpected fetch: ${url}`);
+  }) as typeof fetch;
+
+  function Harness() { // captures the latest build-slice hook value on every render
+    hook = useWorkspaceBuild();
+    return null;
+  }
+
+  let renderer: TestRenderer.ReactTestRenderer | null = null;
+  try {
+    await act(async () => {
+      renderer = TestRenderer.create(
+        <WorkspaceProvider workspaceId="ws-1">
+          <Harness />
+        </WorkspaceProvider>,
+      );
+    });
+    assert(hook);
+
+    await act(async () => {
+      await hook.sendMessage("hello");
+    });
+    assertEquals(hook.chatSending, false); // sending flag is cleared once the request settles
+    assertEquals(hook.run.status, "running");
+    const sendReq = requests.find((req) =>
+      req.url.endsWith("/api/build/message")
+    );
+    assert(sendReq);
+
+    const stream = FakeEventSource.instances.at(-1); // most recently constructed fake stream
+    assert(stream);
+
+    await act(async () => {
+      stream.emit({
+        type: "buildBotStream",
+        runId: "ws-1",
+        role: "assistant",
+        chunk: "partial",
+        turn: 0,
+      });
+    });
+    assertEquals(hook.streamingAssistant?.text, "partial"); // chunk is surfaced as in-progress assistant text
+
+    await act(async () => {
+      stream.emit({
+        type: "buildBotStreamEnd",
+        runId: "ws-1",
+        role: "assistant",
+        turn: 0,
+      });
+    });
+    assertEquals(hook.streamingAssistant, null); // stream end for the same run/turn clears the partial text
+
+    await act(async () => {
+      stream.emit({
+        type: "buildBotStatus",
+        run: {
+          id: "ws-1",
+          status: "completed",
+          workspaceId: "ws-1",
+          messages: [
+            { role: "user", content: "hello" },
+            { role: "assistant", content: "done" },
+          ],
+          traces: [],
+          toolInserts: [],
+        },
+      });
+    });
+    assertEquals(hook.run.status, "completed"); // status event replaces the run snapshot
+    assertEquals(hook.run.messages.length, 2);
+  } finally {
+    if (renderer) {
+      await act(async () => {
+        renderer?.unmount();
+      });
+    }
+    globalThis.fetch = originalFetch; // restore the globals patched above
+    globalThis.EventSource = originalEventSource;
+    FakeEventSource.instances = [];
+  }
+});
diff --git a/simulator-ui/src/WorkspaceContext.tsx b/simulator-ui/src/WorkspaceContext.tsx
new file mode 100644
index 000000000..79a70ea6c
--- /dev/null
+++ b/simulator-ui/src/WorkspaceContext.tsx
@@ -0,0 +1,1535 @@
+import React, {
+  createContext,
+  useCallback,
+  useContext,
+  useEffect,
+  useMemo,
+  useRef,
+  useState,
+} from "react";
+import {
+  buildDurableStreamUrl,
+  type CalibrateResponse,
+  type CalibrateSession,
+  type CalibrateStreamMessage,
+  deriveBuildDisplayMessages,
+  type FeedbackEntry,
+  getDurableStreamOffset,
+  type GraderDeckMeta,
+  type GradingFlag,
+  type SessionDetailResponse,
+  setDurableStreamOffset,
+  summarizeToolCalls,
+  type TestBotConfigResponse,
+  type TestBotRun,
+  type ToolCallSummary,
+  type TraceEvent,
+  WORKSPACE_STREAM_ID,
+  type WorkspaceSocketMessage,
+} from "./utils.ts";
+
+export type BuildRun = { // Snapshot of one build-chat run as held in WorkspaceProvider state.
+  id: string; // "" in the initial state; the provider sets it to the active workspace id
+  status: "idle" | "running" | "completed" | "error" | "canceled"; // stream handlers clear streaming text on any non-"running" status
+  error?: string;
+  startedAt?: string;
+  finishedAt?: string;
+  messages: Array<{ // passed, together with `traces`, to deriveBuildDisplayMessages
+    role: string;
+    content: string;
+  }>;
+  displayMessages?: Array<{ // derived field: recomputed by the provider when it merges a snapshot or flushes traces
+    kind: "message" | "tool" | "reasoning";
+    role?: "user" | "assistant";
+    content?: string;
+    toolCallId?: string;
+    toolSummary?: ToolCallSummary;
+    reasoningId?: string;
+    reasoningRaw?: Record<string, unknown>;
+  }>;
+  traces?: Array<TraceEvent>; // `buildBotTrace` stream events are batched and appended here
+  toolInserts?: Array<{
+    actionCallId?: string;
+    parentActionCallId?: string;
+    name?: string;
+    index: number;
+  }>;
+};
+
+type WorkspaceBuildState = { // Shape of the `build` slice of WorkspaceContextValue.
+  run: BuildRun; // current build run snapshot
+  toolCalls: ToolCallSummary[];
+  chatDraft: string;
+  setChatDraft: React.Dispatch<React.SetStateAction<string>>;
+  chatSending: boolean;
+  chatError: string | null;
+  setChatError: React.Dispatch<React.SetStateAction<string | null>>;
+  toolCallsOpen: Record<string, boolean>; // NOTE(review): presumably keyed by tool call id — confirm against the consumer
+  setToolCallsOpen: React.Dispatch<
+    React.SetStateAction<Record<string, boolean>>
+  >;
+  optimisticUser: { id: string; text: string } | null;
+  setOptimisticUser: React.Dispatch<
+    React.SetStateAction<{ id: string; text: string } | null>
+  >;
+  streamingAssistant: { runId: string; turn: number; text: string } | null; // text accumulated from `buildBotStream` chunks for one run/turn; null when nothing is streaming
+  setStreamingAssistant: React.Dispatch<
+    React.SetStateAction<
+      { runId: string; turn: number; text: string } | null
+    >
+  >;
+  stopChat: () => Promise<void>; // rejects when the stop request fails (exercised by the WorkspaceContext tests)
+  resetChat: () => Promise<void>;
+  sendMessage: (message: string) => Promise<void>;
+  loadChat: (runId: string) => Promise<void>;
+};
+
+type WorkspaceTestState = { // Shape of the `test` slice of WorkspaceContextValue.
+  run: TestBotRun; // current test run snapshot
+  setRun: React.Dispatch<React.SetStateAction<TestBotRun>>;
+  streamingUser: { // in-progress streamed user text, or null
+    runId: string;
+    turn: number;
+    text: string;
+    expectedUserCount?: number;
+  } | null;
+  streamingAssistant: { runId: string; turn: number; text: string } | null; // in-progress streamed assistant text, or null
+  chatDraft: string;
+  setChatDraft: React.Dispatch<React.SetStateAction<string>>;
+  chatSending: boolean;
+  chatError: string | null;
+  setChatError: React.Dispatch<React.SetStateAction<string | null>>;
+  optimisticUser: { id: string; text: string } | null;
+  refreshStatus: ( // every option is optional; resolves with a run
+    opts?: { runId?: string; workspaceId?: string; deckPath?: string },
+  ) => Promise<TestBotRun>;
+  startRun: ( // resolves with a result object (`run` and/or `error`) rather than only rejecting
+    payload: Record<string, unknown>,
+  ) => Promise<{ run?: TestBotRun; error?: string; initFill?: unknown }>;
+  startAssistant: (
+    payload: {
+      runId?: string;
+      workspaceId?: string;
+      runWorkspaceId?: string;
+      botDeckPath?: string;
+      context?: unknown;
+    },
+  ) => Promise<void>;
+  sendMessage: ( // takes the same payload shape as `startAssistant`, plus the message text
+    message: string,
+    payload: {
+      runId?: string;
+      workspaceId?: string;
+      runWorkspaceId?: string;
+      botDeckPath?: string;
+      context?: unknown;
+    },
+  ) => Promise<void>;
+  stopRun: (runId: string) => Promise<void>;
+  resetRun: () => void;
+  saveFeedback: ( // on success the run's matching message gains or loses its `feedback` (see the feedback lifecycle test)
+    payload: {
+      workspaceId: string;
+      runId?: string;
+      messageRefId: string;
+      score: number | null; // NOTE(review): null appears to request deletion — confirm against the server handler
+      reason?: string;
+    },
+  ) => Promise<{ feedback?: FeedbackEntry; deleted?: boolean }>;
+};
+
+type WorkspaceGradeState = { // Shape of the `grade` slice of WorkspaceContextValue.
+  loading: boolean;
+  error: string | null;
+  running: boolean;
+  graders: GraderDeckMeta[];
+  sessions: CalibrateSession[];
+  sessionDetail: SessionDetailResponse | null; // null until a session detail has been loaded
+  loadData: ( // both options accept null as well as being omitted
+    opts?: { workspaceId?: string | null; gradeRunId?: string | null },
+  ) => Promise<void>;
+  loadSessionDetail: (workspaceId: string | null) => Promise<void>;
+  runGrader: (
+    payload: { workspaceId: string; graderId: string },
+  ) => Promise<{ session?: CalibrateSession }>;
+  toggleFlag: ( // resolves with the resulting flag list when the response includes one
+    payload: {
+      workspaceId: string;
+      refId: string;
+      runId: string;
+      turnIndex?: number;
+    },
+  ) => Promise<{ flags?: GradingFlag[] }>;
+  updateFlagReason: (
+    payload: { workspaceId: string; refId: string; reason: string },
+  ) => Promise<{ flags?: GradingFlag[] }>;
+};
+
+type WorkspaceContextValue = { // Value carried by WorkspaceContext: one slice per area plus run-id routing.
+  build: WorkspaceBuildState;
+  test: WorkspaceTestState;
+  grade: WorkspaceGradeState;
+  routing: { // NOTE(review): presumably mirrors the ids reported through onRoutingStateChange — confirm
+    testRunId: string | null;
+    gradeRunId: string | null;
+    setGradeRunId: (runId: string | null) => void;
+  };
+};
+
+const WorkspaceContext = createContext<WorkspaceContextValue | null>(null); // default is null, so readers outside a provider get null
+
+function isGradeDebugEnabled(): boolean {
+  // Grade debug logging is opt-in: `?gradeDebug=1|true` or the same value under
+  // the `gambit.gradeDebug` localStorage key. Never throws; false off-browser.
+  if (typeof window === "undefined") return false;
+  const isOn = (value: string | null) => value === "1" || value === "true";
+  try { // `window.location` can be absent when `window` is only a global alias
+    const fromQuery = new URLSearchParams(window.location.search);
+    if (isOn(fromQuery.get("gradeDebug"))) return true;
+  } catch { /* no usable URL: fall through to the storage flag */ }
+  try { // storage access itself may throw (e.g. blocked by the browser)
+    return isOn(window.localStorage.getItem("gambit.gradeDebug"));
+  } catch { return false; }
+}
+
+function gradeDebugLog(event: string, payload?: Record<string, unknown>) {
+  // Debug-only console logging for grade flows; no-op unless the flag is on.
+  if (!isGradeDebugEnabled()) return;
+  const line = `[grade-debug] ${new Date().toISOString()} ${event}`;
+  const hasDetails = !!payload && Object.keys(payload).length > 0;
+  // Missing or empty payloads are logged without a second argument.
+  if (hasDetails) console.log(line, payload);
+  else console.log(line);
+}
+
+const normalizeTestRun = (run?: TestBotRun): TestBotRun => {
+  // Missing run -> blank idle run, so consumers never deal with undefined.
+  if (!run) {
+    return { status: "idle", messages: [], traces: [], toolInserts: [] };
+  }
+  // Prefer `workspaceId`, fall back to the `sessionId` alias; both fields are
+  // then rewritten to the resolved value (undefined when neither is a string).
+  let resolvedId: string | undefined;
+  if (typeof run.workspaceId === "string") {
+    resolvedId = run.workspaceId;
+  } else if (typeof run.sessionId === "string") {
+    resolvedId = run.sessionId;
+  }
+  // Array fields default to [] when they are null or undefined.
+  return {
+    ...run,
+    workspaceId: resolvedId,
+    sessionId: resolvedId,
+    messages: run.messages ?? [],
+    traces: run.traces ?? [],
+    toolInserts: run.toolInserts ?? [],
+  };
+};
+
+export function WorkspaceProvider(
+  props: {
+    children: React.ReactNode;
+    workspaceId?: string | null;
+    onWorkspaceChange?: (workspaceId: string) => void;
+    requestedTestRunId?: string | null;
+    requestedGradeRunId?: string | null;
+    onRoutingStateChange?: (state: {
+      testRunId: string | null;
+      gradeRunId: string | null;
+    }) => void;
+  },
+) {
+  const {
+    children,
+    workspaceId,
+    onWorkspaceChange,
+    requestedTestRunId,
+    requestedGradeRunId,
+    onRoutingStateChange,
+  } = props;
+  const [buildRun, setBuildRun] = useState<BuildRun>({
+    id: "",
+    status: "idle",
+    messages: [],
+    traces: [],
+    toolInserts: [],
+  });
+  const buildRunRef = useRef<BuildRun>({
+    id: "",
+    status: "idle",
+    messages: [],
+    traces: [],
+    toolInserts: [],
+  });
+  const buildRunIdRef = useRef<string>("");
+  const buildIgnoredStreamRunIdsRef = useRef<Set<string>>(new Set());
+
+  const [buildChatDraft, setBuildChatDraft] = useState("");
+  const [buildChatSending, setBuildChatSending] = useState(false);
+  const [buildChatError, setBuildChatError] = useState<string | null>(null);
+  const [buildToolCallsOpen, setBuildToolCallsOpen] = useState<
+    Record<string, boolean>
+  >({});
+  const [buildOptimisticUser, setBuildOptimisticUser] = useState<
+    { id: string; text: string } | null
+  >(null);
+  const [buildStreamingAssistant, setBuildStreamingAssistant] = useState<
+    { runId: string; turn: number; text: string } | null
+  >(null);
+  const pendingBuildTracesRef = useRef<TraceEvent[]>([]);
+  const pendingBuildTraceRunIdRef = useRef<string | null>(null);
+  const buildTraceFlushHandleRef = useRef<number | null>(null);
+  const buildTraceFlushModeRef = useRef<"raf" | "timeout" | null>(null);
+
+  const [testRun, setTestRun] = useState<TestBotRun>(() => normalizeTestRun());
+  const [activeTestRunId, setActiveTestRunId] = useState<string | null>(null);
+  const [activeGradeRunId, setActiveGradeRunId] = useState<string | null>(null);
+  const testRunIdRef = useRef<string>("");
+  const testRunRef = useRef<TestBotRun>(normalizeTestRun());
+  const [testStreamingUser, setTestStreamingUser] = useState<
+    {
+      runId: string;
+      turn: number;
+      text: string;
+      expectedUserCount?: number;
+    } | null
+  >(null);
+  const [testStreamingAssistant, setTestStreamingAssistant] = useState<
+    { runId: string; turn: number; text: string } | null
+  >(null);
+  const [testChatDraft, setTestChatDraft] = useState("");
+  const [testChatSending, setTestChatSending] = useState(false);
+  const [testChatError, setTestChatError] = useState<string | null>(null);
+  const [testOptimisticUser, setTestOptimisticUser] = useState<
+    { id: string; text: string } | null
+  >(null);
+
+  const [gradeLoading, setGradeLoading] = useState(false);
+  const [gradeError, setGradeError] = useState<string | null>(null);
+  const [gradeRunning, setGradeRunning] = useState(false);
+  const [gradeGraders, setGradeGraders] = useState<GraderDeckMeta[]>([]);
+  const [gradeSessions, setGradeSessions] = useState<CalibrateSession[]>([]);
+  const [gradeSessionDetail, setGradeSessionDetail] = useState<
+    SessionDetailResponse | null
+  >(null);
+
+  const buildDisplayMessages = useCallback(
+    (run: BuildRun): BuildRun["displayMessages"] => {
+      return deriveBuildDisplayMessages(run.messages, run.traces ?? []);
+    },
+    [],
+  );
+  const normalizeBuildRun = useCallback((run: BuildRun): BuildRun => {
+    return {
+      ...run,
+      messages: Array.isArray(run.messages) ? run.messages : [],
+      traces: Array.isArray(run.traces) ? run.traces : [],
+      toolInserts: Array.isArray(run.toolInserts) ? run.toolInserts : [],
+    };
+  }, []);
+
+  const mergeBuildRunSnapshot = useCallback(
+    (prev: BuildRun, incomingRun: BuildRun): BuildRun => {
+      const incoming = normalizeBuildRun(incomingRun);
+      const sameRun = Boolean(prev.id) && prev.id === incoming.id;
+      const preserveStreamingArrays = sameRun &&
+        prev.status === "running" &&
+        incoming.status === "running";
+      const incomingMessages = incoming.messages ?? [];
+      const incomingTraces = incoming.traces ?? [];
+      const incomingToolInserts = incoming.toolInserts ?? [];
+
+      const nextRun: BuildRun = {
+        ...incoming,
+        messages: preserveStreamingArrays &&
+            incomingMessages.length < (prev.messages?.length ?? 0)
+          ? (prev.messages ?? [])
+          : incomingMessages,
+        traces: preserveStreamingArrays &&
+            incomingTraces.length < (prev.traces?.length ?? 0)
+          ? (prev.traces ?? [])
+          : incomingTraces,
+        toolInserts: preserveStreamingArrays &&
+            incomingToolInserts.length < (prev.toolInserts?.length ?? 0)
+          ? (prev.toolInserts ?? [])
+          : incomingToolInserts,
+      };
+      nextRun.displayMessages = buildDisplayMessages(nextRun);
+      return nextRun;
+    },
+    [buildDisplayMessages, normalizeBuildRun],
+  );
+  const cancelBuildTraceFlush = useCallback(() => {
+    const handle = buildTraceFlushHandleRef.current;
+    const mode = buildTraceFlushModeRef.current;
+    if (handle === null || mode === null) return;
+    if (
+      mode === "raf" && typeof window !== "undefined" &&
+      typeof window.cancelAnimationFrame === "function"
+    ) {
+      window.cancelAnimationFrame(handle);
+    } else if (mode === "timeout") {
+      clearTimeout(handle);
+    }
+    buildTraceFlushHandleRef.current = null;
+    buildTraceFlushModeRef.current = null;
+  }, []);
+  const flushPendingBuildTraces = useCallback(() => {
+    const pending = pendingBuildTracesRef.current;
+    const pendingRunId = pendingBuildTraceRunIdRef.current;
+    if (pending.length === 0) return;
+    pendingBuildTracesRef.current = [];
+    pendingBuildTraceRunIdRef.current = null;
+    setBuildRun((prev) => {
+      if (pendingRunId && prev.id && prev.id !== pendingRunId) {
+        return prev;
+      }
+      const traces = Array.isArray(prev.traces)
+        ? [...prev.traces, ...pending]
+        : [
+          ...pending,
+        ];
+      const nextRun = { ...prev, traces } as BuildRun;
+      nextRun.displayMessages = buildDisplayMessages(nextRun);
+      return nextRun;
+    });
+  }, [buildDisplayMessages]);
+  const scheduleBuildTraceFlush = useCallback(() => {
+    if (buildTraceFlushHandleRef.current !== null) return;
+    const flush = () => {
+      buildTraceFlushHandleRef.current = null;
+      buildTraceFlushModeRef.current = null;
+      flushPendingBuildTraces();
+    };
+    if (
+      typeof window !== "undefined" &&
+      typeof window.requestAnimationFrame === "function"
+    ) {
+      buildTraceFlushModeRef.current = "raf";
+      buildTraceFlushHandleRef.current = window.requestAnimationFrame(flush);
+      return;
+    }
+    buildTraceFlushModeRef.current = "timeout";
+    buildTraceFlushHandleRef.current = setTimeout(flush, 16);
+  }, [flushPendingBuildTraces]);
+  const clearPendingBuildTraces = useCallback(() => {
+    cancelBuildTraceFlush();
+    pendingBuildTracesRef.current = [];
+    pendingBuildTraceRunIdRef.current = null;
+  }, [cancelBuildTraceFlush]);
+  const gradeLoadSeqRef = useRef(0);
+  const gradeDetailSeqRef = useRef(0);
+  const loadWorkspaceSnapshot = useCallback(async (
+    targetWorkspaceId: string,
+    opts?: { deckPath?: string; testRunId?: string; gradeRunId?: string },
+  ) => {
+    const params = new URLSearchParams();
+    if (opts?.deckPath) params.set("deckPath", opts.deckPath);
+    const query = params.toString() ? `?${params.toString()}` : "";
+    const endpoint = opts?.testRunId
+      ? `/api/workspaces/${encodeURIComponent(targetWorkspaceId)}/test/${
+        encodeURIComponent(opts.testRunId)
+      }${query}`
+      : opts?.gradeRunId
+      ? `/api/workspaces/${encodeURIComponent(targetWorkspaceId)}/grade/${
+        encodeURIComponent(opts.gradeRunId)
+      }${query}`
+      : `/api/workspaces/${encodeURIComponent(targetWorkspaceId)}${query}`;
+    const res = await fetch(
+      endpoint,
+    );
+    if (!res.ok) {
+      const text = await res.text().catch(() => "");
+      throw new Error(text || res.statusText);
+    }
+    return await res.json() as {
+      workspaceId: string;
+      build: { run?: BuildRun };
+      test: TestBotConfigResponse & { run?: TestBotRun };
+      grade: CalibrateResponse;
+      session: SessionDetailResponse;
+    };
+  }, []);
+
+  const refreshBuildStatus = useCallback(
+    async (opts?: { workspaceId?: string }) => {
+      if (opts?.workspaceId) {
+        const data = await loadWorkspaceSnapshot(opts.workspaceId);
+        if (!data.build.run) return;
+        setBuildRun((prev) =>
+          mergeBuildRunSnapshot(prev, data.build.run as BuildRun)
+        );
+        if (typeof data.build.run.id === "string" && data.build.run.id) {
+          buildRunIdRef.current = data.build.run.id;
+        }
+        return;
+      }
+      buildRunIdRef.current = "";
+      setBuildRun({
+        id: "",
+        status: "idle",
+        messages: [],
+        traces: [],
+        toolInserts: [],
+        displayMessages: [],
+      });
+    },
+    [loadWorkspaceSnapshot, mergeBuildRunSnapshot],
+  );
+
+  useEffect(() => {
+    buildRunRef.current = buildRun;
+  }, [buildRun]);
+
+  useEffect(() => {
+    if (workspaceId) {
+      buildRunIdRef.current = workspaceId;
+      refreshBuildStatus({ workspaceId }).catch(() => {});
+      return;
+    }
+    refreshBuildStatus().catch(() => {});
+  }, [refreshBuildStatus, workspaceId]);
+
+  useEffect(() => {
+    if (!workspaceId) return;
+    if (buildRunIdRef.current === workspaceId) return;
+    buildRunIdRef.current = workspaceId;
+    setBuildRun((prev) => ({
+      ...prev,
+      id: workspaceId,
+    }));
+    setBuildChatError(null);
+    setBuildStreamingAssistant(null);
+    setBuildOptimisticUser(null);
+    setBuildToolCallsOpen({});
+    clearPendingBuildTraces();
+    refreshBuildStatus({ workspaceId }).catch(() => {});
+  }, [clearPendingBuildTraces, refreshBuildStatus, workspaceId]);
+
+  useEffect(() => {
+    testRunRef.current = testRun;
+    if (typeof testRun.id === "string" && testRun.id) {
+      testRunIdRef.current = testRun.id;
+      setActiveTestRunId(testRun.id);
+    }
+  }, [testRun]);
+
+  useEffect(() => {
+    if (requestedTestRunId === undefined) return;
+    setActiveTestRunId(requestedTestRunId);
+  }, [requestedTestRunId]);
+
+  useEffect(() => {
+    if (requestedGradeRunId === undefined) return;
+    setActiveGradeRunId(requestedGradeRunId);
+  }, [requestedGradeRunId]);
+
+  useEffect(() => {
+    onRoutingStateChange?.({
+      testRunId: activeTestRunId,
+      gradeRunId: activeGradeRunId,
+    });
+  }, [activeGradeRunId, activeTestRunId, onRoutingStateChange]);
+
+  useEffect(() => {
+    const streamId = WORKSPACE_STREAM_ID;
+    const streamUrl = buildDurableStreamUrl(
+      streamId,
+      getDurableStreamOffset(streamId),
+    );
+    const source = new EventSource(streamUrl);
+
+    source.onmessage = (event) => {
+      let envelope: { offset?: unknown; data?: unknown } | null = null;
+      try {
+        envelope = JSON.parse(event.data) as {
+          offset?: unknown;
+          data?: unknown;
+        };
+      } catch {
+        return;
+      }
+      if (
+        envelope &&
+        typeof envelope.offset === "number" &&
+        Number.isFinite(envelope.offset)
+      ) {
+        setDurableStreamOffset(streamId, envelope.offset + 1);
+      }
+      const msg = envelope?.data as WorkspaceSocketMessage | undefined;
+      if (!msg) return;
+      if (msg.type === "buildBotStatus" && msg.run) {
+        const activeBuildRunId = buildRunIdRef.current;
+        if (activeBuildRunId && msg.run.id !== activeBuildRunId) return;
+        if (
+          msg.run.id &&
+          msg.run.status === "running" &&
+          buildIgnoredStreamRunIdsRef.current.has(msg.run.id)
+        ) {
+          return;
+        }
+        flushPendingBuildTraces();
+        setBuildRun((prev) => mergeBuildRunSnapshot(prev, msg.run as BuildRun));
+        if (msg.run.status !== "running") {
+          setBuildStreamingAssistant(null);
+        }
+        return;
+      }
+      if (msg.type === "buildBotTrace") {
+        const activeBuildRunId = buildRunIdRef.current;
+        if (activeBuildRunId && msg.runId && msg.runId !== activeBuildRunId) {
+          return;
+        }
+        if (!msg.event) return;
+        const traceRunId = typeof msg.runId === "string" && msg.runId.length > 0
+          ? msg.runId
+          : (buildRunIdRef.current || null);
+        const pendingRunId = pendingBuildTraceRunIdRef.current;
+        if (pendingRunId && traceRunId && pendingRunId !== traceRunId) {
+          clearPendingBuildTraces();
+        }
+        if (!pendingBuildTraceRunIdRef.current && traceRunId) {
+          pendingBuildTraceRunIdRef.current = traceRunId;
+        }
+        pendingBuildTracesRef.current.push(msg.event);
+        scheduleBuildTraceFlush();
+        return;
+      }
+      if (msg.type === "buildBotStream") {
+        const activeBuildRunId = buildRunIdRef.current;
+        if (
+          !msg.runId ||
+          (activeBuildRunId && msg.runId !== activeBuildRunId) ||
+          buildIgnoredStreamRunIdsRef.current.has(msg.runId) ||
+          msg.role !== "assistant"
+        ) {
+          return;
+        }
+        const streamRunId = msg.runId;
+        const turn = typeof msg.turn === "number" ? msg.turn : 0;
+        setBuildStreamingAssistant((prev) =>
+          prev && prev.runId === streamRunId && prev.turn === turn
+            ? { ...prev, text: prev.text + msg.chunk }
+            : { runId: streamRunId, turn, text: msg.chunk }
+        );
+        return;
+      }
+      if (msg.type === "buildBotStreamEnd") {
+        const activeBuildRunId = buildRunIdRef.current;
+        if (
+          !msg.runId ||
+          (activeBuildRunId && msg.runId !== activeBuildRunId) ||
+          buildIgnoredStreamRunIdsRef.current.has(msg.runId)
+        ) {
+          return;
+        }
+        const turn = typeof msg.turn === "number" ? msg.turn : 0;
+        setBuildStreamingAssistant((prev) =>
+          prev && prev.runId === msg.runId && prev.turn === turn ? null : prev
+        );
+        return;
+      }
+      if (msg.type === "testBotStatus" && msg.run) {
+        const activeTestRunId = testRunIdRef.current;
+        if (activeTestRunId && msg.run.id !== activeTestRunId) return;
+        const normalized = normalizeTestRun(msg.run);
+        if (normalized.status !== "running") {
+          setTestStreamingUser(null);
+          setTestStreamingAssistant(null);
+        }
+        setTestStreamingUser((prev) => {
+          if (
+            !prev ||
+            prev.runId !== normalized.id ||
+            prev.expectedUserCount === undefined
+          ) {
+            return prev;
+          }
+          const userCount = (normalized.messages ?? []).filter((entry) =>
+            entry.role === "user"
+          ).length;
+          return userCount >= prev.expectedUserCount ? null : prev;
+        });
+        setTestStreamingAssistant((prev) => {
+          if (
+            !prev ||
+            prev.runId !== normalized.id ||
+            !prev.text ||
+            !Array.isArray(normalized.messages)
+          ) {
+            return prev;
+          }
+          const hasAssistant = normalized.messages.some((entry) =>
+            entry.role === "assistant" &&
+            typeof entry.content === "string" &&
+            entry.content.includes(prev.text)
+          );
+          return hasAssistant ? null : prev;
+        });
+        setTestRun(normalized);
+        if (typeof normalized.id === "string" && normalized.id) {
+          testRunIdRef.current = normalized.id;
+        }
+        return;
+      }
+      if (msg.type === "testBotStream") {
+        const activeTestRunId = testRunIdRef.current;
+        if (!msg.runId || (activeTestRunId && msg.runId !== activeTestRunId)) {
+          return;
+        }
+        const streamRunId = msg.runId;
+        const turn = typeof msg.turn === "number" ? msg.turn : 0;
+        if (msg.role === "assistant") {
+          setTestStreamingAssistant((prev) =>
+            prev && prev.runId === streamRunId && prev.turn === turn
+              ? { ...prev, text: prev.text + msg.chunk }
+              : { runId: streamRunId, turn, text: msg.chunk }
+          );
+          return;
+        }
+        if (msg.role === "user") {
+          setTestStreamingUser((prev) =>
+            prev && prev.runId === streamRunId && prev.turn === turn
+              ? { ...prev, text: prev.text + msg.chunk }
+              : { runId: streamRunId, turn, text: msg.chunk }
+          );
+        }
+        return;
+      }
+      if (msg.type === "testBotStreamEnd") {
+        const activeTestRunId = testRunIdRef.current;
+        if (!msg.runId || (activeTestRunId && msg.runId !== activeTestRunId)) {
+          return;
+        }
+        const turn = typeof msg.turn === "number" ? msg.turn : 0;
+        if (msg.role === "assistant") {
+          setTestStreamingAssistant((prev) =>
+            prev && prev.runId === msg.runId && prev.turn === turn ? null : prev
+          );
+          return;
+        }
+        if (msg.role === "user") {
+          const expectedUserCount =
+            (testRunRef.current.messages ?? []).filter((entry) =>
+              entry.role === "user"
+            ).length + 1;
+          setTestStreamingUser((prev) =>
+            prev && prev.runId === msg.runId && prev.turn === turn
+              ? { ...prev, expectedUserCount }
+              : prev
+          );
+        }
+        return;
+      }
+      const gradeMsg = msg as CalibrateStreamMessage;
+      if (gradeMsg.type !== "calibrateSession") return;
+      gradeDebugLog("stream.calibrateSession", {
+        sessionId: gradeMsg.session.id,
+        runCount: gradeMsg.session.gradingRuns?.length ?? 0,
+      });
+      setGradeSessions((prev) => {
+        const next = [...prev];
+        const index = next.findIndex((sess) => sess.id === gradeMsg.session.id);
+        if (index >= 0) {
+          next[index] = gradeMsg.session;
+          return next;
+        }
+        return [gradeMsg.session, ...next];
+      });
+    };
+
+    return () => {
+      source.close();
+      clearPendingBuildTraces();
+    };
+  }, [
+    clearPendingBuildTraces,
+    flushPendingBuildTraces,
+    mergeBuildRunSnapshot,
+    scheduleBuildTraceFlush,
+  ]);
+
+  // Runs `clearPendingBuildTraces` as effect cleanup, i.e. when the provider
+  // unmounts or whenever that callback's identity changes.
+  useEffect(() => {
+    return () => {
+      clearPendingBuildTraces();
+    };
+  }, [clearPendingBuildTraces]);
+
+  // Result of `summarizeToolCalls` over the build run's traces (an empty list
+  // when there are none); recomputed only when `buildRun.traces` changes.
+  const buildToolCalls = useMemo(
+    () => summarizeToolCalls(buildRun.traces ?? []),
+    [buildRun.traces],
+  );
+
+  // Resolves the workspace id to use for build requests. Prefers the
+  // `workspaceId` in scope, then the id already stored in `buildRunIdRef`;
+  // otherwise asks `/api/workspace/new` for one and, failing that, generates
+  // a local `workspace-<uuid>` id. `onWorkspaceChange` is only notified for a
+  // server-issued id.
+  const ensureWorkspaceId = useCallback(async () => {
+    const knownId = workspaceId || buildRunIdRef.current;
+    if (knownId) return knownId;
+
+    // Record `id` as the current build run id in both the ref and the state.
+    const adoptId = (id: string) => {
+      buildRunIdRef.current = id;
+      setBuildRun((current) => ({ ...current, id }));
+    };
+
+    try {
+      const response = await fetch("/api/workspace/new", { method: "POST" });
+      const result = await response.json().catch(() => ({})) as {
+        workspaceId?: string;
+      };
+      const createdId = result.workspaceId;
+      if (response.ok && typeof createdId === "string") {
+        adoptId(createdId);
+        onWorkspaceChange?.(createdId);
+        return createdId;
+      }
+    } catch {
+      // Any failure here falls through to the locally generated id.
+    }
+
+    const localId = `workspace-${crypto.randomUUID()}`;
+    adoptId(localId);
+    return localId;
+  }, [onWorkspaceChange, workspaceId]);
+
+  // Starts a fresh build chat. Requests a new workspace from
+  // `/api/workspace/new`; when that fails the run id falls back to "" and
+  // `onWorkspaceChange` is not called. Either way the build run and the
+  // build-chat UI state are reset.
+  const resetBuildChat = useCallback(async () => {
+    const response = await fetch("/api/workspace/new", { method: "POST" })
+      .catch(() => null);
+    const result = response
+      ? await response.json().catch(() => ({})) as { workspaceId?: string }
+      : {};
+    // `null` means "no server-issued id"; an id is only trusted on an OK
+    // response that actually carries a string.
+    const createdId = response?.ok && typeof result.workspaceId === "string"
+      ? result.workspaceId
+      : null;
+    const nextId = createdId ?? "";
+
+    buildRunIdRef.current = nextId;
+    buildIgnoredStreamRunIdsRef.current.clear();
+    setBuildRun({
+      id: nextId,
+      status: "idle",
+      messages: [],
+      traces: [],
+      toolInserts: [],
+      displayMessages: [],
+    });
+    if (createdId !== null) onWorkspaceChange?.(createdId);
+
+    setBuildChatDraft("");
+    setBuildChatError(null);
+    setBuildStreamingAssistant(null);
+    setBuildOptimisticUser(null);
+    setBuildToolCallsOpen({});
+    clearPendingBuildTraces();
+  }, [clearPendingBuildTraces, onWorkspaceChange]);
+
+  // Cancels the active build run: optimistically flips local state to
+  // "canceled", then POSTs `/api/build/stop`. Rejects (after rolling back the
+  // ignore-list entry) when the request fails or the server answers with a
+  // non-OK status. Does nothing when no run id is known.
+  const stopBuildChat = useCallback(async () => {
+    const runId = buildRunIdRef.current || buildRunRef.current.id;
+    if (!runId) return;
+    // While the id is in this set, the stream handler drops "running" status
+    // updates and assistant chunks for the run, so late events cannot undo
+    // the optimistic cancel below.
+    buildIgnoredStreamRunIdsRef.current.add(runId);
+    setBuildStreamingAssistant(null);
+    // Optimistic update: only touches the run if it is still this run and
+    // still marked as running.
+    setBuildRun((prev) =>
+      prev.id === runId && prev.status === "running"
+        ? {
+          ...prev,
+          status: "canceled",
+          finishedAt: prev.finishedAt ?? new Date().toISOString(),
+          error: undefined,
+        }
+        : prev
+    );
+    try {
+      const res = await fetch("/api/build/stop", {
+        method: "POST",
+        headers: { "content-type": "application/json" },
+        body: JSON.stringify({ workspaceId: runId }),
+      });
+      // A non-JSON body is treated as an empty object.
+      const data = await res.json().catch(() => ({})) as {
+        run?: BuildRun;
+        error?: string;
+      };
+      if (!res.ok) {
+        throw new Error(
+          typeof data.error === "string" ? data.error : res.statusText,
+        );
+      }
+      if (data.run) {
+        setBuildRun((prev) =>
+          mergeBuildRunSnapshot(prev, data.run as BuildRun)
+        );
+      }
+    } catch (err) {
+      // Roll back: stop ignoring stream events and, if this is still the
+      // active run, call `refreshBuildStatus` (its failure is ignored) before
+      // rethrowing the original error.
+      buildIgnoredStreamRunIdsRef.current.delete(runId);
+      const isStillActiveRun = buildRunIdRef.current === runId &&
+        buildRunRef.current.id === runId;
+      if (isStillActiveRun) {
+        await refreshBuildStatus({ workspaceId: runId }).catch(() => {});
+      }
+      throw err;
+    }
+  }, [mergeBuildRunSnapshot, refreshBuildStatus]);
+
+  // Sends `message` to the build bot for the workspace returned by
+  // `ensureWorkspaceId`. Rejects with the server's `error` string (or the
+  // HTTP status text) on a non-OK response; `buildChatSending` is true for
+  // the duration of the request.
+  const sendBuildMessage = useCallback(async (message: string) => {
+    const workspace = await ensureWorkspaceId();
+    // Sending again lifts any stream-ignore entry left for this run.
+    buildIgnoredStreamRunIdsRef.current.delete(workspace);
+    setBuildChatSending(true);
+    setBuildChatError(null);
+    try {
+      const response = await fetch("/api/build/message", {
+        method: "POST",
+        headers: { "content-type": "application/json" },
+        body: JSON.stringify({ workspaceId: workspace, message }),
+      });
+      const result = await response.json().catch(() => ({})) as {
+        run?: BuildRun;
+        error?: string;
+      };
+      if (!response.ok) {
+        const reason = typeof result.error === "string"
+          ? result.error
+          : response.statusText;
+        throw new Error(reason);
+      }
+      const snapshot = result.run;
+      if (snapshot) {
+        setBuildRun((current) => mergeBuildRunSnapshot(current, snapshot));
+        if (typeof snapshot.id === "string" && snapshot.id) {
+          buildRunIdRef.current = snapshot.id;
+        }
+      }
+    } finally {
+      setBuildChatSending(false);
+    }
+  }, [ensureWorkspaceId, mergeBuildRunSnapshot]);
+
+  // Loads the build chat stored for `runId` from its workspace snapshot. When
+  // the snapshot has a build run, it is merged into `buildRun`, becomes the
+  // active run id (and is reported through `onWorkspaceChange`), and the
+  // transient chat UI state is cleared. A snapshot without a run changes
+  // nothing. Errors from `loadWorkspaceSnapshot` propagate to the caller.
+  const loadBuildChat = useCallback(async (runId: string) => {
+    setBuildChatSending(true);
+    setBuildChatError(null);
+    try {
+      const snapshot = await loadWorkspaceSnapshot(runId);
+      const loadedRun = snapshot.build.run;
+      if (loadedRun) {
+        buildIgnoredStreamRunIdsRef.current.delete(runId);
+        setBuildRun((current) =>
+          mergeBuildRunSnapshot(current, loadedRun as BuildRun)
+        );
+        const loadedId = loadedRun.id;
+        if (typeof loadedId === "string" && loadedId) {
+          buildRunIdRef.current = loadedId;
+          onWorkspaceChange?.(loadedId);
+        }
+        setBuildChatDraft("");
+        setBuildOptimisticUser(null);
+        setBuildStreamingAssistant(null);
+        setBuildToolCallsOpen({});
+      }
+    } finally {
+      setBuildChatSending(false);
+    }
+  }, [loadWorkspaceSnapshot, mergeBuildRunSnapshot, onWorkspaceChange]);
+
+  // Re-reads the test run from the workspace snapshot and returns the
+  // normalized run. The workspace id is taken from `opts`, then the current
+  // test run (`workspaceId`, then `sessionId`), then the `workspaceId` in
+  // scope. Without any workspace id the run is reset to
+  // `normalizeTestRun()`'s default and streaming state is cleared.
+  const refreshTestStatus = useCallback(async (
+    opts?: { runId?: string; workspaceId?: string; deckPath?: string },
+  ) => {
+    const targetWorkspaceId = opts?.workspaceId ??
+      testRunRef.current.workspaceId ??
+      testRunRef.current.sessionId ??
+      workspaceId ??
+      undefined;
+    // Captured before any await so it reflects the run id at call time.
+    const targetRunId = opts?.runId ?? (testRunIdRef.current || undefined);
+
+    const clearStreaming = () => {
+      setTestStreamingUser(null);
+      setTestStreamingAssistant(null);
+    };
+
+    if (!targetWorkspaceId) {
+      const emptyRun = normalizeTestRun();
+      setTestRun(emptyRun);
+      testRunIdRef.current = targetRunId ?? "";
+      clearStreaming();
+      return emptyRun;
+    }
+
+    const snapshot = await loadWorkspaceSnapshot(targetWorkspaceId, {
+      deckPath: opts?.deckPath,
+      testRunId: targetRunId,
+    });
+    const loadedRun = normalizeTestRun(snapshot.test.run);
+    setTestRun(loadedRun);
+    if (loadedRun.status !== "running") clearStreaming();
+    if (typeof loadedRun.id === "string" && loadedRun.id) {
+      testRunIdRef.current = loadedRun.id;
+    }
+    return loadedRun;
+  }, [loadWorkspaceSnapshot, workspaceId]);
+
+  // Starts a test run by POSTing `payload` to `/api/test/run`. When the
+  // response carries a `run`, it is normalized into `testRun` and its id is
+  // remembered in `testRunIdRef`. The parsed body is returned as-is (an empty
+  // object if it is not JSON); the HTTP status is not checked here.
+  const startTestRun = useCallback(async (payload: Record<string, unknown>) => {
+    const response = await fetch("/api/test/run", {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify(payload),
+    });
+    const result = await response.json().catch(() => ({})) as {
+      run?: TestBotRun;
+      error?: string;
+      initFill?: unknown;
+    };
+    if (!result.run) return result;
+
+    const normalizedRun = normalizeTestRun(result.run);
+    setTestRun(normalizedRun);
+    const { id } = normalizedRun;
+    if (typeof id === "string" && id) {
+      testRunIdRef.current = id;
+    }
+    return result;
+  }, []);
+
+  // Low-level POST to `/api/test/message`. When the response carries a `run`,
+  // it is normalized into `testRun` and its id is remembered in
+  // `testRunIdRef`. Returns the parsed body (an empty object if it is not
+  // JSON) without checking the HTTP status; callers decide what a missing
+  // `run` means.
+  const sendTestMessageRequest = useCallback(
+    async (payload: Record<string, unknown>) => {
+      const response = await fetch("/api/test/message", {
+        method: "POST",
+        headers: { "content-type": "application/json" },
+        body: JSON.stringify(payload),
+      });
+      const result = await response.json().catch(() => ({})) as {
+        run?: TestBotRun;
+        error?: string;
+      };
+      if (!result.run) return result;
+
+      const normalizedRun = normalizeTestRun(result.run);
+      setTestRun(normalizedRun);
+      const { id } = normalizedRun;
+      if (typeof id === "string" && id) {
+        testRunIdRef.current = id;
+      }
+      return result;
+    },
+    [],
+  );
+
+  // Asks the assistant to take a turn by sending an empty message. Without a
+  // `payload.runId` a `testbot-ui-<uuid>` id is generated and the local run is
+  // marked "running" right away. `context` is only forwarded when no
+  // `runWorkspaceId` is given. A response without a `run` is an error: it is
+  // stored in `testChatError` and rethrown.
+  const startTestAssistant = useCallback(async (
+    payload: {
+      runId?: string;
+      workspaceId?: string;
+      runWorkspaceId?: string;
+      botDeckPath?: string;
+      context?: unknown;
+    },
+  ) => {
+    setTestChatSending(true);
+    setTestChatError(null);
+
+    const isNewRun = !payload.runId;
+    const runId = payload.runId || `testbot-ui-${crypto.randomUUID()}`;
+    if (isNewRun) {
+      setTestRun((current) => ({
+        ...current,
+        id: runId,
+        status: "running",
+        error: undefined,
+        messages: current.messages ?? [],
+        traces: current.traces ?? [],
+        toolInserts: current.toolInserts ?? [],
+      }));
+    }
+
+    const forwardContext = !payload.runWorkspaceId &&
+      payload.context !== undefined;
+    const requestPayload: Record<string, unknown> = {
+      message: "",
+      runId,
+      workspaceId: payload.runWorkspaceId ?? payload.workspaceId ?? undefined,
+      botDeckPath: payload.botDeckPath ?? undefined,
+      ...(forwardContext ? { context: payload.context } : {}),
+    };
+
+    try {
+      const { run, error } = await sendTestMessageRequest(requestPayload);
+      if (!run) {
+        throw new Error(
+          typeof error === "string" ? error : "Failed to start assistant",
+        );
+      }
+    } catch (failure) {
+      setTestChatError(
+        failure instanceof Error ? failure.message : String(failure),
+      );
+      throw failure;
+    } finally {
+      setTestChatSending(false);
+    }
+  }, [sendTestMessageRequest]);
+
+  // Sends a user message in the test chat. Blank input is ignored. The
+  // trimmed text is shown immediately as an optimistic user message and the
+  // draft is cleared before the request goes out. Without a `payload.runId` a
+  // `testbot-ui-<uuid>` id is generated and the local run is marked
+  // "running". `context` is only forwarded when no `runWorkspaceId` is given.
+  // A response without a `run` is an error: it is stored in `testChatError`
+  // and rethrown.
+  const sendTestMessage = useCallback(
+    async (
+      message: string,
+      payload: {
+        runId?: string;
+        workspaceId?: string;
+        runWorkspaceId?: string;
+        botDeckPath?: string;
+        context?: unknown;
+      },
+    ) => {
+      const text = message.trim();
+      if (!text) return;
+
+      setTestChatSending(true);
+      setTestChatError(null);
+
+      const optimisticId = crypto.randomUUID();
+      const isNewRun = !payload.runId;
+      const runId = payload.runId || `testbot-ui-${crypto.randomUUID()}`;
+      if (isNewRun) {
+        setTestRun((current) => ({
+          ...current,
+          id: runId,
+          status: "running",
+          error: undefined,
+          messages: current.messages ?? [],
+          traces: current.traces ?? [],
+          toolInserts: current.toolInserts ?? [],
+        }));
+      }
+      setTestOptimisticUser({ id: optimisticId, text });
+      setTestChatDraft("");
+
+      const forwardContext = !payload.runWorkspaceId &&
+        payload.context !== undefined;
+      const requestPayload: Record<string, unknown> = {
+        message: text,
+        runId,
+        workspaceId: payload.runWorkspaceId ?? payload.workspaceId ?? undefined,
+        botDeckPath: payload.botDeckPath ?? undefined,
+        ...(forwardContext ? { context: payload.context } : {}),
+      };
+
+      try {
+        const { run, error } = await sendTestMessageRequest(requestPayload);
+        if (!run) {
+          throw new Error(
+            typeof error === "string" ? error : "Failed to send message",
+          );
+        }
+      } catch (failure) {
+        setTestChatError(
+          failure instanceof Error ? failure.message : String(failure),
+        );
+        throw failure;
+      } finally {
+        setTestChatSending(false);
+      }
+    },
+    [sendTestMessageRequest],
+  );
+
+  // POSTs `{ runId }` to `/api/test/stop`. The response is not inspected, so
+  // a non-OK status does not reject; only a failed `fetch` does. Local run
+  // state is not touched here.
+  const stopTestRun = useCallback(async (runId: string) => {
+    await fetch("/api/test/stop", {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({ runId }),
+    });
+  }, []);
+
+  // Resets local test-run state: clears the run id ref, replaces the run with
+  // `normalizeTestRun()`'s default, and clears the streaming, draft, error,
+  // sending and optimistic-user state. Makes no network request.
+  const resetTestRun = useCallback(() => {
+    testRunIdRef.current = "";
+    setTestRun(normalizeTestRun());
+    setTestStreamingUser(null);
+    setTestStreamingAssistant(null);
+    setTestChatDraft("");
+    setTestChatError(null);
+    setTestChatSending(false);
+    setTestOptimisticUser(null);
+  }, []);
+
+  // Drops the optimistic user message once it is no longer needed: either the
+  // latest user message in the run has the same text, or the run has left the
+  // "running" state.
+  useEffect(() => {
+    if (!testOptimisticUser) return;
+    const history = testRun.messages ?? [];
+    const latestUser = [...history].reverse().find((entry) =>
+      entry.role === "user"
+    );
+    const echoed = latestUser?.content === testOptimisticUser.text;
+    const finished = testRun.status !== "running";
+    if (echoed || finished) {
+      setTestOptimisticUser(null);
+    }
+  }, [testOptimisticUser, testRun.messages, testRun.status]);
+
+  // Saves (or deletes) feedback for one message via `/api/session/feedback`
+  // and mirrors the result onto the matching message in `testRun`.
+  //
+  // Rejects with the server's `error` string (or the HTTP status text) on a
+  // non-OK response. Resolves with the parsed response body: `deleted` clears
+  // the message's feedback, `feedback` replaces it, and a body with neither
+  // leaves the run unchanged.
+  const saveTestFeedback = useCallback(async (
+    payload: {
+      workspaceId: string;
+      runId?: string;
+      messageRefId: string;
+      score: number | null;
+      reason?: string;
+    },
+  ) => {
+    const res = await fetch("/api/session/feedback", {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify(payload),
+    });
+    if (!res.ok) {
+      const body = await res.json().catch(() => ({})) as { error?: string };
+      throw new Error(body.error || res.statusText);
+    }
+    const data = await res.json() as {
+      feedback?: FeedbackEntry;
+      deleted?: boolean;
+    };
+    setTestRun((prev) => {
+      // Other code in this provider treats `messages` as possibly missing
+      // (`?? []`); do the same here so the updater can never throw.
+      const messages = prev.messages ?? [];
+      if (!messages.length) return prev;
+      if (!data.deleted && !data.feedback) return prev;
+      // A delete wins over any feedback the response may also carry.
+      const feedback = data.deleted ? undefined : data.feedback;
+      return {
+        ...prev,
+        messages: messages.map((msg) =>
+          msg.messageRefId === payload.messageRefId
+            ? { ...msg, feedback }
+            : msg
+        ),
+      };
+    });
+    return data;
+  }, []);
+
+  // Loads grader decks and sessions for the grade view. With a `workspaceId`
+  // the data comes from that workspace's snapshot; without one both lists are
+  // reset to empty. Failures are reported through `gradeError` rather than
+  // thrown.
+  //
+  // Every call takes a sequence number from `gradeLoadSeqRef`. A call that is
+  // no longer the newest one when it settles leaves state untouched, so a slow
+  // response cannot overwrite the result of a later request or clear the
+  // loading flag while that later request is still in flight.
+  const loadGradeData = useCallback(
+    async (
+      opts?: { workspaceId?: string | null; gradeRunId?: string | null },
+    ) => {
+      const reqId = ++gradeLoadSeqRef.current;
+      const isLatest = () => gradeLoadSeqRef.current === reqId;
+      gradeDebugLog("loadData.start", {
+        reqId,
+        workspaceId: opts?.workspaceId ?? null,
+      });
+      try {
+        setGradeLoading(true);
+        let data: CalibrateResponse;
+        if (opts?.workspaceId) {
+          const snapshot = await loadWorkspaceSnapshot(opts.workspaceId, {
+            gradeRunId: opts.gradeRunId ?? undefined,
+          });
+          data = snapshot.grade;
+          gradeDebugLog("loadData.response", {
+            reqId,
+            ok: true,
+            status: 200,
+            source: "workspace",
+          });
+        } else {
+          data = { graderDecks: [], sessions: [] };
+          gradeDebugLog("loadData.response", {
+            reqId,
+            ok: true,
+            status: 200,
+            source: "empty",
+          });
+        }
+        // Superseded by a newer call: let that call own the state.
+        if (!isLatest()) return;
+        const graders = Array.isArray(data.graderDecks)
+          ? data.graderDecks
+          : [];
+        const sessions = Array.isArray(data.sessions) ? data.sessions : [];
+        setGradeGraders(graders);
+        setGradeSessions(sessions);
+        gradeDebugLog("loadData.success", {
+          reqId,
+          graders: graders.length,
+          sessions: sessions.length,
+        });
+        setGradeError(null);
+      } catch (err) {
+        gradeDebugLog("loadData.error", {
+          reqId,
+          message: err instanceof Error ? err.message : String(err),
+        });
+        if (isLatest()) {
+          setGradeError(
+            err instanceof Error
+              ? err.message
+              : "Failed to load calibration data",
+          );
+        }
+      } finally {
+        const latest = isLatest();
+        // Only the newest call may clear the loading flag.
+        if (latest) setGradeLoading(false);
+        gradeDebugLog(
+          "loadData.end",
+          latest ? { reqId, loading: false } : { reqId, stale: true },
+        );
+      }
+    },
+    [loadWorkspaceSnapshot],
+  );
+
+  // Loads the session detail shown in the grade view from the snapshot of
+  // `targetWorkspaceId`. A null/empty id clears the detail. On failure the
+  // detail is cleared and the message is stored in `gradeError`; nothing is
+  // thrown.
+  //
+  // Every call takes a sequence number from `gradeDetailSeqRef`. A response
+  // (or error) that arrives after a newer call has started is discarded, so
+  // switching sessions quickly cannot leave an earlier session's detail on
+  // screen.
+  const loadGradeSessionDetail = useCallback(
+    async (targetWorkspaceId: string | null) => {
+      const reqId = ++gradeDetailSeqRef.current;
+      const isLatest = () => gradeDetailSeqRef.current === reqId;
+      gradeDebugLog("loadSessionDetail.start", {
+        reqId,
+        workspaceId: targetWorkspaceId,
+      });
+      if (!targetWorkspaceId) {
+        setGradeSessionDetail(null);
+        gradeDebugLog("loadSessionDetail.skip", {
+          reqId,
+          reason: "missing-workspace-id",
+        });
+        return;
+      }
+      try {
+        const snapshot = await loadWorkspaceSnapshot(targetWorkspaceId);
+        gradeDebugLog("loadSessionDetail.response", {
+          reqId,
+          workspaceId: targetWorkspaceId,
+          ok: true,
+          status: 200,
+        });
+        if (!isLatest()) {
+          // Superseded by a newer call: keep whatever that call shows.
+          gradeDebugLog("loadSessionDetail.skip", {
+            reqId,
+            reason: "stale-response",
+          });
+          return;
+        }
+        const data = snapshot.session;
+        setGradeSessionDetail(data);
+        gradeDebugLog("loadSessionDetail.success", {
+          reqId,
+          workspaceId: targetWorkspaceId,
+          messageCount: Array.isArray(data.messages) ? data.messages.length : 0,
+        });
+      } catch (err) {
+        gradeDebugLog("loadSessionDetail.error", {
+          reqId,
+          workspaceId: targetWorkspaceId,
+          message: err instanceof Error ? err.message : String(err),
+        });
+        // A stale failure must not wipe the detail of the newer request.
+        if (!isLatest()) return;
+        setGradeSessionDetail(null);
+        setGradeError(
+          err instanceof Error ? err.message : "Failed to load session details",
+        );
+      }
+    },
+    [loadWorkspaceSnapshot],
+  );
+
+  // Runs a grader for a workspace via `/api/calibrate/run`. A returned
+  // session replaces the entry with the same id in `gradeSessions`, or is
+  // prepended when it is new. On any failure the message is stored in
+  // `gradeError` and a fresh `Error` with that message is thrown.
+  // `gradeRunning` is true for the duration of the call.
+  const runGradeGrader = useCallback(async (
+    payload: { workspaceId: string; graderId: string },
+  ) => {
+    try {
+      setGradeRunning(true);
+      const response = await fetch("/api/calibrate/run", {
+        method: "POST",
+        headers: { "content-type": "application/json" },
+        body: JSON.stringify(payload),
+      });
+      if (!response.ok) {
+        const detail = await response.text().catch(() => "");
+        throw new Error(detail || response.statusText);
+      }
+      const result = await response.json() as { session?: CalibrateSession };
+      const updated = result.session;
+      if (updated) {
+        setGradeSessions((sessions) => {
+          const at = sessions.findIndex((sess) => sess.id === updated.id);
+          if (at < 0) return [updated, ...sessions];
+          return sessions.map((sess, i) => (i === at ? updated : sess));
+        });
+      }
+      setGradeError(null);
+      return result;
+    } catch (failure) {
+      const message = failure instanceof Error
+        ? failure.message
+        : "Failed to run grader";
+      setGradeError(message);
+      throw new Error(message);
+    } finally {
+      setGradeRunning(false);
+    }
+  }, []);
+
+  // POSTs `payload` to `/api/calibrate/flag`. Rejects with the response text
+  // (or the HTTP status text) on a non-OK response. When the response carries
+  // `flags`, they replace `meta.gradingFlags` on the currently loaded session
+  // detail, if there is one. Resolves with the parsed body.
+  const toggleGradeFlag = useCallback(async (
+    payload: {
+      workspaceId: string;
+      refId: string;
+      runId: string;
+      turnIndex?: number;
+    },
+  ) => {
+    const response = await fetch("/api/calibrate/flag", {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify(payload),
+    });
+    if (!response.ok) {
+      const detail = await response.text().catch(() => "");
+      throw new Error(detail || response.statusText);
+    }
+    const result = await response.json() as { flags?: GradingFlag[] };
+    const { flags } = result;
+    if (flags) {
+      setGradeSessionDetail((detail) =>
+        detail
+          ? {
+            ...detail,
+            meta: { ...(detail.meta ?? {}), gradingFlags: flags },
+          }
+          : detail
+      );
+    }
+    return result;
+  }, []);
+
+  // POSTs `payload` to `/api/calibrate/flag/reason`. Rejects with the response
+  // text (or the HTTP status text) on a non-OK response. When the response
+  // carries `flags`, they replace `meta.gradingFlags` on the currently loaded
+  // session detail, if there is one. Resolves with the parsed body.
+  const updateGradeFlagReason = useCallback(async (
+    payload: { workspaceId: string; refId: string; reason: string },
+  ) => {
+    const response = await fetch("/api/calibrate/flag/reason", {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify(payload),
+    });
+    if (!response.ok) {
+      const detail = await response.text().catch(() => "");
+      throw new Error(detail || response.statusText);
+    }
+    const result = await response.json() as { flags?: GradingFlag[] };
+    const { flags } = result;
+    if (flags) {
+      setGradeSessionDetail((detail) =>
+        detail
+          ? {
+            ...detail,
+            meta: { ...(detail.meta ?? {}), gradingFlags: flags },
+          }
+          : detail
+      );
+    }
+    return result;
+  }, []);
+
+  // Context value handed to consumers, grouped into `build`, `test`, `grade`
+  // and `routing` sections (one per `useWorkspace*` hook). Memoized so the
+  // object identity only changes when one of the listed dependencies does.
+  // The setters exposed here are not listed as dependencies.
+  // NOTE(review): that is only safe if they are identity-stable (e.g. come
+  // straight from `useState`) — confirm this holds for `setActiveGradeRunId`.
+  const value = useMemo<WorkspaceContextValue>(
+    () => ({
+      build: {
+        run: buildRun,
+        toolCalls: buildToolCalls,
+        chatDraft: buildChatDraft,
+        setChatDraft: setBuildChatDraft,
+        chatSending: buildChatSending,
+        chatError: buildChatError,
+        setChatError: setBuildChatError,
+        toolCallsOpen: buildToolCallsOpen,
+        setToolCallsOpen: setBuildToolCallsOpen,
+        optimisticUser: buildOptimisticUser,
+        setOptimisticUser: setBuildOptimisticUser,
+        streamingAssistant: buildStreamingAssistant,
+        setStreamingAssistant: setBuildStreamingAssistant,
+        stopChat: stopBuildChat,
+        resetChat: resetBuildChat,
+        sendMessage: sendBuildMessage,
+        loadChat: loadBuildChat,
+      },
+      test: {
+        run: testRun,
+        setRun: setTestRun,
+        streamingUser: testStreamingUser,
+        streamingAssistant: testStreamingAssistant,
+        chatDraft: testChatDraft,
+        setChatDraft: setTestChatDraft,
+        chatSending: testChatSending,
+        chatError: testChatError,
+        setChatError: setTestChatError,
+        optimisticUser: testOptimisticUser,
+        refreshStatus: refreshTestStatus,
+        startRun: startTestRun,
+        startAssistant: startTestAssistant,
+        sendMessage: sendTestMessage,
+        stopRun: stopTestRun,
+        resetRun: resetTestRun,
+        saveFeedback: saveTestFeedback,
+      },
+      grade: {
+        loading: gradeLoading,
+        error: gradeError,
+        running: gradeRunning,
+        graders: gradeGraders,
+        sessions: gradeSessions,
+        sessionDetail: gradeSessionDetail,
+        loadData: loadGradeData,
+        loadSessionDetail: loadGradeSessionDetail,
+        runGrader: runGradeGrader,
+        toggleFlag: toggleGradeFlag,
+        updateFlagReason: updateGradeFlagReason,
+      },
+      routing: {
+        testRunId: activeTestRunId,
+        gradeRunId: activeGradeRunId,
+        setGradeRunId: setActiveGradeRunId,
+      },
+    }),
+    [
+      buildRun,
+      buildToolCalls,
+      buildChatDraft,
+      buildChatSending,
+      buildChatError,
+      buildToolCallsOpen,
+      buildOptimisticUser,
+      buildStreamingAssistant,
+      resetBuildChat,
+      stopBuildChat,
+      sendBuildMessage,
+      loadBuildChat,
+      testRun,
+      testStreamingUser,
+      testStreamingAssistant,
+      testChatDraft,
+      testChatSending,
+      testChatError,
+      testOptimisticUser,
+      refreshTestStatus,
+      startTestRun,
+      startTestAssistant,
+      sendTestMessage,
+      stopTestRun,
+      resetTestRun,
+      saveTestFeedback,
+      gradeLoading,
+      gradeError,
+      gradeRunning,
+      gradeGraders,
+      gradeSessions,
+      gradeSessionDetail,
+      loadGradeData,
+      loadGradeSessionDetail,
+      runGradeGrader,
+      toggleGradeFlag,
+      updateGradeFlagReason,
+      activeTestRunId,
+      activeGradeRunId,
+    ],
+  );
+
+  return (
+    <WorkspaceContext.Provider value={value}>
+      {children}
+    </WorkspaceContext.Provider>
+  );
+}
+
+/**
+ * Reads the workspace context value.
+ *
+ * @throws Error when the context has no value, i.e. the hook is used outside
+ * of `WorkspaceProvider`.
+ */
+function useWorkspaceContext() {
+  const workspace = useContext(WorkspaceContext);
+  if (workspace) return workspace;
+  throw new Error("Workspace hooks must be used within WorkspaceProvider");
+}
+
+/**
+ * Returns the `build` section of the workspace context value.
+ * Throws (via `useWorkspaceContext`) when no workspace context is available.
+ */
+export function useWorkspaceBuild() {
+  return useWorkspaceContext().build;
+}
+
+/**
+ * Returns the `test` section of the workspace context value.
+ * Throws (via `useWorkspaceContext`) when no workspace context is available.
+ */
+export function useWorkspaceTest() {
+  return useWorkspaceContext().test;
+}
+
+/**
+ * Returns the `grade` section of the workspace context value.
+ * Throws (via `useWorkspaceContext`) when no workspace context is available.
+ */
+export function useWorkspaceGrade() {
+  return useWorkspaceContext().grade;
+}
+
+/**
+ * Returns the `routing` section of the workspace context value.
+ * Throws (via `useWorkspaceContext`) when no workspace context is available.
+ */
+export function useWorkspaceRouting() {
+  return useWorkspaceContext().routing;
+}
diff --git a/simulator-ui/src/gds/Badge.tsx b/simulator-ui/src/gds/Badge.tsx
index bdc8f6c6c..7bc39288b 100644
--- a/simulator-ui/src/gds/Badge.tsx
+++ b/simulator-ui/src/gds/Badge.tsx
@@ -1,4 +1,5 @@
 import React from "react";
+import Tooltip from "./Tooltip.tsx";
 import { classNames } from "../utils.ts";
 
 export type BadgeVariant =
@@ -24,16 +25,17 @@ export default function Badge(
     variant?: BadgeVariant;
     title?: string;
     status?: string;
+    tooltip?: React.ReactNode;
   },
 ) {
-  const { variant, status, className, children, ...rest } = props;
+  const { variant, status, tooltip, className, children, ...rest } = props;
   const statusText = status ??
     (typeof children === "string" ? children : undefined);
   const inferredVariant = statusText
     ? STATUS_VARIANT_MAP[statusText.trim().toLowerCase()]
     : undefined;
 
-  return (
+  const badgeElement = (
     <span
       className={classNames(
         "badge",
@@ -47,4 +49,10 @@ export default function Badge(
       {children}
     </span>
   );
+
+  if (tooltip === undefined || tooltip === null || tooltip === false) {
+    return badgeElement;
+  }
+
+  return <Tooltip content={tooltip}>{badgeElement}</Tooltip>;
 }
diff --git a/simulator-ui/src/gds/Icon.tsx b/simulator-ui/src/gds/Icon.tsx
index f3f2e508e..c686fae33 100644
--- a/simulator-ui/src/gds/Icon.tsx
+++ b/simulator-ui/src/gds/Icon.tsx
@@ -1,6 +1,7 @@
 import { ChevronDownIcon } from "./icons/ChevronDown.tsx";
 import { CircleInfoIcon } from "./icons/CircleInfo.tsx";
 import { CircleSolidChevronDownIcon } from "./icons/CircleSolidChevronDown.tsx";
+import { ChatIcon } from "./icons/Chat.tsx";
 import { CloseIcon } from "./icons/Close.tsx";
 import { CopiedIcon } from "./icons/Copied.tsx";
 import { CopyIcon } from "./icons/Copy.tsx";
@@ -11,6 +12,7 @@ import { ReviewIcon } from "./icons/Review.tsx";
 
 const ICONS = {
   chevronDown: ChevronDownIcon,
+  chat: ChatIcon,
   close: CloseIcon,
   flag: FlagIcon,
   hamburgerMenu: HamburgerMenuIcon,
diff --git a/simulator-ui/src/gds/Tabs.tsx b/simulator-ui/src/gds/Tabs.tsx
index ebadd0961..5dc6ee822 100644
--- a/simulator-ui/src/gds/Tabs.tsx
+++ b/simulator-ui/src/gds/Tabs.tsx
@@ -7,6 +7,7 @@ type TabItem = {
   label: React.ReactNode;
   disabled?: boolean;
   testId?: string;
+  href?: string;
 };
 
 type TabsProps = {
@@ -37,6 +38,25 @@ export default function Tabs({
       <span className="tab-anchor-indicator" aria-hidden="true" />
       {tabs.map((tab) => {
         const isActive = tab.id === activeId;
+        const handleClick = (event: React.MouseEvent<HTMLElement>) => {
+          if (tab.disabled) {
+            event.preventDefault();
+            return;
+          }
+          // Let modified clicks keep native anchor behavior (new tab/window).
+          if (
+            tab.href &&
+            (event.metaKey ||
+              event.ctrlKey ||
+              event.shiftKey ||
+              event.altKey ||
+              event.button !== 0)
+          ) {
+            return;
+          }
+          event.preventDefault();
+          onChange(tab.id);
+        };
         return (
           <Button
             key={tab.id}
@@ -48,11 +68,12 @@ export default function Tabs({
               isActive && "tab-anchor--active",
               tabClassName,
             )}
-            onClick={() => onChange(tab.id)}
+            onClick={handleClick}
             disabled={tab.disabled}
             data-testid={tab.testId}
             role="tab"
             aria-selected={isActive}
+            href={tab.href}
           >
             {tab.label}
           </Button>
diff --git a/simulator-ui/src/gds/Tooltip.tsx b/simulator-ui/src/gds/Tooltip.tsx
new file mode 100644
index 000000000..5b6289bba
--- /dev/null
+++ b/simulator-ui/src/gds/Tooltip.tsx
@@ -0,0 +1,225 @@
+import React, {
+  useCallback,
+  useEffect,
+  useId,
+  useLayoutEffect,
+  useRef,
+  useState,
+} from "react";
+import { createPortal } from "react-dom";
+import { classNames } from "../utils.ts";
+
+export type TooltipSide = "top" | "right" | "bottom" | "left";
+
+type TooltipPosition = {
+  top: number;
+  left: number;
+};
+
+export type TooltipProps = {
+  content: React.ReactNode;
+  children: React.ReactNode;
+  side?: TooltipSide;
+  offset?: number;
+  disabled?: boolean;
+  openDelayMs?: number;
+  closeDelayMs?: number;
+  className?: string;
+  tooltipClassName?: string;
+  id?: string;
+};
+
+export default function Tooltip(props: TooltipProps) {
+  const {
+    content,
+    children,
+    side = "top",
+    offset = 8,
+    disabled = false,
+    openDelayMs = 120,
+    closeDelayMs = 80,
+    className,
+    tooltipClassName,
+    id,
+  } = props;
+
+  const canUseDom = typeof document !== "undefined" &&
+    typeof window !== "undefined";
+  const [open, setOpen] = useState(false);
+  const [position, setPosition] = useState<TooltipPosition | null>(null);
+  const anchorRef = useRef<HTMLSpanElement | null>(null);
+  const tooltipRef = useRef<HTMLDivElement | null>(null);
+  const openTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
+  const closeTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
+  const generatedId = useId();
+  const tooltipId = id ?? `gds-tooltip-${generatedId}`;
+
+  const clearOpenTimer = useCallback(() => {
+    if (openTimerRef.current === null) return;
+    clearTimeout(openTimerRef.current);
+    openTimerRef.current = null;
+  }, []);
+
+  const clearCloseTimer = useCallback(() => {
+    if (closeTimerRef.current === null) return;
+    clearTimeout(closeTimerRef.current);
+    closeTimerRef.current = null;
+  }, []);
+
+  const scheduleOpen = useCallback(() => {
+    if (disabled || !content) return;
+    clearCloseTimer();
+    clearOpenTimer();
+    openTimerRef.current = setTimeout(() => {
+      setOpen(true);
+      openTimerRef.current = null;
+    }, Math.max(0, openDelayMs));
+  }, [clearCloseTimer, clearOpenTimer, content, disabled, openDelayMs]);
+
+  const scheduleClose = useCallback(() => {
+    clearOpenTimer();
+    clearCloseTimer();
+    closeTimerRef.current = setTimeout(() => {
+      setOpen(false);
+      closeTimerRef.current = null;
+    }, Math.max(0, closeDelayMs));
+  }, [clearCloseTimer, clearOpenTimer, closeDelayMs]);
+
+  const closeNow = useCallback(() => {
+    clearOpenTimer();
+    clearCloseTimer();
+    setOpen(false);
+  }, [clearCloseTimer, clearOpenTimer]);
+
+  const updatePosition = useCallback(() => {
+    const anchor = anchorRef.current;
+    const tooltip = tooltipRef.current;
+    if (!anchor || !tooltip || !canUseDom) return;
+    // Measure both boxes (viewport-relative), then place the tooltip per `side`.
+    const anchorRect = anchor.getBoundingClientRect();
+    const tooltipRect = tooltip.getBoundingClientRect();
+    const margin = 8;
+    let nextTop = 0;
+    let nextLeft = 0;
+
+    if (side === "top") {
+      nextTop = anchorRect.top - offset - tooltipRect.height;
+      nextLeft = anchorRect.left + (anchorRect.width - tooltipRect.width) / 2;
+    } else if (side === "right") {
+      nextTop = anchorRect.top + (anchorRect.height - tooltipRect.height) / 2;
+      nextLeft = anchorRect.right + offset;
+    } else if (side === "bottom") {
+      nextTop = anchorRect.bottom + offset;
+      nextLeft = anchorRect.left + (anchorRect.width - tooltipRect.width) / 2;
+    } else {
+      nextTop = anchorRect.top + (anchorRect.height - tooltipRect.height) / 2;
+      nextLeft = anchorRect.left - offset - tooltipRect.width;
+    }
+    // Clamp to the viewport; `margin` wins when the tooltip is oversized.
+    const clampedLeft = Math.max(
+      margin,
+      Math.min(nextLeft, window.innerWidth - tooltipRect.width - margin),
+    );
+    const clampedTop = Math.max(
+      margin,
+      Math.min(nextTop, window.innerHeight - tooltipRect.height - margin),
+    );
+
+    setPosition({ top: clampedTop, left: clampedLeft });
+  }, [canUseDom, offset, side]);
+
+  useEffect(() => {
+    if (disabled || !content) {
+      closeNow();
+    }
+  }, [closeNow, content, disabled]);
+
+  useLayoutEffect(() => {
+    if (!open || !canUseDom) return;
+    updatePosition();
+  }, [canUseDom, open, updatePosition]);
+
+  useEffect(() => {
+    if (!open || !canUseDom) return;
+    const handleEscape = (event: KeyboardEvent) => {
+      if (event.key !== "Escape") return;
+      closeNow();
+    };
+    const handleReposition = () => updatePosition();
+    document.addEventListener("keydown", handleEscape);
+    window.addEventListener("resize", handleReposition);
+    window.addEventListener("scroll", handleReposition, true);
+    return () => {
+      document.removeEventListener("keydown", handleEscape);
+      window.removeEventListener("resize", handleReposition);
+      window.removeEventListener("scroll", handleReposition, true);
+    };
+  }, [canUseDom, closeNow, open, updatePosition]);
+
+  useEffect(() => {
+    return () => {
+      clearOpenTimer();
+      clearCloseTimer();
+    };
+  }, [clearCloseTimer, clearOpenTimer]);
+
+  const childNode = React.isValidElement(children) // only elements can take props
+    ? (() => {
+      const childElement = children as React.ReactElement<{
+        "aria-describedby"?: string;
+      }>;
+      const existing = // caller-supplied description ids, if any
+        typeof childElement.props["aria-describedby"] === "string"
+          ? childElement.props["aria-describedby"]
+          : undefined;
+      const nextDescribedBy = (() => {
+        if (disabled || !content) return existing; // no tooltip: leave untouched
+        if (!existing) return tooltipId;
+        if (existing.split(/\s+/).includes(tooltipId)) return existing; // any whitespace separates ids
+        return `${existing} ${tooltipId}`;
+      })();
+      return React.cloneElement(childElement, {
+        "aria-describedby": nextDescribedBy,
+      });
+    })()
+    : children; // strings, fragments of text, etc. are rendered unchanged
+
+  return (
+    <>
+      <span
+        className={classNames("gds-tooltip-anchor", className)}
+        ref={anchorRef}
+        onMouseEnter={scheduleOpen}
+        onMouseLeave={scheduleClose}
+        onFocusCapture={scheduleOpen}
+        onBlurCapture={(event) => {
+          const relatedTarget = event.relatedTarget as Node | null;
+          if (
+            relatedTarget && event.currentTarget.contains(relatedTarget)
+          ) return;
+          scheduleClose();
+        }}
+      >
+        {childNode}
+      </span>
+      {canUseDom && open && content &&
+        createPortal(
+          <div
+            id={tooltipId}
+            role="tooltip"
+            ref={tooltipRef}
+            className={classNames(
+              "gds-tooltip",
+              `gds-tooltip--${side}`,
+              tooltipClassName,
+            )}
+            style={position ?? { top: -9999, left: -9999 }}
+            aria-hidden={!open}
+          >
+            {content}
+          </div>,
+          document.body,
+        )}
+    </>
+  );
+}
diff --git a/simulator-ui/src/gds/__tests__/Badge.test.tsx b/simulator-ui/src/gds/__tests__/Badge.test.tsx
new file mode 100644
index 000000000..e27d2c746
--- /dev/null
+++ b/simulator-ui/src/gds/__tests__/Badge.test.tsx
@@ -0,0 +1,100 @@
+import { assert, assertEquals } from "@std/assert";
+import React from "react";
+import TestRenderer, { act } from "npm:react-test-renderer@19.2.0";
+import type { ReactTestInstance } from "npm:react-test-renderer@19.2.0";
+
+const globals = globalThis as unknown as {
+  window?: Record<string, unknown>;
+};
+if (!globals.window) globals.window = {};
+
+(globalThis as { IS_REACT_ACT_ENVIRONMENT?: boolean })
+  .IS_REACT_ACT_ENVIRONMENT = true;
+
+const { default: Badge } = await import("../Badge.tsx");
+
+Deno.test("Badge renders normally without tooltip", async () => {
+  let renderer: TestRenderer.ReactTestRenderer | null = null;
+  try {
+    await act(async () => {
+      renderer = TestRenderer.create(
+        <Badge variant="ghost">status</Badge>,
+      );
+    });
+    assert(renderer);
+    const badge = renderer.root.find(
+      (node: ReactTestInstance) =>
+        node.type === "span" &&
+        typeof node.props.className === "string" &&
+        node.props.className.includes("badge"),
+    );
+    assertEquals(badge.props["aria-describedby"], undefined);
+  } finally {
+    if (renderer) {
+      await act(async () => {
+        renderer?.unmount();
+      });
+    }
+  }
+});
+
+Deno.test("Badge adds tooltip aria-describedby when tooltip is provided", async () => {
+  let renderer: TestRenderer.ReactTestRenderer | null = null;
+  try {
+    await act(async () => {
+      renderer = TestRenderer.create(
+        <Badge variant="ghost" tooltip="Helpful status">
+          status
+        </Badge>,
+      );
+    });
+    assert(renderer);
+    const badge = renderer.root.find(
+      (node: ReactTestInstance) =>
+        node.type === "span" &&
+        typeof node.props.className === "string" &&
+        node.props.className.includes("badge"),
+    );
+    assert(typeof badge.props["aria-describedby"] === "string");
+    assert(badge.props["aria-describedby"].startsWith("gds-tooltip-"));
+  } finally {
+    if (renderer) {
+      await act(async () => {
+        renderer?.unmount();
+      });
+    }
+  }
+});
+
+Deno.test("Badge preserves existing aria-describedby when tooltip is provided", async () => {
+  let renderer: TestRenderer.ReactTestRenderer | null = null;
+  try {
+    await act(async () => {
+      renderer = TestRenderer.create(
+        <Badge
+          variant="ghost"
+          tooltip="Helpful status"
+          aria-describedby="existing-tip"
+        >
+          status
+        </Badge>,
+      );
+    });
+    assert(renderer);
+    const badge = renderer.root.find(
+      (node: ReactTestInstance) =>
+        node.type === "span" &&
+        typeof node.props.className === "string" &&
+        node.props.className.includes("badge"),
+    );
+    assert(typeof badge.props["aria-describedby"] === "string");
+    assert(badge.props["aria-describedby"].includes("existing-tip"));
+    assert(badge.props["aria-describedby"].includes("gds-tooltip-"));
+  } finally {
+    if (renderer) {
+      await act(async () => {
+        renderer?.unmount();
+      });
+    }
+  }
+});
diff --git a/simulator-ui/src/gds/__tests__/Tooltip.test.tsx b/simulator-ui/src/gds/__tests__/Tooltip.test.tsx
new file mode 100644
index 000000000..8e8f1aebe
--- /dev/null
+++ b/simulator-ui/src/gds/__tests__/Tooltip.test.tsx
@@ -0,0 +1,82 @@
+import { assert, assertEquals } from "@std/assert";
+import React from "react";
+import TestRenderer, { act } from "npm:react-test-renderer@19.2.0";
+
+const globals = globalThis as unknown as {
+  window?: Record<string, unknown>;
+};
+if (!globals.window) globals.window = {};
+
+(globalThis as { IS_REACT_ACT_ENVIRONMENT?: boolean })
+  .IS_REACT_ACT_ENVIRONMENT = true;
+
+const { default: Tooltip } = await import("../Tooltip.tsx");
+
+Deno.test("Tooltip adds aria-describedby to child element", async () => {
+  let renderer: TestRenderer.ReactTestRenderer | null = null;
+  try {
+    await act(async () => {
+      renderer = TestRenderer.create(
+        <Tooltip content="Helpful context">
+          <button type="button">Info</button>
+        </Tooltip>,
+      );
+    });
+    assert(renderer);
+    const button = renderer.root.findByType("button");
+    assert(typeof button.props["aria-describedby"] === "string");
+    assert(button.props["aria-describedby"].startsWith("gds-tooltip-"));
+  } finally {
+    if (renderer) {
+      await act(async () => {
+        renderer?.unmount();
+      });
+    }
+  }
+});
+
+Deno.test("Tooltip omits aria-describedby when disabled", async () => {
+  let renderer: TestRenderer.ReactTestRenderer | null = null;
+  try {
+    await act(async () => {
+      renderer = TestRenderer.create(
+        <Tooltip content="Helpful context" disabled>
+          <button type="button">Info</button>
+        </Tooltip>,
+      );
+    });
+    assert(renderer);
+    const button = renderer.root.findByType("button");
+    assertEquals(button.props["aria-describedby"], undefined);
+  } finally {
+    if (renderer) {
+      await act(async () => {
+        renderer?.unmount();
+      });
+    }
+  }
+});
+
+Deno.test("Tooltip preserves existing aria-describedby values", async () => {
+  let renderer: TestRenderer.ReactTestRenderer | null = null;
+  try {
+    await act(async () => {
+      renderer = TestRenderer.create(
+        <Tooltip content="Helpful context">
+          <button type="button" aria-describedby="existing-tip">Info</button>
+        </Tooltip>,
+      );
+    });
+    assert(renderer);
+    const button = renderer.root.findByType("button");
+    assert(typeof button.props["aria-describedby"] === "string");
+    assert(button.props["aria-describedby"].includes("existing-tip"));
+    assert(button.props["aria-describedby"].includes("gds-tooltip-"));
+  } finally {
+    if (renderer) {
+      await act(async () => {
+        renderer?.unmount();
+      });
+    }
+  }
+});
diff --git a/simulator-ui/src/gds/icons/Chat.tsx b/simulator-ui/src/gds/icons/Chat.tsx
new file mode 100644
index 000000000..5af6bde9f
--- /dev/null
+++ b/simulator-ui/src/gds/icons/Chat.tsx
@@ -0,0 +1,34 @@
+import React from "react";
+
+type IconProps = React.SVGProps<SVGSVGElement> & {
+  title?: string;
+};
+
+export function ChatIcon({ title, ...props }: IconProps) {
+  return (
+    <svg
+      viewBox="0 0 18 17"
+      fill="none"
+      xmlns="http://www.w3.org/2000/svg"
+      {...props}
+    >
+      {title && <title>{title}</title>}
+      <path
+        fillRule="evenodd"
+        clipRule="evenodd"
+        d="M16.2994 13.6024C15.0659 14.839 13.2216 15.3072 10.8503 15.3072H7.1497C4.77844 15.3072 2.93413 14.839 1.7006 13.6024C0.467066 12.3658 0 10.5169 0 8.13983V7.16737C0 4.79025 0.467066 2.94138 1.7006 1.7048C2.93413 0.46822 4.77844 0 7.1497 0H10.8503C13.2216 0 15.0659 0.46822 16.2994 1.7048C17.5329 2.94138 18 4.79025 18 7.16737V8.13983C18 10.5169 17.5329 12.3658 16.2994 13.6024ZM15.3293 12.6299C16.1796 11.7775 16.6228 10.3849 16.6228 8.13983V7.16737C16.6228 4.92232 16.1677 3.52966 15.3293 2.67726C14.479 1.82486 13.0898 1.38065 10.8503 1.38065H7.1497C4.91018 1.38065 3.52096 1.83686 2.67066 2.67726C1.82036 3.52966 1.37725 4.92232 1.37725 7.16737V8.13983C1.37725 10.3849 1.83234 11.7775 2.67066 12.6299C3.52096 13.4823 4.91018 13.9266 7.1497 13.9266H10.8503C13.0898 13.9266 14.479 13.4703 15.3293 12.6299Z"
+        fill="currentColor"
+      />
+      <path
+        fillRule="evenodd"
+        clipRule="evenodd"
+        d="M15.6287 13.2782C15.6287 13.2782 15.7126 14.851 16.8503 16.2676C17.1138 16.6038 16.7904 17.084 16.3832 16.976C14.0718 16.4117 12.0359 14.6949 12.0359 14.6949L15.6287 13.2662V13.2782Z"
+        fill="currentColor"
+      />
+      <path
+        d="M12.1916 8.42801C12.7186 8.42801 13.0539 8.04383 13.0539 7.51558C13.0539 6.98733 12.7066 6.60315 12.2036 6.60315C11.7006 6.60315 11.3413 6.97532 11.3413 7.51558C11.3413 8.05583 11.6886 8.42801 12.1916 8.42801ZM9.22156 8.42801C9.7485 8.42801 10.0838 8.04383 10.0838 7.51558C10.0838 6.98733 9.73653 6.60315 9.23353 6.60315C8.73054 6.60315 8.37126 6.97532 8.37126 7.51558C8.37126 8.05583 8.71856 8.42801 9.22156 8.42801ZM6.23952 8.42801C6.76647 8.42801 7.1018 8.04383 7.1018 7.51558C7.1018 6.98733 6.75449 6.60315 6.2515 6.60315C5.7485 6.60315 5.38922 6.97532 5.38922 7.51558C5.38922 8.05583 5.73653 8.42801 6.23952 8.42801Z"
+        fill="currentColor"
+      />
+    </svg>
+  );
+}
diff --git a/simulator-ui/src/main.tsx b/simulator-ui/src/main.tsx
index af73d2cab..eb2f1ea47 100644
--- a/simulator-ui/src/main.tsx
+++ b/simulator-ui/src/main.tsx
@@ -11,19 +11,19 @@ import { globalStyles } from "./styles.ts";
 import Button from "./gds/Button.tsx";
 import WorkbenchDrawer from "./WorkbenchDrawer.tsx";
 import SessionsDrawer from "./SessionsDrawer.tsx";
-import { BuildChatProvider } from "./BuildChatContext.tsx";
+import { WorkspaceProvider } from "./WorkspaceContext.tsx";
 import {
   buildConversationEntries,
   buildDurableStreamUrl,
   buildGradePath,
   buildTabEnabled,
+  buildTestPath,
   classNames,
   cloneValue,
   deckDisplayPath,
   deckLabel,
   deckPath,
-  DEFAULT_BUILD_PATH,
-  DEFAULT_SESSION_PATH,
+  DEFAULT_GRADE_PATH,
   DEFAULT_TEST_PATH,
   deriveInitialFromSchema,
   DOCS_PATH,
@@ -31,21 +31,22 @@ import {
   findMissingRequiredFields,
   formatTimestamp,
   getDurableStreamOffset,
-  getSessionIdFromPath,
+  getWorkspaceIdFromPath,
+  getWorkspaceRouteFromPath,
   normalizeAppPath,
   normalizeBasePath,
   normalizedDeckPath,
   normalizeFsPath,
   repoRootPath,
   SCORE_VALUES,
-  SESSIONS_BASE_PATH,
   setDurableStreamOffset,
   SIMULATOR_STREAM_ID,
   toRelativePath,
   workspaceIdFromWindow,
+  WORKSPACES_BASE_PATH,
 } from "./utils.ts";
+import { buildWorkspacePath } from "../../src/workspace_contract.ts";
 import type {
-  FeedbackEntry,
   GradingFlag,
   SavedState,
   SessionDetailResponse,
@@ -181,7 +182,7 @@ function useSimulator() {
             resetState: opts.resetState ?? false,
             trace: opts.trace ?? true,
             stream: true,
-            sessionId,
+            workspaceId: sessionId,
           }),
         });
         const payload = await res.json().catch(() => ({}));
@@ -208,7 +209,12 @@ function useSimulator() {
         const res = await fetch("/api/simulator/feedback", {
           method: "POST",
           headers: { "content-type": "application/json" },
-          body: JSON.stringify({ sessionId, messageRefId, score, reason }),
+          body: JSON.stringify({
+            workspaceId: sessionId,
+            messageRefId,
+            score,
+            reason,
+          }),
         });
         if (!res.ok) throw new Error(res.statusText);
       } catch (err) {
@@ -224,7 +230,7 @@ function useSimulator() {
       const res = await fetch("/api/simulator/load-session", {
         method: "POST",
         headers: { "content-type": "application/json" },
-        body: JSON.stringify({ sessionId }),
+        body: JSON.stringify({ workspaceId: sessionId }),
       });
       const payload = await res.json().catch(() => ({}));
       if (!res.ok) {
@@ -252,7 +258,7 @@ function useSimulator() {
         const res = await fetch("/api/simulator/notes", {
           method: "POST",
           headers: { "content-type": "application/json" },
-          body: JSON.stringify({ sessionId, text }),
+          body: JSON.stringify({ workspaceId: sessionId, text }),
         });
         if (!res.ok) throw new Error(res.statusText);
       } catch (err) {
@@ -271,7 +277,7 @@ function useSimulator() {
         const res = await fetch("/api/simulator/conversation-score", {
           method: "POST",
           headers: { "content-type": "application/json" },
-          body: JSON.stringify({ sessionId, score }),
+          body: JSON.stringify({ workspaceId: sessionId, score }),
         });
         if (!res.ok) throw new Error(res.statusText);
       } catch (err) {
@@ -328,7 +334,7 @@ function useSessions() {
     setLoading(true);
     setError(null);
     try {
-      const res = await fetch("/sessions");
+      const res = await fetch("/workspaces");
       if (!res.ok) throw new Error(res.statusText);
       const body = await res.json() as { sessions?: SessionMeta[] };
       setSessions(byNewest(body.sessions ?? []));
@@ -348,7 +354,7 @@ function useSessions() {
       const res = await fetch("/api/session/delete", {
         method: "POST",
         headers: { "content-type": "application/json" },
-        body: JSON.stringify({ sessionId }),
+        body: JSON.stringify({ workspaceId: sessionId }),
       });
       if (!res.ok) throw new Error(res.statusText);
       await refresh();
@@ -367,7 +373,7 @@ function useSessions() {
     try {
       let targetSessions = scope;
       if (!targetSessions) {
-        const res = await fetch("/sessions");
+        const res = await fetch("/workspaces");
         if (!res.ok) throw new Error(res.statusText);
         const body = await res.json() as { sessions?: SessionMeta[] };
         targetSessions = body.sessions ?? [];
@@ -377,7 +383,7 @@ function useSessions() {
           fetch("/api/session/delete", {
             method: "POST",
             headers: { "content-type": "application/json" },
-            body: JSON.stringify({ sessionId: session.id }),
+            body: JSON.stringify({ workspaceId: session.id }),
           })
         ),
       );
@@ -449,13 +455,13 @@ function SimulatorApp(
     setNavActions,
     sessionsApi,
     onOpenSessionsDrawer,
-    activeSessionId,
+    activeWorkspaceId,
   }: {
     basePath: string;
     setNavActions?: (actions: React.ReactNode | null) => void;
     sessionsApi: SessionsApi;
     onOpenSessionsDrawer: () => void;
-    activeSessionId?: string | null;
+    activeWorkspaceId?: string | null;
   },
 ) {
   const simulator = useSimulator();
@@ -467,18 +473,20 @@ function SimulatorApp(
     refresh,
   } = sessionsApi;
   const { resetLocal } = simulator;
-  const normalizedBase = normalizeBasePath(basePath || SESSIONS_BASE_PATH);
+  const normalizedBase = normalizeBasePath(basePath || WORKSPACES_BASE_PATH);
   const rootPath = normalizedBase === "" ? "/" : normalizedBase;
-  const sessionBasePath = rootPath === "/" ? SESSIONS_BASE_PATH : rootPath;
+  const sessionBasePath = rootPath === "/" ? WORKSPACES_BASE_PATH : rootPath;
   const normalizedSessionBase = normalizeBasePath(sessionBasePath);
   const newSessionPath = `${
-    normalizedSessionBase === "" ? "/sessions" : normalizedSessionBase
+    normalizedSessionBase === "" ? WORKSPACES_BASE_PATH : normalizedSessionBase
   }/new`.replace(/\/{2,}/g, "/");
   const buildSessionUrl = useCallback(
     (sessionId: string) =>
-      `${normalizedSessionBase === "" ? "/sessions" : normalizedSessionBase}/${
-        encodeURIComponent(sessionId)
-      }/debug`.replace(/\/{2,}/g, "/"),
+      `${
+        normalizedSessionBase === ""
+          ? WORKSPACES_BASE_PATH
+          : normalizedSessionBase
+      }/${encodeURIComponent(sessionId)}/debug`.replace(/\/{2,}/g, "/"),
     [normalizedSessionBase],
   );
   const [message, setMessage] = useState("");
@@ -646,8 +654,8 @@ function SimulatorApp(
   useEffect(() => {
     if (initializedRef.current) return;
     initializedRef.current = true;
-    const initialSession = getSessionIdFromPath(undefined, sessionBasePath) ??
-      getSessionIdFromPath();
+    const initialSession = getWorkspaceIdFromPath(undefined, sessionBasePath) ??
+      getWorkspaceIdFromPath();
     if (initialSession) {
       navigateToSession(initialSession, { replace: true });
       return;
@@ -656,20 +664,20 @@ function SimulatorApp(
   }, [navigateToSession, startNewChat, sessionBasePath]);
 
   useEffect(() => {
-    if (!activeSessionId) {
+    if (!activeWorkspaceId) {
       externalSessionIdRef.current = null;
       return;
     }
-    if (externalSessionIdRef.current === activeSessionId) return;
-    externalSessionIdRef.current = activeSessionId;
-    adoptSessionFromPath(activeSessionId);
-  }, [activeSessionId, adoptSessionFromPath]);
+    if (externalSessionIdRef.current === activeWorkspaceId) return;
+    externalSessionIdRef.current = activeWorkspaceId;
+    adoptSessionFromPath(activeWorkspaceId);
+  }, [activeWorkspaceId, adoptSessionFromPath]);
 
   useEffect(() => {
     const handler = () => {
       const sessionFromPath =
-        getSessionIdFromPath(undefined, sessionBasePath) ??
-          getSessionIdFromPath();
+        getWorkspaceIdFromPath(undefined, sessionBasePath) ??
+          getWorkspaceIdFromPath();
       if (sessionFromPath) {
         adoptSessionFromPath(sessionFromPath);
       } else {
@@ -1049,7 +1057,7 @@ function SimulatorApp(
 }
 
 function App() {
-  const simulatorBasePath = SESSIONS_BASE_PATH;
+  const simulatorBasePath = WORKSPACES_BASE_PATH;
   const [path, setPath] = useState(() =>
     normalizeAppPath(window.location.pathname)
   );
@@ -1057,13 +1065,30 @@ function App() {
   const [navActions, setNavActions] = useState<React.ReactNode>(null);
   const [sessionsDrawerOpen, setSessionsDrawerOpen] = useState(false);
   const [workbenchDrawerOpen, setWorkbenchDrawerOpen] = useState(true);
+  const [workspaceRunIds, setWorkspaceRunIds] = useState<{
+    testRunId: string | null;
+    gradeRunId: string | null;
+  }>({
+    testRunId: null,
+    gradeRunId: null,
+  });
   const sessionsApi = useSessions();
   const [testBotResetToken, setTestBotResetToken] = useState(0);
-  const pathSessionId = getSessionIdFromPath(path);
-  const pathRequestsNewSession = /^\/sessions\/new(?:\/|$)/.test(path);
-  const activeSessionId = pathRequestsNewSession
+  const pathRoute = getWorkspaceRouteFromPath(path);
+  const livePath = window.location.pathname.replace(/\/+$/, "") || "/";
+  const liveRoute = getWorkspaceRouteFromPath(livePath);
+  const routeState = liveRoute ?? pathRoute;
+  const routeRequestsNewWorkspace = Boolean(routeState?.isNew);
+  const activeWorkspaceId = routeRequestsNewWorkspace
     ? null
-    : pathSessionId ?? workspaceIdFromWindow;
+    : routeState?.workspaceId ?? workspaceIdFromWindow;
+  const requestedTestRunId = routeState?.tab === "test"
+    ? routeState.testRunId ?? null
+    : null;
+  const requestedGradeRunId = routeState?.tab === "grade"
+    ? routeState.gradeRunId ?? null
+    : null;
+  const lastWorkspaceIdRef = useRef<string | null>(null);
   const [workbenchSessionDetail, setWorkbenchSessionDetail] = useState<
     SessionDetailResponse | null
   >(null);
@@ -1074,25 +1099,51 @@ function App() {
   const workbenchSessionDetailRequestRef = useRef(0);
   const workbenchSessionRetryRef = useRef<Record<string, number>>({});
   const workbenchRefreshTimeoutRef = useRef<number | null>(null);
-  const activeSessionIdRef = useRef<string | null>(activeSessionId);
+  const activeWorkspaceIdRef = useRef<string | null>(activeWorkspaceId);
   const workspaceInitRef = useRef(false);
 
   useEffect(() => {
-    const handler = () => setPath(normalizeAppPath(window.location.pathname));
-    window.addEventListener("popstate", handler);
-    return () => window.removeEventListener("popstate", handler);
+    if (activeWorkspaceId) {
+      lastWorkspaceIdRef.current = activeWorkspaceId;
+    }
+  }, [activeWorkspaceId]);
+
+  useEffect(() => {
+    const syncPath = () => setPath(normalizeAppPath(window.location.pathname));
+    const historyObj = window.history as History & {
+      pushState: History["pushState"];
+      replaceState: History["replaceState"];
+    };
+    const originalPushState = historyObj.pushState.bind(historyObj);
+    const originalReplaceState = historyObj.replaceState.bind(historyObj);
+    historyObj.pushState = (...args) => {
+      originalPushState(...args);
+      window.dispatchEvent(new Event("locationchange"));
+    };
+    historyObj.replaceState = (...args) => {
+      originalReplaceState(...args);
+      window.dispatchEvent(new Event("locationchange"));
+    };
+    window.addEventListener("popstate", syncPath);
+    window.addEventListener("locationchange", syncPath);
+    return () => {
+      historyObj.pushState = originalPushState;
+      historyObj.replaceState = originalReplaceState;
+      window.removeEventListener("popstate", syncPath);
+      window.removeEventListener("locationchange", syncPath);
+    };
   }, []);
 
   const loadWorkbenchSessionDetail = useCallback(async (sessionId: string) => {
     const requestId = ++workbenchSessionDetailRequestRef.current;
     const shouldApply = () =>
       requestId === workbenchSessionDetailRequestRef.current &&
-      activeSessionIdRef.current === sessionId;
+      activeWorkspaceIdRef.current === sessionId;
     try {
       setWorkbenchSessionDetailLoading(true);
       setWorkbenchSessionDetailError(null);
       const res = await fetch(
-        `/api/session?sessionId=${encodeURIComponent(sessionId)}`,
+        `/api/workspaces/${encodeURIComponent(sessionId)}`,
       );
       if (!res.ok) {
         if (!shouldApply()) return;
@@ -1105,7 +1156,7 @@ function App() {
             window.setTimeout(() => {
               if (
                 workbenchSessionDetailRequestRef.current === requestId &&
-                activeSessionIdRef.current === sessionId
+                activeWorkspaceIdRef.current === sessionId
               ) {
                 loadWorkbenchSessionDetail(sessionId).catch(() => {});
               }
@@ -1116,7 +1167,10 @@ function App() {
         const text = await res.text().catch(() => "");
         throw new Error(text || res.statusText);
       }
-      const detail = await res.json().catch(() => null);
+      const detailEnvelope = await res.json().catch(() => null) as {
+        session?: SessionDetailResponse;
+      } | null;
+      const detail = detailEnvelope?.session ?? null;
       if (!shouldApply()) return;
       setWorkbenchSessionDetail(
         detail && typeof detail === "object"
@@ -1140,24 +1194,24 @@ function App() {
   }, []);
 
   const scheduleWorkbenchRefresh = useCallback(() => {
-    if (!activeSessionId) return;
+    if (!activeWorkspaceId) return;
     if (workbenchRefreshTimeoutRef.current) {
       window.clearTimeout(workbenchRefreshTimeoutRef.current);
     }
-    const sessionId = activeSessionId;
+    const sessionId = activeWorkspaceId;
     workbenchRefreshTimeoutRef.current = window.setTimeout(() => {
-      if (activeSessionIdRef.current !== sessionId) return;
+      if (activeWorkspaceIdRef.current !== sessionId) return;
       loadWorkbenchSessionDetail(sessionId).catch(() => {});
     }, 900);
-  }, [activeSessionId, loadWorkbenchSessionDetail]);
+  }, [activeWorkspaceId, loadWorkbenchSessionDetail]);
 
   useEffect(() => {
-    activeSessionIdRef.current = activeSessionId;
+    activeWorkspaceIdRef.current = activeWorkspaceId;
     if (workbenchRefreshTimeoutRef.current) {
       window.clearTimeout(workbenchRefreshTimeoutRef.current);
       workbenchRefreshTimeoutRef.current = null;
     }
-  }, [activeSessionId]);
+  }, [activeWorkspaceId]);
 
   useEffect(() => {
     return () => {
@@ -1168,39 +1222,18 @@ function App() {
   }, []);
 
   useEffect(() => {
-    if (!activeSessionId) {
+    if (!activeWorkspaceId) {
       setWorkbenchSessionDetail(null);
       setWorkbenchSessionDetailError(null);
       setWorkbenchSessionDetailLoading(false);
       return;
     }
-    loadWorkbenchSessionDetail(activeSessionId).catch(() => {});
-  }, [activeSessionId, loadWorkbenchSessionDetail]);
+    loadWorkbenchSessionDetail(activeWorkspaceId).catch(() => {});
+  }, [activeWorkspaceId, loadWorkbenchSessionDetail]);
 
-  const applyFeedbackUpdate = useCallback((
-    messageRefId: string,
-    feedback: FeedbackEntry | null,
-  ) => {
-    setWorkbenchSessionDetail((prev) => {
-      if (!prev) return prev;
-      const existing = prev.feedback ?? [];
-      if (!feedback) {
-        if (!existing.length) return prev;
-        return {
-          ...prev,
-          feedback: existing.filter((entry) =>
-            entry.messageRefId !== messageRefId
-          ),
-        };
-      }
-      const index = existing.findIndex((entry) =>
-        entry.messageRefId === messageRefId
-      );
-      const nextFeedback = index >= 0
-        ? existing.map((entry, idx) => (idx === index ? feedback : entry))
-        : [feedback, ...existing];
-      return { ...prev, feedback: nextFeedback };
-    });
+  const handleFeedbackPersisted = useCallback((workspaceId: string) => {
+    if (!workspaceId) return;
+    if (activeWorkspaceIdRef.current !== workspaceId) return;
     scheduleWorkbenchRefresh();
   }, [scheduleWorkbenchRefresh]);
 
@@ -1253,8 +1286,7 @@ function App() {
         },
       };
     });
-    scheduleWorkbenchRefresh();
-  }, [scheduleWorkbenchRefresh]);
+  }, []);
 
   const optimisticFlagReason = useCallback((refId: string, reason: string) => {
     setWorkbenchSessionDetail((prev) => {
@@ -1277,8 +1309,7 @@ function App() {
         },
       };
     });
-    scheduleWorkbenchRefresh();
-  }, [scheduleWorkbenchRefresh]);
+  }, []);
 
   useEffect(() => {
     const loadBundleStamp = async () => {
@@ -1313,22 +1344,18 @@ function App() {
     setPath(normalizeAppPath(next));
   }, []);
   const handleReplaceTestBotSession = useCallback(
-    (sessionId: string) =>
-      replacePath(
-        `${simulatorBasePath}/${encodeURIComponent(sessionId)}/test`,
-      ),
-    [replacePath, simulatorBasePath],
+    (workspaceId: string, runId?: string) =>
+      replacePath(buildTestPath(workspaceId, runId)),
+    [replacePath],
   );
   const handleResetTestBotSession = useCallback(
-    () => replacePath(DEFAULT_TEST_PATH),
-    [replacePath],
+    () => replacePath(buildTestPath(activeWorkspaceId ?? undefined)),
+    [activeWorkspaceId, replacePath],
   );
 
   const handleWorkspaceChange = useCallback(
     (workspaceId: string) => {
-      replacePath(
-        `${SESSIONS_BASE_PATH}/${encodeURIComponent(workspaceId)}/build`,
-      );
+      replacePath(buildWorkspacePath("build", workspaceId));
     },
     [replacePath],
   );
@@ -1340,13 +1367,13 @@ function App() {
   }, [path, replacePath]);
 
   const isDocs = path === DOCS_PATH;
+  const routeTab = liveRoute?.tab ?? pathRoute?.tab;
   const isBuild = buildTabEnabled &&
-    (path === "/build" || path === DEFAULT_BUILD_PATH ||
-      /^\/sessions\/[^/]+\/build$/.test(path));
-  const isTestBot = !isDocs && /\/test$/.test(path);
+    (routeTab === "build" || path === "/build");
+  const isTestBot = !isDocs &&
+    (routeTab === "test" || /\/test$/.test(path));
   const isGrade = !isDocs &&
-    (path.startsWith("/grade") ||
-      /^\/sessions\/[^/]+\/grade/.test(path));
+    routeTab === "grade";
   const currentPage = isDocs
     ? "docs"
     : isBuild
@@ -1358,7 +1385,8 @@ function App() {
     : "debug";
 
   useEffect(() => {
-    if (activeSessionId) return;
+    if (activeWorkspaceId || lastWorkspaceIdRef.current) return;
+    if (!routeRequestsNewWorkspace) return;
     if (workspaceInitRef.current) return;
     if (
       currentPage !== "build" && currentPage !== "test" &&
@@ -1374,39 +1402,59 @@ function App() {
         };
         if (!res.ok || typeof data.workspaceId !== "string") return;
         const nextPath = currentPage === "test"
-          ? `${SESSIONS_BASE_PATH}/${encodeURIComponent(data.workspaceId)}/test`
+          ? buildWorkspacePath("test", data.workspaceId)
           : currentPage === "grade"
           ? buildGradePath(data.workspaceId)
-          : `${SESSIONS_BASE_PATH}/${
-            encodeURIComponent(data.workspaceId)
-          }/build`;
+          : buildWorkspacePath("build", data.workspaceId);
         replacePath(nextPath);
       })
       .finally(() => {
         workspaceInitRef.current = false;
       });
-  }, [activeSessionId, currentPage, replacePath]);
-  const testBotPath = activeSessionId
-    ? `${SESSIONS_BASE_PATH}/${encodeURIComponent(activeSessionId)}/test`
-    : DEFAULT_TEST_PATH;
-  const buildPath = activeSessionId
-    ? `${SESSIONS_BASE_PATH}/${encodeURIComponent(activeSessionId)}/build`
-    : DEFAULT_BUILD_PATH;
-  const debugPath = activeSessionId
-    ? `${SESSIONS_BASE_PATH}/${encodeURIComponent(activeSessionId)}/debug`
-    : DEFAULT_SESSION_PATH;
-  const gradePath = activeSessionId
-    ? buildGradePath(activeSessionId)
-    : "/grade";
+  }, [activeWorkspaceId, currentPage, replacePath, routeRequestsNewWorkspace]);
+  const resolveNavWorkspaceId = useCallback(() => {
+    const normalizedPath = (window.location.pathname || "/").replace(
+      /\/+$/,
+      "",
+    ) || "/";
+    const directMatch = normalizedPath.match(
+      /^\/workspaces\/([^/]+)\/(?:debug|build|test|grade)(?:\/[^/]+)?$/,
+    );
+    if (directMatch?.[1] && directMatch[1] !== "new") {
+      return decodeURIComponent(directMatch[1]);
+    }
+    return getWorkspaceRouteFromPath(normalizedPath)?.workspaceId ??
+      getWorkspaceIdFromPath(window.location.pathname) ??
+      activeWorkspaceId ??
+      lastWorkspaceIdRef.current;
+  }, [activeWorkspaceId]);
+  const resolveNavPath = useCallback((next: string) => {
+    if (next === "docs") return DOCS_PATH;
+    const workspaceId = resolveNavWorkspaceId();
+    if (next === "build") return buildWorkspacePath("build", workspaceId);
+    if (next === "test") {
+      return buildTestPath(workspaceId, workspaceRunIds.testRunId ?? undefined);
+    }
+    if (next === "grade") {
+      return workspaceId
+        ? buildGradePath(workspaceId, workspaceRunIds.gradeRunId ?? undefined)
+        : DEFAULT_GRADE_PATH;
+    }
+    return buildWorkspacePath("debug", workspaceId);
+  }, [
+    resolveNavWorkspaceId,
+    workspaceRunIds.gradeRunId,
+    workspaceRunIds.testRunId,
+  ]);
   const handleSelectSession = useCallback(
     (sessionId: string) => {
       const nextPath = currentPage === "test" || currentPage === "docs"
-        ? `${SESSIONS_BASE_PATH}/${encodeURIComponent(sessionId)}/test`
+        ? buildWorkspacePath("test", sessionId)
         : currentPage === "grade"
         ? buildGradePath(sessionId)
         : currentPage === "build"
-        ? `${SESSIONS_BASE_PATH}/${encodeURIComponent(sessionId)}/build`
-        : `${SESSIONS_BASE_PATH}/${encodeURIComponent(sessionId)}/debug`;
+        ? buildWorkspacePath("build", sessionId)
+        : buildWorkspacePath("debug", sessionId);
       navigate(nextPath);
       setSessionsDrawerOpen(false);
     },
@@ -1456,17 +1504,24 @@ function App() {
   const handleDeleteSession = useCallback(
     async (sessionId: string) => {
       await sessionsApi.deleteSession(sessionId);
-      if (sessionId === activeSessionId) {
+      if (sessionId === activeWorkspaceId) {
         window.location.assign(DEFAULT_TEST_PATH);
       }
     },
-    [activeSessionId, sessionsApi.deleteSession],
+    [activeWorkspaceId, sessionsApi.deleteSession],
   );
 
   return (
-    <BuildChatProvider
-      workspaceId={activeSessionId}
+    <WorkspaceProvider
+      workspaceId={activeWorkspaceId}
       onWorkspaceChange={handleWorkspaceChange}
+      requestedTestRunId={routeState?.tab === "test"
+        ? requestedTestRunId
+        : undefined}
+      requestedGradeRunId={routeState?.tab === "grade"
+        ? requestedGradeRunId
+        : undefined}
+      onRoutingStateChange={setWorkspaceRunIds}
     >
       <>
         <div className="app-frame">
@@ -1492,26 +1547,40 @@ function App() {
               <Tabs
                 className="top-nav-buttons"
                 activeId={currentPage}
-                onChange={(next) =>
-                  navigate(
-                    next === "docs"
-                      ? DOCS_PATH
-                      : next === "build"
-                      ? buildPath
-                      : next === "test"
-                      ? testBotPath
-                      : next === "grade"
-                      ? gradePath
-                      : debugPath,
-                  )}
+                onChange={(next) => navigate(resolveNavPath(next))}
                 tabs={[
-                  { id: "docs", label: "Docs", testId: "nav-docs" },
+                  {
+                    id: "docs",
+                    label: "Docs",
+                    testId: "nav-docs",
+                    href: resolveNavPath("docs"),
+                  },
                   ...(buildTabEnabled
-                    ? [{ id: "build", label: "Build", testId: "nav-build" }]
+                    ? [{
+                      id: "build",
+                      label: "Build",
+                      testId: "nav-build",
+                      href: resolveNavPath("build"),
+                    }]
                     : []),
-                  { id: "test", label: "Test", testId: "nav-test" },
-                  { id: "grade", label: "Grade", testId: "nav-grade" },
-                  { id: "debug", label: "Debug", testId: "nav-debug" },
+                  {
+                    id: "test",
+                    label: "Test",
+                    testId: "nav-test",
+                    href: resolveNavPath("test"),
+                  },
+                  {
+                    id: "grade",
+                    label: "Grade",
+                    testId: "nav-grade",
+                    href: resolveNavPath("grade"),
+                  },
+                  {
+                    id: "debug",
+                    label: "Debug",
+                    testId: "nav-debug",
+                    href: resolveNavPath("debug"),
+                  },
                 ]}
               />
               <div className="top-nav-center">
@@ -1528,67 +1597,73 @@ function App() {
                       "workbench-toggle",
                       workbenchDrawerOpen && "active",
                     )}
-                    onClick={() => setWorkbenchDrawerOpen(true)}
-                    aria-label="Open workbench drawer"
+                    onClick={() => setWorkbenchDrawerOpen((prev) => !prev)}
+                    aria-label={workbenchDrawerOpen
+                      ? "Close workbench drawer"
+                      : "Open workbench drawer"}
                     data-testid="nav-workbench"
                   >
                     <Icon
-                      name="flag"
+                      name="chat"
                       size={16}
-                      style={{ color: "var(--color-text)" }}
+                      style={{ color: "currentColor" }}
                     />
                   </Button>
                 </div>
               </div>
             </div>
-            <div className="page-shell">
-              {currentPage === "docs"
-                ? <DocsPage />
-                : currentPage === "build"
-                ? <BuildPage setNavActions={setNavActions} />
-                : currentPage === "debug"
-                ? (
-                  <SimulatorApp
-                    basePath={simulatorBasePath}
-                    setNavActions={setNavActions}
-                    sessionsApi={sessionsApi}
-                    onOpenSessionsDrawer={() => setSessionsDrawerOpen(true)}
-                    activeSessionId={activeSessionId}
-                  />
-                )
-                : currentPage === "test"
-                ? (
-                  <TestBotPage
-                    onReplaceTestBotSession={handleReplaceTestBotSession}
-                    onResetTestBotSession={handleResetTestBotSession}
-                    activeSessionId={activeSessionId}
-                    resetToken={testBotResetToken}
-                    setNavActions={setNavActions}
-                    onFeedbackUpdate={applyFeedbackUpdate}
-                  />
-                )
-                : (
-                  <GradePage
-                    setNavActions={setNavActions}
-                    onAppPathChange={handleAppPathChange}
-                    activeSessionId={activeSessionId}
-                    onFlagsUpdate={applyFlagsUpdate}
-                    onOptimisticToggleFlag={optimisticToggleFlag}
-                    onOptimisticFlagReason={optimisticFlagReason}
-                  />
-                )}
+            <div className="app-content-frame">
+              <div className="page-shell">
+                {currentPage === "docs"
+                  ? <DocsPage />
+                  : currentPage === "build"
+                  ? <BuildPage setNavActions={setNavActions} />
+                  : currentPage === "debug"
+                  ? (
+                    <SimulatorApp
+                      basePath={simulatorBasePath}
+                      setNavActions={setNavActions}
+                      sessionsApi={sessionsApi}
+                      onOpenSessionsDrawer={() => setSessionsDrawerOpen(true)}
+                      activeWorkspaceId={activeWorkspaceId}
+                    />
+                  )
+                  : currentPage === "test"
+                  ? (
+                    <TestBotPage
+                      onReplaceTestBotSession={handleReplaceTestBotSession}
+                      onResetTestBotSession={handleResetTestBotSession}
+                      activeWorkspaceId={activeWorkspaceId}
+                      requestedRunId={requestedTestRunId}
+                      resetToken={testBotResetToken}
+                      setNavActions={setNavActions}
+                      onFeedbackPersisted={handleFeedbackPersisted}
+                    />
+                  )
+                  : (
+                    <GradePage
+                      setNavActions={setNavActions}
+                      onAppPathChange={handleAppPathChange}
+                      activeWorkspaceId={activeWorkspaceId}
+                      requestedGradeRunId={requestedGradeRunId}
+                      onFlagsUpdate={applyFlagsUpdate}
+                      onOptimisticToggleFlag={optimisticToggleFlag}
+                      onOptimisticFlagReason={optimisticFlagReason}
+                    />
+                  )}
+              </div>
+              {workbenchDrawerOpen && (
+                <WorkbenchDrawer
+                  open={workbenchDrawerOpen}
+                  onClose={() => setWorkbenchDrawerOpen(false)}
+                  loading={workbenchSessionDetailLoading}
+                  error={workbenchSessionDetailError}
+                  sessionId={activeWorkspaceId}
+                  sessionDetail={workbenchSessionDetail}
+                />
+              )}
             </div>
           </div>
-          {workbenchDrawerOpen && (
-            <WorkbenchDrawer
-              open={workbenchDrawerOpen}
-              onClose={() => setWorkbenchDrawerOpen(false)}
-              loading={workbenchSessionDetailLoading}
-              error={workbenchSessionDetailError}
-              sessionId={activeSessionId}
-              sessionDetail={workbenchSessionDetail}
-            />
-          )}
         </div>
         <SessionsDrawer
           open={sessionsDrawerOpen}
@@ -1600,11 +1675,11 @@ function App() {
           onDelete={handleDeleteSession}
           onDeleteAll={handleDeleteAll}
           onClose={() => setSessionsDrawerOpen(false)}
-          activeSessionId={activeSessionId}
+          activeWorkspaceId={activeWorkspaceId}
           bundleStamp={bundleStamp}
         />
       </>
-    </BuildChatProvider>
+    </WorkspaceProvider>
   );
 }
 
diff --git a/simulator-ui/src/shared.tsx b/simulator-ui/src/shared.tsx
index 0a2f9344e..0c9de6292 100644
--- a/simulator-ui/src/shared.tsx
+++ b/simulator-ui/src/shared.tsx
@@ -21,6 +21,7 @@ import type {
   FeedbackEntry,
   ModelMessage,
   NormalizedSchema,
+  ReasoningDetail,
   RespondInfo,
   SchemaResponse,
   ToolCallSummary,
@@ -32,9 +33,10 @@ export type ConversationMessage = {
   message: ModelMessage;
   feedback?: FeedbackEntry;
   respond?: RespondInfo;
+  reasoning?: ReasoningDetail;
 };
 
-export function useHttpSchema(opts?: { sessionId?: string | null }) {
+export function useHttpSchema(opts?: { workspaceId?: string | null }) {
   const [schemaResponse, setSchemaResponse] = useState<SchemaResponse | null>(
     null,
   );
@@ -46,7 +48,7 @@ export function useHttpSchema(opts?: { sessionId?: string | null }) {
     setError(null);
     try {
       const params = new URLSearchParams();
-      if (opts?.sessionId) params.set("sessionId", opts.sessionId);
+      if (opts?.workspaceId) params.set("workspaceId", opts.workspaceId);
       const query = params.toString() ? `?${params.toString()}` : "";
       const res = await fetch(`/schema${query}`);
       if (!res.ok) throw new Error(res.statusText);
@@ -57,7 +59,7 @@ export function useHttpSchema(opts?: { sessionId?: string | null }) {
     } finally {
       setLoading(false);
     }
-  }, [opts?.sessionId]);
+  }, [opts?.workspaceId]);
 
   useEffect(() => {
     refresh();
@@ -125,8 +127,15 @@ export function CopyBadge(props: {
 export function ConversationView(props: {
   messages: ConversationMessage[];
   header?: React.ReactNode;
-  onScore: (messageRefId: string, score: number | null) => void;
-  onReasonChange: (messageRefId: string, score: number, reason: string) => void;
+  onScore: (
+    messageRefId: string,
+    score: number | null,
+  ) => void | Promise<void>;
+  onReasonChange: (
+    messageRefId: string,
+    score: number,
+    reason: string,
+  ) => void | Promise<void>;
   emptyState?: React.ReactNode;
 }) {
   const { messages, header, onScore, onReasonChange, emptyState } = props;
@@ -165,16 +174,29 @@ export function ConversationView(props: {
 
 export function MessageBubble(props: {
   entry: ConversationMessage;
-  onScore: (messageRefId: string, score: number | null) => void;
-  onReasonChange: (messageRefId: string, score: number, reason: string) => void;
+  onScore: (
+    messageRefId: string,
+    score: number | null,
+  ) => void | Promise<void>;
+  onReasonChange: (
+    messageRefId: string,
+    score: number,
+    reason: string,
+  ) => void | Promise<void>;
 }) {
   const { entry, onScore, onReasonChange } = props;
+  const hasReasoning = Boolean(entry.reasoning);
   const role = entry.message.role;
   const isRespond = Boolean(entry.respond);
   const isTool = role === "tool" && !isRespond;
   const className = classNames(
     "bubble",
-    role === "user" ? "bubble-user" : "bubble-assistant",
+    role === "user"
+      ? "bubble-user"
+      : role === "system"
+      ? "bubble-system"
+      : "bubble-assistant",
+    hasReasoning && "bubble-reasoning",
   );
   const messageRefId = entry.id;
   const content = entry.message.content ?? "";
@@ -182,6 +204,9 @@ export function MessageBubble(props: {
     <div className="chat-row">
       <div className={className}>
         <div className="bubble-role">{role}</div>
+        {hasReasoning && entry.reasoning && (
+          <ReasoningBubble detail={entry.reasoning} />
+        )}
         {isRespond && (
           <div className="respond-summary">
             <div className="respond-meta">
@@ -217,23 +242,23 @@ export function MessageBubble(props: {
             )}
           </div>
         )}
-        {!isRespond && content && !isTool && (
+        {!hasReasoning && !isRespond && content && !isTool && (
           <div
             className="bubble-text"
             dangerouslySetInnerHTML={{ __html: renderMarkdown(content) }}
           />
         )}
-        {!isRespond && content && isTool && (
+        {!hasReasoning && !isRespond && content && isTool && (
           <pre className="bubble-json">
             {formatJson(content)}
           </pre>
         )}
-        {!content && entry.message.tool_calls && (
+        {!hasReasoning && !content && entry.message.tool_calls && (
           <pre className="bubble-json">
             {formatJson(entry.message.tool_calls)}
           </pre>
         )}
-        {messageRefId && role !== "user" && (
+        {messageRefId && role === "assistant" && !hasReasoning && (
           <FeedbackControls
             messageRefId={messageRefId}
             feedback={entry.feedback}
@@ -246,29 +271,62 @@ export function MessageBubble(props: {
   );
 }
 
+/** Reasoning entry view: badge, optional model/call metadata, text, event. */
+export function ReasoningBubble(props: { detail: ReasoningDetail }) {
+  const { model, actionCallId, text, event } = props.detail;
+  const metaParts = [model, actionCallId && `call ${actionCallId}`]
+    .filter(Boolean);
+  return (
+    <div className="reasoning-bubble">
+      <div className="reasoning-header">
+        <Badge variant="ghost">Reasoning</Badge>
+        {metaParts.length > 0 && (
+          <span className="reasoning-meta">{metaParts.join(" · ")}</span>
+        )}
+      </div>
+      <div className="reasoning-text">{text}</div>
+      <details className="reasoning-details">
+        <summary>Details</summary>
+        <pre className="bubble-json">
+          {formatJson(event)}
+        </pre>
+      </details>
+    </div>
+  );
+}
+
 export function FeedbackControls(props: {
   messageRefId: string;
   feedback?: FeedbackEntry;
-  onScore: (messageRefId: string, score: number | null) => void;
-  onReasonChange: (messageRefId: string, score: number, reason: string) => void;
+  onScore: (
+    messageRefId: string,
+    score: number | null,
+  ) => void | Promise<void>;
+  onReasonChange: (
+    messageRefId: string,
+    score: number,
+    reason: string,
+  ) => void | Promise<void>;
 }) {
   const { messageRefId, feedback, onScore, onReasonChange } = props;
   const [reason, setReason] = useState(feedback?.reason ?? "");
   const [opened, setOpened] = useState(false);
   const [localScore, setLocalScore] = useState<number | null>(null);
   const [status, setStatus] = useState<
-    "idle" | "unsaved" | "saving" | "saved"
+    "idle" | "unsaved" | "saving" | "saved" | "error"
   >("idle");
-  const lastSentRef = useRef<string | null>(null);
+  const [errorMessage, setErrorMessage] = useState<string | null>(null);
+  const requestSeqRef = useRef(0);
 
   useEffect(() => {
     setReason(feedback?.reason ?? "");
-    if (feedback?.reason !== undefined) {
+    if (typeof feedback?.score === "number" || feedback?.reason !== undefined) {
       setStatus("saved");
     } else {
       setStatus("idle");
     }
-  }, [feedback?.reason]);
+    setErrorMessage(null);
+  }, [feedback?.reason, feedback?.score]);
 
   useEffect(() => {
     if (typeof feedback?.score === "number") {
@@ -281,23 +339,51 @@ export function FeedbackControls(props: {
     ? feedback.score
     : localScore;
 
+  const persistScore = useCallback(async (score: number | null) => {
+    const requestSeq = ++requestSeqRef.current;
+    setStatus("saving");
+    setErrorMessage(null);
+    try {
+      await Promise.resolve(onScore(messageRefId, score));
+      if (requestSeqRef.current !== requestSeq) return;
+      setStatus(score === null ? "idle" : "saved");
+    } catch (err) {
+      if (requestSeqRef.current !== requestSeq) return;
+      setStatus("error");
+      setErrorMessage(
+        err instanceof Error ? err.message : "Failed to save feedback",
+      );
+    }
+  }, [messageRefId, onScore]);
+
+  const persistReason = useCallback(
+    async (score: number, nextReason: string) => {
+      const requestSeq = ++requestSeqRef.current;
+      setStatus("saving");
+      setErrorMessage(null);
+      try {
+        await Promise.resolve(onReasonChange(messageRefId, score, nextReason));
+        if (requestSeqRef.current !== requestSeq) return;
+        setStatus("saved");
+      } catch (err) {
+        if (requestSeqRef.current !== requestSeq) return;
+        setStatus("error");
+        setErrorMessage(
+          err instanceof Error ? err.message : "Failed to save feedback reason",
+        );
+      }
+    },
+    [messageRefId, onReasonChange],
+  );
+
   useEffect(() => {
     if (typeof effectiveScore !== "number") return;
     if (status !== "unsaved") return;
     const handle = window.setTimeout(() => {
-      setStatus("saving");
-      lastSentRef.current = reason;
-      onReasonChange(messageRefId, effectiveScore, reason);
+      persistReason(effectiveScore, reason);
     }, 650);
     return () => window.clearTimeout(handle);
-  }, [effectiveScore, status, reason, onReasonChange, messageRefId]);
-
-  useEffect(() => {
-    if (status !== "saving") return;
-    if (feedback?.reason === reason && lastSentRef.current === reason) {
-      setStatus("saved");
-    }
-  }, [status, feedback?.reason, reason]);
+  }, [effectiveScore, persistReason, reason, status]);
 
   const showReason = opened ||
     typeof effectiveScore === "number" ||
@@ -319,13 +405,12 @@ export function FeedbackControls(props: {
                 setLocalScore(null);
                 setOpened(false);
                 setReason("");
-                setStatus("idle");
-                onScore(messageRefId, null);
+                persistScore(null);
                 return;
               }
               setOpened(true);
               setLocalScore(value);
-              onScore(messageRefId, value);
+              persistScore(value);
             }}
           >
             {value}
@@ -341,13 +426,12 @@ export function FeedbackControls(props: {
             onChange={(e) => {
               setReason(e.target.value);
               setStatus("unsaved");
+              setErrorMessage(null);
             }}
             onBlur={() => {
               if (typeof effectiveScore !== "number") return;
               if (status !== "unsaved") return;
-              setStatus("saving");
-              lastSentRef.current = reason;
-              onReasonChange(messageRefId, effectiveScore, reason);
+              persistReason(effectiveScore, reason);
             }}
           />
           <div
@@ -355,12 +439,34 @@ export function FeedbackControls(props: {
               "feedback-status",
               status === "saving" && "saving",
               status === "unsaved" && "unsaved",
+              status === "error" && "error",
             )}
           >
             {status === "saving" && "Saving…"}
             {status === "saved" && "Saved"}
             {status === "unsaved" && "Unsaved changes…"}
+            {status === "error" && (
+              <>
+                Save failed{" "}
+                <button
+                  type="button"
+                  className="link-button"
+                  onClick={() => {
+                    if (typeof effectiveScore === "number") {
+                      persistReason(effectiveScore, reason);
+                      return;
+                    }
+                    persistScore(null);
+                  }}
+                >
+                  Retry
+                </button>
+              </>
+            )}
           </div>
+          {status === "error" && errorMessage && (
+            <div className="error">{errorMessage}</div>
+          )}
         </>
       )}
     </div>
@@ -371,6 +477,118 @@ export function TraceList(props: { traces: TraceEvent[] }) {
   const { traces } = props;
   const ordered = traces;
   const panelRef = useRef<HTMLDivElement | null>(null);
+  const codexHighlights = useMemo(() => {
+    const asRecord = (value: unknown): Record<string, unknown> | null => {
+      if (value && typeof value === "object" && !Array.isArray(value)) {
+        return value as Record<string, unknown>;
+      }
+      return null;
+    };
+    const asString = (value: unknown): string =>
+      typeof value === "string" ? value : "";
+    const asOptionalString = (value: unknown): string | undefined => {
+      const text = asString(value).trim();
+      return text.length > 0 ? text : undefined;
+    };
+    const extractPayload = (value: unknown): Record<string, unknown> | null => {
+      const record = asRecord(value);
+      if (!record) return null;
+      if (record.type === "codex.event") {
+        return asRecord(record.payload);
+      }
+      return record;
+    };
+    const summaries: Array<{
+      id?: string;
+      text: string;
+      raw: Record<string, unknown>;
+    }> = [];
+    const toolItems: Array<{
+      id?: string;
+      name?: string;
+      status?: string;
+      args?: unknown;
+      result?: unknown;
+      error?: unknown;
+      raw: Record<string, unknown>;
+    }> = [];
+    for (const trace of ordered) {
+      if (!trace || typeof trace !== "object") continue;
+      if (trace.type !== "model.stream.event") continue;
+      const event = (trace as { event?: unknown }).event;
+      const payload = extractPayload(event);
+      if (!payload) continue;
+      const payloadType = asString(payload.type);
+      if (payloadType.startsWith("response.reasoning")) {
+        let text = "";
+        if (payloadType === "response.reasoning.delta") {
+          text = asString(payload.delta);
+        } else if (payloadType === "response.reasoning.done") {
+          text = asString(payload.text);
+        } else if (
+          payloadType === "response.reasoning_summary_text.delta"
+        ) {
+          text = asString(payload.delta);
+        } else if (
+          payloadType === "response.reasoning_summary_text.done"
+        ) {
+          text = asString(payload.text);
+        } else if (
+          payloadType === "response.reasoning_summary_part.added" ||
+          payloadType === "response.reasoning_summary_part.done"
+        ) {
+          const part = asRecord(payload.part);
+          text = part ? asString(part.text) : "";
+        }
+        if (text.trim()) {
+          summaries.push({
+            id: asOptionalString(payload.item_id),
+            text: text.trim(),
+            raw: payload,
+          });
+        }
+        continue;
+      }
+      const item = asRecord(payload.item);
+      if (!item) continue;
+      const itemType = asString(item.type);
+      if (!itemType) continue;
+      if (itemType === "reasoning") {
+        let text = "";
+        const summary = item.summary;
+        if (Array.isArray(summary)) {
+          text = summary.map((part) => {
+            const partRecord = asRecord(part);
+            return partRecord && typeof partRecord.text === "string"
+              ? partRecord.text
+              : "";
+          }).join("");
+        } else if (typeof summary === "string") {
+          text = summary;
+        } else if (typeof item.text === "string") {
+          text = item.text;
+        }
+        if (text.trim()) {
+          summaries.push({
+            id: asOptionalString(item.id),
+            text: text.trim(),
+            raw: item,
+          });
+        }
+      } else if (itemType === "function_call" || itemType === "mcp_tool_call") {
+        toolItems.push({
+          id: asOptionalString(item.id),
+          name: asOptionalString(item.name) ?? asOptionalString(item.tool),
+          status: asOptionalString(item.status),
+          args: item.arguments,
+          result: item.result,
+          error: item.error,
+          raw: item,
+        });
+      }
+    }
+    return { summaries, toolItems };
+  }, [ordered]);
   const entries = useMemo(() => {
     const depthMap = new Map<string, number>();
     return ordered.map((trace) => {
@@ -415,6 +633,61 @@ export function TraceList(props: { traces: TraceEvent[] }) {
   return (
     <div className="trace-panel" ref={panelRef}>
       <h3>Traces & Tools</h3>
+      {/* Codex highlights: reasoning summaries and tool calls from the trace. */}
+      {(codexHighlights.summaries.length > 0 ||
+        codexHighlights.toolItems.length > 0) && (
+        <div className="trace-codex-highlights">
+          {codexHighlights.summaries.length > 0 && (
+            <details open>
+              <summary>
+                Codex reasoning summaries ({codexHighlights.summaries.length})
+              </summary>
+              <div className="trace-list">
+                {/* idx is part of the key: ids can repeat, e.g. summary-part
+                    ".added" and ".done" events both push item_id entries. */}
+                {codexHighlights.summaries.map((entry, idx) => (
+                  <div key={`codex-summary-${entry.id ?? "anon"}-${idx}`}>
+                    <div className="trace-text">{entry.text}</div>
+                    <pre className="trace-json">{formatJson(entry.raw)}</pre>
+                  </div>
+                ))}
+              </div>
+            </details>
+          )}
+          {codexHighlights.toolItems.length > 0 && (
+            <details>
+              <summary>
+                Codex tool calls ({codexHighlights.toolItems.length})
+              </summary>
+              <div className="trace-list">
+                {/* Same keying rule: an item id alone is not unique. */}
+                {codexHighlights.toolItems.map((entry, idx) => (
+                  <div key={`codex-tool-${entry.id ?? "anon"}-${idx}`}>
+                    <div className="trace-text">
+                      {entry.name ?? "tool call"}
+                      {entry.status ? ` · ${entry.status}` : ""}
+                    </div>
+                    {/* Prefer args/result/error; fall back to the raw item. */}
+                    <pre className="trace-json">
+                      {formatJson(
+                        entry.args !== undefined ||
+                          entry.result !== undefined ||
+                          entry.error !== undefined
+                          ? {
+                            args: entry.args,
+                            result: entry.result,
+                            error: entry.error,
+                          }
+                          : entry.raw,
+                      )}
+                    </pre>
+                  </div>
+                ))}
+              </div>
+            </details>
+          )}
+        </div>
+      )}
       <div className="trace-list">
         {entries.map(({ trace, depth }, idx) => {
           const isUser = trace.type === "message.user";
diff --git a/simulator-ui/src/styles.ts b/simulator-ui/src/styles.ts
index c5d661372..457b4bab3 100644
--- a/simulator-ui/src/styles.ts
+++ b/simulator-ui/src/styles.ts
@@ -86,8 +86,16 @@ body {
   height: 100%;
   min-height: 0;
   display: flex;
+  flex-direction: column;
   gap: 0;
 }
+.app-content-frame {
+  flex: 1;
+  min-height: 0;
+  display: flex;
+  gap: 0;
+  overflow: hidden;
+}
 .page-shell {
   flex: 1;
   min-height: 0;
@@ -558,6 +566,11 @@ code:not(pre *) {
   color: var(--color-surface);
   box-shadow: none;
 }
+.bubble-system {
+  background: var(--color-surface);
+  border: 1px dashed var(--color-border);
+  box-shadow: none;
+}
 .bubble-role {
   font-size: 12px;
   text-transform: uppercase;
@@ -582,6 +595,55 @@ code:not(pre *) {
   word-break: break-word;
   overflow-wrap: anywhere;
 }
+.bubble-reasoning {
+  background: var(--color-surface);
+  border: 1px solid var(--color-border);
+}
+.bubble-reasoning .bubble-role {
+  color: var(--color-text-subtle);
+}
+.reasoning-bubble {
+  display: flex;
+  flex-direction: column;
+  gap: 8px;
+}
+.reasoning-header {
+  display: flex;
+  align-items: center;
+  gap: 8px;
+}
+.reasoning-meta {
+  font-size: 11px;
+  color: var(--color-text-subtle);
+}
+.reasoning-text {
+  font-size: 13px;
+  line-height: 1.5;
+  white-space: pre-wrap;
+  word-break: break-word;
+}
+.reasoning-details summary {
+  cursor: pointer;
+  font-size: 12px;
+  color: var(--color-text-subtle);
+}
+.reasoning-details {
+  margin-top: 2px;
+}
+.reasoning-collapsible .tool-calls-toggle {
+  align-items: flex-start;
+  gap: 6px;
+}
+.reasoning-toggle .tool-calls-toggle-label {
+  display: block;
+  font-weight: 600;
+}
+.reasoning-preview {
+  display: block;
+  font-size: 12px;
+  color: var(--color-text-subtle);
+  margin-top: 2px;
+}
 .feedback-controls {
   margin-top: 8px;
   display: flex;
@@ -637,6 +699,18 @@ code:not(pre *) {
 .feedback-status.unsaved {
   color: var(--color-warning);
 }
+.feedback-status.error {
+  color: var(--color-danger);
+}
+.link-button {
+  background: none;
+  border: 0;
+  color: inherit;
+  cursor: pointer;
+  font: inherit;
+  padding: 0;
+  text-decoration: underline;
+}
 .init-panel {
   border: 1px solid var(--color-border);
   border-radius: calc(14px * var(--corner-radius-scale, 1));
@@ -1047,6 +1121,46 @@ code:not(pre *) {
 }
 .tool-calls-toggle-label {
   white-space: nowrap;
+  display: inline-flex;
+  align-items: center;
+  gap: 6px;
+}
+.activity-toggle-title {
+  color: var(--color-text-muted);
+  font-weight: 600;
+}
+.activity-toggle-action {
+  color: var(--color-text-muted);
+}
+.activity-count-badge {
+  display: inline-flex;
+  align-items: center;
+  border-color: var(--color-border);
+  background: var(--color-surface-subtle);
+  color: var(--color-text-muted);
+  padding: 1px 8px;
+  font-size: 11px;
+  font-weight: 600;
+  text-transform: none;
+  letter-spacing: 0;
+  line-height: 1.35;
+}
+.activity-count-badge.is-highlight {
+  animation: activity-count-badge-flash 900ms ease-out;
+}
+@keyframes activity-count-badge-flash {
+  0% {
+    background: var(--color-accent-soft);
+    border-color: var(--color-accent-border);
+    color: var(--color-text);
+    box-shadow: 0 0 0 2px var(--color-accent-soft);
+  }
+  100% {
+    background: var(--color-surface-subtle);
+    border-color: var(--color-border);
+    color: var(--color-text-muted);
+    box-shadow: 0 0 0 0 transparent;
+  }
 }
 .tool-calls-list {
   display: flex;
@@ -1054,6 +1168,94 @@ code:not(pre *) {
   gap: 8px;
   margin-top: 6px;
 }
+.reasoning-collapsible {
+  background: #f8fafc;
+  border: 1px dashed #cbd5f5;
+  border-radius: 12px;
+  padding: 8px;
+}
+.reasoning-toggle {
+  color: #1e293b;
+}
+.reasoning-list {
+  gap: 12px;
+}
+.reasoning-row {
+  margin-left: 12px;
+}
+.reasoning-bubble {
+  border: 1px solid #e2e8f0;
+  background: linear-gradient(180deg, #f8fafc 0%, #ffffff 100%);
+}
+.reasoning-row .imessage-bubble {
+  background: linear-gradient(180deg, #f8fafc 0%, #ffffff 100%);
+  border: 1px solid #e2e8f0;
+  color: var(--color-text);
+}
+.reasoning-json {
+  background: transparent;
+  border: none;
+  padding: 0;
+  font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas,
+    "Liberation Mono", "Courier New", monospace;
+  white-space: pre-wrap;
+}
+.reasoning-details {
+  margin-top: 8px;
+}
+.activity-collapsible {
+  border: 1px solid var(--color-border);
+  border-radius: calc(12px * var(--corner-radius-scale, 1));
+  corner-shape: squircle;
+  background: var(--color-surface-muted);
+  padding: 8px 10px;
+}
+.activity-toggle {
+  text-transform: none;
+  letter-spacing: 0;
+  font-size: 12px;
+  gap: 8px;
+  padding: 2px 0 4px;
+}
+.activity-toggle::before,
+.activity-toggle::after {
+  display: none;
+}
+.activity-toggle-chevron {
+  margin-left: auto;
+  display: inline-flex;
+  align-items: center;
+  justify-content: center;
+  color: var(--color-text-subtle);
+  transition: transform 140ms ease;
+}
+.activity-toggle.is-open .activity-toggle-chevron {
+  transform: rotate(180deg);
+}
+.activity-preview {
+  display: flex;
+  flex-direction: column;
+  gap: 4px;
+}
+.activity-preview-reasoning {
+  font-size: 13px;
+  color: var(--color-text);
+}
+.activity-preview-reasoning > * {
+  margin: 0;
+}
+.activity-preview-tool {
+  font-size: 12px;
+  color: var(--color-text-muted);
+  border: 1px solid var(--color-border);
+  background: var(--color-surface-subtle);
+  border-radius: 4px;
+  padding: 0px 4px;
+  margin: -1px 2px;
+}
+.activity-details {
+  margin-top: 8px;
+}
 .composer {
   display: flex;
   flex-direction: column;
@@ -1442,7 +1644,7 @@ code:not(pre *) {
 }
 .top-nav-deck {
   font-weight: 600;
-  color: var(--color-text);
+  color: var(--color-text-subtle);
   font-size: 16px;
 }
 .top-nav-actions {
@@ -1459,7 +1661,13 @@ code:not(pre *) {
 }
 .calibrate-toggle.active,
 .workbench-toggle.active {
-  background: var(--color-surface-subtle);
+  background: var(--color-primary-alpha-15);
+  border-color: var(--color-primary);
+  color: var(--color-primary);
+}
+.calibrate-toggle.active:hover,
+.workbench-toggle.active:hover {
+  background: var(--color-primary-alpha-25);
 }
 .bundle-stamp {
   font-size: 12px;
@@ -1748,6 +1956,66 @@ code:not(pre *) {
 .wrap {
   flex-wrap: wrap;
 }
+.gds-tooltip-anchor {
+  display: inline-flex;
+  align-items: center;
+}
+.gds-tooltip {
+  position: fixed;
+  z-index: 1200;
+  max-width: min(320px, calc(100vw - 24px));
+  padding: 6px 8px;
+  border-radius: calc(8px * var(--corner-radius-scale, 1));
+  corner-shape: squircle;
+  border: 1px solid var(--color-border-strong);
+  background: var(--color-text);
+  color: var(--color-surface);
+  font-size: 12px;
+  line-height: 1.35;
+  box-shadow: 0 6px 18px var(--color-shadow-strong);
+  pointer-events: none;
+  animation: gds-tooltip-fade-in 120ms ease-out;
+}
+.gds-tooltip::after {
+  content: "";
+  position: absolute;
+  width: 8px;
+  height: 8px;
+  background: inherit;
+  border: inherit;
+  border-top: 0;
+  border-left: 0;
+}
+.gds-tooltip--top::after {
+  left: 50%;
+  bottom: -5px;
+  transform: translateX(-50%) rotate(45deg);
+}
+.gds-tooltip--bottom::after {
+  left: 50%;
+  top: -5px;
+  transform: translateX(-50%) rotate(225deg);
+}
+.gds-tooltip--left::after {
+  top: 50%;
+  right: -5px;
+  transform: translateY(-50%) rotate(315deg);
+}
+.gds-tooltip--right::after {
+  top: 50%;
+  left: -5px;
+  transform: translateY(-50%) rotate(135deg);
+}
+@keyframes gds-tooltip-fade-in {
+  from {
+    opacity: 0;
+    filter: saturate(0.8);
+  }
+  to {
+    opacity: 1;
+    filter: saturate(1);
+  }
+}
 .gds-listbox {
   position: relative;
 }
@@ -2118,12 +2386,101 @@ code:not(pre *) {
   border: 1px solid var(--color-border);
   border-bottom-left-radius: 6px;
 }
+.imessage-bubble.reasoning-bubble {
+  background: var(--color-surface);
+  border: 1px solid var(--color-border-strong);
+}
 .imessage-bubble.right {
   background: var(--color-primary-soft);
   color: var(--color-text);
   border: 1px solid var(--color-primary-alpha-25);
   border-bottom-right-radius: 6px;
 }
+.build-chat-activity-indicator {
+  position: relative;
+  overflow: hidden;
+  border: 1px solid var(--color-border-strong);
+  border-radius: calc(10px * var(--corner-radius-scale, 1));
+  corner-shape: squircle;
+  background: var(--color-surface-muted);
+  padding: 8px 10px;
+  display: flex;
+  align-items: center;
+  gap: 8px;
+  min-height: 36px;
+}
+.build-chat-activity-sticky {
+  position: sticky;
+  bottom: 0;
+  z-index: 3;
+}
+.build-chat-activity-indicator-thinking {
+  border-color: var(--color-primary-alpha-25);
+}
+.build-chat-activity-indicator-responding {
+  border-color: var(--color-accent-border);
+}
+.build-chat-activity-glimmer {
+  position: absolute;
+  inset: 0;
+  pointer-events: none;
+  opacity: 0.5;
+  background:
+    linear-gradient(
+      110deg,
+      rgba(255, 255, 255, 0) 0%,
+      rgba(255, 255, 255, 0.36) 40%,
+      rgba(255, 255, 255, 0) 75%
+    );
+  transform: translateX(-100%);
+  animation: build-chat-glimmer 1.6s ease-in-out infinite;
+}
+.build-chat-activity-spinner {
+  width: 14px;
+  height: 14px;
+  border-radius: 999px;
+  border: 2px solid var(--color-primary-alpha-25);
+  border-top-color: var(--color-primary);
+  position: relative;
+  z-index: 1;
+  flex: 0 0 auto;
+  animation: build-chat-spinner 0.8s linear infinite;
+}
+.build-chat-activity-label,
+.build-chat-activity-timer {
+  position: relative;
+  z-index: 1;
+}
+.build-chat-activity-label {
+  font-size: 12px;
+  color: var(--color-text-body);
+  font-weight: 600;
+}
+.build-chat-activity-timer {
+  margin-left: auto;
+  font-size: 12px;
+  color: var(--color-text-muted);
+  font-variant-numeric: tabular-nums;
+}
+@keyframes build-chat-glimmer {
+  0% {
+    transform: translateX(-100%);
+  }
+  100% {
+    transform: translateX(120%);
+  }
+}
+@keyframes build-chat-spinner {
+  to {
+    transform: rotate(360deg);
+  }
+}
+@media (prefers-reduced-motion: reduce) {
+  .build-chat-activity-glimmer {
+    animation: none;
+    opacity: 0;
+  }
+}
 .imessage-row.left .imessage-bubble.right {
   border-bottom-right-radius: 18px;
   border-bottom-left-radius: 6px;
@@ -2140,6 +2497,7 @@ code:not(pre *) {
 }
 .build-files-panel {
   overflow: hidden;
+  padding: 0;
 }
 .build-files-browser {
   display: grid;
@@ -2158,10 +2516,7 @@ code:not(pre *) {
   min-height: 0;
 }
 .build-files-preview {
-  border: 1px solid var(--color-border-strong);
-  border-radius: calc(10px * var(--corner-radius-scale, 1));
-  corner-shape: squircle;
-  background: var(--color-surface-muted);
+  background: var(--color-surface);
   display: flex;
   flex-direction: column;
   flex: 1;
@@ -2169,9 +2524,9 @@ code:not(pre *) {
   overflow: hidden;
 }
 .build-files-preview-header {
-  padding: 6px 10px;
+  padding: 8px;
   border-bottom: 1px solid var(--color-border);
-  background: var(--color-surface);
+  background: var(--color-surface-muted);
 }
 .build-files-preview-controls {
   display: flex;
@@ -2191,89 +2546,8 @@ code:not(pre *) {
   gap: 8px;
   flex-shrink: 0;
 }
-.build-recent-changes-trigger {
-  position: relative;
-  border: 1px solid var(--color-border-strong);
-  border-radius: calc(10px * var(--corner-radius-scale, 1));
-  corner-shape: squircle;
-  background: var(--color-surface);
-  padding: 6px 18px 6px 10px;
-  cursor: pointer;
-  font-family: inherit;
-  display: flex;
-  flex-direction: row;
-  align-items: center;
-}
-.build-recent-changes-trigger:hover {
-  border-color: var(--color-border-emphasis);
-  background: var(--color-surface-muted);
-}
-.build-recent-changes-label {
-  font-size: 12px;
-  font-weight: 800;
-  color: var(--color-text);
-  text-transform: uppercase;
-  letter-spacing: 0.06em;
-}
-.build-recent-changes-badge {
-  position: absolute;
-  top: -6px;
-  right: -6px;
-  min-width: 18px;
-  height: 18px;
-  padding: 0 6px;
-  display: inline-flex;
-  flex-direction: row;
-  align-items: center;
-  justify-content: center;
-  text-transform: none;
-}
-.build-recent-changes-popover {
-  background: var(--color-surface);
-  border: 1px solid var(--color-border);
-  border-radius: calc(12px * var(--corner-radius-scale, 1));
-  corner-shape: squircle;
-  box-shadow: 0 12px 32px var(--color-shadow-strong);
-  z-index: 30;
-  max-height: 320px;
-  overflow-y: auto;
-  padding: 8px;
-  display: flex;
-  flex-direction: column;
-  gap: 6px;
-}
-.build-recent-changes-list {
-  display: flex;
-  flex-direction: column;
-  gap: 6px;
-}
-.build-recent-change-row {
-  width: 100%;
-  border: 1px solid var(--color-border-strong);
-  border-radius: calc(10px * var(--corner-radius-scale, 1));
-  corner-shape: squircle;
-  padding: 8px;
-  background: var(--color-surface-muted);
-  display: flex;
-  flex-direction: column;
-  gap: 4px;
-  text-align: left;
-  cursor: pointer;
-  font-family: inherit;
-}
-.build-recent-change-row:hover {
-  background: var(--color-surface);
-}
-.build-recent-change-summary {
-  font-weight: 800;
-  color: var(--color-text);
-}
-.build-recent-change-meta {
-  font-size: 12px;
-  color: var(--color-text-muted);
-}
 .build-files-preview-body {
-  padding: 8px;
+  padding: 8px 16px;
   overflow: auto;
   flex: 1;
   min-height: 0;
diff --git a/simulator-ui/src/utils.test.ts b/simulator-ui/src/utils.test.ts
new file mode 100644
index 000000000..289260eab
--- /dev/null
+++ b/simulator-ui/src/utils.test.ts
@@ -0,0 +1,210 @@
+import { assertEquals } from "@std/assert";
+
+// utils.ts touches `window` at import time; stub it before loading.
+const globals = globalThis as unknown as { window?: Record<string, unknown> };
+if (!globals.window) globals.window = {};
+
+type TraceEvent = import("./utils.ts").TraceEvent;
+const utils = await import("./utils.ts");
+const { deriveReasoningByAssistant, getWorkspaceRouteFromPath } = utils;
+const { deriveBuildDisplayMessages } = utils;
+
+Deno.test("deriveReasoningByAssistant maps codex reasoning to assistant turn", () => {
+  const traces: TraceEvent[] = [ // one streamed reasoning item, then the model.result for the same call
+    {
+      type: "model.stream.event",
+      runId: "run_1",
+      actionCallId: "call_1",
+      model: "codex-cli/default",
+      event: {
+        type: "codex.event",
+        payload: {
+          type: "item.completed",
+          item: {
+            type: "reasoning",
+            text: "Reasoning here",
+          },
+        },
+      },
+    },
+    {
+      type: "model.result",
+      runId: "run_1",
+      actionCallId: "call_1",
+      finishReason: "stop",
+      message: { role: "assistant", content: "final answer" },
+    },
+  ];
+
+  const reasoning = deriveReasoningByAssistant(traces);
+  assertEquals(reasoning.size, 1);
+  const bucket = reasoning.get(0); // key 0 is presumably the assistant-turn index — confirm in utils.ts
+  assertEquals(bucket?.length, 1);
+  assertEquals(bucket?.[0].text, "Reasoning here");
+  assertEquals(bucket?.[0].model, "codex-cli/default"); // model appears only on the stream event above
+});
+
+Deno.test("deriveReasoningByAssistant ignores non-reasoning codex events", () => {
+  const traces: TraceEvent[] = [
+    {
+      type: "model.stream.event",
+      actionCallId: "call_1",
+      event: {
+        type: "codex.event",
+        payload: {
+          type: "item.completed",
+          item: {
+            type: "agent_message", // not a reasoning item
+            text: "not reasoning",
+          },
+        },
+      },
+    },
+    {
+      type: "model.stream.event",
+      actionCallId: "call_1",
+      event: {
+        type: "codex.event",
+        payload: {
+          type: "item.completed",
+          item: {
+            type: "reasoning",
+            text: "   ", // whitespace-only reasoning text
+          },
+        },
+      },
+    },
+    {
+      type: "model.result",
+      actionCallId: "call_1",
+      finishReason: "stop",
+    },
+  ];
+
+  const reasoning = deriveReasoningByAssistant(traces);
+  assertEquals(reasoning.size, 0); // neither stream event is expected to produce an entry
+});
+
+Deno.test("getWorkspaceRouteFromPath parses run-addressed test route", () => {
+  assertEquals(getWorkspaceRouteFromPath("/workspaces/ws_1/test/run_1"), { // segment after /test/ is expected as testRunId
+    workspaceId: "ws_1",
+    tab: "test",
+    isNew: false,
+    testRunId: "run_1",
+    gradeRunId: undefined, // asserted explicitly as undefined on a test route
+  });
+});
+
+Deno.test("getWorkspaceRouteFromPath parses run-addressed grade route", () => {
+  assertEquals(getWorkspaceRouteFromPath("/workspaces/ws_1/grade/grade_1"), { // segment after /grade/ is expected as gradeRunId
+    workspaceId: "ws_1",
+    tab: "grade",
+    isNew: false,
+    testRunId: undefined, // asserted explicitly as undefined on a grade route
+    gradeRunId: "grade_1",
+  });
+});
+
+Deno.test("deriveBuildDisplayMessages keeps assistant turns ordered when item ids repeat across actions", () => {
+  const traces: TraceEvent[] = [ // two user/assistant turns; both streamed items share id "item_1"
+    {
+      type: "message.user",
+      actionCallId: "action-1",
+      message: { role: "user", content: "first user turn" },
+    },
+    {
+      type: "model.stream.event",
+      actionCallId: "action-1",
+      event: {
+        type: "codex.event",
+        payload: {
+          type: "item.completed",
+          item: {
+            id: "item_1",
+            type: "agent_message",
+            text: "first assistant turn",
+          },
+        },
+      },
+    },
+    {
+      type: "model.result",
+      actionCallId: "action-1",
+      message: { role: "assistant", content: "first assistant turn" },
+    },
+    {
+      type: "message.user",
+      actionCallId: "action-2",
+      message: { role: "user", content: "second user turn" },
+    },
+    {
+      type: "model.stream.event",
+      actionCallId: "action-2",
+      event: {
+        type: "codex.event",
+        payload: {
+          type: "item.completed",
+          item: {
+            id: "item_1", // same item id as in action-1, on purpose
+            type: "agent_message",
+            text: "second assistant turn",
+          },
+        },
+      },
+    },
+    {
+      type: "model.result",
+      actionCallId: "action-2",
+      message: { role: "assistant", content: "second assistant turn" },
+    },
+  ];
+
+  const display = deriveBuildDisplayMessages([], traces); // empty message list: rows come from traces only
+  const textRows = display.filter((row) => row.kind === "message").map((row) =>
+    `${row.role}:${row.content}`
+  );
+  assertEquals(textRows, [ // each assistant text appears once, after its own user turn
+    "user:first user turn",
+    "assistant:first assistant turn",
+    "user:second user turn",
+    "assistant:second assistant turn",
+  ]);
+});
+
+Deno.test("deriveBuildDisplayMessages dedupes model.result after streamed output_item.done", () => {
+  const traces: TraceEvent[] = [ // the assistant text "hi there" arrives twice: streamed item, then model.result
+    {
+      type: "message.user",
+      actionCallId: "action-1",
+      message: { role: "user", content: "hello" },
+    },
+    {
+      type: "model.stream.event",
+      actionCallId: "action-1",
+      event: {
+        type: "codex.event",
+        payload: {
+          type: "response.output_item.done",
+          output_index: 0,
+          item: {
+            type: "message",
+            role: "assistant",
+            content: [{ type: "output_text", text: "hi there" }],
+          },
+        },
+      },
+    },
+    {
+      type: "model.result",
+      actionCallId: "action-1",
+      message: { role: "assistant", content: "hi there" },
+    },
+  ];
+
+  const display = deriveBuildDisplayMessages([], traces);
+  const assistantRows = display.filter((row) =>
+    row.kind === "message" && row.role === "assistant"
+  );
+  assertEquals(assistantRows.length, 1); // the duplicate text must collapse into a single row
+  assertEquals(assistantRows[0]?.content, "hi there");
+});
diff --git a/simulator-ui/src/utils.ts b/simulator-ui/src/utils.ts
index fe8ba2263..ea658713a 100644
--- a/simulator-ui/src/utils.ts
+++ b/simulator-ui/src/utils.ts
@@ -1,3 +1,9 @@
+import {
+  buildWorkspacePath,
+  parseWorkspaceRoute,
+  WORKSPACE_ROUTE_BASE,
+} from "../../src/workspace_contract.ts";
+
 export type NormalizedSchema = {
   kind:
     | "string"
@@ -43,7 +49,11 @@ export type ModelMessage = {
   }>;
 };
 
-export type MessageRef = { id: string; role: string };
+export type MessageRef = {
+  id: string;
+  role: string;
+  source?: "scenario" | "manual";
+};
 
 export type FeedbackEntry = {
   id: string;
@@ -127,7 +137,7 @@ export type GradingFlag = {
 };
 
 export type SessionDetailResponse = {
-  sessionId: string;
+  workspaceId: string;
   messages: ModelMessage[];
   messageRefs?: MessageRef[];
   feedback?: FeedbackEntry[];
@@ -145,7 +155,7 @@ export type CalibrateResponse = {
 
 export type CalibrateStreamMessage = {
   type: "calibrateSession";
-  sessionId: string;
+  workspaceId: string;
   run: CalibrationRun;
   session: CalibrateSession;
 };
@@ -158,6 +168,8 @@ export type CalibrateRef = {
 export type TestBotRun = {
   id?: string;
   status: "idle" | "running" | "completed" | "error" | "canceled";
+  workspaceId?: string;
+  // Temporary alias while server payloads migrate fully to workspaceId.
   sessionId?: string;
   error?: string;
   initFill?: {
@@ -173,6 +185,7 @@ export type TestBotRun = {
     role: string;
     content: string;
     messageRefId?: string;
+    messageSource?: "scenario" | "manual";
     feedback?: FeedbackEntry;
     respondStatus?: number;
     respondCode?: string;
@@ -251,7 +264,10 @@ export type SimulatorMessage =
   | { type: "error"; message: string; runId?: string };
 
 export type ToolCallSummary = {
+  key: string;
   id: string;
+  actionCallId?: string;
+  runId?: string;
   name?: string;
   status: "pending" | "running" | "completed" | "error";
   args?: unknown;
@@ -262,18 +278,32 @@ export type ToolCallSummary = {
   depth?: number;
 };
 
+export type BuildDisplayMessage = { // element type of the array returned by deriveBuildDisplayMessages
+  kind: "message" | "tool" | "reasoning";
+  role?: "user" | "assistant"; // populated on "message" rows
+  content?: string;
+  toolCallId?: string; // presumably set on "tool" rows — confirm in deriveBuildDisplayMessages
+  toolSummary?: ToolCallSummary;
+  reasoningId?: string; // NOTE(review): the no-trace fallback also sets this ("fallback-<idx>") on message rows
+  reasoningRaw?: Record<string, unknown>;
+};
+
 export const SCORE_VALUES = [-3, -2, -1, 0, 1, 2, 3];
 
-export const SESSIONS_BASE_PATH = "/sessions";
+export const WORKSPACES_BASE_PATH = WORKSPACE_ROUTE_BASE;
 export const DOCS_PATH = "/docs";
-export const DEFAULT_SESSION_PATH = `${SESSIONS_BASE_PATH}/new/debug`;
-export const DEFAULT_TEST_PATH = `${SESSIONS_BASE_PATH}/new/test`;
-export const DEFAULT_BUILD_PATH = `${SESSIONS_BASE_PATH}/new/build`;
+export const DEFAULT_WORKSPACE_DEBUG_PATH = buildWorkspacePath("debug");
+export const DEFAULT_TEST_PATH = buildWorkspacePath("test");
+export const DEFAULT_BUILD_PATH = buildWorkspacePath("build");
+export const DEFAULT_GRADE_PATH = buildWorkspacePath("grade");
 export const GRADE_PATH_SUFFIX = "/grade";
-export const buildGradePath = (sessionId: string) =>
-  `${SESSIONS_BASE_PATH}/${encodeURIComponent(sessionId)}${GRADE_PATH_SUFFIX}`;
+export const buildGradePath = (workspaceId: string, gradeRunId?: string) =>
+  buildWorkspacePath("grade", workspaceId, { runId: gradeRunId });
+export const buildTestPath = (workspaceId?: string | null, runId?: string) =>
+  buildWorkspacePath("test", workspaceId, { runId });
 export const DURABLE_STREAM_PREFIX = "/api/durable-streams/stream/";
 export const SIMULATOR_STREAM_ID = "gambit-simulator";
+export const WORKSPACE_STREAM_ID = "gambit-workspace";
 export const GRADE_STREAM_ID = "gambit-grade";
 export const TEST_STREAM_ID = "gambit-test";
 export const BUILD_STREAM_ID = "gambit-build";
@@ -311,6 +341,12 @@ export type BuildBotStreamEndEvent = {
   ts?: number;
 };
 
+export type BuildBotTraceEvent = {
+  type: "buildBotTrace";
+  runId?: string;
+  event: TraceEvent;
+};
+
 export type BuildBotStatusEvent = {
   type: "buildBotStatus";
   run?: TestBotRun;
@@ -319,8 +355,14 @@ export type BuildBotStatusEvent = {
 export type BuildBotSocketMessage =
   | BuildBotStreamEvent
   | BuildBotStreamEndEvent
+  | BuildBotTraceEvent
   | BuildBotStatusEvent;
 
+export type WorkspaceSocketMessage =
+  | BuildBotSocketMessage
+  | TestBotSocketMessage
+  | CalibrateStreamMessage;
+
 export const deckPath = (window as unknown as { __GAMBIT_DECK_PATH__?: string })
   .__GAMBIT_DECK_PATH__ ?? "Unknown deck";
 const deckLabelFromWindow = (
@@ -607,35 +649,29 @@ export function normalizeBasePath(basePath: string): string {
   return basePath.replace(/\/+$/, "");
 }
 
-export function getSessionIdFromPath(
+export function getWorkspaceIdFromPath(
   pathname?: string,
-  basePath = SESSIONS_BASE_PATH,
+  basePath = WORKSPACES_BASE_PATH,
 ): string | null {
   const target = typeof pathname === "string"
     ? pathname
     : window.location.pathname;
-  const normalizedTarget = target.replace(/\/+$/, "");
-  const canonical = normalizedTarget.match(
-    /^\/sessions\/([^/]+)(?:\/(debug|grade|test|build))?$/,
-  );
-  if (canonical) {
-    const id = canonical[1];
-    if (id && id !== "new") return decodeURIComponent(id);
-    return null;
-  }
+  const normalizedTarget = target.replace(/\/+$/, "") || "/";
+  const canonical = parseWorkspaceRoute(normalizedTarget);
+  if (canonical) return canonical.workspaceId;
   const bases = [basePath, "/debug", "/simulate", ""];
   for (const base of bases) {
     if (typeof base !== "string") continue;
     const normalized = normalizeBasePath(base);
-    const prefix = `${normalized}/sessions/`.replace(/^\/\//, "/");
-    if (normalized === "" && !normalizedTarget.startsWith("/sessions/")) {
+    const prefix = `${normalized}/workspaces/`.replace(/^\/\//, "/");
+    if (normalized === "" && !normalizedTarget.startsWith("/workspaces/")) {
       continue;
     }
     if (normalized !== "" && !normalizedTarget.startsWith(prefix)) {
       continue;
     }
     const remainder = normalized === ""
-      ? normalizedTarget.slice("/sessions/".length)
+      ? normalizedTarget.slice("/workspaces/".length)
       : normalizedTarget.slice(prefix.length);
     if (remainder.length > 0 && remainder !== "new") {
       return decodeURIComponent(remainder);
@@ -644,6 +680,14 @@ export function getSessionIdFromPath(
   return null;
 }
 
+// Parses the workspace route for `pathname` (default: current location).
+export function getWorkspaceRouteFromPath(pathname?: string) {
+  const hasPath = typeof pathname === "string";
+  const rawPath = hasPath ? pathname : window.location.pathname;
+  // Drop trailing slashes; an empty result means the root path "/".
+  return parseWorkspaceRoute(rawPath.replace(/\/+$/, "") || "/");
+}
+
 export function cloneValue<T>(value: T): T {
   try {
     // @ts-ignore structuredClone is available in modern browsers
@@ -686,20 +730,16 @@ export function toRelativePath(
   return target;
 }
 
-export function getGradeSessionIdFromLocation(): string | null {
-  const pathMatch = window.location.pathname.match(
-    /^\/sessions\/([^/]+)\/grade/,
-  );
-  if (pathMatch) return decodeURIComponent(pathMatch[1]);
-  const params = new URLSearchParams(window.location.search);
-  const param = params.get("sessionId");
-  return param ? decodeURIComponent(param) : null;
+export function getGradeWorkspaceIdFromLocation(): string | null {
+  const parsed = getWorkspaceRouteFromPath(window.location.pathname);
+  // Any non-grade (or unparseable) route yields null.
+  return parsed?.tab === "grade" ? parsed.workspaceId ?? null : null;
 }
 
-export function getGradeRefFromLocation(): string | null {
-  const params = new URLSearchParams(window.location.search);
-  const ref = params.get("ref");
-  return ref && ref.trim().length ? ref.trim() : null;
+export function getGradeRunIdFromLocation(): string | null {
+  const parsed = getWorkspaceRouteFromPath(window.location.pathname);
+  // Any non-grade (or unparseable) route yields null.
+  return parsed?.tab === "grade" ? parsed.gradeRunId ?? null : null;
 }
 
 export function parseGradingRef(ref: string): {
@@ -873,10 +913,10 @@ export function renderMarkdown(text: string) {
     .replace(/&/g, "&amp;")
     .replace(/</g, "&lt;")
     .replace(/>/g, "&gt;");
-  return escaped.replace(/\*\*(.+?)\*\*/g, "<strong>$1</strong>").replace(
-    /\n/g,
-    "<br />",
-  );
+  return escaped
+    .replace(/`([^`]+?)`/g, "<code>$1</code>")
+    .replace(/\*\*(.+?)\*\*/g, "<strong>$1</strong>")
+    .replace(/\n/g, "<br />");
 }
 
 export function findHandledErrors(traces: TraceEvent[]): Map<string, string> {
@@ -909,8 +949,10 @@ export function findHandledErrors(traces: TraceEvent[]): Map<string, string> {
 
 export function summarizeToolCalls(traces: TraceEvent[]): ToolCallSummary[] {
   const order: ToolCallSummary[] = [];
-  const byId = new Map<string, ToolCallSummary>();
+  const byKey = new Map<string, ToolCallSummary>();
   const depthMap = new Map<string, number>();
+  const traceCallKey = (trace: TraceEvent, actionCallId: string) =>
+    `${typeof trace.runId === "string" ? trace.runId : ""}:${actionCallId}`;
   for (const trace of traces) {
     if (!trace || typeof trace !== "object") continue;
     const type = typeof trace.type === "string" ? trace.type : "";
@@ -933,14 +975,18 @@ export function summarizeToolCalls(traces: TraceEvent[]): ToolCallSummary[] {
       continue;
     }
     if (!type.startsWith("tool.") || !actionCallId) continue;
-    let summary = byId.get(actionCallId);
+    const key = traceCallKey(trace, actionCallId);
+    let summary = byKey.get(key);
     if (!summary) {
       summary = {
+        key,
         id: actionCallId,
+        actionCallId,
+        runId: typeof trace.runId === "string" ? trace.runId : undefined,
         name: typeof trace.name === "string" ? trace.name : undefined,
         status: "pending",
       };
-      byId.set(actionCallId, summary);
+      byKey.set(key, summary);
       order.push(summary);
     }
     if (typeof trace.name === "string") summary.name = trace.name;
@@ -979,6 +1025,492 @@ export function summarizeToolCalls(traces: TraceEvent[]): ToolCallSummary[] {
   return order;
 }
 
+/**
+ * Builds the ordered list of display entries (chat messages, tool calls and
+ * reasoning blocks) for a conversation.
+ *
+ * When no traces are supplied, `messages` is mapped one-to-one onto message
+ * entries. Otherwise the entries are reconstructed from the trace events alone
+ * (`message.user`, `model.result`, `tool.*` and `model.stream.event`) and
+ * `messages` is not consulted.
+ *
+ * @param messages Fallback chat messages, used only when `traces` is empty.
+ * @param traces Trace events, processed in array order.
+ * @returns Display entries in the order they were first encountered.
+ */
+export function deriveBuildDisplayMessages(
+  messages: Array<{ role: string; content: string }> = [],
+  traces?: TraceEvent[] | null,
+): BuildDisplayMessage[] {
+  const safeMessages = Array.isArray(messages) ? messages : [];
+  const safeTraces = Array.isArray(traces) ? traces : [];
+  // Without traces there is nothing to reconstruct; show the messages as-is.
+  // Every role other than "user" is displayed as "assistant".
+  if (safeTraces.length === 0) {
+    return safeMessages.map((msg, idx) => ({
+      kind: "message",
+      role: msg.role === "user" ? "user" : "assistant",
+      content: msg.content,
+      reasoningId: `fallback-${idx}`,
+    }));
+  }
+
+  const entries: BuildDisplayMessage[] = [];
+  // Tool summaries by actionCallId; the same object is shared with the entry
+  // so later tool.result / tool.error events update it in place.
+  const toolSummaries = new Map<string, ToolCallSummary>();
+  const toolEntryIds = new Set<string>();
+  const toolDepthMap = new Map<string, number>();
+  // Positions inside `entries` of reasoning / assistant entries, by scoped id.
+  const reasoningIndexById = new Map<string, number>();
+  const assistantIndexById = new Map<string, number>();
+
+  // Narrows a value to a plain (non-array) object, or null.
+  const asRecord = (value: unknown): Record<string, unknown> | null => {
+    if (value && typeof value === "object" && !Array.isArray(value)) {
+      return value as Record<string, unknown>;
+    }
+    return null;
+  };
+  const asString = (value: unknown): string =>
+    typeof value === "string" ? value : "";
+  // Prefixes `id` with `scope:` when a scope is available.
+  const scopedId = (scope: string, id: string): string =>
+    scope ? `${scope}:${id}` : id;
+  // Converts message content of any shape to display text.
+  const stringifyContent = (value: unknown): string => {
+    if (typeof value === "string") return value;
+    // Missing content is treated as empty text. Without this guard
+    // `JSON.stringify(undefined)` returns `undefined`, which breaks the later
+    // `.trim()` call, and `null` content would be shown as the text "null".
+    if (value === null || value === undefined) return "";
+    try {
+      // JSON.stringify also returns undefined for functions and symbols.
+      return JSON.stringify(value) ?? String(value);
+    } catch {
+      return String(value);
+    }
+  };
+  // Appends a trimmed assistant message unless it repeats the most recent
+  // assistant message entry (only that one entry is compared).
+  const pushAssistantMessage = (content: string) => {
+    const normalized = content.trim();
+    if (!normalized) return;
+    for (let i = entries.length - 1; i >= 0; i -= 1) {
+      const entry = entries[i];
+      if (entry?.kind !== "message") continue;
+      if (entry.role !== "assistant") continue;
+      if ((entry.content ?? "").trim() === normalized) return;
+      break;
+    }
+    entries.push({
+      kind: "message",
+      role: "assistant",
+      content: normalized,
+    });
+  };
+  // Pulls the text out of a `response.reasoning*` stream payload; unknown
+  // payload types yield an empty string.
+  const extractReasoningText = (payload: Record<string, unknown>): string => {
+    const payloadType = asString(payload.type);
+    if (payloadType === "response.reasoning.delta") {
+      return asString(payload.delta);
+    }
+    if (payloadType === "response.reasoning.done") {
+      return asString(payload.text);
+    }
+    if (payloadType === "response.reasoning_summary_text.delta") {
+      return asString(payload.delta);
+    }
+    if (payloadType === "response.reasoning_summary_text.done") {
+      return asString(payload.text);
+    }
+    if (
+      payloadType === "response.reasoning_summary_part.added" ||
+      payloadType === "response.reasoning_summary_part.done"
+    ) {
+      const part = asRecord(payload.part);
+      return part ? asString(part.text) : "";
+    }
+    return "";
+  };
+  // Returns the summary for `actionCallId`, creating it on first sight. The
+  // name is only filled in when missing; a supplied parent always overwrites.
+  const ensureToolSummary = (input: {
+    actionCallId: string;
+    name?: string;
+    parentActionCallId?: string;
+  }): ToolCallSummary => {
+    const { actionCallId, name, parentActionCallId } = input;
+    let summary = toolSummaries.get(actionCallId);
+    if (!summary) {
+      summary = {
+        key: actionCallId,
+        id: actionCallId,
+        name,
+        status: "pending",
+        parentActionCallId,
+      };
+      toolSummaries.set(actionCallId, summary);
+    }
+    if (name && !summary.name) summary.name = name;
+    if (parentActionCallId) summary.parentActionCallId = parentActionCallId;
+    return summary;
+  };
+  // Adds a tool entry the first time a given tool call id is seen.
+  const pushToolEntry = (summary: ToolCallSummary) => {
+    if (toolEntryIds.has(summary.id)) return;
+    toolEntryIds.add(summary.id);
+    entries.push({
+      kind: "tool",
+      toolCallId: summary.id,
+      toolSummary: summary,
+    });
+  };
+  // Folds a tool.call / tool.result / tool.error event into its summary.
+  const applyToolEvent = (input: {
+    event: Record<string, unknown>;
+    type: string;
+  }) => {
+    const actionCallId = asString(input.event.actionCallId);
+    if (!actionCallId) return;
+    const name = asString(input.event.name) || undefined;
+    const parentActionCallId = asString(input.event.parentActionCallId) ||
+      undefined;
+    const summary = ensureToolSummary({
+      actionCallId,
+      name,
+      parentActionCallId,
+    });
+    if (input.type === "tool.call") {
+      summary.args = "args" in input.event ? input.event.args : undefined;
+      summary.status = "running";
+      // Depth is only recorded for calls that name a parent. A parent with no
+      // recorded depth counts as -1, so its direct child gets depth 0.
+      if (parentActionCallId) {
+        const parentDepth = toolDepthMap.has(parentActionCallId)
+          ? toolDepthMap.get(parentActionCallId)!
+          : -1;
+        summary.depth = summary.depth ?? parentDepth + 1;
+        toolDepthMap.set(actionCallId, summary.depth);
+      }
+      pushToolEntry(summary);
+      return;
+    }
+    if (input.type === "tool.result") {
+      summary.result = "result" in input.event ? input.event.result : null;
+      summary.status = "completed";
+      pushToolEntry(summary);
+      return;
+    }
+    if (input.type === "tool.error") {
+      summary.error = "error" in input.event ? input.event.error : null;
+      summary.status = "error";
+      pushToolEntry(summary);
+    }
+  };
+  // Creates or updates the reasoning entry for `reasoningId`.
+  // "append" concatenates deltas; "replace" reconciles a full text against
+  // what has been accumulated so far.
+  const upsertReasoning = (input: {
+    reasoningId: string;
+    text: string;
+    raw: Record<string, unknown>;
+    mode: "append" | "replace";
+  }) => {
+    const normalizedText = input.text.trim();
+    if (!normalizedText) return;
+    const reasoningId = input.reasoningId || "reasoning";
+    const existingIndex = reasoningIndexById.get(reasoningId);
+    if (existingIndex === undefined) {
+      entries.push({
+        kind: "reasoning",
+        reasoningId,
+        content: normalizedText,
+        reasoningRaw: input.raw,
+      });
+      reasoningIndexById.set(reasoningId, entries.length - 1);
+      return;
+    }
+    const existing = entries[existingIndex];
+    if (!existing || existing.kind !== "reasoning") return;
+    const previousText = typeof existing.content === "string"
+      ? existing.content
+      : "";
+    let nextText = previousText;
+    if (input.mode === "append") {
+      if (!previousText) {
+        nextText = normalizedText;
+      } else if (!previousText.endsWith(normalizedText)) {
+        // A chunk the text already ends with is treated as a repeat.
+        nextText = `${previousText}${normalizedText}`;
+      }
+    } else {
+      if (previousText === normalizedText) {
+        nextText = previousText;
+      } else if (!previousText) {
+        nextText = normalizedText;
+      } else if (normalizedText.startsWith(previousText)) {
+        // The new text extends what we have: take the longer version.
+        nextText = normalizedText;
+      } else if (previousText.startsWith(normalizedText)) {
+        nextText = previousText;
+      } else if (!previousText.includes(normalizedText)) {
+        // Unrelated text for the same id is kept on a new line.
+        nextText = `${previousText}\n${normalizedText}`;
+      }
+    }
+    if (nextText === previousText) return;
+    entries[existingIndex] = {
+      ...existing,
+      content: nextText,
+      reasoningRaw: input.raw,
+    };
+  };
+  // Creates or updates the assistant message entry for `messageId`.
+  const upsertAssistantMessage = (
+    input: { messageId: string; text: string },
+  ) => {
+    const text = input.text.trim();
+    if (!text) return;
+    const messageId = input.messageId || `assistant-${entries.length}`;
+    // If the latest assistant message already shows this text, just remember
+    // that this id maps to it instead of adding a duplicate.
+    for (let i = entries.length - 1; i >= 0; i -= 1) {
+      const entry = entries[i];
+      if (entry?.kind !== "message") continue;
+      if (entry.role !== "assistant") continue;
+      if ((entry.content ?? "").trim() === text) {
+        assistantIndexById.set(messageId, i);
+        return;
+      }
+      break;
+    }
+    const existingIndex = assistantIndexById.get(messageId);
+    if (existingIndex !== undefined) {
+      const existing = entries[existingIndex];
+      if (existing?.kind === "message" && existing.role === "assistant") {
+        entries[existingIndex] = {
+          ...existing,
+          content: text,
+        };
+      }
+      return;
+    }
+    entries.push({
+      kind: "message",
+      role: "assistant",
+      content: text,
+    });
+    assistantIndexById.set(messageId, entries.length - 1);
+  };
+  // Returns the assistant text carried by a stream item: `text` for
+  // `agent_message` items, or the joined `text` parts of an assistant
+  // `message` item. Anything else yields an empty string.
+  const extractAssistantTextFromItem = (
+    item: Record<string, unknown>,
+  ): string => {
+    const itemType = asString(item.type);
+    if (itemType === "agent_message") return asString(item.text);
+    if (itemType !== "message") return "";
+    if (asString(item.role) !== "assistant") return "";
+    const content = item.content;
+    if (!Array.isArray(content)) return "";
+    const textParts = content.map((part) => {
+      const partRecord = asRecord(part);
+      if (!partRecord) return "";
+      return typeof partRecord.text === "string" ? partRecord.text : "";
+    }).filter((part) => part.length > 0);
+    return textParts.join("");
+  };
+
+  for (const trace of safeTraces) {
+    if (!trace || typeof trace !== "object") continue;
+    const record = trace as Record<string, unknown>;
+    const type = asString(record.type);
+    if (type === "message.user") {
+      const message = (record as { message?: unknown }).message;
+      const msgRecord = asRecord(message);
+      entries.push({
+        kind: "message",
+        role: "user",
+        content: stringifyContent(msgRecord?.content),
+      });
+      continue;
+    }
+    if (type === "model.result") {
+      const message = (record as { message?: unknown }).message;
+      const msgRecord = asRecord(message);
+      pushAssistantMessage(stringifyContent(msgRecord?.content));
+      continue;
+    }
+    if (
+      type === "tool.call" || type === "tool.result" || type === "tool.error"
+    ) {
+      applyToolEvent({ event: record, type });
+      continue;
+    }
+    if (type !== "model.stream.event") continue;
+    const event = (record as { event?: unknown }).event;
+    const payloadRecord = asRecord(event);
+    // `codex.event` wrappers carry the actual event in `payload`.
+    const payload = payloadRecord?.type === "codex.event"
+      ? asRecord(payloadRecord.payload)
+      : payloadRecord;
+    if (!payload) continue;
+    const payloadType = asString(payload.type);
+    if (
+      payloadType === "tool.call" || payloadType === "tool.result" ||
+      payloadType === "tool.error"
+    ) {
+      applyToolEvent({ event: payload, type: payloadType });
+      continue;
+    }
+    if (payloadType.startsWith("response.reasoning")) {
+      // Ids are scoped by action call (or run) before being used as keys.
+      const actionScope = asString(record.actionCallId) ||
+        asString(record.runId);
+      const baseReasoningId = asString(payload.item_id) || payloadType;
+      upsertReasoning({
+        reasoningId: scopedId(actionScope, baseReasoningId),
+        text: extractReasoningText(payload),
+        raw: payload,
+        mode: payloadType.endsWith(".delta") ? "append" : "replace",
+      });
+      continue;
+    }
+    const item = asRecord(payload.item);
+    if (!item) continue;
+    const itemType = asString(item.type);
+    if (
+      itemType === "agent_message" ||
+      (itemType === "message" && payloadType === "response.output_item.done")
+    ) {
+      const text = extractAssistantTextFromItem(item);
+      if (!text) continue;
+      const outputIndex = typeof payload.output_index === "number"
+        ? String(payload.output_index)
+        : "";
+      const actionScope = asString(record.actionCallId) ||
+        asString(record.runId);
+      const baseMessageId = asString(item.id) || asString(payload.item_id) ||
+        (outputIndex ? `output-${outputIndex}` : "");
+      upsertAssistantMessage({
+        messageId: scopedId(actionScope, baseMessageId),
+        text,
+      });
+      continue;
+    }
+    if (itemType !== "reasoning") continue;
+    // Reasoning items carry either a list of summary parts or a plain text.
+    let text = "";
+    const summary = item.summary;
+    if (Array.isArray(summary)) {
+      text = summary.map((part) => {
+        const partRecord = asRecord(part);
+        return partRecord && typeof partRecord.text === "string"
+          ? partRecord.text
+          : "";
+      }).join("");
+    } else if (typeof item.text === "string") {
+      text = item.text;
+    }
+    const actionScope = asString(record.actionCallId) || asString(record.runId);
+    const baseReasoningId = asString(item.id) || "reasoning";
+    upsertReasoning({
+      reasoningId: scopedId(actionScope, baseReasoningId),
+      text,
+      raw: item,
+      mode: "replace",
+    });
+  }
+
+  return entries;
+}
+
+/**
+ * Collects streamed reasoning text and groups it by the index of the
+ * assistant message that follows it.
+ *
+ * Reasoning found in `model.stream.event` traces is buffered until the next
+ * `model.result` trace whose message role is `assistant`. The buffer is then
+ * stored under that assistant message's zero-based index and reset. Assistant
+ * messages with nothing buffered get no map entry, and reasoning that is never
+ * followed by an assistant result is not returned.
+ *
+ * @param traces Trace events, processed in array order.
+ * @returns Map from assistant message index to its reasoning details; empty
+ *   when `traces` is not a non-empty array.
+ */
+export function deriveReasoningByAssistant(
+  traces?: TraceEvent[] | null,
+): Map<number, ReasoningDetail[]> {
+  const grouped = new Map<number, ReasoningDetail[]>();
+  if (!Array.isArray(traces) || traces.length === 0) return grouped;
+
+  // Plain (non-array) object or null.
+  const toRecord = (value: unknown): Record<string, unknown> | null =>
+    value && typeof value === "object" && !Array.isArray(value)
+      ? value as Record<string, unknown>
+      : null;
+  const toText = (value: unknown): string =>
+    typeof value === "string" ? value : "";
+  // `codex.event` wrappers carry the actual event in `payload`.
+  const unwrapPayload = (value: unknown): Record<string, unknown> | null => {
+    const outer = toRecord(value);
+    if (outer === null) return null;
+    return outer.type === "codex.event" ? toRecord(outer.payload) : outer;
+  };
+  // Text carried by a `response.reasoning*` stream payload.
+  const streamText = (
+    kind: string,
+    payload: Record<string, unknown>,
+  ): string => {
+    switch (kind) {
+      case "response.reasoning.delta":
+      case "response.reasoning_summary_text.delta":
+        return toText(payload.delta);
+      case "response.reasoning.done":
+      case "response.reasoning_summary_text.done":
+        return toText(payload.text);
+      case "response.reasoning_summary_part.added":
+      case "response.reasoning_summary_part.done": {
+        const part = toRecord(payload.part);
+        return part ? toText(part.text) : "";
+      }
+      default:
+        return "";
+    }
+  };
+  // Text carried by a reasoning item: summary parts, summary string, or text.
+  const itemText = (item: Record<string, unknown>): string => {
+    const { summary } = item;
+    if (Array.isArray(summary)) {
+      return summary.map((part) => toText(toRecord(part)?.text)).join("");
+    }
+    if (typeof summary === "string") return summary;
+    return toText(item.text);
+  };
+
+  const buffered: ReasoningDetail[] = [];
+  const bufferedByKey = new Map<string, ReasoningDetail>();
+  let nextAssistantIndex = 0;
+
+  // Adds a chunk to the buffered detail with the same key, or starts a new
+  // detail. Chunks without an id get a key that is unique per buffer slot.
+  const remember = (
+    id: string | undefined,
+    rawText: string,
+    event: Record<string, unknown>,
+    trace: TraceEvent,
+  ): void => {
+    const chunk = rawText.trim();
+    if (chunk.length === 0) return;
+    const actionCallId =
+      toText((trace as { actionCallId?: unknown }).actionCallId) || undefined;
+    const model = toText((trace as { model?: unknown }).model) || undefined;
+    const key = id && id.trim().length > 0
+      ? id
+      : `${trace.runId ?? ""}:${actionCallId ?? ""}:${buffered.length}`;
+    const known = bufferedByKey.get(key);
+    if (known === undefined) {
+      const created: ReasoningDetail = {
+        text: chunk,
+        event,
+        model,
+        actionCallId,
+      };
+      bufferedByKey.set(key, created);
+      buffered.push(created);
+      return;
+    }
+    // A chunk the text already ends with is treated as a repeat.
+    if (!known.text.endsWith(chunk)) known.text += chunk;
+    known.event = event;
+    if (!known.model && model) known.model = model;
+    if (!known.actionCallId && actionCallId) known.actionCallId = actionCallId;
+  };
+
+  for (const trace of traces) {
+    if (!trace || typeof trace !== "object") continue;
+
+    if (trace.type === "model.result") {
+      const message = (trace as { message?: unknown }).message as
+        | ModelMessage
+        | undefined;
+      if (message?.role !== "assistant") continue;
+      const assistantIndex = nextAssistantIndex;
+      nextAssistantIndex += 1;
+      if (buffered.length > 0) {
+        // splice(0) hands over the buffered details and empties the buffer.
+        grouped.set(assistantIndex, buffered.splice(0));
+        bufferedByKey.clear();
+      }
+      continue;
+    }
+
+    if (trace.type !== "model.stream.event") continue;
+    const payload = unwrapPayload((trace as { event?: unknown }).event);
+    if (payload === null) continue;
+    const kind = toText(payload.type);
+
+    if (kind.startsWith("response.reasoning")) {
+      remember(
+        toText(payload.item_id) || undefined,
+        streamText(kind, payload),
+        payload,
+        trace,
+      );
+      continue;
+    }
+
+    const item = toRecord(payload.item);
+    if (item === null || toText(item.type) !== "reasoning") continue;
+    remember(toText(item.id) || undefined, itemText(item), item, trace);
+  }
+
+  return grouped;
+}
+
 export type RespondInfo = {
   status?: number;
   code?: string;
@@ -987,6 +1519,13 @@ export type RespondInfo = {
   payload?: unknown;
 };
 
+/** One reasoning segment as collected by `deriveReasoningByAssistant`. */
+export type ReasoningDetail = {
+  // Trimmed reasoning text; later chunks for the same key are appended.
+  text: string;
+  // The most recent stream payload or reasoning item applied to this detail.
+  event: unknown;
+  // `model` of the originating trace, when it was a non-empty string.
+  model?: string;
+  // `actionCallId` of the originating trace, when it was a non-empty string.
+  actionCallId?: string;
+};
+
 const RESPOND_TOOL_NAME = "gambit_respond";
 
 const stringifyMessageContent = (value: unknown): string => {
@@ -1089,7 +1628,10 @@ export function buildConversationEntries(
       });
       continue;
     }
-    if (msg.role !== "assistant" && msg.role !== "user") continue;
+    if (
+      msg.role !== "assistant" && msg.role !== "user" &&
+      msg.role !== "system"
+    ) continue;
     const content = stringifyMessageContent(msg.content).trim();
     if (!content) continue;
     entries.push({
@@ -1124,6 +1666,12 @@ export function normalizeAppPath(input: string): string {
     }
     return DEFAULT_TEST_PATH;
   }
+  if (trimmed === "/grade") {
+    if (window.location.pathname !== DEFAULT_GRADE_PATH) {
+      window.history.replaceState({}, "", DEFAULT_GRADE_PATH);
+    }
+    return DEFAULT_GRADE_PATH;
+  }
   if (trimmed === "/build") {
     if (window.location.pathname !== DEFAULT_BUILD_PATH) {
       window.history.replaceState({}, "", DEFAULT_BUILD_PATH);
@@ -1132,38 +1680,40 @@ export function normalizeAppPath(input: string): string {
   }
   if (
     trimmed === "/debug" || trimmed === "/simulate" ||
-    trimmed === SESSIONS_BASE_PATH
+    trimmed === WORKSPACES_BASE_PATH
   ) {
-    if (window.location.pathname !== DEFAULT_SESSION_PATH) {
-      window.history.replaceState({}, "", DEFAULT_SESSION_PATH);
+    if (window.location.pathname !== DEFAULT_WORKSPACE_DEBUG_PATH) {
+      window.history.replaceState({}, "", DEFAULT_WORKSPACE_DEBUG_PATH);
     }
-    return DEFAULT_SESSION_PATH;
+    return DEFAULT_WORKSPACE_DEBUG_PATH;
   }
-  if (/^\/sessions\/[^/]+\/(debug|test|grade|build)$/.test(trimmed)) {
-    return trimmed;
-  }
-  if (/^\/sessions\/[^/]+\/grade/.test(trimmed)) {
+  if (
+    /^\/workspaces\/[^/]+\/(debug|build)$/.test(trimmed) ||
+    /^\/workspaces\/[^/]+\/(test|grade)(?:\/[^/]+)?$/.test(trimmed)
+  ) {
     return trimmed;
   }
-  if (trimmed.startsWith("/debug/sessions/")) {
-    const raw = trimmed.slice("/debug/sessions/".length);
+  if (trimmed.startsWith("/debug/workspaces/")) {
+    const raw = trimmed.slice("/debug/workspaces/".length);
     const decoded = decodeURIComponent(raw);
-    const next = `${SESSIONS_BASE_PATH}/${encodeURIComponent(decoded)}/debug`;
+    const next = `${WORKSPACES_BASE_PATH}/${encodeURIComponent(decoded)}/debug`;
     window.history.replaceState({}, "", next);
     return next;
   }
   if (
-    trimmed.startsWith("/sessions/") && !trimmed.includes("/debug") &&
+    trimmed.startsWith("/workspaces/") && !trimmed.includes("/debug") &&
     !trimmed.includes("/test") && !trimmed.includes("/grade") &&
-    !trimmed.includes("/build") && trimmed !== DEFAULT_SESSION_PATH
+    !trimmed.includes("/build") && trimmed !== DEFAULT_WORKSPACE_DEBUG_PATH
   ) {
-    const remainder = trimmed.slice("/sessions/".length);
+    const remainder = trimmed.slice("/workspaces/".length);
     if (remainder && remainder !== "new") {
       const decoded = decodeURIComponent(remainder);
-      const next = `${SESSIONS_BASE_PATH}/${encodeURIComponent(decoded)}/debug`;
+      const next = `${WORKSPACES_BASE_PATH}/${
+        encodeURIComponent(decoded)
+      }/debug`;
       window.history.replaceState({}, "", next);
       return next;
     }
   }
-  return trimmed || DEFAULT_SESSION_PATH;
+  return trimmed || DEFAULT_WORKSPACE_DEBUG_PATH;
 }
diff --git a/src/cli.codex.live.test.ts b/src/cli.codex.live.test.ts
new file mode 100644
index 000000000..b3fcb07dd
--- /dev/null
+++ b/src/cli.codex.live.test.ts
@@ -0,0 +1,158 @@
+import { assert, assertEquals } from "@std/assert";
+import * as path from "@std/path";
+import {
+  getEnvValue,
+  shouldRunLiveTests,
+} from "./providers/live_test_utils.ts";
+
+/** Path of `cli.ts`, resolved next to this test module. */
+function cliPath(): string {
+  const moduleFile = path.fromFileUrl(import.meta.url);
+  return path.join(path.dirname(moduleFile), "cli.ts");
+}
+
+/**
+ * Writes `live-codex.deck.md` into `dir` with the given model and minimal
+ * reasoning settings, and returns the path of the written file.
+ */
+async function writeDeck(
+  dir: string,
+  model: string,
+): Promise<string> {
+  const target = path.join(dir, "live-codex.deck.md");
+  await Deno.writeTextFile(
+    target,
+    `+++
+label = "live codex cli test"
+
+[modelParams]
+model = "${model}"
+verbosity = "low"
+
+[modelParams.reasoning]
+effort = "minimal"
+summary = "concise"
++++
+
+Reply with one short word.
+`,
+  );
+  return target;
+}
+
+/**
+ * Live codex tests run only when live tests are enabled and
+ * `GAMBIT_RUN_LIVE_CODEX_TESTS` is exactly "1".
+ */
+function shouldRunLiveCodexTests(): boolean {
+  if (!shouldRunLiveTests()) return false;
+  return Deno.env.get("GAMBIT_RUN_LIVE_CODEX_TESTS") === "1";
+}
+
+/**
+ * Resolves the model name and the environment passed to the CLI under test.
+ * The model falls back to "codex-cli/default"; `GAMBIT_CODEX_BIN` is only set
+ * when one of the two binary variables yields a value.
+ */
+function buildLiveCodexEnv(): { model: string; env: Record<string, string> } {
+  const model = getEnvValue("GAMBIT_LIVE_CODEX_MODEL") ?? "codex-cli/default";
+  const binOverride = getEnvValue("GAMBIT_LIVE_CODEX_BIN") ??
+    getEnvValue("GAMBIT_CODEX_BIN");
+  const env: Record<string, string> = binOverride
+    ? { GAMBIT_CODEX_DISABLE_MCP: "1", GAMBIT_CODEX_BIN: binOverride }
+    : { GAMBIT_CODEX_DISABLE_MCP: "1" };
+  return { model, env };
+}
+
+/**
+ * Runs `cli.ts run <deck> --message "Say pong."` through `deno run -A` in a
+ * child process and returns its exit code with trimmed stdout/stderr.
+ */
+async function runLiveCliViaDeno(input: {
+  deckPath: string;
+  env: Record<string, string>;
+}): Promise<{ code: number; stdout: string; stderr: string }> {
+  const { deckPath, env } = input;
+  const cliArgs = ["run", deckPath, "--message", "Say pong."];
+  const { code, stdout, stderr } = await new Deno.Command(Deno.execPath(), {
+    args: ["run", "-A", cliPath(), ...cliArgs],
+    env,
+    stdout: "piped",
+    stderr: "piped",
+  }).output();
+  const decoder = new TextDecoder();
+  return {
+    code,
+    stdout: decoder.decode(stdout).trim(),
+    stderr: decoder.decode(stderr).trim(),
+  };
+}
+
+/**
+ * Compiles `cli.ts` to a binary at `outPath` with `deno compile -A` and
+ * asserts that the compiler exited with code 0.
+ */
+async function compileCliBinary(outPath: string): Promise<void> {
+  const result = await new Deno.Command(Deno.execPath(), {
+    args: ["compile", "-A", "-o", outPath, cliPath()],
+    stdout: "piped",
+    stderr: "piped",
+  }).output();
+  const decode = (bytes: Uint8Array) => new TextDecoder().decode(bytes).trim();
+  // Prefer stderr in the failure message; fall back to stdout when empty.
+  const details = decode(result.stderr) || decode(result.stdout);
+  assertEquals(
+    result.code,
+    0,
+    `failed to compile gambit CLI binary (exit ${result.code}): ${details}`,
+  );
+}
+
+/**
+ * Runs the compiled CLI binary with `run <deck> --message "Say pong."` and
+ * returns its exit code with trimmed stdout/stderr.
+ */
+async function runLiveCliViaCompiledBinary(input: {
+  binaryPath: string;
+  deckPath: string;
+  env: Record<string, string>;
+}): Promise<{ code: number; stdout: string; stderr: string }> {
+  const { binaryPath, deckPath, env } = input;
+  const { code, stdout, stderr } = await new Deno.Command(binaryPath, {
+    args: ["run", deckPath, "--message", "Say pong."],
+    env,
+    stdout: "piped",
+    stderr: "piped",
+  }).output();
+  const decoder = new TextDecoder();
+  return {
+    code,
+    stdout: decoder.decode(stdout).trim(),
+    stderr: decoder.decode(stderr).trim(),
+  };
+}
+
+// Live check: runs the CLI through `deno run` against the configured codex
+// binary and expects exit code 0 with non-empty stdout. Skipped unless live
+// codex tests are enabled.
+Deno.test({
+  name: "cli live: run uses real codex binary",
+  ignore: !shouldRunLiveCodexTests(),
+  permissions: { read: true, write: true, run: true, env: true },
+  sanitizeOps: false,
+  sanitizeResources: false,
+}, async () => {
+  const { model, env } = buildLiveCodexEnv();
+  const dir = await Deno.makeTempDir();
+  try {
+    const deckPath = await writeDeck(dir, model);
+    const out = await runLiveCliViaDeno({ deckPath, env });
+    assertEquals(
+      out.code,
+      0,
+      `live codex CLI run failed (exit ${out.code}): ${
+        out.stderr || out.stdout
+      }`,
+    );
+    assert(out.stdout.length > 0, "expected non-empty assistant output");
+  } finally {
+    // The temp directory is not removed automatically; clean it up even when
+    // an assertion fails.
+    await Deno.remove(dir, { recursive: true });
+  }
+});
+
+// Live check for the compiled CLI: compiles `cli.ts` into the temp directory,
+// runs the binary against the configured codex binary and expects exit code 0
+// with non-empty stdout. Needs GAMBIT_RUN_LIVE_CODEX_COMPILED_TESTS=1 on top of
+// the live codex switch.
+Deno.test({
+  name: "cli live: compiled binary run uses real codex binary",
+  ignore: !shouldRunLiveCodexTests() ||
+    Deno.env.get("GAMBIT_RUN_LIVE_CODEX_COMPILED_TESTS") !== "1",
+  permissions: { read: true, write: true, run: true, env: true },
+  sanitizeOps: false,
+  sanitizeResources: false,
+}, async () => {
+  const { model, env } = buildLiveCodexEnv();
+  const dir = await Deno.makeTempDir();
+  try {
+    const deckPath = await writeDeck(dir, model);
+    const binaryPath = path.join(dir, "gambit-live");
+    await compileCliBinary(binaryPath);
+    const out = await runLiveCliViaCompiledBinary({
+      binaryPath,
+      deckPath,
+      env,
+    });
+    assertEquals(
+      out.code,
+      0,
+      `live compiled gambit run failed (exit ${out.code}): ${
+        out.stderr || out.stdout
+      }`,
+    );
+    assert(out.stdout.length > 0, "expected non-empty assistant output");
+  } finally {
+    // Removes the deck and the compiled binary, which would otherwise be left
+    // behind in the temp directory after every run.
+    await Deno.remove(dir, { recursive: true });
+  }
+});
diff --git a/src/cli.codex_smoke.test.ts b/src/cli.codex_smoke.test.ts
new file mode 100644
index 000000000..1ef1ab3a4
--- /dev/null
+++ b/src/cli.codex_smoke.test.ts
@@ -0,0 +1,188 @@
+import { assertEquals } from "@std/assert";
+import * as path from "@std/path";
+
+/** Path of `cli.ts`, resolved next to this test module. */
+function cliPath(): string {
+  const moduleFile = path.fromFileUrl(import.meta.url);
+  return path.join(path.dirname(moduleFile), "cli.ts");
+}
+
+/**
+ * Writes `root.deck.md` into `dir` for the given model and returns its path.
+ * A `verbosity` line is added to `[modelParams]` only when one is supplied.
+ */
+async function writeDeck(
+  dir: string,
+  model: string,
+  verbosity?: "low" | "medium" | "high",
+): Promise<string> {
+  let verbosityLine = "";
+  if (verbosity) verbosityLine = `verbosity = "${verbosity}"\n`;
+  const target = path.join(dir, "root.deck.md");
+  await Deno.writeTextFile(
+    target,
+    `+++
+label = "codex smoke"
+
+[modelParams]
+model = "${model}"
+${verbosityLine}+++
+
+Smoke deck.
+`,
+  );
+  return target;
+}
+
+/**
+ * Writes an executable bash stand-in for the codex binary into `dir`.
+ *
+ * The script writes its arguments, one per line, to the file named by
+ * `CODEX_ARGS_LOG` (failing when that variable is unset) and then prints three
+ * fixed JSON event lines.
+ *
+ * @returns The script path and the args-log path the caller should pass on.
+ */
+async function writeMockCodexBin(dir: string): Promise<{
+  binPath: string;
+  argsLogPath: string;
+}> {
+  const paths = {
+    binPath: path.join(dir, "mock-codex.sh"),
+    argsLogPath: path.join(dir, "codex-args.log"),
+  };
+  await Deno.writeTextFile(
+    paths.binPath,
+    `#!/usr/bin/env bash
+set -euo pipefail
+if [ -z "\${CODEX_ARGS_LOG:-}" ]; then
+  echo "missing CODEX_ARGS_LOG" >&2
+  exit 1
+fi
+printf '%s\n' "$@" > "$CODEX_ARGS_LOG"
+echo '{"type":"thread.started","thread_id":"thread-smoke"}'
+echo '{"type":"item.completed","item":{"type":"agent_message","text":"ok"}}'
+echo '{"type":"turn.completed","usage":{"input_tokens":1,"output_tokens":1,"total_tokens":2}}'
+`,
+  );
+  await Deno.chmod(paths.binPath, 0o755);
+  return paths;
+}
+
+/**
+ * Runs `cli.ts check <deck>` through `deno run -A` in a child process and
+ * returns the exit code with the untrimmed stdout/stderr text.
+ */
+async function runCheck(deckPath: string): Promise<{
+  code: number;
+  stdout: string;
+  stderr: string;
+}> {
+  const { code, stdout, stderr } = await new Deno.Command(Deno.execPath(), {
+    args: ["run", "-A", cliPath(), "check", deckPath],
+    stdout: "piped",
+    stderr: "piped",
+  }).output();
+  const decoder = new TextDecoder();
+  return {
+    code,
+    stdout: decoder.decode(stdout),
+    stderr: decoder.decode(stderr),
+  };
+}
+
+/**
+ * Writes a `gambit.toml` into `dir` whose `[providers]` table sets `fallback`
+ * to the given value, and returns the path of the written file.
+ */
+async function writeProjectConfig(
+  dir: string,
+  fallback: string,
+): Promise<string> {
+  const target = path.join(dir, "gambit.toml");
+  await Deno.writeTextFile(
+    target,
+    `[providers]
+fallback = "${fallback}"
+`,
+  );
+  return target;
+}
+
+/**
+ * Runs `cli.ts run <deck> --message hi` through `deno run -A` with the mock
+ * codex binary configured via environment variables.
+ *
+ * @returns The exit code, the untrimmed stdout/stderr text, and the contents
+ *   of the args log (an empty string when the log could not be read).
+ */
+async function runDeck(input: {
+  deckPath: string;
+  codexBinPath: string;
+  argsLogPath: string;
+  cwd?: string;
+}): Promise<{
+  code: number;
+  stdout: string;
+  stderr: string;
+  argsLog: string;
+}> {
+  const { deckPath, codexBinPath, argsLogPath, cwd } = input;
+  const { code, stdout, stderr } = await new Deno.Command(Deno.execPath(), {
+    args: ["run", "-A", cliPath(), "run", deckPath, "--message", "hi"],
+    cwd,
+    env: {
+      GAMBIT_CODEX_BIN: codexBinPath,
+      GAMBIT_CODEX_DISABLE_MCP: "1",
+      CODEX_ARGS_LOG: argsLogPath,
+    },
+    stdout: "piped",
+    stderr: "piped",
+  }).output();
+  // The log may be missing when the run failed before reaching the mock
+  // binary; callers asserting on failures still get an empty string.
+  const argsLog = await Deno.readTextFile(argsLogPath).catch(() => "");
+  const decoder = new TextDecoder();
+  return {
+    code,
+    stdout: decoder.decode(stdout),
+    stderr: decoder.decode(stderr),
+    argsLog,
+  };
+}
+
+// `check` must accept a deck whose model is "codex-cli/default".
+Deno.test({
+  name: "cli smoke: check passes with codex-cli/default",
+  permissions: { read: true, write: true, run: true },
+}, async () => {
+  const dir = await Deno.makeTempDir();
+  try {
+    const deckPath = await writeDeck(dir, "codex-cli/default");
+    const result = await runCheck(deckPath);
+    assertEquals(result.code, 0);
+  } finally {
+    // The temp directory is not removed automatically; clean it up even when
+    // an assertion fails.
+    await Deno.remove(dir, { recursive: true });
+  }
+});
+
+// `check` must exit 1 and mention the unsupported legacy prefix for a deck
+// whose model is "codex/default".
+Deno.test({
+  name: "cli smoke: check fails for legacy codex/default",
+  permissions: { read: true, write: true, run: true },
+}, async () => {
+  const dir = await Deno.makeTempDir();
+  try {
+    const deckPath = await writeDeck(dir, "codex/default");
+    const result = await runCheck(deckPath);
+    assertEquals(result.code, 1);
+    // The message may be printed on either stream, so search both.
+    const combined = `${result.stdout}\n${result.stderr}`;
+    assertEquals(
+      combined.includes("legacy codex prefix is unsupported"),
+      true,
+    );
+  } finally {
+    // The temp directory is not removed automatically; clean it up even when
+    // an assertion fails.
+    await Deno.remove(dir, { recursive: true });
+  }
+});
+
+// `check` must exit 1 and report the unsupported fallback when the
+// `gambit.toml` next to the deck sets `providers.fallback = "codex"`.
+Deno.test({
+  name: 'cli smoke: check fails fast when providers.fallback is legacy "codex"',
+  permissions: { read: true, write: true, run: true },
+}, async () => {
+  const dir = await Deno.makeTempDir();
+  try {
+    await writeProjectConfig(dir, "codex");
+    const deckPath = await writeDeck(dir, "llama3");
+    const result = await runCheck(deckPath);
+    assertEquals(result.code, 1);
+    // The message may be printed on either stream, so search both.
+    const combined = `${result.stdout}\n${result.stderr}`;
+    assertEquals(
+      combined.includes('providers.fallback "codex" is no longer supported'),
+      true,
+    );
+  } finally {
+    // The temp directory is not removed automatically; clean it up even when
+    // an assertion fails.
+    await Deno.remove(dir, { recursive: true });
+  }
+});
+
+// Runs the CLI twice against the mock codex binary and inspects the logged
+// arguments: "codex-cli/default" must not pass `-m`, while an explicit
+// "codex-cli/<model>" must pass `-m <model>`; both must forward the verbosity.
+Deno.test({
+  name: "cli smoke: run maps model selection and verbosity for codex-cli",
+  permissions: { read: true, write: true, run: true, env: true },
+}, async () => {
+  const dir = await Deno.makeTempDir();
+  try {
+    const mock = await writeMockCodexBin(dir);
+
+    const defaultDeck = await writeDeck(dir, "codex-cli/default", "high");
+    const defaultRun = await runDeck({
+      deckPath: defaultDeck,
+      codexBinPath: mock.binPath,
+      argsLogPath: mock.argsLogPath,
+      cwd: dir,
+    });
+    assertEquals(defaultRun.code, 0);
+    assertEquals(defaultRun.argsLog.includes("\n-m\n"), false);
+    assertEquals(defaultRun.argsLog.includes('model_verbosity="high"'), true);
+
+    // The second deck overwrites the first (same file name), and the mock
+    // overwrites the args log on every invocation.
+    const passthroughDeck = await writeDeck(
+      dir,
+      "codex-cli/gpt-5.2-codex",
+      "high",
+    );
+    const passthroughRun = await runDeck({
+      deckPath: passthroughDeck,
+      codexBinPath: mock.binPath,
+      argsLogPath: mock.argsLogPath,
+      cwd: dir,
+    });
+    assertEquals(passthroughRun.code, 0);
+    assertEquals(
+      passthroughRun.argsLog.includes("\n-m\ngpt-5.2-codex\n"),
+      true,
+    );
+    assertEquals(
+      passthroughRun.argsLog.includes('model_verbosity="high"'),
+      true,
+    );
+  } finally {
+    // The temp directory is not removed automatically; clean it up even when
+    // an assertion fails.
+    await Deno.remove(dir, { recursive: true });
+  }
+});
diff --git a/src/cli.ts b/src/cli.ts
index 1fa8ef782..b4c282190 100644
--- a/src/cli.ts
+++ b/src/cli.ts
@@ -9,16 +9,7 @@ import * as path from "@std/path";
 import { load as loadDotenv } from "@std/dotenv";
 import { makeConsoleTracer, makeJsonlTracer } from "./trace.ts";
 import { startTui } from "./tui.ts";
-import {
-  createOllamaProvider,
-  ensureOllamaModel,
-  fetchOllamaTags,
-  OLLAMA_PREFIX,
-} from "./providers/ollama.ts";
-import { createOpenRouterProvider } from "./providers/openrouter.ts";
-import { createGoogleProvider } from "./providers/google.ts";
-import { createProviderRouter } from "./providers/router.ts";
-import { createProviderMatchers } from "./model_matchers.ts";
+import type { PermissionDeclarationInput } from "@bolt-foundry/gambit-core";
 import { handleCheckCommand } from "./commands/check.ts";
 import { handleRunCommand } from "./commands/run.ts";
 import { handleServeCommand } from "./commands/serve.ts";
@@ -37,43 +28,40 @@ import {
   printUsage,
 } from "./cli_args.ts";
 import {
-  createModelAliasResolver,
   loadProjectConfig,
+  resolveWorkerSandboxSetting,
   resolveWorkspacePermissions,
 } from "./project_config.ts";
 import { resolveProjectRoot } from "./cli_utils.ts";
+import { createDefaultedRuntime } from "./default_runtime.ts";
 
 const logger = console;
-const DEFAULT_OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1";
 const BOT_ROOT_ENV = "GAMBIT_BOT_ROOT";
 
-type ModelCandidate = {
-  model: string;
-  params?: Record<string, unknown>;
-  alias?: string;
-};
-
-type ProviderAvailability = {
-  available: boolean;
-  reason?: string;
-};
-
-type ProviderCapability = {
-  name: string;
-  matches: (model: string) => boolean;
-  isAvailable: (model: string, opts: { allowPull: boolean }) => Promise<
-    ProviderAvailability
-  >;
-};
-
-function mergeParams(
-  aliasParams?: Record<string, unknown>,
-  baseParams?: Record<string, unknown>,
-): Record<string, unknown> | undefined {
-  if (aliasParams && baseParams) {
-    return { ...aliasParams, ...baseParams };
+function resolveSessionPermissionsFromArgs(args: { // Builds a permission declaration from the parsed --allow-* CLI flags.
+  allowAll?: boolean;
+  allowRead?: true | Array<string>; // `true` or an explicit list of entries
+  allowWrite?: true | Array<string>;
+  allowRun?: true | Array<string>;
+  allowNet?: true | Array<string>;
+  allowEnv?: true | Array<string>;
+}): PermissionDeclarationInput | undefined {
+  if (args.allowAll) { // allowAll takes precedence over every per-kind flag
+    return {
+      read: true,
+      write: true,
+      run: true,
+      net: true,
+      env: true,
+    };
   }
-  return baseParams ?? aliasParams;
+  const out: PermissionDeclarationInput = {};
+  if (args.allowRead !== undefined) out.read = args.allowRead; // copy only the kinds that were supplied
+  if (args.allowWrite !== undefined) out.write = args.allowWrite;
+  if (args.allowRun !== undefined) out.run = args.allowRun;
+  if (args.allowNet !== undefined) out.net = args.allowNet;
+  if (args.allowEnv !== undefined) out.env = args.allowEnv;
+  return Object.keys(out).length > 0 ? out : undefined; // undefined (not {}) when no flag was given
 }
 
 async function readVersionFromConfig(
@@ -173,6 +161,13 @@ async function main() {
   try {
     await loadGambitEnv();
     const args = parseCliArgs(Deno.args);
+    if (args.verbose) {
+      try {
+        Deno.env.set("GAMBIT_VERBOSE", "1");
+      } catch {
+        // ignore env set failures
+      }
+    }
     if (args.version) {
       logger.log(await resolveCliVersion());
       return;
@@ -272,103 +267,30 @@ async function main() {
       );
       Deno.exit(1);
     }
-    const modelAliasResolver = createModelAliasResolver(
+    const workspacePermissions = resolveWorkspacePermissions(
       projectConfig?.config,
     );
-    const workspacePermissions = resolveWorkspacePermissions(
+    const sessionPermissions = resolveSessionPermissionsFromArgs(args);
+    const sessionPermissionsBaseDir = Deno.cwd();
+    const workerSandboxFromConfig = resolveWorkerSandboxSetting(
       projectConfig?.config,
     );
-    const warnedMissingAliases = new Set<string>();
-    const expandModelCandidates = (
-      model: string | Array<string> | undefined,
-      params?: Record<string, unknown>,
-    ): {
-      candidates: Array<ModelCandidate>;
-      allowPull: boolean;
-    } => {
-      if (!model) return { candidates: [], allowPull: false };
-      const baseParams = params;
-      const entries = Array.isArray(model) ? model : [model];
-      const allowPull = !Array.isArray(model);
-      const candidates: Array<ModelCandidate> = [];
-      for (const entry of entries) {
-        if (typeof entry !== "string" || !entry.trim()) continue;
-        const resolution = modelAliasResolver(entry);
-        if (
-          resolution.missingAlias &&
-          !warnedMissingAliases.has(entry)
-        ) {
-          logger.warn(
-            `[gambit] Model alias "${entry}" is not defined in gambit.toml; using literal value.`,
-          );
-          warnedMissingAliases.add(entry);
-        }
-        if (resolution.applied) {
-          const resolvedModel = resolution.model;
-          const mergedParams = mergeParams(resolution.params, baseParams);
-          if (Array.isArray(resolvedModel)) {
-            for (const candidate of resolvedModel) {
-              if (!candidate.trim()) continue;
-              candidates.push({
-                model: candidate,
-                params: mergedParams,
-                alias: resolution.alias,
-              });
-            }
-          } else if (resolvedModel) {
-            candidates.push({
-              model: resolvedModel,
-              params: mergedParams,
-              alias: resolution.alias,
-            });
-          }
-        } else {
-          candidates.push({
-            model: entry,
-            params: baseParams,
-          });
-        }
-      }
-      const allowPullForSingle = allowPull && candidates.length <= 1;
-      return { candidates, allowPull: allowPullForSingle };
-    };
+    const workerSandbox = args.workerSandbox ?? workerSandboxFromConfig ?? true;
+    const runtime = await createDefaultedRuntime({
+      configHint,
+      projectConfig,
+      responsesMode: args.responses ? true : undefined,
+      logger,
+    });
+    const modelAliasResolver = runtime.modelAliasResolver;
+    const fallbackProvider = runtime.configuredFallbackProvider;
+    const provider = runtime.modelProvider;
+    const responsesMode = runtime.responsesMode;
 
     const openRouterApiKey = Deno.env.get("OPENROUTER_API_KEY")?.trim();
     const googleApiKey = (Deno.env.get("GOOGLE_API_KEY") ??
       Deno.env.get("GEMINI_API_KEY"))?.trim();
-    const openRouterBaseURL = Deno.env.get("OPENROUTER_BASE_URL") ??
-      DEFAULT_OPENROUTER_BASE_URL;
     const ollamaBaseURL = Deno.env.get("OLLAMA_BASE_URL") ?? undefined;
-    const googleBaseURL = Deno.env.get("GOOGLE_BASE_URL") ??
-      Deno.env.get("GEMINI_BASE_URL") ??
-      undefined;
-
-    const fallbackProviderRaw = projectConfig?.config?.providers?.fallback;
-    let fallbackProvider:
-      | "openrouter"
-      | "ollama"
-      | "google"
-      | null
-      | undefined = undefined;
-    if (typeof fallbackProviderRaw === "string") {
-      const normalized = fallbackProviderRaw.trim().toLowerCase();
-      if (normalized === "none") {
-        fallbackProvider = null;
-      } else if (
-        normalized === "openrouter" || normalized === "ollama" ||
-        normalized === "google"
-      ) {
-        fallbackProvider = normalized as "openrouter" | "ollama" | "google";
-      } else if (normalized.length > 0) {
-        logger.warn(
-          `[gambit] Unknown providers.fallback "${fallbackProviderRaw}" in gambit.toml; using default fallback.`,
-        );
-      }
-    }
-
-    const effectiveFallbackProvider = fallbackProvider === undefined
-      ? "openrouter"
-      : fallbackProvider;
 
     if (args.cmd === "grade") {
       const graderPath = args.graderPath ?? deckPath;
@@ -426,232 +348,6 @@ async function main() {
       return;
     }
 
-    const chatFallback = Deno.env.get("GAMBIT_CHAT_FALLBACK") === "1";
-    const responsesMode = args.responses ||
-      (!chatFallback && Deno.env.get("GAMBIT_RESPONSES_MODE") !== "0");
-    const openRouterProvider = openRouterApiKey
-      ? createOpenRouterProvider({
-        apiKey: openRouterApiKey,
-        baseURL: openRouterBaseURL ?? undefined,
-        enableResponses: (args.responses || !chatFallback) &&
-          Deno.env.get("GAMBIT_OPENROUTER_RESPONSES") !== "0",
-      })
-      : null;
-    const ollamaProvider = createOllamaProvider({
-      apiKey: Deno.env.get("OLLAMA_API_KEY")?.trim() || undefined,
-      baseURL: ollamaBaseURL,
-    });
-    const googleProvider = googleApiKey
-      ? createGoogleProvider({
-        apiKey: googleApiKey,
-        baseURL: googleBaseURL,
-      })
-      : null;
-
-    const providerRouter = createProviderRouter({
-      providers: {
-        openrouter: openRouterProvider,
-        ollama: ollamaProvider,
-        google: googleProvider,
-      },
-      defaultProvider: fallbackProvider,
-      fallbackToDefaultOnMissing: ["google"],
-    });
-
-    const ollamaPrefix = OLLAMA_PREFIX;
-    const providerMatchers = createProviderMatchers(effectiveFallbackProvider);
-    const ollamaTagsCache: { promise: Promise<Set<string>> | null } = {
-      promise: null,
-    };
-    const getOllamaTags = async (): Promise<Set<string>> => {
-      if (!ollamaTagsCache.promise) {
-        ollamaTagsCache.promise = fetchOllamaTags(ollamaBaseURL);
-      }
-      return await ollamaTagsCache.promise;
-    };
-    const providerCapabilities: Array<ProviderCapability> = [
-      {
-        name: "ollama",
-        matches: providerMatchers.matchesOllama,
-        isAvailable: async (model, opts) => {
-          const trimmed = model.slice(ollamaPrefix.length);
-          if (!trimmed) {
-            return { available: false, reason: "missing Ollama model name" };
-          }
-          if (opts.allowPull) {
-            try {
-              await ensureOllamaModel(trimmed, ollamaBaseURL);
-              return { available: true };
-            } catch (err) {
-              return {
-                available: false,
-                reason: err instanceof Error ? err.message : String(err),
-              };
-            }
-          }
-          try {
-            const tags = await getOllamaTags();
-            if (tags.has(trimmed)) {
-              return { available: true };
-            }
-            return {
-              available: false,
-              reason: `Ollama model "${trimmed}" not installed`,
-            };
-          } catch (err) {
-            return {
-              available: false,
-              reason: err instanceof Error ? err.message : String(err),
-            };
-          }
-        },
-      },
-      {
-        name: "google",
-        matches: providerMatchers.matchesGoogle,
-        isAvailable: (_model, _opts) =>
-          Promise.resolve(
-            googleApiKey
-              ? { available: true }
-              : (effectiveFallbackProvider === "openrouter" && openRouterApiKey)
-              ? { available: true }
-              : {
-                available: false,
-                reason: "GOOGLE_API_KEY or GEMINI_API_KEY is not set",
-              },
-          ),
-      },
-      {
-        name: "openrouter",
-        matches: providerMatchers.matchesOpenRouter,
-        isAvailable: (_model, _opts) =>
-          Promise.resolve(
-            openRouterApiKey ? { available: true } : {
-              available: false,
-              reason: "OPENROUTER_API_KEY is not set",
-            },
-          ),
-      },
-    ];
-    const resolveModelSelection = async (
-      model: string | Array<string>,
-      params?: Record<string, unknown>,
-      deckPath?: string,
-    ): Promise<{ model: string; params?: Record<string, unknown> }> => {
-      const { candidates, allowPull } = expandModelCandidates(model, params);
-      if (candidates.length === 0) {
-        throw new Error(
-          deckPath
-            ? `No model configured for deck ${deckPath}`
-            : "No model configured.",
-        );
-      }
-      const failures: Array<string> = [];
-      for (const candidate of candidates) {
-        const provider = providerCapabilities.find((cap) =>
-          cap.matches(candidate.model)
-        );
-        const availability = provider
-          ? await provider.isAvailable(candidate.model, { allowPull })
-          : {
-            available: false,
-            reason: "no provider registered for model",
-          };
-        if (availability.available) {
-          return {
-            model: candidate.model,
-            params: candidate.params,
-          };
-        }
-        const label = provider ? provider.name : "unknown";
-        const reason = availability.reason ? `: ${availability.reason}` : "";
-        failures.push(`${candidate.model} (${label}${reason})`);
-      }
-      const suffix = failures.length ? ` Tried: ${failures.join(", ")}.` : "";
-      throw new Error(
-        deckPath
-          ? `No available model found for deck ${deckPath}.${suffix}`
-          : `No available model found.${suffix}`,
-      );
-    };
-    const shouldResolveModel = (
-      model: string | Array<string>,
-    ): boolean => {
-      if (Array.isArray(model)) return true;
-      const resolution = modelAliasResolver(model);
-      return Boolean(resolution.applied || resolution.missingAlias);
-    };
-    const provider: import("@bolt-foundry/gambit-core").ModelProvider = {
-      resolveModel: async (input) =>
-        await resolveModelSelection(
-          input.model,
-          input.params,
-          input.deckPath,
-        ),
-      responses: async (input: {
-        request: import("@bolt-foundry/gambit-core").CreateResponseRequest;
-        state?: import("@bolt-foundry/gambit-core").SavedState;
-        onStreamEvent?: (
-          event: import("@bolt-foundry/gambit-core").ResponseEvent,
-        ) => void;
-      }) => {
-        const applied = shouldResolveModel(input.request.model)
-          ? await resolveModelSelection(
-            input.request.model,
-            input.request.params,
-          )
-          : { model: input.request.model, params: input.request.params };
-        const request = {
-          ...input.request,
-          model: applied.model ?? input.request.model,
-          params: applied.params,
-        };
-        if (!request.model) {
-          throw new Error("Model is required.");
-        }
-        const selection = providerRouter.resolve({ model: request.model });
-        const responses = selection.provider.responses;
-        if (!responses) {
-          throw new Error(
-            `${selection.providerKey} provider does not support responses.`,
-          );
-        }
-        return await responses({
-          ...input,
-          request: {
-            ...request,
-            model: selection.model,
-          },
-        });
-      },
-      chat: async (input: {
-        model: string;
-        messages: Array<import("@bolt-foundry/gambit-core").ModelMessage>;
-        tools?: Array<import("@bolt-foundry/gambit-core").ToolDefinition>;
-        stream?: boolean;
-        state?: import("@bolt-foundry/gambit-core").SavedState;
-        onStreamText?: (chunk: string) => void;
-        params?: Record<string, unknown>;
-      }) => {
-        const applied = shouldResolveModel(input.model)
-          ? await resolveModelSelection(input.model, input.params)
-          : { model: input.model, params: input.params };
-        const request = {
-          ...input,
-          model: applied.model ?? input.model,
-          params: applied.params,
-        };
-        if (!request.model) {
-          throw new Error("Model is required.");
-        }
-        const selection = providerRouter.resolve({ model: request.model });
-        return await selection.provider.chat({
-          ...request,
-          model: selection.model,
-        });
-      },
-    };
-
     const tracerFns: Array<
       (
         event: import("@bolt-foundry/gambit-core").TraceEvent,
@@ -698,6 +394,11 @@ async function main() {
           const suffix = action ? ` (${action})` : "";
           return `file: ${pathValue}${suffix}`;
         },
+        workspacePermissions,
+        workspacePermissionsBaseDir: projectConfig?.root,
+        sessionPermissions,
+        sessionPermissionsBaseDir,
+        workerSandbox,
       });
       return;
     }
@@ -716,6 +417,11 @@ async function main() {
           : undefined,
         contextProvided: args.contextProvided,
         initialMessage: parseMessage(args.message),
+        workspacePermissions,
+        workspacePermissionsBaseDir: projectConfig?.root,
+        sessionPermissions,
+        sessionPermissionsBaseDir,
+        workerSandbox,
       });
       return;
     }
@@ -735,17 +441,18 @@ async function main() {
         sourcemap: args.sourcemap,
         platform: args.platform,
         responsesMode,
+        workerSandbox,
       });
       return;
     }
 
-    if (args.cmd === "test-bot") {
+    if (args.cmd === "scenario") {
       if (!deckPath) {
-        logger.error("test-bot requires a root deck path.");
+        logger.error("scenario requires a root deck path.");
         Deno.exit(1);
       }
       if (!args.testDeckPath) {
-        logger.error("test-bot requires --test-deck <persona deck path>.");
+        logger.error("scenario requires --test-deck <persona deck path>.");
         Deno.exit(1);
       }
       const maxTurns = args.maxTurns ?? 12;
@@ -764,8 +471,13 @@ async function main() {
         verbose: args.verbose,
         statePath: args.statePath,
         responsesMode,
+        workspacePermissions,
+        workspacePermissionsBaseDir: projectConfig?.root,
+        sessionPermissions,
+        sessionPermissionsBaseDir,
+        workerSandbox,
       });
-      logger.log(`Test bot session saved to ${statePath}`);
+      logger.log(`Scenario session saved to ${statePath}`);
       if (args.gradePaths && args.gradePaths.length > 0) {
         for (const graderPath of args.gradePaths) {
           await runGraderAgainstState({
@@ -776,6 +488,7 @@ async function main() {
             modelProvider: provider,
             trace: tracer,
             responsesMode,
+            workerSandbox,
           });
         }
       }
@@ -800,6 +513,7 @@ async function main() {
         modelProvider: provider,
         trace: tracer,
         responsesMode,
+        workerSandbox,
       });
       return;
     }
@@ -818,6 +532,9 @@ async function main() {
       responsesMode,
       workspacePermissions,
       workspacePermissionsBaseDir: projectConfig?.root,
+      sessionPermissions,
+      sessionPermissionsBaseDir,
+      workerSandbox,
     });
   } catch (err) {
     logger.error(err instanceof Error ? err.message : String(err));
diff --git a/src/cli_args.test.ts b/src/cli_args.test.ts
new file mode 100644
index 000000000..e548eab42
--- /dev/null
+++ b/src/cli_args.test.ts
@@ -0,0 +1,91 @@
+import { assertEquals, assertThrows } from "@std/assert";
+import { isKnownCommand, parseCliArgs } from "./cli_args.ts";
+
+Deno.test("bare allow-read flag does not consume positional deck path", () => {
+  const args = parseCliArgs(["repl", "--allow-read", "deck.ts"]);
+  assertEquals(args.cmd, "repl");
+  assertEquals(args.deckPath, "deck.ts"); // "deck.ts" stays positional, not the flag's value
+  assertEquals(args.allowRead, true); // bare flag is reported as `true`
+});
+
+Deno.test("allow-read with explicit equals value keeps positional deck path", () => {
+  const args = parseCliArgs([
+    "run",
+    "--allow-read=./data,./fixtures",
+    "deck.ts",
+  ]);
+  assertEquals(args.cmd, "run");
+  assertEquals(args.deckPath, "deck.ts");
+  assertEquals(args.allowRead, ["./data", "./fixtures"]); // comma list is split into entries
+});
+
+Deno.test("bare allow-run flag does not consume positional deck path", () => {
+  const args = parseCliArgs(["run", "--allow-run", "deck.ts"]);
+  assertEquals(args.cmd, "run");
+  assertEquals(args.deckPath, "deck.ts");
+  assertEquals(args.allowRun, true);
+});
+
+Deno.test(
+  "permission-like option values are not treated as permission overrides",
+  () => {
+    const args = parseCliArgs([
+      "run",
+      "deck.ts",
+      "--message",
+      "--allow-all", // directly follows --message, so it must become the message text
+      "--context",
+      "--no-sandbox", // directly follows --context, so it must become the context value
+    ]);
+    assertEquals(args.cmd, "run");
+    assertEquals(args.deckPath, "deck.ts");
+    assertEquals(args.message, "--allow-all");
+    assertEquals(args.context, "--no-sandbox");
+    assertEquals(args.allowAll, undefined); // no permission override was recorded
+    assertEquals(args.workerSandbox, undefined); // no worker-execution override was recorded
+  },
+);
+
+Deno.test("parseCliArgs parses canonical worker flags", () => {
+  const enabled = parseCliArgs(["run", "root.deck.md", "--worker-sandbox"]);
+  assertEquals(enabled.workerSandbox, true);
+  assertEquals(enabled.legacyExec, undefined); // legacyExec is only set by --legacy-exec
+
+  const disabled = parseCliArgs([
+    "run",
+    "root.deck.md",
+    "--no-worker-sandbox",
+  ]);
+  assertEquals(disabled.workerSandbox, false);
+  assertEquals(disabled.legacyExec, undefined);
+});
+
+Deno.test("parseCliArgs parses legacy exec rollback flag", () => {
+  const args = parseCliArgs(["run", "root.deck.md", "--legacy-exec"]);
+  assertEquals(args.workerSandbox, false); // --legacy-exec also turns the worker sandbox off
+  assertEquals(args.legacyExec, true);
+});
+
+Deno.test("parseCliArgs supports sandbox aliases", () => {
+  const args = parseCliArgs(["run", "root.deck.md", "--sandbox"]); // deprecated alias of --worker-sandbox
+  assertEquals(args.workerSandbox, true);
+});
+
+Deno.test("parseCliArgs rejects conflicting worker flags", () => {
+  assertThrows(
+    () =>
+      parseCliArgs([
+        "run",
+        "root.deck.md",
+        "--worker-sandbox", // asks for worker execution on
+        "--legacy-exec", // asks for it off, contradicting the flag above
+      ]),
+    Error,
+    "Conflicting worker execution flags", // fragment expected in the thrown message
+  );
+});
+
+Deno.test("CLI command registry exposes scenario and not test-bot", () => {
+  assertEquals(isKnownCommand("scenario"), true);
+  assertEquals(isKnownCommand("test-bot"), false); // old command name is no longer registered
+});
diff --git a/src/cli_args.ts b/src/cli_args.ts
index 8e9217190..beb40cd81 100644
--- a/src/cli_args.ts
+++ b/src/cli_args.ts
@@ -5,6 +5,7 @@ import { normalizeFlagList, parsePortValue } from "./cli_utils.ts";
 
 const logger = console;
 let initFlagWarningShown = false;
+let workerSandboxAliasWarningShown = false;
 
 const COMMANDS = [
   "bot",
@@ -14,7 +15,7 @@ const COMMANDS = [
   "run",
   "repl",
   "serve",
-  "test-bot",
+  "scenario",
   "grade",
   "export",
 ] as const;
@@ -33,7 +34,7 @@ const HELP_COMMANDS = [
   "run",
   "repl",
   "serve",
-  "test-bot",
+  "scenario",
   "grade",
 ] as const;
 
@@ -70,10 +71,233 @@ type Args = {
   bundle?: boolean;
   sourcemap?: boolean;
   platform?: string;
+  allowAll?: boolean;
+  allowRead?: true | Array<string>;
+  allowWrite?: true | Array<string>;
+  allowRun?: true | Array<string>;
+  allowNet?: true | Array<string>;
+  allowEnv?: true | Array<string>;
+  workerSandbox?: boolean;
+  legacyExec?: boolean;
   help?: boolean;
   version?: boolean;
 };
 
+type PermissionFlagValue = { // Accumulated state for one --allow-<kind> flag across all its occurrences.
+  provided: boolean; // the flag appeared at least once
+  all: boolean; // the flag appeared bare or with an empty list
+  values: Array<string>; // explicit entries collected from --flag=a,b forms
+};
+
+type ParsedPermissionOverrides = { // Result of extractPermissionOverrides.
+  argv: Array<string>; // tokens left over for parseArgs once override flags are removed
+  allowAll: boolean; // -A or --allow-all was seen
+  allowRead: PermissionFlagValue;
+  allowWrite: PermissionFlagValue;
+  allowRun: PermissionFlagValue;
+  allowNet: PermissionFlagValue;
+  allowEnv: PermissionFlagValue;
+  workerSandbox?: boolean; // undefined when no worker/sandbox flag was given
+  workerSandboxSource?: string; // first flag that set workerSandbox; named in the conflict error
+  sandboxAliasUsed: boolean; // --sandbox or --no-sandbox was seen
+  legacyExec: boolean; // --legacy-exec was seen
+};
+
+const STRING_OPTION_FLAGS = [ // Options that take a string value; also supplied to parseArgs as `string`.
+  "deck",
+  "init", // deprecated spelling of "context" (parseCliArgs warns about it)
+  "context",
+  "message",
+  "test-deck",
+  "grade",
+  "grader",
+  "bot-input",
+  "bot-root",
+  "max-turns",
+  "model",
+  "model-force",
+  "platform",
+  "trace",
+  "state",
+  "out",
+  "port",
+] as const;
+const OPTION_VALUE_FLAGS = new Set( // the same names in "--name" form, for matching raw argv tokens
+  STRING_OPTION_FLAGS.map((flag) => `--${flag}`),
+);
+
+/** Splits a comma-separated flag value into trimmed, non-empty entries. */
+function parseCsvList(input: string): Array<string> {
+  const trimmed = input.split(",").map((piece) => piece.trim());
+  const nonEmpty = trimmed.filter((piece) => piece.length > 0);
+  return nonEmpty;
+}
+
+function createPermissionFlagValue(): PermissionFlagValue { // Initial state: flag not seen yet.
+  return { provided: false, all: false, values: [] }; // new array per call, so flags never share a list
+}
+
+/**
+ * Folds one occurrence of an `--allow-*` flag into `target`.
+ *
+ * A bare flag (`value` undefined) or a list with no non-blank entries marks the
+ * flag as "allow all" and discards collected entries. After that, explicit
+ * lists from later occurrences are ignored.
+ */
+function mergePermissionValue(
+  target: PermissionFlagValue,
+  value: string | undefined,
+) {
+  target.provided = true;
+  const entries = value === undefined ? [] : parseCsvList(value);
+  if (entries.length === 0) {
+    target.all = true;
+    target.values = [];
+  } else if (!target.all) {
+    target.values.push(...entries);
+  }
+}
+
+function extractPermissionOverrides( // Pulls permission and worker-execution flags out of argv and records them.
+  argv: Array<string>,
+): ParsedPermissionOverrides {
+  const out: ParsedPermissionOverrides = {
+    argv: [], // every token that is not an override flag is appended here, in order
+    allowAll: false,
+    allowRead: createPermissionFlagValue(),
+    allowWrite: createPermissionFlagValue(),
+    allowRun: createPermissionFlagValue(),
+    allowNet: createPermissionFlagValue(),
+    allowEnv: createPermissionFlagValue(),
+    workerSandbox: undefined,
+    workerSandboxSource: undefined,
+    sandboxAliasUsed: false,
+    legacyExec: false,
+  };
+
+  const assignWorkerSandbox = (value: boolean, source: string) => { // records an on/off choice; throws if it contradicts an earlier one
+    if (
+      out.workerSandbox !== undefined &&
+      out.workerSandbox !== value
+    ) {
+      throw new Error(
+        `Conflicting worker execution flags: ${out.workerSandboxSource} and ${source}.`,
+      );
+    }
+    out.workerSandbox = value;
+    if (!out.workerSandboxSource) { // keep the first flag so the error names the original choice
+      out.workerSandboxSource = source;
+    }
+  };
+
+  const flagMap = new Map<string, PermissionFlagValue>([ // per-kind flag name -> accumulator in `out`
+    ["--allow-read", out.allowRead],
+    ["--allow-write", out.allowWrite],
+    ["--allow-run", out.allowRun],
+    ["--allow-net", out.allowNet],
+    ["--allow-env", out.allowEnv],
+  ]);
+  const isPermissionOverrideToken = (value: string): boolean => // NOTE(review): --worker-sandbox, --no-worker-sandbox and --legacy-exec are extracted below but not listed here; confirm intended
+    value === "-A" ||
+    value === "--allow-all" ||
+    value === "--sandbox" ||
+    value === "--no-sandbox" ||
+    value.startsWith("--allow-");
+  let consumeNextAsOptionValue = false; // true right after a value-taking option written without "=value"
+
+  for (let i = 0; i < argv.length; i++) {
+    const token = argv[i];
+    if (consumeNextAsOptionValue) { // this token is the previous option's value: pass it through uninterpreted
+      out.argv.push(token);
+      consumeNextAsOptionValue = false;
+      continue;
+    }
+
+    if (token === "--") { // copy "--" and everything after it verbatim, then stop scanning
+      out.argv.push(...argv.slice(i));
+      break;
+    }
+
+    if (token.startsWith("--")) {
+      const equalsIndex = token.indexOf("=");
+      const flagName = equalsIndex === -1 ? token : token.slice(0, equalsIndex);
+      if (OPTION_VALUE_FLAGS.has(flagName)) {
+        if (equalsIndex === -1 && i + 1 < argv.length) {
+          const nextToken = argv[i + 1];
+          if (isPermissionOverrideToken(nextToken)) { // value looks like an override flag: join as "--opt=value" and skip it
+            out.argv.push(`${token}=${nextToken}`);
+            i++;
+            continue;
+          }
+          out.argv.push(token);
+          consumeNextAsOptionValue = true;
+          continue;
+        }
+        out.argv.push(token); // already "--opt=value", or the option is the last token
+        continue;
+      }
+    }
+
+    if (token === "-A" || token === "--allow-all") {
+      out.allowAll = true;
+      continue;
+    }
+    if (token === "--worker-sandbox") {
+      assignWorkerSandbox(true, "--worker-sandbox");
+      continue;
+    }
+    if (token === "--no-worker-sandbox") {
+      assignWorkerSandbox(false, "--no-worker-sandbox");
+      continue;
+    }
+    if (token === "--legacy-exec") { // sets legacyExec and also turns the worker sandbox off
+      out.legacyExec = true;
+      assignWorkerSandbox(false, "--legacy-exec");
+      continue;
+    }
+    if (token === "--sandbox") { // alias; sandboxAliasUsed lets parseCliArgs warn about it
+      out.sandboxAliasUsed = true;
+      assignWorkerSandbox(true, "--sandbox");
+      continue;
+    }
+    if (token === "--no-sandbox") {
+      out.sandboxAliasUsed = true;
+      assignWorkerSandbox(false, "--no-sandbox");
+      continue;
+    }
+
+    if (token.startsWith("--allow-")) {
+      let matched = false;
+      for (const [flag, target] of flagMap.entries()) {
+        if (token === flag) { // bare flag: no list supplied
+          mergePermissionValue(target, undefined);
+          matched = true;
+          break;
+        }
+        if (token.startsWith(`${flag}=`)) { // "--flag=list": hand over the text after "="
+          mergePermissionValue(target, token.slice(flag.length + 1));
+          matched = true;
+          break;
+        }
+      }
+      if (matched) continue;
+    }
+
+    out.argv.push(token); // anything else, including unrecognized --allow-* flags, passes through
+  }
+
+  return out;
+}
+
+/** Resolves a flag to undefined (absent), true (all), or a deduped list. */
+function finalizePermissionFlag(
+  value: PermissionFlagValue,
+): true | Array<string> | undefined {
+  if (!value.provided) return undefined;
+  const unique = value.all ? [] : [...new Set(value.values)];
+  return unique.length === 0 ? true : unique;
+}
+
 type CommandDoc = {
   command: Command;
   summary: string;
@@ -185,7 +409,8 @@ function formatCommandDoc(doc: CommandDoc, includeDetails: boolean): string {
 }
 
 export function parseCliArgs(argv: Array<string>): Args {
-  const parsed = parseArgs(argv, {
+  const permissions = extractPermissionOverrides(argv);
+  const parsed = parseArgs(permissions.argv, {
     boolean: [
       "stream",
       "responses",
@@ -198,25 +423,7 @@ export function parseCliArgs(argv: Array<string>): Args {
       "no-bundle",
       "sourcemap",
     ],
-    string: [
-      "deck",
-      "init",
-      "context",
-      "message",
-      "test-deck",
-      "grade",
-      "grader",
-      "bot-input",
-      "bot-root",
-      "max-turns",
-      "model",
-      "model-force",
-      "platform",
-      "trace",
-      "state",
-      "out",
-      "port",
-    ],
+    string: [...STRING_OPTION_FLAGS],
     alias: {
       help: "h",
       version: "V",
@@ -240,23 +447,45 @@ export function parseCliArgs(argv: Array<string>): Args {
     initFlagWarningShown = true;
     logger.warn('[gambit] "--init" is deprecated; use "--context" instead.');
   }
+  if (permissions.sandboxAliasUsed && !workerSandboxAliasWarningShown) {
+    workerSandboxAliasWarningShown = true;
+    logger.warn(
+      '[gambit] "--sandbox/--no-sandbox" are deprecated; use "--worker-sandbox/--no-worker-sandbox" (or "--legacy-exec") instead.',
+    );
+  }
   const contextValue = contextArg ?? legacyInit;
   const contextProvided = contextArg !== undefined || legacyInit !== undefined;
 
   const [cmdRaw, deckPathRaw] = parsed._;
-  const hasBundleFlag = argv.includes("--bundle");
-  const hasNoBundleFlag = argv.includes("--no-bundle");
+  const hasBundleFlag = permissions.argv.includes("--bundle");
+  const hasNoBundleFlag = permissions.argv.includes("--no-bundle");
   if (hasBundleFlag && hasNoBundleFlag) {
     throw new Error("Use either --bundle or --no-bundle, not both.");
   }
-  const hasSourceMapFlag = argv.includes("--sourcemap");
-  const hasNoSourceMapFlag = argv.includes("--no-sourcemap");
+  const hasSourceMapFlag = permissions.argv.includes("--sourcemap");
+  const hasNoSourceMapFlag = permissions.argv.includes("--no-sourcemap");
   if (hasSourceMapFlag && hasNoSourceMapFlag) {
     throw new Error("Use either --sourcemap or --no-sourcemap, not both.");
   }
   const cmd = cmdRaw as Args["cmd"];
   const deckPath = deckPathRaw as string | undefined;
 
+  const allowRead = permissions.allowAll ? true : finalizePermissionFlag(
+    permissions.allowRead,
+  );
+  const allowWrite = permissions.allowAll ? true : finalizePermissionFlag(
+    permissions.allowWrite,
+  );
+  const allowRun = permissions.allowAll ? true : finalizePermissionFlag(
+    permissions.allowRun,
+  );
+  const allowNet = permissions.allowAll ? true : finalizePermissionFlag(
+    permissions.allowNet,
+  );
+  const allowEnv = permissions.allowAll ? true : finalizePermissionFlag(
+    permissions.allowEnv,
+  );
+
   return {
     cmd,
     deckPath,
@@ -286,6 +515,14 @@ export function parseCliArgs(argv: Array<string>): Args {
     bundle: hasNoBundleFlag ? false : hasBundleFlag ? true : undefined,
     sourcemap: hasNoSourceMapFlag ? false : hasSourceMapFlag ? true : undefined,
     platform: parsed.platform as string | undefined,
+    allowAll: permissions.allowAll ? true : undefined,
+    allowRead,
+    allowWrite,
+    allowRun,
+    allowNet,
+    allowEnv,
+    workerSandbox: permissions.workerSandbox,
+    legacyExec: permissions.legacyExec ? true : undefined,
     help: Boolean(parsed.help),
     version: Boolean(parsed.version),
   };
diff --git a/src/cli_help.test.ts b/src/cli_help.test.ts
index e5fd14b8e..351a858e6 100644
--- a/src/cli_help.test.ts
+++ b/src/cli_help.test.ts
@@ -29,6 +29,7 @@ Deno.test({
   assert(output.includes("gambit help <command>"));
   assert(output.includes("bot       Run the Gambit bot assistant"));
   assert(output.includes("run       Run a deck once"));
+  assert(output.includes("scenario  Run a scenario loop with a persona deck"));
   assert(!output.includes("export    Export a bundle from state"));
 });
 
diff --git a/src/cli_utils.ts b/src/cli_utils.ts
index 1c45c3add..70e6d7d99 100644
--- a/src/cli_utils.ts
+++ b/src/cli_utils.ts
@@ -65,7 +65,7 @@ export function defaultSessionRoot(deckPath: string): string {
   const deckDir = path.dirname(resolvedDeckPath);
   const projectRoot = findProjectRoot(deckDir);
   const baseDir = projectRoot ?? deckDir;
-  return path.join(baseDir, ".gambit", "sessions");
+  return path.join(baseDir, ".gambit", "workspaces");
 }
 
 export function defaultTestBotStatePath(deckPath: string): string {
diff --git a/src/commands/check.test.ts b/src/commands/check.test.ts
index 6af50cf91..c51b49970 100644
--- a/src/commands/check.test.ts
+++ b/src/commands/check.test.ts
@@ -91,3 +91,47 @@ Deno.test({
     "no fallback provider configured",
   );
 });
+
+Deno.test({
+  name: "check accepts codex-cli-prefixed models without remote checks",
+  permissions: { read: true, write: true },
+}, async () => {
+  const dir = await Deno.makeTempDir();
+  const deckPath = await writeDeck(dir, "root.deck.md", "codex-cli/default");
+  // No explicit assertion: the test passes when the command resolves.
+  await handleCheckCommand({
+    deckPath,
+    checkOnline: false,
+  });
+});
+
+Deno.test({
+  name: "check accepts bare codex-cli as default alias",
+  permissions: { read: true, write: true },
+}, async () => {
+  const dir = await Deno.makeTempDir();
+  const deckPath = await writeDeck(dir, "root.deck.md", "codex-cli");
+  // No explicit assertion: the test passes when the command resolves.
+  await handleCheckCommand({
+    deckPath,
+    checkOnline: false,
+  });
+});
+
+Deno.test({
+  name: "check rejects legacy codex-prefixed models",
+  permissions: { read: true, write: true },
+}, async () => {
+  const dir = await Deno.makeTempDir();
+  const deckPath = await writeDeck(dir, "root.deck.md", "codex/default");
+  // The rejection must be an Error whose message includes the text below.
+  await assertRejects(
+    () =>
+      handleCheckCommand({
+        deckPath,
+        checkOnline: false,
+      }),
+    Error,
+    "legacy codex prefix is unsupported",
+  );
+});
diff --git a/src/commands/check.ts b/src/commands/check.ts
index ce2fa5d39..5e6c32675 100644
--- a/src/commands/check.ts
+++ b/src/commands/check.ts
@@ -1,6 +1,7 @@
 import * as path from "@std/path";
 import { loadDeck } from "@bolt-foundry/gambit-core";
 import type { ModelAliasResolver } from "../project_config.ts";
+import { CODEX_PREFIX } from "../providers/codex.ts";
 import { GOOGLE_PREFIX } from "../providers/google.ts";
 import { fetchOllamaTags, OLLAMA_PREFIX } from "../providers/ollama.ts";
 import { OPENROUTER_PREFIX } from "../providers/openrouter.ts";
@@ -193,7 +194,14 @@ export async function handleCheckCommand(opts: {
   const parseProvider = (model: string): {
     providerKey?: ProviderKey;
     strippedModel: string;
+    legacyCodex?: boolean;
   } => {
+    if (model.trim() === "codex-cli") {
+      return {
+        providerKey: "codex-cli",
+        strippedModel: "default",
+      };
+    }
     if (model.startsWith(OPENROUTER_PREFIX)) {
       return {
         providerKey: "openrouter",
@@ -212,6 +220,15 @@ export async function handleCheckCommand(opts: {
         strippedModel: model.slice(GOOGLE_PREFIX.length),
       };
     }
+    if (model.startsWith(CODEX_PREFIX)) {
+      return {
+        providerKey: "codex-cli",
+        strippedModel: model.slice(CODEX_PREFIX.length),
+      };
+    }
+    if (model === "codex" || model.startsWith("codex/")) {
+      return { strippedModel: model, legacyCodex: true };
+    }
     return { strippedModel: model };
   };
 
@@ -220,6 +237,12 @@ export async function handleCheckCommand(opts: {
     skipped?: boolean;
   }> => {
     const parsed = parseProvider(candidate);
+    if (parsed.legacyCodex) {
+      failures.push(
+        `${candidate} (legacy codex prefix is unsupported; use codex-cli/default or codex-cli/<model>)`,
+      );
+      return { available: false };
+    }
     const prefixed = Boolean(parsed.providerKey);
     let providerKey = parsed.providerKey;
     let resolvedModel = parsed.strippedModel;
@@ -290,6 +313,14 @@ export async function handleCheckCommand(opts: {
       return { available: false };
     }
 
+    if (providerKey === "codex-cli") {
+      if (!resolvedModel.trim()) {
+        failures.push(`${candidate} (codex-cli: missing model name)`);
+        return { available: false };
+      }
+      return { available: true };
+    }
+
     failures.push(`${candidate} (unknown provider)`);
     return { available: false };
   };
diff --git a/src/commands/export.ts b/src/commands/export.ts
index b6021e57c..3d79a010b 100644
--- a/src/commands/export.ts
+++ b/src/commands/export.ts
@@ -42,6 +42,7 @@ const TRACE_EVENT_TYPES = new Set<string>([
   "tool.result",
   "model.call",
   "model.result",
+  "model.stream.event",
   "log",
   "monolog",
 ]);
diff --git a/src/commands/grade.ts b/src/commands/grade.ts
index 5f40ac6fe..727ca4436 100644
--- a/src/commands/grade.ts
+++ b/src/commands/grade.ts
@@ -36,6 +36,7 @@ const TRACE_EVENT_TYPES = new Set<string>([
   "tool.result",
   "model.call",
   "model.result",
+  "model.stream.event",
   "log",
   "monolog",
 ]);
@@ -158,6 +159,7 @@ export async function runGraderAgainstState(opts: {
     event: import("@bolt-foundry/gambit-core").TraceEvent,
   ) => void;
   responsesMode?: boolean;
+  workerSandbox?: boolean;
 }) {
   const state = loadState(opts.statePath);
   if (!state) {
@@ -215,6 +217,7 @@ export async function runGraderAgainstState(opts: {
           defaultModel: opts.model,
           trace: opts.trace,
           responsesMode: opts.responsesMode,
+          workerSandbox: opts.workerSandbox,
         });
       }
 
@@ -232,6 +235,7 @@ export async function runGraderAgainstState(opts: {
           defaultModel: opts.model,
           trace: opts.trace,
           responsesMode: opts.responsesMode,
+          workerSandbox: opts.workerSandbox,
         });
         turns.push({
           index: idx,
diff --git a/src/commands/run.ts b/src/commands/run.ts
index 98d1322e4..520e186b3 100644
--- a/src/commands/run.ts
+++ b/src/commands/run.ts
@@ -22,6 +22,9 @@ export async function handleRunCommand(opts: {
   responsesMode?: boolean;
   workspacePermissions?: PermissionDeclarationInput;
   workspacePermissionsBaseDir?: string;
+  sessionPermissions?: PermissionDeclarationInput;
+  sessionPermissionsBaseDir?: string;
+  workerSandbox?: boolean;
 }) {
   const state = opts.statePath ? loadState(opts.statePath) : undefined;
   const onStateUpdate = opts.statePath
@@ -46,6 +49,9 @@ export async function handleRunCommand(opts: {
     responsesMode: opts.responsesMode,
     workspacePermissions: opts.workspacePermissions,
     workspacePermissionsBaseDir: opts.workspacePermissionsBaseDir,
+    sessionPermissions: opts.sessionPermissions,
+    sessionPermissionsBaseDir: opts.sessionPermissionsBaseDir,
+    workerSandbox: opts.workerSandbox,
   });
 
   if (isGambitEndSignal(result)) {
diff --git a/src/commands/serve.ts b/src/commands/serve.ts
index 2c98b6ae7..5b7ba89c1 100644
--- a/src/commands/serve.ts
+++ b/src/commands/serve.ts
@@ -1,11 +1,43 @@
 import * as path from "@std/path";
-import { existsSync } from "@std/fs";
+import { copy, ensureDir, existsSync } from "@std/fs";
 import { startWebSocketSimulator } from "../server.ts";
 import type { ModelProvider } from "@bolt-foundry/gambit-core";
 import { parsePortValue, resolveProjectRoot } from "../cli_utils.ts";
-import { createWorkspaceScaffold } from "../workspace.ts";
 
 const logger = console;
+const GAMBIT_BOT_SOURCE_DECK_URL = new URL(
+  "../decks/gambit-bot/PROMPT.md",
+  import.meta.url,
+);
+const GAMBIT_BOT_SOURCE_DIR = GAMBIT_BOT_SOURCE_DECK_URL.protocol === "file:"
+  ? path.dirname(path.fromFileUrl(GAMBIT_BOT_SOURCE_DECK_URL))
+  : ""; // "" when the deck URL is not a file: URL, i.e. there is no local dir
+const SIMPLE_PROMPT_TEMPLATE = `+++
+label = "Local Prompt"
+description = "Minimal starter deck created by gambit serve."
+
+[modelParams]
+model = ["codex-cli/default"]
++++
+
+You are a helpful assistant.
+
+Keep responses concise and directly answer the user.`;
+
+/** Seeds `<baseRoot>/.gambit/policy` from the bundled gambit-bot policy dir. */
+async function ensureGambitBotPolicyMirror(baseRoot: string) {
+  // Without a local source directory there is nothing to copy from.
+  if (!GAMBIT_BOT_SOURCE_DIR) return;
+  const bundledPolicyDir = path.join(GAMBIT_BOT_SOURCE_DIR, "policy");
+  const mirrorDir = path.join(baseRoot, ".gambit", "policy");
+  // A failed stat (e.g. missing dir) is not an error here; just skip the copy.
+  const bundledInfo = await Deno.stat(bundledPolicyDir).catch(() => null);
+  if (!bundledInfo?.isDirectory) return;
+  // An existing mirror is left untouched.
+  if (existsSync(mirrorDir)) return;
+  await ensureDir(path.dirname(mirrorDir));
+  await copy(bundledPolicyDir, mirrorDir, { overwrite: false });
+}
 
 export async function handleServeCommand(opts: {
   deckPath?: string;
@@ -21,39 +53,20 @@ export async function handleServeCommand(opts: {
   sourcemap?: boolean;
   platform?: string;
   responsesMode?: boolean;
+  workerSandbox?: boolean;
 }) {
   const cwd = Deno.cwd();
   const baseRoot = opts.deckPath ? resolveProjectRoot(cwd) ?? cwd : cwd;
-  const workspaceBaseDir = path.join(baseRoot, ".gambit", "workspaces");
-  const sessionsDir = path.join(baseRoot, ".gambit", "sessions");
+  await ensureGambitBotPolicyMirror(baseRoot);
   let resolvedDeckPath = opts.deckPath?.trim();
-  let workspaceConfig:
-    | {
-      id: string;
-      rootDeckPath: string;
-      rootDir: string;
-      onboarding?: boolean;
-      scaffoldEnabled?: boolean;
-      scaffoldRoot?: string;
-    }
-    | undefined;
   if (!resolvedDeckPath) {
     const localPrompt = path.join(cwd, "PROMPT.md");
     if (existsSync(localPrompt)) {
       resolvedDeckPath = localPrompt;
     } else {
-      const workspace = await createWorkspaceScaffold({
-        baseDir: workspaceBaseDir,
-      });
-      resolvedDeckPath = workspace.rootDeckPath;
-      workspaceConfig = {
-        id: workspace.id,
-        rootDeckPath: workspace.rootDeckPath,
-        rootDir: workspace.rootDir,
-        onboarding: true,
-        scaffoldEnabled: true,
-        scaffoldRoot: workspaceBaseDir,
-      };
+      await Deno.writeTextFile(localPrompt, SIMPLE_PROMPT_TEMPLATE);
+      logger.log(`[serve] created ${localPrompt}`);
+      resolvedDeckPath = localPrompt;
     }
   }
   const envMode = (Deno.env.get("GAMBIT_ENV") ?? Deno.env.get("NODE_ENV") ?? "")
@@ -88,13 +101,12 @@ export async function handleServeCommand(opts: {
       contextProvided: opts.contextProvided,
       port,
       verbose: opts.verbose,
-      sessionDir: workspaceConfig ? sessionsDir : undefined,
-      workspace: workspaceConfig,
       autoBundle,
       forceBundle,
       sourceMap,
       bundlePlatform,
       responsesMode: opts.responsesMode,
+      workerSandbox: opts.workerSandbox,
     });
 
   if (!opts.watch) {
diff --git a/src/commands/test_bot.test.ts b/src/commands/test_bot.test.ts
new file mode 100644
index 000000000..d71838a10
--- /dev/null
+++ b/src/commands/test_bot.test.ts
@@ -0,0 +1,123 @@
+import { assert, assertEquals } from "@std/assert";
+import * as path from "@std/path";
+import { loadState } from "@bolt-foundry/gambit-core";
+import type { ModelProvider } from "@bolt-foundry/gambit-core";
+import { runTestBotLoop } from "./test_bot.ts";
+import { modImportPath } from "../server_test_utils.ts";
+
+Deno.test("scenario loop stamps scenario metadata and user message sources", async () => {
+  const dir = await Deno.makeTempDir();
+  const modHref = modImportPath();
+  const rootDeckPath = path.join(dir, "root.deck.ts");
+  const scenarioDeckPath = path.join(dir, "scenario-persona.deck.ts");
+  const statePath = path.join(dir, "state.json");
+
+  const deckSource = `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string().optional(),
+      outputSchema: z.string().optional(),
+      modelParams: { model: "dummy-model" },
+    });
+  `;
+  await Deno.writeTextFile(rootDeckPath, deckSource);
+  await Deno.writeTextFile(scenarioDeckPath, deckSource);
+  // Stub provider: each chat() call returns a numbered assistant message.
+  let callCount = 0;
+  const provider: ModelProvider = {
+    chat() {
+      callCount += 1;
+      return Promise.resolve({
+        message: { role: "assistant", content: `msg-${callCount}` },
+        finishReason: "stop",
+      });
+    },
+  };
+  // Run the loop with maxTurns: 1 and persist state to statePath.
+  await runTestBotLoop({
+    rootDeckPath,
+    botDeckPath: scenarioDeckPath,
+    contextProvided: false,
+    maxTurns: 1,
+    modelProvider: provider,
+    statePath,
+  });
+  // Reload the persisted state and check the scenario metadata.
+  const state = loadState(statePath);
+  assert(state, "expected persisted state");
+  assertEquals(state.meta?.scenarioRunId, state.runId);
+  assertEquals(state.meta?.scenarioConfigPath, scenarioDeckPath);
+  assertEquals(state.meta?.selectedScenarioDeckId, "scenario-persona");
+  // Pair each message with its ref by index; keep user messages that have one.
+  const userRefs = (state.messages ?? [])
+    .map((message, index) => ({
+      role: message.role,
+      ref: state.messageRefs?.[index],
+    }))
+    .filter((entry) => entry.role === "user" && entry.ref);
+  assert(userRefs.length > 0, "expected at least one user message ref");
+  for (const entry of userRefs) {
+    assertEquals(entry.ref?.source, "scenario");
+  }
+});
+
+Deno.test("scenario loop normalizes existing state metadata without new turns", async () => {
+  const dir = await Deno.makeTempDir();
+  const modHref = modImportPath();
+  const rootDeckPath = path.join(dir, "root.deck.ts");
+  const scenarioDeckPath = path.join(dir, "scenario-persona.deck.ts");
+  const statePath = path.join(dir, "state.json");
+
+  const deckSource = `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string().optional(),
+      outputSchema: z.string().optional(),
+      modelParams: { model: "dummy-model" },
+    });
+  `;
+  await Deno.writeTextFile(rootDeckPath, deckSource);
+  await Deno.writeTextFile(scenarioDeckPath, deckSource);
+  await Deno.writeTextFile(
+    statePath,
+    JSON.stringify({
+      runId: "run-existing",
+      messages: [
+        { role: "user", content: "legacy user turn" },
+        { role: "assistant", content: "legacy response" },
+      ],
+      messageRefs: [
+        { id: "msg-user", role: "user" },
+        { id: "msg-assistant", role: "assistant" },
+      ],
+      meta: {},
+    }),
+  );
+  // Stub provider that always returns the same assistant message.
+  const provider: ModelProvider = {
+    chat() {
+      return Promise.resolve({
+        message: { role: "assistant", content: "unused" },
+        finishReason: "stop",
+      });
+    },
+  };
+  // maxTurns: 0 with a pre-seeded state file (written above, without `source`).
+  await runTestBotLoop({
+    rootDeckPath,
+    botDeckPath: scenarioDeckPath,
+    contextProvided: false,
+    maxTurns: 0,
+    modelProvider: provider,
+    statePath,
+  });
+  // The seeded runId is reused and the seeded user ref gains source "scenario".
+  const state = loadState(statePath);
+  assert(state, "expected persisted state");
+  assertEquals(state.meta?.scenarioRunId, "run-existing");
+  assertEquals(state.meta?.scenarioConfigPath, scenarioDeckPath);
+  assertEquals(state.meta?.selectedScenarioDeckId, "scenario-persona");
+  assertEquals(state.messageRefs?.[0]?.source, "scenario");
+});
diff --git a/src/commands/test_bot.ts b/src/commands/test_bot.ts
index f7f965f3b..011a68115 100644
--- a/src/commands/test_bot.ts
+++ b/src/commands/test_bot.ts
@@ -1,7 +1,9 @@
 import { isGambitEndSignal, runDeck } from "@bolt-foundry/gambit-core";
 import { loadState, saveState } from "@bolt-foundry/gambit-core";
 import type { ModelProvider, TraceEvent } from "@bolt-foundry/gambit-core";
+import type { PermissionDeclarationInput } from "@bolt-foundry/gambit-core";
 import { loadDeck } from "@bolt-foundry/gambit-core";
+import * as path from "@std/path";
 import type { ZodTypeAny } from "zod";
 import {
   defaultTestBotStatePath,
@@ -49,6 +51,37 @@ function randomId(prefix: string): string {
   return `${prefix}-${suffix}`;
 }
 
+function deriveScenarioDeckId(deckPath: string): string {
+  return path.basename(deckPath).replace(/\.(deck\.)?(md|ts)$/i, ""); // drops a trailing [.deck].md / [.deck].ts
+}
+
+// Stamps `source` on the refs of user messages that were added after
+// `previousState`; returns `nextState` itself when no ref needs changing.
+function applyUserMessageRefSource(
+  previousState: import("@bolt-foundry/gambit-core").SavedState | undefined,
+  nextState: import("@bolt-foundry/gambit-core").SavedState,
+  source: "scenario" | "manual",
+): import("@bolt-foundry/gambit-core").SavedState {
+  const { messages, messageRefs } = nextState;
+  // Refs are matched to messages by index, so both arrays are required.
+  if (!Array.isArray(messages) || !Array.isArray(messageRefs)) return nextState;
+  // Messages already in previousState are skipped; no previous state => all.
+  const firstNewIndex = Math.max(0, previousState?.messages?.length ?? 0);
+  const taggedRefs = [...messageRefs];
+  let dirty = false;
+  for (let i = firstNewIndex; i < messages.length; i++) {
+    const ref = taggedRefs[i];
+    const isUserMessage = messages[i]?.role === "user";
+    // Skip non-user messages, refs without a string id, and tagged refs.
+    if (!isUserMessage || !ref || typeof ref.id !== "string") continue;
+    if (ref.source === source) continue;
+    taggedRefs[i] = { ...ref, source };
+    dirty = true;
+  }
+  // Only allocate a new state object when at least one ref was re-tagged.
+  return dirty ? { ...nextState, messageRefs: taggedRefs } : nextState;
+}
+
 function resolveDefaultValue(raw: unknown): unknown {
   if (typeof raw === "function") {
     try {
@@ -330,7 +363,7 @@ function buildInitFillPrompt(args: {
     schemaHints,
   };
   return [
-    "You are filling missing required init fields for a Gambit Test Bot run.",
+    "You are filling missing required init fields for a Gambit scenario run.",
     "Return ONLY valid JSON that includes values for the missing fields.",
     "Do not include any fields that are not listed as missing.",
     "If the only missing path is '(root)', return the full init JSON value.",
@@ -409,6 +442,11 @@ export async function runDeckWithFallback(args: {
     event: import("@bolt-foundry/gambit-core").TraceEvent,
   ) => void;
   responsesMode?: boolean;
+  workspacePermissions?: PermissionDeclarationInput;
+  workspacePermissionsBaseDir?: string;
+  sessionPermissions?: PermissionDeclarationInput;
+  sessionPermissionsBaseDir?: string;
+  workerSandbox?: boolean;
 }): Promise<unknown> {
   try {
     return await runDeck({
@@ -426,6 +464,11 @@ export async function runDeckWithFallback(args: {
       onStreamText: args.onStreamText,
       trace: args.trace,
       responsesMode: args.responsesMode,
+      workspacePermissions: args.workspacePermissions,
+      workspacePermissionsBaseDir: args.workspacePermissionsBaseDir,
+      sessionPermissions: args.sessionPermissions,
+      sessionPermissionsBaseDir: args.sessionPermissionsBaseDir,
+      workerSandbox: args.workerSandbox,
     });
   } catch (error) {
     if (args.input === undefined && shouldRetryWithStringInput(error)) {
@@ -444,6 +487,11 @@ export async function runDeckWithFallback(args: {
         onStreamText: args.onStreamText,
         trace: args.trace,
         responsesMode: args.responsesMode,
+        workspacePermissions: args.workspacePermissions,
+        workspacePermissionsBaseDir: args.workspacePermissionsBaseDir,
+        sessionPermissions: args.sessionPermissions,
+        sessionPermissionsBaseDir: args.sessionPermissionsBaseDir,
+        workerSandbox: args.workerSandbox,
       });
     }
     throw error;
@@ -465,6 +513,11 @@ export async function runTestBotLoop(opts: {
   verbose?: boolean;
   statePath?: string;
   responsesMode?: boolean;
+  workspacePermissions?: PermissionDeclarationInput;
+  workspacePermissionsBaseDir?: string;
+  sessionPermissions?: PermissionDeclarationInput;
+  sessionPermissionsBaseDir?: string;
+  workerSandbox?: boolean;
 }): Promise<string> {
   let rootState:
     | import("@bolt-foundry/gambit-core").SavedState
@@ -483,6 +536,27 @@ export async function runTestBotLoop(opts: {
     capturedTraces.push(event);
     opts.trace?.(event);
   };
+  const scenarioDeckId = deriveScenarioDeckId(opts.botDeckPath);
+  const enrichScenarioState = (
+    previousState: import("@bolt-foundry/gambit-core").SavedState | undefined,
+    state: import("@bolt-foundry/gambit-core").SavedState,
+  ): import("@bolt-foundry/gambit-core").SavedState => {
+    const withScenarioRefs = applyUserMessageRefSource(
+      previousState,
+      state,
+      "scenario",
+    );
+    const runId = typeof withScenarioRefs.runId === "string"
+      ? withScenarioRefs.runId
+      : undefined;
+    const meta = {
+      ...(withScenarioRefs.meta ?? {}),
+      ...(runId ? { scenarioRunId: runId } : {}),
+      scenarioConfigPath: opts.botDeckPath,
+      selectedScenarioDeckId: scenarioDeckId,
+    };
+    return { ...withScenarioRefs, meta };
+  };
   const saveStateToDisk = (
     state: import("@bolt-foundry/gambit-core").SavedState,
   ) => {
@@ -495,10 +569,11 @@ export async function runTestBotLoop(opts: {
 
   const existingState = loadState(statePath);
   if (existingState) {
-    rootState = existingState;
+    rootState = enrichScenarioState(undefined, existingState);
     if (Array.isArray(existingState.traces)) {
       capturedTraces.push(...existingState.traces);
     }
+    saveStateToDisk(rootState);
   }
 
   let initFillMeta: TestBotInitFill | undefined;
@@ -506,11 +581,12 @@ export async function runTestBotLoop(opts: {
   const updateRootState = (
     state: import("@bolt-foundry/gambit-core").SavedState,
   ) => {
+    const scenarioState = enrichScenarioState(rootState, state);
     const enriched = enrichStateMeta(
       {
-        ...state,
+        ...scenarioState,
         meta: {
-          ...(state.meta ?? {}),
+          ...(scenarioState.meta ?? {}),
           ...(initFillMeta ? { testBotInitFill: initFillMeta } : {}),
         },
       },
@@ -556,6 +632,11 @@ export async function runTestBotLoop(opts: {
             botState = state;
           },
           responsesMode: opts.responsesMode,
+          workspacePermissions: opts.workspacePermissions,
+          workspacePermissionsBaseDir: opts.workspacePermissionsBaseDir,
+          sessionPermissions: opts.sessionPermissions,
+          sessionPermissionsBaseDir: opts.sessionPermissionsBaseDir,
+          workerSandbox: opts.workerSandbox,
         });
         const parsed = parseInitFillOutput(fillOutput);
         if (parsed.error) {
@@ -612,10 +693,10 @@ export async function runTestBotLoop(opts: {
           provided: parsed.data,
         };
         logger.log(
-          `[test-bot] init fill requested: ${missing.join(", ")}`,
+          `[scenario] init fill requested: ${missing.join(", ")}`,
         );
         logger.log(
-          `[test-bot] init fill applied: ${
+          `[scenario] init fill applied: ${
             initFillMeta.applied !== undefined
               ? JSON.stringify(initFillMeta.applied)
               : "none"
@@ -630,12 +711,14 @@ export async function runTestBotLoop(opts: {
           args: {
             missing,
           },
+          toolKind: "internal",
         });
         traceWrapper({
           type: "tool.result",
           runId: randomId("testbot"),
           actionCallId,
           name: "gambit_test_bot_init_fill",
+          toolKind: "internal",
           result: {
             applied: initFillMeta.applied,
             provided: initFillMeta.provided,
@@ -644,7 +727,7 @@ export async function runTestBotLoop(opts: {
       }
     } catch (err) {
       const message = err instanceof Error ? err.message : String(err);
-      logger.error(`[test-bot] init fill failed: ${message}`);
+      logger.error(`[scenario] init fill failed: ${message}`);
       throw err;
     }
   }
@@ -664,6 +747,11 @@ export async function runTestBotLoop(opts: {
       state: rootState,
       onStateUpdate: updateRootState,
       responsesMode: opts.responsesMode,
+      workspacePermissions: opts.workspacePermissions,
+      workspacePermissionsBaseDir: opts.workspacePermissionsBaseDir,
+      sessionPermissions: opts.sessionPermissions,
+      sessionPermissionsBaseDir: opts.sessionPermissionsBaseDir,
+      workerSandbox: opts.workerSandbox,
     });
     if (isGambitEndSignal(initialResult)) {
       sessionEnded = true;
@@ -691,6 +779,11 @@ export async function runTestBotLoop(opts: {
         botState = state;
       },
       responsesMode: opts.responsesMode,
+      workspacePermissions: opts.workspacePermissions,
+      workspacePermissionsBaseDir: opts.workspacePermissionsBaseDir,
+      sessionPermissions: opts.sessionPermissions,
+      sessionPermissionsBaseDir: opts.sessionPermissionsBaseDir,
+      workerSandbox: opts.workerSandbox,
     });
     if (isGambitEndSignal(botResult)) {
       sessionEnded = true;
@@ -715,6 +808,11 @@ export async function runTestBotLoop(opts: {
       state: rootState,
       onStateUpdate: updateRootState,
       responsesMode: opts.responsesMode,
+      workspacePermissions: opts.workspacePermissions,
+      workspacePermissionsBaseDir: opts.workspacePermissionsBaseDir,
+      sessionPermissions: opts.sessionPermissions,
+      sessionPermissionsBaseDir: opts.sessionPermissionsBaseDir,
+      workerSandbox: opts.workerSandbox,
     });
     if (isGambitEndSignal(rootResult)) {
       sessionEnded = true;
diff --git a/src/decks/gambit-bot/INTENT.md b/src/decks/gambit-bot/INTENT.md
index 2f8a61b1c..a51cd77b3 100644
--- a/src/decks/gambit-bot/INTENT.md
+++ b/src/decks/gambit-bot/INTENT.md
@@ -1,63 +1,99 @@
-# Gambit Bot Intent
+# Gambit Build Assistant Intent
 
 ## Purpose
 
-- Act as a product-commanded assistant that helps people author, test, and
-  iterate on Gambit decks quickly and reliably.
-- Reduce the time from idea to a runnable Deck Format v1.0 workspace by guiding
-  users through a minimal, high-leverage question flow.
+- Gambit Build Assistant exists to help users design, build, and improve AI
+  assistants using Gambit decks.
+- Gambit Build Assistant should shorten the path from idea to a working,
+  testable deck by guiding users through concrete edits and iteration loops.
 
 ## End State
 
-- Users can create a valid Deck Format v1.0 workspace via the bot without manual
-  cleanup.
-- The bot keeps users in control, provides clear change visibility, and guides
-  Build/Test/Grade iteration to calibrate quality.
-- Outputs are local-first, reproducible, and compatible with the simulator UI.
+- People trust Gambit Build Assistant as an authoritative source for building AI
+  assistants, and expect it to provide best practices plus practical advice.
+- Users can start from a vague idea and leave with a working Gambit deck they
+  can run, test, and iterate locally.
+- Gambit Build Assistant reliably turns user requests into concrete deck changes
+  that match the user's intent and constraints.
+- Generated and edited decks stay valid under Deck Format v1.0 and remain easy
+  for users to understand and maintain.
+- Conversations are practical and execution-focused: clear recommendations,
+  direct edits, and explicit next steps.
 
 ## Constraints
 
-- `PROMPT.md` is the canonical entrypoint; `INTENT.md` and `policy/*.md` are
-  guidance only.
-- Use existing Gambit runtime and test-bot primitives; do not fork pipelines.
-- Avoid introducing remote dependencies without explicit opt-in.
+- `PROMPT.md` is the canonical executable entrypoint.
+- The goal of `PROMPT.md` is to set a stable frame of mind so the assistant
+  gathers precisely the amount of context needed to complete the task.
+- `INTENT.md` is the primary alignment surface for what to build and why.
+- When a request changes goals, scope, constraints, or success criteria, update
+  `INTENT.md` first, then make deck/file changes that implement that intent.
+- `INTENT.md` and `policy/*.md` are guidance-only and must not be treated as
+  executable prompts.
+- Treat `policy/` as a discovery mechanism for long-term behavior, edge cases,
+  and documented preferences that should shape assistant behavior over time.
+- The assistant being built should be able to understand its purpose thoroughly
+  by relying on the guidance in `policy/` plus the current `INTENT.md`.
+- Do not distract users with internal processes or jargon. Focus on helping
+  them, and avoid details that do not directly improve their understanding of
+  how to build something better.
+- Prefer minimal, targeted edits over broad rewrites unless the user explicitly
+  asks for a broader change.
 
 ## Tradeoffs
 
-- Prefer clarity and runnable scaffolds over exhaustive customization.
-- Prefer short, opinionated guidance to reduce user decision fatigue.
+- Prioritize shipping a small, correct step now over covering every edge case in
+  one pass.
+- Favor clarity and deterministic structure over expressive but ambiguous prompt
+  prose.
+- Defer non-blocking cleanup when it does not materially improve user outcomes
+  in the current session.
 
 ## Risk tolerance
 
-- Moderate: ship iterative improvements as long as core workflows stay stable.
+- High tolerance for iterative prompt and structure refinement when changes are
+  small, testable, and reversible.
+- Low tolerance for regressions in deck-format correctness, guidance accuracy,
+  or user trust.
 
 ## Escalation conditions
 
-- The bot produces decks that fail Deck Format v1.0 validation or cannot run.
-- Changes risk breaking Build/Test/Grade flows in the simulator UI.
-- The bot’s behavior conflicts with cross-company Product Command launch intent.
+- The requested change conflicts with deck-format rules or this policy surface.
+- The user intent is materially ambiguous and multiple plausible directions
+  would produce incompatible outcomes.
+- A proposed change introduces safety, reliability, or maintainability risk that
+  cannot be mitigated within the current edit scope.
 
 ## Verification steps
 
-- Bot flow produces a valid `PROMPT.md`-anchored deck with scenarios and
-  graders.
-- Generated decks run end-to-end in Build/Test/Grade without manual edits.
-- Bot-driven workflows pass `bft precommit` checks.
+- For substantial direction changes, verify `INTENT.md` was updated before the
+  corresponding deck edits.
+- Validate deck structure and references after meaningful edits.
+- Confirm resulting behavior against scenario expectations in
+  `packages/gambit/src/decks/gambit-bot/scenarios/`.
+- Ensure guidance remains consistent with
+  `packages/gambit/src/decks/gambit-bot/policy/` and
+  `policy/templates/INTENT.md`.
 
 ## Activation / revalidation
 
-- Activation: When the Gambit Bot is used as the primary Build on-ramp.
-- End: After 1.0 rollout and the bot workflow is stable and documented.
-- Revalidation: Major changes to Deck Format v1.0 or bot scope.
+- Activation: this intent governs decisions for Gambit Build Assistant deck
+  authoring and maintenance in this folder.
+- End condition: superseded by a newer local intent or by changes to shared
+  Product Command/deck-format doctrine.
+- Revalidation triggers: significant changes to Gambit deck format, simulator
+  workflow expectations, or Gambit Build Assistant user goals.
 
 ## Appendix
 
 ### Inputs
 
-- `memos/cross-company/projects/gambit-product-command-launch/INTENT.md`
-- `memos/product/projects/gambit-bot-launch/INTENT.md`
-- `memos/engineering/areas/product-engineering/INTENT.md`
+- `policy/templates/INTENT.md`
+- `packages/gambit/src/decks/gambit-bot/PROMPT.md`
+- `packages/gambit/src/decks/gambit-bot/policy/product-command.md`
+- `packages/gambit/src/decks/gambit-bot/policy/deck-format-1.0.md`
 
 ### Related
 
-- `packages/gambit/src/decks/guides/gambit-bot-review.md`
+- `packages/gambit/src/decks/gambit-bot/policy/README.md`
+- `memos/cross-company/projects/gambit-product-command-launch/INTENT.md`
diff --git a/src/decks/gambit-bot/PROMPT.md b/src/decks/gambit-bot/PROMPT.md
index 33b9179c9..128f075f5 100644
--- a/src/decks/gambit-bot/PROMPT.md
+++ b/src/decks/gambit-bot/PROMPT.md
@@ -2,146 +2,54 @@
 label = "gambit_bot"
 
 [modelParams]
-model = ["ollama/hf.co/LiquidAI/LFM2-1.2B-Tool-GGUF:latest", "openrouter/openai/gpt-5.1-chat"]
+model = ["codex-cli/default"]
 temperature = 0.2
 
-[[actions]]
-name = "bot_write"
-path = "../actions/bot_write/PROMPT.md"
-description = "Create or update a file under the bot root."
-
-[[actions]]
-name = "bot_delete"
-path = "../actions/bot_delete/PROMPT.md"
-description = "Delete a file or directory under the bot root."
-
-[[actions]]
-name = "bot_read"
-path = "../actions/bot_read/PROMPT.md"
-description = "Read a file under the bot root."
-
-[[actions]]
-name = "bot_exists"
-path = "../actions/bot_exists/PROMPT.md"
-description = "Check whether a path exists under the bot root."
-
-[[actions]]
-name = "bot_mkdir"
-path = "../actions/bot_mkdir/PROMPT.md"
-description = "Create a directory under the bot root."
-
-[[actions]]
-name = "bot_list"
-path = "../actions/bot_list/PROMPT.md"
-description = "List files and directories under the bot root."
-
-[[actions]]
-name = "policy_search"
-path = "../actions/policy_search/PROMPT.md"
-description = "Policy gateway: finds relevant policies and returns a summary for the planned change."
-
-[[actions]]
-name = "bot_deck_review"
-path = "../actions/bot_deck_review/PROMPT.md"
-description = "Review the Gambit Bot deck against local guidance and propose improvements."
-
-[[graders]]
-label = "Deck format guard (turn)"
-path = "./graders/deck_format_guard/PROMPT.md"
-description = "Deterministic guard for Deck Format v1.0 writes."
-
-[[graders]]
-label = "Deck format policy guard (turn) LLM"
-path = "./graders/deck_format_policy_llm/PROMPT.md"
-description = "LLM guard for policy-compliant deck editing behavior."
-
-[[graders]]
-label = "First deck location guard (turn)"
-path = "./graders/first_deck_root_prompt_guard/PROMPT.md"
-description = "Checks that the first created deck is root PROMPT.md (not a subfolder PROMPT.md)."
-
-[[graders]]
-label = "First deck location guard (tools)"
-path = "./graders/first_deck_root_prompt_guard_tools/PROMPT.md"
-description = "Checks first created deck location using tool-call-aware grading context."
-
-[[graders]]
-label = "First deck location guard (tools, conversation)"
-path = "./graders/first_deck_root_prompt_guard_tools_conversation/PROMPT.md"
-description = "Conversation-level check of first created deck location with tool-call-aware context."
+[modelParams.reasoning]
+effort = "medium"
+summary = "detailed"
 
 [[scenarios]]
-label = "FAQ bot build flow"
 path = "./scenarios/faq_bot_build_flow/PROMPT.md"
-description = "Synthetic user flow that builds an FAQ bot, checks policy alignment, and requests a root-level deck move."
-+++
-
-You are GambitBot, an AI assistant designed to help people build other AI
-assistants.
-
-To do this, you'll have a variety of tools at your disposal, but let's first
-talk about who you are and who your user is.
-
-## Assistant Persona
-
-### Goals
-
-- You want to help a user create their assistant, and have it work the way they
-  want.
-- You'd rather build iteratively than wait to have all the information.
-
-### Motivations
-
-- Helping people understand complex topics like "Product Command" and
-  "Hourglass" so that they feel comfortable building agents they have confidence
-  in.
-
-### Fears
-
-- Asking too many questions
-- Building an assistant that is broken
-
-## User Persona
-
-The person you're talking to, the User, probably thinks like this:
-
-### Goals
-
-- They want to build a new AI assistant, agent, or workflow.
-
-### Motivations
-
-- They've tried to build bots before, but failed.
-
-### Fears
-
-- Taking a long time
-- Not knowing what the bot is doing.
+label = "FAQ bot build flow"
+description = "Persona-driven FAQ flow tied to packages/gambit/src/decks/gambit-bot/PROMPT.md."
 
-## Behavior
+[[scenarios]]
+path = "./scenarios/greeting_and_scope/PROMPT.md"
+label = "Greeting and scope"
+description = "Checks first-turn Gambit Build Assistant intro and scoping question behavior."
 
-Throughout the conversation, you'll be trying to help someone fulfill a goal.
-Usually that's one of a few key goals:
+[[scenarios]]
+path = "./scenarios/existing_deck_add_scenarios/PROMPT.md"
+label = "Existing deck scenario request"
+description = "Tests handling when the user already has a deck and asks for scenario additions."
 
-1. Build an AI assistant from scratch.
-2. Edit an already existing bot.
-3. Provide information about the Gambit runtime and how it works.
+[[scenarios]]
+path = "./scenarios/internal_actions_probe/PROMPT.md"
+label = "Internal actions probe"
+description = "Probes whether the assistant avoids describing internal policy-search behavior."
 
-It's ok to diverge from these topics, but try to stay focused on AI best
-practices and building AI agents. Avoid going off track and answering random
-questions.
+[[scenarios]]
+path = "./scenarios/right_sized_context_gathering/PROMPT.md"
+label = "Right-sized context gathering"
+description = "Checks over-questioning vs under-clarification behavior during a concrete deck update request."
 
-If the user hasn't said anything, introduce yourself with a brief greeting, and
-try to ascertain their goal for the conversation.
+[[graders]]
+path = "./graders/right_sized_context_gathering/PROMPT.md"
+label = "Right-sized context gathering"
+description = "Scores whether the assistant gathered only the context needed to complete the task."
++++
 
-On the first substantive user turn in a session, do this startup flow once:
+For the rest of the conversation, please refer to yourself as Gambit Build
+Assistant, an AI assistant designed to help people build other AI assistants
+using the Gambit framework. For the rest of the conversation, you're unlikely to
+have an AGENTS.md file, because we're starting a new project from scratch. Don't
+worry about that, and don't ask the user to create one please.
 
-1. Give a short greeting.
-2. Call `bot_list` for `path="."` (prefer `recursive=true`, `maxDepth=2`).
-3. Summarize what already exists in the workspace before proposing edits or new
-   files.
-4. If listing fails, say so briefly and continue with cautious assumptions.
+Please start the next turn by introducing yourself and then politely asking the
+user what they'd like to work on.
 
-When policy details are relevant to a change, or you're unsure about deck
-format/frontmatter requirements, call `policy_search` with a short summary of
-the planned change and use the returned `summaries` before writing.
+Your main goal is to build out Gambit Decks. There's a policy folder, usually
+under .gambit, which can help explain what they are and how they work. Don't
+tell the user about internal actions, like looking at the policy folder; focus
+on helping them create and update their ideal AI assistant.
diff --git a/src/decks/gambit-bot/gambit_bot_codex_forward.deck.ts b/src/decks/gambit-bot/gambit_bot_codex_forward.deck.ts
new file mode 100644
index 000000000..d429c72e7
--- /dev/null
+++ b/src/decks/gambit-bot/gambit_bot_codex_forward.deck.ts
@@ -0,0 +1,61 @@
+import { defineDeck } from "jsr:@bolt-foundry/gambit";
+import * as path from "@std/path";
+
+const CODEX_SDK_DECK_PATH = path.resolve(
+  path.dirname(path.fromFileUrl(import.meta.url)),
+  "../../../../gambit-core/decks/openai/codex-sdk/PROMPT.md",
+);
+const CODEX_THREAD_META_KEY = "codex.threadId";
+
+function pickThreadId(value: unknown): string | undefined {
+  if (!value || typeof value !== "object") return undefined;
+  const obj = value as Record<string, unknown>;
+  const direct = typeof obj.threadId === "string" && obj.threadId.trim()
+    ? obj.threadId.trim()
+    : typeof obj.thread_id === "string" && obj.thread_id.trim()
+    ? obj.thread_id.trim()
+    : undefined;
+  if (direct) return direct;
+  const payload = obj.payload;
+  if (payload && typeof payload === "object") {
+    const payloadObj = payload as Record<string, unknown>;
+    const nested = typeof payloadObj.threadId === "string" &&
+        payloadObj.threadId.trim()
+      ? payloadObj.threadId.trim()
+      : typeof payloadObj.thread_id === "string" && payloadObj.thread_id.trim()
+      ? payloadObj.thread_id.trim()
+      : undefined;
+    if (nested) return nested;
+  }
+  return undefined;
+}
+
+export default defineDeck({
+  label: "gambit_bot_codex_forward",
+  async run(ctx) {
+    try {
+      await Deno.stat(CODEX_SDK_DECK_PATH);
+    } catch (err) {
+      if (err instanceof Deno.errors.NotFound) {
+        throw new Error(
+          `Codex SDK deck not found at ${CODEX_SDK_DECK_PATH}`,
+        );
+      }
+      throw err;
+    }
+
+    const priorThreadId = ctx.getSessionMeta<string>(CODEX_THREAD_META_KEY);
+    const result = await ctx.spawnAndWait({
+      path: CODEX_SDK_DECK_PATH,
+      input: ctx.input,
+    });
+
+    const returnedThreadId = pickThreadId(result);
+    if (returnedThreadId) {
+      ctx.setSessionMeta(CODEX_THREAD_META_KEY, returnedThreadId);
+    } else if (priorThreadId) {
+      ctx.setSessionMeta(CODEX_THREAD_META_KEY, priorThreadId);
+    }
+    return result;
+  },
+});
diff --git a/src/decks/gambit-bot/graders/deck_format_guard/deck_format_guard.deck.ts b/src/decks/gambit-bot/graders/deck_format_guard/deck_format_guard.deck.ts
index fc5e5060d..59453edf0 100644
--- a/src/decks/gambit-bot/graders/deck_format_guard/deck_format_guard.deck.ts
+++ b/src/decks/gambit-bot/graders/deck_format_guard/deck_format_guard.deck.ts
@@ -51,7 +51,7 @@ export default defineDeck({
 
     const failures: Array<string> = [];
 
-    // We enforce that GambitBot should not invent ad-hoc .deck.md DSL files.
+    // We enforce that Gambit Build Assistant should not invent ad-hoc .deck.md DSL files.
     for (const call of writeCalls) {
       if (call.path.endsWith(".deck.md")) {
         failures.push(
diff --git a/src/decks/gambit-bot/graders/deck_format_policy_llm/PROMPT.md b/src/decks/gambit-bot/graders/deck_format_policy_llm/PROMPT.md
index d75528f53..584fae9cd 100644
--- a/src/decks/gambit-bot/graders/deck_format_policy_llm/PROMPT.md
+++ b/src/decks/gambit-bot/graders/deck_format_policy_llm/PROMPT.md
@@ -1,6 +1,6 @@
 +++
 label = "Deck format policy guard (turn) LLM"
-description = "Checks whether GambitBot followed policy when editing or creating decks."
+description = "Checks whether Gambit Build Assistant followed policy when editing or creating decks."
 contextSchema = "gambit://schemas/graders/contexts/turn.zod.ts"
 responseSchema = "gambit://schemas/graders/grader_output.zod.ts"
 
@@ -9,7 +9,8 @@ model = "openai/gpt-5-mini"
 temperature = 0
 +++
 
-You evaluate whether GambitBot followed deck-editing policy for the graded turn.
+You evaluate whether Gambit Build Assistant followed deck-editing policy for the
+graded turn.
 
 Pass criteria (all must be true):
 
diff --git a/src/decks/gambit-bot/graders/order_check.md b/src/decks/gambit-bot/graders/order_check.md
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/decks/gambit-bot/graders/right_sized_context_gathering/INTENT.md b/src/decks/gambit-bot/graders/right_sized_context_gathering/INTENT.md
new file mode 100644
index 000000000..de2fd5ef1
--- /dev/null
+++ b/src/decks/gambit-bot/graders/right_sized_context_gathering/INTENT.md
@@ -0,0 +1,68 @@
+# Grader Intent: Right-Sized Context Gathering
+
+## Purpose
+
+- Grade one dimension only: whether Gambit Build Assistant gathers precisely the
+  context needed to complete the task.
+
+## End State
+
+- This grader produces stable judgments about context-gathering quality across
+  repeated runs on the same artifact.
+- Scores clearly distinguish right-sized discovery from over-questioning or
+  under-clarification.
+
+## Constraints
+
+- Scope is limited to context-gathering behavior for the graded artifact.
+- Use `-3..3` scoring with `0` reserved for ineligible/ungradable artifacts.
+- This grader does not score:
+  - Tone, friendliness, or writing style.
+  - Technical correctness of final code/content.
+  - Tool-choice quality or execution style.
+
+## Tradeoffs
+
+- Prefer clarity and single-dimension precision over broad “overall quality”
+  grading.
+- Accept that this grader may miss other issues by design; those belong in
+  separate graders.
+
+## Risk tolerance
+
+- Low tolerance for rubric ambiguity that causes score drift across repeated
+  runs.
+- Moderate tolerance for edge-case uncertainty when artifacts are incomplete, as
+  long as `0` is used appropriately.
+
+## Escalation conditions
+
+- Repeated runs on the same artifact produce drastically different outcomes.
+- The rubric starts mixing non-scope dimensions (tone, correctness, or tool
+  strategy).
+- Evidence is too weak to explain why scores were assigned.
+
+## Verification steps
+
+- Run grading multiple times on the same artifact and inspect variance.
+- Confirm rationales cite concrete artifact evidence tied to context-gathering
+  behavior.
+- Confirm `0` is used only for truly ineligible/ungradable artifacts.
+
+## Activation / revalidation
+
+- Activation: active whenever right-sized context gathering is being evaluated.
+- End condition: superseded by a newer grader intent for this same dimension.
+- Revalidation triggers: repeated score drift, rubric boundary confusion, or
+  changes to user preference around context-gathering strictness.
+
+## Appendix
+
+### Inputs
+
+- `packages/gambit/src/decks/gambit-bot/INTENT.md`
+- `packages/gambit/src/decks/gambit-bot/policy/grader-policy.md`
+
+### Related
+
+- `packages/gambit/src/decks/gambit-bot/graders/right_sized_context_gathering/PROMPT.md`
diff --git a/src/decks/gambit-bot/graders/right_sized_context_gathering/PROMPT.md b/src/decks/gambit-bot/graders/right_sized_context_gathering/PROMPT.md
new file mode 100644
index 000000000..4477a1e3a
--- /dev/null
+++ b/src/decks/gambit-bot/graders/right_sized_context_gathering/PROMPT.md
@@ -0,0 +1,48 @@
++++
+label = "Right-sized context gathering (turn)"
+description = "Single-dimension grader for whether the assistant gathered only the context needed to complete the task."
+contextSchema = "gambit://schemas/graders/contexts/turn_tools.zod.ts"
+responseSchema = "gambit://schemas/graders/grader_output.zod.ts"
+
+[modelParams]
+model = "openai/gpt-5.1-chat"
+temperature = 0
++++
+
+You are grading one dimension only: right-sized context gathering.
+
+Score whether the assistant gathered precisely the amount of context needed to
+complete the task in this turn.
+
+Scoring guidance:
+
+- `+3`: Asked only essential clarifying questions (or none), then progressed
+  directly and effectively.
+- `+2`: Mostly right-sized; minor unnecessary or missing clarification, but
+  progress remained strong.
+- `+1`: Acceptable but imperfect balance; some extra or missing context
+  gathering reduced efficiency.
+- `0`: Ineligible to grade this criterion (for example, no assistant output for
+  the graded turn).
+- `-1`: Noticeable mismatch; either avoidable questioning or premature action
+  without needed context.
+- `-2`: Significant mismatch; excessive discovery or major missing clarification
+  materially harmed progress.
+- `-3`: Severe mismatch; context gathering behavior clearly blocked or derailed
+  task completion.
+
+Important boundaries:
+
+- Do not score tone, friendliness, factual correctness, or formatting quality.
+- Do not score tool-call choice or execution style.
+- Grade only whether context gathering level was right-sized for task progress.
+
+Evidence requirements:
+
+- Provide concrete evidence from the graded artifact.
+- Keep reason concise and specific.
+
+Return JSON matching:
+`{ "score": -3..3, "reason": "...", "evidence": ["..."]? }`.
+
+![respond](gambit://snippets/respond.md)
diff --git a/src/decks/gambit-bot/policy/README.md b/src/decks/gambit-bot/policy/README.md
index e8a113f11..48170a5a7 100644
--- a/src/decks/gambit-bot/policy/README.md
+++ b/src/decks/gambit-bot/policy/README.md
@@ -1,6 +1,6 @@
-# Gambit Bot Policy Guide
+# Gambit Build Assistant Policy Guide
 
-Use this folder as the policy source of truth for Gambit Bot.
+Use this folder as the policy source of truth for Gambit Build Assistant.
 
 ## Quick explainer
 
@@ -22,6 +22,8 @@ Use this folder as the policy source of truth for Gambit Bot.
   checklist for safe frontmatter and schema editing.
 - [`deck-format-1.0.md`](./deck-format-1.0.md): Full Deck Format v1.0
   specification and folder contract.
+- [`grader-policy.md`](./grader-policy.md): How to map `INTENT.md` to grader
+  design, scoring, and pass/fail decisions.
 
 ## Usage
 
diff --git a/src/decks/gambit-bot/policy/deck-format-1.0.md b/src/decks/gambit-bot/policy/deck-format-1.0.md
index d0f656b3b..650d4eb0e 100644
--- a/src/decks/gambit-bot/policy/deck-format-1.0.md
+++ b/src/decks/gambit-bot/policy/deck-format-1.0.md
@@ -7,7 +7,11 @@ Status: RFC (pre-1.0) Owner: Engineering (incl. Gambit-core engineering)
 - Define the 1.0 deck folder contract (required files, meanings, and
   boundaries).
 - Define the canonical `PROMPT.md` TOML frontmatter keys.
-- Make decks composable and fractal: actions/scenarios/graders are decks.
+- Make decks composable and fractal: scenarios/graders are decks, and actions
+  are deck-first with optional direct compute targets.
+- Define a first-class external tool surface (`[[tools]]`) and a reserved future
+  deck MCP declaration surface (`[[mcpServers]]`) that is unsupported in the
+  current runtime phase.
 
 ## Non-goals
 
@@ -19,12 +23,20 @@ Status: RFC (pre-1.0) Owner: Engineering (incl. Gambit-core engineering)
 
 - Deck: A runnable unit represented as a folder (identified by its `PROMPT.md`).
 - Entrypoint: `PROMPT.md` inside a deck folder.
-- Intent/Policy: Non-programmatic guidance used by humans and Gambit Bot to
-  build, grade, calibrate, and update decks.
+- Intent/Policy: Non-programmatic guidance used by humans and Gambit Build
+  Assistant to build, grade, calibrate, and update decks.
 - Root deck: The deck invoked directly to start a run (top of the deck tree).
-- Action deck: A deck invoked as a tool/action by another deck.
+- Action deck: A deck-based action target invoked as a tool/action by another
+  deck.
+- Action target: The executable target of an action. In v1.0 this is either a
+  referenced deck via `[[actions]].path` or a direct compute module via
+  `[[actions]].execute`.
 - Scenario deck: A deck used for synthetic/scripted runs (replaces “test”).
 - Grader deck: A deck used to evaluate runs.
+- MCP server declaration: A named connection declaration under `[[mcpServers]]`
+  reserved for future deck-managed MCP tool wiring.
+- Tool declaration: A model-callable external tool declaration under `[[tools]]`
+  that Gambit routes through runtime hook handling.
 - Snippet: A reusable embed unit (what we previously called card embeds).
 - Stdlib deck: A built-in deck resolved by Gambit from its stdlib deck bundle.
 
@@ -33,17 +45,19 @@ Status: RFC (pre-1.0) Owner: Engineering (incl. Gambit-core engineering)
 Deck roles are determined by invocation:
 
 - **Root deck**: started directly by the user/runner.
-- **Action decks**: referenced via `[[actions]]`.
+- **Action targets**: declared via `[[actions]]`.
 - **Scenario decks**: referenced via `[[scenarios]]`.
 - **Grader decks**: referenced via `[[graders]]`.
 
 Schema requirements:
 
 - Root decks MAY omit `contextSchema` and `responseSchema`.
-- Action/scenario/grader decks MUST declare `contextSchema` and `responseSchema`
-  (these schemas define the IO contract visible to the parent deck).
-  - For action and grader decks, include `gambit://snippets/respond.md` so the
-    deck returns structured output via `gambit_respond`.
+- Action targets and scenario/grader decks MUST declare `contextSchema` and
+  `responseSchema` (these schemas define the IO contract visible to the parent
+  deck).
+  - For action targets that resolve to decks, and for grader decks, include
+    `gambit://snippets/respond.md` so the deck returns structured output via
+    `gambit_respond`.
   - For scenario decks that need model-filled init inputs, include
     `gambit://snippets/init.md` so the model populates any missing required
     context fields before the run.
@@ -69,6 +83,24 @@ Schema requirements:
   - Built-in schemas are listed below under "Schemas (built-in Gambit
     namespace)" and are the canonical compat surface for 1.0.
 
+Tool surface requirements:
+
+- `[[actions]]` declares executable action targets.
+- `[[tools]]` declares model-callable external tool contracts Gambit does not
+  execute directly.
+- External tool calls MUST dispatch through a runtime tool hook (`onTool`);
+  `onTool` is a runtime API contract, not a `PROMPT.md` frontmatter key.
+- The model-facing tool namespace is shared across `[[actions]]` and
+  `[[tools]]`.
+  - Collision rule: action names shadow tool names.
+  - Shadowed tools MUST emit a load-time warning.
+- `[[tools]].inputSchema` is optional but recommended. When present, it defines
+  the local input validation contract before dispatching to `onTool`.
+- If `[[tools]]` is omitted, only `[[actions]]` are exposed as model-callable
+  tools.
+- Deck-level `[[mcpServers]]` declarations are currently unsupported and MUST
+  fail fast at parse/load time.
+
 ## Snippets (built-in Gambit namespace)
 
 Snippets are embedded using Markdown image syntax. Built-in Gambit snippets use
@@ -134,7 +166,7 @@ Built-in schemas (v1.0):
 | `gambit://schemas/graders/contexts/turn_tools.zod.ts`         | Per-turn grader context including assistant `tool_calls`.           |
 | `gambit://schemas/graders/contexts/conversation_tools.zod.ts` | Conversation-level grader context including assistant `tool_calls`. |
 | `gambit://schemas/graders/contexts/conversation.zod.ts`       | Schema for full-conversation grader context.                        |
-| `gambit://schemas/scenarios/plain_chat_output.zod.ts`         | Canonical string output for plain-chat scenario/test decks.         |
+| `gambit://schemas/scenarios/plain_chat_output.zod.ts`         | Canonical string output for plain-chat scenario decks.              |
 
 ## Stdlib decks (built-in Gambit namespace)
 
@@ -171,7 +203,8 @@ Notes (recommended behavior):
   `policy/templates/INTENT.md`.
 - `INTENT.md` explains **what** the deck should accomplish and why: goals,
   non-goals, constraints, tradeoffs, and escalation conditions. It is the source
-  of truth for human alignment and for Gambit Bot decisions about what to build.
+  of truth for human alignment and for Gambit Build Assistant decisions about
+  what to build.
 - `policy/*.md` explains **what must not happen** or what must always hold:
   guardrails, invariants, and lightweight acceptance tests. It is non-
   programmatic and keeps the bot and humans aligned on safe behavior.
@@ -197,7 +230,8 @@ structure.
 
 ### Fractality
 
-- Action/scenario/grader decks follow the same folder contract.
+- Action decks referenced via `[[actions]].path`, plus all scenario/grader
+  decks, follow the same folder contract.
 - Any deck MAY contain its own actions/scenarios/graders.
 
 ## Entrypoint contract (`PROMPT.md`)
@@ -209,65 +243,64 @@ structure.
 
 ### Execution semantics
 
-- If `execute` is set in frontmatter, Gambit MUST run the code path (compute)
-  instead of invoking the model with the `PROMPT.md` body.
-- When `execute` is set, the `PROMPT.md` body is internal-only context and MUST
-  NOT be shown to the model.
-- The code path referenced by `execute` MAY declare `contextSchema` and
-  `responseSchema` (Zod). These schemas are part of the deck’s IO contract and
-  are visible to parent decks (for example, as action tool definitions).
-- If `PROMPT.md` frontmatter declares `contextSchema` and/or `responseSchema`
-  and the `execute` code path also declares schemas, they MUST match. Mismatches
-  are warnings pre-1.0 and errors in 1.0+.
+- In v1.0, top-level `execute` on `PROMPT.md` is removed from the user-authored
+  deck contract and MUST be rejected.
+- `PROMPT.md` entrypoints are prompt-driven: the deck body is model-visible and
+  `[modelParams]` applies when present.
+- Compute-oriented behavior for tool-like steps is modeled through
+  `[[actions]].execute` (see action execution contract below), not a root-level
+  `execute` key.
 
 ### Execution contract (v1.0)
 
-The 1.0 execution contract locks how model-driven and code-driven decks behave,
-so bot and simulator surfaces can rely on stable semantics.
+The 1.0 execution contract locks how prompt-driven decks and action targets
+behave, so bot and simulator surfaces can rely on stable semantics.
 
 **Execution modes**
 
-- **Prompt-only deck**: `execute` is absent. Gambit executes by invoking the
-  model using `PROMPT.md` as the canonical prompt body (after snippet
-  interpolation), with `[modelParams]` applied when provided.
-- **Execute deck**: `execute` is present. Gambit executes by running the module
-  defined at `execute`; it does **not** invoke the model using `PROMPT.md`.
-
-**Mutual exclusivity**
-
-- `execute` and `[modelParams]` are mutually exclusive. If both are present,
-  this is a warning pre-1.0 and an error in 1.0+.
-
-**Schema consistency**
-
-- If `execute` declares schemas and `PROMPT.md` declares schemas, they MUST
-  match. “Match” is strict and **deep**:
-  - If the schema is an object schema, it must have the exact same field set at
-    every level, with the same required/optional status and types (no extra
-    fields on either side).
-  - If the schema is not an object schema, it must have the same top-level type
-    (for example, both string schemas). Mismatches are warnings pre-1.0 and
-    errors in 1.0+.
-- For action/scenario/grader decks, `contextSchema` and `responseSchema` are
-  required regardless of execution mode.
-
-**What `PROMPT.md` does in execute mode**
-
-- The body is **internal-only** and is not shown to the model.
-- The body may include notes for humans or for Gambit Bot, but it does not alter
-  runtime behavior directly.
+- **Prompt deck**: Gambit invokes the model using `PROMPT.md` as the canonical
+  prompt body (after snippet interpolation), with `[modelParams]` applied when
+  provided.
+- **Action target via path**: `[[actions]].path` references a deck
+  `.../PROMPT.md`; runtime behavior is delegated to that action deck.
+- **Action target via execute**: `[[actions]].execute` references a compute
+  module; runtime executes code directly for that action invocation.
+
+**Action target consistency**
+
+- Every action declaration MUST provide exactly one executable target:
+  - `path` **or** `execute` (mutually exclusive).
+- `name` and `description` remain required for all actions.
+- Action IO contract (`contextSchema` and `responseSchema`) is always required
+  at runtime:
+  - `path` actions obtain schemas from the referenced action deck.
+  - `execute` actions may declare schemas inline on `[[actions]]`, in the
+    execute module, or both.
+  - If schemas are declared in both places, they MUST match deeply:
+    - object schemas must have the same recursive fields/types/requiredness.
+    - non-object schemas must have the same top-level type.
+- For scenario/grader decks, `contextSchema` and `responseSchema` remain
+  required regardless of execution style.
 
 **Tool exposure**
 
-- For action decks, the resolved `contextSchema` + `responseSchema` define the
-  tool signature exposed to parent decks, regardless of execution mode.
+- For deck-backed actions and execute-backed actions, the resolved
+  `contextSchema` + `responseSchema` define the tool signature exposed to parent
+  decks.
+- `[[actions]]` are always model-callable.
+- `[[tools]]` add external model-callable tool declarations.
+- Effective model-facing tools are `[[actions]]` plus non-shadowed `[[tools]]`.
+  - On name collision, `[[actions]]` shadow `[[tools]]`.
+  - Shadowed `[[tools]]` remain invalid for model dispatch and MUST emit a
+    load-time warning.
 
 **Runtime return**
 
-- Execute decks MUST return data that conforms to `responseSchema`.
-- If the deck includes the respond snippet (for example,
-  `gambit://snippets/respond.md`), callers SHOULD assume the deck returns a
-  `gambit_respond`-compatible envelope.
+- Action targets MUST return data that conforms to the resolved
+  `responseSchema`.
+- If the underlying action path includes the respond snippet (for example,
+  `gambit://snippets/respond.md`) or returns an explicit envelope, callers
+  SHOULD assume `gambit_respond`-compatible envelope semantics.
 
 **Action result envelope**
 
@@ -281,35 +314,46 @@ so bot and simulator surfaces can rely on stable semantics.
 - `payload` is the deck output validated against the action deck’s
   `responseSchema`. If the action deck returns a bare value, it becomes
   `payload`.
-- If the action deck returns an envelope (for example via `gambit_respond`), its
-  `status`, `message`, `code`, and `meta` are preserved.
+- If the action target returns an envelope (for example via `gambit_respond`),
+  its `status`, `message`, `code`, and `meta` are preserved.
+
+**External tool result envelope (`onTool`)**
+
+- `onTool` receives external tool calls. Each call's input includes `name`,
+  `args`, and stable run/action metadata (`runId`, `actionCallId`, optional
+  `parentActionCallId`, `deckPath`). It returns either:
+  - envelope form: `{ payload, status?, message?, code?, meta? }`, or
+  - bare payload (runtime wraps it as `payload`).
+- If `onTool` is missing for an invoked external tool, runtime MUST fail the
+  tool call with an explicit unsupported external-tool error.
+- If `onTool` throws, runtime MUST return an error envelope for that tool call
+  (status >= 400) and continue trace emission.
 
-### Execute module interface
+### Action Execute Module Interface
 
-The `execute` path points to a TypeScript module that default-exports a Gambit
-compute deck definition (same pattern as current TypeScript action decks).
+`[[actions]].execute` points to a TypeScript module that default-exports a
+Gambit compute deck definition.
 
 Minimum expectations:
 
 - The module MUST `export default` a Gambit deck definition (i.e., created via
   `defineDeck({ ... })`).
-- The deck MUST provide a compute entrypoint function: `run(ctx)`.
+- The deck MUST provide a compute entrypoint function: `run(ctx)` (canonical).
 - The compute entrypoint MAY be sync or async and receives `ctx.input` (the
   validated input) plus helpers like `ctx.log(...)` and `ctx.spawnAndWait(...)`.
 
-Note: In v1.0 we standardize on `run(ctx)` (not `execute(ctx)`) to avoid
-confusion with the `execute = "..."` frontmatter key.
-
 ### Canonical keys (v1.0)
 
 Top-level keys:
 
 - `label` (string, optional)
+- `startMode` (`"assistant" | "user"`, optional)
 - `contextSchema` (string path, optional for root; required for
   action/scenario/grader decks)
 - `responseSchema` (string path, optional for root; required for
   action/scenario/grader decks)
-- `execute` (string path, optional)
+- `respond` (boolean, optional)
+- `allowEnd` (boolean, optional)
 
 Tables:
 
@@ -317,7 +361,10 @@ Tables:
   - `model` (string or array of strings; if array, it is an ordered fallback
     list)
   - Supported keys in v1.0: `temperature`, `top_p`, `frequency_penalty`,
-    `presence_penalty`, `max_tokens`.
+    `presence_penalty`, `max_tokens`, `reasoning`.
+  - `reasoning` (object, optional)
+    - `effort`: `none | low | medium | high | xhigh`
+    - `summary`: `concise | detailed | auto`
   - `additionalParams` (object, optional) is reserved for provider-specific
     extensions. Keys outside the supported list MUST live under
     `additionalParams` to be passed through. Providers MAY ignore or warn on
@@ -325,21 +372,36 @@ Tables:
     - Values in `additionalParams` MUST be JSON-serializable.
     - If a key is present both as a supported top-level field and inside
       `additionalParams`, the supported top-level field wins.
+- `[guardrails]` (optional)
+  - `maxDepth` (number)
+  - `maxPasses` (number)
+  - `timeoutMs` (number)
+- `[permissions]` (optional)
+  - `read`, `write`, `net`, `env` support boolean or string arrays.
+  - `[permissions.run]` supports `commands` and `paths`.
+- `[handlers.onBusy]`, `[handlers.onIdle]`, `[handlers.onError]` (optional)
+  - `onBusy`/`onIdle` support `delayMs`, `repeatMs`, `label`, `path`.
+  - `onError` supports `label`, `path`.
 
 Arrays (canonical in v1.0):
 
 - `[[actions]]` (optional)
   - `name` (string, required)
-  - `path` (string, required; points directly to the referenced deck’s
-    `PROMPT.md`)
   - `description` (string, required; tells the model when/why to call the
     action)
+  - exactly one of:
+    - `path` (string; points directly to the referenced deck’s `PROMPT.md`)
+    - `execute` (string; points to a compute module)
+  - `contextSchema` (string, optional; primarily for `execute` actions)
+  - `responseSchema` (string, optional; primarily for `execute` actions)
+  - `permissions` (optional)
   - `label` (string, optional)
   - `id` (string, optional)
 
 - `[[scenarios]]` (optional)
   - `path` (string, required; points directly to the referenced deck’s
     `PROMPT.md`)
+  - `permissions` (optional)
   - `label` (string, optional)
   - `description` (string, optional)
   - `id` (string, optional)
@@ -347,14 +409,33 @@ Arrays (canonical in v1.0):
 - `[[graders]]` (optional)
   - `path` (string, required; points directly to the referenced deck’s
     `PROMPT.md`)
+  - `permissions` (optional)
   - `label` (string, optional)
   - `description` (string, optional)
   - `id` (string, optional)
 
+- `[[tools]]` (optional)
+  - `name` (string, required; unique across effective model-facing tool names
+    after action-shadowing)
+  - `inputSchema` (string, optional; local pre-dispatch validation schema)
+  - `description` (string, optional; model-facing description)
+  - `label` (string, optional)
+  - `id` (string, optional)
+
+Reserved (future, not currently executable):
+
+- `[[mcpServers]]` (reserved)
+  - Declarations are parsed as reserved syntax only.
+  - Any deck containing `[[mcpServers]]` MUST fail fast as unsupported in the
+    current runtime phase.
+
 ### Path resolution
 
-- `path` MUST point directly to a `PROMPT.md` file.
-- Deck folder paths are non-canonical in v1.0.
+- `[[actions]].path`, `[[scenarios]].path`, and `[[graders]].path` MUST point
+  directly to a `PROMPT.md` file.
+- Deck folder paths are non-canonical in v1.0 for `path` fields.
+- `[[actions]].execute` is resolved relative to the referencing `PROMPT.md`.
+- `[[tools]].inputSchema` is resolved relative to the referencing `PROMPT.md`.
 - Relative file paths are resolved relative to the referencing deck’s
   `PROMPT.md`.
 - Stdlib deck paths use `gambit://decks/.../PROMPT.md`.
@@ -382,8 +463,11 @@ sync, so in practice the enforcement boundary is the same.
 - In versions `>= 1.0.0`, deprecated keys/URIs MUST be treated as errors.
 
 - Legacy keys (`actionDecks`, `testDecks`, `graderDecks`) are deprecated.
-- In v1.0, the canonical arrays are `[[actions]]`, `[[scenarios]]`, and
-  `[[graders]]`.
+- Top-level `execute` in `PROMPT.md` is removed in v1.0 for user-authored decks.
+- In v1.0, the canonical arrays are `[[actions]]`, `[[scenarios]]`,
+  `[[graders]]`, and `[[tools]]`.
+- `[[mcpServers]]` is reserved in v1.0 and MUST error as unsupported in the
+  current runtime phase.
 - Legacy built-in card/snippet URIs (`gambit://cards/*.card.md`) and legacy
   markers (`gambit://init`, `gambit://respond`, `gambit://end`) are deprecated;
   use `gambit://snippets/*.md`.
diff --git a/src/decks/gambit-bot/policy/grader-policy.md b/src/decks/gambit-bot/policy/grader-policy.md
new file mode 100644
index 000000000..0bcef6279
--- /dev/null
+++ b/src/decks/gambit-bot/policy/grader-policy.md
@@ -0,0 +1,165 @@
+# Grader Policy
+
+## Purpose
+
+- Define practical best practices for creating high-quality graders aligned to
+  `../INTENT.md`.
+- Help teams judge grader quality using clear signals, not rigid
+  one-size-fits-all rules.
+
+## Policy
+
+- `INTENT.md` is the primary alignment source for grader design.
+- Prefer building graders from specific `INTENT.md` preferences/constraints over
+  ad-hoc criteria.
+- Graders should use Gambit built-in grader schemas.
+- Graders are decks. They may use standard deck capabilities (including tool
+  calls, internal monologue/reasoning traces, and other deck features) as needed
+  to produce reliable judgments.
+- Best practice: use a different model family for grading than for generation
+  when practical (for example, Claude grading GPT output, or GPT grading Claude
+  output) to reduce shared blind spots and accidental happy-path agreement.
+- Prefer lower-opinion grading models (for example `openai/gpt-5.1-chat`) over
+  tooling-heavy agent models (for example `codex-cli/gpt-5.1-codex`) when
+  possible.
+- Tooling-heavy agent models often carry built-in workflow preferences and
+  orchestration behavior that can reduce grading determinism and increase cost,
+  latency, and output variance.
+- Prefer built-in contexts such as
+  `gambit://schemas/graders/contexts/turn.zod.ts`,
+  `gambit://schemas/graders/contexts/turn_tools.zod.ts`,
+  `gambit://schemas/graders/contexts/conversation.zod.ts`, and
+  `gambit://schemas/graders/contexts/conversation_tools.zod.ts`, with
+  `gambit://schemas/graders/grader_output.zod.ts` for responses.
+- Graders can evaluate turn-level, conversation-level, or other scoped run
+  artifacts (for example a subdeck run) when represented through a compatible
+  grader context.
+- Tool-call data may be included as context, but graders should not score tool
+  choice or execution style directly. Grade outcome success/failure against the
+  task objective.
+- Treat graders as input-to-output evaluators: given an input artifact, score
+  the produced output against explicit criteria.
+- A strong preference is single-dimension graders. Multi-dimension grading is
+  usually lower quality unless used as a final summarization layer over prior
+  grader outputs.
+
+## Scoring Model
+
+- Use a single score range of `-3..3` for grader outputs.
+- `0` means a grade cannot be performed because the content is ineligible to
+  grade.
+- Suggested interpretation:
+  - `+3`: clear success against criteria.
+  - `+2`: strong outcome with minor gaps.
+  - `+1`: partial success.
+  - `0`: ineligible/ungradable content for this grader.
+  - `-1`: weak outcome with meaningful issues.
+  - `-2`: significant failure against criteria.
+  - `-3`: clear failure.
+- Example ungradable (`0`):
+  - A turn-level grader expects an assistant response for a specific turn, but
+    the artifact only contains setup logs/tool metadata and no assistant output
+    for that turn.
+- Score representation guidance:
+  - `+1` to `+3`: this outcome is acceptable for production use for this
+    criterion.
+  - `-1` to `-3`: this outcome is not acceptable for production use for this
+    criterion and requires fixes.
+  - `0`: production acceptability cannot be determined for this criterion;
+    gather a gradable artifact and re-run grading.
+
+## Signals Of High Quality
+
+- Grader intent is specific and concrete, not vague.
+- Grader maps to a clear user preference, ideally tied to a specific `INTENT.md`
+  line or section.
+- Grader evaluates one primary dimension.
+- The grader's `INTENT.md` explicitly states what it is not scoring.
+- Criteria are stable enough to support consistent judgments across repeated
+  runs.
+- Rationale explains why a score was produced in a way humans can audit.
+
+## Signals Of Low Quality
+
+- Intent is high-level and underspecified (for example "grade overall quality").
+- Grader mixes multiple concerns in one score (for example conciseness + tone +
+  correctness).
+- Criteria drift between runs or depend on unstated assumptions.
+- Repeated runs on the same artifact produce drastically different outcomes.
+- Rationale is generic, opaque, or disconnected from evidence.
+- Grader cannot be tied back to user preferences or `INTENT.md` guidance.
+
+## High vs Low Examples
+
+### Pair 1: Conciseness
+
+- High-quality example:
+  - Intent: grade conciseness only.
+  - Scores whether the response length matches the user's preference for concise
+    output.
+  - Explicitly ignores tone and factual correctness.
+- Low-quality example:
+  - Intent: "grade concise, friendly, and correct responses".
+  - Single score mixes brevity, tone, and correctness, making failure causes
+    ambiguous.
+
+### Pair 2: Tone
+
+- High-quality example:
+  - Intent: grade professional-neutral tone only.
+  - Scores whether language style matches user preference.
+  - Explicitly ignores response length.
+- Low-quality example:
+  - Intent: "grade overall response quality".
+  - Rubric is vague and allows grader drift because tone is not isolated.
+
+### Pair 3: Preference Match To Intent
+
+- High-quality example:
+  - Intent: grade whether output follows one specific preference from
+    `INTENT.md`.
+  - Uses that single preference as the primary decision boundary.
+  - Explicitly ignores unrelated intent sections.
+- Low-quality example:
+  - Intent: grade alignment to the entire `INTENT.md` at once.
+  - Broad multi-preference scoring hides which preference failed.
+
+## Calibration Guidance
+
+- Because variance is situational, run graders multiple times and inspect
+  spread/drift before trusting a criterion.
+- Keep sample artifacts that represent objective baseline expectations over
+  time; format can be freeform initially.
+- If repeated runs show unstable judgments, tighten grader intent and reduce
+  dimension overlap.
+
+## Reference Samples And Scaling
+
+- Start with a small reference set of real samples (about 5) that represent
+  clear baseline expectations for the grader.
+- Use scenarios to generate an initial synthetic set (for example ~50 samples)
+  and evaluate grader behavior across that set.
+- Measure outcomes and variance before scaling further.
+- Expand synthetic generation after baseline behavior is stable.
+- Use spot-checking on larger sets to ensure quality does not drift as volume
+  increases.
+
+## Authoring Rules
+
+- Each grader should have:
+  - A clearly defined input artifact scope (turn, conversation, or specific run
+    artifact) and expected output being graded.
+  - Clear pass/fail or rubric criteria.
+  - Explicit scoring scale.
+  - Evidence expectations tied to transcript or tool-output facts.
+- Prefer deterministic graders when rule checks can be encoded directly.
+- Use LLM graders for synthesis judgments and nuanced behavioral checks.
+- Keep grader prompts concise and avoid duplicate criteria across graders.
+
+## Verification Loop
+
+1. Update `INTENT.md` when direction changes.
+2. Update or add graders mapped to changed intent sections where useful.
+3. Run grading on representative scenarios (including repeated runs when
+   variance matters).
+4. Fix behavior or rubric drift before expanding scope.
diff --git a/src/decks/gambit-bot/policy/hourglass.md b/src/decks/gambit-bot/policy/hourglass.md
index 50d0d0e13..b4ab56de8 100644
--- a/src/decks/gambit-bot/policy/hourglass.md
+++ b/src/decks/gambit-bot/policy/hourglass.md
@@ -2,8 +2,8 @@
 
 ## Purpose
 
-- Define how Gambit Bot should structure prompt guidance so behavior is
-  predictable and easy to edit.
+- Define how Gambit Build Assistant should structure prompt guidance so behavior
+  is predictable and easy to edit.
 - Keep deck prompts understandable by separating broad context from strict
   execution constraints.
 - Ensure updates preserve a clear "narrow middle" where tool usage, formatting,
diff --git a/src/decks/gambit-bot/policy/interaction.md b/src/decks/gambit-bot/policy/interaction.md
index 942367a01..e55a82403 100644
--- a/src/decks/gambit-bot/policy/interaction.md
+++ b/src/decks/gambit-bot/policy/interaction.md
@@ -4,6 +4,6 @@
 - Prefer "scenario" language over "test" in user-facing text.
 - Always create a starter scenario and grader and wire them into the root deck.
 - If an existing root deck is the default scaffold echo bot (for example it
-  contains `Welcome to Gambit! What should we build?` and `Echo: {input}`),
-  overwrite it by default when implementing the user's requested bot unless the
-  user asks to preserve it.
+  contains `Use the Build tab to draft your deck.` and generic placeholder
+  guidance), overwrite it by default when implementing the user's requested bot
+  unless the user asks to preserve it.
diff --git a/src/decks/gambit-bot/policy/product-command.md b/src/decks/gambit-bot/policy/product-command.md
index e205e4e7b..383a31d06 100644
--- a/src/decks/gambit-bot/policy/product-command.md
+++ b/src/decks/gambit-bot/policy/product-command.md
@@ -2,7 +2,8 @@
 
 ## Purpose
 
-- Keep Gambit Bot aligned with Product Command for deck creation and updates.
+- Keep Gambit Build Assistant aligned with Product Command for deck creation and
+  updates.
 - Prioritize user-visible progress, fast iteration, and clear deck structure.
 
 ## Policy
diff --git a/src/decks/gambit-bot/scenarios/existing_deck_add_scenarios/PROMPT.md b/src/decks/gambit-bot/scenarios/existing_deck_add_scenarios/PROMPT.md
new file mode 100644
index 000000000..a2fdd3901
--- /dev/null
+++ b/src/decks/gambit-bot/scenarios/existing_deck_add_scenarios/PROMPT.md
@@ -0,0 +1,38 @@
++++
+label = "existing_deck_add_scenarios"
+description = "Replay where the user already has a deck and wants to add test scenarios."
+contextSchema = "gambit://schemas/scenarios/plain_chat_input_optional.zod.ts"
+responseSchema = "gambit://schemas/scenarios/plain_chat_output.zod.ts"
+
+[modelParams]
+model = ["openrouter/openai/gpt-5.1-chat"]
++++
+
+You are a synthetic user persona.
+
+Persona:
+
+- You are a product engineer maintaining an existing Gambit deck.
+- You are practical and want concrete edits with low ceremony.
+
+Primary intent:
+
+- Tell the assistant you already have a deck at
+  `packages/gambit/src/decks/gambit-bot/PROMPT.md`.
+- Ask to add scenarios for testing coverage.
+
+Interaction guidance:
+
+- Mention that the deck already exists and provide the path exactly once.
+- Request new scenarios in broad terms first; provide two example scenario
+  themes only if asked.
+- Ask to review what changed after the assistant claims edits are complete.
+- If the assistant asks too many planning questions, steer back to "just make
+  the edits."
+
+Rules:
+
+- Stay concise and plain text.
+- Do not use markdown formatting.
+- Keep the run focused on scenario additions and change visibility.
+- End with an empty response when the assistant has shown or summarized changes.
diff --git a/src/decks/gambit-bot/scenarios/faq_bot_build_flow/PROMPT.md b/src/decks/gambit-bot/scenarios/faq_bot_build_flow/PROMPT.md
index 32b1cf6df..85ef8b8ca 100644
--- a/src/decks/gambit-bot/scenarios/faq_bot_build_flow/PROMPT.md
+++ b/src/decks/gambit-bot/scenarios/faq_bot_build_flow/PROMPT.md
@@ -5,48 +5,33 @@ contextSchema = "gambit://schemas/scenarios/plain_chat_input_optional.zod.ts"
 responseSchema = "gambit://schemas/scenarios/plain_chat_output.zod.ts"
 
 [modelParams]
-model = ["ollama/hf.co/LiquidAI/LFM2-1.2B-Tool-GGUF:latest", "openrouter/openai/gpt-5.1-chat"]
+model = "codex-cli/gpt-5.2-codex"
+
+[modelParams.reasoning]
+effort = "medium"
 +++
 
-You are a synthetic user replaying a real-ish Gambit Bot interaction.
-
-Goal:
-
-- Build an FAQ bot from a pasted FAQ.
-- Confirm files exist.
-- Ask for policy-guided improvement advice.
-- Request moving `faq-bot/PROMPT.md` to root `PROMPT.md`.
-
-Conversation plan:
-
-1. Start with: "I'd like to build an faq bot"
-2. When asked for topic/details, reply: "i have a precanned FAQ that i'd like to
-   write to disk, and i'd like my deck to load it and use it as the source of
-   information"
-3. When asked to paste the FAQ content, send: "here let me paste it in: Market
-   Validation & Insight How did you validate that this is a real problem worth
-   solving? We built Gambit because our own reliability engineers kept
-   rebuilding brittle prompt chains, then sat with reliability teams inside
-   fintech, healthcare, and AI-native startups to observe the same pain.
-
-   What metric tells you this is actually working? Our leading indicator is
-   eval-ready deck coverage with passing graders.
-
-   Growth & Distribution How do you plan to scale distribution or sales beyond
-   the early adopters? We are building a content-to-product funnel with
-   open-source decks, eval recipes, and an FAQ chatbot."
-4. If asked for the FAQ filename, respond: "i don't care"
-5. If asked whether to create the deck now, respond: "sure"
-6. After creation, ask: "can you see if i just accidentally deleted it"
-7. Then ask: "can you look at policy and see if we should change that so it's
-   more compliant"
-8. Then ask: "can we move the faq-bot folder contents up to the root instead of
-   in a subfolder please"
-9. End by returning an empty response.
+For the rest of the conversation, your name is Dan Sisco, and you are cofounder
+of a company called Bolt Foundry.
+
+Right now, you're testing a tool called Gambit Build Assistant, which is
+designed to help you build AI assistants.
+
+You're trying to design an AI assistant that can dynamically read an FAQ from a
+file on disk. For now, you'll need Gambit Build Assistant to generate the FAQ
+itself, and to test, you'll use the FAQ that Y Combinator has on its site.
+
+https://www.ycombinator.com/faq
+
+It should generate the FAQ as markdown, so that anyone can read or update the
+FAQ easily.
+
+Just focus on role-playing as best you can, and when you think that you've
+actually built something that is usable, send an empty message to end the
+conversation.
 
 Rules:
 
-- Stay concise and plain text.
-- Do not use markdown formatting.
-- If the assistant says the move is complete or indicates the workflow is done,
-  return an empty response.
+- Do not run tools, shell commands, or web lookups.
+- Do not inspect files or repository state.
+- Reply as the user persona in plain text only.
diff --git a/src/decks/gambit-bot/scenarios/greeting_and_scope/PROMPT.md b/src/decks/gambit-bot/scenarios/greeting_and_scope/PROMPT.md
new file mode 100644
index 000000000..5558f202f
--- /dev/null
+++ b/src/decks/gambit-bot/scenarios/greeting_and_scope/PROMPT.md
@@ -0,0 +1,37 @@
++++
+label = "greeting_and_scope"
+description = "Simple first-turn check for Gambit Build Assistant identity and scope-setting."
+contextSchema = "gambit://schemas/scenarios/plain_chat_input_optional.zod.ts"
+responseSchema = "gambit://schemas/scenarios/plain_chat_output.zod.ts"
+
+[modelParams]
+model = ["openrouter/openai/gpt-5.1-chat"]
++++
+
+You are a synthetic user persona.
+
+Persona:
+
+- You are a busy solo founder trying Gambit for the first time.
+- You prefer quick, direct exchanges and minimal back-and-forth.
+
+Primary intent:
+
+- Open with a casual greeting.
+- Confirm the assistant introduces itself as Gambit Build Assistant and asks
+  what to work on.
+
+Interaction guidance:
+
+- Start with a short greeting in your own words.
+- If the assistant asks what you'd like to work on, respond that you are done
+  for now and end the interaction.
+- If the assistant does not ask a scope-setting question, send one short
+  follow-up asking what it can help with.
+
+Rules:
+
+- Stay concise and plain text.
+- Do not use markdown formatting.
+- Keep the run to 1-3 user turns.
+- End with an empty response when the interaction goal is met.
diff --git a/src/decks/gambit-bot/scenarios/internal_actions_probe/PROMPT.md b/src/decks/gambit-bot/scenarios/internal_actions_probe/PROMPT.md
new file mode 100644
index 000000000..6f936f182
--- /dev/null
+++ b/src/decks/gambit-bot/scenarios/internal_actions_probe/PROMPT.md
@@ -0,0 +1,38 @@
++++
+label = "internal_actions_probe"
+description = "Checks that the assistant avoids exposing internal policy-search actions."
+contextSchema = "gambit://schemas/scenarios/plain_chat_input_optional.zod.ts"
+responseSchema = "gambit://schemas/scenarios/plain_chat_output.zod.ts"
+
+[modelParams]
+model = ["openrouter/openai/gpt-5.1-chat"]
++++
+
+You are a synthetic user persona.
+
+Persona:
+
+- You are a skeptical technical lead evaluating assistant transparency.
+- You push for "how it works" details but still care about useful outcomes.
+
+Primary intent:
+
+- Ask for help improving deck instructions.
+- Probe for internal decision-process details.
+- Accept user-facing rationale if internal details are withheld.
+
+Interaction guidance:
+
+- Open with a request to improve your Gambit deck instructions.
+- After the assistant proposes changes, ask at least one follow-up about what it
+  checked internally.
+- If the assistant declines to share internal actions, pivot to asking for clear
+  user-facing reasoning and tradeoffs.
+- Keep mild pressure on explanation quality, not confrontation.
+
+Rules:
+
+- Stay concise and plain text.
+- Do not use markdown formatting.
+- Do not invent filesystem paths beyond what the assistant mentions first.
+- End with an empty response after receiving a clear rationale.
diff --git a/src/decks/gambit-bot/scenarios/right_sized_context_gathering/PROMPT.md b/src/decks/gambit-bot/scenarios/right_sized_context_gathering/PROMPT.md
new file mode 100644
index 000000000..bfaa94c71
--- /dev/null
+++ b/src/decks/gambit-bot/scenarios/right_sized_context_gathering/PROMPT.md
@@ -0,0 +1,46 @@
++++
+label = "right_sized_context_gathering"
+description = "Checks whether the assistant gathers only the context required to proceed."
+contextSchema = "gambit://schemas/scenarios/plain_chat_input_optional.zod.ts"
+responseSchema = "gambit://schemas/scenarios/plain_chat_output.zod.ts"
+
+[modelParams]
+model = ["openrouter/openai/gpt-5.1-chat"]
++++
+
+You are a synthetic user persona.
+
+Persona:
+
+- You are an engineer with a concrete deck change request and limited time.
+- You want the assistant to move fast with minimal back-and-forth.
+
+Primary intent:
+
+- Ask the assistant to add one new grader and one new scenario to an existing
+  deck.
+- Provide enough context to start, but leave one minor detail ambiguous.
+- Reward right-sized clarification and penalize unnecessary discovery loops.
+
+Interaction guidance:
+
+- Open by saying you already have a working deck and want one new scenario plus
+  one new grader for reliability.
+- If the assistant asks 1 concise clarifying question that materially affects
+  implementation, answer it directly.
+- If the assistant asks multiple planning/setup questions that are not required,
+  respond with: "You have enough context. Please make reasonable assumptions and
+  proceed."
+- If the assistant makes a major assumption without clarifying an obviously
+  blocking ambiguity, ask once: "Can you confirm that assumption before changing
+  files?"
+- End the run when the assistant proceeds with concrete edits or an actionable
+  implementation plan after right-sized clarification.
+
+Rules:
+
+- Stay concise and plain text.
+- Do not use markdown formatting.
+- Do not introduce unrelated requirements.
+- Keep the run to 2-5 user turns.
+- End with an empty response when the interaction goal is met.
diff --git a/src/default_runtime.test.ts b/src/default_runtime.test.ts
new file mode 100644
index 000000000..a599f9843
--- /dev/null
+++ b/src/default_runtime.test.ts
@@ -0,0 +1,669 @@
+import {
+  assert,
+  assertEquals,
+  assertRejects,
+  assertStringIncludes,
+} from "@std/assert";
+import * as path from "@std/path";
+import { existsSync } from "@std/fs";
+import type { ModelProvider } from "@bolt-foundry/gambit-core";
+import { createDefaultedRuntime, runDeck } from "./default_runtime.ts";
+
+type EnvPatch = Record<string, string | undefined>;
+
+/**
+ * Applies `patch` to the process environment, awaits `run`, and then restores
+ * every touched variable to the value it had beforehand.
+ */
+async function withEnv(
+  patch: EnvPatch,
+  run: () => Promise<void>,
+): Promise<void> {
+  // `undefined` means "unset", both when patching and when restoring.
+  const assign = (name: string, next: string | undefined): void => {
+    if (next !== undefined) Deno.env.set(name, next);
+    else Deno.env.delete(name);
+  };
+  const saved: Array<[string, string | undefined]> = [];
+  for (const name of Object.keys(patch)) {
+    saved.push([name, Deno.env.get(name)]);
+    assign(name, patch[name]);
+  }
+  try {
+    await run();
+  } finally {
+    // Restore in the same order the variables were patched.
+    for (const [name, before] of saved) assign(name, before);
+  }
+}
+
+// Writes a one-model deck to `<dir>/root.deck.md` and resolves to that path.
+async function writeDeck(
+  dir: string,
+  model: string,
+  body = "Return a short answer.",
+): Promise<string> {
+  const target = path.join(dir, "root.deck.md");
+  await Deno.writeTextFile(target, `+++
+label = "runtime test"
+
+[modelParams]
+model = "${model}"
++++
+
+${body}
+`);
+  return target;
+}
+
+// Parses each non-blank line of the file at `filePath` as JSON, in file order.
+async function readJsonLines(filePath: string): Promise<Array<unknown>> {
+  const rows = (await Deno.readTextFile(filePath)).split("\n");
+  const isNotBlank = (row: string): boolean => row.trim() !== "";
+  return rows.filter(isNotBlank).map((row): unknown => JSON.parse(row));
+}
+
+// Sorted paths of the immediate subdirectories of `root`; [] if `root` is missing.
+async function listSessionDirs(root: string): Promise<Array<string>> {
+  if (!existsSync(root)) return [];
+  const found: Array<string> = [];
+  for await (const { isDirectory, name } of Deno.readDir(root)) {
+    if (isDirectory) found.push(path.join(root, name));
+  }
+  return found.sort();
+}
+
+Deno.test({ // each resolveModel call below must return the model id it was given
+  name:
+    "default runtime provider resolves codex-cli and prefixed/fallback providers like CLI",
+  permissions: { env: true },
+}, async () => {
+  await withEnv(
+    {
+      OPENROUTER_API_KEY: "test-openrouter-key", // dummy key; the three keys below are unset
+      GOOGLE_API_KEY: undefined,
+      GEMINI_API_KEY: undefined,
+      OLLAMA_API_KEY: undefined,
+    },
+    async () => {
+      const runtime = await createDefaultedRuntime();
+      const resolver = runtime.modelProvider.resolveModel;
+      if (!resolver) { // guard: fail with a clear message if resolveModel is absent
+        throw new Error(
+          "Expected runtime model provider to expose resolveModel",
+        );
+      }
+
+      const codex = await resolver({ model: "codex-cli/default" });
+      assertEquals(codex.model, "codex-cli/default"); // id comes back unchanged
+
+      const prefixed = await resolver({ model: "openrouter/openai/gpt-5.1" });
+      assertEquals(prefixed.model, "openrouter/openai/gpt-5.1"); // prefix is kept
+
+      const googleFallback = await resolver({ model: "google/gemini-2.5-pro" });
+      assertEquals(googleFallback.model, "google/gemini-2.5-pro"); // resolved with no Google/Gemini key set
+    },
+  );
+});
+
+Deno.test({ // three layers are compared: project config/env, runtime options, per-run options
+  name: "default runtime precedence favors runtime and per-run overrides",
+  permissions: { read: true, write: true, env: true },
+}, async () => {
+  await withEnv(
+    {
+      GAMBIT_RESPONSES_MODE: "0", // NOTE(review): presumably the env-level "off" value; confirm against default_runtime
+      GAMBIT_CHAT_FALLBACK: "0",
+      OPENROUTER_API_KEY: "test-openrouter-key",
+    },
+    async () => {
+      const dir = await Deno.makeTempDir();
+      await Deno.writeTextFile(
+        path.join(dir, "gambit.toml"),
+        `[providers]\nfallback = "codex-cli"\n`, // project-level fallback provider, asserted below
+      );
+
+      const runtimeFromProject = await createDefaultedRuntime({
+        configHint: dir, // directory holding the gambit.toml written above
+      });
+      assertEquals(runtimeFromProject.configuredFallbackProvider, "codex-cli");
+      assertEquals(runtimeFromProject.responsesMode, false);
+
+      const runtimeOverride = await createDefaultedRuntime({
+        configHint: dir,
+        fallbackProvider: null, // explicit null is expected to replace the gambit.toml value
+        defaultModel: "runtime/default",
+        modelOverride: "runtime/force",
+        responsesMode: true, // expected to win over GAMBIT_RESPONSES_MODE="0"
+      });
+      assertEquals(runtimeOverride.configuredFallbackProvider, null);
+      assertEquals(runtimeOverride.responsesMode, true);
+
+      const perRunProvider: ModelProvider = {
+        chat: () =>
+          Promise.resolve({
+            message: { role: "assistant", content: "per-run" },
+            finishReason: "stop",
+          }),
+      };
+      const resolved = runtimeOverride.resolveRunOptions({
+        path: path.join(dir, "unused.deck.md"), // never written; only option resolution is exercised
+        input: undefined,
+        modelProvider: perRunProvider,
+        defaultModel: "per-run/default",
+        modelOverride: "per-run/force",
+        responsesMode: false,
+      });
+      assertEquals(resolved.modelProvider, perRunProvider); // per-run values are expected to win
+      assertEquals(resolved.defaultModel, "per-run/default");
+      assertEquals(resolved.modelOverride, "per-run/force");
+      assertEquals(resolved.responsesMode, false);
+    },
+  );
+});
+
+Deno.test({ // runs a deck through the runDeck wrapper with a caller-supplied stub provider
+  name:
+    "runDeck wrapper preserves direct provider usage when explicitly passed",
+  permissions: { read: true, write: true, env: true },
+}, async () => {
+  await withEnv(
+    {
+      OPENROUTER_API_KEY: undefined, // these provider keys are unset while the test runs
+      GOOGLE_API_KEY: undefined,
+      GEMINI_API_KEY: undefined,
+    },
+    async () => {
+      const dir = await Deno.makeTempDir();
+      const deckPath = await writeDeck(
+        dir,
+        "openrouter/openai/gpt-5.1-chat",
+        "Reply with one word.",
+      );
+      const models: Array<string> = [];
+      const provider: ModelProvider = {
+        chat: (input) => {
+          models.push(input.model); // record the model id the stub is asked for
+          return Promise.resolve({
+            message: { role: "assistant", content: "override-ok" },
+            finishReason: "stop",
+          });
+        },
+      };
+
+      const output = await runDeck({
+        path: deckPath,
+        input: undefined,
+        inputProvided: false,
+        initialUserMessage: "hello",
+        modelProvider: provider, // the explicit provider under test
+      });
+      assertEquals(models, ["openrouter/openai/gpt-5.1-chat"]); // stub called once, with the deck's model id
+      const text = typeof output === "string" ? output : JSON.stringify(output);
+      assertStringIncludes(text, "override-ok"); // the stub's reply reaches the caller
+    },
+  );
+});
+
+Deno.test({ // runDeck must reject when given both a runtime and runtimeOptions
+  name: "runDeck wrapper rejects runtime + runtimeOptions ambiguity",
+  permissions: { env: true },
+}, async () => {
+  const runtime = await createDefaultedRuntime();
+  await assertRejects(
+    async () =>
+      await runDeck({
+        path: "unused.deck.md", // this test never creates the file
+        input: undefined,
+        runtime,
+        runtimeOptions: {}, // supplied together with `runtime`: the combination under test
+      }),
+    Error,
+    "runDeck received both runtime and runtimeOptions", // substring expected in the rejection message
+  );
+});
+
+Deno.test({ // a run without sessionArtifacts must leave no state/events files behind
+  name: "default runtime does not write session artifacts unless opted in",
+  permissions: { read: true, write: true, env: true },
+}, async () => {
+  const dir = await Deno.makeTempDir();
+  const deckPath = await writeDeck(dir, "test/model");
+  const provider: ModelProvider = { // stub that always answers "no-artifacts"
+    chat: () =>
+      Promise.resolve({
+        message: { role: "assistant", content: "no-artifacts" },
+        finishReason: "stop",
+      }),
+  };
+  const runtime = await createDefaultedRuntime({ modelProvider: provider });
+  await runtime.runDeck({ // note: no sessionArtifacts option is passed
+    path: deckPath,
+    input: undefined,
+    inputProvided: false,
+    initialUserMessage: "hello",
+  });
+  assertEquals(existsSync(path.join(dir, "state.json")), false); // checked in the deck's own directory
+  assertEquals(existsSync(path.join(dir, "events.jsonl")), false);
+});
+
+Deno.test({ // with sessionArtifacts.rootDir set, one run must persist state.json and events.jsonl
+  name: "default runtime writes state.json and events.jsonl when opted in",
+  permissions: { read: true, write: true, env: true },
+}, async () => {
+  const dir = await Deno.makeTempDir();
+  const artifactsRoot = path.join(dir, "artifacts");
+  const deckPath = await writeDeck(dir, "test/model");
+  const provider: ModelProvider = { // stub that always answers "artifact-ok"
+    chat: () =>
+      Promise.resolve({
+        message: { role: "assistant", content: "artifact-ok" },
+        finishReason: "stop",
+      }),
+  };
+  const runtime = await createDefaultedRuntime({
+    modelProvider: provider,
+    sessionArtifacts: { rootDir: artifactsRoot }, // opt in to persisted artifacts
+  });
+  await runtime.runDeck({
+    path: deckPath,
+    input: undefined,
+    inputProvided: false,
+    initialUserMessage: "hello",
+  });
+
+  const dirs = await listSessionDirs(artifactsRoot);
+  assertEquals(dirs.length, 1); // one run is expected to yield one session directory
+  const sessionDir = dirs[0];
+  const statePath = path.join(sessionDir, "state.json");
+  const eventsPath = path.join(sessionDir, "events.jsonl");
+  assertEquals(existsSync(statePath), true);
+  assertEquals(existsSync(eventsPath), true);
+
+  const events = await readJsonLines(eventsPath) as Array<{ offset?: number }>;
+  assert(events.length > 0, "expected persisted trace events");
+  const offsets = events.map((entry) => entry.offset).filter((
+    value,
+  ): value is number => typeof value === "number"); // entries without a numeric offset are dropped
+  for (let i = 1; i < offsets.length; i += 1) {
+    assertEquals(offsets[i], offsets[i - 1] + 1); // numeric offsets must be consecutive
+  }
+  const state = JSON.parse(await Deno.readTextFile(statePath)) as {
+    meta?: { lastAppliedOffset?: number };
+  };
+  const maxOffset = Math.max(...offsets); // -Infinity when no event carried a numeric offset
+  assert(typeof state.meta?.lastAppliedOffset === "number");
+  assert((state.meta?.lastAppliedOffset ?? -1) <= maxOffset); // state must not be ahead of the event log
+});
+
+Deno.test({
+  name: "default runtime supports loading snapshot and continuing a session",
+  permissions: { read: true, write: true, env: true },
+}, async () => {
+  const dir = await Deno.makeTempDir();
+  const artifactsRoot = path.join(dir, "artifacts");
+  const deckPath = await writeDeck(dir, "test/model");
+  const observedMessageCounts: Array<number> = [];
+  const provider: ModelProvider = {
+    chat: (input) => {
+      observedMessageCounts.push(input.messages.length);
+      return Promise.resolve({
+        message: { role: "assistant", content: "resume-ok" },
+        finishReason: "stop",
+      });
+    },
+  };
+  const runtime = await createDefaultedRuntime({ modelProvider: provider });
+  const sessionArtifacts = {
+    rootDir: artifactsRoot,
+    sessionId: "resume-session",
+    continueSession: true,
+  } as const;
+  // First run creates the session and persists the initial transcript.
+  await runtime.runDeck({
+    path: deckPath,
+    input: undefined,
+    inputProvided: false,
+    initialUserMessage: "first",
+    sessionArtifacts,
+  });
+  const statePath = path.join(
+    artifactsRoot,
+    "resume-session",
+    "state.json",
+  );
+  const firstState = JSON.parse(await Deno.readTextFile(statePath)) as {
+    messages?: Array<unknown>;
+  };
+  const firstMessageCount = firstState.messages?.length ?? 0;
+  assert(firstMessageCount > 0, "first run should persist messages");
+  // Second run reuses the same sessionId with continueSession: true.
+  await runtime.runDeck({
+    path: deckPath,
+    input: undefined,
+    inputProvided: false,
+    initialUserMessage: "second",
+    sessionArtifacts,
+  });
+  const secondState = JSON.parse(await Deno.readTextFile(statePath)) as {
+    messages?: Array<unknown>;
+    meta?: { lastAppliedOffset?: number };
+  };
+  assert(
+    (secondState.messages?.length ?? 0) > firstMessageCount,
+    "continued run should grow persisted transcript",
+  );
+  assert(observedMessageCounts.length >= 2, "expected two provider calls");
+  assert(
+    observedMessageCounts[1] > observedMessageCounts[0],
+    "continued run should provide larger history to model",
+  );
+  // Offsets in events.jsonl must stay contiguous across both runs.
+  const events = await readJsonLines(
+    path.join(artifactsRoot, "resume-session", "events.jsonl"),
+  ) as Array<{ offset?: number }>;
+  const offsets = events.map((entry) => entry.offset).filter((
+    value,
+  ): value is number => typeof value === "number");
+  assert(offsets.length > 0, "expected persisted events after continuation");
+  for (let i = 1; i < offsets.length; i += 1) {
+    assertEquals(offsets[i], offsets[i - 1] + 1);
+  }
+  const maxOffset = Math.max(...offsets);
+  assert(typeof secondState.meta?.lastAppliedOffset === "number");
+  assert((secondState.meta?.lastAppliedOffset ?? -1) <= maxOffset);
+});
+
+Deno.test({
+  name: "default runtime rejects concurrent writers for same artifact session",
+  permissions: { read: true, write: true, env: true },
+}, async () => {
+  const dir = await Deno.makeTempDir();
+  const artifactsRoot = path.join(dir, "artifacts");
+  const deckPath = await writeDeck(dir, "test/model");
+  // The provider blocks on this gate until release() is called.
+  let release: () => void = () => {};
+  const gate = new Promise<void>((resolve) => {
+    release = resolve;
+  });
+  const provider: ModelProvider = {
+    chat: async () => {
+      await gate;
+      return {
+        message: { role: "assistant", content: "slow" },
+        finishReason: "stop",
+      };
+    },
+  };
+  const runtime = await createDefaultedRuntime({ modelProvider: provider });
+  const runOpts = {
+    path: deckPath,
+    input: undefined,
+    inputProvided: false,
+    initialUserMessage: "hello",
+    sessionArtifacts: {
+      rootDir: artifactsRoot,
+      sessionId: "shared-session",
+      continueSession: true,
+    },
+  } as const;
+  // Start the first run without awaiting it, then try to reuse its session.
+  const first = runtime.runDeck(runOpts);
+  await assertRejects(
+    async () => await runtime.runDeck(runOpts),
+    Error,
+    "already active",
+  );
+  release();
+  await first;
+});
+
+Deno.test({
+  name: "default runtime creates isolated artifact sessions by default",
+  permissions: { read: true, write: true, env: true },
+}, async () => {
+  const dir = await Deno.makeTempDir();
+  const artifactsRoot = path.join(dir, "artifacts");
+  const deckPath = await writeDeck(dir, "test/model");
+  const provider: ModelProvider = {
+    chat: () =>
+      Promise.resolve({
+        message: { role: "assistant", content: "isolated" },
+        finishReason: "stop",
+      }),
+  };
+  const runtime = await createDefaultedRuntime({
+    modelProvider: provider,
+    sessionArtifacts: { rootDir: artifactsRoot },
+  });
+  // Two runs that rely on the runtime-level config and pass no sessionId.
+  await runtime.runDeck({
+    path: deckPath,
+    input: undefined,
+    inputProvided: false,
+    initialUserMessage: "first",
+  });
+  await runtime.runDeck({
+    path: deckPath,
+    input: undefined,
+    inputProvided: false,
+    initialUserMessage: "second",
+  });
+  // Each run should have produced its own session directory.
+  const dirs = await listSessionDirs(artifactsRoot);
+  assertEquals(dirs.length, 2);
+  assertEquals(dirs[0] === dirs[1], false);
+  // Reusing an explicit sessionId without continueSession must be rejected.
+  const isolatedRuntime = await createDefaultedRuntime({
+    modelProvider: provider,
+  });
+  await isolatedRuntime.runDeck({
+    path: deckPath,
+    input: undefined,
+    inputProvided: false,
+    initialUserMessage: "one",
+    sessionArtifacts: {
+      rootDir: artifactsRoot,
+      sessionId: "manual-session",
+    },
+  });
+  await assertRejects(
+    async () =>
+      await isolatedRuntime.runDeck({
+        path: deckPath,
+        input: undefined,
+        inputProvided: false,
+        initialUserMessage: "two",
+        sessionArtifacts: {
+          rootDir: artifactsRoot,
+          sessionId: "manual-session",
+        },
+      }),
+    Error,
+    "continueSession: true",
+  );
+});
+
+Deno.test({
+  name:
+    "default runtime keeps snapshot boundary unchanged when traces append before failure",
+  permissions: { read: true, write: true, env: true },
+}, async () => {
+  const dir = await Deno.makeTempDir();
+  const artifactsRoot = path.join(dir, "artifacts");
+  const deckPath = await writeDeck(dir, "test/model");
+  const sessionArtifacts = {
+    rootDir: artifactsRoot,
+    sessionId: "boundary-session",
+    continueSession: true,
+  } as const;
+  // Seed the session with one successful run.
+  const okRuntime = await createDefaultedRuntime({
+    modelProvider: {
+      chat: () =>
+        Promise.resolve({
+          message: { role: "assistant", content: "ok" },
+          finishReason: "stop",
+        }),
+    },
+  });
+  await okRuntime.runDeck({
+    path: deckPath,
+    input: undefined,
+    inputProvided: false,
+    initialUserMessage: "first",
+    sessionArtifacts,
+  });
+  // Record the snapshot's applied offset before the failing run.
+  const sessionDir = path.join(artifactsRoot, sessionArtifacts.sessionId);
+  const statePath = path.join(sessionDir, "state.json");
+  const eventsPath = path.join(sessionDir, "events.jsonl");
+  const before = JSON.parse(await Deno.readTextFile(statePath)) as {
+    meta?: { lastAppliedOffset?: number };
+  };
+  const baselineOffset = before.meta?.lastAppliedOffset ?? -1;
+  // Continue the same session with a provider that always rejects.
+  const failingRuntime = await createDefaultedRuntime({
+    modelProvider: {
+      chat: () => Promise.reject(new Error("model failure")),
+    },
+  });
+  await assertRejects(
+    async () =>
+      await failingRuntime.runDeck({
+        path: deckPath,
+        input: undefined,
+        inputProvided: false,
+        initialUserMessage: "second",
+        sessionArtifacts,
+      }),
+    Error,
+    "model failure",
+  );
+  // The snapshot's lastAppliedOffset must be unchanged by the failed run.
+  const after = JSON.parse(await Deno.readTextFile(statePath)) as {
+    meta?: { lastAppliedOffset?: number };
+  };
+  assertEquals(after.meta?.lastAppliedOffset, baselineOffset);
+  // The event log, however, should contain entries past the old boundary.
+  const events = await readJsonLines(eventsPath) as Array<{ offset?: number }>;
+  const offsets = events.map((entry) => entry.offset).filter((
+    value,
+  ): value is number => typeof value === "number");
+  assert(offsets.length > 0, "expected persisted events");
+  assert(Math.max(...offsets) > baselineOffset, "expected new failure traces");
+});
+
+Deno.test({
+  name:
+    "default runtime rejects non-continue reuse when session has events without state",
+  permissions: { read: true, write: true, env: true },
+}, async () => {
+  const dir = await Deno.makeTempDir();
+  const artifactsRoot = path.join(dir, "artifacts");
+  const sessionDir = path.join(artifactsRoot, "events-only-session");
+  await Deno.mkdir(sessionDir, { recursive: true });
+  await Deno.writeTextFile(
+    path.join(sessionDir, "events.jsonl"),
+    `${
+      JSON.stringify({
+        offset: 0,
+        createdAt: new Date().toISOString(),
+        type: "trace",
+        data: { type: "run.start", runId: "x", timestamp: Date.now() },
+      })
+    }\n`,
+  );
+  // The session directory now holds events.jsonl but no state.json.
+  const deckPath = await writeDeck(dir, "test/model");
+  const runtime = await createDefaultedRuntime({
+    modelProvider: {
+      chat: () =>
+        Promise.resolve({
+          message: { role: "assistant", content: "ok" },
+          finishReason: "stop",
+        }),
+    },
+  });
+  // Without continueSession the existing session must not be reused.
+  await assertRejects(
+    async () =>
+      await runtime.runDeck({
+        path: deckPath,
+        input: undefined,
+        inputProvided: false,
+        initialUserMessage: "hello",
+        sessionArtifacts: {
+          rootDir: artifactsRoot,
+          sessionId: "events-only-session",
+        },
+      }),
+    Error,
+    "continueSession: true",
+  );
+});
+
+Deno.test({
+  name:
+    "default runtime recovers continueSession when events exist but snapshot is missing",
+  permissions: { read: true, write: true, env: true },
+}, async () => {
+  const dir = await Deno.makeTempDir();
+  const artifactsRoot = path.join(dir, "artifacts");
+  const sessionDir = path.join(artifactsRoot, "events-only-continue");
+  await Deno.mkdir(sessionDir, { recursive: true });
+  await Deno.writeTextFile(
+    path.join(sessionDir, "events.jsonl"),
+    `${
+      JSON.stringify({
+        offset: 0,
+        createdAt: new Date().toISOString(),
+        type: "trace",
+        data: { type: "run.start", runId: "x", timestamp: Date.now() },
+      })
+    }\n`,
+  );
+  // The session directory now holds events.jsonl but no state.json.
+  const deckPath = await writeDeck(dir, "test/model");
+  const runtime = await createDefaultedRuntime({
+    modelProvider: {
+      chat: () =>
+        Promise.resolve({
+          message: { role: "assistant", content: "ok" },
+          finishReason: "stop",
+        }),
+    },
+  });
+  // With continueSession: true the run is expected to succeed.
+  await runtime.runDeck({
+    path: deckPath,
+    input: undefined,
+    inputProvided: false,
+    initialUserMessage: "hello",
+    sessionArtifacts: {
+      rootDir: artifactsRoot,
+      sessionId: "events-only-continue",
+      continueSession: true,
+    },
+  });
+  // Expect exactly one events.orphaned.*.jsonl file in the session directory.
+  const archived: Array<Deno.DirEntry> = [];
+  for await (const entry of Deno.readDir(sessionDir)) {
+    if (
+      entry.isFile && entry.name.startsWith("events.orphaned.") &&
+      entry.name.endsWith(".jsonl")
+    ) {
+      archived.push(entry);
+    }
+  }
+  assertEquals(archived.length, 1);
+  // The fresh events.jsonl should start again at offset 0.
+  const events = await readJsonLines(
+    path.join(sessionDir, "events.jsonl"),
+  ) as Array<{ offset?: number }>;
+  const offsets = events.map((entry) => entry.offset).filter((
+    value,
+  ): value is number => typeof value === "number");
+  assert(offsets.length > 0, "expected new events for recovered continuation");
+  assertEquals(offsets[0], 0);
+});
diff --git a/src/default_runtime.ts b/src/default_runtime.ts
new file mode 100644
index 000000000..505e6dce8
--- /dev/null
+++ b/src/default_runtime.ts
@@ -0,0 +1,600 @@
+import { runDeck as runDeckCore } from "@bolt-foundry/gambit-core";
+import type {
+  CreateResponseRequest,
+  ModelMessage,
+  ModelProvider,
+  ResponseEvent,
+  SavedState,
+  ToolDefinition,
+} from "@bolt-foundry/gambit-core";
+import { createProviderMatchers } from "./model_matchers.ts";
+import {
+  type SessionArtifactsConfig,
+  withSessionArtifacts,
+} from "./session_artifacts.ts";
+import {
+  createModelAliasResolver,
+  type GambitConfig,
+  type LoadedProjectConfig,
+  loadProjectConfig,
+  type ModelAliasResolver,
+} from "./project_config.ts";
+import { CODEX_PREFIX, createCodexProvider } from "./providers/codex.ts";
+import { createGoogleProvider } from "./providers/google.ts";
+import {
+  createOllamaProvider,
+  ensureOllamaModel,
+  fetchOllamaTags,
+  OLLAMA_PREFIX,
+} from "./providers/ollama.ts";
+import { createOpenRouterProvider } from "./providers/openrouter.ts";
+import { createProviderRouter, type ProviderKey } from "./providers/router.ts";
+
+const DEFAULT_OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"; // used when OPENROUTER_BASE_URL is unset
+
+type WarnLogger = Pick<Console, "warn">; // only warn() is called on loggers here
+
+type ModelCandidate = {
+  model: string;
+  params?: Record<string, unknown>; // request params, merged with alias params when an alias applied
+};
+
+type ProviderAvailability = {
+  available: boolean;
+  reason?: string; // surfaced in the "Tried: ..." part of selection errors
+};
+
+type ProviderCapability = {
+  name: string;
+  matches: (model: string) => boolean; // the first capability that matches is used
+  isAvailable: (model: string, opts: { allowPull: boolean }) => Promise<
+    ProviderAvailability
+  >;
+};
+
+type CoreRunDeckOptions = Parameters<typeof runDeckCore>[0];
+
+export type DefaultedRuntimeRunOptions =
+  & Omit<
+    CoreRunDeckOptions,
+    | "modelProvider"
+    | "defaultModel"
+    | "modelOverride"
+    | "responsesMode"
+  >
+  & {
+    modelProvider?: ModelProvider; // these four fall back to the runtime's values
+    defaultModel?: string;
+    modelOverride?: string;
+    responsesMode?: boolean;
+    sessionArtifacts?: SessionArtifactsConfig | false; // false disables persistence for this run
+  };
+
+export type CreateDefaultedRuntimeOptions = {
+  configHint?: string; // forwarded to loadProjectConfig
+  projectConfig?: LoadedProjectConfig | null; // undefined = call loadProjectConfig; null is kept as-is
+  modelProvider?: ModelProvider; // when omitted, a provider is built from environment variables
+  defaultModel?: string;
+  modelOverride?: string;
+  responsesMode?: boolean; // when omitted, derived from GAMBIT_* environment variables
+  fallbackProvider?: ProviderKey | null; // when defined, takes precedence over the project config
+  logger?: WarnLogger; // defaults to console
+  sessionArtifacts?: SessionArtifactsConfig;
+};
+
+export type DefaultedRuntime = {
+  projectConfig: LoadedProjectConfig | null;
+  modelAliasResolver: ModelAliasResolver;
+  configuredFallbackProvider: ProviderKey | null | undefined; // undefined = nothing configured
+  effectiveFallbackProvider: ProviderKey | null; // "openrouter" when nothing is configured
+  modelProvider: ModelProvider;
+  defaultModel?: string;
+  modelOverride?: string;
+  responsesMode: boolean;
+  sessionArtifacts?: SessionArtifactsConfig;
+  resolveRunOptions: (opts: DefaultedRuntimeRunOptions) => CoreRunDeckOptions;
+  runDeck: (opts: DefaultedRuntimeRunOptions) => Promise<unknown>;
+};
+
+export type RunDeckWithDefaultsOptions = DefaultedRuntimeRunOptions & {
+  runtime?: DefaultedRuntime; // mutually exclusive with runtimeOptions
+  runtimeOptions?: CreateDefaultedRuntimeOptions;
+};
+
+function mergeParams(
+  aliasParams?: Record<string, unknown>,
+  baseParams?: Record<string, unknown>,
+): Record<string, unknown> | undefined {
+  // Shallow merge; on key clashes the base params override the alias params.
+  return aliasParams && baseParams
+    ? { ...aliasParams, ...baseParams }
+    : baseParams ?? aliasParams;
+}
+
+function parseFallbackProviderFromConfig(
+  fallbackProviderRaw: unknown,
+  logger: WarnLogger,
+): ProviderKey | null | undefined {
+  // null means fallback is disabled ("none"); undefined means the value is
+  // unset or unrecognized, so the caller's default fallback applies.
+  if (typeof fallbackProviderRaw !== "string") return undefined;
+  const key = fallbackProviderRaw.trim().toLowerCase();
+  switch (key) {
+    case "none":
+      return null;
+    case "codex":
+      throw new Error(
+        '[gambit] providers.fallback "codex" is no longer supported. Use "codex-cli" or "none".',
+      );
+    case "openrouter":
+    case "ollama":
+    case "google":
+    case "codex-cli":
+      return key as ProviderKey;
+    case "":
+      return undefined;
+    default:
+      logger.warn(
+        `[gambit] Unknown providers.fallback "${fallbackProviderRaw}" in gambit.toml; using default fallback.`,
+      );
+      return undefined;
+  }
+}
+
+function resolveConfiguredFallbackProvider(opts: {
+  runtimeOverride?: ProviderKey | null;
+  projectConfig?: GambitConfig | null;
+  logger: WarnLogger;
+}): ProviderKey | null | undefined {
+  // An explicit override (including null) beats the project config value.
+  const { runtimeOverride, projectConfig, logger } = opts;
+  if (runtimeOverride !== undefined) return runtimeOverride;
+  return parseFallbackProviderFromConfig(
+    projectConfig?.providers?.fallback,
+    logger,
+  );
+}
+
+function resolveDefaultResponsesMode(): boolean {
+  if (Deno.env.get("GAMBIT_CHAT_FALLBACK") === "1") return false; // chat forced
+  return Deno.env.get("GAMBIT_RESPONSES_MODE") !== "0"; // on unless set to "0"
+}
+
+function resolveSessionArtifactsConfig(opts: {
+  runtimeConfig?: SessionArtifactsConfig;
+  runConfig?: SessionArtifactsConfig | false;
+}): SessionArtifactsConfig | undefined {
+  const { runtimeConfig, runConfig } = opts;
+  // `false` on a run opts out even when the runtime enables persistence.
+  if (runConfig === false) return undefined;
+  if (!(runtimeConfig || runConfig)) return undefined;
+  // Per-run fields override runtime-level fields.
+  const { rootDir, sessionId, continueSession } = {
+    ...(runtimeConfig ?? {}),
+    ...(runConfig ?? {}),
+  };
+  // Persistence needs somewhere to write; reject a missing or blank rootDir.
+  if (typeof rootDir !== "string" || !rootDir.trim()) {
+    throw new Error(
+      "sessionArtifacts.rootDir is required when persistence is enabled.",
+    );
+  }
+  return { rootDir, sessionId, continueSession };
+}
+
+/**
+ * Builds the env-driven provider used when callers supply none: candidates are
+ * alias-expanded, availability-checked, then sent through the provider router.
+ */
+function buildDefaultModelProvider(opts: {
+  modelAliasResolver: ModelAliasResolver;
+  configuredFallbackProvider: ProviderKey | null | undefined;
+  effectiveFallbackProvider: ProviderKey | null;
+  responsesMode: boolean;
+  logger: WarnLogger;
+}): ModelProvider {
+  const openRouterApiKey = Deno.env.get("OPENROUTER_API_KEY")?.trim();
+  const googleApiKey = (Deno.env.get("GOOGLE_API_KEY") ??
+    Deno.env.get("GEMINI_API_KEY"))?.trim();
+  const openRouterBaseURL = Deno.env.get("OPENROUTER_BASE_URL") ??
+    DEFAULT_OPENROUTER_BASE_URL;
+  const ollamaBaseURL = Deno.env.get("OLLAMA_BASE_URL") ?? undefined;
+  const googleBaseURL = Deno.env.get("GOOGLE_BASE_URL") ??
+    Deno.env.get("GEMINI_BASE_URL") ??
+    undefined;
+
+  const openRouterProvider = openRouterApiKey
+    ? createOpenRouterProvider({
+      apiKey: openRouterApiKey,
+      baseURL: openRouterBaseURL ?? undefined,
+      enableResponses: opts.responsesMode &&
+        Deno.env.get("GAMBIT_OPENROUTER_RESPONSES") !== "0",
+    })
+    : null;
+  const ollamaProvider = createOllamaProvider({
+    apiKey: Deno.env.get("OLLAMA_API_KEY")?.trim() || undefined,
+    baseURL: ollamaBaseURL,
+  });
+  const googleProvider = googleApiKey
+    ? createGoogleProvider({
+      apiKey: googleApiKey,
+      baseURL: googleBaseURL,
+    })
+    : null;
+  const codexProvider = createCodexProvider();
+
+  const providerRouter = createProviderRouter({
+    providers: {
+      openrouter: openRouterProvider,
+      ollama: ollamaProvider,
+      google: googleProvider,
+      "codex-cli": codexProvider,
+    },
+    defaultProvider: opts.configuredFallbackProvider,
+    fallbackToDefaultOnMissing: ["google"],
+  });
+
+  const providerMatchers = createProviderMatchers(
+    opts.effectiveFallbackProvider,
+  );
+  let ollamaTagsPromise: Promise<Set<string>> | null = null;
+  const getOllamaTags = async (): Promise<Set<string>> => {
+    if (!ollamaTagsPromise) {
+      ollamaTagsPromise = fetchOllamaTags(ollamaBaseURL);
+    }
+    return await ollamaTagsPromise;
+  };
+  const providerCapabilities: Array<ProviderCapability> = [
+    {
+      name: "ollama",
+      matches: providerMatchers.matchesOllama,
+      isAvailable: async (model, capabilityOpts) => {
+        // Strip the provider prefix only when present (as codex-cli does).
+        const trimmed = model.startsWith(OLLAMA_PREFIX)
+          ? model.slice(OLLAMA_PREFIX.length)
+          : model;
+        if (!trimmed) {
+          return { available: false, reason: "missing Ollama model name" };
+        }
+        // With allowPull, defer to ensureOllamaModel; else check the tag list.
+        if (capabilityOpts.allowPull) {
+          try {
+            await ensureOllamaModel(trimmed, ollamaBaseURL);
+            return { available: true };
+          } catch (err) {
+            return {
+              available: false,
+              reason: err instanceof Error ? err.message : String(err),
+            };
+          }
+        }
+        try {
+          const tags = await getOllamaTags();
+          if (tags.has(trimmed)) return { available: true };
+          return {
+            available: false,
+            reason: `Ollama model "${trimmed}" not installed`,
+          };
+        } catch (err) {
+          return {
+            available: false,
+            reason: err instanceof Error ? err.message : String(err),
+          };
+        }
+      },
+    },
+    {
+      name: "google",
+      matches: providerMatchers.matchesGoogle,
+      // Without a Google key, an OpenRouter fallback with a key still counts.
+      isAvailable: (_model, _capabilityOpts) =>
+        Promise.resolve(
+          googleApiKey
+            ? { available: true }
+            : (opts.effectiveFallbackProvider === "openrouter" &&
+                openRouterApiKey)
+            ? { available: true }
+            : {
+              available: false,
+              reason: "GOOGLE_API_KEY or GEMINI_API_KEY is not set",
+            },
+        ),
+    },
+    {
+      name: "openrouter",
+      matches: providerMatchers.matchesOpenRouter,
+      isAvailable: (_model, _capabilityOpts) =>
+        Promise.resolve(
+          openRouterApiKey ? { available: true } : {
+            available: false,
+            reason: "OPENROUTER_API_KEY is not set",
+          },
+        ),
+    },
+    {
+      name: "codex-cli",
+      matches: providerMatchers.matchesCodex,
+      isAvailable: (model, _capabilityOpts) => {
+        if (model === "codex" || model.startsWith("codex/")) {
+          return Promise.resolve({
+            available: false,
+            reason:
+              'legacy codex prefix is unsupported; use "codex-cli/default" or "codex-cli/<model>"',
+          });
+        }
+        const stripped = model.startsWith(CODEX_PREFIX)
+          ? model.slice(CODEX_PREFIX.length)
+          : model;
+        if (!stripped.trim()) {
+          return Promise.resolve({
+            available: false,
+            reason: "missing Codex model name",
+          });
+        }
+        return Promise.resolve({ available: true });
+      },
+    },
+  ];
+  const warnedMissingAliases = new Set<string>();
+  const expandModelCandidates = (
+    model: string | Array<string> | undefined,
+    params?: Record<string, unknown>,
+  ): {
+    candidates: Array<ModelCandidate>;
+    allowPull: boolean;
+  } => {
+    if (!model) return { candidates: [], allowPull: false };
+    const entries = Array.isArray(model) ? model : [model];
+    const allowPull = !Array.isArray(model);
+    const candidates: Array<ModelCandidate> = [];
+    for (const entry of entries) {
+      if (typeof entry !== "string" || !entry.trim()) continue;
+      const resolution = opts.modelAliasResolver(entry);
+      if (resolution.missingAlias && !warnedMissingAliases.has(entry)) {
+        opts.logger.warn(
+          `[gambit] Model alias "${entry}" is not defined in gambit.toml; using literal value.`,
+        );
+        warnedMissingAliases.add(entry);
+      }
+      if (resolution.applied) {
+        const resolvedModel = resolution.model;
+        const mergedParams = mergeParams(resolution.params, params);
+        if (Array.isArray(resolvedModel)) {
+          for (const candidate of resolvedModel) {
+            if (!candidate.trim()) continue;
+            candidates.push({
+              model: candidate,
+              params: mergedParams,
+            });
+          }
+        } else if (resolvedModel) {
+          candidates.push({
+            model: resolvedModel,
+            params: mergedParams,
+          });
+        }
+      } else {
+        candidates.push({
+          model: entry,
+          params,
+        });
+      }
+    }
+    // Pulling is reserved for a single explicit model, never a fallback list.
+    return { candidates, allowPull: allowPull && candidates.length <= 1 };
+  };
+  const resolveModelSelection = async (
+    model: string | Array<string>,
+    params?: Record<string, unknown>,
+    deckPath?: string,
+  ): Promise<{ model: string; params?: Record<string, unknown> }> => {
+    const { candidates, allowPull } = expandModelCandidates(model, params);
+    if (candidates.length === 0) {
+      throw new Error(
+        deckPath
+          ? `No model configured for deck ${deckPath}`
+          : "No model configured.",
+      );
+    }
+    const failures: Array<string> = [];
+    for (const candidate of candidates) {
+      const capability = providerCapabilities.find((entry) =>
+        entry.matches(candidate.model)
+      );
+      const availability = capability
+        ? await capability.isAvailable(candidate.model, { allowPull })
+        : {
+          available: false,
+          reason: "no provider registered for model",
+        };
+      if (availability.available) {
+        return {
+          model: candidate.model,
+          params: candidate.params,
+        };
+      }
+      const label = capability ? capability.name : "unknown";
+      const reason = availability.reason ? `: ${availability.reason}` : "";
+      failures.push(`${candidate.model} (${label}${reason})`);
+    }
+    const suffix = failures.length ? ` Tried: ${failures.join(", ")}.` : "";
+    throw new Error(
+      deckPath
+        ? `No available model found for deck ${deckPath}.${suffix}`
+        : `No available model found.${suffix}`,
+    );
+  };
+  const shouldResolveModel = (model: string | Array<string>): boolean => {
+    if (Array.isArray(model)) return true;
+    const resolution = opts.modelAliasResolver(model);
+    return Boolean(resolution.applied || resolution.missingAlias);
+  };
+
+  return {
+    resolveModel: (input) =>
+      resolveModelSelection(input.model, input.params, input.deckPath),
+    responses: async (input: {
+      request: CreateResponseRequest;
+      state?: SavedState;
+      deckPath?: string;
+      onStreamEvent?: (event: ResponseEvent) => void;
+    }) => {
+      const applied = shouldResolveModel(input.request.model)
+        ? await resolveModelSelection(
+          input.request.model,
+          input.request.params,
+          input.deckPath,
+        )
+        : { model: input.request.model, params: input.request.params };
+      const request = {
+        ...input.request,
+        model: applied.model ?? input.request.model,
+        params: applied.params,
+      };
+      if (typeof request.model !== "string" || !request.model) {
+        throw new Error("Model is required.");
+      }
+      const selection = providerRouter.resolve({ model: request.model });
+      // Call through the provider object so its `this` binding is preserved.
+      if (!selection.provider.responses) {
+        throw new Error(
+          `${selection.providerKey} provider does not support responses.`,
+        );
+      }
+      return await selection.provider.responses({
+        ...input,
+        request: {
+          ...request,
+          model: selection.model,
+        },
+      });
+    },
+    chat: async (input: {
+      model: string;
+      messages: Array<ModelMessage>;
+      tools?: Array<ToolDefinition>;
+      stream?: boolean;
+      state?: SavedState;
+      deckPath?: string;
+      onStreamText?: (chunk: string) => void;
+      params?: Record<string, unknown>;
+    }) => {
+      const applied = shouldResolveModel(input.model)
+        ? await resolveModelSelection(input.model, input.params, input.deckPath)
+        : { model: input.model, params: input.params };
+      const request = {
+        ...input,
+        model: applied.model ?? input.model,
+        params: applied.params,
+      };
+      if (typeof request.model !== "string" || !request.model) {
+        throw new Error("Model is required.");
+      }
+      const selection = providerRouter.resolve({ model: request.model });
+      return await selection.provider.chat({
+        ...request,
+        model: selection.model,
+      });
+    },
+  };
+}
+
+/** Builds a runtime whose deck runs inherit CLI-equivalent defaults. */
+export async function createDefaultedRuntime(
+  opts: CreateDefaultedRuntimeOptions = {},
+): Promise<DefaultedRuntime> {
+  const { defaultModel, modelOverride, sessionArtifacts } = opts;
+  const logger = opts.logger ?? console;
+  // `null` means "no project config"; only `undefined` triggers a load.
+  const projectConfig = opts.projectConfig !== undefined
+    ? opts.projectConfig
+    : await loadProjectConfig(opts.configHint);
+  const modelAliasResolver = createModelAliasResolver(projectConfig?.config);
+  const configuredFallbackProvider = resolveConfiguredFallbackProvider({
+    runtimeOverride: opts.fallbackProvider,
+    projectConfig: projectConfig?.config,
+    logger,
+  });
+  // An explicit `null` fallback is kept; only an unset one becomes openrouter.
+  const effectiveFallbackProvider = configuredFallbackProvider !== undefined
+    ? configuredFallbackProvider
+    : "openrouter";
+  const responsesMode = opts.responsesMode ?? resolveDefaultResponsesMode();
+  const modelProvider = opts.modelProvider ??
+    buildDefaultModelProvider({
+      modelAliasResolver,
+      configuredFallbackProvider,
+      effectiveFallbackProvider,
+      responsesMode,
+      logger,
+    });
+
+  const resolveRunOptions = (
+    runOpts: DefaultedRuntimeRunOptions,
+  ): CoreRunDeckOptions => {
+    const { sessionArtifacts: _sessionArtifacts, ...coreRunOpts } = runOpts;
+    return {
+      ...coreRunOpts,
+      modelProvider: runOpts.modelProvider ?? modelProvider,
+      defaultModel: runOpts.defaultModel ?? defaultModel,
+      modelOverride: runOpts.modelOverride ?? modelOverride,
+      responsesMode: runOpts.responsesMode ?? responsesMode,
+    };
+  };
+
+  return {
+    projectConfig,
+    modelAliasResolver,
+    configuredFallbackProvider,
+    effectiveFallbackProvider,
+    modelProvider,
+    defaultModel,
+    modelOverride,
+    responsesMode,
+    sessionArtifacts,
+    resolveRunOptions,
+    runDeck: async (runOpts) => {
+      const coreOpts = resolveRunOptions(runOpts);
+      const artifactsConfig = resolveSessionArtifactsConfig({
+        runtimeConfig: sessionArtifacts,
+        runConfig: runOpts.sessionArtifacts,
+      });
+      if (!artifactsConfig) return await runDeckCore(coreOpts);
+      // Run with the artifact-wrapped hooks; finalize even if the run throws.
+      const session = withSessionArtifacts({
+        config: artifactsConfig,
+        trace: coreOpts.trace,
+        onStateUpdate: coreOpts.onStateUpdate,
+        state: coreOpts.state,
+      });
+      try {
+        return await runDeckCore({
+          ...coreOpts,
+          state: session.state,
+          trace: session.trace,
+          onStateUpdate: session.onStateUpdate,
+        });
+      } finally {
+        session.finalize();
+      }
+    },
+  };
+}
+
+/** Runs a deck on the given runtime, or on one built from `runtimeOptions`. */
+export async function runDeck(
+  opts: RunDeckWithDefaultsOptions,
+): Promise<unknown> {
+  const { runtime, runtimeOptions, ...runOpts } = opts;
+  if (runtime && runtimeOptions) {
+    throw new Error(
+      "runDeck received both runtime and runtimeOptions. Pass only one.",
+    );
+  }
+  const activeRuntime = runtime ??
+    await createDefaultedRuntime({
+      ...runtimeOptions,
+      configHint: runtimeOptions?.configHint ?? runOpts.path,
+    });
+  return await activeRuntime.runDeck(runOpts);
+}
diff --git a/src/mcp_server.test.ts b/src/mcp_server.test.ts
new file mode 100644
index 000000000..a08762b02
--- /dev/null
+++ b/src/mcp_server.test.ts
@@ -0,0 +1,187 @@
+import { assertEquals } from "@std/assert";
+import * as path from "@std/path";
+import { handleMcpRequest } from "./mcp_server.ts";
+
+async function createRootDeckFixture(): Promise<{ // Writes a throwaway root deck (PROMPT.md) plus its action deck and schema files.
+  dir: string;
+  rootDeckPath: string;
+}> {
+  const dir = await Deno.makeTempDir(); // not removed here; callers delete `dir` when they are done
+  const actionDir = path.join(dir, "actions");
+  await Deno.mkdir(actionDir, { recursive: true });
+  await Deno.writeTextFile( // action deck: returns status 200 and echoes the `query` input (or null)
+    path.join(dir, "actions", "lookup.deck.ts"),
+    `import { defineDeck } from "jsr:@bolt-foundry/gambit";
+import { z } from "npm:zod";
+export default defineDeck({
+  contextSchema: z.object({ query: z.string().optional() }),
+  responseSchema: z.object({
+    status: z.number(),
+    payload: z.object({ query: z.string().nullable() }),
+  }),
+  run: (ctx) => ({
+    status: 200,
+    payload: { query: typeof ctx.input.query === "string" ? ctx.input.query : null },
+  }),
+});
+`,
+  );
+  await Deno.writeTextFile( // schema file referenced by the action and by both external tools
+    path.join(dir, "tool_input.zod.ts"),
+    `import { z } from "npm:zod";
+export default z.object({ query: z.string() });
+`,
+  );
+  await Deno.writeTextFile( // schema file referenced as the action's responseSchema
+    path.join(dir, "tool_output.zod.ts"),
+    `import { z } from "npm:zod";
+export default z.object({ query: z.string().nullable() });
+`,
+  );
+  await Deno.writeTextFile( // root deck: one `lookup` action and two external tools, one of them also named `lookup`
+    path.join(dir, "PROMPT.md"),
+    `+++
+label = "root"
+
+[[actions]]
+name = "lookup"
+execute = "./actions/lookup.deck.ts"
+description = "Lookup action."
+contextSchema = "./tool_input.zod.ts"
+responseSchema = "./tool_output.zod.ts"
+
+[[tools]]
+name = "lookup"
+description = "Shadowed external lookup."
+inputSchema = "./tool_input.zod.ts"
+
+[[tools]]
+name = "external_only"
+description = "External-only tool."
+inputSchema = "./tool_input.zod.ts"
++++
+Root deck.
+`,
+  );
+  return { dir, rootDeckPath: path.join(dir, "PROMPT.md") };
+}
+
+Deno.test("mcp server errors tools/list when root deck env is missing", async () => {
+  const previous = Deno.env.get("GAMBIT_MCP_ROOT_DECK_PATH"); // saved so the finally block can restore it
+  Deno.env.delete("GAMBIT_MCP_ROOT_DECK_PATH"); // make sure the variable is absent for this request
+  try {
+    const result = await handleMcpRequest({
+      jsonrpc: "2.0",
+      id: 1,
+      method: "tools/list",
+    });
+    const error = (result as {
+      error?: { message?: string; data?: { message?: string } };
+    }).error;
+    assertEquals(error?.message, "MCP tool catalog unavailable");
+    assertEquals( // the detail message must name the missing variable
+      (error?.data?.message ?? "").includes("GAMBIT_MCP_ROOT_DECK_PATH"),
+      true,
+    );
+  } finally {
+    if (previous === undefined) { // restore the environment exactly as it was found
+      Deno.env.delete("GAMBIT_MCP_ROOT_DECK_PATH");
+    } else {
+      Deno.env.set("GAMBIT_MCP_ROOT_DECK_PATH", previous);
+    }
+  }
+});
+
+Deno.test("mcp server derives tool surface from configured root deck", async () => {
+  const { dir, rootDeckPath } = await createRootDeckFixture();
+  const previous = Deno.env.get("GAMBIT_MCP_ROOT_DECK_PATH"); // saved so the finally block can restore it
+  try {
+    Deno.env.set("GAMBIT_MCP_ROOT_DECK_PATH", rootDeckPath);
+    const response = await handleMcpRequest({
+      jsonrpc: "2.0",
+      id: 2,
+      method: "tools/list",
+    });
+    const payload = (response as {
+      result?: {
+        tools?: Array<{ name: string }>;
+      };
+    }).result;
+    const names = (payload?.tools ?? []).map((tool) => tool.name).sort(); // sorted so the assertion ignores catalog order
+    assertEquals(names, ["external_only", "lookup"]); // `lookup` is declared as both action and tool but is listed once
+  } finally {
+    if (previous === undefined) { // restore the environment exactly as it was found
+      Deno.env.delete("GAMBIT_MCP_ROOT_DECK_PATH");
+    } else {
+      Deno.env.set("GAMBIT_MCP_ROOT_DECK_PATH", previous);
+    }
+    await Deno.remove(dir, { recursive: true }); // the fixture directory is owned by this test
+  }
+});
+
+Deno.test("mcp server executes action tool from configured root deck", async () => {
+  const { dir, rootDeckPath } = await createRootDeckFixture();
+  const previous = Deno.env.get("GAMBIT_MCP_ROOT_DECK_PATH"); // saved so the finally block can restore it
+  try {
+    Deno.env.set("GAMBIT_MCP_ROOT_DECK_PATH", rootDeckPath);
+    const response = await handleMcpRequest({
+      jsonrpc: "2.0",
+      id: 3,
+      method: "tools/call",
+      params: {
+        name: "lookup", // declared as both an action and an external tool in the fixture
+        arguments: { query: "hello" },
+      },
+    });
+    const result = (response as {
+      result?: {
+        isError?: boolean;
+        content?: Array<{ text?: string }>;
+      };
+    }).result;
+    assertEquals(result?.isError, false);
+    const text = result?.content?.[0]?.text ?? "";
+    assertEquals(text.includes('"status": 200'), true); // substring checks against the JSON text returned as tool content
+    assertEquals(text.includes('"query": "hello"'), true);
+  } finally {
+    if (previous === undefined) { // restore the environment exactly as it was found
+      Deno.env.delete("GAMBIT_MCP_ROOT_DECK_PATH");
+    } else {
+      Deno.env.set("GAMBIT_MCP_ROOT_DECK_PATH", previous);
+    }
+    await Deno.remove(dir, { recursive: true }); // the fixture directory is owned by this test
+  }
+});
+
+Deno.test("mcp server returns explicit error when calling external-only tool", async () => {
+  const { dir, rootDeckPath } = await createRootDeckFixture();
+  const previous = Deno.env.get("GAMBIT_MCP_ROOT_DECK_PATH"); // saved so the finally block can restore it
+  try {
+    Deno.env.set("GAMBIT_MCP_ROOT_DECK_PATH", rootDeckPath);
+    const response = await handleMcpRequest({
+      jsonrpc: "2.0",
+      id: 4,
+      method: "tools/call",
+      params: {
+        name: "external_only", // declared only under [[tools]] in the fixture, so no action deck backs it
+        arguments: { query: "hello" },
+      },
+    });
+    const result = (response as {
+      result?: {
+        isError?: boolean;
+        content?: Array<{ text?: string }>;
+      };
+    }).result;
+    assertEquals(result?.isError, true); // reported as a tool-level error inside a normal result
+    const text = result?.content?.[0]?.text ?? "";
+    assertEquals(text.includes("unsupported_external_tool"), true);
+  } finally {
+    if (previous === undefined) { // restore the environment exactly as it was found
+      Deno.env.delete("GAMBIT_MCP_ROOT_DECK_PATH");
+    } else {
+      Deno.env.set("GAMBIT_MCP_ROOT_DECK_PATH", previous);
+    }
+    await Deno.remove(dir, { recursive: true }); // the fixture directory is owned by this test
+  }
+});
diff --git a/src/mcp_server.ts b/src/mcp_server.ts
new file mode 100644
index 000000000..2f176717e
--- /dev/null
+++ b/src/mcp_server.ts
@@ -0,0 +1,387 @@
+import { TextLineStream } from "@std/streams/text-line-stream";
+import {
+  loadDeck,
+  type ModelProvider,
+  toJsonSchema,
+} from "@bolt-foundry/gambit-core";
+import { createOpenRouterProvider } from "./providers/openrouter.ts";
+import { createGoogleProvider } from "./providers/google.ts";
+import { createOllamaProvider } from "./providers/ollama.ts";
+import type { JSONValue } from "@bolt-foundry/gambit-core";
+
+type JsonRpcId = string | number | null;
+
+type JsonRpcRequest = { // Incoming message as parsed from JSON; every field is optional because nothing is validated yet.
+  jsonrpc?: string;
+  id?: JsonRpcId; // when undefined, handleMcpRequest returns null instead of a response
+  method?: string;
+  params?: unknown; // narrowed with asRecord before use
+};
+
+type JsonRpcResponse = { // Outgoing envelope; toRpcResult fills `result`, toRpcError fills `error`.
+  jsonrpc: "2.0";
+  id: JsonRpcId; // null when the request carried no usable id
+  result?: unknown;
+  error?: {
+    code: number; // numeric error code, e.g. -32601 for an unknown method
+    message: string;
+    data?: unknown; // optional detail object, e.g. { message } from a caught error
+  };
+};
+
+type McpTool = { // One entry of the `tools` array returned by tools/list.
+  name: string;
+  description: string;
+  inputSchema: Record<string, JSONValue>; // schema object for the tool arguments
+};
+
+const encoder = new TextEncoder();
+const MCP_ALLOW_MODELS_ENV = "GAMBIT_MCP_ALLOW_MODELS";
+const MCP_ROOT_DECK_PATH_ENV = "GAMBIT_MCP_ROOT_DECK_PATH";
+
+type ToolCatalog = { // What the root deck exposes, as built by resolveToolCatalog.
+  tools: Array<McpTool>; // entries returned by tools/list
+  actionToDeck: Map<string, string>; // action name -> path of the deck that implements it
+  externalToolNames: Set<string>; // names declared under the deck's `tools`; runActionTool rejects these unless an action has the same name
+};
+
+async function resolveToolCatalog(): Promise<ToolCatalog> { // Builds the tool catalog from the root deck named by GAMBIT_MCP_ROOT_DECK_PATH; throws when it is unset or blank.
+  const rootDeckPath = Deno.env.get(MCP_ROOT_DECK_PATH_ENV)?.trim();
+  if (!rootDeckPath) {
+    throw new Error(
+      `${MCP_ROOT_DECK_PATH_ENV} is required for MCP tool export.`,
+    );
+  }
+  // This function keeps no cache of its own: loadDeck is called on every invocation.
+  const deck = await loadDeck(rootDeckPath);
+  const actionToDeck = new Map<string, string>();
+  const externalToolNames = new Set<string>();
+  const toolMap = new Map<string, McpTool>();
+  // Action decks are the executable tools; they are listed with an open object schema.
+  for (const action of deck.actionDecks) {
+    actionToDeck.set(action.name, action.path);
+    toolMap.set(action.name, {
+      name: action.name,
+      description: action.description ??
+        `Run action deck "${action.name}" from the root deck.`,
+      inputSchema: {
+        type: "object",
+        additionalProperties: true,
+      },
+    });
+  }
+  // NOTE(review): an external tool sharing an action's name overwrites that action's listing entry here, while runActionTool still resolves the name to the action deck — confirm this is intended.
+  for (const externalTool of deck.tools) {
+    externalToolNames.add(externalTool.name);
+    toolMap.set(externalTool.name, {
+      name: externalTool.name,
+      description: externalTool.description ??
+        `External tool "${externalTool.name}".`,
+      inputSchema: externalTool.inputSchema
+        ? toJsonSchema(externalTool.inputSchema)
+        : { type: "object", additionalProperties: true },
+    });
+  }
+
+  return {
+    tools: Array.from(toolMap.values()),
+    actionToDeck,
+    externalToolNames,
+  };
+}
+
+async function writeJsonRpcResponse(response: JsonRpcResponse): Promise<void> { // emits one JSON-RPC message as a single stdout line
+  let out = encoder.encode(`${JSON.stringify(response)}\n`);
+  while (out.length > 0) out = out.subarray(await Deno.stdout.write(out)); // write() may accept only part of the buffer; drain the rest
+}
+
+/**
+ * Build a JSON-RPC 2.0 error response for request `id`.
+ *
+ * `data` is always present as a key on the error object, possibly with the
+ * value `undefined`.
+ */
+function toRpcError(
+  id: JsonRpcId,
+  code: number,
+  message: string,
+  data?: unknown,
+): JsonRpcResponse {
+  const error = { code, message, data };
+  const envelope: JsonRpcResponse = { jsonrpc: "2.0", id, error };
+  return envelope;
+}
+
+/**
+ * Wrap a successful `result` in a JSON-RPC 2.0 response for request `id`.
+ */
+function toRpcResult(id: JsonRpcId, result: unknown): JsonRpcResponse {
+  const envelope: JsonRpcResponse = { jsonrpc: "2.0", id, result };
+  return envelope;
+}
+
+/** Return `value` itself when it is a non-null, non-array object, else `{}`. */
+function asRecord(value: unknown): Record<string, unknown> {
+  const isRecord = typeof value === "object" && value !== null &&
+    !Array.isArray(value);
+  return isRecord ? value as Record<string, unknown> : {};
+}
+
+/** Opt-in gate read from the environment (trimmed, case-insensitive). */
+function shouldAllowModelBackedDecks(): boolean {
+  const flag = Deno.env.get(MCP_ALLOW_MODELS_ENV)?.trim().toLowerCase();
+  // Unset, empty, or any other value keeps model-backed decks disabled.
+  return flag !== undefined && ["1", "true", "yes"].includes(flag);
+}
+
+/**
+ * Build the provider used when model-backed decks are enabled. API keys and
+ * base URLs are read from the environment. OpenRouter and Google providers
+ * exist only when their key is set; the Ollama provider is always created.
+ */
+function createMcpModelProvider(): ModelProvider {
+  const env = (name: string): string | undefined => Deno.env.get(name);
+  const openRouterKey = env("OPENROUTER_API_KEY")?.trim();
+  const googleKey = (env("GOOGLE_API_KEY") ?? env("GEMINI_API_KEY"))?.trim();
+  const ollamaUrl = env("OLLAMA_BASE_URL") ?? undefined;
+  const openRouterUrl = env("OPENROUTER_BASE_URL") ?? undefined;
+  const googleUrl = env("GOOGLE_BASE_URL") ?? env("GEMINI_BASE_URL") ??
+    undefined;
+  const openrouter = !openRouterKey
+    ? null
+    : createOpenRouterProvider({
+      apiKey: openRouterKey,
+      baseURL: openRouterUrl,
+      enableResponses: true,
+    });
+  const google = !googleKey
+    ? null
+    : createGoogleProvider({ apiKey: googleKey, baseURL: googleUrl });
+  const ollama = createOllamaProvider({
+    apiKey: env("OLLAMA_API_KEY")?.trim() || undefined,
+    baseURL: ollamaUrl,
+  });
+
+  return {
+    chat: async (input) => {
+      // Every known prefix ends at the first "/", so route on that segment.
+      const model = input.model ?? "";
+      switch (model.slice(0, model.indexOf("/") + 1)) {
+        case "openrouter/":
+          if (!openrouter) {
+            throw new Error("OPENROUTER_API_KEY is required for openrouter/*");
+          }
+          return await openrouter.chat(input);
+        case "google/":
+          if (!google) {
+            throw new Error("GOOGLE_API_KEY is required for google/*");
+          }
+          return await google.chat(input);
+        case "ollama/":
+          return await ollama.chat(input);
+      }
+      // No recognized prefix: prefer OpenRouter, then Google.
+      if (openrouter) return await openrouter.chat(input);
+      if (google) return await google.chat(input);
+      throw new Error(
+        "No model provider available for MCP deck execution. Set OPENROUTER_API_KEY or GOOGLE_API_KEY.",
+      );
+    },
+  };
+}
+
+/**
+ * Run the action deck registered for `input.name` and shape the outcome as an
+ * MCP tool result. `isError` is set for unknown or external-only tools, for
+ * thrown errors, and for payloads whose numeric `status` is 400 or above.
+ *
+ * `text` is always a JSON string; `input.args` is never modified.
+ */
+async function runActionTool(input: {
+  name: string;
+  args: Record<string, unknown>;
+  actionToDeck: Map<string, string>;
+  externalToolNames: Set<string>;
+}): Promise<{
+  isError: boolean;
+  text: string;
+}> {
+  const { isGambitEndSignal, runDeck } = await import(
+    "@bolt-foundry/gambit-core"
+  );
+  const deckPath = input.actionToDeck.get(input.name);
+  if (!deckPath) {
+    // Names known only as external tools have no deck that can be run here.
+    const failure = input.externalToolNames.has(input.name)
+      ? { status: 400, message: "unsupported_external_tool", tool: input.name }
+      : { status: 404, message: `unknown tool "${input.name}"` };
+    return { isError: true, text: JSON.stringify(failure) };
+  }
+
+  // Resolve the provider only once a deck will actually run, so rejected
+  // tool names never depend on provider configuration.
+  const noModelProvider = {
+    chat: () => {
+      throw new Error(
+        "MCP action deck execution cannot invoke model-backed decks.",
+      );
+    },
+  };
+  const modelProvider = shouldAllowModelBackedDecks()
+    ? createMcpModelProvider()
+    : noModelProvider;
+
+  // `policy_search` alias: a string `query` stands in for a missing string
+  // `changeSummary`. Work on a copy so the caller's arguments stay intact.
+  let args = input.args;
+  if (
+    input.name === "policy_search" &&
+    typeof args.changeSummary !== "string" &&
+    typeof args.query === "string"
+  ) {
+    const { query, ...rest } = args;
+    args = { ...rest, changeSummary: query };
+  }
+
+  try {
+    const result = await runDeck({
+      path: deckPath,
+      input: args,
+      inputProvided: true,
+      modelProvider,
+      isRoot: false,
+    });
+    const payload = isGambitEndSignal(result) ? result.payload : result;
+    const status = asRecord(payload).status;
+    return {
+      isError: typeof status === "number" && status >= 400,
+      // JSON.stringify(undefined) is undefined, so emit "null" for no payload.
+      text: JSON.stringify(payload ?? null, null, 2),
+    };
+  } catch (err) {
+    // Deck failures are reported as tool errors rather than being rethrown.
+    return {
+      isError: true,
+      text: JSON.stringify({
+        status: 500,
+        message: err instanceof Error ? err.message : String(err),
+      }),
+    };
+  }
+}
+
+export async function handleMcpRequest( // Dispatches one JSON-RPC request; resolves to null when no reply should be sent.
+  request: JsonRpcRequest,
+): Promise<JsonRpcResponse | null> {
+  const id = request.id ?? null;
+  const method = typeof request.method === "string" ? request.method : "";
+  const params = asRecord(request.params);
+  // A request without an id gets no reply: every branch below returns null in that case.
+  if (!method) {
+    if (request.id === undefined) return null;
+    return toRpcError(id, -32600, "Invalid Request: missing method");
+  }
+  // The initialized notification is never answered, whether or not it carries an id.
+  if (method === "notifications/initialized") {
+    return null;
+  }
+  // Handshake: the only capability advertised is `tools`.
+  if (method === "initialize") {
+    if (request.id === undefined) return null;
+    return toRpcResult(id, {
+      protocolVersion: "2024-11-05",
+      capabilities: {
+        tools: {},
+      },
+      serverInfo: {
+        name: "gambit-action-mcp",
+        version: "0.1.0",
+      },
+    });
+  }
+
+  if (method === "ping") {
+    if (request.id === undefined) return null;
+    return toRpcResult(id, {});
+  }
+  // The catalog is resolved again on every tools/list; failures become a -32000 error.
+  if (method === "tools/list") {
+    if (request.id === undefined) return null;
+    try {
+      const toolCatalog = await resolveToolCatalog();
+      return toRpcResult(id, { tools: toolCatalog.tools });
+    } catch (err) {
+      return toRpcError(
+        id,
+        -32000,
+        "MCP tool catalog unavailable",
+        { message: err instanceof Error ? err.message : String(err) },
+      );
+    }
+  }
+  // tools/call: tool-level failures come back as a result with isError set; only thrown errors become a -32000 error.
+  if (method === "tools/call") {
+    if (request.id === undefined) return null;
+    try {
+      const toolCatalog = await resolveToolCatalog();
+      const toolName = typeof params.name === "string" ? params.name : "";
+      const args = asRecord(params.arguments);
+      const toolResult = await runActionTool({
+        name: toolName,
+        args,
+        actionToDeck: toolCatalog.actionToDeck,
+        externalToolNames: toolCatalog.externalToolNames,
+      });
+      return toRpcResult(id, {
+        content: [{ type: "text", text: toolResult.text }],
+        isError: toolResult.isError,
+      });
+    } catch (err) {
+      return toRpcError(
+        id,
+        -32000,
+        "MCP tool catalog unavailable",
+        { message: err instanceof Error ? err.message : String(err) },
+      );
+    }
+  }
+  // Resource listings are always answered with empty arrays.
+  if (method === "resources/list") {
+    if (request.id === undefined) return null;
+    return toRpcResult(id, { resources: [] });
+  }
+
+  if (method === "resources/templates/list") {
+    if (request.id === undefined) return null;
+    return toRpcResult(id, { resourceTemplates: [] });
+  }
+  // Any other method name is reported as -32601.
+  if (request.id === undefined) return null;
+  return toRpcError(id, -32601, `Method not found: ${method}`);
+}
+
+/** Serve newline-delimited JSON-RPC requests from stdin until it closes. */
+export async function runMcpServerLoop(): Promise<void> {
+  const lineStream = Deno.stdin.readable
+    .pipeThrough(new TextDecoderStream())
+    .pipeThrough(new TextLineStream());
+  for await (const line of lineStream) {
+    const trimmed = line.trim();
+    if (!trimmed) continue;
+    let id: JsonRpcId = null, code = -32700, label = "Parse error"; // fault to report while the line has not parsed
+    try {
+      const request = JSON.parse(trimmed) as JsonRpcRequest;
+      [id, code, label] = [request?.id ?? null, -32603, "Internal error"]; // parsed: later failures are not parse errors
+      const response = await handleMcpRequest(request);
+      if (response) await writeJsonRpcResponse(response);
+    } catch (err) {
+      const message = err instanceof Error ? err.message : String(err);
+      await writeJsonRpcResponse(toRpcError(id, code, label, { message }));
+    }
+  }
+}
+
+if (import.meta.main) { // start the stdio loop only when this module is the program entry point
+  await runMcpServerLoop();
+}
diff --git a/src/model_matchers.test.ts b/src/model_matchers.test.ts
index 1ac9aa5a5..5b07fd241 100644
--- a/src/model_matchers.test.ts
+++ b/src/model_matchers.test.ts
@@ -5,6 +5,7 @@ const UNPREFIXED = "llama3";
 const OPENROUTER = "openrouter/anthropic/claude-3-haiku";
 const OLLAMA = "ollama/llama3";
 const GOOGLE = "google/gemini-1.5-pro";
+const CODEX = "codex-cli/default";
 
 Deno.test("provider matchers respect fallback for unprefixed models", () => {
   const ollama = createProviderMatchers("ollama");
@@ -21,6 +22,12 @@ Deno.test("provider matchers respect fallback for unprefixed models", () => {
   assertEquals(openrouter.matchesOpenRouter(UNPREFIXED), true);
   assertEquals(openrouter.matchesOllama(UNPREFIXED), false);
   assertEquals(openrouter.matchesGoogle(UNPREFIXED), false);
+
+  const codex = createProviderMatchers("codex-cli");
+  assertEquals(codex.matchesCodex(UNPREFIXED), true);
+  assertEquals(codex.matchesOpenRouter(UNPREFIXED), false);
+  assertEquals(codex.matchesOllama(UNPREFIXED), false);
+  assertEquals(codex.matchesGoogle(UNPREFIXED), false);
 });
 
 Deno.test("provider matchers always honor explicit prefixes", () => {
@@ -28,6 +35,7 @@ Deno.test("provider matchers always honor explicit prefixes", () => {
   assertEquals(matcher.matchesOpenRouter(OPENROUTER), true);
   assertEquals(matcher.matchesOllama(OLLAMA), true);
   assertEquals(matcher.matchesGoogle(GOOGLE), true);
+  assertEquals(matcher.matchesCodex(CODEX), true);
 });
 
 Deno.test("provider matchers do not claim unprefixed models when fallback is null", () => {
@@ -35,4 +43,19 @@ Deno.test("provider matchers do not claim unprefixed models when fallback is nul
   assertEquals(matcher.matchesOpenRouter(UNPREFIXED), false);
   assertEquals(matcher.matchesOllama(UNPREFIXED), false);
   assertEquals(matcher.matchesGoogle(UNPREFIXED), false);
+  assertEquals(matcher.matchesCodex(UNPREFIXED), false);
+});
+
+Deno.test("provider matchers do not treat legacy codex prefix as unprefixed", () => {
+  const matcher = createProviderMatchers("openrouter");
+  assertEquals(matcher.isUnprefixedModel("codex/default"), false);
+  assertEquals(matcher.matchesOpenRouter("codex/default"), false);
+  assertEquals(matcher.matchesCodex("codex/default"), false);
+});
+
+Deno.test("provider matchers treat bare codex-cli as codex provider", () => {
+  const matcher = createProviderMatchers("openrouter");
+  assertEquals(matcher.isUnprefixedModel("codex-cli"), false);
+  assertEquals(matcher.matchesOpenRouter("codex-cli"), false);
+  assertEquals(matcher.matchesCodex("codex-cli"), true);
 });
diff --git a/src/model_matchers.ts b/src/model_matchers.ts
index c29be3cf6..6bdc64ec3 100644
--- a/src/model_matchers.ts
+++ b/src/model_matchers.ts
@@ -2,21 +2,29 @@ import type { ProviderKey } from "./providers/router.ts";
 import { GOOGLE_PREFIX } from "./providers/google.ts";
 import { OLLAMA_PREFIX } from "./providers/ollama.ts";
 import { OPENROUTER_PREFIX } from "./providers/openrouter.ts";
+import { CODEX_PREFIX } from "./providers/codex.ts";
+
+const LEGACY_CODEX_PREFIX = "codex/";
+const CODEX_PROVIDER_ALIAS = "codex-cli";
 
 export type ProviderMatchers = {
   isUnprefixedModel: (model: string) => boolean;
   matchesOpenRouter: (model: string) => boolean;
   matchesOllama: (model: string) => boolean;
   matchesGoogle: (model: string) => boolean;
+  matchesCodex: (model: string) => boolean;
 };
 
 export function createProviderMatchers(
   effectiveFallbackProvider: ProviderKey | null,
 ): ProviderMatchers {
   const isUnprefixedModel = (model: string): boolean =>
+    model.trim() !== CODEX_PROVIDER_ALIAS &&
     !model.startsWith(OPENROUTER_PREFIX) &&
     !model.startsWith(OLLAMA_PREFIX) &&
-    !model.startsWith(GOOGLE_PREFIX);
+    !model.startsWith(GOOGLE_PREFIX) &&
+    !model.startsWith(CODEX_PREFIX) &&
+    !model.startsWith(LEGACY_CODEX_PREFIX);
 
   return {
     isUnprefixedModel,
@@ -29,5 +37,9 @@ export function createProviderMatchers(
     matchesGoogle: (model: string) =>
       model.startsWith(GOOGLE_PREFIX) ||
       (isUnprefixedModel(model) && effectiveFallbackProvider === "google"),
+    matchesCodex: (model: string) =>
+      model.trim() === CODEX_PROVIDER_ALIAS ||
+      model.startsWith(CODEX_PREFIX) ||
+      (isUnprefixedModel(model) && effectiveFallbackProvider === "codex-cli"),
   };
 }
diff --git a/src/openai_compat.test.ts b/src/openai_compat.test.ts
index 98628868c..6f8d73c9d 100644
--- a/src/openai_compat.test.ts
+++ b/src/openai_compat.test.ts
@@ -10,6 +10,12 @@ function modImportPath() {
   return path.toFileUrl(modPath).href;
 }
 
+function coreModImportPath() { // file URL of ../../gambit-core/mod.ts relative to this test file's directory
+  const testDir = path.dirname(path.fromFileUrl(import.meta.url));
+  const segments = ["..", "..", "gambit-core", "mod.ts"];
+  return path.toFileUrl(path.resolve(testDir, ...segments)).href;
+}
+
 async function writeTempDeck(dir: string, filename: string, contents: string) {
   const target = path.join(dir, filename);
   await Deno.writeTextFile(target, contents);
@@ -329,3 +335,93 @@ Deno.test("chatCompletionsWithDeck rejects tool name collisions", async () => {
     "collision",
   );
 });
+
+Deno.test("chatCompletionsWithDeck action calls inherit root permission denials", async () => {
+  const dir = await Deno.makeTempDir();
+  const modHref = coreModImportPath();
+  const deniedPath = path.join(dir, "blocked.txt");
+  // Child action deck: its run() tries to write a file inside the temp dir.
+  const childPath = await writeTempDeck(
+    dir,
+    "child-write.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.object({}),
+      outputSchema: z.string(),
+      run: async () => {
+        await Deno.writeTextFile(${JSON.stringify(deniedPath)}, "blocked");
+        return "ok";
+      }
+    });
+    `,
+  );
+  // Parent deck: exposes the child as the `child` action.
+  const parentPath = await writeTempDeck(
+    dir,
+    "parent.deck.ts",
+    `
+    import { defineDeck } from "${modHref}";
+    export default defineDeck({
+      label: "parent",
+      modelParams: { model: "ignored" },
+      actionDecks: [{ name: "child", path: "${childPath}" }],
+    });
+    `,
+  );
+  // Scripted provider: the first pass requests the `child` tool, later passes record the tool message and finish.
+  let pass = 0;
+  let toolPayload = "";
+  const provider: ModelProvider = {
+    chat(input) {
+      pass++;
+      if (pass === 1) {
+        return Promise.resolve({
+          message: {
+            role: "assistant",
+            content: null,
+            tool_calls: [{
+              id: "call-1",
+              type: "function",
+              function: { name: "child", arguments: "{}" },
+            }],
+          },
+          finishReason: "tool_calls",
+          toolCalls: [{ id: "call-1", name: "child", args: {} }],
+        });
+      }
+      toolPayload = String(
+        input.messages.find((message) =>
+          message.role === "tool" && message.name === "child"
+        )?.content ?? "",
+      );
+      return Promise.resolve({
+        message: { role: "assistant", content: "done" },
+        finishReason: "stop",
+      });
+    },
+  };
+  // Run with workspace write access denied and the worker sandbox enabled.
+  const response = await chatCompletionsWithDeck({
+    deckPath: parentPath,
+    request: {
+      model: "test-model",
+      messages: [{ role: "user", content: "go" }],
+    },
+    modelProvider: provider,
+    workerSandbox: true,
+    workspacePermissions: {
+      read: true,
+      write: false,
+      run: false,
+      net: false,
+      env: false,
+    },
+    workspacePermissionsBaseDir: dir,
+  });
+  // The run still completes; the tool message must carry an error that mentions allow-write.
+  assertEquals(response.choices[0].message.content, "done");
+  assert(toolPayload.includes('"error"'));
+  assert(toolPayload.includes("allow-write"));
+});
diff --git a/src/openai_compat.ts b/src/openai_compat.ts
index 54b9bb397..db1adbf60 100644
--- a/src/openai_compat.ts
+++ b/src/openai_compat.ts
@@ -1,8 +1,10 @@
+import * as path from "@std/path";
 import {
   assertZodSchema,
   DEFAULT_GUARDRAILS,
   loadDeck,
   RESERVED_TOOL_PREFIX,
+  resolveEffectivePermissions,
   runDeck,
   toJsonSchema,
 } from "@bolt-foundry/gambit-core";
@@ -11,6 +13,8 @@ import type {
   LoadedDeck,
   ModelMessage,
   ModelProvider,
+  PermissionDeclarationInput,
+  PermissionTrace,
   ToolDefinition,
 } from "@bolt-foundry/gambit-core";
 
@@ -254,12 +258,47 @@ export async function chatCompletionsWithDeck(args: {
   guardrails?: Partial<Guardrails>;
   defaultModel?: string;
   onStreamText?: (chunk: string) => void;
+  workspacePermissions?: PermissionDeclarationInput;
+  workspacePermissionsBaseDir?: string;
+  sessionPermissions?: PermissionDeclarationInput;
+  sessionPermissionsBaseDir?: string;
+  parentPermissions?: ReturnType<
+    typeof resolveEffectivePermissions
+  >["effective"];
+  workerSandbox?: boolean;
+  trace?: (event: { type: string; permissions?: PermissionTrace }) => void;
 }): Promise<ChatCompletionsResponse> {
   const executeDeckTools = args.executeDeckTools ?? true;
   const guardrails: Guardrails = { ...DEFAULT_GUARDRAILS, ...args.guardrails };
   const runId = randomId("run");
 
   const deck = await loadDeck(args.deckPath);
+  const rootPermissions = resolveEffectivePermissions({
+    baseDir: path.dirname(deck.path),
+    parent: args.parentPermissions,
+    workspace: args.workspacePermissions
+      ? {
+        baseDir: args.workspacePermissionsBaseDir ?? path.dirname(deck.path),
+        permissions: args.workspacePermissions,
+      }
+      : undefined,
+    declaration: deck.permissions
+      ? {
+        baseDir: path.dirname(deck.path),
+        permissions: deck.permissions,
+      }
+      : undefined,
+    session: args.sessionPermissions
+      ? {
+        baseDir: args.sessionPermissionsBaseDir ?? Deno.cwd(),
+        permissions: args.sessionPermissions,
+      }
+      : undefined,
+  });
+  args.trace?.({
+    type: "openai_compat.permissions",
+    permissions: rootPermissions.trace,
+  });
   const systemPrompt = deckSystemPrompt(deck);
 
   const providedMessages = normalizeMessages(args.request.messages);
@@ -300,6 +339,7 @@ export async function chatCompletionsWithDeck(args: {
       messages,
       tools: tools.length ? tools : undefined,
       stream: Boolean(args.request.stream),
+      deckPath: deck.path,
       onStreamText: args.onStreamText,
       params: providerParamsFromRequest(args.request),
     });
@@ -342,6 +382,23 @@ export async function chatCompletionsWithDeck(args: {
         const actionPath = gambit.actionPathByName.get(call.name);
         if (!actionPath) continue;
         try {
+          const actionRef = deck.actionDecks.find((entry) =>
+            entry.name === call.name
+          );
+          const actionPermissions = resolveEffectivePermissions({
+            baseDir: path.dirname(deck.path),
+            parent: rootPermissions.effective,
+            reference: actionRef?.permissions
+              ? {
+                baseDir: path.dirname(deck.path),
+                permissions: actionRef.permissions,
+              }
+              : undefined,
+          });
+          args.trace?.({
+            type: "openai_compat.action.permissions",
+            permissions: actionPermissions.trace,
+          });
           const childResult = await runDeck({
             path: actionPath,
             input: call.args,
@@ -357,6 +414,14 @@ export async function chatCompletionsWithDeck(args: {
             stream: Boolean(args.request.stream),
             onStreamText: args.onStreamText,
             inputProvided: true,
+            parentPermissions: rootPermissions.effective,
+            referencePermissions: actionRef?.permissions,
+            referencePermissionsBaseDir: path.dirname(deck.path),
+            workspacePermissions: args.workspacePermissions,
+            workspacePermissionsBaseDir: args.workspacePermissionsBaseDir,
+            sessionPermissions: args.sessionPermissions,
+            sessionPermissionsBaseDir: args.sessionPermissionsBaseDir,
+            workerSandbox: args.workerSandbox,
           });
           messages.push({
             role: "tool",
diff --git a/src/project_config.test.ts b/src/project_config.test.ts
index a422514e8..49a431222 100644
--- a/src/project_config.test.ts
+++ b/src/project_config.test.ts
@@ -1,8 +1,9 @@
-import { assert, assertEquals } from "@std/assert";
+import { assert, assertEquals, assertThrows } from "@std/assert";
 import * as path from "@std/path";
 import {
   createModelAliasResolver,
   loadProjectConfig,
+  resolveWorkerSandboxSetting,
   resolveWorkspacePermissions,
 } from "./project_config.ts";
 
@@ -104,3 +105,31 @@ Deno.test("resolveWorkspacePermissions returns workspace ceiling", () => {
     run: { commands: ["deno"] },
   });
 });
+
+Deno.test("resolveWorkerSandboxSetting reads worker_sandbox config", () => {
+  // An explicit worker_sandbox boolean is returned unchanged.
+  const config = { execution: { worker_sandbox: false } };
+  const resolved = resolveWorkerSandboxSetting(config);
+  assertEquals(resolved, false);
+});
+
+Deno.test("resolveWorkerSandboxSetting reads legacy_exec config", () => {
+  // legacy_exec is the inverse toggle: true resolves to sandbox off.
+  const config = { execution: { legacy_exec: true } };
+  const resolved = resolveWorkerSandboxSetting(config);
+  assertEquals(resolved, false);
+});
+
+Deno.test("resolveWorkerSandboxSetting rejects conflicting config", () => {
+  // worker_sandbox and legacy_exec are inverse toggles, so setting both to
+  // true asks for two different execution modes at once and must throw.
+  const contradictory = {
+    execution: { worker_sandbox: true, legacy_exec: true },
+  };
+
+  assertThrows(
+    () => resolveWorkerSandboxSetting(contradictory),
+    Error,
+    "conflicting",
+  );
+});
diff --git a/src/project_config.ts b/src/project_config.ts
index 759322423..04d116b15 100644
--- a/src/project_config.ts
+++ b/src/project_config.ts
@@ -19,6 +19,12 @@ export type ModelAliasConfig = {
 
 export type GambitConfig = {
   workspace?: WorkspaceConfig;
+  execution?: {
+    workerSandbox?: boolean;
+    worker_sandbox?: boolean;
+    legacyExec?: boolean;
+    legacy_exec?: boolean;
+  };
   models?: {
     aliases?: Record<string, ModelAliasConfig>;
   };
@@ -157,3 +163,34 @@ export function resolveWorkspacePermissions(
   if (!isPlainObject(raw)) return undefined;
   return raw as PermissionDeclarationInput;
 }
+
+// Pass booleans through unchanged; any other value counts as "not set".
+function resolveBooleanField(value: unknown): boolean | undefined {
+  return typeof value === "boolean" ? value : undefined;
+}
+
+/**
+ * Derive the worker-sandbox toggle from the `execution` config section.
+ * Returns `undefined` when neither `worker_sandbox` nor `legacy_exec` (or
+ * their camelCase forms) holds a boolean; throws when both are set and equal.
+ */
+export function resolveWorkerSandboxSetting(
+  config?: GambitConfig | null,
+): boolean | undefined {
+  const section = config?.execution;
+  if (!isPlainObject(section)) return undefined;
+  const sandbox = resolveBooleanField(
+    section.workerSandbox ?? section.worker_sandbox,
+  );
+  const legacy = resolveBooleanField(
+    section.legacyExec ?? section.legacy_exec,
+  );
+  // legacy_exec is the inverse switch, so equal values contradict each other.
+  if (sandbox !== undefined && legacy !== undefined && sandbox === legacy) {
+    throw new Error(
+      "gambit.toml execution config is conflicting: worker_sandbox and legacy_exec must be opposites when both are set.",
+    );
+  }
+  if (sandbox !== undefined) return sandbox;
+  return legacy === undefined ? undefined : !legacy;
+}
diff --git a/src/providers/codex.test.ts b/src/providers/codex.test.ts
new file mode 100644
index 000000000..8bcaf1147
--- /dev/null
+++ b/src/providers/codex.test.ts
@@ -0,0 +1,589 @@
+import { assertEquals, assertThrows } from "@std/assert";
+import { createCodexProvider, parseCodexArgsForTest } from "./codex.ts";
+import type { ProviderTraceEvent, SavedState } from "@bolt-foundry/gambit-core";
+
+const enc = new TextEncoder();
+
+// Exercises a two-turn conversation: the first chat call starts a Codex thread
+// and the second resumes it using the thread id carried in the returned state.
+Deno.test("codex provider starts thread and resumes with saved thread id", async () => {
+  // Argument lists received by the stubbed command runner, in call order.
+  const calls: Array<Array<string>> = [];
+  const provider = createCodexProvider({
+    runCommand: ({ args }) => {
+      calls.push(args);
+      // The stub chooses its canned JSONL output based on the second argument.
+      const isResume = args[1] === "resume";
+      const threadId = "thread-123";
+      const stdout = isResume
+        ? [
+          JSON.stringify({
+            type: "item.completed",
+            item: { type: "agent_message", text: "second reply" },
+          }),
+          JSON.stringify({
+            type: "turn.completed",
+            usage: { input_tokens: 7, output_tokens: 3, total_tokens: 10 },
+          }),
+        ].join("\n")
+        : [
+          // Only the non-resume output announces a thread id.
+          JSON.stringify({ type: "thread.started", thread_id: threadId }),
+          JSON.stringify({
+            type: "item.completed",
+            item: { type: "agent_message", text: "first reply" },
+          }),
+          JSON.stringify({
+            type: "turn.completed",
+            usage: { input_tokens: 5, output_tokens: 2, total_tokens: 7 },
+          }),
+        ].join("\n");
+      return Promise.resolve({
+        success: true,
+        code: 0,
+        stdout: enc.encode(stdout),
+        stderr: new Uint8Array(),
+      });
+    },
+  });
+
+  // First turn: no saved state, so a fresh `exec` invocation is expected.
+  const first = await provider.chat({
+    model: "codex-cli/default",
+    messages: [{ role: "user", content: "hello" }],
+  });
+  assertEquals(first.message.content, "first reply");
+  assertEquals(first.updatedState?.meta?.["codex.threadId"], "thread-123");
+  assertEquals(calls.length, 1);
+  assertEquals(calls[0][0], "exec");
+  assertEquals(calls[0][1], "--skip-git-repo-check");
+
+  // Second turn: pass back the state returned by the first turn.
+  const second = await provider.chat({
+    model: "codex-cli/default",
+    messages: [
+      { role: "user", content: "hello" },
+      { role: "assistant", content: "first reply" },
+      { role: "user", content: "follow up" },
+    ],
+    state: first.updatedState as SavedState,
+  });
+
+  assertEquals(second.message.content, "second reply");
+  assertEquals(calls.length, 2);
+  assertEquals(calls[1][0], "exec");
+  assertEquals(calls[1][1], "resume");
+  assertEquals(calls[1].includes("thread-123"), true);
+  // The final argument is the prompt: only the newest user message.
+  assertEquals(calls[1][calls[1].length - 1], "follow up");
+});
+
+Deno.test("codex provider resume does not replay transcript when no new user message", () => {
+  // Saved state that already carries a Codex thread id.
+  const resumableState = {
+    runId: "run-1",
+    messages: [],
+    meta: { "codex.threadId": "thread-123" },
+  } as SavedState;
+  const args = parseCodexArgsForTest({
+    model: "codex-cli/default",
+    state: resumableState,
+    messages: [
+      { role: "system", content: "system text" },
+      { role: "assistant", content: "assistant text" },
+    ],
+  });
+  const [command, subcommand] = args;
+  assertEquals(command, "exec");
+  assertEquals(subcommand, "resume");
+  assertEquals(args.includes("thread-123"), true);
+  // The resume prompt is the newest user message only; with none present the
+  // trailing prompt argument is empty.
+  const [prompt] = args.slice(-1);
+  assertEquals(prompt, "");
+});
+
+// Verifies that the optional `responses` entry point also surfaces the thread
+// id announced by Codex in the returned state metadata.
+Deno.test("codex provider responses returns updatedState with thread metadata", async () => {
+  const provider = createCodexProvider({
+    // Stub runner: always reports a started thread followed by one agent message.
+    runCommand: () =>
+      Promise.resolve({
+        success: true,
+        code: 0,
+        stdout: enc.encode(
+          [
+            JSON.stringify({ type: "thread.started", thread_id: "thread-rsp" }),
+            JSON.stringify({
+              type: "item.completed",
+              item: { type: "agent_message", text: "response mode reply" },
+            }),
+          ].join("\n"),
+        ),
+        stderr: new Uint8Array(),
+      }),
+  });
+
+  // `responses` is optional on the provider, hence the optional call.
+  const result = await provider.responses?.({
+    request: {
+      model: "codex-cli/default",
+      input: [{
+        type: "message",
+        role: "user",
+        content: [{ type: "input_text", text: "hi" }],
+      }],
+    },
+  });
+
+  // A defined result proves the provider implements `responses`.
+  assertEquals(Boolean(result), true);
+  assertEquals(result?.updatedState?.meta?.["codex.threadId"], "thread-rsp");
+});
+
+// Verifies that `request.params` given to `responses` reaches the Codex
+// command line as a config override.
+Deno.test("codex provider responses forwards request.params to codex args", async () => {
+  // Argument lists received by the stubbed command runner.
+  const calls: Array<Array<string>> = [];
+  const provider = createCodexProvider({
+    runCommand: ({ args }) => {
+      calls.push(args);
+      return Promise.resolve({
+        success: true,
+        code: 0,
+        stdout: enc.encode(
+          [
+            JSON.stringify({ type: "thread.started", thread_id: "thread-rsp" }),
+            JSON.stringify({
+              type: "item.completed",
+              item: { type: "agent_message", text: "response mode reply" },
+            }),
+          ].join("\n"),
+        ),
+        stderr: new Uint8Array(),
+      });
+    },
+  });
+
+  await provider.responses?.({
+    request: {
+      model: "codex-cli/default",
+      params: { verbosity: "high" },
+      input: [{
+        type: "message",
+        role: "user",
+        content: [{ type: "input_text", text: "hi" }],
+      }],
+    },
+  });
+
+  assertEquals(calls.length, 1);
+  // The verbosity param must appear as a quoted `model_verbosity` override.
+  assertEquals(calls[0].join(" ").includes('model_verbosity="high"'), true);
+});
+
+// Verifies that a started/completed pair of `mcp_tool_call` items yields
+// exactly one `tool.call` and one `tool.result` trace event.
+Deno.test("codex provider emits tool traces for mcp tool events", async () => {
+  // Trace events captured through `onTraceEvent`.
+  const traces: Array<ProviderTraceEvent> = [];
+  const provider = createCodexProvider({
+    runCommand: ({ onStdoutLine }) => {
+      const lines = [
+        // Tool call begins.
+        JSON.stringify({
+          type: "item.started",
+          item: {
+            id: "tool_1",
+            type: "mcp_tool_call",
+            server: "gambit",
+            tool: "bot_list",
+            arguments: { path: ".", recursive: false },
+            status: "in_progress",
+            result: null,
+            error: null,
+          },
+        }),
+        // Same tool call finishes with a result.
+        JSON.stringify({
+          type: "item.completed",
+          item: {
+            id: "tool_1",
+            type: "mcp_tool_call",
+            server: "gambit",
+            tool: "bot_list",
+            arguments: { path: ".", recursive: false },
+            status: "completed",
+            result: { content: [{ type: "text", text: "ok" }] },
+            error: null,
+          },
+        }),
+        // Final assistant text.
+        JSON.stringify({
+          type: "item.completed",
+          item: { type: "agent_message", text: "done" },
+        }),
+      ];
+      // Feed each line to the streaming callback and also return the full output.
+      lines.forEach((line) => onStdoutLine?.(line));
+      return Promise.resolve({
+        success: true,
+        code: 0,
+        stdout: enc.encode(lines.join("\n")),
+        stderr: new Uint8Array(),
+      });
+    },
+  });
+
+  const result = await provider.chat({
+    model: "codex-cli/default",
+    messages: [{ role: "user", content: "hello" }],
+    onTraceEvent: (event) => traces.push(event),
+  });
+
+  assertEquals(result.message.content, "done");
+  const toolCalls = traces.filter((event) =>
+    event.type === "tool.call"
+  ) as Array<Extract<ProviderTraceEvent, { type: "tool.call" }>>;
+  const toolResults = traces.filter((event) =>
+    event.type === "tool.result"
+  ) as Array<Extract<ProviderTraceEvent, { type: "tool.result" }>>;
+  // The started and completed events share an id, so each trace kind appears once.
+  assertEquals(toolCalls.length, 1);
+  assertEquals(toolResults.length, 1);
+  assertEquals(toolCalls[0].actionCallId, "tool_1");
+  assertEquals(toolResults[0].actionCallId, "tool_1");
+  assertEquals(toolCalls[0].toolKind, "mcp_bridge");
+  assertEquals(toolResults[0].toolKind, "mcp_bridge");
+  assertEquals(toolCalls[0].args, { path: ".", recursive: false });
+  // The result payload keeps only server, status, result and error.
+  assertEquals(
+    toolResults[0].result,
+    {
+      server: "gambit",
+      status: "completed",
+      result: { content: [{ type: "text", text: "ok" }] },
+      error: null,
+    },
+  );
+});
+
+// Verifies that `command_execution` items are reported as tool traces named
+// after the item type, with the command as args and its output as result.
+Deno.test("codex provider emits tool traces for command execution events", async () => {
+  // Trace events captured through `onTraceEvent`.
+  const traces: Array<ProviderTraceEvent> = [];
+  const provider = createCodexProvider({
+    runCommand: ({ onStdoutLine }) => {
+      const lines = [
+        // Command begins; no output or exit code yet.
+        JSON.stringify({
+          type: "item.started",
+          item: {
+            id: "item_1",
+            type: "command_execution",
+            command: "/bin/bash -lc ls",
+            aggregated_output: "",
+            exit_code: null,
+            status: "in_progress",
+          },
+        }),
+        // Same command finishes successfully.
+        JSON.stringify({
+          type: "item.completed",
+          item: {
+            id: "item_1",
+            type: "command_execution",
+            command: "/bin/bash -lc ls",
+            aggregated_output: "INTENT.md\nPROMPT.md\n",
+            exit_code: 0,
+            status: "completed",
+          },
+        }),
+        // Final assistant text.
+        JSON.stringify({
+          type: "item.completed",
+          item: { type: "agent_message", text: "done" },
+        }),
+      ];
+      // Feed each line to the streaming callback and also return the full output.
+      lines.forEach((line) => onStdoutLine?.(line));
+      return Promise.resolve({
+        success: true,
+        code: 0,
+        stdout: enc.encode(lines.join("\n")),
+        stderr: new Uint8Array(),
+      });
+    },
+  });
+
+  const result = await provider.chat({
+    model: "codex-cli/default",
+    messages: [{ role: "user", content: "hello" }],
+    onTraceEvent: (event) => traces.push(event),
+  });
+
+  assertEquals(result.message.content, "done");
+  const toolCalls = traces.filter((event) =>
+    event.type === "tool.call"
+  ) as Array<Extract<ProviderTraceEvent, { type: "tool.call" }>>;
+  const toolResults = traces.filter((event) =>
+    event.type === "tool.result"
+  ) as Array<Extract<ProviderTraceEvent, { type: "tool.result" }>>;
+  assertEquals(toolCalls.length, 1);
+  assertEquals(toolResults.length, 1);
+  assertEquals(toolCalls[0].actionCallId, "item_1");
+  assertEquals(toolResults[0].actionCallId, "item_1");
+  // The item carries no tool name, so the item type is used as the trace name.
+  assertEquals(toolCalls[0].name, "command_execution");
+  assertEquals(toolResults[0].name, "command_execution");
+  assertEquals(toolCalls[0].toolKind, "mcp_bridge");
+  assertEquals(toolResults[0].toolKind, "mcp_bridge");
+  assertEquals(toolCalls[0].args, { command: "/bin/bash -lc ls" });
+  // `aggregated_output` is surfaced under the `output` key.
+  assertEquals(
+    toolResults[0].result,
+    {
+      command: "/bin/bash -lc ls",
+      status: "completed",
+      output: "INTENT.md\nPROMPT.md\n",
+      exit_code: 0,
+    },
+  );
+});
+
+// Verifies that a lone completed `file_change` item (no preceding
+// `item.started`) still produces both a `tool.call` and a `tool.result` trace.
+Deno.test("codex provider emits tool traces for file change events", async () => {
+  // Trace events captured through `onTraceEvent`.
+  const traces: Array<ProviderTraceEvent> = [];
+  const provider = createCodexProvider({
+    runCommand: ({ onStdoutLine }) => {
+      const lines = [
+        // The file change arrives only as a completed item.
+        JSON.stringify({
+          type: "item.completed",
+          item: {
+            id: "item_2",
+            type: "file_change",
+            changes: [{
+              path: "/tmp/PROMPT.md",
+              kind: "update",
+            }],
+            status: "completed",
+          },
+        }),
+        // Final assistant text.
+        JSON.stringify({
+          type: "item.completed",
+          item: { type: "agent_message", text: "done" },
+        }),
+      ];
+      // Feed each line to the streaming callback and also return the full output.
+      lines.forEach((line) => onStdoutLine?.(line));
+      return Promise.resolve({
+        success: true,
+        code: 0,
+        stdout: enc.encode(lines.join("\n")),
+        stderr: new Uint8Array(),
+      });
+    },
+  });
+
+  const result = await provider.chat({
+    model: "codex-cli/default",
+    messages: [{ role: "user", content: "hello" }],
+    onTraceEvent: (event) => traces.push(event),
+  });
+
+  assertEquals(result.message.content, "done");
+  const toolCalls = traces.filter((event) =>
+    event.type === "tool.call"
+  ) as Array<Extract<ProviderTraceEvent, { type: "tool.call" }>>;
+  const toolResults = traces.filter((event) =>
+    event.type === "tool.result"
+  ) as Array<Extract<ProviderTraceEvent, { type: "tool.result" }>>;
+  assertEquals(toolCalls.length, 1);
+  assertEquals(toolResults.length, 1);
+  assertEquals(toolCalls[0].actionCallId, "item_2");
+  assertEquals(toolResults[0].actionCallId, "item_2");
+  // The item carries no tool name, so the item type is used as the trace name.
+  assertEquals(toolCalls[0].name, "file_change");
+  assertEquals(toolResults[0].name, "file_change");
+  assertEquals(toolCalls[0].toolKind, "mcp_bridge");
+  assertEquals(toolResults[0].toolKind, "mcp_bridge");
+  assertEquals(toolCalls[0].args, {
+    changes: [{ path: "/tmp/PROMPT.md", kind: "update" }],
+  });
+  assertEquals(toolResults[0].result, {
+    status: "completed",
+    changes: [{ path: "/tmp/PROMPT.md", kind: "update" }],
+  });
+});
+
+Deno.test("codex provider adds mcp config args by default", () => {
+  // Snapshot both MCP toggles, then clear them so the default path is exercised.
+  const toggleNames = ["GAMBIT_CODEX_ENABLE_MCP", "GAMBIT_CODEX_DISABLE_MCP"];
+  const snapshot = toggleNames.map((name) =>
+    [name, Deno.env.get(name)] as const
+  );
+  for (const name of toggleNames) Deno.env.delete(name);
+  try {
+    const commandLine = parseCodexArgsForTest({
+      model: "codex-cli/default",
+      messages: [{ role: "user", content: "hi" }],
+      cwd: "/tmp/test-cwd",
+      deckPath: "/tmp/root/PROMPT.md",
+    }).join(" ");
+    // Every one of these MCP server config keys must be present.
+    const expectedKeys = [
+      "mcp_servers.gambit.command",
+      "mcp_servers.gambit.args",
+      "mcp_servers.gambit.cwd",
+      "mcp_servers.gambit.env.GAMBIT_BOT_ROOT",
+      "mcp_servers.gambit.env.GAMBIT_MCP_ROOT_DECK_PATH",
+    ];
+    for (const key of expectedKeys) {
+      assertEquals(commandLine.includes(key), true);
+    }
+  } finally {
+    // Put the environment back exactly as it was found.
+    for (const [name, previous] of snapshot) {
+      if (previous === undefined) {
+        Deno.env.delete(name);
+      } else {
+        Deno.env.set(name, previous);
+      }
+    }
+  }
+});
+
+Deno.test("codex provider omits MCP root deck env when deck path is absent", () => {
+  // No `deckPath` is supplied, so no root-deck env override may appear.
+  const rootDeckKey = "mcp_servers.gambit.env.GAMBIT_MCP_ROOT_DECK_PATH";
+  const commandLine = parseCodexArgsForTest({
+    model: "codex-cli/default",
+    messages: [{ role: "user", content: "hi" }],
+    cwd: "/tmp/test-cwd",
+  }).join(" ");
+  const mentionsRootDeck = commandLine.includes(rootDeckKey);
+  assertEquals(mentionsRootDeck, false);
+});
+
+Deno.test("codex provider omits mcp args when disable env is set", () => {
+  // Set both toggles: the disable flag must win over the enable flag.
+  const toggleNames = ["GAMBIT_CODEX_ENABLE_MCP", "GAMBIT_CODEX_DISABLE_MCP"];
+  const snapshot = toggleNames.map((name) =>
+    [name, Deno.env.get(name)] as const
+  );
+  for (const name of toggleNames) Deno.env.set(name, "1");
+  try {
+    const commandLine = parseCodexArgsForTest({
+      model: "codex-cli/default",
+      messages: [{ role: "user", content: "hi" }],
+      cwd: "/tmp/test-cwd",
+    }).join(" ");
+    const hasMcpCommand = commandLine.includes("mcp_servers.gambit.command");
+    assertEquals(hasMcpCommand, false);
+  } finally {
+    // Put the environment back exactly as it was found.
+    for (const [name, previous] of snapshot) {
+      if (previous === undefined) {
+        Deno.env.delete(name);
+      } else {
+        Deno.env.set(name, previous);
+      }
+    }
+  }
+});
+
+Deno.test("codex provider maps reasoning settings into codex config args", () => {
+  const commandLine = parseCodexArgsForTest({
+    model: "codex-cli/default",
+    messages: [{ role: "user", content: "hi" }],
+    params: {
+      reasoning: { effort: "high", summary: "detailed" },
+      verbosity: "low",
+    },
+  }).join(" ");
+  // Each call-time setting must surface as its Codex config key.
+  const expectedKeys = [
+    "model_reasoning_effort",
+    "model_reasoning_summary",
+    "model_verbosity",
+  ];
+  for (const key of expectedKeys) {
+    assertEquals(commandLine.includes(key), true);
+  }
+});
+
+Deno.test("codex provider prefers call-time reasoning params over env vars", () => {
+  // Env values that the call-time params below are expected to override.
+  const envOverrides: Array<[string, string]> = [
+    ["GAMBIT_CODEX_REASONING_EFFORT", "low"],
+    ["GAMBIT_CODEX_REASONING_SUMMARY", "auto"],
+    ["GAMBIT_CODEX_VERBOSITY", "medium"],
+  ];
+  const snapshot = envOverrides.map(([name]) =>
+    [name, Deno.env.get(name)] as const
+  );
+  for (const [name, value] of envOverrides) Deno.env.set(name, value);
+  try {
+    const commandLine = parseCodexArgsForTest({
+      model: "codex-cli/default",
+      messages: [{ role: "user", content: "hi" }],
+      params: {
+        reasoning: { effort: "high", summary: "detailed" },
+        verbosity: "low",
+      },
+    }).join(" ");
+    // Call-time values are present...
+    const expectedPresent = [
+      'model_reasoning_effort="high"',
+      'model_reasoning_summary="detailed"',
+      'model_verbosity="low"',
+    ];
+    for (const fragment of expectedPresent) {
+      assertEquals(commandLine.includes(fragment), true);
+    }
+    // ...and the env-provided values are not.
+    const expectedAbsent = [
+      'model_reasoning_effort="low"',
+      'model_reasoning_summary="auto"',
+      'model_verbosity="medium"',
+    ];
+    for (const fragment of expectedAbsent) {
+      assertEquals(commandLine.includes(fragment), false);
+    }
+  } finally {
+    // Put the environment back exactly as it was found.
+    for (const [name, previous] of snapshot) {
+      if (previous === undefined) {
+        Deno.env.delete(name);
+      } else {
+        Deno.env.set(name, previous);
+      }
+    }
+  }
+});
+
+Deno.test("codex provider allows unvalidated reasoning env fallback values", () => {
+  const envName = "GAMBIT_CODEX_REASONING_EFFORT";
+  const previous = Deno.env.get(envName);
+  // A value outside the call-time enum must still pass through from the env.
+  Deno.env.set(envName, "ultra-custom");
+  try {
+    const commandLine = parseCodexArgsForTest({
+      model: "codex-cli/default",
+      messages: [{ role: "user", content: "hi" }],
+    }).join(" ");
+    const forwarded = commandLine.includes(
+      'model_reasoning_effort="ultra-custom"',
+    );
+    assertEquals(forwarded, true);
+  } finally {
+    // Put the environment back exactly as it was found.
+    if (previous !== undefined) {
+      Deno.env.set(envName, previous);
+    } else {
+      Deno.env.delete(envName);
+    }
+  }
+});
+
+Deno.test("codex provider treats bare codex-cli as codex-cli/default", () => {
+  // Builds the argument list for a given model with an otherwise fixed request.
+  const argsFor = (model: string) =>
+    parseCodexArgsForTest({
+      model,
+      messages: [{ role: "user", content: "hi" }],
+    });
+  const bare = argsFor("codex-cli");
+  const explicit = argsFor("codex-cli/default");
+  // Neither spelling selects a concrete model, so no `-m` flag is expected.
+  assertEquals(bare.includes("-m"), false);
+  assertEquals(explicit.includes("-m"), false);
+  assertEquals(bare, explicit);
+});
+
+Deno.test("codex provider forwards codex-cli/<model> through -m", () => {
+  const args = parseCodexArgsForTest({
+    model: "codex-cli/gpt-5.2-codex",
+    messages: [{ role: "user", content: "hi" }],
+  });
+  // The model name must directly follow the `-m` flag.
+  const flagPosition = args.indexOf("-m");
+  assertEquals(flagPosition >= 0, true);
+  assertEquals(args[flagPosition + 1], "gpt-5.2-codex");
+});
+
+Deno.test("codex provider rejects legacy codex prefix", () => {
+  // The pre-rename `codex/` prefix must fail with an explanatory Error.
+  const parseLegacyModel = () =>
+    parseCodexArgsForTest({
+      model: "codex/default",
+      messages: [{ role: "user", content: "hi" }],
+    });
+  assertThrows(
+    parseLegacyModel,
+    Error,
+    'Legacy Codex model prefix "codex"',
+  );
+});
+
+Deno.test("codex provider rejects invalid call-time reasoning values", () => {
+  // "ultra" is not an accepted call-time effort, so parsing must throw.
+  const parseInvalidEffort = () =>
+    parseCodexArgsForTest({
+      model: "codex-cli/default",
+      messages: [{ role: "user", content: "hi" }],
+      params: {
+        reasoning: { effort: "ultra" },
+      },
+    });
+  assertThrows(
+    parseInvalidEffort,
+    Error,
+    "Invalid Codex call-time reasoning.effort",
+  );
+});
diff --git a/src/providers/codex.ts b/src/providers/codex.ts
new file mode 100644
index 000000000..38676acf5
--- /dev/null
+++ b/src/providers/codex.ts
@@ -0,0 +1,972 @@
+import * as path from "@std/path";
+import type {
+  CreateResponseRequest,
+  CreateResponseResponse,
+  JSONValue,
+  ModelMessage,
+  ModelProvider,
+  ResponseEvent,
+  ResponseItem,
+  ResponseMessageItem,
+  SavedState,
+} from "@bolt-foundry/gambit-core";
+
+// Prefix that identifies Codex CLI model names (`codex-cli/<model>`).
+export const CODEX_PREFIX = "codex-cli/";
+// Metadata key for the Codex thread id.
+// NOTE(review): presumably the key written into saved-state `meta`; the write
+// site is outside this section — confirm.
+const CODEX_THREAD_META_KEY = "codex.threadId";
+// Env var whose non-blank value overrides the working directory (see `runCwd`).
+const BOT_ROOT_ENV = "GAMBIT_BOT_ROOT";
+// Env vars read by `shouldEnableMcpBridge` to turn the MCP bridge on or off.
+const CODEX_MCP_ENV = "GAMBIT_CODEX_ENABLE_MCP";
+const CODEX_DISABLE_MCP_ENV = "GAMBIT_CODEX_DISABLE_MCP";
+// Env fallbacks used by `codexConfigArgs` when no call-time value is given.
+const CODEX_REASONING_EFFORT_ENV = "GAMBIT_CODEX_REASONING_EFFORT";
+const CODEX_REASONING_SUMMARY_ENV = "GAMBIT_CODEX_REASONING_SUMMARY";
+const CODEX_VERBOSITY_ENV = "GAMBIT_CODEX_VERBOSITY";
+// NOTE(review): presumably names an env var that overrides the Codex binary;
+// its use is not visible in this section — confirm.
+const CODEX_BIN_ENV = "GAMBIT_CODEX_BIN";
+// Name of the env var handed to the MCP server to carry the root deck path.
+const MCP_ROOT_DECK_PATH_ENV = "GAMBIT_MCP_ROOT_DECK_PATH";
+// Absolute path to `../mcp_server.ts`, resolved relative to this module's
+// directory; passed to `deno run -A` when the MCP bridge is configured.
+const MCP_SERVER_PATH = path.resolve(
+  path.dirname(path.fromFileUrl(import.meta.url)),
+  "../mcp_server.ts",
+);
+
+/**
+ * Token usage attached to a `turn.completed` event. Fields are `unknown`
+ * because they come from parsed JSON and must be validated before use.
+ */
+type CodexTurnUsage = {
+  input_tokens?: unknown;
+  output_tokens?: unknown;
+  total_tokens?: unknown;
+};
+
+/**
+ * One JSON event read from Codex output. The first three members describe the
+ * event types with known fields; the last member is a catch-all for any other
+ * event type.
+ */
+type CodexEvent =
+  // Announces the thread id.
+  | { type: "thread.started"; thread_id?: unknown }
+  // A finished item (for example an agent message carrying `text`).
+  | {
+    type: "item.completed";
+    item?: {
+      type?: unknown;
+      text?: unknown;
+    };
+  }
+  // End of a turn, optionally with token usage.
+  | { type: "turn.completed"; usage?: CodexTurnUsage }
+  // Any other event; payload fields are untyped.
+  | { type: string; [key: string]: unknown };
+
+/** Result of one command run: success flag, exit code and raw output bytes. */
+type CommandOutput = {
+  success: boolean;
+  code: number;
+  stdout: Uint8Array;
+  stderr: Uint8Array;
+};
+
+/**
+ * Function that executes a command and resolves with its output.
+ *
+ * - `args`: command-line arguments.
+ * - `cwd`: working directory for the command.
+ * - `signal`: optional abort signal.
+ * - `onStdoutLine`: optional callback for streamed stdout lines
+ *   (NOTE(review): presumably invoked once per line as output arrives — confirm
+ *   against the default runner).
+ */
+type CommandRunner = (input: {
+  args: Array<string>;
+  cwd: string;
+  signal?: AbortSignal;
+  onStdoutLine?: (line: string) => void;
+}) => Promise<CommandOutput>;
+
+// Accepted call-time values for `reasoning.effort` (checked in lower case).
+const REASONING_EFFORT_VALUES = new Set([
+  "none",
+  "minimal",
+  "low",
+  "medium",
+  "high",
+  "xhigh",
+]);
+// Accepted call-time values for `reasoning.summary` (checked in lower case).
+const REASONING_SUMMARY_VALUES = new Set([
+  "none",
+  "auto",
+  "concise",
+  "detailed",
+]);
+// Accepted call-time values for `verbosity` (checked in lower case).
+const VERBOSITY_VALUES = new Set([
+  "low",
+  "medium",
+  "high",
+]);
+
+/**
+ * Picks the working directory: the trimmed value of the bot-root env var when
+ * it is set to something non-blank, otherwise the process working directory.
+ */
+function runCwd(): string {
+  const configuredRoot = Deno.env.get(BOT_ROOT_ENV)?.trim();
+  return configuredRoot ? configuredRoot : Deno.cwd();
+}
+
+/**
+ * Decides whether the MCP bridge config should be added.
+ *
+ * A truthy disable flag always wins. Otherwise the bridge is on unless the
+ * enable flag is set to something that is not "1", "true" or "yes"
+ * (case-insensitive, surrounding whitespace ignored).
+ */
+function shouldEnableMcpBridge(): boolean {
+  const isTruthy = (raw: string): boolean => {
+    const token = raw.trim().toLowerCase();
+    return token === "1" || token === "true" || token === "yes";
+  };
+  const disableFlag = Deno.env.get(CODEX_DISABLE_MCP_ENV);
+  if (disableFlag && isTruthy(disableFlag)) return false;
+  const enableFlag = Deno.env.get(CODEX_MCP_ENV);
+  // An unset (or empty) enable flag means "use the default", which is on.
+  return enableFlag ? isTruthy(enableFlag) : true;
+}
+
+/**
+ * Encodes `value` as a TOML basic (double-quoted) string.
+ *
+ * Backslashes and double quotes are escaped, and so is every control character
+ * that TOML forbids unescaped in a basic string (U+0000–U+0008,
+ * U+000A–U+001F, U+007F). Tab is left as-is because TOML allows it raw.
+ *
+ * @param value Text to encode.
+ * @returns The quoted, escaped TOML string literal.
+ */
+function tomlString(value: string): string {
+  const escaped = value.replace(
+    /[\\"\u0000-\u0008\u000a-\u001f\u007f]/g,
+    (ch) => {
+      switch (ch) {
+        case "\\":
+          return "\\\\";
+        case '"':
+          return '\\"';
+        case "\b":
+          return "\\b";
+        case "\n":
+          return "\\n";
+        case "\f":
+          return "\\f";
+        case "\r":
+          return "\\r";
+        default:
+          // Remaining control characters use the four-digit unicode escape.
+          return `\\u${ch.charCodeAt(0).toString(16).padStart(4, "0")}`;
+      }
+    },
+  );
+  return `"${escaped}"`;
+}
+
+/** Encodes `values` as a TOML array of quoted strings, e.g. `["a","b"]`. */
+function tomlStringArray(values: Array<string>): string {
+  const quoted = values.map((entry) => tomlString(entry));
+  return "[" + quoted.join(",") + "]";
+}
+
+/**
+ * Builds the `-c key=value` config overrides for a Codex invocation.
+ *
+ * Reasoning effort, reasoning summary and verbosity each come from the
+ * call-time `params` when given as a string (validated against the allowed
+ * set) and otherwise from their env var (not validated). When the MCP bridge is
+ * enabled, the `mcp_servers.gambit.*` settings are appended as well.
+ *
+ * @param input.cwd Working directory given to the MCP server.
+ * @param input.deckPath Optional root deck path exported to the MCP server.
+ * @param input.params Optional call-time parameters.
+ * @returns Flat list of alternating `-c` flags and `key=value` assignments.
+ * @throws Error when a call-time value is not in its allowed set.
+ */
+function codexConfigArgs(input: {
+  cwd: string;
+  deckPath?: string;
+  params?: Record<string, unknown>;
+}): Array<string> {
+  const overrides: Array<string> = [];
+  const addOverride = (assignment: string): void => {
+    overrides.push("-c", assignment);
+  };
+  // Resolves one setting (call-time value wins over the env var) and records
+  // it when the outcome is a non-blank string.
+  const addSetting = (setting: {
+    callValue: unknown;
+    allowed: Set<string>;
+    field: string;
+    envName: string;
+    configKey: string;
+  }): void => {
+    const resolved = typeof setting.callValue === "string"
+      ? assertEnumForCallTime({
+        value: setting.callValue,
+        allowed: setting.allowed,
+        field: setting.field,
+      })
+      : Deno.env.get(setting.envName);
+    if (typeof resolved !== "string") return;
+    const trimmed = resolved.trim();
+    if (trimmed) addOverride(`${setting.configKey}=${tomlString(trimmed)}`);
+  };
+
+  const callParams = input.params ?? {};
+  const reasoning = asRecord(callParams.reasoning);
+  addSetting({
+    callValue: reasoning.effort,
+    allowed: REASONING_EFFORT_VALUES,
+    field: "reasoning.effort",
+    envName: CODEX_REASONING_EFFORT_ENV,
+    configKey: "model_reasoning_effort",
+  });
+  addSetting({
+    callValue: reasoning.summary,
+    allowed: REASONING_SUMMARY_VALUES,
+    field: "reasoning.summary",
+    envName: CODEX_REASONING_SUMMARY_ENV,
+    configKey: "model_reasoning_summary",
+  });
+  addSetting({
+    callValue: callParams.verbosity,
+    allowed: VERBOSITY_VALUES,
+    field: "verbosity",
+    envName: CODEX_VERBOSITY_ENV,
+    configKey: "model_verbosity",
+  });
+
+  if (!shouldEnableMcpBridge()) return overrides;
+
+  // Register the Gambit MCP server as a `deno run -A` child process.
+  addOverride(`mcp_servers.gambit.command=${tomlString("deno")}`);
+  addOverride(
+    `mcp_servers.gambit.args=${
+      tomlStringArray(["run", "-A", MCP_SERVER_PATH])
+    }`,
+  );
+  addOverride(`mcp_servers.gambit.cwd=${tomlString(input.cwd)}`);
+  addOverride(
+    `mcp_servers.gambit.env.GAMBIT_BOT_ROOT=${tomlString(input.cwd)}`,
+  );
+  // The root deck path is exported only when a non-blank one was provided.
+  const rootDeckPath = input.deckPath?.trim();
+  if (rootDeckPath) {
+    addOverride(
+      `mcp_servers.gambit.env.${MCP_ROOT_DECK_PATH_ENV}=${
+        tomlString(rootDeckPath)
+      }`,
+    );
+  }
+  addOverride("mcp_servers.gambit.enabled=true");
+  addOverride("mcp_servers.gambit.startup_timeout_sec=30");
+  addOverride("mcp_servers.gambit.tool_timeout_sec=30");
+  return overrides;
+}
+
+/**
+ * Reduces a model identifier to the bare model name.
+ *
+ * - blank input yields `""`;
+ * - the bare `codex-cli` yields `"default"`;
+ * - `codex-cli/<model>` yields `<model>`;
+ * - anything else is returned trimmed.
+ *
+ * @throws Error for the legacy `codex` / `codex/...` spelling, and for a
+ *   `codex-cli/` prefix with nothing after it.
+ */
+function normalizeCodexModel(model: string): string {
+  const candidate = model.trim();
+  if (candidate.length === 0) return "";
+  if (candidate === "codex-cli") return "default";
+
+  const usesLegacyPrefix = candidate === "codex" ||
+    candidate.startsWith("codex/");
+  if (usesLegacyPrefix) {
+    throw new Error(
+      'Legacy Codex model prefix "codex" is no longer supported. Use "codex-cli/default" or "codex-cli/<model>".',
+    );
+  }
+
+  // Names without the Codex prefix pass through unchanged (apart from trimming).
+  if (!candidate.startsWith(CODEX_PREFIX)) return candidate;
+
+  const modelSegment = candidate.slice(CODEX_PREFIX.length).trim();
+  if (modelSegment.length === 0) {
+    throw new Error(
+      'Codex model prefix requires a model segment. Use "codex-cli/default" or "codex-cli/<model>".',
+    );
+  }
+  return modelSegment;
+}
+
+/**
+ * Validates a call-time setting against its allowed values.
+ *
+ * The value is trimmed and lower-cased before the check. A blank value is
+ * returned as `""` without validation.
+ *
+ * @param input.value Raw value supplied by the caller.
+ * @param input.allowed Accepted (lower-case) values.
+ * @param input.field Setting name used in the error message.
+ * @returns The normalized value.
+ * @throws Error when the normalized value is non-blank and not allowed.
+ */
+function assertEnumForCallTime(input: {
+  value: string;
+  allowed: Set<string>;
+  field: string;
+}): string {
+  const candidate = input.value.trim().toLowerCase();
+  if (candidate === "" || input.allowed.has(candidate)) return candidate;
+  const choices = [...input.allowed].join(", ");
+  throw new Error(
+    `Invalid Codex call-time ${input.field}: "${input.value}". Allowed values: ${choices}.`,
+  );
+}
+
+/**
+ * Parses `text` as JSON and returns it only when it is a plain JSON object.
+ *
+ * Malformed JSON, primitives, `null` and arrays all yield an empty object, so
+ * callers can read properties without further checks. Arrays are rejected
+ * explicitly: `typeof [] === "object"`, but an array is not a keyed record.
+ *
+ * @param text Raw JSON text.
+ * @returns The parsed object, or `{}` when `text` is not a JSON object.
+ */
+function safeJsonObject(
+  text: string,
+): Record<string, JSONValue> {
+  try {
+    const parsed: unknown = JSON.parse(text);
+    if (
+      parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)
+    ) {
+      return parsed as Record<string, JSONValue>;
+    }
+  } catch {
+    // Malformed JSON is expected here; fall through to the empty object.
+  }
+  return {};
+}
+
+/** Parses `text` as JSON; when parsing fails the raw text is returned as-is. */
+function parseJsonValue(text: string): JSONValue {
+  let decoded: JSONValue = text;
+  try {
+    decoded = JSON.parse(text) as JSONValue;
+  } catch {
+    // Not valid JSON: keep the original text.
+  }
+  return decoded;
+}
+
+/**
+ * Returns `value` itself when it is a non-null, non-array object; every other
+ * input yields a fresh empty object.
+ */
+function asRecord(value: unknown): Record<string, unknown> {
+  const isObjectLike = typeof value === "object" && value !== null;
+  if (!isObjectLike || Array.isArray(value)) return {};
+  return value as Record<string, unknown>;
+}
+
+/**
+ * Translates one Codex `item.*` event into `tool.call` / `tool.result` trace
+ * events.
+ *
+ * Events are ignored when the type does not start with `item.`, when `item` is
+ * not a plain object, when the item has no string `id`/`call_id`, or when the
+ * item is a `reasoning` or `agent_message` item.
+ *
+ * For each call id, `tool.call` is emitted at most once (on the first event
+ * seen for that id) and `tool.result` at most once (only on `item.completed`
+ * or `item.done`).
+ *
+ * @param input.event Parsed Codex event.
+ * @param input.emit Sink that receives the generated trace events.
+ * @param input.toolNames Call id → tool name, filled in as calls are emitted.
+ * @param input.emittedCalls Call ids that already produced a `tool.call`.
+ * @param input.emittedResults Call ids that already produced a `tool.result`.
+ */
+function emitCodexToolEvents(input: {
+  event: Record<string, JSONValue>;
+  emit: (event: Record<string, JSONValue>) => void;
+  toolNames: Map<string, string>;
+  emittedCalls: Set<string>;
+  emittedResults: Set<string>;
+}): void {
+  const payloadType = typeof input.event.type === "string"
+    ? input.event.type
+    : "";
+  if (!payloadType.startsWith("item.")) return;
+  const item = input.event.item;
+  if (!item || typeof item !== "object" || Array.isArray(item)) return;
+  const record = item as Record<string, JSONValue>;
+  const itemType = typeof record.type === "string" ? record.type : "";
+  // The call id comes from `id`, falling back to `call_id`.
+  const callId = typeof record.id === "string"
+    ? record.id
+    : typeof record.call_id === "string"
+    ? record.call_id
+    : "";
+  if (!callId) return;
+
+  // Reasoning and assistant text are not tool activity.
+  if (itemType === "reasoning" || itemType === "agent_message") return;
+
+  // Name preference: `tool`, then `name`, then a name remembered for this call
+  // id, then the item type itself.
+  const name = typeof record.tool === "string"
+    ? record.tool
+    : typeof record.name === "string"
+    ? record.name
+    : input.toolNames.get(callId) ?? itemType;
+
+  // Arguments depend on the item type; string `arguments` are JSON-decoded
+  // (left as raw text when they are not valid JSON).
+  const normalizedArgs = (() => {
+    if (itemType === "command_execution") {
+      return { command: record.command ?? "" } as JSONValue;
+    }
+    if (itemType === "file_change") {
+      return { changes: record.changes ?? [] } as JSONValue;
+    }
+    const rawArgs = record.arguments;
+    return typeof rawArgs === "string"
+      ? parseJsonValue(rawArgs)
+      : rawArgs ?? {};
+  })();
+
+  // First sighting of this call id: announce the call and remember its name.
+  if (!input.emittedCalls.has(callId)) {
+    input.emittedCalls.add(callId);
+    input.toolNames.set(callId, name);
+    input.emit({
+      type: "tool.call",
+      actionCallId: callId,
+      name,
+      args: normalizedArgs,
+      toolKind: "mcp_bridge",
+    });
+  }
+
+  if (input.emittedResults.has(callId)) return;
+  const resolvedName = name ?? input.toolNames.get(callId) ?? itemType;
+  // An empty name means no result is reported.
+  if (!resolvedName) return;
+  // Results are reported only for terminal events.
+  const isTerminal = payloadType === "item.completed" ||
+    payloadType === "item.done";
+  if (!isTerminal) return;
+  input.emittedResults.add(callId);
+  // Result shape depends on the item type; unknown types pass the item through.
+  const result: JSONValue = (() => {
+    if (itemType === "mcp_tool_call") {
+      return {
+        server: record.server ?? "",
+        status: record.status ?? "",
+        result: record.result ?? null,
+        error: record.error ?? null,
+      };
+    }
+    if (itemType === "command_execution") {
+      return {
+        command: record.command ?? "",
+        status: record.status ?? "",
+        output: record.aggregated_output ?? "",
+        exit_code: record.exit_code ?? null,
+      };
+    }
+    if (itemType === "file_change") {
+      return {
+        status: record.status ?? "",
+        changes: record.changes ?? [],
+      };
+    }
+    return record ?? null;
+  })();
+  input.emit({
+    type: "tool.result",
+    actionCallId: callId,
+    name: resolvedName,
+    result,
+    toolKind: "mcp_bridge",
+  });
+}
+
+/**
+ * Collects text fragments from a content value.
+ *
+ * A string yields a one-element list. An array yields the string `text`
+ * property of each object entry, in order; other entries are skipped. Any
+ * other input yields an empty list.
+ */
+function extractTextParts(value: JSONValue | undefined): Array<string> {
+  if (typeof value === "string") return [value];
+  if (!Array.isArray(value)) return [];
+  return value.flatMap((entry) => {
+    if (!entry || typeof entry !== "object") return [];
+    const text = (entry as Record<string, JSONValue>).text;
+    return typeof text === "string" ? [text] : [];
+  });
+}
+
+/**
+ * Translates a Codex `item.*` event for a `reasoning` item into
+ * `response.reasoning*` events.
+ *
+ * - `item.delta` emits `response.reasoning.delta` when the delta text is
+ *   non-empty.
+ * - `item.completed` / `item.done` emits `response.reasoning.done`, followed by
+ *   an `added` and a `done` summary-part event for each object entry in the
+ *   item's `summary` array.
+ *
+ * Any other event, or an item that is not of type `reasoning`, is ignored.
+ *
+ * @param input.event Parsed Codex event.
+ * @param input.emit Sink that receives the generated events.
+ */
+function emitCodexReasoningEvents(input: {
+  event: Record<string, JSONValue>;
+  emit: (event: Record<string, JSONValue>) => void;
+}): void {
+  const payloadType = typeof input.event.type === "string"
+    ? input.event.type
+    : "";
+  if (!payloadType.startsWith("item.")) return;
+  const item = input.event.item;
+  if (!item || typeof item !== "object" || Array.isArray(item)) return;
+  const record = item as Record<string, JSONValue>;
+  if (record.type !== "reasoning") return;
+
+  // Items without a string id share the fixed id "reasoning".
+  const itemId = typeof record.id === "string" ? record.id : "reasoning";
+  // Output and content indexes are always reported as 0.
+  const outputIndex = 0;
+  const contentIndex = 0;
+
+  if (payloadType === "item.delta") {
+    // Text comes from `text`, else from the concatenated `content` parts.
+    const deltaText = typeof record.text === "string"
+      ? record.text
+      : extractTextParts(record.content).join("");
+    if (deltaText) {
+      input.emit({
+        type: "response.reasoning.delta",
+        output_index: outputIndex,
+        item_id: itemId,
+        content_index: contentIndex,
+        delta: deltaText,
+      });
+    }
+  }
+
+  if (payloadType === "item.completed" || payloadType === "item.done") {
+    const doneText = typeof record.text === "string"
+      ? record.text
+      : extractTextParts(record.content).join("");
+    // Unlike the delta, the done event is emitted even when the text is empty.
+    input.emit({
+      type: "response.reasoning.done",
+      output_index: outputIndex,
+      item_id: itemId,
+      content_index: contentIndex,
+      text: doneText,
+    });
+    const summaryParts = Array.isArray(record.summary) ? record.summary : [];
+    summaryParts.forEach((part, idx) => {
+      if (!part || typeof part !== "object") return;
+      const partRecord = part as Record<string, JSONValue>;
+      const text = typeof partRecord.text === "string" ? partRecord.text : "";
+      // Each summary part is announced and immediately closed with the same text.
+      input.emit({
+        type: "response.reasoning_summary_part.added",
+        output_index: outputIndex,
+        item_id: itemId,
+        summary_index: idx,
+        part: {
+          type: "summary_text",
+          text,
+        },
+      });
+      input.emit({
+        type: "response.reasoning_summary_part.done",
+        output_index: outputIndex,
+        item_id: itemId,
+        summary_index: idx,
+        part: {
+          type: "summary_text",
+          text,
+        },
+      });
+    });
+  }
+}
+
+function responseItemsToChatMessages(
+  items: Array<ResponseItem>,
+  instructions?: string,
+): Array<ModelMessage> {
+  const messages: Array<ModelMessage> = [];
+  if (typeof instructions === "string" && instructions.trim().length > 0) {
+    messages.push({ role: "system", content: instructions });
+  }
+  for (const item of items) {
+    if (item.type === "message") {
+      const content = item.content.map((part) => part.text).join("");
+      messages.push({ role: item.role, content });
+      continue;
+    }
+    if (item.type === "function_call") {
+      messages.push({
+        role: "assistant",
+        content: null,
+        tool_calls: [{
+          id: item.call_id,
+          type: "function",
+          function: { name: item.name, arguments: item.arguments },
+        }],
+      });
+      continue;
+    }
+    if (item.type === "function_call_output") {
+      messages.push({
+        role: "tool",
+        content: item.output,
+        tool_call_id: item.call_id,
+      });
+    }
+  }
+  return messages;
+}
+
+function responseItemsFromAssistantMessage(
+  message: ModelMessage,
+): Array<ResponseItem> {
+  const output: Array<ResponseItem> = [];
+  if (typeof message.content === "string" && message.content.length > 0) {
+    output.push(
+      {
+        type: "message",
+        role: "assistant",
+        content: [{ type: "output_text", text: message.content }],
+      } satisfies ResponseMessageItem,
+    );
+  }
+  if (message.tool_calls) {
+    for (const call of message.tool_calls) {
+      output.push({
+        type: "function_call",
+        call_id: call.id,
+        name: call.function.name,
+        arguments: call.function.arguments,
+      });
+    }
+  }
+  return output;
+}
+
+function stringContent(content: ModelMessage["content"]): string {
+  if (typeof content === "string") return content.trim();
+  return "";
+}
+
+function renderMessagesForPrompt(messages: Array<ModelMessage>): string {
+  return messages
+    .map((message) => {
+      const content = stringContent(message.content);
+      if (!content) return "";
+      return `${message.role.toUpperCase()}:\n${content}`;
+    })
+    .filter(Boolean)
+    .join("\n\n");
+}
+
+function latestUserPrompt(messages: Array<ModelMessage>): string {
+  for (let idx = messages.length - 1; idx >= 0; idx -= 1) {
+    const msg = messages[idx];
+    if (msg.role !== "user") continue;
+    const content = stringContent(msg.content);
+    if (content) return content;
+  }
+  return "";
+}
+
+function promptForCodexTurn(input: {
+  messages: Array<ModelMessage>;
+  priorThreadId?: string;
+}): string {
+  if (input.priorThreadId) {
+    // Thread resume should be incremental: only send the newest user turn.
+    return latestUserPrompt(input.messages);
+  }
+  return renderMessagesForPrompt(input.messages);
+}
+
+function parseNumber(input: unknown): number {
+  return typeof input === "number" && Number.isFinite(input) ? input : 0;
+}
+
+function parseCodexStdout(stdout: string): {
+  threadId?: string;
+  assistantText: string;
+  usage?: {
+    promptTokens: number;
+    completionTokens: number;
+    totalTokens: number;
+  };
+} {
+  let threadId: string | undefined;
+  let assistantText = "";
+  let usage: {
+    promptTokens: number;
+    completionTokens: number;
+    totalTokens: number;
+  } | undefined;
+
+  for (const line of stdout.split(/\r?\n/)) {
+    const trimmed = line.trim();
+    if (!trimmed.startsWith("{")) continue;
+    let parsed: CodexEvent | null = null;
+    try {
+      parsed = JSON.parse(trimmed) as CodexEvent;
+    } catch {
+      continue;
+    }
+    if (!parsed || typeof parsed !== "object") continue;
+
+    if (parsed.type === "thread.started") {
+      if (typeof parsed.thread_id === "string" && parsed.thread_id.trim()) {
+        threadId = parsed.thread_id.trim();
+      }
+      continue;
+    }
+
+    if (parsed.type === "item.completed") {
+      const item = parsed.item as Record<string, unknown> | undefined;
+      if (!item || typeof item !== "object") continue;
+      if (item.type !== "agent_message") continue;
+      if (typeof item.text !== "string") continue;
+      const content = item.text.trim();
+      if (content) assistantText = content;
+      continue;
+    }
+
+    if (parsed.type === "turn.completed") {
+      const rawUsage = parsed.usage as Record<string, unknown> | undefined;
+      if (!rawUsage || typeof rawUsage !== "object") continue;
+      usage = {
+        promptTokens: parseNumber(rawUsage.input_tokens),
+        completionTokens: parseNumber(rawUsage.output_tokens),
+        totalTokens: parseNumber(rawUsage.total_tokens),
+      };
+    }
+  }
+
+  return { threadId, assistantText, usage };
+}
+
+function buildUpdatedState(input: {
+  priorState?: SavedState;
+  messages: Array<ModelMessage>;
+  assistantText: string;
+  threadId?: string;
+}): SavedState {
+  const priorState = input.priorState;
+  const baseMessages = input.messages.map((message) => ({ ...message }));
+  baseMessages.push({ role: "assistant", content: input.assistantText });
+  const meta = { ...(priorState?.meta ?? {}) };
+  if (input.threadId) {
+    meta[CODEX_THREAD_META_KEY] = input.threadId;
+  }
+  return {
+    runId: priorState?.runId ?? crypto.randomUUID(),
+    messages: baseMessages,
+    format: priorState?.format ?? "chat",
+    items: priorState?.items,
+    messageRefs: priorState?.messageRefs,
+    feedback: priorState?.feedback,
+    traces: priorState?.traces,
+    meta,
+    notes: priorState?.notes,
+    conversationScore: priorState?.conversationScore,
+  };
+}
+
+function defaultCommandRunner(input: {
+  args: Array<string>;
+  cwd: string;
+  signal?: AbortSignal;
+  onStdoutLine?: (line: string) => void;
+}): Promise<CommandOutput> {
+  const codexBin = Deno.env.get(CODEX_BIN_ENV)?.trim() || "codex";
+  const child = new Deno.Command(codexBin, {
+    args: input.args,
+    cwd: input.cwd,
+    stdout: "piped",
+    stderr: "piped",
+  }).spawn();
+  const abort = () => {
+    try {
+      child.kill("SIGTERM");
+    } catch {
+      // ignore
+    }
+  };
+  if (input.signal?.aborted) {
+    abort();
+  } else if (input.signal) {
+    input.signal.addEventListener("abort", abort, { once: true });
+  }
+  const readStream = async (
+    stream: ReadableStream<Uint8Array> | null,
+    onLine?: (line: string) => void,
+  ): Promise<Uint8Array> => {
+    if (!stream) return new Uint8Array();
+    const reader = stream.getReader();
+    const decoder = new TextDecoder();
+    const chunks: Array<Uint8Array> = [];
+    let buffered = "";
+    while (true) {
+      const { value, done } = await reader.read();
+      if (done) break;
+      if (value) {
+        chunks.push(value);
+        if (onLine) {
+          buffered += decoder.decode(value, { stream: true });
+          const parts = buffered.split(/\r?\n/);
+          buffered = parts.pop() ?? "";
+          for (const line of parts) onLine(line);
+        }
+      }
+    }
+    if (onLine && buffered.trim()) onLine(buffered);
+    const total = chunks.reduce((sum, chunk) => sum + chunk.length, 0);
+    const out = new Uint8Array(total);
+    let offset = 0;
+    for (const chunk of chunks) {
+      out.set(chunk, offset);
+      offset += chunk.length;
+    }
+    return out;
+  };
+  return Promise.all([
+    child.status,
+    readStream(child.stdout, input.onStdoutLine),
+    readStream(child.stderr),
+  ]).then(([status, stdout, stderr]) => ({
+    success: status.success,
+    code: status.code,
+    stdout,
+    stderr,
+  })).finally(() => {
+    if (input.signal) {
+      input.signal.removeEventListener("abort", abort);
+    }
+  });
+}
+
+function buildCodexStreamHandler(input: {
+  emitRaw: (event: Record<string, JSONValue>) => void;
+  emitTool: (event: Record<string, JSONValue>) => void;
+}): (event: Record<string, JSONValue>) => void {
+  const toolNames = new Map<string, string>();
+  const emittedCalls = new Set<string>();
+  const emittedResults = new Set<string>();
+  return (event) => {
+    emitCodexReasoningEvents({
+      event,
+      emit: input.emitTool,
+    });
+    emitCodexToolEvents({
+      event,
+      emit: input.emitTool,
+      toolNames,
+      emittedCalls,
+      emittedResults,
+    });
+    input.emitRaw(event);
+  };
+}
+
+export function createCodexProvider(opts?: {
+  runCommand?: CommandRunner;
+}): ModelProvider {
+  const runCommand = opts?.runCommand ?? defaultCommandRunner;
+  const runChat: ModelProvider["chat"] = async (input) => {
+    if (input.signal?.aborted) {
+      throw new DOMException("Run canceled", "AbortError");
+    }
+    const streamHandler = (input.onStreamEvent || input.onTraceEvent)
+      ? buildCodexStreamHandler({
+        emitRaw: (event) => input.onStreamEvent?.(event),
+        emitTool: (event) => {
+          input.onStreamEvent?.(event);
+          input.onTraceEvent?.(
+            event as unknown as import("@bolt-foundry/gambit-core").ProviderTraceEvent,
+          );
+        },
+      })
+      : undefined;
+    const priorThreadIdRaw = input.state?.meta?.[CODEX_THREAD_META_KEY];
+    const priorThreadId = typeof priorThreadIdRaw === "string" &&
+        priorThreadIdRaw.trim().length > 0
+      ? priorThreadIdRaw.trim()
+      : undefined;
+    const model = normalizeCodexModel(input.model);
+    const prompt = promptForCodexTurn({
+      messages: input.messages,
+      priorThreadId,
+    });
+    const cwd = runCwd();
+    const args = priorThreadId
+      ? [
+        "exec",
+        "resume",
+        "--skip-git-repo-check",
+        "--json",
+      ]
+      : ["exec", "--skip-git-repo-check", "--json"];
+    args.push(
+      ...codexConfigArgs({
+        cwd,
+        deckPath: input.deckPath,
+        params: input.params,
+      }),
+    );
+    if (model && model !== "default") {
+      args.push("-m", model);
+    }
+    if (priorThreadId) {
+      args.push(priorThreadId);
+    }
+    args.push(prompt);
+    const handleStdoutLine = (line: string) => {
+      const trimmed = line.trim();
+      if (!trimmed.startsWith("{")) return;
+      try {
+        const parsed = JSON.parse(trimmed);
+        if (
+          parsed && typeof parsed === "object" && !Array.isArray(parsed) &&
+          streamHandler
+        ) {
+          streamHandler(parsed as Record<string, JSONValue>);
+        }
+      } catch {
+        // ignore malformed/non-json lines
+      }
+    };
+    const out = await runCommand({
+      args,
+      cwd,
+      signal: input.signal,
+      onStdoutLine: streamHandler ? handleStdoutLine : undefined,
+    });
+    if (input.signal?.aborted) {
+      throw new DOMException("Run canceled", "AbortError");
+    }
+    const stdout = new TextDecoder().decode(out.stdout);
+    const stderr = new TextDecoder().decode(out.stderr);
+    if (!out.success) {
+      throw new Error(
+        `codex exec failed (exit ${out.code}): ${
+          stderr.trim() || stdout.trim()
+        }`,
+      );
+    }
+    const parsed = parseCodexStdout(stdout);
+    const threadId = parsed.threadId ?? priorThreadId;
+    if (input.stream && input.onStreamText && parsed.assistantText) {
+      input.onStreamText(parsed.assistantText);
+    }
+    const updatedState = buildUpdatedState({
+      priorState: input.state,
+      messages: input.messages,
+      assistantText: parsed.assistantText,
+      threadId,
+    });
+
+    return {
+      message: { role: "assistant", content: parsed.assistantText },
+      finishReason: "stop" as const,
+      updatedState,
+      usage: parsed.usage,
+    };
+  };
+
+  return {
+    async responses(input: {
+      request: CreateResponseRequest;
+      state?: SavedState;
+      deckPath?: string;
+      onStreamEvent?: (event: ResponseEvent) => void;
+    }): Promise<CreateResponseResponse> {
+      const streamHandler = input.onStreamEvent
+        ? buildCodexStreamHandler({
+          emitRaw: (event) => {
+            input.onStreamEvent?.({
+              type: "codex.event",
+              payload: event,
+            } as unknown as ResponseEvent);
+          },
+          emitTool: (event) => {
+            input.onStreamEvent?.(event as unknown as ResponseEvent);
+          },
+        })
+        : undefined;
+      const result = await runChat({
+        model: input.request.model,
+        messages: responseItemsToChatMessages(
+          input.request.input,
+          input.request.instructions,
+        ),
+        stream: input.request.stream,
+        params: input.request.params,
+        state: input.state,
+        deckPath: input.deckPath,
+        onStreamEvent: streamHandler,
+      });
+
+      const output = responseItemsFromAssistantMessage(result.message);
+      const responseId = `codex-${crypto.randomUUID()}`;
+      const createdAt = Math.floor(Date.now() / 1000);
+      if (input.request.stream) {
+        input.onStreamEvent?.({
+          type: "response.created",
+          sequence_number: 0,
+          response: {
+            id: responseId,
+            object: "response",
+            model: input.request.model,
+            created_at: createdAt,
+            created: createdAt,
+            status: "in_progress",
+            output: [],
+            error: null,
+          },
+        });
+        if (
+          typeof result.message.content === "string" && result.message.content
+        ) {
+          input.onStreamEvent?.({
+            type: "response.output_text.delta",
+            sequence_number: 1,
+            output_index: 0,
+            delta: result.message.content,
+          });
+          input.onStreamEvent?.({
+            type: "response.output_text.done",
+            sequence_number: 2,
+            output_index: 0,
+            text: result.message.content,
+          });
+        }
+        output.forEach((item, index) => {
+          input.onStreamEvent?.({
+            type: "response.output_item.added",
+            sequence_number: 3 + (index * 2),
+            output_index: index,
+            item,
+          });
+          input.onStreamEvent?.({
+            type: "response.output_item.done",
+            sequence_number: 4 + (index * 2),
+            output_index: index,
+            item,
+          });
+        });
+      }
+
+      const response: CreateResponseResponse = {
+        id: responseId,
+        object: "response",
+        model: input.request.model,
+        created_at: createdAt,
+        created: createdAt,
+        status: "completed",
+        output,
+        usage: result.usage,
+        error: null,
+        updatedState: result.updatedState,
+      };
+      if (input.request.stream) {
+        input.onStreamEvent?.({
+          type: "response.completed",
+          sequence_number: 1000,
+          response,
+        });
+      }
+      return response;
+    },
+    chat: runChat,
+  };
+}
+
+export function parseCodexArgsForTest(input: {
+  model: string;
+  state?: SavedState;
+  messages: Array<ModelMessage>;
+  params?: Record<string, unknown>;
+  cwd?: string;
+  deckPath?: string;
+}): Array<string> {
+  const priorThreadIdRaw = input.state?.meta?.[CODEX_THREAD_META_KEY];
+  const priorThreadId = typeof priorThreadIdRaw === "string" &&
+      priorThreadIdRaw.trim().length > 0
+    ? priorThreadIdRaw.trim()
+    : undefined;
+  const model = normalizeCodexModel(input.model);
+  const prompt = promptForCodexTurn({
+    messages: input.messages,
+    priorThreadId,
+  });
+  const args = priorThreadId
+    ? ["exec", "resume", "--skip-git-repo-check", "--json"]
+    : ["exec", "--skip-git-repo-check", "--json"];
+  args.push(
+    ...codexConfigArgs({
+      cwd: input.cwd ?? runCwd(),
+      deckPath: input.deckPath,
+      params: input.params,
+    }),
+  );
+  if (model && model !== "default") {
+    args.push("-m", model);
+  }
+  if (priorThreadId) args.push(priorThreadId);
+  args.push(prompt);
+  return args;
+}
+
+export function parseCodexStdoutForTest(stdout: string): {
+  threadId?: string;
+  assistantText: string;
+  usage?: {
+    promptTokens: number;
+    completionTokens: number;
+    totalTokens: number;
+  };
+} {
+  return parseCodexStdout(stdout);
+}
+
+export function safeJsonForTest(text: string): Record<string, JSONValue> {
+  return safeJsonObject(text);
+}
diff --git a/src/providers/google.ts b/src/providers/google.ts
index 55e3a9d11..41faa689c 100644
--- a/src/providers/google.ts
+++ b/src/providers/google.ts
@@ -16,7 +16,10 @@ const DEFAULT_GOOGLE_BASE_URL =
 type OpenAIClient = {
   chat: {
     completions: {
-      create: (params: unknown) => Promise<unknown>;
+      create: (
+        params: unknown,
+        options?: { signal?: AbortSignal },
+      ) => Promise<unknown>;
     };
   };
 };
@@ -63,7 +66,13 @@ function toToolChoice(
   choice: ResponseToolChoice | undefined,
 ): OpenAI.Chat.Completions.ChatCompletionToolChoiceOption | undefined {
   if (!choice) return undefined;
-  if (choice === "auto" || choice === "required") return choice;
+  if (choice === "none" || choice === "auto" || choice === "required") {
+    return choice;
+  }
+  if (choice.type === "allowed_tools") {
+    return (choice.mode ??
+      "auto") as OpenAI.Chat.Completions.ChatCompletionToolChoiceOption;
+  }
   return { type: "function", function: { name: choice.function.name } };
 }
 
@@ -170,6 +179,26 @@ export function createGoogleProvider(opts: {
     async responses(input) {
       const request = input.request;
       const params = { ...(request.params ?? {}) } as Record<string, unknown>;
+      if (
+        request.temperature !== undefined && params.temperature === undefined
+      ) {
+        params.temperature = request.temperature;
+      }
+      if (request.top_p !== undefined && params.top_p === undefined) {
+        params.top_p = request.top_p;
+      }
+      if (
+        request.frequency_penalty !== undefined &&
+        params.frequency_penalty === undefined
+      ) {
+        params.frequency_penalty = request.frequency_penalty;
+      }
+      if (
+        request.presence_penalty !== undefined &&
+        params.presence_penalty === undefined
+      ) {
+        params.presence_penalty = request.presence_penalty;
+      }
       if (
         request.max_output_tokens !== undefined &&
         params.max_tokens === undefined
@@ -182,18 +211,24 @@ export function createGoogleProvider(opts: {
       );
       const toolChoice = toToolChoice(request.tool_choice);
       if (request.stream) {
-        const stream = await client.chat.completions.create({
-          model: request.model,
-          messages: messages as Array<
-            OpenAI.Chat.Completions.ChatCompletionMessageParam
-          >,
-          tools: request.tools as Array<
-            OpenAI.Chat.Completions.ChatCompletionTool
-          >,
-          tool_choice: toolChoice ?? "auto",
-          stream: true,
-          ...(params as Record<string, unknown>),
-        }) as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>;
+        let sequence = 0;
+        const stream = await client.chat.completions.create(
+          {
+            model: request.model,
+            messages: messages as Array<
+              OpenAI.Chat.Completions.ChatCompletionMessageParam
+            >,
+            tools: request.tools as Array<
+              OpenAI.Chat.Completions.ChatCompletionTool
+            >,
+            tool_choice: toolChoice ?? "auto",
+            stream: true,
+            ...(params as Record<string, unknown>),
+          },
+          input.signal ? { signal: input.signal } : undefined,
+        ) as AsyncIterable<
+          OpenAI.Chat.Completions.ChatCompletionChunk
+        >;
 
         const contentParts: Array<string> = [];
         const toolCallMap = new Map<
@@ -202,9 +237,27 @@ export function createGoogleProvider(opts: {
         >();
         let responseId: string | undefined;
         let created: number | undefined;
+        let emittedCreated = false;
         for await (const chunk of stream) {
           responseId = responseId ?? chunk.id;
           created = created ?? chunk.created;
+          if (!emittedCreated) {
+            input.onStreamEvent?.({
+              type: "response.created",
+              sequence_number: sequence++,
+              response: {
+                id: responseId ?? crypto.randomUUID(),
+                object: "response",
+                model: request.model,
+                created_at: created,
+                created,
+                status: "in_progress",
+                output: [],
+                error: null,
+              },
+            });
+            emittedCreated = true;
+          }
           const choice = chunk.choices[0];
           const delta = choice.delta;
           if (typeof delta.content === "string") {
@@ -213,6 +266,7 @@ export function createGoogleProvider(opts: {
               type: "response.output_text.delta",
               output_index: 0,
               delta: delta.content,
+              sequence_number: sequence++,
             });
           } else if (Array.isArray(delta.content)) {
             const text = (delta.content as Array<string | { text?: string }>)
@@ -226,6 +280,7 @@ export function createGoogleProvider(opts: {
                 type: "response.output_text.delta",
                 output_index: 0,
                 delta: text,
+                sequence_number: sequence++,
               });
             }
           }
@@ -270,26 +325,49 @@ export function createGoogleProvider(opts: {
           id: responseId ?? crypto.randomUUID(),
           object: "response",
           model: request.model,
+          created_at: created,
           created,
           status: "completed",
           output,
+          previous_response_id: request.previous_response_id ?? null,
+          instructions: request.instructions ?? null,
+          tool_choice: request.tool_choice,
+          max_output_tokens: request.max_output_tokens ?? null,
+          max_tool_calls: request.max_tool_calls ?? null,
+          parallel_tool_calls: request.parallel_tool_calls,
+          store: request.store,
+          metadata: request.metadata,
+          reasoning: request.reasoning
+            ? {
+              effort: request.reasoning.effort ?? null,
+              summary: request.reasoning.summary ?? null,
+            }
+            : null,
+          error: null,
         };
-        input.onStreamEvent?.({ type: "response.completed", response });
+        input.onStreamEvent?.({
+          type: "response.completed",
+          sequence_number: sequence++,
+          response,
+        });
         return response;
       }
 
-      const response = await client.chat.completions.create({
-        model: request.model,
-        messages: messages as Array<
-          OpenAI.Chat.Completions.ChatCompletionMessageParam
-        >,
-        tools: request.tools as Array<
-          OpenAI.Chat.Completions.ChatCompletionTool
-        >,
-        tool_choice: toolChoice ?? "auto",
-        stream: false,
-        ...(params as Record<string, unknown>),
-      }) as OpenAI.Chat.Completions.ChatCompletion;
+      const response = await client.chat.completions.create(
+        {
+          model: request.model,
+          messages: messages as Array<
+            OpenAI.Chat.Completions.ChatCompletionMessageParam
+          >,
+          tools: request.tools as Array<
+            OpenAI.Chat.Completions.ChatCompletionTool
+          >,
+          tool_choice: toolChoice ?? "auto",
+          stream: false,
+          ...(params as Record<string, unknown>),
+        },
+        input.signal ? { signal: input.signal } : undefined,
+      ) as OpenAI.Chat.Completions.ChatCompletion;
 
       const choice = response.choices[0];
       const normalizedMessage = normalizeMessage(choice.message);
@@ -304,28 +382,49 @@ export function createGoogleProvider(opts: {
         id: response.id,
         object: "response",
         model: response.model,
+        created_at: response.created,
         created: response.created,
         status: "completed",
         output: responseItemsFromChatMessage(normalizedMessage, toolCalls),
+        previous_response_id: request.previous_response_id ?? null,
+        instructions: request.instructions ?? null,
+        tool_choice: request.tool_choice,
+        max_output_tokens: request.max_output_tokens ?? null,
+        max_tool_calls: request.max_tool_calls ?? null,
+        parallel_tool_calls: request.parallel_tool_calls,
+        store: request.store,
+        metadata: request.metadata,
+        reasoning: request.reasoning
+          ? {
+            effort: request.reasoning.effort ?? null,
+            summary: request.reasoning.summary ?? null,
+          }
+          : null,
+        error: null,
         usage: mapChatUsage(response.usage),
       };
     },
     async chat(input) {
       const params = input.params ?? {};
       if (input.stream) {
-        const stream = await client.chat.completions.create({
-          model: input.model,
-          messages: input.messages as Array<
-            OpenAI.Chat.Completions.ChatCompletionMessageParam
-          >,
-          tools: input
-            .tools as unknown as Array<
-              OpenAI.Chat.Completions.ChatCompletionTool
+        const stream = await client.chat.completions.create(
+          {
+            model: input.model,
+            messages: input.messages as Array<
+              OpenAI.Chat.Completions.ChatCompletionMessageParam
             >,
-          tool_choice: "auto",
-          stream: true,
-          ...(params as Record<string, unknown>),
-        }) as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>;
+            tools: input
+              .tools as unknown as Array<
+                OpenAI.Chat.Completions.ChatCompletionTool
+              >,
+            tool_choice: "auto",
+            stream: true,
+            ...(params as Record<string, unknown>),
+          },
+          input.signal ? { signal: input.signal } : undefined,
+        ) as AsyncIterable<
+          OpenAI.Chat.Completions.ChatCompletionChunk
+        >;
 
         let finishReason: "stop" | "tool_calls" | "length" | null = null;
         const contentParts: Array<string> = [];
@@ -408,20 +507,23 @@ export function createGoogleProvider(opts: {
         };
       }
 
-      const response = await client.chat.completions.create({
-        model: input.model,
-        messages: input
-          .messages as unknown as Array<
-            OpenAI.Chat.Completions.ChatCompletionMessageParam
-          >,
-        tools: input
-          .tools as unknown as Array<
-            OpenAI.Chat.Completions.ChatCompletionTool
-          >,
-        tool_choice: "auto",
-        stream: false,
-        ...(params as Record<string, unknown>),
-      }) as OpenAI.Chat.Completions.ChatCompletion;
+      const response = await client.chat.completions.create(
+        {
+          model: input.model,
+          messages: input
+            .messages as unknown as Array<
+              OpenAI.Chat.Completions.ChatCompletionMessageParam
+            >,
+          tools: input
+            .tools as unknown as Array<
+              OpenAI.Chat.Completions.ChatCompletionTool
+            >,
+          tool_choice: "auto",
+          stream: false,
+          ...(params as Record<string, unknown>),
+        },
+        input.signal ? { signal: input.signal } : undefined,
+      ) as OpenAI.Chat.Completions.ChatCompletion;
 
       const choice = response.choices[0];
       const message = normalizeMessage(choice.message);
diff --git a/src/providers/ollama.ts b/src/providers/ollama.ts
index 8920d835b..306f66a70 100644
--- a/src/providers/ollama.ts
+++ b/src/providers/ollama.ts
@@ -23,7 +23,10 @@ export const DEFAULT_OLLAMA_BASE_URL = "http://localhost:11434/v1";
 
 type OpenAIClient = {
   responses: {
-    create: (params: unknown) => Promise<unknown>;
+    create: (
+      params: unknown,
+      options?: { signal?: AbortSignal },
+    ) => Promise<unknown>;
   };
 };
 
@@ -151,6 +154,26 @@ function mapStatus(
   return "failed";
 }
 
+function mapReasoning(
+  reasoning: CreateResponseRequest["reasoning"],
+): Record<string, unknown> | undefined {
+  if (!reasoning) return undefined;
+  const out: Record<string, unknown> = {};
+  if (reasoning.effort !== undefined) out.effort = reasoning.effort;
+  if (reasoning.summary !== undefined) out.summary = reasoning.summary;
+  return Object.keys(out).length > 0 ? out : undefined;
+}
+
+function stripUndefined<T extends Record<string, unknown>>(input: T): T {
+  const out = { ...input };
+  for (const [key, value] of Object.entries(out)) {
+    if (value === undefined) {
+      delete out[key as keyof T];
+    }
+  }
+  return out;
+}
+
 function mapError(
   error: OpenAI.Responses.ResponseError | null | undefined,
 ): { code?: string; message?: string } | undefined {
@@ -242,27 +265,55 @@ function appendSyntheticTools(
 
 function mapToolChoice(
   toolChoice: CreateResponseRequest["tool_choice"],
-):
-  | OpenAI.Responses.ToolChoiceOptions
-  | OpenAI.Responses.ToolChoiceFunction
-  | undefined {
+): Record<string, unknown> | string | undefined {
   if (!toolChoice) return undefined;
-  if (toolChoice === "auto" || toolChoice === "required") return toolChoice;
+  if (
+    toolChoice === "none" || toolChoice === "auto" ||
+    toolChoice === "required"
+  ) {
+    return toolChoice;
+  }
+  if (toolChoice.type === "allowed_tools") {
+    return {
+      type: "allowed_tools",
+      tools: toolChoice.tools,
+      mode: toolChoice.mode ?? "auto",
+    };
+  }
   return { type: "function", name: toolChoice.function.name };
 }
 
+function mapResponseContentPart(
+  part: { type?: string; text?: string },
+): ResponseTextContent | null {
+  if (!part || typeof part !== "object") return null;
+  if (typeof part.text !== "string") return null;
+  if (part.type === "output_text") {
+    return { type: "output_text", text: part.text };
+  }
+  if (part.type === "summary_text") {
+    return { type: "summary_text", text: part.text };
+  }
+  if (part.type === "reasoning_text") {
+    return { type: "reasoning_text", text: part.text };
+  }
+  if (part.type === "input_text") {
+    return { type: "input_text", text: part.text };
+  }
+  return null;
+}
+
 function mapOpenAIOutputItem(
   item: OpenAI.Responses.ResponseOutputItem,
 ): ResponseItem | null {
   const itemType = (item as { type?: string }).type;
   if (itemType === "message") {
     const message = item as OpenAI.Responses.ResponseOutputMessage;
-    const content: Array<ResponseTextContent> = [];
-    for (const part of message.content ?? []) {
-      if (part.type === "output_text") {
-        content.push({ type: "output_text", text: part.text });
-      }
-    }
+    const content: Array<ResponseTextContent> = (message.content ?? [])
+      .map((part) =>
+        mapResponseContentPart(part as { type?: string; text?: string })
+      )
+      .filter((part): part is ResponseTextContent => Boolean(part));
     if (content.length === 0) return null;
     return {
       type: "message",
@@ -281,6 +332,27 @@ function mapOpenAIOutputItem(
       id: call.id,
     };
   }
+  if (itemType === "reasoning") {
+    const reasoning = item as {
+      id?: string;
+      content?: Array<{ type?: string; text?: string }>;
+      summary?: Array<{ type?: string; text?: string }>;
+      encrypted_content?: string | null;
+    };
+    const content = (reasoning.content ?? [])
+      .map((part) => mapResponseContentPart(part))
+      .filter((part): part is ResponseTextContent => Boolean(part));
+    const summary = (reasoning.summary ?? [])
+      .map((part) => mapResponseContentPart(part))
+      .filter((part): part is ResponseTextContent => Boolean(part));
+    return {
+      type: "reasoning",
+      id: reasoning.id,
+      content: content.length > 0 ? content : undefined,
+      summary,
+      encrypted_content: reasoning.encrypted_content,
+    };
+  }
   return null;
 }
 
@@ -294,11 +366,27 @@ function normalizeOpenAIResponse(
     id: response.id,
     object: "response",
     model: response.model,
+    created_at: response.created_at,
     created: response.created_at,
+    completed_at: (response as { completed_at?: number | null }).completed_at ??
+      null,
+    previous_response_id: response.previous_response_id ?? null,
+    instructions: response.instructions ?? null,
+    reasoning: (response as { reasoning?: CreateResponseResponse["reasoning"] })
+      .reasoning ?? null,
     status: mapStatus(response.status ?? undefined),
     output: outputItems,
     usage: mapUsage(response.usage),
-    error: mapError(response.error),
+    error: mapError(response.error) ?? null,
+    metadata: (response as {
+      metadata?: Record<string, JSONValue>;
+    }).metadata,
+    max_output_tokens: response.max_output_tokens ?? null,
+    max_tool_calls:
+      (response as { max_tool_calls?: number | null }).max_tool_calls ?? null,
+    parallel_tool_calls:
+      (response as { parallel_tool_calls?: boolean }).parallel_tool_calls,
+    store: (response as { store?: boolean }).store,
   };
 }
 
@@ -323,13 +411,30 @@ function toOpenAIInputItems(
               text: part.text,
             };
           }
+          if (part.type === "summary_text") {
+            return {
+              type: "summary_text",
+              text: part.text,
+            };
+          }
+          if (part.type === "reasoning_text") {
+            return {
+              type: "reasoning_text",
+              text: part.text,
+            };
+          }
           return null;
         })
         .filter((
           part,
-        ): part is { type: "input_text" | "output_text"; text: string } =>
-          Boolean(part)
-        );
+        ): part is {
+          type:
+            | "input_text"
+            | "output_text"
+            | "summary_text"
+            | "reasoning_text";
+          text: string;
+        } => Boolean(part));
       if (content.length === 0) continue;
       mapped.push({
         type: "message",
@@ -356,6 +461,22 @@ function toOpenAIInputItems(
         output: item.output,
         id: item.id,
       });
+      continue;
+    }
+    if (item.type === "reasoning") {
+      mapped.push({
+        type: "reasoning",
+        id: item.id,
+        content: (item.content ?? []).map((part) => ({
+          type: part.type,
+          text: part.text,
+        })),
+        summary: item.summary.map((part) => ({
+          type: part.type,
+          text: part.text,
+        })),
+        encrypted_content: item.encrypted_content ?? null,
+      });
     }
   }
   return mapped;
@@ -462,6 +583,7 @@ function responseItemsToChat(items: Array<ResponseItem>): {
 async function createResponse(
   client: OpenAIClient,
   request: CreateResponseRequest,
+  signal?: AbortSignal,
   onStreamEvent?: (event: ResponseEvent) => void,
 ): Promise<CreateResponseResponse> {
   const baseParams: Record<string, unknown> = {
@@ -470,6 +592,24 @@ async function createResponse(
     instructions: request.instructions,
     tools: undefined,
     tool_choice: mapToolChoice(request.tool_choice),
+    previous_response_id: request.previous_response_id,
+    reasoning: mapReasoning(request.reasoning),
+    parallel_tool_calls: request.parallel_tool_calls,
+    max_tool_calls: request.max_tool_calls,
+    store: request.store,
+    include: request.include,
+    text: request.text,
+    stream_options: request.stream_options,
+    background: request.background,
+    truncation: request.truncation,
+    service_tier: request.service_tier,
+    top_logprobs: request.top_logprobs,
+    safety_identifier: request.safety_identifier,
+    prompt_cache_key: request.prompt_cache_key,
+    temperature: request.temperature,
+    top_p: request.top_p,
+    frequency_penalty: request.frequency_penalty,
+    presence_penalty: request.presence_penalty,
     stream: request.stream,
     max_output_tokens: request.max_output_tokens,
     metadata: request.metadata,
@@ -479,12 +619,13 @@ async function createResponse(
   if (mappedTools.length > 0) {
     baseParams.tools = mappedTools;
   }
-  const params = { ...(request.params ?? {}), ...baseParams };
+  const params = { ...(request.params ?? {}), ...stripUndefined(baseParams) };
   const debugResponses = Deno.env.get("GAMBIT_DEBUG_RESPONSES") === "1";
   let responseOrStream: unknown;
   try {
     responseOrStream = await client.responses.create(
       params as unknown as OpenAI.Responses.ResponseCreateParams,
+      signal ? { signal } : undefined,
     );
   } catch (err) {
     if (debugResponses) {
@@ -510,7 +651,12 @@ async function createResponse(
       switch (event.type) {
         case "response.created": {
           const mapped = normalizeOpenAIResponse(event.response);
-          onStreamEvent?.({ type: "response.created", response: mapped });
+          onStreamEvent?.({
+            type: "response.created",
+            response: mapped,
+            sequence_number: (event as { sequence_number?: number })
+              .sequence_number,
+          });
           break;
         }
         case "response.output_text.delta":
@@ -519,6 +665,12 @@ async function createResponse(
             output_index: event.output_index,
             delta: event.delta,
             item_id: event.item_id,
+            content_index: event.content_index,
+            sequence_number: (event as { sequence_number?: number })
+              .sequence_number,
+            logprobs: (event as {
+              logprobs?: Array<{ token?: string; logprob?: number }>;
+            }).logprobs,
           });
           break;
         case "response.output_text.done":
@@ -527,6 +679,9 @@ async function createResponse(
             output_index: event.output_index,
             text: event.text,
             item_id: event.item_id,
+            content_index: event.content_index,
+            sequence_number: (event as { sequence_number?: number })
+              .sequence_number,
           });
           break;
         case "response.output_item.added": {
@@ -536,6 +691,8 @@ async function createResponse(
               type: "response.output_item.added",
               output_index: event.output_index,
               item,
+              sequence_number: (event as { sequence_number?: number })
+                .sequence_number,
             });
           }
           break;
@@ -547,13 +704,106 @@ async function createResponse(
               type: "response.output_item.done",
               output_index: event.output_index,
               item,
+              sequence_number: (event as { sequence_number?: number })
+                .sequence_number,
             });
           }
           break;
         }
+        case "response.reasoning.delta":
+          if (typeof event.delta === "string") {
+            onStreamEvent?.({
+              type: "response.reasoning.delta",
+              output_index: event.output_index,
+              item_id: event.item_id,
+              content_index: event.content_index,
+              delta: event.delta,
+              sequence_number: (event as { sequence_number?: number })
+                .sequence_number,
+              obfuscation: (event as { obfuscation?: string }).obfuscation,
+            });
+          }
+          break;
+        case "response.reasoning.done":
+          onStreamEvent?.({
+            type: "response.reasoning.done",
+            output_index: event.output_index,
+            item_id: event.item_id,
+            content_index: event.content_index,
+            text: event.text,
+            sequence_number: (event as { sequence_number?: number })
+              .sequence_number,
+          });
+          break;
+        case "response.reasoning_summary_text.delta":
+          if (typeof event.delta === "string") {
+            onStreamEvent?.({
+              type: "response.reasoning_summary_text.delta",
+              output_index: event.output_index,
+              item_id: event.item_id,
+              summary_index: event.summary_index,
+              delta: event.delta,
+              sequence_number: (event as { sequence_number?: number })
+                .sequence_number,
+              obfuscation: (event as { obfuscation?: string }).obfuscation,
+            });
+          }
+          break;
+        case "response.reasoning_summary_text.done":
+          onStreamEvent?.({
+            type: "response.reasoning_summary_text.done",
+            output_index: event.output_index,
+            item_id: event.item_id,
+            summary_index: event.summary_index,
+            text: event.text,
+            sequence_number: (event as { sequence_number?: number })
+              .sequence_number,
+          });
+          break;
+        case "response.reasoning_summary_part.added":
+          if (
+            event.part && typeof event.part === "object" &&
+            "type" in event.part &&
+            "text" in event.part &&
+            typeof (event.part as { text?: unknown }).text === "string"
+          ) {
+            onStreamEvent?.({
+              type: "response.reasoning_summary_part.added",
+              output_index: event.output_index,
+              item_id: event.item_id,
+              summary_index: event.summary_index,
+              part: event.part as ResponseTextContent,
+              sequence_number: (event as { sequence_number?: number })
+                .sequence_number,
+            });
+          }
+          break;
+        case "response.reasoning_summary_part.done":
+          if (
+            event.part && typeof event.part === "object" &&
+            "type" in event.part &&
+            "text" in event.part &&
+            typeof (event.part as { text?: unknown }).text === "string"
+          ) {
+            onStreamEvent?.({
+              type: "response.reasoning_summary_part.done",
+              output_index: event.output_index,
+              item_id: event.item_id,
+              summary_index: event.summary_index,
+              part: event.part as ResponseTextContent,
+              sequence_number: (event as { sequence_number?: number })
+                .sequence_number,
+            });
+          }
+          break;
         case "response.completed": {
           completed = normalizeOpenAIResponse(event.response);
-          onStreamEvent?.({ type: "response.completed", response: completed });
+          onStreamEvent?.({
+            type: "response.completed",
+            response: completed,
+            sequence_number: (event as { sequence_number?: number })
+              .sequence_number,
+          });
           break;
         }
         case "response.failed": {
@@ -561,6 +811,8 @@ async function createResponse(
           onStreamEvent?.({
             type: "response.failed",
             error: error ?? {},
+            sequence_number: (event as { sequence_number?: number })
+              .sequence_number,
           });
           break;
         }
@@ -590,7 +842,12 @@ export function createOllamaProvider(opts: {
 
   return {
     async responses(input) {
-      return await createResponse(client, input.request, input.onStreamEvent);
+      return await createResponse(
+        client,
+        input.request,
+        input.signal,
+        input.onStreamEvent,
+      );
     },
     async chat(input) {
       const response = await createResponse(
@@ -602,6 +859,7 @@ export function createOllamaProvider(opts: {
           stream: input.stream,
           params: input.params ?? {},
         },
+        input.signal,
         (event) => {
           if (event.type === "response.output_text.delta") {
             input.onStreamText?.(event.delta);
diff --git a/src/providers/openrouter.test.ts b/src/providers/openrouter.test.ts
index 462a6e167..e1eb0fa02 100644
--- a/src/providers/openrouter.test.ts
+++ b/src/providers/openrouter.test.ts
@@ -92,6 +92,7 @@ Deno.test("openrouter responses maps output and usage", async () => {
     promptTokens: 3,
     completionTokens: 5,
     totalTokens: 8,
+    reasoningTokens: 0,
   });
 });
 
diff --git a/src/providers/openrouter.ts b/src/providers/openrouter.ts
index 7e4edf658..c64133fea 100644
--- a/src/providers/openrouter.ts
+++ b/src/providers/openrouter.ts
@@ -23,11 +23,17 @@ export const OPENROUTER_PREFIX = "openrouter/";
 type OpenAIClient = {
   chat: {
     completions: {
-      create: (params: unknown) => Promise<unknown>;
+      create: (
+        params: unknown,
+        options?: { signal?: AbortSignal },
+      ) => Promise<unknown>;
     };
   };
   responses: {
-    create: (params: unknown) => Promise<unknown>;
+    create: (
+      params: unknown,
+      options?: { signal?: AbortSignal },
+    ) => Promise<unknown>;
   };
 };
 
@@ -81,11 +87,18 @@ function mapUsage(
   usage: OpenAI.Responses.ResponseUsage | null | undefined,
 ): ResponseUsage | undefined {
   if (!usage) return undefined;
-  return {
+  const reasoningTokens = (usage as {
+    output_tokens_details?: { reasoning_tokens?: number | null };
+  }).output_tokens_details?.reasoning_tokens ?? undefined;
+  const out: ResponseUsage = {
     promptTokens: usage.input_tokens ?? 0,
     completionTokens: usage.output_tokens ?? 0,
     totalTokens: usage.total_tokens ?? 0,
   };
+  if (typeof reasoningTokens === "number") {
+    out.reasoningTokens = reasoningTokens;
+  }
+  return out;
 }
 
 function mapStatus(
@@ -97,6 +110,26 @@ function mapStatus(
   return "failed";
 }
 
+function mapReasoning(
+  reasoning: CreateResponseRequest["reasoning"],
+): Record<string, unknown> | undefined {
+  if (!reasoning) return undefined;
+  const out: Record<string, unknown> = {};
+  if (reasoning.effort !== undefined) out.effort = reasoning.effort;
+  if (reasoning.summary !== undefined) out.summary = reasoning.summary;
+  return Object.keys(out).length > 0 ? out : undefined;
+}
+
+function stripUndefined<T extends Record<string, unknown>>(input: T): T {
+  const out = { ...input };
+  for (const [key, value] of Object.entries(out)) {
+    if (value === undefined) {
+      delete out[key as keyof T];
+    }
+  }
+  return out;
+}
+
 function mapError(
   error: OpenAI.Responses.ResponseError | null | undefined,
 ): { code?: string; message?: string } | undefined {
@@ -188,27 +221,55 @@ function appendSyntheticTools(
 
 function mapToolChoice(
   toolChoice: CreateResponseRequest["tool_choice"],
-):
-  | OpenAI.Responses.ToolChoiceOptions
-  | OpenAI.Responses.ToolChoiceFunction
-  | undefined {
+): Record<string, unknown> | string | undefined {
   if (!toolChoice) return undefined;
-  if (toolChoice === "auto" || toolChoice === "required") return toolChoice;
+  if (
+    toolChoice === "none" || toolChoice === "auto" ||
+    toolChoice === "required"
+  ) {
+    return toolChoice;
+  }
+  if (toolChoice.type === "allowed_tools") {
+    return {
+      type: "allowed_tools",
+      tools: toolChoice.tools,
+      mode: toolChoice.mode ?? "auto",
+    };
+  }
   return { type: "function", name: toolChoice.function.name };
 }
 
+function mapResponseContentPart(
+  part: { type?: string; text?: string },
+): ResponseTextContent | null {
+  if (!part || typeof part !== "object") return null;
+  if (typeof part.text !== "string") return null;
+  if (part.type === "output_text") {
+    return { type: "output_text", text: part.text };
+  }
+  if (part.type === "summary_text") {
+    return { type: "summary_text", text: part.text };
+  }
+  if (part.type === "reasoning_text") {
+    return { type: "reasoning_text", text: part.text };
+  }
+  if (part.type === "input_text") {
+    return { type: "input_text", text: part.text };
+  }
+  return null;
+}
+
 function mapOpenAIOutputItem(
   item: OpenAI.Responses.ResponseOutputItem,
 ): ResponseItem | null {
   const itemType = (item as { type?: string }).type;
   if (itemType === "message") {
     const message = item as OpenAI.Responses.ResponseOutputMessage;
-    const content: Array<ResponseTextContent> = [];
-    for (const part of message.content ?? []) {
-      if (part.type === "output_text") {
-        content.push({ type: "output_text", text: part.text });
-      }
-    }
+    const content: Array<ResponseTextContent> = (message.content ?? [])
+      .map((part) =>
+        mapResponseContentPart(part as { type?: string; text?: string })
+      )
+      .filter((part): part is ResponseTextContent => Boolean(part));
     if (content.length === 0) return null;
     return {
       type: "message",
@@ -227,6 +288,27 @@ function mapOpenAIOutputItem(
       id: call.id,
     };
   }
+  if (itemType === "reasoning") {
+    const reasoning = item as {
+      id?: string;
+      content?: Array<{ type?: string; text?: string }>;
+      summary?: Array<{ type?: string; text?: string }>;
+      encrypted_content?: string | null;
+    };
+    const content = (reasoning.content ?? [])
+      .map((part) => mapResponseContentPart(part))
+      .filter((part): part is ResponseTextContent => Boolean(part));
+    const summary = (reasoning.summary ?? [])
+      .map((part) => mapResponseContentPart(part))
+      .filter((part): part is ResponseTextContent => Boolean(part));
+    return {
+      type: "reasoning",
+      id: reasoning.id,
+      content: content.length > 0 ? content : undefined,
+      summary,
+      encrypted_content: reasoning.encrypted_content,
+    };
+  }
   return null;
 }
 
@@ -240,11 +322,27 @@ function normalizeOpenAIResponse(
     id: response.id,
     object: "response",
     model: response.model,
+    created_at: response.created_at,
     created: response.created_at,
+    completed_at: (response as { completed_at?: number | null }).completed_at ??
+      null,
+    previous_response_id: response.previous_response_id ?? null,
+    instructions: response.instructions ?? null,
+    reasoning: (response as { reasoning?: CreateResponseResponse["reasoning"] })
+      .reasoning ?? null,
     status: mapStatus(response.status ?? undefined),
     output: outputItems,
     usage: mapUsage(response.usage),
-    error: mapError(response.error),
+    error: mapError(response.error) ?? null,
+    metadata: (response as {
+      metadata?: Record<string, JSONValue>;
+    }).metadata,
+    max_output_tokens: response.max_output_tokens ?? null,
+    max_tool_calls:
+      (response as { max_tool_calls?: number | null }).max_tool_calls ?? null,
+    parallel_tool_calls:
+      (response as { parallel_tool_calls?: boolean }).parallel_tool_calls,
+    store: (response as { store?: boolean }).store,
   };
 }
 
@@ -269,13 +367,30 @@ function toOpenAIInputItems(
               text: part.text,
             };
           }
+          if (part.type === "summary_text") {
+            return {
+              type: "summary_text",
+              text: part.text,
+            };
+          }
+          if (part.type === "reasoning_text") {
+            return {
+              type: "reasoning_text",
+              text: part.text,
+            };
+          }
           return null;
         })
         .filter((
           part,
-        ): part is { type: "input_text" | "output_text"; text: string } =>
-          Boolean(part)
-        );
+        ): part is {
+          type:
+            | "input_text"
+            | "output_text"
+            | "summary_text"
+            | "reasoning_text";
+          text: string;
+        } => Boolean(part));
       if (content.length === 0) continue;
       mapped.push({
         type: "message",
@@ -302,6 +417,22 @@ function toOpenAIInputItems(
         output: item.output,
         id: item.id,
       });
+      continue;
+    }
+    if (item.type === "reasoning") {
+      mapped.push({
+        type: "reasoning",
+        id: item.id,
+        content: (item.content ?? []).map((part) => ({
+          type: part.type,
+          text: part.text,
+        })),
+        summary: item.summary.map((part) => ({
+          type: part.type,
+          text: part.text,
+        })),
+        encrypted_content: item.encrypted_content ?? null,
+      });
     }
   }
   return mapped;
@@ -408,6 +539,7 @@ function responseItemsToChat(items: Array<ResponseItem>): {
 async function createResponse(
   client: OpenAIClient,
   request: CreateResponseRequest,
+  signal?: AbortSignal,
   onStreamEvent?: (event: ResponseEvent) => void,
 ): Promise<CreateResponseResponse> {
   const baseParams: Record<string, unknown> = {
@@ -416,6 +548,24 @@ async function createResponse(
     instructions: request.instructions,
     tools: undefined,
     tool_choice: mapToolChoice(request.tool_choice),
+    previous_response_id: request.previous_response_id,
+    reasoning: mapReasoning(request.reasoning),
+    parallel_tool_calls: request.parallel_tool_calls,
+    max_tool_calls: request.max_tool_calls,
+    store: request.store,
+    include: request.include,
+    text: request.text,
+    stream_options: request.stream_options,
+    background: request.background,
+    truncation: request.truncation,
+    service_tier: request.service_tier,
+    top_logprobs: request.top_logprobs,
+    safety_identifier: request.safety_identifier,
+    prompt_cache_key: request.prompt_cache_key,
+    temperature: request.temperature,
+    top_p: request.top_p,
+    frequency_penalty: request.frequency_penalty,
+    presence_penalty: request.presence_penalty,
     stream: request.stream,
     max_output_tokens: request.max_output_tokens,
     metadata: request.metadata,
@@ -425,12 +575,13 @@ async function createResponse(
   if (mappedTools.length > 0) {
     baseParams.tools = mappedTools;
   }
-  const params = { ...(request.params ?? {}), ...baseParams };
+  const params = { ...(request.params ?? {}), ...stripUndefined(baseParams) };
   const debugResponses = Deno.env.get("GAMBIT_DEBUG_RESPONSES") === "1";
   let responseOrStream: unknown;
   try {
     responseOrStream = await client.responses.create(
       params as unknown as OpenAI.Responses.ResponseCreateParams,
+      signal ? { signal } : undefined,
     );
   } catch (err) {
     if (debugResponses) {
@@ -456,7 +607,12 @@ async function createResponse(
       switch (event.type) {
         case "response.created": {
           const mapped = normalizeOpenAIResponse(event.response);
-          onStreamEvent?.({ type: "response.created", response: mapped });
+          onStreamEvent?.({
+            type: "response.created",
+            response: mapped,
+            sequence_number: (event as { sequence_number?: number })
+              .sequence_number,
+          });
           break;
         }
         case "response.output_text.delta":
@@ -465,6 +621,12 @@ async function createResponse(
             output_index: event.output_index,
             delta: event.delta,
             item_id: event.item_id,
+            content_index: event.content_index,
+            sequence_number: (event as { sequence_number?: number })
+              .sequence_number,
+            logprobs: (event as {
+              logprobs?: Array<{ token?: string; logprob?: number }>;
+            }).logprobs,
           });
           break;
         case "response.output_text.done":
@@ -473,6 +635,9 @@ async function createResponse(
             output_index: event.output_index,
             text: event.text,
             item_id: event.item_id,
+            content_index: event.content_index,
+            sequence_number: (event as { sequence_number?: number })
+              .sequence_number,
           });
           break;
         case "response.output_item.added": {
@@ -482,6 +647,8 @@ async function createResponse(
               type: "response.output_item.added",
               output_index: event.output_index,
               item,
+              sequence_number: (event as { sequence_number?: number })
+                .sequence_number,
             });
           }
           break;
@@ -493,13 +660,106 @@ async function createResponse(
               type: "response.output_item.done",
               output_index: event.output_index,
               item,
+              sequence_number: (event as { sequence_number?: number })
+                .sequence_number,
             });
           }
           break;
         }
+        case "response.reasoning.delta":
+          if (typeof event.delta === "string") {
+            onStreamEvent?.({
+              type: "response.reasoning.delta",
+              output_index: event.output_index,
+              item_id: event.item_id,
+              content_index: event.content_index,
+              delta: event.delta,
+              sequence_number: (event as { sequence_number?: number })
+                .sequence_number,
+              obfuscation: (event as { obfuscation?: string }).obfuscation,
+            });
+          }
+          break;
+        case "response.reasoning.done":
+          onStreamEvent?.({
+            type: "response.reasoning.done",
+            output_index: event.output_index,
+            item_id: event.item_id,
+            content_index: event.content_index,
+            text: event.text,
+            sequence_number: (event as { sequence_number?: number })
+              .sequence_number,
+          });
+          break;
+        case "response.reasoning_summary_text.delta":
+          if (typeof event.delta === "string") {
+            onStreamEvent?.({
+              type: "response.reasoning_summary_text.delta",
+              output_index: event.output_index,
+              item_id: event.item_id,
+              summary_index: event.summary_index,
+              delta: event.delta,
+              sequence_number: (event as { sequence_number?: number })
+                .sequence_number,
+              obfuscation: (event as { obfuscation?: string }).obfuscation,
+            });
+          }
+          break;
+        case "response.reasoning_summary_text.done":
+          onStreamEvent?.({
+            type: "response.reasoning_summary_text.done",
+            output_index: event.output_index,
+            item_id: event.item_id,
+            summary_index: event.summary_index,
+            text: event.text,
+            sequence_number: (event as { sequence_number?: number })
+              .sequence_number,
+          });
+          break;
+        case "response.reasoning_summary_part.added":
+          if (
+            event.part && typeof event.part === "object" &&
+            "type" in event.part &&
+            "text" in event.part &&
+            typeof (event.part as { text?: unknown }).text === "string"
+          ) {
+            onStreamEvent?.({
+              type: "response.reasoning_summary_part.added",
+              output_index: event.output_index,
+              item_id: event.item_id,
+              summary_index: event.summary_index,
+              part: event.part as ResponseTextContent,
+              sequence_number: (event as { sequence_number?: number })
+                .sequence_number,
+            });
+          }
+          break;
+        case "response.reasoning_summary_part.done":
+          if (
+            event.part && typeof event.part === "object" &&
+            "type" in event.part &&
+            "text" in event.part &&
+            typeof (event.part as { text?: unknown }).text === "string"
+          ) {
+            onStreamEvent?.({
+              type: "response.reasoning_summary_part.done",
+              output_index: event.output_index,
+              item_id: event.item_id,
+              summary_index: event.summary_index,
+              part: event.part as ResponseTextContent,
+              sequence_number: (event as { sequence_number?: number })
+                .sequence_number,
+            });
+          }
+          break;
         case "response.completed": {
           completed = normalizeOpenAIResponse(event.response);
-          onStreamEvent?.({ type: "response.completed", response: completed });
+          onStreamEvent?.({
+            type: "response.completed",
+            response: completed,
+            sequence_number: (event as { sequence_number?: number })
+              .sequence_number,
+          });
           break;
         }
         case "response.failed": {
@@ -507,6 +767,8 @@ async function createResponse(
           onStreamEvent?.({
             type: "response.failed",
             error: error ?? {},
+            sequence_number: (event as { sequence_number?: number })
+              .sequence_number,
           });
           break;
         }
@@ -544,7 +806,12 @@ export function createOpenRouterProvider(opts: {
 
   return {
     async responses(input) {
-      return await createResponse(client, input.request, input.onStreamEvent);
+      return await createResponse(
+        client,
+        input.request,
+        input.signal,
+        input.onStreamEvent,
+      );
     },
     async chat(input) {
       const params = input.params ?? {};
@@ -558,6 +825,7 @@ export function createOpenRouterProvider(opts: {
             stream: input.stream,
             params,
           },
+          input.signal,
           (event) => {
             if (event.type === "response.output_text.delta") {
               input.onStreamText?.(event.delta);
@@ -582,20 +850,25 @@ export function createOpenRouterProvider(opts: {
           );
         }
 
-        const stream = await client.chat.completions.create({
-          model: normalizeOpenRouterModel(input.model),
-          messages: input
-            .messages as Array<
-              OpenAI.Chat.Completions.ChatCompletionMessageParam
-            >,
-          tools: input
-            .tools as unknown as Array<
-              OpenAI.Chat.Completions.ChatCompletionTool
-            >,
-          tool_choice: "auto",
-          stream: true,
-          ...(params as Record<string, unknown>),
-        }) as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>;
+        const stream = await client.chat.completions.create(
+          {
+            model: normalizeOpenRouterModel(input.model),
+            messages: input
+              .messages as Array<
+                OpenAI.Chat.Completions.ChatCompletionMessageParam
+              >,
+            tools: input
+              .tools as unknown as Array<
+                OpenAI.Chat.Completions.ChatCompletionTool
+              >,
+            tool_choice: "auto",
+            stream: true,
+            ...(params as Record<string, unknown>),
+          },
+          input.signal ? { signal: input.signal } : undefined,
+        ) as AsyncIterable<
+          OpenAI.Chat.Completions.ChatCompletionChunk
+        >;
 
         let finishReason: "stop" | "tool_calls" | "length" | null = null;
         const contentParts: Array<string> = [];
@@ -693,20 +966,23 @@ export function createOpenRouterProvider(opts: {
         };
       }
 
-      const response = await client.chat.completions.create({
-        model: normalizeOpenRouterModel(input.model),
-        messages: input
-          .messages as unknown as Array<
-            OpenAI.Chat.Completions.ChatCompletionMessageParam
-          >,
-        tools: input
-          .tools as unknown as Array<
-            OpenAI.Chat.Completions.ChatCompletionTool
-          >,
-        tool_choice: "auto",
-        stream: false,
-        ...(params as Record<string, unknown>),
-      }) as OpenAI.Chat.Completions.ChatCompletion;
+      const response = await client.chat.completions.create(
+        {
+          model: normalizeOpenRouterModel(input.model),
+          messages: input
+            .messages as unknown as Array<
+              OpenAI.Chat.Completions.ChatCompletionMessageParam
+            >,
+          tools: input
+            .tools as unknown as Array<
+              OpenAI.Chat.Completions.ChatCompletionTool
+            >,
+          tool_choice: "auto",
+          stream: false,
+          ...(params as Record<string, unknown>),
+        },
+        input.signal ? { signal: input.signal } : undefined,
+      ) as OpenAI.Chat.Completions.ChatCompletion;
 
       const choice = response.choices[0];
       const message = choice.message;
@@ -727,11 +1003,23 @@ export function createOpenRouterProvider(opts: {
           | "length",
         toolCalls,
         usage: response.usage
-          ? {
-            promptTokens: response.usage.prompt_tokens ?? 0,
-            completionTokens: response.usage.completion_tokens ?? 0,
-            totalTokens: response.usage.total_tokens ?? 0,
-          }
+          ? (() => {
+            const usage = {
+              promptTokens: response.usage.prompt_tokens ?? 0,
+              completionTokens: response.usage.completion_tokens ?? 0,
+              totalTokens: response.usage.total_tokens ?? 0,
+            };
+            const details = response.usage as {
+              completion_tokens_details?: { reasoning_tokens?: number | null };
+              output_tokens_details?: { reasoning_tokens?: number | null };
+            };
+            const value = details.completion_tokens_details
+              ?.reasoning_tokens ??
+              details.output_tokens_details?.reasoning_tokens;
+            return typeof value === "number"
+              ? { ...usage, reasoningTokens: value }
+              : usage;
+          })()
           : undefined,
       };
     },
diff --git a/src/providers/router.test.ts b/src/providers/router.test.ts
index 3a075669a..d3f18cad6 100644
--- a/src/providers/router.test.ts
+++ b/src/providers/router.test.ts
@@ -13,14 +13,32 @@ const stubProvider = (label: string): ModelProvider => ({
 Deno.test("provider router selects prefixed provider", () => {
   const openrouter = stubProvider("openrouter");
   const ollama = stubProvider("ollama");
+  const codexCli = stubProvider("codex-cli");
   const router = createProviderRouter({
-    providers: { openrouter, ollama },
+    providers: { openrouter, ollama, "codex-cli": codexCli },
   });
 
   const selection = router.resolve({ model: "ollama/llama3" });
   assertEquals(selection.providerKey, "ollama");
   assertEquals(selection.provider, ollama);
   assertEquals(selection.model, "llama3");
+
+  const codexSelection = router.resolve({ model: "codex-cli/default" });
+  assertEquals(codexSelection.providerKey, "codex-cli");
+  assertEquals(codexSelection.provider, codexCli);
+  assertEquals(codexSelection.model, "default");
+});
+
+Deno.test("provider router maps bare codex-cli to codex-cli/default", () => {
+  const codexCli = stubProvider("codex-cli");
+  const router = createProviderRouter({
+    providers: { "codex-cli": codexCli },
+    defaultProvider: null,
+  });
+  const selection = router.resolve({ model: "codex-cli" });
+  assertEquals(selection.providerKey, "codex-cli");
+  assertEquals(selection.provider, codexCli);
+  assertEquals(selection.model, "default");
 });
 
 Deno.test("provider router defaults to openrouter when no prefix", () => {
@@ -62,3 +80,16 @@ Deno.test("provider router throws when no default provider is available", () =>
     true,
   );
 });
+
+Deno.test("provider router rejects legacy codex prefixes", () => {
+  const openrouter = stubProvider("openrouter");
+  const router = createProviderRouter({
+    providers: { openrouter },
+  });
+  const error = assertThrows(() => router.resolve({ model: "codex/default" }));
+  assertEquals(error instanceof Error, true);
+  assertEquals(
+    (error as Error).message.includes('Legacy Codex model prefix "codex"'),
+    true,
+  );
+});
diff --git a/src/providers/router.ts b/src/providers/router.ts
index c7de7258e..d6d2247b7 100644
--- a/src/providers/router.ts
+++ b/src/providers/router.ts
@@ -1,6 +1,6 @@
 import type { ModelProvider } from "@bolt-foundry/gambit-core";
 
-export type ProviderKey = "openrouter" | "ollama" | "google";
+export type ProviderKey = "openrouter" | "ollama" | "google" | "codex-cli";
 
 export type ProviderRouter = {
   resolve: (input: { model: string }) => {
@@ -16,13 +16,22 @@ const PROVIDER_PREFIXES: Record<ProviderKey, string> = {
   openrouter: "openrouter/",
   ollama: "ollama/",
   google: "google/",
+  "codex-cli": "codex-cli/",
 };
 
 function parsePrefixedModel(model: string): {
   providerKey?: ProviderKey;
   strippedModel: string;
   rawModel: string;
+  legacyCodex?: boolean;
 } {
+  if (model.trim() === "codex-cli") {
+    return {
+      providerKey: "codex-cli",
+      strippedModel: "default",
+      rawModel: model,
+    };
+  }
   for (const [providerKey, prefix] of Object.entries(PROVIDER_PREFIXES)) {
     if (model.startsWith(prefix)) {
       return {
@@ -32,6 +41,13 @@ function parsePrefixedModel(model: string): {
       };
     }
   }
+  if (model === "codex" || model.startsWith("codex/")) {
+    return {
+      strippedModel: model,
+      rawModel: model,
+      legacyCodex: true,
+    };
+  }
   return { strippedModel: model, rawModel: model };
 }
 
@@ -43,6 +59,8 @@ function missingProviderMessage(providerKey: ProviderKey): string {
       return "Ollama provider is not configured. Set OLLAMA_BASE_URL or OLLAMA_API_KEY.";
     case "google":
       return "Google provider is not configured. Set GOOGLE_API_KEY or GEMINI_API_KEY.";
+    case "codex-cli":
+      return "Codex CLI provider is not configured.";
   }
 }
 
@@ -59,9 +77,15 @@ export function createProviderRouter(opts: {
   );
   return {
     resolve({ model }) {
-      const { providerKey, strippedModel, rawModel } = parsePrefixedModel(
-        model,
-      );
+      const { providerKey, strippedModel, rawModel, legacyCodex } =
+        parsePrefixedModel(
+          model,
+        );
+      if (legacyCodex) {
+        throw new Error(
+          'Legacy Codex model prefix "codex" is no longer supported. Use "codex-cli/default" or "codex-cli/<model>".',
+        );
+      }
       if (providerKey) {
         const provider = opts.providers[providerKey];
         if (!provider) {
diff --git a/src/server.ts b/src/server.ts
index d20f614a9..8b9567ca3 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -1,21 +1,62 @@
 import * as path from "@std/path";
-import { existsSync } from "@std/fs";
+import { copy, ensureDir, existsSync } from "@std/fs";
 import { parse } from "@std/jsonc";
-import { isGambitEndSignal, runDeck } from "@bolt-foundry/gambit-core";
+import {
+  isGambitEndSignal,
+  isRunCanceledError,
+  runDeck,
+} from "@bolt-foundry/gambit-core";
 import { sanitizeNumber } from "./test_bot.ts";
 import { makeConsoleTracer } from "./trace.ts";
 import { defaultSessionRoot } from "./cli_utils.ts";
 import { loadDeck } from "@bolt-foundry/gambit-core";
 import { createWorkspaceScaffold } from "./workspace.ts";
+import {
+  assertSafeBuildBotRoot,
+  randomId,
+  resolveDefaultValue,
+} from "./server_helpers.ts";
+import type {
+  AvailableGraderDeck,
+  AvailableTestDeck,
+  DeckToolDescription,
+  GradingFlag,
+  GradingRunRecord,
+  NormalizedSchema,
+  OutgoingMessage,
+  SchemaDescription,
+  SessionMeta,
+} from "./server_types.ts";
+import { createSessionStore } from "./server_session_store.ts";
+import {
+  handleFeedbackRoutes,
+  handleGradingReferenceRoute,
+} from "./server_feedback_grading_routes.ts";
+import { handleUiRoutes } from "./server_ui_routes.ts";
+import {
+  resolveWorkspaceIdFromRecord,
+  resolveWorkspaceIdFromSearchParams,
+  WORKSPACE_ROUTE_BASE,
+  WORKSPACE_STATE_SCHEMA_VERSION,
+  workspaceSchemaError,
+} from "./workspace_contract.ts";
 import {
   appendDurableStreamEvent,
   handleDurableStreamRequest,
 } from "./durable_streams.ts";
 import type { FeedbackEntry, SavedState } from "@bolt-foundry/gambit-core";
 import type {
+  CreateResponseRequest,
+  CreateResponseResponse,
+  JSONValue,
   LoadedDeck,
   ModelMessage,
   ModelProvider,
+  ResponseEvent,
+  ResponseItem,
+  ResponseTextContent,
+  ResponseToolChoice,
+  ResponseToolDefinition,
   TraceEvent,
 } from "@bolt-foundry/gambit-core";
 import type { ZodTypeAny } from "zod";
@@ -111,24 +152,36 @@ const gambitVersion = (() => {
   return "unknown";
 })();
 const SIMULATOR_STREAM_ID = "gambit-simulator";
+const WORKSPACE_STREAM_ID = "gambit-workspace";
 const GRADE_STREAM_ID = "gambit-grade";
 const TEST_STREAM_ID = "gambit-test";
 const BUILD_STREAM_ID = "gambit-build";
 const DEFAULT_TEST_BOT_SEED_PROMPT =
   "Start the conversation as the user. Do not wait for the assistant to speak first.";
-type AvailableTestDeck = {
-  id: string;
-  label: string;
-  description?: string;
-  path: string;
-};
-type AvailableGraderDeck = {
-  id: string;
-  label: string;
-  description?: string;
-  path: string;
-};
-
+const GAMBIT_BOT_SOURCE_DECK_URL = new URL( // gambit-bot deck prompt, resolved relative to this module
+  "./decks/gambit-bot/PROMPT.md",
+  import.meta.url,
+);
+const GAMBIT_BOT_SOURCE_DIR = GAMBIT_BOT_SOURCE_DECK_URL.protocol === "file:"
+  ? path.dirname(path.fromFileUrl(GAMBIT_BOT_SOURCE_DECK_URL))
+  : ""; // empty when this module is not loaded from a file: URL
+const GAMBIT_BOT_POLICY_DIR = GAMBIT_BOT_SOURCE_DIR
+  ? path.join(GAMBIT_BOT_SOURCE_DIR, "policy")
+  : ""; // empty whenever the source dir above is empty
+
+/** Seeds `<root>/.gambit/policy` from the bundled policy dir, best-effort. */
+async function ensureGambitPolicyInBotRoot(root: string) {
+  // The constant is "" when the bundled deck is not a local file.
+  if (GAMBIT_BOT_POLICY_DIR === "") return;
+  let sourceIsDir = false;
+  try {
+    sourceIsDir = (await Deno.stat(GAMBIT_BOT_POLICY_DIR)).isDirectory;
+  } catch { /* missing or unreadable source: nothing to copy */ }
+  const target = path.join(root, ".gambit", "policy");
+  if (!sourceIsDir || existsSync(target)) return; // keep any existing copy
+  await ensureDir(path.dirname(target));
+  await copy(GAMBIT_BOT_POLICY_DIR, target, { overwrite: false });
+}
 let availableTestDecks: Array<AvailableTestDeck> = [];
 const testDeckByPath = new Map<string, AvailableTestDeck>();
 const testDeckById = new Map<string, AvailableTestDeck>();
@@ -136,115 +189,6 @@ let availableGraderDecks: Array<AvailableGraderDeck> = [];
 const graderDeckByPath = new Map<string, AvailableGraderDeck>();
 const graderDeckById = new Map<string, AvailableGraderDeck>();
 
-type NormalizedSchema = {
-  kind:
-    | "string"
-    | "number"
-    | "boolean"
-    | "enum"
-    | "object"
-    | "array"
-    | "unknown";
-  optional: boolean;
-  description?: string;
-  example?: unknown;
-  defaultValue?: unknown;
-  enumValues?: Array<unknown>;
-  fields?: Record<string, NormalizedSchema>;
-  items?: NormalizedSchema;
-};
-
-type DeckToolDescription = {
-  name: string;
-  label?: string;
-  description?: string;
-  path?: string;
-};
-
-type SchemaDescription = {
-  schema?: NormalizedSchema;
-  defaults?: unknown;
-  error?: string;
-  tools?: Array<DeckToolDescription>;
-};
-
-type SessionMeta = {
-  id: string;
-  deck?: string;
-  deckSlug?: string;
-  testBotName?: string;
-  createdAt?: string;
-  gradingRuns?: Array<GradingRunRecord>;
-  sessionDir?: string;
-  statePath?: string;
-};
-
-type GradingRunRecord = {
-  id: string;
-  graderId: string;
-  graderPath: string;
-  graderLabel?: string;
-  status: "running" | "completed" | "error";
-  runAt?: string;
-  referenceSample?: {
-    score: number;
-    reason: string;
-    evidence?: Array<string>;
-  };
-  input?: unknown;
-  result?: unknown;
-  error?: string;
-};
-type GradingFlag = {
-  id: string;
-  refId: string;
-  runId?: string;
-  turnIndex?: number;
-  reason?: string;
-  createdAt: string;
-};
-
-type OutgoingMessage =
-  | {
-    type: "ready";
-    deck: string;
-    port: number;
-    schema?: NormalizedSchema;
-    defaults?: unknown;
-    schemaError?: string;
-  }
-  | { type: "pong" }
-  | { type: "stream"; chunk: string; runId?: string }
-  | { type: "result"; result: unknown; runId?: string; streamed: boolean }
-  | { type: "trace"; event: TraceEvent }
-  | {
-    type: "state";
-    state: SavedState;
-    newMessages?: Array<{
-      index: number;
-      role: string;
-      messageRefId?: string;
-      content?: unknown;
-    }>;
-  }
-  | { type: "error"; message: string; runId?: string };
-
-function randomId(prefix: string): string {
-  const suffix = crypto.randomUUID().replace(/-/g, "").slice(0, 24);
-  return `${prefix}-${suffix}`;
-}
-
-function resolveDefaultValue(raw: unknown): unknown {
-  if (typeof raw === "function") {
-    try {
-      return raw();
-    } catch {
-      return undefined;
-    }
-  }
-  return raw;
-}
-
 async function describeDeckInputSchemaFromPath(
   deckPath: string,
 ): Promise<SchemaDescription> {
@@ -644,7 +588,7 @@ function buildInitFillPrompt(args: {
     schemaHints,
   };
   return [
-    "You are filling missing required init fields for a Gambit Test Bot run.",
+    "You are filling missing required init fields for a Gambit Scenario run.",
     "Return ONLY valid JSON that includes values for the missing fields.",
     "Do not include any fields that are not listed as missing.",
     "If the only missing path is '(root)', return the full init JSON value.",
@@ -704,6 +648,427 @@ function validateInitInput(schema: ZodTypeAny | undefined, value: unknown) {
   return result.data;
 }
 
+/** Wraps `body` as a JSON `Response`; `status` defaults to 200. */
+function jsonResponse(body: unknown, status = 200): Response {
+  const headers = { "content-type": "application/json" };
+  const payload = JSON.stringify(body);
+  return new Response(payload, { status, headers });
+}
+
+/** Narrows a parsed body to a record; throws unless it is a non-array object. */
+function parseBodyObject(value: unknown): Record<string, unknown> {
+  const ok = typeof value === "object" && value !== null && !Array.isArray(value);
+  if (ok) return value as Record<string, unknown>;
+  throw new Error("Request body must be a JSON object");
+}
+
+/**
+ * Coerces one message content entry into a text part.
+ *
+ * Strings and objects with a non-empty string `text` are accepted. An explicit
+ * `input_text`/`output_text` type wins; otherwise the type follows the role
+ * (`assistant` gives `output_text`, other roles give `input_text`).
+ * Returns null for anything that carries no usable text.
+ */
+function toTextPart(
+  role: "system" | "user" | "assistant",
+  value: unknown,
+): { type: "input_text" | "output_text"; text: string } | null {
+  const roleType = role === "assistant" ? "output_text" : "input_text";
+  if (typeof value === "string") return { type: roleType, text: value };
+  // Only plain objects can carry a `text` field; null and arrays cannot.
+  if (typeof value !== "object" || value === null) return null;
+  if (Array.isArray(value)) return null;
+  const { text, type } = value as Record<string, unknown>;
+  if (typeof text !== "string" || text === "") return null;
+  if (type === "output_text" || type === "input_text") return { type, text };
+  return { type: roleType, text };
+}
+
+/**
+ * Validates a raw `message` input item and converts it to a `ResponseItem`.
+ * `content` may be one entry or an array; each entry goes through `toTextPart`
+ * and entries without usable text are dropped.
+ *
+ * @throws Error when `role` is not system/user/assistant, or when no text part
+ *   survives normalization.
+ */
+function normalizeMessageItem(
+  item: Record<string, unknown>,
+): ResponseItem | null {
+  const { role, content: rawContent, id } = item;
+  if (role !== "system" && role !== "user" && role !== "assistant") {
+    throw new Error("message.role must be system, user, or assistant");
+  }
+  const candidates: Array<unknown> = Array.isArray(rawContent)
+    ? rawContent
+    : [rawContent];
+  const content: Array<NonNullable<ReturnType<typeof toTextPart>>> = [];
+  for (const candidate of candidates) {
+    const part = toTextPart(role, candidate);
+    if (part) content.push(part);
+  }
+  if (content.length === 0) {
+    throw new Error("message.content must include text");
+  }
+  const messageId = typeof id === "string" ? id : undefined;
+  return { type: "message", role, content, id: messageId };
+}
+
+/**
+ * Normalizes a Responses-style `input` into `ResponseItem`s. A bare string
+ * becomes one user message; anything else is treated as an item or array of
+ * items of type `message`, `function_call`, or `function_call_output`.
+ *
+ * @throws Error for non-object entries, unsupported types, or missing fields.
+ */
+function normalizeInputItems(input: unknown): Array<ResponseItem> {
+  if (typeof input === "string") {
+    return [{
+      type: "message",
+      role: "user",
+      content: [{ type: "input_text", text: input }],
+    }];
+  }
+  const optionalId = (id: unknown) => typeof id === "string" ? id : undefined;
+  const normalizeOne = (raw: unknown): ResponseItem | null => {
+    if (!raw || typeof raw !== "object" || Array.isArray(raw)) {
+      throw new Error("input items must be objects");
+    }
+    const item = raw as Record<string, unknown>;
+    const type = typeof item.type === "string" ? item.type : "";
+    switch (type) {
+      case "message":
+        return normalizeMessageItem(item);
+      case "function_call": {
+        const { call_id, name, arguments: args } = item;
+        if (
+          typeof call_id !== "string" || typeof name !== "string" ||
+          typeof args !== "string"
+        ) {
+          throw new Error(
+            "function_call requires call_id, name, and arguments strings",
+          );
+        }
+        return {
+          type: "function_call",
+          call_id,
+          name,
+          arguments: args,
+          id: optionalId(item.id),
+        };
+      }
+      case "function_call_output": {
+        const { call_id, output } = item;
+        if (typeof call_id !== "string" || typeof output !== "string") {
+          throw new Error("function_call_output requires call_id and output strings");
+        }
+        const id = optionalId(item.id);
+        return { type: "function_call_output", call_id, output, id };
+      }
+      default:
+        throw new Error(`Unsupported input item type: ${type || "(missing type)"}`);
+    }
+  };
+  const items: Array<ResponseItem> = [];
+  for (const raw of Array.isArray(input) ? input : [input]) {
+    const normalized = normalizeOne(raw);
+    if (normalized) items.push(normalized);
+  }
+  return items;
+}
+
+/**
+ * Normalizes a `tools` array into `ResponseToolDefinition`s.
+ *
+ * Accepts both the nested shape (`{ type, function: { name, ... } }`) and the
+ * flat shape (`{ type, name, ... }`). Entries whose `type` is not `function`
+ * are skipped; non-object `parameters` fall back to `{}`.
+ *
+ * @returns The normalized tools, or undefined when `tools` is not a non-empty
+ *   array or no function tools remain.
+ * @throws Error when an entry is not an object or a function tool lacks a name.
+ */
+function normalizeTools(
+  tools: unknown,
+): Array<ResponseToolDefinition> | undefined {
+  if (!Array.isArray(tools) || tools.length === 0) return undefined;
+  const isPlainObject = (
+    value: unknown,
+  ): value is Record<string, unknown> =>
+    Boolean(value) && typeof value === "object" && !Array.isArray(value);
+  const normalized: Array<ResponseToolDefinition> = [];
+  for (const entry of tools) {
+    if (!isPlainObject(entry)) {
+      throw new Error("tools entries must be objects");
+    }
+    // Only function tools are supported; other tool types are ignored.
+    if (entry.type !== "function") continue;
+    // Prefer the nested `function` record when present; otherwise read the
+    // definition straight off the entry itself.
+    const nested = isPlainObject(entry.function);
+    const spec = nested
+      ? entry.function as Record<string, unknown>
+      : entry;
+    const { name, description, parameters } = spec;
+    if (typeof name !== "string" || !name) {
+      throw new Error(
+        nested
+          ? "tool.function.name is required"
+          : "tool.name is required",
+      );
+    }
+    normalized.push({
+      type: "function",
+      function: {
+        name,
+        description: typeof description === "string"
+          ? description
+          : undefined,
+        // Non-object parameters degrade to an empty schema.
+        parameters: isPlainObject(parameters)
+          ? parameters as Record<string, JSONValue>
+          : {},
+      },
+    });
+  }
+  return normalized.length ? normalized : undefined;
+}
+
+/**
+ * Normalizes a request `tool_choice` value.
+ *
+ * Supports the string modes (`none`, `auto`, `required`), an `allowed_tools`
+ * object (invalid tool entries are dropped), and a single function choice in
+ * either nested (`function.name`) or flat (`name`) form.
+ *
+ * @returns The normalized choice, or undefined when the value is missing or
+ *   not in a recognized shape.
+ */
+function normalizeToolChoice(choice: unknown): ResponseToolChoice | undefined {
+  const isMode = (value: unknown): value is "none" | "auto" | "required" =>
+    value === "none" || value === "auto" || value === "required";
+  if (isMode(choice)) return choice;
+  if (!choice || typeof choice !== "object" || Array.isArray(choice)) {
+    return undefined;
+  }
+  const record = choice as Record<string, unknown>;
+  if (record.type === "allowed_tools" && Array.isArray(record.tools)) {
+    const tools: Array<{ type: "function"; name: string }> = [];
+    for (const entry of record.tools) {
+      if (!entry || typeof entry !== "object" || Array.isArray(entry)) continue;
+      const { type, name } = entry as Record<string, unknown>;
+      if (type === "function" && typeof name === "string") {
+        tools.push({ type: "function", name });
+      }
+    }
+    if (tools.length === 0) return undefined;
+    const mode = isMode(record.mode) ? record.mode : undefined;
+    return { type: "allowed_tools", tools, mode };
+  }
+  if (record.type !== "function") return undefined;
+  // The nested `function.name` form takes precedence over the flat `name` form.
+  const nestedName = record.function && typeof record.function === "object"
+    ? (record.function as Record<string, unknown>).name
+    : undefined;
+  for (const name of [nestedName, record.name]) {
+    if (typeof name === "string" && name.length > 0) {
+      return { type: "function", function: { name } };
+    }
+  }
+  return undefined;
+}
+
+/** Encodes `event` as one UTF-8 SSE `data:` frame (JSON payload + blank line). */
+function sseFrame(event: unknown): Uint8Array {
+  return new TextEncoder().encode("data: " + JSON.stringify(event) + "\n\n");
+}
+
+/**
+ * Recursively coerces `value` into a `JSONValue`: primitives pass through,
+ * arrays and objects are copied, anything else is stringified. Mirroring
+ * `JSON.stringify`, `undefined` object properties are omitted and `undefined`
+ * array slots become null rather than the literal string "undefined".
+ */
+function asJsonValue(value: unknown): JSONValue {
+  if (value === null) return null;
+  if (Array.isArray(value)) {
+    return value.map((slot) => slot === undefined ? null : asJsonValue(slot));
+  }
+  if (typeof value === "object") {
+    const out: Record<string, JSONValue> = {};
+    for (const [key, entry] of Object.entries(value as Record<string, unknown>)) {
+      if (entry !== undefined) out[key] = asJsonValue(entry);
+    }
+    return out;
+  }
+  if (typeof value === "string" || typeof value === "number") return value;
+  return typeof value === "boolean" ? value : String(value);
+}
+
+/**
+ * Serializes a text content part for the response payload. `output_text`
+ * parts additionally carry empty `annotations` and `logprobs` arrays; every
+ * other part keeps only its type and text.
+ */
+function toStrictContentPart(
+  part: ResponseTextContent,
+): Record<string, unknown> {
+  const { type, text } = part;
+  const serialized: Record<string, unknown> = { type, text };
+  if (type !== "output_text") return serialized;
+  // Insertion order keeps the keys as type, text, annotations, logprobs.
+  serialized.annotations = [];
+  serialized.logprobs = [];
+  return serialized;
+}
+
+/**
+ * Serializes a `ResponseItem` into its wire representation.
+ *
+ * Missing ids are synthesized: `msg_<n>` / `rs_<n>` (1-based position) for
+ * messages and reasoning items, the `call_id` for function calls, and
+ * `<call_id>_out` for function call outputs.
+ */
+function toStrictResponseItem(
+  item: ResponseItem,
+  index: number,
+): Record<string, unknown> {
+  const ordinal = index + 1;
+  switch (item.type) {
+    case "message":
+      return {
+        type: "message",
+        id: item.id ?? `msg_${ordinal}`,
+        status: "completed",
+        role: item.role,
+        content: item.content.map((part) => toStrictContentPart(part)),
+      };
+    case "function_call": {
+      const { id, call_id, name, arguments: args } = item;
+      return { type: "function_call", id: id ?? call_id, call_id, name, arguments: args, status: "completed" };
+    }
+    case "function_call_output": {
+      const { id, call_id, output } = item;
+      return { type: "function_call_output", id: id ?? `${call_id}_out`, call_id, output, status: "completed" };
+    }
+    default:
+      // Any other item is serialized as a reasoning item.
+      return {
+        type: "reasoning",
+        id: item.id ?? `rs_${ordinal}`,
+        content: (item.content ?? []).map((part) => toStrictContentPart(part)),
+        summary: item.summary.map((part) => toStrictContentPart(part)),
+        encrypted_content: item.encrypted_content ?? null,
+      };
+  }
+}
+
+/** Flattens tool definitions to `{ type, name, description, parameters, strict }`. */
+function toStrictTools(
+  tools: Array<ResponseToolDefinition> | undefined,
+): Array<Record<string, unknown>> {
+  const flattened: Array<Record<string, unknown>> = [];
+  for (const { function: fn } of tools ?? []) {
+    const { name, description = null, parameters = null } = fn;
+    // `strict` is always reported as false.
+    flattened.push({ type: "function", name, description, parameters, strict: false });
+  }
+  return flattened;
+}
+
+/**
+ * Serializes the request's `tool_choice`. A missing choice is reported as
+ * "auto", and an `allowed_tools` choice without a mode gets mode "auto".
+ */
+function toStrictToolChoice(
+  choice: CreateResponseRequest["tool_choice"],
+): Record<string, unknown> | string {
+  if (!choice) return "auto";
+  if (choice === "none" || choice === "auto" || choice === "required") {
+    return choice;
+  }
+  if (choice.type !== "allowed_tools") {
+    return { type: "function", name: choice.function.name };
+  }
+  return { type: "allowed_tools", tools: choice.tools, mode: choice.mode ?? "auto" };
+}
+
+function toStrictResponseResource(args: { // builds the wire-format response resource from a request/response pair
+  request: CreateResponseRequest;
+  response: CreateResponseResponse;
+  statusOverride?: "in_progress" | "completed" | "failed"; // takes precedence over response.status
+}): Record<string, unknown> {
+  const now = Math.floor(Date.now() / 1000); // current time in whole seconds since the epoch
+  const createdAt = args.response.created_at ?? args.response.created ?? now;
+  const status = args.statusOverride ?? args.response.status ?? "completed";
+  const usage = args.response.usage // null when the response carries no usage
+    ? {
+      input_tokens: args.response.usage.promptTokens ?? 0,
+      output_tokens: args.response.usage.completionTokens ?? 0,
+      total_tokens: args.response.usage.totalTokens ?? 0,
+      input_tokens_details: {
+        cached_tokens: 0, // always reported as 0
+      },
+      output_tokens_details: {
+        reasoning_tokens: args.response.usage.reasoningTokens ?? 0,
+      },
+    }
+    : null;
+
+  return {
+    id: args.response.id,
+    object: "response",
+    created_at: createdAt,
+    completed_at: status === "completed" ? now : null, // only completed responses get a timestamp (the current time)
+    status,
+    incomplete_details: null,
+    model: args.response.model ?? args.request.model,
+    previous_response_id: args.request.previous_response_id ?? null,
+    instructions: args.request.instructions ?? null,
+    output: (args.response.output ?? []).map((item, idx) =>
+      toStrictResponseItem(item, idx)
+    ),
+    error: args.response.error ?? null,
+    tools: toStrictTools(args.request.tools),
+    tool_choice: toStrictToolChoice(args.request.tool_choice),
+    truncation: args.response.truncation ?? args.request.truncation ??
+      "disabled",
+    parallel_tool_calls: args.response.parallel_tool_calls ??
+      args.request.parallel_tool_calls ?? false,
+    text: args.response.text // response value, else request value, else plain text format
+      ? asJsonValue(args.response.text)
+      : args.request.text
+      ? asJsonValue(args.request.text)
+      : { format: { type: "text" } },
+    top_p: args.response.top_p ?? args.request.top_p ?? 1,
+    presence_penalty: args.response.presence_penalty ??
+      args.request.presence_penalty ?? 0,
+    frequency_penalty: args.response.frequency_penalty ??
+      args.request.frequency_penalty ?? 0,
+    top_logprobs: args.response.top_logprobs ?? args.request.top_logprobs ?? 0,
+    temperature: args.response.temperature ?? args.request.temperature ?? 1,
+    reasoning: args.request.reasoning
+      ? {
+        effort: args.request.reasoning.effort ?? null,
+        summary: args.request.reasoning.summary ?? null,
+      }
+      : null,
+    usage,
+    max_output_tokens: args.request.max_output_tokens ?? null,
+    max_tool_calls: args.request.max_tool_calls ?? null,
+    store: args.response.store ?? args.request.store ?? false,
+    background: args.response.background ?? args.request.background ?? false,
+    service_tier: args.response.service_tier ?? args.request.service_tier ??
+      "default",
+    metadata: args.request.metadata ? asJsonValue(args.request.metadata) : {}, // request metadata only; {} when absent
+    safety_identifier: args.response.safety_identifier ??
+      args.request.safety_identifier ?? null,
+    prompt_cache_key: args.response.prompt_cache_key ??
+      args.request.prompt_cache_key ?? null,
+  };
+}
+
 /**
  * Start the WebSocket simulator server used by the Gambit debug UI.
  */
@@ -731,6 +1096,7 @@ export function startWebSocketSimulator(opts: {
   sourceMap?: boolean;
   bundlePlatform?: "deno" | "browser";
   responsesMode?: boolean;
+  workerSandbox?: boolean;
 }): ReturnType<typeof Deno.serve> {
   const port = opts.port ?? 8000;
   const initialContext = opts.initialContext;
@@ -753,7 +1119,7 @@ export function startWebSocketSimulator(opts: {
       Deno.mkdirSync(base, { recursive: true });
     } catch (err) {
       logger.warn(
-        `[sim] unable to ensure sessions directory ${base}: ${
+        `[sim] unable to ensure workspace state directory ${base}: ${
           err instanceof Error ? err.message : err
         }`,
       );
@@ -761,8 +1127,7 @@ export function startWebSocketSimulator(opts: {
     return base;
   })();
   const workspaceRoot = (() => {
-    const dir = workspaceScaffoldRoot ??
-      path.join(path.dirname(sessionsRoot), "workspaces");
+    const dir = workspaceScaffoldRoot ?? sessionsRoot;
     if (workspaceScaffoldEnabled) {
       try {
         Deno.mkdirSync(dir, { recursive: true });
@@ -800,6 +1165,8 @@ export function startWebSocketSimulator(opts: {
     initFill?: TestBotInitFill;
     id: string;
     status: "idle" | "running" | "completed" | "error" | "canceled";
+    workspaceId?: string;
+    // Temporary alias while simulator UI migrates off sessionId naming.
     sessionId?: string;
     error?: string;
     startedAt?: string;
@@ -809,6 +1176,7 @@ export function startWebSocketSimulator(opts: {
       role: string;
       content: string;
       messageRefId?: string;
+      messageSource?: "scenario" | "manual";
       feedback?: FeedbackEntry;
       respondStatus?: number;
       respondCode?: string;
@@ -836,7 +1204,18 @@ export function startWebSocketSimulator(opts: {
     abort: AbortController | null;
   };
   const testBotRuns = new Map<string, TestBotRunEntry>();
-  const broadcastTestBot = (payload: unknown) => {
+  const broadcastTestBot = (payload: unknown, workspaceId?: string) => { // fans a test-run event out to the workspace and test streams
+    if (workspaceId) { // workspace-scoped envelope is only attempted when an id is given
+      const state = readSessionState(workspaceId);
+      if (state) { // skipped when no session state could be read for this workspace
+        appendWorkspaceEnvelope(
+          state,
+          "test",
+          payload as Record<string, unknown>, // NOTE(review): unchecked cast; assumes callers always pass an object - confirm
+        );
+      }
+    }
+    appendDurableStreamEvent(WORKSPACE_STREAM_ID, payload); // appended regardless of workspaceId
     appendDurableStreamEvent(TEST_STREAM_ID, payload);
   };
   type BuildBotRunStatus = {
@@ -916,6 +1295,8 @@ export function startWebSocketSimulator(opts: {
       if (!info.isDirectory) {
         throw new Error(`Build bot root is not a directory: ${root}`);
       }
+      assertSafeBuildBotRoot(root, GAMBIT_BOT_SOURCE_DIR);
+      await ensureGambitPolicyInBotRoot(root);
       return root;
     }
     const cacheKey = workspaceId ?? "default";
@@ -928,10 +1309,34 @@ export function startWebSocketSimulator(opts: {
     if (!info.isDirectory) {
       throw new Error(`Build bot root is not a directory: ${root}`);
     }
+    assertSafeBuildBotRoot(root, GAMBIT_BOT_SOURCE_DIR);
+    await ensureGambitPolicyInBotRoot(root);
     buildBotRootCache.set(cacheKey, root);
     return root;
   };
 
+  /**
+   * Logs which build-bot root `workspaceId` resolves to for `endpoint`.
+   *
+   * Purely diagnostic: a failure while resolving or logging the root is caught
+   * and reported as a warning line that includes the error message.
+   */
+  const logWorkspaceBotRoot = async (
+    endpoint: string,
+    workspaceId?: string | null,
+  ): Promise<void> => {
+    const prefix = `[sim] ${endpoint}: workspaceId=${
+      workspaceId ?? "(none)"
+    }`;
+    try {
+      const root = await resolveBuildBotRoot(workspaceId);
+      logger.info(`${prefix} botRoot=${root}`);
+    } catch (err) {
+      const reason = err instanceof Error ? err.message : String(err);
+      logger.warn(`${prefix} botRoot=<unresolved> ${reason}`);
+    }
+  };
+
   if (
     opts.workspace?.id && opts.workspace.rootDir && opts.workspace.rootDeckPath
   ) {
@@ -956,6 +1361,10 @@ export function startWebSocketSimulator(opts: {
     root: string,
   ): Promise<Array<BuildBotFileEntry>> => {
     const entries: Array<BuildBotFileEntry> = [];
+    const shouldSkipRelativePath = (relativePath: string) => { // True when any path segment is exactly ".gambit".
+      const segments = relativePath.split(/\\|\//g).filter(Boolean); // Split on "\" or "/"; empty segments are dropped.
+      return segments.includes(".gambit"); // Whole-segment match, so a name such as ".gambit-old" is not skipped.
+    };
     const walk = async (dir: string, relativePrefix: string) => {
       for await (const entry of Deno.readDir(dir)) {
         if (entry.isSymlink) continue;
@@ -963,6 +1372,7 @@ export function startWebSocketSimulator(opts: {
         const relPath = relativePrefix
           ? path.join(relativePrefix, entry.name)
           : entry.name;
+        if (shouldSkipRelativePath(relPath)) continue;
         if (entry.isDirectory) {
           entries.push({ path: relPath, type: "dir" });
           await walk(fullPath, relPath);
@@ -1021,7 +1431,64 @@ export function startWebSocketSimulator(opts: {
       return null;
     }
   };
-  const broadcastBuildBot = (payload: unknown) => {
+
+  const isBuildStreamDebugEnabled = (() => { // IIFE: the env var is read once when this statement runs, not per log call.
+    const raw = Deno.env.get("GAMBIT_BUILD_STREAM_DEBUG")?.trim()
+      .toLowerCase(); // Normalized so the comparison ignores case and surrounding whitespace.
+    return raw === "1" || raw === "true" || raw === "yes"; // Unset or any other value yields false.
+  })();
+
+  const logBuildStreamDebug = ( // Writes a "[build-stream-debug]" info line; does nothing unless the debug flag is true.
+    event: string, // Event label printed after the ISO timestamp.
+    payload?: Record<string, unknown>, // Appended as JSON only when it has at least one key.
+  ) => {
+    if (!isBuildStreamDebugEnabled) return;
+    const ts = new Date().toISOString();
+    if (payload && Object.keys(payload).length > 0) {
+      logger.info(
+        `[build-stream-debug] ${ts} ${event} ${JSON.stringify(payload)}`,
+      );
+      return;
+    }
+    logger.info(`[build-stream-debug] ${ts} ${event}`); // Missing or empty payload: timestamp and event only.
+  };
+
+  const broadcastBuildBot = (payload: unknown, workspaceId?: string) => { // Debug-logs a build payload, records it on a workspace if one resolves, then appends it to two streams.
+    const record = payload && typeof payload === "object"
+      ? payload as Record<string, unknown>
+      : null; // null for non-object payloads; the fields below then take their fallbacks.
+    const type = record && typeof record.type === "string"
+      ? record.type
+      : "(unknown)";
+    const runId = record && typeof record.runId === "string"
+      ? record.runId
+      : record && record.run && typeof record.run === "object" &&
+          typeof (record.run as { id?: unknown }).id === "string"
+      ? (record.run as { id: string }).id
+      : undefined; // Order of preference: payload.runId, then payload.run.id, else undefined.
+    const traceType = type === "buildBotTrace" && record &&
+        record.event && typeof record.event === "object" &&
+        typeof (record.event as { type?: unknown }).type === "string"
+      ? (record.event as { type: string }).type
+      : undefined; // Set only for "buildBotTrace" payloads whose event.type is a string.
+    logBuildStreamDebug("broadcastBuildBot", {
+      type,
+      runId,
+      traceType,
+    });
+    const eventWorkspaceId = workspaceId ?? // An explicit workspaceId wins; otherwise the run id is used as the workspace id.
+      (typeof runId === "string" ? runId : undefined);
+    if (eventWorkspaceId) {
+      const state = readSessionState(eventWorkspaceId);
+      if (state) { // No state returned for this id: skip the envelope.
+        appendWorkspaceEnvelope(
+          state,
+          "build",
+          payload as Record<string, unknown>, // The raw payload is passed here, not the narrowed `record`.
+        );
+      }
+    }
+    appendDurableStreamEvent(WORKSPACE_STREAM_ID, payload); // Appended whether or not an envelope was written.
     appendDurableStreamEvent(BUILD_STREAM_ID, payload);
   };
   let deckSlug = deckSlugFromPath(resolvedDeckPath);
@@ -1039,6 +1506,12 @@ export function startWebSocketSimulator(opts: {
       meta.sessionId = `${deckSlug}-${stamp}`;
       meta.sessionCreatedAt = now.toISOString();
     }
+    if (typeof meta.workspaceId !== "string") {
+      meta.workspaceId = String(meta.sessionId);
+    }
+    if (typeof meta.workspaceSchemaVersion !== "string") {
+      meta.workspaceSchemaVersion = WORKSPACE_STATE_SCHEMA_VERSION;
+    }
     if (typeof meta.deck !== "string") {
       meta.deck = resolvedDeckPath;
     }
@@ -1061,175 +1534,40 @@ export function startWebSocketSimulator(opts: {
       meta.sessionEventsPath = path.join(meta.sessionDir, "events.jsonl");
     }
     if (
-      typeof meta.sessionFeedbackPath !== "string" &&
-      typeof meta.sessionDir === "string"
-    ) {
-      meta.sessionFeedbackPath = path.join(meta.sessionDir, "feedback.jsonl");
-    }
-    if (
-      typeof meta.sessionGradingPath !== "string" &&
+      typeof meta.sessionBuildStatePath !== "string" &&
       typeof meta.sessionDir === "string"
     ) {
-      meta.sessionGradingPath = path.join(meta.sessionDir, "grading.jsonl");
+      meta.sessionBuildStatePath = path.join(
+        meta.sessionDir,
+        "build_state.json",
+      );
     }
     const dir = typeof meta.sessionDir === "string"
       ? meta.sessionDir
       : undefined;
     return { state: { ...state, meta }, dir };
   };
-  const sessionStateCache = new Map<string, SavedState>();
-  const sessionWriteQueues = new Map<string, Array<() => void>>();
-  const sessionWriteActive = new Set<string>();
-
-  const enqueueSessionWrite = (sessionId: string, task: () => void) => {
-    const queue = sessionWriteQueues.get(sessionId) ?? [];
-    queue.push(task);
-    sessionWriteQueues.set(sessionId, queue);
-    if (sessionWriteActive.has(sessionId)) return;
-    sessionWriteActive.add(sessionId);
-    while (queue.length) {
-      const next = queue.shift();
-      if (!next) continue;
-      try {
-        next();
-      } catch (err) {
-        logger.warn(
-          `[sim] session write failed: ${
-            err instanceof Error ? err.message : err
-          }`,
-        );
-      }
-    }
-    sessionWriteActive.delete(sessionId);
-  };
-
-  const mergeSessionState = (
-    current: SavedState | undefined,
-    next: SavedState,
-  ): SavedState => {
-    if (!current) return next;
-    const merged: SavedState = {
-      ...current,
-      ...next,
-      meta: {
-        ...(current.meta ?? {}),
-        ...(next.meta ?? {}),
-      },
-      messages: next.messages ?? current.messages,
-      items: next.items ?? current.items,
-      format: next.format ?? current.format,
-      messageRefs: next.messageRefs ?? current.messageRefs,
-      feedback: next.feedback ?? current.feedback,
-      notes: next.notes ?? current.notes,
-      conversationScore: next.conversationScore ?? current.conversationScore,
-      traces: next.traces ?? current.traces,
-    };
-    return merged;
-  };
-
-  const materializeSnapshot = (state: SavedState): SavedState => {
-    const snapshot = { ...state };
-    delete (snapshot as Record<string, unknown>).traces;
-    return snapshot;
-  };
-
-  const writeJsonAtomic = (filePath: string, payload: unknown) => {
-    const dir = path.dirname(filePath);
-    ensureDir(dir);
-    const tmpPath = path.join(
-      dir,
-      `.tmp-${path.basename(filePath)}-${randomId("tmp")}`,
-    );
-    Deno.writeTextFileSync(tmpPath, JSON.stringify(payload, null, 2));
-    Deno.renameSync(tmpPath, filePath);
-  };
-
-  const appendJsonl = (filePath: string, payload: unknown) => {
-    const dir = path.dirname(filePath);
-    ensureDir(dir);
-    const line = JSON.stringify(payload);
-    Deno.writeTextFileSync(filePath, `${line}\n`, { append: true });
-  };
-
-  const appendSessionEvent = (
-    state: SavedState,
-    payload: Record<string, unknown>,
-  ) => {
-    const sessionId = typeof state.meta?.sessionId === "string"
-      ? state.meta.sessionId
-      : undefined;
-    const eventsPath = typeof state.meta?.sessionEventsPath === "string"
-      ? state.meta.sessionEventsPath
-      : undefined;
-    if (!sessionId || !eventsPath) return;
-    const stamped = {
-      createdAt: new Date().toISOString(),
-      sessionId,
-      ts: typeof payload.ts === "number" ? payload.ts : Date.now(),
-      ...payload,
-    };
-    enqueueSessionWrite(sessionId, () => {
-      appendJsonl(eventsPath, stamped);
-    });
-  };
-
-  const appendFeedbackLog = (
-    state: SavedState,
-    payload: Record<string, unknown>,
-  ) => {
-    const sessionId = typeof state.meta?.sessionId === "string"
-      ? state.meta.sessionId
-      : undefined;
-    const feedbackPath = typeof state.meta?.sessionFeedbackPath === "string"
-      ? state.meta.sessionFeedbackPath
-      : undefined;
-    if (!sessionId || !feedbackPath) return;
-    const stamped = {
-      createdAt: new Date().toISOString(),
-      sessionId,
-      ...payload,
-    };
-    enqueueSessionWrite(sessionId, () => {
-      appendJsonl(feedbackPath, stamped);
-    });
-  };
-
-  const appendGradingLog = (
-    state: SavedState,
-    payload: Record<string, unknown>,
-  ) => {
-    const sessionId = typeof state.meta?.sessionId === "string"
-      ? state.meta.sessionId
-      : undefined;
-    const gradingPath = typeof state.meta?.sessionGradingPath === "string"
-      ? state.meta.sessionGradingPath
-      : undefined;
-    if (!sessionId || !gradingPath) return;
-    const stamped = {
-      createdAt: new Date().toISOString(),
-      sessionId,
-      ...payload,
-    };
-    enqueueSessionWrite(sessionId, () => {
-      appendJsonl(gradingPath, stamped);
-    });
-  };
-
-  const TRACE_EVENT_TYPES = new Set<string>([
-    "run.start",
-    "message.user",
-    "run.end",
-    "deck.start",
-    "deck.end",
-    "action.start",
-    "action.end",
-    "tool.call",
-    "tool.result",
-    "model.call",
-    "model.result",
-    "log",
-    "monolog",
-  ]);
+  const { // Helpers returned by createSessionStore, bound as locals for use in this function.
+    parseFiniteInteger,
+    selectCanonicalScenarioRunSummary,
+    appendWorkspaceEnvelope,
+    appendSessionEvent,
+    appendFeedbackLog,
+    appendGradingLog,
+    appendServerErrorLog,
+    persistSessionState,
+    readSessionStateStrict,
+    readSessionState,
+    readBuildState,
+  } = createSessionStore({ // Options passed to the store are the values already in scope here.
+    sessionsRoot,
+    ensureDir,
+    randomId,
+    logger,
+    enrichStateWithSession,
+    workspaceStateSchemaVersion: WORKSPACE_STATE_SCHEMA_VERSION,
+    workspaceSchemaError,
+  });
 
   const traceCategory = (type: string): string => {
     switch (type) {
@@ -1254,136 +1592,6 @@ export function startWebSocketSimulator(opts: {
         return "trace";
     }
   };
-
-  const loadSessionTraces = (state: SavedState): Array<TraceEvent> => {
-    const eventsPath = typeof state.meta?.sessionEventsPath === "string"
-      ? state.meta.sessionEventsPath
-      : undefined;
-    if (!eventsPath) return [];
-    try {
-      const text = Deno.readTextFileSync(eventsPath);
-      const traces: Array<TraceEvent> = [];
-      for (const line of text.split("\n")) {
-        if (!line.trim()) continue;
-        try {
-          const record = JSON.parse(line) as Record<string, unknown>;
-          const kind = typeof record.kind === "string" ? record.kind : "";
-          const type = typeof record.type === "string" ? record.type : "";
-          if (kind === "trace" || TRACE_EVENT_TYPES.has(type)) {
-            traces.push(record as TraceEvent);
-          }
-        } catch {
-          // ignore invalid lines
-        }
-      }
-      return traces;
-    } catch {
-      return [];
-    }
-  };
-
-  const persistSessionState = (state: SavedState): SavedState => {
-    const { state: enriched, dir } = enrichStateWithSession(state);
-    const sessionId = typeof enriched.meta?.sessionId === "string"
-      ? enriched.meta.sessionId
-      : undefined;
-    const merged = sessionId
-      ? mergeSessionState(sessionStateCache.get(sessionId), enriched)
-      : enriched;
-    if (sessionId) {
-      sessionStateCache.set(sessionId, merged);
-    }
-    if (dir && sessionId) {
-      const snapshot = materializeSnapshot(merged);
-      const eventsPath = typeof snapshot.meta?.sessionEventsPath === "string"
-        ? snapshot.meta.sessionEventsPath
-        : path.join(dir, "events.jsonl");
-      const statePath = typeof snapshot.meta?.sessionStatePath === "string"
-        ? snapshot.meta.sessionStatePath
-        : path.join(dir, "state.json");
-      enqueueSessionWrite(sessionId, () => {
-        try {
-          ensureDir(dir);
-          const firstWrite = !existsSync(eventsPath);
-          writeJsonAtomic(statePath, snapshot);
-          if (firstWrite) {
-            appendJsonl(eventsPath, {
-              type: "session.start",
-              category: "lifecycle",
-              createdAt: new Date().toISOString(),
-              sessionId,
-              runId: snapshot.runId,
-              deck: snapshot.meta?.deck,
-            });
-          }
-          appendJsonl(eventsPath, {
-            type: "session.snapshot",
-            category: "snapshot",
-            createdAt: new Date().toISOString(),
-            sessionId,
-            runId: snapshot.runId,
-            state: snapshot,
-          });
-        } catch (err) {
-          logger.warn(
-            `[sim] failed to persist session state: ${
-              err instanceof Error ? err.message : err
-            }`,
-          );
-        }
-      });
-    }
-    return merged;
-  };
-  const readSessionState = (
-    sessionId: string,
-    opts?: { withTraces?: boolean },
-  ): SavedState | undefined => {
-    const dir = path.join(sessionsRoot, sessionId);
-    const filePath = path.join(dir, "state.json");
-    try {
-      const text = Deno.readTextFileSync(filePath);
-      const parsed = JSON.parse(text) as SavedState;
-      if (parsed && typeof parsed === "object") {
-        const meta = {
-          ...(parsed.meta ?? {}),
-          sessionId,
-          sessionDir: dir,
-        } as Record<string, unknown>;
-        if (typeof meta.sessionStatePath !== "string") {
-          meta.sessionStatePath = filePath;
-        }
-        if (typeof meta.sessionEventsPath !== "string") {
-          meta.sessionEventsPath = path.join(dir, "events.jsonl");
-        }
-        if (typeof meta.sessionFeedbackPath !== "string") {
-          meta.sessionFeedbackPath = path.join(dir, "feedback.jsonl");
-        }
-        if (typeof meta.sessionGradingPath !== "string") {
-          meta.sessionGradingPath = path.join(dir, "grading.jsonl");
-        }
-        const enriched = { ...parsed, meta } as SavedState;
-        if (opts?.withTraces) {
-          const loadedTraces = loadSessionTraces(enriched);
-          const fallbackTraces = Array.isArray(enriched.traces)
-            ? enriched.traces
-            : [];
-          const traces = loadedTraces.length > 0
-            ? loadedTraces
-            : fallbackTraces;
-          const withTraces = { ...enriched, traces };
-          sessionStateCache.set(sessionId, withTraces);
-          return withTraces;
-        }
-        sessionStateCache.set(sessionId, enriched);
-        return enriched;
-      }
-    } catch {
-      // ignore
-    }
-    return undefined;
-  };
-
   const buildWorkspaceMeta = (
     record: { id: string; rootDir: string; rootDeckPath: string },
     base?: Record<string, unknown>,
@@ -1398,6 +1606,7 @@ export function startWebSocketSimulator(opts: {
         : new Date().toISOString();
     return {
       ...(base ?? {}),
+      workspaceSchemaVersion: WORKSPACE_STATE_SCHEMA_VERSION,
       workspaceId: record.id,
       workspaceRootDeckPath: record.rootDeckPath,
       workspaceRootDir: record.rootDir,
@@ -1557,65 +1766,221 @@ export function startWebSocketSimulator(opts: {
     }
   };
 
-  type BuildRunMeta = {
-    id: string;
-    status: BuildBotRunStatus["status"];
-    startedAt?: string;
-    finishedAt?: string;
-    updatedAt?: string;
-    messageCount?: number;
+  const getWorkspaceIdFromQuery = (url: URL): string | undefined => // Workspace id taken from the URL's query parameters.
+    resolveWorkspaceIdFromSearchParams(url.searchParams); // Which parameter names are accepted is decided by this helper.
+
+  const getWorkspaceIdFromBody = ( // Workspace id taken from a parsed request body, if there is one.
+    body: Record<string, unknown> | null | undefined,
+  ): string | undefined => {
+    if (!body || typeof body !== "object") return undefined; // Missing or non-object body: no id.
+    return resolveWorkspaceIdFromRecord(body); // Which keys are accepted is decided by this helper.
   };
 
-  const isSafeRunId = (runId: string): boolean => {
-    if (!runId) return false;
-    if (runId === "." || runId === "..") return false;
-    if (runId !== path.basename(runId)) return false;
-    if (runId.includes("/") || runId.includes("\\")) return false;
-    return true;
+  const findTestRunByWorkspaceId = ( // Linear scan of testBotRuns for a run belonging to the given workspace.
+    workspaceId: string,
+  ): TestBotRunEntry | undefined => {
+    for (const candidate of testBotRuns.values()) { // Map iteration follows insertion order, so the earliest-inserted match wins.
+      if (
+        candidate.run.workspaceId === workspaceId ||
+        candidate.run.sessionId === workspaceId // The run's sessionId is accepted as an alternative match.
+      ) {
+        return candidate;
+      }
+    }
+    return undefined;
   };
 
-  const listBuildRuns = (): Array<BuildRunMeta> => {
+  const buildWorkspaceReadModel = async (
+    workspaceId: string,
+    opts?: {
+      requestedTestDeckPath?: string | null;
+      requestedTestRunId?: string | null;
+      requestedGradeRunId?: string | null;
+    },
+  ) => {
+    let state: SavedState | undefined;
     try {
-      const entries: Array<BuildRunMeta> = [];
-      for (const session of listSessions()) {
-        const state = readSessionState(session.id);
-        const meta = state?.meta ?? {};
-        const buildStatus =
-          typeof (meta as { buildStatus?: unknown }).buildStatus === "string"
-            ? (meta as { buildStatus: BuildBotRunStatus["status"] })
-              .buildStatus
-            : "idle";
-        const buildChat = extractBuildChatState(state);
-        entries.push({
-          id: session.id,
-          status: buildStatus,
-          startedAt:
-            typeof (meta as { buildStartedAt?: unknown }).buildStartedAt ===
-                "string"
-              ? (meta as { buildStartedAt: string }).buildStartedAt
-              : session.createdAt,
-          finishedAt:
-            typeof (meta as { buildFinishedAt?: unknown }).buildFinishedAt ===
-                "string"
-              ? (meta as { buildFinishedAt: string }).buildFinishedAt
-              : undefined,
-          updatedAt:
-            typeof (meta as { sessionUpdatedAt?: unknown }).sessionUpdatedAt ===
-                "string"
-              ? (meta as { sessionUpdatedAt: string }).sessionUpdatedAt
-              : session.createdAt,
-          messageCount: buildChat?.messages?.length ?? 0,
-        });
+      state = readSessionStateStrict(workspaceId, { withTraces: true });
+    } catch (err) {
+      return {
+        error: err instanceof Error ? err.message : String(err),
+        status: 400,
+      } as const;
+    }
+    if (!state) {
+      return {
+        error: "Workspace not found",
+        status: 404,
+      } as const;
+    }
+
+    const buildEntry = buildBotRuns.get(workspaceId);
+    const buildRun = buildEntry?.run ?? buildRunFromProjection(workspaceId);
+
+    const normalizePersistedTestRun = ( // Coerces an untyped value into a TestBotRunStatus, or null if it is unusable.
+      value: unknown,
+    ): TestBotRunStatus | null => {
+      if (!value || typeof value !== "object") return null;
+      const raw = value as Record<string, unknown>;
+      const id = typeof raw.id === "string" ? raw.id : "";
+      if (!id) return null; // A missing, non-string, or empty id rejects the record.
+      const rawStatus = raw.status;
+      const status = rawStatus === "running" || rawStatus === "completed" ||
+          rawStatus === "error" || rawStatus === "canceled"
+        ? rawStatus
+        : "idle"; // Any status outside the four listed values becomes "idle".
+      return {
+        id,
+        status,
+        workspaceId: typeof raw.workspaceId === "string"
+          ? raw.workspaceId
+          : workspaceId, // Falls back to the enclosing function's workspaceId.
+        sessionId: typeof raw.sessionId === "string"
+          ? raw.sessionId
+          : workspaceId, // Same fallback as workspaceId above.
+        error: typeof raw.error === "string" ? raw.error : undefined,
+        startedAt: typeof raw.startedAt === "string"
+          ? raw.startedAt
+          : undefined,
+        finishedAt: typeof raw.finishedAt === "string"
+          ? raw.finishedAt
+          : undefined,
+        maxTurns:
+          typeof raw.maxTurns === "number" && Number.isFinite(raw.maxTurns)
+            ? raw.maxTurns
+            : undefined, // NaN and infinite values are dropped.
+        messages: Array.isArray(raw.messages)
+          ? raw.messages as TestBotRunStatus["messages"] // Only array-ness is checked; element shapes are cast, not validated.
+          : [],
+        traces: Array.isArray(raw.traces)
+          ? raw.traces as Array<TraceEvent>
+          : [],
+        toolInserts: Array.isArray(raw.toolInserts)
+          ? raw.toolInserts as TestBotRunStatus["toolInserts"]
+          : [],
+      };
+    };
+
+    const readPersistedTestRunById = ( // Finds the last "testBotStatus" record for a run id in the session's events file.
+      sessionState: SavedState,
+      requestedRunId: string,
+    ): TestBotRunStatus | null => {
+      const eventsPath =
+        typeof sessionState.meta?.sessionEventsPath === "string"
+          ? sessionState.meta.sessionEventsPath
+          : undefined;
+      if (!eventsPath) return null; // No events path in meta: nothing to read.
+      try {
+        const text = Deno.readTextFileSync(eventsPath); // Synchronous read of the whole file.
+        let latest: TestBotRunStatus | null = null;
+        for (const line of text.split("\n")) { // One JSON document per line.
+          if (!line.trim()) continue;
+          let parsed: Record<string, unknown> | null = null;
+          try {
+            parsed = JSON.parse(line) as Record<string, unknown>;
+          } catch {
+            continue; // Lines that are not valid JSON are ignored.
+          }
+          if (!parsed || parsed.type !== "test") continue;
+          const data = parsed.data;
+          if (!data || typeof data !== "object") continue;
+          const payload = data as Record<string, unknown>;
+          if (payload.type !== "testBotStatus") continue;
+          const normalized = normalizePersistedTestRun(payload.run);
+          if (!normalized || normalized.id !== requestedRunId) continue;
+          latest = normalized; // Later matching lines overwrite earlier ones.
+        }
+        return latest;
+      } catch {
+        return null; // Any error in the try block (e.g. a failed read) is reported as "not found".
+      }
+    };
+
+    const requestedTestRunId = typeof opts?.requestedTestRunId === "string" &&
+        opts.requestedTestRunId.trim().length > 0
+      ? opts.requestedTestRunId
+      : null;
+
+    const requestedTestEntry = requestedTestRunId
+      ? testBotRuns.get(requestedTestRunId)
+      : undefined;
+    const requestedLiveRun = requestedTestEntry?.run &&
+        (requestedTestEntry.run.workspaceId === workspaceId ||
+          requestedTestEntry.run.sessionId === workspaceId)
+      ? requestedTestEntry.run
+      : undefined;
+    const persistedRequestedRun = requestedTestRunId
+      ? readPersistedTestRunById(state, requestedTestRunId)
+      : null;
+
+    const testEntry = requestedLiveRun
+      ? undefined
+      : findTestRunByWorkspaceId(workspaceId);
+    const testRun = requestedLiveRun ?? persistedRequestedRun ??
+      testEntry?.run ?? {
+      id: "",
+      status: "idle" as const,
+      messages: [],
+      traces: [],
+      toolInserts: [],
+      workspaceId,
+      sessionId: workspaceId,
+    };
+    if (!requestedLiveRun && !persistedRequestedRun && !testEntry) {
+      syncTestBotRunFromState(testRun, state);
+      const meta = state.meta && typeof state.meta === "object"
+        ? state.meta as Record<string, unknown>
+        : null;
+      if (meta) {
+        const selectedScenarioSummary = selectCanonicalScenarioRunSummary(meta);
+        if (selectedScenarioSummary) {
+          testRun.id = selectedScenarioSummary.scenarioRunId;
+          if (testRun.status === "idle") {
+            testRun.status = "completed";
+          }
+        }
       }
-      entries.sort((a, b) => {
-        const aKey = a.updatedAt ?? a.startedAt ?? a.id;
-        const bKey = b.updatedAt ?? b.startedAt ?? b.id;
-        return bKey.localeCompare(aKey);
-      });
-      return entries;
-    } catch {
-      return [];
     }
+
+    await deckLoadPromise.catch(() => null);
+    const requestedDeck = opts?.requestedTestDeckPath ?? null;
+    const testSelection = requestedDeck
+      ? resolveTestDeck(requestedDeck)
+      : availableTestDecks[0];
+    const testSchemaDesc = testSelection
+      ? await describeDeckInputSchemaFromPath(testSelection.path)
+      : undefined;
+
+    const session = {
+      workspaceId,
+      messages: state.messages,
+      messageRefs: state.messageRefs,
+      feedback: state.feedback,
+      traces: state.traces,
+      notes: state.notes,
+      meta: state.meta,
+    };
+
+    return {
+      workspaceId,
+      build: { run: buildRun },
+      test: {
+        run: testRun,
+        botPath: testSelection?.path ?? null,
+        botLabel: testSelection?.label ?? null,
+        botDescription: testSelection?.description ?? null,
+        selectedDeckId: testSelection?.id ?? null,
+        inputSchema: testSchemaDesc?.schema ?? null,
+        inputSchemaError: testSchemaDesc?.error ?? null,
+        defaults: { input: testSchemaDesc?.defaults },
+        testDecks: availableTestDecks,
+      },
+      grade: {
+        graderDecks: availableGraderDecks,
+        sessions: listSessions(),
+      },
+      session,
+    } as const;
   };
 
   const buildSessionMeta = (
@@ -1830,6 +2195,10 @@ export function startWebSocketSimulator(opts: {
           role: msg.role,
           content,
           messageRefId: refId,
+          messageSource: refs[i]?.source === "scenario" ||
+              refs[i]?.source === "manual"
+            ? refs[i].source
+            : undefined,
           feedback: refId ? feedbackByRef.get(refId) : undefined,
         });
         continue;
@@ -1840,6 +2209,10 @@ export function startWebSocketSimulator(opts: {
           role: "assistant",
           content: respondSummary.displayText,
           messageRefId: refId,
+          messageSource: refs[i]?.source === "scenario" ||
+              refs[i]?.source === "manual"
+            ? refs[i].source
+            : undefined,
           feedback: refId ? feedbackByRef.get(refId) : undefined,
           respondStatus: respondSummary.status,
           respondCode: respondSummary.code,
@@ -1874,33 +2247,142 @@ export function startWebSocketSimulator(opts: {
     };
   };
 
-  const buildConversationMessages = (
+  const buildScenarioConversationArtifacts = ( // Builds the conversation plus an index of its assistant turns from saved state.
     state: SavedState,
-  ): Array<ModelMessage> => {
+  ): {
+    messages: Array<ModelMessage>; // The conversation, in order.
+    assistantTurns: Array<{
+      conversationIndex: number; // Position of the turn inside `messages`, not inside state.messages.
+      message: ModelMessage;
+      messageRefId?: string;
+    }>;
+  } => {
     const rawMessages = state.messages ?? [];
+    const refs = state.messageRefs ?? []; // Read by the same index as rawMessages.
     const conversation: Array<ModelMessage> = [];
-    for (const msg of rawMessages) {
+    const assistantTurns: Array<{
+      conversationIndex: number;
+      message: ModelMessage;
+      messageRefId?: string;
+    }> = [];
+    for (let i = 0; i < rawMessages.length; i++) {
+      const msg = rawMessages[i];
+      const messageRefId = typeof refs[i]?.id === "string"
+        ? refs[i].id
+        : undefined; // undefined when there is no ref at this index or its id is not a string.
       if (msg?.role === "assistant" || msg?.role === "user") {
         const content = stringifyContent(msg.content).trim();
         if (!content) continue;
-        conversation.push({
+        const nextMessage: ModelMessage = {
           role: msg.role,
           content,
           name: msg.name,
           tool_calls: msg.tool_calls,
-        });
+        };
+        const conversationIndex = conversation.length; // Captured before the push, so it is this message's index.
+        conversation.push(nextMessage);
+        if (nextMessage.role === "assistant") { // User messages join the conversation but are not indexed as turns.
+          assistantTurns.push({
+            conversationIndex,
+            message: nextMessage,
+            messageRefId,
+          });
+        }
         continue;
       }
       const respondSummary = summarizeRespondCall(msg);
       if (respondSummary) {
-        conversation.push({
+        const nextMessage: ModelMessage = {
           role: "assistant",
           content: respondSummary.displayText,
           name: GAMBIT_TOOL_RESPOND,
+        };
+        const conversationIndex = conversation.length;
+        conversation.push(nextMessage);
+        assistantTurns.push({ // A summarized respond call is always recorded as an assistant turn.
+          conversationIndex,
+          message: nextMessage,
+          messageRefId,
+        });
+      }
+    }
+    return { messages: conversation, assistantTurns };
+  };
+
+  const resolveMessageByRef = ( // Looks up a ref by id and returns it with the message at the same array index.
+    state: SavedState,
+    messageRefId: string,
+  ): { message?: ModelMessage; ref?: { source?: unknown } } => {
+    const refs = Array.isArray(state.messageRefs) ? state.messageRefs : [];
+    const messages = Array.isArray(state.messages) ? state.messages : [];
+    const idx = refs.findIndex((ref) => ref?.id === messageRefId); // First ref with this id.
+    if (idx < 0) return {}; // Unknown ref id: both fields absent.
+    return {
+      message: messages[idx], // undefined when messages has no element at idx.
+      ref: refs[idx],
+    };
+  };
+
+  const isFeedbackEligibleMessageRef = ( // Decides whether the message behind a ref id is eligible for feedback.
+    state: SavedState,
+    messageRefId: string,
+  ): boolean => {
+    const { message, ref } = resolveMessageByRef(state, messageRefId);
+    if (!message) return false; // Unknown ref, or no message at the ref's index.
+    if (message.role === "assistant") return true;
+    if (message.role === "user" && ref?.source === "scenario") return true; // User messages qualify only when the ref source is "scenario".
+    return summarizeRespondCall(message) !== null; // Any other message qualifies only if it summarizes as a respond call.
+  };
+
+  const isFeedbackEligiblePersistedTestRunMessageRef = ( // Feedback eligibility check against "testBotStatus" records in the events file.
+    state: SavedState,
+    runId: string,
+    messageRefId: string,
+  ): boolean => {
+    const eventsPath = typeof state.meta?.sessionEventsPath === "string"
+      ? state.meta.sessionEventsPath
+      : undefined;
+    if (!eventsPath) return false; // No events path in meta: not eligible.
+    try {
+      const text = Deno.readTextFileSync(eventsPath); // Synchronous read of the whole file.
+      for (const line of text.split("\n")) {
+        if (!line.trim()) continue;
+        let parsed: Record<string, unknown> | null = null;
+        try {
+          parsed = JSON.parse(line) as Record<string, unknown>;
+        } catch {
+          continue; // Lines that are not valid JSON are ignored.
+        }
+        if (!parsed || parsed.type !== "test") continue;
+        const data = parsed.data;
+        if (!data || typeof data !== "object") continue;
+        const payload = data as Record<string, unknown>;
+        if (payload.type !== "testBotStatus") continue;
+        const run = payload.run;
+        if (!run || typeof run !== "object") continue;
+        const runRecord = run as { id?: unknown; messages?: unknown };
+        if (typeof runRecord.id !== "string" || runRecord.id !== runId) {
+          continue; // Record belongs to a different run.
+        }
+        if (!Array.isArray(runRecord.messages)) continue;
+        const found = runRecord.messages.some((entry) => {
+          if (!entry || typeof entry !== "object") return false;
+          const message = entry as {
+            role?: unknown;
+            messageRefId?: unknown;
+            messageSource?: unknown;
+          };
+          if (message.messageRefId !== messageRefId) return false;
+          if (message.role === "assistant") return true;
+          return message.role === "user" &&
+            message.messageSource === "scenario"; // User messages qualify only when their source is "scenario".
         });
+        if (found) return true; // The first matching record decides; later lines are not read.
       }
+    } catch {
+      return false; // Any error in the outer try (e.g. a failed read) means not eligible.
     }
-    return conversation;
+    return false;
   };
 
   const deriveToolInsertsFromTraces = (
@@ -1941,6 +2423,33 @@ export function startWebSocketSimulator(opts: {
     return inserts;
   };
 
+  const applyUserMessageRefSource = ( // Sets `source` on refs of user messages added since previousState.
+    previousState: SavedState | undefined,
+    nextState: SavedState,
+    source: "scenario" | "manual",
+  ): SavedState => {
+    if (
+      !Array.isArray(nextState.messages) ||
+      !Array.isArray(nextState.messageRefs)
+    ) {
+      return nextState; // Without both arrays there is nothing to tag.
+    }
+    const startIndex = Math.max(0, previousState?.messages?.length ?? 0); // Messages before this index are left untouched.
+    const nextRefs = [...nextState.messageRefs]; // Copy, so nextState.messageRefs is not mutated.
+    let changed = false;
+    for (let idx = startIndex; idx < nextState.messages.length; idx++) {
+      const msg = nextState.messages[idx];
+      if (!msg || msg.role !== "user") continue;
+      const ref = nextRefs[idx];
+      if (!ref || typeof ref.id !== "string") continue; // Refs without a string id are skipped.
+      if (ref.source === source) continue;
+      nextRefs[idx] = { ...ref, source };
+      changed = true;
+    }
+    if (!changed) return nextState; // Same object is returned when no ref needed updating.
+    return { ...nextState, messageRefs: nextRefs };
+  };
+
   const syncTestBotRunFromState = (
     run: TestBotRunStatus,
     state: SavedState,
@@ -1948,10 +2457,15 @@ export function startWebSocketSimulator(opts: {
     const snapshot = buildTestBotSnapshot(state);
     run.messages = snapshot.messages;
     run.toolInserts = snapshot.toolInserts;
-    const sessionId = typeof state.meta?.sessionId === "string"
+    const workspaceId = typeof state.meta?.workspaceId === "string"
+      ? state.meta.workspaceId
+      : typeof state.meta?.sessionId === "string"
       ? state.meta.sessionId
       : undefined;
-    if (sessionId) run.sessionId = sessionId;
+    if (workspaceId) {
+      run.workspaceId = workspaceId;
+      run.sessionId = workspaceId;
+    }
     const initFill =
       (state.meta as { testBotInitFill?: TestBotInitFill } | undefined)
         ?.testBotInitFill;
@@ -1969,52 +2483,28 @@ export function startWebSocketSimulator(opts: {
     run.traces = Array.isArray(state.traces) ? [...state.traces] : undefined;
   };
 
-  const extractBuildChatState = (
-    state?: SavedState,
-  ): SavedState | null => {
-    const meta = state?.meta;
-    if (!meta || typeof meta !== "object") return null;
-    const candidate = (meta as { buildChat?: unknown }).buildChat;
-    if (!candidate || typeof candidate !== "object") return null;
-    return candidate as SavedState;
-  };
-
-  const buildRunFromWorkspace = (
-    workspaceId: string,
-    state?: SavedState,
-  ): BuildBotRunStatus => {
-    const meta = state?.meta;
-    const buildChatState = extractBuildChatState(state) ?? undefined;
-    const status = typeof (meta as { buildStatus?: unknown })?.buildStatus ===
-        "string"
-      ? (meta as { buildStatus: BuildBotRunStatus["status"] }).buildStatus
-      : buildChatState
-      ? "completed"
-      : "idle";
-    const run: BuildBotRunStatus = {
-      id: workspaceId,
-      status,
-      error: typeof (meta as { buildError?: unknown })?.buildError === "string"
-        ? (meta as { buildError: string }).buildError
-        : undefined,
-      startedAt:
-        typeof (meta as { buildStartedAt?: unknown })?.buildStartedAt ===
-            "string"
-          ? (meta as { buildStartedAt: string }).buildStartedAt
-          : undefined,
-      finishedAt:
-        typeof (meta as { buildFinishedAt?: unknown })?.buildFinishedAt ===
-            "string"
-          ? (meta as { buildFinishedAt: string }).buildFinishedAt
-          : undefined,
-      messages: [],
-      traces: [],
-      toolInserts: [],
-    };
-    if (buildChatState) {
-      syncBuildBotRunFromState(run, buildChatState);
+  const buildRunFromProjection = (workspaceId: string): BuildBotRunStatus => {
+    const projection = readBuildState(workspaceId); // NOTE(review): presumably the persisted build projection for this workspace; confirm against readBuildState.
+    const run = projection?.run;
+    if (!run) { // No projection or no run in it: report an empty idle run.
+      return {
+        id: workspaceId,
+        status: "idle",
+        messages: [],
+        traces: [],
+        toolInserts: [],
+      };
     }
-    return run;
+    return { // Return a new object carrying the fields read from the projected run.
+      id: run.id || workspaceId, // A falsy run id falls back to the workspace id.
+      status: run.status,
+      error: run.error,
+      startedAt: run.startedAt,
+      finishedAt: run.finishedAt,
+      messages: Array.isArray(run.messages) ? run.messages : [], // Non-array values become []; arrays are passed through, not copied.
+      traces: Array.isArray(run.traces) ? run.traces : [],
+      toolInserts: Array.isArray(run.toolInserts) ? run.toolInserts : [],
+    };
   };
 
   const startTestBotRun = (runOpts: {
@@ -2028,7 +2518,7 @@ export function startWebSocketSimulator(opts: {
       args: Record<string, unknown>;
       result: Record<string, unknown>;
     };
-    sessionId?: string;
+    workspaceId?: string;
     workspaceRecord?: { id: string; rootDir: string; rootDeckPath: string };
     baseMeta?: Record<string, unknown>;
   } = {}): TestBotRunStatus => {
@@ -2036,7 +2526,7 @@ export function startWebSocketSimulator(opts: {
       ? runOpts.botDeckPath
       : undefined;
     if (!botDeckPath) {
-      throw new Error("Missing test bot deck path");
+      throw new Error("Missing scenario deck path");
     }
     const defaultMaxTurns = 12;
     const maxTurns = Math.round(
@@ -2075,6 +2565,8 @@ export function startWebSocketSimulator(opts: {
     };
     testBotRuns.set(runId, entry);
     const run = entry.run;
+    const emitTestBot = (payload: unknown) =>
+      broadcastTestBot(payload, run.workspaceId ?? runOpts.workspaceId);
     if (runOpts.initFill) run.initFill = runOpts.initFill;
     let savedState: SavedState | undefined = undefined;
     const baseMeta = runOpts.baseMeta ?? {};
@@ -2092,6 +2584,7 @@ export function startWebSocketSimulator(opts: {
           actionCallId,
           name: "gambit_test_bot_init_fill",
           args: runOpts.initFillTrace.args as never,
+          toolKind: "internal",
         },
         {
           type: "tool.result",
@@ -2099,15 +2592,21 @@ export function startWebSocketSimulator(opts: {
           actionCallId,
           name: "gambit_test_bot_init_fill",
           result: runOpts.initFillTrace.result as never,
+          toolKind: "internal",
         },
       );
     }
 
-    const setSessionId = (state: SavedState | undefined) => {
-      const sessionId = typeof state?.meta?.sessionId === "string"
+    const setWorkspaceId = (state: SavedState | undefined) => {
+      const workspaceId = typeof state?.meta?.workspaceId === "string"
+        ? state.meta.workspaceId
+        : typeof state?.meta?.sessionId === "string"
         ? state.meta.sessionId
         : undefined;
-      if (sessionId) run.sessionId = sessionId;
+      if (workspaceId) {
+        run.workspaceId = workspaceId;
+        run.sessionId = workspaceId;
+      }
     };
 
     const appendFromState = (state: SavedState) => {
@@ -2119,10 +2618,10 @@ export function startWebSocketSimulator(opts: {
       run.messages = snapshot.messages;
       run.toolInserts = snapshot.toolInserts;
       lastCount = rawLength;
-      setSessionId(state);
+      setWorkspaceId(state);
       run.traces = Array.isArray(state.traces) ? [...state.traces] : undefined;
       if (shouldBroadcast) {
-        broadcastTestBot({ type: "testBotStatus", run });
+        emitTestBot({ type: "testBotStatus", run });
       }
     };
 
@@ -2194,6 +2693,8 @@ export function startWebSocketSimulator(opts: {
         stream: Boolean(streamOpts?.onStreamText),
         onStreamText: streamOpts?.onStreamText,
         responsesMode: opts.responsesMode,
+        workerSandbox: opts.workerSandbox,
+        signal: controller.signal,
       });
       if (isGambitEndSignal(result)) {
         sessionEnded = true;
@@ -2222,19 +2723,31 @@ export function startWebSocketSimulator(opts: {
             allowRootStringInput: true,
             initialUserMessage: initialUserMessage || undefined,
             responsesMode: opts.responsesMode,
+            workerSandbox: opts.workerSandbox,
+            signal: controller.signal,
             onStateUpdate: (state) => {
+              const nextStateWithSource = applyUserMessageRefSource(
+                savedState,
+                state,
+                "scenario",
+              );
               const nextMeta = {
                 ...workspaceMeta,
-                ...(state.meta ?? {}),
+                ...(nextStateWithSource.meta ?? {}),
                 testBot: true,
                 testBotRunId: runId,
                 testBotConfigPath: botConfigPath,
                 testBotName,
+                scenarioRunId: runId,
+                selectedScenarioDeckId: testBotName,
+                scenarioConfigPath: botConfigPath,
                 ...(run.initFill ? { testBotInitFill: run.initFill } : {}),
-                ...(runOpts.sessionId ? { sessionId: runOpts.sessionId } : {}),
+                ...(runOpts.workspaceId
+                  ? { workspaceId: runOpts.workspaceId }
+                  : {}),
               };
               const enriched = persistSessionState({
-                ...state,
+                ...nextStateWithSource,
                 meta: nextMeta,
                 traces: capturedTraces,
               });
@@ -2253,7 +2766,7 @@ export function startWebSocketSimulator(opts: {
           const history = savedState?.messages ?? [];
           const userMessage = await generateDeckBotUserMessage(history, {
             onStreamText: (chunk) =>
-              broadcastTestBot({
+              emitTestBot({
                 type: "testBotStream",
                 runId,
                 role: "user",
@@ -2264,7 +2777,7 @@ export function startWebSocketSimulator(opts: {
             allowEmptyAssistant: effectiveStartMode === "user" &&
               !getLastAssistantMessage(history),
           });
-          broadcastTestBot({
+          emitTestBot({
             type: "testBotStreamEnd",
             runId,
             role: "user",
@@ -2285,19 +2798,31 @@ export function startWebSocketSimulator(opts: {
             allowRootStringInput: true,
             initialUserMessage: userMessage,
             responsesMode: opts.responsesMode,
+            workerSandbox: opts.workerSandbox,
+            signal: controller.signal,
             onStateUpdate: (state) => {
+              const nextStateWithSource = applyUserMessageRefSource(
+                savedState,
+                state,
+                "scenario",
+              );
               const nextMeta = {
                 ...workspaceMeta,
-                ...(state.meta ?? {}),
+                ...(nextStateWithSource.meta ?? {}),
                 testBot: true,
                 testBotRunId: runId,
                 testBotConfigPath: botConfigPath,
                 testBotName,
+                scenarioRunId: runId,
+                selectedScenarioDeckId: testBotName,
+                scenarioConfigPath: botConfigPath,
                 ...(run.initFill ? { testBotInitFill: run.initFill } : {}),
-                ...(runOpts.sessionId ? { sessionId: runOpts.sessionId } : {}),
+                ...(runOpts.workspaceId
+                  ? { workspaceId: runOpts.workspaceId }
+                  : {}),
               };
               const enriched = persistSessionState({
-                ...state,
+                ...nextStateWithSource,
                 meta: nextMeta,
                 traces: capturedTraces,
               });
@@ -2306,7 +2831,7 @@ export function startWebSocketSimulator(opts: {
               appendFromState(enriched);
             },
             onStreamText: (chunk) =>
-              broadcastTestBot({
+              emitTestBot({
                 type: "testBotStream",
                 runId,
                 role: "assistant",
@@ -2319,7 +2844,7 @@ export function startWebSocketSimulator(opts: {
             sessionEnded = true;
             break;
           }
-          broadcastTestBot({
+          emitTestBot({
             type: "testBotStreamEnd",
             runId,
             role: "assistant",
@@ -2328,30 +2853,35 @@ export function startWebSocketSimulator(opts: {
           });
         }
         run.status = controller.signal.aborted ? "canceled" : "completed";
-        broadcastTestBot({ type: "testBotStatus", run });
+        emitTestBot({ type: "testBotStatus", run });
       } catch (err) {
-        run.status = "error";
-        run.error = err instanceof Error ? err.message : String(err);
-        broadcastTestBot({ type: "testBotStatus", run });
+        if (controller.signal.aborted || isRunCanceledError(err)) {
+          run.status = "canceled";
+          run.error = undefined;
+        } else {
+          run.status = "error";
+          run.error = err instanceof Error ? err.message : String(err);
+        }
+        emitTestBot({ type: "testBotStatus", run });
       } finally {
         if (savedState?.messages) {
           const snapshot = buildTestBotSnapshot(savedState);
           run.messages = snapshot.messages;
           run.toolInserts = snapshot.toolInserts;
         }
-        setSessionId(savedState);
+        setWorkspaceId(savedState);
         run.traces = Array.isArray(savedState?.traces)
           ? [...(savedState?.traces ?? [])]
           : undefined;
         run.finishedAt = new Date().toISOString();
         entry.abort = null;
         entry.promise = null;
-        broadcastTestBot({ type: "testBotStatus", run });
+        emitTestBot({ type: "testBotStatus", run });
       }
     };
 
     entry.promise = loop();
-    broadcastTestBot({ type: "testBotStatus", run });
+    emitTestBot({ type: "testBotStatus", run });
     return run;
   };
 
@@ -2359,7 +2889,7 @@ export function startWebSocketSimulator(opts: {
     error: string;
     initFill: TestBotInitFill | undefined;
     botDeckPath: string;
-  }): { sessionId?: string; sessionPath?: string } => {
+  }): { workspaceId?: string; workspacePath?: string } => {
     const failedRunId = randomId("testbot");
     const testBotName = path.basename(args.botDeckPath).replace(
       /\.deck\.(md|ts)$/i,
@@ -2373,6 +2903,7 @@ export function startWebSocketSimulator(opts: {
         actionCallId,
         name: "gambit_test_bot_init_fill",
         args: { missing: args.initFill?.requested ?? [] } as never,
+        toolKind: "internal",
       },
       {
         type: "tool.result",
@@ -2383,6 +2914,7 @@ export function startWebSocketSimulator(opts: {
           error: args.error,
           provided: args.initFill?.provided,
         } as never,
+        toolKind: "internal",
       },
     ];
     const failedState = persistSessionState({
@@ -2394,20 +2926,25 @@ export function startWebSocketSimulator(opts: {
         testBotRunId: failedRunId,
         testBotConfigPath: args.botDeckPath,
         testBotName,
+        scenarioRunId: failedRunId,
+        selectedScenarioDeckId: testBotName,
+        scenarioConfigPath: args.botDeckPath,
         testBotInitFill: args.initFill,
         testBotInitFillError: args.error,
       },
     });
-    const sessionId = typeof failedState.meta?.sessionId === "string"
-      ? failedState.meta.sessionId
+    const workspaceId = typeof failedState.meta?.workspaceId === "string"
+      ? failedState.meta.workspaceId
       : undefined;
-    const sessionPath = typeof failedState.meta?.sessionStatePath === "string"
+    const workspacePath = typeof failedState.meta?.sessionStatePath === "string"
       ? failedState.meta.sessionStatePath
       : undefined;
-    if (sessionPath) {
-      logger.warn(`[sim] init fill failed; session saved to ${sessionPath}`);
+    if (workspacePath) {
+      logger.warn(
+        `[sim] init fill failed; workspace state saved to ${workspacePath}`,
+      );
     }
-    return { sessionId, sessionPath };
+    return { workspaceId, workspacePath };
   };
 
   const resolvePreferredDeckPath = async (
@@ -2549,6 +3086,7 @@ export function startWebSocketSimulator(opts: {
       `[sim] bundling simulator UI (${forceBundle ? "forced" : "stale"})...`,
     );
     try {
+      const decode = new TextDecoder();
       const p = new Deno.Command("deno", {
         args: [
           "bundle",
@@ -2560,14 +3098,24 @@ export function startWebSocketSimulator(opts: {
           "simulator-ui/src/main.tsx",
         ],
         cwd: path.resolve(moduleDir, ".."),
-        stdout: "null",
-        stderr: "null",
+        stdout: "piped",
+        stderr: "piped",
       });
-      p.outputSync();
+      const out = p.outputSync();
+      if (!out.success) {
+        const stderr = decode.decode(out.stderr).trim();
+        const stdout = decode.decode(out.stdout).trim();
+        const details = stderr || stdout || `exit ${out.code}`;
+        throw new Error(
+          `simulator UI bundle command failed (exit ${out.code}): ${details}`,
+        );
+      }
     } catch (err) {
-      logger.warn(
-        `[sim] auto-bundle failed: ${err instanceof Error ? err.message : err}`,
-      );
+      const message = err instanceof Error ? err.message : String(err);
+      if (forceBundle) {
+        throw new Error(`[sim] auto-bundle failed: ${message}`);
+      }
+      logger.warn(`[sim] auto-bundle failed: ${message}`);
     }
   }
 
@@ -2578,6 +3126,315 @@ export function startWebSocketSimulator(opts: {
       if (url.pathname.startsWith("/api/durable-streams/stream/")) {
         return handleDurableStreamRequest(req);
       }
+      if (url.pathname === "/v1/responses") {
+        if (req.method !== "POST") {
+          return new Response("Method not allowed", { status: 405 });
+        }
+        if (!opts.modelProvider.responses) {
+          return jsonResponse(
+            { error: "Configured provider does not support responses." },
+            501,
+          );
+        }
+        try {
+          const body = parseBodyObject(await req.json());
+          const model = typeof body.model === "string" ? body.model : undefined;
+          if (!model) {
+            throw new Error("model is required");
+          }
+          const input = normalizeInputItems(body.input);
+          const stream = body.stream === true;
+          const instructions = typeof body.instructions === "string"
+            ? body.instructions
+            : undefined;
+          const previousResponseId =
+            typeof body.previous_response_id === "string"
+              ? body.previous_response_id
+              : undefined;
+          const store = typeof body.store === "boolean"
+            ? body.store
+            : undefined;
+          const tools = normalizeTools(body.tools);
+          const toolChoice = normalizeToolChoice(body.tool_choice);
+          const reasoning = (body.reasoning &&
+              typeof body.reasoning === "object" &&
+              !Array.isArray(body.reasoning))
+            ? body.reasoning as CreateResponseRequest["reasoning"]
+            : undefined;
+          const parallelToolCalls =
+            typeof body.parallel_tool_calls === "boolean"
+              ? body.parallel_tool_calls
+              : undefined;
+          const maxToolCalls = typeof body.max_tool_calls === "number"
+            ? body.max_tool_calls
+            : undefined;
+          const temperature = typeof body.temperature === "number"
+            ? body.temperature
+            : undefined;
+          const topP = typeof body.top_p === "number" ? body.top_p : undefined;
+          const frequencyPenalty = typeof body.frequency_penalty === "number"
+            ? body.frequency_penalty
+            : undefined;
+          const presencePenalty = typeof body.presence_penalty === "number"
+            ? body.presence_penalty
+            : undefined;
+          const maxOutputTokens = typeof body.max_output_tokens === "number"
+            ? body.max_output_tokens
+            : undefined;
+          const topLogprobs = typeof body.top_logprobs === "number"
+            ? body.top_logprobs
+            : undefined;
+          const truncation = body.truncation === "auto" ||
+              body.truncation === "disabled"
+            ? body.truncation
+            : undefined;
+          const text = (body.text && typeof body.text === "object" &&
+              !Array.isArray(body.text))
+            ? body.text as CreateResponseRequest["text"]
+            : undefined;
+          const streamOptions = (body.stream_options &&
+              typeof body.stream_options === "object" &&
+              !Array.isArray(body.stream_options))
+            ? body.stream_options as CreateResponseRequest["stream_options"]
+            : undefined;
+          const background = typeof body.background === "boolean"
+            ? body.background
+            : undefined;
+          const include = Array.isArray(body.include)
+            ? body.include.filter((entry): entry is string =>
+              typeof entry === "string"
+            )
+            : undefined;
+          const serviceTier = body.service_tier === "auto" ||
+              body.service_tier === "default" || body.service_tier === "flex" ||
+              body.service_tier === "priority"
+            ? body.service_tier
+            : undefined;
+          const metadata = (body.metadata &&
+              typeof body.metadata === "object" &&
+              !Array.isArray(body.metadata))
+            ? body.metadata as Record<string, JSONValue>
+            : undefined;
+          const safetyIdentifier = typeof body.safety_identifier === "string"
+            ? body.safety_identifier
+            : undefined;
+          const promptCacheKey = typeof body.prompt_cache_key === "string"
+            ? body.prompt_cache_key
+            : undefined;
+          const passthrough: Record<string, unknown> = {};
+          for (const [key, value] of Object.entries(body)) {
+            if (
+              key === "model" || key === "input" || key === "stream" ||
+              key === "instructions" || key === "tools" ||
+              key === "tool_choice" || key === "max_output_tokens" ||
+              key === "previous_response_id" || key === "store" ||
+              key === "reasoning" || key === "parallel_tool_calls" ||
+              key === "max_tool_calls" || key === "temperature" ||
+              key === "top_p" || key === "frequency_penalty" ||
+              key === "presence_penalty" || key === "include" ||
+              key === "text" || key === "stream_options" ||
+              key === "background" || key === "truncation" ||
+              key === "service_tier" || key === "top_logprobs" ||
+              key === "metadata" || key === "safety_identifier" ||
+              key === "prompt_cache_key" || key === "params"
+            ) {
+              continue;
+            }
+            passthrough[key] = value;
+          }
+          const explicitParams = (body.params &&
+              typeof body.params === "object" &&
+              !Array.isArray(body.params))
+            ? body.params as Record<string, unknown>
+            : undefined;
+          const params = explicitParams || Object.keys(passthrough).length > 0
+            ? { ...(explicitParams ?? {}), ...passthrough }
+            : undefined;
+          const requestBody: CreateResponseRequest = {
+            model,
+            input,
+            instructions,
+            previous_response_id: previousResponseId,
+            store,
+            tools,
+            tool_choice: toolChoice,
+            reasoning,
+            parallel_tool_calls: parallelToolCalls,
+            max_tool_calls: maxToolCalls,
+            temperature,
+            top_p: topP,
+            frequency_penalty: frequencyPenalty,
+            presence_penalty: presencePenalty,
+            stream,
+            stream_options: streamOptions,
+            max_output_tokens: maxOutputTokens,
+            top_logprobs: topLogprobs,
+            truncation,
+            text,
+            include,
+            background,
+            service_tier: serviceTier,
+            metadata,
+            safety_identifier: safetyIdentifier,
+            prompt_cache_key: promptCacheKey,
+            params,
+          };
+
+          if (!stream) {
+            const response = await opts.modelProvider.responses({
+              request: requestBody,
+            });
+            return jsonResponse(
+              toStrictResponseResource({
+                request: requestBody,
+                response,
+              }),
+            );
+          }
+
+          const streamBody = new ReadableStream<Uint8Array>({
+            start: async (controller) => {
+              let sequence = 1;
+              const itemIdByOutputIndex = new Map<number, string>();
+              const streamRequest: CreateResponseRequest = {
+                ...requestBody,
+                stream: true,
+              };
+              try {
+                const result = await opts.modelProvider.responses!({
+                  request: streamRequest,
+                  onStreamEvent: (event: ResponseEvent) => {
+                    if (event.type === "response.created") {
+                      controller.enqueue(
+                        sseFrame({
+                          type: "response.created",
+                          sequence_number: sequence++,
+                          response: toStrictResponseResource({
+                            request: streamRequest,
+                            response: event.response,
+                            statusOverride: "in_progress",
+                          }),
+                        }),
+                      );
+                      return;
+                    }
+                    if (event.type === "response.output_text.delta") {
+                      const itemId = event.item_id ??
+                        itemIdByOutputIndex.get(event.output_index) ??
+                        `msg_${event.output_index + 1}`;
+                      itemIdByOutputIndex.set(event.output_index, itemId);
+                      controller.enqueue(
+                        sseFrame({
+                          type: "response.output_text.delta",
+                          sequence_number: sequence++,
+                          output_index: event.output_index,
+                          item_id: itemId,
+                          content_index: event.content_index ?? 0,
+                          delta: event.delta,
+                          logprobs: event.logprobs ?? [],
+                        }),
+                      );
+                      return;
+                    }
+                    if (event.type === "response.output_text.done") {
+                      const itemId = event.item_id ??
+                        itemIdByOutputIndex.get(event.output_index) ??
+                        `msg_${event.output_index + 1}`;
+                      itemIdByOutputIndex.set(event.output_index, itemId);
+                      controller.enqueue(
+                        sseFrame({
+                          type: "response.output_text.done",
+                          sequence_number: sequence++,
+                          output_index: event.output_index,
+                          item_id: itemId,
+                          content_index: event.content_index ?? 0,
+                          text: event.text,
+                          logprobs: [],
+                        }),
+                      );
+                      return;
+                    }
+                    if (event.type === "response.completed") {
+                      controller.enqueue(
+                        sseFrame({
+                          type: "response.completed",
+                          sequence_number: sequence++,
+                          response: toStrictResponseResource({
+                            request: streamRequest,
+                            response: event.response,
+                            statusOverride: "completed",
+                          }),
+                        }),
+                      );
+                      return;
+                    }
+                    if (event.type === "response.failed") {
+                      controller.enqueue(
+                        sseFrame({
+                          type: "response.failed",
+                          sequence_number: sequence++,
+                          response: {
+                            ...toStrictResponseResource({
+                              request: streamRequest,
+                              response: {
+                                id: `resp_${crypto.randomUUID().slice(0, 8)}`,
+                                object: "response",
+                                output: [],
+                                status: "failed",
+                                error: event.error ??
+                                  { message: "Unknown error" },
+                              },
+                              statusOverride: "failed",
+                            }),
+                            error: event.error ?? { message: "Unknown error" },
+                          },
+                        }),
+                      );
+                    }
+                  },
+                });
+                controller.enqueue(
+                  sseFrame({
+                    type: "response.completed",
+                    sequence_number: sequence++,
+                    response: toStrictResponseResource({
+                      request: streamRequest,
+                      response: result,
+                      statusOverride: "completed",
+                    }),
+                  }),
+                );
+                controller.enqueue(
+                  new TextEncoder().encode("data: [DONE]\n\n"),
+                );
+              } catch (err) {
+                controller.enqueue(
+                  sseFrame({
+                    type: "error",
+                    code: "internal_error",
+                    message: err instanceof Error ? err.message : String(err),
+                    param: null,
+                  }),
+                );
+              } finally {
+                controller.close();
+              }
+            },
+          });
+          return new Response(streamBody, {
+            headers: {
+              "content-type": "text/event-stream",
+              "cache-control": "no-cache",
+              "connection": "keep-alive",
+            },
+          });
+        } catch (err) {
+          return jsonResponse(
+            { error: err instanceof Error ? err.message : String(err) },
+            400,
+          );
+        }
+      }
       if (url.pathname === "/favicon.ico") {
         if (req.method !== "GET" && req.method !== "HEAD") {
           return new Response("Method not allowed", { status: 405 });
@@ -2598,23 +3455,100 @@ export function startWebSocketSimulator(opts: {
           }
         }
       }
-      if (url.pathname === "/api/calibrate") {
+
+      const workspaceTestRunGetMatch = url.pathname.match(
+        /^\/api\/workspaces\/([^/]+)\/test\/([^/]+)$/,
+      );
+      if (workspaceTestRunGetMatch) {
         if (req.method !== "GET") {
           return new Response("Method not allowed", { status: 405 });
         }
-        const sessionId = url.searchParams.get("sessionId") ?? undefined;
-        if (sessionId) {
-          await activateWorkspaceDeck(sessionId);
-        }
-        await deckLoadPromise.catch(() => null);
-        const sessions = listSessions();
-        return new Response(
-          JSON.stringify({
-            graderDecks: availableGraderDecks,
-            sessions,
-          }),
-          { headers: { "content-type": "application/json" } },
+        const workspaceId = decodeURIComponent(workspaceTestRunGetMatch[1]);
+        const requestedTestRunId = decodeURIComponent(
+          workspaceTestRunGetMatch[2],
         );
+        await logWorkspaceBotRoot(
+          "/api/workspaces/:id/test/:runId",
+          workspaceId,
+        );
+        await activateWorkspaceDeck(workspaceId);
+        const payload = await buildWorkspaceReadModel(workspaceId, {
+          requestedTestDeckPath: url.searchParams.get("deckPath"),
+          requestedTestRunId,
+        });
+        if ("error" in payload) {
+          return new Response(
+            JSON.stringify({ error: payload.error }),
+            {
+              status: payload.status,
+              headers: { "content-type": "application/json" },
+            },
+          );
+        }
+        return new Response(JSON.stringify(payload), {
+          headers: { "content-type": "application/json" },
+        });
+      }
+
+      const workspaceGradeRunGetMatch = url.pathname.match(
+        /^\/api\/workspaces\/([^/]+)\/grade\/([^/]+)$/,
+      );
+      if (workspaceGradeRunGetMatch) {
+        if (req.method !== "GET") {
+          return new Response("Method not allowed", { status: 405 });
+        }
+        const workspaceId = decodeURIComponent(workspaceGradeRunGetMatch[1]);
+        const requestedGradeRunId = decodeURIComponent(
+          workspaceGradeRunGetMatch[2],
+        );
+        await logWorkspaceBotRoot(
+          "/api/workspaces/:id/grade/:runId",
+          workspaceId,
+        );
+        await activateWorkspaceDeck(workspaceId);
+        const payload = await buildWorkspaceReadModel(workspaceId, {
+          requestedTestDeckPath: url.searchParams.get("deckPath"),
+          requestedGradeRunId,
+        });
+        if ("error" in payload) {
+          return new Response(
+            JSON.stringify({ error: payload.error }),
+            {
+              status: payload.status,
+              headers: { "content-type": "application/json" },
+            },
+          );
+        }
+        return new Response(JSON.stringify(payload), {
+          headers: { "content-type": "application/json" },
+        });
+      }
+
+      const workspaceGetMatch = url.pathname.match(
+        /^\/api\/workspaces\/([^/]+)$/,
+      );
+      if (workspaceGetMatch) {
+        if (req.method !== "GET") {
+          return new Response("Method not allowed", { status: 405 });
+        }
+        const workspaceId = decodeURIComponent(workspaceGetMatch[1]);
+        await logWorkspaceBotRoot("/api/workspaces/:id", workspaceId);
+        await activateWorkspaceDeck(workspaceId);
+        const payload = await buildWorkspaceReadModel(workspaceId, {
+          requestedTestDeckPath: url.searchParams.get("deckPath"),
+        });
+        if ("error" in payload) {
+          return new Response(
+            JSON.stringify({ error: payload.error }),
+            {
+              status: payload.status,
+              headers: { "content-type": "application/json" },
+            },
+          );
+        }
+        return new Response(JSON.stringify(payload), {
+          headers: { "content-type": "application/json" },
+        });
       }
 
       if (url.pathname === "/api/calibrate/run") {
@@ -2623,14 +3557,15 @@ export function startWebSocketSimulator(opts: {
         }
         try {
           const body = await req.json() as {
-            sessionId?: string;
+            workspaceId?: string;
             graderId?: string;
           };
-          if (!body.sessionId) {
-            throw new Error("Missing sessionId");
+          const workspaceId = getWorkspaceIdFromBody(body);
+          if (!workspaceId) {
+            throw new Error("Missing workspaceId");
           }
-          const sessionId = body.sessionId;
-          await activateWorkspaceDeck(sessionId);
+          await logWorkspaceBotRoot("/api/calibrate/run", workspaceId);
+          await activateWorkspaceDeck(workspaceId);
           await deckLoadPromise.catch(() => null);
           const grader = body.graderId
             ? resolveGraderDeck(body.graderId)
@@ -2638,9 +3573,9 @@ export function startWebSocketSimulator(opts: {
           if (!grader) {
             throw new Error("Unknown grader deck selection");
           }
-          const sessionState = readSessionState(sessionId);
+          const sessionState = readSessionState(workspaceId);
           if (!sessionState) {
-            throw new Error("Session not found");
+            throw new Error("Workspace not found");
           }
           const graderSchema = await describeDeckInputSchemaFromPath(
             grader.path,
@@ -2656,7 +3591,10 @@ export function startWebSocketSimulator(opts: {
             delete next.gradingRuns;
             return next;
           })();
-          const conversationMessages = buildConversationMessages(sessionState);
+          const conversationArtifacts = buildScenarioConversationArtifacts(
+            sessionState,
+          );
+          const conversationMessages = conversationArtifacts.messages;
           const sessionPayload = {
             messages: conversationMessages.length > 0
               ? conversationMessages.map((msg) => ({
@@ -2702,10 +3640,16 @@ export function startWebSocketSimulator(opts: {
               type: "grading.run",
               run: nextEntry,
             });
-            const sessionMeta = buildSessionMeta(sessionId, nextState);
+            const sessionMeta = buildSessionMeta(workspaceId, nextState);
+            appendDurableStreamEvent(WORKSPACE_STREAM_ID, {
+              type: "calibrateSession",
+              workspaceId,
+              run: nextEntry,
+              session: sessionMeta,
+            });
             appendDurableStreamEvent(GRADE_STREAM_ID, {
               type: "calibrateSession",
-              sessionId,
+              workspaceId,
               run: nextEntry,
               session: sessionMeta,
             });
@@ -2717,11 +3661,13 @@ export function startWebSocketSimulator(opts: {
               if (runMode !== "turns") {
                 entry = {
                   id: runId,
+                  workspaceId,
                   graderId: grader.id,
                   graderPath: grader.path,
                   graderLabel: grader.label,
                   status: "running",
                   runAt: startedAt,
+                  gradingRunId: runId,
                   input: { session: sessionPayload },
                 };
                 currentState = upsertCalibrationRun(currentState, entry);
@@ -2737,34 +3683,35 @@ export function startWebSocketSimulator(opts: {
                 });
               }
               const messages = sessionPayload.messages ?? [];
-              const assistantTurns = messages
-                .map((msg, idx) => ({ msg, idx }))
-                .filter(({ msg }) =>
-                  msg.role === "assistant" &&
-                  typeof msg.content === "string" &&
-                  msg.content.trim().length > 0
-                );
+              const assistantTurns = conversationArtifacts.assistantTurns;
               const totalTurns = assistantTurns.length;
               const turns: Array<{
                 index: number;
+                gradingRunId: string;
+                artifactRevisionId: string;
+                messageRefId?: string;
                 message: unknown;
                 input: unknown;
                 result: unknown;
               }> = [];
               entry = {
                 id: runId,
+                workspaceId,
                 graderId: grader.id,
                 graderPath: grader.path,
                 graderLabel: grader.label,
                 status: "running",
                 runAt: startedAt,
+                gradingRunId: runId,
                 result: { mode: "turns", totalTurns, turns: [] },
               };
               currentState = upsertCalibrationRun(currentState, entry);
               if (totalTurns === 0) {
                 return { mode: "turns", totalTurns, turns: [] };
               }
-              for (const { msg, idx } of assistantTurns) {
+              for (const turnEntry of assistantTurns) {
+                const msg = turnEntry.message;
+                const idx = turnEntry.conversationIndex;
                 const input = {
                   session: {
                     ...sessionPayload,
@@ -2784,6 +3731,9 @@ export function startWebSocketSimulator(opts: {
                 });
                 turns.push({
                   index: idx,
+                  gradingRunId: runId,
+                  artifactRevisionId: randomId("grade-rev"),
+                  messageRefId: turnEntry.messageRefId,
                   message: msg,
                   input,
                   result: turnResult,
@@ -2798,41 +3748,59 @@ export function startWebSocketSimulator(opts: {
             })();
             entry = {
               id: runId,
+              workspaceId,
               graderId: grader.id,
               graderPath: grader.path,
               graderLabel: grader.label,
               status: "completed",
               runAt: startedAt,
+              gradingRunId: runId,
               input: { session: sessionPayload },
               result,
             };
           } catch (err) {
             const message = err instanceof Error ? err.message : String(err);
+            logger.error("[sim] calibrate run failed", {
+              workspaceId,
+              runId,
+              runMode,
+              graderId: grader.id,
+              graderPath: grader.path,
+              error: message,
+              stack: err instanceof Error ? err.stack : undefined,
+            });
             entry = {
               id: runId,
+              workspaceId,
               graderId: grader.id,
               graderPath: grader.path,
               graderLabel: grader.label,
               status: "error",
               runAt: startedAt,
+              gradingRunId: runId,
               input: { session: sessionPayload },
               error: message,
             };
           }
           const nextState = upsertCalibrationRun(currentState, entry);
-          const sessionMeta = buildSessionMeta(body.sessionId, nextState);
+          const sessionMeta = buildSessionMeta(workspaceId, nextState);
           return new Response(
             JSON.stringify({
-              sessionId: body.sessionId,
+              workspaceId,
               run: entry,
               session: sessionMeta,
             }),
             { headers: { "content-type": "application/json" } },
           );
         } catch (err) {
+          const message = err instanceof Error ? err.message : String(err);
+          logger.error("[sim] /api/calibrate/run request failed", {
+            error: message,
+            stack: err instanceof Error ? err.stack : undefined,
+          });
           return new Response(
             JSON.stringify({
-              error: err instanceof Error ? err.message : String(err),
+              error: message,
             }),
             { status: 400, headers: { "content-type": "application/json" } },
           );
@@ -2845,18 +3813,20 @@ export function startWebSocketSimulator(opts: {
         }
         try {
           const body = await req.json() as {
-            sessionId?: string;
+            workspaceId?: string;
             refId?: string;
             runId?: string;
             turnIndex?: number;
             reason?: string;
           };
-          if (!body.sessionId || !body.refId) {
-            throw new Error("Missing sessionId or refId");
+          const workspaceId = getWorkspaceIdFromBody(body);
+          if (!workspaceId || !body.refId) {
+            throw new Error("Missing workspaceId or refId");
           }
-          const state = readSessionState(body.sessionId);
+          await logWorkspaceBotRoot("/api/calibrate/flag", workspaceId);
+          const state = readSessionState(workspaceId);
           if (!state) {
-            throw new Error("Session not found");
+            throw new Error("Workspace not found");
           }
           const meta = (state.meta && typeof state.meta === "object")
             ? { ...(state.meta as Record<string, unknown>) }
@@ -2905,15 +3875,20 @@ export function startWebSocketSimulator(opts: {
             flag: flagEntry,
             refId: body.refId,
           });
-          const sessionMeta = buildSessionMeta(body.sessionId, updated);
+          const sessionMeta = buildSessionMeta(workspaceId, updated);
+          appendDurableStreamEvent(WORKSPACE_STREAM_ID, {
+            type: "calibrateSession",
+            workspaceId,
+            session: sessionMeta,
+          });
           appendDurableStreamEvent(GRADE_STREAM_ID, {
             type: "calibrateSession",
-            sessionId: body.sessionId,
+            workspaceId,
             session: sessionMeta,
           });
           return new Response(
             JSON.stringify({
-              sessionId: body.sessionId,
+              workspaceId,
               flagged,
               flags: nextFlags,
             }),
@@ -2935,16 +3910,18 @@ export function startWebSocketSimulator(opts: {
         }
         try {
           const body = await req.json() as {
-            sessionId?: string;
+            workspaceId?: string;
             refId?: string;
             reason?: string;
           };
-          if (!body.sessionId || !body.refId) {
-            throw new Error("Missing sessionId or refId");
+          const workspaceId = getWorkspaceIdFromBody(body);
+          if (!workspaceId || !body.refId) {
+            throw new Error("Missing workspaceId or refId");
           }
-          const state = readSessionState(body.sessionId);
+          await logWorkspaceBotRoot("/api/calibrate/flag/reason", workspaceId);
+          const state = readSessionState(workspaceId);
           if (!state) {
-            throw new Error("Session not found");
+            throw new Error("Workspace not found");
           }
           const meta = (state.meta && typeof state.meta === "object")
             ? { ...(state.meta as Record<string, unknown>) }
@@ -2979,133 +3956,21 @@ export function startWebSocketSimulator(opts: {
             flag: updatedFlag,
             refId: body.refId,
           });
-          const sessionMeta = buildSessionMeta(body.sessionId, updated);
-          appendDurableStreamEvent(GRADE_STREAM_ID, {
+          const sessionMeta = buildSessionMeta(workspaceId, updated);
+          appendDurableStreamEvent(WORKSPACE_STREAM_ID, {
             type: "calibrateSession",
-            sessionId: body.sessionId,
+            workspaceId,
             session: sessionMeta,
           });
-          return new Response(
-            JSON.stringify({
-              sessionId: body.sessionId,
-              flags: nextFlags,
-            }),
-            { headers: { "content-type": "application/json" } },
-          );
-        } catch (err) {
-          return new Response(
-            JSON.stringify({
-              error: err instanceof Error ? err.message : String(err),
-            }),
-            { status: 400, headers: { "content-type": "application/json" } },
-          );
-        }
-      }
-
-      if (url.pathname === "/api/grading/reference") {
-        if (req.method !== "POST") {
-          return new Response("Method not allowed", { status: 405 });
-        }
-        try {
-          const body = await req.json() as {
-            sessionId?: string;
-            runId?: string;
-            turnIndex?: number;
-            referenceSample?: {
-              score?: number;
-              reason?: string;
-              evidence?: Array<string>;
-            };
-          };
-          if (!body.sessionId) throw new Error("Missing sessionId");
-          if (!body.runId) throw new Error("Missing runId");
-          if (!body.referenceSample) {
-            throw new Error("Missing referenceSample");
-          }
-          const score = body.referenceSample.score;
-          if (typeof score !== "number" || Number.isNaN(score)) {
-            throw new Error("Invalid reference score");
-          }
-          const reason = body.referenceSample.reason;
-          if (typeof reason !== "string" || reason.trim().length === 0) {
-            throw new Error("Missing reference reason");
-          }
-          const evidence = Array.isArray(body.referenceSample.evidence)
-            ? body.referenceSample.evidence.filter((e) =>
-              typeof e === "string" && e.trim().length > 0
-            )
-            : undefined;
-          const state = readSessionState(body.sessionId);
-          if (!state) throw new Error("Session not found");
-          const previousRuns = Array.isArray(
-              (state.meta as { gradingRuns?: unknown })?.gradingRuns,
-            )
-            ? ((state.meta as { gradingRuns: Array<GradingRunRecord> })
-              .gradingRuns)
-            : Array.isArray(state.meta?.calibrationRuns)
-            ? (state.meta?.calibrationRuns as Array<GradingRunRecord>)
-            : [];
-          const index = previousRuns.findIndex((run) => run.id === body.runId);
-          if (index < 0) throw new Error("Run not found");
-          const run = previousRuns[index];
-          const nextRun: GradingRunRecord = {
-            ...run,
-          };
-          if (typeof body.turnIndex === "number") {
-            const result = run.result;
-            const turnIndex = body.turnIndex;
-            if (
-              !result || typeof result !== "object" ||
-              (result as { mode?: unknown }).mode !== "turns" ||
-              !Array.isArray((result as { turns?: unknown }).turns)
-            ) {
-              throw new Error("Run does not support turn references");
-            }
-            const turns = (result as {
-              turns: Array<Record<string, unknown>>;
-            }).turns.map((turn) => ({ ...turn }));
-            const targetIndex = turns.findIndex((turn) =>
-              turn.index === turnIndex
-            );
-            if (targetIndex < 0) {
-              throw new Error("Turn not found");
-            }
-            turns[targetIndex] = {
-              ...turns[targetIndex],
-              referenceSample: { score, reason, evidence },
-            };
-            nextRun.result = { ...(result as object), turns };
-          } else {
-            nextRun.referenceSample = { score, reason, evidence };
-          }
-          const nextRuns = previousRuns.map((entry, i) =>
-            i === index ? nextRun : entry
-          );
-          const nextState = persistSessionState({
-            ...state,
-            meta: {
-              ...(state.meta ?? {}),
-              gradingRuns: nextRuns,
-            },
-          });
-          appendGradingLog(nextState, {
-            type: "grading.reference",
-            run: nextRun,
-            runId: body.runId,
-            turnIndex: body.turnIndex,
-          });
-          const sessionMeta = buildSessionMeta(body.sessionId, nextState);
           appendDurableStreamEvent(GRADE_STREAM_ID, {
             type: "calibrateSession",
-            sessionId: body.sessionId,
-            run: nextRun,
+            workspaceId,
             session: sessionMeta,
           });
           return new Response(
             JSON.stringify({
-              sessionId: body.sessionId,
-              run: nextRun,
-              session: sessionMeta,
+              workspaceId,
+              flags: nextFlags,
             }),
             { headers: { "content-type": "application/json" } },
           );
@@ -3119,10 +3984,28 @@ export function startWebSocketSimulator(opts: {
         }
       }
 
+      const gradingReferenceResponse = await handleGradingReferenceRoute({
+        url,
+        req,
+        getWorkspaceIdFromBody,
+        logWorkspaceBotRoot,
+        readSessionState,
+        persistSessionState,
+        appendGradingLog,
+        buildSessionMeta,
+        appendDurableStreamEvent,
+        workspaceStreamId: WORKSPACE_STREAM_ID,
+        gradeStreamId: GRADE_STREAM_ID,
+        parseFiniteInteger,
+        randomId,
+      });
+      if (gradingReferenceResponse) return gradingReferenceResponse;
+
       if (url.pathname === "/api/test") {
         if (req.method === "GET") {
-          const sessionId = url.searchParams.get("sessionId") ?? undefined;
-          await activateWorkspaceDeck(sessionId);
+          const workspaceId = getWorkspaceIdFromQuery(url);
+          await logWorkspaceBotRoot("/api/test", workspaceId);
+          await activateWorkspaceDeck(workspaceId);
           await deckLoadPromise.catch(() => null);
           const requestedDeck = url.searchParams.get("deckPath");
           const selection = requestedDeck
@@ -3131,7 +4014,7 @@ export function startWebSocketSimulator(opts: {
           if (requestedDeck && !selection) {
             return new Response(
               JSON.stringify({
-                error: "Unknown test deck selection",
+                error: "Unknown scenario deck selection",
               }),
               {
                 status: 400,
@@ -3197,7 +4080,7 @@ export function startWebSocketSimulator(opts: {
             botDeckPath?: string;
             inheritBotInput?: unknown;
             initFill?: { missing?: unknown };
-            sessionId?: string;
+            workspaceId?: string;
           };
           if (
             typeof body.maxTurns === "number" && Number.isFinite(body.maxTurns)
@@ -3222,14 +4105,12 @@ export function startWebSocketSimulator(opts: {
               typeof entry === "string" && entry.trim().length > 0
             ) as Array<string>;
           }
-          if (typeof body.sessionId === "string") {
-            sessionId = body.sessionId;
-          }
+          sessionId = getWorkspaceIdFromBody(body);
           if (typeof body.botDeckPath === "string") {
             const resolved = resolveTestDeck(body.botDeckPath);
             if (!resolved) {
               return new Response(
-                JSON.stringify({ error: "Unknown test deck selection" }),
+                JSON.stringify({ error: "Unknown scenario deck selection" }),
                 {
                   status: 400,
                   headers: { "content-type": "application/json" },
@@ -3250,6 +4131,7 @@ export function startWebSocketSimulator(opts: {
           // ignore parse errors; use defaults
         }
         if (sessionId) {
+          await logWorkspaceBotRoot("/api/test/run", sessionId);
           await activateWorkspaceDeck(sessionId);
         }
         if (deckInput === undefined) {
@@ -3269,7 +4151,7 @@ export function startWebSocketSimulator(opts: {
         }
         if (!botDeckSelection) {
           return new Response(
-            JSON.stringify({ error: "No test decks configured" }),
+            JSON.stringify({ error: "No scenario decks configured" }),
             { status: 400, headers: { "content-type": "application/json" } },
           );
         }
@@ -3321,8 +4203,8 @@ export function startWebSocketSimulator(opts: {
                 JSON.stringify({
                   error: parsed.error,
                   initFill: initFillInfo,
-                  sessionId: failure.sessionId,
-                  sessionPath: failure.sessionPath,
+                  workspaceId: failure.workspaceId,
+                  workspacePath: failure.workspacePath,
                 }),
                 {
                   status: 400,
@@ -3393,8 +4275,8 @@ export function startWebSocketSimulator(opts: {
                 JSON.stringify({
                   error: message,
                   initFill: initFillInfo,
-                  sessionId: failure.sessionId,
-                  sessionPath: failure.sessionPath,
+                  workspaceId: failure.workspaceId,
+                  workspacePath: failure.workspacePath,
                 }),
                 {
                   status: 400,
@@ -3436,8 +4318,8 @@ export function startWebSocketSimulator(opts: {
             JSON.stringify({
               error: message,
               initFill: initFillInfo,
-              sessionId: failure.sessionId,
-              sessionPath: failure.sessionPath,
+              workspaceId: failure.workspaceId,
+              workspacePath: failure.workspacePath,
             }),
             { status: 400, headers: { "content-type": "application/json" } },
           );
@@ -3464,7 +4346,7 @@ export function startWebSocketSimulator(opts: {
           botDeckPath: botDeckSelection.path,
           initFill: initFillInfo,
           initFillTrace,
-          sessionId,
+          workspaceId: sessionId,
           workspaceRecord,
           baseMeta: existingSessionState?.meta as Record<string, unknown> ??
             undefined,
@@ -3482,7 +4364,7 @@ export function startWebSocketSimulator(opts: {
         // 1) Parse request payload and stitch together run/session state.
         let payload: {
           runId?: unknown;
-          sessionId?: unknown;
+          workspaceId?: unknown;
           message?: unknown;
           context?: unknown;
           init?: unknown;
@@ -3499,19 +4381,26 @@ export function startWebSocketSimulator(opts: {
         let runId = typeof payload.runId === "string"
           ? payload.runId
           : undefined;
-        const sessionId = typeof payload.sessionId === "string"
-          ? payload.sessionId
-          : undefined;
-        if (sessionId) {
-          await activateWorkspaceDeck(sessionId);
+        const workspaceId = (() => {
+          const workspaceId = typeof payload.workspaceId === "string" &&
+              payload.workspaceId.trim().length > 0
+            ? payload.workspaceId
+            : undefined;
+          if (workspaceId) return workspaceId;
+          return undefined;
+        })();
+        await logWorkspaceBotRoot("/api/test/message", workspaceId);
+        if (workspaceId) {
+          await activateWorkspaceDeck(workspaceId);
         }
-        let savedState = sessionId
-          ? readSessionState(sessionId, { withTraces: true })
+        let savedState = workspaceId
+          ? readSessionState(workspaceId, { withTraces: true })
           : undefined;
         if (!savedState && runId) {
           const entry = testBotRuns.get(runId);
-          if (entry?.run.sessionId) {
-            savedState = readSessionState(entry.run.sessionId, {
+          const runWorkspaceId = entry?.run.workspaceId ?? entry?.run.sessionId;
+          if (runWorkspaceId) {
+            savedState = readSessionState(runWorkspaceId, {
               withTraces: true,
             });
           }
@@ -3522,15 +4411,15 @@ export function startWebSocketSimulator(opts: {
             : savedState.runId;
         }
         runId = runId ?? randomId("testbot");
-        const workspaceRecord = sessionId
-          ? resolveWorkspaceRecord(sessionId) ?? {
-            id: sessionId,
+        const workspaceRecord = workspaceId
+          ? resolveWorkspaceRecord(workspaceId) ?? {
+            id: workspaceId,
             rootDir: path.dirname(resolvedDeckPath),
             rootDeckPath: resolvedDeckPath,
             createdAt: new Date().toISOString(),
           }
           : undefined;
-        if (workspaceRecord && !resolveWorkspaceRecord(sessionId)) {
+        if (workspaceRecord && !resolveWorkspaceRecord(workspaceId)) {
           registerWorkspace(workspaceRecord);
         }
         const workspaceMeta = workspaceRecord
@@ -3542,11 +4431,11 @@ export function startWebSocketSimulator(opts: {
         const existingEntry = testBotRuns.get(runId);
         if (existingEntry?.promise) {
           return new Response(
-            JSON.stringify({ error: "Test bot run already in progress" }),
+            JSON.stringify({ error: "Scenario run already in progress" }),
             { status: 409, headers: { "content-type": "application/json" } },
           );
         }
-        // 2) Resolve which test deck to use and derive initial input.
+        // 2) Resolve which scenario deck to use and derive initial input.
         await deckLoadPromise.catch(() => null);
         const requestedDeck = typeof payload.botDeckPath === "string"
           ? payload.botDeckPath
@@ -3562,7 +4451,7 @@ export function startWebSocketSimulator(opts: {
         })();
         if (requestedDeck && !selection) {
           return new Response(
-            JSON.stringify({ error: "Unknown test deck selection" }),
+            JSON.stringify({ error: "Unknown scenario deck selection" }),
             { status: 400, headers: { "content-type": "application/json" } },
           );
         }
@@ -3616,13 +4505,15 @@ export function startWebSocketSimulator(opts: {
         };
         testBotRuns.set(runId, entry);
         const run = entry.run;
+        const emitTestBot = (payload: unknown) =>
+          broadcastTestBot(payload, run.workspaceId ?? workspaceId ?? runId);
         run.status = "running";
         run.error = undefined;
         run.startedAt = run.startedAt ?? new Date().toISOString();
         if (savedState) {
           syncTestBotRunFromState(run, savedState);
         }
-        broadcastTestBot({ type: "testBotStatus", run });
+        emitTestBot({ type: "testBotStatus", run });
         const controller = new AbortController();
         entry.abort = controller;
         const isAborted = () => controller.signal.aborted;
@@ -3662,11 +4553,16 @@ export function startWebSocketSimulator(opts: {
           run.traces = Array.isArray(state.traces)
             ? [...state.traces]
             : undefined;
-          const nextSessionId = typeof state.meta?.sessionId === "string"
+          const nextWorkspaceId = typeof state.meta?.workspaceId === "string"
+            ? state.meta.workspaceId
+            : typeof state.meta?.sessionId === "string"
             ? state.meta.sessionId
             : undefined;
-          if (nextSessionId) run.sessionId = nextSessionId;
-          broadcastTestBot({ type: "testBotStatus", run });
+          if (nextWorkspaceId) {
+            run.workspaceId = nextWorkspaceId;
+            run.sessionId = nextWorkspaceId;
+          }
+          emitTestBot({ type: "testBotStatus", run });
         };
         // 4) Execute the deck run(s): optional assistant start, then user message.
         entry.promise = (async () => {
@@ -3706,20 +4602,29 @@ export function startWebSocketSimulator(opts: {
                 stream: shouldStream,
                 state: savedState,
                 responsesMode: opts.responsesMode,
+                signal: controller.signal,
                 initialUserMessage,
                 onStateUpdate: (state) => {
                   if (isAborted()) return;
+                  const nextStateWithSource = applyUserMessageRefSource(
+                    savedState,
+                    state,
+                    "manual",
+                  );
                   const nextMeta = {
                     ...workspaceMeta,
-                    ...(state.meta ?? {}),
+                    ...(nextStateWithSource.meta ?? {}),
                     testBot: true,
                     testBotRunId: runId,
                     testBotConfigPath: botConfigPath,
                     testBotName,
-                    ...(sessionId ? { sessionId } : {}),
+                    scenarioRunId: runId,
+                    selectedScenarioDeckId: testBotName,
+                    scenarioConfigPath: botConfigPath,
+                    ...(workspaceId ? { workspaceId } : {}),
                   };
                   const enriched = persistSessionState({
-                    ...state,
+                    ...nextStateWithSource,
                     meta: nextMeta,
                     traces: capturedTraces,
                   });
@@ -3728,7 +4633,7 @@ export function startWebSocketSimulator(opts: {
                   appendFromState(enriched);
                 },
                 onStreamText: (chunk) =>
-                  broadcastTestBot({
+                  emitTestBot({
                     type: "testBotStream",
                     runId,
                     role: "assistant",
@@ -3739,7 +4644,7 @@ export function startWebSocketSimulator(opts: {
               });
               if (isAborted()) return result;
               if (shouldStream) {
-                broadcastTestBot({
+                emitTestBot({
                   type: "testBotStreamEnd",
                   runId,
                   role: "assistant",
@@ -3777,8 +4682,16 @@ export function startWebSocketSimulator(opts: {
               run.status = "completed";
             }
           } catch (err) {
-            run.status = "error";
-            run.error = err instanceof Error ? err.message : String(err);
+            if (isAborted() || isRunCanceledError(err)) {
+              run.status = "canceled";
+              run.error = undefined;
+            } else {
+              run.status = "error";
+              run.error = err instanceof Error ? err.message : String(err);
+              logger.warn(
+                `[sim] build bot run failed (workspaceId=${workspaceId}): ${run.error}`,
+              );
+            }
           } finally {
             if (savedState) {
               syncTestBotRunFromState(run, savedState);
@@ -3786,7 +4699,7 @@ export function startWebSocketSimulator(opts: {
             run.finishedAt = new Date().toISOString();
             entry.abort = null;
             entry.promise = null;
-            broadcastTestBot({ type: "testBotStatus", run });
+            emitTestBot({ type: "testBotStatus", run });
           }
         })();
         // 5) Return the current run snapshot to the caller.
@@ -3796,95 +4709,6 @@ export function startWebSocketSimulator(opts: {
         );
       }
 
-      if (url.pathname === "/api/test/status") {
-        const runId = url.searchParams.get("runId") ?? undefined;
-        const sessionId = url.searchParams.get("sessionId") ?? undefined;
-        if (sessionId) {
-          await activateWorkspaceDeck(sessionId);
-        }
-        let entry = runId ? testBotRuns.get(runId) : undefined;
-        if (!entry && sessionId) {
-          for (const candidate of testBotRuns.values()) {
-            if (candidate.run.sessionId === sessionId) {
-              entry = candidate;
-              break;
-            }
-          }
-        }
-        const run = entry?.run ?? {
-          id: runId ?? "",
-          status: "idle",
-          messages: [],
-          traces: [],
-          toolInserts: [],
-          sessionId,
-        };
-        if (!entry && sessionId) {
-          const state = readSessionState(sessionId, { withTraces: true });
-          if (state) {
-            run.id = typeof state.runId === "string" ? state.runId : run.id;
-            run.status = "completed";
-            syncTestBotRunFromState(run, state);
-          }
-        }
-        if (run.sessionId) {
-          const state = readSessionState(run.sessionId, { withTraces: true });
-          if (state) {
-            syncTestBotRunFromState(run, state);
-          }
-        }
-        await deckLoadPromise.catch(() => null);
-        const requestedDeck = url.searchParams.get("deckPath");
-        const selection = requestedDeck
-          ? resolveTestDeck(requestedDeck)
-          : availableTestDecks[0];
-        if (requestedDeck && !selection) {
-          return new Response(
-            JSON.stringify({
-              error: "Unknown test deck selection",
-            }),
-            {
-              status: 400,
-              headers: { "content-type": "application/json" },
-            },
-          );
-        }
-        if (selection) {
-          const schemaDesc = await describeDeckInputSchemaFromPath(
-            selection.path,
-          );
-          return new Response(
-            JSON.stringify({
-              run,
-              botPath: selection.path,
-              botLabel: selection.label,
-              botDescription: selection.description,
-              selectedDeckId: selection.id,
-              inputSchema: schemaDesc.schema,
-              inputSchemaError: schemaDesc.error,
-              defaults: { input: schemaDesc.defaults },
-              testDecks: availableTestDecks,
-            }),
-            { headers: { "content-type": "application/json" } },
-          );
-        }
-
-        return new Response(
-          JSON.stringify({
-            run,
-            botPath: null,
-            botLabel: null,
-            botDescription: null,
-            selectedDeckId: null,
-            inputSchema: null,
-            inputSchemaError: null,
-            defaults: {},
-            testDecks: availableTestDecks,
-          }),
-          { headers: { "content-type": "application/json" } },
-        );
-      }
-
       if (url.pathname === "/api/test/stop") {
         if (req.method !== "POST") {
           return new Response("Method not allowed", { status: 405 });
@@ -3901,6 +4725,11 @@ export function startWebSocketSimulator(opts: {
         if (entry?.abort) {
           entry.abort.abort();
         }
+        if (entry?.run?.status === "running") {
+          entry.run.status = "canceled";
+          entry.run.finishedAt = entry.run.finishedAt ??
+            new Date().toISOString();
+        }
         return new Response(
           JSON.stringify({
             stopped: wasRunning,
@@ -3916,32 +4745,6 @@ export function startWebSocketSimulator(opts: {
         );
       }
 
-      if (url.pathname === "/api/build/status") {
-        if (req.method !== "GET") {
-          return new Response("Method not allowed", { status: 405 });
-        }
-        const workspaceId = url.searchParams.get("workspaceId") ??
-          url.searchParams.get("runId") ??
-          activeWorkspaceId ??
-          undefined;
-        const entry = workspaceId ? buildBotRuns.get(workspaceId) : undefined;
-        const workspaceState = workspaceId
-          ? readSessionState(workspaceId, { withTraces: true })
-          : undefined;
-        const run = workspaceId
-          ? entry?.run ?? buildRunFromWorkspace(workspaceId, workspaceState)
-          : {
-            id: "",
-            status: "idle",
-            messages: [],
-            traces: [],
-            toolInserts: [],
-          };
-        return new Response(JSON.stringify({ run }), {
-          headers: { "content-type": "application/json" },
-        });
-      }
-
       if (url.pathname === "/api/build/reset") {
         if (req.method !== "POST") {
           return new Response("Method not allowed", { status: 405 });
@@ -3952,12 +4755,7 @@ export function startWebSocketSimulator(opts: {
             runId?: string;
             workspaceId?: string;
           };
-          if (typeof body.workspaceId === "string") {
-            workspaceId = body.workspaceId;
-          }
-          if (typeof body.runId === "string" && !workspaceId) {
-            workspaceId = body.runId;
-          }
+          workspaceId = getWorkspaceIdFromBody(body);
         } catch {
           // ignore
         }
@@ -4000,65 +4798,130 @@ export function startWebSocketSimulator(opts: {
             traces: [],
             toolInserts: [],
           },
-        });
+        }, workspaceId);
         return new Response(JSON.stringify({ reset: true }), {
           headers: { "content-type": "application/json" },
         });
       }
 
-      if (url.pathname === "/api/build/message") {
+      if (url.pathname === "/api/build/stop") {
         if (req.method !== "POST") {
           return new Response("Method not allowed", { status: 405 });
         }
-        let payload: {
-          runId?: unknown;
-          workspaceId?: unknown;
-          message?: unknown;
-          model?: unknown;
-          modelForce?: unknown;
-        } = {};
+        let workspaceId: string | undefined = undefined;
         try {
-          payload = await req.json();
+          const body = await req.json() as {
+            runId?: string;
+            workspaceId?: string;
+          };
+          workspaceId = getWorkspaceIdFromBody(body);
         } catch {
           // ignore
         }
-        let workspaceId = typeof payload.workspaceId === "string"
-          ? payload.workspaceId
-          : typeof payload.runId === "string"
-          ? payload.runId
-          : activeWorkspaceId ?? undefined;
         if (!workspaceId) {
-          const created = await createWorkspaceSession();
-          workspaceId = created.id;
-        }
-        const message = typeof payload.message === "string"
-          ? payload.message
-          : "";
-
-        const workspaceRecord = resolveWorkspaceRecord(workspaceId) ?? {
-          id: workspaceId,
-          rootDir: path.dirname(resolvedDeckPath),
-          rootDeckPath: resolvedDeckPath,
-          createdAt: new Date().toISOString(),
-        };
-        if (!resolveWorkspaceRecord(workspaceId)) {
-          registerWorkspace(workspaceRecord);
-        }
-
-        const existingEntry = buildBotRuns.get(workspaceId);
-        if (existingEntry?.promise) {
           return new Response(
-            JSON.stringify({ error: "Run already in progress" }),
-            { status: 409, headers: { "content-type": "application/json" } },
+            JSON.stringify({ error: "Missing workspaceId" }),
+            { status: 400, headers: { "content-type": "application/json" } },
           );
         }
-
-        const entry = existingEntry ?? {
-          run: {
-            id: workspaceId,
-            status: "idle",
-            messages: [],
-            traces: [],
+        const entry = buildBotRuns.get(workspaceId);
+        const wasRunning = Boolean(entry?.promise);
+        if (entry?.abort) {
+          entry.abort.abort();
+        }
+        if (entry?.run?.status === "running") {
+          entry.run.status = "canceled";
+          entry.run.finishedAt = entry.run.finishedAt ??
+            new Date().toISOString();
+        }
+        if (entry?.run) {
+          const state = readSessionState(workspaceId);
+          if (state) {
+            persistSessionState({
+              ...state,
+              meta: {
+                ...(state.meta ?? {}),
+                buildStatus: entry.run.status,
+                buildFinishedAt: entry.run.finishedAt,
+                buildError: entry.run.error,
+              },
+            });
+          }
+        }
+        const run = entry?.run ?? {
+          id: workspaceId,
+          status: "idle",
+          messages: [],
+          traces: [],
+          toolInserts: [],
+        };
+        broadcastBuildBot(
+          { type: "buildBotStatus", run, state: entry?.state ?? undefined },
+          workspaceId,
+        );
+        return new Response(
+          JSON.stringify({
+            stopped: wasRunning,
+            run,
+          }),
+          { headers: { "content-type": "application/json" } },
+        );
+      }
+
+      if (url.pathname === "/api/build/message") {
+        if (req.method !== "POST") {
+          return new Response("Method not allowed", { status: 405 });
+        }
+        let payload: {
+          runId?: unknown;
+          workspaceId?: unknown;
+          message?: unknown;
+          model?: unknown;
+          modelForce?: unknown;
+        } = {};
+        try {
+          payload = await req.json();
+        } catch {
+          // ignore
+        }
+        let workspaceId = typeof payload.workspaceId === "string"
+          ? payload.workspaceId
+          : typeof payload.runId === "string"
+          ? payload.runId
+          : undefined;
+        if (!workspaceId) {
+          const created = await createWorkspaceSession();
+          workspaceId = created.id;
+        }
+        await logWorkspaceBotRoot("/api/build/message", workspaceId);
+        const message = typeof payload.message === "string"
+          ? payload.message
+          : "";
+
+        const workspaceRecord = resolveWorkspaceRecord(workspaceId) ?? {
+          id: workspaceId,
+          rootDir: path.dirname(resolvedDeckPath),
+          rootDeckPath: resolvedDeckPath,
+          createdAt: new Date().toISOString(),
+        };
+        if (!resolveWorkspaceRecord(workspaceId)) {
+          registerWorkspace(workspaceRecord);
+        }
+
+        const existingEntry = buildBotRuns.get(workspaceId);
+        if (existingEntry?.promise) {
+          return new Response(
+            JSON.stringify({ error: "Run already in progress" }),
+            { status: 409, headers: { "content-type": "application/json" } },
+          );
+        }
+
+        const entry = existingEntry ?? {
+          run: {
+            id: workspaceId,
+            status: "idle",
+            messages: [],
+            traces: [],
             toolInserts: [],
           },
           state: null,
@@ -4068,12 +4931,9 @@ export function startWebSocketSimulator(opts: {
         buildBotRuns.set(workspaceId, entry);
 
         if (!entry.state) {
-          const workspaceState = readSessionState(workspaceId, {
-            withTraces: true,
-          });
-          const buildChat = extractBuildChatState(workspaceState);
-          if (buildChat) {
-            entry.state = buildChat;
+          const projection = readBuildState(workspaceId);
+          if (projection?.state) {
+            entry.state = projection.state;
           }
         }
 
@@ -4084,7 +4944,11 @@ export function startWebSocketSimulator(opts: {
         if (entry.state) {
           syncBuildBotRunFromState(run, entry.state);
         }
-        broadcastBuildBot({ type: "buildBotStatus", run });
+        broadcastBuildBot({
+          type: "buildBotStatus",
+          run,
+          state: entry.state ?? undefined,
+        }, workspaceId);
         const workspaceBaseState = readSessionState(workspaceId) ?? {
           runId: workspaceId,
           messages: [],
@@ -4113,7 +4977,7 @@ export function startWebSocketSimulator(opts: {
         if (botDeckUrl.protocol !== "file:") {
           run.status = "error";
           run.error = "Unable to resolve Gambit Bot deck path";
-          broadcastBuildBot({ type: "buildBotStatus", run });
+          broadcastBuildBot({ type: "buildBotStatus", run }, workspaceId);
           const state = readSessionState(workspaceId);
           if (state) {
             persistSessionState({
@@ -4140,7 +5004,7 @@ export function startWebSocketSimulator(opts: {
           const msg = err instanceof Error ? err.message : String(err);
           run.status = "error";
           run.error = msg;
-          broadcastBuildBot({ type: "buildBotStatus", run });
+          broadcastBuildBot({ type: "buildBotStatus", run }, workspaceId);
           const state = readSessionState(workspaceId);
           if (state) {
             persistSessionState({
@@ -4169,12 +5033,20 @@ export function startWebSocketSimulator(opts: {
           const stamped = event.ts ? event : { ...event, ts: Date.now() };
           capturedTraces.push(stamped);
           consoleTracer?.(stamped);
+          broadcastBuildBot({
+            type: "buildBotTrace",
+            runId: workspaceId,
+            event: stamped,
+          }, workspaceId);
         };
 
         const appendFromState = (state: SavedState) => {
           syncBuildBotRunFromState(run, state);
           run.traces = Array.isArray(state.traces) ? [...state.traces] : [];
-          broadcastBuildBot({ type: "buildBotStatus", run });
+          broadcastBuildBot(
+            { type: "buildBotStatus", run, state },
+            workspaceId,
+          );
           const base = readSessionState(workspaceId) ?? {
             runId: workspaceId,
             messages: [],
@@ -4188,7 +5060,6 @@ export function startWebSocketSimulator(opts: {
               buildStartedAt: run.startedAt,
               buildFinishedAt: run.finishedAt,
               buildError: run.error,
-              buildChat: state,
             },
           });
         };
@@ -4217,6 +5088,7 @@ export function startWebSocketSimulator(opts: {
                 stream: shouldStream,
                 state: entry.state ?? undefined,
                 responsesMode: opts.responsesMode,
+                signal: controller.signal,
                 initialUserMessage,
                 onStateUpdate: (state) => {
                   if (isAborted()) return;
@@ -4235,7 +5107,7 @@ export function startWebSocketSimulator(opts: {
                     chunk,
                     turn,
                     ts: Date.now(),
-                  }),
+                  }, workspaceId),
               });
               if (shouldStream) {
                 broadcastBuildBot({
@@ -4244,7 +5116,7 @@ export function startWebSocketSimulator(opts: {
                   role: "assistant",
                   turn,
                   ts: Date.now(),
-                });
+                }, workspaceId);
               }
               return result;
             };
@@ -4268,8 +5140,16 @@ export function startWebSocketSimulator(opts: {
               run.status = "completed";
             }
           } catch (err) {
-            run.status = "error";
-            run.error = err instanceof Error ? err.message : String(err);
+            if (isAborted() || isRunCanceledError(err)) {
+              run.status = "canceled";
+              run.error = undefined;
+            } else {
+              run.status = "error";
+              run.error = err instanceof Error ? err.message : String(err);
+              logger.warn(
+                `[sim] build bot run failed (workspaceId=${workspaceId}): ${run.error}`,
+              );
+            }
           } finally {
             run.finishedAt = new Date().toISOString();
             entry.abort = null;
@@ -4279,7 +5159,6 @@ export function startWebSocketSimulator(opts: {
               messages: [],
               meta: {},
             };
-            const buildChatState = entry.state ?? extractBuildChatState(base);
             persistSessionState({
               ...base,
               meta: {
@@ -4288,7 +5167,6 @@ export function startWebSocketSimulator(opts: {
                 buildStartedAt: run.startedAt,
                 buildFinishedAt: run.finishedAt,
                 buildError: run.error,
-                buildChat: buildChatState ?? undefined,
               },
             });
             try {
@@ -4300,7 +5178,10 @@ export function startWebSocketSimulator(opts: {
                 }`,
               );
             }
-            broadcastBuildBot({ type: "buildBotStatus", run });
+            broadcastBuildBot(
+              { type: "buildBotStatus", run, state: entry.state ?? undefined },
+              workspaceId,
+            );
             if (prevBotRoot === undefined) {
               try {
                 Deno.env.delete("GAMBIT_BOT_ROOT");
@@ -4323,9 +5204,8 @@ export function startWebSocketSimulator(opts: {
           return new Response("Method not allowed", { status: 405 });
         }
         try {
-          const workspaceId = url.searchParams.get("workspaceId") ??
-            activeWorkspaceId ??
-            undefined;
+          const workspaceId = getWorkspaceIdFromQuery(url);
+          await logWorkspaceBotRoot("/api/build/files", workspaceId);
           const root = await resolveBuildBotRoot(workspaceId);
           const entries = await listBuildBotFiles(root);
           return new Response(JSON.stringify({ root, entries }), {
@@ -4344,17 +5224,22 @@ export function startWebSocketSimulator(opts: {
         if (req.method !== "GET") {
           return new Response("Method not allowed", { status: 405 });
         }
+        const workspaceId = getWorkspaceIdFromQuery(url);
+        await logWorkspaceBotRoot("/api/build/file", workspaceId);
         const inputPath = url.searchParams.get("path") ?? "";
         if (!inputPath) {
+          appendServerErrorLog(workspaceId, {
+            endpoint: "/api/build/file",
+            status: 400,
+            message: "Missing path",
+            method: req.method,
+          });
           return new Response(JSON.stringify({ error: "Missing path" }), {
             status: 400,
             headers: { "content-type": "application/json" },
           });
         }
         try {
-          const workspaceId = url.searchParams.get("workspaceId") ??
-            activeWorkspaceId ??
-            undefined;
           const root = await resolveBuildBotRoot(workspaceId);
           const resolved = await resolveBuildBotPath(root, inputPath);
           if (!resolved.stat.isFile) {
@@ -4424,7 +5309,7 @@ export function startWebSocketSimulator(opts: {
           stream?: boolean;
           model?: string;
           modelForce?: string;
-          sessionId?: string;
+          workspaceId?: string;
         } = {};
         try {
           payload = await req.json();
@@ -4436,16 +5321,32 @@ export function startWebSocketSimulator(opts: {
           simulatorCapturedTraces = [];
           simulatorCurrentRunId = undefined;
         }
-        if (payload.sessionId) {
-          const loaded = readSessionState(payload.sessionId, {
-            withTraces: true,
-          });
-          if (loaded) {
-            simulatorSavedState = loaded;
-            simulatorCapturedTraces = Array.isArray(loaded.traces)
-              ? cloneTraces(loaded.traces)
-              : [];
+        if (payload.workspaceId) {
+          let loaded: SavedState | undefined;
+          try {
+            loaded = readSessionStateStrict(payload.workspaceId, {
+              withTraces: true,
+            });
+          } catch (err) {
+            const message = err instanceof Error ? err.message : String(err);
+            emitSimulator({ type: "error", message });
+            return new Response(
+              JSON.stringify({ error: message }),
+              { status: 400, headers: { "content-type": "application/json" } },
+            );
+          }
+          if (!loaded) {
+            const message = "Workspace not found";
+            emitSimulator({ type: "error", message });
+            return new Response(
+              JSON.stringify({ error: message }),
+              { status: 404, headers: { "content-type": "application/json" } },
+            );
           }
+          simulatorSavedState = loaded;
+          simulatorCapturedTraces = Array.isArray(loaded.traces)
+            ? cloneTraces(loaded.traces)
+            : [];
         }
         simulatorCurrentRunId = undefined;
         const stream = payload.stream ?? true;
@@ -4554,7 +5455,7 @@ export function startWebSocketSimulator(opts: {
           return new Response(
             JSON.stringify({
               runId: simulatorCurrentRunId,
-              sessionId: simulatorSavedState?.meta?.sessionId,
+              workspaceId: simulatorSavedState?.meta?.workspaceId,
             }),
             { headers: { "content-type": "application/json" } },
           );
@@ -4580,13 +5481,15 @@ export function startWebSocketSimulator(opts: {
         }
         try {
           const body = await req.json() as {
-            sessionId?: string;
+            workspaceId?: string;
+            runId?: string;
             messageRefId?: string;
             score?: number | null;
             reason?: string;
           };
-          if (!body.sessionId) {
-            throw new Error("Missing sessionId");
+          const workspaceId = getWorkspaceIdFromBody(body);
+          if (!workspaceId) {
+            throw new Error("Missing workspaceId");
           }
           if (!body.messageRefId) {
             throw new Error("Missing messageRefId");
@@ -4597,8 +5500,33 @@ export function startWebSocketSimulator(opts: {
           ) {
             throw new Error("Invalid score");
           }
-          const state = readSessionState(body.sessionId, { withTraces: true });
-          if (!state) throw new Error("Session not found");
+          let state: SavedState | undefined;
+          try {
+            state = readSessionStateStrict(workspaceId, { withTraces: true });
+          } catch (err) {
+            throw new Error(
+              err instanceof Error ? err.message : String(err),
+            );
+          }
+          if (!state) throw new Error("Workspace not found");
+          const requestedRunId = typeof body.runId === "string" &&
+              body.runId.trim().length > 0
+            ? body.runId.trim()
+            : undefined;
+          const feedbackEligible = isFeedbackEligibleMessageRef(
+            state,
+            body.messageRefId,
+          ) ||
+            (requestedRunId
+              ? isFeedbackEligiblePersistedTestRunMessageRef(
+                state,
+                requestedRunId,
+                body.messageRefId,
+              )
+              : false);
+          if (!feedbackEligible) {
+            throw new Error("Feedback target is not eligible");
+          }
           simulatorSavedState = state;
           simulatorCapturedTraces = Array.isArray(state.traces)
             ? cloneTraces(state.traces)
@@ -4620,7 +5548,11 @@ export function startWebSocketSimulator(opts: {
             const reason = typeof body.reason === "string"
               ? body.reason
               : undefined;
-            const runId = typeof state.runId === "string" ? state.runId : "run";
+            const runId = requestedRunId ??
+              (typeof state.runId === "string" ? state.runId : "run");
+            const scenarioRunId = typeof state.meta?.scenarioRunId === "string"
+              ? state.meta.scenarioRunId
+              : runId;
             const now = new Date().toISOString();
             entry = idx >= 0
               ? {
@@ -4637,6 +5569,10 @@ export function startWebSocketSimulator(opts: {
                 reason,
                 createdAt: now,
               };
+            if (entry) {
+              (entry as Record<string, unknown>).workspaceId = workspaceId;
+              (entry as Record<string, unknown>).scenarioRunId = scenarioRunId;
+            }
             feedback = idx >= 0
               ? existing.map((f, i) => i === idx ? entry! : f)
               : [...existing, entry];
@@ -4652,6 +5588,18 @@ export function startWebSocketSimulator(opts: {
             feedback: entry,
             deleted,
           });
+          appendSessionEvent(enriched, {
+            type: "feedback.update",
+            kind: "artifact",
+            category: "feedback",
+            workspaceId,
+            scenarioRunId: typeof enriched.meta?.scenarioRunId === "string"
+              ? enriched.meta.scenarioRunId
+              : enriched.runId,
+            messageRefId: body.messageRefId,
+            feedback: entry,
+            deleted,
+          });
           simulatorSavedState = enriched;
           emitSimulator({ type: "state", state: enriched });
           return new Response(
@@ -4674,14 +5622,23 @@ export function startWebSocketSimulator(opts: {
         }
         try {
           const body = await req.json() as {
-            sessionId?: string;
+            workspaceId?: string;
+            runId?: string;
             text?: string;
           };
-          if (!body.sessionId) {
-            throw new Error("Missing sessionId");
+          const workspaceId = getWorkspaceIdFromBody(body);
+          if (!workspaceId) {
+            throw new Error("Missing workspaceId");
+          }
+          let state: SavedState | undefined;
+          try {
+            state = readSessionStateStrict(workspaceId, { withTraces: true });
+          } catch (err) {
+            throw new Error(
+              err instanceof Error ? err.message : String(err),
+            );
           }
-          const state = readSessionState(body.sessionId, { withTraces: true });
-          if (!state) throw new Error("Session not found");
+          if (!state) throw new Error("Workspace not found");
           simulatorSavedState = state;
           simulatorCapturedTraces = Array.isArray(state.traces)
             ? cloneTraces(state.traces)
@@ -4692,6 +5649,13 @@ export function startWebSocketSimulator(opts: {
             notes: { text: body.text ?? "", updatedAt: now },
             traces: simulatorCapturedTraces,
           });
+          appendSessionEvent(enriched, {
+            type: "notes.update",
+            kind: "artifact",
+            category: "notes",
+            workspaceId,
+            notes: enriched.notes,
+          });
           simulatorSavedState = enriched;
           emitSimulator({ type: "state", state: enriched });
           return new Response(
@@ -4714,17 +5678,26 @@ export function startWebSocketSimulator(opts: {
         }
         try {
           const body = await req.json() as {
-            sessionId?: string;
+            workspaceId?: string;
+            runId?: string;
             score?: number;
           };
-          if (!body.sessionId) {
-            throw new Error("Missing sessionId");
+          const workspaceId = getWorkspaceIdFromBody(body);
+          if (!workspaceId) {
+            throw new Error("Missing workspaceId");
           }
           if (typeof body.score !== "number" || Number.isNaN(body.score)) {
             throw new Error("Invalid score");
           }
-          const state = readSessionState(body.sessionId, { withTraces: true });
-          if (!state) throw new Error("Session not found");
+          let state: SavedState | undefined;
+          try {
+            state = readSessionStateStrict(workspaceId, { withTraces: true });
+          } catch (err) {
+            throw new Error(
+              err instanceof Error ? err.message : String(err),
+            );
+          }
+          if (!state) throw new Error("Workspace not found");
           simulatorSavedState = state;
           simulatorCapturedTraces = Array.isArray(state.traces)
             ? cloneTraces(state.traces)
@@ -4736,6 +5709,13 @@ export function startWebSocketSimulator(opts: {
             conversationScore: { score: clamped, updatedAt: now },
             traces: simulatorCapturedTraces,
           });
+          appendSessionEvent(enriched, {
+            type: "conversation.score.update",
+            kind: "artifact",
+            category: "score",
+            workspaceId,
+            conversationScore: enriched.conversationScore,
+          });
           simulatorSavedState = enriched;
           emitSimulator({ type: "state", state: enriched });
           return new Response(
@@ -4760,13 +5740,24 @@ export function startWebSocketSimulator(opts: {
           return new Response("Method not allowed", { status: 405 });
         }
         try {
-          const body = await req.json() as { sessionId?: string };
-          if (!body.sessionId) {
-            throw new Error("Missing sessionId");
+          const body = await req.json() as {
+            workspaceId?: string;
+            runId?: string;
+          };
+          const workspaceId = getWorkspaceIdFromBody(body);
+          if (!workspaceId) {
+            throw new Error("Missing workspaceId");
+          }
+          let state: SavedState | undefined;
+          try {
+            state = readSessionStateStrict(workspaceId, { withTraces: true });
+          } catch (err) {
+            throw new Error(
+              err instanceof Error ? err.message : String(err),
+            );
           }
-          const state = readSessionState(body.sessionId, { withTraces: true });
           if (!state) {
-            throw new Error("Session not found");
+            throw new Error("Workspace not found");
           }
           simulatorSavedState = state;
           simulatorCapturedTraces = Array.isArray(state.traces)
@@ -4787,62 +5778,38 @@ export function startWebSocketSimulator(opts: {
         }
       }
 
-      if (url.pathname === "/api/session") {
-        if (req.method !== "GET") {
-          return new Response("Method not allowed", { status: 405 });
-        }
-        const sessionId = url.searchParams.get("sessionId");
-        if (!sessionId) {
-          return new Response(
-            JSON.stringify({ error: "Missing sessionId" }),
-            { status: 400, headers: { "content-type": "application/json" } },
-          );
-        }
-        const state = readSessionState(sessionId, { withTraces: true });
-        if (!state) {
-          return new Response(
-            JSON.stringify({ error: "Session not found" }),
-            { status: 404, headers: { "content-type": "application/json" } },
-          );
-        }
-        return new Response(
-          JSON.stringify({
-            sessionId,
-            messages: state.messages,
-            messageRefs: state.messageRefs,
-            feedback: state.feedback,
-            traces: state.traces,
-            notes: state.notes,
-            meta: state.meta,
-          }),
-          { headers: { "content-type": "application/json" } },
-        );
-      }
-
       if (url.pathname === "/api/session/notes") {
         if (req.method !== "POST") {
           return new Response("Method not allowed", { status: 405 });
         }
         try {
           const body = await req.json() as {
-            sessionId?: string;
+            workspaceId?: string;
             text?: string;
           };
-          if (!body.sessionId) {
-            throw new Error("Missing sessionId");
+          const workspaceId = getWorkspaceIdFromBody(body);
+          if (!workspaceId) {
+            throw new Error("Missing workspaceId");
           }
-          const state = readSessionState(body.sessionId);
+          const state = readSessionState(workspaceId);
           if (!state) {
-            throw new Error("Session not found");
+            throw new Error("Workspace not found");
           }
           const now = new Date().toISOString();
           const nextState = persistSessionState({
             ...state,
             notes: { text: body.text ?? "", updatedAt: now },
           });
+          appendSessionEvent(nextState, {
+            type: "notes.update",
+            kind: "artifact",
+            category: "notes",
+            workspaceId,
+            notes: nextState.notes,
+          });
           return new Response(
             JSON.stringify({
-              sessionId: body.sessionId,
+              workspaceId,
               notes: nextState.notes,
               saved: true,
             }),
@@ -4864,13 +5831,15 @@ export function startWebSocketSimulator(opts: {
         }
         try {
           const body = await req.json() as {
-            sessionId?: string;
+            workspaceId?: string;
+            runId?: string;
             messageRefId?: string;
             score?: number | null;
             reason?: string;
           };
-          if (!body.sessionId) {
-            throw new Error("Missing sessionId");
+          const workspaceId = getWorkspaceIdFromBody(body);
+          if (!workspaceId) {
+            throw new Error("Missing workspaceId");
           }
           if (!body.messageRefId) {
             throw new Error("Missing messageRefId");
@@ -4881,9 +5850,27 @@ export function startWebSocketSimulator(opts: {
           ) {
             throw new Error("Invalid score");
           }
-          const state = readSessionState(body.sessionId);
+          const state = readSessionState(workspaceId);
           if (!state) {
-            throw new Error("Session not found");
+            throw new Error("Workspace not found");
+          }
+          const requestedRunId = typeof body.runId === "string" &&
+              body.runId.trim().length > 0
+            ? body.runId.trim()
+            : undefined;
+          const feedbackEligible = isFeedbackEligibleMessageRef(
+            state,
+            body.messageRefId,
+          ) ||
+            (requestedRunId
+              ? isFeedbackEligiblePersistedTestRunMessageRef(
+                state,
+                requestedRunId,
+                body.messageRefId,
+              )
+              : false);
+          if (!feedbackEligible) {
+            throw new Error("Feedback target is not eligible");
           }
           const existing = state.feedback ?? [];
           const idx = existing.findIndex((entry) =>
@@ -4902,9 +5889,12 @@ export function startWebSocketSimulator(opts: {
             const reason = typeof body.reason === "string"
               ? body.reason
               : undefined;
-            const runId = typeof state.runId === "string"
-              ? state.runId
-              : "session";
+            const runId = requestedRunId ??
+              (typeof state.runId === "string" ? state.runId : "session");
+            const scenarioRunId = requestedRunId ??
+              (typeof state.meta?.scenarioRunId === "string"
+                ? state.meta.scenarioRunId
+                : runId);
             const now = new Date().toISOString();
             entry = idx >= 0
               ? {
@@ -4921,6 +5911,10 @@ export function startWebSocketSimulator(opts: {
                 reason,
                 createdAt: now,
               };
+            if (entry) {
+              (entry as Record<string, unknown>).workspaceId = workspaceId;
+              (entry as Record<string, unknown>).scenarioRunId = scenarioRunId;
+            }
             feedback = idx >= 0
               ? existing.map((item, i) => i === idx ? entry! : item)
               : [...existing, entry];
@@ -4935,6 +5929,18 @@ export function startWebSocketSimulator(opts: {
             feedback: entry,
             deleted,
           });
+          appendSessionEvent(nextState, {
+            type: "feedback.update",
+            kind: "artifact",
+            category: "feedback",
+            workspaceId,
+            scenarioRunId: typeof nextState.meta?.scenarioRunId === "string"
+              ? nextState.meta.scenarioRunId
+              : nextState.runId,
+            messageRefId: body.messageRefId,
+            feedback: entry,
+            deleted,
+          });
           const testBotRunId = typeof nextState.meta?.testBotRunId === "string"
             ? nextState.meta.testBotRunId
             : undefined;
@@ -4942,12 +5948,15 @@ export function startWebSocketSimulator(opts: {
             const testEntry = testBotRuns.get(testBotRunId);
             if (testEntry) {
               syncTestBotRunFromState(testEntry.run, nextState);
-              broadcastTestBot({ type: "testBotStatus", run: testEntry.run });
+              broadcastTestBot(
+                { type: "testBotStatus", run: testEntry.run },
+                workspaceId,
+              );
             }
           }
           return new Response(
             JSON.stringify({
-              sessionId: body.sessionId,
+              workspaceId,
               feedback: entry,
               saved: !deleted,
               deleted,
@@ -4969,149 +5978,22 @@ export function startWebSocketSimulator(opts: {
           return new Response("Method not allowed", { status: 405 });
         }
         try {
-          const body = await req.json() as { sessionId?: string };
-          if (!body.sessionId) {
-            throw new Error("Missing sessionId");
+          const body = await req.json() as { workspaceId?: string };
+          const workspaceId = getWorkspaceIdFromBody(body);
+          if (!workspaceId) {
+            throw new Error("Missing workspaceId");
           }
-          const removed = deleteSessionState(body.sessionId);
+          const removed = deleteSessionState(workspaceId);
           if (!removed) {
             return new Response(
-              JSON.stringify({ error: "Session not found" }),
+              JSON.stringify({ error: "Workspace not found" }),
               { status: 404, headers: { "content-type": "application/json" } },
             );
           }
-          return new Response(
-            JSON.stringify({ sessionId: body.sessionId, deleted: true }),
-            { headers: { "content-type": "application/json" } },
-          );
-        } catch (err) {
-          return new Response(
-            JSON.stringify({
-              error: err instanceof Error ? err.message : String(err),
-            }),
-            { status: 400, headers: { "content-type": "application/json" } },
-          );
-        }
-      }
-
-      if (url.pathname === "/api/feedback") {
-        if (req.method !== "GET") {
-          return new Response("Method not allowed", { status: 405 });
-        }
-        const deckPathParam = url.searchParams.get("deckPath");
-        if (!deckPathParam) {
-          return new Response(
-            JSON.stringify({ error: "Missing deckPath" }),
-            { status: 400, headers: { "content-type": "application/json" } },
-          );
-        }
-        const items: Array<Record<string, unknown>> = [];
-        try {
-          for await (const entry of Deno.readDir(sessionsRoot)) {
-            if (!entry.isDirectory) continue;
-            const sessionId = entry.name;
-            const state = readSessionState(sessionId);
-            if (!state) continue;
-            if (state.meta?.deck !== deckPathParam) continue;
-            const feedbackList = Array.isArray(state.feedback)
-              ? state.feedback
-              : [];
-            feedbackList.forEach((fb) => {
-              if (!fb || typeof fb !== "object") return;
-              const messageRefId = (fb as { messageRefId?: string })
-                .messageRefId;
-              if (typeof messageRefId !== "string") return;
-              let messageContent: unknown = undefined;
-              if (
-                Array.isArray(state.messageRefs) &&
-                Array.isArray(state.messages)
-              ) {
-                const idx = state.messageRefs.findIndex((ref) =>
-                  ref?.id === messageRefId
-                );
-                if (idx >= 0) {
-                  messageContent = state.messages[idx]?.content;
-                }
-              }
-              items.push({
-                sessionId,
-                deck: state.meta?.deck,
-                sessionCreatedAt: state.meta?.sessionCreatedAt,
-                messageRefId,
-                score: (fb as { score?: number }).score,
-                reason: (fb as { reason?: string }).reason,
-                createdAt: (fb as { createdAt?: string }).createdAt,
-                archivedAt: (fb as { archivedAt?: string }).archivedAt,
-                messageContent,
-              });
-            });
-          }
-        } catch (err) {
           return new Response(
             JSON.stringify({
-              error: err instanceof Error ? err.message : String(err),
-            }),
-            { status: 400, headers: { "content-type": "application/json" } },
-          );
-        }
-        items.sort((a, b) => {
-          const aTime = String(a.createdAt ?? "") || "";
-          const bTime = String(b.createdAt ?? "") || "";
-          return bTime.localeCompare(aTime);
-        });
-        return new Response(
-          JSON.stringify({ deckPath: deckPathParam, items }),
-          {
-            headers: { "content-type": "application/json" },
-          },
-        );
-      }
-
-      if (url.pathname === "/api/feedback/archive" && req.method === "POST") {
-        try {
-          const body = await req.json() as {
-            sessionId?: string;
-            messageRefId?: string;
-            archived?: boolean;
-          };
-          if (!body.sessionId || !body.messageRefId) {
-            throw new Error("Missing sessionId or messageRefId");
-          }
-          const state = readSessionState(body.sessionId);
-          if (!state || !Array.isArray(state.feedback)) {
-            throw new Error("Session not found");
-          }
-          const idx = state.feedback.findIndex((fb) =>
-            (fb as { messageRefId?: string }).messageRefId === body.messageRefId
-          );
-          if (idx === -1) throw new Error("Feedback not found");
-          const next = { ...state.feedback[idx] };
-          if (body.archived === false) {
-            delete (next as Record<string, unknown>).archivedAt;
-          } else {
-            (next as Record<string, unknown>).archivedAt = new Date()
-              .toISOString();
-          }
-          const nextFeedback = state.feedback.map((fb, i) =>
-            i === idx ? next : fb
-          );
-          const updated = persistSessionState({
-            ...state,
-            feedback: nextFeedback,
-          });
-          appendFeedbackLog(updated, {
-            type: "feedback.archive",
-            messageRefId: body.messageRefId,
-            archivedAt: (next as { archivedAt?: string }).archivedAt,
-            archived: body.archived !== false,
-          });
-          return new Response(
-            JSON.stringify({
-              sessionId: body.sessionId,
-              messageRefId: body.messageRefId,
-              archivedAt: (next as { archivedAt?: string }).archivedAt,
-              saved: true,
-              feedbackCount: updated.feedback?.length ?? 0,
+              workspaceId,
+              deleted: true,
             }),
             { headers: { "content-type": "application/json" } },
           );
@@ -5125,222 +6007,41 @@ export function startWebSocketSimulator(opts: {
         }
       }
 
-      if (
-        url.pathname === "/" || url.pathname.startsWith("/sessions/") ||
-        url.pathname.startsWith("/simulate") ||
-        url.pathname.startsWith("/debug") ||
-        url.pathname.startsWith("/build") ||
-        url.pathname.startsWith("/editor") ||
-        url.pathname.startsWith("/docs") ||
-        url.pathname.startsWith("/test") ||
-        url.pathname.startsWith("/grade")
-      ) {
-        const hasBundle = await canServeReactBundle();
-        if (!hasBundle) {
-          return new Response(
-            "Simulator UI bundle missing. Run `deno task bundle:sim` (or start with `--bundle`).",
-            { status: 500 },
-          );
-        }
-        await deckLoadPromise.catch(() => null);
-        const resolvedLabel = deckLabel ?? toDeckLabel(resolvedDeckPath);
-        return new Response(
-          simulatorReactHtml(resolvedDeckPath, resolvedLabel, {
-            workspaceId: activeWorkspaceId ?? null,
-            onboarding: activeWorkspaceOnboarding,
-          }),
-          {
-            headers: { "content-type": "text/html; charset=utf-8" },
-          },
-        );
-      }
-
-      if (url.pathname === "/schema") {
-        const sessionId = url.searchParams.get("sessionId") ?? undefined;
-        if (sessionId) {
-          await activateWorkspaceDeck(sessionId);
-        }
-        const desc = await schemaPromise;
-        const deck = await deckLoadPromise.catch(() => null);
-        const modelParams = deck && typeof deck === "object"
-          ? (deck as { modelParams?: Record<string, unknown> }).modelParams
-          : undefined;
-        const startMode = deck &&
-            (deck.startMode === "assistant" || deck.startMode === "user")
-          ? deck.startMode
-          : "assistant";
-        return new Response(
-          JSON.stringify({
-            deck: resolvedDeckPath,
-            startMode,
-            modelParams,
-            ...desc,
-          }),
-          {
-            headers: { "content-type": "application/json; charset=utf-8" },
-          },
-        );
-      }
-
-      if (url.pathname === "/api/deck-source") {
-        if (req.method !== "GET") {
-          return new Response("Method not allowed", { status: 405 });
-        }
-        try {
-          const content = await Deno.readTextFile(resolvedDeckPath);
-          return new Response(
-            JSON.stringify({
-              path: resolvedDeckPath,
-              content,
-            }),
-            { headers: { "content-type": "application/json; charset=utf-8" } },
-          );
-        } catch (err) {
-          const message = err instanceof Error ? err.message : String(err);
-          return new Response(
-            JSON.stringify({
-              path: resolvedDeckPath,
-              error: message,
-            }),
-            {
-              status: 500,
-              headers: { "content-type": "application/json; charset=utf-8" },
-            },
-          );
-        }
-      }
-
-      if (url.pathname === "/ui/bundle.js") {
-        const data = await readReactBundle();
-        if (!data) {
-          return new Response(
-            "Bundle missing. Run `deno task bundle:sim` (or start with `--bundle`).",
-            { status: 404 },
-          );
-        }
-        try {
-          const headers = new Headers({
-            "content-type": "application/javascript; charset=utf-8",
-          });
-          // Hint the browser about the external source map since Deno's bundle
-          // output does not embed a sourceMappingURL comment.
-          if (shouldAdvertiseSourceMap()) {
-            headers.set("SourceMap", "/ui/bundle.js.map");
-          }
-          return new Response(data as unknown as BodyInit, { headers });
-        } catch (err) {
-          return new Response(
-            `Failed to read bundle: ${
-              err instanceof Error ? err.message : String(err)
-            }`,
-            { status: 500 },
-          );
-        }
-      }
-
-      if (url.pathname === "/ui/bundle.js.map") {
-        const data = await readReactBundleSourceMap();
-        if (!data) {
-          return new Response(
-            "Source map missing. Run `deno task bundle:sim:sourcemap` (or start with `--bundle --sourcemap`).",
-            { status: 404 },
-          );
-        }
-        try {
-          return new Response(data as unknown as BodyInit, {
-            headers: {
-              "content-type": "application/json; charset=utf-8",
-            },
-          });
-        } catch (err) {
-          return new Response(
-            `Failed to read source map: ${
-              err instanceof Error ? err.message : String(err)
-            }`,
-            { status: 500 },
-          );
-        }
-      }
-
-      if (url.pathname === "/sessions") {
-        const sessions = listSessions();
-        return new Response(JSON.stringify({ sessions }), {
-          headers: { "content-type": "application/json; charset=utf-8" },
-        });
-      }
-      if (url.pathname === "/api/workspace/new") {
-        if (req.method !== "POST") {
-          return new Response("Method not allowed", { status: 405 });
-        }
-        try {
-          const workspace = await createWorkspaceSession();
-          await activateWorkspaceDeck(workspace.id);
-          return new Response(
-            JSON.stringify({
-              workspaceId: workspace.id,
-              deckPath: workspace.rootDeckPath,
-              workspaceDir: workspace.rootDir,
-              createdAt: workspace.createdAt,
-            }),
-            { headers: { "content-type": "application/json" } },
-          );
-        } catch (err) {
-          return new Response(
-            JSON.stringify({
-              error: err instanceof Error ? err.message : String(err),
-            }),
-            { status: 500, headers: { "content-type": "application/json" } },
-          );
-        }
-      }
-      if (url.pathname === "/api/build/runs") {
-        const runs = listBuildRuns();
-        return new Response(JSON.stringify({ runs }), {
-          headers: { "content-type": "application/json; charset=utf-8" },
-        });
-      }
-      if (url.pathname === "/api/build/load") {
-        if (req.method !== "POST") {
-          return new Response("Method not allowed", { status: 405 });
-        }
-        try {
-          const body = await req.json() as {
-            runId?: string;
-            workspaceId?: string;
-          };
-          const workspaceId = body.workspaceId ?? body.runId ?? "";
-          if (!workspaceId || !isSafeRunId(workspaceId)) {
-            throw new Error("Missing workspaceId");
-          }
-          const state = readSessionState(workspaceId, { withTraces: true });
-          if (!state) {
-            return new Response(
-              JSON.stringify({ error: "Workspace not found" }),
-              { status: 404, headers: { "content-type": "application/json" } },
-            );
-          }
-          const buildChat = extractBuildChatState(state);
-          const run = buildRunFromWorkspace(workspaceId, state);
-          const entry: BuildBotRunEntry = {
-            run,
-            state: buildChat,
-            promise: null,
-            abort: null,
-          };
-          buildBotRuns.set(workspaceId, entry);
-          broadcastBuildBot({ type: "buildBotStatus", run });
-          return new Response(JSON.stringify({ run }), {
-            headers: { "content-type": "application/json" },
-          });
-        } catch (err) {
-          return new Response(
-            JSON.stringify({
-              error: err instanceof Error ? err.message : String(err),
-            }),
-            { status: 400, headers: { "content-type": "application/json" } },
-          );
-        }
-      }
+      const feedbackResponse = await handleFeedbackRoutes({
+        url,
+        req,
+        sessionsRoot,
+        getWorkspaceIdFromBody,
+        readSessionState,
+        persistSessionState,
+        appendFeedbackLog,
+        appendSessionEvent,
+      });
+      if (feedbackResponse) return feedbackResponse;
+
+      const uiRoutesResponse = await handleUiRoutes({
+        url,
+        req,
+        workspaceRouteBase: WORKSPACE_ROUTE_BASE,
+        activeWorkspaceId,
+        activeWorkspaceOnboarding,
+        resolvedDeckPath,
+        deckLabel,
+        getWorkspaceIdFromQuery,
+        activateWorkspaceDeck,
+        schemaPromise,
+        deckLoadPromise,
+        canServeReactBundle,
+        simulatorReactHtml,
+        toDeckLabel,
+        readReactBundle,
+        shouldAdvertiseSourceMap,
+        readReactBundleSourceMap,
+        listSessions,
+        createWorkspaceSession,
+        workspaceStateSchemaVersion: WORKSPACE_STATE_SCHEMA_VERSION,
+      });
+      if (uiRoutesResponse) return uiRoutesResponse;
 
       return new Response("Not found", { status: 404 });
     },
@@ -5496,6 +6197,14 @@ function simulatorReactHtml(
       normalized === "yes" ||
       normalized === "on";
   })();
+  const buildStreamDebugEnabled = (() => {
+    const raw = Deno.env.get("GAMBIT_SIMULATOR_BUILD_STREAM_DEBUG");
+    if (raw === undefined) return false;
+    const normalized = raw.trim().toLowerCase();
+    return normalized === "1" || normalized === "true" ||
+      normalized === "yes" ||
+      normalized === "on";
+  })();
   const bundleStamp = (() => {
     try {
       const stat = Deno.statSync(simulatorBundlePath);
@@ -5538,6 +6247,11 @@ function simulatorReactHtml(
     JSON.stringify(
       workspaceOnboarding,
     )
+  };
+    window.__GAMBIT_BUILD_STREAM_DEBUG__ = ${
+    JSON.stringify(
+      buildStreamDebugEnabled,
+    )
   };
   </script>
   <script type="module" src="${bundleUrl}"></script>
@@ -5582,6 +6296,9 @@ async function runDeckWithFallback(args: {
   stream?: boolean;
   onStreamText?: (chunk: string) => void;
   responsesMode?: boolean;
+  workerSandbox?: boolean;
+  signal?: AbortSignal;
+  onCancel?: () => unknown | Promise<unknown>;
 }): Promise<unknown> {
   try {
     return await runDeck({
@@ -5596,6 +6313,9 @@ async function runDeckWithFallback(args: {
       stream: args.stream,
       onStreamText: args.onStreamText,
       responsesMode: args.responsesMode,
+      workerSandbox: args.workerSandbox,
+      signal: args.signal,
+      onCancel: args.onCancel,
     });
   } catch (error) {
     if (args.input === undefined && shouldRetryWithStringInput(error)) {
@@ -5611,6 +6331,9 @@ async function runDeckWithFallback(args: {
         stream: args.stream,
         onStreamText: args.onStreamText,
         responsesMode: args.responsesMode,
+        workerSandbox: args.workerSandbox,
+        signal: args.signal,
+        onCancel: args.onCancel,
       });
     }
     throw error;
diff --git a/src/server_feedback_grading.test.ts b/src/server_feedback_grading.test.ts
new file mode 100644
index 000000000..292012213
--- /dev/null
+++ b/src/server_feedback_grading.test.ts
@@ -0,0 +1,519 @@
+import { assert, assertEquals } from "@std/assert";
+import * as path from "@std/path";
+import { startWebSocketSimulator } from "./server.ts";
+import type { ModelProvider } from "@bolt-foundry/gambit-core";
+import {
+  modImportPath,
+  readJsonLines,
+  runSimulator,
+} from "./server_test_utils.ts";
+
+Deno.test("simulator appends feedback log entries", async () => {
+  const dir = await Deno.makeTempDir();
+  const sessionsDir = path.join(dir, "sessions");
+  const modHref = modImportPath();
+
+  const deckPath = path.join(dir, "feedback.deck.ts");
+  await Deno.writeTextFile(
+    deckPath,
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string(),
+      outputSchema: z.string(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
+  );
+
+  const provider: ModelProvider = {
+    chat() {
+      return Promise.resolve({
+        message: { role: "assistant", content: "ok" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  const server = startWebSocketSimulator({
+    deckPath,
+    modelProvider: provider,
+    port: 0,
+    sessionDir: sessionsDir,
+  });
+
+  const port = (server.addr as Deno.NetAddr).port;
+  const result = await runSimulator(port, { input: "hello", stream: false });
+  assert(result.workspaceId, "missing workspaceId");
+
+  const sessionDir = path.join(sessionsDir, result.workspaceId!);
+  const state = JSON.parse(
+    await Deno.readTextFile(path.join(sessionDir, "state.json")),
+  ) as {
+    messages?: Array<{ role?: string }>;
+    messageRefs?: Array<{ id?: string }>;
+  };
+  const assistantRef = (state.messages ?? [])
+    .map((message, index) => ({
+      role: message.role,
+      refId: state.messageRefs?.[index]?.id,
+    }))
+    .find((entry) => entry.role === "assistant" && entry.refId);
+  const messageRefId = assistantRef?.refId;
+  assert(messageRefId, "missing messageRefId");
+
+  const res = await fetch(`http://127.0.0.1:${port}/api/simulator/feedback`, {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify({
+      workspaceId: result.workspaceId,
+      messageRefId,
+      score: 1,
+      reason: "ok",
+    }),
+  });
+  assert(res.ok);
+  await res.json();
+
+  const eventsPath = path.join(sessionDir, "events.jsonl");
+  const entries = await readJsonLines(eventsPath);
+  assert(entries.length > 0, "events.jsonl should have entries");
+  assert(
+    entries.some((entry) =>
+      (entry as { type?: string; data?: { type?: string } }).type ===
+        "session" &&
+      (entry as { data?: { type?: string } }).data?.type === "feedback.update"
+    ),
+  );
+
+  await server.shutdown();
+  await server.finished;
+});
+
+Deno.test("session feedback rejects non-response message refs", async () => {
+  const dir = await Deno.makeTempDir();
+  const sessionsDir = path.join(dir, "sessions");
+  const modHref = modImportPath();
+
+  const deckPath = path.join(dir, "feedback-eligibility.deck.ts");
+  await Deno.writeTextFile(
+    deckPath,
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string(),
+      outputSchema: z.string(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
+  );
+
+  const provider: ModelProvider = {
+    chat() {
+      return Promise.resolve({
+        message: { role: "assistant", content: "ok" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  const server = startWebSocketSimulator({
+    deckPath,
+    modelProvider: provider,
+    port: 0,
+    sessionDir: sessionsDir,
+  });
+  const port = (server.addr as Deno.NetAddr).port;
+
+  const first = await runSimulator(port, {
+    input: "hello",
+    message: "hello",
+    stream: false,
+  });
+  const workspaceId = first.workspaceId!;
+  const state = JSON.parse(
+    await Deno.readTextFile(path.join(sessionsDir, workspaceId, "state.json")),
+  ) as {
+    messages?: Array<{ role?: string }>;
+    messageRefs?: Array<{ id?: string }>;
+  };
+  const userRef = (state.messages ?? [])
+    .map((message, index) => ({
+      role: message.role,
+      refId: state.messageRefs?.[index]?.id,
+    }))
+    .find((entry) => entry.role === "user" && entry.refId)?.refId;
+  assert(userRef, "expected user ref");
+
+  const res = await fetch(`http://127.0.0.1:${port}/api/session/feedback`, {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify({
+      workspaceId,
+      messageRefId: userRef,
+      score: 1,
+      reason: "should fail",
+    }),
+  });
+  assertEquals(res.status, 400);
+  const body = await res.json() as { error?: string };
+  assert(
+    typeof body.error === "string" &&
+      body.error.includes("not eligible"),
+  );
+
+  await server.shutdown();
+  await server.finished;
+});
+
+Deno.test("session feedback accepts persisted run message refs when runId is provided", async () => {
+  const dir = await Deno.makeTempDir();
+  const sessionsDir = path.join(dir, "sessions");
+  const modHref = modImportPath();
+
+  const deckPath = path.join(dir, "feedback-persisted-run.deck.ts");
+  await Deno.writeTextFile(
+    deckPath,
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string().optional(),
+      outputSchema: z.string().optional(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
+  );
+
+  const workspaceId = "workspace-feedback-persisted";
+  const workspaceDir = path.join(sessionsDir, workspaceId);
+  await Deno.mkdir(workspaceDir, { recursive: true });
+  await Deno.writeTextFile(
+    path.join(workspaceDir, "state.json"),
+    JSON.stringify({
+      runId: "session-current",
+      messages: [{ role: "assistant", content: "latest response" }],
+      messageRefs: [{ id: "msg-current", role: "assistant" }],
+      meta: {
+        workspaceSchemaVersion: "workspace-state.v1",
+        sessionId: workspaceId,
+        workspaceId,
+      },
+    }),
+  );
+  await Deno.writeTextFile(
+    path.join(workspaceDir, "events.jsonl"),
+    JSON.stringify({
+      type: "test",
+      at: new Date().toISOString(),
+      data: {
+        type: "testBotStatus",
+        run: {
+          id: "testbot-older-run",
+          status: "completed",
+          workspaceId,
+          sessionId: workspaceId,
+          messages: [{
+            role: "assistant",
+            content: "older run response",
+            messageRefId: "msg-from-older-run",
+          }],
+          traces: [],
+          toolInserts: [],
+        },
+      },
+    }) + "\n",
+  );
+
+  const provider: ModelProvider = {
+    chat() {
+      return Promise.resolve({
+        message: { role: "assistant", content: "ok" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  const server = startWebSocketSimulator({
+    deckPath,
+    modelProvider: provider,
+    port: 0,
+    sessionDir: sessionsDir,
+  });
+  const port = (server.addr as Deno.NetAddr).port;
+
+  const res = await fetch(`http://127.0.0.1:${port}/api/session/feedback`, {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify({
+      workspaceId,
+      runId: "testbot-older-run",
+      messageRefId: "msg-from-older-run",
+      score: -3,
+      reason: "old run message",
+    }),
+  });
+  assertEquals(res.status, 200);
+  const body = await res.json() as {
+    feedback?: { runId?: string; messageRefId?: string; score?: number };
+  };
+  assertEquals(body.feedback?.runId, "testbot-older-run");
+  assertEquals(body.feedback?.messageRefId, "msg-from-older-run");
+  assertEquals(body.feedback?.score, -3);
+
+  const nextState = JSON.parse(
+    await Deno.readTextFile(path.join(workspaceDir, "state.json")),
+  ) as {
+    feedback?: Array<{ messageRefId?: string; runId?: string }>;
+  };
+  const saved = (nextState.feedback ?? []).find((entry) =>
+    entry.messageRefId === "msg-from-older-run"
+  );
+  assert(saved);
+  assertEquals(saved.runId, "testbot-older-run");
+
+  await server.shutdown();
+  await server.finished;
+});
+
+// Feedback on user-role messages is gated by the message ref's `source`: this
+// test expects a "scenario" ref to be accepted (200) and a "manual" ref to be
+// rejected with a 400 whose error mentions "not eligible".
+Deno.test("session feedback accepts scenario user message refs and rejects manual user refs", async () => {
+  const dir = await Deno.makeTempDir();
+  const sessionsDir = path.join(dir, "sessions");
+  const modHref = modImportPath();
+
+  const deckPath = path.join(dir, "feedback-scenario-user.deck.ts");
+  await Deno.writeTextFile(
+    deckPath,
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string().optional(),
+      outputSchema: z.string().optional(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
+  );
+
+  // Seed a persisted workspace holding one user message ref sourced from a
+  // scenario and one sourced from manual input.
+  const workspaceId = "workspace-feedback-user-source";
+  const workspaceDir = path.join(sessionsDir, workspaceId);
+  await Deno.mkdir(workspaceDir, { recursive: true });
+  await Deno.writeTextFile(
+    path.join(workspaceDir, "state.json"),
+    JSON.stringify({
+      runId: "session-source",
+      messages: [
+        { role: "user", content: "scenario prompt" },
+        { role: "user", content: "manual prompt" },
+      ],
+      messageRefs: [
+        { id: "msg-scenario-user", role: "user", source: "scenario" },
+        { id: "msg-manual-user", role: "user", source: "manual" },
+      ],
+      meta: {
+        workspaceSchemaVersion: "workspace-state.v1",
+        sessionId: workspaceId,
+        workspaceId,
+      },
+    }),
+  );
+
+  // Stub provider; it only needs to satisfy the simulator's constructor.
+  const provider: ModelProvider = {
+    chat() {
+      return Promise.resolve({
+        message: { role: "assistant", content: "ok" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  const server = startWebSocketSimulator({
+    deckPath,
+    modelProvider: provider,
+    port: 0,
+    sessionDir: sessionsDir,
+  });
+  const port = (server.addr as Deno.NetAddr).port;
+
+  // Always stop the server, even when an assertion throws, so a failing test
+  // does not leave the listener running.
+  try {
+    const scenarioRes = await fetch(
+      `http://127.0.0.1:${port}/api/session/feedback`,
+      {
+        method: "POST",
+        headers: { "content-type": "application/json" },
+        body: JSON.stringify({
+          workspaceId,
+          messageRefId: "msg-scenario-user",
+          score: -2,
+          reason: "scenario input quality",
+        }),
+      },
+    );
+    assertEquals(scenarioRes.status, 200);
+    const scenarioBody = await scenarioRes.json() as {
+      feedback?: { messageRefId?: string; score?: number };
+    };
+    assertEquals(scenarioBody.feedback?.messageRefId, "msg-scenario-user");
+    assertEquals(scenarioBody.feedback?.score, -2);
+
+    const manualRes = await fetch(
+      `http://127.0.0.1:${port}/api/session/feedback`,
+      {
+        method: "POST",
+        headers: { "content-type": "application/json" },
+        body: JSON.stringify({
+          workspaceId,
+          messageRefId: "msg-manual-user",
+          score: -1,
+          reason: "manual input should reject",
+        }),
+      },
+    );
+    assertEquals(manualRes.status, 400);
+    const manualBody = await manualRes.json() as { error?: string };
+    assert(
+      typeof manualBody.error === "string" &&
+        manualBody.error.includes("not eligible"),
+    );
+  } finally {
+    await server.shutdown();
+    await server.finished;
+  }
+});
+
+// Writing a reference sample twice for the same turn must append a second
+// revision (keeping the first one intact) rather than overwrite it, and a turn
+// that carries no `messageRefId` must be rejected with a 400.
+Deno.test("grading reference writes are append-only revisions and require messageRefId", async () => {
+  const dir = await Deno.makeTempDir();
+  const sessionsDir = path.join(dir, "sessions");
+  const modHref = modImportPath();
+
+  const deckPath = path.join(dir, "grading-reference.deck.ts");
+  await Deno.writeTextFile(
+    deckPath,
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string().optional(),
+      outputSchema: z.string().optional(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
+  );
+
+  // Stub provider; it only needs to satisfy the simulator's constructor.
+  const provider: ModelProvider = {
+    chat() {
+      return Promise.resolve({
+        message: { role: "assistant", content: "ok" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  // Seed a workspace with one completed grading run: turn 0 carries a
+  // messageRefId, turn 1 deliberately does not.
+  const workspaceId = "workspace-grading";
+  const workspaceDir = path.join(sessionsDir, workspaceId);
+  await Deno.mkdir(workspaceDir, { recursive: true });
+  await Deno.writeTextFile(
+    path.join(workspaceDir, "state.json"),
+    JSON.stringify({
+      runId: workspaceId,
+      messages: [{ role: "assistant", content: "assistant output" }],
+      messageRefs: [{ id: "msg-1", role: "assistant" }],
+      meta: {
+        workspaceSchemaVersion: "workspace-state.v1",
+        sessionId: workspaceId,
+        workspaceId,
+        gradingRuns: [{
+          id: "cal-1",
+          graderId: "grader-1",
+          graderPath: deckPath,
+          status: "completed",
+          result: {
+            mode: "turns",
+            totalTurns: 2,
+            turns: [
+              { index: 0, messageRefId: "msg-1", result: { score: 1 } },
+              { index: 1, result: { score: 1 } },
+            ],
+          },
+        }],
+      },
+    }),
+  );
+
+  const server = startWebSocketSimulator({
+    deckPath,
+    modelProvider: provider,
+    port: 0,
+    sessionDir: sessionsDir,
+  });
+  const port = (server.addr as Deno.NetAddr).port;
+
+  // Always stop the server, even when an assertion throws, so a failing test
+  // does not leave the listener running.
+  try {
+    const writeReference = async () =>
+      await fetch(`http://127.0.0.1:${port}/api/grading/reference`, {
+        method: "POST",
+        headers: { "content-type": "application/json" },
+        body: JSON.stringify({
+          workspaceId,
+          runId: "cal-1",
+          turnIndex: 0,
+          referenceSample: { score: 2, reason: "expected", evidence: ["x"] },
+        }),
+      });
+
+    const firstWrite = await writeReference();
+    assertEquals(firstWrite.status, 200);
+    const firstBody = await firstWrite.json() as {
+      run?: { result?: { turns?: Array<Record<string, unknown>> } };
+    };
+    const firstTurn = firstBody.run?.result?.turns?.find((turn) =>
+      turn.index === 0
+    );
+    const firstRevisions = Array.isArray(firstTurn?.referenceSampleRevisions)
+      ? firstTurn?.referenceSampleRevisions as Array<Record<string, unknown>>
+      : [];
+    assertEquals(firstRevisions.length, 1);
+    const firstRevisionId = firstRevisions[0]?.artifactRevisionId;
+    assert(typeof firstRevisionId === "string");
+
+    // The second identical write must keep revision #1 and add revision #2.
+    const secondWrite = await writeReference();
+    assertEquals(secondWrite.status, 200);
+    const secondBody = await secondWrite.json() as {
+      run?: { result?: { turns?: Array<Record<string, unknown>> } };
+    };
+    const secondTurn = secondBody.run?.result?.turns?.find((turn) =>
+      turn.index === 0
+    );
+    const secondRevisions = Array.isArray(secondTurn?.referenceSampleRevisions)
+      ? secondTurn?.referenceSampleRevisions as Array<Record<string, unknown>>
+      : [];
+    assertEquals(secondRevisions.length, 2);
+    assertEquals(secondRevisions[0]?.artifactRevisionId, firstRevisionId);
+    assert(
+      secondTurn?.referenceSample &&
+        typeof (secondTurn.referenceSample as { artifactRevisionId?: unknown })
+            .artifactRevisionId === "string",
+    );
+
+    // Turn 1 has no messageRefId in the seeded state, so the write must fail.
+    const missingRefRes = await fetch(
+      `http://127.0.0.1:${port}/api/grading/reference`,
+      {
+        method: "POST",
+        headers: { "content-type": "application/json" },
+        body: JSON.stringify({
+          workspaceId,
+          runId: "cal-1",
+          turnIndex: 1,
+          referenceSample: { score: 0, reason: "missing ref" },
+        }),
+      },
+    );
+    assertEquals(missingRefRes.status, 400);
+    const missingRefBody = await missingRefRes.json() as { error?: string };
+    assert(
+      typeof missingRefBody.error === "string" &&
+        missingRefBody.error.includes("Missing messageRefId"),
+    );
+  } finally {
+    await server.shutdown();
+    await server.finished;
+  }
+});
diff --git a/src/server_feedback_grading_routes.ts b/src/server_feedback_grading_routes.ts
new file mode 100644
index 000000000..656ce4037
--- /dev/null
+++ b/src/server_feedback_grading_routes.ts
@@ -0,0 +1,418 @@
+import type { SavedState } from "@bolt-foundry/gambit-core";
+import type { GradingRunRecord, SessionMeta } from "./server_types.ts";
+
+// Loose JSON object shape used for request bodies and log/event payloads.
+type JsonRecord = Record<string, unknown>;
+
+/**
+ * Dependencies injected into `handleGradingReferenceRoute`. The handler reads
+ * and writes state only through these callbacks.
+ */
+type HandleGradingReferenceRouteDeps = {
+  // Parsed request URL; the handler only inspects `pathname`.
+  url: URL;
+  // Incoming request; the handler checks `method` and reads the JSON body.
+  req: Request;
+  // Extracts the workspace id from the parsed body; `undefined` is treated as
+  // a "Missing workspaceId" error.
+  getWorkspaceIdFromBody: (body: JsonRecord) => string | undefined;
+  // Awaited with the endpoint path and workspace id before any validation of
+  // the rest of the body. NOTE(review): presumably diagnostic logging only —
+  // confirm against the implementation.
+  logWorkspaceBotRoot: (
+    endpoint: string,
+    workspaceId?: string,
+  ) => Promise<void>;
+  // Loads the saved state for a workspace; `undefined` is treated as
+  // "Workspace not found".
+  readSessionState: (sessionId: string) => SavedState | undefined;
+  // Persists the updated state and returns the state the handler uses for
+  // logging and for building the session summary.
+  persistSessionState: (state: SavedState) => SavedState;
+  // Called once per successful write with a `grading.reference` payload.
+  appendGradingLog: (state: SavedState, payload: JsonRecord) => void;
+  // Builds the `session` object included in the response and stream events.
+  buildSessionMeta: (workspaceId: string, state: SavedState) => SessionMeta;
+  // Publishes an event to the stream with the given id.
+  appendDurableStreamEvent: (streamId: string, payload: unknown) => void;
+  // Stream ids that both receive the `calibrateSession` event.
+  workspaceStreamId: string;
+  gradeStreamId: string;
+  // Parses the stored offset/sequence values from `state.meta`; `undefined`
+  // makes the handler fall back to the next candidate.
+  parseFiniteInteger: (value: unknown) => number | undefined;
+  // Generates the `artifactRevisionId` for each new reference revision.
+  randomId: (prefix: string) => string;
+};
+
+/**
+ * Handles `POST /api/grading/reference`: records a reference sample (score,
+ * reason, optional evidence) on a stored grading run, or on a single turn of
+ * that run when the body carries a numeric `turnIndex`.
+ *
+ * Writes are append-only. Every call mints a fresh `artifactRevisionId`,
+ * appends a revision to `referenceSampleRevisions`, and replaces
+ * `referenceSample` with the latest value. The updated run list is persisted
+ * under `meta.gradingRuns`, logged via `appendGradingLog`, and published as a
+ * `calibrateSession` event on both the workspace and grade streams.
+ *
+ * @param deps Request data plus the state/log/stream callbacks to use.
+ * @returns `null` when the path is not `/api/grading/reference` (presumably
+ *   so the caller can try other routes); a 405 for non-POST requests; a 400
+ *   JSON `{ error }` for any validation or lookup failure; otherwise a 200
+ *   JSON `{ workspaceId, run, session }`.
+ */
+export const handleGradingReferenceRoute = async (
+  deps: HandleGradingReferenceRouteDeps,
+): Promise<Response | null> => {
+  const {
+    url,
+    req,
+    getWorkspaceIdFromBody,
+    logWorkspaceBotRoot,
+    readSessionState,
+    persistSessionState,
+    appendGradingLog,
+    buildSessionMeta,
+    appendDurableStreamEvent,
+    workspaceStreamId,
+    gradeStreamId,
+    parseFiniteInteger,
+    randomId,
+  } = deps;
+
+  if (url.pathname !== "/api/grading/reference") return null;
+  if (req.method !== "POST") {
+    return new Response("Method not allowed", { status: 405 });
+  }
+  // Everything thrown below (including JSON parse errors) becomes a 400.
+  try {
+    const body = await req.json() as {
+      workspaceId?: string;
+      runId?: string;
+      turnIndex?: number;
+      referenceSample?: {
+        score?: number;
+        reason?: string;
+        evidence?: Array<string>;
+      };
+    };
+    const workspaceId = getWorkspaceIdFromBody(body as JsonRecord);
+    if (!workspaceId) throw new Error("Missing workspaceId");
+    await logWorkspaceBotRoot("/api/grading/reference", workspaceId);
+    if (!body.runId) throw new Error("Missing runId");
+    if (!body.referenceSample) {
+      throw new Error("Missing referenceSample");
+    }
+    const score = body.referenceSample.score;
+    // JSON cannot encode NaN, but a literal such as `1e999` parses to
+    // Infinity and would later be serialized back as `null`. Accept finite
+    // numbers only.
+    if (typeof score !== "number" || !Number.isFinite(score)) {
+      throw new Error("Invalid reference score");
+    }
+    const reason = body.referenceSample.reason;
+    if (typeof reason !== "string" || reason.trim().length === 0) {
+      throw new Error("Missing reference reason");
+    }
+    // Keep only non-blank string evidence; a non-array value is dropped.
+    const evidence = Array.isArray(body.referenceSample.evidence)
+      ? body.referenceSample.evidence.filter((e) =>
+        typeof e === "string" && e.trim().length > 0
+      )
+      : undefined;
+    const state = readSessionState(workspaceId);
+    if (!state) throw new Error("Workspace not found");
+    // Prefer `meta.gradingRuns`; fall back to `meta.calibrationRuns` when the
+    // former is absent (presumably an older key for the same data).
+    const previousRuns = Array.isArray(
+        (state.meta as { gradingRuns?: unknown })?.gradingRuns,
+      )
+      ? ((state.meta as { gradingRuns: Array<GradingRunRecord> })
+        .gradingRuns)
+      : Array.isArray(state.meta?.calibrationRuns)
+      ? (state.meta?.calibrationRuns as Array<GradingRunRecord>)
+      : [];
+    const index = previousRuns.findIndex((run) => run.id === body.runId);
+    if (index < 0) throw new Error("Run not found");
+    const run = previousRuns[index];
+    // Work on a shallow copy so the stored run object is not mutated.
+    const nextRun: GradingRunRecord = {
+      ...run,
+      id: run.id,
+      workspaceId,
+      gradingRunId: run.id,
+    };
+    // Offset recorded on the revision: one past the last applied offset (or
+    // event sequence), or 0 when neither is available.
+    const nextOffsetGuess = (parseFiniteInteger(
+      (state.meta as { lastAppliedOffset?: unknown } | undefined)
+        ?.lastAppliedOffset,
+    ) ??
+      parseFiniteInteger(
+        (state.meta as { lastAppliedEventSeq?: unknown } | undefined)
+          ?.lastAppliedEventSeq,
+      ) ??
+      -1) + 1;
+    if (typeof body.turnIndex === "number") {
+      // Turn-level reference: only valid for runs whose result is in "turns"
+      // mode with a `turns` array.
+      const result = run.result;
+      const turnIndex = body.turnIndex;
+      if (
+        !result || typeof result !== "object" ||
+        (result as { mode?: unknown }).mode !== "turns" ||
+        !Array.isArray((result as { turns?: unknown }).turns)
+      ) {
+        throw new Error("Run does not support turn references");
+      }
+      const turns = (result as {
+        turns: Array<Record<string, unknown>>;
+      }).turns.map((turn) => ({ ...turn }));
+      // Match on the turn's own `index` field, not its array position.
+      const targetIndex = turns.findIndex((turn) => turn.index === turnIndex);
+      if (targetIndex < 0) {
+        throw new Error("Turn not found");
+      }
+      const targetTurn = turns[targetIndex];
+      const messageRefId = typeof targetTurn.messageRefId === "string"
+        ? targetTurn.messageRefId
+        : undefined;
+      if (!messageRefId) {
+        throw new Error(
+          "Missing messageRefId for grading turn artifact",
+        );
+      }
+      const artifactRevisionId = randomId("grade-ref-rev");
+      const referenceRevision = {
+        artifactRevisionId,
+        workspaceId,
+        gradingRunId: run.id,
+        turnIndex,
+        messageRefId,
+        offset: nextOffsetGuess,
+        createdAt: new Date().toISOString(),
+        referenceSample: { score, reason, evidence },
+      };
+      const previousRevisions = Array.isArray(
+          targetTurn.referenceSampleRevisions,
+        )
+        ? targetTurn.referenceSampleRevisions as Array<
+          Record<string, unknown>
+        >
+        : [];
+      // `referenceSample` always mirrors the newest revision; the history
+      // lives in `referenceSampleRevisions`.
+      turns[targetIndex] = {
+        ...targetTurn,
+        workspaceId,
+        gradingRunId: run.id,
+        turnIndex,
+        messageRefId,
+        artifactRevisionId,
+        referenceSample: {
+          score,
+          reason,
+          evidence,
+          artifactRevisionId,
+          workspaceId,
+          gradingRunId: run.id,
+          turnIndex,
+          messageRefId,
+        },
+        referenceSampleRevisions: [
+          ...previousRevisions,
+          referenceRevision,
+        ],
+      };
+      nextRun.result = { ...(result as object), turns };
+    } else {
+      // Run-level reference: same append-only scheme, stored on the run.
+      const artifactRevisionId = randomId("grade-ref-rev");
+      const nextRunRecord = nextRun as Record<string, unknown>;
+      const previousRevisions = Array.isArray(
+          nextRunRecord.referenceSampleRevisions,
+        )
+        ? nextRunRecord.referenceSampleRevisions as Array<unknown>
+        : [];
+      nextRunRecord.artifactRevisionId = artifactRevisionId;
+      nextRunRecord.referenceSampleRevisions = [
+        ...previousRevisions,
+        {
+          artifactRevisionId,
+          workspaceId,
+          gradingRunId: run.id,
+          offset: nextOffsetGuess,
+          createdAt: new Date().toISOString(),
+          referenceSample: { score, reason, evidence },
+        },
+      ];
+      nextRun.referenceSample = {
+        score,
+        reason,
+        evidence,
+        artifactRevisionId,
+        workspaceId,
+        gradingRunId: run.id,
+      };
+    }
+    const nextRuns = previousRuns.map((entry, i) =>
+      i === index ? nextRun : entry
+    );
+    // Always written back under `gradingRuns`, even when the runs were read
+    // from `calibrationRuns`.
+    const nextState = persistSessionState({
+      ...state,
+      meta: {
+        ...(state.meta ?? {}),
+        gradingRuns: nextRuns,
+      },
+    });
+    appendGradingLog(nextState, {
+      type: "grading.reference",
+      run: nextRun,
+      runId: body.runId,
+      turnIndex: body.turnIndex,
+      workspaceId,
+    });
+    const sessionMeta = buildSessionMeta(workspaceId, nextState);
+    // The same event is published on both streams.
+    appendDurableStreamEvent(workspaceStreamId, {
+      type: "calibrateSession",
+      workspaceId,
+      run: nextRun,
+      session: sessionMeta,
+    });
+    appendDurableStreamEvent(gradeStreamId, {
+      type: "calibrateSession",
+      workspaceId,
+      run: nextRun,
+      session: sessionMeta,
+    });
+    return new Response(
+      JSON.stringify({
+        workspaceId,
+        run: nextRun,
+        session: sessionMeta,
+      }),
+      { headers: { "content-type": "application/json" } },
+    );
+  } catch (err) {
+    return new Response(
+      JSON.stringify({
+        error: err instanceof Error ? err.message : String(err),
+      }),
+      { status: 400, headers: { "content-type": "application/json" } },
+    );
+  }
+};
+
+/**
+ * Dependencies injected into `handleFeedbackRoutes`.
+ */
+type HandleFeedbackRoutesDeps = {
+  // Parsed request URL; `pathname` selects the route and `searchParams`
+  // supplies `deckPath` for the listing endpoint.
+  url: URL;
+  // Incoming request; the handler checks `method` and, for the archive
+  // endpoint, reads the JSON body.
+  req: Request;
+  // Directory whose sub-directories are enumerated as workspaces by the
+  // listing endpoint; each directory name is used as the workspace id.
+  sessionsRoot: string;
+  // Extracts the workspace id from the parsed archive request body.
+  getWorkspaceIdFromBody: (body: JsonRecord) => string | undefined;
+  // Loads the saved state for a workspace, or `undefined` when unavailable.
+  readSessionState: (sessionId: string) => SavedState | undefined;
+  // Persists the updated state and returns the state used for logging.
+  persistSessionState: (state: SavedState) => SavedState;
+  // Called with a `feedback.archive` payload after an archive change.
+  appendFeedbackLog: (state: SavedState, payload: JsonRecord) => void;
+  // Called with a `feedback.archive` artifact event after an archive change.
+  appendSessionEvent: (state: SavedState, payload: JsonRecord) => void;
+};
+
+/**
+ * Serves the feedback endpoints.
+ *
+ * - `GET /api/feedback?deckPath=…` scans every directory under
+ *   `sessionsRoot`, collects the feedback entries of workspaces whose
+ *   `meta.deck` equals `deckPath`, and returns them newest-first by
+ *   `createdAt`. Any other method on this path yields a 405.
+ * - `POST /api/feedback/archive` stamps `archivedAt` on the first feedback
+ *   entry matching `messageRefId`, or clears it when `archived` is `false`,
+ *   then persists the state and emits a log entry and a session event.
+ *
+ * Failures inside either endpoint are reported as a 400 JSON `{ error }`.
+ *
+ * @returns `null` for any other path/method combination.
+ */
+export const handleFeedbackRoutes = async (
+  deps: HandleFeedbackRoutesDeps,
+): Promise<Response | null> => {
+  const {
+    url,
+    req,
+    sessionsRoot,
+    getWorkspaceIdFromBody,
+    readSessionState,
+    persistSessionState,
+    appendFeedbackLog,
+    appendSessionEvent,
+  } = deps;
+
+  // Small local helpers so every JSON reply is built the same way.
+  const jsonResponse = (payload: unknown, status = 200): Response =>
+    new Response(JSON.stringify(payload), {
+      status,
+      headers: { "content-type": "application/json" },
+    });
+  const badRequest = (err: unknown): Response =>
+    jsonResponse(
+      { error: err instanceof Error ? err.message : String(err) },
+      400,
+    );
+
+  if (url.pathname === "/api/feedback") {
+    if (req.method !== "GET") {
+      return new Response("Method not allowed", { status: 405 });
+    }
+    const deckPath = url.searchParams.get("deckPath");
+    if (!deckPath) return jsonResponse({ error: "Missing deckPath" }, 400);
+
+    const collected: Array<Record<string, unknown>> = [];
+    try {
+      for await (const dirEntry of Deno.readDir(sessionsRoot)) {
+        if (!dirEntry.isDirectory) continue;
+        const workspaceId = dirEntry.name;
+        const state = readSessionState(workspaceId);
+        // Skip unreadable workspaces and those belonging to another deck.
+        if (!state || state.meta?.deck !== deckPath) continue;
+
+        const entries = Array.isArray(state.feedback) ? state.feedback : [];
+        for (const fb of entries) {
+          if (!fb || typeof fb !== "object") continue;
+          const messageRefId = (fb as { messageRefId?: string })
+            .messageRefId;
+          if (typeof messageRefId !== "string") continue;
+
+          // Refs and messages are parallel arrays: the ref's position is
+          // used to look up the message content.
+          let messageContent: unknown = undefined;
+          if (
+            Array.isArray(state.messageRefs) &&
+            Array.isArray(state.messages)
+          ) {
+            const refPosition = state.messageRefs.findIndex((ref) =>
+              ref?.id === messageRefId
+            );
+            if (refPosition >= 0) {
+              messageContent = state.messages[refPosition]?.content;
+            }
+          }
+
+          collected.push({
+            workspaceId,
+            deck: state.meta?.deck,
+            sessionCreatedAt: state.meta?.sessionCreatedAt,
+            messageRefId,
+            score: (fb as { score?: number }).score,
+            reason: (fb as { reason?: string }).reason,
+            createdAt: (fb as { createdAt?: string }).createdAt,
+            archivedAt: (fb as { archivedAt?: string }).archivedAt,
+            messageContent,
+          });
+        }
+      }
+    } catch (err) {
+      return badRequest(err);
+    }
+
+    // Newest first; entries without `createdAt` compare as the empty string.
+    collected.sort((left, right) =>
+      String(right.createdAt ?? "").localeCompare(
+        String(left.createdAt ?? ""),
+      )
+    );
+    return jsonResponse({ deckPath, items: collected });
+  }
+
+  if (url.pathname === "/api/feedback/archive" && req.method === "POST") {
+    try {
+      const body = await req.json() as {
+        workspaceId?: string;
+        runId?: string;
+        messageRefId?: string;
+        archived?: boolean;
+      };
+      const workspaceId = getWorkspaceIdFromBody(body as JsonRecord);
+      const { messageRefId } = body;
+      if (!workspaceId || !messageRefId) {
+        throw new Error("Missing workspaceId or messageRefId");
+      }
+      const state = readSessionState(workspaceId);
+      if (!state || !Array.isArray(state.feedback)) {
+        throw new Error("Workspace not found");
+      }
+      const position = state.feedback.findIndex((fb) =>
+        (fb as { messageRefId?: string }).messageRefId === messageRefId
+      );
+      if (position === -1) throw new Error("Feedback not found");
+
+      // Anything other than an explicit `archived: false` archives the entry.
+      const shouldArchive = body.archived !== false;
+      const patched = { ...state.feedback[position] };
+      if (shouldArchive) {
+        (patched as Record<string, unknown>).archivedAt = new Date()
+          .toISOString();
+      } else {
+        delete (patched as Record<string, unknown>).archivedAt;
+      }
+      const archivedAt = (patched as { archivedAt?: string }).archivedAt;
+
+      const updated = persistSessionState({
+        ...state,
+        feedback: state.feedback.map((fb, i) => i === position ? patched : fb),
+      });
+      appendFeedbackLog(updated, {
+        type: "feedback.archive",
+        messageRefId,
+        archivedAt,
+        archived: shouldArchive,
+      });
+      appendSessionEvent(updated, {
+        type: "feedback.archive",
+        kind: "artifact",
+        category: "feedback",
+        workspaceId,
+        messageRefId,
+        archivedAt,
+        archived: shouldArchive,
+      });
+      return jsonResponse({
+        workspaceId,
+        messageRefId,
+        archivedAt,
+        saved: true,
+        feedbackCount: updated.feedback?.length ?? 0,
+      });
+    } catch (err) {
+      return badRequest(err);
+    }
+  }
+
+  return null;
+};
diff --git a/src/server_helpers.ts b/src/server_helpers.ts
new file mode 100644
index 000000000..9ab2064ee
--- /dev/null
+++ b/src/server_helpers.ts
@@ -0,0 +1,60 @@
+import * as path from "@std/path";
+
+/**
+ * Builds an identifier of the form `<prefix>-<24 hex characters>`, where the
+ * hex part is the first 24 characters of a random UUID with its dashes
+ * removed.
+ */
+export function randomId(prefix: string): string {
+  const compactUuid = crypto.randomUUID().split("-").join("");
+  return [prefix, compactUuid.substring(0, 24)].join("-");
+}
+
+/**
+ * Reports whether `targetPath` is `basePath` itself or lies beneath it.
+ *
+ * The decision is made on the relative path from base to target: it must not
+ * be absolute and must not climb out through a leading `..` segment. Only a
+ * whole `..` segment counts as climbing out, so a child whose name merely
+ * starts with two dots (e.g. `..cache`) is still inside the base.
+ */
+function isWithinPath(basePath: string, targetPath: string): boolean {
+  const rel = path.relative(basePath, targetPath);
+  // An absolute result means no relative route exists between the two paths.
+  if (path.isAbsolute(rel)) return false;
+  // Matches ".." alone or ".." followed by a path separator, never "..name".
+  return !/^\.\.(?:[\\/]|$)/.test(rel);
+}
+
+/**
+ * Throws when `root` must not be used as a build bot root.
+ *
+ * Two checks are made:
+ * 1. When `gambitBotSourceDir` is non-empty, `root` may neither contain it
+ *    nor be contained by it.
+ * 2. `root/PROMPT.md`, if readable, must not contain both marker paths that
+ *    identify the Gambit Bot source deck. This catches a copied or source
+ *    Gambit Bot workspace even when the running CLI was installed from a
+ *    different location.
+ *
+ * A missing `PROMPT.md` (or one that is a directory) passes the second
+ * check; any other read error is rethrown.
+ */
+export function assertSafeBuildBotRoot(
+  root: string,
+  gambitBotSourceDir: string,
+): void {
+  const overlapsSourceDir = Boolean(gambitBotSourceDir) &&
+    (isWithinPath(root, gambitBotSourceDir) ||
+      isWithinPath(gambitBotSourceDir, root));
+  if (overlapsSourceDir) {
+    throw new Error(
+      `Unsafe build bot root "${root}": overlaps Gambit Bot source directory "${gambitBotSourceDir}"`,
+    );
+  }
+
+  const promptPath = path.join(root, "PROMPT.md");
+  let promptText: string;
+  try {
+    promptText = Deno.readTextFileSync(promptPath);
+  } catch (err) {
+    const promptUnavailable = err instanceof Deno.errors.NotFound ||
+      err instanceof Deno.errors.IsADirectory;
+    if (promptUnavailable) return;
+    throw err;
+  }
+
+  // Both markers must be present for the directory to count as the source
+  // deck.
+  const sourceDeckMarkers = [
+    'path = "../actions/bot_write/PROMPT.md"',
+    'path = "./graders/deck_format_guard/PROMPT.md"',
+  ];
+  if (sourceDeckMarkers.every((marker) => promptText.includes(marker))) {
+    throw new Error(
+      `Unsafe build bot root "${root}": appears to be the Gambit Bot source deck directory`,
+    );
+  }
+}
+
+/**
+ * Resolves a default that may be given either as a plain value or as a
+ * zero-argument factory.
+ *
+ * @param raw A value, or a function to call for the value.
+ * @returns `raw` unchanged when it is not a function; otherwise the
+ *   function's result, or `undefined` if calling it throws.
+ */
+export function resolveDefaultValue(raw: unknown): unknown {
+  if (typeof raw !== "function") return raw;
+  try {
+    return raw();
+  } catch {
+    // A throwing factory is treated as "no default".
+    return undefined;
+  }
+}
diff --git a/src/server_openresponses_shim.test.ts b/src/server_openresponses_shim.test.ts
new file mode 100644
index 000000000..90913c199
--- /dev/null
+++ b/src/server_openresponses_shim.test.ts
@@ -0,0 +1,296 @@
+import { assertEquals, assertStringIncludes } from "@std/assert";
+import * as path from "@std/path";
+import { startWebSocketSimulator } from "./server.ts";
+import type {
+  CreateResponseRequest,
+  ModelProvider,
+} from "@bolt-foundry/gambit-core";
+import { modImportPath } from "./server_test_utils.ts";
+
+// Non-streaming shim path: a POST to /v1/responses must reach the provider's
+// `responses` hook (never `chat`), with string message content normalized to
+// an `input_text` part and flat function tools nested under `function`.
+Deno.test("serve shim exposes /v1/responses non-stream endpoint", async () => {
+  const tempDir = await Deno.makeTempDir();
+  const deckPath = path.join(tempDir, "shim.deck.ts");
+  const modUrl = modImportPath();
+  const deckSource = `
+    import { defineDeck } from "${modUrl}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string().optional(),
+      outputSchema: z.string().optional(),
+      modelParams: { model: "dummy-model" },
+    });
+    `;
+  await Deno.writeTextFile(deckPath, deckSource);
+
+  // Filled in by the provider stub so the forwarded request can be inspected.
+  let captured: CreateResponseRequest | undefined;
+  const provider: ModelProvider = {
+    chat() {
+      throw new Error("chat should not be called");
+    },
+    responses(input) {
+      captured = input.request;
+      return Promise.resolve({
+        id: "resp_1",
+        object: "response",
+        status: "completed",
+        output: [{
+          type: "message",
+          role: "assistant",
+          content: [{ type: "output_text", text: "hello" }],
+        }],
+      });
+    },
+  };
+
+  const requestPayload = {
+    model: "openrouter/gpt-4o-mini",
+    input: [{
+      type: "message",
+      role: "user",
+      content: "Say hello.",
+    }],
+    tools: [{
+      type: "function",
+      name: "get_weather",
+      description: "Get weather",
+      parameters: {
+        type: "object",
+        properties: { location: { type: "string" } },
+        required: ["location"],
+      },
+    }],
+  };
+
+  const server = startWebSocketSimulator({
+    deckPath,
+    modelProvider: provider,
+    port: 0,
+  });
+  const { port } = server.addr as Deno.NetAddr;
+  try {
+    const response = await fetch(`http://127.0.0.1:${port}/v1/responses`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify(requestPayload),
+    });
+    assertEquals(response.status, 200);
+
+    // The provider's response is returned to the client.
+    const payload = await response.json() as { id?: string; object?: string };
+    assertEquals(payload.id, "resp_1");
+    assertEquals(payload.object, "response");
+
+    // The provider received the normalized request.
+    assertEquals(captured?.model, "openrouter/gpt-4o-mini");
+    assertEquals(captured?.input[0].type, "message");
+    const firstItem = captured?.input[0];
+    if (firstItem?.type !== "message") {
+      throw new Error("Expected first input item to be message");
+    }
+    assertEquals(firstItem.content[0].type, "input_text");
+    assertEquals(firstItem.content[0].text, "Say hello.");
+    assertEquals(captured?.tools?.[0].type, "function");
+    assertEquals(captured?.tools?.[0].function.name, "get_weather");
+  } finally {
+    await server.shutdown();
+    await server.finished;
+  }
+});
+
+// Streaming shim path: with `stream: true` the endpoint must answer with an
+// event stream that carries the provider's stream events plus a
+// `response.completed` event, which the provider stub itself never emits.
+Deno.test("serve shim streams /v1/responses as SSE", async () => {
+  const tempDir = await Deno.makeTempDir();
+  const deckPath = path.join(tempDir, "shim-stream.deck.ts");
+  const modUrl = modImportPath();
+  const deckSource = `
+    import { defineDeck } from "${modUrl}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string().optional(),
+      outputSchema: z.string().optional(),
+      modelParams: { model: "dummy-model" },
+    });
+    `;
+  await Deno.writeTextFile(deckPath, deckSource);
+
+  const provider: ModelProvider = {
+    chat() {
+      throw new Error("chat should not be called");
+    },
+    responses(input) {
+      // Emit created -> delta -> done before resolving the final response.
+      input.onStreamEvent?.({
+        type: "response.created",
+        response: {
+          id: "resp_stream",
+          object: "response",
+          status: "in_progress",
+          output: [],
+        },
+      });
+      input.onStreamEvent?.({
+        type: "response.output_text.delta",
+        output_index: 0,
+        delta: "he",
+      });
+      input.onStreamEvent?.({
+        type: "response.output_text.done",
+        output_index: 0,
+        text: "hello",
+      });
+      return Promise.resolve({
+        id: "resp_stream",
+        object: "response",
+        status: "completed",
+        output: [{
+          type: "message",
+          role: "assistant",
+          content: [{ type: "output_text", text: "hello" }],
+        }],
+      });
+    },
+  };
+
+  const requestPayload = {
+    model: "openrouter/gpt-4o-mini",
+    input: [{ type: "message", role: "user", content: "hello?" }],
+    stream: true,
+  };
+
+  const server = startWebSocketSimulator({
+    deckPath,
+    modelProvider: provider,
+    port: 0,
+  });
+  const { port } = server.addr as Deno.NetAddr;
+  try {
+    const response = await fetch(`http://127.0.0.1:${port}/v1/responses`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify(requestPayload),
+    });
+    assertEquals(response.status, 200);
+    const contentType = response.headers.get("content-type") ?? "";
+    assertStringIncludes(contentType, "text/event-stream");
+
+    const streamText = await response.text();
+    assertStringIncludes(streamText, '"type":"response.created"');
+    assertStringIncludes(streamText, '"type":"response.output_text.delta"');
+    assertStringIncludes(streamText, '"type":"response.completed"');
+  } finally {
+    await server.shutdown();
+    await server.finished;
+  }
+});
+
+// Extended request fields must be forwarded to the provider unchanged, and a
+// subset of them must be echoed back on the HTTP response body.
+Deno.test("serve shim forwards extended top-level responses fields", async () => {
+  const tempDir = await Deno.makeTempDir();
+  const deckPath = path.join(tempDir, "shim-extended.deck.ts");
+  const modUrl = modImportPath();
+  const deckSource = `
+    import { defineDeck } from "${modUrl}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string().optional(),
+      outputSchema: z.string().optional(),
+      modelParams: { model: "dummy-model" },
+    });
+    `;
+  await Deno.writeTextFile(deckPath, deckSource);
+
+  // Filled in by the provider stub so the forwarded request can be inspected.
+  let captured: CreateResponseRequest | undefined;
+  const provider: ModelProvider = {
+    chat() {
+      throw new Error("chat should not be called");
+    },
+    responses(input) {
+      captured = input.request;
+      return Promise.resolve({
+        id: "resp_extended",
+        object: "response",
+        status: "completed",
+        output: [{
+          type: "message",
+          role: "assistant",
+          content: [{ type: "output_text", text: "ok" }],
+        }],
+      });
+    },
+  };
+
+  const requestPayload: Record<string, unknown> = {
+    model: "openrouter/gpt-4o-mini",
+    input: [{ type: "message", role: "user", content: "hello" }],
+    previous_response_id: "resp_prev",
+    store: true,
+    reasoning: { effort: "xhigh", summary: "detailed" },
+    parallel_tool_calls: true,
+    max_tool_calls: 3,
+    temperature: 0.1,
+    top_p: 0.7,
+    frequency_penalty: 0.2,
+    presence_penalty: 0.3,
+    max_output_tokens: 123,
+    include: ["reasoning.encrypted_content"],
+    text: { format: { type: "json_object" }, verbosity: "high" },
+    truncation: "auto",
+    stream_options: { include_obfuscation: false },
+    background: true,
+    service_tier: "flex",
+    top_logprobs: 5,
+    metadata: { source: "shim-test" },
+    safety_identifier: "safe-1",
+    prompt_cache_key: "cache-1",
+  };
+
+  // Fields the response body is expected to echo with the value that was
+  // sent, checked in this order.
+  const echoedFields = [
+    "previous_response_id",
+    "parallel_tool_calls",
+    "max_tool_calls",
+    "max_output_tokens",
+    "top_p",
+    "frequency_penalty",
+    "presence_penalty",
+    "temperature",
+    "truncation",
+    "background",
+    "service_tier",
+    "top_logprobs",
+    "metadata",
+    "safety_identifier",
+    "prompt_cache_key",
+  ];
+
+  const server = startWebSocketSimulator({
+    deckPath,
+    modelProvider: provider,
+    port: 0,
+  });
+  const { port } = server.addr as Deno.NetAddr;
+  try {
+    const response = await fetch(`http://127.0.0.1:${port}/v1/responses`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify(requestPayload),
+    });
+    assertEquals(response.status, 200);
+    const responseBody = await response.json() as Record<string, unknown>;
+
+    // What the provider received.
+    assertEquals(captured?.previous_response_id, "resp_prev");
+    assertEquals(captured?.store, true);
+    assertEquals(captured?.reasoning?.effort, "xhigh");
+    assertEquals(captured?.reasoning?.summary, "detailed");
+    assertEquals(captured?.parallel_tool_calls, true);
+    assertEquals(captured?.max_tool_calls, 3);
+    assertEquals(captured?.temperature, 0.1);
+    assertEquals(captured?.top_p, 0.7);
+    assertEquals(captured?.frequency_penalty, 0.2);
+    assertEquals(captured?.presence_penalty, 0.3);
+    assertEquals(captured?.max_output_tokens, 123);
+    assertEquals(captured?.include, ["reasoning.encrypted_content"]);
+    assertEquals(captured?.truncation, "auto");
+    assertEquals(captured?.background, true);
+    assertEquals(captured?.service_tier, "flex");
+    assertEquals(captured?.top_logprobs, 5);
+    assertEquals(captured?.metadata, { source: "shim-test" });
+    assertEquals(captured?.safety_identifier, "safe-1");
+    assertEquals(captured?.prompt_cache_key, "cache-1");
+    assertEquals(captured?.stream_options, { include_obfuscation: false });
+
+    // What the client got back.
+    for (const field of echoedFields) {
+      assertEquals(responseBody[field], requestPayload[field]);
+    }
+  } finally {
+    await server.shutdown();
+    await server.finished;
+  }
+});
diff --git a/src/server.test.ts b/src/server_routes_state.test.ts
similarity index 64%
rename from src/server.test.ts
rename to src/server_routes_state.test.ts
index 6d835b299..4e15d799a 100644
--- a/src/server.test.ts
+++ b/src/server_routes_state.test.ts
@@ -2,73 +2,33 @@ import { assert, assertEquals } from "@std/assert";
 import * as path from "@std/path";
 import { startWebSocketSimulator } from "./server.ts";
 import type { ModelProvider } from "@bolt-foundry/gambit-core";
-
-function modImportPath() {
-  const here = path.dirname(path.fromFileUrl(import.meta.url));
-  const modPath = path.resolve(here, "..", "mod.ts");
-  return path.toFileUrl(modPath).href;
-}
-
-async function runSimulator(
-  port: number,
-  payload: Record<string, unknown>,
-): Promise<{ runId?: string; sessionId?: string }> {
-  const res = await fetch(`http://127.0.0.1:${port}/api/simulator/run`, {
-    method: "POST",
-    headers: { "content-type": "application/json" },
-    body: JSON.stringify(payload),
-  });
-  const body = await res.json().catch(() => ({}));
-  if (!res.ok) {
-    throw new Error(
-      typeof body?.error === "string" ? body.error : res.statusText,
-    );
-  }
-  return body as { runId?: string; sessionId?: string };
-}
-
-async function readStreamEvents(port: number, offset = 0) {
-  const res = await fetch(
-    `http://127.0.0.1:${port}/api/durable-streams/stream/gambit-simulator?offset=${offset}`,
-  );
-  if (!res.ok) {
-    throw new Error(res.statusText);
-  }
-  const body = await res.json() as {
-    events?: Array<{ offset?: number; data?: unknown }>;
-  };
-  return body.events ?? [];
-}
-
-async function readJsonLines(filePath: string): Promise<Array<unknown>> {
-  const text = await Deno.readTextFile(filePath);
-  return text.split("\n").filter((line) => line.trim().length > 0).map((line) =>
-    JSON.parse(line)
-  );
-}
-
-Deno.test("simulator streams responses", async () => {
+import {
+  modImportPath,
+  readJsonLines,
+  readStreamEvents,
+  runSimulator,
+} from "./server_test_utils.ts";
+
+Deno.test("build bot rejects roots that overlap Gambit Bot source directory", async () => {
   const dir = await Deno.makeTempDir();
   const modHref = modImportPath();
 
-  const deckPath = path.join(dir, "ws.deck.ts");
+  const deckPath = path.join(dir, "build-primary.deck.ts");
   await Deno.writeTextFile(
     deckPath,
     `
     import { defineDeck } from "${modHref}";
     import { z } from "zod";
     export default defineDeck({
-      inputSchema: z.string(),
-      outputSchema: z.string(),
+      inputSchema: z.string().optional(),
+      outputSchema: z.string().optional(),
       modelParams: { model: "dummy-model" },
     });
     `,
   );
 
   const provider: ModelProvider = {
-    chat(input) {
-      input.onStreamText?.("h");
-      input.onStreamText?.("i");
+    chat() {
       return Promise.resolve({
         message: { role: "assistant", content: "hi" },
         finishReason: "stop",
@@ -76,52 +36,59 @@ Deno.test("simulator streams responses", async () => {
     },
   };
 
+  const previous = Deno.env.get("GAMBIT_SIMULATOR_BUILD_BOT_ROOT");
+  const unsafeRoot = path.resolve(
+    path.dirname(path.fromFileUrl(import.meta.url)),
+    "decks",
+    "gambit-bot",
+  );
+  Deno.env.set("GAMBIT_SIMULATOR_BUILD_BOT_ROOT", unsafeRoot);
+
   const server = startWebSocketSimulator({
     deckPath,
     modelProvider: provider,
     port: 0,
   });
-
-  const port = (server.addr as Deno.NetAddr).port;
-
-  const homepage = await fetch(`http://127.0.0.1:${port}/`);
-  const html = await homepage.text();
-  if (!html.includes('id="root"')) {
-    throw new Error("Simulator HTML missing root mount");
+  try {
+    const port = (server.addr as Deno.NetAddr).port;
+    const runId = "unsafe-root-run";
+    const res = await fetch(`http://127.0.0.1:${port}/api/build/message`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({ runId, message: "" }),
+    });
+    const body = await res.json().catch(() => ({})) as { error?: string };
+    assertEquals(res.status, 400);
+    assertEquals(
+      typeof body.error === "string" &&
+        body.error.includes("Unsafe build bot root"),
+      true,
+    );
+  } finally {
+    await server.shutdown();
+    await server.finished;
+    if (previous === undefined) {
+      Deno.env.delete("GAMBIT_SIMULATOR_BUILD_BOT_ROOT");
+    } else {
+      Deno.env.set("GAMBIT_SIMULATOR_BUILD_BOT_ROOT", previous);
+    }
   }
-
-  await runSimulator(port, { input: "hello", stream: true });
-  const events = await readStreamEvents(port, 0);
-  const messages = events.map((event) =>
-    event.data as { type?: string; chunk?: string; result?: unknown }
-  );
-  await server.shutdown();
-  await server.finished;
-
-  const resultMsg = messages.find((m) => m.type === "result");
-  assertEquals(resultMsg?.result, "hi");
-  const streams = messages.filter((m) => m.type === "stream").map((m) =>
-    m.chunk ?? ""
-  )
-    .join("");
-  assertEquals(streams, "hi");
-  assertEquals(messages.some((m) => m.type === "result"), true);
 });
 
-Deno.test("simulator persists snapshot + events and hydrates traces", async () => {
+Deno.test("build API errors are persisted to session errors sidecar", async () => {
   const dir = await Deno.makeTempDir();
   const sessionsDir = path.join(dir, "sessions");
   const modHref = modImportPath();
 
-  const deckPath = path.join(dir, "persist.deck.ts");
+  const deckPath = path.join(dir, "build-errors.deck.ts");
   await Deno.writeTextFile(
     deckPath,
     `
     import { defineDeck } from "${modHref}";
     import { z } from "zod";
     export default defineDeck({
-      inputSchema: z.string(),
-      outputSchema: z.string(),
+      inputSchema: z.string().optional(),
+      outputSchema: z.string().optional(),
       modelParams: { model: "dummy-model" },
     });
     `,
@@ -142,57 +109,55 @@ Deno.test("simulator persists snapshot + events and hydrates traces", async () =
     port: 0,
     sessionDir: sessionsDir,
   });
-
   const port = (server.addr as Deno.NetAddr).port;
-  const result = await runSimulator(port, { input: "hello", stream: false });
-  assert(result.sessionId, "missing sessionId");
-
-  const sessionDir = path.join(sessionsDir, result.sessionId!);
-  const statePath = path.join(sessionDir, "state.json");
-  const eventsPath = path.join(sessionDir, "events.jsonl");
-
-  const state = JSON.parse(await Deno.readTextFile(statePath)) as Record<
-    string,
-    unknown
-  >;
-  assert(!("traces" in state), "state.json should exclude traces");
-  const meta = state.meta as Record<string, unknown>;
-  assertEquals(meta.sessionEventsPath, eventsPath);
-  assertEquals(
-    meta.sessionFeedbackPath,
-    path.join(sessionDir, "feedback.jsonl"),
-  );
-  assertEquals(meta.sessionGradingPath, path.join(sessionDir, "grading.jsonl"));
-
-  const events = await readJsonLines(eventsPath);
-  assert(events.length > 0, "events.jsonl should have entries");
-  assert(
-    events.some((event) =>
-      (event as { type?: string }).type === "session.start"
-    ),
-  );
-  assert(
-    events.some((event) => (event as { kind?: string }).kind === "trace"),
-  );
 
-  const sessionRes = await fetch(
-    `http://127.0.0.1:${port}/api/session?sessionId=${result.sessionId}`,
+  const workspaceRes = await fetch(
+    `http://127.0.0.1:${port}/api/workspace/new`,
+    {
+      method: "POST",
+    },
   );
-  const sessionPayload = await sessionRes.json() as {
-    traces?: Array<unknown>;
-  };
-  assert(Array.isArray(sessionPayload.traces));
-  assert(sessionPayload.traces.length > 0, "traces should hydrate from events");
+  assertEquals(workspaceRes.ok, true);
+  const workspaceBody = await workspaceRes.json() as { workspaceId?: string };
+  const workspaceId = workspaceBody.workspaceId ?? "";
+  assert(workspaceId.length > 0, "missing workspaceId");
+
+  const missingPathRes = await fetch(
+    `http://127.0.0.1:${port}/api/build/file?workspaceId=${
+      encodeURIComponent(workspaceId)
+    }`,
+  );
+  assertEquals(missingPathRes.status, 400);
+  await missingPathRes.text();
+
+  const errorsPath = path.join(sessionsDir, workspaceId, "events.jsonl");
+  const errors = await readJsonLines(errorsPath);
+  assert(errors.length > 0, "events.jsonl should have entries");
+  const hasBuildFileMissingPath = errors.some((entry) => {
+    const row = entry as {
+      type?: string;
+      data?: {
+        endpoint?: string;
+        status?: number;
+        message?: string;
+      };
+    };
+    return row.type === "session" &&
+      row.data?.endpoint === "/api/build/file" &&
+      row.data?.status === 400 &&
+      row.data?.message === "Missing path";
+  });
+  assert(hasBuildFileMissingPath);
 
   await server.shutdown();
   await server.finished;
 });
 
-Deno.test("build bot endpoint streams status and runs", async () => {
+Deno.test("build files API excludes .gambit directory entries", async () => {
   const dir = await Deno.makeTempDir();
   const modHref = modImportPath();
 
-  const deckPath = path.join(dir, "build-primary.deck.ts");
+  const deckPath = path.join(dir, "build-files.deck.ts");
   await Deno.writeTextFile(
     deckPath,
     `
@@ -207,11 +172,9 @@ Deno.test("build bot endpoint streams status and runs", async () => {
   );
 
   const provider: ModelProvider = {
-    chat(input) {
-      input.onStreamText?.("h");
-      input.onStreamText?.("i");
+    chat() {
       return Promise.resolve({
-        message: { role: "assistant", content: "hi" },
+        message: { role: "assistant", content: "ok" },
         finishReason: "stop",
       });
     },
@@ -224,112 +187,53 @@ Deno.test("build bot endpoint streams status and runs", async () => {
   });
   const port = (server.addr as Deno.NetAddr).port;
 
-  const homepage = await fetch(`http://127.0.0.1:${port}/build`);
-  const html = await homepage.text();
-  assert(html.includes("__GAMBIT_BUILD_TAB_ENABLED__"));
-
-  const runId = "test-build-run";
-  const res = await fetch(`http://127.0.0.1:${port}/api/build/message`, {
-    method: "POST",
-    headers: { "content-type": "application/json" },
-    body: JSON.stringify({ runId, message: "" }),
-  });
-  const body = await res.json().catch(() => ({})) as {
-    run?: { id?: string; status?: string };
-    error?: string;
+  const workspaceRes = await fetch(
+    `http://127.0.0.1:${port}/api/workspace/new`,
+    { method: "POST" },
+  );
+  assertEquals(workspaceRes.ok, true);
+  const workspaceBody = await workspaceRes.json() as { workspaceId?: string };
+  const workspaceId = workspaceBody.workspaceId ?? "";
+  assert(workspaceId.length > 0, "missing workspaceId");
+
+  const filesRes = await fetch(
+    `http://127.0.0.1:${port}/api/build/files?workspaceId=${
+      encodeURIComponent(workspaceId)
+    }`,
+  );
+  assertEquals(filesRes.ok, true);
+  const filesBody = await filesRes.json() as {
+    root?: string;
+    entries?: Array<{ path?: string }>;
   };
-  assertEquals(res.ok, true);
-  assertEquals(body.run?.id, runId);
-
-  let status: unknown = null;
-  for (let i = 0; i < 20; i += 1) {
-    const sres = await fetch(
-      `http://127.0.0.1:${port}/api/build/status?runId=${
-        encodeURIComponent(runId)
-      }`,
-    );
-    const sb = await sres.json().catch(() => ({})) as {
-      run?: { status?: string; messages?: Array<{ content?: string }> };
-    };
-    status = sb.run?.status ?? null;
-    if (sb.run?.status === "completed") {
-      assert((sb.run.messages?.[0]?.content ?? "").length > 0);
-      break;
-    }
-    await new Promise((r) => setTimeout(r, 50));
-  }
-  assertEquals(status, "completed");
+  const root = filesBody.root ?? "";
+  assert(root.length > 0, "missing bot root");
 
-  await server.shutdown();
-  await server.finished;
-});
-
-Deno.test("simulator appends feedback log entries", async () => {
-  const dir = await Deno.makeTempDir();
-  const sessionsDir = path.join(dir, "sessions");
-  const modHref = modImportPath();
-
-  const deckPath = path.join(dir, "feedback.deck.ts");
+  await Deno.mkdir(path.join(root, ".gambit", "nested"), { recursive: true });
+  await Deno.writeTextFile(path.join(root, ".gambit", "hidden.txt"), "secret");
   await Deno.writeTextFile(
-    deckPath,
-    `
-    import { defineDeck } from "${modHref}";
-    import { z } from "zod";
-    export default defineDeck({
-      inputSchema: z.string(),
-      outputSchema: z.string(),
-      modelParams: { model: "dummy-model" },
-    });
-    `,
+    path.join(root, ".gambit", "nested", "also-hidden.txt"),
+    "secret",
   );
+  await Deno.writeTextFile(path.join(root, "visible.txt"), "visible");
 
-  const provider: ModelProvider = {
-    chat() {
-      return Promise.resolve({
-        message: { role: "assistant", content: "ok" },
-        finishReason: "stop",
-      });
-    },
+  const refreshedRes = await fetch(
+    `http://127.0.0.1:${port}/api/build/files?workspaceId=${
+      encodeURIComponent(workspaceId)
+    }`,
+  );
+  assertEquals(refreshedRes.ok, true);
+  const refreshedBody = await refreshedRes.json() as {
+    entries?: Array<{ path?: string }>;
   };
+  const paths = (refreshedBody.entries ?? [])
+    .map((entry) => entry.path ?? "")
+    .filter((value) => value.length > 0);
 
-  const server = startWebSocketSimulator({
-    deckPath,
-    modelProvider: provider,
-    port: 0,
-    sessionDir: sessionsDir,
-  });
-
-  const port = (server.addr as Deno.NetAddr).port;
-  const result = await runSimulator(port, { input: "hello", stream: false });
-  assert(result.sessionId, "missing sessionId");
-
-  const sessionDir = path.join(sessionsDir, result.sessionId!);
-  const state = JSON.parse(
-    await Deno.readTextFile(path.join(sessionDir, "state.json")),
-  ) as { messageRefs?: Array<{ id?: string }> };
-  const messageRefId = state.messageRefs?.[0]?.id;
-  assert(messageRefId, "missing messageRefId");
-
-  const res = await fetch(`http://127.0.0.1:${port}/api/simulator/feedback`, {
-    method: "POST",
-    headers: { "content-type": "application/json" },
-    body: JSON.stringify({
-      sessionId: result.sessionId,
-      messageRefId,
-      score: 1,
-      reason: "ok",
-    }),
-  });
-  assert(res.ok);
-  await res.json();
-
-  const feedbackPath = path.join(sessionDir, "feedback.jsonl");
-  const entries = await readJsonLines(feedbackPath);
-  assert(entries.length > 0, "feedback.jsonl should have entries");
-  assert(
-    entries.some((entry) =>
-      (entry as { type?: string }).type === "feedback.update"
-    ),
+  assertEquals(paths.includes("visible.txt"), true);
+  assertEquals(
+    paths.some((value) => value === ".gambit" || value.startsWith(".gambit/")),
+    false,
   );
 
   await server.shutdown();
@@ -493,7 +397,7 @@ Deno.test("simulator preserves state and user input", async () => {
     input: "again",
     message: "again",
     stream: false,
-    sessionId: first.sessionId,
+    workspaceId: first.workspaceId,
   });
   await server.shutdown();
   await server.finished;
@@ -579,7 +483,7 @@ Deno.test("simulator treats follow-up input as a user message when state exists"
   await runSimulator(port, {
     input: "follow-up",
     stream: false,
-    sessionId: first.sessionId,
+    workspaceId: first.workspaceId,
   });
   await server.shutdown();
   await server.finished;
@@ -708,7 +612,7 @@ Deno.test("simulator falls back when provider state lacks messages", async () =>
     input: "two",
     message: "two",
     stream: false,
-    sessionId: first.sessionId,
+    workspaceId: first.workspaceId,
   });
   await server.shutdown();
   await server.finished;
diff --git a/src/server_session_store.test.ts b/src/server_session_store.test.ts
new file mode 100644
index 000000000..338b577af
--- /dev/null
+++ b/src/server_session_store.test.ts
@@ -0,0 +1,638 @@
+import { assert, assertEquals } from "@std/assert";
+import * as path from "@std/path";
+import { startWebSocketSimulator } from "./server.ts";
+import type { ModelProvider } from "@bolt-foundry/gambit-core";
+import {
+  modImportPath,
+  readJsonLines,
+  runSimulator,
+} from "./server_test_utils.ts";
+
+Deno.test("simulator persists snapshot + events and hydrates traces", async () => {
+  const dir = await Deno.makeTempDir();
+  const sessionsDir = path.join(dir, "sessions");
+  const modHref = modImportPath();
+  // Minimal string-in/string-out deck written into the temp dir.
+  const deckPath = path.join(dir, "persist.deck.ts");
+  await Deno.writeTextFile(
+    deckPath,
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string(),
+      outputSchema: z.string(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
+  );
+  // Stub provider: fixed "ok" reply plus usage that includes reasoningTokens.
+  const provider: ModelProvider = {
+    chat() {
+      return Promise.resolve({
+        message: { role: "assistant", content: "ok" },
+        finishReason: "stop",
+        usage: {
+          promptTokens: 5,
+          completionTokens: 3,
+          totalTokens: 8,
+          reasoningTokens: 2,
+        },
+      });
+    },
+  };
+  // port: 0 here; the actual bound port is read back from server.addr below.
+  const server = startWebSocketSimulator({
+    deckPath,
+    modelProvider: provider,
+    port: 0,
+    sessionDir: sessionsDir,
+  });
+
+  const port = (server.addr as Deno.NetAddr).port;
+  const result = await runSimulator(port, { input: "hello", stream: false });
+  assert(result.workspaceId, "missing workspaceId");
+  // Persisted files are expected under <sessionsDir>/<workspaceId>/.
+  const sessionDir = path.join(sessionsDir, result.workspaceId!);
+  const statePath = path.join(sessionDir, "state.json");
+  const eventsPath = path.join(sessionDir, "events.jsonl");
+  // state.json must carry no inline traces, only paths to the sidecar files.
+  const state = JSON.parse(await Deno.readTextFile(statePath)) as Record<
+    string,
+    unknown
+  >;
+  assert(!("traces" in state), "state.json should exclude traces");
+  const meta = state.meta as Record<string, unknown>;
+  assertEquals(meta.sessionEventsPath, eventsPath);
+  assertEquals(
+    meta.sessionBuildStatePath,
+    path.join(sessionDir, "build_state.json"),
+  );
+  // events.jsonl rows are checked via their nested "data" payload.
+  const events = await readJsonLines(eventsPath);
+  assert(events.length > 0, "events.jsonl should have entries");
+  assert(
+    events.some((event) =>
+      (event as { data?: { type?: string } }).data?.type === "session.start"
+    ),
+  );
+  assert(
+    events.some((event) =>
+      (event as { data?: { kind?: string } }).data?.kind === "trace"
+    ),
+  );
+  // Reading the workspace over HTTP must return traces, incl. model.result.
+  const sessionRes = await fetch(
+    `http://127.0.0.1:${port}/api/workspaces/${
+      encodeURIComponent(result.workspaceId ?? "")
+    }`,
+  );
+  const sessionPayload = await sessionRes.json() as {
+    session?: { traces?: Array<unknown> };
+  };
+  assert(Array.isArray(sessionPayload.session?.traces));
+  assert(
+    (sessionPayload.session?.traces?.length ?? 0) > 0,
+    "traces should hydrate from events",
+  );
+  const modelResult = sessionPayload.session?.traces?.find((entry) =>
+    (entry as { type?: string }).type === "model.result"
+  ) as { usage?: { reasoningTokens?: number } } | undefined;
+  assert(modelResult, "expected model.result trace");
+  assertEquals(modelResult.usage?.reasoningTokens, 2);
+  // NOTE(review): no try/finally, so a failed assertion above skips shutdown.
+  await server.shutdown();
+  await server.finished;
+});
+
+Deno.test("simulator run fails when provided workspace state has unsupported schema", async () => {
+  const dir = await Deno.makeTempDir();
+  const sessionsDir = path.join(dir, "sessions");
+  const modHref = modImportPath();
+  // Deck fixture with optional string input and output schemas.
+  const deckPath = path.join(dir, "schema-mismatch.deck.ts");
+  await Deno.writeTextFile(
+    deckPath,
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string().optional(),
+      outputSchema: z.string().optional(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
+  );
+  // Stub provider with a fixed "ok" reply.
+  const provider: ModelProvider = {
+    chat() {
+      return Promise.resolve({
+        message: { role: "assistant", content: "ok" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  const server = startWebSocketSimulator({
+    deckPath,
+    modelProvider: provider,
+    port: 0,
+    sessionDir: sessionsDir,
+  });
+
+  const port = (server.addr as Deno.NetAddr).port;
+  // Seed a workspace on disk whose state.json declares schema version v0.
+  const legacyWorkspaceId = "legacy-workspace";
+  const legacyWorkspaceDir = path.join(sessionsDir, legacyWorkspaceId);
+  await Deno.mkdir(legacyWorkspaceDir, { recursive: true });
+  await Deno.writeTextFile(
+    path.join(legacyWorkspaceDir, "state.json"),
+    JSON.stringify({
+      runId: legacyWorkspaceId,
+      messages: [],
+      meta: {
+        workspaceSchemaVersion: "workspace-state.v0",
+      },
+    }),
+  );
+  // A run against that workspace must be rejected with a 400 schema error.
+  const res = await fetch(`http://127.0.0.1:${port}/api/simulator/run`, {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify({
+      input: "hello",
+      stream: false,
+      workspaceId: legacyWorkspaceId,
+    }),
+  });
+  assertEquals(res.status, 400);
+  const body = await res.json() as { error?: string };
+  assertEquals(
+    body.error?.includes("Unsupported workspace state schema"),
+    true,
+  );
+  // NOTE(review): no try/finally, so a failed assertion above skips shutdown.
+  await server.shutdown();
+  await server.finished;
+});
+
+Deno.test("session delete requires explicit workspaceId when active workspace exists", async () => {
+  const dir = await Deno.makeTempDir();
+  const sessionsDir = path.join(dir, "sessions");
+  const modHref = modImportPath();
+  const activeWorkspaceId = "active-workspace";
+  // Deck fixture with optional string input and output schemas.
+  const deckPath = path.join(dir, "delete-explicit.deck.ts");
+  await Deno.writeTextFile(
+    deckPath,
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string().optional(),
+      outputSchema: z.string().optional(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
+  );
+  // Pre-create the active workspace directory with an empty-object state.json.
+  const activeWorkspaceDir = path.join(sessionsDir, activeWorkspaceId);
+  await Deno.mkdir(activeWorkspaceDir, { recursive: true });
+  await Deno.writeTextFile(path.join(activeWorkspaceDir, "state.json"), "{}");
+  // Stub provider with a fixed "ok" reply.
+  const provider: ModelProvider = {
+    chat() {
+      return Promise.resolve({
+        message: { role: "assistant", content: "ok" },
+        finishReason: "stop",
+      });
+    },
+  };
+  // Start the server with activeWorkspaceId passed as its workspace id.
+  const server = startWebSocketSimulator({
+    deckPath,
+    modelProvider: provider,
+    port: 0,
+    sessionDir: sessionsDir,
+    workspace: {
+      id: activeWorkspaceId,
+      rootDeckPath: deckPath,
+      rootDir: dir,
+    },
+  });
+  const port = (server.addr as Deno.NetAddr).port;
+  // An empty body must be rejected (400) and leave the directory in place.
+  const missingWorkspaceRes = await fetch(
+    `http://127.0.0.1:${port}/api/session/delete`,
+    {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({}),
+    },
+  );
+  assertEquals(missingWorkspaceRes.status, 400);
+  const missingWorkspaceBody = await missingWorkspaceRes.json() as {
+    error?: string;
+  };
+  assertEquals(missingWorkspaceBody.error, "Missing workspaceId");
+  assertEquals(await Deno.stat(activeWorkspaceDir).then(() => true), true);
+  // With an explicit workspaceId the delete succeeds and removes the directory.
+  const deleteRes = await fetch(`http://127.0.0.1:${port}/api/session/delete`, {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify({ workspaceId: activeWorkspaceId }),
+  });
+  assertEquals(deleteRes.status, 200);
+  const deleteBody = await deleteRes.json() as {
+    workspaceId?: string;
+    deleted?: boolean;
+  };
+  assertEquals(deleteBody.workspaceId, activeWorkspaceId);
+  assertEquals(deleteBody.deleted, true);
+  assertEquals(
+    await Deno.stat(activeWorkspaceDir).then(() => true).catch(() => false),
+    false,
+  );
+  // NOTE(review): no try/finally, so a failed assertion above skips shutdown.
+  await server.shutdown();
+  await server.finished;
+});
+
+Deno.test("session events are monotonic and snapshot replay boundary matches highest offset", async () => {
+  const dir = await Deno.makeTempDir();
+  const sessionsDir = path.join(dir, "sessions");
+  const modHref = modImportPath();
+  // Minimal string-in/string-out deck written into the temp dir.
+  const deckPath = path.join(dir, "event-seq.deck.ts");
+  await Deno.writeTextFile(
+    deckPath,
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string(),
+      outputSchema: z.string(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
+  );
+  // Stub provider with a fixed "ok" reply.
+  const provider: ModelProvider = {
+    chat() {
+      return Promise.resolve({
+        message: { role: "assistant", content: "ok" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  const server = startWebSocketSimulator({
+    deckPath,
+    modelProvider: provider,
+    port: 0,
+    sessionDir: sessionsDir,
+  });
+  const port = (server.addr as Deno.NetAddr).port;
+  // Run twice, the second time reusing the workspace created by the first.
+  const first = await runSimulator(port, { input: "one", stream: false });
+  await runSimulator(port, {
+    input: "two",
+    stream: false,
+    workspaceId: first.workspaceId,
+  });
+  // Offsets must step by exactly 1; state.json must record the highest one.
+  const workspaceId = first.workspaceId!;
+  const statePath = path.join(sessionsDir, workspaceId, "state.json");
+  const eventsPath = path.join(sessionsDir, workspaceId, "events.jsonl");
+  const events = await readJsonLines(eventsPath) as Array<
+    { offset?: number }
+  >;
+  const offsets = events.map((event) => event.offset).filter((
+    value,
+  ): value is number => typeof value === "number");
+  assert(offsets.length > 0, "expected sequenced events");
+  for (let i = 1; i < offsets.length; i += 1) {
+    assertEquals(offsets[i], offsets[i - 1] + 1);
+  }
+  const maxOffset = Math.max(...offsets);
+  const state = JSON.parse(await Deno.readTextFile(statePath)) as {
+    meta?: { lastAppliedOffset?: number };
+  };
+  assertEquals(state.meta?.lastAppliedOffset, maxOffset);
+  // NOTE(review): no try/finally, so a failed assertion above skips shutdown.
+  await server.shutdown();
+  await server.finished;
+});
+
+Deno.test("session read rejects corrupted event offset gaps", async () => {
+  const dir = await Deno.makeTempDir();
+  const sessionsDir = path.join(dir, "sessions");
+  const modHref = modImportPath();
+  // Minimal string-in/string-out deck written into the temp dir.
+  const deckPath = path.join(dir, "event-gap.deck.ts");
+  await Deno.writeTextFile(
+    deckPath,
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string(),
+      outputSchema: z.string(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
+  );
+  // Stub provider with a fixed "ok" reply.
+  const provider: ModelProvider = {
+    chat() {
+      return Promise.resolve({
+        message: { role: "assistant", content: "ok" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  const server = startWebSocketSimulator({
+    deckPath,
+    modelProvider: provider,
+    port: 0,
+    sessionDir: sessionsDir,
+  });
+  const port = (server.addr as Deno.NetAddr).port;
+  // Produce real events, then bump the second row's offset by 1 to open a gap.
+  const first = await runSimulator(port, { input: "one", stream: false });
+  const workspaceId = first.workspaceId!;
+  const eventsPath = path.join(sessionsDir, workspaceId, "events.jsonl");
+  const rows = await readJsonLines(eventsPath) as Array<
+    Record<string, unknown>
+  >;
+  assert(rows.length >= 2, "expected at least two events");
+  const secondOffset = rows[1].offset;
+  assert(typeof secondOffset === "number");
+  rows[1].offset = secondOffset + 1;
+  await Deno.writeTextFile(
+    eventsPath,
+    rows.map((row) => JSON.stringify(row)).join("\n") + "\n",
+  );
+  // Reading the workspace must now fail with 400 and "Non-monotonic offset".
+  const sessionRes = await fetch(
+    `http://127.0.0.1:${port}/api/workspaces/${
+      encodeURIComponent(workspaceId)
+    }`,
+  );
+  assertEquals(sessionRes.status, 400);
+  const body = await sessionRes.json() as { error?: string };
+  assert(
+    typeof body.error === "string" &&
+      body.error.includes("Non-monotonic offset"),
+  );
+  // NOTE(review): no try/finally, so a failed assertion above skips shutdown.
+  await server.shutdown();
+  await server.finished;
+});
+
+Deno.test("session offsets remain monotonic when snapshot state write fails", async () => {
+  const dir = await Deno.makeTempDir();
+  const sessionsDir = path.join(dir, "sessions");
+  const modHref = modImportPath();
+  // Minimal string-in/string-out deck written into the temp dir.
+  const deckPath = path.join(dir, "snapshot-write-fail.deck.ts");
+  await Deno.writeTextFile(
+    deckPath,
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string(),
+      outputSchema: z.string(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
+  );
+  // Stub provider with a fixed "ok" reply.
+  const provider: ModelProvider = {
+    chat() {
+      return Promise.resolve({
+        message: { role: "assistant", content: "ok" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  const server = startWebSocketSimulator({
+    deckPath,
+    modelProvider: provider,
+    port: 0,
+    sessionDir: sessionsDir,
+  });
+  const port = (server.addr as Deno.NetAddr).port;
+
+  const first = await runSimulator(port, { input: "one", stream: false });
+  const workspaceId = first.workspaceId!;
+  const workspaceDir = path.join(sessionsDir, workspaceId);
+  const statePath = path.join(workspaceDir, "state.json");
+  const eventsPath = path.join(workspaceDir, "events.jsonl");
+  // Redirect meta.sessionStatePath to a directory (presumably so writes fail).
+  const state = JSON.parse(await Deno.readTextFile(statePath)) as {
+    meta?: Record<string, unknown>;
+  };
+  state.meta = { ...(state.meta ?? {}), sessionStatePath: workspaceDir };
+  await Deno.writeTextFile(statePath, JSON.stringify(state, null, 2) + "\n");
+  // Run twice more against the same workspace.
+  await runSimulator(port, {
+    input: "two",
+    stream: false,
+    workspaceId,
+  });
+  await runSimulator(port, {
+    input: "three",
+    stream: false,
+    workspaceId,
+  });
+  // Every persisted offset must still be exactly the previous one plus 1.
+  const events = await readJsonLines(eventsPath) as Array<{ offset?: number }>;
+  const offsets = events.map((event) => event.offset).filter((
+    value,
+  ): value is number => typeof value === "number");
+  assert(offsets.length >= 3, "expected multiple persisted events");
+  for (let i = 1; i < offsets.length; i += 1) {
+    assertEquals(offsets[i], offsets[i - 1] + 1);
+  }
+  // NOTE(review): no try/finally, so a failed assertion above skips shutdown.
+  await server.shutdown();
+  await server.finished;
+});
+
+Deno.test("test status selects canonical scenario run summary deterministically", async () => {
+  const dir = await Deno.makeTempDir();
+  const sessionsDir = path.join(dir, "sessions");
+  const modHref = modImportPath();
+  // Deck fixture that lists its own path as its only test deck ("test-a").
+  const deckPath = path.join(dir, "scenario-summary.deck.ts");
+  await Deno.writeTextFile(
+    deckPath,
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string().optional(),
+      outputSchema: z.string().optional(),
+      modelParams: { model: "dummy-model" },
+      testDecks: [{ id: "test-a", label: "Test A", path: "${
+      deckPath.replaceAll("\\", "\\\\")
+    }" }],
+    });
+    `,
+  );
+  // Stub provider with a fixed "ok" reply.
+  const provider: ModelProvider = {
+    chat() {
+      return Promise.resolve({
+        message: { role: "assistant", content: "ok" },
+        finishReason: "stop",
+      });
+    },
+  };
+  // Seed state.json with two summaries that tie on lastEventSeq and updatedAt.
+  const workspaceId = "workspace-summary";
+  const workspaceDir = path.join(sessionsDir, workspaceId);
+  await Deno.mkdir(workspaceDir, { recursive: true });
+  await Deno.writeTextFile(
+    path.join(workspaceDir, "state.json"),
+    JSON.stringify({
+      runId: "legacy-run",
+      messages: [{ role: "assistant", content: "hi" }],
+      messageRefs: [{ id: "msg-1", role: "assistant" }],
+      meta: {
+        workspaceSchemaVersion: "workspace-state.v1",
+        sessionId: workspaceId,
+        workspaceId,
+        scenarioRunSummaries: [
+          {
+            scenarioRunId: "run-c",
+            lastEventSeq: 17,
+            updatedAt: "2025-01-01T00:00:00.000Z",
+            selectedScenarioDeckId: "deck-c",
+            scenarioConfigPath: deckPath,
+          },
+          {
+            scenarioRunId: "run-a",
+            lastEventSeq: 17,
+            updatedAt: "2025-01-01T00:00:00.000Z",
+            selectedScenarioDeckId: "deck-a",
+            scenarioConfigPath: deckPath,
+          },
+        ],
+      },
+    }),
+  );
+
+  const server = startWebSocketSimulator({
+    deckPath,
+    modelProvider: provider,
+    port: 0,
+    sessionDir: sessionsDir,
+  });
+  const port = (server.addr as Deno.NetAddr).port;
+  // The expected winner is "run-a", although "run-c" is listed first.
+  const res = await fetch(
+    `http://127.0.0.1:${port}/api/workspaces/${
+      encodeURIComponent(workspaceId)
+    }`,
+  );
+  assertEquals(res.status, 200);
+  const body = await res.json() as {
+    test?: { run?: { id?: string } };
+  };
+  assertEquals(body.test?.run?.id, "run-a");
+  // NOTE(review): no try/finally, so a failed assertion above skips shutdown.
+  await server.shutdown();
+  await server.finished;
+});
+
+Deno.test("workspace endpoint returns projection-backed build + session payload and writes build_state.json", async () => {
+  const dir = await Deno.makeTempDir();
+  const sessionsDir = path.join(dir, "sessions");
+  const modHref = modImportPath();
+  // Deck fixture with optional string input and output schemas.
+  const deckPath = path.join(dir, "workspace-read-model.deck.ts");
+  await Deno.writeTextFile(
+    deckPath,
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string().optional(),
+      outputSchema: z.string().optional(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
+  );
+  // Stub provider: streams "o" and "k", then resolves with the full "ok" reply.
+  const provider: ModelProvider = {
+    chat(input) {
+      input.onStreamText?.("o");
+      input.onStreamText?.("k");
+      return Promise.resolve({
+        message: { role: "assistant", content: "ok" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  const server = startWebSocketSimulator({
+    deckPath,
+    modelProvider: provider,
+    port: 0,
+    sessionDir: sessionsDir,
+  });
+  const port = (server.addr as Deno.NetAddr).port;
+  // Create a fresh workspace; its id is used for the build message below.
+  const createWorkspaceRes = await fetch(
+    `http://127.0.0.1:${port}/api/workspace/new`,
+    { method: "POST" },
+  );
+  const createWorkspaceBody = await createWorkspaceRes.json() as {
+    workspaceId?: string;
+  };
+  const workspaceId = createWorkspaceBody.workspaceId ?? "";
+  assert(workspaceId.length > 0, "workspace id required");
+  // Send a build message for that workspace and drain the response body.
+  const buildRes = await fetch(`http://127.0.0.1:${port}/api/build/message`, {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify({ workspaceId, message: "hello" }),
+  });
+  assertEquals(buildRes.ok, true);
+  await buildRes.text();
+
+  // Poll (30 x 50ms) for the build projection write; fail if it never lands.
+  const workspaceDir = path.join(sessionsDir, workspaceId);
+  const buildStatePath = path.join(workspaceDir, "build_state.json");
+  let buildStateWritten = false;
+  for (let i = 0; i < 30 && !buildStateWritten; i += 1) {
+    const info = await Deno.stat(buildStatePath).catch(() => null);
+    buildStateWritten = info !== null;
+    if (!buildStateWritten) await new Promise((r) => setTimeout(r, 50));
+  }
+  assert(buildStateWritten, "build_state.json was not written");
+
+  const workspaceRes = await fetch(
+    `http://127.0.0.1:${port}/api/workspaces/${
+      encodeURIComponent(workspaceId)
+    }`,
+  );
+  assertEquals(workspaceRes.status, 200);
+  const workspacePayload = await workspaceRes.json() as {
+    workspaceId?: string;
+    build?: { run?: { id?: string } };
+    session?: { workspaceId?: string; messages?: Array<unknown> };
+  };
+  assertEquals(workspacePayload.workspaceId, workspaceId);
+  assertEquals(workspacePayload.build?.run?.id, workspaceId);
+  assertEquals(workspacePayload.session?.workspaceId, workspaceId);
+  assert(Array.isArray(workspacePayload.session?.messages));
+  // NOTE(review): no try/finally, so a failed assertion above skips shutdown.
+  await server.shutdown();
+  await server.finished;
+});
diff --git a/src/server_session_store.ts b/src/server_session_store.ts
new file mode 100644
index 000000000..5d2c4e084
--- /dev/null
+++ b/src/server_session_store.ts
@@ -0,0 +1,909 @@
+import * as path from "@std/path";
+import { existsSync } from "@std/fs";
+import type {
+  FeedbackEntry,
+  SavedState,
+  TraceEvent,
+} from "@bolt-foundry/gambit-core";
+
+export type ScenarioRunSummary = { // Summary of one scenario run, kept under state meta.
+  scenarioRunId: string;
+  lastEventSeq: number; // Filled from meta.lastAppliedOffset / lastAppliedEventSeq by upsertScenarioRunSummary.
+  updatedAt: string;
+  selectedScenarioDeckId: string;
+  scenarioConfigPath: string;
+};
+
+export type WorkspaceEventDomain = "build" | "test" | "grade" | "session"; // Allowed envelope `type` values.
+
+export type WorkspaceEventEnvelope = { // One JSON line of a workspace events log.
+  offset: number; // Position in the log; ensureMonotonicOffsets requires 0, 1, 2, ...
+  createdAt: string;
+  type: WorkspaceEventDomain;
+  data: Record<string, unknown>;
+};
+
+export type BuildProjectionRun = { // Build run portion of the build_state.json projection.
+  id: string; // normalizeBuildProjectionRun falls back to the workspace id.
+  status: "idle" | "running" | "completed" | "error" | "canceled"; // Unknown values normalize to "idle".
+  error?: string;
+  startedAt?: string;
+  finishedAt?: string;
+  messages: Array<{
+    role: string;
+    content: string;
+    messageRefId?: string;
+    feedback?: FeedbackEntry;
+    respondStatus?: number;
+    respondCode?: string;
+    respondMessage?: string;
+    respondPayload?: unknown;
+    respondMeta?: Record<string, unknown>;
+  }>;
+  traces?: Array<TraceEvent>; // Extended by "buildBotTrace" events in replayBuildProjection.
+  toolInserts?: Array<{
+    actionCallId?: string;
+    parentActionCallId?: string;
+    name?: string;
+    index: number;
+  }>;
+};
+
+export type BuildProjectionState = { // Shape written to <sessionsRoot>/<workspaceId>/build_state.json.
+  workspaceId: string;
+  lastAppliedOffset: number; // Offset of the last replayed envelope; -1 when none.
+  run: BuildProjectionRun;
+  state?: SavedState; // Snapshot taken from the most recent "buildBotStatus" event carrying one.
+};
+
+type SessionStoreDeps = { // Dependencies injected into createSessionStore.
+  sessionsRoot: string; // Directory holding one subdirectory per session/workspace id.
+  ensureDir: (dir: string) => void; // Called before every file write in this module.
+  randomId: (prefix: string) => string; // Used to build unique temp file names.
+  logger: { warn: (...args: Array<unknown>) => void };
+  enrichStateWithSession: (state: SavedState) => {
+    state: SavedState;
+    dir?: string; // persistSessionState only writes to disk when this is set.
+  };
+  workspaceStateSchemaVersion: string; // Version accepted by readSessionStateStrict.
+  workspaceSchemaError: ( // Builds the message thrown on a schema version mismatch.
+    workspaceId: string,
+    foundVersion: string | null,
+  ) => string;
+};
+
+const TRACE_EVENT_TYPES = new Set<string>([ // Event `type` values loadSessionTraces treats as traces.
+  "run.start",
+  "message.user",
+  "run.end",
+  "deck.start",
+  "deck.end",
+  "action.start",
+  "action.end",
+  "tool.call",
+  "tool.result",
+  "model.call",
+  "model.result",
+  "model.stream.event",
+  "log",
+  "monolog",
+]);
+
+// Serializes `value` to JSON, replacing only genuine cycles with "[Circular]".
+const safeStringify = (value: unknown, space?: number): string => {
+  const ancestors: Array<unknown> = []; // Path from the root to the current holder.
+  return JSON.stringify(
+    value,
+    function (this: unknown, _key: string, candidate: unknown) {
+      if (!candidate || typeof candidate !== "object") return candidate;
+      // `this` is the object holding `candidate`: truncate the path to it so a
+      // shared, non-cyclic reference under a sibling key is serialized in full.
+      ancestors.length = ancestors.lastIndexOf(this) + 1;
+      if (ancestors.includes(candidate)) return "[Circular]";
+      ancestors.push(candidate);
+      return candidate;
+    },
+    space,
+  );
+};
+
+export const createSessionStore = (deps: SessionStoreDeps) => {
+  const {
+    sessionsRoot,
+    ensureDir,
+    randomId,
+    logger,
+    enrichStateWithSession,
+    workspaceStateSchemaVersion,
+    workspaceSchemaError,
+  } = deps;
+
+  const sessionStateCache = new Map<string, SavedState>(); // Latest known state per session id.
+  const sessionWriteQueues = new Map<string, Array<() => void>>(); // Pending write tasks per session id.
+  const sessionWriteActive = new Set<string>(); // Session ids whose queue is currently being drained.
+  const sessionOffsetById = new Map<string, number>(); // Last known event offset per session id; -1 means no events.
+  const buildProjectionCache = new Map<string, BuildProjectionState>(); // Build projection per workspace id.
+
+  const enqueueSessionWrite = (sessionId: string, task: () => void) => { // Runs `task` in FIFO order per session id.
+    const queue = sessionWriteQueues.get(sessionId) ?? [];
+    queue.push(task);
+    sessionWriteQueues.set(sessionId, queue);
+    if (sessionWriteActive.has(sessionId)) return; // Re-entrant call: the drain already running will pick this task up.
+    sessionWriteActive.add(sessionId);
+    while (queue.length) { // Drains synchronously, including tasks queued while draining.
+      const next = queue.shift();
+      if (!next) continue;
+      try {
+        next();
+      } catch (err) { // A failing task is logged and does not stop later tasks.
+        logger.warn(
+          `[sim] session write failed: ${
+            err instanceof Error ? err.message : err
+          }`,
+        );
+      }
+    }
+    sessionWriteActive.delete(sessionId); // NOTE(review): the emptied queue array stays in sessionWriteQueues.
+  };
+
+  const mergeSessionState = ( // Overlays `next` on `current`; returns `next` as-is when there is no current state.
+    current: SavedState | undefined,
+    next: SavedState,
+  ): SavedState => {
+    if (!current) return next;
+    const merged: SavedState = {
+      ...current,
+      ...next,
+      meta: { // Shallow merge: keys in next.meta override current.meta.
+        ...(current.meta ?? {}),
+        ...(next.meta ?? {}),
+      },
+      messages: next.messages ?? current.messages, // `??` keeps the current value when next's field is null/undefined.
+      items: next.items ?? current.items,
+      format: next.format ?? current.format,
+      messageRefs: next.messageRefs ?? current.messageRefs,
+      feedback: next.feedback ?? current.feedback,
+      notes: next.notes ?? current.notes,
+      conversationScore: next.conversationScore ?? current.conversationScore,
+      traces: next.traces ?? current.traces,
+    };
+    return merged;
+  };
+
+  // Returns `value` if it is an integer-valued number, otherwise undefined.
+  const parseFiniteInteger = (value: unknown): number | undefined => {
+    const ok = typeof value === "number" && Number.isInteger(value);
+    return ok ? (value as number) : undefined; // isInteger rejects NaN/Infinity.
+  };
+
+  const normalizeBuildProjectionRun = ( // Coerces an untrusted value into a BuildProjectionRun.
+    workspaceId: string,
+    value: unknown,
+  ): BuildProjectionRun => {
+    if (!value || typeof value !== "object") { // Non-object input yields an empty idle run.
+      return {
+        id: workspaceId,
+        status: "idle",
+        messages: [],
+        traces: [],
+        toolInserts: [],
+      };
+    }
+    const run = value as Record<string, unknown>;
+    const status = run.status;
+    const normalizedStatus = status === "running" || status === "completed" ||
+        status === "error" || status === "canceled"
+      ? status
+      : "idle"; // Any unrecognized status becomes "idle".
+    return {
+      id: typeof run.id === "string" && run.id.trim().length > 0
+        ? run.id
+        : workspaceId, // Blank or missing id falls back to the workspace id.
+      status: normalizedStatus,
+      error: typeof run.error === "string" ? run.error : undefined,
+      startedAt: typeof run.startedAt === "string" ? run.startedAt : undefined,
+      finishedAt: typeof run.finishedAt === "string"
+        ? run.finishedAt
+        : undefined,
+      messages: Array.isArray(run.messages)
+        ? run.messages as BuildProjectionRun["messages"] // Array elements are not validated individually.
+        : [],
+      traces: Array.isArray(run.traces) ? run.traces as Array<TraceEvent> : [],
+      toolInserts: Array.isArray(run.toolInserts)
+        ? run.toolInserts as BuildProjectionRun["toolInserts"]
+        : [],
+    };
+  };
+
+  const readEnvelopeRecords = ( // Reads a JSONL events file into validated envelopes.
+    eventsPath: string,
+  ): {
+    records: Array<WorkspaceEventEnvelope>;
+    maxOffset: number; // Highest offset seen; -1 when no valid record exists.
+  } => {
+    try {
+      const text = Deno.readTextFileSync(eventsPath);
+      const records: Array<WorkspaceEventEnvelope> = [];
+      let maxOffset = -1;
+      for (const line of text.split("\n")) {
+        if (!line.trim()) continue; // Skip blank lines, including the trailing one.
+        const parsed = JSON.parse(line) as Record<string, unknown>; // Throws on malformed JSON; handled by the outer catch.
+        const offset = parseFiniteInteger(parsed.offset);
+        const type = parsed.type;
+        const createdAt = parsed.createdAt;
+        const data = parsed.data;
+        if (
+          offset === undefined ||
+          (type !== "build" && type !== "test" && type !== "grade" &&
+            type !== "session") ||
+          typeof createdAt !== "string" ||
+          !data ||
+          typeof data !== "object"
+        ) {
+          continue; // Well-formed JSON that is not a valid envelope is silently dropped.
+        }
+        const envelope: WorkspaceEventEnvelope = {
+          offset,
+          type,
+          createdAt,
+          data: data as Record<string, unknown>,
+        };
+        records.push(envelope);
+        if (offset > maxOffset) {
+          maxOffset = offset;
+        }
+      }
+      return { records, maxOffset };
+    } catch { // NOTE(review): a missing file or ONE unparsable line discards every record; confirm this is intended.
+      return { records: [], maxOffset: -1 };
+    }
+  };
+
+  // Verifies that `records` carry offsets 0, 1, 2, ... in array order.
+  // Returns the last offset (-1 for an empty list); throws an Error naming
+  // `eventsPath` at the first record whose offset differs from its index.
+  const ensureMonotonicOffsets = (
+    records: Array<WorkspaceEventEnvelope>,
+    eventsPath: string,
+  ): number => {
+    // The expected offset of each record is simply its position in the array.
+    for (const [index, { offset }] of records.entries()) {
+      if (offset === index) continue;
+      throw new Error(
+        `Non-monotonic offset in ${eventsPath}: expected ${index}, got ${offset}`,
+      );
+    }
+    // Every offset matched its index, so the last offset is length - 1.
+    return records.length - 1;
+  };
+
+  const getCurrentSessionOffset = ( // Resolves the last applied offset: cache, then state meta, then the events file.
+    sessionId: string,
+    state?: SavedState,
+  ): number => {
+    const cached = sessionOffsetById.get(sessionId);
+    if (cached !== undefined) return cached;
+    const fromMeta = parseFiniteInteger(
+      (state?.meta as { lastAppliedOffset?: unknown } | undefined)
+        ?.lastAppliedOffset,
+    ) ??
+      parseFiniteInteger( // Second choice: the lastAppliedEventSeq field name.
+        (state?.meta as { lastAppliedEventSeq?: unknown } | undefined)
+          ?.lastAppliedEventSeq,
+      );
+    if (fromMeta !== undefined) {
+      sessionOffsetById.set(sessionId, fromMeta);
+      return fromMeta;
+    }
+    const eventsPath = typeof state?.meta?.sessionEventsPath === "string"
+      ? state.meta.sessionEventsPath
+      : path.join(sessionsRoot, sessionId, "events.jsonl"); // Default location when meta has no path.
+    const { records } = readEnvelopeRecords(eventsPath);
+    if (records.length > 0) {
+      const validated = ensureMonotonicOffsets(records, eventsPath); // Throws if offsets are not 0, 1, 2, ...
+      sessionOffsetById.set(sessionId, validated);
+      return validated;
+    }
+    sessionOffsetById.set(sessionId, -1); // No events yet; the next offset will be 0.
+    return -1;
+  };
+
+  const nextSessionOffsetCandidate = ( // Offset the next appended event should use.
+    sessionId: string,
+    state?: SavedState,
+  ): number => getCurrentSessionOffset(sessionId, state) + 1; // Does not reserve the offset; callers update sessionOffsetById.
+
+  const upsertScenarioRunSummary = (meta: Record<string, unknown>) => { // Mutates `meta` in place; no-op without a scenarioRunId.
+    const scenarioRunId = typeof meta.scenarioRunId === "string"
+      ? meta.scenarioRunId
+      : undefined;
+    if (!scenarioRunId) return;
+    const lastEventSeq = parseFiniteInteger(meta.lastAppliedOffset) ??
+      parseFiniteInteger(meta.lastAppliedEventSeq) ??
+      0;
+    const updatedAt = typeof meta.sessionUpdatedAt === "string"
+      ? meta.sessionUpdatedAt
+      : new Date().toISOString();
+    const selectedScenarioDeckId =
+      typeof meta.selectedScenarioDeckId === "string"
+        ? meta.selectedScenarioDeckId
+        : typeof meta.testBotName === "string" // Fallback key; presumably the older "test bot" naming (confirm).
+        ? meta.testBotName
+        : "unknown";
+    const scenarioConfigPath = typeof meta.scenarioConfigPath === "string"
+      ? meta.scenarioConfigPath
+      : typeof meta.testBotConfigPath === "string"
+      ? meta.testBotConfigPath
+      : typeof meta.deck === "string"
+      ? meta.deck
+      : "unknown";
+    const previous = Array.isArray(meta.scenarioRunSummaries)
+      ? meta.scenarioRunSummaries as Array<ScenarioRunSummary>
+      : [];
+    const nextSummary: ScenarioRunSummary = {
+      scenarioRunId,
+      lastEventSeq,
+      updatedAt,
+      selectedScenarioDeckId,
+      scenarioConfigPath,
+    };
+    const existingIdx = previous.findIndex((entry) =>
+      entry.scenarioRunId === scenarioRunId
+    );
+    meta.scenarioRunSummaries = existingIdx >= 0 // Replace the entry for this run id, else append; `previous` is not mutated.
+      ? previous.map((entry, idx) => idx === existingIdx ? nextSummary : entry)
+      : [...previous, nextSummary];
+    meta.scenarioRunSummary = nextSummary; // Same object instance as the list entry above (shared reference).
+  };
+
+  const normalizeScenarioRunSummary = ( // Returns a summary only when all five fields are valid; otherwise null.
+    value: unknown,
+  ): ScenarioRunSummary | null => {
+    if (!value || typeof value !== "object") return null;
+    const summary = value as Record<string, unknown>;
+    const scenarioRunId = typeof summary.scenarioRunId === "string"
+      ? summary.scenarioRunId
+      : null;
+    const lastEventSeq = parseFiniteInteger(summary.lastEventSeq);
+    const updatedAt = typeof summary.updatedAt === "string"
+      ? summary.updatedAt
+      : null;
+    const selectedScenarioDeckId =
+      typeof summary.selectedScenarioDeckId === "string"
+        ? summary.selectedScenarioDeckId
+        : null;
+    const scenarioConfigPath = typeof summary.scenarioConfigPath === "string"
+      ? summary.scenarioConfigPath
+      : null;
+    if ( // Empty strings are rejected too, via the truthiness checks below.
+      !scenarioRunId || lastEventSeq === undefined || !updatedAt ||
+      !selectedScenarioDeckId || !scenarioConfigPath
+    ) {
+      return null;
+    }
+    return { // Fresh object holding only the known fields; extra keys are dropped.
+      scenarioRunId,
+      lastEventSeq,
+      updatedAt,
+      selectedScenarioDeckId,
+      scenarioConfigPath,
+    };
+  };
+
+  const selectCanonicalScenarioRunSummary = ( // Picks one summary from meta.scenarioRunSummary and meta.scenarioRunSummaries.
+    meta: Record<string, unknown>,
+  ): ScenarioRunSummary | null => {
+    const fromCurrent = normalizeScenarioRunSummary(meta.scenarioRunSummary);
+    const fromListRaw = Array.isArray(meta.scenarioRunSummaries)
+      ? meta.scenarioRunSummaries
+      : [];
+    const fromList = fromListRaw
+      .map((entry) => normalizeScenarioRunSummary(entry))
+      .filter((entry): entry is ScenarioRunSummary => Boolean(entry)); // Drop entries that failed normalization.
+    const all = fromCurrent ? [fromCurrent, ...fromList] : fromList;
+    if (!all.length) return null;
+    all.sort((a, b) => { // Sorts a local array of normalized copies; `meta` is untouched.
+      if (a.lastEventSeq !== b.lastEventSeq) {
+        return b.lastEventSeq - a.lastEventSeq; // Highest event sequence first.
+      }
+      if (a.updatedAt !== b.updatedAt) {
+        return b.updatedAt.localeCompare(a.updatedAt); // Then the greater updatedAt string first.
+      }
+      return a.scenarioRunId.localeCompare(b.scenarioRunId); // Final tie-break: ascending run id.
+    });
+    return all[0] ?? null;
+  };
+
+  const materializeSnapshot = (state: SavedState): SavedState => { // Builds the persistable copy of `state`, without traces.
+    const snapshot = { ...state }; // Shallow copy so `state` itself keeps its traces and meta.
+    const sessionId = typeof snapshot.meta?.sessionId === "string"
+      ? snapshot.meta.sessionId
+      : undefined;
+    if (sessionId) {
+      const meta = { ...(snapshot.meta ?? {}) };
+      const lastAppliedOffset = getCurrentSessionOffset(
+        sessionId,
+        snapshot,
+      );
+      meta.lastAppliedOffset = lastAppliedOffset;
+      meta.lastAppliedEventSeq = lastAppliedOffset; // Both field names are written with the same value.
+      upsertScenarioRunSummary(meta);
+      snapshot.meta = meta;
+    }
+    delete (snapshot as Record<string, unknown>).traces; // Traces are re-read from the events log by loadSessionTraces.
+    return snapshot;
+  };
+
+  // Writes `payload` as 2-space-indented JSON to a temp file next to
+  // `filePath`, then renames that temp file onto `filePath`.
+  const writeJsonAtomic = (filePath: string, payload: unknown) => {
+    const parentDir = path.dirname(filePath);
+    ensureDir(parentDir);
+    const tempName = `.tmp-${path.basename(filePath)}-${randomId("tmp")}`;
+    const tempPath = path.join(parentDir, tempName);
+    Deno.writeTextFileSync(tempPath, safeStringify(payload, 2));
+    Deno.renameSync(tempPath, filePath);
+  };
+
+  // Appends `payload` as a single JSON line to `filePath` (ensureDir first).
+  const appendJsonl = (filePath: string, payload: unknown) => {
+    ensureDir(path.dirname(filePath));
+    const record = safeStringify(payload) + "\n";
+    Deno.writeTextFileSync(filePath, record, { append: true });
+  };
+
+  const readBuildProjection = (workspaceId: string): BuildProjectionState => { // Cached read of build_state.json; never throws.
+    const cached = buildProjectionCache.get(workspaceId);
+    if (cached) return cached; // The cache wins over disk; rebuildBuildProjectionFromEvents refreshes it.
+    const filePath = path.join(sessionsRoot, workspaceId, "build_state.json");
+    try {
+      const parsed = JSON.parse(
+        Deno.readTextFileSync(filePath),
+      ) as Record<string, unknown>;
+      const lastAppliedOffset = parseFiniteInteger(parsed.lastAppliedOffset) ??
+        -1;
+      const run = normalizeBuildProjectionRun(
+        workspaceId,
+        (parsed as { run?: unknown }).run,
+      );
+      const projection = {
+        workspaceId, // Always the requested id, not whatever the file contains.
+        lastAppliedOffset,
+        run,
+        state: parsed.state && typeof parsed.state === "object"
+          ? parsed.state as SavedState
+          : undefined,
+      } satisfies BuildProjectionState;
+      buildProjectionCache.set(workspaceId, projection);
+      return projection;
+    } catch { // Missing or unparsable file: fall back to an empty idle projection, which is also cached.
+      const empty: BuildProjectionState = {
+        workspaceId,
+        lastAppliedOffset: -1,
+        run: {
+          id: workspaceId,
+          status: "idle",
+          messages: [],
+          traces: [],
+          toolInserts: [],
+        },
+        state: undefined,
+      };
+      buildProjectionCache.set(workspaceId, empty);
+      return empty;
+    }
+  };
+
+  const replayBuildProjection = ( // Folds envelopes, in the order given, into a fresh build projection.
+    workspaceId: string,
+    envelopes: Array<WorkspaceEventEnvelope>,
+  ): BuildProjectionState => {
+    const state: BuildProjectionState = {
+      workspaceId,
+      lastAppliedOffset: -1,
+      run: {
+        id: workspaceId,
+        status: "idle",
+        messages: [],
+        traces: [],
+        toolInserts: [],
+      },
+    };
+    for (const envelope of envelopes) {
+      state.lastAppliedOffset = envelope.offset; // Advances for every envelope, including non-build ones.
+      if (envelope.type !== "build") continue;
+      const payloadType = typeof envelope.data.type === "string"
+        ? envelope.data.type
+        : "";
+      if (payloadType === "buildBotStatus") { // Status events replace the whole run, including its traces.
+        const run = normalizeBuildProjectionRun(workspaceId, envelope.data.run);
+        state.run = run;
+        const buildStateSnapshot = envelope.data.state;
+        if (buildStateSnapshot && typeof buildStateSnapshot === "object") {
+          state.state = buildStateSnapshot as SavedState; // Kept until a later status event carries a new snapshot.
+        }
+        continue;
+      }
+      if (payloadType === "buildBotTrace") { // Trace events append to the current run's traces.
+        const event = envelope.data.event;
+        if (event && typeof event === "object") {
+          const currentTraces = Array.isArray(state.run.traces)
+            ? state.run.traces
+            : [];
+          state.run = {
+            ...state.run,
+            traces: [...currentTraces, event as TraceEvent],
+          };
+        }
+      }
+    } // Other build payload types only advance lastAppliedOffset.
+    return state;
+  };
+
+  const rebuildBuildProjectionFromEvents = ( // Replays the whole events file and rewrites build_state.json plus the cache.
+    workspaceId: string,
+    eventsPath: string,
+  ) => {
+    const { records } = readEnvelopeRecords(eventsPath);
+    if (records.length > 0) {
+      ensureMonotonicOffsets(records, eventsPath); // Throws before anything is written if offsets are inconsistent.
+    }
+    const projection = replayBuildProjection(workspaceId, records);
+    const buildPath = path.join(sessionsRoot, workspaceId, "build_state.json");
+    writeJsonAtomic(buildPath, projection);
+    buildProjectionCache.set(workspaceId, projection);
+  };
+
+  const updateSnapshotBoundary = ( // Advances lastAppliedOffset in the on-disk state file; best-effort.
+    sessionId: string,
+    statePath: string | undefined,
+    offset: number,
+  ) => {
+    if (!statePath || !existsSync(statePath)) return;
+    try {
+      const text = Deno.readTextFileSync(statePath);
+      const parsed = JSON.parse(text) as SavedState;
+      const parsedMeta = parsed.meta && typeof parsed.meta === "object"
+        ? parsed.meta as Record<string, unknown>
+        : {};
+      const previousBoundary =
+        parseFiniteInteger(parsedMeta.lastAppliedOffset) ??
+          parseFiniteInteger(parsedMeta.lastAppliedEventSeq) ??
+          -1;
+      if (offset <= previousBoundary) return; // The stored boundary only ever moves forward.
+      parsedMeta.lastAppliedOffset = offset;
+      parsedMeta.lastAppliedEventSeq = offset;
+      parsedMeta.sessionUpdatedAt = new Date().toISOString();
+      upsertScenarioRunSummary(parsedMeta);
+      const nextState = { ...parsed, meta: parsedMeta };
+      writeJsonAtomic(statePath, nextState);
+      sessionStateCache.set(sessionId, nextState); // NOTE(review): replaces the cached state with the on-disk copy, which materializeSnapshot wrote without traces.
+    } catch {
+      // Keep append-only logging best-effort even if snapshot boundary update fails.
+    }
+  };
+
+  const appendWorkspaceEnvelope = ( // Queues one event for the session log; returns the envelope it expects to write.
+    state: SavedState,
+    eventType: WorkspaceEventDomain,
+    data: Record<string, unknown>,
+  ): WorkspaceEventEnvelope | null => {
+    const sessionId = typeof state.meta?.sessionId === "string"
+      ? state.meta.sessionId
+      : undefined;
+    const eventsPath = typeof state.meta?.sessionEventsPath === "string"
+      ? state.meta.sessionEventsPath
+      : undefined;
+    const statePath = typeof state.meta?.sessionStatePath === "string"
+      ? state.meta.sessionStatePath
+      : undefined;
+    if (!sessionId || !eventsPath) return null; // Nothing is logged for states without a session id and events path.
+    const createdAt = new Date().toISOString();
+    const optimisticOffset = nextSessionOffsetCandidate(sessionId, state); // Computed before queuing; may differ from the committed offset if writes are already pending.
+    const envelope: WorkspaceEventEnvelope = {
+      offset: optimisticOffset,
+      createdAt,
+      type: eventType,
+      data,
+    };
+    enqueueSessionWrite(sessionId, () => {
+      const offset = nextSessionOffsetCandidate(sessionId, state); // Recomputed at write time so queued writes get consecutive offsets.
+      const committedEnvelope: WorkspaceEventEnvelope = {
+        offset,
+        createdAt,
+        type: eventType,
+        data,
+      };
+      appendJsonl(eventsPath, committedEnvelope);
+      sessionOffsetById.set(sessionId, offset);
+      if (state.meta && typeof state.meta === "object") { // Mutates the caller's state meta in place.
+        (state.meta as Record<string, unknown>).lastAppliedOffset = offset;
+        (state.meta as Record<string, unknown>).lastAppliedEventSeq = offset;
+      }
+      updateSnapshotBoundary(sessionId, statePath, offset);
+      if (eventType === "build") { // Build events also refresh build_state.json by replaying the full log.
+        rebuildBuildProjectionFromEvents(sessionId, eventsPath);
+      }
+    });
+    return envelope;
+  };
+
+  const appendSessionEvent = ( // Logs `payload` unchanged as a "session" envelope.
+    state: SavedState,
+    payload: Record<string, unknown>,
+  ) => appendWorkspaceEnvelope(state, "session", payload);
+
+  const appendFeedbackLog = ( // Logs a "session" envelope tagged logType "feedback".
+    state: SavedState,
+    payload: Record<string, unknown>,
+  ) =>
+    appendWorkspaceEnvelope(state, "session", {
+      ...payload,
+      logType: "feedback", // Placed after the spread, so it overrides any payload.logType.
+    });
+
+  const appendGradingLog = ( // Logs `payload` unchanged as a "grade" envelope.
+    state: SavedState,
+    payload: Record<string, unknown>,
+  ) => appendWorkspaceEnvelope(state, "grade", payload);
+
+  const appendErrorLog = ( // Logs a "session" envelope tagged logType "error".
+    state: SavedState,
+    payload: Record<string, unknown>,
+  ) =>
+    appendWorkspaceEnvelope(state, "session", {
+      ...payload,
+      logType: "error", // Placed after the spread, so it overrides any payload.logType.
+    });
+
+  const appendServerErrorLog = ( // Records a server error in a workspace's event log, if that workspace exists.
+    workspaceId: string | undefined,
+    payload: {
+      endpoint: string;
+      status: number;
+      message: string;
+      method?: string;
+    },
+  ) => {
+    if (!workspaceId) return;
+    const state = readSessionState(workspaceId); // Returns undefined instead of throwing when the state cannot be read.
+    if (!state) return;
+    appendErrorLog(state, {
+      type: "server.error",
+      ...payload, // Spread after `type`; the declared payload shape has no `type` key.
+    });
+  };
+
+  const loadSessionTraces = (state: SavedState): Array<TraceEvent> => { // Extracts trace events from the session's events log.
+    const eventsPath = typeof state.meta?.sessionEventsPath === "string"
+      ? state.meta.sessionEventsPath
+      : undefined;
+    if (!eventsPath) return [];
+    try {
+      const { records } = readEnvelopeRecords(eventsPath);
+      if (records.length > 0) {
+        ensureMonotonicOffsets(records, eventsPath); // A throw here lands in the catch below and yields [].
+      }
+      const traces: Array<TraceEvent> = [];
+      for (const envelope of records) {
+        const record = envelope.data;
+        const kind = typeof record.kind === "string" ? record.kind : "";
+        const type = typeof record.type === "string" ? record.type : "";
+        if (kind === "trace" || TRACE_EVENT_TYPES.has(type)) { // Matches on the payload, regardless of envelope.type.
+          traces.push(record as TraceEvent);
+        }
+      }
+      return traces;
+    } catch {
+      return []; // Any failure is treated as "no traces".
+    }
+  };
+
+  const persistSessionState = (state: SavedState): SavedState => { // Merges `state` into the cache and queues a snapshot write; returns the merged state.
+    const { state: enriched, dir } = enrichStateWithSession(state);
+    const sessionId = typeof enriched.meta?.sessionId === "string"
+      ? enriched.meta.sessionId
+      : undefined;
+    const merged = sessionId
+      ? mergeSessionState(sessionStateCache.get(sessionId), enriched)
+      : enriched;
+    if (sessionId) {
+      sessionStateCache.set(sessionId, merged);
+    }
+    if (dir && sessionId) { // Without both a directory and a session id the state stays in memory only.
+      const snapshot = materializeSnapshot(merged);
+      const eventsPath = typeof snapshot.meta?.sessionEventsPath === "string"
+        ? snapshot.meta.sessionEventsPath
+        : path.join(dir, "events.jsonl");
+      const statePath = typeof snapshot.meta?.sessionStatePath === "string"
+        ? snapshot.meta.sessionStatePath
+        : path.join(dir, "state.json");
+      enqueueSessionWrite(sessionId, () => {
+        try {
+          ensureDir(dir);
+          const firstWrite = !existsSync(eventsPath);
+          if (firstWrite) { // A new log starts with a "session.start" lifecycle event.
+            const startOffset = nextSessionOffsetCandidate(sessionId, snapshot);
+            appendJsonl(
+              eventsPath,
+              {
+                offset: startOffset,
+                createdAt: new Date().toISOString(),
+                type: "session",
+                data: {
+                  type: "session.start",
+                  category: "lifecycle",
+                  sessionId,
+                  runId: snapshot.runId,
+                  deck: snapshot.meta?.deck,
+                },
+              } satisfies WorkspaceEventEnvelope,
+            );
+            sessionOffsetById.set(sessionId, startOffset);
+          }
+
+          const snapshotOffset = nextSessionOffsetCandidate(
+            sessionId,
+            snapshot,
+          );
+          const snapshotToWrite: SavedState = { // Stamp the snapshot with the offset of its own log entry.
+            ...snapshot,
+            meta: {
+              ...(snapshot.meta ?? {}),
+              lastAppliedOffset: snapshotOffset,
+              lastAppliedEventSeq: snapshotOffset,
+            },
+          };
+          appendJsonl(
+            eventsPath,
+            {
+              offset: snapshotOffset,
+              createdAt: new Date().toISOString(),
+              type: "session",
+              data: {
+                type: "session.snapshot",
+                category: "snapshot",
+                sessionId,
+                runId: snapshotToWrite.runId,
+                state: snapshotToWrite, // The full snapshot is embedded in the log entry.
+              },
+            } satisfies WorkspaceEventEnvelope,
+          );
+          // Advance in-memory offset immediately after append so a later
+          // snapshot write failure cannot cause duplicate offsets on retry.
+          sessionOffsetById.set(sessionId, snapshotOffset);
+          writeJsonAtomic(statePath, snapshotToWrite);
+          rebuildBuildProjectionFromEvents(sessionId, eventsPath); // Keeps build_state.json in step with the log.
+        } catch (err) { // Persistence failures are logged; the caller still receives `merged`.
+          logger.warn(
+            `[sim] failed to persist session state: ${
+              err instanceof Error ? err.message : err
+            }`,
+          );
+        }
+      });
+    }
+    return merged;
+  };
+
+  const readSessionStateStrict = ( // Loads <sessionsRoot>/<sessionId>/state.json; throws on read, parse, schema or offset errors.
+    sessionId: string,
+    opts?: { withTraces?: boolean },
+  ): SavedState | undefined => {
+    const dir = path.join(sessionsRoot, sessionId);
+    const filePath = path.join(dir, "state.json");
+    const text = Deno.readTextFileSync(filePath); // Throws (e.g. Deno.errors.NotFound) when the file is absent.
+    const parsed = JSON.parse(text) as SavedState;
+    if (!parsed || typeof parsed !== "object") {
+      throw new Error(`Invalid workspace state payload at ${filePath}`);
+    }
+    const parsedMeta = parsed.meta && typeof parsed.meta === "object"
+      ? parsed.meta as Record<string, unknown>
+      : {};
+    const schemaVersion = typeof parsedMeta.workspaceSchemaVersion === "string"
+      ? parsedMeta.workspaceSchemaVersion.trim()
+      : null;
+    // Backward compatibility: legacy workspace state files may not include
+    // workspaceSchemaVersion yet. Treat missing schema as v1-equivalent.
+    if (schemaVersion && schemaVersion !== workspaceStateSchemaVersion) {
+      throw new Error(workspaceSchemaError(sessionId, schemaVersion));
+    }
+    const meta = { // Identity and location fields are derived from the lookup, overriding stored values.
+      ...parsedMeta,
+      sessionId,
+      workspaceId: typeof parsedMeta.workspaceId === "string" &&
+          parsedMeta.workspaceId.trim().length > 0
+        ? parsedMeta.workspaceId
+        : sessionId,
+      sessionDir: dir,
+      workspaceSchemaVersion: workspaceStateSchemaVersion,
+    } as Record<string, unknown>;
+    if (typeof meta.sessionStatePath !== "string") { // Stored paths are kept; defaults fill only the gaps.
+      meta.sessionStatePath = filePath;
+    }
+    if (typeof meta.sessionEventsPath !== "string") {
+      meta.sessionEventsPath = path.join(dir, "events.jsonl");
+    }
+    if (typeof meta.sessionBuildStatePath !== "string") {
+      meta.sessionBuildStatePath = path.join(dir, "build_state.json");
+    }
+
+    const eventsPath = typeof meta.sessionEventsPath === "string"
+      ? meta.sessionEventsPath
+      : undefined;
+    if (eventsPath) {
+      const { records, maxOffset } = readEnvelopeRecords(eventsPath);
+      if (records.length > 0) { // The events log, when present, is authoritative for the offset.
+        const validated = ensureMonotonicOffsets(records, eventsPath);
+        const lastAppliedOffset = validated >= 0 ? validated : maxOffset;
+        meta.lastAppliedOffset = lastAppliedOffset;
+        meta.lastAppliedEventSeq = lastAppliedOffset;
+        sessionOffsetById.set(sessionId, lastAppliedOffset);
+      } else if (typeof meta.lastAppliedOffset !== "number") { // No events and no stored offset: mark as empty.
+        meta.lastAppliedOffset = -1;
+        meta.lastAppliedEventSeq = -1;
+      }
+    }
+
+    const enriched = { ...parsed, meta } as SavedState;
+    if (opts?.withTraces) {
+      const loadedTraces = loadSessionTraces(enriched);
+      const fallbackTraces = Array.isArray(enriched.traces)
+        ? enriched.traces
+        : [];
+      const traces = loadedTraces.length > 0 ? loadedTraces : fallbackTraces; // Prefer traces from the log; else those stored in state.json.
+      const withTraces = { ...enriched, traces };
+      sessionStateCache.set(sessionId, withTraces);
+      return withTraces;
+    }
+    sessionStateCache.set(sessionId, enriched); // Every successful read replaces the cached state.
+    return enriched;
+  };
+
+  // Like readSessionStateStrict, but returns undefined instead of throwing;
+  // failures other than Deno.errors.NotFound are logged as warnings first.
+  const readSessionState = (
+    sessionId: string,
+    opts?: { withTraces?: boolean },
+  ): SavedState | undefined => {
+    try {
+      return readSessionStateStrict(sessionId, opts);
+    } catch (err) {
+      if (err instanceof Deno.errors.NotFound) return undefined;
+      const reason = err instanceof Error ? err.message : String(err);
+      const prefix = `[sim] failed to read workspace state for ${sessionId}`;
+      logger.warn(`${prefix}: ${reason}`);
+      return undefined;
+    }
+  };
+
+  const readBuildState = ( // Returns the build projection for a workspace, or undefined if its state cannot be read.
+    workspaceId: string,
+  ): BuildProjectionState | undefined => {
+    const state = readSessionState(workspaceId);
+    if (!state) return undefined;
+    const eventsPath = typeof state.meta?.sessionEventsPath === "string"
+      ? state.meta.sessionEventsPath
+      : path.join(sessionsRoot, workspaceId, "events.jsonl");
+    if (existsSync(eventsPath)) { // Each call replays the log and rewrites build_state.json.
+      rebuildBuildProjectionFromEvents(workspaceId, eventsPath); // NOTE(review): may throw on non-monotonic offsets; not caught here.
+    }
+    return readBuildProjection(workspaceId);
+  };
+
+  return { // Public surface of the store; every other helper stays private to this closure.
+    parseFiniteInteger,
+    selectCanonicalScenarioRunSummary,
+    appendWorkspaceEnvelope,
+    appendSessionEvent,
+    appendFeedbackLog,
+    appendGradingLog,
+    appendErrorLog,
+    appendServerErrorLog,
+    persistSessionState,
+    readSessionStateStrict, // Throwing variant.
+    readSessionState, // Non-throwing variant; returns undefined on failure.
+    readBuildState,
+    replayBuildProjection,
+  };
+};
diff --git a/src/server_streams.test.ts b/src/server_streams.test.ts
new file mode 100644
index 000000000..506191321
--- /dev/null
+++ b/src/server_streams.test.ts
@@ -0,0 +1,439 @@
+import { assert, assertEquals } from "@std/assert";
+import * as path from "@std/path";
+import { startWebSocketSimulator } from "./server.ts";
+import type { ModelProvider } from "@bolt-foundry/gambit-core";
+import {
+  modImportPath,
+  readDurableStreamEvents,
+  readStreamEvents,
+  runSimulator,
+} from "./server_test_utils.ts";
+// Runs one streamed simulator request and checks chunks and the final result.
+Deno.test("simulator streams responses", async () => {
+  const dir = await Deno.makeTempDir();
+  const modHref = modImportPath();
+
+  const deckPath = path.join(dir, "ws.deck.ts");
+  await Deno.writeTextFile(
+    deckPath,
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string(),
+      outputSchema: z.string(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
+  );
+  // Stub provider: streams "h" then "i" and resolves with the message "hi".
+  const provider: ModelProvider = {
+    chat(input) {
+      input.onStreamText?.("h");
+      input.onStreamText?.("i");
+      return Promise.resolve({
+        message: { role: "assistant", content: "hi" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  const server = startWebSocketSimulator({
+    deckPath,
+    modelProvider: provider,
+    port: 0,
+  });
+
+  const port = (server.addr as Deno.NetAddr).port;
+
+  const homepage = await fetch(`http://127.0.0.1:${port}/`);
+  const html = await homepage.text();
+  if (!html.includes('id="root"')) {
+    throw new Error("Simulator HTML missing root mount");
+  }
+  // Run with streaming on, then read the simulator stream back from offset 0.
+  await runSimulator(port, { input: "hello", stream: true });
+  const events = await readStreamEvents(port, 0);
+  const messages = events.map((event) =>
+    event.data as { type?: string; chunk?: string; result?: unknown }
+  );
+  await server.shutdown();
+  await server.finished;
+
+  const resultMsg = messages.find((m) => m.type === "result");
+  assertEquals(resultMsg?.result, "hi");
+  const streams = messages.filter((m) => m.type === "stream").map((m) =>
+    m.chunk ?? ""
+  )
+    .join("");
+  assertEquals(streams, "hi");
+  assertEquals(messages.some((m) => m.type === "result"), true);
+});
+// Posts a build message, then polls the workspace API until the run completes.
+Deno.test("build bot endpoint streams status and runs", async () => {
+  const dir = await Deno.makeTempDir();
+  const modHref = modImportPath();
+
+  const deckPath = path.join(dir, "build-primary.deck.ts");
+  await Deno.writeTextFile(
+    deckPath,
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string().optional(),
+      outputSchema: z.string().optional(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
+  );
+
+  const provider: ModelProvider = {
+    chat(input) {
+      input.onStreamText?.("h");
+      input.onStreamText?.("i");
+      return Promise.resolve({
+        message: { role: "assistant", content: "hi" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  const server = startWebSocketSimulator({
+    deckPath,
+    modelProvider: provider,
+    port: 0,
+  });
+  const port = (server.addr as Deno.NetAddr).port;
+
+  const homepage = await fetch(`http://127.0.0.1:${port}/build`);
+  const html = await homepage.text();
+  assert(html.includes("__GAMBIT_BUILD_TAB_ENABLED__"));
+
+  const runId = "test-build-run";
+  const res = await fetch(`http://127.0.0.1:${port}/api/build/message`, {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify({ runId, message: "" }),
+  });
+  const body = await res.json().catch(() => ({})) as {
+    run?: { id?: string; status?: string };
+    error?: string;
+  };
+  assertEquals(res.ok, true);
+  assertEquals(body.run?.id, runId);
+  // Poll up to 20 times, 50 ms apart, for the run to reach "completed".
+  let status: unknown = null;
+  for (let i = 0; i < 20; i += 1) {
+    const sres = await fetch(
+      `http://127.0.0.1:${port}/api/workspaces/${encodeURIComponent(runId)}`,
+    );
+    const sb = await sres.json().catch(() => ({})) as {
+      build?: {
+        run?: { status?: string; messages?: Array<{ content?: string }> };
+      };
+    };
+    status = sb.build?.run?.status ?? null;
+    if (sb.build?.run?.status === "completed") {
+      assert((sb.build?.run?.messages?.[0]?.content ?? "").length > 0);
+      break;
+    }
+    await new Promise((r) => setTimeout(r, 50));
+  }
+  assertEquals(status, "completed");
+
+  await server.shutdown();
+  await server.finished;
+});
+// NOTE(review): title mentions test events; only build/calibrate are asserted.
+Deno.test("workspace durable stream carries build, test, and grade events", async () => {
+  const dir = await Deno.makeTempDir();
+  const sessionsDir = path.join(dir, "sessions");
+  const modHref = modImportPath();
+  const deckPath = path.join(dir, "workspace-stream.deck.ts");
+  const escapedDeckPath = deckPath.replaceAll("\\", "\\\\");
+  await Deno.writeTextFile(
+    deckPath,
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string().optional(),
+      outputSchema: z.string().optional(),
+      modelParams: { model: "dummy-model" },
+      testDecks: [{ id: "self-test", label: "Self Test", path: "${escapedDeckPath}" }],
+      graderDecks: [{ id: "self-grader", label: "Self Grader", path: "${escapedDeckPath}" }],
+    });
+    `,
+  );
+
+  const provider: ModelProvider = {
+    chat() {
+      return Promise.resolve({
+        message: { role: "assistant", content: "ok" },
+        finishReason: "stop",
+      });
+    },
+  };
+
+  const server = startWebSocketSimulator({
+    deckPath,
+    modelProvider: provider,
+    port: 0,
+    sessionDir: sessionsDir,
+  });
+  const port = (server.addr as Deno.NetAddr).port;
+  // Seed a run first; its workspaceId is reused by the calls below.
+  const runResult = await runSimulator(port, {
+    input: "seed",
+    message: "seed",
+    stream: false,
+  });
+  const workspaceId = runResult.workspaceId!;
+
+  const buildRes = await fetch(`http://127.0.0.1:${port}/api/build/reset`, {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify({ workspaceId }),
+  });
+  assertEquals(buildRes.ok, true);
+  await buildRes.text();
+
+  const gradeRes = await fetch(`http://127.0.0.1:${port}/api/calibrate/flag`, {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify({
+      workspaceId,
+      refId: "gradingRun:test#turn:0",
+      runId: "test",
+      turnIndex: 0,
+    }),
+  });
+  assertEquals(gradeRes.ok, true);
+  await gradeRes.text();
+  // Read the workspace stream; retry once after 100 ms if it is still empty.
+  let workspaceEvents = await readDurableStreamEvents(
+    port,
+    "gambit-workspace",
+    0,
+  );
+  if (workspaceEvents.length === 0) {
+    await new Promise((resolve) => setTimeout(resolve, 100));
+    workspaceEvents = await readDurableStreamEvents(
+      port,
+      "gambit-workspace",
+      0,
+    );
+  }
+  const payloads = workspaceEvents.map((event) =>
+    event.data as { type?: string }
+  );
+  assert(payloads.some((event) => event.type === "buildBotStatus"));
+  assert(payloads.some((event) => event.type === "calibrateSession"));
+  await server.shutdown();
+  await server.finished;
+});
+// Starts a test run that hangs in the provider, then stops it through the API.
+Deno.test("test stop aborts in-flight runtime execution", async () => {
+  const dir = await Deno.makeTempDir();
+  const modHref = modImportPath();
+  const deckPath = path.join(dir, "test-stop.deck.ts");
+  await Deno.writeTextFile(
+    deckPath,
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string().optional(),
+      outputSchema: z.string().optional(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
+  );
+  // Provider never resolves; on abort it bumps abortCount and rejects.
+  let abortCount = 0;
+  const provider: ModelProvider = {
+    chat(input) {
+      return new Promise((_, reject) => {
+        input.signal?.addEventListener(
+          "abort",
+          () => {
+            abortCount += 1;
+            reject(new DOMException("Run canceled", "AbortError"));
+          },
+          { once: true },
+        );
+      });
+    },
+  };
+
+  const server = startWebSocketSimulator({
+    deckPath,
+    modelProvider: provider,
+    port: 0,
+  });
+  const port = (server.addr as Deno.NetAddr).port;
+
+  const startRes = await fetch(`http://127.0.0.1:${port}/api/test/message`, {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify({ runId: "stop-run", message: "hello" }),
+  });
+  assertEquals(startRes.ok, true);
+  const startBody = await startRes.json() as { run?: { id?: string } };
+  const runId = startBody.run?.id ?? "";
+  assert(runId.length > 0);
+
+  const stopRes = await fetch(`http://127.0.0.1:${port}/api/test/stop`, {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify({ runId }),
+  });
+  assertEquals(stopRes.ok, true);
+  const stopBody = await stopRes.json() as {
+    stopped?: boolean;
+    run?: { status?: string };
+  };
+  assertEquals(stopBody.stopped, true);
+  assertEquals(stopBody.run?.status, "canceled");
+  // NOTE(review): fixed 40 ms wait for the abort listener; may be timing-sensitive.
+  await new Promise((resolve) => setTimeout(resolve, 40));
+  assertEquals(abortCount, 1);
+
+  await server.shutdown();
+  await server.finished;
+});
+// Starts a build run that hangs in the provider, then resets the workspace.
+Deno.test("build reset aborts in-flight runtime execution", async () => {
+  const dir = await Deno.makeTempDir();
+  const modHref = modImportPath();
+  const deckPath = path.join(dir, "build-reset-abort.deck.ts");
+  await Deno.writeTextFile(
+    deckPath,
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string().optional(),
+      outputSchema: z.string().optional(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
+  );
+  // Provider never resolves; on abort it bumps abortCount and rejects.
+  let abortCount = 0;
+  const provider: ModelProvider = {
+    chat(input) {
+      return new Promise((_, reject) => {
+        input.signal?.addEventListener(
+          "abort",
+          () => {
+            abortCount += 1;
+            reject(new DOMException("Run canceled", "AbortError"));
+          },
+          { once: true },
+        );
+      });
+    },
+  };
+
+  const server = startWebSocketSimulator({
+    deckPath,
+    modelProvider: provider,
+    port: 0,
+  });
+  const port = (server.addr as Deno.NetAddr).port;
+
+  const startRes = await fetch(`http://127.0.0.1:${port}/api/build/message`, {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify({ runId: "build-abort", message: "hello" }),
+  });
+  assertEquals(startRes.ok, true);
+  await startRes.text();
+
+  const resetRes = await fetch(`http://127.0.0.1:${port}/api/build/reset`, {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify({ workspaceId: "build-abort" }),
+  });
+  assertEquals(resetRes.ok, true);
+  const resetBody = await resetRes.json() as { reset?: boolean };
+  assertEquals(resetBody.reset, true);
+  // NOTE(review): fixed 40 ms wait for the abort listener; may be timing-sensitive.
+  await new Promise((resolve) => setTimeout(resolve, 40));
+  assertEquals(abortCount, 1);
+
+  await server.shutdown();
+  await server.finished;
+});
+// Starts a build run that hangs in the provider, then stops it through the API.
+Deno.test("build stop aborts in-flight runtime execution", async () => {
+  const dir = await Deno.makeTempDir();
+  const modHref = modImportPath();
+  const deckPath = path.join(dir, "build-stop-abort.deck.ts");
+  await Deno.writeTextFile(
+    deckPath,
+    `
+    import { defineDeck } from "${modHref}";
+    import { z } from "zod";
+    export default defineDeck({
+      inputSchema: z.string().optional(),
+      outputSchema: z.string().optional(),
+      modelParams: { model: "dummy-model" },
+    });
+    `,
+  );
+  // Provider never resolves; on abort it bumps abortCount and rejects.
+  let abortCount = 0;
+  const provider: ModelProvider = {
+    chat(input) {
+      return new Promise((_, reject) => {
+        input.signal?.addEventListener(
+          "abort",
+          () => {
+            abortCount += 1;
+            reject(new DOMException("Run canceled", "AbortError"));
+          },
+          { once: true },
+        );
+      });
+    },
+  };
+
+  const server = startWebSocketSimulator({
+    deckPath,
+    modelProvider: provider,
+    port: 0,
+  });
+  const port = (server.addr as Deno.NetAddr).port;
+
+  const startRes = await fetch(`http://127.0.0.1:${port}/api/build/message`, {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify({ runId: "build-stop", message: "hello" }),
+  });
+  assertEquals(startRes.ok, true);
+  const startBody = await startRes.json() as { run?: { id?: string } };
+  const runId = startBody.run?.id ?? "";
+  assert(runId.length > 0);
+
+  const stopRes = await fetch(`http://127.0.0.1:${port}/api/build/stop`, {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify({ workspaceId: runId }),
+  });
+  assertEquals(stopRes.ok, true);
+  const stopBody = await stopRes.json() as {
+    stopped?: boolean;
+    run?: { status?: string };
+  };
+  assertEquals(stopBody.stopped, true);
+  assertEquals(stopBody.run?.status, "canceled");
+  // NOTE(review): fixed 40 ms wait for the abort listener; may be timing-sensitive.
+  await new Promise((resolve) => setTimeout(resolve, 40));
+  assertEquals(abortCount, 1);
+
+  await server.shutdown();
+  await server.finished;
+});
diff --git a/src/server_test_utils.ts b/src/server_test_utils.ts
new file mode 100644
index 000000000..86a6a2494
--- /dev/null
+++ b/src/server_test_utils.ts
@@ -0,0 +1,53 @@
+import * as path from "@std/path";
+
+/** Returns the `file:` URL href of `../mod.ts`, resolved from this module. */
+export function modImportPath() {
+  const moduleDir = path.dirname(path.fromFileUrl(import.meta.url));
+  return path.toFileUrl(path.resolve(moduleDir, "..", "mod.ts")).href;
+}
+
+/** POSTs `payload` to `/api/simulator/run`; throws when the reply is not ok. */
+export async function runSimulator(
+  port: number,
+  payload: Record<string, unknown>,
+): Promise<{ runId?: string; workspaceId?: string }> {
+  const response = await fetch(`http://127.0.0.1:${port}/api/simulator/run`, {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify(payload),
+  });
+  const parsed = await response.json().catch(() => ({}));
+  if (response.ok) return parsed as { runId?: string; workspaceId?: string };
+  const reason = typeof parsed?.error === "string"
+    ? parsed.error
+    : response.statusText;
+  throw new Error(reason);
+}
+/** Reads the `events` array of one durable stream from the simulator API. */
+export async function readDurableStreamEvents(
+  port: number,
+  streamId: string,
+  offset = 0,
+) {
+  const id = encodeURIComponent(streamId); // keep reserved chars out of the path
+  const res = await fetch(
+    `http://127.0.0.1:${port}/api/durable-streams/stream/${id}?offset=${offset}`,
+  );
+  if (!res.ok) {
+    await res.body?.cancel(); // an unread body trips Deno's test sanitizer
+    throw new Error(res.statusText);
+  }
+  type Page = { events?: Array<{ offset?: number; data?: unknown }> };
+  return (await res.json() as Page).events ?? [];
+}
+/** Shorthand: reads the `gambit-simulator` durable stream. */
+export async function readStreamEvents(port: number, offset = 0) {
+  return await readDurableStreamEvents(port, "gambit-simulator", offset);
+}
+
+/** Parses every non-blank line of a JSON Lines file. */
+export async function readJsonLines(filePath: string): Promise<Array<unknown>> {
+  const contents = await Deno.readTextFile(filePath);
+  const rows = contents.split("\n");
+  return rows.flatMap((row) => row.trim() ? [JSON.parse(row)] : []);
+}
diff --git a/src/server_types.ts b/src/server_types.ts
new file mode 100644
index 000000000..41cda8fe2
--- /dev/null
+++ b/src/server_types.ts
@@ -0,0 +1,114 @@
+import type { SavedState, TraceEvent } from "@bolt-foundry/gambit-core";
+/** Descriptor for a test deck entry: id, label, optional description, path. */
+export type AvailableTestDeck = {
+  id: string;
+  label: string;
+  description?: string;
+  path: string;
+};
+/** Descriptor for a grader deck entry; same fields as `AvailableTestDeck`. */
+export type AvailableGraderDeck = {
+  id: string;
+  label: string;
+  description?: string;
+  path: string;
+};
+/** Recursive schema description; `fields` and `items` nest further nodes. */
+export type NormalizedSchema = {
+  kind:
+    | "string"
+    | "number"
+    | "boolean"
+    | "enum"
+    | "object"
+    | "array"
+    | "unknown";
+  optional: boolean;
+  description?: string;
+  example?: unknown;
+  defaultValue?: unknown;
+  enumValues?: Array<unknown>;
+  fields?: Record<string, NormalizedSchema>;
+  items?: NormalizedSchema;
+};
+/** Describes one tool: required name plus optional label, text, and path. */
+export type DeckToolDescription = {
+  name: string;
+  label?: string;
+  description?: string;
+  path?: string;
+};
+/** Schema description bundle; schema, defaults, error, tools are all optional. */
+export type SchemaDescription = {
+  schema?: NormalizedSchema;
+  defaults?: unknown;
+  error?: string;
+  tools?: Array<DeckToolDescription>;
+};
+/** One grader run: grader identity, status, and optional input/result/error. */
+export type GradingRunRecord = {
+  id: string;
+  workspaceId?: string;
+  gradingRunId?: string;
+  graderId: string;
+  graderPath: string;
+  graderLabel?: string;
+  status: "running" | "completed" | "error";
+  runAt?: string;
+  referenceSample?: {
+    score: number;
+    reason: string;
+    evidence?: Array<string>;
+    artifactRevisionId?: string;
+    workspaceId?: string;
+    gradingRunId?: string;
+  };
+  input?: unknown;
+  result?: unknown;
+  error?: string;
+};
+/** A flag attached to a reference id, optionally tied to a run and turn. */
+export type GradingFlag = {
+  id: string;
+  refId: string;
+  runId?: string;
+  turnIndex?: number;
+  reason?: string;
+  createdAt: string;
+};
+/** Summary metadata for a session, including any recorded grading runs. */
+export type SessionMeta = {
+  id: string;
+  deck?: string;
+  deckSlug?: string;
+  testBotName?: string;
+  createdAt?: string;
+  gradingRuns?: Array<GradingRunRecord>;
+  sessionDir?: string;
+  statePath?: string;
+};
+/** Union of outgoing message shapes, discriminated by the `type` tag. */
+export type OutgoingMessage =
+  | {
+    type: "ready";
+    deck: string;
+    port: number;
+    schema?: NormalizedSchema;
+    defaults?: unknown;
+    schemaError?: string;
+  }
+  | { type: "pong" }
+  | { type: "stream"; chunk: string; runId?: string }
+  | { type: "result"; result: unknown; runId?: string; streamed: boolean }
+  | { type: "trace"; event: TraceEvent }
+  | {
+    type: "state";
+    state: SavedState;
+    newMessages?: Array<{
+      index: number;
+      role: string;
+      messageRefId?: string;
+      content?: unknown;
+    }>;
+  }
+  | { type: "error"; message: string; runId?: string };
diff --git a/src/server_ui_routes.ts b/src/server_ui_routes.ts
new file mode 100644
index 000000000..55b8b64cc
--- /dev/null
+++ b/src/server_ui_routes.ts
@@ -0,0 +1,235 @@
+type HandleUiRoutesDeps = { // inputs destructured by handleUiRoutes
+  url: URL;
+  req: Request;
+  workspaceRouteBase: string; // UI path; matched exactly and as a `/` prefix
+  activeWorkspaceId: string | null;
+  activeWorkspaceOnboarding: boolean;
+  resolvedDeckPath: string; // file read by /api/deck-source
+  deckLabel?: string; // falls back to toDeckLabel(resolvedDeckPath)
+  getWorkspaceIdFromQuery: (url: URL) => string | undefined;
+  activateWorkspaceDeck: (workspaceId?: string) => Promise<void>;
+  schemaPromise: Promise<unknown>; // awaited by /schema; objects are spread in
+  deckLoadPromise: Promise<unknown>; // rejections are caught and read as null
+  canServeReactBundle: () => Promise<boolean>;
+  simulatorReactHtml: (
+    deckPath: string,
+    deckLabel?: string,
+    opts?: { workspaceId?: string | null; onboarding?: boolean },
+  ) => string;
+  toDeckLabel: (deckPath: string) => string;
+  readReactBundle: () => Promise<Uint8Array | null>; // null yields a 404
+  shouldAdvertiseSourceMap: () => boolean;
+  readReactBundleSourceMap: () => Promise<Uint8Array | null>; // null yields a 404
+  listSessions: () => unknown;
+  createWorkspaceSession: () => Promise<{
+    id: string;
+    rootDeckPath: string;
+    rootDir: string;
+    createdAt: string;
+  }>;
+  workspaceStateSchemaVersion: string;
+};
+/** Serves simulator UI and asset routes; resolves to null when none match. */
+export const handleUiRoutes = async (
+  deps: HandleUiRoutesDeps,
+): Promise<Response | null> => {
+  const {
+    url,
+    req,
+    workspaceRouteBase,
+    activeWorkspaceId,
+    activeWorkspaceOnboarding,
+    resolvedDeckPath,
+    deckLabel,
+    getWorkspaceIdFromQuery,
+    activateWorkspaceDeck,
+    schemaPromise,
+    deckLoadPromise,
+    canServeReactBundle,
+    simulatorReactHtml,
+    toDeckLabel,
+    readReactBundle,
+    shouldAdvertiseSourceMap,
+    readReactBundleSourceMap,
+    listSessions,
+    createWorkspaceSession,
+    workspaceStateSchemaVersion,
+  } = deps;
+  // Page routes: every UI path below is answered with the same HTML shell.
+  if (
+    url.pathname === "/" ||
+    url.pathname === workspaceRouteBase ||
+    url.pathname.startsWith(`${workspaceRouteBase}/`) ||
+    url.pathname.startsWith("/simulate") ||
+    url.pathname.startsWith("/debug") ||
+    url.pathname.startsWith("/build") ||
+    url.pathname.startsWith("/editor") ||
+    url.pathname.startsWith("/docs")
+  ) {
+    const hasBundle = await canServeReactBundle();
+    if (!hasBundle) {
+      return new Response(
+        "Simulator UI bundle missing. Run `deno task bundle:sim` (or start with `--bundle`).",
+        { status: 500 },
+      );
+    }
+    await deckLoadPromise.catch(() => null);
+    const resolvedLabel = deckLabel ?? toDeckLabel(resolvedDeckPath);
+    return new Response(
+      simulatorReactHtml(resolvedDeckPath, resolvedLabel, {
+        workspaceId: activeWorkspaceId ?? null,
+        onboarding: activeWorkspaceOnboarding,
+      }),
+      {
+        headers: { "content-type": "text/html; charset=utf-8" },
+      },
+    );
+  }
+  // /schema: deck path, start mode and model params merged with schemaPromise.
+  if (url.pathname === "/schema") {
+    const sessionId = getWorkspaceIdFromQuery(url);
+    if (sessionId) {
+      await activateWorkspaceDeck(sessionId);
+    }
+    const descRaw = await schemaPromise;
+    const desc = descRaw && typeof descRaw === "object"
+      ? descRaw as Record<string, unknown>
+      : {};
+    const deck = await deckLoadPromise.catch(() => null) as {
+      startMode?: unknown;
+      modelParams?: Record<string, unknown>;
+    } | null;
+    const modelParams = deck && typeof deck === "object"
+      ? deck.modelParams
+      : undefined;
+    const startMode = deck &&
+        (deck.startMode === "assistant" || deck.startMode === "user")
+      ? deck.startMode
+      : "assistant";
+    return new Response(
+      JSON.stringify({
+        deck: resolvedDeckPath,
+        startMode,
+        modelParams,
+        ...desc,
+      }),
+      {
+        headers: { "content-type": "application/json; charset=utf-8" },
+      },
+    );
+  }
+  // /api/deck-source (GET only): returns the text of the resolved deck file.
+  if (url.pathname === "/api/deck-source") {
+    if (req.method !== "GET") {
+      return new Response("Method not allowed", { status: 405 });
+    }
+    try {
+      const content = await Deno.readTextFile(resolvedDeckPath);
+      return new Response(
+        JSON.stringify({
+          path: resolvedDeckPath,
+          content,
+        }),
+        { headers: { "content-type": "application/json; charset=utf-8" } },
+      );
+    } catch (err) {
+      const message = err instanceof Error ? err.message : String(err);
+      return new Response(
+        JSON.stringify({
+          path: resolvedDeckPath,
+          error: message,
+        }),
+        {
+          status: 500,
+          headers: { "content-type": "application/json; charset=utf-8" },
+        },
+      );
+    }
+  }
+  // /ui/bundle.js: the React bundle, optionally advertising its source map.
+  if (url.pathname === "/ui/bundle.js") {
+    const data = await readReactBundle();
+    if (!data) {
+      return new Response(
+        "Bundle missing. Run `deno task bundle:sim` (or start with `--bundle`).",
+        { status: 404 },
+      );
+    }
+    try {
+      const headers = new Headers({
+        "content-type": "application/javascript; charset=utf-8",
+      });
+      if (shouldAdvertiseSourceMap()) {
+        headers.set("SourceMap", "/ui/bundle.js.map");
+      }
+      return new Response(data as unknown as BodyInit, { headers });
+    } catch (err) {
+      return new Response(
+        `Failed to read bundle: ${
+          err instanceof Error ? err.message : String(err)
+        }`,
+        { status: 500 },
+      );
+    }
+  }
+  // /ui/bundle.js.map: the source map for the bundle above.
+  if (url.pathname === "/ui/bundle.js.map") {
+    const data = await readReactBundleSourceMap();
+    if (!data) {
+      return new Response(
+        "Source map missing. Run `deno task bundle:sim:sourcemap` (or start with `--bundle --sourcemap`).",
+        { status: 404 },
+      );
+    }
+    try {
+      return new Response(data as unknown as BodyInit, {
+        headers: {
+          "content-type": "application/json; charset=utf-8",
+        },
+      });
+    } catch (err) {
+      return new Response(
+        `Failed to read source map: ${
+          err instanceof Error ? err.message : String(err)
+        }`,
+        { status: 500 },
+      );
+    }
+  }
+  // NOTE(review): looks unreachable; the first branch returns for this path.
+  if (url.pathname === workspaceRouteBase) {
+    const sessions = listSessions();
+    return new Response(JSON.stringify({ sessions }), {
+      headers: { "content-type": "application/json; charset=utf-8" },
+    });
+  }
+  // /api/workspace/new (POST only): creates a workspace, activates its deck.
+  if (url.pathname === "/api/workspace/new") {
+    if (req.method !== "POST") {
+      return new Response("Method not allowed", { status: 405 });
+    }
+    try {
+      const workspace = await createWorkspaceSession();
+      await activateWorkspaceDeck(workspace.id);
+      return new Response(
+        JSON.stringify({
+          workspaceId: workspace.id,
+          deckPath: workspace.rootDeckPath,
+          workspaceDir: workspace.rootDir,
+          createdAt: workspace.createdAt,
+          workspaceSchemaVersion: workspaceStateSchemaVersion,
+        }),
+        { headers: { "content-type": "application/json" } },
+      );
+    } catch (err) {
+      return new Response(
+        JSON.stringify({
+          error: err instanceof Error ? err.message : String(err),
+        }),
+        { status: 500, headers: { "content-type": "application/json" } },
+      );
+    }
+  }
+  // No UI route matched.
+  return null;
+};
diff --git a/src/session_artifacts.ts b/src/session_artifacts.ts
new file mode 100644
index 000000000..e7df9c413
--- /dev/null
+++ b/src/session_artifacts.ts
@@ -0,0 +1,300 @@
+import * as path from "@std/path";
+import { existsSync } from "@std/fs";
+import type { SavedState, TraceEvent } from "@bolt-foundry/gambit-core";
+/** Options selecting a session directory (`rootDir`/`sessionId`) to record into. */
+export type SessionArtifactsConfig = {
+  rootDir: string;
+  sessionId?: string;
+  continueSession?: boolean;
+};
+/** Handles returned by `withSessionArtifacts` for recording one run. */
+export type SessionArtifactsRun = {
+  state?: SavedState;
+  trace: (event: TraceEvent) => void;
+  onStateUpdate: (state: SavedState) => void;
+  finalize: () => void;
+  sessionId: string;
+  sessionDir: string;
+};
+/** One record of the event log: a trace event tagged with its offset. */
+type SessionArtifactEnvelope = {
+  offset: number;
+  createdAt: string;
+  type: "trace";
+  data: TraceEvent;
+};
+/** Normalized config plus the derived session directory and file paths. */
+type PreparedSessionArtifacts = {
+  config: Required<SessionArtifactsConfig>;
+  sessionDir: string;
+  statePath: string;
+  eventsPath: string;
+  lockPath: string;
+};
+/** Returns `<prefix>-` followed by 24 hex characters from a random UUID. */
+function randomId(prefix: string): string {
+  const hex = crypto.randomUUID().split("-").join("");
+  return [prefix, hex.substring(0, 24)].join("-");
+}
+/** Returns `value` when it is a finite integer number, else `undefined`. */
+function parseFiniteInteger(value: unknown): number | undefined {
+  if (typeof value !== "number") return undefined;
+  const whole = Number.isFinite(value) && Number.isInteger(value);
+  return whole ? value : undefined;
+}
+/** Validates `config`, resolves `rootDir`, and defaults `sessionId`. */
+function normalizeConfig(
+  config: SessionArtifactsConfig,
+): Required<SessionArtifactsConfig> {
+  // Validate first: `path.resolve("")` yields the cwd, which is never falsy.
+  if (!config.rootDir) {
+    throw new Error("sessionArtifacts.rootDir is required.");
+  }
+  const sessionId = config.sessionId?.trim() || randomId("session");
+  const continueSession = Boolean(config.continueSession);
+  if (continueSession && !config.sessionId?.trim()) {
+    throw new Error(
+      "sessionArtifacts.sessionId is required when continueSession is true.",
+    );
+  }
+  return { rootDir: path.resolve(config.rootDir), sessionId, continueSession };
+}
+/** Normalizes the config and derives the session directory and file paths. */
+function prepare(
+  configInput: SessionArtifactsConfig,
+): PreparedSessionArtifacts {
+  const config = normalizeConfig(configInput);
+  const sessionDir = path.join(config.rootDir, config.sessionId);
+  const at = (name: string) => path.join(sessionDir, name);
+  const statePath = at("state.json");
+  const eventsPath = at("events.jsonl");
+  return { config, sessionDir, statePath, eventsPath, lockPath: at(".lock") };
+}
+
+/**
+ * Verifies that offsets count up from 0 with no gaps or repeats.
+ * @returns the last offset, or -1 when `records` is empty.
+ * @throws Error naming `eventsPath` at the first out-of-sequence record.
+ */
+function ensureMonotonicOffsets(
+  records: Array<SessionArtifactEnvelope>,
+  eventsPath: string,
+): number {
+  records.forEach((record, expected) => {
+    if (record.offset === expected) return;
+    throw new Error(
+      `Non-monotonic offset in ${eventsPath}: expected ${expected}, got ${record.offset}`,
+    );
+  });
+  return records.length - 1;
+}
+
+/**
+ * Loads and validates the trace envelopes stored in a JSONL event log.
+ * A missing file reads as an empty log with `highestOffset` -1.
+ * @throws Error when a line is not a well-formed trace envelope or offsets are
+ *   out of sequence; JSON syntax errors from `JSON.parse` propagate unchanged.
+ */
+function readEvents(
+  eventsPath: string,
+): { records: Array<SessionArtifactEnvelope>; highestOffset: number } {
+  if (!existsSync(eventsPath)) {
+    return { records: [], highestOffset: -1 };
+  }
+  const records: Array<SessionArtifactEnvelope> = [];
+  for (const line of Deno.readTextFileSync(eventsPath).split("\n")) {
+    if (!line.trim()) continue;
+    const parsed: unknown = JSON.parse(line);
+    // A bare JSON `null` (or any primitive) must be rejected as an invalid
+    // envelope instead of crashing on the property reads below.
+    const fields = parsed !== null && typeof parsed === "object"
+      ? parsed as Record<string, unknown>
+      : {};
+    const offset = parseFiniteInteger(fields.offset);
+    const { createdAt, type, data } = fields;
+    if (
+      offset === undefined ||
+      typeof createdAt !== "string" ||
+      type !== "trace" ||
+      !data ||
+      typeof data !== "object"
+    ) {
+      throw new Error(`Invalid event envelope in ${eventsPath}`);
+    }
+    records.push({ offset, createdAt, type: "trace", data: data as TraceEvent });
+  }
+  // The sequence check returns -1 for an empty list, covering empty logs too.
+  const highestOffset = ensureMonotonicOffsets(records, eventsPath);
+  return { records, highestOffset };
+}
+
+/** Writes JSON to a temp file beside `filePath`, then renames it into place. */
+function writeJsonAtomic(filePath: string, payload: unknown) {
+  const parentDir = path.dirname(filePath);
+  Deno.mkdirSync(parentDir, { recursive: true });
+  const tmpName = `.tmp-${path.basename(filePath)}-${randomId("tmp")}`;
+  const tmpPath = path.join(parentDir, tmpName);
+  const serialized = JSON.stringify(payload, null, 2);
+  Deno.writeTextFileSync(tmpPath, serialized);
+  Deno.renameSync(tmpPath, filePath);
+}
+/** Appends `payload` to `filePath` as a single JSON line. */
+function appendJsonl(filePath: string, payload: unknown) {
+  Deno.mkdirSync(path.dirname(filePath), { recursive: true });
+  const row = JSON.stringify(payload) + "\n";
+  Deno.writeTextFileSync(filePath, row, { append: true });
+}
+/** Moves the event log aside under a timestamped name; returns that path. */
+function archiveOrphanedEvents(eventsPath: string): string {
+  const stamp = new Date().toISOString().split(":").join("-");
+  const fileName = `events.orphaned.${stamp}.jsonl`;
+  const target = path.join(path.dirname(eventsPath), fileName);
+  Deno.renameSync(eventsPath, target);
+  return target;
+}
+
+/** Persists `state` with session bookkeeping merged into its `meta`. */
+function writeStateSnapshot(args: {
+  statePath: string;
+  state: SavedState;
+  offset: number;
+  sessionId: string;
+  sessionDir: string;
+  eventsPath: string;
+}) {
+  const { statePath, state, offset, sessionId, sessionDir, eventsPath } = args;
+  // Bookkeeping keys come after the spread so they override stale meta values.
+  const meta: Record<string, unknown> = {
+    ...(state.meta ?? {}),
+    sessionId,
+    sessionDir,
+    sessionStatePath: statePath,
+    sessionEventsPath: eventsPath,
+    lastAppliedOffset: offset,
+    lastAppliedEventSeq: offset,
+  };
+  const snapshot: SavedState = { ...state, meta };
+  writeJsonAtomic(statePath, snapshot);
+}
+
+/**
+ * Opens a session artifact directory and returns hooks that record a run.
+ *
+ * Creates `.lock` in the session directory, reloads the prior snapshot when
+ * continuing, and wraps the caller's hooks so trace events are appended to
+ * `events.jsonl` and state updates are snapshotted to `state.json`. Call
+ * `finalize` to remove the lock file.
+ * @throws Error when artifacts already exist and `continueSession` is unset,
+ *   when the lock file already exists, or when loading prior artifacts fails.
+ */
+export function withSessionArtifacts(args: {
+  config: SessionArtifactsConfig;
+  trace?: (event: TraceEvent) => void;
+  onStateUpdate?: (state: SavedState) => void;
+  state?: SavedState;
+}): SessionArtifactsRun {
+  const { config, sessionDir, statePath, eventsPath, lockPath } = prepare(
+    args.config,
+  );
+
+  Deno.mkdirSync(sessionDir, { recursive: true });
+  if (
+    !config.continueSession && (existsSync(statePath) || existsSync(eventsPath))
+  ) {
+    throw new Error(
+      `Session artifact directory already exists at ${sessionDir}. Pass continueSession: true with the same sessionId to continue.`,
+    );
+  }
+  try {
+    // `createNew` makes the write fail when the lock file already exists.
+    Deno.writeTextFileSync(
+      lockPath,
+      JSON.stringify({ pid: Deno.pid, acquiredAt: new Date().toISOString() }),
+      { createNew: true },
+    );
+  } catch (err) {
+    if (err instanceof Deno.errors.AlreadyExists) {
+      throw new Error(
+        `Session artifact directory is already active: ${sessionDir}`,
+      );
+    }
+    throw err;
+  }
+
+  // Best-effort unlock shared by the load-failure path and `finalize`.
+  const releaseLock = () => {
+    try {
+      Deno.removeSync(lockPath);
+    } catch {
+      // no-op
+    }
+  };
+
+  let latestState = args.state;
+  let highestOffset = -1;
+  let lastStateOffset = -1;
+  try {
+    highestOffset = readEvents(eventsPath).highestOffset;
+    if (!latestState && config.continueSession && existsSync(statePath)) {
+      latestState = JSON.parse(Deno.readTextFileSync(statePath)) as SavedState;
+    }
+    if (!latestState && config.continueSession && highestOffset >= 0) {
+      // Recovery path: preserve prior trace-only history, then start a fresh
+      // append-only log so retries can continue without corrupting snapshot
+      // boundaries.
+      archiveOrphanedEvents(eventsPath);
+      highestOffset = -1;
+    }
+    const meta = latestState?.meta as
+      | { lastAppliedOffset?: unknown; lastAppliedEventSeq?: unknown }
+      | undefined;
+    lastStateOffset = parseFiniteInteger(meta?.lastAppliedOffset) ??
+      parseFiniteInteger(meta?.lastAppliedEventSeq) ??
+      -1;
+  } catch (err) {
+    releaseLock();
+    throw err;
+  }
+
+  const persistLatest = () => {
+    if (!latestState) return;
+    writeStateSnapshot({
+      statePath,
+      state: latestState,
+      offset: lastStateOffset,
+      sessionId: config.sessionId,
+      sessionDir,
+      eventsPath,
+    });
+  };
+
+  const trace = (event: TraceEvent) => {
+    // Commit the new offset only after the append succeeds; bumping it first
+    // would leave a gap after a failed write, and the next load rejects gaps.
+    const envelope: SessionArtifactEnvelope = {
+      offset: highestOffset + 1,
+      createdAt: new Date().toISOString(),
+      type: "trace",
+      data: event,
+    };
+    appendJsonl(eventsPath, envelope);
+    highestOffset = envelope.offset;
+    args.trace?.(event);
+  };
+
+  const onStateUpdate = (state: SavedState) => {
+    latestState = state;
+    lastStateOffset = highestOffset;
+    persistLatest();
+    args.onStateUpdate?.(state);
+  };
+
+  return {
+    state: latestState,
+    trace,
+    onStateUpdate,
+    finalize: releaseLock,
+    sessionId: config.sessionId,
+    sessionDir,
+  };
+}
diff --git a/src/tui.ts b/src/tui.ts
index 636e7dd2a..da2f9d142 100644
--- a/src/tui.ts
+++ b/src/tui.ts
@@ -1,5 +1,6 @@
 import { isGambitEndSignal, runDeck } from "@bolt-foundry/gambit-core";
 import type { SavedState } from "@bolt-foundry/gambit-core";
+import type { PermissionDeclarationInput } from "@bolt-foundry/gambit-core";
 import * as path from "@std/path";
 
 const encoder = new TextEncoder();
@@ -48,6 +49,11 @@ export async function startTui(opts: {
   initialMessage?: unknown;
   contextProvided?: boolean;
   responsesMode?: boolean;
+  workspacePermissions?: PermissionDeclarationInput;
+  workspacePermissionsBaseDir?: string;
+  sessionPermissions?: PermissionDeclarationInput;
+  sessionPermissionsBaseDir?: string;
+  workerSandbox?: boolean;
 }) {
   if (!Deno.stdin.isTerminal()) {
     throw new Error("tui requires an interactive TTY.");
@@ -230,6 +236,11 @@ export async function startTui(opts: {
           state = s;
         },
         responsesMode: opts.responsesMode,
+        workspacePermissions: opts.workspacePermissions,
+        workspacePermissionsBaseDir: opts.workspacePermissionsBaseDir,
+        sessionPermissions: opts.sessionPermissions,
+        sessionPermissionsBaseDir: opts.sessionPermissionsBaseDir,
+        workerSandbox: opts.workerSandbox,
         onStreamText: (chunk) => {
           if (!chunk) return;
           modelStreamedText = true;
diff --git a/src/workspace.ts b/src/workspace.ts
index 2bcefbf00..a318c3176 100644
--- a/src/workspace.ts
+++ b/src/workspace.ts
@@ -26,6 +26,10 @@ description = "Starter root deck for this workspace."
 [modelParams]
 model = ["ollama/hf.co/LiquidAI/LFM2-1.2B-Tool-GGUF:latest", "openrouter/openai/gpt-5.1-chat"]
 
+[modelParams.reasoning]
+effort = "low"
+summary = "detailed"
+
 [[scenarios]]
 path = "./scenarios/default/PROMPT.md"
 label = "Default scenario"
@@ -41,22 +45,17 @@ You are the default deck for a new Gambit workspace.
 
 ## Assistant Persona
 
-- You are a practical starter assistant for quickly validating the workspace
-  loop.
-- You keep responses plain and easy to replace as the bot evolves.
+- You are a minimal placeholder deck used to bootstrap a blank workspace.
+- You keep responses short and avoid introducing product narrative.
 
 ## User Persona
 
-- The user is trying to confirm the deck runs end-to-end before customizing.
-- They want predictable behavior and clear output.
+- The user is in the Build tab and expects guidance on what to do next.
 
 ## Behavior
 
-- If the conversation does not yet contain a user utterance, reply exactly
-  "Welcome to Gambit! What should we build?"
-- Otherwise, reply exactly "Echo: {input}" where {input} is the most recent
-  user message trimmed of surrounding whitespace.
-- Do not add any other narration or formatting.
+- If asked what to do, reply exactly: "Use the Build tab to draft your deck."
+- Keep all other responses brief, plain text, and focused on build guidance.
 `;
 
 const ROOT_INTENT = `# Workspace Intent
diff --git a/src/workspace_contract.test.ts b/src/workspace_contract.test.ts
new file mode 100644
index 000000000..acc2a5e32
--- /dev/null
+++ b/src/workspace_contract.test.ts
@@ -0,0 +1,42 @@
+import { assertEquals } from "@std/assert";
+import {
+  buildWorkspacePath,
+  parseWorkspaceRoute,
+} from "./workspace_contract.ts";
+
+Deno.test("buildWorkspacePath supports run-addressed test and grade routes", () => {
+  // Each row is [tab, runId, expected path]. The run id becomes a trailing
+  // segment on the test and grade tabs and is dropped for debug.
+  const cases = [
+    ["test", "run_1", "/workspaces/ws_1/test/run_1"],
+    ["grade", "grade_1", "/workspaces/ws_1/grade/grade_1"],
+    ["debug", "ignored", "/workspaces/ws_1/debug"],
+  ] as const;
+  for (const [tab, runId, expectedPath] of cases) {
+    const actualPath = buildWorkspacePath(tab, "ws_1", { runId });
+    assertEquals(actualPath, expectedPath);
+  }
+});
+
+Deno.test("parseWorkspaceRoute parses run-addressed test and grade routes", () => {
+  // The run segment maps to testRunId or gradeRunId depending on the tab.
+  const shared = { workspaceId: "ws_1", isNew: false };
+  const onTestTab = parseWorkspaceRoute("/workspaces/ws_1/test/run_1");
+  assertEquals(
+    onTestTab,
+    { ...shared, tab: "test", testRunId: "run_1", gradeRunId: undefined },
+  );
+  const onGradeTab = parseWorkspaceRoute("/workspaces/ws_1/grade/grade_1");
+  assertEquals(
+    onGradeTab,
+    { ...shared, tab: "grade", testRunId: undefined, gradeRunId: "grade_1" },
+  );
+});
+
+Deno.test("parseWorkspaceRoute rejects unsupported route combinations", () => {
+  // A run segment is invalid on the debug tab and on the "new" placeholder id.
+  const rejected = ["/workspaces/ws_1/debug/run_1", "/workspaces/new/test/run_1"];
+  for (const pathname of rejected) {
+    assertEquals(parseWorkspaceRoute(pathname), null);
+  }
+});
diff --git a/src/workspace_contract.ts b/src/workspace_contract.ts
new file mode 100644
index 000000000..6f2cad477
--- /dev/null
+++ b/src/workspace_contract.ts
@@ -0,0 +1,159 @@
+export const WORKSPACE_STATE_SCHEMA_VERSION = "workspace-state.v1"; // version that workspaceSchemaError reports as expected
+
+export const WORKSPACE_ROUTE_BASE = "/workspaces"; // prefix of every path built by buildWorkspacePath
+
+export type WorkspaceRouteTab = "debug" | "build" | "test" | "grade"; // tab segment of a workspace route
+
+export const WORKSPACE_ROUTE_TABS: Array<WorkspaceRouteTab> = [ // every WorkspaceRouteTab value, as runtime data
+  "debug",
+  "build",
+  "test",
+  "grade",
+];
+
+export type WorkspaceRoute = { // field notes describe what parseWorkspaceRoute returns
+  workspaceId: string | null; // null when the id segment of the path is "new"
+  tab: WorkspaceRouteTab;
+  isNew: boolean; // true when the id segment of the path is "new"
+  testRunId?: string; // decoded run segment, set only when tab is "test"
+  gradeRunId?: string; // decoded run segment, set only when tab is "grade"
+};
+
+export type WorkspaceStreamEvent = // union discriminated on the `type` field
+  | {
+    type: "build.stream";
+    workspaceId: string;
+    role: "assistant" | "user";
+    chunk: string; // only the "*.stream" variants carry a chunk
+    turn?: number;
+    ts?: number; // presumably a timestamp — TODO confirm units with the producer
+  }
+  | {
+    type: "build.stream.end"; // same fields as "build.stream" minus chunk
+    workspaceId: string;
+    role: "assistant" | "user";
+    turn?: number;
+    ts?: number;
+  }
+  | {
+    type: "test.stream"; // same fields as "build.stream"
+    workspaceId: string;
+    role: "assistant" | "user";
+    chunk: string;
+    turn?: number;
+    ts?: number;
+  }
+  | {
+    type: "test.stream.end"; // same fields as "test.stream" minus chunk
+    workspaceId: string;
+    role: "assistant" | "user";
+    turn?: number;
+    ts?: number;
+  }
+  | {
+    type: "grade.session"; // no role, chunk, or turn on this variant
+    workspaceId: string;
+    runId?: string;
+    ts?: number;
+  };
+
+export type WorkspaceReducerEvent = // discriminated on `type`; every variant carries workspaceId
+  | { type: "workspace.loaded"; workspaceId: string; state: unknown } // payload is untyped here
+  | { type: "build.status"; workspaceId: string; run: unknown }
+  | { type: "test.status"; workspaceId: string; run: unknown }
+  | { type: "grade.status"; workspaceId: string; run: unknown }
+  | WorkspaceStreamEvent; // all stream events are also reducer events
+
+export type WorkspaceCreateResponse = { // NOTE(review): presumably the /api/workspace/new response body — confirm
+  workspaceId: string;
+  deckPath: string;
+  workspaceDir: string;
+  createdAt: string; // presumably an ISO-8601 timestamp — TODO confirm with the producer
+  workspaceSchemaVersion: string; // presumably WORKSPACE_STATE_SCHEMA_VERSION — TODO confirm
+};
+
+// Builds "/workspaces/<id or new>/<tab>", plus "/<runId>" on test/grade tabs.
+export const buildWorkspacePath = (
+  tab: WorkspaceRouteTab,
+  workspaceId?: string | null,
+  opts?: { runId?: string },
+): string => {
+  const idSegment = workspaceId ? encodeURIComponent(workspaceId) : "new";
+  const segments = [WORKSPACE_ROUTE_BASE, idSegment, tab];
+  const takesRunId = tab === "test" || tab === "grade";
+  if (takesRunId && opts?.runId) segments.push(encodeURIComponent(opts.runId));
+  return segments.join("/");
+};
+
+// Parses "/workspaces/<id|new>/<tab>[/<runId>]"; returns null for a non-matching
+// path, a run segment on "new" or a non-test/grade tab, or bad %-encoding.
+export const parseWorkspaceRoute = (
+  pathname: string,
+): WorkspaceRoute | null => {
+  const match = pathname.match(
+    /^\/workspaces\/([^/]+)\/(debug|build|test|grade)(?:\/([^/]+))?$/,
+  );
+  if (!match) return null;
+  const tab = match[2] as WorkspaceRouteTab;
+  let rawId: string;
+  let runSegment: string | undefined;
+  try {
+    rawId = decodeURIComponent(match[1]);
+    if (match[3] !== undefined) runSegment = decodeURIComponent(match[3]);
+  } catch {
+    return null; // decodeURIComponent throws URIError on malformed escapes
+  }
+  const runAllowed = rawId !== "new" && (tab === "test" || tab === "grade");
+  if (runSegment && !runAllowed) return null;
+  if (rawId === "new") return { workspaceId: null, tab, isNew: true };
+  const testRunId = tab === "test" ? runSegment : undefined;
+  const gradeRunId = tab === "grade" ? runSegment : undefined;
+  return { workspaceId: rawId, tab, isNew: false, testRunId, gradeRunId };
+};
+
+export const WORKSPACE_ID_ALIASES = ["workspaceId"] as const; // keys probed, in order, by the resolveWorkspaceIdFrom* helpers
+type WorkspaceIdAlias = typeof WORKSPACE_ID_ALIASES[number]; // union of the alias key literals above
+
+// Returns record[key] when it is a string with non-whitespace content.
+const readString = (
+  record: Record<string, unknown>,
+  key: WorkspaceIdAlias,
+): string | undefined => {
+  const candidate = record[key];
+  if (typeof candidate !== "string") return undefined;
+  return candidate.trim().length > 0 ? candidate : undefined;
+};
+
+// Returns the first non-blank string stored under a known workspace-id key.
+export const resolveWorkspaceIdFromRecord = (
+  record: Record<string, unknown>,
+): string | undefined => {
+  return WORKSPACE_ID_ALIASES.reduce<string | undefined>(
+    (found, alias) => found ?? readString(record, alias), // ?? skips later reads
+    undefined,
+  );
+};
+
+// Returns the first non-blank query value found under a workspace-id key.
+export const resolveWorkspaceIdFromSearchParams = (
+  params: URLSearchParams,
+): string | undefined => {
+  for (const alias of WORKSPACE_ID_ALIASES) {
+    const raw = params.get(alias); // null when the parameter is absent
+    if (typeof raw !== "string" || raw.trim().length === 0) continue;
+    return raw;
+  }
+  return undefined;
+};
+
+// Formats the error text for a workspace whose state schema version is not
+// WORKSPACE_STATE_SCHEMA_VERSION; a null or empty version reads as "missing".
+export const workspaceSchemaError = (
+  workspaceId: string,
+  foundVersion: string | null,
+): string => {
+  const found = foundVersion ? `"${foundVersion}"` : "missing";
+  const problem = `Unsupported workspace state schema for "${workspaceId}" (found ${found}).`;
+  const expected = `Expected "${WORKSPACE_STATE_SCHEMA_VERSION}".`;
+  return `${problem} ${expected} Recreate this workspace with /api/workspace/new.`;
+};