diff --git a/CHANGELOG.md b/CHANGELOG.md index 6300b5151..5b4678725 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,14 @@ since = "849762245925cce325c04da1d604088370ec3723" ## Unreleased (v0.8.4) -- TBD +- feat(gambit): add `createDefaultedRuntime` and defaulted `runDeck` wrapper + with CLI-equivalent provider/model routing for library callers +- refactor(gambit): route CLI runtime/provider setup through shared + `default_runtime` construction path +- feat(demo-runner): migrate demo test-deck prompt generation to Gambit default + runtime wrapper (no hardwired OpenRouter provider) +- docs(gambit): add migration guidance for `runDeck` wrapper and `runDeckCore` + replacement mapping ## v0.8.3 diff --git a/README.md b/README.md index c17a015a7..97a64b201 100644 --- a/README.md +++ b/README.md @@ -100,10 +100,10 @@ Drop into a REPL (streams by default): npx @bolt-foundry/gambit repl ``` -Run a persona against a root deck (test bot): +Run a persona against a root deck (scenario): ``` -npx @bolt-foundry/gambit test-bot --test-deck +npx @bolt-foundry/gambit scenario --test-deck ``` Grade a saved session: @@ -124,6 +124,23 @@ Tracing and state:  `--verbose` to print events\ `--state ` to persist a session. +### Worker sandbox defaults + +- Deck-executing CLI surfaces default to worker sandbox execution. +- Use `--no-worker-sandbox` (or `--legacy-exec`) to force legacy in-process + execution. +- `--worker-sandbox` explicitly forces worker execution on. +- `--sandbox` / `--no-sandbox` are deprecated aliases. +- `gambit.toml` equivalent: + ```toml + [execution] + worker_sandbox = false # same as --no-worker-sandbox + # legacy_exec = true # equivalent rollback toggle + ``` + +The npm launcher (`npx @bolt-foundry/gambit ...`) runs the Gambit CLI binary for +your platform, so these defaults and flags apply there as well. + ## Using the Simulator The simulator is the local Debug UI that streams runs and renders traces. @@ -173,6 +190,59 @@ Define `contextSchema`/`responseSchema` with Zod to validate IO, and implement\ `ctx.spawnAndWait({ path, input })`. Emit structured trace events with\ `ctx.log(...)`. +### Runtime defaults for programmatic `runDeck` + +`runDeck` from `@bolt-foundry/gambit` now uses CLI-equivalent provider/model +defaults (alias expansion, provider routing, fallback behavior). + +Before (direct-provider setup in each caller): + +```ts +import { createOpenRouterProvider, runDeck } from "jsr:@bolt-foundry/gambit"; + +const provider = createOpenRouterProvider({ + apiKey: Deno.env.get("OPENROUTER_API_KEY")!, +}); +await runDeck({ + path: "./root.deck.md", + input: { message: "hi" }, + modelProvider: provider, +}); +``` + +After (defaulted wrapper): + +```ts +import { runDeck } from "jsr:@bolt-foundry/gambit"; + +await runDeck({ + path: "./root.deck.md", + input: { message: "hi" }, +}); +``` + +Per-runtime override (shared runtime object): + +```ts +import { createDefaultedRuntime, runDeck } from "jsr:@bolt-foundry/gambit"; + +const runtime = await createDefaultedRuntime({ + fallbackProvider: "codex-cli", +}); + +await runDeck({ + runtime, + path: "./root.deck.md", + input: { message: "hi" }, +}); +``` + +Replacement mapping: + +- Legacy direct core passthrough export: `runDeck` -> `runDeckCore` +- Defaulted wrapper export: `runDeck` +- Runtime builder: `createDefaultedRuntime` + --- ## Author your first deck @@ -271,8 +341,8 @@ npx @bolt-foundry/gambit serve ./examples/respond_flow/decks/root.deck.ts --port Then: 1. Open `http://localhost:8000/test`, pick the **Escalation persona**, and run - it. Leave the “Use test deck input for init” toggle on to see persona data - seed the init form automatically. + it. Leave the “Use scenario deck input for init” toggle on to see persona + data seed the init form automatically. 2. Switch to the Debug tab to inspect the session—the child deck emits a `gambit_respond` payload that now shows up as a structured assistant turn. 3. Head to the Calibrate tab and run the **Respond payload grader** to exercise diff --git a/deno.jsonc b/deno.jsonc index c33e43626..2a222e9eb 100644 --- a/deno.jsonc +++ b/deno.jsonc @@ -25,7 +25,7 @@ "bundle:sim:sourcemap": "deno run -A scripts/bundle_simulator_ui.ts --sourcemap=external", "bundle:sim:web": "deno run -A scripts/bundle_simulator_ui.ts --platform=browser", "bundle:sim:web:sourcemap": "deno run -A scripts/bundle_simulator_ui.ts --platform=browser --sourcemap=external", - "serve:bot": "mkdir -p /tmp/gambit-bot-root && GAMBIT_BOT_ROOT=/tmp/gambit-bot-root deno run -A src/cli.ts serve src/decks/gambit-bot/PROMPT.md --bundle --port 8000", + "serve:bot": "mkdir -p /tmp/gambit-bot-root && GAMBIT_SIMULATOR_BUILD_BOT_ROOT=/tmp/gambit-bot-root GAMBIT_BOT_ROOT=/tmp/gambit-bot-root deno run -A src/cli.ts serve src/decks/gambit-bot/PROMPT.md --bundle --port 8000", "serve:bot:sandbox": "deno run -A scripts/serve_bot_sandbox.ts", "build_npm": "deno run -A scripts/build_npm.ts" }, diff --git a/docs/external/concepts/runtime.md b/docs/external/concepts/runtime.md index e16251fb2..613f6e614 100644 --- a/docs/external/concepts/runtime.md +++ b/docs/external/concepts/runtime.md @@ -31,7 +31,7 @@ safe/observable. - `gambit_end`: enable with `![end](gambit://cards/end.card.md)` in Markdown (or `allowEnd: true` in TypeScript decks). Calling it returns a sentinel `{ __gambitEnd: true, payload?, status?, message?, code?, meta? }` so - CLI/test-bot loops stop reinjecting the closing assistant turn. + CLI/scenario loops stop reinjecting the closing assistant turn. ## State and turn order diff --git a/docs/external/guides/authoring.md b/docs/external/guides/authoring.md index 9f1790c11..18c6d0bab 100644 --- a/docs/external/guides/authoring.md +++ b/docs/external/guides/authoring.md @@ -12,10 +12,10 @@ verification. references (action/test/grader) and schema fragments into the parent deck. - Action decks are child decks exposed as model tools. Names must match `^[A-Za-z_][A-Za-z0-9_]*$` and avoid the `gambit_` prefix (reserved). -- Persona/test decks may accept free-form user turns. Use the `acceptsUserTurns` - flag to control this behavior: root decks default to `true`, while action - decks default to `false`. Set it explicitly to `true` for persona/bot decks or - to `false` for workflow-only decks. +- Persona/scenario decks may accept free-form user turns. Use the + `acceptsUserTurns` flag to control this behavior: root decks default to + `true`, while action decks default to `false`. Set it explicitly to `true` for + persona/bot decks or to `false` for workflow-only decks. ## Pick a format @@ -77,7 +77,7 @@ migrate a repository, run: deno run -A packages/gambit/scripts/migrate-schema-terms.ts ``` -## Action decks, test decks, grader decks +## Action decks, scenario decks, grader decks - Add action decks in front matter or TS definitions: `actionDecks = [{ name = "get_time", path = "./get_time.deck.ts" }]`. @@ -101,10 +101,10 @@ deno run -A packages/gambit/scripts/migrate-schema-terms.ts should set `acceptsUserTurns = true` and may declare its own `contextSchema` (for example `contextSchema = "../schemas/my_persona_test.zod.ts"`) so the Test tab renders a schema-driven “Scenario” form for that persona. -- For persona/test decks, you can embed +- For persona/scenario decks, you can embed `![generate-test-input](gambit://cards/generate-test-input.card.md)` to - include the Test Bot init-fill contract instructions. -- Test Bot init fill: when a Test Bot run is missing required init fields, the + include the scenario init-fill contract instructions. +- Scenario init fill: when a scenario run is missing required init fields, the selected persona deck is asked to supply only the missing values before the run begins. The persona receives a single user message containing a JSON payload like: @@ -133,8 +133,8 @@ deno run -A packages/gambit/scripts/migrate-schema-terms.ts - Markdown roots default to `true`; TypeScript decks default to `false` everywhere. Set it to `false` for any workflow deck that should never accept user turns (regardless of how it's run). - - Persona/test decks should set `acceptsUserTurns = true` so they can receive - messages even when invoked as non-root bots. + - Persona/scenario decks should set `acceptsUserTurns = true` so they can + receive messages even when invoked as non-root bots. ## Synthetic tools and handlers @@ -170,7 +170,7 @@ deno run -A packages/gambit/scripts/migrate-schema-terms.ts http://localhost:8000/debug. - Tracing: add `--verbose` for console traces or `--trace out.jsonl` to persist events; use `--state state.json` with `run` to persist conversation state - between turns. When `--state` is omitted, test-bot/serve sessions default to + between turns. When `--state` is omitted, scenario/serve sessions default to `/.gambit/sessions/...` where each session includes `state.json` (materialized snapshot) plus append-only `events.jsonl`, `feedback.jsonl`, and `grading.jsonl` for downstream ingestion. The project root is the nearest diff --git a/docs/external/reference/cli.md b/docs/external/reference/cli.md index 5223c9c16..023e75261 100644 --- a/docs/external/reference/cli.md +++ b/docs/external/reference/cli.md @@ -11,22 +11,22 @@ How to run Gambit, the agent harness framework, locally and observe runs. - Command help: `deno run -A src/cli.ts help ` (or `deno run -A src/cli.ts -h`). - Run once: - `deno run -A src/cli.ts run [--context ] [--message ] [--model ] [--model-force ] [--trace ] [--state ] [--stream] [--responses] [--verbose]` + `deno run -A src/cli.ts run [--context ] [--message ] [--model ] [--model-force ] [--trace ] [--state ] [--stream] [--responses] [--verbose] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]` - Check models: `deno run -A src/cli.ts check ` - REPL: `deno run -A src/cli.ts repl ` (defaults to `src/decks/gambit-assistant.deck.md` in a local checkout). Streams by default and keeps state in memory for the session. -- Test bot (CLI): - `deno run -A src/cli.ts test-bot --test-deck [--context ] [--bot-input ] [--message ] [--max-turns ] [--state ] [--grade ...] [--trace ] [--responses] [--verbose]` +- Scenario (CLI): + `deno run -A src/cli.ts scenario --test-deck [--context ] [--bot-input ] [--message ] [--max-turns ] [--state ] [--grade ...] [--trace ] [--responses] [--verbose] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]` - Grade (CLI): - `deno run -A src/cli.ts grade --state [--model ] [--model-force ] [--trace ] [--responses] [--verbose]` + `deno run -A src/cli.ts grade --state [--model ] [--model-force ] [--trace ] [--responses] [--verbose] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]` - Export bundle (CLI): `deno run -A src/cli.ts export [] --state --out ` - Debug UI: `deno run -A src/cli.ts serve --port 8000` then open http://localhost:8000/. This serves a multi-page UI: - Debug (default): `http://localhost:8000/debug` - - Test: `http://localhost:8000/test-bot` + - Test: `http://localhost:8000/test` - Calibrate: `http://localhost:8000/calibrate` The WebSocket server streams turns, traces, and status updates. @@ -46,15 +46,24 @@ How to run Gambit, the agent harness framework, locally and observe runs. - `GAMBIT_RESPONSES_MODE=1`: env alternative to `--responses` for runtime/state. - `GAMBIT_OPENROUTER_RESPONSES=1`: route OpenRouter calls through the Responses API (experimental; chat remains the default path). +- Worker execution defaults on for deck-executing surfaces. Use + `--no-worker-sandbox` (or `--legacy-exec`) to roll back to legacy in-process + execution. `--sandbox/--no-sandbox` still work as deprecated aliases. +- `gambit.toml` config equivalent: + ```toml + [execution] + worker_sandbox = false # same as --no-worker-sandbox + # legacy_exec = true # equivalent rollback toggle + ``` ## State and tracing -- `--state ` (run/test-bot/grade/export): load/persist messages so you can +- `--state ` (run/scenario/grade/export): load/persist messages so you can continue a conversation; skips `gambit_context` on resume. `grade` writes `meta.gradingRuns` back into the session state, while `export` reads the state file to build the bundle. - `--out ` (export): bundle output path (tar.gz). -- `--grade ` (test-bot): can be repeated; graders run in the order +- `--grade ` (scenario): can be repeated; graders run in the order provided and append results to `meta.gradingRuns` in the same session state file. - `--trace ` writes JSONL trace events; `--verbose` prints trace to @@ -91,17 +100,17 @@ How to run Gambit, the agent harness framework, locally and observe runs. `window.gambitFormatTrace` hook in the page; return a string or `{role?, summary?, details?, depth?}` to override the entry that appears in the Traces & Tools pane. -- The Test page reuses the same simulator runtime but drives persona/test-bot +- The Test page reuses the same simulator runtime but drives persona/scenario decks so you can batch synthetic conversations, inspect per-turn scoring, and export JSONL artifacts for later ingestion. List personas by declaring `[[testDecks]]` entries in your root deck (for example `gambit/examples/advanced/voice_front_desk/decks/root.deck.md`). Each entry’s `path` should point to a persona deck (Markdown or TS) that includes `acceptsUserTurns = true`; the persona deck’s own `contextSchema` and defaults - power the Scenario/Test Bot form (see + power the Scenario form (see `gambit/examples/advanced/voice_front_desk/tests/new_patient_intake.deck.md`). Editing those deck files is how you add/remove personas now—there is no - `.gambit/test-bot.md` override. + `.gambit/scenario.md` override. - The Calibrate page is the regroup/diagnostics view for graders that run against saved Debug/Test sessions; it currently serves as a placeholder until the grading transport lands. diff --git a/docs/external/reference/cli/commands/bot.md b/docs/external/reference/cli/commands/bot.md index 61afa71aa..83ba442ec 100644 --- a/docs/external/reference/cli/commands/bot.md +++ b/docs/external/reference/cli/commands/bot.md @@ -1,12 +1,17 @@ +++ command = "bot" summary = "Run the Gambit bot assistant" -usage = "gambit bot [] [--bot-root ] [--model ] [--model-force ] [--responses] [--verbose]" +usage = "gambit bot [] [--bot-root ] [--model ] [--model-force ] [--responses] [--verbose] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]" flags = [ "--bot-root Allowed folder for bot file writes (defaults to workspace.decks if set; overrides )", "--model Default model id", "--model-force Override model id", "--responses Run runtime/state in Responses mode", + "--worker-sandbox Force worker execution on", + "--no-worker-sandbox Force worker execution off", + "--legacy-exec Alias for --no-worker-sandbox", + "--sandbox Deprecated alias for --worker-sandbox", + "--no-sandbox Deprecated alias for --no-worker-sandbox", "--verbose Print trace events to console", ] +++ diff --git a/docs/external/reference/cli/commands/grade.md b/docs/external/reference/cli/commands/grade.md index 531bb12c7..6dab256d1 100644 --- a/docs/external/reference/cli/commands/grade.md +++ b/docs/external/reference/cli/commands/grade.md @@ -1,7 +1,7 @@ +++ command = "grade" summary = "Grade a saved state file" -usage = "gambit grade --state [--model ] [--model-force ] [--trace ] [--responses] [--verbose]" +usage = "gambit grade --state [--model ] [--model-force ] [--trace ] [--responses] [--verbose] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]" flags = [ "--grader Grader deck path (overrides positional)", "--state Load/persist state", @@ -9,6 +9,11 @@ flags = [ "--model-force Override model id", "--trace Write trace events to file (JSONL)", "--responses Run runtime/state in Responses mode", + "--worker-sandbox Force worker execution on", + "--no-worker-sandbox Force worker execution off", + "--legacy-exec Alias for --no-worker-sandbox", + "--sandbox Deprecated alias for --worker-sandbox", + "--no-sandbox Deprecated alias for --no-worker-sandbox", "--verbose Print trace events to console", ] +++ diff --git a/docs/external/reference/cli/commands/repl.md b/docs/external/reference/cli/commands/repl.md index 37426f039..fd557d713 100644 --- a/docs/external/reference/cli/commands/repl.md +++ b/docs/external/reference/cli/commands/repl.md @@ -1,7 +1,7 @@ +++ command = "repl" summary = "Start an interactive REPL" -usage = "gambit repl [--context ] [--message ] [--model ] [--model-force ] [--responses] [--verbose]" +usage = "gambit repl [--context ] [--message ] [--model ] [--model-force ] [--responses] [--verbose] [-A|--allow-all|--allow-] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]" flags = [ "--context Context payload (seeds gambit_context; legacy --init still works)", "--message Initial user message (sent before assistant speaks)", @@ -9,6 +9,17 @@ flags = [ "--model-force Override model id", "--responses Run runtime/state in Responses mode", "--verbose Print trace events to console", + "-A, --allow-all Allow all session permissions (read/write/run/net/env)", + "--allow-read[=] Session read override (all when value omitted)", + "--allow-write[=] Session write override (all when value omitted)", + "--allow-run[=] Session run override (all when value omitted)", + "--allow-net[=] Session net override (all when value omitted)", + "--allow-env[=] Session env override (all when value omitted)", + "--worker-sandbox Force worker execution on", + "--no-worker-sandbox Force worker execution off", + "--legacy-exec Alias for --no-worker-sandbox", + "--sandbox Deprecated alias for --worker-sandbox", + "--no-sandbox Deprecated alias for --no-worker-sandbox", ] +++ diff --git a/docs/external/reference/cli/commands/run.md b/docs/external/reference/cli/commands/run.md index 1ce8e84e9..e148ed428 100644 --- a/docs/external/reference/cli/commands/run.md +++ b/docs/external/reference/cli/commands/run.md @@ -1,7 +1,7 @@ +++ command = "run" summary = "Run a deck once" -usage = "gambit run [] [--context ] [--message ] [--model ] [--model-force ] [--trace ] [--state ] [--stream] [--responses] [--verbose]" +usage = "gambit run [] [--context ] [--message ] [--model ] [--model-force ] [--trace ] [--state ] [--stream] [--responses] [--verbose] [-A|--allow-all|--allow-] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]" flags = [ "--context Context payload (seeds gambit_context; legacy --init still works)", "--message Initial user message (sent before assistant speaks)", @@ -12,6 +12,17 @@ flags = [ "--stream Enable streaming responses", "--responses Run runtime/state in Responses mode", "--verbose Print trace events to console", + "-A, --allow-all Allow all session permissions (read/write/run/net/env)", + "--allow-read[=] Session read override (all when value omitted)", + "--allow-write[=] Session write override (all when value omitted)", + "--allow-run[=] Session run override (all when value omitted)", + "--allow-net[=] Session net override (all when value omitted)", + "--allow-env[=] Session env override (all when value omitted)", + "--worker-sandbox Force worker execution on", + "--no-worker-sandbox Force worker execution off", + "--legacy-exec Alias for --no-worker-sandbox", + "--sandbox Deprecated alias for --worker-sandbox", + "--no-sandbox Deprecated alias for --no-worker-sandbox", ] +++ diff --git a/docs/external/reference/cli/commands/test-bot.md b/docs/external/reference/cli/commands/scenario.md similarity index 55% rename from docs/external/reference/cli/commands/test-bot.md rename to docs/external/reference/cli/commands/scenario.md index 7b54dd238..1e8a224ba 100644 --- a/docs/external/reference/cli/commands/test-bot.md +++ b/docs/external/reference/cli/commands/scenario.md @@ -1,25 +1,30 @@ +++ -command = "test-bot" -summary = "Run a persona/test-bot loop" -usage = "gambit test-bot --test-deck [--context ] [--bot-input ] [--message ] [--max-turns ] [--state ] [--grade ...] [--trace ] [--responses] [--verbose]" +command = "scenario" +summary = "Run a scenario loop with a persona deck" +usage = "gambit scenario --test-deck [--context ] [--bot-input ] [--message ] [--max-turns ] [--state ] [--grade ...] [--trace ] [--responses] [--verbose] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]" flags = [ - "--test-deck Persona/test deck path", + "--test-deck Persona/scenario deck path", "--grade Grader deck path (repeatable)", "--context Context payload (seeds gambit_context; legacy --init still works)", - "--bot-input Input payload for the persona deck", + "--bot-input Input payload for the persona/scenario deck", "--message Initial user message (sent before assistant speaks)", - "--max-turns Max turns for test-bot (default: 12)", + "--max-turns Max turns for scenario loop (default: 12)", "--state Load/persist state", "--model Default model id", "--model-force Override model id", "--trace Write trace events to file (JSONL)", "--responses Run runtime/state in Responses mode", + "--worker-sandbox Force worker execution on", + "--no-worker-sandbox Force worker execution off", + "--legacy-exec Alias for --no-worker-sandbox", + "--sandbox Deprecated alias for --worker-sandbox", + "--no-sandbox Deprecated alias for --no-worker-sandbox", "--verbose Print trace events to console", ] +++ -Runs a persona deck against a root deck to simulate conversations. Repeat -`--grade` to apply multiple graders. +Runs a persona deck against a root deck to execute a scenario. Repeat `--grade` +to apply multiple graders. If the root deck has required init fields that are missing, the persona deck is asked to return JSON for only the missing fields before the run starts. The diff --git a/docs/external/reference/cli/commands/serve.md b/docs/external/reference/cli/commands/serve.md index ce605d6d7..1d3165106 100644 --- a/docs/external/reference/cli/commands/serve.md +++ b/docs/external/reference/cli/commands/serve.md @@ -1,7 +1,7 @@ +++ command = "serve" summary = "Run the debug UI server" -usage = "gambit serve [] [--model ] [--model-force ] [--port ] [--responses] [--verbose] [--watch] [--no-bundle] [--no-sourcemap]" +usage = "gambit serve [] [--model ] [--model-force ] [--port ] [--responses] [--verbose] [--watch] [--no-bundle] [--no-sourcemap] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]" flags = [ "--model Default model id", "--model-force Override model id", @@ -13,12 +13,16 @@ flags = [ "--sourcemap Generate external source maps (serve; default in dev)", "--no-sourcemap Disable source map generation (serve)", "--platform Bundle target platform: deno (default) or web (browser)", + "--worker-sandbox Force worker execution on", + "--no-worker-sandbox Force worker execution off", + "--legacy-exec Alias for --no-worker-sandbox", + "--sandbox Deprecated alias for --worker-sandbox", + "--no-sandbox Deprecated alias for --no-worker-sandbox", "--verbose Print trace events to console", ] +++ Starts the debug UI server (default at `http://localhost:8000/`). -If no deck path is provided, Gambit creates a new workspace scaffold (root -`PROMPT.md`, `INTENT.md`, plus default scenario/grader decks) and opens the -simulator UI in workspace onboarding mode. +If no deck path is provided, Gambit uses `./PROMPT.md`. If `./PROMPT.md` does +not exist, Gambit creates a minimal `PROMPT.md` and serves it. diff --git a/examples/dev/simpsons_explainer_notest/README.md b/examples/dev/simpsons_explainer_notest/README.md index 471f07d56..171c94556 100644 --- a/examples/dev/simpsons_explainer_notest/README.md +++ b/examples/dev/simpsons_explainer_notest/README.md @@ -1,6 +1,7 @@ # simpsons_explainer_notest -Local dev example for testing the Test tab when no test decks are configured. +Local dev example for testing the Test tab when no scenario decks are +configured. ## Prereqs diff --git a/examples/dev/simpsons_explainer_notest/schemas/test_bot_input.zod.ts b/examples/dev/simpsons_explainer_notest/schemas/test_bot_input.zod.ts index 0bef4deae..38a9efa9c 100644 --- a/examples/dev/simpsons_explainer_notest/schemas/test_bot_input.zod.ts +++ b/examples/dev/simpsons_explainer_notest/schemas/test_bot_input.zod.ts @@ -2,6 +2,6 @@ import { z } from "npm:zod"; export default z.object({ initialQuestion: z.string().describe( - "Optional override for the test bot's first user question.", + "Optional override for the scenario's first user question.", ).optional(), }); diff --git a/examples/dev/simpsons_explainer_user/README.md b/examples/dev/simpsons_explainer_user/README.md index 33415b5e2..4b2ac7ceb 100644 --- a/examples/dev/simpsons_explainer_user/README.md +++ b/examples/dev/simpsons_explainer_user/README.md @@ -1,6 +1,6 @@ # simpsons_explainer_user -Local dev example with test decks enabled and `startMode = "user"`. +Local dev example with scenario decks enabled and `startMode = "user"`. ## Prereqs @@ -21,15 +21,15 @@ From this folder: deno run -A jsr:@bolt-foundry/gambit@^0.8.3/cli serve root.deck.md ``` -## Run a test bot (UI) +## Run a scenario (UI) 1. Open the simulator UI and go to the "Test" tab. -2. Click "Run test bot". +2. Click "Run scenario". 3. Use the composer to send the first user message (start mode is user). 4. Optionally switch to the "Grade" tab and click "Run grader". ## Notes -- Test decks are registered in `cards/test_decks.card.md`. +- Scenario decks are registered in `cards/test_decks.card.md`. - Graders are registered in `cards/grader_decks.card.md`. - Instruction cards live in `cards/` and are included by `root.deck.md`. diff --git a/examples/dev/simpsons_explainer_user/cards/test_decks.card.md b/examples/dev/simpsons_explainer_user/cards/test_decks.card.md index 6b5bc8065..8c9cc4976 100644 --- a/examples/dev/simpsons_explainer_user/cards/test_decks.card.md +++ b/examples/dev/simpsons_explainer_user/cards/test_decks.card.md @@ -4,10 +4,10 @@ label = "demo_test_decks" [[testDecks]] label = "Planets question" path = "../test_bots/planets.deck.md" -description = "Test bot that asks a basic planets-orbit question, with optional initialQuestion override." +description = "Scenario that asks a basic planets-orbit question, with optional initialQuestion override." [[testDecks]] label = "Quantum entanglement question" path = "../test_bots/quantum_entanglement.deck.md" -description = "Test bot that asks about quantum entanglement, with optional initialQuestion override." +description = "Scenario that asks about quantum entanglement, with optional initialQuestion override." +++ diff --git a/examples/dev/simpsons_explainer_user/schemas/test_bot_input.zod.ts b/examples/dev/simpsons_explainer_user/schemas/test_bot_input.zod.ts index 0bef4deae..38a9efa9c 100644 --- a/examples/dev/simpsons_explainer_user/schemas/test_bot_input.zod.ts +++ b/examples/dev/simpsons_explainer_user/schemas/test_bot_input.zod.ts @@ -2,6 +2,6 @@ import { z } from "npm:zod"; export default z.object({ initialQuestion: z.string().describe( - "Optional override for the test bot's first user question.", + "Optional override for the scenario's first user question.", ).optional(), }); diff --git a/examples/dev/simpsons_explainer_user_notest/README.md b/examples/dev/simpsons_explainer_user_notest/README.md index 26bf3588d..b00002640 100644 --- a/examples/dev/simpsons_explainer_user_notest/README.md +++ b/examples/dev/simpsons_explainer_user_notest/README.md @@ -1,6 +1,6 @@ # simpsons_explainer_user_notest -Local dev example for testing the Test tab with no test decks configured and +Local dev example for testing the Test tab with no scenario decks configured and `startMode = "user"`. ## Prereqs diff --git a/examples/dev/simpsons_explainer_user_notest/schemas/test_bot_input.zod.ts b/examples/dev/simpsons_explainer_user_notest/schemas/test_bot_input.zod.ts index 0bef4deae..38a9efa9c 100644 --- a/examples/dev/simpsons_explainer_user_notest/schemas/test_bot_input.zod.ts +++ b/examples/dev/simpsons_explainer_user_notest/schemas/test_bot_input.zod.ts @@ -2,6 +2,6 @@ import { z } from "npm:zod"; export default z.object({ initialQuestion: z.string().describe( - "Optional override for the test bot's first user question.", + "Optional override for the scenario's first user question.", ).optional(), }); diff --git a/mod.ts b/mod.ts index bfea422c7..fa0663bbd 100644 --- a/mod.ts +++ b/mod.ts @@ -25,12 +25,27 @@ export type { Guardrails } from "@bolt-foundry/gambit-core"; export type { JSONValue } from "@bolt-foundry/gambit-core"; /** Model provider interface for LLM backends. */ export type { ModelProvider } from "@bolt-foundry/gambit-core"; -/** Test deck definition shape. */ +/** Scenario deck definition shape. */ export type { TestDeckDefinition } from "@bolt-foundry/gambit-core"; /** Check if a value is an explicit end-of-run signal. */ export { isGambitEndSignal } from "@bolt-foundry/gambit-core"; -/** Run a deck and return its execution result. */ -export { runDeck } from "@bolt-foundry/gambit-core"; +/** Check whether an error represents runtime cancellation. */ +export { isRunCanceledError } from "@bolt-foundry/gambit-core"; +/** Build a runtime with CLI-equivalent provider defaults and routing. */ +export { createDefaultedRuntime } from "./src/default_runtime.ts"; +/** Runtime defaults/options for the `runDeck` wrapper. */ +export type { + CreateDefaultedRuntimeOptions, + DefaultedRuntime, + DefaultedRuntimeRunOptions, + RunDeckWithDefaultsOptions, +} from "./src/default_runtime.ts"; +/** Session artifact persistence config for default runtime runs. */ +export type { SessionArtifactsConfig } from "./src/session_artifacts.ts"; +/** Run a deck with default provider/model/runtime behavior. */ +export { runDeck } from "./src/default_runtime.ts"; +/** Run a deck directly through gambit-core without gambit defaults. */ +export { runDeck as runDeckCore } from "@bolt-foundry/gambit-core"; /** Signal for explicitly ending a Gambit run. */ export type { GambitEndSignal } from "@bolt-foundry/gambit-core"; /** OpenAI Chat Completions compatibility helper for a deck. */ diff --git a/packages/gambit-core/README.md b/packages/gambit-core/README.md index f149ff342..ae5e26840 100644 --- a/packages/gambit-core/README.md +++ b/packages/gambit-core/README.md @@ -112,6 +112,14 @@ export default defineCard({ }); ``` +For built-in Gambit schemas in TypeScript/compute decks, use canonical module +subpaths: + +``` +import contextSchema from "@bolt-foundry/gambit-core/schemas/scenarios/plain_chat_input_optional.zod.ts"; +import responseSchema from "@bolt-foundry/gambit-core/schemas/scenarios/plain_chat_output.zod.ts"; +``` + ## Running decks programmatically The runtime loads the deck (Markdown or TS) and steps through each pass. Provide @@ -145,7 +153,13 @@ When the deck defines `run`/`execute`, the runtime hands you an [`ExecutionContext`](src/types.ts) with: - `ctx.input`: validated input (narrowable when you type the schema). -- `ctx.spawnAndWait({ path, input })`: call another deck and await the result. +- `ctx.initialUserMessage`: current turn user message when provided by caller. +- `ctx.getSessionMeta(key)`: read persisted run/session metadata. +- `ctx.setSessionMeta(key, value)`: persist metadata for later turns. +- `ctx.appendMessage({ role, content })`: append chat transcript messages from + execute decks. +- `ctx.spawnAndWait({ path, input, initialUserMessage? })`: call another deck + and await the result; user message is inherited by default unless overridden. - `ctx.return(payload)`: respond early without running guards again. - `ctx.fail({ message, code?, details? })`: aborts the run (throws). - `ctx.log(...)`: emit structured trace entries for observability. @@ -153,6 +167,19 @@ When the deck defines `run`/`execute`, the runtime hands you an Pass `guardrails`, `initialUserMessage`, `modelOverride`, and `allowRootStringInput` to `runDeck` when scripting custom runtimes. +### Worker sandbox behavior in `runDeck` + +`gambit-core` keeps worker sandboxing opt-in: + +- `runDeck` enables worker sandboxing only when `workerSandbox: true` is passed. +- You can also opt in via `GAMBIT_DECK_WORKER_SANDBOX=1` (or `true` / `yes`). +- If neither is set, `runDeck` executes without worker sandboxing by default. + +Why this is opt-in: `@bolt-foundry/gambit-core` is intended to run in multiple +hosts (Node, Bun, Deno). Worker sandboxing relies on Deno-specific worker +permission controls, so host apps must opt in when they run in an environment +that supports it. + ## Loading Markdown decks and cards Markdown files use front matter for metadata, with the body becoming the prompt. diff --git a/packages/gambit-core/decks/anthropic/agent-sdk/PROMPT.md b/packages/gambit-core/decks/anthropic/agent-sdk/PROMPT.md index cf5a9f35a..f88e199c5 100644 --- a/packages/gambit-core/decks/anthropic/agent-sdk/PROMPT.md +++ b/packages/gambit-core/decks/anthropic/agent-sdk/PROMPT.md @@ -1,5 +1,7 @@ +++ label = "Anthropic agent SDK bridge" +contextSchema = "gambit://schemas/scenarios/plain_chat_input_optional.zod.ts" +responseSchema = "gambit://schemas/scenarios/plain_chat_output.zod.ts" +++ This stdlib deck provides the default bridge between the Anthropic agent SDK diff --git a/packages/gambit-core/decks/openai/codex-sdk/PROMPT.md b/packages/gambit-core/decks/openai/codex-sdk/PROMPT.md index 1a3ef6e8f..b7091d60e 100644 --- a/packages/gambit-core/decks/openai/codex-sdk/PROMPT.md +++ b/packages/gambit-core/decks/openai/codex-sdk/PROMPT.md @@ -1,5 +1,7 @@ +++ label = "Codex SDK bridge" +contextSchema = "gambit://schemas/scenarios/plain_chat_input_optional.zod.ts" +responseSchema = "gambit://schemas/scenarios/plain_chat_output.zod.ts" +++ This stdlib deck provides the default bridge between the Codex SDK runtime and diff --git a/packages/gambit-core/decks/openai/codex-sdk/codex_client.ts b/packages/gambit-core/decks/openai/codex-sdk/codex_client.ts new file mode 100644 index 000000000..39e4c49aa --- /dev/null +++ b/packages/gambit-core/decks/openai/codex-sdk/codex_client.ts @@ -0,0 +1,109 @@ +export type CodexTurnInput = { + userText: string; + threadId?: string; + systemPrompt?: string; +}; + +export type CodexTurnOutput = { + threadId: string; + assistantText: string; +}; + +type CodexEvent = + | { type: "thread.started"; thread_id?: unknown } + | { + type: "item.completed"; + item?: { type?: unknown; text?: unknown }; + } + | { type: string; [key: string]: unknown }; + +function runCwd(): string { + const botRoot = Deno.env.get("GAMBIT_BOT_ROOT"); + if (typeof botRoot === "string" && botRoot.trim().length > 0) { + return botRoot.trim(); + } + return Deno.cwd(); +} + +function parseCodexEvents(stdout: string): { + threadId?: string; + assistantText?: string; +} { + let threadId: string | undefined; + let assistantText: string | undefined; + + for (const line of stdout.split(/\r?\n/)) { + const trimmed = line.trim(); + if (!trimmed.startsWith("{")) continue; + let parsed: CodexEvent | null = null; + try { + parsed = JSON.parse(trimmed) as CodexEvent; + } catch { + continue; + } + if (!parsed || typeof parsed !== "object") continue; + if (parsed.type === "thread.started") { + if (typeof parsed.thread_id === "string" && parsed.thread_id.trim()) { + threadId = parsed.thread_id.trim(); + } + continue; + } + if (parsed.type === "item.completed") { + const item = parsed.item; + if (!item || typeof item !== "object") continue; + const rec = item as Record; + if (rec.type !== "agent_message") continue; + if (typeof rec.text !== "string") continue; + const next = rec.text.trim(); + if (next) assistantText = next; + } + } + + return { threadId, assistantText }; +} + +export async function sendCodexTurn( + input: CodexTurnInput, +): Promise { + const prompt = input.systemPrompt && input.systemPrompt.trim() + ? `${input.systemPrompt.trim()}\n\n${input.userText}` + : input.userText; + + const args = input.threadId + ? [ + "exec", + "resume", + "--skip-git-repo-check", + "--json", + input.threadId, + prompt, + ] + : ["exec", "--skip-git-repo-check", "--json", prompt]; + + const out = await new Deno.Command("codex", { + args, + cwd: runCwd(), + stdout: "piped", + stderr: "piped", + }).output(); + + const stdout = new TextDecoder().decode(out.stdout); + const stderr = new TextDecoder().decode(out.stderr); + if (!out.success) { + throw new Error( + `codex exec failed (exit ${out.code}): ${stderr.trim() || stdout.trim()}`, + ); + } + + const parsed = parseCodexEvents(stdout); + const threadId = parsed.threadId ?? input.threadId; + if (!threadId) { + throw new Error( + `codex exec succeeded but no thread id found in output: ${stdout.trim()}`, + ); + } + return { + threadId, + assistantText: parsed.assistantText ?? "", + }; +} diff --git a/packages/gambit-core/decks/openai/codex-sdk/codex_sdk_bridge.deck.ts b/packages/gambit-core/decks/openai/codex-sdk/codex_sdk_bridge.deck.ts new file mode 100644 index 000000000..81da6f0e7 --- /dev/null +++ b/packages/gambit-core/decks/openai/codex-sdk/codex_sdk_bridge.deck.ts @@ -0,0 +1,36 @@ +import { defineDeck } from "../../../src/definitions.ts"; +import { z } from "zod"; +import { sendCodexTurn } from "./codex_client.ts"; + +const CODEX_THREAD_META_KEY = "codex.threadId"; + +export default defineDeck({ + label: "codex_sdk_bridge", + contextSchema: z.string().optional(), + responseSchema: z.string(), + async run(ctx) { + const userText = typeof ctx.initialUserMessage === "string" && + ctx.initialUserMessage.trim() + ? ctx.initialUserMessage.trim() + : typeof ctx.input === "string" && ctx.input.trim() + ? ctx.input.trim() + : ""; + + if (!userText) return ""; + + const priorThreadId = ctx.getSessionMeta(CODEX_THREAD_META_KEY); + const systemPrompt = ctx.getSessionMeta("codex.systemPrompt"); + + ctx.appendMessage({ role: "user", content: userText }); + + const result = await sendCodexTurn({ + userText, + threadId: priorThreadId, + systemPrompt, + }); + + ctx.setSessionMeta(CODEX_THREAD_META_KEY, result.threadId); + ctx.appendMessage({ role: "assistant", content: result.assistantText }); + return result.assistantText; + }, +}); diff --git a/packages/gambit-core/deno.json b/packages/gambit-core/deno.json index 595f255ad..38715ceca 100644 --- a/packages/gambit-core/deno.json +++ b/packages/gambit-core/deno.json @@ -8,7 +8,39 @@ "url": "git+https://github.com/bolt-foundry/gambit.git" }, "exports": { - ".": "./mod.ts" + ".": "./mod.ts", + "./schemas/graders/respond.ts": "./schemas/graders/respond.ts", + "./schemas/graders/respond.zod.ts": "./schemas/graders/respond.zod.ts", + "./schemas/graders/grader_output.ts": "./schemas/graders/grader_output.ts", + "./schemas/graders/grader_output.zod.ts": + "./schemas/graders/grader_output.zod.ts", + "./schemas/graders/contexts/turn.ts": "./schemas/graders/contexts/turn.ts", + "./schemas/graders/contexts/turn.zod.ts": + "./schemas/graders/contexts/turn.zod.ts", + "./schemas/graders/contexts/turn_tools.ts": + "./schemas/graders/contexts/turn_tools.ts", + "./schemas/graders/contexts/turn_tools.zod.ts": + "./schemas/graders/contexts/turn_tools.zod.ts", + "./schemas/graders/contexts/conversation.ts": + "./schemas/graders/contexts/conversation.ts", + "./schemas/graders/contexts/conversation.zod.ts": + "./schemas/graders/contexts/conversation.zod.ts", + "./schemas/graders/contexts/conversation_tools.ts": + "./schemas/graders/contexts/conversation_tools.ts", + "./schemas/graders/contexts/conversation_tools.zod.ts": + "./schemas/graders/contexts/conversation_tools.zod.ts", + "./schemas/graders/contexts/tools.ts": + "./schemas/graders/contexts/tools.ts", + "./schemas/graders/contexts/tools.zod.ts": + "./schemas/graders/contexts/tools.zod.ts", + "./schemas/scenarios/plain_chat_input_optional.ts": + "./schemas/scenarios/plain_chat_input_optional.ts", + "./schemas/scenarios/plain_chat_input_optional.zod.ts": + "./schemas/scenarios/plain_chat_input_optional.zod.ts", + "./schemas/scenarios/plain_chat_output.ts": + "./schemas/scenarios/plain_chat_output.ts", + "./schemas/scenarios/plain_chat_output.zod.ts": + "./schemas/scenarios/plain_chat_output.zod.ts" }, "tasks": { "fmt": "deno fmt", diff --git a/packages/gambit-core/mod.ts b/packages/gambit-core/mod.ts index e0d0f8e7e..0b05c1e8d 100644 --- a/packages/gambit-core/mod.ts +++ b/packages/gambit-core/mod.ts @@ -59,8 +59,12 @@ export { export { isGambitEndSignal } from "./src/runtime.ts"; /** Run a deck and return its execution result. */ export { runDeck } from "./src/runtime.ts"; +/** Cancellation error type surfaced when a run is aborted. */ +export { isRunCanceledError, RunCanceledError } from "./src/runtime.ts"; /** Signal for explicitly ending a Gambit run. */ export type { GambitEndSignal } from "./src/runtime.ts"; +/** Runtime run options accepted by `runDeck`. */ +export type { RunOptions } from "./src/runtime.ts"; /** Default guardrail settings applied to deck runs. */ export { DEFAULT_GUARDRAILS } from "./src/constants.ts"; /** Reserved tool name prefix for Gambit tools. */ @@ -92,4 +96,4 @@ export type { ModelProvider } from "./src/types.ts"; /** Tool definition passed to model providers. */ export type { ToolDefinition } from "./src/types.ts"; /** Trace events emitted during execution. */ -export type { TraceEvent } from "./src/types.ts"; +export type { ProviderTraceEvent, TraceEvent } from "./src/types.ts"; diff --git a/packages/gambit-core/schemas/graders/contexts/conversation.ts b/packages/gambit-core/schemas/graders/contexts/conversation.ts index de05621ae..a8d5defbc 100644 --- a/packages/gambit-core/schemas/graders/contexts/conversation.ts +++ b/packages/gambit-core/schemas/graders/contexts/conversation.ts @@ -1,17 +1,40 @@ import { z } from "zod"; -export const graderMessageSchema = z.object({ +type GraderMessage = { + role: string; + content?: unknown; + name?: string; +}; + +type GraderConversation = { + messages?: Array; + meta?: Record; + notes?: { + text?: string; + }; +}; + +type GraderConversationContext = { + session: GraderConversation; +}; + +export const graderMessageSchema: z.ZodType = z.object({ role: z.string(), content: z.any().optional(), name: z.string().optional(), }); -export const graderConversationSchema = z.object({ - messages: z.array(graderMessageSchema).optional(), - meta: z.record(z.any()).optional(), - notes: z.object({ text: z.string().optional() }).optional(), -}); +export const graderConversationSchema: z.ZodType = z.object( + { + messages: z.array(graderMessageSchema).optional(), + meta: z.record(z.any()).optional(), + notes: z.object({ text: z.string().optional() }).optional(), + }, +); -export default z.object({ - session: graderConversationSchema, -}); +const graderConversationContextSchema: z.ZodType = z + .object({ + session: graderConversationSchema, + }); + +export default graderConversationContextSchema; diff --git a/packages/gambit-core/schemas/graders/contexts/conversation_tools.ts b/packages/gambit-core/schemas/graders/contexts/conversation_tools.ts index a525e4248..7ef3479aa 100644 --- a/packages/gambit-core/schemas/graders/contexts/conversation_tools.ts +++ b/packages/gambit-core/schemas/graders/contexts/conversation_tools.ts @@ -1,6 +1,34 @@ import { z } from "zod"; -const graderToolCallSchema = z.object({ +type GraderToolCall = { + id?: string; + type?: string; + function: { + name: string; + arguments?: string; + }; +}; + +type GraderConversationMessageWithTools = { + role: string; + content?: unknown; + name?: string; + tool_calls?: Array; +}; + +type GraderConversationWithTools = { + messages?: Array; + meta?: Record; + notes?: { + text?: string; + }; +}; + +type GraderConversationToolsContext = { + session: GraderConversationWithTools; +}; + +const graderToolCallSchema: z.ZodType = z.object({ id: z.string().optional(), type: z.string().optional(), function: z.object({ @@ -9,19 +37,27 @@ const graderToolCallSchema = z.object({ }), }); -export const graderConversationMessageWithToolsSchema = z.object({ +export const graderConversationMessageWithToolsSchema: z.ZodType< + GraderConversationMessageWithTools +> = z.object({ role: z.string(), content: z.any().optional(), name: z.string().optional(), tool_calls: z.array(graderToolCallSchema).optional(), }); -export const graderConversationWithToolsSchema = z.object({ +export const graderConversationWithToolsSchema: z.ZodType< + GraderConversationWithTools +> = z.object({ messages: z.array(graderConversationMessageWithToolsSchema).optional(), meta: z.record(z.any()).optional(), notes: z.object({ text: z.string().optional() }).optional(), }); -export default z.object({ +const graderConversationToolsContextSchema: z.ZodType< + GraderConversationToolsContext +> = z.object({ session: graderConversationWithToolsSchema, }); + +export default graderConversationToolsContextSchema; diff --git a/packages/gambit-core/schemas/graders/contexts/turn.ts b/packages/gambit-core/schemas/graders/contexts/turn.ts index cf5d523e8..55646564f 100644 --- a/packages/gambit-core/schemas/graders/contexts/turn.ts +++ b/packages/gambit-core/schemas/graders/contexts/turn.ts @@ -4,7 +4,14 @@ import { graderMessageSchema, } from "./conversation.ts"; -export default z.object({ +type GraderTurnContext = { + session: z.infer; + messageToGrade: z.infer; +}; + +const graderTurnContextSchema: z.ZodType = z.object({ session: graderConversationSchema, messageToGrade: graderMessageSchema, }); + +export default graderTurnContextSchema; diff --git a/packages/gambit-core/schemas/graders/contexts/turn_tools.ts b/packages/gambit-core/schemas/graders/contexts/turn_tools.ts index 50b0e8f34..f555ab31b 100644 --- a/packages/gambit-core/schemas/graders/contexts/turn_tools.ts +++ b/packages/gambit-core/schemas/graders/contexts/turn_tools.ts @@ -1,6 +1,35 @@ import { z } from "zod"; -const graderToolCallSchema = z.object({ +type GraderToolCall = { + id?: string; + type?: string; + function: { + name: string; + arguments?: string; + }; +}; + +type GraderMessageWithTools = { + role: string; + content?: unknown; + name?: string; + tool_calls?: Array; +}; + +type GraderConversationWithTools = { + messages?: Array; + meta?: Record; + notes?: { + text?: string; + }; +}; + +type GraderTurnToolsContext = { + session: GraderConversationWithTools; + messageToGrade: GraderMessageWithTools; +}; + +const graderToolCallSchema: z.ZodType = z.object({ id: z.string().optional(), type: z.string().optional(), function: z.object({ @@ -9,20 +38,26 @@ const graderToolCallSchema = z.object({ }), }); -export const graderMessageWithToolsSchema = z.object({ - role: z.string(), - content: z.any().optional(), - name: z.string().optional(), - tool_calls: z.array(graderToolCallSchema).optional(), -}); +export const graderMessageWithToolsSchema: z.ZodType = z + .object({ + role: z.string(), + content: z.any().optional(), + name: z.string().optional(), + tool_calls: z.array(graderToolCallSchema).optional(), + }); -export const graderConversationWithToolsSchema = z.object({ +export const graderConversationWithToolsSchema: z.ZodType< + GraderConversationWithTools +> = z.object({ messages: z.array(graderMessageWithToolsSchema).optional(), meta: z.record(z.any()).optional(), notes: z.object({ text: z.string().optional() }).optional(), }); -export default z.object({ - session: graderConversationWithToolsSchema, - messageToGrade: graderMessageWithToolsSchema, -}); +const graderTurnToolsContextSchema: z.ZodType = z + .object({ + session: graderConversationWithToolsSchema, + messageToGrade: graderMessageWithToolsSchema, + }); + +export default graderTurnToolsContextSchema; diff --git a/packages/gambit-core/schemas/graders/grader_output.ts b/packages/gambit-core/schemas/graders/grader_output.ts index 0b9f38a89..bb9306687 100644 --- a/packages/gambit-core/schemas/graders/grader_output.ts +++ b/packages/gambit-core/schemas/graders/grader_output.ts @@ -1,7 +1,15 @@ import { z } from "zod"; -export default z.object({ +type GraderOutput = { + score: number; + reason: string; + evidence?: Array; +}; + +const graderOutputSchema: z.ZodType = z.object({ score: z.number().int().min(-3).max(3), reason: z.string(), evidence: z.array(z.string()).optional(), }); + +export default graderOutputSchema; diff --git a/packages/gambit-core/schemas/graders/respond.ts b/packages/gambit-core/schemas/graders/respond.ts index 4d690e7f4..8d2359a1d 100644 --- a/packages/gambit-core/schemas/graders/respond.ts +++ b/packages/gambit-core/schemas/graders/respond.ts @@ -1,9 +1,19 @@ import { z } from "zod"; -export default z.object({ +type RespondEnvelope = { + payload?: unknown; + status?: number; + message?: string; + code?: string; + meta?: Record; +}; + +const respondSchema: z.ZodType = z.object({ payload: z.any().optional(), status: z.number().int().optional(), message: z.string().optional(), code: z.string().optional(), meta: z.record(z.any()).optional(), }); + +export default respondSchema; diff --git a/packages/gambit-core/schemas/scenarios/plain_chat_input_optional.ts b/packages/gambit-core/schemas/scenarios/plain_chat_input_optional.ts index 3807c1c29..7f8731a15 100644 --- a/packages/gambit-core/schemas/scenarios/plain_chat_input_optional.ts +++ b/packages/gambit-core/schemas/scenarios/plain_chat_input_optional.ts @@ -1,3 +1,6 @@ import { z } from "zod"; -export default z.string().optional(); +const plainChatInputOptionalSchema: z.ZodType = z.string() + .optional(); + +export default plainChatInputOptionalSchema; diff --git a/packages/gambit-core/schemas/scenarios/plain_chat_output.ts b/packages/gambit-core/schemas/scenarios/plain_chat_output.ts index 91d0a8d3f..221a1e087 100644 --- a/packages/gambit-core/schemas/scenarios/plain_chat_output.ts +++ b/packages/gambit-core/schemas/scenarios/plain_chat_output.ts @@ -1,3 +1,5 @@ import { z } from "zod"; -export default z.string(); +const plainChatOutputSchema: z.ZodType = z.string(); + +export default plainChatOutputSchema; diff --git a/packages/gambit-core/src/loader.ts b/packages/gambit-core/src/loader.ts index 63b4727f4..326ff949d 100644 --- a/packages/gambit-core/src/loader.ts +++ b/packages/gambit-core/src/loader.ts @@ -25,6 +25,7 @@ import type { ActionDeckDefinition, CardDefinition, DeckDefinition, + ExternalToolDefinition, GraderDeckDefinition, LoadedCard, LoadedDeck, @@ -174,6 +175,39 @@ function checkReserved(action: ActionDeckDefinition) { } } +function normalizeExternalTools( + tools: DeckDefinition["tools"], + resolvedPath: string, +): Array { + if (!tools) return []; + return tools.map((tool) => { + const name = String(tool.name ?? "").trim(); + if (!name) { + throw new Error(`External tool must include a name (${resolvedPath})`); + } + if (name.startsWith(RESERVED_TOOL_PREFIX)) { + throw new Error( + `External tool name ${name} is reserved (prefix ${RESERVED_TOOL_PREFIX})`, + ); + } + if ( + !TOOL_NAME_PATTERN.test(name) || + name.length > MAX_TOOL_NAME_LENGTH + ) { + throw new Error( + `External tool name ${name} must match ${TOOL_NAME_PATTERN} and be <= ${MAX_TOOL_NAME_LENGTH} characters`, + ); + } + return { + name, + description: typeof tool.description === "string" + ? tool.description + : undefined, + inputSchema: tool.inputSchema, + }; + }); +} + async function loadCardInternal( cardPath: string, parentPath?: string, @@ -272,6 +306,11 @@ export async function loadDeck( `Deck at ${resolved} did not export a valid deck definition`, ); } + if ((deck as { mcpServers?: unknown }).mcpServers !== undefined) { + throw new Error( + `Deck-level [[mcpServers]] is unsupported in this phase (${resolved})`, + ); + } const deckLabel = deck.label; @@ -297,6 +336,15 @@ export async function loadDeck( } const actionDecks = Object.values(mergedActions); + const tools = normalizeExternalTools(deck.tools, resolved); + const actionNames = new Set(actionDecks.map((action) => action.name)); + for (const tool of tools) { + if (actionNames.has(tool.name)) { + logger.warn( + `[gambit] tool ${tool.name} is shadowed by an action in ${resolved}`, + ); + } + } const schemaAliases = normalizeDeckSchemas(deck, resolved); let inputSchema = schemaAliases.inputSchema; @@ -383,6 +431,7 @@ export async function loadDeck( deck.graderDecks, resolved, ), + tools, contextSchema, responseSchema, inputSchema, diff --git a/packages/gambit-core/src/markdown.test.ts b/packages/gambit-core/src/markdown.test.ts index d813430ba..deca50193 100644 --- a/packages/gambit-core/src/markdown.test.ts +++ b/packages/gambit-core/src/markdown.test.ts @@ -343,6 +343,38 @@ Root deck. assert(deck.graderDecks[0].path.endsWith("graders/qa/PROMPT.md")); }); +Deno.test("markdown deck loads without front matter", async () => { + const dir = await Deno.makeTempDir(); + const deckPath = await writeTempDeck( + dir, + "PROMPT.md", + `You are a plain markdown deck with no front matter.`, + ); + + const deck = await loadMarkdownDeck(deckPath); + assertEquals(deck.label, undefined); + assertStringIncludes(deck.body ?? "", "plain markdown deck"); +}); + +Deno.test("markdown deck rejects malformed explicit front matter", async () => { + const dir = await Deno.makeTempDir(); + const deckPath = await writeTempDeck( + dir, + "PROMPT.md", + `+++ +label = "broken" + +This file is missing a closing delimiter. +`, + ); + + await assertRejects( + () => loadMarkdownDeck(deckPath), + Error, + "Failed to parse front matter", + ); +}); + Deno.test("markdown deck requires action descriptions in 1.0 actions", async () => { const dir = await Deno.makeTempDir(); const deckPath = await writeTempDeck( @@ -406,31 +438,93 @@ Root deck. ]); }); -Deno.test("markdown execute deck loads module and PROMPT overrides schemas", async () => { +Deno.test("markdown deck rejects top-level execute", async () => { const dir = await Deno.makeTempDir(); - const execPath = path.join(dir, "exec.ts"); - const definitionsUrl = path.toFileUrl( - path.resolve("packages/gambit-core/src/definitions.ts"), - ).href; - await Deno.writeTextFile( - execPath, - `import { defineDeck } from "${definitionsUrl}"; -import { z } from "zod"; - -export default defineDeck({ - label: "exec", - contextSchema: z.object({ fromExec: z.string() }), - responseSchema: z.object({ out: z.string() }), - run: (_ctx) => ({ out: "ok" }), + const deckPath = await writeTempDeck( + dir, + "PROMPT.md", + `+++ +label = "root" +execute = "./compute.deck.ts" ++++ + +Root deck. +`, + ); + + await assertRejects( + () => loadMarkdownDeck(deckPath), + Error, + "Top-level execute", + ); }); + +Deno.test("markdown deck rejects action target with both path and execute", async () => { + const dir = await Deno.makeTempDir(); + const deckPath = await writeTempDeck( + dir, + "PROMPT.md", + `+++ +label = "root" + +[[actions]] +name = "do_thing" +path = "./actions/do/PROMPT.md" +execute = "./actions/do.deck.ts" +description = "Run do thing." +contextSchema = "./schemas/in.zod.ts" +responseSchema = "./schemas/out.zod.ts" ++++ + +Root deck. `, ); - const schemaPath = path.join(dir, "context.zod.ts"); + await assertRejects( + () => loadMarkdownDeck(deckPath), + Error, + "exactly one of path or execute", + ); +}); + +Deno.test("markdown deck rejects action target with neither path nor execute", async () => { + const dir = await Deno.makeTempDir(); + const deckPath = await writeTempDeck( + dir, + "PROMPT.md", + `+++ +label = "root" + +[[actions]] +name = "do_thing" +description = "Run do thing." ++++ + +Root deck. +`, + ); + + await assertRejects( + () => loadMarkdownDeck(deckPath), + Error, + "exactly one of path or execute", + ); +}); + +Deno.test("markdown deck normalizes actions execute targets with schemas", async () => { + const dir = await Deno.makeTempDir(); + const inputSchemaPath = path.join(dir, "input.zod.ts"); + const outputSchemaPath = path.join(dir, "output.zod.ts"); await Deno.writeTextFile( - schemaPath, + inputSchemaPath, `import { z } from "zod"; -export default z.object({ fromPrompt: z.number() }); +export default z.object({ count: z.number() }); +`, + ); + await Deno.writeTextFile( + outputSchemaPath, + `import { z } from "zod"; +export default z.object({ total: z.number() }); `, ); @@ -438,19 +532,31 @@ export default z.object({ fromPrompt: z.number() }); dir, "PROMPT.md", `+++ -label = "exec-root" -execute = "./exec.ts" -contextSchema = "./context.zod.ts" +label = "root" + +[[actions]] +name = "compute_rollup" +execute = "./actions/compute_rollup.deck.ts" +description = "Compute rollup totals." +contextSchema = "./input.zod.ts" +responseSchema = "./output.zod.ts" +++ -Execute deck. +Root deck. `, ); const deck = await loadMarkdownDeck(deckPath); - assert(deck.executor, "expected executor to be set"); - assert(deck.contextSchema, "expected context schema to resolve"); - deck.contextSchema.parse({ fromPrompt: 123 }); + assertEquals(deck.actionDecks.length, 1); + assert(deck.actionDecks[0].path.endsWith("actions/compute_rollup.deck.ts")); + assertEquals( + deck.actionDecks[0].execute, + deck.actionDecks[0].path, + ); + const parsedInput = deck.actionDecks[0].contextSchema?.parse({ count: 2 }); + const parsedOutput = deck.actionDecks[0].responseSchema?.parse({ total: 3 }); + assertEquals(parsedInput, { count: 2 }); + assertEquals(parsedOutput, { total: 3 }); }); Deno.test("loadDeck resolves gambit://decks PROMPT.md", async () => { @@ -459,3 +565,101 @@ Deno.test("loadDeck resolves gambit://decks PROMPT.md", async () => { ); assertEquals(deck.label, "Codex SDK bridge"); }); + +Deno.test("markdown deck rejects unsupported mcpServers declarations", async () => { + const dir = await Deno.makeTempDir(); + const deckPath = await writeTempDeck( + dir, + "PROMPT.md", + `+++ +label = "root" + +[[mcpServers]] +name = "local" +command = "node" ++++ +Root deck. +`, + ); + + await assertRejects( + () => loadMarkdownDeck(deckPath), + Error, + "[[mcpServers]]", + ); +}); + +Deno.test("markdown deck parses tools and warns when action shadows a tool", async () => { + const dir = await Deno.makeTempDir(); + const actionDir = path.join(dir, "actions", "do"); + await Deno.mkdir(actionDir, { recursive: true }); + await writeTempDeck( + actionDir, + "PROMPT.md", + `+++ +label = "do" +contextSchema = "gambit://schemas/graders/respond.zod.ts" +responseSchema = "gambit://schemas/graders/respond.zod.ts" ++++ +Action deck. +`, + ); + const schemaPath = path.join(dir, "tool_input.zod.ts"); + await Deno.writeTextFile( + schemaPath, + `import { z } from "zod"; +export default z.object({ query: z.string() }); +`, + ); + const deckPath = await writeTempDeck( + dir, + "PROMPT.md", + `+++ +label = "root" + +[[actions]] +name = "search_docs" +path = "./actions/do/PROMPT.md" +description = "Run action." + +[[tools]] +name = "search_docs" +description = "External search." +inputSchema = "./tool_input.zod.ts" + +[[tools]] +name = "external_lookup" +description = "External lookup." +inputSchema = "./tool_input.zod.ts" ++++ +Root deck. +`, + ); + + const warnings: Array = []; + // deno-lint-ignore no-console + const originalWarn = console.warn; + // deno-lint-ignore no-console + console.warn = (message?: unknown, ...rest: Array) => { + warnings.push([message, ...rest].map(String).join(" ")); + }; + try { + const deck = await loadMarkdownDeck(deckPath); + assertEquals(deck.tools.length, 2); + assertEquals(deck.tools[0].name, "search_docs"); + assertEquals(deck.tools[1].name, "external_lookup"); + assert(deck.tools[1].inputSchema, "expected tool input schema"); + const parsed = deck.tools[1].inputSchema?.parse({ query: "q" }); + assertEquals(parsed, { query: "q" }); + } finally { + // deno-lint-ignore no-console + console.warn = originalWarn; + } + + assert( + warnings.some((line) => + line.includes("shadowed") && line.includes("search_docs") + ), + "expected action-shadow warning for tool name collision", + ); +}); diff --git a/packages/gambit-core/src/markdown.ts b/packages/gambit-core/src/markdown.ts index 080ae7e36..8842baa83 100644 --- a/packages/gambit-core/src/markdown.ts +++ b/packages/gambit-core/src/markdown.ts @@ -9,17 +9,18 @@ import { RESERVED_TOOL_PREFIX, TOOL_NAME_PATTERN, } from "./constants.ts"; -import { isCardDefinition, isDeckDefinition } from "./definitions.ts"; +import { isCardDefinition } from "./definitions.ts"; import { loadCard } from "./loader.ts"; import { normalizePermissionDeclaration, type PermissionDeclarationInput, } from "./permissions.ts"; -import { mergeZodObjects, toJsonSchema } from "./schema.ts"; +import { mergeZodObjects } from "./schema.ts"; import { resolveBuiltinSchemaPath } from "./builtins.ts"; import type { ActionDeckDefinition, DeckDefinition, + ExternalToolDefinition, GraderDeckDefinition, LoadedCard, LoadedDeck, @@ -53,27 +54,6 @@ const END_TEXT = ` If the entire workflow is finished and no further user turns should be sent, call the \`${GAMBIT_TOOL_END}\` tool with optional \`message\` and \`payload\` fields to explicitly end the session. `.trim(); -function normalizeJsonSchema(value: unknown): unknown { - if (Array.isArray(value)) { - return value.map((entry) => normalizeJsonSchema(entry)); - } - if (value && typeof value === "object") { - const record = value as Record; - const out: Record = {}; - for (const key of Object.keys(record).sort()) { - out[key] = normalizeJsonSchema(record[key]); - } - return out; - } - return value; -} - -function schemasMatchDeep(a: ZodTypeAny, b: ZodTypeAny): boolean { - const aJson = normalizeJsonSchema(toJsonSchema(a as never)); - const bJson = normalizeJsonSchema(toJsonSchema(b as never)); - return JSON.stringify(aJson) === JSON.stringify(bJson); -} - function warnLegacyMarker( marker: keyof typeof LEGACY_MARKER_WARNINGS, replacement: string, @@ -116,6 +96,29 @@ function toFileUrl(p: string): string { return path.toFileUrl(abs).href; } +function startsWithFrontMatterDelimiter(raw: string): boolean { + const normalized = raw.startsWith("\uFEFF") ? raw.slice(1) : raw; + const trimmed = normalized.trimStart(); + return /^(\+\+\+|---)\s*(\r?\n|$)/.test(trimmed); +} + +function parseFrontMatterOrRaw( + raw: string, + resolvedPath: string, +): { attrs: ParsedFrontmatter; body: string } { + try { + return extract(raw) as { attrs: ParsedFrontmatter; body: string }; + } catch (err) { + if (!startsWithFrontMatterDelimiter(raw)) { + return { attrs: {}, body: raw }; + } + const message = err instanceof Error ? err.message : String(err); + throw new Error( + `Failed to parse front matter in ${resolvedPath}: ${message}`, + ); + } +} + async function maybeLoadSchema( schemaPath: unknown, basePath: string, @@ -198,20 +201,122 @@ function mergeDeckRefs( return Array.from(merged.values()); } -function normalizeActionDecks( +async function normalizeActionDecks( entries: unknown, basePath: string, opts?: { requirePrompt?: boolean; requireDescription?: boolean }, -): Array { - return normalizeDeckRefs(entries, basePath, opts).map( - (entry) => { - const name = "name" in entry ? String(entry.name ?? "").trim() : ""; - if (!name) { - throw new Error(`Action deck must include a name (${basePath})`); - } - return { ...entry, name }; - }, - ); +): Promise> { + if (!Array.isArray(entries)) return []; + const out: Array = []; + for (const rawEntry of entries) { + if (!rawEntry || typeof rawEntry !== "object") continue; + const rec = rawEntry as Record; + const name = String(rec.name ?? "").trim(); + if (!name) { + throw new Error(`Action deck must include a name (${basePath})`); + } + + const desc = typeof rec.description === "string" + ? rec.description.trim() + : ""; + if (opts?.requireDescription && !desc) { + throw new Error( + `Action deck must include a description (${basePath})`, + ); + } + + const rawPath = typeof rec.path === "string" ? rec.path.trim() : ""; + const rawExecute = typeof rec.execute === "string" + ? rec.execute.trim() + : ""; + const hasPath = rawPath.length > 0; + const hasExecute = rawExecute.length > 0; + if (hasPath === hasExecute) { + throw new Error( + `Action deck must include exactly one of path or execute (${basePath})`, + ); + } + if (hasPath && opts?.requirePrompt && !rawPath.endsWith("PROMPT.md")) { + throw new Error( + `Deck reference must point to PROMPT.md (${basePath})`, + ); + } + + const actionContextSchema = await maybeLoadSchema( + rec.contextSchema, + basePath, + ); + const actionResponseSchema = await maybeLoadSchema( + rec.responseSchema, + basePath, + ); + if (hasExecute && (!actionContextSchema || !actionResponseSchema)) { + throw new Error( + `Action execute target must include contextSchema and responseSchema (${basePath})`, + ); + } + + const selectedTarget = hasPath ? rawPath : rawExecute; + const normalizedPath = selectedTarget.startsWith("gambit://") + ? selectedTarget + : path.resolve(path.dirname(basePath), selectedTarget); + const normalized: ActionDeckDefinition = { + name, + path: normalizedPath, + description: desc || undefined, + label: typeof rec.label === "string" ? rec.label : undefined, + id: typeof rec.id === "string" ? rec.id : undefined, + execute: hasExecute ? normalizedPath : undefined, + contextSchema: actionContextSchema, + responseSchema: actionResponseSchema, + }; + if (rec.permissions !== undefined) { + const parsed = normalizePermissionDeclaration( + rec.permissions as PermissionDeclarationInput, + path.dirname(basePath), + ); + if (parsed) normalized.permissions = parsed; + } + out.push(normalized); + } + return out; +} + +async function normalizeExternalTools( + refs: unknown, + basePath: string, +): Promise> { + if (!Array.isArray(refs)) return []; + const out: Array = []; + for (const entry of refs) { + if (!entry || typeof entry !== "object") continue; + const rec = entry as Record; + const name = String(rec.name ?? "").trim(); + if (!name) { + throw new Error(`External tool must include a name (${basePath})`); + } + if (name.startsWith(RESERVED_TOOL_PREFIX)) { + throw new Error( + `External tool name ${name} is reserved (prefix ${RESERVED_TOOL_PREFIX})`, + ); + } + if ( + !TOOL_NAME_PATTERN.test(name) || name.length > MAX_TOOL_NAME_LENGTH + ) { + throw new Error( + `External tool name ${name} must match ${TOOL_NAME_PATTERN} and be <= ${MAX_TOOL_NAME_LENGTH} characters`, + ); + } + const inputSchema = await maybeLoadSchema(rec.inputSchema, basePath); + out.push({ + name, + description: typeof rec.description === "string" + ? rec.description + : undefined, + inputSchema, + }); + } + return out; } async function expandEmbedsInBody(args: { @@ -278,16 +383,7 @@ export async function loadMarkdownCard( } const nextStack = [...stack, resolved]; const raw = await Deno.readTextFile(resolved); - let attrs: ParsedFrontmatter; - let body: string; - try { - const parsed = extract(raw) as { attrs: ParsedFrontmatter; body: string }; - attrs = parsed.attrs; - body = parsed.body; - } catch (err) { - const message = err instanceof Error ? err.message : String(err); - throw new Error(`Failed to parse front matter in ${resolved}: ${message}`); - } + const { attrs, body } = parseFrontMatterOrRaw(raw, resolved); const candidate = attrs as unknown; if (isCardDefinition(candidate)) { // treat attrs as ts-shaped card @@ -299,7 +395,7 @@ export async function loadMarkdownCard( } const hasNewActionField = (attrs as { actionDecks?: unknown }).actionDecks; const legacyActions = (attrs as { actions?: unknown }).actions; - const actionDecks = normalizeActionDecks( + const actionDecks = await normalizeActionDecks( hasNewActionField ?? legacyActions, resolved, ); @@ -388,29 +484,33 @@ export async function loadMarkdownDeck( ? path.resolve(path.dirname(parentPath), filePath) : path.resolve(filePath); const raw = await Deno.readTextFile(resolved); - let attrs: ParsedFrontmatter; - let body: string; - try { - const parsed = extract(raw) as { attrs: ParsedFrontmatter; body: string }; - attrs = parsed.attrs; - body = parsed.body; - } catch (err) { - const message = err instanceof Error ? err.message : String(err); - throw new Error(`Failed to parse front matter in ${resolved}: ${message}`); - } + const { attrs, body } = parseFrontMatterOrRaw(raw, resolved); const deckAttrs = attrs as { deck?: DeckDefinition } & DeckDefinition; const deckMeta: Partial = (deckAttrs.deck ?? deckAttrs) as DeckDefinition; + if ((deckMeta as { mcpServers?: unknown }).mcpServers !== undefined) { + throw new Error( + `Deck-level [[mcpServers]] is unsupported in this phase (${resolved})`, + ); + } + if ((deckMeta as { execute?: unknown }).execute !== undefined) { + throw new Error( + `Top-level execute in PROMPT.md is unsupported (${resolved})`, + ); + } const hasNewActionDecks = (deckMeta as { actionDecks?: unknown; }).actionDecks; const canonicalActions = (deckMeta as { actions?: unknown }).actions; - const actionDecks = normalizeActionDecks(canonicalActions, resolved, { + const actionDecks = await normalizeActionDecks(canonicalActions, resolved, { requirePrompt: true, requireDescription: true, }); - const legacyActionDecks = normalizeActionDecks(hasNewActionDecks, resolved); + const legacyActionDecks = await normalizeActionDecks( + hasNewActionDecks, + resolved, + ); if (hasNewActionDecks) { logger.warn( `[gambit] deck at ${resolved} uses deprecated "actionDecks"; use "[[actions]]" instead.`, @@ -480,56 +580,6 @@ export async function loadMarkdownDeck( warnLegacySchema(resolved, "outputSchema", "responseSchema"); } - const executePath = (deckMeta as { execute?: unknown }).execute; - let executor: DeckDefinition["run"] | DeckDefinition["execute"] | undefined; - let executeContextSchema: ZodTypeAny | undefined; - let executeResponseSchema: ZodTypeAny | undefined; - if (typeof executePath === "string" && executePath.trim()) { - const execResolved = path.resolve(path.dirname(resolved), executePath); - const mod = await import(toFileUrl(execResolved)); - const executeDeck = mod.default; - if (!isDeckDefinition(executeDeck)) { - throw new Error( - `Execute module at ${execResolved} did not export a valid deck definition`, - ); - } - executor = typeof executeDeck.run === "function" - ? executeDeck.run - : typeof executeDeck.execute === "function" - ? executeDeck.execute - : undefined; - if (!executor) { - throw new Error( - `Execute module at ${execResolved} must export a deck with run(ctx)`, - ); - } - executeContextSchema = executeDeck.contextSchema ?? executeDeck.inputSchema; - executeResponseSchema = executeDeck.responseSchema ?? - executeDeck.outputSchema; - } - if (executor && deckMeta.modelParams) { - logger.warn( - `[gambit] deck at ${resolved} sets execute + modelParams; modelParams will be ignored.`, - ); - } - - if ( - contextSchema && executeContextSchema && - !schemasMatchDeep(contextSchema, executeContextSchema) - ) { - logger.warn( - `[gambit] deck at ${resolved} has mismatched contextSchema between PROMPT.md and execute module (pre-1.0: warn; 1.0+: error)`, - ); - } - if ( - responseSchema && executeResponseSchema && - !schemasMatchDeep(responseSchema, executeResponseSchema) - ) { - logger.warn( - `[gambit] deck at ${resolved} has mismatched responseSchema between PROMPT.md and execute module (pre-1.0: warn; 1.0+: error)`, - ); - } - const allCards = flattenCards(cards); const cleanedBody = replaced.body; const allowEnd = Boolean(deckMeta.allowEnd) || @@ -546,8 +596,8 @@ export async function loadMarkdownDeck( mergedActions[action.name] = action; } - let mergedContextSchema = contextSchema ?? executeContextSchema; - let mergedResponseSchema = responseSchema ?? executeResponseSchema; + let mergedContextSchema = contextSchema; + let mergedResponseSchema = responseSchema; for (const card of allCards) { mergedContextSchema = mergeZodObjects( mergedContextSchema, @@ -610,6 +660,18 @@ export async function loadMarkdownDeck( : undefined; const mergedActionDecks = Object.values(mergedActions); + const tools = await normalizeExternalTools( + (deckMeta as { tools?: unknown }).tools, + resolved, + ); + const actionNameSet = new Set(mergedActionDecks.map((action) => action.name)); + for (const tool of tools) { + if (actionNameSet.has(tool.name)) { + logger.warn( + `[gambit] tool ${tool.name} is shadowed by an action in ${resolved}`, + ); + } + } const rootTestDecks = normalizeDeckRefs( (deckMeta as { testDecks?: unknown }).testDecks, resolved, @@ -634,6 +696,7 @@ export async function loadMarkdownDeck( allowEnd, actionDecks: mergedActionDecks, actions: mergedActionDecks, + tools, testDecks: mergeDeckRefs( scenarioDecks, rootTestDecks, @@ -647,13 +710,13 @@ export async function loadMarkdownDeck( cards: allCards, label: deckMeta.label, startMode: deckMeta.startMode, - modelParams: executor ? undefined : deckMeta.modelParams, + modelParams: deckMeta.modelParams, guardrails: deckMeta.guardrails, contextSchema: mergedContextSchema, responseSchema: mergedResponseSchema, inputSchema: mergedInputSchema, outputSchema: mergedOutputSchema, - executor, + executor: undefined, handlers, respond: Boolean(deckMeta.respond) || replaced.respond || diff --git a/packages/gambit-core/src/permissions.test.ts b/packages/gambit-core/src/permissions.test.ts index ea7cfcf5f..dc20f9cdc 100644 --- a/packages/gambit-core/src/permissions.test.ts +++ b/packages/gambit-core/src/permissions.test.ts @@ -1,11 +1,13 @@ -import { assert, assertEquals } from "@std/assert"; +import { assert, assertEquals, assertThrows } from "@std/assert"; import * as path from "@std/path"; import { canReadPath, canRunCommand, canRunPath, + canWritePath, normalizePermissionDeclaration, normalizePermissionDeclarationToSet, + type PermissionDeclarationInput, resolveEffectivePermissions, } from "./permissions.ts"; @@ -129,6 +131,102 @@ Deno.test("child-only inherited permissions use child baseDir for relative check ); }); +Deno.test("path grants cover descendant files within the directory tree", () => { + const set = normalizePermissionDeclarationToSet( + { + read: ["./shared"], + write: ["./shared", "./local.txt"], + }, + "/workspace/decks/root", + ); + assert(set, "expected normalized permission set"); + + assertEquals( + canReadPath(set, "./shared/prompts/prompt.txt"), + true, + "read grants must apply to files beneath a declared directory", + ); + assertEquals( + canReadPath(set, "./shared"), + true, + "read grants must apply to the directory itself", + ); + assertEquals( + canReadPath(set, "./other/path.txt"), + false, + "read grants must not leak into sibling directories", + ); + assertEquals( + canWritePath(set, "./shared/prompts/prompt.txt"), + true, + "write grants must apply to files beneath a declared directory", + ); + assertEquals( + canWritePath(set, "./local.txt"), + true, + "write grants must still allow file-specific declarations", + ); + assertEquals( + canWritePath(set, "./local.txt.bak"), + false, + "write grants must not allow unrelated files", + ); +}); + +Deno.test("canonical read checks deny symlink escapes outside granted roots", async () => { + const dir = await Deno.makeTempDir(); + const allowedDir = path.join(dir, "allowed"); + const outsideDir = path.join(dir, "outside"); + await Deno.mkdir(allowedDir, { recursive: true }); + await Deno.mkdir(outsideDir, { recursive: true }); + + const outsideFile = path.join(outsideDir, "secret.txt"); + await Deno.writeTextFile(outsideFile, "secret"); + + const symlinkPath = path.join(allowedDir, "secret-link.txt"); + await Deno.symlink(outsideFile, symlinkPath); + + const set = normalizePermissionDeclarationToSet( + { read: ["./allowed"] }, + dir, + ); + assert(set, "expected normalized permission set"); + + assertEquals( + canReadPath(set, symlinkPath), + false, + "symlink traversal must not bypass read root", + ); +}); + +Deno.test("canonical write checks deny symlink parent escapes", async () => { + const dir = await Deno.makeTempDir(); + const allowedDir = path.join(dir, "allowed"); + const outsideDir = path.join(dir, "outside"); + await Deno.mkdir(allowedDir, { recursive: true }); + await Deno.mkdir(outsideDir, { recursive: true }); + + const symlinkDir = path.join(allowedDir, "linked"); + await Deno.symlink(outsideDir, symlinkDir); + + const set = normalizePermissionDeclarationToSet( + { write: ["./allowed"] }, + dir, + ); + assert(set, "expected normalized permission set"); + + assertEquals( + canWritePath(set, path.join(symlinkDir, "escaped.txt")), + false, + "symlink traversal must not bypass write root", + ); + assertEquals( + canWritePath(set, path.join(allowedDir, "safe.txt")), + true, + "writes inside granted root should remain allowed", + ); +}); + Deno.test("run grants keep path vs command semantics separate", () => { const set = normalizePermissionDeclarationToSet( { @@ -147,22 +245,33 @@ Deno.test("run grants keep path vs command semantics separate", () => { assertEquals(canRunCommand(set, "bin/tool"), false); }); -Deno.test("run object-form booleans honor all-access semantics", () => { - const pathsTrue = normalizePermissionDeclarationToSet( - { run: { paths: true } }, +Deno.test("run=true grants all run access", () => { + const runAll = normalizePermissionDeclarationToSet( + { run: true }, "/workspace", ); - assert(pathsTrue, "expected normalized permission set for paths=true"); - assertEquals(canRunPath(pathsTrue, "/workspace/bin/anything"), true); - assertEquals(canRunCommand(pathsTrue, "anything"), true); + assert(runAll, "expected normalized permission set for run=true"); + assertEquals(canRunPath(runAll, "/workspace/bin/anything"), true); + assertEquals(canRunCommand(runAll, "anything"), true); +}); - const commandsTrue = normalizePermissionDeclarationToSet( - { run: { commands: true } }, - "/workspace", +Deno.test("run object-form booleans are rejected", () => { + const invalidPaths = { + run: { paths: true }, + } as unknown as PermissionDeclarationInput; + const invalidCommands = { + run: { commands: false }, + } as unknown as PermissionDeclarationInput; + assertThrows( + () => normalizePermissionDeclarationToSet(invalidPaths, "/workspace"), + Error, + "permissions.run.paths must be an array in object form", + ); + assertThrows( + () => normalizePermissionDeclarationToSet(invalidCommands, "/workspace"), + Error, + "permissions.run.commands must be an array in object form", ); - assert(commandsTrue, "expected normalized permission set for commands=true"); - assertEquals(canRunPath(commandsTrue, "/workspace/bin/anything"), true); - assertEquals(canRunCommand(commandsTrue, "anything"), true); }); Deno.test("unspecified kinds deny by default when a layer is provided", () => { diff --git a/packages/gambit-core/src/permissions.ts b/packages/gambit-core/src/permissions.ts index f7debdcb6..32d450e3c 100644 --- a/packages/gambit-core/src/permissions.ts +++ b/packages/gambit-core/src/permissions.ts @@ -11,8 +11,8 @@ export type RunPermissionInput = | boolean | Array | { - paths?: boolean | Array; - commands?: boolean | Array; + paths?: Array; + commands?: Array; }; export type PermissionDeclarationInput = Partial<{ @@ -175,19 +175,22 @@ function normalizeRun( paths?: unknown; commands?: unknown; }; + if (typeof record.paths === "boolean") { + throw new Error( + "permissions.run.paths must be an array in object form; use permissions.run=true for full run access", + ); + } + if (typeof record.commands === "boolean") { + throw new Error( + "permissions.run.commands must be an array in object form; use permissions.run=true for full run access", + ); + } const pathsScope = normalizeList(record.paths, "run", baseDir, { resolvePaths: true, }); const commandsScope = normalizeList(record.commands, "run", baseDir, { resolvePaths: false, }); - if (pathsScope.all || commandsScope.all) { - return { - all: true, - paths: new Set(), - commands: new Set(), - }; - } return { all: false, paths: pathsScope.values, @@ -424,9 +427,61 @@ export function resolveEffectivePermissions(args: { }; } +/** + * Checks whether `target` is covered by `scope`, treating each value as either + * an exact path grant or the root of an allowed directory tree. + */ function matchScope(scope: NormalizedScope, target: string): boolean { if (scope.all) return true; - return scope.values.has(target); + const canonicalTarget = canonicalizePath(target); + if (!canonicalTarget) return false; + + for (const root of scope.values) { + const canonicalRoot = canonicalizePath(root); + if (!canonicalRoot) continue; + if (pathWithinRoot(canonicalRoot, canonicalTarget)) return true; + } + return false; +} + +function pathWithinRoot(root: string, target: string): boolean { + if (root === target) return true; + const rel = path.relative(root, target); + return rel.length > 0 && !rel.startsWith("..") && !path.isAbsolute(rel); +} + +function canonicalizePath(target: string): string | undefined { + const resolved = path.resolve(target); + try { + return path.resolve(Deno.realPathSync(resolved)); + } catch (err) { + if (err instanceof Deno.errors.NotFound) { + return canonicalizeMissingPath(resolved); + } + return undefined; + } +} + +function canonicalizeMissingPath(target: string): string | undefined { + const suffix: Array = []; + let probe = target; + + while (true) { + try { + const canonicalBase = path.resolve(Deno.realPathSync(probe)); + if (suffix.length === 0) return canonicalBase; + return path.resolve(canonicalBase, ...suffix.reverse()); + } catch (err) { + if (err instanceof Deno.errors.NotFound) { + const parent = path.dirname(probe); + if (parent === probe) return undefined; + suffix.push(path.basename(probe)); + probe = parent; + continue; + } + return undefined; + } + } } /** @@ -463,8 +518,22 @@ export function canRunPath( targetPath: string, ): boolean { if (set.run.all) return true; - const resolved = path.resolve(set.baseDir, targetPath); - return set.run.paths.has(resolved); + const resolvedTarget = path.resolve(set.baseDir, targetPath); + const canonicalTarget = canonicalizePath(resolvedTarget); + if (!canonicalTarget) return false; + // Run-path grants are exact binary grants; deny symlink-mediated execution. + if (canonicalTarget !== resolvedTarget) return false; + for (const allowedPath of set.run.paths) { + const resolvedAllowed = path.resolve(set.baseDir, allowedPath); + if (resolvedAllowed !== resolvedTarget) continue; + const canonicalAllowed = canonicalizePath( + resolvedAllowed, + ); + if (!canonicalAllowed) continue; + if (canonicalAllowed !== resolvedAllowed) continue; + if (canonicalAllowed === canonicalTarget) return true; + } + return false; } /** diff --git a/packages/gambit-core/src/runtime.test.ts b/packages/gambit-core/src/runtime.test.ts index 24c4e8dc8..af6bdaea4 100644 --- a/packages/gambit-core/src/runtime.test.ts +++ b/packages/gambit-core/src/runtime.test.ts @@ -2,8 +2,9 @@ import { assert, assertEquals, assertRejects } from "@std/assert"; import * as path from "@std/path"; import { loadDeck } from "./loader.ts"; import { loadState } from "./state.ts"; -import { runDeck } from "./runtime.ts"; +import { isRunCanceledError, runDeck } from "./runtime.ts"; import type { + JSONValue, ModelMessage, ModelProvider, ResponseItem, @@ -58,6 +59,69 @@ Deno.test("deck loads contextSchema/responseSchema aliases", async () => { assert(deck.outputSchema, "expected legacy outputSchema alias to be set"); }); +Deno.test("compute deck supports canonical schema module imports", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const deckPath = await writeTempDeck( + dir, + "canonical-schema-import.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import contextSchema from "@bolt-foundry/gambit-core/schemas/scenarios/plain_chat_input_optional.zod.ts"; + import responseSchema from "@bolt-foundry/gambit-core/schemas/scenarios/plain_chat_output.zod.ts"; + export default defineDeck({ + contextSchema, + responseSchema, + run: () => "ok", + }); + `, + ); + + const result = await runDeck({ + path: deckPath, + input: "hello", + modelProvider: dummyProvider, + isRoot: true, + }); + + assertEquals(result, "ok"); +}); + +Deno.test("workspace import map cannot remap trusted schema namespaces", async () => { + const dir = await Deno.makeTempDir(); + await Deno.writeTextFile( + path.join(dir, "deno.json"), + JSON.stringify({ + imports: { + "@bolt-foundry/gambit-core/schemas/": "./shadow/", + }, + }), + ); + const modHref = modImportPath(); + const deckPath = await writeTempDeck( + dir, + "trusted-schema-remap.deck.ts", + ` + import { defineDeck } from "${modHref}"; + export default defineDeck({ + run: () => "ok", + }); + `, + ); + + await assertRejects( + () => + runDeck({ + path: deckPath, + input: {}, + modelProvider: dummyProvider, + isRoot: true, + }), + Error, + "trust-boundary violation", + ); +}); + Deno.test("compute deck returns validated output", async () => { const dir = await Deno.makeTempDir(); const modHref = modImportPath(); @@ -716,6 +780,91 @@ Deno.test("isRoot inferred when omitted", async () => { assertEquals(result, "child:hi"); }); +Deno.test("child deck timeout override tightens inherited deadline", async () => { + const origNow = performance.now; + let now = 0; + (performance as { now: () => number }).now = () => now; + + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const deckPath = await writeTempDeck( + dir, + "child-timeout.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.any(), + outputSchema: z.string(), + guardrails: { timeoutMs: 5 }, + run() { + (globalThis).__advanceNow?.(20); + return "late"; + } + }); + `, + ); + + try { + (globalThis as { __advanceNow?: (delta: number) => void }).__advanceNow = ( + delta, + ) => { + now += delta; + }; + await assertRejects( + () => + runDeck({ + path: deckPath, + input: {}, + modelProvider: dummyProvider, + isRoot: true, + guardrails: { timeoutMs: 1_000 }, + runDeadlineMs: 1_000, + }), + Error, + "Timeout exceeded", + ); + } finally { + delete (globalThis as { __advanceNow?: (delta: number) => void }) + .__advanceNow; + (performance as { now: () => number }).now = origNow; + } +}); + +Deno.test("worker sandbox flag defaults false when env access is denied", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const deckPath = await writeTempDeck( + dir, + "env-perm.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.string(), + outputSchema: z.string(), + run(ctx) { return ctx.input; } + }); + `, + ); + + const origGet = Deno.env.get; + try { + Deno.env.get = (() => { + throw new Deno.errors.PermissionDenied("env access denied"); + }) as typeof Deno.env.get; + const result = await runDeck({ + path: deckPath, + input: "ok", + modelProvider: dummyProvider, + isRoot: true, + }); + assertEquals(result, "ok"); + } finally { + Deno.env.get = origGet; + } +}); + Deno.test("LLM deck streams via onStreamText", async () => { const dir = await Deno.makeTempDir(); const modHref = modImportPath(); @@ -974,6 +1123,92 @@ Deno.test("responses mode stores response items and calls responses()", async () assert((updatedState?.messages?.length ?? 0) > 0); }); +Deno.test("responses mode projects tool stream events into tool traces", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + + const deckPath = await writeTempDeck( + dir, + "responses_tool_stream_events.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.string(), + outputSchema: z.string(), + modelParams: { model: "dummy-model" }, + }); + `, + ); + + const traces: Array = []; + const provider: ModelProvider = { + responses({ onStreamEvent }) { + onStreamEvent?.( + { + type: "tool.call", + actionCallId: "tool_1", + name: "external_lookup", + args: { query: "hello" }, + } as unknown as import("./types.ts").ResponseEvent, + ); + onStreamEvent?.( + { + type: "tool.result", + actionCallId: "tool_1", + name: "external_lookup", + result: { ok: true }, + } as unknown as import("./types.ts").ResponseEvent, + ); + return Promise.resolve({ + id: "resp_1", + object: "response", + output: [{ + type: "message", + role: "assistant", + content: [{ type: "output_text", text: "done" }], + }], + }); + }, + chat() { + throw new Error("chat should not be called in responses mode"); + }, + }; + + const result = await runDeck({ + path: deckPath, + input: undefined, + inputProvided: false, + initialUserMessage: "hello", + modelProvider: provider, + isRoot: true, + responsesMode: true, + stream: true, + trace: (event) => traces.push(event), + }); + + assertEquals(result, "done"); + const modelCall = traces.find((event) => event.type === "model.call") as + | Extract + | undefined; + const toolCalls = traces.filter((event) => + event.type === "tool.call" && event.name === "external_lookup" + ) as Array>; + const toolResults = traces.filter((event) => + event.type === "tool.result" && event.name === "external_lookup" + ) as Array>; + + assert(modelCall); + assertEquals(toolCalls.length, 1); + assertEquals(toolResults.length, 1); + assertEquals(toolCalls[0].actionCallId, "tool_1"); + assertEquals(toolResults[0].actionCallId, "tool_1"); + assertEquals(toolCalls[0].args, { query: "hello" }); + assertEquals(toolCalls[0].parentActionCallId, modelCall.actionCallId); + assertEquals(toolResults[0].parentActionCallId, modelCall.actionCallId); + assertEquals(toolResults[0].result, { ok: true }); +}); + Deno.test("responses mode treats empty output as empty string", async () => { const dir = await Deno.makeTempDir(); const modHref = modImportPath(); @@ -1366,6 +1601,55 @@ Deno.test("trace includes parentActionCallId hierarchy", async () => { assertEquals(childDeck.parentActionCallId, actionStart.actionCallId); }); +Deno.test("model.result trace includes model usage fields", async () => { + const dir = await Deno.makeTempDir(); + const deckPath = await writeTempDeck( + dir, + "usage-trace.deck.md", + ` ++++ +modelParams = { model = "dummy-model" } ++++ + +Deck. +`.trim(), + ); + + const traces: Array = []; + const provider: ModelProvider = { + chat() { + return Promise.resolve({ + message: { role: "assistant", content: "ok" }, + finishReason: "stop", + usage: { + promptTokens: 11, + completionTokens: 7, + totalTokens: 18, + reasoningTokens: 4, + }, + }); + }, + }; + + await runDeck({ + path: deckPath, + input: "hello", + modelProvider: provider, + isRoot: true, + trace: (ev) => traces.push(ev), + }); + + const modelResult = traces.find((event): event is Extract< + TraceEvent, + { type: "model.result" } + > => event.type === "model.result"); + assert(modelResult, "expected model.result trace"); + assertEquals(modelResult.usage?.promptTokens, 11); + assertEquals(modelResult.usage?.completionTokens, 7); + assertEquals(modelResult.usage?.totalTokens, 18); + assertEquals(modelResult.usage?.reasoningTokens, 4); +}); + Deno.test("non-root assistant text emits monolog trace", async () => { const dir = await Deno.makeTempDir(); const modHref = modImportPath(); @@ -1534,337 +1818,3599 @@ Deck body. assertEquals(deck.cards.length, 2); }); -Deno.test("markdown deck strips inline embed markers from system prompt", async () => { +Deno.test("markdown action execute target runs compute module and returns envelope", async () => { const dir = await Deno.makeTempDir(); - + const modHref = modImportPath(); + const inputSchemaPath = path.join(dir, "action_input.zod.ts"); + const outputSchemaPath = path.join(dir, "action_output.zod.ts"); await Deno.writeTextFile( - path.join(dir, "persona.card.md"), + inputSchemaPath, ` -+++ -+++ - -Persona content. -`.trim(), + import { z } from "zod"; + export default z.object({ count: z.number() }); + `, ); - - const deckPath = path.join(dir, "root.deck.md"); await Deno.writeTextFile( - deckPath, + outputSchemaPath, ` -+++ -modelParams = { model = "dummy-model" } -+++ - -Deck intro before embed. + import { z } from "zod"; + export default z.object({ total: z.number() }); + `, + ); + await Deno.writeTextFile( + path.join(dir, "compute_rollup.deck.ts"), + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + contextSchema: z.object({ count: z.number() }), + responseSchema: z.object({ + status: z.number().optional(), + message: z.string().optional(), + payload: z.object({ total: z.number() }), + }), + run(ctx) { + return { + status: 201, + message: "computed", + payload: { total: ctx.input.count + 1 }, + }; + }, + }); + `, + ); -![Persona](./persona.card.md) + const rootPath = await writeTempDeck( + dir, + "PROMPT.md", + `+++ +label = "root" +[modelParams] +model = "dummy-model" + +[[actions]] +name = "compute_rollup" +execute = "./compute_rollup.deck.ts" +description = "Compute totals." +contextSchema = "./action_input.zod.ts" +responseSchema = "./action_output.zod.ts" ++++ -Deck outro after embed. -`.trim(), +Root deck. +`, ); - const seen: Array> = []; + let pass = 0; + let seenToolParams: Record | undefined; + let seenToolContent = ""; const provider: ModelProvider = { - chat({ messages }) { - seen.push(messages); + chat({ tools, messages }) { + if (pass === 0) { + pass += 1; + const toolDef = tools?.find((entry) => + entry.function.name === "compute_rollup" + ); + seenToolParams = toolDef?.function.parameters; + return Promise.resolve({ + message: { role: "assistant", content: null }, + finishReason: "tool_calls", + toolCalls: [{ + id: "call-1", + name: "compute_rollup", + args: { count: 2 }, + }], + }); + } + if (pass === 1) { + pass += 1; + for (let i = messages.length - 1; i >= 0; i--) { + const message = messages[i]; + if (message.role === "tool" && message.tool_call_id === "call-1") { + seenToolContent = String(message.content ?? ""); + break; + } + } + return Promise.resolve({ + message: { role: "assistant", content: "done" }, + finishReason: "stop", + }); + } return Promise.resolve({ - message: { role: "assistant", content: "ok" }, + message: { role: "assistant", content: "done" }, finishReason: "stop", }); }, }; - await runDeck({ - path: deckPath, - input: "hi", + const result = await runDeck({ + path: rootPath, + input: {}, modelProvider: provider, isRoot: true, + inputProvided: true, }); - const last = seen.at(-1); - const system = last?.find((m) => m.role === "system"); - if (!system || typeof system.content !== "string") { - throw new Error("missing system message"); - } - - const content = system.content; - assertEquals(content.includes("![Persona](./persona.card.md)"), false); - assertEquals(content.includes("Deck intro before embed."), true); - assertEquals(content.includes("Deck outro after embed."), true); - assertEquals(content.includes("Persona content."), true); + assertEquals(result, "done"); + const params = seenToolParams as { + required?: Array; + properties?: Record; + }; + assertEquals(params.required, ["count"]); + assertEquals(params.properties?.count?.type, "number"); + const toolEnvelope = JSON.parse(seenToolContent) as { + status?: number; + message?: string; + payload?: { total?: number }; + }; + assertEquals(toolEnvelope.status, 201); + assertEquals(toolEnvelope.message, "computed"); + assertEquals(toolEnvelope.payload?.total, 3); }); -Deno.test("markdown card embed cycles are rejected", async () => { +Deno.test("markdown action execute target rejects invalid args with action schema", async () => { const dir = await Deno.makeTempDir(); - + const modHref = modImportPath(); await Deno.writeTextFile( - path.join(dir, "a.card.md"), + path.join(dir, "action_input.zod.ts"), ` -+++ -+++ - -A card body. - -![B card](./b.card.md) -`.trim(), + import { z } from "zod"; + export default z.object({ count: z.number() }); + `, ); - await Deno.writeTextFile( - path.join(dir, "b.card.md"), + path.join(dir, "action_output.zod.ts"), ` + import { z } from "zod"; + export default z.object({ total: z.number() }); + `, + ); + await Deno.writeTextFile( + path.join(dir, "compute_rollup.deck.ts"), + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + contextSchema: z.object({ count: z.number() }), + responseSchema: z.object({ total: z.number() }), + run(ctx) { + return { total: ctx.input.count + 1 }; + }, + }); + `, + ); + const rootPath = await writeTempDeck( + dir, + "PROMPT.md", + `+++ +label = "root" +[modelParams] +model = "dummy-model" + +[[actions]] +name = "compute_rollup" +execute = "./compute_rollup.deck.ts" +description = "Compute totals." +contextSchema = "./action_input.zod.ts" +responseSchema = "./action_output.zod.ts" +++ -+++ +Root deck. +`, + ); -B card body. + let pass = 0; + let seenToolContent = ""; + const provider: ModelProvider = { + chat({ messages }) { + if (pass === 0) { + pass += 1; + return Promise.resolve({ + message: { role: "assistant", content: null }, + finishReason: "tool_calls", + toolCalls: [{ + id: "call-1", + name: "compute_rollup", + args: { count: "bad" }, + }], + }); + } + if (pass === 1) { + pass += 1; + for (let i = messages.length - 1; i >= 0; i--) { + const message = messages[i]; + if (message.role === "tool" && message.tool_call_id === "call-1") { + seenToolContent = String(message.content ?? ""); + break; + } + } + return Promise.resolve({ + message: { role: "assistant", content: "done" }, + finishReason: "stop", + }); + } + return Promise.resolve({ + message: { role: "assistant", content: "done" }, + finishReason: "stop", + }); + }, + }; -![A card](./a.card.md) -`.trim(), - ); + const result = await runDeck({ + path: rootPath, + input: {}, + modelProvider: provider, + isRoot: true, + inputProvided: true, + }); - const deckPath = path.join(dir, "root.deck.md"); + assertEquals(result, "done"); + const toolEnvelope = JSON.parse(seenToolContent) as { + status?: number; + code?: string; + }; + assertEquals(toolEnvelope.status, 400); + assertEquals(toolEnvelope.code, "invalid_input"); +}); + +Deno.test("markdown external tools dispatch through onTool", async () => { + const dir = await Deno.makeTempDir(); await Deno.writeTextFile( - deckPath, + path.join(dir, "tool_input.zod.ts"), ` + import { z } from "zod"; + export default z.object({ query: z.string() }); + `, + ); + const rootPath = await writeTempDeck( + dir, + "PROMPT.md", + `+++ +label = "root" +[modelParams] +model = "dummy-model" + +[[tools]] +name = "external_lookup" +description = "External lookup." +inputSchema = "./tool_input.zod.ts" +++ -modelParams = { model = "dummy-model" } -+++ +Root deck. +`, + ); -Deck with cyclic cards. + let pass = 0; + let seenToolContent = ""; + let seenTools: Array = []; + let seenOnToolInput: + | { + name: string; + args: Record; + runId: string; + actionCallId: string; + parentActionCallId?: string; + deckPath: string; + } + | undefined; + const provider: ModelProvider = { + chat({ messages, tools }) { + if (pass === 0) { + pass += 1; + seenTools = (tools ?? []).map((entry) => entry.function.name); + return Promise.resolve({ + message: { role: "assistant", content: null }, + finishReason: "tool_calls", + toolCalls: [{ + id: "call-1", + name: "external_lookup", + args: { query: "hello" }, + }], + }); + } + if (pass === 1) { + pass += 1; + for (let i = messages.length - 1; i >= 0; i--) { + const message = messages[i]; + if (message.role === "tool" && message.tool_call_id === "call-1") { + seenToolContent = String(message.content ?? ""); + break; + } + } + return Promise.resolve({ + message: { role: "assistant", content: "done" }, + finishReason: "stop", + }); + } + return Promise.resolve({ + message: { role: "assistant", content: "done" }, + finishReason: "stop", + }); + }, + }; -![A card](./a.card.md) -`.trim(), - ); + const result = await runDeck({ + path: rootPath, + input: {}, + modelProvider: provider, + isRoot: true, + inputProvided: true, + onTool: (input) => { + seenOnToolInput = input; + return { + status: 207, + message: "handled", + payload: { echo: String(input.args.query ?? "") }, + meta: { source: "hook" }, + }; + }, + }); - await assertRejects( - () => - runDeck({ - path: deckPath, - input: "hi", - modelProvider: dummyProvider, - isRoot: true, - }), - Error, - "cycle", + assertEquals(result, "done"); + assert( + seenTools.includes("external_lookup"), + "expected external tool in defs", ); + assertEquals(seenOnToolInput?.name, "external_lookup"); + assertEquals(seenOnToolInput?.args, { query: "hello" }); + assertEquals(seenOnToolInput?.actionCallId, "call-1"); + assertEquals(seenOnToolInput?.deckPath, rootPath); + assert( + typeof seenOnToolInput?.runId === "string" && + seenOnToolInput.runId.length > 0, + "expected onTool runId", + ); + const parsed = JSON.parse(seenToolContent) as { + status?: number; + message?: string; + payload?: { echo?: string }; + meta?: { source?: string }; + }; + assertEquals(parsed.status, 207); + assertEquals(parsed.message, "handled"); + assertEquals(parsed.payload?.echo, "hello"); + assertEquals(parsed.meta?.source, "hook"); }); -Deno.test("markdown card schema fragments merge into deck schemas", async () => { +Deno.test("markdown external tools return explicit error when onTool is missing", async () => { const dir = await Deno.makeTempDir(); - await Deno.writeTextFile( - path.join(dir, "fragments.card.md"), + path.join(dir, "tool_input.zod.ts"), ` + import { z } from "zod"; + export default z.object({ query: z.string() }); + `, + ); + const rootPath = await writeTempDeck( + dir, + "PROMPT.md", + `+++ +label = "root" +[modelParams] +model = "dummy-model" + +[[tools]] +name = "external_lookup" +description = "External lookup." +inputSchema = "./tool_input.zod.ts" +++ -inputSchema = "./input_fragment.zod.ts" -outputSchema = "./output_fragment.zod.ts" -+++ - -Fragments card body. -`.trim(), +Root deck. +`, ); + let pass = 0; + let seenToolContent = ""; + const provider: ModelProvider = { + chat({ messages }) { + if (pass === 0) { + pass += 1; + return Promise.resolve({ + message: { role: "assistant", content: null }, + finishReason: "tool_calls", + toolCalls: [{ + id: "call-1", + name: "external_lookup", + args: { query: "hello" }, + }], + }); + } + if (pass === 1) { + pass += 1; + for (let i = messages.length - 1; i >= 0; i--) { + const message = messages[i]; + if (message.role === "tool" && message.tool_call_id === "call-1") { + seenToolContent = String(message.content ?? ""); + break; + } + } + return Promise.resolve({ + message: { role: "assistant", content: "done" }, + finishReason: "stop", + }); + } + return Promise.resolve({ + message: { role: "assistant", content: "done" }, + finishReason: "stop", + }); + }, + }; + + const result = await runDeck({ + path: rootPath, + input: {}, + modelProvider: provider, + isRoot: true, + inputProvided: true, + }); + + assertEquals(result, "done"); + const parsed = JSON.parse(seenToolContent) as { + status?: number; + code?: string; + }; + assertEquals(parsed.status, 500); + assertEquals(parsed.code, "missing_on_tool"); +}); + +Deno.test("markdown external tools return explicit error when onTool throws", async () => { + const dir = await Deno.makeTempDir(); await Deno.writeTextFile( - path.join(dir, "input_fragment.zod.ts"), + path.join(dir, "tool_input.zod.ts"), ` import { z } from "zod"; - export default z.object({ extra: z.string() }); - `.trim(), + export default z.object({ query: z.string() }); + `, + ); + const rootPath = await writeTempDeck( + dir, + "PROMPT.md", + `+++ +label = "root" +[modelParams] +model = "dummy-model" + +[[tools]] +name = "external_lookup" +description = "External lookup." +inputSchema = "./tool_input.zod.ts" ++++ +Root deck. +`, ); + let pass = 0; + let seenToolContent = ""; + const provider: ModelProvider = { + chat({ messages }) { + if (pass === 0) { + pass += 1; + return Promise.resolve({ + message: { role: "assistant", content: null }, + finishReason: "tool_calls", + toolCalls: [{ + id: "call-1", + name: "external_lookup", + args: { query: "hello" }, + }], + }); + } + if (pass === 1) { + pass += 1; + for (let i = messages.length - 1; i >= 0; i--) { + const message = messages[i]; + if (message.role === "tool" && message.tool_call_id === "call-1") { + seenToolContent = String(message.content ?? ""); + break; + } + } + return Promise.resolve({ + message: { role: "assistant", content: "done" }, + finishReason: "stop", + }); + } + return Promise.resolve({ + message: { role: "assistant", content: "done" }, + finishReason: "stop", + }); + }, + }; + + const result = await runDeck({ + path: rootPath, + input: {}, + modelProvider: provider, + isRoot: true, + inputProvided: true, + onTool: () => { + throw new Error("boom"); + }, + }); + + assertEquals(result, "done"); + const parsed = JSON.parse(seenToolContent) as { + status?: number; + code?: string; + message?: string; + }; + assertEquals(parsed.status, 500); + assertEquals(parsed.code, "tool_handler_error"); + assertEquals(parsed.message, "boom"); +}); + +Deno.test("actions shadow external tools during runtime dispatch", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); await Deno.writeTextFile( - path.join(dir, "output_fragment.zod.ts"), + path.join(dir, "action_input.zod.ts"), ` import { z } from "zod"; - export default z.object({ note: z.number() }); - `.trim(), + export default z.object({ query: z.string() }); + `, + ); + await Deno.writeTextFile( + path.join(dir, "action_output.zod.ts"), + ` + import { z } from "zod"; + export default z.object({ value: z.string() }); + `, + ); + await Deno.writeTextFile( + path.join(dir, "lookup.deck.ts"), + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + contextSchema: z.object({ query: z.string() }), + responseSchema: z.object({ value: z.string() }), + run: () => ({ value: "action" }), + }); + `, + ); + const rootPath = await writeTempDeck( + dir, + "PROMPT.md", + `+++ +label = "root" +[modelParams] +model = "dummy-model" + +[[actions]] +name = "lookup" +execute = "./lookup.deck.ts" +description = "Action lookup." +contextSchema = "./action_input.zod.ts" +responseSchema = "./action_output.zod.ts" + +[[tools]] +name = "lookup" +description = "External lookup." +inputSchema = "./action_input.zod.ts" ++++ +Root deck. +`, ); - const deckPath = path.join(dir, "root.deck.md"); + let pass = 0; + let seenToolContent = ""; + let onToolCalled = false; + const provider: ModelProvider = { + chat({ messages }) { + if (pass === 0) { + pass += 1; + return Promise.resolve({ + message: { role: "assistant", content: null }, + finishReason: "tool_calls", + toolCalls: [{ id: "call-1", name: "lookup", args: { query: "x" } }], + }); + } + if (pass === 1) { + pass += 1; + for (let i = messages.length - 1; i >= 0; i--) { + const message = messages[i]; + if (message.role === "tool" && message.tool_call_id === "call-1") { + seenToolContent = String(message.content ?? ""); + break; + } + } + return Promise.resolve({ + message: { role: "assistant", content: "done" }, + finishReason: "stop", + }); + } + return Promise.resolve({ + message: { role: "assistant", content: "done" }, + finishReason: "stop", + }); + }, + }; + + const result = await runDeck({ + path: rootPath, + input: {}, + modelProvider: provider, + isRoot: true, + inputProvided: true, + onTool: () => { + onToolCalled = true; + return { value: "external" }; + }, + }); + + assertEquals(result, "done"); + assertEquals(onToolCalled, false); + const parsed = JSON.parse(seenToolContent) as { + payload?: { value?: string }; + }; + assertEquals(parsed.payload?.value, "action"); +}); + +Deno.test("markdown deck strips inline embed markers from system prompt", async () => { + const dir = await Deno.makeTempDir(); + await Deno.writeTextFile( - deckPath, + path.join(dir, "persona.card.md"), ` +++ -inputSchema = "./base_input.zod.ts" -outputSchema = "./base_output.zod.ts" +++ -Deck body. +Persona content. +`.trim(), + ); + + const deckPath = path.join(dir, "root.deck.md"); + await Deno.writeTextFile( + deckPath, + ` ++++ +modelParams = { model = "dummy-model" } ++++ + +Deck intro before embed. + +![Persona](./persona.card.md) + +Deck outro after embed. +`.trim(), + ); + + const seen: Array> = []; + const provider: ModelProvider = { + chat({ messages }) { + seen.push(messages); + return Promise.resolve({ + message: { role: "assistant", content: "ok" }, + finishReason: "stop", + }); + }, + }; + + await runDeck({ + path: deckPath, + input: "hi", + modelProvider: provider, + isRoot: true, + }); + + const last = seen.at(-1); + const system = last?.find((m) => m.role === "system"); + if (!system || typeof system.content !== "string") { + throw new Error("missing system message"); + } + + const content = system.content; + assertEquals(content.includes("![Persona](./persona.card.md)"), false); + assertEquals(content.includes("Deck intro before embed."), true); + assertEquals(content.includes("Deck outro after embed."), true); + assertEquals(content.includes("Persona content."), true); +}); + +Deno.test("markdown card embed cycles are rejected", async () => { + const dir = await Deno.makeTempDir(); + + await Deno.writeTextFile( + path.join(dir, "a.card.md"), + ` ++++ ++++ + +A card body. + +![B card](./b.card.md) +`.trim(), + ); + + await Deno.writeTextFile( + path.join(dir, "b.card.md"), + ` ++++ ++++ + +B card body. + +![A card](./a.card.md) +`.trim(), + ); + + const deckPath = path.join(dir, "root.deck.md"); + await Deno.writeTextFile( + deckPath, + ` ++++ +modelParams = { model = "dummy-model" } ++++ + +Deck with cyclic cards. + +![A card](./a.card.md) +`.trim(), + ); + + await assertRejects( + () => + runDeck({ + path: deckPath, + input: "hi", + modelProvider: dummyProvider, + isRoot: true, + }), + Error, + "cycle", + ); +}); + +Deno.test("markdown card schema fragments merge into deck schemas", async () => { + const dir = await Deno.makeTempDir(); + + await Deno.writeTextFile( + path.join(dir, "fragments.card.md"), + ` ++++ +inputSchema = "./input_fragment.zod.ts" +outputSchema = "./output_fragment.zod.ts" ++++ + +Fragments card body. +`.trim(), + ); + + await Deno.writeTextFile( + path.join(dir, "input_fragment.zod.ts"), + ` + import { z } from "zod"; + export default z.object({ extra: z.string() }); + `.trim(), + ); + + await Deno.writeTextFile( + path.join(dir, "output_fragment.zod.ts"), + ` + import { z } from "zod"; + export default z.object({ note: z.number() }); + `.trim(), + ); + + const deckPath = path.join(dir, "root.deck.md"); + await Deno.writeTextFile( + deckPath, + ` ++++ +inputSchema = "./base_input.zod.ts" +outputSchema = "./base_output.zod.ts" ++++ + +Deck body. + +![Fragments card](./fragments.card.md) +`.trim(), + ); + + await Deno.writeTextFile( + path.join(dir, "base_input.zod.ts"), + ` + import { z } from "zod"; + export default z.object({ text: z.string() }); + `.trim(), + ); + + await Deno.writeTextFile( + path.join(dir, "base_output.zod.ts"), + ` + import { z } from "zod"; + export default z.object({ result: z.string() }); + `.trim(), + ); + + const deck = await loadDeck(deckPath); + const inputShape = (deck.inputSchema as unknown as { + shape: Record; + }).shape; + const outputShape = (deck.outputSchema as unknown as { + shape: Record; + }).shape; + + assertEquals(Object.keys(inputShape).sort(), ["extra", "text"]); + assertEquals(Object.keys(outputShape).sort(), ["note", "result"]); +}); + +Deno.test("cards cannot declare handlers (ts card)", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + + await writeTempDeck( + dir, + "bad_handlers.card.ts", + ` + import { defineCard } from "${modHref}"; + export default defineCard({ + handlers: { onBusy: { path: "./noop.deck.ts" } } + }); + `, + ); + + const deckPath = await writeTempDeck( + dir, + "root.deck.md", + ` ++++ +modelParams = { model = "dummy-model" } ++++ + +Deck. + +![Bad handlers](./bad_handlers.card.ts) +`.trim(), + ); + + await assertRejects( + () => + runDeck({ + path: deckPath, + input: "hi", + modelProvider: dummyProvider, + isRoot: true, + }), + Error, + "handlers", + ); +}); + +Deno.test("cards cannot declare handlers (markdown card)", async () => { + const dir = await Deno.makeTempDir(); + + await Deno.writeTextFile( + path.join(dir, "bad.card.md"), + ` ++++ +handlers = { onBusy = { path = "./noop.deck.ts" } } ++++ + +Body. +`.trim(), + ); + + const deckPath = path.join(dir, "root.deck.md"); + await Deno.writeTextFile( + deckPath, + ` ++++ +modelParams = { model = "dummy-model" } ++++ + +Deck. + +![Bad card](./bad.card.md) +`.trim(), + ); + + await assertRejects( + () => + runDeck({ + path: deckPath, + input: "hi", + modelProvider: dummyProvider, + isRoot: true, + }), + Error, + "handlers", + ); +}); + +Deno.test("runDeck resolves model arrays via modelProvider", async () => { + const dir = await Deno.makeTempDir(); + const deckPath = await writeTempDeck( + dir, + "root.deck.md", + ` ++++ +modelParams = { model = ["ollama/llama3.1", "openrouter/openai/gpt-4o-mini"] } ++++ + +Deck. +`.trim(), + ); + let resolvedInput: { model?: string | Array } = {}; + const provider: ModelProvider = { + resolveModel: (input) => { + resolvedInput = { model: input.model }; + return Promise.resolve({ + model: "openrouter/openai/gpt-4o-mini", + params: { temp: 1 }, + }); + }, + chat: (input) => { + assertEquals(input.model, "openrouter/openai/gpt-4o-mini"); + assertEquals(input.params?.temp, 1); + return Promise.resolve({ + message: { role: "assistant", content: "ok" }, + finishReason: "stop", + }); + }, + }; + + await runDeck({ + path: deckPath, + input: "hi", + modelProvider: provider, + isRoot: true, + }); + + assert(Array.isArray(resolvedInput.model)); +}); + +Deno.test("modelParams.reasoning passes through to provider params", async () => { + const dir = await Deno.makeTempDir(); + const deckPath = await writeTempDeck( + dir, + "root.deck.md", + ` ++++ +modelParams = { model = "dummy-model", temperature = 0.2, reasoning = { effort = "high", summary = "detailed" } } ++++ + +Deck. +`.trim(), + ); + + let seenParams: Record | undefined; + const provider: ModelProvider = { + chat: (input) => { + seenParams = input.params; + return Promise.resolve({ + message: { role: "assistant", content: "ok" }, + finishReason: "stop", + }); + }, + }; + + await runDeck({ + path: deckPath, + input: "hi", + modelProvider: provider, + isRoot: true, + }); + + assertEquals(seenParams?.temperature, 0.2); + assertEquals(seenParams?.reasoning, { + effort: "high", + summary: "detailed", + }); +}); + +Deno.test("modelParams.verbosity passes through to provider params", async () => { + const dir = await Deno.makeTempDir(); + const deckPath = await writeTempDeck( + dir, + "root.deck.md", + ` ++++ +modelParams = { model = "dummy-model", verbosity = "high" } ++++ + +Deck. +`.trim(), + ); + + let seenParams: Record | undefined; + const provider: ModelProvider = { + chat: (input) => { + seenParams = input.params; + return Promise.resolve({ + message: { role: "assistant", content: "ok" }, + finishReason: "stop", + }); + }, + }; + + await runDeck({ + path: deckPath, + input: "hi", + modelProvider: provider, + isRoot: true, + }); + + assertEquals(seenParams?.verbosity, "high"); +}); + +Deno.test("worker sandbox denies write when write permission is absent", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const targetPath = path.join(dir, "denied-write.txt"); + const deckPath = await writeTempDeck( + dir, + "write-denied.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.object({}), + outputSchema: z.string(), + run: async () => { + await Deno.writeTextFile(${JSON.stringify(targetPath)}, "nope"); + return "ok"; + } + }); + `, + ); + + await assertRejects( + () => + runDeck({ + path: deckPath, + input: {}, + modelProvider: dummyProvider, + isRoot: true, + workerSandbox: true, + workspacePermissions: { read: true, write: false, run: false }, + workspacePermissionsBaseDir: dir, + }), + Error, + "write", + ); +}); + +Deno.test("worker sandbox denies run when run permission is absent", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const deckPath = await writeTempDeck( + dir, + "run-denied.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.object({}), + outputSchema: z.string(), + run: async () => { + const cmd = new Deno.Command("sh", { args: ["-c", "echo hi"] }); + await cmd.output(); + return "ok"; + } + }); + `, + ); + + await assertRejects( + () => + runDeck({ + path: deckPath, + input: {}, + modelProvider: dummyProvider, + isRoot: true, + workerSandbox: true, + workspacePermissions: { read: true, write: false, run: false }, + workspacePermissionsBaseDir: dir, + }), + Error, + "run", + ); +}); + +Deno.test("worker sandbox denies writes outside allowed roots", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const allowedDir = path.join(dir, "allowed"); + const blockedPath = path.join(dir, "blocked.txt"); + await Deno.mkdir(allowedDir, { recursive: true }); + const deckPath = await writeTempDeck( + dir, + "write-outside.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.object({}), + outputSchema: z.string(), + run: async () => { + await Deno.writeTextFile(${JSON.stringify(blockedPath)}, "nope"); + return "ok"; + } + }); + `, + ); + + await assertRejects( + () => + runDeck({ + path: deckPath, + input: {}, + modelProvider: dummyProvider, + isRoot: true, + workerSandbox: true, + workspacePermissions: { + read: true, + write: [allowedDir], + run: false, + }, + workspacePermissionsBaseDir: dir, + }), + Error, + "write", + ); +}); + +Deno.test("worker sandbox restrictive profile still boots compute deck", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const deckPath = await writeTempDeck( + dir, + "restrictive-start.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.object({}), + outputSchema: z.string(), + run: () => "ok" + }); + `, + ); + + const result = await runDeck({ + path: deckPath, + input: {}, + modelProvider: dummyProvider, + isRoot: true, + workerSandbox: true, + workspacePermissions: { + read: false, + write: false, + run: false, + net: false, + env: false, + }, + workspacePermissionsBaseDir: dir, + }); + assertEquals(result, "ok"); +}); + +Deno.test("worker sandbox bootstrap does not grant package-root reads", async () => { + const dir = await Deno.makeTempDir(); + const here = path.dirname(path.fromFileUrl(import.meta.url)); + const defineDeckHref = path.toFileUrl(path.resolve(here, "definitions.ts")) + .href; + const packageCardPath = path.resolve( + here, + "..", + "cards", + "generate-test-input.card.md", + ); + const deckPath = await writeTempDeck( + dir, + "bootstrap-read-bypass.deck.ts", + ` + import { defineDeck } from "${defineDeckHref}"; + export default defineDeck({ + run: async () => { + await Deno.readTextFile(${JSON.stringify(packageCardPath)}); + return "ok"; + } + }); + `, + ); + + await assertRejects( + () => + runDeck({ + path: deckPath, + input: {}, + modelProvider: dummyProvider, + isRoot: true, + workerSandbox: true, + workspacePermissions: { + read: false, + write: false, + run: false, + net: false, + env: false, + }, + workspacePermissionsBaseDir: dir, + }), + Error, + "read", + ); +}); + +Deno.test("worker sandbox bootstrap ignores fake imports in comments", async () => { + const dir = await Deno.makeTempDir(); + const here = path.dirname(path.fromFileUrl(import.meta.url)); + const defineDeckHref = path.toFileUrl(path.resolve(here, "definitions.ts")) + .href; + const secretPath = path.join(dir, "secret.txt"); + await Deno.writeTextFile(secretPath, "top-secret"); + const deckPath = await writeTempDeck( + dir, + "comment-import-escalation.deck.ts", + ` + import { defineDeck } from "${defineDeckHref}"; + // import "${secretPath}" + export default defineDeck({ + run: async () => { + await Deno.readTextFile(${JSON.stringify(secretPath)}); + return "ok"; + } + }); + `, + ); + + await assertRejects( + () => + runDeck({ + path: deckPath, + input: {}, + modelProvider: dummyProvider, + isRoot: true, + workerSandbox: true, + workspacePermissions: { + read: false, + write: false, + run: false, + net: false, + env: false, + }, + workspacePermissionsBaseDir: dir, + }), + Error, + "read", + ); +}); + +Deno.test("worker sandbox bootstrap does not widen reads via imported modules", async () => { + const dir = await Deno.makeTempDir(); + const decksDir = path.join(dir, "decks"); + const secretsDir = path.join(dir, "secrets"); + await Deno.mkdir(decksDir, { recursive: true }); + await Deno.mkdir(secretsDir, { recursive: true }); + const here = path.dirname(path.fromFileUrl(import.meta.url)); + const defineDeckHref = path.toFileUrl(path.resolve(here, "definitions.ts")) + .href; + const secretModulePath = path.join(secretsDir, "secret-module.ts"); + await Deno.writeTextFile(secretModulePath, 'export const secret = "nope";\n'); + const deckPath = await writeTempDeck( + decksDir, + "import-read-escalation.deck.ts", + ` + import { defineDeck } from "${defineDeckHref}"; + import "../secrets/secret-module.ts"; + export default defineDeck({ + run: async () => { + await Deno.readTextFile(${JSON.stringify(secretModulePath)}); + return "ok"; + } + }); + `, + ); + + await assertRejects( + () => + runDeck({ + path: deckPath, + input: {}, + modelProvider: dummyProvider, + isRoot: true, + workerSandbox: true, + workspacePermissions: { + read: false, + write: false, + run: false, + net: false, + env: false, + }, + workspacePermissionsBaseDir: dir, + }), + Error, + "read", + ); +}); + +Deno.test("worker sandbox inspect does not execute out-of-root imports", async () => { + const dir = await Deno.makeTempDir(); + const decksDir = path.join(dir, "decks"); + const secretsDir = path.join(dir, "secrets"); + await Deno.mkdir(decksDir, { recursive: true }); + await Deno.mkdir(secretsDir, { recursive: true }); + const here = path.dirname(path.fromFileUrl(import.meta.url)); + const defineDeckHref = path.toFileUrl(path.resolve(here, "definitions.ts")) + .href; + const secretModulePath = path.join(secretsDir, "secret-module.ts"); + await Deno.writeTextFile( + secretModulePath, + 'throw new Error("inspect-secret-module-loaded");\n', + ); + const deckPath = await writeTempDeck( + decksDir, + "inspect-import-escalation.deck.ts", + ` + import { defineDeck } from "${defineDeckHref}"; + import "../secrets/secret-module.ts"; + export default defineDeck({ + run: () => "ok", + }); + `, + ); + + await assertRejects( + () => + runDeck({ + path: deckPath, + input: {}, + modelProvider: dummyProvider, + isRoot: true, + workerSandbox: true, + workspacePermissions: { + read: false, + write: false, + run: false, + net: false, + env: false, + }, + workspacePermissionsBaseDir: dir, + }), + Error, + "read access", + ); +}); + +Deno.test("worker sandbox inspect enforces hard timeout", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const deckPath = await writeTempDeck( + dir, + "inspect-timeout.deck.ts", + ` + import { defineDeck } from "${modHref}"; + await new Promise(() => {}); + export default defineDeck({ + run: () => "ok", + }); + `, + ); + + await assertRejects( + () => + runDeck({ + path: deckPath, + input: {}, + modelProvider: dummyProvider, + isRoot: true, + workerSandbox: true, + workspacePermissions: { + read: true, + write: false, + run: false, + net: false, + env: false, + }, + workspacePermissionsBaseDir: dir, + }), + Error, + "Deck inspection timed out", + ); +}); + +Deno.test("worker sandbox blocks remote imports when net is false", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const deckPath = await writeTempDeck( + dir, + "remote-import-denied.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import "https://example.com/gambit-runtime-net-blocked.ts"; + export default defineDeck({ + run: () => "ok", + }); + `, + ); + + await assertRejects( + () => + runDeck({ + path: deckPath, + input: {}, + modelProvider: dummyProvider, + isRoot: true, + workerSandbox: true, + workspacePermissions: { + read: true, + write: false, + run: false, + net: false, + env: false, + }, + workspacePermissionsBaseDir: dir, + }), + Error, + "import access", + ); +}); + +Deno.test("worker sandbox restrictive profile loads local deck imports", async () => { + const dir = await Deno.makeTempDir(); + const here = path.dirname(path.fromFileUrl(import.meta.url)); + const defineDeckHref = path.toFileUrl(path.resolve(here, "definitions.ts")) + .href; + + await writeTempDeck( + dir, + "helper.ts", + ` + export const helperValue = "ok-from-helper"; + `, + ); + const deckPath = await writeTempDeck( + dir, + "restrictive-import.deck.ts", + ` + import { defineDeck } from "${defineDeckHref}"; + import { helperValue } from "./helper.ts"; + export default defineDeck({ + run: () => helperValue, + }); + `, + ); + + const result = await runDeck({ + path: deckPath, + input: {}, + modelProvider: dummyProvider, + isRoot: true, + workerSandbox: true, + workspacePermissions: { + read: false, + write: false, + run: false, + net: false, + env: false, + }, + workspacePermissionsBaseDir: dir, + }); + assertEquals(result, "ok-from-helper"); +}); + +Deno.test("worker sandbox restrictive profile loads markdown decks with builtin snippet embeds", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + await writeTempDeck( + dir, + "snippet-embed.card.md", + `![respond](gambit://snippets/respond.md) +`, + ); + const deckPath = await writeTempDeck( + dir, + "builtin-snippet.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + cards: ["./snippet-embed.card.md"], + contextSchema: z.object({}), + responseSchema: z.string(), + run: () => "ok", + }); + `, + ); + + const result = await runDeck({ + path: deckPath, + input: {}, + modelProvider: dummyProvider, + isRoot: true, + workerSandbox: true, + workspacePermissions: { + read: false, + write: false, + run: false, + net: false, + env: false, + }, + workspacePermissionsBaseDir: dir, + }); + + assertEquals(result, "ok"); +}); + +Deno.test("worker sandbox restrictive profile loads markdown decks with local embeds", async () => { + const dir = await Deno.makeTempDir(); + const deckPath = await writeTempDeck( + dir, + "PROMPT.md", + ` ++++ +[modelParams] +model = "dummy-model" ++++ + +Ground answers using this FAQ: +![](./faq.md) +`.trim(), + ); + await writeTempDeck( + dir, + "faq.md", + ` +# FAQ + +- Q: Reset password? +- A: Use the reset flow. +`.trim(), + ); + + const result = await runDeck({ + path: deckPath, + input: "hello", + modelProvider: { + chat: () => + Promise.resolve({ + message: { role: "assistant", content: "ok" }, + finishReason: "stop", + }), + }, + isRoot: true, + workerSandbox: true, + }); + + assertEquals(result, "ok"); +}); + +Deno.test("worker sandbox blocks top-level deck side effects", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const sideEffectPath = path.join(dir, "top-level-side-effect.txt"); + const deckPath = await writeTempDeck( + dir, + "top-level-side-effect.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + await Deno.writeTextFile(${JSON.stringify(sideEffectPath)}, "leak"); + export default defineDeck({ + inputSchema: z.object({}), + outputSchema: z.string(), + run: () => "ok", + }); + `, + ); + + await assertRejects( + () => + runDeck({ + path: deckPath, + input: {}, + modelProvider: dummyProvider, + isRoot: true, + workerSandbox: true, + workspacePermissions: { + read: true, + write: false, + run: false, + net: false, + env: false, + }, + workspacePermissionsBaseDir: dir, + }), + Error, + "write", + ); + + const leaked = await Deno.stat(sideEffectPath).then( + () => true, + () => false, + ); + assertEquals(leaked, false); +}); + +Deno.test( + "worker sandbox blocks top-level model deck side effects during host orchestration", + async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const sideEffectPath = path.join(dir, "llm-top-level-side-effect.txt"); + const deckPath = await writeTempDeck( + dir, + "llm-top-level-side-effect.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + try { + await Deno.writeTextFile(${JSON.stringify(sideEffectPath)}, "leak"); + } catch { + // no-op: sandboxed deck import should deny this write + } + export default defineDeck({ + inputSchema: z.string(), + outputSchema: z.string(), + modelParams: { model: "dummy-model" }, + }); + `, + ); + + const provider: ModelProvider = { + chat() { + return Promise.resolve({ + message: { role: "assistant", content: "ok" }, + finishReason: "stop", + }); + }, + }; + + const result = await runDeck({ + path: deckPath, + input: "hi", + modelProvider: provider, + isRoot: true, + workerSandbox: true, + workspacePermissions: { + read: true, + write: false, + run: false, + net: false, + env: false, + }, + workspacePermissionsBaseDir: dir, + }); + assertEquals(result, "ok"); + + const leaked = await Deno.stat(sideEffectPath).then( + () => true, + () => false, + ); + assertEquals(leaked, false); + }, +); + +Deno.test("worker spawn bridge preserves parent permission ceiling for child", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const childWritePath = path.join(dir, "bridge-child-write.txt"); + + const childPath = await writeTempDeck( + dir, + "bridge-child.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.object({}), + outputSchema: z.string(), + permissions: { write: true }, + run: async () => { + await Deno.writeTextFile(${JSON.stringify(childWritePath)}, "nope"); + return "child-ok"; + } + }); + `, + ); + const parentPath = await writeTempDeck( + dir, + "bridge-parent.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.object({}), + outputSchema: z.string(), + run: async (ctx) => { + return await ctx.spawnAndWait({ path: ${ + JSON.stringify(childPath) + }, input: {} }); + } + }); + `, + ); + + await assertRejects( + () => + runDeck({ + path: parentPath, + input: {}, + modelProvider: dummyProvider, + isRoot: true, + workerSandbox: true, + workspacePermissions: { read: true, write: false, run: false }, + workspacePermissionsBaseDir: dir, + }), + Error, + "write", + ); +}); + +Deno.test("worker timeout cancels spawned children before side effects", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const sideEffectPath = path.join(dir, "late-side-effect.txt"); + + const childPath = await writeTempDeck( + dir, + "timeout-child.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.object({}), + outputSchema: z.string(), + run: async () => { + await new Promise((resolve) => setTimeout(resolve, 300)); + await Deno.writeTextFile(${JSON.stringify(sideEffectPath)}, "late"); + return "late"; + } + }); + `, + ); + const parentPath = await writeTempDeck( + dir, + "timeout-parent.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.object({}), + outputSchema: z.string(), + run: async (ctx) => { + return await ctx.spawnAndWait({ path: ${ + JSON.stringify(childPath) + }, input: {} }); + } + }); + `, + ); + + await assertRejects( + () => + runDeck({ + path: parentPath, + input: {}, + modelProvider: dummyProvider, + isRoot: true, + workerSandbox: true, + guardrails: { timeoutMs: 80 }, + workspacePermissions: { read: true, write: [dir], run: false }, + workspacePermissionsBaseDir: dir, + }), + Error, + "Timeout exceeded", + ); + + await new Promise((resolve) => setTimeout(resolve, 350)); + let sideEffectExists = true; + try { + await Deno.stat(sideEffectPath); + } catch (err) { + if (err instanceof Deno.errors.NotFound) { + sideEffectExists = false; + } else { + throw err; + } + } + assertEquals(sideEffectExists, false); +}); + +Deno.test( + "compute spawnAndWait inherits initialUserMessage by default", + async () => { + for (const workerSandbox of [false, true]) { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const childPath = await writeTempDeck( + dir, + `initial-user-child-${workerSandbox ? "worker" : "inproc"}.deck.ts`, + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + contextSchema: z.object({}), + responseSchema: z.string(), + body: "echo latest user message", + modelParams: { model: "dummy-model" }, + }); + `, + ); + const parentPath = await writeTempDeck( + dir, + `initial-user-parent-${workerSandbox ? "worker" : "inproc"}.deck.ts`, + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + contextSchema: z.object({}), + responseSchema: z.string(), + run: async (ctx) => { + return await ctx.spawnAndWait({ path: ${ + JSON.stringify(childPath) + }, input: {} }); + } + }); + `, + ); + + const provider: ModelProvider = { + chat({ messages }) { + const userMessages = messages.filter((msg) => + msg.role === "user" && typeof msg.content === "string" + ); + const latest = userMessages.length + ? userMessages[userMessages.length - 1].content as string + : "missing-user-message"; + return Promise.resolve({ + message: { role: "assistant", content: latest }, + finishReason: "stop", + }); + }, + }; + + const result = await runDeck({ + path: parentPath, + input: {}, + modelProvider: provider, + isRoot: true, + initialUserMessage: "forward-this-message", + workerSandbox, + workspacePermissions: { + read: true, + write: false, + run: false, + net: false, + env: false, + }, + workspacePermissionsBaseDir: dir, + }); + + assertEquals(result, "forward-this-message"); + } + }, +); + +Deno.test( + "execute deck helpers persist session meta and transcript across turns", + async () => { + for (const workerSandbox of [false, true]) { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const deckPath = await writeTempDeck( + dir, + `execute-session-meta-${workerSandbox ? "worker" : "inproc"}.deck.ts`, + ` + import { defineDeck } from "${modHref}"; + export default defineDeck({ + run: (ctx) => { + const existing = ctx.getSessionMeta("codex.threadId"); + const threadId = typeof existing === "string" && existing + ? existing + : "thread-" + crypto.randomUUID(); + ctx.setSessionMeta("codex.threadId", threadId); + if (typeof ctx.initialUserMessage === "string" && ctx.initialUserMessage.trim()) { + ctx.appendMessage({ role: "user", content: ctx.initialUserMessage.trim() }); + } + const assistant = "thread=" + threadId; + ctx.appendMessage({ role: "assistant", content: assistant }); + return assistant; + }, + }); + `, + ); + + let savedState: import("./state.ts").SavedState | undefined; + const onStateUpdate = (state: import("./state.ts").SavedState) => { + savedState = state; + }; + + const first = await runDeck({ + path: deckPath, + input: "", + modelProvider: dummyProvider, + isRoot: true, + initialUserMessage: "first turn", + state: savedState, + onStateUpdate, + workerSandbox, + workspacePermissions: { + read: true, + write: false, + run: false, + net: false, + env: false, + }, + workspacePermissionsBaseDir: dir, + }); + + const firstThread = String(first).replace(/^thread=/, ""); + assert(firstThread.length > 0); + assertEquals(savedState?.meta?.["codex.threadId"], firstThread); + assertEquals(savedState?.messages?.length, 2); + assertEquals(savedState?.messages?.[0]?.role, "user"); + assertEquals(savedState?.messages?.[1]?.role, "assistant"); + + const second = await runDeck({ + path: deckPath, + input: "", + modelProvider: dummyProvider, + isRoot: true, + initialUserMessage: "second turn", + state: savedState, + onStateUpdate, + workerSandbox, + workspacePermissions: { + read: true, + write: false, + run: false, + net: false, + env: false, + }, + workspacePermissionsBaseDir: dir, + }); + + const secondThread = String(second).replace(/^thread=/, ""); + assertEquals(secondThread, firstThread); + assertEquals(savedState?.meta?.["codex.threadId"], firstThread); + assertEquals(savedState?.messages?.length, 4); + assertEquals(savedState?.messages?.[2]?.role, "user"); + assertEquals(savedState?.messages?.[3]?.role, "assistant"); + } + }, +); + +Deno.test("orchestration worker preserves serial LLM trace ordering and correlation ids", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const childPath = await writeTempDeck( + dir, + "serial-child.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.object({ value: z.string() }), + outputSchema: z.string(), + run: (ctx) => "child:" + ctx.input.value, + }); + `, + ); + const parentPath = await writeTempDeck( + dir, + "serial-parent.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.string(), + outputSchema: z.string(), + modelParams: { model: "dummy-model" }, + actions: [{ name: "child", path: "${childPath}" }], + }); + `, + ); + + const makeProvider = (): ModelProvider => { + let pass = 0; + return { + chat() { + pass += 1; + if (pass === 1) { + return Promise.resolve({ + message: { role: "assistant", content: null }, + finishReason: "tool_calls", + toolCalls: [{ + id: "call-child", + name: "child", + args: { value: "x" }, + }], + }); + } + return Promise.resolve({ + message: { role: "assistant", content: "done" }, + finishReason: "stop", + }); + }, + }; + }; + + const legacyTraces: Array = []; + const workerTraces: Array = []; + const legacy = await runDeck({ + path: parentPath, + input: "hi", + modelProvider: makeProvider(), + isRoot: true, + workerSandbox: false, + trace: (ev) => legacyTraces.push(ev), + }); + const worker = await runDeck({ + path: parentPath, + input: "hi", + modelProvider: makeProvider(), + isRoot: true, + workerSandbox: true, + trace: (ev) => workerTraces.push(ev), + workspacePermissions: { read: true, write: false, run: false }, + workspacePermissionsBaseDir: dir, + }); + + assertEquals(legacy, "done"); + assertEquals(worker, legacy); + + const workerActionStart = workerTraces.find((event) => + event.type === "action.start" && event.name === "child" + ) as Extract | undefined; + const workerToolCall = workerTraces.find((event) => + event.type === "tool.call" && event.name === "child" + ) as Extract | undefined; + const workerToolResult = workerTraces.find((event) => + event.type === "tool.result" && event.name === "child" + ) as Extract | undefined; + const workerActionEnd = workerTraces.find((event) => + event.type === "action.end" && event.name === "child" + ) as Extract | undefined; + + assert(workerActionStart); + assert(workerToolCall); + assert(workerToolResult); + assert(workerActionEnd); + assertEquals(workerActionStart.actionCallId, "call-child"); + assertEquals(workerToolCall.actionCallId, "call-child"); + assertEquals(workerToolResult.actionCallId, "call-child"); + assertEquals(workerActionEnd.actionCallId, "call-child"); + + const startIdx = workerTraces.findIndex((event) => + event.type === "action.start" && event.name === "child" + ); + const callIdx = workerTraces.findIndex((event) => + event.type === "tool.call" && event.name === "child" + ); + const resultIdx = workerTraces.findIndex((event) => + event.type === "tool.result" && event.name === "child" + ); + const endIdx = workerTraces.findIndex((event) => + event.type === "action.end" && event.name === "child" + ); + assert( + startIdx >= 0 && callIdx > startIdx && resultIdx > callIdx && + endIdx > resultIdx, + ); +}); + +Deno.test("orchestration worker enforces parent permission ceiling for LLM child actions", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const deniedPath = path.join(dir, "llm-child-denied.txt"); + const childPath = await writeTempDeck( + dir, + "llm-child-write.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.object({ path: z.string() }), + outputSchema: z.string(), + permissions: { write: true }, + run: async (ctx) => { + await Deno.writeTextFile(ctx.input.path, "nope"); + return "ok"; + }, + }); + `, + ); + const parentPath = await writeTempDeck( + dir, + "llm-parent-write.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.string(), + outputSchema: z.string(), + modelParams: { model: "dummy-model" }, + actions: [{ name: "child", path: "${childPath}" }], + }); + `, + ); + + let pass = 0; + const provider: ModelProvider = { + chat() { + pass += 1; + if (pass === 1) { + return Promise.resolve({ + message: { role: "assistant", content: null }, + finishReason: "tool_calls", + toolCalls: [{ + id: "call-write", + name: "child", + args: { path: deniedPath }, + }], + }); + } + return Promise.resolve({ + message: { role: "assistant", content: "done" }, + finishReason: "stop", + }); + }, + }; + + await assertRejects( + () => + runDeck({ + path: parentPath, + input: "hi", + modelProvider: provider, + isRoot: true, + workerSandbox: true, + workspacePermissions: { read: true, write: false, run: false }, + workspacePermissionsBaseDir: dir, + }), + Error, + "write", + ); +}); + +Deno.test("orchestration worker enforces action reference narrowing for child compute deck writes", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const writePath = path.join(dir, "child-write-should-be-denied.txt"); + const childPath = await writeTempDeck( + dir, + "llm-child-reference-deny.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.object({ path: z.string() }), + outputSchema: z.string(), + run: async (ctx) => { + await Deno.writeTextFile(ctx.input.path, "should-fail"); + return "ok"; + }, + }); + `, + ); + const parentPath = await writeTempDeck( + dir, + "llm-parent-reference-deny.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.string(), + outputSchema: z.string(), + modelParams: { model: "dummy-model" }, + actions: [{ + name: "child", + path: "${childPath}", + permissions: { write: false }, + }], + }); + `, + ); + + let pass = 0; + const provider: ModelProvider = { + chat() { + pass += 1; + if (pass === 1) { + return Promise.resolve({ + message: { role: "assistant", content: null }, + finishReason: "tool_calls", + toolCalls: [{ + id: "call-ref-deny", + name: "child", + args: { path: writePath }, + }], + }); + } + return Promise.resolve({ + message: { role: "assistant", content: "done" }, + finishReason: "stop", + }); + }, + }; + + await assertRejects( + () => + runDeck({ + path: parentPath, + input: "hi", + modelProvider: provider, + isRoot: true, + workerSandbox: true, + // Root allows writes; action reference must still narrow child writes. + workspacePermissions: { read: true, write: [dir], run: false }, + workspacePermissionsBaseDir: dir, + }), + Error, + "write", + ); +}); + +Deno.test("orchestration worker timeout cancels nested LLM child actions before side effects", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const sideEffectPath = path.join(dir, "llm-late-side-effect.txt"); + const childPath = await writeTempDeck( + dir, + "llm-timeout-child.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.object({}), + outputSchema: z.string(), + run: async () => { + await new Promise((resolve) => setTimeout(resolve, 300)); + await Deno.writeTextFile(${JSON.stringify(sideEffectPath)}, "late"); + return "late"; + }, + }); + `, + ); + const parentPath = await writeTempDeck( + dir, + "llm-timeout-parent.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.string(), + outputSchema: z.string(), + modelParams: { model: "dummy-model" }, + actions: [{ name: "child", path: "${childPath}" }], + }); + `, + ); + + let pass = 0; + const provider: ModelProvider = { + chat() { + pass += 1; + if (pass === 1) { + return Promise.resolve({ + message: { role: "assistant", content: null }, + finishReason: "tool_calls", + toolCalls: [{ id: "call-timeout", name: "child", args: {} }], + }); + } + return Promise.resolve({ + message: { role: "assistant", content: "done" }, + finishReason: "stop", + }); + }, + }; + + await assertRejects( + () => + runDeck({ + path: parentPath, + input: "hi", + modelProvider: provider, + isRoot: true, + workerSandbox: true, + guardrails: { timeoutMs: 80 }, + workspacePermissions: { read: true, write: [dir], run: false }, + workspacePermissionsBaseDir: dir, + }), + Error, + "Timeout exceeded", + ); + + await new Promise((resolve) => setTimeout(resolve, 350)); + let sideEffectExists = true; + try { + await Deno.stat(sideEffectPath); + } catch (err) { + if (err instanceof Deno.errors.NotFound) { + sideEffectExists = false; + } else { + throw err; + } + } + assertEquals(sideEffectExists, false); +}); + +Deno.test("orchestration worker clamps forged child deadlines to parent timeout", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const sideEffectPath = path.join(dir, "forged-deadline-side-effect.txt"); + const childPath = await writeTempDeck( + dir, + "forged-deadline-child.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.object({}), + outputSchema: z.string(), + run: async () => { + await new Promise((resolve) => setTimeout(resolve, 200)); + await Deno.writeTextFile(${JSON.stringify(sideEffectPath)}, "late"); + return "late"; + }, + }); + `, + ); + const parentPath = await writeTempDeck( + dir, + "forged-deadline-parent.deck.ts", + ` + import { defineDeck } from "${modHref}"; + export default defineDeck({ + run: async () => { + globalThis.postMessage({ + type: "spawn.request", + requestId: "forged-request", + payload: { + path: ${JSON.stringify(childPath)}, + input: {}, + parentActionCallId: "forged-action", + parentPermissionsBaseDir: ${JSON.stringify(dir)}, + parentPermissions: { + baseDir: ${JSON.stringify(dir)}, + read: true, + write: true, + run: false, + net: false, + env: false, + }, + workspacePermissions: { + read: true, + write: [${JSON.stringify(dir)}], + run: false, + net: false, + env: false, + }, + workspacePermissionsBaseDir: ${JSON.stringify(dir)}, + runDeadlineMs: performance.now() + 10_000, + }, + }); + await new Promise(() => {}); + }, + }); + `, + ); + + await assertRejects( + () => + runDeck({ + path: parentPath, + input: {}, + modelProvider: dummyProvider, + isRoot: true, + workerSandbox: true, + guardrails: { timeoutMs: 80 }, + workspacePermissions: { read: true, write: [dir], run: false }, + workspacePermissionsBaseDir: dir, + }), + Error, + "Timeout exceeded", + ); + + await new Promise((resolve) => setTimeout(resolve, 260)); + let sideEffectExists = true; + try { + await Deno.stat(sideEffectPath); + } catch (err) { + if (err instanceof Deno.errors.NotFound) { + sideEffectExists = false; + } else { + throw err; + } + } + assertEquals(sideEffectExists, false); +}); + +Deno.test("compute worker rejects forged run.result messages", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const deckPath = await writeTempDeck( + dir, + "forged-run-result.deck.ts", + ` + import { defineDeck } from "${modHref}"; + export default defineDeck({ + run: async () => { + globalThis.postMessage({ type: "run.result", result: "forged" }); + await new Promise(() => {}); + }, + }); + `, + ); + + await assertRejects( + () => + runDeck({ + path: deckPath, + input: {}, + modelProvider: dummyProvider, + isRoot: true, + workerSandbox: true, + guardrails: { timeoutMs: 80 }, + workspacePermissions: { + read: true, + write: false, + run: false, + net: false, + env: false, + }, + workspacePermissionsBaseDir: dir, + }), + Error, + "Timeout exceeded", + ); +}); + +Deno.test( + "compute worker does not leak bridge session back to untrusted deck messages", + async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const childPath = await writeTempDeck( + dir, + "bridge-leak-child.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.object({}), + outputSchema: z.string(), + run: () => "child-ok", + }); + `, + ); + const parentPath = await writeTempDeck( + dir, + "bridge-leak-parent.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.object({}), + outputSchema: z.string(), + run: async (ctx) => { + globalThis.addEventListener("message", (event) => { + const data = event.data; + if (data?.type !== "spawn.result") return; + if (typeof data?.bridgeSession !== "string") return; + globalThis.postMessage({ + type: "run.result", + bridgeSession: data.bridgeSession, + completionNonce: data.completionNonce, + result: "forged", + }); + }); + await ctx.spawnAndWait({ path: ${ + JSON.stringify(childPath) + }, input: {} }); + await new Promise(() => {}); + }, + }); + `, + ); + + await assertRejects( + () => + runDeck({ + path: parentPath, + input: {}, + modelProvider: dummyProvider, + isRoot: true, + workerSandbox: true, + guardrails: { timeoutMs: 80 }, + workspacePermissions: { + read: true, + write: false, + run: false, + net: false, + env: false, + }, + workspacePermissionsBaseDir: dir, + }), + Error, + "Timeout exceeded", + ); + }, +); + +Deno.test("compute worker rejects forged spawn.request messages", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const sideEffectPath = path.join(dir, "forged-spawn-side-effect.txt"); + const childPath = await writeTempDeck( + dir, + "forged-spawn-child.deck.ts", + ` + import { defineDeck } from "${modHref}"; + export default defineDeck({ + run: async () => { + await Deno.writeTextFile(${JSON.stringify(sideEffectPath)}, "forged"); + return "ok"; + }, + }); + `, + ); + const parentPath = await writeTempDeck( + dir, + "forged-spawn-parent.deck.ts", + ` + import { defineDeck } from "${modHref}"; + export default defineDeck({ + run: async () => { + globalThis.postMessage({ + type: "spawn.request", + requestId: "forged-request", + payload: { + path: ${JSON.stringify(childPath)}, + input: {}, + parentActionCallId: "forged-action", + parentPermissionsBaseDir: ${JSON.stringify(dir)}, + parentPermissions: { + baseDir: ${JSON.stringify(dir)}, + read: true, + write: true, + run: false, + net: false, + env: false, + }, + workspacePermissions: { + read: true, + write: [${JSON.stringify(dir)}], + run: false, + net: false, + env: false, + }, + workspacePermissionsBaseDir: ${JSON.stringify(dir)}, + runDeadlineMs: performance.now() + 10_000, + }, + }); + await new Promise(() => {}); + }, + }); + `, + ); + + await assertRejects( + () => + runDeck({ + path: parentPath, + input: {}, + modelProvider: dummyProvider, + isRoot: true, + workerSandbox: true, + guardrails: { timeoutMs: 80 }, + workspacePermissions: { read: true, write: [dir], run: false }, + workspacePermissionsBaseDir: dir, + }), + Error, + "Timeout exceeded", + ); + + await new Promise((resolve) => setTimeout(resolve, 120)); + let sideEffectExists = true; + try { + await Deno.stat(sideEffectPath); + } catch (err) { + if (err instanceof Deno.errors.NotFound) { + sideEffectExists = false; + } else { + throw err; + } + } + assertEquals(sideEffectExists, false); +}); + +Deno.test("orchestration worker serial scheduler runs one child tool invocation at a time", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const orderPath = path.join(dir, "serial-order.txt"); + await Deno.writeTextFile(orderPath, ""); + const childPath = await writeTempDeck( + dir, + "serial-child-work.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.object({ id: z.number(), path: z.string() }), + outputSchema: z.string(), + run: async (ctx) => { + await Deno.writeTextFile(ctx.input.path, "start" + ctx.input.id + "\\n", { append: true }); + await new Promise((resolve) => setTimeout(resolve, 60)); + await Deno.writeTextFile(ctx.input.path, "end" + ctx.input.id + "\\n", { append: true }); + return "ok-" + ctx.input.id; + }, + }); + `, + ); + const parentPath = await writeTempDeck( + dir, + "serial-parent-llm.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.string(), + outputSchema: z.string(), + modelParams: { model: "dummy-model" }, + actions: [{ name: "child", path: "${childPath}" }], + }); + `, + ); + + let pass = 0; + const provider: ModelProvider = { + chat() { + pass += 1; + if (pass === 1) { + return Promise.resolve({ + message: { role: "assistant", content: null }, + finishReason: "tool_calls", + toolCalls: [ + { id: "call-1", name: "child", args: { id: 1, path: orderPath } }, + { id: "call-2", name: "child", args: { id: 2, path: orderPath } }, + ], + }); + } + return Promise.resolve({ + message: { role: "assistant", content: "done" }, + finishReason: "stop", + }); + }, + }; + + const result = await runDeck({ + path: parentPath, + input: "hi", + modelProvider: provider, + isRoot: true, + workerSandbox: true, + workspacePermissions: { read: true, write: [dir], run: false }, + workspacePermissionsBaseDir: dir, + }); + assertEquals(result, "done"); + + const order = (await Deno.readTextFile(orderPath)) + .split("\n") + .map((line) => line.trim()) + .filter(Boolean); + assertEquals(order, ["start1", "end1", "start2", "end2"]); +}); + +Deno.test("LLM built-in tools are gated by effective permissions", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const deckPath = await writeTempDeck( + dir, + "tool-gating.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.string(), + outputSchema: z.string(), + modelParams: { model: "dummy-model" }, + }); + `, + ); + + let toolNames: Array = []; + const provider: ModelProvider = { + chat(input) { + toolNames = (input.tools ?? []).map((tool) => tool.function.name); + return Promise.resolve({ + message: { role: "assistant", content: "ok" }, + finishReason: "stop", + }); + }, + }; + + const result = await runDeck({ + path: deckPath, + input: "hi", + modelProvider: provider, + isRoot: true, + workspacePermissions: { + read: true, + write: false, + run: false, + net: false, + env: false, + }, + workspacePermissionsBaseDir: dir, + }); + + assertEquals(result, "ok"); + assert(toolNames.includes("read_file")); + assert(toolNames.includes("list_dir")); + assert(toolNames.includes("grep_files")); + assertEquals(toolNames.includes("apply_patch"), false); + assertEquals(toolNames.includes("exec"), false); +}); + +Deno.test( + "LLM file tools enforce directory-scoped read permissions", + async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const deckPath = await writeTempDeck( + dir, + "file-tools.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.string(), + outputSchema: z.string(), + modelParams: { model: "dummy-model" }, + }); + `, + ); + + const allowedDir = path.join(dir, "allowed"); + const nestedDir = path.join(allowedDir, "nested"); + await Deno.mkdir(nestedDir, { recursive: true }); + const allowedFile = path.join(nestedDir, "note.txt"); + await Deno.writeTextFile(allowedFile, "line-one\\nline-two\\nmatch-line"); + + const otherDir = path.join(dir, "other"); + await Deno.mkdir(otherDir, { recursive: true }); + const deniedFile = path.join(otherDir, "secret.txt"); + await Deno.writeTextFile(deniedFile, "top-secret"); + const allowedPatchPath = path.join(allowedDir, "editable.txt"); + await Deno.writeTextFile(allowedPatchPath, "before-allowed"); + const deniedPatchPath = path.join(otherDir, "blocked.txt"); + await Deno.writeTextFile(deniedPatchPath, "before-denied"); + + type ToolPlanEntry = { + id: string; + name: "read_file" | "list_dir" | "grep_files" | "apply_patch"; + args: Record; + expectStatus: 200 | 403; + }; + const toolPlan: Array = [ + { + id: "call-1", + name: "read_file", + args: { path: allowedFile }, + expectStatus: 200, + }, + { + id: "call-2", + name: "read_file", + args: { path: deniedFile }, + expectStatus: 403, + }, + { + id: "call-3", + name: "list_dir", + args: { path: allowedDir, recursive: true }, + expectStatus: 200, + }, + { + id: "call-4", + name: "list_dir", + args: { path: otherDir, recursive: true }, + expectStatus: 403, + }, + { + id: "call-5", + name: "grep_files", + args: { path: allowedDir, query: "match" }, + expectStatus: 200, + }, + { + id: "call-6", + name: "grep_files", + args: { path: otherDir, query: "match" }, + expectStatus: 403, + }, + { + id: "call-7", + name: "apply_patch", + args: { + path: allowedPatchPath, + edits: [{ + old_text: "before-allowed", + new_text: "after-allowed", + }], + }, + expectStatus: 200, + }, + { + id: "call-8", + name: "apply_patch", + args: { + path: deniedPatchPath, + edits: [{ + old_text: "before-denied", + new_text: "after-denied", + }], + }, + expectStatus: 403, + }, + ]; + + type ToolResponseBody = { + status?: number; + code?: string; + message?: string; + payload?: Record; + }; + const toolResults: Array<{ plan: ToolPlanEntry; body: ToolResponseBody }> = + []; + const seenToolIds = new Set(); + const captureToolMessages = (input: { messages: Array }) => { + for (const message of input.messages) { + if (message.role !== "tool" || !message.tool_call_id) continue; + if (seenToolIds.has(message.tool_call_id)) continue; + seenToolIds.add(message.tool_call_id); + if (message.content === null) continue; + const plan = toolPlan.find((entry) => + entry.id === message.tool_call_id + ); + if (!plan) continue; + const body = JSON.parse(String(message.content)) as ToolResponseBody; + toolResults.push({ plan, body }); + } + }; + + let pass = 0; + const provider: ModelProvider = { + chat(input) { + captureToolMessages(input); + const plan = toolPlan[pass]; + pass += 1; + if (!plan) { + return Promise.resolve({ + message: { role: "assistant", content: "done" }, + finishReason: "stop", + }); + } + return Promise.resolve({ + message: { + role: "assistant", + content: null, + tool_calls: [{ + id: plan.id, + type: "function", + function: { + name: plan.name, + arguments: JSON.stringify(plan.args), + }, + }], + }, + finishReason: "tool_calls", + toolCalls: [{ + id: plan.id, + name: plan.name, + args: plan.args, + }], + }); + }, + }; + + const result = await runDeck({ + path: deckPath, + input: "hi", + modelProvider: provider, + isRoot: true, + workspacePermissions: { + read: ["./allowed"], + write: ["./allowed"], + run: false, + net: false, + env: false, + }, + workspacePermissionsBaseDir: dir, + }); + + assertEquals(result, "done"); + assertEquals(toolResults.length, toolPlan.length); + + const listDirEntries = (payload: Record | undefined) => { + return Array.isArray(payload?.entries) + ? payload.entries as Array> + : []; + }; + + for (const { plan, body } of toolResults) { + assertEquals(body.status, plan.expectStatus); + if (plan.expectStatus === 200) { + assertEquals(body.code ?? null, null); + if (plan.name === "read_file") { + const payload = body.payload as Record; + assert(payload, "expected read_file payload"); + assertEquals(payload.path, plan.args.path); + assert( + typeof payload.content === "string" && + payload.content.includes("match-line"), + ); + } + if (plan.name === "list_dir") { + const payload = body.payload as Record; + assert(payload, "expected list_dir payload"); + const entries = listDirEntries(payload); + assert( + entries.some((entry) => entry.path === allowedFile), + "expected list_dir entries to include allowed file", + ); + } + if (plan.name === "grep_files") { + const payload = body.payload as Record; + assert(payload, "expected grep_files payload"); + const matches = Array.isArray(payload.matches) + ? payload.matches as Array> + : []; + assert( + matches.some((match) => match.path === allowedFile), + "expected grep_files to return match from allowed file", + ); + } + if (plan.name === "apply_patch") { + const payload = body.payload as Record; + assert(payload, "expected apply_patch payload"); + assertEquals(payload.path, plan.args.path); + assertEquals(payload.applied, 1); + } + } else { + assertEquals(body.code, "permission_denied"); + assert( + typeof body.message === "string" && + body.message.includes(plan.name), + "expected permission denial message to mention the tool", + ); + } + } + + assertEquals( + await Deno.readTextFile(allowedPatchPath), + "after-allowed", + "apply_patch should modify files in allowed directories", + ); + assertEquals( + await Deno.readTextFile(deniedPatchPath), + "before-denied", + "apply_patch must not modify files outside allowed directories", + ); + }, +); + +Deno.test("LLM file tools deny symlink escapes outside granted roots", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const deckPath = await writeTempDeck( + dir, + "file-tools-symlink.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.string(), + outputSchema: z.string(), + modelParams: { model: "dummy-model" }, + }); + `, + ); + + const allowedDir = path.join(dir, "allowed"); + const outsideDir = path.join(dir, "outside"); + await Deno.mkdir(allowedDir, { recursive: true }); + await Deno.mkdir(outsideDir, { recursive: true }); + + const outsideReadTarget = path.join(outsideDir, "secret.txt"); + await Deno.writeTextFile(outsideReadTarget, "secret"); + const outsideWriteTarget = path.join(outsideDir, "edit.txt"); + await Deno.writeTextFile(outsideWriteTarget, "before"); + + const readSymlink = path.join(allowedDir, "secret-link.txt"); + await Deno.symlink(outsideReadTarget, readSymlink); + const writeSymlinkDir = path.join(allowedDir, "linked"); + await Deno.symlink(outsideDir, writeSymlinkDir); + + type ToolResponseBody = { + status?: number; + code?: string; + message?: string; + }; + const toolResults: Array = []; + const seenToolIds = new Set(); + + let pass = 0; + const provider: ModelProvider = { + chat(input) { + if (pass > 0) { + const toolMessages = input.messages.filter((message) => + message.role === "tool" && + (message.name === "read_file" || message.name === "apply_patch") + ); + for (const message of toolMessages) { + if (!message.tool_call_id || seenToolIds.has(message.tool_call_id)) { + continue; + } + seenToolIds.add(message.tool_call_id); + toolResults.push( + JSON.parse(String(message.content)) as ToolResponseBody, + ); + } + } + pass += 1; + if (pass === 1) { + return Promise.resolve({ + message: { role: "assistant", content: null }, + finishReason: "tool_calls", + toolCalls: [{ + id: "tool-read", + name: "read_file", + args: { path: readSymlink }, + }], + }); + } + if (pass === 2) { + return Promise.resolve({ + message: { role: "assistant", content: null }, + finishReason: "tool_calls", + toolCalls: [{ + id: "tool-write", + name: "apply_patch", + args: { + path: path.join(writeSymlinkDir, "edit.txt"), + edits: [{ old_text: "before", new_text: "after" }], + }, + }], + }); + } + return Promise.resolve({ + message: { role: "assistant", content: "done" }, + finishReason: "stop", + }); + }, + }; + + const result = await runDeck({ + path: deckPath, + input: "hi", + modelProvider: provider, + isRoot: true, + workspacePermissions: { + read: ["./allowed"], + write: ["./allowed"], + run: false, + net: false, + env: false, + }, + workspacePermissionsBaseDir: dir, + }); + + assertEquals(result, "done"); + assertEquals(toolResults.length, 2); + assertEquals(toolResults[0].status, 403); + assertEquals(toolResults[0].code, "permission_denied"); + assertEquals(toolResults[1].status, 403); + assertEquals(toolResults[1].code, "permission_denied"); + assertEquals(await Deno.readTextFile(outsideWriteTarget), "before"); +}); + +Deno.test( + "LLM built-in exec denies symlink targets outside allowed run.paths", + async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const deckPath = await writeTempDeck( + dir, + "exec-symlink-path-deny.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.string(), + outputSchema: z.string(), + modelParams: { model: "dummy-model" }, + }); + `, + ); + const allowedDir = path.join(dir, "allowed"); + await Deno.mkdir(allowedDir, { recursive: true }); + const symlinkCommand = path.join(allowedDir, "tool"); + await Deno.symlink("/bin/sh", symlinkCommand); + const sideEffectPath = path.join(dir, "exec-symlink-side-effect.txt"); + + let pass = 0; + let toolPayload = ""; + const provider: ModelProvider = { + chat(input) { + pass++; + if (pass === 1) { + return Promise.resolve({ + message: { role: "assistant", content: null }, + finishReason: "tool_calls", + toolCalls: [{ + id: "tool-exec", + name: "exec", + args: { + command: symlinkCommand, + args: ["-c", `echo escaped > ${sideEffectPath}`], + }, + }], + }); + } + toolPayload = String( + input.messages.find((message) => + message.role === "tool" && message.name === "exec" + )?.content ?? "", + ); + return Promise.resolve({ + message: { role: "assistant", content: "done" }, + finishReason: "stop", + }); + }, + }; + + const result = await runDeck({ + path: deckPath, + input: "hi", + modelProvider: provider, + isRoot: true, + workerSandbox: true, + workspacePermissions: { + read: true, + write: false, + run: { paths: ["./allowed/tool"] }, + net: false, + env: false, + }, + workspacePermissionsBaseDir: dir, + }); + assertEquals(result, "done"); + + const parsed = JSON.parse(toolPayload) as { + status?: number; + code?: string; + message?: string; + }; + assertEquals(parsed.status, 403); + assertEquals(parsed.code, "permission_denied"); + assert( + typeof parsed.message === "string" && parsed.message.includes("exec"), + "expected permission denial to mention exec", + ); + + const leaked = await Deno.stat(sideEffectPath).then( + () => true, + () => false, + ); + assertEquals(leaked, false); + }, +); + +Deno.test("LLM built-in apply_patch returns stable permission denial", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const targetPath = path.join(dir, "target.txt"); + await Deno.writeTextFile(targetPath, "before"); + const deckPath = await writeTempDeck( + dir, + "tool-deny.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.string(), + outputSchema: z.string(), + modelParams: { model: "dummy-model" }, + }); + `, + ); + + let pass = 0; + let toolPayload = ""; + const provider: ModelProvider = { + chat(input) { + pass++; + if (pass === 1) { + return Promise.resolve({ + message: { + role: "assistant", + content: null, + tool_calls: [{ + id: "tool-1", + type: "function", + function: { + name: "apply_patch", + arguments: JSON.stringify({ + path: targetPath, + edits: [{ old_text: "before", new_text: "after" }], + }), + }, + }], + }, + finishReason: "tool_calls", + toolCalls: [{ + id: "tool-1", + name: "apply_patch", + args: { + path: targetPath, + edits: [{ old_text: "before", new_text: "after" }], + }, + }], + }); + } + toolPayload = String( + input.messages.find((message) => + message.role === "tool" && message.name === "apply_patch" + )?.content ?? "", + ); + return Promise.resolve({ + message: { role: "assistant", content: "done" }, + finishReason: "stop", + }); + }, + }; + + const result = await runDeck({ + path: deckPath, + input: "hi", + modelProvider: provider, + isRoot: true, + workspacePermissions: { + read: true, + write: false, + run: false, + net: false, + env: false, + }, + workspacePermissionsBaseDir: dir, + }); + + assertEquals(result, "done"); + assert(toolPayload.includes('"code":"permission_denied"')); + assert(toolPayload.includes("apply_patch denied")); + assertEquals(await Deno.readTextFile(targetPath), "before"); +}); + +Deno.test( + "LLM built-in apply_patch create_if_missing creates nested parent directories", + async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const deckPath = await writeTempDeck( + dir, + "tool-create-missing.deck.ts", + ` + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.string(), + outputSchema: z.string(), + modelParams: { model: "dummy-model" }, + }); + `, + ); + + const nestedTarget = path.join(dir, "faq", "faq.txt"); + let pass = 0; + let toolPayload = ""; + const provider: ModelProvider = { + chat(input) { + pass += 1; + if (pass === 1) { + return Promise.resolve({ + message: { + role: "assistant", + content: null, + tool_calls: [{ + id: "tool-create", + type: "function", + function: { + name: "apply_patch", + arguments: JSON.stringify({ + path: nestedTarget, + create_if_missing: true, + edits: [{ old_text: "placeholder", new_text: "hello faq" }], + }), + }, + }], + }, + finishReason: "tool_calls", + toolCalls: [{ + id: "tool-create", + name: "apply_patch", + args: { + path: nestedTarget, + create_if_missing: true, + edits: [{ old_text: "placeholder", new_text: "hello faq" }], + }, + }], + }); + } + toolPayload = String( + input.messages.find((message) => + message.role === "tool" && message.name === "apply_patch" + )?.content ?? "", + ); + return Promise.resolve({ + message: { role: "assistant", content: "done" }, + finishReason: "stop", + }); + }, + }; -![Fragments card](./fragments.card.md) -`.trim(), - ); + const result = await runDeck({ + path: deckPath, + input: "hi", + modelProvider: provider, + isRoot: true, + workspacePermissions: { + read: true, + write: true, + run: false, + net: false, + env: false, + }, + workspacePermissionsBaseDir: dir, + }); - await Deno.writeTextFile( - path.join(dir, "base_input.zod.ts"), - ` - import { z } from "zod"; - export default z.object({ text: z.string() }); - `.trim(), - ); + assertEquals(result, "done"); + assert(toolPayload.includes('"status":200')); + assert(toolPayload.includes('"created":true')); + assertEquals(await Deno.readTextFile(nestedTarget), ""); + }, +); - await Deno.writeTextFile( - path.join(dir, "base_output.zod.ts"), +Deno.test("runDeck abort signal cancels in-flight model call and fires onCancel once", async () => { + const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); + const deckPath = await writeTempDeck( + dir, + "abort.deck.ts", ` + import { defineDeck } from "${modHref}"; import { z } from "zod"; - export default z.object({ result: z.string() }); - `.trim(), + export default defineDeck({ + inputSchema: z.string(), + outputSchema: z.string(), + modelParams: { model: "dummy-model" }, + }); + `, ); - const deck = await loadDeck(deckPath); - const inputShape = (deck.inputSchema as unknown as { - shape: Record; - }).shape; - const outputShape = (deck.outputSchema as unknown as { - shape: Record; - }).shape; + const controller = new AbortController(); + let onCancelCalls = 0; + let providerSawSignal = false; + const provider: ModelProvider = { + chat(input) { + providerSawSignal = Boolean(input.signal); + if (input.signal && !controller.signal.aborted) { + setTimeout(() => controller.abort("stop"), 0); + } + return new Promise((_, reject) => { + input.signal?.addEventListener( + "abort", + () => reject(new DOMException("Run canceled", "AbortError")), + { once: true }, + ); + }); + }, + }; - assertEquals(Object.keys(inputShape).sort(), ["extra", "text"]); - assertEquals(Object.keys(outputShape).sort(), ["note", "result"]); + const runPromise = runDeck({ + path: deckPath, + input: "hello", + modelProvider: provider, + isRoot: true, + signal: controller.signal, + onCancel: () => { + onCancelCalls += 1; + }, + }); + + await assertRejects(() => runPromise); + await runPromise.catch((err) => { + assert(isRunCanceledError(err)); + }); + assertEquals(providerSawSignal, true); + assertEquals(onCancelCalls, 1); }); -Deno.test("cards cannot declare handlers (ts card)", async () => { +Deno.test("runDeck ignores post-abort stream chunks", async () => { const dir = await Deno.makeTempDir(); const modHref = modImportPath(); - - await writeTempDeck( + const deckPath = await writeTempDeck( dir, - "bad_handlers.card.ts", + "abort-stream.deck.ts", ` - import { defineCard } from "${modHref}"; - export default defineCard({ - handlers: { onBusy: { path: "./noop.deck.ts" } } + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.string(), + outputSchema: z.string(), + modelParams: { model: "dummy-model" }, }); `, ); - const deckPath = await writeTempDeck( - dir, - "root.deck.md", - ` -+++ -modelParams = { model = "dummy-model" } -+++ - -Deck. - -![Bad handlers](./bad_handlers.card.ts) -`.trim(), - ); + const controller = new AbortController(); + const chunks: Array = []; + const provider: ModelProvider = { + chat(input) { + input.onStreamText?.("a"); + return new Promise((_, reject) => { + input.signal?.addEventListener( + "abort", + () => { + input.onStreamText?.("b"); + reject(new DOMException("Run canceled", "AbortError")); + }, + { once: true }, + ); + }); + }, + }; - await assertRejects( - () => - runDeck({ - path: deckPath, - input: "hi", - modelProvider: dummyProvider, - isRoot: true, - }), - Error, - "handlers", - ); + const runPromise = runDeck({ + path: deckPath, + input: "hello", + modelProvider: provider, + isRoot: true, + stream: true, + signal: controller.signal, + onStreamText: (chunk) => chunks.push(chunk), + }); + setTimeout(() => controller.abort(), 0); + await assertRejects(() => runPromise); + assertEquals(chunks.includes("b"), false); }); -Deno.test("cards cannot declare handlers (markdown card)", async () => { +Deno.test("runDeck propagates cancellation through nested action runs", async () => { const dir = await Deno.makeTempDir(); - - await Deno.writeTextFile( - path.join(dir, "bad.card.md"), + const modHref = modImportPath(); + const childPath = await writeTempDeck( + dir, + "abort-child.deck.ts", ` -+++ -handlers = { onBusy = { path = "./noop.deck.ts" } } -+++ - -Body. -`.trim(), + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.object({}), + outputSchema: z.string(), + modelParams: { model: "dummy-model" }, + }); + `, ); - - const deckPath = path.join(dir, "root.deck.md"); - await Deno.writeTextFile( - deckPath, + const parentPath = await writeTempDeck( + dir, + "abort-parent.deck.ts", ` -+++ -modelParams = { model = "dummy-model" } -+++ - -Deck. - -![Bad card](./bad.card.md) -`.trim(), + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.string(), + outputSchema: z.string(), + modelParams: { model: "dummy-model" }, + actions: [{ name: "child", path: "${childPath}" }], + }); + `, ); - await assertRejects( - () => - runDeck({ - path: deckPath, - input: "hi", - modelProvider: dummyProvider, - isRoot: true, - }), - Error, - "handlers", - ); + let parentCalls = 0; + let childCalls = 0; + const controller = new AbortController(); + const provider: ModelProvider = { + chat(input) { + if (input.deckPath?.endsWith("abort-parent.deck.ts")) { + parentCalls += 1; + return Promise.resolve({ + message: { role: "assistant", content: null }, + finishReason: "tool_calls", + toolCalls: [{ id: "child-1", name: "child", args: {} }], + }); + } + childCalls += 1; + if (!controller.signal.aborted) { + controller.abort("stop-child"); + } + return Promise.reject(new DOMException("Run canceled", "AbortError")); + }, + }; + + const runPromise = runDeck({ + path: parentPath, + input: "start", + modelProvider: provider, + isRoot: true, + signal: controller.signal, + }); + await assertRejects(() => runPromise); + assertEquals(parentCalls, 1); + assertEquals(childCalls, 1); }); -Deno.test("runDeck resolves model arrays via modelProvider", async () => { +Deno.test("runDeck keeps cancellation distinct from normal errors", async () => { const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); const deckPath = await writeTempDeck( dir, - "root.deck.md", + "cancel-vs-error.deck.ts", ` -+++ -modelParams = { model = ["ollama/llama3.1", "openrouter/openai/gpt-4o-mini"] } -+++ - -Deck. -`.trim(), + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.string(), + outputSchema: z.string(), + modelParams: { model: "dummy-model" }, + }); + `, ); - let resolvedInput: { model?: string | Array } = {}; - const provider: ModelProvider = { - resolveModel: (input) => { - resolvedInput = { model: input.model }; - return Promise.resolve({ - model: "openrouter/openai/gpt-4o-mini", - params: { temp: 1 }, - }); - }, - chat: (input) => { - assertEquals(input.model, "openrouter/openai/gpt-4o-mini"); - assertEquals(input.params?.temp, 1); - return Promise.resolve({ - message: { role: "assistant", content: "ok" }, - finishReason: "stop", + + const canceledProvider: ModelProvider = { + chat(input) { + return new Promise((_, reject) => { + input.signal?.addEventListener( + "abort", + () => reject(new DOMException("Run canceled", "AbortError")), + { once: true }, + ); }); }, }; - - await runDeck({ + const canceledController = new AbortController(); + const canceled = runDeck({ path: deckPath, - input: "hi", - modelProvider: provider, + input: "hello", + modelProvider: canceledProvider, isRoot: true, - }); + signal: canceledController.signal, + }).catch((err) => err); + canceledController.abort(); + const canceledErr = await canceled; + assert(isRunCanceledError(canceledErr)); - assert(Array.isArray(resolvedInput.model)); + const failingProvider: ModelProvider = { + chat() { + throw new Error("normal failure"); + }, + }; + const failingErr = await runDeck({ + path: deckPath, + input: "hello", + modelProvider: failingProvider, + isRoot: true, + }).catch((err) => err); + assertEquals(isRunCanceledError(failingErr), false); }); -Deno.test("modelParams.additionalParams pass through and top-level wins", async () => { +Deno.test("runDeck rejects as canceled when signal aborts before final output without onStateUpdate", async () => { const dir = await Deno.makeTempDir(); + const modHref = modImportPath(); const deckPath = await writeTempDeck( dir, - "root.deck.md", + "abort-before-final-output.deck.ts", ` -+++ -modelParams = { model = "dummy-model", temperature = 0.2, additionalParams = { temperature = 0.9, seed = 42, my_param = "x" } } -+++ - -Deck. -`.trim(), + import { defineDeck } from "${modHref}"; + import { z } from "zod"; + export default defineDeck({ + inputSchema: z.string(), + outputSchema: z.string(), + modelParams: { model: "dummy-model" }, + }); + `, ); - let seenParams: Record | undefined; + const controller = new AbortController(); const provider: ModelProvider = { - chat: (input) => { - seenParams = input.params; + chat() { + controller.abort("stop-before-final"); return Promise.resolve({ message: { role: "assistant", content: "ok" }, finishReason: "stop", @@ -1872,14 +5418,13 @@ Deck. }, }; - await runDeck({ + const err = await runDeck({ path: deckPath, - input: "hi", + input: "hello", modelProvider: provider, isRoot: true, - }); + signal: controller.signal, + }).catch((caught) => caught); - assertEquals(seenParams?.temperature, 0.2); - assertEquals(seenParams?.seed, 42); - assertEquals(seenParams?.my_param, "x"); + assert(isRunCanceledError(err)); }); diff --git a/packages/gambit-core/src/runtime.ts b/packages/gambit-core/src/runtime.ts index a9004dab4..68b534f1d 100644 --- a/packages/gambit-core/src/runtime.ts +++ b/packages/gambit-core/src/runtime.ts @@ -9,19 +9,30 @@ import { GAMBIT_TOOL_RESPOND, } from "./constants.ts"; import { loadDeck } from "./loader.ts"; -import { resolveEffectivePermissions } from "./permissions.ts"; +import { + canReadPath, + canRunCommand, + canRunPath, + canWritePath, + intersectPermissions, + resolveEffectivePermissions, +} from "./permissions.ts"; import { assertZodSchema, toJsonSchema, validateWithSchema } from "./schema.ts"; import type { + CreateResponseRequest, + CreateResponseResponse, ExecutionContext, Guardrails, JSONValue, LoadedDeck, ModelMessage, ModelProvider, + ResponseEvent, ResponseItem, ResponseToolDefinition, ToolCallResult, ToolDefinition, + ToolKind, } from "./types.ts"; import type { MessageRef, SavedState } from "./state.ts"; import type { @@ -62,7 +73,7 @@ type IdleController = { stop: () => void; }; -type RunOptions = { +export type RunOptions = { path: string; input: unknown; inputProvided?: boolean; @@ -90,8 +101,351 @@ type RunOptions = { parentPermissions?: NormalizedPermissionSet; referencePermissions?: PermissionDeclarationInput; referencePermissionsBaseDir?: string; + runDeadlineMs?: number; + workerSandbox?: boolean; + inOrchestrationWorker?: boolean; + signal?: AbortSignal; + onCancel?: () => unknown | Promise; + onTool?: (input: { + name: string; + args: Record; + runId: string; + actionCallId: string; + parentActionCallId?: string; + deckPath: string; + }) => unknown | Promise; +}; + +const WORKER_SANDBOX_ENV = "GAMBIT_DECK_WORKER_SANDBOX"; +const WORKER_TIMEOUT_MESSAGE = "Timeout exceeded"; +const RUN_CANCELED_MESSAGE = "Run canceled"; +const INSPECT_WORKER_TIMEOUT_MS = 1_500; +const INSPECT_WORKER_TIMEOUT_MESSAGE = "Deck inspection timed out"; +const BUILTIN_TOOL_READ_FILE = "read_file"; +const BUILTIN_TOOL_LIST_DIR = "list_dir"; +const BUILTIN_TOOL_GREP_FILES = "grep_files"; +const BUILTIN_TOOL_APPLY_PATCH = "apply_patch"; +const BUILTIN_TOOL_EXEC = "exec"; +const BUILTIN_TOOL_NAMES = new Set([ + BUILTIN_TOOL_READ_FILE, + BUILTIN_TOOL_LIST_DIR, + BUILTIN_TOOL_GREP_FILES, + BUILTIN_TOOL_APPLY_PATCH, + BUILTIN_TOOL_EXEC, +]); +const TRUSTED_SCHEMA_IMPORT_PREFIXES = [ + "@bolt-foundry/gambit-core/schemas", + "gambit://schemas", +]; + +type WireScope = true | false | Array; +type WireRunScope = true | false | { + paths: Array; + commands: Array; +}; +type WirePermissionSet = { + baseDir: string; + read: WireScope; + write: WireScope; + run: WireRunScope; + net: WireScope; + env: WireScope; }; +type WorkerDeckInspection = { + deckPath: string; + hasModelParams: boolean; + permissions?: PermissionDeclarationInput; + guardrails?: Partial; +}; + +export class RunCanceledError extends Error { + code = "run_canceled"; + + constructor(message = RUN_CANCELED_MESSAGE) { + super(message); + this.name = "RunCanceledError"; + } +} + +export function isRunCanceledError(err: unknown): boolean { + if (!err || typeof err !== "object") return false; + const name = (err as { name?: unknown }).name; + const code = (err as { code?: unknown }).code; + if (name === "RunCanceledError" || code === "run_canceled") return true; + if (name === "AbortError") return true; + return false; +} + +function shouldUseWorkerSandbox(): boolean { + let raw: string | undefined; + try { + raw = Deno.env.get(WORKER_SANDBOX_ENV); + } catch { + return false; + } + raw = raw?.trim().toLowerCase(); + return raw === "1" || raw === "true" || raw === "yes"; +} + +function normalizedScopeToWire(scope: { + all: boolean; + values: Set; +}): WireScope { + if (scope.all) return true; + if (scope.values.size === 0) return false; + return Array.from(scope.values).sort(); +} + +function normalizedRunToWire(scope: { + all: boolean; + paths: Set; + commands: Set; +}): WireRunScope { + if (scope.all) return true; + if (scope.paths.size === 0 && scope.commands.size === 0) return false; + return { + paths: Array.from(scope.paths).sort(), + commands: Array.from(scope.commands).sort(), + }; +} + +function toWirePermissionSet(set: NormalizedPermissionSet): WirePermissionSet { + return { + baseDir: set.baseDir, + read: normalizedScopeToWire(set.read), + write: normalizedScopeToWire(set.write), + run: normalizedRunToWire(set.run), + net: normalizedScopeToWire(set.net), + env: normalizedScopeToWire(set.env), + }; +} + +function wireScopeToNormalized( + scope: WireScope, +): { all: boolean; values: Set } { + if (scope === true) return { all: true, values: new Set() }; + if (scope === false) return { all: false, values: new Set() }; + return { all: false, values: new Set(scope) }; +} + +function wireRunToNormalized( + scope: WireRunScope, +): { all: boolean; paths: Set; commands: Set } { + if (scope === true) { + return { + all: true, + paths: new Set(), + commands: new Set(), + }; + } + if (scope === false) { + return { + all: false, + paths: new Set(), + commands: new Set(), + }; + } + return { + all: false, + paths: new Set(scope.paths), + commands: new Set(scope.commands), + }; +} + +function fromWirePermissionSet( + set: WirePermissionSet, +): NormalizedPermissionSet { + return { + baseDir: set.baseDir, + read: wireScopeToNormalized(set.read), + write: wireScopeToNormalized(set.write), + run: wireRunToNormalized(set.run), + net: wireScopeToNormalized(set.net), + env: wireScopeToNormalized(set.env), + }; +} + +function normalizePermissionBaseDir( + set: NormalizedPermissionSet, + baseDir: string, +): NormalizedPermissionSet { + return { + ...set, + baseDir, + read: { all: set.read.all, values: new Set(set.read.values) }, + write: { all: set.write.all, values: new Set(set.write.values) }, + run: { + all: set.run.all, + paths: new Set(set.run.paths), + commands: new Set(set.run.commands), + }, + net: { all: set.net.all, values: new Set(set.net.values) }, + env: { all: set.env.all, values: new Set(set.env.values) }, + }; +} + +function deadlineForRun( + guardrails: Guardrails, + existing?: number, +): number { + const timeoutDeadline = performance.now() + guardrails.timeoutMs; + if (typeof existing === "number" && Number.isFinite(existing)) { + return Math.min(existing, timeoutDeadline); + } + return timeoutDeadline; +} + +function ensureNotExpired(deadlineMs: number) { + if (performance.now() > deadlineMs) { + throw new Error(WORKER_TIMEOUT_MESSAGE); + } +} + +function throwIfCanceled(signal?: AbortSignal) { + if (!signal?.aborted) return; + const reason = signal.reason; + if (typeof reason === "string" && reason.trim().length > 0) { + throw new RunCanceledError(reason); + } + if (reason instanceof Error && reason.message.trim().length > 0) { + throw new RunCanceledError(reason.message); + } + throw new RunCanceledError(); +} + +function ensureRunActive(deadlineMs: number, signal?: AbortSignal) { + throwIfCanceled(signal); + ensureNotExpired(deadlineMs); +} + +function isTrustedSchemaImportKey(key: string): boolean { + const normalized = key.trim(); + if (!normalized) return false; + return TRUSTED_SCHEMA_IMPORT_PREFIXES.some((prefix) => + normalized === prefix || normalized.startsWith(`${prefix}/`) + ); +} + +function tryReadWorkspaceConfigPath(deckPath: string): string | undefined { + const startDir = path.dirname(path.resolve(deckPath)); + let current = startDir; + while (true) { + const denoJson = path.join(current, "deno.json"); + const denoJsonc = path.join(current, "deno.jsonc"); + try { + if (Deno.statSync(denoJson).isFile) return denoJson; + } catch { + // continue search + } + try { + if (Deno.statSync(denoJsonc).isFile) return denoJsonc; + } catch { + // continue search + } + const parent = path.dirname(current); + if (parent === current) break; + current = parent; + } + return undefined; +} + +function readWorkspaceImportMapKeys(configPath: string): Array { + const text = Deno.readTextFileSync(configPath); + const parsed = parseWorkspaceConfig(text) as { imports?: unknown }; + if ( + !parsed || typeof parsed !== "object" || Array.isArray(parsed) || + !parsed.imports || typeof parsed.imports !== "object" || + Array.isArray(parsed.imports) + ) { + return []; + } + return Object.keys(parsed.imports as Record); +} + +function parseWorkspaceConfig(text: string): unknown { + try { + return JSON.parse(text); + } catch { + const stripped = stripJsonComments(text); + return JSON.parse(stripped); + } +} + +function stripJsonComments(text: string): string { + let out = ""; + let inString = false; + let escapeNext = false; + let inLineComment = false; + let inBlockComment = false; + + for (let i = 0; i < text.length; i++) { + const ch = text[i]; + const next = text[i + 1]; + + if (inLineComment) { + if (ch === "\n") { + inLineComment = false; + out += ch; + } + continue; + } + + if (inBlockComment) { + if (ch === "*" && next === "/") { + inBlockComment = false; + i++; + } + continue; + } + + if (inString) { + out += ch; + if (escapeNext) { + escapeNext = false; + } else if (ch === "\\") { + escapeNext = true; + } else if (ch === '"') { + inString = false; + } + continue; + } + + if (ch === '"') { + inString = true; + out += ch; + continue; + } + if (ch === "/" && next === "/") { + inLineComment = true; + i++; + continue; + } + if (ch === "/" && next === "*") { + inBlockComment = true; + i++; + continue; + } + out += ch; + } + return out; +} + +function enforceTrustedSchemaImportMapPolicy(deckPath: string) { + if (deckPath.startsWith("gambit://")) return; + const configPath = tryReadWorkspaceConfigPath(deckPath); + if (!configPath) return; + const violations = readWorkspaceImportMapKeys(configPath).filter((key) => + isTrustedSchemaImportKey(key) + ); + if (violations.length === 0) return; + throw new Error( + `[gambit] trust-boundary violation: workspace import map at ${configPath} remaps trusted schema namespace (${ + violations.join(", ") + })`, + ); +} + export async function runDeck(opts: RunOptions): Promise { const guardrails: Guardrails = { ...DEFAULT_GUARDRAILS, @@ -104,68 +458,263 @@ export async function runDeck(opts: RunOptions): Promise { throw new Error(`Max depth ${guardrails.maxDepth} exceeded`); } const runId = opts.runId ?? opts.state?.runId ?? randomId("run"); - - const deck = await loadDeck(opts.path); - const permissions = resolveEffectivePermissions({ - baseDir: path.dirname(deck.path), - parent: opts.parentPermissions, - workspace: opts.workspacePermissions - ? { - baseDir: opts.workspacePermissionsBaseDir ?? path.dirname(deck.path), - permissions: opts.workspacePermissions, - } - : undefined, - declaration: deck.permissions - ? { baseDir: path.dirname(deck.path), permissions: deck.permissions } - : undefined, - reference: opts.referencePermissions - ? { - baseDir: opts.referencePermissionsBaseDir ?? path.dirname(deck.path), - permissions: opts.referencePermissions, - } - : undefined, - session: opts.sessionPermissions - ? { - baseDir: opts.sessionPermissionsBaseDir ?? Deno.cwd(), - permissions: opts.sessionPermissions, - } - : undefined, - }); - const deckGuardrails = deck.guardrails ?? {}; - const effectiveGuardrails: Guardrails = { - ...guardrails, - ...deckGuardrails, - }; + enforceTrustedSchemaImportMapPolicy(opts.path); + // AbortSignal is not bridged into worker runtimes yet, so preserve + // cancellation semantics by keeping signal-bound runs in-process. + const workerSandbox = (opts.workerSandbox ?? shouldUseWorkerSandbox()) && + !opts.signal; const isRoot = Boolean(inferredRoot); + const shouldEmitRun = opts.depth === undefined || opts.depth === 0; + let canceled = false; + let cancelHandled = false; + const handleCancel = async () => { + if (cancelHandled) return; + cancelHandled = true; + if (!opts.onCancel) return; + try { + await opts.onCancel(); + } catch (err) { + logger.warn( + `[gambit] runDeck onCancel callback failed: ${ + err instanceof Error ? err.message : String(err) + }`, + ); + } + }; + try { + throwIfCanceled(opts.signal); + if (workerSandbox) { + const preInspectRunDeadlineMs = deadlineForRun( + guardrails, + opts.runDeadlineMs, + ); + ensureRunActive(preInspectRunDeadlineMs, opts.signal); + const inspectedDeck = await inspectDeckInWorker( + opts.path, + preInspectRunDeadlineMs, + ); + const deckDir = path.dirname(inspectedDeck.deckPath); + const permissions = resolveEffectivePermissions({ + baseDir: deckDir, + parent: opts.parentPermissions, + workspace: opts.workspacePermissions + ? { + baseDir: opts.workspacePermissionsBaseDir ?? deckDir, + permissions: opts.workspacePermissions, + } + : undefined, + declaration: inspectedDeck.permissions + ? { baseDir: deckDir, permissions: inspectedDeck.permissions } + : undefined, + reference: opts.referencePermissions + ? { + baseDir: opts.referencePermissionsBaseDir ?? deckDir, + permissions: opts.referencePermissions, + } + : undefined, + session: opts.sessionPermissions + ? { + baseDir: opts.sessionPermissionsBaseDir ?? Deno.cwd(), + permissions: opts.sessionPermissions, + } + : undefined, + }); + const effectiveGuardrails: Guardrails = { + ...guardrails, + ...(inspectedDeck.guardrails ?? {}), + }; + const runDeadlineMs = deadlineForRun( + effectiveGuardrails, + opts.runDeadlineMs, + ); + ensureRunActive(runDeadlineMs, opts.signal); + const resolvedInput = resolveInputWithoutDeck({ + input: opts.input, + state: opts.state, + isRoot, + initialUserMessage: opts.initialUserMessage, + }); + + if (!inspectedDeck.hasModelParams) { + if (shouldEmitRun) { + opts.trace?.({ + type: "run.start", + runId, + deckPath: inspectedDeck.deckPath, + input: resolvedInput as unknown as import("./types.ts").JSONValue, + initialUserMessage: opts + .initialUserMessage as unknown as import("./types.ts").JSONValue, + permissions: permissions.trace, + }); + } - ensureSchemaPresence(deck, isRoot); + return await runComputeDeckInWorker({ + deckPath: inspectedDeck.deckPath, + guardrails: effectiveGuardrails, + depth, + runId, + initialUserMessage: opts.initialUserMessage, + parentActionCallId: opts.parentActionCallId, + modelProvider: opts.modelProvider, + input: resolvedInput, + defaultModel: opts.defaultModel, + modelOverride: opts.modelOverride, + trace: opts.trace, + stream: opts.stream, + state: opts.state, + onStateUpdate: opts.onStateUpdate, + onStreamText: opts.onStreamText, + responsesMode: opts.responsesMode, + permissions: permissions.effective, + permissionsTrace: permissions.trace, + workspacePermissions: opts.workspacePermissions, + workspacePermissionsBaseDir: opts.workspacePermissionsBaseDir, + sessionPermissions: opts.sessionPermissions, + sessionPermissionsBaseDir: opts.sessionPermissionsBaseDir, + runDeadlineMs, + isRoot, + allowRootStringInput: opts.allowRootStringInput ?? false, + signal: opts.signal, + }); + } - const resolvedInput = resolveInput({ - deck, - input: opts.input, - state: opts.state, - isRoot, - initialUserMessage: opts.initialUserMessage, - }); - const validatedInput = validateInput( - deck, - resolvedInput, - isRoot, - opts.allowRootStringInput ?? false, - ); - const shouldEmitRun = opts.depth === undefined || opts.depth === 0; - if (shouldEmitRun) { - opts.trace?.({ - type: "run.start", - runId, - deckPath: deck.path, - input: validatedInput as unknown as import("./types.ts").JSONValue, - initialUserMessage: opts - .initialUserMessage as unknown as import("./types.ts").JSONValue, - permissions: permissions.trace, + if (!opts.inOrchestrationWorker) { + return await runLlmDeckInWorker({ + deckPath: inspectedDeck.deckPath, + guardrails: effectiveGuardrails, + depth, + runId, + parentActionCallId: opts.parentActionCallId, + modelProvider: opts.modelProvider, + input: resolvedInput, + inputProvided: opts.inputProvided ?? true, + initialUserMessage: opts.initialUserMessage, + defaultModel: opts.defaultModel, + modelOverride: opts.modelOverride, + trace: opts.trace, + stream: opts.stream, + state: opts.state, + onStateUpdate: opts.onStateUpdate, + onStreamText: opts.onStreamText, + responsesMode: opts.responsesMode, + permissions: permissions.effective, + permissionsTrace: permissions.trace, + workspacePermissions: opts.workspacePermissions, + workspacePermissionsBaseDir: opts.workspacePermissionsBaseDir, + sessionPermissions: opts.sessionPermissions, + sessionPermissionsBaseDir: opts.sessionPermissionsBaseDir, + runDeadlineMs, + workerSandbox, + allowRootStringInput: opts.allowRootStringInput, + isRoot, + signal: opts.signal, + }); + } + } + + const deck = await loadDeck(opts.path); + const permissions = resolveEffectivePermissions({ + baseDir: path.dirname(deck.path), + parent: opts.parentPermissions, + workspace: opts.workspacePermissions + ? { + baseDir: opts.workspacePermissionsBaseDir ?? path.dirname(deck.path), + permissions: opts.workspacePermissions, + } + : undefined, + declaration: deck.permissions + ? { baseDir: path.dirname(deck.path), permissions: deck.permissions } + : undefined, + reference: opts.referencePermissions + ? { + baseDir: opts.referencePermissionsBaseDir ?? path.dirname(deck.path), + permissions: opts.referencePermissions, + } + : undefined, + session: opts.sessionPermissions + ? { + baseDir: opts.sessionPermissionsBaseDir ?? Deno.cwd(), + permissions: opts.sessionPermissions, + } + : undefined, }); - } - try { + const deckGuardrails = deck.guardrails ?? {}; + const effectiveGuardrails: Guardrails = { + ...guardrails, + ...deckGuardrails, + }; + const runDeadlineMs = deadlineForRun( + effectiveGuardrails, + opts.runDeadlineMs, + ); + ensureRunActive(runDeadlineMs, opts.signal); + + ensureSchemaPresence(deck, isRoot); + + const resolvedInput = resolveInput({ + deck, + input: opts.input, + state: opts.state, + isRoot, + initialUserMessage: opts.initialUserMessage, + }); + const validatedInput = validateInput( + deck, + resolvedInput, + isRoot, + opts.allowRootStringInput ?? false, + ); + const useOrchestrationWorker = workerSandbox && + !opts.inOrchestrationWorker && + isRoot && + !opts.onTool && + Boolean( + deck.modelParams?.model || deck.modelParams?.temperature !== undefined, + ); + if (useOrchestrationWorker) { + return await runLlmDeckInWorker({ + deckPath: deck.path, + guardrails: effectiveGuardrails, + depth, + runId, + parentActionCallId: opts.parentActionCallId, + modelProvider: opts.modelProvider, + input: validatedInput, + inputProvided: opts.inputProvided ?? true, + initialUserMessage: opts.initialUserMessage, + defaultModel: opts.defaultModel, + modelOverride: opts.modelOverride, + trace: opts.trace, + stream: opts.stream, + state: opts.state, + onStateUpdate: opts.onStateUpdate, + onStreamText: opts.onStreamText, + responsesMode: opts.responsesMode, + permissions: permissions.effective, + permissionsTrace: permissions.trace, + workspacePermissions: opts.workspacePermissions, + workspacePermissionsBaseDir: opts.workspacePermissionsBaseDir, + sessionPermissions: opts.sessionPermissions, + sessionPermissionsBaseDir: opts.sessionPermissionsBaseDir, + runDeadlineMs, + workerSandbox, + allowRootStringInput: opts.allowRootStringInput, + isRoot, + signal: opts.signal, + }); + } + if (shouldEmitRun) { + opts.trace?.({ + type: "run.start", + runId, + deckPath: deck.path, + input: validatedInput as unknown as import("./types.ts").JSONValue, + initialUserMessage: opts + .initialUserMessage as unknown as import("./types.ts").JSONValue, + permissions: permissions.trace, + }); + } + if ( deck.modelParams?.model || deck.modelParams?.temperature !== undefined ) { @@ -193,6 +742,10 @@ export async function runDeck(opts: RunOptions): Promise { workspacePermissionsBaseDir: opts.workspacePermissionsBaseDir, sessionPermissions: opts.sessionPermissions, sessionPermissionsBaseDir: opts.sessionPermissionsBaseDir, + runDeadlineMs, + workerSandbox, + onTool: opts.onTool, + signal: opts.signal, }); } @@ -207,6 +760,7 @@ export async function runDeck(opts: RunOptions): Promise { guardrails: effectiveGuardrails, depth, runId, + initialUserMessage: opts.initialUserMessage, parentActionCallId: opts.parentActionCallId, modelProvider: opts.modelProvider, input: validatedInput, @@ -214,6 +768,8 @@ export async function runDeck(opts: RunOptions): Promise { modelOverride: opts.modelOverride, trace: opts.trace, stream: opts.stream, + state: opts.state, + onStateUpdate: opts.onStateUpdate, onStreamText: opts.onStreamText, responsesMode: opts.responsesMode, permissions: permissions.effective, @@ -222,11 +778,24 @@ export async function runDeck(opts: RunOptions): Promise { workspacePermissionsBaseDir: opts.workspacePermissionsBaseDir, sessionPermissions: opts.sessionPermissions, sessionPermissionsBaseDir: opts.sessionPermissionsBaseDir, + runDeadlineMs, + workerSandbox, + onTool: opts.onTool, + signal: opts.signal, }); + } catch (err) { + if (isRunCanceledError(err)) { + canceled = true; + await handleCancel(); + } + throw err; } finally { if (shouldEmitRun) { opts.trace?.({ type: "run.end", runId }); } + if (opts.signal?.aborted && !canceled) { + await handleCancel(); + } } } @@ -236,24 +805,15 @@ function toProviderParams( if (!params) return undefined; const { model: _model, - additionalParams, temperature, top_p, frequency_penalty, presence_penalty, max_tokens, + verbosity, + reasoning, } = params; const out: Record = {}; - if ( - additionalParams && - typeof additionalParams === "object" && - !Array.isArray(additionalParams) - ) { - for (const [key, value] of Object.entries(additionalParams)) { - if (value === undefined) continue; - out[key] = value; - } - } if (temperature !== undefined) out.temperature = temperature; if (top_p !== undefined) out.top_p = top_p; if (frequency_penalty !== undefined) { @@ -261,6 +821,8 @@ function toProviderParams( } if (presence_penalty !== undefined) out.presence_penalty = presence_penalty; if (max_tokens !== undefined) out.max_tokens = max_tokens; + if (verbosity !== undefined) out.verbosity = verbosity; + if (reasoning !== undefined) out.reasoning = reasoning; return Object.keys(out).length ? out : undefined; } @@ -356,6 +918,25 @@ function resolveInput(args: { return args.input; } +function resolveInputWithoutDeck(args: { + input: unknown; + state?: SavedState; + isRoot: boolean; + initialUserMessage?: unknown; +}) { + if (args.input !== undefined) return args.input; + if (!args.isRoot) return args.input; + + const persisted = extractContextInput(args.state); + if (persisted !== undefined) return persisted; + + if (args.initialUserMessage !== undefined) { + return ""; + } + + return args.input; +} + function extractContextInput(state?: SavedState): unknown { if (!state) return undefined; if (state.format === "responses" && Array.isArray(state.items)) { @@ -490,6 +1071,75 @@ function safeJsonArgs(value: string): Record { return {}; } +function asToolKind(value: unknown, fallback: ToolKind): ToolKind { + if ( + value === "action" || value === "external" || value === "mcp_bridge" || + value === "internal" + ) { + return value; + } + return fallback; +} + +function projectStreamToolTraceEvents(input: { + streamEvent: Record; + runId: string; + parentActionCallId: string; + trace?: (event: import("./types.ts").TraceEvent) => void; + emittedCalls: Set; + emittedResults: Set; + toolNames: Map; +}): void { + if (!input.trace) return; + const type = typeof input.streamEvent.type === "string" + ? input.streamEvent.type + : ""; + if (type !== "tool.call" && type !== "tool.result") return; + const actionCallId = typeof input.streamEvent.actionCallId === "string" + ? input.streamEvent.actionCallId + : ""; + const name = typeof input.streamEvent.name === "string" + ? input.streamEvent.name + : input.toolNames.get(actionCallId) ?? ""; + if (!actionCallId || !name) return; + + if (type === "tool.call") { + if (input.emittedCalls.has(actionCallId)) return; + input.emittedCalls.add(actionCallId); + input.toolNames.set(actionCallId, name); + const args = "args" in input.streamEvent + ? (input.streamEvent.args ?? {}) as JSONValue + : {}; + const toolKind = asToolKind(input.streamEvent.toolKind, "mcp_bridge"); + input.trace({ + type: "tool.call", + runId: input.runId, + actionCallId, + name, + args, + toolKind, + parentActionCallId: input.parentActionCallId, + }); + return; + } + + if (input.emittedResults.has(actionCallId)) return; + input.emittedResults.add(actionCallId); + const result = "result" in input.streamEvent + ? (input.streamEvent.result ?? null) as JSONValue + : null; + const toolKind = asToolKind(input.streamEvent.toolKind, "mcp_bridge"); + input.trace({ + type: "tool.result", + runId: input.runId, + actionCallId, + name, + result, + toolKind, + parentActionCallId: input.parentActionCallId, + }); +} + function mapResponseOutput( output: Array, ): { @@ -575,6 +1225,7 @@ type RuntimeCtxBase = { depth: number; runId: string; inputProvided?: boolean; + initialUserMessage?: unknown; parentActionCallId?: string; modelProvider: ModelProvider; input: unknown; @@ -592,11 +1243,1480 @@ type RuntimeCtxBase = { workspacePermissionsBaseDir?: string; sessionPermissions?: PermissionDeclarationInput; sessionPermissionsBaseDir?: string; + runDeadlineMs: number; + workerSandbox: boolean; + signal?: AbortSignal; + onTool?: RunOptions["onTool"]; +}; + +type WorkerRuntimeCtx = Omit & { + deckPath: string; + isRoot: boolean; + allowRootStringInput: boolean; }; -async function runComputeDeck(ctx: RuntimeCtxBase): Promise { +async function runComputeDeck(ctx: RuntimeCtxBase): Promise { + if (ctx.workerSandbox) { + return await runComputeDeckInWorker({ + guardrails: ctx.guardrails, + depth: ctx.depth, + runId: ctx.runId, + inputProvided: ctx.inputProvided, + initialUserMessage: ctx.initialUserMessage, + parentActionCallId: ctx.parentActionCallId, + modelProvider: ctx.modelProvider, + input: ctx.input, + defaultModel: ctx.defaultModel, + modelOverride: ctx.modelOverride, + trace: ctx.trace, + stream: ctx.stream, + state: ctx.state, + onStateUpdate: ctx.onStateUpdate, + onStreamText: ctx.onStreamText, + responsesMode: ctx.responsesMode, + permissions: ctx.permissions, + permissionsTrace: ctx.permissionsTrace, + workspacePermissions: ctx.workspacePermissions, + workspacePermissionsBaseDir: ctx.workspacePermissionsBaseDir, + sessionPermissions: ctx.sessionPermissions, + sessionPermissionsBaseDir: ctx.sessionPermissionsBaseDir, + runDeadlineMs: ctx.runDeadlineMs, + deckPath: ctx.deck.path, + isRoot: ctx.depth === 0 && !ctx.parentActionCallId, + allowRootStringInput: false, + signal: ctx.signal, + }); + } + return await runComputeDeckInProcess(ctx); +} + +function toDenoPermissionList(scope: { + all: boolean; + values: Set; +}): true | false | Array { + if (scope.all) return true; + if (scope.values.size === 0) return false; + return Array.from(scope.values).sort(); +} + +function toDenoRunPermission(scope: { + all: boolean; + paths: Set; + commands: Set; +}): true | false | Array { + if (scope.all) return true; + const values = new Set([ + ...Array.from(scope.paths), + ...Array.from(scope.commands), + ]); + if (values.size === 0) return false; + return Array.from(values).sort(); +} + +const IMPORT_SOURCE_EXTENSIONS = new Set([ + ".ts", + ".tsx", + ".mts", + ".cts", + ".js", + ".jsx", + ".mjs", + ".cjs", +]); +const RESOLVABLE_MODULE_EXTENSIONS = [ + ".ts", + ".tsx", + ".mts", + ".cts", + ".js", + ".jsx", + ".mjs", + ".cjs", + ".json", +]; + +function stripSpecifierSuffix(specifier: string): string { + let out = specifier; + const q = out.indexOf("?"); + if (q >= 0) out = out.slice(0, q); + const h = out.indexOf("#"); + if (h >= 0) out = out.slice(0, h); + return out.trim(); +} + +function isIdentifierStart(ch: string): boolean { + return /[A-Za-z_$]/.test(ch); +} + +function isIdentifierContinue(ch: string): boolean { + return /[A-Za-z0-9_$]/.test(ch); +} + +function skipWhitespaceAndComments(source: string, start: number): number { + let i = start; + while (i < source.length) { + const ch = source[i]; + if (/\s/.test(ch)) { + i++; + continue; + } + if (ch === "/" && source[i + 1] === "/") { + i += 2; + while (i < source.length && source[i] !== "\n" && source[i] !== "\r") { + i++; + } + continue; + } + if (ch === "/" && source[i + 1] === "*") { + i += 2; + while (i < source.length) { + if (source[i] === "*" && source[i + 1] === "/") { + i += 2; + break; + } + i++; + } + continue; + } + break; + } + return i; +} + +function readIdentifier( + source: string, + start: number, +): { value: string; end: number } | undefined { + if (start >= source.length) return undefined; + if (!isIdentifierStart(source[start])) return undefined; + let i = start + 1; + while (i < source.length && isIdentifierContinue(source[i])) i++; + return { value: source.slice(start, i), end: i }; +} + +function readStringLiteral( + source: string, + start: number, +): { value: string; end: number } | undefined { + const quote = source[start]; + if (quote !== "'" && quote !== '"') return undefined; + let i = start + 1; + let value = ""; + while (i < source.length) { + const ch = source[i]; + if (ch === "\\") { + if (i + 1 >= source.length) return undefined; + value += source[i + 1]; + i += 2; + continue; + } + if (ch === quote) return { value, end: i + 1 }; + if (ch === "\n" || ch === "\r") return undefined; + value += ch; + i++; + } + return undefined; +} + +function skipTemplateExpression(source: string, start: number): number { + let i = start; + let depth = 1; + while (i < source.length && depth > 0) { + i = skipWhitespaceAndComments(source, i); + if (i >= source.length) break; + const ch = source[i]; + if (ch === "'" || ch === '"') { + const stringLiteral = readStringLiteral(source, i); + i = stringLiteral ? stringLiteral.end : i + 1; + continue; + } + if (ch === "`") { + i = skipTemplateLiteral(source, i); + continue; + } + if (ch === "{") { + depth++; + i++; + continue; + } + if (ch === "}") { + depth--; + i++; + continue; + } + i++; + } + return i; +} + +function skipTemplateLiteral(source: string, start: number): number { + let i = start + 1; + while (i < source.length) { + const ch = source[i]; + if (ch === "\\") { + i += 2; + continue; + } + if (ch === "`") return i + 1; + if (ch === "$" && source[i + 1] === "{") { + i = skipTemplateExpression(source, i + 2); + continue; + } + i++; + } + return i; +} + +function readSpecifierAfterFrom( + source: string, + start: number, +): { specifier?: string; end: number } { + const i = skipWhitespaceAndComments(source, start); + const stringLiteral = readStringLiteral(source, i); + if (!stringLiteral) return { end: i }; + return { specifier: stringLiteral.value, end: stringLiteral.end }; +} + +function readImportCallSpecifier( + source: string, + start: number, +): { specifier?: string; end: number } { + let i = skipWhitespaceAndComments(source, start); + if (source[i] !== "(") return { end: i }; + i = skipWhitespaceAndComments(source, i + 1); + const stringLiteral = readStringLiteral(source, i); + if (!stringLiteral) return { end: i }; + i = skipWhitespaceAndComments(source, stringLiteral.end); + if (source[i] === ")") i++; + return { specifier: stringLiteral.value, end: i }; +} + +function readImportOrExportStatementSpecifier( + source: string, + start: number, + keyword: "import" | "export", +): { specifier?: string; end: number } { + let i = skipWhitespaceAndComments(source, start); + + if (keyword === "import") { + if (source[i] === ".") return { end: i + 1 }; // import.meta + const sideEffectImport = readStringLiteral(source, i); + if (sideEffectImport) { + return { specifier: sideEffectImport.value, end: sideEffectImport.end }; + } + } + + let depth = 0; + while (i < source.length) { + i = skipWhitespaceAndComments(source, i); + if (i >= source.length) break; + const ch = source[i]; + + if (ch === "'" || ch === '"') { + const stringLiteral = readStringLiteral(source, i); + i = stringLiteral ? stringLiteral.end : i + 1; + continue; + } + if (ch === "`") { + i = skipTemplateLiteral(source, i); + continue; + } + if (ch === "(" || ch === "[" || ch === "{") { + depth++; + i++; + continue; + } + if (ch === ")" || ch === "]" || ch === "}") { + if (depth > 0) depth--; + i++; + continue; + } + if (depth === 0) { + if (ch === ";") return { end: i + 1 }; + const identifier = readIdentifier(source, i); + if (identifier?.value === "from") { + return readSpecifierAfterFrom(source, identifier.end); + } + if (identifier) { + i = identifier.end; + continue; + } + } + i++; + } + return { end: i }; +} + +function extractModuleSpecifiers(source: string): Set { + const out = new Set(); + let i = 0; + while (i < source.length) { + i = skipWhitespaceAndComments(source, i); + if (i >= source.length) break; + + const ch = source[i]; + if (ch === "'" || ch === '"') { + const stringLiteral = readStringLiteral(source, i); + i = stringLiteral ? stringLiteral.end : i + 1; + continue; + } + if (ch === "`") { + i = skipTemplateLiteral(source, i); + continue; + } + + const identifier = readIdentifier(source, i); + if (!identifier) { + i++; + continue; + } + + if (identifier.value === "import") { + const afterImport = skipWhitespaceAndComments(source, identifier.end); + if (source[afterImport] === "(") { + const result = readImportCallSpecifier(source, afterImport); + if (result.specifier) out.add(result.specifier); + i = Math.max(result.end, afterImport + 1); + continue; + } + const result = readImportOrExportStatementSpecifier( + source, + identifier.end, + "import", + ); + if (result.specifier) out.add(result.specifier); + i = Math.max(result.end, identifier.end); + continue; + } + + if (identifier.value === "export") { + const result = readImportOrExportStatementSpecifier( + source, + identifier.end, + "export", + ); + if (result.specifier) out.add(result.specifier); + i = Math.max(result.end, identifier.end); + continue; + } + + i = identifier.end; + } + return out; +} + +function resolveExistingModulePath(candidate: string): string | undefined { + const resolved = path.resolve(candidate); + const candidates = new Set([resolved]); + if (!path.extname(resolved)) { + for (const ext of RESOLVABLE_MODULE_EXTENSIONS) { + candidates.add(`${resolved}${ext}`); + candidates.add(path.join(resolved, `index${ext}`)); + } + } + for (const filePath of candidates) { + try { + if (Deno.statSync(filePath).isFile) { + return path.resolve(filePath); + } + } catch { + // ignore unresolved module candidates + } + } + return undefined; +} + +function resolveLocalImportPath( + importerPath: string, + specifier: string, +): string | undefined { + const cleaned = stripSpecifierSuffix(specifier); + if (!cleaned) return undefined; + if (cleaned.startsWith("file://")) { + try { + return resolveExistingModulePath(path.fromFileUrl(cleaned)); + } catch { + return undefined; + } + } + if ( + !(cleaned.startsWith("./") || cleaned.startsWith("../") || + path.isAbsolute(cleaned)) + ) { + return undefined; + } + const base = path.isAbsolute(cleaned) + ? cleaned + : path.resolve(path.dirname(importerPath), cleaned); + return resolveExistingModulePath(base); +} + +function collectLocalImportGraph(entryPath: string): Set { + const visited = new Set(); + const queue: Array = [path.resolve(entryPath)]; + + while (queue.length > 0) { + const current = queue.pop()!; + if (visited.has(current)) continue; + visited.add(current); + + const ext = path.extname(current).toLowerCase(); + if (!IMPORT_SOURCE_EXTENSIONS.has(ext)) { + continue; + } + + let source: string; + try { + source = Deno.readTextFileSync(current); + } catch { + continue; + } + + const specifiers = extractModuleSpecifiers(source); + for (const specifier of specifiers) { + const resolved = resolveLocalImportPath(current, specifier); + if (resolved && !visited.has(resolved)) { + queue.push(resolved); + } + } + } + + return visited; +} + +const WORKER_ENTRY_PATHS = [ + "./runtime_worker.ts", + "./runtime_orchestration_worker.ts", +].map((relative) => path.fromFileUrl(new URL(relative, import.meta.url))); +const BUILTIN_SCHEMAS_DIR = path.resolve( + path.dirname(path.fromFileUrl(import.meta.url)), + "../schemas", +); +const BUILTIN_SNIPPETS_DIR = path.resolve( + path.dirname(path.fromFileUrl(import.meta.url)), + "../snippets", +); + +let builtinSchemaBootstrapCache: Array | undefined; +function builtinSchemaBootstrapReads(): Array { + if (builtinSchemaBootstrapCache) return builtinSchemaBootstrapCache; + const schemaModules: Array = []; + const stack: Array = [BUILTIN_SCHEMAS_DIR]; + while (stack.length > 0) { + const current = stack.pop()!; + let entries: Array = []; + try { + entries = Array.from(Deno.readDirSync(current)); + } catch { + continue; + } + for (const entry of entries) { + const target = path.join(current, entry.name); + if (entry.isDirectory) { + stack.push(target); + continue; + } + if (!entry.isFile) continue; + const ext = path.extname(entry.name).toLowerCase(); + if (ext !== ".ts") continue; + schemaModules.push(target); + } + } + builtinSchemaBootstrapCache = Array.from( + new Set( + schemaModules.flatMap((entry) => + Array.from(collectLocalImportGraph(entry)) + ), + ), + ).sort(); + return builtinSchemaBootstrapCache; +} + +let builtinSnippetBootstrapCache: Array | undefined; +function builtinSnippetBootstrapReads(): Array { + if (builtinSnippetBootstrapCache) return builtinSnippetBootstrapCache; + const snippetFiles: Array = []; + const stack: Array = [BUILTIN_SNIPPETS_DIR]; + while (stack.length > 0) { + const current = stack.pop()!; + let entries: Array = []; + try { + entries = Array.from(Deno.readDirSync(current)); + } catch { + continue; + } + for (const entry of entries) { + const target = path.join(current, entry.name); + if (entry.isDirectory) { + stack.push(target); + continue; + } + if (!entry.isFile) continue; + const ext = path.extname(entry.name).toLowerCase(); + if (ext !== ".md") continue; + snippetFiles.push(target); + } + } + builtinSnippetBootstrapCache = Array.from(new Set(snippetFiles)).sort(); + return builtinSnippetBootstrapCache; +} + +function workerBootstrapReadAllowlist(deckPath: string): Array { + return Array.from( + new Set([ + ...Array.from(collectLocalImportGraph(deckPath)), + ...WORKER_ENTRY_PATHS.flatMap((entry) => + Array.from(collectLocalImportGraph(entry)) + ), + ...builtinSchemaBootstrapReads(), + ...builtinSnippetBootstrapReads(), + ]), + ).sort(); +} + +let trustedWorkerBootstrapCache: Array | undefined; +function trustedWorkerBootstrapReads(): Array { + if (trustedWorkerBootstrapCache) return trustedWorkerBootstrapCache; + const definitionsPath = path.fromFileUrl( + new URL("./definitions.ts", import.meta.url), + ); + const modPath = path.fromFileUrl(new URL("../mod.ts", import.meta.url)); + trustedWorkerBootstrapCache = Array.from( + new Set([ + ...WORKER_ENTRY_PATHS.flatMap((entry) => + Array.from(collectLocalImportGraph(entry)) + ), + ...Array.from(collectLocalImportGraph(definitionsPath)), + ...Array.from(collectLocalImportGraph(modPath)), + ...builtinSchemaBootstrapReads(), + ...builtinSnippetBootstrapReads(), + ]), + ).sort(); + return trustedWorkerBootstrapCache; +} + +function pathMatchesPermissionRoot(root: string, target: string): boolean { + if (root === target) return true; + const rel = path.relative(root, target); + return rel.length > 0 && !rel.startsWith("..") && !path.isAbsolute(rel); +} + +function constrainBootstrapReads( + permissions: NormalizedPermissionSet, + roots: Array, + trustedReads: Set, + reads: Array, +): Array { + const allowedRoots = [ + ...roots.map((entry) => path.resolve(entry)), + ...Array.from(permissions.read.values).map((entry) => + path.resolve(permissions.baseDir, entry) + ), + ]; + if (permissions.read.all) { + return Array.from(new Set(reads)).sort(); + } + if (allowedRoots.length === 0) return []; + return reads.filter((entry) => { + const target = path.resolve(permissions.baseDir, entry); + if (trustedReads.has(target)) return true; + return allowedRoots.some((root) => pathMatchesPermissionRoot(root, target)); + }); +} + +function buildWorkerPermissions( + permissions: NormalizedPermissionSet, + deckPath: string, +): WorkerOptions["deno"] { + const workerDirs = WORKER_ENTRY_PATHS.map((entry) => path.dirname(entry)); + const bootstrapReads = constrainBootstrapReads( + permissions, + [path.dirname(deckPath), ...workerDirs], + new Set(trustedWorkerBootstrapReads()), + workerBootstrapReadAllowlist(deckPath), + ); + const mergedRead = permissions.read.all ? true : Array.from( + new Set([ + ...Array.from(permissions.read.values), + ...bootstrapReads, + ]), + ).sort(); + return { + permissions: { + read: mergedRead === true + ? true + : mergedRead.length > 0 + ? mergedRead + : false, + write: toDenoPermissionList(permissions.write), + run: toDenoRunPermission(permissions.run), + net: toDenoPermissionList(permissions.net), + env: toDenoPermissionList(permissions.env), + // Worker module graphs include JSR dependencies (e.g. @std/*). Allow + // manifest resolution without widening deck runtime file/run permissions. + import: ["jsr.io:443"], + }, + }; +} + +function buildDeckInspectWorkerPermissions( + deckPath: string, +): WorkerOptions["deno"] { + const deckDir = path.dirname(deckPath); + const workerDirs = WORKER_ENTRY_PATHS.map((entry) => path.dirname(entry)); + const inspectSeedPermissions: NormalizedPermissionSet = { + baseDir: deckDir, + read: { all: false, values: new Set() }, + write: { all: false, values: new Set() }, + run: { all: false, paths: new Set(), commands: new Set() }, + net: { all: false, values: new Set() }, + env: { all: false, values: new Set() }, + }; + const bootstrapReads = constrainBootstrapReads( + inspectSeedPermissions, + [path.dirname(deckPath), ...workerDirs], + new Set(trustedWorkerBootstrapReads()), + workerBootstrapReadAllowlist(deckPath), + ); + const inspectReads = Array.from( + new Set([deckDir, ...bootstrapReads]), + ).sort(); + return { + permissions: { + read: inspectReads.length > 0 ? inspectReads : false, + write: false, + run: false, + net: false, + env: false, + }, + }; +} + +async function inspectDeckInWorker( + deckPath: string, + runDeadlineMs?: number, +): Promise { + if (typeof runDeadlineMs === "number" && Number.isFinite(runDeadlineMs)) { + ensureNotExpired(runDeadlineMs); + } + const bridgeSession = randomId("bridge"); + const worker = new Worker( + new URL("./runtime_worker.ts", import.meta.url).href, + { + type: "module", + deno: buildDeckInspectWorkerPermissions(deckPath), + }, + ); + let settled = false; + const clearAndTerminate = () => { + try { + worker.terminate(); + } catch { + // ignore + } + }; + let timeoutId: number | undefined; + + const outcome = new Promise((resolve, reject) => { + const finishResolve = (value: WorkerDeckInspection) => { + if (settled) return; + settled = true; + if (timeoutId !== undefined) clearTimeout(timeoutId); + resolve(value); + }; + const finishReject = (err: unknown) => { + if (settled) return; + settled = true; + if (timeoutId !== undefined) clearTimeout(timeoutId); + reject(err); + }; + + const deadlineConstrained = typeof runDeadlineMs === "number" && + Number.isFinite(runDeadlineMs); + const timeoutMs = deadlineConstrained + ? Math.max( + 0, + Math.min( + INSPECT_WORKER_TIMEOUT_MS, + Math.floor(runDeadlineMs - performance.now()), + ), + ) + : INSPECT_WORKER_TIMEOUT_MS; + const timeoutMessage = deadlineConstrained && + timeoutMs < INSPECT_WORKER_TIMEOUT_MS + ? WORKER_TIMEOUT_MESSAGE + : INSPECT_WORKER_TIMEOUT_MESSAGE; + timeoutId = setTimeout(() => { + finishReject(new Error(timeoutMessage)); + clearAndTerminate(); + }, timeoutMs) as unknown as number; + + worker.addEventListener("error", (event) => { + event.preventDefault(); + finishReject(event.error ?? new Error(event.message)); + }); + + worker.addEventListener("messageerror", () => { + finishReject(new Error("Worker bridge message serialization failed")); + }); + + worker.addEventListener("message", (event: MessageEvent) => { + const msg = event.data as Record; + const receivedSession = typeof msg.bridgeSession === "string" + ? msg.bridgeSession + : ""; + if (receivedSession !== bridgeSession) { + if (typeof msg.type === "string") { + logger.warn( + `[gambit] rejected inspect-worker message with mismatched bridge session (type=${msg.type})`, + ); + } + return; + } + const type = typeof msg.type === "string" ? msg.type : ""; + if (type === "deck.inspect.result") { + finishResolve((msg as { result: WorkerDeckInspection }).result); + return; + } + if (type === "deck.inspect.error" || type === "run.error") { + finishReject(normalizeWorkerError((msg as { error?: unknown }).error)); + } + }); + }); + + try { + worker.postMessage({ type: "deck.inspect", bridgeSession, deckPath }); + return await outcome; + } finally { + if (timeoutId !== undefined) clearTimeout(timeoutId); + clearAndTerminate(); + } +} + +function normalizeWorkerError(err: unknown): Error { + if (!err || typeof err !== "object") { + return new Error(String(err)); + } + const rec = err as Record; + const message = + typeof rec.message === "string" && rec.message.trim().length > 0 + ? rec.message + : "Worker execution failed"; + const code = typeof rec.code === "string" ? rec.code : undefined; + const name = typeof rec.name === "string" ? rec.name : undefined; + const source = typeof rec.source === "string" ? rec.source : undefined; + const out = new Error( + source ? `[${source}] ${message}${code ? ` (${code})` : ""}` : message, + ); + if (name) out.name = name; + return out; +} + +type OrchestrationRunStartMessage = { + type: "run.start"; + bridgeSession: string; + completionNonce: string; + options: { + path: string; + input: unknown; + inputProvided?: boolean; + initialUserMessage?: unknown; + isRoot?: boolean; + guardrails?: Partial; + depth?: number; + parentActionCallId?: string; + runId: string; + defaultModel?: string; + modelOverride?: string; + stream?: boolean; + state?: SavedState; + responsesMode?: boolean; + allowRootStringInput?: boolean; + runDeadlineMs: number; + }; + permissionCeiling: WirePermissionSet; +}; + +type OrchestrationModelChatRequest = { + type: "model.chat.request"; + bridgeSession: string; + requestId: string; + input: { + model: string; + messages: Array; + tools?: Array; + stream?: boolean; + state?: SavedState; + deckPath?: string; + params?: Record; + }; +}; + +type OrchestrationModelResponsesRequest = { + type: "model.responses.request"; + bridgeSession: string; + requestId: string; + input: { + request: CreateResponseRequest; + state?: SavedState; + deckPath?: string; + }; +}; + +type OrchestrationModelResolveRequest = { + type: "model.resolveModel.request"; + bridgeSession: string; + requestId: string; + input: { + model: string | Array; + params?: Record; + deckPath?: string; + }; +}; + +type OrchestrationWorkerMessageToParent = + | { + type: "trace.event"; + bridgeSession: string; + event: import("./types.ts").TraceEvent; + } + | { type: "state.update"; bridgeSession: string; state: SavedState } + | { type: "stream.text"; bridgeSession: string; chunk: string } + | OrchestrationModelChatRequest + | OrchestrationModelResponsesRequest + | OrchestrationModelResolveRequest + | { + type: "run.result"; + bridgeSession: string; + completionNonce?: string; + result: unknown; + } + | { + type: "run.error"; + bridgeSession: string; + completionNonce?: string; + error: unknown; + }; + +type OrchestrationParentMessage = + | OrchestrationRunStartMessage + | { + type: "model.chat.result"; + requestId: string; + result: Awaited>; + } + | { + type: "model.responses.result"; + requestId: string; + result: CreateResponseResponse; + } + | { + type: "model.resolveModel.result"; + requestId: string; + result: { + model: string; + params?: Record; + }; + } + | { + type: "model.chat.stream"; + requestId: string; + chunk: string; + } + | { + type: "model.responses.event"; + requestId: string; + event: ResponseEvent; + } + | { + type: + | "model.chat.error" + | "model.responses.error" + | "model.resolveModel.error"; + requestId: string; + error: { + source?: string; + name?: string; + message: string; + code?: unknown; + }; + }; + +async function runLlmDeckInWorker( + ctx: Omit & { + deckPath: string; + initialUserMessage?: unknown; + inputProvided?: boolean; + allowRootStringInput?: boolean; + isRoot: boolean; + }, +): Promise { + throwIfCanceled(ctx.signal); + const bridgeSession = randomId("bridge"); + const completionNonce = randomId("done"); + const worker = new Worker( + new URL("./runtime_orchestration_worker.ts", import.meta.url).href, + { + type: "module", + deno: buildWorkerPermissions(ctx.permissions, ctx.deckPath), + }, + ); + + let settled = false; + const clearAndTerminate = () => { + try { + worker.terminate(); + } catch { + // ignore + } + }; + let timeoutId: number | undefined; + + const outcome = new Promise((resolve, reject) => { + const finishResolve = (value: unknown) => { + if (settled) return; + settled = true; + if (timeoutId !== undefined) clearTimeout(timeoutId); + resolve(value); + }; + const finishReject = (err: unknown) => { + if (settled) return; + settled = true; + if (timeoutId !== undefined) clearTimeout(timeoutId); + reject(err); + }; + + const remainingMs = Math.max( + 0, + Math.floor(ctx.runDeadlineMs - performance.now()), + ); + timeoutId = setTimeout(() => { + finishReject(new Error(WORKER_TIMEOUT_MESSAGE)); + clearAndTerminate(); + }, remainingMs) as unknown as number; + + worker.addEventListener("error", (event) => { + event.preventDefault(); + finishReject(event.error ?? new Error(event.message)); + }); + + worker.addEventListener("messageerror", () => { + finishReject(new Error("Worker bridge message serialization failed")); + }); + + worker.addEventListener("message", (event: MessageEvent) => { + const msg = event.data as OrchestrationWorkerMessageToParent; + if (!msg || typeof msg !== "object") return; + if (msg.bridgeSession !== bridgeSession) { + logger.warn( + `[gambit] rejected orchestration-worker message with mismatched bridge session (type=${msg.type})`, + ); + return; + } + + if (msg.type === "trace.event") { + ctx.trace?.(msg.event); + return; + } + if (msg.type === "state.update") { + ctx.onStateUpdate?.(msg.state); + return; + } + if (msg.type === "stream.text") { + ctx.onStreamText?.(msg.chunk); + return; + } + + if (msg.type === "model.chat.request") { + (async () => { + try { + const result = await ctx.modelProvider.chat({ + ...msg.input, + signal: ctx.signal, + onStreamText: (chunk) => { + worker.postMessage( + { + type: "model.chat.stream", + requestId: msg.requestId, + chunk, + } satisfies OrchestrationParentMessage, + ); + }, + }); + worker.postMessage( + { + type: "model.chat.result", + requestId: msg.requestId, + result, + } satisfies OrchestrationParentMessage, + ); + } catch (err) { + worker.postMessage( + { + type: "model.chat.error", + requestId: msg.requestId, + error: { + source: "model", + name: err instanceof Error ? err.name : undefined, + message: err instanceof Error ? err.message : String(err), + code: (err as { code?: unknown })?.code, + }, + } satisfies OrchestrationParentMessage, + ); + } + })(); + return; + } + + if (msg.type === "model.responses.request") { + (async () => { + try { + if (!ctx.modelProvider.responses) { + throw new Error( + "Responses API unavailable for current model provider", + ); + } + const result = await ctx.modelProvider.responses({ + ...msg.input, + signal: ctx.signal, + onStreamEvent: (streamEvent) => { + worker.postMessage( + { + type: "model.responses.event", + requestId: msg.requestId, + event: streamEvent, + } satisfies OrchestrationParentMessage, + ); + }, + }); + worker.postMessage( + { + type: "model.responses.result", + requestId: msg.requestId, + result, + } satisfies OrchestrationParentMessage, + ); + } catch (err) { + worker.postMessage( + { + type: "model.responses.error", + requestId: msg.requestId, + error: { + source: "model", + name: err instanceof Error ? err.name : undefined, + message: err instanceof Error ? err.message : String(err), + code: (err as { code?: unknown })?.code, + }, + } satisfies OrchestrationParentMessage, + ); + } + })(); + return; + } + + if (msg.type === "model.resolveModel.request") { + (async () => { + try { + const result = ctx.modelProvider.resolveModel + ? await ctx.modelProvider.resolveModel(msg.input) + : { + model: Array.isArray(msg.input.model) + ? msg.input.model[0] + : msg.input.model, + params: msg.input.params, + }; + worker.postMessage( + { + type: "model.resolveModel.result", + requestId: msg.requestId, + result, + } satisfies OrchestrationParentMessage, + ); + } catch (err) { + worker.postMessage( + { + type: "model.resolveModel.error", + requestId: msg.requestId, + error: { + source: "model", + name: err instanceof Error ? err.name : undefined, + message: err instanceof Error ? err.message : String(err), + code: (err as { code?: unknown })?.code, + }, + } satisfies OrchestrationParentMessage, + ); + } + })(); + return; + } + + if (msg.type === "run.result") { + if (msg.completionNonce !== completionNonce) { + logger.warn( + `[gambit] rejected orchestration-worker run.result with invalid completion nonce`, + ); + return; + } + finishResolve(msg.result); + return; + } + if (msg.type === "run.error") { + if (msg.completionNonce !== completionNonce) { + logger.warn( + `[gambit] rejected orchestration-worker run.error with invalid completion nonce`, + ); + return; + } + finishReject(normalizeWorkerError(msg.error)); + } + }); + }); + + try { + worker.postMessage( + { + type: "run.start", + bridgeSession, + completionNonce, + options: { + path: ctx.deckPath, + input: ctx.input, + inputProvided: ctx.inputProvided, + initialUserMessage: ctx.initialUserMessage, + isRoot: ctx.isRoot, + guardrails: ctx.guardrails, + depth: ctx.depth, + parentActionCallId: ctx.parentActionCallId, + runId: ctx.runId, + defaultModel: ctx.defaultModel, + modelOverride: ctx.modelOverride, + stream: ctx.stream, + state: ctx.state, + responsesMode: ctx.responsesMode, + allowRootStringInput: ctx.allowRootStringInput, + runDeadlineMs: ctx.runDeadlineMs, + }, + permissionCeiling: toWirePermissionSet(ctx.permissions), + } satisfies OrchestrationRunStartMessage, + ); + ensureRunActive(ctx.runDeadlineMs, ctx.signal); + return await outcome; + } finally { + if (timeoutId !== undefined) clearTimeout(timeoutId); + clearAndTerminate(); + } +} + +type WorkerSpawnRequest = { + bridgeSession: string; + requestId: string; + payload: { + path: string; + input: unknown; + initialUserMessage?: unknown; + parentActionCallId?: string; + parentPermissionsBaseDir: string; + parentPermissions: WirePermissionSet; + workspacePermissions?: PermissionDeclarationInput; + workspacePermissionsBaseDir?: string; + sessionPermissions?: PermissionDeclarationInput; + sessionPermissionsBaseDir?: string; + runDeadlineMs: number; + }; +}; + +async function runComputeDeckInWorker(ctx: WorkerRuntimeCtx): Promise { + throwIfCanceled(ctx.signal); + const { runId } = ctx; + const actionCallId = randomId("action"); + const bridgeSession = randomId("bridge"); + const completionNonce = randomId("done"); + const worker = new Worker( + new URL("./runtime_worker.ts", import.meta.url).href, + { + type: "module", + deno: buildWorkerPermissions(ctx.permissions, ctx.deckPath), + }, + ); + + let settled = false; + const clearAndTerminate = () => { + try { + worker.terminate(); + } catch { + // ignore + } + }; + let timeoutId: number | undefined; + const activeSpawnRequests = new Set(); + let currentState = ctx.state; + + const outcome = new Promise((resolve, reject) => { + const finishResolve = (value: unknown) => { + if (settled) return; + settled = true; + if (timeoutId !== undefined) clearTimeout(timeoutId); + resolve(value); + }; + const finishReject = (err: unknown) => { + if (settled) return; + settled = true; + if (timeoutId !== undefined) clearTimeout(timeoutId); + reject(err); + }; + const remainingMs = Math.max( + 0, + Math.floor(ctx.runDeadlineMs - performance.now()), + ); + timeoutId = setTimeout(() => { + finishReject(new Error(WORKER_TIMEOUT_MESSAGE)); + clearAndTerminate(); + }, remainingMs) as unknown as number; + + worker.addEventListener("error", (event) => { + event.preventDefault(); + finishReject(event.error ?? new Error(event.message)); + }); + + worker.addEventListener("messageerror", () => { + finishReject(new Error("Worker bridge message serialization failed")); + }); + + worker.addEventListener("message", (event: MessageEvent) => { + const msg = event.data as Record; + const receivedBridgeSession = typeof msg.bridgeSession === "string" + ? msg.bridgeSession + : ""; + if (receivedBridgeSession !== bridgeSession) { + const type = typeof msg.type === "string" ? msg.type : "unknown"; + logger.warn( + `[gambit] rejected compute-worker message with mismatched bridge session (type=${type})`, + ); + return; + } + // Ignore any late worker messages once this run has already settled. + if (settled) return; + const type = typeof msg.type === "string" ? msg.type : ""; + if (type === "log.entry") { + if (!ctx.trace) return; + const entry = msg.entry; + const raw = typeof entry === "string" + ? { message: entry } + : entry && typeof entry === "object" + ? entry as Record + : { message: "" }; + const message = typeof raw.message === "string" + ? raw.message + : raw.message !== undefined + ? String(raw.message) + : ""; + const title = typeof raw.title === "string" ? raw.title : undefined; + const body = raw.body ?? raw.message ?? message; + ctx.trace({ + type: "log", + runId, + deckPath: ctx.deckPath, + actionCallId, + parentActionCallId: ctx.parentActionCallId, + level: (raw.level as "debug" | "info" | "warn" | "error") ?? "info", + title: title ?? (message || undefined), + message, + body, + meta: raw.meta, + }); + return; + } + + if (type === "spawn.request") { + const req = msg as unknown as WorkerSpawnRequest; + const requestId = req.requestId; + if (!requestId) return; + if (activeSpawnRequests.has(requestId)) { + logger.warn( + `[gambit] rejected duplicate compute-worker spawn.request (${requestId})`, + ); + return; + } + activeSpawnRequests.add(requestId); + (async () => { + try { + const parentFromWorker = normalizePermissionBaseDir( + fromWirePermissionSet(req.payload.parentPermissions), + req.payload.parentPermissionsBaseDir, + ); + // Enforce monotonicity against the parent effective ceiling. + const bridgedParent = intersectPermissions( + ctx.permissions, + parentFromWorker, + req.payload.parentPermissionsBaseDir, + ); + const childResult = await runDeck({ + path: req.payload.path, + input: req.payload.input, + modelProvider: ctx.modelProvider, + isRoot: false, + guardrails: ctx.guardrails, + depth: ctx.depth + 1, + parentActionCallId: req.payload.parentActionCallId, + runId, + defaultModel: ctx.defaultModel, + modelOverride: ctx.modelOverride, + trace: ctx.trace, + stream: ctx.stream, + state: currentState, + onStateUpdate: (state) => { + currentState = state; + ctx.onStateUpdate?.(state); + }, + onStreamText: ctx.onStreamText, + responsesMode: ctx.responsesMode, + initialUserMessage: req.payload.initialUserMessage, + inputProvided: true, + parentPermissions: bridgedParent, + workspacePermissions: req.payload.workspacePermissions, + workspacePermissionsBaseDir: + req.payload.workspacePermissionsBaseDir, + sessionPermissions: req.payload.sessionPermissions, + sessionPermissionsBaseDir: req.payload.sessionPermissionsBaseDir, + runDeadlineMs: Math.min( + ctx.runDeadlineMs, + Number.isFinite(req.payload.runDeadlineMs) + ? req.payload.runDeadlineMs + : ctx.runDeadlineMs, + ), + workerSandbox: true, + signal: ctx.signal, + onTool: ctx.onTool, + }); + worker.postMessage({ + type: "spawn.result", + requestId, + result: childResult, + }); + } catch (err) { + worker.postMessage({ + type: "spawn.error", + requestId, + error: { + source: "child", + name: err instanceof Error ? err.name : undefined, + message: err instanceof Error ? err.message : String(err), + code: (err as { code?: unknown })?.code, + }, + }); + } finally { + activeSpawnRequests.delete(requestId); + } + })(); + return; + } + + if (type === "state.update") { + const nextState = (msg as { state?: SavedState }).state; + if (!nextState || typeof nextState !== "object") return; + currentState = nextState; + ctx.onStateUpdate?.(nextState); + return; + } + + if (type === "run.result") { + if ( + (msg as { completionNonce?: unknown }).completionNonce !== + completionNonce + ) { + logger.warn( + `[gambit] rejected compute-worker run.result with invalid completion nonce`, + ); + return; + } + finishResolve((msg as { result?: unknown }).result); + return; + } + + if (type === "run.error") { + if ( + (msg as { completionNonce?: unknown }).completionNonce !== + completionNonce + ) { + logger.warn( + `[gambit] rejected compute-worker run.error with invalid completion nonce`, + ); + return; + } + finishReject(normalizeWorkerError((msg as { error?: unknown }).error)); + } + }); + }); + + try { + worker.postMessage({ + type: "run.start", + bridgeSession, + completionNonce, + runId, + actionCallId, + deckPath: ctx.deckPath, + input: ctx.input, + state: ctx.state, + initialUserMessage: ctx.initialUserMessage, + depth: ctx.depth, + parentActionCallId: ctx.parentActionCallId, + permissions: toWirePermissionSet(ctx.permissions), + workspacePermissions: ctx.workspacePermissions, + workspacePermissionsBaseDir: ctx.workspacePermissionsBaseDir, + sessionPermissions: ctx.sessionPermissions, + sessionPermissionsBaseDir: ctx.sessionPermissionsBaseDir, + runDeadlineMs: ctx.runDeadlineMs, + isRoot: ctx.isRoot, + allowRootStringInput: ctx.allowRootStringInput, + }); + const raw = await outcome; + ensureRunActive(ctx.runDeadlineMs, ctx.signal); + return raw; + } finally { + if (timeoutId !== undefined) clearTimeout(timeoutId); + clearAndTerminate(); + } +} + +async function runComputeDeckInProcess(ctx: RuntimeCtxBase): Promise { const { deck, runId } = ctx; const actionCallId = randomId("action"); + let computeState = ctx.state + ? { + ...ctx.state, + messages: Array.isArray(ctx.state.messages) + ? ctx.state.messages.map(sanitizeMessage) + : [], + meta: ctx.state.meta ? { ...ctx.state.meta } : undefined, + messageRefs: Array.isArray(ctx.state.messageRefs) + ? [...ctx.state.messageRefs] + : undefined, + } + : undefined; + + const ensureComputeState = (): SavedState => { + if (computeState) return computeState; + computeState = { + runId, + messages: [], + meta: {}, + messageRefs: [], + }; + return computeState; + }; + + const publishComputeState = () => { + if (!computeState) return; + ctx.onStateUpdate?.({ + ...computeState, + messages: computeState.messages.map(sanitizeMessage), + meta: computeState.meta ? { ...computeState.meta } : undefined, + messageRefs: Array.isArray(computeState.messageRefs) + ? [...computeState.messageRefs] + : undefined, + }); + }; const execContext: ExecutionContext = { runId, @@ -604,6 +2724,39 @@ async function runComputeDeck(ctx: RuntimeCtxBase): Promise { parentActionCallId: ctx.parentActionCallId, depth: ctx.depth, input: ctx.input, + initialUserMessage: ctx.initialUserMessage, + getSessionMeta: (key: string): T | undefined => { + if (!key) return undefined; + return computeState?.meta?.[key] as T | undefined; + }, + setSessionMeta: (key, value) => { + if (!key) return; + const state = ensureComputeState(); + const nextMeta = { ...(state.meta ?? {}) }; + if (value === undefined) { + delete nextMeta[key]; + } else { + nextMeta[key] = value; + } + state.meta = nextMeta; + publishComputeState(); + }, + appendMessage: (message) => { + const role = message.role; + const content = String(message.content ?? ""); + if ((role !== "user" && role !== "assistant") || !content.trim()) { + return; + } + const state = ensureComputeState(); + const sanitized = sanitizeMessage({ role, content: content.trim() }); + state.messages = [...(state.messages ?? []), sanitized]; + const refs = Array.isArray(state.messageRefs) + ? [...state.messageRefs] + : []; + refs.push({ id: randomId("msg"), role: sanitized.role }); + state.messageRefs = refs; + publishComputeState(); + }, label: deck.label, log: (entry) => { if (!ctx.trace) return; @@ -635,9 +2788,13 @@ async function runComputeDeck(ctx: RuntimeCtxBase): Promise { }); }, spawnAndWait: async (opts) => { + ensureRunActive(ctx.runDeadlineMs, ctx.signal); const childPath = path.isAbsolute(opts.path) ? opts.path : path.resolve(path.dirname(deck.path), opts.path); + const childInitialUserMessage = Object.hasOwn(opts, "initialUserMessage") + ? opts.initialUserMessage + : ctx.initialUserMessage; return await runDeck({ path: childPath, input: opts.input, @@ -651,17 +2808,33 @@ async function runComputeDeck(ctx: RuntimeCtxBase): Promise { modelOverride: ctx.modelOverride, trace: ctx.trace, stream: ctx.stream, - state: ctx.state, - onStateUpdate: ctx.onStateUpdate, + state: computeState, + onStateUpdate: (state) => { + computeState = { + ...state, + messages: Array.isArray(state.messages) + ? state.messages.map(sanitizeMessage) + : [], + meta: state.meta ? { ...state.meta } : undefined, + messageRefs: Array.isArray(state.messageRefs) + ? [...state.messageRefs] + : undefined, + }; + ctx.onStateUpdate?.(state); + }, onStreamText: ctx.onStreamText, responsesMode: ctx.responsesMode, - initialUserMessage: undefined, + initialUserMessage: childInitialUserMessage, inputProvided: true, parentPermissions: ctx.permissions, workspacePermissions: ctx.workspacePermissions, workspacePermissionsBaseDir: ctx.workspacePermissionsBaseDir, sessionPermissions: ctx.sessionPermissions, sessionPermissionsBaseDir: ctx.sessionPermissionsBaseDir, + runDeadlineMs: ctx.runDeadlineMs, + workerSandbox: ctx.workerSandbox, + signal: ctx.signal, + onTool: ctx.onTool, }); }, fail: (opts) => { @@ -670,7 +2843,9 @@ async function runComputeDeck(ctx: RuntimeCtxBase): Promise { return: (payload) => Promise.resolve(payload), }; + ensureRunActive(ctx.runDeadlineMs, ctx.signal); const raw = await deck.executor!(execContext); + ensureRunActive(ctx.runDeadlineMs, ctx.signal); return validateOutput(deck, raw, ctx.depth === 0); } @@ -725,11 +2900,15 @@ async function runLlmDeck( workspacePermissionsBaseDir: ctx.workspacePermissionsBaseDir, sessionPermissions: ctx.sessionPermissions, sessionPermissionsBaseDir: ctx.sessionPermissionsBaseDir, + runDeadlineMs: ctx.runDeadlineMs, + workerSandbox: ctx.workerSandbox, + signal: ctx.signal, + onTool: ctx.onTool, }); let streamingBuffer = ""; let streamingCommitted = false; const wrappedOnStreamText = (chunk: string) => { - if (!chunk) return; + if (!chunk || ctx.signal?.aborted) return; idleController.touch(); streamingBuffer += chunk; ctx.onStreamText?.(chunk); @@ -743,6 +2922,7 @@ async function runLlmDeck( actionCallId: refToolCallId, name: GAMBIT_TOOL_CONTEXT, args: {}, + toolKind: "internal", parentActionCallId: actionCallId, }); messages.push( @@ -771,6 +2951,7 @@ async function runLlmDeck( actionCallId: refToolCallId, name: GAMBIT_TOOL_CONTEXT, result: input as unknown as import("./types.ts").JSONValue, + toolKind: "internal", parentActionCallId: actionCallId, }); } @@ -793,7 +2974,7 @@ async function runLlmDeck( } idleController.touch(); - const tools = await buildToolDefs(deck); + const tools = await buildToolDefs(deck, ctx.permissions); ctx.trace?.({ type: "deck.start", runId, @@ -806,9 +2987,7 @@ async function runLlmDeck( try { while (passes < guardrails.maxPasses) { passes++; - if (performance.now() - start > guardrails.timeoutMs) { - throw new Error("Timeout exceeded"); - } + ensureRunActive(ctx.runDeadlineMs, ctx.signal); streamingBuffer = ""; streamingCommitted = false; const modelCandidate = ctx.modelOverride ?? @@ -850,6 +3029,9 @@ async function runLlmDeck( let responseOutputItems: Array | undefined; const responses = modelProvider.responses; + const projectedToolCalls = new Set(); + const projectedToolResults = new Set(); + const projectedToolNames = new Map(); type ModelCallResult = Awaited>; const result: ModelCallResult = (useResponses && responses) ? await (async () => { @@ -864,18 +3046,47 @@ async function runLlmDeck( params: providerParams, }, state: ctx.state, - onStreamEvent: (ctx.onStreamText || deck.handlers?.onIdle) - ? (event) => { - if (event.type === "response.output_text.delta") { - sawDelta = true; - wrappedOnStreamText(event.delta); - } else if ( - event.type === "response.output_text.done" && !sawDelta - ) { - wrappedOnStreamText(event.text); + deckPath: deck.path, + signal: ctx.signal, + onStreamEvent: + (ctx.trace || ctx.onStreamText || deck.handlers?.onIdle) + ? (event) => { + if (ctx.trace) { + ctx.trace({ + type: "model.stream.event", + runId, + actionCallId, + deckPath: deck.path, + model, + event: event as unknown as Record< + string, + import("./types.ts").JSONValue + >, + parentActionCallId: ctx.parentActionCallId, + }); + projectStreamToolTraceEvents({ + streamEvent: event as unknown as Record< + string, + JSONValue + >, + runId, + parentActionCallId: actionCallId, + trace: ctx.trace, + emittedCalls: projectedToolCalls, + emittedResults: projectedToolResults, + toolNames: projectedToolNames, + }); + } + if (event.type === "response.output_text.delta") { + sawDelta = true; + wrappedOnStreamText(event.delta); + } else if ( + event.type === "response.output_text.done" && !sawDelta + ) { + wrappedOnStreamText(event.text); + } } - } - : undefined, + : undefined, }); responseOutputItems = response.output ?? []; const mapped = mapResponseOutput(responseOutputItems); @@ -883,7 +3094,8 @@ async function runLlmDeck( message: mapped.message, finishReason: mapped.toolCalls?.length ? "tool_calls" : "stop", toolCalls: mapped.toolCalls, - updatedState: undefined, + usage: response.usage, + updatedState: response.updatedState, }; })() : await modelProvider.chat({ @@ -892,10 +3104,34 @@ async function runLlmDeck( tools, stream: ctx.stream, state: ctx.state, + deckPath: deck.path, + signal: ctx.signal, params: providerParams, onStreamText: (ctx.onStreamText || deck.handlers?.onIdle) ? wrappedOnStreamText : undefined, + onStreamEvent: ctx.trace + ? (event) => { + ctx.trace?.({ + type: "model.stream.event", + runId, + actionCallId, + deckPath: deck.path, + model, + event, + parentActionCallId: ctx.parentActionCallId, + }); + projectStreamToolTraceEvents({ + streamEvent: event, + runId, + parentActionCallId: actionCallId, + trace: ctx.trace, + emittedCalls: projectedToolCalls, + emittedResults: projectedToolResults, + toolNames: projectedToolNames, + }); + } + : undefined, }); idleController.touch(); let message = result.message; @@ -909,6 +3145,7 @@ async function runLlmDeck( message: sanitizeMessage(message), toolCalls: result.toolCalls, stateMessages: result.updatedState?.messages?.length, + usage: result.usage, mode: useResponses ? "responses" : "chat", responseItems: responseOutputItems, parentActionCallId: ctx.parentActionCallId, @@ -928,6 +3165,10 @@ async function runLlmDeck( ); const feedback = updated?.feedback ?? ctx.state?.feedback; const traces = updated?.traces ?? ctx.state?.traces; + const meta = updated?.meta ?? ctx.state?.meta; + const notes = updated?.notes ?? ctx.state?.notes; + const conversationScore = updated?.conversationScore ?? + ctx.state?.conversationScore; return { ...base, runId, @@ -939,6 +3180,9 @@ async function runLlmDeck( messageRefs, feedback, traces, + meta, + notes, + conversationScore, }; }; @@ -997,6 +3241,7 @@ async function runLlmDeck( actionCallId: call.id, name: call.name, args: call.args, + toolKind: "internal", parentActionCallId: actionCallId, }); const toolContent = JSON.stringify(call.args ?? {}); @@ -1027,6 +3272,7 @@ async function runLlmDeck( name: call.name, result: respondEnvelope as unknown as import("./types.ts").JSONValue, + toolKind: "internal", parentActionCallId: actionCallId, }); continue; @@ -1054,6 +3300,7 @@ async function runLlmDeck( actionCallId: call.id, name: call.name, args: call.args, + toolKind: "internal", parentActionCallId: actionCallId, }); const toolContent = JSON.stringify(call.args ?? {}); @@ -1088,12 +3335,14 @@ async function runLlmDeck( actionCallId: call.id, name: call.name, result: signal as unknown as import("./types.ts").JSONValue, + toolKind: "internal", parentActionCallId: actionCallId, }); continue; } const actionRef = deck.actionDecks.find((a) => a.name === call.name); + const toolKind: ToolKind = actionRef ? "action" : "external"; const actionPermissions = resolveEffectivePermissions({ baseDir: path.dirname(deck.path), parent: ctx.permissions, @@ -1119,6 +3368,7 @@ async function runLlmDeck( actionCallId: call.id, name: call.name, args: call.args, + toolKind, parentActionCallId: actionCallId, }); const toolResult = await handleToolCall(call, { @@ -1143,6 +3393,10 @@ async function runLlmDeck( workspacePermissionsBaseDir: ctx.workspacePermissionsBaseDir, sessionPermissions: ctx.sessionPermissions, sessionPermissionsBaseDir: ctx.sessionPermissionsBaseDir, + runDeadlineMs: ctx.runDeadlineMs, + workerSandbox: ctx.workerSandbox, + signal: ctx.signal, + onTool: ctx.onTool, }); ctx.trace?.({ type: "tool.result", @@ -1150,6 +3404,7 @@ async function runLlmDeck( actionCallId: call.id, name: call.name, result: toolResult.toolContent, + toolKind, parentActionCallId: actionCallId, }); appendedMessages.push({ @@ -1188,6 +3443,7 @@ async function runLlmDeck( idleController.touch(); } if (ctx.onStateUpdate) { + ensureRunActive(ctx.runDeadlineMs, ctx.signal); const state = computeState(result.updatedState); ctx.onStateUpdate(state); } @@ -1236,6 +3492,7 @@ async function runLlmDeck( if (message.content !== null && message.content !== undefined) { messages.push(sanitizeMessage(message)); + ensureRunActive(ctx.runDeadlineMs, ctx.signal); if (ctx.onStateUpdate) { const state = computeState(result.updatedState); ctx.onStateUpdate(state); @@ -1305,25 +3562,17 @@ async function handleToolCall( workspacePermissionsBaseDir?: string; sessionPermissions?: PermissionDeclarationInput; sessionPermissionsBaseDir?: string; + runDeadlineMs: number; + workerSandbox: boolean; + signal?: AbortSignal; + onTool?: RunOptions["onTool"]; }, ): Promise { - const action = ctx.parentDeck.actionDecks.find((a) => a.name === call.name); + ensureRunActive(ctx.runDeadlineMs, ctx.signal); const source = { deckPath: ctx.parentDeck.path, - actionName: action?.name ?? call.name, + actionName: call.name, }; - if (!action) { - return { - toolContent: JSON.stringify({ - runId: ctx.runId, - actionCallId: call.id, - parentActionCallId: ctx.parentActionCallId, - source, - status: 404, - message: "unknown action", - }), - }; - } const baseComplete = (payload: { status?: number; @@ -1346,6 +3595,474 @@ async function handleToolCall( const extraMessages: Array = []; const started = performance.now(); + const runBuiltinTool = async (): Promise => { + if (!isBuiltinTool(call.name)) return null; + const deny = (message: string): ToolCallResult => ({ + toolContent: baseComplete({ + status: 403, + code: "permission_denied", + message, + }), + }); + + if (call.name === BUILTIN_TOOL_READ_FILE) { + let targetPath: string; + try { + targetPath = resolveToolPath(ctx.permissions.baseDir, call.args.path); + } catch (err) { + return { + toolContent: baseComplete({ + status: 400, + code: "invalid_input", + message: err instanceof Error ? err.message : String(err), + }), + }; + } + if (!canReadPath(ctx.permissions, targetPath)) { + return deny(`read_file denied for ${targetPath}`); + } + const text = await Deno.readTextFile(targetPath); + const lines = text.split(/\r?\n/); + const { startLine, endLine } = parseLineRange(call.args); + const sliced = lines.slice(startLine - 1, endLine).join("\n"); + return { + toolContent: baseComplete({ + status: 200, + payload: { + path: targetPath, + start_line: startLine, + end_line: endLine, + total_lines: lines.length, + content: sliced, + }, + }), + }; + } + + if (call.name === BUILTIN_TOOL_LIST_DIR) { + let targetPath: string; + try { + targetPath = resolveToolPath(ctx.permissions.baseDir, call.args.path); + } catch (err) { + return { + toolContent: baseComplete({ + status: 400, + code: "invalid_input", + message: err instanceof Error ? err.message : String(err), + }), + }; + } + if (!canReadPath(ctx.permissions, targetPath)) { + return deny(`list_dir denied for ${targetPath}`); + } + const recursive = Boolean(call.args.recursive); + const maxEntries = parseToolLimit(call.args.max_entries, 200, 2000); + const out: Array<{ path: string; type: "file" | "dir" | "symlink" }> = []; + const pending: Array = [targetPath]; + while (pending.length > 0 && out.length < maxEntries) { + const current = pending.pop()!; + for await (const entry of Deno.readDir(current)) { + if (out.length >= maxEntries) break; + const entryPath = path.join(current, entry.name); + if (!canReadPath(ctx.permissions, entryPath)) continue; + const type = entry.isDirectory + ? "dir" + : entry.isSymlink + ? "symlink" + : "file"; + out.push({ path: entryPath, type }); + if (recursive && entry.isDirectory) { + pending.push(entryPath); + } + } + } + return { + toolContent: baseComplete({ + status: 200, + payload: { + path: targetPath, + recursive, + entries: out, + truncated: out.length >= maxEntries, + }, + }), + }; + } + + if (call.name === BUILTIN_TOOL_GREP_FILES) { + let targetPath: string; + try { + targetPath = resolveToolPath(ctx.permissions.baseDir, call.args.path); + } catch (err) { + return { + toolContent: baseComplete({ + status: 400, + code: "invalid_input", + message: err instanceof Error ? err.message : String(err), + }), + }; + } + if (!canReadPath(ctx.permissions, targetPath)) { + return deny(`grep_files denied for ${targetPath}`); + } + const query = typeof call.args.query === "string" ? call.args.query : ""; + if (!query) { + return { + toolContent: baseComplete({ + status: 400, + code: "invalid_input", + message: "query is required", + }), + }; + } + let re: RegExp; + try { + re = new RegExp(query, "g"); + } catch (err) { + return { + toolContent: baseComplete({ + status: 400, + code: "invalid_regex", + message: err instanceof Error ? err.message : String(err), + }), + }; + } + const maxMatches = parseToolLimit(call.args.max_matches, 200, 2000); + const matches: Array<{ + path: string; + line: number; + text: string; + }> = []; + const pending: Array = [targetPath]; + while (pending.length > 0 && matches.length < maxMatches) { + const current = pending.pop()!; + const stat = await Deno.stat(current); + if (stat.isDirectory) { + for await (const entry of Deno.readDir(current)) { + const entryPath = path.join(current, entry.name); + if (!canReadPath(ctx.permissions, entryPath)) continue; + if (entry.isDirectory) { + pending.push(entryPath); + continue; + } + if (!entry.isFile) continue; + const text = await Deno.readTextFile(entryPath).catch(() => null); + if (text === null) continue; + const lines = text.split(/\r?\n/); + for (let i = 0; i < lines.length; i++) { + re.lastIndex = 0; + if (!re.test(lines[i])) continue; + matches.push({ path: entryPath, line: i + 1, text: lines[i] }); + if (matches.length >= maxMatches) break; + } + if (matches.length >= maxMatches) break; + } + continue; + } + if (!stat.isFile) continue; + const text = await Deno.readTextFile(current).catch(() => null); + if (text === null) continue; + const lines = text.split(/\r?\n/); + for (let i = 0; i < lines.length; i++) { + re.lastIndex = 0; + if (!re.test(lines[i])) continue; + matches.push({ path: current, line: i + 1, text: lines[i] }); + if (matches.length >= maxMatches) break; + } + } + return { + toolContent: baseComplete({ + status: 200, + payload: { + path: targetPath, + query, + matches, + truncated: matches.length >= maxMatches, + }, + }), + }; + } + + if (call.name === BUILTIN_TOOL_APPLY_PATCH) { + let targetPath: string; + try { + targetPath = resolveToolPath(ctx.permissions.baseDir, call.args.path); + } catch (err) { + return { + toolContent: baseComplete({ + status: 400, + code: "invalid_input", + message: err instanceof Error ? err.message : String(err), + }), + }; + } + if (!canWritePath(ctx.permissions, targetPath)) { + return deny(`apply_patch denied for ${targetPath}`); + } + + const rawEdits = Array.isArray(call.args.edits) ? call.args.edits : []; + const edits = rawEdits.flatMap((entry) => { + if (!entry || typeof entry !== "object") return []; + const rec = entry as Record; + if ( + typeof rec.old_text !== "string" || typeof rec.new_text !== "string" + ) { + return []; + } + return [{ + oldText: rec.old_text, + newText: rec.new_text, + replaceAll: Boolean(rec.replace_all), + }]; + }); + if (edits.length === 0) { + return { + toolContent: baseComplete({ + status: 400, + code: "invalid_input", + message: "edits must include at least one old_text/new_text pair", + }), + }; + } + + const createIfMissing = Boolean(call.args.create_if_missing); + let existing = ""; + let created = false; + try { + if (!canReadPath(ctx.permissions, targetPath)) { + return deny(`apply_patch read denied for ${targetPath}`); + } + existing = await Deno.readTextFile(targetPath); + } catch (err) { + if (err instanceof Deno.errors.NotFound) { + if (!createIfMissing) { + return { + toolContent: baseComplete({ + status: 404, + code: "not_found", + message: `file not found: ${targetPath}`, + }), + }; + } + created = true; + existing = ""; + } else { + throw err; + } + } + + const patched = applySimplePatch(existing, edits); + if (!created && patched.applied === 0) { + return { + toolContent: baseComplete({ + status: 409, + code: "no_changes", + message: `No edit targets were found in ${targetPath}`, + }), + }; + } + if (created) { + const parentDir = path.dirname(targetPath); + if (parentDir && parentDir !== "." && parentDir !== targetPath) { + await Deno.mkdir(parentDir, { recursive: true }); + } + } + try { + await Deno.writeTextFile(targetPath, patched.next); + } catch (err) { + if (err instanceof Deno.errors.NotFound) { + return { + toolContent: baseComplete({ + status: 404, + code: "not_found", + message: `path not found: ${targetPath}`, + }), + }; + } + return { + toolContent: baseComplete({ + status: 500, + code: "write_failed", + message: err instanceof Error ? err.message : String(err), + }), + }; + } + return { + toolContent: baseComplete({ + status: 200, + payload: { + path: targetPath, + applied: patched.applied, + created, + }, + }), + }; + } + + if (call.name === BUILTIN_TOOL_EXEC) { + const command = typeof call.args.command === "string" + ? call.args.command + : ""; + if (!command) { + return { + toolContent: baseComplete({ + status: 400, + code: "invalid_input", + message: "command is required", + }), + }; + } + if ( + !canRunCommand(ctx.permissions, command) && + !canRunPath(ctx.permissions, command) + ) { + return deny(`exec denied for command ${command}`); + } + const args = toStringArray(call.args.args); + const cwd = typeof call.args.cwd === "string" + ? path.resolve(ctx.permissions.baseDir, call.args.cwd) + : ctx.permissions.baseDir; + const timeoutMs = parseToolLimit(call.args.timeout_ms, 5000, 30000); + const remainingMs = Math.max( + 1, + Math.min(timeoutMs, Math.floor(ctx.runDeadlineMs - performance.now())), + ); + const controller = new AbortController(); + const onAbort = () => controller.abort(); + if (ctx.signal?.aborted) { + controller.abort(); + } else if (ctx.signal) { + ctx.signal.addEventListener("abort", onAbort, { once: true }); + } + const timeoutId = setTimeout(() => controller.abort(), remainingMs); + try { + const output = await new Deno.Command(command, { + args, + cwd, + stdout: "piped", + stderr: "piped", + signal: controller.signal, + }).output(); + const stdout = new TextDecoder().decode(output.stdout).slice(0, 65536); + const stderr = new TextDecoder().decode(output.stderr).slice(0, 65536); + return { + toolContent: baseComplete({ + status: 200, + payload: { + command, + args, + cwd, + code: output.code, + success: output.success, + stdout, + stderr, + }, + }), + }; + } catch (err) { + return { + toolContent: baseComplete({ + status: 500, + code: "exec_failed", + message: err instanceof Error ? err.message : String(err), + }), + }; + } finally { + clearTimeout(timeoutId); + if (ctx.signal) { + ctx.signal.removeEventListener("abort", onAbort); + } + } + } + + return null; + }; + + const builtinResult = await runBuiltinTool(); + if (builtinResult) { + return builtinResult; + } + const action = ctx.parentDeck.actionDecks.find((a) => a.name === call.name); + if (!action) { + const externalTool = ctx.parentDeck.tools.find((tool) => + tool.name === call.name + ); + if (!externalTool) { + return { + toolContent: JSON.stringify({ + runId: ctx.runId, + actionCallId: call.id, + parentActionCallId: ctx.parentActionCallId, + source, + status: 404, + message: "unknown action", + }), + }; + } + let externalInput: Record = call.args; + if (externalTool.inputSchema) { + try { + externalInput = validateWithSchema( + externalTool.inputSchema as never, + call.args, + ) as Record; + } catch (err) { + return { + toolContent: baseComplete({ + status: 400, + code: "invalid_input", + message: err instanceof Error ? err.message : String(err), + }), + }; + } + } + if (!ctx.onTool) { + return { + toolContent: baseComplete({ + status: 500, + code: "missing_on_tool", + message: `External tool ${call.name} requires runtime onTool handler`, + }), + }; + } + try { + const result = await ctx.onTool({ + name: call.name, + args: externalInput, + runId: ctx.runId, + actionCallId: call.id, + parentActionCallId: ctx.parentActionCallId, + deckPath: ctx.parentDeck.path, + }); + return { toolContent: baseComplete(normalizeChildResult(result)) }; + } catch (err) { + return { + toolContent: baseComplete({ + status: 500, + code: "tool_handler_error", + message: err instanceof Error ? err.message : String(err), + }), + }; + } + } + let actionInput: unknown = call.args; + if (action.contextSchema) { + try { + actionInput = validateWithSchema( + action.contextSchema as never, + call.args, + ); + } catch (err) { + return { + toolContent: baseComplete({ + status: 400, + code: "invalid_input", + message: err instanceof Error ? err.message : String(err), + }), + }; + } + } + const busyCfg = ctx.parentDeck.handlers?.onBusy ?? ctx.parentDeck.handlers?.onInterval; const busyDelay = busyCfg?.delayMs ?? DEFAULT_STATUS_DELAY_MS; @@ -1362,7 +4079,7 @@ async function handleToolCall( try { const result = await runDeck({ path: action.path, - input: call.args, + input: actionInput, modelProvider: ctx.modelProvider, isRoot: false, guardrails: ctx.guardrails, @@ -1383,6 +4100,10 @@ async function handleToolCall( workspacePermissionsBaseDir: ctx.workspacePermissionsBaseDir, sessionPermissions: ctx.sessionPermissions, sessionPermissionsBaseDir: ctx.sessionPermissionsBaseDir, + runDeadlineMs: ctx.runDeadlineMs, + workerSandbox: ctx.workerSandbox, + signal: ctx.signal, + onTool: ctx.onTool, }); return { ok: true, result }; } catch (err) { @@ -1419,6 +4140,10 @@ async function handleToolCall( workspacePermissionsBaseDir: ctx.workspacePermissionsBaseDir, sessionPermissions: ctx.sessionPermissions, sessionPermissionsBaseDir: ctx.sessionPermissionsBaseDir, + runDeadlineMs: ctx.runDeadlineMs, + workerSandbox: ctx.workerSandbox, + signal: ctx.signal, + onTool: ctx.onTool, }); if (envelope.length) { extraMessages.push(...envelope.map(sanitizeMessage)); @@ -1480,6 +4205,12 @@ async function handleToolCall( } const normalized = normalizeChildResult(childResult.result); + if (action.responseSchema) { + normalized.payload = validateWithSchema( + action.responseSchema as never, + normalized.payload, + ); + } const toolContent = baseComplete(normalized); if (busyCfg?.path) { @@ -1509,6 +4240,10 @@ async function handleToolCall( workspacePermissionsBaseDir: ctx.workspacePermissionsBaseDir, sessionPermissions: ctx.sessionPermissions, sessionPermissionsBaseDir: ctx.sessionPermissionsBaseDir, + runDeadlineMs: ctx.runDeadlineMs, + workerSandbox: ctx.workerSandbox, + signal: ctx.signal, + onTool: ctx.onTool, }); if (envelope.length) { extraMessages.push(...envelope.map(sanitizeMessage)); @@ -1594,8 +4329,13 @@ async function runBusyHandler(args: { workspacePermissionsBaseDir?: string; sessionPermissions?: PermissionDeclarationInput; sessionPermissionsBaseDir?: string; + runDeadlineMs: number; + workerSandbox: boolean; + signal?: AbortSignal; + onTool?: RunOptions["onTool"]; }): Promise> { try { + ensureRunActive(args.runDeadlineMs, args.signal); const input = { kind: "busy", label: args.action.label ?? args.parentDeck.label, @@ -1628,6 +4368,10 @@ async function runBusyHandler(args: { workspacePermissionsBaseDir: args.workspacePermissionsBaseDir, sessionPermissions: args.sessionPermissions, sessionPermissionsBaseDir: args.sessionPermissionsBaseDir, + runDeadlineMs: args.runDeadlineMs, + workerSandbox: args.workerSandbox, + signal: args.signal, + onTool: args.onTool, }); const elapsedMs = Math.floor(args.elapsedMs); let message: string | undefined; @@ -1643,7 +4387,7 @@ async function runBusyHandler(args: { } } if (!message) return []; - if (args.onStreamText) { + if (args.onStreamText && !args.signal?.aborted) { args.onStreamText(`${message}\n`); } else { logger.log(message); @@ -1677,6 +4421,10 @@ function createIdleController(args: { workspacePermissionsBaseDir?: string; sessionPermissions?: PermissionDeclarationInput; sessionPermissionsBaseDir?: string; + runDeadlineMs: number; + workerSandbox: boolean; + signal?: AbortSignal; + onTool?: RunOptions["onTool"]; }): IdleController { if (!args.cfg?.path) { return { @@ -1728,6 +4476,10 @@ function createIdleController(args: { workspacePermissionsBaseDir: args.workspacePermissionsBaseDir, sessionPermissions: args.sessionPermissions, sessionPermissionsBaseDir: args.sessionPermissionsBaseDir, + runDeadlineMs: args.runDeadlineMs, + workerSandbox: args.workerSandbox, + signal: args.signal, + onTool: args.onTool, }); if (envelope.length) args.pushMessages(envelope.map(sanitizeMessage)); } catch { @@ -1783,8 +4535,13 @@ async function runIdleHandler(args: { workspacePermissionsBaseDir?: string; sessionPermissions?: PermissionDeclarationInput; sessionPermissionsBaseDir?: string; + runDeadlineMs: number; + workerSandbox: boolean; + signal?: AbortSignal; + onTool?: RunOptions["onTool"]; }): Promise> { try { + ensureRunActive(args.runDeadlineMs, args.signal); const input = { kind: "idle", label: args.deck.label, @@ -1816,6 +4573,10 @@ async function runIdleHandler(args: { workspacePermissionsBaseDir: args.workspacePermissionsBaseDir, sessionPermissions: args.sessionPermissions, sessionPermissionsBaseDir: args.sessionPermissionsBaseDir, + runDeadlineMs: args.runDeadlineMs, + workerSandbox: args.workerSandbox, + signal: args.signal, + onTool: args.onTool, }); const elapsedMs = Math.floor(args.elapsedMs); let message: string | undefined; @@ -1831,7 +4592,7 @@ async function runIdleHandler(args: { } } if (!message) return []; - if (args.onStreamText) { + if (args.onStreamText && !args.signal?.aborted) { args.onStreamText(`${message}\n`); } else { logger.log(message); @@ -1866,11 +4627,16 @@ async function maybeHandleError(args: { workspacePermissionsBaseDir?: string; sessionPermissions?: PermissionDeclarationInput; sessionPermissionsBaseDir?: string; + runDeadlineMs: number; + workerSandbox: boolean; + signal?: AbortSignal; + onTool?: RunOptions["onTool"]; }; action: { name: string; path: string; label?: string; description?: string }; }): Promise { const handlerPath = args.ctx.parentDeck.handlers?.onError?.path; if (!handlerPath) return undefined; + ensureRunActive(args.ctx.runDeadlineMs, args.ctx.signal); const message = args.err instanceof Error ? args.err.message @@ -1909,6 +4675,10 @@ async function maybeHandleError(args: { workspacePermissionsBaseDir: args.ctx.workspacePermissionsBaseDir, sessionPermissions: args.ctx.sessionPermissions, sessionPermissionsBaseDir: args.ctx.sessionPermissionsBaseDir, + runDeadlineMs: args.ctx.runDeadlineMs, + workerSandbox: args.ctx.workerSandbox, + signal: args.ctx.signal, + onTool: args.ctx.onTool, }); const parsed = typeof handlerOutput === "object" && handlerOutput !== null @@ -2037,8 +4807,195 @@ function sanitizeMessage(msg: ModelMessage): ModelMessage { return { ...msg, tool_calls: toolCalls }; } -async function buildToolDefs(deck: LoadedDeck): Promise> { +function toStringArray(value: unknown): Array { + if (!Array.isArray(value)) return []; + return value.filter((entry): entry is string => typeof entry === "string"); +} + +function resolveToolPath(baseDir: string, rawPath: unknown): string { + if (typeof rawPath !== "string" || rawPath.trim().length === 0) { + throw new Error("path is required"); + } + return path.resolve(baseDir, rawPath); +} + +function parseLineRange(args: Record): { + startLine: number; + endLine: number; +} { + const startLine = Number.isInteger(args.start_line) + ? Math.max(1, Number(args.start_line)) + : 1; + const endLine = Number.isInteger(args.end_line) + ? Math.max(startLine, Number(args.end_line)) + : startLine + 399; + return { startLine, endLine }; +} + +function parseToolLimit(value: unknown, fallback: number, max: number): number { + if (!Number.isInteger(value)) return fallback; + return Math.min(max, Math.max(1, Number(value))); +} + +function hasAnyScope(scope: { all: boolean; values: Set }): boolean { + return scope.all || scope.values.size > 0; +} + +function hasAnyRunScope( + scope: { all: boolean; paths: Set; commands: Set }, +): boolean { + return scope.all || scope.paths.size > 0 || scope.commands.size > 0; +} + +function isBuiltinTool(name: string): boolean { + return BUILTIN_TOOL_NAMES.has(name); +} + +function applySimplePatch( + content: string, + edits: Array<{ oldText: string; newText: string; replaceAll?: boolean }>, +): { next: string; applied: number } { + let next = content; + let applied = 0; + for (const edit of edits) { + const oldText = edit.oldText ?? ""; + const newText = edit.newText ?? ""; + if (!oldText) continue; + if (edit.replaceAll) { + if (!next.includes(oldText)) continue; + next = next.split(oldText).join(newText); + applied++; + continue; + } + const idx = next.indexOf(oldText); + if (idx === -1) continue; + next = `${next.slice(0, idx)}${newText}${next.slice(idx + oldText.length)}`; + applied++; + } + return { next, applied }; +} + +async function buildToolDefs( + deck: LoadedDeck, + permissions: NormalizedPermissionSet, +): Promise> { const defs: Array = []; + const addBuiltinTools = () => { + if (hasAnyScope(permissions.read)) { + defs.push( + { + type: "function", + function: { + name: BUILTIN_TOOL_READ_FILE, + description: "Read a UTF-8 text file.", + parameters: { + type: "object", + properties: { + path: { type: "string" }, + start_line: { type: "number" }, + end_line: { type: "number" }, + }, + required: ["path"], + additionalProperties: false, + }, + }, + }, + { + type: "function", + function: { + name: BUILTIN_TOOL_LIST_DIR, + description: "List directory entries.", + parameters: { + type: "object", + properties: { + path: { type: "string" }, + recursive: { type: "boolean" }, + max_entries: { type: "number" }, + }, + required: ["path"], + additionalProperties: false, + }, + }, + }, + { + type: "function", + function: { + name: BUILTIN_TOOL_GREP_FILES, + description: "Search text files using a regular expression.", + parameters: { + type: "object", + properties: { + path: { type: "string" }, + query: { type: "string" }, + max_matches: { type: "number" }, + }, + required: ["path", "query"], + additionalProperties: false, + }, + }, + }, + ); + } + + if (hasAnyScope(permissions.write)) { + defs.push({ + type: "function", + function: { + name: BUILTIN_TOOL_APPLY_PATCH, + description: + "Apply text replacements to a file using old/new edit pairs.", + parameters: { + type: "object", + properties: { + path: { type: "string" }, + create_if_missing: { type: "boolean" }, + edits: { + type: "array", + items: { + type: "object", + properties: { + old_text: { type: "string" }, + new_text: { type: "string" }, + replace_all: { type: "boolean" }, + }, + required: ["old_text", "new_text"], + additionalProperties: false, + }, + }, + }, + required: ["path", "edits"], + additionalProperties: false, + }, + }, + }); + } + + if (hasAnyRunScope(permissions.run)) { + defs.push({ + type: "function", + function: { + name: BUILTIN_TOOL_EXEC, + description: "Run an allowed command with optional args.", + parameters: { + type: "object", + properties: { + command: { type: "string" }, + args: { + type: "array", + items: { type: "string" }, + }, + cwd: { type: "string" }, + timeout_ms: { type: "number" }, + }, + required: ["command"], + additionalProperties: false, + }, + }, + }); + } + }; + + addBuiltinTools(); if (deck.allowEnd) { defs.push({ type: "function", @@ -2080,9 +5037,17 @@ async function buildToolDefs(deck: LoadedDeck): Promise> { }); } for (const action of deck.actionDecks) { - const child = await loadDeck(action.path, deck.path); - ensureSchemaPresence(child, false); - const schema = resolveContextSchema(child)!; + if (isBuiltinTool(action.name)) { + throw new Error( + `Action name ${action.name} conflicts with a built-in tool name`, + ); + } + let schema = action.contextSchema; + if (!schema) { + const child = await loadDeck(action.path, deck.path); + ensureSchemaPresence(child, false); + schema = resolveContextSchema(child)!; + } const params = toJsonSchema(schema as never); defs.push({ type: "function", @@ -2093,5 +5058,24 @@ async function buildToolDefs(deck: LoadedDeck): Promise> { }, }); } + const actionNames = new Set(deck.actionDecks.map((action) => action.name)); + for (const external of deck.tools) { + if (actionNames.has(external.name)) continue; + if (isBuiltinTool(external.name)) { + throw new Error( + `External tool name ${external.name} conflicts with a built-in tool name`, + ); + } + defs.push({ + type: "function", + function: { + name: external.name, + description: external.description, + parameters: external.inputSchema + ? toJsonSchema(external.inputSchema as never) + : { type: "object", additionalProperties: true }, + }, + }); + } return defs; } diff --git a/packages/gambit-core/src/runtime_orchestration_worker.ts b/packages/gambit-core/src/runtime_orchestration_worker.ts new file mode 100644 index 000000000..b79d60b17 --- /dev/null +++ b/packages/gambit-core/src/runtime_orchestration_worker.ts @@ -0,0 +1,416 @@ +import { runDeck } from "./runtime.ts"; +import type { SavedState } from "./state.ts"; +import type { NormalizedPermissionSet } from "./permissions.ts"; +import type { + CreateResponseResponse, + Guardrails, + ModelMessage, + ModelProvider, + ProviderTraceEvent, + ResponseEvent, + TraceEvent, +} from "./types.ts"; + +type WireScope = true | false | Array; +type WireRunScope = true | false | { + paths: Array; + commands: Array; +}; +type WirePermissionSet = { + baseDir: string; + read: WireScope; + write: WireScope; + run: WireRunScope; + net: WireScope; + env: WireScope; +}; + +type RunStartMessage = { + type: "run.start"; + bridgeSession: string; + completionNonce: string; + options: { + path: string; + input: unknown; + inputProvided?: boolean; + initialUserMessage?: unknown; + isRoot?: boolean; + guardrails?: Partial; + depth?: number; + parentActionCallId?: string; + runId: string; + defaultModel?: string; + modelOverride?: string; + stream?: boolean; + state?: SavedState; + responsesMode?: boolean; + allowRootStringInput?: boolean; + runDeadlineMs: number; + }; + permissionCeiling: WirePermissionSet; +}; + +type ModelChatResultMessage = { + type: "model.chat.result"; + requestId: string; + result: { + message: ModelMessage; + finishReason: "stop" | "tool_calls" | "length"; + toolCalls?: Array<{ + id: string; + name: string; + args: Record; + }>; + updatedState?: SavedState; + usage?: { + promptTokens: number; + completionTokens: number; + totalTokens: number; + }; + }; +}; + +type ModelResponsesResultMessage = { + type: "model.responses.result"; + requestId: string; + result: CreateResponseResponse; +}; + +type ModelResolveResultMessage = { + type: "model.resolveModel.result"; + requestId: string; + result: { + model: string; + params?: Record; + }; +}; + +type ModelStreamMessage = { + type: "model.chat.stream"; + requestId: string; + chunk: string; +}; + +type ModelResponsesEventMessage = { + type: "model.responses.event"; + requestId: string; + event: ResponseEvent; +}; + +type ModelTraceMessage = + | { + type: "model.chat.trace"; + requestId: string; + event: ProviderTraceEvent; + } + | { + type: "model.responses.trace"; + requestId: string; + event: ProviderTraceEvent; + }; + +type ModelErrorMessage = { + type: + | "model.chat.error" + | "model.responses.error" + | "model.resolveModel.error"; + requestId: string; + error: { + source?: string; + name?: string; + message: string; + code?: unknown; + }; +}; + +type ParentMessage = + | RunStartMessage + | ModelChatResultMessage + | ModelResponsesResultMessage + | ModelResolveResultMessage + | ModelStreamMessage + | ModelResponsesEventMessage + | ModelTraceMessage + | ModelErrorMessage; + +type PendingRequest = { + kind: "chat" | "responses" | "resolveModel"; + resolve: (value: unknown) => void; + reject: (error: unknown) => void; + onStreamText?: (chunk: string) => void; + onStreamEvent?: (event: ResponseEvent) => void; + onTraceEvent?: (event: ProviderTraceEvent) => void; +}; + +const pending = new Map(); +let activeBridgeSession: string | undefined; +let activeCompletionNonce: string | undefined; +let runInFlight = false; +const bridgePostMessage = self.postMessage.bind(self); + +function postBridgeMessage(message: Record) { + if (!activeBridgeSession) { + throw new Error("Orchestration bridge session not established"); + } + bridgePostMessage({ ...message, bridgeSession: activeBridgeSession }); +} + +function randomId(prefix: string) { + const suffix = crypto.randomUUID().replace(/-/g, "").slice(0, 24); + return `${prefix}-${suffix}`; +} + +function wireScopeToNormalized( + scope: WireScope, +): { all: boolean; values: Set } { + if (scope === true) return { all: true, values: new Set() }; + if (scope === false) return { all: false, values: new Set() }; + return { all: false, values: new Set(scope) }; +} + +function wireRunToNormalized( + scope: WireRunScope, +): { all: boolean; paths: Set; commands: Set } { + if (scope === true) { + return { + all: true, + paths: new Set(), + commands: new Set(), + }; + } + if (scope === false) { + return { + all: false, + paths: new Set(), + commands: new Set(), + }; + } + return { + all: false, + paths: new Set(scope.paths), + commands: new Set(scope.commands), + }; +} + +function fromWirePermissionSet( + set: WirePermissionSet, +): NormalizedPermissionSet { + return { + baseDir: set.baseDir, + read: wireScopeToNormalized(set.read), + write: wireScopeToNormalized(set.write), + run: wireRunToNormalized(set.run), + net: wireScopeToNormalized(set.net), + env: wireScopeToNormalized(set.env), + }; +} + +function workerErrorPayload(err: unknown) { + return { + source: "worker", + name: err instanceof Error ? err.name : undefined, + message: err instanceof Error ? err.message : String(err), + code: (err as { code?: unknown })?.code, + }; +} + +const requestModelProvider: ModelProvider = { + chat(input) { + const requestId = randomId("model-chat"); + const { + onStreamText, + onStreamEvent: _onStreamEvent, + onTraceEvent, + ...wireInput + } = input; + return new Promise>>( + (resolve, reject) => { + pending.set(requestId, { + kind: "chat", + resolve: (value) => + resolve(value as Awaited>), + reject: (error) => reject(error), + onStreamText, + onTraceEvent, + }); + postBridgeMessage({ + type: "model.chat.request", + requestId, + input: wireInput, + }); + }, + ); + }, + responses(input) { + const requestId = randomId("model-responses"); + const { onStreamEvent, onTraceEvent, ...wireInput } = input; + return new Promise((resolve, reject) => { + pending.set(requestId, { + kind: "responses", + resolve: (value) => resolve(value as CreateResponseResponse), + reject: (error) => reject(error), + onStreamEvent, + onTraceEvent, + }); + postBridgeMessage({ + type: "model.responses.request", + requestId, + input: wireInput, + }); + }); + }, + resolveModel(input) { + const requestId = randomId("model-resolve"); + return new Promise<{ model: string; params?: Record }>( + (resolve, reject) => { + pending.set(requestId, { + kind: "resolveModel", + resolve: (value) => + resolve( + value as { model: string; params?: Record }, + ), + reject: (error) => reject(error), + }); + postBridgeMessage({ + type: "model.resolveModel.request", + requestId, + input, + }); + }, + ); + }, +}; + +async function runOrchestration(msg: RunStartMessage): Promise { + return await runDeck({ + path: msg.options.path, + input: msg.options.input, + inputProvided: msg.options.inputProvided, + initialUserMessage: msg.options.initialUserMessage, + modelProvider: requestModelProvider, + isRoot: msg.options.isRoot, + guardrails: msg.options.guardrails, + depth: msg.options.depth, + parentActionCallId: msg.options.parentActionCallId, + runId: msg.options.runId, + defaultModel: msg.options.defaultModel, + modelOverride: msg.options.modelOverride, + trace: (event: TraceEvent) => { + postBridgeMessage({ type: "trace.event", event }); + }, + stream: msg.options.stream, + state: msg.options.state, + onStateUpdate: (state: SavedState) => { + postBridgeMessage({ type: "state.update", state }); + }, + onStreamText: (chunk: string) => { + postBridgeMessage({ type: "stream.text", chunk }); + }, + allowRootStringInput: msg.options.allowRootStringInput, + responsesMode: msg.options.responsesMode, + parentPermissions: fromWirePermissionSet(msg.permissionCeiling), + runDeadlineMs: msg.options.runDeadlineMs, + // Keep sandboxing enabled for nested runs so child compute decks are + // executed with narrowed OS permissions derived from effective ceilings. + workerSandbox: true, + inOrchestrationWorker: true, + }); +} + +self.addEventListener("message", (event: MessageEvent) => { + const data = event.data; + if (!data || typeof data !== "object") return; + + if (data.type === "run.start") { + if (runInFlight) return; + if (typeof data.bridgeSession !== "string" || !data.bridgeSession) return; + if (typeof data.completionNonce !== "string" || !data.completionNonce) { + return; + } + activeBridgeSession = data.bridgeSession; + activeCompletionNonce = data.completionNonce; + runInFlight = true; + runOrchestration(data).then( + (result) => { + postBridgeMessage({ + type: "run.result", + result, + completionNonce: activeCompletionNonce, + }); + runInFlight = false; + }, + (err) => { + postBridgeMessage({ + type: "run.error", + error: workerErrorPayload(err), + completionNonce: activeCompletionNonce, + }); + runInFlight = false; + }, + ); + return; + } + + if (data.type === "model.chat.stream") { + const req = pending.get(data.requestId); + if (!req || req.kind !== "chat") return; + req.onStreamText?.(data.chunk); + return; + } + + if (data.type === "model.responses.event") { + const req = pending.get(data.requestId); + if (!req || req.kind !== "responses") return; + req.onStreamEvent?.(data.event); + return; + } + + if (data.type === "model.chat.trace") { + const req = pending.get(data.requestId); + if (!req || req.kind !== "chat") return; + req.onTraceEvent?.(data.event); + return; + } + + if (data.type === "model.responses.trace") { + const req = pending.get(data.requestId); + if (!req || req.kind !== "responses") return; + req.onTraceEvent?.(data.event); + return; + } + + if (data.type === "model.chat.result") { + const req = pending.get(data.requestId); + if (!req || req.kind !== "chat") return; + pending.delete(data.requestId); + req.resolve(data.result); + return; + } + + if (data.type === "model.responses.result") { + const req = pending.get(data.requestId); + if (!req || req.kind !== "responses") return; + pending.delete(data.requestId); + req.resolve(data.result); + return; + } + + if (data.type === "model.resolveModel.result") { + const req = pending.get(data.requestId); + if (!req || req.kind !== "resolveModel") return; + pending.delete(data.requestId); + req.resolve(data.result); + return; + } + + if ( + data.type === "model.chat.error" || data.type === "model.responses.error" || + data.type === "model.resolveModel.error" + ) { + const req = pending.get(data.requestId); + if (!req) return; + pending.delete(data.requestId); + req.reject(new Error(data.error.message)); + return; + } +}); diff --git a/packages/gambit-core/src/runtime_worker.ts b/packages/gambit-core/src/runtime_worker.ts new file mode 100644 index 000000000..4a4f8e24b --- /dev/null +++ b/packages/gambit-core/src/runtime_worker.ts @@ -0,0 +1,397 @@ +import * as path from "@std/path"; +import { loadDeck } from "./loader.ts"; +import type { SavedState } from "./state.ts"; +import type { PermissionDeclarationInput } from "./permissions.ts"; +import { assertZodSchema, validateWithSchema } from "./schema.ts"; +import type { ExecutionContext, Guardrails, LoadedDeck } from "./types.ts"; + +type WireScope = true | false | Array; +type WireRunScope = true | false | { + paths: Array; + commands: Array; +}; +type WirePermissionSet = { + baseDir: string; + read: WireScope; + write: WireScope; + run: WireRunScope; + net: WireScope; + env: WireScope; +}; + +type RunStartMessage = { + type: "run.start"; + bridgeSession: string; + completionNonce: string; + runId: string; + actionCallId: string; + deckPath: string; + input: unknown; + state?: SavedState; + initialUserMessage?: unknown; + depth: number; + parentActionCallId?: string; + permissions: WirePermissionSet; + workspacePermissions?: PermissionDeclarationInput; + workspacePermissionsBaseDir?: string; + sessionPermissions?: PermissionDeclarationInput; + sessionPermissionsBaseDir?: string; + runDeadlineMs: number; + isRoot: boolean; + allowRootStringInput: boolean; +}; + +type DeckInspectStartMessage = { + type: "deck.inspect"; + bridgeSession: string; + deckPath: string; +}; + +type WorkerDeckInspection = { + deckPath: string; + hasModelParams: boolean; + permissions?: PermissionDeclarationInput; + guardrails?: Partial; +}; + +type SpawnResultMessage = { + type: "spawn.result"; + requestId: string; + result: unknown; +}; + +type SpawnErrorMessage = { + type: "spawn.error"; + requestId: string; + error: { + source?: string; + name?: string; + message: string; + code?: unknown; + }; +}; + +type ParentMessage = + | RunStartMessage + | DeckInspectStartMessage + | SpawnResultMessage + | SpawnErrorMessage; + +const logger = console; + +function randomId(prefix: string) { + const suffix = crypto.randomUUID().replace(/-/g, "").slice(0, 24); + return `${prefix}-${suffix}`; +} + +function ensureNotExpired(deadlineMs: number) { + if (performance.now() > deadlineMs) { + throw new Error("Timeout exceeded"); + } +} + +function workerErrorPayload(err: unknown) { + return { + source: "worker", + name: err instanceof Error ? err.name : undefined, + message: err instanceof Error ? err.message : String(err), + code: (err as { code?: unknown })?.code, + }; +} + +function resolveContextSchema(deck: LoadedDeck) { + return deck.contextSchema ?? deck.inputSchema; +} + +function resolveResponseSchema(deck: LoadedDeck) { + return deck.responseSchema ?? deck.outputSchema; +} + +function ensureSchemaPresence(deck: LoadedDeck, isRoot: boolean) { + if (!isRoot) { + const contextSchema = resolveContextSchema(deck); + const responseSchema = resolveResponseSchema(deck); + if (!contextSchema || !responseSchema) { + throw new Error( + `Deck ${deck.path} must declare contextSchema and responseSchema (non-root)`, + ); + } + assertZodSchema(contextSchema, "contextSchema"); + assertZodSchema(responseSchema, "responseSchema"); + } +} + +function validateInput( + deck: LoadedDeck, + input: unknown, + isRoot: boolean, + allowRootStringInput: boolean, +) { + const contextSchema = resolveContextSchema(deck); + if (contextSchema) { + if (isRoot && typeof input === "string" && allowRootStringInput) { + try { + return validateWithSchema(contextSchema as never, input); + } catch { + return input; + } + } + return validateWithSchema(contextSchema as never, input); + } + if (isRoot) { + if (input === undefined) return ""; + if (typeof input === "string") return input; + return input; + } + throw new Error(`Deck ${deck.path} requires contextSchema (non-root)`); +} + +function validateOutput( + deck: LoadedDeck, + output: unknown, + isRoot: boolean, +): unknown { + const responseSchema = resolveResponseSchema(deck); + if (responseSchema) { + return validateWithSchema(responseSchema as never, output); + } + if (isRoot) { + if (typeof output === "string") return output; + return JSON.stringify(output); + } + throw new Error(`Deck ${deck.path} requires responseSchema (non-root)`); +} + +type PendingRequest = { + resolve: (value: unknown) => void; + reject: (error: unknown) => void; +}; + +const pending = new Map(); +let activeBridgeSession: string | undefined; +let activeCompletionNonce: string | undefined; +let runInFlight = false; +let inspectInFlight = false; +const bridgePostMessage = self.postMessage.bind(self); + +function postBridgeMessage(message: Record) { + if (!activeBridgeSession) { + throw new Error("Worker bridge session not established"); + } + bridgePostMessage({ ...message, bridgeSession: activeBridgeSession }); +} + +async function inspectDeck(deckPath: string): Promise { + const deck = await loadDeck(deckPath); + return { + deckPath: deck.path, + hasModelParams: Boolean( + deck.modelParams?.model || deck.modelParams?.temperature !== undefined, + ), + permissions: deck.permissions, + guardrails: deck.guardrails, + }; +} + +async function runCompute(msg: RunStartMessage) { + ensureNotExpired(msg.runDeadlineMs); + const deck = await loadDeck(msg.deckPath); + ensureSchemaPresence(deck, msg.isRoot); + const validatedInput = validateInput( + deck, + msg.input, + msg.isRoot, + msg.allowRootStringInput, + ); + + if (!deck.executor) { + throw new Error( + `Deck ${deck.path} has no model and no executor (add run or execute to the deck definition)`, + ); + } + + let computeState = msg.state + ? { + ...msg.state, + messages: Array.isArray(msg.state.messages) + ? msg.state.messages.map((entry) => ({ + ...entry, + content: entry.content ?? null, + })) + : [], + meta: msg.state.meta ? { ...msg.state.meta } : undefined, + messageRefs: Array.isArray(msg.state.messageRefs) + ? [...msg.state.messageRefs] + : undefined, + } + : undefined; + + const ensureComputeState = (): SavedState => { + if (computeState) return computeState; + computeState = { + runId: msg.runId, + messages: [], + meta: {}, + messageRefs: [], + }; + return computeState; + }; + + const publishComputeState = () => { + if (!computeState) return; + postBridgeMessage({ type: "state.update", state: computeState }); + }; + + const execContext: ExecutionContext = { + runId: msg.runId, + actionCallId: msg.actionCallId, + parentActionCallId: msg.parentActionCallId, + depth: msg.depth, + input: validatedInput, + initialUserMessage: msg.initialUserMessage, + getSessionMeta: (key: string): T | undefined => { + if (!key) return undefined; + return computeState?.meta?.[key] as T | undefined; + }, + setSessionMeta: (key, value) => { + if (!key) return; + const state = ensureComputeState(); + const nextMeta = { ...(state.meta ?? {}) }; + if (value === undefined) { + delete nextMeta[key]; + } else { + nextMeta[key] = value; + } + state.meta = nextMeta; + publishComputeState(); + }, + appendMessage: (message) => { + const role = message.role; + const content = String(message.content ?? "").trim(); + if ((role !== "user" && role !== "assistant") || !content) return; + const state = ensureComputeState(); + state.messages = [...(state.messages ?? []), { role, content }]; + const refs = Array.isArray(state.messageRefs) + ? [...state.messageRefs] + : []; + refs.push({ id: randomId("msg"), role }); + state.messageRefs = refs; + publishComputeState(); + }, + label: deck.label, + log: (entry) => { + postBridgeMessage({ type: "log.entry", entry }); + }, + spawnAndWait: async (opts) => { + ensureNotExpired(msg.runDeadlineMs); + const childPath = path.isAbsolute(opts.path) + ? opts.path + : path.resolve(path.dirname(deck.path), opts.path); + const requestId = randomId("spawn"); + const childPromise = new Promise((resolve, reject) => { + pending.set(requestId, { resolve, reject }); + }); + postBridgeMessage({ + type: "spawn.request", + requestId, + payload: { + path: childPath, + input: opts.input, + initialUserMessage: Object.hasOwn(opts, "initialUserMessage") + ? opts.initialUserMessage + : msg.initialUserMessage, + parentActionCallId: msg.actionCallId, + parentPermissionsBaseDir: msg.permissions.baseDir, + parentPermissions: msg.permissions, + workspacePermissions: msg.workspacePermissions, + workspacePermissionsBaseDir: msg.workspacePermissionsBaseDir, + sessionPermissions: msg.sessionPermissions, + sessionPermissionsBaseDir: msg.sessionPermissionsBaseDir, + runDeadlineMs: msg.runDeadlineMs, + }, + }); + const result = await childPromise; + ensureNotExpired(msg.runDeadlineMs); + return result; + }, + fail: (opts) => { + throw new Error(opts.message); + }, + return: (payload) => Promise.resolve(payload), + }; + + const raw = await deck.executor(execContext); + ensureNotExpired(msg.runDeadlineMs); + return validateOutput(deck, raw, msg.isRoot); +} + +self.addEventListener("message", (event: MessageEvent) => { + const data = event.data; + if (!data || typeof data !== "object") return; + + if (data.type === "spawn.result") { + const pendingRequest = pending.get(data.requestId); + if (!pendingRequest) return; + pending.delete(data.requestId); + pendingRequest.resolve(data.result); + return; + } + + if (data.type === "spawn.error") { + const pendingRequest = pending.get(data.requestId); + if (!pendingRequest) return; + pending.delete(data.requestId); + pendingRequest.reject(new Error(data.error.message)); + return; + } + + if (data.type === "deck.inspect") { + if (inspectInFlight) return; + if (typeof data.bridgeSession !== "string" || !data.bridgeSession) return; + activeBridgeSession = data.bridgeSession; + inspectInFlight = true; + inspectDeck(data.deckPath).then( + (result) => { + postBridgeMessage({ type: "deck.inspect.result", result }); + inspectInFlight = false; + }, + (err) => { + logger.error("[gambit-worker] deck inspection failed", err); + postBridgeMessage({ + type: "deck.inspect.error", + error: workerErrorPayload(err), + }); + inspectInFlight = false; + }, + ); + return; + } + + if (data.type !== "run.start") return; + if (runInFlight) return; + if (typeof data.bridgeSession !== "string" || !data.bridgeSession) return; + if (typeof data.completionNonce !== "string" || !data.completionNonce) return; + activeBridgeSession = data.bridgeSession; + activeCompletionNonce = data.completionNonce; + runInFlight = true; + + runCompute(data).then( + (result) => { + postBridgeMessage({ + type: "run.result", + result, + completionNonce: activeCompletionNonce, + }); + runInFlight = false; + }, + (err) => { + logger.error("[gambit-worker] compute execution failed", err); + postBridgeMessage({ + type: "run.error", + error: workerErrorPayload(err), + completionNonce: activeCompletionNonce, + }); + runInFlight = false; + }, + ); +}); diff --git a/packages/gambit-core/src/state.ts b/packages/gambit-core/src/state.ts index f3a2dbbff..522a58aeb 100644 --- a/packages/gambit-core/src/state.ts +++ b/packages/gambit-core/src/state.ts @@ -17,6 +17,7 @@ export type SavedState = { export type MessageRef = { id: string; role: ModelMessage["role"]; + source?: "scenario" | "manual"; }; export type FeedbackEntry = { diff --git a/packages/gambit-core/src/types.ts b/packages/gambit-core/src/types.ts index 614060666..f085c9576 100644 --- a/packages/gambit-core/src/types.ts +++ b/packages/gambit-core/src/types.ts @@ -31,11 +31,11 @@ export type ModelParams = { frequency_penalty?: number; presence_penalty?: number; max_tokens?: number; - /** - * Provider-specific pass-through parameters. Values must be JSON-serializable. - * Top-level supported fields take precedence when keys overlap. - */ - additionalParams?: Record; + verbosity?: "low" | "medium" | "high"; + reasoning?: { + effort?: "none" | "low" | "medium" | "high" | "xhigh"; + summary?: "concise" | "detailed" | "auto"; + }; }; export type Guardrails = { @@ -56,6 +56,15 @@ export type DeckReferenceDefinition = { export type ActionDeckDefinition = DeckReferenceDefinition & { name: string; + execute?: string; + contextSchema?: ZodTypeAny; + responseSchema?: ZodTypeAny; +}; + +export type ExternalToolDefinition = { + name: string; + description?: string; + inputSchema?: ZodTypeAny; }; export type TestDeckDefinition = DeckReferenceDefinition; @@ -116,6 +125,7 @@ export type BaseDefinition = { export type DeckDefinition = BaseDefinition & { kind: "gambit.deck"; modelParams?: ModelParams; + tools?: ReadonlyArray; handlers?: HandlersConfig; prompt?: string; // deprecated; prefer body body?: string; @@ -162,8 +172,18 @@ export type ExecutionContext = { depth: number; label?: Label; input: Input; + initialUserMessage?: unknown; + getSessionMeta: (key: string) => T | undefined; + setSessionMeta: (key: string, value: unknown) => void; + appendMessage: ( + message: { role: "user" | "assistant"; content: string }, + ) => void; log: (entry: LogEntry | string) => void; - spawnAndWait: (opts: { path: string; input: unknown }) => Promise; + spawnAndWait: (opts: { + path: string; + input: unknown; + initialUserMessage?: unknown; + }) => Promise; fail: ( opts: { message: string; code?: string; details?: JSONValue }, ) => never; @@ -199,7 +219,9 @@ export type ToolDefinition = { export type ResponseTextContent = | { type: "input_text"; text: string } - | { type: "output_text"; text: string }; + | { type: "output_text"; text: string } + | { type: "summary_text"; text: string } + | { type: "reasoning_text"; text: string }; export type ResponseMessageItem = { type: "message"; @@ -223,10 +245,19 @@ export type ResponseFunctionCallOutputItem = { id?: string; }; +export type ResponseReasoningItem = { + type: "reasoning"; + id?: string; + content?: Array; + summary: Array; + encrypted_content?: string | null; +}; + export type ResponseItem = | ResponseMessageItem | ResponseFunctionCallItem - | ResponseFunctionCallOutputItem; + | ResponseFunctionCallOutputItem + | ResponseReasoningItem; export type ResponseToolDefinition = { type: "function"; @@ -238,9 +269,37 @@ export type ResponseToolDefinition = { }; export type ResponseToolChoice = + | "none" | "auto" | "required" - | { type: "function"; function: { name: string } }; + | { type: "function"; function: { name: string } } + | { + type: "allowed_tools"; + tools: Array<{ type: "function"; name: string }>; + mode?: "none" | "auto" | "required"; + }; + +export type ResponseReasoningConfig = { + effort?: "none" | "low" | "medium" | "high" | "xhigh" | null; + summary?: "auto" | "concise" | "detailed" | null; +}; + +export type ResponseTextConfig = { + format?: + | { type: "text" } + | { type: "json_object" } + | { + type: "json_schema"; + name?: string; + description?: string | null; + schema?: JSONValue | null; + strict?: boolean; + } + | null; + verbosity?: "low" | "medium" | "high"; +}; + +export type ResponseAllowedTool = { type: "function"; name: string }; export type CreateResponseRequest = { model: string; @@ -248,9 +307,30 @@ export type CreateResponseRequest = { instructions?: string; tools?: Array; tool_choice?: ResponseToolChoice; + allowed_tools?: Array; + previous_response_id?: string; + store?: boolean; + reasoning?: ResponseReasoningConfig; + parallel_tool_calls?: boolean; + max_tool_calls?: number; + temperature?: number; + top_p?: number; + frequency_penalty?: number; + presence_penalty?: number; stream?: boolean; + stream_options?: { + include_obfuscation?: boolean; + }; + background?: boolean; max_output_tokens?: number; + top_logprobs?: number; + truncation?: "auto" | "disabled"; + text?: ResponseTextConfig; + service_tier?: "auto" | "default" | "flex" | "priority"; + include?: Array; metadata?: Record; + safety_identifier?: string; + prompt_cache_key?: string; params?: Record; }; @@ -258,51 +338,163 @@ export type ResponseUsage = { promptTokens: number; completionTokens: number; totalTokens: number; + reasoningTokens?: number; }; export type CreateResponseResponse = { id: string; object: "response"; model?: string; + created_at?: number; + completed_at?: number | null; + previous_response_id?: string | null; + instructions?: string | null; + reasoning?: ResponseReasoningConfig | null; created?: number; status?: "completed" | "in_progress" | "failed"; output: Array; + tools?: Array; + tool_choice?: ResponseToolChoice; + parallel_tool_calls?: boolean; + truncation?: "auto" | "disabled"; + text?: ResponseTextConfig; + top_p?: number; + presence_penalty?: number; + frequency_penalty?: number; + top_logprobs?: number; + temperature?: number; + max_output_tokens?: number | null; + max_tool_calls?: number | null; + store?: boolean; + background?: boolean; + service_tier?: "auto" | "default" | "flex" | "priority"; + metadata?: Record; + safety_identifier?: string | null; + prompt_cache_key?: string | null; usage?: ResponseUsage; - error?: { code?: string; message?: string }; + error?: { code?: string; message?: string } | null; + updatedState?: SavedState; }; export type ResponseEvent = - | { type: "response.created"; response: CreateResponseResponse } + | { + type: "response.created"; + response: CreateResponseResponse; + sequence_number?: number; + } + | { + type: "tool.call"; + actionCallId: string; + name: string; + args?: JSONValue; + } + | { + type: "tool.result"; + actionCallId: string; + name: string; + result?: JSONValue; + } | { type: "response.output_text.delta"; output_index: number; delta: string; item_id?: string; + content_index?: number; + sequence_number?: number; + logprobs?: Array<{ + token?: string; + logprob?: number; + }>; } | { type: "response.output_text.done"; output_index: number; text: string; item_id?: string; + content_index?: number; + sequence_number?: number; } | { type: "response.output_item.added"; output_index: number; item: ResponseItem; + sequence_number?: number; } | { type: "response.output_item.done"; output_index: number; item: ResponseItem; + sequence_number?: number; + } + | { + type: "response.reasoning.delta"; + output_index: number; + item_id: string; + content_index: number; + delta: string; + sequence_number?: number; + obfuscation?: string; + } + | { + type: "response.reasoning.done"; + output_index: number; + item_id: string; + content_index: number; + text: string; + sequence_number?: number; + } + | { + type: "response.reasoning_summary_text.delta"; + output_index: number; + item_id: string; + summary_index: number; + delta: string; + sequence_number?: number; + obfuscation?: string; + } + | { + type: "response.reasoning_summary_text.done"; + output_index: number; + item_id: string; + summary_index: number; + text: string; + sequence_number?: number; + } + | { + type: "response.reasoning_summary_part.added"; + output_index: number; + item_id: string; + summary_index: number; + part: ResponseTextContent; + sequence_number?: number; + } + | { + type: "response.reasoning_summary_part.done"; + output_index: number; + item_id: string; + summary_index: number; + part: ResponseTextContent; + sequence_number?: number; + } + | { + type: "response.completed"; + response: CreateResponseResponse; + sequence_number?: number; } - | { type: "response.completed"; response: CreateResponseResponse } - | { type: "response.failed"; error: { code?: string; message?: string } }; + | { + type: "response.failed"; + error: { code?: string; message?: string }; + sequence_number?: number; + }; export type ModelProvider = { responses?: (input: { request: CreateResponseRequest; state?: SavedState; + deckPath?: string; + signal?: AbortSignal; onStreamEvent?: (event: ResponseEvent) => void; + onTraceEvent?: (event: ProviderTraceEvent) => void; }) => Promise; resolveModel?: (input: { model: string | Array; @@ -318,7 +510,11 @@ export type ModelProvider = { tools?: Array; stream?: boolean; state?: SavedState; + deckPath?: string; + signal?: AbortSignal; onStreamText?: (chunk: string) => void; + onStreamEvent?: (event: Record) => void; + onTraceEvent?: (event: ProviderTraceEvent) => void; /** * Provider-specific pass-through parameters (e.g. OpenAI chat completion * fields like temperature/max_tokens). @@ -337,10 +533,34 @@ export type ModelProvider = { promptTokens: number; completionTokens: number; totalTokens: number; + reasoningTokens?: number; }; }>; }; +export type ProviderTraceEvent = + | TraceEvent + | ( + & Omit< + Extract, + "runId" | "parentActionCallId" + > + & { + runId?: string; + parentActionCallId?: string; + } + ) + | ( + & Omit< + Extract, + "runId" | "parentActionCallId" + > + & { + runId?: string; + parentActionCallId?: string; + } + ); + type WithDeckRefs = Omit< T, "actions" | "actionDecks" | "testDecks" | "graderDecks" @@ -369,6 +589,7 @@ export type LoadedDeck = WithDeckRefs & { actions: Array; testDecks: Array; graderDecks: Array; + tools: Array; executor?: DeckExecutor; guardrails?: Partial; inlineEmbeds?: boolean; @@ -380,6 +601,8 @@ export type ToolCallResult = { extraMessages?: Array; }; +export type ToolKind = "action" | "external" | "mcp_bridge" | "internal"; + export type TraceEvent = & { ts?: number; @@ -440,6 +663,7 @@ export type TraceEvent = actionCallId: string; name: string; args: JSONValue; + toolKind: ToolKind; parentActionCallId?: string; } | { @@ -448,6 +672,7 @@ export type TraceEvent = actionCallId: string; name: string; result: JSONValue; + toolKind: ToolKind; parentActionCallId?: string; } | { @@ -480,10 +705,25 @@ export type TraceEvent = args: JSONValue; }>; stateMessages?: number; + usage?: { + promptTokens: number; + completionTokens: number; + totalTokens: number; + reasoningTokens?: number; + }; mode?: "chat" | "responses"; responseItems?: Array; parentActionCallId?: string; } + | { + type: "model.stream.event"; + runId: string; + actionCallId: string; + deckPath?: string; + model: string; + event: Record; + parentActionCallId?: string; + } | { type: "log"; runId: string; diff --git a/scaffolds/demo/examples/advanced/agent_with_multi_actions/README.md b/scaffolds/demo/examples/advanced/agent_with_multi_actions/README.md index 071dc429f..13bc8ae57 100644 --- a/scaffolds/demo/examples/advanced/agent_with_multi_actions/README.md +++ b/scaffolds/demo/examples/advanced/agent_with_multi_actions/README.md @@ -30,7 +30,7 @@ deno run -A jsr:@bolt-foundry/gambit/cli run ./agent_with_multi_actions.deck.md --message '"translate bonjour to English"' --stream ``` -## Run a test bot (UI) +## Run a scenario (UI) 1. Open the simulator UI and go to the "Test" tab. 2. Choose the "Multi-actions test" persona. @@ -40,5 +40,5 @@ deno run -A jsr:@bolt-foundry/gambit/cli run ./agent_with_multi_actions.deck.md - Action decks live in `actions/decks/` and their cards live in `actions/cards/`. -- The test bot deck is `tests/agent_with_multi_actions_test.deck.md`. -- The test bot hangup card is `cards/test_bot_hangup.card.md`. +- The scenario deck is `tests/agent_with_multi_actions_test.deck.md`. +- The scenario hangup card is `cards/test_bot_hangup.card.md`. diff --git a/scaffolds/demo/examples/advanced/agent_with_multi_actions/agent_with_multi_actions.deck.md b/scaffolds/demo/examples/advanced/agent_with_multi_actions/agent_with_multi_actions.deck.md index d09f68bfe..425451941 100644 --- a/scaffolds/demo/examples/advanced/agent_with_multi_actions/agent_with_multi_actions.deck.md +++ b/scaffolds/demo/examples/advanced/agent_with_multi_actions/agent_with_multi_actions.deck.md @@ -5,7 +5,7 @@ label = "agent_with_multi_actions" model = "openai/gpt-4o-mini" temperature = 0 [[testDecks]] -label = "Multi-actions test bot" +label = "Multi-actions scenario" path = "./tests/agent_with_multi_actions_test.deck.md" description = "Synthetic user that requests a simple translation." [[graderDecks]] diff --git a/scaffolds/demo/examples/advanced/agent_with_typescript/README.md b/scaffolds/demo/examples/advanced/agent_with_typescript/README.md index c4d6be110..2624db33d 100644 --- a/scaffolds/demo/examples/advanced/agent_with_typescript/README.md +++ b/scaffolds/demo/examples/advanced/agent_with_typescript/README.md @@ -30,7 +30,7 @@ deno run -A jsr:@bolt-foundry/gambit/cli run ./agent_with_typescript.deck.md \ --message '"hi"' --stream ``` -## Run a test bot (UI) +## Run a scenario (UI) 1. Open the simulator UI and go to the "Test" tab. 2. Choose the "Typescript agent test" persona. @@ -39,5 +39,5 @@ deno run -A jsr:@bolt-foundry/gambit/cli run ./agent_with_typescript.deck.md \ ## Notes - The TypeScript action deck is `get_time.deck.ts`. -- The test bot deck is `tests/agent_with_typescript_test.deck.md`. -- The test bot hangup card is `cards/test_bot_hangup.card.md`. +- The scenario deck is `tests/agent_with_typescript_test.deck.md`. +- The scenario hangup card is `cards/test_bot_hangup.card.md`. diff --git a/scaffolds/demo/examples/advanced/agent_with_typescript/agent_with_typescript.deck.md b/scaffolds/demo/examples/advanced/agent_with_typescript/agent_with_typescript.deck.md index 0967cc588..a32465dd6 100644 --- a/scaffolds/demo/examples/advanced/agent_with_typescript/agent_with_typescript.deck.md +++ b/scaffolds/demo/examples/advanced/agent_with_typescript/agent_with_typescript.deck.md @@ -6,7 +6,7 @@ name = "get_time" path = "./get_time.deck.ts" description = "Return the current ISO timestamp." [[testDecks]] -label = "Typescript agent test bot" +label = "Typescript agent scenario" path = "./tests/agent_with_typescript_test.deck.md" description = "Synthetic user that asks for the current time." [[graderDecks]] diff --git a/scaffolds/demo/examples/advanced/arena_chatbot/README.md b/scaffolds/demo/examples/advanced/arena_chatbot/README.md index 7aac8bbcc..4ae1045e9 100644 --- a/scaffolds/demo/examples/advanced/arena_chatbot/README.md +++ b/scaffolds/demo/examples/advanced/arena_chatbot/README.md @@ -30,14 +30,14 @@ deno run -A jsr:@bolt-foundry/gambit/cli run ./active.deck.md \ --message '"Tell me one tip about Pikachu"' --stream ``` -## Run a test bot (UI) +## Run a scenario (UI) 1. Open the simulator UI and go to the "Test" tab. 2. Choose the "Arena challenger" persona. -3. Click "Run test bot". +3. Click "Run scenario". ## Notes - Bot variants live in `bots/`. -- The test bot deck is `tests/arena_challenger.deck.md`. -- The test bot hangup card is `cards/test_bot_hangup.card.md`. +- The scenario deck is `tests/arena_challenger.deck.md`. +- The scenario hangup card is `cards/test_bot_hangup.card.md`. diff --git a/scaffolds/demo/examples/advanced/cli_cold_emailer/README.md b/scaffolds/demo/examples/advanced/cli_cold_emailer/README.md index 9fcd302ec..8dfda1db8 100644 --- a/scaffolds/demo/examples/advanced/cli_cold_emailer/README.md +++ b/scaffolds/demo/examples/advanced/cli_cold_emailer/README.md @@ -25,5 +25,5 @@ deno run -A jsr:@bolt-foundry/gambit/cli run ./cold_emailer.deck.md \ ## Notes - Cards live in `cards/` and schemas live in `schemas/`. -- The test bot deck is `tests/buyer_feedback.deck.md`. -- The test bot hangup card is `cards/test_bot_hangup.card.md`. +- The scenario deck is `tests/buyer_feedback.deck.md`. +- The scenario hangup card is `cards/test_bot_hangup.card.md`. diff --git a/scaffolds/demo/examples/advanced/cli_handlers_md/README.md b/scaffolds/demo/examples/advanced/cli_handlers_md/README.md index bd7032deb..64a6f4ff3 100644 --- a/scaffolds/demo/examples/advanced/cli_handlers_md/README.md +++ b/scaffolds/demo/examples/advanced/cli_handlers_md/README.md @@ -25,5 +25,5 @@ deno run -A jsr:@bolt-foundry/gambit/cli run ./handlers_md.deck.md \ ## Notes - Action decks live in `actions/decks/` and handler decks live in `handlers/`. -- The test bot deck is `tests/handlers_md_test.deck.md`. -- The test bot hangup card is `cards/test_bot_hangup.card.md`. +- The scenario deck is `tests/handlers_md_test.deck.md`. +- The scenario hangup card is `cards/test_bot_hangup.card.md`. diff --git a/scaffolds/demo/examples/advanced/cli_handlers_ts/README.md b/scaffolds/demo/examples/advanced/cli_handlers_ts/README.md index 724bdff85..e6abcd0a4 100644 --- a/scaffolds/demo/examples/advanced/cli_handlers_ts/README.md +++ b/scaffolds/demo/examples/advanced/cli_handlers_ts/README.md @@ -25,5 +25,5 @@ deno run -A jsr:@bolt-foundry/gambit/cli run ./handlers_ts.deck.md \ ## Notes - Action decks live in `actions/decks/` and handler decks live in `handlers/`. -- The test bot deck is `tests/handlers_ts_test.deck.md`. -- The test bot hangup card is `cards/test_bot_hangup.card.md`. +- The scenario deck is `tests/handlers_ts_test.deck.md`. +- The scenario hangup card is `cards/test_bot_hangup.card.md`. diff --git a/scaffolds/demo/examples/advanced/cli_internal_monolog/README.md b/scaffolds/demo/examples/advanced/cli_internal_monolog/README.md index 6f23ec3bb..8e889a986 100644 --- a/scaffolds/demo/examples/advanced/cli_internal_monolog/README.md +++ b/scaffolds/demo/examples/advanced/cli_internal_monolog/README.md @@ -25,5 +25,5 @@ deno run -A jsr:@bolt-foundry/gambit/cli run ./internal_monolog_parent.deck.md \ ## Notes - The child deck is `monolog_child.deck.md` and schemas live in `schemas/`. -- The test bot deck is `tests/internal_monolog_test.deck.md`. -- The test bot hangup card is `cards/test_bot_hangup.card.md`. +- The scenario deck is `tests/internal_monolog_test.deck.md`. +- The scenario hangup card is `cards/test_bot_hangup.card.md`. diff --git a/scaffolds/demo/examples/advanced/patient_swapper/README.md b/scaffolds/demo/examples/advanced/patient_swapper/README.md index e4253dce2..8953a51ee 100644 --- a/scaffolds/demo/examples/advanced/patient_swapper/README.md +++ b/scaffolds/demo/examples/advanced/patient_swapper/README.md @@ -25,5 +25,5 @@ deno run -A jsr:@bolt-foundry/gambit/cli run ./patient_swapper.deck.md \ ## Notes - Cards live in `cards/` and schemas live in `schemas/`. -- The test bot deck is `tests/patient_swapper_test.deck.md`. -- The test bot hangup card is `cards/test_bot_hangup.card.md`. +- The scenario deck is `tests/patient_swapper_test.deck.md`. +- The scenario hangup card is `cards/test_bot_hangup.card.md`. diff --git a/scaffolds/demo/examples/advanced/pokemon_advice/root.deck.ts b/scaffolds/demo/examples/advanced/pokemon_advice/root.deck.ts index 1440d121d..d71cd6ec8 100644 --- a/scaffolds/demo/examples/advanced/pokemon_advice/root.deck.ts +++ b/scaffolds/demo/examples/advanced/pokemon_advice/root.deck.ts @@ -9,7 +9,7 @@ export default defineDeck({ modelParams: { model: "openai/gpt-4o-mini", temperature: 0.4 }, testDecks: [ { - label: "Schema form test bot", + label: "Schema form scenario", path: "./tests/schema_form_test.deck.md", description: "Synthetic caller that asks for Pokemon advice.", }, diff --git a/scaffolds/demo/examples/advanced/pokemon_advice/schema_form.deck.ts b/scaffolds/demo/examples/advanced/pokemon_advice/schema_form.deck.ts index 5d8e76b44..6c6237a44 100644 --- a/scaffolds/demo/examples/advanced/pokemon_advice/schema_form.deck.ts +++ b/scaffolds/demo/examples/advanced/pokemon_advice/schema_form.deck.ts @@ -9,7 +9,7 @@ export default defineDeck({ modelParams: { model: "openai/gpt-4o-mini", temperature: 0.4 }, testDecks: [ { - label: "Schema form test bot", + label: "Schema form scenario", path: "./tests/schema_form_test.deck.md", description: "Synthetic caller that asks for Pokemon advice.", }, diff --git a/scaffolds/demo/examples/advanced/policy_support_bot/README.md b/scaffolds/demo/examples/advanced/policy_support_bot/README.md index ca3d08a95..1caa9219f 100644 --- a/scaffolds/demo/examples/advanced/policy_support_bot/README.md +++ b/scaffolds/demo/examples/advanced/policy_support_bot/README.md @@ -21,10 +21,10 @@ includes: | `cards/*.card.md` | Persona, user persona, and behavior cards reused by the root deck. | | `schemas/*.zod.ts` | Zod schemas for bot outputs and search inputs. | | `tests/faq_dataset.test.ts` | Deno unit tests that ensure the FAQ knowledge base stays intact. | -| `tests/new_account_persona.deck.md` | Synthetic persona deck for the Test Bot tab. | +| `tests/new_account_persona.deck.md` | Synthetic persona deck for the Scenario tab. | | `demo-script.md` | Suggested prompts (answers + refusals) for the Gambit Debug UI. | -To wire synthetic QA personas into the Test Bot tab, add `[[testDecks]]` entries +To wire synthetic QA personas into the Scenario tab, add `[[testDecks]]` entries to `policy_support_bot.deck.md` that point at persona decks (for example `./tests/new_account_persona.deck.md`). Those persona decks should set `acceptsUserTurns = true` and can declare an `contextSchema` so the Scenario diff --git a/scaffolds/demo/examples/advanced/simpsons_explainer/README.md b/scaffolds/demo/examples/advanced/simpsons_explainer/README.md index b357a1a33..9d865d9e8 100644 --- a/scaffolds/demo/examples/advanced/simpsons_explainer/README.md +++ b/scaffolds/demo/examples/advanced/simpsons_explainer/README.md @@ -23,10 +23,10 @@ From this folder: deno run -A jsr:@bolt-foundry/gambit@^0.5.3-dev/cli serve demo.deck.md ``` -## Run a test bot (UI) +## Run a scenario (UI) 1. Open the simulator UI and go to the "Test" tab. -2. Click "Run test bot". +2. Click "Run scenario". 3. Switch to the "Grade" tab and click "Run grader". 4. After the grader completes, review run 2 (expect a -3 score). 5. Share the failing run details with Codex and ask it to fix the prompt. @@ -34,6 +34,6 @@ deno run -A jsr:@bolt-foundry/gambit@^0.5.3-dev/cli serve demo.deck.md ## Notes -- Test decks are registered in `cards/test_decks.card.md`. +- Scenario decks are registered in `cards/test_decks.card.md`. - Graders are registered in `cards/grader_decks.card.md`. - Instruction cards live in `cards/` and are included by `demo.deck.md`. diff --git a/scaffolds/demo/examples/advanced/simpsons_explainer/cards/test_decks.card.md b/scaffolds/demo/examples/advanced/simpsons_explainer/cards/test_decks.card.md index 6b5bc8065..8c9cc4976 100644 --- a/scaffolds/demo/examples/advanced/simpsons_explainer/cards/test_decks.card.md +++ b/scaffolds/demo/examples/advanced/simpsons_explainer/cards/test_decks.card.md @@ -4,10 +4,10 @@ label = "demo_test_decks" [[testDecks]] label = "Planets question" path = "../test_bots/planets.deck.md" -description = "Test bot that asks a basic planets-orbit question, with optional initialQuestion override." +description = "Scenario that asks a basic planets-orbit question, with optional initialQuestion override." [[testDecks]] label = "Quantum entanglement question" path = "../test_bots/quantum_entanglement.deck.md" -description = "Test bot that asks about quantum entanglement, with optional initialQuestion override." +description = "Scenario that asks about quantum entanglement, with optional initialQuestion override." +++ diff --git a/scaffolds/demo/examples/advanced/simpsons_explainer/schemas/test_bot_input.zod.ts b/scaffolds/demo/examples/advanced/simpsons_explainer/schemas/test_bot_input.zod.ts index 0bef4deae..38a9efa9c 100644 --- a/scaffolds/demo/examples/advanced/simpsons_explainer/schemas/test_bot_input.zod.ts +++ b/scaffolds/demo/examples/advanced/simpsons_explainer/schemas/test_bot_input.zod.ts @@ -2,6 +2,6 @@ import { z } from "npm:zod"; export default z.object({ initialQuestion: z.string().describe( - "Optional override for the test bot's first user question.", + "Optional override for the scenario's first user question.", ).optional(), }); diff --git a/scaffolds/demo/examples/advanced/voice_front_desk/README.md b/scaffolds/demo/examples/advanced/voice_front_desk/README.md index b3a072c92..1b1646119 100644 --- a/scaffolds/demo/examples/advanced/voice_front_desk/README.md +++ b/scaffolds/demo/examples/advanced/voice_front_desk/README.md @@ -31,14 +31,14 @@ deno run -A jsr:@bolt-foundry/gambit/cli run ./decks/root.deck.md \ --message '"Hi, this is Nina. I need to move my physical."' --stream ``` -## Run a test bot (UI) +## Run a scenario (UI) 1. Open the simulator UI and go to the "Test" tab. 2. Pick a persona from the list (for example, "New patient intake"). -3. Click "Run test bot". +3. Click "Run scenario". ## Notes - Root decks live in `decks/` and action decks live in `actions/`. - Test personas are listed in `cards/test_decks.card.md`. -- The test bot hangup card is `tests/cards/test_bot_hangup.card.md`. +- The scenario hangup card is `tests/cards/test_bot_hangup.card.md`. diff --git a/scaffolds/demo/examples/advanced/voice_front_desk/tests/faq_first_caller_input.zod.ts b/scaffolds/demo/examples/advanced/voice_front_desk/tests/faq_first_caller_input.zod.ts index b907d1a32..8bb2a36cf 100644 --- a/scaffolds/demo/examples/advanced/voice_front_desk/tests/faq_first_caller_input.zod.ts +++ b/scaffolds/demo/examples/advanced/voice_front_desk/tests/faq_first_caller_input.zod.ts @@ -2,6 +2,6 @@ import { z } from "npm:zod"; export default z.object({ scenarioDescription: z.string().describe( - "Optional instructions that describe the FAQ scenario the test bot should play out", + "Optional instructions that describe the FAQ scenario the scenario should play out", ).optional(), }); diff --git a/scaffolds/demo/examples/advanced/voice_front_desk/tests/privacy_sensitive_patient_input.zod.ts b/scaffolds/demo/examples/advanced/voice_front_desk/tests/privacy_sensitive_patient_input.zod.ts index e68b2f277..8f66e92e8 100644 --- a/scaffolds/demo/examples/advanced/voice_front_desk/tests/privacy_sensitive_patient_input.zod.ts +++ b/scaffolds/demo/examples/advanced/voice_front_desk/tests/privacy_sensitive_patient_input.zod.ts @@ -2,6 +2,6 @@ import { z } from "npm:zod"; export default z.object({ scenarioDescription: z.string().describe( - "Optional instructions that describe the scenario the test bot should play out", + "Optional instructions that describe the scenario the scenario should play out", ).optional(), }); diff --git a/scaffolds/demo/examples/faq-bot-example/faq_dataset.md b/scaffolds/demo/examples/faq-bot-example/faq_dataset.md index a01206802..1b4450d1c 100644 --- a/scaffolds/demo/examples/faq-bot-example/faq_dataset.md +++ b/scaffolds/demo/examples/faq-bot-example/faq_dataset.md @@ -20,10 +20,10 @@ criteria so you can see what passes and fails. An action deck is a callable tool that performs a task or fetches data and returns structured output. -## What is a test deck? +## What is a scenario deck? -A test deck simulates a user or scenario so you can run repeatable tests against -a deck. +A scenario deck simulates a user or scenario so you can run repeatable tests +against a deck. ## What is a card? @@ -40,8 +40,8 @@ Use the Gambit CLI to run a deck from the command line. ## What modules ship in Gambit? -- Deck editor for building root/action/test decks. +- Deck editor for building root/action/scenario decks. - Debug UI for running conversations and inspecting traces. -- Test bot panel for scripted personas and graders. +- Scenario panel for scripted personas and graders. - Coverage dashboard for reviewing grader outcomes. - Bundle/export tooling to package decks for deployment. diff --git a/scaffolds/demo/hello.deck.md b/scaffolds/demo/hello.deck.md index 700f69eb0..166603614 100644 --- a/scaffolds/demo/hello.deck.md +++ b/scaffolds/demo/hello.deck.md @@ -2,7 +2,7 @@ label = "hello" modelParams = { model = "openai/gpt-4o-mini", temperature = 0 } [[testDecks]] -label = "Hello test bot" +label = "Hello scenario" path = "./hello.test.deck.md" description = "Synthetic user that sends a single greeting input." [[graderDecks]] diff --git a/scaffolds/init/README.md b/scaffolds/init/README.md index fc66ea749..56a924d41 100644 --- a/scaffolds/init/README.md +++ b/scaffolds/init/README.md @@ -31,7 +31,7 @@ workspace with opinionated folders, ready for your own decks/actions/graders. - `decks/` – root decks (a starter `root.deck.md` is included for you to edit). - `actions/` – reusable tool/action decks or cards. - `graders/` – guard rails and grading decks. -- `tests/` – synthetic personas/test bots. +- `tests/` – synthetic personas/scenarios. - `schemas/` – Zod schemas shared across decks/tests. - `.gambit/` – local sessions/traces (safe to clear, usually ignored by git). - `gambit.toml` – workspace configuration (folders + model aliases). diff --git a/scaffolds/init/package.json b/scaffolds/init/package.json index f86685dd8..b22e8d315 100644 --- a/scaffolds/init/package.json +++ b/scaffolds/init/package.json @@ -4,7 +4,7 @@ "scripts": { "repl": "npx @bolt-foundry/gambit repl", "serve": "npx @bolt-foundry/gambit serve", - "test": "npx @bolt-foundry/gambit test-bot" + "test": "npx @bolt-foundry/gambit scenario" }, "dependencies": {} } diff --git a/scaffolds/init/schemas/README.md b/scaffolds/init/schemas/README.md index ce51eec94..1780b702c 100644 --- a/scaffolds/init/schemas/README.md +++ b/scaffolds/init/schemas/README.md @@ -1,7 +1,7 @@ # Schemas Keep shared Zod/TypeScript schemas here. Schemas provide typed inputs/outputs -for decks, actions, graders, and test bots so runs remain predictable. +for decks, actions, graders, and scenarios so runs remain predictable. Ideas: diff --git a/scaffolds/init/tests/README.md b/scaffolds/init/tests/README.md index 2f8ba5b57..e4d296430 100644 --- a/scaffolds/init/tests/README.md +++ b/scaffolds/init/tests/README.md @@ -1,7 +1,7 @@ # Tests -Drop synthetic personas, test bots, or scripted scenarios here. Use them with -`gambit test-bot` to simulate user conversations and verify decks without real +Drop synthetic personas or scripted scenarios here. Use them with +`gambit scenario` to simulate user conversations and verify decks without real users. Typical flow: @@ -9,5 +9,5 @@ Typical flow: 1. Write a persona file (e.g. `tests/new_patient.deck.md`) that exercises a deck. 2. Run - `npx @bolt-foundry/gambit test-bot decks/ --test-deck tests/`. + `npx @bolt-foundry/gambit scenario decks/ --test-deck tests/`. 3. Capture regressions before shipping changes. diff --git a/simulator-ui/demo/gambit-build-tab-demo-timeline.ts b/simulator-ui/demo/gambit-build-tab-demo-timeline.ts index 192b8555c..2e8f0f8d2 100644 --- a/simulator-ui/demo/gambit-build-tab-demo-timeline.ts +++ b/simulator-ui/demo/gambit-build-tab-demo-timeline.ts @@ -98,22 +98,13 @@ export function buildTabDemoTimeline(opts: { { type: "screenshot", label: "03-build-file-policy" }, { type: "wait-for", - selector: '[data-testid="build-changes-count"]', - text: /[1-9]/, + selector: '[data-testid="build-chat-input"]:not([disabled])', timeoutMs: 120_000, }, { type: "wait", ms: 500 }, { type: "screenshot", label: "02-build-start" }, ); - const beatReviewChanges: DemoTimelineStep[] = [ - { type: "click", selector: ".build-recent-changes-trigger" }, - { type: "wait-for", selector: '[data-testid="build-changes-panel"]' }, - { type: "scroll", selector: '[data-testid="build-changes-panel"]' }, - { type: "wait", ms: 500 }, - { type: "screenshot", label: "04-build-recent-changes" }, - ]; - const beatCheckTabs: DemoTimelineStep[] = []; beatCheckTabs.push( { type: "click", selector: '[data-testid="nav-test"]' }, @@ -157,7 +148,6 @@ export function buildTabDemoTimeline(opts: { return [ ...beatOpenBuild, ...beatPrompt, - ...beatReviewChanges, ...beatCheckTabs, ]; } diff --git a/simulator-ui/demo/gambit-ui-demo-script.md b/simulator-ui/demo/gambit-ui-demo-script.md index 41275dcd7..74f48916e 100644 --- a/simulator-ui/demo/gambit-ui-demo-script.md +++ b/simulator-ui/demo/gambit-ui-demo-script.md @@ -29,11 +29,11 @@ If you can run it, you can inspect it. That is the core promise. Start from a local checkout. In `packages/gambit`, run `deno run -A src/cli.ts serve init/examples/advanced/voice_front_desk/decks/root.deck.md --port 8000`, -then open `http://localhost:8000/test-bot`. +then open `http://localhost:8000/test`. -In Test Bot, select the New patient intake persona. Fill the scenario -description, caller name, and date of birth. The init form comes from the deck -input schema, so this run stays reproducible. +In Test, select the New patient intake persona. Fill the scenario description, +caller name, and date of birth. The init form comes from the deck input schema, +so this run stays reproducible. Click Run and let a few turns stream. We now have a session id that ties together the transcript, traces, and feedback. @@ -45,5 +45,5 @@ reason, and the exact turn context that drove the result. Go to Debug and inspect the run. The transcript shows every message, the trace pane shows every deck and tool event, and timing is captured along the way. -From here the loop is simple. Edit the deck in code, rerun Test Bot, and regrade -until the behavior is correct. +From here the loop is simple. Edit the deck in code, rerun the scenario, and +regrade until the behavior is correct. diff --git a/simulator-ui/demo/gambit-ui-demo-timeline.ts b/simulator-ui/demo/gambit-ui-demo-timeline.ts index 58ba8e0a0..1392d6877 100644 --- a/simulator-ui/demo/gambit-ui-demo-timeline.ts +++ b/simulator-ui/demo/gambit-ui-demo-timeline.ts @@ -116,19 +116,19 @@ const beatScenario: DemoTimelineStep[] = [ { type: "voiceover", text: - "The bot takes a typed input schema, so we’ll fill in a concrete FAQ-first scenario about Sunday hours and let the test bot play it out.", + "The bot takes a typed input schema, so we’ll fill in a concrete FAQ-first scenario about Sunday hours and let the scenario play it out.", showSubtitles: true, }, ]; -/** Beat 4: Run + inspect (Test Bot simulates and produces a reproducible session). */ +/** Beat 4: Run + inspect (Scenario simulates and produces a reproducible session). */ const beatRunInspect: DemoTimelineStep[] = [ // Demo note: temporarily drop the tool-backed response guardrail and add a "hallucinate wildly" card // to force a FAQ hallucination. Then add a grader to flag it, and restore the guardrail to fix. { type: "voiceover", text: - "We’ll simulate a patient call with a test bot and watch the workflow run end to end as it streams.", + "We’ll simulate a patient call with a scenario and watch the workflow run end to end as it streams.", showSubtitles: true, }, { @@ -184,7 +184,7 @@ const beatDebug: DemoTimelineStep[] = [ { type: "voiceover", text: - "Then we’ll run the test bot again and rerun the grader to confirm the fix.", + "Then we’ll run the scenario again and rerun the grader to confirm the fix.", showSubtitles: true, }, { diff --git a/simulator-ui/src/BuildChatContext.tsx b/simulator-ui/src/BuildChatContext.tsx index 694e191e5..838ec4897 100644 --- a/simulator-ui/src/BuildChatContext.tsx +++ b/simulator-ui/src/BuildChatContext.tsx @@ -1,70 +1,5 @@ -import React, { - createContext, - useCallback, - useContext, - useEffect, - useMemo, - useRef, - useState, -} from "react"; -import { - BUILD_STREAM_ID, - type BuildBotSocketMessage, - buildDurableStreamUrl, - getDurableStreamOffset, - setDurableStreamOffset, - summarizeToolCalls, - type ToolCallSummary, - type TraceEvent, -} from "./utils.ts"; - -type BuildRun = { - id: string; - status: "idle" | "running" | "completed" | "error" | "canceled"; - error?: string; - startedAt?: string; - finishedAt?: string; - messages: Array<{ - role: string; - content: string; - }>; - traces?: Array; - toolInserts?: Array<{ - actionCallId?: string; - parentActionCallId?: string; - name?: string; - index: number; - }>; -}; - -type BuildChatContextValue = { - run: BuildRun; - toolCalls: ToolCallSummary[]; - chatDraft: string; - setChatDraft: React.Dispatch>; - chatSending: boolean; - chatError: string | null; - setChatError: React.Dispatch>; - toolCallsOpen: Record; - setToolCallsOpen: React.Dispatch< - React.SetStateAction> - >; - optimisticUser: { id: string; text: string } | null; - setOptimisticUser: React.Dispatch< - React.SetStateAction<{ id: string; text: string } | null> - >; - streamingAssistant: { runId: string; turn: number; text: string } | null; - setStreamingAssistant: React.Dispatch< - React.SetStateAction< - { runId: string; turn: number; text: string } | null - > - >; - resetChat: () => Promise; - sendMessage: (message: string) => Promise; - loadChat: (runId: string) => Promise; -}; - -const BuildChatContext = createContext(null); +import React from "react"; +import { useWorkspaceBuild, WorkspaceProvider } from "./WorkspaceContext.tsx"; export function BuildChatProvider( props: { @@ -73,327 +8,9 @@ export function BuildChatProvider( onWorkspaceChange?: (workspaceId: string) => void; }, ) { - const { children, workspaceId, onWorkspaceChange } = props; - const [run, setRun] = useState({ - id: "", - status: "idle", - messages: [], - traces: [], - toolInserts: [], - }); - const runIdRef = useRef(""); - - const [chatDraft, setChatDraft] = useState(""); - const [chatSending, setChatSending] = useState(false); - const [chatError, setChatError] = useState(null); - const [toolCallsOpen, setToolCallsOpen] = useState>( - {}, - ); - const [optimisticUser, setOptimisticUser] = useState< - { id: string; text: string } | null - >(null); - const [streamingAssistant, setStreamingAssistant] = useState< - { runId: string; turn: number; text: string } | null - >(null); - - const refreshStatus = useCallback(async (opts?: { workspaceId?: string }) => { - const query = opts?.workspaceId - ? `?workspaceId=${encodeURIComponent(opts.workspaceId)}` - : ""; - const res = await fetch(`/api/build/status${query}`); - const data = await res.json().catch(() => ({})) as { run?: BuildRun }; - if (data.run) { - setRun({ - ...data.run, - messages: data.run.messages ?? [], - traces: data.run.traces ?? [], - toolInserts: data.run.toolInserts ?? [], - }); - if (typeof data.run.id === "string" && data.run.id) { - runIdRef.current = data.run.id; - } - } - }, [onWorkspaceChange]); - - useEffect(() => { - if (workspaceId) { - runIdRef.current = workspaceId; - refreshStatus({ workspaceId }).catch(() => {}); - return; - } - refreshStatus().catch(() => {}); - }, [refreshStatus, workspaceId]); - - useEffect(() => { - if (!workspaceId) return; - if (runIdRef.current === workspaceId) return; - runIdRef.current = workspaceId; - setRun((prev) => ({ - ...prev, - id: workspaceId, - })); - setChatError(null); - setStreamingAssistant(null); - setOptimisticUser(null); - setToolCallsOpen({}); - refreshStatus({ workspaceId }).catch(() => {}); - }, [refreshStatus, workspaceId]); - - useEffect(() => { - const streamId = BUILD_STREAM_ID; - const streamUrl = buildDurableStreamUrl( - streamId, - getDurableStreamOffset(streamId), - ); - const source = new EventSource(streamUrl); - - source.onmessage = (event) => { - let envelope: { offset?: unknown; data?: unknown } | null = null; - try { - envelope = JSON.parse(event.data) as { - offset?: unknown; - data?: unknown; - }; - } catch { - return; - } - if ( - envelope && - typeof envelope.offset === "number" && - Number.isFinite(envelope.offset) - ) { - setDurableStreamOffset(streamId, envelope.offset + 1); - } - const msg = envelope?.data as BuildBotSocketMessage | undefined; - if (!msg) return; - const activeRunId = runIdRef.current; - if (msg.type === "buildBotStatus" && msg.run) { - if (activeRunId && msg.run.id !== activeRunId) return; - setRun({ - ...msg.run, - messages: msg.run.messages ?? [], - traces: msg.run.traces ?? [], - toolInserts: msg.run.toolInserts ?? [], - } as BuildRun); - return; - } - if (msg.type === "buildBotStream") { - if (!msg.runId || (activeRunId && msg.runId !== activeRunId)) return; - const streamRunId = msg.runId; - const turn = typeof msg.turn === "number" ? msg.turn : 0; - if (msg.role !== "assistant") return; - setStreamingAssistant((prev) => - prev && prev.runId === streamRunId && prev.turn === turn - ? { ...prev, text: prev.text + msg.chunk } - : { runId: streamRunId, turn, text: msg.chunk } - ); - return; - } - if (msg.type === "buildBotStreamEnd") { - if (!msg.runId || (activeRunId && msg.runId !== activeRunId)) return; - const turn = typeof msg.turn === "number" ? msg.turn : 0; - setStreamingAssistant((prev) => - prev && prev.runId === msg.runId && prev.turn === turn ? null : prev - ); - } - }; - - return () => { - source.close(); - }; - }, []); - - const toolCalls = useMemo( - () => summarizeToolCalls(run.traces ?? []), - [run.traces], - ); - - const ensureWorkspaceId = useCallback(async () => { - if (workspaceId) return workspaceId; - if (runIdRef.current) return runIdRef.current; - try { - const res = await fetch("/api/workspace/new", { - method: "POST", - }); - const data = await res.json().catch(() => ({})) as { - workspaceId?: string; - }; - if (res.ok && typeof data.workspaceId === "string") { - const nextWorkspaceId = data.workspaceId; - runIdRef.current = nextWorkspaceId; - setRun((prev) => ({ ...prev, id: nextWorkspaceId })); - onWorkspaceChange?.(nextWorkspaceId); - return nextWorkspaceId; - } - } catch { - // ignore - } - const fallback = `workspace-${crypto.randomUUID()}`; - runIdRef.current = fallback; - setRun((prev) => ({ ...prev, id: fallback })); - return fallback; - }, [onWorkspaceChange, workspaceId]); - - const resetChat = useCallback(async () => { - const res = await fetch("/api/workspace/new", { method: "POST" }).catch( - () => null, - ); - const data = res - ? await res.json().catch(() => ({})) as { workspaceId?: string } - : {}; - if (res && res.ok && typeof data.workspaceId === "string") { - runIdRef.current = data.workspaceId; - setRun({ - id: data.workspaceId, - status: "idle", - messages: [], - traces: [], - toolInserts: [], - }); - onWorkspaceChange?.(data.workspaceId); - } else { - runIdRef.current = ""; - setRun({ - id: "", - status: "idle", - messages: [], - traces: [], - toolInserts: [], - }); - } - setChatDraft(""); - setChatError(null); - setStreamingAssistant(null); - setOptimisticUser(null); - setToolCallsOpen({}); - }, [onWorkspaceChange]); - - const sendMessage = useCallback(async (message: string) => { - const runId = await ensureWorkspaceId(); - setChatSending(true); - setChatError(null); - try { - const res = await fetch("/api/build/message", { - method: "POST", - headers: { "content-type": "application/json" }, - body: JSON.stringify({ workspaceId: runId, message }), - }); - const data = await res.json().catch(() => ({})) as { - run?: BuildRun; - error?: string; - }; - if (!res.ok) { - throw new Error( - typeof data.error === "string" ? data.error : res.statusText, - ); - } - if (data.run) { - setRun({ - ...data.run, - messages: data.run.messages ?? [], - traces: data.run.traces ?? [], - toolInserts: data.run.toolInserts ?? [], - }); - if (typeof data.run.id === "string" && data.run.id) { - runIdRef.current = data.run.id; - } - } - } finally { - setChatSending(false); - } - }, [ensureWorkspaceId]); - - const loadChat = useCallback(async (runId: string) => { - setChatSending(true); - setChatError(null); - try { - const res = await fetch("/api/build/load", { - method: "POST", - headers: { "content-type": "application/json" }, - body: JSON.stringify({ workspaceId: runId }), - }); - const data = await res.json().catch(() => ({})) as { - run?: BuildRun; - error?: string; - }; - if (!res.ok) { - throw new Error( - typeof data.error === "string" ? data.error : res.statusText, - ); - } - if (data.run) { - setRun({ - ...data.run, - messages: data.run.messages ?? [], - traces: data.run.traces ?? [], - toolInserts: data.run.toolInserts ?? [], - }); - if (typeof data.run.id === "string" && data.run.id) { - runIdRef.current = data.run.id; - } - if (typeof data.run.id === "string" && data.run.id) { - onWorkspaceChange?.(data.run.id); - } - setChatDraft(""); - setOptimisticUser(null); - setStreamingAssistant(null); - setToolCallsOpen({}); - } - } finally { - setChatSending(false); - } - }, []); - - const value = useMemo( - () => ({ - run, - toolCalls, - chatDraft, - setChatDraft, - chatSending, - chatError, - setChatError, - toolCallsOpen, - setToolCallsOpen, - optimisticUser, - setOptimisticUser, - streamingAssistant, - setStreamingAssistant, - resetChat, - sendMessage, - loadChat, - }), - [ - run, - toolCalls, - chatDraft, - setChatDraft, - chatSending, - chatError, - setChatError, - toolCallsOpen, - setToolCallsOpen, - optimisticUser, - setOptimisticUser, - streamingAssistant, - setStreamingAssistant, - resetChat, - sendMessage, - loadChat, - ], - ); - - return ( - - {children} - - ); + return ; } export function useBuildChat() { - const context = useContext(BuildChatContext); - if (!context) { - throw new Error("useBuildChat must be used within BuildChatProvider"); - } - return context; + return useWorkspaceBuild(); } diff --git a/simulator-ui/src/BuildPage.tsx b/simulator-ui/src/BuildPage.tsx index 2a27df9e2..24ce2a473 100644 --- a/simulator-ui/src/BuildPage.tsx +++ b/simulator-ui/src/BuildPage.tsx @@ -1,19 +1,16 @@ import React, { useCallback, useEffect, - useLayoutEffect, useMemo, useRef, useState, } from "react"; -import { createPortal } from "react-dom"; import { type ToolCallSummary, workspaceOnboardingEnabled } from "./utils.ts"; import PageShell from "./gds/PageShell.tsx"; import PageGrid from "./gds/PageGrid.tsx"; import Panel from "./gds/Panel.tsx"; -import Badge from "./gds/Badge.tsx"; import Listbox, { type ListboxOption } from "./gds/Listbox.tsx"; -import { useBuildChat } from "./BuildChatContext.tsx"; +import { useWorkspaceBuild } from "./WorkspaceContext.tsx"; type BuildFileEntry = { path: string; @@ -90,7 +87,7 @@ export default function BuildPage(props: { }) { const { setNavActions } = props; - const { run, toolCalls } = useBuildChat(); + const { run, toolCalls } = useWorkspaceBuild(); const [fileEntries, setFileEntries] = useState([]); const [fileListLoading, setFileListLoading] = useState(false); const [fileListError, setFileListError] = useState(null); @@ -98,14 +95,10 @@ export default function BuildPage(props: { const [filePreview, setFilePreview] = useState({ status: "idle", }); - const [recentChangesOpen, setRecentChangesOpen] = useState(false); - const [recentChangesReadCount, setRecentChangesReadCount] = useState(0); - const recentChangesTriggerRef = useRef(null); - const recentChangesPopoverRef = useRef(null); - const [recentChangesPopoverStyle, setRecentChangesPopoverStyle] = useState< - React.CSSProperties | null - >(null); const lastTraceCountRef = useRef(0); + const traceRefreshTimerRef = useRef(null); + const fileListRefreshInFlightRef = useRef(false); + const fileListRefreshQueuedRef = useRef(false); useEffect(() => { if (!setNavActions) return; @@ -114,39 +107,68 @@ export default function BuildPage(props: { }, [setNavActions]); const refreshFileList = useCallback(async () => { + if (fileListRefreshInFlightRef.current) { + fileListRefreshQueuedRef.current = true; + return; + } + fileListRefreshInFlightRef.current = true; setFileListLoading(true); setFileListError(null); try { - const query = run.id ? `?workspaceId=${encodeURIComponent(run.id)}` : ""; - const res = await fetch(`/api/build/files${query}`); - const data = await res.json().catch(() => ({})) as { - entries?: BuildFileEntry[]; - error?: string; - }; - if (!res.ok) { - throw new Error( - typeof data.error === "string" ? data.error : res.statusText, - ); + let shouldRun = true; + while (shouldRun) { + fileListRefreshQueuedRef.current = false; + const query = run.id + ? `?workspaceId=${encodeURIComponent(run.id)}` + : ""; + const res = await fetch(`/api/build/files${query}`); + const data = await res.json().catch(() => ({})) as { + entries?: BuildFileEntry[]; + error?: string; + }; + if (!res.ok) { + throw new Error( + typeof data.error === "string" ? data.error : res.statusText, + ); + } + setFileEntries(Array.isArray(data.entries) ? data.entries : []); + shouldRun = fileListRefreshQueuedRef.current; } - setFileEntries(Array.isArray(data.entries) ? data.entries : []); } catch (err) { setFileListError(err instanceof Error ? err.message : String(err)); setFileEntries([]); } finally { + fileListRefreshInFlightRef.current = false; setFileListLoading(false); } }, [run.id]); + const clearTraceRefreshTimer = useCallback(() => { + if (traceRefreshTimerRef.current === null) return; + clearTimeout(traceRefreshTimerRef.current); + traceRefreshTimerRef.current = null; + }, []); + useEffect(() => { refreshFileList().catch(() => {}); }, [refreshFileList]); useEffect(() => { const traceCount = run.traces?.length ?? 0; + clearTraceRefreshTimer(); if (traceCount === lastTraceCountRef.current) return; lastTraceCountRef.current = traceCount; - refreshFileList().catch(() => {}); - }, [run.traces?.length, refreshFileList]); + traceRefreshTimerRef.current = setTimeout(() => { + traceRefreshTimerRef.current = null; + refreshFileList().catch(() => {}); + }, 250); + }, [clearTraceRefreshTimer, run.traces?.length, refreshFileList]); + + useEffect(() => { + return () => { + clearTraceRefreshTimer(); + }; + }, [clearTraceRefreshTimer]); const fileEntriesByPath = useMemo(() => { const map = new Map(); @@ -196,6 +218,17 @@ export default function BuildPage(props: { ? fileEntriesByPath.get(selectedPath) : undefined; + const selectedPathChangeToken = useMemo(() => { + if (!selectedPath) return ""; + for (let i = toolCalls.length - 1; i >= 0; i -= 1) { + const change = extractBotWriteChange(toolCalls[i]); + if (change?.path === selectedPath) { + return `${change.id}:${change.action ?? ""}`; + } + } + return ""; + }, [selectedPath, toolCalls]); + useEffect(() => { const filePaths = Array.from(fileEntriesByPath.keys()); const hasSelected = selectedPath && fileEntriesByPath.has(selectedPath); @@ -278,102 +311,13 @@ export default function BuildPage(props: { return () => { canceled = true; }; - }, [selectedPath]); - - const toolInsertIndexByCall = useMemo(() => { - const map = new Map(); - for (const insert of run.toolInserts ?? []) { - if (!insert.actionCallId) continue; - map.set( - insert.actionCallId, - typeof insert.index === "number" ? insert.index : 0, - ); - } - return map; - }, [run.toolInserts]); - - const changes = useMemo(() => { - return toolCalls - .map(extractBotWriteChange) - .filter((entry): entry is NonNullable => Boolean(entry)); - }, [toolCalls]); - - const auditTrail = useMemo(() => { - const limited = changes.slice(-50); - return limited.map((change) => ({ - ...change, - turn: toolInsertIndexByCall.get(change.id), - })); - }, [changes, toolInsertIndexByCall]); - - const unreadRecentChangesCount = Math.max( - 0, - changes.length - recentChangesReadCount, - ); - - const updateRecentChangesPopover = useCallback(() => { - const trigger = recentChangesTriggerRef.current; - if (!trigger) return; - const rect = trigger.getBoundingClientRect(); - const width = Math.min(380, Math.max(260, window.innerWidth - 24)); - const left = Math.max( - 12, - Math.min(rect.right - width, window.innerWidth - width - 12), - ); - setRecentChangesPopoverStyle({ - position: "fixed", - top: rect.bottom + 6, - left, - width, - }); - }, []); - - useLayoutEffect(() => { - if (!recentChangesOpen) return; - updateRecentChangesPopover(); - }, [recentChangesOpen, updateRecentChangesPopover]); - - useEffect(() => { - if (!recentChangesOpen) { - setRecentChangesPopoverStyle(null); - return; - } - const handleOutside = (event: MouseEvent) => { - const target = event.target as Node | null; - const isInTrigger = recentChangesTriggerRef.current && - target && - recentChangesTriggerRef.current.contains(target); - const isInPopover = recentChangesPopoverRef.current && - target && - recentChangesPopoverRef.current.contains(target); - if (!isInTrigger && !isInPopover) { - setRecentChangesOpen(false); - } - }; - const handleKey = (event: KeyboardEvent) => { - if (event.key === "Escape") { - event.preventDefault(); - setRecentChangesOpen(false); - } - }; - const handleReposition = () => updateRecentChangesPopover(); - document.addEventListener("mousedown", handleOutside); - document.addEventListener("keydown", handleKey); - window.addEventListener("resize", handleReposition); - window.addEventListener("scroll", handleReposition, true); - return () => { - document.removeEventListener("mousedown", handleOutside); - document.removeEventListener("keydown", handleKey); - window.removeEventListener("resize", handleReposition); - window.removeEventListener("scroll", handleReposition, true); - }; - }, [recentChangesOpen, updateRecentChangesPopover]); - - useEffect(() => { - setRecentChangesOpen(false); - setRecentChangesReadCount(0); - setRecentChangesPopoverStyle(null); - }, [run.id]); + }, [ + run.id, + selectedPath, + selectedEntry?.modifiedAt, + selectedEntry?.size, + selectedPathChangeToken, + ]); return ( @@ -410,35 +354,6 @@ export default function BuildPage(props: { {formatBytes(selectedEntry.size)} )} - @@ -477,48 +392,6 @@ export default function BuildPage(props: { )} - {recentChangesOpen && recentChangesPopoverStyle && - createPortal( -
- {auditTrail.length === 0 - ?
No recent changes yet.
- : ( -
- {[...auditTrail].reverse().map((change, idx) => ( - - ))} -
- )} -
, - document.body, - )}
diff --git a/simulator-ui/src/Chat.test.tsx b/simulator-ui/src/Chat.test.tsx new file mode 100644 index 000000000..54f8f819d --- /dev/null +++ b/simulator-ui/src/Chat.test.tsx @@ -0,0 +1,748 @@ +import { assert, assertEquals } from "@std/assert"; +import { FakeTime } from "@std/testing/time"; +import React from "react"; +import TestRenderer, { act } from "npm:react-test-renderer@19.2.0"; +import type { ReactTestInstance } from "npm:react-test-renderer@19.2.0"; + +const globals = globalThis as unknown as { + window?: Record; + EventSource?: unknown; + fetch?: typeof fetch; + localStorage?: Storage; +}; +if (!globals.window) globals.window = {}; +(globalThis as { IS_REACT_ACT_ENVIRONMENT?: boolean }) + .IS_REACT_ACT_ENVIRONMENT = true; + +class MemoryStorage implements Storage { + #data = new Map(); + + get length(): number { + return this.#data.size; + } + + clear(): void { + this.#data.clear(); + } + + getItem(key: string): string | null { + return this.#data.has(key) ? this.#data.get(key)! : null; + } + + key(index: number): string | null { + return Array.from(this.#data.keys())[index] ?? null; + } + + removeItem(key: string): void { + this.#data.delete(key); + } + + setItem(key: string, value: string): void { + this.#data.set(key, value); + } +} + +if (!globals.localStorage) { + globals.localStorage = new MemoryStorage(); +} +const windowObj = globals.window as { + localStorage?: Storage; + location?: { pathname: string; search: string }; +}; +windowObj.localStorage = globals.localStorage; +if (!windowObj.location) { + windowObj.location = { pathname: "/workspaces/ws-1/build", search: "" }; +} + +const { + default: Chat, + BuildChatRows, + ChatView, + bucketBuildChatDisplay, + deriveBuildChatActivityState, + formatElapsedDuration, +} = await import("./Chat.tsx"); +const { WorkspaceProvider } = await import("./WorkspaceContext.tsx"); +const { globalStyles } = await import("./styles.ts"); +type BuildDisplayMessage = import("./utils.ts").BuildDisplayMessage; +type WorkspaceSocketMessage = import("./utils.ts").WorkspaceSocketMessage; +type BuildChatViewState = import("./Chat.tsx").BuildChatViewState; + +type ToolCallSummary = import("./utils.ts").ToolCallSummary; + +class FakeEventSource { + static instances: FakeEventSource[] = []; + onmessage: ((event: MessageEvent) => void) | null = null; + url: string; + closed = false; + + constructor(url: string) { + this.url = url; + FakeEventSource.instances.push(this); + } + + close() { + this.closed = true; + } + + emit(message: WorkspaceSocketMessage, offset = 1) { + this.onmessage?.( + new MessageEvent("message", { + data: JSON.stringify({ offset, data: message }), + }), + ); + } +} + +function makeTool(id: string, name = "tool_name"): ToolCallSummary { + return { + key: id, + id, + actionCallId: id, + name, + status: "completed", + }; +} + +function makeChatState( + overrides: Partial = {}, +): BuildChatViewState { + const baseRun = { + id: "run-1", + status: "idle" as const, + messages: [] as Array<{ role: string; content: string }>, + traces: [], + toolInserts: [], + displayMessages: [] as BuildDisplayMessage[], + }; + const mergedRun = { + ...baseRun, + ...(overrides.run ?? {}), + }; + return { + toolCalls: [], + chatDraft: "", + setChatDraft: () => {}, + chatSending: false, + chatError: null, + setChatError: () => {}, + toolCallsOpen: {}, + setToolCallsOpen: () => {}, + optimisticUser: null, + setOptimisticUser: () => {}, + streamingAssistant: null, + setStreamingAssistant: () => {}, + resetChat: async () => {}, + sendMessage: async () => {}, + stopChat: async () => {}, + loadChat: async () => {}, + ...overrides, + run: mergedRun, + }; +} + +Deno.test("bucketBuildChatDisplay collapses adjacent non-message rows into one activity block", () => { + const display: BuildDisplayMessage[] = [ + { kind: "message", role: "user", content: "start" }, + { kind: "tool", toolCallId: "tool-1", toolSummary: makeTool("tool-1") }, + { kind: "tool", toolCallId: "tool-2", toolSummary: makeTool("tool-2") }, + { kind: "reasoning", reasoningId: "r-1", content: "old" }, + { kind: "reasoning", reasoningId: "r-2", content: "latest" }, + { kind: "message", role: "assistant", content: "done" }, + ]; + + const buckets = bucketBuildChatDisplay(display); + assertEquals(buckets.map((bucket) => bucket.kind), [ + "message", + "activity", + "message", + ]); + + const activityBucket = buckets[1]; + assert(activityBucket && activityBucket.kind === "activity"); + assertEquals(activityBucket.entries.length, 4); + assertEquals(activityBucket.latestContent, "latest"); + assertEquals(activityBucket.reasoningCount, 2); + assertEquals(activityBucket.toolCount, 2); + assertEquals(activityBucket.latestToolLabel, null); +}); + +Deno.test("bucketBuildChatDisplay clears stale tool preview when new reasoning starts", () => { + const display: BuildDisplayMessage[] = [ + { kind: "reasoning", reasoningId: "r-1", content: "step 1" }, + { kind: "tool", toolCallId: "tool-1", toolSummary: makeTool("tool-1") }, + { kind: "reasoning", reasoningId: "r-2", content: "step 2" }, + ]; + + const buckets = bucketBuildChatDisplay(display); + assertEquals(buckets.map((bucket) => bucket.kind), ["activity"]); + const activityBucket = buckets[0]; + assert(activityBucket && activityBucket.kind === "activity"); + assertEquals(activityBucket.latestContent, "step 2"); + assertEquals(activityBucket.latestToolLabel, null); +}); + +Deno.test("bucketBuildChatDisplay preserves non-adjacent boundaries", () => { + const display: BuildDisplayMessage[] = [ + { kind: "tool", toolCallId: "tool-1", toolSummary: makeTool("tool-1") }, + { kind: "message", role: "assistant", content: "mid" }, + { kind: "tool", toolCallId: "tool-2", toolSummary: makeTool("tool-2") }, + ]; + + const buckets = bucketBuildChatDisplay(display); + assertEquals(buckets.map((bucket) => bucket.kind), [ + "activity", + "message", + "activity", + ]); +}); + +Deno.test("BuildChatRows renders latest activity preview and toggles full details", async () => { + const display: BuildDisplayMessage[] = [ + { kind: "tool", toolCallId: "tool-1", toolSummary: makeTool("tool-1") }, + { kind: "tool", toolCallId: "tool-2", toolSummary: makeTool("tool-2") }, + { + kind: "reasoning", + reasoningId: "r-1", + content: "first reasoning", + reasoningRaw: { step: 1 }, + }, + { + kind: "reasoning", + reasoningId: "r-2", + content: "latest reasoning", + reasoningRaw: { step: 2 }, + }, + ]; + + let renderer: TestRenderer.ReactTestRenderer | null = null; + try { + await act(async () => { + renderer = TestRenderer.create(); + }); + assert(renderer); + + const titles = renderer.root.findAll((node: ReactTestInstance) => + node.props.className === "activity-toggle-title" + ); + assertEquals(titles.length, 1); + assertEquals(String(titles[0].children.join("")), "Activity"); + + const reasoningBadges = renderer.root.findAll((node: ReactTestInstance) => + node.type === "span" && + typeof node.props.className === "string" && + node.props.className.includes("activity-count-badge-reasoning") + ); + assertEquals(reasoningBadges.length, 1); + assertEquals(String(reasoningBadges[0].children.join("")), "Reasoning: 2"); + + const toolBadges = renderer.root.findAll((node: ReactTestInstance) => + node.type === "span" && + typeof node.props.className === "string" && + node.props.className.includes("activity-count-badge-tool") + ); + assertEquals(toolBadges.length, 1); + assertEquals(String(toolBadges[0].children.join("")), "Tool calls: 2"); + + const actions = renderer.root.findAll((node: ReactTestInstance) => + node.props.className === "activity-toggle-action" + ); + assertEquals(actions.length, 1); + assertEquals(String(actions[0].children.join("")), "• Show"); + + const toggles = renderer.root.findAll((node: ReactTestInstance) => + node.props.className === "tool-calls-toggle activity-toggle" + ); + assertEquals(toggles.length, 1); + + const previewToolText = renderer.root.findAll((node: ReactTestInstance) => + node.props.className === "activity-preview-tool" + ).map((node: ReactTestInstance) => String(node.children.join(" "))).join( + "\n", + ); + assert(!previewToolText.includes("Tool call")); + + const previewToolRows = renderer.root.findAll((node: ReactTestInstance) => + node.props.className === "activity-preview-tool" + ); + assertEquals(previewToolRows.length, 0); + + const toolToggle = toggles[0]; + await act(async () => { + toolToggle.props.onClick(); + }); + + const toolCallTitles = renderer.root.findAll((node: ReactTestInstance) => + node.props.className === "tool-call-title" && + String(node.children.join(" ")).includes("Tool call:") + ); + assertEquals(toolCallTitles.length, 2); + + const reasoningRows = renderer.root.findAll((node: ReactTestInstance) => + typeof node.props.className === "string" && + node.props.className.includes("reasoning-row") + ); + assertEquals(reasoningRows.length, 2); + } finally { + if (renderer) { + await act(async () => { + renderer?.unmount(); + }); + } + } +}); + +Deno.test("BuildChatRows exposes latest tool label on tool-count badge tooltip", async () => { + const display: BuildDisplayMessage[] = [ + { + kind: "reasoning", + reasoningId: "r-1", + content: "thinking", + reasoningRaw: { step: 1 }, + }, + { kind: "tool", toolCallId: "tool-1", toolSummary: makeTool("tool-1") }, + ]; + + let renderer: TestRenderer.ReactTestRenderer | null = null; + try { + await act(async () => { + renderer = TestRenderer.create(); + }); + assert(renderer); + + const previewToolRows = renderer.root.findAll((node: ReactTestInstance) => + node.props.className === "activity-preview-tool" + ); + assertEquals(previewToolRows.length, 0); + + const toolBadges = renderer.root.findAll((node: ReactTestInstance) => + node.type === "span" && + typeof node.props.className === "string" && + node.props.className.includes("activity-count-badge-tool") + ); + assertEquals(toolBadges.length, 1); + assert(typeof toolBadges[0].props["aria-describedby"] === "string"); + } finally { + if (renderer) { + await act(async () => { + renderer?.unmount(); + }); + } + } +}); + +Deno.test("BuildChatRows highlights activity badge when count increases", async () => { + const initialDisplay: BuildDisplayMessage[] = [ + { + kind: "reasoning", + reasoningId: "r-1", + content: "thinking", + reasoningRaw: { step: 1 }, + }, + { kind: "tool", toolCallId: "tool-1", toolSummary: makeTool("tool-1") }, + ]; + const increasedDisplay: BuildDisplayMessage[] = [ + ...initialDisplay, + { kind: "tool", toolCallId: "tool-2", toolSummary: makeTool("tool-2") }, + ]; + + let renderer: TestRenderer.ReactTestRenderer | null = null; + try { + await act(async () => { + renderer = TestRenderer.create( + , + ); + }); + assert(renderer); + + let highlightedToolBadges = renderer.root.findAll(( + node: ReactTestInstance, + ) => + node.type === "span" && + typeof node.props.className === "string" && + node.props.className.includes("activity-count-badge-tool") && + node.props.className.includes("is-highlight") + ); + assertEquals(highlightedToolBadges.length, 0); + + await act(async () => { + renderer?.update(); + }); + + highlightedToolBadges = renderer.root.findAll((node: ReactTestInstance) => + node.type === "span" && + typeof node.props.className === "string" && + node.props.className.includes("activity-count-badge-tool") && + node.props.className.includes("is-highlight") + ); + assertEquals(highlightedToolBadges.length, 1); + + await act(async () => { + await new Promise((resolve) => setTimeout(resolve, 950)); + }); + + highlightedToolBadges = renderer.root.findAll((node: ReactTestInstance) => + node.type === "span" && + typeof node.props.className === "string" && + node.props.className.includes("activity-count-badge-tool") && + node.props.className.includes("is-highlight") + ); + assertEquals(highlightedToolBadges.length, 0); + } finally { + if (renderer) { + await act(async () => { + renderer?.unmount(); + }); + } + } +}); + +Deno.test("Build chat stop button appears only while running and dispatches stop without clearing transcript", async () => { + const originalFetch = globalThis.fetch; + const originalEventSource = globalThis.EventSource; + const requests: Array<{ url: string; body?: Record }> = []; + + const preservedMessages = [ + { role: "user", content: "keep this" }, + { role: "assistant", content: "assistant stays" }, + ]; + + globalThis.EventSource = FakeEventSource as unknown as typeof EventSource; + globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => { + const url = String(input); + let parsedBody: Record | undefined; + if (typeof init?.body === "string" && init.body.length > 0) { + parsedBody = JSON.parse(init.body) as Record; + } + requests.push({ url, body: parsedBody }); + + if (url.endsWith("/api/workspaces/ws-1")) { + return new Response( + JSON.stringify({ + workspaceId: "ws-1", + build: { + run: { + id: "ws-1", + status: "running", + messages: preservedMessages, + traces: [], + toolInserts: [], + }, + }, + test: { + run: { status: "idle", messages: [], traces: [], toolInserts: [] }, + }, + grade: { graderDecks: [], sessions: [] }, + session: { messages: [], traces: [] }, + }), + { status: 200 }, + ); + } + if (url.endsWith("/api/build/stop")) { + return new Response( + JSON.stringify({ + stopped: true, + run: { + id: "ws-1", + status: "canceled", + messages: preservedMessages, + traces: [], + toolInserts: [], + }, + }), + { status: 200 }, + ); + } + throw new Error(`Unexpected fetch: ${url}`); + }) as typeof fetch; + + let renderer: TestRenderer.ReactTestRenderer | null = null; + try { + await act(async () => { + renderer = TestRenderer.create( + + + , + ); + }); + assert(renderer); + + const findByTestId = (id: string) => + renderer!.root.findAll((node: ReactTestInstance) => + node.type === "button" && node.props["data-testid"] === id + ); + + assertEquals(findByTestId("build-stop").length, 1); + assertEquals(findByTestId("build-send").length, 0); + assertEquals(findByTestId("build-start").length, 0); + + const stopButton = findByTestId("build-stop")[0]; + await act(async () => { + stopButton.props.onClick(); + }); + + const stopReq = requests.find((req) => req.url.endsWith("/api/build/stop")); + assert(stopReq); + assertEquals(stopReq.body?.workspaceId, "ws-1"); + + assertEquals(findByTestId("build-stop").length, 0); + assertEquals(findByTestId("build-send").length, 1); + + const renderedTranscript = renderer.root.findAll(( + node: ReactTestInstance, + ) => + node.props.className === "bubble-text" && + typeof node.props.dangerouslySetInnerHTML?.__html === "string" + ).map((node: ReactTestInstance) => + String(node.props.dangerouslySetInnerHTML.__html) + ).join("\n"); + assert(renderedTranscript.includes("keep this")); + assert(renderedTranscript.includes("assistant stays")); + } finally { + if (renderer) { + await act(async () => { + renderer?.unmount(); + }); + } + globalThis.fetch = originalFetch; + globalThis.EventSource = originalEventSource; + FakeEventSource.instances = []; + } +}); + +Deno.test("deriveBuildChatActivityState maps to finite activity taxonomy", () => { + const display: BuildDisplayMessage[] = []; + assertEquals( + deriveBuildChatActivityState({ + runStatus: "idle", + chatSending: false, + display, + streamingAssistant: null, + runId: "run-1", + }), + "Idle", + ); + assertEquals( + deriveBuildChatActivityState({ + runStatus: "running", + chatSending: false, + display, + streamingAssistant: null, + runId: "run-1", + }), + "Thinking", + ); + assertEquals( + deriveBuildChatActivityState({ + runStatus: "running", + chatSending: false, + display, + streamingAssistant: { runId: "run-1", turn: 0, text: "partial" }, + runId: "run-1", + }), + "Responding", + ); + assertEquals( + deriveBuildChatActivityState({ + runStatus: "completed", + chatSending: false, + display, + streamingAssistant: null, + runId: "run-1", + }), + "Stopped", + ); +}); + +Deno.test("ChatView shows active indicator for thinking/responding and clears on stop", async () => { + let renderer: TestRenderer.ReactTestRenderer | null = null; + try { + await act(async () => { + renderer = TestRenderer.create(); + }); + assert(renderer); + + let indicators = renderer.root.findAll((node: ReactTestInstance) => + node.props["data-testid"] === "build-chat-activity-indicator" + ); + assertEquals(indicators.length, 0); + + await act(async () => { + renderer?.update( + , + ); + }); + indicators = renderer.root.findAll((node: ReactTestInstance) => + node.props["data-testid"] === "build-chat-activity-indicator" + ); + assertEquals(indicators.length, 1); + assertEquals(indicators[0].props["data-activity-state"], "Thinking"); + + await act(async () => { + renderer?.update( + , + ); + }); + indicators = renderer.root.findAll((node: ReactTestInstance) => + node.props["data-testid"] === "build-chat-activity-indicator" + ); + assertEquals(indicators.length, 1); + assertEquals(indicators[0].props["data-activity-state"], "Responding"); + + const streamingRows = renderer.root.findAll((node: ReactTestInstance) => + node.props.className === "imessage-row left" + ); + assert(streamingRows.length > 0); + + await act(async () => { + renderer?.update( + , + ); + }); + indicators = renderer.root.findAll((node: ReactTestInstance) => + node.props["data-testid"] === "build-chat-activity-indicator" + ); + assertEquals(indicators.length, 0); + } finally { + if (renderer) { + await act(async () => { + renderer?.unmount(); + }); + } + } +}); + +Deno.test("ChatView elapsed timer starts, stops, and resets per active cycle", async () => { + const time = new FakeTime(new Date("2026-01-01T00:00:00Z")); + let renderer: TestRenderer.ReactTestRenderer | null = null; + try { + await act(async () => { + renderer = TestRenderer.create( + , + ); + }); + assert(renderer); + + const timerNode = () => + renderer!.root.findAll((node: ReactTestInstance) => + node.props["data-testid"] === "build-chat-activity-timer" + )[0]; + assertEquals(String(timerNode().children.join("")), "00:00"); + + await act(async () => { + time.tick(2300); + }); + assertEquals(String(timerNode().children.join("")), "00:02"); + + await act(async () => { + renderer?.update( + , + ); + }); + const indicatorsAfterStop = renderer.root.findAll(( + node: ReactTestInstance, + ) => node.props["data-testid"] === "build-chat-activity-indicator"); + assertEquals(indicatorsAfterStop.length, 0); + + await act(async () => { + renderer?.update( + , + ); + }); + assertEquals(String(timerNode().children.join("")), "00:00"); + } finally { + if (renderer) { + await act(async () => { + renderer?.unmount(); + }); + } + time.restore(); + } +}); + +Deno.test("reduced-motion fallback disables shimmer while status and timer remain visible", async () => { + assert(globalStyles.includes("@media (prefers-reduced-motion: reduce)")); + assert(globalStyles.includes(".build-chat-activity-glimmer")); + assert(globalStyles.includes("opacity: 0;")); + + let renderer: TestRenderer.ReactTestRenderer | null = null; + try { + await act(async () => { + renderer = TestRenderer.create( + , + ); + }); + assert(renderer); + + const label = renderer.root.findAll((node: ReactTestInstance) => + node.props.className === "build-chat-activity-label" + ); + const timer = renderer.root.findAll((node: ReactTestInstance) => + node.props["data-testid"] === "build-chat-activity-timer" + ); + assertEquals(label.length, 1); + assertEquals(timer.length, 1); + } finally { + if (renderer) { + await act(async () => { + renderer?.unmount(); + }); + } + } +}); + +Deno.test("formatElapsedDuration renders mm:ss", () => { + assertEquals(formatElapsedDuration(0), "00:00"); + assertEquals(formatElapsedDuration(61), "01:01"); + assertEquals(formatElapsedDuration(3600 + 9), "60:09"); +}); diff --git a/simulator-ui/src/Chat.tsx b/simulator-ui/src/Chat.tsx index d7d4da089..e6f027e4d 100644 --- a/simulator-ui/src/Chat.tsx +++ b/simulator-ui/src/Chat.tsx @@ -1,26 +1,556 @@ -import React, { useCallback, useEffect, useMemo, useRef } from "react"; -import { classNames, type ToolCallSummary } from "./utils.ts"; +import React, { useCallback, useEffect, useRef, useState } from "react"; +import { + type BuildDisplayMessage, + classNames, + renderMarkdown, +} from "./utils.ts"; import Button from "./gds/Button.tsx"; +import Badge from "./gds/Badge.tsx"; +import Icon from "./gds/Icon.tsx"; import { ToolCallBubble } from "./shared.tsx"; import { useBuildChat } from "./BuildChatContext.tsx"; -export default function Chat() { +type BuildChatTranscriptBucket = + | { + kind: "message"; + key: string; + entry: BuildDisplayMessage; + } + | { + kind: "activity"; + key: string; + entries: BuildDisplayMessage[]; + latestContent: string; + latestToolLabel: string | null; + reasoningCount: number; + toolCount: number; + }; + +type ActivityBadgeKey = "reasoning" | "tool"; +type ActivityBadgeFlashState = Record; +type ActivityCountSnapshot = Record; + +export function bucketBuildChatDisplay( + display: BuildDisplayMessage[], +): BuildChatTranscriptBucket[] { + const buckets: BuildChatTranscriptBucket[] = []; + let index = 0; + while (index < display.length) { + const entry = display[index]; + if (entry.kind === "message") { + buckets.push({ + kind: "message", + key: `message-${index}-${entry.role ?? "assistant"}`, + entry, + }); + index += 1; + continue; + } + if (entry.kind === "tool" || entry.kind === "reasoning") { + const grouped: BuildDisplayMessage[] = []; + let cursor = index; + let reasoningCount = 0; + let toolCount = 0; + while (cursor < display.length && display[cursor].kind !== "message") { + const nextEntry = display[cursor]; + grouped.push(nextEntry); + if (nextEntry.kind === "reasoning") { + reasoningCount += 1; + } else if (nextEntry.kind === "tool") { + toolCount += 1; + } + cursor += 1; + } + let latestContent = ""; + let latestToolLabel: string | null = null; + let latestReasoningIndex = -1; + grouped.forEach((nextEntry, nextEntryIndex) => { + if (nextEntry.kind === "reasoning") { + if ( + typeof nextEntry.content === "string" && nextEntry.content.trim() + ) { + latestContent = nextEntry.content; + } + latestReasoningIndex = nextEntryIndex; + return; + } + if (nextEntry.kind === "tool" && nextEntry.toolSummary) { + const rawName = nextEntry.toolSummary.name; + latestToolLabel = typeof rawName === "string" && rawName.length > 0 + ? `Tool call: ${rawName}` + : "Tool call"; + } + }); + if (latestReasoningIndex >= 0) { + latestToolLabel = null; + for ( + let entryIdx = latestReasoningIndex + 1; + entryIdx < grouped.length; + entryIdx += 1 + ) { + const nextEntry = grouped[entryIdx]; + if (nextEntry.kind !== "tool" || !nextEntry.toolSummary) continue; + const rawName = nextEntry.toolSummary.name; + latestToolLabel = typeof rawName === "string" && rawName.length > 0 + ? `Tool call: ${rawName}` + : "Tool call"; + } + } else { + grouped.forEach((nextEntry) => { + if (nextEntry.kind !== "tool" || !nextEntry.toolSummary) return; + const rawName = nextEntry.toolSummary.name; + latestToolLabel = typeof rawName === "string" && rawName.length > 0 + ? `Tool call: ${rawName}` + : "Tool call"; + }); + } + buckets.push({ + kind: "activity", + key: `activity-${index}`, + entries: grouped, + latestContent, + latestToolLabel, + reasoningCount, + toolCount, + }); + if (cursor === index) { + index += 1; + continue; + } + index = cursor; + continue; + } + index += 1; + } + return buckets; +} + +export function BuildChatRows(props: { display: BuildDisplayMessage[] }) { + const { display } = props; + const [activityBucketsOpen, setActivityBucketsOpen] = useState< + Record + >({}); + const [activityBadgeFlash, setActivityBadgeFlash] = useState< + Record + >({}); + const activityBadgeTimersRef = useRef< + Record< + string, + Partial>> + > + >({}); + const previousActivityCountsRef = useRef< + Record + >( + {}, + ); + const rows: React.ReactNode[] = []; + const buckets = bucketBuildChatDisplay(display); + + const clearActivityBadgeFlashTimer = useCallback( + (bucketKey: string, badgeKey: ActivityBadgeKey) => { + const bucketTimers = activityBadgeTimersRef.current[bucketKey]; + if (!bucketTimers) return; + const timerId = bucketTimers[badgeKey]; + if (timerId !== undefined) { + clearTimeout(timerId); + } + delete bucketTimers[badgeKey]; + if ( + bucketTimers.reasoning === undefined && bucketTimers.tool === undefined + ) { + delete activityBadgeTimersRef.current[bucketKey]; + } + }, + [], + ); + + const triggerActivityBadgeFlash = useCallback( + (bucketKey: string, badgeKey: ActivityBadgeKey) => { + setActivityBadgeFlash((prev) => ({ + ...prev, + [bucketKey]: { + reasoning: prev[bucketKey]?.reasoning ?? false, + tool: prev[bucketKey]?.tool ?? false, + [badgeKey]: true, + }, + })); + + clearActivityBadgeFlashTimer(bucketKey, badgeKey); + + const bucketTimers = activityBadgeTimersRef.current[bucketKey] ?? {}; + bucketTimers[badgeKey] = setTimeout(() => { + clearActivityBadgeFlashTimer(bucketKey, badgeKey); + setActivityBadgeFlash((prev) => { + const existing = prev[bucketKey]; + if (!existing) return prev; + const nextBucketState: ActivityBadgeFlashState = { + reasoning: badgeKey === "reasoning" ? false : existing.reasoning, + tool: badgeKey === "tool" ? false : existing.tool, + }; + if (!nextBucketState.reasoning && !nextBucketState.tool) { + const { [bucketKey]: _, ...rest } = prev; + return rest; + } + return { + ...prev, + [bucketKey]: nextBucketState, + }; + }); + }, 900); + activityBadgeTimersRef.current[bucketKey] = bucketTimers; + }, + [clearActivityBadgeFlashTimer], + ); + + useEffect(() => { + const nextCounts: Record = {}; + const activeBucketKeys = new Set(); + + buckets.forEach((bucket) => { + if (bucket.kind !== "activity") return; + activeBucketKeys.add(bucket.key); + nextCounts[bucket.key] = { + reasoning: bucket.reasoningCount, + tool: bucket.toolCount, + }; + const previousCounts = previousActivityCountsRef.current[bucket.key]; + if (!previousCounts) return; + if (bucket.reasoningCount > previousCounts.reasoning) { + triggerActivityBadgeFlash(bucket.key, "reasoning"); + } + if (bucket.toolCount > previousCounts.tool) { + triggerActivityBadgeFlash(bucket.key, "tool"); + } + }); + + Object.keys(previousActivityCountsRef.current).forEach((bucketKey) => { + if (activeBucketKeys.has(bucketKey)) return; + clearActivityBadgeFlashTimer(bucketKey, "reasoning"); + clearActivityBadgeFlashTimer(bucketKey, "tool"); + }); + + previousActivityCountsRef.current = nextCounts; + + setActivityBadgeFlash((prev) => { + let changed = false; + const next: Record = {}; + Object.entries(prev).forEach(([bucketKey, value]) => { + if (!activeBucketKeys.has(bucketKey)) { + changed = true; + return; + } + if (!value.reasoning && !value.tool) { + changed = true; + return; + } + next[bucketKey] = value; + }); + return changed ? next : prev; + }); + }, [buckets, clearActivityBadgeFlashTimer, triggerActivityBadgeFlash]); + + useEffect(() => { + return () => { + Object.entries(activityBadgeTimersRef.current).forEach(([ + bucketKey, + bucketTimers, + ]) => { + if (bucketTimers.reasoning !== undefined) { + clearTimeout(bucketTimers.reasoning); + } + if (bucketTimers.tool !== undefined) { + clearTimeout(bucketTimers.tool); + } + delete activityBadgeTimersRef.current[bucketKey]; + }); + }; + }, []); + + buckets.forEach((bucket) => { + if (bucket.kind === "message") { + const role = bucket.entry.role ?? "assistant"; + rows.push( +
+
+
+
+
, + ); + return; + } + if (bucket.kind === "activity") { + const isOpen = Boolean(activityBucketsOpen[bucket.key]); + const latestReasoning = bucket.latestContent.trim().length > 0 + ? bucket.latestContent + : bucket.reasoningCount > 0 + ? "Reasoning in progress" + : ""; + rows.push( +
+ + {!isOpen && ( +
+ {latestReasoning && ( +
+ )} +
+ )} + {isOpen && ( +
+ {bucket.entries.map((entry, activityIdx) => { + if (entry.kind === "tool") { + const tool = entry.toolSummary; + if (!tool) return null; + const toolId = tool.id ?? entry.toolCallId ?? + `tool-${bucket.key}-${activityIdx}`; + return ( +
+ +
+ ); + } + if (entry.kind !== "reasoning") return null; + return ( +
+
+
+
Reasoning
+
+
+
+ {entry.reasoningRaw && ( +
+ Details +
+                              {JSON.stringify(entry.reasoningRaw, null, 2)}
+                            
+
+ )} +
+
+
+ ); + })} +
+ )} +
, + ); + return; + } + }); + return <>{rows}; +} + +export type BuildChatActivityState = + | "Idle" + | "Thinking" + | "Responding" + | "Stopped"; + +export function deriveBuildChatActivityState( + args: { + runStatus: "idle" | "running" | "completed" | "error" | "canceled"; + chatSending: boolean; + display: BuildDisplayMessage[]; + streamingAssistant: { runId: string; turn: number; text: string } | null; + runId: string; + }, +): BuildChatActivityState { + const { runStatus, chatSending, display, streamingAssistant, runId } = args; + const isActive = chatSending || runStatus === "running"; + const hasStreamingText = Boolean( + streamingAssistant && + streamingAssistant.runId === runId && + streamingAssistant.text.trim().length > 0, + ); + const hasAssistantTranscriptText = display.some((entry) => + entry.kind === "message" && + (entry.role ?? "assistant") === "assistant" && + typeof entry.content === "string" && + entry.content.trim().length > 0 + ); + const hasVisibleAssistantText = hasStreamingText || + hasAssistantTranscriptText; + if (isActive) { + return hasVisibleAssistantText ? "Responding" : "Thinking"; + } + if ( + runStatus === "completed" || runStatus === "error" || + runStatus === "canceled" + ) { + return "Stopped"; + } + return "Idle"; +} + +export function formatElapsedDuration(totalSeconds: number): string { + const clamped = Math.max(0, Math.floor(totalSeconds)); + const minutes = Math.floor(clamped / 60); + const seconds = clamped % 60; + return `${String(minutes).padStart(2, "0")}:${ + String(seconds).padStart(2, "0") + }`; +} + +export type BuildChatViewState = ReturnType; + +function BuildChatActivityIndicator( + props: { state: BuildChatActivityState }, +) { + const { state } = props; + const active = state === "Thinking" || state === "Responding"; + const [startedAtMs, setStartedAtMs] = useState(null); + const [tick, setTick] = useState(0); + + useEffect(() => { + if (!active) { + setStartedAtMs(null); + setTick(0); + return; + } + setStartedAtMs((prev) => prev ?? Date.now()); + const handle = globalThis.setInterval(() => { + setTick((prev) => prev + 1); + }, 1000); + return () => globalThis.clearInterval(handle); + }, [active]); + + if (!active) return null; + const elapsedSeconds = startedAtMs === null + ? 0 + : Math.floor((Date.now() - startedAtMs) / 1000); + const statusLabel = state === "Thinking" + ? "Assistant is thinking" + : "Assistant is responding"; + + return ( +
+
+ ); +} + +export function ChatView(props: { state: BuildChatViewState }) { const { run, - toolCalls, chatDraft, setChatDraft, chatSending, chatError, setChatError, - toolCallsOpen, - setToolCallsOpen, optimisticUser, setOptimisticUser, streamingAssistant, + stopChat, sendMessage, - } = useBuildChat(); + } = props.state; const transcriptRef = useRef(null); + const composerInputRef = useRef(null); + const display = run.displayMessages ?? []; + const activityState = deriveBuildChatActivityState({ + runStatus: run.status, + chatSending, + display, + streamingAssistant, + runId: run.id, + }); useEffect(() => { const el = transcriptRef.current; @@ -29,26 +559,28 @@ export default function Chat() { el.scrollTop = el.scrollHeight; }); return () => cancelAnimationFrame(frame); - }, [run.messages.length, streamingAssistant?.text, optimisticUser?.id]); - - const toolBuckets = useMemo(() => { - const inserts = run.toolInserts ?? []; - const byCall = new Map(toolCalls.map((call) => [call.id, call])); - const buckets = new Map(); - for (const insert of inserts) { - if (!insert.actionCallId) continue; - const call = byCall.get(insert.actionCallId); - if (!call) continue; - const idx = typeof insert.index === "number" ? insert.index : 0; - const bucket = buckets.get(idx) ?? []; - bucket.push(call); - buckets.set(idx, bucket); + }, [ + run.displayMessages?.length ?? run.messages.length, + optimisticUser?.id, + optimisticUser?.text, + streamingAssistant?.runId, + streamingAssistant?.turn, + streamingAssistant?.text, + ]); + + useEffect(() => { + if (run.status === "error" && run.error) { + console.error("[build-bot] run error (state)", run.error); } - return buckets; - }, [run.toolInserts, toolCalls]); + }, [run.status, run.error]); + + useEffect(() => { + if (chatSending || run.status === "running") return; + composerInputRef.current?.focus(); + }, [chatSending, run.status]); const canStartAssistant = run.status !== "running" && !chatSending && - run.messages.length === 0 && !streamingAssistant?.text; + run.messages.length === 0; const handleSendChat = useCallback(async () => { const message = chatDraft.trim(); @@ -72,38 +604,13 @@ export default function Chat() { await sendMessage(""); }, [chatDraft, handleSendChat, sendMessage]); - const renderToolBucket = useCallback( - (index: number, rows: React.ReactNode[]) => { - const bucket = toolBuckets.get(index); - if (!bucket || bucket.length === 0) return; - const isOpen = Boolean(toolCallsOpen[index]); - rows.push( -
- - {isOpen && ( -
- {bucket.map((call, callIdx) => ( - - ))} -
- )} -
, - ); - }, - [toolBuckets, toolCallsOpen, setToolCallsOpen], - ); + const handleStopChat = useCallback(async () => { + try { + await stopChat(); + } catch (err) { + setChatError(err instanceof Error ? err.message : String(err)); + } + }, [setChatError, stopChat]); return (
@@ -113,51 +620,41 @@ export default function Chat() { Use this chat to update deck files via Gambit Bot. Tool calls show file writes and why they happened.
- {run.messages.length === 0 && ( -
No messages yet.
- )} - {(() => { - const rows: React.ReactNode[] = []; - renderToolBucket(0, rows); - run.messages.forEach((m, idx) => { - rows.push( -
-
- {m.content} -
-
, - ); - renderToolBucket(idx + 1, rows); - }); - return rows; - })()} + {(run.displayMessages?.length ?? 0) === 0 && + !optimisticUser && + !(streamingAssistant?.runId === run.id && + streamingAssistant.text.length > 0) && +
No messages yet.
} + {optimisticUser && ( -
+
- {optimisticUser.text} +
)} - {streamingAssistant?.text && - streamingAssistant.runId === run.id && ( -
-
- {streamingAssistant.text} + {streamingAssistant && + streamingAssistant.runId === run.id && + streamingAssistant.text.length > 0 && ( +
+
+
)} @@ -165,6 +662,9 @@ export default function Chat() {
+
+ +
{canStartAssistant && (
Start the assistant to begin editing. @@ -172,6 +672,7 @@ export default function Chat() { )}