From bc99375dce8161d8b19136069b430ac3c20017d1 Mon Sep 17 00:00:00 2001 From: Craig Haseler Date: Wed, 11 Mar 2026 18:55:35 -0400 Subject: [PATCH] feat!: stop sending raw state to the model State is now only used as input to instructions(state) and validWhen(state). The consumer controls what the model sees entirely through the instructions function. BREAKING CHANGE: TokenBudgets no longer has a "state" field. Remove "state" from your budgets config. --- CLAUDE.md | 5 +++-- scripts/e2e-live.ts | 14 +++++++------- src/context/assembler.ts | 12 ++++-------- src/context/budget.ts | 3 +-- src/types.ts | 1 - tests/agent.test.ts | 1 - tests/context/assembler.test.ts | 7 +++---- tests/context/budget.test.ts | 6 ------ tests/integration/e2e.test.ts | 3 +-- 9 files changed, 19 insertions(+), 33 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 038bf70..65c9f0d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -11,7 +11,7 @@ Consumer Loop └─> agent.nextAction() ├─ Filter tools by validWhen predicates ├─ Generate discriminated union JSON Schema from valid tools - ├─ Assemble context (instructions, state, history, tool descriptions) + ├─ Assemble context (instructions, history, tool descriptions) ├─ Enforce per-section token budgets ├─ Translate to provider format (OpenAI or Anthropic) ├─ Single LLM call with constrained structured output @@ -27,7 +27,8 @@ Consumer Loop - **History formatted as provider-native tool-calling messages** (tool_use/tool_result for Anthropic, tool_calls/tool for OpenAI). This exploits model training on tool-calling patterns. - **Anthropic provider** is a raw fetch adapter (no SDK). Uses `output_config.format` for structured output, implements its own retry with exponential backoff. - **OpenAI provider** uses the official OpenAI SDK. Handles OpenAI, vLLM, and OpenRouter via `baseUrl`. -- **Token budgeting** rejects (throws `BudgetExceededError`) if any section exceeds its budget. No silent truncation. +- **State is not sent to the model** — state is only passed to `instructions(state)` and `validWhen(state)`. The consumer controls what the model sees through the instructions function. +- **Token budgeting** rejects (throws `BudgetExceededError`) if any section (instructions, history, tools) exceeds its budget. No silent truncation. - **OAuth** extracted from pi-ai (MIT). Supports Anthropic and OpenAI device code flows. Tokens stored at `~/.determinate/` with 0o600 permissions. ## Project Structure diff --git a/scripts/e2e-live.ts b/scripts/e2e-live.ts index 94a509c..639294d 100644 --- a/scripts/e2e-live.ts +++ b/scripts/e2e-live.ts @@ -102,7 +102,7 @@ await runTest("warmup request", async () => { ], instructions: () => "Respond with a ping.", context: { - budgets: { instructions: 5000, state: 2000, history: 5000, tools: 2000 }, + budgets: { instructions: 5000, history: 5000, tools: 2000 }, }, }); agent.setState({ x: 1 }); @@ -131,7 +131,7 @@ await runTest("basic single-tool decision", async () => { instructions: (s) => `You are a smart home controller. The current temperature is ${s.temperature}°${s.unit === "celsius" ? "C" : "F"}. It's too cold. Set the thermostat to a comfortable temperature (around 22°C or 72°F).`, context: { - budgets: { instructions: 5000, state: 2000, history: 5000, tools: 2000 }, + budgets: { instructions: 5000, history: 5000, tools: 2000 }, }, }); @@ -194,7 +194,7 @@ await runTest("multi-tool choice based on state", async () => { instructions: (s) => `You are a productivity assistant. The user wants to perform a "${s.taskType}" task. Their request: "${s.message}". Choose the appropriate action and fill in reasonable parameters.`, context: { - budgets: { instructions: 5000, state: 2000, history: 5000, tools: 2000 }, + budgets: { instructions: 5000, history: 5000, tools: 2000 }, }, }); @@ -245,7 +245,7 @@ await runTest("validWhen filters tools correctly", async () => { instructions: () => "You are a customer support agent. The customer is asking for help. Provide basic support.", context: { - budgets: { instructions: 5000, state: 2000, history: 5000, tools: 2000 }, + budgets: { instructions: 5000, history: 5000, tools: 2000 }, }, }); @@ -289,7 +289,7 @@ await runTest("history context influences decision", async () => { instructions: (s) => `You are a form-filling assistant. You need to collect: name, email, phone. So far you have collected: [${s.data.collected.join(", ")}]. Collect the next missing field.`, context: { - budgets: { instructions: 5000, state: 2000, history: 10000, tools: 2000 }, + budgets: { instructions: 5000, history: 10000, tools: 2000 }, }, }); @@ -333,7 +333,7 @@ await runTest("verbose mode returns assembled context", async () => { ], instructions: () => "Increment the counter by 1.", context: { - budgets: { instructions: 5000, state: 2000, history: 5000, tools: 2000 }, + budgets: { instructions: 5000, history: 5000, tools: 2000 }, }, }); @@ -377,7 +377,7 @@ await runTest("timeout aborts long requests", async () => { ], instructions: () => "Do nothing.", context: { - budgets: { instructions: 5000, state: 2000, history: 5000, tools: 2000 }, + budgets: { instructions: 5000, history: 5000, tools: 2000 }, }, }); diff --git a/src/context/assembler.ts b/src/context/assembler.ts index 0bba999..d378817 100644 --- a/src/context/assembler.ts +++ b/src/context/assembler.ts @@ -46,10 +46,7 @@ export function assembleContext(input: AssembleInput): Assembled // 4. Build tool descriptions const toolDescriptions = validTools.map((t) => `- ${t.name}: ${t.description}`).join("\n"); - // 5. Serialize state - const stateText = JSON.stringify(state, null, 2); - - // 6. Build history messages (needed for accurate budget counting) + // 5. Build history messages (needed for accurate budget counting) const historyMessages: unknown[] = []; for (const entry of history) { const callId = randomUUID(); @@ -89,12 +86,11 @@ export function assembleContext(input: AssembleInput): Assembled } } - // 7. Enforce budgets + // 6. Enforce budgets const historyText = historyMessages.length > 0 ? JSON.stringify(historyMessages) : ""; enforceBudgets( { instructions: fullInstructions, - state: stateText, history: historyText, tools: toolDescriptions, }, @@ -102,7 +98,7 @@ export function assembleContext(input: AssembleInput): Assembled tokenizer, ); - // 8. Build final messages + // 7. Build final messages const messages: unknown[] = []; messages.push({ role: "system", @@ -111,7 +107,7 @@ export function assembleContext(input: AssembleInput): Assembled messages.push(...historyMessages); messages.push({ role: "user", - content: `Current state:\n${stateText}\n\nChoose the next action.`, + content: "Choose the next action.", }); return { messages, outputSchema, validTools: validTools.map((t) => t.name) }; diff --git a/src/context/budget.ts b/src/context/budget.ts index 73b908b..cc4bd7b 100644 --- a/src/context/budget.ts +++ b/src/context/budget.ts @@ -4,7 +4,6 @@ import type { Tokenizer } from "./tokenizer"; export interface SectionContents { instructions: string; - state: string; history: string; tools: string; } @@ -16,7 +15,7 @@ export function enforceBudgets( ): Record { const counts: Record = {}; - for (const key of ["instructions", "state", "history", "tools"] as const) { + for (const key of ["instructions", "history", "tools"] as const) { const count = tokenizer.count(sections[key]); counts[key] = count; if (count > budgets[key]) { diff --git a/src/types.ts b/src/types.ts index d23271b..cf3fba7 100644 --- a/src/types.ts +++ b/src/types.ts @@ -13,7 +13,6 @@ export interface ProviderConfig { export interface TokenBudgets { instructions: number; - state: number; history: number; tools: number; } diff --git a/tests/agent.test.ts b/tests/agent.test.ts index 2106815..0827ead 100644 --- a/tests/agent.test.ts +++ b/tests/agent.test.ts @@ -27,7 +27,6 @@ const baseConfig = { context: { budgets: { instructions: 5000, - state: 5000, history: 5000, tools: 5000, }, diff --git a/tests/context/assembler.test.ts b/tests/context/assembler.test.ts index 80cfb9e..4297f55 100644 --- a/tests/context/assembler.test.ts +++ b/tests/context/assembler.test.ts @@ -59,7 +59,6 @@ const tools: ToolDefinition[] = [ const bigBudgets: TokenBudgets = { instructions: 10000, - state: 10000, history: 10000, tools: 10000, }; @@ -92,7 +91,7 @@ describe("context assembler", () => { expect(systemMsg.content as string).toContain("0.3"); }); - it("includes state in user message", () => { + it("does not include raw state in user message", () => { const result = assembleContext({ state: { status: "pending", score: 0.3 }, tools, @@ -103,8 +102,8 @@ describe("context assembler", () => { providerType: "openai", }); const userMsg = findMessage(result.messages, (m) => m.role === "user"); - expect(userMsg.content as string).toContain("pending"); - expect(userMsg.content as string).toContain("0.3"); + expect(userMsg.content as string).not.toContain("pending"); + expect(userMsg.content as string).not.toContain("0.3"); }); it("throws NoValidToolsError when no tools match", () => { diff --git a/tests/context/budget.test.ts b/tests/context/budget.test.ts index d7ccd2a..44d2367 100644 --- a/tests/context/budget.test.ts +++ b/tests/context/budget.test.ts @@ -14,7 +14,6 @@ const mockTokenizer: Tokenizer = { describe("budget enforcement", () => { const budgets: TokenBudgets = { instructions: 100, - state: 50, history: 30, tools: 40, }; @@ -22,7 +21,6 @@ describe("budget enforcement", () => { it("passes when all sections are within budget", () => { const sections = { instructions: "a".repeat(50), - state: "b".repeat(30), history: "c".repeat(20), tools: "d".repeat(25), }; @@ -32,7 +30,6 @@ describe("budget enforcement", () => { it("throws BudgetExceededError when instructions exceed budget", () => { const sections = { instructions: "a".repeat(150), - state: "b".repeat(10), history: "c".repeat(10), tools: "d".repeat(10), }; @@ -50,7 +47,6 @@ describe("budget enforcement", () => { it("throws for the first section that exceeds budget", () => { const sections = { instructions: "a".repeat(200), - state: "b".repeat(200), history: "c".repeat(10), tools: "d".repeat(10), }; @@ -66,13 +62,11 @@ describe("budget enforcement", () => { it("returns token counts for all sections", () => { const sections = { instructions: "a".repeat(50), - state: "b".repeat(30), history: "c".repeat(20), tools: "d".repeat(25), }; const counts = enforceBudgets(sections, budgets, mockTokenizer); expect(counts.instructions).toBe(50); - expect(counts.state).toBe(30); expect(counts.history).toBe(20); expect(counts.tools).toBe(25); }); diff --git a/tests/integration/e2e.test.ts b/tests/integration/e2e.test.ts index bcf50d8..6393840 100644 --- a/tests/integration/e2e.test.ts +++ b/tests/integration/e2e.test.ts @@ -79,7 +79,6 @@ describe("end-to-end with mock server", () => { context: { budgets: { instructions: 5000, - state: 5000, history: 5000, tools: 5000, }, @@ -129,7 +128,7 @@ describe("end-to-end with mock server", () => { ], instructions: () => "test", context: { - budgets: { instructions: 5000, state: 5000, history: 5000, tools: 5000 }, + budgets: { instructions: 5000, history: 5000, tools: 5000 }, }, });