diff --git a/CLAUDE.md b/CLAUDE.md index 038bf70..65c9f0d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -11,7 +11,7 @@ Consumer Loop └─> agent.nextAction() ├─ Filter tools by validWhen predicates ├─ Generate discriminated union JSON Schema from valid tools - ├─ Assemble context (instructions, state, history, tool descriptions) + ├─ Assemble context (instructions, history, tool descriptions) ├─ Enforce per-section token budgets ├─ Translate to provider format (OpenAI or Anthropic) ├─ Single LLM call with constrained structured output @@ -27,7 +27,8 @@ Consumer Loop - **History formatted as provider-native tool-calling messages** (tool_use/tool_result for Anthropic, tool_calls/tool for OpenAI). This exploits model training on tool-calling patterns. - **Anthropic provider** is a raw fetch adapter (no SDK). Uses `output_config.format` for structured output, implements its own retry with exponential backoff. - **OpenAI provider** uses the official OpenAI SDK. Handles OpenAI, vLLM, and OpenRouter via `baseUrl`. -- **Token budgeting** rejects (throws `BudgetExceededError`) if any section exceeds its budget. No silent truncation. +- **State is not sent to the model** — state is only passed to `instructions(state)` and `validWhen(state)`. The consumer controls what the model sees through the instructions function. +- **Token budgeting** rejects (throws `BudgetExceededError`) if any section (instructions, history, tools) exceeds its budget. No silent truncation. - **OAuth** extracted from pi-ai (MIT). Supports Anthropic and OpenAI device code flows. Tokens stored at `~/.determinate/` with 0o600 permissions. ## Project Structure diff --git a/scripts/e2e-live.ts b/scripts/e2e-live.ts index 94a509c..639294d 100644 --- a/scripts/e2e-live.ts +++ b/scripts/e2e-live.ts @@ -102,7 +102,7 @@ await runTest("warmup request", async () => { ], instructions: () => "Respond with a ping.", context: { - budgets: { instructions: 5000, state: 2000, history: 5000, tools: 2000 }, + budgets: { instructions: 5000, history: 5000, tools: 2000 }, }, }); agent.setState({ x: 1 }); @@ -131,7 +131,7 @@ await runTest("basic single-tool decision", async () => { instructions: (s) => `You are a smart home controller. The current temperature is ${s.temperature}°${s.unit === "celsius" ? "C" : "F"}. It's too cold. Set the thermostat to a comfortable temperature (around 22°C or 72°F).`, context: { - budgets: { instructions: 5000, state: 2000, history: 5000, tools: 2000 }, + budgets: { instructions: 5000, history: 5000, tools: 2000 }, }, }); @@ -194,7 +194,7 @@ await runTest("multi-tool choice based on state", async () => { instructions: (s) => `You are a productivity assistant. The user wants to perform a "${s.taskType}" task. Their request: "${s.message}". Choose the appropriate action and fill in reasonable parameters.`, context: { - budgets: { instructions: 5000, state: 2000, history: 5000, tools: 2000 }, + budgets: { instructions: 5000, history: 5000, tools: 2000 }, }, }); @@ -245,7 +245,7 @@ await runTest("validWhen filters tools correctly", async () => { instructions: () => "You are a customer support agent. The customer is asking for help. Provide basic support.", context: { - budgets: { instructions: 5000, state: 2000, history: 5000, tools: 2000 }, + budgets: { instructions: 5000, history: 5000, tools: 2000 }, }, }); @@ -289,7 +289,7 @@ await runTest("history context influences decision", async () => { instructions: (s) => `You are a form-filling assistant. You need to collect: name, email, phone. So far you have collected: [${s.data.collected.join(", ")}]. Collect the next missing field.`, context: { - budgets: { instructions: 5000, state: 2000, history: 10000, tools: 2000 }, + budgets: { instructions: 5000, history: 10000, tools: 2000 }, }, }); @@ -333,7 +333,7 @@ await runTest("verbose mode returns assembled context", async () => { ], instructions: () => "Increment the counter by 1.", context: { - budgets: { instructions: 5000, state: 2000, history: 5000, tools: 2000 }, + budgets: { instructions: 5000, history: 5000, tools: 2000 }, }, }); @@ -377,7 +377,7 @@ await runTest("timeout aborts long requests", async () => { ], instructions: () => "Do nothing.", context: { - budgets: { instructions: 5000, state: 2000, history: 5000, tools: 2000 }, + budgets: { instructions: 5000, history: 5000, tools: 2000 }, }, }); diff --git a/src/context/assembler.ts b/src/context/assembler.ts index 0bba999..d378817 100644 --- a/src/context/assembler.ts +++ b/src/context/assembler.ts @@ -46,10 +46,7 @@ export function assembleContext(input: AssembleInput): Assembled // 4. Build tool descriptions const toolDescriptions = validTools.map((t) => `- ${t.name}: ${t.description}`).join("\n"); - // 5. Serialize state - const stateText = JSON.stringify(state, null, 2); - - // 6. Build history messages (needed for accurate budget counting) + // 5. Build history messages (needed for accurate budget counting) const historyMessages: unknown[] = []; for (const entry of history) { const callId = randomUUID(); @@ -89,12 +86,11 @@ export function assembleContext(input: AssembleInput): Assembled } } - // 7. Enforce budgets + // 6. Enforce budgets const historyText = historyMessages.length > 0 ? JSON.stringify(historyMessages) : ""; enforceBudgets( { instructions: fullInstructions, - state: stateText, history: historyText, tools: toolDescriptions, }, @@ -102,7 +98,7 @@ export function assembleContext(input: AssembleInput): Assembled tokenizer, ); - // 8. Build final messages + // 7. Build final messages const messages: unknown[] = []; messages.push({ role: "system", @@ -111,7 +107,7 @@ export function assembleContext(input: AssembleInput): Assembled messages.push(...historyMessages); messages.push({ role: "user", - content: `Current state:\n${stateText}\n\nChoose the next action.`, + content: "Choose the next action.", }); return { messages, outputSchema, validTools: validTools.map((t) => t.name) }; diff --git a/src/context/budget.ts b/src/context/budget.ts index 73b908b..cc4bd7b 100644 --- a/src/context/budget.ts +++ b/src/context/budget.ts @@ -4,7 +4,6 @@ import type { Tokenizer } from "./tokenizer"; export interface SectionContents { instructions: string; - state: string; history: string; tools: string; } @@ -16,7 +15,7 @@ export function enforceBudgets( ): Record { const counts: Record = {}; - for (const key of ["instructions", "state", "history", "tools"] as const) { + for (const key of ["instructions", "history", "tools"] as const) { const count = tokenizer.count(sections[key]); counts[key] = count; if (count > budgets[key]) { diff --git a/src/types.ts b/src/types.ts index d23271b..cf3fba7 100644 --- a/src/types.ts +++ b/src/types.ts @@ -13,7 +13,6 @@ export interface ProviderConfig { export interface TokenBudgets { instructions: number; - state: number; history: number; tools: number; } diff --git a/tests/agent.test.ts b/tests/agent.test.ts index 2106815..0827ead 100644 --- a/tests/agent.test.ts +++ b/tests/agent.test.ts @@ -27,7 +27,6 @@ const baseConfig = { context: { budgets: { instructions: 5000, - state: 5000, history: 5000, tools: 5000, }, diff --git a/tests/context/assembler.test.ts b/tests/context/assembler.test.ts index 80cfb9e..4297f55 100644 --- a/tests/context/assembler.test.ts +++ b/tests/context/assembler.test.ts @@ -59,7 +59,6 @@ const tools: ToolDefinition[] = [ const bigBudgets: TokenBudgets = { instructions: 10000, - state: 10000, history: 10000, tools: 10000, }; @@ -92,7 +91,7 @@ describe("context assembler", () => { expect(systemMsg.content as string).toContain("0.3"); }); - it("includes state in user message", () => { + it("does not include raw state in user message", () => { const result = assembleContext({ state: { status: "pending", score: 0.3 }, tools, @@ -103,8 +102,8 @@ describe("context assembler", () => { providerType: "openai", }); const userMsg = findMessage(result.messages, (m) => m.role === "user"); - expect(userMsg.content as string).toContain("pending"); - expect(userMsg.content as string).toContain("0.3"); + expect(userMsg.content as string).not.toContain("pending"); + expect(userMsg.content as string).not.toContain("0.3"); }); it("throws NoValidToolsError when no tools match", () => { diff --git a/tests/context/budget.test.ts b/tests/context/budget.test.ts index d7ccd2a..44d2367 100644 --- a/tests/context/budget.test.ts +++ b/tests/context/budget.test.ts @@ -14,7 +14,6 @@ const mockTokenizer: Tokenizer = { describe("budget enforcement", () => { const budgets: TokenBudgets = { instructions: 100, - state: 50, history: 30, tools: 40, }; @@ -22,7 +21,6 @@ describe("budget enforcement", () => { it("passes when all sections are within budget", () => { const sections = { instructions: "a".repeat(50), - state: "b".repeat(30), history: "c".repeat(20), tools: "d".repeat(25), }; @@ -32,7 +30,6 @@ describe("budget enforcement", () => { it("throws BudgetExceededError when instructions exceed budget", () => { const sections = { instructions: "a".repeat(150), - state: "b".repeat(10), history: "c".repeat(10), tools: "d".repeat(10), }; @@ -50,7 +47,6 @@ describe("budget enforcement", () => { it("throws for the first section that exceeds budget", () => { const sections = { instructions: "a".repeat(200), - state: "b".repeat(200), history: "c".repeat(10), tools: "d".repeat(10), }; @@ -66,13 +62,11 @@ describe("budget enforcement", () => { it("returns token counts for all sections", () => { const sections = { instructions: "a".repeat(50), - state: "b".repeat(30), history: "c".repeat(20), tools: "d".repeat(25), }; const counts = enforceBudgets(sections, budgets, mockTokenizer); expect(counts.instructions).toBe(50); - expect(counts.state).toBe(30); expect(counts.history).toBe(20); expect(counts.tools).toBe(25); }); diff --git a/tests/integration/e2e.test.ts b/tests/integration/e2e.test.ts index bcf50d8..6393840 100644 --- a/tests/integration/e2e.test.ts +++ b/tests/integration/e2e.test.ts @@ -79,7 +79,6 @@ describe("end-to-end with mock server", () => { context: { budgets: { instructions: 5000, - state: 5000, history: 5000, tools: 5000, }, @@ -129,7 +128,7 @@ describe("end-to-end with mock server", () => { ], instructions: () => "test", context: { - budgets: { instructions: 5000, state: 5000, history: 5000, tools: 5000 }, + budgets: { instructions: 5000, history: 5000, tools: 5000 }, }, });