cahaseler · cahaseler · Mar 11, 2026 · Mar 11, 2026
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -11,7 +11,7 @@ Consumer Loop
   └─> agent.nextAction()
         ├─ Filter tools by validWhen predicates
         ├─ Generate discriminated union JSON Schema from valid tools
-        ├─ Assemble context (instructions, state, history, tool descriptions)
+        ├─ Assemble context (instructions, history, tool descriptions)
         ├─ Enforce per-section token budgets
         ├─ Translate to provider format (OpenAI or Anthropic)
         ├─ Single LLM call with constrained structured output
@@ -27,7 +27,8 @@ Consumer Loop
 - **History formatted as provider-native tool-calling messages** (tool_use/tool_result for Anthropic, tool_calls/tool for OpenAI). This exploits model training on tool-calling patterns.
 - **Anthropic provider** is a raw fetch adapter (no SDK). Uses `output_config.format` for structured output, implements its own retry with exponential backoff.
 - **OpenAI provider** uses the official OpenAI SDK. Handles OpenAI, vLLM, and OpenRouter via `baseUrl`.
-- **Token budgeting** rejects (throws `BudgetExceededError`) if any section exceeds its budget. No silent truncation.
+- **Raw state is not sent to the model** — state is only passed to `instructions(state)` and `validWhen(state)`, never serialized into the prompt directly. The consumer controls what the model sees of the state through the instructions function.
+- **Token budgeting** rejects (throws `BudgetExceededError`) if any section (instructions, history, tools) exceeds its budget. No silent truncation.
 - **OAuth** extracted from pi-ai (MIT). Supports Anthropic and OpenAI device code flows. Tokens stored at `~/.determinate/` with 0o600 permissions.
 
 ## Project Structure

diff --git a/scripts/e2e-live.ts b/scripts/e2e-live.ts
@@ -102,7 +102,7 @@ await runTest("warmup request", async () => {
 		],
 		instructions: () => "Respond with a ping.",
 		context: {
-			budgets: { instructions: 5000, state: 2000, history: 5000, tools: 2000 },
+			budgets: { instructions: 5000, history: 5000, tools: 2000 },
 		},
 	});
 	agent.setState({ x: 1 });
@@ -131,7 +131,7 @@ await runTest("basic single-tool decision", async () => {
 		instructions: (s) =>
 			`You are a smart home controller. The current temperature is ${s.temperature}°${s.unit === "celsius" ? "C" : "F"}. It's too cold. Set the thermostat to a comfortable temperature (around 22°C or 72°F).`,
 		context: {
-			budgets: { instructions: 5000, state: 2000, history: 5000, tools: 2000 },
+			budgets: { instructions: 5000, history: 5000, tools: 2000 },
 		},
 	});
 
@@ -194,7 +194,7 @@ await runTest("multi-tool choice based on state", async () => {
 		instructions: (s) =>
 			`You are a productivity assistant. The user wants to perform a "${s.taskType}" task. Their request: "${s.message}". Choose the appropriate action and fill in reasonable parameters.`,
 		context: {
-			budgets: { instructions: 5000, state: 2000, history: 5000, tools: 2000 },
+			budgets: { instructions: 5000, history: 5000, tools: 2000 },
 		},
 	});
 
@@ -245,7 +245,7 @@ await runTest("validWhen filters tools correctly", async () => {
 		instructions: () =>
 			"You are a customer support agent. The customer is asking for help. Provide basic support.",
 		context: {
-			budgets: { instructions: 5000, state: 2000, history: 5000, tools: 2000 },
+			budgets: { instructions: 5000, history: 5000, tools: 2000 },
 		},
 	});
 
@@ -289,7 +289,7 @@ await runTest("history context influences decision", async () => {
 		instructions: (s) =>
 			`You are a form-filling assistant. You need to collect: name, email, phone. So far you have collected: [${s.data.collected.join(", ")}]. Collect the next missing field.`,
 		context: {
-			budgets: { instructions: 5000, state: 2000, history: 10000, tools: 2000 },
+			budgets: { instructions: 5000, history: 10000, tools: 2000 },
 		},
 	});
 
@@ -333,7 +333,7 @@ await runTest("verbose mode returns assembled context", async () => {
 		],
 		instructions: () => "Increment the counter by 1.",
 		context: {
-			budgets: { instructions: 5000, state: 2000, history: 5000, tools: 2000 },
+			budgets: { instructions: 5000, history: 5000, tools: 2000 },
 		},
 	});
 
@@ -377,7 +377,7 @@ await runTest("timeout aborts long requests", async () => {
 		],
 		instructions: () => "Do nothing.",
 		context: {
-			budgets: { instructions: 5000, state: 2000, history: 5000, tools: 2000 },
+			budgets: { instructions: 5000, history: 5000, tools: 2000 },
 		},
 	});
 

diff --git a/src/context/assembler.ts b/src/context/assembler.ts
@@ -46,10 +46,7 @@ export function assembleContext<TState>(input: AssembleInput<TState>): Assembled
 	// 4. Build tool descriptions
 	const toolDescriptions = validTools.map((t) => `- ${t.name}: ${t.description}`).join("\n");
 
-	// 5. Serialize state
-	const stateText = JSON.stringify(state, null, 2);
-
-	// 6. Build history messages (needed for accurate budget counting)
+	// 5. Build history messages (needed for accurate budget counting)
 	const historyMessages: unknown[] = [];
 	for (const entry of history) {
 		const callId = randomUUID();
@@ -89,20 +86,19 @@ export function assembleContext<TState>(input: AssembleInput<TState>): Assembled
 		}
 	}
 
-	// 7. Enforce budgets
+	// 6. Enforce budgets
 	const historyText = historyMessages.length > 0 ? JSON.stringify(historyMessages) : "";
 	enforceBudgets(
 		{
 			instructions: fullInstructions,
-			state: stateText,
 			history: historyText,
 			tools: toolDescriptions,
 		},
 		budgets,
 		tokenizer,
 	);
 
-	// 8. Build final messages
+	// 7. Build final messages
 	const messages: unknown[] = [];
 	messages.push({
 		role: "system",
@@ -111,7 +107,7 @@ export function assembleContext<TState>(input: AssembleInput<TState>): Assembled
 	messages.push(...historyMessages);
 	messages.push({
 		role: "user",
-		content: `Current state:\n${stateText}\n\nChoose the next action.`,
+		content: "Choose the next action.",
 	});
 
 	return { messages, outputSchema, validTools: validTools.map((t) => t.name) };

diff --git a/src/context/budget.ts b/src/context/budget.ts
@@ -4,7 +4,6 @@ import type { Tokenizer } from "./tokenizer";
 
 export interface SectionContents {
 	instructions: string;
-	state: string;
 	history: string;
 	tools: string;
 }
@@ -16,7 +15,7 @@ export function enforceBudgets(
 ): Record<keyof TokenBudgets, number> {
 	const counts: Record<string, number> = {};
 
-	for (const key of ["instructions", "state", "history", "tools"] as const) {
+	for (const key of ["instructions", "history", "tools"] as const) {
 		const count = tokenizer.count(sections[key]);
 		counts[key] = count;
 		if (count > budgets[key]) {

diff --git a/src/types.ts b/src/types.ts
@@ -13,7 +13,6 @@ export interface ProviderConfig {
 
 export interface TokenBudgets {
 	instructions: number;
-	state: number;
 	history: number;
 	tools: number;
 }

diff --git a/tests/agent.test.ts b/tests/agent.test.ts
@@ -27,7 +27,6 @@ const baseConfig = {
 	context: {
 		budgets: {
 			instructions: 5000,
-			state: 5000,
 			history: 5000,
 			tools: 5000,
 		},

diff --git a/tests/context/assembler.test.ts b/tests/context/assembler.test.ts
@@ -59,7 +59,6 @@ const tools: ToolDefinition<TestState>[] = [
 
 const bigBudgets: TokenBudgets = {
 	instructions: 10000,
-	state: 10000,
 	history: 10000,
 	tools: 10000,
 };
@@ -92,7 +91,7 @@ describe("context assembler", () => {
 		expect(systemMsg.content as string).toContain("0.3");
 	});
 
-	it("includes state in user message", () => {
+	it("does not include raw state in user message", () => {
 		const result = assembleContext({
 			state: { status: "pending", score: 0.3 },
 			tools,
@@ -103,8 +102,8 @@ describe("context assembler", () => {
 			providerType: "openai",
 		});
 		const userMsg = findMessage(result.messages, (m) => m.role === "user");
-		expect(userMsg.content as string).toContain("pending");
-		expect(userMsg.content as string).toContain("0.3");
+		expect(userMsg.content as string).not.toContain("pending");
+		expect(userMsg.content as string).not.toContain("0.3");
 	});
 
 	it("throws NoValidToolsError when no tools match", () => {

diff --git a/tests/context/budget.test.ts b/tests/context/budget.test.ts
@@ -14,15 +14,13 @@ const mockTokenizer: Tokenizer = {
 describe("budget enforcement", () => {
 	const budgets: TokenBudgets = {
 		instructions: 100,
-		state: 50,
 		history: 30,
 		tools: 40,
 	};
 
 	it("passes when all sections are within budget", () => {
 		const sections = {
 			instructions: "a".repeat(50),
-			state: "b".repeat(30),
 			history: "c".repeat(20),
 			tools: "d".repeat(25),
 		};
@@ -32,7 +30,6 @@ describe("budget enforcement", () => {
 	it("throws BudgetExceededError when instructions exceed budget", () => {
 		const sections = {
 			instructions: "a".repeat(150),
-			state: "b".repeat(10),
 			history: "c".repeat(10),
 			tools: "d".repeat(10),
 		};
@@ -50,7 +47,6 @@ describe("budget enforcement", () => {
 	it("throws for the first section that exceeds budget", () => {
 		const sections = {
 			instructions: "a".repeat(200),
-			state: "b".repeat(200),
 			history: "c".repeat(10),
 			tools: "d".repeat(10),
 		};
@@ -66,13 +62,11 @@ describe("budget enforcement", () => {
 	it("returns token counts for all sections", () => {
 		const sections = {
 			instructions: "a".repeat(50),
-			state: "b".repeat(30),
 			history: "c".repeat(20),
 			tools: "d".repeat(25),
 		};
 		const counts = enforceBudgets(sections, budgets, mockTokenizer);
 		expect(counts.instructions).toBe(50);
-		expect(counts.state).toBe(30);
 		expect(counts.history).toBe(20);
 		expect(counts.tools).toBe(25);
 	});

diff --git a/tests/integration/e2e.test.ts b/tests/integration/e2e.test.ts
@@ -79,7 +79,6 @@ describe("end-to-end with mock server", () => {
 			context: {
 				budgets: {
 					instructions: 5000,
-					state: 5000,
 					history: 5000,
 					tools: 5000,
 				},
@@ -129,7 +128,7 @@ describe("end-to-end with mock server", () => {
 			],
 			instructions: () => "test",
 			context: {
-				budgets: { instructions: 5000, state: 5000, history: 5000, tools: 5000 },
+				budgets: { instructions: 5000, history: 5000, tools: 5000 },
 			},
 		});