From a735713db305b1383393b223c4e6e530e038c41e Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Wed, 25 Feb 2026 02:00:36 +0000 Subject: [PATCH 1/7] Add: Dynamic prompt cache breakpoints for Anthropic prefix caching Co-Authored-By: Claude Opus 4.6 --- e2e/perstack-cli/continue.test.ts | 15 ++ e2e/perstack-cli/providers.test.ts | 7 + packages/runtime/src/messages/message.test.ts | 227 ++++++++++++++++++ packages/runtime/src/messages/message.ts | 37 +++ .../states/generating-tool-call.ts | 4 +- 5 files changed, 289 insertions(+), 1 deletion(-) diff --git a/e2e/perstack-cli/continue.test.ts b/e2e/perstack-cli/continue.test.ts index e67e01e8..271ff045 100644 --- a/e2e/perstack-cli/continue.test.ts +++ b/e2e/perstack-cli/continue.test.ts @@ -72,6 +72,16 @@ describe.concurrent("Continue Job", () => { ).toBe(true) const completeEvents = filterEventsByType(continueResult.events, "completeRun") expect(completeEvents.length).toBe(1) + + // Verify usage tracking flows through multi-turn conversations. + // On turn 2, the conversation prefix from turn 1 is resent — with prompt caching + // enabled (applyCacheBreakpoints), cachedInputTokens should be populated. + // Actual cache hits depend on the model's minimum token threshold + // (e.g. 1024 for Sonnet, 4096 for Haiku 4.5). + const completeEvent = completeEvents[0] + const usage = (completeEvent as { usage?: Record }).usage + expect(usage).toBeDefined() + expect(typeof usage?.cachedInputTokens).toBe("number") }) // ───────────────────────────────────────────────────────────────────────── @@ -128,6 +138,11 @@ describe.concurrent("Continue Job", () => { expect(continueCompleteEvents.length).toBe(1) const lastCompleteEvent = continueCompleteEvents[continueCompleteEvents.length - 1] expect((lastCompleteEvent as { text?: string }).text).toBeDefined() + + // Verify usage includes cache metrics on continued run + const usage = (lastCompleteEvent as { usage?: Record }).usage + expect(usage).toBeDefined() + expect(typeof usage?.cachedInputTokens).toBe("number") }) // ───────────────────────────────────────────────────────────────────────── diff --git a/e2e/perstack-cli/providers.test.ts b/e2e/perstack-cli/providers.test.ts index 2256005d..c71a1201 100644 --- a/e2e/perstack-cli/providers.test.ts +++ b/e2e/perstack-cli/providers.test.ts @@ -47,6 +47,13 @@ describe.concurrent("LLM Providers", () => { // Note: text may be empty when using attemptCompletion tool (explicit completion) // The actual response is in the checkpoint messages, not in completeRun.text expect((completeEvent as { text?: string }).text).toBeDefined() + + // Verify usage tracking includes cache token metrics + const usage = (completeEvent as { usage?: Record }).usage + expect(usage).toBeDefined() + expect(typeof usage?.inputTokens).toBe("number") + expect(typeof usage?.outputTokens).toBe("number") + expect(typeof usage?.cachedInputTokens).toBe("number") }, LLM_TIMEOUT, ) diff --git a/packages/runtime/src/messages/message.test.ts b/packages/runtime/src/messages/message.test.ts index 14d6f6af..d8ab9100 100644 --- a/packages/runtime/src/messages/message.test.ts +++ b/packages/runtime/src/messages/message.test.ts @@ -1,6 +1,8 @@ import { describe, expect, it } from "bun:test" +import type { Message } from "@perstack/core" import { createInstructionMessage } from "./instruction-message.js" import { + applyCacheBreakpoints, createExpertMessage, createToolMessage, createUserMessage, @@ -593,3 +595,228 @@ describe("@perstack/messages: instruction-message", () => { }) }) }) + +describe("applyCacheBreakpoints", () => { + it("returns empty array for empty input", () => { + const result = applyCacheBreakpoints([]) + expect(result).toEqual([]) + }) + + it("returns copy of array when only one message exists", () => { + const messages: Message[] = [ + { + type: "instructionMessage", + id: "msg-1", + cache: true, + contents: [{ id: "c-1", type: "textPart", text: "System prompt" }], + }, + ] + const result = applyCacheBreakpoints(messages) + expect(result).toEqual(messages) + expect(result).not.toBe(messages) + }) + + it("sets cache on last message when multiple messages exist", () => { + const messages: Message[] = [ + { + type: "instructionMessage", + id: "msg-1", + cache: true, + contents: [{ id: "c-1", type: "textPart", text: "System prompt" }], + }, + { + type: "userMessage", + id: "msg-2", + contents: [{ id: "c-2", type: "textPart", text: "Hello" }], + }, + ] + const result = applyCacheBreakpoints(messages) + expect(result[0].cache).toBe(true) + expect(result[1].cache).toBe(true) + }) + + it("preserves instruction message cache flag", () => { + const messages: Message[] = [ + { + type: "instructionMessage", + id: "msg-1", + cache: true, + contents: [{ id: "c-1", type: "textPart", text: "System prompt" }], + }, + { + type: "userMessage", + id: "msg-2", + contents: [{ id: "c-2", type: "textPart", text: "User input" }], + }, + { + type: "expertMessage", + id: "msg-3", + contents: [{ id: "c-3", type: "textPart", text: "Response" }], + }, + ] + const result = applyCacheBreakpoints(messages) + expect(result[0].cache).toBe(true) + expect(result[0]).toBe(messages[0]) + }) + + it("clears cache from middle messages", () => { + const messages: Message[] = [ + { + type: "instructionMessage", + id: "msg-1", + cache: true, + contents: [{ id: "c-1", type: "textPart", text: "System prompt" }], + }, + { + type: "userMessage", + id: "msg-2", + cache: true, + contents: [{ id: "c-2", type: "textPart", text: "User input" }], + }, + { + type: "expertMessage", + id: "msg-3", + contents: [{ id: "c-3", type: "textPart", text: "Response" }], + }, + { + type: "toolMessage", + id: "msg-4", + contents: [ + { + id: "c-4", + type: "toolResultPart", + toolCallId: "tc-1", + toolName: "readFile", + contents: [{ id: "c-5", type: "textPart", text: "file contents" }], + isError: false, + }, + ], + }, + ] + const result = applyCacheBreakpoints(messages) + expect(result[0].cache).toBe(true) + expect(result[1].cache).toBeUndefined() + expect(result[2].cache).toBeUndefined() + expect(result[3].cache).toBe(true) + }) + + it("does not mutate the original messages array", () => { + const messages: Message[] = [ + { + type: "instructionMessage", + id: "msg-1", + cache: true, + contents: [{ id: "c-1", type: "textPart", text: "System prompt" }], + }, + { + type: "userMessage", + id: "msg-2", + contents: [{ id: "c-2", type: "textPart", text: "Hello" }], + }, + ] + const original = JSON.parse(JSON.stringify(messages)) + applyCacheBreakpoints(messages) + expect(messages).toEqual(original) + }) + + it("avoids creating new object when last message already has cache true", () => { + const lastMsg: Message = { + type: "expertMessage", + id: "msg-2", + cache: true, + contents: [{ id: "c-2", type: "textPart", text: "Response" }], + } + const messages: Message[] = [ + { + type: "instructionMessage", + id: "msg-1", + cache: true, + contents: [{ id: "c-1", type: "textPart", text: "System prompt" }], + }, + lastMsg, + ] + const result = applyCacheBreakpoints(messages) + expect(result[1]).toBe(lastMsg) + }) + + it("handles multi-turn conversation correctly", () => { + const messages: Message[] = [ + { + type: "instructionMessage", + id: "msg-1", + cache: true, + contents: [{ id: "c-1", type: "textPart", text: "System" }], + }, + { + type: "userMessage", + id: "msg-2", + contents: [{ id: "c-2", type: "textPart", text: "Input" }], + }, + { + type: "expertMessage", + id: "msg-3", + cache: true, + contents: [ + { + id: "c-3", + type: "toolCallPart", + toolCallId: "tc-1", + toolName: "search", + args: {}, + }, + ], + }, + { + type: "toolMessage", + id: "msg-4", + cache: true, + contents: [ + { + id: "c-4", + type: "toolResultPart", + toolCallId: "tc-1", + toolName: "search", + contents: [{ id: "c-5", type: "textPart", text: "results" }], + isError: false, + }, + ], + }, + { + type: "expertMessage", + id: "msg-5", + contents: [ + { + id: "c-6", + type: "toolCallPart", + toolCallId: "tc-2", + toolName: "write", + args: {}, + }, + ], + }, + { + type: "toolMessage", + id: "msg-6", + contents: [ + { + id: "c-7", + type: "toolResultPart", + toolCallId: "tc-2", + toolName: "write", + contents: [{ id: "c-8", type: "textPart", text: "done" }], + isError: false, + }, + ], + }, + ] + + const result = applyCacheBreakpoints(messages) + + expect(result[0].cache).toBe(true) + expect(result[1].cache).toBeUndefined() + expect(result[2].cache).toBeUndefined() + expect(result[3].cache).toBeUndefined() + expect(result[4].cache).toBeUndefined() + expect(result[5].cache).toBe(true) + }) +}) diff --git a/packages/runtime/src/messages/message.ts b/packages/runtime/src/messages/message.ts index 6984d84b..9c78df86 100644 --- a/packages/runtime/src/messages/message.ts +++ b/packages/runtime/src/messages/message.ts @@ -131,6 +131,43 @@ export function messageToCoreMessage(message: Message): ModelMessage { } } } + +/** + * Apply cache breakpoints to messages for optimal prompt caching. + * + * Anthropic supports up to 4 cache breakpoints. This function uses 2: + * - Breakpoint 1: Instruction message (preserves existing cache flag) + * - Breakpoint 2: Last message in conversation (dynamic, moves each turn) + * + * Cache flags on non-strategic positions are cleared to stay within limits. + * This is a pure function — it does not mutate the input array. + * + * For non-Anthropic providers, cache flags are harmless (messageToCoreMessage + * only translates them to Anthropic providerOptions). + */ +export function applyCacheBreakpoints(messages: ReadonlyArray): Message[] { + if (messages.length <= 1) { + return [...messages] + } + + const lastIndex = messages.length - 1 + + return messages.map((msg, index) => { + // Preserve instruction message cache (breakpoint 1) + if (msg.type === "instructionMessage") { + return msg + } + + // Set cache on the last message (breakpoint 2 — dynamic frontier) + if (index === lastIndex) { + return msg.cache === true ? msg : { ...msg, cache: true } + } + + // Clear cache from non-strategic positions + return msg.cache ? { ...msg, cache: undefined } : msg + }) +} + function instructionContentsToCoreContent( contents: InstructionMessage["contents"], ): SystemModelMessage["content"] { diff --git a/packages/runtime/src/state-machine/states/generating-tool-call.ts b/packages/runtime/src/state-machine/states/generating-tool-call.ts index 0bcdf011..30d44ac0 100644 --- a/packages/runtime/src/state-machine/states/generating-tool-call.ts +++ b/packages/runtime/src/state-machine/states/generating-tool-call.ts @@ -22,6 +22,7 @@ import { getToolSet } from "../../helpers/tool-set.js" import { createEmptyUsage, sumUsage, usageFromGenerateTextResult } from "../../helpers/usage.js" import type { StreamCallbacks } from "../../llm/types.js" import { + applyCacheBreakpoints, createExpertMessage, createToolMessage, createUserMessage, @@ -100,6 +101,7 @@ export async function generatingToolCallLogic({ llmExecutor, }: RunSnapshot["context"]): Promise { const { messages } = checkpoint + const cachedMessages = applyCacheBreakpoints(messages) // Track if reasoning was completed via callback (to avoid duplicate emissions) let reasoningCompletedViaCallback = false @@ -127,7 +129,7 @@ export async function generatingToolCallLogic({ const executionResult = await llmExecutor.streamText( { - messages: messages.map(messageToCoreMessage), + messages: cachedMessages.map(messageToCoreMessage), maxRetries: setting.maxRetries, tools: getToolSet(skillManager), toolChoice: "auto", From a626717f5cdb5cb96a78529cc724de46684b7833 Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Wed, 25 Feb 2026 02:02:57 +0000 Subject: [PATCH 2/7] Chore: Add changeset for prompt cache breakpoints Co-Authored-By: Claude Opus 4.6 --- .changeset/prompt-cache-breakpoints.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/prompt-cache-breakpoints.md diff --git a/.changeset/prompt-cache-breakpoints.md b/.changeset/prompt-cache-breakpoints.md new file mode 100644 index 00000000..22821c47 --- /dev/null +++ b/.changeset/prompt-cache-breakpoints.md @@ -0,0 +1,5 @@ +--- +"@perstack/runtime": patch +--- + +Add dynamic prompt cache breakpoints for Anthropic prefix caching From 324dbc982fdbab0dfa13d230fa2b171da9f8bd56 Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Wed, 25 Feb 2026 02:06:41 +0000 Subject: [PATCH 3/7] Chore: Bump perstack and create-expert in changeset Co-Authored-By: Claude Opus 4.6 --- .changeset/prompt-cache-breakpoints.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.changeset/prompt-cache-breakpoints.md b/.changeset/prompt-cache-breakpoints.md index 22821c47..84575ffe 100644 --- a/.changeset/prompt-cache-breakpoints.md +++ b/.changeset/prompt-cache-breakpoints.md @@ -1,5 +1,7 @@ --- "@perstack/runtime": patch +"perstack": patch +"create-expert": patch --- Add dynamic prompt cache breakpoints for Anthropic prefix caching From a7a670e84eeb700aa1bbd1d04dd878a1042e1abc Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Wed, 25 Feb 2026 02:15:12 +0000 Subject: [PATCH 4/7] Add: Cache breakpoint on last tool definition for Anthropic prefix caching Co-Authored-By: Claude Opus 4.6 --- packages/runtime/src/helpers/tool-set.ts | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/packages/runtime/src/helpers/tool-set.ts b/packages/runtime/src/helpers/tool-set.ts index db27d368..f51372f4 100644 --- a/packages/runtime/src/helpers/tool-set.ts +++ b/packages/runtime/src/helpers/tool-set.ts @@ -2,11 +2,20 @@ import type { SkillManager } from "@perstack/skill-manager" import { jsonSchema, type ToolSet, tool } from "ai" export function getToolSet(skillManager: SkillManager): ToolSet { + const defs = skillManager.getToolDefinitions() + const lastIndex = defs.length - 1 const tools: ToolSet = {} - for (const def of skillManager.getToolDefinitions()) { + for (let i = 0; i < defs.length; i++) { + const def = defs[i] tools[def.name] = tool({ description: def.description, inputSchema: jsonSchema(def.inputSchema), + // Cache breakpoint on last tool for Anthropic prefix caching. + // Prefix order: tools → system → messages. Caching the last tool + // creates a stable breakpoint that survives system/message changes. + ...(i === lastIndex + ? { providerOptions: { anthropic: { cacheControl: { type: "ephemeral" } } } } + : {}), }) } return tools From 1f3c97f7775fd136e93009182c5def875c7335ed Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Wed, 25 Feb 2026 02:26:39 +0000 Subject: [PATCH 5/7] Update: Use 20-block lookback window strategy for cache breakpoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BP1 on system message covers tools+system (prefix order: tools→system→messages). BP2-4 distributed every ~20 content blocks across conversation messages, working backwards from the last message. Removes redundant tool-level BP. Co-Authored-By: Claude Opus 4.6 --- packages/runtime/src/helpers/tool-set.ts | 11 +- packages/runtime/src/messages/message.test.ts | 187 +++++++++++++++++- packages/runtime/src/messages/message.ts | 53 ++++- 3 files changed, 230 insertions(+), 21 deletions(-) diff --git a/packages/runtime/src/helpers/tool-set.ts b/packages/runtime/src/helpers/tool-set.ts index f51372f4..db27d368 100644 --- a/packages/runtime/src/helpers/tool-set.ts +++ b/packages/runtime/src/helpers/tool-set.ts @@ -2,20 +2,11 @@ import type { SkillManager } from "@perstack/skill-manager" import { jsonSchema, type ToolSet, tool } from "ai" export function getToolSet(skillManager: SkillManager): ToolSet { - const defs = skillManager.getToolDefinitions() - const lastIndex = defs.length - 1 const tools: ToolSet = {} - for (let i = 0; i < defs.length; i++) { - const def = defs[i] + for (const def of skillManager.getToolDefinitions()) { tools[def.name] = tool({ description: def.description, inputSchema: jsonSchema(def.inputSchema), - // Cache breakpoint on last tool for Anthropic prefix caching. - // Prefix order: tools → system → messages. Caching the last tool - // creates a stable breakpoint that survives system/message changes. - ...(i === lastIndex - ? { providerOptions: { anthropic: { cacheControl: { type: "ephemeral" } } } } - : {}), }) } return tools diff --git a/packages/runtime/src/messages/message.test.ts b/packages/runtime/src/messages/message.test.ts index d8ab9100..c27062af 100644 --- a/packages/runtime/src/messages/message.test.ts +++ b/packages/runtime/src/messages/message.test.ts @@ -739,7 +739,7 @@ describe("applyCacheBreakpoints", () => { expect(result[1]).toBe(lastMsg) }) - it("handles multi-turn conversation correctly", () => { + it("handles multi-turn conversation correctly (< 20 blocks, single BP)", () => { const messages: Message[] = [ { type: "instructionMessage", @@ -819,4 +819,189 @@ describe("applyCacheBreakpoints", () => { expect(result[4].cache).toBeUndefined() expect(result[5].cache).toBe(true) }) + + it("places second BP when content blocks exceed 20-block lookback window", () => { + // Build a conversation with > 20 content blocks: + // Each tool-use step = expertMessage(3 parts) + toolMessage(1 part) = 4 blocks + // 6 steps = 24 blocks + 1 userMessage = 25 blocks total + const messages: Message[] = [ + { + type: "instructionMessage", + id: "instr", + cache: true, + contents: [{ id: "c-instr", type: "textPart", text: "System" }], + }, + { + type: "userMessage", + id: "user-0", + contents: [{ id: "c-u0", type: "textPart", text: "Go" }], + }, + ] + for (let step = 0; step < 6; step++) { + messages.push({ + type: "expertMessage", + id: `expert-${step}`, + contents: [ + { id: `th-${step}`, type: "thinkingPart", thinking: "...", signature: "sig" }, + { id: `txt-${step}`, type: "textPart", text: `Step ${step}` }, + { + id: `tc-${step}`, + type: "toolCallPart", + toolCallId: `tc-${step}`, + toolName: "doWork", + args: {}, + }, + ], + }) + messages.push({ + type: "toolMessage", + id: `tool-${step}`, + contents: [ + { + id: `tr-${step}`, + type: "toolResultPart", + toolCallId: `tc-${step}`, + toolName: "doWork", + contents: [{ id: `trr-${step}`, type: "textPart", text: "ok" }], + isError: false, + }, + ], + }) + } + // Total non-instruction: 1 (user) + 6*(3+1) = 25 blocks, 13 messages + + const result = applyCacheBreakpoints(messages) + + // BP1: instruction (index 0) + expect(result[0].cache).toBe(true) + // Last message (index 13) should always have BP + expect(result[13].cache).toBe(true) + + // A second BP should exist somewhere in the middle + const middleBps = result.slice(1, 13).filter((m) => m.cache === true) + expect(middleBps.length).toBe(1) + + // All other non-BP messages should have cache cleared + const nonBpMessages = result.slice(1).filter((m) => m.cache !== true) + for (const msg of nonBpMessages) { + expect(msg.cache).toBeUndefined() + } + }) + + it("places up to 3 message BPs for very long conversations (> 40 blocks)", () => { + // 11 tool-use steps = 44 blocks + 1 user = 45 blocks total + const messages: Message[] = [ + { + type: "instructionMessage", + id: "instr", + cache: true, + contents: [{ id: "c-instr", type: "textPart", text: "System" }], + }, + { + type: "userMessage", + id: "user-0", + contents: [{ id: "c-u0", type: "textPart", text: "Go" }], + }, + ] + for (let step = 0; step < 11; step++) { + messages.push({ + type: "expertMessage", + id: `expert-${step}`, + contents: [ + { id: `th-${step}`, type: "thinkingPart", thinking: "...", signature: "sig" }, + { id: `txt-${step}`, type: "textPart", text: `Step ${step}` }, + { + id: `tc-${step}`, + type: "toolCallPart", + toolCallId: `tc-${step}`, + toolName: "doWork", + args: {}, + }, + ], + }) + messages.push({ + type: "toolMessage", + id: `tool-${step}`, + contents: [ + { + id: `tr-${step}`, + type: "toolResultPart", + toolCallId: `tc-${step}`, + toolName: "doWork", + contents: [{ id: `trr-${step}`, type: "textPart", text: "ok" }], + isError: false, + }, + ], + }) + } + // Total non-instruction: 1 + 11*4 = 45 blocks, 24 messages (indices 0..24) + + const result = applyCacheBreakpoints(messages) + + // BP1: instruction + expect(result[0].cache).toBe(true) + // Last message always has BP + expect(result[result.length - 1].cache).toBe(true) + + // Should have 3 message BPs total (max) + const messageBps = result.slice(1).filter((m) => m.cache === true) + expect(messageBps.length).toBe(3) + }) + + it("does not exceed 3 message BPs even for extremely long conversations", () => { + // 20 tool-use steps = 80 blocks + 1 user = 81 blocks total + const messages: Message[] = [ + { + type: "instructionMessage", + id: "instr", + cache: true, + contents: [{ id: "c-instr", type: "textPart", text: "System" }], + }, + { + type: "userMessage", + id: "user-0", + contents: [{ id: "c-u0", type: "textPart", text: "Go" }], + }, + ] + for (let step = 0; step < 20; step++) { + messages.push({ + type: "expertMessage", + id: `expert-${step}`, + contents: [ + { id: `th-${step}`, type: "thinkingPart", thinking: "...", signature: "sig" }, + { id: `txt-${step}`, type: "textPart", text: `Step ${step}` }, + { + id: `tc-${step}`, + type: "toolCallPart", + toolCallId: `tc-${step}`, + toolName: "doWork", + args: {}, + }, + ], + }) + messages.push({ + type: "toolMessage", + id: `tool-${step}`, + contents: [ + { + id: `tr-${step}`, + type: "toolResultPart", + toolCallId: `tc-${step}`, + toolName: "doWork", + contents: [{ id: `trr-${step}`, type: "textPart", text: "ok" }], + isError: false, + }, + ], + }) + } + + const result = applyCacheBreakpoints(messages) + + // Should still max out at 3 message BPs + const messageBps = result.slice(1).filter((m) => m.cache === true) + expect(messageBps.length).toBe(3) + + // Last message always has BP + expect(result[result.length - 1].cache).toBe(true) + }) }) diff --git a/packages/runtime/src/messages/message.ts b/packages/runtime/src/messages/message.ts index 9c78df86..332eea7c 100644 --- a/packages/runtime/src/messages/message.ts +++ b/packages/runtime/src/messages/message.ts @@ -133,13 +133,16 @@ export function messageToCoreMessage(message: Message): ModelMessage { } /** - * Apply cache breakpoints to messages for optimal prompt caching. + * Apply cache breakpoints to messages for Anthropic prompt caching. * - * Anthropic supports up to 4 cache breakpoints. This function uses 2: - * - Breakpoint 1: Instruction message (preserves existing cache flag) - * - Breakpoint 2: Last message in conversation (dynamic, moves each turn) + * Anthropic allows up to 4 explicit cache breakpoints per request and uses a + * 20-block lookback window from each breakpoint to find cache matches. + * + * Breakpoint strategy: + * - BP1: Instruction message (caches tools + system; prefix order: tools → system → messages) + * - BP2–BP4: Up to 3 breakpoints across conversation messages, placed every ~20 content + * blocks (working backwards from the last message) to ensure full lookback coverage. * - * Cache flags on non-strategic positions are cleared to stay within limits. * This is a pure function — it does not mutate the input array. * * For non-Anthropic providers, cache flags are harmless (messageToCoreMessage @@ -150,20 +153,50 @@ export function applyCacheBreakpoints(messages: ReadonlyArray): Message return [...messages] } - const lastIndex = messages.length - 1 + // BP2–BP4: up to 3 message breakpoints (BP1 is the instruction message) + const MAX_MESSAGE_BREAKPOINTS = 3 + // Anthropic checks up to 20 content blocks before each breakpoint + const LOOKBACK_WINDOW = 20 + + // Collect non-instruction messages with their content block counts + const msgMeta: Array<{ originalIndex: number; blockCount: number }> = [] + for (let i = 0; i < messages.length; i++) { + if (messages[i].type !== "instructionMessage") { + msgMeta.push({ originalIndex: i, blockCount: messages[i].contents.length }) + } + } + + // Determine which messages get cache breakpoints + const breakpointIndices = new Set() + + if (msgMeta.length > 0) { + // Always place a breakpoint on the last message (dynamic frontier) + breakpointIndices.add(msgMeta[msgMeta.length - 1].originalIndex) + let bpsPlaced = 1 + let blocksSinceBp = 0 + + // Walk backwards, placing a breakpoint every ~20 content blocks + for (let i = msgMeta.length - 2; i >= 0 && bpsPlaced < MAX_MESSAGE_BREAKPOINTS; i--) { + blocksSinceBp += msgMeta[i].blockCount + if (blocksSinceBp >= LOOKBACK_WINDOW) { + breakpointIndices.add(msgMeta[i].originalIndex) + bpsPlaced++ + blocksSinceBp = 0 + } + } + } return messages.map((msg, index) => { - // Preserve instruction message cache (breakpoint 1) + // Preserve instruction message cache (BP1: caches tools + system) if (msg.type === "instructionMessage") { return msg } - // Set cache on the last message (breakpoint 2 — dynamic frontier) - if (index === lastIndex) { + if (breakpointIndices.has(index)) { return msg.cache === true ? msg : { ...msg, cache: true } } - // Clear cache from non-strategic positions + // Clear stale cache flags from non-strategic positions return msg.cache ? { ...msg, cache: undefined } : msg }) } From d157fa8c3bdedf0e4cc5bf8d26980c98cb2edc3e Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Wed, 25 Feb 2026 03:02:14 +0000 Subject: [PATCH 6/7] Update: Switch to automatic caching via @ai-sdk/anthropic 3.0.47 Replace explicit applyCacheBreakpoints() with Anthropic's request-level automatic caching (cache_control: {type: "ephemeral"}). Auto-places breakpoints on last system, tool, and message blocks optimally. Co-Authored-By: Claude Opus 4.6 --- .changeset/prompt-cache-breakpoints.md | 3 +- bun.lock | 61 +-- e2e/perstack-cli/continue.test.ts | 4 +- package.json | 5 +- .../providers/anthropic/src/adapter.test.ts | 15 +- packages/providers/anthropic/src/adapter.ts | 14 +- .../src/messages/instruction-message.ts | 1 - packages/runtime/src/messages/message.test.ts | 414 +----------------- packages/runtime/src/messages/message.ts | 69 --- .../states/generating-tool-call.ts | 4 +- .../src/state-machine/states/init.test.ts | 1 - 11 files changed, 67 insertions(+), 524 deletions(-) diff --git a/.changeset/prompt-cache-breakpoints.md b/.changeset/prompt-cache-breakpoints.md index 84575ffe..1c296d73 100644 --- a/.changeset/prompt-cache-breakpoints.md +++ b/.changeset/prompt-cache-breakpoints.md @@ -1,7 +1,8 @@ --- "@perstack/runtime": patch +"@perstack/anthropic-provider": patch "perstack": patch "create-expert": patch --- -Add dynamic prompt cache breakpoints for Anthropic prefix caching +Enable automatic prompt caching for Anthropic via request-level cache_control diff --git a/bun.lock b/bun.lock index 4dad4e33..9ab53252 100644 --- a/bun.lock +++ b/bun.lock @@ -4,6 +4,9 @@ "workspaces": { "": { "name": "perstack-monorepo", + "dependencies": { + "@ai-sdk/anthropic": "^3.0.47", + }, "devDependencies": { "@biomejs/biome": "^2.4.2", "@changesets/changelog-github": "^0.5.2", @@ -18,10 +21,10 @@ }, "apps/base": { "name": "@perstack/base", - "version": "0.0.66", + "version": "0.0.68", "dependencies": { "@modelcontextprotocol/sdk": "^1.26.0", - "@perstack/core": "0.0.54", + "@perstack/core": "0.0.56", "commander": "^14.0.3", "zod": "^4.3.6", }, @@ -33,7 +36,7 @@ }, "apps/create-expert": { "name": "create-expert", - "version": "0.0.43", + "version": "0.0.45", "bin": { "create-expert": "bin/cli.ts", }, @@ -52,7 +55,7 @@ }, "apps/create-expert-skill": { "name": "@perstack/create-expert-skill", - "version": "0.0.3", + "version": "0.0.5", "dependencies": { "@modelcontextprotocol/sdk": "^1.26.0", "commander": "^14.0.3", @@ -68,7 +71,7 @@ }, "apps/perstack": { "name": "perstack", - "version": "0.0.95", + "version": "0.0.97", "dependencies": { "commander": "^14.0.3", }, @@ -85,7 +88,7 @@ }, "packages/core": { "name": "@perstack/core", - "version": "0.0.54", + "version": "0.0.56", "dependencies": { "@paralleldrive/cuid2": "^3.3.0", "zod": "^4.3.6", @@ -98,7 +101,7 @@ }, "packages/filesystem": { "name": "@perstack/filesystem-storage", - "version": "0.0.25", + "version": "0.0.27", "dependencies": { "@perstack/core": "workspace:*", }, @@ -111,7 +114,7 @@ }, "packages/installer": { "name": "@perstack/installer", - "version": "0.0.18", + "version": "0.0.20", "dependencies": { "@perstack/api-client": "^0.0.56", "@perstack/core": "workspace:*", @@ -127,7 +130,7 @@ }, "packages/log": { "name": "@perstack/log", - "version": "0.0.11", + "version": "0.0.13", "dependencies": { "@perstack/core": "workspace:*", "@perstack/filesystem-storage": "workspace:*", @@ -140,7 +143,7 @@ }, "packages/perstack-toml": { "name": "@perstack/perstack-toml", - "version": "0.0.10", + "version": "0.0.12", "dependencies": { "@perstack/core": "workspace:*", "smol-toml": "^1.6.0", @@ -154,7 +157,7 @@ }, "packages/providers/anthropic": { "name": "@perstack/anthropic-provider", - "version": "0.0.27", + "version": "0.0.29", "dependencies": { "@ai-sdk/anthropic": "^3.0.44", "@perstack/core": "workspace:*", @@ -169,7 +172,7 @@ }, "packages/providers/azure-openai": { "name": "@perstack/azure-openai-provider", - "version": "0.0.27", + "version": "0.0.29", "dependencies": { "@ai-sdk/azure": "^3.0.31", "@perstack/core": "workspace:*", @@ -184,7 +187,7 @@ }, "packages/providers/bedrock": { "name": "@perstack/bedrock-provider", - "version": "0.0.27", + "version": "0.0.29", "dependencies": { "@ai-sdk/amazon-bedrock": "^4.0.60", "@perstack/core": "workspace:*", @@ -199,7 +202,7 @@ }, "packages/providers/core": { "name": "@perstack/provider-core", - "version": "0.0.27", + "version": "0.0.29", "dependencies": { "@perstack/core": "workspace:*", "undici": "^7.22.0", @@ -213,7 +216,7 @@ }, "packages/providers/deepseek": { "name": "@perstack/deepseek-provider", - "version": "0.0.27", + "version": "0.0.29", "dependencies": { "@ai-sdk/deepseek": "^2.0.20", "@perstack/core": "workspace:*", @@ -228,7 +231,7 @@ }, "packages/providers/google": { "name": "@perstack/google-provider", - "version": "0.0.27", + "version": "0.0.29", "dependencies": { "@ai-sdk/google": "^3.0.29", "@perstack/core": "workspace:*", @@ -243,7 +246,7 @@ }, "packages/providers/ollama": { "name": "@perstack/ollama-provider", - "version": "0.0.27", + "version": "0.0.29", "dependencies": { "@perstack/core": "workspace:*", "@perstack/provider-core": "workspace:*", @@ -258,7 +261,7 @@ }, "packages/providers/openai": { "name": "@perstack/openai-provider", - "version": "0.0.27", + "version": "0.0.29", "dependencies": { "@ai-sdk/openai": "^3.0.29", "@perstack/core": "workspace:*", @@ -273,7 +276,7 @@ }, "packages/providers/vertex": { "name": "@perstack/vertex-provider", - "version": "0.0.27", + "version": "0.0.29", "dependencies": { "@ai-sdk/google-vertex": "^4.0.58", "@perstack/core": "workspace:*", @@ -288,7 +291,7 @@ }, "packages/react": { "name": "@perstack/react", - "version": "0.0.58", + "version": "0.0.60", "dependencies": { "@perstack/core": "workspace:*", }, @@ -307,7 +310,7 @@ }, "packages/runtime": { "name": "@perstack/runtime", - "version": "0.0.115", + "version": "0.0.117", "dependencies": { "@ai-sdk/amazon-bedrock": "^4.0.60", "@ai-sdk/anthropic": "^3.0.44", @@ -319,8 +322,8 @@ "@modelcontextprotocol/sdk": "^1.26.0", "@paralleldrive/cuid2": "^3.3.0", "@perstack/api-client": "^0.0.56", - "@perstack/base": "0.0.66", - "@perstack/core": "0.0.54", + "@perstack/base": "0.0.68", + "@perstack/core": "0.0.56", "ai": "^6.0.86", "ollama-ai-provider-v2": "^3.3.0", "smol-toml": "^1.6.0", @@ -346,7 +349,7 @@ }, "packages/skill-manager": { "name": "@perstack/skill-manager", - "version": "0.0.12", + "version": "0.0.14", "dependencies": { "@modelcontextprotocol/sdk": "^1.26.0", "@paralleldrive/cuid2": "^3.3.0", @@ -362,7 +365,7 @@ }, "packages/tui": { "name": "@perstack/tui", - "version": "0.0.16", + "version": "0.0.18", "dependencies": { "@paralleldrive/cuid2": "^3.3.0", "@perstack/core": "workspace:*", @@ -379,7 +382,7 @@ }, "packages/tui-components": { "name": "@perstack/tui-components", - "version": "0.0.18", + "version": "0.0.20", "dependencies": { "@perstack/core": "workspace:*", "@perstack/react": "workspace:*", @@ -397,7 +400,7 @@ "packages": { "@ai-sdk/amazon-bedrock": ["@ai-sdk/amazon-bedrock@4.0.63", "", { "dependencies": { "@ai-sdk/anthropic": "3.0.46", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@smithy/eventstream-codec": "^4.0.1", "@smithy/util-utf8": "^4.0.0", "aws4fetch": "^1.0.20" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-kNOaIaOXWFZFWbB0xM1l/bQYo7XwTkpdHbrA6n9A2U1c4/DcLF/+Rwc3vZF6MHPVSjoYVG0qxIa7jh39rKftYA=="], - "@ai-sdk/anthropic": ["@ai-sdk/anthropic@3.0.46", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-zXJPiNHaIiQ6XUqLeSYZ3ZbSzjqt1pNWEUf2hlkXlmmw8IF8KI0ruuGaDwKCExmtuNRf0E4TDxhsc9wRgWTzpw=="], + "@ai-sdk/anthropic": ["@ai-sdk/anthropic@3.0.47", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-E6Z3i/xvxGDxRskMMbuX9+xDK4l5LesrP2O7YQ0CcbAkYP25qTo/kYGf/AsJrLkNIY23HeO/kheUWtG1XZllDA=="], "@ai-sdk/azure": ["@ai-sdk/azure@3.0.31", "", { "dependencies": { "@ai-sdk/openai": "3.0.30", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-W9x6nt+yf+Ns0/Wx7U9TXHLmfu7mOUqy1b/drtVd3DvNfDudyruQM/YjM2268Q0FatSrPlA2RlnPVPGRH/4V8Q=="], @@ -1325,6 +1328,10 @@ "zod-to-json-schema": ["zod-to-json-schema@3.25.1", "", { "peerDependencies": { "zod": "^3.25 || ^4" } }, "sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA=="], + "@ai-sdk/amazon-bedrock/@ai-sdk/anthropic": ["@ai-sdk/anthropic@3.0.46", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-zXJPiNHaIiQ6XUqLeSYZ3ZbSzjqt1pNWEUf2hlkXlmmw8IF8KI0ruuGaDwKCExmtuNRf0E4TDxhsc9wRgWTzpw=="], + + "@ai-sdk/google-vertex/@ai-sdk/anthropic": ["@ai-sdk/anthropic@3.0.46", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-zXJPiNHaIiQ6XUqLeSYZ3ZbSzjqt1pNWEUf2hlkXlmmw8IF8KI0ruuGaDwKCExmtuNRf0E4TDxhsc9wRgWTzpw=="], + "@aws-crypto/util/@smithy/util-utf8": ["@smithy/util-utf8@2.3.0", "", { "dependencies": { "@smithy/util-buffer-from": "^2.2.0", "tslib": "^2.6.2" } }, "sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A=="], "@babel/code-frame/@babel/helper-validator-identifier": ["@babel/helper-validator-identifier@7.28.5", "", {}, "sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q=="], diff --git a/e2e/perstack-cli/continue.test.ts b/e2e/perstack-cli/continue.test.ts index 271ff045..ad43c3e5 100644 --- a/e2e/perstack-cli/continue.test.ts +++ b/e2e/perstack-cli/continue.test.ts @@ -74,8 +74,8 @@ describe.concurrent("Continue Job", () => { expect(completeEvents.length).toBe(1) // Verify usage tracking flows through multi-turn conversations. - // On turn 2, the conversation prefix from turn 1 is resent — with prompt caching - // enabled (applyCacheBreakpoints), cachedInputTokens should be populated. + // On turn 2, the conversation prefix from turn 1 is resent — with automatic + // prompt caching enabled via providerOptions, cachedInputTokens should be populated. // Actual cache hits depend on the model's minimum token threshold // (e.g. 1024 for Sonnet, 4096 for Haiku 4.5). const completeEvent = completeEvents[0] diff --git a/package.json b/package.json index 5434341c..f9773419 100644 --- a/package.json +++ b/package.json @@ -42,5 +42,8 @@ "engines": { "bun": ">=1.2.0" }, - "packageManager": "bun@1.3.5" + "packageManager": "bun@1.3.5", + "dependencies": { + "@ai-sdk/anthropic": "^3.0.47" + } } diff --git a/packages/providers/anthropic/src/adapter.test.ts b/packages/providers/anthropic/src/adapter.test.ts index f8393aff..9dd23a4a 100644 --- a/packages/providers/anthropic/src/adapter.test.ts +++ b/packages/providers/anthropic/src/adapter.test.ts @@ -64,19 +64,23 @@ describe("AnthropicProviderAdapter", () => { }) describe("getProviderOptions", () => { - it("returns undefined when no skills provided", () => { + it("returns cacheControl when no skills provided", () => { const adapter = new AnthropicProviderAdapter(mockConfig) const options = adapter.getProviderOptions() - expect(options).toBeUndefined() + expect(options).toEqual({ + anthropic: { cacheControl: { type: "ephemeral" } }, + }) }) - it("returns undefined when empty skills array provided", () => { + it("returns cacheControl when empty skills array provided", () => { const adapter = new AnthropicProviderAdapter(mockConfig) const options = adapter.getProviderOptions({ skills: [] }) - expect(options).toBeUndefined() + expect(options).toEqual({ + anthropic: { cacheControl: { type: "ephemeral" } }, + }) }) - it("returns provider options with builtin skills", () => { + it("returns provider options with builtin skills and cacheControl", () => { const adapter = new AnthropicProviderAdapter(mockConfig) const options = adapter.getProviderOptions({ skills: [{ type: "builtin", skillId: "pdf" }], @@ -86,6 +90,7 @@ describe("AnthropicProviderAdapter", () => { container: { skills: [{ type: "builtin", name: "pdf" }], }, + cacheControl: { type: "ephemeral" }, }, }) }) diff --git a/packages/providers/anthropic/src/adapter.ts b/packages/providers/anthropic/src/adapter.ts index 598bb93c..3b27d8d1 100644 --- a/packages/providers/anthropic/src/adapter.ts +++ b/packages/providers/anthropic/src/adapter.ts @@ -40,7 +40,19 @@ export class AnthropicProviderAdapter extends BaseProviderAdapter { } override getProviderOptions(config?: ProviderOptionsConfig): ProviderOptions | undefined { - return buildProviderOptions(config?.skills) + const skillOptions = buildProviderOptions(config?.skills) + const cacheOptions: ProviderOptions = { + anthropic: { cacheControl: { type: "ephemeral" } }, + } + if (!skillOptions) { + return cacheOptions + } + return { + anthropic: { + ...skillOptions["anthropic"], + ...cacheOptions["anthropic"], + }, + } } override getReasoningOptions(budget: ReasoningBudget): ProviderOptions | undefined { diff --git a/packages/runtime/src/messages/instruction-message.ts b/packages/runtime/src/messages/instruction-message.ts index b0adf1d1..4fd07d85 100644 --- a/packages/runtime/src/messages/instruction-message.ts +++ b/packages/runtime/src/messages/instruction-message.ts @@ -65,7 +65,6 @@ export function createInstructionMessage(expert: Expert, startedAt: number): Ins }, ], id: createId(), - cache: true, } } diff --git a/packages/runtime/src/messages/message.test.ts b/packages/runtime/src/messages/message.test.ts index c27062af..c3f97230 100644 --- a/packages/runtime/src/messages/message.test.ts +++ b/packages/runtime/src/messages/message.test.ts @@ -1,8 +1,6 @@ import { describe, expect, it } from "bun:test" -import type { Message } from "@perstack/core" import { createInstructionMessage } from "./instruction-message.js" import { - applyCacheBreakpoints, createExpertMessage, createToolMessage, createUserMessage, @@ -518,7 +516,7 @@ describe("@perstack/messages: instruction-message", () => { } const result = createInstructionMessage(expert, startedAt) expect(result.type).toBe("instructionMessage") - expect(result.cache).toBe(true) + expect(result.cache).toBeUndefined() expect(result.contents[0].type).toBe("textPart") expect(result.contents[0].text).toContain("You are a test expert.") }) @@ -595,413 +593,3 @@ describe("@perstack/messages: instruction-message", () => { }) }) }) - -describe("applyCacheBreakpoints", () => { - it("returns empty array for empty input", () => { - const result = applyCacheBreakpoints([]) - expect(result).toEqual([]) - }) - - it("returns copy of array when only one message exists", () => { - const messages: Message[] = [ - { - type: "instructionMessage", - id: "msg-1", - cache: true, - contents: [{ id: "c-1", type: "textPart", text: "System prompt" }], - }, - ] - const result = applyCacheBreakpoints(messages) - expect(result).toEqual(messages) - expect(result).not.toBe(messages) - }) - - it("sets cache on last message when multiple messages exist", () => { - const messages: Message[] = [ - { - type: "instructionMessage", - id: "msg-1", - cache: true, - contents: [{ id: "c-1", type: "textPart", text: "System prompt" }], - }, - { - type: "userMessage", - id: "msg-2", - contents: [{ id: "c-2", type: "textPart", text: "Hello" }], - }, - ] - const result = applyCacheBreakpoints(messages) - expect(result[0].cache).toBe(true) - expect(result[1].cache).toBe(true) - }) - - it("preserves instruction message cache flag", () => { - const messages: Message[] = [ - { - type: "instructionMessage", - id: "msg-1", - cache: true, - contents: [{ id: "c-1", type: "textPart", text: "System prompt" }], - }, - { - type: "userMessage", - id: "msg-2", - contents: [{ id: "c-2", type: "textPart", text: "User input" }], - }, - { - type: "expertMessage", - id: "msg-3", - contents: [{ id: "c-3", type: "textPart", text: "Response" }], - }, - ] - const result = applyCacheBreakpoints(messages) - expect(result[0].cache).toBe(true) - expect(result[0]).toBe(messages[0]) - }) - - it("clears cache from middle messages", () => { - const messages: Message[] = [ - { - type: "instructionMessage", - id: "msg-1", - cache: true, - contents: [{ id: "c-1", type: "textPart", text: "System prompt" }], - }, - { - type: "userMessage", - id: "msg-2", - cache: true, - contents: [{ id: "c-2", type: "textPart", text: "User input" }], - }, - { - type: "expertMessage", - id: "msg-3", - contents: [{ id: "c-3", type: "textPart", text: "Response" }], - }, - { - type: "toolMessage", - id: "msg-4", - contents: [ - { - id: "c-4", - type: "toolResultPart", - toolCallId: "tc-1", - toolName: "readFile", - contents: [{ id: "c-5", type: "textPart", text: "file contents" }], - isError: false, - }, - ], - }, - ] - const result = applyCacheBreakpoints(messages) - expect(result[0].cache).toBe(true) - expect(result[1].cache).toBeUndefined() - expect(result[2].cache).toBeUndefined() - expect(result[3].cache).toBe(true) - }) - - it("does not mutate the original messages array", () => { - const messages: Message[] = [ - { - type: "instructionMessage", - id: "msg-1", - cache: true, - contents: [{ id: "c-1", type: "textPart", text: "System prompt" }], - }, - { - type: "userMessage", - id: "msg-2", - contents: [{ id: "c-2", type: "textPart", text: "Hello" }], - }, - ] - const original = JSON.parse(JSON.stringify(messages)) - applyCacheBreakpoints(messages) - expect(messages).toEqual(original) - }) - - it("avoids creating new object when last message already has cache true", () => { - const lastMsg: Message = { - type: "expertMessage", - id: "msg-2", - cache: true, - contents: [{ id: "c-2", type: "textPart", text: "Response" }], - } - const messages: Message[] = [ - { - type: "instructionMessage", - id: "msg-1", - cache: true, - contents: [{ id: "c-1", type: "textPart", text: "System prompt" }], - }, - lastMsg, - ] - const result = applyCacheBreakpoints(messages) - expect(result[1]).toBe(lastMsg) - }) - - it("handles multi-turn conversation correctly (< 20 blocks, single BP)", () => { - const messages: Message[] = [ - { - type: "instructionMessage", - id: "msg-1", - cache: true, - contents: [{ id: "c-1", type: "textPart", text: "System" }], - }, - { - type: "userMessage", - id: "msg-2", - contents: [{ id: "c-2", type: "textPart", text: "Input" }], - }, - { - type: "expertMessage", - id: "msg-3", - cache: true, - contents: [ - { - id: "c-3", - type: "toolCallPart", - toolCallId: "tc-1", - toolName: "search", - args: {}, - }, - ], - }, - { - type: "toolMessage", - id: "msg-4", - cache: true, - contents: [ - { - id: "c-4", - type: "toolResultPart", - toolCallId: "tc-1", - toolName: "search", - contents: [{ id: "c-5", type: "textPart", text: "results" }], - isError: false, - }, - ], - }, - { - type: "expertMessage", - id: "msg-5", - contents: [ - { - id: "c-6", - type: "toolCallPart", - toolCallId: "tc-2", - toolName: "write", - args: {}, - }, - ], - }, - { - type: "toolMessage", - id: "msg-6", - contents: [ - { - id: "c-7", - type: "toolResultPart", - toolCallId: "tc-2", - toolName: "write", - contents: [{ id: "c-8", type: "textPart", text: "done" }], - isError: false, - }, - ], - }, - ] - - const result = applyCacheBreakpoints(messages) - - expect(result[0].cache).toBe(true) - expect(result[1].cache).toBeUndefined() - expect(result[2].cache).toBeUndefined() - expect(result[3].cache).toBeUndefined() - expect(result[4].cache).toBeUndefined() - expect(result[5].cache).toBe(true) - }) - - it("places second BP when content blocks exceed 20-block lookback window", () => { - // Build a conversation with > 20 content blocks: - // Each tool-use step = expertMessage(3 parts) + toolMessage(1 part) = 4 blocks - // 6 steps = 24 blocks + 1 userMessage = 25 blocks total - const messages: Message[] = [ - { - type: "instructionMessage", - id: "instr", - cache: true, - contents: [{ id: "c-instr", type: "textPart", text: "System" }], - }, - { - type: "userMessage", - id: "user-0", - contents: [{ id: "c-u0", type: "textPart", text: "Go" }], - }, - ] - for (let step = 0; step < 6; step++) { - messages.push({ - type: "expertMessage", - id: `expert-${step}`, - contents: [ - { id: `th-${step}`, type: "thinkingPart", thinking: "...", signature: "sig" }, - { id: `txt-${step}`, type: "textPart", text: `Step ${step}` }, - { - id: `tc-${step}`, - type: "toolCallPart", - toolCallId: `tc-${step}`, - toolName: "doWork", - args: {}, - }, - ], - }) - messages.push({ - type: "toolMessage", - id: `tool-${step}`, - contents: [ - { - id: `tr-${step}`, - type: "toolResultPart", - toolCallId: `tc-${step}`, - toolName: "doWork", - contents: [{ id: `trr-${step}`, type: "textPart", text: "ok" }], - isError: false, - }, - ], - }) - } - // Total non-instruction: 1 (user) + 6*(3+1) = 25 blocks, 13 messages - - const result = applyCacheBreakpoints(messages) - - // BP1: instruction (index 0) - expect(result[0].cache).toBe(true) - // Last message (index 13) should always have BP - expect(result[13].cache).toBe(true) - - // A second BP should exist somewhere in the middle - const middleBps = result.slice(1, 13).filter((m) => m.cache === true) - expect(middleBps.length).toBe(1) - - // All other non-BP messages should have cache cleared - const nonBpMessages = result.slice(1).filter((m) => m.cache !== true) - for (const msg of nonBpMessages) { - expect(msg.cache).toBeUndefined() - } - }) - - it("places up to 3 message BPs for very long conversations (> 40 blocks)", () => { - // 11 tool-use steps = 44 blocks + 1 user = 45 blocks total - const messages: Message[] = [ - { - type: "instructionMessage", - id: "instr", - cache: true, - contents: [{ id: "c-instr", type: "textPart", text: "System" }], - }, - { - type: "userMessage", - id: "user-0", - contents: [{ id: "c-u0", type: "textPart", text: "Go" }], - }, - ] - for (let step = 0; step < 11; step++) { - messages.push({ - type: "expertMessage", - id: `expert-${step}`, - contents: [ - { id: `th-${step}`, type: "thinkingPart", thinking: "...", signature: "sig" }, - { id: `txt-${step}`, type: "textPart", text: `Step ${step}` }, - { - id: `tc-${step}`, - type: "toolCallPart", - toolCallId: `tc-${step}`, - toolName: "doWork", - args: {}, - }, - ], - }) - messages.push({ - type: "toolMessage", - id: `tool-${step}`, - contents: [ - { - id: `tr-${step}`, - type: "toolResultPart", - toolCallId: `tc-${step}`, - toolName: "doWork", - contents: [{ id: `trr-${step}`, type: "textPart", text: "ok" }], - isError: false, - }, - ], - }) - } - // Total non-instruction: 1 + 11*4 = 45 blocks, 24 messages (indices 0..24) - - const result = applyCacheBreakpoints(messages) - - // BP1: instruction - expect(result[0].cache).toBe(true) - // Last message always has BP - expect(result[result.length - 1].cache).toBe(true) - - // Should have 3 message BPs total (max) - const messageBps = result.slice(1).filter((m) => m.cache === true) - expect(messageBps.length).toBe(3) - }) - - it("does not exceed 3 message BPs even for extremely long conversations", () => { - // 20 tool-use steps = 80 blocks + 1 user = 81 blocks total - const messages: Message[] = [ - { - type: "instructionMessage", - id: "instr", - cache: true, - contents: [{ id: "c-instr", type: "textPart", text: "System" }], - }, - { - type: "userMessage", - id: "user-0", - contents: [{ id: "c-u0", type: "textPart", text: "Go" }], - }, - ] - for (let step = 0; step < 20; step++) { - messages.push({ - type: "expertMessage", - id: `expert-${step}`, - contents: [ - { id: `th-${step}`, type: "thinkingPart", thinking: "...", signature: "sig" }, - { id: `txt-${step}`, type: "textPart", text: `Step ${step}` }, - { - id: `tc-${step}`, - type: "toolCallPart", - toolCallId: `tc-${step}`, - toolName: "doWork", - args: {}, - }, - ], - }) - messages.push({ - type: "toolMessage", - id: `tool-${step}`, - contents: [ - { - id: `tr-${step}`, - type: "toolResultPart", - toolCallId: `tc-${step}`, - toolName: "doWork", - contents: [{ id: `trr-${step}`, type: "textPart", text: "ok" }], - isError: false, - }, - ], - }) - } - - const result = applyCacheBreakpoints(messages) - - // Should still max out at 3 message BPs - const messageBps = result.slice(1).filter((m) => m.cache === true) - expect(messageBps.length).toBe(3) - - // Last message always has BP - expect(result[result.length - 1].cache).toBe(true) - }) -}) diff --git a/packages/runtime/src/messages/message.ts b/packages/runtime/src/messages/message.ts index 332eea7c..d4bb1422 100644 --- a/packages/runtime/src/messages/message.ts +++ b/packages/runtime/src/messages/message.ts @@ -132,75 +132,6 @@ export function messageToCoreMessage(message: Message): ModelMessage { } } -/** - * Apply cache breakpoints to messages for Anthropic prompt caching. - * - * Anthropic allows up to 4 explicit cache breakpoints per request and uses a - * 20-block lookback window from each breakpoint to find cache matches. - * - * Breakpoint strategy: - * - BP1: Instruction message (caches tools + system; prefix order: tools → system → messages) - * - BP2–BP4: Up to 3 breakpoints across conversation messages, placed every ~20 content - * blocks (working backwards from the last message) to ensure full lookback coverage. - * - * This is a pure function — it does not mutate the input array. - * - * For non-Anthropic providers, cache flags are harmless (messageToCoreMessage - * only translates them to Anthropic providerOptions). - */ -export function applyCacheBreakpoints(messages: ReadonlyArray): Message[] { - if (messages.length <= 1) { - return [...messages] - } - - // BP2–BP4: up to 3 message breakpoints (BP1 is the instruction message) - const MAX_MESSAGE_BREAKPOINTS = 3 - // Anthropic checks up to 20 content blocks before each breakpoint - const LOOKBACK_WINDOW = 20 - - // Collect non-instruction messages with their content block counts - const msgMeta: Array<{ originalIndex: number; blockCount: number }> = [] - for (let i = 0; i < messages.length; i++) { - if (messages[i].type !== "instructionMessage") { - msgMeta.push({ originalIndex: i, blockCount: messages[i].contents.length }) - } - } - - // Determine which messages get cache breakpoints - const breakpointIndices = new Set() - - if (msgMeta.length > 0) { - // Always place a breakpoint on the last message (dynamic frontier) - breakpointIndices.add(msgMeta[msgMeta.length - 1].originalIndex) - let bpsPlaced = 1 - let blocksSinceBp = 0 - - // Walk backwards, placing a breakpoint every ~20 content blocks - for (let i = msgMeta.length - 2; i >= 0 && bpsPlaced < MAX_MESSAGE_BREAKPOINTS; i--) { - blocksSinceBp += msgMeta[i].blockCount - if (blocksSinceBp >= LOOKBACK_WINDOW) { - breakpointIndices.add(msgMeta[i].originalIndex) - bpsPlaced++ - blocksSinceBp = 0 - } - } - } - - return messages.map((msg, index) => { - // Preserve instruction message cache (BP1: caches tools + system) - if (msg.type === "instructionMessage") { - return msg - } - - if (breakpointIndices.has(index)) { - return msg.cache === true ? msg : { ...msg, cache: true } - } - - // Clear stale cache flags from non-strategic positions - return msg.cache ? { ...msg, cache: undefined } : msg - }) -} - function instructionContentsToCoreContent( contents: InstructionMessage["contents"], ): SystemModelMessage["content"] { diff --git a/packages/runtime/src/state-machine/states/generating-tool-call.ts b/packages/runtime/src/state-machine/states/generating-tool-call.ts index 30d44ac0..0bcdf011 100644 --- a/packages/runtime/src/state-machine/states/generating-tool-call.ts +++ b/packages/runtime/src/state-machine/states/generating-tool-call.ts @@ -22,7 +22,6 @@ import { getToolSet } from "../../helpers/tool-set.js" import { createEmptyUsage, sumUsage, usageFromGenerateTextResult } from "../../helpers/usage.js" import type { StreamCallbacks } from "../../llm/types.js" import { - applyCacheBreakpoints, createExpertMessage, createToolMessage, createUserMessage, @@ -101,7 +100,6 @@ export async function generatingToolCallLogic({ llmExecutor, }: RunSnapshot["context"]): Promise { const { messages } = checkpoint - const cachedMessages = applyCacheBreakpoints(messages) // Track if reasoning was completed via callback (to avoid duplicate emissions) let reasoningCompletedViaCallback = false @@ -129,7 +127,7 @@ export async function generatingToolCallLogic({ const executionResult = await llmExecutor.streamText( { - messages: cachedMessages.map(messageToCoreMessage), + messages: messages.map(messageToCoreMessage), maxRetries: setting.maxRetries, tools: getToolSet(skillManager), toolChoice: "auto", diff --git a/packages/runtime/src/state-machine/states/init.test.ts b/packages/runtime/src/state-machine/states/init.test.ts index 0e55473d..064e7e5d 100644 --- a/packages/runtime/src/state-machine/states/init.test.ts +++ b/packages/runtime/src/state-machine/states/init.test.ts @@ -43,7 +43,6 @@ describe("@perstack/runtime: StateMachineLogic['Init']", () => { type: "instructionMessage", id: expect.any(String), contents: [{ type: "textPart", id: expect.any(String), text: expect.any(String) }], - cache: true, }, { type: "userMessage", From 59a28048c0ed981f6ed346d5c47ec50602be447d Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Wed, 25 Feb 2026 03:03:47 +0000 Subject: [PATCH 7/7] Fix: Move @ai-sdk/anthropic dependency to correct package scope Co-Authored-By: Claude Opus 4.6 --- bun.lock | 5 +---- package.json | 5 +---- packages/providers/anthropic/package.json | 2 +- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/bun.lock b/bun.lock index 9ab53252..b3ba2c4d 100644 --- a/bun.lock +++ b/bun.lock @@ -4,9 +4,6 @@ "workspaces": { "": { "name": "perstack-monorepo", - "dependencies": { - "@ai-sdk/anthropic": "^3.0.47", - }, "devDependencies": { "@biomejs/biome": "^2.4.2", "@changesets/changelog-github": "^0.5.2", @@ -159,7 +156,7 @@ "name": "@perstack/anthropic-provider", "version": "0.0.29", "dependencies": { - "@ai-sdk/anthropic": "^3.0.44", + "@ai-sdk/anthropic": "^3.0.47", "@perstack/core": "workspace:*", "@perstack/provider-core": "workspace:*", }, diff --git a/package.json b/package.json index f9773419..5434341c 100644 --- a/package.json +++ b/package.json @@ -42,8 +42,5 @@ "engines": { "bun": ">=1.2.0" }, - "packageManager": "bun@1.3.5", - "dependencies": { - "@ai-sdk/anthropic": "^3.0.47" - } + "packageManager": "bun@1.3.5" } diff --git a/packages/providers/anthropic/package.json b/packages/providers/anthropic/package.json index 271f1312..8a37688f 100644 --- a/packages/providers/anthropic/package.json +++ b/packages/providers/anthropic/package.json @@ -27,7 +27,7 @@ "test": "bun test" }, "dependencies": { - "@ai-sdk/anthropic": "^3.0.44", + "@ai-sdk/anthropic": "^3.0.47", "@perstack/core": "workspace:*", "@perstack/provider-core": "workspace:*" },